source:
lab.git/misc/findnonsjis.pl
@
a1128c4
| Last change on this file since a1128c4 was bf1a1ea, checked in by mitty <mitty@…>, 13 years ago | |
|---|---|
|
|
| File size: 1.0 KB | |
| Rev | Line | |
|---|---|---|
| [cdcfe90] | 1 | #! /usr/bin/perl -w |
| 2 | ||
| 3 | use strict; | |
| 4 | use warnings; | |
| 5 | use utf8; | |
| 6 | ||
| [6f4fe5e] | 7 | use Encode; |
| [cdcfe90] | 8 | |
# The single command-line argument is the top of the directory tree to scan.
# Test definedness rather than truth so that a directory literally named "0"
# is accepted, and report misuse instead of exiting silently with success.
my $top = shift @ARGV;
if (!defined $top || !-d $top) {
    print STDERR "usage: $0 <directory>\n";
    exit 1;
}

# Encoding object for UTF-8, looked up once and shared with match().
my $utf8 = find_encoding("utf8");

checkdir($top);
| 15 | ||
# Report whether a name contains characters that cp932 cannot represent.
#
# $str is the raw byte string of a directory entry; it is decoded as UTF-8.
#
# Returns '' when every character maps to cp932.  Otherwise returns the name
# as UTF-8 bytes with each unmappable character replaced by a decimal HTML
# character reference ("&#NNN;").
sub match {
    my $str = shift @_;

    # Count fallbacks directly instead of searching the output for
    # "&#NNNN;": that search missed code points below 1000 (e.g. U+00E9 ->
    # "&#233;") and misfired on names that literally contain such text.
    my $unmappable = 0;
    my $sjis = encode("cp932", decode("utf8", $str), sub {
        my $ord = shift;
        $unmappable++;
        return sprintf "&#%d;", $ord;
    });
    return '' unless $unmappable;

    # Hand the result back in the same encoding the name arrived in.
    Encode::from_to($sjis, "cp932", "utf8");
    return $sjis;
}
| 27 | ||
# Recursively scan a directory for entry names that match() flags.
#
# Prints a "# checking" progress line to STDERR for every directory visited
# and one line to STDOUT for every entry for which match() returns a
# non-empty replacement name.  A flagged entry is not descended into, even
# when it is a directory.
#
# Returns $target when the directory cannot be opened; the callers in this
# file ignore the return value.
sub checkdir {
    my $target = shift @_;

    print STDERR "# checking '$target'\n";
    my $dir;
    if (!opendir($dir, $target)) {
        # Say why the directory was skipped instead of failing silently.
        print STDERR "# cannot open '$target': $!\n";
        return $target;
    }
    # Exact string comparison: only the real "." and ".." are dropped.
    my @entries = sort grep { $_ ne '.' && $_ ne '..' } readdir($dir);
    closedir($dir);

    # Iterate with for rather than "while (my $e = shift @list)": an entry
    # named "0" is false and would end such a loop early, silently skipping
    # everything after it.
    my @dirs;
    for my $entry (@entries) {
        if (my $convert = match($entry)) {
            print "'$target/$entry' can be converted to '$convert'\n";
            next;
        }
        if (-d "$target/$entry") {
            push @dirs, $entry;
        }
    }

    # Subdirectories are visited in reverse sorted order, as before.
    for my $entry (reverse @dirs) {
        checkdir("$target/$entry");
    }

    return;
}
Note: See TracBrowser
for help on using the repository browser.
