X-Git-Url: http://lab.mitty.jp/git/?a=blobdiff_plain;f=misc%2Ffindnonsjis.pl;h=71e9a0a165d29df505438aa42ae8595c3a9fc53b;hb=bf1a1eab8ab75f495c2d81fae907bed6a071ffae;hp=dbf8c9cf6b9346fd984bdb0241eb2018e6222123;hpb=6f4fe5ee0a6f3874705227f25dcda010177e113b;p=lab.git diff --git a/misc/findnonsjis.pl b/misc/findnonsjis.pl index dbf8c9c..71e9a0a 100755 --- a/misc/findnonsjis.pl +++ b/misc/findnonsjis.pl @@ -3,7 +3,6 @@ use strict; use warnings; use utf8; -use encoding 'utf-8'; use Encode; @@ -19,7 +18,8 @@ sub match { my $sjis = encode("cp932", $utf8->decode($str), Encode::FB_HTMLCREF); if ($sjis =~ /&#\d{4,};/) { - return 1; + Encode::from_to($sjis, "cp932", "utf8"); + return $sjis; } return ''; @@ -28,16 +28,15 @@ sub match { sub checkdir { my $target = shift @_; - print STDERR "checking '$target'\n"; + print STDERR "# checking '$target'\n"; opendir(my $dir, $target) || return $target; - my @entries = sort readdir($dir); + my @entries = sort grep { !m/^(\.|\.\.)$/g } readdir($dir); closedir($dir); my @dirs; while (my $entry = shift @entries) { - next if ($entry =~ /^\.+$/); - if (match($entry)) { - print "'$target/$entry' contains non Shift_JIS character\n"; + if (my $convert = match($entry)) { + print "'$target/$entry' can be converted to '$convert'\n"; next; } if (-d "$target/$entry") {