* output string contains numeric character references
author mitty <mitty@7d2118f6-f56c-43e7-95a2-4bb3031d96e7>
Fri, 25 Jan 2013 16:38:41 +0000 (16:38 +0000)
committer mitty <mitty@7d2118f6-f56c-43e7-95a2-4bb3031d96e7>
Fri, 25 Jan 2013 16:38:41 +0000 (16:38 +0000)
git-svn-id: https://lab.mitty.jp/svn/lab/trunk@197 7d2118f6-f56c-43e7-95a2-4bb3031d96e7

misc/findnonsjis.pl

index 1ae7aef..71e9a0a 100755 (executable)
@@ -18,7 +18,8 @@ sub match {
     
     my $sjis = encode("cp932", $utf8->decode($str), Encode::FB_HTMLCREF);
     if ($sjis =~ /&#\d{4,};/) {
-        return 1;
+        Encode::from_to($sjis, "cp932", "utf8");
+        return $sjis;
     }
     
     return '';
@@ -34,8 +35,8 @@ sub checkdir {
     
     my @dirs;
     while (my $entry = shift @entries) {
-        if (match($entry)) {
-            print "'$target/$entry' contains non Shift_JIS character\n";
+        if (my $convert = match($entry)) {
+            print "'$target/$entry' can be converted to '$convert'\n";
             next;
         }
         if (-d "$target/$entry") {