source:
lab.git/misc/findnonsjis.pl
@
41617a0
Last change on this file since 41617a0 was bf1a1ea, checked in by mitty <mitty@…>, 12 years ago | |
---|---|
|
|
File size: 1.0 KB |
Rev | Line | |
---|---|---|
[cdcfe90] | 1 | #! /usr/bin/perl -w |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | use utf8; | |
6 | ||
[6f4fe5e] | 7 | use Encode; |
[cdcfe90] | 8 | |
# Entry point: the first command-line argument is the directory to scan.
# A missing or non-directory argument is reported on STDERR instead of
# exiting silently; "defined" is used so that a directory literally named
# "0" is still accepted.
my $top = shift @ARGV;
if (!defined $top || !-d $top) {
    print STDERR "usage: $0 <directory>\n";
    exit 1;
}

# File-scoped UTF-8 encoding object, looked up once at startup.
my $utf8 = find_encoding("utf8");

checkdir($top);
15 | ||
# match($name) - check whether a file name can be represented in CP932.
#
# $name is the raw byte string of a directory entry, interpreted as UTF-8.
#
# Returns '' when every character of the name can be encoded in CP932.
# Otherwise returns the name as UTF-8 bytes in which each unencodable
# character is replaced by a decimal HTML character reference ("&#NNN;").
sub match {
    my ($str) = @_;

    my $chars = Encode::decode("utf8", $str);

    # Count the fallbacks as they happen instead of searching the output
    # for "&#NNNN;": that search missed code points below 1000 (e.g.
    # U+00E9 becomes "&#233;") and wrongly flagged names that merely
    # contain such text literally.  The callback emits the same decimal
    # reference format as Encode::FB_HTMLCREF.
    my $unmapped = 0;
    my $sjis = Encode::encode(
        "cp932",
        $chars,
        sub {
            my ($ord) = @_;
            $unmapped++;
            return sprintf('&#%d;', $ord);
        },
    );
    return '' unless $unmapped;

    # Convert the CP932 bytes back to UTF-8 so the result can be printed
    # next to the original (UTF-8) name.
    Encode::from_to($sjis, "cp932", "utf8");
    return $sjis;
}
27 | ||
# checkdir($target) - recursively scan a directory for entry names that
# cannot be converted to CP932.
#
# Prints a progress line for every directory visited to STDERR, and one
# line to STDOUT for every entry for which match() returns a non-empty
# replacement name.
#
# Returns $target when the directory cannot be opened (a warning is
# printed in that case); returns nothing otherwise.
sub checkdir {
    my ($target) = @_;

    print STDERR "# checking '$target'\n";

    my $dir;
    unless (opendir($dir, $target)) {
        warn "# cannot open '$target': $!\n";
        return $target;
    }
    # Drop only the exact "." and ".." entries.
    my @entries = sort grep { !m/\A\.\.?\z/ } readdir($dir);
    closedir($dir);

    # Iterate with "for" rather than "while (my $e = shift ...)": the
    # latter stops at the first false value, so an entry named "0" would
    # silently end the scan of this directory.
    my @dirs;
    for my $entry (@entries) {
        if (my $convert = match($entry)) {
            print "'$target/$entry' can be converted to '$convert'\n";
            # A flagged entry is only reported; it is not descended into
            # even when it is a directory.
            next;
        }
        if (-d "$target/$entry") {
            push @dirs, $entry;
        }
    }

    # Sub-directories are visited in reverse sorted order, as before.
    for my $entry (reverse @dirs) {
        checkdir("$target/$entry");
    }

    return;
}
Note: See TracBrowser
for help on using the repository browser.