projects
/
lab.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
* crawler for Nikkei editorial articles
[lab.git]
/
misc
/
findsimplified.pl
diff --git a/misc/findsimplified.pl b/misc/findsimplified.pl
index 024e4cc..bb80414 100755 (executable)
--- a/misc/findsimplified.pl
+++ b/misc/findsimplified.pl
@@ -19,9 +19,12 @@
 sub match {
     my $str = shift @_;
     $str = $utf8->decode($str);
-    my $jtext = $ck->conv_c2j($str);
-    if ($str ne $jtext) {
-        return $utf8->encode($jtext);
+    my $sjis = encode("cp932", $str, Encode::FB_HTMLCREF);
+    if ($sjis =~ /&#\d{4,};/) {
+        my $jtext = $ck->conv_c2j($str);
+        if ($str ne $jtext) {
+            return $utf8->encode($jtext);
+        }
     }
     return '';
 }