1 #! /usr/bin/env perl -w
# NOTE(review): every line of this listing starts with a number that looks
# like a line-number artifact from wherever the listing was copied, and the
# numbering has gaps (2-7, 9, 13, 16, ...). Statements therefore appear to be
# missing from this excerpt -- TODO confirm against the original script.
#
# Purpose, as far as the visible lines show: fetch the page at $nikkei_url,
# find article links in it, download every article that is not already saved
# under $directory with wget, then remove <script> elements from $body and
# reopen the saved file for writing.
#
# Usage: the first command-line argument, if given, is the output directory.
#
# NOTE(review): on platforms that pass everything after the interpreter path
# as a single argument (e.g. Linux), "env" is asked to run a program literally
# named "perl -w". Consider dropping "-w" here in favour of "use warnings".
#
# Apply an encoding layer to STDOUT.
# NOTE(review): layer names are conventionally written with a leading colon,
# e.g. ':encoding(UTF-8)' -- confirm that this colon-less form is accepted.
8 binmode STDOUT => 'encoding(utf8)';
# Output directory: first command-line argument, or "./" when that argument
# is missing or false (so "0" also falls back to "./").
10 my $directory = shift @ARGV || "./";
# Page that is fetched and scanned for article links.
11 my $nikkei_url = 'http://www.nikkei.com/news/editorial/';
# Prefix prepended to each matched href to build the article URL.
12 my $nikkei = 'http://www.nikkei.com';
# Link pattern: $1 is the href text before the article id, $2 is the id itself
# ("DGXDZO", then word characters, ending in "000"); a "/" must follow the id.
# The string is single-quoted, so \w reaches the regex engine unchanged.
14 my $regex = 'href="([^"]+)(DGXDZO\w+000)/';
# Pattern for a <script ...>...</script> element. It requires a space after
# "<script", so a bare "<script>" tag does not match.
15 my $javascript = '<script .*?</script>';
# Fetch the index page.
# NOTE(review): get() is not defined or imported in the visible lines;
# presumably LWP::Simple::get -- confirm. Its result is not checked before the
# match below, so a failed fetch that yields undef would be matched as-is.
17 my $content = get($nikkei_url);
# Iterate over every link match in the index page (/g advances per iteration).
# NOTE(review): the closing brace of this loop is not visible in this excerpt.
18 while ($content =~ /$regex/g) {
# Article URL = site prefix + href prefix + article id + "/".
# NOTE(review): assumes the matched href is site-relative -- TODO confirm.
19 my $article = "$nikkei$1$2/";
# Local file name is derived from the article id.
20 my $file = "$directory/$2.html";
# Skip articles that already exist as a regular file.
22 if (-f "$file") { next; }
# Download with wget; the list form of system() bypasses the shell.
# NOTE(review): the exit status is ignored, so a failed download goes unnoticed.
24 system("wget", "-q", $article, '-O', "$file");
# Open the downloaded file for reading.
# NOTE(review): the result of open is not checked.
28 open my $html, "<", $file;
# Remove script elements from $body.
# NOTE(review): $body is not assigned in the visible lines; presumably it is
# read from $html in a missing line. The substitution has no /s flag, so "."
# does not match newlines and a script element spanning lines stays in place.
32 $body =~ s/$javascript//g;
# Reopen the same path for writing, which truncates the file.
# NOTE(review): the result of open is not checked, and the statement that
# writes $body back is not visible in this excerpt.
34 open $html, ">", $file;