#! /usr/bin/env perl -w

use strict;
use warnings;
use utf8;

use LWP::Simple;
binmode STDOUT => 'encoding(utf8)';

# Mirror the articles linked from the Nikkei editorial index page.
#
# Usage: <script> [directory]
#
# Fetches $nikkei_url, finds every href that contains an article id of the
# form DGXDZO...000, downloads each article with wget and saves it as
# <directory>/<id>.html (default directory: the current one) with its
# <script> elements removed.  Articles whose final file already exists are
# skipped, so the script can be re-run safely.

my $directory = shift(@ARGV) || '.';
-d $directory
    or die "Output directory '$directory' does not exist or is not a directory\n";

my $nikkei_url = 'http://www.nikkei.com/news/editorial/';
my $nikkei     = 'http://www.nikkei.com';

# Capture 1 is the part of the href before the article id, capture 2 the id.
my $regex = qr{href="([^"]+)(DGXDZO\w+000)/};

# /s lets a script body span several lines, /i accepts <SCRIPT>, and
# \b[^>]*> also covers a bare <script> tag that has no attributes.
my $javascript = qr{<script\b[^>]*>.*?</script>}is;

# Return the entire contents of the file at $path; dies on I/O failure.
sub _read_file {
    my ($path) = @_;

    open my $in, '<', $path or die "cannot open $path for reading: $!\n";
    local $/;    # slurp mode: read the whole file in one go
    my $data = <$in>;
    close $in or die "cannot close $path: $!\n";

    return defined $data ? $data : '';
}

# Replace the contents of the file at $path with $data; dies on I/O failure.
sub _write_file {
    my ($path, $data) = @_;

    open my $out, '>', $path or die "cannot open $path for writing: $!\n";
    print {$out} $data or die "cannot write to $path: $!\n";
    # Buffered write errors only surface at close, so check it too.
    close $out or die "cannot close $path: $!\n";

    return;
}

my $content = get($nikkei_url);
defined $content
    or die "Could not fetch $nikkei_url\n";

while ($content =~ /$regex/g) {
    # Copy the captures at once: any later pattern match overwrites $1/$2.
    my ($prefix, $id) = ($1, $2);

    # Only prepend the site root when the href is not already absolute.
    my $base    = $prefix =~ m{\Ahttps?://}i ? '' : $nikkei;
    my $article = "$base$prefix$id/";
    my $file    = "$directory/$id.html";

    next if -f $file;

    # Download to a temporary name and rename when finished, so that the
    # final file only ever exists in its complete, script-free form.  wget
    # creates its -O target even when the download fails; writing straight
    # to $file would make the -f check above skip a broken article forever.
    my $partial = "$file.part";

    my $status = system('wget', '-q', $article, '-O', $partial);
    if ($status == -1) {
        die "Could not run wget: $!\n";
    }
    if ($status & 127) {
        # wget was killed by a signal (e.g. Ctrl-C): stop instead of
        # carrying on with the remaining articles.
        unlink $partial;
        die "wget terminated by signal " . ($status & 127) . "\n";
    }

    sleep 1;    # pause between requests to the server

    if ($status != 0) {
        warn "wget failed for $article (exit code " . ($status >> 8) . ")\n";
        unlink $partial;
        next;
    }

    my $ok = eval {
        my $body = _read_file($partial);
        $body =~ s/$javascript//g;
        _write_file($partial, $body);
        rename $partial, $file
            or die "cannot rename $partial to $file: $!\n";
        1;
    };
    if (!$ok) {
        warn "Skipping $article: $@";
        unlink $partial;
    }
}
