From: mitty Date: Mon, 22 Apr 2013 06:50:49 +0000 (+0000) Subject: * crawler for Nikkei editorial articles X-Git-Url: http://lab.mitty.jp/git/?a=commitdiff_plain;h=151083b801351c91b78545e521da845c1b139530;p=lab.git * crawler for Nikkei editorial articles git-svn-id: https://lab.mitty.jp/svn/lab/trunk@213 7d2118f6-f56c-43e7-95a2-4bb3031d96e7 --- diff --git a/misc/nikkei.pl b/misc/nikkei.pl new file mode 100644 index 0000000..1df153d --- /dev/null +++ b/misc/nikkei.pl @@ -0,0 +1,37 @@ +#! /usr/bin/env perl -w + +use strict; +use warnings; +use utf8; + +use LWP::Simple; +binmode STDOUT => 'encoding(utf8)'; + +my $directory = shift @ARGV || "./"; +my $nikkei_url = 'http://www.nikkei.com/news/editorial/'; +my $nikkei = 'http://www.nikkei.com'; + +my $regex = 'href="([^"]+)(DGXDZO\w+000)/'; +my $javascript = '