From 151083b801351c91b78545e521da845c1b139530 Mon Sep 17 00:00:00 2001 From: mitty Date: Mon, 22 Apr 2013 06:50:49 +0000 Subject: [PATCH] * crawler for Nikkei editorial articles git-svn-id: https://lab.mitty.jp/svn/lab/trunk@213 7d2118f6-f56c-43e7-95a2-4bb3031d96e7 --- misc/nikkei.pl | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 misc/nikkei.pl diff --git a/misc/nikkei.pl b/misc/nikkei.pl new file mode 100644 index 0000000..1df153d --- /dev/null +++ b/misc/nikkei.pl @@ -0,0 +1,37 @@ +#! /usr/bin/env perl -w + +use strict; +use warnings; +use utf8; + +use LWP::Simple; +binmode STDOUT => 'encoding(utf8)'; + +my $directory = shift @ARGV || "./"; +my $nikkei_url = 'http://www.nikkei.com/news/editorial/'; +my $nikkei = 'http://www.nikkei.com'; + +my $regex = 'href="([^"]+)(DGXDZO\w+000)/'; +my $javascript = '