#!/usr/bin/perl
#
# getsource.pl - print the PukiWiki-formatted source of a Pukiwiki Plus! page.
#
# Usage: getsource.pl url_to_pukiwikiplus_page
#
# The page URL is rewritten to its "cmd=source" view, that view is fetched,
# and the text of its "pre#source" element is written to STDOUT encoded as
# UTF-8. Progress and error messages go to STDERR so that STDOUT carries
# nothing but the page source.
#
# Provenance:
#   File:    Dev/pukiwikiplus/getsource.pl
#   From:    mitty
#   Date:    Fri, 20 May 2011 14:44:30 +0000 (+0000)
#   Subject: * script for Pukiwiki Plus!
#            * getsource.pl -> getting pukiwiki formatted source from
#              Pukiwiki Plus! pages
#   X-Git-Url: http://lab.mitty.jp/git/?a=commitdiff_plain;h=74f778c0eeed802c12174e0e6cff4851670e050d;p=lab.git
#   git-svn-id: https://lab.mitty.jp/svn/lab/trunk@120 7d2118f6-f56c-43e7-95a2-4bb3031d96e7

use strict;
use warnings;
use utf8;

use Web::Scraper;
use URI;
use Data::Validate::URI qw(is_uri);
use Encode;

# Usage errors belong on STDERR with a non-zero status: STDOUT is reserved
# for the wiki source, and callers need to be able to detect the failure.
if (@ARGV < 1) {
    print STDERR "$0: url_to_pukiwikiplus_page\n";
    exit 1;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

my $source_uri = source_uri($url);

print STDERR "getting: '$source_uri'\n";

# Extract the text content of the element matching "pre#source".
my $source = scraper {
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape($source_uri);

# Without this check a page lacking the element would only produce an
# "uninitialized value" warning and empty output.
die "$0: no 'pre#source' element found at '$source_uri'\n"
    unless defined $text;

print encode('UTF-8', $text);

# source_uri($url)
#
# Build the "cmd=source" URI for a page URL.
#
#   $url - page URL as given on the command line.
#
# Returns a URI object. The URL is returned unchanged when it has no query
# string (there is no page name to work with) or when its query already
# contains the parameter "cmd=source".
#
# Otherwise the query is replaced by "cmd=source&page=<page>", where <page>
# is the value of an existing "page=" parameter if there is one
# (e.g. "?cmd=read&page=Foo"), or else the whole original query
# (e.g. "?Foo"). The page name is reused in its escaped form as found in
# the URL; it is not decoded and re-encoded.
sub source_uri {
    my ($url) = @_;

    my $uri = URI->new($url);

    # is_uri() accepts schemes whose URI class has no query component.
    return $uri unless $uri->can('query');

    my $query = $uri->query;
    return $uri unless defined $query && length $query;

    my @params = split /[&;]/, $query;
    return $uri if grep { $_ eq 'cmd=source' } @params;

    my ($page) = map { /\Apage=(.*)\z/s ? $1 : () } @params;
    $page = $query unless defined $page;

    $uri->query("cmd=source&page=$page");
    return $uri;
}