* getsource.pl -> gets the PukiWiki-formatted source of PukiWiki Plus! pages
git-svn-id: https://lab.mitty.jp/svn/lab/trunk@120 7d2118f6-f56c-43e7-95a2-4bb3031d96e7
--- /dev/null
+#! /usr/bin/perl
+
+use strict;
+use warnings;
+use utf8;
+
+use Web::Scraper;
+use URI;
+use Data::Validate::URI qw(is_uri);
+use Encode;
+
+# Fetch the PukiWiki-formatted source of a PukiWiki Plus! page and write it
+# to STDOUT, encoded with Encode's 'utf8' encoding.
+#
+# Usage: getsource.pl url_to_pukiwikiplus_page
+#
+# Unless the URL already contains "cmd=source", its query string is rewritten
+# to "?cmd=source&page=<original query>".  The text of the element matched by
+# the CSS selector "pre#source" is then scraped from that URL and printed.
+# Progress and error messages go to STDERR so that STDOUT carries only the
+# page source.
+
+if (@ARGV < 1) {
+    # Usage goes to STDERR with a failing status: STDOUT is the data channel,
+    # and a missing argument must not look like a successful run.
+    print STDERR "$0: url_to_pukiwikiplus_page\n";
+    exit 1;
+}
+
+my $url = $ARGV[0];
+die "$0: '$url' is not URI" unless is_uri($url);
+
+# Treat everything after '?' as the page name and request its source view.
+# A URL without a query string is left unchanged.
+if ($url !~ /cmd=source/ ) {
+    $url =~ s#\?(.+)#\?cmd=source&page=$1#;
+}
+
+print STDERR "getting: '$url'\n";
+
+# Scraper that yields the text content of the "pre#source" element.
+my $source = scraper {
+    process "pre#source", "source" => "TEXT";
+    result "source";
+};
+
+my $text = $source->scrape( URI->new($url) );
+
+# Without this check an undefined result would only produce an
+# "uninitialized value" warning and empty output with a zero exit status.
+die "$0: no 'pre#source' element found at '$url'\n" unless defined $text;
+
+print encode('utf8', $text);