* script for Pukiwiki Plus!
author mitty <mitty@7d2118f6-f56c-43e7-95a2-4bb3031d96e7>
Fri, 20 May 2011 14:44:30 +0000 (14:44 +0000)
committer mitty <mitty@7d2118f6-f56c-43e7-95a2-4bb3031d96e7>
Fri, 20 May 2011 14:44:30 +0000 (14:44 +0000)
 * getsource.pl -> getting pukiwiki formatted source from Pukiwiki Plus! pages

git-svn-id: https://lab.mitty.jp/svn/lab/trunk@120 7d2118f6-f56c-43e7-95a2-4bb3031d96e7

Dev/pukiwikiplus/getsource.pl [new file with mode: 0644]

diff --git a/Dev/pukiwikiplus/getsource.pl b/Dev/pukiwikiplus/getsource.pl
new file mode 100644 (file)
index 0000000..905ace9
--- /dev/null
@@ -0,0 +1,56 @@
+#! /usr/bin/perl
+
+# getsource.pl - print the PukiWiki-formatted source of a PukiWiki Plus! page.
+#
+# Usage: getsource.pl url_to_pukiwikiplus_page
+#
+# The URL is rewritten to request the page with "cmd=source", the element
+# matching the selector "pre#source" is extracted from the response, and its
+# text is written to STDOUT encoded as UTF-8.  Progress and error messages
+# go to STDERR.
+
+use strict;
+use warnings;
+use utf8;
+
+use Web::Scraper;
+use URI;
+use Data::Validate::URI qw(is_uri);
+use Encode;
+
+if (@ARGV < 1) {
+    # A missing argument is a usage error: report it on STDERR and exit
+    # non-zero so that callers and pipelines can detect the failure.
+    print STDERR "$0: url_to_pukiwikiplus_page\n";
+    exit 1;
+}
+
+my $url = $ARGV[0];
+die "$0: '$url' is not URI" unless is_uri($url);
+
+if ($url !~ /cmd=source/) {
+    if ($url =~ /[?&]cmd=[^&#]*/) {
+        # The URL already names a command (e.g. "?cmd=read&page=Name"):
+        # swap the command only, keeping the remaining parameters as they are.
+        $url =~ s/([?&])cmd=[^&#]*/${1}cmd=source/;
+    }
+    else {
+        # Otherwise the whole query string is passed on as the page parameter.
+        $url =~ s#\?(.+)#\?cmd=source&page=$1#;
+    }
+}
+
+print STDERR "getting: '$url'\n";
+
+# Scraper that returns the text content of the element matching "pre#source".
+my $source = scraper {
+    process "pre#source", "source" => "TEXT";
+    result "source";
+};
+
+my $text = $source->scrape( URI->new($url) );
+
+# Nothing matched "pre#source": fail loudly rather than print an undefined value.
+die "$0: no 'pre#source' element found at '$url'" unless defined $text;
+
+print encode('UTF-8', $text);