#! /usr/bin/perl
use strict;
use warnings;
use utf8;
use Web::Scraper;
use URI;
use Data::Validate::URI qw(is_uri);
use Encode;

## Get the wiki-formatted source text of a PukiWiki Plus! page.
#
# Usage: script [-f] url_to_pukiwikiplus_page
#
#   -f   save the text to a file named after the page (see wikifile())
#        instead of printing it to STDOUT.
#
# If the URL does not already address the source plugin (cmd=source),
# its whole query string is taken as the page name and the URL is
# rewritten to "?cmd=source&page=<query string>".

# '-f' must be the first argument; remember it as a flag only.
my $save_to_file = 0;
if (@ARGV && $ARGV[0] eq '-f') {
    shift @ARGV;
    $save_to_file = 1;
}

# Checked after consuming '-f' so that "-f" alone also shows the usage
# instead of continuing with an undefined URL.
if (@ARGV < 1) {
    print "$0: [-f] url_to_pukiwikiplus_page\n";
    exit;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

# Work out the page name and make sure $url points at the source plugin.
my $pagename;
if ($url =~ /cmd=source/) {
    # Already a source-plugin URL: the page name is the 'page' parameter.
    ($pagename) = $url =~ m{[?&;]page=([^&;#]+)};
}
elsif ($url =~ m{\?(?<page>.+)}) {
    # set url to page of source plugin page (source.inc.php)
    $pagename = $+{page};
    $url =~ s{\?.+}{?cmd=source&page=$pagename};
}

# The output file name can only be derived from a known page name.
my $file;
if ($save_to_file) {
    die "$0: cannot determine page name from '$url' (needed for -f)\n"
        unless defined $pagename && length $pagename;
    $file = wikifile($pagename) . '.txt';
}

print STDERR "getting: '$url'\n";

my $source = scraper {
    # scrape page with '<pre id="source">' tag
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );
die "$0: no <pre id=\"source\"> element found at '$url'\n"
    unless defined $text;

# Write to the derived file when requested, otherwise to STDOUT.
my $out_fh = \*STDOUT;
if (defined $file) {
    print STDERR "saving: '$file'\n";
    open my $fh, '>', $file
        or die "$0: cannot open '$file' for writing: $!\n";
    $out_fh = $fh;
}

print {$out_fh} encode('utf8', $text);

# Buffered write errors only surface at close time.
close $out_fh or die "$0: error while writing output: $!\n";

# wikifile($pagename)
#
# Get the wiki text file base name from a page name: percent-escapes
# (%XX) in $pagename are decoded to raw bytes, then every byte is
# written as two upper-case hexadecimal digits.
#
# Returns the hex string; the caller appends the '.txt' extension.
sub wikifile {
    my ($pagename) = @_;

    # Undo URL percent-encoding so the hex dump reflects the real bytes.
    $pagename =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;

    return uc unpack('H*', $pagename);
}