#! /usr/bin/perl
use strict;
use warnings;
use utf8;
use Web::Scraper;
use URI;
use Data::Validate::URI qw(is_uri);
use Encode;

## get wiki formatted source text from PukiWiki Plus! page
# '-f' option to save file automatically
#
# usage: <this script> [-f] url_to_pukiwikiplus_page
#   without '-f' the wiki text is printed to STDOUT;
#   with '-f' it is saved to '<wikifile(pagename)>.txt' (relative path).

# '-f' is only a flag; the real file name is derived from the page name below
my $save_to_file = 0;
if (@ARGV and $ARGV[0] eq '-f') {
    shift @ARGV;
    $save_to_file = 1;
}

# checked after removing '-f', so that '-f' without a url also shows the usage
if (@ARGV < 1) {
    print "$0: [-f] url_to_pukiwikiplus_page\n";
    exit;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

my $pagename;
if ($url !~ /cmd=source/ ) {
    # set url to page of source plugin page (source.inc.php);
    # $1 is only trusted when the substitution really matched
    if ($url =~ s#\?(.+)#\?cmd=source&page=$1#) {
        $pagename = $1;
    }
}
elsif ($url =~ /[?&]page=([^&#]+)/) {
    # url already points to the source plugin: take the name from 'page='
    $pagename = $1;
}

my $file;
if ($save_to_file) {
    die "$0: cannot get page name from '$url'\n" unless defined $pagename;
    $file = wikifile($pagename) . '.txt';
}

print STDERR "getting: '$url'\n";

my $source = scraper {
    # scrape page with '<pre id="source">' tag
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );

# checked before opening the output file, so a page without source text
# does not leave an empty file behind
die "$0: no wiki source found in '$url'\n" unless defined $text;

if ($file) {
    print STDERR "saving: '$file'\n";
    open STDOUT, '>', $file or die "$0: cannot open '$file': $!\n";
}
print encode('utf8', $text);

# buffered write errors (e.g. disk full) only show up at close
close STDOUT or die "$0: cannot write output: $!\n";


sub wikifile {
    # Build the wiki text file base name for a page name.
    #   argument: page name, possibly containing %XX URL escapes
    #   returns : upper-case hex dump of the unescaped page name
    #             (e.g. 'FrontPage' -> '46726F6E7450616765')
    my ($encoded_name) = @_;

    # work on a copy: every %XX escape becomes the single byte it stands for
    (my $raw_name = $encoded_name) =~ s/%([0-9A-Fa-f]{2})/pack('H2', $1)/eg;

    # two hex digits per byte, then upper-cased
    my $hex_name = unpack 'H*', $raw_name;
    return uc $hex_name;
}