source: lab/trunk/Dev/pukiwikiplus/getsource.pl @ 121

Last change on this file since 121 was 121, checked in by mitty, 13 years ago
  • accept -f option to save wiki text to file
File size: 921 bytes
Line 
1#! /usr/bin/perl
2
3use strict;
4use warnings;
5use utf8;
6
7use Web::Scraper;
8use URI;
9use Data::Validate::URI qw(is_uri);
10use Encode;
11
# ---------------------------------------------------------------------------
# Main script.
#
# Usage: getsource.pl [-f] url_to_pukiwikiplus_page
#
# Rewrites the given page URL into its "cmd=source" form (unless it already
# is one), scrapes the text of the <pre id="source"> element from the
# response, and prints it UTF-8 encoded.
#
#   -f   write the text to "<wikifile(page name)>.txt" in the current
#        directory instead of standard output.
#
# Progress messages ("getting: ...", "saving: ...") go to STDERR so they
# never mix with the wiki text itself.
# ---------------------------------------------------------------------------

# Consume the optional -f flag first, so that the argument-count check below
# also catches "-f" given without a URL.
my $save_to_file = 0;
if (@ARGV && $ARGV[0] eq '-f') {
    shift @ARGV;
    $save_to_file = 1;
}

if (@ARGV < 1) {
    print "$0: [-f] url_to_pukiwikiplus_page\n";
    exit;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

# Work out the page name and make sure $url points at the source view.
my $page;
if ($url =~ /cmd=source/) {
    # Already a source-view URL: leave it alone and take the page name from
    # its page= parameter (needed only to build the output file name).
    ($page) = $url =~ /[?&;]page=([^&;#]*)/;
}
elsif ($url =~ /\?(.+)/) {
    # Everything after the '?' is treated as the page name.
    $page = $1;
    $url =~ s#\?.+#?cmd=source&page=$page#;
}

# Decide the output file name up front, so that an unusable URL is reported
# before any network traffic and before any file is touched.
my $file;
if ($save_to_file) {
    die "$0: cannot determine page name from '$url'\n"
        unless defined $page && length $page;
    $file = wikifile($page) . '.txt';
}

print STDERR "getting: '$url'\n";

my $source = scraper {
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );

# Without a matching element the scraper yields undef; stop here rather than
# truncating an existing file or printing nothing with a warning.
die "$0: no <pre id=\"source\"> element found at '$url'\n"
    unless defined $text;

if (defined $file) {
    print STDERR "saving: '$file'\n";
    # Three-argument open: the file name can never be parsed as a mode.
    open STDOUT, '>', $file
        or die "$0: cannot open '$file' for writing: $!\n";
}
print encode('utf8', $text);

# Buffered write errors (disk full, closed pipe, ...) only surface on close.
close STDOUT
    or die "$0: error while writing output: $!\n";
46
47
# wikifile($pagename)
#
# Takes a page name that may contain URL percent-escapes (%XX) and returns
# the upper-case hexadecimal dump of its decoded bytes, e.g.
# "FrontPage" -> "46726F6E7450616765".
# NOTE(review): presumably this mirrors the on-disk page file naming used by
# PukiWiki -- confirm against the wiki installation.
sub wikifile {
    my ($name) = @_;

    # Turn every %XX escape back into the single byte it stands for.
    $name =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;

    # Hex-dump the whole decoded string, then upper-case the hex digits.
    my $hex = unpack("H*", $name);
    return uc $hex;
}
Note: See TracBrowser for help on using the repository browser.