source: lab.git/Dev/pukiwikiplus/getsource.pl @ dff3e87

trunk
Last change on this file since dff3e87 was e3948e3, checked in by mitty <mitty@…>, 14 years ago
  • add comments

git-svn-id: https://lab.mitty.jp/svn/lab/trunk@122 7d2118f6-f56c-43e7-95a2-4bb3031d96e7

  • Property mode set to 100644
File size: 1.1 KB
RevLine 
[74f778c]1#! /usr/bin/perl
2
3use strict;
4use warnings;
5use utf8;
6
7use Web::Scraper;
8use URI;
9use Data::Validate::URI qw(is_uri);
10use Encode;
11
[e3948e3]12## get wiki formatted source text from PukiWiki Plus! page
13# '-f' option to save file automatically
14
# ---- main script body -------------------------------------------------
# Usage: getsource.pl [-f] url_to_pukiwikiplus_page
#
# Fetches the page served by the PukiWiki Plus! 'source' plugin, extracts
# the text of its <pre id="source"> element and prints it UTF-8 encoded.
# With '-f' the text is written to '<HEX>.txt' instead of STDOUT, where
# <HEX> is produced by wikifile() from the page name.

# Consume the optional '-f' flag BEFORE counting arguments, so that a bare
# "getsource.pl -f" is reported as a usage error rather than dying later on
# an undefined URL.
my $save_to_file = 0;
if (@ARGV && $ARGV[0] eq '-f') {
    shift @ARGV;
    $save_to_file = 1;
}

if (@ARGV < 1) {
    print "$0: [-f] url_to_pukiwikiplus_page\n";
    exit;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

# Work out the page name (needed only for the output file name) and make
# sure the URL points at the source plugin page (source.inc.php).
my $page;
if ($url =~ /cmd=source/) {
    # URL already addresses the source plugin: take the name from its
    # 'page' parameter, if there is one.
    ($page) = $url =~ /[?&;]page=([^&;#]+)/;
}
elsif ($url =~ /\?(.+)/) {
    # Everything after '?' is treated as the page name.
    $page = $1;
    $url =~ s#\?(.+)#\?cmd=source&page=$1#;
}

my $file;
if ($save_to_file) {
    # Refuse to guess a file name; without this check the output would end
    # up in a file literally named '-f' or '.txt'.
    die "$0: cannot determine page name from '$url'\n"
        unless defined $page && length $page;
    $file = wikifile($page) . '.txt';
}

print STDERR "getting: '$url'\n";

my $source = scraper {
    # scrape page with '<pre id="source">' tag
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );

# Stop before touching the output file when nothing was scraped, so an
# existing file is not truncated to zero bytes.
die "$0: no <pre id=\"source\"> element found at '$url'\n"
    unless defined $text;

if (defined $file) {
    print STDERR "saving: '$file'\n";
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as a mode, and failures are reported instead of ignored.
    open my $out, '>', $file
        or die "$0: cannot open '$file' for writing: $!\n";
    print {$out} encode('utf8', $text);
    # Buffered write errors only surface at close time.
    close $out
        or die "$0: cannot close '$file': $!\n";
}
else {
    print encode('utf8', $text);
}
[a1439aa]51
52
sub wikifile {
    # Build the wiki text file base name for a page name.
    #
    # Takes one argument: the page name, possibly containing %XX escapes.
    # Each %XX escape is replaced by the byte it encodes, and the resulting
    # string is returned as upper-case hexadecimal (two digits per byte).
    my ($encoded_name) = @_;

    # Undo percent-encoding in place on our private copy.
    $encoded_name =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;

    # unpack yields lower-case hex digits; the caller wants upper case.
    my $hex_digits = unpack("H*", $encoded_name);
    return uc($hex_digits);
}
Note: See TracBrowser for help on using the repository browser.