source: lab.git/Dev/pukiwikiplus/getsource.pl @ 072e48b

trunk
Last change on this file since 072e48b was e3948e3, checked in by mitty <mitty@…>, 14 years ago
  • add comments

git-svn-id: https://lab.mitty.jp/svn/lab/trunk@122 7d2118f6-f56c-43e7-95a2-4bb3031d96e7

  • Property mode set to 100644
File size: 1.1 KB
Line 
1#! /usr/bin/perl
2
3use strict;
4use warnings;
5use utf8;
6
7use Web::Scraper;
8use URI;
9use Data::Validate::URI qw(is_uri);
10use Encode;
11
12## get wiki formatted source text from PukiWiki Plus! page
13# '-f' option to save file automatically
14
# --- Command-line handling -------------------------------------------------
# Usage: getsource.pl [-f] url_to_pukiwikiplus_page
#   -f   save the wiki source to '<HEX-ENCODED-PAGENAME>.txt' instead of
#        printing it to STDOUT.

# Consume the optional leading '-f' flag first, so that the argument count
# check below sees only the URL (running with '-f' alone prints the usage).
my $save_to_file = 0;
if (@ARGV && $ARGV[0] eq '-f') {
    shift @ARGV;
    $save_to_file = 1;
}

if (@ARGV < 1) {
    print "$0: [-f] url_to_pukiwikiplus_page\n";
    exit;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

# Work out the page name and make sure $url points at the source plugin.
my $pagename;
if ($url =~ /cmd=source/) {
    # Already a source-plugin URL: the page name is the 'page' parameter.
    ($pagename) = $url =~ /[?&]page=([^&#]+)/;
}
elsif ($url =~ s{\?(.+)}{?cmd=source&page=$1}) {
    # set url to page of source plugin page (source.inc.php)
    # NOTE(review): this treats the whole query string as the page name,
    # i.e. it expects URLs of the form 'index.php?PageName' -- confirm that
    # 'cmd=read&page=...' style URLs never need to be supported.
    $pagename = $1;
}

# Output filename; stays undefined unless '-f' was given, so the rest of the
# script can keep testing it for truth.
my $file;
if ($save_to_file) {
    # Refuse to guess a filename when the page name is unknown (for example
    # when the URL has no query string at all).
    die "$0: cannot determine page name from '$url'"
        unless defined $pagename && length $pagename;
    $file = wikifile($pagename) . '.txt';
}
35
print STDERR "getting: '$url'\n";

# Scraper that extracts the text content of the '<pre id="source">' element.
my $source = scraper {
    # scrape page with '<pre id="source">' tag
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );

# Bail out before touching any output file: if the element is missing there
# is nothing to write, and opening the file first would truncate it.
die "$0: no '<pre id=\"source\">' element found at '$url'"
    unless defined $text;

if ($file) {
    print STDERR "saving: '$file'\n";
    # Three-argument open so the filename is never parsed for mode
    # characters; fail loudly if the file cannot be created.
    open STDOUT, '>', $file
        or die "$0: cannot open '$file' for writing: $!";
}
print encode('utf8', $text);

# Buffered write errors (disk full, closed pipe, ...) only surface on close.
close STDOUT
    or die "$0: error while writing output: $!";
51
52
sub wikifile {
    # Convert a page name, as it appears in a URL, into the basename of the
    # wiki text file: percent-escapes are decoded and every resulting byte
    # is written as two uppercase hexadecimal digits.
    #
    #   wikifile('FrontPage')  returns '46726F6E7450616765'
    my ($encoded_name) = @_;

    # Undo URL percent-encoding: each '%XX' becomes the byte with value 0xXX.
    (my $raw_name = $encoded_name) =~ s{%([0-9A-Fa-f]{2})}{chr(hex($1))}ge;

    # Hex-dump the decoded name, then upper-case the a-f digits.
    my $hex_name = unpack 'H*', $raw_name;
    $hex_name =~ tr/a-f/A-F/;

    return $hex_name;
}
Note: See TracBrowser for help on using the repository browser.