trunk
Line | |
---|
#! /usr/bin/perl

# Fetch the wiki source of a page and print it to STDOUT as UTF-8.
#
# Usage: <script> url_to_pukiwikiplus_page
#
# Unless the given URL already contains "cmd=source", its query string is
# rewritten to "?cmd=source&page=<original query string>".  The text of the
# <pre id="source"> element of the fetched document is then printed.
# Progress and error messages go to STDERR so that STDOUT carries nothing
# but the page source.

use strict;
use warnings;
use utf8;

use Web::Scraper;
use URI;
use Data::Validate::URI qw(is_uri);
use Encode;

if (@ARGV < 1) {
    # Report misuse on STDERR with a failing status: STDOUT is reserved for
    # the page source, so a redirect must not capture the usage text.
    print STDERR "$0: url_to_pukiwikiplus_page\n";
    exit 1;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

if ($url !~ /cmd=source/) {
    # Turn "?<query>" into "?cmd=source&page=<query>".  A URL that contains
    # no "?" at all is left untouched by this substitution.
    $url =~ s{\?(.+)}{?cmd=source&page=$1};
}

print STDERR "getting: '$url'\n";

# Scraper that captures the text content of <pre id="source"> and returns
# just that value (rather than a hash of results) from scrape().
my $source = scraper {
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );

# Nothing captured (e.g. the document has no pre#source element, as happens
# when the URL could not be rewritten above): fail loudly instead of printing
# an undefined value and exiting successfully.
die "$0: no <pre id=\"source\"> element found at '$url'\n"
    unless defined $text;

# 'UTF-8' is the strict, interchange-safe encoding; 'utf8' is Perl's lax
# internal variant.
print encode('UTF-8', $text);
---|
Note: See
TracBrowser
for help on using the repository browser.