trunk
Rev | Line | |
---|
#! /usr/bin/perl
#
# Print the raw wiki source of a PukiWiki Plus page to STDOUT.
#
# Usage: <this script> url_to_pukiwikiplus_page
#
# The URL's query is rewritten to request the wiki's "cmd=source" view
# (unless it already does), the page is fetched, and the text of the
# <pre id="source"> element is written to STDOUT encoded as UTF-8.
# The progress message and all errors go to STDERR.

use strict;
use warnings;
use utf8;

use Web::Scraper;
use URI;
use Data::Validate::URI qw(is_uri);
use Encode;

# A missing argument is a usage error: report it on STDERR and exit non-zero
# so that callers and pipelines can detect the failure.
if (@ARGV < 1) {
    print STDERR "$0: url_to_pukiwikiplus_page\n";
    exit 1;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

# Rewrite the query so it asks for the source view.  Two input shapes are
# handled:
#   ...?PageName              -> ...?cmd=source&page=PageName
#   ...?cmd=read&page=Name    -> ...?cmd=source&page=Name
# Working on the parsed query (rather than the whole URL string) keeps any
# "#fragment" out of the page name and avoids nesting an existing "page="
# parameter inside the new one.
my $uri   = URI->new($url);
my $query = $uri->query;
if (defined $query and length $query and $query !~ /cmd=source/) {
    my ($page) = $query =~ /(?:\A|[&;])page=([^&;]*)/;
    $page = $query unless defined $page;    # bare "?PageName" form
    $uri->query("cmd=source&page=$page");
}

print STDERR "getting: '$uri'\n";

# Extract the text content of the <pre id="source"> element.
my $source = scraper {
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape($uri);

# Without a matching element there is nothing to print; fail loudly instead
# of emitting an "uninitialized value" warning and empty output.
die "$0: no <pre id=\"source\"> element found at '$uri'\n"
    unless defined $text;

print encode('utf8', $text);
---|
Note: See TracBrowser for help on using the repository browser.