trunk
| Rev | Line | |
|---|
#! /usr/bin/perl

# Print the text of the <pre id="source"> element of a PukiWiki Plus page.
# (Repository browser listing, revision 74f778c.)
#
# Usage: <script> url_to_pukiwikiplus_page
#
# Unless the URL already contains "cmd=source", its query string is rewritten
# from "?<query>" to "?cmd=source&page=<query>" before the page is fetched.
# The scraped text is written to STDOUT encoded as UTF-8; progress and error
# messages go to STDERR so that STDOUT carries nothing but the page text.

use strict;
use warnings;
use utf8;

use Web::Scraper;
use URI;
use Data::Validate::URI qw(is_uri);
use Encode;

# Usage goes to STDERR with a failing status: STDOUT is reserved for the
# scraped text, so a redirected run must not capture the usage line or
# report success when no URL was given.
if (@ARGV < 1) {
    print STDERR "$0: url_to_pukiwikiplus_page\n";
    exit 1;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

if ($url !~ /cmd=source/) {
    # s/// returns a false value when there is no "?query" part to rewrite.
    # In that case the URL is fetched as given, so say so instead of silently
    # requesting a page other than the source view.
    my $rewritten = $url =~ s#\?(.+)#\?cmd=source&page=$1#;
    warn "$0: '$url' has no query string; fetching it unchanged\n"
        unless $rewritten;
}

print STDERR "getting: '$url'\n";

# Scraper that extracts the text content of the element matching "pre#source"
# and returns that single value (rather than a hash of results).
my $source = scraper {
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );

# NOTE(review): the scraper is expected to yield undef when nothing matched
# the selector; fail loudly rather than printing nothing with a mere
# "uninitialized value" warning and a success status.
die "$0: no 'pre#source' element found at '$url'\n" unless defined $text;

# 'UTF-8' is the strict encoding; for well-formed text it produces the same
# bytes as the lax 'utf8' encoding.
print encode('UTF-8', $text);
Note: See TracBrowser for help on using the repository browser.