trunk
Line | |
---|
1 | #! /usr/bin/perl |
---|
2 | |
---|
3 | use strict; |
---|
4 | use warnings; |
---|
5 | use utf8; |
---|
6 | |
---|
7 | use Web::Scraper; |
---|
8 | use URI; |
---|
9 | use Data::Validate::URI qw(is_uri); |
---|
10 | use Encode; |
---|
11 | |
---|
# Print usage and quit when no URL was given.
if (@ARGV < 1) {
    print "$0: [-f] url_to_pukiwikiplus_page\n";
    exit;
}

# '-f' asks us to save the page source to "<HEXNAME>.txt" instead of STDOUT.
# BUG FIX: the old code stored the literal string '-f' in $file, so when the
# URL already contained cmd=source the output was written to a file named '-f'.
my $save_to_file = 0;
if ($ARGV[0] eq '-f') {
    $save_to_file = 1;
    shift @ARGV;
}

my $url = $ARGV[0];
die "$0: '$url' is not URI" unless is_uri($url);

my $file;
if ($url !~ /cmd=source/) {
    # Rewrite "...?PageName" into PukiWiki's source view
    # "...?cmd=source&page=PageName".
    # BUG FIX: only read $1 when the substitution actually matched; the old
    # code used $1 unconditionally, picking up a stale capture when the URL
    # had no query part.
    if ($url =~ s#\?(.+)#\?cmd=source&page=$1#) {
        $file = wikifile($1) . '.txt' if $save_to_file;
    }
}
elsif ($save_to_file and $url =~ /page=([^&;]+)/) {
    # URL already points at the source view; recover the page name from the
    # query string so -f still produces a sensible file name.
    $file = wikifile($1) . '.txt';
}

print STDERR "getting: '$url'\n";

# Scrape the wiki source text out of PukiWiki's <pre id="source"> element.
my $source = scraper {
    process "pre#source", "source" => "TEXT";
    result "source";
};

my $text = $source->scrape( URI->new($url) );

# Fail loudly instead of printing an undef warning and empty output when the
# page has no <pre id="source"> (e.g. wrong URL or non-PukiWiki page).
die "$0: no page source found at '$url'\n" unless defined $text;

if ($save_to_file and defined $file) {
    print STDERR "saving: '$file'\n";
    # BUG FIX: three-arg, checked open (the old two-arg open was unchecked
    # and vulnerable to mode injection through the file name).
    open STDOUT, '>', $file or die "$0: cannot open '$file': $!";
}
print encode('utf8', $text);
---|
46 | |
---|
47 | |
---|
# Map a percent-encoded PukiWiki page name to the upper-case hex string
# PukiWiki uses as the on-disk file name (without the ".txt" suffix).
sub wikifile {
    my ($pagename) = @_;

    # Undo URL percent-encoding: every %XX sequence becomes its raw byte.
    $pagename =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/chr hex $1/eg;

    # PukiWiki stores each page under the upper-case hex dump of its raw name.
    return uc unpack('H*', $pagename);
}
---|
Note: See
TracBrowser
for help on using the repository browser.