#!/usr/bin/perl
#
# pagetitle.pl - read a list of URLs from a file, fetch every page and
# print "<url> -> <page title>" for each of them.
#
# Usage: pagetitle.pl URL_LIST_FILE
#
#   URL_LIST_FILE   text file with one URL per line; blank lines are skipped.
#
# Output:
#   STDOUT  one line per URL: "<url> -> <title>", "<url> -> <?>" when the
#           page has no usable <title>, or "<url> -> " when the fetch failed.
#   STDERR  progress messages ("getting <url> : OK" / "NG").
#
# Provenance (kept from the original commit record):
#   From:    mitty
#   Date:    Thu, 24 Jan 2013 05:18:07 +0000 (+0000)
#   Subject: * this script reads url list from file and gets content,
#            then outputs page title
#   X-Git-Url: http://lab.mitty.jp/git/?a=commitdiff_plain;h=788f3dee979e65cd4aa61e6552cd1b45cf92a01f;p=lab.git
#   git-svn-id: https://lab.mitty.jp/svn/lab/trunk@186 7d2118f6-f56c-43e7-95a2-4bb3031d96e7
#   File:    misc/pagetitle.pl (new file mode 100755, index 0000000..f3e790b)

use strict;
use warnings;
use utf8;

use LWP::Simple;

# Pause before every request so the remote servers are not hammered.
my $DELAY_SECONDS = 1;

# The original relied on the deprecated `use encoding 'utf-8'` pragma to get
# UTF-8 output; set the layer explicitly so titles with non-ASCII characters
# are written correctly.
binmode STDOUT, ':encoding(UTF-8)';

my $file = shift @ARGV;
if (!defined $file || $file eq '') {
    print STDERR "usage: $0 URL_LIST_FILE\n";
    exit 1;
}

# Three-argument open with `or`: the low-precedence operator applies to the
# result of open() itself, so a missing/unreadable file is actually reported.
open my $list, '<', $file
    or die "cannot open '$file': $!\n";

URL:
while (my $url = <$list>) {
    # Strip the line terminator (also handles CRLF files) and skip blanks
    # before sleeping, so empty lines cost neither time nor a bogus request.
    $url =~ s/\s+\z//;
    next URL if $url eq '';

    sleep $DELAY_SECONDS;
    print STDERR "getting $url : ";

    # LWP::Simple::get returns undef on failure; test definedness so that a
    # body which is merely false (e.g. "0") is not reported as an error.
    my $content = get($url);
    if (!defined $content) {
        print STDERR "NG\n";
        print "$url -> \n";
        next URL;
    }
    print STDERR "OK\n";

    my $title = extract_title($content);
    if (defined $title) {
        print "$url -> $title\n";
    }
    else {
        print "$url -> <?>\n";
    }
}

close $list;

# extract_title($html)
#
# Returns the text of the first <title> element in $html with runs of
# whitespace collapsed to single spaces and the ends trimmed.  Returns
# undef (empty list in list context) when $html is undefined, has no
# <title> element, or the title is empty after trimming.
sub extract_title {
    my ($html) = @_;
    return if !defined $html;

    # /i: tag names are case-insensitive; /s: a title may span several lines.
    if ($html =~ m{<title\b[^>]*>(.*?)</title>}is) {
        my $title = $1;
        $title =~ s/\s+/ /g;
        $title =~ s/\A //;
        $title =~ s/ \z//;
        return $title if length $title;
    }
    return;
}