From 788f3dee979e65cd4aa61e6552cd1b45cf92a01f Mon Sep 17 00:00:00 2001
From: mitty
Date: Thu, 24 Jan 2013 05:18:07 +0000
Subject: [PATCH] * this script reads url list from file and gets content, then outputs page title

git-svn-id: https://lab.mitty.jp/svn/lab/trunk@186 7d2118f6-f56c-43e7-95a2-4bb3031d96e7
---
 misc/pagetitle.pl |   53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100755 misc/pagetitle.pl

diff --git a/misc/pagetitle.pl b/misc/pagetitle.pl
new file mode 100755
index 0000000..f3e790b
--- /dev/null
+++ b/misc/pagetitle.pl
@@ -0,0 +1,53 @@
+#! /usr/bin/perl -w
+
+# pagetitle.pl - read a list of URLs (one per line) from a file, fetch each
+# page and print "URL -> TITLE" on STDOUT. Progress is reported on STDERR.
+#
+# Usage: pagetitle.pl URL_LIST_FILE
+
+use strict;
+use warnings;
+use utf8;
+
+use LWP::Simple;
+
+# Page titles may contain non-ASCII characters; write them out as UTF-8.
+binmode STDOUT, ':encoding(UTF-8)';
+binmode STDERR, ':encoding(UTF-8)';
+
+my $file = shift @ARGV;
+die "usage: $0 URL_LIST_FILE\n" unless defined $file && length $file;
+
+# Three-argument open with "or": "||" would bind to the file name and the
+# failure of open() would never be noticed.
+open my $list, '<:encoding(UTF-8)', $file
+    or die "cannot open $file: $!\n";
+
+while (my $url = <$list>) {
+    $url =~ s/\A\s+|\s+\z//g;    # drop newline, CR and stray whitespace
+    next unless length $url;     # skip blank lines
+
+    sleep 1;                     # throttle: at most one request per second
+    print STDERR "getting $url : ";
+
+    # LWP::Simple::get returns undef when the fetch fails.
+    my $content = get($url);
+    if (!defined $content) {
+        print STDERR "NG\n";
+        print "$url -> \n";
+        next;
+    }
+    print STDERR "OK\n";
+
+    # Anchor on the opening <title> tag (any case, optional attributes) so
+    # that only the element text is captured.
+    if ($content =~ m{<title[^>]*>\s*([^<]+?)\s*</title>}i) {
+        (my $title = $1) =~ s/\s+/ /g;    # keep output to one line per URL
+        print "$url -> $title\n";
+    }
+    else {
+        print "$url -> <?>\n";
+    }
+}
+
+close $list;
-- 
1.7.9.5