* this script reads url list from file and gets content, then outputs page title
author mitty <mitty@7d2118f6-f56c-43e7-95a2-4bb3031d96e7>
Thu, 24 Jan 2013 05:18:07 +0000 (05:18 +0000)
committer mitty <mitty@7d2118f6-f56c-43e7-95a2-4bb3031d96e7>
Thu, 24 Jan 2013 05:18:07 +0000 (05:18 +0000)
git-svn-id: https://lab.mitty.jp/svn/lab/trunk@186 7d2118f6-f56c-43e7-95a2-4bb3031d96e7

misc/pagetitle.pl [new file with mode: 0755]

diff --git a/misc/pagetitle.pl b/misc/pagetitle.pl
new file mode 100755 (executable)
index 0000000..f3e790b
--- /dev/null
@@ -0,0 +1,60 @@
+#! /usr/bin/perl -w
+#
+# pagetitle.pl - read URLs (one per line) from a file, fetch each page and
+# print "URL -> title" on STDOUT.  Progress messages go to STDERR.
+#
+# Usage: pagetitle.pl URL_LIST_FILE
+
+use strict;
+use warnings;
+
+use LWP::Simple;
+
+# Encode titles explicitly on output.  This replaces `use encoding 'utf-8'`,
+# which is deprecated and refuses to load on perl 5.26 and later.
+binmode STDOUT, ':encoding(UTF-8)';
+
+my $file = shift @ARGV;
+defined $file or die "usage: $0 URL_LIST_FILE\n";
+
+# Three-argument open with a low-precedence `or`: the old `"<$file" || exit`
+# tested the (always true) string, so a failed open was never noticed.
+open my $list, '<', $file or die "cannot open $file: $!\n";
+
+while (my $url = <$list>) {
+    # Trim surrounding whitespace (including CRLF endings); skip blank lines.
+    $url =~ s/\A\s+|\s+\z//g;
+    next unless length $url;
+
+    sleep 1;    # pause one second before every request
+    print STDERR "getting $url : ";
+
+    # get() returns undef on failure; an empty body is still a success.
+    my $content = get($url);
+    if (!defined $content) {
+        print STDERR "NG\n";
+        print "$url -> <NG>\n";
+        next;
+    }
+    print STDERR "OK\n";
+
+    my $title = extract_title($content);
+    print "$url -> ", (defined $title ? $title : '<?>'), "\n";
+}
+
+close $list;
+
+# Return the text of the first <title> element in $html with whitespace runs
+# collapsed to single spaces, or undef when there is no non-empty title.
+sub extract_title {
+    my ($html) = @_;
+
+    # Case-insensitive and tolerant of attributes on the tag.
+    return unless $html =~ m{<title\b[^>]*>([^<]+)</title>}i;
+
+    # split ' ' drops leading whitespace and splits on whitespace runs, so
+    # multi-line titles end up on one output line.
+    my $title = join ' ', split ' ', $1;
+    return $title if length $title;
+    return;
+}