* this script reads a list of URLs from a file, fetches each page, and outputs its title
[lab.git] / misc / pagetitle.pl
diff --git a/misc/pagetitle.pl b/misc/pagetitle.pl
new file mode 100755 (executable)
index 0000000..f3e790b
--- /dev/null
@@ -0,0 +1,74 @@
+#! /usr/bin/perl -w
+#
+# pagetitle.pl - read a list of URLs (one per line) from a file, fetch each
+# page, and print "<url> -> <title>" on STDOUT. Progress goes to STDERR.
+#
+# Usage: pagetitle.pl URL_LIST_FILE
+#
+# Output per URL:
+#   <url> -> <title>   the page was fetched and a <title> element was found
+#   <url> -> <?>       the page was fetched but no usable title was found
+#   <url> -> <NG>      the page could not be fetched
+
+use strict;
+use warnings;
+
+use LWP::Simple;
+
+# Titles may contain non-ASCII characters; emit them as UTF-8. This replaces
+# the deprecated "use encoding" pragma, which is fatal as of Perl 5.26.
+binmode STDOUT, ':encoding(UTF-8)';
+
+# Return the text of the first <title> element in $html with whitespace
+# collapsed, or undef when there is no non-empty title.
+sub extract_title {
+    my ($html) = @_;
+
+    # Tag names are case-insensitive and the start tag may carry attributes.
+    return unless $html =~ m{<title\b[^>]*>([^<]+)</title\s*>}i;
+
+    # Titles are often wrapped across lines; keep the output to one line.
+    my $title = $1;
+    $title =~ s/\s+/ /g;
+    $title =~ s/\A | \z//g;
+
+    return if $title eq '';
+    return $title;
+}
+
+my $file = shift @ARGV;
+if (!defined $file || $file eq '') {
+    print STDERR "usage: $0 URL_LIST_FILE\n";
+    exit 1;
+}
+
+# Three-argument open keeps characters in the file name from being read as
+# an open mode, and "or" (unlike "||") applies to the result of open itself.
+open my $list, '<', $file or die "cannot open $file: $!\n";
+
+while (my $url = <$list>) {
+    # Strip the line ending (including CRLF) and surrounding whitespace.
+    $url =~ s/\A\s+|\s+\z//g;
+    next if $url eq '';
+
+    # Pause before every request (presumably to avoid hammering servers).
+    sleep 1;
+    print STDERR "getting $url : ";
+    my $content = get($url);
+    if (!defined $content) {
+        print STDERR "NG\n";
+        print "$url -> <NG>\n";
+        next;
+    }
+    print STDERR "OK\n";
+
+    my $title = extract_title($content);
+    if (defined $title) {
+        print "$url -> $title\n";
+    }
+    else {
+        print "$url -> <?>\n";
+    }
+}
+
+close $list;