source: lab.git/misc/pagetitle.pl @ 1f4b1b5

trunk
Last change on this file since 1f4b1b5 was 788f3de, checked in by mitty <mitty@…>, 12 years ago
  • this script reads url list from file and gets content, then outputs page title

git-svn-id: https://lab.mitty.jp/svn/lab/trunk@186 7d2118f6-f56c-43e7-95a2-4bb3031d96e7

  • Property mode set to 100755
File size: 590 bytes
Line 
#!/usr/bin/perl

# pagetitle.pl - read a list of URLs (one per line) from a file, fetch each
# page, and print "<url> -> <title>" on STDOUT.
#
# Usage: pagetitle.pl <url-list-file>
#
# Output per URL (STDOUT):
#   "<url> -> <title>"   the page was fetched and a <title> element was found
#   "<url> -> <?>"       the page was fetched but no usable title was found
#   "<url> -> <NG>"      the page could not be fetched
# Progress ("getting <url> : OK|NG") is written to STDERR.

use strict;
use warnings;

use LWP::Simple;

# Titles may contain non-ASCII characters; emit them as UTF-8. This replaces
# the deprecated `use encoding 'utf-8'` pragma for the one handle that prints
# page titles.
binmode STDOUT, ':encoding(UTF-8)';

# Test for presence rather than truth so that a file literally named "0"
# is still accepted.
my $file = shift @ARGV;
if (!defined $file) {
    print STDERR "usage: $0 <url-list-file>\n";
    exit 1;
}

# Three-argument open with low-precedence `or`: the mode cannot be injected
# through the file name, and a failed open is actually detected.
open my $list, '<', $file
    or die "cannot open '$file': $!\n";

URL:
while (my $url = <$list>) {
    # Strip the line ending (including a stray CR) and surrounding blanks.
    $url =~ s/\A\s+//;
    $url =~ s/\s+\z//;
    next URL if $url eq '';

    # Be polite to remote servers: pause before every request.
    sleep 1;

    print STDERR "getting $url : ";
    my $content = get($url);

    # LWP::Simple::get returns undef on failure; a defined-but-false body
    # (e.g. "0" or an empty page) is still a successful fetch.
    if (!defined $content) {
        print STDERR "NG\n";
        print "$url -> <NG>\n";
        next URL;
    }
    print STDERR "OK\n";

    my $title = extract_title($content);
    if (defined $title) {
        print "$url -> $title\n";
    }
    else {
        print "$url -> <?>\n";
    }
}

close $list;

# extract_title($html)
#
# Return the text of the first <title> element in $html, with runs of
# whitespace collapsed to single spaces and leading/trailing blanks removed.
# Tag matching is case-insensitive and tolerates attributes on the opening
# tag. Returns undef when $html is undef, contains no <title> element, or
# the title is empty after trimming.
sub extract_title {
    my ($html) = @_;

    return undef unless defined $html;
    return undef unless $html =~ m{<title\b[^>]*>([^<]+)</title\s*>}i;

    my $title = $1;
    $title =~ s/\s+/ /g;    # keep the output on a single line
    $title =~ s/\A //;
    $title =~ s/ \z//;

    return undef if $title eq '';
    return $title;
}
Note: See TracBrowser for help on using the repository browser.