source: lab/trunk/misc/pagetitle.pl @ 186

Last change on this file since 186 was 186, checked in by mitty, 11 years ago
  • This script reads a list of URLs from a file, fetches each page, and prints its title.
  • Property svn:executable set to *
File size: 590 bytes
Line 
1#! /usr/bin/perl -w
2
3use strict;
4use warnings;
5use encoding 'utf-8';
6
7use LWP::Simple;
8
# Usage: pagetitle.pl URL_LIST_FILE
#
# Reads one URL per line from URL_LIST_FILE, fetches each page with
# LWP::Simple's get(), and prints "URL -> TITLE" on STDOUT.  Progress
# messages ("getting URL : OK" / "NG") go to STDERR so that STDOUT carries
# only the result lines.  A failed download is reported as "<NG>", a page
# without a usable <title> element as "<?>".

my $file = shift @ARGV;
if (!defined $file || $file eq '') {
    # Test definedness rather than truth so a file literally named "0" works.
    print STDERR "usage: $0 url-list-file\n";
    exit 1;
}

# Three-argument open keeps the file name from being parsed for mode
# characters, and the low-precedence "or" applies to open() itself; with
# "||" the check was attached to the (always true) file name string and an
# open failure went unnoticed.
open my $list, '<', $file
    or die "cannot open '$file': $!\n";

while (my $url = <$list>) {
    # Drop the line ending (including a CR from CRLF files) and any
    # surrounding whitespace; skip lines that contain no URL at all.
    $url =~ s/\A\s+//;
    $url =~ s/\s+\z//;
    next if $url eq '';

    # Pause one second before every request (presumably to avoid hammering
    # the remote servers).
    sleep 1;

    print STDERR "getting $url : ";
    my $content = get($url);

    # get() signals failure with undef; an empty body is still a successful
    # download and must not be reported as "NG".
    if (!defined $content) {
        print STDERR "NG\n";
        print "$url -> <NG>\n";
        next;
    }
    print STDERR "OK\n";

    my $title = extract_title($content);
    if (defined $title) {
        print "$url -> $title\n";
    }
    else {
        print "$url -> <?>\n";
    }
}

close $list;

# Returns the text of the first <title> element found in $html, with runs of
# whitespace collapsed to single spaces and leading/trailing whitespace
# removed, so that every result fits on one output line.  Returns undef when
# there is no <title> element or its text is empty.
sub extract_title {
    my ($html) = @_;

    # HTML tag names are case-insensitive and the tag may carry attributes.
    return unless $html =~ m{<title\b[^>]*>([^<]+)</title\s*>}i;

    my $title = $1;
    $title =~ s/\s+/ /g;
    $title =~ s/\A //;
    $title =~ s/ \z//;

    return if $title eq '';
    return $title;
}
Note: See TracBrowser for help on using the repository browser.