1#!/usr/local/bin/perl -w 2# 3# tv_grab_na_icon 4# 5# This script goes to the zap2it web site and downloads icon links or images 6# to the share/icons directory. 7# 8# (C)2001 - Robert Eden, free to use under the GNU License. 9# 10# Robert Eden - reden@cpan.org 11# 12# See cvs logs entries for module history 13# 14 15=pod 16 17=head1 NAME 18 19tv_grab_na_icons - Grab channel icon images or links from zap2it.com 20 21=head1 SYNOPSIS 22 23tv_grab_na_icons [--debug] [--links] [--share dir] 24 25tv_grab_na_icons --help 26 27tv_grab_na_icons --version 28 29=head1 DESCRIPTIONS 30 31This script goes to the zap2it web site and downloads icon 32links or images to the share/icons directory. 33 34It was designed to allow icons to be added by tv_grab_na_dd, 35but there is no reason it can't be used for other purposes. 36 37To minimize the load on zap2it.com, downloaded icons are recommended. 38Links are available for those applications that need it. 39 40Also to reduce the load on zap2it.com, this script should be run 41sparingly. There is really no need to run it frequently, new networks 42or icons don't show up that often. To emphasize that point, there is 43no --configure mode, the questions need to be answered each run. 44 45=head1 OPTIONS 46 47=over 48 49=item --links 50 51Store imge URLs in *.url files instead of downloading images. 52 53=item --share I<SHAREDIR> 54 55Icons are stored in I<SHAREDIR>/icons. The share directory is set at install time, 56but there may be times when it needs to be specified. (for example: no write access to the default share 57directory) 58 59=item --debug 60 61Turns on debugging mode. The grabber will be more verbose, and saves some extra debugging data with the icons. 62 63=item --version 64 65Show the version of the grabber. 66 67=item --help 68 69Print a help message and exit. 70 71=back 72 73=head1 SEE ALSO 74 75L<xmltv>. 76 77=head1 AUTHOR 78 79Robert Eden 80 81=cut 82 83use strict; 84use open qw(:utf8); 85use Getopt::Long; 86use Data::Dumper; 87use File::Basename; 88use WWW::Mechanize 1.02; 89use HTML::TreeBuilder; 90use XML::Twig 3.28; 91 92use XMLTV; 93use XMLTV::Ask; 94use XMLTV::Version '$Id: tv_grab_na_icons.in,v 1.17 2010/04/27 03:50:46 rmeden Exp $ '; 95use XMLTV::Usage <<END 96 97$0 - Grab channel icon images or links from zap2it.com 98 99tv_grab_na_icons [--debug] [--links] [--share dir] 100 101tv_grab_na_icons --help 102 103tv_grab_na_icons --version 104 105END 106; 107 108select STDERR; $|=1; 109select STDOUT; $|=1; 110 111my $opt_help=0; 112my $opt_debug=0; 113my $opt_links=0; 114my $SHARE_DIR = '/share'; 115my $fileno=0; 116my $file=undef; 117 118GetOptions( 119 'help' => \$opt_help, 120 'debug' => \$opt_debug, 121 'links' => \$opt_links, 122 'share=s' => \$SHARE_DIR, 123 ) 124 or usage(0); 125 126usage(1) if $opt_help; 127 128die "ERROR:Share directory '$SHARE_DIR' not found\n" unless -d $SHARE_DIR; 129die "ERROR:Share directory '$SHARE_DIR' not writable\n" unless -w $SHARE_DIR; 130mkdir "$SHARE_DIR/icons" unless -d "$SHARE_DIR/icons"; 131die "ERROR:directory '$SHARE_DIR/icons' not writable\n" unless -w "$SHARE_DIR/icons"; 132# 133# create our agent 134# 135my $a = WWW::Mechanize->new( env_proxy => 1 ); 136$a->agent(sprintf("%s/$XMLTV::VERSION",basename($0)||'xmltv')); 137 138print STDERR "Getting inital page\n" if $opt_debug; 139$a->get('http://www.zap2it.com/index'); 140check_page($a); 141 142# 143# select zip 144# 145while (1) 146{ 147 die "ERROR:Can't find zipcode form\n" unless find_form($a,"zipcode"); 148 149 my $zip=ask("\nPostal Code:"); 150 print STDERR "Submitting zip code $zip\n" if $opt_debug; 151 $a->field("zipcode",$zip); 152 $a->submit; 153 check_page($a); 154# 155# bug in zap2it.com, zip not being picked up from first page, try next page 156# 157 if (grep(/Enter ZIP for local listings/,$a->content)) { 158 die "ERROR:Can't find zipcode form\n" unless find_form($a,"zipcode"); 159 print STDERR "Submitting zip code $zip (again)\n" if $opt_debug; 160 $a->field("zipcode",$zip); 161 $a->submit; 162 check_page($a); 163 } 164 last if grep(/No Provider Selected/,$a->content); 165 last if grep(/Choose Your Provider/,$a->content); 166 print " Invalid Postal Code, try again\n"; 167} 168# for some reason, we only get a little data without this header 169$a->add_header( 'Accept-Language' => 'en-us,en;q=0.5'); 170 171# 172# select lineup 173# 174{ 175 my %lineups=(); 176 my $def_lineup=undef; 177 foreach $_ ($a->links) { 178 next unless ($_->url =~ /lineupId=(.+)/ ); 179 my $name=$_->text; 180 $name =~ s/\xa0//g; # drop bad characters 181 182 print "Lineup $name\n" if $opt_debug; 183 $lineups{$name}=$_; 184 $def_lineup=$name unless defined $def_lineup; 185 } 186 187#print "Link: ", Dumper($_) foreach $a->links(); 188 189 die "ERROR:Can't find provider links\n" unless defined $def_lineup; 190 191 my $name=$def_lineup; 192 $name=ask_choice("\nLineup?",$name,sort keys %lineups); 193 194 printf STDERR "getting lineup %s\n",$lineups{$name}->url if $opt_debug; 195#print Dumper($lineups{$name}); 196 $a->follow_link(url => $lineups{$name}->url); 197 check_page($a); 198} # select lineup 199 200# 201# select all channels 202# 203# print "Display All Channels\n" if $opt_debug; 204# $a->follow_link(text_regex => qr/ALL CHANNELS/i) || warn "*WARNING* Can't find all channels link, hope we got them all!\n"; 205# check_page($a); 206# 207# 208# Convert HTML to XML 209# 210# we can use TWIG to do this because we need to use utf8_mode to avoid a warning 211# 212my $tree = HTML::TreeBuilder->new; # empty tree 213 $tree->utf8_mode(1); 214 $tree->parse($a->content); 215 216my $xml = $tree->as_XML; 217 218# 219# Now parse the XML 220# 221my $image; 222my %icons; 223my $twig=XML::Twig->new( 224 twig_roots => { html => 1}, 225 twig_handlers => 226 { 227 td => sub { 228 my $img_el; 229 if (defined $_->att('class') 230 && $_->att('class') =~ /zc-st/ ) { 231 my $name=$_ -> parent_trimmed_text; 232 $name=~s/^([\.\d]+) //; # trim leading channel number 233 234 if ( $img_el=$_ -> first_descendant('img')) { 235 $icons{$name}=$img_el->att('src'); 236 print "Got image $name->$icons{$name}\n" if $opt_debug; 237 } 238 else { 239 print "Got $name but no image\n" if $opt_debug; 240 } 241 } 242 $_->twig->purge; 243 return 0; 244 } 245 }); 246$twig->parse_html($xml); 247 248# 249# check for problems 250# 251unless (keys %icons) 252{ 253 open FILE,">na_icon_error.html" || die "ERROR:Can't open na_icon_error.html\n"; 254 print FILE $a->content; 255 close FILE; 256 die "ERROR:No icons were found. Please check 'na_icon_error.html'\n"; 257} 258 259# 260# print results 261# 262my $base=$a->base; 263foreach (sort keys %icons) 264{ 265 $image=URI->new_abs($icons{$_},$base); 266 if ($opt_links) 267 { 268 $file="$SHARE_DIR/icons/$_.url"; 269 open(FILE,">$file") || die "ERROR:Can't write to $file\n"; 270 print FILE $image."\n"; 271 close FILE; 272 printf "Stored %10s in %20s\n",$_,$file; 273 } 274 else 275 { 276 my $type=(fileparse($image,'\..*'))[2]; 277 $file="$SHARE_DIR/icons/$_$type"; 278 printf "Getting %10s as %20s: %s\n",$_,$file,$a->mirror($image,$file)->message; 279 } 280} 281 282exit; 283 284#print Dumper($a); 285#print "Link: ", Dumper($_) foreach $a->links(); 286#print "Form: ", $_->dump foreach $a->forms(); 287#print $a->current_form->dump; 288 289# 290# check status, write out html file 291# 292sub check_page { 293 my $res=shift || die "ERROR:No Mechanize specified\n"; 294 $fileno++; 295 if ($opt_debug) 296 { 297 $file="na_icon_${fileno}"; 298 open FILE,">$file.html" || die "ERROR:Can't open $file.html\n"; 299 print FILE $res->content; 300 close FILE; 301 302 open FILE,">$file.txt" || die "ERROR:Can't open $file.txt\n"; 303 print FILE Dumper($res); 304 close FILE; 305 } 306 die "ERROR:page error ",$res->status_line unless $res->success; 307} # check_page 308 309# 310# subroutine to search for form w/o knowing it's name 311# 312sub find_form 313{ 314 my $mech=shift || die "ERROR:find_form: mechanize object not specified"; 315 my $name=shift || die "ERROR:find_form: field name not specified"; 316 my @forms=$mech->forms; 317 my $fn=0; 318 foreach (0..$#forms) 319 { 320 $fn=$_ if $forms[$_]->find_input($name); 321 } 322 $mech->form_number($fn+1) if $fn; 323 return $fn; 324} #find_form 325 326