1#!/usr/local/bin/perl -w
2#
3# tv_grab_na_icon
4#
5# This script goes to the zap2it web site and downloads icon links or images
6# to the share/icons directory.
7#
8# (C)2001 - Robert Eden, free to use under the GNU License.
9#
10#  Robert Eden - reden@cpan.org
11#
12#     See cvs logs entries for module history
13#
14
15=pod
16
17=head1 NAME
18
19tv_grab_na_icons - Grab channel icon images or links from zap2it.com
20
21=head1 SYNOPSIS
22
23tv_grab_na_icons [--debug] [--links] [--share dir]
24
25tv_grab_na_icons --help
26
27tv_grab_na_icons --version
28
29=head1 DESCRIPTIONS
30
31This script goes to the zap2it web site and downloads icon
32links or images to the share/icons directory.
33
34It was designed to allow icons to be added by tv_grab_na_dd,
35but there is no reason it can't be used for other purposes.
36
37To minimize the load on zap2it.com, downloaded icons are recommended.
38Links are available for those applications that need it.
39
40Also to reduce the load on zap2it.com, this script should be run
41sparingly. There is really no need to run it frequently, new networks
42or icons don't show up that often.  To emphasize that point, there is
43no --configure mode, the questions need to be answered each run.
44
45=head1 OPTIONS
46
47=over
48
49=item --links
50
51Store imge URLs in *.url files instead of downloading images.
52
53=item --share I<SHAREDIR>
54
55Icons are stored in I<SHAREDIR>/icons.  The share directory is set at install time,
56but there may be times when it needs to be specified. (for example: no write access to the default share
57directory)
58
59=item --debug
60
61Turns on debugging mode. The grabber will be more verbose, and saves some extra debugging data with the icons.
62
63=item --version
64
65Show the version of the grabber.
66
67=item --help
68
69Print a help message and exit.
70
71=back
72
73=head1 SEE ALSO
74
75L<xmltv>.
76
77=head1 AUTHOR
78
79Robert Eden
80
81=cut 
82
83use strict;
84use open qw(:utf8);
85use Getopt::Long;
86use Data::Dumper;
87use File::Basename;
88use WWW::Mechanize 1.02;
89use HTML::TreeBuilder;
90use XML::Twig 3.28;
91
92use XMLTV;
93use XMLTV::Ask;
94use XMLTV::Version '$Id: tv_grab_na_icons.in,v 1.17 2010/04/27 03:50:46 rmeden Exp $ ';
95use XMLTV::Usage <<END
96
97$0 - Grab channel icon images or links from zap2it.com
98
99tv_grab_na_icons [--debug] [--links] [--share dir]
100
101tv_grab_na_icons --help
102
103tv_grab_na_icons --version
104
105END
106;
107
108select STDERR; $|=1;
109select STDOUT; $|=1;
110
111my $opt_help=0;
112my $opt_debug=0;
113my $opt_links=0;
114my $SHARE_DIR = '/share';
115my $fileno=0;
116my $file=undef;
117
118GetOptions(
119	   'help'          => \$opt_help,
120	   'debug'         => \$opt_debug,
121	   'links'         => \$opt_links,
122	   'share=s'       => \$SHARE_DIR,
123	  )
124  or usage(0);
125
126usage(1) if $opt_help;
127
128die "ERROR:Share directory '$SHARE_DIR' not found\n" unless -d $SHARE_DIR;
129die "ERROR:Share directory '$SHARE_DIR' not writable\n" unless -w $SHARE_DIR;
130mkdir "$SHARE_DIR/icons" unless -d "$SHARE_DIR/icons";
131die "ERROR:directory '$SHARE_DIR/icons' not writable\n" unless -w "$SHARE_DIR/icons";
132#
133# create our agent
134#
135my $a = WWW::Mechanize->new( env_proxy => 1   );
136$a->agent(sprintf("%s/$XMLTV::VERSION",basename($0)||'xmltv'));
137
138print STDERR "Getting inital page\n" if $opt_debug;
139$a->get('http://www.zap2it.com/index');
140check_page($a);
141
142#
143# select zip
144#
145while (1)
146{
147    die "ERROR:Can't find zipcode form\n" unless find_form($a,"zipcode");
148
149    my $zip=ask("\nPostal Code:");
150    print STDERR "Submitting zip code $zip\n" if $opt_debug;
151    $a->field("zipcode",$zip);
152    $a->submit;
153    check_page($a);
154#
155# bug in zap2it.com, zip not being picked up from first page, try next page
156#
157    if (grep(/Enter ZIP for local listings/,$a->content)) {
158        die "ERROR:Can't find zipcode form\n" unless find_form($a,"zipcode");
159        print STDERR "Submitting zip code $zip (again)\n" if $opt_debug;
160        $a->field("zipcode",$zip);
161        $a->submit;
162    check_page($a);
163   }
164    last if grep(/No Provider Selected/,$a->content);
165    last if grep(/Choose Your Provider/,$a->content);
166    print "  Invalid Postal Code, try again\n";
167}
168# for some reason, we only get a little data without this header
169$a->add_header( 'Accept-Language' => 'en-us,en;q=0.5');
170
171#
172# select lineup
173#
174{
175    my %lineups=();
176    my $def_lineup=undef;
177    foreach $_ ($a->links) {
178        next unless ($_->url =~ /lineupId=(.+)/ );
179        my $name=$_->text;
180           $name =~ s/\xa0//g; # drop bad characters
181
182        print "Lineup $name\n" if $opt_debug;
183        $lineups{$name}=$_;
184        $def_lineup=$name unless defined $def_lineup;
185    }
186
187#print "Link: ", Dumper($_)  foreach $a->links();
188
189    die "ERROR:Can't find provider links\n" unless defined $def_lineup;
190
191    my $name=$def_lineup;
192    $name=ask_choice("\nLineup?",$name,sort keys %lineups);
193
194    printf STDERR "getting lineup %s\n",$lineups{$name}->url if $opt_debug;
195#print Dumper($lineups{$name});
196    $a->follow_link(url => $lineups{$name}->url);
197    check_page($a);
198} # select lineup
199
200#
201# select all channels
202#
203#    print "Display All Channels\n" if $opt_debug;
204#    $a->follow_link(text_regex => qr/ALL CHANNELS/i) || warn "*WARNING* Can't find all channels link, hope we got them all!\n";
205#    check_page($a);
206#
207#
208# Convert HTML to XML
209#
210# we can use TWIG to do this because we need to use utf8_mode to avoid a warning
211#
212my $tree = HTML::TreeBuilder->new; # empty tree
213   $tree->utf8_mode(1);
214   $tree->parse($a->content);
215
216my $xml = $tree->as_XML;
217
218#
219# Now parse the XML
220#
221my $image;
222my %icons;
223my $twig=XML::Twig->new(
224         twig_roots    => { html => 1},
225		 twig_handlers =>
226		      {
227		         td  => sub {
228                                  my $img_el;
229                                  if (defined  $_->att('class')
230                                            && $_->att('class') =~ /zc-st/ ) {
231                                      my $name=$_ -> parent_trimmed_text;
232                                      $name=~s/^([\.\d]+) //; # trim leading channel number
233
234                                      if ( $img_el=$_ -> first_descendant('img')) {
235                                          $icons{$name}=$img_el->att('src');
236                                          print "Got image $name->$icons{$name}\n" if $opt_debug;
237                                      }
238                                      else {
239                                          print "Got $name but no image\n" if $opt_debug;
240                                      }
241                                  }
242                                  $_->twig->purge;
243                                  return 0;
244		         }
245		      });
246$twig->parse_html($xml);
247
248#
249# check for problems
250#
251unless (keys %icons)
252{
253        open  FILE,">na_icon_error.html" || die "ERROR:Can't open na_icon_error.html\n";
254        print FILE $a->content;
255        close FILE;
256        die "ERROR:No icons were found.  Please check 'na_icon_error.html'\n";
257}
258
259#
260# print results
261#
262my $base=$a->base;
263foreach (sort keys %icons)
264{
265    $image=URI->new_abs($icons{$_},$base);
266    if ($opt_links)
267    {
268        $file="$SHARE_DIR/icons/$_.url";
269        open(FILE,">$file") || die "ERROR:Can't write to $file\n";
270            print FILE $image."\n";
271            close FILE;
272        printf "Stored %10s in %20s\n",$_,$file;
273    }
274    else
275    {
276        my $type=(fileparse($image,'\..*'))[2];
277        $file="$SHARE_DIR/icons/$_$type";
278        printf "Getting %10s as %20s: %s\n",$_,$file,$a->mirror($image,$file)->message;
279    }
280}
281
282exit;
283
284#print Dumper($a);
285#print "Link: ", Dumper($_)  foreach $a->links();
286#print "Form: ", $_->dump  foreach $a->forms();
287#print $a->current_form->dump;
288
289#
290# check status, write out html file
291#
292sub check_page {
293    my $res=shift || die "ERROR:No Mechanize specified\n";
294    $fileno++;
295    if ($opt_debug)
296    {
297        $file="na_icon_${fileno}";
298        open  FILE,">$file.html" || die "ERROR:Can't open $file.html\n";
299        print FILE $res->content;
300        close FILE;
301
302        open  FILE,">$file.txt" || die "ERROR:Can't open $file.txt\n";
303        print FILE Dumper($res);
304        close FILE;
305    }
306    die "ERROR:page error ",$res->status_line unless $res->success;
307} # check_page
308
309#
310# subroutine to search for form w/o knowing it's name
311#
312sub find_form
313{
314    my $mech=shift || die "ERROR:find_form: mechanize object not specified";
315    my $name=shift || die "ERROR:find_form: field name not specified";
316    my @forms=$mech->forms;
317    my $fn=0;
318    foreach (0..$#forms)
319    {
320        $fn=$_ if $forms[$_]->find_input($name);
321    }
322    $mech->form_number($fn+1) if $fn;
323    return $fn;
324} #find_form
325
326