1#!/usr/local/bin/perl -w
2
3=pod
4
5=head1 NAME
6
7tv_grab_il - Grab TV listings for Israel.
8
9=head1 SYNOPSIS
10
11tv_grab_il --help
12
13tv_grab_il --version
14
15tv_grab_il --capabilities
16
17tv_grab_il --description
18
19
20tv_grab_il [--config-file FILE]
21           [--days N] [--offset N] [--slow]
22           [--output FILE] [--quiet] [--debug]
23
24tv_grab_il --configure [--config-file FILE]
25
26tv_grab_il --configure-api [--stage NAME]
27           [--config-file FILE]
28           [--output FILE]
29
30tv_grab_il --list-channels [--config-file FILE]
31           [--output FILE] [--quiet] [--debug]
32
33=head1 DESCRIPTION
34
35Output TV listings in XMLTV format for many channels available in Israel.
36The data comes from tv-guide.walla.co.il.
37
38First you must run B<tv_grab_il --configure> to choose which channels
39you want to receive.
40
Then running B<tv_grab_il> with no arguments will get listings in XML
42format for the channels you chose for available days including today.
43
44=head1 OPTIONS
45
46B<--configure> Prompt for which channels to download and write the
47configuration file.
48
49B<--config-file FILE> Set the name of the configuration file, the
50default is B<~/.xmltv/tv_grab_il.conf>.  This is the file written by
51B<--configure> and read when grabbing.
52
53B<--output FILE> When grabbing, write output to FILE rather than
54standard output.
55
56B<--days N> When grabbing, grab N days rather than all available days.
57
58B<--offset N> Start grabbing at today + N days.  N may be negative.
59
60B<--slow> Get programme descriptions as well as title.  (Will take a
61long time with lots of channels selected.)
62
63B<--quiet> Suppress the progress-bar normally shown on standard error.
64
65B<--debug> Provide more information on progress to stderr to help in
66debugging.
67
68B<--list-channels> Write output giving <channel> elements for every
69channel available (ignoring the config file), but no programmes.
70
71B<--capabilities> Show which capabilities the grabber supports. For more
72information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>
73
74B<--version> Show the version of the grabber.
75
76B<--help> Print a help message and exit.
77
78=head1 ERROR HANDLING
79
80If the grabber fails to download data for some channel on a specific day,
it will print an error message to STDERR and then continue with the other
82channels and days. The grabber will exit with a status code of 1 to indicate
83that the data is incomplete.
84
85=head1 ENVIRONMENT VARIABLES
86
87The environment variable HOME can be set to change where configuration
88files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
89it might be necessary to set HOME to a path without spaces in it.
90
91=head1 SUPPORTED CHANNELS
92
93For information on supported channels, see http://tv-guide.walla.co.il/
94
95=head1 AUTHOR
96
lightpriest. This documentation and parts of the code are
based on various other tv_grabbers from the XMLTV-project.
99
100=head1 SEE ALSO
101
102L<xmltv(5)>.
103
104=cut
105
106use strict;
107use Encode;
108use XMLTV::Options qw/ParseOptions/;
109use XMLTV::ProgressBar;
110use XMLTV::Configure::Writer;
111use XMLTV::Get_nice qw(get_nice_tree);
112
113use POSIX qw(strftime);
114use DateTime;
115
116
117# only used while testing the 'slow' option
118##$XMLTV::Get_nice::Delay = 0;
119##use HTTP::Cache::Transparent;
120##HTTP::Cache::Transparent::init( {
121##    BasePath => '/root/.xmltv/cache',
122##    NoUpdate => 60*60,			# cache time in seconds
123##    MaxAge => 24,				# flush time in hours
124##    Verbose => 1,
125##} );
126
127
# Patterns for the per-channel links on the guide's front page. The first
# one only recognises such a link; the second one additionally captures the
# numeric channel id that is embedded in it.
my $channel_link_regexp = "\\?w=\\/\\/\\/[0-9]*\\/\\/[A-Za-z]*\\/1";
my $channel_link_id = "\\?w=\\/\\/\\/([0-9]*)\\/\\/[A-Za-z]*\\/1";

# Everything XMLTV::Options::ParseOptions needs to know about this grabber.
# ParseOptions parses the command line and takes care of the basic
# capabilities that every tv_grabber is expected to provide.
my %grabber_spec = (
  grabber_name     => "tv_grab_il",
  description      => "Israel (tv-guide.walla.co.il)",
  version          => '$Id: tv_grab_il,v 1.28 2015/06/28 08:40:31 knowledgejunkie Exp $',
  capabilities     => [qw/baseline manualconfig apiconfig/],
  stage_sub        => \&config_stage,
  listchannels_sub => \&write_channels,
  extra_options    => [qw/slow/],      # grab descriptions from sub-page
);

my ($opt, $conf) = ParseOptions(\%grabber_spec);
141
# Configuration-stage callback (registered above as stage_sub).
#
#   $stage - name of the requested configuration stage; "start" is the
#            only one this grabber knows
#   $conf  - configuration hashref (not used here)
#
# Returns the XML text produced by XMLTV::Configure::Writer for the stage.
# Dies with "Unknown stage ..." for any other stage name.
sub config_stage {
  my ($stage, $conf) = @_;

  if ($stage ne "start") {
    die "Unknown stage $stage";
  }

  # Nothing is written between start() and end(): the document is opened
  # and immediately closed, naming 'select-channels' in the end() call.
  my $xml;
  my $config_writer = XMLTV::Configure::Writer->new(
    OUTPUT   => \$xml,
    encoding => 'utf-8',
  );
  $config_writer->start({ 'generator-info-name' => 'tv_grab_il' });
  $config_writer->end('select-channels');

  return $xml;
}
153
# Download the guide's front page and build the table of available channels.
#
#   $opt  - option hashref; only {quiet} and {debug} are read, and either
#           of them switches the progress bars off
#   $conf - configuration hashref (accepted but not used)
#
# Returns a hashref keyed by XMLTV channel id ("<n>.tv-guide.walla.co.il").
# Each value is a channel hash with id, display-name and url, plus icon
# when an image link sits next to the channel link.
# Dies if the front page cannot be fetched.
sub fetch_channels {
  my ($opt, $conf) = @_;

  my $show_progress = !($opt->{quiet} || $opt->{debug});
  my $link_re = qr/$channel_link_regexp/;
  my %channel_for;

  # Declare first, assign conditionally: the behaviour of
  # "my $x = ... unless ..." is undefined according to perlsyn.
  my $bar;
  $bar = XMLTV::ProgressBar->new({
    name  => "Fetching channels",
    count => 1,
  }) if $show_progress;

  # Get the page containing the list of channels
  my $tree = XMLTV::Get_nice::get_nice_tree('http://tv-guide.walla.co.il', undef, 'windows-1255');
  die "Could not fetch the channel list from http://tv-guide.walla.co.il\n"
    unless $tree;

  # Text links only; the image links to the same pages are used for icons.
  my @links = $tree->look_down("_tag", "a",
    "href", $link_re,
    sub { !$_[0]->look_down('_tag', 'img') }
  );

  if ($bar) {
    $bar->update();
    $bar->finish();
    undef $bar;
  }

  $bar = XMLTV::ProgressBar->new({
    name  => "Parsing result",
    count => scalar @links,
  }) if $show_progress;

  # Browse through the downloaded list of channels and map them to a hash XMLTV::Writer would understand
  foreach my $link (@links) {
    my $name = $link->as_text();
    my $href = $link->attr('href');
    my ($id) = $href =~ /$channel_link_id/;

    # The id pattern also matches an empty id; such a link cannot be
    # turned into a usable channel id, so it is left out.
    if (defined $name && length $name && defined $id && length $id) {
      my $xmltv_id = "$id.tv-guide.walla.co.il";
      my %channel = (
        id             => $xmltv_id,
        'display-name' => [[ encode( 'utf-8', $name) ]],
        url            => [ $href ],
      );

      # Try to fetch the icon: an image link in the parent's right-hand
      # sibling. right() may hand back a plain text node, which has no
      # look_down method, hence the ref() test.
      my $sibling = $link->parent();
      $sibling = $sibling->right() if $sibling;
      my $icon_link = ref($sibling)
        ? $sibling->look_down('_tag', 'a', 'href', $link_re)
        : undef;
      my $img = $icon_link ? $icon_link->look_down('_tag', 'img') : undef;
      my $icon_src = $img ? $img->attr('src') : undef;
      $channel{icon} = [ { src => $icon_src } ] if $icon_src;

      $channel_for{$xmltv_id} = \%channel;
    }

    $bar->update() if $bar;
  }

  # Only plain strings were copied out of the tree, so it can be released
  # now (HTML::Element trees want an explicit delete).
  $tree->delete();

  if ($bar) {
    $bar->finish();
    undef $bar;
  }

  # Notifying the user :)
  $bar = XMLTV::ProgressBar->new({
    name  => "Reformatting",
    count => 1,
  }) if $show_progress;

  if ($bar) {
    $bar->update();
    $bar->finish();
  }

  return \%channel_for;
}
214
# Channel-list callback (registered above as listchannels_sub): build an
# XMLTV document that contains every channel found on the site and no
# programmes.
#
#   $conf - configuration hashref handed in by the caller
#   $opt  - option hashref handed in by the caller
#
# Returns the XML document as a string.
sub write_channels {
  # The callback is handed to ParseOptions, whose return value is what
  # fills the file-level $opt/$conf, so those may still be unset when this
  # runs. Use the arguments instead; per the XMLTV::Options documentation
  # the callback receives the configuration first and the options second.
  my ($conf, $opt) = @_;

  my $channels = fetch_channels($opt, $conf);

  # Let XMLTV::Writer format the results as a valid xmltv file
  my $result;
  my $writer = XMLTV::Writer->new(OUTPUT => \$result, encoding => 'utf-8');
  $writer->start({'generator-info-name' => 'tv_grab_il'});
  $writer->write_channels($channels);
  $writer->end();

  return $result;
}
227
# Fetch the channels again to see what's available
my $channels = fetch_channels($opt, $conf);

# Configure initial elements for XMLTV::Writer
#
# Create a new hash for the channels so that channels without programmes
# won't appear in the final XML
my $encoding   = 'UTF-8';
my $credits    = {'generator-info-name' => 'tv_grab_il'};
my $w_channels = {};
my $programmes = [];

# Number of pages that could not be downloaded. A non-zero count makes the
# grabber exit with status 1, as described under ERROR HANDLING in the POD.
my $failures = 0;

# Download and parse one guide page.
# Returns the HTML tree, or nothing after reporting the problem on STDERR
# and recording it in $failures.
sub _fetch_page {
  my ($url) = @_;

  my $tree = eval { XMLTV::Get_nice::get_nice_tree($url, undef, 'windows-1255') };
  return $tree if $tree;

  my $reason = $@ || 'no data returned';
  $reason =~ s/\s+\z//;
  warn "Failed to fetch $url: $reason\n";
  $failures++;
  return;
}

# Read stop time and description from a programme's detail page.
#   $href      - link to the detail page, relative to the site root
#   $show_time - DateTime of the programme start
# Returns ($stop_time, $desc); $stop_time is undef and $desc is '' when the
# page does not provide them.
sub _fetch_details {
  my ($href, $show_time) = @_;

  my $detailtree = _fetch_page('http://tv-guide.walla.co.il/' . $href)
    or return (undef, '');

  my ($stop, $desc);
  if ( my $table = $detailtree->look_down('_tag', 'table', 'class', 'wp-0-b') ) {   # get 1st "wp-0-b" table
    if ( my $td = $table->look_down('_tag', 'td', 'class', 'w2b', sub{ !$_[0]->look_down('_tag', 'h2') } ) ) {
      # The cell holds two times; only the second (stop) one is needed.
      (undef, $stop) = $td->as_text() =~ /(\d\d:\d\d).*(\d\d:\d\d)/;
    }
    if ( my $td = $table->look_down('_tag', 'td', 'class', 'w3') ) {
      $desc = $td->as_text();
    }
  }
  $detailtree->delete();

  my $stop_time;
  if ( defined $stop ) {
    my ($stop_hour, $stop_minute) = split(/:/, $stop);
    my $candidate = $show_time->clone();
    # DateTime dies on a wall-clock time that does not exist (DST gap);
    # in that case the programme simply gets no stop time.
    my $ok = eval {
      $candidate->set(hour => $stop_hour, minute => $stop_minute, second => 0);
      # A stop before the start means the programme runs past midnight
      # (assumes no programme lasts longer than 24 hours).
      $candidate->add(days => 1) if DateTime->compare($candidate, $show_time) < 0;
      1;
    };
    $stop_time = $candidate if $ok;
  }

  return ($stop_time, defined $desc ? $desc : '');
}

# Turn one programme table of a day page into an XMLTV programme hash.
#   $show         - the table element
#   $theday       - DateTime for the day the page belongs to
#   $channel_id   - XMLTV id of the channel being grabbed
#   $want_details - true to fetch stop time and description (--slow)
# Returns the programme hashref, or nothing when the table has no title
# link or no usable start time.
sub _programme_from_table {
  my ($show, $theday, $channel_id, $want_details) = @_;

  my $title = $show->look_down('_tag', 'a', 'class', 'w3b');
  my $show_hour_element = $show->look_down('_tag', 'span', 'class', 'w3b txt-w');
  return unless $title && $show_hour_element;

  my ($hour, $minute) = $show_hour_element->as_text() =~ /(\d{1,2}):(\d\d)/;
  return unless defined $minute;

  # Times before 06:00 are placed on the following calendar day
  # (presumably a day page runs from 06:00 to 06:00, matching the
  # '060000' cutoff used for write_data).
  my $show_time = $theday->clone();
  my $ok = eval {
    $show_time->add (days => 1) if $hour < 6;
    $show_time->set(hour => $hour, minute => $minute, second => 0);
    1;
  };
  return unless $ok;

  # If user wants descriptions then get them from the sub-page
  #  (can also get stop time while we're there!)
  my ($stop_time, $desc) = (undef, '');
  my $href = $title->attr('href');
  if ( $want_details && defined $href ) {
    ($stop_time, $desc) = _fetch_details($href, $show_time);
  }

  my $prog = {
    start   => $show_time->strftime("%Y%m%d%H%M%S %z"),
    title   => [[ encode( 'utf-8', $title->as_text()) ]],
    channel => $channel_id,
  };
  $prog->{'stop'} = $stop_time->strftime("%Y%m%d%H%M%S %z")  if defined $stop_time;
  $prog->{'desc'} = [[ encode( 'utf-8', $desc) ]]  if $desc ne '';

  return $prog;
}

# A configuration without any channel has no 'channel' entry at all; treat
# that as an empty list instead of dereferencing undef.
my @wanted_channels = @{ $conf->{channel} || [] };

# -999 marks "--days not given" (the same sentinel the write_data arguments
# below test for). Day pages are addressed by weekday name, so one week is
# the most that can be told apart; use that as "all available days".
my $days = ($opt->{days} == -999) ? 7 : $opt->{days};
$days = 0 if $days < 0;

# Progress Bar :)
my $bar;
if (@wanted_channels && $days && !($opt->{quiet} || $opt->{debug})) {
  $bar = XMLTV::ProgressBar->new({
    name  => "Fetching channels listings",
    count => scalar(@wanted_channels) * $days,
  });
}

# Fetch listings per channel
foreach my $channel_id (@wanted_channels) {

  # Check each channel still exists in walla's channels page
  unless ($channels->{$channel_id}) {
    warn "Channel $channel_id is not in the site's channel list, skipping\n"
      unless $opt->{quiet};
    if ($bar) {
      $bar->update() for 1 .. $days;
    }
    next;
  }

  my ($walla_id) = ($channel_id =~ /^([0-9]*)\..*$/);

  # Now grab listings for each channel on each day, according to the options in $opt
  foreach my $day_offset ($opt->{offset} .. $opt->{offset} + $days - 1) {
    # "Today" must be taken in local time: a date computed in UTC is one
    # day behind during the first hours after midnight in Israel.
    my $theday = DateTime->today(time_zone => 'Asia/Jerusalem')->add (days => $day_offset);
    my $url = "http://tv-guide.walla.co.il/?w=/4//$walla_id//" . $theday->day_name() . "/1";

    if (my $tree = _fetch_page($url)) {
      my @shows = $tree->look_down('_tag', 'table', 'width', '100%', 'dir', 'ltr', 'cellpadding', '2', 'border', '0');
      my $found = 0;

      foreach my $show (@shows) {
        my $prog = _programme_from_table($show, $theday, $channel_id, $opt->{slow})
          or next;
        push @{$programmes}, $prog;
        $found++;
      }

      if ($found) {
        # Add this channel to the finalized XML
        $w_channels->{$channel_id} = $channels->{$channel_id} unless $w_channels->{$channel_id};
      }
      elsif (!$opt->{quiet}) {
        warn "No programmes found at $url\n";
      }

      # Everything needed has been copied out; release the tree.
      $tree->delete();
    }

    $bar->update() if $bar;
  }
}

if ($bar) {
  $bar->finish();
  undef $bar;
}

# Only ask write_data to filter the output when the user restricted the
# period on the command line.
my %w_args;

if (($opt->{offset} != 0) || ($opt->{days} != -999)) {
  $w_args{offset} = $opt->{offset};
  $w_args{days} = ($opt->{days} == -999) ? 100 : $opt->{days};
  $w_args{cutoff} = '060000';
}

my $data = [ $encoding, $credits, $w_channels, $programmes ];

XMLTV::write_data($data, %w_args);

# Tell the caller that the data is incomplete.
exit 1 if $failures;
340