#!/usr/local/bin/perl -w

=pod

=head1 NAME

tv_grab_il - Grab TV listings for Israel.

=head1 SYNOPSIS

tv_grab_il --help

tv_grab_il --version

tv_grab_il --capabilities

tv_grab_il --description


tv_grab_il [--config-file FILE]
           [--days N] [--offset N] [--slow]
           [--output FILE] [--quiet] [--debug]

tv_grab_il --configure [--config-file FILE]

tv_grab_il --configure-api [--stage NAME]
           [--config-file FILE]
           [--output FILE]

tv_grab_il --list-channels [--config-file FILE]
           [--output FILE] [--quiet] [--debug]

=head1 DESCRIPTION

Output TV listings in XMLTV format for many channels available in Israel.
The data comes from tv-guide.walla.co.il.

First you must run B<tv_grab_il --configure> to choose which channels
you want to receive.

Then running B<tv_grab_il> with no arguments will get listings in XML
format for the channels you chose for available days including today.

=head1 OPTIONS

B<--configure> Prompt for which channels to download and write the
configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_il.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than
standard output.

B<--days N> When grabbing, grab N days rather than all available days.

B<--offset N> Start grabbing at today + N days.  N may be negative.

B<--slow> Get programme descriptions and stop times as well as title.
(Will take a long time with lots of channels selected.)

B<--quiet> Suppress the progress-bar normally shown on standard error.

B<--debug> Provide more information on progress to stderr to help in
debugging.

B<--list-channels> Write output giving <channel> elements for every
channel available (ignoring the config file), but no programmes.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 ERROR HANDLING

If the grabber fails to download data for some channel on a specific day,
it will print an error message to STDERR and then continue with the other
channels and days. The grabber will exit with a status code of 1 to indicate
that the data is incomplete.

=head1 ENVIRONMENT VARIABLES

The environment variable HOME can be set to change where configuration
files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
it might be necessary to set HOME to a path without spaces in it.

=head1 SUPPORTED CHANNELS

For information on supported channels, see http://tv-guide.walla.co.il/

=head1 AUTHOR

lightpriest. This documentation and parts of the code
based on various other tv_grabbers from the XMLTV-project.

=head1 SEE ALSO

L<xmltv(5)>.

=cut

use strict;
use warnings;
use Encode;
use XMLTV;
use XMLTV::Options qw/ParseOptions/;
use XMLTV::ProgressBar;
use XMLTV::Configure::Writer;
use XMLTV::Get_nice qw(get_nice_tree);

use POSIX qw(strftime);
use DateTime;


# only used while testing the 'slow' option
##$XMLTV::Get_nice::Delay = 0;
##use HTTP::Cache::Transparent;
##HTTP::Cache::Transparent::init( {
##    BasePath => '/root/.xmltv/cache',
##    NoUpdate => 60*60,          # cache time in seconds
##    MaxAge => 24,               # flush time in hours
##    Verbose => 1,
##} );


# NOTE: everything below up to the ParseOptions() call must stay ABOVE that
# call.  ParseOptions() may invoke write_channels() (and through it
# fetch_channels()) before it returns, so these values have to be assigned
# by then.
my $SITE_URL       = 'http://tv-guide.walla.co.il';
my $SITE_CHARSET   = 'windows-1255';
my $SITE_TIMEZONE  = 'Asia/Jerusalem';
my $CHANNEL_SUFFIX = 'tv-guide.walla.co.il';

# Times earlier than this hour are treated as belonging to the following
# calendar day (matches the '060000' cutoff handed to the writer below).
my $DAY_START_HOUR = 6;

# Links to a channel's listing page; the second form captures the numeric id.
my $channel_link_regexp = qr{\?w=///[0-9]*//[A-Za-z]*/1};
my $channel_link_id     = qr{\?w=///([0-9]*)//[A-Za-z]*/1};

# Number of listing/detail pages that could not be downloaded.  A non-zero
# value makes the grabber exit with status 1 (see ERROR HANDLING above).
my $failed_downloads = 0;

# Use XMLTV::Options::ParseOptions to parse the options and take care of the basic capabilities that a tv_grabber should
my ($opt, $conf) = ParseOptions({
    grabber_name => "tv_grab_il",
    version => '$Id: tv_grab_il,v 1.28 2015/06/28 08:40:31 knowledgejunkie Exp $',
    capabilities => [qw/baseline manualconfig apiconfig/],
    stage_sub => \&config_stage,
    listchannels_sub => \&write_channels,
    description => "Israel (tv-guide.walla.co.il)",
    extra_options => [qw/slow/],    # grab descriptions from sub-page
});

# Configuration callback for ParseOptions.  The only stage is "start", which
# has nothing to ask and hands straight over to channel selection.
# Returns the stage description as an XML string; dies on any other stage.
sub config_stage {
    my ($stage, $conf) = @_;

    die "Unknown stage $stage" unless $stage eq "start";

    my $result;
    my $writer = XMLTV::Configure::Writer->new(OUTPUT => \$result, encoding => 'utf-8');
    $writer->start({'generator-info-name' => 'tv_grab_il'});
    $writer->end('select-channels');
    return $result;
}

# Create a progress bar called $name with $count steps.
# Returns undef when --quiet or --debug is active or there is nothing to
# count, so callers only need "if $bar" guards.
sub _progress_bar {
    my ($opt, $name, $count) = @_;

    return if $opt->{quiet} || $opt->{debug};
    return if !defined $count || $count < 1;

    return XMLTV::ProgressBar->new({
        name  => $name,
        count => $count,
    });
}

# Download $url and return the parsed HTML tree.
# On failure a message is printed to STDERR, the failure is counted in
# $failed_downloads, and nothing is returned, so the caller can carry on
# with the remaining pages.
sub _fetch_tree {
    my ($url) = @_;

    my $tree = eval { XMLTV::Get_nice::get_nice_tree($url, undef, $SITE_CHARSET) };
    my $error = $@;    # copy immediately; $@ is easily clobbered
    return $tree if $tree;

    $failed_downloads++;
    $error = "no data returned\n" unless $error;
    warn "tv_grab_il: failed to fetch $url: $error";
    return;
}

# Find the icon URL that belongs to a channel link, if there is one.
# The icon is looked for in the element to the right of the link's parent,
# inside another link to the same kind of channel page.
sub _channel_icon {
    my ($link) = @_;

    my $parent = $link->parent() or return;

    # In scalar context right() yields only the nearest sibling, which may be
    # a plain text segment rather than an element.
    my $sibling = $parent->right();
    return unless ref $sibling;

    my $icon_link = $sibling->look_down('_tag', 'a', 'href', $channel_link_regexp)
        or return;
    my $img = $icon_link->look_down('_tag', 'img') or return;

    return $img->attr('src');
}

# Download the site's front page and build the channel list.
# Arguments: the option hash and the configuration hash (the latter is
# accepted for symmetry but not used).
# Returns a hash ref, keyed by XMLTV channel id, of channel hashes in the
# form XMLTV::Writer expects.  Dies if the front page cannot be fetched,
# because nothing useful can be done without the channel list.
sub fetch_channels {
    my ($opt, $conf) = @_;
    $opt ||= {};

    my $channels = {};

    my $bar = _progress_bar($opt, "Fetching channels", 1);

    # Get the page containing the list of channels
    my $tree = XMLTV::Get_nice::get_nice_tree($SITE_URL, undef, $SITE_CHARSET)
        or die "tv_grab_il: could not fetch the channel list from $SITE_URL\n";

    # Text links only; the image links to the same pages are used for icons.
    my @links = $tree->look_down(
        '_tag', 'a',
        'href', $channel_link_regexp,
        sub { !$_[0]->look_down('_tag', 'img') },
    );

    if ($bar) {
        $bar->update();
        $bar->finish();
    }

    $bar = _progress_bar($opt, "Parsing result", scalar @links);

    # Browse through the downloaded list of channels and map them to a hash XMLTV::Writer would understand
    for my $link (@links) {
        $bar->update() if $bar;

        my $name = $link->as_text();
        next unless defined $name && length $name;

        my $href = $link->attr('href');
        my ($id) = $href =~ $channel_link_id;
        next unless defined $id && length $id;

        my $xmltv_id = "$id.$CHANNEL_SUFFIX";
        my %channel = (
            id             => $xmltv_id,
            'display-name' => [[ encode('utf-8', $name) ]],
            url            => [ $href ],
        );

        my $icon = _channel_icon($link);
        $channel{icon} = [ { src => $icon } ] if $icon;

        $channels->{$xmltv_id} = \%channel;
    }

    $bar->finish() if $bar;

    # Only plain strings were copied out of the tree, so it can be released.
    $tree->delete();

    # Notifying the user :)
    $bar = _progress_bar($opt, "Reformatting", 1);
    if ($bar) {
        $bar->update();
        $bar->finish();
    }

    return $channels;
}

# --list-channels callback for ParseOptions: receives the configuration hash
# and the option hash and returns the channel list as an XMLTV document.
# ParseOptions calls this before its own return value has been assigned to
# the file-level $opt/$conf, so the arguments must be used here instead.
sub write_channels {
    my ($conf, $opt) = @_;

    my $channels = fetch_channels($opt, $conf);

    # Let XMLTV::Writer format the results as a valid xmltv file
    my $result;
    my $writer = XMLTV::Writer->new(OUTPUT => \$result, encoding => 'utf-8');
    $writer->start({'generator-info-name' => 'tv_grab_il'});
    $writer->write_channels($channels);
    $writer->end();

    return $result;
}

# Fetch stop time and description from a programme's detail page.
# $href is the programme link as found on the listing page.
# Returns ($stop, $desc); $stop is an "HH:MM" string.  Either value is undef
# when it could not be found; nothing is returned if the page is unavailable.
sub _fetch_details {
    my ($href) = @_;

    return unless defined $href;
    my $tree = _fetch_tree("$SITE_URL/$href") or return;

    my ($stop, $desc);
    if ( my $table = $tree->look_down('_tag', 'table', 'class', 'wp-0-b') ) {    # get 1st "wp-0-b" table
        my $times = $table->look_down('_tag', 'td', 'class', 'w2b',
                                      sub { !$_[0]->look_down('_tag', 'h2') });
        if ($times) {
            # The first time is the start, which the listing page already gave us.
            (undef, $stop) = $times->as_text() =~ /(\d\d:\d\d).*(\d\d:\d\d)/;
        }

        my $text = $table->look_down('_tag', 'td', 'class', 'w3');
        $desc = $text->as_text() if $text;
    }
    $tree->delete();

    return ($stop, $desc);
}

# Turn one listing-table entry into an XMLTV programme hash.
#   $show         - the HTML element holding the entry
#   $theday       - DateTime of the listing day (midnight, site time zone)
#   $channel_id   - XMLTV channel id to record in the programme
#   $want_details - true to fetch description/stop time from the sub-page
# Returns a hash ref, or nothing if the entry lacks a title or a usable
# start time (such entries are skipped rather than aborting the whole run).
sub _parse_show {
    my ($show, $theday, $channel_id, $want_details) = @_;

    my $title   = $show->look_down('_tag', 'a', 'class', 'w3b');
    my $hour_el = $show->look_down('_tag', 'span', 'class', 'w3b txt-w');
    return unless $title && $hour_el;

    my ($hour, $minute) = $hour_el->as_text() =~ /(\d{1,2}):(\d{2})/
        or return;
    ($hour, $minute) = ($hour + 0, $minute + 0);

    my $start_time = $theday->clone();
    $start_time->add(days => 1) if $hour < $DAY_START_HOUR;

    # set() dies on out-of-range values and on local times that do not exist
    # (daylight-saving gap); drop just this programme in that case.
    my $start_ok = eval {
        $start_time->set(hour => $hour, minute => $minute, second => 0);
        1;
    };
    if (!$start_ok) {
        warn "tv_grab_il: skipping programme on $channel_id with unusable start time $hour:$minute\n";
        return;
    }

    # If user wants descriptions then get them from the sub-page
    # (can also get stop time while we're there!)
    my ($stop_time, $desc);
    if ($want_details) {
        my $stop;
        ($stop, $desc) = _fetch_details($title->attr('href'));

        if (defined $stop) {
            my ($stop_hour, $stop_minute) = split /:/, $stop;
            $stop_time = $start_time->clone();
            $stop_time->add(days => 1) if $stop_hour < $hour;    # assumes prog not last > 24hours!
            my $stop_ok = eval {
                $stop_time->set(hour => $stop_hour, minute => $stop_minute, second => 0);
                1;
            };
            undef $stop_time unless $stop_ok;
        }
    }

    my %prog = (
        start   => $start_time->strftime("%Y%m%d%H%M%S %z"),
        title   => [[ encode('utf-8', $title->as_text()) ]],
        channel => $channel_id,
    );
    $prog{stop} = $stop_time->strftime("%Y%m%d%H%M%S %z") if defined $stop_time;
    $prog{desc} = [[ encode('utf-8', $desc) ]] if defined $desc && $desc ne '';

    return \%prog;
}

# Fetch the channels again to see what's available
my $channels = fetch_channels($opt, $conf);

# Configure initial elements for XMLTV::Writer
#
# Create a new hash for the channels so that channels without programmes
# won't appear in the final XML
my $encoding = 'UTF-8';
my $credits = {'generator-info-name' => 'tv_grab_il'};
my $w_channels = {};
my $programmes = [];

# An unconfigured grabber has no channel list at all.
my @wanted_channels = @{ $conf->{channel} || [] };

# "Today" must be the site's today, not UTC's; computed once so a run that
# crosses midnight stays consistent.
my $today = DateTime->today(time_zone => $SITE_TIMEZONE);

# Progress Bar :)
my $bar = _progress_bar($opt, "Fetching channels listings",
                        scalar(@wanted_channels) * $opt->{days});

# Fetch listings per channel
foreach my $channel_id (@wanted_channels) {

    # Check each channel still exists in walla's channels page
    if (!$channels->{$channel_id}) {
        warn "tv_grab_il: configured channel $channel_id is no longer listed, skipping\n";
        if ($bar) {
            $bar->update() for 1 .. $opt->{days};
        }
        next;
    }

    my ($walla_id) = ($channel_id =~ /^([0-9]*)\..*$/);

    # Now grab listings for each channel on each day, according to the options in $opt
    for my $i ($opt->{offset} .. $opt->{offset} + $opt->{days} - 1) {
        my $theday = $today->clone()->add(days => $i);
        my $url = "$SITE_URL/?w=/4//$walla_id//" . $theday->day_name() . "/1";

        # A failed download has already been reported and counted.
        if ( my $tree = _fetch_tree($url) ) {
            my @shows = $tree->look_down('_tag', 'table', 'width', '100%', 'dir', 'ltr', 'cellpadding', '2', 'border', '0');
            print STDERR "tv_grab_il: no listings found at $url\n"
                if $opt->{debug} && !@shows;

            my $added = 0;
            foreach my $show (@shows) {
                my $prog = _parse_show($show, $theday, $channel_id, $opt->{slow})
                    or next;
                push @{$programmes}, $prog;
                $added++;
            }

            # Add this channel to the finalized XML
            $w_channels->{$channel_id} ||= $channels->{$channel_id} if $added;

            $tree->delete();
        }

        $bar->update() if $bar;
    }
}

$bar->finish() if $bar;

my %w_args;

if (($opt->{offset} != 0) || ($opt->{days} != -999)) {
    $w_args{offset} = $opt->{offset};
    $w_args{days} = ($opt->{days} == -999) ? 100 : $opt->{days};
    $w_args{cutoff} = '060000';
}

my $data = [ $encoding, $credits, $w_channels, $programmes ];

XMLTV::write_data($data, %w_args);

# Tell the caller when the output is incomplete.
exit 1 if $failed_downloads;