1#!/usr/local/bin/perl 2=pod 3 4=head1 NAME 5 6tv_grab_dtv_la - Grab TV listings for Direct TV Latin America 7 8=head1 SYNOPSIS 9 10tv_grab_dtv_la --help 11 12tv_grab_dtv_la [--config-file FILE] --configure [--gui OPTION] 13 14tv_grab_dtv_la [--config-file FILE] [--output FILE] [--days N] 15 [--offset N] [--min-delay N] [--max-delay N] [--quiet] 16 17tv_grab_dtv_la --list-channels --loc [ar | cb | cl | co | ec | pe | pr | uy | ve] 18 19tv_grab_dtv_la --capabilities 20 21tv_grab_dtv_la --version 22 23=head1 DESCRIPTION 24 25Output TV listings for Direct TV channels available in Latin America. 26Listings for the following countries are currently available: 27Argentina, Caribbean ('cb'), Chile, Colombia, Ecuador, 28Peru, Puerto Rico, Trinidad, Uruguay, Venezuela. 29 30The TV listings come from http://directstage.directvla.com/ 31The grabber relies on parsing HTML so it might stop working at any time. 32 33First run B<tv_grab_dtv_la --configure> to choose, first of all your country 34and then which channels you want to download. Then running B<tv_grab_dtv_la> 35with no arguments will output listings in XML format to standard output. 36 37The grabber doesn't generate stop times, so you may want to run 38tv_sort on the output to generate them. 39 40B<--configure> Prompt for which channels, and write the configuration file. 41 42B<--config-file FILE> Set the name of the configuration file, the 43default is B<~/.xmltv/tv_grab_dtv_la.conf>. This is the file written by 44B<--configure> and read when grabbing. 45 46B<--gui OPTION> Use this option to enable a graphical interface to be used. 47OPTION may be 'Tk', or left blank for the best available choice. 48Additional allowed values of OPTION are 'Term' for normal terminal output 49(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar. 50 51B<--output FILE> Write to FILE rather than standard output. 52 53B<--days N> Grab N days. The default is 3. 54 55B<--offset N> Start N days in the future. The default is to start 56from today. 57 58B<--min-delay N> You must insert a delay between page requests to avoid 59unnecessary load on the website. If you try to grab pages too quickly then 60it's likely you will get banned by the website providers (and may get 61all other xmltv users banned as well!). 62Suggested value: 1 (seconds) 63 64B<--max-delay N> Maximum delay between web page fetches. 65Suggested value: 3 (seconds) 66 67B<--quiet> Suppress the progress messages normally written to standard 68error. 69 70B<--capabilities> Show which capabilities the grabber supports. For more 71information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities> 72 73B<--version> Show the version of the grabber. 74 75B<--help> Print a help message and exit. 76 77=head1 SEE ALSO 78 79L<xmltv(5)>, L<tv_grab_ar>. 80 81=head1 AUTHOR 82 83Most of the grabber was made by Lic. Christian A. Rodriguez <car@cespi.unlp.edu.ar>, with a 84lot of help from others, specially Joaquin Salvarredy <jsalvarredy@cespi.unlp.edu.ar> who 85tested the grabber from its early versions and Lic. Nicolas Macia <nmacia@cespi.unlp.edu.ar> 86 87=head1 BUGS 88 89This grabber extracts all information from Direct TV Latin America website. Any change in this 90web page may cause this grabber to stop working. 91 92=cut 93 94# Author's TODOs & thoughts 95# 96# Add better channel names 97# 98( #(facilitate code-folding) 99# 100# 2016-03-14 101# 102# URLS 103# http://www.directv.com.ar/ 104# https://www.directv.com.ar/programacion/guia-de-programacion 105# http://www.directv.com.ar/programacion/guia-de-canales 106# 107# http://www.directv.cl/ 108# https://www.directv.cl/guia/guia.aspx?type=&link=nav/ 109# http://www.directv.cl/planes/guia-de-canales 110# 111# http://www.directv.com.co/ 112# https://www.directv.com.co/guia/guia.aspx?type= 113# http://www.directv.com.co/paquetes/guia-de-canales 114# 115# http://www.directv.com.ec/ 116# https://www.directv.com.ec/guia/guia.aspx?type= 117# http://www.directv.com.ec/planes/guia-de-canales 118# 119# http://www.directv.com.pe/ 120# https://www.directv.com.pe/guia/guia.aspx?type= 121# http://www.directv.com.pe/paquetes/guia-de-canales 122# 123# http://www2.directvpr.com/ 124# https://www.directvpr.com/guia/guia.aspx?type=&link=nav 125# http://www.directvpr.com/guia-de-canales?link=nav 126 127# http://www.directv.com.uy/ 128# https://www.directv.com.uy/guia/guia.aspx?type= 129# http://www.directv.com.uy/paquetes/guia-de-canales 130# 131# http://www.directv.com.ve 132# https://www.directv.com.ve/guia/guia.aspx 133# http://www.directv.com.ve/planes/guia-de-canales 134# 135); 136 137 138###################################################################### 139## REQUIRED LIBRARIES 140###################################################################### 141use warnings; 142use strict; 143 144use XMLTV; 145use XMLTV::Version '$Id: tv_grab_dtv_la,v 1.12 2016/03/15 01:13:11 knowledgejunkie Exp $ '; 146use XMLTV::Capabilities qw/baseline manualconfig/; 147use XMLTV::Description 'Latin America Direct TV listings'; 148use XMLTV::Memoize; 149use XMLTV::ProgressBar; 150use XMLTV::Ask; 151use XMLTV::Config_file; 152use XMLTV::Mode; 153use XMLTV::Date; 154use XMLTV::DST; 155use XMLTV::Usage <<END 156$0: get Latin America Direct-TV listings in XMLTV format 157To configure: $0 --configure [--config-file FILE] 158To grab listings: $0 [--config-file FILE] [--output FILE] [--days N] 159 [--offset N] [--quiet] 160To list channels: $0 --list-channels 161To show capabilities: $0 --capabilities 162To show version: $0 --version 163END 164; 165#use HTML::Form; 166use HTML::TreeBuilder; 167use Getopt::Long; 168use Date::Manip; 169use Date::Parse; 170use Date::Language; 171use LWP::UserAgent; 172use HTTP::Cookies; 173use Encode qw(from_to is_utf8 _utf8_off encode); 174use utf8; 175use JSON::PP; 176use Data::Dumper; 177 178 179# ${Log::TraceMessages::On} = 1; 180# to switch TRACE in remove the comment from prev. line 181 182# Use Log::TraceMessages if installed. 183BEGIN { 184 eval { require Log::TraceMessages }; 185 if ($@) { 186 *t = sub {}; 187 *d = sub { '' }; 188 } 189 else { 190 *t = \&Log::TraceMessages::t; 191 *d = \&Log::TraceMessages::d; 192 Log::TraceMessages::check_argv(); 193 } 194} 195 196 197###################################################################### 198## GLOBAL VARIABLES 199###################################################################### 200my $warnings = 0; 201 202my ($opt_days, $opt_offset, $opt_help, $opt_output, 203 $opt_configure, $opt_config_file, $opt_gui, 204 $opt_quiet, $opt_list_channels, $opt_loc, 205 $opt_min_delay, $opt_max_delay, $opt_debug); 206 207# Attributes of the root element in output. 208my $HEAD = { 209 'source-info-url' => 'http://directstage.directvla.com/', 210 'source-data-url' => 'http://directstage.directvla.com/', 211 'generator-info-name' => 'tv_grab_dtv_la', 212 'generator-info-url' => 'http://xmltv.org/', 213 }; 214 215my $channels_icon_url="http://www.lyngsat.com/packages/directvlatin.html"; 216my $countries_list_url="http://directstage.directvla.com/"; 217 218# So we are not affected by winter/summer timezone 219$XMLTV::DST::Mode='none'; 220 221# timezone to use (for all countries!) 222my $TZ="-0300"; 223 224# default language 225my $LANG="es"; 226my $OUT_ENCODING="UTF-8"; 227 228# Selected country 229my %country; 230 231# Full list of channels 232my @ch_all; 233my $CHANNELS_URL=undef; 234 235# Providers name for creating unique channel id 236my $PROVIDER_NAME="dtv.la"; 237 238# Progressbar 239my $mainbar; 240 241# Private UserAgent 242my $cookies = HTTP::Cookies->new; 243my $ua = LWP::UserAgent->new; 244$ua->cookie_jar($cookies); 245 246$ua->agent("xmltv/$XMLTV::VERSION"); 247$ua->parse_head(0); 248$ua->env_proxy; 249 250 251# undocumented --cache option. 252# not sure this will work with ajax post requests ? 253XMLTV::Memoize::check_argv('get_tree'); 254 255###################################################################### 256## SUBROUTINES 257###################################################################### 258 259###################################################################### 260## Returns a trimmed string 261sub trim { 262 my $string = shift; 263 $string =~ s/^\s+|\s+$//g if defined $string; 264 return $string; 265} 266 267###################################################################### 268## Returns a TreeBuilder instance 269 270# You must insert a delay between page requests to avoid 271# unnecessary load on the website. If you try to grab pages too quickly then 272# it's likely you will get banned by the website providers (and may get 273# all other xmltv users banned as well - it's trivial to ban by user-agent string). 274# 275my $last_get_time; 276# 277sub get_tree ($;$$) { 278 my $url = shift; 279 my $method = shift || 'get'; 280 my $data = shift; 281 my $r; 282 283 print STDERR "$method: $url ".($data?"[$data]":'')." \n" if $opt_debug; 284 285 # let's not overload the website with too many requests so we'll restrict the request frequency (as per Get_nice) 286 287 my $Delay = $opt_max_delay - $opt_min_delay; # in seconds 288 my $MinDelay = $opt_min_delay; # in seconds 289 290 if (defined $last_get_time) { 291 # A page has already been retrieved recently. See if we need 292 # to sleep for a while before getting the next page - being 293 # nice to the server. 294 my $next_get_time = $last_get_time + (rand $Delay) + $MinDelay; 295 my $sleep_time = $next_get_time - time(); 296 sleep $sleep_time if $sleep_time > 0; 297 } 298 299 if (!defined $method || lc($method) eq 'get') { 300 $r = $ua->get($url); 301 302 } elsif (lc($method) eq 'post') { 303 $r = $ua->post($url, $data); # $data must be a hash 304 305 } elsif (lc($method) eq 'jsonpost') { 306 # 307 # create the http request 308 my $req = HTTP::Request->new( 'POST', $url ); 309 ##$req->header( 'Content-Type' => 'application/json' ); 310 $req->content_type( 'application/json; charset=utf-8' ); 311 $req->content( $data ); # data must be json 312 313 # execute the request 314 $r = $ua->request($req); 315 316 } else { 317 die "unknown fetch method '$method'"; 318 } 319 $last_get_time = time(); 320 321 #print STDERR Dumper($r);die; 322 die "Could not fetch $url". (lc($method) eq 'jsonpost'?"[$data]":'') .", error: " . $r->status_line if ($r->is_error); 323 324 my $t; 325 if (lc($method) eq 'jsonpost') { 326 # expect a json reply! 327 $t = JSON::PP->new()->utf8(1)->decode($r->content) or die "cannot parse content of $url\n"; 328 329 } else { 330 $t = new HTML::TreeBuilder; 331 #$t->utf8_mode(1); 332 $data=$r->decoded_content('default_charset'=>'utf8'); 333 #$data=decode('UTF-8',$data) if (is_utf8($data)); 334 $t->parse($data) or die "Cannot parse content of Tree\n"; 335 $t->eof; 336 } 337 return $t; 338} 339 340###################################################################### 341## Bump a YYYYMMDD date by one. 342sub nextday { 343 my $d = shift; 344 my $p = parse_date($d); 345 my $n = DateCalc($p, '+ 1 day'); 346 return UnixDate($n, '%Q'); 347} 348 349###################################################################### 350## Returns the URL for grabbing channels 351sub get_channels_url { 352 if (not defined $CHANNELS_URL){ 353 die "No country specified, run me with --configure\n" if not keys %country; 354 355 # as at Apr 2014 it looks like they're still working on the website: all the Caribbean channels 356 # point to the same place. 357 if ( $country{'id'} eq 'CB' ) { 358 $CHANNELS_URL = "http://www.directvcaribbean.com/tt/channel-description"; 359 } 360 else { 361 # although some of the sites have this as a subdir (e.g. under 'paquetes' or 'planes') it still 362 # seems to work without that 363 $CHANNELS_URL = $country{url} . 'guia-de-canales'; 364 } 365 366 } 367 368 return $CHANNELS_URL; 369} 370 371###################################################################### 372## Returns the URL for grabbing specified channel programs 373sub get_channel_programs_url($) { 374 ##my $ch_id=shift; 375 ##my $base_url=get_channels_url(); 376 ##$base_url=~ s/default/detailch/; 377 ##return "$base_url?c=$ch_id&n=chname"; 378 379 # e.g. http://www.directv.com.ar/guia/Services/ProgrammingGuideAjax.asmx/GetProgramming 380 381 return $country{'url'} . 'guia/Services/ProgrammingGuideAjax.asmx/GetProgramming'; 382} 383 384###################################################################### 385## Returns the URL for grabbing programme details 386sub get_program_detail_url() { 387 388 # e.g. http://www.directv.com.ar/guia/Services/ProgrammingGuideAjax.asmx/GetProgrammingDetail 389 390 return $country{'url'} . 'guia/Services/ProgrammingGuideAjax.asmx/GetProgrammingDetail'; 391} 392 393###################################################################### 394## Converts the given datetime format to the needed UTC format 395sub datetime_for_program( $;$ ){ 396 my ($date,$strdt)=@_; 397 $strdt=~ /^(\w*)\s+(\d{1,2}:\d{1,2})/; 398 if ( defined $1 and defined $2) { 399 my $weekday=$1; 400 my $time=UnixDate($2,"%H:%M"); 401 if ( UnixDate($date,"%a") eq $weekday ){ 402 return utc_offset("$date $time", $TZ) 403 } 404 } 405 return undef; 406} 407 408###################################################################### 409## Returns channel programs for the specified date and channel id 410sub get_channel_programs ( $$$$ ) { 411 my ($ref_dates, $ref_channels, $ref_ch_all, $ref_programmes) = @_; 412 413 # convert @dates & @channels into hashes for faster searching 414 my %_dates = map { $_ => 1 } @$ref_dates; 415 my %_channels = map { $_ => 1 } @$ref_channels; 416 417 my @_ch_all = @$ref_ch_all; 418 419 # temporary store for programmes we fetch (used for detecting duplicates and clumps) 420 my $programmes = {}; 421 422 # for parsing non-English language dates 423 my $lang; 424 if ( $country{'id'} eq 'CB' ) { # Caribe is currently in English 425 $lang = Date::Language->new('English'); 426 } else { 427 $lang = Date::Language->new('Spanish'); 428 } 429 430 # site now uses a jQuery AJAX POST with JSON content in UTF-8 431 # e.g. { "day":19, "time":"12","minute":"30", "month":"4", "year":"2014", "onlyFavorites":"N" } 432 # 433 # data are avialable in a 4-hour windows (all channels combined) 434 # 435 foreach my $date (@$ref_dates) { 436 my ( $_y, $_m, $_d ) = $date =~ /(\d\d\d\d)(\d\d)(\d\d)/; 437 for (my $i=0; $i<24; $i+=4) { 438 my $_h = substr("0$i", -2, 2); 439 440 my $data = '{ "day":'.$_d.', "time":"'.$_h.'","minute":"00", "month":"'.$_m.'", "year":"'.$_y.'", "onlyFavorites":"N" }'; 441 ##print STDERR $data."\n"; 442 443 444 my $json = get_tree( get_channel_programs_url(undef), 'jsonpost', $data ); 445 ##print STDERR Dumper($json);die; 446 447 # response is a JSON string containing just one k:v pair, 'd' => "..." 448 # (see http://www.directv.com.ar/guia/js/Program-Guide/ProgrammingGuideAjax.js for details) 449 450 # we don't need the overhead of TreeBuilder - we'll go 'old school' and use a regexp 451 my (@li) = $json->{'d'} =~ m/(<li.*?<\/li>)/g; 452 ##print Dumper(@li);die; 453 454 foreach my $li (@li) { 455 456 # <li id="PG_ctl02_Prog_ctl00_liItem" class="" style="width:267px" eventId="121190335202" channel="121"><a href="javascript:return false;" class="ChannelArrowLeft"></a><span id="PG_ctl02_Prog_ctl00_HDCh" style="padding-left:10px;"><literal id="PG_ctl02_Prog_ctl00_litHDCh">Las aventuras de Robin Hood</literal></span><a href="/guia/RecordBox.aspx?iframe&eventId=121190335202&page=grid" class="Action loginAvailable"></a></li> 457 # <li id="PG_ctl02_Prog_ctl01_liItem" class=" toolTip" style="width:99.5px" title="Enciclopedia digital del cosmos" eventId="121190335203" channel="121"><span id="PG_ctl02_Prog_ctl01_HDCh"><literal id="PG_ctl02_Prog_ctl01_litHDCh">Enciclopedia di...</literal></span><a href="/guia/RecordBox.aspx?iframe&eventId=121190335203&page=grid" class="Action loginAvailable"></a></li> 458 459 # <li id="PG_ctl214_Prog_ctl00_liItem" class=" PpvVenezuela toolTip last disabled" style="width:806px" eventid="" channel="1003" original-title="Programación no disponible"><span id="PG_ctl214_Prog_ctl00_HDCh" style="padding-right:10px;" class="HdActive"><literal id="PG_ctl214_Prog_ctl00_litHDCh">Programación no disponible</literal><span id="PG_ctl214_Prog_ctl00_imgIcon" class="3d"></span></span><a href="javascript:return false;" class="ChannelArrowRight"></a><a href="https://www.directv.com.ve/midirectv/PPVBrowse.aspx?language=&section=DOD&film=" class="Action loginAvailable"></a></li> 460 461 # Method: 462 # The programme schedule is returned as a 4-hour window. Unfortunately the html contains nothing of use 463 # other than the title, channel id and eventId. There isn't even a start time! So: 464 # 1. Extract all the <li> items 465 # 2. Ignore any which aren't for a requested channel 466 # 3. Ignore any which have already started (as they will have already been picked up in a previous 4-hour window) 467 # 4. Fetch the programme details using the eventId 468 # 5. Parse the prog details and add to a hash 469 # 470 471 # parse the <li> element 472 my ( $eventId, $channelId, $hasStarted ) = $li =~ m/eventId="(\d*)"\schannel="(\d*)".*?(?(?=.*ChannelArrowLeft)(ChannelArrowLeft)|())/; 473 ##if ($hasStarted) {print STDERR "skipping $eventId, $channelId\n";} 474 next if $hasStarted; # if prog has already started 475 next if ! $_channels{ $channelId }; # if channel not wanted 476 next if !defined $eventId || $eventId eq '';# e.g. Programación no disponible (can't output anything since no start/stop time!) 477 478 # post content: { "eventId":121190335202, "day":20, "time":"4","minute":"0", "month":"4", "year":"2014" } 479 my $data = '{ "eventId":'.$eventId.', "day":'.$_d.', "time":"'.$_h.'","minute":"0", "month":"'.$_m.'", "year":"'.$_y.'" }'; 480 481 my $json = get_tree( get_program_detail_url(), 'jsonpost', $data ); 482 ##print STDERR Dumper($json);die; 483 484 my $t = HTML::TreeBuilder->new()->parse( $json->{'d'} ) or die "cannot parse content of programme detail\n"; 485 $t->eof; 486 ##$t->dump();die; 487 488 my $p; # programme 489 490 my $div; if ( my $_t = $t->look_down('_tag'=>'h2') ) { $div = $_t->parent(); } 491 if (!defined $div) { 492 # why is it not? 493 print STDERR 'Warn: No programme description (no <h2> element for "eventId":'.$eventId.', "day":'.$_d.', "time":"'.$_h.")\n"; 494 next; 495 } 496 497 my $h; # html (tree) element 498 499 if ( $h = $div->look_down('_tag'=>'h2') ) { 500 if ( my $h_ = $div->look_down('_tag'=>'img', 'alt'=>'HD program') ) { 501 $p->{'video'}->{'quality'} = 'HDTV'; 502 } 503 $p->{'title'} = trim( $h->as_text() ); 504 $h->detach; 505 } 506 # 'title' is mandatory in the DTD. If we don't have one then set to unknown 507 $p->{'title'} = ($LANG eq 'pt_BR' ? 'ignorado' : 'incógnito') if (!defined $p->{'title'} || $p->{'title'} eq ''); 508 509 510 # 1st <p> is the description 511 if ( $h = $div->look_down('_tag'=>'p') ) { 512 $p->{'desc'} = trim( $h->as_text() ); 513 $h->detach; 514 } 515 516 # 2nd <p> is the start time and duration 517 if ( $h = $div->look_down('_tag'=>'p') ) { 518 my $h_ = trim( $h->as_text() ); 519 my ($_junk, $_date, $_dur) = $h_ =~ m/(Comienza|Start):\s*(.*?)\|(.*?)$/s; # Caribe = "Start:" 520 521 # Date::Language doesn't currently do Portuguese 522 # (the Sky BR site isn't handled in this grabber anyway) 523 my $dt; 524 if ( $country{'id'} eq 'BR' ) { 525 die "\n Sorry I don't speak Portuguese \n"; 526 } else { 527 $dt = $lang->str2time($_date, $TZ); 528 } 529 530 $p->{'start_epoch'} = $lang->str2time($_date, $TZ); 531 ( $p->{'duration'} ) = $_dur =~ /(\d*)\s(?=minutos|minutes)/; 532 $p->{'stop_epoch'} = $p->{'start_epoch'} + ( $p->{'duration'} * 60 ) if $p->{'duration'}; 533 $p->{'start'} = $lang->time2str( "%Y%m%d%H%M%S %z", $p->{'start_epoch'}, $TZ ); 534 $p->{'stop'} = $lang->time2str( "%Y%m%d%H%M%S %z", $p->{'stop_epoch'}, $TZ ); 535 $h->detach; 536 } 537 538 # <div> class "Rank" = rating & programme url 539 if ( $h = $div->look_down('_tag'=>'div', 'class'=>qw/Rank/) ) { 540 if ( my $h = $h->look_down('_tag'=>'p') ) { 541 my $h_ = trim( $h->as_text() ); 542 ( $p->{'rating'} ) = $h_ =~ m/Rating:\s*(\S*)\s/s; 543 } 544 545 if ( my $h = $h->look_down('_tag'=>'div') ) { 546 if ( my $h_ = $h->look_down('_tag'=>'a') ) { 547 my $h__ = trim( $h_->attr('href') ); 548 $h__ = $country{'url'} . $h__ if ( $h__ !~ /^http/ ); 549 $p->{'url'} = $h__; 550 } 551 } 552 $h->detach; 553 } 554 555 556 557 # Reformat the data to Create the data structure for the programme 558 my $p_out = {}; 559 $p_out->{'channel'} = $channelId . '.' . $PROVIDER_NAME; 560 $p_out->{'title'} = [[ encode($OUT_ENCODING, $p->{'title'}), $LANG ]]; 561 $p_out->{'start'} = $p->{'start'}; 562 $p_out->{'stop'} = $p->{'stop'} if (defined $p->{'stop'} && $p->{'stop'} ne ''); 563 $p_out->{'desc'} = [[ encode($OUT_ENCODING, $p->{'desc'}), $LANG ]] if (defined $p->{'desc'} && $p->{'desc'} ne ''); 564 $p_out->{'sub-title'} = [[ encode($OUT_ENCODING, $p->{'sub_title'}), $LANG ]] if (defined $p->{'sub_title'} && $p->{'sub_title'} ne ''); 565 $p_out->{'rating'} = [[ $p->{'rating'} ]] if (defined $p->{'rating'} && $p->{'rating'} ne ''); 566 $p_out->{'url'} = [ encode($OUT_ENCODING, $p->{'url'}) ] if (defined $p->{'url'} && $p->{'url'} ne ''); 567 $p_out->{'video'} = $p->{'video'} if (defined $p->{'video'}); 568 569 # store the programme avoiding duplicates 570 # also check for duplicate start times and set clumpidx 571 if ( defined $programmes->{ $channelId }->{ $p->{'start_epoch'} } ) { 572 # duplicate prog or contemporary? 573 my $dup = 0; 574 foreach my $_p ( @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } } ) { 575 $dup = 1 if ( $_p->{'title'}[0][0] eq $p_out->{'title'}[0][0] ); # duplicate 576 } 577 next if $dup; # ignore duplicates (go to next <li> programme element) 578 if (!$dup) { 579 # contemporary programme so set clumpidx 580 my $numclumps = scalar @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } } + 1; 581 # set (or adjust) clumpidx of existing programmes 582 my $i = 0; 583 foreach my $_p ( @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } } ) { 584 $_p->{'clumpidx'} = "$i/$numclumps"; 585 $i++; 586 } 587 # set clumpidx for new programme 588 $p_out->{'clumpidx'} = "$i/$numclumps"; 589 } 590 } 591 592 # store the programme in our temporary store 593 push @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } }, $p_out; 594 595 } 596 597 $mainbar->update() if not $opt_quiet; 598 } 599 600 } 601 602 603 # All data has been gathered. We can now write the programme element to the output. 604 # 605 foreach ( keys %{$programmes} ) { 606 my $_ch_progs = $programmes->{$_}; 607 foreach ( sort keys %{$_ch_progs} ) { 608 my $_dt_progs = $_ch_progs->{$_}; 609 foreach (@{ $_dt_progs }) { 610 push @{$ref_programmes}, $_; 611 } 612 } 613 } 614} 615 616###################################################################### 617## Returns the list of channels 618# 619# Note: I've noticed that sometimes there's more channels on the actual programme schedule page 620# than in the channels guide page :-( So we may need to switch and get the list of channels 621# from the AJAX fetch on the schedules page (although the icons may be smaller?) 622# 623sub get_channels { 624 my $bar = new XMLTV::ProgressBar("Getting list of channels for $country{name}", 1) if not $opt_quiet; 625 626 my %channels; 627 my $url=get_channels_url(); 628 629 # Get channels that are transmiting now 630 my $tree = get_tree($url); 631 get_channels_from_tree($tree,\%channels); 632 # We will try to find more channels for later hours 633 #get_channels_for_later_hours($tree,\%channels); 634 635 # Finish using Tree 636 $tree=undef; 637 $bar->update() && $bar->finish() if not $opt_quiet; 638 return %channels; 639} 640 641###################################################################### 642## Simulate a form filling to retrieve more channels for later hours 643sub get_channels_for_later_hours() { 644 my ($tree,$channels) = @_; 645 646 # First we get the form elemento to call iteratively for each option from a select 647 my $form_elem = $tree->look_down( 648 "_tag"=>"form", sub { 649 defined $_[0]->attr('name') and $_[0]->attr('name')=~ /Form1/i 650 } 651 ); 652 # The name of the select element is: 653 my $search_for_input="ddlTime"; 654 my %needed_form_elems=('ddlTime','select','ddlDay','select','btnSubmit','input'); 655 656 # Form to call iteratively 657 my $form=HTML::Form->parse($form_elem->as_HTML(),get_channels_url()); 658 my $input; 659 660 foreach my $ninput (keys %needed_form_elems){ 661 $input=$form->find_input($ninput); 662 663 # There is a bug in the source HTML. The field we need is outside the form tag 664 if (not defined $input) { 665 # We try to fix this problem 666 my $broken_elem = $tree->look_down( 667 "_tag"=>$needed_form_elems{$ninput}, sub { 668 defined $_[0]->attr('name') and $_[0]->attr('name')=~ /$ninput/i 669 } 670 ); 671 $form_elem->insert_element($broken_elem); 672 $form=HTML::Form->parse($form_elem->as_HTML(),get_channels_url()); 673 $input=$form->find_input($ninput); 674 die "Cannot retrieve field $ninput. Aborting" if (not defined $input); 675 } 676 } 677 # Now for each value of the select, we will call get_channels_from_tree subroutine 678 $input=$form->find_input($search_for_input); 679 my $default_value=$input->value; 680 foreach ($input->possible_values) { 681 if ($_ != $default_value) { 682 $form->value($search_for_input,$_); 683 my $r=$ua->request($form->click); 684 die "Error doing automatic form filling. Aboring" if ($r->is_error); 685 my $t = new HTML::TreeBuilder; 686 #$t->utf8_mode(1); 687 my $data=$r->decoded_content('default_charset'=>'utf8'); 688 #$data=from_to($data,'UTF-8',$OUT_ENCODING) if (is_utf8($data)); 689 $t->parse($data) or die "Cannot parse content of Tree\n"; 690 $t->eof; 691 get_channels_from_tree($t,$channels); 692 } 693 } 694} 695 696###################################################################### 697## Return the list of channels for a tree representation of an HTML page 698sub get_channels_from_tree( ) { 699 my ($tree,$channels) = @_; 700 701 # see if there's a 'pMain' so we can ignore the CMS content (which contains the on-demand channels) 702 my $chan_div = $tree->look_down('_tag' => 'div', 'id' => 'pMain'); 703 $tree = $chan_div if $chan_div; 704 705 my @chan_groups = $tree->look_down('_tag' => 'div', 'class' => 'guia-canales')->look_down('_tag' => 'div', 'class' => 'combo-canal-content'); 706 707 foreach (@chan_groups) { 708 my @chan_elems = $_->look_down('_tag' => 'li'); 709 foreach (@chan_elems) { 710 # <li> 711 # <a itemprop="makesOffer" href="http://www.directv.com.pe/guia/ChannelDetail.aspx?id=197"><img src="http://www.ondirectv.com/Thumbnail.ashx?image=LOGOS/canales/v2/197.png&width=64&height=32" alt="TVPeru " width="64" height="32" title="TVPeru - Canal 197"><br> 712 # <span>197</span> 713 # </a> 714 # </li> 715 716 if ( my $chan = $_->look_down('_tag' => 'a') ) { 717 my ($chan_id, $chan_name, $chan_url, %chan_icon) = ('', '', '', ()); 718 $chan_id = trim( $chan->look_down('_tag' => 'span')->as_text() ); 719 $chan_url = $chan->attr('href'); 720 if ( my $chan_img = $chan->look_down('_tag' => 'img') ) { 721 $chan_name = trim( $chan_img->attr('alt') ); 722 $chan_icon{'src'} = $chan_img->attr('src'); 723 $chan_icon{'width'} = $chan_img->attr('width') if defined $chan_img->attr('width'); 724 $chan_icon{'height'} = $chan_img->attr('height') if defined $chan_img->attr('height'); 725 } 726 727 $chan_name="$chan_name ($chan_id)"; 728 if (not exists ${$channels} { $chan_id }) { 729 ${$channels} {$chan_id}=$chan_name; 730 push @ch_all, { 731 'display-name' => [[ encode("UTF-8",$chan_name), $LANG ],[$chan_id]], 732 'channel-num' => $chan_id , 733 'id' => "$chan_id.$PROVIDER_NAME", 734 'icon' => [ \%chan_icon ], 735 }; 736 } 737 } 738 } 739 } 740 741} 742 743###################################################################### 744## Get a list of possible countries 745sub get_countries( ) { 746 my $country_codes = { 'Argentina' => 'AR' 747 , 'Caribe' => 'CB' 748 , 'Chile' => 'CL' 749 , 'Colombia' => 'CO' 750 , 'Ecuador' => 'EC' 751 , 'Perú' => 'PE' 752 , 'Puerto Rico' => 'PR' 753 , 'Uruguay' => 'UY' 754 , 'Venezuela' => 'VE' 755 }; 756 757 my $tree = get_tree($countries_list_url); 758 my @options=$tree->look_down('_tag' => 'div', 'class' => 'box-menu')->look_down('_tag' => 'div', 'class' => 'items')->look_down('_tag' => 'a'); 759 my %countries; 760 foreach my $tag (@options){ 761 my %country; 762 $country{'name'} = $tag->as_text(); 763 $country{'url'} = $tag->attr('href') . "/"; 764 # Default URLs to https:// - programme guide is https; channel lists will redirect to http 765 $country{'url'} =~ s/^http:/https:/; 766 $country{'id'} = $country_codes->{$country{'name'}}; 767 768 # we won't do the Sky sites - they are very different to DirecTV 769 if ( $country{'name'} =~ /(SKY Brazil|SKY México)/ ) { 770 #print "Skipping country - $country{'name'} \n" unless $opt_quiet; 771 next; 772 } 773 774 if ( !defined $country_codes->{$country{'name'}} ) { 775 print "Unknown country - $country{'name'} \n" unless $opt_quiet; 776 next; 777 } 778 779 $countries{$tag->as_text()} = \%country; 780 } 781 return %countries; 782} 783 784###################################################################### 785## Return the user-selected country 786sub select_country( ) { 787 my %countries = get_countries(); 788 my @names = sort keys %countries; 789 my $choice = ask_choice("Select your country:", $names[0], @names); 790 return ( id=>$countries{$choice}{'id'}, name=>$choice, url=>$countries{$choice}{'url'} ); 791} 792 793###################################################################### 794## Return the channel icons from LyngSat 795sub get_channel_icons() { 796 my $bar = new XMLTV::ProgressBar("Trying to fetch channel icons for $country{name}", $#ch_all + 1) if not $opt_quiet; 797 my $tree=get_tree($channels_icon_url); 798 my $table=$tree->look_down( 799 '_tag'=>'table',sub { 800 defined $_[0]->attr('width') and $_[0]->attr('width')== '600' 801 } 802 ); 803 foreach my $ch (@ch_all){ 804 my $ch_num=$ch->{'channel-num'}; 805 my $tr=$table->look_down( 806 '_tag'=>'tr',sub { 807 my @td=$_[0]->content_list(); 808 defined $td[0] and $td[0]->as_text() =~ /\s*$ch_num\s*/ 809 } 810 ); 811 if (defined $tr){ 812 my $img=$tr->look_down( 813 '_tag'=>'img'); 814 $ch->{icon}=[ { src=>$img->attr('src')} ] if defined $img and defined $img->attr('src'); 815 } 816 $bar->update() if not $opt_quiet; 817 } 818 $bar->finish() if not $opt_quiet; 819} 820 821###################################################################### 822## Return the channel icons from the DirecTV site 823sub get_channel_icons_dtv() { 824 my $bar = new XMLTV::ProgressBar("Fetching channel icons for $country{name}", $#ch_all + 1) if not $opt_quiet; 825 my $tree=get_tree( get_channels_url() ); 826 my $table=$tree->look_down('_tag' => 'div', 'class' => 'guia-canales'); 827 828 foreach my $ch (@ch_all){ 829 my $ch_num=$ch->{'channel-num'}; 830 my $chan_img; 831 if ( my $chan_a = $table->look_down('_tag'=>'a', 'href'=>qr/ChannelDetail.aspx\?id=$ch_num/) ){ 832 $chan_img = $chan_a->look_down('_tag'=>'img'); 833 } 834 if (defined $chan_img) { 835 my %chan_icon; 836 $chan_icon{'src'} = $chan_img->attr('src'); 837 $chan_icon{'width'} = $chan_img->attr('width') if defined $chan_img->attr('width'); 838 $chan_icon{'height'} = $chan_img->attr('height') if defined $chan_img->attr('height'); 839 $ch->{icon}=[ \%chan_icon ]; 840 } 841 $bar->update() if not $opt_quiet; 842 } 843 $bar->finish() if not $opt_quiet; 844} 845 846###################################################################### 847## MAIN PROGRAM 848###################################################################### 849 850###################################################################### 851## get options 852# Get options. 853 854$opt_days = 3; # default 855$opt_offset = 0; # default 856$opt_quiet = 0; # default 857$opt_min_delay = 1; 858$opt_max_delay = 3; 859$opt_debug = 0; 860 861GetOptions( 862 'days=i' => \$opt_days, 863 'offset=i' => \$opt_offset, 864 'help' => \$opt_help, 865 'configure' => \$opt_configure, 866 'config-file=s' => \$opt_config_file, 867 'gui:s' => \$opt_gui, 868 'output=s' => \$opt_output, 869 'quiet' => \$opt_quiet, 870 'list-channels' => \$opt_list_channels, 871 'debug' => \$opt_debug, 872 'loc=s' => \$opt_loc, 873 'min-delay=f' => \$opt_min_delay, 874 'max-delay=f' => \$opt_max_delay, 875) or usage(0); 876 877$opt_min_delay = (0.5, $opt_min_delay)[0.5 < $opt_min_delay]; 878$opt_max_delay = (0.5, $opt_max_delay)[0.5 < $opt_max_delay]; 879 880die 'number of days must not be negative' if (defined $opt_days && $opt_days < 0); 881usage(1) if $opt_help; 882 883XMLTV::Ask::init($opt_gui); 884my $mode = XMLTV::Mode::mode( 885 'grab', # default 886 $opt_configure => 'configure', 887 $opt_list_channels => 'list-channels', 888); 889 890# File that stores which channels to download. 891my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_dtv_la', $opt_quiet); 892my @config_lines; # used only in grab mode 893if ($mode eq 'configure') { 894 XMLTV::Config_file::check_no_overwrite($config_file); 895}elsif ($mode eq 'grab') { 896 @config_lines = XMLTV::Config_file::read_lines($config_file); 897}elsif ($mode eq 'list-channels') { 898 # Config file not used. 899}else { 900 die 901} 902 903## Whatever we are doing, we need the channels data. 904##my %channels = get_channels(); # sets @ch_all 905my %channels; 906my @channels; 907 908###################################################################### 909## write configuration 910# 911if ($mode eq 'configure') { 912 open(CONF, ">$config_file") or die "cannot write to $config_file: $!"; 913 %country= select_country(); 914 print CONF "country $country{id} $country{name} $country{url} \n"; 915 %channels = get_channels(); # sets @ch_all 916 917 # Ask about each channel. 918 my @chs = sort keys %channels; 919 my @names = map { $channels{$_} } @chs; 920 my @qs = map { "add channel $_?" } @names; 921 my @want = ask_many_boolean(1, @qs); 922 foreach (@chs) { 923 my $w = shift @want; 924 warn("cannot read input, stopping channel questions"), last 925 if not defined $w; 926 # No need to print to user - XMLTV::Ask is verbose enough. 927 928 # Print a config line, but comment it out if channel not wanted. 929 print CONF '#' if not $w; 930 my $name = shift @names; 931 print CONF "channel $_ $name\n"; 932 # TODO don't store display-name in config file. 933 } 934 close CONF or warn "cannot close $config_file: $!"; 935 say("Finished configuration."); 936 exit(); 937} 938 939# Not configuration, we must be writing something, either full 940# listings or just channels. 941 942die if $mode ne 'grab' and $mode ne 'list-channels'; 943 944# Options to be used for XMLTV::Writer. 945my %w_args; 946if (defined $opt_output) { 947 my $fh = new IO::File(">$opt_output"); 948 die "cannot write to $opt_output: $!" if not defined $fh; 949 $w_args{OUTPUT} = $fh; 950} 951$w_args{encoding} = $OUT_ENCODING; 952my $writer = new XMLTV::Writer(%w_args); 953$writer->start($HEAD); 954 955if ($mode eq 'list-channels') { 956 # must have a country before we can list channels! 957 die "please select a country ('--loc xx')" if (!defined $opt_loc || $opt_loc eq ''); 958 959 my %countries = get_countries(); 960 my ($key, $value); 961 while ( ($key, $value) = each %countries ) { 962 undef $key; 963 if ( $value->{'id'} eq uc($opt_loc) ) { 964 %country = ( id => $value->{'id'}, name => $value->{'name'}, url => $value->{'url'} ); 965 last; 966 } 967 } 968 969 %channels = get_channels(); # sets @ch_all based on %country 970 971 foreach (@ch_all) { 972 delete $_->{'channel-num'}; # not an DTD item! 973 $writer->write_channel($_) ; 974 } 975 $writer->end(); 976 exit(); 977} 978 979 980###################################################################### 981## We are producing full listings. 982die if $mode ne 'grab'; 983 984## Read configuration 985# @channels = id list of channels to grab 986# %channels = id => name of channels to grab 987# @ch_all = id + ch-num + display-name of channels to grab 988# 989my $line_num = 1; 990foreach (@config_lines) { 991 ++ $line_num; 992 next if not defined; 993 if (/^country:?\s+(\S+)\s+(\S+)\s+([^\#]+)/) { 994 %country=( id => $1, name=>$2, url=>$3 ); 995 }else{ 996 if (/^channel:?\s+(\S+)\s+([^\#]+)/) { 997 my $ch_did = $1; 998 my $ch_name = $2; 999 $ch_name =~ s/\s*$//; 1000 push @channels, $ch_did; 1001 #CAR 1002 push @ch_all, { 1003 'display-name' => [[ $ch_name, $LANG ],[$ch_did]], 1004 'channel-num' => $ch_did , 1005 'id'=> "$ch_did.$PROVIDER_NAME" }; 1006 $channels{$ch_did} = $ch_name; 1007 } else { 1008 warn "$config_file:$line_num: bad line\n"; 1009 } 1010 } 1011} 1012 1013###################################################################### 1014## begin main program 1015## Assume the listings source uses CET (see BUGS above). 1016my $now = DateCalc(parse_date('now'), "$opt_offset days"); 1017 1018die "No channels specified, run me with --configure\n" if not keys %channels; 1019die "No country specified, run me with --configure\n" if not keys %country; 1020my @to_get; 1021 1022## we change language if country is Brazil 1023$LANG="pt_BR" if $country{name} =~ /brazil/i; 1024 1025# Dates requested for programs listing 1026# @dates = list of dates to grab (yyyymmdd) 1027# 1028my $day=UnixDate($now,'%Q'); 1029my @dates; 1030for (my $i=0;$i<$opt_days;$i++) { 1031 push @dates, $day; 1032 #for each day 1033 $day=nextday($day); 1034 die if not defined $day; 1035} 1036 1037# Try to get channel icons 1038# adds %icon to @ch_all 1039# 1040##get_channel_icons(); # LyngSat 1041get_channel_icons_dtv(); # DirecTV 1042 1043# Write the <channel> elements 1044# data from %channels 1045# @to_get = array of @dates (yyyymmdd), chan-id (e.g. 122), chan-name (e.g. 122.dtv.la) 1046# 1047foreach my $ch_did (@channels) { 1048 my $index=0; 1049 my $ch_name=$channels{$ch_did}; 1050 my $ch_xid="$ch_did.$PROVIDER_NAME"; 1051 while (${$ch_all[$index]}{'id'} ne $ch_xid) { 1052 $index=$index+1; 1053 } 1054 my $ch_num=${ch_all[$index]}{'channel-num'}; 1055 my $to_add={ 1056 id => $ch_xid, 1057 'display-name' => [ 1058 [ encode($OUT_ENCODING, $ch_name), $LANG ], 1059 [ $ch_num ] ] 1060 }; 1061 $to_add->{icon}=${ch_all[$index]}{icon} if (exists ${ch_all[$index]}{icon} ); 1062 $writer->write_channel($to_add); 1063 # build array of station-days to grab 1064 push @to_get, [ \@dates, $ch_xid, $ch_num ]; 1065} 1066 1067# This progress bar is for both downloading and parsing. Maybe 1068# they could be separate. 1069##my $mainbar = new XMLTV::ProgressBar("getting listings for $country{name}", $#to_get + 1) if not $opt_quiet; 1070$mainbar = new XMLTV::ProgressBar("Getting listings for $country{name}", (@dates * 6) ) if not $opt_quiet; 1071 1072# Grab requested data 1073# [ <v1.4 and write the output xml ] 1074# [v1.4 - now done all together rather than one station-day at a time] 1075##foreach (@to_get) { 1076## foreach (get_channel_programs($_->[0], $_->[1], $_->[2])) { 1077## $writer->write_programme($_); 1078## } 1079## $mainbar->update() if not $opt_quiet; 1080##} 1081 1082# Data store before being written as XML 1083my @programmes = (); 1084 1085# Fetch the data 1086# (note the params are all globals so the params aren't strictly necessary 1087# but let's try for some better programming practice ;-) 1088get_channel_programs(\@dates, \@channels, \@ch_all, \@programmes); 1089 1090# Write the <programme> elements 1091foreach (@programmes) { 1092 $writer->write_programme($_); 1093} 1094 1095# end the progress bar 1096$mainbar->finish() if not $opt_quiet; 1097 1098# close xml file 1099$writer->end(); 1100 1101# Signal that something went wrong if there were warnings. 1102exit(1) if $warnings; 1103 1104# All data fetched ok. 1105#debug "Exiting without warnings."; 1106exit(0); 1107