1#!/usr/local/bin/perl
2=pod
3
4=head1 NAME
5
6tv_grab_dtv_la - Grab TV listings for Direct TV Latin America
7
8=head1 SYNOPSIS
9
10tv_grab_dtv_la --help
11
12tv_grab_dtv_la [--config-file FILE] --configure [--gui OPTION]
13
14tv_grab_dtv_la [--config-file FILE] [--output FILE] [--days N]
15               [--offset N] [--min-delay N] [--max-delay N] [--quiet]
16
17tv_grab_dtv_la --list-channels --loc [ar | cb | cl | co | ec | pe | pr | uy | ve]
18
19tv_grab_dtv_la --capabilities
20
21tv_grab_dtv_la --version
22
23=head1 DESCRIPTION
24
25Output TV listings for Direct TV channels available in Latin America.
26Listings for the following countries are currently available:
27Argentina, Caribbean ('cb'), Chile, Colombia, Ecuador,
28Peru, Puerto Rico, Trinidad, Uruguay, Venezuela.
29
30The TV listings come from http://directstage.directvla.com/
31The grabber relies on parsing HTML so it might stop working at any time.
32
33First run B<tv_grab_dtv_la --configure> to choose, first of all your country
34and then which channels you want to download. Then running B<tv_grab_dtv_la>
35with no arguments will output listings in XML format to standard output.
36
37The grabber doesn't generate stop times, so you may want to run
38tv_sort on the output to generate them.
39
40B<--configure> Prompt for which channels, and write the configuration file.
41
42B<--config-file FILE> Set the name of the configuration file, the
43default is B<~/.xmltv/tv_grab_dtv_la.conf>.  This is the file written by
44B<--configure> and read when grabbing.
45
46B<--gui OPTION> Use this option to enable a graphical interface to be used.
47OPTION may be 'Tk', or left blank for the best available choice.
48Additional allowed values of OPTION are 'Term' for normal terminal output
49(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.
50
51B<--output FILE> Write to FILE rather than standard output.
52
53B<--days N> Grab N days.  The default is 3.
54
55B<--offset N> Start N days in the future.  The default is to start
56from today.
57
58B<--min-delay N> You must insert a delay between page requests to avoid
59unnecessary load on the website. If you try to grab pages too quickly then
60it's likely you will get banned by the website providers (and may get
61all other xmltv users banned as well!).
62Suggested value: 1 (seconds)
63
64B<--max-delay N> Maximum delay between web page fetches.
65Suggested value: 3 (seconds)
66
67B<--quiet> Suppress the progress messages normally written to standard
68error.
69
70B<--capabilities> Show which capabilities the grabber supports. For more
71information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>
72
73B<--version> Show the version of the grabber.
74
75B<--help> Print a help message and exit.
76
77=head1 SEE ALSO
78
79L<xmltv(5)>, L<tv_grab_ar>.
80
81=head1 AUTHOR
82
83Most of the grabber was made by Lic. Christian A. Rodriguez <car@cespi.unlp.edu.ar>, with a
84lot of help from others, specially Joaquin Salvarredy <jsalvarredy@cespi.unlp.edu.ar> who
85tested the grabber from its early versions and Lic. Nicolas Macia <nmacia@cespi.unlp.edu.ar>
86
87=head1 BUGS
88
89This grabber extracts all information from Direct TV Latin America website. Any change in this
90web page may cause this grabber to stop working.
91
92=cut
93
94# Author's TODOs & thoughts
95#
96# Add better channel names
97#
98(	#(facilitate code-folding)
99#
100# 2016-03-14
101#
102#  URLS
103#    http://www.directv.com.ar/
104#    https://www.directv.com.ar/programacion/guia-de-programacion
105#    http://www.directv.com.ar/programacion/guia-de-canales
106#
107#    http://www.directv.cl/
108#    https://www.directv.cl/guia/guia.aspx?type=&link=nav/
109#    http://www.directv.cl/planes/guia-de-canales
110#
111#    http://www.directv.com.co/
112#    https://www.directv.com.co/guia/guia.aspx?type=
113#    http://www.directv.com.co/paquetes/guia-de-canales
114#
115#    http://www.directv.com.ec/
116#    https://www.directv.com.ec/guia/guia.aspx?type=
117#    http://www.directv.com.ec/planes/guia-de-canales
118#
119#    http://www.directv.com.pe/
120#    https://www.directv.com.pe/guia/guia.aspx?type=
121#    http://www.directv.com.pe/paquetes/guia-de-canales
122#
123#    http://www2.directvpr.com/
124#    https://www.directvpr.com/guia/guia.aspx?type=&link=nav
125#    http://www.directvpr.com/guia-de-canales?link=nav
126
127#    http://www.directv.com.uy/
128#    https://www.directv.com.uy/guia/guia.aspx?type=
129#    http://www.directv.com.uy/paquetes/guia-de-canales
130#
131#    http://www.directv.com.ve
132#    https://www.directv.com.ve/guia/guia.aspx
133#    http://www.directv.com.ve/planes/guia-de-canales
134#
135);
136
137
138######################################################################
139## REQUIRED LIBRARIES
140######################################################################
141use warnings;
142use strict;
143
144use XMLTV;
145use XMLTV::Version '$Id: tv_grab_dtv_la,v 1.12 2016/03/15 01:13:11 knowledgejunkie Exp $ ';
146use XMLTV::Capabilities qw/baseline manualconfig/;
147use XMLTV::Description 'Latin America Direct TV listings';
148use XMLTV::Memoize;
149use XMLTV::ProgressBar;
150use XMLTV::Ask;
151use XMLTV::Config_file;
152use XMLTV::Mode;
153use XMLTV::Date;
154use XMLTV::DST;
155use XMLTV::Usage <<END
156$0: get Latin America Direct-TV listings in XMLTV format
157To configure: $0 --configure [--config-file FILE]
158To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
159		[--offset N] [--quiet]
160To list channels: $0 --list-channels
161To show capabilities: $0 --capabilities
162To show version: $0 --version
163END
164;
165#use HTML::Form;
166use HTML::TreeBuilder;
167use Getopt::Long;
168use Date::Manip;
169use Date::Parse;
170use Date::Language;
171use LWP::UserAgent;
172use HTTP::Cookies;
173use Encode qw(from_to is_utf8 _utf8_off encode);
174use utf8;
175use JSON::PP;
176use Data::Dumper;
177
178
179# ${Log::TraceMessages::On} = 1;
180# to switch TRACE in remove the comment from prev. line
181
182# Use Log::TraceMessages if installed.
183BEGIN {
184	eval { require Log::TraceMessages };
185	if ($@) {
186	*t = sub {};
187	*d = sub { '' };
188	}
189	else {
190	*t = \&Log::TraceMessages::t;
191	*d = \&Log::TraceMessages::d;
192	Log::TraceMessages::check_argv();
193	}
194}
195
196
197######################################################################
198## GLOBAL VARIABLES
199######################################################################
200my $warnings = 0;
201
202my ($opt_days, $opt_offset, $opt_help, $opt_output,
203	$opt_configure, $opt_config_file, $opt_gui,
204	$opt_quiet, $opt_list_channels, $opt_loc,
205	$opt_min_delay, $opt_max_delay, $opt_debug);
206
207# Attributes of the root element in output.
208my $HEAD = {
209			'source-info-url'     => 'http://directstage.directvla.com/',
210			'source-data-url'     => 'http://directstage.directvla.com/',
211			'generator-info-name' => 'tv_grab_dtv_la',
212			'generator-info-url'  => 'http://xmltv.org/',
213			};
214
215my $channels_icon_url="http://www.lyngsat.com/packages/directvlatin.html";
216my $countries_list_url="http://directstage.directvla.com/";
217
218# So we are not affected by winter/summer timezone
219$XMLTV::DST::Mode='none';
220
221# timezone to use (for all countries!)
222my $TZ="-0300";
223
224# default language
225my $LANG="es";
226my $OUT_ENCODING="UTF-8";
227
228# Selected country
229my %country;
230
231# Full list of channels
232my @ch_all;
233my $CHANNELS_URL=undef;
234
235# Providers name for creating unique channel id
236my $PROVIDER_NAME="dtv.la";
237
238# Progressbar
239my $mainbar;
240
241# Private UserAgent
242my $cookies = HTTP::Cookies->new;
243my $ua = LWP::UserAgent->new;
244$ua->cookie_jar($cookies);
245
246$ua->agent("xmltv/$XMLTV::VERSION");
247$ua->parse_head(0);
248$ua->env_proxy;
249
250
251# undocumented --cache option.
252# not sure this will work with ajax post requests ?
253XMLTV::Memoize::check_argv('get_tree');
254
255######################################################################
256## SUBROUTINES
257######################################################################
258
259######################################################################
260## Returns a trimmed string
261sub trim {
262	my $string = shift;
263	$string =~ s/^\s+|\s+$//g  if defined $string;
264	return $string;
265}
266
267######################################################################
268## Returns a TreeBuilder instance
269
270# You must insert a delay between page requests to avoid
271# unnecessary load on the website. If you try to grab pages too quickly then
272# it's likely you will get banned by the website providers (and may get
273# all other xmltv users banned as well - it's trivial to ban by user-agent string).
274#
275my $last_get_time;
276#
277sub get_tree ($;$$) {
278	my $url = shift;
279	my $method = shift || 'get';
280	my $data = shift;
281	my $r;
282
283	print STDERR "$method: $url ".($data?"[$data]":'')." \n" if $opt_debug;
284
285	# let's not overload the website with too many requests so we'll restrict the request frequency (as per Get_nice)
286
287	my $Delay = $opt_max_delay - $opt_min_delay; 	# in seconds
288	my $MinDelay = $opt_min_delay; 					# in seconds
289
290	if (defined $last_get_time) {
291		# A page has already been retrieved recently.  See if we need
292		# to sleep for a while before getting the next page - being
293		# nice to the server.
294		my $next_get_time = $last_get_time + (rand $Delay) + $MinDelay;
295		my $sleep_time = $next_get_time - time();
296		sleep $sleep_time if $sleep_time > 0;
297	}
298
299	if (!defined $method || lc($method) eq 'get') {
300		$r = $ua->get($url);
301
302	} elsif (lc($method) eq 'post') {
303		$r = $ua->post($url, $data);	# $data must be a hash
304
305	} elsif (lc($method) eq 'jsonpost') {
306		#
307		# create the http request
308		my $req = HTTP::Request->new( 'POST', $url );
309		##$req->header( 'Content-Type' => 'application/json' );
310		$req->content_type( 'application/json; charset=utf-8' );
311		$req->content( $data );			# data must be json
312
313		# execute the request
314		$r = $ua->request($req);
315
316	} else {
317		die "unknown fetch method '$method'";
318	}
319	$last_get_time = time();
320
321	#print STDERR Dumper($r);die;
322	die "Could not fetch $url". (lc($method) eq 'jsonpost'?"[$data]":'') .", error: " . $r->status_line  if ($r->is_error);
323
324	my $t;
325	if (lc($method) eq 'jsonpost') {
326		# expect a json reply!
327		$t = JSON::PP->new()->utf8(1)->decode($r->content) or die "cannot parse content of $url\n";
328
329	} else {
330		$t = new HTML::TreeBuilder;
331		#$t->utf8_mode(1);
332		$data=$r->decoded_content('default_charset'=>'utf8');
333		#$data=decode('UTF-8',$data) if (is_utf8($data));
334		$t->parse($data) or die "Cannot parse content of Tree\n";
335		$t->eof;
336	}
337	return $t;
338}
339
340######################################################################
341## Bump a YYYYMMDD date by one.
342sub nextday {
343	my $d = shift;
344	my $p = parse_date($d);
345	my $n = DateCalc($p, '+ 1 day');
346	return UnixDate($n, '%Q');
347}
348
349######################################################################
350## Returns the URL for grabbing channels
351sub get_channels_url {
352	if (not defined $CHANNELS_URL){
353		die "No country specified, run me with --configure\n" if not keys %country;
354
355		# as at Apr 2014 it looks like they're still working on the website: all the Caribbean channels
356		# point to the same place.
357		if ( $country{'id'} eq 'CB' ) {
358			$CHANNELS_URL = "http://www.directvcaribbean.com/tt/channel-description";
359		}
360		else {
361			# although some of the sites have this as a subdir (e.g. under 'paquetes' or 'planes') it still
362			# seems to work without that
363			$CHANNELS_URL = $country{url} . 'guia-de-canales';
364		}
365
366	}
367
368	return $CHANNELS_URL;
369}
370
371######################################################################
372## Returns the URL for grabbing specified channel programs
373sub get_channel_programs_url($) {
374	##my $ch_id=shift;
375	##my $base_url=get_channels_url();
376	##$base_url=~ s/default/detailch/;
377	##return "$base_url?c=$ch_id&n=chname";
378
379	# e.g. http://www.directv.com.ar/guia/Services/ProgrammingGuideAjax.asmx/GetProgramming
380
381	return $country{'url'} . 'guia/Services/ProgrammingGuideAjax.asmx/GetProgramming';
382}
383
384######################################################################
385## Returns the URL for grabbing programme details
386sub get_program_detail_url() {
387
388	# e.g. http://www.directv.com.ar/guia/Services/ProgrammingGuideAjax.asmx/GetProgrammingDetail
389
390	return $country{'url'} . 'guia/Services/ProgrammingGuideAjax.asmx/GetProgrammingDetail';
391}
392
393######################################################################
394## Converts the given datetime format to the needed UTC format
395sub datetime_for_program( $;$ ){
396	my ($date,$strdt)=@_;
397	$strdt=~ /^(\w*)\s+(\d{1,2}:\d{1,2})/;
398	if ( defined $1 and defined $2) {
399		my $weekday=$1;
400		my $time=UnixDate($2,"%H:%M");
401		if ( UnixDate($date,"%a") eq $weekday ){
402		    return utc_offset("$date $time", $TZ)
403		}
404	}
405	return undef;
406}
407
408######################################################################
409## Returns channel programs for the specified date and channel id
410sub get_channel_programs ( $$$$ ) {
411	my ($ref_dates, $ref_channels, $ref_ch_all, $ref_programmes) = @_;
412
413	# convert @dates & @channels into hashes for faster searching
414	my %_dates = map { $_ => 1 } @$ref_dates;
415	my %_channels = map { $_ => 1 } @$ref_channels;
416
417	my @_ch_all = @$ref_ch_all;
418
419	# temporary store for programmes we fetch (used for detecting duplicates and clumps)
420	my $programmes = {};
421
422	# for parsing non-English language dates
423	my $lang;
424	if ( $country{'id'} eq 'CB' ) {	# Caribe is currently in English
425		$lang = Date::Language->new('English');
426	} else {
427		$lang = Date::Language->new('Spanish');
428	}
429
430	# site now uses a jQuery AJAX POST with JSON content in UTF-8
431	# e.g. { "day":19, "time":"12","minute":"30", "month":"4", "year":"2014", "onlyFavorites":"N" }
432	#
433	# data are avialable in a 4-hour windows (all channels combined)
434	#
435	foreach my $date (@$ref_dates) {
436		my ( $_y, $_m, $_d ) = $date =~ /(\d\d\d\d)(\d\d)(\d\d)/;
437		for (my $i=0; $i<24; $i+=4) {
438			my $_h = substr("0$i", -2, 2);
439
440			my $data = '{ "day":'.$_d.', "time":"'.$_h.'","minute":"00", "month":"'.$_m.'", "year":"'.$_y.'", "onlyFavorites":"N" }';
441			##print STDERR $data."\n";
442
443
444			my $json = get_tree( get_channel_programs_url(undef), 'jsonpost', $data );
445			##print STDERR Dumper($json);die;
446
447			# response is a JSON string containing just one k:v pair, 'd' => "..."
448			#  (see http://www.directv.com.ar/guia/js/Program-Guide/ProgrammingGuideAjax.js for details)
449
450			# we don't need the overhead of TreeBuilder - we'll go 'old school' and use a regexp
451			my (@li) = $json->{'d'} =~ m/(<li.*?<\/li>)/g;
452			##print Dumper(@li);die;
453
454			foreach my $li (@li) {
455
456				# <li id="PG_ctl02_Prog_ctl00_liItem" class="" style="width:267px" eventId="121190335202" channel="121"><a href="javascript:return false;" class="ChannelArrowLeft"></a><span id="PG_ctl02_Prog_ctl00_HDCh" style="padding-left:10px;"><literal id="PG_ctl02_Prog_ctl00_litHDCh">Las aventuras de Robin Hood</literal></span><a href="/guia/RecordBox.aspx?iframe&eventId=121190335202&page=grid" class="Action loginAvailable"></a></li>
457				# <li id="PG_ctl02_Prog_ctl01_liItem" class=" toolTip" style="width:99.5px" title="Enciclopedia digital del cosmos" eventId="121190335203" channel="121"><span id="PG_ctl02_Prog_ctl01_HDCh"><literal id="PG_ctl02_Prog_ctl01_litHDCh">Enciclopedia di...</literal></span><a href="/guia/RecordBox.aspx?iframe&eventId=121190335203&page=grid" class="Action loginAvailable"></a></li>
458
459				# <li id="PG_ctl214_Prog_ctl00_liItem" class=" PpvVenezuela toolTip last disabled" style="width:806px" eventid="" channel="1003" original-title="Programación no disponible"><span id="PG_ctl214_Prog_ctl00_HDCh" style="padding-right:10px;" class="HdActive"><literal id="PG_ctl214_Prog_ctl00_litHDCh">Programación no disponible</literal><span id="PG_ctl214_Prog_ctl00_imgIcon" class="3d"></span></span><a href="javascript:return false;" class="ChannelArrowRight"></a><a href="https://www.directv.com.ve/midirectv/PPVBrowse.aspx?language=&amp;section=DOD&amp;film=" class="Action loginAvailable"></a></li>
460
461				# Method:
462				# The programme schedule is returned as a 4-hour window. Unfortunately the html contains nothing of use
463				# other than the title, channel id and eventId. There isn't even a start time! So:
464				# 1. Extract all the <li> items
465				# 2. Ignore any which aren't for a requested channel
466				# 3. Ignore any which have already started (as they will have already been picked up in a previous 4-hour window)
467				# 4. Fetch the programme details using the eventId
468				# 5. Parse the prog details and add to a hash
469				#
470
471				# parse the <li> element
472				my ( $eventId, $channelId, $hasStarted ) = $li =~ m/eventId="(\d*)"\schannel="(\d*)".*?(?(?=.*ChannelArrowLeft)(ChannelArrowLeft)|())/;
473				##if ($hasStarted) {print STDERR "skipping $eventId, $channelId\n";}
474				next if $hasStarted;						# if prog has already started
475				next if ! $_channels{ $channelId };			# if channel not wanted
476				next if !defined $eventId || $eventId eq '';# e.g. Programación no disponible (can't output anything since no start/stop time!)
477
478				# post content: { "eventId":121190335202, "day":20, "time":"4","minute":"0", "month":"4", "year":"2014" }
479				my $data = '{ "eventId":'.$eventId.', "day":'.$_d.', "time":"'.$_h.'","minute":"0", "month":"'.$_m.'", "year":"'.$_y.'" }';
480
481				my $json = get_tree( get_program_detail_url(), 'jsonpost', $data );
482				##print STDERR Dumper($json);die;
483
484				my $t = HTML::TreeBuilder->new()->parse( $json->{'d'} ) or die "cannot parse content of programme detail\n";
485				$t->eof;
486				##$t->dump();die;
487
488				my $p;	# programme
489
490				my $div;  if ( my $_t = $t->look_down('_tag'=>'h2') ) { $div = $_t->parent(); }
491				if (!defined $div) {
492					# why is it not?
493					print STDERR 'Warn: No programme description (no <h2> element for "eventId":'.$eventId.', "day":'.$_d.', "time":"'.$_h.")\n";
494					next;
495				}
496
497				my $h;	# html (tree) element
498
499				if ( $h = $div->look_down('_tag'=>'h2') ) {
500					if ( my $h_ = $div->look_down('_tag'=>'img', 'alt'=>'HD program') ) {
501						$p->{'video'}->{'quality'} = 'HDTV';
502					}
503					$p->{'title'} = trim( $h->as_text() );
504					$h->detach;
505				}
506				# 'title' is mandatory in the DTD. If we don't have one then set to unknown
507				$p->{'title'} = ($LANG eq 'pt_BR' ? 'ignorado' : 'incógnito') if (!defined $p->{'title'} || $p->{'title'} eq '');
508
509
510				# 1st <p> is the description
511				if ( $h = $div->look_down('_tag'=>'p') ) {
512					$p->{'desc'} = trim( $h->as_text() );
513					$h->detach;
514				}
515
516				# 2nd <p> is the start time and duration
517				if ( $h = $div->look_down('_tag'=>'p') ) {
518					my $h_ = trim( $h->as_text() );
519					my ($_junk, $_date, $_dur) = $h_ =~ m/(Comienza|Start):\s*(.*?)\|(.*?)$/s;		# Caribe = "Start:"
520
521					# Date::Language doesn't currently do Portuguese
522					#  (the Sky BR site isn't handled in this grabber anyway)
523					my $dt;
524					if ( $country{'id'} eq 'BR' ) {
525						die "\n Sorry I don't speak Portuguese \n";
526					} else {
527						$dt = $lang->str2time($_date, $TZ);
528					}
529
530					$p->{'start_epoch'} = $lang->str2time($_date, $TZ);
531					( $p->{'duration'} ) = $_dur =~ /(\d*)\s(?=minutos|minutes)/;
532					$p->{'stop_epoch'} = $p->{'start_epoch'} + ( $p->{'duration'} * 60 )  if $p->{'duration'};
533					$p->{'start'} = $lang->time2str( "%Y%m%d%H%M%S %z", $p->{'start_epoch'}, $TZ );
534					$p->{'stop'} = $lang->time2str( "%Y%m%d%H%M%S %z", $p->{'stop_epoch'}, $TZ );
535					$h->detach;
536				}
537
538				# <div> class "Rank" = rating & programme url
539				if ( $h = $div->look_down('_tag'=>'div', 'class'=>qw/Rank/) ) {
540					if ( my $h = $h->look_down('_tag'=>'p') ) {
541						my $h_ = trim( $h->as_text() );
542						( $p->{'rating'} ) = $h_ =~ m/Rating:\s*(\S*)\s/s;
543					}
544
545					if ( my $h = $h->look_down('_tag'=>'div') ) {
546						if ( my $h_ = $h->look_down('_tag'=>'a') ) {
547							my $h__ = trim( $h_->attr('href') );
548							$h__ = $country{'url'} . $h__  if ( $h__ !~ /^http/ );
549							$p->{'url'} = $h__;
550						}
551					}
552					$h->detach;
553				}
554
555
556
557				# Reformat the data to Create the data structure for the programme
558				my $p_out = {};
559				$p_out->{'channel'} 	= $channelId . '.' . $PROVIDER_NAME;
560				$p_out->{'title'} 		= [[ encode($OUT_ENCODING, $p->{'title'}), $LANG ]];
561				$p_out->{'start'} 		= $p->{'start'};
562				$p_out->{'stop'} 		= $p->{'stop'}  if (defined $p->{'stop'} && $p->{'stop'} ne '');
563				$p_out->{'desc'} 		= [[ encode($OUT_ENCODING, $p->{'desc'}), $LANG ]]  if (defined $p->{'desc'} && $p->{'desc'} ne '');
564				$p_out->{'sub-title'} 	= [[ encode($OUT_ENCODING, $p->{'sub_title'}), $LANG ]]  if (defined $p->{'sub_title'} && $p->{'sub_title'} ne '');
565				$p_out->{'rating'} 		= [[ $p->{'rating'} ]]  if (defined $p->{'rating'} && $p->{'rating'} ne '');
566				$p_out->{'url'} 	  	= [ encode($OUT_ENCODING, $p->{'url'}) ]  if (defined $p->{'url'} && $p->{'url'} ne '');
567				$p_out->{'video'} 		= $p->{'video'}  if (defined $p->{'video'});
568
569				# store the programme avoiding duplicates
570				# also check for duplicate start times and set clumpidx
571				if ( defined $programmes->{ $channelId }->{ $p->{'start_epoch'} } ) {
572					# duplicate prog or contemporary?
573					my $dup = 0;
574					foreach my $_p ( @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } } ) {
575						$dup = 1  if ( $_p->{'title'}[0][0] eq $p_out->{'title'}[0][0] );	# duplicate
576					}
577					next if $dup;	# ignore duplicates (go to next <li> programme element)
578					if (!$dup) {
579						# contemporary programme so set clumpidx
580						my $numclumps = scalar @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } }  + 1;
581						# set (or adjust) clumpidx of existing programmes
582						my $i = 0;
583						foreach my $_p ( @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } } ) {
584							$_p->{'clumpidx'} = "$i/$numclumps";
585							$i++;
586									}
587						# set clumpidx for new programme
588						$p_out->{'clumpidx'} = "$i/$numclumps";
589					}
590				}
591
592				# store the programme in our temporary store
593				push @{ $programmes->{ $channelId }->{ $p->{'start_epoch'} } }, $p_out;
594
595			}
596
597			$mainbar->update() if not $opt_quiet;
598		}
599
600	}
601
602
603	# All data has been gathered. We can now write the programme element to the output.
604	#
605	foreach ( keys %{$programmes} ) {
606		my $_ch_progs = $programmes->{$_};
607		foreach ( sort keys %{$_ch_progs} ) {
608			my $_dt_progs = $_ch_progs->{$_};
609			foreach (@{ $_dt_progs }) {
610				push @{$ref_programmes}, $_;
611			}
612		}
613	}
614}
615
616######################################################################
617## Returns the list of channels
618#
619# Note: I've noticed that sometimes there's more channels on the actual programme schedule page
620#     than in the channels guide page :-(  So we may need to switch and get the list of channels
621#     from the AJAX fetch on the schedules page (although the icons may be smaller?)
622#
623sub get_channels {
624	my $bar = new XMLTV::ProgressBar("Getting list of channels for $country{name}", 1) if not $opt_quiet;
625
626	my %channels;
627	my $url=get_channels_url();
628
629	# Get channels that are transmiting now
630	my $tree = get_tree($url);
631	get_channels_from_tree($tree,\%channels);
632	# We will try to find more channels for later hours
633	#get_channels_for_later_hours($tree,\%channels);
634
635	# Finish using Tree
636	$tree=undef;
637	$bar->update() && $bar->finish() if not $opt_quiet;
638	return %channels;
639}
640
641######################################################################
642## Simulate a form filling to retrieve more channels for later hours
643sub get_channels_for_later_hours() {
644	my ($tree,$channels) = @_;
645
646	# First we get the form elemento to call iteratively for each option from a select
647	my $form_elem = $tree->look_down(
648		"_tag"=>"form", sub {
649		    defined $_[0]->attr('name') and $_[0]->attr('name')=~ /Form1/i
650		}
651	);
652	# The name of the select element is:
653	my $search_for_input="ddlTime";
654	my %needed_form_elems=('ddlTime','select','ddlDay','select','btnSubmit','input');
655
656	# Form to call iteratively
657	my $form=HTML::Form->parse($form_elem->as_HTML(),get_channels_url());
658	my $input;
659
660	foreach my $ninput (keys %needed_form_elems){
661		$input=$form->find_input($ninput);
662
663		# There is a bug in the source HTML. The field we need is outside the form tag
664		if (not defined $input) {
665		# We try to fix this problem
666			my $broken_elem = $tree->look_down(
667				"_tag"=>$needed_form_elems{$ninput}, sub {
668						defined $_[0]->attr('name') and $_[0]->attr('name')=~ /$ninput/i
669				    }
670			);
671			$form_elem->insert_element($broken_elem);
672			$form=HTML::Form->parse($form_elem->as_HTML(),get_channels_url());
673			$input=$form->find_input($ninput);
674			die "Cannot retrieve field $ninput. Aborting" if (not defined $input);
675		}
676	}
677	# Now for each value of the select, we will call get_channels_from_tree subroutine
678	$input=$form->find_input($search_for_input);
679	my $default_value=$input->value;
680	foreach ($input->possible_values) {
681		if ($_ != $default_value) {
682			$form->value($search_for_input,$_);
683			my $r=$ua->request($form->click);
684			die "Error doing automatic form filling. Aboring" if ($r->is_error);
685			my $t = new HTML::TreeBuilder;
686			#$t->utf8_mode(1);
687			my $data=$r->decoded_content('default_charset'=>'utf8');
688			#$data=from_to($data,'UTF-8',$OUT_ENCODING) if (is_utf8($data));
689			$t->parse($data) or die "Cannot parse content of Tree\n";
690			$t->eof;
691			get_channels_from_tree($t,$channels);
692		}
693	}
694}
695
696######################################################################
697## Return the list of channels for a tree representation of an HTML page
698sub get_channels_from_tree( ) {
699	my ($tree,$channels) = @_;
700
701	# see if there's a 'pMain' so we can ignore the CMS content (which contains the on-demand channels)
702	my $chan_div = $tree->look_down('_tag' => 'div', 'id' => 'pMain');
703	$tree = $chan_div  if $chan_div;
704
705	my @chan_groups = $tree->look_down('_tag' => 'div', 'class' => 'guia-canales')->look_down('_tag' => 'div', 'class' => 'combo-canal-content');
706
707	foreach (@chan_groups) {
708		my @chan_elems = $_->look_down('_tag' => 'li');
709		foreach (@chan_elems) {
710			# <li>
711			#   <a itemprop="makesOffer" href="http://www.directv.com.pe/guia/ChannelDetail.aspx?id=197"><img src="http://www.ondirectv.com/Thumbnail.ashx?image=LOGOS/canales/v2/197.png&amp;width=64&amp;height=32" alt="TVPeru " width="64" height="32" title="TVPeru  - Canal 197"><br>
712			# 	  <span>197</span>
713			#	 </a>
714			# </li>
715
716			if ( my $chan = $_->look_down('_tag' => 'a') ) {
717				my ($chan_id, $chan_name, $chan_url, %chan_icon) = ('', '', '', ());
718				$chan_id = trim( $chan->look_down('_tag' => 'span')->as_text() );
719				$chan_url = $chan->attr('href');
720				if ( my $chan_img = $chan->look_down('_tag' => 'img') ) {
721					$chan_name = trim( $chan_img->attr('alt') );
722					$chan_icon{'src'} = $chan_img->attr('src');
723					$chan_icon{'width'} = $chan_img->attr('width')  if defined $chan_img->attr('width');
724					$chan_icon{'height'} = $chan_img->attr('height')  if defined $chan_img->attr('height');
725				}
726
727				$chan_name="$chan_name ($chan_id)";
728				if (not exists  ${$channels} { $chan_id }) {
729					${$channels} {$chan_id}=$chan_name;
730					 push @ch_all, {
731									'display-name' => [[ encode("UTF-8",$chan_name), $LANG ],[$chan_id]],
732									'channel-num' => $chan_id  ,
733									'id' => "$chan_id.$PROVIDER_NAME",
734									'icon' => [ \%chan_icon ],
735									 };
736				}
737			}
738		}
739	}
740
741}
742
743######################################################################
744## Get a list of possible countries
745sub get_countries( ) {
746	my $country_codes = { 'Argentina' => 'AR'
747                        , 'Caribe' => 'CB'
748                        , 'Chile' => 'CL'
749                        , 'Colombia' => 'CO'
750                        , 'Ecuador' => 'EC'
751                        , 'Perú' => 'PE'
752                        , 'Puerto Rico' => 'PR'
753                        , 'Uruguay' => 'UY'
754                        , 'Venezuela' => 'VE'
755                        };
756
757	my $tree = get_tree($countries_list_url);
758	my @options=$tree->look_down('_tag' => 'div', 'class' => 'box-menu')->look_down('_tag' => 'div', 'class' => 'items')->look_down('_tag' => 'a');
759	my %countries;
760	foreach my $tag (@options){
761		my %country;
762		$country{'name'} = $tag->as_text();
763		$country{'url'} = $tag->attr('href') . "/";
764        # Default  URLs to https://  - programme guide is https; channel lists will redirect to http
765        $country{'url'} =~ s/^http:/https:/;
766		$country{'id'} = $country_codes->{$country{'name'}};
767
768		# we won't do the Sky sites - they are very different to DirecTV
769		if ( $country{'name'} =~ /(SKY Brazil|SKY México)/ ) {
770			#print "Skipping country - $country{'name'} \n" unless $opt_quiet;
771			next;
772		}
773
774		if ( !defined $country_codes->{$country{'name'}} ) {
775			print "Unknown country - $country{'name'} \n" unless $opt_quiet;
776			next;
777		}
778
779			$countries{$tag->as_text()} = \%country;
780	}
781	return %countries;
782}
783
784######################################################################
785## Return the user-selected country
786sub select_country( ) {
787	my %countries = get_countries();
788	my @names = sort keys %countries;
789	my $choice = ask_choice("Select your country:", $names[0], @names);
790	return ( id=>$countries{$choice}{'id'}, name=>$choice, url=>$countries{$choice}{'url'} );
791}
792
793######################################################################
794## Return the channel icons from LyngSat
795sub get_channel_icons() {
796	my $bar = new XMLTV::ProgressBar("Trying to fetch channel icons for $country{name}", $#ch_all + 1) if not $opt_quiet;
797	my $tree=get_tree($channels_icon_url);
798	my $table=$tree->look_down(
799		'_tag'=>'table',sub {
800			defined $_[0]->attr('width') and $_[0]->attr('width')== '600'
801		}
802	);
803	foreach my $ch (@ch_all){
804		my $ch_num=$ch->{'channel-num'};
805		my $tr=$table->look_down(
806			'_tag'=>'tr',sub {
807				my @td=$_[0]->content_list();
808				defined $td[0] and $td[0]->as_text() =~ /\s*$ch_num\s*/
809		    }
810		);
811		if (defined $tr){
812			my $img=$tr->look_down(
813				'_tag'=>'img');
814			$ch->{icon}=[ { src=>$img->attr('src')} ] if defined $img and defined $img->attr('src');
815		}
816		$bar->update() if not $opt_quiet;
817	}
818	$bar->finish() if not $opt_quiet;
819}
820
821######################################################################
822## Return the channel icons from the DirecTV site
823sub get_channel_icons_dtv() {
824	my $bar = new XMLTV::ProgressBar("Fetching channel icons for $country{name}", $#ch_all + 1) if not $opt_quiet;
825	my $tree=get_tree( get_channels_url() );
826	my $table=$tree->look_down('_tag' => 'div', 'class' => 'guia-canales');
827
828	foreach my $ch (@ch_all){
829		my $ch_num=$ch->{'channel-num'};
830		my $chan_img;
831		if ( my $chan_a = $table->look_down('_tag'=>'a', 'href'=>qr/ChannelDetail.aspx\?id=$ch_num/) ){
832			$chan_img = $chan_a->look_down('_tag'=>'img');
833		}
834		if (defined $chan_img) {
835			my %chan_icon;
836			$chan_icon{'src'} = $chan_img->attr('src');
837			$chan_icon{'width'} = $chan_img->attr('width')  if defined $chan_img->attr('width');
838			$chan_icon{'height'} = $chan_img->attr('height')  if defined $chan_img->attr('height');
839			$ch->{icon}=[ \%chan_icon ];
840		}
841		$bar->update() if not $opt_quiet;
842	}
843	$bar->finish() if not $opt_quiet;
844}
845
846######################################################################
847## MAIN PROGRAM
848######################################################################
849
850######################################################################
851## get options
852# Get options.
853
854$opt_days       = 3; # default
855$opt_offset     = 0; # default
856$opt_quiet      = 0; # default
857$opt_min_delay  = 1;
858$opt_max_delay  = 3;
859$opt_debug      = 0;
860
861GetOptions(
862	'days=i'		=> \$opt_days,
863	'offset=i'      => \$opt_offset,
864	'help'		  => \$opt_help,
865	'configure'     => \$opt_configure,
866	'config-file=s' => \$opt_config_file,
867	'gui:s'		 => \$opt_gui,
868	'output=s'      => \$opt_output,
869	'quiet'		 => \$opt_quiet,
870	'list-channels' => \$opt_list_channels,
871	'debug'		 => \$opt_debug,
872	'loc=s'		 => \$opt_loc,
873	'min-delay=f'   => \$opt_min_delay,
874	'max-delay=f'   => \$opt_max_delay,
875) or usage(0);
876
877$opt_min_delay = (0.5, $opt_min_delay)[0.5 < $opt_min_delay];
878$opt_max_delay = (0.5, $opt_max_delay)[0.5 < $opt_max_delay];
879
880die 'number of days must not be negative' if (defined $opt_days && $opt_days < 0);
881usage(1) if $opt_help;
882
883XMLTV::Ask::init($opt_gui);
884my $mode = XMLTV::Mode::mode(
885	'grab', # default
886	$opt_configure => 'configure',
887	$opt_list_channels => 'list-channels',
888);
889
890# File that stores which channels to download.
891my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_dtv_la', $opt_quiet);
892my @config_lines; # used only in grab mode
893if ($mode eq 'configure') {
894	XMLTV::Config_file::check_no_overwrite($config_file);
895}elsif ($mode eq 'grab') {
896	@config_lines = XMLTV::Config_file::read_lines($config_file);
897}elsif ($mode eq 'list-channels') {
898	# Config file not used.
899}else {
900	die
901}
902
903## Whatever we are doing, we need the channels data.
904##my %channels = get_channels(); # sets @ch_all
905my %channels;
906my @channels;
907
908######################################################################
909## write configuration
910#
911if ($mode eq 'configure') {
912	open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
913	%country= select_country();
914	print CONF "country $country{id} $country{name} $country{url} \n";
915	%channels = get_channels(); # sets @ch_all
916
917	# Ask about each channel.
918	my @chs = sort keys %channels;
919	my @names = map { $channels{$_} } @chs;
920	my @qs = map { "add channel $_?" } @names;
921	my @want = ask_many_boolean(1, @qs);
922	foreach (@chs) {
923		my $w = shift @want;
924		warn("cannot read input, stopping channel questions"), last
925		if not defined $w;
926		# No need to print to user - XMLTV::Ask is verbose enough.
927
928		# Print a config line, but comment it out if channel not wanted.
929		print CONF '#' if not $w;
930		my $name = shift @names;
931		print CONF "channel $_ $name\n";
932		# TODO don't store display-name in config file.
933		}
934	close CONF or warn "cannot close $config_file: $!";
935	say("Finished configuration.");
936	exit();
937}
938
939# Not configuration, we must be writing something, either full
940# listings or just channels.
941
942die if $mode ne 'grab' and $mode ne 'list-channels';
943
944# Options to be used for XMLTV::Writer.
945my %w_args;
946if (defined $opt_output) {
947	my $fh = new IO::File(">$opt_output");
948	die "cannot write to $opt_output: $!" if not defined $fh;
949	$w_args{OUTPUT} = $fh;
950}
951$w_args{encoding} = $OUT_ENCODING;
952my $writer = new XMLTV::Writer(%w_args);
953$writer->start($HEAD);
954
955if ($mode eq 'list-channels') {
956	# must have a country before we can list channels!
957	die "please select a country ('--loc xx')"  if (!defined $opt_loc || $opt_loc eq '');
958
959	my %countries = get_countries();
960    my ($key, $value);
961	while ( ($key, $value) = each %countries ) {
962		undef $key;
963		if ( $value->{'id'} eq uc($opt_loc) ) {
964			%country = ( id => $value->{'id'}, name => $value->{'name'}, url => $value->{'url'} );
965			last;
966		}
967	}
968
969	%channels = get_channels(); # sets @ch_all based on %country
970
971	foreach (@ch_all) {
972		delete $_->{'channel-num'};  # not an DTD item!
973		$writer->write_channel($_) ;
974	}
975	$writer->end();
976	exit();
977}
978
979
980######################################################################
981## We are producing full listings.
982die if $mode ne 'grab';
983
984## Read configuration
985# @channels = id list of channels to grab
986# %channels = id => name of channels to grab
987# @ch_all = id + ch-num + display-name of channels to grab
988#
989my $line_num = 1;
990foreach (@config_lines) {
991	++ $line_num;
992	next if not defined;
993	if (/^country:?\s+(\S+)\s+(\S+)\s+([^\#]+)/) {
994		%country=( id => $1, name=>$2, url=>$3 );
995	}else{
996		if (/^channel:?\s+(\S+)\s+([^\#]+)/) {
997			my $ch_did = $1;
998			my $ch_name = $2;
999			$ch_name =~ s/\s*$//;
1000			push @channels, $ch_did;
1001			#CAR
1002			push @ch_all, {
1003							'display-name' => [[ $ch_name, $LANG ],[$ch_did]],
1004							'channel-num' => $ch_did  ,
1005							'id'=> "$ch_did.$PROVIDER_NAME" };
1006			$channels{$ch_did} = $ch_name;
1007		} else {
1008			warn "$config_file:$line_num: bad line\n";
1009		}
1010	}
1011}
1012
1013######################################################################
1014## begin main program
1015## Assume the listings source uses CET (see BUGS above).
1016my $now = DateCalc(parse_date('now'), "$opt_offset days");
1017
1018die "No channels specified, run me with --configure\n" if not keys %channels;
1019die "No country specified, run me with --configure\n" if not keys %country;
1020my @to_get;
1021
1022## we change language if country is Brazil
1023$LANG="pt_BR" if $country{name} =~ /brazil/i;
1024
1025# Dates requested for programs listing
1026# @dates = list of dates to grab (yyyymmdd)
1027#
1028my $day=UnixDate($now,'%Q');
1029my @dates;
1030for (my $i=0;$i<$opt_days;$i++) {
1031	push @dates, $day;
1032	#for each day
1033	$day=nextday($day);
1034	die if not defined $day;
1035}
1036
1037# Try to get channel icons
1038# adds %icon to @ch_all
1039#
1040##get_channel_icons();    # LyngSat
1041get_channel_icons_dtv();	# DirecTV
1042
1043# Write the <channel> elements
1044# data from %channels
1045# @to_get = array of @dates (yyyymmdd), chan-id (e.g. 122), chan-name (e.g. 122.dtv.la)
1046#
1047foreach my $ch_did (@channels) {
1048	my $index=0;
1049	my $ch_name=$channels{$ch_did};
1050	my $ch_xid="$ch_did.$PROVIDER_NAME";
1051	while (${$ch_all[$index]}{'id'} ne $ch_xid) {
1052		$index=$index+1;
1053	}
1054	my $ch_num=${ch_all[$index]}{'channel-num'};
1055	my $to_add={
1056		id => $ch_xid,
1057		'display-name' => [
1058		    [ encode($OUT_ENCODING, $ch_name), $LANG ],
1059		    [ $ch_num ] ]
1060	};
1061	$to_add->{icon}=${ch_all[$index]}{icon} if (exists ${ch_all[$index]}{icon} );
1062	$writer->write_channel($to_add);
1063	# build array of station-days to grab
1064	push @to_get, [ \@dates, $ch_xid, $ch_num ];
1065}
1066
1067# This progress bar is for both downloading and parsing.  Maybe
1068# they could be separate.
1069##my $mainbar = new XMLTV::ProgressBar("getting listings for $country{name}", $#to_get + 1) if not $opt_quiet;
1070$mainbar = new XMLTV::ProgressBar("Getting listings for $country{name}", (@dates * 6) ) if not $opt_quiet;
1071
1072# Grab requested data
1073# [ <v1.4 and write the output xml ]
1074# [v1.4 - now done all together rather than one station-day at a time]
1075##foreach (@to_get) {
1076##	foreach (get_channel_programs($_->[0], $_->[1], $_->[2])) {
1077##		$writer->write_programme($_);
1078##	}
1079##	$mainbar->update() if not $opt_quiet;
1080##}
1081
1082# Data store before being written as XML
1083my @programmes = ();
1084
1085# Fetch the data
1086# (note the params are all globals so the params aren't strictly necessary
1087#   but let's try for some better programming practice ;-)
1088get_channel_programs(\@dates, \@channels, \@ch_all, \@programmes);
1089
1090# Write the <programme> elements
1091foreach (@programmes) {
1092	$writer->write_programme($_);
1093}
1094
1095# end the progress bar
1096$mainbar->finish() if not $opt_quiet;
1097
1098# close xml file
1099$writer->end();
1100
1101# Signal that something went wrong if there were warnings.
1102exit(1) if $warnings;
1103
1104# All data fetched ok.
1105#debug "Exiting without warnings.";
1106exit(0);
1107