1#!/usr/local/bin/perl -w
2=pod
3
4=head1 NAME
5
6tv_grab_za - Grab TV listings for South Africa.
7
8=head1 SYNOPSIS
9
10tv_grab_za --help
11
12tv_grab_za [--config-file FILE] --configure [--gui OPTION]
13
14tv_grab_za [--config-file FILE] [--output FILE] [--days N]
15	   [--quiet] [--retries N]
16
17=head1 DESCRIPTION
18
19Output TV listings for DSTV channels available in South Africa.
20The data comes from www.dstv.com. The grabber relies on
21parsing HTML so it might stop working at any time.
22
First run B<tv_grab_za --configure> to choose which channels you want
24to download. Then running B<tv_grab_za> with no arguments will output
25listings in XML format to standard output.
26
27B<--configure> Prompt for which channels,
28and write the configuration file.
29
30B<--config-file FILE> Set the name of the configuration file, the
31default is B<~/.xmltv/tv_grab_za.conf>.  This is the file written by
32B<--configure> and read when grabbing.
33
34B<--gui OPTION> Use this option to enable a graphical interface to be used.
35OPTION may be 'Tk', or left blank for the best available choice.
36Additional allowed values of OPTION are 'Term' for normal terminal output
37(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.
38
39B<--output FILE> write to FILE rather than standard output.
40
B<--days N> grab N days.  N can be from 1 to 8 (the site only offers 8 days including today).  Default is 7.
42
43B<--quiet> suppress the progress messages normally written to standard
44error.
45
B<--retries N> number of retries before failing a channel download.  Default is 3.
47
48B<--help> print a help message and exit.
49
50=head1 SEE ALSO
51
52L<xmltv(5)>.
53
=head1 AUTHORS

Chris Picton <cpicton@users.sf.net>

Neil Garratt <ngarratt@users.sf.net>
57
58Based on tv_grab_fi by Matti Airas.
59
60Latest version always available at http://xmltv.cvs.sourceforge.net/xmltv/xmltv/grab/za/
61
62=head1 BUGS
63
Does not automatically update itself when DSTV changes their site.
65
66=cut
67
68######################################################################
69# initializations
70
71use strict;
72
73use XMLTV::Version '$Id: tv_grab_za,v 1.45 2014/05/01 19:26:55 bilbo_uk Exp $ ';
74use XMLTV::Capabilities qw/baseline manualconfig cache/;
75use XMLTV::Description 'South Africa';
76
77
78use Getopt::Long;
79use List::Util qw(min);
80use List::Util qw(max);
81use Date::Manip;
82use HTML::TreeBuilder;
83use HTML::Entities; # parse entities
84use IO::File;
85use Digest::MD5 qw(md5 md5_hex);
86use Encode;
87
88use POSIX qw(strftime);
89
90
91#use LWP::Simple qw($ua);
92use LWP::Simple;
93use LWP::UserAgent;
94use HTTP::Request::Common qw(GET);
95use HTTP::Cookies;
96
97
98use XMLTV;
99use XMLTV::Memoize;
100use XMLTV::ProgressBar;
101use XMLTV::Ask;
102use XMLTV::Config_file;
103use XMLTV::DST;
104use XMLTV::Get_nice;
105my $cookies = HTTP::Cookies->new;
106$XMLTV::Get_nice::ua->cookie_jar($cookies);
107use XMLTV::Mode;
108use XMLTV::Date;
109# Todo: perhaps we should internationalize messages and docs?
110use XMLTV::Usage <<END
111$0: get South African television listings in XMLTV format
112To configure: $0 --configure [--config-file FILE]
113To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
114		[--quiet] [--retries]
115END
116  ;
117
# Attributes of the root element in output.
# (An earlier 'source-data-url' was http://www.dstv.com/dstv-guide/default.asp.)
my $HEAD = {
	'source-info-url'     => 'http://www.dstv.com/',
	'source-data-url'     => 'http://mobi.dstv.com/guide/',
	'generator-info-name' => 'XMLTV',
	'generator-info-url'  => 'http://xmltv.org/',
};

# The timezone in South Africa; appended to every programme start time.
my $TZ = '+0200';

# Default language.
my $LANG = 'en';

# Character encoding of the output file.
my $ENCODING = 'ISO-8859-1';
134
# Channel name => site id lookups.  They start empty; the *_channel_map
# subs call the get_*_channel_mappings routines when a map is still empty
# (presumably those routines fill them in - they are not visible here).
our %dstvchannelmap;
our %mnetchannelmap;

# Name substitutions for the M-Net site
# (NOTE(review): no use of this table is visible in this part of the file).
my %mnetchannelfixups = (
	'Africa Magic Channel (C-Band)'  => 'AfricaMagic',
	'Bloomberg Information TV'       => 'Bloomberg',
	'China Central Television 4'     => 'CCTV 4',
	'Channel O - Sound Television'   => 'Channel O',
	'CNBC'                           => 'CNBC Africa',
	'CNN International'              => 'CNN',
	'Deukom - DW'                    => 'Deutchse Welle',
	'E-Entertainment'                => 'E! Entertainment',
	'eTV'                            => 'e-TV',
	'Go (K-World Teen)'              => 'GO',
	'Hallmark Entertainment Network' => 'Hallmark',
	'K-TV World'                     => 'K-All Day',
	'M-Net Domestic'                 => 'M-Net',
	'M-Net Series'                   => 'M-Net Series',
	'Parliamentary Service'          => 'Parliamentary',
	'Reality TV'                     => 'Zone Reality',
	'Rhema Network'                  => 'Rhema TV',
	'Summit'                         => 'Summit TV',
	'SuperSport'                     => 'SuperSport 1',
	'SuperSport 3'                   => 'SuperSport 3 (Soccer)',
	'SuperSport 5'                   => 'SuperSport 5 (Highlights)',
	'SuperSport Zone Mosaic'         => 'SuperSport Zone',
	'Trinity Broadcasting Network'   => 'TBN',
	'Turner Classic Movies'          => 'TCM',
	'TV5 Afrique'                    => 'TV5',
	'VH1'                            => 'VH-1',
);

# Name substitutions for the DSTV site
# (NOTE(review): no use of this table is visible in this part of the file).
my %dstvchannelfixups = (
	'CNN International'             => 'CNN',
	'Sony Entertainment Television' => 'Sony Entertainment',
	'SABC 1'                        => 'SABC1',
	'SABC 2'                        => 'SABC2',
	'SABC 3'                        => 'SABC3',
	'Crime & Investigation Network' => 'Crime & Investigation',
	'E! Entertainment Television'   => 'E! Entertainment',
	'SuperSport MaXimo 1'           => 'SuperSport Maximo',
	'MagicWorld'                    => 'Magic World',
	'Deutsche Welle'                => 'Deutchse Welle',
);

# These entries appear on the channel index page, but no schedules for
# them exist on the site.
my @dstvignorechannels = ('Soweto TV', 'Ignition');

# Hex digest => decimal digit.  The digests are 32 hex characters long
# (presumably MD5, given the Digest::MD5 import - TODO confirm).
# An earlier version of this table was keyed on numeric checksums instead:
#	'1494729404' => '0',	'3139098187' => '1',
#	'2091571851' => '2',	'2860538121' => '3',
#	'3348398793' => '4',	'1813599985' => '5',
#	'1153776246' => '6',	'1367985183' => '7',
#	'3033721747' => '8',	'2699942871' => '9',
my %dstvfilehashes = (
	'937c943580ac202fc64a80dbd3be3aab' => '0',
	'40154b2e17f12abc83304910e8b2c184' => '1',
	'261d6eeefee8ee6f398e8d4bef8b51df' => '2',
	'f0e730108d788a4fef7966157d223e12' => '3',
	'309cad2597b2273ecda6614169e79a78' => '4',
	'675fd8104b6fa3ae317cbdc7cb301400' => '5',
	'1d8960a26dce4fd9172a06154d66f692' => '6',
	'479765dcd17d683a3fdbcd5740e11c15' => '7',
	'6eeba41c618fdba24c8fd554023385a9' => '8',
	'f888465466ffa7c7c3cc6c5f12414ad3' => '9',
);

# Starts empty (NOTE(review): whatever fills it is outside this part of the file).
our %dstvtimehashes = ();

# Form-state values, initially empty.
my $viewstate       = '';
my $eventvalidation = '';
212
# HTTP user agent (initialise_ua is not visible in this part of the file).
my $ua = initialise_ua();

# Set up cache if needed.  This runs before GetOptions sees @ARGV.
XMLTV::Memoize::check_argv('get_url');

######################################################################
# Get options.
#
# $opt_dstv_fallback and $days_exceeded have no command-line switch; they
# are declared here because later code refers to them.
my ($opt_days, $opt_offset, $opt_help, $opt_output,
	$opt_configure, $opt_config_file, $opt_gui,
	$opt_quiet, $opt_list_channels, $opt_opentime,
	$opt_opentime_combined, $opt_retries, $opt_mnet_fallback,
	$opt_dstv_fallback, $days_exceeded);
$opt_quiet = 0; # default
GetOptions(
	'days=i'            => \$opt_days,
	'offset=i'          => \$opt_offset,
	'help'              => \$opt_help,
	'configure'         => \$opt_configure,
	'opentime'          => \$opt_opentime,
	'opentime-combined' => \$opt_opentime_combined,
	'config-file=s'     => \$opt_config_file,
	'gui:s'             => \$opt_gui,
	'output=s'          => \$opt_output,
	'quiet'             => \$opt_quiet,
	# The documentation promises '--retries N'; declared as a plain flag
	# the N would be left behind in @ARGV and the count would always be 1.
	'retries=i'         => \$opt_retries,
	'mnet-fallback'     => \$opt_mnet_fallback,
	'list-channels'     => \$opt_list_channels,
)
  or usage(0);
242
# DEPRECATED
# Everything inside this block is switched off by the constant-false
# condition; it is kept for reference only.  Its lexicals (%allowed_days,
# $use_days) are private to the block and are distinct from the file-level
# variables of the same names declared further down.
if (0) {
	# dstv.com only allows us to grab one of a few fixed day ranges and
	# they all start from today.  For baseline compliance, data outside
	# the range specified is stripped. mnet.co.za doesn't have this issue,
	# so it's now the default site to use. An extra day is also downloaded
	# just to calculate the end time of the last program of the previous day
	#
	my %allowed_days = (1 => 0, 7 => 1, 14 => 2);
	die "--offset cannot be negative" if defined $opt_offset and $opt_offset < 0;
	die "--days must be positive" if defined $opt_days and $opt_days <= 0;
	if ($opt_offset) {
		$opt_days += $opt_offset;
	} else { $opt_offset = 0; }

	my $use_days;
	if ($opt_days) {
		# Concatenating 'keys %allowed_days' into a string yields the number
		# of keys, not the keys themselves, so build the list explicitly.
		my $supported = join(', ', sort { $a <=> $b } keys %allowed_days);

		# Smallest supported range that covers the request, if any.
		$use_days = min grep { $_ >= $opt_days } keys %allowed_days;
		if (not defined $use_days) {
			$opt_days = $use_days = max keys %allowed_days;
			warn "rounding down to $use_days days for download (must be one of $supported)\n";
			$days_exceeded = 1;
		} elsif (($use_days != $opt_days) && (!$opt_quiet)) {
			warn "dstv.com only supports the following days: $supported. $use_days day(s) will be downloaded and extraneous ones skipped\n";
		}
		# OK, now $use_days has the number of days to grab starting from now,
		# if that was specified on the command line. If this is specified in the
		# config file it will also set this variable. $opt_days will contain the
		# number of days we actually want to keep data for.
	}
}
275
# mobi.dstv.com only allows us to grab for 8 days including today
# (for now we will drop the end time of the last programme of the last day
#  - stop time is optional according to the DTD).
#
my $max_days = 8;

# Apply defaults only when the option was not given at all, so that an
# explicit '--days 0' reaches the "must be positive" check below instead of
# being silently replaced by the default.
$opt_offset = 0 if not defined $opt_offset;
$opt_days   = 7 if not defined $opt_days;

# Still consulted by the deprecated dstv_channel_uri().
my %allowed_days = (1 => 1, 2 => 2, 3 => 3, 4 => 4, 5 => 5, 6 => 6, 7 => 7, 8 => 8);

die "--offset cannot be negative" if $opt_offset < 0;
die "--days must be positive" if $opt_days <= 0;
die "--days exceeds site availability ($max_days)" if $opt_days > $max_days;
die "--offset ($opt_offset) plus --days ($opt_days) exceeds site availability ($max_days)" if ($opt_offset + $opt_days > $max_days);

# Only read by the deprecated dstv.com code path.
my $use_days = 0;
290
291
292
# Default retries = 3;
$opt_retries ||= 3;

usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);

# Operating mode: 'grab' unless --configure or --list-channels was given.
my $mode = XMLTV::Mode::mode(
	'grab', # default
	$opt_configure     => 'configure',
	$opt_list_channels => 'list-channels',
);

# File that stores which channels to download.
my $config_file = XMLTV::Config_file::filename(
	$opt_config_file, 'tv_grab_za', $opt_quiet
);

init_cookies();

# Configuration is a self-contained mode: run it and stop.
if ($mode eq 'configure') {
	mode_configure();
	exit();
}

# Whatever we are doing, we need the channels data.
my %channels;
my %options;

read_config(\%channels);

unless (defined $use_days) {
    # Not got from command line or config file; default it.
    $opt_days = $use_days = 14;
}
330
331
332#$opt_dstv_fallback = 1 if !(defined $opt_mnet_fallback  || defined $opt_dstv_fallback);
333
334
335
#######################################
# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
	# Pass the mode separately so that nothing in the file name can be
	# taken for part of the open mode.
	my $fh = IO::File->new($opt_output, '>');
	die "cannot write to $opt_output: $!" if not defined $fh;
	$w_args{OUTPUT} = $fh;
}
$w_args{encoding} = $ENCODING;
my $writer = XMLTV::Writer->new(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
	# Write channels mode.
	get_channels(\%channels);
	foreach my $id (keys %channels) {
		$writer->write_channel({id => $id, 'display-name' => [ [ $channels{$id}, 'en' ] ]});
	}
	$writer->end();
	exit();
}

######################################################################
# We are producing full listings.
die if $mode ne 'grab';


# Prepare channel maps

#get_mnet_channel_mappings() if defined $opt_mnet_fallback || $opt_dstv_fallback;

#get_dstv_channel_mappings();
#if (keys %dstvchannelmap == 0) {
# die "error: can't open channel map (http://www.dstv.com/DStv_Guide/default.aspx)";
#}
######################################################################
# begin main program


# Print out the channels
die "No channels specified, run me with --configure first\n"
  if not keys %channels;

foreach my $chanid (keys %channels) {
	my $n=$channels{$chanid};
	my $ch_xid="$chanid.dstv.com";
	$writer->write_channel({ id => $ch_xid, 'display-name' => [ [ $n , 'en' ] ] });
}

# Declare the progress bar unconditionally and only construct it when
# wanted: 'my $x = ... if COND' has undefined behaviour in Perl.
# process_dstvnew_html() updates this same variable.
my $bar;
$bar = XMLTV::ProgressBar->new('getting listings', (scalar keys %channels) * min(($opt_days + 1),$max_days))
  if not $opt_quiet;

foreach my $chanid (keys %channels) {
	process_html($chanid);
	#update $bar if not $opt_quiet;
}
$bar->finish() if not $opt_quiet;
$writer->end();

# $days_exceeded is only ever set by the disabled dstv.com day-range code.
if (defined $days_exceeded) {
    $! = 9;
    die;
}
399
400######################################################################
401# subroutine definitions
402
# Use Log::TraceMessages if installed; otherwise install do-nothing
# stand-ins so that t() and d() can be called unconditionally.
BEGIN {
	if (eval { require Log::TraceMessages; 1 }) {
		*t = \&Log::TraceMessages::t;
		*d = \&Log::TraceMessages::d;
		Log::TraceMessages::check_argv();
	}
	else {
		*t = sub {};
		*d = sub { '' };
	}
}
416
417#sub tidy( $ ) {
418#	for (my $tmp = shift) {
419#		if (not defined $tmp) { return };
420#		tr/\t\205/ /d;
421#		s/([^\012\015\040-\176\240-\377]+)//g;
422#		return $_;
423#	}
424#}
425
# Remove bad chars from an element.
# Returns the argument unchanged when it is undefined; otherwise returns a
# copy with non-breaking spaces (\xA0) and soft hyphens (\xAD) cleaned up.
sub tidy( $ ) {
    my ($text) = @_;
    return $text unless defined $text;
    for ($text) {
        s/(\s)\xA0/$1/g;    # 'space-&nbsp;' collapses to the space alone
        s/\xA0/ /g;         # every other &nbsp; becomes a plain space
        s/\xAD//g;          # soft hyphens are dropped
    }
    return $text;
}
435
436
437
# The URI to get listings for a given channel (old guide.dstv.com site).
# Dies when the channel name is not in the map or $use_days is undefined.
sub dstv_channel_uri( $ ) {			# DEPRECATED
	my ($channel_name) = @_;
	my $site_id = dstv_channel_map($channel_name);
	defined $site_id or die "cannot look up '$channel_name' in map";
	defined $use_days or die;
	return "http://guide.dstv.com/listing/default.aspx?drpChannels=$site_id"
	     . "&drpDays=$allowed_days{$use_days}";
}
447
# Returns the option ID on the DSTV site for a given channel name.
# Calls get_dstv_channel_mappings() first if the map is still empty.
sub dstv_channel_map ($) {			# DEPRECATED
	my ($channel_name) = @_;
	get_dstv_channel_mappings() unless %dstvchannelmap;
	return $dstvchannelmap{$channel_name};
}
456
# The URI to get listings for a given channel and day on mobi.dstv.com.
# Dies when the channel name is not in the map.
sub dstvnew_channel_uri( $$ ) {
	my ($channel_name, $day) = @_;
	my $site_id = dstvnew_channel_map($channel_name);
	defined $site_id or die "cannot look up '$channel_name' in map";
	return "http://mobi.dstv.com/guide/$site_id/$day";
}
465
# Returns the option ID on the DSTV site for a given channel name.
# Calls get_dstvnew_channel_mappings() first if the map is still empty.
sub dstvnew_channel_map ($) {
	my ($channel_name) = @_;
	get_dstvnew_channel_mappings() unless %dstvchannelmap;
	return $dstvchannelmap{$channel_name};
}
474
# Returns the M-Net site entry for a given channel name.
# Calls get_mnet_channel_mappings() first if the map is still empty.
sub mnet_channel_map ($) {			# DEPRECATED
	my ($channel_name) = @_;
	get_mnet_channel_mappings() unless %mnetchannelmap;
	return $mnetchannelmap{$channel_name};
}
482
# Turn a raw title and synopsis into an XMLTV programme hash.
#
# arguments:
#	programme title as shown on the site
#	programme synopsis as shown on the site
#
# returns: hash ref holding 'title' and 'desc' and, when they can be
#	recognised in the synopsis, 'sub-title', 'episode-num', 'category',
#	'subtitles', 'audio', 'date' and 'credits'.  Text values are encoded
#	to $ENCODING.  The caller adds start, channel and rating.
#
sub post_process($$) {
	my ($title, $desc) = @_;
	$title = '' if not defined $title;
	$desc  = '' if not defined $desc;

	my ($subtitle, $episode_num, $year, $actors, $director,
		$category, $subtitles, $dolby);

	# Try to get full title from description if title seems cut off
	$title =~ s/(^\s+|\s+$)//g;
	if ($title =~ /\.\.\.$/ ) {
		my $stem = $title;
		$stem =~ s/.\.\.\.$//g;
		# The stem is literal text from the site, so quote it: unquoted, a
		# title containing '(' , '[' , '+' etc. breaks or distorts the pattern.
		if ($desc =~ /^'?(\Q$stem\E[^\.\?]+[^\'])'?[\.\?]\s+(.+)/i) {
			my ($full_title, $rest) = ($1, $2);
			t("REMAPPING TITLE from $title to $full_title");
			$title = $full_title;
			$desc = $rest;
			$title =~ s/(^\s+|\s+$)//g;
			$desc =~ s/(^\s+|\s+$)//g;
			t("New desc = $desc");
		}
	}

	# A leading quoted phrase followed by a full stop is the episode title.
	if ($desc =~ /^'([^\.]+)'\.\s+(.+)/) {	   # don't know why this excludes '.'
                                               #   - means it fails to detect 'S1/E13 - ...A Better Place.'.
		($subtitle, $desc) = ($1, $2);
		t("FOUND EPISODE TITLE: $subtitle");
		t("Title: $title");
		t("New desc = $desc");
		$category = "series";
	}

	# 'S1/E5 - Name' style sub-titles carry the season and episode number;
	# xmltv_ns numbering is zero-based, hence the -1.
	if ($subtitle && $subtitle =~ /^S?(\d+)\/E?(\d+)( - )?(.*)$/) {
		$episode_num = ($1-1) . "." . ($2-1) . ".0/1";
		$subtitle = $4;
		t("FOUND EPISODE NUMBER: $episode_num");
		$category = "series";
	}

	if ($desc =~ /^Aka ([^\.]+)\. (.*)/) {
		my $aka = $1;
		$desc = $2;
		t("Aka found: $aka\n");
		# TODO - do something with the aka
	}

	if ($desc =~ s/,? (HI|English) Subtitles\.?//g) {
		t("REMOVING Subtitles string");
		$subtitles = 1;
	}

	if ($desc =~ s/,? DD\.?//g) {
		t("REMOVING DD string");
		$dolby = 1;
	}

	# For "Press i" placeholder titles the episode title is the real title.
	# Only swap when there is one, otherwise the title would end up
	# undefined or empty.
	if ($title =~ /^Press .i.$/ && defined $subtitle && $subtitle ne '') {
		$title = $subtitle;
		$subtitle = undef;
	}

	# '... (YYYY) Director Name.' at the end of the description
	if ($desc =~ /(.*) \((\d{4})\)\s*([^\.]+)\.?\s*$/) {
		$year = $2;
		$director = encode($ENCODING, $3);
		$desc = $1;
		t("desc = $desc\n");
		t("Year = $year\n");
		t("Director = $director\n");
	}

	# '... (YYYY)' at the end of the description
	if ($desc =~ /(.*) \((\d{4})\)\s*$/) {
		$desc = $1;
		$year = $2;
		t("desc = $desc\n");
		t("Year = $year\n");
	}

	# With a year present, a final sentence containing a capitalised word
	# is taken to be the cast list.
	if (defined $year && $desc =~ /(.*\.)\s+([^\.]+ [A-Z][^\.]+)\.\s*/) {
		$desc = $1;
		my $cast = $2;
		$cast =~ s/^\s+//g;
		$cast =~ s/\s+$//g;
		$actors = [ map { encode($ENCODING, $_) } split(/,\s+/, $cast) ];
		$category = "movie";
	}

	# Trim whitespace from elements
	$title =~ s/(^\s+|\s+$)//g;
	$desc =~ s/(^\s+|\s+$)//g;
	$subtitle =~ s/(^\s+|\s+$)//g if $subtitle;

	$desc = "No description available" if ($desc eq "");

	# Encode into output charset
	$desc     = encode($ENCODING, tidy($desc));
	$title    = encode($ENCODING, tidy($title));
	$subtitle = encode($ENCODING, tidy($subtitle)) if defined $subtitle;

	my %prog;
	$prog{title} = [[$title]];
	$prog{'sub-title'} = [[$subtitle]] if $subtitle;
	$prog{'episode-num'} = [[$episode_num, "xmltv_ns"]] if $episode_num;
	$prog{desc} = [[$desc]];
	$prog{category} = [[ $category, 'en' ]] if $category;
	$prog{'subtitles'} = [ { type => 'teletext' } ] if $subtitles;
	$prog{'audio'}->{"stereo"} = "dolby digital" if $dolby;
	$prog{date} = $year if $year;

	# credits (writers and commentators are not extracted at present)
	my %credits;
	$credits{director} = [ $director ] if $director;
	$credits{actor} = $actors if $actors;
	$prog{credits} = \%credits if %credits;

	return \%prog;
}
619
# DEPRECATED scraper for the old dstv.com listings page of one channel.
#
# argument: xmltv channel id (key of %channels)
#
# returns: the fetched data itself when it contains an ':error:...:'
#	marker, otherwise the string "success".
#
# Programmes are written to $writer one step behind: each one is written
# when the next programme's start time is known and used as its stop time.
# The last programme seen is therefore never written.
sub process_dstv_html {			# DEPRECATED
	my $chanid = shift;
	my $name = $channels{$chanid};

	my $now = time();
	my $data;
	my $tries = 0;			# not used below

	# URI just for error reporting.
	my $uri = dstv_channel_uri $name;
	# Prefix every die message raised while this sub runs with the URI.
	local $SIG{__DIE__} = sub { die "$uri:$_[0]\n" };
	$data = tidy(get_dstv($name));

	# Hand an error marker back to the caller unchanged.
	if ($data =~ /:error:(.*):/) {
		return $data;
	}

	# Get time mappings
#	get_dstv_time_mappings($data);

	# parse the page to a document object
	my $tree = HTML::TreeBuilder->new();
        $tree->utf8_mode(1);
	$tree->parse($data) or die "cannot parse content\n";
	$tree->eof;
	my ($prev_r, $r, $prev_time);	# $r is shadowed by 'my $r' inside the loop

	my @array_ot;			# not used below

	# Find the main table, and loop through all the table rows

	# Find the date headers on the page
	my @date_headers = $tree->look_down(_tag => "td", class => 'srch_date_chnl_head');
	my $offset_counter = 0;
	foreach my $td (@date_headers) {
        	$offset_counter++;
		# Skip the days before --offset and anything beyond the one extra
		# day after the requested range.
	        next if (($offset_counter <= $opt_offset) || ($offset_counter > $opt_days+1));
		my $date = $td->as_text();
		# Drop everything before the first digit of the header text.
		$date =~ s/^[^0-9]+//g;


		# Walk the siblings to the right of the element that holds the
		# date header.
		my $tr = $td->parent();
		while (($tr = $tr->right())) {
			last if !defined $tr;

			my $result = $tr;

			# Result cells: <td> elements of class 'srch_rslt' or
			# 'srch_rslt_alternating'.
            my @alternating = $result->look_down(
                sub {
                    lc($_[0]->attr('_tag')) eq 'td' and lc($_[0]->attr('class')) eq 'srch_rslt_alternating'
                    or lc($_[0]->attr('_tag')) eq 'td' and lc($_[0]->attr('class')) eq 'srch_rslt'
                }
            );

			my $temp;
			# A sibling without result cells ends this day's rows.
            last if !defined $alternating[0];
            $temp = $alternating[0];
			# First cell: start time in a <b> element, required to be HH:MM.
			my $time = $temp->look_down(_tag => 'b');
			die 'no <b> thing (for time) found' if not defined $time;
			$time = $time->as_text;
			$time =~ /^(\d)(\d):(\d)(\d)$/ or die "bad time '$time'";

			# Second cell: title in the <b> inside an <a> whose name matches
			# 'Bookmark'.  NOTE(review): either look_down may return undef,
			# in which case the chained call dies.
            $temp = $alternating[1]->look_down(_tag => "a", name => qr/Bookmark/)->look_down(_tag => "b");
            my $title = $temp->as_text;

			# Rating and duration are read from the text of the title's
			# right-hand sibling.
			my ($rating, $duration);
			my $tempstring = $temp->right->as_text();

			if (defined $tempstring) {
				$rating = $1 if $tempstring =~ /Rating: ?(.+?)\s+/;
				$duration = $1 if $tempstring =~ /Duration: ?([0-9:]+)/;
			}

			# $rating and $duration may still be undefined here.
			t "$title: $rating: $duration\n";

			# Third cell: description.
			my $desc = $alternating[2]->as_text();
			t "---\n$desc\n---\n";

			my $start = gen_start_time($date, $time, $now);

			my $r = post_process($title, $desc);

			$start = $start . " $TZ";

			# Programmes without a rating are labelled "Family".
			if ($rating) { $rating =~ s/(^\s+|\s+$)//g; }
			else { $rating = "Family"; }

			$r->{rating} = [[$rating, "DSTV"]];
			$r->{start} = $start;
			$r->{channel} = "$chanid.dstv.com";

			# This start time is the previous programme's stop time, so the
			# previous programme can now be written.
			if (defined $prev_r) {
				$prev_r->{stop} = $start;
				$writer->write_programme($prev_r);
			}

			$prev_time = $time;
			$prev_r = $r;
			# On the extra day only the first programme is needed (for its
			# start time); bumping the counter also makes the 'next' test
			# above skip any later date headers.
			if ($offset_counter > $opt_days) {
				$offset_counter++;
		                last;
			}
		}
	}
	$data = "success";
	$tree->delete;
	return $data;
}
728
# DEPRECATED scraper for the M-Net schedule page of one channel.
#
# argument: xmltv channel id (key of %channels)
#
# returns: the fetched data itself when it contains an ':error:...:'
#	marker, otherwise the string "success".
#
# The page is read as a flat stream of tags: date, time and title tags set
# the current state and each <p> seen while all three are set is taken as
# the description that completes a programme.  Programmes are written one
# step behind (the next start time becomes the stop time), so the last
# programme seen is never written.
sub process_mnet_html {			# DEPRECATED
	my $chanid = shift;
	my $name = $channels{$chanid};

	my $now = time();
	my $data = tidy(get_mnet($name));

	if ($data =~ /:error:(.*):/) {
		return $data;
	}

	# parse the page to a document object
	my $tree = HTML::TreeBuilder->new();
	$tree->utf8_mode(1);
	$tree->parse($data) or die "cannot parse get_mnet data for $name\n";
	$tree->eof;

	# Tags of interest: <font> with one of these classes, every <p> and
	# every <a>.  Class and tag names are compared case-insensitively.
	my %font_class = map { $_ => 1 }
		qw(scheduledate date scheduletime time scheduletitle title);
	my @tags = $tree->look_down(
		sub {
			my $tag_name = lc($_[0]->attr('_tag'));
			return 1 if $tag_name eq 'p' or $tag_name eq 'a';
			return 0 if $tag_name ne 'font';
			my $class = $_[0]->attr('class');
			return (defined $class && $font_class{lc $class}) ? 1 : 0;
		}
	);

	my ($date, $time, $title, $rating, $prev_r);
	my $days_done = 0;
	foreach my $tag (@tags) {
		my $tag_text = $tag->as_text;
		my $tag_name = $tag->attr('_tag') || '';
		my $class    = lc($tag->attr('class') || '');

		if ($class =~ /date$/) {
			# Date.  Only trust the captures of a successful match: when the
			# header cannot be parsed the date is cleared, so the following
			# programmes are skipped rather than filed under an earlier day.
			$tag_text =~ s/\240/ /g;
			$date = ($tag_text =~ /(\d+) (.*) (\d{4})/) ? "$1 $2" : undef;
			$days_done++;
			next;
		}
		if ($class =~ /time$/) {
			# Time.  As above: an unparsable time clears the state instead of
			# reusing the captures of some earlier match.
			$time = ($tag_text =~ /^\s?(\d\d:\d\d)$/) ? $1 : undef;
			next;
		}
		if ($class =~ /title$/) {
			# Title, with the bytes \302 and \240 stripped.
			$tag_text =~ s/[\302\240]//g;
			$title = $tag_text;
			next;
		}

		if ($tag_name eq 'a' &&
			$tag->attr('onclick') && $tag->attr('onclick') =~ /OpenAgeRestriction/) {
			# Rating
			$rating = $tag_text;
			next;
		}

		if ($tag_name eq 'p' && $date && $time && $title) {
			# Description: completes the current programme.
			my $desc = $tag_text;
			$desc =~ s/(^\s+|\s+$)//g;
			t("---\n$desc\n---\n");

			my $start = gen_start_time($date, $time, $now);

			my $r = post_process($title, $desc);

			$start = $start . " $TZ";

			# NOTE(review): $rating is not reset between programmes, so a
			# programme without its own rating tag inherits the previous
			# value - left as it was, confirm whether that is intended.
			if ($rating) { $rating =~ s/(^\s+|\s+$)//g; }
			else { $rating = "Family"; }

			$r->{rating} = [[$rating, "DSTV"]];
			$r->{start} = $start;
			$r->{channel} = "$chanid.dstv.com";

			# This start time is the previous programme's stop time.
			if (defined $prev_r) {
				$prev_r->{stop} = $start;
				$writer->write_programme($prev_r);
			}
			$prev_r = $r;
			undef $title;
			undef $time;
			if ($days_done > ($opt_days-$opt_offset)) { last; }
		}
	}

	$tree->delete;
	return "success";
}
838
# Grab and write the listings of one channel from mobi.dstv.com.
#
# arguments:
#	xmltv channel id (key of %channels)
#	(second argument is accepted but not used)
#
# returns: the string "success"
#
# One page is fetched per day, from --offset up to and including one day
# past the requested range (capped by $max_days).  Listings don't have a
# duration or a stop time so we work "one behind": a show is written once
# the start time of the *next* show is known.  The extra day supplies the
# stop time of the last show of the range; if 'offset + days' reaches the
# site limit there is no extra day and that last show is omitted.
sub process_dstvnew_html {
	my $chanid = shift;
	my $daytograb = shift;		# unused; kept so the call signature is unchanged
	my $name = $channels{$chanid};

	my $now = time();
	my $prev_r;
	my $extra_day = $opt_offset + $opt_days;

	for (my $i = $opt_offset; $i <= $extra_day && $i < $max_days; $i++) {

		# get the page and parse to a document object
		my $result = get_dstvnew($name, $i);
		my $tree = HTML::TreeBuilder->new();
		$tree->utf8_mode(1);
		$tree->parse($result) or die "cannot parse content\n";
		$tree->eof;

		# The page is handled inside a bare block so that a day with nothing
		# to process can leave early and still reach the clean-up below;
		# skipping with 'next' would leak the tree and leave the progress
		# bar one step short.
		PAGE: {
			# Did we get a listings page?
			my $body = $tree->look_down(_tag => "body");
			my $page404 = $body ? $body->look_down(_tag => "h1") : undef;
			last PAGE if $page404 && $page404->as_text =~ /Page Not Found/;

			# Did we get a schedule?
			#   <div class="ui-listings">
			#    <div class="ui-empty">
			#     <p>Sorry, there is no scheduled programme information for the channel and day you have selected.</p>
			#    </div>
			#   </div><!-- end .ui-listings -->
			last PAGE if $tree->look_down(_tag => "div", class => 'ui-empty');

			# Get the date for this schedule
			#   Can't find a consistent way of getting this :(
			#   so we'll have to assume it matches the request
			# NOTE(review): the date is taken in UTC while the listings are
			# stamped with $TZ - confirm this is right around midnight.
			my $date = POSIX::strftime('%d %B', gmtime( time() + ($i * 86400) ));
			die 'could not find date of schedule' if !$date;

			# Find the main table, and loop through all the table rows
			#   <table id="ui-search-results">
			#    <tr class="ui-listing">
			#     <td class="ui-left">
			#       <span class="ui-time">00:35</span>
			#     </td>
			#     <td class="ui-right">
			#       <h4 class="ui-title">House Of Cards</h4>
			#       <p class="ui-synopsis">&#039;S1/E5&#039;. Claire discovers ... (16)</p>
			#     </td>
			#    </tr>
			#
			# ??? sometimes it seems we get  'div' => 'ui-listings' but other times we get 'table' => 'ui-search-results' !!!
			my $schedule = $tree->look_down(_tag => 'table', id => 'ui-search-results')
			            || $tree->look_down(_tag => 'div', id => 'ui-listings');
			die 'could not find schedule'."\n" if !$schedule;
			my @shows = $schedule->look_down(_tag => "tr", class => 'ui-listing');

			# Process each programme
			SHOW:
			foreach my $show (@shows) {

				# A row without a time or a title cannot be turned into a
				# programme; skip it instead of dying on an undefined element.
				my $time_el  = $show->look_down(class => 'ui-time');
				my $title_el = $show->look_down(class => 'ui-title');
				next SHOW if !$time_el || !$title_el;
				my $desc_el  = $show->look_down(class => 'ui-synopsis');

				my $time  = $time_el->as_text;
				my $title = $title_el->as_text;
				my $desc  = $desc_el ? $desc_el->as_text : '';

				# The age rating is the final parenthesised token of the
				# synopsis.  Remove exactly that token together with the
				# space before it: a trailing space left behind would be
				# picked up by post_process() as a director name.
				my $rating;		# any others?  is there a better way (e.g. 2 numeric || 4 alpha-num)?
				if ($desc =~ s/ \((13|16|18|PG|PG13|R18|Family)\)$//) {
					$rating = $1;
				}

				my $start = gen_start_time($date, $time, $now);
				$start = $start . " $TZ";

				my $r = post_process($title, $desc);

				$r->{rating} = [[$rating, "DSTV"]] if $rating;
				$r->{start} = $start;
				$r->{channel} = "$chanid.dstv.com";

				# This start time is the previous programme's stop time.
				if (defined $prev_r) {
					$prev_r->{stop} = $start;
					$writer->write_programme($prev_r);
				}
				$prev_r = $r;

				# if we are here only to get the start time of the first programme, then we are done
				last SHOW if $i == $extra_day;
			}
		}

		$tree->delete;
		$bar->update() if not $opt_quiet;
	}

	return "success";
}
949
950
####
# process_html: grab the listings for one channel
#
# arguments:
#	xmltv id of channel (key of %channels)
#
# Each site in the order list is tried in turn through its processing
# function; the first result that does not start with ':error:...:' ends
# the search.  If every site fails, a message is shown and the channel is
# skipped.  Programmes are written by the processing function itself.
#
sub process_html {
	my ($id) = @_;
	my $name = $channels{$id};

	t "Getting Channel $id";

	# Site name => processing function.  Only the mobi.dstv.com scraper is
	# active; the old dstv.com and mnet scrapers (process_dstv_html,
	# process_mnet_html) are no longer wired in.
	my %processfuncs = (
		'dstv' => \&process_dstvnew_html,
	);
	my @order = ('dstv');

	for my $site (@order) {
		my $result = $processfuncs{$site}->($id);
		t "result: $result";
		return unless $result =~ /^:error:(.*):/;
		say("\nSite $site returned no data - attempting next site for $name");
	}
	say("\nskipping channel '$name'. All sites failed");
}
991
# get channel listing
#
# argument: hash ref that receives 'channel number => channel name' for
#	every channel offered in the site's channel selector.
#
# Dies when no site returns a page, when the page cannot be parsed, when
# the channel selector is missing, or when no channel is found.
sub get_channels {

	my $channels = shift;

	# Flat list of 'site name, URL' pairs, tried in order.
	# Earlier sources, no longer used:
	#	http://www.google.com/search?q=cache%3Awww.dstv.com%2Fmain.aspx%3FID%3D136
	#	http://www.dstv.com/dstvsa/content/en/sa/dstv_premium?categorylistsearch=cl_results&category_id=158
	#	http://www.dstv.com/dstvsa/content/en/sa/products?bId=1   (premium bouquet; other bouquets have other bId values)
	my @urls = (
		'DSTV' , 'http://mobi.dstv.com/?enter=za',
	);

	my $local_data;
	my $bar;
	for (my $i = 0; $i < $#urls; $i += 2) {
		my ($key, $url) = @urls[$i, $i + 1];
		$bar = XMLTV::ProgressBar->new("Getting list of channels from $key site", 1) if not $opt_quiet;
		t("Getting $key from $url");

		$local_data = get_url('GET', $url);
		last if defined $local_data && $local_data !~ /^:error/;

		$bar->finish() if not $opt_quiet;
		print STDERR "Unable to get channel listing from $key site\n"
			if not $opt_quiet;
	}
	if (!defined $local_data || $local_data =~ /^:error/) {
		die "Unable to get channel listing from any site.  Please check your connectivity or try again later\n";
	}

	t("Got channel data ".length($local_data)." bytes - about to parse");

	my $tree = HTML::TreeBuilder->new();
	$tree->utf8_mode(1);
	$tree->parse($local_data) or die "cannot parse content of channels page\n";
	$tree->eof;

	# The channels are the <option> entries of the guide's channel selector.
	my $form = $tree->look_down( _tag => 'form', id => 'guide-channel-select' )
		or die "cannot find the channel selection form on the channels page\n";

	# Build the ignore lookup once rather than for every option.
	my %ignore = map { $_ => 1 } @dstvignorechannels;

	foreach my $entry ($form->look_down( _tag => 'option' )) {

		# Options with no value or value 0 are not channels.
		my $chanrefid = $entry->attr('value');
		next if !defined $chanrefid || $chanrefid eq '0' || $chanrefid eq '';

		# The option text is '<channel number> <channel name>'.  Skip text
		# that does not have that shape: using the capture variables after
		# a failed match would repeat the previous option's values.
		my $label = $entry->as_text();
		next unless $label =~ /^(\d*)\s(.*)$/;
		my ($chanid, $name) = ($1, $2);

		$name =~ s/\s+$//g;
		if ($ignore{$name}) {
			t("Ignore bogus channel $name");
		} else {
			t("Channel $chanid = $name");
			$channels->{$chanid} = $name;
		}
	}
	$tree->delete;
	die "no channels could be found" if not keys %$channels;
	$bar->update() if not $opt_quiet;
	$bar->finish() if not $opt_quiet;
}
1080
# Advance a YYYYMMDD date by one day.
#
# The argument is read with parse_date(), moved forward by '+ 1 day' with
# DateCalc() and formatted with UnixDate() using '%Q'.
sub nextday {
	my ($date) = @_;
	# scalar() keeps both helpers in scalar context, exactly as when their
	# results are assigned to plain scalars.
	return UnixDate(scalar(DateCalc(scalar(parse_date($date)), '+ 1 day')), '%Q');
}
1088
# Interactive --configure mode.
#
# Downloads the channel list with get_channels(), asks which channels are
# wanted plus the number of days, retries and request timeout, writes the
# answers to $config_file and exits the program.
#
# Unwanted channels are written as commented-out lines.  Dies when the
# configuration file cannot be opened for writing.
sub mode_configure {

	XMLTV::Config_file::check_no_overwrite($config_file);
	get_channels(\%channels);

	# Three-argument open with a lexical handle: the file name can never be
	# taken for a mode, and the handle is not a global bareword.
	open(my $conf, '>', $config_file)
		or die "cannot write to $config_file: $!";

	# Ask about each channel.
	my @chs = sort { uc($channels{$a}) cmp uc($channels{$b}) } keys %channels;
	my @qs = map { "add channel '$channels{$_}'? " } @chs;
	my @want = ask_many_boolean(1, @qs);
	foreach my $chanid (@chs) {
		my $w = shift @want;
		if (not defined $w) {
			warn("cannot read input, stopping channel questions");
			last;
		}
		# Print a config line, but comment it out if channel not wanted.
		print $conf '#' if not $w;
		print $conf "channel $chanid $channels{$chanid}\n";
	}

	#my @choices = (1,7,14);
	my @choices = (1,2,3,4,5,6,7,8);
	my $days = ask_choice("Number of days to retrieve",$choices[2], @choices);
	# Fall back to the offered default if no answer could be read.
	$days = $choices[2] if not defined $days;
	print $conf "option days $days\n";

	# Ask for a whole number.  An unreadable or blank answer selects the
	# default; anything that is not a whole number is rejected so that it
	# never reaches the configuration file.
	my $ask_number = sub {
		my ($question, $default) = @_;
		my $answer = ask($question);
		return $default if not defined $answer or $answer !~ /\S/;
		return $1 if $answer =~ /^\s*(\d+)\s*$/;
		warn "'$answer' is not a whole number, using $default\n";
		return $default;
	};

	my $retries = $ask_number->("Number of retries for failed downloads? (3)", 3);
	print $conf "option retries $retries\n";

	my $timeout = $ask_number->("Timeout for requests? (240)", 240);
	print $conf "option timeout $timeout\n";

#	say ("This grabber can get the listings from either mnet.co.za, or dstv.com");
#	say ("Which site would you like to use as the main site (mnet recommended)");
#	@choices = ('dstv','mnet');
#	my $fallback_option = ask_choice("Select one of: ",$choices[1], @choices);
#	if ($fallback_option eq 'dstv') {
#		print CONF "option mnet-fallback 1\n";
#	} else {
#		print CONF "option dstv-fallback 1\n";
#	}
	close $conf or warn "cannot close $config_file: $!";
	say("Finished configuration. ");

	exit();
}
1137
# DEPRECATED.  Update the saved form state attributes from a fetched page.
#
# Parameters:
#   $data - page content to search.
#
# Stores the value of the element with id "__VIEWSTATE" in $viewstate and
# that of "__EVENTVALIDATION" in $eventvalidation.  A field that is not
# found keeps its previous value and is reported on STDERR unless
# $opt_quiet is set.
sub update_dstv_eventstate {			# DEPRECATED
	my $data = shift;

	# [^"]* stops at the attribute's own closing quote; a greedy .* would
	# run on to the last quote on the same line.
	if ($data =~ /id="__VIEWSTATE" value="([^"]*)"/) {
		$viewstate = $1;
		t "got viewstate: $viewstate";
	} else {
		print STDERR  "VIEWSTATE not found\n" if not $opt_quiet;
	}

	if ($data =~ /id="__EVENTVALIDATION" value="([^"]*)"/) {
		$eventvalidation = $1;
		t "got eventvalidation: $eventvalidation";
	} else {
		print STDERR  "EVENTVALIDATION not found\n" if not $opt_quiet;
	}
}
1156
# DEPRECATED.  Initialize cookies and retrieve current channel ID's from
# the guide.dstv.com listing form.
#
# Fetches the listing page, posts the form back with 'drpBouquet' as the
# event target, and records every <option> of the 'drpChannels' select in
# %dstvchannelmap (option text => option value).  Names that have an entry
# in %dstvchannelfixups are recorded under the fixed-up name as well.
#
# Returns early, after reporting on STDERR unless $opt_quiet is set, when
# either download fails.  Dies when the page cannot be parsed, when the
# channel select is missing, or when the map is still empty afterwards.
sub get_dstv_channel_mappings {			# DEPRECATED
	t "refreshing dstv channel mappings";

	my $url = "http://guide.dstv.com/listing/default.aspx";
	my $data = get_url("GET", $url);

	if ($data =~ /^:error:/) {
		print STDERR  "Error getting dstv channel state data: $data\n"
			if not $opt_quiet;
		return;
	}

	update_dstv_eventstate($data);

	my %info = (
		'__VIEWSTATE' => $viewstate,
		'drpBouquet' => '1',
		'drpChannels' => '0',
		'drpDays' => '0',
		'txtKeyword' => 'Keyword...',
		'__EVENTVALIDATION' => $eventvalidation,
		'__EVENTTARGET' => 'drpBouquet',
		'__EVENTARGUMENT' => '',
		'__LASTFOCUS' => '',
	);

	$data = get_url("POST", $url, $url, undef, \%info);

	if ($data =~ /^:error:/) {
		print STDERR  "Error Getting dstv channel mappings: $data\n"
			if not $opt_quiet;
		return;
	}

	update_dstv_eventstate($data);

	my $chantree = HTML::TreeBuilder->new();
	$chantree->utf8_mode(1);
	$chantree->parse($data) or die "cannot parse content of channels page\n";
	$chantree->eof;

	# look_down() yields undef in scalar context when nothing matches;
	# fail with a clear message instead of calling a method on undef.
	my $chanselect = $chantree->look_down(_tag => 'select', name => 'drpChannels');
	if (not defined $chanselect) {
		$chantree->delete;
		die "cannot find the 'drpChannels' list in the channels page\n";
	}

	foreach my $chanentry ($chanselect->look_down(_tag => "option")) {
		my $chanid = $chanentry->attr('value');
		my $channame = $chanentry->as_text;
		$dstvchannelmap{$channame} = $chanid;
		if ($dstvchannelfixups{$channame}) {
			$dstvchannelmap{$dstvchannelfixups{$channame}} = $chanid;
		}

		t "Found channel $channame; internal reference $chanid";
	}

	$chantree->delete;

	# Check before announcing success.
	die "no channels could be found" if not keys %dstvchannelmap;
	t "Refresh successful";
}
1219
# DEPRECATED.  Initialize cookies and retrieve current channel ID's from
# the mnet.co.za schedules page.
#
# Every <option> of the 'channelid' select whose value consists only of
# digits and parentheses is recorded in %mnetchannelmap as
# channel name => option value.  The name is the trimmed option text,
# replaced by its entry in %mnetchannelfixups when one is defined.
#
# An ':error:no data:' result from get_url() still carries the page, which
# is then used; any other error is reported on STDERR (unless $opt_quiet)
# and the sub returns.  A page without the select is handled the same way.
# Dies when the page cannot be parsed.
sub get_mnet_channel_mappings {			# DEPRECATED

	t "refreshing mnet channel mappings";

	my $url = 'http://www.mnet.co.za/schedules/default.asp';
	my $result = get_url("GET", $url);
	if ($result =~ /^:error:/) {
		if ($result =~ /^:error:no data:(.+)$/s) {
			$result = $1;
		} else {
			print STDERR  "Error Getting mnet channel mappings: $result\n"
				if not $opt_quiet;
			return;
		}
	}

	my $chantree = HTML::TreeBuilder->new();
	$chantree->utf8_mode(1);
	$chantree->parse($result) or die "cannot parse content of $url\n";
	$chantree->eof;

	# look_down() yields undef in scalar context when nothing matches.
	my $chanselect = $chantree->look_down(_tag => 'select', name => 'channelid');
	if (not defined $chanselect) {
		$chantree->delete;
		print STDERR  "Error Getting mnet channel mappings: no 'channelid' list in $url\n"
			if not $opt_quiet;
		return;
	}

	foreach my $chanentry ($chanselect->look_down(_tag => "option")) {
		my $chantemp = $chanentry->as_text;
		$chantemp =~ s/(^\s+|\s+$)//g;

		my $value = $chanentry->attr('value');
		next if not defined $value or $value !~ /^[\d\(\)]+$/;

		# Direct hash lookup instead of scanning every fixup key.
		my $newchan = $mnetchannelfixups{$chantemp};
		if (defined $newchan) {
			t "fixing up $chantemp to $newchan";
		} else {
			$newchan = $chantemp;
		}
		$mnetchannelmap{$newchan} = $value;
	}
	$chantree->delete;
}
1261
# Initialize cookies and retrieve current channel ID's from the
# mobi.dstv.com page.
#
# Every <option> of the 'guide-channel-select' form with a value other
# than '' or '0' is recorded in %dstvchannelmap as channel name => option
# value, where the name is the option text after its leading number.
# Names that have an entry in %dstvchannelfixups are recorded under the
# fixed-up name as well.
#
# A failed download is reported on STDERR (unless $opt_quiet) and the sub
# returns.  Dies when the page cannot be parsed, when the form is missing,
# or when the map is still empty afterwards.
sub get_dstvnew_channel_mappings {
	t "refreshing dstv channel mappings";

	my $url = "http://mobi.dstv.com/?enter=za";

	my $result = get_url('GET', $url);
	# get_url() reports failure as a string starting with ':error:';
	# parsing that string as HTML would only fail later, less clearly.
	if ($result =~ /^:error:/) {
		print STDERR  "Error getting dstv channel mappings: $result\n"
			if not $opt_quiet;
		return;
	}

	my $chantree = HTML::TreeBuilder->new();
	$chantree->utf8_mode(1);
	$chantree->parse($result) or die "cannot parse content\n";
	$chantree->eof;

	# look_down() yields undef in scalar context when nothing matches.
	my $form = $chantree->look_down( _tag => 'form', id => 'guide-channel-select' );
	if (not defined $form) {
		$chantree->delete;
		die "cannot find the channel selection form in $url\n";
	}

	foreach my $chanentry ($form->look_down( _tag => 'option' )) {
		my $chanid = $chanentry->attr('value');
		next if not defined $chanid or $chanid eq '0' or $chanid eq '';

		# Option text is "<number> <name>".  Capturing in list context
		# leaves $channame undefined on a failed match, so a previous
		# option's name can never be mapped to this option's id.
		my (undef, $channame) = $chanentry->as_text =~ /^(\d*)\s(.*)$/;
		if (not defined $channame) {
			t "Skipping option '$chanid': text is not '<number> <name>'";
			next;
		}

		$dstvchannelmap{$channame} = $chanid;
		if ($dstvchannelfixups{$channame}) {
			$dstvchannelmap{$dstvchannelfixups{$channame}} = $chanid;
		}

		t "Found channel $channame; internal reference $chanid";
	}

	$chantree->delete;

	# Check before announcing success.
	die "no channels could be found" if not keys %dstvchannelmap;
	t "Refresh successful";
}
1298
# DEPRECATED.  Resolve the images referenced by a listings page.
#
# Parameters:
#   $data - HTML of the page.
#
# For every <img> whose src starts with "get.aspx?guid" (any case) the
# image is downloaded once per GUID and its MD5 is looked up in
# %dstvfilehashes; a hit is stored in %dstvtimehashes under the GUID.
# An unknown image is saved as "<GUID>.gif" in the current directory
# (unless that file already exists) and reported on STDERR unless
# $opt_quiet is set.  %dstvtimehashes is emptied at the start of each call.
#
# Dies when the page cannot be parsed.
sub get_dstv_time_mappings {			# DEPRECATED
	my $data = shift;

	my $tree = HTML::TreeBuilder->new();
	$tree->utf8_mode(1);
	$tree->parse($data) or die "cannot parse dstv time mappings\n";
	$tree->eof;

	my @tags = $tree->look_down(
		_tag => 'img',
		sub {
			# /i folds case; an <img> without src simply does not match
			my $src = $_[0]->attr('src');
			defined $src and $src =~ /^get\.aspx\?guid/i;
		}
	);

	our %dstvtimehashes = ();
	foreach my $tag (@tags) {
		# Matched with /i like the filter above, and captured in list
		# context, so the GUID always comes from this tag.
		my ($guid) = $tag->attr('src') =~ /^get\.aspx\?guid=(.*)$/i;
		next if not defined $guid;
		# each unique guid is fetched only once
		next if defined $dstvtimehashes{$guid};

		my $url = "http://www.dstv.com/DStv_Guide/get.aspx?GUID=$guid";
		t "getting time mapping for GUID: $guid";
		my $req = GET $url;
		$req->header('Accept-Encoding','gzip');
		$req->header('Referer','http://www.dstv.com/DStv_Guide/default.aspx');
		my $res = $ua->request($req);
		next if not $res->is_success;

		my $encoding = $res->headers()->header('Content-Encoding');
		if ($encoding && $encoding eq 'gzip') {
			$res->content(Compress::Zlib::memGunzip($res->content));
		}

		# hash the image and compare with the known images
		my $imagehash = md5_hex($res->content);
		if (defined $dstvfilehashes{$imagehash}) {
			$dstvtimehashes{$guid} = $dstvfilehashes{$imagehash};
			next;
		}

		# The GUID comes from the remote page and becomes a file name:
		# accept only characters that cannot leave the current directory.
		if ($guid !~ /^[\w{}-]+$/) {
			print STDERR "Not saving image for unexpected GUID '$guid'\n"
				if not $opt_quiet;
			next;
		}
		next if -f "$guid.gif";

		if (not $opt_quiet) {
			print STDERR "Undefined image mapping for GUID=$guid\n";
			print STDERR "MD5 = $imagehash\n";
			print STDERR "Saving to file $guid.gif\n";
		}
		my $gif;
		if (!open $gif, '>', "$guid.gif") {
			print STDERR "Cannot write file: $!\n"
				if not $opt_quiet;
			next;
		}
		binmode $gif;			# image data, not text
		print $gif $res->content;
		close $gif or warn "cannot close $guid.gif: $!";
	}

	$tree->delete;

}
1362
# DEPRECATED.  Download listings for a channel name - refresh mappings if
# necessary.
#
# Parameters:
#   $channame - channel name, resolved with dstv_channel_map().
#
# Posts the listing form for the mapped channel and $allowed_days{$use_days}.
# If get_url() reports an error, the channel mappings are refreshed once
# and the form is posted a second time.
#
# Returns the result of the last get_url() call.  Dies when the channel
# cannot be mapped or when $use_days is not defined.
#
# Declared without a prototype: the sub takes an argument, which the empty
# prototype it used to carry contradicted.
sub get_dstv {			# DEPRECATED
	my $channame = shift;
	my $url = "http://guide.dstv.com/listing/default.aspx";

	my $mapped = dstv_channel_map($channame);
	die "cannot look up '$channame' in map" if not defined $mapped;
	die "number of days to grab is not set" if not defined $use_days;
	my $days_param = $allowed_days{$use_days};

	my %info = (
		'__VIEWSTATE' => $viewstate,
		'drpBouquet' => '1',
		'drpChannels' => $mapped,
		'drpDays' => $days_param,
		'txtKeyword' => 'Keyword...',
		'btnSubmit.x' => '16',
		'btnSubmit.y' => '15',
		'__EVENTVALIDATION' => $eventvalidation,
		'__EVENTTARGET' => '',
		'__EVENTARGUMENT' => '',
		'__LASTFOCUS' => '',
	);
	t "getting channel: $channame (ID = $mapped)";

	my $result = get_url("POST", $url, $url, undef, \%info);

	if ($result =~ /^:error:/) {
		# Always attempt a refresh of channel mappings once
		get_dstv_channel_mappings();
		$mapped = dstv_channel_map($channame);
		die "cannot look up '$channame' in map" if not defined $mapped;
		# NOTE(review): this retry posts fewer fields than the first
		# attempt (no drpBouquet or __EVENT* fields) - confirm intended.
		%info = (
			'__VIEWSTATE' => $viewstate,
			'drpChannels' => $mapped,
			'drpDays' => $days_param,
			'txtKeyword' => '',
			'btnSubmit.x' => '15',
			'btnSubmit.y' => '12',
			'__EVENTVALIDATION' => $eventvalidation,
		);
		$result = get_url("POST", $url, $url, undef, \%info);
	}

	return $result;
}
1409
# DEPRECATED.  Download listings for a channel from the mnet.co.za
# schedules form.
#
# Parameters:
#   $channame - channel name, resolved with mnet_channel_map().
#
# Returns the result of get_url() for the form POST, or a string of the
# form ":error:<message>:" when the channel has no mnet mapping.
#
# Declared without a prototype: the sub takes an argument, which the empty
# prototype it used to carry contradicted.
sub get_mnet {			# DEPRECATED
	my $channame = shift;

	my $chanid = mnet_channel_map($channame);
	if (not defined $chanid) {
		my $msg = "no corresponding mnet channel found for $channame";
		print STDERR "\n$msg" if not $opt_quiet;
		# This seems to be the convention for returning errors.
		return ":error:$msg:";
	}

	my $start_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($opt_offset)*86400)));

	# The end date is $opt_days from now, or one day less once $opt_days
	# reaches the largest key of %allowed_days.
	# NOTE(review): unlike the start date, $opt_offset is not added here -
	# confirm this is intended.
	my $end_days = $opt_days < max(keys %allowed_days) ? $opt_days : $opt_days - 1;
	my $end_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($end_days)*86400)));

	my %info = (
		'startDate' => $start_date,
		'EndDate' => $end_date,
		'sType' => '5',
		'channelid' => $chanid,
		'searchstring' => '',
		'channel' => $chanid,
		'theType' => 'today',
		'firstRun' => 'false',
	);
	t "getting channel: $channame (ID = $chanid)";
	return get_url("POST", "http://www.mnet.co.za/schedules/default.asp", 'http://www.mnet.co.za/schedules/default.asp', undef, \%info);
}
1446
# Download listings for a channel-day.
#
# Parameters:
#   $channame  - channel name, passed to dstvnew_channel_uri().
#   $daytograb - day to fetch, passed to dstvnew_channel_uri().
#
# Returns whatever get_url() returns for the resulting URL.
#
# Declared without a prototype: the sub takes two arguments, which the
# empty prototype it used to carry contradicted.
sub get_dstvnew {
	my ($channame, $daytograb) = @_;
	my $url = dstvnew_channel_uri($channame, $daytograb);
	#print STDERR $url."\n";
	my $result = get_url('GET', $url);

	# Debugging aid: dump the fetched page to a file.
	#my $fn = 'grab'.time(); my $fhok = open my $fh, '>', $fn or warning("Cannot open file $fn");  print $fh $result; close $fh;

	return $result;
}
1459
# Fetch the mobi.dstv.com home page with get_nice(), showing a one-step
# progress bar unless $opt_quiet is set.
# (Presumably the fetch is what makes the site set its cookies, as the
# name suggests - not verifiable from this sub alone.)
sub init_cookies {
	# Declared unconditionally: "my $x = ... if COND" has undefined
	# behaviour in Perl, so declaration and conditional assignment are
	# kept apart.
	my $bar;
	$bar = XMLTV::ProgressBar->new('Initialising cookies', 1)
		if not $opt_quiet;

	# get_nice('http://guide.dstv.com/listing/default.aspx');
	get_nice('http://mobi.dstv.com/home');

	if (defined $bar) {
		$bar->update();
		$bar->finish();
	}
}
1468
# Build a YYYYMMDDHHMMSS start time from the site's date and time strings.
#
# Parameters:
#   $date - date text such as 'Friday 23 May'; a leading 'Today',
#           'Tomorrow' (or a misspelling of it) or weekday name is removed.
#   $time - time text such as '14:00'.
#   $now  - epoch seconds used to work out the year.
#
# The year is not part of the site's text (and str2time sometimes guessed
# it wrongly), so the year of $now is appended; it is incremented when
# $now falls in November or December and the date names January or
# February.  The text is parsed with UnixDate() and the resulting epoch
# value is formatted with gmtime().
#
# NOTE(review): when the text cannot be parsed an error is printed, but
# formatting still goes ahead with the undefined timestamp.
sub gen_start_time {
	my ($date, $time, $now) = @_;

	$date =~ s/^(Today|Tomorrow|Tommorrow|Tommorow|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)//g;

	# gmtime() months are 0-based and its years count from 1900.
	my ($month_index, $years_since_1900) = (gmtime($now))[4, 5];
	my $year = $years_since_1900 + 1900;
	my $mon = $month_index + 1;
	$year++
		if ($mon == 11 || $mon == 12) && ($date =~ /(January|February)/);

	my $timestamp = UnixDate("$date $year $time", "%s");
	print STDERR "Error: Cannot decode time: $date $year $time\n"
		if !defined $timestamp;

	return POSIX::strftime("%Y%m%d%H%M%S", gmtime($timestamp));
}
1491
# Create and return the LWP::UserAgent used for downloads.
#
# The agent gets a fresh in-memory cookie jar, a browser-like agent
# string, a 240 second timeout, and proxy settings taken from the
# environment (http_proxy etc.).
sub initialise_ua {
	# The same settings the individual accessor calls would apply,
	# handed to the constructor in one go.
	#   keep_alive => 1 was tried at some point and is not enabled.
	return LWP::UserAgent->new(
		cookie_jar => HTTP::Cookies->new,
		agent      => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US)',
		timeout    => 240,
		env_proxy  => 1,
	);
}
1507
# Read $config_file and apply its contents.
#
# Parameters:
#   $channels - hash reference; receives channel id => channel name for
#               every "channel ID NAME" line.
#   $options  - accepted for the caller's sake; not used here.
#
# "option NAME VALUE" lines are applied directly:
#   retries - stored in $opt_retries
#   timeout - passed to $ua->timeout()
#   days    - stored in $opt_days and $use_days, unless $opt_days was
#             already set (the command line wins)
# Other options are ignored.  Lines that lack a channel id or an option
# value are skipped with a warning.
#
# Dies when the configured number of days is not a key of %allowed_days.
sub read_config {
	my ($channels, $options) = @_;

	my @config_lines = XMLTV::Config_file::read_lines($config_file);

	# Counted for the warnings below; presumably read_lines() returns one
	# entry per line of the file (undef for skipped ones) - TODO confirm.
	my $line_num = 0;
	foreach my $line (@config_lines) {
		++$line_num;
		next if not defined $line;

		(my $text = $line) =~ s/#.*//g;
		$text =~ s/\s+$//g;
		next if $text eq '';

		if ($text =~ /^channel/) {
			my (undef, $chanid, $name) = split(/\s+/, $text, 3);
			if (not defined $chanid) {
				warn "$config_file:$line_num: channel line without an id, ignored\n";
				next;
			}
			$channels->{$chanid} = $name;
			next;
		}
		next if $text !~ /^option/;

		my (undef, $conf_option, $conf_value) = split(/\s+/, $text, 3);
		next if not defined $conf_option;

#		$opt_mnet_fallback = 1 if $conf_option eq 'mnet-fallback';
#		$opt_dstv_fallback = 1 if $conf_option eq 'dstv-fallback';

		# Only these options are acted upon; all of them need a value.
		next if not grep { $conf_option eq $_ } qw(retries timeout days);
		if (not defined $conf_value) {
			warn "$config_file:$line_num: option $conf_option has no value, ignored\n";
			next;
		}

		if ($conf_option eq 'retries') {
			$opt_retries = $conf_value;
		}
		elsif ($conf_option eq 'timeout') {
			$ua->timeout($conf_value);
		}
		else {
			# Day stuff given on the command line should override
			# whatever's in the config file.
			next if defined $opt_days;

			# Set the number of days from the config file.  It must be
			# one of the numbers allowed by the site; the value is
			# checked to be numeric before it is compared with ==.
			die "bad number of days $conf_value in config file\n"
				if $conf_value !~ /^\d+$/
				or not grep { $_ == $conf_value } keys %allowed_days;
			$opt_days = $use_days = $conf_value;
		}
	}
#    die 'config file: --mnet-fallback and --dstv-fallback are mutually exclusive'
#      if (defined $opt_mnet_fallback && $opt_dstv_fallback);
}
1553
# Download a URL through the shared user agent $ua, retrying on failure.
#
# Parameters:
#   $method   - 'POST' (any case) for a form post; anything else is a GET.
#   $url      - address to fetch.
#   $referrer - optional value for the Referer header.
#   $agent    - optional User-Agent value for this request only.
#   $varhash  - optional hash reference of form fields, used for POST.
#
# Up to $opt_retries attempts are made.  Every request asks for gzip and
# a gzip-encoded response is decompressed.
#
# Returns the response body, or ":error:maximum retries:" when no attempt
# produced a non-empty body.
#
# Declared without a prototype: the file calls this sub with two as well
# as five arguments, which the five-scalar prototype it used to carry
# contradicted.
sub get_url {
	my ($method, $url, $referrer, $agent, $varhash) = @_;

	t "Downloading URL: $url";

	# These headers used to be set on a request object that was never
	# sent; they are now passed along with the request actually made.
	my @headers = ('Accept-Encoding' => 'gzip');
	push @headers, 'Referer' => $referrer if defined $referrer;
	push @headers, 'User-Agent' => $agent if defined $agent;

	my $is_post = lc($method) eq 'post';
	# The headers follow the form reference, so one must always be there.
	$varhash = {} if $is_post and not defined $varhash;

	my $tries = 0;
	my $data;
	while ($tries < $opt_retries && !defined $data) {
		$tries++;

		t "Attempt $tries";

		my $res = $is_post
			? $ua->post($url, $varhash, @headers)
			: $ua->request(GET($url, @headers));

		if (not $res->is_success) {
			print STDERR "\nserver error: " . $res->status_line
				if not $opt_quiet;
			t "Failed";
			next;
		}

		my $body = $res->content;
		my $encoding = $res->headers()->header('Content-Encoding');
		if ($encoding && $encoding eq 'gzip') {
			# undef on corrupt data, which triggers another attempt
			$body = Compress::Zlib::memGunzip($body);
		}

		# NOTE(review): the original code tested
		#   ! $res->content =~ /class="srch_rslt_head1"/
		# to flag pages without listing data as ":error:no data:".
		# Because ! binds tighter than =~ that test could never be true,
		# so every successful response has always been returned as data.
		# That effective behaviour is kept: callers such as
		# get_channels() treat any ":error" prefix as a failed download,
		# so enforcing the marker would reject pages that lack it.
		$data = $body;
	}
	$data = ":error:maximum retries:" if not $data;
	return $data;
}