#!/usr/local/bin/perl -w
=pod

=head1 NAME

tv_grab_za - Grab TV listings for South Africa.

=head1 SYNOPSIS

tv_grab_za --help

tv_grab_za [--config-file FILE] --configure [--gui OPTION]

tv_grab_za [--config-file FILE] [--output FILE] [--days N] [--offset N]
           [--quiet] [--retries N]

=head1 DESCRIPTION

Output TV listings for DSTV channels available in South Africa.
The data comes from www.dstv.com. The grabber relies on
parsing HTML so it might stop working at any time.

First run B<tv_grab_za --configure> to choose which channels you want
to download. Then running B<tv_grab_za> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_za.conf>. This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days. N may be 1 to 8 (the site only offers 8 days
including today). Default is 7.

B<--offset N> start grabbing N days from today. Default is 0. The sum of
B<--offset> and B<--days> may not exceed 8.

B<--quiet> suppress the progress messages normally written to standard
error.

B<--retries N> number of retries before failing channel download.

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHORS

Chris Picton <cpicton@users.sf.net>

Neil Garratt <ngarratt@users.sf.net>

Based on tv_grab_fi by Matti Airas.

Latest version always available at http://xmltv.cvs.sourceforge.net/xmltv/xmltv/grab/za/

=head1 BUGS

Does not automatically update itself when DSTV changes their site

=cut

######################################################################
# initializations

use strict;

# These three inspect the command line when they are loaded, so they stay
# first and in this order.
use XMLTV::Version '$Id: tv_grab_za,v 1.45 2014/05/01 19:26:55 bilbo_uk Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'South Africa';

# General purpose modules.
use Getopt::Long;
use List::Util qw(min max);
use Date::Manip;
use HTML::TreeBuilder;
use HTML::Entities;             # parse entities
use IO::File;
use Digest::MD5 qw(md5 md5_hex);
use Encode;
use POSIX qw(strftime);

# HTTP plumbing.
#use LWP::Simple qw($ua);
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request::Common qw(GET);
use HTTP::Cookies;

# XMLTV helpers.
use XMLTV;
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Mode;
use XMLTV::Date;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get South African television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
 [--quiet] [--retries]
END
  ;

# Give the shared XMLTV::Get_nice user agent a cookie jar.
my $cookies = HTTP::Cookies->new;
$XMLTV::Get_nice::ua->cookie_jar($cookies);

# Attributes of the root element in output.
my $HEAD = {
    'source-info-url'     => 'http://www.dstv.com/',
    # 'source-data-url'   => "http://www.dstv.com/dstv-guide/default.asp",
    'source-data-url'     => "http://mobi.dstv.com/guide/",
    'generator-info-name' => 'XMLTV',
    'generator-info-url'  => 'http://xmltv.org/',
};

# The timezone in South Africa.
my $TZ = "+0200";

# default language
my $LANG = "en";

# character encoding of output file
my $ENCODING = 'ISO-8859-1';

# Channel name => site identifier, filled in lazily by the *_channel_map subs.
our %dstvchannelmap;
our %mnetchannelmap;

# Channel name translations for the (old) mnet site.
my %mnetchannelfixups = (
    'Africa Magic Channel (C-Band)'  => 'AfricaMagic',
    'Bloomberg Information TV'       => 'Bloomberg',
    'China Central Television 4'     => 'CCTV 4',
    'Channel O - Sound Television'   => 'Channel O',
    'CNBC'                           => 'CNBC Africa',
    'CNN International'              => 'CNN',
    'Deukom - DW'                    => 'Deutchse Welle',
    'E-Entertainment'                => 'E! Entertainment',
    'eTV'                            => 'e-TV',
    'Go (K-World Teen)'              => 'GO',
    'Hallmark Entertainment Network' => 'Hallmark',
    'K-TV World'                     => 'K-All Day',
    'M-Net Domestic'                 => 'M-Net',
    'M-Net Series'                   => 'M-Net Series',
    'Parliamentary Service'          => 'Parliamentary',
    'Reality TV'                     => 'Zone Reality',
    'Rhema Network'                  => 'Rhema TV',
    'Summit'                         => 'Summit TV',
    'SuperSport'                     => 'SuperSport 1',
    'SuperSport 3'                   => 'SuperSport 3 (Soccer)',
    'SuperSport 5'                   => 'SuperSport 5 (Highlights)',
    'SuperSport Zone Mosaic'         => 'SuperSport Zone',
    'Trinity Broadcasting Network'   => 'TBN',
    'Turner Classic Movies'          => 'TCM',
    'TV5 Afrique'                    => 'TV5',
    'VH1'                            => 'VH-1',
);

# Channel name translations for the dstv site.
my %dstvchannelfixups = (
    'CNN International'             => 'CNN',
    'Sony Entertainment Television' => 'Sony Entertainment',
    'SABC 1'                        => 'SABC1',
    'SABC 2'                        => 'SABC2',
    'SABC 3'                        => 'SABC3',
    'Crime & Investigation Network' => 'Crime & Investigation',
    'E! Entertainment Television'   => 'E! Entertainment',
    'SuperSport MaXimo 1'           => 'SuperSport Maximo',
    'MagicWorld'                    => 'Magic World',
    'Deutsche Welle'                => 'Deutchse Welle'
);

#These entries appear on the channel index page, but no schedules for them exist on the site.
my @dstvignorechannels = ('Soweto TV', 'Ignition');

# Lookup from a hex digest to the digit (0-9) it stands for.  The earlier,
# numeric variant of the table is kept below for reference.
#my %dstvfilehashes = (
#    '1494729404' => '0',
#    '3139098187' => '1',
#    '2091571851' => '2',
#    '2860538121' => '3',
#    '3348398793' => '4',
#    '1813599985' => '5',
#    '1153776246' => '6',
#    '1367985183' => '7',
#    '3033721747' => '8',
#    '2699942871' => '9',
#    );
my %dstvfilehashes = (
    '937c943580ac202fc64a80dbd3be3aab' => '0',
    '40154b2e17f12abc83304910e8b2c184' => '1',
    '261d6eeefee8ee6f398e8d4bef8b51df' => '2',
    'f0e730108d788a4fef7966157d223e12' => '3',
    '309cad2597b2273ecda6614169e79a78' => '4',
    '675fd8104b6fa3ae317cbdc7cb301400' => '5',
    '1d8960a26dce4fd9172a06154d66f692' => '6',
    '479765dcd17d683a3fdbcd5740e11c15' => '7',
    '6eeba41c618fdba24c8fd554023385a9' => '8',
    'f888465466ffa7c7c3cc6c5f12414ad3' => '9',
);

our %dstvtimehashes = ();

# ASP.NET form state used by the deprecated guide.dstv.com code path.
my $viewstate       = '';
my $eventvalidation = '';

my $ua = initialise_ua();

# Set up cache if needed
XMLTV::Memoize::check_argv('get_url');

######################################################################
# Get options.
# Command line state.  $days_exceeded is only ever set by the deprecated
# dstv.com block below and is checked at the very end of the run.
my ($opt_days, $opt_offset, $opt_help, $opt_output,
    $opt_configure, $opt_config_file, $opt_gui,
    $opt_quiet, $opt_list_channels, $opt_opentime,
    $opt_opentime_combined, $opt_retries, $opt_mnet_fallback,
    $opt_dstv_fallback, $days_exceeded);
#$opt_days = 14; # default
$opt_quiet = 0; # default
GetOptions('days=i'            => \$opt_days,
           'offset=i'          => \$opt_offset,
           'help'              => \$opt_help,
           'configure'         => \$opt_configure,
           'opentime'          => \$opt_opentime,
           'opentime-combined' => \$opt_opentime_combined,
           'config-file=s'     => \$opt_config_file,
           'gui:s'             => \$opt_gui,
           'output=s'          => \$opt_output,
           'quiet'             => \$opt_quiet,
           # Takes a count ("--retries N"); declared as a plain flag it
           # would always be 1 and leave N behind in @ARGV.
           'retries=i'         => \$opt_retries,
           'mnet-fallback'     => \$opt_mnet_fallback,
           'list-channels'     => \$opt_list_channels,
          )
  or usage(0);

# DEPRECATED
if (0){
# dstv.com only allows us to grab one of a few fixed day ranges and
# they all start from today.  For baseline compliance, data outside
# the range specified is stripped. mnet.co.za doesn't have this issue,
# so it's now the default site to use.  An extra day is also downloaded
# just to calculate the end time of the last program of the previous day
#
my %allowed_days = (1 => 0, 7 => 1, 14 => 2);
die "--offset cannot be negative" if defined $opt_offset and $opt_offset < 0;
die "--days must be positive" if defined $opt_days and $opt_days <= 0;
if ($opt_offset) {
    $opt_days += $opt_offset;
} else { $opt_offset = 0; }

my $use_days;
if ($opt_days) {
    $use_days = min grep { $_ >= $opt_days } keys %allowed_days;
    if (not defined $use_days) {
        $opt_days = $use_days = max keys %allowed_days;
        warn "rounding down to $use_days days for download (must be one of ".(keys %allowed_days).")\n";
        $days_exceeded = 1;
    } elsif (($use_days != $opt_days) && (!$opt_quiet)) {
        warn "dstv.com only supports the following days: ".(keys %allowed_days).". $use_days day(s) will be downloaded and extraneous ones skipped\n";
    }
# OK, now $use_days has the number of days to grab starting from now,
# if that was specified on the command line.  If this is specified in the
# config file it will also set this variable.  $opt_days will contain the
# number of days we actually want to keep data for.

}
}

# mobi.dstv.com only allows us to grab for 8 days including today
# (for now we will drop the end time of the last programme of the last day
#  - stop time is optional according to the DTD).
#
my $max_days = 8;
$opt_offset = 0 if !$opt_offset;
$opt_days = 7 if !$opt_days;
# Only read by the deprecated dstv_channel_uri() below.
my %allowed_days = (1 => 1, 2 => 2, 3 => 3, 4 => 4, 5 => 5, 6 => 6, 7 => 7, 8 => 8);
die "--offset cannot be negative" if $opt_offset < 0;
die "--days must be positive" if $opt_days <= 0;
die "--days exceeds site availability ($max_days)" if $opt_days > $max_days;
$opt_days = min($opt_days, $max_days);
die "--offset ($opt_offset) plus --days ($opt_days) exceeds site availability ($max_days)" if ($opt_offset + $opt_days > $max_days);
my $use_days = 0;



# Default retries = 3;
$opt_retries = 3 if !$opt_retries;

usage(1) if $opt_help;



XMLTV::Ask::init($opt_gui);

my $mode = XMLTV::Mode::mode('grab', # default
                             $opt_configure     => 'configure',
                             $opt_list_channels => 'list-channels',
                            );

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_za', $opt_quiet);



init_cookies();

if ($mode eq 'configure') {
    mode_configure();
    exit();
}

# Whatever we are doing, we need the channels data.
my %channels;
my %options;

read_config(\%channels);

# NOTE(review): $use_days is initialised to 0 above, so this only fires if
# read_config() (not visible here) undefines it - confirm whether this
# block is still needed.
if (not defined $use_days) {
    # Not got from command line or config file; default it.
    $opt_days = $use_days = 14;
}


#$opt_dstv_fallback = 1 if !(defined $opt_mnet_fallback || defined $opt_dstv_fallback);



#######################################
# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
    # Explicit write mode: the file name is taken literally instead of
    # being parsed for open() mode characters.
    my $fh = IO::File->new($opt_output, 'w');
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = $ENCODING;
my $writer = XMLTV::Writer->new(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
    # Write channels mode.
    get_channels(\%channels);
    foreach my $id (keys %channels) {
        $writer->write_channel({id => $id, 'display-name' => [ [ $channels{$id}, 'en' ] ]});
    }
    $writer->end();
    exit();
}

######################################################################
# We are producing full listings.
die if $mode ne 'grab';


# Prepare channel maps

#get_mnet_channel_mappings() if defined $opt_mnet_fallback || $opt_dstv_fallback;

#get_dstv_channel_mappings();
#if (keys %dstvchannelmap == 0) {
#    die "error: can't open channel map (http://www.dstv.com/DStv_Guide/default.aspx)";
#}
######################################################################
# begin main program


# Print out the channels
die "No channels specified, run me with --configure first\n"
  if not keys %channels;

foreach my $chanid (keys %channels) {
    my $n = $channels{$chanid};
    my $ch_xid = "$chanid.dstv.com";
    $writer->write_channel({ id => $ch_xid, 'display-name' => [ [ $n , 'en' ] ] });
}

# Each channel fetches one page per requested day plus one extra day (to
# learn the stop time of the last show), but never beyond the last day
# the site offers - which depends on where we start (--offset).
my $pages_per_channel = min($opt_days + 1, $max_days - $opt_offset);

# Declared separately: "my ... if ..." has unreliable semantics.
my $bar;
$bar = XMLTV::ProgressBar->new('getting listings', (scalar keys %channels) * $pages_per_channel)
  if not $opt_quiet;

foreach my $chanid (keys %channels) {
    process_html($chanid);
    #update $bar if not $opt_quiet;
}
$bar->finish() if not $opt_quiet;
$writer->end();

if (defined $days_exceeded) {
    # Exit with status 9 to signal that fewer days than asked were grabbed.
    $! = 9;
    die;
}

######################################################################
# subroutine definitions

# Use Log::TraceMessages if installed.
# t() writes a trace message and d() dumps a data structure; both become
# no-ops when the module is unavailable.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
        *t = sub {};
        *d = sub { '' };
    }
    else {
        *t = \&Log::TraceMessages::t;
        *d = \&Log::TraceMessages::d;
        Log::TraceMessages::check_argv();
    }
}

#sub tidy( $ ) {
#    for (my $tmp = shift) {
#        if (not defined $tmp) { return };
#        tr/\t\205/ /d;
#        s/([^\012\015\040-\176\240-\377]+)//g;
#        return $_;
#    }
#}

# Remove bad chars from an element.
# Takes a string (or undef, which is passed straight through) and returns
# a copy with non-breaking spaces and soft hyphens cleaned up.
sub tidy( $ ) {
    my $html = shift;
    return $html if !defined $html;
    $html =~ s/(\s)\xA0/$1/g;   # whitespace followed by NBSP -> just the whitespace
    $html =~ s/\xA0/ /g;        # replace any remaining NBSP with a space
    $html =~ s/\xAD//g;         # delete soft hyphens
    return $html;
}



# The URI to get listings for a given channel.
# Dies if the channel is unknown or the number of days was never set.
sub dstv_channel_uri( $ ) {  # DEPRECATED
    my $ch = shift;
    my $mapped = dstv_channel_map($ch);
    die "cannot look up '$ch' in map" if not defined $mapped;
    die if not defined $use_days;
    my $days_param = $allowed_days{$use_days};
    return "http://guide.dstv.com/listing/default.aspx?drpChannels=$mapped&drpDays=$days_param";
}

# Returns the option ID on the DSTV site for a given channel name
# (undef if unknown).  The map is fetched on first use.
sub dstv_channel_map ($) {  # DEPRECATED
    my $ch = shift;
    if (!%dstvchannelmap) {
        get_dstv_channel_mappings();
    }
    return $dstvchannelmap{$ch};
}

# The URI to get listings for a given channel.
# Arguments: channel name, day offset from today (0 = today).
# Dies if the channel name cannot be mapped to a site identifier.
sub dstvnew_channel_uri( $$ ) {
    my ($ch, $day) = @_;
    my $mapped = dstvnew_channel_map($ch);
    die "cannot look up '$ch' in map" if not defined $mapped;
    return "http://mobi.dstv.com/guide/$mapped/$day";
}

# Returns the option ID on the DSTV site for a given channel name
# (undef if unknown).  The map is fetched on first use.
sub dstvnew_channel_map ($) {
    my $ch = shift;
    if (!%dstvchannelmap) {
        get_dstvnew_channel_mappings();
    }
    return $dstvchannelmap{$ch};
}

# Same as above, for the old mnet site.
sub mnet_channel_map ($) {  # DEPRECATED
    my $ch = shift;
    if (!%mnetchannelmap) {
        get_mnet_channel_mappings();
    }
    return $mnetchannelmap{$ch};
}

# Turn a raw title and synopsis into an XMLTV programme hash.
#
# The site packs a lot into the synopsis: a quoted episode title, season /
# episode numbers, an "Aka", subtitle and Dolby markers, the year, the
# director and the cast.  Each piece is peeled off in turn; what is left
# becomes the description.
#
# Arguments: title, description (both plain strings).
# Returns:   hash reference with title, desc and, where found, sub-title,
#            episode-num, category, subtitles, audio, date and credits.
#            Text values are encoded into $ENCODING.
sub post_process($$) {
    my $title = shift;
    my $desc  = shift;

    my $subtitle     = undef;
    my $episode_num  = undef;
    my $year         = undef;
    my $actors       = undef;
    my $director     = undef;
    my $writers      = undef; # Unused right now
    my $commentators = undef; # Unused right now
    my $category     = undef;
    my $subtitles    = undef;
    my $dolby        = undef;

    # Try to get full title from description if title seems cut off
    $title =~ s/(^\s+|\s+$)//g;
    if ($title =~ /\.\.\.$/ ) {
        my $temp = $title;
        $temp =~ s/.\.\.\.$//g;
        # Try get full title from description.  The title is data, not a
        # pattern, so it is quoted before being used in the regex.
        if ($desc =~ /^'?(\Q$temp\E[^\.\?]+[^\'])'?[\.\?]\s+(.+)/i) {
            t "REMAPPING TITLE from $title to $1";
            $title = $1;
            $desc = $2;
            $title =~ s/(^\s+|\s+$)//g;
            $desc =~ s/(^\s+|\s+$)//g;
            t "New desc = $desc";
        }
    }

    if ($desc =~ /^'([^\.]+)'\.\s+(.+)/) {  # don't know why this excludes '.'
                                            # - means it fails to detect 'S1/E13 - ...A Better Place.'.
        $subtitle = $1;
        $desc = $2;
        t "FOUND EPISODE TITLE: $subtitle";
        t "Title: $title";
        t "New desc = $desc";
        $category = "series";
    }

    if ($subtitle && $subtitle =~ /^S?(\d+)\/E?(\d+)( - )?(.*)$/) {
        # xmltv_ns numbering is zero based
        $episode_num = ($1-1) . "." . ($2-1) . ".0/1";
        $subtitle = $4;
        t "FOUND EPISODE NUMBER: $episode_num";
        $category = "series";
    }

    if ($desc =~ /^Aka ([^\.]+)\. (.*)/) {
        $desc = $2;
        my $aka = $1;
        t "Aka found: $aka\n";
        # TODO - do something with the aka
    }

    if ($desc =~ /,? (HI|English) Subtitles\.?/) {
        $desc =~ s/,? (HI|English) Subtitles\.?//g;
        t "REMOVING Subtitles string";
        $subtitles = 1;
    }

    if ($desc =~ /,? DD\.?/) {
        $desc =~ s/,? DD\.?//g;
        t "REMOVING DD string";
        $dolby = 1;
    }

    # A "Press 'i'" placeholder title is replaced by the episode title -
    # but only when there is one, otherwise the title would become undef.
    if ($title =~ /^Press .i.$/ && defined $subtitle) {
        $title = $subtitle;
        $subtitle = undef;
    }

    if ($desc =~ /(.*) \((\d{4})\)\s*([^\.]+)\.?\s*$/) {
        $year = $2;
        $director = encode($ENCODING, $3);
        $desc = $1;
        t "desc = $desc\n";
        t "Year = $year\n";
        t "Director = $director\n";
    }

    if ($desc =~ /(.*) \((\d{4})\)\s*$/) {
        $desc = $1;
        $year = $2;
        t "desc = $desc\n";
        t "Year = $year\n";
    }

    if (defined $year && $desc =~ /(.*\.)\s+([^\.]+ [A-Z][^\.]+)\.\s*/) {
        $desc = $1;
        $actors = $2;
        if (defined $actors) {
            $actors =~ s/^\s+//g;
            $actors =~ s/\s+$//g;
            $actors = [ map { encode($ENCODING, $_) } split(/,\s+/, $actors) ];
        }
        $category = "movie";
    }

    # Trim whitespace from elements
    $title =~ s/(^\s+|\s+$)//g;
    $desc =~ s/(^\s+|\s+$)//g;
    $subtitle =~ s/(^\s+|\s+$)//g if $subtitle;

    $desc = "No description available" if ($desc eq "");

    # Encode into output charset
    $desc = encode($ENCODING, tidy($desc));
    $title = encode($ENCODING, tidy($title));
    $subtitle = encode($ENCODING, tidy($subtitle)) if defined $subtitle;

    my %prog;
    $prog{title} = [[$title]];
    $prog{'sub-title'} = [[$subtitle]] if $subtitle;
    $prog{'episode-num'} = [[$episode_num, "xmltv_ns"]] if $episode_num;
    $prog{desc} = [[$desc]];
    $prog{category} = [[ $category, 'en' ]] if $category;
    $prog{'subtitles'} = [ { type => 'teletext' } ] if $subtitles;
    $prog{'audio'}->{"stereo"} = "dolby digital" if $dolby;
    $prog{date} = $year if $year;

    # credits
    my %c;
    $c{director} = [ $director ] if $director;
    $c{actor} = $actors if $actors;
    $c{writer} = $writers if $writers;
    $c{commentator} = $commentators if $commentators;
    $prog{credits} = \%c if %c;

    return \%prog;
}

# Fetch and write the listings of one channel from guide.dstv.com.
# Returns "success", or the ":error:...:" string from the fetch.
sub process_dstv_html {  # DEPRECATED
    my $chanid = shift;
    my $name = $channels{$chanid};

    my $now = time();
    my $data;
    my $tries = 0;

    # URI just for error reporting.
    my $uri = dstv_channel_uri($name);
    local $SIG{__DIE__} = sub { die "$uri:$_[0]\n" };
    $data = tidy(get_dstv($name));

    if ($data =~ /:error:(.*):/) {
        return $data;
    }

    # Get time mappings
#    get_dstv_time_mappings($data);

    # parse the page to a document object
    my $tree = HTML::TreeBuilder->new();
    $tree->utf8_mode(1);
    $tree->parse($data) or die "cannot parse content\n";
    $tree->eof;
    my ($prev_r, $r, $prev_time);

    my @array_ot;

    # Find the main table, and loop through all the table rows

    # Find the date headers on the page
    my @date_headers = $tree->look_down(_tag => "td", class => 'srch_date_chnl_head');
    my $offset_counter = 0;
    foreach my $td (@date_headers) {
        $offset_counter++;
        next if (($offset_counter <= $opt_offset) || ($offset_counter > $opt_days+1));
        my $date = $td->as_text();
        $date =~ s/^[^0-9]+//g;


        my $tr = $td->parent();
        while (($tr = $tr->right())) {
            last if !defined $tr;

            my $result = $tr;

            my @alternating = $result->look_down(
                sub {
                    lc($_[0]->attr('_tag')) eq 'td' and lc($_[0]->attr('class')) eq 'srch_rslt_alternating'
                    or lc($_[0]->attr('_tag')) eq 'td' and lc($_[0]->attr('class')) eq 'srch_rslt'
                }
            );

            my $temp;
            last if !defined $alternating[0];
            $temp = $alternating[0];
            my $time = $temp->look_down(_tag => 'b');
            die 'no <b> thing (for time) found' if not defined $time;
            $time = $time->as_text;
            $time =~ /^(\d)(\d):(\d)(\d)$/ or die "bad time '$time'";

            $temp = $alternating[1]->look_down(_tag => "a", name => qr/Bookmark/)->look_down(_tag => "b");
            my $title = $temp->as_text;

            my ($rating, $duration);
            my $tempstring = $temp->right->as_text();

            if (defined $tempstring) {
                $rating = $1 if $tempstring =~ /Rating: ?(.+?)\s+/;
                $duration = $1 if $tempstring =~ /Duration: ?([0-9:]+)/;
            }

            t "$title: $rating: $duration\n";

            my $desc = $alternating[2]->as_text();
            t "---\n$desc\n---\n";

            my $start = gen_start_time($date, $time, $now);

            my $r = post_process($title, $desc);

            $start = $start . " $TZ";

            if ($rating) { $rating =~ s/(^\s+|\s+$)//g; }
            else { $rating = "Family"; }

            $r->{rating} = [[$rating, "DSTV"]];
            $r->{start} = $start;
            $r->{channel} = "$chanid.dstv.com";

            # A show can only be written once the start of the next one
            # (= its stop time) is known.
            if (defined $prev_r) {
                $prev_r->{stop} = $start;
                $writer->write_programme($prev_r);
            }

            $prev_time = $time;
            $prev_r = $r;
            if ($offset_counter > $opt_days) {
                $offset_counter++;
                last;
            }
        }
    }
    $data = "success";
    $tree->delete;
    return $data;
}

# Fetch and write the listings of one channel from the mnet site.
# Returns "success", or the ":error:...:" string from the fetch.
sub process_mnet_html {  # DEPRECATED
    my $chanid = shift;
    my $name = $channels{$chanid};

    my $now = time();
    my $data;
    my $tries = 0;
    $data = tidy(get_mnet($name));

    if ($data =~ /:error:(.*):/) {
        return $data;
    }

    # parse the page to a document object
    my $tree = HTML::TreeBuilder->new();
    $tree->utf8_mode(1);
    $tree->parse($data) or die "cannot parse get_mnet data for $name\n";
    $tree->eof;
    my ($prev_r, $r, $prev_time);

    my @array_ot;

    # Find the date headers on the page

    my @tags = $tree->look_down(
        sub {
            # the lcs are to fold case
            lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'scheduledate'
            or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'date'
            or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'scheduletime'
            or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'time'
            or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'scheduletitle'
            or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'title'
            or lc($_[0]->attr('_tag')) eq 'p'
            or lc($_[0]->attr('_tag')) eq 'a'
        }
    );


    my ($date,$time,$title,$rating);
    my $days_done = 0;
    foreach my $tag (@tags) {
        my $tag_text = $tag->as_text;
        if ($tag->attr('class') && lc($tag->attr('class')) =~ /date$/ ) {
            # Date
            $tag_text =~ s/\240/ /g;
            $tag_text =~ /(\d+) (.*) (\d{4})/;
            $date = "$1 $2";
            $days_done++;
            next;
        }
        if ($tag->attr('class') && lc($tag->attr('class')) =~ /time$/) {
            # Time
            $tag_text =~ /^\s?(\d\d:\d\d)$/;
            $time = "$1";

            next;
        }
        if ($tag->attr('class') && lc($tag->attr('class')) =~ /title$/) {
            # Title
            $tag_text =~ s/[\302\240]//g;
            $title = $tag_text;
            next;
        }

        if ($tag->attr('_tag') && $tag->attr('_tag') eq 'a' &&
            $tag->attr('onclick') && $tag->attr('onclick') =~ /OpenAgeRestriction/) {
            # Rating
            $rating = $tag_text;
            next;
        }

        if ($tag->attr('_tag') && $tag->attr('_tag') eq 'p' &&
            $date && $time && $title) {
            # Description

            my $desc = $tag_text;
            $desc =~ s/(^\s+|\s+$)//g;
            t "---\n$desc\n---\n";

            my $start = gen_start_time($date, $time, $now);

            my $r = post_process($title, $desc);

            $start = $start . " $TZ";

            if ($rating) { $rating =~ s/(^\s+|\s+$)//g; }
            else { $rating = "Family"; }

            $r->{rating} = [[$rating, "DSTV"]];
            $r->{start} = $start;
            $r->{channel} = "$chanid.dstv.com";

            if (defined $prev_r) {
                $prev_r->{stop} = $start;
                $writer->write_programme($prev_r);
            }
            $prev_time = $time;
            $prev_r = $r;
            undef $title;
            undef $time;
            if ($days_done > ($opt_days-$opt_offset)) { last; }
        }
    }

    $data = "success";
    $tree->delete;
    return $data;
}

# Fetch and write the listings of one channel from mobi.dstv.com.
#
# Arguments: channel id (key of %channels); a second argument is accepted
#            but not used.
# Returns:   the string "success".
# Dies if a page cannot be parsed or contains no recognisable schedule.
sub process_dstvnew_html {
    my $chanid = shift;
    my $daytograb = shift;
    my $name = $channels{$chanid};

    my $now = time();
    my $prev_r;

    # For each day requested get the page and extract the programmes.
    # Listings don't have a duration or a stop time so we need to work "one behind"
    # (i.e. we can only write a show once we have the start time of the *next* show).
    # This means we need to grab an extra day to calculate the stop time of the last show.
    # (Obviously if the 'offset + days' exceeds the site limit (i.e. 8 days data) then we can't do this
    #  and the last programme will be omitted from the xml).

    my $extra_day = $opt_offset + $opt_days;
    for (my $i = $opt_offset; $i <= $extra_day && $i < $max_days; $i++) {

        # get the page and parse to a document object
        my $result = get_dstvnew($name, $i);
        my $tree = HTML::TreeBuilder->new();
        $tree->utf8_mode(1);
        $tree->parse($result) or die "cannot parse content\n";
        $tree->eof;
        #use Data::Dumper; print Dumper($tree);exit;

        # Did we get a listings page?
        my $body = $tree->look_down(_tag => "body");
        my $page404 = $body ? $body->look_down(_tag => "h1") : undef;
        my $no_page = $page404 && $page404->as_text =~ /Page Not Found/;

        # Did we get a schedule?
        #   <div class="ui-listings">
        #     <div class="ui-empty">
        #       <p>Sorry, there is no scheduled programme information for the channel and day you have selected.</p>
        #     </div>
        #   </div><!-- end .ui-listings -->
        #
        my $nolistings = $tree->look_down(_tag => "div", class => 'ui-empty');

        if ($no_page || $nolistings) {
            # Nothing for this day.  The tree still has to be released
            # and the page still counts towards the progress total.
            $tree->delete;
            $bar->update() if not $opt_quiet;
            next;
        }

        # Get the date for this schedule
        # Can't find a consistent way of getting this :(
        # so we'll have to assume it matches the request
        #
        # NOTE(review): this is the UTC date, while start times are stamped
        # with $TZ; the two disagree for the hours around UTC midnight.
        # Confirm which day the site treats as "today".
        my $date = POSIX::strftime('%d %B', gmtime( time() + ($i * 86400) ));
        die 'could not find date of schedule' if !$date;


        # Find the main table, and loop through all the table rows
        #   <table id="ui-search-results">
        #     <tr class="ui-listing">
        #       <td class="ui-left">
        #         <span class="ui-time">00:35</span>
        #       </td>
        #       <td class="ui-right">
        #         <h4 class="ui-title">House Of Cards</h4>
        #         <p class="ui-synopsis">'S1/E5'. Claire discovers that her own ambitions are at risk because of her husband's stance on the education bill. Zoe finds that work and play don't have to be mutually exclusive. (16)</p>
        #       </td>
        #     </tr>
        #
        # ??? sometimes it seems we get 'div' => 'ui-listings' but other times we get  'table' => 'ui-search-results'  !!!
        # NOTE(review): the sample above shows class="ui-listings" but the
        # fallback below matches on id - verify against the live page.
        my $schedule = $tree->look_down(_tag => 'table', id => 'ui-search-results');
        if (!$schedule) {
            #print STDERR 'could not find schedule'."\n" if !$schedule;
            $schedule = $tree->look_down(_tag => 'div', id => 'ui-listings');
        }
        die 'could not find schedule'."\n" if !$schedule;
        my @shows = $schedule->look_down(_tag => "tr", class => 'ui-listing');

        # Process each programme
        foreach my $show (@shows) {

            my $time_el  = $show->look_down(class => 'ui-time');
            my $title_el = $show->look_down(class => 'ui-title');
            my $desc_el  = $show->look_down(class => 'ui-synopsis');

            # A row without a time or a title cannot become a programme.
            if (!$time_el || !$title_el) {
                t "skipping listing row without time or title";
                next;
            }

            my $time  = $time_el->as_text;
            my $title = $title_el->as_text;
            my $desc  = $desc_el ? $desc_el->as_text : '';

            # The age rating is the trailing " (xx)" of the synopsis; cut
            # exactly that off (and nothing that merely looks like it
            # earlier in the text).
            my $rating;
            if ($desc =~ s/ \((13|16|18|PG|PG13|R18|Family)\)$//) {   # any others? is there a better way (e.g. 2 numeric || 4 alpha-num)?
                $rating = $1;
            }

            my $start = gen_start_time($date, $time, $now);
            $start = $start . " $TZ";

            my $r = post_process($title, $desc);

            $r->{rating} = [[$rating, "DSTV"]] if $rating;
            $r->{start} = $start;
            $r->{channel} = "$chanid.dstv.com";

            if (defined $prev_r) {
                $prev_r->{stop} = $start;
                $writer->write_programme($prev_r);
            }
            $prev_r = $r;

            # if we are here only to get the start time of the first programme, then we are done
            if ($extra_day == $i) { last; }

        }
        $tree->delete;
        $bar->update() if not $opt_quiet;

    }

    return "success";
}


####
# process_html: grab and write the listings for one channel
#
# arguments:
#    xmltv id of channel (key of %channels)
#
# Each configured site is tried in turn until one does not answer with an
# ":error:...:" string.  Returns nothing.
#
sub process_html {
    my $id = shift;
    my $name = $channels{$id};

    t "Getting Channel $id";

    my $result;

    my @order = ('dstv');
#    my @order = ('mnet', 'dstv');
#    if ($opt_mnet_fallback) {
#        @order = ('dstv', 'mnet');
#    }
    my %processfuncs = (
    #    'dstv' => \&process_dstv_html,
        'dstv' => \&process_dstvnew_html,
    #    'mnet' => \&process_mnet_html,
    );

    foreach my $site (@order) {
        my $func = $processfuncs{$site};
        $result = $func->($id);
        t "result: $result";
        if ($result !~ /^:error:(.*):/) {
            return;
        }
        say("\nSite $site returned no data - attempting next site for $name");
    }
    say("\nskipping channel '$name'.  All sites failed");
}

# get channel listing
#
# Argument: hash reference that receives channel number => channel name.
# Dies if the channel page cannot be fetched or parsed, or if it yields no
# channels.
sub get_channels {

    my $channels = shift;

    # Flat list of (site name, url) pairs, tried in order.
    my @urls = (
#        'Google' , 'http://www.google.com/search?q=cache%3Awww.dstv.com%2Fmain.aspx%3FID%3D136',
#        'DSTV'   , 'http://www.dstv.com/dstvsa/content/en/sa/dstv_premium?categorylistsearch=cl_results&category_id=158',
# vmlf - Added new link for channel list of premium bouquet
#        There are other DSTV bouquets available, each one corresponds to a different bId
#        'DSTV'   , 'http://www.dstv.com/dstvsa/content/en/sa/products?bId=1',
        'DSTV'   , 'http://mobi.dstv.com/?enter=za',
    );

    my $local_data;
    my $bar;
    for (my $i = 0; $i < $#urls; $i += 2) {
        my $key = $urls[$i];
        my $url = $urls[$i+1];
        $bar = XMLTV::ProgressBar->new("Getting list of channels from $key site", 1) if not $opt_quiet;
        t "Getting $key from $url";

        $local_data = get_url('GET', $url);
        if (!defined $local_data || $local_data =~ /^:error/) {
            $bar->finish() if not $opt_quiet;
            print STDERR "Unable to get channel listing from $key site\n"
              if not $opt_quiet;
            next;
        }
        last;
    }
    if (!defined $local_data || $local_data =~ /^:error/) {
        print STDERR "Unable to get channel listing from any site\n.  Please check your connectivity or try again later\n"
          if not $opt_quiet;
        die;
    }

    t "Got channel data ".length($local_data)." bytes - about to parse";

    my $tree = HTML::TreeBuilder->new();
    $tree->utf8_mode(1);
    $tree->parse($local_data) or die "cannot parse content of channels page\n";
    $tree->eof;

# honir : DEPRECATED
#    # vmlf - DSTV NEW channel list page url includes all types of channel,
#    #        so we need filter channels that are inside the videoChannels div
#    #        to get the tv channels only
#    my @list = $tree->look_down(
#        _tag => 'span',
#        sub {
#            $_[0]->look_up(_tag => 'div', id => 'videoChannels') and
#            $_[0]->look_up(_tag => 'div', id => 'channel_list') and
#            id => 'header_back'
#        },
#    );
#    foreach my $entry (@list) {
#        my $temp = $entry->right();
#
#        $temp =~ /([\w\s&\+!-]+).*\|[^\d]+(\d+)$/;
#        my $name = $1;
#        my $chanid = $2;

    my $form = $tree->look_down( _tag => 'form', id => 'guide-channel-select' )
      or die "cannot find channel selection form on channels page\n";

    my %ignore = map { $_ => 1 } @dstvignorechannels;

    foreach my $entry ($form->look_down( _tag => 'option' )) {

        my $chanrefid = $entry->attr('value');
        next if !defined $chanrefid || $chanrefid eq '0' || $chanrefid eq '';

        # The option text is "<channel number> <channel name>".  Entries
        # that do not look like that are skipped; they must not pick up
        # the captures of the previous entry.
        my $text = $entry->as_text();
        my ($chanid, $name) = $text =~ /^(\d*)\s(.*)$/;
        if (!defined $name) {
            t "Ignore unparsable channel entry '$text'";
            next;
        }

        $name =~ s/\s+$//g;
        if ($ignore{$name}) {
            t "Ignore bogus channel $name";
        } else {
            t "Channel $chanid = $name";
            $channels->{$chanid} = $name;
        }
    }
    $tree->delete;
    die "no channels could be found" if not keys %$channels;
    $bar->update() if not $opt_quiet;
    $bar->finish() if not $opt_quiet;
}

# Bump a YYYYMMDD date by one.
sub nextday {
    my $d = shift;
    my $p = parse_date($d);
    my $n = DateCalc($p, '+ 1 day');
    return UnixDate($n, '%Q');
}

# Interactive configuration: fetch the channel list, ask which channels
# and which options are wanted, and write them to $config_file.
# Does not return; the program exits when configuration is finished.
sub mode_configure {

    XMLTV::Config_file::check_no_overwrite($config_file);
    get_channels(\%channels);

    open(my $conf_fh, '>', $config_file)
      or die "cannot write to $config_file: $!";

    # Ask about each channel.
    my @chs = sort {uc($channels{$a}) cmp uc($channels{$b})} keys %channels;
    my @qs = map { "add channel '$channels{$_}'? " } @chs;
    my @want = ask_many_boolean(1, @qs);
    foreach my $chanid (@chs) {
        my $w = shift @want;
        warn("cannot read input, stopping channel questions"), last
          if not defined $w;
        # Print a config line, but comment it out if channel not wanted.
        print {$conf_fh} '#' if not $w;
        my $name = $channels{$chanid};
        print {$conf_fh} "channel $chanid $name\n";
    }

    #my @choices = (1,7,14);
    my @choices = (1,2,3,4,5,6,7,8);
    my $days = ask_choice("Number of days to retrieve",$choices[2], @choices);
    print {$conf_fh} "option days $days\n";

    # An unanswered question (empty or no input) selects the default.
    my $retries = ask("Number of retries for failed downloads? (3)");
    $retries = 3 if !defined $retries || $retries eq "";
    print {$conf_fh} "option retries $retries\n";

    my $timeout = ask("Timeout for requests? (240)");
    $timeout = 240 if !defined $timeout || $timeout eq "";
    print {$conf_fh} "option timeout $timeout\n";

#    say ("This grabber can get the listings from either mnet.co.za, or dstv.com");
#    say ("Which site would you like to use as the main site (mnet recommended)");
#    @choices = ('dstv','mnet');
#    my $fallback_option = ask_choice("Select one of: ",$choices[1], @choices);
#    if ($fallback_option eq 'dstv') {
#        print CONF "option mnet-fallback 1\n";
#    } else {
#        print CONF "option dstv-fallback 1\n";
#    }
    close $conf_fh or warn "cannot close $config_file: $!";
    say("Finished configuration. ");

    exit();
}

# Remember the ASP.NET form state (__VIEWSTATE / __EVENTVALIDATION) found
# in a page so that it can be sent back with the next POST.
sub update_dstv_eventstate {  # DEPRECATED
# update form state attributes
    my $data = shift;

    if ($data =~ /id=\"__VIEWSTATE\" value=\"(.*)\"/) {
        $viewstate = $1;
        t "got viewstate: $viewstate";
    } else {
        print STDERR "VIEWSTATE not found\n" if not $opt_quiet;
    }

    if ($data =~ /id=\"__EVENTVALIDATION\" value=\"(.*)\"/) {
        $eventvalidation = $1;
        t "got eventvalidation: $eventvalidation";
    } else {
        print STDERR "EVENTVALIDATION not found\n" if not $opt_quiet;
    }
}

# Initialize cookies and retrieve current channel ID's
sub get_dstv_channel_mappings {  # DEPRECATED
    t "refreshing dstv channel mappings";

    my $url = "http://guide.dstv.com/listing/default.aspx";
    my $data = get_url("GET", $url);

    if ($data =~ /^:error:/) {
        print STDERR "Error getting dstv channel state data: $data\n"
          if not $opt_quiet;
        return;
    }

    update_dstv_eventstate($data);

    my %info = (
        '__VIEWSTATE' => $viewstate,
        'drpBouquet' => '1',
        'drpChannels' => '0',
        'drpDays' => '0',
        'txtKeyword' => 'Keyword...',
        '__EVENTVALIDATION' => $eventvalidation,
        '__EVENTTARGET' => 'drpBouquet',
        '__EVENTARGUMENT' => '',
        '__LASTFOCUS' => '',
    );

    $data = get_url("POST", $url, $url, undef, \%info);

    if ($data =~ /^:error:/) {
        print STDERR "Error Getting dstv channel mappings: $data\n"
          if not $opt_quiet;
        return;
    }

    update_dstv_eventstate($data);

    my $chantree = HTML::TreeBuilder->new();
    $chantree->utf8_mode(1);
    $chantree->parse($data) or die "cannot parse content of channels page\n";
    $chantree->eof;

    my $channame;
    my $chanid;
    my $chanselect = $chantree->look_down(_tag => 'select', name => 'drpChannels');
    my @chan_list = $chanselect->look_down(_tag => "option");
    foreach my $chanentry
(@chan_list) { 1204 $chanid = $chanentry->attr('value'); 1205 $channame = $chanentry->as_text; 1206 $dstvchannelmap{$channame} = $chanid; 1207 if ($dstvchannelfixups{$channame}) { 1208 $dstvchannelmap{$dstvchannelfixups{$channame}} = $chanid; 1209 } 1210 1211 t "Found channel $channame; internal reference $chanid"; 1212 } 1213 1214 $chantree->delete; 1215 1216 t "Refresh successful"; 1217 die "no channels could be found" if not keys %dstvchannelmap; 1218} 1219 1220# Initialize cookies and retrieve current channel ID's 1221sub get_mnet_channel_mappings { # DEPRECATED 1222 1223 t "refreshing mnet channel mappings"; 1224 1225 my $url = 'http://www.mnet.co.za/schedules/default.asp'; 1226 my $result = get_url("GET", $url); 1227 if ($result =~ /^:error:/) { 1228 if ($result =~ /^:error:no data:(.+)$/s) { 1229 $result = $1; 1230 } else { 1231 print STDERR "Error Getting mnet channel mappings: $result\n" 1232 if not $opt_quiet; 1233 return; 1234 } 1235 } 1236 1237 my $chantree = HTML::TreeBuilder->new(); 1238 $chantree->utf8_mode(1); 1239 $chantree->parse($result) or die "cannot parse content of $url\n"; 1240 $chantree->eof; 1241 1242 my $chanselect = $chantree->look_down(_tag => 'select', name => 'channelid'); 1243 my @chan_list = $chanselect->look_down(_tag => "option"); 1244 foreach my $chanentry (@chan_list) { 1245 my $chantemp = $chanentry->as_text; 1246 my $newchan; 1247 $chantemp =~ s/(^\s+|\s+$)//g; 1248 if ($chanentry->attr('value') =~ /^[\d\(\)]+$/) { 1249 foreach my $fixup (keys %mnetchannelfixups) { 1250 if ($fixup eq $chantemp) { 1251 $newchan = $mnetchannelfixups{$fixup}; 1252 t "fixing up $chantemp to $newchan"; 1253 } 1254 } 1255 $newchan = $chantemp if not defined $newchan; 1256 $mnetchannelmap{$newchan} = $chanentry->attr('value'); 1257 } 1258 } 1259 $chantree->delete; 1260} 1261 1262# Initialize cookies and retrieve current channel ID's 1263sub get_dstvnew_channel_mappings { 1264 t "refreshing dstv channel mappings"; 1265 1266 my $url = 
"http://mobi.dstv.com/?enter=za"; 1267 1268 my $result = get_url('GET', $url); 1269 my $chantree = HTML::TreeBuilder->new(); 1270 $chantree->utf8_mode(1); 1271 $chantree->parse($result) or die "cannot parse content\n"; 1272 $chantree->eof; 1273 1274 my ($channame, $chanid, $channum); 1275 my @chan_list = $chantree->look_down( _tag => 'form', id => 'guide-channel-select' )->look_down( _tag => 'option' ); 1276 foreach my $chanentry (@chan_list) { 1277 $chanid = $chanentry->attr('value'); 1278 next if $chanid eq '0' || $chanid eq ''; 1279 1280 my $temp = $chanentry->as_text; 1281 $temp =~ /^(\d*)\s(.*)$/; 1282 $channum = $1; 1283 $channame = $2; 1284 1285 $dstvchannelmap{$channame} = $chanid; 1286 if ($dstvchannelfixups{$channame}) { 1287 $dstvchannelmap{$dstvchannelfixups{$channame}} = $chanid; 1288 } 1289 1290 t "Found channel $channame; internal reference $chanid"; 1291 } 1292 1293 $chantree->delete; 1294 1295 t "Refresh successful"; 1296 die "no channels could be found" if not keys %dstvchannelmap; 1297} 1298 1299sub get_dstv_time_mappings() { # DEPRECATED 1300 my $data = shift; 1301 my ($res,$req); 1302 1303 my $tree = HTML::TreeBuilder->new(); 1304 $tree->utf8_mode(1); 1305 $tree->parse($data) or die "cannot parse dstv time mappings\n"; 1306 $tree->eof; 1307 1308 my @tags = $tree->look_down( 1309 sub { 1310 # the lcs are to fold case 1311 lc($_[0]->attr('_tag')) eq 'img' and lc($_[0]->attr('src')) =~ /^get\.aspx\?guid/ 1312 } 1313 ); 1314 1315 our %dstvtimehashes = (); 1316 foreach my $tag (@tags) { 1317 my $temptag = $tag->attr('src'); 1318 $tag->attr('src') =~ /^get\.aspx\?GUID=(.*)$/; 1319 my $guid = $1; 1320 if (not defined $dstvtimehashes{$guid}) { 1321 # unique guid - get data 1322 my $url = "http://www.dstv.com/DStv_Guide/get.aspx?GUID=$1"; 1323 t "getting time mapping for GUID: $1"; 1324 $req = GET $url; 1325 $req->header('Accept-Encoding','gzip'); 1326 $req->header('Referer','http://www.dstv.com/DStv_Guide/default.aspx'); 1327 $res = $ua->request($req); 
1328 if ($res->is_success) { 1329 if (($res->headers()->header('Content-Encoding')) && 1330 ($res->headers()->header('Content-Encoding') eq 'gzip')) { 1331 $res->content(Compress::Zlib::memGunzip($res->content)); 1332 } 1333 # hash 1334 my $imagehash = md5_hex($res->content); 1335 # compare 1336 if (defined $dstvfilehashes{$imagehash}) { 1337 $dstvtimehashes{$guid} = $dstvfilehashes{$imagehash}; 1338 } else { 1339 if (! -f "$guid.gif") { 1340 if (not $opt_quiet) { 1341 print STDERR "Undefined image mapping for GUID=$guid\n"; 1342 print STDERR "MD5 = $imagehash\n"; 1343 print STDERR "Saving to file $guid.gif\n"; 1344 } 1345 if (!open GIF, ">$guid.gif") { 1346 print STDERR "Cannot write file: $!\n" 1347 if not $opt_quiet; 1348 next; 1349 } 1350 print GIF $res->content; 1351 close GIF; 1352 } 1353 } 1354 1355 } 1356 } 1357 } 1358 1359 $tree->delete; 1360 1361} 1362 1363# Download listings for a channel name - refresh mappings if necessary 1364sub get_dstv() { # DEPRECATED 1365 my $channame = shift; 1366 my $url = "http://guide.dstv.com/listing/default.aspx"; 1367 1368 my $mapped = dstv_channel_map($channame); 1369 die "cannot look up '$channame' in map" if not defined $mapped; 1370 die if not defined $use_days; 1371 my $days_param = $allowed_days{$use_days}; 1372 1373 my %info = ( 1374 '__VIEWSTATE' => $viewstate, 1375 'drpBouquet' => '1', 1376 'drpChannels' => $mapped, 1377 'drpDays' => $days_param, 1378 'txtKeyword' => 'Keyword...', 1379 'btnSubmit.x' => '16', 1380 'btnSubmit.y' => '15', 1381 '__EVENTVALIDATION' => $eventvalidation, 1382 '__EVENTTARGET' => '', 1383 '__EVENTARGUMENT' => '', 1384 '__LASTFOCUS' => '', 1385 ); 1386 t "getting channel: $channame (ID = $mapped)"; 1387 1388 my $result = get_url("POST", $url, $url, undef, \%info); 1389 1390 if ($result =~ /^:error:/) { 1391 # Always attempt a refresh of channel mappings once 1392 get_dstv_channel_mappings(); 1393 $mapped = dstv_channel_map($channame); 1394 die "cannot look up '$channame' in map" if not 
defined $mapped; 1395 %info = ( 1396 '__VIEWSTATE' => $viewstate, 1397 'drpChannels' => $mapped, 1398 'drpDays' => $days_param, 1399 'txtKeyword' => '', 1400 'btnSubmit.x' => '15', 1401 'btnSubmit.y' => '12', 1402 '__EVENTVALIDATION' => $eventvalidation, 1403 ); 1404 $result = get_url("POST", $url, $url, undef, \%info); 1405 } 1406 1407 return $result; 1408} 1409 1410sub get_mnet() { # DEPRECATED 1411 my $channame = shift; 1412 my $data; 1413 my $tries = 0; 1414 my $req; 1415 my $res; 1416 1417 my $chanid = mnet_channel_map($channame); 1418 if (not defined $chanid) { 1419 my $msg = "no corresponding mnet channel found for $channame"; 1420 print STDERR "\n$msg" if not $opt_quiet; 1421 # This seems to be the convention for returning errors. 1422 return ":error:$msg:"; 1423 } 1424 1425 my $start_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($opt_offset)*86400))); 1426 my $end_date; 1427 if ($opt_days < max keys %allowed_days) { 1428 $end_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($opt_days)*86400))); 1429 } else { 1430 $end_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($opt_days-1)*86400))); 1431 } 1432 my %info = ( 1433 'startDate' => $start_date, 1434 'EndDate' => $end_date, 1435 'sType' => '5', 1436 'channelid' => $chanid, 1437 'searchstring' => '', 1438 'channel' => $chanid, 1439 'theType' => 'today', 1440 'firstRun' => 'false', 1441 ); 1442 t "getting channel: $channame (ID = $chanid)"; 1443 $data = get_url("POST", "http://www.mnet.co.za/schedules/default.asp", 'http://www.mnet.co.za/schedules/default.asp', undef, \%info); 1444 return $data; 1445} 1446 1447# Download listings for a channel-day - refresh mappings if necessary 1448sub get_dstvnew() { 1449 my $channame = shift; 1450 my $daytograb = shift; 1451 my $url = dstvnew_channel_uri($channame, $daytograb); 1452 #print STDERR $url."\n"; 1453 my $result = get_url('GET', $url); 1454 1455 #my $fn = 'grab'.time(); my $fhok = open my $fh, '>', $fn or warning("Cannot open file $fn"); print $fh 
$result; close $fh; 1456 1457 return $result; 1458} 1459 1460sub init_cookies { 1461 # get_nice('http://guide.dstv.com/listing/default.aspx'); 1462 get_nice('http://mobi.dstv.com/home'); 1463 my $bar = new XMLTV::ProgressBar('Initialising cookies', 1) 1464 if not $opt_quiet; 1465 update $bar if not $opt_quiet; 1466 $bar->finish() if not $opt_quiet; 1467} 1468 1469sub gen_start_time { 1470 my ($date, $time, $now) = @_; 1471 1472 # Date = 'Friday 23 May' 1473 # Time = '14:00'; 1474 # str2time sometimes gets the wrong year 1475 # Append the current year to the date 1476 # If we are in Nov or Dec, reading for Jan or Feb, year++ 1477 $date =~ s/^(Today|Tomorrow|Tommorrow|Tommorow|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)//g; 1478 my $year = (gmtime($now))[5] + 1900; 1479 my $mon = (gmtime($now))[4] + 1; 1480 if (($mon == 11 || $mon == 12) && ($date =~ /(January|February)/)) { 1481 $year++; 1482 } 1483 my $timestamp = UnixDate("$date $year $time", "%s"); 1484# my $timestamp = str2time("$date $year $time"); 1485 if (!defined $timestamp) { 1486 print STDERR "Error: Cannot decode time: $date $year $time\n"; 1487 } 1488 my $rv = POSIX::strftime("%Y%m%d%H%M%S", gmtime($timestamp)); 1489 return $rv; 1490} 1491 1492sub initialise_ua { 1493 my $cookies = HTTP::Cookies->new; 1494 #my $ua = LWP::UserAgent->new(keep_alive => 1); 1495 my $ua = LWP::UserAgent->new; 1496 # Cookies 1497 $ua->cookie_jar($cookies); 1498 # Define user agent type 1499 $ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US)'); 1500 # Define timouts 1501 $ua->timeout(240); 1502 # Use proxy if set in http_proxy etc. 1503 $ua->env_proxy; 1504 1505 return $ua; 1506} 1507 1508sub read_config { 1509 my $channels = shift; 1510 my $options = shift; 1511 1512 my @config_lines = XMLTV::Config_file::read_lines($config_file); 1513 1514 1515 # Read configuration. 
1516 my $line_num = 1; 1517 foreach (@config_lines) { 1518 ++ $line_num; 1519 next if not defined; 1520 s/#.*//g; 1521 next if /^\s+$/; 1522 s/\s+$//g; 1523 if (/^channel/) { 1524 my (undef, $chanid, $name) = split(/\s+/, $_, 3); 1525 $channels->{$chanid} = $name; 1526 } 1527 if (/^option/) { 1528 my (undef, $conf_option, $conf_value) = split(/\s+/, $_, 3); 1529 1530# $opt_mnet_fallback = 1 if $conf_option eq 'mnet-fallback'; 1531# $opt_dstv_fallback = 1 if $conf_option eq 'dstv-fallback'; 1532 $opt_retries = $conf_value if $conf_option eq 'retries'; 1533 $ua->timeout($conf_value) if $conf_option eq 'timeout'; 1534 1535 if ($conf_option eq 'days') { 1536 if (defined $opt_days) { 1537 # Day stuff was given on the command line. This 1538 # should override whatever's in the config file. 1539 # 1540 } else { 1541 # Set the number of days from the config file. It 1542 # must be one of the numbers allowed by the site. 1543 $opt_days = $use_days = $conf_value; 1544 die "bad number of days $use_days in config file\n" 1545 if not grep { $_ == $use_days } keys %allowed_days; 1546 } 1547 } 1548 } 1549 } 1550# die 'config file: --mnet-fallback and --dstv-fallback are mutually exclusive' 1551# if (defined $opt_mnet_fallback && $opt_dstv_fallback); 1552} 1553 1554sub get_url($$$$$) { 1555 1556 my $method = shift; 1557 my $url = shift; 1558 my $referrer = shift; 1559 my $agent = shift; 1560 my $varhash = shift; 1561 1562 1563 t "Downloading URL: $url"; 1564 my $req = GET "$url"; 1565 $req->header('Accept-Encoding','gzip'); 1566 $req->header('Referer',$referrer) if defined $referrer; 1567 $req->agent($agent) if defined $agent; 1568 1569 my $tries = 0; 1570 my $data; 1571 my $offset_counter = 0; 1572 while ($tries < $opt_retries && not defined $data) { 1573 $tries++; 1574 my $res; 1575 1576 t "Attempt $tries"; 1577 1578 if (lc($method) eq 'post') { 1579 $res = $ua->post($url, $varhash); 1580 } else { 1581 $res = $ua->request(GET "$url"); 1582 } 1583 if ($res->is_success) { 1584 if 
(($res->headers()->header('Content-Encoding')) && 1585 ($res->headers()->header('Content-Encoding') eq 'gzip')) { 1586 $res->content(Compress::Zlib::memGunzip($res->content)); 1587 } 1588 if (! $res->content =~ /class="srch_rslt_head1"/) { 1589 t "No listing data found"; 1590 $data = ":error:no data:" . $res->content; 1591 } else { 1592 $data = $res->content; 1593 } 1594 } else { 1595 print STDERR "\nserver error: " . $res->status_line 1596 if not $opt_quiet; 1597 t "Failed" 1598 } 1599 } 1600 if (not $data) {$data = ":error:maximum retries:"}; 1601 return $data; 1602}