1#!/usr/local/bin/perl 2 3=pod 4 5=head1 NAME 6 7tv_grab_pt - Grab TV listings for Portugal. 8 9=head1 SYNOPSIS 10 11tv_grab_pt --help 12 13tv_grab_pt [--config-file FILE] --configure [--gui OPTION] 14 15tv_grab_pt [--config-file FILE] [--output FILE] [--days N] 16 [--offset N] [--fast] [--quiet] [--icons] 17 18tv_grab_pt --list-channels 19 20=head1 DESCRIPTION 21 22Output TV listings for several channels available in Portugal. 23It supports the public network and the private NetCabo network. 24 25First run B<tv_grab_pt --configure> to choose, which channels you want 26to download. Then running B<tv_grab_pt> with no arguments will output 27listings in XML format to standard output. 28 29B<--configure> Prompt for which channels, 30and write the configuration file. 31 32B<--gui OPTION> Use this option to enable a graphical interface to be used. 33OPTION may be 'Tk', or left blank for the best available choice. 34Additional allowed values of OPTION are 'Term' for normal terminal output 35(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar. 36 37B<--config-file FILE> Set the name of the configuration file, the 38default is B<~/.xmltv/tv_grab_pt.conf>. This is the file written by 39B<--configure> and read when grabbing. 40 41B<--days N> Grab N days. The default is 7 days. 42 43B<--offset N> Start N days in the future. The default is to start 44from today. 45 46B<--fast> Only fetch summary information for each programme. This is 47only title, start/stop times, category, episode number. 48 49B<--output FILE> Write to FILE rather than standard output. 50 51B<--quiet> Suppress the progress messages normally written to standard 52error. 53 54B<--icons> Fetches channels icons/logos [deprecated - this is now the default] 55 56B<--version> Show the version of the grabber. 57 58B<--help> Print a help message and exit. 59 60=head1 SEE ALSO 61 62L<xmltv(5)>. 63 64=head1 AUTHOR 65 66Bruno Tavares, gawen@users.sourceforge.net, based on tv_grab_es, from Ramon Roca. 67 68Grabber Site : http://bat.is-a-geek.com/XMLGrabPt 69 70=head1 BUGS 71 72=cut 73 74###################################################################### 75# initializations 76 77use warnings; 78use strict; 79use XMLTV::Version '$Id: tv_grab_pt,v 1.58 2016/03/26 15:51:52 bilbo_uk Exp $ '; 80use XMLTV::Capabilities qw/baseline manualconfig cache/; 81use XMLTV::Description 'Portugal'; 82use Getopt::Long; 83#use Date::Manip; 84use DateTime; 85#use Data::Dumper; 86use HTML::TreeBuilder; 87use HTML::Entities; # parse entities 88use HTTP::Cache::Transparent; 89use Encode; 90use IO::File; 91use File::Path; 92use File::Basename; 93#use LWP::UserAgent; 94 95use XMLTV; 96use XMLTV::Memoize; 97use XMLTV::ProgressBar; 98use XMLTV::Ask; 99use XMLTV::Config_file; 100use XMLTV::DST; 101use XMLTV::Get_nice 0.005067; 102use XMLTV::Mode; 103# Todo: perhaps we should internationalize messages and docs? 104use XMLTV::Usage <<END 105$0: get Portuguese television listings in XMLTV format 106To configure: $0 --configure [--config-file FILE] [--gui OPTION] 107To grab listings: $0 [--config-file FILE] [--output FILE] [--quiet] [--offset OFFSET] [--days DAYS] [--icons] 108To list channels: $0 --list-channels 109END 110 ; 111 112my $DOMAIN = 'nos.pt'; 113my $SOURCE_URL = "http://www.$DOMAIN"; 114 115# Attributes of the root element in output. 116my $HEAD = { 'source-info-url' => "http://$DOMAIN/", 117 'source-data-url' => "http://www.$DOMAIN/particulares/televisao/guia-tv/", 118 'generator-info-name' => 'XMLTV', 119 'generator-info-url' => 'http://xmltv.org/', 120 }; 121 122# default language 123my $LANG="pt"; 124 125# Global channel_data 126our @ch_all; 127 128###################################################################### 129# get options 130 131# Get options, including undocumented --cache option. 132XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux'); 133our ($opt_help, $opt_output, 134 $opt_configure, $opt_config_file, $opt_gui, $opt_quiet, 135 $opt_list_channels, $opt_offset, $opt_days, $opt_fast, $opt_icons, 136 $opt_debug); 137$opt_quiet = 0; # default 138$opt_days = 7; # default 139$opt_offset = 0; # default 140$opt_fast = 0; # default 141$opt_debug = 0; 142GetOptions('help' => \$opt_help, 143 'configure' => \$opt_configure, 144 'config-file=s' => \$opt_config_file, 145 'gui:s' => \$opt_gui, 146 'output=s' => \$opt_output, 147 'quiet' => \$opt_quiet, 148 'list-channels' => \$opt_list_channels, 149 'offset=i' => \$opt_offset, 150 'days=i' => \$opt_days, 151 'fast' => \$opt_fast, 152 'icons' => \$opt_icons, # Fetches channels icons/logos [deprecated - this is now the default] 153 'debug' => \$opt_debug, # undocumented 154 ) 155 or usage(0); 156usage(1) if $opt_help; 157 158# Initialise the web page cache 159HTTP::Cache::Transparent::init( { 160 BasePath => get_default_cachedir(), 161 NoUpdate => 4*3600, # cache time in seconds 162 MaxAge => 24, # flush time in hours 163 Verbose => $opt_debug, 164} ); 165##$XMLTV::Get_nice::Delay = 0 if $opt_debug; 166 167XMLTV::Ask::init($opt_gui); 168 169 170our $first_day = ($opt_offset || 0); 171our $last_day = $first_day + $opt_days; 172die 'cannot grab more than one week ahead' if $first_day >= 7 || $last_day > 7; 173 174my $mode = XMLTV::Mode::mode('grab', # default 175 $opt_configure => 'configure', 176 $opt_list_channels => 'list-channels', 177 ); 178 179# File that stores which channels to download. 180my $config_file 181 = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_pt', $opt_quiet); 182 183my @config_lines; # used only in grab mode 184if ($mode eq 'configure') { 185 XMLTV::Config_file::check_no_overwrite($config_file); 186 mkpath(dirname($config_file)); 187} 188elsif ($mode eq 'grab') { 189 @config_lines = XMLTV::Config_file::read_lines($config_file); 190} 191elsif ($mode eq 'list-channels') { 192 # Config file not used. 193} 194else { die } 195 196# Whatever we are doing, we need the channels data. 197my $token; 198my %channels = get_channels(); # sets @ch_all 199my %channelnumbers; 200my @channels; 201 202my %icons = (); 203%icons = get_icons() if $opt_icons; 204 205 206###################################################################### 207# write configuration 208 209if ($mode eq 'configure') { 210 open(CONF, ">$config_file") or die "cannot write to $config_file: $!"; 211 212 # Ask about each channel. 213 my @chs = sort keys %channels; 214 my @names = map { $channels{$_}->{'channel-name'} } @chs; 215 my @qs = map { "add channel $_?" } @names; 216 my @want = ask_many_boolean(1, @qs); 217 foreach (@chs) { 218 my $w = shift @want; 219 warn("cannot read input, stopping channel questions"), last 220 if not defined $w; 221 # No need to print to user - XMLTV::Ask is verbose enough. 222 223 # Print a config line, but comment it out if channel not wanted. 224 print CONF '#' if not $w; 225 my $name = shift @names; 226 print CONF "channel $_.$DOMAIN\n"; 227 } 228 229 close CONF or warn "cannot close $config_file: $!"; 230 say("Finished configuration."); 231 232 exit(); 233} 234 235 236# Not configuration, we must be writing something, either full 237# listings or just channels. 238# 239die if $mode ne 'grab' and $mode ne 'list-channels'; 240 241# Options to be used for XMLTV::Writer. 242my %w_args; 243if (defined $opt_output) { 244 my $fh = new IO::File(">$opt_output"); 245 die "cannot write to $opt_output: $!" if not defined $fh; 246 $w_args{OUTPUT} = $fh; 247} 248$w_args{encoding} = 'UTF-8'; 249my $writer; 250sub start_writing() { ($writer = new XMLTV::Writer(%w_args))->start($HEAD) } 251 252if ($mode eq 'list-channels') { 253 start_writing; 254 foreach (@ch_all) { 255 $_{'icon'} = [{'src' => $icons{$_}}] if(defined($icons{$_})); 256 } 257 $writer->write_channel($_) foreach @ch_all; 258 $writer->end(); 259 exit(); 260} 261 262###################################################################### 263# We are producing full listings. 264die if $mode ne 'grab'; 265 266# Read configuration 267my $line_num = 1; 268foreach (@config_lines) { 269 ++$line_num; 270 next if not defined; 271 272 # For now, check that $DOMAIN appears on every line. This 273 # ensures we don't have a config file left over from the old 274 # grabber. 275 # 276 if (/^channel:?\s+(.+)\.nos\.pt\s*$/) { 277 my $ch_did = $1; 278 die if not defined $ch_did; 279 push @channels, $ch_did; 280 } 281 elsif (/^channel:?\s+(.+)\.tvcabo\.pt\s*$/) { 282 # old site but has same channel numbers 283 my $ch_did = $1; 284 die if not defined $ch_did; 285 push @channels, $ch_did; 286 } 287 elsif (/^channel/) { 288 die <<END 289The configuration file is left over from the old tv_grab_pt. The new 290site uses different channels so you need to reconfigure the grabber. 291END 292 ; 293 } 294 else { 295 warn "$config_file:$line_num: bad line\n"; 296 } 297} 298 299###################################################################### 300# begin main program 301 302start_writing; 303 304# Assume the listings source uses CET (see BUGS above). 305die "No channels specified, run me with --configure\n" 306 if not keys %channels; 307my @to_get; 308 309# Write the channels elements 310foreach my $ch_did (@channels) { 311 die if not defined $ch_did; 312 313 # avoid XMLTV barfing when channel is no longer available 314 if (!defined $channels{$ch_did}) { 315 print STDERR "\nChannel $ch_did not found in current channels lineup \n"; 316 next; 317 } 318 319 my $ch_name=$channels{$ch_did}->{'channel-name'}; 320 my $channel = { 'id' => $channels{$ch_did}->{'id'}, 321 'display-name' => $channels{$ch_did}->{'display-name'}, 322 'icon' => $channels{$ch_did}->{'icon'}, 323 }; 324 $channel->{'icon'} = [{'src' => $icons{$ch_did}}] if(defined($icons{$ch_did})); 325 326 $writer->write_channel($channel); 327} 328 329 330# the order in which we fetch the channels matters 331# This progress bar is for both downloading and parsing. Maybe 332# they could be separate. 333# 334 335my $bar = new XMLTV::ProgressBar('getting listings', scalar @channels) 336 if not $opt_quiet; 337 338# time limits for grab 339my $today_date = DateTime->today(time_zone => 'Europe/Lisbon'); 340my $grab_start = $today_date->epoch() + ($opt_offset * 86400); 341my $grab_stop = $grab_start + ($opt_days * 86400); 342print STDERR "\n start/end grab: $grab_start $grab_stop \n" if $opt_debug; 343 344my $some=0; 345foreach my $ch_did (@channels) { 346 #skip legacy channels... 347 next unless $channels{$ch_did}; 348 foreach (process_table($ch_did)) { 349 $writer->write_programme($_); 350 $some = 1; 351 } 352 update $bar if $bar; 353} 354if (not $some) { 355 die "no programmes found\n" unless $some; 356} 357 358$writer->end(); 359 360###################################################################### 361# subroutine definitions 362 363# Use Log::TraceMessages if installed. 364BEGIN { 365 eval { require Log::TraceMessages }; 366 if ($@) { 367 *t = sub {}; 368 *d = sub { '' }; 369 } 370 else { 371 *t = \&Log::TraceMessages::t; 372 *d = \&Log::TraceMessages::d; 373 Log::TraceMessages::check_argv(); 374 } 375} 376 377# Clean up bad characters in HTML. 378sub _tidy( $ ) { 379 for (my $s = shift) { 380 # Character 150 seems to be used for 'versus' in sporting 381 # events, but I don't know what that is in Portuguese. 382 # 383 #s/\s\226\s/ vs /g; 384 return $_; 385 } 386} 387 388# Remove bad chars from an element 389sub tidy( $ ) { 390 return $_[0] if !defined $_[0]; 391 $_[0] =~ s/(\s)\xA0/$1/og; # replace 'space- ' with 'space' 392 $_[0] =~ s/\xA0/ /og; # replace any remaining with space 393 $_[0] =~ s/\xAD//og; # delete soft hyphens 394 return $_[0]; 395} 396 397# Wrapper around Encode (and fix_utf8) 398sub toUTF8( $ ) { 399 return fix_utf8( Encode::encode("utf-8", $_[0]) ); 400} 401 402# UTF-8 fixups. 403sub fix_utf8( $ ) { 404 # The details page claims to be utf-8 but there are some invalid characters in the incoming data 405 # e.g. it claims en-dash as C2 96 (which is a control code in utf-8!) 406 # Looks like an improper conversion from Windows-1252 in the source data 407 # 408 return $_[0] if !defined $_[0]; 409 $_[0] =~ s/\xC2\x96/\xE2\x80\x93/og; # replace invalid en-dash with correct value 410 $_[0] =~ s/\xC2\x80/\xE2\x82\xAC/og; # euro 411 $_[0] =~ s/\xC2\x85/\xE2\x80\xA6/og; # ellipsis 412 $_[0] =~ s/\xC2\x92/\xE2\x80\x99/og; # apostrophe 413 $_[0] =~ s/\xC2\x93/\xE2\x80\x9C/og; # open double quote 414 $_[0] =~ s/\xC2\x94/\xE2\x80\x9D/og; # close double quote 415 $_[0] =~ s/\xC2[\x80-\x9F]//og; # dump the rest 416 return $_[0]; 417} 418 419# Remove leading & trailing spaces 420sub trim( $ ) { 421 # Remove leading & trailing spaces 422 $_[0] =~ s/^\s+|\s+$//g; 423 return $_[0]; 424} 425 426sub process_table { 427 my ($ch_xmltv_id) = @_; 428 429 t "Getting channel $ch_xmltv_id\n"; 430 431 $ch_xmltv_id =~ /(.+?)\.zon\.pt/; 432 433 # This seems like a useful link but I can't see how to get the channelindex 434 # http://www.zon.pt/_layouts/EPGGetProgramsForChannels.aspx?cIndex=1&day=1&order=grelha&category=&numChannels=1 435 436 # http://www.zon.pt/tv/guiaTV/Pages/Guia-TV-programacao.aspx?channelSigla=5 437 # 2014-05-19 http://www.nos.pt/particulares/televisao/guia-tv/Pages/channel.aspx?channel=5 438 # 439 my $url = $HEAD->{'source-data-url'} . 'Pages/channel.aspx?channel='.$ch_xmltv_id; 440 print STDERR " URL= $url \n" if $opt_debug; 441 t $url; 442 443 my $tree = get_nice_tree($url, '', 'UTF-8'); 444 445 my $programmes = {}; 446 my $firstdaynum; 447 448 if ( my $h = $tree->look_down('_tag' => 'div', 'id' => 'programs-container') ) { 449 if ( my @h2 = $h->look_down('_tag' => 'div', 'class' => qr/programs-day-list/) ) { 450 DAY: 451 foreach my $h_day (@h2) { # schedule for a day 452 my ($daynum) = $h_day->attr('id') =~ /day(\d*)/; 453 $firstdaynum = $daynum if !defined $firstdaynum; 454 455 my $dt = $today_date->clone()->set_day($daynum); 456 $dt->add( months => 1 ) if $daynum < $firstdaynum; 457 #print STDERR "\n" . $dt->strftime("%Y%m%d%H%M%S %z") . "\n"; 458 459 next DAY if $dt->epoch() < $grab_start || $dt->epoch() >= $grab_stop; 460 461 if ( my @h3 = $h_day->look_down('_tag' => 'li') ) { # progs for a day 462 my $j = 0; 463 PROG: 464 foreach my $h_prog (@h3) { # each prog 465 $j++; 466 467 #<li style="height:119px;"> 468 # <span style="height:55px"> 469 # <a class="series" id="71841" href="#" title="Anatomia de Grey T.9 Ep.22"> 470 # <span class="program">Anatomia de Grey T.9 Ep.22</span><br> 471 # <span class="duration">02:19 - 03:03</span> 472 # </a> 473 # </span> 474 #</li> 475 476 my ( $p_id, $p_category, $p_title, $p_desc, $p_timespan, $p_start, $p_stop, $p_start_epoch, $p_stop_epoch, $p_episode_num ); 477 478 if ( my $a = $h_prog->look_down('_tag' => 'a') ) { 479 $p_category = $a->attr('class'); 480 $p_id = $a->attr('id'); 481 $p_title = $a->attr('title'); 482 483 if ( my $s = $a->look_down('_tag' => 'span', 'class' => 'program') ) { 484 $p_desc = tidy $s->as_text(); 485 } 486 487 if ( my $s = $a->look_down('_tag' => 'span', 'class' => 'duration') ) { 488 my ($start_h, $start_m, $stop_h, $stop_m) = $s->as_text() =~ /(\d\d):(\d\d)\s-\s(\d\d):(\d\d)/; 489 490 my $start = $dt->clone(); $start->set( 'hour' => $start_h, 'minute' => $start_m ); 491 my $stop = $dt->clone(); $stop->set ( 'hour' => $stop_h, 'minute' => $stop_m ); 492 493 if ($stop_h < $start_h) { # stop hh < start hh = assume we've gone to the next (or previous) day 494 $start->subtract( days => 1 ) if $j == 1; # first programme in day 495 $stop->add( days => 1 ) if $j != 1; # not first programme in day 496 } 497 498 $p_start = $start->strftime("%Y%m%d%H%M%S %z"); 499 $p_stop = $stop->strftime("%Y%m%d%H%M%S %z"); 500 $p_start_epoch = $start->epoch(); 501 $p_stop_epoch = $stop->epoch(); 502 $p_timespan = $s->as_text(); 503 } 504 505 506 # strip the SnnEnn out of the title (e.g. "Anatomia de Grey T.9 Ep.24") 507 my ($p_ser, $p_ep) = ('', ''); my $p_match; 508 if ( ($p_match, $p_ser, $p_ep) = $p_title =~ /.*(T\.(\d*)\sEp\.(\d*))/ ) { 509 $p_episode_num = --$p_ser . ' . ' . --$p_ep . ' . '; 510 } elsif ( ($p_match, $p_ep) = $p_title =~ /.*(Ep\.(\d*))/ ) { 511 $p_episode_num = ' . ' . --$p_ep . ' . '; 512 } 513 $p_title =~ s/$p_match// if $p_match; 514 trim $p_title; 515 } 516 517 next PROG if $p_start_epoch < $grab_start || $p_start_epoch >= $grab_stop; 518 519 my %prog; 520 $prog{'channel'} = "$ch_xmltv_id.$DOMAIN"; 521 $prog{'id'} = $p_id; 522 $prog{'title'} = $p_title; 523 $prog{'category'} = $p_category; 524 $prog{'start'} = $p_start; 525 $prog{'stop'} = $p_stop; 526 $prog{'episode-num'} = $p_episode_num; 527 $prog{'timespan'} = $p_timespan; # not an xmltv item: used in process_details_page() 528 529 # if user wants details then get them from the programme page 530 if (!$opt_fast) { 531 process_details_page ( $ch_xmltv_id, \%prog, $tree ); 532 } 533 534 # store the programme avoiding duplicates 535 # also check for duplicate start times and set clumpidx 536 { 537 if ( defined $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } ) { 538 # duplicate prog or contemporary? 539 my $dup = 0; my $_P; 540 foreach $_P ( @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } } ) { 541 $dup = 1 if ( $_P->{'title'} eq $prog{'title'} ); # duplicate 542 } 543 next PROG if $dup; # ignore duplicates (go to next programme) 544 if (!$dup) { 545 # contemporary programme so set clumpidx 546 my $numclumps = scalar @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } } + 1; 547 # set (or adjust) clumpidx of existing programmes 548 my $i = 0; 549 foreach $_P ( @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } } ) { 550 $_P->{'clumpidx'} = "$i/$numclumps"; 551 $i++; 552 } 553 # set clumpidx for new programme 554 $prog{'clumpidx'} = "$i/$numclumps"; 555 } 556 } 557 } 558 559 # store the programme 560 push @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } }, \%prog; 561 562 } # end each prog 563 } # end progs for each day 564 } # schedule for a day 565 } # programs-day-list 566 } # programs-container 567 568 569 # did we get any programmes? 570 if ( scalar $programmes == 0 ) { 571 warn "$url ($ch_xmltv_id) : no programmes found\n"; 572 return; 573 } 574 575 # format the programmes ready for XMLTV::Writer 576 my @r; 577 foreach ( keys %{$programmes} ) { 578 my $_ch_progs = $programmes->{$_}; 579 foreach ( sort keys %{$_ch_progs} ) { 580 my $_dt_progs = $_ch_progs->{$_}; 581 foreach (@{ $_dt_progs }) { 582 push @r, make_programme_hash( $ch_xmltv_id, $_ ); 583 } 584 } 585 } 586 return @r; 587} 588 589sub process_details_page { 590 my ($ch_xmltv_id, $prog, $s_tree) = @_; 591 592 t "Getting prog details $$prog{'id'} \n"; 593 594 my $channelAcronym1; 595 if ( my $h = $s_tree->look_down('_tag' => 'div', 'id' => 'channel-logo') ) { 596 if ( my $h2 = $h->look_down('_tag' => 'img') ) { 597 $channelAcronym1 = $h2->attr('alt'); 598 } 599 } 600 my $HoursToAddOrRemove = 0; 601 if ( my $h = $s_tree->look_down('_tag' => 'input', 'id' => 'ctl00_PlaceHolderMain_channelProgr_HoursToAddOrRemove') ) { 602 $HoursToAddOrRemove = $h->attr('value'); 603 } 604 my $programId1 = $$prog{'id'}; 605 my ($timespan) = $$prog{'timespan'} =~ s/ /%20/g; 606 my @substr = split(/-/, $$prog{'timespan'}); 607 608 my $url = $HEAD->{'source-info-url'} . '_layouts/15/Armstrong/ApplicationPages/EPGGetProgramsAndDetails.aspx/GetProgramDetails'; 609 print STDERR " URL= $url \n" if $opt_debug; 610 t $url; 611 612 my $json = "{ 'programId':'$programId1', 'channelAcronym':'$channelAcronym1', 'hour':'$HoursToAddOrRemove', 'startHour':'$substr[0]', 'endHour':'$substr[1]' }"; 613 614 615 # This is what the page returns. Looks like an old school delimited list 616 # (c.f. OnCallGetProgramDetailsComplete JS code) 617 # 618 # Anatomia de Grey T.9 Ep.23_#|$_O drama médico mais famoso da televisão centra a sua história nas vidas profissionais e pessoais de um grupo de médicos cirurgiões e dos seus supervisores._#|$_277055_resized_352x198.jpg_#|$_02:47 _#|$_ 03:28_#|$_RTP 1_#|$_2014-05-07T02:47:00+01:00_#|$_2014-05-07T03:28:00+01:00_#|$_false 619 # 620 621 # Emulate an AJAX post for the requested content 622 my $content = post_nice_json($url, $json); 623 my @data = split(/_#\|\$_/, $content->{'d'}); 624 625 # We could check the title matches what we already have but why not just trust the 'id' is correct ;-) 626 627 $$prog{'desc'} = tidy $data[1]; # store the description in our prog hash 628 $$prog{'icon'} = "http://images.$DOMAIN/" . $data[2]; # [2] is a uri to the programme image 629 630 return; 631} 632 633 634# reformat the data to something acceptable to xmltv:::writer 635sub make_programme_hash { 636 my ( $ch_xmltv_id, $cur ) = @_; 637 638 my %prog; 639 640 $prog{channel} = $cur->{'channel'}; 641 642 #$prog{channel} =~ s/\s/_/g; 643 644 $prog{'title'} = [ [ toUTF8( $cur->{'title'} ), $LANG ] ]; 645 $prog{'sub-title'} = [ [ toUTF8( $cur->{'subtitle'} ), $LANG ] ] if $cur->{'subtitle'}; 646 $prog{'category'} = [ [ toUTF8( $cur->{'category'} ), $LANG ] ] if $cur->{'category'}; 647 $prog{'episode-num'} = [[ $cur->{'episode-num'}, 'xmltv_ns' ]] if $cur->{'episode-num'}; 648 $prog{'start'} = $cur->{'start'} if $cur->{'start'}; 649 $prog{'stop'} = $cur->{'stop'} if $cur->{'stop'}; 650 $prog{'desc'} = [ [ toUTF8( $cur->{'desc'} ), $LANG ] ] if $cur->{'desc'}; 651 $prog{'icon'} = [ { 'src' => $cur->{'icon'} } ] if $cur->{'icon'}; 652 $prog{'credits'} = $cur->{'credits'} if $cur->{'credits'}; 653 $prog{'date'} = $cur->{'year'} if $cur->{'year'}; 654 655 return \%prog; 656} 657 658# get channel listing 659sub get_channels { 660 my $bar = new XMLTV::ProgressBar( 'getting list of channels', 1 ) if not $opt_quiet; 661 my %channels; 662 663 # retrieve channels via a dummy call to the schedule page 664 # http://www.zon.pt/tv/guiaTV/Pages/GuiaTV.aspx 665 # 2014-05-19 http://www.nos.pt/particulares/televisao/guia-tv/Pages/default.aspx 666 my $url = $HEAD->{'source-data-url'} . 'Pages/default.aspx'; 667 t $url; 668 669 my $tree = get_nice_tree($url, '', 'UTF-8'); 670 671 # <div id="channels-list-container"> 672 # <ul id="channels-list-slider"> 673 # <li> 674 # <span class="channel-number">001</span> 675 # <span class="channel-logo"> 676 # <a href='/tv/guiaTV/Pages/Guia-TV-programacao.aspx?channelSigla=5' title='RTP 1'> 677 # <img src='/EPGChannelImages/RTP1.png' alt='logótipo RTP 1' /> 678 # </a> 679 # </span> 680 # </li> 681 # 2014-05-19 682 # <li> 683 # <span class="channel-logo"> 684 # <a href="/particulares/televisao/guia-tv/Pages/channel.aspx?channel=5" title="RTP 1"> 685 # <img src="//images.nos.pt/EPGChannelImages/RTP1.png" alt="logótipo RTP 1"> 686 # </a> 687 # </span> 688 # <span class="channel-number">001</span> 689 # </li> 690 # http://images.nos.pt/EPGChannelImages/RTP1.png 691 # 692 if ( my $h = $tree->look_down('_tag' => 'div', 'id' => 'channels-list-container') ) { 693 if ( my $h2 = $h->look_down('_tag' => 'ul', 'id' => 'channels-list-slider') ) { 694 my @h3 = $h2->look_down('_tag' => 'li'); 695 foreach my $elem (@h3) { 696 my ($channel_id, $channel_name, $channel_logo); 697 if ( my $h4 = $elem->look_down('_tag' => 'a') ) { 698 $channel_name = toUTF8( $h4->attr('title') ); 699 ($channel_id) = $h4->attr('href') =~ /channel=(\d*)/; 700 } 701 if ( my $h4 = $elem->look_down('_tag' => 'img') ) { 702 $channel_logo = 'http:' . $h4->attr('src'); 703 } 704 # store the channel 705 my $ch = 706 { 707 'channel-name' => $channel_name, 708 'display-name' => [ [ $channel_name, $LANG ] ], 709 'id' => $channel_id.'.'.$DOMAIN, 710 'icon' => [ { 'src' => $channel_logo } ], 711 }; 712 $channels{$channel_id} = $ch; 713 push @ch_all, $ch; 714 715 } #foreach 716 } 717 } 718 die "no channels could be found" if not keys %channels; 719 720 update $bar if not $opt_quiet; 721 $tree->delete; 722 return %channels; 723} 724 725sub nextday { 726 my $d = shift; 727 my $p = ParseDate($d); 728 my $n = DateCalc($p, '+ 1 day'); 729 return UnixDate($n, '%Q'); 730} 731 732sub get_icons { # deprecated 733 my %icons; 734 my $url= $HEAD->{"source-info-url"}."/Televisao/ListaProgramas.aspx?dia=0&package=9&cat=&channelSigla="; 735 my $chan; 736 my $tag; 737 my $addr; 738 739 my $bar = new XMLTV::ProgressBar('grabbing icons', scalar(keys(%channels))) 740 if not $opt_quiet; 741 742 foreach (keys %channels) { 743 my $tb = get_nice_tree $url.encode_entities($_), \&tidy; 744 745 $tag = $tb->look_down('_tag' => 'img', 746 sub { 747 return ($_[0]->attr('src') =~ m/Shared\/img\/televisao\/BackofficeImages\//); 748 }); 749 update $bar if not $opt_quiet; 750 751 unless(ref($tag) eq "HTML::Element") { 752 $tb->delete; 753 next; 754 }; 755 756 $icons{$_} = $tag->attr('src'); 757 $icons{$_} =~ s/\.\./$HEAD->{'source-info-url'}/; 758 759 $tb->delete; 760 } 761 $bar->finish() if not $opt_quiet; 762 763 return %icons; 764} 765 766 767# Get the user's home directory 768sub get_default_dir { 769 my $winhome = $ENV{HOMEDRIVE} . $ENV{HOMEPATH} 770 if defined( $ENV{HOMEDRIVE} ) and defined( $ENV{HOMEPATH} ); 771 772 my $home = $ENV{HOME} || $winhome || "."; 773 return $home; 774} 775 776# Set default cache dir = $HOME/.xmltv/cache 777sub get_default_cachedir { 778 return get_default_dir() . "/.xmltv/cache"; 779} 780