1#!/usr/local/bin/perl
2
3=pod
4
5=head1 NAME
6
7tv_grab_pt - Grab TV listings for Portugal.
8
9=head1 SYNOPSIS
10
11tv_grab_pt --help
12
13tv_grab_pt [--config-file FILE] --configure [--gui OPTION]
14
15tv_grab_pt [--config-file FILE] [--output FILE] [--days N]
16           [--offset N] [--fast] [--quiet] [--icons]
17
18tv_grab_pt --list-channels
19
20=head1 DESCRIPTION
21
22Output TV listings for several channels available in Portugal.
23It supports the public network and the private NetCabo network.
24
25First run B<tv_grab_pt --configure> to choose, which channels you want
26to download. Then running B<tv_grab_pt> with no arguments will output
27listings in XML format to standard output.
28
29B<--configure> Prompt for which channels,
30and write the configuration file.
31
32B<--gui OPTION> Use this option to enable a graphical interface to be used.
33OPTION may be 'Tk', or left blank for the best available choice.
34Additional allowed values of OPTION are 'Term' for normal terminal output
35(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.
36
37B<--config-file FILE> Set the name of the configuration file, the
38default is B<~/.xmltv/tv_grab_pt.conf>.  This is the file written by
39B<--configure> and read when grabbing.
40
41B<--days N> Grab N days.  The default is 7 days.
42
43B<--offset N> Start N days in the future.  The default is to start
44from today.
45
46B<--fast> Only fetch summary information for each programme. This is
47only title, start/stop times, category, episode number.
48
49B<--output FILE> Write to FILE rather than standard output.
50
51B<--quiet> Suppress the progress messages normally written to standard
52error.
53
54B<--icons> Fetches channels icons/logos [deprecated - this is now the default]
55
56B<--version> Show the version of the grabber.
57
58B<--help> Print a help message and exit.
59
60=head1 SEE ALSO
61
62L<xmltv(5)>.
63
64=head1 AUTHOR
65
66Bruno Tavares, gawen@users.sourceforge.net, based on tv_grab_es, from Ramon Roca.
67
68Grabber Site : http://bat.is-a-geek.com/XMLGrabPt
69
70=head1 BUGS
71
72=cut
73
74######################################################################
75# initializations
76
77use warnings;
78use strict;
79use XMLTV::Version '$Id: tv_grab_pt,v 1.58 2016/03/26 15:51:52 bilbo_uk Exp $ ';
80use XMLTV::Capabilities qw/baseline manualconfig cache/;
81use XMLTV::Description 'Portugal';
82use Getopt::Long;
83#use Date::Manip;
84use DateTime;
85#use Data::Dumper;
86use HTML::TreeBuilder;
87use HTML::Entities; # parse entities
88use HTTP::Cache::Transparent;
89use Encode;
90use IO::File;
91use File::Path;
92use File::Basename;
93#use LWP::UserAgent;
94
95use XMLTV;
96use XMLTV::Memoize;
97use XMLTV::ProgressBar;
98use XMLTV::Ask;
99use XMLTV::Config_file;
100use XMLTV::DST;
101use XMLTV::Get_nice 0.005067;
102use XMLTV::Mode;
103# Todo: perhaps we should internationalize messages and docs?
104use XMLTV::Usage <<END
105$0: get Portuguese television listings in XMLTV format
106To configure: $0 --configure [--config-file FILE] [--gui OPTION]
107To grab listings: $0 [--config-file FILE] [--output FILE] [--quiet] [--offset OFFSET] [--days DAYS] [--icons]
108To list channels: $0 --list-channels
109END
110  ;
111
112my $DOMAIN = 'nos.pt';
113my $SOURCE_URL = "http://www.$DOMAIN";
114
115# Attributes of the root element in output.
116my $HEAD = { 'source-info-url'     => "http://$DOMAIN/",
117             'source-data-url'     => "http://www.$DOMAIN/particulares/televisao/guia-tv/",
118             'generator-info-name' => 'XMLTV',
119             'generator-info-url'  => 'http://xmltv.org/',
120           };
121
122# default language
123my $LANG="pt";
124
125# Global channel_data
126our @ch_all;
127
128######################################################################
129# get options
130
131# Get options, including undocumented --cache option.
132XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
133our ($opt_help, $opt_output,
134    $opt_configure, $opt_config_file, $opt_gui, $opt_quiet,
135    $opt_list_channels, $opt_offset, $opt_days, $opt_fast, $opt_icons,
136    $opt_debug);
137$opt_quiet  = 0; # default
138$opt_days   = 7; # default
139$opt_offset = 0; # default
140$opt_fast   = 0; # default
141$opt_debug  = 0;
142GetOptions('help'          => \$opt_help,
143           'configure'     => \$opt_configure,
144           'config-file=s' => \$opt_config_file,
145           'gui:s'         => \$opt_gui,
146           'output=s'      => \$opt_output,
147           'quiet'         => \$opt_quiet,
148           'list-channels' => \$opt_list_channels,
149           'offset=i'      => \$opt_offset,
150           'days=i'        => \$opt_days,
151           'fast'          => \$opt_fast,
152           'icons'         => \$opt_icons,         # Fetches channels icons/logos [deprecated - this is now the default]
153           'debug'         => \$opt_debug,         # undocumented
154          )
155  or usage(0);
156usage(1) if $opt_help;
157
158# Initialise the web page cache
159HTTP::Cache::Transparent::init( {
160    BasePath => get_default_cachedir(),
161    NoUpdate => 4*3600,         # cache time in seconds
162    MaxAge   => 24,               # flush time in hours
163    Verbose  => $opt_debug,
164} );
165##$XMLTV::Get_nice::Delay = 0 if $opt_debug;
166
167XMLTV::Ask::init($opt_gui);
168
169
170our $first_day = ($opt_offset || 0);
171our $last_day  = $first_day + $opt_days;
172die 'cannot grab more than one week ahead' if $first_day >= 7 || $last_day > 7;
173
174my $mode = XMLTV::Mode::mode('grab', # default
175                             $opt_configure => 'configure',
176                             $opt_list_channels => 'list-channels',
177                            );
178
179# File that stores which channels to download.
180my $config_file
181  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_pt', $opt_quiet);
182
183my @config_lines; # used only in grab mode
184if ($mode eq 'configure') {
185    XMLTV::Config_file::check_no_overwrite($config_file);
186    mkpath(dirname($config_file));
187}
188elsif ($mode eq 'grab') {
189    @config_lines = XMLTV::Config_file::read_lines($config_file);
190}
191elsif ($mode eq 'list-channels') {
192    # Config file not used.
193}
194else { die }
195
196# Whatever we are doing, we need the channels data.
197my $token;
198my %channels = get_channels(); # sets @ch_all
199my %channelnumbers;
200my @channels;
201
202my %icons = ();
203%icons = get_icons() if $opt_icons;
204
205
206######################################################################
207# write configuration
208
209if ($mode eq 'configure') {
210    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
211
212    # Ask about each channel.
213    my @chs = sort keys %channels;
214    my @names = map { $channels{$_}->{'channel-name'} } @chs;
215    my @qs = map { "add channel $_?" } @names;
216    my @want = ask_many_boolean(1, @qs);
217    foreach (@chs) {
218        my $w = shift @want;
219        warn("cannot read input, stopping channel questions"), last
220          if not defined $w;
221        # No need to print to user - XMLTV::Ask is verbose enough.
222
223        # Print a config line, but comment it out if channel not wanted.
224        print CONF '#' if not $w;
225        my $name = shift @names;
226        print CONF "channel $_.$DOMAIN\n";
227    }
228
229    close CONF or warn "cannot close $config_file: $!";
230    say("Finished configuration.");
231
232    exit();
233}
234
235
236# Not configuration, we must be writing something, either full
237# listings or just channels.
238#
239die if $mode ne 'grab' and $mode ne 'list-channels';
240
241# Options to be used for XMLTV::Writer.
242my %w_args;
243if (defined $opt_output) {
244    my $fh = new IO::File(">$opt_output");
245    die "cannot write to $opt_output: $!" if not defined $fh;
246    $w_args{OUTPUT} = $fh;
247}
248$w_args{encoding} = 'UTF-8';
249my $writer;
250sub start_writing() { ($writer = new XMLTV::Writer(%w_args))->start($HEAD) }
251
252if ($mode eq 'list-channels') {
253    start_writing;
254    foreach (@ch_all) {
255        $_{'icon'} = [{'src' => $icons{$_}}] if(defined($icons{$_}));
256    }
257    $writer->write_channel($_) foreach @ch_all;
258    $writer->end();
259    exit();
260}
261
262######################################################################
263# We are producing full listings.
264die if $mode ne 'grab';
265
266# Read configuration
267my $line_num = 1;
268foreach (@config_lines) {
269    ++$line_num;
270    next if not defined;
271
272    # For now, check that $DOMAIN appears on every line.  This
273    # ensures we don't have a config file left over from the old
274    # grabber.
275    #
276    if (/^channel:?\s+(.+)\.nos\.pt\s*$/) {
277        my $ch_did = $1;
278        die if not defined $ch_did;
279        push @channels, $ch_did;
280    }
281    elsif (/^channel:?\s+(.+)\.tvcabo\.pt\s*$/) {
282        # old site but has same channel numbers
283        my $ch_did = $1;
284        die if not defined $ch_did;
285        push @channels, $ch_did;
286    }
287    elsif (/^channel/) {
288        die <<END
289The configuration file is left over from the old tv_grab_pt.  The new
290site uses different channels so you need to reconfigure the grabber.
291END
292          ;
293    }
294    else {
295        warn "$config_file:$line_num: bad line\n";
296    }
297}
298
299######################################################################
300# begin main program
301
302start_writing;
303
304# Assume the listings source uses CET (see BUGS above).
305die "No channels specified, run me with --configure\n"
306  if not keys %channels;
307my @to_get;
308
309# Write the channels elements
310foreach my $ch_did (@channels) {
311    die if not defined $ch_did;
312
313    # avoid XMLTV barfing when channel is no longer available
314    if (!defined $channels{$ch_did}) {
315        print STDERR "\nChannel $ch_did not found in current channels lineup \n";
316        next;
317    }
318
319    my $ch_name=$channels{$ch_did}->{'channel-name'};
320    my $channel = { 'id'           => $channels{$ch_did}->{'id'},
321                    'display-name' => $channels{$ch_did}->{'display-name'},
322                    'icon'         => $channels{$ch_did}->{'icon'},
323    };
324    $channel->{'icon'} = [{'src' => $icons{$ch_did}}] if(defined($icons{$ch_did}));
325
326    $writer->write_channel($channel);
327}
328
329
330# the order in which we fetch the channels matters
331# This progress bar is for both downloading and parsing.  Maybe
332# they could be separate.
333#
334
335my $bar = new XMLTV::ProgressBar('getting listings', scalar @channels)
336  if not $opt_quiet;
337
338# time limits for grab
339my $today_date = DateTime->today(time_zone => 'Europe/Lisbon');
340my $grab_start = $today_date->epoch() + ($opt_offset * 86400);
341my $grab_stop  = $grab_start + ($opt_days * 86400);
342print STDERR "\n start/end grab: $grab_start $grab_stop \n" if $opt_debug;
343
344my $some=0;
345foreach my $ch_did (@channels) {
346    #skip legacy channels...
347    next unless $channels{$ch_did};
348    foreach (process_table($ch_did)) {
349        $writer->write_programme($_);
350        $some = 1;
351    }
352    update $bar if $bar;
353}
354if (not $some) {
355  die "no programmes found\n" unless $some;
356}
357
358$writer->end();
359
360######################################################################
361# subroutine definitions
362
363# Use Log::TraceMessages if installed.
364BEGIN {
365    eval { require Log::TraceMessages };
366    if ($@) {
367        *t = sub {};
368        *d = sub { '' };
369    }
370    else {
371        *t = \&Log::TraceMessages::t;
372        *d = \&Log::TraceMessages::d;
373        Log::TraceMessages::check_argv();
374    }
375}
376
377# Clean up bad characters in HTML.
378sub _tidy( $ ) {
379    for (my $s = shift) {
380        # Character 150 seems to be used for 'versus' in sporting
381        # events, but I don't know what that is in Portuguese.
382        #
383        #s/\s\226\s/ vs /g;
384        return $_;
385    }
386}
387
388# Remove bad chars from an element
389sub tidy( $ ) {
390    return $_[0] if !defined $_[0];
391    $_[0] =~ s/(\s)\xA0/$1/og;    # replace 'space-&nbsp;' with 'space'
392    $_[0] =~ s/\xA0/ /og;         # replace any remaining &nbsp; with space
393    $_[0] =~ s/\xAD//og;          # delete soft hyphens
394    return $_[0];
395}
396
397# Wrapper around Encode (and fix_utf8)
398sub toUTF8( $ )  {
399    return fix_utf8( Encode::encode("utf-8", $_[0]) );
400}
401
402# UTF-8 fixups.
403sub fix_utf8( $ ) {
404    #  The details page claims to be utf-8 but there are some invalid characters in the incoming data
405    #  e.g. it claims en-dash as C2 96 (which is a control code in utf-8!)
406    #  Looks like an improper conversion from Windows-1252 in the source data
407    #
408    return $_[0] if !defined $_[0];
409    $_[0] =~ s/\xC2\x96/\xE2\x80\x93/og;      # replace invalid en-dash with correct value
410    $_[0] =~ s/\xC2\x80/\xE2\x82\xAC/og;      # euro
411    $_[0] =~ s/\xC2\x85/\xE2\x80\xA6/og;      # ellipsis
412    $_[0] =~ s/\xC2\x92/\xE2\x80\x99/og;      # apostrophe
413    $_[0] =~ s/\xC2\x93/\xE2\x80\x9C/og;      # open double quote
414    $_[0] =~ s/\xC2\x94/\xE2\x80\x9D/og;      # close double quote
415    $_[0] =~ s/\xC2[\x80-\x9F]//og;           # dump the rest
416    return $_[0];
417}
418
419# Remove leading & trailing spaces
420sub trim( $ ) {
421    # Remove leading & trailing spaces
422    $_[0] =~ s/^\s+|\s+$//g;
423    return $_[0];
424}
425
426sub process_table {
427    my ($ch_xmltv_id) = @_;
428
429    t "Getting channel $ch_xmltv_id\n";
430
431    $ch_xmltv_id =~ /(.+?)\.zon\.pt/;
432
433    # This seems like a useful link but I can't see how to get the channelindex
434    #   http://www.zon.pt/_layouts/EPGGetProgramsForChannels.aspx?cIndex=1&day=1&order=grelha&category=&numChannels=1
435
436    # http://www.zon.pt/tv/guiaTV/Pages/Guia-TV-programacao.aspx?channelSigla=5
437    # 2014-05-19  http://www.nos.pt/particulares/televisao/guia-tv/Pages/channel.aspx?channel=5
438    #
439    my $url = $HEAD->{'source-data-url'} . 'Pages/channel.aspx?channel='.$ch_xmltv_id;
440    print STDERR " URL= $url \n" if $opt_debug;
441    t $url;
442
443    my $tree  = get_nice_tree($url, '', 'UTF-8');
444
445    my $programmes = {};
446    my $firstdaynum;
447
448    if ( my $h = $tree->look_down('_tag' => 'div', 'id' => 'programs-container') ) {
449        if ( my @h2 = $h->look_down('_tag' => 'div', 'class' => qr/programs-day-list/) ) {
450            DAY:
451            foreach my $h_day (@h2) { # schedule for a day
452                my ($daynum) = $h_day->attr('id') =~ /day(\d*)/;
453                $firstdaynum = $daynum if !defined $firstdaynum;
454
455                my $dt = $today_date->clone()->set_day($daynum);
456                $dt->add( months => 1 )  if $daynum < $firstdaynum;
457                #print STDERR "\n" . $dt->strftime("%Y%m%d%H%M%S %z") . "\n";
458
459                next DAY if $dt->epoch() < $grab_start || $dt->epoch() >= $grab_stop;
460
461                if ( my @h3 = $h_day->look_down('_tag' => 'li') ) { # progs for a day
462                    my $j = 0;
463                    PROG:
464                    foreach my $h_prog (@h3) { # each prog
465                        $j++;
466
467                        #<li style="height:119px;">
468                        #    <span style="height:55px">
469                        #        <a class="series" id="71841" href="#" title="Anatomia de Grey T.9 Ep.22">
470                        #            <span class="program">Anatomia de Grey T.9 Ep.22</span><br>
471                        #            <span class="duration">02:19 - 03:03</span>
472                        #        </a>
473                        #    </span>
474                        #</li>
475
476                        my ( $p_id, $p_category, $p_title, $p_desc, $p_timespan, $p_start, $p_stop, $p_start_epoch, $p_stop_epoch, $p_episode_num );
477
478                        if ( my $a = $h_prog->look_down('_tag' => 'a') ) {
479                            $p_category = $a->attr('class');
480                            $p_id = $a->attr('id');
481                            $p_title = $a->attr('title');
482
483                            if ( my $s = $a->look_down('_tag' => 'span', 'class' => 'program') ) {
484                                $p_desc = tidy $s->as_text();
485                            }
486
487                            if ( my $s = $a->look_down('_tag' => 'span', 'class' => 'duration') ) {
488                                my ($start_h, $start_m, $stop_h, $stop_m) = $s->as_text() =~ /(\d\d):(\d\d)\s-\s(\d\d):(\d\d)/;
489
490                                my $start = $dt->clone();  $start->set( 'hour' => $start_h, 'minute' => $start_m );
491                                my $stop  = $dt->clone();  $stop->set ( 'hour' => $stop_h,  'minute' => $stop_m  );
492
493                                if ($stop_h < $start_h) {  # stop hh < start hh = assume we've gone to the next (or previous) day
494                                    $start->subtract( days => 1 ) if $j == 1;    # first programme in day
495                                    $stop->add( days => 1 )       if $j != 1;    # not first programme in day
496                                }
497
498                                $p_start = $start->strftime("%Y%m%d%H%M%S %z");
499                                $p_stop  = $stop->strftime("%Y%m%d%H%M%S %z");
500                                $p_start_epoch = $start->epoch();
501                                $p_stop_epoch  = $stop->epoch();
502                                $p_timespan = $s->as_text();
503                            }
504
505
506                            # strip the SnnEnn out of the title (e.g. "Anatomia de Grey T.9 Ep.24")
507                            my ($p_ser, $p_ep) = ('', '');  my $p_match;
508                            if ( ($p_match, $p_ser, $p_ep) = $p_title =~ /.*(T\.(\d*)\sEp\.(\d*))/ ) {
509                                $p_episode_num = --$p_ser . ' . ' . --$p_ep . ' . ';
510                            } elsif ( ($p_match, $p_ep) = $p_title =~ /.*(Ep\.(\d*))/ ) {
511                                $p_episode_num = ' . ' . --$p_ep . ' . ';
512                            }
513                            $p_title =~ s/$p_match//  if $p_match;
514                            trim $p_title;
515                        }
516
517                        next PROG if $p_start_epoch < $grab_start || $p_start_epoch >= $grab_stop;
518
519                        my %prog;
520                        $prog{'channel'}     = "$ch_xmltv_id.$DOMAIN";
521                        $prog{'id'}          = $p_id;
522                        $prog{'title'}       = $p_title;
523                        $prog{'category'}    = $p_category;
524                        $prog{'start'}       = $p_start;
525                        $prog{'stop'}        = $p_stop;
526                        $prog{'episode-num'} = $p_episode_num;
527                        $prog{'timespan'}    = $p_timespan;        # not an xmltv item: used in process_details_page()
528
529                        # if user wants details then get them from the programme page
530                        if (!$opt_fast) {
531                            process_details_page ( $ch_xmltv_id, \%prog, $tree );
532                        }
533
534                        # store the programme avoiding duplicates
535                        # also check for duplicate start times and set clumpidx
536                        {
537                            if ( defined $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } ) {
538                                # duplicate prog or contemporary?
539                                my $dup = 0; my $_P;
540                                foreach $_P ( @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } } ) {
541                                    $dup = 1  if ( $_P->{'title'} eq $prog{'title'} );    # duplicate
542                                }
543                                next PROG if $dup;    # ignore duplicates (go to next programme)
544                                if (!$dup) {
545                                    # contemporary programme so set clumpidx
546                                    my $numclumps = scalar @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } }  + 1;
547                                    # set (or adjust) clumpidx of existing programmes
548                                    my $i = 0;
549                                    foreach $_P ( @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } } ) {
550                                        $_P->{'clumpidx'} = "$i/$numclumps";
551                                        $i++;
552                                    }
553                                    # set clumpidx for new programme
554                                    $prog{'clumpidx'} = "$i/$numclumps";
555                                }
556                            }
557                        }
558
559                        # store the programme
560                        push @{ $programmes->{ $ch_xmltv_id }->{ $p_start_epoch } }, \%prog;
561
562                    } # end each prog
563                } # end progs for each day
564            } # schedule for a day
565        } # programs-day-list
566    } # programs-container
567
568
569    # did we get any programmes?
570    if ( scalar $programmes == 0 ) {
571            warn "$url ($ch_xmltv_id) : no programmes found\n";
572        return;
573    }
574
575    # format the programmes ready for XMLTV::Writer
576    my @r;
577    foreach ( keys %{$programmes} ) {
578        my $_ch_progs = $programmes->{$_};
579        foreach ( sort keys %{$_ch_progs} ) {
580            my $_dt_progs = $_ch_progs->{$_};
581            foreach (@{ $_dt_progs }) {
582                push @r, make_programme_hash( $ch_xmltv_id, $_ );
583             }
584        }
585    }
586    return @r;
587}
588
589sub process_details_page {
590    my ($ch_xmltv_id, $prog, $s_tree) = @_;
591
592    t "Getting prog details $$prog{'id'} \n";
593
594    my $channelAcronym1;
595    if ( my $h = $s_tree->look_down('_tag' => 'div', 'id' => 'channel-logo') ) {
596        if ( my $h2 = $h->look_down('_tag' => 'img') ) {
597            $channelAcronym1 = $h2->attr('alt');
598        }
599    }
600    my $HoursToAddOrRemove = 0;
601    if ( my $h = $s_tree->look_down('_tag' => 'input', 'id' => 'ctl00_PlaceHolderMain_channelProgr_HoursToAddOrRemove') ) {
602        $HoursToAddOrRemove = $h->attr('value');
603    }
604    my $programId1 = $$prog{'id'};
605    my ($timespan) = $$prog{'timespan'} =~ s/ /%20/g;
606    my @substr = split(/-/, $$prog{'timespan'});
607
608    my $url = $HEAD->{'source-info-url'} . '_layouts/15/Armstrong/ApplicationPages/EPGGetProgramsAndDetails.aspx/GetProgramDetails';
609    print STDERR " URL= $url \n" if $opt_debug;
610    t $url;
611
612    my $json = "{ 'programId':'$programId1', 'channelAcronym':'$channelAcronym1', 'hour':'$HoursToAddOrRemove', 'startHour':'$substr[0]', 'endHour':'$substr[1]' }";
613
614
615    # This is what the page returns. Looks like an old school delimited list
616    #    (c.f. OnCallGetProgramDetailsComplete JS code)
617    #
618    #    Anatomia de Grey T.9 Ep.23_#|$_O drama médico mais famoso da televisão centra a sua história nas vidas profissionais e pessoais de um grupo de médicos cirurgiões e dos seus supervisores._#|$_277055_resized_352x198.jpg_#|$_02:47 _#|$_ 03:28_#|$_RTP 1_#|$_2014-05-07T02:47:00+01:00_#|$_2014-05-07T03:28:00+01:00_#|$_false
619    #
620
621    # Emulate an AJAX post for the requested content
622    my $content = post_nice_json($url, $json);
623    my @data = split(/_#\|\$_/, $content->{'d'});
624
625    # We could check the title matches what we already have but why not just trust the 'id' is correct ;-)
626
627    $$prog{'desc'} = tidy $data[1];        # store the description in our prog hash
628    $$prog{'icon'} = "http://images.$DOMAIN/" . $data[2];        # [2] is a uri to the programme image
629
630    return;
631}
632
633
634# reformat the data to something acceptable to xmltv:::writer
635sub make_programme_hash {
636    my ( $ch_xmltv_id, $cur ) = @_;
637
638    my %prog;
639
640    $prog{channel} = $cur->{'channel'};
641
642    #$prog{channel} =~ s/\s/_/g;
643
644    $prog{'title'} = [ [ toUTF8( $cur->{'title'} ), $LANG ] ];
645    $prog{'sub-title'} = [ [ toUTF8( $cur->{'subtitle'} ), $LANG ] ] if $cur->{'subtitle'};
646    $prog{'category'} = [ [ toUTF8( $cur->{'category'} ), $LANG ] ] if $cur->{'category'};
647    $prog{'episode-num'} = [[ $cur->{'episode-num'}, 'xmltv_ns' ]] if $cur->{'episode-num'};
648    $prog{'start'} = $cur->{'start'} if $cur->{'start'};
649    $prog{'stop'} = $cur->{'stop'} if $cur->{'stop'};
650    $prog{'desc'} = [ [ toUTF8( $cur->{'desc'} ), $LANG ] ] if $cur->{'desc'};
651    $prog{'icon'} = [ { 'src' => $cur->{'icon'} } ] if $cur->{'icon'};
652    $prog{'credits'} = $cur->{'credits'} if $cur->{'credits'};
653    $prog{'date'}    = $cur->{'year'}    if $cur->{'year'};
654
655    return \%prog;
656}
657
658# get channel listing
659sub get_channels {
660    my $bar = new XMLTV::ProgressBar( 'getting list of channels', 1 )  if not $opt_quiet;
661    my %channels;
662
663    # retrieve channels via a dummy call to the schedule page
664    #   http://www.zon.pt/tv/guiaTV/Pages/GuiaTV.aspx
665    #  2014-05-19  http://www.nos.pt/particulares/televisao/guia-tv/Pages/default.aspx
666    my $url = $HEAD->{'source-data-url'} . 'Pages/default.aspx';
667    t $url;
668
669    my $tree  = get_nice_tree($url, '', 'UTF-8');
670
671    #  <div id="channels-list-container">
672    #    <ul id="channels-list-slider">
673    #        <li>
674    #            <span class="channel-number">001</span>
675    #            <span class="channel-logo">
676    #                <a href='/tv/guiaTV/Pages/Guia-TV-programacao.aspx?channelSigla=5' title='RTP 1'>
677    #                    <img src='/EPGChannelImages/RTP1.png' alt='logótipo RTP 1' />
678    #                </a>
679    #            </span>
680    #        </li>
681    #  2014-05-19
682    #        <li>
683    #            <span class="channel-logo">
684    #                <a href="/particulares/televisao/guia-tv/Pages/channel.aspx?channel=5" title="RTP 1">
685    #                    <img src="//images.nos.pt/EPGChannelImages/RTP1.png" alt="logótipo RTP 1">
686    #                </a>
687    #            </span>
688    #            <span class="channel-number">001</span>
689    #        </li>
690    #    http://images.nos.pt/EPGChannelImages/RTP1.png
691    #
692    if ( my $h = $tree->look_down('_tag' => 'div', 'id' => 'channels-list-container') ) {
693        if ( my $h2 = $h->look_down('_tag' => 'ul', 'id' => 'channels-list-slider') ) {
694            my @h3 = $h2->look_down('_tag' => 'li');
695            foreach my $elem (@h3) {
696                my ($channel_id, $channel_name, $channel_logo);
697                if ( my $h4 = $elem->look_down('_tag' => 'a') ) {
698                    $channel_name = toUTF8( $h4->attr('title') );
699                    ($channel_id) = $h4->attr('href') =~ /channel=(\d*)/;
700                }
701                if ( my $h4 = $elem->look_down('_tag' => 'img') ) {
702                    $channel_logo = 'http:' . $h4->attr('src');
703                }
704                # store the channel
705                my $ch =
706              {
707                'channel-name'  => $channel_name,
708                'display-name'  => [ [ $channel_name, $LANG ] ],
709                'id'            => $channel_id.'.'.$DOMAIN,
710                'icon'          => [ { 'src' => $channel_logo } ],
711              };
712                $channels{$channel_id} = $ch;
713                push @ch_all, $ch;
714
715            } #foreach
716        }
717    }
718    die "no channels could be found" if not keys %channels;
719
720    update $bar if not $opt_quiet;
721    $tree->delete;
722    return %channels;
723}
724
725sub nextday {
726    my $d = shift;
727    my $p = ParseDate($d);
728    my $n = DateCalc($p, '+ 1 day');
729    return UnixDate($n, '%Q');
730}
731
732sub get_icons { # deprecated
733    my %icons;
734    my $url= $HEAD->{"source-info-url"}."/Televisao/ListaProgramas.aspx?dia=0&package=9&cat=&channelSigla=";
735    my $chan;
736    my $tag;
737    my $addr;
738
739    my $bar = new XMLTV::ProgressBar('grabbing icons', scalar(keys(%channels)))
740      if not $opt_quiet;
741
742    foreach (keys %channels) {
743        my $tb = get_nice_tree $url.encode_entities($_), \&tidy;
744
745        $tag = $tb->look_down('_tag' => 'img',
746        sub {
747            return ($_[0]->attr('src') =~ m/Shared\/img\/televisao\/BackofficeImages\//);
748        });
749        update $bar if not $opt_quiet;
750
751        unless(ref($tag) eq "HTML::Element") {
752                $tb->delete;
753                next;
754        };
755
756        $icons{$_} = $tag->attr('src');
757        $icons{$_} =~ s/\.\./$HEAD->{'source-info-url'}/;
758
759        $tb->delete;
760    }
761    $bar->finish() if not $opt_quiet;
762
763    return %icons;
764}
765
766
767# Get the user's home directory
768sub get_default_dir {
769    my $winhome = $ENV{HOMEDRIVE} . $ENV{HOMEPATH}
770            if defined( $ENV{HOMEDRIVE} ) and defined( $ENV{HOMEPATH} );
771
772    my $home = $ENV{HOME} || $winhome || ".";
773    return $home;
774}
775
776# Set default cache dir = $HOME/.xmltv/cache
777sub get_default_cachedir {
778    return get_default_dir() . "/.xmltv/cache";
779}
780