1#!/usr/local/bin/perl
2
3=pod
4
5=head1 NAME
6
7tv_grab_ch_bluewin - Grab TV listings for Switzerland (from fernsehen.bluewin.ch webpage).
8
9=head1 SYNOPSIS
10
11tv_grab_ch_bluewin --help
12
13tv_grab_ch_bluewin [--config-file FILE] --configure [--gui OPTION]
14
15tv_grab_ch_bluewin [--config-file FILE] [--output FILE] [--quiet]
16           [--days N] [--offset N]
17
18tv_grab_ch_bluewin --list-channels
19
20tv_grab_de_bluewin --capabilities
21
22tv_grab_de_bluewin --version
23
24=head1 DESCRIPTION
25
26Output TV listings for several channels available in Switzerland and
27(partly) central Europe.
28The data comes from fernsehen.bluewin.ch. The grabber relies on
29parsing HTML so it might stop working at any time.
30
31First run B<tv_grab_ch_bluewin --configure> to choose, which channels
32you want to download. Then running B<tv_grab_ch_bluewin> with no
33arguments will output listings in XML format to standard output.
34
35B<--configure> Ask for each available channel whether to download
36and write the configuration file.
37
38B<--config-file FILE> Set the name of the configuration file, the
39default is B<~/.xmltv/tv_grab_ch_bluewin.conf>.  This is the file
40written by B<--configure> and read when grabbing.
41
42B<--gui OPTION> Use this option to enable a graphical interface to be used.
43OPTION may be 'Tk', or left blank for the best available choice.
44Additional allowed values of OPTION are 'Term' for normal terminal output
45(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.
46
47B<--output FILE> Write to FILE rather than standard output.
48
49B<--days N> Grab N days.  The default is fourteen.
50
51B<--offset N> Start N days in the future.  The default is to start
52>from now on (= zero).
53
54B<--quiet> Suppress the progress messages normally written to standard
55error.
56
57B<--list-channels> Write output giving <channel> elements for every
58channel available (ignoring the config file), but no programmes.
59
60B<--capabilities> Show which capabilities the grabber supports. For more
61information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>
62
63B<--version> Show the version of the grabber.
64
65B<--help> print a help message and exit.
66
67
68=head1 SEE ALSO
69
70L<xmltv(5)>.
71
72=head1 AUTHOR
73
74Daniel Bittel <daniel.bittel@solnet.ch>. Inspired by tv_grab_ch by Stefan Siegl.
75Adaption to the new design of bluewin by Ren� B�hlmann.
76
77=head1 BUGS
78
79If you happen to find a bug, you're requested to send a mail to me
80at B<daniel.bittel@solnet.ch> or to one of the XMLTV mailing lists, see webpages
81at http://sourceforge.net/projects/xmltv/.
82
83=cut
84
85use warnings;
86use strict;
87use Time::Local;
88use Date::Manip;
89use XMLTV::Version '$Id: tv_grab_ch_bluewin.in,v 1.6 2010/09/02 05:07:40 rmeden Exp $ ';
90use XMLTV::Capabilities qw/baseline manualconfig cache share/;
91use XMLTV::Description 'Switzerland (www.bluewin.ch)';
92use Getopt::Long;
93use HTML::TreeBuilder;
94use HTML::Entities;
95use URI::Escape;
96use XMLTV;
97use XMLTV::Ask;
98use XMLTV::ProgressBar;
99use XMLTV::DST;
100use XMLTV::Config_file;
101use XMLTV::Mode;
102use XMLTV::Get_nice;
103use XMLTV::Memoize;
104use XMLTV::Usage<<END
105$0: get Swiss television listings from www.bluewin.ch in XMLTV format
106To configure: $0 --configure [--config-file FILE] [--gui OPTION]
107To grab data: $0 [--config-file FILE] [--output FILE] [--quiet]
108                 [--days N] [--offset N]
109Channel List: $0 --list-channels
110To show capabilities: $0 --capabilities
111To show version: $0 --version
112
113Don't try to run this grabber between midnight and ~6 o'clock in the morning
114to get data for the current day (ergo: without offset):
115When viewing Bluewins website after midnight, I found they only display
116data from early in the morning of that day.
117END
118  ;
119
120# Use Log::TraceMessages if installed.
121BEGIN {
122    eval { require Log::TraceMessages };
123    if ($@) {
124        *t = sub {};
125        *d = sub { '' };
126    }
127    else {
128        *t = \&Log::TraceMessages::t;
129        *d = \&Log::TraceMessages::d;
130    }
131}
132
133
134
135## our own prototypes first ...
136sub get_channels();
137sub channel_id($);
138sub get_page($);
139sub grab_channel_group($$);
140
141## attributes of xmltv root element
142my $head = {
143    'source-data-url'      => 'http://epg.sso.bluewin.ch/de/index.php/channelview/',
144    'source-info-url'      => 'http://epg.sso.bluewin.ch/de/detailview.php?action=DetailView&BroadcastID=',
145    'generator-info-name'  => 'XMLTV',
146    'generator-info-url'   => 'http://xmltv.org/',
147};
148
149my @groupid = ('10','11','12','13','14','15','16','17','18','20','22','25');
150#my @groupid = ('12');
151
152## the timezone fernsehen.ch lives in is, CET/CEST
153my constant $TZ = "+0100";
154my constant $lang = "de";
155
156
157
158## Parse argv now.  First do undocumented --cache option.
159XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
160
161
162
163my $opt_configure;
164my $opt_config_file;
165my $opt_gui;
166my $opt_output;
167my $opt_days = 14;
168my $opt_offset = 0;
169my $opt_quiet = 0;
170my $opt_list_channels;
171my $opt_help;
172my $opt_share;
173
174GetOptions(
175    'configure'      => \$opt_configure,
176    'config-file=s'  => \$opt_config_file,
177    'gui:s'          => \$opt_gui,
178    'output=s'       => \$opt_output,
179    'days=i'         => \$opt_days,
180    'offset=i'       => \$opt_offset,
181    'quiet'          => \$opt_quiet,
182    'list-channels'  => \$opt_list_channels,
183    'help'           => \$opt_help,
184    'share=s'        => \$opt_share,
185) or usage(0);
186
187usage(1) if $opt_help;
188
189XMLTV::Ask::init($opt_gui);
190
191## make sure offset+days arguments are within range
192die "neither offset nor days may be negative"
193  if($opt_offset < 0 || $opt_days < 0);
194
195
196## calculate global start/stop times ...
197my $grab_start = DateCalc("00:00:00", "+ $opt_offset days");
198my $grab_stop = DateCalc($grab_start, "+ $opt_days days");
199
200
201my $mode = XMLTV::Mode::mode('grab', # default value
202    $opt_configure 	=> 'configure',
203    $opt_list_channels	=> 'list-channels',
204);
205
206
207
208## initialize config file support
209my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_ch_bluewin', $opt_quiet);
210my @config_lines;
211
212if($mode eq 'configure') {
213    XMLTV::Config_file::check_no_overwrite($config_file);
214}
215elsif($mode eq 'grab' || $mode eq 'list-channels') {
216    @config_lines = XMLTV::Config_file::read_lines($config_file);
217}
218else { die("never heard of XMLTV mode $mode, sorry :-(") }
219
220
221
222## hey, we cant live without channel data, so lets get those now!
223my $bar = new XMLTV::ProgressBar( 'getting list of channels', scalar(@groupid) )
224    if not $opt_quiet;
225
226my %channels = get_channels();
227$bar->finish() if not $opt_quiet;
228
229
230
231# share/ directory for storing channel mapping files.  This next line
232# is altered by processing through tv_grab_ch_bluewin.PL.  But we can
233# use the current directory instead of share/tv_grab_ch_bluewin for
234# development.
235#
236# The 'source' file tv_grab_ch_bluewin.in has $SHARE_DIR undef, which
237# means use the current directory.  In any case the directory can be
238# overridden with the --share option (useful for testing).
239#
240my $SHARE_DIR = undef;
241
242$SHARE_DIR = $opt_share if defined $opt_share;
243my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_ch_bluewin" : '.';
244
245
246# Read the file with channel mappings.
247(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;
248my (%chid_mapping, %seen);
249my $line_num = 0;
250foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
251    ++ $line_num;
252    next unless defined;
253    my $where = "$CHANNEL_NAMES_FILE:$line_num";
254
255    my @fields = split m/:/;
256    print @fields   if(@fields != 2 );
257    die "$where: wrong number of fields"
258      if(@fields != 2 );
259
260    my ($xmltv_id, $bluewin_ch_id) = @fields;
261    warn "$where: bluewin.ch id $bluewin_ch_id seen already\n"
262      if defined $chid_mapping{$bluewin_ch_id};
263    $chid_mapping{$bluewin_ch_id} = $xmltv_id;
264
265    warn "$where: XMLTV id $xmltv_id seen already\n"
266      if $seen{$xmltv_id}++;
267}
268
269my @requests;
270
271## read our configuration file now
272my $line = 1;
273foreach(@config_lines) {
274    $line ++;
275    next unless defined;
276
277    if (/^channel:?\s+(\S+)/) {
278	warn("\nConfigured channel $1 not available anymore. \nPlease reconfigure tv_grab_ch_bluewin.\n"),
279	  next unless(defined($channels{$1}));
280	push @requests, $1;
281    }
282    elsif (/^map:?\s+(\S+)\s+(\S+)/) {
283	# Override anything set in the channel_ids file.
284	$chid_mapping{$1} = $2;
285    }
286    else {
287	warn "$config_file:$line: bad line\n";
288    }
289}
290
291## if we're requested to do so, write out a new config file ...
292if ($mode eq 'configure') {
293    open(CONFIG, ">$config_file") or die("cannot write to $config_file, due to: $!");
294
295    ## now let's annoy the user, sorry, I meant ask ..
296    my @chs = sort keys %channels;
297    my @names = map { $channels{$_} } @chs;
298    my @qs = map { "add channel $_?" } @names;
299    my @want = ask_many_boolean(1, @qs);
300
301    foreach (@chs) {
302	my $w = shift @want;
303	my $chname = shift @names;
304
305	warn("cannot read input, stopping to ask questions ..."), last if not defined $w;
306
307	print CONFIG '#' if not $w; #- comment line out if user answer 'no'
308
309	# shall we store the display name in the config file?
310	# leave it in, since it probably makes it a lot easier for the
311	# user to choose which channel to comment/uncommet - when manually
312	# viing the config file -- are there people who do that?
313	print CONFIG "channel $_ #$chname\n";
314    }
315
316    close CONFIG or warn "unable to nicely close the config file: $!";
317    say("Finished configuration.");
318
319    exit();
320}
321
322
323
324## well, we don't have to write a config file, so, probably it's some xml stuff :)
325## if not, let's go dying ...
326die unless($mode eq 'grab' or $mode eq 'list-channels');
327
328my %writer_args;
329if (defined $opt_output) {
330    my $handle = new IO::File(">$opt_output");
331    die "cannot write to output file, $opt_output: $!" unless (defined $handle);
332    $writer_args{'OUTPUT'} = $handle;
333}
334
335$writer_args{'encoding'} = 'ISO-8859-1';
336
337
338if( defined( $opt_days )) {
339    $writer_args{offset} = $opt_offset;
340	$writer_args{days} = $opt_days;
341	$writer_args{cutoff} = "060000";
342}
343
344## create our writer object
345my $writer = new XMLTV::Writer(%writer_args);
346$writer->start($head);
347
348
349
350if ($mode eq 'list-channels') {
351    foreach (keys %channels) {
352        my %channel = ('id'           => channel_id($_),
353                       'display-name' => [[$channels{$_}, $lang]],
354			'icon'        => [(0 => "http://epg.sso.bluewin.ch/images/tvchannel_logos/$_")]);
355        $writer->write_channel(\%channel);
356    }
357
358    $writer->end();
359    exit();
360}
361
362
363
364## there's only one thing, why we might exist: write out tvdata!
365die unless ($mode eq 'grab');
366die "No channels specified, run me with --configure flag\n" unless(scalar(@requests));
367
368
369
370## write out <channel> tags
371foreach(@requests) {
372    my $id = channel_id($_);
373    my %icon = ('src' => "http://epg.sso.bluewin.ch/images/tvchannel_logos/$_");
374    my %channel = ('id'           => $id,
375                   'display-name' => [[$channels{$_}, $lang]],
376		    'icon'        => [{'src' => "http://epg.sso.bluewin.ch/images/tvchannel_logos/$_"}]);
377    $writer->write_channel(\%channel);
378}
379
380
381## the page doesn't specify the year when the programmes begin or end, thus
382## we need to guess, store current year and month globally as needed for every
383## programme ...
384my ($cur_year, $cur_month) = ParseDate('now') =~ m/(....)(..)/;
385
386
387## write out <programme> tags
388$bar = new XMLTV::ProgressBar('grabbing channels       ', scalar(@requests)*13*$opt_days)
389  if not $opt_quiet;
390
391foreach my $id (@groupid) {
392    grab_channel_group($id,\@requests);
393}
394
395$bar->finish()
396    unless($opt_quiet);
397
398## hey, looks like we've finished ...
399$writer->end();
400
401
402
403## channel_id($s) :: turn site channel id into an xmltv id
404sub channel_id($) {
405    for (my $s = shift) {
406        $_ = lc(defined($chid_mapping{$_}) ? $chid_mapping{$_} : "$_.bluewin.ch");
407	$_ = "C$_" if /^\d/;
408	return $_;
409    }
410}
411
412
413sub array_contains($$) {
414
415    my $reqs = shift;
416    my $element = shift;
417
418    foreach (@$reqs) {
419
420        if ($_ eq $element) {
421            return 1;
422        }
423    }
424    return 0;
425}
426
427## grab_channel($start, $laststart, $laststop, $stop)
428sub grab_channel_group($$) {
429    my ($start, $laststart, $laststop, $stop);
430
431    my $group = shift;
432    my $requests = shift;
433
434    my $channel;
435
436    my $grabDate = $grab_start;
437  grab_channel_loop:
438    my $tb = HTML::TreeBuilder->new();
439    my $got = 0;
440
441    my $loop_date = timelocal(0,0,6,substr($grabDate,6,2),substr($grabDate,4,2)-1,substr($grabDate,0,4));
442
443
444    my $url=$head->{q(source-data-url)};
445
446
447    $url = "$url?action=ChannelView&SupergroupID=".$group."&date=$loop_date&segments=11111";
448
449    $tb->parse(get_page($url))
450      or die "cannot parse content of $url";
451    $tb->eof;
452
453    my $col=0;
454
455    my @channels;
456
457    #We need to know the channel order
458
459    foreach($tb->look_down('_tag' => 'div', 'class' => 'segment_logo' )) {
460        next unless(ref($_) eq "HTML::Element");
461
462        my $chan = $_;
463
464        my $img=undef;
465
466        foreach($chan->look_down('_tag' => 'img')) {
467	    next unless(ref($_) eq "HTML::Element");
468
469	    if (ref($img) eq "HTML::Element") {
470                die "Multiple img tags!";
471            }
472	    if ($_->attr('title')) {
473                $img=$_;
474            }
475        }
476
477        my $channel_name = $img->attr('title');
478        my $logo = $img->attr('src');
479        $logo =~ m/tvchannel_logos\/(.+\.[a-z][a-z][a-z])/ or die "unable to extract logo";
480        $logo = $1;
481
482        push(@channels,$logo);
483	$col++;
484    }
485
486    $col=0;
487
488    foreach($tb->look_down('_tag' => 'div', 'class' => 'segment')) {
489        next unless(ref($_) eq "HTML::Element");
490
491        if (not array_contains($requests,$channels[$col])) {
492            my $len =@channels;
493            $col = ($col+1) % $len;
494            next;
495        }
496
497        	$bar->update() if not $opt_quiet;
498
499	my $segment = $_;
500
501 	my @classes=("segment_content","segment_content_2");
502    foreach(@classes) {
503
504	foreach($segment->look_down('_tag' => 'div', 'class' => $_)) {
505            next unless(ref($_) eq "HTML::Element");
506
507
508            my $segment_content = $_;
509
510	    my $id=undef;
511
512            for (@{$segment_content->extract_links('a')}) {
513		if ($id) {
514                    die "Multiple link tags!";
515		}
516                my($link, $element, $attr, $tag) = @$_;
517		$link =~ m/\(([0-9]+)\)/ or die "Unable to extract id!";
518		$id = $1;
519            }
520
521            if (not $id) {
522		#$segment_content->dump();
523		next;
524                die "Unable to get id!";
525	    }
526
527
528            my $prog_time=$segment_content->look_down('_tag' => 'div', 'class' => 'programm_time ');
529            if (ref($prog_time) ne "HTML::Element") {
530               die "Time tag not found";
531            }
532            my @content = $prog_time->content_list();
533            my $time = $content[0];
534
535            if (not $time) {
536                die "Unable to get time!";
537	    }
538
539
540
541            my ($hh,$mm) = split(/:/,$time);
542            my $realstartdate = $grabDate;
543            if ($hh<6) {
544                $realstartdate = &DateCalc($realstartdate,"+ 1 day");
545            }
546            my $file_date = substr($realstartdate,6,2).'.'.substr($realstartdate,4,2). '.'.substr($realstartdate,0,4);
547
548	    my @details = get_details($id,$channels[$col],$file_date,$time);
549
550            my $realenddate = $realstartdate;
551
552            my $emm;
553            my $ehh;
554            if (length($details[4])) {
555                ($ehh,$emm) = split(/:/,$details[4]);
556            }
557            else {
558                $emm=$mm;
559                $ehh=($hh+1)%24;
560            }
561
562            if ($ehh<$hh) {
563                $realenddate = &DateCalc($realenddate,"+ 1 day");
564            }
565            if (length($ehh)<2) {
566                $ehh="0$ehh";
567            }
568
569            my %show;
570	    $show{channel} = channel_id($channels[$col]);
571
572
573            my $startdate = substr($realstartdate,0,4) . substr($realstartdate,4,2) . substr($realstartdate,6,2);
574            my $enddate = substr($realenddate,0,4) . substr($realenddate,4,2) . substr($realenddate,6,2);
575
576            $show{start} = "$startdate".$hh.$mm."00 $TZ";
577            $show{stop} = "$enddate".$ehh.$emm."00 $TZ";
578            $show{category} = [[$details[2],$lang]];
579            $show{'title'} = [[$details[0],$lang]];
580            if (length($details[1])) {
581                $show{'sub-title'} = [[$details[1],$lang]];
582            }
583            if (length($details[5])) {
584                $show{'year'} = [[$details[5],$lang]];
585            }
586
587
588	    $writer->write_programme(\%show);
589
590       }
591     }
592
593	my $len =@channels;
594        $col = ($col+1)%$len;
595    }
596    $tb->delete();
597
598    $grabDate = &DateCalc($grabDate,"+ 1 day");
599
600    if(Date_Cmp($grab_stop, $grabDate) > 0) {
601		goto grab_channel_loop;
602    }
603
604}
605
606## get_details ($id, $channel, $date, $time)
607sub get_details ($$$$) {
608	my $id= shift;
609        my $channel = shift;
610        my $date = shift;
611        my $time = shift;
612
613	my $url=$head->{q(source-info-url)} . $id;
614
615	my @result = ("","","","","","","","","","","","","");
616	## tilte, episode title, cat, description, endtime, year, actors, director,
617	## writer, presenter, audio, subtitles, previously-shown
618        my $len = @result;
619        for (my $i=0; $i<$len;$i++) {
620            $result[$i]="";
621        }
622
623	my $tb=new HTML::TreeBuilder();
624	$tb->parse(get_page($url))
625	  or die "cannot parse content of $url";
626	$tb->eof;
627
628        my $tit1 = $tb->look_down('_tag' => 'span', 'class' => 'tit1');
629        if (ref($tit1) ne "HTML::Element") {
630           die "Title tag not found";
631        }
632
633        my @content = $tit1->content_list();
634        $result[0] = $content[0];
635        chop($result[0]);
636        chomp($result[0]);
637
638        foreach($tit1->look_down('_tag' => 'wbr')) {
639            my $stit = $_;
640            if (ref($stit) eq "HTML::Element") {
641               $stit = $stit->right();
642            }
643            if (length($stit)) {
644               if (length($result[1])) {
645                   chop($result[1]);
646                   chomp($result[1]);
647                   $result[0].="- $result[1]";
648               }
649               $result[1] = $stit;
650            }
651        }
652
653        my $titd = $tb->look_down('_tag' => 'span', 'class' => 'titdblue');
654        if (ref($titd) ne "HTML::Element") {
655           die "Category tag not found";
656        }
657
658        @content = $titd->content_list();
659	$content[0] =~ m/([^:]+):/ or die "unable to extract category";
660        $result[2] = $1;
661
662        if ($content[0] =~ m/([1-2][0-9][0-9][0-9])/) {
663            $content[5] = $1;
664        }
665
666
667        my $desc = $tb->look_down('_tag' => 'div', 'class' => 'text');
668        if (ref($desc) ne "HTML::Element") {
669           die "Text tag not found";
670        }
671
672        @content = $desc->content_list();
673        $result[3] = $content[$#content];
674
675        #<table border="0" cellpadding="0" cellspacing="5" width="100%" style="border: 1px solid #99ccff;"
676        my $bc = $tb->look_down('_tag' => 'table', 'border' => '0', 'cellspacing' => '5', 'width' => '100%');
677        @content = $bc->content_list();
678        foreach(@content) {
679            next unless(ref($_) eq "HTML::Element");
680
681            my $check = $_->look_down('_tag' => 'img', 'src' => '/images/tvchannel_logos/'.$channel);
682            if (ref($check) ne "HTML::Element") {
683                next;
684            }
685
686            if (not $_->as_text() =~ m/$date.*$time \- ([0-2][0-9]:[0-6][0-9])/) {
687                next;
688            }
689
690            $result[4] = $1;
691
692
693        }
694
695
696    $tb->delete();
697
698	return @result;
699}
700
701## get channel listing
702sub get_channels() {
703    my %channels;
704    my $url=$head->{q(source-data-url)};
705
706    my $tb=new HTML::TreeBuilder();
707
708    ## getting the channels directly selectable
709    foreach(@groupid) {
710
711        $tb->parse(get_page($url ."?action=ChannelView&SupergroupID=".$_."&date=1164430800&segments=00000"))
712            or die "cannot parse content of $url";
713        $tb->eof;
714
715        foreach($tb->look_down('_tag' => 'div', 'class' => 'segment_logo' )) {
716            next unless(ref($_) eq "HTML::Element");
717
718            my $chan = $_;
719
720            my $img=undef;
721
722            foreach($chan->look_down('_tag' => 'img')) {
723	        next unless(ref($_) eq "HTML::Element");
724
725	        if (ref($img) eq "HTML::Element") {
726	            die "Multiple img tags!";
727                }
728	        if ($_->attr('title')) {
729	            $img=$_;
730                }
731            }
732
733            my $channel_name = $img->attr('title');
734            my $logo = $img->attr('src');
735            $logo =~ m/tvchannel_logos\/(.+\.[a-z][a-z][a-z])/ or die "unable to extract logo";
736            $logo = $1;
737
738            $channels{$logo} = $channel_name;
739        }
740	$bar->update() if not $opt_quiet;
741
742
743    }
744
745    $tb->delete;
746    return %channels;
747}
748
749
750
751## get_page($url) :: try to download $url via http://, look for closing </body> tag or die
752sub get_page($) {
753    my $url = shift;
754    my $retry = 0;
755
756    local $SIG{__DIE__} = sub { die "\n$url: $_[0]" };
757
758    while($retry < 2) {
759        my $got = eval { get_nice($url . ($retry ? "&retry=$retry" : "")); };
760        $retry ++;
761
762        next if($@); # unable to download, doesn't look too good for us.
763        return $got;
764    }
765
766    die "cannot grab webpage $url (tried $retry times). giving up. sorry";
767}
768