1#!/usr/local/bin/perl 2 3=pod 4 5=head1 NAME 6 7tv_grab_ch_bluewin - Grab TV listings for Switzerland (from fernsehen.bluewin.ch webpage). 8 9=head1 SYNOPSIS 10 11tv_grab_ch_bluewin --help 12 13tv_grab_ch_bluewin [--config-file FILE] --configure [--gui OPTION] 14 15tv_grab_ch_bluewin [--config-file FILE] [--output FILE] [--quiet] 16 [--days N] [--offset N] 17 18tv_grab_ch_bluewin --list-channels 19 20tv_grab_de_bluewin --capabilities 21 22tv_grab_de_bluewin --version 23 24=head1 DESCRIPTION 25 26Output TV listings for several channels available in Switzerland and 27(partly) central Europe. 28The data comes from fernsehen.bluewin.ch. The grabber relies on 29parsing HTML so it might stop working at any time. 30 31First run B<tv_grab_ch_bluewin --configure> to choose, which channels 32you want to download. Then running B<tv_grab_ch_bluewin> with no 33arguments will output listings in XML format to standard output. 34 35B<--configure> Ask for each available channel whether to download 36and write the configuration file. 37 38B<--config-file FILE> Set the name of the configuration file, the 39default is B<~/.xmltv/tv_grab_ch_bluewin.conf>. This is the file 40written by B<--configure> and read when grabbing. 41 42B<--gui OPTION> Use this option to enable a graphical interface to be used. 43OPTION may be 'Tk', or left blank for the best available choice. 44Additional allowed values of OPTION are 'Term' for normal terminal output 45(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar. 46 47B<--output FILE> Write to FILE rather than standard output. 48 49B<--days N> Grab N days. The default is fourteen. 50 51B<--offset N> Start N days in the future. The default is to start 52>from now on (= zero). 53 54B<--quiet> Suppress the progress messages normally written to standard 55error. 56 57B<--list-channels> Write output giving <channel> elements for every 58channel available (ignoring the config file), but no programmes. 59 60B<--capabilities> Show which capabilities the grabber supports. For more 61information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities> 62 63B<--version> Show the version of the grabber. 64 65B<--help> print a help message and exit. 66 67 68=head1 SEE ALSO 69 70L<xmltv(5)>. 71 72=head1 AUTHOR 73 74Daniel Bittel <daniel.bittel@solnet.ch>. Inspired by tv_grab_ch by Stefan Siegl. 75Adaption to the new design of bluewin by Ren� B�hlmann. 76 77=head1 BUGS 78 79If you happen to find a bug, you're requested to send a mail to me 80at B<daniel.bittel@solnet.ch> or to one of the XMLTV mailing lists, see webpages 81at http://sourceforge.net/projects/xmltv/. 82 83=cut 84 85use warnings; 86use strict; 87use Time::Local; 88use Date::Manip; 89use XMLTV::Version '$Id: tv_grab_ch_bluewin.in,v 1.6 2010/09/02 05:07:40 rmeden Exp $ '; 90use XMLTV::Capabilities qw/baseline manualconfig cache share/; 91use XMLTV::Description 'Switzerland (www.bluewin.ch)'; 92use Getopt::Long; 93use HTML::TreeBuilder; 94use HTML::Entities; 95use URI::Escape; 96use XMLTV; 97use XMLTV::Ask; 98use XMLTV::ProgressBar; 99use XMLTV::DST; 100use XMLTV::Config_file; 101use XMLTV::Mode; 102use XMLTV::Get_nice; 103use XMLTV::Memoize; 104use XMLTV::Usage<<END 105$0: get Swiss television listings from www.bluewin.ch in XMLTV format 106To configure: $0 --configure [--config-file FILE] [--gui OPTION] 107To grab data: $0 [--config-file FILE] [--output FILE] [--quiet] 108 [--days N] [--offset N] 109Channel List: $0 --list-channels 110To show capabilities: $0 --capabilities 111To show version: $0 --version 112 113Don't try to run this grabber between midnight and ~6 o'clock in the morning 114to get data for the current day (ergo: without offset): 115When viewing Bluewins website after midnight, I found they only display 116data from early in the morning of that day. 117END 118 ; 119 120# Use Log::TraceMessages if installed. 121BEGIN { 122 eval { require Log::TraceMessages }; 123 if ($@) { 124 *t = sub {}; 125 *d = sub { '' }; 126 } 127 else { 128 *t = \&Log::TraceMessages::t; 129 *d = \&Log::TraceMessages::d; 130 } 131} 132 133 134 135## our own prototypes first ... 136sub get_channels(); 137sub channel_id($); 138sub get_page($); 139sub grab_channel_group($$); 140 141## attributes of xmltv root element 142my $head = { 143 'source-data-url' => 'http://epg.sso.bluewin.ch/de/index.php/channelview/', 144 'source-info-url' => 'http://epg.sso.bluewin.ch/de/detailview.php?action=DetailView&BroadcastID=', 145 'generator-info-name' => 'XMLTV', 146 'generator-info-url' => 'http://xmltv.org/', 147}; 148 149my @groupid = ('10','11','12','13','14','15','16','17','18','20','22','25'); 150#my @groupid = ('12'); 151 152## the timezone fernsehen.ch lives in is, CET/CEST 153my constant $TZ = "+0100"; 154my constant $lang = "de"; 155 156 157 158## Parse argv now. First do undocumented --cache option. 159XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux'); 160 161 162 163my $opt_configure; 164my $opt_config_file; 165my $opt_gui; 166my $opt_output; 167my $opt_days = 14; 168my $opt_offset = 0; 169my $opt_quiet = 0; 170my $opt_list_channels; 171my $opt_help; 172my $opt_share; 173 174GetOptions( 175 'configure' => \$opt_configure, 176 'config-file=s' => \$opt_config_file, 177 'gui:s' => \$opt_gui, 178 'output=s' => \$opt_output, 179 'days=i' => \$opt_days, 180 'offset=i' => \$opt_offset, 181 'quiet' => \$opt_quiet, 182 'list-channels' => \$opt_list_channels, 183 'help' => \$opt_help, 184 'share=s' => \$opt_share, 185) or usage(0); 186 187usage(1) if $opt_help; 188 189XMLTV::Ask::init($opt_gui); 190 191## make sure offset+days arguments are within range 192die "neither offset nor days may be negative" 193 if($opt_offset < 0 || $opt_days < 0); 194 195 196## calculate global start/stop times ... 197my $grab_start = DateCalc("00:00:00", "+ $opt_offset days"); 198my $grab_stop = DateCalc($grab_start, "+ $opt_days days"); 199 200 201my $mode = XMLTV::Mode::mode('grab', # default value 202 $opt_configure => 'configure', 203 $opt_list_channels => 'list-channels', 204); 205 206 207 208## initialize config file support 209my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_ch_bluewin', $opt_quiet); 210my @config_lines; 211 212if($mode eq 'configure') { 213 XMLTV::Config_file::check_no_overwrite($config_file); 214} 215elsif($mode eq 'grab' || $mode eq 'list-channels') { 216 @config_lines = XMLTV::Config_file::read_lines($config_file); 217} 218else { die("never heard of XMLTV mode $mode, sorry :-(") } 219 220 221 222## hey, we cant live without channel data, so lets get those now! 223my $bar = new XMLTV::ProgressBar( 'getting list of channels', scalar(@groupid) ) 224 if not $opt_quiet; 225 226my %channels = get_channels(); 227$bar->finish() if not $opt_quiet; 228 229 230 231# share/ directory for storing channel mapping files. This next line 232# is altered by processing through tv_grab_ch_bluewin.PL. But we can 233# use the current directory instead of share/tv_grab_ch_bluewin for 234# development. 235# 236# The 'source' file tv_grab_ch_bluewin.in has $SHARE_DIR undef, which 237# means use the current directory. In any case the directory can be 238# overridden with the --share option (useful for testing). 239# 240my $SHARE_DIR = undef; 241 242$SHARE_DIR = $opt_share if defined $opt_share; 243my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_ch_bluewin" : '.'; 244 245 246# Read the file with channel mappings. 247(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s; 248my (%chid_mapping, %seen); 249my $line_num = 0; 250foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) { 251 ++ $line_num; 252 next unless defined; 253 my $where = "$CHANNEL_NAMES_FILE:$line_num"; 254 255 my @fields = split m/:/; 256 print @fields if(@fields != 2 ); 257 die "$where: wrong number of fields" 258 if(@fields != 2 ); 259 260 my ($xmltv_id, $bluewin_ch_id) = @fields; 261 warn "$where: bluewin.ch id $bluewin_ch_id seen already\n" 262 if defined $chid_mapping{$bluewin_ch_id}; 263 $chid_mapping{$bluewin_ch_id} = $xmltv_id; 264 265 warn "$where: XMLTV id $xmltv_id seen already\n" 266 if $seen{$xmltv_id}++; 267} 268 269my @requests; 270 271## read our configuration file now 272my $line = 1; 273foreach(@config_lines) { 274 $line ++; 275 next unless defined; 276 277 if (/^channel:?\s+(\S+)/) { 278 warn("\nConfigured channel $1 not available anymore. \nPlease reconfigure tv_grab_ch_bluewin.\n"), 279 next unless(defined($channels{$1})); 280 push @requests, $1; 281 } 282 elsif (/^map:?\s+(\S+)\s+(\S+)/) { 283 # Override anything set in the channel_ids file. 284 $chid_mapping{$1} = $2; 285 } 286 else { 287 warn "$config_file:$line: bad line\n"; 288 } 289} 290 291## if we're requested to do so, write out a new config file ... 292if ($mode eq 'configure') { 293 open(CONFIG, ">$config_file") or die("cannot write to $config_file, due to: $!"); 294 295 ## now let's annoy the user, sorry, I meant ask .. 296 my @chs = sort keys %channels; 297 my @names = map { $channels{$_} } @chs; 298 my @qs = map { "add channel $_?" } @names; 299 my @want = ask_many_boolean(1, @qs); 300 301 foreach (@chs) { 302 my $w = shift @want; 303 my $chname = shift @names; 304 305 warn("cannot read input, stopping to ask questions ..."), last if not defined $w; 306 307 print CONFIG '#' if not $w; #- comment line out if user answer 'no' 308 309 # shall we store the display name in the config file? 310 # leave it in, since it probably makes it a lot easier for the 311 # user to choose which channel to comment/uncommet - when manually 312 # viing the config file -- are there people who do that? 313 print CONFIG "channel $_ #$chname\n"; 314 } 315 316 close CONFIG or warn "unable to nicely close the config file: $!"; 317 say("Finished configuration."); 318 319 exit(); 320} 321 322 323 324## well, we don't have to write a config file, so, probably it's some xml stuff :) 325## if not, let's go dying ... 326die unless($mode eq 'grab' or $mode eq 'list-channels'); 327 328my %writer_args; 329if (defined $opt_output) { 330 my $handle = new IO::File(">$opt_output"); 331 die "cannot write to output file, $opt_output: $!" unless (defined $handle); 332 $writer_args{'OUTPUT'} = $handle; 333} 334 335$writer_args{'encoding'} = 'ISO-8859-1'; 336 337 338if( defined( $opt_days )) { 339 $writer_args{offset} = $opt_offset; 340 $writer_args{days} = $opt_days; 341 $writer_args{cutoff} = "060000"; 342} 343 344## create our writer object 345my $writer = new XMLTV::Writer(%writer_args); 346$writer->start($head); 347 348 349 350if ($mode eq 'list-channels') { 351 foreach (keys %channels) { 352 my %channel = ('id' => channel_id($_), 353 'display-name' => [[$channels{$_}, $lang]], 354 'icon' => [(0 => "http://epg.sso.bluewin.ch/images/tvchannel_logos/$_")]); 355 $writer->write_channel(\%channel); 356 } 357 358 $writer->end(); 359 exit(); 360} 361 362 363 364## there's only one thing, why we might exist: write out tvdata! 365die unless ($mode eq 'grab'); 366die "No channels specified, run me with --configure flag\n" unless(scalar(@requests)); 367 368 369 370## write out <channel> tags 371foreach(@requests) { 372 my $id = channel_id($_); 373 my %icon = ('src' => "http://epg.sso.bluewin.ch/images/tvchannel_logos/$_"); 374 my %channel = ('id' => $id, 375 'display-name' => [[$channels{$_}, $lang]], 376 'icon' => [{'src' => "http://epg.sso.bluewin.ch/images/tvchannel_logos/$_"}]); 377 $writer->write_channel(\%channel); 378} 379 380 381## the page doesn't specify the year when the programmes begin or end, thus 382## we need to guess, store current year and month globally as needed for every 383## programme ... 384my ($cur_year, $cur_month) = ParseDate('now') =~ m/(....)(..)/; 385 386 387## write out <programme> tags 388$bar = new XMLTV::ProgressBar('grabbing channels ', scalar(@requests)*13*$opt_days) 389 if not $opt_quiet; 390 391foreach my $id (@groupid) { 392 grab_channel_group($id,\@requests); 393} 394 395$bar->finish() 396 unless($opt_quiet); 397 398## hey, looks like we've finished ... 399$writer->end(); 400 401 402 403## channel_id($s) :: turn site channel id into an xmltv id 404sub channel_id($) { 405 for (my $s = shift) { 406 $_ = lc(defined($chid_mapping{$_}) ? $chid_mapping{$_} : "$_.bluewin.ch"); 407 $_ = "C$_" if /^\d/; 408 return $_; 409 } 410} 411 412 413sub array_contains($$) { 414 415 my $reqs = shift; 416 my $element = shift; 417 418 foreach (@$reqs) { 419 420 if ($_ eq $element) { 421 return 1; 422 } 423 } 424 return 0; 425} 426 427## grab_channel($start, $laststart, $laststop, $stop) 428sub grab_channel_group($$) { 429 my ($start, $laststart, $laststop, $stop); 430 431 my $group = shift; 432 my $requests = shift; 433 434 my $channel; 435 436 my $grabDate = $grab_start; 437 grab_channel_loop: 438 my $tb = HTML::TreeBuilder->new(); 439 my $got = 0; 440 441 my $loop_date = timelocal(0,0,6,substr($grabDate,6,2),substr($grabDate,4,2)-1,substr($grabDate,0,4)); 442 443 444 my $url=$head->{q(source-data-url)}; 445 446 447 $url = "$url?action=ChannelView&SupergroupID=".$group."&date=$loop_date&segments=11111"; 448 449 $tb->parse(get_page($url)) 450 or die "cannot parse content of $url"; 451 $tb->eof; 452 453 my $col=0; 454 455 my @channels; 456 457 #We need to know the channel order 458 459 foreach($tb->look_down('_tag' => 'div', 'class' => 'segment_logo' )) { 460 next unless(ref($_) eq "HTML::Element"); 461 462 my $chan = $_; 463 464 my $img=undef; 465 466 foreach($chan->look_down('_tag' => 'img')) { 467 next unless(ref($_) eq "HTML::Element"); 468 469 if (ref($img) eq "HTML::Element") { 470 die "Multiple img tags!"; 471 } 472 if ($_->attr('title')) { 473 $img=$_; 474 } 475 } 476 477 my $channel_name = $img->attr('title'); 478 my $logo = $img->attr('src'); 479 $logo =~ m/tvchannel_logos\/(.+\.[a-z][a-z][a-z])/ or die "unable to extract logo"; 480 $logo = $1; 481 482 push(@channels,$logo); 483 $col++; 484 } 485 486 $col=0; 487 488 foreach($tb->look_down('_tag' => 'div', 'class' => 'segment')) { 489 next unless(ref($_) eq "HTML::Element"); 490 491 if (not array_contains($requests,$channels[$col])) { 492 my $len =@channels; 493 $col = ($col+1) % $len; 494 next; 495 } 496 497 $bar->update() if not $opt_quiet; 498 499 my $segment = $_; 500 501 my @classes=("segment_content","segment_content_2"); 502 foreach(@classes) { 503 504 foreach($segment->look_down('_tag' => 'div', 'class' => $_)) { 505 next unless(ref($_) eq "HTML::Element"); 506 507 508 my $segment_content = $_; 509 510 my $id=undef; 511 512 for (@{$segment_content->extract_links('a')}) { 513 if ($id) { 514 die "Multiple link tags!"; 515 } 516 my($link, $element, $attr, $tag) = @$_; 517 $link =~ m/\(([0-9]+)\)/ or die "Unable to extract id!"; 518 $id = $1; 519 } 520 521 if (not $id) { 522 #$segment_content->dump(); 523 next; 524 die "Unable to get id!"; 525 } 526 527 528 my $prog_time=$segment_content->look_down('_tag' => 'div', 'class' => 'programm_time '); 529 if (ref($prog_time) ne "HTML::Element") { 530 die "Time tag not found"; 531 } 532 my @content = $prog_time->content_list(); 533 my $time = $content[0]; 534 535 if (not $time) { 536 die "Unable to get time!"; 537 } 538 539 540 541 my ($hh,$mm) = split(/:/,$time); 542 my $realstartdate = $grabDate; 543 if ($hh<6) { 544 $realstartdate = &DateCalc($realstartdate,"+ 1 day"); 545 } 546 my $file_date = substr($realstartdate,6,2).'.'.substr($realstartdate,4,2). '.'.substr($realstartdate,0,4); 547 548 my @details = get_details($id,$channels[$col],$file_date,$time); 549 550 my $realenddate = $realstartdate; 551 552 my $emm; 553 my $ehh; 554 if (length($details[4])) { 555 ($ehh,$emm) = split(/:/,$details[4]); 556 } 557 else { 558 $emm=$mm; 559 $ehh=($hh+1)%24; 560 } 561 562 if ($ehh<$hh) { 563 $realenddate = &DateCalc($realenddate,"+ 1 day"); 564 } 565 if (length($ehh)<2) { 566 $ehh="0$ehh"; 567 } 568 569 my %show; 570 $show{channel} = channel_id($channels[$col]); 571 572 573 my $startdate = substr($realstartdate,0,4) . substr($realstartdate,4,2) . substr($realstartdate,6,2); 574 my $enddate = substr($realenddate,0,4) . substr($realenddate,4,2) . substr($realenddate,6,2); 575 576 $show{start} = "$startdate".$hh.$mm."00 $TZ"; 577 $show{stop} = "$enddate".$ehh.$emm."00 $TZ"; 578 $show{category} = [[$details[2],$lang]]; 579 $show{'title'} = [[$details[0],$lang]]; 580 if (length($details[1])) { 581 $show{'sub-title'} = [[$details[1],$lang]]; 582 } 583 if (length($details[5])) { 584 $show{'year'} = [[$details[5],$lang]]; 585 } 586 587 588 $writer->write_programme(\%show); 589 590 } 591 } 592 593 my $len =@channels; 594 $col = ($col+1)%$len; 595 } 596 $tb->delete(); 597 598 $grabDate = &DateCalc($grabDate,"+ 1 day"); 599 600 if(Date_Cmp($grab_stop, $grabDate) > 0) { 601 goto grab_channel_loop; 602 } 603 604} 605 606## get_details ($id, $channel, $date, $time) 607sub get_details ($$$$) { 608 my $id= shift; 609 my $channel = shift; 610 my $date = shift; 611 my $time = shift; 612 613 my $url=$head->{q(source-info-url)} . $id; 614 615 my @result = ("","","","","","","","","","","","",""); 616 ## tilte, episode title, cat, description, endtime, year, actors, director, 617 ## writer, presenter, audio, subtitles, previously-shown 618 my $len = @result; 619 for (my $i=0; $i<$len;$i++) { 620 $result[$i]=""; 621 } 622 623 my $tb=new HTML::TreeBuilder(); 624 $tb->parse(get_page($url)) 625 or die "cannot parse content of $url"; 626 $tb->eof; 627 628 my $tit1 = $tb->look_down('_tag' => 'span', 'class' => 'tit1'); 629 if (ref($tit1) ne "HTML::Element") { 630 die "Title tag not found"; 631 } 632 633 my @content = $tit1->content_list(); 634 $result[0] = $content[0]; 635 chop($result[0]); 636 chomp($result[0]); 637 638 foreach($tit1->look_down('_tag' => 'wbr')) { 639 my $stit = $_; 640 if (ref($stit) eq "HTML::Element") { 641 $stit = $stit->right(); 642 } 643 if (length($stit)) { 644 if (length($result[1])) { 645 chop($result[1]); 646 chomp($result[1]); 647 $result[0].="- $result[1]"; 648 } 649 $result[1] = $stit; 650 } 651 } 652 653 my $titd = $tb->look_down('_tag' => 'span', 'class' => 'titdblue'); 654 if (ref($titd) ne "HTML::Element") { 655 die "Category tag not found"; 656 } 657 658 @content = $titd->content_list(); 659 $content[0] =~ m/([^:]+):/ or die "unable to extract category"; 660 $result[2] = $1; 661 662 if ($content[0] =~ m/([1-2][0-9][0-9][0-9])/) { 663 $content[5] = $1; 664 } 665 666 667 my $desc = $tb->look_down('_tag' => 'div', 'class' => 'text'); 668 if (ref($desc) ne "HTML::Element") { 669 die "Text tag not found"; 670 } 671 672 @content = $desc->content_list(); 673 $result[3] = $content[$#content]; 674 675 #<table border="0" cellpadding="0" cellspacing="5" width="100%" style="border: 1px solid #99ccff;" 676 my $bc = $tb->look_down('_tag' => 'table', 'border' => '0', 'cellspacing' => '5', 'width' => '100%'); 677 @content = $bc->content_list(); 678 foreach(@content) { 679 next unless(ref($_) eq "HTML::Element"); 680 681 my $check = $_->look_down('_tag' => 'img', 'src' => '/images/tvchannel_logos/'.$channel); 682 if (ref($check) ne "HTML::Element") { 683 next; 684 } 685 686 if (not $_->as_text() =~ m/$date.*$time \- ([0-2][0-9]:[0-6][0-9])/) { 687 next; 688 } 689 690 $result[4] = $1; 691 692 693 } 694 695 696 $tb->delete(); 697 698 return @result; 699} 700 701## get channel listing 702sub get_channels() { 703 my %channels; 704 my $url=$head->{q(source-data-url)}; 705 706 my $tb=new HTML::TreeBuilder(); 707 708 ## getting the channels directly selectable 709 foreach(@groupid) { 710 711 $tb->parse(get_page($url ."?action=ChannelView&SupergroupID=".$_."&date=1164430800&segments=00000")) 712 or die "cannot parse content of $url"; 713 $tb->eof; 714 715 foreach($tb->look_down('_tag' => 'div', 'class' => 'segment_logo' )) { 716 next unless(ref($_) eq "HTML::Element"); 717 718 my $chan = $_; 719 720 my $img=undef; 721 722 foreach($chan->look_down('_tag' => 'img')) { 723 next unless(ref($_) eq "HTML::Element"); 724 725 if (ref($img) eq "HTML::Element") { 726 die "Multiple img tags!"; 727 } 728 if ($_->attr('title')) { 729 $img=$_; 730 } 731 } 732 733 my $channel_name = $img->attr('title'); 734 my $logo = $img->attr('src'); 735 $logo =~ m/tvchannel_logos\/(.+\.[a-z][a-z][a-z])/ or die "unable to extract logo"; 736 $logo = $1; 737 738 $channels{$logo} = $channel_name; 739 } 740 $bar->update() if not $opt_quiet; 741 742 743 } 744 745 $tb->delete; 746 return %channels; 747} 748 749 750 751## get_page($url) :: try to download $url via http://, look for closing </body> tag or die 752sub get_page($) { 753 my $url = shift; 754 my $retry = 0; 755 756 local $SIG{__DIE__} = sub { die "\n$url: $_[0]" }; 757 758 while($retry < 2) { 759 my $got = eval { get_nice($url . ($retry ? "&retry=$retry" : "")); }; 760 $retry ++; 761 762 next if($@); # unable to download, doesn't look too good for us. 763 return $got; 764 } 765 766 die "cannot grab webpage $url (tried $retry times). giving up. sorry"; 767} 768