1#!/usr/local/bin/perl -w 2 3=pod 4 5=head1 NAME 6 7tv_grab_eu_epgdata - Grab TV listings for parts of Europe. 8 9=head1 SYNOPSIS 10 11tv_grab_eu_epgdata --help 12tv_grab_eu_epgdata 13tv_grab_eu_epgdata --version 14 15tv_grab_eu_epgdata --capabilities 16 17tv_grab_eu_epgdata --description 18 19 20tv_grab_eu_epgdata [--config-file FILE] 21 [--days N] [--offset N] 22 [--output FILE] [--quiet] [--debug] 23 24tv_grab_eu_epgdata --configure [--config-file FILE] 25 26tv_grab_eu_epgdata --configure-api [--stage NAME] 27 [--config-file FILE] 28 [--output FILE] 29 30tv_grab_eu_epgdata --list-channels [--config-file FILE] 31 [--output FILE] [--quiet] [--debug] 32 33tv_grab_eu_epgdata --preferredmethod 34 35=head1 DESCRIPTION 36 37Output TV and listings in XMLTV format for many stations 38available in Europe. 39 40First you must run B<tv_grab_eu_epgdata --configure> to choose which stations 41you want to receive. 42 43Then running B<tv_grab_eu_epgdata> with no arguments will get a listings for 44the stations you chose for five days including today. 45 46This is a commercial grabber. 47Go to http://wiki.xmltv.org/index.php/EU_epgdata to sign up or 48send an e-mail to service@epgdata.com for further information. 49It's also possible to ask for a test account. 50 51=head1 OPTIONS 52 53B<--configure> Prompt for which stations to download and write the 54configuration file. 55 56B<--config-file FILE> Set the name of the configuration file, the 57default is B<~/.xmltv/tv_grab_eu_epgdata.conf>. This is the file written by 58B<--configure> and read when grabbing. 59 60B<--gui OPTION> Use this option to enable a graphical interface to be used. 61OPTION may be 'Tk', or left blank for the best available choice. 62Additional allowed values of OPTION are 'Term' for normal terminal output 63(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar. 64 65B<--output FILE> When grabbing, write output to FILE rather than 66standard output. 67 68B<--days N> When grabbing, grab N days rather than 5. 69 70B<--offset N> Start grabbing at today + N days. 71 72B<--quiet> Do not show status messages. 73 74B<--debug> Provide more information on progress to stderr to help in 75debugging. 76 77B<--list-channels> Output a list of all channels that data is available 78 for. The list is in xmltv-format. 79 80B<--version> Show the version of the grabber. 81 82B<--help> Print a help message and exit. 83 84B<--preferredmethod> Shows the preferred method for downloading data 85 (see http://wiki.xmltv.org/index.php/XmltvCapabilities) 86=head1 ENVIRONMENT VARIABLES 87 88The environment variable HOME can be set to change where configuration 89files are stored. All configuration is stored in $HOME/.xmltv/. On Windows, 90it might be necessary to set HOME to a path without spaces in it. 91 92=head1 SUPPORTED CHANNELS 93 94For a list of supported channels, see the channel_ids file distributed with this grabber. 95If additional channels are available, you will receive a warning when you run --configure. 96 97Once I am aware that new channels are available, the channel_ids file will be updated and 98this grabber will automatically fetch an updated copy. 99 100=head1 COMPATIBILITY 101 102The channel ids used in this grabber aim to be mostly possible with other grabbers, eg 103tv_grab_de_prisma and some other grabbers for other countries. 104NOTE: Retaining compatibility was not always possible or practicable. 105You can get a list of channel ids using --list-channels 106 107=head1 AUTHOR 108 109Michael Haas, laga -at- laga -dot- ath -dot - cx. This documentation is copied 110from tv_grab_se_swedb by Mattias Holmlund, which in turn was copied from tv_grab_uk by Ed Avis. 111Parts of the code are copied from tv_grab_se_swedb and tv_grab_na_dd (in XMLTV 0.5.45) as well 112as various other sources. 113 114=head1 BUGS 115 116There's no proper support for channels with locally different schedules. For example, 117if your EPG package is a German one, you'll get the EPG schedule for Germany 118even if you preferred the Swiss schedule which is also available in the data (for some channels at least). 119 120Timezones are not handled correctly. Currently, you have to enter your 121time zone manually during the configure step. You have to do this every 122time your time zone changes, eg for daylight saving time 123("Sommerzeit" and "Normalzeit" for my fellow Germans). 124I'll try to have this fixed for the next XMLTV release. 125Please see this thread for further discussion and some additional issues: 126 http://thread.gmane.org/gmane.comp.tv.xmltv.devel/7919 127FYI: you can modify the time zone directly in the config file which is 128usually located at ~/.xmltv/tv_grab_eu_epgdata.conf or 129~/.mythtv/FOO.xmltv where FOO is the name of your video source in MythTV. 130 131If the data source gives us data for one day, they'll also cover a part of the following day. 132Maybe this should be fixed. Please note: data is not overlapping! So if we want to get data for 133today, we might as well grab yesterday because that'll give us EPG till ~5am for today. 134 135I'm sure this list is not complete. Let me know if you encounter additional problems. 136 137=cut 138 139 140use strict; 141use warnings; 142use LWP::Simple qw($ua getstore is_success); 143use Archive::Zip; 144use File::Temp qw/ tempdir /; 145use XML::Twig; 146 147use XMLTV; 148use XMLTV::Options qw/ParseOptions/; 149use XMLTV::Configure::Writer; 150use XMLTV::Supplement qw/GetSupplement/; 151use HTTP::Request::Common; 152 153# deal with umlauts 154use HTML::Entities; 155 156# to parse expiry and start/stop dates 157use Date::Format; 158use DateTime::Format::Strptime; 159 160use XMLTV::Memoize; XMLTV::Memoize::check_argv('getstore'); 161 162# set user agent 163$ua->agent("xmltv/$XMLTV::VERSION"); 164$ua->default_header(Accept => '*/*'); 165 166our(%genre, $channelgroup, $expiry_date, %chanid, $country); 167our $tmp = tempdir(CLEANUP => 1) . '/'; 168 169# set up XML::Twig 170our $epg = new XML::Twig(twig_handlers => { data => \&printepg }, 171 output_encoding => 'UTF-8'); 172our $channels = new XML::Twig(twig_handlers => { data => \&printchannels }, 173 output_encoding => 'UTF-8'); 174our $genre = new XML::Twig(twig_handlers => { data => \&makegenrehash }, 175 output_encoding => 'UTF-8'); 176 177# build a hash: epgdata.com channel id -> xmltv channel id 178my $chanids = GetSupplement('tv_grab_eu_epgdata', 'channel_ids'); 179 180my @lines = split(/[\n\r]+/, $chanids); 181foreach my $line (@lines) { 182 if ($line !~ '^#') { 183 my @chanid_array = split(';', $line); 184 chomp($chanid_array[1]); 185 $chanid{$chanid_array[0]} = $chanid_array[1]; 186 } 187} 188 189my ($opt, $conf) = ParseOptions({ 190 grabber_name => 'tv_grab_eu_epgdata', 191 capabilities => [qw/baseline manualconfig tkconfig apiconfig cache preferredmethod/], 192 stage_sub => \&config_stage, 193 listchannels_sub => \&list_channels, 194 version => '$Id: tv_grab_eu_epgdata,v 1.45 2017/01/23 14:59:14 yunosh Exp $', 195 description => 'Parts of Europe (commercial) (www.epgdata.com)', 196 preferredmethod => 'allatonce', 197}); 198 199my $pin = $conf->{pin}->[0]; 200die 'Sorry, your PIN is not defined. Run tv_grab_eu_epgdata --configure to fix this.\n' unless $pin; 201 202# country is determined by the filenames downloaded from the server 203# and used to determine the time zone if not specified in the config 204our $tz = $conf->{tz}->[0]; 205my %country_tz = ( 206 'at', 'Europe/Vienna', 207 'ch', 'Europe/Zurich', 208 'de', 'Europe/Berlin', 209 'es', 'Europe/Madrid', 210 'fr', 'Europe/Paris', 211 'it', 'Europe/Rome', 212 'nl', 'Europe/Amsterdam', 213); 214 215sub config_stage { 216 # shamelessly stolen from http://wiki.xmltv.org/index.php/HowtoWriteAGrabber 217 218 my ($stage, $conf) = @_; 219 # Sample stage_sub that only needs a single stage. 220 die 'Unknown stage $stage' if $stage ne 'start'; 221 222 my $result; 223 my $configwriter = new XMLTV::Configure::Writer(OUTPUT => \$result, 224 encoding => 'ISO-8859-1'); 225 $configwriter->start({ grabber => 'tv_grab_eu_epgdata' }); 226 $configwriter->write_string({ 227 id => 'pin', 228 title => [ [ 'Enter your PIN for epgdata.com', 'en' ] ], 229 description => [ 230 [ 'This alphanumeric string is used for authentication with epgdata.com. 231 Go to http://wiki.xmltv.org/index.php/EU_epgdata to sign up or 232 send an e-mail to service@epgdata.com for further information', 233 'en' ] 234 ], 235 default => '', 236 }); 237 $configwriter->write_string({ 238 id => 'tz', 239 title => [ [ 'Time zone for your EPG data', 'en' ] ], 240 description => [ 241 [ 'Enter the time zone or the time offset from UTC of the data here. 242 Your may omit this to activate automatic detection. 243 Examples: "" (to activate automatic detection), "local" (your local system timezone), "Europe/Berlin", "+0100" (without quotation marks)', 244 'en' ] 245 ], 246 default => '', 247 }); 248 249 $configwriter->end('select-channels'); 250 return $result; 251} 252 253# construct writer object 254# taken from tv_grab_na_dd (XMLTV 0.4.45) 255# XMLTV::Options does not redirect stdout properly for us 256# XML::Twig probably messes it up, I don't know. :/ 257my %w_args; 258if (defined $opt->{output}) { 259 my $fh = new IO::File(">$opt->{output}"); 260 die "ERROR: cannot write to $opt->{output}: $!" if not defined $fh; 261 $w_args{OUTPUT} = $fh; 262} 263$w_args{encoding} = 'UTF-8'; 264$w_args{ENCODING} = 'UTF-8'; 265 266our $writer = new XMLTV::Writer(%w_args); 267 268# determine the timezone 269if (not $tz) { 270 $tz = $country_tz{$country} if $country; 271} 272if (not $tz) { 273 warn 'Unable to determine country/timezone of data. ', 274 'You may specify tz in your configuration. ', 275 "Falling back to your local system time zone.\n"; 276 $tz = 'local'; 277} 278warn "tz=$tz\n" if $opt->{debug}; 279my $start_stop_parser = 280 DateTime::Format::Strptime->new(pattern => '%Y-%m-%d %H:%M:%S', 281 time_zone => $tz); 282 283# do all the work. 284prepareinclude($conf,$opt); 285our @xmlfiles = downloadepg($opt->{days}, $opt->{offset}, $pin); 286exit 1 unless @xmlfiles; 287processxml(@xmlfiles); 288 289sub sanitize { 290 my $input = shift; 291 # we only want to keep alphanumeric characters 292 $input =~ s/[^a-zA-Z0-9_\-\.]//gi; 293 return $input; 294} 295 296sub downloadepg { 297 my $days = shift; 298 my $offset = shift; 299 my $pin = shift; 300 my $i = 0; 301 my @filenames; 302 my $baseurl='http://www.epgdata.com'; 303 304 # we've got to start counting at 0 305 # if we did "$i <= $days", we'd end up with one zip file too much 306 while ($i < $days) { 307 my $dataoffset = $i + $offset; 308 my $url = "$baseurl/index.php?action=sendPackage&iOEM=&pin=$pin&dayOffset=$dataoffset&dataType=xml"; 309 # get file name from content-disposition header 310 warn "url=$url\n" if $opt->{debug}; 311 my $response = $ua->request(GET $url); 312 if (!$response->is_success) { 313 warn "Unable to retrieve data from URL $url: " . $response->message . ' (' . $response->code . ')'; 314 return []; 315 } 316 if ($response->header('x-epgdata-packageavailable') eq '1') { 317 my $filename = sanitize($response->header('content-disposition')); 318 $expiry_date = $response->header('x-epgdata-timeout'); 319 $channelgroup = sanitize($response->header('x-epgdata-channelgroup')); 320 $filename =~ s/^.*=//; 321 ($country) = ($filename =~ /[^_]*_[^_]*_([^_]*)_[^_]*/) unless $country; # format: xyz########_########_de_qy.zip 322 323 warn "filename=$filename\n" if $opt->{debug}; 324 warn 'Downloading zip file for day ', $dataoffset + 1, "\n" unless $opt->{quiet}; 325 open(F,">$tmp" . $filename); 326 binmode(F); 327 print F $response->content; 328 close(F); 329 push @filenames, $tmp . $filename; 330 } else { 331 warn "No more zip files available for download\n" unless $opt->{quiet}; 332 last; 333 } 334 $i++; 335 } 336 warn 'Your PIN will expire around ' . time2str('%C', $expiry_date) . "\n" unless $opt->{quiet}; 337 return unzip(@filenames); 338} 339 340# for simplicity's sake, always call with $conf as argument at least 341sub prepareinclude { 342 my ($conf, $opt) = @_; 343 my $pin = $conf->{pin}->[0]; 344 my $includeurl = "http://www.epgdata.com/index.php?action=sendInclude&iOEM=&pin=$pin&dataType=xml"; 345 warn "Downloading include zip file\n" unless $opt->{quiet}; 346 if (is_success(getstore($includeurl, $tmp . 'includezip'))) { 347 my @zipfiles=($tmp . 'includezip'); 348 unzip(@zipfiles); 349 } 350} 351 352# returns list of *.xml files 353sub unzip { 354 my @xmlfilelist; 355 foreach my $zipfile (@_) { 356 warn "Extracting *.dtd and *.xml from $zipfile\n" if $opt->{debug}; 357 my $zip = Archive::Zip->new($zipfile); 358 my @filelist = $zip->memberNames; 359 foreach my $filename (@filelist) { 360 # we only care about .dtd and .xml right now 361 my $isdtd = 1 if $filename =~ /\.dtd/; 362 my $isxml = 1 if $filename =~ /\.xml/; 363 $zip->extractMember($filename, $tmp . sanitize($filename)) if ($isdtd or $isxml); 364 push @xmlfilelist, ($tmp . sanitize($filename)) if $isxml; 365 } 366 } 367 return @xmlfilelist; 368} 369 370 371sub processxml { 372 $writer->start({ 'generator-info-name' => 'tv_grab_eu_epgdata' }); 373 $genre->parsefile($tmp . 'genre.xml'); 374 $channels->parsefile($tmp . 'channel_' . sanitize($channelgroup) . '.xml'); 375 foreach my $xmlfile (@_) { 376 warn "Processing $xmlfile\n" if $opt->{debug}; 377 $epg->parsefile($xmlfile); 378 } 379 $writer->end(); 380} 381 382sub makegenrehash { 383 my ($twig, $genre) = @_; 384 my $genreid = $genre->first_child('g0')->text; 385 my $genrename = $genre->first_child('g1')->text; 386 $genre{$genreid} = $genrename; 387 $twig->purge; 388} 389 390sub format_start_stop { 391 my ($ts) = @_; 392 my $dt = $start_stop_parser->parse_datetime( $ts ); 393 return $dt->strftime('%Y%m%d%H%M%S %z'); 394} 395 396sub printepg { 397 my ($twig, $sendung) = @_; 398 my $internalchanid = $sendung->first_child('d2')->text; 399 my $internalregionid = $sendung->first_child('d3')->text; 400 our $chanid; 401 if (defined $main::chanid{$internalchanid}) { 402 $chanid = $main::chanid{$internalchanid}; 403 } else { 404 $chanid = $internalchanid; 405 # FIXME: not sure if this is correct. 406 # Maybe we should behave differently if we encounter an unknown ID, 407 # but this ought to be OK for now 408 } 409 410 # alright, let's try this: 411 # push the channel ids we want to grab in an array 412 # http://effectiveperl.blogspot.com/ 413 my %configuredchannels = map { $_, 1 } @{$conf->{channel}}; 414 # does the channel we're currently processing exist in the hash? 415 # BTW: this is not a lot more efficient in our case than looping over a list 416 # but a few seconds are better than nothing :) 417 if ($configuredchannels{$chanid} && $internalregionid == '0') { 418 my $title = $sendung->first_child('d19')->text; 419 my $subtitle = $sendung->first_child('d20')->text; 420 my $desc = $sendung->first_child('d21')->text; 421 my $start = $sendung->first_child('d4')->text; 422 my $stop = $sendung->first_child('d5')->text; 423 my $length = $sendung->first_child('d7')->text; 424 my $category = $sendung->first_child('d10')->text; 425 my $internalgenreid = $sendung->first_child('d25')->text; 426 my $age_rating = $sendung->first_child('d16')->text; 427 my $star_rating = $sendung->first_child('d30')->text; 428 my $wide_aspect = $sendung->first_child('d29')->text; 429 my $sequence = $sendung->first_child('d26')->text; 430 my $country = $sendung->first_child('d32')->text; 431 my $production_year = $sendung->first_child('d33')->text; 432 433 # people 434 my $presenter = $sendung->first_child('d34')->text; 435 my $studio_guest = $sendung->first_child('d35')->text; 436 my $director = $sendung->first_child('d36')->text; 437 my $actor = $sendung->first_child('d37')->text; 438 439 # black and white? 440 my $bw_colour = $sendung->first_child('d11')->text; 441 my $subtitles = $sendung->first_child('d13')->text; 442 my $stereo_audio = $sendung->first_child('d27')->text; 443 my $dolby_audio = $sendung->first_child('d28')->text; 444 # I was told that technics_hd is supposed to exist 445 # However, it's not listed in qy.dtd 446 # my $hd_video = $sendung->first_child('XXX')->text; 447 448 our %prog = ('channel' => $chanid, 449 'start' => format_start_stop($start), 450 'stop' => format_start_stop($stop), 451 'title' => [ [ $title ] ]); 452 453 if (length($subtitle) > 0) { 454 push @{$prog{'sub-title'}}, [ $subtitle ]; 455 } 456 if (length($length) > 0) { 457 $prog{'length'} = $length * 60; 458 } 459 460 # use hardcoded categories for mythtv 461 if ($category eq '100') { 462 push @{$prog{'category'}}, [ 'movie' ]; 463 } elsif ($category eq '200') { 464 push @{$prog{'category'}}, [ 'series' ]; 465 } elsif ($category eq '300') { 466 push @{$prog{'category'}}, [ 'sports' ]; 467 } elsif ($category eq '400' || $category eq '500' || $category eq '600') { 468 push @{$prog{'category'}}, [ 'tvshow' ]; 469 } 470 471 if (exists $genre{$internalgenreid} ) { 472 push @{$prog{'category'}}, [ $genre{$internalgenreid} ]; 473 } 474 475 if (length($desc) > 0) { 476 push @{$prog{'desc'}}, [$desc]; 477 } 478 479 # people 480 if (length($actor) > 0) { 481 $actor =~ s/\s\([^)]*\)//g; # chop the rolenames 482 my @actors = split / - /, $actor; # split people 483 foreach (@actors) { 484 # strip blanks 485 s/^\s+//; 486 s/\s+$// 487 } 488 push @{$prog{'credits'}{'actor'}}, @actors; 489 } 490 491 if (length($director) > 0) { 492 my @directors = split / und | & /, $director; 493 push @{$prog{'credits'}{'director'}}, @directors; 494 } 495 496 if (length($studio_guest) > 0) { 497 $studio_guest =~ s/\s\(.*\)//g; # chop the rolenames 498 my @guests = split / - /, $studio_guest; # split people 499 foreach (@guests) { 500 # strip blanks 501 s/^\s+//; 502 s/\s+$//; 503 } 504 push @{$prog{'credits'}{'guest'}}, @guests; 505 } 506 507 if (length($presenter) > 0) { 508 push @{$prog{'credits'}{'presenter'}}, $presenter; 509 } 510 511 # star-rating: the data source seems to say <d30>0</d30> 512 # if they mean "unknown" 513 # valid values seem to be 1 to 5 514 # 2 is never used 515 if ($star_rating gt 0) { 516 $prog{'star-rating'} = [ ($star_rating - 1) . '/4' ]; 517 } 518 519 if ($age_rating gt 0) { 520 $prog{'rating'} = [ [ $age_rating ] ]; 521 } 522 523 if ($wide_aspect == 1) { 524 $prog{'video'}->{'aspect'} = '16:9'; 525 } else { 526 $prog{'video'}->{'aspect'} = '4:3'; 527 } 528 529 if ($bw_colour == 1) { 530 $prog{'video'}->{'colour'} = 0; 531 } else { 532 $prog{'video'}->{'colour'} = 1; 533 } 534 535 if ($sequence gt 0) { 536 $prog{'episode-num'} = [ [ $sequence, 'onscreen' ] ]; 537 } 538 539 # check for dolby first 540 # not sure if dolby_audio and stereo_audio can be true 541 # simultaneously in the source data, but it's better to be 542 # on the safe side. 543 # If stereo_audio is false, is it safe to assume the programme 544 # will be broadcast in mono? 545 # I mean, this is the 21th century, right? 546 # Also, what does dolby mean in this context? 547 # How does it apply to analog broadcasts? 548 if ($dolby_audio == 1) { 549 $prog{'audio'}->{'stereo'} = 'dolby digital'; 550 } elsif ($stereo_audio == 1) { 551 $prog{'audio'}->{'stereo'} = 'stereo'; 552 } 553 554 if ($subtitles == 1) { 555 $prog{'subtitles'} = [ { type => 'teletext' } ]; 556 } 557 558 if (length($country) > 0) { 559 my @countries = split /\|/, $country; 560 foreach (@countries) { 561 push @{$prog{'country'}}, [ $_ ]; 562 } 563 } 564 565 if (length($production_year) > 0) { 566 $production_year =~ s/(\d*).*/$1/; #take only first year 567 $prog{'date'} = $production_year; 568 } 569 570 $writer->write_programme(\%main::prog); 571 } 572 $twig->purge; 573} 574 575# this is called as a handler for the channels twig 576# which is in turn called by processxml() 577sub printchannels { 578 my ($twig, $channel) = @_; 579 my $internalchanid = $channel->first_child('ch4')->text; 580 our $chanid; 581 if (defined $main::chanid{$internalchanid}) { 582 $chanid = $main::chanid{$internalchanid}; 583 } else { 584 # FIXME: not sure if this is correct. 585 # Maybe we should just return if we don't know the channel id 586 $chanid = $internalchanid; 587 } 588 my @names; 589 if ($channel->first_child('ch0')->text) { 590 push(@names, [ $channel->first_child('ch0')->text ]); 591 } 592 if ($channel->first_child('ch11')->text) { 593 push(@names, [ $channel->first_child('ch11')->text ]); 594 } 595 if ($channel->first_child('ch1')->text) { 596 push(@names, [ $channel->first_child('ch1')->text ]); 597 } 598 foreach my $conf_channel (@{$conf->{channel}}) { 599 if ($conf_channel eq $chanid) { 600 my %ch = ( 601 'id' => $chanid, 602 'display-name' => \@names 603 ); 604 $writer->write_channel(\%ch); 605 } 606 } 607} 608 609# this lists all _available_ channels 610# used for --configure 611# independent from printchannels which will print list of configured channels 612sub list_channels { 613 my ($conf, $opt) = @_; 614 my $pin = $conf->{pin}->[0]; 615 # make sure we know $channelgroup 616 downloadepg('1', '0', $pin); 617 if (!$channelgroup) { 618 return ''; 619 } 620 prepareinclude($conf, $opt); 621 # borrowed from http://www.xmltwig.com/xmltwig/ex_fm1 622 $channels->parsefile($tmp . 'channel_' . $channelgroup . '.xml'); 623 my $channel_list = $channels->root; 624 my @channels = $channel_list->children; 625 my $xmltv_channel_list = "<tv generator-info-name=\"tv_grab_eu_epgdata\">\n"; 626 627 foreach my $channel (@channels) { 628 my $internalchanid = $channel->first_child('ch4')->text; 629 our $chanid; 630 if (defined $main::chanid{$internalchanid}) { 631 $chanid = $main::chanid{$internalchanid}; 632 } else { 633 $chanid = $internalchanid; 634 warn "New channel with ID $internalchanid found. Please update chann 635el_ids file!" unless $opt->{quiet}; 636 } 637 638 my $name = $channel->first_child('ch0')->xml_text; 639 $xmltv_channel_list = <<END; 640 $xmltv_channel_list 641 <channel id="$chanid"> 642 <display-name>$name</display-name> 643 </channel> 644END 645 } 646 $xmltv_channel_list = $xmltv_channel_list . '</tv>'; 647 return $xmltv_channel_list; 648} 649 650