1#!/usr/local/bin/perl -w
2
3=pod
4
5=head1 NAME
6
7tv_grab_eu_epgdata - Grab TV listings for parts of Europe.
8
9=head1 SYNOPSIS
10
11tv_grab_eu_epgdata --help
12tv_grab_eu_epgdata
13tv_grab_eu_epgdata --version
14
15tv_grab_eu_epgdata --capabilities
16
17tv_grab_eu_epgdata --description
18
19
20tv_grab_eu_epgdata [--config-file FILE]
21                   [--days N] [--offset N]
22                   [--output FILE] [--quiet] [--debug]
23
24tv_grab_eu_epgdata --configure [--config-file FILE]
25
26tv_grab_eu_epgdata --configure-api [--stage NAME]
27                   [--config-file FILE]
28                   [--output FILE]
29
30tv_grab_eu_epgdata --list-channels [--config-file FILE]
31                   [--output FILE] [--quiet] [--debug]
32
33tv_grab_eu_epgdata --preferredmethod
34
35=head1 DESCRIPTION
36
37Output TV and listings in XMLTV format for many stations
38available in Europe.
39
40First you must run B<tv_grab_eu_epgdata --configure> to choose which stations
41you want to receive.
42
43Then running B<tv_grab_eu_epgdata> with no arguments will get a listings for
44the stations you chose for five days including today.
45
46This is a commercial grabber.
47Go to http://wiki.xmltv.org/index.php/EU_epgdata to sign up or
48send an e-mail to service@epgdata.com for further information.
49It's also possible to ask for a test account.
50
51=head1 OPTIONS
52
53B<--configure> Prompt for which stations to download and write the
54configuration file.
55
56B<--config-file FILE> Set the name of the configuration file, the
57default is B<~/.xmltv/tv_grab_eu_epgdata.conf>.  This is the file written by
58B<--configure> and read when grabbing.
59
60B<--gui OPTION> Use this option to enable a graphical interface to be used.
61OPTION may be 'Tk', or left blank for the best available choice.
62Additional allowed values of OPTION are 'Term' for normal terminal output
63(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.
64
65B<--output FILE> When grabbing, write output to FILE rather than
66standard output.
67
68B<--days N> When grabbing, grab N days rather than 5.
69
70B<--offset N> Start grabbing at today + N days.
71
72B<--quiet> Do not show status messages.
73
74B<--debug> Provide more information on progress to stderr to help in
75debugging.
76
77B<--list-channels>    Output a list of all channels that data is available
78                      for. The list is in xmltv-format.
79
80B<--version> Show the version of the grabber.
81
82B<--help> Print a help message and exit.
83
84B<--preferredmethod> Shows the preferred method for downloading data
85                     (see http://wiki.xmltv.org/index.php/XmltvCapabilities)
86=head1 ENVIRONMENT VARIABLES
87
88The environment variable HOME can be set to change where configuration
89files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
90it might be necessary to set HOME to a path without spaces in it.
91
92=head1 SUPPORTED CHANNELS
93
94For a list of supported channels, see the channel_ids file distributed with this grabber.
95If additional channels are available, you will receive a warning when you run --configure.
96
97Once I am aware that new channels are available, the channel_ids file will be updated and
98this grabber will automatically fetch an updated copy.
99
100=head1 COMPATIBILITY
101
102The channel ids used in this grabber aim to be mostly possible with other grabbers, eg
103tv_grab_de_prisma and some other grabbers for other countries.
104NOTE: Retaining compatibility was not always possible or practicable.
105You can get a list of channel ids using --list-channels
106
107=head1 AUTHOR
108
109Michael Haas, laga -at- laga -dot- ath -dot - cx. This documentation is copied
110from tv_grab_se_swedb by Mattias Holmlund, which in turn was copied from tv_grab_uk by Ed Avis.
111Parts of the code are copied from tv_grab_se_swedb and tv_grab_na_dd (in XMLTV 0.5.45) as well
112as various other sources.
113
114=head1 BUGS
115
116There's no proper support for channels with locally different schedules. For example,
117if your EPG package is a German one, you'll get the EPG schedule for Germany
118even if you preferred the Swiss schedule which is also available in the data (for some channels at least).
119
120Timezones are not handled correctly. Currently, you have to enter your
121time zone manually during the configure step. You have to do this every
122time your time zone changes, eg for daylight saving time
123("Sommerzeit" and "Normalzeit" for my fellow Germans).
124I'll try to have this fixed for the next XMLTV release.
125Please see this thread for further discussion and some additional issues:
126    http://thread.gmane.org/gmane.comp.tv.xmltv.devel/7919
127FYI: you can modify the time zone directly in the config file which is
128usually located at ~/.xmltv/tv_grab_eu_epgdata.conf or
129~/.mythtv/FOO.xmltv where FOO is the name of your video source in MythTV.
130
131If the data source gives us data for one day, they'll also cover a part of the following day.
132Maybe this should be fixed. Please note: data is not overlapping! So if we want to get data for
133today, we might as well grab yesterday because that'll give us EPG till ~5am for today.
134
135I'm sure this list is not complete. Let me know if you encounter additional problems.
136
137=cut
138
139
140use strict;
141use warnings;
142use LWP::Simple qw($ua getstore is_success);
143use Archive::Zip;
144use File::Temp qw/ tempdir /;
145use XML::Twig;
146
147use XMLTV;
148use XMLTV::Options qw/ParseOptions/;
149use XMLTV::Configure::Writer;
150use XMLTV::Supplement qw/GetSupplement/;
151use HTTP::Request::Common;
152
153# deal with umlauts
154use HTML::Entities;
155
156# to parse expiry and start/stop dates
157use Date::Format;
158use DateTime::Format::Strptime;
159
160use XMLTV::Memoize; XMLTV::Memoize::check_argv('getstore');
161
162# set user agent
163$ua->agent("xmltv/$XMLTV::VERSION");
164$ua->default_header(Accept => '*/*');
165
166our(%genre, $channelgroup, $expiry_date, %chanid, $country);
167our $tmp = tempdir(CLEANUP => 1) . '/';
168
169# set up XML::Twig
170our $epg      = new XML::Twig(twig_handlers => { data => \&printepg },
171                              output_encoding => 'UTF-8');
172our $channels = new XML::Twig(twig_handlers => { data => \&printchannels },
173                              output_encoding => 'UTF-8');
174our $genre    = new XML::Twig(twig_handlers => { data => \&makegenrehash },
175                              output_encoding => 'UTF-8');
176
177# build a hash: epgdata.com channel id -> xmltv channel id
178my $chanids = GetSupplement('tv_grab_eu_epgdata', 'channel_ids');
179
180my @lines = split(/[\n\r]+/, $chanids);
181foreach my $line (@lines) {
182    if ($line !~ '^#') {
183        my @chanid_array = split(';', $line);
184        chomp($chanid_array[1]);
185        $chanid{$chanid_array[0]} = $chanid_array[1];
186    }
187}
188
189my ($opt, $conf) = ParseOptions({
190    grabber_name     => 'tv_grab_eu_epgdata',
191    capabilities     => [qw/baseline manualconfig tkconfig apiconfig cache preferredmethod/],
192    stage_sub        => \&config_stage,
193    listchannels_sub => \&list_channels,
194    version          => '$Id: tv_grab_eu_epgdata,v 1.45 2017/01/23 14:59:14 yunosh Exp $',
195    description      => 'Parts of Europe (commercial) (www.epgdata.com)',
196    preferredmethod  => 'allatonce',
197});
198
199my $pin = $conf->{pin}->[0];
200die 'Sorry, your PIN is not defined. Run tv_grab_eu_epgdata --configure to fix this.\n' unless $pin;
201
202# country is determined by the filenames downloaded from the server
203# and used to determine the time zone if not specified in the config
204our $tz = $conf->{tz}->[0];
205my %country_tz = (
206    'at', 'Europe/Vienna',
207    'ch', 'Europe/Zurich',
208    'de', 'Europe/Berlin',
209    'es', 'Europe/Madrid',
210    'fr', 'Europe/Paris',
211    'it', 'Europe/Rome',
212    'nl', 'Europe/Amsterdam',
213);
214
215sub config_stage {
216    # shamelessly stolen from http://wiki.xmltv.org/index.php/HowtoWriteAGrabber
217
218    my ($stage, $conf) = @_;
219    # Sample stage_sub that only needs a single stage.
220    die 'Unknown stage $stage' if $stage ne 'start';
221
222    my $result;
223    my $configwriter = new XMLTV::Configure::Writer(OUTPUT => \$result,
224                                                    encoding => 'ISO-8859-1');
225    $configwriter->start({ grabber => 'tv_grab_eu_epgdata' });
226    $configwriter->write_string({
227	id          => 'pin',
228        title       => [ [ 'Enter your PIN for epgdata.com', 'en' ] ],
229        description => [
230            [ 'This alphanumeric string is used for authentication with epgdata.com.
231            Go to http://wiki.xmltv.org/index.php/EU_epgdata to sign up or
232            send an e-mail to service@epgdata.com for further information',
233            'en' ]
234        ],
235        default     => '',
236    });
237    $configwriter->write_string({
238        id          => 'tz',
239        title       => [ [ 'Time zone for your EPG data', 'en' ] ],
240        description => [
241            [ 'Enter the time zone or the time offset from UTC of the data here.
242            Your may omit this to activate automatic detection.
243            Examples: "" (to activate automatic detection), "local" (your local system timezone), "Europe/Berlin", "+0100" (without quotation marks)',
244            'en' ]
245        ],
246        default => '',
247    });
248
249    $configwriter->end('select-channels');
250    return $result;
251}
252
253# construct writer object
254# taken from tv_grab_na_dd (XMLTV 0.4.45)
255# XMLTV::Options does not redirect stdout properly for us
256# XML::Twig probably messes it up, I don't know. :/
257my %w_args;
258if (defined $opt->{output}) {
259    my $fh = new IO::File(">$opt->{output}");
260    die "ERROR: cannot write to $opt->{output}: $!" if not defined $fh;
261    $w_args{OUTPUT} = $fh;
262}
263$w_args{encoding} = 'UTF-8';
264$w_args{ENCODING} = 'UTF-8';
265
266our $writer = new XMLTV::Writer(%w_args);
267
268# determine the timezone
269if (not $tz) {
270    $tz = $country_tz{$country} if $country;
271}
272if (not $tz) {
273    warn 'Unable to determine country/timezone of data. ',
274         'You may specify tz in your configuration. ',
275         "Falling back to your local system time zone.\n";
276    $tz = 'local';
277}
278warn "tz=$tz\n" if $opt->{debug};
279my $start_stop_parser =
280    DateTime::Format::Strptime->new(pattern => '%Y-%m-%d %H:%M:%S',
281                                    time_zone => $tz);
282
283# do all the work.
284prepareinclude($conf,$opt);
285our @xmlfiles = downloadepg($opt->{days}, $opt->{offset}, $pin);
286exit 1 unless @xmlfiles;
287processxml(@xmlfiles);
288
289sub sanitize {
290    my $input = shift;
291    # we only want to keep alphanumeric characters
292    $input =~ s/[^a-zA-Z0-9_\-\.]//gi;
293    return $input;
294}
295
296sub downloadepg {
297    my $days   = shift;
298    my $offset = shift;
299    my $pin    = shift;
300    my $i      = 0;
301    my @filenames;
302    my $baseurl='http://www.epgdata.com';
303
304    # we've got to start counting at 0
305    # if we did "$i <= $days", we'd end up with one zip file too much
306    while ($i < $days) {
307        my $dataoffset = $i + $offset;
308        my $url = "$baseurl/index.php?action=sendPackage&iOEM=&pin=$pin&dayOffset=$dataoffset&dataType=xml";
309        # get file name from content-disposition header
310        warn "url=$url\n" if $opt->{debug};
311        my $response = $ua->request(GET $url);
312        if (!$response->is_success) {
313            warn "Unable to retrieve data from URL $url: " . $response->message . ' (' . $response->code . ')';
314            return [];
315        }
316        if ($response->header('x-epgdata-packageavailable') eq '1') {
317            my $filename  = sanitize($response->header('content-disposition'));
318            $expiry_date  = $response->header('x-epgdata-timeout');
319            $channelgroup = sanitize($response->header('x-epgdata-channelgroup'));
320            $filename     =~  s/^.*=//;
321            ($country)    = ($filename =~ /[^_]*_[^_]*_([^_]*)_[^_]*/) unless $country; # format: xyz########_########_de_qy.zip
322
323            warn "filename=$filename\n" if $opt->{debug};
324            warn 'Downloading zip file for day ', $dataoffset + 1, "\n" unless $opt->{quiet};
325            open(F,">$tmp" . $filename);
326            binmode(F);
327            print F $response->content;
328            close(F);
329            push @filenames, $tmp . $filename;
330        } else {
331            warn "No more zip files available for download\n" unless $opt->{quiet};
332            last;
333        }
334	$i++;
335    }
336    warn 'Your PIN will expire around ' . time2str('%C', $expiry_date) . "\n" unless $opt->{quiet};
337    return unzip(@filenames);
338}
339
340# for simplicity's sake, always call with $conf as argument at least
341sub prepareinclude {
342    my ($conf, $opt) = @_;
343    my $pin = $conf->{pin}->[0];
344    my $includeurl = "http://www.epgdata.com/index.php?action=sendInclude&iOEM=&pin=$pin&dataType=xml";
345    warn "Downloading include zip file\n" unless $opt->{quiet};
346    if (is_success(getstore($includeurl, $tmp . 'includezip'))) {
347        my @zipfiles=($tmp . 'includezip');
348        unzip(@zipfiles);
349    }
350}
351
352# returns list of *.xml files
353sub unzip {
354    my @xmlfilelist;
355    foreach my $zipfile (@_) {
356        warn "Extracting *.dtd and *.xml from $zipfile\n" if $opt->{debug};
357        my $zip = Archive::Zip->new($zipfile);
358        my @filelist = $zip->memberNames;
359        foreach my $filename (@filelist) {
360            # we only care about .dtd and .xml right now
361            my $isdtd = 1 if $filename =~ /\.dtd/;
362            my $isxml = 1 if $filename =~ /\.xml/;
363            $zip->extractMember($filename, $tmp . sanitize($filename)) if ($isdtd or $isxml);
364            push @xmlfilelist, ($tmp . sanitize($filename)) if $isxml;
365        }
366    }
367    return @xmlfilelist;
368}
369
370
371sub processxml {
372    $writer->start({ 'generator-info-name' => 'tv_grab_eu_epgdata' });
373    $genre->parsefile($tmp . 'genre.xml');
374    $channels->parsefile($tmp . 'channel_' . sanitize($channelgroup) . '.xml');
375    foreach my $xmlfile (@_) {
376        warn "Processing $xmlfile\n" if $opt->{debug};
377	$epg->parsefile($xmlfile);
378    }
379    $writer->end();
380}
381
382sub makegenrehash {
383    my ($twig, $genre) = @_;
384    my $genreid        = $genre->first_child('g0')->text;
385    my $genrename      = $genre->first_child('g1')->text;
386    $genre{$genreid}   = $genrename;
387    $twig->purge;
388}
389
390sub format_start_stop {
391    my ($ts) = @_;
392    my $dt   =  $start_stop_parser->parse_datetime( $ts );
393    return $dt->strftime('%Y%m%d%H%M%S %z');
394}
395
396sub printepg {
397    my ($twig, $sendung)  = @_;
398    my $internalchanid    = $sendung->first_child('d2')->text;
399    my $internalregionid  = $sendung->first_child('d3')->text;
400    our $chanid;
401    if (defined $main::chanid{$internalchanid}) {
402        $chanid = $main::chanid{$internalchanid};
403    } else {
404        $chanid = $internalchanid;
405        # FIXME: not sure if this is correct.
406        # Maybe we should behave differently if we encounter an unknown ID,
407        # but this ought to be OK for now
408    }
409
410    # alright, let's try this:
411    # push the channel ids we want to grab in an array
412    # http://effectiveperl.blogspot.com/
413    my %configuredchannels = map { $_, 1 } @{$conf->{channel}};
414    # does the channel we're currently processing exist in the hash?
415    # BTW: this is not a lot more efficient in our case than looping over a list
416    # but a few seconds are better than nothing :)
417    if ($configuredchannels{$chanid} && $internalregionid == '0') {
418        my $title           = $sendung->first_child('d19')->text;
419        my $subtitle        = $sendung->first_child('d20')->text;
420        my $desc            = $sendung->first_child('d21')->text;
421        my $start           = $sendung->first_child('d4')->text;
422        my $stop            = $sendung->first_child('d5')->text;
423        my $length          = $sendung->first_child('d7')->text;
424        my $category        = $sendung->first_child('d10')->text;
425        my $internalgenreid = $sendung->first_child('d25')->text;
426        my $age_rating      = $sendung->first_child('d16')->text;
427        my $star_rating     = $sendung->first_child('d30')->text;
428        my $wide_aspect     = $sendung->first_child('d29')->text;
429        my $sequence        = $sendung->first_child('d26')->text;
430        my $country         = $sendung->first_child('d32')->text;
431        my $production_year = $sendung->first_child('d33')->text;
432
433        # people
434        my $presenter    = $sendung->first_child('d34')->text;
435        my $studio_guest = $sendung->first_child('d35')->text;
436        my $director     = $sendung->first_child('d36')->text;
437        my $actor        = $sendung->first_child('d37')->text;
438
439        # black and white?
440        my $bw_colour    = $sendung->first_child('d11')->text;
441        my $subtitles    = $sendung->first_child('d13')->text;
442        my $stereo_audio = $sendung->first_child('d27')->text;
443        my $dolby_audio  = $sendung->first_child('d28')->text;
444        # I was told that technics_hd is supposed to exist
445        # However, it's not listed in qy.dtd
446        # my $hd_video = $sendung->first_child('XXX')->text;
447
448        our %prog = ('channel' => $chanid,
449                     'start'   => format_start_stop($start),
450                     'stop'    => format_start_stop($stop),
451                     'title'   => [ [ $title ] ]);
452
453        if (length($subtitle) > 0) {
454            push @{$prog{'sub-title'}}, [ $subtitle ];
455        }
456        if (length($length) > 0) {
457            $prog{'length'} = $length * 60;
458        }
459
460        # use hardcoded categories for mythtv
461        if ($category eq '100') {
462            push @{$prog{'category'}}, [ 'movie' ];
463        } elsif ($category eq '200') {
464            push @{$prog{'category'}}, [ 'series' ];
465        } elsif ($category eq '300') {
466            push @{$prog{'category'}}, [ 'sports' ];
467        } elsif ($category eq '400' || $category eq '500' || $category eq '600') {
468            push @{$prog{'category'}}, [ 'tvshow' ];
469        }
470
471        if (exists $genre{$internalgenreid} ) {
472            push @{$prog{'category'}}, [ $genre{$internalgenreid} ];
473        }
474
475        if (length($desc) > 0) {
476            push @{$prog{'desc'}}, [$desc];
477        }
478
479        # people
480        if (length($actor) > 0) {
481            $actor =~ s/\s\([^)]*\)//g;                 # chop the rolenames
482            my @actors = split / - /, $actor;           # split people
483            foreach (@actors) {
484                # strip blanks
485                s/^\s+//;
486                s/\s+$//
487            }
488            push @{$prog{'credits'}{'actor'}}, @actors;
489        }
490
491        if (length($director) > 0) {
492            my @directors = split / und | & /, $director;
493            push @{$prog{'credits'}{'director'}}, @directors;
494        }
495
496        if (length($studio_guest) > 0) {
497            $studio_guest =~ s/\s\(.*\)//g;             # chop the rolenames
498            my @guests = split / - /, $studio_guest;    # split people
499            foreach (@guests) {
500                # strip blanks
501                s/^\s+//;
502                s/\s+$//;
503            }
504            push @{$prog{'credits'}{'guest'}}, @guests;
505        }
506
507        if (length($presenter) > 0) {
508            push @{$prog{'credits'}{'presenter'}}, $presenter;
509        }
510
511        # star-rating: the data source seems to say <d30>0</d30>
512        # if they mean "unknown"
513        # valid values seem to be 1 to 5
514        # 2 is never used
515        if ($star_rating gt 0) {
516            $prog{'star-rating'} = [ ($star_rating - 1) . '/4' ];
517        }
518
519        if ($age_rating gt 0) {
520            $prog{'rating'} = [ [ $age_rating ] ];
521        }
522
523        if ($wide_aspect == 1) {
524            $prog{'video'}->{'aspect'} = '16:9';
525        } else {
526            $prog{'video'}->{'aspect'} = '4:3';
527        }
528
529        if ($bw_colour == 1) {
530            $prog{'video'}->{'colour'} = 0;
531        } else {
532            $prog{'video'}->{'colour'} = 1;
533        }
534
535        if ($sequence gt 0) {
536            $prog{'episode-num'} = [ [ $sequence, 'onscreen' ] ];
537        }
538
539        # check for dolby first
540        # not sure if dolby_audio and stereo_audio can be true
541        # simultaneously in the source data, but it's better to be
542        # on the safe side.
543        # If stereo_audio is false, is it safe to assume the programme
544        # will be broadcast in mono?
545        # I mean, this is the 21th century, right?
546        # Also, what does dolby mean in this context?
547        # How does it apply to analog broadcasts?
548        if ($dolby_audio == 1) {
549            $prog{'audio'}->{'stereo'} = 'dolby digital';
550        } elsif ($stereo_audio == 1) {
551            $prog{'audio'}->{'stereo'} = 'stereo';
552        }
553
554        if ($subtitles == 1) {
555            $prog{'subtitles'} = [ { type => 'teletext' } ];
556        }
557
558        if (length($country) > 0) {
559            my @countries = split /\|/, $country;
560            foreach (@countries) {
561                push @{$prog{'country'}}, [ $_ ];
562            }
563        }
564
565        if (length($production_year) > 0) {
566            $production_year =~ s/(\d*).*/$1/;	#take only first year
567            $prog{'date'} = $production_year;
568        }
569
570        $writer->write_programme(\%main::prog);
571    }
572    $twig->purge;
573}
574
575# this is called as a handler for the channels twig
576# which is in turn called by processxml()
577sub printchannels {
578    my ($twig, $channel) = @_;
579    my $internalchanid   = $channel->first_child('ch4')->text;
580    our $chanid;
581    if (defined $main::chanid{$internalchanid}) {
582         $chanid = $main::chanid{$internalchanid};
583    } else {
584         # FIXME: not sure if this is correct.
585         # Maybe we should just return if we don't know the channel id
586         $chanid = $internalchanid;
587    }
588    my @names;
589    if ($channel->first_child('ch0')->text) {
590        push(@names, [ $channel->first_child('ch0')->text ]);
591    }
592    if ($channel->first_child('ch11')->text) {
593        push(@names, [ $channel->first_child('ch11')->text ]);
594    }
595    if ($channel->first_child('ch1')->text) {
596        push(@names, [ $channel->first_child('ch1')->text ]);
597    }
598    foreach my $conf_channel (@{$conf->{channel}}) {
599        if ($conf_channel eq $chanid) {
600            my %ch = (
601                'id' => $chanid,
602                'display-name' => \@names
603            );
604            $writer->write_channel(\%ch);
605        }
606    }
607}
608
609# this lists all _available_ channels
610# used for --configure
611# independent from printchannels which will print list of configured channels
612sub list_channels {
613    my ($conf, $opt) = @_;
614    my $pin = $conf->{pin}->[0];
615    # make sure we know $channelgroup
616    downloadepg('1', '0', $pin);
617    if (!$channelgroup) {
618        return '';
619    }
620    prepareinclude($conf, $opt);
621    # borrowed from http://www.xmltwig.com/xmltwig/ex_fm1
622    $channels->parsefile($tmp . 'channel_' . $channelgroup . '.xml');
623    my $channel_list       = $channels->root;
624    my @channels           = $channel_list->children;
625    my $xmltv_channel_list = "<tv generator-info-name=\"tv_grab_eu_epgdata\">\n";
626
627    foreach my $channel (@channels) {
628        my $internalchanid = $channel->first_child('ch4')->text;
629        our $chanid;
630        if (defined $main::chanid{$internalchanid}) {
631            $chanid = $main::chanid{$internalchanid};
632        } else {
633            $chanid = $internalchanid;
634            warn "New channel with ID $internalchanid found. Please update chann
635el_ids file!" unless $opt->{quiet};
636        }
637
638        my $name = $channel->first_child('ch0')->xml_text;
639        $xmltv_channel_list = <<END;
640    $xmltv_channel_list
641    <channel id="$chanid">
642        <display-name>$name</display-name>
643    </channel>
644END
645     }
646     $xmltv_channel_list = $xmltv_channel_list . '</tv>';
647     return $xmltv_channel_list;
648}
649
650