1#!/usr/local/bin/perl -w
2
3=pod
4
5=head1 NAME
6
7tv_grab_uk_bleb - Grab TV listings for the United Kingdom, from bleb.org
8
9=head1 SYNOPSIS
10
11tv_grab_uk_bleb --help
12
13tv_grab_uk_bleb [--config-file FILE] --configure [--gui OPTION]
14
15tv_grab_uk_bleb [--config-file FILE] [--output FILE] [--quiet]
16                [--days N] [--offset N]
17
18=head1 DESCRIPTION
19
20Output TV and radio listings in XMLTV format for many stations
21available in Britain.  The data comes from the bleb.org web site.
22
23=head1 USAGE
24
25First you must run B<tv_grab_uk_bleb --configure> to choose which
26stations you want to receive.  Then running B<tv_grab_uk_bleb> with no
27arguments will get about a week<39>s listings for the stations
28you chose.
29
30B<--configure> Prompt for which stations to download and write the
31configuration file.
32
33B<--gui OPTION> Use this option to enable a graphical interface to be used.
34OPTION may be 'Tk', or left blank for the best available choice.
35Additional allowed values of OPTION are 'Term' for normal terminal output
36(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.
37
38B<--config-file FILE> Set the name of the configuration file, the
39default is B<~/.xmltv/tv_grab_uk_bleb.conf>.  This is the file written by
40B<--configure> and read when grabbing.
41
42B<--output FILE> When grabbing, write output to FILE rather than
43standard output.
44
45B<--days N> When grabbing, grab N days rather than as many as
46possible.
47
48B<--offset N> Start grabbing at today + N.  N may be negative.
49
50B<--quiet> Suppress the progress messages normally written to standard
51error.
52
53B<--version> Show the version of the grabber.
54
55B<--help> Print a help message and exit.
56
57=head1 SEE ALSO
58
59L<xmltv(5)>, L<http://www.bleb.org/>
60
61=head1 AUTHOR
62
63Andy Balaam, axis3x3@users.sourceforge.net
64
65Icon URLs collated by Lawrence, MagicLGH@aol.com
66
67Based on tv_grab_nl_wolf by Ed Avis
68
69=cut
70
71use strict;
72
73use XMLTV::Version '$Id: tv_grab_uk_bleb.in,v 1.22 2015/07/03 01:42:13 knowledgejunkie Exp $ ';
74use XMLTV::Capabilities qw/baseline manualconfig/;
75use XMLTV::Description 'United Kingdom (bleb.org)';
76
77use Archive::Zip;
78use IO::Scalar;
79
80# Workaround from <http://rt.cpan.org/NoAuth/Bug.html?id=7855>.
81use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
82Archive::Zip::setErrorHandler( sub { die @_ } );
83use IO::Scalar;
84BEGIN {
85    # Override to allow seekable IO::Scalars
86    no warnings;
87    package Archive::Zip::Archive;
88    sub _isSeekable {
89	my $fh = shift;
90	no warnings; # avoid '-f on unopened filehandle'
91	return (-f $fh || UNIVERSAL::isa( $fh, 'IO::Scalar' ));
92    }
93    # Override to force print to use seekable IO::Scalars
94    package IO::Scalar;
95    sub print {
96	my $self = shift;
97	# *$self->{Pos} = length(${*$self->{SR}} .= join('', @_));
98	my $pos = *$self->{Pos};
99	my $buf = join('', @_);
100	my $len = length($buf);
101	substr(${*$self->{SR}}, $pos, $len) = $buf;
102	*$self->{Pos} += $len;
103	1;
104    }
105} # BEGIN
106
107# Make sure you explicitly turn OFF the Data Descriptor.
108# e.g. $member->hasDataDescriptor(0);
109
110# We work by inheriting from XMLTV::Grab_XML and overriding certain
111# methods.
112#
113use XMLTV::Grab_XML;
114
115package Grab_XML_uk_bleb;
116use base 'XMLTV::Grab_XML';
117
118use Date::Manip;
119use XMLTV::DST;
120
121use XMLTV::Ask;
122use XMLTV::Config_file;
123use XMLTV::Date qw(parse_date);
124use XMLTV::Get_nice;
125use XMLTV::TZ   qw(tz_to_num);
126#use XMLTV::Supplement qw/GetSupplement/;
127
128BEGIN {
129    if (int(Date::Manip::DateManipVersion) >= 6) {
130        Date::Manip::Date_Init("SetDate=now,UTC");
131    } else {
132        Date::Manip::Date_Init("TZ=UTC");
133    }
134}
135
136# Memoize one routine if possible.
137eval { require Memoize };
138unless ($@) {
139    for ('tz_to_num') {
140        Memoize::memoize($_) or warn "cannot memoize $_";
141    }
142}
143
144sub country( $ ) {
145    my $pkg = shift;
146    return 'UK';
147}
148
149my $URL_HOST     = 'http://www.bleb.org';
150my $URL_DIR      = '/tv/data/listings';
151my $url_base     = "$URL_HOST$URL_DIR";
152my $url_channels = "$URL_HOST$URL_DIR";
153
154my $now = parse_date('now');
155
156# Returns a hash mapping YYYYMMDD to URL.
157sub urls_by_date( $$$ ) {
158    my $pkg = shift;
159    my $opt_config_file = shift;
160    my $opt_quiet = shift;
161    my $config_file = XMLTV::Config_file::filename($opt_config_file,
162        'tv_grab_uk_bleb', $opt_quiet);
163
164    my %ans; # This is a hash to return that is urls indexed by date
165
166    my @channels; # This holds the names of channels
167
168    # Do the channels from the config file
169    foreach my $line (XMLTV::Config_file::read_lines($config_file, 0)) {
170        next if not $line;
171
172        # Remove whitespace and trailing comments
173        if ($line =~ /\s*(.*?)#.*\s*/) {
174            $line = $1;
175        }
176        push @channels, $line;
177    }
178    my $channels_string = join(',', @channels);
179
180    # Do the dates
181    for (my $off = -1; $off < 7; ++$off) {
182        my $date = DateCalc($now, $off.' days');
183
184        if ($date =~ /^(\d{8})/) {
185            $date = $1;
186        }
187	else {
188            warn("Strange.  No date found at beginning of 'now' string.");
189        }
190
191        $ans{$date} = $url_base.'?format=XMLTV&file=zip&channels='
192            .$channels_string.'&days='.$off;
193    }
194    return %ans;
195}
196
197# Unzip the data and return it
198sub xml_from_data( $$ ) {
199    my $pkg = shift;
200    my $zipped_data = shift;
201
202    my $fake_filehandle = IO::Scalar->new(\$zipped_data);
203
204    my $zip = Archive::Zip->new();
205    $zip->readFromFileHandle($fake_filehandle);
206
207    my $data_file = $zip->memberNamed('data.xml');
208    my $xml = $data_file->contents();
209
210    $xml = correct_emptydescs($xml);
211    $xml = correct_timezones($xml);
212    # $xml = add_channel_icons($xml);
213
214    return Grab_XML_uk_bleb->remove_early_stop_times($xml);
215}
216
217# Disabled 2010-09-01 as most icons URLs are broken
218sub add_channel_icons( $ ) {
219    my $xml = shift;
220
221    my %channel_urls;
222
223    my $str = GetSupplement( 'tv_grab_uk_bleb', 'icon_urls' );
224    foreach (split( /\n/, $str )) {
225        s/#.*//;
226        tr/\r//d;
227        next if m/^\s*$/;
228        my @fields = split;
229        my ($channel_id, $channel_url) = @fields;
230
231        $channel_urls{$channel_id} = $channel_url;
232
233    }
234
235    # Do the regex to put in icons
236    $xml =~ s{(<channel id=\")(.*?)(\">.*?)(</display-name>)}
237             {$1.$2.$3.$4.'<icon src="'.$channel_urls{$2}.'" />'}esg;
238
239    return $xml;
240}
241
242# Removes description tags which are empty.
243sub correct_emptydescs( $ ) {
244    my @lines = split /\n/, shift;
245    foreach my $line (@lines) {
246        if ($line =~ /<desc lang="en"><\/desc>/) {
247
248            # Just remove the line
249            $line =~ s/.*//;
250
251        }
252    }
253    return join("\n", @lines);
254}
255
256# Adds timezones which are guessed at by DST
257sub correct_timezones( $ ) {
258    my @lines = split /\n/, shift;
259    foreach my $line (@lines) {
260        if ($line =~ /<programme/) {
261
262            # Check for times without timezones
263            $line =~ s/(start|stop)="(\d+)"/qq'$1="'.utc_offset($2, "+0000").qq'"'/eg;
264
265        }
266    }
267    return join("\n", @lines);
268}
269
270sub configure( $$$ ) {
271    my $pkg = shift;
272    my $opt_config_file = shift;
273    my $opt_quiet = shift;
274
275    my $config_file = XMLTV::Config_file::filename($opt_config_file,
276        'tv_grab_uk_bleb', $opt_quiet);
277
278    XMLTV::Config_file::check_no_overwrite($config_file);
279
280    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
281
282    my $bar = new XMLTV::ProgressBar('getting available channels', 1)
283        if not $opt_quiet;
284
285    my $page = get_nice($url_channels);
286    $bar->update() if not $opt_quiet;
287    $bar->finish() if not $opt_quiet;
288
289    if ($page =~ /Available channels are: <tt id="channels">(.*?)<\/tt>/) {
290        my @channels = split(', ', $1);
291
292        # Actively filter out unavailable channels (as of 2015-07-02)
293        my @unavailable_channels = (
294            '4seven',               'al_jazeera_english',             'bbc7',
295            'bbc_6music',           'bbc_radio1',                     'bbc_radio1_xtra',
296            'bbc_radio2',           'bbc_radio3',                     'bbc_radio4',
297            'bbc_radio5_live',      'bbc_radio5_live_sports_extra',   'bbc_radio_scotland',
298            'bbc_world_service',    'bravo',                          'citv',
299            'discovery_real_time',  'itv1',                           'itv1_hd',
300            'itv2',                 'itv3',                           'itv4',
301            'men_and_motors',       'nick_junior',                    'oneword',
302            's4c2',                 'sky_movies_classics',            'sky_movies_hd1',
303            'sky_movies_hd2',       'sky_travel',                     'teachers_tv',
304            'virgin1',              'yesterday',
305        );
306        my %available_channels;
307        @available_channels{ @channels } = undef;
308        delete @available_channels{ @unavailable_channels };
309        @channels = sort keys %available_channels;
310
311        my @questions;
312
313        foreach my $chan (@channels) {
314            push @questions, "Add channel $chan? ";
315        }
316        my @answers = ask_many_boolean(1, @questions);
317
318        for (my $i=0; $i < $#channels; $i++) {
319            if ($answers[$i]) {
320                print CONF $channels[$i]."\n";
321            }
322        }
323        say("Configuration complete.");
324    }
325    else {
326        say("Unable to download channels list from $url_channels.");
327        die;
328    }
329}
330
331Grab_XML_uk_bleb->go();
332
333