1#!/usr/local/bin/perl -w 2 3=pod 4 5=head1 NAME 6 7tv_grab_uk_bleb - Grab TV listings for the United Kingdom, from bleb.org 8 9=head1 SYNOPSIS 10 11tv_grab_uk_bleb --help 12 13tv_grab_uk_bleb [--config-file FILE] --configure [--gui OPTION] 14 15tv_grab_uk_bleb [--config-file FILE] [--output FILE] [--quiet] 16 [--days N] [--offset N] 17 18=head1 DESCRIPTION 19 20Output TV and radio listings in XMLTV format for many stations 21available in Britain. The data comes from the bleb.org web site. 22 23=head1 USAGE 24 25First you must run B<tv_grab_uk_bleb --configure> to choose which 26stations you want to receive. Then running B<tv_grab_uk_bleb> with no 27arguments will get about a week<39>s listings for the stations 28you chose. 29 30B<--configure> Prompt for which stations to download and write the 31configuration file. 32 33B<--gui OPTION> Use this option to enable a graphical interface to be used. 34OPTION may be 'Tk', or left blank for the best available choice. 35Additional allowed values of OPTION are 'Term' for normal terminal output 36(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar. 37 38B<--config-file FILE> Set the name of the configuration file, the 39default is B<~/.xmltv/tv_grab_uk_bleb.conf>. This is the file written by 40B<--configure> and read when grabbing. 41 42B<--output FILE> When grabbing, write output to FILE rather than 43standard output. 44 45B<--days N> When grabbing, grab N days rather than as many as 46possible. 47 48B<--offset N> Start grabbing at today + N. N may be negative. 49 50B<--quiet> Suppress the progress messages normally written to standard 51error. 52 53B<--version> Show the version of the grabber. 54 55B<--help> Print a help message and exit. 56 57=head1 SEE ALSO 58 59L<xmltv(5)>, L<http://www.bleb.org/> 60 61=head1 AUTHOR 62 63Andy Balaam, axis3x3@users.sourceforge.net 64 65Icon URLs collated by Lawrence, MagicLGH@aol.com 66 67Based on tv_grab_nl_wolf by Ed Avis 68 69=cut 70 71use strict; 72 73use XMLTV::Version '$Id: tv_grab_uk_bleb.in,v 1.22 2015/07/03 01:42:13 knowledgejunkie Exp $ '; 74use XMLTV::Capabilities qw/baseline manualconfig/; 75use XMLTV::Description 'United Kingdom (bleb.org)'; 76 77use Archive::Zip; 78use IO::Scalar; 79 80# Workaround from <http://rt.cpan.org/NoAuth/Bug.html?id=7855>. 81use Archive::Zip qw( :ERROR_CODES :CONSTANTS ); 82Archive::Zip::setErrorHandler( sub { die @_ } ); 83use IO::Scalar; 84BEGIN { 85 # Override to allow seekable IO::Scalars 86 no warnings; 87 package Archive::Zip::Archive; 88 sub _isSeekable { 89 my $fh = shift; 90 no warnings; # avoid '-f on unopened filehandle' 91 return (-f $fh || UNIVERSAL::isa( $fh, 'IO::Scalar' )); 92 } 93 # Override to force print to use seekable IO::Scalars 94 package IO::Scalar; 95 sub print { 96 my $self = shift; 97 # *$self->{Pos} = length(${*$self->{SR}} .= join('', @_)); 98 my $pos = *$self->{Pos}; 99 my $buf = join('', @_); 100 my $len = length($buf); 101 substr(${*$self->{SR}}, $pos, $len) = $buf; 102 *$self->{Pos} += $len; 103 1; 104 } 105} # BEGIN 106 107# Make sure you explicitly turn OFF the Data Descriptor. 108# e.g. $member->hasDataDescriptor(0); 109 110# We work by inheriting from XMLTV::Grab_XML and overriding certain 111# methods. 112# 113use XMLTV::Grab_XML; 114 115package Grab_XML_uk_bleb; 116use base 'XMLTV::Grab_XML'; 117 118use Date::Manip; 119use XMLTV::DST; 120 121use XMLTV::Ask; 122use XMLTV::Config_file; 123use XMLTV::Date qw(parse_date); 124use XMLTV::Get_nice; 125use XMLTV::TZ qw(tz_to_num); 126#use XMLTV::Supplement qw/GetSupplement/; 127 128BEGIN { 129 if (int(Date::Manip::DateManipVersion) >= 6) { 130 Date::Manip::Date_Init("SetDate=now,UTC"); 131 } else { 132 Date::Manip::Date_Init("TZ=UTC"); 133 } 134} 135 136# Memoize one routine if possible. 137eval { require Memoize }; 138unless ($@) { 139 for ('tz_to_num') { 140 Memoize::memoize($_) or warn "cannot memoize $_"; 141 } 142} 143 144sub country( $ ) { 145 my $pkg = shift; 146 return 'UK'; 147} 148 149my $URL_HOST = 'http://www.bleb.org'; 150my $URL_DIR = '/tv/data/listings'; 151my $url_base = "$URL_HOST$URL_DIR"; 152my $url_channels = "$URL_HOST$URL_DIR"; 153 154my $now = parse_date('now'); 155 156# Returns a hash mapping YYYYMMDD to URL. 157sub urls_by_date( $$$ ) { 158 my $pkg = shift; 159 my $opt_config_file = shift; 160 my $opt_quiet = shift; 161 my $config_file = XMLTV::Config_file::filename($opt_config_file, 162 'tv_grab_uk_bleb', $opt_quiet); 163 164 my %ans; # This is a hash to return that is urls indexed by date 165 166 my @channels; # This holds the names of channels 167 168 # Do the channels from the config file 169 foreach my $line (XMLTV::Config_file::read_lines($config_file, 0)) { 170 next if not $line; 171 172 # Remove whitespace and trailing comments 173 if ($line =~ /\s*(.*?)#.*\s*/) { 174 $line = $1; 175 } 176 push @channels, $line; 177 } 178 my $channels_string = join(',', @channels); 179 180 # Do the dates 181 for (my $off = -1; $off < 7; ++$off) { 182 my $date = DateCalc($now, $off.' days'); 183 184 if ($date =~ /^(\d{8})/) { 185 $date = $1; 186 } 187 else { 188 warn("Strange. No date found at beginning of 'now' string."); 189 } 190 191 $ans{$date} = $url_base.'?format=XMLTV&file=zip&channels=' 192 .$channels_string.'&days='.$off; 193 } 194 return %ans; 195} 196 197# Unzip the data and return it 198sub xml_from_data( $$ ) { 199 my $pkg = shift; 200 my $zipped_data = shift; 201 202 my $fake_filehandle = IO::Scalar->new(\$zipped_data); 203 204 my $zip = Archive::Zip->new(); 205 $zip->readFromFileHandle($fake_filehandle); 206 207 my $data_file = $zip->memberNamed('data.xml'); 208 my $xml = $data_file->contents(); 209 210 $xml = correct_emptydescs($xml); 211 $xml = correct_timezones($xml); 212 # $xml = add_channel_icons($xml); 213 214 return Grab_XML_uk_bleb->remove_early_stop_times($xml); 215} 216 217# Disabled 2010-09-01 as most icons URLs are broken 218sub add_channel_icons( $ ) { 219 my $xml = shift; 220 221 my %channel_urls; 222 223 my $str = GetSupplement( 'tv_grab_uk_bleb', 'icon_urls' ); 224 foreach (split( /\n/, $str )) { 225 s/#.*//; 226 tr/\r//d; 227 next if m/^\s*$/; 228 my @fields = split; 229 my ($channel_id, $channel_url) = @fields; 230 231 $channel_urls{$channel_id} = $channel_url; 232 233 } 234 235 # Do the regex to put in icons 236 $xml =~ s{(<channel id=\")(.*?)(\">.*?)(</display-name>)} 237 {$1.$2.$3.$4.'<icon src="'.$channel_urls{$2}.'" />'}esg; 238 239 return $xml; 240} 241 242# Removes description tags which are empty. 243sub correct_emptydescs( $ ) { 244 my @lines = split /\n/, shift; 245 foreach my $line (@lines) { 246 if ($line =~ /<desc lang="en"><\/desc>/) { 247 248 # Just remove the line 249 $line =~ s/.*//; 250 251 } 252 } 253 return join("\n", @lines); 254} 255 256# Adds timezones which are guessed at by DST 257sub correct_timezones( $ ) { 258 my @lines = split /\n/, shift; 259 foreach my $line (@lines) { 260 if ($line =~ /<programme/) { 261 262 # Check for times without timezones 263 $line =~ s/(start|stop)="(\d+)"/qq'$1="'.utc_offset($2, "+0000").qq'"'/eg; 264 265 } 266 } 267 return join("\n", @lines); 268} 269 270sub configure( $$$ ) { 271 my $pkg = shift; 272 my $opt_config_file = shift; 273 my $opt_quiet = shift; 274 275 my $config_file = XMLTV::Config_file::filename($opt_config_file, 276 'tv_grab_uk_bleb', $opt_quiet); 277 278 XMLTV::Config_file::check_no_overwrite($config_file); 279 280 open(CONF, ">$config_file") or die "cannot write to $config_file: $!"; 281 282 my $bar = new XMLTV::ProgressBar('getting available channels', 1) 283 if not $opt_quiet; 284 285 my $page = get_nice($url_channels); 286 $bar->update() if not $opt_quiet; 287 $bar->finish() if not $opt_quiet; 288 289 if ($page =~ /Available channels are: <tt id="channels">(.*?)<\/tt>/) { 290 my @channels = split(', ', $1); 291 292 # Actively filter out unavailable channels (as of 2015-07-02) 293 my @unavailable_channels = ( 294 '4seven', 'al_jazeera_english', 'bbc7', 295 'bbc_6music', 'bbc_radio1', 'bbc_radio1_xtra', 296 'bbc_radio2', 'bbc_radio3', 'bbc_radio4', 297 'bbc_radio5_live', 'bbc_radio5_live_sports_extra', 'bbc_radio_scotland', 298 'bbc_world_service', 'bravo', 'citv', 299 'discovery_real_time', 'itv1', 'itv1_hd', 300 'itv2', 'itv3', 'itv4', 301 'men_and_motors', 'nick_junior', 'oneword', 302 's4c2', 'sky_movies_classics', 'sky_movies_hd1', 303 'sky_movies_hd2', 'sky_travel', 'teachers_tv', 304 'virgin1', 'yesterday', 305 ); 306 my %available_channels; 307 @available_channels{ @channels } = undef; 308 delete @available_channels{ @unavailable_channels }; 309 @channels = sort keys %available_channels; 310 311 my @questions; 312 313 foreach my $chan (@channels) { 314 push @questions, "Add channel $chan? "; 315 } 316 my @answers = ask_many_boolean(1, @questions); 317 318 for (my $i=0; $i < $#channels; $i++) { 319 if ($answers[$i]) { 320 print CONF $channels[$i]."\n"; 321 } 322 } 323 say("Configuration complete."); 324 } 325 else { 326 say("Unable to download channels list from $url_channels."); 327 die; 328 } 329} 330 331Grab_XML_uk_bleb->go(); 332 333