1#!/usr/local/bin/perl -w 2# -*- mode: perl; coding: utf-8 -*- ########################################### 3# 4# Setup 5# 6############################################################################### 7use 5.008; # we process Unicode texts 8use strict; 9use warnings; 10 11use constant VERSION => '$Id: tv_grab_fi.pl,v 2.05 2014/06/21 16:36:15 stefanb2 Exp $ '; 12 13############################################################################### 14# INSERT: SOURCES 15############################################################################### 16package main; 17 18# Perl core modules 19use Getopt::Long; 20use List::Util qw(shuffle); 21use Pod::Usage; 22 23# CUT CODE START 24############################################################################### 25# Load internal modules 26use FindBin qw($Bin); 27BEGIN { 28 foreach my $source (<$Bin/fi/*.pm>, <$Bin/fi/source/*.pm>) { 29 require "$source"; 30 } 31} 32############################################################################### 33# CUT CODE END 34 35# Generate source module list 36my @sources; 37BEGIN { 38 @sources = map { s/::$//; $_ } 39 map { "fi::source::" . $_ } 40 sort 41 grep { ${ $::{'fi::'}->{'source::'}->{$_}->{ENABLED} } } 42 keys %{ $::{'fi::'}->{'source::'} }; 43 die "$0: couldn't find any source modules?" unless @sources; 44} 45 46# Import from internal modules 47fi::common->import(':main'); 48 49# Basic XMLTV modules 50use XMLTV::Version VERSION; 51use XMLTV::Capabilities qw(baseline manualconfig cache); 52use XMLTV::Description 'Finland (' . 53 join(', ', map { $_->description() } @sources ) . 54 ')'; 55 56# NOTE: We will only reach the rest of the code only when the script is called 57# without --version, --capabilities or --description 58# Reminder of XMLTV modules 59use XMLTV::Get_nice; 60use XMLTV::Memoize; 61 62############################################################################### 63# 64# Main program 65# 66############################################################################### 67# Forward declarations 68sub doConfigure(); 69sub doListChannels(); 70sub doGrab(); 71 72# Command line option default values 73my %Option = ( 74 days => 14, 75 quiet => 0, 76 debug => 0, 77 offset => 0, 78 ); 79 80# Enable caching. This will remove "--cache [file]" from @ARGV 81XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux'); 82 83# Process command line options 84if (GetOptions(\%Option, 85 "configure", 86 "config-file=s", 87 "days=i", 88 "debug|d+", 89 "gui:s", 90 "help|h|?", 91 "list-channels", 92 "no-randomize", 93 "offset=i", 94 "output=s", 95 "quiet", 96 "test-mode")) { 97 98 pod2usage(-exitstatus => 0, 99 -verbose => 2) 100 if $Option{help}; 101 102 setDebug($Option{debug}); 103 setQuiet($Option{quiet}); 104 105 if ($Option{configure}) { 106 # Configure mode 107 doConfigure(); 108 109 } elsif ($Option{'list-channels'}) { 110 # List channels mode 111 doListChannels(); 112 113 } else { 114 # Grab mode (default) 115 doGrab(); 116 } 117} else { 118 pod2usage(2); 119} 120 121# That's all folks 122exit 0; 123 124############################################################################### 125# 126# Utility functions for the different modes 127# 128############################################################################### 129sub _getConfigFile() { 130 require XMLTV::Config_file; 131 return(XMLTV::Config_file::filename($Option{'config-file'}, 132 "tv_grab_fi", 133 $Option{quiet})); 134} 135 136{ 137 my $ofh; 138 139 sub _createXMLTVWriter() { 140 141 # Output file handling 142 $ofh = \*STDOUT; 143 if (defined $Option{output}) { 144 open($ofh, ">", $Option{output}) 145 or die "$0: cannot open file '$Option{output}' for writing: $!"; 146 } 147 148 # Create XMLTV writer for UTF-8 encoded text 149 binmode($ofh, ":utf8"); 150 my $writer = XMLTV::Writer->new( 151 encoding => 'UTF-8', 152 OUTPUT => \*STDOUT, 153 ); 154 155 #### HACK CODE #### 156 $writer->start({ 157 "generator-info-name" => "XMLTV", 158 "generator-info-url" => "http://xmltv.org/", 159 "source-info-url" => "multiple", # TBA 160 "source-data-url" => "multiple", # TBA 161 }); 162 #### HACK CODE #### 163 164 return($writer); 165 } 166 167 sub _closeXMLTVWriter($) { 168 my($writer) = @_; 169 $writer->end(); 170 171 # close output file 172 if ($Option{output}) { 173 close($ofh) or die "$0: write error on file '$Option{output}': $!"; 174 } 175 message("DONE"); 176 } 177} 178 179sub _addChannel($$$$) { 180 my($writer, $id, $name, $language) = @_; 181 $writer->write_channel({ 182 id => $id, 183 'display-name' => [[$name, $language]], 184 }); 185} 186 187{ 188 my $bar; 189 190 sub _createProgressBar($$) { 191 my($label, $count) = @_; 192 return if $Option{quiet}; 193 194 require XMLTV::Ask; 195 require XMLTV::ProgressBar; 196 XMLTV::Ask::init($Option{gui}); 197 $bar = XMLTV::ProgressBar->new({ 198 name => $label, 199 count => $count, 200 }); 201 } 202 203 sub _updateProgressBar() { $bar->update() if defined $bar } 204 sub _destroyProgressBar() { $bar->finish() if defined $bar } 205} 206 207sub _getChannels($$) { 208 my($callback, $opaque) = @_; 209 210 # Get channels from all sources 211 _createProgressBar("getting list of channels", @sources); 212 foreach my $source (@sources) { 213 debug(1, "requesting channel list from source '" . $source->description ."'"); 214 if (my $list = $source->channels()) { 215 die "test failure: source '" . $source->description . "' didn't find any channels!\n" 216 if ($Option{'test-mode'} && (keys %{$list} == 0)); 217 218 while (my($id, $value) = each %{ $list }) { 219 my($language, $name) = split(" ", $value, 2); 220 $callback->($opaque, $id, $name, $language); 221 } 222 } 223 _updateProgressBar(); 224 } 225 _destroyProgressBar(); 226} 227 228############################################################################### 229# 230# Configure Mode 231# 232############################################################################### 233sub doConfigure() { 234 # Get configuration file name 235 my $file = _getConfigFile(); 236 XMLTV::Config_file::check_no_overwrite($file); 237 238 # Open configuration file. Assume UTF-8 encoding 239 open(my $fh, ">:utf8", $file) 240 or die "$0: can't open configuration file '$file': $!"; 241 print $fh "# -*- coding: utf-8 -*-\n"; 242 243 # Get channels 244 my %channels; 245 _getChannels(sub { 246 # We only need name and ID 247 my(undef, $id, $name) = @_; 248 $channels{$id} = $name; 249 }, 250 undef); 251 252 # Query user 253 my @sorted = sort keys %channels; 254 my @answers = XMLTV::Ask::ask_many_boolean(1, map { "add channel $channels{$_} ($_)?" } @sorted); 255 256 # Generate configuration file contents from answers 257 foreach my $id (@sorted) { 258 warn("\nunexpected end of input reached\n"), last 259 unless @answers; 260 261 # Write selection to configuration file 262 my $answer = shift(@answers); 263 print $fh ($answer ? "" : "#"), "channel $id $channels{$id}\n"; 264 } 265 266 # Check for write errors 267 close($fh) 268 or die "$0: can't write to configuration file '$file': $!"; 269 message("DONE"); 270} 271 272############################################################################### 273# 274# List Channels Mode 275# 276############################################################################### 277sub doListChannels() { 278 # Create XMLTV writer 279 my $writer = _createXMLTVWriter(); 280 281 # Get channels 282 _getChannels(sub { 283 my($writer, $id, $name, $language) = @_; 284 _addChannel($writer, $id, $name, $language); 285 }, 286 $writer); 287 288 # Done writing 289 _closeXMLTVWriter($writer); 290} 291 292############################################################################### 293# 294# Grab Mode 295# 296############################################################################### 297sub doGrab() { 298 # Sanity check 299 die "$0: --offset must be a non-negative integer" 300 unless $Option{offset} >= 0; 301 die "$0: --days must be an integer larger than 0" 302 unless $Option{days} > 0; 303 304 # Get configuation 305 my %channels; 306 { 307 # Get configuration file name 308 my $file = _getConfigFile(); 309 310 # Open configuration file. Assume UTF-8 encoding 311 open(my $fh, "<:utf8", $file) 312 or die "$0: can't open configuration file '$file': $!"; 313 314 # Process configuration information 315 while (<$fh>) { 316 317 # Comment removal, white space trimming and compressing 318 s/\#.*//; 319 s/^\s+//; 320 s/\s+$//; 321 next unless length; # skip empty lines 322 s/\s+/ /; 323 324 # Channel definition 325 if (my($id, $name) = /^channel (\S+) (.+)/) { 326 debug(1, "duplicate channel definion in line $.:$id ($name)") 327 if exists $channels{$id}; 328 $channels{$id} = $name; 329 330 # Programme definition 331 } elsif (fi::programme->parseConfigLine($_)) { 332 # Nothing to be done here 333 334 } else { 335 warn("bad configuration line in file '$file', line $.: $_\n"); 336 } 337 } 338 339 close($fh); 340 } 341 342 # Generate list of days 343 my $dates = fi::day->generate($Option{offset}, $Option{days}); 344 345 # Set up time zone 346 setTimeZone(); 347 348 # Create XMLTV writer 349 my $writer = _createXMLTVWriter(); 350 351 # Generate task list with one task per channel and day 352 my @tasklist; 353 foreach my $id (sort keys %channels) { 354 for (my $i = 1; $i < $#{ $dates }; $i++) { 355 push(@tasklist, [$id, 356 @{ $dates }[$i - 1..$i + 1], 357 $Option{offset} + $i - 1]); 358 } 359 } 360 361 # Randomize the task list in order to create a random access pattern 362 # NOTE: if you use only one source, then this is basically a no-op 363 if (not $Option{'no-randomize'}) { 364 debug(1, "Randomizing task list"); 365 @tasklist = shuffle(@tasklist); 366 } 367 368 # For each entry in the task list 369 my %seen; 370 my @programmes; 371 _createProgressBar("getting listings", @tasklist); 372 foreach my $task (@tasklist) { 373 my($id, $yesterday, $today, $tomorrow, $offset) = @{$task}; 374 debug(1, "XMLTV channel ID '$id' fetching day $today"); 375 foreach my $source (@sources) { 376 if (my $programmes = $source->grab($id, 377 $yesterday, $today, $tomorrow, 378 $offset)) { 379 380 if (@{ $programmes }) { 381 # Add channel ID & name (once) 382 _addChannel($writer, $id, $channels{$id}, 383 $programmes->[0]->language()) 384 unless $seen{$id}++; 385 386 # Add programmes to list 387 push(@programmes, @{ $programmes }); 388 } elsif ($Option{'test-mode'}) { 389 die "test failure: source '" . $source->description . "' didn't retrieve any programmes for '$id'!\n"; 390 } 391 } 392 } 393 _updateProgressBar(); 394 } 395 _destroyProgressBar(); 396 397 # Dump programs 398 message("writing XMLTV programme data"); 399 $_->dump($writer) foreach (@programmes); 400 401 # Done writing 402 _closeXMLTVWriter($writer); 403} 404 405############################################################################### 406# 407# Man page 408# 409############################################################################### 410__END__ 411=pod 412 413=head1 NAME 414 415tv_grab_fi - Grab TV listings for Finland 416 417=head1 SYNOPSIS 418 419tv_grab_fi [--cache E<lt>FILEE<gt>] 420 [--config-file E<lt>FILEE<gt>] 421 [--days E<lt>NE<gt>] 422 [--gui [E<lt>OPTIONE<gt>]] 423 [--no-randomize] 424 [--offset E<lt>NE<gt>] 425 [--output E<lt>FILEE<gt>] 426 [--quiet] 427 428tv_grab_fi --capabilities 429 430tv_grab_fi --configure 431 [--cache E<lt>FILEE<gt>] 432 [--config-file E<lt>FILEE<gt>] 433 [--gui [E<lt>OPTIONE<gt>]] 434 [--quiet] 435 436tv_grab_fi --description 437 438tv_grab_fi --help|-h|-? 439 440tv_grab_fi --list-channels 441 [--cache E<lt>FILEE<gt>] 442 [--gui [E<lt>OPTIONE<gt>]] 443 [--quiet] 444 445tv_grab_fi --version 446 447=head1 DESCRIPTION 448 449Grab TV listings for several channels available in Finland. The data comes 450from various sources, e.g. www.telkku.com. The grabber relies on parsing HTML, 451so it might stop working when the web page layout is changed. 452 453You need to run C<tv_grab_fi --configure> first to create the channel 454configuration for your setup. Subsequently runs of C<tv_grab_fi> will grab 455the latest data, process them and produce XML data on the standard output. 456 457=head1 COMMANDS 458 459=over 8 460 461=item B<NONE> 462 463Grab mode. 464 465=item B<--capabilities> 466 467Show the capabilities this grabber supports. See also 468L<http://wiki.xmltv.org/index.php/XmltvCapabilities>. 469 470=item B<--configure> 471 472Generate the configuration file by asking the users which channels to grab. 473 474=item B<--description> 475 476Print the description for this grabber. 477 478=item B<--help|-h|-?> 479 480Show this help page. 481 482=item B<--list-channels> 483 484Fetch all available channels from the various sources and write them to the 485standard output. 486 487=item B<--version> 488 489Show the version of this grabber. 490 491=back 492 493=head1 GENERIC OPTIONS 494 495=over 8 496 497=item B<--cache F<FILE>> 498 499File name to cache the fetched HTML data in. This speeds up subsequent runs 500using the same data. 501 502=item B<--gui [OPTION]> 503 504Enable the graphical user interface. If you don't specify B<OPTION> then 505XMLTV will automatically choose the best available GUI. Allowed values are: 506 507=over 4 508 509=item B<Term> 510 511Terminal output with a progress bar 512 513=item B<TermNoProgressBar> 514 515Terminal output without progress bar 516 517=item B<Tk> 518 519Tk-based GUI 520 521=back 522 523=item B<--quiet> 524 525Suppress any progress messages to the standard output. 526 527=back 528 529=head1 CONFIGURE MODE OPTIONS 530 531=over 8 532 533=item B<--config-file F<FILE>> 534 535File name to write the configuration to. 536 537Default is F<$HOME/.xmltv/tv_grab_fi.conf>. 538 539=back 540 541=head1 GRAB MODE OPTIONS 542 543=over 8 544 545=item B<--config-file F<FILE>> 546 547File name to read the configuration from. 548 549Default is F<$HOME/.xmltv/tv_grab_fi.conf>. 550 551=item B<--days C<N>> 552 553Grab C<N> days of TV data. 554 555Default is 14 days. 556 557=item B<--no-randomize> 558 559Grab TV data in deterministic order, i.e. first fetch channel 1, days 1 to N, 560then channel 2, and so on. 561 562Default is to use a random access pattern. If you only grab TV data from one 563source then the randomizing is a no-op. 564 565=item B<--offset C<N>> 566 567Grab TV data starting at C<N> days in the future. 568 569Default is 0, i.e. today. 570 571=item B<--output F<FILE>> 572 573Write the XML data to F<FILE> instead of the standard output. 574 575=back 576 577=head1 CONFIGURATION FILE SYNTAX 578 579The configuration file is line oriented, each line can contain one command. 580Empty lines and everything after the C<#> comment character is ignored. 581Supported commands are: 582 583=over 8 584 585=item B<channel ID NAME> 586 587Grab information for this channel. C<ID> depends on the source, C<NAME> is 588ignored and forwarded as is to the XMLTV output file. This information can be 589automatically generated using the grabber in the configuration mode. 590 591=item B<series description NAME> 592 593If a programme title matches C<NAME> then the first sentence of the 594description, i.e. everything up to the first period (C<.>), question mark 595(C<?>) or exclamation mark (C<!>), is removed from the description and is used 596as the name of the episode. 597 598=item B<series title NAME> 599 600If a programme title contains a colon (C<:>) then the grabber checks if the 601left-hand side of the colon matches C<NAME>. If it does then the left-hand 602side is used as programme title and the right-hand side as the name of the 603episode. 604 605=item B<title map "FROM" 'TO'> 606 607If the programme title starts with the string C<FROM> then replace this part 608with the string C<TO>. The strings must be enclosed in single quotes (C<'>) or 609double quotes (C<">). The title mapping occurs before the C<series> command 610processing. 611 612=item B<title strip parental level> 613 614At the beginning of 2012 some programme descriptions started to include 615parental levels at the end of the title, e.g. C<(S)>. With this command all 616parental levels will be removed from the titles automatically. This removal 617occurs before the title mapping. 618 619=back 620 621=head1 SEE ALSO 622 623L<xmltv>. 624 625=head1 AUTHORS 626 627=head2 Current 628 629=over 630 631=item Stefan Becker C<chemobejk at gmail dot com> 632 633=item Ville Ahonen C<ville dot ahonen at iki dot fi> 634 635=back 636 637=head2 Retired 638 639=over 640 641=item Matti Airas 642 643=back 644 645=head1 BUGS 646 647The channels are identified by channel number rather than the RFC2838 form 648recommended by the XMLTV DTD. 649 650=cut 651