1#!/usr/local/bin/perl -w
2# -*- mode: perl; coding: utf-8 -*- ###########################################
3#
4# Setup
5#
6###############################################################################
7use 5.008; # we process Unicode texts
8use strict;
9use warnings;
10
11use constant VERSION => '$Id: tv_grab_fi.pl,v 2.05 2014/06/21 16:36:15 stefanb2 Exp $ ';
12
13###############################################################################
14# INSERT: SOURCES
15###############################################################################
16package main;
17
18# Perl core modules
19use Getopt::Long;
20use List::Util qw(shuffle);
21use Pod::Usage;
22
23# CUT CODE START
24###############################################################################
25# Load internal modules
26use FindBin qw($Bin);
27BEGIN {
28  foreach my $source (<$Bin/fi/*.pm>, <$Bin/fi/source/*.pm>) {
29    require "$source";
30  }
31}
32###############################################################################
33# CUT CODE END
34
35# Generate source module list
36my @sources;
37BEGIN {
38  @sources = map { s/::$//; $_ }
39    map { "fi::source::" . $_ }
40    sort
41    grep { ${ $::{'fi::'}->{'source::'}->{$_}->{ENABLED} } }
42    keys %{ $::{'fi::'}->{'source::'} };
43  die "$0: couldn't find any source modules?" unless @sources;
44}
45
46# Import from internal modules
47fi::common->import(':main');
48
49# Basic XMLTV modules
50use XMLTV::Version VERSION;
51use XMLTV::Capabilities qw(baseline manualconfig cache);
52use XMLTV::Description 'Finland (' .
53  join(', ', map { $_->description() } @sources ) .
54  ')';
55
56# NOTE: We will only reach the rest of the code only when the script is called
57#       without --version, --capabilities or --description
58# Reminder of XMLTV modules
59use XMLTV::Get_nice;
60use XMLTV::Memoize;
61
62###############################################################################
63#
64# Main program
65#
66###############################################################################
67# Forward declarations
68sub doConfigure();
69sub doListChannels();
70sub doGrab();
71
72# Command line option default values
73my %Option = (
74	      days   => 14,
75	      quiet  =>  0,
76	      debug  =>  0,
77	      offset =>  0,
78	     );
79
80# Enable caching. This will remove "--cache [file]" from @ARGV
81XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
82
83# Process command line options
84if (GetOptions(\%Option,
85	       "configure",
86	       "config-file=s",
87	       "days=i",
88	       "debug|d+",
89	       "gui:s",
90	       "help|h|?",
91	       "list-channels",
92	       "no-randomize",
93	       "offset=i",
94	       "output=s",
95	       "quiet",
96	       "test-mode")) {
97
98  pod2usage(-exitstatus => 0,
99	    -verbose => 2)
100    if $Option{help};
101
102  setDebug($Option{debug});
103  setQuiet($Option{quiet});
104
105  if ($Option{configure}) {
106    # Configure mode
107    doConfigure();
108
109  } elsif ($Option{'list-channels'}) {
110    # List channels mode
111    doListChannels();
112
113  } else {
114    # Grab mode (default)
115    doGrab();
116  }
117} else {
118  pod2usage(2);
119}
120
121# That's all folks
122exit 0;
123
124###############################################################################
125#
126# Utility functions for the different modes
127#
128###############################################################################
129sub _getConfigFile() {
130  require XMLTV::Config_file;
131  return(XMLTV::Config_file::filename($Option{'config-file'},
132				      "tv_grab_fi",
133				      $Option{quiet}));
134}
135
136{
137  my $ofh;
138
139  sub _createXMLTVWriter() {
140
141    # Output file handling
142    $ofh = \*STDOUT;
143    if (defined $Option{output}) {
144      open($ofh, ">", $Option{output})
145	or die "$0: cannot open file '$Option{output}' for writing: $!";
146    }
147
148    # Create XMLTV writer for UTF-8 encoded text
149    binmode($ofh, ":utf8");
150    my $writer = XMLTV::Writer->new(
151				    encoding => 'UTF-8',
152				    OUTPUT   => \*STDOUT,
153				   );
154
155    #### HACK CODE ####
156    $writer->start({
157		    "generator-info-name" => "XMLTV",
158		    "generator-info-url"  => "http://xmltv.org/",
159		    "source-info-url"     => "multiple", # TBA
160		    "source-data-url"     => "multiple", # TBA
161		   });
162    #### HACK CODE ####
163
164    return($writer);
165  }
166
167  sub _closeXMLTVWriter($) {
168    my($writer) = @_;
169    $writer->end();
170
171    # close output file
172    if ($Option{output}) {
173      close($ofh) or die "$0: write error on file '$Option{output}': $!";
174    }
175    message("DONE");
176  }
177}
178
179sub _addChannel($$$$) {
180  my($writer, $id, $name, $language) = @_;
181  $writer->write_channel({
182			  id             => $id,
183			  'display-name' => [[$name, $language]],
184			 });
185}
186
187{
188  my $bar;
189
190  sub _createProgressBar($$) {
191    my($label, $count) = @_;
192    return if $Option{quiet};
193
194    require XMLTV::Ask;
195    require XMLTV::ProgressBar;
196    XMLTV::Ask::init($Option{gui});
197    $bar = XMLTV::ProgressBar->new({
198				    name  => $label,
199				    count => $count,
200				   });
201  }
202
203  sub _updateProgressBar()  { $bar->update() if defined $bar }
204  sub _destroyProgressBar() { $bar->finish() if defined $bar }
205}
206
207sub _getChannels($$) {
208  my($callback, $opaque) = @_;
209
210  # Get channels from all sources
211  _createProgressBar("getting list of channels", @sources);
212  foreach my $source (@sources) {
213    debug(1, "requesting channel list from source '" . $source->description ."'");
214    if (my $list = $source->channels()) {
215      die "test failure: source '" . $source->description . "' didn't find any channels!\n"
216	if ($Option{'test-mode'} && (keys %{$list} == 0));
217
218      while (my($id, $value) = each %{ $list }) {
219	my($language, $name) = split(" ", $value, 2);
220	$callback->($opaque, $id, $name, $language);
221      }
222    }
223    _updateProgressBar();
224  }
225  _destroyProgressBar();
226}
227
228###############################################################################
229#
230# Configure Mode
231#
232###############################################################################
233sub doConfigure() {
234  # Get configuration file name
235  my $file = _getConfigFile();
236  XMLTV::Config_file::check_no_overwrite($file);
237
238  # Open configuration file. Assume UTF-8 encoding
239  open(my $fh, ">:utf8", $file)
240      or die "$0: can't open configuration file '$file': $!";
241  print $fh "# -*- coding: utf-8 -*-\n";
242
243  # Get channels
244  my %channels;
245  _getChannels(sub {
246		 # We only need name and ID
247		 my(undef, $id, $name) = @_;
248		 $channels{$id} = $name;
249	       },
250	       undef);
251
252  # Query user
253  my @sorted  = sort keys %channels;
254  my @answers = XMLTV::Ask::ask_many_boolean(1, map { "add channel $channels{$_} ($_)?" } @sorted);
255
256  # Generate configuration file contents from answers
257  foreach my $id (@sorted) {
258    warn("\nunexpected end of input reached\n"), last
259      unless @answers;
260
261    # Write selection to configuration file
262    my $answer = shift(@answers);
263    print $fh ($answer ? "" : "#"), "channel $id $channels{$id}\n";
264  }
265
266  # Check for write errors
267  close($fh)
268    or die "$0: can't write to configuration file '$file': $!";
269  message("DONE");
270}
271
272###############################################################################
273#
274# List Channels Mode
275#
276###############################################################################
277sub doListChannels() {
278  # Create XMLTV writer
279  my $writer = _createXMLTVWriter();
280
281  # Get channels
282  _getChannels(sub {
283		 my($writer, $id, $name, $language) = @_;
284		 _addChannel($writer, $id, $name, $language);
285		 },
286	       $writer);
287
288  # Done writing
289  _closeXMLTVWriter($writer);
290}
291
292###############################################################################
293#
294# Grab Mode
295#
296###############################################################################
297sub doGrab() {
298  # Sanity check
299  die "$0: --offset must be a non-negative integer"
300    unless $Option{offset} >= 0;
301  die "$0: --days must be an integer larger than 0"
302    unless $Option{days} > 0;
303
304  # Get configuation
305  my %channels;
306  {
307    # Get configuration file name
308    my $file = _getConfigFile();
309
310    # Open configuration file. Assume UTF-8 encoding
311    open(my $fh, "<:utf8", $file)
312      or die "$0: can't open configuration file '$file': $!";
313
314    # Process configuration information
315    while (<$fh>) {
316
317      # Comment removal, white space trimming and compressing
318      s/\#.*//;
319      s/^\s+//;
320      s/\s+$//;
321      next unless length;	# skip empty lines
322      s/\s+/ /;
323
324      # Channel definition
325      if (my($id, $name) = /^channel (\S+) (.+)/) {
326	debug(1, "duplicate channel definion in line $.:$id ($name)")
327	  if exists $channels{$id};
328	$channels{$id} = $name;
329
330      # Programme definition
331      } elsif (fi::programme->parseConfigLine($_)) {
332	# Nothing to be done here
333
334      } else {
335	warn("bad configuration line in file '$file', line $.: $_\n");
336      }
337    }
338
339    close($fh);
340  }
341
342  # Generate list of days
343  my $dates = fi::day->generate($Option{offset}, $Option{days});
344
345  # Set up time zone
346  setTimeZone();
347
348  # Create XMLTV writer
349  my $writer = _createXMLTVWriter();
350
351  # Generate task list with one task per channel and day
352  my @tasklist;
353  foreach my $id (sort keys %channels) {
354    for (my $i = 1; $i < $#{ $dates }; $i++) {
355      push(@tasklist, [$id,
356		       @{ $dates }[$i - 1..$i + 1],
357		       $Option{offset} + $i - 1]);
358    }
359  }
360
361  # Randomize the task list in order to create a random access pattern
362  # NOTE: if you use only one source, then this is basically a no-op
363  if (not $Option{'no-randomize'}) {
364    debug(1, "Randomizing task list");
365    @tasklist = shuffle(@tasklist);
366  }
367
368  # For each entry in the task list
369  my %seen;
370  my @programmes;
371  _createProgressBar("getting listings", @tasklist);
372  foreach my $task (@tasklist) {
373    my($id, $yesterday, $today, $tomorrow, $offset) = @{$task};
374    debug(1, "XMLTV channel ID '$id' fetching day $today");
375    foreach my $source (@sources) {
376      if (my $programmes = $source->grab($id,
377					 $yesterday, $today, $tomorrow,
378					 $offset)) {
379
380	if (@{ $programmes }) {
381	  # Add channel ID & name (once)
382	  _addChannel($writer, $id, $channels{$id},
383		      $programmes->[0]->language())
384	    unless $seen{$id}++;
385
386	  # Add programmes to list
387	  push(@programmes, @{ $programmes });
388	} elsif ($Option{'test-mode'}) {
389	  die "test failure: source '" . $source->description . "' didn't retrieve any programmes for '$id'!\n";
390	}
391      }
392    }
393    _updateProgressBar();
394  }
395  _destroyProgressBar();
396
397  # Dump programs
398  message("writing XMLTV programme data");
399  $_->dump($writer) foreach (@programmes);
400
401  # Done writing
402  _closeXMLTVWriter($writer);
403}
404
405###############################################################################
406#
407# Man page
408#
409###############################################################################
410__END__
411=pod
412
413=head1 NAME
414
415tv_grab_fi - Grab TV listings for Finland
416
417=head1 SYNOPSIS
418
419tv_grab_fi [--cache E<lt>FILEE<gt>]
420           [--config-file E<lt>FILEE<gt>]
421           [--days E<lt>NE<gt>]
422           [--gui [E<lt>OPTIONE<gt>]]
423           [--no-randomize]
424           [--offset E<lt>NE<gt>]
425           [--output E<lt>FILEE<gt>]
426           [--quiet]
427
428tv_grab_fi  --capabilities
429
430tv_grab_fi  --configure
431           [--cache E<lt>FILEE<gt>]
432           [--config-file E<lt>FILEE<gt>]
433           [--gui [E<lt>OPTIONE<gt>]]
434           [--quiet]
435
436tv_grab_fi  --description
437
438tv_grab_fi  --help|-h|-?
439
440tv_grab_fi  --list-channels
441           [--cache E<lt>FILEE<gt>]
442           [--gui [E<lt>OPTIONE<gt>]]
443           [--quiet]
444
445tv_grab_fi  --version
446
447=head1 DESCRIPTION
448
449Grab TV listings for several channels available in Finland. The data comes
450from various sources, e.g. www.telkku.com. The grabber relies on parsing HTML,
451so it might stop working when the web page layout is changed.
452
453You need to run C<tv_grab_fi --configure> first to create the channel
454configuration for your setup. Subsequently runs of C<tv_grab_fi> will grab
455the latest data, process them and produce XML data on the standard output.
456
457=head1 COMMANDS
458
459=over 8
460
461=item B<NONE>
462
463Grab mode.
464
465=item B<--capabilities>
466
467Show the capabilities this grabber supports. See also
468L<http://wiki.xmltv.org/index.php/XmltvCapabilities>.
469
470=item B<--configure>
471
472Generate the configuration file by asking the users which channels to grab.
473
474=item B<--description>
475
476Print the description for this grabber.
477
478=item B<--help|-h|-?>
479
480Show this help page.
481
482=item B<--list-channels>
483
484Fetch all available channels from the various sources and write them to the
485standard output.
486
487=item B<--version>
488
489Show the version of this grabber.
490
491=back
492
493=head1 GENERIC OPTIONS
494
495=over 8
496
497=item B<--cache F<FILE>>
498
499File name to cache the fetched HTML data in. This speeds up subsequent runs
500using the same data.
501
502=item B<--gui [OPTION]>
503
504Enable the graphical user interface. If you don't specify B<OPTION> then
505XMLTV will automatically choose the best available GUI. Allowed values are:
506
507=over 4
508
509=item B<Term>
510
511Terminal output with a progress bar
512
513=item B<TermNoProgressBar>
514
515Terminal output without progress bar
516
517=item B<Tk>
518
519Tk-based GUI
520
521=back
522
523=item B<--quiet>
524
525Suppress any progress messages to the standard output.
526
527=back
528
529=head1 CONFIGURE MODE OPTIONS
530
531=over 8
532
533=item B<--config-file F<FILE>>
534
535File name to write the configuration to.
536
537Default is F<$HOME/.xmltv/tv_grab_fi.conf>.
538
539=back
540
541=head1 GRAB MODE OPTIONS
542
543=over 8
544
545=item B<--config-file F<FILE>>
546
547File name to read the configuration from.
548
549Default is F<$HOME/.xmltv/tv_grab_fi.conf>.
550
551=item B<--days C<N>>
552
553Grab C<N> days of TV data.
554
555Default is 14 days.
556
557=item B<--no-randomize>
558
559Grab TV data in deterministic order, i.e. first fetch channel 1, days 1 to N,
560then channel 2, and so on.
561
562Default is to use a random access pattern. If you only grab TV data from one
563source then the randomizing is a no-op.
564
565=item B<--offset C<N>>
566
567Grab TV data starting at C<N> days in the future.
568
569Default is 0, i.e. today.
570
571=item B<--output F<FILE>>
572
573Write the XML data to F<FILE> instead of the standard output.
574
575=back
576
577=head1 CONFIGURATION FILE SYNTAX
578
579The configuration file is line oriented, each line can contain one command.
580Empty lines and everything after the C<#> comment character is ignored.
581Supported commands are:
582
583=over 8
584
585=item B<channel ID NAME>
586
587Grab information for this channel. C<ID> depends on the source, C<NAME> is
588ignored and forwarded as is to the XMLTV output file. This information can be
589automatically generated using the grabber in the configuration mode.
590
591=item B<series description NAME>
592
593If a programme title matches C<NAME> then the first sentence of the
594description, i.e. everything up to the first period (C<.>), question mark
595(C<?>) or exclamation mark (C<!>), is removed from the description and is used
596as the name of the episode.
597
598=item B<series title NAME>
599
600If a programme title contains a colon (C<:>) then the grabber checks if the
601left-hand side of the colon matches C<NAME>. If it does then the left-hand
602side is used as programme title and the right-hand side as the name of the
603episode.
604
605=item B<title map "FROM" 'TO'>
606
607If the programme title starts with the string C<FROM> then replace this part
608with the string C<TO>. The strings must be enclosed in single quotes (C<'>) or
609double quotes (C<">). The title mapping occurs before the C<series> command
610processing.
611
612=item B<title strip parental level>
613
614At the beginning of 2012 some programme descriptions started to include
615parental levels at the end of the title, e.g. C<(S)>. With this command all
616parental levels will be removed from the titles automatically. This removal
617occurs before the title mapping.
618
619=back
620
621=head1 SEE ALSO
622
623L<xmltv>.
624
625=head1 AUTHORS
626
627=head2 Current
628
629=over
630
631=item Stefan Becker C<chemobejk at gmail dot com>
632
633=item Ville Ahonen C<ville dot ahonen at iki dot fi>
634
635=back
636
637=head2 Retired
638
639=over
640
641=item Matti Airas
642
643=back
644
645=head1 BUGS
646
647The channels are identified by channel number rather than the RFC2838 form
648recommended by the XMLTV DTD.
649
650=cut
651