1# -*- mode: perl; coding: utf-8 -*- ###########################################
2#
3# tv_grab_fi: source specific grabber code for http://www.yle.fi
4#
5###############################################################################
6#
7# Setup
8#
9# VERSION: $Id: yle.pm,v 2.13 2017/10/07 20:39:00 stefanb2 Exp $
10#
11# INSERT FROM HERE ############################################################
12package fi::source::yle;
13use strict;
14use warnings;
15use Date::Manip;
16
# Set the package variable $ENABLED to 1 at compile time.
# NOTE(review): presumably the main grabber script inspects this flag to
# decide whether the source is active -- confirm against the caller.
BEGIN {
  our $ENABLED = 1;
}
20
21# Import from internal modules
22fi::common->import();
23
# Description
#
# Returns: the string 'yle.fi' identifying this source
sub description {
  return 'yle.fi';
}
26
# Guide URL components per language code: [ host name prefix, guide path ].
# They are combined as "https://<prefix>.yle.fi/tv/<path>".
my %languages = (
  fi => [ qw(areena opas)  ],
  sv => [ qw(arenan guide) ],
);
31
# Grab channel list
#
# Fetches the programme guide page of every configured language and collects
# the channels advertised there.
#
# Returns: reference to a hash mapping XMLTV id "<name>.<code>.yle.fi" (with
#          the spaces in <name> replaced by hyphens) to "<code> <name>", or
#          nothing as soon as the guide page of one language can't be fetched.
sub channels {
  my %found;

  # yle.fi offers program guides in multiple languages
  foreach my $code (sort keys %languages) {
    my($host, $path) = @{ $languages{$code} };

    # Fetch & parse HTML (do not ignore HTML5 <time>)
    my $tree = fetchTree("https://${host}.yle.fi/tv/${path}",
                         undef, undef, 1)
      or return;

    #
    # Channel list can be found from this list:
    #
    #   <ul class="guide-channels">
    #    <li class="guide-channels__channel">
    #     <h2 class="channel-header">
    #      <a>...<div class="channel-header__logo " ... aria-label="Yle TV1"></div></a>
    #     </h2>
    #     ...
    #    </li>
    #    ...
    #   </ul>
    #
    my @logos = $tree->look_down("_tag"       => "div",
                                 "aria-label" => qr/^.+$/);
    debug(2, "Source ${code}.yle.fi found " . scalar(@logos) . " channels")
      if @logos;

    foreach my $logo (@logos) {
      my $name = $logo->attr("aria-label");
      next unless defined($name) && length($name);

      # replace space with hyphen
      (my $id = $name) =~ tr/ /-/;

      debug(3, "channel '$name' ($id)");
      $found{"${id}.${code}.yle.fi"} = "$code $name";
    }

    # Done with the HTML tree
    $tree->delete();
  }

  debug(2, "Source yle.fi parsed " . scalar(keys %found) . " channels");
  return(\%found);
}
85
# Grab one day
#
# Fetches the programme guide page for $today in the language encoded in the
# XMLTV id and extracts the programmes listed for the requested channel.
#
# Parameters:
#   $self      - invocant (not used)
#   $id        - XMLTV channel id of the form "<channel>.<code>.yle.fi"
#   $yesterday - not used by this source
#   $today     - day to grab; its ymdd() method supplies the "t=" URL parameter
#   $tomorrow  - not used by this source
#   $offset    - not used by this source
#
# Returns: reference to an array of fi::programme objects (possibly empty),
#          or nothing if the id does not belong to this source, names a
#          language without a known guide URL, or the page can't be fetched.
sub grab {
  my($self, $id, $yesterday, $today, $tomorrow, $offset) = @_;

  # Get channel name and language code from XMLTV id
  return unless my($channel, $code) = ($id =~ /^([^.]+)\.([^.]+)\.yle\.fi$/);

  # Only languages with a known guide URL can be grabbed. Testing with
  # exists() first also keeps the lookup below from autovivifying a bogus
  # entry in %languages, which is shared with channels().
  return unless exists $languages{$code};
  my($host, $path) = @{ $languages{$code} };

  # The id carries the channel name with spaces replaced by hyphens
  $channel =~ s/-/ /g;

  # Fetch & parse HTML (do not ignore HTML5 <time>)
  my $root = fetchTree("https://${host}.yle.fi/tv/${path}?t=" . $today->ymdd(),
                       undef, undef, 1);
  return unless $root;

  my @objects;

  #
  # Each programme can be found in a separate <li> node
  #
  #   <ul class="guide-channels">
  #    <li class="guide-channels__channel">
  #     <h2 class="channel-header">
  #      <a>...<div class="channel-header__logo " ... aria-label="Yle TV1"></div></a>
  #     </h2>
  #     <ul class="schedule-list">
  #      <li class="schedule-card ..." ... itemtype="http://schema.org/Movie">
  #       ...
  #       <time datetime="2017-07-11T06:25:00+03:00" itemprop="startDate">06.25</time>
  #       <time datetime="2017-07-11T06:55:00+03:00" itemprop="endDate"></time>
  #       ...
  #       <span itemprop="name">Mikä meitä lihottaa?</span>
  #       ...
  #       <span itemprop="description">1/8. Lihavuusepidemia. ...</span>
  #       ...
  #      </li>
  #      ...
  #     </ul>
  #    </li>
  #    ...
  #   </ul>
  #
  # The channel name is literal text, not a pattern: quote it so that regex
  # metacharacters in a name can't change or break the match.
  my $div = $root->look_down("_tag"       => "div",
                             "aria-label" => qr/^\Q${channel}\E$/);
  my $parent = $div
    ? $div->look_up("class" => "guide-channels__channel")
    : undef;
  my @programmes = $parent
    ? $parent->look_down("class" => qr/^schedule-card\s+/)
    : ();

  foreach my $programme (@programmes) {
    my $start = $programme->look_down("itemprop", "startDate");
    my $end   = $programme->look_down("itemprop", "endDate");
    my $title = $programme->look_down("itemprop", "name");
    my $desc  = $programme->look_down("itemprop", "description");
    next unless $start && $end && $title && $desc;

    # A <time> node without "datetime" attribute can't be converted
    my $start_text = $start->attr("datetime");
    my $end_text   = $end->attr("datetime");
    next unless defined($start_text) && defined($end_text);

    # Convert timestamps to seconds since epoch
    my $start_time = UnixDate($start_text, "%s");
    my $end_time   = UnixDate($end_text,   "%s");

    # NOTE: entries with same start and end time are invalid
    next unless $start_time && $end_time && ($start_time != $end_time);

    # The "itemtype" attribute is optional: only match it when present
    my $type     = $programme->attr("itemtype");
    my $category = (defined($type) && $type =~ /Movie/) ? "elokuvat" : undef;

    my $name = $title->as_text();
    $name =~ s/^\s+//;
    $name =~ s/\s+$//;
    next unless length($name);

    my $text = $desc->as_text();
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;

    debug(3, "List entry $channel ($start_time -> $end_time) $name");
    debug(4, $text);
    debug(4, $category) if defined $category;

    # Create program object
    my $object = fi::programme->new($id, $code, $name, $start_time, $end_time);
    $object->category($category);
    $object->description($text);
    push(@objects, $object);
  }

  # Done with the HTML tree
  $root->delete();

  # Fix overlapping programmes
  fi::programme->fixOverlaps(\@objects);

  return(\@objects);
}
182
183# That's all folks
1841;
185