# -*- mode: perl; coding: utf-8 -*- ###########################################
#
# tv_grab_fi: source specific grabber code for http://www.yle.fi
#
###############################################################################
#
# Setup
#
# VERSION: $Id: yle.pm,v 2.13 2017/10/07 20:39:00 stefanb2 Exp $
#
# INSERT FROM HERE ############################################################
package fi::source::yle;
use strict;
use warnings;
use Date::Manip;

BEGIN {
  our $ENABLED = 1;
}

# Import from internal modules
fi::common->import();

# Description
sub description { 'yle.fi' }

# Language code => [ host name prefix, path component of the guide page ]
my %languages = (
  "fi" => [ "areena", "opas"  ],
  "sv" => [ "arenan", "guide" ],
);

# Grab channel list
#
# Fetches the guide page of every configured language and collects the
# channel names found on it.
#
# Returns a hash reference that maps the XMLTV id
# "<name with hyphens>.<language code>.yle.fi" to the display name
# "<language code> <name>", or nothing if any of the pages can't be fetched.
sub channels {
  my %channels;

  # yle.fi offers program guides in multiple languages
  foreach my $code (sort keys %languages) {
    my($host, $path) = @{ $languages{$code} };

    # Fetch & parse HTML (do not ignore HTML5 <time>)
    my $root = fetchTree("https://${host}.yle.fi/tv/${path}",
                         undef, undef, 1);

    # One failed language aborts the whole channel list
    return unless $root;

    #
    # Channel list can be found from this list:
    #
    #   <ul class="guide-channels">
    #     <li class="guide-channels__channel">
    #       <h2 class="channel-header">
    #         <a>...<div class="channel-header__logo " ... aria-label="Yle TV1"></div></a>
    #       </h2>
    #       ...
    #     </li>
    #     ...
    #   </ul>
    #
    if (my @divs = $root->look_down("_tag"       => "div",
                                    "aria-label" => qr/^.+$/)) {
      debug(2, "Source ${code}.yle.fi found " . scalar(@divs) . " channels");
      foreach my $div (@divs) {
        my $name = $div->attr("aria-label");
        next unless defined($name) && length($name);

        # replace space with hyphen; grab() reverses this to find the channel
        my $id;
        ($id = $name) =~ s/ /-/g;

        debug(3, "channel '$name' ($id)");
        $channels{"${id}.${code}.yle.fi"} = "$code $name";
      }
    }

    # Done with the HTML tree
    $root->delete();
  }

  debug(2, "Source yle.fi parsed " . scalar(keys %channels) . " channels");
  return(\%channels);
}

# Grab one day
#
# Parameters:
#   $id     XMLTV channel id as generated by channels()
#   $today  day to grab; must provide a ymdd() method whose result is used
#           as the "t" query parameter of the guide page
#   $yesterday, $tomorrow, $offset are accepted but not used by this source
#
# Returns an array reference of programme objects (possibly empty), or
# nothing if the id doesn't belong to this source or the page can't be
# fetched.
sub grab {
  my($self, $id, $yesterday, $today, $tomorrow, $offset) = @_;

  # Get channel name and language code from XMLTV id
  return unless my($channel, $code) = ($id =~ /^([^.]+)\.([^.]+)\.yle\.fi$/);

  # Ignore ids with an unknown language code. Check with exists() *before*
  # dereferencing: $languages{$code}[0] would autovivify a bogus entry in
  # %languages that channels() would later try to fetch.
  return unless exists $languages{$code};
  my($host, $path) = @{ $languages{$code} };

  # Undo the space -> hyphen replacement done by channels()
  $channel =~ s/-/ /g;

  # Fetch & parse HTML (do not ignore HTML5 <time>)
  my $root = fetchTree("https://${host}.yle.fi/tv/${path}?t=" . $today->ymdd(),
                       undef, undef, 1);
  return unless $root;

  my @objects;

  #
  # Each programme can be found in a separate <li> node
  #
  #   <ul class="guide-channels">
  #     <li class="guide-channels__channel">
  #       <h2 class="channel-header">
  #         <a>...<div class="channel-header__logo " ... aria-label="Yle TV1"></div></a>
  #       </h2>
  #       <ul class="schedule-list">
  #         <li class="schedule-card ..." ... itemtype="http://schema.org/Movie">
  #           ...
  #           <time datetime="2017-07-11T06:25:00+03:00" itemprop="startDate">06.25</time>
  #           <time datetime="2017-07-11T06:55:00+03:00" itemprop="endDate"></time>
  #           ...
  #           <span itemprop="name">Mikä meitä lihottaa?</span>
  #           ...
  #           <span itemprop="description">1/8. Lihavuusepidemia. ...</span>
  #           ...
  #         </li>
  #         ...
  #       </ul>
  #     </li>
  #     ...
  #   </ul>
  #
  # The channel name is matched literally (\Q...\E): it is page text and
  # may contain characters that are special in a regular expression.
  if (my $div = $root->look_down("_tag"       => "div",
                                 "aria-label" => qr/^\Q${channel}\E$/)) {
    if (my $parent = $div->look_up("class" => "guide-channels__channel")) {
      foreach my $programme ($parent->look_down("class" => qr/^schedule-card\s+/)) {
        my $start = $programme->look_down("itemprop", "startDate");
        my $end   = $programme->look_down("itemprop", "endDate");
        my $title = $programme->look_down("itemprop", "name");
        my $desc  = $programme->look_down("itemprop", "description");

        # All four elements are required
        next unless $start && $end && $title && $desc;

        # Convert ISO 8601 time stamps to seconds since the epoch
        $start = UnixDate($start->attr("datetime"), "%s");
        $end   = UnixDate($end->attr("datetime"),   "%s");

        # NOTE: entries with same start and end time are invalid
        next unless $start && $end && ($start != $end);

        $title = $title->as_text();
        $title =~ s/^\s+//;
        $title =~ s/\s+$//;
        next unless length($title);

        $desc = $desc->as_text();
        $desc =~ s/^\s+//;
        $desc =~ s/\s+$//;

        # Only movies get a category; itemtype attribute may be missing
        my $itemtype = $programme->attr("itemtype");
        my $category = (defined($itemtype) && $itemtype =~ /Movie/)
                       ? "elokuvat" : undef;

        debug(3, "List entry $channel ($start -> $end) $title");
        debug(4, $desc);
        debug(4, $category) if defined $category;

        # Create program object
        my $object = fi::programme->new($id, $code, $title, $start, $end);
        $object->category($category);
        $object->description($desc);
        push(@objects, $object);
      }
    }
  }

  # Done with the HTML tree
  $root->delete();

  # Fix overlapping programmes
  fi::programme->fixOverlaps(\@objects);

  return(\@objects);
}

# That's all folks
1;