1#!/usr/local/bin/perl -w 2use strict; 3use utf8; 4use DateTime; 5use DateTime::Format::W3CDTF; 6use Encode; 7use LWP::Simple (); 8use HTML::TreeBuilder::XPath; 9use URI; 10use YAML; 11 12my $url = "http://www.wowow.co.jp/liga/contents/top.html"; 13my $html = decode('shift_jis', LWP::Simple::get($url)); 14my $tree = HTML::TreeBuilder::XPath->new; 15$tree->parse($html); 16$tree->eof; 17 18my $feed = { 19 title => 'WOWOW リーガ・エスパニョーラ番組表', 20 link => "http://www.wowow.co.jp/liga/", 21}; 22 23my @teams = $tree->findnodes(q(//table[@width=573]/tr/td/img[@width=90])); 24my @dates = $tree->findnodes(q(//table[@width=368]/tr/td[@class="date"])); 25my @links = $tree->findnodes(q(//p[@class="cardview"]/a)); 26 27while (my($t1, $t2) = splice(@teams, 0, 2)) { 28 my $link = (shift @links)->attr('href'); 29 # onair, repeat 30 for (1..2) { 31 my($date, $channel) = munge_datetime(shift @dates); 32 33 push @{$feed->{entry}}, { 34 title => $t1->attr('alt') . ' vs ' . $t2->attr('alt'), 35 link => URI->new_abs($link, $url)->as_string, 36 date => $date, 37 tags => [ $channel ], 38 }; 39 } 40} 41 42binmode STDOUT, ":utf8"; 43print YAML::Dump $feed; 44 45sub munge_datetime { 46 my $date = shift->content->[0]; 47 48 # 10月15日(日)深夜2:55 WOWOW/BS-5ch/191ch 49 $date =~ m!^\s*(\d{1,2})月(\d{1,2})日[(\(].*?[)\)]\s*(午前|午後|深夜)(\d{1,2}):(\d{2})\s*WOWOW.*?(\d{3}[cc][hh])! 50 or die "No match: $date"; 51 my($month, $day, $am_pm_midnight, $hour, $minute, $channel) = ($1, $2, $3, $4, $5, $6); 52 $hour += 12 if $am_pm_midnight eq '午後'; 53 54 my $dt = DateTime->new( 55 year => DateTime->now->year, 56 month => $month, 57 day => $day, 58 hour => $hour, 59 minute => $minute, 60 time_zone => 'Asia/Tokyo', 61 ); 62 $dt->add( days => 1 ) if $am_pm_midnight eq '深夜'; 63 64 return DateTime::Format::W3CDTF->format_datetime($dt), $channel; 65} 66 67