1#!/usr/local/bin/perl -w 2 3eval 'exec /usr/local/bin/perl -w -S $0 ${1+"$@"}' 4 if 0; # not running under some shell 5 6=pod 7 8=head1 NAME 9 10tv_grab_es_miguiatv - Alternative TV grabber for Spain. 11 12=head1 SYNOPSIS 13 14tv_grab_es_miguiatv --help 15 16tv_grab_es_miguiatv [--config-file FILE] --configure [--gui OPTION] 17 18tv_grab_es_miguiatv [--config-file FILE] [--output FILE] [--days N] 19 [--offset N] [--quiet] 20 21tv_grab_es_miguiatv --list-channels 22 23tv_grab_es_miguiatv --capabilities 24 25tv_grab_es_miguiatv --version 26 27=head1 DESCRIPTION 28 29Output TV listings for spanish channels from www.miguiatv.com. 30Supports analogue and digital (D+) channels. 31 32First run B<tv_grab_es_miguiatv --configure> to choose, which channels you want 33to download. Then running B<tv_grab_es_miguiatv> with no arguments will output 34listings in XML format to standard output. 35 36B<--configure> Prompt for which channels, 37and write the configuration file. 38 39B<--config-file FILE> Set the name of the configuration file, the 40default is B<~/.xmltv/tv_grab_es_miguiatv.conf>. This is the file written by 41B<--configure> and read when grabbing. 42 43B<--gui OPTION> Use this option to enable a graphical interface to be used. 44OPTION may be 'Tk', or left blank for the best available choice. 45Additional allowed values of OPTION are 'Term' for normal terminal output 46(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar. 47 48B<--output FILE> Write to FILE rather than standard output. 49 50B<--days N> Grab N days. The default is 3. 51 52B<--offset N> Start N days in the future. The default is to start 53from today. 54 55B<--quiet> Suppress the progress messages normally written to standard 56error. 57 58B<--capabilities> Show which capabilities the grabber supports. For more 59information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities> 60 61B<--version> Show the version of the grabber. 62 63B<--help> Print a help message and exit. 64 65=head1 SEE ALSO 66 67L<xmltv(5)>. 68 69=head1 AUTHOR 70 71Alberto Gonz�lez (alberto@pesadilla.org) based on tv_grab_es_laguiatv from CandU and tv_grab_es from Ramon Roca. 72 73=head1 BUGS 74 75=cut 76 77# 78 79 80###################################################################### 81# initializations 82 83use strict; 84use XMLTV::Version '$Id: tv_grab_es_miguiatv,v 1.5 2013/12/02 22:02:07 dekarl Exp $ '; 85use XMLTV::Capabilities qw/baseline manualconfig cache/; 86use XMLTV::Description 'Spain (miguiatv.com)'; 87use Getopt::Long; 88use Date::Manip; 89use HTML::TreeBuilder; 90use HTML::Entities; # parse entities 91use IO::File; 92use Data::Dumper; 93use Encode qw(decode_utf8 encode_utf8); 94 95 96use XMLTV; 97use XMLTV::Memoize; 98use XMLTV::ProgressBar; 99use XMLTV::Ask; 100use XMLTV::Config_file; 101use XMLTV::DST; 102use XMLTV::Get_nice; 103use XMLTV::Mode; 104use XMLTV::Date; 105# Todo: perhaps we should internationalize messages and docs? 106use XMLTV::Usage <<END 107$0: get Spanish television listings in XMLTV format 108To configure: $0 --configure [--config-file FILE] 109To grab listings: $0 [--config-file FILE] [--output FILE] [--days N] 110 [--offset N] [--quiet] 111To list channels: $0 --list-channels 112To show capabilities: $0 --capabilities 113To show version: $0 --version 114END 115 ; 116 117# Attributes of the root element in output. 118my $HEAD = { 'source-info-url' => 'http://www.miguiatv.com/todos-los-canales', 119 'source-data-url' => 'http://www.miguiatv.com/todos-los-canales', 120 'generator-info-name' => 'XMLTV', 121 'generator-info-url' => 'http://xmltv.org/', 122 }; 123 124# Whether zero-length programmes should be included in the output. 125my $WRITE_ZERO_LENGTH = 0; 126my $DO_SLOWER_DESC_GET = 0; 127 128# default language 129my $LANG="es"; 130 131# Global channel_data 132our @ch_all; 133 134# debug print function 135sub debug_print 136{ 137 # my ($str) = @_; 138 139 # print $str; 140} 141 142 143###################################################################### 144# get options 145 146# Get options, including undocumented --cache option. 147XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux'); 148my ($opt_days, $opt_offset, $opt_help, $opt_output, 149 $opt_configure, $opt_config_file, $opt_gui, 150 $opt_quiet, $opt_list_channels); 151$opt_days = 3; # default 152$opt_offset = 0; # default 153$opt_quiet = 0; # default 154GetOptions('days=i' => \$opt_days, 155 'offset=i' => \$opt_offset, 156 'help' => \$opt_help, 157 'configure' => \$opt_configure, 158 'config-file=s' => \$opt_config_file, 159 'gui:s' => \$opt_gui, 160 'output=s' => \$opt_output, 161 'quiet' => \$opt_quiet, 162 'list-channels' => \$opt_list_channels 163 ) 164 or usage(0); 165die 'number of days must not be negative' 166 if (defined $opt_days && $opt_days < 0); 167usage(1) if $opt_help; 168 169XMLTV::Ask::init($opt_gui); 170 171my $mode = XMLTV::Mode::mode('grab', # default 172 $opt_configure => 'configure', 173 $opt_list_channels => 'list-channels', 174 ); 175 176# File that stores which channels to download. 177my $config_file 178 = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_es_miguiatv', $opt_quiet); 179 180my @config_lines; # used only in grab mode 181if ($mode eq 'configure') { 182 XMLTV::Config_file::check_no_overwrite($config_file); 183} 184elsif ($mode eq 'grab') { 185 @config_lines = XMLTV::Config_file::read_lines($config_file); 186} 187elsif ($mode eq 'list-channels') { 188 # Config file not used. 189} 190else { die } 191 192# Whatever we are doing, we need the channels data. 193my %channels; # sets @ch_all 194my @channels; 195my %urls; 196###################################################################### 197# write configuration 198 199if ($mode eq 'configure') { 200 %channels = get_channels(); 201 202 open(CONF, ">$config_file") or die "cannot write to $config_file: $!"; 203 204 # Ask about getting descs 205 my $getdescs = ask_boolean("Do you want to get descriptions (very slow)"); 206 warn("cannot read input, using default") 207 if not defined $getdescs; 208 209 print CONF "getdescriptions "; 210 print CONF "yes\n" if $getdescs; 211 print CONF "no\n" if not $getdescs; 212 213 # Ask about each channel. 214 my @chs = sort keys %channels; 215 my @names = map { $channels{$_} } @chs; 216 my @qs = map { "Add channel $_?" } @names; 217 my @want = ask_many_boolean(1, @qs); 218 foreach (@chs) { 219 my $w = shift @want; 220 warn("cannot read input, stopping channel questions"), last 221 if not defined $w; 222 # No need to print to user - XMLTV::Ask is verbose enough. 223 224 # Print a config line, but comment it out if channel not wanted. 225 print CONF '#' if not $w; 226 my $name = shift @names; 227 print CONF "channel $_ $name\n"; 228 # TODO don't store display-name in config file. 229 } 230 231 close CONF or warn "cannot close $config_file: $!"; 232 say("Finished configuration."); 233 234 exit(); 235} 236 237 238# Not configuration, we must be writing something, either full 239# listings or just channels. 240# 241die if $mode ne 'grab' and $mode ne 'list-channels'; 242 243# Options to be used for XMLTV::Writer. 244my %w_args; 245if (defined $opt_output) { 246 my $fh = new IO::File(">$opt_output"); 247 die "cannot write to $opt_output: $!" if not defined $fh; 248 $w_args{OUTPUT} = $fh; 249} 250#$w_args{encoding} = 'ISO-8859-15'; 251$w_args{encoding} = 'utf-8'; 252my $writer = new XMLTV::Writer(%w_args); 253$writer->start($HEAD); 254 255if ($mode eq 'list-channels') { 256 $writer->write_channel($_) foreach @ch_all; 257 $writer->end(); 258 exit(); 259} 260 261###################################################################### 262# We are producing full listings. 263die if $mode ne 'grab'; 264 265# Read configuration 266my $line_num = 1; 267foreach (@config_lines) { 268 ++ $line_num; 269 next if not defined; 270 if (/getdescriptions:?\s+(\S+)/) 271 { 272 if($1 eq "yes") 273 { 274 $DO_SLOWER_DESC_GET = 1; 275 } 276 } 277 elsif (/^channel:?\s+(\S+)\s+([^\#]+)/) 278 { 279 my $ch_did = $1; 280 my $ch_name = $2; 281 $ch_name =~ s/\s*$//; 282 push @channels, $ch_did; 283 $channels{$ch_did} = $ch_name; 284 } 285 else { 286 warn "$config_file:$line_num: bad line\n"; 287 } 288} 289 290###################################################################### 291# begin main program 292 293# Assume the listings source uses CET (see BUGS above). 294my $now = DateCalc(parse_date('now'), "$opt_offset days"); 295die "No channels specified, run me with --configure\n" 296 if not keys %channels; 297my @to_get; 298 299 300# the order in which we fetch the channels matters 301foreach my $ch_did (@channels) { 302 my $ch_name=$channels{$ch_did}; 303 my $ch_xid="$ch_did.miguiatv.com"; 304 $writer->write_channel({ id => $ch_xid, 305 'display-name' => [ [ encode_utf8($ch_name) ] ] }); 306 my $day=UnixDate($now,'%Q'); 307 for (my $i=0;$i<$opt_days;$i++) { 308 push @to_get, [ $day, $ch_xid, $ch_did ]; 309 #for each day 310 $day=nextday($day); die if not defined $day; 311 } 312} 313 314# This progress bar is for both downloading and parsing. Maybe 315# they could be separate. 316# 317get_urls(); 318my $bar = new XMLTV::ProgressBar({name => 'getting listings', count => scalar @to_get}) 319 if not $opt_quiet; 320foreach (@to_get) { 321 foreach (process_table($_->[0], $_->[1], $_->[2])) { 322 $writer->write_programme($_); 323 } 324 update $bar if not $opt_quiet; 325} 326$bar->finish() if not $opt_quiet; 327$writer->end(); 328 329###################################################################### 330# subroutine definitions 331 332# Use Log::TraceMessages if installed. 333BEGIN { 334 eval { require Log::TraceMessages }; 335 if ($@) { 336 *t = sub {}; 337 *d = sub { '' }; 338 } 339 else { 340 *t = \&Log::TraceMessages::t; 341 *d = \&Log::TraceMessages::d; 342 Log::TraceMessages::check_argv(); 343 } 344} 345 346# Returns a TreeBuilder instance for a given url. The url is retrieved 347# via get_nice(), decoded into a Perl string, processed to remove HTML 348# entities and then parsed into a HTML::TreeBuilder object 349# 350sub get_tree( $ ) { 351 my $url = shift; 352 my $content = get_nice($url); 353 $content = decode_utf8($content); 354 $content = tidy_html($content); 355 my $t = new HTML::TreeBuilder; 356 $t->parse($content) or die "Cannot parse content of Tree\n"; 357 $t->eof; 358 return $t; 359} 360 361# Replaces specific HTML entities with text replacements, and then 362# decodes any remaining entities in the string 363# 364sub tidy_html( $ ) { 365 for (my $s = shift) { 366 # handle specific entities 367 s/ / /g; 368 # decode remaining entities 369 decode_entities($s); 370 371 return $s; 372 } 373} 374 375#### 376# process_table: fetch a URL and process it 377# 378# arguments: 379# Date::Manip object giving the day to grab 380# xmltv id of channel 381# elpais.es id of channel 382# 383# returns: list of the programme hashes to write 384# 385sub process_table { 386 387 my ($date, $ch_xmltv_id, $ch_es_id) = @_; 388 my $today = UnixDate($date, '%Y%m%d'); 389 390 my $url = $urls{$ch_es_id}; 391 $url =~ s/programacion/$today/; 392 debug_print "Getting $url\n"; 393 t $url; 394 local $SIG{__WARN__} = sub 395 { 396 warn "$url: $_[0]"; 397 }; 398 399 # parse the page to a document object 400 my $tree = get_tree($url); 401 #my $tree = get_nice_tree($url); 402 my @program_data = get_program_data($tree); 403 my $bump_start_day=0; 404 405 my @r; 406 while (@program_data) { 407 my $cur = shift @program_data; 408 my $next = shift @program_data; 409 unshift @program_data,$next if $next; 410 411 my $p = make_programme_hash($date, $ch_xmltv_id, $ch_es_id, $cur, $next); 412 if (not $p) { 413 require Data::Dumper; 414 my $d = Data::Dumper::Dumper($cur); 415 warn "cannot write programme on $ch_xmltv_id on $date:\n$d\n"; 416 } 417 else { 418 push @r, $p; 419 } 420 421 if (!$bump_start_day && bump_start_day($cur,$next)) { 422 $bump_start_day=1; 423 $date = UnixDate(DateCalc($date,"+ 1 day"),'%Q'); 424 } 425 } 426 return @r; 427} 428 429 430sub make_programme_hash { 431 my ($date, $ch_xmltv_id, $ch_es_id, $cur, $next) = @_; 432 433 my %prog; 434 435 $prog{channel}=$ch_xmltv_id; 436 $prog{title}=[ [ encode_utf8($cur->{title}), $LANG ] ]; 437 $prog{"sub-title"}=[ [ encode_utf8($cur->{subtitle}), $LANG ] ] if defined $cur->{subtitle}; 438 #$prog{category}=[ [ $cur->{category}, $LANG ] ]; 439 440 t "turning local time $cur->{time}, on date $date, into UTC"; 441 eval { $prog{start}=utc_offset("$date $cur->{time}", '+0100') }; 442 if ($@) { 443 warn "bad time string: $cur->{time}"; 444 return undef; 445 } 446 t "...got $prog{start}"; 447 # FIXME: parse description field further 448 449 $prog{desc}=[ [ encode_utf8($cur->{desc}), $LANG ] ] if defined $cur->{desc}; 450 451 return \%prog; 452} 453sub bump_start_day { 454 my ($cur,$next) = @_; 455 if (!defined($next)) { 456 return undef; 457 } 458 my $start = UnixDate($cur->{time},'%H:%M'); 459 my $stop = UnixDate($next->{time},'%H:%M'); 460 if (Date_Cmp($start,$stop)>0) { 461 return 1; 462 } else { 463 return 0; 464 } 465} 466 467 468# get time, title, description 469sub get_program_data 470{ 471 my ($tree) = @_; 472 my @data; 473 #my @inputs = $tree->find("class","show_even","class","show_odd"); 474 my @inputs = $tree->find("tr"); 475 for my $elem (@inputs) { 476 if($elem->attr('class') && ($elem->attr('class') eq "show_odd" || $elem->attr('class') eq "show_even")) { 477 my $time = $elem->attr('_content')->[0]->attr('_content')->[0]; 478 my $td = pop @{$elem->attr('_content')}; 479 for my $table (@{$td->attr('_content')}) { 480 if($table->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[1]) { 481 my $title = $table->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[0]->attr('_content')->[1]->attr('_content')->[0]; 482 483 if( $table->attr('_content')->[0]->attr('_content')->[1]->attr('_content')->[0]->attr('_content') || $table->attr('_content')->[1]->attr('_content')->[0]->attr('_content')) { 484 if($table->attr('_content')->[1]->attr('_content')->[0]) { 485 my $description=" "; 486 if( $table->attr('_content')->[1]->attr('_content')->[0]->attr('_content')) { 487 $description = $table->attr('_content')->[1]->attr('_content')->[0]->attr('_content')->[0]; 488 } else { 489 $description = $table->attr('_content')->[1]->attr('_content')->[0]->attr('_content')->[0]; 490 } 491 my %h = ( 492 time => $time, 493 category => $title, 494 title=> $title, 495 desc => $description 496 ); 497 push @data,\%h; 498 } 499 } 500 } 501 } 502 } 503 } 504 505 return @data; 506 507 508 my $xml = XMLin($tree); 509 if(ref($xml->{channel}->{item}) eq "ARRAY") { 510 my $elementos = $#{$xml->{channel}->{item}}; 511 for (my $i=0;$i<$elementos;$i++) { 512 my ($title,$time) = split(/\s*-\s*/,$xml->{channel}->{item}->[$i]->{title},2); 513 my $description = $xml->{channel}->{item}->[$i]->{description}; 514 ($time) = $time =~ /(\d+:\d+)/; 515 my $year = (((localtime(time))[5])+1900); 516 #$time = $year . $mes . sprintf("%02d",$dia) . $hora . $minuto . "00 +0100"; 517 $description =~ s/[^\n]*\n//; 518 if(length($description) > 5) { 519 my %h = ( 520 time => $time, 521 title=> $title, 522 desc => $description 523 ); 524 push @data,\%h; 525 } 526 } 527 } 528 529 return @data; 530} 531 532 533# get channel listing 534sub get_channels 535{ 536 my $bar = new XMLTV::ProgressBar({name => 'finding channels', count => 1}) 537 if not $opt_quiet; 538 my %channels; 539 my $url='http://www.miguiatv.com/todos-los-canales'; 540 t $url; 541 my $channel_id; 542 my $channel_name; 543 my $tree = get_tree $url; 544 #my $tree = get_nice_tree $url; 545 my @inputs = $tree->find("div"); 546 foreach my $elem (@inputs) { 547 if($elem->attr('class') && $elem->attr('class') eq "footer_channels") { 548 549 for my $div ( @{$elem->attr('_content')}) { 550 for my $li ( @{$div->attr('_content')}) { 551 pop @{$li->attr('_content')}; 552 for my $ul ( @{$li->attr('_content')}) { 553 if(ref($ul) eq "HTML::Element") { 554 if($ul->attr('href')) { 555 $channel_name = pop @{$ul->attr('_content')}; 556 $channel_name =~ s/^\s+//; 557 $channel_name =~ s/\s+$//; 558 $channel_id = convert_name_to_id($channel_name); 559 $channels{$channel_id}=$channel_name; 560 } 561 } 562 } 563 } 564 } 565 } 566 } 567 568 die "no channels could be found" if not keys %channels; 569 update $bar if not $opt_quiet; 570 $bar->finish() if not $opt_quiet; 571 return %channels; 572} 573 574# get xml list for channels 575sub get_urls 576{ 577 my $bar = new XMLTV::ProgressBar({name => 'getting urls', count => 1}) 578 if not $opt_quiet; 579 my %channels; 580 my $url='http://www.miguiatv.com/todos-los-canales'; 581 t $url; 582 my $channel_id; 583 my $channel_name; 584 my $tree = get_tree $url; 585 #my $tree = get_nice_tree $url; 586 my @inputs = $tree->find("div"); 587 foreach my $elem (@inputs) { 588 if($elem->attr('class') && $elem->attr('class') eq "footer_channels") { 589 590 for my $div ( @{$elem->attr('_content')}) { 591 for my $li ( @{$div->attr('_content')}) { 592 pop @{$li->attr('_content')}; 593 for my $ul ( @{$li->attr('_content')}) { 594 if(ref($ul) eq "HTML::Element") { 595 if($ul->attr('href')) { 596 $channel_name = pop @{$ul->attr('_content')}; 597 $channel_name =~ s/^\s+//; 598 $channel_name =~ s/\s+$//; 599 $channel_id = convert_name_to_id($channel_name); 600 $urls{$channel_id}=$ul->attr('href'); 601 } 602 } 603 } 604 } 605 } 606 } 607 } 608 609 die "no channels could be found" if not keys %urls; 610 611 update $bar if not $opt_quiet; 612 $bar->finish() if not $opt_quiet; 613} 614sub convert_name_to_id 615{ 616 my ($str) = @_; 617 618 619 $str =~ s/([^A-Za-z0-9])/sprintf("-%02X", ord($1))/seg; 620 621 $str = "C" . $str; 622 return $str; 623} 624 625# Bump a DDMMYYYY date by one. 626sub nextday { 627 my $d = shift; 628 my $p = parse_date($d); 629 my $n = DateCalc($p, '+ 1 day'); 630 return UnixDate($n, '%Q'); 631} 632