1#!/usr/local/bin/perl 2 3################################################# 4# Changelog: 5# 13/07/2005 6# - removed backend lastampa, now using 7# wfactory.net instead (basically the same) 8# - removed backend mytv, site doesn't provide 9# data anymore. Didn't remove the code since 10# it might come in handy in the future. 11# 25/08/2005 12# - updated after changes in skytv.it site 13# - first test with simple double language messages and docs 14# 14/06/2006 15# - minor update for changes in skytv.it site (now skylife.it) 16# 16/08/2006 17# - fixes to skytv 18# - skytv now handles categories when using --slow 19# 11/01/2007 20# - added backend boingtv 21# - new option --cache-slow 22# 13/02/2007 23# - added backend skylife (soon to replace skytv) 24# 27/05/2007 25# - fixed boingtv.it after site change (thanks Paolo Asioli) 26# 02/07/2007 27# - fixed skylife after site change (thanks Marco Coli) 28# 20/09/2007 29# fixes for warnings when in quiet mode 30# 06/11/2007 31# added backend mtv.it, as skylife strangely doesn't carry it, and wfactory is a stuttering site. 32# 08/12/2007 33# skylife.it has moved to guidatv.sky.it 34# code cleanup 35# 15/01/2008 36# major optimizations in skylife.it! (thanks Massimo Savazzi) 37# 30/06/2008 38# better handling of season /episodes after site changes 39# now using also the dtd tags episode-num 40# 24/09/2008 41# aggiunti mediasetpremium e raisat 42# 25/09/2008 43# aggiunto iris 44# 04/02/2009 45# update per sky per sito cambiato completamente 46# tolto wfactory, il sito non va piu' 47# sistemato rai4 48# nuovi canali per mediasetpremium 49# 02/03/2009 50# piccoli fix per skylife 51# aggiunto backend rai.it (che include rai4, rai gulp e altri canali 'inediti') 52# 10/11/2009 53# sistemato boingtv (grazie r.ghetta) 54# 22/02/2010 55# sistemato iris (grazie gpancot) 56# 23/02/2010 57# sistemato rai.it (grazie gianni kurgo) 58# 14/09/2010 59# aggiunto nuovo backend dahlia 60# aggiunto nuovo backend k2 (grazie a r.ghetta!) 
#    fix for skylife by r.ghetta
# 18/10/2010
#    added la7 backend and re-enabled mtv.it (thanks gpancot)
#    added mediaset backend
# 25/10/2010
#    patch from mennucc for possible date parsing errors
#    patch from wyrdmeister adding la7d and a fix for raiit
# 30/10/2010
#    removed k2 (thanks rghetta)
#    fix for la7 (thanks rghetta)
#    removed searchch
# 28/12/2010
#    added --mythweb-categories option to use the categories used by mythweb instead of the site's own
# 25/02/2011
#    added patch for mediaset from charon66
#    removed dahlia backend
# 24/07/2011
#    new channels
# 23/09/2013
#    new channels and bugfixes
# 23/08/2015
#    disabled mtvit backend - site doesn't provide data anymore.

#################################################
# TODO
# - add more informative errors in xml
#################################################

#pod below is handled at install time
my $POD_GOES_HERE;

#default language for warnings set at install time
# ('eng' selects the English messages, any other value the Italian ones)
my $DEF_LANG = 'eng';

######################################################################
# initializations
use warnings;
use strict;

use XMLTV::Version '$Id: tv_grab_it.in,v 1.108 2015/08/23 02:04:36 knowledgejunkie Exp $';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'Italy';
use XMLTV::Supplement qw/GetSupplement/;
use HTML::Entities;
use HTML::Parser;
use URI::Escape;
use Getopt::Long;
use Date::Manip;
use Memoize;
use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::ProgressBar;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Mode;

#i hate to do this but it seems that skylife is blocking user agents not containing 'mozilla'
#we still advertise ourselves as xmltv so they can block us if they really want to
$XMLTV::Get_nice::ua->agent("Mozilla/5.0 xmltv/$XMLTV::VERSION");

# Usage text handed to XMLTV::Usage at compile time.
use XMLTV::Usage <<END
$0: get Italian television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
 [--offset N] [--quiet] [--slow] [--verbose] [--backend]
 [--errors-in-xml] [--cache-slow] [--mythweb-categories]
To list available channels: $0 [--output FILE] [--quiet] --list-channels
To show capabilities: $0 --capabilities
To show version: $0 --version
END
  ;

# Use Log::TraceMessages if installed.
# When it is missing, t() and d() are replaced with do-nothing stubs so the
# trace calls in the rest of the script stay valid.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
        *t = sub {};
        *d = sub { '' };
    }
    else {
        *t = \&Log::TraceMessages::t;
        *d = \&Log::TraceMessages::d;
        Log::TraceMessages::check_argv();
    }
}

#max days on the server
my $MAX_DAYS=7;

# default language
# (used as the language tag of the titles and descriptions that are written)
my $LANG="it";
my $date_today = UnixDate("today", '%Y-%m-%d');

# backends used when no valid --backend option is given
my @default_backends = ('mediaset', 'skylife', 'raiit', 'mediaset_guidatv', 'mediasetpremium', 'iris', 'boingtv', 'la7');


my %channels; #to store display names

# backend configurations
# One entry per backend, keyed by backend name:
#   domain           - site domain; 'http://' is prepended to build the
#                      source-info-url and it is appended to made-up channel ids
#   base_chan        - URL passed to channel_list_sub
#   base_data        - base URL the fetch subs build their requests from
#   rturl            - URL reported as source-data-url
#   needs_login      - when true, --configure asks for a username
#   needs_cookies    - NOTE(review): not read anywhere in the code visible here
#   fetch_data_sub   - called as ($xmltv_id, $day_offset), returns
#                      ($error, @programmes)
#   channel_list_sub - called with base_chan, returns a hash of channels
my %backend_info
  = (
     'skylife' =>
     { domain => 'guidatv.sky.it',
       base_chan => 'http://guidatv.sky.it/app/guidatv/contenuti/data/grid/',
       base_icon => 'http://guidatv.sky.it/app/guidatv/images/epgimages/channels/grid/',
       base_data => 'http://guidatv.sky.it/app/guidatv/contenuti/data/grid/',
       base_slow => 'http://guidatv.sky.it/guidatv/programma/',
       rturl => "http://guidatv.sky.it/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&skylife_fetch_data,
       channel_list_sub => \&skylife_get_channels_list,
     },

     'raisat' =>
     { domain => 'raisat.it',
       base_chan => 'http://www.raisat.it/canaliListForXML.jsp',
       #base_data => 'http://www.raisat.it/generaxmlpalinsesto.jsp',
       base_data => 'http://212.162.68.116/generaxmlpalinsesto.jsp',
       rturl => "http://www.raisat.it/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&raisat_fetch_data,
       channel_list_sub => \&raisat_get_channels_list,
     },

     'boingtv' =>
     { domain => 'boingtv.it',
       base_chan => 'http://www.boingtv.it/xml/palinsesto.xml',
       base_data => 'http://www.boingtv.it/xml/palinsesto.xml',
       rturl => "http://www.boingtv.it/xml/palinsesto.xml",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&boingtv_fetch_data,
       channel_list_sub => \&boingtv_get_channels_list,
     },

     'sitcom1' =>
     { domain => 'sitcom1.it',
       base_chan => 'http://www.sitcom1.it/guidatv.asp',
       base_data => 'http://www.sitcom1.it/guidatv.asp',
       rturl => "http://www.sitcom1.it/guidatv.asp",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&sitcom1_fetch_data,
       channel_list_sub => \&sitcom1_get_channels_list,
     },

     'iris' =>
     { domain => 'iris.mediaset.it',
       base_chan => 'http://iris.mediaset.it/palinsesto/palinsesto1.shtml',
       base_data => 'http://iris.mediaset.it/palinsesto/',
       rturl => "http://iris.mediaset.it/palinsesto/palinsesto1.shtml",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&iris_fetch_data,
       channel_list_sub => \&iris_get_channels_list,
     },

     # Disabled 2015-08-23 by knowledgejunkie: site no longer provides listings
     # 'mtvit' =>
     # { domain => 'www.mtv.it',
     #   base_chan => 'http://www.mtv.it/',
     #   base_data => 'http://www.mtv.it/',
     #   rturl => "http://www.mtv.it/tv/guida-tv/",
     #   needs_login => 0,
     #   needs_cookies => 0,
     #   fetch_data_sub => \&mtvit_fetch_data,
     #   channel_list_sub => \&mtvit_get_channels_list,
     # },

     'mediasetpremium' =>
     { domain => 'mediasetpremium.mediaset.it',
       base_chan => 'http://www.mediasetpremium.mediaset.it/export/palinsesto.xml',
       base_data => 'http://www.mediasetpremium.mediaset.it/export/palinsesto',
       rturl => "http://www.mediasetpremium.mediaset.it/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&mediasetpremium_fetch_data,
       channel_list_sub => \&mediasetpremium_get_channels_list,
     },

     'raiit' =>
     { domain => 'rai.it',
       base_chan => 'http://www.rai.it/dl/portale/GuidaProgrammi.html',
       base_data => 'http://www.rai.it/dl/portale/html/palinsesti/guidatv/static/',
       rturl => "http://www.rai.it/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&raiit_fetch_data,
       channel_list_sub => \&raiit_get_channels_list,
     },

     'dahlia' =>
     { domain => 'dahliatv.it',
       base_chan => 'http://www.dahliatv.it/guidatv',
       base_data => 'http://www.dahliatv.it/html/portlet/ext/epg/epg.jsp',
       rturl => "http://www.dahliatv.it/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&dahlia_fetch_data,
       channel_list_sub => \&dahlia_get_channels_list,
     },


     'la7' =>
     { domain => 'la7.it',
       base_chan => 'http://www.la7.it/guidatv/index.html',
       base_data => 'http://www.la7.it/guidatv/index_',
       rturl => "http://www.la7.it/guidatv/index",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&la7_fetch_data,
       channel_list_sub => \&la7_get_channels_list,
     },

     'mediaset' =>
     { domain => 'mediaset.it',
       base_chan => 'http://www.tv.mediaset.it/dati/palinsesto/palinsesto-mondotv.xml',
       base_data => 'http://www.tv.mediaset.it/dati/palinsesto/palinsesto-mondotv.xml',
       rturl => "http://www.tv.mediaset.it/dati/palinsesto/palinsesto-mondotv.xml",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&mediaset_fetch_data,
       channel_list_sub => \&mediaset_get_channels_list,
     },

     'mediaset_guidatv' =>
     { domain => 'mediaset_guidatv.it',
       base_chan => 'http://www.mediaset.it/guidatv/palinsesto.xml',
       base_data => 'http://www.mediaset.it/guidatv/palinsesto.xml',
       rturl => "http://www.mediaset.it/guidatv/palinsesto.xml",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub => \&mediaset_guidatv_fetch_data,
       channel_list_sub => \&mediaset_guidatv_get_channels_list,
     },

    );

######################################################################
# Get options, including undocumented --cache option.
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux') # cache on disk
  or memoize('XMLTV::Get_nice::get_nice_aux')               # cache in memory
  or die "cannot memoize 'XMLTV::Get_nice::get_nice_aux': $!";

my ($opt_days,
    $opt_offset,
    $opt_help,
    $opt_output,
    $opt_slow,
    $opt_verbose,
    $opt_configure,
    $opt_config_file,
    $opt_gui,
    $opt_quiet,
    $opt_errors_in_xml,
    @opt_backends,
    $opt_list_channels,
    $opt_cache_slow,
    $opt_mythweb_categories,
   );

# server only holds 7 days, so if there is an offset days must be
# opt_days-offset or less.

$opt_offset = 0; # default
$opt_quiet = 0; # default
$opt_slow = 0; # default
$opt_verbose = 0; # default

GetOptions('days=i' => \$opt_days,
           'offset=i' => \$opt_offset,
           'help' => \$opt_help,
           'configure' => \$opt_configure,
           'config-file=s' => \$opt_config_file,
           'gui:s' => \$opt_gui,
           'output=s' => \$opt_output,
           'quiet' => \$opt_quiet,
           'slow' => \$opt_slow,
           'verbose' => \$opt_verbose,
           'errors-in-xml' => \$opt_errors_in_xml,
           'backend=s' => \@opt_backends,
           'list-channels' => \$opt_list_channels,
           'cache-slow' => \$opt_cache_slow,
           'mythweb-categories' => \$opt_mythweb_categories,
          )
  or usage(0);

# reject negative --days / --offset values
die ($DEF_LANG eq 'eng' ?
     "number of days (--days) must not be negative. You gave: $opt_days\n" :
     "il numero di giorni (--days) non puo' essere negativo. Hai specificato: $opt_days\n")
  if (defined $opt_days && $opt_days < 0);

die ($DEF_LANG eq 'eng' ?
     "offset days (--offset) must not be negative. You gave: $opt_offset\n" :
     "l'intervallo di partenza (--offset) non puo' essere negativo. Hai specificato: $opt_offset\n")
  if ($opt_offset < 0);
usage(1) if $opt_help;

# --quiet overrides --verbose
if ($opt_quiet) {
    $opt_verbose = 0;
}

# an unset --days and an explicit --days 0 both fall back to $MAX_DAYS here
$opt_days = $opt_days || $MAX_DAYS;

# --cache-slow implies --slow
$opt_slow = 1 if ($opt_cache_slow);

# 'grab' is the default mode; --list-channels and --configure select the others
my $mode = XMLTV::Mode::mode('grab',
                             $opt_list_channels => 'list-channels',
                             $opt_configure => 'configure');

# parse the --backend option
@opt_backends = split(/,/,join(',',@opt_backends)); #we allow both multiple --backend and --backend=name1,name2

# keep only the requested backends that have an entry in %backend_info
my @backends = ();
foreach (@opt_backends) {
    if (defined $backend_info{$_}) {
        push @backends, $_;
    }
    else {
        warn ($DEF_LANG eq 'eng' ?
              "Unknown backend $_!\nProbably you need to update! go to xmltv.org for latest release.\nFor latest version get http://snapshot.xmltv.org or http://alpha-exe.xmltv.org if you are on windows." :
              "Fonte sconosciuta $_!Probabilmente devi aggiornare il programma! Vai su xmltv.org per l'ultima release.\nPer la versione piu' aggiornata vai su http://snapshot.xmltv.org o http://alpha-exe.xmltv.org per windows.\n"
             );
    }
}
unless (@backends) {
    @backends = @default_backends;
    if (@opt_backends) { #we specified backends but we didn't like them, warn the user
        warn ($DEF_LANG eq 'eng' ?
              "No good backend specified, falling back on defaults\n" :
              "Nessuna fonte corretta specificata, uso i default\n"
             );
    }
}

XMLTV::Ask::init($opt_gui);

# reads the file channel_ids, which contains the tables to convert
# between backends' ids and XMLTV ids of channels.
# to support multiple backends i add a ini-style [section] header
# there are two fields: xmltv_id and site_id.

my $str = GetSupplement( "tv_grab_it", "channel_ids" );
my $CHANNEL_NAMES_FILE = "channel_ids";

my (%xmltv_chanid, %seen);
my $line_num = 0;
my $backend;    # backend named by the [section] currently being read

# Parse the channel_ids supplement. Each "[name]" line selects a backend;
# each following "xmltv_id;site_id" line is stored in
# $backend_info{name}{site_ids}{xmltv_id}{site_id}.
foreach my $entry (split( /\n/, $str )) {
    ++$line_num;
    $entry =~ tr/\r//d;

    $entry =~ s/#.*//;                 # strip comments
    next if $entry =~ m/^\s*$/;

    my $where = "$CHANNEL_NAMES_FILE:$line_num";

    if ($entry =~ /^\[(.*)\]$/) {
        my $section = $1;
        if (defined $backend_info{$section}) { # a configuration exists for it
            $backend = $section;
        }
        else {
            warn ($DEF_LANG eq 'eng' ?
                  "Unknown backend $section in $where\n" :
                  "Fonte sconosciuta $section in $where\n");
            # lines of an unknown section are skipped until the next header
            $backend = undef;
        }
    }
    elsif ($backend) {
        my @fields = split /;/, $entry;
        die ($DEF_LANG eq 'eng' ?
             "$where: wrong number of fields" :
             "$where: numero di campi errato")
          if @fields != 2;

        my ($xmltv_id, $site_id) = @fields;

        warn ($DEF_LANG eq 'eng' ?
              "$where: backend id $site_id for site '$backend' seen already\n" :
              "$where: fonte con id $site_id per il sito '$backend' gia' visto!\n"
             )
          if defined $backend_info{$backend}{site_ids}{$xmltv_id};
        $backend_info{$backend}{site_ids}{$xmltv_id}{site_id} = $site_id;

        warn ($DEF_LANG eq 'eng' ?
              "$where: XMLTV_id $xmltv_id for site '$backend' seen already\n" :
              "$where: XMLTV_id $xmltv_id per il sito '$backend' gia' visto!\n" )
          if $seen{$backend.$xmltv_id}++;
    }
}

# File that stores which channels to download.  Not needed for
# list-channels mode.
#
my $config_file;
unless ($mode eq 'list-channels') {
    $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_it', $opt_quiet);
}

XMLTV::Config_file::check_no_overwrite($config_file) if $mode eq 'configure';

# Arguments for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
    die($DEF_LANG eq 'eng' ?
        "cannot give --output with --configure" :
        "non e' possibile specificare --output con --configure")
      if $mode eq 'configure';

    # Load IO::File explicitly and pass the mode separately, so the file
    # name is never interpreted as part of the open mode.
    require IO::File;
    my $fh = IO::File->new($opt_output, '>');
    die ($DEF_LANG eq 'eng' ?
         "cannot write to $opt_output: $!" :
         "impossibile scrivere su $opt_output: $!") if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';


$line_num = 0;

my $foundchannels = 0;

# Declared unconditionally: "my ... if COND" has undefined behaviour.
# The progress bar object only exists when not running with --quiet.
my $bar;
$bar = XMLTV::ProgressBar->new(($DEF_LANG eq 'eng' ? 'getting list of channels' : 'prendo la lista dei canali'), scalar @backends)
  if not $opt_quiet;

# find list of available channels
foreach my $name (@backends) {
    my $info = $backend_info{$name};
    %{ $info->{channels} } = $info->{channel_list_sub}->($info->{base_chan});
    $foundchannels += scalar(keys %{ $info->{channels} });

    $bar->update() if not $opt_quiet;
}
$bar->finish() if (not $opt_quiet);
die ($DEF_LANG eq 'eng' ? "no channels could be found" : "nessun canale trovato") unless ($foundchannels);
warn ($DEF_LANG eq 'eng' ?
      "VERBOSE: $foundchannels channels found.\n" :
      "VERBOSE: $foundchannels canali trovati.\n") if ($opt_verbose);

######################################################################
# write configuration
if ($mode eq 'configure') {
    open(my $conf_fh, '>', $config_file) or die ($DEF_LANG eq 'eng' ?
                                                 "cannot write to $config_file: $!" :
                                                 "impossibile scrivere su $config_file: $!");

    # site channel name => XMLTV id, over all selected backends
    my %channels;
    foreach my $name (@backends) {
        # build a hash with all the ids
        foreach my $site_name (keys %{$backend_info{$name}{channels}}) {
            $channels{$site_name} = xmltv_chanid($name, $site_name);
        }

        #not used yet
        if ($backend_info{$name}{needs_login}) {
            say "To get listings on '$name' you will need a login on the site.\n";
            my $username_wanted = ask_boolean('Do you have a login?', 0);
            if ($username_wanted) {
                $backend_info{$name}{username} = ask("Username:");
                print {$conf_fh} "username: $name:$backend_info{$name}{username}\n";
            }
        }
    }

    #double reverse to get rid of duplicates
    %channels = reverse %channels;
    %channels = reverse %channels;

    # Ask about each channel.
    my @names = sort keys %channels;
    my @qs = map { ($DEF_LANG eq 'eng' ? "add channel $_?" : "aggiungo il canale $_?") } @names;
    my @want = ask_many_boolean(1, @qs);
    foreach my $site_name (@names) {
        # a line break in the name would corrupt the line-based config file
        die "channel name contains a line break\n" if $site_name =~ tr/\r\n//;
        my $wanted = shift @want;
        warn("cannot read input, stopping channel questions"), last
          if not defined $wanted;
        # No need to print to user - XMLTV::Ask is verbose enough.

        # Print a config line, but comment it out if channel not wanted.
        print {$conf_fh} '#' if not $wanted;
        print {$conf_fh} "channel ".$channels{$site_name}." # $site_name\n";
    }

    close $conf_fh or warn ($DEF_LANG eq 'eng' ?
                            "cannot close $config_file: $!" :
                            "impossibile chiudere $config_file: $!");
    say(($DEF_LANG eq 'eng' ? "Finished configuration." : "Configurazione terminata."));

    exit();
}

# Not configuring, must be writing some XML.
my $w = XMLTV::Writer->new(%w_args);

my $source_info_str = join ",", map { 'http://'.$backend_info{$_}{domain} } @backends;
my $source_data_str = join ",", map { $backend_info{$_}{rturl} } @backends;

$w->start({ 'source-info-url'     => $source_info_str,
            'source-data-url'     => $source_data_str,
            'generator-info-name' => 'XMLTV',
            'generator-info-url'  => 'http://xmltv.org/',
          });


# xmltv_id => [site id, backend name]. The backend is kept so that other
# attributes (channel number, icon) can be looked up in the right place.
# When several backends know the same xmltv_id the last one in @backends wins.
my %display_names;
my %list_display_names;
foreach my $back (@backends) {
    foreach my $xmltv_id (keys %{$backend_info{$back}{site_ids}}) {
        my $site_id = $backend_info{$back}{site_ids}{$xmltv_id}{site_id};
        $display_names{$xmltv_id}      = [$site_id, $back];
        $list_display_names{$xmltv_id} = $site_id;
    }
}

# Writes the <channel> element for one xmltv_id that is present in
# %display_names. The channel number, when known, becomes a secondary
# display name; the icon is added when known.
my $write_channel = sub {
    my ($xmltv_id) = @_;
    my ($name, $from_backend) = @{ $display_names{$xmltv_id} };
    my $chan = $backend_info{$from_backend}{site_ids}{$xmltv_id};

    my @names = ([ $name ]);
    push @names, [ $chan->{channum} ] if defined $chan->{channum};

    my %chaninfo = (id => $xmltv_id, 'display-name' => \@names);
    $chaninfo{icon} = [ { src => $chan->{icon} } ] if defined $chan->{icon};

    $w->write_channel(\%chaninfo);
};

if ($mode eq 'list-channels') {
    # Write all known channels then finish.
    foreach my $xmltv_id (sort keys %list_display_names) {
        next if not defined $display_names{$xmltv_id};
        $write_channel->($xmltv_id);
    }
    $w->end;
    exit;
}


######################################################################
# read configuration
my @channels;
$line_num = 0;
foreach my $conf_line (XMLTV::Config_file::read_lines($config_file)) {
    ++$line_num;
    next if not defined $conf_line;
    if ($conf_line =~ /^channel:?\s*(.*\S+)\s*$/) {
        push @channels, $1;
    }
    elsif ($conf_line =~ /^username:?\s+(\S+):(\S+)/) {
        my ($for_backend, $username) = ($1, $2);
        if (defined $backend_info{$for_backend}) { # a configuration exists for it
            $backend_info{$for_backend}{username} = $username;
        }
        else {
            warn ($DEF_LANG eq 'eng' ?
                  "Found username for unknown backend $for_backend in $config_file\n" :
                  "Trovato un nome utente per una fonte sconosciuta $for_backend in $config_file\n");
        }
    }
    else {
        warn ($DEF_LANG eq 'eng' ?
              "$config_file:$line_num: bad line\n" :
              "$config_file:$line_num: linea errata\n");
    }
}


######################################################################
# sort out problem in offset options
if ($opt_offset >= $MAX_DAYS) {
    warn ($DEF_LANG eq 'eng' ?
          "Day offset too big. No program information will be fetched.\n" :
          "Intervallo specificato troppo grande. Nessun dato verra' scaricato.\n");
    $opt_offset = 0;
    $opt_days = 0;
}
my $days2get;
if (($opt_days+$opt_offset) > $MAX_DAYS) {
    # clamp the request to what the server can provide
    $days2get=$MAX_DAYS-$opt_offset;
    warn ($DEF_LANG eq 'eng' ?
          "The server only has info for ".($MAX_DAYS-1)." days from today.\n" :
          "Il server ha informazioni solo per ".($MAX_DAYS-1)." giorni da oggi.\n");
    if ($days2get > 1) {
        warn ($DEF_LANG eq 'eng' ?
              "You'll get listings for only $days2get days.\n" :
              "Scarico programmi solo per $days2get giorni.\n");
    }
    else {
        warn ($DEF_LANG eq 'eng' ?
              "You'll get listings for only 1 day.\n" :
              "Scarico programmi solo per un giorno.\n");
    }
}
else {
    $days2get=$opt_days;
}
t "will get $days2get days from $opt_offset onwards";


######################################################################
# grabbing listings

# channel elements first, for every configured channel we know about
foreach my $xmltv_id (@channels) {
    next if not defined $display_names{$xmltv_id};
    $write_channel->($xmltv_id);
}

#make a list of channels and days to grab
my @to_get;
foreach my $day ($opt_offset .. ($days2get + $opt_offset - 1)) {
    foreach my $channel (@channels) {
        push @to_get, [$channel, $day];
    }
}

$bar = XMLTV::ProgressBar->new(($DEF_LANG eq 'eng' ? 'getting listings' : 'scarico programmi'), scalar @to_get)
  if not $opt_quiet;

foreach my $job (@to_get) {
    my ($channel, $day) = @{$job};

    #this is where i would handle cookies and logins if needed
    warn ($DEF_LANG eq 'eng' ?
          "VERBOSE: Grabbing channel $channel, day $day\n" :
          "VERBOSE: Prendo dati per il canale $channel, giorno $day\n") if ($opt_verbose);

    # Try the backends in order and stop at the first one that succeeds.
    my $error;
    foreach my $backend_name (@backends) {
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: Trying with $backend_name\n" :
              "VERBOSE: Provo con $backend_name\n") if ($opt_verbose);

        my @dati;
        ($error, @dati) = $backend_info{$backend_name}{fetch_data_sub}->($channel, $day);

        #TODO different kinds of errors?
        if ($error) {
            warn ($DEF_LANG eq 'eng' ?
                  "VERBOSE: Error fetching channel $channel day $day with backend $backend_name\n" :
                  "VERBOSE: Errore nello scaricare i dati per $channel, giorno $day con $backend_name\n") if ($opt_verbose);
        }
        else {
            $w->write_programme($_) foreach @dati;
            last;
        }
    }

    # no backend managed to provide the data
    if ($error) {
        #this is an easier way to know about errors if all of our scripts are automated
        if ($opt_errors_in_xml) {
            $w->write_programme(
                                {
                                 title   => [[($DEF_LANG eq 'eng' ? 'ERROR FETCHING DATA' : 'ERRORE DI SCARICAMENTO DATI'), $LANG]],
                                 start   => xmltv_date('00:01', $day),
                                 stop    => xmltv_date('23:59', $day),
                                 channel => $channel,
                                 desc    => [[($DEF_LANG eq 'eng' ?
                                               "XMLTV couldn't grab data for $channel, day $day. Sorry about that." :
                                               "XMLTV non e' riuscito a scaricare i dati per $channel, giorno $day. Spiacente."), $LANG]],
                                }
                               );
        }
        else {
            warn ($DEF_LANG eq 'eng' ?
                  "I couldn't fetch data for channel $channel, day $day from any backend!!\n" :
                  "Non sono riuscito a scaricare i dati per $channel, giorno $day da nessuna fonte!!\n") if (not $opt_quiet);
        }
    }

    $bar->update() if not $opt_quiet;
}
$w->end;
$bar->finish() if not $opt_quiet;

#####################
# general functions #
#####################

####################################################
# xmltv_chanid
# to handle channels that are not yet in the channel_ids file
# 2 parameters: backend name
#               channel id as used by the site
# returns the XMLTV id listed in channel_ids for that site id, or a made-up
# id of the form "<lowercased id without non-word chars>.<backend domain>"
# (a warning is printed in that case unless --quiet was given)
sub xmltv_chanid {
    my ($backend, $channel_id) = @_;

    #reverse id hash: site id => xmltv id
    my %chan_ids;
    my $site_ids = $backend_info{$backend}{site_ids};
    foreach my $xmltv_id (keys %{$site_ids}) {
        my $site_id = $site_ids->{$xmltv_id}{site_id};
        # skip entries without a site id before using it as a hash key
        next if (not defined $site_id);
        $chan_ids{$site_id} = $xmltv_id;
    }

    return $chan_ids{$channel_id} if defined $chan_ids{$channel_id};

    warn ($DEF_LANG eq 'eng' ?
          "***Channel |$channel_id| for '$backend' is not in channel_ids, should be updated.\n" :
          "***Il canale |$channel_id| su '$backend' non e' in channel_ids, andrebbe aggiornato.\n"
         ) unless $opt_quiet;

    #make up an id
    (my $stripped = $channel_id) =~ s/\W//gs;
    return lc($stripped).".".$backend_info{$backend}{domain};
}

##########################################################
# tidy
# decodes entities and removes some illegal chars
# 1 parameter: the string to clean
# returns the cleaned string
sub tidy($) {
    my ($text) = @_;

    $text =~ s/[\000-\037]//gm;     # remove control characters
    $text =~ s/[\222]/\'/gm;        # messed up char
    $text =~ s/[\224]/\"/gm;        # end quote
    $text =~ s/[\205]/\.\.\./gm;    # ... must be something messed up in my regexps?
    $text =~ s/[\223]/\"/gm;        # start quote
    $text =~ s/[\221]/\'/gm;
    $text =~ s/\\\'/\'/gm;          # backslash-escaped apostrophe

    # drop whatever is left in the \200-\237 range; $& is the last one removed
    if ($text =~ s/[\200-\237]//g) {
        if ($opt_verbose){
            warn ($DEF_LANG eq 'eng' ?
                  "VERBOSE: removing illegal char: |\\".ord($&)."|\n" :
                  "VERBOSE: tolgo carattere illegale: |\\".ord($&)."|\n");
        }
    }

    # Remove leading white space
    $text =~ s/^\s*//;
    # Remove trailing white space
    $text =~ s/\s*$//;
    # FIXME handle a with a grave accent encoded as utf-8 (fallout from LWP::Simple?)
    $text =~ s/\xc3\xa0/\xe0/g;
    return decode_entities($text);
}


####################################################
# xmltv_date
# this returns a date formatted like 20021229121300 CET
# first argument is time (like '14:20')
# second is date offset from today
# hour and minute are zero-padded, so '7:05' is accepted as well
sub xmltv_date {
    my ($time, $offset) = @_;

    $time =~/([0-9]+?):([0-9]+).*/ or die ($DEF_LANG eq 'eng' ? "bad time $time" : "strano orario $time");
    my $hhmm = sprintf('%02d%02d', $1, $2);

    my $data = DateCalc("today","+ ".$offset." days");
    die ($DEF_LANG eq 'eng' ? 'date calculation failed' : 'errore di calcolo data') if not defined $data;
    return utc_offset(UnixDate($data, '%Y%m%d').$hhmm.'00', '+0100');
}


########################
# boingtv.it functions #
########################

#########################################################
# boingtv_get_channels_list
# since this site only has one channel this is a fake sub
sub boingtv_get_channels_list {
    return ( 'boingtv' => 'www.boingtv.it' );
}

####################################################
# boingtv_fetch_data
# 2 parameters: xmltv_id of channel
#               day offset
# returns an error or an array of data
sub boingtv_fetch_data {
    my ($xmltv_id, $offset) = @_;

    my $site_id = $backend_info{boingtv}{site_ids}{$xmltv_id}{site_id};

    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
              "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    # build url to grab
    # very strange site: only has data till next sunday. if the offset it's too big we return an empty array
    # but we don't return an error
    my $day_of_week = UnixDate("today", '%w'); #1 (Monday) to 7 (Sunday)
    if ($day_of_week + $offset > 7) {
        return (0, ());
    }

    my $date_grab = DateCalc("today","+ ".$offset." days");
    die ($DEF_LANG eq 'eng' ? 'date calculation failed' : 'errore di calcolo di data') if not defined $date_grab;
    $date_grab = UnixDate($date_grab, '%Y%m%d');

    # For today's listings a query string that changes every hour is added.
    # Always defined, so the concatenation below never sees undef.
    my $cachestring = ($offset == 0) ? "?pippo=".UnixDate("today","%Y%m%d%H") : '';
    my $url = $backend_info{boingtv}{base_data}.$cachestring;

    warn ($DEF_LANG eq 'eng' ?
          "VERBOSE: fetching $url\n" :
          "VERBOSE: scarico $url\n") if ($opt_verbose);

    my $content;
    eval { $content=get_nice($url) };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend boingtv\n" :
              "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte boingtv\n") if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    }

    my @programmes = ();
    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    #split the lines
    foreach my $line (split /\n/, $content) {
        next unless $line=~/EVENT/;

        # skip lines that do not carry all three attributes, instead of
        # reusing the captures of an earlier match
        next unless $line=~/timestamp="(.*?)".*name="(.*?)".*description="(.*?)"/;
        my ($stamp, $title, $description) = ($1, $2, $3);

        my $time_start;
        my $converted = eval { $time_start = utc_offset($stamp.'00', '+0100'); 1 };
        if (not $converted) {
            warn 'skipping programme, error: ' . $@ ;
            next ;
        }

        # Three mandatory fields: title, start, channel.
        if (not defined $title) {
            warn 'no title found, skipping programme';
            next;
        }
        if (not defined $time_start) {
            warn "no start time for title $title, skipping programme";
            next;
        }

        # throw away everything that is not on the requested day
        next unless ($time_start=~/^$date_grab/);

        my %programme = (
                         title   => [[tidy($title), $LANG] ],
                         start   => $time_start,
                         channel => $xmltv_id,
                        );
        $programme{desc}=[[tidy($description), $LANG] ] if ($description ne '');

        #put info in array
        push @programmes, {%programme};
    }

    # there is a number of reasons why we could get an empty array.
    # so we return an error
    return (1, ()) unless @programmes;

    return (0, @programmes);
}

########################
# mtv.it functions     #
########################

#########################################################
# mtvit_get_channels_list
# since this site only has one channel this is a fake sub
sub mtvit_get_channels_list {
    return ( 'MTV' => 'www.mtv.it' );
}

####################################################
# mtvit_fetch_data
# 2 parameters: xmltv_id of channel
#               day offset
# returns an error or an array of data
sub mtvit_fetch_data {
    my ($xmltv_id, $offset) = @_;

    my $site_id = $backend_info{mtvit}{site_ids}{$xmltv_id}{site_id};

    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
              "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    # build url to grab, e.g. http://www.mtv.it/tv/guida-tv/2015-06-21
    # older site layouts:
    #   <base>?canaleSel=MTV&giorno_guid=<dd>%2F<mm>%2F<yyyy>
    #   http://tv.mtv.it/guidatv.php?tvguidedate=2015-06-19
    my $grabdate = UnixDate(DateCalc("today","+ ".$offset." days"), '%Y-%m-%d');
    my $url = $backend_info{mtvit}{base_data}.'tv/guida-tv/'.$grabdate;

    warn ($DEF_LANG eq 'eng' ?
          "VERBOSE: fetching $url\n" :
          "VERBOSE: scarico $url\n") if ($opt_verbose);

    my $content;
    eval { $content=get_nice($url) };

    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend mtvit\n" :
              "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte mtvit\n") if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    }

    # Keep only the schedule list. A page without it is reported as an error
    # instead of carrying on with an undefined or stale capture.
    return (1, ()) unless defined $content
                          and $content=~/<ul class="tvguide canale1">(.*?)<\/ul>/s;
    my $listing = $1;
    $listing=~s/[\n\r]+//g;     # line breaks only; a literal '|' is kept

    my @programmes = ();
    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    # one <li> per programme, e.g.
    # <li class="hh5" data-mtv-epg-desc="Capodanno a Palm Springs. E' la notte di San Silvestro ..." data-mtv-epg-title="Modern Family" data-mtv-epg-date='13:35' data-mtv-epg-dateend='14:00'><span class="date">13:35</span><b><span class="inside"><span>Modern Family</span></b></li>
    # older layouts used <span class='time'>07:00</span><h3>NEWS</h3><p>...</p>
    # and <div class="time">07:30</div>...<h3 class="ondemand">MTV News</h3><p>...</p>
    foreach my $line (split /<li/, $listing) {
        next unless $line=~/span class="date"/;

        # The closing quote must be the same character as the opening one,
        # so an apostrophe inside a double-quoted value does not cut it short.
        my (undef, $title)       = $line=~ /data-mtv-epg-title=(["'])(.*?)\1/;
        my ($time_start)         = $line=~ /data-mtv-epg-date=['"](\d\d:\d\d)?['"]/;
        my (undef, $description) = $line=~ /data-mtv-epg-desc=(["'])(.*?)\1/;

        # Three mandatory fields: title, start, channel.
        if (not defined $title) {
            warn 'no title found, skipping programme';
            next;
        }
        if (not defined $time_start) {
            warn "no start time for title $title, skipping programme";
            next;
        }

        # programmes starting before 07:30 are dated on the following day
        (my $hhmm = $time_start) =~ s/://;
        my $day_shift = ($hhmm < 730) ? 1 : 0;

        my %programme = (
                         title   => [[tidy($title), $LANG] ],
                         start   => xmltv_date($time_start, $offset + $day_shift),
                         channel => $xmltv_id,
                        );
        $programme{desc}=[[tidy($description), $LANG] ]
          if (defined $description and $description ne '');

        #put info in array
        push @programmes, {%programme};
    }

    # there is a number of reasons why we could get an empty array.
    # so we return an error
    return (1, ()) unless @programmes;

    return (0, @programmes);
}

########################
# sitcom1.it functions #
########################

#########################################################
# sitcom1_get_channels_list
# since this site only has one channel this is a fake sub
sub sitcom1_get_channels_list {
    return ( 'sitcom1' => 'www.sitcom1.it' );
}

####################################################
# sitcom1_fetch_data
# 2 parameters: xmltv_id of channel
#               day offset
# returns an error or an array of data
sub sitcom1_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    # date to grab
    my $grabdate = UnixDate(&DateCalc("today","+ ".$offset." days"), '%Y:%m:%d');
    my ($anno, $mese, $giorno) = split /:/, $grabdate;

    # build urls to grab
    my @urls;
    push @urls, $backend_info{sitcom1}{base_data}."?id=1&anno=$anno&mese=$mese&giorno=$giorno";
    push @urls, $backend_info{sitcom1}{base_data}."?id=2&anno=$anno&mese=$mese&giorno=$giorno";
    push @urls, $backend_info{sitcom1}{base_data}."?id=3&anno=$anno&mese=$mese&giorno=$giorno";
    push @urls, $backend_info{sitcom1}{base_data}."?id=4&anno=$anno&mese=$mese&giorno=$giorno";


    my $site_id = $backend_info{sitcom1}{site_ids}{$xmltv_id}{site_id};

    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
              "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    my @programmes = ();
    foreach my $url (@urls) {

        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: fetching $url\n" :
              "VERBOSE: scarico $url\n") if ($opt_verbose);

        eval { $content=get_nice($url) };
        if ($@) { #get_nice has died
            warn ($DEF_LANG eq 'eng' ?
"VERBOSE: Error fetching $url channel $xmltv_id day $offset backend sitcom1\n" :
                "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte sitcom1\n") if ($opt_verbose);

            # Indicate to the caller that we had problems
            return (1, ());
        }

        warn "VERBOSE: parsing...\n" if ($opt_verbose);

        $content=~s/\n+//igm;
        $content=~s/\r+//igm;
        $content=~/\Q****** -->\E(.*)\Q<!-- ******\E/;
        $content = tidy($1);

        my @lines = split /height=\"20\"/, $content;

        #split the lines
        foreach my $line (@lines) {
            next if ($line!~/bgcolor/);
            $line=~/10%\".>.(.*?)<\/td.*?href=\"(.*?)\".*?B>(.*?)<\/B.*puntate(.*?)\'.*? >(.*?)<\/a/;
            #warn "||$1||$2||$3||$4||$5||\n";

            my %programme = ();
            my ($title, $time_start, $subtitle) = ($3, $1, $5);
            my $category;
            if ($title=~/Film|Tv movie/i) {
                $category = $title;
                $title = $subtitle;
                $subtitle = undef;
            }

            # Three mandatory fields: title, start, channel.
            if (not defined $title) {
                warn 'no title found, skipping programme';
                next;
            }
            $programme{title}=[[tidy($title), $LANG] ];
            if (not defined $time_start) {
                warn "no start time for title $title, skipping programme";
                next;
            }
            $time_start =~/(..).(..)/; my $time_start2 = "$1:$2";

            $programme{start}=xmltv_date($time_start2, $offset);
            $programme{'sub-title'}=[[tidy($subtitle), $LANG] ] if defined($subtitle);
            $programme{category}=[[tidy($category), $LANG ]] if defined $category;
            $programme{channel}=$xmltv_id;

            #put info in array
            push @programmes, {%programme};
        }
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error
        return (1, @programmes);
    }
}


########################
# skylife.it functions #
########################

my %skylife_skip_ids;
####################################################
# skylife_get_channels_list
# Downloads one channel list (a .js file) per Sky category and returns a
# hash (channel name => site channel id). As a side effect it records
# site_id, icon, channum and chanid of every channel found in
# $backend_info{skylife}{site_ids}, so that channels missing from
# channel_ids are known too.
# Returns an empty list as soon as one download fails.
sub skylife_get_channels_list {
    my %chan_hash;

    # the list of categories could be taken automatically from here:
    # http://static.sky.it/static/js/epg/init_grid.js
    my @categories = ('cinema', 'mondi', 'sport', 'news', 'bambini', 'musica', 'intrattenimento', 'primafila', 'hd');

    foreach my $category (@categories) {
        # a separate variable, so that @categories is not rewritten
        # through the loop alias
        my $url = $backend_info{skylife}{base_chan}.'grid_'.$category.'_channels.js';

        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: Getting channel list from $url\n" :
            "VERBOSE: Scarico la lista dei canali da $url\n") if ($opt_verbose);

        my $content;
        eval { $content = get_nice($url); };
        if ($@) { #get_nice has died
            # the english text used to end in a literal backslash-n and
            # no question; it now matches the other backends' message
            warn ($DEF_LANG eq 'eng' ?
                "VERBOSE: Cannot get skylife's channel list ($url). Site down?\n" :
                "VERBOSE: Non sono riuscito a prendere la lista dei canali di skylife ($url). Il sito non funziona?\n") unless ($opt_quiet);
            return ();
        }

        # strip line breaks (the old class [\r|\n] also removed '|')
        # and runs of whitespace
        $content=~s/[\r\n]//g;
        $content=~s/\s\s+//g;

        foreach my $canale (split /\{/, $content) {
            # "id":"101", "name":"Sky Cinema 1", "number":"301", "service":"101", "channellogo":"http://guidatv.sky.it/app/guidatv/images/epgimages/channels/grid/301_grid.gif", "channelvisore":"http://guidatv.sky.it/app/guidatv/images/epgimages/channels/visore/301_visore.gif", "logomsite":"http://guidatv.sky.it/app/guidatv/images/epgimages/channels/msite/LCh301_EPG.png" } ,

            # Take the captures straight from the match and skip the chunk
            # when it fails, instead of reading $1..$7 left by another match.
            # service, channellogo and logomsite are not used.
            my ($chanid, $raw_name, $channum, undef, undef, $iconurl, undef) =
                $canale=~/\"id\":\"(.*?)\",\"name\":\"(.*?)\",\"number\":\"(.*?)\",\"service\":\"(.*?)\",\"channellogo\":\"(.*?)\",\"channelvisore\":\"(.*?)\",\"logomsite\":\"(.*?)\"}/
                or next;
            #print "|$chanid|$raw_name|$channum|$iconurl!\n";

            my $name = tidy($raw_name);
            $chan_hash{$name} = "$chanid";

            #update backend info, in case this is a new channel not in channel_ids
            my $xmltv_id = xmltv_chanid('skylife', $name);
            $backend_info{skylife}{site_ids}{$xmltv_id}{site_id} = $name;
            $backend_info{skylife}{site_ids}{$xmltv_id}{icon} = $iconurl;
            $backend_info{skylife}{site_ids}{$xmltv_id}{channum} = $channum;
            $backend_info{skylife}{site_ids}{$xmltv_id}{chanid} = $chan_hash{$name};
        }
    }
    return %chan_hash;
}

sub skylife_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;
    my %mythweb_categories = (
        'notiziario' => 'news',
        'rugby' => 'sport',
        'calcio' => 'sport',
        'golf' => 'sport',
        'wrestling' => 'sport',
        'hockey' => 'sport',
        'basket' => 'sport',
        'drammatico' => 'drama',
        'cartoni animati' => 'children',
        'animazione' => 'children',
        'soap opera' => 'soaps',
        'romantico' => 'romance',
        'fantastico' => 'fantasy',
        'commedia' => 'comedy',
'azione' => 'actions',
        'avventura' => 'actions',
        'fantascienza' => 'fantasy',
        'thriller' => 'crime',
        'guerra' => 'war',
        'film' => 'movie',
        'scienza' => 'science',
        'tecnologia' => 'science',
        'natura' => 'nature',
        'viaggi' => 'travel',
        'magazine viaggi' => 'travel',
        'storia' => 'history',
        'gioco' => 'game',
        'cucina' => 'food',
    );

    my $site_id = $backend_info{skylife}{site_ids}{$xmltv_id}{site_id};
    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
            "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    # Indicate to the caller if we have problems (channels disappearing from the site maybe)
    return (1, ()) if (not defined ($backend_info{skylife}{channels}{$site_id}));

    # build url to grab
    my $url = $backend_info{skylife}{base_data}.UnixDate(&DateCalc("today","+ ".$offset." days"), '%y_%m_%d').'/ch_'.$backend_info{skylife}{site_ids}{$xmltv_id}{chanid}.'.js';

    my %prog_to_check = ();
    my $lastid;

    my $grabdate = UnixDate(&DateCalc("today","+ ".$offset." days"), '%Y%m%d');

    #as with other grabber we trick memoize into not caching data
    #however, we do this only for the first day, other days use cache
    #(no "my ... if ...": declaring a variable under a statement modifier
    #has undefined behaviour, see perlsyn)
    $url.="?pippo=".UnixDate("today","%Y%m%d%H") if ($offset == 0);

    warn "VERBOSE: fetching $url\n" if ($opt_verbose);

    eval { $content=get_nice($url) };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend skylife\n" :
            "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte skylife\n") if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    }

    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    #split and parse the lines
    my @lines = split /{\"id\":/, $content;
    foreach my $line (@lines) {
        next if ($line=~/{\"channel\"/);

        my ($id, $pid, $start, $durata, $title, $title2, $desc, $cat, $cat2, $prima);
        if ($line=~/\"(\S+?)\",\s+\"pid\":\"(.*?)\",\s+\"starttime\":\"(.*?)\",\s+\"dur\":\"(\d+?)\",\s+\"title\":\"(.*?)\",\s+\"normalizedtitle\":\s*\"(.*?)\",\s+\"desc\":\"(.*?)\",\s+\"genre\":\"(.*?)\",\s+\"subgenre\":\"(.*)\",\s+\"prima\":(.*)/m){
            ($id, $pid, $start, $durata, $title, $title2, $desc, $cat, $cat2, $prima) = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
        }
        else {
            warn "errore in $line\n";
            next;
        }
        next if ($title eq '');

        if (defined $id and defined $lastid) {
            my $this_start = $start; $this_start=~s/\:/\./;
            my $last_start = $prog_to_check{$lastid}->[1]; $last_start=~/.*? (..):(..)$/; $last_start="$1.$2";
            last if ($this_start < $last_start) #they can work as decimals, 0.32 < 23.44
            # $skylife_skip_ids{$id}++;
        }

        #if this programs starts at 00:00 maybe it could have actually started yesterday
        #this isn't usually a problem unless --offset is set
        #in this case we skip the programme entirely in order not to give possibly false information
        #(ok, this is basically a hack to keep the xmltv validator happy, else it complains with 'not-additive')
        next if($start eq '00:00' and $opt_offset and not defined $lastid);

        #warn "|$id|$pid|$start|$durata|$title|$title2|$desc|$cat|$cat2|$prima|\n";
        #($id, $pid, $start, $durata, $title, $title2, $desc, $cat, $cat2, $prima) = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
        $prog_to_check{$id} = [$title, $grabdate." ".$start, $durata, $title, $title2, $desc, $cat, $cat2, $prima, $pid, $id] unless $skylife_skip_ids{$id};
        $lastid=$id;
    }

    my @programmes = ();
    foreach (sort keys %prog_to_check) {
        my %programme = ();
        my ($start, $durata, $title, $title2, $desc, $cat, $cat2, $prima, $pid, $id);
        ($title, $start, $durata, $title, $title2, $desc, $cat, $cat2, $prima, $pid, $id) = @{$prog_to_check{$_}};
        $programme{title} = [[tidy($title), $LANG] ];
        $programme{start} = utc_offset(UnixDate($start, '%Y%m%d%H%M').'00', '+0100');
        $programme{stop} = utc_offset(UnixDate(&DateCalc("$start","+ $durata minutes"), '%Y%m%d%H%M').'00', '+0100');
        $programme{channel} = $xmltv_id;
        $cat2=$mythweb_categories{lc(tidy($cat2))} if (defined $mythweb_categories{lc(tidy($cat2))} and $opt_mythweb_categories);
        $cat=$mythweb_categories{lc(tidy($cat))} if (defined $mythweb_categories{lc(tidy($cat))} and $opt_mythweb_categories);

        push (@{$programme{category}}, [tidy($cat2), $LANG ]) if (defined $cat2 and tidy($cat2) ne '');
        push (@{$programme{category}}, [tidy($cat), $LANG ]) if (defined $cat and tidy($cat) ne '');
        $programme{desc}=[[tidy($desc), $LANG ]] if ($desc ne '');
        #FIXME$programme{premiere} = ['prima TV', $LANG ] if ($prima ne 'false');
        skylife_parse_data_slow($desc, $title2, $cat, $cat2, $id, $pid, \%programme);
        push @programmes, {%programme} if ($start and $title);
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error
        return (1, @programmes);
    }
}

####################################################
# skylife_parse_data_slow
# 7 parameters: description from the daily grid, normalized title,
#               genre, subgenre, event id, programme id,
#               reference to the programme hash being built
# With --slow the description is downloaded again from the event page
# (and, if that is empty, from the generic programme page). The text is
# then mined for season/episode, sub-title, cast, director, country,
# year and running time, which are stored in the programme hash.
# Nothing useful is returned; callers ignore the return value.
sub skylife_parse_data_slow {
    my ($content, $title2, $cat, $cat2, $id, $pid, $programme) = @_;

    my $desc = $content;
    my $slowurl;

    #if ($opt_slow and tidy($cat) ne '' and tidy($cat2) ne '') {
    if ($opt_slow) {
        # sky's .js lists only some categories (fiction, animazione,
        # sit com, telefilm, soap opera, telenovela) as having a
        # description, but in practice nearly every programme has one,
        # so the category is not checked
        $slowurl='http://guidatv.sky.it/EpgBackend/event_description.do?eid='.$id;
        warn "VERBOSE: fetching $slowurl\n" if ($opt_verbose);
        eval { $content=get_nice($slowurl) };
        if ($@) { #get_nice has died
            warn ($DEF_LANG eq 'eng' ?
                "VERBOSE: Error fetching $slowurl\n" :
                "VERBOSE: Errore nello scaricare $slowurl\n") if ($opt_verbose);
            # Indicate to the caller that we had problems
            return (1, ());
        }
        # no description in the answer means an empty one, not whatever
        # $1 happens to hold from an earlier match
        $desc = ($content=~/description\":\"(.*?)\"/) ? $1 : '';
    }

    $desc = '' if ($id eq '-1');
    if ($opt_slow and ($cat2 eq ' ' and $content eq '')){
        #it could be empty, fall back to the generic page
        $cat=~s/ //igm;
        #$cat='altriprogrammi' if ($cat eq 'altri programmi');
        $cat2='generico' if ($cat2 eq ' ');
        #try the other url
        $slowurl = $backend_info{skylife}{base_slow}.$cat.'/'.$cat2.'/'.$title2.'_'.$pid.'.shtml';
        warn "VERBOSE: fetching $slowurl\n" if ($opt_verbose);
        eval { $content=get_nice($slowurl) };
        if ($@) { #get_nice has died
            #no error message, the url may simply not exist
            return (1, ());
        }
        $content=~s/\n//igm;
        if ($content =~/\"episodio\".*?<div class=\"testo\">(.*?)<\/div>(.*)/igm){
            $content = $1;
            $desc = $content;
        }
        elsif ($content =~/<div class=\"testo\"><p>(.*?)<\/p><\/div>(.*)/igm){
            $content = $1;
            $content =~s/<br \/>/\n/igm;
            $desc = tidy($content);
        }
        elsif ($content eq ''){
            #give up
            warn "niente da fare programma vuoto $title2 $id $pid $slowurl...\n-- ";
            return;
        }
        if ($content=~/<div class=\"__pilat\">(.*)/){
            $desc = $1;
        }
        #warn $desc;
    }

    # The caller stores the title as [[text, lang]]. Compare sub-title
    # candidates with the text: comparing with the array reference, as
    # was done before, is always "different".
    my $main_title = '';
    if (ref $programme->{title} eq 'ARRAY'
        and ref $programme->{title}[0] eq 'ARRAY'
        and defined $programme->{title}[0][0]) {
        $main_title = $programme->{title}[0][0];
    }

    my ($cast, $country, $director, $year, $length, $subtitle, $episode, $season, $prossima, $fulldesc, $filmcat);
    $desc=~s/\\\'/\'/igm;

    # leading "N' Stagione ... Ep.M - text"
    if ($desc=~/(.*?)\' Stagione - Ep.(\d+?) - (.*)/) {
        $season = $1;
        $episode =$2;
        $desc = $3 if ($3 ne '');
    }
    elsif ($desc=~/(.*?)\' Stagione Episodio (\d+?) - (.*)/) {
        $season = $1;
        $episode =$2;
        $desc = $3 if ($3 ne '');
    }
    elsif ($desc=~/(.*?)\' Stagione Ep.(\d+?) -(.*)/) {
        $season = $1;
        $episode =$2;
        $desc = $3 if ($3 ne '');
    }

    # "sub-title - text"
    if ($desc=~/(.*?) - (.*)/) {
        my ($head, $tail) = ($1, $2);
        $subtitle = $head if ($head ne '' and $head ne $main_title);
        $desc = $tail if ($tail ne '');

        if (defined $subtitle) {
            if ($subtitle=~/(.*?)\\\' Stagione/){$season = $1;}
            if ($subtitle=~/Ep.(\d+)/) {$episode = $1;}
        }
        $subtitle='' if ($season or $episode);
    }
    $desc=~s/^\s+//;

    # "'quoted sub-title' text": append to a sub-title found above
    # (the ' - ' separator used to be added and then overwritten)
    if ($desc=~/^\'(.*?)\' (.*)/) {
        my ($quoted, $rest) = ($1, $2);
        if ($quoted ne '' and $quoted ne $main_title) {
            $subtitle = $subtitle ? "$subtitle - $quoted" : $quoted;
        }
        $desc = $rest if ($rest ne '');
    }

    my $strseason = '';
    $strseason.= 'Stagione '.$season if ($season);
    if ($episode and $season){
        $strseason.= ' Episodio '.$episode ;
    }
    elsif ($episode) {
        $strseason.= 'Episodio '.$episode ;
    }

    if ($strseason ne '' and $subtitle){
        $subtitle="$strseason - ".$subtitle ;
    }
    elsif ($strseason ne '') {
        $subtitle=$strseason;
    }

    # the description that is finally stored still carries the credits
    $fulldesc = $desc;
    if ($cat eq 'film'){
        if ($desc=~/(.*) (Prox:.*)$/) {
            $desc = $1;
            $prossima = $2;
        }
    }

    if ($desc=~/(.*)\. (\w+)\. \((\d+)\'\) Di (.*?). Con (.*?) \(([A-Z]+) (\d+?)\)$/) {
        $filmcat = $2;
        $length = $3;
        $director = $4;
        $cast = $5;
        $country = $6;
        $year = $7;
        $desc = $1 || '';
    }
    # The form with "(NN min)" is tried before the shorter one, which
    # could otherwise match the same text at a later "digits." position.
    # $length is the variable declared above (a second "my" used to
    # throw the running time away).
    elsif ($desc=~/Regia di (.*?), con (.*?); (.*?) (\d+?) \((\d+) min\)\. (.*)/) {
        $director = $1;
        $cast = $2;
        $country = $3;
        $year = $4;
        $length = $5;
        $desc = $6 || '';
    }
    elsif ($desc=~/Regia di (.*?), con (.*?); (.*?) (\d+?)\.(.*)/) {
        $director = $1;
        $cast = $2;
        $country = $3;
        $year = $4;
        # this pattern has five groups: the fifth is the remaining text
        $desc = $5 || '';
    }
    elsif ($desc=~/^(\d+)\. Con ([A-Z].*?)\.(.*)/) {
        $year = $1;
        $cast = $2;
        $desc = $3 || '';
    }
    elsif ($desc=~/^Con ([A-Z].*?)\. (.*)/) {
        $cast = $1;
        $desc = $2 || '';
    }

    #tricky one
    if ($desc=~/^con (.*?)\. (.*)/) {
        $desc = $2;
        $cast = $1;
        if ($cast=~/(.*?); (.*)/) {
            $cast = $1;
            $country = $2;
        }
    }


    if ($cast) {
        # "A, B e C": the last actor is introduced by " e "
        my $lastcast;
        ($cast, $lastcast) = split / e /, $cast;
        my @cast = split /,/, $cast; push @cast, $lastcast if ($lastcast);
        foreach (@cast) {
            s/^\s+//; s/\s+$//;
            (push @{$programme->{credits}->{actor}}, $_);
        }
    }

    undef $season if (defined $season and $season!~/\d+/);

    $programme->{length}= $length*60 if ($length);
    $programme->{date}= $year if ($year);
    $programme->{'sub-title'}=[[$subtitle, $LANG] ] if ($subtitle);
    push@{$programme->{'episode-num'}}, [$strseason, 'onscreen'] if ($strseason);
    # xmltv_ns numbers count from zero
    push@{$programme->{'episode-num'}}, [(defined $season ? ($season-1) : '').".".(defined $episode ? ($episode-1) : '').".0/1", 'xmltv_ns'] if ($strseason);
    #push@{$programme->{category}}, [tidy($filmcat), $LANG ] if (tidy($filmcat) ne '');

    push @{$programme->{credits}->{director}}, $director if ($director);
    push (@{$programme->{country}}, [$country, $LANG]) if ($country);
    $programme->{desc}=[[tidy($fulldesc), $LANG ]] if ($fulldesc ne '');
}


#########################
# raisat.it functions   #
#########################

####################################################
# raisat_get_channels_list
sub raisat_get_channels_list {
    my %chan_hash;

    ###############################################
    #putroppo il file che dovrebbe darci gli id dei canali in realta' li ha sbagliati, e manca rai4
    #per adesso li ritorno a mano, forse in futuro la parte sotto funzionera' correttamente
    #in realta' e' cosi'
    #1=raisat extra #2=raisat premium
    #3=cinema #4=gambero rosso
    #6=yoyo #7=smash
    #8=rai4
    %chan_hash = ("EXTRA", 1,
"PREMIUM", 2,
                  "CINEMA", 3,
                  "GAMBERO ROSSO", 4,
                  "YOYO", 6,
                  "SMASH", 7,
                  "RAI4", 8);
    foreach (keys %chan_hash) {
        my $xmltv_id = xmltv_chanid('raisat', $_);
        $backend_info{raisat}{site_ids}{$xmltv_id}{site_id} = $_;
        $backend_info{raisat}{site_ids}{$xmltv_id}{icon} = 'http://www.raisat.it/canali/loghi/'.$chan_hash{$_}.'.jpg';
    }
    return %chan_hash;
    ################################################ fine fake sub
    my $base = shift;

    my $content;
    warn ($DEF_LANG eq 'eng' ?
        "VERBOSE: Getting channel list from $base\n" :
        "VERBOSE: Scarico la lista dei canali da $base\n") if ($opt_verbose);

    eval { $content = get_nice($base); };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: Cannot get raisat's channel list ($base). Site down?\n" :
            "VERBOSE: Non sono riuscito a prendere la lista dei canali di raisat ($base). Il sito non funziona?\n"
        ) unless ($opt_quiet);
        return ();
    }

    my @lines = split /<canale/, $content;
    foreach my $l (@lines) {
        if ($l=~/colore=\"(.*?)\".*id=\"(.*?)\" label=\"(.*?)\".*? logo=\"(.*?)\"/){
            next unless ($1);
            my ($r_id, $r_name, $r_logo) = ($2, $3, $backend_info{raisat}{rturl}.$4);
            $chan_hash{$r_name}=$r_id;
            #print "|$1|$2|$3|$4|\n";

            #update backend info, in case this is a new channel not in channel_ids
            my $xmltv_id = xmltv_chanid('raisat', $r_name);
            $backend_info{raisat}{site_ids}{$xmltv_id}{site_id} = $r_name;
            $backend_info{raisat}{site_ids}{$xmltv_id}{icon} = $r_logo;
        }

    }

    return %chan_hash;
}

####################################################
# raisat_fetch_data
# 2 parameters: xmltv_id of channel
#               day offset
# returns a list: an error flag (0 = ok, 1 = error) followed by
# one hash per programme found in the <palinsesto> of that day

sub raisat_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    my $site_id = $backend_info{raisat}{site_ids}{$xmltv_id}{site_id};

    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
            "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    # A missing channel list must lead to the "site down" error below,
    # not to a fatal "undefined value as a HASH reference".
    my %chan = %{ $backend_info{raisat}{channels} || {} };
    my $channel_name = $backend_info{raisat}{site_ids}{$xmltv_id}{site_id};
    my $channel_num = $chan{$channel_name};
    # build url to grab

    if (not defined $channel_num) {
        # if we get here it means that the site should have the channel (it's in channel_ids)
        # but for some reason we are missing its site id (probably the site is down)
        # we return an error so that another backend will be used, if possible
        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: \tThis site appears to be down!\n" :
            "VERBOSE: \tQuesto sito non sembra funzionare!!\n") if ($opt_verbose);
        return (1, ());
    }

    my $date_grab = &DateCalc("today","+ ".$offset." days");


    die ($DEF_LANG eq 'eng' ? 'date calculation failed' : 'errore di calcolo di data') if not defined $date_grab;
    $date_grab = UnixDate($date_grab, '%d/%m/%Y');

    my $url = $backend_info{raisat}{base_data}.'?id='.$channel_num.'&dat='.$date_grab;

    #to trick memoize into not caching data we add a string to the url, based on time, with hourly resolution
    #so if we redownload data within 5 minutes (we are within the same run) it comes from the cache
    #but if we download it tomorrow it doesn't.
    #this makes sense if you use the --cache option and you want to cache only the --slow data, to speed up things
    #when you grab data every two-three days, but you don't want to miss schedule changes
    #this string is ignored by the server
    if ($opt_cache_slow) {
        my $cachestring = "&pippo=".UnixDate("today","%Y%m%d%H");
        $url.=$cachestring;
    }
    warn ($DEF_LANG eq 'eng' ?
        "VERBOSE: fetching $url\n" :
        "VERBOSE: scarico $url\n") if ($opt_verbose);

    eval { $content=get_nice($url) };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend raisat\n" :
            "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte raisat\n") if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    }

    my @programmes = ();
    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    # Without a <palinsesto> element there is nothing to parse; do not
    # go on with a stale $1.
    if (defined $content and $content=~/<palinsesto>(.*)<\/palinsesto>/s) {
        $content = $1;
    }
    else {
        return (1, ());
    }

    #split and parse
    my @p = split /<prog/, $content;
    foreach my $pr (@p) {
        #print $pr."-----------------\n";
        my @lines = split /\n/, $pr;
        my %programme = ();
        my ($title, $time_start, $id, $anno, $nazione, $regia, $desc, $cast, $subtitle);

        my $lastline;
        foreach my $xml_line (@lines) {
            next unless ($xml_line=~/\w/);
            next if ($xml_line=~/<dati>|<\/dati>|<\/prog>/);
            # an element opened but not closed on this line: remember it
            if (($xml_line=~/<descrizione>/ and $xml_line!~/<\/descrizione>/) or ($xml_line=~/<cast>/ and $xml_line!~/<\/cast>/)) {
                $lastline = $xml_line;
                next;
            }
            #description and cast can be long: join the closing line to the remembered one
            if (($xml_line=~/<\/descrizione>/ and $xml_line!~/<descrizione>/) or ($xml_line=~/<\/cast>/ and $xml_line!~/<cast>/)) {
                chop($lastline);
                $xml_line = $lastline.$xml_line;
                #warn "concatenato, ||$xml_line||\n";
            }
            if ($xml_line=~/mat=\"(.*?)\" ora=\"(.*?)\">/) {
                $id = $1;
                $time_start=$2;
                #warn "id: $id\n";
                #warn "time_start: $time_start\n";
            }
            elsif ($xml_line=~/<titolo><\!\[CDATA\[(.*?)\]\]><\/titolo>/){
                $title=$1;
                next unless ($title);
                # "Title - ... Ep. ..." or "Title ... Ep. ...": the episode part is the sub-title
                if ($title=~/^(.*) - (.*Ep\..*)$/) {
                    $title = $1;
                    $subtitle = $2;
                    $subtitle=~s/^\s+//;
                }
                elsif ($title=~/^(.*) (.*Ep\..*)$/) {
                    $title = $1;
                    $subtitle = $2;
                    $subtitle=~s/^\s+//;
                }

                #print "titolo: $title\n";
            }
            elsif ($xml_line=~/<descrizione><\!\[CDATA\[(.*?)\]\]><\/descrizione>/){
                $desc = $1;
                #warn "descrizione: $desc\n";
            }
            elsif ($xml_line=~/<regia><\!\[CDATA\[(.*?)\]\]><\/regia>/){
                $regia=$1;
                #warn "regia: $1\n";
            }
            elsif ($xml_line=~/<anno><\!\[CDATA\[(.*?)\]\]><\/anno>/){
                $anno=$1;
                #print "anno: $1\n";
            }
            elsif ($xml_line=~/<nazione><\!\[CDATA\[(.*?)\]\]><\/nazione>/){
                $nazione=$1;
                #print "nazione: $1\n";
            }
            elsif ($xml_line=~/<cast><\!\[CDATA\[(.*?)\]\]><\/cast>/){
                $cast = $1;
                #warn "cast: $1\n";
            }


            #elsif (/<><\!\[CDATA\[(.*?)\]\]><\/>/){
            #}
            else {
                warn "linea sconosciuta $xml_line \n";
            }
            $lastline = $xml_line;
        }

        # Three mandatory fields: title, start, channel.
        # Chunks without a title (e.g. the text before the first <prog>)
        # are skipped silently.
        next unless $title;
        $programme{title}=[[tidy($title), $LANG] ];
        if (not defined $time_start) {
            warn ($DEF_LANG eq 'eng' ? "no start time for title $title, skipping programme" : "nessun orario di inizio per $title, salto");
            next;
        }
        #$time_start=~s/://;
        $programme{start}=xmltv_date($time_start, $offset);
        if (not defined $xmltv_id) {
            warn ($DEF_LANG eq 'eng' ? "no channel for programme $title at $time_start, skipping programme" : "canale non trovato per $title alle $time_start, salto");
            next;
        }

        $programme{channel}=$xmltv_id;
        $programme{date}= $anno if ($anno);
        $programme{'sub-title'}=[[$subtitle, $LANG] ] if ($subtitle);

        push (@{$programme{country}}, [$nazione, $LANG]) if ($nazione);
        $programme{desc}=[[tidy($desc), $LANG ]] if (defined $desc and $desc ne '');
        if (defined $cast and $cast ne '') {
            my @actors = split /, /, $cast;
            foreach (@actors) {
                push @{$programme{credits}{actor}}, $_;
            }
        }

        if (defined $regia and $regia ne '') {
            if ($regia=~/,/) {
                my @directors = split /, /, $regia;
                foreach (@directors) {
                    push @{$programme{credits}{director}}, $_;
                }
            }
            else {
                push @{$programme{credits}{director}}, $regia;
            }
        }

        #put info in array
        push @programmes, {%programme};
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error
        return (1, @programmes);
    }
}

##############################
# mediasetpremium functions  #
##############################

####################################################
# mediasetpremium_get_channels_list
# Returns a fixed hash (channel name => site channel code) and records
# the site_id of every channel in $backend_info{mediasetpremium}{site_ids}.
sub mediasetpremium_get_channels_list {
    my %chan_hash;

    ###############################################
    # the only url found that lists all the channels is
    # http://www.mediasetpremium.mediaset.it/export/palinsesto.xml
    # but it is a file of about 1.5 MB; since this sub is called at every
    # run of the grabber, this too is a fake sub that returns a fixed list


    # ("Cartoon Network" used to be listed twice with the same code)
    %chan_hash = ("Premium Calcio 1", "K1",
                  "Premium Calcio 2", "K2",
                  "Premium Calcio 3", "K3",
                  "Premium Calcio 4", "K4",
                  "Premium Calcio 5", "K5",
                  "Premium Calcio 6", "K6",
                  "Disney +1", "DZ",
                  "Disney", "DY",
                  "Joi +1", "KW",
                  "Joi", "KJ",
                  "Mya +1", "KZ",
                  "Mya", "KD",
                  "Premium Calcio", "KC",
                  "Steel +1", "KY",
                  "Premium Action", "KS",
                  "Playhouse", "KP",
                  "Cartoon Network", "KN",
                  "Hiro", "KU",
                  "Premium Cinema", "KE",
                  "Studio Universal", "KR",
                  "Premium Cinema Emotion", "KO",
                  "Premium Cinema Energy", "KG",
                  "Premium Extra", "K8",
                  "Premium Extra 2", "K9",
                  "BBC Knowledge", "EB",
                  "Discovery World", "ED",
                  "Disney Junior", "KP",
                  "Premium Cinema Comedy", "LC",
                  "Premium Crime", "LR",
                  );

    foreach (keys %chan_hash) {
        my $xmltv_id = xmltv_chanid('mediasetpremium', $_);
        $backend_info{mediasetpremium}{site_ids}{$xmltv_id}{site_id} = $_;
    }
    return %chan_hash;
    ################################################ end of fake sub
    # the code below is never reached; it is kept for the day the
    # channel list is downloaded from the site again


    my $base = shift;

    my $content;
    warn ($DEF_LANG eq 'eng' ?
        "VERBOSE: Getting channel list from $base\n" :
        "VERBOSE: Scarico la lista dei canali da $base\n") if ($opt_verbose);

    eval { $content = get_nice($base); };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
            "VERBOSE: Cannot get mediasetpremium's channel list ($base). Site down?\n" :
            "VERBOSE: Non sono riuscito a prendere la lista dei canali di mediasetpremium ($base). Il sito non funziona?\n"
        ) unless ($opt_quiet);
        return ();
    }

    my @lines = split /<canale description/, $content;
    my %seen;
    foreach my $l (@lines) {
        if ($l=~/\=\"(.*?)\" id\=\"(.*?)\">/){
            print "|$1|$2|\n" unless $seen{$1}++;
            my ($id, $name) = ($2, $1);
            $chan_hash{$name}=$id;

            #update backend info, in case this is a new channel not in channel_ids
            my $xmltv_id = xmltv_chanid('mediasetpremium', $name);
            $backend_info{mediasetpremium}{site_ids}{$xmltv_id}{site_id} = $name;
        };
    }

    return %chan_hash;
}

####################################################
# mediasetpremium_fetch_data
# 2 parameters: xmltv_id of channel
#               day offset
# returns an error or an array of data

sub mediasetpremium_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    my $site_id = $backend_info{mediasetpremium}{site_ids}{$xmltv_id}{site_id};

    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
1984 "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" : 1985 "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose); 1986 return (1, ()); 1987 } 1988 1989 my %chan = %{$backend_info{mediasetpremium}{channels}}; 1990 my $channel_name = $backend_info{mediasetpremium}{site_ids}{$xmltv_id}{site_id}; 1991 my $channel_num = $chan{$channel_name}; 1992 # build url to grab" 1993 1994 if (not defined $channel_num) { 1995 # if we get here it means that the site should have the channel (it's in channel_ids) 1996 # but for some reason we are missing it's site id (probably the site is down) 1997 # we return an error so that another backend will by used, if possible 1998 warn ($DEF_LANG eq 'eng' ? 1999 "VERBOSE: \tThis site appears to be down!\n" : 2000 "VERBOSE: \tQuesto sito non sembra funzionare!!\n") if ($opt_verbose); 2001 return (1, ()); 2002 } 2003 2004 my $date_grab = &DateCalc("today","+ ".$offset." days"); 2005 2006 2007 die ($DEF_LANG eq 'eng' ? 'date calculation failed' : 'errore di calcolo di data') if not defined $date_grab; 2008 $date_grab = UnixDate($date_grab, '%Y%m%d'); 2009 2010 my $url = $backend_info{mediasetpremium}{base_data}.'-'.$date_grab.'-'.lc($channel_num).'.xml'; 2011 2012 #to trick memoize into not caching data with add a string to the url, based on time, with hourly resolution 2013 #so if we redownload data within 5 minutes (we are within the same run) it comes from the cache 2014 #but if we download it tomorrow it doesn't. 2015 #this makes sense if you use the --cache option and you want to cache only the --slow data, to speed up things 2016 #when you grab data every two-three days, but you don't want to miss schedule changes 2017 #this string is ignored by the server 2018 if ($opt_cache_slow) { 2019 my $cachestring = "?pippo=".UnixDate("today","%Y%m%d%H"); 2020 $url.=$cachestring; 2021 } 2022 warn ($DEF_LANG eq 'eng' ? 
2023 "VERBOSE: fetching $url\n" : 2024 "VERBOSE: scarico $url\n") if ($opt_verbose); 2025 2026 eval { $content=get_nice($url) }; 2027 if ($@) { #get_nice has died 2028 warn ($DEF_LANG eq 'eng' ? 2029 "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend mediasetpremium\n" : 2030 "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte mediasetpremium\n") if ($opt_verbose); 2031 2032 # Indicate to the caller that we had problems 2033 return (1, ()); 2034 } 2035 2036 my @programmes = (); 2037 warn "VERBOSE: parsing...\n" if ($opt_verbose); 2038 2039 #split and parse 2040 my @p = split /<prg/, $content; 2041 foreach my $pr (@p) { 2042 next unless $pr=~/ orafine=\"(.*?)\" orainizio=\"(.*?)\" primaTv=\"(.*?)\" replica=\"(.*?)\"/; 2043 #warn $pr."-----------------\n"; 2044 #warn "|$1|$2|$3|$4|\n"; 2045 my @lines = split /></, $pr; 2046 2047 my %programme = (); 2048 my ($title, $time_start, $time_end, $rerun, $first, $id, $anno, $nazione, 2049 $desc, $epnum, $subtitle, $category, $rating, $audio, $doppioaudio, $sottotitoli); 2050 2051 ($time_start, $time_end, $rerun, $first) = ($2, $1, $3, $4); 2052 2053 2054 2055 my $lastline; 2056 foreach my $l(@lines) { 2057 next if ($l=~/anno\/|paese\/|linkScheda|trafficLight\/|orafine|\!--sort|\!-- canale|\/durata|\/prg|\/canale|\/giorno|\/palinsesto/); 2058 if ($l=~/titolo>(.*?)<\/titolo/){ 2059 $title=$1; 2060 #warn "titolo: $title\n"; 2061 } 2062 elsif ($l=~/descrizione>(.*?)<\/descrizione/){ 2063 $subtitle = $1; 2064 #warn "descrizione: $desc\n"; 2065 } 2066 elsif ($l=~/anno>(.*?)<\/anno/){ 2067 $anno=$1; 2068 #warn "anno: $1\n"; 2069 } 2070 elsif ($l=~/paese>(.*?)<\/paese/){ 2071 $nazione=$1; 2072 #warn "nazione: $1\n"; 2073 } 2074 elsif ($l=~/tipologia>(.*?)<\/tipologia/){ 2075 $category=$1; 2076 #warn "categoria: $1\n"; 2077 } 2078 elsif ($l=~/parentalRating>(.*?)<\/parentalRating/){ 2079 $rating=$1; 2080 #warn "categoria: $1\n"; 2081 } 2082 elsif ($l=~/trafficLight>(.*?)<\/trafficLight/){ 
2083 $rating=$1; 2084 #warn "categoria: $1\n"; 2085 } 2086 elsif ($l=~/parentalRating\/.*/){ 2087 #$rating=$1; 2088 #warn "categoria: $1\n"; 2089 } 2090 elsif ($l=~/audio audioType=\"(.*?)\" doppioAudio=\"(.*?)\" sottotitoli=\"(.*?)\">Not used<\/audio/ or 2091 $l=~/audio audioType=\"(.*?)\" doppioAudio=\"(.*?)\" sottotitoli=\"(.*?)\">Not live<\/audio/){ 2092 ($audio, $doppioaudio, $sottotitoli) = ($1, $2, $3); 2093 #warn "audio: $audio, $doppioaudio, $sottotitoli\n"; 2094 } 2095 2096 else { 2097 warn "linea sconosciuta !!$l!! \n"; 2098 } 2099 $lastline = $l; 2100 } 2101 next unless $title; 2102 2103 $time_end=~s/:/\./; $time_start=~s/:/\./; 2104 my $next_day_end = 0; my $next_day_start = 0; 2105 if ($time_end < 6) { 2106 $next_day_end = 1; 2107 } 2108 if ($time_start < 6) { 2109 $next_day_start = 1; 2110 $next_day_end = 1; 2111 } 2112 $time_end=~s/\./:/; $time_start=~s/\./:/; 2113 $programme{stop}=xmltv_date($time_end, $offset + $next_day_end); 2114 2115 2116 # Three mandatory fields: title, start, channel. 2117 if (not defined $title) { 2118 warn ($DEF_LANG eq 'eng' ? 'no title found, skipping programme' : 'titolo non trovato, salto'); 2119 next; 2120 } 2121 $programme{title}=[[tidy($title), $LANG] ]; 2122 if (not defined $time_start) { 2123 warn ($DEF_LANG eq 'eng' ? "no start time for title $title, skipping programme" : "nessun orario di inizio per $title, salto"); 2124 next; 2125 } 2126 #$time_start=~s/://; 2127 $programme{start}=xmltv_date($time_start, $offset + $next_day_start); 2128 if (not defined $xmltv_id) { 2129 warn ($DEF_LANG eq 'eng' ? 
"no channel for programme $title at $time_start, skipping programme" : "canale non trovato per $title alle $time_start, salto"); 2130 next; 2131 } 2132 2133 $programme{channel}=$xmltv_id; 2134 $programme{title} = [[tidy($title), $LANG] ] if (defined $title); 2135 $programme{date}= $anno if (defined $anno); 2136 $programme{premiere} = ['prima TV', $LANG ] if(defined $first and $first ne 'No'); 2137 #non mi e' chiaro come funziona 2138 #$programme{audio}= ['stereo', tidy($audio)] if (defined $audio); 2139 $programme{rating}=[[tidy($rating), $LANG ]] if (defined $rating); 2140 $programme{category}=[[tidy($category), 'mediaset' ]] if defined $category; 2141 2142 #$programme{'sub-title'}=[[$subtitle, $LANG] ] if (defined $subtitle and $subtitle ne ''); 2143 #push@{$programme->{'episode-num'}}, [$strseason, 'onscreen'] if ($strseason); 2144 #push@{$programme->{'episode-num'}}, [(defined $season ? ($season-1) : '').".".(defined $episode ? ($episode-1) : '').".0/1", 'xmltv_ns'] if ($strseason); 2145 2146 push (@{$programme{country}}, [$nazione, $LANG]) if ($nazione and $nazione ne '--'); 2147 $programme{desc}=[[tidy($desc), $LANG ]] if (defined $desc and $desc ne ''); 2148 2149 #put info in array 2150 push @programmes, {%programme}; 2151 } 2152 2153 if (scalar @programmes) { 2154 return (0, @programmes); 2155 } 2156 else { 2157 # there is a number of reasons why we could get an empty array. 
2158 # so we return an error 2159 return (1, @programmes); 2160 } 2161} 2162 2163######################## 2164# iris functions # 2165######################## 2166 2167######################################################### 2168# iris_get_channels_list 2169# since this site only has one channel this is a fake sub 2170sub iris_get_channels_list { 2171 my %chan_hash = ( 'iris' ,'iris.mediaset.it'); 2172 2173 return %chan_hash; 2174} 2175 2176#################################################### 2177# iris_fetch_data 2178# 2 parameters: xmltv_id of channel 2179# day offset 2180# returns an error or an array of data 2181sub iris_fetch_data { 2182 my ($xmltv_id, $offset) = @_; 2183 my $content; 2184 2185 my %mesi=('01', 'GENNAIO', '02', 'FEBBRAIO', '03', 'MARZO', 2186 '04', 'APRILE', '05', 'MAGGIO', '06', 'GIUGNO', 2187 '07', 'LUGLIO', '08', 'AGOSTO', '09', 'SETTEMBRE', 2188 '10', 'OTTOBRE', '11', 'NOVEMBRE', '12', 'DICEMBRE'); 2189 2190 # date to grab 2191 my $grabdate = UnixDate(&DateCalc("today","+ ".$offset." days"), '%Y:%m:%d'); 2192 my ($anno, $mese, $giorno) = split /:/, $grabdate; 2193 my $dateok = "$giorno ".$mesi{$mese}; 2194 2195 # build urls to grab 2196 my @urls; 2197 2198 #per fare prima prendo tutti e 4 i file e butto quello che non serve 2199 #metto una stringa dopo l'url per evitare che i file vadano nella cache del giorno dopo 2200 2201 my $cachestring = "?pippo=".UnixDate("today","%Y%m%d"); 2202 2203 push @urls, $backend_info{iris}{base_data}."palinsesto1.shtml".$cachestring; 2204 push @urls, $backend_info{iris}{base_data}."palinsesto2.shtml".$cachestring; 2205 push @urls, $backend_info{iris}{base_data}."palinsesto3.shtml".$cachestring; 2206 push @urls, $backend_info{iris}{base_data}."palinsesto4.shtml".$cachestring; 2207 2208 my $site_id = $backend_info{iris}{site_ids}{$xmltv_id}{site_id}; 2209 2210 if (not defined $site_id) { 2211 warn ($DEF_LANG eq 'eng' ? 
2212 "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" : 2213 "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose); 2214 return (1, ()); 2215 } 2216 2217 my @programmes = (); 2218 foreach my $url (@urls) { 2219 2220 warn ($DEF_LANG eq 'eng' ? 2221 "VERBOSE: fetching $url\n" : 2222 "VERBOSE: scarico $url\n") if ($opt_verbose); 2223 2224 eval { $content=get_nice($url) }; 2225 if ($@) { #get_nice has died 2226 warn ($DEF_LANG eq 'eng' ? 2227 "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend iris\n" : 2228 "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte iris\n") if ($opt_verbose); 2229 2230 # Indicate to the caller that we had problems 2231 return (1, ()); 2232 } 2233 2234 warn "VERBOSE: parsing...\n" if ($opt_verbose); 2235 2236 my @days = split /class\=\"day/, $content; 2237 2238 my $grabok = 0; 2239 my ($title, $time_start); 2240 foreach my $day (@days){ 2241 my @ore = split /<div class\=\"ora\"\>/, $day; 2242 foreach my $ora (@ore) { 2243 if ($ora=~/<p>(.*?)<\/p><\/div><div class=\"info\"><h4>(.*?)<\/h4>(.*)/){ 2244 if ($grabok) { 2245 $title = $2; 2246 $time_start = $1; 2247 2248 my %programme = (); 2249 my $ex = $3; 2250 if ($ex=~/^<p>(.*?)<\/p>/) {$ex = $1;} else {$ex = "";} 2251 if ($ex=~/(.*)REGIA DI\: (.*)/){ 2252 $ex = $1; 2253 push @{$programme{credits}{director}}, $2; 2254 } 2255 if ($ex=~/CON\: (.*)/){ 2256 my $c = $1; 2257 my @cast; 2258 if ($ex=~/ - /){@cast = split / - /, $c;} 2259 else {@cast = split / /, $c;} 2260 2261 foreach my $act (@cast){ 2262 next if (tidy($act) eq ''); 2263 push @{$programme{credits}{actor}}, tidy($act); 2264 } 2265 } 2266 if ($ex=~/A CURA DI (.*)/){ 2267 push @{$programme{credits}{presenter}}, tidy($1); 2268 } 2269 2270 my $time_start2 = $time_start; 2271 $time_start2=~s/:/\./; 2272 my $next_day_start = 0; 2273 if ($time_start2 < 7) { 2274 $next_day_start = 1; 2275 } 2276 2277 2278 $programme{title}=[[tidy($title), $LANG] ]; 2279 
$programme{start}=xmltv_date($time_start, $offset + $next_day_start); 2280 $programme{channel}=$xmltv_id; 2281 #put info in array 2282 push @programmes, {%programme}; 2283 2284 } 2285 2286 } 2287 elsif ($ora=~/<h3>.*? (.*?)<\/h3/) { 2288 if ($dateok eq $1) { 2289 $grabok = 1; 2290 } 2291 else { 2292 $grabok = 0; 2293 } 2294 } 2295 } 2296 } 2297 #} 2298 } 2299 2300 if (scalar @programmes) { 2301 return (0, @programmes); 2302 } 2303 else { 2304 # there is a number of reasons why we could get an empty array. 2305 # so we return an error 2306 return (1, @programmes); 2307 } 2308} 2309 2310 2311 2312######################## 2313# rai.it functions # 2314######################## 2315 2316#################################################### 2317# raiit_get_channels_list 2318sub raiit_get_channels_list { 2319 my %chan_hash; 2320 2321 2322 my $url = $backend_info{raiit}{base_chan}; 2323 warn ($DEF_LANG eq 'eng' ? 2324 "VERBOSE: Getting channel list from $url\n" : 2325 "VERBOSE: Scarico la lista dei canali da $url\n") if ($opt_verbose); 2326 2327 my $content; 2328 eval { $content = get_nice($url); }; 2329 if ($@) { #get_nice has died 2330 warn ($DEF_LANG eq 'eng' ? 2331 "VERBOSE: Cannot get raiit's channel list ($url). Site \\n" : 2332 "VERBOSE: Non sono riuscito a prendere la lista dei canali di raiit($url). 
Il sito non funziona?\n") unless ($opt_quiet); 2333 return (); 2334 } 2335 2336 $content=~s/\n|\r//igm; 2337 $content=~/<a name=\"NP_ContentsMenu\">.*?<\/style>(.*?)<a name=\"GP_MenuAccessibile/; 2338 $content=$1; 2339 2340 my @canali = split /<a style=\"\"/, $content; 2341 foreach my $canale (@canali) { 2342 next unless ($canale=~/ class=/); 2343 $canale=~/generico \" title=\"(.*?)\" id=\"(.*?)\"/; 2344 my $name = tidy($1); 2345 my $id =$2; $id=~/^btn_(.*)/; $id=$1; $id=~s/_/-/igm; 2346 $chan_hash{$name} = "$id"; 2347 2348 #warn "canale name $name id $id\n"; 2349 2350 #update backend info, in case this is a new channel not in channel_ids 2351 my $xmltv_id = xmltv_chanid('raiit', $name); 2352 $backend_info{raiit}{site_ids}{$xmltv_id}{site_id} = $name; 2353 $backend_info{raiit}{site_ids}{$xmltv_id}{chanid} = $chan_hash{$name}; 2354 } 2355 return %chan_hash; 2356} 2357 2358#################################################### 2359# raiit_fetch_data 2360# 2 parameters: xmltv_id of channel 2361# day offset 2362# returns an error or an array of data 2363 2364sub raiit_fetch_data { 2365 my ($xmltv_id, $offset) = @_; 2366 my $content; 2367 2368 my %romans =( 2369 'I' => 1, 2370 'II' => 2, 2371 'III' => 3, 2372 'IV' => 4, 2373 'V' => 5, 2374 'VI' => 6, 2375 'VII' => 7, 2376 'VIII' => 8, 2377 'IX' => 9, 2378 'X' => 10, 2379 'XI' => 11, 2380 'XII' => 12, 2381 'XIII' => 13, 2382 'XIV' => 14, 2383 'XV' => 15, 2384 'XVI' => 16, 2385 'XVII' => 17, 2386 'XVIII' => 18, 2387 'XIX' => 19, 2388 'XX' => 20, 2389 ); 2390 2391 my $site_id = $backend_info{raiit}{site_ids}{$xmltv_id}{site_id}; 2392 if (not defined $site_id) { 2393 warn ($DEF_LANG eq 'eng' ? 
2394 "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" : 2395 "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose); 2396 return (1, ()); 2397 } 2398 2399 # Indicate to the caller if we have problems (channels disappearing from the site maybe) 2400 return (1, ()) if (not defined ($backend_info{raiit}{channels}{$site_id})); 2401 2402 # build url to grab 2403 my $url = $backend_info{raiit}{base_data}.$backend_info{raiit}{site_ids}{$xmltv_id}{chanid}.'_'.UnixDate(&DateCalc("today","+ ".$offset." days"), '%Y_%m_%d').'.html'; 2404 2405 2406 #as with other grabber we trick memoize into not caching data 2407 #however, we do this only for the first day, other days use cache 2408 my $cachestring = "?pippo=".UnixDate("today","%Y%m%d%H") if ($offset == 0); 2409 2410 my @prog_to_check = (); 2411 2412 my $grabdate = UnixDate(&DateCalc("today","+ ".($offset)." days"), '%Y%m%d'); 2413 my $tomorrowdate = UnixDate(&DateCalc("today","+ ".($offset + 1)." days"), '%Y%m%d'); 2414 $url.=$cachestring if (($offset) == 0); 2415 2416 warn "VERBOSE: fetching $url\n" if ($opt_verbose); 2417 2418 eval { $content=get_nice($url) }; 2419 if ($@) { #get_nice has died 2420 warn ($DEF_LANG eq 'eng' ? 2421 "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend raiit\n" : 2422 "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte raiit\n") if ($opt_verbose); 2423 2424 # Indicate to the caller that we had problems 2425 return (1, ()); 2426 } 2427 2428 warn "VERBOSE: parsing...\n" if ($opt_verbose); 2429 2430 #split and parse the lines 2431 my @lines = split /\n/, $content; 2432 foreach my $line (@lines) { 2433 next unless ($line=~/^<span class/); 2434 2435 #<span class="ora">10:45</span><span class="info">Winx Club - Ep. 
9</span> at ./tv_grab_it line 2275 2436# $line=~/span class=\"ora\">(.*?)<\/span><span class=\"info\">(.*?)<\/span/; 2437 #<span class="ora">21:10</span> <span class="info"><a ID_EVENTO="61995269" LAVORABILE="1" rel="Rai4">Fuoco assassino</a></span> at ./tv_grab_it line 2301. 2438 $line=~/span class=\"ora\">(.*?)<\/span> <span class=\"info\"><a.*?>(.*?)<\/a><\/span/; 2439 my ($start, $title) = ($1, $2); 2440 next unless $start; 2441 next unless $title; 2442 my $start2=$start; $start2=~s/:/\./; 2443 if ($start2 <6 and $start2>=0) { #they can work as decimals, 0.32 < 23.44 2444 push @prog_to_check, [$title, $tomorrowdate." ".$start]; 2445 } 2446 else { 2447 push @prog_to_check, [tidy($title), $grabdate." ".$start]; 2448 } 2449 } 2450 2451 2452 2453 my @programmes = (); 2454 foreach (@prog_to_check) { 2455 my %programme = (); 2456 my ($title, $start) = @{$_}; 2457 2458 my $temptitle = $title; 2459 2460 my ($subtitle, $category, $season, $episode, $desc); 2461 if ($title=~/title=\"(.*?)\"/){ 2462 $title = $1; 2463 } 2464 if ($title=~/<a>(.*?)<\/a>/){ 2465 2466 $title = $1; 2467 } 2468 if ($title=~/<a href.*?>(.*?)<\/a>/){ 2469 2470 $title = $1; 2471 } 2472 2473 if ($title=~/FILM TV (.*)/){ 2474 $title = $1; 2475 $category='Film'; 2476 } 2477 elsif ($title=~/FILM (.*)/){ 2478 $title = $1; 2479 $category='Film'; 2480 } 2481 elsif ($title=~/TELEFILM (.*)/){ 2482 $title = $1; 2483 $category='Telefilm'; 2484 } 2485 2486 if ($title=~/(.*) - Ep.(\d+)/i){ 2487 ($title, $episode) = ($1, $2); 2488 } 2489 elsif ($title=~/(.*) - Ep. (\d+)/i){ 2490 ($title, $episode) = ($1, $2); 2491 } 2492 elsif ($title=~/(.*) Ep.(\d+)/i){ 2493 ($title, $episode) = ($1, $2); 2494 } 2495 elsif ($title=~/(.*) Ep. (\d+)/i){ 2496 ($title, $episode) = ($1, $2); 2497 } 2498 2499 2500 2501 if ($title=~/(.*) - (\d+).\ serie\s+Ep\.(\d+)/){ 2502 ($title, $season, $episode) = ($1, $2, $3); 2503 } 2504 elsif ($title=~/(.*) - (\d+).\ serie\s+Ep. 
(\d+)/){ 2505 ($title, $season, $episode) = ($1, $2, $3); 2506 } 2507 elsif ($title=~/(.*) (\d+).\ serie\s+Ep. (\d+)/){ 2508 ($title, $season, $episode) = ($1, $2, $3); 2509 } 2510 elsif ($title=~/(.*) (\d+).\ serie\s+Ep.(\d+)/){ 2511 ($title, $season, $episode) = ($1, $2, $3); 2512 } 2513 elsif ($title=~/(.*) (\d+).\ serie - Ep.(\d+)/){ 2514 ($title, $season, $episode) = ($1, $2, $3); 2515 } 2516 2517 2518 2519 if ($title=~/(.*) - (.*)/){ 2520 ($title, $subtitle) = ($1, $2); 2521 } 2522 elsif ($title=~/(.*): \"(.*?)\"/){ 2523 ($title, $subtitle) = ($1, $2); 2524 } 2525 elsif ($title=~/(.*) \"(.*?)\"$/){ 2526 ($title, $subtitle) = ($1, $2); 2527 } 2528 if ($title=~/(.*) [Ss]erie$/){ 2529 $title = $1; 2530 } 2531 2532 #prove per numerali romani 2533 my $roman; 2534 if ($title=~/(.*?) [sS]tagione ([IVX]+)/){ 2535 ($title, $roman) = ($1, $2); 2536 } 2537 elsif ($title=~/(.*?) ([IVX]+) [sS]tagione/){ 2538 ($title, $roman) = ($1, $2); 2539 } 2540 elsif (defined $episode and not defined $season and $title=~/(.*?) ([IVX]+)$/){ 2541 ($title, $roman) = ($1, $2); 2542 } 2543 2544 if ($title=~/(.*) [Ss]erie$/){ 2545 $title = $1; 2546 } 2547 2548 2549 $season = $romans{$roman} if (defined $roman); 2550 2551 if ($temptitle=~/.*Ep\..*? 
- (.*)/){ 2552 $subtitle = $1; 2553 } 2554 if ($title=~/(.*) tx (.*)/){ 2555 ($title, $subtitle) = ($1, $2); 2556 } 2557 2558 #if ( 2559 2560 my $strseason = ''; 2561 $strseason.= 'Stagione '.$season if ($season); 2562 if ($episode and $season){ 2563 $strseason.= ' Episodio '.$episode ; 2564 } 2565 elsif ($episode) { 2566 $strseason.= 'Episodio '.$episode ; 2567 } 2568 2569 if ($strseason ne '' and $subtitle){ 2570 $subtitle="$strseason - ".$subtitle ; 2571 } 2572 elsif ($strseason ne '') { 2573 $subtitle=$strseason; 2574 }; 2575 2576 $programme{title} = [[tidy($title), $LANG] ]; 2577 $programme{start} = utc_offset(UnixDate($start, '%Y%m%d%H%M').'00', '+0100'); 2578 $programme{channel} = $xmltv_id; 2579 $programme{desc}=[[tidy($desc), $LANG ]] if (defined $desc and $desc ne ''); 2580 push@{$programme{'episode-num'}}, [$strseason, 'onscreen'] if ($strseason); 2581 push@{$programme{'episode-num'}}, [(defined $season ? ($season-1) : '').".".(defined $episode ? ($episode-1) : '').".0/1", 'xmltv_ns'] if ($strseason); 2582 $programme{'sub-title'}=[[$subtitle, $LANG] ] if (defined $subtitle and $subtitle ne ''); 2583 push (@{$programme{category}}, [tidy($category), $LANG ]) if (defined $category and tidy($category) ne ''); 2584 #$programme{desc}=[[tidy($desc), $LANG ]] if ($desc ne ''); 2585 push @programmes, {%programme} if ($start and $title); 2586 #warn "titolo $title\nsott $subtitle\nepisode $episode\nstagione $season\nseason $strseason\n------\n"; 2587 } 2588 2589 if (scalar @programmes) { 2590 return (0, @programmes); 2591 } 2592 else { 2593 # there is a number of reasons why we could get an empty array. 
2594 # so we return an error 2595 return (1, @programmes); 2596 } 2597} 2598 2599######################### 2600# dahliatv.it functions # 2601######################### 2602 2603#################################################### 2604# dahlia_get_channels_list 2605sub dahlia_get_channels_list { 2606 my %chan_hash; 2607 2608 my $url = $backend_info{dahlia}{base_chan}; 2609 warn ($DEF_LANG eq 'eng' ? 2610 "VERBOSE: Getting channel list from $url\n" : 2611 "VERBOSE: Scarico la lista dei canali da $url\n") if ($opt_verbose); 2612 2613 my $content; 2614 eval { $content = get_nice($url); }; 2615 $content=~/(.*?)box_canali_body/igm; 2616 $content=$1; 2617 2618 if ($@) { #get_nice has died 2619 warn ($DEF_LANG eq 'eng' ? 2620 "VERBOSE: Cannot get dahliatv.it's channel list ($url). Site down?\n" : 2621 "VERBOSE: Non sono riuscito a prendere la lista dei canali di dahliatv.it ($url). Il sito non funziona?\n" 2622 ) unless ($opt_quiet); 2623 return (); 2624 } 2625 2626 2627 my @lines = split /<div class=\"/, $content; 2628 foreach my $line (@lines) { 2629 if ($line=~/(.*?)\">/){ 2630#<div id="lun" class="panel"> 2631#div class="sport"></div><div class="sport2"></div><div class="extra"></div><div class="xtreme"></div><div class="eros"></div><div class="explorer"></div><div class="palermo"></div><div class="calcio1"></div><div class="calcio2"></div><div class="calcio3"></div><div class="calcio4"></div><div class="calcio5"></div><div class="adult1"></div><div class="adult2"></div><div class="adult3"></div></div><div id="mar" class="panel"> 2632 2633 2634 2635 my ($channame)=(tidy($1)); 2636 next if (not defined $channame); 2637 2638 $chan_hash{$channame} = $backend_info{dahlia}{base_data}; 2639 2640 2641 my $xmltv_id = xmltv_chanid('dahlia', $channame); 2642 $backend_info{dahlia}{site_ids}{$xmltv_id}{site_id} = $channame; 2643 } 2644 } 2645 2646 return %chan_hash; 2647} 2648 2649#################################################### 2650# dahlia_fetch_data 2651# 2 parameters: 
#               xmltv_id of channel
#               day offset
# returns (0, list of programme hashrefs) on success, or (1, ...) when the
# channel is unknown, the download fails or no programme could be parsed
sub dahlia_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    my $site_id = $backend_info{dahlia}{site_ids}{$xmltv_id}{site_id};

    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
              "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    # day to keep, in the same format as the date="..." attribute
    my $date_grab = DateCalc("today","+ ".$offset." days");
    die ($DEF_LANG eq 'eng' ? 'date calculation failed' : 'errore di calcolo di data') if not defined $date_grab;
    my $grabdate = UnixDate($date_grab, '%Y/%m/%d');

    my $url = $backend_info{dahlia}{base_data};

    #to trick memoize into not caching data we add a string to the url, based on time, with hourly resolution
    #so if we redownload data within 5 minutes (we are within the same run) it comes from the cache
    #but if we download it tomorrow it doesn't.
    #this makes sense if you use the --cache option and you want to cache only the --slow data, to speed up things
    #when you grab data every two-three days, but you don't want to miss schedule changes
    #this string is ignored by the server
    if ($opt_cache_slow) {
        $url .= "?pippo=".UnixDate("today","%Y%m%d%H");
    }
    warn ($DEF_LANG eq 'eng' ?
          "VERBOSE: fetching $url\n" :
          "VERBOSE: scarico $url\n") if ($opt_verbose);

    eval { $content=get_nice($url) };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend dahlia\n" :
              "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte dahlia\n") if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    }

    my @programmes = ();
    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    #split and parse: one chunk per <Event> element, e.g.
    # <Event ALFAORDER="A" DURATION="00:55:00" EVENT_TYPE="FTA" LOG_LINE="" MINIMALAGE="0" NAME="CESENA VS MILAN" SERVICENAME="calcio2"
    #  SYNOPSIS="CAMPIONATO SERIE A TIM - CESENA VS MILAN - ANTICIPO 2A GIORNATA ANDATA"
    #  TIMEOFFSET="+2" date="2010/09/11" dayofweek="Sabato" theme="Calcio" time="20:40:00" visualduration="00:55:00" visualtime="20:40"/>
    foreach my $pr (split /<Event/, $content) {
        # Skip chunks that do not match: an unchecked match would leave
        # the capture variables of a previous match in place.
        next unless $pr=~/NAME=\"(.*?)\" SERVICENAME=\"(.*?)\" SYNOPSIS=\"(.*?)\" TIMEOFFSET=\"(.*?)\" date=\"(.*?)\" dayofweek=\"(.*?)\" theme=\"(.*?)\" time=\"(.*?)\" visualduration=\"(.*?)\" visualtime=\"(.*)\"/;

        my ($title, $channel, $desc, $date, $category, $time_start, $duration, $visualtime) = ($1, $2, $3, $5, $7, $8, $9, $10);
        next unless $title;
        next if $xmltv_id ne xmltv_chanid('dahlia', $channel);
        next if $date ne $grabdate;
        next if $visualtime=~/ del /;

        # date is YYYY/MM/DD and time is HH:MM:SS; build YYYYMMDDHHMMSS
        next unless $date=~/(....).(..).(..)/;
        $date = "$1$2$3";
        next unless $time_start=~/(..).(..).(..)/;
        $time_start = $date."$1$2$3";

        if (not defined $xmltv_id) {
            warn ($DEF_LANG eq 'eng' ? "no channel for programme $title at $time_start, skipping programme" : "canale non trovato per $title alle $time_start, salto");
            next;
        }

        # Three mandatory fields: title, start, channel.
        my %programme = ();
        $programme{title}   = [[tidy($title), $LANG] ];
        $programme{start}   = utc_offset($time_start, '+0100');
        $programme{channel} = $xmltv_id;

        # stop = start + duration (HH:MM:SS); left out when the duration
        # cannot be parsed or the date calculation fails
        if ($duration=~/(..).(..).(..)/) {
            my $time_stop = DateCalc("$time_start","+ $1 hours $2 minutes $3 seconds");
            if (defined $time_stop) {
                $time_stop=~s/\://g;
                $programme{stop}=utc_offset($time_stop, '+0100');
            }
        }

        $programme{category}=[[tidy($category), $LANG ]] if defined $category;
        $programme{desc}=[[tidy($desc), $LANG ]] if (defined $desc and $desc ne '');

        #put info in array
        push @programmes, {%programme};
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error
        return (1, @programmes);
    }
}

########################
# la7.it functions     #
########################

#########################################################
# la7_get_channels_list
# since this site only has two channels this is a fake sub
sub la7_get_channels_list {
    my %chan_hash = ( 'La7' ,'www.la7.it', 'La7D', 'www.la7d.it');

    return %chan_hash;
}

####################################################
# la7_fetch_data
# 2 parameters: xmltv_id of channel
#               day offset
# returns (0, list of programme hashrefs) on success, or (1, ...) when the
# channel is unknown, the download fails or no programme could be parsed
sub la7_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    my $site_id = $backend_info{la7}{site_ids}{$xmltv_id}{site_id};
    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
              "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    # adds today date after url to avoid using today's data for tomorrow
    my $cachestring = "?pippo=".UnixDate("today","%Y%m%d");

    # day 0 lives at index.html, later days at index_<offset>.html
    my $url = $backend_info{la7}{base_data}.$offset.'.html'.$cachestring;
    $url=~ s/index_0/index/;

    warn ($DEF_LANG eq 'eng' ?
          "VERBOSE: fetching $url\n" :
          "VERBOSE: scarico $url\n") if ($opt_verbose);

    eval { $content=get_nice($url) };
    if ($@) { #get_nice has died
        # same reporting as the other backends, so --quiet is honoured
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend la7\n" :
              "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte la7\n") if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    }

    my @programmes = ();

    # the same page holds the La7 schedule first, then the La7D one
    my $got_la7d = 0;

    foreach my $line (split /<li>\n<div class=\"sx\">\n/, $content) {

        # Reach La7D: skip everything up to and including the chunk that
        # opens the La7D section
        if( !$got_la7d && $xmltv_id eq 'www.la7d.it' ) {
            next unless ($line =~ /<div id=\"palinsesto_la7d\"/);
            $got_la7d = 1;
            next;
        }

        # Reach begin of programs
        next unless $line=~/<p class=\"ora/;

        my %programme = ();

        # the title is either wrapped in a link or plain text; the link
        # form takes precedence when both patterns match
        my ($time_start, $title, $desc);
        if ($line=~/<p class=\"ora\">(.*?)<\/p>\n<h5>\n<a.*\">\n(.*?)\n<\/a>.*?\n.*?\n.*?\n.*?\n.*?\n(.*?)\n/m) {
            ($time_start, $title, $desc) = ($1, $2, $3);
        }
        elsif ($line=~/<p class=\"ora\">(.*?)<\/p>\n<h5>\n(.*?)\n<\/h5>.*?\n.*?\n.*?\n.*?\n(.*?)\n/m) {
            ($time_start, $title, $desc) = ($1, $2, $3);
        }

        # Three mandatory fields: title, start, channel.
        if (not defined $title) {
            warn 'no title found, skipping programme';
            next;
        }
        $programme{title}=[[tidy($title), $LANG] ];
        if (not defined $time_start) {
            warn "no start time for title $title, skipping programme";
            next;
        }

        # times before 06:00 belong to the following calendar day
        (my $time_start2 = $time_start) =~ s/://;
        if ($time_start2 <600 and $time_start2>=0) {
            $time_start = xmltv_date($time_start, $offset + 1);
        }
        else {
            $time_start = xmltv_date($time_start, $offset );
        }

        $programme{start}=$time_start;
        $programme{channel}=$xmltv_id;
        $programme{desc}=[[tidy($desc), $LANG ]] if (defined $desc and $desc ne '');

        #put info in array
        push @programmes, {%programme};

        # This check must be at the end of the cycle,
        # otherwise we lose the last program of every day
        if( $xmltv_id eq 'www.la7.it' ) {
            # Stop at the end of La7
            last if ($line =~ /<div id=\"palinsesto_la7d\"/);
        }

    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error
        return (1, @programmes);
    }
}


#########################
# mediaset.it functions #
#########################

####################################################
# mediaset_get_channels_list
#
# Downloads the channel list and returns a hash channel name => site id
# (the first id seen for a name wins). Also records site_id and chanid for
# every channel in $backend_info{mediaset}{site_ids}. Returns an empty
# list when the download fails.
sub mediaset_get_channels_list {
    my %chan_hash;

    my $url = $backend_info{mediaset}{base_chan};
    warn ($DEF_LANG eq 'eng' ?
          "VERBOSE: Getting channel list from $url\n" :
          "VERBOSE: Scarico la lista dei canali da $url\n") if ($opt_verbose);

    my $content;
    eval { $content = get_nice($url); };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: Cannot get mediaset's channel list ($url). Site down?\n" :
              "VERBOSE: Non sono riuscito a prendere la lista dei canali di mediaset ($url). Il sito non funziona?\n") unless ($opt_quiet);
        return ();
    }

    foreach my $canale (split /\n/, $content) {
        next unless $canale=~/rete nome=\"(.*?)\" id=\"(.*?)\"/;

        # copy the captures before calling anything else
        my ($raw_name, $id) = ($1, $2);
        my $name = tidy($raw_name);
        next if defined $chan_hash{$name};
        $chan_hash{$name} = $id;

        #update backend info, in case this is a new channel not in channel_ids
        my $xmltv_id = xmltv_chanid('mediaset', $name);
        $backend_info{mediaset}{site_ids}{$xmltv_id}{site_id} = $name;
        $backend_info{mediaset}{site_ids}{$xmltv_id}{chanid} = $chan_hash{$name};
    }

    return %chan_hash;
}

####################################################
# mediaset_fetch_data
# 2 parameters: xmltv_id of channel
#               day offset
# returns an error or an array of data

sub mediaset_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    my $site_id = $backend_info{mediaset}{site_ids}{$xmltv_id}{chanid};

    if (not defined $site_id) {
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" :
              "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose);
        return (1, ());
    }

    my %chan = %{$backend_info{mediaset}{channels}};

    my $date_grab = &DateCalc("today","+ ".$offset." days");


    die ($DEF_LANG eq 'eng' ? 'date calculation failed' : 'errore di calcolo di data') if not defined $date_grab;
    $date_grab = UnixDate($date_grab, '%Y%m%d');

    my $url = $backend_info{mediaset}{base_data};

    #to trick memoize into not caching data with add a string to the url, based on time, with hourly resolution
    #so if we redownload data within 5 minutes (we are within the same run) it comes from the cache
    #but if we download it tomorrow it doesn't.
    #this makes sense if you use the --cache option and you want to cache only the --slow data, to speed up things
    #when you grab data every two-three days, but you don't want to miss schedule changes
    #this string is ignored by the server
    if ($opt_cache_slow) {
        my $cachestring = "?pippo=".UnixDate("today","%Y%m%d%H");
        $url.=$cachestring;
    }
    warn ($DEF_LANG eq 'eng' ?
          "VERBOSE: fetching $url\n" :
          "VERBOSE: scarico $url\n") if ($opt_verbose);

    eval { $content=get_nice($url) };
    if ($@) { #get_nice has died
        warn ($DEF_LANG eq 'eng' ?
              "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend mediasetpremium\n" :
              "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte mediasetpremium\n") if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    }

    my @programmes = ();
    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    #split and parse
    my @p = split /\n/, $content;
    foreach my $pr (@p) {
        next unless $pr=~/$site_id/;
        next unless $pr=~/$date_grab/;
        next unless $pr=~/programma idref=\"(.*?)\" timestamp=\"(.*?)\" criptato=\"(.*?)\" titolo=\"(.*?)\" ora=\"(.*?)\"/;
#       warn $pr."-----------------\n";
#       warn "|$1|$2|$3|$4|$5|\n";
        my %programme = ();
        my ($id, $time_start, $criptato, $title) = ($1, $5, $3, $4);
        my $subtitle;

        next unless $title;

#       $time_end=~s/:/\./; $time_start=~s/:/\./;
#       my $next_day_end = 0; my $next_day_start = 0;
#       if ($time_end < 6) {
#           $next_day_end = 1;
#       }
#       if ($time_start < 6) {
#           $next_day_start = 1;
#           $next_day_end = 1;
#       }
#       $time_end=~s/\./:/; $time_start=~s/\./:/;
#       $programme{stop}=xmltv_date($time_end, $offset + $next_day_end);


        # Three mandatory fields: title, start, channel.
        if (not defined $title) {
            warn ($DEF_LANG eq 'eng' ? 'no title found, skipping programme' : 'titolo non trovato, salto');
            next;
        }
        $programme{title}=[[tidy($title), $LANG] ];
        if (not defined $time_start) {
            warn ($DEF_LANG eq 'eng' ? "no start time for title $title, skipping programme" : "nessun orario di inizio per $title, salto");
            next;
        }
        #$time_start=~s/://;
        $programme{start}=xmltv_date($time_start, $offset);
        if (not defined $xmltv_id) {
            warn ($DEF_LANG eq 'eng' ? "no channel for programme $title at $time_start, skipping programme" : "canale non trovato per $title alle $time_start, salto");
            next;
        }

        $programme{channel}=$xmltv_id;
        if ($title =~ m/--/) {
            my @fulltitle = split("--", $title);
            $title = $fulltitle[0];
            $subtitle= tidy($fulltitle[1]);
        }
        $programme{title} = [[tidy($title), $LANG] ] if (defined $title);
        $programme{'sub-title'}=[[tidy($subtitle), $LANG ]] if (defined $subtitle);

        #put info in array
        push @programmes, {%programme};
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
3055 # so we return an error 3056 return (1, @programmes); 3057 } 3058} 3059 3060############################### 3061# mediaset_guidatv functions # 3062############################### 3063 3064#################################################### 3065# mediaset_guidatv_get_channels_list 3066sub mediaset_guidatv_get_channels_list { 3067 3068 ############################################### 3069 # l'unico url che ho trovato che contiene l'elenco di tutti i canali e' 3070 # http://www.mediaset.it/guidatv/palinsesto.xml 3071 # pero' e' un file di circa 2.5 mega, visto che questa funzione viene chiamata ad ogni 3072 # esecuzione del grabber preferisco fare anche qui una finta funzione che mi 3073 # ritorna un valore fisso 3074 # 3075 ## [honir] Don't like hard-coding variable data like this, so commented out 3076 ## 3077 if (0) { 3078 3079 my %chan_hash; 3080 3081 %chan_hash = ("BBC Knowledge", "EB", 3082 "Boing", "KB", 3083 "Canale 5", "C5", 3084 "Cartoonito", "LA", 3085 "Cartoon Network", "KN", 3086 "Discovery World", "ED", 3087 "Disney", "DY", 3088 "Disney Junior", "KP", 3089 "Eurosport 2", "EF", 3090 "Eurosport", "EE", 3091 "Fox Sports", "0F", 3092 "Fox Sports Plus", "0P", 3093 "Iris", "KI", 3094 "Italia 1", "I1", 3095 "Joi", "KJ", 3096 "La5", "KA", 3097 "Mediaset Extra", "KQ", 3098 "Mediaset Italia 2", "I2", 3099 "Mya", "KD", 3100 "Premium Action", "KS", 3101 "Premium Calcio", "KC", 3102 "Premium Cinema", "KE", 3103 "Premium Cinema Comedy", "LC", 3104 "Premium Cinema Emotion", "KO", 3105 "Premium Cinema Energy", "KG", 3106 "Premium Crime", "LR", 3107 "Retequattro", "R4", 3108 "Studio Universal", "KR", 3109 "TgCom24", "TG24", 3110 "Top Crime", "LT" 3111 ); 3112 3113 foreach (keys %chan_hash) { 3114 my $xmltv_id = xmltv_chanid('mediaset_guidatv', $_); 3115# warn $_."chan_hash-------------------"; 3116# warn $xmltv_id."xmltv_id-------------------"; 3117 $backend_info{mediaset_guidatv}{site_ids}{$xmltv_id}{site_id} = $_; 3118 } 3119 return %chan_hash; 3120 3121 } 3122 
################################################ fine fake sub 3123 3124 3125 my %chan_hash; 3126 3127 my $url = $backend_info{mediaset_guidatv}{base_chan}; 3128 warn ($DEF_LANG eq 'eng' ? 3129 "VERBOSE: Getting channel list from $url\n" : 3130 "VERBOSE: Scarico la lista dei canali da $url\n") if ($opt_verbose); 3131 3132 my $content; 3133 eval { $content = get_nice($url); }; 3134 if ($@) { #get_nice has died 3135 warn ($DEF_LANG eq 'eng' ? 3136 "VERBOSE: Cannot get mediaset_guidatv's channel list ($url). Site \\n" : 3137 "VERBOSE: Non sono riuscito a prendere la lista dei canali di mediaset_guidatv ($url). Il sito non funziona?\n") unless ($opt_quiet); 3138 return (); 3139 } 3140 3141 #split and parse 3142 my @events = split /<evento/, $content; 3143 foreach my $e (@events) { 3144 my ($c) = ( $e =~ /<channel>(.*?)<\/channel>/ ); 3145 my ($n) = ( $e =~ /<channelDescription>(.*?)<\/channelDescription>/ ); 3146 ($n) = ( $n =~ /<!\[CDATA\[(.*)\]\]>/ ) if defined $n; 3147 3148 next if (not defined $n); 3149 my $name = tidy($n); 3150 next if defined $chan_hash{$name}; 3151 3152 $chan_hash{$name} = "$c"; 3153 3154 #update backend info, in case this is a new channel not in channel_ids 3155 my $xmltv_id = xmltv_chanid('mediaset_guidatv', $name); 3156 $backend_info{mediaset_guidatv}{site_ids}{$xmltv_id}{site_id} = $name; 3157 $backend_info{mediaset_guidatv}{site_ids}{$xmltv_id}{chanid} = $chan_hash{$name}; 3158 } 3159 3160 return %chan_hash; 3161} 3162 3163 3164#################################################### 3165# mediaset_guidatv_fetch_data 3166# 2 parameters: xmltv_id of channel 3167# day offset 3168# returns an error or an array of data 3169 3170sub mediaset_guidatv_fetch_data { 3171 my ($xmltv_id, $offset) = @_; 3172 my $content; 3173 3174 my $site_id = $backend_info{mediaset_guidatv}{site_ids}{$xmltv_id}{site_id}; 3175# warn $site_id."site_id-----------------\n"; 3176 if (not defined $site_id) { 3177 warn ($DEF_LANG eq 'eng' ? 
3178 "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" : 3179 "VERBOSE: \tQuesto sito non sa niente di $xmltv_id!\n" ) if ($opt_verbose); 3180 return (1, ()); 3181 } 3182 3183 my %chan = %{$backend_info{mediaset_guidatv}{channels}}; 3184 my $channel_name = $backend_info{mediaset_guidatv}{site_ids}{$xmltv_id}{site_id}; 3185# warn "test-".$channel_name."channel_name-----------------\n"; 3186 my $channel_num = $chan{$channel_name}; 3187# warn $channel_num."channel_num-----------------\n"; 3188# warn $chan{"Canale 5"}."channel_num Canale 5-----------------\n"; 3189# warn $chan{"Retequattro"}."channel_num Rete 4-----------------\n"; 3190# warn $chan{"Cartoonito"}."channel_num Cartoonito-----------------\n"; 3191 # build url to grab" 3192 3193 if (not defined $channel_num) { 3194 # if we get here it means that the site should have the channel (it's in channel_ids) 3195 # but for some reason we are missing it's site id (probably the site is down) 3196 # we return an error so that another backend will by used, if possible 3197 warn ($DEF_LANG eq 'eng' ? 3198 "VERBOSE: \tThis site appears to be down!\n" : 3199 "VERBOSE: \tQuesto sito non sembra funzionare!!\n") if ($opt_verbose); 3200 return (1, ()); 3201 } 3202 3203 my $date_grab = &DateCalc("today","+ ".$offset." days"); 3204 3205 3206 die ($DEF_LANG eq 'eng' ? 'date calculation failed' : 'errore di calcolo di data') if not defined $date_grab; 3207 $date_grab = UnixDate($date_grab, '%Y%m%d'); 3208 my $date_grab2 = UnixDate($date_grab, '%Y-%m-%d'); 3209# warn $date_grab2; 3210 my $url = $backend_info{mediaset_guidatv}{base_data}; 3211 3212 #to trick memoize into not caching data with add a string to the url, based on time, with hourly resolution 3213 #so if we redownload data within 5 minutes (we are within the same run) it comes from the cache 3214 #but if we download it tomorrow it doesn't. 
3215 #this makes sense if you use the --cache option and you want to cache only the --slow data, to speed up things 3216 #when you grab data every two-three days, but you don't want to miss schedule changes 3217 #this string is ignored by the server 3218 if ($opt_cache_slow) { 3219 my $cachestring = "?pippo=".UnixDate("today","%Y%m%d%H"); 3220 $url.=$cachestring; 3221 } 3222 warn ($DEF_LANG eq 'eng' ? 3223 "VERBOSE: fetching $url\n" : 3224 "VERBOSE: scarico $url\n") if ($opt_verbose); 3225 3226 eval { $content=get_nice($url) }; 3227 if ($@) { #get_nice has died 3228 warn ($DEF_LANG eq 'eng' ? 3229 "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend mediaset_guidatv\n" : 3230 "VERBOSE: Errore nello scaricare $url, canale $xmltv_id, giorno $offset, fonte mediaset_guidatv\n") if ($opt_verbose); 3231 3232 # Indicate to the caller that we had problems 3233 return (1, ()); 3234 } 3235 3236 my @programmes = (); 3237 warn "VERBOSE: parsing...\n" if ($opt_verbose); 3238 3239 #split and parse 3240 my @p = split /<programmazione/, $content; 3241 foreach my $pr (@p) { 3242 next unless $pr=~/$date_grab2/;#cerca la programmazione del giorno selezionato 3243# next if $pr=~/<data>/; 3244# next if $pr=~/<listaeventi>/; 3245# warn $pr."-----------------\n"; 3246# warn $date_grab2."-----------------\n"; 3247# warn "|$1|$2|$3|$4|$5|\n"; 3248 my @lines = split /<evento/, $pr; 3249 my $lastline; 3250 foreach my $l(@lines) { 3251 next unless $l=~/<channel>$channel_num<\/channel>/; 3252# next if ($l=~/anno\/|paese\/|linkScheda|trafficLight\/|orafine|\!--sort|\!-- canale|\/durata|\/prg|\/canale|\/giorno|\/palinsesto/); 3253 3254 my %programme = (); 3255 my ($title, $time_start, $time_end, $guidatv_channel_num, $rerun, $first, $id, $anno, $nazione, 3256 $desc, $epnum, $subtitle, $category, $rating, $audio, $doppioaudio, $sottotitoli); 3257 3258# if ($l=~/<title><\!\[CDATA\[(.*?)\]\]<\/title>/){ 3259 if ($l=~/<displayTitle><!\[CDATA\[(.*?)\]\]><\/displayTitle>/){ 3260 
$title=$1; 3261# warn "titolo: $title\n"; 3262 } 3263 if ($l=~/<title><!\[CDATA\[(.*?)\]\]><\/title>/){ 3264 $subtitle=$1; 3265 $subtitle =~ s/[ -]*\Q$title\E[ -]*//; # remove title from subtitle 3266 undef $subtitle if lc($subtitle) eq lc($title); # kill subtitle if the same as title 3267# warn "sottotitolo: $subtitle\n"; 3268 } 3269 if ($l=~/<startTime>(.*?)<\/startTime>/){ 3270 $time_start=$1; 3271# warn "time_start: $time_start\n"; 3272 } 3273 if ($l=~/<endTime>(.*?)<\/endTime>/){ 3274 $time_end=$1; 3275# warn "time_end: $time_end\n"; 3276 } 3277 if ($l=~/<channel>(.*?)<\/channel>/){ 3278 $guidatv_channel_num=$1; 3279# warn "channel_num: $guidatv_channel_num\n"; 3280 } 3281 if ($l=~/<genere> ?(.*?)<\/genere>/){ 3282 $category=$1; 3283# warn "category: $category\n"; 3284 } 3285 else { 3286# warn "linea sconosciuta !!$l!! \n"; 3287 } 3288 $lastline = $l; 3289 3290 $time_end=~s/:/\./; $time_start=~s/:/\./; 3291 my $next_day_end = 0; my $next_day_start = 0; 3292 if ($time_end < 6) { 3293 $next_day_end = 1; 3294 } 3295 if ($time_start < 6) { 3296 $next_day_start = 1; 3297 $next_day_end = 1; 3298 } 3299 $time_end=~s/\./:/; $time_start=~s/\./:/; 3300 $programme{stop}=xmltv_date($time_end, $offset + $next_day_end); 3301 3302 3303 # Three mandatory fields: title, start, channel. 3304 if (not defined $title) { 3305 warn ($DEF_LANG eq 'eng' ? 'no title found, skipping programme' : 'titolo non trovato, salto'); 3306 next; 3307 } 3308 $programme{title}=[[tidy($title), $LANG] ]; 3309 if (not defined $time_start) { 3310 warn ($DEF_LANG eq 'eng' ? "no start time for title $title, skipping programme" : "nessun orario di inizio per $title, salto"); 3311 next; 3312 } 3313 #$time_start=~s/://; 3314 $programme{start}=xmltv_date($time_start, $offset + $next_day_start); 3315 if (not defined $xmltv_id) { 3316 warn ($DEF_LANG eq 'eng' ? 
"no channel for programme $title at $time_start, skipping programme" : "canale non trovato per $title alle $time_start, salto"); 3317 next; 3318 } 3319 $programme{channel}=$xmltv_id; 3320 $programme{title} = [[tidy($title), $LANG] ] if (defined $title); 3321 $programme{date}= $anno if (defined $anno); 3322 $programme{premiere} = ['prima TV', $LANG ] if(defined $first and $first ne 'No'); 3323 #non mi e' chiaro come funziona 3324 #$programme{audio}= ['stereo', tidy($audio)] if (defined $audio); 3325 $programme{rating}=[[tidy($rating), $LANG ]] if (defined $rating); 3326 $programme{category}=[[tidy($category), $LANG ]] if (defined $category and $category ne ''); 3327 3328 $programme{'sub-title'}=[[$subtitle, $LANG] ] if (defined $subtitle and $subtitle ne ''); 3329 #push@{$programme->{'episode-num'}}, [$strseason, 'onscreen'] if ($strseason); 3330 #push@{$programme->{'episode-num'}}, [(defined $season ? ($season-1) : '').".".(defined $episode ? ($episode-1) : '').".0/1", 'xmltv_ns'] if ($strseason); 3331 3332 push (@{$programme{country}}, [$nazione, $LANG]) if ($nazione and $nazione ne '--'); 3333 $programme{desc}=[[tidy($desc), $LANG ]] if (defined $desc and $desc ne ''); 3334 3335 #put info in array 3336 push @programmes, {%programme}; 3337 3338 } 3339 } 3340 3341 if (scalar @programmes) { 3342 return (0, @programmes); 3343 } 3344 else { 3345 # there is a number of reasons why we could get an empty array. 3346 # so we return an error 3347 return (1, @programmes); 3348 } 3349} 3350 3351