1# Routines for handling the 'clump index' associated with some 2# programmes. This is a way of working around missing information in 3# some listings sources by saying that two or more programmes share a 4# timeslot, they appear in a particular order, but we don't know the 5# exact time when one stops and the next begins. 6# 7# For example if the listings source gives at 11:00 'News; Weather' 8# then we know that News has start time 11:00 and clumpidx 0/2, while 9# Weather has start time 11:00 and clumpidx 1/2. We know that Weather 10# follows News, and they are both in the 11:00 timeslot, but not more 11# than that. 12# 13# This clumpidx stuff does its job, but it's ugly to deal with - as 14# demonstrated by the existence of this library. I plan to replace it 15# soonish. 16# 17# The purpose of this module is to let you alter or delete programmes 18# which are part of a clump without having to worry about updating the 19# others. The module exports routines for building a symmetric 20# 'relation' relating pairs of scalars; you should use that to relate 21# programmes which share a clump. Then after modifying a programme 22# which has a clumpidx set, call fix_clumps() passing in the relation, 23# and it will modify the other programmes in the clump. 24# 25# Again, this all works but a better mechanism is needed. 26# 27# $Id: Clumps.pm,v 1.16 2015/07/12 00:59:01 knowledgejunkie Exp $ 28# 29 30package XMLTV::Clumps; 31use XMLTV::Date; 32use Date::Manip; # no Date_Init(), that can be done by the app 33use Tie::RefHash; 34 35# Use Log::TraceMessages if installed. 36BEGIN { 37 eval { require Log::TraceMessages }; 38 if ($@) { 39 *t = sub {}; 40 *d = sub { '' }; 41 } 42 else { 43 *t = \&Log::TraceMessages::t; 44 *d = \&Log::TraceMessages::d; 45 } 46} 47 48# Won't Memoize, you can do that yourself. 49use base 'Exporter'; 50our @EXPORT_OK = qw(new_relation related relate unrelate nuke_from_rel 51 relatives clump_relation fix_clumps); 52 53sub new_relation(); 54sub related( $$$ ); 55sub relate( $$$ ); 56sub unrelate( $$$ ); 57sub nuke_from_rel( $$ ); 58sub relatives( $$ ); 59sub clump_relation( $ ); 60sub fix_clumps( $$$ ); 61sub check_same_channel( $ ); # private 62 63 64# Routines to handle a symmmetric 'relation'. This is used to keep 65# track of which programmes are sharing a clump so that fix_clumps() 66# can sort them out if needed. 67# 68# FIXME make this OO. 69# 70sub new_relation() { 71 die 'usage: new_relation()' if @_; 72 my %h; tie %h, 'Tie::RefHash'; 73 return \%h; 74} 75sub related( $$$ ) { 76 die 'usage: related(relation, a, b)' if @_ != 3; 77 my ($rel, $a, $b) = @_; 78 my $list = $rel->{$a}; 79 return 0 if not defined $list; 80 foreach (@$list) { 81 return 1 if "$_" eq "$b"; 82 } 83 return 0; 84} 85sub relate( $$$ ) { 86 die 'usage: related(relation, a, b)' if @_ != 3; 87 my ($rel, $a, $b) = @_; 88 unless (related($rel, $a, $b)) { 89 check_same_channel([$a, $b]); 90 push @{$rel->{$a}}, $b; 91 push @{$rel->{$b}}, $a; 92 } 93} 94sub unrelate( $$$ ) { 95 die 'usage: related(relation, a, b)' if @_ != 3; 96 my ($rel, $a, $b) = @_; 97 die unless related($rel, $a, $b) and related($rel, $b, $a); 98 @{$rel->{$a}} = grep { "$_" ne "$b" } @{$rel->{$a}}; 99 @{$rel->{$b}} = grep { "$_" ne "$a" } @{$rel->{$b}}; 100} 101sub nuke_from_rel( $$ ) { 102 die 'usage: nuke_from_rel(relation, a)' if @_ != 2; 103 my ($rel, $a) = @_; 104 die unless ref($rel) eq 'HASH'; 105 foreach (@{relatives($rel, $a)}) { 106 die unless related($rel, $a, $_); 107 unrelate($rel, $a, $_); 108 } 109 110 # Tidy up by removing from hash 111 die if defined $rel->{$a} and @{$rel->{$a}}; 112 delete $rel->{$a}; 113} 114sub relatives( $$ ) { 115 die 'usage: relatives(relation, a)' if @_ != 2; 116 my ($rel, $a) = @_; 117 die unless ref($rel) eq 'HASH'; 118 if ($rel->{$a}) { 119 return [ @{$rel->{$a}} ]; # make a copy 120 } 121 else { 122 return []; 123 } 124} 125 126 127# Private. Wrappers for Date::Manip and XMLTV::Date; 128sub pd( $ ) { 129 for ($_[0]) { 130 return undef if not defined; 131 return parse_date($_); 132 } 133} 134 135 136# Make a relation grouping together programmes sharing a clump. 137# 138# Parameter: reference to list of programmes 139# 140# Returns: a relation saying which programmes share clumps. 141# 142sub clump_relation( $ ) { 143 my $progs = shift; 144 my $related = new_relation(); 145 my %todo; 146 foreach (@$progs) { 147 my $clumpidx = $_->{clumpidx}; 148 next if not defined $clumpidx or $clumpidx eq '0/1'; 149 push @{$todo{$_->{channel}}->{pd($_->{start})}}, $_; 150 } 151 t 'updating $related from todo list'; 152 foreach my $ch (keys %todo) { 153 our %times; local *times = $todo{$ch}; 154 my $times = $todo{$ch}; 155 foreach my $t (keys %times) { 156 t "todo list for channel $ch, time $t"; 157 my @l = @{$times{$t}}; 158 t 'list of programmes: ' . d(\@l); 159 foreach my $ai (0 .. $#l) { 160 foreach my $bi ($ai+1 .. $#l) { 161 my $a = $l[$ai]; my $b = $l[$bi]; 162 t "$a and $b related"; 163 die if "$a" eq "$b"; 164 warn "$a, $b over-related" if related($related, $a, $b); 165 relate($related, $a, $b); 166 } 167 } 168 } 169 } 170 return $related; 171} 172 173 174# fix_clumps() 175# 176# When a programme sharing a clump has been modified or replaced, 177# patch things up so that other things in the clump are consistent. 178# 179# Parameters: 180# original programme 181# (ref to) list of new programmes resulting from it 182# clump relation 183# 184# Modifies the programme and others in its clump as necessary. 185# 186sub fix_clumps( $$$ ) { 187 die 'usage: fix_clumps(old programme, listref of replacements, clump relation)' if @_ != 3; 188 my ($orig, $new, $rel) = @_; 189 # Optimize common case. 190 return if not defined $orig->{clumpidx} or $orig->{clumpidx} eq '0/1'; 191 die if ref($rel) ne 'HASH'; 192 die if ref($new) ne 'ARRAY'; 193 our @new; local *new = $new; 194# local $Log::TraceMessages::On = 1; 195 t 'fix_clumps() ENTRY'; 196 t 'original programme: ' . d $orig; 197 t 'new programmes: ' . d \@new; 198 t 'clump relation: ' . d $rel; 199 200 sub by_start { Date_Cmp(pd($a->{start}), pd($b->{start})) } 201 sub by_clumpidx { 202 $a->{clumpidx} =~ m!^(\d+)/(\d+)$! or die; 203 my ($ac, $n) = ($1, $2); 204 $b->{clumpidx} =~ m!^(\d+)/$n$! or die; 205 my $bc = $1; 206 if ($ac == $bc) { 207 t 'do not sort: ' . d($a) . ' and ' . d($b); 208 warn "$a->{clumpidx} and $b->{clumpidx} do not sort"; 209 } 210 $ac <=> $bc; 211 } 212 sub by_date { 213 by_start($a, $b) 214 || by_clumpidx($a, $b) 215 || warn "programmes do not sort"; 216 } 217 218 my @relatives = @{relatives($rel, $orig)}; 219 if (not @relatives) { 220# local $Log::TraceMessages::On = 1; 221 t 'programme without relatives: ' . d $orig; 222 warn "programme has clumpidx of $orig->{clumpidx}, but cannot find others in same clump\n"; 223 return; 224 } 225 check_same_channel(\@relatives); 226 @relatives = sort by_date @relatives; 227 t 'relatives of orig (sorted): ' . d \@relatives; 228 check_same_channel(\@new); # could relax this later 229 t 'orig turned into: ' . d \@new; 230 231 t 'how many programmes has $prog been split into?'; 232 if (@new == 0) { 233 t 'deleted programme entirely!'; 234 nuke_from_rel($rel, $orig); 235 236 if (@relatives == 0) { 237 die; 238 } 239 elsif (@relatives == 1) { 240 delete $relatives[0]->{clumpidx}; 241 } 242 elsif (@relatives >= 2) { 243 # Just decrement the index of all following programmes. 244 my $orig_clumpidx = $orig->{clumpidx}; 245 $orig_clumpidx =~ /^(\d+)/ or die; 246 $orig_clumpidx = $1; 247 foreach (@relatives) { 248 my $rel_clumpidx = $_->{clumpidx}; 249 $rel_clumpidx =~ /^(\d+)/ or die; 250 $rel_clumpidx = $1; 251 -- $rel_clumpidx if $rel_clumpidx > $orig_clumpidx; 252 $_->{clumpidx} = "$rel_clumpidx/" . scalar @relatives; 253 } 254 } 255 else { die } 256 } 257 elsif (@new >= 1) { 258# local $Log::TraceMessages::On = 1; 259 t 'split into one or more programmes'; 260 @new = sort by_date @$new; 261 nuke_from_rel($rel, $orig); 262 263 if (@relatives) { 264 # Find where the original programme slotted into the clump 265 # and insert the new programmes there. 266 # 267 my @old_all = sort by_date ($orig, @relatives); 268 check_same_channel(\@old_all); 269 t 'old clump sorted by date (incl. orig): ' . d \@old_all; 270 @new = sort by_date @new; 271 t 'new shows sorted by date: ' . d \@new; 272 273 # Fix the start and end times of the other shows in the 274 # clump. The shows in @new may give different (narrower) 275 # times to the one show they came from, so that we have 276 # more information about the start and end times of the 277 # other shows in the clump. Eg 09:30 0/2 '09:30 AAA, 278 # 10:00 BBB' sharing a clump with 09:30 1/2 'CCC'. When 279 # the first programme gets split into two, we know that 280 # the start time for C must be 10:00 at the earliest. 281 # Clear? 282 # 283 # Keep around both parsed and unparsed versions of the 284 # same date, to keep timezone information. This needs to 285 # be handled better. 286 # 287 my $start_new_unp = $new->[0]->{start}; 288 my $start_new = pd($start_new_unp); 289 t "new shows start at $start_new"; 290 291 # The known stop time for @new is the last date 292 # mentioned. Eg if the last show ends at 10:00 we know 293 # @new as a whole ends at 10:00. But if the last show has 294 # no stop time but starts at 09:30 then we know @new as a 295 # whole ends at *at the earliest* 09:30. 296 # 297 my $stop_new; 298 foreach (reverse @new) { 299 foreach (pd($_->{start}), pd($_->{stop})) { 300 next if not defined; 301 if (not defined $stop_new 302 or Date_Cmp($_, $stop_new) > 0) { 303 $stop_new = $_; 304 } 305 } 306 } 307 t "lub of new shows is $stop_new"; 308 309 # However if other shows shared a clump, they do not start 310 # at the stop time of @new! They overlap with it. The 311 # shows coming later in the clump will have the same start 312 # time as the last show of @new. 313 # 314 # For example, two shows in a clump from 10:00 to 11:00. 315 # The first is split into something at 10:00 and something 316 # at 10:30. The second part of the original clump will 317 # now 'start' at 10:30 and overlap with the last of the 318 # new shows. 319 # 320 my $start_last_new_unp = $new[-1]->{start}; 321 my $start_last_new = pd($start_last_new_unp); 322 t 'last of the new programmes starts at: ' . d $start_last_new; 323 324 # Add the programmes coming before @new to the output. 325 # These should have stop times before @new's start. 326 # 327 my @new_all; 328 t 'add shows coming before replaced one'; 329 while (@old_all) { 330 my $old = shift @old_all; 331 last if $old eq $orig; 332 t "adding 'before' show: " . d $old; 333 die if not defined $old->{start}; 334 die if not defined $start_new; 335 die unless Date_Cmp(pd($old->{start}), $start_new) <= 0; 336 my $old_stop = pd($old->{stop}); 337 t 'has stop time: ' . d $old_stop; 338# if (defined $old_stop) { 339# die if not defined $stop_new; 340# die "stop time $old_stop of old programme is earlier than lub of new shows $stop_new" 341# if Date_Cmp($old_stop, $stop_new) < 0; 342# die "stop time $old_stop of old programme is earlier than start of new shows $start_new" 343# if Date_Cmp($old_stop, $start_new) < 0; 344# } 345 $old->{stop} = $start_new_unp; 346 t "set stop time to $old->{stop}"; 347 348 push @new_all, $old; 349 } 350 351 # Slot in the new programmes. 352 t 'got to orig show, slot in new programmes'; 353 push @new_all, @new; 354 t 'so far, list of new programmes: ' . d \@new_all; 355 356 # Now the shows at the end, after the programme which was 357 # split. 358 # 359 t 'do shows coming after the orig one'; 360 while (@old_all) { 361 my $old = shift @old_all; 362 t "doing 'after' show: " . d $old; 363 my $old_start = pd($old->{start}); 364 die if not defined $old_start; 365 t "current start time: $old_start"; 366 die if not defined $start_new; 367 die if not defined $stop_new; 368 die unless Date_Cmp($start_new, $old_start) <= 0; 369 die unless Date_Cmp($old_start, $stop_new) <= 0; 370 371 # These shows overlapped with the old programme. So 372 # now they will overlap with the last of the shows it 373 # was split into. 374 # 375 $old->{start} = $start_last_new_unp; 376 t "set start time to $old->{start}"; 377 t 'adding programme to list: ' . d $old; 378 379 push @new_all, $old; 380 } 381 382 t 'new list of programmes from original clump: ' . d \@new_all; 383 check_same_channel(\@new_all); 384 385 t 'now regenerate the clumpidxes'; 386 while (@new_all) { 387 my $first = shift @new_all; 388 t 'taking first programme from list: ' . d $first; 389 t 'building clump for this programme'; 390 my @clump = ($first); 391 my $start = pd($first->{start}); 392 die if not defined $start; 393 while (@new_all) { 394 my $next = shift @new_all; 395 die if not defined $next->{start}; 396 if (not Date_Cmp(pd($next->{start}), $start)) { 397 push @clump, $next; 398 } 399 else { 400 unshift @new_all, $next; 401 last; 402 } 403 } 404 t 'clump is: ' . d \@clump; 405 my $clump_size = scalar @clump; 406 t "$clump_size shows in clump"; 407 for (my $i = 0; $i < $clump_size; $i++) { 408 my $c = $clump[$i]; 409 if ($clump_size == 1) { 410 t 'deleting clumpidx from programme'; 411 delete $c->{clumpidx}; 412 } 413 else { 414 $c->{clumpidx} = "$i/$clump_size"; 415 t "set clumpidx for programme to $c->{clumpidx}"; 416 } 417 } 418 419 t 're-relating programmes in this clump (if more than one)'; 420 foreach my $a (@clump) { 421 foreach my $b (@clump) { 422 next if $a == $b; 423 relate($rel, $a, $b); 424 } 425 } 426 } 427 t 'finished regenerating clumpidxes'; 428 } 429 } 430} 431 432 433# Private. 434sub check_same_channel( $ ) { 435 my $progs = shift; 436 my $ch; 437 foreach my $prog (@$progs) { 438 for ($prog->{channel}) { 439 if (not defined) { 440 t 'no channel! ' . d $prog; 441 die 'programme has no channel'; 442 } 443 if (not defined $ch) { 444 $ch = $_; 445 } 446 elsif ($ch eq $_) { 447 # Okay. 448 } 449 else { 450 t 'same clump, different channels: ' . d($progs->[0]) . ' and ' . d($prog); 451 die "programmes in same clump have different channels: $_, $ch"; 452 } 453 } 454 } 455} 456 457 4581; 459