1# Routines for handling the 'clump index' associated with some
2# programmes.  This is a way of working around missing information in
3# some listings sources by saying that two or more programmes share a
4# timeslot, they appear in a particular order, but we don't know the
5# exact time when one stops and the next begins.
6#
7# For example if the listings source gives at 11:00 'News; Weather'
8# then we know that News has start time 11:00 and clumpidx 0/2, while
9# Weather has start time 11:00 and clumpidx 1/2.  We know that Weather
10# follows News, and they are both in the 11:00 timeslot, but not more
11# than that.
12#
13# This clumpidx stuff does its job, but it's ugly to deal with - as
14# demonstrated by the existence of this library.  I plan to replace it
15# soonish.
16#
17# The purpose of this module is to let you alter or delete programmes
18# which are part of a clump without having to worry about updating the
19# others.  The module exports routines for building a symmetric
20# 'relation' relating pairs of scalars; you should use that to relate
21# programmes which share a clump.  Then after modifying a programme
22# which has a clumpidx set, call fix_clumps() passing in the relation,
23# and it will modify the other programmes in the clump.
24#
25# Again, this all works but a better mechanism is needed.
26#
27# $Id: Clumps.pm,v 1.16 2015/07/12 00:59:01 knowledgejunkie Exp $
28#
29
30package XMLTV::Clumps;
31use XMLTV::Date;
32use Date::Manip; # no Date_Init(), that can be done by the app
33use Tie::RefHash;
34
35# Use Log::TraceMessages if installed.
36BEGIN {
37    eval { require Log::TraceMessages };
38    if ($@) {
39	*t = sub {};
40	*d = sub { '' };
41    }
42    else {
43	*t = \&Log::TraceMessages::t;
44	*d = \&Log::TraceMessages::d;
45    }
46}
47
48# Won't Memoize, you can do that yourself.
49use base 'Exporter';
50our @EXPORT_OK = qw(new_relation related relate unrelate nuke_from_rel
51		    relatives clump_relation fix_clumps);
52
53sub new_relation();
54sub related( $$$ );
55sub relate( $$$ );
56sub unrelate( $$$ );
57sub nuke_from_rel( $$ );
58sub relatives( $$ );
59sub clump_relation( $ );
60sub fix_clumps( $$$ );
61sub check_same_channel( $ ); # private
62
63
64# Routines to handle a symmmetric 'relation'.  This is used to keep
65# track of which programmes are sharing a clump so that fix_clumps()
66# can sort them out if needed.
67#
68# FIXME make this OO.
69#
70sub new_relation() {
71    die 'usage: new_relation()' if @_;
72    my %h; tie %h, 'Tie::RefHash';
73    return \%h;
74}
75sub related( $$$ ) {
76    die 'usage: related(relation, a, b)' if @_ != 3;
77    my ($rel, $a, $b) = @_;
78    my $list = $rel->{$a};
79    return 0 if not defined $list;
80    foreach (@$list) {
81	return 1 if "$_" eq "$b";
82    }
83    return 0;
84}
85sub relate( $$$ ) {
86    die 'usage: related(relation, a, b)' if @_ != 3;
87    my ($rel, $a, $b) = @_;
88    unless (related($rel, $a, $b)) {
89	check_same_channel([$a, $b]);
90	push @{$rel->{$a}}, $b;
91	push @{$rel->{$b}}, $a;
92    }
93}
94sub unrelate( $$$ ) {
95    die 'usage: related(relation, a, b)' if @_ != 3;
96    my ($rel, $a, $b) = @_;
97    die unless related($rel, $a, $b) and related($rel, $b, $a);
98    @{$rel->{$a}} = grep { "$_" ne "$b" } @{$rel->{$a}};
99    @{$rel->{$b}} = grep { "$_" ne "$a" } @{$rel->{$b}};
100}
101sub nuke_from_rel( $$ ) {
102    die 'usage: nuke_from_rel(relation, a)' if @_ != 2;
103    my ($rel, $a) = @_;
104    die unless ref($rel) eq 'HASH';
105    foreach (@{relatives($rel, $a)}) {
106	die unless related($rel, $a, $_);
107	unrelate($rel, $a, $_);
108    }
109
110    # Tidy up by removing from hash
111    die if defined $rel->{$a} and @{$rel->{$a}};
112    delete $rel->{$a};
113}
114sub relatives( $$ ) {
115    die 'usage: relatives(relation, a)' if @_ != 2;
116    my ($rel, $a) = @_;
117    die unless ref($rel) eq 'HASH';
118    if ($rel->{$a}) {
119	return [ @{$rel->{$a}} ]; # make a copy
120    }
121    else {
122	return [];
123    }
124}
125
126
127# Private.  Wrappers for Date::Manip and XMLTV::Date;
128sub pd( $ ) {
129    for ($_[0]) {
130	return undef if not defined;
131	return parse_date($_);
132    }
133}
134
135
136# Make a relation grouping together programmes sharing a clump.
137#
138# Parameter: reference to list of programmes
139#
140# Returns: a relation saying which programmes share clumps.
141#
142sub clump_relation( $ ) {
143    my $progs = shift;
144    my $related = new_relation();
145    my %todo;
146    foreach (@$progs) {
147	my $clumpidx = $_->{clumpidx};
148	next if not defined $clumpidx or $clumpidx eq '0/1';
149	push @{$todo{$_->{channel}}->{pd($_->{start})}}, $_;
150    }
151    t 'updating $related from todo list';
152    foreach my $ch (keys %todo) {
153	our %times; local *times = $todo{$ch};
154	my $times = $todo{$ch};
155	foreach my $t (keys %times) {
156	    t "todo list for channel $ch, time $t";
157	    my @l = @{$times{$t}};
158	    t 'list of programmes: ' . d(\@l);
159	    foreach my $ai (0 .. $#l) {
160		foreach my $bi ($ai+1 .. $#l) {
161		    my $a = $l[$ai]; my $b = $l[$bi];
162		    t "$a and $b related";
163		    die if "$a" eq "$b";
164		    warn "$a, $b over-related" if related($related, $a, $b);
165		    relate($related, $a, $b);
166		}
167	    }
168	}
169    }
170    return $related;
171}
172
173
174# fix_clumps()
175#
176# When a programme sharing a clump has been modified or replaced,
177# patch things up so that other things in the clump are consistent.
178#
179# Parameters:
180#   original programme
181#   (ref to) list of new programmes resulting from it
182#   clump relation
183#
184# Modifies the programme and others in its clump as necessary.
185#
186sub fix_clumps( $$$ ) {
187    die 'usage: fix_clumps(old programme, listref of replacements, clump relation)' if @_ != 3;
188    my ($orig, $new, $rel) = @_;
189    # Optimize common case.
190    return if not defined $orig->{clumpidx} or $orig->{clumpidx} eq '0/1';
191    die if ref($rel) ne 'HASH';
192    die if ref($new) ne 'ARRAY';
193    our @new; local *new = $new;
194#    local $Log::TraceMessages::On = 1;
195    t 'fix_clumps() ENTRY';
196    t 'original programme: ' . d $orig;
197    t 'new programmes: ' . d \@new;
198    t 'clump relation: ' . d $rel;
199
200    sub by_start { Date_Cmp(pd($a->{start}), pd($b->{start})) }
201    sub by_clumpidx {
202	$a->{clumpidx} =~ m!^(\d+)/(\d+)$! or die;
203	my ($ac, $n) = ($1, $2);
204	$b->{clumpidx} =~ m!^(\d+)/$n$! or die;
205	my $bc = $1;
206	if ($ac == $bc) {
207	    t 'do not sort: ' . d($a) . ' and ' . d($b);
208	    warn "$a->{clumpidx} and $b->{clumpidx} do not sort";
209	}
210	$ac <=> $bc;
211    }
212    sub by_date {
213	by_start($a, $b)
214	  || by_clumpidx($a, $b)
215	    || warn "programmes do not sort";
216    }
217
218    my @relatives = @{relatives($rel, $orig)};
219    if (not @relatives) {
220#	local $Log::TraceMessages::On = 1;
221	t 'programme without relatives: ' . d $orig;
222	warn "programme has clumpidx of $orig->{clumpidx}, but cannot find others in same clump\n";
223	return;
224    }
225    check_same_channel(\@relatives);
226    @relatives = sort by_date @relatives;
227    t 'relatives of orig (sorted): ' . d \@relatives;
228    check_same_channel(\@new); # could relax this later
229    t 'orig turned into: ' . d \@new;
230
231    t 'how many programmes has $prog been split into?';
232    if (@new == 0) {
233	t 'deleted programme entirely!';
234	nuke_from_rel($rel, $orig);
235
236	if (@relatives == 0) {
237	    die;
238	}
239	elsif (@relatives == 1) {
240	    delete $relatives[0]->{clumpidx};
241	}
242	elsif (@relatives >= 2) {
243	    # Just decrement the index of all following programmes.
244	    my $orig_clumpidx = $orig->{clumpidx};
245	    $orig_clumpidx =~ /^(\d+)/ or die;
246	    $orig_clumpidx = $1;
247	    foreach (@relatives) {
248		my $rel_clumpidx = $_->{clumpidx};
249		$rel_clumpidx =~ /^(\d+)/ or die;
250		$rel_clumpidx = $1;
251		-- $rel_clumpidx if $rel_clumpidx > $orig_clumpidx;
252		$_->{clumpidx} = "$rel_clumpidx/" . scalar @relatives;
253	    }
254	}
255	else { die }
256    }
257    elsif (@new >= 1) {
258#	local $Log::TraceMessages::On = 1;
259	t 'split into one or more programmes';
260	@new = sort by_date @$new;
261	nuke_from_rel($rel, $orig);
262
263	if (@relatives) {
264	    # Find where the original programme slotted into the clump
265	    # and insert the new programmes there.
266	    #
267	    my @old_all = sort by_date ($orig, @relatives);
268	    check_same_channel(\@old_all);
269	    t 'old clump sorted by date (incl. orig): ' . d \@old_all;
270	    @new = sort by_date @new;
271	    t 'new shows sorted by date: ' . d \@new;
272
273	    # Fix the start and end times of the other shows in the
274	    # clump.  The shows in @new may give different (narrower)
275	    # times to the one show they came from, so that we have
276	    # more information about the start and end times of the
277	    # other shows in the clump.  Eg 09:30 0/2 '09:30 AAA,
278	    # 10:00 BBB' sharing a clump with 09:30 1/2 'CCC'.  When
279	    # the first programme gets split into two, we know that
280	    # the start time for C must be 10:00 at the earliest.
281	    # Clear?
282	    #
283	    # Keep around both parsed and unparsed versions of the
284	    # same date, to keep timezone information.  This needs to
285	    # be handled better.
286	    #
287	    my $start_new_unp = $new->[0]->{start};
288	    my $start_new = pd($start_new_unp);
289	    t "new shows start at $start_new";
290
291	    # The known stop time for @new is the last date
292	    # mentioned.  Eg if the last show ends at 10:00 we know
293	    # @new as a whole ends at 10:00.  But if the last show has
294	    # no stop time but starts at 09:30 then we know @new as a
295	    # whole ends at *at the earliest* 09:30.
296	    #
297	    my $stop_new;
298	    foreach (reverse @new) {
299		foreach (pd($_->{start}), pd($_->{stop})) {
300		    next if not defined;
301		    if (not defined $stop_new
302			or Date_Cmp($_, $stop_new) > 0) {
303			$stop_new = $_;
304		    }
305		}
306	    }
307	    t "lub of new shows is $stop_new";
308
309	    # However if other shows shared a clump, they do not start
310	    # at the stop time of @new!  They overlap with it.  The
311	    # shows coming later in the clump will have the same start
312	    # time as the last show of @new.
313	    #
314	    # For example, two shows in a clump from 10:00 to 11:00.
315	    # The first is split into something at 10:00 and something
316	    # at 10:30.  The second part of the original clump will
317	    # now 'start' at 10:30 and overlap with the last of the
318	    # new shows.
319	    #
320	    my $start_last_new_unp = $new[-1]->{start};
321	    my $start_last_new = pd($start_last_new_unp);
322	    t 'last of the new programmes starts at: ' . d $start_last_new;
323
324	    # Add the programmes coming before @new to the output.
325	    # These should have stop times before @new's start.
326	    #
327	    my @new_all;
328	    t 'add shows coming before replaced one';
329	    while (@old_all) {
330		my $old = shift @old_all;
331		last if $old eq $orig;
332		t "adding 'before' show: " . d $old;
333		die if not defined $old->{start};
334		die if not defined $start_new;
335		die unless Date_Cmp(pd($old->{start}), $start_new) <= 0;
336		my $old_stop = pd($old->{stop});
337		t 'has stop time: ' . d $old_stop;
338# 		if (defined $old_stop) {
339# 		    die if not defined $stop_new;
340# 		    die "stop time $old_stop of old programme is earlier than lub of new shows $stop_new"
341# 		      if Date_Cmp($old_stop, $stop_new) < 0;
342# 		    die "stop time $old_stop of old programme is earlier than start of new shows $start_new"
343# 		      if Date_Cmp($old_stop, $start_new) < 0;
344# 		}
345		$old->{stop} = $start_new_unp;
346		t "set stop time to $old->{stop}";
347
348		push @new_all, $old;
349	    }
350
351	    # Slot in the new programmes.
352	    t 'got to orig show, slot in new programmes';
353	    push @new_all, @new;
354	    t 'so far, list of new programmes: ' . d \@new_all;
355
356	    # Now the shows at the end, after the programme which was
357	    # split.
358	    #
359	    t 'do shows coming after the orig one';
360	    while (@old_all) {
361		my $old = shift @old_all;
362		t "doing 'after' show: " . d $old;
363		my $old_start = pd($old->{start});
364		die if not defined $old_start;
365		t "current start time: $old_start";
366		die if not defined $start_new;
367		die if not defined $stop_new;
368		die unless Date_Cmp($start_new, $old_start) <= 0;
369		die unless Date_Cmp($old_start, $stop_new) <= 0;
370
371		# These shows overlapped with the old programme.  So
372		# now they will overlap with the last of the shows it
373		# was split into.
374		#
375		$old->{start} = $start_last_new_unp;
376		t "set start time to $old->{start}";
377		t 'adding programme to list: ' . d $old;
378
379		push @new_all, $old;
380	    }
381
382	    t 'new list of programmes from original clump: ' . d \@new_all;
383	    check_same_channel(\@new_all);
384
385	    t 'now regenerate the clumpidxes';
386	    while (@new_all) {
387		my $first = shift @new_all;
388		t 'taking first programme from list: ' . d $first;
389		t 'building clump for this programme';
390		my @clump = ($first);
391		my $start = pd($first->{start});
392		die if not defined $start;
393		while (@new_all) {
394		    my $next = shift @new_all;
395		    die if not defined $next->{start};
396		    if (not Date_Cmp(pd($next->{start}), $start)) {
397			push @clump, $next;
398		    }
399		    else {
400			unshift @new_all, $next;
401			last;
402		    }
403		}
404		t 'clump is: ' . d \@clump;
405		my $clump_size = scalar @clump;
406		t "$clump_size shows in clump";
407		for (my $i = 0; $i < $clump_size; $i++) {
408		    my $c = $clump[$i];
409		    if ($clump_size == 1) {
410			t 'deleting clumpidx from programme';
411			delete $c->{clumpidx};
412		    }
413		    else {
414			$c->{clumpidx} = "$i/$clump_size";
415			t "set clumpidx for programme to $c->{clumpidx}";
416		    }
417		}
418
419		t 're-relating programmes in this clump (if more than one)';
420		foreach my $a (@clump) {
421		    foreach my $b (@clump) {
422			next if $a == $b;
423			relate($rel, $a, $b);
424		    }
425		}
426	    }
427	    t 'finished regenerating clumpidxes';
428	}
429    }
430}
431
432
433# Private.
434sub check_same_channel( $ ) {
435    my $progs = shift;
436    my $ch;
437    foreach my $prog (@$progs) {
438	for ($prog->{channel}) {
439	    if (not defined) {
440		t 'no channel! ' . d $prog;
441		die 'programme has no channel';
442	    }
443	    if (not defined $ch) {
444		$ch = $_;
445	    }
446	    elsif ($ch eq $_) {
447		# Okay.
448	    }
449	    else {
450		t 'same clump, different channels: ' . d($progs->[0]) . ' and ' . d($prog);
451		die "programmes in same clump have different channels: $_, $ch";
452	    }
453	}
454    }
455}
456
457
4581;
459