1#!/usr/local/bin/perl -w
2#
3# Run lots of filter programs on lots of inputs and check the output
4# is as expected.  Stderr is checked if there is an 'expected_err'
5# file but we do not allow for filters that return an error code.  In
6# fact, they're not filters at all: we assume that each can take an
7# input filename and the --output option.
8#
9# -- Ed Avis, ed@membled.com, 2002-02-14
10# $Id: test_filters.t,v 1.43 2015/07/12 02:36:07 knowledgejunkie Exp $
11#
12
13use strict;
14use Getopt::Long;
15use File::Copy;
16use XMLTV::Usage <<END
17$0: test suite for filter programs
18usage: $0 [--tests-dir DIR] [--cmds-dir DIR] [--verbose] [--full] [cmd_regexp...]
19END
20;
21
22sub run( $$$$ );
23sub read_file( $ );
24
25# tv_to_latex depends on Lingua::Preferred and that module's behaviour
26# is influenced by the current language.
27#
28$ENV{LANG} = 'C';
29
30my $tests_dir = 't/data';     # directory test files live in
31die "no directory $tests_dir" if not -d $tests_dir;
32my $cmds_dir = 'blib/script'; # directory filter programs live in
33die "no directory $cmds_dir" if not -d $cmds_dir;
34my $verbose = 0;
35
36# Whether to run the full tests, or just a few.
37my $full = 0;
38
39GetOptions('tests-dir=s' => \$tests_dir, 'cmds-dir=s' => \$cmds_dir,
40	   'verbose' => \$verbose, 'full' => \$full)
41  or usage(0);
42
43if (not $full) {
44    warn "running small test suite, use $0 --full for the whole lot\n";
45}
46
47# Commands to run.  For each command and input file we have an
48# 'expected output' file to compare against.  Also each command has an
49# 'idempotent' flag.  If this is true then we check that (for example)
50# tv_cat | tv_cat has the same effect as tv_cat, for all input files.
51#
52# A list of pairs: the first element of the pair is a list of command
53# and arguments, the second is the idempotent flag.
54#
55my @cmds
56  = (
57     [ [ 'tv_cat'                                                ], 1 ],
58     [ [ 'tv_extractinfo_en'                                     ], 1 ],
59     # We assume that most usages of tv_grep are idempotent on the sample
60     # files given.  But see BUGS section of manual page.
61     [ [ 'tv_grep', '--channel-name', 'd'                        ], 1 ],
62     [ [ 'tv_grep', '--not', '--channel-name', 'd'               ], 1 ],
63     [ [ 'tv_sort'                                               ], 1 ],
64     [ [ 'tv_sort', '--by-channel'                               ], 1 ],
65     [ [ 'tv_to_latex'                                           ], 0 ],
66     [ [ 'tv_to_text',                                           ], 0 ],
67     [ [ 'tv_remove_some_overlapping'                            ], 1 ],
68     [ [ 'tv_grep', '--on-after', '200302161330 UTC'             ], 1 ],
69     [ [ 'tv_grep', '--on-before', '200302161330 UTC'            ], 1 ],
70    );
71
72if ($full) {
73    push @cmds,
74      (
75       [ [ 'tv_grep', '--channel', 'xyz', '--or', '--channel', 'b' ], 1 ],
76       [ [ 'tv_grep', '--channel', 'xyz', '--or', '--not', '--channel', 'b' ], 1 ],
77       [ [ 'tv_grep', '--previously-shown', ''                     ], 1 ],
78       [ [ 'tv_grep', 'a'                                          ], 1 ],
79       [ [ 'tv_grep', '--category', 'b'                            ], 1 ],
80       [ [ 'tv_grep', '-i', '--last-chance', 'c'                   ], 1 ],
81       [ [ 'tv_grep', '--premiere', ''                             ], 1 ],
82       [ [ 'tv_grep', '--new'                                      ], 1 ],
83       [ [ 'tv_grep', '--channel-id', 'channel4.com'               ], 1 ],
84       [ [ 'tv_grep', '--not', '--channel-id', 'channel4.com'      ], 1 ],
85       [ [ 'tv_grep', '--on-after', '2002-02-05 UTC'               ], 1 ],
86       [ [ 'tv_grep', '--eval', 'scalar keys %$_ > 5'              ], 0 ],
87       [ [ 'tv_grep', '--category', 'e', '--and', '--title', 'f'   ], 1 ],
88       [ [ 'tv_grep', '--category', 'g', '--or', '--title', 'h'    ], 1 ],
89       [ [ 'tv_grep', '-i', '--category', 'i', '--title', 'j'      ], 1 ],
90       [ [ 'tv_grep', '-i', '--category', 'i', '--title', 'h'      ], 1 ],
91      );
92}
93
94if (@ARGV) {
95    # Remaining arguments are regexps to match commands to run.
96    my @new_cmds;
97    my %seen;
98    foreach my $arg (@ARGV) {
99	foreach my $cmd (@cmds) {
100	    for (join(' ', @{$cmd->[0]})) {
101		push @new_cmds, $cmd if /$arg/ and not $seen{$_}++;
102	    }
103	}
104    }
105    die "no commands matched regexps: @ARGV" if not @new_cmds;
106    @cmds = @new_cmds;
107    print "running commands:\n", join("\n", map { join(' ', @{$_->[0]}) } @cmds), "\n";
108}
109
110# Input files we could use to build test command lines.
111my @inputs = <$tests_dir/*.xml>;
112my @inputs_gz = <$tests_dir/*.xml.gz>; s/\.gz$// foreach @inputs_gz;
113@inputs = sort (@inputs, @inputs_gz);
114die "no test cases (*.xml, *.xml.gz) found in $tests_dir"
115  if not @inputs;
116foreach (@inputs) {
117    s!^\Q$tests_dir\E/!!o or die;
118}
119
120# We want to test multiple input files.  But it would be way OTT to
121# test all permutations of all input files up to some length.  Instead
122# we pick all single files and a handful of pairs.
123#
124my @tests;
125
126# The input file empty.xml is special: we particularly like to use it
127# in tests.  Then there are another two files we refer to by name.
128#
129my $empty_input = 'empty.xml';
130foreach ($empty_input, 'simple.xml', 'x-whatever.xml') {
131    die "file $tests_dir/$_ not found" if not -f "$tests_dir/$_";
132}
133
134# We need to track the encoding of each input file so we don't try to
135# mix them on the same command line (not allowed).
136#
137my %input_encoding;
138foreach (@inputs) {
139    $input_encoding{$_} = ($_ eq 'test_livre.xml') ? 'ISO-8859-1' : 'UTF-8';
140}
141my %all_encodings = reverse %input_encoding;
142
143# For historical reasons we like to have certain files at the front of
144# the list.  Aargh, this is so horrible.
145#
146sub move_to_front( \@$ ) {
147    our @l; local *l = shift;
148    my $elem = shift;
149    my @r;
150    foreach (@l) {
151	if ($_ eq $elem) {
152	    unshift @r, $_;
153	}
154	else {
155	    push @r, $_;
156	}
157    }
158    @l = @r;
159}
160foreach ('dups.xml', 'clump.xml', 'amp.xml', $empty_input) {
161    move_to_front @inputs, $_;
162}
163
164# Add a test to the list.  Arguments are listref of filenames, and
165# optional name for this set of files.
166#
167sub add_test( $;$ ) {
168    my ($files, $name) = @_;
169    $name = join('_', @$files) if not defined $name;
170    my $enc;
171    foreach (@$files) {
172	if (defined $enc and $enc ne $input_encoding{$_}) {
173	    die 'trying to add test with two different encodings';
174	}
175	else {
176	    $enc = $input_encoding{$_};
177	}
178    }
179    push @tests, { inputs => $files, name => $name };
180}
181
182# A quick and effective test for each command is to run it on all the
183# input files at once.  But we have to segregate them by encoding.
184#
185my %used_enc_name;
186foreach my $enc (sort keys %all_encodings) {
187    (my $enc_name = $enc) =~ tr/[A-Za-z0-9]//dc;
188    die "cannot make name for encoding $enc"
189      if $enc_name eq '';
190    die "two encodings go to same name $enc_name"
191      if $used_enc_name{$enc_name}++;
192    my @files = grep { $input_encoding{$_} eq $enc } @inputs;
193    if (@files == 0) {
194	# Shouldn't happen.
195	die "strange, no files for $enc";
196    }
197    elsif (@files == 1) {
198	# No point adding this as it will be run as an individual
199	# test.
200	#
201    }
202    else {
203	add_test(\@files, "all_$enc_name");
204    }
205}
206
207# One important test is two empty files in the middle of the list.
208add_test([ $inputs[1], $empty_input, $empty_input, $inputs[2] ]);
209
210# Another special case we want to run every time.
211add_test([ 'simple.xml', 'x-whatever.xml' ]);
212
213# Another - check that duplicate channels are removed.
214add_test([ 'test.xml', 'test.xml' ]);
215
216if ($full) {
217    # Test some pairs of files, but not all possible pairs.
218    my $pair_limit = 4; die "too few inputs" if $pair_limit > @inputs;
219    foreach my $i (0 .. $pair_limit - 1) {
220	foreach my $j (0 .. $pair_limit - 1) {
221	    add_test([ $inputs[$i], $inputs[$j] ]);
222	}
223    }
224
225    # Then all the single files.
226    add_test([ $_ ]) foreach @inputs;
227}
228else {
229    # Check overlapping warning from tv_sort.  This ends up giving the
230    # input file to every command, not just tv_sort; oh well.
231    #
232    # Not needed in the case when $full is true because we test every
233    # individual file then.
234    #
235    add_test([ 'overlap.xml' ]);
236}
237
238# Any other environment needed (relative to $tests_dir)
239$ENV{PERL5LIB} .= ":..";
240
241my %seen;
242
243# Count total number of tests to run.
244my $num_tests = 0;
245foreach (@cmds) {
246    $num_tests += scalar @tests;
247    $num_tests += scalar @tests if $_->[1]; # idem. test
248}
249print "1..$num_tests\n";
250my $test_num = 0;
251foreach my $pair (@cmds) {
252    my ($cmd, $idem) = @$pair;
253    foreach my $test (@tests) {
254	my @test_inputs = @{$test->{inputs}};
255	++ $test_num;
256	my $test_name = join('_', @$cmd, $test->{name});
257	$test_name =~ tr/A-Za-z0-9/_/sc;
258	die "two tests munge to $test_name"
259	  if $seen{$test_name}++;
260
261	my @cmd = @$cmd;
262	my $base     = "$tests_dir/$test_name";
263	my $expected = "$base.expected";
264	my $out      = "$base.out";
265	my $err      = "$base.err";
266
267	# Gunzip automatically before testing, gzip back again
268	# afterwards.  Keys matter, values do not.
269	#
270	my (%to_gzip, %to_gunzip);
271	foreach (@test_inputs, $expected) {
272	    my $gz = "$_.gz";
273	    if (not -e and -e $gz) {
274		$to_gunzip{$gz}++ && die "$gz seen twice";
275		$to_gzip{$_}++ && die "$_ seen twice";
276	    }
277	}
278	system 'gzip', '-d', keys %to_gunzip if %to_gunzip;
279
280	# To unlink when tests are done - this hash can change.
281	# Again, only keys are important.  (FIXME should encapsulate
282	# as 'Set' datatype.)
283	#
284	my %to_unlink = ($out => undef, $err => undef);
285
286	my $out_content; # contents of $out, to be filled in later
287
288        # TODO File::Spec
289	$cmd[0] = "$cmds_dir/$cmd[0]";
290	$cmd[0] =~ s!/!\\!g if $^O eq 'MSWin32';
291	if ($verbose) {
292	    print STDERR "test $test_num: @cmd @test_inputs\n";
293	}
294
295	my @in = map { "$tests_dir/$_" } @test_inputs;
296	my $okay = run(\@cmd, \@in, $out, $err);
297	# assume: if $okay then -e $out.
298
299	my $have_expected = -e $expected;
300	if (not $okay) {
301	    print "not ok $test_num\n";
302	    delete $to_unlink{$out}; delete $to_unlink{$err};
303	}
304	elsif ($okay and not $have_expected) {
305	    # This should happen after adding a new test case, never
306	    # when just running the tests.
307	    #
308	    warn "creating $expected\n";
309	    copy($out, $expected)
310	      or die "cannot copy $out to $expected: $!";
311	    # Don't print any message - the test just 'did not run'.
312	}
313	elsif ($okay and $have_expected) {
314	    $out_content = read_file($out);
315	    my $expected_content = read_file($expected);
316
317	    if ($out_content ne $expected_content) {
318		warn "failure for @cmd @in, see $base.*\n";
319		print "not ok $test_num\n";
320		$okay = 0;
321		delete $to_unlink{$out}; delete $to_unlink{$err};
322	    }
323	    else {
324		# The output was correct: if there's also an 'expected
325		# error' file check that.  Otherwise we do not check
326		# what was printed on stderr.
327		#
328		my $expected_err = "$base.expected_err";
329		if (-e $expected_err) {
330		    my $err_content = read_file($err);
331		    my $expected_content = read_file($expected_err);
332
333		    if ($err_content ne $expected_content) {
334			warn "failure for stderr of @cmd @in, see $base.*\n";
335			print "not ok $test_num\n";
336			$okay = 0;
337			delete $to_unlink{$out}; delete $to_unlink{$err};
338		    }
339		    else {
340			print "ok $test_num\n";
341		    }
342		}
343		else {
344		    # Don't check stderr.
345		    print "ok $test_num\n";
346		}
347	    }
348	}
349	else { die }
350
351	if ($idem) {
352	    ++ $test_num;
353	    if ($verbose) {
354		print STDERR "test $test_num: ";
355		print STDERR "check that @cmd is idempotent on this input\n";
356	    }
357	    if ($okay) {
358		die if not -e $out;
359		# Run the command again, on its own output.
360		my $twice_out = "$base.twice_out";
361		my $twice_err = "$base.twice_err";
362		$to_unlink{$twice_out} = $to_unlink{$twice_err} = undef;
363
364		my $twice_okay = run(\@cmd, [ $out ], $twice_out, $twice_err);
365		# assume: if $twice_okay then -e $twice_out.
366
367		if (not $twice_okay) {
368		    print "not ok $test_num\n";
369		    delete $to_unlink{$out};
370		    delete $to_unlink{$twice_out};
371		    delete $to_unlink{$twice_err};
372		}
373		else {
374		    my $twice_out_content = read_file($twice_out);
375		    my $ok;
376		    if (not defined $out_content) {
377			warn "cannot run idempotence test for @cmd\n";
378			$ok = 0;
379		    }
380		    elsif ($twice_out_content ne $out_content) {
381			warn "failure for idempotence of @cmd, see $base.*\n";
382			$ok = 0;
383		    }
384		    else { $ok = 1 }
385
386		    if (not $ok) {
387			print "not ok $test_num\n";
388			delete $to_unlink{$out};
389			delete $to_unlink{$twice_out};
390			delete $to_unlink{$twice_err};
391		    }
392		    else {
393			print "ok $test_num\n";
394		    }
395		}
396	    }
397	    else {
398		warn "skipping idempotence test for @cmd on @test_inputs\n";
399		# Do not print 'ok' or 'not ok'.
400	    }
401	}
402
403	foreach (keys %to_unlink) {
404	    (not -e) or unlink or warn "cannot unlink $_: $!";
405	}
406	system 'gzip', keys %to_gzip if %to_gzip;
407    }
408}
409die "ran $test_num tests, expected to run $num_tests"
410  if $test_num != $num_tests;
411
412
413# run()
414#
415# Run a Perl command redirecting input and output.  This is not fully
416# general - it relies on the --output option working for redirecting
417# output.  (Don't know why I decided this, but it does.)
418#
419# Parameters:
420#   (ref to) list of command and arguments
421#   (ref to) list of input filenames
422#   output filename
423#   error output filename
424#
425# This routine is specialized to Perl stuff running during the test
426# suite; it has the necessary -Iwhatever arguments.
427#
428# Dies if error opening or closing files, or if the command is killed
429# by a signal.  Otherwise creates the output files, and returns
430# success or failure of the command.
431#
432sub run( $$$$ ) {
433    my ($cmd, $in, $out, $err) = @_; die if not defined $cmd;
434    my @cmd = (qw(perl -Iblib/arch -Iblib/lib), @$cmd,
435	       @$in,
436	       '--output', $out);
437
438    # Redirect stderr to file $err.
439    open(OLDERR, '>&STDERR') or die "cannot dup stderr: $!\n";
440    if (not open(STDERR, ">$err")) {
441	print OLDERR "cannot write to $err: $!\n";
442	exit(1);
443    }
444
445    # Run the command.
446    my $r = system(@cmd);
447
448    # Restore old stderr.
449    if (not close(STDERR)) {
450	print OLDERR "cannot close $err: $!\n";
451	exit(1);
452    }
453    if (not open(STDERR, ">&OLDERR")) {
454	print OLDERR "cannot dup stderr back again: $!\n";
455	exit(1);
456    }
457
458    # Check command return status.
459    if ($r) {
460	my ($status, $sig, $core) = ($? >> 8, $? & 127, $? & 128);
461	if ($sig) {
462	    die "@cmd killed by signal $sig, aborting";
463	}
464	warn "@cmd failed: $status, $sig, $core\n";
465	return 0;
466    }
467
468    return 1;
469}
470
471
472sub read_file( $ ) {
473    my $f = shift;
474    local $/ = undef;
475    local *FH;
476    open(FH, $f) or die "cannot open $f: $!";
477    my $content = <FH>;
478    close FH or die "cannot close $f: $!";
479    return $content;
480}
481