1#!/usr/local/bin/perl -w 2# 3# Run lots of filter programs on lots of inputs and check the output 4# is as expected. Stderr is checked if there is an 'expected_err' 5# file but we do not allow for filters that return an error code. In 6# fact, they're not filters at all: we assume that each can take an 7# input filename and the --output option. 8# 9# -- Ed Avis, ed@membled.com, 2002-02-14 10# $Id: test_filters.t,v 1.43 2015/07/12 02:36:07 knowledgejunkie Exp $ 11# 12 13use strict; 14use Getopt::Long; 15use File::Copy; 16use XMLTV::Usage <<END 17$0: test suite for filter programs 18usage: $0 [--tests-dir DIR] [--cmds-dir DIR] [--verbose] [--full] [cmd_regexp...] 19END 20; 21 22sub run( $$$$ ); 23sub read_file( $ ); 24 25# tv_to_latex depends on Lingua::Preferred and that module's behaviour 26# is influenced by the current language. 27# 28$ENV{LANG} = 'C'; 29 30my $tests_dir = 't/data'; # directory test files live in 31die "no directory $tests_dir" if not -d $tests_dir; 32my $cmds_dir = 'blib/script'; # directory filter programs live in 33die "no directory $cmds_dir" if not -d $cmds_dir; 34my $verbose = 0; 35 36# Whether to run the full tests, or just a few. 37my $full = 0; 38 39GetOptions('tests-dir=s' => \$tests_dir, 'cmds-dir=s' => \$cmds_dir, 40 'verbose' => \$verbose, 'full' => \$full) 41 or usage(0); 42 43if (not $full) { 44 warn "running small test suite, use $0 --full for the whole lot\n"; 45} 46 47# Commands to run. For each command and input file we have an 48# 'expected output' file to compare against. Also each command has an 49# 'idempotent' flag. If this is true then we check that (for example) 50# tv_cat | tv_cat has the same effect as tv_cat, for all input files. 51# 52# A list of pairs: the first element of the pair is a list of command 53# and arguments, the second is the idempotent flag. 54# 55my @cmds 56 = ( 57 [ [ 'tv_cat' ], 1 ], 58 [ [ 'tv_extractinfo_en' ], 1 ], 59 # We assume that most usages of tv_grep are idempotent on the sample 60 # files given. But see BUGS section of manual page. 61 [ [ 'tv_grep', '--channel-name', 'd' ], 1 ], 62 [ [ 'tv_grep', '--not', '--channel-name', 'd' ], 1 ], 63 [ [ 'tv_sort' ], 1 ], 64 [ [ 'tv_sort', '--by-channel' ], 1 ], 65 [ [ 'tv_to_latex' ], 0 ], 66 [ [ 'tv_to_text', ], 0 ], 67 [ [ 'tv_remove_some_overlapping' ], 1 ], 68 [ [ 'tv_grep', '--on-after', '200302161330 UTC' ], 1 ], 69 [ [ 'tv_grep', '--on-before', '200302161330 UTC' ], 1 ], 70 ); 71 72if ($full) { 73 push @cmds, 74 ( 75 [ [ 'tv_grep', '--channel', 'xyz', '--or', '--channel', 'b' ], 1 ], 76 [ [ 'tv_grep', '--channel', 'xyz', '--or', '--not', '--channel', 'b' ], 1 ], 77 [ [ 'tv_grep', '--previously-shown', '' ], 1 ], 78 [ [ 'tv_grep', 'a' ], 1 ], 79 [ [ 'tv_grep', '--category', 'b' ], 1 ], 80 [ [ 'tv_grep', '-i', '--last-chance', 'c' ], 1 ], 81 [ [ 'tv_grep', '--premiere', '' ], 1 ], 82 [ [ 'tv_grep', '--new' ], 1 ], 83 [ [ 'tv_grep', '--channel-id', 'channel4.com' ], 1 ], 84 [ [ 'tv_grep', '--not', '--channel-id', 'channel4.com' ], 1 ], 85 [ [ 'tv_grep', '--on-after', '2002-02-05 UTC' ], 1 ], 86 [ [ 'tv_grep', '--eval', 'scalar keys %$_ > 5' ], 0 ], 87 [ [ 'tv_grep', '--category', 'e', '--and', '--title', 'f' ], 1 ], 88 [ [ 'tv_grep', '--category', 'g', '--or', '--title', 'h' ], 1 ], 89 [ [ 'tv_grep', '-i', '--category', 'i', '--title', 'j' ], 1 ], 90 [ [ 'tv_grep', '-i', '--category', 'i', '--title', 'h' ], 1 ], 91 ); 92} 93 94if (@ARGV) { 95 # Remaining arguments are regexps to match commands to run. 96 my @new_cmds; 97 my %seen; 98 foreach my $arg (@ARGV) { 99 foreach my $cmd (@cmds) { 100 for (join(' ', @{$cmd->[0]})) { 101 push @new_cmds, $cmd if /$arg/ and not $seen{$_}++; 102 } 103 } 104 } 105 die "no commands matched regexps: @ARGV" if not @new_cmds; 106 @cmds = @new_cmds; 107 print "running commands:\n", join("\n", map { join(' ', @{$_->[0]}) } @cmds), "\n"; 108} 109 110# Input files we could use to build test command lines. 111my @inputs = <$tests_dir/*.xml>; 112my @inputs_gz = <$tests_dir/*.xml.gz>; s/\.gz$// foreach @inputs_gz; 113@inputs = sort (@inputs, @inputs_gz); 114die "no test cases (*.xml, *.xml.gz) found in $tests_dir" 115 if not @inputs; 116foreach (@inputs) { 117 s!^\Q$tests_dir\E/!!o or die; 118} 119 120# We want to test multiple input files. But it would be way OTT to 121# test all permutations of all input files up to some length. Instead 122# we pick all single files and a handful of pairs. 123# 124my @tests; 125 126# The input file empty.xml is special: we particularly like to use it 127# in tests. Then there are another two files we refer to by name. 128# 129my $empty_input = 'empty.xml'; 130foreach ($empty_input, 'simple.xml', 'x-whatever.xml') { 131 die "file $tests_dir/$_ not found" if not -f "$tests_dir/$_"; 132} 133 134# We need to track the encoding of each input file so we don't try to 135# mix them on the same command line (not allowed). 136# 137my %input_encoding; 138foreach (@inputs) { 139 $input_encoding{$_} = ($_ eq 'test_livre.xml') ? 'ISO-8859-1' : 'UTF-8'; 140} 141my %all_encodings = reverse %input_encoding; 142 143# For historical reasons we like to have certain files at the front of 144# the list. Aargh, this is so horrible. 145# 146sub move_to_front( \@$ ) { 147 our @l; local *l = shift; 148 my $elem = shift; 149 my @r; 150 foreach (@l) { 151 if ($_ eq $elem) { 152 unshift @r, $_; 153 } 154 else { 155 push @r, $_; 156 } 157 } 158 @l = @r; 159} 160foreach ('dups.xml', 'clump.xml', 'amp.xml', $empty_input) { 161 move_to_front @inputs, $_; 162} 163 164# Add a test to the list. Arguments are listref of filenames, and 165# optional name for this set of files. 166# 167sub add_test( $;$ ) { 168 my ($files, $name) = @_; 169 $name = join('_', @$files) if not defined $name; 170 my $enc; 171 foreach (@$files) { 172 if (defined $enc and $enc ne $input_encoding{$_}) { 173 die 'trying to add test with two different encodings'; 174 } 175 else { 176 $enc = $input_encoding{$_}; 177 } 178 } 179 push @tests, { inputs => $files, name => $name }; 180} 181 182# A quick and effective test for each command is to run it on all the 183# input files at once. But we have to segregate them by encoding. 184# 185my %used_enc_name; 186foreach my $enc (sort keys %all_encodings) { 187 (my $enc_name = $enc) =~ tr/[A-Za-z0-9]//dc; 188 die "cannot make name for encoding $enc" 189 if $enc_name eq ''; 190 die "two encodings go to same name $enc_name" 191 if $used_enc_name{$enc_name}++; 192 my @files = grep { $input_encoding{$_} eq $enc } @inputs; 193 if (@files == 0) { 194 # Shouldn't happen. 195 die "strange, no files for $enc"; 196 } 197 elsif (@files == 1) { 198 # No point adding this as it will be run as an individual 199 # test. 200 # 201 } 202 else { 203 add_test(\@files, "all_$enc_name"); 204 } 205} 206 207# One important test is two empty files in the middle of the list. 208add_test([ $inputs[1], $empty_input, $empty_input, $inputs[2] ]); 209 210# Another special case we want to run every time. 211add_test([ 'simple.xml', 'x-whatever.xml' ]); 212 213# Another - check that duplicate channels are removed. 214add_test([ 'test.xml', 'test.xml' ]); 215 216if ($full) { 217 # Test some pairs of files, but not all possible pairs. 218 my $pair_limit = 4; die "too few inputs" if $pair_limit > @inputs; 219 foreach my $i (0 .. $pair_limit - 1) { 220 foreach my $j (0 .. $pair_limit - 1) { 221 add_test([ $inputs[$i], $inputs[$j] ]); 222 } 223 } 224 225 # Then all the single files. 226 add_test([ $_ ]) foreach @inputs; 227} 228else { 229 # Check overlapping warning from tv_sort. This ends up giving the 230 # input file to every command, not just tv_sort; oh well. 231 # 232 # Not needed in the case when $full is true because we test every 233 # individual file then. 234 # 235 add_test([ 'overlap.xml' ]); 236} 237 238# Any other environment needed (relative to $tests_dir) 239$ENV{PERL5LIB} .= ":.."; 240 241my %seen; 242 243# Count total number of tests to run. 244my $num_tests = 0; 245foreach (@cmds) { 246 $num_tests += scalar @tests; 247 $num_tests += scalar @tests if $_->[1]; # idem. test 248} 249print "1..$num_tests\n"; 250my $test_num = 0; 251foreach my $pair (@cmds) { 252 my ($cmd, $idem) = @$pair; 253 foreach my $test (@tests) { 254 my @test_inputs = @{$test->{inputs}}; 255 ++ $test_num; 256 my $test_name = join('_', @$cmd, $test->{name}); 257 $test_name =~ tr/A-Za-z0-9/_/sc; 258 die "two tests munge to $test_name" 259 if $seen{$test_name}++; 260 261 my @cmd = @$cmd; 262 my $base = "$tests_dir/$test_name"; 263 my $expected = "$base.expected"; 264 my $out = "$base.out"; 265 my $err = "$base.err"; 266 267 # Gunzip automatically before testing, gzip back again 268 # afterwards. Keys matter, values do not. 269 # 270 my (%to_gzip, %to_gunzip); 271 foreach (@test_inputs, $expected) { 272 my $gz = "$_.gz"; 273 if (not -e and -e $gz) { 274 $to_gunzip{$gz}++ && die "$gz seen twice"; 275 $to_gzip{$_}++ && die "$_ seen twice"; 276 } 277 } 278 system 'gzip', '-d', keys %to_gunzip if %to_gunzip; 279 280 # To unlink when tests are done - this hash can change. 281 # Again, only keys are important. (FIXME should encapsulate 282 # as 'Set' datatype.) 283 # 284 my %to_unlink = ($out => undef, $err => undef); 285 286 my $out_content; # contents of $out, to be filled in later 287 288 # TODO File::Spec 289 $cmd[0] = "$cmds_dir/$cmd[0]"; 290 $cmd[0] =~ s!/!\\!g if $^O eq 'MSWin32'; 291 if ($verbose) { 292 print STDERR "test $test_num: @cmd @test_inputs\n"; 293 } 294 295 my @in = map { "$tests_dir/$_" } @test_inputs; 296 my $okay = run(\@cmd, \@in, $out, $err); 297 # assume: if $okay then -e $out. 298 299 my $have_expected = -e $expected; 300 if (not $okay) { 301 print "not ok $test_num\n"; 302 delete $to_unlink{$out}; delete $to_unlink{$err}; 303 } 304 elsif ($okay and not $have_expected) { 305 # This should happen after adding a new test case, never 306 # when just running the tests. 307 # 308 warn "creating $expected\n"; 309 copy($out, $expected) 310 or die "cannot copy $out to $expected: $!"; 311 # Don't print any message - the test just 'did not run'. 312 } 313 elsif ($okay and $have_expected) { 314 $out_content = read_file($out); 315 my $expected_content = read_file($expected); 316 317 if ($out_content ne $expected_content) { 318 warn "failure for @cmd @in, see $base.*\n"; 319 print "not ok $test_num\n"; 320 $okay = 0; 321 delete $to_unlink{$out}; delete $to_unlink{$err}; 322 } 323 else { 324 # The output was correct: if there's also an 'expected 325 # error' file check that. Otherwise we do not check 326 # what was printed on stderr. 327 # 328 my $expected_err = "$base.expected_err"; 329 if (-e $expected_err) { 330 my $err_content = read_file($err); 331 my $expected_content = read_file($expected_err); 332 333 if ($err_content ne $expected_content) { 334 warn "failure for stderr of @cmd @in, see $base.*\n"; 335 print "not ok $test_num\n"; 336 $okay = 0; 337 delete $to_unlink{$out}; delete $to_unlink{$err}; 338 } 339 else { 340 print "ok $test_num\n"; 341 } 342 } 343 else { 344 # Don't check stderr. 345 print "ok $test_num\n"; 346 } 347 } 348 } 349 else { die } 350 351 if ($idem) { 352 ++ $test_num; 353 if ($verbose) { 354 print STDERR "test $test_num: "; 355 print STDERR "check that @cmd is idempotent on this input\n"; 356 } 357 if ($okay) { 358 die if not -e $out; 359 # Run the command again, on its own output. 360 my $twice_out = "$base.twice_out"; 361 my $twice_err = "$base.twice_err"; 362 $to_unlink{$twice_out} = $to_unlink{$twice_err} = undef; 363 364 my $twice_okay = run(\@cmd, [ $out ], $twice_out, $twice_err); 365 # assume: if $twice_okay then -e $twice_out. 366 367 if (not $twice_okay) { 368 print "not ok $test_num\n"; 369 delete $to_unlink{$out}; 370 delete $to_unlink{$twice_out}; 371 delete $to_unlink{$twice_err}; 372 } 373 else { 374 my $twice_out_content = read_file($twice_out); 375 my $ok; 376 if (not defined $out_content) { 377 warn "cannot run idempotence test for @cmd\n"; 378 $ok = 0; 379 } 380 elsif ($twice_out_content ne $out_content) { 381 warn "failure for idempotence of @cmd, see $base.*\n"; 382 $ok = 0; 383 } 384 else { $ok = 1 } 385 386 if (not $ok) { 387 print "not ok $test_num\n"; 388 delete $to_unlink{$out}; 389 delete $to_unlink{$twice_out}; 390 delete $to_unlink{$twice_err}; 391 } 392 else { 393 print "ok $test_num\n"; 394 } 395 } 396 } 397 else { 398 warn "skipping idempotence test for @cmd on @test_inputs\n"; 399 # Do not print 'ok' or 'not ok'. 400 } 401 } 402 403 foreach (keys %to_unlink) { 404 (not -e) or unlink or warn "cannot unlink $_: $!"; 405 } 406 system 'gzip', keys %to_gzip if %to_gzip; 407 } 408} 409die "ran $test_num tests, expected to run $num_tests" 410 if $test_num != $num_tests; 411 412 413# run() 414# 415# Run a Perl command redirecting input and output. This is not fully 416# general - it relies on the --output option working for redirecting 417# output. (Don't know why I decided this, but it does.) 418# 419# Parameters: 420# (ref to) list of command and arguments 421# (ref to) list of input filenames 422# output filename 423# error output filename 424# 425# This routine is specialized to Perl stuff running during the test 426# suite; it has the necessary -Iwhatever arguments. 427# 428# Dies if error opening or closing files, or if the command is killed 429# by a signal. Otherwise creates the output files, and returns 430# success or failure of the command. 431# 432sub run( $$$$ ) { 433 my ($cmd, $in, $out, $err) = @_; die if not defined $cmd; 434 my @cmd = (qw(perl -Iblib/arch -Iblib/lib), @$cmd, 435 @$in, 436 '--output', $out); 437 438 # Redirect stderr to file $err. 439 open(OLDERR, '>&STDERR') or die "cannot dup stderr: $!\n"; 440 if (not open(STDERR, ">$err")) { 441 print OLDERR "cannot write to $err: $!\n"; 442 exit(1); 443 } 444 445 # Run the command. 446 my $r = system(@cmd); 447 448 # Restore old stderr. 449 if (not close(STDERR)) { 450 print OLDERR "cannot close $err: $!\n"; 451 exit(1); 452 } 453 if (not open(STDERR, ">&OLDERR")) { 454 print OLDERR "cannot dup stderr back again: $!\n"; 455 exit(1); 456 } 457 458 # Check command return status. 459 if ($r) { 460 my ($status, $sig, $core) = ($? >> 8, $? & 127, $? & 128); 461 if ($sig) { 462 die "@cmd killed by signal $sig, aborting"; 463 } 464 warn "@cmd failed: $status, $sig, $core\n"; 465 return 0; 466 } 467 468 return 1; 469} 470 471 472sub read_file( $ ) { 473 my $f = shift; 474 local $/ = undef; 475 local *FH; 476 open(FH, $f) or die "cannot open $f: $!"; 477 my $content = <FH>; 478 close FH or die "cannot close $f: $!"; 479 return $content; 480} 481