1#!/usr/bin/perl 2use version; 3our $API_VERSION = $Bio::SeqIO::msout::API_VERSION; 4use strict; 5use File::Path qw(mkpath rmtree); 6 7BEGIN { 8 use Bio::Root::Test; 9 10 test_begin( 11 -tests => 165, 12 -requires_modules => [q(Bio::SeqIO::msout)], 13 -requires_networking => 0 14 ); 15 16 use_ok('Bio::SeqIO::msout'); 17 18} 19 20# skip tests if the msout.pm module is too old. 21my $api_version = $Bio::SeqIO::msout::API_VERSION; 22cmp_ok( $api_version, '>=', qv('1.1.5'), 23 "Bio::SeqIO::msout is at least api version 1.1.5" ); 24 25test_file_1( 0, "msout/msout_infile1" ); # 23 tests 26test_file_2( 0, "msout/msout_infile2" ); # 22 tests 27test_file_3( 0, "msout/msout_infile3" ); # 17 tests 28 29# tests to run for api versions >= 1.1.6 30SKIP: { 31 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6), 22 32 unless $api_version >= qv('1.1.6'); 33 test_file_1( 0, q(msout/msout_infile4) ); 34} 35 36# tests to run for api versions >= 1.1.7 37SKIP: { 38 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.7), 4 39 unless $api_version >= qv('1.1.7'); 40 bad_test_file_1( 0, q(msout/bad_msout_infile1) ); # 2 tests 41 bad_test_file_2( 0, q(msout/bad_msout_infile2) ); # 2 tests 42} 43 44# tests to run for api version >= 1.1.8 45SKIP: { 46 skip q($Bio::SeqIO::msout::API_VERSION < 1.1.8), 75 47 unless $api_version >= qv('1.1.8'); 48 49 test_file_1( 0, "msout/msout_infile1", 100 ); 50 test_file_2( 0, "msout/msout_infile2", 10 ); 51 test_file_1( 0, q(msout/msout_infile4), 100 ); 52 bad_test_file_1( 0, q(msout/bad_msout_infile1), 1000 ); 53 bad_test_file_2( 0, q(msout/bad_msout_infile2), 1000 ); 54 bad_n_sites( 0, q(msout/msout_infile1) ); # 2 tests 55} 56 57sub create_dir { 58 59 my $dir = shift; 60 61 $dir = Bio::Root::Test::test_input_file($dir); 62 63 unless ( -d $dir ) { 64 mkpath($dir); 65 } 66} 67 68sub remove_dir { 69 70 my $dir = shift; 71 72 $dir = Bio::Root::Test::test_input_file($dir); 73 74 if ( -d $dir ) { 75 rmtree($dir); 76 } 77 else { warn "Tried to remove $dir, but it does not exist" } 78} 79 80sub test_file_1 { 81############################################################################## 82## Test file 1 83############################################################################## 84 85 my $gzip = shift; 86 my $infile = shift; 87 my $n_sites = shift; 88 $infile = Bio::Root::Test::test_input_file($infile); 89 90 # the files are now part of the git repo and don't have to be printed 91 # print_file1( $infile, $gzip ); 92 93 my $file_sequence = $infile; 94 if ($gzip) { 95 $file_sequence = "gzip -dc <$file_sequence |"; 96 } 97 my $msout = Bio::SeqIO->new( 98 -file => "$file_sequence", 99 -format => 'msout', 100 -n_sites => $n_sites, 101 ); 102 103 isa_ok( $msout, 'Bio::SeqIO::msout' ); 104 105 my $rh_base_conversion_table = $msout->get_base_conversion_table; 106 107 my %attributes = ( 108 RUNS => 3, 109 SEGSITES => 7, 110 N_SITES => $n_sites, 111 SEEDS => [qw(1 1 1)], 112 MS_INFO_LINE => 'ms 6 3 -s 7 -I 3 3 2 1', 113 TOT_RUN_HAPS => 6, 114 POPS => [qw(3 2 1)], 115 NEXT_RUN_NUM => 1, 116 LAST_READ_HAP_NUM => 0, 117 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)], 118 CURRENT_RUN_SEGSITES => 7 119 ); 120 121 foreach my $attribute ( keys %attributes ) { 122 my $func = lc($attribute); 123 124 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) { 125 $func = ucfirst($func); 126 } 127 128 $func = 'get_' . $func; 129 my @returns = $msout->$func(); 130 my ( $return, $got ); 131 132 # If there were more than one return value, then compare references to 133 # arrays instead of scalars 134 unless ( @returns > 1 ) { 135 $got = shift @returns; 136 } 137 else { $got = \@returns } 138 139 my $expected = $attributes{$attribute}; 140 141 if ( defined $got && defined $expected ) { 142 is_deeply( $got, $expected, "Get $attribute" ); 143 } 144 else { is_deeply( $got, $expected, "Get $attribute" ) } 145 } 146 147 # Testing next_hap at beginning of run 148 my @data_got = 149 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq ); 150 my @data_expected; 151 if ( !defined($n_sites) ) { 152 @data_expected = qw(1111111); 153 } 154 else { 155 @data_expected = 156 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000); 157 } 158 is_deeply( \@data_got, \@data_expected, 159 "Get next_hap at beginning of run" ); 160 161 # Testing next_hap after beginning of run 162 @data_got = 163 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq ); 164 if ( !defined($n_sites) ) { 165 @data_expected = qw(5555555); 166 } 167 else { 168 @data_expected = 169 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000); 170 } 171 is_deeply( \@data_got, \@data_expected, 172 "Get next_hap after beginning of run" ); 173 174 # Surprise test! testing msout::outgroup 175 my $outgroup = $msout->outgroup; 176 is( $outgroup, 1, "Testing msout::outgroup" ); 177 178 # Testing next_pop after beginning of pop 179 @data_got = 180 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop ); 181 if ( !defined($n_sites) ) { 182 @data_expected = qw(4444444); 183 } 184 else { 185 @data_expected = 186 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000); 187 } 188 is_deeply( \@data_got, \@data_expected, 189 "Get next_pop after beginning of pop" ); 190 191 # Testing next_pop at beginning of pop 192 @data_got = 193 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop ); 194 if ( !defined($n_sites) ) { 195 @data_expected = qw(4444444 5555555); 196 } 197 else { 198 @data_expected = 199 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000); 200 } 201 is_deeply( \@data_got, \@data_expected, 202 "Get next_pop at beginning of pop" ); 203 204 # Testing next_run after beginning of run 205 @data_got = 206 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run ); 207 if ( !defined($n_sites) ) { 208 @data_expected = qw(4444444); 209 } 210 else { 211 @data_expected = 212 qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000); 213 } 214 is_deeply( \@data_got, \@data_expected, 215 "Get next_run after beginning of run" ); 216 217 # Testing next_pop at beginning of run 218 @data_got = 219 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop ); 220 if ( !defined($n_sites) ) { 221 @data_expected = qw(5555555 5555555 5555555); 222 } 223 else { 224 @data_expected = 225 qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000); 226 } 227 is_deeply( \@data_got, \@data_expected, 228 "Get next_pop at beginning of run" ); 229 230 # Testing next_hap after pop 231 @data_got = $msout->get_next_hap; 232 @data_expected = qw(1010101); 233 is_deeply( \@data_got, \@data_expected, "Get next_hap after pop" ); 234 235 # Testing next_run after pop and hap 236 @data_got = 237 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run ); 238 if ( !defined($n_sites) ) { 239 @data_expected = qw(1111111 1515151); 240 } 241 else { 242 @data_expected = 243 qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000); 244 } 245 is_deeply( \@data_got, \@data_expected, "Get next_run after pop and hap" ); 246 247 # Testing next_run at beginning of run 248 @data_got = 249 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run ); 250 if ( !defined($n_sites) ) { 251 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151); 252 } 253 else { 254 @data_expected = 255 qw(1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000); 256 } 257 is_deeply( \@data_got, \@data_expected, 258 "Get next_run at beginning of run" ); 259 260 is( $msout->get_next_run_num, undef, 'have all lines been read?' ); 261} 262 263sub test_file_2 { 264############################################################################## 265## Test file 2 266############################################################################## 267 268 my $gzip = shift; 269 my $infile = shift; 270 my $n_sites = shift; 271 $infile = Bio::Root::Test::test_input_file($infile); 272 273 # the files are now part of the git repo and don't have to be printed 274 # print_file2( $infile, $gzip ); 275 276 my $file_sequence = $infile; 277 if ($gzip) { 278 $file_sequence = "gzip -dc <$file_sequence |"; 279 } 280 281 my $msout = Bio::SeqIO->new( 282 -file => "$file_sequence", 283 -format => 'msout', 284 -n_sites => $n_sites, 285 ); 286 287 isa_ok( $msout, 'Bio::SeqIO::msout' ); 288 289 my %attributes = ( 290 RUNS => 3, 291 SEGSITES => 7, 292 N_SITES => $n_sites, 293 SEEDS => [qw(1 1 1)], 294 MS_INFO_LINE => 'ms 6 3', 295 TOT_RUN_HAPS => 6, 296 POPS => 6, 297 NEXT_RUN_NUM => 1, 298 LAST_READ_HAP_NUM => 0, 299 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)], 300 CURRENT_RUN_SEGSITES => 7 301 ); 302 303 foreach my $attribute ( keys %attributes ) { 304 my $func = lc($attribute); 305 306 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) { 307 $func = ucfirst($func); 308 } 309 310 $func = 'get_' . $func; 311 my @returns = $msout->$func(); 312 my ( $return, $got ); 313 314 # If there were more than one return value, then compare references to 315 # arrays instead of scalars 316 unless ( @returns > 1 ) { 317 $got = shift @returns; 318 } 319 else { $got = \@returns } 320 321 my $expected = $attributes{$attribute}; 322 323 if ( defined $got && defined $expected ) { 324 is_deeply( $got, $expected, "Get $attribute" ); 325 } 326 else { is_deeply( $got, $expected, "Get $attribute" ) } 327 } 328 329 my $rh_base_conversion_table = $msout->get_base_conversion_table; 330 331 # Testing next_hap at beginning of run 332 my @data_got = $msout->get_next_hap; 333 my @data_expected = '1111111'; 334 is_deeply( \@data_got, \@data_expected, 335 "Get next_hap at beginning of run" ); 336 337 # Testing next_hap after beginning of run 338 @data_got = 339 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq ); 340 if ( !defined($n_sites) ) { 341 @data_expected = '5555555'; 342 } 343 else { 344 @data_expected = '5555055500'; 345 } 346 is_deeply( \@data_got, \@data_expected, 347 "Get next_hap after beginning of run" ); 348 349 # Surprise test! testing msout::outgroup 350 my $outgroup = $msout->outgroup; 351 is( $outgroup, 0, "Testing msout::outgroup" ); 352 353 # Testing next_pop after beginning of pop 354 @data_got = 355 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop ); 356 if ( !defined($n_sites) ) { 357 @data_expected = qw(4444444 4444444 5555555 4444444); 358 } 359 else { 360 @data_expected = qw(4444044400 4444044400 5555055500 4444044400); 361 } 362 is_deeply( \@data_got, \@data_expected, 363 "Get next_pop after beginning of pop" ); 364 365 # Testing next_pop at beginning of pop/run 366 @data_got = 367 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop ); 368 if ( !defined($n_sites) ) { 369 @data_expected = qw(5555555 5555555 5555555 1010101 1111111 1515151); 370 } 371 else { 372 @data_expected = 373 qw(5555055500 5555055500 5555055500 1010010100 1111011100 1515015100); 374 } 375 is_deeply( \@data_got, \@data_expected, 376 "Get next_pop at beginning of pop/run" ); 377 378 # Testing next_run at beginning of run 379 @data_got = 380 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run ); 381 if ( !defined($n_sites) ) { 382 @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151); 383 } 384 else { 385 @data_expected = 386 qw(1414014100 1414014100 1515015100 1414014100 1515015100 1515015100); 387 } 388 is_deeply( \@data_got, \@data_expected, 389 "Get next_run at beginning of run" ); 390 391 # Testing next_hap at beginning of run 2 392 @data_got = 393 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq ); 394 if ( !defined($n_sites) ) { 395 @data_expected = '1515151'; 396 } 397 else { 398 @data_expected = '1515015100'; 399 } 400 is_deeply( \@data_got, \@data_expected, 401 "Get next_hap at beginning of run 2" ); 402 403 # Testing next_run after hap 404 @data_got = 405 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run ); 406 if ( !defined($n_sites) ) { 407 @data_expected = qw(5050505 5151515 5555555 5454545 5454545); 408 } 409 else { 410 @data_expected = 411 qw(5050050500 5151051500 5555055500 5454054500 5454054500); 412 } 413 is_deeply( \@data_got, \@data_expected, "Get next_run after hap" ); 414 415 is( $msout->get_next_run_num, 5, 'next run should be 5.' ); 416 417 # getting the last hap of the file via next hap 418 # Testing next_run after hap 419 @data_got = $msout->get_next_hap; 420 @data_expected = qw( 5555555 ); 421 is_deeply( \@data_got, \@data_expected, "Get last hap through next_hap" ); 422 423} 424 425sub test_file_3 { 426############################################################################## 427## Test file 3 428############################################################################## 429 430 my $gzip = shift; 431 my $infile = shift; 432 $infile = Bio::Root::Test::test_input_file($infile); 433 434 # the files are now part of the git repo and don't have to be printed 435 # print_file3( $infile, $gzip ); 436 437 my $file_sequence = $infile; 438 if ($gzip) { 439 $file_sequence = "gzip -dc <$file_sequence |"; 440 } 441 my $msout = Bio::SeqIO->new( 442 -file => "$file_sequence", 443 -format => 'msout', 444 ); 445 446 isa_ok( $msout, 'Bio::SeqIO::msout' ); 447 448 my $rh_base_conversion_table = $msout->get_base_conversion_table; 449 450 my %attributes = ( 451 RUNS => 1, 452 SEGSITES => 7, 453 SEEDS => [qw(1 1 1)], 454 MS_INFO_LINE => 'ms 3 1', 455 TOT_RUN_HAPS => 3, 456 POPS => 3, 457 NEXT_RUN_NUM => 1, 458 LAST_READ_HAP_NUM => 0, 459 POSITIONS => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)], 460 CURRENT_RUN_SEGSITES => 7 461 ); 462 463 foreach my $attribute ( keys %attributes ) { 464 my $func = lc($attribute); 465 466 if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) { 467 $func = ucfirst($func); 468 } 469 470 $func = 'get_' . $func; 471 my @returns = $msout->$func(); 472 my ( $return, $got ); 473 474 # If there were more than one return value, then compare references to 475 # arrays instead of scalars 476 unless ( @returns > 1 ) { 477 $got = shift @returns; 478 } 479 else { $got = \@returns } 480 481 my $expected = $attributes{$attribute}; 482 483 if ( defined $got && defined $expected ) { 484 is_deeply( $got, $expected, "Get $attribute" ); 485 } 486 else { is_deeply( $got, $expected, "Get $attribute" ) } 487 } 488 489 # Testing next_hap at beginning of run 490 my @data_got = 491 convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop ); 492 my @data_expected = qw(1111111 5555555 4444444); 493 is_deeply( \@data_got, \@data_expected, "Get next_pop at end of run" ); 494 495 is( $msout->get_next_run_num, undef, 'have all lines been read?' ); 496 497 # Testing what happens when we read from empty stream 498 @data_got = $msout->get_next_pop; 499 @data_expected = (); 500 is_deeply( \@data_got, \@data_expected, "Get next_pop at eof" ); 501 502 # Testing what happens when we read from empty stream 503 @data_got = $msout->get_next_run; 504 @data_expected = (); 505 is_deeply( \@data_got, \@data_expected, "Get next_run at eof" ); 506 507 # Testing what happens when we read from empty stream 508 @data_got = $msout->get_next_hap; 509 @data_expected = undef; 510 is_deeply( \@data_got, \@data_expected, "Get next_hap at eof" ); 511 512 # Testing what happens when we read from empty stream 513 @data_got = $msout->get_next_seq; 514 @data_expected = (); 515 is_deeply( \@data_got, \@data_expected, "Get next_seq at eof" ); 516 517} 518 519sub print_to_file { 520 my ( $ra_in, $out ) = @_; 521 open my $OUT, '>', $out or die "\nCould not write outfile '$out': $!\n"; 522 print $OUT ("@$ra_in"); 523 close $OUT; 524} 525 526sub convert_bases_to_nums { 527 528 my ( $rh_base_conversion_table, @seqs ) = @_; 529 530 my @out_seqstrings; 531 foreach my $seq (@seqs) { 532 my $seqstring = $seq->seq; 533 foreach my $base ( keys %{$rh_base_conversion_table} ) { 534 $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g; 535 } 536 push @out_seqstrings, $seqstring; 537 } 538 539 return @out_seqstrings; 540 541} 542 543sub bad_test_file_1 { 544############################################################################## 545## Bad Test file 1 546############################################################################## 547 548 # This sub tests to see if msout.pm will catch if the msinfo line's 549 # advertized haps are less than are actually in the file 550 551 my $gzip = shift; 552 my $infile = shift; 553 my $n_sites = shift; 554 $infile = test_input_file($infile); 555 556 my $file_sequence = $infile; 557 if ($gzip) { 558 $file_sequence = "gunzip -c <$file_sequence |"; 559 } 560 my $msout = Bio::SeqIO->new( 561 -file => "$file_sequence", 562 -format => 'msout', 563 -n_sites => $n_sites, 564 ); 565 566 isa_ok( $msout, 'Bio::SeqIO::msout' ); 567 568 throws_ok { $msout->get_next_run } 569qr/msout file has only 2 hap\(s\), which is less than indicated in msinfo line \( 9 \)/, 570 q(Caught error in bad msout file 1); 571 572} 573 574sub bad_test_file_2 { 575############################################################################## 576## Bad Test file 2 577############################################################################## 578 579 # This sub tests to see if msout.pm will catch if the msinfo line's 580 # advertized haps are more than are actually in the file 581 582 my $gzip = shift; 583 my $infile = shift; 584 my $n_sites = shift; 585 $infile = test_input_file($infile); 586 587 my $file_sequence = $infile; 588 if ($gzip) { 589 $file_sequence = "gunzip -c <$file_sequence |"; 590 } 591 my $msout = Bio::SeqIO->new( 592 -file => "$file_sequence", 593 -format => 'msout', 594 -n_sites => $n_sites, 595 ); 596 597 isa_ok( $msout, 'Bio::SeqIO::msout' ); 598 599 throws_ok { $msout->get_next_run } 600qr/\'\/\/\' not encountered when expected. There are more haplos in one of the msOUT runs than advertised in the msinfo line/, 601 q(Caught error in bad msout file 2); 602 603} 604 605sub bad_n_sites { 606############################################################################## 607## Bad n_sites 608############################################################################## 609 610 # this sub tests if msout.pm dies when n_sites is smaller than segsites 611 my $gzip = shift; 612 my $infile = shift; 613 $infile = Bio::Root::Test::test_input_file($infile); 614 615 my $file_sequence = $infile; 616 if ($gzip) { 617 $file_sequence = "gzip -dc <$file_sequence |"; 618 } 619 my $msout = Bio::SeqIO->new( 620 -file => "$file_sequence", 621 -format => 'msout', 622 ); 623 624 # test nsites -1 625 throws_ok { $msout->set_n_sites(-1) } qr|first argument needs to be a positive integer. argument supplied: -1|; 626 627 # test nsites smaller than next hap 628 $msout->set_n_sites(1); 629 throws_ok{$msout->get_next_seq} qr/n_sites needs to be at least the number of segsites of every run/, 'too few n_sites failed OK'; 630 631} 632