1#!/usr/bin/perl
2use version;
3our $API_VERSION = $Bio::SeqIO::msout::API_VERSION;
4use strict;
5use File::Path qw(mkpath rmtree);
6
7BEGIN {
8    use Bio::Root::Test;
9
10    test_begin(
11        -tests               => 165,
12        -requires_modules    => [q(Bio::SeqIO::msout)],
13        -requires_networking => 0
14    );
15
16    use_ok('Bio::SeqIO::msout');
17
18}
19
20# skip tests if the msout.pm module is too old.
21my $api_version = $Bio::SeqIO::msout::API_VERSION;
22cmp_ok( $api_version, '>=', qv('1.1.5'),
23    "Bio::SeqIO::msout is at least api version 1.1.5" );
24
25test_file_1( 0, "msout/msout_infile1" );    # 23 tests
26test_file_2( 0, "msout/msout_infile2" );    # 22 tests
27test_file_3( 0, "msout/msout_infile3" );    # 17 tests
28
29# tests to run for api versions >= 1.1.6
30SKIP: {
31    skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6), 22
32      unless $api_version >= qv('1.1.6');
33    test_file_1( 0, q(msout/msout_infile4) );
34}
35
36# tests to run for api versions >= 1.1.7
37SKIP: {
38    skip q($Bio::SeqIO::msout::API_VERSION < 1.1.7), 4
39      unless $api_version >= qv('1.1.7');
40    bad_test_file_1( 0, q(msout/bad_msout_infile1) ); # 2 tests
41    bad_test_file_2( 0, q(msout/bad_msout_infile2) ); # 2 tests
42}
43
44# tests to run for api version >= 1.1.8
45SKIP: {
46    skip q($Bio::SeqIO::msout::API_VERSION < 1.1.8), 75
47      unless $api_version >= qv('1.1.8');
48
49    test_file_1( 0, "msout/msout_infile1", 100 );
50    test_file_2( 0, "msout/msout_infile2", 10 );
51    test_file_1( 0, q(msout/msout_infile4), 100 );
52    bad_test_file_1( 0, q(msout/bad_msout_infile1), 1000 );
53    bad_test_file_2( 0, q(msout/bad_msout_infile2), 1000 );
54    bad_n_sites( 0, q(msout/msout_infile1) ); # 2 tests
55}
56
57sub create_dir {
58
59    my $dir = shift;
60
61    $dir = Bio::Root::Test::test_input_file($dir);
62
63    unless ( -d $dir ) {
64        mkpath($dir);
65    }
66}
67
68sub remove_dir {
69
70    my $dir = shift;
71
72    $dir = Bio::Root::Test::test_input_file($dir);
73
74    if ( -d $dir ) {
75        rmtree($dir);
76    }
77    else { warn "Tried to remove $dir, but it does not exist" }
78}
79
80sub test_file_1 {
81##############################################################################
82## Test file 1
83##############################################################################
84
85    my $gzip    = shift;
86    my $infile  = shift;
87    my $n_sites = shift;
88    $infile = Bio::Root::Test::test_input_file($infile);
89
90    # the files are now part of the git repo and don't have to be printed
91    #    print_file1( $infile, $gzip );
92
93    my $file_sequence = $infile;
94    if ($gzip) {
95        $file_sequence = "gzip -dc <$file_sequence |";
96    }
97    my $msout = Bio::SeqIO->new(
98        -file    => "$file_sequence",
99        -format  => 'msout',
100        -n_sites => $n_sites,
101    );
102
103    isa_ok( $msout, 'Bio::SeqIO::msout' );
104
105    my $rh_base_conversion_table = $msout->get_base_conversion_table;
106
107    my %attributes = (
108        RUNS                 => 3,
109        SEGSITES             => 7,
110        N_SITES              => $n_sites,
111        SEEDS                => [qw(1 1 1)],
112        MS_INFO_LINE         => 'ms 6 3 -s 7 -I 3 3 2 1',
113        TOT_RUN_HAPS         => 6,
114        POPS                 => [qw(3 2 1)],
115        NEXT_RUN_NUM         => 1,
116        LAST_READ_HAP_NUM    => 0,
117        POSITIONS            => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
118        CURRENT_RUN_SEGSITES => 7
119    );
120
121    foreach my $attribute ( keys %attributes ) {
122        my $func = lc($attribute);
123
124        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
125            $func = ucfirst($func);
126        }
127
128        $func = 'get_' . $func;
129        my @returns = $msout->$func();
130        my ( $return, $got );
131
132        # If there were more than one return value, then compare references to
133        # arrays instead of scalars
134        unless ( @returns > 1 ) {
135            $got = shift @returns;
136        }
137        else { $got = \@returns }
138
139        my $expected = $attributes{$attribute};
140
141        if ( defined $got && defined $expected ) {
142            is_deeply( $got, $expected, "Get $attribute" );
143        }
144        else { is_deeply( $got, $expected, "Get $attribute" ) }
145    }
146
147    # Testing next_hap at beginning of run
148    my @data_got =
149      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
150    my @data_expected;
151    if ( !defined($n_sites) ) {
152        @data_expected = qw(1111111);
153    }
154    else {
155        @data_expected =
156          qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000);
157    }
158    is_deeply( \@data_got, \@data_expected,
159        "Get next_hap at beginning of run" );
160
161    # Testing next_hap after beginning of run
162    @data_got =
163      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
164    if ( !defined($n_sites) ) {
165        @data_expected = qw(5555555);
166    }
167    else {
168        @data_expected =
169          qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
170    }
171    is_deeply( \@data_got, \@data_expected,
172        "Get next_hap after beginning of run" );
173
174    # Surprise test! testing msout::outgroup
175    my $outgroup = $msout->outgroup;
176    is( $outgroup, 1, "Testing msout::outgroup" );
177
178    # Testing next_pop after beginning of pop
179    @data_got =
180      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
181    if ( !defined($n_sites) ) {
182        @data_expected = qw(4444444);
183    }
184    else {
185        @data_expected =
186          qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
187    }
188    is_deeply( \@data_got, \@data_expected,
189        "Get next_pop after beginning of pop" );
190
191    # Testing next_pop at beginning of pop
192    @data_got =
193      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
194    if ( !defined($n_sites) ) {
195        @data_expected = qw(4444444 5555555);
196    }
197    else {
198        @data_expected =
199          qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
200    }
201    is_deeply( \@data_got, \@data_expected,
202        "Get next_pop at beginning of pop" );
203
204    # Testing next_run after beginning of run
205    @data_got =
206      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
207    if ( !defined($n_sites) ) {
208        @data_expected = qw(4444444);
209    }
210    else {
211        @data_expected =
212          qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
213    }
214    is_deeply( \@data_got, \@data_expected,
215        "Get next_run after beginning of run" );
216
217    # Testing next_pop at beginning of run
218    @data_got =
219      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
220    if ( !defined($n_sites) ) {
221        @data_expected = qw(5555555 5555555 5555555);
222    }
223    else {
224        @data_expected =
225          qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
226    }
227    is_deeply( \@data_got, \@data_expected,
228        "Get next_pop at beginning of run" );
229
230    # Testing next_hap after pop
231    @data_got      = $msout->get_next_hap;
232    @data_expected = qw(1010101);
233    is_deeply( \@data_got, \@data_expected, "Get next_hap after pop" );
234
235    # Testing next_run after pop and hap
236    @data_got =
237      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
238    if ( !defined($n_sites) ) {
239        @data_expected = qw(1111111 1515151);
240    }
241    else {
242        @data_expected =
243          qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
244    }
245    is_deeply( \@data_got, \@data_expected, "Get next_run after pop and hap" );
246
247    # Testing next_run at beginning of run
248    @data_got =
249      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
250    if ( !defined($n_sites) ) {
251        @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
252    }
253    else {
254        @data_expected =
255          qw(1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
256    }
257    is_deeply( \@data_got, \@data_expected,
258        "Get next_run at beginning of run" );
259
260    is( $msout->get_next_run_num, undef, 'have all lines been read?' );
261}
262
263sub test_file_2 {
264##############################################################################
265## Test file 2
266##############################################################################
267
268    my $gzip    = shift;
269    my $infile  = shift;
270    my $n_sites = shift;
271    $infile = Bio::Root::Test::test_input_file($infile);
272
273    # the files are now part of the git repo and don't have to be printed
274    #    print_file2( $infile, $gzip );
275
276    my $file_sequence = $infile;
277    if ($gzip) {
278        $file_sequence = "gzip -dc <$file_sequence |";
279    }
280
281    my $msout = Bio::SeqIO->new(
282        -file    => "$file_sequence",
283        -format  => 'msout',
284        -n_sites => $n_sites,
285    );
286
287    isa_ok( $msout, 'Bio::SeqIO::msout' );
288
289    my %attributes = (
290        RUNS                 => 3,
291        SEGSITES             => 7,
292        N_SITES              => $n_sites,
293        SEEDS                => [qw(1 1 1)],
294        MS_INFO_LINE         => 'ms 6 3',
295        TOT_RUN_HAPS         => 6,
296        POPS                 => 6,
297        NEXT_RUN_NUM         => 1,
298        LAST_READ_HAP_NUM    => 0,
299        POSITIONS            => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
300        CURRENT_RUN_SEGSITES => 7
301    );
302
303    foreach my $attribute ( keys %attributes ) {
304        my $func = lc($attribute);
305
306        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
307            $func = ucfirst($func);
308        }
309
310        $func = 'get_' . $func;
311        my @returns = $msout->$func();
312        my ( $return, $got );
313
314        # If there were more than one return value, then compare references to
315        # arrays instead of scalars
316        unless ( @returns > 1 ) {
317            $got = shift @returns;
318        }
319        else { $got = \@returns }
320
321        my $expected = $attributes{$attribute};
322
323        if ( defined $got && defined $expected ) {
324            is_deeply( $got, $expected, "Get $attribute" );
325        }
326        else { is_deeply( $got, $expected, "Get $attribute" ) }
327    }
328
329    my $rh_base_conversion_table = $msout->get_base_conversion_table;
330
331    # Testing next_hap at beginning of run
332    my @data_got      = $msout->get_next_hap;
333    my @data_expected = '1111111';
334    is_deeply( \@data_got, \@data_expected,
335        "Get next_hap at beginning of run" );
336
337    # Testing next_hap after beginning of run
338    @data_got =
339      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
340    if ( !defined($n_sites) ) {
341        @data_expected = '5555555';
342    }
343    else {
344        @data_expected = '5555055500';
345    }
346    is_deeply( \@data_got, \@data_expected,
347        "Get next_hap after beginning of run" );
348
349    # Surprise test! testing msout::outgroup
350    my $outgroup = $msout->outgroup;
351    is( $outgroup, 0, "Testing msout::outgroup" );
352
353    # Testing next_pop after beginning of pop
354    @data_got =
355      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
356    if ( !defined($n_sites) ) {
357        @data_expected = qw(4444444 4444444 5555555 4444444);
358    }
359    else {
360        @data_expected = qw(4444044400 4444044400 5555055500 4444044400);
361    }
362    is_deeply( \@data_got, \@data_expected,
363        "Get next_pop after beginning of pop" );
364
365    # Testing next_pop at beginning of pop/run
366    @data_got =
367      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
368    if ( !defined($n_sites) ) {
369        @data_expected = qw(5555555 5555555 5555555 1010101 1111111 1515151);
370    }
371    else {
372        @data_expected =
373          qw(5555055500 5555055500 5555055500 1010010100 1111011100 1515015100);
374    }
375    is_deeply( \@data_got, \@data_expected,
376        "Get next_pop at beginning of pop/run" );
377
378    # Testing next_run at beginning of run
379    @data_got =
380      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
381    if ( !defined($n_sites) ) {
382        @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
383    }
384    else {
385        @data_expected =
386          qw(1414014100 1414014100 1515015100 1414014100 1515015100 1515015100);
387    }
388    is_deeply( \@data_got, \@data_expected,
389        "Get next_run at beginning of run" );
390
391    # Testing next_hap at beginning of run 2
392    @data_got =
393      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
394    if ( !defined($n_sites) ) {
395        @data_expected = '1515151';
396    }
397    else {
398        @data_expected = '1515015100';
399    }
400    is_deeply( \@data_got, \@data_expected,
401        "Get next_hap at beginning of run 2" );
402
403    # Testing next_run after hap
404    @data_got =
405      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
406    if ( !defined($n_sites) ) {
407        @data_expected = qw(5050505 5151515 5555555 5454545 5454545);
408    }
409    else {
410        @data_expected =
411          qw(5050050500 5151051500 5555055500 5454054500 5454054500);
412    }
413    is_deeply( \@data_got, \@data_expected, "Get next_run after hap" );
414
415    is( $msout->get_next_run_num, 5, 'next run should be 5.' );
416
417    # getting the last hap of the file via next hap
418    # Testing next_run after hap
419    @data_got      = $msout->get_next_hap;
420    @data_expected = qw( 5555555 );
421    is_deeply( \@data_got, \@data_expected, "Get last hap through next_hap" );
422
423}
424
425sub test_file_3 {
426##############################################################################
427## Test file 3
428##############################################################################
429
430    my $gzip    = shift;
431    my $infile  = shift;
432    $infile = Bio::Root::Test::test_input_file($infile);
433
434    # the files are now part of the git repo and don't have to be printed
435    #    print_file3( $infile, $gzip );
436
437    my $file_sequence = $infile;
438    if ($gzip) {
439        $file_sequence = "gzip -dc <$file_sequence |";
440    }
441    my $msout = Bio::SeqIO->new(
442        -file   => "$file_sequence",
443        -format => 'msout',
444    );
445
446    isa_ok( $msout, 'Bio::SeqIO::msout' );
447
448    my $rh_base_conversion_table = $msout->get_base_conversion_table;
449
450    my %attributes = (
451        RUNS                 => 1,
452        SEGSITES             => 7,
453        SEEDS                => [qw(1 1 1)],
454        MS_INFO_LINE         => 'ms 3 1',
455        TOT_RUN_HAPS         => 3,
456        POPS                 => 3,
457        NEXT_RUN_NUM         => 1,
458        LAST_READ_HAP_NUM    => 0,
459        POSITIONS            => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
460        CURRENT_RUN_SEGSITES => 7
461    );
462
463    foreach my $attribute ( keys %attributes ) {
464        my $func = lc($attribute);
465
466        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
467            $func = ucfirst($func);
468        }
469
470        $func = 'get_' . $func;
471        my @returns = $msout->$func();
472        my ( $return, $got );
473
474        # If there were more than one return value, then compare references to
475        # arrays instead of scalars
476        unless ( @returns > 1 ) {
477            $got = shift @returns;
478        }
479        else { $got = \@returns }
480
481        my $expected = $attributes{$attribute};
482
483        if ( defined $got && defined $expected ) {
484            is_deeply( $got, $expected, "Get $attribute" );
485        }
486        else { is_deeply( $got, $expected, "Get $attribute" ) }
487    }
488
489    # Testing next_hap at beginning of run
490    my @data_got =
491      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
492    my @data_expected = qw(1111111 5555555 4444444);
493    is_deeply( \@data_got, \@data_expected, "Get next_pop at end of run" );
494
495    is( $msout->get_next_run_num, undef, 'have all lines been read?' );
496
497    # Testing what happens when we read from empty stream
498    @data_got      = $msout->get_next_pop;
499    @data_expected = ();
500    is_deeply( \@data_got, \@data_expected, "Get next_pop at eof" );
501
502    # Testing what happens when we read from empty stream
503    @data_got      = $msout->get_next_run;
504    @data_expected = ();
505    is_deeply( \@data_got, \@data_expected, "Get next_run at eof" );
506
507    # Testing what happens when we read from empty stream
508    @data_got      = $msout->get_next_hap;
509    @data_expected = undef;
510    is_deeply( \@data_got, \@data_expected, "Get next_hap at eof" );
511
512    # Testing what happens when we read from empty stream
513    @data_got      = $msout->get_next_seq;
514    @data_expected = ();
515    is_deeply( \@data_got, \@data_expected, "Get next_seq at eof" );
516
517}
518
519sub print_to_file {
520    my ( $ra_in, $out ) = @_;
521    open my $OUT, '>', $out or die "\nCould not write outfile '$out': $!\n";
522    print $OUT ("@$ra_in");
523    close $OUT;
524}
525
526sub convert_bases_to_nums {
527
528    my ( $rh_base_conversion_table, @seqs ) = @_;
529
530    my @out_seqstrings;
531    foreach my $seq (@seqs) {
532        my $seqstring = $seq->seq;
533        foreach my $base ( keys %{$rh_base_conversion_table} ) {
534            $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g;
535        }
536        push @out_seqstrings, $seqstring;
537    }
538
539    return @out_seqstrings;
540
541}
542
543sub bad_test_file_1 {
544##############################################################################
545## Bad Test file 1
546##############################################################################
547
548    # This sub tests to see if msout.pm will catch if the msinfo line's
549    # advertized haps are less than are actually in the file
550
551    my $gzip    = shift;
552    my $infile  = shift;
553    my $n_sites = shift;
554    $infile = test_input_file($infile);
555
556    my $file_sequence = $infile;
557    if ($gzip) {
558        $file_sequence = "gunzip -c <$file_sequence |";
559    }
560    my $msout = Bio::SeqIO->new(
561        -file    => "$file_sequence",
562        -format  => 'msout',
563        -n_sites => $n_sites,
564    );
565
566    isa_ok( $msout, 'Bio::SeqIO::msout' );
567
568    throws_ok { $msout->get_next_run }
569qr/msout file has only 2 hap\(s\), which is less than indicated in msinfo line \( 9 \)/,
570      q(Caught error in bad msout file 1);
571
572}
573
574sub bad_test_file_2 {
575##############################################################################
576## Bad Test file 2
577##############################################################################
578
579    # This sub tests to see if msout.pm will catch if the msinfo line's
580    # advertized haps are more than are actually in the file
581
582    my $gzip    = shift;
583    my $infile  = shift;
584    my $n_sites = shift;
585    $infile = test_input_file($infile);
586
587    my $file_sequence = $infile;
588    if ($gzip) {
589        $file_sequence = "gunzip -c <$file_sequence |";
590    }
591    my $msout = Bio::SeqIO->new(
592        -file    => "$file_sequence",
593        -format  => 'msout',
594        -n_sites => $n_sites,
595    );
596
597    isa_ok( $msout, 'Bio::SeqIO::msout' );
598
599    throws_ok { $msout->get_next_run }
600qr/\'\/\/\' not encountered when expected. There are more haplos in one of the msOUT runs than advertised in the msinfo line/,
601      q(Caught error in bad msout file 2);
602
603}
604
605sub bad_n_sites {
606##############################################################################
607## Bad n_sites
608##############################################################################
609
610    # this sub tests if msout.pm dies when n_sites is smaller than segsites
611    my $gzip    = shift;
612    my $infile  = shift;
613    $infile = Bio::Root::Test::test_input_file($infile);
614
615    my $file_sequence = $infile;
616    if ($gzip) {
617        $file_sequence = "gzip -dc <$file_sequence |";
618    }
619    my $msout = Bio::SeqIO->new(
620            -file    => "$file_sequence",
621            -format  => 'msout',
622    );
623
624    # test nsites -1
625    throws_ok { $msout->set_n_sites(-1) } qr|first argument needs to be a positive integer. argument supplied: -1|;
626
627    # test nsites smaller than next hap
628    $msout->set_n_sites(1);
629    throws_ok{$msout->get_next_seq} qr/n_sites needs to be at least the number of segsites of every run/, 'too few n_sites failed OK';
630
631}
632