1# -*-Perl-*- Test Harness script for Bioperl
2# $Id$
3
4use strict;
5
6BEGIN {
7    use Bio::Root::Test;
8
9    test_begin(-tests => 85);
10
11    use_ok('Bio::Seq::Quality');
12}
13
14use Bio::SeqIO;
15
16my $DEBUG = test_debug();
17
18# create some random sequence object with no id
19my $seqobj_broken = Bio::Seq::Quality->
20    new( -seq => "ATCGATCGA",
21	 );
22
23my $seqobj;
24lives_ok {
25    $seqobj = Bio::Seq::Quality->
26	new( -seq => "ATCGATCGA",
27	     -id  => 'QualityFragment-12',
28	     -accession_number => 'X78121',
29	     );
30};
31
32
33# create some random quality object with the same number of qualities
34# and the same identifiers
35my $string_quals = "10 20 30 40 50 40 30 20 10";
36my $qualobj;
37lives_ok {
38    $qualobj = Bio::Seq::Quality->
39	new( -qual => $string_quals,
40	     -id  => 'QualityFragment-12',
41	     -accession_number => 'X78121',
42	     );
43};
44
45# check to see what happens when you construct the Quality object
46ok my $swq1 = Bio::Seq::Quality->
47    new( -seq => "ATCGATCGA",
48	 -id  => 'QualityFragment-12',
49	 -accession_number => 'X78121',
50	 -qual	=>	$string_quals);
51
52
53print("Testing various weird constructors...\n") if $DEBUG;
54print("\ta) No ids, Sequence object, no quality...\n") if $DEBUG;
55
56# w for weird
57my $wswq1;
58lives_ok {
59    $wswq1 = Bio::Seq::Quality->
60	new( -seq  => "ATCGATCGA",
61	     -qual => "");
62};
63print $@ if $DEBUG;
64
65
66print("\tb) No ids, no sequence, quality object...\n") if $DEBUG;
67	# note that you must provide a alphabet for this one.
68$wswq1 = Bio::Seq::Quality->
69    new( -seq => "",
70	 -qual => $string_quals,
71	 -alphabet => 'dna'
72	 );
73
74print("\tc) Absolutely nothing. (HAHAHAHA)...\n") if $DEBUG;
75lives_ok {
76    $wswq1 = Bio::Seq::Quality->new( -seq => "",
77				     -qual => "",
78				     -alphabet => 'dna'
79				     );
80};
81
82
83print("\td) Absolutely nothing but an ID\n") if $DEBUG;
84lives_ok {
85    $wswq1 = Bio::Seq::Quality->
86	new( -seq => "",
87	     -qual => "",
88	     -alphabet => 'dna',
89	     -id => 'an object with no sequence and no quality but with an id'
90	     );
91};
92
93print("\td) No sequence, no quality, no ID...\n") if $DEBUG;
94warnings_like {
95    $wswq1 = Bio::Seq::Quality->
96	new( -seq  =>	"",
97	     -qual =>	"",
98	     -verbose => 0);
99} qr/not guess alphabet/i;
100
101print("Testing various methods and behaviors...\n") if $DEBUG;
102
103print("1. Testing the seq() method...\n") if $DEBUG;
104	print("\t1a) get\n") if $DEBUG;
105	my $original_seq = $swq1->seq();
106	is ($original_seq, "ATCGATCGA");
107	print("\t1b) set\n") if $DEBUG;
108	ok ($swq1->seq("AAAAAAAAAAAA"));
109	print("\t1c) get (again, to make sure the set was done.)\n") if $DEBUG;
110	is($swq1->seq(), "AAAAAAAAAAAA");
111	print("\tSetting the sequence back to the original value...\n") if $DEBUG;
112	$swq1->seq($original_seq);
113
114
115print("2. Testing the qual() method...\n") if $DEBUG;
116	print("\t2a) get\n") if $DEBUG;
117	my @qual = @{$swq1->qual()};
118	my $str_qual = join(' ',@qual);
119	is $str_qual, "10 20 30 40 50 40 30 20 10";
120	print("\t2b) set\n") if $DEBUG;
121	ok $swq1->qual("10 10 10 10 10");
122	print("\t2c) get (again, to make sure the set was done.)\n") if $DEBUG;
123	my @qual2 = @{$swq1->qual()};
124	my $str_qual2 = join(' ',@qual2);
125	is($str_qual2, "10 10 10 10 10 0 0 0 0"); ###!
126	print("\tSetting the quality back to the original value...\n") if $DEBUG;
127	$swq1->qual($str_qual);
128
129print("3. Testing the length() method...\n") if $DEBUG;
130	print("\t3a) When lengths are equal...\n") if $DEBUG;
131	is($swq1->length(), 9);
132	print("\t3b) When lengths are different\n") if $DEBUG;
133	$swq1->qual("10 10 10 10 10");
134	isnt ($swq1->length(), "DIFFERENT");
135
136
137print("6. Testing the subqual() method...\n") if $DEBUG;
138     my $t_subqual = "10 20 30 40 50 60 70 80 90";
139     $swq1->qual($t_subqual);
140     print("\t6d) Testing the subqual at the start (border condition)\n") if $DEBUG;
141     ok ('10 20 30' eq join(' ',@{$swq1->subqual(1,3)}));
142     print("\t6d) Testing the subqual at the end (border condition)\n") if $DEBUG;
143     ok ('70 80 90' eq join(' ',@{$swq1->subqual(7,9)}));
144     print("\t6d) Testing the subqual in the middle\n") if $DEBUG;
145     ok ('40 50 60' eq join(' ',@{$swq1->subqual(4,6)}));
146
147print("7. Testing cases where quality is zero...\n") if $DEBUG;
148$swq1 = Bio::Seq::Quality->new(-seq =>  'G',
149                               -qual => '0',
150			       );
151my $swq2 = Bio::Seq::Quality->new(-seq =>  'G',
152                                  -qual => '65',
153				  );
154is $swq1->length, $swq2->length;
155
156$swq1 = Bio::Seq::Quality->new(-seq =>  'GC',
157                               -qual => '0 0',
158			       );
159$swq2 = Bio::Seq::Quality->new(-seq =>  'GT',
160                               -qual => '65 0',
161			       );
162my $swq3 = Bio::Seq::Quality->new(-seq =>  'AG',
163                               -qual => '0 60',
164			       );
165is $swq1->length, $swq2->length;
166is $swq1->length, $swq3->length;
167
168
169#
170# end of test inherited from seqwithquality.t
171#
172#################################################################
173#
174# testing new functionality
175#
176
177my $qual = '0 1 2 3 4 5 6 7 8 9 11 12 13';
178my $trace = '0 5 10 15 20 25 30 35 40 45 50 55 60';
179
180ok my $seq = Bio::Seq::Quality->new
181    ( -qual => $qual,
182      -trace_indices => $trace,
183      -seq =>  'atcgatcgatcgt',
184      -id  => 'human_id',
185      -accession_number => 'S000012',
186      -verbose => $DEBUG >= 0 ? $DEBUG : 0
187);
188
189
190print("2. Testing the trace() method...\n") if $DEBUG;
191	print("\t2a) get\n") if $DEBUG;
192	my @trace = @{$seq->trace()};
193	my $str_trace = join(' ',@trace);
194	is $str_trace, $trace;
195	print("\t2b) set\n") if $DEBUG;
196	ok $seq->trace("10 10 10 10 10");
197	print("\t2c) get (again, to make sure the set was done.)\n") if $DEBUG;
198	my @trace2 = @{$seq->trace()};
199	my $str_trace2 = join(' ',@trace2);
200	is($str_trace2, "10 10 10 10 10 0 0 0 0 0 0 0 0"); ###!
201	print("\tSetting the trace back to the original value...\n") if $DEBUG;
202	$seq->trace($trace);
203
204
205
206is_deeply $seq->qual, [split / /, $qual];
207is_deeply $seq->trace, [split / /, $trace];
208is_deeply $seq->trace_indices, [split / /, $trace]; #deprecated
209
210is $seq->qual_text, $qual;
211is $seq->trace_text, $trace;
212
213is join (' ', @{$seq->subqual(2, 3)}), '1 2';
214is $seq->subqual_text(2, 3), '1 2';
215is join (' ', @{$seq->subqual(2, 3, "9 9")}), '9 9';
216is $seq->subqual_text(2, 3, "8 8"), '8 8';
217
218is join (' ', @{$seq->subtrace(2, 3)}), '5 10';
219is $seq->subtrace_text(2, 3), '5 10';
220is join (' ', @{$seq->subtrace(2, 3, "9 9")}), '9 9';
221is $seq->subtrace_text(2, 3, "8 8"), '8 8';
222
223
224is $seq->trace_index_at(5), 20;
225is join(' ', @{$seq->sub_trace_index(5,6)}), "20 25";
226
227is $seq->baseat(2), 't';
228is $seq->baseat(3), 'c';
229is $seq->baseat(4), 'g';
230is $seq->baseat(5), 'a';
231
232
233#############################################
234#
235# same tests using Seq::Meta::Array methods follow ...
236#
237
238my $meta = '0 1 2 3 4 5 6 7 8 9 11 12';
239$trace = '0 5 10 15 20 25 30 35 40 45 50 55';
240my @trace_array = qw(0 5 10 15 20 25 30 35 40 45 50 55);
241
242ok $seq = Bio::Seq::Quality->new
243    ( -meta => $meta,
244      -seq =>  'atcgatcgatcg',
245      -id  => 'human_id',
246      -accession_number => 'S000012',
247      -verbose => $DEBUG >= 0 ? $DEBUG : 0
248);
249
250$seq->named_meta('trace', \@trace_array);
251
252is_deeply $seq->meta, [split / /, $meta];
253is_deeply $seq->named_meta('trace'), [split / /, $trace];
254
255is $seq->meta_text, $meta;
256is $seq->named_meta_text('trace'), $trace;
257
258is join (' ', @{$seq->submeta(2, 3)}), '1 2';
259is $seq->submeta_text(2, 3), '1 2';
260is join (' ', @{$seq->submeta(2, 3, "9 9")}), '9 9';
261is $seq->submeta_text(2, 3, "8 8"), '8 8';
262
263is join (' ', @{$seq->named_submeta('trace', 2, 3)}), '5 10';
264is $seq->named_submeta_text('trace', 2, 3), '5 10';
265is join (' ', @{$seq->named_submeta('trace', 2, 3, "9 9")}), '9 9';
266is $seq->named_submeta_text('trace', 2, 3, "8 8"), '8 8';
267
268
269ok $seq = Bio::Seq::Quality->new(
270    -seq => "ATGGGGGTGGTGGTACCCTATGGGGGTGGTGGTACCCT",
271    -qual => "10 59 12 75 63 76 84 36 42 10 35 97 81 50 81 53 93 13 38 10 59 12 75 63 76 84 36 42 10 35 97 81 50 81 53 93 13 38",
272    -trace_indices => "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38"
273);
274
275my $rev;
276ok $rev = $seq->revcom;
277is $rev->seq, 'AGGGTACCACCACCCCCATAGGGTACCACCACCCCCAT';
278is $rev->qual_text, "38 13 93 53 81 50 81 97 35 10 42 36 84 76 63 75 12 59 10 38 13 93 53 81 50 81 97 35 10 42 36 84 76 63 75 12 59 10";
279
280
281# selecting ranges based on quality
282
283# test seq with three high quality regions (13, 12 and 3), one very short (3)
284ok $seq = Bio::Seq::Quality->new(
285    -seq => "ATGGGGGTGGTGGTACCCTATGGGGGTGGTGGTACCCT",
286    -qual => "0 5 10 20 30 40 40 50 50 50 50 50 40 10 10 10 5 5 20 20 30 40 50 44 44 50 50 50 50 50 5 5 40 40 40 40 50 50"
287    );
288
289
290is $seq->threshold, undef;
291is $seq->threshold(10), 10;
292is $seq->threshold(13), 13;
293
294is $seq->count_clear_ranges, 3;
295
296my $newseq = $seq->get_clear_range;
297is $newseq->length, 12;
298
299
300my @ranges = $seq->get_all_clean_ranges;
301is scalar @ranges, 3;
302my $min_length = 10;
303@ranges = $seq->get_all_clean_ranges($min_length);
304is scalar @ranges, 2;
305
306my $seqio = Bio::SeqIO->new(
307    -file   => test_input_file('test_clear_range.fastq'),
308    -format => 'fastq'
309);
310
311while ( my $seq = $seqio->next_seq() ) {
312    my $newqualobj;
313    lives_ok { $newqualobj = $seq->get_clear_range(0) };
314    if ($newqualobj) {
315        is($newqualobj->id, $seq->id, 'Bug 2845');
316    } else {
317        ok(0, "No object returned via get_clear_range()");
318    }
319}
320
321
322
323
324#############################################
325#
326# try testing some 'meta morphic relations'
327#
328
329## belief; As the threshold is increased, the number of clear ranges
330## (ncr) should not decrease.
331
332## belief; As the thrshold is increased, the length of the clear
333## ranges (lcr) should not decrease.
334
335## belief; As the threshold is incrazed, the clear range length (clr)
336## should not increase. Sorry for the terribe var names.
337
338## belief; The number of clear ranges should vary between zero and
339## half the sequence length.
340
341## belief; The length of the clear ranges should vary between zero and
342## the sequence length.
343
344## belief; The length of the clear range should vary between zero and
345## the sequence length.
346
347## belief; The lenght of the clear range should not be larger than the
348## length of hte clear ranges.
349
350
351my @bases = qw (A T C G a t c g);
352my @qualities = 0..65;
353
354
355## See beliefs above:
356my $ncr_thresh_sanity = 0;
357my $lcr_thresh_sanity = 0;
358my $clr_thresh_sanity = 0;
359
360my $ncr_range_sanity = 0;
361my $lcr_range_sanity = 0;
362my $clr_range_sanity = 0;
363
364my $final_loss_of_sanity = 0;
365
366
367
368## Go time:
369
370for (1..100){
371    $seq = join("", map {$bases[rand(@bases)]} 1..1000  );
372    $qual = join(" ", map {$qualities[rand(@qualities)]} 1..1000  );
373
374    $seq = Bio::Seq::Quality->
375	new(
376	    -seq => $seq,
377	    -qual => $qual,
378	    );
379
380    $seq->threshold(10);
381    my $a_ncr = $seq->count_clear_ranges;
382    my $a_lcr = $seq->clear_ranges_length;
383    my $a_clr = scalar(@{$seq->get_clear_range->qual});
384
385    $ncr_range_sanity ++ if $a_ncr >= 0 && $a_ncr <=  500;
386    $lcr_range_sanity ++ if $a_lcr >= 0 && $a_lcr <= 1000;
387    $clr_range_sanity ++ if $a_clr >= 0 && $a_clr <= 1000;
388    $final_loss_of_sanity ++ if $a_lcr >= $a_clr;
389
390    $seq->threshold(20);
391    my $b_ncr = $seq->count_clear_ranges;
392    my $b_lcr = $seq->clear_ranges_length;
393    my $b_clr = scalar(@{$seq->get_clear_range->qual});
394
395    $ncr_range_sanity ++ if $b_ncr >= 0 && $b_ncr <=  500;
396    $lcr_range_sanity ++ if $b_lcr >= 0 && $b_lcr <= 1000;
397    $clr_range_sanity ++ if $b_clr >= 0 && $b_clr <= 1000;
398    $final_loss_of_sanity ++ if $b_lcr >= $b_clr;
399
400
401    $seq->threshold(30);
402    my $c_ncr = $seq->count_clear_ranges;
403    my $c_lcr = $seq->clear_ranges_length;
404    my $c_clr = scalar(@{$seq->get_clear_range->qual});
405
406    $ncr_range_sanity ++ if $c_ncr >= 0 && $c_ncr <=  500;
407    $lcr_range_sanity ++ if $c_lcr >= 0 && $c_lcr <= 1000;
408    $clr_range_sanity ++ if $c_clr >= 0 && $c_clr <= 1000;
409    $final_loss_of_sanity ++ if $c_lcr >= $c_clr;
410
411
412
413    $ncr_thresh_sanity ++ if
414	$a_ncr <= $b_ncr &&
415	$b_ncr <= $c_ncr;
416
417    $lcr_thresh_sanity ++ if
418	$a_ncr <= $b_ncr &&
419	$b_ncr <= $c_ncr;
420
421    $clr_thresh_sanity ++ if
422	$a_clr >= $b_clr &&
423	$b_clr >= $c_clr;
424
425}
426
427is $ncr_thresh_sanity, 100;
428is $lcr_thresh_sanity, 100;
429is $clr_thresh_sanity, 100;
430
431is $ncr_range_sanity, 300;
432is $lcr_range_sanity, 300;
433is $clr_range_sanity, 300;
434
435is $final_loss_of_sanity, 300;
436
437
438
439
440## Test the mask sequence function ...
441
442## Ideally we'd at least test each function with each permutation of constructors.
443
444my $x = Bio::Seq::Quality->
445    new( -seq => "aaaattttccccgggg",
446	 -qual =>"1 1 1 1 2 2 2 2 1 1 1 1 3 3 3 3");
447
448$x->threshold(1);
449
450is $x->mask_below_threshold, "aaaattttccccgggg";
451
452$x->threshold(2);
453
454is $x->mask_below_threshold, "XXXXttttXXXXgggg";
455
456$x->threshold(3);
457
458is $x->mask_below_threshold, "XXXXXXXXXXXXgggg";
459
460$x->threshold(4);
461
462is $x->mask_below_threshold, "XXXXXXXXXXXXXXXX";
463
464