1#
2# BioPerl module for Bio::Seq::SequenceTrace
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Chad Matsalla <bioinformatics@dieselwurks.com
7#
8# Copyright Chad Matsalla
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::Seq::SequenceTrace - Bioperl object packaging a sequence with its trace
17
18=head1 SYNOPSIS
19
20  # example code here
21
22=head1 DESCRIPTION
23
24This object stores a sequence with its trace.
25
26=head1 FEEDBACK
27
28=head2 Mailing Lists
29
30User feedback is an integral part of the evolution of this and other
31Bioperl modules. Send your comments and suggestions preferably to one
32of the Bioperl mailing lists.  Your participation is much appreciated.
33
34  bioperl-l@bioperl.org                  - General discussion
35  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
36
37=head2 Support
38
39Please direct usage questions or support issues to the mailing list:
40
41I<bioperl-l@bioperl.org>
42
43rather than to the module maintainer directly. Many experienced and
44reponsive experts will be able look at the problem and quickly
45address it. Please include a thorough description of the problem
46with code and data examples if at all possible.
47
48=head2 Reporting Bugs
49
50Report bugs to the Bioperl bug tracking system to help us keep track
51the bugs and their resolution.  Bug reports can be submitted via the
52web:
53
54  https://github.com/bioperl/bioperl-live/issues
55
56=head1 AUTHOR - Chad Matsalla
57
58Email bioinformatics@dieselwurks.com
59
60
61The rest of the documentation details each of the object methods.
62Internal methods are usually preceded with a _
63
64=cut
65
66
67package Bio::Seq::SequenceTrace;
68$Bio::Seq::SequenceTrace::VERSION = '1.7.7';
69
70use strict;
71use Bio::Seq::QualI;
72use Bio::PrimarySeqI;
73use Bio::PrimarySeq;
74use Bio::Seq::PrimaryQual;
75
76use base qw(Bio::Root::Root Bio::Seq::Quality Bio::Seq::TraceI);
77
78=head2 new()
79
80 Title   : new()
81 Usage   : $st = Bio::Seq::SequenceTrace->new
82     (    -swq =>   Bio::Seq::SequenceWithQuality,
83          -trace_a  =>   \@trace_values_for_a_channel,
84          -trace_t  =>   \@trace_values_for_t_channel,
85          -trace_g  =>   \@trace_values_for_g_channel,
86          -trace_c  =>   \@trace_values_for_c_channel,
87          -accuracy_a    =>   \@a_accuracies,
88          -accuracy_t    =>   \@t_accuracies,
89          -accuracy_g    =>   \@g_accuracies,
90          -accuracy_c    =>   \@c_accuracies,
91          -peak_indices    => '0 5 10 15 20 25 30 35'
92     );
93 Function: Returns a new Bio::Seq::SequenceTrace object from basic
94        constructors.
95 Returns : a new Bio::Seq::SequenceTrace object
96Arguments: I think that these are all describes in the usage above.
97
98=cut
99
100sub new {
101    my ($class, @args) = @_;
102    my $self = $class->SUPER::new(@args);
103	# default: turn OFF the warnings
104	$self->{suppress_warnings} = 1;
105    my($swq,$peak_indices,$trace_a,$trace_t,
106          $trace_g,$trace_c,$acc_a,$acc_t,$acc_g,$acc_c) =
107          $self->_rearrange([qw(
108               SWQ
109               PEAK_INDICES
110               TRACE_A
111               TRACE_T
112               TRACE_G
113               TRACE_C
114               ACCURACY_A
115               ACCURACY_T
116               ACCURACY_G
117               ACCURACY_C )], @args);
118          # first, deal with the sequence and quality information
119     if ($swq && ref($swq) eq "Bio::Seq::Quality") {
120          $self->{swq} = $swq;
121     }
122     else {
123          $self->throw("A Bio::Seq::SequenceTrace object must be created with a
124               Bio::Seq::Quality object. You provided this type of object: "
125               .ref($swq));
126     }
127     if (!$acc_a) {
128          # this means that you probably did not provide traces and accuracies
129          # and that they need to be synthesized
130          $self->set_accuracies();
131     }
132     else {
133          $self->accuracies('a',$acc_a);
134          $self->accuracies('t',$acc_t);
135          $self->accuracies('g',$acc_g);
136          $self->accuracies('c',$acc_c);
137     }
138     if (!$trace_a) {
139          $self->_synthesize_traces();
140     }
141     else {
142          $self->trace('a',$trace_a);
143          $self->trace('t',$trace_t);
144          $self->trace('g',$trace_g);
145          $self->trace('c',$trace_c);
146          $self->peak_indices($peak_indices);
147     }
148     $self->id($self->seq_obj->id);
149    return $self;
150}
151
152sub swq_obj {
153     my $self = shift;
154     $self->warn('swq_obj() is deprecated: use seq_obj()');
155     return $self->{swq};
156}
157
158
159
160=head2 trace($base,\@new_values)
161
162 Title   : trace($base,\@new_values)
163 Usage   : @trace_Values  = @{$obj->trace($base,\@new_values)};
164 Function: Returns the trace values as a reference to an array containing the
165     trace values. The individual elements of the trace array are not validated
166     and can be any numeric value.
167 Returns : A reference to an array.
168 Status  :
169Arguments: $base : which color channel would you like the trace values for?
170               - $base must be one of "A","T","G","C"
171          \@new_values : a reference to an array of values containing trace
172               data for this base
173
174=cut
175
176sub trace {
177   my ($self,$base_channel,$values) = @_;
178     if (!$base_channel) {
179          $self->throw('You must provide a valid base channel (atgc) to use trace()');
180     }
181     $base_channel =~ tr/A-Z/a-z/;
182     if ($base_channel !~ /[acgt]/) {
183          $self->throw('You must provide a valid base channel (atgc) to use trace()');
184     }
185     if ($values) {
186             if (ref($values) eq "ARRAY") {
187                  $self->{trace}->{$base_channel} = $values;
188             }
189             else {
190                    my @trace = split(' ',$values);
191                  $self->{trace}->{$base_channel} = \@trace;
192             }
193     }
194     if ($self->{trace}->{$base_channel}) {
195          return $self->{trace}->{$base_channel};
196     }
197     else {
198          return;
199     }
200}
201
202
203=head2 peak_indices($new_indices)
204
205 Title   : peak_indices($new_indices)
206 Usage   : $indices = $obj->peak_indices($new_indices);
207 Function: Return the trace index points for this object.
208 Returns : A scalar
209 Args    : If used, the trace indices will be set to the provided value.
210
211=cut
212
213sub peak_indices {
214   my ($self,$peak_indices)= @_;
215     if ($peak_indices) {
216          if (ref($peak_indices) eq "ARRAY") {
217               $self->{peak_indices} = $peak_indices;
218          }
219          else {
220               my @indices = split(' ',$peak_indices);
221               $self->{peak_indices} = \@indices;
222         }
223     }
224     if (!$self->{peak_indices}) {
225          my @temp = ();
226          $self->{peak_indices} = \@temp;
227     }
228     return $self->{peak_indices};
229}
230
231
232=head2 _reset_peak_indices()
233
234 Title   : _rest_peak_indices()
235 Usage   : $obj->_reset_peak_indices();
236 Function: Reset the peak indices.
237 Returns : Nothing.
238 Args    : None.
239 Notes   : When you create a sub_trace_object, the peak indices
240     will still be pointing to the apporpriate location _in the
241     original trace_. In order to fix this, the initial value must
242     be subtracted from each value here. ie. The first peak index
243     must be "1".
244
245=cut
246
247sub _reset_peak_indices {
248   my $self = shift;
249     my $length = $self->length();
250     my $subtractive = $self->peak_index_at(1);
251     my ($original,$new);
252     $self->peak_index_at(1,"null");
253     for (my $counter=2; $counter<= $length; $counter++) {
254          my $original = $self->peak_index_at($counter);
255          $new = $original - $subtractive;
256          $self->peak_index_at($counter,$new);
257     }
258     return;
259}
260
261
262
263
264
265=head2 peak_index_at($position)
266
267 Title   : peak_index_at($position)
268 Usage   : $peak_index = $obj->peak_index_at($postition);
269 Function: Return the trace iindex point at this position
270 Returns : A scalar
271 Args    : If used, the trace index at this position will be
272     set to the provided value.
273
274=cut
275
276sub peak_index_at {
277   my ($self,$position,$value)= @_;
278   if ($value) {
279          if ($value eq "null") {
280               $self->peak_indices->[$position-1] = "0";
281          }
282          else {
283               $self->peak_indices->[$position-1] = $value;
284          }
285   }
286    return $self->peak_indices()->[$position-1];
287}
288
289=head2 alphabet()
290
291 Title   : alphabet();
292 Usage   : $molecule_type = $obj->alphabet();
293 Function: Get the molecule type from the PrimarySeq object.
294 Returns : What what PrimarySeq says the type of the sequence is.
295 Args    : None.
296
297=cut
298
299sub alphabet {
300	my $self = shift;
301	return $self->{swq}->alphabet;
302}
303
304=head2 display_id()
305
306 Title   : display_id()
307 Usage   : $id_string = $obj->display_id();
308 Function: Returns the display id, aka the common name of the Quality
309        object.
310        The semantics of this is that it is the most likely string to be
311        used as an identifier of the quality sequence, and likely to have
312        "human" readability.  The id is equivalent to the ID field of the
313        GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
314        database. In fasta format, the >(\S+) is presumed to be the id,
315        though some people overload the id to embed other information.
316        Bioperl does not use any embedded information in the ID field,
317        and people are encouraged to use other mechanisms (accession
318	field for example, or extending the sequence object) to solve
319	this. Notice that $seq->id() maps to this function, mainly for
320        legacy/convience issues.
321	This method sets the display_id for the Quality object.
322 Returns : A string
323 Args    : If a scalar is provided, it is set as the new display_id for
324	the Quality object.
325 Status  : Virtual
326
327=cut
328
329sub display_id {
330   my ($self,$value) = @_;
331   if( defined $value) {
332      $self->{swq}->display_id($value);
333    }
334    return $self->{swq}->display_id();
335
336}
337
338=head2 accession_number()
339
340 Title   : accession_number()
341 Usage   : $unique_biological_key = $obj->accession_number();
342 Function: Returns the unique biological id for a sequence, commonly
343        called the accession_number. For sequences from established
344        databases, the implementors should try to use the correct
345        accession number. Notice that primary_id() provides the unique id
346        for the implementation, allowing multiple objects to have the same
347        accession number in a particular implementation. For sequences
348        with no accession number, this method should return "unknown".
349	This method sets the accession_number for the Quality
350	object.
351 Returns : A string (the value of accession_number)
352 Args    : If a scalar is provided, it is set as the new accession_number
353	for the Quality object.
354 Status  : Virtual
355
356
357=cut
358
359sub accession_number {
360    my( $self, $acc ) = @_;
361    if (defined $acc) {
362        $self->{swq}->accession_number($acc);
363    } else {
364        $acc = $self->{swq}->accession_number();
365        $acc = 'unknown' unless defined $acc;
366    }
367    return $acc;
368}
369
370=head2 primary_id()
371
372 Title   : primary_id()
373 Usage   : $unique_implementation_key = $obj->primary_id();
374 Function: Returns the unique id for this object in this implementation.
375        This allows implementations to manage their own object ids in a
376        way the implementation can control clients can expect one id to
377        map to one object. For sequences with no accession number, this
378        method should return a stringified memory location.
379	This method sets the primary_id for the Quality
380	object.
381 Returns : A string. (the value of primary_id)
382 Args    : If a scalar is provided, it is set as the new primary_id for
383	the Quality object.
384
385=cut
386
387sub primary_id {
388   my ($self,$value) = @_;
389   if ($value) {
390      $self->{swq}->primary_id($value);
391    }
392   return $self->{swq}->primary_id();
393
394}
395
396=head2 desc()
397
398 Title   : desc()
399 Usage   : $qual->desc($newval); _or_
400           $description = $qual->desc();
401 Function: Get/set description text for this Quality object.
402 Returns : A string. (the value of desc)
403 Args    : If a scalar is provided, it is set as the new desc for the
404	   Quality object.
405
406=cut
407
408sub desc {
409	# a mechanism to set the desc for the Quality object.
410	# probably will be used most often by set_common_features()
411   my ($self,$value) = @_;
412   if( defined $value) {
413      $self->{swq}->desc($value);
414    }
415      return $self->{swq}->desc();
416}
417
418=head2 id()
419
420 Title   : id()
421 Usage   : $id = $qual->id();
422 Function: Return the ID of the quality. This should normally be (and
423        actually is in the implementation provided here) just a synonym
424        for display_id().
425 Returns : A string. (the value of id)
426 Args    : If a scalar is provided, it is set as the new id for the
427	   Quality object.
428
429=cut
430
431sub id {
432   my ($self,$value) = @_;
433   if (!$self) { $self->throw("no value for self in $value"); }
434   if( defined $value ) {
435       $self->{swq}->display_id($value);
436   }
437   return $self->{swq}->display_id();
438}
439
440=head2 seq
441
442 Title   : seq()
443 Usage   : $string    = $obj->seq(); _or_
444	$obj->seq("atctatcatca");
445 Function: Returns the sequence that is contained in the imbedded in the
446	PrimarySeq object within the Quality object
447 Returns : A scalar (the seq() value for the imbedded PrimarySeq object.)
448 Args    : If a scalar is provided, the Quality object will
449	attempt to set that as the sequence for the imbedded PrimarySeq
450	object. Otherwise, the value of seq() for the PrimarySeq object
451	is returned.
452 Notes   : This is probably not a good idea because you then should call
453	length() to make sure that the sequence and quality are of the
454	same length. Even then, how can you make sure that this sequence
455	belongs with that quality? I provided this to give you rope to
456	hang yourself with. Tie it to a strong device and use a good
457	knot.
458
459=cut
460
461sub seq {
462	my ($self,$value) = @_;
463	if( defined $value) {
464		$self->{swq}->seq($value);
465	}
466	return $self->{swq}->seq();
467}
468
469=head2 qual()
470
471 Title   : qual()
472 Usage   : @quality_values  = @{$obj->qual()}; _or_
473	$obj->qual("10 10 20 40 50");
474 Function: Returns the quality as imbedded in the PrimaryQual object
475	within the Quality object.
476 Returns : A reference to an array containing the quality values in the
477	PrimaryQual object.
478 Args    : If a scalar is provided, the Quality object will
479	attempt to set that as the quality for the imbedded PrimaryQual
480	object. Otherwise, the value of qual() for the PrimaryQual
481	object is returned.
482 Notes   : This is probably not a good idea because you then should call
483	length() to make sure that the sequence and quality are of the
484	same length. Even then, how can you make sure that this sequence
485	belongs with that quality? I provided this to give you a strong
486	board with which to flagellate yourself.
487
488=cut
489
490sub qual {
491	my ($self,$value) = @_;
492
493	if( defined $value) {
494		$self->{swq}->qual($value);
495	}
496	return $self->{swq}->qual();
497}
498
499
500
501
502=head2 length()
503
504 Title   : length()
505 Usage   : $length = $seqWqual->length();
506 Function: Get the length of the Quality sequence/quality.
507 Returns : Returns the length of the sequence and quality
508 Args    : None.
509
510=cut
511
512sub length {
513    my $self = shift;
514    return $self->seq_obj->length;
515}
516
517
518=head2 qual_obj
519
520 Title   : qual_obj($different_obj)
521 Usage   : $qualobj = $seqWqual->qual_obj(); _or_
522	$qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj);
523 Function: Get the Qualilty object that is imbedded in the
524	Quality object or if a reference to a PrimaryQual object
525	is provided, set this as the PrimaryQual object imbedded in the
526	Quality object.
527 Returns : A reference to a Bio::Seq::Quality object.
528
529Identical to L<seq_obj>.
530
531=cut
532
533sub qual_obj {
534    my ($self,$value) = @_;
535#    return $self->{swq}->qual_obj($value);
536    return $self->{swq};
537}
538
539
540=head2 seq_obj
541
542 Title   : seq_obj()
543 Usage   : $seqobj = $seqWqual->seq_obj(); _or_
544	$seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj);
545 Function: Get the PrimarySeq object that is imbedded in the
546	Quality object or if a reference to a PrimarySeq object is
547	provided, set this as the PrimarySeq object imbedded in the
548	Quality object.
549 Returns : A reference to a Bio::PrimarySeq object.
550
551=cut
552
553sub seq_obj {
554    my ($self,$value) = @_;
555    return $self->{swq};
556}
557
558=head2 _set_descriptors
559
560 Title   : _set_descriptors()
561 Usage   : $seqWqual->_qual_obj($qual,$seq,$id,$acc,$pid,$desc,$given_id,
562	$alphabet);
563 Function: Set the descriptors for the Quality object. Try to
564	match the descriptors in the PrimarySeq object and in the
565	PrimaryQual object if descriptors were not provided with
566	construction.
567 Returns : Nothing.
568 Args    : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found
569	in the new() method.
570 Notes   : Really only intended to be called by the new() method. If
571	you want to invoke a similar function try
572	set_common_descriptors().
573
574=cut
575
576
577sub _set_descriptors {
578    my ($self,$qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet) = @_;
579    $self->{swq}->_seq_descriptors($qual,$seq,$id,$acc,$pid,
580				   $desc,$given_id,$alphabet);
581}
582
583=head2 subseq($start,$end)
584
585 Title   : subseq($start,$end)
586 Usage   : $subsequence = $obj->subseq($start,$end);
587 Function: Returns the subseq from start to end, where the first base
588           is 1 and the number is inclusive, ie 1-2 are the first two
589           bases of the sequence.
590 Returns : A string.
591 Args    : Two positions.
592
593=cut
594
595sub subseq {
596    my ($self,@args) = @_;
597    # does a single value work?
598    return $self->{swq}->subseq(@args);
599}
600
601=head2 baseat($position)
602
603 Title   : baseat($position)
604 Usage   : $base_at_position_6 = $obj->baseat("6");
605 Function: Returns a single base at the given position, where the first
606	base is 1 and the number is inclusive, ie 1-2 are the first two
607	bases of the sequence.
608 Returns : A scalar.
609 Args    : A position.
610
611=cut
612
613sub baseat {
614    my ($self,$val) = @_;
615    return $self->{swq}->subseq($val,$val);
616}
617
618=head2 subqual($start,$end)
619
620 Title   : subqual($start,$end)
621 Usage   : @qualities = @{$obj->subqual(10,20);
622 Function: returns the quality values from $start to $end, where the
623        first value is 1 and the number is inclusive, ie 1-2 are the
624	first two bases of the sequence. Start cannot be larger than
625	end but can be equal.
626 Returns : A reference to an array.
627 Args    : a start position and an end position
628
629=cut
630
631sub subqual {
632    my ($self,@args) = @_;
633    return $self->{swq}->subqual(@args);
634}
635
636=head2 qualat($position)
637
638 Title   : qualat($position)
639 Usage   : $quality = $obj->qualat(10);
640 Function: Return the quality value at the given location, where the
641        first value is 1 and the number is inclusive, ie 1-2 are the
642	first two bases of the sequence. Start cannot be larger than
643	end but can be equal.
644 Returns : A scalar.
645 Args    : A position.
646
647=cut
648
649sub qualat {
650    my ($self,$val) = @_;
651    return $self->{swq}->qualat($val);
652}
653
654=head2 sub_peak_index($start,$end)
655
656 Title   : sub_peak_index($start,$end)
657 Usage   : @peak_indices = @{$obj->sub_peak_index(10,20);
658 Function: returns the trace index values from $start to $end, where the
659        first value is 1 and the number is inclusive, ie 1-2 are the
660	first two trace indices for this channel.
661 Returns : A reference to an array.
662 Args    : a start position and an end position
663
664=cut
665
666sub sub_peak_index {
667   my ($self,$start,$end) = @_;
668   if( $start > $end ){
669       $self->throw("in sub_peak_index, start [$start] has to be greater than end [$end]");
670   }
671
672   if( $start <= 0 || $end > $self->length ) {
673       $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
674   }
675
676   # remove one from start, and then length is end-start
677
678   $start--;
679     $end--;
680     my @sub_peak_index_array = @{$self->{peak_indices}}[$start..$end];
681
682     #   return substr $self->seq(), $start, ($end-$start);
683     return \@sub_peak_index_array;
684
685}
686
687=head2 sub_trace($start,$end)
688
689 Title   : sub_trace($base_channel,$start,$end)
690 Usage   : @trace_values = @{$obj->sub_trace('a',10,20)};
691 Function: returns the trace values from $start to $end, where the
692        first value is 1 and the number is inclusive, ie 1-2 are the
693	first two bases of the sequence. Start cannot be larger than
694	end but can be e_peak_index.
695 Returns : A reference to an array.
696 Args    : a start position and an end position
697
698=cut
699
700sub sub_trace {
701   my ($self,$base_channel,$start,$end) = @_;
702   if( $start > $end ){
703       $self->throw("in sub_trace, start [$start] has to be greater than end [$end]");
704   }
705
706   if( $start <= 0 || $end > $self->trace_length() ) {
707       $self->throw("You have to have start positive and length less than the total length of traces [$start:$end] Total ".$self->trace_length."");
708   }
709
710   # remove one from start, and then length is end-start
711
712   $start--;
713     $end--;
714     my @sub_peak_index_array = @{$self->trace($base_channel)}[$start..$end];
715
716     #   return substr $self->seq(), $start, ($end-$start);
717     return \@sub_peak_index_array;
718
719}
720
721=head2 trace_length()
722
723 Title   : trace_length()
724 Usage   : $trace_length = $obj->trace_length();
725 Function: Return the length of the trace if all four traces (atgc)
726     are the same. Otherwise, throw an error.
727 Returns : A scalar.
728 Args    : none
729
730=cut
731
732sub trace_length {
733    my $self = shift;
734     if ( !$self->trace('a') || !$self->trace('t') || !$self->trace('g') || !$self->trace('c') ) {
735           $self->warn("One or more of the trace channels are missing. Cannot give you a length.");
736
737     }
738     my $lengtha = scalar(@{$self->trace('a')});
739     my $lengtht = scalar(@{$self->trace('t')});
740     my $lengthg = scalar(@{$self->trace('g')});
741     my $lengthc = scalar(@{$self->trace('c')});
742     if (($lengtha == $lengtht) && ($lengtha == $lengthg) && ($lengtha == $lengthc) ) {
743          return $lengtha;
744     }
745     $self->warn("Not all of the trace indices are the same length".
746          " Here are their lengths: a: $lengtha t:$lengtht ".
747          " g: $lengthg c: $lengthc");
748}
749
750
751
752=head2 sub_trace_object($start,$end)
753
754 Title   : sub_trace_object($start,$end)
755 Usage   : $smaller_object = $object->sub_trace_object('1','100');
756 Function: Get a subset of the sequence, its quality, and its trace.
757 Returns : A reference to a Bio::Seq::SequenceTrace object
758 Args    : a start position and an end position
759 Notes   :
760     - the start and end position refer to the positions of _bases_.
761     - for example, to get a sub SequenceTrace for bases 5-10,
762          use this routine.
763          - you will get the bases, qualities, and the trace values
764          - you can then use this object to synthesize a new scf
765               using seqIO::scf.
766
767=cut
768
769sub sub_trace_object {
770     my ($self,$start,$end) = @_;
771          my ($start2,$end2);
772        my @subs = @{$self->sub_peak_index($start,$end)};
773        $start2 = shift(@subs);
774        $end2 =  pop(@subs);
775     my $new_object =  Bio::Seq::SequenceTrace->new(
776               -swq =>   Bio::Seq::Quality->new(
777                             -seq => $self->subseq($start,$end),
778                             -qual     =>   $self->subqual($start,$end),
779                             -id    =>   $self->id()
780                         ),
781             -trace_a  => $self->sub_trace('a',$start2,$end2),
782             -trace_t  => $self->sub_trace('t',$start2,$end2),
783             -trace_g  => $self->sub_trace('g',$start2,$end2),
784             -trace_c  => $self->sub_trace('c',$start2,$end2),
785             -peak_indices =>   $self->sub_peak_index($start,$end)
786
787        );
788     $new_object->set_accuracies();
789     $new_object->_reset_peak_indices();
790     return $new_object;
791}
792
793=head2 _synthesize_traces()
794
795 Title   : _synthesize_traces()
796 Usage   : $obj->_synthesize_traces();
797 Function: Synthesize false traces for this object.
798 Returns : Nothing.
799 Args    : None.
800 Notes   : This method is intended to be invoked when this
801     object is created with a SWQ object- that is to say that
802     there is a sequence and a set of qualities but there was
803     no actual trace data.
804
805=cut
806
807sub _synthesize_traces {
808     my ($self) = shift;
809     $self->peak_indices(qw());
810#ml     my $version = 2;
811          # the user should be warned if traces already exist
812          #
813          #
814#ml     ( my $sequence = $self->seq() ) =~ tr/a-z/A-Z/;
815#ml     my @quals = @{$self->qual()};
816#ml     my $info;
817         # build the ramp for the first base.
818         # a ramp looks like this "1 4 13 29 51 71 80 71 51 29 13 4 1" times the quality score.
819         # REMEMBER: A C G T
820         # note to self-> smooth this thing out a bit later
821     my $ramp_data;
822    @{$ramp_data->{'ramp'}} = qw( 1 4 13 29 51 75 80 75 51 29 13 4 1 );
823         # the width of the ramp
824    $ramp_data->{'ramp_width'} = scalar(@{$ramp_data->{'ramp'}});
825         # how far should the peaks overlap?
826    $ramp_data->{'ramp_overlap'} = 1;
827          # where should the peaks be located?
828    $ramp_data->{'peak_at'} = 7;
829    $ramp_data->{'ramp_total_length'} =
830          $self->seq_obj()->length() * $ramp_data->{'ramp_width'}
831          - $self->seq_obj()->length() * $ramp_data->{'ramp_overlap'};
832    my $pos;
833    my $total_length = $ramp_data->{ramp_total_length};
834     $self->initialize_traces("0",$total_length+2);
835         # now populate them
836    my ($current_base,$place_base_at,$peak_quality,$ramp_counter,$current_ramp,$ramp_position);
837#ml    my $sequence_length = $self->length();
838    my $half_ramp = int($ramp_data->{'ramp_width'}/2);
839    for ($pos = 0; $pos<$self->length();$pos++) {
840          $current_base = uc $self->seq_obj()->subseq($pos+1,$pos+1);
841               # print("Synthesizing the ramp for $current_base\n");
842          my $all_bases = "ATGC";
843          $peak_quality = $self->qual_obj()->qualat($pos+1);
844                    # where should the peak for this base be placed? Modeled after a mktrace scf
845          $place_base_at = ($pos * $ramp_data->{'ramp_width'}) -
846                      ($pos * $ramp_data->{'ramp_overlap'}) -
847                   $half_ramp + $ramp_data->{'ramp_width'} - 1;
848               # print("Placing this base at this position: $place_base_at\n");
849          push @{$self->peak_indices()},$place_base_at;
850          $ramp_position = $place_base_at - $half_ramp;
851          if ($current_base =~ "N" ) {
852               $current_base = "A";
853          }
854          for ($current_ramp = 0; $current_ramp < $ramp_data->{'ramp_width'};  $current_ramp++) {
855                    # print("Placing a trace value here: $current_base ".($ramp_position+$current_ramp+1)." ".$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]."\n");
856             $self->trace_value_at($current_base,$ramp_position+$current_ramp+1,$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]);
857          }
858          $self->peak_index_at($pos+1,
859              $place_base_at+1
860          );
861#ml          my $other_bases = $self->_get_other_bases($current_base);
862          # foreach ( split('',$other_bases) ) {
863          #          push @{$self->{'text'}->{"v3_base_accuracy"}->{$_}},0;
864          #}
865    }
866}
867
868
869
870
871
872=head2 _dump_traces($transformed)
873
874 Title   : _dump_traces("transformed")
875 Usage   : &_dump_traces($ra,$rc,$rg,$rt);
876 Function: Used in debugging. Prints all traces one beside each other.
877 Returns : Nothing.
878 Args    : References to the arrays containing the traces for A,C,G,T.
879 Notes   : Beats using dumpValue, I'll tell ya. Much better then using
880           join' ' too.
881     - if a scalar is included as an argument (any scalar), this
882     procedure will dump the _delta'd trace. If you don't know what
883     that means you should not be using this.
884
885=cut
886
887#'
888sub _dump_traces {
889    my ($self) = @_;
890    my (@sA,@sT,@sG,@sC);
891    print ("Count\ta\tc\tg\tt\n");
892     my $length = $self->trace_length();
893    for (my $curr=1; $curr <= $length; $curr++) {
894     print(($curr-1)."\t".$self->trace_value_at('a',$curr).
895                "\t".$self->trace_value_at('c',$curr).
896                "\t".$self->trace_value_at('g',$curr).
897                "\t".$self->trace_value_at('t',$curr)."\n");
898    }
899    return;
900}
901
902=head2 _initialize_traces()
903
904 Title   : _initialize_traces()
905 Usage   : $trace_object->_initialize_traces();
906 Function: Creates empty arrays to hold synthetic trace values.
907 Returns : Nothing.
908 Args    : None.
909
910=cut
911
912sub initialize_traces {
913     my ($self,$value,$length) = @_;
914     foreach (qw(a t g c)) {
915          my @temp;
916          for (my $count=0; $count<$length; $count++) {
917               $temp[$count] = $value;
918          }
919          $self->trace($_,\@temp);
920     }
921}
922
923=head2 trace_value_at($channel,$position)
924
925 Title   : trace_value_at($channel,$position)
926 Usage   : $value = $trace_object->trace_value_at($channel,$position);
927 Function: What is the value of the trace for this base at this position?
928 Returns : A scalar represnting the trace value here.
929 Args    : a base channel (a,t,g,c)
930           a position ( < $trace_object->trace_length() )
931
932=cut
933
934sub trace_value_at {
935     my ($self,$channel,$position,$value) = @_;
936     if ($value) {
937          $self->trace($channel)->[$position] = $value;
938     }
939     return $self->sub_trace($channel,($position),($position))->[0];
940}
941
942sub _deprecated_get_scf_version_2_base_structure {
943          # this sub is deprecated- check inside SeqIO::scf
944     my $self = shift;
945     my (@structure,$current);
946     my $length = $self->length();
947     for ($current=1; $current <= $self->length() ; $current++) {
948           my $base_here = $self->seq_obj()->subseq($current,$current);
949          $base_here = lc($base_here);
950          my $probabilities;
951          $probabilities->{$base_here} = $self->qual_obj()->qualat($current);
952          my $other_bases = "atgc";
953          my $empty = "";
954          $other_bases =~ s/$base_here/$empty/e;
955          foreach ( split('',$other_bases) ) {
956               $probabilities->{$_} = "0";
957          }
958          @structure = (
959               @structure,
960              $self->peak_index_at($current),
961              $probabilities->{'a'},
962              $probabilities->{'t'},
963              $probabilities->{'g'},
964              $probabilities->{'c'}
965         );
966
967     }
968     return \@structure;
969}
970
971sub _deprecated_get_scf_version_3_base_structure {
972     my $self = shift;
973     my $structure;
974     $structure = join('',$self->peak_indices());
975     return $structure;
976}
977
978
979=head2 accuracies($channel,$position)
980
981 Title   : trace_value_at($channel,$position)
982 Usage   : $value = $trace_object->trace_value_at($channel,$position);
983 Function: What is the value of the trace for this base at this position?
984 Returns : A scalar representing the trace value here.
985 Args    : a base channel (a,t,g,c)
986           a position ( < $trace_object->trace_length() )
987
988=cut
989
990
991sub accuracies {
992     my ($self,$channel,$value) = @_;
993     if ($value) {
994          if (ref($value) eq "ARRAY") {
995               $self->{accuracies}->{$channel} = $value;
996          }
997          else {
998               my @acc = split(' ',$value);
999               $self->{accuracies}->{$channel} = \@acc;
1000          }
1001     }
1002     return $self->{accuracies}->{$channel};
1003}
1004
1005
1006=head2 set_accuracies()
1007
1008 Title   : set_sccuracies()
1009 Usage   : $trace_object->set_accuracies();
1010 Function: Take a sequence's quality and synthesize proper scf-style
1011     base accuracies that can then be accessed with
1012     accuracies("a") or something like it.
1013 Returns : Nothing.
1014 Args    : None.
1015
1016=cut
1017
1018sub set_accuracies {
1019     my $self = shift;
1020     my $count = 0;
1021     my $length = $self->length();
1022     for ($count=1; $count <= $length; $count++) {
1023          my $base_here = $self->seq_obj()->subseq($count,$count);
1024          my $qual_here = $self->qual_obj()->qualat($count);
1025          $self->accuracy_at($base_here,$count,$qual_here);
1026          my $other_bases = $self->_get_other_bases($base_here);
1027          foreach (split('',$other_bases)) {
1028               $self->accuracy_at($_,$count,"null");
1029          }
1030     }
1031}
1032
1033
1034=head2 scf_dump()
1035
1036 Title   : scf_dump()
1037 Usage   : $trace_object->scf_dump();
1038 Function: Prints out the contents of the structures representing
1039     the SequenceTrace in a manner similar to io_lib's scf_dump.
1040 Returns : Nothing. Prints out the contents of the structures
1041     used to represent the sequence and its trace.
1042 Args    : None.
1043 Notes   : Used in debugging, obviously.
1044
1045=cut
1046
1047sub scf_dump {
1048     my $self = shift;
1049     my $count;
1050     for ($count=1;$count<=$self->length();$count++) {
1051          my $base_here = lc($self->seq_obj()->subseq($count,$count));
1052          print($base_here." ".sprintf("%05d",$self->peak_index_at($count))."\t");
1053          foreach (sort qw(a c g t)) {
1054               print(sprintf("%03d",$self->accuracy_at($_,$count))."\t");
1055          }
1056          print("\n");
1057     }
1058     $self->_dump_traces();
1059}
1060
1061=head2 _get_other_bases($this_base)
1062
1063 Title   : _get_other_bases($this_base)
1064 Usage   : $other_bases = $trace_object->_get_other_bases($this_base);
1065 Function: A utility routine to return bases other then the one provided.
1066     I was doing this over and over so I put it here.
1067 Returns : Three of a,t,g and c.
1068 Args    : A base (atgc)
1069 Notes   : $obj->_get_other_bases("a") returns "tgc"
1070
1071=cut
1072
1073sub _get_other_bases {
1074     my ($self,$this_base) = @_;
1075     $this_base = lc($this_base);
1076     my $all_bases = "atgc";
1077     my $empty = "";
1078     $all_bases =~ s/$this_base/$empty/e;
1079     return $all_bases;
1080}
1081
1082
1083=head2 accuracy_at($base,$position)
1084
1085 Title   : accuracy_at($base,$position)
1086 Usage   : $accuracy = $trace_object->accuracy_at($base,$position);
1087 Function:
1088 Returns : Returns the accuracy of finding $base at $position.
1089 Args    : 1. a base channel (atgc) 2. a value to _set_ the accuracy
1090 Notes   : $obj->_get_other_bases("a") returns "tgc"
1091
1092=cut
1093
1094
1095sub accuracy_at {
1096     my ($self,$base,$position,$value) = @_;
1097     $base = lc($base);
1098     if ($value) {
1099          if ($value eq "null") {
1100               $self->{accuracies}->{$base}->[$position-1] = "0";
1101          }
1102          else {
1103               $self->{accuracies}->{$base}->[$position-1] = $value;
1104          }
1105     }
1106     return $self->{accuracies}->{$base}->[$position-1];
1107}
1108
11091;
1110
1111