1# 2# BioPerl module for Bio::Seq::SequenceTrace 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Chad Matsalla <bioinformatics@dieselwurks.com 7# 8# Copyright Chad Matsalla 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Seq::SequenceTrace - Bioperl object packaging a sequence with its trace 17 18=head1 SYNOPSIS 19 20 # example code here 21 22=head1 DESCRIPTION 23 24This object stores a sequence with its trace. 25 26=head1 FEEDBACK 27 28=head2 Mailing Lists 29 30User feedback is an integral part of the evolution of this and other 31Bioperl modules. Send your comments and suggestions preferably to one 32of the Bioperl mailing lists. Your participation is much appreciated. 33 34 bioperl-l@bioperl.org - General discussion 35 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 36 37=head2 Support 38 39Please direct usage questions or support issues to the mailing list: 40 41I<bioperl-l@bioperl.org> 42 43rather than to the module maintainer directly. Many experienced and 44reponsive experts will be able look at the problem and quickly 45address it. Please include a thorough description of the problem 46with code and data examples if at all possible. 47 48=head2 Reporting Bugs 49 50Report bugs to the Bioperl bug tracking system to help us keep track 51the bugs and their resolution. Bug reports can be submitted via the 52web: 53 54 https://github.com/bioperl/bioperl-live/issues 55 56=head1 AUTHOR - Chad Matsalla 57 58Email bioinformatics@dieselwurks.com 59 60 61The rest of the documentation details each of the object methods. 62Internal methods are usually preceded with a _ 63 64=cut 65 66 67package Bio::Seq::SequenceTrace; 68$Bio::Seq::SequenceTrace::VERSION = '1.7.7'; 69 70use strict; 71use Bio::Seq::QualI; 72use Bio::PrimarySeqI; 73use Bio::PrimarySeq; 74use Bio::Seq::PrimaryQual; 75 76use base qw(Bio::Root::Root Bio::Seq::Quality Bio::Seq::TraceI); 77 78=head2 new() 79 80 Title : new() 81 Usage : $st = Bio::Seq::SequenceTrace->new 82 ( -swq => Bio::Seq::SequenceWithQuality, 83 -trace_a => \@trace_values_for_a_channel, 84 -trace_t => \@trace_values_for_t_channel, 85 -trace_g => \@trace_values_for_g_channel, 86 -trace_c => \@trace_values_for_c_channel, 87 -accuracy_a => \@a_accuracies, 88 -accuracy_t => \@t_accuracies, 89 -accuracy_g => \@g_accuracies, 90 -accuracy_c => \@c_accuracies, 91 -peak_indices => '0 5 10 15 20 25 30 35' 92 ); 93 Function: Returns a new Bio::Seq::SequenceTrace object from basic 94 constructors. 95 Returns : a new Bio::Seq::SequenceTrace object 96Arguments: I think that these are all describes in the usage above. 97 98=cut 99 100sub new { 101 my ($class, @args) = @_; 102 my $self = $class->SUPER::new(@args); 103 # default: turn OFF the warnings 104 $self->{suppress_warnings} = 1; 105 my($swq,$peak_indices,$trace_a,$trace_t, 106 $trace_g,$trace_c,$acc_a,$acc_t,$acc_g,$acc_c) = 107 $self->_rearrange([qw( 108 SWQ 109 PEAK_INDICES 110 TRACE_A 111 TRACE_T 112 TRACE_G 113 TRACE_C 114 ACCURACY_A 115 ACCURACY_T 116 ACCURACY_G 117 ACCURACY_C )], @args); 118 # first, deal with the sequence and quality information 119 if ($swq && ref($swq) eq "Bio::Seq::Quality") { 120 $self->{swq} = $swq; 121 } 122 else { 123 $self->throw("A Bio::Seq::SequenceTrace object must be created with a 124 Bio::Seq::Quality object. You provided this type of object: " 125 .ref($swq)); 126 } 127 if (!$acc_a) { 128 # this means that you probably did not provide traces and accuracies 129 # and that they need to be synthesized 130 $self->set_accuracies(); 131 } 132 else { 133 $self->accuracies('a',$acc_a); 134 $self->accuracies('t',$acc_t); 135 $self->accuracies('g',$acc_g); 136 $self->accuracies('c',$acc_c); 137 } 138 if (!$trace_a) { 139 $self->_synthesize_traces(); 140 } 141 else { 142 $self->trace('a',$trace_a); 143 $self->trace('t',$trace_t); 144 $self->trace('g',$trace_g); 145 $self->trace('c',$trace_c); 146 $self->peak_indices($peak_indices); 147 } 148 $self->id($self->seq_obj->id); 149 return $self; 150} 151 152sub swq_obj { 153 my $self = shift; 154 $self->warn('swq_obj() is deprecated: use seq_obj()'); 155 return $self->{swq}; 156} 157 158 159 160=head2 trace($base,\@new_values) 161 162 Title : trace($base,\@new_values) 163 Usage : @trace_Values = @{$obj->trace($base,\@new_values)}; 164 Function: Returns the trace values as a reference to an array containing the 165 trace values. The individual elements of the trace array are not validated 166 and can be any numeric value. 167 Returns : A reference to an array. 168 Status : 169Arguments: $base : which color channel would you like the trace values for? 170 - $base must be one of "A","T","G","C" 171 \@new_values : a reference to an array of values containing trace 172 data for this base 173 174=cut 175 176sub trace { 177 my ($self,$base_channel,$values) = @_; 178 if (!$base_channel) { 179 $self->throw('You must provide a valid base channel (atgc) to use trace()'); 180 } 181 $base_channel =~ tr/A-Z/a-z/; 182 if ($base_channel !~ /[acgt]/) { 183 $self->throw('You must provide a valid base channel (atgc) to use trace()'); 184 } 185 if ($values) { 186 if (ref($values) eq "ARRAY") { 187 $self->{trace}->{$base_channel} = $values; 188 } 189 else { 190 my @trace = split(' ',$values); 191 $self->{trace}->{$base_channel} = \@trace; 192 } 193 } 194 if ($self->{trace}->{$base_channel}) { 195 return $self->{trace}->{$base_channel}; 196 } 197 else { 198 return; 199 } 200} 201 202 203=head2 peak_indices($new_indices) 204 205 Title : peak_indices($new_indices) 206 Usage : $indices = $obj->peak_indices($new_indices); 207 Function: Return the trace index points for this object. 208 Returns : A scalar 209 Args : If used, the trace indices will be set to the provided value. 210 211=cut 212 213sub peak_indices { 214 my ($self,$peak_indices)= @_; 215 if ($peak_indices) { 216 if (ref($peak_indices) eq "ARRAY") { 217 $self->{peak_indices} = $peak_indices; 218 } 219 else { 220 my @indices = split(' ',$peak_indices); 221 $self->{peak_indices} = \@indices; 222 } 223 } 224 if (!$self->{peak_indices}) { 225 my @temp = (); 226 $self->{peak_indices} = \@temp; 227 } 228 return $self->{peak_indices}; 229} 230 231 232=head2 _reset_peak_indices() 233 234 Title : _rest_peak_indices() 235 Usage : $obj->_reset_peak_indices(); 236 Function: Reset the peak indices. 237 Returns : Nothing. 238 Args : None. 239 Notes : When you create a sub_trace_object, the peak indices 240 will still be pointing to the apporpriate location _in the 241 original trace_. In order to fix this, the initial value must 242 be subtracted from each value here. ie. The first peak index 243 must be "1". 244 245=cut 246 247sub _reset_peak_indices { 248 my $self = shift; 249 my $length = $self->length(); 250 my $subtractive = $self->peak_index_at(1); 251 my ($original,$new); 252 $self->peak_index_at(1,"null"); 253 for (my $counter=2; $counter<= $length; $counter++) { 254 my $original = $self->peak_index_at($counter); 255 $new = $original - $subtractive; 256 $self->peak_index_at($counter,$new); 257 } 258 return; 259} 260 261 262 263 264 265=head2 peak_index_at($position) 266 267 Title : peak_index_at($position) 268 Usage : $peak_index = $obj->peak_index_at($postition); 269 Function: Return the trace iindex point at this position 270 Returns : A scalar 271 Args : If used, the trace index at this position will be 272 set to the provided value. 273 274=cut 275 276sub peak_index_at { 277 my ($self,$position,$value)= @_; 278 if ($value) { 279 if ($value eq "null") { 280 $self->peak_indices->[$position-1] = "0"; 281 } 282 else { 283 $self->peak_indices->[$position-1] = $value; 284 } 285 } 286 return $self->peak_indices()->[$position-1]; 287} 288 289=head2 alphabet() 290 291 Title : alphabet(); 292 Usage : $molecule_type = $obj->alphabet(); 293 Function: Get the molecule type from the PrimarySeq object. 294 Returns : What what PrimarySeq says the type of the sequence is. 295 Args : None. 296 297=cut 298 299sub alphabet { 300 my $self = shift; 301 return $self->{swq}->alphabet; 302} 303 304=head2 display_id() 305 306 Title : display_id() 307 Usage : $id_string = $obj->display_id(); 308 Function: Returns the display id, aka the common name of the Quality 309 object. 310 The semantics of this is that it is the most likely string to be 311 used as an identifier of the quality sequence, and likely to have 312 "human" readability. The id is equivalent to the ID field of the 313 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl 314 database. In fasta format, the >(\S+) is presumed to be the id, 315 though some people overload the id to embed other information. 316 Bioperl does not use any embedded information in the ID field, 317 and people are encouraged to use other mechanisms (accession 318 field for example, or extending the sequence object) to solve 319 this. Notice that $seq->id() maps to this function, mainly for 320 legacy/convience issues. 321 This method sets the display_id for the Quality object. 322 Returns : A string 323 Args : If a scalar is provided, it is set as the new display_id for 324 the Quality object. 325 Status : Virtual 326 327=cut 328 329sub display_id { 330 my ($self,$value) = @_; 331 if( defined $value) { 332 $self->{swq}->display_id($value); 333 } 334 return $self->{swq}->display_id(); 335 336} 337 338=head2 accession_number() 339 340 Title : accession_number() 341 Usage : $unique_biological_key = $obj->accession_number(); 342 Function: Returns the unique biological id for a sequence, commonly 343 called the accession_number. For sequences from established 344 databases, the implementors should try to use the correct 345 accession number. Notice that primary_id() provides the unique id 346 for the implementation, allowing multiple objects to have the same 347 accession number in a particular implementation. For sequences 348 with no accession number, this method should return "unknown". 349 This method sets the accession_number for the Quality 350 object. 351 Returns : A string (the value of accession_number) 352 Args : If a scalar is provided, it is set as the new accession_number 353 for the Quality object. 354 Status : Virtual 355 356 357=cut 358 359sub accession_number { 360 my( $self, $acc ) = @_; 361 if (defined $acc) { 362 $self->{swq}->accession_number($acc); 363 } else { 364 $acc = $self->{swq}->accession_number(); 365 $acc = 'unknown' unless defined $acc; 366 } 367 return $acc; 368} 369 370=head2 primary_id() 371 372 Title : primary_id() 373 Usage : $unique_implementation_key = $obj->primary_id(); 374 Function: Returns the unique id for this object in this implementation. 375 This allows implementations to manage their own object ids in a 376 way the implementation can control clients can expect one id to 377 map to one object. For sequences with no accession number, this 378 method should return a stringified memory location. 379 This method sets the primary_id for the Quality 380 object. 381 Returns : A string. (the value of primary_id) 382 Args : If a scalar is provided, it is set as the new primary_id for 383 the Quality object. 384 385=cut 386 387sub primary_id { 388 my ($self,$value) = @_; 389 if ($value) { 390 $self->{swq}->primary_id($value); 391 } 392 return $self->{swq}->primary_id(); 393 394} 395 396=head2 desc() 397 398 Title : desc() 399 Usage : $qual->desc($newval); _or_ 400 $description = $qual->desc(); 401 Function: Get/set description text for this Quality object. 402 Returns : A string. (the value of desc) 403 Args : If a scalar is provided, it is set as the new desc for the 404 Quality object. 405 406=cut 407 408sub desc { 409 # a mechanism to set the desc for the Quality object. 410 # probably will be used most often by set_common_features() 411 my ($self,$value) = @_; 412 if( defined $value) { 413 $self->{swq}->desc($value); 414 } 415 return $self->{swq}->desc(); 416} 417 418=head2 id() 419 420 Title : id() 421 Usage : $id = $qual->id(); 422 Function: Return the ID of the quality. This should normally be (and 423 actually is in the implementation provided here) just a synonym 424 for display_id(). 425 Returns : A string. (the value of id) 426 Args : If a scalar is provided, it is set as the new id for the 427 Quality object. 428 429=cut 430 431sub id { 432 my ($self,$value) = @_; 433 if (!$self) { $self->throw("no value for self in $value"); } 434 if( defined $value ) { 435 $self->{swq}->display_id($value); 436 } 437 return $self->{swq}->display_id(); 438} 439 440=head2 seq 441 442 Title : seq() 443 Usage : $string = $obj->seq(); _or_ 444 $obj->seq("atctatcatca"); 445 Function: Returns the sequence that is contained in the imbedded in the 446 PrimarySeq object within the Quality object 447 Returns : A scalar (the seq() value for the imbedded PrimarySeq object.) 448 Args : If a scalar is provided, the Quality object will 449 attempt to set that as the sequence for the imbedded PrimarySeq 450 object. Otherwise, the value of seq() for the PrimarySeq object 451 is returned. 452 Notes : This is probably not a good idea because you then should call 453 length() to make sure that the sequence and quality are of the 454 same length. Even then, how can you make sure that this sequence 455 belongs with that quality? I provided this to give you rope to 456 hang yourself with. Tie it to a strong device and use a good 457 knot. 458 459=cut 460 461sub seq { 462 my ($self,$value) = @_; 463 if( defined $value) { 464 $self->{swq}->seq($value); 465 } 466 return $self->{swq}->seq(); 467} 468 469=head2 qual() 470 471 Title : qual() 472 Usage : @quality_values = @{$obj->qual()}; _or_ 473 $obj->qual("10 10 20 40 50"); 474 Function: Returns the quality as imbedded in the PrimaryQual object 475 within the Quality object. 476 Returns : A reference to an array containing the quality values in the 477 PrimaryQual object. 478 Args : If a scalar is provided, the Quality object will 479 attempt to set that as the quality for the imbedded PrimaryQual 480 object. Otherwise, the value of qual() for the PrimaryQual 481 object is returned. 482 Notes : This is probably not a good idea because you then should call 483 length() to make sure that the sequence and quality are of the 484 same length. Even then, how can you make sure that this sequence 485 belongs with that quality? I provided this to give you a strong 486 board with which to flagellate yourself. 487 488=cut 489 490sub qual { 491 my ($self,$value) = @_; 492 493 if( defined $value) { 494 $self->{swq}->qual($value); 495 } 496 return $self->{swq}->qual(); 497} 498 499 500 501 502=head2 length() 503 504 Title : length() 505 Usage : $length = $seqWqual->length(); 506 Function: Get the length of the Quality sequence/quality. 507 Returns : Returns the length of the sequence and quality 508 Args : None. 509 510=cut 511 512sub length { 513 my $self = shift; 514 return $self->seq_obj->length; 515} 516 517 518=head2 qual_obj 519 520 Title : qual_obj($different_obj) 521 Usage : $qualobj = $seqWqual->qual_obj(); _or_ 522 $qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj); 523 Function: Get the Qualilty object that is imbedded in the 524 Quality object or if a reference to a PrimaryQual object 525 is provided, set this as the PrimaryQual object imbedded in the 526 Quality object. 527 Returns : A reference to a Bio::Seq::Quality object. 528 529Identical to L<seq_obj>. 530 531=cut 532 533sub qual_obj { 534 my ($self,$value) = @_; 535# return $self->{swq}->qual_obj($value); 536 return $self->{swq}; 537} 538 539 540=head2 seq_obj 541 542 Title : seq_obj() 543 Usage : $seqobj = $seqWqual->seq_obj(); _or_ 544 $seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj); 545 Function: Get the PrimarySeq object that is imbedded in the 546 Quality object or if a reference to a PrimarySeq object is 547 provided, set this as the PrimarySeq object imbedded in the 548 Quality object. 549 Returns : A reference to a Bio::PrimarySeq object. 550 551=cut 552 553sub seq_obj { 554 my ($self,$value) = @_; 555 return $self->{swq}; 556} 557 558=head2 _set_descriptors 559 560 Title : _set_descriptors() 561 Usage : $seqWqual->_qual_obj($qual,$seq,$id,$acc,$pid,$desc,$given_id, 562 $alphabet); 563 Function: Set the descriptors for the Quality object. Try to 564 match the descriptors in the PrimarySeq object and in the 565 PrimaryQual object if descriptors were not provided with 566 construction. 567 Returns : Nothing. 568 Args : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found 569 in the new() method. 570 Notes : Really only intended to be called by the new() method. If 571 you want to invoke a similar function try 572 set_common_descriptors(). 573 574=cut 575 576 577sub _set_descriptors { 578 my ($self,$qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet) = @_; 579 $self->{swq}->_seq_descriptors($qual,$seq,$id,$acc,$pid, 580 $desc,$given_id,$alphabet); 581} 582 583=head2 subseq($start,$end) 584 585 Title : subseq($start,$end) 586 Usage : $subsequence = $obj->subseq($start,$end); 587 Function: Returns the subseq from start to end, where the first base 588 is 1 and the number is inclusive, ie 1-2 are the first two 589 bases of the sequence. 590 Returns : A string. 591 Args : Two positions. 592 593=cut 594 595sub subseq { 596 my ($self,@args) = @_; 597 # does a single value work? 598 return $self->{swq}->subseq(@args); 599} 600 601=head2 baseat($position) 602 603 Title : baseat($position) 604 Usage : $base_at_position_6 = $obj->baseat("6"); 605 Function: Returns a single base at the given position, where the first 606 base is 1 and the number is inclusive, ie 1-2 are the first two 607 bases of the sequence. 608 Returns : A scalar. 609 Args : A position. 610 611=cut 612 613sub baseat { 614 my ($self,$val) = @_; 615 return $self->{swq}->subseq($val,$val); 616} 617 618=head2 subqual($start,$end) 619 620 Title : subqual($start,$end) 621 Usage : @qualities = @{$obj->subqual(10,20); 622 Function: returns the quality values from $start to $end, where the 623 first value is 1 and the number is inclusive, ie 1-2 are the 624 first two bases of the sequence. Start cannot be larger than 625 end but can be equal. 626 Returns : A reference to an array. 627 Args : a start position and an end position 628 629=cut 630 631sub subqual { 632 my ($self,@args) = @_; 633 return $self->{swq}->subqual(@args); 634} 635 636=head2 qualat($position) 637 638 Title : qualat($position) 639 Usage : $quality = $obj->qualat(10); 640 Function: Return the quality value at the given location, where the 641 first value is 1 and the number is inclusive, ie 1-2 are the 642 first two bases of the sequence. Start cannot be larger than 643 end but can be equal. 644 Returns : A scalar. 645 Args : A position. 646 647=cut 648 649sub qualat { 650 my ($self,$val) = @_; 651 return $self->{swq}->qualat($val); 652} 653 654=head2 sub_peak_index($start,$end) 655 656 Title : sub_peak_index($start,$end) 657 Usage : @peak_indices = @{$obj->sub_peak_index(10,20); 658 Function: returns the trace index values from $start to $end, where the 659 first value is 1 and the number is inclusive, ie 1-2 are the 660 first two trace indices for this channel. 661 Returns : A reference to an array. 662 Args : a start position and an end position 663 664=cut 665 666sub sub_peak_index { 667 my ($self,$start,$end) = @_; 668 if( $start > $end ){ 669 $self->throw("in sub_peak_index, start [$start] has to be greater than end [$end]"); 670 } 671 672 if( $start <= 0 || $end > $self->length ) { 673 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length.""); 674 } 675 676 # remove one from start, and then length is end-start 677 678 $start--; 679 $end--; 680 my @sub_peak_index_array = @{$self->{peak_indices}}[$start..$end]; 681 682 # return substr $self->seq(), $start, ($end-$start); 683 return \@sub_peak_index_array; 684 685} 686 687=head2 sub_trace($start,$end) 688 689 Title : sub_trace($base_channel,$start,$end) 690 Usage : @trace_values = @{$obj->sub_trace('a',10,20)}; 691 Function: returns the trace values from $start to $end, where the 692 first value is 1 and the number is inclusive, ie 1-2 are the 693 first two bases of the sequence. Start cannot be larger than 694 end but can be e_peak_index. 695 Returns : A reference to an array. 696 Args : a start position and an end position 697 698=cut 699 700sub sub_trace { 701 my ($self,$base_channel,$start,$end) = @_; 702 if( $start > $end ){ 703 $self->throw("in sub_trace, start [$start] has to be greater than end [$end]"); 704 } 705 706 if( $start <= 0 || $end > $self->trace_length() ) { 707 $self->throw("You have to have start positive and length less than the total length of traces [$start:$end] Total ".$self->trace_length.""); 708 } 709 710 # remove one from start, and then length is end-start 711 712 $start--; 713 $end--; 714 my @sub_peak_index_array = @{$self->trace($base_channel)}[$start..$end]; 715 716 # return substr $self->seq(), $start, ($end-$start); 717 return \@sub_peak_index_array; 718 719} 720 721=head2 trace_length() 722 723 Title : trace_length() 724 Usage : $trace_length = $obj->trace_length(); 725 Function: Return the length of the trace if all four traces (atgc) 726 are the same. Otherwise, throw an error. 727 Returns : A scalar. 728 Args : none 729 730=cut 731 732sub trace_length { 733 my $self = shift; 734 if ( !$self->trace('a') || !$self->trace('t') || !$self->trace('g') || !$self->trace('c') ) { 735 $self->warn("One or more of the trace channels are missing. Cannot give you a length."); 736 737 } 738 my $lengtha = scalar(@{$self->trace('a')}); 739 my $lengtht = scalar(@{$self->trace('t')}); 740 my $lengthg = scalar(@{$self->trace('g')}); 741 my $lengthc = scalar(@{$self->trace('c')}); 742 if (($lengtha == $lengtht) && ($lengtha == $lengthg) && ($lengtha == $lengthc) ) { 743 return $lengtha; 744 } 745 $self->warn("Not all of the trace indices are the same length". 746 " Here are their lengths: a: $lengtha t:$lengtht ". 747 " g: $lengthg c: $lengthc"); 748} 749 750 751 752=head2 sub_trace_object($start,$end) 753 754 Title : sub_trace_object($start,$end) 755 Usage : $smaller_object = $object->sub_trace_object('1','100'); 756 Function: Get a subset of the sequence, its quality, and its trace. 757 Returns : A reference to a Bio::Seq::SequenceTrace object 758 Args : a start position and an end position 759 Notes : 760 - the start and end position refer to the positions of _bases_. 761 - for example, to get a sub SequenceTrace for bases 5-10, 762 use this routine. 763 - you will get the bases, qualities, and the trace values 764 - you can then use this object to synthesize a new scf 765 using seqIO::scf. 766 767=cut 768 769sub sub_trace_object { 770 my ($self,$start,$end) = @_; 771 my ($start2,$end2); 772 my @subs = @{$self->sub_peak_index($start,$end)}; 773 $start2 = shift(@subs); 774 $end2 = pop(@subs); 775 my $new_object = Bio::Seq::SequenceTrace->new( 776 -swq => Bio::Seq::Quality->new( 777 -seq => $self->subseq($start,$end), 778 -qual => $self->subqual($start,$end), 779 -id => $self->id() 780 ), 781 -trace_a => $self->sub_trace('a',$start2,$end2), 782 -trace_t => $self->sub_trace('t',$start2,$end2), 783 -trace_g => $self->sub_trace('g',$start2,$end2), 784 -trace_c => $self->sub_trace('c',$start2,$end2), 785 -peak_indices => $self->sub_peak_index($start,$end) 786 787 ); 788 $new_object->set_accuracies(); 789 $new_object->_reset_peak_indices(); 790 return $new_object; 791} 792 793=head2 _synthesize_traces() 794 795 Title : _synthesize_traces() 796 Usage : $obj->_synthesize_traces(); 797 Function: Synthesize false traces for this object. 798 Returns : Nothing. 799 Args : None. 800 Notes : This method is intended to be invoked when this 801 object is created with a SWQ object- that is to say that 802 there is a sequence and a set of qualities but there was 803 no actual trace data. 804 805=cut 806 807sub _synthesize_traces { 808 my ($self) = shift; 809 $self->peak_indices(qw()); 810#ml my $version = 2; 811 # the user should be warned if traces already exist 812 # 813 # 814#ml ( my $sequence = $self->seq() ) =~ tr/a-z/A-Z/; 815#ml my @quals = @{$self->qual()}; 816#ml my $info; 817 # build the ramp for the first base. 818 # a ramp looks like this "1 4 13 29 51 71 80 71 51 29 13 4 1" times the quality score. 819 # REMEMBER: A C G T 820 # note to self-> smooth this thing out a bit later 821 my $ramp_data; 822 @{$ramp_data->{'ramp'}} = qw( 1 4 13 29 51 75 80 75 51 29 13 4 1 ); 823 # the width of the ramp 824 $ramp_data->{'ramp_width'} = scalar(@{$ramp_data->{'ramp'}}); 825 # how far should the peaks overlap? 826 $ramp_data->{'ramp_overlap'} = 1; 827 # where should the peaks be located? 828 $ramp_data->{'peak_at'} = 7; 829 $ramp_data->{'ramp_total_length'} = 830 $self->seq_obj()->length() * $ramp_data->{'ramp_width'} 831 - $self->seq_obj()->length() * $ramp_data->{'ramp_overlap'}; 832 my $pos; 833 my $total_length = $ramp_data->{ramp_total_length}; 834 $self->initialize_traces("0",$total_length+2); 835 # now populate them 836 my ($current_base,$place_base_at,$peak_quality,$ramp_counter,$current_ramp,$ramp_position); 837#ml my $sequence_length = $self->length(); 838 my $half_ramp = int($ramp_data->{'ramp_width'}/2); 839 for ($pos = 0; $pos<$self->length();$pos++) { 840 $current_base = uc $self->seq_obj()->subseq($pos+1,$pos+1); 841 # print("Synthesizing the ramp for $current_base\n"); 842 my $all_bases = "ATGC"; 843 $peak_quality = $self->qual_obj()->qualat($pos+1); 844 # where should the peak for this base be placed? Modeled after a mktrace scf 845 $place_base_at = ($pos * $ramp_data->{'ramp_width'}) - 846 ($pos * $ramp_data->{'ramp_overlap'}) - 847 $half_ramp + $ramp_data->{'ramp_width'} - 1; 848 # print("Placing this base at this position: $place_base_at\n"); 849 push @{$self->peak_indices()},$place_base_at; 850 $ramp_position = $place_base_at - $half_ramp; 851 if ($current_base =~ "N" ) { 852 $current_base = "A"; 853 } 854 for ($current_ramp = 0; $current_ramp < $ramp_data->{'ramp_width'}; $current_ramp++) { 855 # print("Placing a trace value here: $current_base ".($ramp_position+$current_ramp+1)." ".$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]."\n"); 856 $self->trace_value_at($current_base,$ramp_position+$current_ramp+1,$peak_quality*$ramp_data->{'ramp'}->[$current_ramp]); 857 } 858 $self->peak_index_at($pos+1, 859 $place_base_at+1 860 ); 861#ml my $other_bases = $self->_get_other_bases($current_base); 862 # foreach ( split('',$other_bases) ) { 863 # push @{$self->{'text'}->{"v3_base_accuracy"}->{$_}},0; 864 #} 865 } 866} 867 868 869 870 871 872=head2 _dump_traces($transformed) 873 874 Title : _dump_traces("transformed") 875 Usage : &_dump_traces($ra,$rc,$rg,$rt); 876 Function: Used in debugging. Prints all traces one beside each other. 877 Returns : Nothing. 878 Args : References to the arrays containing the traces for A,C,G,T. 879 Notes : Beats using dumpValue, I'll tell ya. Much better then using 880 join' ' too. 881 - if a scalar is included as an argument (any scalar), this 882 procedure will dump the _delta'd trace. If you don't know what 883 that means you should not be using this. 884 885=cut 886 887#' 888sub _dump_traces { 889 my ($self) = @_; 890 my (@sA,@sT,@sG,@sC); 891 print ("Count\ta\tc\tg\tt\n"); 892 my $length = $self->trace_length(); 893 for (my $curr=1; $curr <= $length; $curr++) { 894 print(($curr-1)."\t".$self->trace_value_at('a',$curr). 895 "\t".$self->trace_value_at('c',$curr). 896 "\t".$self->trace_value_at('g',$curr). 897 "\t".$self->trace_value_at('t',$curr)."\n"); 898 } 899 return; 900} 901 902=head2 _initialize_traces() 903 904 Title : _initialize_traces() 905 Usage : $trace_object->_initialize_traces(); 906 Function: Creates empty arrays to hold synthetic trace values. 907 Returns : Nothing. 908 Args : None. 909 910=cut 911 912sub initialize_traces { 913 my ($self,$value,$length) = @_; 914 foreach (qw(a t g c)) { 915 my @temp; 916 for (my $count=0; $count<$length; $count++) { 917 $temp[$count] = $value; 918 } 919 $self->trace($_,\@temp); 920 } 921} 922 923=head2 trace_value_at($channel,$position) 924 925 Title : trace_value_at($channel,$position) 926 Usage : $value = $trace_object->trace_value_at($channel,$position); 927 Function: What is the value of the trace for this base at this position? 928 Returns : A scalar represnting the trace value here. 929 Args : a base channel (a,t,g,c) 930 a position ( < $trace_object->trace_length() ) 931 932=cut 933 934sub trace_value_at { 935 my ($self,$channel,$position,$value) = @_; 936 if ($value) { 937 $self->trace($channel)->[$position] = $value; 938 } 939 return $self->sub_trace($channel,($position),($position))->[0]; 940} 941 942sub _deprecated_get_scf_version_2_base_structure { 943 # this sub is deprecated- check inside SeqIO::scf 944 my $self = shift; 945 my (@structure,$current); 946 my $length = $self->length(); 947 for ($current=1; $current <= $self->length() ; $current++) { 948 my $base_here = $self->seq_obj()->subseq($current,$current); 949 $base_here = lc($base_here); 950 my $probabilities; 951 $probabilities->{$base_here} = $self->qual_obj()->qualat($current); 952 my $other_bases = "atgc"; 953 my $empty = ""; 954 $other_bases =~ s/$base_here/$empty/e; 955 foreach ( split('',$other_bases) ) { 956 $probabilities->{$_} = "0"; 957 } 958 @structure = ( 959 @structure, 960 $self->peak_index_at($current), 961 $probabilities->{'a'}, 962 $probabilities->{'t'}, 963 $probabilities->{'g'}, 964 $probabilities->{'c'} 965 ); 966 967 } 968 return \@structure; 969} 970 971sub _deprecated_get_scf_version_3_base_structure { 972 my $self = shift; 973 my $structure; 974 $structure = join('',$self->peak_indices()); 975 return $structure; 976} 977 978 979=head2 accuracies($channel,$position) 980 981 Title : trace_value_at($channel,$position) 982 Usage : $value = $trace_object->trace_value_at($channel,$position); 983 Function: What is the value of the trace for this base at this position? 984 Returns : A scalar representing the trace value here. 985 Args : a base channel (a,t,g,c) 986 a position ( < $trace_object->trace_length() ) 987 988=cut 989 990 991sub accuracies { 992 my ($self,$channel,$value) = @_; 993 if ($value) { 994 if (ref($value) eq "ARRAY") { 995 $self->{accuracies}->{$channel} = $value; 996 } 997 else { 998 my @acc = split(' ',$value); 999 $self->{accuracies}->{$channel} = \@acc; 1000 } 1001 } 1002 return $self->{accuracies}->{$channel}; 1003} 1004 1005 1006=head2 set_accuracies() 1007 1008 Title : set_sccuracies() 1009 Usage : $trace_object->set_accuracies(); 1010 Function: Take a sequence's quality and synthesize proper scf-style 1011 base accuracies that can then be accessed with 1012 accuracies("a") or something like it. 1013 Returns : Nothing. 1014 Args : None. 1015 1016=cut 1017 1018sub set_accuracies { 1019 my $self = shift; 1020 my $count = 0; 1021 my $length = $self->length(); 1022 for ($count=1; $count <= $length; $count++) { 1023 my $base_here = $self->seq_obj()->subseq($count,$count); 1024 my $qual_here = $self->qual_obj()->qualat($count); 1025 $self->accuracy_at($base_here,$count,$qual_here); 1026 my $other_bases = $self->_get_other_bases($base_here); 1027 foreach (split('',$other_bases)) { 1028 $self->accuracy_at($_,$count,"null"); 1029 } 1030 } 1031} 1032 1033 1034=head2 scf_dump() 1035 1036 Title : scf_dump() 1037 Usage : $trace_object->scf_dump(); 1038 Function: Prints out the contents of the structures representing 1039 the SequenceTrace in a manner similar to io_lib's scf_dump. 1040 Returns : Nothing. Prints out the contents of the structures 1041 used to represent the sequence and its trace. 1042 Args : None. 1043 Notes : Used in debugging, obviously. 1044 1045=cut 1046 1047sub scf_dump { 1048 my $self = shift; 1049 my $count; 1050 for ($count=1;$count<=$self->length();$count++) { 1051 my $base_here = lc($self->seq_obj()->subseq($count,$count)); 1052 print($base_here." ".sprintf("%05d",$self->peak_index_at($count))."\t"); 1053 foreach (sort qw(a c g t)) { 1054 print(sprintf("%03d",$self->accuracy_at($_,$count))."\t"); 1055 } 1056 print("\n"); 1057 } 1058 $self->_dump_traces(); 1059} 1060 1061=head2 _get_other_bases($this_base) 1062 1063 Title : _get_other_bases($this_base) 1064 Usage : $other_bases = $trace_object->_get_other_bases($this_base); 1065 Function: A utility routine to return bases other then the one provided. 1066 I was doing this over and over so I put it here. 1067 Returns : Three of a,t,g and c. 1068 Args : A base (atgc) 1069 Notes : $obj->_get_other_bases("a") returns "tgc" 1070 1071=cut 1072 1073sub _get_other_bases { 1074 my ($self,$this_base) = @_; 1075 $this_base = lc($this_base); 1076 my $all_bases = "atgc"; 1077 my $empty = ""; 1078 $all_bases =~ s/$this_base/$empty/e; 1079 return $all_bases; 1080} 1081 1082 1083=head2 accuracy_at($base,$position) 1084 1085 Title : accuracy_at($base,$position) 1086 Usage : $accuracy = $trace_object->accuracy_at($base,$position); 1087 Function: 1088 Returns : Returns the accuracy of finding $base at $position. 1089 Args : 1. a base channel (atgc) 2. a value to _set_ the accuracy 1090 Notes : $obj->_get_other_bases("a") returns "tgc" 1091 1092=cut 1093 1094 1095sub accuracy_at { 1096 my ($self,$base,$position,$value) = @_; 1097 $base = lc($base); 1098 if ($value) { 1099 if ($value eq "null") { 1100 $self->{accuracies}->{$base}->[$position-1] = "0"; 1101 } 1102 else { 1103 $self->{accuracies}->{$base}->[$position-1] = $value; 1104 } 1105 } 1106 return $self->{accuracies}->{$base}->[$position-1]; 1107} 1108 11091; 1110 1111