1# $Id$
2#
3# BioPerl module for Bio::SeqFeature::Annotated
4#
5# Please direct questions and support issues to <bioperl-l@bioperl.org>
6#
7# Cared for by Allen Day <allenday at ucla.edu>
8#
9# Copyright Allen Day
10#
11# You may distribute this module under the same terms as perl itself
12
13# POD documentation - main docs before the code
14
15=head1 NAME
16
17Bio::SeqFeature::Annotated - PLEASE PUT SOMETHING HERE
18
19=head1 SYNOPSIS
20
21    # none yet, complain to authors
22
23=head1 DESCRIPTION
24
25None yet, complain to authors.
26
27=head1 Implemented Interfaces
28
29This class implements the following interfaces.
30
31=over 4
32
33=item Bio::SeqFeatureI
34
35Note that this includes implementing Bio::RangeI.
36
37=item Bio::AnnotatableI
38
39=item Bio::FeatureHolderI
40
41Features held by a feature are essentially sub-features.
42
43=back
44
45=head1 FEEDBACK
46
47=head2 Mailing Lists
48
49User feedback is an integral part of the evolution of this and other
50Bioperl modules. Send your comments and suggestions preferably to one
51of the Bioperl mailing lists.  Your participation is much appreciated.
52
53  bioperl-l@bioperl.org                  - General discussion
54  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
55
56=head2 Support
57
58Please direct usage questions or support issues to the mailing list:
59
60I<bioperl-l@bioperl.org>
61
62rather than to the module maintainer directly. Many experienced and
63reponsive experts will be able look at the problem and quickly
64address it. Please include a thorough description of the problem
65with code and data examples if at all possible.
66
67=head2 Reporting Bugs
68
69Report bugs to the Bioperl bug tracking system to help us keep track
70the bugs and their resolution.  Bug reports can be submitted via
71the web:
72
73  http://bugzilla.open-bio.org/
74
75=head1 AUTHOR - Allen Day
76
77Allen Day E<lt>allenday at ucla.eduE<gt>
78
79=head1 APPENDIX
80
81The rest of the documentation details each of the object
82methods. Internal methods are usually preceded with a _
83
84=cut
85
86
87package Bio::SeqFeature::Annotated;
88BEGIN {
89  $Bio::SeqFeature::Annotated::AUTHORITY = 'cpan:BIOPERLML';
90}
91$Bio::SeqFeature::Annotated::VERSION = '1.6.905';
92use strict;
93
94use Bio::Annotation::Collection;
95use Bio::Annotation::OntologyTerm;
96use Bio::Annotation::Target;
97use Bio::LocatableSeq;
98use Bio::Location::Simple;
99use Bio::Ontology::OntologyStore;
100use Bio::Tools::GFF;
101use Bio::SeqFeature::AnnotationAdaptor;
102use Data::Dumper;
103use URI::Escape;
104
105use base qw(Bio::Root::Root
106    Bio::SeqFeature::TypedSeqFeatureI
107    Bio::AnnotatableI
108    Bio::FeatureHolderI);
109
110our %tagclass = (
111  comment        => 'Bio::Annotation::Comment',
112  dblink         => 'Bio::Annotation::DBLink',
113  description    => 'Bio::Annotation::SimpleValue',
114  gene_name      => 'Bio::Annotation::SimpleValue',
115  ontology_term  => 'Bio::Annotation::OntologyTerm',
116  reference      => 'Bio::Annotation::Reference',
117  __DEFAULT__    => 'Bio::Annotation::SimpleValue',
118);
119
120our %tag2text = (
121  'Bio::Annotation::Comment'        => 'text',
122  'Bio::Annotation::DBLink'         => 'primary_id',
123  'Bio::Annotation::SimpleValue'    => 'value',
124  'Bio::Annotation::SimpleValue'    => 'value',
125  'Bio::Annotation::OntologyTerm'   => 'name',
126  'Bio::Annotation::Reference'      => 'title',
127  __DEFAULT__                       => 'value',
128);
129
130######################################
131#get_SeqFeatures
132#display_name
133#primary_tag
134#source_tag                       x with warning
135#has_tag
136#get_tag_values
137#get_tagset_values
138#get_all_tags
139#attach_seq
140#seq                              x
141#entire_seq                       x
142#seq_id
143#gff_string
144#_static_gff_handler
145#start                            x
146#end                              x
147#strand                           x
148#location
149#primary_id
150
151=head1 PREAMBLE
152
153Okay, where to start...
154
155The original idea for this class appears to lump all SeqFeatureI data
156(primary_tag, source_tag, etc) into AnnotationI objects into an
157Bio::Annotation::Collection. The type is then checked against SOFA.
158
159There have been several requests to have type checking be optionally run.
160
161Bio::FeatureHolderI::create_hierarchy_from_ParentIDs
162Bio::FeatureHolderI::feature_count
163Bio::FeatureHolderI::get_all_SeqFeatures
164Bio::FeatureHolderI::set_ParentIDs_from_hierarchy
165Bio::RangeI::contains
166Bio::RangeI::disconnected_ranges
167Bio::RangeI::equals
168Bio::RangeI::intersection
169Bio::RangeI::offsetStranded
170Bio::RangeI::overlap_extent
171Bio::RangeI::overlaps
172Bio::RangeI::subtract
173Bio::RangeI::union
174Bio::SeqFeature::Annotated::Dumper
175Bio::SeqFeature::Annotated::MAX_TYPE_CACHE_MEMBERS
176Bio::SeqFeature::Annotated::add_Annotation
177Bio::SeqFeature::Annotated::add_SeqFeature
178Bio::SeqFeature::Annotated::add_tag_value
179Bio::SeqFeature::Annotated::add_target
180Bio::SeqFeature::Annotated::annotation
181Bio::SeqFeature::Annotated::attach_seq
182Bio::SeqFeature::Annotated::display_name
183Bio::SeqFeature::Annotated::each_target
184Bio::SeqFeature::Annotated::end
185Bio::SeqFeature::Annotated::entire_seq
186Bio::SeqFeature::Annotated::frame
187Bio::SeqFeature::Annotated::from_feature
188Bio::SeqFeature::Annotated::get_Annotations
189Bio::SeqFeature::Annotated::get_SeqFeatures
190Bio::SeqFeature::Annotated::get_all_tags
191Bio::SeqFeature::Annotated::get_tag_values
192Bio::SeqFeature::Annotated::get_tagset_values
193Bio::SeqFeature::Annotated::has_tag
194Bio::SeqFeature::Annotated::length
195Bio::SeqFeature::Annotated::location
196Bio::SeqFeature::Annotated::name
197Bio::SeqFeature::Annotated::new
198Bio::SeqFeature::Annotated::phase
199Bio::SeqFeature::Annotated::primary_tag
200Bio::SeqFeature::Annotated::remove_Annotations
201Bio::SeqFeature::Annotated::remove_SeqFeatures
202Bio::SeqFeature::Annotated::remove_tag
203Bio::SeqFeature::Annotated::score
204Bio::SeqFeature::Annotated::seq
205Bio::SeqFeature::Annotated::seq_id
206Bio::SeqFeature::Annotated::source
207Bio::SeqFeature::Annotated::source_tag
208Bio::SeqFeature::Annotated::start
209Bio::SeqFeature::Annotated::strand
210Bio::SeqFeature::Annotated::type
211Bio::SeqFeature::Annotated::uri_escape
212Bio::SeqFeature::Annotated::uri_unescape
213Bio::SeqFeature::TypedSeqFeatureI::croak
214Bio::SeqFeature::TypedSeqFeatureI::ontology_term
215Bio::SeqFeatureI::generate_unique_persistent_id
216Bio::SeqFeatureI::gff_string
217Bio::SeqFeatureI::primary_id
218Bio::SeqFeatureI::spliced_seq
219
220=cut
221
222sub new {
223    my ( $caller, @args) = @_;
224    my ($self) = $caller->SUPER::new(@args);
225
226    $self->_initialize(@args);
227
228    return $self;
229}
230
231sub _initialize {
232  my ($self,@args) = @_;
233  my ($start, $end, $strand, $frame, $phase, $score,
234      $name, $annot, $location,
235      $display_name, # deprecate
236      $seq_id, $type,$source,$feature
237     ) =
238        $self->_rearrange([qw(START
239                              END
240                              STRAND
241                              FRAME
242                              PHASE
243                              SCORE
244                              NAME
245                              ANNOTATION
246                              LOCATION
247                              DISPLAY_NAME
248                              SEQ_ID
249                              TYPE
250                              SOURCE
251			      FEATURE
252                             )], @args);
253  defined $start        && $self->start($start);
254  defined $end          && $self->end($end);
255  defined $strand       && $self->strand($strand);
256  defined $frame        && $self->frame($frame);
257  defined $phase        && $self->phase($phase);
258  defined $score        && $self->score($score);
259  defined $source       && ref($source) ? $self->source($source) : $self->source_tag($source);
260  defined $type         && ref($type) ? $self->type($type) : $self->primary_tag($type);
261  defined $location     && $self->location($location);
262  defined $annot        && $self->annotation($annot);
263  defined $feature      && $self->from_feature($feature);
264
265  if( defined($display_name) && defined($name) ){
266	  $self->throw('Cannot define (-id and -seq_id) or (-name and -display_name) attributes');
267  }
268  defined $seq_id                   && $self->seq_id($seq_id);
269  defined ($name || $display_name)  && $self->name($name || $display_name);
270}
271
272=head1 ATTRIBUTE ACCESSORS FOR Bio::SeqFeature::Annotated
273
274=cut
275
276=head2 from_feature
277
278  Usage: $obj->from_feature($myfeature);
279  Desc : initialize this object with the contents of another feature
280         object.  Useful for converting objects like
281         L<Bio::SeqFeature::Generic> to this class
282  Ret  : nothing meaningful
283  Args : a single object of some other feature type,
284  Side Effects: throws error on failure
285  Example:
286
287=cut
288
289sub from_feature {
290    my ($self,$feat,%opts) = @_;
291
292    # should deal with any SeqFeatureI implementation (i.e. we don't want to
293    # automatically force a OO-heavy implementation on all classes)
294    ref($feat) && ($feat->isa('Bio::SeqFeatureI'))
295      or $self->throw('invalid arguments to from_feature');
296
297    #TODO: add overrides in opts for these values, so people don't have to screw up their feature object
298    #if they don't want to
299
300    ### set most of the data
301    foreach my $fieldname (qw/ start end strand frame score location seq_id source_tag primary_tag/) {
302      #no strict 'refs'; #using symbolic refs, yes, but using them for methods is allowed now
303      $self->$fieldname( $feat->$fieldname );
304    }
305
306    # now pick up the annotations/tags of the other feature
307    # We'll use AnnotationAdaptor to convert everything over
308
309    my %no_copy = map {$_ => 1} qw/seq_id source type frame phase score/;
310    my $adaptor = Bio::SeqFeature::AnnotationAdaptor->new(-feature => $feat);
311    for my $key ( $adaptor->get_all_annotation_keys() ) {
312        next if $no_copy{$key};
313        my @values = $adaptor->get_Annotations($key);
314        @values = _aggregate_scalar_annotations(\%opts,$key,@values);
315        foreach my $val (@values) {
316            $self->add_Annotation($key,$val)
317        }
318    }
319}
320#given a key and its values, make the values into
321#Bio::Annotation::\w+ objects
322
323sub _aggregate_scalar_annotations {
324  my ($opts,$key,@values) = @_;
325
326  #anything that's not an object, make it a SimpleValue
327  @values = map { ref($_) ? $_ : Bio::Annotation::SimpleValue->new(-value => $_) } @values;
328
329  #try to make Target objects
330  if($key eq 'Target' && (@values == 3 || @values == 4)
331     && @values == grep {$_->isa('Bio::Annotation::SimpleValue')} @values
332    ) {
333    @values = map {$_->value} @values;
334    #make a strand if it doesn't have one, enforcing start <= end
335    if(@values == 3) {
336      if($values[1] <= $values[2]) {
337	$values[3] = '+';
338      } else {
339	@values[1,2] = @values[2,1];
340	$values[3] = '-';
341      }
342    }
343    return ( Bio::Annotation::Target->new( -target_id => $values[0],
344					   -start     => $values[1],
345					   -end       => $values[2],
346					   -strand    => $values[3],
347					 )
348	   );
349  }
350  #try to make DBLink objects
351  elsif($key eq 'dblink' || $key eq 'Dbxref') {
352    return map {
353      if( /:/ ) { #convert to a DBLink if it has a colon in it
354	my ($db,$id) = split /:/,$_->value;
355	Bio::Annotation::DBLink->new( -database   => $db,
356				      -primary_id => $id,
357				    );
358      } else { #otherwise leave as a SimpleValue
359	$_
360      }
361    } @values;
362  }
363  #make OntologyTerm objects
364  elsif($key eq 'Ontology_term') {
365    return map { Bio::Annotation::OntologyTerm->new(-identifier => $_->value) } @values
366  }
367  #make Comment objects
368  elsif($key eq 'comment') {
369    return map { Bio::Annotation::Comment->new( -text => $_->value ) } @values;
370  }
371
372  return @values;
373}
374
375
376=head2 seq_id()
377
378 Usage   : $obj->seq_id($newval)
379 Function: holds a string corresponding to the unique
380           seq_id of the sequence underlying the feature
381           (e.g. database accession or primary key).
382 Returns : string representing the seq_id.
383 Args    : on set, some string or a Bio::Annotation::SimpleValue object.
384
385=cut
386
387sub seq_id {
388  my($self,$val) = @_;
389  if (defined($val)) {
390      my $term = undef;
391      if (!ref($val)) {
392	  $term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val));
393      } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
394	  $term = $val;
395      }
396      if (!defined($term) || ($term->value =~ /^>/)) {
397	  $self->throw('give seq_id() a scalar or Bio::Annotation::SimpleValue object, not '.$val);
398      }
399      $self->remove_Annotations('seq_id');
400      $self->add_Annotation('seq_id', $term);
401  }
402
403  $self->seq_id('.') unless $self->get_Annotations('seq_id'); # make sure we always have something
404
405  return ($self->get_Annotations('seq_id'))[0]->value;
406}
407
408=head2 name()
409
410 Usage   : $obj->name($newval)
411 Function: human-readable name for the feature.
412 Returns : value of name (a scalar)
413 Args    : on set, new value (a scalar or undef, optional)
414
415=cut
416
417sub name {
418  my($self,$val) = @_;
419  $self->{'name'} = $val if defined($val);
420  return $self->{'name'};
421}
422
423=head2 type()
424
425 Usage   : $obj->type($newval)
426 Function: a SOFA type for the feature.
427 Returns : Bio::Annotation::OntologyTerm object representing the type.
428           NB: to get a string, use primary_tag().
429 Args    : on set, Bio::Annotation::OntologyTerm object.
430           NB: to set a string (SOFA name or identifier), use primary_tag()
431
432=cut
433
434use constant MAX_TYPE_CACHE_MEMBERS => 20;
435sub type {
436  my($self,$val) = @_;
437  if(defined($val)){
438    my $term = undef;
439
440    if(!ref($val)){
441      $self->throw("give type() a Bio::Annotation::OntologyTerm object, not a string");
442    }
443    elsif(ref($val) && $val->isa('Bio::Annotation::OntologyTerm')){
444      $term = $val;
445    }
446    else {
447      #we have the wrong type of object
448      $self->throw('give type() a SOFA term name, identifier, or Bio::Annotation::OntologyTerm object, not '.$val);
449    }
450    $self->remove_Annotations('type');
451    $self->add_Annotation('type',$term);
452  }
453
454  return $self->get_Annotations('type');
455}
456
457=head2 source()
458
459 Usage   : $obj->source($newval)
460 Function: holds the source of the feature.
461 Returns : a Bio::Annotation::SimpleValue representing the source.
462           NB: to get a string, use source_tag()
463 Args    : on set, a Bio::Annotation::SimpleValue object.
464           NB: to set a string, use source_tag()
465
466=cut
467
468sub source {
469  my($self,$val) = @_;
470
471  if (defined($val)) {
472      my $term;
473      if (!ref($val)) {
474        $self->throw("give source() a Bio::Annotation::SimpleValue object, not a string");
475        #$term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val));
476      } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
477	  $term = $val;
478      } else {
479	  $self->throw('give source() a scalar or Bio::Annotation::SimpleValue object, not '.$val);
480      }
481      $self->remove_Annotations('source');
482      $self->add_Annotation('source', $term);
483  }
484
485  unless ($self->get_Annotations('source')) {
486    $self->source(Bio::Annotation::SimpleValue->new(-value => '.'));
487  }
488  return $self->get_Annotations('source');
489}
490
491=head2 score()
492
493 Usage   : $score = $feat->score()
494           $feat->score($score)
495 Function: holds a value corresponding to the score of the feature.
496 Returns : a string representing the score.
497 Args    : on set, a scalar or a Bio::Annotation::SimpleValue object.
498
499=cut
500
501sub score {
502  my $self = shift;
503  my $val = shift;
504
505  if(defined($val)){
506      my $term = undef;
507      if (!ref($val)) {
508	  $term = Bio::Annotation::SimpleValue->new(-value => $val);
509      } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
510	  $term = $val;
511      }
512
513      if ($term->value ne '.' &&
514           (!defined($term) || ($term->value !~ /^[+-]?\d+\.?\d*(e-\d+)?/))) {
515	  $self->throw("'$val' is not a valid score");
516      }
517      $self->remove_Annotations('score');
518      $self->add_Annotation('score', $term);
519  }
520
521  $self->score('.') unless scalar($self->get_Annotations('score')); # make sure we always have something
522
523  return ($self->get_Annotations('score'))[0]->display_text;
524}
525
526=head2 phase()
527
528 Usage   : $phase = $feat->phase()
529           $feat->phase($phase)
530 Function: get/set on phase information
531 Returns : a string 0,1,2,'.'
532 Args    : on set, one of 0,1,2,'.' or a Bio::Annotation::SimpleValue
533           object holding one of 0,1,2,'.' as its value.
534
535=cut
536
537sub phase {
538  my $self = shift;
539  my $val = shift;
540
541  if(defined($val)){
542      my $term = undef;
543      if (!ref($val)) {
544	  $term = Bio::Annotation::SimpleValue->new(-value => $val);
545      } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
546	  $term = $val;
547      }
548      if (!defined($term) || ($term->value !~ /^[0-2.]$/)) {
549	  $self->throw("'$val' is not a valid phase");
550      }
551      $self->remove_Annotations('phase');
552      $self->add_Annotation('phase', $term);
553  }
554
555  $self->phase('.') unless $self->get_Annotations('phase'); # make sure we always have something
556
557  return ($self->get_Annotations('phase'))[0]->value;
558}
559
560
561=head2 frame()
562
563 Usage   : $frame = $feat->frame()
564           $feat->frame($phase)
565 Function: get/set on phase information
566 Returns : a string 0,1,2,'.'
567 Args    : on set, one of 0,1,2,'.' or a Bio::Annotation::SimpleValue
568           object holding one of 0,1,2,'.' as its value.
569
570=cut
571
572sub frame {
573  my $self = shift;
574  my $val = shift;
575
576  if(defined($val)){
577      my $term = undef;
578      if (!ref($val)) {
579	  $term = Bio::Annotation::SimpleValue->new(-value => $val);
580      } elsif (ref($val) && $val->isa('Bio::Annotation::SimpleValue')) {
581	  $term = $val;
582      }
583      if (!defined($term) || ($term->value !~ /^[0-2.]$/)) {
584	  $self->throw("'$val' is not a valid frame");
585      }
586      $self->remove_Annotations('frame');
587      $self->add_Annotation('frame', $term);
588  }
589
590  $self->frame('.') unless $self->get_Annotations('frame'); # make sure we always have something
591
592  return ($self->get_Annotations('frame'))[0]->value;
593}
594
595############################################################
596
597=head1 SHORTCUT METHODS TO ACCESS Bio::AnnotatableI INTERFACE METHODS
598
599=cut
600
601=head2 add_Annotation()
602
603 Usage   :
604 Function: $obj->add_Annotation() is a shortcut to $obj->annotation->add_Annotation
605 Returns :
606 Args    :
607
608=cut
609
610sub add_Annotation {
611  my ($self,@args) = @_;
612  return $self->annotation->add_Annotation(@args);
613}
614
615=head2 remove_Annotations()
616
617 Usage   :
618 Function: $obj->remove_Annotations() is a shortcut to $obj->annotation->remove_Annotations
619 Returns :
620 Args    :
621
622=cut
623
624sub remove_Annotations {
625  my ($self,@args) = @_;
626  return $self->annotation->remove_Annotations(@args);
627}
628
629############################################################
630
631=head1 INTERFACE METHODS FOR Bio::SeqFeatureI
632
633Note that no methods are deprecated.  Any SeqFeatureI methods must return
634strings (no objects).
635
636=cut
637
638=head2 display_name()
639
640=cut
641
642sub display_name {
643  my $self = shift;
644  return $self->name(@_);
645}
646
647=head2 primary_tag()
648
649=cut
650
651sub primary_tag {
652  my $self = shift;
653  if (@_) {
654    my $val = shift;
655    my $term;
656    if(!ref($val) && $val){
657      #we have a plain text annotation coming in.  try to map it to SOFA.
658
659      our %__type_cache; #a little cache of plaintext types we've already seen
660
661      #clear our cache if it gets too big
662      if(scalar(keys %__type_cache) > MAX_TYPE_CACHE_MEMBERS) {
663        %__type_cache = ();
664      }
665
666      #set $term to either a cached value, or look up a new one, throwing
667      #up if not found
668      my $anntext = $val;
669      if ($__type_cache{$anntext}) {
670        $term = $__type_cache{$anntext};
671      } else {
672        my $sofa = Bio::Ontology::OntologyStore->get_instance->get_ontology('Sequence Ontology OBO');
673        my ($soterm) = $anntext =~ /^\D+:\d+$/ #does it look like an ident?
674          ? ($sofa->find_terms(-identifier => $anntext))[0] #yes, lookup by ident
675          : ($sofa->find_terms(-name => $anntext))[0];      #no, lookup by name
676        #throw if it's not in SOFA
677        unless($soterm){
678          $self->throw("couldn't find a SOFA term matching type '$val'.");
679        }
680        my $newterm = Bio::Annotation::OntologyTerm->new;
681        $newterm->term($soterm);
682        $term = $newterm;
683      }
684
685      $self->type($term);
686    }
687  }
688
689  my $t = $self->type() || return;
690  return $t->name;
691}
692
693=head2 source_tag()
694
695=cut
696
697sub source_tag {
698  my $self = shift;
699  if (@_) {
700    my $val = shift;
701    if(!ref($val) && $val){
702      my $term = Bio::Annotation::SimpleValue->new(-value => uri_unescape($val));
703      $self->source($term);
704    }
705  }
706  my $t = $self->source() || return;
707  return $t->display_text;
708}
709
710
711=head2 attach_seq()
712
713 Usage   : $sf->attach_seq($seq)
714 Function: Attaches a Bio::Seq object to this feature. This
715           Bio::Seq object is for the *entire* sequence: ie
716           from 1 to 10000
717 Returns : TRUE on success
718 Args    : a Bio::PrimarySeqI compliant object
719
720=cut
721
722sub attach_seq {
723   my ($self, $seq) = @_;
724
725   if ( ! ($seq && ref($seq) && $seq->isa("Bio::PrimarySeqI")) ) {
726       $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
727   }
728
729   $self->{'seq'} = $seq;
730
731   # attach to sub features if they want it
732   foreach ( $self->get_SeqFeatures() ) {
733       $_->attach_seq($seq);
734   }
735   return 1;
736}
737
738=head2 seq()
739
740 Usage   : $tseq = $sf->seq()
741 Function: returns a truncated version of seq() with bounds matching this feature
742 Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
743           bounded by start & end, or undef if there is no sequence attached
744 Args    : none
745
746=cut
747
748sub seq {
749  my ($self) = @_;
750
751  return unless defined($self->entire_seq());
752
753  my $seq = $self->entire_seq->trunc($self->start(), $self->end());
754
755  if ( defined $self->strand && $self->strand == -1 ) {
756    $seq = $seq->revcom;
757  }
758
759  return $seq;
760}
761
762=head2 entire_seq()
763
764 Usage   : $whole_seq = $sf->entire_seq()
765 Function: gives the entire sequence that this seqfeature is attached to
766 Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
767           sequence attached
768 Args    : none
769
770=cut
771
772sub entire_seq {
773  return shift->{'seq'};
774}
775
776############################################################
777
778=head1 INTERFACE METHODS FOR Bio::RangeI
779
780 as inherited via Bio::SeqFeatureI
781
782=cut
783
784=head2 length()
785
786 Usage   : $feature->length()
787 Function: Get the feature length computed as $feat->end - $feat->start + 1
788 Returns : integer
789 Args    : none
790
791=cut
792
793sub length {
794  my $self = shift;
795  return $self->end() - $self->start() + 1;
796}
797
798=head2 start()
799
800 Usage   : $obj->start($newval)
801 Function: Get/set on the start coordinate of the feature
802 Returns : integer
803 Args    : on set, new value (a scalar or undef, optional)
804
805=cut
806
807sub start {
808  my ($self,$value) = @_;
809  return $self->location->start($value);
810}
811
812=head2 end()
813
814 Usage   : $obj->end($newval)
815 Function: Get/set on the end coordinate of the feature
816 Returns : integer
817 Args    : on set, new value (a scalar or undef, optional)
818
819=cut
820
821sub end {
822  my ($self,$value) = @_;
823  return $self->location->end($value);
824}
825
826=head2 strand()
827
828 Usage   : $strand = $feat->strand($newval)
829 Function: get/set on strand information, being 1,-1 or 0
830 Returns : -1,1 or 0
831 Args    : on set, new value (a scalar or undef, optional)
832
833=cut
834
835sub strand {
836  my $self = shift;
837  return $self->location->strand(@_);
838}
839
840
841############################################################
842
843=head1 INTERFACE METHODS FOR Bio::FeatureHolderI
844
845This includes methods for retrieving, adding, and removing
846features. Since this is already a feature, features held by this
847feature holder are essentially sub-features.
848
849=cut
850
851=head2 get_SeqFeatures
852
853 Usage   : @feats = $feat->get_SeqFeatures();
854 Function: Returns an array of Bio::SeqFeatureI objects
855 Returns : An array
856 Args    : none
857
858=cut
859
860sub get_SeqFeatures {
861  return @{ shift->{'sub_array'} || []};
862}
863
864=head2 add_SeqFeature()
865
866 Usage   : $feat->add_SeqFeature($subfeat);
867           $feat->add_SeqFeature($subfeat,'EXPAND')
868 Function: adds a SeqFeature into the subSeqFeature array.
869           with no 'EXPAND' qualifer, subfeat will be tested
870           as to whether it lies inside the parent, and throw
871           an exception if not.
872
873           If EXPAND is used, the parent''s start/end/strand will
874           be adjusted so that it grows to accommodate the new
875           subFeature
876 Example :
877 Returns : nothing
878 Args    : a Bio::SeqFeatureI object
879
880=cut
881
882sub add_SeqFeature {
883  my ($self,$val, $expand) = @_;
884
885  return unless $val;
886
887  if ((!ref($val)) || !$val->isa('Bio::SeqFeatureI') ) {
888      $self->throw((ref($val) ? ref($val) : $val)
889                   ." does not implement Bio::SeqFeatureI.");
890  }
891
892  if($expand && ($expand eq 'EXPAND')) {
893      $self->_expand_region($val);
894  } else {
895      if ( !$self->contains($val) ) {
896	  $self->warn("$val is not contained within parent feature, and expansion is not valid, ignoring.");
897	  return;
898      }
899  }
900
901  push(@{$self->{'sub_array'}},$val);
902}
903
904=head2 remove_SeqFeatures()
905
906 Usage   : $obj->remove_SeqFeatures
907 Function: Removes all sub SeqFeatures.  If you want to remove only a subset,
908           remove that subset from the returned array, and add back the rest.
909 Returns : The array of Bio::SeqFeatureI implementing sub-features that was
910           deleted from this feature.
911 Args    : none
912
913=cut
914
915sub remove_SeqFeatures {
916  my ($self) = @_;
917
918  my @subfeats = @{$self->{'sub_array'} || []};
919  $self->{'sub_array'} = []; # zap the array.
920  return @subfeats;
921}
922
923############################################################
924
925=head1 INTERFACE METHODS FOR Bio::AnnotatableI
926
927=cut
928
929=head2 annotation()
930
931 Usage   : $obj->annotation($annot_obj)
932 Function: Get/set the annotation collection object for annotating this
933           feature.
934 Returns : A Bio::AnnotationCollectionI object
935 Args    : newvalue (optional)
936
937=cut
938
939sub annotation {
940    my ($obj,$value) = @_;
941
942    # we are smart if someone references the object and there hasn't been
943    # one set yet
944    if(defined $value || ! defined $obj->{'annotation'} ) {
945        $value = Bio::Annotation::Collection->new() unless ( defined $value );
946        $obj->{'annotation'} = $value;
947    }
948    return $obj->{'annotation'};
949}
950
951############################################################
952
953=head2 location()
954
955 Usage   : my $location = $seqfeature->location()
956 Function: returns a location object suitable for identifying location
957           of feature on sequence or parent feature
958 Returns : Bio::LocationI object
959 Args    : [optional] Bio::LocationI object to set the value to.
960
961=cut
962
963sub location {
964  my($self, $value ) = @_;
965
966  if (defined($value)) {
967    unless (ref($value) and $value->isa('Bio::LocationI')) {
968      $self->throw("object $value pretends to be a location but ".
969                   "does not implement Bio::LocationI");
970    }
971    $self->{'location'} = $value;
972  }
973  elsif (! $self->{'location'}) {
974    # guarantees a real location object is returned every time
975    $self->{'location'} = Bio::Location::Simple->new();
976  }
977  return $self->{'location'};
978}
979
980=head2 add_target()
981
982 Usage   : $seqfeature->add_target(Bio::LocatableSeq->new(...));
983 Function: adds a target location on another reference sequence for this feature
984 Returns : true on success
985 Args    : a Bio::LocatableSeq object
986
987=cut
988
989sub add_target {
990  my ($self,$seq) = @_;
991  $self->throw("$seq is not a Bio::LocatableSeq, bailing out") unless ref($seq) and seq->isa('Bio::LocatableSeq');
992  push @{ $self->{'targets'} }, $seq;
993  return $seq;
994}
995
996=head2 each_target()
997
998 Usage   : @targets = $seqfeature->each_target();
999 Function: Returns a list of Bio::LocatableSeqs which are the locations of this object.
1000           To obtain the "primary" location, see L</location()>.
1001 Returns : a list of 0..N Bio::LocatableSeq objects
1002 Args    : none
1003
1004=cut
1005
1006sub each_target {
1007  my ($self) = @_;
1008  return $self->{'targets'} ? @{ $self->{'targets'} } : ();
1009}
1010
1011=head2 _expand_region
1012
1013 Title   : _expand_region
1014 Usage   : $self->_expand_region($feature);
1015 Function: Expand the total region covered by this feature to
1016           accomodate for the given feature.
1017
1018           May be called whenever any kind of subfeature is added to this
1019           feature. add_SeqFeature() already does this.
1020 Returns :
1021 Args    : A Bio::SeqFeatureI implementing object.
1022
1023=cut
1024
1025sub _expand_region {
1026    my ($self, $feat) = @_;
1027    if(! $feat->isa('Bio::SeqFeatureI')) {
1028        $self->warn("$feat does not implement Bio::SeqFeatureI");
1029    }
1030    # if this doesn't have start/end set - forget it!
1031    if((! defined($self->start())) && (! defined $self->end())) {
1032        $self->start($feat->start());
1033        $self->end($feat->end());
1034        $self->strand($feat->strand) unless defined($self->strand());
1035#        $self->strand($feat->strand) unless $self->strand();
1036    } else {
1037        my $range = $self->union($feat);
1038        $self->start($range->start);
1039        $self->end($range->end);
1040        $self->strand($range->strand);
1041    }
1042}
1043
1044=head2 get_Annotations
1045
1046 Usage   : my $parent   = $obj->get_Annotations('Parent');
1047           my @parents = $obj->get_Annotations('Parent');
1048 Function: a wrapper around Bio::Annotation::Collection::get_Annotations().
1049 Returns : returns annotations as
1050           Bio::Annotation::Collection::get_Annotations() does, but
1051           additionally returns a single scalar in scalar context
1052           instead of list context so that if an annotation tag
1053           contains only a single value, you can do:
1054
1055           $parent = $feature->get_Annotations('Parent');
1056
1057           instead of:
1058
1059           ($parent) = ($feature->get_Annotations('Parent'))[0];
1060
1061           if the 'Parent' tag has multiple values and is called in a
1062           scalar context, the number of annotations is returned.
1063
1064 Args    : an annotation tag name.
1065
1066=cut
1067
1068sub get_Annotations {
1069    my $self = shift;
1070
1071    my @annotations = $self->annotation->get_Annotations(@_);
1072
1073    if(wantarray){
1074        return @annotations;
1075    } elsif(scalar(@annotations) == 1){
1076        return $annotations[0];
1077    } else {
1078        return scalar(@annotations);
1079    }
1080}
1081
1082=head1 Bio::SeqFeatureI implemented methods
1083
1084These are specialized implementations of SeqFeatureI methods which call the
1085internal Bio::Annotation::AnnotationCollection object. Just prior to the 1.5
1086release the below methods were moved from Bio::SeqFeatureI to Bio::AnnotatableI,
1087and having Bio::SeqFeatureI inherit Bio::AnnotatableI. This behavior forced all
1088Bio::SeqFeatureI-implementing classes to use Bio::AnnotationI objects for any
1089data. It is the consensus of the core developers that this be rolled back in
1090favor of a more flexible approach by rolling back the above changes and making
1091this class Bio::AnnotatableI. The SeqFeatureI tag-related methods are
1092reimplemented in order to approximate the same behavior as before.
1093
1094The methods below allow mapping of the "get_tag_values()"-style annotation
1095access to Bio::AnnotationCollectionI. These need not be implemented in a
1096Bio::AnnotationCollectionI compliant class, as they are built on top of the
1097methods.  For usage, see Bio::SeqFeatureI.
1098
1099=cut
1100
1101=head2 has_tag
1102
1103=cut
1104
1105sub has_tag {
1106  my ($self,$tag) = @_;
1107  return scalar($self->annotation->get_Annotations($tag));
1108}
1109
1110=head2 add_tag_value
1111
1112=cut
1113
1114sub add_tag_value {
1115  my ($self,$tag,@vals) = @_;
1116
1117  foreach my $val (@vals){
1118    my $class = $tagclass{$tag}   || $tagclass{__DEFAULT__};
1119    my $slot  = $tag2text{$class};
1120
1121    my $a = $class->new();
1122    $a->$slot($val);
1123
1124    $self->annotation->add_Annotation($tag,$a);
1125  }
1126
1127  return 1;
1128}
1129
1130=head2 get_tag_values
1131
1132 Usage   : @annotations = $obj->get_tag_values($tag)
1133 Function: returns annotations corresponding to $tag
1134 Returns : a list of scalars
1135 Args    : tag name
1136
1137=cut
1138
1139sub get_tag_values {
1140    my ($self,$tag) = @_;
1141    if(!$tagclass{$tag} && $self->annotation->get_Annotations($tag)){
1142        #new tag, haven't seen it yet but it exists.  add to registry
1143        my($proto) = $self->annotation->get_Annotations($tag);
1144        # we can only register if there's a method known for obtaining the value
1145        if (exists($tag2text{ref($proto)})) {
1146            $tagclass{$tag} = ref($proto);
1147        }
1148    }
1149
1150    my $slot  = $tag2text{ $tagclass{$tag} || $tagclass{__DEFAULT__} };
1151
1152    return map { $_->$slot } $self->annotation->get_Annotations($tag);
1153}
1154
1155=head2 get_tagset_values
1156
1157 Usage   : @annotations = $obj->get_tagset_values($tag1,$tag2)
1158 Function: returns annotations corresponding to a list of tags.
1159           this is a convenience method equivalent to multiple calls
1160           to get_tag_values with each tag in the list.
1161 Returns : a list of Bio::AnnotationI objects.
1162 Args    : a list of tag names
1163
1164=cut
1165
1166sub get_tagset_values {
1167  my ($self,@tags) = @_;
1168  my @r = ();
1169  foreach my $tag (@tags){
1170    my $slot  = $tag2text{ $tagclass{$tag} || $tagclass{__DEFAULT__} };
1171    push @r, map { $_->$slot } $self->annotation->get_Annotations($tag);
1172  }
1173  return @r;
1174}
1175
1176=head2 get_all_tags
1177
1178 Usage   : @tags = $obj->get_all_tags()
1179 Function: returns a list of annotation tag names.
1180 Returns : a list of tag names
1181 Args    : none
1182
1183=cut
1184
1185sub get_all_tags {
1186  my ($self,@args) = @_;
1187  return $self->annotation->get_all_annotation_keys(@args);
1188}
1189
1190=head2 remove_tag
1191
1192 Usage   : See remove_Annotations().
1193 Function:
1194 Returns :
1195 Args    :
1196 Note    : Contrary to what the name suggests, this method removes
1197           all annotations corresponding to $tag, not just a
1198           single anntoation.
1199
1200=cut
1201
1202sub remove_tag {
1203  my ($self,@args) = @_;
1204  return $self->annotation->remove_Annotations(@args);
1205}
1206
12071;
1208