1package Ace::Sequence::Feature;
2use strict;
3
4use Ace qw(:DEFAULT rearrange);
5use Ace::Object;
6use Ace::Sequence::Homol;
7use Carp;
8use AutoLoader 'AUTOLOAD';
9use vars '@ISA','%REV';
10@ISA = 'Ace::Sequence';  # for convenience sake only
11%REV = ('+1' => '-1',
12	'-1' => '+1');  # war is peace, &c.
13
14use overload
15  '""' => 'asString',
16  ;
17
18# parse a line from a sequence list
19sub new {
20  my $pack = shift;
21  my ($parent,$ref,$r_offset,$r_strand,$abs,$gff_line,$db) = @_;
22  my ($sourceseq,$method,$type,$start,$end,$score,$strand,$frame,$group) = split "\t",$gff_line;
23  if (defined($strand)) {
24    $strand = $strand eq '-' ? '-1' : '+1';
25  } else {
26    $strand = 0;
27  }
28
29  # for efficiency/performance, we don't use superclass new() method, but modify directly
30  # handling coordinates.  See SCRAPS below for what should be in here
31  $strand = '+1' if $strand < 0 && $r_strand < 0;  # two wrongs do make a right
32  ($start,$end) = ($end,$start) if $strand < 0;
33  my $offset = $start - 1;
34  my $length = ($end > $start) ? $end - $offset : $end - $offset - 2;
35
36  # handle negative strands
37  $offset ||= 0;
38  $offset *= -1 if $r_strand < 0 && $strand != $r_strand;
39
40  my $self= bless {
41		   obj      => $ref,
42		   offset   => $offset,
43		   length   => $length,
44		   parent   => $parent,
45		   p_offset => $r_offset,
46		   refseq   => [$ref,$r_offset,$r_strand],
47		   strand   => $r_strand,
48		   fstrand  => $strand,
49		   absolute => $abs,
50		   info     => {
51				seqname=> $sourceseq,
52				method => $method,
53				type   => $type,
54				score  => $score,
55				frame  => $frame,
56				group  => $group,
57				db     => $db,
58			       }
59		  },$pack;
60  return $self;
61}
62
63sub smapped { 1; }
64
65# $_[0] is field name, $_[1] is self, $_[2] is optional replacement value
66sub _field {
67  my $self = shift;
68  my $field = shift;
69  my $v = $self->{info}{$field};
70  $self->{info}{$field} = shift if @_;
71  return if defined $v && $v eq '.';
72  return $v;
73}
74
75sub strand { return $_[0]->{fstrand} }
76
77sub seqname   {
78  my $self = shift;
79  my $seq = $self->_field('seqname');
80  $self->db->fetch(Sequence=>$seq);
81}
82
83sub method    { shift->_field('method',@_) }  # ... I prefer "method"
84sub subtype   { shift->_field('method',@_) }  # ... or even "subtype"
85sub type      { shift->_field('type',@_)   }  # ... I prefer "type"
86sub score     { shift->_field('score',@_)  }  # float indicating some sort of score
87sub frame     { shift->_field('frame',@_)  }  # one of 1, 2, 3 or undef
88sub info      {                  # returns Ace::Object(s) with info about the feature
89  my $self = shift;
90  unless ($self->{group}) {
91    my $info = $self->{info}{group} || 'Method "'.$self->method.'"';
92    $info =~ s/(\"[^\"]*);([^\"]*\")/$1$;$2/g;
93    my @data = split(/\s*;\s*/,$info);
94    foreach (@data) { s/$;/;/g }
95    $self->{group} = [map {$self->toAce($_)} @data];
96  }
97  return wantarray ? @{$self->{group}} : $self->{group}->[0];
98}
99
100# bioperl compatibility
101sub primary_tag { shift->type(@_)    }
102sub source_tag  { shift->subtype(@_) }
103
104sub db { # database identifier (from Ace::Sequence::Multi)
105  my $self = shift;
106  my $db = $self->_field('db',@_);
107  return $db || $self->SUPER::db;
108}
109
110sub group  { $_[0]->info; }
111sub target { $_[0]->info; }
112
113sub asString {
114  my $self = shift;
115  my $name = $self->SUPER::asString;
116  my $type = $self->type;
117  return "$type:$name";
118}
119
120# unique ID
121sub id {
122  my $self = shift;
123  my $source = $self->source->name;
124  my $start = $self->start;
125  my $end = $self->end;
126  return "$source/$start,$end";
127}
128
129# map info into a reasonable set of ace objects
130sub toAce {
131    my $self = shift;
132    my $thing = shift;
133    my ($tag,@values) = $thing=~/(\"[^\"]+?\"|\S+)/g;
134    foreach (@values) { # strip the damn quotes
135      s/^\"(.*)\"$/$1/;  # get rid of leading and trailing quotes
136    }
137    return $self->tag2ace($tag,@values);
138}
139
140# synthesize an artificial Ace object based on the tag
141sub tag2ace {
142    my $self = shift;
143    my ($tag,@data) = @_;
144
145    # Special cases, hardcoded in Ace GFF code...
146    my $db = $self->db;;
147    my $class = $db->class;
148
149    # for Notes we just return a text, no database associated
150    return $class->new(Text=>$data[0]) if $tag eq 'Note';
151
152    # for homols, we create the indicated Protein or Sequence object
153    # then generate a bogus Homology object (for future compatability??)
154    if ($tag eq 'Target') {
155	my ($objname,$start,$end) = @data;
156	my ($classe,$name) = $objname =~ /^(\w+):(.+)/;
157	return Ace::Sequence::Homol->new_homol($classe,$name,$db,$start,$end);
158    }
159
160    # General case:
161    my $obj = $class->new($tag=>$data[0],$self->db);
162
163    return $obj if defined $obj;
164
165    # Last resort, return a Text
166    return $class->new(Text=>$data[0]);
167}
168
169sub sub_SeqFeature {
170  return wantarray ? () : 0;
171}
172
1731;
174
175=head1 NAME
176
177Ace::Sequence::Feature - Examine Sequence Feature Tables
178
179=head1 SYNOPSIS
180
181    # open database connection and get an Ace::Object sequence
182    use Ace::Sequence;
183
184    # get a megabase from the middle of chromosome I
185    $seq = Ace::Sequence->new(-name   => 'CHROMOSOME_I,
186                              -db     => $db,
187			      -offset => 3_000_000,
188			      -length => 1_000_000);
189
190    # get all the homologies (a list of Ace::Sequence::Feature objs)
191    @homol = $seq->features('Similarity');
192
193    # Get information about the first one
194    $feature = $homol[0];
195    $type    = $feature->type;
196    $subtype = $feature->subtype;
197    $start   = $feature->start;
198    $end     = $feature->end;
199    $score   = $feature->score;
200
201    # Follow the target
202    $target  = $feature->info;
203
204    # print the target's start and end positions
205    print $target->start,'-',$target->end, "\n";
206
207=head1 DESCRIPTION
208
209I<Ace::Sequence::Feature> is a subclass of L<Ace::Sequence::Feature>
210specialized for returning information about particular features in a
211GFF format feature table.
212
213=head1  OBJECT CREATION
214
215You will not ordinarily create an I<Ace::Sequence::Feature> object
216directly.  Instead, objects will be created in response to a feature()
217call to an I<Ace::Sequence> object.  If you wish to create an
218I<Ace::Sequence::Feature> object directly, please consult the source
219code for the I<new()> method.
220
221=head1 OBJECT METHODS
222
223Most methods are inherited from I<Ace::Sequence>.  The following
224methods are also supported:
225
226=over 4
227
228=item seqname()
229
230  $object = $feature->seqname;
231
232Return the ACeDB Sequence object that this feature is attached to.
233The return value is an I<Ace::Object> of the Sequence class.  This
234corresponds to the first field of the GFF format and does not
235necessarily correspond to the I<Ace::Sequence> object from which the
236feature was obtained (use source_seq() for that).
237
238=item source()
239
240=item method()
241
242=item subtype()
243
244  $source = $feature->source;
245
246These three methods are all synonyms for the same thing.  They return
247the second field of the GFF format, called "source" in the
248documentation.  This is usually the method or algorithm used to
249predict the feature, such as "GeneFinder" or "tRNA" scan.  To avoid
250ambiguity and enhance readability, the method() and subtype() synonyms
251are also recognized.
252
253=item feature()
254
255=item type()
256
257  $type = $feature->type;
258
259These two methods are also synonyms.  They return the type of the
260feature, such as "exon", "similarity" or "Predicted_gene".  In the GFF
261documentation this is called the "feature" field.  For readability,
262you can also use type() to fetch the field.
263
264=item abs_start()
265
266  $start = $feature->abs_start;
267
268This method returns the absolute start of the feature within the
269sequence segment indicated by seqname().  As in the I<Ace::Sequence>
270method, use start() to obtain the start of the feature relative to its
271source.
272
273=item abs_start()
274
275  $start = $feature->abs_start;
276
277This method returns the start of the feature relative to the sequence
278segment indicated by seqname().  As in the I<Ace::Sequence> method,
279you will more usually use the inherited start() method to obtain the
280start of the feature relative to its source sequence (the
281I<Ace::Sequence> from which it was originally derived).
282
283=item abs_end()
284
285  $start = $feature->abs_end;
286
287This method returns the end of the feature relative to the sequence
288segment indicated by seqname().  As in the I<Ace::Sequence> method,
289you will more usually use the inherited end() method to obtain the end
290of the feature relative to the I<Ace::Sequence> from which it was
291derived.
292
293=item score()
294
295  $score = $feature->score;
296
297For features that are associated with a numeric score, such as
298similarities, this returns that value.  For other features, this
299method returns undef.
300
301=item strand()
302
303  $strand = $feature->strand;
304
305Returns the strandedness of this feature, either "+1" or "-1".  For
306features that are not stranded, returns 0.
307
308=item reversed()
309
310  $reversed = $feature->reversed;
311
312Returns true if the feature is reversed relative to its source
313sequence.
314
315=item frame()
316
317  $frame = $feature->frame;
318
319For features that have a frame, such as a predicted coding sequence,
320returns the frame, either 0, 1 or 2.  For other features, returns undef.
321
322=item group()
323
324=item info()
325
326=item target()
327
328  $info = $feature->info;
329
330These methods (synonyms for one another) return an Ace::Object
331containing other information about the feature derived from the 8th
332field of the GFF format, the so-called "group" field.  The type of the
333Ace::Object is dependent on the nature of the feature.  The
334possibilities are shown in the table below:
335
336  Feature Type           Value of Group Field
337  ------------            --------------------
338
339  note                   A Text object containing the note.
340
341  similarity             An Ace::Sequence::Homology object containing
342                         the target and its start/stop positions.
343
344  intron                 An Ace::Object containing the gene from
345  exon                   which the feature is derived.
346  misc_feature
347
348  other                  A Text object containing the group data.
349
350=item asString()
351
352  $label = $feature->asString;
353
354Returns a human-readable identifier describing the nature of the
355feature.  The format is:
356
357 $type:$name/$start-$end
358
359for example:
360
361 exon:ZK154.3/1-67
362
363This method is also called automatically when the object is treated in
364a string context.
365
366=back
367
368=head1 SEE ALSO
369
370L<Ace>, L<Ace::Object>, L<Ace::Sequence>,L<Ace::Sequence::Homol>,
371L<Ace::Sequence::FeatureList>, L<GFF>
372
373=head1 AUTHOR
374
375Lincoln Stein <lstein@cshl.org> with extensive help from Jean
376Thierry-Mieg <mieg@kaa.crbm.cnrs-mop.fr>
377
378Copyright (c) 1999, Lincoln D. Stein
379
380This library is free software; you can redistribute it and/or modify
381it under the same terms as Perl itself.  See DISCLAIMER.txt for
382disclaimers of warranty.
383
384=cut
385
386
387__END__
388# SCRAPS
389# the new() code done "right"
390# sub new {
391#    my $pack = shift;
392#    my ($ref,$r_offset,$r_strand,$gff_line) = @_;
393#    my ($sourceseq,$method,$type,$start,$end,$score,$strand,$frame,$group) = split "\t";
394#    ($start,$end) = ($end,$start) if $strand < 0;
395#    my $self = $pack->SUPER::new($source,$start,$end);
396#    $self->{info} = {
397#  				seqname=> $sourceseq,
398#  				method => $method,
399#  				type   => $type,
400#  				score  => $score,
401#  				frame  => $frame,
402#  				group  => $group,
403#  		  };
404#    $self->{fstrand} = $strand;
405#    return $self;
406#  }
407
408