1package Ace::Sequence::Feature; 2use strict; 3 4use Ace qw(:DEFAULT rearrange); 5use Ace::Object; 6use Ace::Sequence::Homol; 7use Carp; 8use AutoLoader 'AUTOLOAD'; 9use vars '@ISA','%REV'; 10@ISA = 'Ace::Sequence'; # for convenience sake only 11%REV = ('+1' => '-1', 12 '-1' => '+1'); # war is peace, &c. 13 14use overload 15 '""' => 'asString', 16 ; 17 18# parse a line from a sequence list 19sub new { 20 my $pack = shift; 21 my ($parent,$ref,$r_offset,$r_strand,$abs,$gff_line,$db) = @_; 22 my ($sourceseq,$method,$type,$start,$end,$score,$strand,$frame,$group) = split "\t",$gff_line; 23 if (defined($strand)) { 24 $strand = $strand eq '-' ? '-1' : '+1'; 25 } else { 26 $strand = 0; 27 } 28 29 # for efficiency/performance, we don't use superclass new() method, but modify directly 30 # handling coordinates. See SCRAPS below for what should be in here 31 $strand = '+1' if $strand < 0 && $r_strand < 0; # two wrongs do make a right 32 ($start,$end) = ($end,$start) if $strand < 0; 33 my $offset = $start - 1; 34 my $length = ($end > $start) ? $end - $offset : $end - $offset - 2; 35 36 # handle negative strands 37 $offset ||= 0; 38 $offset *= -1 if $r_strand < 0 && $strand != $r_strand; 39 40 my $self= bless { 41 obj => $ref, 42 offset => $offset, 43 length => $length, 44 parent => $parent, 45 p_offset => $r_offset, 46 refseq => [$ref,$r_offset,$r_strand], 47 strand => $r_strand, 48 fstrand => $strand, 49 absolute => $abs, 50 info => { 51 seqname=> $sourceseq, 52 method => $method, 53 type => $type, 54 score => $score, 55 frame => $frame, 56 group => $group, 57 db => $db, 58 } 59 },$pack; 60 return $self; 61} 62 63sub smapped { 1; } 64 65# $_[0] is field name, $_[1] is self, $_[2] is optional replacement value 66sub _field { 67 my $self = shift; 68 my $field = shift; 69 my $v = $self->{info}{$field}; 70 $self->{info}{$field} = shift if @_; 71 return if defined $v && $v eq '.'; 72 return $v; 73} 74 75sub strand { return $_[0]->{fstrand} } 76 77sub seqname { 78 my $self = shift; 79 my $seq = $self->_field('seqname'); 80 $self->db->fetch(Sequence=>$seq); 81} 82 83sub method { shift->_field('method',@_) } # ... I prefer "method" 84sub subtype { shift->_field('method',@_) } # ... or even "subtype" 85sub type { shift->_field('type',@_) } # ... I prefer "type" 86sub score { shift->_field('score',@_) } # float indicating some sort of score 87sub frame { shift->_field('frame',@_) } # one of 1, 2, 3 or undef 88sub info { # returns Ace::Object(s) with info about the feature 89 my $self = shift; 90 unless ($self->{group}) { 91 my $info = $self->{info}{group} || 'Method "'.$self->method.'"'; 92 $info =~ s/(\"[^\"]*);([^\"]*\")/$1$;$2/g; 93 my @data = split(/\s*;\s*/,$info); 94 foreach (@data) { s/$;/;/g } 95 $self->{group} = [map {$self->toAce($_)} @data]; 96 } 97 return wantarray ? @{$self->{group}} : $self->{group}->[0]; 98} 99 100# bioperl compatibility 101sub primary_tag { shift->type(@_) } 102sub source_tag { shift->subtype(@_) } 103 104sub db { # database identifier (from Ace::Sequence::Multi) 105 my $self = shift; 106 my $db = $self->_field('db',@_); 107 return $db || $self->SUPER::db; 108} 109 110sub group { $_[0]->info; } 111sub target { $_[0]->info; } 112 113sub asString { 114 my $self = shift; 115 my $name = $self->SUPER::asString; 116 my $type = $self->type; 117 return "$type:$name"; 118} 119 120# unique ID 121sub id { 122 my $self = shift; 123 my $source = $self->source->name; 124 my $start = $self->start; 125 my $end = $self->end; 126 return "$source/$start,$end"; 127} 128 129# map info into a reasonable set of ace objects 130sub toAce { 131 my $self = shift; 132 my $thing = shift; 133 my ($tag,@values) = $thing=~/(\"[^\"]+?\"|\S+)/g; 134 foreach (@values) { # strip the damn quotes 135 s/^\"(.*)\"$/$1/; # get rid of leading and trailing quotes 136 } 137 return $self->tag2ace($tag,@values); 138} 139 140# synthesize an artificial Ace object based on the tag 141sub tag2ace { 142 my $self = shift; 143 my ($tag,@data) = @_; 144 145 # Special cases, hardcoded in Ace GFF code... 146 my $db = $self->db;; 147 my $class = $db->class; 148 149 # for Notes we just return a text, no database associated 150 return $class->new(Text=>$data[0]) if $tag eq 'Note'; 151 152 # for homols, we create the indicated Protein or Sequence object 153 # then generate a bogus Homology object (for future compatability??) 154 if ($tag eq 'Target') { 155 my ($objname,$start,$end) = @data; 156 my ($classe,$name) = $objname =~ /^(\w+):(.+)/; 157 return Ace::Sequence::Homol->new_homol($classe,$name,$db,$start,$end); 158 } 159 160 # General case: 161 my $obj = $class->new($tag=>$data[0],$self->db); 162 163 return $obj if defined $obj; 164 165 # Last resort, return a Text 166 return $class->new(Text=>$data[0]); 167} 168 169sub sub_SeqFeature { 170 return wantarray ? () : 0; 171} 172 1731; 174 175=head1 NAME 176 177Ace::Sequence::Feature - Examine Sequence Feature Tables 178 179=head1 SYNOPSIS 180 181 # open database connection and get an Ace::Object sequence 182 use Ace::Sequence; 183 184 # get a megabase from the middle of chromosome I 185 $seq = Ace::Sequence->new(-name => 'CHROMOSOME_I, 186 -db => $db, 187 -offset => 3_000_000, 188 -length => 1_000_000); 189 190 # get all the homologies (a list of Ace::Sequence::Feature objs) 191 @homol = $seq->features('Similarity'); 192 193 # Get information about the first one 194 $feature = $homol[0]; 195 $type = $feature->type; 196 $subtype = $feature->subtype; 197 $start = $feature->start; 198 $end = $feature->end; 199 $score = $feature->score; 200 201 # Follow the target 202 $target = $feature->info; 203 204 # print the target's start and end positions 205 print $target->start,'-',$target->end, "\n"; 206 207=head1 DESCRIPTION 208 209I<Ace::Sequence::Feature> is a subclass of L<Ace::Sequence::Feature> 210specialized for returning information about particular features in a 211GFF format feature table. 212 213=head1 OBJECT CREATION 214 215You will not ordinarily create an I<Ace::Sequence::Feature> object 216directly. Instead, objects will be created in response to a feature() 217call to an I<Ace::Sequence> object. If you wish to create an 218I<Ace::Sequence::Feature> object directly, please consult the source 219code for the I<new()> method. 220 221=head1 OBJECT METHODS 222 223Most methods are inherited from I<Ace::Sequence>. The following 224methods are also supported: 225 226=over 4 227 228=item seqname() 229 230 $object = $feature->seqname; 231 232Return the ACeDB Sequence object that this feature is attached to. 233The return value is an I<Ace::Object> of the Sequence class. This 234corresponds to the first field of the GFF format and does not 235necessarily correspond to the I<Ace::Sequence> object from which the 236feature was obtained (use source_seq() for that). 237 238=item source() 239 240=item method() 241 242=item subtype() 243 244 $source = $feature->source; 245 246These three methods are all synonyms for the same thing. They return 247the second field of the GFF format, called "source" in the 248documentation. This is usually the method or algorithm used to 249predict the feature, such as "GeneFinder" or "tRNA" scan. To avoid 250ambiguity and enhance readability, the method() and subtype() synonyms 251are also recognized. 252 253=item feature() 254 255=item type() 256 257 $type = $feature->type; 258 259These two methods are also synonyms. They return the type of the 260feature, such as "exon", "similarity" or "Predicted_gene". In the GFF 261documentation this is called the "feature" field. For readability, 262you can also use type() to fetch the field. 263 264=item abs_start() 265 266 $start = $feature->abs_start; 267 268This method returns the absolute start of the feature within the 269sequence segment indicated by seqname(). As in the I<Ace::Sequence> 270method, use start() to obtain the start of the feature relative to its 271source. 272 273=item abs_start() 274 275 $start = $feature->abs_start; 276 277This method returns the start of the feature relative to the sequence 278segment indicated by seqname(). As in the I<Ace::Sequence> method, 279you will more usually use the inherited start() method to obtain the 280start of the feature relative to its source sequence (the 281I<Ace::Sequence> from which it was originally derived). 282 283=item abs_end() 284 285 $start = $feature->abs_end; 286 287This method returns the end of the feature relative to the sequence 288segment indicated by seqname(). As in the I<Ace::Sequence> method, 289you will more usually use the inherited end() method to obtain the end 290of the feature relative to the I<Ace::Sequence> from which it was 291derived. 292 293=item score() 294 295 $score = $feature->score; 296 297For features that are associated with a numeric score, such as 298similarities, this returns that value. For other features, this 299method returns undef. 300 301=item strand() 302 303 $strand = $feature->strand; 304 305Returns the strandedness of this feature, either "+1" or "-1". For 306features that are not stranded, returns 0. 307 308=item reversed() 309 310 $reversed = $feature->reversed; 311 312Returns true if the feature is reversed relative to its source 313sequence. 314 315=item frame() 316 317 $frame = $feature->frame; 318 319For features that have a frame, such as a predicted coding sequence, 320returns the frame, either 0, 1 or 2. For other features, returns undef. 321 322=item group() 323 324=item info() 325 326=item target() 327 328 $info = $feature->info; 329 330These methods (synonyms for one another) return an Ace::Object 331containing other information about the feature derived from the 8th 332field of the GFF format, the so-called "group" field. The type of the 333Ace::Object is dependent on the nature of the feature. The 334possibilities are shown in the table below: 335 336 Feature Type Value of Group Field 337 ------------ -------------------- 338 339 note A Text object containing the note. 340 341 similarity An Ace::Sequence::Homology object containing 342 the target and its start/stop positions. 343 344 intron An Ace::Object containing the gene from 345 exon which the feature is derived. 346 misc_feature 347 348 other A Text object containing the group data. 349 350=item asString() 351 352 $label = $feature->asString; 353 354Returns a human-readable identifier describing the nature of the 355feature. The format is: 356 357 $type:$name/$start-$end 358 359for example: 360 361 exon:ZK154.3/1-67 362 363This method is also called automatically when the object is treated in 364a string context. 365 366=back 367 368=head1 SEE ALSO 369 370L<Ace>, L<Ace::Object>, L<Ace::Sequence>,L<Ace::Sequence::Homol>, 371L<Ace::Sequence::FeatureList>, L<GFF> 372 373=head1 AUTHOR 374 375Lincoln Stein <lstein@cshl.org> with extensive help from Jean 376Thierry-Mieg <mieg@kaa.crbm.cnrs-mop.fr> 377 378Copyright (c) 1999, Lincoln D. Stein 379 380This library is free software; you can redistribute it and/or modify 381it under the same terms as Perl itself. See DISCLAIMER.txt for 382disclaimers of warranty. 383 384=cut 385 386 387__END__ 388# SCRAPS 389# the new() code done "right" 390# sub new { 391# my $pack = shift; 392# my ($ref,$r_offset,$r_strand,$gff_line) = @_; 393# my ($sourceseq,$method,$type,$start,$end,$score,$strand,$frame,$group) = split "\t"; 394# ($start,$end) = ($end,$start) if $strand < 0; 395# my $self = $pack->SUPER::new($source,$start,$end); 396# $self->{info} = { 397# seqname=> $sourceseq, 398# method => $method, 399# type => $type, 400# score => $score, 401# frame => $frame, 402# group => $group, 403# }; 404# $self->{fstrand} = $strand; 405# return $self; 406# } 407 408