1
2=head1 NAME
3
4Bio::FeatureIO::interpro - read features from InterPro XML
5
6=head1 SYNOPSIS
7
8  my $in = Bio::FeatureIO(-format=>'interpro');
9  while (my $feat = $in->next_feature) {
10    # do something with the Bio::SeqFeatureI object
11  }
12
13=head1 DESCRIPTION
14
15See L<http://www.ebi.ac.uk/interpro/documentation.html>.
16
17=head1 FEEDBACK
18
19=head2 Mailing Lists
20
21User feedback is an integral part of the evolution of this and other
22Bioperl modules. Send your comments and suggestions preferably to
23the Bioperl mailing list.  Your participation is much appreciated.
24
25  bioperl-l@bioperl.org                  - General discussion
26  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
27
28=head2 Support
29
30Please direct usage questions or support issues to the mailing list:
31
32I<bioperl-l@bioperl.org>
33
34rather than to the module maintainer directly. Many experienced and
35reponsive experts will be able look at the problem and quickly
36address it. Please include a thorough description of the problem
37with code and data examples if at all possible.
38
39=head2 Reporting Bugs
40
41Report bugs to the Bioperl bug tracking system to help us keep track
42of the bugs and their resolution. Bug reports can be submitted via
43the web:
44
45  http://bugzilla.open-bio.org/
46
47=head1 AUTHOR - Allen Day
48
49Email allenday@ucla.edu
50
51=head1 APPENDIX
52
53The rest of the documentation details each of the object methods.
54Internal methods are usually preceded with a _
55
56=cut
57
58
59# Let the code begin...
60
61package Bio::FeatureIO::interpro;
62BEGIN {
63  $Bio::FeatureIO::interpro::AUTHORITY = 'cpan:BIOPERLML';
64}
65$Bio::FeatureIO::interpro::VERSION = '1.6.905';
66use strict;
67use base qw(Bio::FeatureIO);
68use Bio::SeqFeature::Annotated;
69use Bio::OntologyIO;
70
71use Bio::Annotation::Comment;
72use Bio::Annotation::DBLink;
73use Bio::Annotation::OntologyTerm;
74use Bio::Annotation::SimpleValue;
75use Bio::Annotation::Target;
76
77use URI::Escape;
78use XML::DOM;
79use XML::DOM::XPath;
80
81sub _initialize {
82  my($self,%arg) = @_;
83
84  $self->SUPER::_initialize(%arg);
85  $self->xml_parser(XML::DOM::Parser->new());
86  my $buf;
87  while(($buf = $self->_readline()) && $buf !~ /<protein/){
88    next;
89  }
90  $self->_pushback($buf);
91}
92
93sub next_feature {
94  my $self =shift;
95  my $buf;    #line buffer
96  my $ok = 0; #true if there is another <protein/> record in stream
97  my $record; #holds the record to be parsed and returned.
98
99  #try to dump buffer from last record before moving on to next record
100  my $f = $self->_shift_feature_buffer();
101  if($f){
102    return $f;
103  }
104
105  while(my $buf = $self->_readline()){
106    $ok = 1 if $buf =~ m!<protein!;
107    $record .= $buf;
108    last if $buf =~ m!</protein>!;
109  }
110  return unless $ok;
111
112  my $dom = $self->xml_parser->parse($record);
113
114
115  my ($pNode) = $dom->findnodes('/protein');
116
117  my @iNodes = $pNode->findnodes('/protein/interpro');
118
119  foreach my $iNode (@iNodes){
120    my @cNodes = $iNode->findnodes('classification');
121    my @mNodes = $iNode->findnodes('match');
122
123    #we don't handle these
124    #my @nNodes = $iNode->findnodes('contains');
125    #my @fNodes = $iNode->findnodes('found_in');
126
127    foreach my $mNode (@mNodes){
128      my @lNodes = $mNode->findnodes('location');
129      foreach my $lNode (@lNodes){
130        my $feature = Bio::SeqFeature::Annotated->new(
131                                                      -start  => $lNode->getAttribute('start'),
132                                                      -end    => $lNode->getAttribute('end'),
133                                                      -score  => $lNode->getAttribute('score'),
134#                                                      -seq_id => $pNode->getAttribute('id'),
135                                                     );
136        $feature->seq_id->value($pNode->getAttribute('id'));
137
138#warn $pNode->getAttribute('id');
139
140        $feature->source( $lNode->getAttribute('evidence') );
141
142        my $t = Bio::Annotation::OntologyTerm->new(-identifier => 'SO:0000417', -name => 'polypeptide_domain');
143        $feature->add_Annotation('type',$t);
144
145        my $c = Bio::Annotation::Comment->new(-tagname => 'comment', -text => $iNode->getAttribute('name'));
146        $feature->add_Annotation($c);
147
148        my $d = Bio::Annotation::DBLink->new();
149        $d->database($mNode->getAttribute('dbname'));
150        $d->primary_id($mNode->getAttribute('id'));
151        $d->optional_id($mNode->getAttribute('name'));
152        $feature->annotation->add_Annotation('dblink',$d);
153
154        my $s = Bio::Annotation::SimpleValue->new(-tagname => 'status', -value => $lNode->getAttribute('status'));
155        $feature->annotation->add_Annotation($s);
156
157        foreach my $cNode (@cNodes){
158          my $o = Bio::Annotation::OntologyTerm->new(-identifier => $cNode->getAttribute('id'));
159          $feature->annotation->add_Annotation('ontology_term',$o);
160        }
161
162        $self->_push_feature_buffer($feature);
163      }
164    }
165  }
166
167  return $self->_shift_feature_buffer;
168}
169
170=head2 _push_feature_buffer()
171
172 Usage   :
173 Function:
174 Returns :
175 Args    :
176
177
178=cut
179
180sub _push_feature_buffer {
181  my ($self,$f) = @_;
182
183  if(ref($f)){
184    push @{ $self->{feature_buffer} }, $f;
185  }
186}
187
188=head2 _shift_feature_buffer()
189
190 Usage   :
191 Function:
192 Returns :
193 Args    :
194
195
196=cut
197
198sub _shift_feature_buffer {
199  my ($self) = @_;
200  return $self->{feature_buffer} ? shift @{ $self->{feature_buffer} } : undef;
201}
202
203=head2 xml_parser()
204
205 Usage   : $obj->xml_parser($newval)
206 Function:
207 Example :
208 Returns : value of xml_parser (a scalar)
209 Args    : on set, new value (a scalar or undef, optional)
210
211
212=cut
213
214sub xml_parser {
215  my($self,$val) = @_;
216  $self->{'xml_parser'} = $val if defined($val);
217  return $self->{'xml_parser'};
218}
219
2201;
221