1#
2# BioPerl module for Bio::Search::Result::GenericResult
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Jason Stajich <jason@bioperl.org>
7#
8# Copyright Jason Stajich
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::Search::Result::GenericResult - Generic Implementation of
17Bio::Search::Result::ResultI interface applicable to most search
18results.
19
20=head1 SYNOPSIS
21
22
23    # typically one gets Results from a SearchIO stream
24    use Bio::SearchIO;
25    my $io = Bio::SearchIO->new(-format => 'blast',
26                                -file   => 't/data/HUMBETGLOA.tblastx');
27    while( my $result = $io->next_result ) {
28        # process all search results within the input stream
29        while( my $hit = $result->next_hit ) {
30            # insert code here for hit processing
31        }
32    }
33
34    use Bio::Search::Result::GenericResult;
35    my @hits = (); # would be a list of Bio::Search::Hit::HitI objects
36    # typically these are created from a Bio::SearchIO stream
37    my $result = Bio::Search::Result::GenericResult->new
38        ( -query_name        => 'HUMBETGLOA',
          -query_accession   => '',
          -query_description => 'Human haplotype C4 beta-globin gene, complete cds.',
          -query_length      => 3002,
          -database_name     => 'ecoli.aa',
43          -database_letters  => 4662239,
44          -database_entries  => 400,
45          -parameters        => { 'e' => '0.001' },
46          -statistics        => { 'kappa' => 0.731 },
47          -algorithm         => 'blastp',
48          -algorithm_version => '2.1.2',
49          );
50
51    my $id = $result->query_name();
52
53    my $desc = $result->query_description();
54
55    my $name = $result->database_name();
56
57    my $size = $result->database_letters();
58
59    my $num_entries = $result->database_entries();
60
    my $evalue = $result->get_parameter('e');
62
63    my @params = $result->available_parameters;
64
65    my $kappa = $result->get_statistic('kappa');
66
67    my @statnames = $result->available_statistics;
68
69# TODO: Show how to configure a SearchIO stream so that it generates
70#       GenericResult objects.
71
72
73=head1 DESCRIPTION
74
75This object is an implementation of the Bio::Search::Result::ResultI
76interface and provides a generic place to store results from a
77sequence database search.
78
79Unless you're writing a parser, you won't ever need to create a
80GenericResult or any other ResultI-implementing object. If you use
81the SearchIO system, ResultI objects are created automatically from
82a SearchIO stream which returns Bio::Search::Result::ResultI objects.
83
84For documentation on what you can do with GenericResult (and other ResultI
85objects), please see the API documentation in
86L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>.
87
88=head1 FEEDBACK
89
90=head2 Mailing Lists
91
92User feedback is an integral part of the evolution of this and other
93Bioperl modules. Send your comments and suggestions preferably to
94the Bioperl mailing list.  Your participation is much appreciated.
95
96  bioperl-l@bioperl.org                  - General discussion
97  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
98
99=head2 Support
100
101Please direct usage questions or support issues to the mailing list:
102
103I<bioperl-l@bioperl.org>
104
rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
107address it. Please include a thorough description of the problem
108with code and data examples if at all possible.
109
110=head2 Reporting Bugs
111
112Report bugs to the Bioperl bug tracking system to help us keep track
113of the bugs and their resolution. Bug reports can be submitted via the
114web:
115
116  https://github.com/bioperl/bioperl-live/issues
117
118=head1 AUTHOR - Jason Stajich and Steve Chervitz
119
120Email jason@bioperl.org
121Email sac@bioperl.org
122
123=head1 CONTRIBUTORS
124
125Sendu Bala, bix@sendu.me.uk
126
127=head1 APPENDIX
128
129The rest of the documentation details each of the object methods.
130Internal methods are usually preceded with a _
131
132=cut
133
134
135# Let the code begin...
136
137
138package Bio::Search::Result::GenericResult;
139$Bio::Search::Result::GenericResult::VERSION = '1.7.7';
140use strict;
141
142use Bio::Search::GenericStatistics;
143use Bio::Tools::Run::GenericParameters;
144
145# bug #1420
146#use overload
147#    '""' => \&to_string;
148
149use base qw(Bio::Root::Root Bio::Search::Result::ResultI);
150
151=head2 new
152
153 Title   : new
154 Usage   : my $obj = Bio::Search::Result::GenericResult->new();
155 Function: Builds a new Bio::Search::Result::GenericResult object
156 Returns : Bio::Search::Result::GenericResult
157 Args    : -query_name        => Name of query Sequence
158           -query_accession   => Query accession number (if available)
159           -query_description => Description of query sequence
160           -query_length      => Length of query sequence
161           -database_name     => Name of database
162           -database_letters  => Number of residues in database
163           -database_entries  => Number of entries in database
164           -hits              => array ref of Bio::Search::Hit::HitI objects
165           -parameters        => hash ref of search parameters (key => value)
166           -statistics        => hash ref of search statistics (key => value)
167           -algorithm         => program name (blastx)
168           -algorithm_version   => version of the algorithm (2.1.2)
169           -algorithm_reference => literature reference string for this algorithm
170           -rid               => value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
171           -hit_factory       => Bio::Factory::ObjectFactoryI capable of making
172                                 Bio::Search::Hit::HitI objects
173
174=cut
175
# Constructor. Builds the base object, creates empty hit / statistics /
# parameter containers, then feeds every recognised named argument to the
# matching accessor. Throws if -parameters or -statistics is not a hash
# reference, or if -hits is not an array reference.
sub new {
  my($class,@args) = @_;

  my $self = $class->SUPER::new(@args);

  # Start from empty containers so that the accessors and the hit iterator
  # always have something to operate on, even with no arguments at all.
  $self->{'_hits'} = [];
  $self->{'_hitindex'} = 0;
  $self->{'_statistics'} = Bio::Search::GenericStatistics->new();
  $self->{'_parameters'} = Bio::Tools::Run::GenericParameters->new();

  my ($qname,$qacc,$qdesc,$qlen, $qgi,
      $dbname,$dblet,$dbent,$params,
      $stats, $hits, $algo, $algo_v,
      $prog_ref, $algo_r, $rid, $hit_factory) = $self->_rearrange([qw(QUERY_NAME
                                                  QUERY_ACCESSION
                                                  QUERY_DESCRIPTION
                                                  QUERY_LENGTH
                                                  QUERY_GI
                                                  DATABASE_NAME
                                                  DATABASE_LETTERS
                                                  DATABASE_ENTRIES
                                                  PARAMETERS
                                                  STATISTICS
                                                  HITS
                                                  ALGORITHM
                                                  ALGORITHM_VERSION
                                                  PROGRAM_REFERENCE
                                                  ALGORITHM_REFERENCE
                                                  RID
                                                  HIT_FACTORY
                                                 )],@args);

  # -program_reference is only used when -algorithm_reference is absent
  # (or false).
  $algo_r ||= $prog_ref;
  defined $algo   && $self->algorithm($algo);
  defined $algo_v && $self->algorithm_version($algo_v);
  defined $algo_r && $self->algorithm_reference($algo_r);

  defined $rid && $self->rid($rid);

  defined $qname && $self->query_name($qname);
  defined $qacc  && $self->query_accession($qacc);
  defined $qdesc && $self->query_description($qdesc);
  defined $qlen  && $self->query_length($qlen);
  defined $qgi   && $self->query_gi($qgi);
  defined $dbname && $self->database_name($dbname);
  defined $dblet  && $self->database_letters($dblet);
  defined $dbent  && $self->database_entries($dbent);

  defined $hit_factory && $self->hit_factory($hit_factory);

  if( defined $params ) {
      if( ref($params) !~ /hash/i ) {
          $self->throw("Must specify a hash reference with the parameter '-parameters'");
      }
      # Walk keys() instead of each(): each() continues from wherever the
      # hash's shared iterator currently stands, so a caller that had
      # partially iterated this hash would silently lose entries here.
      foreach my $key ( keys %{$params} ) {
          $self->{'_parameters'}->set_parameter($key => $params->{$key});
      }
  }
  if( defined $stats ) {
      if( ref($stats) !~ /hash/i ) {
          $self->throw("Must specify a hash reference with the parameter '-statistics'");
      }
      # Same reasoning as for -parameters: keys() resets the iterator.
      foreach my $key ( keys %{$stats} ) {
          $self->{'_statistics'}->set_statistic($key => $stats->{$key});
      }
  }

  if( defined $hits  ) {
      $self->throw("Must define arrayref of Hits when initializing a $class\n") unless ref($hits) =~ /array/i;

      # Go through add_hit() so each element is validated and hash-ref
      # hits are registered for later conversion.
      foreach my $s ( @$hits ) {
          $self->add_hit($s);
      }
  }
  return $self;
}
254
255=head2 algorithm
256
257 Title   : algorithm
 Usage   : my $r_type = $result->algorithm
259 Function: Obtain the name of the algorithm used to obtain the Result
260 Returns : string (e.g., BLASTP)
261 Args    : [optional] scalar string to set value
262
263=cut
264
# Get/set the algorithm name. The value returned is the one that was stored
# BEFORE this call; a read of a never-set field stores and returns ''.
sub algorithm{
    my $self  = shift;
    my $value = shift;
    my $old   = $self->{'_algorithm'};
    if( defined $value ) {
        # Setter call: store the new name, hand back the old one (may be undef).
        $self->{'_algorithm'} = $value;
    }
    elsif( ! defined $old ) {
        # First read of an unset field: default to the empty string.
        $old = $self->{'_algorithm'} = '';
    }
    return $old;
}
274
275=head2 algorithm_version
276
277 Title   : algorithm_version
 Usage   : my $r_version = $result->algorithm_version
279 Function: Obtain the version of the algorithm used to obtain the Result
280 Returns : string (e.g., 2.1.2)
281 Args    : [optional] scalar string to set algorithm version value
282
283=cut
284
# Get/set the algorithm version string. Returns the value held before the
# call; reading a never-set field stores and returns ''.
sub algorithm_version{
    my ($self,$value) = @_;
    my $old = $self->{'_algorithm_version'};

    if( defined $value ) {
        $self->{'_algorithm_version'} = $value;
        return $old;
    }

    # Plain read: an existing value is returned untouched.
    return $old if defined $old;

    # Nothing stored yet, so install the empty-string default and return it.
    return $self->{'_algorithm_version'} = '';
}
295
296=head2 Bio::Search::Result::ResultI interface methods
297
298Bio::Search::Result::ResultI implementation
299
300=head2 next_hit
301
302 Title   : next_hit
303 Usage   : while( $hit = $result->next_hit()) { ... }
304 Function: Returns the next available Hit object, representing potential
305           matches between the query and various entities from the database.
306 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
307 Args    : none
308
309
310=cut
311
# Iterator over the stored hits. Each call advances the internal index and
# returns the hit at that position; once the hits are exhausted it returns
# an empty list (undef in scalar context). A hit stored as a plain hash ref
# is turned into an object through the hit factory on first access, and the
# object replaces the hash ref in the stored list.
sub next_hit {
    my ($self,@args) = @_;
    my $index = $self->_nexthitindex;

    # Valid positions are 0 .. count-1, so an index equal to the count is
    # already past the end. (The previous '>' test let that index through
    # and produced a one-element (undef) list in list context.)
    return if $index >= scalar @{$self->{'_hits'}};

    my $hit = $self->{'_hits'}->[$index];
    if (ref($hit) eq 'HASH') {
        my $factory = $self->hit_factory || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
        $hit = $factory->create_object(%{$hit});
        # Cache the built object and drop the "still a hash" marker.
        $self->{'_hits'}->[$index] = $hit;
        delete $self->{_hashes}->{$index};
    }
    return $hit;
}
326
327=head2 query_name
328
329 Title   : query_name
330 Usage   : $id = $result->query_name();
331 Function: Get the string identifier of the query used by the
332           algorithm that performed the search.
333 Returns : a string.
334 Args    : [optional] new string value for query name
335
336=cut
337
# Get/set the query identifier. Returns the value held before the call;
# reading a never-set field stores and returns ''.
sub query_name {
    my ($self,$value) = @_;
    my $was = $self->{'_queryname'};

    # Plain read of an already-populated field: nothing to store.
    return $was unless defined $value || ! defined $was;

    unless( defined $value ) {
        # Unset field being read: fall back to the empty string.
        $value = '';
        $was   = '';
    }
    $self->{'_queryname'} = $value;
    return $was;
}
347
348=head2 query_accession
349
350 Title   : query_accession
351 Usage   : $id = $result->query_accession();
352 Function: Get the accession (if available) for the query sequence
353 Returns : a string
354 Args    : [optional] new string value for accession
355
356=cut
357
# Get/set the query accession. Returns the value held before the call;
# reading a never-set field stores and returns ''.
sub query_accession {
    my $self  = shift;
    my $value = shift;
    my $old   = $self->{'_queryacc'};
    if( defined $value ) {
        $self->{'_queryacc'} = $value;
    }
    elsif( ! defined $old ) {
        # Nothing stored and nothing supplied: default to ''.
        $old = $self->{'_queryacc'} = '';
    }
    return $old;
}
367
368=head2 query_gi
369
370 Title   : query_gi
 Usage   : $gi = $result->query_gi();
 Function: Retrieve the NCBI Unique ID (aka the GI #),
           if available, for the query
 Returns : a scalar string (empty string if not set)
 Args    : [optional] new value for the query GI
376
377=cut
378
# Get/set the NCBI GI number of the query.
#
# With a defined argument the value is stored and returned. Without one, a
# previously stored non-empty value is returned; otherwise the GI is parsed
# from a query name of the form "gi|<digits>..." and '' is returned when the
# name does not match.
#
# Previously every read re-derived the GI from the query name and overwrote
# the stored field, so a GI set explicitly (e.g. through -query_gi) was lost
# on the next read unless the name happened to carry the same number.
sub query_gi {
    my ($self,$value) = @_;
    if( defined $value ) {
        $self->{'_query_gi'} = $value;
    }

    # An explicitly stored, non-empty GI always wins.
    my $stored = $self->{'_query_gi'};
    return $stored if defined $stored && length $stored;

    # Derive on demand without caching, so a later change of the query name
    # is reflected on the next call.
    return $self->query_name() =~ m{^gi\|(\d+)} ? $1 : '';
}
388
389=head2 query_length
390
391 Title   : query_length
392 Usage   : $id = $result->query_length();
393 Function: Get the length of the query sequence
394           used in the search.
395 Returns : a number
396 Args    :  [optional] new integer value for query length
397
398=cut
399
# Get/set the query length. Returns the value held before the call;
# reading a never-set field stores and returns 0.
sub query_length {
    my ($self,$value) = @_;
    my $old = $self->{'_querylength'};

    if( defined $value ) {
        $self->{'_querylength'} = $value;
        return $old;
    }

    # Plain read: an existing length is returned as is.
    return $old if defined $old;

    # Nothing stored yet: install the numeric default and return it.
    return $self->{'_querylength'} = 0;
}
409
410=head2 query_description
411
412 Title   : query_description
413 Usage   : $id = $result->query_description();
414 Function: Get the description of the query sequence
415           used in the search.
416 Returns : a string
417 Args    : [optional] new string for the query description
418
419=cut
420
# Get/set the query description. Returns the value held before the call;
# reading a never-set field stores and returns ''.
sub query_description {
    my ($self,$value) = @_;
    my $was = $self->{'_querydesc'};

    # Plain read of an already-populated field: nothing to store.
    return $was unless defined $value || ! defined $was;

    unless( defined $value ) {
        # Unset field being read: fall back to the empty string.
        $value = '';
        $was   = '';
    }
    $self->{'_querydesc'} = $value;
    return $was;
}
430
431
432=head2 database_name
433
434 Title   : database_name
435 Usage   : $name = $result->database_name()
436 Function: Used to obtain the name of the database that the query was searched
437           against by the algorithm.
438 Returns : a scalar string
439 Args    : [optional] new string for the db name
440
441=cut
442
# Get/set the name of the searched database. Returns the value held before
# the call; reading a never-set field stores and returns ''.
sub database_name {
    my $self  = shift;
    my $value = shift;
    my $old   = $self->{'_dbname'};
    if( defined $value ) {
        $self->{'_dbname'} = $value;
    }
    elsif( ! defined $old ) {
        # Nothing stored and nothing supplied: default to ''.
        $old = $self->{'_dbname'} = '';
    }
    return $old;
}
452
453=head2 database_letters
454
455 Title   : database_letters
456 Usage   : $size = $result->database_letters()
457 Function: Used to obtain the size of database that was searched against.
 Returns : a scalar integer (units specific to algorithm, but probably the
           total number of residues in the database, if available) or an
           empty string if no value has been stored.
461 Args    : [optional] new scalar integer for number of letters in db
462
463
464=cut
465
# Get/set the size of the searched database. Returns the value held before
# the call; reading a never-set field stores and returns ''.
sub database_letters {
    my ($self,$value) = @_;
    my $old = $self->{'_dbletters'};

    if( defined $value ) {
        $self->{'_dbletters'} = $value;
        return $old;
    }

    # Plain read: an existing value is returned untouched.
    return $old if defined $old;

    # Nothing stored yet: install the empty-string default and return it.
    return $self->{'_dbletters'} = '';
}
475
476=head2 database_entries
477
478 Title   : database_entries
479 Usage   : $num_entries = $result->database_entries()
480 Function: Used to obtain the number of entries contained in the database.
 Returns : a scalar integer representing the number of entities in the database
           or an empty string if no value has been stored.
483 Args    : [optional] new integer for the number of sequence entries in the db
484
485
486=cut
487
# Get/set the number of entries in the searched database. Returns the value
# held before the call; reading a never-set field stores and returns ''.
sub database_entries {
    my ($self,$value) = @_;
    my $was = $self->{'_dbentries'};

    # Plain read of an already-populated field: nothing to store.
    return $was unless defined $value || ! defined $was;

    unless( defined $value ) {
        # Unset field being read: fall back to the empty string.
        $value = '';
        $was   = '';
    }
    $self->{'_dbentries'} = $value;
    return $was;
}
497
498=head2 get_parameter
499
500 Title   : get_parameter
501 Usage   : my $gap_ext = $report->get_parameter('gapext')
502 Function: Returns the value for a specific parameter used
503           when running this report
504 Returns : string
505 Args    : name of parameter (string)
506
507=cut
508
# Look up one search parameter by name in the parameter container held
# under '_parameters' and return whatever that container returns.
sub get_parameter {
   my $self  = shift;
   my $name  = shift;
   my $store = $self->{'_parameters'};
   return $store->get_parameter($name);
}
513
514=head2 available_parameters
515
516 Title   : available_parameters
 Usage   : my @params = $report->available_parameters
518 Function: Returns the names of the available parameters
519 Returns : Return list of available parameters used for this report
520 Args    : none
521
522=cut
523
# Return the parameter names reported by the parameter container held
# under '_parameters' (the call is made in the caller's context).
sub available_parameters{
   my $self  = shift;
   my $store = $self->{'_parameters'};
   return $store->available_parameters;
}
528
529
530=head2 get_statistic
531
532 Title   : get_statistic
 Usage   : my $kappa = $report->get_statistic('kappa')
534 Function: Returns the value for a specific statistic available
535           from this report
536 Returns : string
537 Args    : name of statistic (string)
538
539=cut
540
# Look up one statistic by name in the statistics container held under
# '_statistics' and return whatever that container returns.
sub get_statistic{
   my $self  = shift;
   my $key   = shift;
   my $store = $self->{'_statistics'};
   return $store->get_statistic($key);
}
545
546=head2 available_statistics
547
548 Title   : available_statistics
549 Usage   : my @statnames = $report->available_statistics
550 Function: Returns the names of the available statistics
551 Returns : Return list of available statistics used for this report
552 Args    : none
553
554=cut
555
# Return the statistic names reported by the statistics container held
# under '_statistics' (the call is made in the caller's context).
sub available_statistics{
   my $self  = shift;
   my $store = $self->{'_statistics'};
   return $store->available_statistics;
}
560
561=head2 Bio::Search::Report
562
563Bio::Search::Result::GenericResult specific methods
564
565=head2 add_hit
566
567 Title   : add_hit
568 Usage   : $report->add_hit($hit)
569 Function: Adds a HitI to the stored list of hits
570 Returns : Number of HitI currently stored
571 Args    : Bio::Search::Hit::HitI
572
573=cut
574
# Append one hit to the stored list and return the new number of hits.
#
# Accepts either a Bio::Search::Hit::HitI object or a plain hash ref; the
# position of each hash ref is recorded in '_hashes' so that it can later
# be converted into an object. Anything else (undef, a plain string, an
# unblessed non-hash reference, an unrelated object) is rejected through
# throw().
sub add_hit {
    my ($self,$s) = @_;

    # Loaded locally so this block does not rely on a file-level import.
    require Scalar::Util;

    my $is_hash = ref($s) eq 'HASH';

    # Only call ->isa on a blessed reference: invoking it on undef or on an
    # unblessed reference dies with a raw Perl error before throw() can
    # report the problem properly.
    if ( $is_hash
         || ( Scalar::Util::blessed($s) && $s->isa('Bio::Search::Hit::HitI') ) ) {
        push @{$self->{'_hits'}}, $s;
    }
    else {
        my $got = ref($s) || ( defined $s ? 'a non-reference value' : 'undef' );
        $self->throw("Passed in " . $got . " as a Hit which is not a Bio::Search::Hit::HitI.");
    }

    if ($is_hash) {
        # Remember the index of the element just pushed.
        $self->{_hashes}->{$#{$self->{'_hits'}}} = 1;
    }
    return scalar @{$self->{'_hits'}};
}
589
590=head2 hit_factory
591
592 Title   : hit_factory
 Usage   : $result->hit_factory($hit_factory)
594 Function: Get/set the factory used to build HitI objects if necessary.
595 Returns : Bio::Factory::ObjectFactoryI
596 Args    : Bio::Factory::ObjectFactoryI
597
598=cut
599
# Get/set the factory used to build hit objects. Any call with at least one
# argument stores the first argument (even undef). Returns the stored
# factory when it is true, otherwise an empty list / undef.
sub hit_factory {
    my ($self, @new) = @_;
    $self->{_hit_factory} = $new[0] if @new;

    my $factory = $self->{_hit_factory};
    return $factory if $factory;
    return;
}
605
606=head2 rewind
607
608 Title   : rewind
609 Usage   : $result->rewind;
 Function: Reset the Hit iterator to the beginning, so that next_hit()
           starts again from the first stored hit
612 Returns : none
613 Args    : none
614
615=cut
616
# Reset the hit iterator position to 0 (the value of the assignment is what
# gets returned).
sub rewind{
   my $self = shift;
   return $self->{'_hitindex'} = 0;
}
621
622
623=head2 _nexthitindex
624
625 Title   : _nexthitindex
626 Usage   : private
627
628=cut
629
# Return the current iterator position and advance it by one
# (post-increment of '_hitindex').
sub _nexthitindex{
   my $self = shift;
   my $current = $self->{'_hitindex'}++;
   return $current;
}
634
635
636=head2 add_parameter
637
638 Title   : add_parameter
639 Usage   : $report->add_parameter('gapext', 11);
640 Function: Adds a parameter
641 Returns : none
 Args    : key   - name of this parameter
           value - value for this parameter
644
645=cut
646
# Store one search parameter in the container held under '_parameters'.
# The result of the container's set_parameter() call is passed back.
sub add_parameter {
   my $self = shift;
   my ($key,$value) = @_;
   my $store = $self->{'_parameters'};
   return $store->set_parameter($key => $value);
}
651
652
653=head2 add_statistic
654
655 Title   : add_statistic
656 Usage   : $report->add_statistic('lambda', 2.3);
 Function: Adds a statistic
 Returns : none
 Args    : key   - name of this statistic
           value - value for this statistic
661
662=cut
663
# Store one statistic in the container held under '_statistics'.
# Always returns nothing (empty list / undef).
sub add_statistic {
   my $self = shift;
   my ($key,$value) = @_;
   my $store = $self->{'_statistics'};
   $store->set_statistic($key => $value);
   return;
}
669
670
671=head2 num_hits
672
673 Title   : num_hits
674 Usage   : my $hitcount= $result->num_hits
675 Function: returns the number of hits for this query result
676 Returns : integer
677 Args    : none
678
679=cut
680
# Return how many hits are stored. Throws when the '_hits' slot is
# undefined.
sub num_hits{
   my $self = shift;
   my $stored = $self->{'_hits'};
   $self->throw("Can't get Hits: data not collected.") unless defined $stored;
   return scalar @{$stored};
}
688
689
690=head2 hits
691
692 Title   : hits
693 Usage   : my @hits = $result->hits
694 Function: Returns the available hits for this Result
695 Returns : Array of L<Bio::Search::Hit::HitI> objects
696 Args    : none
697
698
699=cut
700
# Return all stored hits as a list. Any hit still held as a plain hash ref
# (tracked by index in '_hashes') is first converted into an object with
# the hit factory; throws if such a hit exists but no factory is set.
sub hits {
    my $self = shift;

    # keys() yields a snapshot list, so removing entries inside the loop
    # is safe.
    my $pending = $self->{_hashes} || {};
    for my $idx (keys %{$pending}) {
        my $factory = $self->hit_factory
            || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
        my %hit_args = %{$self->{'_hits'}->[$idx]};
        $self->{'_hits'}->[$idx] = $factory->create_object(%hit_args);
        delete $self->{_hashes}->{$idx};
    }

    # Copy into a named array so that scalar context yields the hit count.
    my @found = ref $self->{'_hits'} ? @{$self->{'_hits'}} : ();
    return @found;
}
716
717=head2 algorithm_reference
718
719 Title   : algorithm_reference
720 Usage   : $obj->algorithm_reference($newval)
721 Function:
722 Returns : string containing literature reference for the algorithm
723 Args    : newvalue string (optional)
724 Comments: Formerly named program_reference(), which is still supported
725           for backwards compatibility.
726
727=cut
728
# Get/set the literature reference string for the algorithm. A defined
# argument replaces the stored value; the current value is returned.
sub algorithm_reference{
   my $self  = shift;
   my $value = shift;
   $self->{'algorithm_reference'} = $value if defined $value;
   return $self->{'algorithm_reference'};
}
736
737=head2 program_reference
738
739 Title   : program_reference
740 Usage   : $obj->program_reference()
741 Function:
742 Returns : string containing literature reference for the algorithm
743 Args    :
744 Comments: Deprecated - use algorithm_reference() instead.
745
746=cut
747
# Deprecated alias: forwards all arguments to algorithm_reference().
sub program_reference {
    my ($self, @args) = @_;
    return $self->algorithm_reference(@args);
}
749
750=head2 rid
751
752 Title   : rid
753 Usage   : $obj->rid($newval)
754 Function:
755 Returns : value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
756 Args    : newvalue (optional)
757 Comments: The default implementation in ResultI returns an empty string
758           rather than throwing a NotImplemented exception, since
759           the RID may not always be available and is not critical.
760           See: (1) https://www.ncbi.nlm.nih.gov/Class/MLACourse/Modules/BLAST/rid.html
761                (2) https://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/new/node63.html
762=cut
763
# Get/set the BLAST Request ID. A defined argument replaces the stored
# value; the current value is returned.
sub rid{
    my $self  = shift;
    my $value = shift;
    $self->{'rid'} = $value if defined $value;
    return $self->{'rid'};
}
771
772=head2 no_hits_found
773
774See documentation in L<Bio::Search::Result::ResultI::no_hits_found()|Bio::Search::Result::ResultI>
775
776=cut
777
# Return the '_no_hits_found' flag exactly as stored.
sub no_hits_found {
    my ($self) = @_;

    # Mind the double negative:
    #   a true value  => no hits were found
    #   a false value => hits were found (or the flag was never set)

    return $self->{'_no_hits_found'};
}
787
788
789=head2 set_no_hits_found
790
791See documentation in L<Bio::Search::Result::ResultI::set_no_hits_found()|Bio::Search::Result::ResultI>
792
793=cut
794
# Mark this result as having no hits by setting '_no_hits_found' to 1
# (the assigned value is what gets returned).
sub set_no_hits_found {
    my ($self) = @_;
    return $self->{'_no_hits_found'} = 1;
}
799
800
801=head2 to_string
802
803 Title   : to_string
804 Usage   : print $blast->to_string;
805 Function: Returns a string representation for the Blast result.
806           Primarily intended for debugging purposes.
807 Example : see usage
 Returns : A string of the form:
           <class>, algorithm= <algorithm>, query=<name> <description>, db=<database>
           e.g.:
           Bio::Search::Result::GenericResult, algorithm= BLASTP, query=YEL060C vacuolar protease B, db=PDBUNIQ
812 Args    : None
813
814=cut
815
# Build a one-line, human-readable summary of this result: the object's
# class followed by its algorithm, query name and description, and
# database name.
sub to_string {
    my ($self) = @_;

    # Fetch the pieces in the same order in which they appear in the string.
    my $class       = ref($self);
    my $algorithm   = $self->algorithm;
    my $query       = $self->query_name;
    my $description = $self->query_description;
    my $database    = $self->database_name;

    return "$class, algorithm= $algorithm, query=$query $description, db=$database";
}
821
8221;
823