1# 2# BioPerl module for Bio::Search::Result::GenericResult 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Jason Stajich <jason@bioperl.org> 7# 8# Copyright Jason Stajich 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Search::Result::GenericResult - Generic Implementation of 17Bio::Search::Result::ResultI interface applicable to most search 18results. 19 20=head1 SYNOPSIS 21 22 23 # typically one gets Results from a SearchIO stream 24 use Bio::SearchIO; 25 my $io = Bio::SearchIO->new(-format => 'blast', 26 -file => 't/data/HUMBETGLOA.tblastx'); 27 while( my $result = $io->next_result ) { 28 # process all search results within the input stream 29 while( my $hit = $result->next_hit ) { 30 # insert code here for hit processing 31 } 32 } 33 34 use Bio::Search::Result::GenericResult; 35 my @hits = (); # would be a list of Bio::Search::Hit::HitI objects 36 # typically these are created from a Bio::SearchIO stream 37 my $result = Bio::Search::Result::GenericResult->new 38 ( -query_name => 'HUMBETGLOA', 39 -query_accession => '', 40 -query_description => 'Human haplotype C4 beta-globin gene, complete cds.', 
41 -query_length => 3002, 42 -database_name => 'ecoli.aa', 43 -database_letters => 4662239, 44 -database_entries => 400, 45 -parameters => { 'e' => '0.001' }, 46 -statistics => { 'kappa' => 0.731 }, 47 -algorithm => 'blastp', 48 -algorithm_version => '2.1.2', 49 ); 50 51 my $id = $result->query_name(); 52 53 my $desc = $result->query_description(); 54 55 my $name = $result->database_name(); 56 57 my $size = $result->database_letters(); 58 59 my $num_entries = $result->database_entries(); 60 61 my $gap_ext = $result->get_parameter('e'); 62 63 my @params = $result->available_parameters; 64 65 my $kappa = $result->get_statistic('kappa'); 66 67 my @statnames = $result->available_statistics; 68 69# TODO: Show how to configure a SearchIO stream so that it generates 70# GenericResult objects. 71 72 73=head1 DESCRIPTION 74 75This object is an implementation of the Bio::Search::Result::ResultI 76interface and provides a generic place to store results from a 77sequence database search. 78 79Unless you're writing a parser, you won't ever need to create a 80GenericResult or any other ResultI-implementing object. If you use 81the SearchIO system, ResultI objects are created automatically from 82a SearchIO stream which returns Bio::Search::Result::ResultI objects. 83 84For documentation on what you can do with GenericResult (and other ResultI 85objects), please see the API documentation in 86L<Bio::Search::Result::ResultI|Bio::Search::Result::ResultI>. 87 88=head1 FEEDBACK 89 90=head2 Mailing Lists 91 92User feedback is an integral part of the evolution of this and other 93Bioperl modules. Send your comments and suggestions preferably to 94the Bioperl mailing list. Your participation is much appreciated. 
95 96 bioperl-l@bioperl.org - General discussion 97 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 98 99=head2 Support 100 101Please direct usage questions or support issues to the mailing list: 102 103I<bioperl-l@bioperl.org> 104 105rather than to the module maintainer directly. Many experienced and 106responsive experts will be able to look at the problem and quickly 107address it. Please include a thorough description of the problem 108with code and data examples if at all possible. 109 110=head2 Reporting Bugs 111 112Report bugs to the Bioperl bug tracking system to help us keep track 113of the bugs and their resolution. Bug reports can be submitted via the 114web: 115 116 https://github.com/bioperl/bioperl-live/issues 117 118=head1 AUTHOR - Jason Stajich and Steve Chervitz 119 120Email jason@bioperl.org 121Email sac@bioperl.org 122 123=head1 CONTRIBUTORS 124 125Sendu Bala, bix@sendu.me.uk 126 127=head1 APPENDIX 128 129The rest of the documentation details each of the object methods. 130Internal methods are usually preceded with a _ 131 132=cut 133 134 135# Let the code begin... 
package Bio::Search::Result::GenericResult;
$Bio::Search::Result::GenericResult::VERSION = '1.7.7';
use strict;
use warnings;

use Bio::Search::GenericStatistics;
use Bio::Tools::Run::GenericParameters;

# bug #1420
#use overload
#    '""' => \&to_string;

use base qw(Bio::Root::Root Bio::Search::Result::ResultI);

=head2 new

 Title   : new
 Usage   : my $obj = Bio::Search::Result::GenericResult->new();
 Function: Builds a new Bio::Search::Result::GenericResult object
 Returns : Bio::Search::Result::GenericResult
 Args    : -query_name          => Name of query Sequence
           -query_accession     => Query accession number (if available)
           -query_description   => Description of query sequence
           -query_length        => Length of query sequence
           -query_gi            => NCBI GI number of the query (if available)
           -database_name       => Name of database
           -database_letters    => Number of residues in database
           -database_entries    => Number of entries in database
           -hits                => array ref of Bio::Search::Hit::HitI objects
           -parameters          => hash ref of search parameters (key => value)
           -statistics          => hash ref of search statistics (key => value)
           -algorithm           => program name (blastx)
           -algorithm_version   => version of the algorithm (2.1.2)
           -algorithm_reference => literature reference string for this algorithm
           -program_reference   => deprecated alias for -algorithm_reference
                                   (only used when -algorithm_reference is
                                   not given)
           -rid                 => value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
           -hit_factory         => Bio::Factory::ObjectFactoryI capable of making
                                   Bio::Search::Hit::HitI objects
 Throws  : if -parameters or -statistics is not a hash reference, or if
           -hits is not an array reference

=cut

sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    # Containers that the rest of the object relies on being present.
    $self->{'_hits'}       = [];
    $self->{'_hitindex'}   = 0;
    $self->{'_statistics'} = Bio::Search::GenericStatistics->new();
    $self->{'_parameters'} = Bio::Tools::Run::GenericParameters->new();

    my ($qname, $qacc, $qdesc, $qlen, $qgi,
        $dbname, $dblet, $dbent, $params,
        $stats, $hits, $algo, $algo_v,
        $prog_ref, $algo_r, $rid, $hit_factory)
        = $self->_rearrange([qw(QUERY_NAME
                                QUERY_ACCESSION
                                QUERY_DESCRIPTION
                                QUERY_LENGTH
                                QUERY_GI
                                DATABASE_NAME
                                DATABASE_LETTERS
                                DATABASE_ENTRIES
                                PARAMETERS
                                STATISTICS
                                HITS
                                ALGORITHM
                                ALGORITHM_VERSION
                                PROGRAM_REFERENCE
                                ALGORITHM_REFERENCE
                                RID
                                HIT_FACTORY
                             )], @args);

    # -program_reference is the older spelling; it only applies when the
    # newer -algorithm_reference was not supplied.
    $algo_r ||= $prog_ref;
    defined $algo   && $self->algorithm($algo);
    defined $algo_v && $self->algorithm_version($algo_v);
    defined $algo_r && $self->algorithm_reference($algo_r);

    defined $rid    && $self->rid($rid);

    defined $qname  && $self->query_name($qname);
    defined $qacc   && $self->query_accession($qacc);
    defined $qdesc  && $self->query_description($qdesc);
    defined $qlen   && $self->query_length($qlen);
    defined $qgi    && $self->query_gi($qgi);
    defined $dbname && $self->database_name($dbname);
    defined $dblet  && $self->database_letters($dblet);
    defined $dbent  && $self->database_entries($dbent);

    defined $hit_factory && $self->hit_factory($hit_factory);

    if ( defined $params ) {
        if ( ref($params) !~ /hash/i ) {
            $self->throw("Must specify a hash reference with the parameter '-parameters'");
        }
        # keys() instead of each(): each() resumes from wherever the
        # caller's hash iterator was left, which can silently skip entries.
        foreach my $key ( keys %{$params} ) {
            $self->{'_parameters'}->set_parameter( $key => $params->{$key} );
        }
    }
    if ( defined $stats ) {
        if ( ref($stats) !~ /hash/i ) {
            $self->throw("Must specify a hash reference with the parameter '-statistics'");
        }
        foreach my $key ( keys %{$stats} ) {
            $self->{'_statistics'}->set_statistic( $key => $stats->{$key} );
        }
    }

    if ( defined $hits ) {
        $self->throw("Must define arrayref of Hits when initializing a $class\n")
            unless ref($hits) =~ /array/i;

        foreach my $s ( @$hits ) {
            $self->add_hit($s);
        }
    }
    return $self;
}

=head2 algorithm

 Title   : algorithm
 Usage   : my $r_type = $result->algorithm
 Function: Obtain the name of the algorithm used to obtain the Result
 Returns : string (e.g., BLASTP). When called with a new value, the value
           that was stored BEFORE the call is returned (undef if nothing
           had been stored yet). When called without a value and nothing
           is stored, '' is stored and returned.
 Args    : [optional] scalar string to set value

=cut

sub algorithm {
    my ($self, $value) = @_;
    my $previous = $self->{'_algorithm'};
    if ( defined $value ) {
        $self->{'_algorithm'} = $value;
    }
    elsif ( !defined $previous ) {
        # First read of an unset field: initialise it to the empty string.
        $previous = $self->{'_algorithm'} = '';
    }
    return $previous;
}

=head2 algorithm_version

 Title   : algorithm_version
 Usage   : my $r_version = $result->algorithm_version
 Function: Obtain the version of the algorithm used to obtain the Result
 Returns : string (e.g., 2.1.2). When called with a new value, the value
           that was stored BEFORE the call is returned (undef if nothing
           had been stored yet). When called without a value and nothing
           is stored, '' is stored and returned.
 Args    : [optional] scalar string to set algorithm version value

=cut

sub algorithm_version {
    my ($self, $value) = @_;
    my $previous = $self->{'_algorithm_version'};
    if ( defined $value ) {
        $self->{'_algorithm_version'} = $value;
    }
    elsif ( !defined $previous ) {
        # First read of an unset field: initialise it to the empty string.
        $previous = $self->{'_algorithm_version'} = '';
    }
    return $previous;
}

=head2 Bio::Search::Result::ResultI interface methods

Bio::Search::Result::ResultI implementation

=head2 next_hit

 Title   : next_hit
 Usage   : while( $hit = $result->next_hit()) { ... }
 Function: Returns the next available Hit object, representing potential
           matches between the query and various entities from the database.

=cut
=pod

 Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
 Args    : none

=cut

sub next_hit {
    my ($self) = @_;

    my $index = $self->_nexthitindex;

    # Valid indices are 0 .. count-1, so the iterator is exhausted as soon as
    # the index reaches the count. (Testing with '>' let the first call past
    # the end fall through and return a one-element list holding undef.)
    return if $index >= scalar @{ $self->{'_hits'} };

    my $hit = $self->{'_hits'}->[$index];
    if ( ref($hit) eq 'HASH' ) {
        # Hits may be stored as plain hashes of constructor arguments; turn
        # them into real objects on first access and cache the result.
        my $factory = $self->hit_factory
            || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
        $hit = $factory->create_object( %{$hit} );
        $self->{'_hits'}->[$index] = $hit;
        delete $self->{_hashes}->{$index};
    }
    return $hit;
}

=head2 query_name

 Title   : query_name
 Usage   : $id = $result->query_name();
 Function: Get the string identifier of the query used by the
           algorithm that performed the search.
 Returns : a string. When called with a new value, the value stored
           BEFORE the call is returned (undef if nothing was stored yet).
           When called without a value and nothing is stored, '' is
           stored and returned.
 Args    : [optional] new string value for query name

=cut

sub query_name {
    my ($self, $value) = @_;
    my $previous = $self->{'_queryname'};
    if ( defined $value ) {
        $self->{'_queryname'} = $value;
    }
    elsif ( !defined $previous ) {
        $previous = $self->{'_queryname'} = '';
    }
    return $previous;
}

=head2 query_accession

 Title   : query_accession
 Usage   : $id = $result->query_accession();
 Function: Get the accession (if available) for the query sequence
 Returns : a string (same previous-value / '' default convention as
           query_name)
 Args    : [optional] new string value for accession

=cut

sub query_accession {
    my ($self, $value) = @_;
    my $previous = $self->{'_queryacc'};
    if ( defined $value ) {
        $self->{'_queryacc'} = $value;
    }
    elsif ( !defined $previous ) {
        $previous = $self->{'_queryacc'} = '';
    }
    return $previous;
}

=head2 query_gi

 Title   : query_gi
 Usage   : $gi = $result->query_gi();
 Function: Retrieve the NCBI Unique ID (aka the GI #),
           if available, for the query
 Returns : a scalar string: the explicitly set GI if there is one,
           otherwise the digits of a leading "gi|<digits>" in query_name,
           otherwise the empty string
 Args    : [optional] GI value to store

=cut

sub query_gi {
    my ($self, $value) = @_;

    if ( defined $value ) {
        $self->{'_query_gi'} = $value;
        return $value;
    }

    # An explicitly stored GI wins. Previously every read recomputed the GI
    # from query_name and overwrote whatever had been set.
    my $stored = $self->{'_query_gi'};
    return $stored if defined $stored && length $stored;

    # Derived on each call (not cached) so it tracks later query_name changes.
    return $self->query_name =~ m{^gi\|(\d+)} ? $1 : '';
}

=head2 query_length

 Title   : query_length
 Usage   : $id = $result->query_length();
 Function: Get the length of the query sequence
           used in the search.
 Returns : a number. When called with a new value, the value stored
           BEFORE the call is returned (undef if nothing was stored yet).
           When called without a value and nothing is stored, 0 is
           stored and returned.
 Args    : [optional] new integer value for query length

=cut

sub query_length {
    my ($self, $value) = @_;
    my $previous = $self->{'_querylength'};
    if ( defined $value ) {
        $self->{'_querylength'} = $value;
    }
    elsif ( !defined $previous ) {
        $previous = $self->{'_querylength'} = 0;
    }
    return $previous;
}

=head2 query_description

 Title   : query_description
 Usage   : $id = $result->query_description();
 Function: Get the description of the query sequence
           used in the search.
 Returns : a string (same previous-value / '' default convention as
           query_name)
 Args    : [optional] new string for the query description

=cut

sub query_description {
    my ($self, $value) = @_;
    my $previous = $self->{'_querydesc'};
    if ( defined $value ) {
        $self->{'_querydesc'} = $value;
    }
    elsif ( !defined $previous ) {
        $previous = $self->{'_querydesc'} = '';
    }
    return $previous;
}


=head2 database_name

 Title   : database_name
 Usage   : $name = $result->database_name()
 Function: Used to obtain the name of the database that the query was searched
           against by the algorithm.

=cut
=pod

 Returns : a scalar string
 Args    : [optional] new string for the db name

=cut

sub database_name {
    my ($self, $new_name) = @_;
    my $old_name = $self->{'_dbname'};
    if ( defined $new_name ) {
        $self->{'_dbname'} = $new_name;
    }
    elsif ( !defined $old_name ) {
        # Nothing stored and nothing supplied: initialise to ''.
        $old_name = $self->{'_dbname'} = '';
    }
    # On a set this is the value held before the call.
    return $old_name;
}

=head2 database_letters

 Title   : database_letters
 Usage   : $size = $result->database_letters()
 Function: Used to obtain the size of database that was searched against.
 Returns : a scalar integer (units specific to algorithm, but probably the
           total number of residues in the database, if available), or the
           empty string if no value has been stored. When a new value is
           passed, the value stored before the call is returned.
 Args    : [optional] new scalar integer for number of letters in db


=cut

sub database_letters {
    my ($self, $new_count) = @_;
    my $old_count = $self->{'_dbletters'};
    if ( defined $new_count ) {
        $self->{'_dbletters'} = $new_count;
    }
    elsif ( !defined $old_count ) {
        $old_count = $self->{'_dbletters'} = '';
    }
    return $old_count;
}

=head2 database_entries

 Title   : database_entries
 Usage   : $num_entries = $result->database_entries()
 Function: Used to obtain the number of entries contained in the database.
 Returns : a scalar integer representing the number of entities in the
           database, or the empty string if no value has been stored.
           When a new value is passed, the value stored before the call
           is returned.
 Args    : [optional] new integer for the number of sequence entries in the db


=cut

sub database_entries {
    my ($self, $new_count) = @_;
    my $old_count = $self->{'_dbentries'};
    if ( defined $new_count ) {
        $self->{'_dbentries'} = $new_count;
    }
    elsif ( !defined $old_count ) {
        $old_count = $self->{'_dbentries'} = '';
    }
    return $old_count;
}

=head2 get_parameter

 Title   : get_parameter
 Usage   : my $gap_ext = $report->get_parameter('gapext')
 Function: Returns the value for a specific parameter used
           when running this report
 Returns : string
 Args    : name of parameter (string)

=cut

sub get_parameter {
    my ($self, $name) = @_;
    my $store = $self->{'_parameters'};
    return $store->get_parameter($name);
}

=head2 available_parameters

 Title   : available_parameters
 Usage   : my @params = $report->available_parameters
 Function: Returns the names of the available parameters
 Returns : Return list of available parameters used for this report
 Args    : none

=cut

sub available_parameters {
    my $self  = shift;
    my $store = $self->{'_parameters'};
    return $store->available_parameters;
}


=head2 get_statistic

 Title   : get_statistic
 Usage   : my $kappa = $report->get_statistic('kappa')
 Function: Returns the value for a specific statistic available
           from this report
 Returns : string
 Args    : name of statistic (string)

=cut

sub get_statistic {
    my ($self, $key) = @_;
    my $store = $self->{'_statistics'};
    return $store->get_statistic($key);
}

=head2 available_statistics

 Title   : available_statistics
 Usage   : my @statnames = $report->available_statistics
 Function: Returns the names of the available statistics
 Returns : Return list of available statistics used for this report
 Args    : none

=cut

sub available_statistics {
    my $self  = shift;
    my $store = $self->{'_statistics'};
    return $store->available_statistics;
}

=head2 Bio::Search::Report

Bio::Search::Result::GenericResult specific methods

=head2 add_hit

 Title   : add_hit
 Usage   : $report->add_hit($hit)
 Function: Adds a HitI to the stored list of hits
 Returns : Number of HitI

=cut
=pod

           currently stored
 Args    : Bio::Search::Hit::HitI object, or a hash ref of arguments from
           which the hit factory can later build one
 Throws  : if the argument is neither of the above

=cut

sub add_hit {
    my ($self, $s) = @_;

    require Scalar::Util;

    my $is_hash = ref($s) eq 'HASH';

    # blessed() guards the isa() call: invoking ->isa on undef or on an
    # unblessed reference dies with a raw Perl error instead of reaching the
    # throw() below, and a plain string would be treated as a class name.
    my $is_hit = Scalar::Util::blessed($s) && $s->isa('Bio::Search::Hit::HitI');

    unless ( $is_hash || $is_hit ) {
        $self->throw( "Passed in " . ref($s) . " as a Hit which is not a Bio::Search::Hit::HitI." );
    }

    push @{ $self->{'_hits'} }, $s;

    if ($is_hash) {
        # Remember which slots still hold raw hashes awaiting conversion.
        $self->{_hashes}->{ $#{ $self->{'_hits'} } } = 1;
    }
    return scalar @{ $self->{'_hits'} };
}

=head2 hit_factory

 Title   : hit_factory
 Usage   : $result->hit_factory($hit_factory)
 Function: Get/set the factory used to build HitI objects if necessary.
 Returns : Bio::Factory::ObjectFactoryI, or nothing (undef / empty list)
           when no factory is set
 Args    : Bio::Factory::ObjectFactoryI

=cut

sub hit_factory {
    my $self = shift;
    if (@_) { $self->{_hit_factory} = shift }

    my $factory = $self->{_hit_factory};
    return $factory if $factory;
    return;
}

=head2 rewind

 Title   : rewind
 Usage   : $result->rewind;
 Function: Allow one to reset the Hit iterator to the beginning
           Since this is an in-memory implementation
 Returns : none (callers should not rely on the return value)
 Args    : none

=cut

sub rewind {
    my ($self) = @_;
    return $self->{'_hitindex'} = 0;
}


=head2 _nexthitindex

 Title   : _nexthitindex
 Usage   : private
 Function: Returns the current hit index, then advances it by one

=cut

sub _nexthitindex {
    my ($self) = @_;
    return $self->{'_hitindex'}++;
}


=head2 add_parameter

 Title   : add_parameter
 Usage   : $report->add_parameter('gapext', 11);
 Function: Adds a parameter
 Returns : none (currently whatever the parameter store's set_parameter
           returns; callers should not rely on it)
 Args    : key   - name of this parameter
           value - value for this parameter

=cut

sub add_parameter {
    my ($self, $key, $value) = @_;
    $self->{'_parameters'}->set_parameter( $key => $value );
}


=head2 add_statistic

 Title   : add_statistic
 Usage   : $report->add_statistic('lambda', 2.3);
 Function: Adds a statistic
 Returns : none
 Args    : key   - name of this statistic
           value - value for this statistic

=cut
# add_statistic($key, $value): record one statistic in the statistics store
# held in $self->{'_statistics'}. Returns nothing.
sub add_statistic {
    my $self = shift;
    my ($name, $number) = @_;
    my $store = $self->{'_statistics'};
    $store->set_statistic( $name => $number );
    return;
}


=head2 num_hits

 Title   : num_hits
 Usage   : my $hitcount= $result->num_hits
 Function: returns the number of hits for this query result
 Returns : integer
 Args    : none
 Throws  : if the internal hit list is not defined

=cut

sub num_hits {
    my ($self) = @_;
    $self->throw("Can't get Hits: data not collected.")
        unless defined $self->{'_hits'};
    my $stored = $self->{'_hits'};
    return scalar @{$stored};
}


=head2 hits

 Title   : hits
 Usage   : my @hits = $result->hits
 Function: Returns the available hits for this Result. Any hit still
           stored as a hash ref is first converted into an object with
           the hit factory.
 Returns : Array of L<Bio::Search::Hit::HitI> objects
 Args    : none
 Throws  : if a hash-ref hit needs converting and no hit factory is set


=cut

sub hits {
    my ($self) = @_;

    # Slots recorded in _hashes still hold raw constructor arguments.
    my $pending = $self->{_hashes} || {};
    for my $slot ( keys %{$pending} ) {
        my $factory = $self->hit_factory
            || $self->throw("Tried to get a Hit, but it was a hash ref and we have no hit factory");
        my $raw = $self->{'_hits'}->[$slot];
        $self->{'_hits'}->[$slot] = $factory->create_object( %{$raw} );
        delete $self->{_hashes}->{$slot};
    }

    my @all = ref $self->{'_hits'} ? @{ $self->{'_hits'} } : ();
    return @all;
}

=head2 algorithm_reference

 Title   : algorithm_reference
 Usage   : $obj->algorithm_reference($newval)
 Function:
 Returns : string containing literature reference for the algorithm
 Args    : newvalue string (optional)
 Comments: Formerly named program_reference(), which is still supported
           for backwards compatibility.

=cut
# algorithm_reference([$reference]): get/set the literature reference string
# for the algorithm. Stored under the hash key 'algorithm_reference'.
sub algorithm_reference {
    my ($self, $reference) = @_;
    $self->{'algorithm_reference'} = $reference if defined $reference;
    return $self->{'algorithm_reference'};
}

=head2 program_reference

 Title   : program_reference
 Usage   : $obj->program_reference()
 Function:
 Returns : string containing literature reference for the algorithm
 Args    :
 Comments: Deprecated - use algorithm_reference() instead.

=cut

sub program_reference {
    my ($self, @args) = @_;
    return $self->algorithm_reference(@args);
}

=head2 rid

 Title   : rid
 Usage   : $obj->rid($newval)
 Function:
 Returns : value of the BLAST Request ID (eg. RID: ZABJ4EA7014)
 Args    : newvalue (optional)
 Comments: The default implementation in ResultI returns an empty string
           rather than throwing a NotImplemented exception, since
           the RID may not always be available and is not critical.
           See: (1) https://www.ncbi.nlm.nih.gov/Class/MLACourse/Modules/BLAST/rid.html
                (2) https://www.ncbi.nlm.nih.gov/staff/tao/URLAPI/new/node63.html

=cut

sub rid {
    my ($self, $request_id) = @_;
    $self->{'rid'} = $request_id if defined $request_id;
    return $self->{'rid'};
}

=head2 no_hits_found

See documentation in L<Bio::Search::Result::ResultI::no_hits_found()|Bio::Search::Result::ResultI>

=cut

sub no_hits_found {
    my ($self) = @_;

    # Mind the double negative:
    #   false (0 or unset) => hits WERE found
    #   1                  => NO hits were found
    return $self->{'_no_hits_found'};
}


=head2 set_no_hits_found

See documentation in L<Bio::Search::Result::ResultI::set_no_hits_found()|Bio::Search::Result::ResultI>

=cut

sub set_no_hits_found {
    my ($self) = @_;
    return $self->{'_no_hits_found'} = 1;
}


=head2 to_string

 Title   : to_string
 Usage   : print $blast->to_string;
 Function: Returns a string representation for the Blast result.

=cut
=pod

           Primarily intended for debugging purposes.
 Example : see usage
 Returns : A string of the form:
             <class>, algorithm= <algorithm>, query=<name> <description>, db=<database>
           e.g.:
             Bio::Search::Result::GenericResult, algorithm= BLASTP, query=YEL060C vacuolar protease B, db=PDBUNIQ
 Args    : None

=cut

sub to_string {
    my ($self) = @_;

    # Fetch each piece in scalar context, in the order it appears in the output.
    my $class       = ref($self);
    my $algorithm   = $self->algorithm;
    my $query       = $self->query_name;
    my $description = $self->query_description;
    my $database    = $self->database_name;

    return join q{},
        $class,
        ", algorithm= ", $algorithm,
        ", query=",      $query, " ", $description,
        ", db=",         $database;
}

1;