1# 2# BioPerl module for Bio::Seq::SeqBuilder 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Hilmar Lapp <hlapp at gmx.net> 7# 8# Copyright Hilmar Lapp 9# 10# You may distribute this module under the same terms as perl itself 11 12# 13# (c) Hilmar Lapp, hlapp at gmx.net, 2002. 14# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. 15# 16# You may distribute this module under the same terms as perl itself. 17# Refer to the Perl Artistic License (see the license accompanying this 18# software package, or see http://www.perl.com/language/misc/Artistic.html) 19# for the terms under which you may use, modify, and redistribute this module. 20# 21# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED 22# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 23# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 24# 25 26# POD documentation - main docs before the code 27 28=head1 NAME 29 30Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers 31 32=head1 SYNOPSIS 33 34 use Bio::SeqIO; 35 36 # usually you won't instantiate this yourself - a SeqIO object - 37 # you will have one already 38 my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank"); 39 my $builder = $seqin->sequence_builder(); 40 41 # if you need only sequence, id, and description (e.g. for 42 # conversion to FASTA format): 43 $builder->want_none(); 44 $builder->add_wanted_slot('display_id','desc','seq'); 45 46 # if you want everything except the sequence and features 47 $builder->want_all(1); # this is the default if it's untouched 48 $builder->add_unwanted_slot('seq','features'); 49 50 # if you want only human sequences shorter than 5kb and skip all 51 # others 52 $builder->add_object_condition(sub { 53 my $h = shift; 54 return 0 if $h->{'-length'} > 5000; 55 return 0 if exists($h->{'-species'}) && 56 ($h->{'-species'}->binomial() ne "Homo sapiens"); 57 return 1; 58 }); 59 60 # when you are finished with configuring the builder, just use 61 # the SeqIO API as you would normally 62 while(my $seq = $seqin->next_seq()) { 63 # do something 64 } 65 66=head1 DESCRIPTION 67 68This is an implementation of L<Bio::Factory::ObjectBuilderI> used by 69parsers of rich sequence streams. It provides for a relatively 70easy-to-use configurator of the parsing flow. 71 72Configuring the parsing process may be for you if you need much less 73information, or much less sequence, than the stream actually 74contains. Configuration can in both cases speed up the parsing time 75considerably, because unwanted sections or the rest of unwanted 76sequences are skipped over by the parser. This configuration could 77also conserve memory if you're running out of available RAM. 78 79See the methods of the class-specific implementation section for 80further documentation of what can be configured. 81 82=head1 FEEDBACK 83 84=head2 Mailing Lists 85 86User feedback is an integral part of the evolution of this and other 87Bioperl modules. Send your comments and suggestions preferably to 88the Bioperl mailing list. Your participation is much appreciated. 89 90 bioperl-l@bioperl.org - General discussion 91 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 92 93=head2 Support 94 95Please direct usage questions or support issues to the mailing list: 96 97I<bioperl-l@bioperl.org> 98 99rather than to the module maintainer directly. Many experienced and 100reponsive experts will be able look at the problem and quickly 101address it. Please include a thorough description of the problem 102with code and data examples if at all possible. 103 104=head2 Reporting Bugs 105 106Report bugs to the Bioperl bug tracking system to help us keep track 107of the bugs and their resolution. Bug reports can be submitted via 108the web: 109 110 https://github.com/bioperl/bioperl-live/issues 111 112=head1 AUTHOR - Hilmar Lapp 113 114Email hlapp at gmx.net 115 116=head1 APPENDIX 117 118The rest of the documentation details each of the object methods. 119Internal methods are usually preceded with a _ 120 121=cut 122 123 124# Let the code begin... 125 126 127package Bio::Seq::SeqBuilder; 128$Bio::Seq::SeqBuilder::VERSION = '1.7.7'; 129use strict; 130 131# Object preamble - inherits from Bio::Root::Root 132 133 134use base qw(Bio::Root::Root Bio::Factory::ObjectBuilderI); 135 136my %slot_param_map = ("add_SeqFeature" => "features", 137 ); 138my %param_slot_map = ("features" => "add_SeqFeature", 139 ); 140 141=head2 new 142 143 Title : new 144 Usage : my $obj = Bio::Seq::SeqBuilder->new(); 145 Function: Builds a new Bio::Seq::SeqBuilder object 146 Returns : an instance of Bio::Seq::SeqBuilder 147 Args : 148 149=cut 150 151sub new { 152 my($class,@args) = @_; 153 154 my $self = $class->SUPER::new(@args); 155 156 $self->{'wanted_slots'} = []; 157 $self->{'unwanted_slots'} = []; 158 $self->{'object_conds'} = []; 159 $self->{'_objhash'} = {}; 160 $self->want_all(1); 161 162 return $self; 163} 164 165=head1 Methods for implementing L<Bio::Factory::ObjectBuilderI> 166 167=cut 168 169=head2 want_slot 170 171 Title : want_slot 172 Usage : 173 Function: Whether or not the object builder wants to populate the 174 specified slot of the object to be built. 175 176 The slot can be specified either as the name of the 177 respective method, or the initialization parameter that 178 would be otherwise passed to new() of the object to be 179 built. 180 181 Note that usually only the parser will call this 182 method. Use add_wanted_slots and add_unwanted_slots for 183 configuration. 184 185 Example : 186 Returns : TRUE if the object builder wants to populate the slot, and 187 FALSE otherwise. 188 Args : the name of the slot (a string) 189 190 191=cut 192 193sub want_slot{ 194 my ($self,$slot) = @_; 195 my $ok = 0; 196 197 $slot = substr($slot,1) if substr($slot,0,1) eq '-'; 198 if($self->want_all()) { 199 foreach ($self->get_unwanted_slots()) { 200 # this always overrides in want-all mode 201 return 0 if($slot eq $_); 202 } 203 if(! exists($self->{'_objskel'})) { 204 $self->{'_objskel'} = $self->sequence_factory->create_object(); 205 } 206 if(exists($param_slot_map{$slot})) { 207 $ok = $self->{'_objskel'}->can($param_slot_map{$slot}); 208 } else { 209 $ok = $self->{'_objskel'}->can($slot); 210 } 211 return $ok if $ok; 212 # even if the object 'cannot' do this slot, it might have been 213 # added to the list of wanted slot, so carry on 214} 215 foreach ($self->get_wanted_slots()) { 216 if($slot eq $_) { 217 $ok = 1; 218 last; 219 } 220 } 221 return $ok; 222} 223 224=head2 add_slot_value 225 226 Title : add_slot_value 227 Usage : 228 Function: Adds one or more values to the specified slot of the object 229 to be built. 230 231 Naming the slot is the same as for want_slot(). 232 233 The object builder may further filter the content to be 234 set, or even completely ignore the request. 235 236 If this method reports failure, the caller should not add 237 more values to the same slot. In addition, the caller may 238 find it appropriate to abandon the object being built 239 altogether. 240 241 This implementation will allow the caller to overwrite the 242 return value from want_slot(), because the slot is not 243 checked against want_slot(). 244 245 Note that usually only the parser will call this method, 246 but you may call it from anywhere if you know what you are 247 doing. A derived class may be used to further manipulate 248 the value to be added. 249 250 Example : 251 Returns : TRUE on success, and FALSE otherwise 252 Args : the name of the slot (a string) 253 parameters determining the value to be set 254 255 OR 256 257 alternatively, a list of slotname/value pairs in the style 258 of named parameters as they would be passed to new(), where 259 each element at an even index is the parameter (slot) name 260 starting with a dash, and each element at an odd index is 261 the value of the preceding name. 262 263=cut 264 265sub add_slot_value{ 266 my ($self,$slot,@args) = @_; 267 268 my $h = $self->{'_objhash'}; 269 return unless $h; 270 # multiple named parameter variant of calling? 271 if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) { 272 unshift(@args, $slot); 273 while(@args) { 274 my $key = shift(@args); 275 $h->{$key} = shift(@args); 276 } 277 } else { 278 if($slot eq 'add_SeqFeature') { 279 $slot = '-'.$slot_param_map{$slot}; 280 $h->{$slot} = [] unless $h->{$slot}; 281 push(@{$h->{$slot}}, @args); 282 } else { 283 $slot = '-'.$slot unless substr($slot,0,1) eq '-'; 284 $h->{$slot} = $args[0]; 285 } 286 } 287 return 1; 288} 289 290=head2 want_object 291 292 Title : want_object 293 Usage : 294 Function: Whether or not the object builder is still interested in 295 continuing with the object being built. 296 297 If this method returns FALSE, the caller should not add any 298 more values to slots, or otherwise risks that the builder 299 throws an exception. In addition, make_object() is likely 300 to return undef after this method returned FALSE. 301 302 Note that usually only the parser will call this 303 method. Use add_object_condition for configuration. 304 305 Example : 306 Returns : TRUE if the object builder wants to continue building 307 the present object, and FALSE otherwise. 308 Args : none 309 310=cut 311 312sub want_object{ 313 my $self = shift; 314 315 my $ok = 1; 316 foreach my $cond ($self->get_object_conditions()) { 317 $ok = &$cond($self->{'_objhash'}); 318 last unless $ok; 319 } 320 delete $self->{'_objhash'} unless $ok; 321 return $ok; 322} 323 324=head2 make_object 325 326 Title : make_object 327 Usage : 328 Function: Get the built object. 329 330 This method is allowed to return undef if no value has ever 331 been added since the last call to make_object(), or if 332 want_object() returned FALSE (or would have returned FALSE) 333 before calling this method. 334 335 For an implementation that allows consecutive building of 336 objects, a caller must call this method once, and only 337 once, between subsequent objects to be built. I.e., a call 338 to make_object implies 'end_object.' 339 340 Example : 341 Returns : the object that was built 342 Args : none 343 344=cut 345 346sub make_object{ 347 my $self = shift; 348 349 my $obj; 350 if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) { 351 $obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}}); 352 } 353 $self->{'_objhash'} = {}; # reset 354 return $obj; 355} 356 357=head1 Implementation specific methods 358 359These methods allow one to conveniently configure this sequence object 360builder as to which slots are desired, and under which circumstances a 361sequence object should be abandoned altogether. The default mode is 362want_all(1), which means the builder will report all slots as wanted 363that the object created by the sequence factory supports. 364 365You can add specific slots you want through add_wanted_slots(). In 366most cases, you will want to call want_none() before in order to relax 367zero acceptance through a list of wanted slots. 368 369Alternatively, you can add specific unwanted slots through 370add_unwanted_slots(). In this case, you will usually want to call 371want_all(1) before (which is the default if you never touched the 372builder) to restrict unrestricted acceptance. 373 374I.e., want_all(1) means want all slots except for the unwanted, and 375want_none() means only those explicitly wanted. 376 377If a slot is in both the unwanted and the wanted list, the following 378rules hold. In want-all mode, the unwanted list overrules. In 379want-none mode, the wanted list overrides the unwanted list. If this 380is confusing to you, just try to avoid having slots at the same time 381in the wanted and the unwanted lists. 382 383=cut 384 385=head2 get_wanted_slots 386 387 Title : get_wanted_slots 388 Usage : $obj->get_wanted_slots($newval) 389 Function: Get the list of wanted slots 390 Example : 391 Returns : a list of strings 392 Args : 393 394 395=cut 396 397sub get_wanted_slots{ 398 my $self = shift; 399 400 return @{$self->{'wanted_slots'}}; 401} 402 403=head2 add_wanted_slot 404 405 Title : add_wanted_slot 406 Usage : 407 Function: Adds the specified slots to the list of wanted slots. 408 Example : 409 Returns : TRUE 410 Args : an array of slot names (strings) 411 412=cut 413 414sub add_wanted_slot{ 415 my ($self,@slots) = @_; 416 417 my $myslots = $self->{'wanted_slots'}; 418 foreach my $slot (@slots) { 419 if(! grep { $slot eq $_; } @$myslots) { 420 push(@$myslots, $slot); 421 } 422 } 423 return 1; 424} 425 426=head2 remove_wanted_slots 427 428 Title : remove_wanted_slots 429 Usage : 430 Function: Removes all wanted slots added previously through 431 add_wanted_slots(). 432 Example : 433 Returns : the previous list of wanted slot names 434 Args : none 435 436=cut 437 438sub remove_wanted_slots{ 439 my $self = shift; 440 my @slots = $self->get_wanted_slots(); 441 $self->{'wanted_slots'} = []; 442 return @slots; 443} 444 445=head2 get_unwanted_slots 446 447 Title : get_unwanted_slots 448 Usage : $obj->get_unwanted_slots($newval) 449 Function: Get the list of unwanted slots. 450 Example : 451 Returns : a list of strings 452 Args : none 453 454=cut 455 456sub get_unwanted_slots{ 457 my $self = shift; 458 459 return @{$self->{'unwanted_slots'}}; 460} 461 462=head2 add_unwanted_slot 463 464 Title : add_unwanted_slot 465 Usage : 466 Function: Adds the specified slots to the list of unwanted slots. 467 Example : 468 Returns : TRUE 469 Args : an array of slot names (strings) 470 471=cut 472 473sub add_unwanted_slot{ 474 my ($self,@slots) = @_; 475 476 my $myslots = $self->{'unwanted_slots'}; 477 foreach my $slot (@slots) { 478 if(! grep { $slot eq $_; } @$myslots) { 479 push(@$myslots, $slot); 480 } 481 } 482 return 1; 483} 484 485=head2 remove_unwanted_slots 486 487 Title : remove_unwanted_slots 488 Usage : 489 Function: Removes the list of unwanted slots added previously through 490 add_unwanted_slots(). 491 Example : 492 Returns : the previous list of unwanted slot names 493 Args : none 494 495=cut 496 497sub remove_unwanted_slots{ 498 my $self = shift; 499 my @slots = $self->get_unwanted_slots(); 500 $self->{'unwanted_slots'} = []; 501 return @slots; 502} 503 504=head2 want_none 505 506 Title : want_none 507 Usage : 508 Function: Disables all slots. After calling this method, want_slot() 509 will return FALSE regardless of slot name. 510 511 This is different from removed_wanted_slots() in that it 512 also sets want_all() to FALSE. Note that it also resets the 513 list of unwanted slots in order to avoid slots being in 514 both lists. 515 516 Example : 517 Returns : TRUE 518 Args : none 519 520=cut 521 522sub want_none{ 523 my $self = shift; 524 525 $self->want_all(0); 526 $self->remove_wanted_slots(); 527 $self->remove_unwanted_slots(); 528 return 1; 529} 530 531=head2 want_all 532 533 Title : want_all 534 Usage : $obj->want_all($newval) 535 Function: Whether or not this sequence object builder wants to 536 populate all slots that the object has. Whether an object 537 supports a slot is generally determined by what can() 538 returns. You can add additional 'virtual' slots by calling 539 add_wanted_slot. 540 541 This will be ON by default. Call $obj->want_none() to 542 disable all slots. 543 544 Example : 545 Returns : TRUE if this builder wants to populate all slots, and 546 FALSE otherwise. 547 Args : on set, new value (a scalar or undef, optional) 548 549=cut 550 551sub want_all{ 552 my $self = shift; 553 554 return $self->{'want_all'} = shift if @_; 555 return $self->{'want_all'}; 556} 557 558=head2 get_object_conditions 559 560 Title : get_object_conditions 561 Usage : 562 Function: Get the list of conditions an object must meet in order to 563 be 'wanted.' See want_object() for where this is used. 564 565 Conditions in this implementation are closures (anonymous 566 functions) which are passed one parameter, a hash reference 567 the keys of which are equal to initialization 568 parameters. The closure must return TRUE to make the object 569 'wanted.' 570 571 Conditions will be implicitly ANDed. 572 573 Example : 574 Returns : a list of closures 575 Args : none 576 577=cut 578 579sub get_object_conditions{ 580 my $self = shift; 581 582 return @{$self->{'object_conds'}}; 583} 584 585=head2 add_object_condition 586 587 Title : add_object_condition 588 Usage : 589 Function: Adds a condition an object must meet in order to be 'wanted.' 590 See want_object() for where this is used. 591 592 Conditions in this implementation must be closures 593 (anonymous functions). These will be passed one parameter, 594 which is a hash reference with the sequence object 595 initialization parameters being the keys. 596 597 Conditions are implicitly ANDed. If you want other 598 operators, perform those tests inside of one closure 599 instead of multiple. This will also be more efficient. 600 601 Example : 602 Returns : TRUE 603 Args : the list of conditions 604 605=cut 606 607sub add_object_condition{ 608 my ($self,@conds) = @_; 609 610 if(grep { ref($_) ne 'CODE'; } @conds) { 611 $self->throw("conditions against which to validate an object ". 612 "must be anonymous code blocks"); 613 } 614 push(@{$self->{'object_conds'}}, @conds); 615 return 1; 616} 617 618=head2 remove_object_conditions 619 620 Title : remove_object_conditions 621 Usage : 622 Function: Removes the conditions an object must meet in order to be 623 'wanted.' 624 Example : 625 Returns : The list of previously set conditions (an array of closures) 626 Args : none 627 628=cut 629 630sub remove_object_conditions{ 631 my $self = shift; 632 my @conds = $self->get_object_conditions(); 633 $self->{'object_conds'} = []; 634 return @conds; 635} 636 637=head1 Methods to control what type of object is built 638 639=cut 640 641=head2 sequence_factory 642 643 Title : sequence_factory 644 Usage : $obj->sequence_factory($newval) 645 Function: Get/set the sequence factory to be used by this object 646 builder. 647 Example : 648 Returns : the Bio::Factory::SequenceFactoryI implementing object to use 649 Args : on set, new value (a Bio::Factory::SequenceFactoryI 650 implementing object or undef, optional) 651 652=cut 653 654sub sequence_factory{ 655 my $self = shift; 656 657 if(@_) { 658 delete $self->{'_objskel'}; 659 return $self->{'sequence_factory'} = shift; 660 } 661 return $self->{'sequence_factory'}; 662} 663 6641; 665