1#
2# BioPerl module for Bio::Seq::SeqBuilder
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Hilmar Lapp <hlapp at gmx.net>
7#
8# Copyright Hilmar Lapp
9#
10# You may distribute this module under the same terms as perl itself
11
12#
13# (c) Hilmar Lapp, hlapp at gmx.net, 2002.
14# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
15#
16# You may distribute this module under the same terms as perl itself.
17# Refer to the Perl Artistic License (see the license accompanying this
18# software package, or see http://www.perl.com/language/misc/Artistic.html)
19# for the terms under which you may use, modify, and redistribute this module.
20#
21# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
22# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
23# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
24#
25
26# POD documentation - main docs before the code
27
28=head1 NAME
29
30Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers
31
32=head1 SYNOPSIS
33
34   use Bio::SeqIO;
35
36   # usually you won't instantiate this yourself - a SeqIO object -
37   # you will have one already
38   my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank");
39   my $builder = $seqin->sequence_builder();
40
41   # if you need only sequence, id, and description (e.g. for
42   # conversion to FASTA format):
43   $builder->want_none();
44   $builder->add_wanted_slot('display_id','desc','seq');
45
46   # if you want everything except the sequence and features
47   $builder->want_all(1); # this is the default if it's untouched
48   $builder->add_unwanted_slot('seq','features');
49
50   # if you want only human sequences shorter than 5kb and skip all
51   # others
52   $builder->add_object_condition(sub {
53       my $h = shift;
54       return 0 if $h->{'-length'} > 5000;
55       return 0 if exists($h->{'-species'}) &&
56                   ($h->{'-species'}->binomial() ne "Homo sapiens");
57       return 1;
58   });
59
60   # when you are finished with configuring the builder, just use
61   # the SeqIO API as you would normally
62   while(my $seq = $seqin->next_seq()) {
63       # do something
64   }
65
66=head1 DESCRIPTION
67
68This is an implementation of L<Bio::Factory::ObjectBuilderI> used by
69parsers of rich sequence streams. It provides for a relatively
70easy-to-use configurator of the parsing flow.
71
72Configuring the parsing process may be for you if you need much less
73information, or much less sequence, than the stream actually
74contains. Configuration can in both cases speed up the parsing time
75considerably, because unwanted sections or the rest of unwanted
76sequences are skipped over by the parser. This configuration could
77also conserve memory if you're running out of available RAM.
78
79See the methods of the class-specific implementation section for
80further documentation of what can be configured.
81
82=head1 FEEDBACK
83
84=head2 Mailing Lists
85
86User feedback is an integral part of the evolution of this and other
87Bioperl modules. Send your comments and suggestions preferably to
88the Bioperl mailing list.  Your participation is much appreciated.
89
90  bioperl-l@bioperl.org                  - General discussion
91  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
92
93=head2 Support
94
95Please direct usage questions or support issues to the mailing list:
96
97I<bioperl-l@bioperl.org>
98
99rather than to the module maintainer directly. Many experienced and
100reponsive experts will be able look at the problem and quickly
101address it. Please include a thorough description of the problem
102with code and data examples if at all possible.
103
104=head2 Reporting Bugs
105
106Report bugs to the Bioperl bug tracking system to help us keep track
107of the bugs and their resolution. Bug reports can be submitted via
108the web:
109
110  https://github.com/bioperl/bioperl-live/issues
111
112=head1 AUTHOR - Hilmar Lapp
113
114Email hlapp at gmx.net
115
116=head1 APPENDIX
117
118The rest of the documentation details each of the object methods.
119Internal methods are usually preceded with a _
120
121=cut
122
123
124# Let the code begin...
125
126
127package Bio::Seq::SeqBuilder;
128$Bio::Seq::SeqBuilder::VERSION = '1.7.7';
129use strict;
130
131# Object preamble - inherits from Bio::Root::Root
132
133
134use base qw(Bio::Root::Root Bio::Factory::ObjectBuilderI);
135
136my %slot_param_map = ("add_SeqFeature" => "features",
137		      );
138my %param_slot_map = ("features"       => "add_SeqFeature",
139		      );
140
141=head2 new
142
143 Title   : new
144 Usage   : my $obj = Bio::Seq::SeqBuilder->new();
145 Function: Builds a new Bio::Seq::SeqBuilder object
146 Returns : an instance of Bio::Seq::SeqBuilder
147 Args    :
148
149=cut
150
151sub new {
152    my($class,@args) = @_;
153
154    my $self = $class->SUPER::new(@args);
155
156    $self->{'wanted_slots'} = [];
157    $self->{'unwanted_slots'} = [];
158    $self->{'object_conds'} = [];
159    $self->{'_objhash'} = {};
160    $self->want_all(1);
161
162    return $self;
163}
164
165=head1 Methods for implementing L<Bio::Factory::ObjectBuilderI>
166
167=cut
168
169=head2 want_slot
170
171 Title   : want_slot
172 Usage   :
173 Function: Whether or not the object builder wants to populate the
174           specified slot of the object to be built.
175
176           The slot can be specified either as the name of the
177           respective method, or the initialization parameter that
178           would be otherwise passed to new() of the object to be
179           built.
180
181           Note that usually only the parser will call this
182           method. Use add_wanted_slots and add_unwanted_slots for
183           configuration.
184
185 Example :
186 Returns : TRUE if the object builder wants to populate the slot, and
187           FALSE otherwise.
188 Args    : the name of the slot (a string)
189
190
191=cut
192
193sub want_slot{
194	my ($self,$slot) = @_;
195	my $ok = 0;
196
197	$slot = substr($slot,1) if substr($slot,0,1) eq '-';
198	if($self->want_all()) {
199	foreach ($self->get_unwanted_slots()) {
200		# this always overrides in want-all mode
201		return 0 if($slot eq $_);
202	}
203	if(! exists($self->{'_objskel'})) {
204		$self->{'_objskel'} = $self->sequence_factory->create_object();
205	}
206	if(exists($param_slot_map{$slot})) {
207		$ok = $self->{'_objskel'}->can($param_slot_map{$slot});
208	} else {
209		$ok = $self->{'_objskel'}->can($slot);
210	}
211	return $ok if $ok;
212	# even if the object 'cannot' do this slot, it might have been
213	# added to the list of wanted slot, so carry on
214}
215	foreach ($self->get_wanted_slots()) {
216		if($slot eq $_) {
217			$ok = 1;
218			last;
219		}
220	}
221	return $ok;
222}
223
224=head2 add_slot_value
225
226 Title   : add_slot_value
227 Usage   :
228 Function: Adds one or more values to the specified slot of the object
229           to be built.
230
231           Naming the slot is the same as for want_slot().
232
233           The object builder may further filter the content to be
234           set, or even completely ignore the request.
235
236           If this method reports failure, the caller should not add
237           more values to the same slot. In addition, the caller may
238           find it appropriate to abandon the object being built
239           altogether.
240
241           This implementation will allow the caller to overwrite the
242           return value from want_slot(), because the slot is not
243           checked against want_slot().
244
245           Note that usually only the parser will call this method,
246           but you may call it from anywhere if you know what you are
247           doing. A derived class may be used to further manipulate
248           the value to be added.
249
250 Example :
251 Returns : TRUE on success, and FALSE otherwise
252 Args    : the name of the slot (a string)
253           parameters determining the value to be set
254
255                 OR
256
257           alternatively, a list of slotname/value pairs in the style
258           of named parameters as they would be passed to new(), where
259           each element at an even index is the parameter (slot) name
260           starting with a dash, and each element at an odd index is
261           the value of the preceding name.
262
263=cut
264
265sub add_slot_value{
266	my ($self,$slot,@args) = @_;
267
268	my $h = $self->{'_objhash'};
269	return unless $h;
270	# multiple named parameter variant of calling?
271	if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) {
272		unshift(@args, $slot);
273		while(@args) {
274			my $key = shift(@args);
275			$h->{$key} = shift(@args);
276		}
277	} else {
278		if($slot eq 'add_SeqFeature') {
279			$slot = '-'.$slot_param_map{$slot};
280			$h->{$slot} = [] unless $h->{$slot};
281			push(@{$h->{$slot}}, @args);
282		} else {
283			$slot = '-'.$slot unless substr($slot,0,1) eq '-';
284			$h->{$slot} = $args[0];
285		}
286	}
287	return 1;
288}
289
290=head2 want_object
291
292 Title   : want_object
293 Usage   :
294 Function: Whether or not the object builder is still interested in
295           continuing with the object being built.
296
297           If this method returns FALSE, the caller should not add any
298           more values to slots, or otherwise risks that the builder
299           throws an exception. In addition, make_object() is likely
300           to return undef after this method returned FALSE.
301
302           Note that usually only the parser will call this
303           method. Use add_object_condition for configuration.
304
305 Example :
306 Returns : TRUE if the object builder wants to continue building
307           the present object, and FALSE otherwise.
308 Args    : none
309
310=cut
311
312sub want_object{
313	my $self = shift;
314
315	my $ok = 1;
316	foreach my $cond ($self->get_object_conditions()) {
317		$ok = &$cond($self->{'_objhash'});
318		last unless $ok;
319	}
320	delete $self->{'_objhash'} unless $ok;
321	return $ok;
322}
323
324=head2 make_object
325
326 Title   : make_object
327 Usage   :
328 Function: Get the built object.
329
330           This method is allowed to return undef if no value has ever
331           been added since the last call to make_object(), or if
332           want_object() returned FALSE (or would have returned FALSE)
333           before calling this method.
334
335           For an implementation that allows consecutive building of
336           objects, a caller must call this method once, and only
337           once, between subsequent objects to be built. I.e., a call
338           to make_object implies 'end_object.'
339
340 Example :
341 Returns : the object that was built
342 Args    : none
343
344=cut
345
346sub make_object{
347	my $self = shift;
348
349	my $obj;
350	if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) {
351		$obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}});
352	}
353	$self->{'_objhash'} = {}; # reset
354	return $obj;
355}
356
357=head1 Implementation specific methods
358
359These methods allow one to conveniently configure this sequence object
360builder as to which slots are desired, and under which circumstances a
361sequence object should be abandoned altogether. The default mode is
362want_all(1), which means the builder will report all slots as wanted
363that the object created by the sequence factory supports.
364
365You can add specific slots you want through add_wanted_slots(). In
366most cases, you will want to call want_none() before in order to relax
367zero acceptance through a list of wanted slots.
368
369Alternatively, you can add specific unwanted slots through
370add_unwanted_slots(). In this case, you will usually want to call
371want_all(1) before (which is the default if you never touched the
372builder) to restrict unrestricted acceptance.
373
374I.e., want_all(1) means want all slots except for the unwanted, and
375want_none() means only those explicitly wanted.
376
377If a slot is in both the unwanted and the wanted list, the following
378rules hold. In want-all mode, the unwanted list overrules. In
379want-none mode, the wanted list overrides the unwanted list. If this
380is confusing to you, just try to avoid having slots at the same time
381in the wanted and the unwanted lists.
382
383=cut
384
385=head2 get_wanted_slots
386
387 Title   : get_wanted_slots
388 Usage   : $obj->get_wanted_slots($newval)
389 Function: Get the list of wanted slots
390 Example :
391 Returns : a list of strings
392 Args    :
393
394
395=cut
396
397sub get_wanted_slots{
398	my $self = shift;
399
400	return @{$self->{'wanted_slots'}};
401}
402
403=head2 add_wanted_slot
404
405 Title   : add_wanted_slot
406 Usage   :
407 Function: Adds the specified slots to the list of wanted slots.
408 Example :
409 Returns : TRUE
410 Args    : an array of slot names (strings)
411
412=cut
413
414sub add_wanted_slot{
415	my ($self,@slots) = @_;
416
417	my $myslots = $self->{'wanted_slots'};
418	foreach my $slot (@slots) {
419		if(! grep { $slot eq $_; } @$myslots) {
420			push(@$myslots, $slot);
421		}
422	}
423	return 1;
424}
425
426=head2 remove_wanted_slots
427
428 Title   : remove_wanted_slots
429 Usage   :
430 Function: Removes all wanted slots added previously through
431           add_wanted_slots().
432 Example :
433 Returns : the previous list of wanted slot names
434 Args    : none
435
436=cut
437
438sub remove_wanted_slots{
439	my $self = shift;
440	my @slots = $self->get_wanted_slots();
441	$self->{'wanted_slots'} = [];
442	return @slots;
443}
444
445=head2 get_unwanted_slots
446
447 Title   : get_unwanted_slots
448 Usage   : $obj->get_unwanted_slots($newval)
449 Function: Get the list of unwanted slots.
450 Example :
451 Returns : a list of strings
452 Args    : none
453
454=cut
455
456sub get_unwanted_slots{
457	my $self = shift;
458
459	return @{$self->{'unwanted_slots'}};
460}
461
462=head2 add_unwanted_slot
463
464 Title   : add_unwanted_slot
465 Usage   :
466 Function: Adds the specified slots to the list of unwanted slots.
467 Example :
468 Returns : TRUE
469 Args    : an array of slot names (strings)
470
471=cut
472
473sub add_unwanted_slot{
474	my ($self,@slots) = @_;
475
476	my $myslots = $self->{'unwanted_slots'};
477	foreach my $slot (@slots) {
478		if(! grep { $slot eq $_; } @$myslots) {
479			push(@$myslots, $slot);
480		}
481	}
482	return 1;
483}
484
485=head2 remove_unwanted_slots
486
487 Title   : remove_unwanted_slots
488 Usage   :
489 Function: Removes the list of unwanted slots added previously through
490           add_unwanted_slots().
491 Example :
492 Returns : the previous list of unwanted slot names
493 Args    : none
494
495=cut
496
497sub remove_unwanted_slots{
498	my $self = shift;
499	my @slots = $self->get_unwanted_slots();
500	$self->{'unwanted_slots'} = [];
501	return @slots;
502}
503
504=head2 want_none
505
506 Title   : want_none
507 Usage   :
508 Function: Disables all slots. After calling this method, want_slot()
509           will return FALSE regardless of slot name.
510
511           This is different from removed_wanted_slots() in that it
512           also sets want_all() to FALSE. Note that it also resets the
513           list of unwanted slots in order to avoid slots being in
514           both lists.
515
516 Example :
517 Returns : TRUE
518 Args    : none
519
520=cut
521
522sub want_none{
523	my $self = shift;
524
525	$self->want_all(0);
526	$self->remove_wanted_slots();
527	$self->remove_unwanted_slots();
528	return 1;
529}
530
531=head2 want_all
532
533 Title   : want_all
534 Usage   : $obj->want_all($newval)
535 Function: Whether or not this sequence object builder wants to
536           populate all slots that the object has. Whether an object
537           supports a slot is generally determined by what can()
538           returns. You can add additional 'virtual' slots by calling
539           add_wanted_slot.
540
541           This will be ON by default. Call $obj->want_none() to
542           disable all slots.
543
544 Example :
545 Returns : TRUE if this builder wants to populate all slots, and
546           FALSE otherwise.
547 Args    : on set, new value (a scalar or undef, optional)
548
549=cut
550
551sub want_all{
552	my $self = shift;
553
554	return $self->{'want_all'} = shift if @_;
555	return $self->{'want_all'};
556}
557
558=head2 get_object_conditions
559
560 Title   : get_object_conditions
561 Usage   :
562 Function: Get the list of conditions an object must meet in order to
563           be 'wanted.' See want_object() for where this is used.
564
565           Conditions in this implementation are closures (anonymous
566           functions) which are passed one parameter, a hash reference
567           the keys of which are equal to initialization
568           parameters. The closure must return TRUE to make the object
569           'wanted.'
570
571           Conditions will be implicitly ANDed.
572
573 Example :
574 Returns : a list of closures
575 Args    : none
576
577=cut
578
579sub get_object_conditions{
580	my $self = shift;
581
582	return @{$self->{'object_conds'}};
583}
584
585=head2 add_object_condition
586
587 Title   : add_object_condition
588 Usage   :
589 Function: Adds a condition an object must meet in order to be 'wanted.'
590           See want_object() for where this is used.
591
592           Conditions in this implementation must be closures
593           (anonymous functions). These will be passed one parameter,
594           which is a hash reference with the sequence object
595           initialization parameters being the keys.
596
597           Conditions are implicitly ANDed. If you want other
598           operators, perform those tests inside of one closure
599           instead of multiple.  This will also be more efficient.
600
601 Example :
602 Returns : TRUE
603 Args    : the list of conditions
604
605=cut
606
607sub add_object_condition{
608	my ($self,@conds) = @_;
609
610	if(grep { ref($_) ne 'CODE'; } @conds) {
611		$self->throw("conditions against which to validate an object ".
612						 "must be anonymous code blocks");
613	}
614	push(@{$self->{'object_conds'}}, @conds);
615	return 1;
616}
617
618=head2 remove_object_conditions
619
620 Title   : remove_object_conditions
621 Usage   :
622 Function: Removes the conditions an object must meet in order to be
623           'wanted.'
624 Example :
625 Returns : The list of previously set conditions (an array of closures)
626 Args    : none
627
628=cut
629
630sub remove_object_conditions{
631	my $self = shift;
632	my @conds = $self->get_object_conditions();
633	$self->{'object_conds'} = [];
634	return @conds;
635}
636
637=head1 Methods to control what type of object is built
638
639=cut
640
641=head2 sequence_factory
642
643 Title   : sequence_factory
644 Usage   : $obj->sequence_factory($newval)
645 Function: Get/set the sequence factory to be used by this object
646           builder.
647 Example :
648 Returns : the Bio::Factory::SequenceFactoryI implementing object to use
649 Args    : on set, new value (a Bio::Factory::SequenceFactoryI
650           implementing object or undef, optional)
651
652=cut
653
654sub sequence_factory{
655	my $self = shift;
656
657	if(@_) {
658		delete $self->{'_objskel'};
659		return $self->{'sequence_factory'} = shift;
660	}
661	return $self->{'sequence_factory'};
662}
663
6641;
665