1#
2# BioPerl module for Bio::Tools::Run::WrapperBase::CommandExts
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Mark A. Jensen <maj -at- fortinbras -dot- us>
7#
8# Copyright Mark A. Jensen
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::Tools::Run::WrapperBase::CommandExts - Extensions to WrapperBase for handling programs with commands *ALPHA*
17
18=head1 SYNOPSIS
19
20Devs, see L</DEVELOPER INTERFACE>.
21Users, see L</USER INTERFACE>.
22
23=head1 DESCRIPTION
24
25This is a developer-focused experimental module. The main idea is to
26extend L<Bio::Tools::Run::WrapperBase> to make it relatively easy to
27create run wrappers around I<suites> of related programs, like
28C<samtools> or C<blast+>.
29
30Some definitions:
31
32=over
33
34=item * program
35
36The program is the command-line frontend application. C<samtools>, for example, is run from the command line as follows:
37
38 $ samtools view -bS in.bam > out.sam
39 $ samtools faidx
40
41=item * command
42
43The command is the specific component of a suite run by executing the
44program. In the example above, C<view> and C<faidx> are commands.
45
46=item * command prefix
47
48The command prefix is an abbreviation of the command name used
49internally by C<CommandExts> method, and sometimes by the user of the
50factory for specifying command line parameters to subcommands of
51composite commands.
52
53=item * composite command
54
55A composite command is a pipeline or script representing a series of
56separate executions of different commands. Composite commands can be
57specified by configuring C<CommandExts> appropriately; the composite
58command can be run by the user from a factory in the same way as
59ordinary commands.
60
61=item * options, parameters, switches and filespecs
62
63An option is any command-line option; i.e., a specification set off by
64a command-line by a specifier (like C<-v> or C<--outfile>). Parameters
65are command-line options that accept a value (C<-title mydb>);
66switches are boolean flags (C<--no-filter>). Filespecs are barewords
67at the end of the command line that usually indicate input or output
68files. In this module, this includes files that capture STDIN, STDOUT,
69or STDERR via redirection.
70
71=item * pseudo-program
72
73A "pseudo-program" is a way to refer to a collection of related
74applications that are run independently from the command line, rather
75than via a frontend program. The C<blast+> suite of programs is an
76example: C<blastn>, C<makeblastdb>, etc. C<CommandExts> can be
77configured to create a single factory for a suite of related,
78independent programs that treats each independent program as a
79"pseudo-program" command.
80
81=back
82
83This module essentially adds the non-assembler-specific wrapper
84machinery of fangly's L<Bio::Tools::Run::AssemblerBase> to the
85L<Bio::Tools::Run::WrapperBase> namespace, adding the general
86command-handling capability of L<Bio::Tools::Run::BWA>. It creates run
87factories that are automatically Bio::ParameterBaseI compliant,
88meaning that C<available_parameters()>, C<set_parameters()>,
89C<get_parameters>, C<reset_parameters()>, and C<parameters_changed()>
90are available.
91
92=head1 DEVELOPER INTERFACE
93
94C<CommandExts> is currently set up to read particular package globals
95which define the program, the commands available, command-line options
96for those commands, and human-readable aliases for those options.
97
98The easiest way to use C<CommandExts> is probably to create two modules:
99
100 Bio::Tools::Run::YourRunPkg
101 Bio::Tools::Run::YourRunPkg::Config
102
103The package globals should be defined in the C<Config> module, and the
104run package itself should begin with the following mantra:
105
106 use YourRunPkg::Config;
107 use Bio::Tools::Run::WrapperBase;
108 use Bio::Tools::Run::WrapperBase::CommandExts;
109 sub new {
110     my $class = shift;
111     my @args = @_;
112     my $self = $class->SUPER::new(@args);
113     ...
114     return $self;
115 }
116
117The following globals can/should be defined in the C<Config> module:
118
119  $program_name
120  $program_dir
121  $use_dash
122  $join
123  @program_commands
124  %command_prefixes
125  @program_params
126  @program_switches
127  %param_translation
128  %composite_commands
129  %command_files
130
131See L</Config Globals> for detailed descriptions.
132
133The work of creating a run wrapper with C<CommandExts> lies mainly in
134setting up the globals. The key methods for the developer interface are:
135
136=over
137
138=item * program_dir($path_to_programs)
139
140Set this to point the factory to the executables.
141
142=item * _run(@file_args)
143
144Runs an instantiated factory with the given file args. Use in the
145 C<run()> method override.
146
147=item *  _create_factory_set()
148
149Returns a hash of instantiated factories for each true command from a
150composite command factory. The hash keys are the true command names, so
151you could do
152
153 $cmds = $composite_fac->_create_factory_set;
154 for (@true_commands) {
155    $cmds->{$_}->_run(@file_args);
156 }
157
158=item * executables($cmd,[$fullpath])
159
160For pseudo-programs, this gets/sets the full path to the executable of
161the true program corresponding to the command C<$cmd>.
162
163=back
164
165=head2 Implementing Composite Commands
166
167=head2 Implementing Pseudo-programs
168
169To indicate that a package wraps disparate programs under a single pseudo program, use an asterisk before the program name:
170
171 package Bio::Tools::Run::YourPkg::Config;
172 ...
173 our $program_name = '*blast+';
174
175and C<_run> will know what to do. Specify the rest of the globals as
176if the desired programs were commands. Use the basename of the
177programs for the command names.
178
179If all the programs can be found in a single directory, just specify
180that directory in C<program_dir()>. If not, use C<executables()> to set the paths to each program explicitly:
181
182 foreach (keys %cmdpaths) {
183    $self->executables($_, $cmdpaths{$_});
184 }
185
186=head2 Config Globals
187
188Here is an example config file. Further details in prose are below.
189
190 package Dummy::Config;
191 use strict;
192 use warnings;
193 no warnings qw(qw);
194 use Exporter;
195 our (@ISA, @EXPORT, @EXPORT_OK);
196 push @ISA, 'Exporter';
197 @EXPORT = qw(
198              $program_name
199              $program_dir
200              $use_dash
201              $join
202              @program_commands
203              %command_prefixes
204              @program_params
205              @program_switches
206              %param_translation
207              %command_files
208              %composite_commands
209             );
210
211 our $program_name = '*flurb';
212 our $program_dir = 'C:\cygwin\usr\local\bin';
213 our $use_dash = 'mixed';
214 our $join = ' ';
215
216 our @program_commands = qw(
217  rpsblast
218  find
219  goob
220  blorb
221  multiglob
222   );
223
224 our %command_prefixes = (
225     blastp => 'blp',
226     tblastn => 'tbn',
227     goob => 'g',
228     blorb => 'b',
229     multiglob => 'm'
230     );
231
232 our @program_params = qw(
233     command
234     g|narf
235     g|schlurb
236     b|scroob
237     b|frelb
238     m|trud
239 );
240
241 our @program_switches = qw(
242     g|freen
243     b|klep
244 );
245
246 our %param_translation = (
247     'g|narf'     => 'n',
248     'g|schlurb'  => 'schlurb',
249     'g|freen'    => 'f',
250     'b|scroob'   => 's',
251     'b|frelb'    => 'frelb'
252     );
253
254 our %command_files = (
255     'goob'       => [qw( fas faq )],
256     );
257
258 our %composite_commands = (
259     'multiglob' => [qw( blorb goob )]
260     );
261 1;
262
263C<$use_dash> can be one of C<single>, C<double>, or C<mixed>. See L<Bio::Tools::Run::WrapperBase>.
264
265There is a syntax for the C<%command_files> specification. The token
266matching C<[a-zA-Z0-9_]+> in each element of each arrayref becomes the
267named filespec parameter for the C<_run()> method in the wrapper
268class. Additional symbols surrounding this token indicate how this
269argument should be handled. Some examples:
270
271 >out  : stdout is redirected into the file
272         specified by (..., -out => $file,... )
273 <in   : stdin is accepted from the file
274         specified by (..., -in => $file,... )
275 2>log : stderr is redirected into the file
276         specified by (..., -log => $file,... )
277 #opt  : this filespec argument is optional
278         (no throw if -opt => $option is missing)
279 2>#log: if -log is not specified in the arguments, the stderr()
280         method will capture stderr
281 *lst  : this filespec can take multiple arguments,
282         specify using an arrayref (..., -lst => [$file1, $file2], ...)
283 *#lst : an optional list
284
285The tokens above are examples; they can be anything matching the above regexp.
286
287=head1 USER INTERFACE
288
289Using a wrapper created with C<Bio::Tools::Run::WrapperBase::CommandExts>:
290
291=over
292
293=item * Getting a list of available commands, parameters, and filespecs:
294
295To get a list of commands, simply:
296
297 @commands = Bio::Tools::Run::ThePkg->available_commands;
298
299The wrapper will generally have human-readable aliases for each of the
300command-line options for the wrapped program and commands. To obtain a
301list of the parameters and switches available for a particular
302command, do
303
304 $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb' );
305 @params = $factory->available_parameters('params');
306 @switches = $factory->available_parameters('switches');
307 @filespec = $factory->available_parameters('filespec');
308 @filespec = $factory->filespec; # alias
309
310=item * Create factories
311
312The factory is a handle on the program and command you wish to
313run. Create a factory using C<new> to set command-line parameters:
314
315 $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb',
316                                          -freen => 1,
317                                          -furschlugginer => 'vreeble' );
318
319A shorthand for this is:
320
321 $factory = Bio::Tools::Run::ThePkg->new_glurb(
322                                       -freen => 1,
323                                       -furschlugginer => 'vreeble' );
324
325=item * Running programs
326
327To run the program, use the C<run> method, providing filespecs as arguments
328
329 $factory = Bio::Tools::Run::ThePkg->new_assemble( -min_qual => 63 );
330 $factory->run( -faq1 => 'read1.fq', -faq2 => 'read2.fq',
331                -ref => 'refseq.fas', -out => 'new.sam' );
332 # do another
333 $factory->run( -faq1 => 'read-old1.fq', -faq2 => 'read-old2.fq',
334                -ref => 'refseq.fas', -out => 'old.sam' );
335
336Messages on STDOUT and STDERR are dumped into their respective attributes:
337
338 $stdout = $factory->stdout;
339 $stderr = $factory->stderr;
340
341unless STDOUT and/or STDERR are part of the named files in the filespec.
342
343=item * Setting/getting/resetting/polling parameters.
344
345A C<CommandExts>-based factory is always L<Bio::ParameterBaseI>
346compliant. That means that you may set, get, and reset parameters
347using C<set_parameters()>, C<get_parameters()>, and
348C<reset_parameters>. You can ask whether parameters have changed since
349they were last accessed by using the predicate
350C<parameters_changed>. See L<Bio::ParameterBaseI> for more details.
351
352Once set, parameters become attributes of the factory. Thus, you can get their values as follows:
353
354 if ($factory->freen) {
355    $furs = $factory->furshlugginer;
356    #...
357 }
358
359=back
360
361=head1 FEEDBACK
362
363=head2 Mailing Lists
364
365User feedback is an integral part of the evolution of this and other
366Bioperl modules. Send your comments and suggestions preferably to
367the Bioperl mailing list.  Your participation is much appreciated.
368
369  bioperl-l@bioperl.org                  - General discussion
370http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
371
372=head2 Support
373
374Please direct usage questions or support issues to the mailing list:
375
376L<bioperl-l@bioperl.org>
377
378rather than to the module maintainer directly. Many experienced and
379reponsive experts will be able look at the problem and quickly
380address it. Please include a thorough description of the problem
381with code and data examples if at all possible.
382
383=head2 Reporting Bugs
384
385Report bugs to the Bioperl bug tracking system to help us keep track
386of the bugs and their resolution. Bug reports can be submitted via
387the web:
388
389  https://github.com/bioperl/bioperl-live/issues
390
391=head1 AUTHOR - Mark A. Jensen
392
393Email maj -at- fortinbras -dot- us
394
395Describe contact details here
396
397=head1 CONTRIBUTORS
398
399Dan Kortschak ( dan -dot- kortschak -at- adelaide -dot- edu -dot- au )
400
401=head1 APPENDIX
402
403The rest of the documentation details each of the object methods.
404Internal methods are usually preceded with a _
405
406=cut
407
408# Let the code begin...
409
410package Bio::Tools::Run::WrapperBase; # need these methods in WrapperBase/maj
411$Bio::Tools::Run::WrapperBase::VERSION = '1.7.7';
412use strict;
413use warnings;
414no warnings qw(redefine);
415
416use Bio::Root::Root;
417use File::Spec;
418use IPC::Run;
419use base qw(Bio::Root::Root Bio::ParameterBaseI);
420
421our $AUTOLOAD;
422
423=head2 new()
424
425 Title   : new
426 Usage   :
427 Function: constructor for WrapperBase::CommandExts ;
428           correctly binds configuration variables
429           to the WrapperBase object
430 Returns : Bio::Tools::Run::WrapperBase object with command extensions
431 Args    :
432 Note    : this method subsumes the old _register_program_commands and
433           _set_program_options, leaving out the assembler-specific
434           parms ($qual_param and out_type())
435
436=cut
437
438sub new {
439    my ($class, @args) = @_;
440    my $self = bless ({}, $class);
441    # pull in *copies* of the Config variables from the caller namespace:
442    my ($pkg, @goob) = caller();
443    my ($commands,
444    $prefixes,
445    $params,
446    $switches,
447    $translation,
448    $use_dash,
449    $join,
450    $name,
451    $dir,
452    $composite_commands,
453    $files);
454    for (qw( @program_commands
455             %command_prefixes
456             @program_params
457             @program_switches
458             %param_translation
459             $use_dash
460             $join
461             $program_name
462             $program_dir
463             %composite_commands
464             %command_files ) ) {
465    my ($sigil, $var) = m/(.)(.*)/;
466    my $qualvar = "${sigil}${pkg}::${var}";
467    for ($sigil) {
468        /\@/ && do { $qualvar = "\[$qualvar\]" };
469        /\%/ && do { $qualvar = "\{$qualvar\}" };
470    }
471    my $locvar = "\$${var}";
472    $locvar =~ s/program_|command_|param_//g;
473    eval "$locvar = $qualvar";
474    }
475    # set up the info registry hash
476    my %registry;
477    if ($composite_commands) {
478    $self->_register_composite_commands($composite_commands,
479                        $params,
480                        $switches,
481                        $prefixes);
482    }
483    @registry{qw( _commands _prefixes _files
484                  _params _switches _translation
485                  _composite_commands )} =
486    ($commands, $prefixes, $files,
487     $params, $switches, $translation,
488     $composite_commands);
489    $self->{_options} = \%registry;
490    if (not defined $use_dash) {
491    $self->{'_options'}->{'_dash'}      = 1;
492    } else {
493    $self->{'_options'}->{'_dash'}      = $use_dash;
494    }
495    if (not defined $join) {
496    $self->{'_options'}->{'_join'}      = ' ';
497    } else {
498    $self->{'_options'}->{'_join'}      = $join;
499    }
500    if ($name =~ /^\*/) {
501    $self->is_pseudo(1);
502    $name =~ s/^\*//;
503    }
504    $self->program_name($name) if not defined $self->program_name();
505    $self->program_dir($dir) if not defined $self->program_dir();
506    $self->set_parameters(@args);
507    $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI
508    return $self;
509}
510
511=head2 program_name
512
513 Title   : program_name
514 Usage   : $factory->program_name($name)
515 Function: get/set the executable name
516 Returns:  string
517 Args    : string
518
519=cut
520
521sub program_name {
522    my ($self, $val) = @_;
523    $self->{'_program_name'} = $val if $val;
524    return $self->{'_program_name'};
525}
526
527=head2 program_dir
528
529 Title   : program_dir
530 Usage   : $factory->program_dir($dir)
531 Function: get/set the program dir
532 Returns:  string
533 Args    : string
534
535=cut
536
537sub program_dir {
538    my ($self, $val) = @_;
539    $self->{'_program_dir'} = $val if $val;
540    return $self->{'_program_dir'};
541}
542
543=head2 _register_program_commands()
544
545 Title   : _register_program_commands
546 Usage   : $factory->_register_program_commands( \@commands, \%prefixes )
547 Function: Register the commands a program accepts (for programs that act
548           as frontends for a set of commands, each command having its own
549           set of params/switches)
550 Returns : true on success
551 Args    : arrayref to a list of commands (scalar strings),
552           hashref to a translation table of the form
553           { $prefix1 => $command1, ... } [optional]
554 Note    : To implement a program with this kind of calling structure,
555           include a parameter called 'command' in the
556           @program_params global
557 Note    : The translation table is used to associate parameters and
558           switches specified in _set_program_options with the correct
559           program command. In the globals @program_params and
560           @program_switches, specify elements as 'prefix1|param' and
561           'prefix1|switch', etc.
562
563=cut
564
565=head2 _set_program_options
566
567 Title   : _set_program_options
568 Usage   : $factory->_set_program_options( \@ args );
569 Function: Register the parameters and flags that an assembler takes.
570 Returns : 1 for success
571 Args    : - arguments passed by the user
572           - parameters that the program accepts, optional (default: none)
573           - switches that the program accepts, optional (default: none)
574           - parameter translation, optional (default: no translation occurs)
575           - dash option for the program parameters, [1|single|double|mixed],
576             optional (default: yes, use single dashes only)
577           - join, optional (default: ' ')
578
579=cut
580
581=head2 _translate_params
582
583 Title   : _translate_params
584 Usage   : @options = @{$assembler->_translate_params( )};
585 Function: Translate the Bioperl arguments into the arguments to pass to the
586           program on the command line
587 Returns : Arrayref of arguments
588 Args    : none
589
590=cut
591
592sub _translate_params {
593  my ($self)   = @_;
594  # Get option string
595  my ($params, $switches, $join, $dash, $translat) =
596      @{$self->{_options}}{qw(_params _switches _join _dash _translation)};
597
598  # access the multiple dash choices of _setparams...
599  my @dash_args;
600  $dash ||= 1; # default as advertised
601  for ($dash) {
602      $_ eq '1' && do {
603      @dash_args = ( -dash => 1 );
604      last;
605      };
606      /^s/ && do { #single dash only
607      @dash_args = ( -dash => 1);
608      last;
609      };
610      /^d/ && do { # double dash only
611      @dash_args = ( -double_dash => 1);
612      last;
613      };
614      /^m/ && do { # mixed dash: one-letter opts get -,
615                  # long opts get --
616      @dash_args = ( -mixed_dash => 1);
617      last;
618      };
619      do {
620      $self->warn( "Dash spec '$dash' not recognized; using 'single'" );
621      @dash_args = ( -dash => 1 );
622      };
623  }
624  my $options  = $self->_setparams(
625    -params    => $params,
626    -switches  => $switches,
627    -join      => $join,
628    @dash_args
629  );
630
631  # Translate options
632  # parse more carefully - bioperl-run issue #12
633  $options =~ s/^\s+//;
634  $options =~ s/\s+$//;
635  my @options;
636  my $in_quotes;
637  for (split(/(\s|$join)/, $options)) {
638    if (/^-/) {
639      push @options, $_;
640    }
641    elsif (s/^"//) {
642      $in_quotes=1 unless (s/["']$//);
643      push @options, $_;
644    }
645    elsif (s/"$//) {
646      $options[-1] .= $_;
647      $in_quotes=0;
648    }
649    else {
650      $in_quotes ? $options[-1] .= $_ :
651    push(@options, $_);
652    }
653  }
654  $self->throw("Unmatched quote in option value") if $in_quotes;
655  for (my $i = 0; $i < scalar @options; $i++) {
656    my ($prefix, $name) = ( $options[$i] =~ m/^(-{0,2})(.+)$/ );
657    if (defined $name) {
658    if ($name =~ /command/i) {
659        $name = $options[$i+2]; # get the command
660        splice @options, $i, 4;
661        $i--;
662        # don't add the command if this is a pseudo-program
663        unshift @options, $name unless ($self->is_pseudo); # put command first
664    }
665    elsif (defined $$translat{$name}) {
666        $options[$i] = $prefix.$$translat{$name};
667    }
668    }
669    else {
670    splice @options, $i, 1;
671    $i--;
672    }
673  }
674
675  @options = grep (!/^\s*$/,@options);
676  # this is a kludge for mixed options: the reason mixed doesn't
677  # work right on the pass through _setparams is that the
678  # *aliases* and not the actual params are passed to it.
679  # here we just rejigger the dashes
680  if ($dash =~ /^m/) {
681      s/--([a-z0-9](?:\s|$))/-$1/gi for @options;
682  }
683  # Now arrayify the options
684
685  return \@options;
686}
687
688=head2 executable()
689
690 Title   : executable
691 Usage   :
692 Function: find the full path to the main executable,
693           or to the command executable for pseudo-programs
694 Returns : full path, if found
695 Args    : [optional] explicit path to the executable
696           (will set the appropriate command exec if
697            applicable)
698           [optional] boolean flag whether or not to warn when exe no found
699 Note    : overrides WrapperBase.pm
700
701=cut
702
703sub executable {
704    my $self = shift;
705    my ($exe, $warn) = @_;
706    if ($self->is_pseudo) {
707    return $self->{_pathtoexe} = $self->executables($self->command,$exe);
708    }
709
710    # otherwise
711    # setter
712    if (defined $exe) {
713    $self->throw("binary '$exe' does not exist") unless -e $exe;
714    $self->throw("'$exe' is not executable") unless -x $exe;
715    return $self->{_pathtoexe} = $exe;
716    }
717
718    # getter
719    return $self->{_pathtoexe} if defined $self->{_pathstoexe};
720
721    # finder
722    return $self->{_pathtoexe} = $self->_find_executable($exe, $warn);
723}
724
725=head2 executables()
726
727 Title   : executables
728 Usage   :
729 Function: find the full path to a command's executable
730 Returns : full path (scalar string)
731 Args    : command (scalar string),
732           [optional] explicit path to this command exe
733           [optional] boolean flag whether or not to warn when exe no found
734
735=cut
736
737sub executables {
738    my $self = shift;
739    my ($cmd, $exe, $warn) = @_;
740    # for now, barf if this is not a pseudo program
741    $self->throw("This wrapper represents a single program with commands, not multiple programs; can't use executables()") unless $self->is_pseudo;
742    $self->throw("Command name required at arg 1") unless defined $cmd;
743    $self->throw("The desired executable '$cmd' is not registered as a command") unless grep /^$cmd$/, @{$self->{_options}->{_commands}};
744
745    # setter
746    if (defined $exe) {
747    $self->throw("binary '$exe' does not exist") unless -e $exe;
748    $self->throw("'$exe' is not executable") unless -x $exe;
749    $self->{_pathstoexe} = {} unless defined $self->{_pathstoexe};
750    return $self->{_pathstoexe}->{$cmd} = $exe;
751    }
752
753    # getter
754    return $self->{_pathstoexe}->{$cmd} if defined $self->{_pathstoexe}->{$cmd};
755
756    $exe ||= $cmd;
757    # finder
758    return $self->{_pathstoexe}->{$cmd} = $self->_find_executable($exe, $warn);
759}
760
761=head2 _find_executable()
762
763 Title   : _find_executable
764 Usage   : my $exe_path = $fac->_find_executable($exe, $warn);
765 Function: find the full path to a named executable,
766 Returns : full path, if found
767 Args    : name of executable to find
768           [optional] boolean flag whether or not to warn when exe no found
769 Note    : differs from executable and executables in not
770           setting any object attributes
771
772=cut
773
774sub _find_executable {
775    my $self = shift;
776    my ($exe, $warn) = @_;
777
778    if ($self->is_pseudo && !$exe) {
779    if (!$self->command) {
780        # this throw probably appropriate
781        # the rest are now warns if $warn.../maj
782        $self->throw(
783        "The ".__PACKAGE__." wrapper represents several different programs;".
784        "arg1 to _find_executable must be specified explicitly,".
785        "or the command() attribute set");
786    }
787    else {
788        $exe = $self->command;
789    }
790    }
791    $exe ||= $self->program_path;
792
793    my $path;
794    if ($self->program_dir) {
795    $path = File::Spec->catfile($self->program_dir, $exe);
796    } else {
797    $path = $exe;
798    $self->warn('Program directory not specified; use program_dir($path).') if $warn;
799    }
800
801    # use provided info - we are allowed to follow symlinks, but refuse directories
802    map { return $path.$_ if ( -x $path.$_ && !(-d $path.$_) ) } ('', '.exe') if defined $path;
803
804    # couldn't get path to executable from provided info, so use system path
805    $path = $path ? " in $path" : undef;
806    $self->warn("Executable $exe not found$path, trying system path...") if $warn;
807    if ($path = $self->io->exists_exe($exe)) {
808    return $path;
809    } else {
810    $self->warn("Cannot find executable for program '".($self->is_pseudo ? $self->command : $self->program_name)."'") if $warn;
811    return;
812    }
813}
814
815=head2 _register_composite_commands()
816
817 Title   : _register_composite_commands
818 Usage   :
819 Function: adds subcomand params and switches for composite commands
820 Returns : true on success
821 Args    : \%composite_commands,
822           \@program_params,
823           \@program_switches
824
825=cut
826
827sub _register_composite_commands {
828    my $self = shift;
829    my ($composite_commands, $program_params,
830    $program_switches, $command_prefixes) = @_;
831    my @sub_params;
832    my @sub_switches;
833    foreach my $cmd (keys %$composite_commands) {
834    my $pfx = $command_prefixes->{$cmd} || $cmd;
835    foreach my $subcmd ( @{$$composite_commands{$cmd}} ) {
836        my $spfx = $command_prefixes->{$subcmd} || $subcmd;
837        my @sub_program_params = grep /^$spfx\|/, @$program_params;
838        my @sub_program_switches = grep /^$spfx\|/, @$program_switches;
839        for (@sub_program_params) {
840        m/^$spfx\|(.*)/;
841        push @sub_params, "$pfx\|${spfx}_".$1;
842        }
843        for (@sub_program_switches) {
844        m/^$spfx\|(.*)/;
845        push @sub_switches, "$pfx\|${spfx}_".$1;
846        }
847    }
848    }
849    push @$program_params, @sub_params;
850    push @$program_switches, @sub_switches;
851    # translations for subcmd params/switches not necessary
852    return 1;
853}
854
855=head2 _create_factory_set()
856
857 Title   : _create_factory_set
858 Usage   : @facs = $self->_create_factory_set
859 Function: instantiate a set of individual command factories for
860           a given composite command
861           Factories will have the correct parameter fields set for
862           their own subcommand
863 Returns : hash of factories: ( $subcmd_prefix => $subcmd_factory, ... )
864 Args    : none
865
866=cut
867
868sub _create_factory_set {
869    my $self = shift;
870    $self->throw('command not set') unless $self->command;
871    my $cmd = $self->command;
872    $self->throw('_create_factory_set only works on composite commands')
873    unless grep /^$cmd$/, keys %{$self->{_options}->{_composite_commands}};
874    my %ret;
875    my $class = ref $self;
876    my $subargs_hash = $self->_collate_subcmd_args($cmd);
877    for (keys %$subargs_hash) {
878    $ret{$_} = $class->new( -command => $_,  @{$$subargs_hash{$_}} );
879    }
880    return %ret;
881}
882
883=head2 _collate_subcmd_args()
884
885 Title   : _collate_subcmd_args
886 Usage   : $args_hash = $self->_collate_subcmd_args
887 Function: collate parameters and switches into command-specific
888           arg lists for passing to new()
889 Returns : hash of named argument lists
890 Args    : [optional] composite cmd prefix (scalar string)
891           [default is 'run']
892
893=cut
894
895sub _collate_subcmd_args {
896    my $self = shift;
897    my $cmd = shift;
898    my %ret;
899    # default command is 'run'
900    $cmd ||= 'run';
901    return unless $self->{'_options'}->{'_composite_commands'};
902    return unless $self->{'_options'}->{'_composite_commands'}->{$cmd};
903    my @subcmds = @{$self->{'_options'}->{'_composite_commands'}->{$cmd}};
904
905    my $cur_options = $self->{'_options'};
906    # collate
907    foreach my $subcmd (@subcmds) {
908    # find the composite cmd form of the argument in
909    # the current params and switches
910    # e.g., map_max_mismatches
911    my $pfx = $self->{_options}->{_prefixes}->{$subcmd} || $subcmd;
912    my @params = grep /^${pfx}_/, @{$$cur_options{'_params'}};
913    my @switches = grep /^${pfx}_/, @{$$cur_options{'_switches'}};
914    $ret{$subcmd} = [];
915    # create an argument list suitable for passing to new() of
916    # the subcommand factory...
917    foreach my $opt (@params, @switches) {
918        my $subopt = $opt;
919        $subopt =~ s/^${pfx}_//;
920        push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt;
921    }
922    }
923    return \%ret;
924}
925
926=head2 _run
927
928 Title   : _run
929 Usage   : $fac->_run( @file_args )
930 Function: Run a command as specified during object construction
931 Returns : true on success
932 Args    : a specification of the files to operate on according
933           to the filespec
934
935=cut
936
937sub _run {
938    my ($self, @args) = @_;
939    # _translate_params will provide an array of command/parameters/switches
940    # -- these are set at object construction
941    # to set up the run, need to add the files to the call
942    # -- provide these as arguments to this function
943    my $cmd = $self->command if $self->can('command');
944    my $opts = $self->{_options};
945    my %args;
946    $self->throw("No command specified for the object") unless $cmd;
947    # setup files necessary for this command
948    my $filespec = $opts->{'_files'}->{$cmd};
949    my @switches;
950    my ($in, $out, $err);
951    # some applications rely completely on switches
952    if (defined $filespec && @$filespec) {
953        # parse args based on filespec
954        # require named args
955        $self->throw("Named args are required") unless !(@args % 2);
956        s/^-// for @args;
957        %args = @args;
958        # validate
959        my @req = map {
960            my $s = $_;
961            $s =~ s/^-.*\|//;
962            $s =~ s/^[012]?[<>]//;
963            $s =~ s/[^a-zA-Z0-9_]//g;
964            $s
965        } grep !/[#]/, @$filespec;
966        !defined($args{$_}) && $self->throw("Required filearg '$_' not specified") for @req;
967        # set up redirects and file switches
968        for (@$filespec) {
969            m/^1?>#?(.*)/ && do {
970                defined($args{$1}) && ( open $out, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") );
971                next;
972            };
973            m/^2>#?(.*)/ && do {
974                defined($args{$1}) && ( open $err, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") );
975                next;
976            };
977            m/^<#?(.*)/ && do {
978                defined($args{$1}) && ( open $in, '<', $args{$1} or $self->throw("Could not read file '$args{$1}': $!") );
979                next;
980            };
981            if (m/^-(.*)\|/) {
982                push @switches, $self->_dash_switch($1);
983            } else {
984                push @switches, undef;
985            }
986        }
987    }
988    my $dum;
989    $in || ($in = \$dum);
990    $out || ($out = \$self->{'stdout'});
991    $err || ($err = \$self->{'stderr'});
992
993    # Get program executable
994    my $exe = $self->executable;
995    $self->throw("Can't find executable for '".($self->is_pseudo ? $self->command : $self->program_name)."'; can't continue") unless $exe;
996
997    # Get command-line options
998    my $options = $self->_translate_params();
999    # Get file specs sans redirects in correct order
1000    my @specs = map {
1001        my $s = $_;
1002        $s =~ s/^-.*\|//;
1003        $s =~ s/[^a-zA-Z0-9_]//g;
1004        $s
1005    } grep !/[<>]/, @$filespec;
1006    my @files = @args{@specs};
1007    # expand arrayrefs
1008    my $l = $#files;
1009
1010    # Note: below code block may be brittle, see link on this:
1011    # http://lists.open-bio.org/pipermail/bioperl-l/2010-June/033439.html
1012
1013    for (0..$l) {
1014        if (ref($files[$_]) eq 'ARRAY') {
1015            splice(@switches, $_, 1, ($switches[$_]) x @{$files[$_]});
1016            splice(@files, $_, 1, @{$files[$_]});
1017        }
1018    }
1019
1020    @files = map {
1021        my $s = shift @switches;
1022        defined $_ ? ($s, $_): ()
1023    } @files;
1024    @files = map { defined $_ ? $_ : () } @files; # squish undefs
1025    my @ipc_args = ( $exe, @$options, @files );
1026    $self->{_last_execution} = join( $self->{'_options'}->{'_join'}, @ipc_args );
1027    eval {
1028        IPC::Run::run(\@ipc_args, $in, $out, $err) or
1029            die ("There was a problem running $exe : ".$$err);
1030    };
1031
1032    if ($@) {
1033        $self->throw("$exe call crashed: $@") unless $self->no_throw_on_crash;
1034        return 0;
1035    }
1036
1037     return 1;
1038}
1039
1040
1041
1042=head2 no_throw_on_crash()
1043
1044 Title   : no_throw_on_crash
1045 Usage   :
1046 Function: prevent throw on execution error
1047 Returns :
1048 Args    : [optional] boolean
1049
1050=cut
1051
1052sub no_throw_on_crash {
1053    my $self = shift;
1054    return $self->{'_no_throw'} = shift if @_;
1055    return $self->{'_no_throw'};
1056}
1057
1058=head2 last_execution()
1059
1060 Title   : last_execution
1061 Usage   :
1062 Function: return the last executed command with options
1063 Returns : string of command line sent to IPC::Run
1064 Args    :
1065
1066=cut
1067
1068sub last_execution {
1069    my $self = shift;
1070    return $self->{'_last_execution'};
1071}
1072
1073=head2 _dash_switch()
1074
1075 Title   : _dash_switch
1076 Usage   : $version = $fac->_dash_switch( $switch )
1077 Function: Returns an appropriately dashed switch for the executable
1078 Args    : A string containing a switch without dashes
1079 Returns : string containing an appropriately dashed switch for the current executable
1080
1081=cut
1082
1083sub _dash_switch {
1084    my ($self, $switch) = @_;
1085
1086    my $dash = $self->{'_options'}->{'_dash'};
1087    for ($dash) {
1088        $_ eq '1' && do {
1089            $switch = '-'.$switch;
1090            last;
1091        };
1092        /^s/ && do { #single dash only
1093            $switch = '-'.$switch;
1094            last;
1095        };
1096        /^d/ && do { # double dash only
1097            $switch = '--'.$switch;
1098            last;
1099        };
1100        /^m/ && do { # mixed dash: one-letter opts get -,
1101            $switch = '-'.$switch;
1102            $switch =~ s/^(-[a-z0-9](?:\w+))$/-$1/i;
1103            last;
1104        };
1105        do {
1106            $self->warn( "Dash spec '$dash' not recognized; using 'single'" );
1107            $switch = '-'.$switch;
1108        };
1109    }
1110
1111    return $switch;
1112}
1113
1114=head2 stdout()
1115
1116 Title   : stdout
1117 Usage   : $fac->stdout()
1118 Function: store the output from STDOUT for the run,
1119           if no file specified in _run arguments
1120 Example :
1121 Returns : scalar string
1122 Args    : on set, new value (a scalar or undef, optional)
1123
1124=cut
1125
1126sub stdout {
1127    my $self = shift;
1128    return $self->{'stdout'} = shift if @_;
1129    return $self->{'stdout'};
1130}
1131
1132=head2 stderr()
1133
1134 Title   : stderr
1135 Usage   : $fac->stderr()
1136 Function: store the output from STDERR for the run,
1137           if no file is specified in _run arguments
1138 Example :
1139 Returns : scalar string
1140 Args    : on set, new value (a scalar or undef, optional)
1141
1142=cut
1143
1144sub stderr {
1145    my $self = shift;
1146    return $self->{'stderr'} = shift if @_;
1147    return $self->{'stderr'};
1148}
1149
1150=head2 is_pseudo()
1151
1152 Title   : is_pseudo
1153 Usage   : $obj->is_pseudo($newval)
1154 Function: returns true if this factory represents
1155           a pseudo-program
1156 Example :
1157 Returns : value of is_pseudo (boolean)
1158 Args    : on set, new value (a scalar or undef, optional)
1159
1160=cut
1161
1162sub is_pseudo {
1163    my $self = shift;
1164
1165    return $self->{'is_pseudo'} = shift if @_;
1166    return $self->{'is_pseudo'};
1167}
1168
1169=head2 AUTOLOAD
1170
1171AUTOLOAD permits
1172
1173 $class->new_yourcommand(@args);
1174
1175as an alias for
1176
1177 $class->new( -command => 'yourcommand', @args );
1178
1179=cut
1180
1181sub AUTOLOAD {
1182    my $class = shift;
1183    my $tok = $AUTOLOAD;
1184    my @args = @_;
1185    $tok =~ s/.*:://;
1186    unless ($tok =~ /^new_/) {
1187    $class->throw("Can't locate object method '$tok' via package '".ref($class)?ref($class):$class);
1188    }
1189    my ($cmd) = $tok =~ m/new_(.*)/;
1190    return $class->new( -command => $cmd, @args );
1191}
1192
1193=head1 Bio:ParameterBaseI compliance
1194
1195=head2 set_parameters()
1196
1197 Title   : set_parameters
1198 Usage   : $pobj->set_parameters(%params);
1199 Function: sets the parameters listed in the hash or array
1200 Returns : true on success
1201 Args    : [optional] hash or array of parameter/values.
1202
1203=cut
1204
1205sub set_parameters {
1206    my ($self, @args) = @_;
1207
1208    # currently stored stuff
1209    my $opts = $self->{'_options'};
1210    my $params = $opts->{'_params'};
1211    my $switches = $opts->{'_switches'};
1212    my $translation = $opts->{'_translation'};
1213    my $use_dash = $opts->{'_dash'};
1214    my $join = $opts->{'_join'};
1215    unless (($self->can('command') && $self->command)
1216        || (grep /command/, @args)) {
1217    push @args, '-command', 'run';
1218    }
1219    my %args = @args;
1220    my $cmd = $args{'-command'} || $args{'command'} || ($self->can('command') && $self->command);
1221    if ($cmd) {
1222    my (@p,@s, %x);
1223    $self->warn('Command present, but no commands registered') unless $self->{'_options'}->{'_commands'};
1224    $self->throw("Command '$cmd' not registered") unless grep /^$cmd$/, @{$self->{'_options'}->{'_commands'}};
1225    $cmd = $self->{_options}->{_prefixes}->{$cmd} || $cmd;
1226
1227    @p = (grep(!/^.*?\|/, @$params), grep(/^${cmd}\|/, @$params));
1228    @s = (grep(!/^.*?\|/, @$switches), grep(/^${cmd}\|/, @$switches));
1229    s/.*?\|// for @p;
1230    s/.*?\|// for @s;
1231    @x{@p, @s} = @{$translation}{
1232        grep( !/^.*?\|/, @$params, @$switches),
1233        grep(/^${cmd}\|/, @$params, @$switches) };
1234    $opts->{_translation} = $translation = \%x;
1235    $opts->{_params} = $params = \@p;
1236    $opts->{_switches} = $switches = \@s;
1237    }
1238    $self->_set_from_args(
1239    \@args,
1240    -methods => [ @$params, @$switches, 'program_name', 'program_dir', 'out_type' ],
1241    -create =>  1,
1242    # when our parms are accessed, signal parameters are unchanged for
1243    # future reads (until set_parameters is called)
1244    -code =>
1245    ' my $self = shift;
1246          $self->parameters_changed(0);
1247          return $self->{\'_\'.$method} = shift if @_;
1248          return $self->{\'_\'.$method};'
1249    );
1250    # the question is, are previously-set parameters left alone when
1251    # not specified in @args?
1252    $self->parameters_changed(1);
1253    return 1;
1254}
1255
1256=head2 reset_parameters()
1257
1258 Title   : reset_parameters
1259 Usage   : resets values
1260 Function: resets parameters to either undef or value in passed hash
1261 Returns : none
1262 Args    : [optional] hash of parameter-value pairs
1263
1264=cut
1265
1266sub reset_parameters {
1267    my ($self, @args) = @_;
1268
1269    my @reset_args;
1270    # currently stored stuff
1271    my $opts = $self->{'_options'};
1272    my $params = $opts->{'_params'};
1273    my $switches = $opts->{'_switches'};
1274    my $translation = $opts->{'_translation'};
1275    my $qual_param = $opts->{'_qual_param'};
1276    my $use_dash = $opts->{'_dash'};
1277    my $join = $opts->{'_join'};
1278
1279    # handle command name
1280    my %args = @args;
1281    my $cmd = $args{'-command'} || $args{'command'} || $self->command;
1282    $args{'command'} = $cmd;
1283    delete $args{'-command'};
1284    @args = %args;
1285    # don't like this, b/c _set_program_args will create a bunch of
1286    # accessors with undef values, but oh well for now /maj
1287
1288    for my $p (@$params) {
1289    push(@reset_args, $p => undef) unless grep /^[-]?$p$/, @args;
1290    }
1291    for my $s (@$switches) {
1292    push(@reset_args, $s => undef) unless grep /^[-]?$s$/, @args;
1293    }
1294    push @args, @reset_args;
1295    $self->set_parameters(@args);
1296    $self->parameters_changed(1);
1297}
1298
1299=head2 parameters_changed()
1300
1301 Title   : parameters_changed
1302 Usage   : if ($pobj->parameters_changed) {...}
1303 Function: Returns boolean true (1) if parameters have changed
1304 Returns : Boolean (0 or 1)
1305 Args    : [optional] Boolean
1306
1307=cut
1308
1309sub parameters_changed {
1310    my $self = shift;
1311    return $self->{'_parameters_changed'} = shift if @_;
1312    return $self->{'_parameters_changed'};
1313}
1314
1315=head2 available_parameters()
1316
1317 Title   : available_parameters
1318 Usage   : @params = $pobj->available_parameters()
1319 Function: Returns a list of the available parameters
1320 Returns : Array of parameters
1321 Args    : 'params' for settable program parameters
1322           'switches' for boolean program switches
1323           default: all
1324
1325=cut
1326
1327sub available_parameters {
1328    my $self = shift;
1329    my $subset = shift;
1330    my $opts = $self->{'_options'};
1331    my @ret;
1332    for ($subset) {
1333    (!defined || /^a/) && do {
1334        @ret = (@{$opts->{'_params'}}, @{$opts->{'_switches'}});
1335        last;
1336    };
1337    m/^p/i && do {
1338        @ret = @{$opts->{'_params'}};
1339        last;
1340    };
1341    m/^s/i && do {
1342        @ret = @{$opts->{'_switches'}};
1343        last;
1344    };
1345    m/^c/i && do {
1346        @ret = @{$opts->{'_commands'}};
1347        last;
1348    };
1349    m/^f/i && do { # get file spec
1350        return @{$opts->{'_files'}->{$self->command}};
1351    };
1352    do { #fail
1353        $self->throw("available_parameters: unrecognized subset");
1354    };
1355    }
1356    return @ret;
1357}
1358
1359sub available_commands { shift->available_parameters('commands') }
1360sub filespec { shift->available_parameters('filespec') }
1361
1362=head2 get_parameters()
1363
1364 Title   : get_parameters
1365 Usage   : %params = $pobj->get_parameters;
1366 Function: Returns list of key-value pairs of parameter => value
1367 Returns : List of key-value pairs
1368 Args    : [optional] A string is allowed if subsets are wanted or (if a
1369           parameter subset is default) 'all' to return all parameters
1370
1371=cut
1372
1373sub get_parameters {
1374    my $self = shift;
1375    my $subset = shift;
1376    $subset ||= 'all';
1377    my @ret;
1378    my $opts = $self->{'_options'};
1379    for ($subset) {
1380    m/^p/i && do { #params only
1381        for (@{$opts->{'_params'}}) {
1382        push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
1383        }
1384        last;
1385    };
1386    m/^s/i && do { #switches only
1387        for (@{$opts->{'_switches'}}) {
1388        push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
1389        }
1390        last;
1391    };
1392    m/^a/i && do { # all
1393        for ((@{$opts->{'_params'}},@{$opts->{'_switches'}})) {
1394        push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
1395        }
1396        last;
1397    };
1398    do {
1399        $self->throw("get_parameters: unrecognized subset");
1400    };
1401    }
1402    return @ret;
1403}
1404
14051;
1406