1# 2# BioPerl module for Bio::Tools::Run::WrapperBase::CommandExts 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Mark A. Jensen <maj -at- fortinbras -dot- us> 7# 8# Copyright Mark A. Jensen 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Tools::Run::WrapperBase::CommandExts - Extensions to WrapperBase for handling programs with commands *ALPHA* 17 18=head1 SYNOPSIS 19 20Devs, see L</DEVELOPER INTERFACE>. 21Users, see L</USER INTERFACE>. 22 23=head1 DESCRIPTION 24 25This is a developer-focused experimental module. The main idea is to 26extend L<Bio::Tools::Run::WrapperBase> to make it relatively easy to 27create run wrappers around I<suites> of related programs, like 28C<samtools> or C<blast+>. 29 30Some definitions: 31 32=over 33 34=item * program 35 36The program is the command-line frontend application. C<samtools>, for example, is run from the command line as follows: 37 38 $ samtools view -bS in.bam > out.sam 39 $ samtools faidx 40 41=item * command 42 43The command is the specific component of a suite run by executing the 44program. In the example above, C<view> and C<faidx> are commands. 45 46=item * command prefix 47 48The command prefix is an abbreviation of the command name used 49internally by C<CommandExts> method, and sometimes by the user of the 50factory for specifying command line parameters to subcommands of 51composite commands. 52 53=item * composite command 54 55A composite command is a pipeline or script representing a series of 56separate executions of different commands. Composite commands can be 57specified by configuring C<CommandExts> appropriately; the composite 58command can be run by the user from a factory in the same way as 59ordinary commands. 60 61=item * options, parameters, switches and filespecs 62 63An option is any command-line option; i.e., a specification set off by 64a command-line by a specifier (like C<-v> or C<--outfile>). Parameters 65are command-line options that accept a value (C<-title mydb>); 66switches are boolean flags (C<--no-filter>). Filespecs are barewords 67at the end of the command line that usually indicate input or output 68files. In this module, this includes files that capture STDIN, STDOUT, 69or STDERR via redirection. 70 71=item * pseudo-program 72 73A "pseudo-program" is a way to refer to a collection of related 74applications that are run independently from the command line, rather 75than via a frontend program. The C<blast+> suite of programs is an 76example: C<blastn>, C<makeblastdb>, etc. C<CommandExts> can be 77configured to create a single factory for a suite of related, 78independent programs that treats each independent program as a 79"pseudo-program" command. 80 81=back 82 83This module essentially adds the non-assembler-specific wrapper 84machinery of fangly's L<Bio::Tools::Run::AssemblerBase> to the 85L<Bio::Tools::Run::WrapperBase> namespace, adding the general 86command-handling capability of L<Bio::Tools::Run::BWA>. It creates run 87factories that are automatically Bio::ParameterBaseI compliant, 88meaning that C<available_parameters()>, C<set_parameters()>, 89C<get_parameters>, C<reset_parameters()>, and C<parameters_changed()> 90are available. 91 92=head1 DEVELOPER INTERFACE 93 94C<CommandExts> is currently set up to read particular package globals 95which define the program, the commands available, command-line options 96for those commands, and human-readable aliases for those options. 97 98The easiest way to use C<CommandExts> is probably to create two modules: 99 100 Bio::Tools::Run::YourRunPkg 101 Bio::Tools::Run::YourRunPkg::Config 102 103The package globals should be defined in the C<Config> module, and the 104run package itself should begin with the following mantra: 105 106 use YourRunPkg::Config; 107 use Bio::Tools::Run::WrapperBase; 108 use Bio::Tools::Run::WrapperBase::CommandExts; 109 sub new { 110 my $class = shift; 111 my @args = @_; 112 my $self = $class->SUPER::new(@args); 113 ... 114 return $self; 115 } 116 117The following globals can/should be defined in the C<Config> module: 118 119 $program_name 120 $program_dir 121 $use_dash 122 $join 123 @program_commands 124 %command_prefixes 125 @program_params 126 @program_switches 127 %param_translation 128 %composite_commands 129 %command_files 130 131See L</Config Globals> for detailed descriptions. 132 133The work of creating a run wrapper with C<CommandExts> lies mainly in 134setting up the globals. The key methods for the developer interface are: 135 136=over 137 138=item * program_dir($path_to_programs) 139 140Set this to point the factory to the executables. 141 142=item * _run(@file_args) 143 144Runs an instantiated factory with the given file args. Use in the 145 C<run()> method override. 146 147=item * _create_factory_set() 148 149Returns a hash of instantiated factories for each true command from a 150composite command factory. The hash keys are the true command names, so 151you could do 152 153 $cmds = $composite_fac->_create_factory_set; 154 for (@true_commands) { 155 $cmds->{$_}->_run(@file_args); 156 } 157 158=item * executables($cmd,[$fullpath]) 159 160For pseudo-programs, this gets/sets the full path to the executable of 161the true program corresponding to the command C<$cmd>. 162 163=back 164 165=head2 Implementing Composite Commands 166 167=head2 Implementing Pseudo-programs 168 169To indicate that a package wraps disparate programs under a single pseudo program, use an asterisk before the program name: 170 171 package Bio::Tools::Run::YourPkg::Config; 172 ... 173 our $program_name = '*blast+'; 174 175and C<_run> will know what to do. Specify the rest of the globals as 176if the desired programs were commands. Use the basename of the 177programs for the command names. 178 179If all the programs can be found in a single directory, just specify 180that directory in C<program_dir()>. If not, use C<executables()> to set the paths to each program explicitly: 181 182 foreach (keys %cmdpaths) { 183 $self->executables($_, $cmdpaths{$_}); 184 } 185 186=head2 Config Globals 187 188Here is an example config file. Further details in prose are below. 189 190 package Dummy::Config; 191 use strict; 192 use warnings; 193 no warnings qw(qw); 194 use Exporter; 195 our (@ISA, @EXPORT, @EXPORT_OK); 196 push @ISA, 'Exporter'; 197 @EXPORT = qw( 198 $program_name 199 $program_dir 200 $use_dash 201 $join 202 @program_commands 203 %command_prefixes 204 @program_params 205 @program_switches 206 %param_translation 207 %command_files 208 %composite_commands 209 ); 210 211 our $program_name = '*flurb'; 212 our $program_dir = 'C:\cygwin\usr\local\bin'; 213 our $use_dash = 'mixed'; 214 our $join = ' '; 215 216 our @program_commands = qw( 217 rpsblast 218 find 219 goob 220 blorb 221 multiglob 222 ); 223 224 our %command_prefixes = ( 225 blastp => 'blp', 226 tblastn => 'tbn', 227 goob => 'g', 228 blorb => 'b', 229 multiglob => 'm' 230 ); 231 232 our @program_params = qw( 233 command 234 g|narf 235 g|schlurb 236 b|scroob 237 b|frelb 238 m|trud 239 ); 240 241 our @program_switches = qw( 242 g|freen 243 b|klep 244 ); 245 246 our %param_translation = ( 247 'g|narf' => 'n', 248 'g|schlurb' => 'schlurb', 249 'g|freen' => 'f', 250 'b|scroob' => 's', 251 'b|frelb' => 'frelb' 252 ); 253 254 our %command_files = ( 255 'goob' => [qw( fas faq )], 256 ); 257 258 our %composite_commands = ( 259 'multiglob' => [qw( blorb goob )] 260 ); 261 1; 262 263C<$use_dash> can be one of C<single>, C<double>, or C<mixed>. See L<Bio::Tools::Run::WrapperBase>. 264 265There is a syntax for the C<%command_files> specification. The token 266matching C<[a-zA-Z0-9_]+> in each element of each arrayref becomes the 267named filespec parameter for the C<_run()> method in the wrapper 268class. Additional symbols surrounding this token indicate how this 269argument should be handled. Some examples: 270 271 >out : stdout is redirected into the file 272 specified by (..., -out => $file,... ) 273 <in : stdin is accepted from the file 274 specified by (..., -in => $file,... ) 275 2>log : stderr is redirected into the file 276 specified by (..., -log => $file,... ) 277 #opt : this filespec argument is optional 278 (no throw if -opt => $option is missing) 279 2>#log: if -log is not specified in the arguments, the stderr() 280 method will capture stderr 281 *lst : this filespec can take multiple arguments, 282 specify using an arrayref (..., -lst => [$file1, $file2], ...) 283 *#lst : an optional list 284 285The tokens above are examples; they can be anything matching the above regexp. 286 287=head1 USER INTERFACE 288 289Using a wrapper created with C<Bio::Tools::Run::WrapperBase::CommandExts>: 290 291=over 292 293=item * Getting a list of available commands, parameters, and filespecs: 294 295To get a list of commands, simply: 296 297 @commands = Bio::Tools::Run::ThePkg->available_commands; 298 299The wrapper will generally have human-readable aliases for each of the 300command-line options for the wrapped program and commands. To obtain a 301list of the parameters and switches available for a particular 302command, do 303 304 $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb' ); 305 @params = $factory->available_parameters('params'); 306 @switches = $factory->available_parameters('switches'); 307 @filespec = $factory->available_parameters('filespec'); 308 @filespec = $factory->filespec; # alias 309 310=item * Create factories 311 312The factory is a handle on the program and command you wish to 313run. Create a factory using C<new> to set command-line parameters: 314 315 $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb', 316 -freen => 1, 317 -furschlugginer => 'vreeble' ); 318 319A shorthand for this is: 320 321 $factory = Bio::Tools::Run::ThePkg->new_glurb( 322 -freen => 1, 323 -furschlugginer => 'vreeble' ); 324 325=item * Running programs 326 327To run the program, use the C<run> method, providing filespecs as arguments 328 329 $factory = Bio::Tools::Run::ThePkg->new_assemble( -min_qual => 63 ); 330 $factory->run( -faq1 => 'read1.fq', -faq2 => 'read2.fq', 331 -ref => 'refseq.fas', -out => 'new.sam' ); 332 # do another 333 $factory->run( -faq1 => 'read-old1.fq', -faq2 => 'read-old2.fq', 334 -ref => 'refseq.fas', -out => 'old.sam' ); 335 336Messages on STDOUT and STDERR are dumped into their respective attributes: 337 338 $stdout = $factory->stdout; 339 $stderr = $factory->stderr; 340 341unless STDOUT and/or STDERR are part of the named files in the filespec. 342 343=item * Setting/getting/resetting/polling parameters. 344 345A C<CommandExts>-based factory is always L<Bio::ParameterBaseI> 346compliant. That means that you may set, get, and reset parameters 347using C<set_parameters()>, C<get_parameters()>, and 348C<reset_parameters>. You can ask whether parameters have changed since 349they were last accessed by using the predicate 350C<parameters_changed>. See L<Bio::ParameterBaseI> for more details. 351 352Once set, parameters become attributes of the factory. Thus, you can get their values as follows: 353 354 if ($factory->freen) { 355 $furs = $factory->furshlugginer; 356 #... 357 } 358 359=back 360 361=head1 FEEDBACK 362 363=head2 Mailing Lists 364 365User feedback is an integral part of the evolution of this and other 366Bioperl modules. Send your comments and suggestions preferably to 367the Bioperl mailing list. Your participation is much appreciated. 368 369 bioperl-l@bioperl.org - General discussion 370http://bioperl.org/wiki/Mailing_lists - About the mailing lists 371 372=head2 Support 373 374Please direct usage questions or support issues to the mailing list: 375 376L<bioperl-l@bioperl.org> 377 378rather than to the module maintainer directly. Many experienced and 379reponsive experts will be able look at the problem and quickly 380address it. Please include a thorough description of the problem 381with code and data examples if at all possible. 382 383=head2 Reporting Bugs 384 385Report bugs to the Bioperl bug tracking system to help us keep track 386of the bugs and their resolution. Bug reports can be submitted via 387the web: 388 389 https://github.com/bioperl/bioperl-live/issues 390 391=head1 AUTHOR - Mark A. Jensen 392 393Email maj -at- fortinbras -dot- us 394 395Describe contact details here 396 397=head1 CONTRIBUTORS 398 399Dan Kortschak ( dan -dot- kortschak -at- adelaide -dot- edu -dot- au ) 400 401=head1 APPENDIX 402 403The rest of the documentation details each of the object methods. 404Internal methods are usually preceded with a _ 405 406=cut 407 408# Let the code begin... 409 410package Bio::Tools::Run::WrapperBase; # need these methods in WrapperBase/maj 411$Bio::Tools::Run::WrapperBase::VERSION = '1.7.7'; 412use strict; 413use warnings; 414no warnings qw(redefine); 415 416use Bio::Root::Root; 417use File::Spec; 418use IPC::Run; 419use base qw(Bio::Root::Root Bio::ParameterBaseI); 420 421our $AUTOLOAD; 422 423=head2 new() 424 425 Title : new 426 Usage : 427 Function: constructor for WrapperBase::CommandExts ; 428 correctly binds configuration variables 429 to the WrapperBase object 430 Returns : Bio::Tools::Run::WrapperBase object with command extensions 431 Args : 432 Note : this method subsumes the old _register_program_commands and 433 _set_program_options, leaving out the assembler-specific 434 parms ($qual_param and out_type()) 435 436=cut 437 438sub new { 439 my ($class, @args) = @_; 440 my $self = bless ({}, $class); 441 # pull in *copies* of the Config variables from the caller namespace: 442 my ($pkg, @goob) = caller(); 443 my ($commands, 444 $prefixes, 445 $params, 446 $switches, 447 $translation, 448 $use_dash, 449 $join, 450 $name, 451 $dir, 452 $composite_commands, 453 $files); 454 for (qw( @program_commands 455 %command_prefixes 456 @program_params 457 @program_switches 458 %param_translation 459 $use_dash 460 $join 461 $program_name 462 $program_dir 463 %composite_commands 464 %command_files ) ) { 465 my ($sigil, $var) = m/(.)(.*)/; 466 my $qualvar = "${sigil}${pkg}::${var}"; 467 for ($sigil) { 468 /\@/ && do { $qualvar = "\[$qualvar\]" }; 469 /\%/ && do { $qualvar = "\{$qualvar\}" }; 470 } 471 my $locvar = "\$${var}"; 472 $locvar =~ s/program_|command_|param_//g; 473 eval "$locvar = $qualvar"; 474 } 475 # set up the info registry hash 476 my %registry; 477 if ($composite_commands) { 478 $self->_register_composite_commands($composite_commands, 479 $params, 480 $switches, 481 $prefixes); 482 } 483 @registry{qw( _commands _prefixes _files 484 _params _switches _translation 485 _composite_commands )} = 486 ($commands, $prefixes, $files, 487 $params, $switches, $translation, 488 $composite_commands); 489 $self->{_options} = \%registry; 490 if (not defined $use_dash) { 491 $self->{'_options'}->{'_dash'} = 1; 492 } else { 493 $self->{'_options'}->{'_dash'} = $use_dash; 494 } 495 if (not defined $join) { 496 $self->{'_options'}->{'_join'} = ' '; 497 } else { 498 $self->{'_options'}->{'_join'} = $join; 499 } 500 if ($name =~ /^\*/) { 501 $self->is_pseudo(1); 502 $name =~ s/^\*//; 503 } 504 $self->program_name($name) if not defined $self->program_name(); 505 $self->program_dir($dir) if not defined $self->program_dir(); 506 $self->set_parameters(@args); 507 $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI 508 return $self; 509} 510 511=head2 program_name 512 513 Title : program_name 514 Usage : $factory->program_name($name) 515 Function: get/set the executable name 516 Returns: string 517 Args : string 518 519=cut 520 521sub program_name { 522 my ($self, $val) = @_; 523 $self->{'_program_name'} = $val if $val; 524 return $self->{'_program_name'}; 525} 526 527=head2 program_dir 528 529 Title : program_dir 530 Usage : $factory->program_dir($dir) 531 Function: get/set the program dir 532 Returns: string 533 Args : string 534 535=cut 536 537sub program_dir { 538 my ($self, $val) = @_; 539 $self->{'_program_dir'} = $val if $val; 540 return $self->{'_program_dir'}; 541} 542 543=head2 _register_program_commands() 544 545 Title : _register_program_commands 546 Usage : $factory->_register_program_commands( \@commands, \%prefixes ) 547 Function: Register the commands a program accepts (for programs that act 548 as frontends for a set of commands, each command having its own 549 set of params/switches) 550 Returns : true on success 551 Args : arrayref to a list of commands (scalar strings), 552 hashref to a translation table of the form 553 { $prefix1 => $command1, ... } [optional] 554 Note : To implement a program with this kind of calling structure, 555 include a parameter called 'command' in the 556 @program_params global 557 Note : The translation table is used to associate parameters and 558 switches specified in _set_program_options with the correct 559 program command. In the globals @program_params and 560 @program_switches, specify elements as 'prefix1|param' and 561 'prefix1|switch', etc. 562 563=cut 564 565=head2 _set_program_options 566 567 Title : _set_program_options 568 Usage : $factory->_set_program_options( \@ args ); 569 Function: Register the parameters and flags that an assembler takes. 570 Returns : 1 for success 571 Args : - arguments passed by the user 572 - parameters that the program accepts, optional (default: none) 573 - switches that the program accepts, optional (default: none) 574 - parameter translation, optional (default: no translation occurs) 575 - dash option for the program parameters, [1|single|double|mixed], 576 optional (default: yes, use single dashes only) 577 - join, optional (default: ' ') 578 579=cut 580 581=head2 _translate_params 582 583 Title : _translate_params 584 Usage : @options = @{$assembler->_translate_params( )}; 585 Function: Translate the Bioperl arguments into the arguments to pass to the 586 program on the command line 587 Returns : Arrayref of arguments 588 Args : none 589 590=cut 591 592sub _translate_params { 593 my ($self) = @_; 594 # Get option string 595 my ($params, $switches, $join, $dash, $translat) = 596 @{$self->{_options}}{qw(_params _switches _join _dash _translation)}; 597 598 # access the multiple dash choices of _setparams... 599 my @dash_args; 600 $dash ||= 1; # default as advertised 601 for ($dash) { 602 $_ eq '1' && do { 603 @dash_args = ( -dash => 1 ); 604 last; 605 }; 606 /^s/ && do { #single dash only 607 @dash_args = ( -dash => 1); 608 last; 609 }; 610 /^d/ && do { # double dash only 611 @dash_args = ( -double_dash => 1); 612 last; 613 }; 614 /^m/ && do { # mixed dash: one-letter opts get -, 615 # long opts get -- 616 @dash_args = ( -mixed_dash => 1); 617 last; 618 }; 619 do { 620 $self->warn( "Dash spec '$dash' not recognized; using 'single'" ); 621 @dash_args = ( -dash => 1 ); 622 }; 623 } 624 my $options = $self->_setparams( 625 -params => $params, 626 -switches => $switches, 627 -join => $join, 628 @dash_args 629 ); 630 631 # Translate options 632 # parse more carefully - bioperl-run issue #12 633 $options =~ s/^\s+//; 634 $options =~ s/\s+$//; 635 my @options; 636 my $in_quotes; 637 for (split(/(\s|$join)/, $options)) { 638 if (/^-/) { 639 push @options, $_; 640 } 641 elsif (s/^"//) { 642 $in_quotes=1 unless (s/["']$//); 643 push @options, $_; 644 } 645 elsif (s/"$//) { 646 $options[-1] .= $_; 647 $in_quotes=0; 648 } 649 else { 650 $in_quotes ? $options[-1] .= $_ : 651 push(@options, $_); 652 } 653 } 654 $self->throw("Unmatched quote in option value") if $in_quotes; 655 for (my $i = 0; $i < scalar @options; $i++) { 656 my ($prefix, $name) = ( $options[$i] =~ m/^(-{0,2})(.+)$/ ); 657 if (defined $name) { 658 if ($name =~ /command/i) { 659 $name = $options[$i+2]; # get the command 660 splice @options, $i, 4; 661 $i--; 662 # don't add the command if this is a pseudo-program 663 unshift @options, $name unless ($self->is_pseudo); # put command first 664 } 665 elsif (defined $$translat{$name}) { 666 $options[$i] = $prefix.$$translat{$name}; 667 } 668 } 669 else { 670 splice @options, $i, 1; 671 $i--; 672 } 673 } 674 675 @options = grep (!/^\s*$/,@options); 676 # this is a kludge for mixed options: the reason mixed doesn't 677 # work right on the pass through _setparams is that the 678 # *aliases* and not the actual params are passed to it. 679 # here we just rejigger the dashes 680 if ($dash =~ /^m/) { 681 s/--([a-z0-9](?:\s|$))/-$1/gi for @options; 682 } 683 # Now arrayify the options 684 685 return \@options; 686} 687 688=head2 executable() 689 690 Title : executable 691 Usage : 692 Function: find the full path to the main executable, 693 or to the command executable for pseudo-programs 694 Returns : full path, if found 695 Args : [optional] explicit path to the executable 696 (will set the appropriate command exec if 697 applicable) 698 [optional] boolean flag whether or not to warn when exe no found 699 Note : overrides WrapperBase.pm 700 701=cut 702 703sub executable { 704 my $self = shift; 705 my ($exe, $warn) = @_; 706 if ($self->is_pseudo) { 707 return $self->{_pathtoexe} = $self->executables($self->command,$exe); 708 } 709 710 # otherwise 711 # setter 712 if (defined $exe) { 713 $self->throw("binary '$exe' does not exist") unless -e $exe; 714 $self->throw("'$exe' is not executable") unless -x $exe; 715 return $self->{_pathtoexe} = $exe; 716 } 717 718 # getter 719 return $self->{_pathtoexe} if defined $self->{_pathstoexe}; 720 721 # finder 722 return $self->{_pathtoexe} = $self->_find_executable($exe, $warn); 723} 724 725=head2 executables() 726 727 Title : executables 728 Usage : 729 Function: find the full path to a command's executable 730 Returns : full path (scalar string) 731 Args : command (scalar string), 732 [optional] explicit path to this command exe 733 [optional] boolean flag whether or not to warn when exe no found 734 735=cut 736 737sub executables { 738 my $self = shift; 739 my ($cmd, $exe, $warn) = @_; 740 # for now, barf if this is not a pseudo program 741 $self->throw("This wrapper represents a single program with commands, not multiple programs; can't use executables()") unless $self->is_pseudo; 742 $self->throw("Command name required at arg 1") unless defined $cmd; 743 $self->throw("The desired executable '$cmd' is not registered as a command") unless grep /^$cmd$/, @{$self->{_options}->{_commands}}; 744 745 # setter 746 if (defined $exe) { 747 $self->throw("binary '$exe' does not exist") unless -e $exe; 748 $self->throw("'$exe' is not executable") unless -x $exe; 749 $self->{_pathstoexe} = {} unless defined $self->{_pathstoexe}; 750 return $self->{_pathstoexe}->{$cmd} = $exe; 751 } 752 753 # getter 754 return $self->{_pathstoexe}->{$cmd} if defined $self->{_pathstoexe}->{$cmd}; 755 756 $exe ||= $cmd; 757 # finder 758 return $self->{_pathstoexe}->{$cmd} = $self->_find_executable($exe, $warn); 759} 760 761=head2 _find_executable() 762 763 Title : _find_executable 764 Usage : my $exe_path = $fac->_find_executable($exe, $warn); 765 Function: find the full path to a named executable, 766 Returns : full path, if found 767 Args : name of executable to find 768 [optional] boolean flag whether or not to warn when exe no found 769 Note : differs from executable and executables in not 770 setting any object attributes 771 772=cut 773 774sub _find_executable { 775 my $self = shift; 776 my ($exe, $warn) = @_; 777 778 if ($self->is_pseudo && !$exe) { 779 if (!$self->command) { 780 # this throw probably appropriate 781 # the rest are now warns if $warn.../maj 782 $self->throw( 783 "The ".__PACKAGE__." wrapper represents several different programs;". 784 "arg1 to _find_executable must be specified explicitly,". 785 "or the command() attribute set"); 786 } 787 else { 788 $exe = $self->command; 789 } 790 } 791 $exe ||= $self->program_path; 792 793 my $path; 794 if ($self->program_dir) { 795 $path = File::Spec->catfile($self->program_dir, $exe); 796 } else { 797 $path = $exe; 798 $self->warn('Program directory not specified; use program_dir($path).') if $warn; 799 } 800 801 # use provided info - we are allowed to follow symlinks, but refuse directories 802 map { return $path.$_ if ( -x $path.$_ && !(-d $path.$_) ) } ('', '.exe') if defined $path; 803 804 # couldn't get path to executable from provided info, so use system path 805 $path = $path ? " in $path" : undef; 806 $self->warn("Executable $exe not found$path, trying system path...") if $warn; 807 if ($path = $self->io->exists_exe($exe)) { 808 return $path; 809 } else { 810 $self->warn("Cannot find executable for program '".($self->is_pseudo ? $self->command : $self->program_name)."'") if $warn; 811 return; 812 } 813} 814 815=head2 _register_composite_commands() 816 817 Title : _register_composite_commands 818 Usage : 819 Function: adds subcomand params and switches for composite commands 820 Returns : true on success 821 Args : \%composite_commands, 822 \@program_params, 823 \@program_switches 824 825=cut 826 827sub _register_composite_commands { 828 my $self = shift; 829 my ($composite_commands, $program_params, 830 $program_switches, $command_prefixes) = @_; 831 my @sub_params; 832 my @sub_switches; 833 foreach my $cmd (keys %$composite_commands) { 834 my $pfx = $command_prefixes->{$cmd} || $cmd; 835 foreach my $subcmd ( @{$$composite_commands{$cmd}} ) { 836 my $spfx = $command_prefixes->{$subcmd} || $subcmd; 837 my @sub_program_params = grep /^$spfx\|/, @$program_params; 838 my @sub_program_switches = grep /^$spfx\|/, @$program_switches; 839 for (@sub_program_params) { 840 m/^$spfx\|(.*)/; 841 push @sub_params, "$pfx\|${spfx}_".$1; 842 } 843 for (@sub_program_switches) { 844 m/^$spfx\|(.*)/; 845 push @sub_switches, "$pfx\|${spfx}_".$1; 846 } 847 } 848 } 849 push @$program_params, @sub_params; 850 push @$program_switches, @sub_switches; 851 # translations for subcmd params/switches not necessary 852 return 1; 853} 854 855=head2 _create_factory_set() 856 857 Title : _create_factory_set 858 Usage : @facs = $self->_create_factory_set 859 Function: instantiate a set of individual command factories for 860 a given composite command 861 Factories will have the correct parameter fields set for 862 their own subcommand 863 Returns : hash of factories: ( $subcmd_prefix => $subcmd_factory, ... ) 864 Args : none 865 866=cut 867 868sub _create_factory_set { 869 my $self = shift; 870 $self->throw('command not set') unless $self->command; 871 my $cmd = $self->command; 872 $self->throw('_create_factory_set only works on composite commands') 873 unless grep /^$cmd$/, keys %{$self->{_options}->{_composite_commands}}; 874 my %ret; 875 my $class = ref $self; 876 my $subargs_hash = $self->_collate_subcmd_args($cmd); 877 for (keys %$subargs_hash) { 878 $ret{$_} = $class->new( -command => $_, @{$$subargs_hash{$_}} ); 879 } 880 return %ret; 881} 882 883=head2 _collate_subcmd_args() 884 885 Title : _collate_subcmd_args 886 Usage : $args_hash = $self->_collate_subcmd_args 887 Function: collate parameters and switches into command-specific 888 arg lists for passing to new() 889 Returns : hash of named argument lists 890 Args : [optional] composite cmd prefix (scalar string) 891 [default is 'run'] 892 893=cut 894 895sub _collate_subcmd_args { 896 my $self = shift; 897 my $cmd = shift; 898 my %ret; 899 # default command is 'run' 900 $cmd ||= 'run'; 901 return unless $self->{'_options'}->{'_composite_commands'}; 902 return unless $self->{'_options'}->{'_composite_commands'}->{$cmd}; 903 my @subcmds = @{$self->{'_options'}->{'_composite_commands'}->{$cmd}}; 904 905 my $cur_options = $self->{'_options'}; 906 # collate 907 foreach my $subcmd (@subcmds) { 908 # find the composite cmd form of the argument in 909 # the current params and switches 910 # e.g., map_max_mismatches 911 my $pfx = $self->{_options}->{_prefixes}->{$subcmd} || $subcmd; 912 my @params = grep /^${pfx}_/, @{$$cur_options{'_params'}}; 913 my @switches = grep /^${pfx}_/, @{$$cur_options{'_switches'}}; 914 $ret{$subcmd} = []; 915 # create an argument list suitable for passing to new() of 916 # the subcommand factory... 917 foreach my $opt (@params, @switches) { 918 my $subopt = $opt; 919 $subopt =~ s/^${pfx}_//; 920 push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt; 921 } 922 } 923 return \%ret; 924} 925 926=head2 _run 927 928 Title : _run 929 Usage : $fac->_run( @file_args ) 930 Function: Run a command as specified during object construction 931 Returns : true on success 932 Args : a specification of the files to operate on according 933 to the filespec 934 935=cut 936 937sub _run { 938 my ($self, @args) = @_; 939 # _translate_params will provide an array of command/parameters/switches 940 # -- these are set at object construction 941 # to set up the run, need to add the files to the call 942 # -- provide these as arguments to this function 943 my $cmd = $self->command if $self->can('command'); 944 my $opts = $self->{_options}; 945 my %args; 946 $self->throw("No command specified for the object") unless $cmd; 947 # setup files necessary for this command 948 my $filespec = $opts->{'_files'}->{$cmd}; 949 my @switches; 950 my ($in, $out, $err); 951 # some applications rely completely on switches 952 if (defined $filespec && @$filespec) { 953 # parse args based on filespec 954 # require named args 955 $self->throw("Named args are required") unless !(@args % 2); 956 s/^-// for @args; 957 %args = @args; 958 # validate 959 my @req = map { 960 my $s = $_; 961 $s =~ s/^-.*\|//; 962 $s =~ s/^[012]?[<>]//; 963 $s =~ s/[^a-zA-Z0-9_]//g; 964 $s 965 } grep !/[#]/, @$filespec; 966 !defined($args{$_}) && $self->throw("Required filearg '$_' not specified") for @req; 967 # set up redirects and file switches 968 for (@$filespec) { 969 m/^1?>#?(.*)/ && do { 970 defined($args{$1}) && ( open $out, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") ); 971 next; 972 }; 973 m/^2>#?(.*)/ && do { 974 defined($args{$1}) && ( open $err, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") ); 975 next; 976 }; 977 m/^<#?(.*)/ && do { 978 defined($args{$1}) && ( open $in, '<', $args{$1} or $self->throw("Could not read file '$args{$1}': $!") ); 979 next; 980 }; 981 if (m/^-(.*)\|/) { 982 push @switches, $self->_dash_switch($1); 983 } else { 984 push @switches, undef; 985 } 986 } 987 } 988 my $dum; 989 $in || ($in = \$dum); 990 $out || ($out = \$self->{'stdout'}); 991 $err || ($err = \$self->{'stderr'}); 992 993 # Get program executable 994 my $exe = $self->executable; 995 $self->throw("Can't find executable for '".($self->is_pseudo ? $self->command : $self->program_name)."'; can't continue") unless $exe; 996 997 # Get command-line options 998 my $options = $self->_translate_params(); 999 # Get file specs sans redirects in correct order 1000 my @specs = map { 1001 my $s = $_; 1002 $s =~ s/^-.*\|//; 1003 $s =~ s/[^a-zA-Z0-9_]//g; 1004 $s 1005 } grep !/[<>]/, @$filespec; 1006 my @files = @args{@specs}; 1007 # expand arrayrefs 1008 my $l = $#files; 1009 1010 # Note: below code block may be brittle, see link on this: 1011 # http://lists.open-bio.org/pipermail/bioperl-l/2010-June/033439.html 1012 1013 for (0..$l) { 1014 if (ref($files[$_]) eq 'ARRAY') { 1015 splice(@switches, $_, 1, ($switches[$_]) x @{$files[$_]}); 1016 splice(@files, $_, 1, @{$files[$_]}); 1017 } 1018 } 1019 1020 @files = map { 1021 my $s = shift @switches; 1022 defined $_ ? ($s, $_): () 1023 } @files; 1024 @files = map { defined $_ ? $_ : () } @files; # squish undefs 1025 my @ipc_args = ( $exe, @$options, @files ); 1026 $self->{_last_execution} = join( $self->{'_options'}->{'_join'}, @ipc_args ); 1027 eval { 1028 IPC::Run::run(\@ipc_args, $in, $out, $err) or 1029 die ("There was a problem running $exe : ".$$err); 1030 }; 1031 1032 if ($@) { 1033 $self->throw("$exe call crashed: $@") unless $self->no_throw_on_crash; 1034 return 0; 1035 } 1036 1037 return 1; 1038} 1039 1040 1041 1042=head2 no_throw_on_crash() 1043 1044 Title : no_throw_on_crash 1045 Usage : 1046 Function: prevent throw on execution error 1047 Returns : 1048 Args : [optional] boolean 1049 1050=cut 1051 1052sub no_throw_on_crash { 1053 my $self = shift; 1054 return $self->{'_no_throw'} = shift if @_; 1055 return $self->{'_no_throw'}; 1056} 1057 1058=head2 last_execution() 1059 1060 Title : last_execution 1061 Usage : 1062 Function: return the last executed command with options 1063 Returns : string of command line sent to IPC::Run 1064 Args : 1065 1066=cut 1067 1068sub last_execution { 1069 my $self = shift; 1070 return $self->{'_last_execution'}; 1071} 1072 1073=head2 _dash_switch() 1074 1075 Title : _dash_switch 1076 Usage : $version = $fac->_dash_switch( $switch ) 1077 Function: Returns an appropriately dashed switch for the executable 1078 Args : A string containing a switch without dashes 1079 Returns : string containing an appropriately dashed switch for the current executable 1080 1081=cut 1082 1083sub _dash_switch { 1084 my ($self, $switch) = @_; 1085 1086 my $dash = $self->{'_options'}->{'_dash'}; 1087 for ($dash) { 1088 $_ eq '1' && do { 1089 $switch = '-'.$switch; 1090 last; 1091 }; 1092 /^s/ && do { #single dash only 1093 $switch = '-'.$switch; 1094 last; 1095 }; 1096 /^d/ && do { # double dash only 1097 $switch = '--'.$switch; 1098 last; 1099 }; 1100 /^m/ && do { # mixed dash: one-letter opts get -, 1101 $switch = '-'.$switch; 1102 $switch =~ s/^(-[a-z0-9](?:\w+))$/-$1/i; 1103 last; 1104 }; 1105 do { 1106 $self->warn( "Dash spec '$dash' not recognized; using 'single'" ); 1107 $switch = '-'.$switch; 1108 }; 1109 } 1110 1111 return $switch; 1112} 1113 1114=head2 stdout() 1115 1116 Title : stdout 1117 Usage : $fac->stdout() 1118 Function: store the output from STDOUT for the run, 1119 if no file specified in _run arguments 1120 Example : 1121 Returns : scalar string 1122 Args : on set, new value (a scalar or undef, optional) 1123 1124=cut 1125 1126sub stdout { 1127 my $self = shift; 1128 return $self->{'stdout'} = shift if @_; 1129 return $self->{'stdout'}; 1130} 1131 1132=head2 stderr() 1133 1134 Title : stderr 1135 Usage : $fac->stderr() 1136 Function: store the output from STDERR for the run, 1137 if no file is specified in _run arguments 1138 Example : 1139 Returns : scalar string 1140 Args : on set, new value (a scalar or undef, optional) 1141 1142=cut 1143 1144sub stderr { 1145 my $self = shift; 1146 return $self->{'stderr'} = shift if @_; 1147 return $self->{'stderr'}; 1148} 1149 1150=head2 is_pseudo() 1151 1152 Title : is_pseudo 1153 Usage : $obj->is_pseudo($newval) 1154 Function: returns true if this factory represents 1155 a pseudo-program 1156 Example : 1157 Returns : value of is_pseudo (boolean) 1158 Args : on set, new value (a scalar or undef, optional) 1159 1160=cut 1161 1162sub is_pseudo { 1163 my $self = shift; 1164 1165 return $self->{'is_pseudo'} = shift if @_; 1166 return $self->{'is_pseudo'}; 1167} 1168 1169=head2 AUTOLOAD 1170 1171AUTOLOAD permits 1172 1173 $class->new_yourcommand(@args); 1174 1175as an alias for 1176 1177 $class->new( -command => 'yourcommand', @args ); 1178 1179=cut 1180 1181sub AUTOLOAD { 1182 my $class = shift; 1183 my $tok = $AUTOLOAD; 1184 my @args = @_; 1185 $tok =~ s/.*:://; 1186 unless ($tok =~ /^new_/) { 1187 $class->throw("Can't locate object method '$tok' via package '".ref($class)?ref($class):$class); 1188 } 1189 my ($cmd) = $tok =~ m/new_(.*)/; 1190 return $class->new( -command => $cmd, @args ); 1191} 1192 1193=head1 Bio:ParameterBaseI compliance 1194 1195=head2 set_parameters() 1196 1197 Title : set_parameters 1198 Usage : $pobj->set_parameters(%params); 1199 Function: sets the parameters listed in the hash or array 1200 Returns : true on success 1201 Args : [optional] hash or array of parameter/values. 1202 1203=cut 1204 1205sub set_parameters { 1206 my ($self, @args) = @_; 1207 1208 # currently stored stuff 1209 my $opts = $self->{'_options'}; 1210 my $params = $opts->{'_params'}; 1211 my $switches = $opts->{'_switches'}; 1212 my $translation = $opts->{'_translation'}; 1213 my $use_dash = $opts->{'_dash'}; 1214 my $join = $opts->{'_join'}; 1215 unless (($self->can('command') && $self->command) 1216 || (grep /command/, @args)) { 1217 push @args, '-command', 'run'; 1218 } 1219 my %args = @args; 1220 my $cmd = $args{'-command'} || $args{'command'} || ($self->can('command') && $self->command); 1221 if ($cmd) { 1222 my (@p,@s, %x); 1223 $self->warn('Command present, but no commands registered') unless $self->{'_options'}->{'_commands'}; 1224 $self->throw("Command '$cmd' not registered") unless grep /^$cmd$/, @{$self->{'_options'}->{'_commands'}}; 1225 $cmd = $self->{_options}->{_prefixes}->{$cmd} || $cmd; 1226 1227 @p = (grep(!/^.*?\|/, @$params), grep(/^${cmd}\|/, @$params)); 1228 @s = (grep(!/^.*?\|/, @$switches), grep(/^${cmd}\|/, @$switches)); 1229 s/.*?\|// for @p; 1230 s/.*?\|// for @s; 1231 @x{@p, @s} = @{$translation}{ 1232 grep( !/^.*?\|/, @$params, @$switches), 1233 grep(/^${cmd}\|/, @$params, @$switches) }; 1234 $opts->{_translation} = $translation = \%x; 1235 $opts->{_params} = $params = \@p; 1236 $opts->{_switches} = $switches = \@s; 1237 } 1238 $self->_set_from_args( 1239 \@args, 1240 -methods => [ @$params, @$switches, 'program_name', 'program_dir', 'out_type' ], 1241 -create => 1, 1242 # when our parms are accessed, signal parameters are unchanged for 1243 # future reads (until set_parameters is called) 1244 -code => 1245 ' my $self = shift; 1246 $self->parameters_changed(0); 1247 return $self->{\'_\'.$method} = shift if @_; 1248 return $self->{\'_\'.$method};' 1249 ); 1250 # the question is, are previously-set parameters left alone when 1251 # not specified in @args? 1252 $self->parameters_changed(1); 1253 return 1; 1254} 1255 1256=head2 reset_parameters() 1257 1258 Title : reset_parameters 1259 Usage : resets values 1260 Function: resets parameters to either undef or value in passed hash 1261 Returns : none 1262 Args : [optional] hash of parameter-value pairs 1263 1264=cut 1265 1266sub reset_parameters { 1267 my ($self, @args) = @_; 1268 1269 my @reset_args; 1270 # currently stored stuff 1271 my $opts = $self->{'_options'}; 1272 my $params = $opts->{'_params'}; 1273 my $switches = $opts->{'_switches'}; 1274 my $translation = $opts->{'_translation'}; 1275 my $qual_param = $opts->{'_qual_param'}; 1276 my $use_dash = $opts->{'_dash'}; 1277 my $join = $opts->{'_join'}; 1278 1279 # handle command name 1280 my %args = @args; 1281 my $cmd = $args{'-command'} || $args{'command'} || $self->command; 1282 $args{'command'} = $cmd; 1283 delete $args{'-command'}; 1284 @args = %args; 1285 # don't like this, b/c _set_program_args will create a bunch of 1286 # accessors with undef values, but oh well for now /maj 1287 1288 for my $p (@$params) { 1289 push(@reset_args, $p => undef) unless grep /^[-]?$p$/, @args; 1290 } 1291 for my $s (@$switches) { 1292 push(@reset_args, $s => undef) unless grep /^[-]?$s$/, @args; 1293 } 1294 push @args, @reset_args; 1295 $self->set_parameters(@args); 1296 $self->parameters_changed(1); 1297} 1298 1299=head2 parameters_changed() 1300 1301 Title : parameters_changed 1302 Usage : if ($pobj->parameters_changed) {...} 1303 Function: Returns boolean true (1) if parameters have changed 1304 Returns : Boolean (0 or 1) 1305 Args : [optional] Boolean 1306 1307=cut 1308 1309sub parameters_changed { 1310 my $self = shift; 1311 return $self->{'_parameters_changed'} = shift if @_; 1312 return $self->{'_parameters_changed'}; 1313} 1314 1315=head2 available_parameters() 1316 1317 Title : available_parameters 1318 Usage : @params = $pobj->available_parameters() 1319 Function: Returns a list of the available parameters 1320 Returns : Array of parameters 1321 Args : 'params' for settable program parameters 1322 'switches' for boolean program switches 1323 default: all 1324 1325=cut 1326 1327sub available_parameters { 1328 my $self = shift; 1329 my $subset = shift; 1330 my $opts = $self->{'_options'}; 1331 my @ret; 1332 for ($subset) { 1333 (!defined || /^a/) && do { 1334 @ret = (@{$opts->{'_params'}}, @{$opts->{'_switches'}}); 1335 last; 1336 }; 1337 m/^p/i && do { 1338 @ret = @{$opts->{'_params'}}; 1339 last; 1340 }; 1341 m/^s/i && do { 1342 @ret = @{$opts->{'_switches'}}; 1343 last; 1344 }; 1345 m/^c/i && do { 1346 @ret = @{$opts->{'_commands'}}; 1347 last; 1348 }; 1349 m/^f/i && do { # get file spec 1350 return @{$opts->{'_files'}->{$self->command}}; 1351 }; 1352 do { #fail 1353 $self->throw("available_parameters: unrecognized subset"); 1354 }; 1355 } 1356 return @ret; 1357} 1358 1359sub available_commands { shift->available_parameters('commands') } 1360sub filespec { shift->available_parameters('filespec') } 1361 1362=head2 get_parameters() 1363 1364 Title : get_parameters 1365 Usage : %params = $pobj->get_parameters; 1366 Function: Returns list of key-value pairs of parameter => value 1367 Returns : List of key-value pairs 1368 Args : [optional] A string is allowed if subsets are wanted or (if a 1369 parameter subset is default) 'all' to return all parameters 1370 1371=cut 1372 1373sub get_parameters { 1374 my $self = shift; 1375 my $subset = shift; 1376 $subset ||= 'all'; 1377 my @ret; 1378 my $opts = $self->{'_options'}; 1379 for ($subset) { 1380 m/^p/i && do { #params only 1381 for (@{$opts->{'_params'}}) { 1382 push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; 1383 } 1384 last; 1385 }; 1386 m/^s/i && do { #switches only 1387 for (@{$opts->{'_switches'}}) { 1388 push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; 1389 } 1390 last; 1391 }; 1392 m/^a/i && do { # all 1393 for ((@{$opts->{'_params'}},@{$opts->{'_switches'}})) { 1394 push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; 1395 } 1396 last; 1397 }; 1398 do { 1399 $self->throw("get_parameters: unrecognized subset"); 1400 }; 1401 } 1402 return @ret; 1403} 1404 14051; 1406