1# 2# BioPerl module for Bio::Tools::Run::Simprot 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Albert Vilella <avilella-at-gmail-dot-com> 7# 8# Copyright Albert Vilella 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Tools::Run::Simprot - Wrapper around the Simprot program. Wrapper for the calculation of a multiple sequence alignment from a phylogenetic tree 17 18=head1 SYNOPSIS 19 20 use Bio::Tools::Run::Simprot; 21 use Bio::TreeIO; 22 23 my $treeio = Bio::TreeIO->new( 24 -format => 'nh', -file => 't/data/tree.nh'); 25 26 my $tree = $treeio->next_tree; 27 28 my $simprot = Bio::Tools::Run::Simprot->new(); 29 $simprot->tree($tree); 30 my ($rc,$aln,$seq) = $simprot->run(); 31 32=head1 DESCRIPTION 33 34This is a wrapper around the Simprot program by Andy Pang, Andrew D 35Smith, Paulo AS Nuin and Elisabeth RM Tillier. 36 37Simprot allows for several models of amino acid substitution (PAM, JTT 38and PMB), allows for gamma distributed sites rates according to Yang's 39model, and implements a parameterised Qian and Goldstein distribution 40model for insertion and deletion. 41 42See http://www.uhnres.utoronto.ca/labs/tillier/software.htm for more 43information. 44 45 46=head2 Helping the module find your executable 47 48You will need to enable SIMPROTDIR to find the simprot program. This can be 49done in (at least) three ways: 50 51 1. Make sure the simprot executable is in your path (i.e. 52 'which simprot' returns a valid program 53 2. define an environmental variable SIMPROTDIR which points to a 54 directory containing the 'simprot' app: 55 In bash 56 export SIMPROTDIR=/home/progs/simprot or 57 In csh/tcsh 58 setenv SIMPROTDIR /home/progs/simprot 59 60 3. include a definition of an environmental variable SIMPROTDIR 61 in every script that will 62 BEGIN {$ENV{SIMPROTDIR} = '/home/progs/simprot'; } 63 use Bio::Tools::Run::Simprot; 64 65=head1 FEEDBACK 66 67=head2 Mailing Lists 68 69User feedback is an integral part of the evolution of this and other 70Bioperl modules. Send your comments and suggestions preferably to one 71of the Bioperl mailing lists. Your participation is much appreciated. 72 73 bioperl-l@bioperl.org - General discussion 74 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 75 76=head2 Support 77 78Please direct usage questions or support issues to the mailing list: 79 80I<bioperl-l@bioperl.org> 81 82rather than to the module maintainer directly. Many experienced and 83reponsive experts will be able look at the problem and quickly 84address it. Please include a thorough description of the problem 85with code and data examples if at all possible. 86 87=head2 Reporting Bugs 88 89Report bugs to the Bioperl bug tracking system to help us keep track 90the bugs and their resolution. Bug reports can be submitted via the web: 91 92 http://redmine.open-bio.org/projects/bioperl/ 93 94=head1 AUTHOR - Albert Vilella 95 96Email avilella-at-gmail-dot-com 97 98=head1 APPENDIX 99 100The rest of the documentation details each of the object 101methods. Internal methods are usually preceded with a _ 102 103=cut 104 105package Bio::Tools::Run::Simprot; 106 107use vars qw(@ISA %VALIDVALUES $PROGRAMNAME $PROGRAM); 108 109use strict; 110use Bio::SimpleAlign; 111use Bio::AlignIO; 112use Bio::SeqIO; 113use Bio::TreeIO; 114use Bio::Root::Root; 115use Bio::Root::IO; 116use Bio::Tools::Run::WrapperBase; 117@ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); 118 119# valid values for parameters, the default one is always 120# the first one in the array 121BEGIN { 122 %VALIDVALUES = ( 123 'branch' => '1', 124 'eFactor' => '3', 125 'indelFrequncy' => '0.03', 126 'maxIndel' => '2048', 127 'subModel' => [ 2,0,1], # 0:PAM, 1:JTT, 2:PMB 128 'rootLength' => '50', 129 'alpha' => '1', 130 'Benner' => '0', 131 'interleaved' => '1', 132 'variablegamma' => '0', 133 'bennerk' => '-2', 134 ); 135} 136 137=head2 program_name 138 139 Title : program_name 140 Usage : $factory->program_name() 141 Function: holds the program name 142 Returns: string 143 Args : None 144 145=cut 146 147sub program_name { 148 return 'simprot'; 149} 150 151=head2 program_dir 152 153 Title : program_dir 154 Usage : $factory->program_dir(@params) 155 Function: returns the program directory, obtained from ENV variable. 156 Returns: string 157 Args : 158 159=cut 160 161sub program_dir { 162 return Bio::Root::IO->catfile($ENV{SIMPROTDIR}) if $ENV{SIMPROTDIR}; 163} 164 165 166=head2 new 167 168 Title : new 169 Usage : my $simprot = Bio::Tools::Run::Simprot->new(); 170 Function: Builds a new Bio::Tools::Run::Simprot 171 Returns : Bio::Tools::Run::Simprot 172 Args : -alignment => the Bio::Align::AlignI object 173 -tree => the Bio::Tree::TreeI object 174 -save_tempfiles => boolean to save the generated tempfiles and 175 NOT cleanup after onesself (default FALSE) 176 -executable => where the simprot executable resides 177 -params => A reference to a hash where keys are parameter names 178 and hash values are the associated parameter values 179 180See also: L<Bio::Tree::TreeI>, L<Bio::Align::AlignI> 181 182=cut 183 184sub new { 185 my($class,@args) = @_; 186 187 my $self = $class->SUPER::new(@args); 188 my ($aln, $tree, $st, $params, $exe, 189 $ubl) = $self->_rearrange([qw(TREE SAVE_TEMPFILES PARAMS EXECUTABLE)], 190 @args); 191 defined $tree && $self->tree($tree); 192 defined $st && $self->save_tempfiles($st); 193 defined $exe && $self->executable($exe); 194 195 $self->set_default_parameters(); 196 if( defined $params ) { 197 if( ref($params) !~ /HASH/i ) { 198 $self->warn("Must provide a valid hash ref for parameter -FLAGS"); 199 } else { 200 map { $self->set_parameter($_, $$params{$_}) } keys %$params; 201 } 202 } 203 return $self; 204} 205 206=head2 set_parameters 207 208 Title : set_parameters 209 Usage : $codeml->set_parameters($parameter, $value); 210 Function: (Re)set the SimProt parameters 211 Returns : none 212 Args : First argument is the parameter name 213 Second argument is the parameter value 214 215=cut 216 217sub set_parameter{ 218 my ($self,$param,$value) = @_; 219 unless (defined $self->{'no_param_checks'} && $self->{'no_param_checks'} == 1) { 220 if ( ! defined $VALIDVALUES{$param} ) { 221 $self->warn("unknown parameter $param will not be set unless you force by setting no_param_checks to true"); 222 return 0; 223 } 224 if ( ref( $VALIDVALUES{$param}) =~ /ARRAY/i && 225 scalar @{$VALIDVALUES{$param}} > 0 ) { 226 227 unless ( grep { $value eq $_ } @{ $VALIDVALUES{$param} } ) { 228 $self->warn("parameter $param specified value $value is not recognized, please see the documentation and the code for this module or set the no_param_checks to a true value"); 229 return 0; 230 } 231 } 232 } 233 $self->{'_simprotparams'}->{$param} = $value; 234 return 1; 235} 236 237 238 239=head2 set_default_parameters 240 241 Title : set_default_parameters 242 Usage : $codeml->set_default_parameters(0); 243 Function: (Re)set the default parameters from the defaults 244 (the first value in each array in the 245 %VALIDVALUES class variable) 246 Returns : none 247 Args : boolean: keep existing parameter values 248 249 250=cut 251 252sub set_default_parameters{ 253 my ($self,$keepold) = @_; 254 $keepold = 0 unless defined $keepold; 255 256 while( my ($param,$val) = each %VALIDVALUES ) { 257 # skip if we want to keep old values and it is already set 258 next if( defined $self->{'_simprotparams'}->{$param} && $keepold); 259 if(ref($val)=~/ARRAY/i ) { 260 $self->{'_simprotparams'}->{$param} = $val->[0]; 261 } else { 262 $self->{'_simprotparams'}->{$param} = $val; 263 } 264 } 265} 266 267 268=head2 get_parameters 269 270 Title : get_parameters 271 Usage : my %params = $self->get_parameters(); 272 Function: returns the list of parameters as a hash 273 Returns : associative array keyed on parameter names 274 Args : none 275 276 277=cut 278 279sub get_parameters{ 280 my ($self) = @_; 281 # we're returning a copy of this 282 return %{ $self->{'_simprotparams'} }; 283} 284 285 286 287=head2 prepare 288 289 Title : prepare 290 Usage : my $rundir = $simprot->prepare(); 291 Function: prepare the simprot analysis using the default or updated parameters 292 the alignment parameter and species tree must have been set 293 Returns : value of rundir 294 Args : L<Bio::Align::AlignI> object, 295 L<Bio::Tree::TreeI> object [optional] 296 297=cut 298 299sub prepare { 300 my ($self,$tree) = @_; 301 302 unless ( $self->save_tempfiles ) { 303 # brush so we don't get plaque buildup ;) 304 $self->cleanup(); 305 } 306 $tree = $self->tree unless $tree; 307 if( ! $tree ) { 308 $self->warn("must have supplied a valid species tree file in order to run simprot"); 309 return 0; 310 } 311 my ($tempdir) = $self->tempdir(); 312 313 my ($temptreeFH); 314 if( ! ref($tree) && -e $tree ) { 315 $self->{_temptreefile} = $tree; 316 } else { 317 ($temptreeFH,$self->{_temptreefile}) = $self->io->tempfile 318 ('-dir' => $tempdir, 319 UNLINK => ($self->save_tempfiles ? 0 : 1)); 320 321 my $treeout = Bio::TreeIO->new('-format' => 'newick', 322 '-fh' => $temptreeFH); 323 $treeout->write_tree($tree); 324 $treeout->close(); 325 close($temptreeFH); 326 } 327 $self->{_prepared} = 1; 328 329 my %params = $self->get_parameters; 330 while( my ($param,$val) = each %params ) { 331 $self->{_simprot_params} .=" \-\-$param\=$val"; 332 } 333 334 return $tempdir; 335} 336 337 338=head2 run 339 340 Title : run 341 Usage : my $nhx_tree = $simprot->run(); 342 Function: run the simprot analysis using the default or updated parameters 343 the alignment parameter must have been set 344 Returns : L<Bio::Tree::TreeI> object [optional] 345 Args : L<Bio::Align::AlignI> object 346 L<Bio::Tree::TreeI> object 347 348 349=cut 350 351sub run { 352 my ($self,$tree) = @_; 353 354 $self->prepare($tree) unless (defined($self->{_prepared})); 355 my ($rc,$aln,$seq) = (1); 356 my ($tmpdir) = $self->tempdir(); 357 my $outfile; 358 { 359 my $commandstring; 360 my $exit_status; 361 my $simprot_executable = $self->executable; 362 $commandstring .= $simprot_executable; 363 $commandstring .= $self->{_simprot_params}; 364 $commandstring .= " --tree=". $self->{_temptreefile} . " "; 365 my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir()); 366 close($tfh); 367 undef $tfh; 368 $self->outfile_name($outfile); 369 my $seqfile; 370 ($tfh, $seqfile) = $self->io->tempfile(-dir=>$self->tempdir()); 371 close($tfh); 372 undef $tfh; 373 374 $commandstring .= "--alignment=". $self->outfile_name . " "; 375 $commandstring .= "--sequence=". $seqfile . " "; 376 377 $self->throw("unable to find or run executable for 'simprot'") 378 unless $simprot_executable && -e $simprot_executable && -x _; 379 380 open(RUN, "$commandstring |") 381 or $self->throw("Cannot run $commandstring"); 382 383 my @output = <RUN>; 384 $exit_status = close(RUN); 385 $self->error_string(join('',@output)); 386 if( (grep { /^\[ /io } @output) || !$exit_status) { 387 $self->warn("There was an error - see error_string for the program output"); 388 $rc = 0; 389 } 390 eval { 391 $aln = Bio::AlignIO->new(-file => "$outfile",-format => 'fasta'); 392 $seq = Bio::SeqIO->new(-file => "$seqfile", -format => 'fasta'); 393 }; 394 if( $@ ) { 395 $self->warn($self->error_string); 396 } 397 } 398 unless ( $self->save_tempfiles ) { 399 $self->cleanup(); 400 } 401 return ($rc,$aln,$seq); 402} 403 404 405=head2 error_string 406 407 Title : error_string 408 Usage : $obj->error_string($newval) 409 Function: Where the output from the last analysus run is stored. 410 Returns : value of error_string 411 Args : newvalue (optional) 412 413 414=cut 415 416sub error_string { 417 my ($self,$value) = @_; 418 if( defined $value) { 419 $self->{'error_string'} = $value; 420 } 421 return $self->{'error_string'}; 422 423} 424 425 426=head2 version 427 428 Title : version 429 Usage : exit if $prog->version() < 1.8 430 Function: Determine the version number of the program 431 Example : 432 Returns : float or undef 433 Args : none 434 435=cut 436 437sub version { 438 my ($self) = @_; 439 my $exe; 440 return undef unless $exe = $self->executable; 441 my $string = `$exe 2>&1` ; 442 443 $string =~ /Version\:\s+(\d+.\d+.\d+)/m; 444 return $1 || undef; 445} 446 447 448=head2 alignment 449 450 Title : alignment 451 Usage : $simprot->align($aln); 452 Function: Get/Set the L<Bio::Align::AlignI> object 453 Returns : L<Bio::Align::AlignI> object 454 Args : [optional] L<Bio::Align::AlignI> 455 Comment : We could potentially add support for running directly on a file 456 but we shall keep it simple 457 See also: L<Bio::SimpleAlign> 458 459=cut 460 461sub alignment { 462 my ($self,$aln) = @_; 463 464 if( defined $aln ) { 465 if( -e $aln ) { 466 $self->{'_alignment'} = $aln; 467 } elsif( !ref($aln) || ! $aln->isa('Bio::Align::AlignI') ) { 468 $self->warn("Must specify a valid Bio::Align::AlignI object to the alignment function not $aln"); 469 return undef; 470 } else { 471 $self->{'_alignment'} = $aln; 472 } 473 } 474 return $self->{'_alignment'}; 475} 476 477=head2 tree 478 479 Title : tree 480 Usage : $simprot->tree($tree, %params); 481 Function: Get/Set the L<Bio::Tree::TreeI> object 482 Returns : L<Bio::Tree::TreeI> 483 Args : [optional] $tree => L<Bio::Tree::TreeI>, 484 [optional] %parameters => hash of tree-specific parameters 485 486 Comment : We could potentially add support for running directly on a file 487 but we shall keep it simple 488 See also: L<Bio::Tree::Tree> 489 490=cut 491 492sub tree { 493 my ($self, $tree, %params) = @_; 494 if( defined $tree ) { 495 if( ! ref($tree) || ! $tree->isa('Bio::Tree::TreeI') ) { 496 $self->warn("Must specify a valid Bio::Tree::TreeI object to the alignment function"); 497 } 498 $self->{'_tree'} = $tree; 499 } 500 return $self->{'_tree'}; 501} 502 503 504 505=head1 Bio::Tools::Run::BaseWrapper methods 506 507=cut 508 509=head2 save_tempfiles 510 511 Title : save_tempfiles 512 Usage : $obj->save_tempfiles($newval) 513 Function: 514 Returns : value of save_tempfiles 515 Args : newvalue (optional) 516 517 518=cut 519 520=head2 outfile_name 521 522 Title : outfile_name 523 Usage : my $outfile = $simprot->outfile_name(); 524 Function: Get/Set the name of the output file for this run 525 (if you wanted to do something special) 526 Returns : string 527 Args : [optional] string to set value to 528 529 530=cut 531 532 533=head2 tempdir 534 535 Title : tempdir 536 Usage : my $tmpdir = $self->tempdir(); 537 Function: Retrieve a temporary directory name (which is created) 538 Returns : string which is the name of the temporary directory 539 Args : none 540 541 542=cut 543 544=head2 cleanup 545 546 Title : cleanup 547 Usage : $simprot->cleanup(); 548 Function: Will cleanup the tempdir directory 549 Returns : none 550 Args : none 551 552 553=cut 554 555=head2 io 556 557 Title : io 558 Usage : $obj->io($newval) 559 Function: Gets a L<Bio::Root::IO> object 560 Returns : L<Bio::Root::IO> 561 Args : none 562 563 564=cut 565 566sub DESTROY { 567 my $self= shift; 568 unless ( $self->save_tempfiles ) { 569 $self->cleanup(); 570 } 571 $self->SUPER::DESTROY(); 572} 573 5741; # Needed to keep compiler happy 575