1#
2# BioPerl module for Bio::Tools::Run::Simprot
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Albert Vilella <avilella-at-gmail-dot-com>
7#
8# Copyright Albert Vilella
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::Tools::Run::Simprot - Wrapper around the Simprot program. Wrapper for the calculation of a multiple sequence alignment from a phylogenetic tree
17
18=head1 SYNOPSIS
19
20  use Bio::Tools::Run::Simprot;
21  use Bio::TreeIO;
22
23  my $treeio = Bio::TreeIO->new(
24      -format => 'nh', -file => 't/data/tree.nh');
25
26  my $tree = $treeio->next_tree;
27
28  my $simprot = Bio::Tools::Run::Simprot->new();
29  $simprot->tree($tree);
30  my ($rc,$aln,$seq) = $simprot->run();
31
32=head1 DESCRIPTION
33
34This is a wrapper around the Simprot program by Andy Pang, Andrew D
35Smith, Paulo AS Nuin and Elisabeth RM Tillier.
36
37Simprot allows for several models of amino acid substitution (PAM, JTT
38and PMB), allows for gamma distributed sites rates according to Yang's
39model, and implements a parameterised Qian and Goldstein distribution
40model for insertion and deletion.
41
42See http://www.uhnres.utoronto.ca/labs/tillier/software.htm for more
43information.
44
45
46=head2 Helping the module find your executable
47
48You will need to enable SIMPROTDIR to find the simprot program. This can be
49done in (at least) three ways:
50
51  1. Make sure the simprot executable is in your path (i.e.
52     'which simprot' returns a valid program
53  2. define an environmental variable SIMPROTDIR which points to a
54     directory containing the 'simprot' app:
55   In bash
56	export SIMPROTDIR=/home/progs/simprot   or
57   In csh/tcsh
58        setenv SIMPROTDIR /home/progs/simprot
59
60  3. include a definition of an environmental variable SIMPROTDIR
61      in every script that will
62     BEGIN {$ENV{SIMPROTDIR} = '/home/progs/simprot'; }
63     use Bio::Tools::Run::Simprot;
64
65=head1 FEEDBACK
66
67=head2 Mailing Lists
68
69User feedback is an integral part of the evolution of this and other
70Bioperl modules. Send your comments and suggestions preferably to one
71of the Bioperl mailing lists.  Your participation is much appreciated.
72
73  bioperl-l@bioperl.org                  - General discussion
74  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
75
76=head2 Support
77
78Please direct usage questions or support issues to the mailing list:
79
80I<bioperl-l@bioperl.org>
81
82rather than to the module maintainer directly. Many experienced and
83reponsive experts will be able look at the problem and quickly
84address it. Please include a thorough description of the problem
85with code and data examples if at all possible.
86
87=head2 Reporting Bugs
88
89Report bugs to the Bioperl bug tracking system to help us keep track
90the bugs and their resolution.  Bug reports can be submitted via the web:
91
92 http://redmine.open-bio.org/projects/bioperl/
93
94=head1 AUTHOR -  Albert Vilella
95
96Email avilella-at-gmail-dot-com
97
98=head1 APPENDIX
99
100The rest of the documentation details each of the object
101methods. Internal methods are usually preceded with a _
102
103=cut
104
105package Bio::Tools::Run::Simprot;
106
107use vars qw(@ISA %VALIDVALUES $PROGRAMNAME $PROGRAM);
108
109use strict;
110use Bio::SimpleAlign;
111use Bio::AlignIO;
112use Bio::SeqIO;
113use Bio::TreeIO;
114use Bio::Root::Root;
115use Bio::Root::IO;
116use Bio::Tools::Run::WrapperBase;
117@ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
118
119# valid values for parameters, the default one is always
120# the first one in the array
121BEGIN {
122    %VALIDVALUES = (
123                    'branch'   => '1',
124                    'eFactor'  => '3',
125                    'indelFrequncy'   => '0.03',
126                    'maxIndel'   => '2048',
127                    'subModel' => [ 2,0,1], # 0:PAM, 1:JTT, 2:PMB
128                    'rootLength'   => '50',
129                    'alpha'   => '1',
130                    'Benner'   => '0',
131                    'interleaved'   => '1',
132                    'variablegamma'   => '0',
133                    'bennerk'   => '-2',
134                   );
135}
136
137=head2 program_name
138
139 Title   : program_name
140 Usage   : $factory->program_name()
141 Function: holds the program name
142 Returns:  string
143 Args    : None
144
145=cut
146
147sub program_name {
148        return 'simprot';
149}
150
151=head2 program_dir
152
153 Title   : program_dir
154 Usage   : $factory->program_dir(@params)
155 Function: returns the program directory, obtained from ENV variable.
156 Returns:  string
157 Args    :
158
159=cut
160
161sub program_dir {
162        return Bio::Root::IO->catfile($ENV{SIMPROTDIR}) if $ENV{SIMPROTDIR};
163}
164
165
166=head2 new
167
168 Title   : new
169 Usage   : my $simprot = Bio::Tools::Run::Simprot->new();
170 Function: Builds a new Bio::Tools::Run::Simprot
171 Returns : Bio::Tools::Run::Simprot
172 Args    : -alignment => the Bio::Align::AlignI object
173           -tree => the Bio::Tree::TreeI object
174           -save_tempfiles => boolean to save the generated tempfiles and
175                              NOT cleanup after onesself (default FALSE)
176           -executable => where the simprot executable resides
177					 -params => A reference to a hash where keys are parameter names
178					            and hash values are the associated parameter values
179
180See also: L<Bio::Tree::TreeI>, L<Bio::Align::AlignI>
181
182=cut
183
184sub new {
185  my($class,@args) = @_;
186
187  my $self = $class->SUPER::new(@args);
188  my ($aln, $tree, $st, $params, $exe,
189      $ubl) = $self->_rearrange([qw(TREE SAVE_TEMPFILES PARAMS EXECUTABLE)],
190				    @args);
191  defined $tree && $self->tree($tree);
192  defined $st  && $self->save_tempfiles($st);
193  defined $exe && $self->executable($exe);
194
195  $self->set_default_parameters();
196  if( defined $params ) {
197      if( ref($params) !~ /HASH/i ) {
198	  $self->warn("Must provide a valid hash ref for parameter -FLAGS");
199      } else {
200	  map { $self->set_parameter($_, $$params{$_}) } keys %$params;
201      }
202  }
203  return $self;
204}
205
206=head2 set_parameters
207
208 Title   : set_parameters
209 Usage   : $codeml->set_parameters($parameter, $value);
210 Function: (Re)set the SimProt parameters
211 Returns : none
212 Args    : First argument is the parameter name
213           Second argument is the parameter value
214
215=cut
216
217sub set_parameter{
218   my ($self,$param,$value) = @_;
219   unless (defined $self->{'no_param_checks'} && $self->{'no_param_checks'} == 1) {
220       if ( ! defined $VALIDVALUES{$param} ) {
221           $self->warn("unknown parameter $param will not be set unless you force by setting no_param_checks to true");
222           return 0;
223       }
224       if ( ref( $VALIDVALUES{$param}) =~ /ARRAY/i &&
225            scalar @{$VALIDVALUES{$param}} > 0 ) {
226
227           unless ( grep { $value eq $_ } @{ $VALIDVALUES{$param} } ) {
228               $self->warn("parameter $param specified value $value is not recognized, please see the documentation and the code for this module or set the no_param_checks to a true value");
229               return 0;
230           }
231       }
232   }
233   $self->{'_simprotparams'}->{$param} = $value;
234   return 1;
235}
236
237
238
239=head2 set_default_parameters
240
241 Title   : set_default_parameters
242 Usage   : $codeml->set_default_parameters(0);
243 Function: (Re)set the default parameters from the defaults
244           (the first value in each array in the
245	    %VALIDVALUES class variable)
246 Returns : none
247 Args    : boolean: keep existing parameter values
248
249
250=cut
251
252sub set_default_parameters{
253   my ($self,$keepold) = @_;
254   $keepold = 0 unless defined $keepold;
255
256   while( my ($param,$val) = each %VALIDVALUES ) {
257       # skip if we want to keep old values and it is already set
258       next if( defined $self->{'_simprotparams'}->{$param} && $keepold);
259       if(ref($val)=~/ARRAY/i ) {
260	   $self->{'_simprotparams'}->{$param} = $val->[0];
261       }  else {
262	   $self->{'_simprotparams'}->{$param} = $val;
263       }
264   }
265}
266
267
268=head2 get_parameters
269
270 Title   : get_parameters
271 Usage   : my %params = $self->get_parameters();
272 Function: returns the list of parameters as a hash
273 Returns : associative array keyed on parameter names
274 Args    : none
275
276
277=cut
278
279sub get_parameters{
280   my ($self) = @_;
281   # we're returning a copy of this
282   return %{ $self->{'_simprotparams'} };
283}
284
285
286
287=head2 prepare
288
289 Title   : prepare
290 Usage   : my $rundir = $simprot->prepare();
291 Function: prepare the simprot analysis using the default or updated parameters
292           the alignment parameter and species tree must have been set
293 Returns : value of rundir
294 Args    : L<Bio::Align::AlignI> object,
295	   L<Bio::Tree::TreeI> object [optional]
296
297=cut
298
299sub prepare {
300   my ($self,$tree) = @_;
301
302   unless ( $self->save_tempfiles ) {
303       # brush so we don't get plaque buildup ;)
304       $self->cleanup();
305   }
306   $tree = $self->tree unless $tree;
307   if( ! $tree ) {
308       $self->warn("must have supplied a valid species tree file in order to run simprot");
309       return 0;
310   }
311   my ($tempdir) = $self->tempdir();
312
313   my ($temptreeFH);
314   if( ! ref($tree) && -e $tree ) {
315       $self->{_temptreefile} = $tree;
316   } else {
317       ($temptreeFH,$self->{_temptreefile}) = $self->io->tempfile
318	   ('-dir' => $tempdir,
319	    UNLINK => ($self->save_tempfiles ? 0 : 1));
320
321       my $treeout = Bio::TreeIO->new('-format' => 'newick',
322				     '-fh'     => $temptreeFH);
323       $treeout->write_tree($tree);
324       $treeout->close();
325       close($temptreeFH);
326   }
327   $self->{_prepared} = 1;
328
329   my %params = $self->get_parameters;
330   while( my ($param,$val) = each %params ) {
331       $self->{_simprot_params} .=" \-\-$param\=$val";
332   }
333
334   return $tempdir;
335}
336
337
338=head2 run
339
340 Title   : run
341 Usage   : my $nhx_tree = $simprot->run();
342 Function: run the simprot analysis using the default or updated parameters
343           the alignment parameter must have been set
344 Returns : L<Bio::Tree::TreeI> object [optional]
345 Args    : L<Bio::Align::AlignI> object
346	   L<Bio::Tree::TreeI> object
347
348
349=cut
350
351sub run {
352   my ($self,$tree) = @_;
353
354   $self->prepare($tree) unless (defined($self->{_prepared}));
355   my ($rc,$aln,$seq) = (1);
356   my ($tmpdir) = $self->tempdir();
357   my $outfile;
358   {
359       my $commandstring;
360       my $exit_status;
361       my $simprot_executable = $self->executable;
362       $commandstring .= $simprot_executable;
363       $commandstring .= $self->{_simprot_params};
364       $commandstring .= " --tree=". $self->{_temptreefile} . " ";
365       my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir());
366       close($tfh);
367       undef $tfh;
368       $self->outfile_name($outfile);
369       my $seqfile;
370       ($tfh, $seqfile) = $self->io->tempfile(-dir=>$self->tempdir());
371       close($tfh);
372       undef $tfh;
373
374       $commandstring .= "--alignment=". $self->outfile_name . " ";
375       $commandstring .= "--sequence=". $seqfile . " ";
376
377       $self->throw("unable to find or run executable for 'simprot'")
378           unless $simprot_executable && -e $simprot_executable && -x _;
379
380       open(RUN, "$commandstring |")
381           or $self->throw("Cannot run $commandstring");
382
383       my @output = <RUN>;
384       $exit_status = close(RUN);
385       $self->error_string(join('',@output));
386       if( (grep { /^\[ /io } @output)  || !$exit_status) {
387	   $self->warn("There was an error - see error_string for the program output");
388	   $rc = 0;
389       }
390       eval {
391	   $aln = Bio::AlignIO->new(-file => "$outfile",-format => 'fasta');
392	   $seq = Bio::SeqIO->new(-file => "$seqfile", -format => 'fasta');
393       };
394       if( $@ ) {
395	   $self->warn($self->error_string);
396       }
397   }
398   unless ( $self->save_tempfiles ) {
399       $self->cleanup();
400   }
401   return ($rc,$aln,$seq);
402}
403
404
405=head2 error_string
406
407 Title   : error_string
408 Usage   : $obj->error_string($newval)
409 Function: Where the output from the last analysus run is stored.
410 Returns : value of error_string
411 Args    : newvalue (optional)
412
413
414=cut
415
416sub error_string {
417   my ($self,$value) = @_;
418   if( defined $value) {
419      $self->{'error_string'} = $value;
420    }
421    return $self->{'error_string'};
422
423}
424
425
426=head2  version
427
428 Title   : version
429 Usage   : exit if $prog->version() < 1.8
430 Function: Determine the version number of the program
431 Example :
432 Returns : float or undef
433 Args    : none
434
435=cut
436
437sub version {
438    my ($self) = @_;
439    my $exe;
440    return undef unless $exe = $self->executable;
441    my $string = `$exe 2>&1` ;
442
443    $string =~ /Version\:\s+(\d+.\d+.\d+)/m;
444    return $1 || undef;
445}
446
447
448=head2 alignment
449
450 Title   : alignment
451 Usage   : $simprot->align($aln);
452 Function: Get/Set the L<Bio::Align::AlignI> object
453 Returns : L<Bio::Align::AlignI> object
454 Args    : [optional] L<Bio::Align::AlignI>
455 Comment : We could potentially add support for running directly on a file
456           but we shall keep it simple
457 See also: L<Bio::SimpleAlign>
458
459=cut
460
461sub alignment {
462   my ($self,$aln) = @_;
463
464   if( defined $aln ) {
465       if( -e $aln ) {
466	   $self->{'_alignment'} = $aln;
467       } elsif( !ref($aln) || ! $aln->isa('Bio::Align::AlignI') ) {
468	   $self->warn("Must specify a valid Bio::Align::AlignI object to the alignment function not $aln");
469	   return undef;
470       } else {
471	   $self->{'_alignment'} = $aln;
472       }
473   }
474   return  $self->{'_alignment'};
475}
476
477=head2 tree
478
479 Title   : tree
480 Usage   : $simprot->tree($tree, %params);
481 Function: Get/Set the L<Bio::Tree::TreeI> object
482 Returns : L<Bio::Tree::TreeI>
483 Args    : [optional] $tree => L<Bio::Tree::TreeI>,
484           [optional] %parameters => hash of tree-specific parameters
485
486 Comment : We could potentially add support for running directly on a file
487           but we shall keep it simple
488 See also: L<Bio::Tree::Tree>
489
490=cut
491
492sub tree {
493   my ($self, $tree, %params) = @_;
494   if( defined $tree ) {
495       if( ! ref($tree) || ! $tree->isa('Bio::Tree::TreeI') ) {
496	   $self->warn("Must specify a valid Bio::Tree::TreeI object to the alignment function");
497       }
498       $self->{'_tree'} = $tree;
499   }
500   return $self->{'_tree'};
501}
502
503
504
505=head1 Bio::Tools::Run::BaseWrapper methods
506
507=cut
508
509=head2 save_tempfiles
510
511 Title   : save_tempfiles
512 Usage   : $obj->save_tempfiles($newval)
513 Function:
514 Returns : value of save_tempfiles
515 Args    : newvalue (optional)
516
517
518=cut
519
520=head2 outfile_name
521
522 Title   : outfile_name
523 Usage   : my $outfile = $simprot->outfile_name();
524 Function: Get/Set the name of the output file for this run
525           (if you wanted to do something special)
526 Returns : string
527 Args    : [optional] string to set value to
528
529
530=cut
531
532
533=head2 tempdir
534
535 Title   : tempdir
536 Usage   : my $tmpdir = $self->tempdir();
537 Function: Retrieve a temporary directory name (which is created)
538 Returns : string which is the name of the temporary directory
539 Args    : none
540
541
542=cut
543
544=head2 cleanup
545
546 Title   : cleanup
547 Usage   : $simprot->cleanup();
548 Function: Will cleanup the tempdir directory
549 Returns : none
550 Args    : none
551
552
553=cut
554
555=head2 io
556
557 Title   : io
558 Usage   : $obj->io($newval)
559 Function:  Gets a L<Bio::Root::IO> object
560 Returns : L<Bio::Root::IO>
561 Args    : none
562
563
564=cut
565
566sub DESTROY {
567    my $self= shift;
568    unless ( $self->save_tempfiles ) {
569	$self->cleanup();
570    }
571    $self->SUPER::DESTROY();
572}
573
5741; # Needed to keep compiler happy
575