1# 2# BioPerl module for Bio::Tools::Run::Phylo::Semphy 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Sendu Bala <bix@sendu.me.uk> 7# 8# Copyright Sendu Bala 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Tools::Run::Phylo::Semphy - Wrapper for Semphy 17 18=head1 SYNOPSIS 19 20 use Bio::Tools::Run::Phylo::Semphy; 21 22 # Make a Semphy factory 23 $factory = Bio::Tools::Run::Phylo::Semphy->new(); 24 25 # Run Semphy with an alignment 26 my $tree = $factory->run($alignfilename); 27 28 # or with alignment object 29 $tree = $factory->run($bio_simplalign); 30 31 # you can supply an initial tree as well, which can be a newick tree file, 32 # Bio::Tree::Tree object... 33 $tree = $factory->run($bio_simplalign, $tree_obj); 34 35 # ... or Bio::DB::Taxonomy object 36 $tree = $factory->run($bio_simplalign, $bio_db_taxonomy); 37 38 # (mixtures of all the above are possible) 39 40 # $tree isa Bio::Tree::Tree 41 42=head1 DESCRIPTION 43 44This is a wrapper for running the Semphy application by N. Friedman et a.. You 45can get details here: http://compbio.cs.huji.ac.il/semphy/. Semphy is used for 46phylogenetic reconstruction (making a tree with branch lengths from an aligned 47set of input sequences). 48 49You can try supplying normal Semphy command-line arguments to new(), eg. 50new(-hky => 1) or calling arg-named methods (excluding the initial hyphen(s), 51eg. $factory->hky(1) to set the --hky switch to true). 52Note that Semphy args are case-sensitive. To distinguish between Bioperl's 53-verbose and the Semphy's --verbose, you must set Semphy's verbosity with 54-semphy_verbose or the semphy_verbose() method. 55 56 57You will need to enable this Semphy wrapper to find the Semphy program. 58This can be done in (at least) three ways: 59 60 1. Make sure the Semphy executable is in your path. 61 2. Define an environmental variable SEMPHYDIR which is a 62 directory which contains the Semphy application: 63 In bash: 64 65 export SEMPHYDIR=/home/username/semphy/ 66 67 In csh/tcsh: 68 69 setenv SEMPHYDIR /home/username/semphy 70 71 3. Include a definition of an environmental variable SEMPHYDIR in 72 every script that will use this Semphy wrapper module, e.g.: 73 74 BEGIN { $ENV{SEMPHYDIR} = '/home/username/semphy/' } 75 use Bio::Tools::Run::Phylo::Semphy; 76 77=head1 FEEDBACK 78 79=head2 Mailing Lists 80 81User feedback is an integral part of the evolution of this and other 82Bioperl modules. Send your comments and suggestions preferably to 83the Bioperl mailing list. Your participation is much appreciated. 84 85 bioperl-l@bioperl.org - General discussion 86 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 87 88=head2 Support 89 90Please direct usage questions or support issues to the mailing list: 91 92I<bioperl-l@bioperl.org> 93 94rather than to the module maintainer directly. Many experienced and 95reponsive experts will be able look at the problem and quickly 96address it. Please include a thorough description of the problem 97with code and data examples if at all possible. 98 99=head2 Reporting Bugs 100 101Report bugs to the Bioperl bug tracking system to help us keep track 102of the bugs and their resolution. Bug reports can be submitted via 103the web: 104 105 http://redmine.open-bio.org/projects/bioperl/ 106 107=head1 AUTHOR - Sendu Bala 108 109Email bix@sendu.me.uk 110 111=head1 APPENDIX 112 113The rest of the documentation details each of the object methods. 114Internal methods are usually preceded with a _ 115 116=cut 117 118package Bio::Tools::Run::Phylo::Semphy; 119use strict; 120 121use File::Spec; 122use Bio::AlignIO; 123use Bio::TreeIO; 124 125use base qw(Bio::Tools::Run::Phylo::PhyloBase); 126 127our $PROGRAM_NAME = 'semphy'; 128our $PROGRAM_DIR = $ENV{'SEMPHYDIR'}; 129 130# methods for the semphy args we support 131our %PARAMS = (outputfile => 'o', 132 treeoutputfile => 'T', 133 constraint => 'c', 134 gaps => 'g', 135 seed => 'r', 136 Logfile => 'l', 137 alphabet => 'a', 138 ratio => 'z', 139 ACGprob => 'p', 140 BPrepeats => 'BPrepeats', 141 BPconsensus => 'BPconsensus', 142 SEMPHY => 'S', 143 modelfile => 'modelfile', 144 alpha => 'A', 145 categories => 'C', 146 semphy_verbose => 'semphy_verbose'); 147our %SWITCHES = (homogeneousRatesDTME => 'homogeneousRatesDTME', 148 NJ => 'J', 149 pairwiseGammaDTME => 'pairwiseGammaDTME', 150 commonAlphaDTME => 'commonAlphaDTME', 151 rate4siteDTME => 'rate4siteDTME', 152 posteriorDTME => 'posteriorDTME', 153 BPonUserTree => 'BPonUserTree', 154 nucjc => 'nucjc', 155 aaJC => 'aaJC', 156 k2p => 'k2p', 157 hky => 'hky', 158 day => 'day', 159 jtt => 'jtt', 160 rev => 'rev', 161 wag => 'wag', 162 cprev => 'cprev', 163 homogeneous => 'H', 164 optimizeAlpha => 'O', 165 bbl => 'n', 166 likelihood => 'L', 167 PerPosLike => 'P', 168 PerPosPosterior => 'B', 169 rate => 'R'); 170 171# just to be explicit, args we don't support (yet) or we handle ourselves 172our @UNSUPPORTED = qw(h help full-help s sequence t tree); 173 174 175=head2 program_name 176 177 Title : program_name 178 Usage : $factory>program_name() 179 Function: holds the program name 180 Returns : string 181 Args : None 182 183=cut 184 185sub program_name { 186 return $PROGRAM_NAME; 187} 188 189=head2 program_dir 190 191 Title : program_dir 192 Usage : $factory->program_dir(@params) 193 Function: returns the program directory, obtained from ENV variable. 194 Returns : string 195 Args : None 196 197=cut 198 199sub program_dir { 200 return $PROGRAM_DIR; 201} 202 203=head2 new 204 205 Title : new 206 Usage : $factory = Bio::Tools::Run::Phylo::Semphy->new() 207 Function: creates a new Semphy factory 208 Returns : Bio::Tools::Run::Phylo::Semphy 209 Args : Most options understood by Semphy can be supplied as key => 210 value pairs, with a true value for switches. 211 212 These options can NOT be used with this wrapper (they are handled 213 internally or don't make sense in this context): 214 -h | --help | --fill-help 215 -s | --sequence 216 -t | --tree 217 218 To distinguish between Bioperl's -verbose and the Semphy's --verbose, 219 you must set Semphy's verbosity with -semphy_verbose 220 221=cut 222 223sub new { 224 my ($class, @args) = @_; 225 my $self = $class->SUPER::new(@args); 226 227 $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS), 228 (map { $_ => $SWITCHES{$_} } keys %SWITCHES), 229 quiet => 'quiet'}, 230 -create => 1, 231 -case_sensitive => 1); 232 233 return $self; 234} 235 236=head2 run 237 238 Title : run 239 Usage : $result = $factory->run($fasta_align_file); 240 -or- 241 $result = $factory->run($align_object); 242 -or- 243 $result = $factory->run($fasta_align_file, $newick_tree_file); 244 -or- 245 $result = $factory->run($align_object, $tree_object); 246 -or- 247 $result = $factory->run($align_object, $db_taxonomy_object); 248 Function: Runs Semphy on an alignment. 249 Returns : Bio::Tree::Tree 250 Args : The first argument represents an alignment, the second (optional) 251 argument a species tree (to set an initial tree: normally the -t 252 option to Semphy). 253 The alignment can be provided as a multi-fasta format alignment 254 filename, or a Bio::Align::AlignI compliant object (eg. a 255 Bio::SimpleAlign). 256 The species tree can be provided as a newick format tree filename 257 or a Bio::Tree::TreeI compliant object. Alternatively a 258 Bio::DB::Taxonomy object can be supplied, in which case the species 259 tree will be generated by using the alignment sequence names as 260 species names and looking for those in the supplied database. 261 262 In all cases where an initial tree was supplied, the alignment 263 sequence names must correspond to node ids in the species tree. 264 265=cut 266 267sub run { 268 my ($self, $aln, $tree) = @_; 269 270 $aln || $self->throw("alignment must be supplied"); 271 $self->_alignment($aln); 272 273 if ($tree) { 274 $self->_tree($tree); 275 276 # check node and seq names match 277 $self->_check_names; 278 } 279 280 return $self->_run; 281} 282 283sub _run { 284 my $self = shift; 285 286 my $exe = $self->executable || return; 287 288 my $aln_file = $self->_write_alignment; 289 290 # generate a semphy-friendly tree file 291 my $tree = $self->_tree; 292 my $tree_file = ''; 293 if ($tree) { 294 $tree = $self->_write_tree; 295 } 296 297 unless ($self->T) { 298 my ($tfh, $tempfile) = $self->io->tempfile(-dir => $self->tempdir); 299 $self->T($tempfile); 300 close($tfh); 301 } 302 303 my $command = $exe.$self->_setparams($aln_file, $tree_file); 304 $self->debug("semphy command = $command\n"); 305 306 open(my $pipe, "$command |") || $self->throw("semphy call ($command) failed to start: $? | $!"); 307 my $error = ''; 308 while (<$pipe>) { 309 print unless $self->quiet; 310 $error .= $_; 311 } 312 close($pipe) || ($error ? $self->warn("semphy call ($command) failed: $error") : $self->throw("semphy call ($command) crashed: $?")); 313 314 my $result_file = $self->T(); 315 my $tio = Bio::TreeIO->new(-format => 'newick', -file => $result_file); 316 my $result_tree = $tio->next_tree; 317 318 return $result_tree; 319} 320 321=head2 _setparams 322 323 Title : _setparams 324 Usage : Internal function, not to be called directly 325 Function: Creates a string of params to be used in the command string 326 Returns : string of params 327 Args : alignment and tree file names 328 329=cut 330 331sub _setparams { 332 my ($self, $aln_file, $tree_file) = @_; 333 334 my $param_string = ' -s '.$aln_file; 335 $param_string .= ' -t '.$tree_file if $tree_file; 336 337 my %methods = map { $_ => $_ } keys %PARAMS; 338 $methods{'semphy_verbose'} = 'verbose'; 339 $param_string .= $self->SUPER::_setparams(-params => \%methods, 340 -switches => [keys %SWITCHES], 341 -double_dash => 1); 342 343 $param_string .= ' 2>&1'; 344 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null'; 345 $param_string .= " 1>$null" if $self->quiet; 346 347 return $param_string; 348} 349 3501; 351