1# 2# BioPerl module for Bio::Tools::Run::Phylo::LVB 3# 4# Created by Daniel Barker, based on ProtPars.pm by Shawn Hoon 5# 6# You may distribute this module under the same terms as perl itself 7# 8# POD documentation - main docs before the code 9 10=head1 NAME 11 12Bio::Tools::Run::Phylo::LVB - Object for using the LVB program to create 13an array of L<Bio::Tree> objects from a nucleotide multiple alignment 14file or a nucleotide SimpleAlign object. Works with LVB version 2.1. 15 16=head1 SYNOPSIS 17 18 use Bio::Tools::Run::Phylo::LVB; 19 20 # Create a SimpleAlign object. 21 # NOTE. Aligning nucleotide sequence directly, as below, makes 22 # sense for non-coding nucleotide sequence (e.g., structural RNA 23 # genes, introns, ITS). For protein-coding genes, to prevent 24 # Clustal intronducing frameshifts one should instead align the 25 # translations of the genes, then convert the multiple alignment 26 # to nucleotide by referring to the corresponding transcript 27 # sequences (e.g., using EMBOSS tranalign). 28 use Bio::Tools::Run::Alignment::Clustalw; 29 $aln_factory = Bio::Tools::Run::Alignment::Clustalw->new(quiet => 1); 30 $inputfilename = "/Users/daniel/nuc.fa"; 31 $aln = $aln_factory->align($inputfilename); 32 33 # Create the tree or trees. 34 $tree_factory = Bio::Tools::Run::Phylo::LVB->new(quiet => 1); 35 @trees = $tree_factory->run($aln); 36 37 # Or one can pass in a file name containing a nucleotide multiple 38 # alignment in Phylip 3.6 format: 39 $tree_factory = Bio::Tools::Run::Phylo::LVB->new(quiet => 1); 40 $tree = $tree_factory->run("/Users/daniel/nuc.phy"); 41 42=head1 DESCRIPTION 43 44Wrapper for LVB, which uses a simulated annealing heuristic search 45to seek parsimonious trees from a nucleotide multiple alignment. 46 47=head1 FEEDBACK 48 49=head2 Mailing Lists 50 51User feedback is an integral part of the evolution of this and other 52Bioperl modules. Send your comments and suggestions preferably to one 53of the Bioperl mailing lists. Your participation is much appreciated. 54 55 bioperl-l@bioperl.org - General discussion 56 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 57 58=head2 Support 59 60Please direct usage questions or support issues to the mailing list: 61 62I<bioperl-l@bioperl.org> 63 64rather than to the module maintainer directly. Many experienced and 65reponsive experts will be able look at the problem and quickly 66address it. Please include a thorough description of the problem 67with code and data examples if at all possible. 68 69=head2 Reporting Bugs 70 71Report bugs to the Bioperl bug tracking system to help us keep track 72the bugs and their resolution. Bug reports can be submitted via the 73web: 74 75 http://redmine.open-bio.org/projects/bioperl/ 76 77=head1 PARAMETERS FOR LVB COMPUTATION 78 79=head2 FORMAT 80 81 Title : FORMAT 82 Description : (optional) 83 When running LVB from a Phylip 3.6-format 84 multiple alignment file, this specifies 85 the layout of the file. It may be 86 "interleaved" or "sequential". FORMAT is 87 automatically set to "interleaved" if 88 running from a SimpleAlign object. 89 Defaults to "interleaved". 90 91=head2 GAPS 92 93 Title : GAPS 94 Description : (optional) 95 LVB can treat gaps represented in the 96 multiple alignment by "-" as either 97 "fifthstate" or "unknown". "fifthstate" 98 regards "-" as equivalent to "O", which 99 is an unambiguous character state 100 distinct from all nucleotides. "unknown" 101 regards "-" as equivalent to "?", which 102 is as an ambiguous site that may contain 103 "A" or "C" or "G" or "T" or "O". 104 Defaults to "unknown". 105 106=head2 SEED 107 108 Title : SEED 109 Description : (optional) 110 This specifies the random number seed 111 for LVB. SEED must be an integer in the 112 range 0 to 900000000 inclusive. If no 113 seed is specified, LVB takes a seed from 114 the system clock. By default, no seed is 115 specified. 116 117=head2 DURATION 118 119 Title : DURATION 120 Description : (optional) 121 This specifies the duration of the 122 analysis, which may be "fast" or "slow". 123 "slow" causes LVB to perform a more 124 thorough and more time-consuming search 125 than "fast". Defaults to "slow". 126 127=head2 BOOTSTRAPS 128 129 Title : BOOTSTRAPS 130 Description : (optional) 131 This specifies the number of bootstrap 132 replicates to use, which must be a 133 positive integer. Set bootstraps to 0 for 134 no bootstrapping. Defaults to 0. 135 136=head1 AUTHOR 137 138Daniel Barker 139 140=head1 CONTRIBUTORS 141 142Email jason-AT-bioperl_DOT_org 143 144=head1 APPENDIX 145 146The rest of the documentation details each of the object 147methods. Internal methods are usually preceded with a _ 148 149=cut 150 151#' 152 153package Bio::Tools::Run::Phylo::LVB; 154 155use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME 156 @LVB_PARAMS @OTHER_SWITCHES 157 %OK_FIELD); 158use strict; 159use Bio::SimpleAlign; 160use Cwd; 161use Bio::AlignIO; 162use Bio::TreeIO; 163use Bio::Root::Root; 164use Bio::Tools::Run::WrapperBase; 165use Bio::Root::IO; 166use File::Copy; 167 168@ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); 169 170# You will need to enable the LVB program. 171# You can set the path to the program through doing: 172# my @params('executable'=>'/usr/local/bin/lvb'); 173# my $lvb_factory = Bio::Tools::Run::Phylo::LVB->new(@params); 174# 175 176BEGIN { 177 # NOTE. The order of the members of @LVB_PARAMS is vital! 178 @LVB_PARAMS = qw(FORMAT GAPS SEED DURATION BOOTSTRAPS); 179 @OTHER_SWITCHES = qw(QUIET); 180 foreach my $attr(@LVB_PARAMS, @OTHER_SWITCHES) { 181 $OK_FIELD{$attr}++; 182 } 183} 184 185=head2 program_name 186 187 Title : program_name 188 Usage : ->program_name() 189 Function: holds the program name 190 Returns: string 191 Args : None 192 193=cut 194 195sub program_name { 196 return 'lvb'; 197} 198 199=head2 program_dir 200 201 Title : program_dir 202 Usage : ->program_dir() 203 Function: returns undef 204 Args : 205 206=cut 207 208sub program_dir { 209 return undef; 210} 211 212sub new { 213 my ($class,@args) = @_; 214 my $self = $class->SUPER::new(@args); 215 216 # set defaults 217 $self->FORMAT("interleaved"); 218 $self->GAPS("unknown"); 219 $self->SEED(""); 220 $self->DURATION("slow"); 221 $self->BOOTSTRAPS(0); 222 223 # re-set with user's values where specified 224 my ($attr, $value); 225 while (@args) { 226 $attr = shift @args; 227 $value = shift @args; 228 next if( $attr =~ /^-/ ); # don't want named parameters 229 $self->$attr($value); 230 } 231 return $self; 232} 233 234sub AUTOLOAD { 235 my $self = shift; 236 my $attr = $AUTOLOAD; 237 $attr =~ s/.*:://; 238 $attr = uc $attr; 239 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr}; 240 $self->{$attr} = shift if @_; 241 return $self->{$attr}; 242} 243 244=head2 run 245 246 Title : run 247 Usage : 248 $inputfilename = '/Users/daniel/nuc.phy'; 249 @trees = $factory->run($inputfilename); 250 Function: Create one or more LVB trees from a SimpleAlign 251 object or a file containing a Phylip 3.6-format 252 nucleotide multiple alignment. 253 Example : 254 Returns : Array of L<Bio::Tree> objects 255 Args : Name of a file containing a nucleotide multiple 256 alignment in Phylip 3.6 format, or a SimpleAlign 257 object 258 259=cut 260 261sub run{ 262 263 my ($self,$input) = @_; 264 my ($infilename); 265 266 # Create input file pointer 267 $infilename = $self->_setinput($input); 268 if (!$infilename) {$self->throw("Problems setting up for lvb. Probably bad input data in $input !");} 269 270 # Create parameter string to pass to lvb program 271 my $param_string = $self->_setparams(); 272 273 # run lvb 274 my @trees = $self->_run($infilename,$param_string); 275} 276 277=head2 create_tree 278 279 Title : create_tree 280 Usage : 281 $inputfilename = '/Users/daniel/nuc.phy'; 282 @trees = $factory->create_tree($inputfilename); 283 Function: Create one or more LVB trees from a SimpleAlign 284 object or a file containing a Phylip 3.6-format 285 nucleotide multiple alignment. 286 Example : 287 Returns : Array of L<Bio::Tree> objects 288 Args : Name of a file containing a nucleotide multiple 289 alignment in Phylip 3.6 format, or a SimpleAlign 290 object 291 292=cut 293 294sub create_tree{ 295 return shift->run(@_); 296} 297 298################################################# 299 300=head2 _run 301 302 Title : _run 303 Usage : Internal function, not to be called directly 304 Function: makes actual system call to lvb program 305 Example : 306 Returns : Array of Bio::Tree objects 307 Args : Name of a file containing a multiple alignment 308 in Phylip 3.6 format and a parameter string to be 309 passed to LVB 310 311=cut 312 313sub _run { 314 my ($self,$infile,$param_string) = @_; 315 return unless( $self->executable ); 316 317 my $instring; 318 my $curpath = cwd; 319 unless( File::Spec->file_name_is_absolute($infile) ) { 320 $infile = $self->io->catfile($curpath,$infile); 321 } 322 $instring = $param_string; 323 $self->debug( "Program ".$self->executable || ''."\n"); 324 325 # create LVB's working copy of the input file, which must be named "infile" 326 # NOTE, we cut trailing spaces since they can cause trouble with LVB 2.1 327 my $lvb_infile = $self->tempdir . "/infile"; 328 open(LVB_SUB_RUN_TMP_IN_FH, "$infile"); 329 open(LVB_SUB_RUN_TMP_OUT_FH, ">$lvb_infile"); 330 while (<LVB_SUB_RUN_TMP_IN_FH>) { 331 s/ +$//; 332 print LVB_SUB_RUN_TMP_OUT_FH 333 or $self->throw("output error on $lvb_infile"); 334 } 335 chdir($self->tempdir); 336 #open a pipe to run lvb to bypass interactive menus 337 if ($self->quiet() || $self->verbose() < 0) { 338 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null'; 339 open(LVB_PIPE,"|".$self->executable.">$null"); 340 } 341 else { 342 open(LVB_PIPE,"|".$self->executable); 343 } 344 print LVB_PIPE $instring; 345 close(LVB_PIPE); 346 chdir($curpath); 347 #get the results 348 my $treefile = $self->tempdir . "/outtree"; 349 350 $self->throw("LVB did not create treefile correctly") 351 unless (-e $treefile); 352 353 #create the trees 354 my $in = Bio::TreeIO->new(-file => $treefile, '-format' => 'newick'); 355 my @trees = (); 356 while (my $tree = $in->next_tree()) { 357 push @trees, $tree; 358 } 359 360 unless ( $self->save_tempfiles ) { 361 # Clean up the temporary files created along the way... 362 unlink $lvb_infile; 363 unlink $treefile; 364 } 365 return @trees; 366} 367 368=head2 _setinput 369 370 Title : _setinput 371 Usage : Internal function, not to be called directly 372 Function: Create input file for lvb program 373 Example : 374 Returns : name of file containing a multiple alignment in 375 Phylip 3.6 format 376 Args : SimpleAlign object reference or input file name 377 378=cut 379 380sub _setinput { 381 my ($self, $input, $suffix) = @_; 382 my ($alnfilename,$infilename, $temp, $tfh,$input_tmp,$input_fh); 383 384 # If $input is not a reference it better be the name of a 385 # file with the sequence/ 386 387 # a phy formatted alignment file 388 unless (ref $input) { 389 # check that file exists or throw 390 $alnfilename= $input; 391 unless (-e $input) {return 0;} 392 return $alnfilename; 393 } 394 395 # $input may be a SimpleAlign Object 396 if ($input->isa("Bio::Align::AlignI")) { 397 # Open temporary file for both reading & writing of BioSeq array 398 ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir); 399 my $alnIO = Bio::AlignIO->new(-fh => $tfh, -format=>'phylip',idlength=>$10); 400 $alnIO->write_aln($input); 401 $alnIO->close(); 402 close($tfh); 403 $tfh = undef; 404 unless ($self->format() =~ /^interleaved$/i) { 405 $self->warn("resetting LVB format to interleaved"); 406 $self->format("interleaved"); 407 } 408 return $alnfilename; 409 } 410 return 0; 411} 412 413=head2 _setparams 414 415 Title : _setparams 416 Usage : Internal function, not to be called directly 417 Function: Create parameter inputs for lvb program 418 Example : 419 Returns : parameter string to be passed to LVB 420 Args : name of calling object 421 422=cut 423 424sub _setparams { 425 my ($attr, $value, $self); 426 427 $self = shift; 428 my $param_string = ""; 429 430 for $attr (@LVB_PARAMS) { 431 $value = $self->$attr(); 432 if ($attr =~/SEED/i) { 433 $value = "" unless defined $value; 434 $param_string .= "$value\n"; 435 } elsif ($attr =~ /BOOTSTRAPS/i) { 436 $value = 0 unless defined $value; 437 $param_string .= "$value\n"; 438 } else { # we want I for "interleaved" or S for "sequential", 439 # U for "unknown" or F for "fifthstate", 440 # F for "fast" or S for "slow" 441 $param_string .= uc(substr $value, 0, 1) . "\n"; 442 } 443 } 444 445 return $param_string; 446} 447 4481; # Needed to keep compiler happy 449