1# 2# BioPerl module for Bio::Tools::Run::Phylo::Raxml 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Copyright Brian Osborne 7# 8# You may distribute this module under the same terms as perl itself 9# 10# POD documentation - main docs before the code 11 12=head1 NAME 13 14Bio::Tools::Run::Phylo::Raxml 15 16=head1 SYNOPSIS 17 18 # Build a Raxml factory 19 $factory = Bio::Tools::Run::Phylo::Raxml->new(-p => 100); 20 21 # Get an alignment 22 my $alignio = Bio::AlignIO->new( 23 -format => 'fasta', 24 -file => '219877.cdna.fasta'); 25 my $alnobj = $alignio->next_aln; 26 27 # Analyze the aligment and get a Tree 28 my $tree = $factory->run($alnobj); 29 30=head1 DESCRIPTION 31 32Get a Bio::Tree object using raxml given a protein or DNA alignment. 33 34=head1 FEEDBACK 35 36=head2 Mailing Lists 37 38User feedback is an integral part of the evolution of this and other 39Bioperl modules. Send your comments and suggestions preferably to one 40of the Bioperl mailing lists. Your participation is much appreciated. 41 42 bioperl-l@bioperl.org - General discussion 43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 44 45=head2 Support 46 47Please direct usage questions or support issues to the mailing list: 48 49I<bioperl-l@bioperl.org> 50 51Do not contact the module maintainer directly. Many experienced experts 52at bioperl-l will be able look at the problem and quickly 53address it. Please include a thorough description of the problem 54with code and data examples if at all possible. 55 56=head2 Reporting Bugs 57 58Report bugs to the Bioperl bug tracking system to help us keep track 59the bugs and their resolution. Bug reports can be submitted via the web: 60 61 http://redmine.open-bio.org/projects/bioperl/ 62 63=head1 AUTHOR - Brian Osborne 64 65Email briano@bioteam.net 66 67=head1 APPENDIX 68 69The rest of the documentation details each of the object 70methods. Internal methods are usually preceded with a _ 71 72=cut 73 74package Bio::Tools::Run::Phylo::Raxml; 75 76use strict; 77use File::Basename; 78use File::Spec; 79use Bio::Seq; 80use Bio::SeqIO; 81use Bio::TreeIO; 82use Bio::AlignIO; 83use Bio::Root::IO; 84use Cwd; 85 86use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); 87 88our @Raxml_PARAMS = 89 qw(s n m a A b B c e E f g G i I J o p P q r R S t T w W x z N); 90our @Raxml_SWITCHES = 91 qw(SSE3 PTHREADS PTHREADS-SSE3 HYBRID HYBRID-SSE3 F h k K M j U v X y C d D); 92our $PROGRAM_NAME = 'raxml'; 93 94# Specify some model if none is specified 95my $DEFAULTAAMODEL = 'PROTCATDAYHOFF'; 96my $DEFAULTNTMODEL = 'GTRCAT'; 97 98=head2 new 99 100 Title : new 101 Usage : my $treebuilder = Bio::Tools::Run::Phylo::Raxml->new(); 102 Function: Constructor 103 Returns : Bio::Tools::Run::Phylo::Raxml 104 Args : Same as those used to run raxml. For example: 105 106 $factory = Bio::Tools::Run::Phylo::Raxml->new(-p => 100, -SSE3 => 1) 107 108=cut 109 110sub new { 111 my ( $class, @args ) = @_; 112 my $self = $class->SUPER::new(@args); 113 114 $self->_set_from_args( 115 \@args, 116 -case_sensitive => 1, 117 -methods => [ @Raxml_PARAMS, @Raxml_SWITCHES ], 118 -create => 1 119 ); 120 121 my ($out,$quiet) = $self->SUPER::_rearrange( [qw(OUTFILE_NAME QUIET)], @args ); 122 123 $self->outfile_name( $out || '' ); 124 $self->quiet( $quiet ) if $quiet; 125 126 $self; 127} 128 129=head2 program_name 130 131 Title : program_name 132 Usage : $factory->program_name() 133 Function: holds the program name 134 Returns: string 135 Args : None 136 137=cut 138 139sub program_name { 140 $PROGRAM_NAME; 141} 142 143=head2 program_dir 144 145 Title : program_dir 146 Usage : $factory->program_dir(@params) 147 Function: returns the program directory 148 Returns: string 149 Args : 150 151=cut 152 153sub program_dir { 154 undef; 155} 156 157=head2 error_string 158 159 Title : error_string 160 Usage : $obj->error_string($newval) 161 Function: Where the output from the last analysus run is stored. 162 Returns : value of error_string 163 Args : newvalue (optional) 164 165=cut 166 167sub error_string { 168 my ( $self, $value ) = @_; 169 170 $self->{'error_string'} = $value if ( defined $value ); 171 $self->{'error_string'}; 172} 173 174=head2 version 175 176 Title : version 177 Usage : exit if $prog->version() < 1.8 178 Function: Determine the version number of the program 179 Example : 180 Returns : float or undef 181 Args : none 182 183=cut 184 185sub version { 186 my ($self) = @_; 187 my $exe; 188 189 return undef unless $exe = $self->executable; 190 my $string = `$exe -v 2>&1`; 191 192 $string =~ /raxml\s+version\s+([\d\.]+)/i; 193 return $1 || undef; 194} 195 196=head2 quiet 197 198 Title : quiet 199 Usage : 200 Function: get or set value for 'quiet' 201 Example : 202 Returns : 203 Args : the value 204 205=cut 206 207sub quiet { 208 my ( $self, $value ) = @_; 209 210 $self->{'_quiet'} = $value if ( defined $value ); 211 $self->{'_quiet'}; 212} 213 214=head2 run 215 216 Title : run 217 Usage : $factory->run($stockholm_file) OR 218 $factory->run($align_object) 219 Function: Runs Raxml to generate a tree 220 Returns : Bio::Tree::Tree object 221 Args : File name for your input alignment in stockholm format, OR 222 Bio::Align::AlignI compliant object (eg. Bio::SimpleAlign). 223 224=cut 225 226sub run { 227 my ($self, $in) = @_; 228 229 if (ref $in && $in->isa("Bio::Align::AlignI")) { 230 $in = $self->_write_alignfile($in); 231 } 232 elsif (! -e $in) { 233 $self->throw("When not supplying a Bio::Align::AlignI object, you must supply a readable filename"); 234 } 235 236 $self->_run($in); 237} 238 239=head2 _run 240 241 Title : _run 242 Usage : Internal function, not to be called directly 243 Function: Runs the application 244 Returns : Tree object 245 Args : Alignment file name 246 247=cut 248 249sub _run { 250 my ( $self, $file ) = @_; 251 252 my $exe = $self->executable || return; 253 my $param_str = $self->arguments . " " . $self->_setparams($file); 254 my $command = "$exe $param_str"; 255 $self->debug("Raxml command = $command"); 256 257 my $status = system($command); 258 259 # raxml creates tree files with names like "RAxML_bestTree.ABDBxjjdfg3" 260 # if rapid bootstrapping was enabled, also a tree with RAxML_bipartitions.ABDBxjjdfg3 261 # with support values is created, which then should be returned 262 my $outfile = $self->f() eq 'a' ? 'RAxML_bipartitions.' : 'RAxML_bestTree.'; 263 $outfile .= $self->outfile_name; 264 265 $outfile = File::Spec->catfile( ($self->w), $outfile ) if $self->w; 266 267 if ( !-e $outfile || -z $outfile ) { 268 $self->warn("Raxml call had status of $status: $? [command $command] \n"); 269 return undef; 270 } 271 272 my $treeio = Bio::TreeIO->new( -file => $outfile ); 273 my $tree = $treeio->next_tree; 274 275# if bootstraps were enabled, the bootstraps are the ids; convert to 276# bootstrap and no id 277# if ($self->boot) { 278# my @nodes = $tree->get_nodes; 279# my %non_internal = map { $_ => 1 } ($tree->get_leaf_nodes, $tree->get_root_node); 280# foreach my $node (@nodes) { 281# next if exists $non_internal{$node}; 282# $node->bootstrap && next; # protect ourselves incase the parser improves 283# $node->bootstrap($node->id); 284# $node->id(''); 285# } 286# } 287 288 $tree; 289} 290 291=head2 _write_alignfile 292 293 Title : _write_alignfile 294 Usage : Internal function, not to be called directly 295 Function: Create an alignment file 296 Returns : filename 297 Args : Bio::Align::AlignI 298 299=cut 300 301sub _write_alignfile { 302 my ( $self, $align ) = @_; 303 304 my ( $tfh, $tempfile ) = $self->io->tempfile( -dir => '.' ); 305 306 my $out = Bio::AlignIO->new( 307 -file => ">$tempfile", 308 -format => 'phylip' 309 ); 310 $out->write_aln($align); 311 $out->close(); 312 undef($out); 313 close($tfh); 314 undef($tfh); 315 316 die "Alignment file $tempfile was not created" if ( ! -e $tempfile ); 317 318 $tempfile; 319} 320 321=head2 _alphabet 322 323 Title : _alphabet 324 Usage : my $alphabet = $self->_alphabet; 325 Function: Get the alphabet of the input alignment, defaults to 'dna' 326 Returns : 'dna' or 'protein' 327 Args : Alignment file 328 329=cut 330 331sub _alphabet { 332 my ( $self, $file ) = @_; 333 334 if ($file) { 335 if ( -e $file ) { 336 my $in = Bio::AlignIO->new( -file => $file ); 337 my $aln = $in->next_aln; 338 339 # arbitrary, the first one 340 my $seq = $aln->get_seq_by_pos(1); 341 my $alphabet = $seq->alphabet; 342 $self->{_alphabet} = $alphabet; 343 } 344 else { 345 die "File $file can not be found"; 346 } 347 } 348 349 # default is 'dna' 350 return $self->{'_alphabet'} || 'dna'; 351} 352 353=head2 _setparams 354 355 Title : _setparams 356 Usage : Internal function, not to be called directly 357 Function: Create parameter inputs for Raxml program 358 Example : 359 Returns : parameter string to be passed to Raxml 360 Args : name of calling object 361 362=cut 363 364sub _setparams { 365 my ( $self, $infile ) = @_; 366 my $param_string = ''; 367 368 # If 'model' is not set with '-m' check the alphabet of the input, 369 # then specify the default model 370 if ( !$self->m ) { 371 my $model = 372 ( $self->_alphabet($infile) eq 'dna' ) 373 ? $DEFAULTNTMODEL 374 : $DEFAULTAAMODEL; 375 $self->m($model); 376 } 377 378 # Set default output file if no explicit output file has been given. 379 # Raxml insists that the output file name not contain '/' and its 380 # output directory is set using the '-w' argument. 381 if ( !$self->outfile_name ) { 382 my $dir = getcwd(); 383 $self->w($dir); 384 385 my ( $tfh, $outfile ) = $self->io->tempfile( -dir => $dir ); 386 close($tfh); 387 undef $tfh; 388 $outfile = basename($outfile); 389 $self->outfile_name($outfile); 390 } 391 392 for my $attr (@Raxml_PARAMS) { 393 my $value = $self->$attr(); 394 next unless ( defined $value ); 395 $param_string .= ' -' . $attr . ' ' . $value . ' '; 396 } 397 398 for my $attr (@Raxml_SWITCHES) { 399 my $value = $self->$attr(); 400 next unless ($value); 401 $param_string .= ' -' . $attr . ' '; 402 } 403 404 $param_string .= "-s $infile -n " . $self->outfile_name; 405 406 my $null = File::Spec->devnull(); 407 $param_string .= " > $null 2> $null" 408 if ( $self->quiet() || $self->verbose < 0 ); 409 410 $param_string; 411} 412 413=head1 Bio::Tools::Run::BaseWrapper methods 414 415=cut 416 417=head2 no_param_checks 418 419Title : no_param_checks 420Usage : $obj->no_param_checks($newval) 421Function: Boolean flag as to whether or not we should 422 trust the sanity checks for parameter values 423Returns : value of no_param_checks 424Args : newvalue (optional) 425 426=cut 427 428=head2 save_tempfiles 429 430Title : save_tempfiles 431Usage : $obj->save_tempfiles($newval) 432Function: 433Returns : value of save_tempfiles 434Args : newvalue (optional) 435 436=cut 437 438=head2 outfile_name 439 440Title : outfile_name 441Usage : my $outfile = $Raxml->outfile_name(); 442Function: Get/Set the name of the output file for this run 443 (if you wanted to do something special) 444Returns : string 445Args : [optional] string to set value to 446 447=cut 448 449=head2 tempdir 450 451Title : tempdir 452Usage : my $tmpdir = $self->tempdir(); 453Function: Retrieve a temporary directory name (which is created) 454Returns : string which is the name of the temporary directory 455Args : none 456 457=cut 458 459=head2 cleanup 460 461Title : cleanup 462Usage : $Raxml->cleanup(); 463Function: Will cleanup the tempdir directory 464Returns : none 465Args : none 466 467=cut 468 469=head2 io 470 471Title : io 472Usage : $obj->io($newval) 473Function: Gets a L<Bio::Root::IO> object 474Returns : L<Bio::Root::IO> 475Args : none 476 477=cut 478 4791; # Needed to keep compiler happy 480 481__END__ 482