1#
2# BioPerl module for Bio::Tools::Run::Phylo::Raxml
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Copyright Brian Osborne
7#
8# You may distribute this module under the same terms as perl itself
9#
10# POD documentation - main docs before the code
11
12=head1 NAME
13
14Bio::Tools::Run::Phylo::Raxml
15
16=head1 SYNOPSIS
17
18  # Build a Raxml factory
19  $factory = Bio::Tools::Run::Phylo::Raxml->new(-p  => 100);
20
21  # Get an alignment
22  my $alignio = Bio::AlignIO->new(
23        -format => 'fasta',
24        -file   => '219877.cdna.fasta');
25  my $alnobj = $alignio->next_aln;
26
27  # Analyze the aligment and get a Tree
28  my $tree = $factory->run($alnobj);
29
30=head1 DESCRIPTION
31
32Get a Bio::Tree object using raxml given a protein or DNA alignment.
33
34=head1 FEEDBACK
35
36=head2 Mailing Lists
37
38User feedback is an integral part of the evolution of this and other
39Bioperl modules. Send your comments and suggestions preferably to one
40of the Bioperl mailing lists.  Your participation is much appreciated.
41
42  bioperl-l@bioperl.org                  - General discussion
43  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
44
45=head2 Support
46
47Please direct usage questions or support issues to the mailing list:
48
49I<bioperl-l@bioperl.org>
50
51Do not contact the module maintainer directly. Many experienced experts
52at bioperl-l will be able look at the problem and quickly
53address it. Please include a thorough description of the problem
54with code and data examples if at all possible.
55
56=head2 Reporting Bugs
57
58Report bugs to the Bioperl bug tracking system to help us keep track
59the bugs and their resolution.  Bug reports can be submitted via the web:
60
61 http://redmine.open-bio.org/projects/bioperl/
62
63=head1 AUTHOR -  Brian Osborne
64
65Email briano@bioteam.net
66
67=head1 APPENDIX
68
69The rest of the documentation details each of the object
70methods. Internal methods are usually preceded with a _
71
72=cut
73
74package Bio::Tools::Run::Phylo::Raxml;
75
76use strict;
77use File::Basename;
78use File::Spec;
79use Bio::Seq;
80use Bio::SeqIO;
81use Bio::TreeIO;
82use Bio::AlignIO;
83use Bio::Root::IO;
84use Cwd;
85
86use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
87
88our @Raxml_PARAMS =
89  qw(s n m a A b B c e E f g G i I J o p P q r R S t T w W x z N);
90our @Raxml_SWITCHES =
91  qw(SSE3 PTHREADS PTHREADS-SSE3 HYBRID HYBRID-SSE3 F h k K M j U v X y C d D);
92our $PROGRAM_NAME = 'raxml';
93
94# Specify some model if none is specified
95my $DEFAULTAAMODEL = 'PROTCATDAYHOFF';
96my $DEFAULTNTMODEL = 'GTRCAT';
97
98=head2 new
99
100 Title   : new
101 Usage   : my $treebuilder = Bio::Tools::Run::Phylo::Raxml->new();
102 Function: Constructor
103 Returns : Bio::Tools::Run::Phylo::Raxml
104 Args    : Same as those used to run raxml. For example:
105
106 $factory = Bio::Tools::Run::Phylo::Raxml->new(-p  => 100, -SSE3 => 1)
107
108=cut
109
110sub new {
111    my ( $class, @args ) = @_;
112    my $self = $class->SUPER::new(@args);
113
114    $self->_set_from_args(
115        \@args,
116        -case_sensitive => 1,
117        -methods => [ @Raxml_PARAMS, @Raxml_SWITCHES ],
118        -create  => 1
119    );
120
121    my ($out,$quiet) = $self->SUPER::_rearrange( [qw(OUTFILE_NAME QUIET)], @args );
122
123    $self->outfile_name( $out || '' );
124    $self->quiet( $quiet ) if $quiet;
125
126    $self;
127}
128
129=head2 program_name
130
131 Title   : program_name
132 Usage   : $factory->program_name()
133 Function: holds the program name
134 Returns:  string
135 Args    : None
136
137=cut
138
139sub program_name {
140    $PROGRAM_NAME;
141}
142
143=head2 program_dir
144
145 Title   : program_dir
146 Usage   : $factory->program_dir(@params)
147 Function: returns the program directory
148 Returns:  string
149 Args    :
150
151=cut
152
153sub program_dir {
154    undef;
155}
156
157=head2 error_string
158
159 Title   : error_string
160 Usage   : $obj->error_string($newval)
161 Function: Where the output from the last analysus run is stored.
162 Returns : value of error_string
163 Args    : newvalue (optional)
164
165=cut
166
167sub error_string {
168    my ( $self, $value ) = @_;
169
170    $self->{'error_string'} = $value if ( defined $value );
171    $self->{'error_string'};
172}
173
174=head2  version
175
176 Title   : version
177 Usage   : exit if $prog->version() < 1.8
178 Function: Determine the version number of the program
179 Example :
180 Returns : float or undef
181 Args    : none
182
183=cut
184
185sub version {
186    my ($self) = @_;
187    my $exe;
188
189    return undef unless $exe = $self->executable;
190    my $string = `$exe -v 2>&1`;
191
192    $string =~ /raxml\s+version\s+([\d\.]+)/i;
193    return $1 || undef;
194}
195
196=head2  quiet
197
198 Title   : quiet
199 Usage   :
200 Function: get or set value for 'quiet'
201 Example :
202 Returns :
203 Args    : the value
204
205=cut
206
207sub quiet {
208    my ( $self, $value ) = @_;
209
210    $self->{'_quiet'} = $value if ( defined $value );
211    $self->{'_quiet'};
212}
213
214=head2 run
215
216 Title   : run
217 Usage   : $factory->run($stockholm_file) OR
218           $factory->run($align_object)
219 Function: Runs Raxml to generate a tree
220 Returns : Bio::Tree::Tree object
221 Args    : File name for your input alignment in stockholm format, OR
222           Bio::Align::AlignI compliant object (eg. Bio::SimpleAlign).
223
224=cut
225
226sub run {
227    my ($self, $in) = @_;
228
229    if (ref $in && $in->isa("Bio::Align::AlignI")) {
230        $in = $self->_write_alignfile($in);
231    }
232    elsif (! -e $in) {
233        $self->throw("When not supplying a Bio::Align::AlignI object, you must supply a readable filename");
234    }
235
236    $self->_run($in);
237}
238
239=head2 _run
240
241 Title   : _run
242 Usage   : Internal function, not to be called directly
243 Function: Runs the application
244 Returns : Tree object
245 Args    : Alignment file name
246
247=cut
248
249sub _run {
250    my ( $self, $file ) = @_;
251
252    my $exe       = $self->executable || return;
253    my $param_str = $self->arguments . " " . $self->_setparams($file);
254    my $command   = "$exe $param_str";
255    $self->debug("Raxml command = $command");
256
257    my $status  = system($command);
258
259    # raxml creates tree files with names like "RAxML_bestTree.ABDBxjjdfg3"
260    # if rapid bootstrapping was enabled, also a tree with RAxML_bipartitions.ABDBxjjdfg3
261    # with support values is created, which then should be returned
262    my $outfile = $self->f() eq 'a' ? 'RAxML_bipartitions.' : 'RAxML_bestTree.';
263    $outfile .= $self->outfile_name;
264
265    $outfile = File::Spec->catfile( ($self->w), $outfile ) if $self->w;
266
267    if ( !-e $outfile || -z $outfile ) {
268        $self->warn("Raxml call had status of $status: $? [command $command] \n");
269        return undef;
270    }
271
272    my $treeio = Bio::TreeIO->new( -file => $outfile );
273    my $tree = $treeio->next_tree;
274
275# if bootstraps were enabled, the bootstraps are the ids; convert to
276# bootstrap and no id
277# if ($self->boot) {
278#     my @nodes = $tree->get_nodes;
279#     my %non_internal = map { $_ => 1 } ($tree->get_leaf_nodes, $tree->get_root_node);
280#     foreach my $node (@nodes) {
281#         next if exists $non_internal{$node};
282#         $node->bootstrap && next; # protect ourselves incase the parser improves
283#         $node->bootstrap($node->id);
284#         $node->id('');
285#     }
286# }
287
288    $tree;
289}
290
291=head2 _write_alignfile
292
293 Title   : _write_alignfile
294 Usage   : Internal function, not to be called directly
295 Function: Create an alignment file
296 Returns : filename
297 Args    : Bio::Align::AlignI
298
299=cut
300
301sub _write_alignfile {
302    my ( $self, $align ) = @_;
303
304    my ( $tfh, $tempfile ) = $self->io->tempfile( -dir => '.' );
305
306    my $out = Bio::AlignIO->new(
307        -file   => ">$tempfile",
308        -format => 'phylip'
309    );
310    $out->write_aln($align);
311    $out->close();
312    undef($out);
313    close($tfh);
314    undef($tfh);
315
316    die "Alignment file $tempfile was not created" if ( ! -e $tempfile );
317
318    $tempfile;
319}
320
321=head2 _alphabet
322
323 Title   : _alphabet
324 Usage   : my $alphabet = $self->_alphabet;
325 Function: Get the alphabet of the input alignment, defaults to 'dna'
326 Returns : 'dna' or 'protein'
327 Args    : Alignment file
328
329=cut
330
331sub _alphabet {
332    my ( $self, $file ) = @_;
333
334    if ($file) {
335        if ( -e $file ) {
336            my $in = Bio::AlignIO->new( -file => $file );
337            my $aln = $in->next_aln;
338
339            # arbitrary, the first one
340            my $seq      = $aln->get_seq_by_pos(1);
341            my $alphabet = $seq->alphabet;
342            $self->{_alphabet} = $alphabet;
343        }
344        else {
345            die "File $file can not be found";
346        }
347    }
348
349    # default is 'dna'
350    return $self->{'_alphabet'} || 'dna';
351}
352
353=head2  _setparams
354
355 Title   :  _setparams
356 Usage   :  Internal function, not to be called directly
357 Function:  Create parameter inputs for Raxml program
358 Example :
359 Returns : parameter string to be passed to Raxml
360 Args    : name of calling object
361
362=cut
363
364sub _setparams {
365    my ( $self, $infile ) = @_;
366    my $param_string = '';
367
368    # If 'model' is not set with '-m' check the alphabet of the input,
369    # then specify the default model
370    if ( !$self->m ) {
371        my $model =
372          ( $self->_alphabet($infile) eq 'dna' )
373          ? $DEFAULTNTMODEL
374          : $DEFAULTAAMODEL;
375        $self->m($model);
376    }
377
378    # Set default output file if no explicit output file has been given.
379    # Raxml insists that the output file name not contain '/' and its
380    # output directory is set using the '-w' argument.
381    if ( !$self->outfile_name ) {
382        my $dir = getcwd();
383        $self->w($dir);
384
385        my ( $tfh, $outfile ) = $self->io->tempfile( -dir => $dir );
386        close($tfh);
387        undef $tfh;
388        $outfile = basename($outfile);
389        $self->outfile_name($outfile);
390    }
391
392    for my $attr (@Raxml_PARAMS) {
393        my $value = $self->$attr();
394        next unless ( defined $value );
395        $param_string .= ' -' . $attr . ' ' . $value . ' ';
396    }
397
398    for my $attr (@Raxml_SWITCHES) {
399        my $value = $self->$attr();
400        next unless ($value);
401        $param_string .= ' -' . $attr . ' ';
402    }
403
404    $param_string .= "-s $infile -n " . $self->outfile_name;
405
406    my $null = File::Spec->devnull();
407    $param_string .= " > $null 2> $null"
408      if ( $self->quiet() || $self->verbose < 0 );
409
410    $param_string;
411}
412
413=head1 Bio::Tools::Run::BaseWrapper methods
414
415=cut
416
417=head2 no_param_checks
418
419Title   : no_param_checks
420Usage   : $obj->no_param_checks($newval)
421Function: Boolean flag as to whether or not we should
422          trust the sanity checks for parameter values
423Returns : value of no_param_checks
424Args    : newvalue (optional)
425
426=cut
427
428=head2 save_tempfiles
429
430Title   : save_tempfiles
431Usage   : $obj->save_tempfiles($newval)
432Function:
433Returns : value of save_tempfiles
434Args    : newvalue (optional)
435
436=cut
437
438=head2 outfile_name
439
440Title   : outfile_name
441Usage   : my $outfile = $Raxml->outfile_name();
442Function: Get/Set the name of the output file for this run
443           (if you wanted to do something special)
444Returns : string
445Args    : [optional] string to set value to
446
447=cut
448
449=head2 tempdir
450
451Title   : tempdir
452Usage   : my $tmpdir = $self->tempdir();
453Function: Retrieve a temporary directory name (which is created)
454Returns : string which is the name of the temporary directory
455Args    : none
456
457=cut
458
459=head2 cleanup
460
461Title   : cleanup
462Usage   : $Raxml->cleanup();
463Function: Will cleanup the tempdir directory
464Returns : none
465Args    : none
466
467=cut
468
469=head2 io
470
471Title   : io
472Usage   : $obj->io($newval)
473Function:  Gets a L<Bio::Root::IO> object
474Returns : L<Bio::Root::IO>
475Args    : none
476
477=cut
478
4791;    # Needed to keep compiler happy
480
481__END__
482