1#
2# BioPerl module for Bio::Tools::Run::Phylo::Semphy
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Sendu Bala <bix@sendu.me.uk>
7#
8# Copyright Sendu Bala
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::Tools::Run::Phylo::Semphy - Wrapper for Semphy
17
18=head1 SYNOPSIS
19
20  use Bio::Tools::Run::Phylo::Semphy;
21
22  # Make a Semphy factory
23  $factory = Bio::Tools::Run::Phylo::Semphy->new();
24
25  # Run Semphy with an alignment
26  my $tree = $factory->run($alignfilename);
27
28  # or with alignment object
29  $tree = $factory->run($bio_simplalign);
30
31  # you can supply an initial tree as well, which can be a newick tree file,
32  # Bio::Tree::Tree object...
33  $tree = $factory->run($bio_simplalign, $tree_obj);
34
35  # ... or Bio::DB::Taxonomy object
36  $tree = $factory->run($bio_simplalign, $bio_db_taxonomy);
37
38  # (mixtures of all the above are possible)
39
40  # $tree isa Bio::Tree::Tree
41
42=head1 DESCRIPTION
43
44This is a wrapper for running the Semphy application by N. Friedman et a.. You
45can get details here: http://compbio.cs.huji.ac.il/semphy/. Semphy is used for
46phylogenetic reconstruction (making a tree with branch lengths from an aligned
47set of input sequences).
48
49You can try supplying normal Semphy command-line arguments to new(), eg.
50new(-hky => 1) or calling arg-named methods (excluding the initial hyphen(s),
51eg. $factory->hky(1) to set the --hky switch to true).
52Note that Semphy args are case-sensitive. To distinguish between Bioperl's
53-verbose and the Semphy's --verbose, you must set Semphy's verbosity with
54-semphy_verbose or the semphy_verbose() method.
55
56
57You will need to enable this Semphy wrapper to find the Semphy program.
58This can be done in (at least) three ways:
59
60 1. Make sure the Semphy executable is in your path.
61 2. Define an environmental variable SEMPHYDIR which is a
62    directory which contains the Semphy application:
63    In bash:
64
65    export SEMPHYDIR=/home/username/semphy/
66
67    In csh/tcsh:
68
69    setenv SEMPHYDIR /home/username/semphy
70
71 3. Include a definition of an environmental variable SEMPHYDIR in
72    every script that will use this Semphy wrapper module, e.g.:
73
74    BEGIN { $ENV{SEMPHYDIR} = '/home/username/semphy/' }
75    use Bio::Tools::Run::Phylo::Semphy;
76
77=head1 FEEDBACK
78
79=head2 Mailing Lists
80
81User feedback is an integral part of the evolution of this and other
82Bioperl modules. Send your comments and suggestions preferably to
83the Bioperl mailing list.  Your participation is much appreciated.
84
85  bioperl-l@bioperl.org                  - General discussion
86  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
87
88=head2 Support
89
90Please direct usage questions or support issues to the mailing list:
91
92I<bioperl-l@bioperl.org>
93
94rather than to the module maintainer directly. Many experienced and
95reponsive experts will be able look at the problem and quickly
96address it. Please include a thorough description of the problem
97with code and data examples if at all possible.
98
99=head2 Reporting Bugs
100
101Report bugs to the Bioperl bug tracking system to help us keep track
102of the bugs and their resolution. Bug reports can be submitted via
103the web:
104
105  http://redmine.open-bio.org/projects/bioperl/
106
107=head1 AUTHOR - Sendu Bala
108
109Email bix@sendu.me.uk
110
111=head1 APPENDIX
112
113The rest of the documentation details each of the object methods.
114Internal methods are usually preceded with a _
115
116=cut
117
118package Bio::Tools::Run::Phylo::Semphy;
119use strict;
120
121use File::Spec;
122use Bio::AlignIO;
123use Bio::TreeIO;
124
125use base qw(Bio::Tools::Run::Phylo::PhyloBase);
126
127our $PROGRAM_NAME = 'semphy';
128our $PROGRAM_DIR = $ENV{'SEMPHYDIR'};
129
130# methods for the semphy args we support
131our %PARAMS   = (outputfile => 'o',
132                 treeoutputfile => 'T',
133                 constraint => 'c',
134                 gaps => 'g',
135                 seed => 'r',
136                 Logfile => 'l',
137                 alphabet => 'a',
138                 ratio => 'z',
139                 ACGprob => 'p',
140                 BPrepeats => 'BPrepeats',
141                 BPconsensus => 'BPconsensus',
142                 SEMPHY => 'S',
143                 modelfile => 'modelfile',
144                 alpha => 'A',
145                 categories => 'C',
146                 semphy_verbose => 'semphy_verbose');
147our %SWITCHES = (homogeneousRatesDTME => 'homogeneousRatesDTME',
148                 NJ => 'J',
149                 pairwiseGammaDTME => 'pairwiseGammaDTME',
150                 commonAlphaDTME => 'commonAlphaDTME',
151                 rate4siteDTME => 'rate4siteDTME',
152                 posteriorDTME => 'posteriorDTME',
153                 BPonUserTree => 'BPonUserTree',
154                 nucjc => 'nucjc',
155                 aaJC => 'aaJC',
156                 k2p => 'k2p',
157                 hky => 'hky',
158                 day => 'day',
159                 jtt => 'jtt',
160                 rev => 'rev',
161                 wag => 'wag',
162                 cprev => 'cprev',
163                 homogeneous => 'H',
164                 optimizeAlpha => 'O',
165                 bbl => 'n',
166                 likelihood => 'L',
167                 PerPosLike => 'P',
168                 PerPosPosterior => 'B',
169                 rate => 'R');
170
171# just to be explicit, args we don't support (yet) or we handle ourselves
172our @UNSUPPORTED = qw(h help full-help s sequence t tree);
173
174
175=head2 program_name
176
177 Title   : program_name
178 Usage   : $factory>program_name()
179 Function: holds the program name
180 Returns : string
181 Args    : None
182
183=cut
184
185sub program_name {
186    return $PROGRAM_NAME;
187}
188
189=head2 program_dir
190
191 Title   : program_dir
192 Usage   : $factory->program_dir(@params)
193 Function: returns the program directory, obtained from ENV variable.
194 Returns : string
195 Args    : None
196
197=cut
198
199sub program_dir {
200    return $PROGRAM_DIR;
201}
202
203=head2 new
204
205 Title   : new
206 Usage   : $factory = Bio::Tools::Run::Phylo::Semphy->new()
207 Function: creates a new Semphy factory
208 Returns : Bio::Tools::Run::Phylo::Semphy
209 Args    : Most options understood by Semphy can be supplied as key =>
210           value pairs, with a true value for switches.
211
212           These options can NOT be used with this wrapper (they are handled
213           internally or don't make sense in this context):
214           -h | --help | --fill-help
215           -s | --sequence
216           -t | --tree
217
218           To distinguish between Bioperl's -verbose and the Semphy's --verbose,
219           you must set Semphy's verbosity with -semphy_verbose
220
221=cut
222
223sub new {
224    my ($class, @args) = @_;
225    my $self = $class->SUPER::new(@args);
226
227    $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS),
228                                               (map { $_ => $SWITCHES{$_} } keys %SWITCHES),
229                                               quiet => 'quiet'},
230                                  -create => 1,
231                                  -case_sensitive => 1);
232
233    return $self;
234}
235
236=head2 run
237
238 Title   : run
239 Usage   : $result = $factory->run($fasta_align_file);
240           -or-
241           $result = $factory->run($align_object);
242           -or-
243           $result = $factory->run($fasta_align_file, $newick_tree_file);
244           -or-
245           $result = $factory->run($align_object, $tree_object);
246           -or-
247           $result = $factory->run($align_object, $db_taxonomy_object);
248 Function: Runs Semphy on an alignment.
249 Returns : Bio::Tree::Tree
250 Args    : The first argument represents an alignment, the second (optional)
251           argument a species tree (to set an initial tree: normally the -t
252           option to Semphy).
253           The alignment can be provided as a multi-fasta format alignment
254           filename, or a Bio::Align::AlignI compliant object (eg. a
255           Bio::SimpleAlign).
256           The species tree can be provided as a newick format tree filename
257           or a Bio::Tree::TreeI compliant object. Alternatively a
258           Bio::DB::Taxonomy object can be supplied, in which case the species
259           tree will be generated by using the alignment sequence names as
260           species names and looking for those in the supplied database.
261
262           In all cases where an initial tree was supplied, the alignment
263           sequence names must correspond to node ids in the species tree.
264
265=cut
266
267sub run {
268    my ($self, $aln, $tree) = @_;
269
270    $aln || $self->throw("alignment must be supplied");
271    $self->_alignment($aln);
272
273    if ($tree) {
274        $self->_tree($tree);
275
276        # check node and seq names match
277        $self->_check_names;
278    }
279
280    return $self->_run;
281}
282
283sub _run {
284    my $self = shift;
285
286    my $exe = $self->executable || return;
287
288    my $aln_file = $self->_write_alignment;
289
290    # generate a semphy-friendly tree file
291    my $tree = $self->_tree;
292    my $tree_file = '';
293    if ($tree) {
294        $tree = $self->_write_tree;
295    }
296
297    unless ($self->T) {
298        my ($tfh, $tempfile) = $self->io->tempfile(-dir => $self->tempdir);
299        $self->T($tempfile);
300        close($tfh);
301    }
302
303    my $command = $exe.$self->_setparams($aln_file, $tree_file);
304    $self->debug("semphy command = $command\n");
305
306    open(my $pipe, "$command |") || $self->throw("semphy call ($command) failed to start: $? | $!");
307    my $error = '';
308    while (<$pipe>) {
309        print unless $self->quiet;
310        $error .= $_;
311    }
312    close($pipe) || ($error ? $self->warn("semphy call ($command) failed: $error") : $self->throw("semphy call ($command) crashed: $?"));
313
314    my $result_file = $self->T();
315    my $tio = Bio::TreeIO->new(-format => 'newick', -file => $result_file);
316    my $result_tree = $tio->next_tree;
317
318    return $result_tree;
319}
320
321=head2 _setparams
322
323 Title   : _setparams
324 Usage   : Internal function, not to be called directly
325 Function: Creates a string of params to be used in the command string
326 Returns : string of params
327 Args    : alignment and tree file names
328
329=cut
330
331sub _setparams {
332    my ($self, $aln_file, $tree_file) = @_;
333
334    my $param_string = ' -s '.$aln_file;
335    $param_string .= ' -t '.$tree_file if $tree_file;
336
337    my %methods = map { $_ => $_ } keys %PARAMS;
338    $methods{'semphy_verbose'} = 'verbose';
339    $param_string .= $self->SUPER::_setparams(-params => \%methods,
340                                              -switches => [keys %SWITCHES],
341                                              -double_dash => 1);
342
343    $param_string .= ' 2>&1';
344    my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
345    $param_string .= " 1>$null" if $self->quiet;
346
347    return $param_string;
348}
349
3501;
351