1# $Id$
2#
3# BioPerl module for Bio::Tools::Run::Phylo::Phast::PhyloFit
4#
5# Please direct questions and support issues to <bioperl-l@bioperl.org>
6#
7# Cared for by Sendu Bala <bix@sendu.me.uk>
8#
9# Copyright Sendu Bala
10#
11# You may distribute this module under the same terms as perl itself
12
13# POD documentation - main docs before the code
14
15=head1 NAME
16
17Bio::Tools::Run::Phylo::Phast::PhyloFit - Wrapper for phyloFit
18
19=head1 SYNOPSIS
20
21  use Bio::Tools::Run::Phylo::Phast::PhyloFit;
22
23  # Make a PhyloFit factory
24  $factory = Bio::Tools::Run::Phylo::Phast::PhastCons->new();
25
26  # Generate an init.mod file for use by phastCons
27  my $init_file = $factory->run($alignment, $tree);
28
29=head1 DESCRIPTION
30
31This is a wrapper for running the phyloFit application by Adam Siepel. You
32can get details here: http://compgen.bscb.cornell.edu/~acs/software.html
33
34Currently the interface is extremely simplified. Only the --tree form of usage
35is allowed (not --init-model), which means a tree must be supplied with the
36alignment (to run()). You can try supplying normal phyloFit arguments to new(),
37or calling arg-named methods (excluding initial hyphens and converting others
38to underscores, eg. $factory-E<gt>gaps_as_bases(1) to set the --gaps-as-bases arg).
39
40WARNING: the API may change in the future to allow for greater flexability and
41access to more phyloFit features.
42
43
44You will need to enable this PhyloFit wrapper to find the phast programs (at
45least phyloFit itself).
46This can be done in (at least) three ways:
47
48 1. Make sure the phyloFit executable is in your path.
49 2. Define an environmental variable PHASTDIR which is a
50    directory which contains the phyloFit application:
51    In bash:
52
53    export PHASTDIR=/home/username/phast/bin
54
55    In csh/tcsh:
56
57    setenv PHASTDIR /home/username/phast/bin
58
59 3. Include a definition of an environmental variable PHASTDIR in
60    every script that will use this PhyloFit wrapper module, e.g.:
61
62    BEGIN { $ENV{PHASTDIR} = '/home/username/phast/bin' }
63    use Bio::Tools::Run::Phylo::Phast::PhyloFit;
64
65=head1 FEEDBACK
66
67=head2 Mailing Lists
68
69User feedback is an integral part of the evolution of this and other
70Bioperl modules. Send your comments and suggestions preferably to
71the Bioperl mailing list.  Your participation is much appreciated.
72
73  bioperl-l@bioperl.org                  - General discussion
74  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
75
76=head2 Support
77
78Please direct usage questions or support issues to the mailing list:
79
80I<bioperl-l@bioperl.org>
81
82rather than to the module maintainer directly. Many experienced and
83reponsive experts will be able look at the problem and quickly
84address it. Please include a thorough description of the problem
85with code and data examples if at all possible.
86
87=head2 Reporting Bugs
88
89Report bugs to the Bioperl bug tracking system to help us keep track
90of the bugs and their resolution. Bug reports can be submitted via
91the web:
92
93  http://redmine.open-bio.org/projects/bioperl/
94
95=head1 AUTHOR - Sendu Bala
96
97Email bix@sendu.me.uk
98
99=head1 APPENDIX
100
101The rest of the documentation details each of the object methods.
102Internal methods are usually preceded with a _
103
104=cut
105
106package Bio::Tools::Run::Phylo::Phast::PhyloFit;
107use strict;
108
109use Cwd;
110use File::Spec;
111use Bio::AlignIO;
112use Bio::TreeIO;
113
114use base qw(Bio::Tools::Run::Phylo::PhyloBase);
115
116our $PROGRAM_NAME = 'phyloFit';
117our $PROGRAM_DIR = $ENV{'PHASTDIR'};
118
119# methods and their synonyms from the phastCons args we support
120our %PARAMS   = (subst_mod => 's',
121                 min_informative => 'I',
122                 precision => 'p',
123                 log => 'l',
124                 ancestor => 'A',
125                 nrates => 'k',
126                 alpha => 'a',
127                 rate_constants => 'K',
128                 features => 'g',
129                 catmap => 'c',
130                 do_cats => 'C',
131                 reverse_groups => 'R');
132
133our %SWITCHES = (gaps_as_bases => 'G',
134                 quiet => 'q',
135                 EM => 'E',
136                 init_random => 'r',
137                 estimate_freqs => 'F',
138                 markov => 'N',
139                 non_overlapping => 'V');
140
141# just to be explicit, args we don't support (yet) or we handle ourselves
142our %UNSUPPORTED = (msa_format => 'i',
143                    out_root => 'o',
144                    tree => 't',
145                    help => 'h',
146                    lnl => 'L',
147                    init_model => 'M',
148                    scale_only => 'B',
149                    scale_subtree => 'S',
150                    no_freqs => 'f',
151                    no_rates => 'n',
152                    post_probs => 'P',
153                    expected_subs => 'X',
154                    expected_total_subs => 'Z',
155                    column_probs => 'U',
156                    windows => 'w',
157                    windows_explicit => 'v');
158
159=head2 program_name
160
161 Title   : program_name
162 Usage   : $factory>program_name()
163 Function: holds the program name
164 Returns : string
165 Args    : None
166
167=cut
168
169sub program_name {
170    return $PROGRAM_NAME;
171}
172
173=head2 program_dir
174
175 Title   : program_dir
176 Usage   : $factory->program_dir(@params)
177 Function: returns the program directory, obtained from ENV variable.
178 Returns : string
179 Args    : None
180
181=cut
182
183sub program_dir {
184    return $PROGRAM_DIR;
185}
186
187=head2 new
188
189 Title   : new
190 Usage   : $factory = Bio::Tools::Run::Phylo::Phast::PhyloFit->new()
191 Function: creates a new PhyloFit factory
192 Returns : Bio::Tools::Run::Phylo::Phast::PhyloFit
193 Args    : Most options understood by phastCons can be supplied as key =>
194           value pairs. Options that don't normally take a value
195           should be given a value of 1. You can type the keys as you would on
196           the command line (eg. '--gaps-as-bases' => 1) or with only a single
197           hyphen to start and internal hyphens converted to underscores (eg.
198           -gaps_as_bases => 1) to avoid having to quote the key.
199
200           These options can NOT be used with this wrapper currently:
201           msa_format / i
202           out_root / o
203           tree / t
204           help / h
205           lnl / L
206           init_model / M
207           scale_only / B
208           scale_subtree / S
209           no_freqs / f
210           no_rates / n
211           post_probs / P
212           expected_subs / X
213           expected_total_subs / Z
214           column_probs / U
215           windows / w
216           windows_explicit / v
217
218=cut
219
220sub new {
221    my ($class, @args) = @_;
222    my $self = $class->SUPER::new(@args);
223
224    $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS),
225                                               (map { $_ => $SWITCHES{$_} } keys %SWITCHES)},
226                                  -create => 1);
227
228    return $self;
229}
230
231=head2 run
232
233 Title   : run
234 Usage   : $result = $factory->run($fasta_align_file, $newick_tree_file);
235           -or-
236           $result = $factory->run($align_object, $tree_object);
237           -or-
238           $result = $factory->run($align_object, $db_taxonomy_object);
239 Function: Runs phyloFit on an alignment.
240 Returns : filename of init.mod file produced
241 Args    : The first argument represents an alignment, the second argument
242           a species tree.
243           The alignment can be provided as a multi-fasta format alignment
244           filename, or a Bio::Align::AlignI compliant object (eg. a
245           Bio::SimpleAlign).
246           The species tree can be provided as a newick format tree filename
247           or a Bio::Tree::TreeI compliant object. Alternatively a
248           Bio::DB::Taxonomy object can be supplied, in which case the species
249           tree will be generated by using the alignment sequence names as
250           species names and looking for those in the supplied database.
251
252           In all cases, the alignment sequence names must correspond to node
253           ids in the species tree. Multi-word species names should be joined
254           with underscores to form the sequence names, eg. Homo_sapiens
255
256=cut
257
258sub run {
259    my ($self, $aln, $tree) = @_;
260
261    ($aln && $tree) || $self->throw("alignment and tree must be supplied");
262    $self->_alignment($aln);
263    $tree = $self->_tree($tree);
264
265    $tree->force_binary;
266
267    # adjust tree node ids to convert spaces to underscores (eg. if tree
268    # generated from taxonomy)
269    foreach my $node ($tree->get_leaf_nodes) {
270        my $id = $node->id;
271        $id =~ s/ /_/g;
272        $node->id($id);
273    }
274
275    # check node and seq names match
276    $self->_check_names;
277
278    return $self->_run;
279}
280
281sub _run {
282    my $self = shift;
283
284    my $exe = $self->executable || return;
285
286    # cd to a temp dir
287    my $temp_dir = $self->tempdir;
288    my $cwd = Cwd->cwd();
289    chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
290
291    my $aln_file = $self->_write_alignment;
292    my $tree_file = $self->_write_tree;
293
294    #...phyloFit --tree "(human,(mouse,rat))" --msa-format FASTA --out-root init alignment.fa
295    my $command = $exe.$self->_setparams($aln_file, $tree_file);
296    $self->debug("phyloFit command = $command\n");
297    system($command) && $self->throw("phyloFit call ($command) crashed: $?");
298
299    # cd back again
300    chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
301
302    return File::Spec->catfile($temp_dir, 'init.mod');
303}
304
305=head2 _setparams
306
307 Title   : _setparams
308 Usage   : Internal function, not to be called directly
309 Function: Creates a string of params to be used in the command string
310 Returns : string of params
311 Args    : alignment and tree file names
312
313=cut
314
315sub _setparams {
316    my ($self, $aln_file, $tree_file) = @_;
317
318    my $param_string = ' --tree '.$tree_file;
319    $param_string .= ' --msa-format FASTA';
320    $param_string .= ' --out-root init';
321
322    # --min-informative defaults to 50, but must not be greater than the number
323    # of bases in the alignment
324    my $aln = $self->_alignment;
325    my $length = $aln->length;
326    my $min_informative = $self->min_informative || 50;
327    if ($length < $min_informative) {
328        $self->min_informative($length);
329    }
330
331    $param_string .= $self->SUPER::_setparams(-params => [keys %PARAMS],
332                                              -switches => [keys %SWITCHES],
333                                              -double_dash => 1,
334                                              -underscore_to_dash => 1);
335    $param_string .= ' '.$aln_file;
336
337    return $param_string;
338}
339
3401;
341