1#
2# BioPerl module for Bio::Tools::Run::Phylo::Gerp
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Sendu Bala <bix@sendu.me.uk>
7#
8# Copyright Sendu Bala
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14=head1 NAME
15
16Bio::Tools::Run::Gerp - Wrapper for GERP
17
18=head1 SYNOPSIS
19
20  use Bio::Tools::Run::Phylo::Gerp;
21
22  # Make a Gerp factory
23  $factory = Bio::Tools::Run::Phylo::Gerp->new();
24
25  # Run Gerp with an alignment and tree file
26  my $parser = $factory->run($alignfilename, $treefilename);
27
28  # or with alignment object and tree object (which needs branch lengths)
29  $parser = $factory->run($bio_simplalign, $bio_tree_tree);
30
31  # (mixtures of the above are possible)
32
33  # look at the results
34  while (my $feat = $parser->next_result) {
35    my $start = $feat->start;
36    my $end = $feat->end;
37    my $rs_score = $feat->score;
38    my $p_value = ($feat->annotation->get_Annotations('p-value'))[0]->value;
39  }
40
41=head1 DESCRIPTION
42
43This is a wrapper for running the GERP (v2) programs 'gerpcol' and 'gerpelem' by
44Eugene Davydov (originally Gregory M. Cooper et al.). You can get details here:
45http://mendel.stanford.edu/sidowlab/. GERP can be used for phylogenetic
46footprinting/ shadowing (it finds 'constrained elements in multiple
47alignments').
48
49You can try supplying normal gerpcol/gerpelem command-line arguments to new(),
50eg. $factory-E<gt>new(-e =E<gt> 0.05) or calling arg-named methods, eg.
51$factory-E<gt>e(0.05). The filename-related args (t, f, x) are handled internally
52by the run() method. This wrapper currently only supports running GERP on a
53single alignment at a time (ie. F isn't used at all, nor are multiple fs
54possible).
55
56
57You will need to enable this GERP wrapper to find the GERP executables.
58This can be done in (at least) three ways:
59
60 1. Make sure gerpcol and gerpelem are in your path.
61 2. Define an environmental variable GERPDIR which is a
62    directory which contains the GERP executables:
63    In bash:
64
65    export GERPDIR=/home/username/gerp/
66
67    In csh/tcsh:
68
69    setenv GERPDIR /home/username/gerp
70
71 3. Include a definition of an environmental variable GERPDIR in
72    every script that will use this GERP wrapper module, e.g.:
73
74    BEGIN { $ENV{GERPDIR} = '/home/username/gerp/' }
75    use Bio::Tools::Run::Phylo::Gerp;
76
77=head1 FEEDBACK
78
79=head2 Mailing Lists
80
81User feedback is an integral part of the evolution of this and other
82Bioperl modules. Send your comments and suggestions preferably to
83the Bioperl mailing list.  Your participation is much appreciated.
84
85  bioperl-l@bioperl.org                  - General discussion
86  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
87
88=head2 Support
89
90Please direct usage questions or support issues to the mailing list:
91
92I<bioperl-l@bioperl.org>
93
94rather than to the module maintainer directly. Many experienced and
95reponsive experts will be able look at the problem and quickly
96address it. Please include a thorough description of the problem
97with code and data examples if at all possible.
98
99=head2 Reporting Bugs
100
101Report bugs to the Bioperl bug tracking system to help us keep track
102of the bugs and their resolution. Bug reports can be submitted via
103the web:
104
105  http://redmine.open-bio.org/projects/bioperl/
106
107=head1 AUTHOR - Sendu Bala
108
109Email bix@sendu.me.uk
110
111=head1 APPENDIX
112
113The rest of the documentation details each of the object methods.
114Internal methods are usually preceded with a _
115
116=cut
117
118package Bio::Tools::Run::Phylo::Gerp;
119use strict;
120
121use Cwd;
122use File::Spec;
123use File::Basename;
124use Bio::AlignIO;
125use Bio::TreeIO;
126use Bio::Tools::Phylo::Gerp;
127
128use base qw(Bio::Tools::Run::Phylo::PhyloBase);
129
130our $PROGRAM_NAME = 'gerpcol';
131our $PROGRAM_DIR;
132
133# methods for the gerp args we support
134our @COLPARAMS   = qw(r n s);
135our @ELEMPARAMS  = qw(l L t d p b a c r e);
136our @SWITCHES    = qw(v);
137
138# just to be explicit, args we don't support (yet) or we handle ourselves
139our @UNSUPPORTED = qw(h t f F x);
140
141BEGIN {
142    # lets add all the gerp executables to the path
143    $PROGRAM_DIR = $ENV{'GERPDIR'};
144    $ENV{PATH} = "$PROGRAM_DIR:$ENV{PATH}" if $PROGRAM_DIR;
145}
146
147=head2 program_name
148
149 Title   : program_name
150 Usage   : $factory>program_name()
151 Function: holds the program name
152 Returns : string
153 Args    : None
154
155=cut
156
157sub program_name {
158    my $self = shift;
159    if (@_) { $self->{program_name} = shift }
160    return $self->{program_name} || $PROGRAM_NAME;
161}
162
163=head2 program_dir
164
165 Title   : program_dir
166 Usage   : $factory->program_dir(@params)
167 Function: returns the program directory, obtained from ENV variable.
168 Returns : string
169 Args    : None
170
171=cut
172
173sub program_dir {
174    return $PROGRAM_DIR;
175}
176
177=head2 new
178
179 Title   : new
180 Usage   : $factory = Bio::Tools::Run::Phylo::Gerp->new()
181 Function: creates a new GERP factory
182 Returns : Bio::Tools::Run::Phylo::Gerp
183 Args    : Most options understood by GERP can be supplied as key =>
184           value pairs.
185
186           These options can NOT be used with this wrapper:
187           h, t, f, F and x
188
189=cut
190
191sub new {
192    my ($class, @args) = @_;
193    my $self = $class->SUPER::new(@args);
194
195    $self->_set_from_args(\@args, -methods => [@COLPARAMS, @ELEMPARAMS,
196                                               @SWITCHES, 'quiet'],
197                                  -create => 1);
198
199    return $self;
200}
201
202=head2 run
203
204 Title   : run
205 Usage   : $parser = $factory->run($align_file, $tree_file);
206           -or-
207           $parser = $factory->run($align_object, $tree_object);
208 Function: Runs GERP on an alignment.
209 Returns : Bio::Tools::Phylo::Gerp parser object, containing the results
210 Args    : The first argument represents an alignment, the second argument
211           a phylogenetic tree with branch lengths.
212           The alignment can be provided as a MAF format alignment
213           filename, or a Bio::Align::AlignI compliant object (eg. a
214           Bio::SimpleAlign).
215           The species tree can be provided as a newick format tree filename
216           or a Bio::Tree::TreeI compliant object.
217
218           In all cases, the alignment sequence names must correspond to node
219           ids in the tree. Multi-word species names should have the
220           spaces replaced with underscores (eg. Homo_sapiens)
221
222=cut
223
224sub run {
225    my ($self, $aln, $tree) = @_;
226    $self->_alignment($aln || $self->throw("An alignment must be supplied"));
227    $self->_tree($tree || $self->throw("A phylo tree must be supplied"));
228
229    # check node and seq names match
230    $self->_check_names;
231
232    return $self->_run;
233}
234
235sub _run {
236    my $self = shift;
237
238    $self->executable || return;
239
240    # cd to a temp dir
241    my $temp_dir = $self->tempdir;
242    my $cwd = Cwd->cwd();
243    chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
244
245    foreach my $prog ('gerpcol', 'gerpelem') {
246        delete $self->{'_pathtoexe'};
247        $self->program_name($prog);
248        my $exe = $self->executable || $self->throw("'$prog' executable not found");
249
250        my $command = $exe.$self->_setparams($prog);
251        $self->debug("gerp command = $command\n");
252
253        #eval {
254        #    local $SIG{ALRM} = sub { die "alarm\n" };
255        #    alarm 60;
256        #    system($command) && $self->throw("gerp call ($command) failed: $! | $?");
257        #    alarm 0;
258        #};
259        #die if $@ && $@ ne "alarm\n";
260        #if ($@) {
261        #    die "Gerp timed out\n";
262        #}
263        #
264        # system("rm -fr $cwd/gerp_dir; cp -R $temp_dir $cwd/gerp_dir");
265
266        open(my $pipe, "$command |") || $self->throw("gerp call ($command) failed to start: $? | $!");
267        my $error = '';
268        my $warning = '';
269        while (<$pipe>) {
270            if ($self->quiet) {
271                $error .= $_;
272                $warning .= $_ if /warning/i;
273            }
274            else {
275                print;
276            }
277        }
278        close($pipe) || ($error ? $self->throw("gerp call ($command) failed: $error") : $self->throw("gerp call ($command) crashed: $?"));
279
280        # (throws most likely due to seg fault in gerpelem when ~25000 entries
281        #  in rates file, not much I can do about it!)
282
283        $self->warn("GERP: ".$warning) if $warning;
284    }
285
286    #system("rm -fr $cwd/gerp_dir; cp -R $temp_dir $cwd/gerp_dir");
287
288    my $result_file = $self->{align_base}.'.rates.elems';
289    my $parser = Bio::Tools::Phylo::Gerp->new(-file => $result_file);
290
291    # cd back again
292    chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
293
294    return $parser;
295}
296
297=head2 _setparams
298
299 Title   : _setparams
300 Usage   : Internal function, not to be called directly
301 Function: Creates a string of params to be used in the command string
302 Returns : string of params
303 Args    : none
304
305=cut
306
307sub _setparams {
308    my ($self, $prog) = @_;
309
310    my $param_string;
311    if ($prog eq 'gerpcol') {
312        my $align_file = $self->_write_alignment;
313        $param_string .= ' -f '.$align_file;
314        $self->{align_base} = basename($align_file);
315        $param_string .= ' -t '.$self->_write_tree;
316        $param_string .= $self->SUPER::_setparams(-params => \@COLPARAMS,
317                                                  -switches => \@SWITCHES,
318                                                  -dash => 1);
319    }
320    else {
321        $param_string .= ' -f '.$self->{align_base}.'.rates';
322        $param_string .= $self->SUPER::_setparams(-params => \@ELEMPARAMS,
323                                                  -switches => \@SWITCHES,
324                                                  -dash => 1);
325    }
326
327    $param_string .= " 2>&1";
328
329    return $param_string;
330}
331
3321;
333