1# 2# BioPerl module for Bio::Tools::Run::Phylo::Gerp 3# 4# Please direct questions and support issues to <bioperl-l@bioperl.org> 5# 6# Cared for by Sendu Bala <bix@sendu.me.uk> 7# 8# Copyright Sendu Bala 9# 10# You may distribute this module under the same terms as perl itself 11 12# POD documentation - main docs before the code 13 14=head1 NAME 15 16Bio::Tools::Run::Gerp - Wrapper for GERP 17 18=head1 SYNOPSIS 19 20 use Bio::Tools::Run::Phylo::Gerp; 21 22 # Make a Gerp factory 23 $factory = Bio::Tools::Run::Phylo::Gerp->new(); 24 25 # Run Gerp with an alignment and tree file 26 my $parser = $factory->run($alignfilename, $treefilename); 27 28 # or with alignment object and tree object (which needs branch lengths) 29 $parser = $factory->run($bio_simplalign, $bio_tree_tree); 30 31 # (mixtures of the above are possible) 32 33 # look at the results 34 while (my $feat = $parser->next_result) { 35 my $start = $feat->start; 36 my $end = $feat->end; 37 my $rs_score = $feat->score; 38 my $p_value = ($feat->annotation->get_Annotations('p-value'))[0]->value; 39 } 40 41=head1 DESCRIPTION 42 43This is a wrapper for running the GERP (v2) programs 'gerpcol' and 'gerpelem' by 44Eugene Davydov (originally Gregory M. Cooper et al.). You can get details here: 45http://mendel.stanford.edu/sidowlab/. GERP can be used for phylogenetic 46footprinting/ shadowing (it finds 'constrained elements in multiple 47alignments'). 48 49You can try supplying normal gerpcol/gerpelem command-line arguments to new(), 50eg. $factory-E<gt>new(-e =E<gt> 0.05) or calling arg-named methods, eg. 51$factory-E<gt>e(0.05). The filename-related args (t, f, x) are handled internally 52by the run() method. This wrapper currently only supports running GERP on a 53single alignment at a time (ie. F isn't used at all, nor are multiple fs 54possible). 55 56 57You will need to enable this GERP wrapper to find the GERP executables. 58This can be done in (at least) three ways: 59 60 1. Make sure gerpcol and gerpelem are in your path. 61 2. Define an environmental variable GERPDIR which is a 62 directory which contains the GERP executables: 63 In bash: 64 65 export GERPDIR=/home/username/gerp/ 66 67 In csh/tcsh: 68 69 setenv GERPDIR /home/username/gerp 70 71 3. Include a definition of an environmental variable GERPDIR in 72 every script that will use this GERP wrapper module, e.g.: 73 74 BEGIN { $ENV{GERPDIR} = '/home/username/gerp/' } 75 use Bio::Tools::Run::Phylo::Gerp; 76 77=head1 FEEDBACK 78 79=head2 Mailing Lists 80 81User feedback is an integral part of the evolution of this and other 82Bioperl modules. Send your comments and suggestions preferably to 83the Bioperl mailing list. Your participation is much appreciated. 84 85 bioperl-l@bioperl.org - General discussion 86 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 87 88=head2 Support 89 90Please direct usage questions or support issues to the mailing list: 91 92I<bioperl-l@bioperl.org> 93 94rather than to the module maintainer directly. Many experienced and 95reponsive experts will be able look at the problem and quickly 96address it. Please include a thorough description of the problem 97with code and data examples if at all possible. 98 99=head2 Reporting Bugs 100 101Report bugs to the Bioperl bug tracking system to help us keep track 102of the bugs and their resolution. Bug reports can be submitted via 103the web: 104 105 http://redmine.open-bio.org/projects/bioperl/ 106 107=head1 AUTHOR - Sendu Bala 108 109Email bix@sendu.me.uk 110 111=head1 APPENDIX 112 113The rest of the documentation details each of the object methods. 114Internal methods are usually preceded with a _ 115 116=cut 117 118package Bio::Tools::Run::Phylo::Gerp; 119use strict; 120 121use Cwd; 122use File::Spec; 123use File::Basename; 124use Bio::AlignIO; 125use Bio::TreeIO; 126use Bio::Tools::Phylo::Gerp; 127 128use base qw(Bio::Tools::Run::Phylo::PhyloBase); 129 130our $PROGRAM_NAME = 'gerpcol'; 131our $PROGRAM_DIR; 132 133# methods for the gerp args we support 134our @COLPARAMS = qw(r n s); 135our @ELEMPARAMS = qw(l L t d p b a c r e); 136our @SWITCHES = qw(v); 137 138# just to be explicit, args we don't support (yet) or we handle ourselves 139our @UNSUPPORTED = qw(h t f F x); 140 141BEGIN { 142 # lets add all the gerp executables to the path 143 $PROGRAM_DIR = $ENV{'GERPDIR'}; 144 $ENV{PATH} = "$PROGRAM_DIR:$ENV{PATH}" if $PROGRAM_DIR; 145} 146 147=head2 program_name 148 149 Title : program_name 150 Usage : $factory>program_name() 151 Function: holds the program name 152 Returns : string 153 Args : None 154 155=cut 156 157sub program_name { 158 my $self = shift; 159 if (@_) { $self->{program_name} = shift } 160 return $self->{program_name} || $PROGRAM_NAME; 161} 162 163=head2 program_dir 164 165 Title : program_dir 166 Usage : $factory->program_dir(@params) 167 Function: returns the program directory, obtained from ENV variable. 168 Returns : string 169 Args : None 170 171=cut 172 173sub program_dir { 174 return $PROGRAM_DIR; 175} 176 177=head2 new 178 179 Title : new 180 Usage : $factory = Bio::Tools::Run::Phylo::Gerp->new() 181 Function: creates a new GERP factory 182 Returns : Bio::Tools::Run::Phylo::Gerp 183 Args : Most options understood by GERP can be supplied as key => 184 value pairs. 185 186 These options can NOT be used with this wrapper: 187 h, t, f, F and x 188 189=cut 190 191sub new { 192 my ($class, @args) = @_; 193 my $self = $class->SUPER::new(@args); 194 195 $self->_set_from_args(\@args, -methods => [@COLPARAMS, @ELEMPARAMS, 196 @SWITCHES, 'quiet'], 197 -create => 1); 198 199 return $self; 200} 201 202=head2 run 203 204 Title : run 205 Usage : $parser = $factory->run($align_file, $tree_file); 206 -or- 207 $parser = $factory->run($align_object, $tree_object); 208 Function: Runs GERP on an alignment. 209 Returns : Bio::Tools::Phylo::Gerp parser object, containing the results 210 Args : The first argument represents an alignment, the second argument 211 a phylogenetic tree with branch lengths. 212 The alignment can be provided as a MAF format alignment 213 filename, or a Bio::Align::AlignI compliant object (eg. a 214 Bio::SimpleAlign). 215 The species tree can be provided as a newick format tree filename 216 or a Bio::Tree::TreeI compliant object. 217 218 In all cases, the alignment sequence names must correspond to node 219 ids in the tree. Multi-word species names should have the 220 spaces replaced with underscores (eg. Homo_sapiens) 221 222=cut 223 224sub run { 225 my ($self, $aln, $tree) = @_; 226 $self->_alignment($aln || $self->throw("An alignment must be supplied")); 227 $self->_tree($tree || $self->throw("A phylo tree must be supplied")); 228 229 # check node and seq names match 230 $self->_check_names; 231 232 return $self->_run; 233} 234 235sub _run { 236 my $self = shift; 237 238 $self->executable || return; 239 240 # cd to a temp dir 241 my $temp_dir = $self->tempdir; 242 my $cwd = Cwd->cwd(); 243 chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'"); 244 245 foreach my $prog ('gerpcol', 'gerpelem') { 246 delete $self->{'_pathtoexe'}; 247 $self->program_name($prog); 248 my $exe = $self->executable || $self->throw("'$prog' executable not found"); 249 250 my $command = $exe.$self->_setparams($prog); 251 $self->debug("gerp command = $command\n"); 252 253 #eval { 254 # local $SIG{ALRM} = sub { die "alarm\n" }; 255 # alarm 60; 256 # system($command) && $self->throw("gerp call ($command) failed: $! | $?"); 257 # alarm 0; 258 #}; 259 #die if $@ && $@ ne "alarm\n"; 260 #if ($@) { 261 # die "Gerp timed out\n"; 262 #} 263 # 264 # system("rm -fr $cwd/gerp_dir; cp -R $temp_dir $cwd/gerp_dir"); 265 266 open(my $pipe, "$command |") || $self->throw("gerp call ($command) failed to start: $? | $!"); 267 my $error = ''; 268 my $warning = ''; 269 while (<$pipe>) { 270 if ($self->quiet) { 271 $error .= $_; 272 $warning .= $_ if /warning/i; 273 } 274 else { 275 print; 276 } 277 } 278 close($pipe) || ($error ? $self->throw("gerp call ($command) failed: $error") : $self->throw("gerp call ($command) crashed: $?")); 279 280 # (throws most likely due to seg fault in gerpelem when ~25000 entries 281 # in rates file, not much I can do about it!) 282 283 $self->warn("GERP: ".$warning) if $warning; 284 } 285 286 #system("rm -fr $cwd/gerp_dir; cp -R $temp_dir $cwd/gerp_dir"); 287 288 my $result_file = $self->{align_base}.'.rates.elems'; 289 my $parser = Bio::Tools::Phylo::Gerp->new(-file => $result_file); 290 291 # cd back again 292 chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'"); 293 294 return $parser; 295} 296 297=head2 _setparams 298 299 Title : _setparams 300 Usage : Internal function, not to be called directly 301 Function: Creates a string of params to be used in the command string 302 Returns : string of params 303 Args : none 304 305=cut 306 307sub _setparams { 308 my ($self, $prog) = @_; 309 310 my $param_string; 311 if ($prog eq 'gerpcol') { 312 my $align_file = $self->_write_alignment; 313 $param_string .= ' -f '.$align_file; 314 $self->{align_base} = basename($align_file); 315 $param_string .= ' -t '.$self->_write_tree; 316 $param_string .= $self->SUPER::_setparams(-params => \@COLPARAMS, 317 -switches => \@SWITCHES, 318 -dash => 1); 319 } 320 else { 321 $param_string .= ' -f '.$self->{align_base}.'.rates'; 322 $param_string .= $self->SUPER::_setparams(-params => \@ELEMPARAMS, 323 -switches => \@SWITCHES, 324 -dash => 1); 325 } 326 327 $param_string .= " 2>&1"; 328 329 return $param_string; 330} 331 3321; 333