1# $Id$ 2# 3# BioPerl module for Bio::Tools::Run::Phylo::Phast::PhyloFit 4# 5# Please direct questions and support issues to <bioperl-l@bioperl.org> 6# 7# Cared for by Sendu Bala <bix@sendu.me.uk> 8# 9# Copyright Sendu Bala 10# 11# You may distribute this module under the same terms as perl itself 12 13# POD documentation - main docs before the code 14 15=head1 NAME 16 17Bio::Tools::Run::Phylo::Phast::PhyloFit - Wrapper for phyloFit 18 19=head1 SYNOPSIS 20 21 use Bio::Tools::Run::Phylo::Phast::PhyloFit; 22 23 # Make a PhyloFit factory 24 $factory = Bio::Tools::Run::Phylo::Phast::PhastCons->new(); 25 26 # Generate an init.mod file for use by phastCons 27 my $init_file = $factory->run($alignment, $tree); 28 29=head1 DESCRIPTION 30 31This is a wrapper for running the phyloFit application by Adam Siepel. You 32can get details here: http://compgen.bscb.cornell.edu/~acs/software.html 33 34Currently the interface is extremely simplified. Only the --tree form of usage 35is allowed (not --init-model), which means a tree must be supplied with the 36alignment (to run()). You can try supplying normal phyloFit arguments to new(), 37or calling arg-named methods (excluding initial hyphens and converting others 38to underscores, eg. $factory-E<gt>gaps_as_bases(1) to set the --gaps-as-bases arg). 39 40WARNING: the API may change in the future to allow for greater flexability and 41access to more phyloFit features. 42 43 44You will need to enable this PhyloFit wrapper to find the phast programs (at 45least phyloFit itself). 46This can be done in (at least) three ways: 47 48 1. Make sure the phyloFit executable is in your path. 49 2. Define an environmental variable PHASTDIR which is a 50 directory which contains the phyloFit application: 51 In bash: 52 53 export PHASTDIR=/home/username/phast/bin 54 55 In csh/tcsh: 56 57 setenv PHASTDIR /home/username/phast/bin 58 59 3. Include a definition of an environmental variable PHASTDIR in 60 every script that will use this PhyloFit wrapper module, e.g.: 61 62 BEGIN { $ENV{PHASTDIR} = '/home/username/phast/bin' } 63 use Bio::Tools::Run::Phylo::Phast::PhyloFit; 64 65=head1 FEEDBACK 66 67=head2 Mailing Lists 68 69User feedback is an integral part of the evolution of this and other 70Bioperl modules. Send your comments and suggestions preferably to 71the Bioperl mailing list. Your participation is much appreciated. 72 73 bioperl-l@bioperl.org - General discussion 74 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 75 76=head2 Support 77 78Please direct usage questions or support issues to the mailing list: 79 80I<bioperl-l@bioperl.org> 81 82rather than to the module maintainer directly. Many experienced and 83reponsive experts will be able look at the problem and quickly 84address it. Please include a thorough description of the problem 85with code and data examples if at all possible. 86 87=head2 Reporting Bugs 88 89Report bugs to the Bioperl bug tracking system to help us keep track 90of the bugs and their resolution. Bug reports can be submitted via 91the web: 92 93 http://redmine.open-bio.org/projects/bioperl/ 94 95=head1 AUTHOR - Sendu Bala 96 97Email bix@sendu.me.uk 98 99=head1 APPENDIX 100 101The rest of the documentation details each of the object methods. 102Internal methods are usually preceded with a _ 103 104=cut 105 106package Bio::Tools::Run::Phylo::Phast::PhyloFit; 107use strict; 108 109use Cwd; 110use File::Spec; 111use Bio::AlignIO; 112use Bio::TreeIO; 113 114use base qw(Bio::Tools::Run::Phylo::PhyloBase); 115 116our $PROGRAM_NAME = 'phyloFit'; 117our $PROGRAM_DIR = $ENV{'PHASTDIR'}; 118 119# methods and their synonyms from the phastCons args we support 120our %PARAMS = (subst_mod => 's', 121 min_informative => 'I', 122 precision => 'p', 123 log => 'l', 124 ancestor => 'A', 125 nrates => 'k', 126 alpha => 'a', 127 rate_constants => 'K', 128 features => 'g', 129 catmap => 'c', 130 do_cats => 'C', 131 reverse_groups => 'R'); 132 133our %SWITCHES = (gaps_as_bases => 'G', 134 quiet => 'q', 135 EM => 'E', 136 init_random => 'r', 137 estimate_freqs => 'F', 138 markov => 'N', 139 non_overlapping => 'V'); 140 141# just to be explicit, args we don't support (yet) or we handle ourselves 142our %UNSUPPORTED = (msa_format => 'i', 143 out_root => 'o', 144 tree => 't', 145 help => 'h', 146 lnl => 'L', 147 init_model => 'M', 148 scale_only => 'B', 149 scale_subtree => 'S', 150 no_freqs => 'f', 151 no_rates => 'n', 152 post_probs => 'P', 153 expected_subs => 'X', 154 expected_total_subs => 'Z', 155 column_probs => 'U', 156 windows => 'w', 157 windows_explicit => 'v'); 158 159=head2 program_name 160 161 Title : program_name 162 Usage : $factory>program_name() 163 Function: holds the program name 164 Returns : string 165 Args : None 166 167=cut 168 169sub program_name { 170 return $PROGRAM_NAME; 171} 172 173=head2 program_dir 174 175 Title : program_dir 176 Usage : $factory->program_dir(@params) 177 Function: returns the program directory, obtained from ENV variable. 178 Returns : string 179 Args : None 180 181=cut 182 183sub program_dir { 184 return $PROGRAM_DIR; 185} 186 187=head2 new 188 189 Title : new 190 Usage : $factory = Bio::Tools::Run::Phylo::Phast::PhyloFit->new() 191 Function: creates a new PhyloFit factory 192 Returns : Bio::Tools::Run::Phylo::Phast::PhyloFit 193 Args : Most options understood by phastCons can be supplied as key => 194 value pairs. Options that don't normally take a value 195 should be given a value of 1. You can type the keys as you would on 196 the command line (eg. '--gaps-as-bases' => 1) or with only a single 197 hyphen to start and internal hyphens converted to underscores (eg. 198 -gaps_as_bases => 1) to avoid having to quote the key. 199 200 These options can NOT be used with this wrapper currently: 201 msa_format / i 202 out_root / o 203 tree / t 204 help / h 205 lnl / L 206 init_model / M 207 scale_only / B 208 scale_subtree / S 209 no_freqs / f 210 no_rates / n 211 post_probs / P 212 expected_subs / X 213 expected_total_subs / Z 214 column_probs / U 215 windows / w 216 windows_explicit / v 217 218=cut 219 220sub new { 221 my ($class, @args) = @_; 222 my $self = $class->SUPER::new(@args); 223 224 $self->_set_from_args(\@args, -methods => {(map { $_ => $PARAMS{$_} } keys %PARAMS), 225 (map { $_ => $SWITCHES{$_} } keys %SWITCHES)}, 226 -create => 1); 227 228 return $self; 229} 230 231=head2 run 232 233 Title : run 234 Usage : $result = $factory->run($fasta_align_file, $newick_tree_file); 235 -or- 236 $result = $factory->run($align_object, $tree_object); 237 -or- 238 $result = $factory->run($align_object, $db_taxonomy_object); 239 Function: Runs phyloFit on an alignment. 240 Returns : filename of init.mod file produced 241 Args : The first argument represents an alignment, the second argument 242 a species tree. 243 The alignment can be provided as a multi-fasta format alignment 244 filename, or a Bio::Align::AlignI compliant object (eg. a 245 Bio::SimpleAlign). 246 The species tree can be provided as a newick format tree filename 247 or a Bio::Tree::TreeI compliant object. Alternatively a 248 Bio::DB::Taxonomy object can be supplied, in which case the species 249 tree will be generated by using the alignment sequence names as 250 species names and looking for those in the supplied database. 251 252 In all cases, the alignment sequence names must correspond to node 253 ids in the species tree. Multi-word species names should be joined 254 with underscores to form the sequence names, eg. Homo_sapiens 255 256=cut 257 258sub run { 259 my ($self, $aln, $tree) = @_; 260 261 ($aln && $tree) || $self->throw("alignment and tree must be supplied"); 262 $self->_alignment($aln); 263 $tree = $self->_tree($tree); 264 265 $tree->force_binary; 266 267 # adjust tree node ids to convert spaces to underscores (eg. if tree 268 # generated from taxonomy) 269 foreach my $node ($tree->get_leaf_nodes) { 270 my $id = $node->id; 271 $id =~ s/ /_/g; 272 $node->id($id); 273 } 274 275 # check node and seq names match 276 $self->_check_names; 277 278 return $self->_run; 279} 280 281sub _run { 282 my $self = shift; 283 284 my $exe = $self->executable || return; 285 286 # cd to a temp dir 287 my $temp_dir = $self->tempdir; 288 my $cwd = Cwd->cwd(); 289 chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'"); 290 291 my $aln_file = $self->_write_alignment; 292 my $tree_file = $self->_write_tree; 293 294 #...phyloFit --tree "(human,(mouse,rat))" --msa-format FASTA --out-root init alignment.fa 295 my $command = $exe.$self->_setparams($aln_file, $tree_file); 296 $self->debug("phyloFit command = $command\n"); 297 system($command) && $self->throw("phyloFit call ($command) crashed: $?"); 298 299 # cd back again 300 chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'"); 301 302 return File::Spec->catfile($temp_dir, 'init.mod'); 303} 304 305=head2 _setparams 306 307 Title : _setparams 308 Usage : Internal function, not to be called directly 309 Function: Creates a string of params to be used in the command string 310 Returns : string of params 311 Args : alignment and tree file names 312 313=cut 314 315sub _setparams { 316 my ($self, $aln_file, $tree_file) = @_; 317 318 my $param_string = ' --tree '.$tree_file; 319 $param_string .= ' --msa-format FASTA'; 320 $param_string .= ' --out-root init'; 321 322 # --min-informative defaults to 50, but must not be greater than the number 323 # of bases in the alignment 324 my $aln = $self->_alignment; 325 my $length = $aln->length; 326 my $min_informative = $self->min_informative || 50; 327 if ($length < $min_informative) { 328 $self->min_informative($length); 329 } 330 331 $param_string .= $self->SUPER::_setparams(-params => [keys %PARAMS], 332 -switches => [keys %SWITCHES], 333 -double_dash => 1, 334 -underscore_to_dash => 1); 335 $param_string .= ' '.$aln_file; 336 337 return $param_string; 338} 339 3401; 341