1# BioPerl module for Bio::Tools::Run::Phylo::Phylip::Consense 2# 3# Created by 4# 5# Shawn Hoon 6# 7# You may distribute this module under the same terms as perl itself 8 9# POD documentation - main docs before the code 10 11=head1 NAME 12 13Bio::Tools::Run::Phylo::Phylip::Consense - Wrapper for the phylip 14program Consense 15 16=head1 SYNOPSIS 17 18 19 use Bio::Tools::Run::Phylo::Phylip::Consense; 20 use Bio::Tools::Run::Phylo::Phylip::SeqBoot; 21 use Bio::Tools::Run::Phylo::Phylip::ProtDist; 22 use Bio::Tools::Run::Phylo::Phylip::Neighbor; 23 use Bio::Tools::Run::Phylo::Phylip::DrawTree; 24 25 26 #first get an alignment 27 my $aio= Bio::AlignIO->new(-file=>$ARGV[0],-format=>"clustalw"); 28 my $aln = $aio->next_aln; 29 30 # To prevent truncation of sequence names by PHYLIP runs, use set_displayname_safe 31 my ($aln_safe, $ref_name)=$aln->set_displayname_safe(); 32 33 #next use seqboot to generate multiple aligments 34 my @params = ('datatype'=>'SEQUENCE','replicates'=>10); 35 my $seqboot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params); 36 37 my $aln_ref= $seqboot_factory->run($aln); 38 39 Or, for long sequence names: 40 41 my $aln_ref= $seqboot_factory->run($aln_safe); 42 43 #next build distance matrices and construct trees 44 my $pd_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new(); 45 my $ne_factory = Bio::Tools::Run::Phylo::Phylip::Neighbor->new(); 46 47 foreach my $a (@{$aln_ref}){ 48 my $mat = $pd_factory->create_distance_matrix($a); 49 push @tree, $ne_factory->create_tree($mat); 50 } 51 52 #now use consense to get a final tree 53 my $con_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(); 54 55 #you may set outgroup either by the number representing the order in 56 #which species are entered or by the name of the species 57 58 $con_factory->outgroup(1); 59 $con_factory->outgroup('HUMAN'); 60 61 my $tree = $con_factory->run(\@tree); 62 63 # Restore original sequence names, after ALL phylip runs: 64 my @nodes = $tree->get_nodes(); 65 foreach my $nd (@nodes){ 66 $nd->id($ref_name->{$nd->id_output}) if $nd->is_Leaf; 67 } 68 69 #now draw the tree 70 my $draw_factory = Bio::Tools::Run::Phylo::Phylip::DrawTree->new(); 71 my $image_filename = $draw_factory->draw_tree($tree); 72 73=head1 DESCRIPTION 74 75Wrapper for phylip consense program 76 77Taken from phylip documentation... 78 79CONSENSE reads a file of computer-readable trees and prints out 80(and may also write out onto a file) a consensus tree. At the moment 81it carries out a family of consensus tree methods called the M[l] methods 82(Margush and McMorris, 1981). These include strict consensus 83and majority rule consensus. Basically the consensus tree consists of monophyletic 84groups that occur as often as possible in the data. 85 86More documentation on using Consense and setting parameters may be found 87in the phylip package. 88 89VERSION Support 90 91This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although 92this is still experimental as v3.6 is still under alpha release and not all functionalities maybe supported. 93 94=head1 PARAMETERS FOR Consense 95 96=head2 TYPE 97 98Title : TYPE 99Description : (optional) 100 Only available in phylip v3.6 101 102 This program supports 3 types of consensus generation 103 104 MRe : Majority Rule (extended) Any set of species that 105 appears in more than 50% of the trees is included. 106 The program then considers the other sets of species 107 in order of the frequency with which they have appeared, 108 adding to the consensus tree any which are compatible 109 with it until 110 111 STRICT: A set of species must appear in all input trees to be 112 included in the strict consensus tree. 113 114 MR : A set of species is included in the consensus tree 115 if it is present in more than half of the input trees. 116 117 Ml : The user is asked for a fraction between 0.5 and 1, and 118 the program then includes in the consensus tree any set 119 of species that occurs among the input trees more than 120 that fraction of then time. The Strict consensus and the 121 Majority Rule consensus are extreme cases of the M[l] consensus, 122 being for fractions of 1 and 0.5 respectively 123 124 usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-type=>"Ml 0.7"); 125 126 127 Defaults to MRe 128 129=head2 ROOTED 130 131 Title: ROOTED 132 Description: (optional) 133 134 toggles between the default assumption that the input trees are unrooted trees and 135 the selection that specifies that the tree is to be treated as a rooted tree and not 136 re-rooted. Otherwise the tree will be treated as outgroup-rooted and will be 137 re-rooted automatically at the first species encountered on the first tree 138 (or at a species designated by the Outgroup option) 139 140 usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-rooted=>1); 141 142 Defaults to unrooted 143 144=head2 OUTGROUP 145 146 Title : OUTGROUP 147 Description : (optional) 148 149 It is in effect only if the Rooted option selection is not in effect. 150 The trees will be re-rooted with a species of your choosing. 151 152 usage my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-outgroup=>2); 153 154 Defaults to first species encountered. 155 156=head1 FEEDBACK 157 158=head2 Mailing Lists 159 160User feedback is an integral part of the evolution of this and other 161Bioperl modules. Send your comments and suggestions preferably to one 162of the Bioperl mailing lists. Your participation is much appreciated. 163 164 bioperl-l@bioperl.org - General discussion 165 http://bioperl.org/wiki/Mailing_lists - About the mailing lists 166 167=head2 Support 168 169Please direct usage questions or support issues to the mailing list: 170 171I<bioperl-l@bioperl.org> 172 173rather than to the module maintainer directly. Many experienced and 174reponsive experts will be able look at the problem and quickly 175address it. Please include a thorough description of the problem 176with code and data examples if at all possible. 177 178=head2 Reporting Bugs 179 180Report bugs to the Bioperl bug tracking system to help us keep track 181the bugs and their resolution. Bug reports can be submitted via the 182web: 183 184 http://redmine.open-bio.org/projects/bioperl/ 185 186=head1 AUTHOR - Shawn Hoon 187 188Email shawnh@fugu-sg.org 189 190=head1 APPENDIX 191 192The rest of the documentation details each of the object 193methods. Internal methods are usually preceded with a _ 194 195=cut 196 197#' 198 199 200package Bio::Tools::Run::Phylo::Phylip::Consense; 201 202use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME 203 @CONSENSE_PARAMS @OTHER_SWITCHES 204 %OK_FIELD); 205use strict; 206use Bio::SimpleAlign; 207use Bio::TreeIO; 208use Bio::Tools::Run::Phylo::Phylip::Base; 209use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu); 210use IO::String; 211use Cwd; 212 213 214# inherit from Phylip::Base which has some methods for dealing with 215# Phylip specifics 216@ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base); 217 218# You will need to enable the Consense program. This 219# can be done in (at least) 3 ways: 220# 221# 1. define an environmental variable PHYLIPDIR: 222# export PHYLIPDIR=/home/shawnh/PHYLIP/bin 223# 224# 2. include a definition of an environmental variable PHYLIPDIR in 225# every script that will use Consense.pm. 226# $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin'; 227# 228# 3. You can set the path to the program through doing: 229# my @params('executable'=>'/usr/local/bin/consense'); 230# my $Consense_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(@params); 231# 232 233 234BEGIN { 235 @CONSENSE_PARAMS = qw(TYPE OUTGROUP ROOTED); 236 @OTHER_SWITCHES = qw(QUIET); 237 foreach my $attr(@CONSENSE_PARAMS,@OTHER_SWITCHES) { 238 $OK_FIELD{$attr}++; 239 } 240} 241 242=head2 program_name 243 244 Title : program_name 245 Usage : $obj->program_name() 246 Function: holds the program name 247 Returns: string 248 Args : None 249 250=cut 251 252sub program_name { 253 return 'consense'; 254} 255 256=head2 program_dir 257 258 Title : program_dir 259 Usage : ->program_dir() 260 Function: returns the program directory, obtained from ENV variable. 261 Returns: string 262 Args : 263 264=cut 265 266sub program_dir { 267 return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR}; 268} 269 270sub new { 271 my ($class,@args) = @_; 272 my $self = $class->SUPER::new(@args); 273 274 my ($attr, $value); 275 while (@args) { 276 $attr = shift @args; 277 $value = shift @args; 278 279 next if( $attr =~ /^-/ ); # don't want named parameters 280 if ($attr =~/PROGRAM/i) { 281 $self->executable($value); 282 next; 283 } 284 if ($attr =~ /IDLENGTH/i){ 285 $self->idlength($value); 286 next; 287 } 288 $self->$attr($value); 289 } 290 return $self; 291} 292 293sub AUTOLOAD { 294 my $self = shift; 295 my $attr = $AUTOLOAD; 296 $attr =~ s/.*:://; 297 $attr = uc $attr; 298 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr}; 299 $self->{$attr} = shift if @_; 300 return $self->{$attr}; 301} 302 303=head2 idlength 304 305 Title : idlength 306 Usage : $obj->idlength ($newval) 307 Function: 308 Returns : value of idlength 309 Args : newvalue (optional) 310 311 312=cut 313 314sub idlength{ 315 my $self = shift; 316 if( @_ ) { 317 my $value = shift; 318 $self->{'idlength'} = $value; 319 } 320 return $self->{'idlength'}; 321 322} 323 324 325=head2 run 326 327 Title : run 328 Usage : 329 $inputfilename = 't/data/prot.treefile'; 330 $tree= $Consense_factory->run($inputfilename); 331or 332 333 $tree= $consense_factory->run(\@tree); 334 335 Function: Create bootstrap sets of alignments 336 Example : 337 Returns : a L<Bio::Tree::Tree> 338 Args : either a file containing trees in newick format 339 or an array ref of L<Bio::Tree::Tree> 340 341 Throws an exception if argument is not either a string (eg a 342 filename) or a Bio::Tree::TreeI object. If 343 argument is string, throws exception if file corresponding to string 344 name can not be found. 345 346=cut 347 348sub run{ 349 350 my ($self,$input) = @_; 351 my ($infilename); 352 353 # Create input file pointer 354 $infilename = $self->_setinput($input); 355 if (!$infilename) { 356 $self->throw("Problems setting up for Consense. Probably bad input data in $input !"); 357 } 358 359# Create parameter string to pass to Consense program 360 my $param_string = $self->_setparams(); 361# run Consense 362 my $aln = $self->_run($infilename,$param_string); 363} 364 365################################################# 366 367=head2 _run 368 369 Title : _run 370 Usage : Internal function, not to be called directly 371 Function: makes actual system call to Consense program 372 Example : 373 Returns : an array ref of <Bio::Tree::Tree> 374 Args : Name of a file containing a set of tree in newick format 375 and a parameter string to be passed to Consense 376 377 378=cut 379 380sub _run { 381 my ($self,$infile,$param_string) = @_; 382 my $instring; 383 my $curpath = cwd; 384 unless( File::Spec->file_name_is_absolute($infile) ) { 385 $infile = $self->io->catfile($curpath,$infile); 386 } 387 my $tmpdir = $self->tempdir; 388 chdir($self->tempdir); 389 # open a pipe to run Consense to bypass interactive menus 390 if ($self->quiet() || $self->verbose() < 0) { 391 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null'; 392 open(Consense,"| ".$self->executable .">$null"); 393 } 394 else { 395 open(Consense,"| ".$self->executable); 396 } 397 $instring = $infile."\n".$param_string; 398 $self->debug( "Program ".$self->executable." $instring\n"); 399 print Consense $instring; 400 close(Consense); 401 402 # get the results 403 my $outfile = $self->io->catfile($self->tempdir,$self->treefile); 404 chdir($curpath); 405 406 $self->throw("Consense did not create files correctly ($outfile)") 407 unless (-e $outfile); 408 409 #parse the alignments 410 411 my @aln; 412 my $tio = Bio::TreeIO->new(-file=>$outfile,-format=>"newick"); 413 my $tree = $tio->next_tree; 414 415 # Clean up the temporary files created along the way... 416 unlink $outfile unless $self->save_tempfiles; 417 418 return $tree; 419} 420 421sub _set_names_from_tree { 422 my ($self,$tree) = @_; 423 my $newick; 424 my $ios = IO::String->new($newick); 425 my $tio = Bio::TreeIO->new(-fh=>$ios,-format=>'newick'); 426 $tio->write_tree($tree); 427 my @names = $newick=~/(\w+):\d+/g; 428 my %names; 429 for(my $i=0; $i < $#names; $i++){ 430 $names{$names[$i]} = $i+1; 431 } 432 $self->names(\%names); 433 434 return; 435} 436 437 438=head2 _setinput() 439 440 Title : _setinput 441 Usage : Internal function, not to be called directly 442 Function: Create input file for Consense program 443 Example : 444 Returns : name of file containing a trees in newick format 445 Args : an array ref of Bio::Tree::Tree object or input file name 446 447 448=cut 449 450sub _setinput { 451 my ($self, $input) = @_; 452 my ($alnfilename,$tfh); 453 454 # a phy formatted alignment file 455 unless (ref $input) { 456 # check that file exists or throw 457 $alnfilename= $input; 458 unless (-e $input) {return 0;} 459 my $tio = Bio::TreeIO->new(-file=>$alnfilename,-format=>'newick'); 460 my $tree = $tio->next_tree; 461 $self->_set_names_from_tree($tree); 462 return $alnfilename; 463 } 464 465 # $input may be a SimpleAlign Object 466 my @input = ref($input) eq "ARRAY" ? @{$input} : ($input); 467 ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir); 468 my $treeIO = Bio::TreeIO->new(-fh => $tfh, 469 -format=>'newick'); 470 471 foreach my $tree(@input){ 472 $tree->isa('Bio::Tree::TreeI') || $self->throw('Expected a Bio::TreeI object'); 473 $treeIO->write_tree($tree); 474 } 475 #get the species names in order, using the first one 476 $self->_set_names_from_tree($input[0]); 477 $treeIO->close(); 478 close($tfh); 479 undef $tfh; 480 return $alnfilename; 481} 482 483=head2 names() 484 485 Title : names 486 Usage : $tree->names(\%names) 487 Function: get/set for a hash ref for storing names in matrix 488 with rank as values. 489 Example : 490 Returns : hash reference 491 Args : hash reference 492 493=cut 494 495sub names { 496 my ($self,$name) = @_; 497 if($name){ 498 $self->{'_names'} = $name; 499 } 500 return $self->{'_names'}; 501} 502 503=head2 _setparams() 504 505 Title : _setparams 506 Usage : Internal function, not to be called directly 507 Function: Create parameter inputs for Consense program 508 Example : 509 Returns : parameter string to be passed to Consense 510 Args : name of calling object 511 512=cut 513 514sub _setparams { 515 my ($attr, $value, $self); 516 517 #do nothing for now 518 $self = shift; 519 my $param_string = ""; 520 my $rooted = 0; 521 522 #for case where type is Ml 523 my $Ml = 0; 524 my $frac = 0.5; 525 my %menu = %{$Menu{$self->version}->{'CONSENSE'}}; 526 527 foreach my $attr ( @CONSENSE_PARAMS) { 528 $value = $self->$attr(); 529 next unless (defined $value); 530 if ($attr =~/ROOTED/i){ 531 $rooted = 1; 532 $param_string .= $menu{'ROOTED'}; 533 } 534 elsif($attr =~/OUTGROUP/i){ 535 if($rooted == 1){ 536 $self->warn("Outgroup option cannot be used with a rooted tree"); 537 next; 538 } 539 if($value !~/^\d+$/){ # is a name 540 my %names = %{$self->names}; 541 $names{$value} || $self->throw("Outgroup $value not found"); 542 $value = $names{$value}; 543 } 544 $param_string .=$menu{'OUTGROUP'}."$value\n"; 545 } 546 elsif($attr=~/TYPE/i){ 547 if($value=~/Ml/i){ 548 ($value,$frac) = split(/\s+/,$value); 549 #default if not given 550 $frac ||= 0.5; 551 if($frac <= 0.5 || $frac > 1){ 552 $self->warn("fraction given is out of range 0.5<frac<1, setting to 0.5"); 553 $frac = 0.5; 554 } 555 $Ml=1; 556 } 557 $param_string.=$menu{'TYPE'}{uc $value}; 558 } 559 else { 560 $param_string.=$menu{uc $attr}; 561 } 562 } 563 $param_string .= $menu{'SUBMIT'}; 564 if($Ml){ 565 $param_string.=$frac."\n"; 566 } 567 568 return $param_string; 569} 570 571 572 573=head1 Bio::Tools::Run::Wrapper methods 574 575=cut 576 577=head2 no_param_checks 578 579 Title : no_param_checks 580 Usage : $obj->no_param_checks($newval) 581 Function: Boolean flag as to whether or not we should 582 trust the sanity checks for parameter values 583 Returns : value of no_param_checks 584 Args : newvalue (optional) 585 586 587=cut 588 589=head2 save_tempfiles 590 591 Title : save_tempfiles 592 Usage : $obj->save_tempfiles($newval) 593 Function: 594 Returns : value of save_tempfiles 595 Args : newvalue (optional) 596 597 598=cut 599 600=head2 outfile_name 601 602 Title : outfile_name 603 Usage : my $outfile = $Consense->outfile_name(); 604 Function: Get/Set the name of the output file for this run 605 (if you wanted to do something special) 606 Returns : string 607 Args : [optional] string to set value to 608 609 610=cut 611 612 613=head2 tempdir 614 615 Title : tempdir 616 Usage : my $tmpdir = $self->tempdir(); 617 Function: Retrieve a temporary directory name (which is created) 618 Returns : string which is the name of the temporary directory 619 Args : none 620 621 622=cut 623 624=head2 cleanup 625 626 Title : cleanup 627 Usage : $codeml->cleanup(); 628 Function: Will cleanup the tempdir directory after a Consense run 629 Returns : none 630 Args : none 631 632 633=cut 634 635=head2 io 636 637 Title : io 638 Usage : $obj->io($newval) 639 Function: Gets a L<Bio::Root::IO> object 640 Returns : L<Bio::Root::IO> 641 Args : none 642 643 644=cut 645 6461; # Needed to keep compiler happy 647