1# BioPerl module for Bio::Tools::Run::Phylo::Phylip::Consense
2#
3# Created by
4#
5# Shawn Hoon
6#
7# You may distribute this module under the same terms as perl itself
8
9# POD documentation - main docs before the code
10
11=head1 NAME
12
13Bio::Tools::Run::Phylo::Phylip::Consense - Wrapper for the phylip
14program Consense
15
16=head1 SYNOPSIS
17
18
19  use Bio::Tools::Run::Phylo::Phylip::Consense;
20  use Bio::Tools::Run::Phylo::Phylip::SeqBoot;
21  use Bio::Tools::Run::Phylo::Phylip::ProtDist;
22  use Bio::Tools::Run::Phylo::Phylip::Neighbor;
23  use Bio::Tools::Run::Phylo::Phylip::DrawTree;
24
25
26  #first get an alignment
27  my $aio= Bio::AlignIO->new(-file=>$ARGV[0],-format=>"clustalw");
28  my $aln = $aio->next_aln;
29
30  # To prevent truncation of sequence names by PHYLIP runs, use set_displayname_safe
31  my ($aln_safe, $ref_name)=$aln->set_displayname_safe();
32
33  #next use seqboot to generate multiple aligments
34  my @params = ('datatype'=>'SEQUENCE','replicates'=>10);
35  my $seqboot_factory = Bio::Tools::Run::Phylo::Phylip::SeqBoot->new(@params);
36
37  my $aln_ref= $seqboot_factory->run($aln);
38
39  Or, for long sequence names:
40
41  my $aln_ref= $seqboot_factory->run($aln_safe);
42
43  #next build distance matrices and construct trees
44  my $pd_factory = Bio::Tools::Run::Phylo::Phylip::ProtDist->new();
45  my $ne_factory = Bio::Tools::Run::Phylo::Phylip::Neighbor->new();
46
47  foreach my $a (@{$aln_ref}){
48    my $mat = $pd_factory->create_distance_matrix($a);
49    push @tree, $ne_factory->create_tree($mat);
50  }
51
52  #now use consense to get a final tree
53  my $con_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new();
54
55  #you may set outgroup either by the number representing the order in
56  #which species are entered or by the name of the species
57
58  $con_factory->outgroup(1);
59  $con_factory->outgroup('HUMAN');
60
61  my $tree = $con_factory->run(\@tree);
62
63  # Restore original sequence names, after ALL phylip runs:
64  my @nodes = $tree->get_nodes();
65  foreach my $nd (@nodes){
66     $nd->id($ref_name->{$nd->id_output}) if $nd->is_Leaf;
67  }
68
69  #now draw the tree
70  my $draw_factory = Bio::Tools::Run::Phylo::Phylip::DrawTree->new();
71  my $image_filename = $draw_factory->draw_tree($tree);
72
73=head1 DESCRIPTION
74
75Wrapper for phylip consense program
76
77Taken from phylip documentation...
78
79CONSENSE reads a file of computer-readable trees and prints out
80(and may also write out onto a file) a consensus tree. At the moment
81it carries out a family of consensus tree methods called the M[l] methods
82(Margush and McMorris, 1981). These include strict consensus
83and majority rule consensus. Basically the consensus tree consists of monophyletic
84groups that occur as often as possible in the data.
85
86More documentation on using Consense and setting parameters may be found
87in the phylip package.
88
89VERSION Support
90
91This wrapper currently supports v3.5 of phylip. There is also support for v3.6 although
92this is still experimental as v3.6 is still under alpha release and not all functionalities maybe supported.
93
94=head1 PARAMETERS FOR Consense
95
96=head2 TYPE
97
98Title		: TYPE
99Description	: (optional)
100             Only available in phylip v3.6
101
102                  This program supports 3 types of consensus generation
103
104                  MRe   : Majority Rule (extended) Any set of species that
105                          appears in more than 50% of the trees is included.
106                          The program then considers the other sets of species
107                          in order of the frequency with which they have appeared,
108                          adding to the consensus tree any which are compatible
109                          with it until
110
111                  STRICT: A set of species must appear in all input trees to be
112                          included in the strict consensus tree.
113
114                  MR    :  A set of species is included in the consensus tree
115                          if it is present in more than half of the input trees.
116
117                  Ml    : The user is asked for a fraction between 0.5 and 1, and
118                          the program then includes in the consensus tree any set
119                          of species that occurs among the input trees more than
120                          that fraction of then time. The Strict consensus and the
121                          Majority Rule consensus are extreme cases of the M[l] consensus,
122                          being for fractions of 1 and 0.5 respectively
123
124                  usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-type=>"Ml 0.7");
125
126
127             Defaults to MRe
128
129=head2 ROOTED
130
131  Title: ROOTED
132  Description: (optional)
133
134             toggles between the default assumption that the input trees are unrooted trees and
135             the selection that specifies that the tree is to be treated as a rooted tree and not
136             re-rooted. Otherwise the tree will be treated as outgroup-rooted and will be
137             re-rooted automatically at the first species encountered on the first tree
138             (or at a species designated by the Outgroup option)
139
140             usage: my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-rooted=>1);
141
142             Defaults to unrooted
143
144=head2 OUTGROUP
145
146  Title		: OUTGROUP
147  Description	: (optional)
148
149                It is in effect only if the Rooted option selection is not in effect.
150                The trees will be re-rooted with a species of your choosing.
151
152                usage  my $factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(-outgroup=>2);
153
154                Defaults to first species encountered.
155
156=head1 FEEDBACK
157
158=head2 Mailing Lists
159
160User feedback is an integral part of the evolution of this and other
161Bioperl modules. Send your comments and suggestions preferably to one
162of the Bioperl mailing lists.  Your participation is much appreciated.
163
164  bioperl-l@bioperl.org                  - General discussion
165  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
166
167=head2 Support
168
169Please direct usage questions or support issues to the mailing list:
170
171I<bioperl-l@bioperl.org>
172
173rather than to the module maintainer directly. Many experienced and
174reponsive experts will be able look at the problem and quickly
175address it. Please include a thorough description of the problem
176with code and data examples if at all possible.
177
178=head2 Reporting Bugs
179
180Report bugs to the Bioperl bug tracking system to help us keep track
181the bugs and their resolution.  Bug reports can be submitted via the
182web:
183
184  http://redmine.open-bio.org/projects/bioperl/
185
186=head1 AUTHOR - Shawn Hoon
187
188Email shawnh@fugu-sg.org
189
190=head1 APPENDIX
191
192The rest of the documentation details each of the object
193methods. Internal methods are usually preceded with a _
194
195=cut
196
197#'
198
199
200package Bio::Tools::Run::Phylo::Phylip::Consense;
201
202use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
203	    @CONSENSE_PARAMS @OTHER_SWITCHES
204	    %OK_FIELD);
205use strict;
206use Bio::SimpleAlign;
207use Bio::TreeIO;
208use Bio::Tools::Run::Phylo::Phylip::Base;
209use Bio::Tools::Run::Phylo::Phylip::PhylipConf qw(%Menu);
210use IO::String;
211use Cwd;
212
213
214# inherit from Phylip::Base which has some methods for dealing with
215# Phylip specifics
216@ISA = qw(Bio::Tools::Run::Phylo::Phylip::Base);
217
218# You will need to enable the Consense program. This
219# can be done in (at least) 3 ways:
220#
221# 1. define an environmental variable PHYLIPDIR:
222# export PHYLIPDIR=/home/shawnh/PHYLIP/bin
223#
224# 2. include a definition of an environmental variable PHYLIPDIR in
225# every script that will use Consense.pm.
226# $ENV{PHYLIPDIR} = '/home/shawnh/PHYLIP/bin';
227#
228# 3. You can set the path to the program through doing:
229# my @params('executable'=>'/usr/local/bin/consense');
230# my $Consense_factory = Bio::Tools::Run::Phylo::Phylip::Consense->new(@params);
231#
232
233
234BEGIN {
235	@CONSENSE_PARAMS = qw(TYPE OUTGROUP ROOTED);
236	@OTHER_SWITCHES = qw(QUIET);
237	foreach my $attr(@CONSENSE_PARAMS,@OTHER_SWITCHES) {
238	    $OK_FIELD{$attr}++;
239	}
240}
241
242=head2 program_name
243
244 Title   : program_name
245 Usage   : $obj->program_name()
246 Function: holds the program name
247 Returns:  string
248 Args    : None
249
250=cut
251
252sub program_name {
253        return 'consense';
254}
255
256=head2 program_dir
257
258 Title   : program_dir
259 Usage   : ->program_dir()
260 Function: returns the program directory, obtained from ENV variable.
261 Returns:  string
262 Args    :
263
264=cut
265
266sub program_dir {
267        return Bio::Root::IO->catfile($ENV{PHYLIPDIR}) if $ENV{PHYLIPDIR};
268}
269
270sub new {
271    my ($class,@args) = @_;
272    my $self = $class->SUPER::new(@args);
273
274    my ($attr, $value);
275    while (@args)  {
276	$attr =   shift @args;
277	$value =  shift @args;
278
279	next if( $attr =~ /^-/ ); # don't want named parameters
280	if ($attr =~/PROGRAM/i) {
281	    $self->executable($value);
282	    next;
283	}
284	if ($attr =~ /IDLENGTH/i){
285	    $self->idlength($value);
286	    next;
287	}
288	$self->$attr($value);
289    }
290    return $self;
291}
292
293sub AUTOLOAD {
294    my $self = shift;
295    my $attr = $AUTOLOAD;
296    $attr =~ s/.*:://;
297    $attr = uc $attr;
298    $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
299    $self->{$attr} = shift if @_;
300    return $self->{$attr};
301}
302
303=head2 idlength
304
305 Title   : idlength
306 Usage   : $obj->idlength ($newval)
307 Function:
308 Returns : value of idlength
309 Args    : newvalue (optional)
310
311
312=cut
313
314sub idlength{
315   my $self = shift;
316   if( @_ ) {
317      my $value = shift;
318      $self->{'idlength'} = $value;
319    }
320    return $self->{'idlength'};
321
322}
323
324
325=head2  run
326
327 Title   : run
328 Usage   :
329	$inputfilename = 't/data/prot.treefile';
330	$tree= $Consense_factory->run($inputfilename);
331or
332
333	$tree= $consense_factory->run(\@tree);
334
335 Function: Create bootstrap sets of alignments
336 Example :
337 Returns : a L<Bio::Tree::Tree>
338 Args    : either a file containing trees in newick format
339           or an array ref of L<Bio::Tree::Tree>
340
341 Throws an exception if argument is not either a string (eg a
342 filename) or a Bio::Tree::TreeI object. If
343 argument is string, throws exception if file corresponding to string
344 name can not be found.
345
346=cut
347
348sub run{
349
350    my ($self,$input) = @_;
351    my ($infilename);
352
353    # Create input file pointer
354    $infilename = $self->_setinput($input);
355    if (!$infilename) {
356	$self->throw("Problems setting up for Consense. Probably bad input data in $input !");
357    }
358
359# Create parameter string to pass to Consense program
360    my $param_string = $self->_setparams();
361# run Consense
362    my $aln = $self->_run($infilename,$param_string);
363}
364
365#################################################
366
367=head2  _run
368
369 Title   :  _run
370 Usage   :  Internal function, not to be called directly
371 Function:  makes actual system call to Consense program
372 Example :
373 Returns : an array ref of <Bio::Tree::Tree>
374 Args    : Name of a file containing a set of tree in newick format
375           and a parameter string to be passed to Consense
376
377
378=cut
379
380sub _run {
381    my ($self,$infile,$param_string) = @_;
382    my $instring;
383    my $curpath = cwd;
384    unless( File::Spec->file_name_is_absolute($infile) ) {
385    	$infile = $self->io->catfile($curpath,$infile);
386    }
387    my $tmpdir = $self->tempdir;
388    chdir($self->tempdir);
389    # open a pipe to run Consense to bypass interactive menus
390    if ($self->quiet() || $self->verbose() < 0) {
391	my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
392   	open(Consense,"| ".$self->executable .">$null");
393    }
394    else {
395    	open(Consense,"| ".$self->executable);
396    }
397    $instring = $infile."\n".$param_string;
398    $self->debug( "Program ".$self->executable." $instring\n");
399    print Consense $instring;
400    close(Consense);
401
402    # get the results
403    my $outfile = $self->io->catfile($self->tempdir,$self->treefile);
404    chdir($curpath);
405
406    $self->throw("Consense did not create files correctly ($outfile)")
407  	unless (-e $outfile);
408
409    #parse the alignments
410
411    my @aln;
412    my $tio = Bio::TreeIO->new(-file=>$outfile,-format=>"newick");
413    my $tree = $tio->next_tree;
414
415    # Clean up the temporary files created along the way...
416    unlink $outfile unless $self->save_tempfiles;
417
418    return $tree;
419}
420
421sub _set_names_from_tree {
422  my ($self,$tree) = @_;
423  my $newick;
424  my $ios = IO::String->new($newick);
425  my $tio = Bio::TreeIO->new(-fh=>$ios,-format=>'newick');
426  $tio->write_tree($tree);
427  my @names = $newick=~/(\w+):\d+/g;
428  my %names;
429  for(my $i=0; $i < $#names; $i++){
430      $names{$names[$i]} = $i+1;
431  }
432  $self->names(\%names);
433
434  return;
435}
436
437
438=head2  _setinput()
439
440 Title   :  _setinput
441 Usage   :  Internal function, not to be called directly
442 Function:   Create input file for Consense program
443 Example :
444 Returns : name of file containing a trees in newick format
445 Args    : an array ref of Bio::Tree::Tree object or input file name
446
447
448=cut
449
450sub _setinput {
451    my ($self, $input) = @_;
452    my ($alnfilename,$tfh);
453
454    #  a phy formatted alignment file
455    unless (ref $input) {
456        # check that file exists or throw
457        $alnfilename= $input;
458	unless (-e $input) {return 0;}
459	my $tio = Bio::TreeIO->new(-file=>$alnfilename,-format=>'newick');
460	my $tree = $tio->next_tree;
461	$self->_set_names_from_tree($tree);
462	return $alnfilename;
463    }
464
465    #  $input may be a SimpleAlign Object
466    my @input = ref($input) eq "ARRAY" ? @{$input} : ($input);
467    ($tfh,$alnfilename) = $self->io->tempfile(-dir=>$self->tempdir);
468    my $treeIO = Bio::TreeIO->new(-fh => $tfh,
469				  -format=>'newick');
470
471    foreach my $tree(@input){
472	$tree->isa('Bio::Tree::TreeI') || $self->throw('Expected a Bio::TreeI object');
473	$treeIO->write_tree($tree);
474    }
475    #get the species names in order, using the first one
476    $self->_set_names_from_tree($input[0]);
477    $treeIO->close();
478    close($tfh);
479    undef $tfh;
480    return $alnfilename;
481}
482
483=head2  names()
484
485 Title   :  names
486 Usage   :  $tree->names(\%names)
487 Function:  get/set for a hash ref for storing names in matrix
488            with rank as values.
489 Example :
490 Returns : hash reference
491 Args    : hash reference
492
493=cut
494
495sub names {
496    my ($self,$name) = @_;
497    if($name){
498        $self->{'_names'} = $name;
499    }
500    return $self->{'_names'};
501}
502
503=head2  _setparams()
504
505 Title   :  _setparams
506 Usage   :  Internal function, not to be called directly
507 Function:   Create parameter inputs for Consense program
508 Example :
509 Returns : parameter string to be passed to Consense
510 Args    : name of calling object
511
512=cut
513
514sub _setparams {
515    my ($attr, $value, $self);
516
517    #do nothing for now
518    $self = shift;
519    my $param_string = "";
520    my $rooted = 0;
521
522    #for case where type is Ml
523    my $Ml = 0;
524    my $frac = 0.5;
525    my %menu = %{$Menu{$self->version}->{'CONSENSE'}};
526
527    foreach  my $attr ( @CONSENSE_PARAMS) {
528    	$value = $self->$attr();
529    	next unless (defined $value);
530    	if ($attr =~/ROOTED/i){
531        $rooted = 1;
532        $param_string .= $menu{'ROOTED'};
533      }
534      elsif($attr =~/OUTGROUP/i){
535          if($rooted == 1){
536              $self->warn("Outgroup option cannot be used with a rooted tree");
537              next;
538          }
539          if($value !~/^\d+$/){ # is a name
540              my %names = %{$self->names};
541              $names{$value} || $self->throw("Outgroup $value not found");
542              $value = $names{$value};
543          }
544          $param_string .=$menu{'OUTGROUP'}."$value\n";
545      }
546      elsif($attr=~/TYPE/i){
547          if($value=~/Ml/i){
548              ($value,$frac) = split(/\s+/,$value);
549              #default if not given
550              $frac ||= 0.5;
551              if($frac <= 0.5 || $frac > 1){
552                  $self->warn("fraction given is out of range 0.5<frac<1, setting to 0.5");
553                  $frac = 0.5;
554              }
555              $Ml=1;
556          }
557          $param_string.=$menu{'TYPE'}{uc $value};
558      }
559      else {
560          $param_string.=$menu{uc $attr};
561      }
562	  }
563    $param_string .= $menu{'SUBMIT'};
564    if($Ml){
565        $param_string.=$frac."\n";
566    }
567
568    return $param_string;
569}
570
571
572
573=head1 Bio::Tools::Run::Wrapper methods
574
575=cut
576
577=head2 no_param_checks
578
579 Title   : no_param_checks
580 Usage   : $obj->no_param_checks($newval)
581 Function: Boolean flag as to whether or not we should
582           trust the sanity checks for parameter values
583 Returns : value of no_param_checks
584 Args    : newvalue (optional)
585
586
587=cut
588
589=head2 save_tempfiles
590
591 Title   : save_tempfiles
592 Usage   : $obj->save_tempfiles($newval)
593 Function:
594 Returns : value of save_tempfiles
595 Args    : newvalue (optional)
596
597
598=cut
599
600=head2 outfile_name
601
602 Title   : outfile_name
603 Usage   : my $outfile = $Consense->outfile_name();
604 Function: Get/Set the name of the output file for this run
605           (if you wanted to do something special)
606 Returns : string
607 Args    : [optional] string to set value to
608
609
610=cut
611
612
613=head2 tempdir
614
615 Title   : tempdir
616 Usage   : my $tmpdir = $self->tempdir();
617 Function: Retrieve a temporary directory name (which is created)
618 Returns : string which is the name of the temporary directory
619 Args    : none
620
621
622=cut
623
624=head2 cleanup
625
626 Title   : cleanup
627 Usage   : $codeml->cleanup();
628 Function: Will cleanup the tempdir directory after a Consense run
629 Returns : none
630 Args    : none
631
632
633=cut
634
635=head2 io
636
637 Title   : io
638 Usage   : $obj->io($newval)
639 Function:  Gets a L<Bio::Root::IO> object
640 Returns : L<Bio::Root::IO>
641 Args    : none
642
643
644=cut
645
6461; # Needed to keep compiler happy
647