1#
2# BioPerl module for Bio::Tree::Tree
3#
4# Please direct questions and support issues to <bioperl-l@bioperl.org>
5#
6# Cared for by Jason Stajich <jason@bioperl.org>
7#
8# Copyright Jason Stajich
9#
10# You may distribute this module under the same terms as perl itself
11
12# POD documentation - main docs before the code
13
14
15=head1 NAME
16
17Bio::Tree::Tree - An implementation of the TreeI interface.
18
19=head1 SYNOPSIS
20
21    use Bio::TreeIO;
22
23    # like from a TreeIO
24    my $treeio = Bio::TreeIO->new(-format => 'newick', -file => 'treefile.dnd');
25    my $tree = $treeio->next_tree;
26    my @nodes = $tree->get_nodes;
27    my $root = $tree->get_root_node;
28
29=head1 DESCRIPTION
30
31This object holds handles to Nodes which make up a tree.
32
33=head1 IMPLEMENTATION NOTE
34
35This implementation of Bio::Tree::Tree contains Bio::Tree:::NodeI; mainly linked
36via the root node. As NodeI can potentially contain circular references (as
37nodes will need to refer to both parent and child nodes), Bio::Tree::Tree will
38remove those circular references when the object is garbage-collected. This has
39some side effects; primarily, one must keep the Tree in scope or have at least
40one reference to it if working with nodes. The fix is to count the references to
41the nodes and if it is greater than expected retain all of them, but it requires
42an additional prereq and thus may not be worth the effort.  This only shows up
43in minor edge cases, though (see Bug #2869).
44
45Example of issue:
46
47  # tree is not assigned to a variable, so passes from memory after
48  # root node is passed
49  my $root = Bio::TreeIO->new(-format => 'newick', -file => 'foo.txt')->next_tree
50                 ->get_root_node;
51
52  # gets nothing, as all Node links are broken when Tree is garbage-collected above
53  my @descendents = $root->get_all_Descendents;
54
55=head1 FEEDBACK
56
57=head2 Mailing Lists
58
59User feedback is an integral part of the evolution of this and other
60Bioperl modules. Send your comments and suggestions preferably to
61the Bioperl mailing list.  Your participation is much appreciated.
62
63  bioperl-l@bioperl.org                  - General discussion
64  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
65
66=head2 Support
67
68Please direct usage questions or support issues to the mailing list:
69
70I<bioperl-l@bioperl.org>
71
72rather than to the module maintainer directly. Many experienced and
73reponsive experts will be able look at the problem and quickly
74address it. Please include a thorough description of the problem
75with code and data examples if at all possible.
76
77=head2 Reporting Bugs
78
79Report bugs to the Bioperl bug tracking system to help us keep track
80of the bugs and their resolution. Bug reports can be submitted via
81the web:
82
83  https://github.com/bioperl/bioperl-live/issues
84
85=head1 AUTHOR - Jason Stajich
86
87Email jason@bioperl.org
88
89=head1 CONTRIBUTORS
90
91Aaron Mackey amackey@virginia.edu
92Sendu Bala   bix@sendu.me.uk
93Mark A. Jensen maj@fortinbras.us
94
95=head1 APPENDIX
96
97The rest of the documentation details each of the object methods.
98Internal methods are usually preceded with a _
99
100=cut
101
102
103# Let the code begin...
104
105
106package Bio::Tree::Tree;
107$Bio::Tree::Tree::VERSION = '1.7.7';
108use strict;
109
110# Object preamble - inherits from Bio::Root::Root
111
112
113use base qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
114
115=head2 new
116
117 Title   : new
118 Usage   : my $obj = Bio::Tree::Tree->new();
119 Function: Builds a new Bio::Tree::Tree object
120 Returns : Bio::Tree::Tree
121 Args    : -root     => L<Bio::Tree::NodeI> object which is the root
122              OR
123           -node     => L<Bio::Tree::NodeI> object from which the root will be
124                        determined
125
126           -nodelete => boolean, whether or not to try and cleanup all
127                        the nodes when this this tree goes out of scope.
128           -id       => optional tree ID
129           -score    => optional tree score value
130
131=cut
132
133sub new {
134    my ($class, @args) = @_;
135
136    my $self = $class->SUPER::new(@args);
137    $self->{'_rootnode'} = undef;
138    $self->{'_maxbranchlen'} = 0;
139    $self->_register_for_cleanup(\&cleanup_tree);
140    my ($root, $node, $nodel, $id, $score) =
141        $self->_rearrange([qw(ROOT NODE NODELETE ID SCORE)], @args);
142
143    if ($node && ! $root) {
144        $self->throw("Must supply a Bio::Tree::NodeI") unless ref($node) && $node->isa('Bio::Tree::NodeI');
145        my @lineage = $self->get_lineage_nodes($node);
146        $root = shift(@lineage) || $node;
147
148        # to stop us pulling in entire database of a Bio::Taxon when we later do
149        # get_nodes() or similar, specifically set ancestor() for each node
150        if ($node->isa('Bio::Taxon')) {
151            push(@lineage, $node) unless $node eq $root;
152            my $ancestor = $root;
153            foreach my $lineage_node (@lineage) {
154                $lineage_node->ancestor($ancestor);
155            } continue { $ancestor = $lineage_node; }
156        }
157    }
158    if ($root) {
159        $self->set_root_node($root);
160    }
161
162    $self->nodelete($nodel || 0);
163    $self->id($id)       if defined $id;
164    $self->score($score) if defined $score;
165    return $self;
166}
167
168
169=head2 nodelete
170
171 Title   : nodelete
172 Usage   : $obj->nodelete($newval)
173 Function: Get/Set Boolean whether or not to delete the underlying
174           nodes when it goes out of scope.  By default this is false
175           meaning trees are cleaned up.
176 Returns : boolean
177 Args    : on set, new boolean value
178
179=cut
180
181sub nodelete {
182    my $self = shift;
183    return $self->{'nodelete'} = shift if @_;
184    return $self->{'nodelete'};
185}
186
187
188=head2 get_nodes
189
190 Title   : get_nodes
191 Usage   : my @nodes = $tree->get_nodes()
192 Function: Return list of Bio::Tree::NodeI objects
193 Returns : array of Bio::Tree::NodeI objects
194 Args    : (named values) hash with one value
195           order => 'b|breadth' first order or 'd|depth' first order
196           sortby => [optional] "height", "creation", "alpha", "revalpha",
197           or coderef to be used to sort the order of children nodes. See L<Bio::Tree::Node> for details
198
199=cut
200
201sub get_nodes {
202    my ($self, @args) = @_;
203    my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)], @args);
204    $order  ||= 'depth';
205    $sortby ||= 'none';
206
207    my @children;
208    my $node = $self->get_root_node;
209    if ($node) {
210        if ($order =~ m/^b/oi) { # breadth-first
211            @children = ($node);
212            my @to_process = ($node);
213            while( @to_process ) {
214                my $n = shift @to_process;
215                my @c  = $n->each_Descendent($sortby);
216                push @children, @c;
217                push @to_process, @c;
218            }
219        } elsif ($order =~ m/^d/oi) { # depth-first
220            @children = ($node, $node->get_all_Descendents($sortby));
221        } else {
222            $self->verbose(1);
223            $self->warn("specified an order '$order' which I don't understan\n");
224        }
225    }
226
227    return @children;
228}
229
230
231=head2 get_root_node
232
233 Title   : get_root_node
234 Usage   : my $node = $tree->get_root_node();
235 Function: Get the Top Node in the tree, in this implementation
236           Trees only have one top node.
237 Returns : Bio::Tree::NodeI object
238 Args    : none
239
240=cut
241
242sub get_root_node {
243    my ($self) = @_;
244    return $self->{'_rootnode'};
245}
246
247
248=head2 set_root_node
249
250 Title   : set_root_node
251 Usage   : $tree->set_root_node($node)
252 Function: Set the Root Node for the Tree
253 Returns : Bio::Tree::NodeI
254 Args    : Bio::Tree::NodeI
255
256=cut
257
258sub set_root_node {
259    my $self = shift;
260    if ( @_ ) {
261        my $value = shift;
262        if ( defined $value && ! $value->isa('Bio::Tree::NodeI') ) {
263            $self->warn("Trying to set the root node to $value which is not a Bio::Tree::NodeI");
264            return $self->get_root_node;
265        }
266        $self->{'_rootnode'} = $value;
267    }
268    return $self->get_root_node;
269}
270
271
272=head2 total_branch_length
273
274 Title   : total_branch_length
275 Usage   : my $size = $tree->total_branch_length
276 Function: Returns the sum of the length of all branches
277 Returns : real
278 Args    : none
279
280=cut
281
282sub total_branch_length { shift->subtree_length }
283
284
285=head2 subtree_length
286
287 Title   : subtree_length
288 Usage   : my $subtree_size = $tree->subtree_length($internal_node)
289 Function: Returns the sum of the length of all branches in a subtree
290           under the node. Calculates the size of the whole tree
291           without an argument (but only if root node is defined)
292 Returns : real or undef
293 Args    : Bio::Tree::NodeI object, defaults to the root node
294
295=cut
296
297sub subtree_length {
298    my $tree = shift;
299    my $node = shift || $tree->get_root_node;
300    return unless $node;
301    my $sum = 0;
302    for ( $node->get_all_Descendents ) {
303        $sum += $_->branch_length || 0;
304    }
305    return $sum;
306}
307
308
309=head2 id
310
311 Title   : id
312 Usage   : my $id = $tree->id();
313 Function: An id value for the tree
314 Returns : scalar
315 Args    : [optional] new value to set
316
317=cut
318
319sub id {
320   my ($self, $val) = @_;
321   if ( defined $val ) {
322       $self->{'_treeid'} = $val;
323   }
324   return $self->{'_treeid'};
325}
326
327
328=head2 score
329
330 Title   : score
331 Usage   : $obj->score($newval)
332 Function: Sets the associated score with this tree
333           This is a generic slot which is probably best used
334           for log likelihood or other overall tree score
335 Returns : value of score
336 Args    : newvalue (optional)
337
338=cut
339
340sub score {
341   my ($self, $val) = @_;
342   if ( defined $val ) {
343       $self->{'_score'} = $val;
344   }
345   return $self->{'_score'};
346}
347
348
349# decorated interface TreeI Implements this
350
351=head2 height
352
353 Title   : height
354 Usage   : my $height = $tree->height
355 Function: Gets the height of tree - this LOG_2($number_nodes)
356           WARNING: this is only true for strict binary trees.  The TreeIO
357           system is capable of building non-binary trees, for which this
358           method will currently return an incorrect value!!
359 Returns : integer
360 Args    : none
361
362=head2 number_nodes
363
364 Title   : number_nodes
365 Usage   : my $size = $tree->number_nodes
366 Function: Returns the number of nodes in the tree
367 Returns : integer
368 Args    : none
369
370=head2 as_text
371
372 Title   : as_text
373 Usage   : my $tree_as_string = $tree->as_text($format)
374 Function: Returns the tree as a string representation in the
375           desired format, e.g.: 'newick', 'nhx' or 'tabtree' (the default)
376 Returns : scalar string
377 Args    : format type as specified by Bio::TreeIO
378 Note    : This method loads the Bio::TreeIO::$format module
379           on the fly, and commandeers the _write_tree_Helper
380           routine therein to create the tree string.
381
382=cut
383
384sub as_text {
385    my $self = shift;
386    my $format = shift || 'tabtree';
387    my $params_input = shift || {};
388
389    my $iomod = "Bio::TreeIO::$format";
390    $self->_load_module($iomod);
391
392    my $string = '';
393    open my $fh, '>', \$string or $self->throw("Could not write '$string' as file: $!");
394    my $test = $iomod->new( -format => $format, -fh => $fh );
395
396    # Get the default params for the given IO module.
397    $test->set_params($params_input);
398
399    $test->write_tree($self);
400    close $fh;
401    return $string;
402}
403
404
405=head2 Methods for associating Tag/Values with a Tree
406
407These methods associate tag/value pairs with a Tree
408
409=head2 set_tag_value
410
411 Title   : set_tag_value
412 Usage   : $tree->set_tag_value($tag,$value)
413           $tree->set_tag_value($tag,@values)
414 Function: Sets a tag value(s) to a tree. Replaces old values.
415 Returns : number of values stored for this tag
416 Args    : $tag   - tag name
417           $value - value to store for the tag
418
419=cut
420
421sub set_tag_value {
422    my ($self, $tag, @values) = @_;
423    if ( ! defined $tag || ! scalar @values ) {
424        $self->warn("cannot call set_tag_value with an undefined value");
425    }
426    $self->remove_tag ($tag);
427    map { push @{$self->{'_tags'}->{$tag}}, $_ } @values;
428    return scalar @{$self->{'_tags'}->{$tag}};
429}
430
431
432=head2 add_tag_value
433
434 Title   : add_tag_value
435 Usage   : $tree->add_tag_value($tag,$value)
436 Function: Adds a tag value to a tree
437 Returns : number of values stored for this tag
438 Args    : $tag   - tag name
439           $value - value to store for the tag
440
441=cut
442
443sub add_tag_value {
444    my ($self, $tag, $value) = @_;
445    if ( ! defined $tag || ! defined $value ) {
446        $self->warn("cannot call add_tag_value with an undefined value");
447    }
448    push @{$self->{'_tags'}->{$tag}}, $value;
449    return scalar @{$self->{'_tags'}->{$tag}};
450}
451
452
453=head2 remove_tag
454
455 Title   : remove_tag
456 Usage   : $tree->remove_tag($tag)
457 Function: Remove the tag and all values for this tag
458 Returns : boolean representing success (0 if tag does not exist)
459 Args    : $tag - tagname to remove
460
461=cut
462
463sub remove_tag {
464    my ($self, $tag) = @_;
465    if ( exists $self->{'_tags'}->{$tag} ) {
466        $self->{'_tags'}->{$tag} = undef;
467        delete $self->{'_tags'}->{$tag};
468        return 1;
469    }
470    return 0;
471}
472
473
474=head2 remove_all_tags
475
476 Title   : remove_all_tags
477 Usage   : $tree->remove_all_tags()
478 Function: Removes all tags
479 Returns : None
480 Args    : None
481
482=cut
483
484sub remove_all_tags {
485    my ($self) = @_;
486    $self->{'_tags'} = {};
487    return;
488}
489
490
491=head2 get_all_tags
492
493 Title   : get_all_tags
494 Usage   : my @tags = $tree->get_all_tags()
495 Function: Gets all the tag names for this Tree
496 Returns : Array of tagnames
497 Args    : None
498
499=cut
500
501sub get_all_tags {
502    my ($self) = @_;
503    my @tags = sort keys %{$self->{'_tags'} || {}};
504    return @tags;
505}
506
507
508=head2 get_tag_values
509
510 Title   : get_tag_values
511 Usage   : my @values = $tree->get_tag_values($tag)
512 Function: Gets the values for given tag ($tag)
513 Returns : Array of values or empty list if tag does not exist
514 Args    : $tag - tag name
515
516=cut
517
518sub get_tag_values {
519    my ($self, $tag) = @_;
520    return wantarray ? @{$self->{'_tags'}->{$tag} || []} :
521                      (@{$self->{'_tags'}->{$tag} || []})[0];
522}
523
524
525=head2 has_tag
526
527 Title   : has_tag
528 Usage   : $tree->has_tag($tag)
529 Function: Boolean test if tag exists in the Tree
530 Returns : Boolean
531 Args    : $tag - tagname
532
533=cut
534
535sub has_tag {
536    my ($self, $tag) = @_;
537    return exists $self->{'_tags'}->{$tag};
538}
539
540
541# safe tree clone that doesn't seg fault
542
543=head2 clone
544
545 Title   : clone
546 Alias   : _clone
547 Usage   : $tree_copy = $tree->clone();
548           $subtree_copy = $tree->clone($internal_node);
549 Function: Safe tree clone that doesn't segfault
550 Returns : Bio::Tree::Tree object
551 Args    : [optional] $start_node, Bio::Tree::Node object
552
553=cut
554
555sub clone {
556    my ($self, $parent, $parent_clone) = @_;
557    $parent ||= $self->get_root_node;
558    $parent_clone ||= $self->_clone_node($parent);
559
560    foreach my $node ($parent->each_Descendent()) {
561        my $child = $self->_clone_node($node);
562        $child->ancestor($parent_clone);
563        $self->_clone($node, $child);
564    }
565    $parent->ancestor && return;
566
567    my $tree = $self->new(-root => $parent_clone);
568    return $tree;
569}
570
571
572# -- private internal methods --
573
574sub cleanup_tree {
575    my $self = shift;
576    unless( $self->nodelete ) {
577        for my $node ($self->get_nodes(-order  => 'b', -sortby => 'none')) {
578            $node->node_cleanup;
579        }
580    }
581    $self->{'_rootnode'} = undef;
582}
583
5841;
585