1# Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
2# This code is part of the Biopython distribution and governed by its
3# license. Please see the LICENSE file that should have been included
4# as part of this package.
5
6"""Unit tests for the Bio.Phylo module."""
7
8import os
9import unittest
10import tempfile
11
12from io import StringIO
13
14from Bio import Phylo
15from Bio.Phylo import PhyloXML
16
17
# Fixture paths are relative, so they are resolved against the current working
# directory (presumably the directory that holds the Nexus/ and PhyloXML/
# folders -- confirm against how the suite is launched).

# Example Newick and Nexus files
EX_NEWICK = "Nexus/int_node_labels.nwk"  # one tree; tests expect 28 terminals
EX_NEWICK2 = "Nexus/test.new"  # one tree with comments; 33 terminals expected
EX_NEXUS = "Nexus/test_Nexus_input.nex"  # 3 trees, 9 terminals each expected
EX_NEXUS2 = "Nexus/bats.nex"  # one tree; tests expect 658 terminals
EX_NEWICK_BOM = "Nexus/ByteOrderMarkFile.nwk"  # read as a byte-order-mark case

# Example PhyloXML files
EX_APAF = "PhyloXML/apaf.xml"
EX_BCL2 = "PhyloXML/bcl_2.xml"
EX_DIST = "PhyloXML/distribution.xml"
EX_PHYLO = "PhyloXML/phyloxml_examples.xml"  # tests expect 13 phylogenies
30
31
class IOTests(unittest.TestCase):
    """Tests for parsing and writing the supported formats.

    Reading goes through ``Phylo.read`` / ``Phylo.parse``, writing through
    ``Phylo.write`` and format conversion through ``Phylo.convert``.  Input
    comes either from the ``EX_*`` fixture files or from in-memory
    ``StringIO`` handles.
    """

    def test_newick_read_single1(self):
        """Read first Newick file with one tree."""
        tree = Phylo.read(EX_NEWICK, "newick")
        self.assertEqual(len(tree.get_terminals()), 28)

    def test_newick_read_single2(self):
        """Read second Newick file with one tree."""
        tree = Phylo.read(EX_NEWICK2, "newick")
        self.assertEqual(len(tree.get_terminals()), 33)
        self.assertEqual(tree.find_any("Homo sapiens").comment, "modern human")
        self.assertEqual(
            tree.find_any("Equus caballus").comment,
            "wild horse; also 'Equus ferus caballus'",
        )
        self.assertEqual(tree.root.confidence, 80)
        # Re-reading with comments_are_confidence=True must change where the
        # root confidence value is taken from.
        tree = Phylo.read(EX_NEWICK2, "newick", comments_are_confidence=True)
        self.assertEqual(tree.root.confidence, 100)

    def test_newick_read_single3(self):
        """Read Nexus file with one tree."""
        tree = Phylo.read(EX_NEXUS2, "nexus")
        self.assertEqual(len(tree.get_terminals()), 658)

    def test_unicode_exception(self):
        """Read a Newick file with a unicode byte order mark (BOM)."""
        with open(EX_NEWICK_BOM, encoding="utf-8") as handle:
            tree = Phylo.read(handle, "newick")
        self.assertEqual(len(tree.get_terminals()), 3)

    def test_newick_read_multiple(self):
        """Parse a Nexus file with multiple trees."""
        trees = list(Phylo.parse(EX_NEXUS, "nexus"))
        self.assertEqual(len(trees), 3)
        for tree in trees:
            self.assertEqual(len(tree.get_terminals()), 9)

    def test_newick_write(self):
        """Write a Newick tree and check internal node labels are retained."""
        # Tree with internal node labels
        original = Phylo.read(StringIO("(A,B,(C,D)E)F;"), "newick")
        mem_file = StringIO()
        Phylo.write(original, mem_file, "newick")
        mem_file.seek(0)
        reloaded = Phylo.read(mem_file, "newick")
        # Sanity check
        self.assertEqual(reloaded.count_terminals(), 4)
        # Check internal node labels were retained
        internal_names = {clade.name for clade in reloaded.get_nonterminals()}
        self.assertEqual(internal_names, {"E", "F"})

    def test_newick_read_scinot(self):
        """Parse Newick branch lengths in scientific notation."""
        tree = Phylo.read(StringIO("(foo:1e-1,bar:0.1)"), "newick")
        clade_a = tree.clade[0]
        self.assertEqual(clade_a.name, "foo")
        self.assertAlmostEqual(clade_a.branch_length, 0.1)

    def test_phylo_read_extra(self):
        """Additional tests to check correct parsing.

        Exercises negative branch lengths, an unnamed terminal, and newlines
        embedded in the Newick string.
        """
        tree = Phylo.read(StringIO("(A:1, B:-2, (C:3, D:4):-2)"), "newick")
        self.assertEqual(tree.distance("A"), 1)
        self.assertEqual(tree.distance("B"), -2)
        self.assertEqual(tree.distance("C"), 1)
        self.assertEqual(tree.distance("D"), 2)

        tree = Phylo.read(StringIO("((A:1, B:-2):-5, (C:3, D:4):-2)"), "newick")
        self.assertEqual(tree.distance("A"), -4)
        self.assertEqual(tree.distance("B"), -7)
        self.assertEqual(tree.distance("C"), 1)
        self.assertEqual(tree.distance("D"), 2)

        # Same tree as above, but one terminal has no name, so terminals are
        # looked up by object and the full collection of root-to-tip
        # distances is compared.  (The previous bookkeeping subtracted each
        # counter from itself, so its assertion could never fail.)
        for newick in (
            "((:1, B:-2):-5, (C:3, D:4):-2)",
            "((:\n1\n,\n B:-2):-5, (C:3, D:4):-2);",
        ):
            tree = Phylo.read(StringIO(newick), "newick")
            found = sorted(int(tree.distance(leaf)) for leaf in tree.get_terminals())
            self.assertEqual(found, [-7, -4, 1, 2])

    def test_format_branch_length(self):
        """Custom format string for Newick branch length serialization."""
        tree = Phylo.read(StringIO("A:0.1;"), "newick")
        mem_file = StringIO()
        Phylo.write(tree, mem_file, "newick", format_branch_length="%.0e")
        value = mem_file.getvalue().strip()
        self.assertTrue(value.startswith("A:"))
        self.assertTrue(value.endswith(";"))
        # Everything between the "A:" prefix and the ";" terminator is the
        # formatted branch length.
        self.assertEqual(value[2:-1], "%.0e" % 0.1)

    def test_convert(self):
        """Convert a tree between all supported formats."""
        mem_file_1 = StringIO()
        mem_file_2 = StringIO()
        mem_file_3 = StringIO()
        # newick -> nexus -> phyloxml -> newick; each buffer is rewound before
        # it is used as the next input.
        Phylo.convert(EX_NEWICK, "newick", mem_file_1, "nexus")
        mem_file_1.seek(0)
        Phylo.convert(mem_file_1, "nexus", mem_file_2, "phyloxml")
        mem_file_2.seek(0)
        Phylo.convert(mem_file_2, "phyloxml", mem_file_3, "newick")
        mem_file_3.seek(0)
        tree = Phylo.read(mem_file_3, "newick")
        self.assertEqual(len(tree.get_terminals()), 28)

    def test_convert_phyloxml_binary(self):
        """Try writing phyloxml to a binary handle; fail on Py3."""
        trees = Phylo.parse(EX_PHYLO, "phyloxml")
        with tempfile.NamedTemporaryFile(mode="wb") as out_handle:
            with self.assertRaises(TypeError):
                Phylo.write(trees, out_handle, "phyloxml")

    def test_convert_phyloxml_text(self):
        """Write phyloxml to a text handle."""
        trees = Phylo.parse(EX_PHYLO, "phyloxml")
        with tempfile.NamedTemporaryFile(mode="w") as out_handle:
            count = Phylo.write(trees, out_handle, "phyloxml")
        self.assertEqual(13, count)

    def test_convert_phyloxml_filename(self):
        """Write phyloxml to a given filename."""
        trees = Phylo.parse(EX_PHYLO, "phyloxml")
        # Only the name is needed: create the file, close it again, and let
        # Phylo.write open it by filename.
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as out_handle:
            tmp_filename = out_handle.name
        try:
            count = Phylo.write(trees, tmp_filename, "phyloxml")
        finally:
            # delete=False means the file must be removed by hand.
            os.remove(tmp_filename)
        self.assertEqual(13, count)

    def test_int_labels(self):
        """Read newick formatted tree with numeric labels."""
        tree = Phylo.read(
            StringIO("(((0:0.1,1:0.1)0.99:0.1,2:0.1)0.98:0.0);"), "newick"
        )
        self.assertEqual({leaf.name for leaf in tree.get_terminals()}, {"0", "1", "2"})
175
176
class TreeTests(unittest.TestCase):
    """Tests for methods on BaseTree.Tree objects.

    Covers random tree generation, rerooting (by outgroup and at the
    midpoint) and the string rendering of a tree.
    """

    def test_randomized(self):
        """Tree.randomized: generate a new randomized tree."""
        make_tree = Phylo.BaseTree.Tree.randomized
        for tip_count in (2, 5, 20):
            default_tree = make_tree(tip_count)
            self.assertEqual(default_tree.count_terminals(), tip_count)
            self.assertEqual(default_tree.total_branch_length(), (tip_count - 1) * 2)
            long_tree = make_tree(tip_count, branch_length=2.0)
            self.assertEqual(long_tree.total_branch_length(), (tip_count - 1) * 4)
        noisy_tree = make_tree(5, branch_stdev=0.5)
        self.assertEqual(noisy_tree.count_terminals(), 5)

    def test_root_with_outgroup(self):
        """Tree.root_with_outgroup: reroot at a given clade."""
        tree = Phylo.read(EX_APAF, "phyloxml")
        tip_count_before = len(tree.get_terminals())
        length_before = tree.total_branch_length()
        # Rerootings are applied one after another to the same tree:
        #   1. a deep internal node of a large realistic tree,
        #   2. an external node,
        #   3-4. explicit outgroup branch lengths, which mustn't change the
        #        total tree size.
        rerootings = (
            (("19_NEMVE", "20_NEMVE"), {}),
            (("1_BRAFL",), {}),
            (("2_BRAFL",), {"outgroup_branch_length": 0.5}),
            (("36_BRAFL", "37_BRAFL"), {"outgroup_branch_length": 0.5}),
        )
        for targets, options in rerootings:
            tree.root_with_outgroup(*targets, **options)
            self.assertEqual(tip_count_before, len(tree.get_terminals()))
            self.assertAlmostEqual(length_before, tree.total_branch_length())
        # On small contrived trees, testing edge cases
        contrived_trees = (
            "(A,B,(C,D));",
            "((E,F),((G,H)),(I,J));",
            "((Q,R),(S,T),(U,V));",
            "(X,Y);",
        )
        for newick in contrived_trees:
            small_tree = Phylo.read(StringIO(newick), "newick")
            small_len = small_tree.total_branch_length()
            # The clades are collected up front -- presumably because
            # rerooting rearranges the tree while it is being traversed.
            candidates = list(small_tree.find_clades())
            for outgroup in candidates:
                small_tree.root_with_outgroup(outgroup)
                self.assertAlmostEqual(small_len, small_tree.total_branch_length())

    def test_root_at_midpoint(self):
        """Tree.root_at_midpoint: reroot at the tree's midpoint."""
        fixtures = (
            (EX_APAF, "phyloxml"),
            (EX_BCL2, "phyloxml"),
            (EX_NEWICK, "newick"),
        )
        for path, file_format in fixtures:
            tree = Phylo.read(path, file_format)
            length_before = tree.total_branch_length()
            tree.root_at_midpoint()
            # Total branch length does not change
            self.assertAlmostEqual(length_before, tree.total_branch_length())
            # Root is bifurcating
            self.assertEqual(len(tree.root.clades), 2)
            # Deepest tips under each child of the root are equally deep
            deepest = [
                max(tree.clade[side].depths().values()) for side in (0, 1)
            ]
            self.assertAlmostEqual(deepest[0], deepest[1])

    # Magic method
    def test_str(self):
        """Tree.__str__: pretty-print to a string.

        NB: The exact line counts are liable to change if the object
        constructors change.
        """
        expected_line_counts = (
            (EX_APAF, 386),
            (EX_BCL2, 747),
            (EX_DIST, 15),
        )
        for path, line_count in expected_line_counts:
            rendered = str(Phylo.read(path, "phyloxml"))
            self.assertEqual(len(rendered.splitlines()), line_count)
254
255
class MixinTests(unittest.TestCase):
    """Tests for TreeMixin methods.

    Most tests work on the phylogenies parsed from ``EX_PHYLO``; a few load
    ``EX_APAF`` separately.
    """

    def setUp(self):
        """Parse the example phylogenies anew for each test."""
        parsed = Phylo.parse(EX_PHYLO, "phyloxml")
        self.phylogenies = list(parsed)

    # Traversal methods

    def test_find_elements(self):
        """TreeMixin: find_elements() method."""
        # From the docstring example
        octopus_hits = list(
            self.phylogenies[5].find_elements(PhyloXML.Taxonomy, code="OCTVU")
        )
        self.assertEqual(len(octopus_hits), 1)
        octopus = octopus_hits[0]
        self.assertIsInstance(octopus, PhyloXML.Taxonomy)
        self.assertEqual(octopus.code, "OCTVU")
        self.assertEqual(octopus.scientific_name, "Octopus vulgaris")
        # Iteration and regexps
        wgs_points = self.phylogenies[10].find_elements(geodetic_datum=r"WGS\d{2}")
        expected_altitudes = (472, 10, 452)
        for point, altitude in zip(wgs_points, expected_altitudes):
            self.assertIsInstance(point, PhyloXML.Point)
            self.assertEqual(point.geodetic_datum, "WGS84")
            self.assertAlmostEqual(point.alt, altitude)
        # class filter
        events = list(self.phylogenies[4].find_elements(PhyloXML.Events))
        self.assertEqual(len(events), 2)
        first_event, second_event = events[0], events[1]
        self.assertEqual(first_event.speciations, 1)
        self.assertEqual(second_event.duplications, 1)
        # string filter & find_any
        found = self.phylogenies[3].find_any("B. subtilis")
        self.assertEqual(found.scientific_name, "B. subtilis")
        # integer filter
        apaf = Phylo.read(EX_APAF, "phyloxml")
        card_domains = list(apaf.find_elements(start=5))
        self.assertEqual(len(card_domains), 8)
        for domain in card_domains:
            self.assertEqual(domain.start, 5)
            self.assertEqual(domain.value, "CARD")

    def test_find_clades(self):
        """TreeMixin: find_clades() method."""
        # boolean filter
        named_clades = self.phylogenies[10].find_clades(name=True)
        for clade, expected_name in zip(named_clades, "ABCD"):
            self.assertIsInstance(clade, PhyloXML.Clade)
            self.assertEqual(clade.name, expected_name)
        # finding deeper attributes
        octopus_clades = list(self.phylogenies[5].find_clades(code="OCTVU"))
        self.assertEqual(len(octopus_clades), 1)
        octopus_clade = octopus_clades[0]
        self.assertIsInstance(octopus_clade, PhyloXML.Clade)
        self.assertEqual(octopus_clade.taxonomies[0].code, "OCTVU")
        # string filter
        d_matches = self.phylogenies[10].find_clades("D")
        self.assertEqual(next(d_matches).name, "D")

    def test_find_terminal(self):
        """TreeMixin: find_elements() with terminal argument."""
        # Expected element counts, one entry per phylogeny in file order.
        all_counts = (6, 6, 7, 18, 21, 27, 7, 9, 9, 19, 15, 9, 6)
        terminal_counts = (3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3)
        nonterminal_counts = (3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3)

        def count(elements):
            return sum(1 for _ in elements)

        expectations = zip(all_counts, terminal_counts, nonterminal_counts)
        for tree, (total, extern, intern) in zip(self.phylogenies, expectations):
            self.assertEqual(count(tree.find_elements()), total)
            self.assertEqual(count(tree.find_elements(terminal=True)), extern)
            self.assertEqual(count(tree.find_elements(terminal=False)), intern)

    def test_get_path(self):
        """TreeMixin: get_path() method."""
        steps = self.phylogenies[1].get_path("B")
        self.assertEqual(len(steps), 2)
        first_step, last_step = steps[0], steps[1]
        self.assertAlmostEqual(first_step.branch_length, 0.06)
        self.assertAlmostEqual(last_step.branch_length, 0.23)
        self.assertEqual(last_step.name, "B")

    def test_trace(self):
        """TreeMixin: trace() method."""
        steps = self.phylogenies[1].trace("A", "C")
        self.assertEqual(len(steps), 3)
        self.assertAlmostEqual(steps[0].branch_length, 0.06)
        endpoint = steps[2]
        self.assertAlmostEqual(endpoint.branch_length, 0.4)
        self.assertEqual(endpoint.name, "C")

    # Information methods

    def test_common_ancestor(self):
        """TreeMixin: common_ancestor() method."""
        small_tree = self.phylogenies[1]
        self.assertEqual(small_tree.common_ancestor("A", "B"), small_tree.clade[0])
        self.assertEqual(small_tree.common_ancestor("A", "C"), small_tree.clade)
        geo_tree = self.phylogenies[10]
        self.assertEqual(geo_tree.common_ancestor("A", "B", "C"), geo_tree.clade[0])

    def test_depths(self):
        """TreeMixin: depths() method."""
        depth_by_clade = self.phylogenies[1].depths()
        self.assertEqual(len(depth_by_clade), 5)
        expected_depths = [0, 0.060, 0.162, 0.290, 0.400]
        for found, expect in zip(sorted(depth_by_clade.values()), expected_depths):
            self.assertAlmostEqual(found, expect)

    def test_distance(self):
        """TreeMixin: distance() method."""
        tree = self.phylogenies[1]
        # One target: distance from the root; two targets: between them.
        cases = (
            (("A",), 0.162),
            (("B",), 0.29),
            (("C",), 0.4),
            (("A", "B"), 0.332),
            (("A", "C"), 0.562),
            (("B", "C"), 0.69),
        )
        for targets, expected in cases:
            self.assertAlmostEqual(tree.distance(*targets), expected)

    def test_is_bifurcating(self):
        """TreeMixin: is_bifurcating() method."""
        # 1/0 flags, one per phylogeny in file order.
        expected_flags = (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1)
        for tree, flag in zip(self.phylogenies, expected_flags):
            self.assertEqual(tree.is_bifurcating(), flag)

    def test_is_monophyletic(self):
        """TreeMixin: is_monophyletic() method."""
        tree = self.phylogenies[10]
        all_tips = tree.get_terminals()
        first_child_tips = tree.clade[0].get_terminals()
        first_two_tips = first_child_tips[:2]
        second_child_tips = tree.clade[1].get_terminals()
        self.assertEqual(tree.is_monophyletic(all_tips), tree.root)
        self.assertEqual(tree.is_monophyletic(first_child_tips), tree.clade[0])
        self.assertFalse(tree.is_monophyletic(first_two_tips))
        self.assertEqual(tree.is_monophyletic(second_child_tips), tree.clade[1])
        # Alternate argument form
        self.assertEqual(tree.is_monophyletic(*all_tips), tree.root)

    def test_total_branch_length(self):
        """TreeMixin: total_branch_length() method."""
        tree = self.phylogenies[1]
        whole_tree_length = tree.total_branch_length()
        self.assertAlmostEqual(whole_tree_length, 0.792)
        subtree_length = tree.clade[0].total_branch_length()
        self.assertAlmostEqual(subtree_length, 0.392)

    # Tree manipulation methods

    def test_collapse(self):
        """TreeMixin: collapse() method."""
        tree = self.phylogenies[1]
        parent = tree.collapse(tree.clade[0])
        self.assertEqual(len(parent), 3)
        expected_children = (("C", 0.4), ("A", 0.162), ("B", 0.29))
        for child, (name, length) in zip(parent, expected_children):
            self.assertEqual(child.name, name)
            self.assertAlmostEqual(child.branch_length, length)

    def test_collapse_all(self):
        """TreeMixin: collapse_all() method."""
        tree = Phylo.read(EX_APAF, "phyloxml")
        depths_before = tree.depths()
        tree.collapse_all()
        # Every clade still present must keep the depth it had before
        for clade, depth_after in tree.depths().items():
            self.assertAlmostEqual(depths_before[clade], depth_after)
        # No internal nodes should remain except the root
        self.assertEqual(len(tree.get_terminals()), len(tree.clade))
        self.assertEqual(len(list(tree.find_clades(terminal=False))), 1)

        # Again, with a target specification
        def is_short(clade):
            return clade.branch_length < 0.1

        tree = Phylo.read(EX_APAF, "phyloxml")
        depths_before = tree.depths()
        internal_before = len(tree.get_nonterminals())
        tree.collapse_all(is_short)
        depths_after = tree.depths()
        # Should have collapsed 7 internal nodes
        self.assertEqual(len(tree.get_nonterminals()), internal_before - 7)
        for clade, depth_after in depths_after.items():
            self.assertAlmostEqual(depths_before[clade], depth_after)

    def test_ladderize(self):
        """TreeMixin: ladderize() method."""
        tree = self.phylogenies[10]

        def tip_order():
            return [tip.name for tip in tree.get_terminals()]

        self.assertEqual(tip_order(), ["A", "B", "C", "D"])
        tree.ladderize()
        self.assertEqual(tip_order(), ["D", "A", "B", "C"])
        tree.ladderize(reverse=True)
        self.assertEqual(tip_order(), ["A", "B", "C", "D"])

    def test_prune(self):
        """TreeMixin: prune() method."""

        def check_terminal_children(parent, expected):
            # Each child must be a terminal with the given name and length.
            for child, (name, length) in zip(parent, expected):
                self.assertTrue(child.is_terminal())
                self.assertEqual(child.name, name)
                self.assertAlmostEqual(child.branch_length, length)

        # Taxon in a trifurcation -- no collapse afterward
        tree = self.phylogenies[10]
        parent = tree.prune(name="B")
        self.assertEqual(len(parent.clades), 2)
        self.assertEqual(parent.clades[0].name, "A")
        self.assertEqual(parent.clades[1].name, "C")
        self.assertEqual(len(tree.get_terminals()), 3)
        self.assertEqual(len(tree.get_nonterminals()), 2)
        # Taxon in a bifurcation -- collapse
        tree = self.phylogenies[0]
        parent = tree.prune(name="A")
        self.assertEqual(len(parent.clades), 2)
        check_terminal_children(parent, (("B", 0.29), ("C", 0.4)))
        self.assertEqual(len(tree.get_terminals()), 2)
        self.assertEqual(len(tree.get_nonterminals()), 1)
        # Taxon just below the root -- don't screw up
        tree = self.phylogenies[1]
        parent = tree.prune(name="C")
        self.assertEqual(parent, tree.root)
        self.assertEqual(len(parent.clades), 2)
        check_terminal_children(parent, (("A", 0.102), ("B", 0.23)))
        self.assertEqual(len(tree.get_terminals()), 2)
        self.assertEqual(len(tree.get_nonterminals()), 1)

    def test_split(self):
        """TreeMixin: split() method."""
        tree = self.phylogenies[0]
        target = tree.clade[1]
        # Default split
        target.split()
        self.assertEqual(len(target), 2)
        self.assertEqual(len(tree.get_terminals()), 4)
        self.assertEqual(len(tree.get_nonterminals()), 3)
        # Split one of the new children three ways with branch length 0.5
        first_child = target[0]
        first_child.split(3, 0.5)
        self.assertEqual(len(tree.get_terminals()), 6)
        self.assertEqual(len(tree.get_nonterminals()), 4)
        for grandchild, name in zip(first_child, ("C00", "C01", "C02")):
            self.assertTrue(grandchild.is_terminal())
            self.assertEqual(grandchild.name, name)
            self.assertEqual(grandchild.branch_length, 0.5)
499
500
501# ---------------------------------------------------------
502
if __name__ == "__main__":
    # Run every test in this module with per-test (verbosity 2) output.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
506