# Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Unit tests for the Bio.Phylo module."""

import os
import unittest
import tempfile

from io import StringIO

from Bio import Phylo
from Bio.Phylo import PhyloXML


# Example Newick and Nexus files
EX_NEWICK = "Nexus/int_node_labels.nwk"
EX_NEWICK2 = "Nexus/test.new"
EX_NEXUS = "Nexus/test_Nexus_input.nex"
EX_NEXUS2 = "Nexus/bats.nex"
EX_NEWICK_BOM = "Nexus/ByteOrderMarkFile.nwk"

# Example PhyloXML files
EX_APAF = "PhyloXML/apaf.xml"
EX_BCL2 = "PhyloXML/bcl_2.xml"
EX_DIST = "PhyloXML/distribution.xml"
EX_PHYLO = "PhyloXML/phyloxml_examples.xml"


class IOTests(unittest.TestCase):
    """Tests for parsing and writing the supported formats."""

    def test_newick_read_single1(self):
        """Read first Newick file with one tree."""
        tree = Phylo.read(EX_NEWICK, "newick")
        self.assertEqual(len(tree.get_terminals()), 28)

    def test_newick_read_single2(self):
        """Read second Newick file with one tree."""
        tree = Phylo.read(EX_NEWICK2, "newick")
        self.assertEqual(len(tree.get_terminals()), 33)
        self.assertEqual(tree.find_any("Homo sapiens").comment, "modern human")
        self.assertEqual(
            tree.find_any("Equus caballus").comment,
            "wild horse; also 'Equus ferus caballus'",
        )
        self.assertEqual(tree.root.confidence, 80)
        # Re-read the same file, this time treating comments as confidences
        tree = Phylo.read(EX_NEWICK2, "newick", comments_are_confidence=True)
        self.assertEqual(tree.root.confidence, 100)

    def test_newick_read_single3(self):
        """Read Nexus file with one tree."""
        tree = Phylo.read(EX_NEXUS2, "nexus")
        self.assertEqual(len(tree.get_terminals()), 658)

    def test_unicode_exception(self):
        """Read a Newick file with a unicode byte order mark (BOM)."""
        with open(EX_NEWICK_BOM, encoding="utf-8") as handle:
            tree = Phylo.read(handle, "newick")
        self.assertEqual(len(tree.get_terminals()), 3)

    def test_newick_read_multiple(self):
        """Parse a Nexus file with multiple trees."""
        trees = list(Phylo.parse(EX_NEXUS, "nexus"))
        self.assertEqual(len(trees), 3)
        for tree in trees:
            self.assertEqual(len(tree.get_terminals()), 9)

    def test_newick_write(self):
        """Write a Newick tree and read it back, keeping internal node labels."""
        # Tree with internal node labels
        mem_file = StringIO()
        tree = Phylo.read(StringIO("(A,B,(C,D)E)F;"), "newick")
        Phylo.write(tree, mem_file, "newick")
        mem_file.seek(0)
        tree2 = Phylo.read(mem_file, "newick")
        # Sanity check
        self.assertEqual(tree2.count_terminals(), 4)
        # Check internal node labels were retained; filter on the label itself
        # so that an unlabeled internal node would not add None to the set.
        internal_names = {
            c.name for c in tree2.get_nonterminals() if c.name is not None
        }
        self.assertEqual(internal_names, {"E", "F"})

    def test_newick_read_scinot(self):
        """Parse Newick branch lengths in scientific notation."""
        tree = Phylo.read(StringIO("(foo:1e-1,bar:0.1)"), "newick")
        clade_a = tree.clade[0]
        self.assertEqual(clade_a.name, "foo")
        self.assertAlmostEqual(clade_a.branch_length, 0.1)

    def test_phylo_read_extra(self):
        """Additional tests to check correct parsing."""
        tree = Phylo.read(StringIO("(A:1, B:-2, (C:3, D:4):-2)"), "newick")
        self.assertEqual(tree.distance("A"), 1)
        self.assertEqual(tree.distance("B"), -2)
        self.assertEqual(tree.distance("C"), 1)
        self.assertEqual(tree.distance("D"), 2)

        tree = Phylo.read(StringIO("((A:1, B:-2):-5, (C:3, D:4):-2)"), "newick")
        self.assertEqual(tree.distance("A"), -4)
        self.assertEqual(tree.distance("B"), -7)
        self.assertEqual(tree.distance("C"), 1)
        self.assertEqual(tree.distance("D"), 2)

        # Same topology with an unnamed leaf, without and with embedded
        # newlines. Comparing the sorted distances checks that every expected
        # value occurs exactly once (the previous bookkeeping subtracted each
        # counter from itself, so its assertion could never fail).
        for newick in (
            "((:1, B:-2):-5, (C:3, D:4):-2)",
            "((:\n1\n,\n B:-2):-5, (C:3, D:4):-2);",
        ):
            tree = Phylo.read(StringIO(newick), "newick")
            found = sorted(int(tree.distance(leaf)) for leaf in tree.get_terminals())
            self.assertEqual(found, [-7, -4, 1, 2])

    def test_format_branch_length(self):
        """Custom format string for Newick branch length serialization."""
        tree = Phylo.read(StringIO("A:0.1;"), "newick")
        mem_file = StringIO()
        Phylo.write(tree, mem_file, "newick", format_branch_length="%.0e")
        value = mem_file.getvalue().strip()
        self.assertTrue(value.startswith("A:"))
        self.assertTrue(value.endswith(";"))
        # Strip the leading "A:" and trailing ";" to isolate the branch length
        self.assertEqual(value[2:-1], "%.0e" % 0.1)

    def test_convert(self):
        """Convert a tree between all supported formats."""
        mem_file_1 = StringIO()
        mem_file_2 = StringIO()
        mem_file_3 = StringIO()
        Phylo.convert(EX_NEWICK, "newick", mem_file_1, "nexus")
        mem_file_1.seek(0)
        Phylo.convert(mem_file_1, "nexus", mem_file_2, "phyloxml")
        mem_file_2.seek(0)
        Phylo.convert(mem_file_2, "phyloxml", mem_file_3, "newick")
        mem_file_3.seek(0)
        tree = Phylo.read(mem_file_3, "newick")
        self.assertEqual(len(tree.get_terminals()), 28)

    def test_convert_phyloxml_binary(self):
        """Try writing phyloxml to a binary handle; fail on Py3."""
        trees = Phylo.parse(EX_PHYLO, "phyloxml")
        with tempfile.NamedTemporaryFile(mode="wb") as out_handle:
            self.assertRaises(TypeError, Phylo.write, trees, out_handle, "phyloxml")

    def test_convert_phyloxml_text(self):
        """Write phyloxml to a text handle."""
        trees = Phylo.parse(EX_PHYLO, "phyloxml")
        with tempfile.NamedTemporaryFile(mode="w") as out_handle:
            count = Phylo.write(trees, out_handle, "phyloxml")
            self.assertEqual(13, count)

    def test_convert_phyloxml_filename(self):
        """Write phyloxml to a given filename."""
        trees = Phylo.parse(EX_PHYLO, "phyloxml")
        # Only the name is needed: create the file, close it, and let
        # Phylo.write reopen it by path.
        out_handle = tempfile.NamedTemporaryFile(mode="w", delete=False)
        out_handle.close()
        tmp_filename = out_handle.name
        try:
            count = Phylo.write(trees, tmp_filename, "phyloxml")
        finally:
            os.remove(tmp_filename)
        self.assertEqual(13, count)

    def test_int_labels(self):
        """Read newick formatted tree with numeric labels."""
        tree = Phylo.read(
            StringIO("(((0:0.1,1:0.1)0.99:0.1,2:0.1)0.98:0.0);"), "newick"
        )
        self.assertEqual({leaf.name for leaf in tree.get_terminals()}, {"0", "1", "2"})


class TreeTests(unittest.TestCase):
    """Tests for methods on BaseTree.Tree objects."""

    def test_randomized(self):
        """Tree.randomized: generate a new randomized tree."""
        for N in (2, 5, 20):
            tree = Phylo.BaseTree.Tree.randomized(N)
            self.assertEqual(tree.count_terminals(), N)
            self.assertEqual(tree.total_branch_length(), (N - 1) * 2)
            tree = Phylo.BaseTree.Tree.randomized(N, branch_length=2.0)
            self.assertEqual(tree.total_branch_length(), (N - 1) * 4)
        tree = Phylo.BaseTree.Tree.randomized(5, branch_stdev=0.5)
        self.assertEqual(tree.count_terminals(), 5)

    def test_root_with_outgroup(self):
        """Tree.root_with_outgroup: reroot at a given clade."""
        # On a large realistic tree, at a deep internal node
        tree = Phylo.read(EX_APAF, "phyloxml")
        orig_num_tips = len(tree.get_terminals())
        orig_tree_len = tree.total_branch_length()
        tree.root_with_outgroup("19_NEMVE", "20_NEMVE")
        self.assertEqual(orig_num_tips, len(tree.get_terminals()))
        self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
        # Now, at an external node
        tree.root_with_outgroup("1_BRAFL")
        self.assertEqual(orig_num_tips, len(tree.get_terminals()))
        self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
        # Specifying outgroup branch length mustn't change the total tree size
        tree.root_with_outgroup("2_BRAFL", outgroup_branch_length=0.5)
        self.assertEqual(orig_num_tips, len(tree.get_terminals()))
        self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
        tree.root_with_outgroup("36_BRAFL", "37_BRAFL", outgroup_branch_length=0.5)
        self.assertEqual(orig_num_tips, len(tree.get_terminals()))
        self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
        # On small contrived trees, testing edge cases
        for small_nwk in (
            "(A,B,(C,D));",
            "((E,F),((G,H)),(I,J));",
            "((Q,R),(S,T),(U,V));",
            "(X,Y);",
        ):
            tree = Phylo.read(StringIO(small_nwk), "newick")
            orig_tree_len = tree.total_branch_length()
            # Snapshot the clades up front: the tree is rerooted inside the loop
            for node in list(tree.find_clades()):
                tree.root_with_outgroup(node)
                self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())

    def test_root_at_midpoint(self):
        """Tree.root_at_midpoint: reroot at the tree's midpoint."""
        for treefname, fmt in [
            (EX_APAF, "phyloxml"),
            (EX_BCL2, "phyloxml"),
            (EX_NEWICK, "newick"),
        ]:
            tree = Phylo.read(treefname, fmt)
            orig_tree_len = tree.total_branch_length()
            # Total branch length does not change
            tree.root_at_midpoint()
            self.assertAlmostEqual(orig_tree_len, tree.total_branch_length())
            # Root is bifurcating
            self.assertEqual(len(tree.root.clades), 2)
            # Deepest tips under each child of the root are equally deep
            deep_dist_0 = max(tree.clade[0].depths().values())
            deep_dist_1 = max(tree.clade[1].depths().values())
            self.assertAlmostEqual(deep_dist_0, deep_dist_1)

    # Magic method
    def test_str(self):
        """Tree.__str__: pretty-print to a string.

        NB: The exact line counts are liable to change if the object
        constructors change.
        """
        for source, count in zip((EX_APAF, EX_BCL2, EX_DIST), (386, 747, 15)):
            tree = Phylo.read(source, "phyloxml")
            output = str(tree)
            self.assertEqual(len(output.splitlines()), count)


class MixinTests(unittest.TestCase):
    """Tests for TreeMixin methods."""

    def setUp(self):
        # Parsed afresh for every test, so tests that modify a tree in place
        # (prune, split, collapse, ladderize) cannot affect one another.
        self.phylogenies = list(Phylo.parse(EX_PHYLO, "phyloxml"))

    # Traversal methods

    def test_find_elements(self):
        """TreeMixin: find_elements() method."""
        # From the docstring example
        tree = self.phylogenies[5]
        matches = list(tree.find_elements(PhyloXML.Taxonomy, code="OCTVU"))
        self.assertEqual(len(matches), 1)
        self.assertIsInstance(matches[0], PhyloXML.Taxonomy)
        self.assertEqual(matches[0].code, "OCTVU")
        self.assertEqual(matches[0].scientific_name, "Octopus vulgaris")
        # Iteration and regexps
        tree = self.phylogenies[10]
        for point, alt in zip(
            tree.find_elements(geodetic_datum=r"WGS\d{2}"), (472, 10, 452)
        ):
            self.assertIsInstance(point, PhyloXML.Point)
            self.assertEqual(point.geodetic_datum, "WGS84")
            self.assertAlmostEqual(point.alt, alt)
        # class filter
        tree = self.phylogenies[4]
        events = list(tree.find_elements(PhyloXML.Events))
        self.assertEqual(len(events), 2)
        self.assertEqual(events[0].speciations, 1)
        self.assertEqual(events[1].duplications, 1)
        # string filter & find_any
        tree = self.phylogenies[3]
        taxonomy = tree.find_any("B. subtilis")
        self.assertEqual(taxonomy.scientific_name, "B. subtilis")
        # integer filter
        tree = Phylo.read(EX_APAF, "phyloxml")
        domains = list(tree.find_elements(start=5))
        self.assertEqual(len(domains), 8)
        for dom in domains:
            self.assertEqual(dom.start, 5)
            self.assertEqual(dom.value, "CARD")

    def test_find_clades(self):
        """TreeMixin: find_clades() method."""
        # boolean filter
        for clade, name in zip(
            self.phylogenies[10].find_clades(name=True), list("ABCD")
        ):
            self.assertIsInstance(clade, PhyloXML.Clade)
            self.assertEqual(clade.name, name)
        # finding deeper attributes
        octo = list(self.phylogenies[5].find_clades(code="OCTVU"))
        self.assertEqual(len(octo), 1)
        self.assertIsInstance(octo[0], PhyloXML.Clade)
        self.assertEqual(octo[0].taxonomies[0].code, "OCTVU")
        # string filter
        dee = next(self.phylogenies[10].find_clades("D"))
        self.assertEqual(dee.name, "D")

    def test_find_terminal(self):
        """TreeMixin: find_elements() with terminal argument."""
        # One expected count per example phylogeny, in file order
        for tree, total, extern, intern in zip(
            self.phylogenies,
            (6, 6, 7, 18, 21, 27, 7, 9, 9, 19, 15, 9, 6),
            (3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3),
            (3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
        ):
            self.assertEqual(len(list(tree.find_elements())), total)
            self.assertEqual(len(list(tree.find_elements(terminal=True))), extern)
            self.assertEqual(len(list(tree.find_elements(terminal=False))), intern)

    def test_get_path(self):
        """TreeMixin: get_path() method."""
        path = self.phylogenies[1].get_path("B")
        self.assertEqual(len(path), 2)
        self.assertAlmostEqual(path[0].branch_length, 0.06)
        self.assertAlmostEqual(path[1].branch_length, 0.23)
        self.assertEqual(path[1].name, "B")

    def test_trace(self):
        """TreeMixin: trace() method."""
        tree = self.phylogenies[1]
        path = tree.trace("A", "C")
        self.assertEqual(len(path), 3)
        self.assertAlmostEqual(path[0].branch_length, 0.06)
        self.assertAlmostEqual(path[2].branch_length, 0.4)
        self.assertEqual(path[2].name, "C")

    # Information methods

    def test_common_ancestor(self):
        """TreeMixin: common_ancestor() method."""
        tree = self.phylogenies[1]
        lca = tree.common_ancestor("A", "B")
        self.assertEqual(lca, tree.clade[0])
        lca = tree.common_ancestor("A", "C")
        self.assertEqual(lca, tree.clade)
        tree = self.phylogenies[10]
        lca = tree.common_ancestor("A", "B", "C")
        self.assertEqual(lca, tree.clade[0])

    def test_depths(self):
        """TreeMixin: depths() method."""
        tree = self.phylogenies[1]
        depths = tree.depths()
        self.assertEqual(len(depths), 5)
        for found, expect in zip(
            sorted(depths.values()), [0, 0.060, 0.162, 0.290, 0.400]
        ):
            self.assertAlmostEqual(found, expect)

    def test_distance(self):
        """TreeMixin: distance() method."""
        t = self.phylogenies[1]
        self.assertAlmostEqual(t.distance("A"), 0.162)
        self.assertAlmostEqual(t.distance("B"), 0.29)
        self.assertAlmostEqual(t.distance("C"), 0.4)
        self.assertAlmostEqual(t.distance("A", "B"), 0.332)
        self.assertAlmostEqual(t.distance("A", "C"), 0.562)
        self.assertAlmostEqual(t.distance("B", "C"), 0.69)

    def test_is_bifurcating(self):
        """TreeMixin: is_bifurcating() method."""
        # 1/0 flags, one per example phylogeny; only the eleventh is expected
        # to be non-bifurcating.
        for tree, is_b in zip(
            self.phylogenies, (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1)
        ):
            self.assertEqual(tree.is_bifurcating(), is_b)

    def test_is_monophyletic(self):
        """TreeMixin: is_monophyletic() method."""
        tree = self.phylogenies[10]
        abcd = tree.get_terminals()
        abc = tree.clade[0].get_terminals()
        ab = abc[:2]
        d = tree.clade[1].get_terminals()
        self.assertEqual(tree.is_monophyletic(abcd), tree.root)
        self.assertEqual(tree.is_monophyletic(abc), tree.clade[0])
        self.assertFalse(tree.is_monophyletic(ab))
        self.assertEqual(tree.is_monophyletic(d), tree.clade[1])
        # Alternate argument form
        self.assertEqual(tree.is_monophyletic(*abcd), tree.root)

    def test_total_branch_length(self):
        """TreeMixin: total_branch_length() method."""
        tree = self.phylogenies[1]
        self.assertAlmostEqual(tree.total_branch_length(), 0.792)
        self.assertAlmostEqual(tree.clade[0].total_branch_length(), 0.392)

    # Tree manipulation methods

    def test_collapse(self):
        """TreeMixin: collapse() method."""
        tree = self.phylogenies[1]
        parent = tree.collapse(tree.clade[0])
        self.assertEqual(len(parent), 3)
        for clade, name, blength in zip(parent, ("C", "A", "B"), (0.4, 0.162, 0.29)):
            self.assertEqual(clade.name, name)
            self.assertAlmostEqual(clade.branch_length, blength)

    def test_collapse_all(self):
        """TreeMixin: collapse_all() method."""
        tree = Phylo.read(EX_APAF, "phyloxml")
        d1 = tree.depths()
        tree.collapse_all()
        d2 = tree.depths()
        # Total branch lengths should not change
        for clade in d2:
            self.assertAlmostEqual(d1[clade], d2[clade])
        # No internal nodes should remain except the root
        self.assertEqual(len(tree.get_terminals()), len(tree.clade))
        self.assertEqual(len(list(tree.find_clades(terminal=False))), 1)
        # Again, with a target specification
        tree = Phylo.read(EX_APAF, "phyloxml")
        d1 = tree.depths()
        internal_node_ct = len(tree.get_nonterminals())
        tree.collapse_all(lambda c: c.branch_length < 0.1)  # noqa: E731
        d2 = tree.depths()
        # Should have collapsed 7 internal nodes
        self.assertEqual(len(tree.get_nonterminals()), internal_node_ct - 7)
        for clade in d2:
            self.assertAlmostEqual(d1[clade], d2[clade])

    def test_ladderize(self):
        """TreeMixin: ladderize() method."""

        def ordered_names(tree):
            return [n.name for n in tree.get_terminals()]

        tree = self.phylogenies[10]
        self.assertEqual(ordered_names(tree), list("ABCD"))
        tree.ladderize()
        self.assertEqual(ordered_names(tree), list("DABC"))
        tree.ladderize(reverse=True)
        self.assertEqual(ordered_names(tree), list("ABCD"))

    def test_prune(self):
        """TreeMixin: prune() method."""
        tree = self.phylogenies[10]
        # Taxon in a trifurcation -- no collapse afterward
        parent = tree.prune(name="B")
        self.assertEqual(len(parent.clades), 2)
        self.assertEqual(parent.clades[0].name, "A")
        self.assertEqual(parent.clades[1].name, "C")
        self.assertEqual(len(tree.get_terminals()), 3)
        self.assertEqual(len(tree.get_nonterminals()), 2)
        # Taxon in a bifurcation -- collapse
        tree = self.phylogenies[0]
        parent = tree.prune(name="A")
        self.assertEqual(len(parent.clades), 2)
        for clade, name, blen in zip(parent, "BC", (0.29, 0.4)):
            self.assertTrue(clade.is_terminal())
            self.assertEqual(clade.name, name)
            self.assertAlmostEqual(clade.branch_length, blen)
        self.assertEqual(len(tree.get_terminals()), 2)
        self.assertEqual(len(tree.get_nonterminals()), 1)
        # Taxon just below the root -- don't screw up
        tree = self.phylogenies[1]
        parent = tree.prune(name="C")
        self.assertEqual(parent, tree.root)
        self.assertEqual(len(parent.clades), 2)
        for clade, name, blen in zip(parent, "AB", (0.102, 0.23)):
            self.assertTrue(clade.is_terminal())
            self.assertEqual(clade.name, name)
            self.assertAlmostEqual(clade.branch_length, blen)
        self.assertEqual(len(tree.get_terminals()), 2)
        self.assertEqual(len(tree.get_nonterminals()), 1)

    def test_split(self):
        """TreeMixin: split() method."""
        tree = self.phylogenies[0]
        C = tree.clade[1]
        C.split()
        self.assertEqual(len(C), 2)
        self.assertEqual(len(tree.get_terminals()), 4)
        self.assertEqual(len(tree.get_nonterminals()), 3)
        C[0].split(3, 0.5)
        self.assertEqual(len(tree.get_terminals()), 6)
        self.assertEqual(len(tree.get_nonterminals()), 4)
        for clade, name, blen in zip(C[0], ("C00", "C01", "C02"), (0.5, 0.5, 0.5)):
            self.assertTrue(clade.is_terminal())
            self.assertEqual(clade.name, name)
            self.assertEqual(clade.branch_length, blen)


# ---------------------------------------------------------

if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)