1# Copyright (C) 2013 by Yanbo Ye (yeyanbo289@gmail.com) 2# This code is part of the Biopython distribution and governed by its 3# license. Please see the LICENSE file that should have been included 4# as part of this package. 5 6"""Unit tests for the Bio.Phylo.TreeConstruction module.""" 7 8import os 9import unittest 10import tempfile 11 12from io import StringIO 13from Bio import AlignIO 14from Bio import Phylo 15from Bio.Phylo import BaseTree 16from Bio.Phylo import TreeConstruction 17from Bio.Phylo import Consensus 18from Bio.Phylo.TreeConstruction import _Matrix 19from Bio.Phylo.TreeConstruction import DistanceMatrix 20from Bio.Phylo.TreeConstruction import DistanceCalculator 21from Bio.Phylo.TreeConstruction import DistanceTreeConstructor 22from Bio.Phylo.TreeConstruction import ParsimonyScorer 23from Bio.Phylo.TreeConstruction import NNITreeSearcher 24from Bio.Phylo.TreeConstruction import ParsimonyTreeConstructor 25 26 27temp_dir = tempfile.mkdtemp() 28 29 30class DistanceMatrixTest(unittest.TestCase): 31 """Test for DistanceMatrix construction and manipulation.""" 32 33 def setUp(self): 34 self.names = ["Alpha", "Beta", "Gamma", "Delta"] 35 self.matrix = [[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]] 36 37 def test_good_construction(self): 38 dm = DistanceMatrix(self.names, self.matrix) 39 self.assertIsInstance(dm, TreeConstruction.DistanceMatrix) 40 self.assertEqual(dm.names[0], "Alpha") 41 self.assertEqual(dm.matrix[2][1], 3) 42 self.assertEqual(len(dm), 4) 43 self.assertEqual( 44 repr(dm), 45 "DistanceMatrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], " 46 "matrix=[[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]])", 47 ) 48 49 def test_bad_construction(self): 50 self.assertRaises( 51 TypeError, 52 DistanceMatrix, 53 ["Alpha", 100, "Gamma", "Delta"], 54 [[0], [0.1, 0], [0.2, 0.3, 0], [0.4, 0.5, 0.6, 0]], 55 ) 56 self.assertRaises( 57 TypeError, 58 DistanceMatrix, 59 ["Alpha", "Beta", "Gamma", "Delta"], 60 [[0], ["a"], [0.2, 0.3], [0.4, 0.5, 0.6]], 61 ) 62 self.assertRaises( 63 ValueError, 64 DistanceMatrix, 65 ["Alpha", "Alpha", "Gamma", "Delta"], 66 [[0], [0.1], [0.2, 0.3], [0.4, 0.5, 0.6]], 67 ) 68 self.assertRaises( 69 ValueError, 70 DistanceMatrix, 71 ["Alpha", "Beta", "Gamma", "Delta"], 72 [[0], [0.2, 0], [0.4, 0.5, 0.6]], 73 ) 74 self.assertRaises( 75 ValueError, 76 DistanceMatrix, 77 ["Alpha", "Beta", "Gamma", "Delta"], 78 [[0], [0.1], [0.2, 0.3, 0.4], [0.4, 0.5, 0.6]], 79 ) 80 81 def test_good_manipulation(self): 82 dm = DistanceMatrix(self.names, self.matrix) 83 # getitem 84 self.assertEqual(dm[1], [1, 0, 3, 5]) 85 self.assertEqual(dm[2, 1], 3) 86 self.assertEqual(dm[2][1], 3) 87 self.assertEqual(dm[1, 2], 3) 88 self.assertEqual(dm[1][2], 3) 89 self.assertEqual(dm["Alpha"], [0, 1, 2, 4]) 90 self.assertEqual(dm["Gamma", "Delta"], 6) 91 # setitem 92 dm["Alpha"] = [0, 10, 20, 40] 93 self.assertEqual(dm["Alpha"], [0, 10, 20, 40]) 94 # delitem insert item 95 del dm[1] 96 self.assertEqual(dm.names, ["Alpha", "Gamma", "Delta"]) 97 self.assertEqual(dm.matrix, [[0], [20, 0], [40, 6, 0]]) 98 dm.insert("Beta", [1, 0, 3, 5], 1) 99 self.assertEqual(dm.names, self.names) 100 self.assertEqual(dm.matrix, [[0], [1, 0], [20, 3, 0], [40, 5, 6, 0]]) 101 del dm["Alpha"] 102 self.assertEqual(dm.names, ["Beta", "Gamma", "Delta"]) 103 self.assertEqual(dm.matrix, [[0], [3, 0], [5, 6, 0]]) 104 dm.insert("Alpha", [1, 2, 4, 0]) 105 self.assertEqual(dm.names, ["Beta", "Gamma", "Delta", "Alpha"]) 106 self.assertEqual(dm.matrix, [[0], [3, 0], [5, 6, 0], [1, 2, 4, 0]]) 107 108 def test_bad_manipulation(self): 109 dm = DistanceMatrix(self.names, self.matrix) 110 # getitem 111 self.assertRaises(ValueError, dm.__getitem__, "A") 112 self.assertRaises(ValueError, dm.__getitem__, ("Alpha", "A")) 113 self.assertRaises(TypeError, dm.__getitem__, (1, "A")) 114 self.assertRaises(TypeError, dm.__getitem__, (1, 1.2)) 115 self.assertRaises(IndexError, dm.__getitem__, 6) 116 self.assertRaises(IndexError, dm.__getitem__, (10, 10)) 117 # setitem: item or index test 118 self.assertRaises(ValueError, dm.__setitem__, "A", [1, 3, 4]) 119 self.assertRaises(ValueError, dm.__setitem__, ("Alpha", "A"), 4) 120 self.assertRaises(TypeError, dm.__setitem__, (1, "A"), 3) 121 self.assertRaises(TypeError, dm.__setitem__, (1, 1.2), 2) 122 self.assertRaises(IndexError, dm.__setitem__, 6, [1, 3, 4]) 123 self.assertRaises(IndexError, dm.__setitem__, (10, 10), 1) 124 # setitem: value test 125 self.assertRaises(ValueError, dm.__setitem__, 0, [1, 2]) 126 self.assertRaises(TypeError, dm.__setitem__, ("Alpha", "Beta"), "a") 127 self.assertRaises(TypeError, dm.__setitem__, "Alpha", ["a", "b", "c"]) 128 129 def test_format_phylip(self): 130 dm = DistanceMatrix(self.names, self.matrix) 131 handle = StringIO() 132 dm.format_phylip(handle) 133 lines = handle.getvalue().splitlines() 134 self.assertEqual(len(lines), len(dm) + 1) 135 self.assertTrue(lines[0].endswith(str(len(dm)))) 136 for name, line in zip(self.names, lines[1:]): 137 self.assertTrue(line.startswith(name)) 138 139 140class DistanceCalculatorTest(unittest.TestCase): 141 """Test DistanceCalculator.""" 142 143 def test_known_matrices(self): 144 aln = AlignIO.read("TreeConstruction/msa.phy", "phylip") 145 146 calculator = DistanceCalculator("identity") 147 dm = calculator.get_distance(aln) 148 self.assertEqual(dm["Alpha", "Beta"], 1 - (10 * 1.0 / 13)) 149 150 calculator = DistanceCalculator("blastn") 151 dm = calculator.get_distance(aln) 152 self.assertEqual(dm["Alpha", "Beta"], 1 - (38 * 1.0 / 65)) 153 154 calculator = DistanceCalculator("trans") 155 dm = calculator.get_distance(aln) 156 self.assertEqual(dm["Alpha", "Beta"], 1 - (54 * 1.0 / 65)) 157 158 calculator = DistanceCalculator("blosum62") 159 dm = calculator.get_distance(aln) 160 self.assertEqual(dm["Alpha", "Beta"], 1 - (53 * 1.0 / 84)) 161 162 def test_nonmatching_seqs(self): 163 aln = AlignIO.read(StringIO(">Alpha\nA-A--\n>Gamma\n-Y-Y-"), "fasta") 164 # With a proper scoring matrix -- no matches 165 dmat = DistanceCalculator("blosum62").get_distance(aln) 166 self.assertEqual(dmat["Alpha", "Alpha"], 0.0) 167 self.assertEqual(dmat["Alpha", "Gamma"], 1.0) 168 # Comparing characters only -- 4 misses, 1 match 169 dmat = DistanceCalculator().get_distance(aln) 170 self.assertEqual(dmat["Alpha", "Alpha"], 0.0) 171 self.assertAlmostEqual(dmat["Alpha", "Gamma"], 4.0 / 5.0) 172 173 174class DistanceTreeConstructorTest(unittest.TestCase): 175 """Test DistanceTreeConstructor.""" 176 177 def setUp(self): 178 self.aln = AlignIO.read("TreeConstruction/msa.phy", "phylip") 179 calculator = DistanceCalculator("blosum62") 180 self.dm = calculator.get_distance(self.aln) 181 self.constructor = DistanceTreeConstructor(calculator) 182 183 def test_upgma(self): 184 tree = self.constructor.upgma(self.dm) 185 self.assertIsInstance(tree, BaseTree.Tree) 186 # tree_file = StringIO() 187 # Phylo.write(tree, tree_file, 'newick') 188 ref_tree = Phylo.read("./TreeConstruction/upgma.tre", "newick") 189 self.assertTrue(Consensus._equal_topology(tree, ref_tree)) 190 # ref_tree.close() 191 192 def test_nj(self): 193 tree = self.constructor.nj(self.dm) 194 self.assertIsInstance(tree, BaseTree.Tree) 195 # tree_file = StringIO() 196 # Phylo.write(tree, tree_file, 'newick') 197 ref_tree = Phylo.read("./TreeConstruction/nj.tre", "newick") 198 self.assertTrue(Consensus._equal_topology(tree, ref_tree)) 199 # ref_tree.close() 200 201 # create a matrix of length 2 202 calculator = DistanceCalculator("blosum62") 203 self.min_dm = calculator.get_distance(self.aln) 204 for i in range(len(self.min_dm) - 2): 205 del self.min_dm[len(self.min_dm) - 1] 206 207 min_tree = self.constructor.nj(self.min_dm) 208 self.assertIsInstance(min_tree, BaseTree.Tree) 209 210 ref_min_tree = Phylo.read("./TreeConstruction/nj_min.tre", "newick") 211 self.assertTrue(Consensus._equal_topology(min_tree, ref_min_tree)) 212 213 def test_built_tree(self): 214 tree = self.constructor.build_tree(self.aln) 215 self.assertIsInstance(tree, BaseTree.Tree) 216 # tree_file = StringIO() 217 # Phylo.write(tree, tree_file, 'newick') 218 ref_tree = Phylo.read("./TreeConstruction/nj.tre", "newick") 219 self.assertTrue(Consensus._equal_topology(tree, ref_tree)) 220 # ref_tree.close() 221 222 223class ParsimonyScorerTest(unittest.TestCase): 224 """Test ParsimonyScorer.""" 225 226 def test_get_score(self): 227 aln = AlignIO.read("TreeConstruction/msa.phy", "phylip") 228 tree = Phylo.read("./TreeConstruction/upgma.tre", "newick") 229 scorer = ParsimonyScorer() 230 score = scorer.get_score(tree, aln) 231 self.assertEqual(score, 2 + 1 + 2 + 2 + 1 + 1 + 1 + 3) 232 233 alphabet = ["A", "T", "C", "G"] 234 step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]] 235 matrix = _Matrix(alphabet, step_matrix) 236 scorer = ParsimonyScorer(matrix) 237 score = scorer.get_score(tree, aln) 238 self.assertEqual(score, 3.5 + 2.5 + 3.5 + 3.5 + 2.5 + 1 + 2.5 + 4.5) 239 240 alphabet = [ 241 "A", 242 "C", 243 "D", 244 "E", 245 "F", 246 "G", 247 "H", 248 "I", 249 "K", 250 "L", 251 "M", 252 "N", 253 "P", 254 "Q", 255 "R", 256 "1", 257 "2", 258 "T", 259 "V", 260 "W", 261 "Y", 262 "*", 263 "-", 264 ] 265 step_matrix = [ 266 [0], 267 [2, 0], 268 [1, 2, 0], 269 [1, 2, 1, 0], 270 [2, 1, 2, 2, 0], 271 [1, 1, 1, 1, 2, 0], 272 [2, 2, 1, 2, 2, 2, 0], 273 [2, 2, 2, 2, 1, 2, 2, 0], 274 [2, 2, 2, 1, 2, 2, 2, 1, 0], 275 [2, 2, 2, 2, 1, 2, 1, 1, 2, 0], 276 [2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0], 277 [2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 0], 278 [1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0], 279 [2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 0], 280 [2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0], 281 [1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0], 282 [2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 0], 283 [1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0], 284 [1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0], 285 [2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 2, 0], 286 [2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 3, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0], 287 [2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 0], 288 [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0], 289 ] 290 291 matrix = _Matrix(alphabet, step_matrix) 292 scorer = ParsimonyScorer(matrix) 293 score = scorer.get_score(tree, aln) 294 self.assertEqual(score, 3 + 1 + 3 + 3 + 2 + 1 + 2 + 5) 295 296 297class NNITreeSearcherTest(unittest.TestCase): 298 """Test NNITreeSearcher.""" 299 300 def test_get_neighbors(self): 301 tree = Phylo.read("./TreeConstruction/upgma.tre", "newick") 302 alphabet = ["A", "T", "C", "G"] 303 step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]] 304 matrix = _Matrix(alphabet, step_matrix) 305 scorer = ParsimonyScorer(matrix) 306 searcher = NNITreeSearcher(scorer) 307 trees = searcher._get_neighbors(tree) 308 self.assertEqual(len(trees), 2 * (5 - 3)) 309 Phylo.write(trees, os.path.join(temp_dir, "neighbor_trees.tre"), "newick") 310 311 312class ParsimonyTreeConstructorTest(unittest.TestCase): 313 """Test ParsimonyTreeConstructor.""" 314 315 def test_build_tree(self): 316 aln = AlignIO.read("TreeConstruction/msa.phy", "phylip") 317 tree1 = Phylo.read("./TreeConstruction/upgma.tre", "newick") 318 tree2 = Phylo.read("./TreeConstruction/nj.tre", "newick") 319 alphabet = ["A", "T", "C", "G"] 320 step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]] 321 matrix = _Matrix(alphabet, step_matrix) 322 scorer = ParsimonyScorer(matrix) 323 searcher = NNITreeSearcher(scorer) 324 constructor = ParsimonyTreeConstructor(searcher, tree1) 325 best_tree = constructor.build_tree(aln) 326 Phylo.write(best_tree, os.path.join(temp_dir, "pars1.tre"), "newick") 327 constructor.starting_tree = tree2 328 best_tree = constructor.build_tree(aln) 329 Phylo.write(best_tree, os.path.join(temp_dir, "pars2.tre"), "newick") 330 constructor.starting_tree = None 331 best_tree = constructor.build_tree(aln) 332 Phylo.write(best_tree, os.path.join(temp_dir, "pars3.tre"), "newick") 333 334 335if __name__ == "__main__": 336 runner = unittest.TextTestRunner(verbosity=2) 337 unittest.main(testRunner=runner) 338