1# Copyright (C) 2013 by Yanbo Ye (yeyanbo289@gmail.com)
2# This code is part of the Biopython distribution and governed by its
3# license. Please see the LICENSE file that should have been included
4# as part of this package.
5
6"""Unit tests for the Bio.Phylo.TreeConstruction module."""
7
8import os
9import unittest
10import tempfile
11
12from io import StringIO
13from Bio import AlignIO
14from Bio import Phylo
15from Bio.Phylo import BaseTree
16from Bio.Phylo import TreeConstruction
17from Bio.Phylo import Consensus
18from Bio.Phylo.TreeConstruction import _Matrix
19from Bio.Phylo.TreeConstruction import DistanceMatrix
20from Bio.Phylo.TreeConstruction import DistanceCalculator
21from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
22from Bio.Phylo.TreeConstruction import ParsimonyScorer
23from Bio.Phylo.TreeConstruction import NNITreeSearcher
24from Bio.Phylo.TreeConstruction import ParsimonyTreeConstructor
25
26
27temp_dir = tempfile.mkdtemp()
28
29
30class DistanceMatrixTest(unittest.TestCase):
31    """Test for DistanceMatrix construction and manipulation."""
32
33    def setUp(self):
34        self.names = ["Alpha", "Beta", "Gamma", "Delta"]
35        self.matrix = [[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]]
36
37    def test_good_construction(self):
38        dm = DistanceMatrix(self.names, self.matrix)
39        self.assertIsInstance(dm, TreeConstruction.DistanceMatrix)
40        self.assertEqual(dm.names[0], "Alpha")
41        self.assertEqual(dm.matrix[2][1], 3)
42        self.assertEqual(len(dm), 4)
43        self.assertEqual(
44            repr(dm),
45            "DistanceMatrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], "
46            "matrix=[[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]])",
47        )
48
49    def test_bad_construction(self):
50        self.assertRaises(
51            TypeError,
52            DistanceMatrix,
53            ["Alpha", 100, "Gamma", "Delta"],
54            [[0], [0.1, 0], [0.2, 0.3, 0], [0.4, 0.5, 0.6, 0]],
55        )
56        self.assertRaises(
57            TypeError,
58            DistanceMatrix,
59            ["Alpha", "Beta", "Gamma", "Delta"],
60            [[0], ["a"], [0.2, 0.3], [0.4, 0.5, 0.6]],
61        )
62        self.assertRaises(
63            ValueError,
64            DistanceMatrix,
65            ["Alpha", "Alpha", "Gamma", "Delta"],
66            [[0], [0.1], [0.2, 0.3], [0.4, 0.5, 0.6]],
67        )
68        self.assertRaises(
69            ValueError,
70            DistanceMatrix,
71            ["Alpha", "Beta", "Gamma", "Delta"],
72            [[0], [0.2, 0], [0.4, 0.5, 0.6]],
73        )
74        self.assertRaises(
75            ValueError,
76            DistanceMatrix,
77            ["Alpha", "Beta", "Gamma", "Delta"],
78            [[0], [0.1], [0.2, 0.3, 0.4], [0.4, 0.5, 0.6]],
79        )
80
81    def test_good_manipulation(self):
82        dm = DistanceMatrix(self.names, self.matrix)
83        # getitem
84        self.assertEqual(dm[1], [1, 0, 3, 5])
85        self.assertEqual(dm[2, 1], 3)
86        self.assertEqual(dm[2][1], 3)
87        self.assertEqual(dm[1, 2], 3)
88        self.assertEqual(dm[1][2], 3)
89        self.assertEqual(dm["Alpha"], [0, 1, 2, 4])
90        self.assertEqual(dm["Gamma", "Delta"], 6)
91        # setitem
92        dm["Alpha"] = [0, 10, 20, 40]
93        self.assertEqual(dm["Alpha"], [0, 10, 20, 40])
94        # delitem insert item
95        del dm[1]
96        self.assertEqual(dm.names, ["Alpha", "Gamma", "Delta"])
97        self.assertEqual(dm.matrix, [[0], [20, 0], [40, 6, 0]])
98        dm.insert("Beta", [1, 0, 3, 5], 1)
99        self.assertEqual(dm.names, self.names)
100        self.assertEqual(dm.matrix, [[0], [1, 0], [20, 3, 0], [40, 5, 6, 0]])
101        del dm["Alpha"]
102        self.assertEqual(dm.names, ["Beta", "Gamma", "Delta"])
103        self.assertEqual(dm.matrix, [[0], [3, 0], [5, 6, 0]])
104        dm.insert("Alpha", [1, 2, 4, 0])
105        self.assertEqual(dm.names, ["Beta", "Gamma", "Delta", "Alpha"])
106        self.assertEqual(dm.matrix, [[0], [3, 0], [5, 6, 0], [1, 2, 4, 0]])
107
108    def test_bad_manipulation(self):
109        dm = DistanceMatrix(self.names, self.matrix)
110        # getitem
111        self.assertRaises(ValueError, dm.__getitem__, "A")
112        self.assertRaises(ValueError, dm.__getitem__, ("Alpha", "A"))
113        self.assertRaises(TypeError, dm.__getitem__, (1, "A"))
114        self.assertRaises(TypeError, dm.__getitem__, (1, 1.2))
115        self.assertRaises(IndexError, dm.__getitem__, 6)
116        self.assertRaises(IndexError, dm.__getitem__, (10, 10))
117        # setitem: item or index test
118        self.assertRaises(ValueError, dm.__setitem__, "A", [1, 3, 4])
119        self.assertRaises(ValueError, dm.__setitem__, ("Alpha", "A"), 4)
120        self.assertRaises(TypeError, dm.__setitem__, (1, "A"), 3)
121        self.assertRaises(TypeError, dm.__setitem__, (1, 1.2), 2)
122        self.assertRaises(IndexError, dm.__setitem__, 6, [1, 3, 4])
123        self.assertRaises(IndexError, dm.__setitem__, (10, 10), 1)
124        # setitem: value test
125        self.assertRaises(ValueError, dm.__setitem__, 0, [1, 2])
126        self.assertRaises(TypeError, dm.__setitem__, ("Alpha", "Beta"), "a")
127        self.assertRaises(TypeError, dm.__setitem__, "Alpha", ["a", "b", "c"])
128
129    def test_format_phylip(self):
130        dm = DistanceMatrix(self.names, self.matrix)
131        handle = StringIO()
132        dm.format_phylip(handle)
133        lines = handle.getvalue().splitlines()
134        self.assertEqual(len(lines), len(dm) + 1)
135        self.assertTrue(lines[0].endswith(str(len(dm))))
136        for name, line in zip(self.names, lines[1:]):
137            self.assertTrue(line.startswith(name))
138
139
140class DistanceCalculatorTest(unittest.TestCase):
141    """Test DistanceCalculator."""
142
143    def test_known_matrices(self):
144        aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
145
146        calculator = DistanceCalculator("identity")
147        dm = calculator.get_distance(aln)
148        self.assertEqual(dm["Alpha", "Beta"], 1 - (10 * 1.0 / 13))
149
150        calculator = DistanceCalculator("blastn")
151        dm = calculator.get_distance(aln)
152        self.assertEqual(dm["Alpha", "Beta"], 1 - (38 * 1.0 / 65))
153
154        calculator = DistanceCalculator("trans")
155        dm = calculator.get_distance(aln)
156        self.assertEqual(dm["Alpha", "Beta"], 1 - (54 * 1.0 / 65))
157
158        calculator = DistanceCalculator("blosum62")
159        dm = calculator.get_distance(aln)
160        self.assertEqual(dm["Alpha", "Beta"], 1 - (53 * 1.0 / 84))
161
162    def test_nonmatching_seqs(self):
163        aln = AlignIO.read(StringIO(">Alpha\nA-A--\n>Gamma\n-Y-Y-"), "fasta")
164        # With a proper scoring matrix -- no matches
165        dmat = DistanceCalculator("blosum62").get_distance(aln)
166        self.assertEqual(dmat["Alpha", "Alpha"], 0.0)
167        self.assertEqual(dmat["Alpha", "Gamma"], 1.0)
168        # Comparing characters only -- 4 misses, 1 match
169        dmat = DistanceCalculator().get_distance(aln)
170        self.assertEqual(dmat["Alpha", "Alpha"], 0.0)
171        self.assertAlmostEqual(dmat["Alpha", "Gamma"], 4.0 / 5.0)
172
173
174class DistanceTreeConstructorTest(unittest.TestCase):
175    """Test DistanceTreeConstructor."""
176
177    def setUp(self):
178        self.aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
179        calculator = DistanceCalculator("blosum62")
180        self.dm = calculator.get_distance(self.aln)
181        self.constructor = DistanceTreeConstructor(calculator)
182
183    def test_upgma(self):
184        tree = self.constructor.upgma(self.dm)
185        self.assertIsInstance(tree, BaseTree.Tree)
186        # tree_file = StringIO()
187        # Phylo.write(tree, tree_file, 'newick')
188        ref_tree = Phylo.read("./TreeConstruction/upgma.tre", "newick")
189        self.assertTrue(Consensus._equal_topology(tree, ref_tree))
190        # ref_tree.close()
191
192    def test_nj(self):
193        tree = self.constructor.nj(self.dm)
194        self.assertIsInstance(tree, BaseTree.Tree)
195        # tree_file = StringIO()
196        # Phylo.write(tree, tree_file, 'newick')
197        ref_tree = Phylo.read("./TreeConstruction/nj.tre", "newick")
198        self.assertTrue(Consensus._equal_topology(tree, ref_tree))
199        # ref_tree.close()
200
201        # create a matrix of length 2
202        calculator = DistanceCalculator("blosum62")
203        self.min_dm = calculator.get_distance(self.aln)
204        for i in range(len(self.min_dm) - 2):
205            del self.min_dm[len(self.min_dm) - 1]
206
207        min_tree = self.constructor.nj(self.min_dm)
208        self.assertIsInstance(min_tree, BaseTree.Tree)
209
210        ref_min_tree = Phylo.read("./TreeConstruction/nj_min.tre", "newick")
211        self.assertTrue(Consensus._equal_topology(min_tree, ref_min_tree))
212
213    def test_built_tree(self):
214        tree = self.constructor.build_tree(self.aln)
215        self.assertIsInstance(tree, BaseTree.Tree)
216        # tree_file = StringIO()
217        # Phylo.write(tree, tree_file, 'newick')
218        ref_tree = Phylo.read("./TreeConstruction/nj.tre", "newick")
219        self.assertTrue(Consensus._equal_topology(tree, ref_tree))
220        # ref_tree.close()
221
222
223class ParsimonyScorerTest(unittest.TestCase):
224    """Test ParsimonyScorer."""
225
226    def test_get_score(self):
227        aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
228        tree = Phylo.read("./TreeConstruction/upgma.tre", "newick")
229        scorer = ParsimonyScorer()
230        score = scorer.get_score(tree, aln)
231        self.assertEqual(score, 2 + 1 + 2 + 2 + 1 + 1 + 1 + 3)
232
233        alphabet = ["A", "T", "C", "G"]
234        step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]]
235        matrix = _Matrix(alphabet, step_matrix)
236        scorer = ParsimonyScorer(matrix)
237        score = scorer.get_score(tree, aln)
238        self.assertEqual(score, 3.5 + 2.5 + 3.5 + 3.5 + 2.5 + 1 + 2.5 + 4.5)
239
240        alphabet = [
241            "A",
242            "C",
243            "D",
244            "E",
245            "F",
246            "G",
247            "H",
248            "I",
249            "K",
250            "L",
251            "M",
252            "N",
253            "P",
254            "Q",
255            "R",
256            "1",
257            "2",
258            "T",
259            "V",
260            "W",
261            "Y",
262            "*",
263            "-",
264        ]
265        step_matrix = [
266            [0],
267            [2, 0],
268            [1, 2, 0],
269            [1, 2, 1, 0],
270            [2, 1, 2, 2, 0],
271            [1, 1, 1, 1, 2, 0],
272            [2, 2, 1, 2, 2, 2, 0],
273            [2, 2, 2, 2, 1, 2, 2, 0],
274            [2, 2, 2, 1, 2, 2, 2, 1, 0],
275            [2, 2, 2, 2, 1, 2, 1, 1, 2, 0],
276            [2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0],
277            [2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 0],
278            [1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0],
279            [2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 0],
280            [2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0],
281            [1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 0],
282            [2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 0],
283            [1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 0],
284            [1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0],
285            [2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 3, 2, 2, 1, 1, 2, 2, 2, 0],
286            [2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 3, 1, 2, 2, 2, 1, 2, 2, 2, 2, 0],
287            [2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 0],
288            [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0],
289        ]
290
291        matrix = _Matrix(alphabet, step_matrix)
292        scorer = ParsimonyScorer(matrix)
293        score = scorer.get_score(tree, aln)
294        self.assertEqual(score, 3 + 1 + 3 + 3 + 2 + 1 + 2 + 5)
295
296
297class NNITreeSearcherTest(unittest.TestCase):
298    """Test NNITreeSearcher."""
299
300    def test_get_neighbors(self):
301        tree = Phylo.read("./TreeConstruction/upgma.tre", "newick")
302        alphabet = ["A", "T", "C", "G"]
303        step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]]
304        matrix = _Matrix(alphabet, step_matrix)
305        scorer = ParsimonyScorer(matrix)
306        searcher = NNITreeSearcher(scorer)
307        trees = searcher._get_neighbors(tree)
308        self.assertEqual(len(trees), 2 * (5 - 3))
309        Phylo.write(trees, os.path.join(temp_dir, "neighbor_trees.tre"), "newick")
310
311
312class ParsimonyTreeConstructorTest(unittest.TestCase):
313    """Test ParsimonyTreeConstructor."""
314
315    def test_build_tree(self):
316        aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
317        tree1 = Phylo.read("./TreeConstruction/upgma.tre", "newick")
318        tree2 = Phylo.read("./TreeConstruction/nj.tre", "newick")
319        alphabet = ["A", "T", "C", "G"]
320        step_matrix = [[0], [2.5, 0], [2.5, 1, 0], [1, 2.5, 2.5, 0]]
321        matrix = _Matrix(alphabet, step_matrix)
322        scorer = ParsimonyScorer(matrix)
323        searcher = NNITreeSearcher(scorer)
324        constructor = ParsimonyTreeConstructor(searcher, tree1)
325        best_tree = constructor.build_tree(aln)
326        Phylo.write(best_tree, os.path.join(temp_dir, "pars1.tre"), "newick")
327        constructor.starting_tree = tree2
328        best_tree = constructor.build_tree(aln)
329        Phylo.write(best_tree, os.path.join(temp_dir, "pars2.tre"), "newick")
330        constructor.starting_tree = None
331        best_tree = constructor.build_tree(aln)
332        Phylo.write(best_tree, os.path.join(temp_dir, "pars3.tre"), "newick")
333
334
335if __name__ == "__main__":
336    runner = unittest.TextTestRunner(verbosity=2)
337    unittest.main(testRunner=runner)
338