1from __future__ import absolute_import
2from __future__ import print_function
3import unittest
4
5from .. import PhyloTree, SeqGroup
6from .datasets import *
7
8class Test_phylo_module(unittest.TestCase):
9
10    # ALL TESTS USE THIS EXAMPLE TREE
11    #
12    #                    /-Dme_001
13    #          /--------|
14    #         |          \-Dme_002
15    #         |
16    #         |                              /-Cfa_001
17    #         |                    /--------|
18    #         |                   |          \-Mms_001
19    #         |                   |
20    #---------|                   |                                        /-Hsa_001
21    #         |                   |                              /--------|
22    #         |          /--------|                    /--------|          \-Hsa_003
23    #         |         |         |                   |         |
24    #         |         |         |          /--------|          \-Ptr_001
25    #         |         |         |         |         |
26    #         |         |         |         |          \-Mmu_001
27    #         |         |          \--------|
28    #          \--------|                   |                    /-Hsa_004
29    #                   |                   |          /--------|
30    #                   |                    \--------|          \-Ptr_004
31    #                   |                             |
32    #                   |                              \-Mmu_004
33    #                   |
34    #                   |          /-Ptr_002
35    #                    \--------|
36    #                             |          /-Hsa_002
37    #                              \--------|
38    #                                        \-Mmu_002
39
40
41    def test_link_alignmets(self):
42        """ Phylotree can be linked to SeqGroup objects"""
43        fasta = """
44         >seqA
45         MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
46         >seqB
47         MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
48         >seqC
49         MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
50         >seqD
51         MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
52        """
53        # Caution with iphylip string. blank spaces in the beginning are important
54        iphylip = """
55         4 76
56      seqA   MAEIPDETIQ QFMALT---H NIAVQYLSEF GDLNEALNSY YASQTDDIKD RREEAHQFMA
57      seqB   MAEIPDATIQ QFMALTNVSH NIAVQY--EF GDLNEALNSY YAYQTDDQKD RREEAHQFMA
58      seqC   MAEIPDATIQ ---ALTNVSH NIAVQYLSEF GDLNEALNSY YASQTDDQPD RREEAHQFMA
59      seqD   MAEAPDETIQ QFMALTNVSH NIAVQYLSEF GDLNEAL--- ---------- -REEAHQ---
60
61             LTNVSHQFMA LTNVSH
62             LTNVSH---- ------
63             LTNVSH---- ------
64             -------FMA LTNVSH
65        """
66
67        # Loads a tree and link it to an alignment. As usual, 'alignment' can be
68        # the path to a file or the data themselves in text string format
69
70        alg1 = SeqGroup(fasta)
71        alg2 = SeqGroup(iphylip, format="iphylip")
72
73        t = PhyloTree("(((seqA,seqB),seqC),seqD);", alignment=fasta, alg_format="fasta")
74
75        for l in t.get_leaves():
76            self.assertEqual(l.sequence, alg1.get_seq(l.name))
77
78        # The associated alignment can be changed at any time
79        t.link_to_alignment(alignment=alg2, alg_format="iphylip")
80
81        for l in t.get_leaves():
82            self.assertEqual(l.sequence, alg2.get_seq(l.name))
83
84    def test_get_sp_overlap_on_all_descendants(self):
85        """ Tests ortholgy prediction using the sp overlap"""
86        # Creates a gene phylogeny with several duplication events at
87        # different levels.
88        t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));')
89
90        # Scans the tree using the species overlap algorithm and detect all
91        # speciation and duplication events
92        events = t.get_descendant_evol_events()
93
94        # Check that all duplications are detected
95        dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
96        self.assertEqual(dup1.evoltype, "D")
97
98        dup2 = t.get_common_ancestor("Dme_001", "Dme_002")
99        self.assertEqual(dup2.evoltype, "D")
100
101        dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002")
102        self.assertEqual(dup3.evoltype, "D")
103
104        dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003")
105        self.assertEqual(dup4.evoltype, "D")
106
107
108        # All other nodes should be speciation
109        for node in t.traverse():
110            if not node.is_leaf() and \
111                   node not in set([dup1, dup2, dup3, dup4]):
112                self.assertEqual(node.evoltype, "S")
113
114        # Check events
115        for e in events:
116            self.assertEqual(e.node.evoltype, e.etype)
117
118        # Check orthology/paralogy prediction
119        orthologs = set()
120        for e in events:
121            if e.node == dup1:
122                self.assertEqual(e.inparalogs, set(['Ptr_001', 'Hsa_001', 'Mmu_001', 'Hsa_003']))
123                self.assertEqual(e.outparalogs, set(['Mmu_004', 'Ptr_004', 'Hsa_004']))
124                self.assertEqual(e.orthologs, set())
125                self.assertEqual(e.outparalogs, e.out_seqs)
126                self.assertEqual(e.inparalogs, e.in_seqs)
127            elif e.node == dup2:
128                self.assertEqual(e.inparalogs, set(['Dme_001']))
129                self.assertEqual(e.outparalogs, set(['Dme_002']))
130                self.assertEqual(e.orthologs, set())
131                self.assertEqual(e.outparalogs, e.out_seqs)
132                self.assertEqual(e.inparalogs, e.in_seqs)
133            elif e.node == dup3:
134                self.assertEqual(e.inparalogs, set(['Hsa_003', 'Cfa_001', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001']))
135                self.assertEqual(e.outparalogs, set(['Hsa_002', 'Ptr_002', 'Mmu_002']))
136                self.assertEqual(e.orthologs, set())
137                self.assertEqual(e.outparalogs, e.out_seqs)
138                self.assertEqual(e.inparalogs, e.in_seqs)
139            elif e.node == dup4:
140                self.assertEqual(e.inparalogs, set(['Hsa_001']))
141                self.assertEqual(e.outparalogs, set(['Hsa_003']))
142                self.assertEqual(e.orthologs, set())
143                self.assertEqual(e.outparalogs, e.out_seqs)
144                self.assertEqual(e.inparalogs, e.in_seqs)
145            else:
146
147                key1 = list(e.inparalogs)
148                key2 = list(e.orthologs)
149                key1.sort()
150                key2.sort()
151                orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))
152
153        orthologies = [
154            [set(['Dme_001', 'Dme_002']), set(['Ptr_001', 'Cfa_001', 'Hsa_002', 'Hsa_003', 'Ptr_002', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001', 'Mmu_002'])],
155            [set(['Mms_001', 'Cfa_001']), set(['Hsa_003', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001'])],
156            [set(['Ptr_002']), set(['Hsa_002', 'Mmu_002'])],
157            [set(['Cfa_001']), set(['Mms_001'])],
158            [set(['Hsa_002']), set(['Mmu_002'])],
159            [set(['Hsa_003', 'Hsa_001', 'Ptr_001']), set(['Mmu_001'])],
160            [set(['Ptr_004', 'Hsa_004']), set(['Mmu_004'])],
161            [set(['Hsa_003', 'Hsa_001']), set(['Ptr_001'])],
162            [set(['Hsa_004']), set(['Ptr_004'])]
163            ]
164        expected_orthologs = set()
165        for l1,l2 in orthologies:
166            key1 = list(l1)
167            key2 = list(l2)
168            key1.sort()
169            key2.sort()
170            expected_orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))
171
172        # Are all orthologies as expected
173        self.assertEqual(expected_orthologs, orthologs)
174
175        # Test different sos_thr
176        t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));')
177        seed = (t & 'SP1_a')
178        events = t.get_descendant_evol_events(0.1)
179        self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'D')
180        self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D')
181
182        t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));')
183        seed = (t & 'SP1_a')
184        events = t.get_descendant_evol_events(0.5)
185        self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S')
186        self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D')
187
188        t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));')
189        seed = (t & 'SP1_a')
190        events = seed.get_my_evol_events(0.75)
191        self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S')
192        self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'S')
193
194    def test_get_sp_overlap_on_a_seed(self):
195        """ Tests ortholgy prediction using sp overlap"""
196        # Creates a gene phylogeny with several duplication events at
197        # different levels.
198        t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));')
199
200        # Scans the tree using the species overlap algorithm
201        seed = t.search_nodes(name="Hsa_001")[0]
202        events = seed.get_my_evol_events()
203
204        # Check that duplications are detected
205        dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
206        #print(dup1)
207        self.assertEqual(dup1.evoltype, "D")
208
209        # This duplication is not in the seed path
210        dup2 = t.get_common_ancestor("Dme_001", "Dme_002")
211        self.assertTrue(not hasattr(dup2, "evoltype"))
212
213        dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002")
214        self.assertEqual(dup3.evoltype, "D")
215
216        dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003")
217        self.assertEqual(dup4.evoltype, "D")
218
219        # All other nodes should be speciation
220        node = seed
221        while node:
222            if not node.is_leaf() and \
223                   node not in set([dup1, dup2, dup3, dup4]):
224                self.assertEqual(node.evoltype, "S")
225            node = node.up
226
227        # Check events
228        for e in events:
229            self.assertEqual(e.node.evoltype, e.etype)
230
231        # Check orthology/paralogy prediction
232        orthologs = set()
233        for e in events:
234            if e.node == dup1:
235                self.assertEqual(e.inparalogs, set(['Hsa_001', 'Hsa_003']))
236                self.assertEqual(e.outparalogs, set(['Hsa_004']))
237                self.assertEqual(e.orthologs, set())
238                self.assertEqual(e.in_seqs, set(['Ptr_001', 'Hsa_001', 'Mmu_001', 'Hsa_003']))
239                self.assertEqual(e.out_seqs, set(['Mmu_004', 'Ptr_004', 'Hsa_004']))
240            elif e.node == dup3:
241                self.assertEqual(e.inparalogs, set(['Hsa_003', 'Hsa_001',  'Hsa_004' ]))
242                self.assertEqual(e.outparalogs, set(['Hsa_002']))
243                self.assertEqual(e.orthologs, set())
244                self.assertEqual(e.in_seqs, set(['Hsa_003', 'Cfa_001', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001']))
245                self.assertEqual(e.out_seqs, set(['Hsa_002', 'Ptr_002', 'Mmu_002']))
246            elif e.node == dup4:
247                self.assertEqual(e.inparalogs, set(['Hsa_001']))
248                self.assertEqual(e.outparalogs, set(['Hsa_003']))
249                self.assertEqual(e.orthologs, set())
250                self.assertEqual(e.in_seqs, set(['Hsa_001']))
251                self.assertEqual(e.out_seqs, set(['Hsa_003']))
252            else:
253
254                key1 = list(e.inparalogs)
255                key2 = list(e.orthologs)
256                key1.sort()
257                key2.sort()
258                orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))
259
260
261        orthologies = [
262            [set(['Dme_001', 'Dme_002']), set([ 'Hsa_002', 'Hsa_003', 'Hsa_001',  'Hsa_004' ])],
263            [set(['Mms_001', 'Cfa_001']), set(['Hsa_003',  'Hsa_001', 'Hsa_004'])],
264            [set(['Hsa_003', 'Hsa_001']), set(['Mmu_001'])],
265            [set(['Hsa_003', 'Hsa_001']), set(['Ptr_001'])],
266            ]
267        expected_orthologs = set()
268        for l1,l2 in orthologies:
269            key1 = list(l1)
270            key2 = list(l2)
271            key1.sort()
272            key2.sort()
273            expected_orthologs.add(tuple(sorted([tuple(key1), tuple(key2)])))
274
275        # Are all orthologies as expected
276        self.assertEqual(expected_orthologs, orthologs)
277
278        # Test different sos_thr
279        t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));')
280        seed = (t & 'SP1_a')
281        events = seed.get_my_evol_events(0.1)
282        self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'D')
283        self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D')
284
285        t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));')
286        seed = (t & 'SP1_a')
287        events = seed.get_my_evol_events(0.50)
288        self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S')
289        self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D')
290
291        t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));')
292        seed = (t & 'SP1_a')
293        events = seed.get_my_evol_events(0.75)
294        self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S')
295        self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'S')
296
297    def test_reconciliation(self):
298        """ Tests ortholgy prediction based on the species reconciliation method"""
299        gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
300        species_tree_nw = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);"
301
302        genetree = PhyloTree(gene_tree_nw)
303        sptree = PhyloTree(species_tree_nw)
304
305        recon_tree, events = genetree.reconcile(sptree)
306
307        # Check that reconcilied tree nodes have the correct lables:
308        # gene loss, duplication, etc.
309        expected_recon = "((Dme_001:1,Dme_002:1)1:1[&&NHX:evoltype=D],(((Cfa_001:1,Mms_001:1)1:1[&&NHX:evoltype=S],((Hsa_001:1,Ptr_001:1)1:1[&&NHX:evoltype=S],Mmu_001:1)1:1[&&NHX:evoltype=S])1:1[&&NHX:evoltype=S],((Mms:1[&&NHX:evoltype=L],Cfa:1[&&NHX:evoltype=L])1:1[&&NHX:evoltype=L],(((Hsa:1[&&NHX:evoltype=L],Ptr_002:1)1:1[&&NHX:evoltype=L],Mmu:1[&&NHX:evoltype=L])1:1[&&NHX:evoltype=L],((Ptr:1[&&NHX:evoltype=L],Hsa_002:1)1:1[&&NHX:evoltype=L],Mmu_002:1)1:1[&&NHX:evoltype=S])1:1[&&NHX:evoltype=D])1:1[&&NHX:evoltype=L])1:1[&&NHX:evoltype=D])[&&NHX:evoltype=S];"
310
311        self.assertEqual(recon_tree.write(["evoltype"], format=9), PhyloTree(expected_recon).write(features=["evoltype"],format=9))
312
313    def test_miscelaneus(self):
314        """ Test several things """
315        # Creates a gene phylogeny with several duplication events at
316        # different levels.
317        t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));')
318
319        # Create a dictionary with relative ages for the species present in
320        # the phylogenetic tree.  Note that ages are only relative numbers to
321        # define which species are older, and that different species can
322        # belong to the same age.
323        sp2age = {
324          'Hsa': 1, # Homo sapiens (Hominids)
325          'Ptr': 2, # P. troglodytes (primates)
326          'Mmu': 2, # Macaca mulata (primates)
327          'Mms': 3, # Mus musculus (mammals)
328          'Cfa': 3, # Canis familiaris (mammals)
329          'Dme': 4  # Drosophila melanogaster (metazoa)
330        }
331
332
333        # Check that dup ages are correct
334        dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
335        self.assertEqual(dup1.get_age(sp2age), 2)
336        dup2 = t.get_common_ancestor("Dme_001", "Dme_002")
337        self.assertEqual(dup2.get_age(sp2age), 4)
338        dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002")
339        self.assertEqual(dup3.get_age(sp2age), 3)
340        dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003")
341        self.assertEqual(dup4.get_age(sp2age), 1)
342
343        # Check rooting options
344        expected_root = t.search_nodes(name="Dme_002")[0]
345        expected_root.dist += 2.3
346        self.assertEqual(t.get_farthest_oldest_leaf(sp2age), expected_root)
347        #print t
348        #print t.get_farthest_oldest_node(sp2age)
349
350
351        # Check get species functions
352        self.assertEqual(t.get_species(), set(sp2age.keys()))
353        self.assertEqual(set([sp for sp in t.iter_species()]), set(sp2age.keys()))
354
355    def test_colappse(self):
356        t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_001),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));')
357        collapsed_hsa = '((Dme_001:1,Dme_002:1)1:1,(((Cfa_001:1,Mms_001:1)1:1,(((Ptr_001:1,Hsa_001:1)1:1,Mmu_001:1)1:1,((Hsa_004:1,Ptr_004:1)1:1,Mmu_004:1)1:1)1:1)1:1,(Ptr_002:1,(Hsa_002:1,Mmu_002:1)1:1)1:1)1:1);'
358        t2 = t.collapse_lineage_specific_expansions(['Hsa'])
359        self.assertEqual(str(collapsed_hsa), str(t2.write()))
360        with self.assertRaises(TypeError):
361            print(t.collapse_lineage_specific_expansions('Hsa'))
362
363
364if __name__ == '__main__':
365    unittest.main()
366