1from __future__ import absolute_import 2from __future__ import print_function 3import unittest 4 5from .. import PhyloTree, SeqGroup 6from .datasets import * 7 8class Test_phylo_module(unittest.TestCase): 9 10 # ALL TESTS USE THIS EXAMPLE TREE 11 # 12 # /-Dme_001 13 # /--------| 14 # | \-Dme_002 15 # | 16 # | /-Cfa_001 17 # | /--------| 18 # | | \-Mms_001 19 # | | 20 #---------| | /-Hsa_001 21 # | | /--------| 22 # | /--------| /--------| \-Hsa_003 23 # | | | | | 24 # | | | /--------| \-Ptr_001 25 # | | | | | 26 # | | | | \-Mmu_001 27 # | | \--------| 28 # \--------| | /-Hsa_004 29 # | | /--------| 30 # | \--------| \-Ptr_004 31 # | | 32 # | \-Mmu_004 33 # | 34 # | /-Ptr_002 35 # \--------| 36 # | /-Hsa_002 37 # \--------| 38 # \-Mmu_002 39 40 41 def test_link_alignmets(self): 42 """ Phylotree can be linked to SeqGroup objects""" 43 fasta = """ 44 >seqA 45 MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH 46 >seqB 47 MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH 48 >seqC 49 MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH 50 >seqD 51 MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH 52 """ 53 # Caution with iphylip string. blank spaces in the beginning are important 54 iphylip = """ 55 4 76 56 seqA MAEIPDETIQ QFMALT---H NIAVQYLSEF GDLNEALNSY YASQTDDIKD RREEAHQFMA 57 seqB MAEIPDATIQ QFMALTNVSH NIAVQY--EF GDLNEALNSY YAYQTDDQKD RREEAHQFMA 58 seqC MAEIPDATIQ ---ALTNVSH NIAVQYLSEF GDLNEALNSY YASQTDDQPD RREEAHQFMA 59 seqD MAEAPDETIQ QFMALTNVSH NIAVQYLSEF GDLNEAL--- ---------- -REEAHQ--- 60 61 LTNVSHQFMA LTNVSH 62 LTNVSH---- ------ 63 LTNVSH---- ------ 64 -------FMA LTNVSH 65 """ 66 67 # Loads a tree and link it to an alignment. As usual, 'alignment' can be 68 # the path to a file or the data themselves in text string format 69 70 alg1 = SeqGroup(fasta) 71 alg2 = SeqGroup(iphylip, format="iphylip") 72 73 t = PhyloTree("(((seqA,seqB),seqC),seqD);", alignment=fasta, alg_format="fasta") 74 75 for l in t.get_leaves(): 76 self.assertEqual(l.sequence, alg1.get_seq(l.name)) 77 78 # The associated alignment can be changed at any time 79 t.link_to_alignment(alignment=alg2, alg_format="iphylip") 80 81 for l in t.get_leaves(): 82 self.assertEqual(l.sequence, alg2.get_seq(l.name)) 83 84 def test_get_sp_overlap_on_all_descendants(self): 85 """ Tests ortholgy prediction using the sp overlap""" 86 # Creates a gene phylogeny with several duplication events at 87 # different levels. 88 t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));') 89 90 # Scans the tree using the species overlap algorithm and detect all 91 # speciation and duplication events 92 events = t.get_descendant_evol_events() 93 94 # Check that all duplications are detected 95 dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004") 96 self.assertEqual(dup1.evoltype, "D") 97 98 dup2 = t.get_common_ancestor("Dme_001", "Dme_002") 99 self.assertEqual(dup2.evoltype, "D") 100 101 dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002") 102 self.assertEqual(dup3.evoltype, "D") 103 104 dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003") 105 self.assertEqual(dup4.evoltype, "D") 106 107 108 # All other nodes should be speciation 109 for node in t.traverse(): 110 if not node.is_leaf() and \ 111 node not in set([dup1, dup2, dup3, dup4]): 112 self.assertEqual(node.evoltype, "S") 113 114 # Check events 115 for e in events: 116 self.assertEqual(e.node.evoltype, e.etype) 117 118 # Check orthology/paralogy prediction 119 orthologs = set() 120 for e in events: 121 if e.node == dup1: 122 self.assertEqual(e.inparalogs, set(['Ptr_001', 'Hsa_001', 'Mmu_001', 'Hsa_003'])) 123 self.assertEqual(e.outparalogs, set(['Mmu_004', 'Ptr_004', 'Hsa_004'])) 124 self.assertEqual(e.orthologs, set()) 125 self.assertEqual(e.outparalogs, e.out_seqs) 126 self.assertEqual(e.inparalogs, e.in_seqs) 127 elif e.node == dup2: 128 self.assertEqual(e.inparalogs, set(['Dme_001'])) 129 self.assertEqual(e.outparalogs, set(['Dme_002'])) 130 self.assertEqual(e.orthologs, set()) 131 self.assertEqual(e.outparalogs, e.out_seqs) 132 self.assertEqual(e.inparalogs, e.in_seqs) 133 elif e.node == dup3: 134 self.assertEqual(e.inparalogs, set(['Hsa_003', 'Cfa_001', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001'])) 135 self.assertEqual(e.outparalogs, set(['Hsa_002', 'Ptr_002', 'Mmu_002'])) 136 self.assertEqual(e.orthologs, set()) 137 self.assertEqual(e.outparalogs, e.out_seqs) 138 self.assertEqual(e.inparalogs, e.in_seqs) 139 elif e.node == dup4: 140 self.assertEqual(e.inparalogs, set(['Hsa_001'])) 141 self.assertEqual(e.outparalogs, set(['Hsa_003'])) 142 self.assertEqual(e.orthologs, set()) 143 self.assertEqual(e.outparalogs, e.out_seqs) 144 self.assertEqual(e.inparalogs, e.in_seqs) 145 else: 146 147 key1 = list(e.inparalogs) 148 key2 = list(e.orthologs) 149 key1.sort() 150 key2.sort() 151 orthologs.add(tuple(sorted([tuple(key1), tuple(key2)]))) 152 153 orthologies = [ 154 [set(['Dme_001', 'Dme_002']), set(['Ptr_001', 'Cfa_001', 'Hsa_002', 'Hsa_003', 'Ptr_002', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001', 'Mmu_002'])], 155 [set(['Mms_001', 'Cfa_001']), set(['Hsa_003', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001'])], 156 [set(['Ptr_002']), set(['Hsa_002', 'Mmu_002'])], 157 [set(['Cfa_001']), set(['Mms_001'])], 158 [set(['Hsa_002']), set(['Mmu_002'])], 159 [set(['Hsa_003', 'Hsa_001', 'Ptr_001']), set(['Mmu_001'])], 160 [set(['Ptr_004', 'Hsa_004']), set(['Mmu_004'])], 161 [set(['Hsa_003', 'Hsa_001']), set(['Ptr_001'])], 162 [set(['Hsa_004']), set(['Ptr_004'])] 163 ] 164 expected_orthologs = set() 165 for l1,l2 in orthologies: 166 key1 = list(l1) 167 key2 = list(l2) 168 key1.sort() 169 key2.sort() 170 expected_orthologs.add(tuple(sorted([tuple(key1), tuple(key2)]))) 171 172 # Are all orthologies as expected 173 self.assertEqual(expected_orthologs, orthologs) 174 175 # Test different sos_thr 176 t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));') 177 seed = (t & 'SP1_a') 178 events = t.get_descendant_evol_events(0.1) 179 self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'D') 180 self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D') 181 182 t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));') 183 seed = (t & 'SP1_a') 184 events = t.get_descendant_evol_events(0.5) 185 self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S') 186 self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D') 187 188 t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));') 189 seed = (t & 'SP1_a') 190 events = seed.get_my_evol_events(0.75) 191 self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S') 192 self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'S') 193 194 def test_get_sp_overlap_on_a_seed(self): 195 """ Tests ortholgy prediction using sp overlap""" 196 # Creates a gene phylogeny with several duplication events at 197 # different levels. 198 t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));') 199 200 # Scans the tree using the species overlap algorithm 201 seed = t.search_nodes(name="Hsa_001")[0] 202 events = seed.get_my_evol_events() 203 204 # Check that duplications are detected 205 dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004") 206 #print(dup1) 207 self.assertEqual(dup1.evoltype, "D") 208 209 # This duplication is not in the seed path 210 dup2 = t.get_common_ancestor("Dme_001", "Dme_002") 211 self.assertTrue(not hasattr(dup2, "evoltype")) 212 213 dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002") 214 self.assertEqual(dup3.evoltype, "D") 215 216 dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003") 217 self.assertEqual(dup4.evoltype, "D") 218 219 # All other nodes should be speciation 220 node = seed 221 while node: 222 if not node.is_leaf() and \ 223 node not in set([dup1, dup2, dup3, dup4]): 224 self.assertEqual(node.evoltype, "S") 225 node = node.up 226 227 # Check events 228 for e in events: 229 self.assertEqual(e.node.evoltype, e.etype) 230 231 # Check orthology/paralogy prediction 232 orthologs = set() 233 for e in events: 234 if e.node == dup1: 235 self.assertEqual(e.inparalogs, set(['Hsa_001', 'Hsa_003'])) 236 self.assertEqual(e.outparalogs, set(['Hsa_004'])) 237 self.assertEqual(e.orthologs, set()) 238 self.assertEqual(e.in_seqs, set(['Ptr_001', 'Hsa_001', 'Mmu_001', 'Hsa_003'])) 239 self.assertEqual(e.out_seqs, set(['Mmu_004', 'Ptr_004', 'Hsa_004'])) 240 elif e.node == dup3: 241 self.assertEqual(e.inparalogs, set(['Hsa_003', 'Hsa_001', 'Hsa_004' ])) 242 self.assertEqual(e.outparalogs, set(['Hsa_002'])) 243 self.assertEqual(e.orthologs, set()) 244 self.assertEqual(e.in_seqs, set(['Hsa_003', 'Cfa_001', 'Ptr_001', 'Hsa_001', 'Ptr_004', 'Hsa_004', 'Mmu_004', 'Mmu_001', 'Mms_001'])) 245 self.assertEqual(e.out_seqs, set(['Hsa_002', 'Ptr_002', 'Mmu_002'])) 246 elif e.node == dup4: 247 self.assertEqual(e.inparalogs, set(['Hsa_001'])) 248 self.assertEqual(e.outparalogs, set(['Hsa_003'])) 249 self.assertEqual(e.orthologs, set()) 250 self.assertEqual(e.in_seqs, set(['Hsa_001'])) 251 self.assertEqual(e.out_seqs, set(['Hsa_003'])) 252 else: 253 254 key1 = list(e.inparalogs) 255 key2 = list(e.orthologs) 256 key1.sort() 257 key2.sort() 258 orthologs.add(tuple(sorted([tuple(key1), tuple(key2)]))) 259 260 261 orthologies = [ 262 [set(['Dme_001', 'Dme_002']), set([ 'Hsa_002', 'Hsa_003', 'Hsa_001', 'Hsa_004' ])], 263 [set(['Mms_001', 'Cfa_001']), set(['Hsa_003', 'Hsa_001', 'Hsa_004'])], 264 [set(['Hsa_003', 'Hsa_001']), set(['Mmu_001'])], 265 [set(['Hsa_003', 'Hsa_001']), set(['Ptr_001'])], 266 ] 267 expected_orthologs = set() 268 for l1,l2 in orthologies: 269 key1 = list(l1) 270 key2 = list(l2) 271 key1.sort() 272 key2.sort() 273 expected_orthologs.add(tuple(sorted([tuple(key1), tuple(key2)]))) 274 275 # Are all orthologies as expected 276 self.assertEqual(expected_orthologs, orthologs) 277 278 # Test different sos_thr 279 t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));') 280 seed = (t & 'SP1_a') 281 events = seed.get_my_evol_events(0.1) 282 self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'D') 283 self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D') 284 285 t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));') 286 seed = (t & 'SP1_a') 287 events = seed.get_my_evol_events(0.50) 288 self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S') 289 self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'D') 290 291 t = PhyloTree('(((SP1_a, SP2_a), (SP3_a, SP1_b)), (SP1_c, SP2_c));') 292 seed = (t & 'SP1_a') 293 events = seed.get_my_evol_events(0.75) 294 self.assertEqual(t.get_common_ancestor(seed, 'SP3_a').evoltype, 'S') 295 self.assertEqual(t.get_common_ancestor(seed, 'SP1_c').evoltype, 'S') 296 297 def test_reconciliation(self): 298 """ Tests ortholgy prediction based on the species reconciliation method""" 299 gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));' 300 species_tree_nw = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);" 301 302 genetree = PhyloTree(gene_tree_nw) 303 sptree = PhyloTree(species_tree_nw) 304 305 recon_tree, events = genetree.reconcile(sptree) 306 307 # Check that reconcilied tree nodes have the correct lables: 308 # gene loss, duplication, etc. 309 expected_recon = "((Dme_001:1,Dme_002:1)1:1[&&NHX:evoltype=D],(((Cfa_001:1,Mms_001:1)1:1[&&NHX:evoltype=S],((Hsa_001:1,Ptr_001:1)1:1[&&NHX:evoltype=S],Mmu_001:1)1:1[&&NHX:evoltype=S])1:1[&&NHX:evoltype=S],((Mms:1[&&NHX:evoltype=L],Cfa:1[&&NHX:evoltype=L])1:1[&&NHX:evoltype=L],(((Hsa:1[&&NHX:evoltype=L],Ptr_002:1)1:1[&&NHX:evoltype=L],Mmu:1[&&NHX:evoltype=L])1:1[&&NHX:evoltype=L],((Ptr:1[&&NHX:evoltype=L],Hsa_002:1)1:1[&&NHX:evoltype=L],Mmu_002:1)1:1[&&NHX:evoltype=S])1:1[&&NHX:evoltype=D])1:1[&&NHX:evoltype=L])1:1[&&NHX:evoltype=D])[&&NHX:evoltype=S];" 310 311 self.assertEqual(recon_tree.write(["evoltype"], format=9), PhyloTree(expected_recon).write(features=["evoltype"],format=9)) 312 313 def test_miscelaneus(self): 314 """ Test several things """ 315 # Creates a gene phylogeny with several duplication events at 316 # different levels. 317 t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_003),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));') 318 319 # Create a dictionary with relative ages for the species present in 320 # the phylogenetic tree. Note that ages are only relative numbers to 321 # define which species are older, and that different species can 322 # belong to the same age. 323 sp2age = { 324 'Hsa': 1, # Homo sapiens (Hominids) 325 'Ptr': 2, # P. troglodytes (primates) 326 'Mmu': 2, # Macaca mulata (primates) 327 'Mms': 3, # Mus musculus (mammals) 328 'Cfa': 3, # Canis familiaris (mammals) 329 'Dme': 4 # Drosophila melanogaster (metazoa) 330 } 331 332 333 # Check that dup ages are correct 334 dup1 = t.get_common_ancestor("Hsa_001", "Hsa_004") 335 self.assertEqual(dup1.get_age(sp2age), 2) 336 dup2 = t.get_common_ancestor("Dme_001", "Dme_002") 337 self.assertEqual(dup2.get_age(sp2age), 4) 338 dup3 = t.get_common_ancestor("Hsa_001", "Hsa_002") 339 self.assertEqual(dup3.get_age(sp2age), 3) 340 dup4 = t.get_common_ancestor("Hsa_001", "Hsa_003") 341 self.assertEqual(dup4.get_age(sp2age), 1) 342 343 # Check rooting options 344 expected_root = t.search_nodes(name="Dme_002")[0] 345 expected_root.dist += 2.3 346 self.assertEqual(t.get_farthest_oldest_leaf(sp2age), expected_root) 347 #print t 348 #print t.get_farthest_oldest_node(sp2age) 349 350 351 # Check get species functions 352 self.assertEqual(t.get_species(), set(sp2age.keys())) 353 self.assertEqual(set([sp for sp in t.iter_species()]), set(sp2age.keys())) 354 355 def test_colappse(self): 356 t = PhyloTree('((Dme_001,Dme_002),(((Cfa_001,Mms_001),((((Hsa_001,Hsa_001),Ptr_001),Mmu_001),((Hsa_004,Ptr_004),Mmu_004))),(Ptr_002,(Hsa_002,Mmu_002))));') 357 collapsed_hsa = '((Dme_001:1,Dme_002:1)1:1,(((Cfa_001:1,Mms_001:1)1:1,(((Ptr_001:1,Hsa_001:1)1:1,Mmu_001:1)1:1,((Hsa_004:1,Ptr_004:1)1:1,Mmu_004:1)1:1)1:1)1:1,(Ptr_002:1,(Hsa_002:1,Mmu_002:1)1:1)1:1)1:1);' 358 t2 = t.collapse_lineage_specific_expansions(['Hsa']) 359 self.assertEqual(str(collapsed_hsa), str(t2.write())) 360 with self.assertRaises(TypeError): 361 print(t.collapse_lineage_specific_expansions('Hsa')) 362 363 364if __name__ == '__main__': 365 unittest.main() 366