1from __future__ import absolute_import 2# #START_LICENSE########################################################### 3# 4# 5# This file is part of the Environment for Tree Exploration program 6# (ETE). http://etetoolkit.org 7# 8# ETE is free software: you can redistribute it and/or modify it 9# under the terms of the GNU General Public License as published by 10# the Free Software Foundation, either version 3 of the License, or 11# (at your option) any later version. 12# 13# ETE is distributed in the hope that it will be useful, but WITHOUT 14# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 16# License for more details. 17# 18# You should have received a copy of the GNU General Public License 19# along with ETE. If not, see <http://www.gnu.org/licenses/>. 20# 21# 22# ABOUT THE ETE PACKAGE 23# ===================== 24# 25# ETE is distributed under the GPL copyleft license (2008-2015). 26# 27# If you make use of ETE in published work, please cite: 28# 29# Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon. 30# ETE: a python Environment for Tree Exploration. Jaime BMC 31# Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24 32# 33# Note that extra references to the specific methods implemented in 34# the toolkit may be available in the documentation. 35# 36# More info at http://etetoolkit.org. Contact: huerta@embl.de 37# 38# 39# #END_LICENSE############################################################# 40 41import copy 42from .evolevents import EvolEvent 43 44 45def get_reconciled_tree(node, sptree, events): 46 """ Returns the recoliation gene tree with a provided species 47 topology """ 48 49 if len(node.children) == 2: 50 # First visit childs 51 morphed_childs = [] 52 for ch in node.children: 53 mc, ev = get_reconciled_tree(ch, sptree, events) 54 morphed_childs.append(mc) 55 56 # morphed childs are the reconciled children. I trust its 57 # topology. Remember tree is visited on recursive post-order 58 sp_child_0 = morphed_childs[0].get_species() 59 sp_child_1 = morphed_childs[1].get_species() 60 all_species = sp_child_1 | sp_child_0 61 62 # If childs represents a duplication (duplicated species) 63 # Check that both are reconciliated to the same species 64 if len(sp_child_0 & sp_child_1) > 0: 65 newnode = copy.deepcopy(node) 66 newnode.up = None 67 newnode.children = [] 68 template = _get_expected_topology(sptree, all_species) 69 # replaces child0 partition on the template 70 newmorphed0, matchnode = _replace_on_template(template, morphed_childs[0]) 71 # replaces child1 partition on the template 72 newmorphed1, matchnode = _replace_on_template(template, morphed_childs[1]) 73 newnode.add_child(newmorphed0) 74 newnode.add_child(newmorphed1) 75 newnode.add_feature("evoltype", "D") 76 node.add_feature("evoltype", "D") 77 e = EvolEvent() 78 e.etype = "D" 79 e.inparalogs = node.children[0].get_leaf_names() 80 e.outparalogs = node.children[1].get_leaf_names() 81 e.in_seqs = node.children[0].get_leaf_names() 82 e.out_seqs = node.children[1].get_leaf_names() 83 events.append(e) 84 return newnode, events 85 86 # Otherwise, we need to reconciliate species at both sides 87 # into a single partition. 88 else: 89 # gets the topology expected by the observed species 90 template = _get_expected_topology(sptree, all_species) 91 # replaces child0 partition on the template 92 template, matchnode = _replace_on_template(template, morphed_childs[0] ) 93 # replaces child1 partition on the template 94 template, matchnode = _replace_on_template(template, morphed_childs[1]) 95 template.add_feature("evoltype","S") 96 node.add_feature("evoltype","S") 97 e = EvolEvent() 98 e.etype = "S" 99 e.inparalogs = node.children[0].get_leaf_names() 100 e.orthologs = node.children[1].get_leaf_names() 101 e.in_seqs = node.children[0].get_leaf_names() 102 e.out_seqs = node.children[1].get_leaf_names() 103 events.append(e) 104 return template, events 105 elif len(node.children)==0: 106 return copy.deepcopy(node), events 107 else: 108 raise ValueError("Algorithm can only work with binary trees.") 109 110def _replace_on_template(orig_template, node): 111 template = copy.deepcopy(orig_template) 112 # detects partition within topo that matchs child1 species 113 nodespcs = node.get_species() 114 spseed = list(nodespcs)[0] # any sp name woulbe ok 115 # Set an start point 116 subtopo = template.search_nodes(children=[], name=spseed)[0] 117 # While subtopo does not cover all child species 118 while len(nodespcs - set(subtopo.get_leaf_names() ) )>0: 119 subtopo= subtopo.up 120 # Puts original partition on the expected topology template 121 nodecp = copy.deepcopy(node) 122 if subtopo.up is None: 123 return nodecp, nodecp 124 else: 125 parent = subtopo.up 126 parent.remove_child(subtopo) 127 parent.add_child(nodecp) 128 return template, nodecp 129 130def _get_expected_topology(t, species): 131 missing_sp = set(species) - set(t.get_leaf_names()) 132 if missing_sp: 133 raise KeyError("* The following species are not contained in the species tree: "+ ','.join(missing_sp) ) 134 135 node = t.search_nodes(children=[], name=list(species)[0])[0] 136 137 sps = set(species) 138 while sps-set(node.get_leaf_names()) != set([]): 139 node = node.up 140 template = copy.deepcopy(node) 141 # make get_species() to work 142 #template._speciesFunction = _get_species_on_TOL 143 template.set_species_naming_function(_get_species_on_TOL) 144 template.detach() 145 for n in [template]+template.get_descendants(): 146 n.add_feature("evoltype","L") 147 n.dist = 1 148 return template 149 150def _get_species_on_TOL(name): 151 return name 152 153def get_reconciled_tree_zmasek(gtree, sptree, inplace=False): 154 """ 155 Reconciles the gene tree with the species tree 156 using Zmasek and Eddy's algorithm. Details can be 157 found in the paper: 158 159 Christian M. Zmasek, Sean R. Eddy: A simple algorithm 160 to infer gene duplication and speciation events on a 161 gene tree. Bioinformatics 17(9): 821-828 (2001) 162 163 :argument gtree: gene tree (PhyloTree instance) 164 165 :argument sptree: species tree (PhyloTree instance) 166 167 :argument False inplace: if True, the provided gene tree instance is 168 modified. Otherwise a reconciled copy of the gene tree is returned. 169 170 :returns: reconciled gene tree 171 """ 172 # some cleanup operations 173 def cleanup(tree): 174 for node in tree.traverse(): node.del_feature("M") 175 176 if not inplace: 177 gtree = gtree.copy('deepcopy') 178 179 # check for missing species 180 missing_sp = gtree.get_species() - sptree.get_species() 181 if missing_sp: 182 raise KeyError("* The following species are not contained in the species tree: "+ ', '.join(missing_sp)) 183 184 # initialization 185 sp2node = dict() 186 for node in sptree.get_leaves(): sp2node[node.species] = node 187 188 # set/compute the mapping function M(g) for the 189 # leaf nodes in the gene tree (see paper for details) 190 species = sptree.get_species() 191 for node in gtree.get_leaves(): 192 node.add_feature("M",sp2node[node.species]) 193 194 # visit each internal node in the gene tree 195 # and detect its event (duplication or speciation) 196 for node in gtree.traverse(strategy="postorder"): 197 if len(node.children) == 0: 198 continue # nothing to do for leaf nodes 199 200 if len(node.children) != 2: 201 cleanup(gtree) 202 raise ValueError("Algorithm can only work with binary trees.") 203 204 lca = node.children[0].M.get_common_ancestor(node.children[1].M) # LCA in the species tree 205 node.add_feature("M",lca) 206 207 node.add_feature("evoltype","S") 208 if id(node.children[0].M) == id(node.M) or id(node.children[1].M) == id(node.M): 209 node.evoltype = "D" 210 211 cleanup(gtree) 212 return gtree 213 214