1from __future__ import absolute_import
2# #START_LICENSE###########################################################
3#
4#
5# This file is part of the Environment for Tree Exploration program
6# (ETE).  http://etetoolkit.org
7#
8# ETE is free software: you can redistribute it and/or modify it
9# under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# ETE is distributed in the hope that it will be useful, but WITHOUT
14# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16# License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with ETE.  If not, see <http://www.gnu.org/licenses/>.
20#
21#
22#                     ABOUT THE ETE PACKAGE
23#                     =====================
24#
25# ETE is distributed under the GPL copyleft license (2008-2015).
26#
27# If you make use of ETE in published work, please cite:
28#
29# Jaime Huerta-Cepas, Joaquin Dopazo and Toni Gabaldon.
30# ETE: a python Environment for Tree Exploration. Jaime BMC
31# Bioinformatics 2010,:24doi:10.1186/1471-2105-11-24
32#
33# Note that extra references to the specific methods implemented in
34# the toolkit may be available in the documentation.
35#
36# More info at http://etetoolkit.org. Contact: huerta@embl.de
37#
38#
39# #END_LICENSE#############################################################
40
41import copy
42from .evolevents import EvolEvent
43
44
45def get_reconciled_tree(node, sptree, events):
46    """ Returns the recoliation gene tree with a provided species
47    topology """
48
49    if len(node.children) == 2:
50        # First visit childs
51        morphed_childs = []
52        for ch in node.children:
53            mc, ev = get_reconciled_tree(ch, sptree, events)
54            morphed_childs.append(mc)
55
56        # morphed childs are the reconciled children. I trust its
57        # topology. Remember tree is visited on recursive post-order
58        sp_child_0 = morphed_childs[0].get_species()
59        sp_child_1 = morphed_childs[1].get_species()
60        all_species = sp_child_1 | sp_child_0
61
62        # If childs represents a duplication (duplicated species)
63        # Check that both are reconciliated to the same species
64        if len(sp_child_0 & sp_child_1) > 0:
65            newnode = copy.deepcopy(node)
66            newnode.up = None
67            newnode.children = []
68            template = _get_expected_topology(sptree, all_species)
69            # replaces child0 partition on the template
70            newmorphed0, matchnode = _replace_on_template(template, morphed_childs[0])
71            # replaces child1 partition on the template
72            newmorphed1, matchnode = _replace_on_template(template, morphed_childs[1])
73            newnode.add_child(newmorphed0)
74            newnode.add_child(newmorphed1)
75            newnode.add_feature("evoltype", "D")
76            node.add_feature("evoltype", "D")
77            e = EvolEvent()
78            e.etype = "D"
79            e.inparalogs = node.children[0].get_leaf_names()
80            e.outparalogs = node.children[1].get_leaf_names()
81            e.in_seqs  = node.children[0].get_leaf_names()
82            e.out_seqs = node.children[1].get_leaf_names()
83            events.append(e)
84            return newnode, events
85
86        # Otherwise, we need to reconciliate species at both sides
87        # into a single partition.
88        else:
89            # gets the topology expected by the observed species
90            template = _get_expected_topology(sptree, all_species)
91            # replaces child0 partition on the template
92            template, matchnode = _replace_on_template(template, morphed_childs[0] )
93            # replaces child1 partition on the template
94            template, matchnode = _replace_on_template(template, morphed_childs[1])
95            template.add_feature("evoltype","S")
96            node.add_feature("evoltype","S")
97            e = EvolEvent()
98            e.etype = "S"
99            e.inparalogs = node.children[0].get_leaf_names()
100            e.orthologs = node.children[1].get_leaf_names()
101            e.in_seqs  = node.children[0].get_leaf_names()
102            e.out_seqs = node.children[1].get_leaf_names()
103            events.append(e)
104            return template, events
105    elif len(node.children)==0:
106        return copy.deepcopy(node), events
107    else:
108        raise ValueError("Algorithm can only work with binary trees.")
109
110def _replace_on_template(orig_template, node):
111    template = copy.deepcopy(orig_template)
112    # detects partition within topo that matchs child1 species
113    nodespcs = node.get_species()
114    spseed = list(nodespcs)[0]  # any sp name woulbe ok
115    # Set an start point
116    subtopo = template.search_nodes(children=[], name=spseed)[0]
117    # While subtopo does not cover all child species
118    while len(nodespcs - set(subtopo.get_leaf_names() ) )>0:
119        subtopo= subtopo.up
120    # Puts original partition on the expected topology template
121    nodecp = copy.deepcopy(node)
122    if subtopo.up is None:
123        return nodecp, nodecp
124    else:
125        parent = subtopo.up
126        parent.remove_child(subtopo)
127        parent.add_child(nodecp)
128        return template, nodecp
129
130def _get_expected_topology(t, species):
131    missing_sp = set(species) - set(t.get_leaf_names())
132    if missing_sp:
133        raise KeyError("* The following species are not contained in the species tree: "+ ','.join(missing_sp) )
134
135    node = t.search_nodes(children=[], name=list(species)[0])[0]
136
137    sps = set(species)
138    while sps-set(node.get_leaf_names()) != set([]):
139        node = node.up
140    template = copy.deepcopy(node)
141    # make get_species() to work
142    #template._speciesFunction = _get_species_on_TOL
143    template.set_species_naming_function(_get_species_on_TOL)
144    template.detach()
145    for n in [template]+template.get_descendants():
146        n.add_feature("evoltype","L")
147        n.dist = 1
148    return template
149
150def _get_species_on_TOL(name):
151    return name
152
153def get_reconciled_tree_zmasek(gtree, sptree, inplace=False):
154    """
155    Reconciles the gene tree with the species tree
156    using Zmasek and Eddy's algorithm. Details can be
157    found in the paper:
158
159    Christian M. Zmasek, Sean R. Eddy: A simple algorithm
160    to infer gene duplication and speciation events on a
161    gene tree. Bioinformatics 17(9): 821-828 (2001)
162
163    :argument gtree: gene tree (PhyloTree instance)
164
165    :argument sptree: species tree (PhyloTree instance)
166
167    :argument False inplace: if True, the provided gene tree instance is
168       modified. Otherwise a reconciled copy of the gene tree is returned.
169
170    :returns: reconciled gene tree
171    """
172    # some cleanup operations
173    def cleanup(tree):
174        for node in tree.traverse(): node.del_feature("M")
175
176    if not inplace:
177        gtree = gtree.copy('deepcopy')
178
179    # check for missing species
180    missing_sp = gtree.get_species() - sptree.get_species()
181    if missing_sp:
182        raise KeyError("* The following species are not contained in the species tree: "+ ', '.join(missing_sp))
183
184    # initialization
185    sp2node = dict()
186    for node in sptree.get_leaves(): sp2node[node.species] = node
187
188    # set/compute the mapping function M(g) for the
189    # leaf nodes in the gene tree (see paper for details)
190    species = sptree.get_species()
191    for node in gtree.get_leaves():
192        node.add_feature("M",sp2node[node.species])
193
194    # visit each internal node in the gene tree
195    # and detect its event (duplication or speciation)
196    for node in gtree.traverse(strategy="postorder"):
197        if len(node.children) == 0:
198            continue # nothing to do for leaf nodes
199
200        if len(node.children) != 2:
201            cleanup(gtree)
202            raise ValueError("Algorithm can only work with binary trees.")
203
204        lca = node.children[0].M.get_common_ancestor(node.children[1].M) # LCA in the species tree
205        node.add_feature("M",lca)
206
207        node.add_feature("evoltype","S")
208        if id(node.children[0].M) == id(node.M) or id(node.children[1].M) == id(node.M):
209                node.evoltype = "D"
210
211    cleanup(gtree)
212    return gtree
213
214