1#!/usr/bin/env python
2
3###############################################################################
4#                                                                             #
5#    This program is free software: you can redistribute it and/or modify     #
6#    it under the terms of the GNU General Public License as published by     #
7#    the Free Software Foundation, either version 3 of the License, or        #
8#    (at your option) any later version.                                      #
9#                                                                             #
10#    This program is distributed in the hope that it will be useful,          #
11#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
12#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
13#    GNU General Public License for more details.                             #
14#                                                                             #
15#    You should have received a copy of the GNU General Public License        #
16#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
17#                                                                             #
18###############################################################################
19
20"""
21Useful methods for processing taxonomy strings.
22"""
23
# Module metadata: authorship, copyright, license, version, and status.
__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2013'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__version__ = '1.0.0'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'
__status__ = 'Development'
32
# Rank names ordered from most general to most specific, and the string
# prefix used for each rank (same order as taxonomicRanks).
taxonomicRanks = [
    'life',
    'domain',
    'phylum',
    'class',
    'order',
    'family',
    'genus',
    'species',
]
taxonomicPrefixes = [
    'l__',
    'k__',
    'p__',
    'c__',
    'o__',
    'f__',
    'g__',
    's__',
]

# Rank name -> numeric level. 'life' sits at -1 so that 'domain' is level 0;
# 'gg_id' occupies the level after 'species'.
ranksByLabel = {
    'life': -1,
    'domain': 0,
    'phylum': 1,
    'class': 2,
    'order': 3,
    'family': 4,
    'genus': 5,
    'species': 6,
    'gg_id': 7,
}

# Numeric level -> rank name (inverse of ranksByLabel).
ranksByLevel = {
    -1: 'life',
    0: 'domain',
    1: 'phylum',
    2: 'class',
    3: 'order',
    4: 'family',
    5: 'genus',
    6: 'species',
    7: 'gg_id',
}

# Numeric level -> prefix prepended to a taxon name at that level.
rankPrefixes = {
    -1: 'l__',
    0: 'k__',
    1: 'p__',
    2: 'c__',
    3: 'o__',
    4: 'f__',
    5: 'g__',
    6: 's__',
    7: 'id__',
}
39
40
def appendTaxonomyRanks(taxonomy, ranks=7):
    """Append taxonomy prefix to list of taxa.

    The entry at index i of `taxonomy` is prefixed with `rankPrefixes[i]`
    (index 0 -> 'k__', 1 -> 'p__', ...). Only the first `ranks` entries are
    processed; any further entries are ignored.

    Parameters
    ----------
    taxonomy : sequence of str
        Taxon names indexed by rank level, starting at level 0.
    ranks : int
        Number of leading entries to prefix.

    Returns
    -------
    list of str
        New list holding the prefixed taxon names; `taxonomy` is not modified.

    Raises
    ------
    IndexError
        If `taxonomy` has fewer than `ranks` entries.
    KeyError
        If `ranks` extends past the levels defined in `rankPrefixes`.
    """
    # range() instead of xrange() so this works on both Python 2 and 3.
    return [rankPrefixes[i] + taxonomy[i] for i in range(ranks)]
48
49
def readTaxonomy(taxonomyFile):
    """Read tab-separated taxonomy file.

    Each line holds a taxon identifier in the first column and its taxonomy
    string in the second column; any further columns are ignored. Trailing
    whitespace is removed from the taxonomy string. Blank lines are skipped.

    Parameters
    ----------
    taxonomyFile : str
        Path to the taxonomy file.

    Returns
    -------
    dict
        Maps taxon identifier -> taxonomy string. If an identifier occurs
        more than once, the last occurrence wins.

    Raises
    ------
    IndexError
        If a non-blank line contains no tab character.
    """
    taxonIdToTaxonomy = {}

    # The context manager guarantees the file handle is closed, even if a
    # malformed line raises part-way through.
    with open(taxonomyFile) as fin:
        for line in fin:
            if not line.strip('\r\n'):
                # tolerate empty lines (e.g. a trailing newline at end of file)
                continue

            lineSplit = line.split('\t')
            taxonIdToTaxonomy[lineSplit[0]] = lineSplit[1].rstrip()

    return taxonIdToTaxonomy
58
59
def __parseTaxon(taxon):
    """Split a taxon label into its identifier and bootstrap support.

    A label containing '(' (e.g. 'g__Bacillus(95)') yields the stripped text
    before the first '(' together with the integer parsed from the text
    that follows it, up to the ')'. A label without '(' yields the stripped
    label and a bootstrap support of 0.

    Returns a (taxonId, bootstrapSupport) tuple.
    """
    if '(' not in taxon:
        return taxon.strip(), 0

    pieces = taxon.split('(')
    name = pieces[0].strip()

    supportField = pieces[1]
    closeIndex = supportField.find(')')

    return name, int(supportField[:closeIndex])
71
72
def LCA(taxonomy1, taxonomy2):
    """Find lowest-common ancestor between two taxa lists.

    The two lists are compared position by position, up to the length of
    the shorter one. Each entry is first parsed with `__parseTaxon` into a
    taxon identifier and a bootstrap support value. For each position:

    - identifiers differ: the result is `rankPrefixes[i]` followed by
      'unmapped' if either identifier contains 'unmapped', otherwise
      'unclassified';
    - identifiers match and both supports are 0: the identifier itself;
    - identifiers match otherwise: the identifier followed by the smaller
      of the two supports in parentheses (this is 'name(0)' when only one
      side has a non-zero support).

    Parameters
    ----------
    taxonomy1, taxonomy2 : list of str
        Taxon labels indexed by rank level, starting at level 0.

    Returns
    -------
    list of str
        One entry per compared position.

    Raises
    ------
    KeyError
        If a mismatch occurs at a position not defined in `rankPrefixes`.
    """
    lca = []

    # zip() stops at the shorter list, matching the original index bound, and
    # avoids the Python 2-only xrange().
    for i, (taxon1, taxon2) in enumerate(zip(taxonomy1, taxonomy2)):
        t1, b1 = __parseTaxon(taxon1)
        t2, b2 = __parseTaxon(taxon2)

        if t1 != t2:
            if 'unmapped' in t1 or 'unmapped' in t2:
                lca.append(rankPrefixes[i] + 'unmapped')
            else:
                lca.append(rankPrefixes[i] + 'unclassified')
        elif b1 == 0 and b2 == 0:
            lca.append(t1)
        else:
            # report the weaker of the two supports
            lca.append(t1 + '(' + str(min(b1, b2)) + ')')

    return lca
92