#!/usr/bin/env python

###############################################################################
#                                                                             #
#    This program is free software: you can redistribute it and/or modify     #
#    it under the terms of the GNU General Public License as published by     #
#    the Free Software Foundation, either version 3 of the License, or        #
#    (at your option) any later version.                                      #
#                                                                             #
#    This program is distributed in the hope that it will be useful,          #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the             #
#    GNU General Public License for more details.                             #
#                                                                             #
#    You should have received a copy of the GNU General Public License        #
#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
#                                                                             #
###############################################################################

"""
Useful methods for processing taxonomy strings.
"""

__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2013'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__version__ = '1.0.0'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'
__status__ = 'Development'

# Rank names and their string prefixes, ordered from most general to most
# specific.
taxonomicRanks = ['life', 'domain', 'phylum', 'class', 'order', 'family', 'genus', 'species']
taxonomicPrefixes = ['l__', 'k__', 'p__', 'c__', 'o__', 'f__', 'g__', 's__']

# Lookup tables between rank label, numeric rank level and rank prefix.
# Level 0 is the domain; 'life' sits above it at level -1.
ranksByLabel = {'life': -1, 'domain': 0, 'phylum': 1, 'class': 2, 'order': 3, 'family': 4, 'genus': 5, 'species': 6, 'gg_id': 7}
ranksByLevel = {-1: 'life', 0: 'domain', 1: 'phylum', 2: 'class', 3: 'order', 4: 'family', 5: 'genus', 6: 'species', 7: 'gg_id'}
rankPrefixes = {-1: 'l__', 0: 'k__', 1: 'p__', 2: 'c__', 3: 'o__', 4: 'f__', 5: 'g__', 6: 's__', 7: 'id__'}


def appendTaxonomyRanks(taxonomy, ranks=7):
    """Append taxonomy prefix to list of taxa.

    Parameters
    ----------
    taxonomy : sequence of str
        Taxon names ordered from domain (index 0) downwards, without
        rank prefixes.
    ranks : int
        Number of leading ranks to prefix (default: 7, domain to species).

    Returns
    -------
    list of str
        The first `ranks` taxa, each preceded by its rank prefix
        (e.g. 'k__', 'p__').

    Raises
    ------
    IndexError
        If `taxonomy` holds fewer than `ranks` entries.
    KeyError
        If `ranks` extends past the ranks defined in `rankPrefixes`.
    """
    # range() instead of xrange() so the module runs on Python 2 and 3.
    return [rankPrefixes[i] + taxonomy[i] for i in range(ranks)]


def readTaxonomy(taxonomyFile):
    """Read tab-separated taxonomy file.

    Each line is expected to hold a taxon identifier in the first column
    and its taxonomy string in the second column; any further columns
    are ignored.

    Parameters
    ----------
    taxonomyFile : str
        Path to the taxonomy file.

    Returns
    -------
    dict
        Maps taxon identifier -> taxonomy string (trailing whitespace
        removed).

    Raises
    ------
    IndexError
        If a non-blank line does not contain a tab-separated second column.
    """
    taxonIdToTaxonomy = {}

    # The context manager guarantees the file handle is closed, even if a
    # malformed line raises part-way through.
    with open(taxonomyFile) as fin:
        for line in fin:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no record.
                continue

            lineSplit = line.split('\t')
            taxonIdToTaxonomy[lineSplit[0]] = lineSplit[1].rstrip()

    return taxonIdToTaxonomy


def __parseTaxon(taxon):
    """Split a taxon string into its name and bootstrap support.

    Parameters
    ----------
    taxon : str
        Taxon label, optionally followed by an integer support value in
        parentheses, e.g. 'p__Firmicutes(95)'.

    Returns
    -------
    tuple of (str, int)
        The taxon name with surrounding whitespace stripped, and the
        support value (0 when no parenthesised value is present).

    Raises
    ------
    ValueError
        If the parenthesised text is not an integer.
    """
    taxonId, openParen, remainder = taxon.partition('(')
    if not openParen:
        return taxon.strip(), 0

    # Take everything up to the closing parenthesis. If the parenthesis is
    # missing the whole remainder is used, rather than silently dropping
    # the final character as slicing with find() == -1 would do.
    bootstrapSupport = int(remainder.partition(')')[0])

    return taxonId.strip(), bootstrapSupport


def LCA(taxonomy1, taxonomy2):
    """Find lowest-common ancestor between two taxa lists.

    The two lists are compared rank by rank, starting at the domain
    (index 0), up to the length of the shorter list.

    Parameters
    ----------
    taxonomy1, taxonomy2 : iterable of str
        Taxa ordered from domain downwards; each entry may carry a
        bootstrap support value in parentheses.

    Returns
    -------
    list of str
        One entry per compared rank:
        - the shared taxon name when both lists agree, followed by the
          smaller of the two support values in parentheses unless both
          are 0;
        - '<prefix>unmapped' when the taxa differ and either contains
          'unmapped';
        - '<prefix>unclassified' when the taxa differ otherwise.

    Raises
    ------
    KeyError
        If more ranks are compared than are defined in `rankPrefixes`.
    """
    lca = []

    # zip() stops at the shorter list, matching min(len(...), len(...)).
    for rank, (taxon1, taxon2) in enumerate(zip(taxonomy1, taxonomy2)):
        t1, b1 = __parseTaxon(taxon1)
        t2, b2 = __parseTaxon(taxon2)

        if t1 != t2:
            if 'unmapped' in t1 or 'unmapped' in t2:
                lca.append(rankPrefixes[rank] + 'unmapped')
            else:
                lca.append(rankPrefixes[rank] + 'unclassified')
        elif b1 == 0 and b2 == 0:
            lca.append(t1)
        else:
            # Report the more conservative of the two support values.
            lca.append(t1 + '(' + str(min(b1, b2)) + ')')

    return lca