1############################################################################### 2# 3# profile.py - calculate percentage of reads mapped to each bin 4# 5############################################################################### 6# # 7# This program is free software: you can redistribute it and/or modify # 8# it under the terms of the GNU General Public License as published by # 9# the Free Software Foundation, either version 3 of the License, or # 10# (at your option) any later version. # 11# # 12# This program is distributed in the hope that it will be useful, # 13# but WITHOUT ANY WARRANTY; without even the implied warranty of # 14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15# GNU General Public License for more details. # 16# # 17# You should have received a copy of the GNU General Public License # 18# along with this program. If not, see <http://www.gnu.org/licenses/>. # 19# # 20############################################################################### 21 22import logging 23 24import prettytable 25 26from checkm.defaultValues import DefaultValues 27from common import checkFileExists, reassignStdOut, restoreStdOut 28 29 30class Profile(): 31 def __init__(self): 32 self.logger = logging.getLogger('timestamp') 33 34 def run(self, coverageFile, outFile, bTabTable): 35 checkFileExists(coverageFile) 36 37 # get number of reads mapped to each bin 38 self.logger.info('Determining number of reads mapped to each bin.') 39 40 readsMappedToBin = {} 41 binSize = {} 42 totalMappedReads = {} 43 bHeader = True 44 for line in open(coverageFile): 45 if bHeader: 46 bHeader = False 47 continue 48 49 lineSplit = line.split('\t') 50 51 # seqId = lineSplit[0] 52 binId = lineSplit[1] 53 54 seqLen = int(lineSplit[2]) 55 binSize[binId] = binSize.get(binId, 0) + seqLen 56 57 if binId not in readsMappedToBin: 58 readsMappedToBin[binId] = {} 59 60 for i in xrange(3, len(lineSplit), 3): 61 bamId = lineSplit[i] 62 mappedReads = int(lineSplit[i + 2]) 63 64 totalMappedReads[bamId] = totalMappedReads.get(bamId, 0) + mappedReads 65 readsMappedToBin[binId][bamId] = readsMappedToBin[binId].get(bamId, 0) + mappedReads 66 67 # calculate percentage of mapped reads to binned populations 68 perMappedReads = {} 69 normBinCoverage = {} 70 sumNormBinCoverage = {} 71 for binId, bamIds in readsMappedToBin.iteritems(): 72 perMappedReads[binId] = {} 73 normBinCoverage[binId] = {} 74 75 for bamId in bamIds: 76 perMR = float(readsMappedToBin[binId][bamId]) / totalMappedReads[bamId] 77 perMappedReads[binId][bamId] = perMR 78 79 if binId == DefaultValues.UNBINNED: 80 continue 81 82 normCoverage = perMR / binSize[binId] 83 normBinCoverage[binId][bamId] = normCoverage 84 sumNormBinCoverage[bamId] = sumNormBinCoverage.get(bamId, 0) + normCoverage 85 86 for binId, bamIds in normBinCoverage.iteritems(): 87 for bamId in bamIds: 88 if sumNormBinCoverage[bamId] != 0: 89 normBinCoverage[binId][bamId] /= sumNormBinCoverage[bamId] 90 else: 91 normBinCoverage[binId][bamId] = 0 92 93 # write community profile 94 oldStdOut = reassignStdOut(outFile) 95 96 sortedBinIds = sorted(readsMappedToBin.keys()) 97 sortedBamIds = sorted(readsMappedToBin[sortedBinIds[0]].keys()) 98 99 header = ['Bin Id', 'Bin size (Mbp)'] 100 for bamId in sortedBamIds: 101 header += [bamId + ': mapped reads'] 102 header += [bamId + ': % mapped reads'] 103 header += [bamId + ': % binned populations'] 104 header += [bamId + ': % community'] 105 106 if bTabTable: 107 print('\t'.join(header)) 108 else: 109 pTable = prettytable.PrettyTable(header) 110 pTable.float_format = '.2' 111 pTable.align = 'c' 112 pTable.align[header[0]] = 'l' 113 pTable.hrules = prettytable.FRAME 114 pTable.vrules = prettytable.NONE 115 116 for binId in sortedBinIds: 117 row = [binId] 118 row += [float(binSize[binId]) / 1e6] 119 120 for bamId in sortedBamIds: 121 row += [readsMappedToBin[binId][bamId]] 122 row += [perMappedReads[binId][bamId] * 100.0] 123 124 if DefaultValues.UNBINNED in perMappedReads: 125 unbinnedPercentage = perMappedReads[DefaultValues.UNBINNED][bamId] 126 else: 127 unbinnedPercentage = 0 128 129 if binId == DefaultValues.UNBINNED: 130 row += ['NA'] 131 row += [unbinnedPercentage * 100.0] 132 else: 133 row += [normBinCoverage[binId][bamId] * 100.0] 134 row += [normBinCoverage[binId][bamId] * 100.0 * (1.0 - unbinnedPercentage)] 135 136 if bTabTable: 137 print('\t'.join(map(str, row))) 138 else: 139 pTable.add_row(row) 140 141 if not bTabTable: 142 print(pTable.get_string()) 143 144 restoreStdOut(outFile, oldStdOut) 145