1#!/usr/bin/env python2.5 2# -*- coding: utf-8 -*- 3 4import sys 5import math 6import random 7 8 9# Define functions 10def importStats(fin_stats): 11 dicStats = {} 12 listHeader = [] 13 14 while True: 15 line = fin_stats.readline() 16 if not line: 17 break 18 19 if len(dicStats) == 0: 20 listHeader = line.rstrip("\n").split("\t") 21 for header in listHeader: 22 dicStats[header] = [] 23 else: 24 listStats = line.rstrip("\n").split("\t") 25 for i in range(len(listStats)): 26 if i in [0, 1, 2, 3, 9, 10, 11]: 27 stats = int(listStats[i]) 28 else: 29 stats = float(listStats[i]) 30 dicStats[listHeader[i]].append(stats) 31 32 return dicStats 33 34 35def weightedHisto(dicStats, xMin, xMax, binWidth): 36 dicHisto = {} 37 listShort1Cov = dicStats["short1_cov"] 38 listLgth = dicStats["lgth"] 39 40 for x in range(xMin, xMax, binWidth): 41 dicHisto[x] = 0 42 43 for i in range(len(listShort1Cov)): 44 cov = listShort1Cov[i] 45 if cov < xMin or cov >= xMax: 46 continue 47 for x in range(xMin, xMax+binWidth, binWidth): 48 if (cov >= x and cov < x + binWidth): 49 dicHisto[x] += listLgth[i] 50 51 return dicHisto 52 53 54def smoothingHisto(dicHisto, xMin, xMax, binWidth, widthMovAve): 55 dicSmoothHisto = {} 56 listMovAve = [] 57 58 for x in range(xMin, xMax, binWidth): 59 listMovAve.append(dicHisto[x]) 60 if len(listMovAve) < widthMovAve: 61 continue 62 dicSmoothHisto[x - binWidth * ((widthMovAve - 1) / 2)] \ 63 = sum(listMovAve) / float(widthMovAve) 64 listMovAve.pop(0) 65 66 return dicSmoothHisto 67 68 69def printHisto(dicHisto, xMin, xMax, binWidth): 70 for x in range(xMin, xMax, binWidth): 71 #print str(x) + " : " + str(int(round(dicHisto[x], 0))) 72 lenBar = int(round((dicHisto[x] / 20000), 0)) - 1 73 print str(x) + "\t", 74 for i in range(lenBar): 75 print "=", 76 print "\n", 77 print "\n", 78 79 80def setXMax(xMax, binWidth): 81 return int((math.floor(xMax / binWidth)) * binWidth) 82 83 84def getFirstXMax(dicStats, binWidth, thresConLen): 85 listLgth = dicStats["lgth"] 86 listShort1Cov = dicStats["short1_cov"] 87 maxCov = 0 88 subMaxCov = 0 89 90 for i in range(len(listLgth)): 91 if listLgth[i] >= thresConLen: 92 if listShort1Cov[i] > maxCov: 93 subMaxCov = maxCov 94 maxCov = listShort1Cov[i] 95 96 xMax = setXMax(subMaxCov, binWidth) + binWidth * 5 97 return xMax 98 99 100def getN50(tupleConLen): 101 listSortedConLen = list(tupleConLen) 102 listSortedConLen.sort() 103 listSortedConLen.reverse() 104 totalLen = sum(listSortedConLen) 105 sumLen = 0 106 107 for i in range(len(listSortedConLen)): 108 sumLen += listSortedConLen[i] 109 if sumLen >= totalLen / 2: 110 return listSortedConLen[i] 111 112 return -1 113 114 115def setWidthByXMax(xMax): 116 listWidth = [0, 0] # [binWidth, widthMovAve] 117 118 if xMax > 300: 119 listWidth = [6, 5] 120 if xMax <= 300: 121 listWidth = [4, 3] 122 if xMax <= 120: 123 listWidth = [2, 3] 124 if xMax <= 100: 125 listWidth = [1, 1] 126 127 return listWidth 128 129 130def detectPeakPandS(dicHisto, xMin, xMax, binWidth, 131 thresHeight, listPeakPandS): 132 countIncrease = 0; thresIncrease = 3 133 countDecrease = 0; thresDecrease = 3 134 beforeHeight = -1 135 flagPeakStart = False 136 peakHeight = 0; peakCov = 0 137 138 for x in range(xMax - binWidth, xMin - binWidth, -1 * binWidth): 139 if beforeHeight == -1: 140 beforeHeight = dicHisto[x] 141 continue 142 143 if not flagPeakStart: 144 if dicHisto[x] >= thresHeight: 145 if dicHisto[x] >= beforeHeight: 146 countIncrease += 1 147 if countIncrease >= thresIncrease: 148 countIncrease = 0 149 flagPeakStart = True 150 beforeHeight = dicHisto[x] 151 152 if flagPeakStart: 153 if dicHisto[x] >= peakHeight: 154 peakHeight = dicHisto[x] 155 peakCov = x 156 else: 157 countDecrease += 1 158 if countDecrease >= thresDecrease: 159 for i in range(2): 160 if listPeakPandS[i] == -1: 161 tmpBias = float(binWidth) / 2 162 listPeakPandS[i] = peakCov + tmpBias 163 peakHeight = 0; peakCov = 0 164 break 165 if listPeakPandS[1] != -1: 166 return listPeakPandS 167 countDecrease = 0 168 flagPeakStart = False 169 170 return listPeakPandS 171 172 173 174# ---------- Main function ---------- 175 176# Import stats file 177fin_stats = open(sys.argv[1], "r") 178dicStats = importStats(fin_stats) 179 180# Make weighted histogram 181listPeak = [] 182xMin = 0 183xMax = 1000 184binWidth = 4 185widthMovAve = 5 186listPeakPandS = [-1, -1] 187N50 = 0 188thresHeight = 0 189thresConLen = 0 190 191while True: 192 # Get N50 193 if len(listPeak) == 0: 194 N50 = getN50(tuple(dicStats["lgth"])) 195 print "N50 : " + str(N50) 196 thresConLen = N50 * 5 197 198 # Get first xMax 199 if len(listPeak) == 0: 200 xMax = getFirstXMax(dicStats, binWidth, thresConLen) 201 print "First xMax : " + str(xMax) 202 203 # Set width and xMax 204 listWidth = setWidthByXMax(xMax) 205 binWidth = listWidth[0]; widthMovAve = listWidth[1] 206 xMax = setXMax(xMax, binWidth) 207 208 # Make weighted and smoothed histogram 209 xMin = 0 210 dicHisto = weightedHisto(dicStats, xMin, xMax, binWidth) 211 dicSmoothHisto = smoothingHisto(dicHisto, xMin, xMax, 212 binWidth, widthMovAve) 213 xMin += binWidth * ((widthMovAve - 1) / 2) 214 xMax -= binWidth * ((widthMovAve - 1) / 2) 215 216 # Get thresHeight 217 if len(listPeak) == 0: 218 thresHeight = dicSmoothHisto[xMax - binWidth] 219 print "Thres Height : " + str(thresHeight) 220 221 # Print histogram 222 if len(listPeak) == 0: 223 printHisto(dicSmoothHisto, xMin, xMax, binWidth) 224 225 # Detect (primary and) secondary peak 226 listPeakPandS = detectPeakPandS(dicSmoothHisto, xMin, xMax, binWidth, 227 thresHeight, listPeakPandS) 228 229 # Record peak 230 if len(listPeak) == 0: 231 listPeak.append(listPeakPandS[0]) 232 listPeak.append(listPeakPandS[1]) 233 234 # When couldn't detect secondary peak, break 235 if listPeakPandS[1] == -1: 236 listPeak.pop(-1) 237 print listPeak 238 break 239 240 # Prepare for next peak 241 listPeakPandS[0] = listPeakPandS[1] 242 listPeakPandS[1] = -1 243 xMax = listPeakPandS[0] 244