1#!/usr/bin/env python2.5
2# -*- coding: utf-8 -*-
3
4import sys
5import math
6import random
7
8
9# Define functions
10def importStats(fin_stats):
11    dicStats = {}
12    listHeader = []
13
14    while True:
15        line = fin_stats.readline()
16        if not line:
17            break
18
19        if len(dicStats) == 0:
20            listHeader = line.rstrip("\n").split("\t")
21            for header in listHeader:
22                dicStats[header] = []
23        else:
24            listStats = line.rstrip("\n").split("\t")
25            for i in range(len(listStats)):
26                if i in [0, 1, 2, 3, 9, 10, 11]:
27                    stats = int(listStats[i])
28                else:
29                    stats = float(listStats[i])
30                dicStats[listHeader[i]].append(stats)
31
32    return dicStats
33
34
35def weightedHisto(dicStats, xMin, xMax, binWidth):
36    dicHisto = {}
37    listShort1Cov = dicStats["short1_cov"]
38    listLgth = dicStats["lgth"]
39
40    for x in range(xMin, xMax, binWidth):
41        dicHisto[x] = 0
42
43    for i in range(len(listShort1Cov)):
44        cov = listShort1Cov[i]
45        if cov < xMin or cov >= xMax:
46            continue
47        for x in range(xMin, xMax+binWidth, binWidth):
48            if (cov >= x and cov < x + binWidth):
49                dicHisto[x] += listLgth[i]
50
51    return dicHisto
52
53
54def smoothingHisto(dicHisto, xMin, xMax, binWidth, widthMovAve):
55    dicSmoothHisto = {}
56    listMovAve = []
57
58    for x in range(xMin, xMax, binWidth):
59        listMovAve.append(dicHisto[x])
60        if len(listMovAve) < widthMovAve:
61            continue
62        dicSmoothHisto[x - binWidth * ((widthMovAve - 1) / 2)] \
63            = sum(listMovAve) / float(widthMovAve)
64        listMovAve.pop(0)
65
66    return dicSmoothHisto
67
68
69def printHisto(dicHisto, xMin, xMax, binWidth):
70    for x in range(xMin, xMax, binWidth):
71        #print str(x) + " : " + str(int(round(dicHisto[x], 0)))
72        lenBar = int(round((dicHisto[x] / 20000), 0)) - 1
73        print str(x) + "\t",
74        for i in range(lenBar):
75            print "=",
76        print "\n",
77    print "\n",
78
79
80def setXMax(xMax, binWidth):
81    return int((math.floor(xMax / binWidth)) * binWidth)
82
83
84def getFirstXMax(dicStats, binWidth, thresConLen):
85    listLgth = dicStats["lgth"]
86    listShort1Cov = dicStats["short1_cov"]
87    maxCov = 0
88    subMaxCov = 0
89
90    for i in range(len(listLgth)):
91        if listLgth[i] >= thresConLen:
92            if listShort1Cov[i] > maxCov:
93                subMaxCov = maxCov
94                maxCov = listShort1Cov[i]
95
96    xMax = setXMax(subMaxCov, binWidth) + binWidth * 5
97    return xMax
98
99
100def getN50(tupleConLen):
101    listSortedConLen = list(tupleConLen)
102    listSortedConLen.sort()
103    listSortedConLen.reverse()
104    totalLen = sum(listSortedConLen)
105    sumLen = 0
106
107    for i in range(len(listSortedConLen)):
108        sumLen += listSortedConLen[i]
109        if sumLen >= totalLen / 2:
110            return listSortedConLen[i]
111
112    return -1
113
114
115def setWidthByXMax(xMax):
116    listWidth = [0, 0]  # [binWidth, widthMovAve]
117
118    if xMax > 300:
119        listWidth = [6, 5]
120    if xMax <= 300:
121        listWidth = [4, 3]
122    if xMax <= 120:
123        listWidth = [2, 3]
124    if xMax <= 100:
125        listWidth = [1, 1]
126
127    return listWidth
128
129
130def detectPeakPandS(dicHisto, xMin, xMax, binWidth,
131                    thresHeight, listPeakPandS):
132    countIncrease = 0; thresIncrease = 3
133    countDecrease = 0; thresDecrease = 3
134    beforeHeight = -1
135    flagPeakStart = False
136    peakHeight = 0; peakCov = 0
137
138    for x in range(xMax - binWidth, xMin - binWidth, -1 * binWidth):
139        if beforeHeight == -1:
140            beforeHeight = dicHisto[x]
141            continue
142
143        if not flagPeakStart:
144            if dicHisto[x] >= thresHeight:
145                if dicHisto[x] >= beforeHeight:
146                    countIncrease += 1
147                    if countIncrease >= thresIncrease:
148                        countIncrease = 0
149                        flagPeakStart = True
150            beforeHeight = dicHisto[x]
151
152        if flagPeakStart:
153            if dicHisto[x] >= peakHeight:
154                peakHeight = dicHisto[x]
155                peakCov = x
156            else:
157                countDecrease += 1
158                if countDecrease >= thresDecrease:
159                    for i in range(2):
160                        if listPeakPandS[i] == -1:
161                            tmpBias = float(binWidth) / 2
162                            listPeakPandS[i] = peakCov + tmpBias
163                            peakHeight = 0; peakCov = 0
164                            break
165                    if listPeakPandS[1] != -1:
166                        return listPeakPandS
167                    countDecrease = 0
168                    flagPeakStart = False
169
170    return listPeakPandS
171
172
173
174# ---------- Main function ----------
175
176# Import stats file
177fin_stats = open(sys.argv[1], "r")
178dicStats = importStats(fin_stats)
179
180# Make weighted histogram
181listPeak = []
182xMin = 0
183xMax = 1000
184binWidth = 4
185widthMovAve = 5
186listPeakPandS = [-1, -1]
187N50 = 0
188thresHeight = 0
189thresConLen = 0
190
191while True:
192    # Get N50
193    if len(listPeak) == 0:
194        N50 = getN50(tuple(dicStats["lgth"]))
195        print "N50 : " + str(N50)
196        thresConLen = N50 * 5
197
198    # Get first xMax
199    if len(listPeak) == 0:
200        xMax = getFirstXMax(dicStats, binWidth, thresConLen)
201        print "First xMax : " + str(xMax)
202
203    # Set width and xMax
204    listWidth = setWidthByXMax(xMax)
205    binWidth = listWidth[0]; widthMovAve = listWidth[1]
206    xMax = setXMax(xMax, binWidth)
207
208    # Make weighted and smoothed histogram
209    xMin = 0
210    dicHisto = weightedHisto(dicStats, xMin, xMax, binWidth)
211    dicSmoothHisto = smoothingHisto(dicHisto, xMin, xMax,
212                                    binWidth, widthMovAve)
213    xMin += binWidth * ((widthMovAve - 1) / 2)
214    xMax -= binWidth * ((widthMovAve - 1) / 2)
215
216    # Get thresHeight
217    if len(listPeak) == 0:
218        thresHeight = dicSmoothHisto[xMax - binWidth]
219        print "Thres Height : " + str(thresHeight)
220
221    # Print histogram
222    if len(listPeak) == 0:
223        printHisto(dicSmoothHisto, xMin, xMax, binWidth)
224
225    # Detect (primary and) secondary peak
226    listPeakPandS = detectPeakPandS(dicSmoothHisto, xMin, xMax, binWidth,
227                                    thresHeight, listPeakPandS)
228
229    # Record peak
230    if len(listPeak) == 0:
231        listPeak.append(listPeakPandS[0])
232    listPeak.append(listPeakPandS[1])
233
234    # When couldn't detect secondary peak, break
235    if listPeakPandS[1] == -1:
236        listPeak.pop(-1)
237        print listPeak
238        break
239
240    # Prepare for next peak
241    listPeakPandS[0] = listPeakPandS[1]
242    listPeakPandS[1] = -1
243    xMax = listPeakPandS[0]
244