1 #include "muscle.h"
2 #include "profile.h"
3 #include "pwpath.h"
4
5 #define OCC 1
6
7 struct DP_MEMORY
8 {
9 unsigned uLength;
10 SCORE *GapOpenA;
11 SCORE *GapOpenB;
12 SCORE *GapCloseA;
13 SCORE *GapCloseB;
14 SCORE *MPrev;
15 SCORE *MCurr;
16 SCORE *MWork;
17 SCORE *DPrev;
18 SCORE *DCurr;
19 SCORE *DWork;
20 SCORE **ScoreMxB;
21 #if OCC
22 FCOUNT *OccA;
23 FCOUNT *OccB;
24 #endif
25 unsigned **SortOrderA;
26 unsigned *uDeletePos;
27 FCOUNT **FreqsA;
28 int **TraceBack;
29 };
30
31 static struct DP_MEMORY DPM;
32
AllocDPMem(unsigned uLengthA,unsigned uLengthB)33 static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
34 {
35 // Max prefix length
36 unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
37 if (uLength < DPM.uLength)
38 return;
39
40 // Add 256 to allow for future expansion and
41 // round up to next multiple of 32.
42 uLength += 256;
43 uLength += 32 - uLength%32;
44
45 const unsigned uOldLength = DPM.uLength;
46 if (uOldLength > 0)
47 {
48 for (unsigned i = 0; i < uOldLength; ++i)
49 {
50 delete[] DPM.TraceBack[i];
51 delete[] DPM.FreqsA[i];
52 delete[] DPM.SortOrderA[i];
53 }
54 for (unsigned n = 0; n < 20; ++n)
55 delete[] DPM.ScoreMxB[n];
56
57 delete[] DPM.MPrev;
58 delete[] DPM.MCurr;
59 delete[] DPM.MWork;
60 delete[] DPM.DPrev;
61 delete[] DPM.DCurr;
62 delete[] DPM.DWork;
63 delete[] DPM.uDeletePos;
64 delete[] DPM.GapOpenA;
65 delete[] DPM.GapOpenB;
66 delete[] DPM.GapCloseA;
67 delete[] DPM.GapCloseB;
68 delete[] DPM.SortOrderA;
69 delete[] DPM.FreqsA;
70 delete[] DPM.ScoreMxB;
71 delete[] DPM.TraceBack;
72 #if OCC
73 delete[] DPM.OccA;
74 delete[] DPM.OccB;
75 #endif
76 }
77
78 DPM.uLength = uLength;
79
80 DPM.GapOpenA = new SCORE[uLength];
81 DPM.GapOpenB = new SCORE[uLength];
82 DPM.GapCloseA = new SCORE[uLength];
83 DPM.GapCloseB = new SCORE[uLength];
84 #if OCC
85 DPM.OccA = new FCOUNT[uLength];
86 DPM.OccB = new FCOUNT[uLength];
87 #endif
88
89 DPM.SortOrderA = new unsigned*[uLength];
90 DPM.FreqsA = new FCOUNT*[uLength];
91 DPM.ScoreMxB = new SCORE*[20];
92 DPM.MPrev = new SCORE[uLength];
93 DPM.MCurr = new SCORE[uLength];
94 DPM.MWork = new SCORE[uLength];
95
96 DPM.DPrev = new SCORE[uLength];
97 DPM.DCurr = new SCORE[uLength];
98 DPM.DWork = new SCORE[uLength];
99 DPM.uDeletePos = new unsigned[uLength];
100
101 DPM.TraceBack = new int*[uLength];
102
103 for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
104 DPM.ScoreMxB[uLetter] = new SCORE[uLength];
105
106 for (unsigned i = 0; i < uLength; ++i)
107 {
108 DPM.SortOrderA[i] = new unsigned[20];
109 DPM.FreqsA[i] = new FCOUNT[20];
110 DPM.TraceBack[i] = new int[uLength];
111 }
112 }
113
GlobalAlignLE(const ProfPos * PA,unsigned uLengthA,const ProfPos * PB,unsigned uLengthB,PWPath & Path)114 SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
115 unsigned uLengthB, PWPath &Path)
116 {
117 SetTermGaps(PA, uLengthA);
118 SetTermGaps(PB, uLengthB);
119
120 const unsigned uPrefixCountA = uLengthA + 1;
121 const unsigned uPrefixCountB = uLengthB + 1;
122
123 AllocDPMem(uLengthA, uLengthB);
124
125 SCORE *GapOpenA = DPM.GapOpenA;
126 SCORE *GapOpenB = DPM.GapOpenB;
127 SCORE *GapCloseA = DPM.GapCloseA;
128 SCORE *GapCloseB = DPM.GapCloseB;
129
130 unsigned **SortOrderA = DPM.SortOrderA;
131 FCOUNT **FreqsA = DPM.FreqsA;
132 SCORE **ScoreMxB = DPM.ScoreMxB;
133 SCORE *MPrev = DPM.MPrev;
134 SCORE *MCurr = DPM.MCurr;
135 SCORE *MWork = DPM.MWork;
136
137 SCORE *DPrev = DPM.DPrev;
138 SCORE *DCurr = DPM.DCurr;
139 SCORE *DWork = DPM.DWork;
140
141 #if OCC
142 FCOUNT *OccA = DPM.OccA;
143 FCOUNT *OccB = DPM.OccB;
144 #endif
145
146 unsigned *uDeletePos = DPM.uDeletePos;
147
148 int **TraceBack = DPM.TraceBack;
149
150 for (unsigned i = 0; i < uLengthA; ++i)
151 {
152 GapOpenA[i] = PA[i].m_scoreGapOpen;
153 GapCloseA[i] = PA[i].m_scoreGapClose;
154 #if OCC
155 OccA[i] = PA[i].m_fOcc;
156 #endif
157
158 for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
159 {
160 SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
161 FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
162 }
163 }
164
165 for (unsigned j = 0; j < uLengthB; ++j)
166 {
167 GapOpenB[j] = PB[j].m_scoreGapOpen;
168 GapCloseB[j] = PB[j].m_scoreGapClose;
169 #if OCC
170 OccB[j] = PB[j].m_fOcc;
171 #endif
172 }
173
174 for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
175 {
176 for (unsigned j = 0; j < uLengthB; ++j)
177 ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
178 }
179
180 for (unsigned i = 0; i < uPrefixCountA; ++i)
181 memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
182
183 // Special case for i=0
184 unsigned **ptrSortOrderA = SortOrderA;
185 FCOUNT **ptrFreqsA = FreqsA;
186 assert(ptrSortOrderA == &(SortOrderA[0]));
187 assert(ptrFreqsA == &(FreqsA[0]));
188 TraceBack[0][0] = 0;
189
190 SCORE scoreSum = 0;
191 unsigned *ptrSortOrderAi = SortOrderA[0];
192 const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
193 FCOUNT *ptrFreqsAi = FreqsA[0];
194 for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
195 {
196 const unsigned uLetter = *ptrSortOrderAi;
197 const FCOUNT fcLetter = ptrFreqsAi[uLetter];
198 if (0 == fcLetter)
199 break;
200 scoreSum += fcLetter*ScoreMxB[uLetter][0];
201 }
202 if (0 == scoreSum)
203 MPrev[0] = -2.5;
204 else
205 {
206 #if OCC
207 MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
208 #else
209 MPrev[0] = (logf(scoreSum) - g_scoreCenter);
210 #endif
211 }
212
213 // D(0,0) is -infinity (requires I->D).
214 DPrev[0] = MINUS_INFINITY;
215
216 for (unsigned j = 1; j < uLengthB; ++j)
217 {
218 // Only way to get M(0, j) looks like this:
219 // A ----X
220 // B XXXXX
221 // 0 j
222 // So gap-open at j=0, gap-close at j-1.
223 SCORE scoreSum = 0;
224 unsigned *ptrSortOrderAi = SortOrderA[0];
225 const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
226 FCOUNT *ptrFreqsAi = FreqsA[0];
227 for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
228 {
229 const unsigned uLetter = *ptrSortOrderAi;
230 const FCOUNT fcLetter = ptrFreqsAi[uLetter];
231 if (0 == fcLetter)
232 break;
233 scoreSum += fcLetter*ScoreMxB[uLetter][j];
234 }
235 if (0 == scoreSum)
236 MPrev[j] = -2.5;
237 else
238 {
239 #if OCC
240 MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
241 GapOpenB[0] + GapCloseB[j-1];
242 #else
243 MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
244 GapOpenB[0] + GapCloseB[j-1];
245 #endif
246 }
247 TraceBack[0][j] = -(int) j;
248
249 // Assume no D->I transitions, then can't be a delete if only
250 // one letter from A.
251 DPrev[j] = MINUS_INFINITY;
252 }
253
254 SCORE IPrev_j_1;
255 for (unsigned i = 1; i < uLengthA; ++i)
256 {
257 ++ptrSortOrderA;
258 ++ptrFreqsA;
259 assert(ptrSortOrderA == &(SortOrderA[i]));
260 assert(ptrFreqsA == &(FreqsA[i]));
261
262 SCORE *ptrMCurr_j = MCurr;
263 memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
264 const FCOUNT *FreqsAi = *ptrFreqsA;
265
266 const unsigned *SortOrderAi = *ptrSortOrderA;
267 const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
268 const SCORE *ptrMCurrMax = MCurr + uLengthB;
269 for (const unsigned *ptrSortOrderAi = SortOrderAi;
270 ptrSortOrderAi != ptrSortOrderAiEnd;
271 ++ptrSortOrderAi)
272 {
273 const unsigned uLetter = *ptrSortOrderAi;
274 SCORE *NSBR_Letter = ScoreMxB[uLetter];
275 const FCOUNT fcLetter = FreqsAi[uLetter];
276 if (0 == fcLetter)
277 break;
278 SCORE *ptrNSBR = NSBR_Letter;
279 for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
280 *ptrMCurr += fcLetter*(*ptrNSBR++);
281 }
282
283 #if OCC
284 const FCOUNT OccAi = OccA[i];
285 #endif
286 for (unsigned j = 0; j < uLengthB; ++j)
287 {
288 if (MCurr[j] == 0)
289 MCurr[j] = -2.5;
290 else
291 #if OCC
292 MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
293 #else
294 MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
295 #endif
296 }
297
298 ptrMCurr_j = MCurr;
299 unsigned *ptrDeletePos = uDeletePos;
300
301 // Special case for j=0
302 // Only way to get M(i, 0) looks like this:
303 // 0 i
304 // A XXXXX
305 // B ----X
306 // So gap-open at i=0, gap-close at i-1.
307 assert(ptrMCurr_j == &(MCurr[0]));
308 *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
309
310 ++ptrMCurr_j;
311
312 int *ptrTraceBack_ij = TraceBack[i];
313 *ptrTraceBack_ij++ = (int) i;
314
315 SCORE *ptrMPrev_j = MPrev;
316 SCORE *ptrDPrev = DPrev;
317 SCORE d = *ptrDPrev;
318 SCORE DNew = *ptrMPrev_j + GapOpenA[i];
319 if (DNew > d)
320 {
321 d = DNew;
322 *ptrDeletePos = i;
323 }
324
325 SCORE *ptrDCurr = DCurr;
326
327 assert(ptrDCurr == &(DCurr[0]));
328 *ptrDCurr = d;
329
330 // Can't have an insert if no letters from B
331 IPrev_j_1 = MINUS_INFINITY;
332
333 unsigned uInsertPos = 0;
334 const SCORE scoreGapOpenAi = GapOpenA[i];
335 const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
336
337 for (unsigned j = 1; j < uLengthB; ++j)
338 {
339 // Here, MPrev_j is preserved from previous
340 // iteration so with current i,j is M[i-1][j-1]
341 SCORE MPrev_j = *ptrMPrev_j;
342 SCORE INew = MPrev_j + GapOpenB[j];
343 if (INew > IPrev_j_1)
344 {
345 IPrev_j_1 = INew;
346 uInsertPos = j;
347 }
348
349 SCORE scoreMax = MPrev_j;
350
351 assert(ptrDPrev == &(DPrev[j-1]));
352 SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
353 if (scoreD > scoreMax)
354 {
355 scoreMax = scoreD;
356 assert(ptrDeletePos == &(uDeletePos[j-1]));
357 *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
358 assert(*ptrTraceBack_ij > 0);
359 }
360 ++ptrDeletePos;
361
362 SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
363 if (scoreI > scoreMax)
364 {
365 scoreMax = scoreI;
366 *ptrTraceBack_ij = (int) uInsertPos - (int) j;
367 assert(*ptrTraceBack_ij < 0);
368 }
369
370 assert(ptrSortOrderA == &(SortOrderA[i]));
371 assert(ptrFreqsA == &(FreqsA[i]));
372
373 *ptrMCurr_j += scoreMax;
374 assert(ptrMCurr_j == &(MCurr[j]));
375 ++ptrMCurr_j;
376
377 MPrev_j = *(++ptrMPrev_j);
378 assert(ptrDPrev == &(DPrev[j]));
379 SCORE d = *ptrDPrev;
380 SCORE DNew = MPrev_j + scoreGapOpenAi;
381 if (DNew > d)
382 {
383 d = DNew;
384 assert(ptrDeletePos == &uDeletePos[j]);
385 *ptrDeletePos = i;
386 }
387 assert(ptrDCurr + 1 == &(DCurr[j]));
388 *(++ptrDCurr) = d;
389
390 ++ptrTraceBack_ij;
391 }
392
393 Rotate(MPrev, MCurr, MWork);
394 Rotate(DPrev, DCurr, DWork);
395 }
396
397 // Special case for i=uLengthA
398 SCORE IPrev = MINUS_INFINITY;
399
400 unsigned uInsertPos;
401
402 for (unsigned j = 1; j < uLengthB; ++j)
403 {
404 SCORE INew = MPrev[j-1] + GapOpenB[j];
405 if (INew > IPrev)
406 {
407 uInsertPos = j;
408 IPrev = INew;
409 }
410 }
411
412 // Special case for i=uLengthA, j=uLengthB
413 SCORE scoreMax = MPrev[uLengthB-1];
414 int iTraceBack = 0;
415
416 SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
417 if (scoreD > scoreMax)
418 {
419 scoreMax = scoreD;
420 iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
421 }
422
423 SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
424 if (scoreI > scoreMax)
425 {
426 scoreMax = scoreI;
427 iTraceBack = (int) uInsertPos - (int) uLengthB;
428 }
429
430 TraceBack[uLengthA][uLengthB] = iTraceBack;
431
432 TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
433
434 return scoreMax;
435 }
436