1 #include "muscle.h"
2 #include "profile.h"
3 #include "pwpath.h"
4 
5 #define	OCC	1
6 
7 struct DP_MEMORY
8 	{
9 	unsigned uLength;
10 	SCORE *GapOpenA;
11 	SCORE *GapOpenB;
12 	SCORE *GapCloseA;
13 	SCORE *GapCloseB;
14 	SCORE *MPrev;
15 	SCORE *MCurr;
16 	SCORE *MWork;
17 	SCORE *DPrev;
18 	SCORE *DCurr;
19 	SCORE *DWork;
20 	SCORE **ScoreMxB;
21 #if	OCC
22 	FCOUNT *OccA;
23 	FCOUNT *OccB;
24 #endif
25 	unsigned **SortOrderA;
26 	unsigned *uDeletePos;
27 	FCOUNT **FreqsA;
28 	int **TraceBack;
29 	};
30 
31 static struct DP_MEMORY DPM;
32 
AllocDPMem(unsigned uLengthA,unsigned uLengthB)33 static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)
34 	{
35 // Max prefix length
36 	unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;
37 	if (uLength < DPM.uLength)
38 		return;
39 
40 // Add 256 to allow for future expansion and
41 // round up to next multiple of 32.
42 	uLength += 256;
43 	uLength += 32 - uLength%32;
44 
45 	const unsigned uOldLength = DPM.uLength;
46 	if (uOldLength > 0)
47 		{
48 		for (unsigned i = 0; i < uOldLength; ++i)
49 			{
50 			delete[] DPM.TraceBack[i];
51 			delete[] DPM.FreqsA[i];
52 			delete[] DPM.SortOrderA[i];
53 			}
54 		for (unsigned n = 0; n < 20; ++n)
55 			delete[] DPM.ScoreMxB[n];
56 
57 		delete[] DPM.MPrev;
58 		delete[] DPM.MCurr;
59 		delete[] DPM.MWork;
60 		delete[] DPM.DPrev;
61 		delete[] DPM.DCurr;
62 		delete[] DPM.DWork;
63 		delete[] DPM.uDeletePos;
64 		delete[] DPM.GapOpenA;
65 		delete[] DPM.GapOpenB;
66 		delete[] DPM.GapCloseA;
67 		delete[] DPM.GapCloseB;
68 		delete[] DPM.SortOrderA;
69 		delete[] DPM.FreqsA;
70 		delete[] DPM.ScoreMxB;
71 		delete[] DPM.TraceBack;
72 #if	OCC
73 		delete[] DPM.OccA;
74 		delete[] DPM.OccB;
75 #endif
76 		}
77 
78 	DPM.uLength = uLength;
79 
80 	DPM.GapOpenA = new SCORE[uLength];
81 	DPM.GapOpenB = new SCORE[uLength];
82 	DPM.GapCloseA = new SCORE[uLength];
83 	DPM.GapCloseB = new SCORE[uLength];
84 #if	OCC
85 	DPM.OccA = new FCOUNT[uLength];
86 	DPM.OccB = new FCOUNT[uLength];
87 #endif
88 
89 	DPM.SortOrderA = new unsigned*[uLength];
90 	DPM.FreqsA = new FCOUNT*[uLength];
91 	DPM.ScoreMxB = new SCORE*[20];
92 	DPM.MPrev = new SCORE[uLength];
93 	DPM.MCurr = new SCORE[uLength];
94 	DPM.MWork = new SCORE[uLength];
95 
96 	DPM.DPrev = new SCORE[uLength];
97 	DPM.DCurr = new SCORE[uLength];
98 	DPM.DWork = new SCORE[uLength];
99 	DPM.uDeletePos = new unsigned[uLength];
100 
101 	DPM.TraceBack = new int*[uLength];
102 
103 	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
104 		DPM.ScoreMxB[uLetter] = new SCORE[uLength];
105 
106 	for (unsigned i = 0; i < uLength; ++i)
107 		{
108 		DPM.SortOrderA[i] = new unsigned[20];
109 		DPM.FreqsA[i] = new FCOUNT[20];
110 		DPM.TraceBack[i] = new int[uLength];
111 		}
112 	}
113 
GlobalAlignLE(const ProfPos * PA,unsigned uLengthA,const ProfPos * PB,unsigned uLengthB,PWPath & Path)114 SCORE GlobalAlignLE(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
115   unsigned uLengthB, PWPath &Path)
116 	{
117 	SetTermGaps(PA, uLengthA);
118 	SetTermGaps(PB, uLengthB);
119 
120 	const unsigned uPrefixCountA = uLengthA + 1;
121 	const unsigned uPrefixCountB = uLengthB + 1;
122 
123 	AllocDPMem(uLengthA, uLengthB);
124 
125 	SCORE *GapOpenA = DPM.GapOpenA;
126 	SCORE *GapOpenB = DPM.GapOpenB;
127 	SCORE *GapCloseA = DPM.GapCloseA;
128 	SCORE *GapCloseB = DPM.GapCloseB;
129 
130 	unsigned **SortOrderA = DPM.SortOrderA;
131 	FCOUNT **FreqsA = DPM.FreqsA;
132 	SCORE **ScoreMxB = DPM.ScoreMxB;
133 	SCORE *MPrev = DPM.MPrev;
134 	SCORE *MCurr = DPM.MCurr;
135 	SCORE *MWork = DPM.MWork;
136 
137 	SCORE *DPrev = DPM.DPrev;
138 	SCORE *DCurr = DPM.DCurr;
139 	SCORE *DWork = DPM.DWork;
140 
141 #if	OCC
142 	FCOUNT *OccA = DPM.OccA;
143 	FCOUNT *OccB = DPM.OccB;
144 #endif
145 
146 	unsigned *uDeletePos = DPM.uDeletePos;
147 
148 	int **TraceBack = DPM.TraceBack;
149 
150 	for (unsigned i = 0; i < uLengthA; ++i)
151 		{
152 		GapOpenA[i] = PA[i].m_scoreGapOpen;
153 		GapCloseA[i] = PA[i].m_scoreGapClose;
154 #if	OCC
155 		OccA[i] = PA[i].m_fOcc;
156 #endif
157 
158 		for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
159 			{
160 			SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];
161 			FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];
162 			}
163 		}
164 
165 	for (unsigned j = 0; j < uLengthB; ++j)
166 		{
167 		GapOpenB[j] = PB[j].m_scoreGapOpen;
168 		GapCloseB[j] = PB[j].m_scoreGapClose;
169 #if	OCC
170 		OccB[j] = PB[j].m_fOcc;
171 #endif
172 		}
173 
174 	for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
175 		{
176 		for (unsigned j = 0; j < uLengthB; ++j)
177 			ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];
178 		}
179 
180 	for (unsigned i = 0; i < uPrefixCountA; ++i)
181 		memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));
182 
183 // Special case for i=0
184 	unsigned **ptrSortOrderA = SortOrderA;
185 	FCOUNT **ptrFreqsA = FreqsA;
186 	assert(ptrSortOrderA == &(SortOrderA[0]));
187 	assert(ptrFreqsA == &(FreqsA[0]));
188 	TraceBack[0][0] = 0;
189 
190 	SCORE scoreSum = 0;
191 	unsigned *ptrSortOrderAi = SortOrderA[0];
192 	const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
193 	FCOUNT *ptrFreqsAi = FreqsA[0];
194 	for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
195 		{
196 		const unsigned uLetter = *ptrSortOrderAi;
197 		const FCOUNT fcLetter = ptrFreqsAi[uLetter];
198 		if (0 == fcLetter)
199 			break;
200 		scoreSum += fcLetter*ScoreMxB[uLetter][0];
201 		}
202 	if (0 == scoreSum)
203 		MPrev[0] = -2.5;
204 	else
205 		{
206 #if	OCC
207 		MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];
208 #else
209 		MPrev[0] = (logf(scoreSum) - g_scoreCenter);
210 #endif
211 		}
212 
213 // D(0,0) is -infinity (requires I->D).
214 	DPrev[0] = MINUS_INFINITY;
215 
216 	for (unsigned j = 1; j < uLengthB; ++j)
217 		{
218 	// Only way to get M(0, j) looks like this:
219 	//		A	----X
220 	//		B	XXXXX
221 	//			0   j
222 	// So gap-open at j=0, gap-close at j-1.
223 		SCORE scoreSum = 0;
224 		unsigned *ptrSortOrderAi = SortOrderA[0];
225 		const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;
226 		FCOUNT *ptrFreqsAi = FreqsA[0];
227 		for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)
228 			{
229 			const unsigned uLetter = *ptrSortOrderAi;
230 			const FCOUNT fcLetter = ptrFreqsAi[uLetter];
231 			if (0 == fcLetter)
232 				break;
233 			scoreSum += fcLetter*ScoreMxB[uLetter][j];
234 			}
235 		if (0 == scoreSum)
236 			MPrev[j] = -2.5;
237 		else
238 			{
239 #if	OCC
240 			MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +
241 			  GapOpenB[0] + GapCloseB[j-1];
242 #else
243 			MPrev[j] = (logf(scoreSum) - g_scoreCenter) +
244 			  GapOpenB[0] + GapCloseB[j-1];
245 #endif
246 			}
247 		TraceBack[0][j] = -(int) j;
248 
249 	// Assume no D->I transitions, then can't be a delete if only
250 	// one letter from A.
251 		DPrev[j] = MINUS_INFINITY;
252 		}
253 
254 	SCORE IPrev_j_1;
255 	for (unsigned i = 1; i < uLengthA; ++i)
256 		{
257 		++ptrSortOrderA;
258 		++ptrFreqsA;
259 		assert(ptrSortOrderA == &(SortOrderA[i]));
260 		assert(ptrFreqsA == &(FreqsA[i]));
261 
262 		SCORE *ptrMCurr_j = MCurr;
263 		memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));
264 		const FCOUNT *FreqsAi = *ptrFreqsA;
265 
266 		const unsigned *SortOrderAi = *ptrSortOrderA;
267 		const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;
268 		const SCORE *ptrMCurrMax = MCurr + uLengthB;
269 		for (const unsigned *ptrSortOrderAi = SortOrderAi;
270 		  ptrSortOrderAi != ptrSortOrderAiEnd;
271 		  ++ptrSortOrderAi)
272 			{
273 			const unsigned uLetter = *ptrSortOrderAi;
274 			SCORE *NSBR_Letter = ScoreMxB[uLetter];
275 			const FCOUNT fcLetter = FreqsAi[uLetter];
276 			if (0 == fcLetter)
277 				break;
278 			SCORE *ptrNSBR = NSBR_Letter;
279 			for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)
280 				*ptrMCurr += fcLetter*(*ptrNSBR++);
281 			}
282 
283 #if	OCC
284 		const FCOUNT OccAi = OccA[i];
285 #endif
286 		for (unsigned j = 0; j < uLengthB; ++j)
287 			{
288 			if (MCurr[j] == 0)
289 				MCurr[j] = -2.5;
290 			else
291 #if	OCC
292 				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];
293 #else
294 				MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);
295 #endif
296 			}
297 
298 		ptrMCurr_j = MCurr;
299 		unsigned *ptrDeletePos = uDeletePos;
300 
301 	// Special case for j=0
302 	// Only way to get M(i, 0) looks like this:
303 	//			0   i
304 	//		A	XXXXX
305 	//		B	----X
306 	// So gap-open at i=0, gap-close at i-1.
307 		assert(ptrMCurr_j == &(MCurr[0]));
308 		*ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];
309 
310 		++ptrMCurr_j;
311 
312 		int *ptrTraceBack_ij = TraceBack[i];
313 		*ptrTraceBack_ij++ = (int) i;
314 
315 		SCORE *ptrMPrev_j = MPrev;
316 		SCORE *ptrDPrev = DPrev;
317 		SCORE d = *ptrDPrev;
318 		SCORE DNew = *ptrMPrev_j + GapOpenA[i];
319 		if (DNew > d)
320 			{
321 			d = DNew;
322 			*ptrDeletePos = i;
323 			}
324 
325 		SCORE *ptrDCurr = DCurr;
326 
327 		assert(ptrDCurr == &(DCurr[0]));
328 		*ptrDCurr = d;
329 
330 	// Can't have an insert if no letters from B
331 		IPrev_j_1 = MINUS_INFINITY;
332 
333 		unsigned uInsertPos = 0;
334 		const SCORE scoreGapOpenAi = GapOpenA[i];
335 		const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];
336 
337 		for (unsigned j = 1; j < uLengthB; ++j)
338 			{
339 		// Here, MPrev_j is preserved from previous
340 		// iteration so with current i,j is M[i-1][j-1]
341 			SCORE MPrev_j = *ptrMPrev_j;
342 			SCORE INew = MPrev_j + GapOpenB[j];
343 			if (INew > IPrev_j_1)
344 				{
345 				IPrev_j_1 = INew;
346 				uInsertPos = j;
347 				}
348 
349 			SCORE scoreMax = MPrev_j;
350 
351 			assert(ptrDPrev == &(DPrev[j-1]));
352 			SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;
353 			if (scoreD > scoreMax)
354 				{
355 				scoreMax = scoreD;
356 				assert(ptrDeletePos == &(uDeletePos[j-1]));
357 				*ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;
358 				assert(*ptrTraceBack_ij > 0);
359 				}
360 			++ptrDeletePos;
361 
362 			SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];
363 			if (scoreI > scoreMax)
364 				{
365 				scoreMax = scoreI;
366 				*ptrTraceBack_ij = (int) uInsertPos - (int) j;
367 				assert(*ptrTraceBack_ij < 0);
368 				}
369 
370 			assert(ptrSortOrderA == &(SortOrderA[i]));
371 			assert(ptrFreqsA == &(FreqsA[i]));
372 
373 			*ptrMCurr_j += scoreMax;
374 			assert(ptrMCurr_j == &(MCurr[j]));
375 			++ptrMCurr_j;
376 
377 			MPrev_j = *(++ptrMPrev_j);
378 			assert(ptrDPrev == &(DPrev[j]));
379 			SCORE d = *ptrDPrev;
380 			SCORE DNew = MPrev_j + scoreGapOpenAi;
381 			if (DNew > d)
382 				{
383 				d = DNew;
384 				assert(ptrDeletePos == &uDeletePos[j]);
385 				*ptrDeletePos = i;
386 				}
387 			assert(ptrDCurr + 1 == &(DCurr[j]));
388 			*(++ptrDCurr) = d;
389 
390 			++ptrTraceBack_ij;
391 			}
392 
393 		Rotate(MPrev, MCurr, MWork);
394 		Rotate(DPrev, DCurr, DWork);
395 		}
396 
397 // Special case for i=uLengthA
398 	SCORE IPrev = MINUS_INFINITY;
399 
400 	unsigned uInsertPos;
401 
402 	for (unsigned j = 1; j < uLengthB; ++j)
403 		{
404 		SCORE INew = MPrev[j-1] + GapOpenB[j];
405 		if (INew > IPrev)
406 			{
407 			uInsertPos = j;
408 			IPrev = INew;
409 			}
410 		}
411 
412 // Special case for i=uLengthA, j=uLengthB
413 	SCORE scoreMax = MPrev[uLengthB-1];
414 	int iTraceBack = 0;
415 
416 	SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];
417 	if (scoreD > scoreMax)
418 		{
419 		scoreMax = scoreD;
420 		iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];
421 		}
422 
423 	SCORE scoreI = IPrev + GapCloseB[uLengthB-1];
424 	if (scoreI > scoreMax)
425 		{
426 		scoreMax = scoreI;
427 		iTraceBack = (int) uInsertPos - (int) uLengthB;
428 		}
429 
430 	TraceBack[uLengthA][uLengthB] = iTraceBack;
431 
432 	TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);
433 
434 	return scoreMax;
435 	}
436