1 #include "muscle.h"
2 #include "msa.h"
3 #include "pwpath.h"
4 #include "profile.h"
5 
6 #define	TRACE	0
7 
LogPP(const ProfPos & PP)8 static void LogPP(const ProfPos &PP)
9 	{
10 	Log("ResidueGroup   %u\n", PP.m_uResidueGroup);
11 	Log("AllGaps      %d\n", PP.m_bAllGaps);
12 	Log("Occ          %.3g\n", PP.m_fOcc);
13 	Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
14 	Log("Freqs        ");
15 	for (unsigned i = 0; i < 20; ++i)
16 		if (PP.m_fcCounts[i] > 0)
17 			Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
18 	Log("\n");
19 	}
20 
AssertProfPosEq(const ProfPos * PA,const ProfPos * PB,unsigned i)21 static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
22 	{
23 	const ProfPos &PPA = PA[i];
24 	const ProfPos &PPB = PB[i];
25 #define	eq(x)	if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
26 #define be(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
27 	eq(bAllGaps)
28 	eq(uResidueGroup)
29 
30 	be(LL)
31 	be(LG)
32 	be(GL)
33 	be(GG)
34 	be(fOcc)
35 	be(scoreGapOpen)
36 	be(scoreGapClose)
37 
38 	for (unsigned j = 0; j < 20; ++j)
39 		{
40 #define	eqj(x)	if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
41 #define bej(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
42 		bej(fcCounts[j]);
43 //		eqj(uSortOrder[j]) // may differ due to ties, don't check?
44 		bej(AAScores[j])
45 #undef eqj
46 #undef bej
47 		}
48 #undef eq
49 #undef be
50 	}
51 
AssertProfsEq(const ProfPos * PA,unsigned uLengthA,const ProfPos * PB,unsigned uLengthB)52 void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
53   unsigned uLengthB)
54 	{
55 	if (uLengthA != uLengthB)
56 		Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
57 	for (unsigned i = 0; i < uLengthB; ++i)
58 		AssertProfPosEq(PA, PB, i);
59 	}
60 
61 #if	DEBUG
ValidateProf(const ProfPos * Prof,unsigned uLength)62 static void ValidateProf(const ProfPos *Prof, unsigned uLength)
63 	{
64 	for (unsigned i = 0; i < uLength; ++i)
65 		{
66 		const ProfPos &PP = Prof[i];
67 
68 		FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;
69 		assert(BTEq(s1, 1.0));
70 
71 		if (i > 0)
72 			{
73 			const ProfPos &PPPrev = Prof[i-1];
74 			FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;
75 			FCOUNT s3 = PP.m_LL + PP.m_LG;
76 			assert(BTEq(s2, s3));
77 			}
78 		if (i < uLength - 1)
79 			{
80 			const ProfPos &PPNext = Prof[i+1];
81 			FCOUNT s4 = PP.m_LL + PP.m_GL;
82 			FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
83 			assert(BTEq(s4, s5));
84 			}
85 		}
86 	}
87 #else
88 #define ValidateProf(Prof, Length)	/* empty */
89 #endif
90 
ScoresFromFreqsPos(ProfPos * Prof,unsigned uLength,unsigned uPos)91 static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
92 	{
93 	ProfPos &PP = Prof[uPos];
94 	SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
95 	PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
96 
97 // "Occupancy"
98 	PP.m_fOcc = PP.m_LL + PP.m_GL;
99 
100 // Frequency of gap-opens in this position (i)
101 // Gap open 	= letter in i-1 and gap in i
102 //				= iff LG in i
103 	FCOUNT fcOpen = PP.m_LG;
104 
105 // Frequency of gap-closes in this position
106 // Gap close	= gap in i and letter in i+1
107 //				= iff GL in i+1
108 	FCOUNT fcClose;
109 	if (uPos + 1 < uLength)
110 		fcClose = Prof[uPos + 1].m_GL;
111 	else
112 		fcClose = PP.m_GG + PP.m_LG;
113 
114 	PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
115 	PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
116 #if	DOUBLE_AFFINE
117 	PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
118 	PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
119 #endif
120 
121 	for (unsigned i = 0; i < g_AlphaSize; ++i)
122 		{
123 		SCORE scoreSum = 0;
124 		for (unsigned j = 0; j < g_AlphaSize; ++j)
125 			scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
126 		PP.m_AAScores[i] = scoreSum;
127 		}
128 	}
129 
ProfScoresFromFreqs(ProfPos * Prof,unsigned uLength)130 void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
131 	{
132 	for (unsigned i = 0; i < uLength; ++i)
133 		ScoresFromFreqsPos(Prof, uLength, i);
134 	}
135 
AppendDelete(const MSA & msaA,unsigned & uColIndexA,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)136 static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
137   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
138   unsigned &uColIndexCombined)
139 	{
140 #if	TRACE
141 	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
142 	  uColIndexA, uColIndexCombined);
143 #endif
144 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
145 		{
146 		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
147 		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
148 		}
149 
150 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
151 		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
152 
153 	++uColIndexCombined;
154 	++uColIndexA;
155 	}
156 
AppendInsert(const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)157 static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
158   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
159   unsigned &uColIndexCombined)
160 	{
161 #if	TRACE
162 	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
163 	  uColIndexB, uColIndexCombined);
164 #endif
165 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
166 		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
167 
168 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
169 		{
170 		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
171 		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
172 		}
173 
174 	++uColIndexCombined;
175 	++uColIndexB;
176 	}
177 
AppendTplInserts(const MSA & msaA,unsigned & uColIndexA,unsigned uColCountA,const MSA & msaB,unsigned & uColIndexB,unsigned uColCountB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)178 static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
179   const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
180   unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
181 	{
182 #if	TRACE
183 	Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
184 	  uColIndexA, uColIndexB, uColIndexCombined);
185 #endif
186 	const unsigned uLengthA = msaA.GetColCount();
187 	const unsigned uLengthB = msaB.GetColCount();
188 
189 	unsigned uNewColCount = uColCountA;
190 	if (uColCountB > uNewColCount)
191 		uNewColCount = uColCountB;
192 
193 	for (unsigned n = 0; n < uColCountA; ++n)
194 		{
195 		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
196 			{
197 			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
198 			c = UnalignChar(c);
199 			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
200 			}
201 		}
202 	for (unsigned n = uColCountA; n < uNewColCount; ++n)
203 		{
204 		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
205 			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
206 		}
207 
208 	for (unsigned n = 0; n < uColCountB; ++n)
209 		{
210 		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
211 			{
212 			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
213 			c = UnalignChar(c);
214 			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
215 			}
216 		}
217 	for (unsigned n = uColCountB; n < uNewColCount; ++n)
218 		{
219 		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
220 			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
221 		}
222 
223 	uColIndexCombined += uNewColCount;
224 	uColIndexA += uColCountA;
225 	uColIndexB += uColCountB;
226 	}
227 
AppendMatch(const MSA & msaA,unsigned & uColIndexA,const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)228 static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
229   unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
230   MSA &msaCombined, unsigned &uColIndexCombined)
231 	{
232 #if	TRACE
233 	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
234 	  uColIndexA, uColIndexB, uColIndexCombined);
235 #endif
236 
237 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
238 		{
239 		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
240 		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
241 		}
242 
243 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
244 		{
245 		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
246 		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
247 		}
248 
249 	++uColIndexA;
250 	++uColIndexB;
251 	++uColIndexCombined;
252 	}
253 
AlignTwoMSAsGivenPath(const PWPath & Path,const MSA & msaA,const MSA & msaB,MSA & msaCombined)254 void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
255   MSA &msaCombined)
256 	{
257 	msaCombined.Clear();
258 
259 #if	TRACE
260 	Log("FastAlignProfiles\n");
261 	Log("Template A:\n");
262 	msaA.LogMe();
263 	Log("Template B:\n");
264 	msaB.LogMe();
265 #endif
266 
267 	const unsigned uColCountA = msaA.GetColCount();
268 	const unsigned uColCountB = msaB.GetColCount();
269 
270 	const unsigned uSeqCountA = msaA.GetSeqCount();
271 	const unsigned uSeqCountB = msaB.GetSeqCount();
272 
273 	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
274 
275 // Copy sequence names into combined MSA
276 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
277 		{
278 		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
279 		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
280 		}
281 
282 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
283 		{
284 		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
285 		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
286 		}
287 
288 	unsigned uColIndexA = 0;
289 	unsigned uColIndexB = 0;
290 	unsigned uColIndexCombined = 0;
291 	const unsigned uEdgeCount = Path.GetEdgeCount();
292 	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
293 		{
294 		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
295 #if	TRACE
296 		Log("\nEdge %u %c%u.%u\n",
297 		  uEdgeIndex,
298 		  Edge.cType,
299 		  Edge.uPrefixLengthA,
300 		  Edge.uPrefixLengthB);
301 #endif
302 		const char cType = Edge.cType;
303 		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
304 		unsigned uColCountA = 0;
305 		if (uPrefixLengthA > 0)
306 			{
307 			const unsigned uNodeIndexA = uPrefixLengthA - 1;
308 			const unsigned uTplColIndexA = uNodeIndexA;
309 			if (uTplColIndexA > uColIndexA)
310 				uColCountA = uTplColIndexA - uColIndexA;
311 			}
312 
313 		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
314 		unsigned uColCountB = 0;
315 		if (uPrefixLengthB > 0)
316 			{
317 			const unsigned uNodeIndexB = uPrefixLengthB - 1;
318 			const unsigned uTplColIndexB = uNodeIndexB;
319 			if (uTplColIndexB > uColIndexB)
320 				uColCountB = uTplColIndexB - uColIndexB;
321 			}
322 
323 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
324 		assert(uColCountA == 0);
325 		assert(uColCountB == 0);
326 		AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
327 		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
328 
329 		switch (cType)
330 			{
331 		case 'M':
332 			{
333 			assert(uPrefixLengthA > 0);
334 			assert(uPrefixLengthB > 0);
335 			const unsigned uColA = uPrefixLengthA - 1;
336 			const unsigned uColB = uPrefixLengthB - 1;
337 			assert(uColIndexA == uColA);
338 			assert(uColIndexB == uColB);
339 			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
340 			  msaCombined, uColIndexCombined);
341 			break;
342 			}
343 		case 'D':
344 			{
345 			assert(uPrefixLengthA > 0);
346 			const unsigned uColA = uPrefixLengthA - 1;
347 			assert(uColIndexA == uColA);
348 			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
349 			break;
350 			}
351 		case 'I':
352 			{
353 			assert(uPrefixLengthB > 0);
354 			const unsigned uColB = uPrefixLengthB - 1;
355 			assert(uColIndexB == uColB);
356 			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
357 			break;
358 			}
359 		default:
360 			assert(false);
361 			}
362 		}
363 	unsigned uInsertColCountA = uColCountA - uColIndexA;
364 	unsigned uInsertColCountB = uColCountB - uColIndexB;
365 
366 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
367 	assert(uInsertColCountA == 0);
368 	assert(uInsertColCountB == 0);
369 	AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
370 	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
371 
372 	assert(msaCombined.GetColCount() == uEdgeCount);
373 	}
374 
375 static const ProfPos PPStart =
376 	{
377 	false,		//m_bAllGaps;
378 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
379 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
380 	1.0,	// m_LL;
381 	0.0,	// m_LG;
382 	0.0,	// m_GL;
383 	0.0,	// m_GG;
384 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores
385 	0,		// m_uResidueGroup;
386 	1.0,	// m_fOcc;
387 	0.0,	// m_fcStartOcc;
388 	0.0,	// m_fcEndOcc;
389 	0.0,	// m_scoreGapOpen;
390 	0.0,	// m_scoreGapClose;
391 	};
392 
393 // MM
394 //  Ai�1	Ai		Out
395 //  X		X	LL	LL
396 //  X		-	LG	LG
397 //  -		X	GL	GL
398 //  -		-	GG	GG
399 //
400 //  Bj�1	Bj
401 //  X		X	LL	LL
402 //  X		-	LG	LG
403 //  -		X	GL	GL
404 //  -		-	GG	GG
SetGapsMM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)405 static void SetGapsMM(
406   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
407   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
408   ProfPos *POut, unsigned uColIndexOut)
409 	{
410 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
411 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
412 	ProfPos &PPO = POut[uColIndexOut];
413 
414 	PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
415 	PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
416 	PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
417 	PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
418 	}
419 
420 // MD
421 //  Ai�1	Ai		Out
422 //  X		X	LL	LL
423 //  X		-	LG	LG
424 //  -		X	GL	GL
425 //  -		-	GG	GG
426 //
427 //  Bj		(-)
428 //  X		-	?L	LG
429 //  -		-	?G	GG
SetGapsMD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)430 static void SetGapsMD(
431   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
432   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
433   ProfPos *POut, unsigned uColIndexOut)
434 	{
435 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
436 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
437 	ProfPos &PPO = POut[uColIndexOut];
438 
439 	PPO.m_LL = wA*PPA.m_LL;
440 	PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
441 	PPO.m_GL = wA*PPA.m_GL;
442 	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
443 	}
444 
445 // DD
446 //  Ai�1	Ai		Out
447 //  X		X	LL	LL
448 //  X		-	LG	LG
449 //  -		X	GL	GL
450 //  -		-	GG	GG
451 //
452 //  (-)		(-)
453 //  -		-	??	GG
SetGapsDD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)454 static void SetGapsDD(
455   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
456   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
457   ProfPos *POut, unsigned uColIndexOut)
458 	{
459 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
460 	ProfPos &PPO = POut[uColIndexOut];
461 
462 	PPO.m_LL = wA*PPA.m_LL;
463 	PPO.m_LG = wA*PPA.m_LG;
464 	PPO.m_GL = wA*PPA.m_GL;
465 	PPO.m_GG = wA*PPA.m_GG + wB;
466 	}
467 
468 // MI
469 //  Ai		(-)		Out
470 //  X		-	?L	LG
471 //  -		-	?G	GG
472 
473 //  Bj�1	Bj
474 //  X		X	LL	LL
475 //  X		-	LG	LG
476 //  -		X	GL	GL
477 //  -		-	GG	GG
SetGapsMI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)478 static void SetGapsMI(
479   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
480   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
481   ProfPos *POut, unsigned uColIndexOut)
482 	{
483 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
484 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
485 	ProfPos &PPO = POut[uColIndexOut];
486 
487 	PPO.m_LL = wB*PPB.m_LL;
488 	PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
489 	PPO.m_GL = wB*PPB.m_GL;
490 	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
491 	}
492 
493 // DM
494 //  Ai�1	Ai		Out
495 //  X		X	LL	LL
496 //  X		-	LG	LG
497 //  -		X	GL	GL
498 //  -		-	GG	GG
499 //
500 //  (-)		Bj
501 //  -		X		?L	GL
502 //  -		-		?G	GG
SetGapsDM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)503 static void SetGapsDM(
504   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
505   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
506   ProfPos *POut, unsigned uColIndexOut)
507 	{
508 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
509 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
510 	ProfPos &PPO = POut[uColIndexOut];
511 
512 	PPO.m_LL = wA*PPA.m_LL;
513 	PPO.m_LG = wA*PPA.m_LG;
514 	PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
515 	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
516 	}
517 
518 // IM
519 //  (-)		Ai		Out
520 //  -		X	?L	GL
521 //  -		-	?G	GG
522 
523 //  Bj�1	Bj
524 //  X		X	LL	LL
525 //  X		-	LG	LG
526 //  -		X	GL	GL
527 //  -		-	GG	GG
SetGapsIM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)528 static void SetGapsIM(
529   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
530   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
531   ProfPos *POut, unsigned uColIndexOut)
532 	{
533 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
534 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
535 	ProfPos &PPO = POut[uColIndexOut];
536 
537 	PPO.m_LL = wB*PPB.m_LL;
538 	PPO.m_LG = wB*PPB.m_LG;
539 	PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
540 	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
541 	}
542 
543 // ID
544 //  (-)		Ai		Out
545 //  -		X	?L	GL
546 //  -		-	?G	GG
547 
548 //  Bj		(-)
549 //  X		-	?L	LG
550 //  -		-	?G	GG
SetGapsID(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)551 static void SetGapsID(
552   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
553   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
554   ProfPos *POut, unsigned uColIndexOut)
555 	{
556 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
557 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
558 	ProfPos &PPO = POut[uColIndexOut];
559 
560 	PPO.m_LL = 0;
561 	PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
562 	PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
563 	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
564 	}
565 
566 // DI
567 //  Ai		(-)		Out
568 //  X		-	?L	LG
569 //  -		-	?G	GG
570 
571 //  (-)		Bj
572 //  -		X	?L	GL
573 //  -		-	?G	GG
SetGapsDI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)574 static void SetGapsDI(
575   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
576   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
577   ProfPos *POut, unsigned uColIndexOut)
578 	{
579 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
580 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
581 	ProfPos &PPO = POut[uColIndexOut];
582 
583 	PPO.m_LL = 0;
584 	PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
585 	PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
586 	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
587 	}
588 
589 // II
590 //  (-)		(-)		Out
591 //  -		-	??	GG
592 
593 //  Bj�1	Bj
594 //  X		X	LL	LL
595 //  X		-	LG	LG
596 //  -		X	GL	GL
597 //  -		-	GG	GG
SetGapsII(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)598 static void SetGapsII(
599   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
600   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
601   ProfPos *POut, unsigned uColIndexOut)
602 	{
603 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
604 	ProfPos &PPO = POut[uColIndexOut];
605 
606 	PPO.m_LL = wB*PPB.m_LL;
607 	PPO.m_LG = wB*PPB.m_LG;
608 	PPO.m_GL = wB*PPB.m_GL;
609 	PPO.m_GG = wB*PPB.m_GG + wA;
610 	}
611 
SetFreqs(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)612 static void SetFreqs(
613   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
614   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
615   ProfPos *POut, unsigned uColIndexOut)
616 	{
617 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
618 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
619 	ProfPos &PPO = POut[uColIndexOut];
620 
621 	if (g_bNormalizeCounts)
622 		{
623 		const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
624 		const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
625 		FCOUNT fTotal = 0;
626 		for (unsigned i = 0; i < 20; ++i)
627 			{
628 			const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
629 			PPO.m_fcCounts[i] = f;
630 			fTotal += f;
631 			}
632 		if (fTotal > 0)
633 			for (unsigned i = 0; i < 20; ++i)
634 				PPO.m_fcCounts[i] /= fTotal;
635 		}
636 	else
637 		{
638 		for (unsigned i = 0; i < 20; ++i)
639 			PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
640 		}
641 	}
642 
AlignTwoProfsGivenPath(const PWPath & Path,const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos ** ptrPOut,unsigned * ptruLengthOut)643 void AlignTwoProfsGivenPath(const PWPath &Path,
644   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
645   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
646   ProfPos **ptrPOut, unsigned *ptruLengthOut)
647 	{
648 #if	TRACE
649 	Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
650 	Path.LogMe();
651 #endif
652 	assert(BTEq(wA + wB, 1.0));
653 
654 	unsigned uColIndexA = 0;
655 	unsigned uColIndexB = 0;
656 	unsigned uColIndexOut = 0;
657 	const unsigned uEdgeCount = Path.GetEdgeCount();
658 	ProfPos *POut = new ProfPos[uEdgeCount];
659 	char cPrevType = 'M';
660 	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
661 		{
662 		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
663 		const char cType = Edge.cType;
664 
665 		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
666 		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
667 
668 #if	TRACE
669 		Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
670 		  uEdgeIndex,
671 		  Edge.cType,
672 		  Edge.uPrefixLengthA,
673 		  Edge.uPrefixLengthB,
674 		  uColIndexA,
675 		  uColIndexB);
676 #endif
677 
678 		POut[uColIndexOut].m_bAllGaps = false;
679 		switch (cType)
680 			{
681 		case 'M':
682 			{
683 			assert(uPrefixLengthA > 0);
684 			assert(uPrefixLengthB > 0);
685 			SetFreqs(
686 			  PA, uPrefixLengthA, wA,
687 			  PB, uPrefixLengthB, wB,
688 			  POut, uColIndexOut);
689 			switch (cPrevType)
690 				{
691 			case 'M':
692 				SetGapsMM(
693 				  PA, uPrefixLengthA, wA,
694 				  PB, uPrefixLengthB, wB,
695 				  POut, uColIndexOut);
696 			  break;
697 			case 'D':
698 				SetGapsDM(
699 				  PA, uPrefixLengthA, wA,
700 				  PB, uPrefixLengthB, wB,
701 				  POut, uColIndexOut);
702 				break;
703 			case 'I':
704 				SetGapsIM(
705 				  PA, uPrefixLengthA, wA,
706 				  PB, uPrefixLengthB, wB,
707 				  POut, uColIndexOut);
708 				break;
709 			default:
710 				Quit("Bad cPrevType");
711 				}
712 			++uColIndexA;
713 			++uColIndexB;
714 			++uColIndexOut;
715 			break;
716 			}
717 		case 'D':
718 			{
719 			assert(uPrefixLengthA > 0);
720 			SetFreqs(
721 			  PA, uPrefixLengthA, wA,
722 			  PB, uPrefixLengthB, 0,
723 			  POut, uColIndexOut);
724 			switch (cPrevType)
725 				{
726 			case 'M':
727 				SetGapsMD(
728 				  PA, uPrefixLengthA, wA,
729 				  PB, uPrefixLengthB, wB,
730 				  POut, uColIndexOut);
731 			  break;
732 			case 'D':
733 				SetGapsDD(
734 				  PA, uPrefixLengthA, wA,
735 				  PB, uPrefixLengthB, wB,
736 				  POut, uColIndexOut);
737 				break;
738 			case 'I':
739 				SetGapsID(
740 				  PA, uPrefixLengthA, wA,
741 				  PB, uPrefixLengthB, wB,
742 				  POut, uColIndexOut);
743 				break;
744 			default:
745 				Quit("Bad cPrevType");
746 				}
747 			++uColIndexA;
748 			++uColIndexOut;
749 			break;
750 			}
751 		case 'I':
752 			{
753 			assert(uPrefixLengthB > 0);
754 			SetFreqs(
755 			  PA, uPrefixLengthA, 0,
756 			  PB, uPrefixLengthB, wB,
757 			  POut, uColIndexOut);
758 			switch (cPrevType)
759 				{
760 			case 'M':
761 				SetGapsMI(
762 				  PA, uPrefixLengthA, wA,
763 				  PB, uPrefixLengthB, wB,
764 				  POut, uColIndexOut);
765 			  break;
766 			case 'D':
767 				SetGapsDI(
768 				  PA, uPrefixLengthA, wA,
769 				  PB, uPrefixLengthB, wB,
770 				  POut, uColIndexOut);
771 				break;
772 			case 'I':
773 				SetGapsII(
774 				  PA, uPrefixLengthA, wA,
775 				  PB, uPrefixLengthB, wB,
776 				  POut, uColIndexOut);
777 				break;
778 			default:
779 				Quit("Bad cPrevType");
780 				}
781 			++uColIndexB;
782 			++uColIndexOut;
783 			break;
784 			}
785 		default:
786 			assert(false);
787 			}
788 		cPrevType = cType;
789 		}
790 	assert(uColIndexOut == uEdgeCount);
791 
792 	ProfScoresFromFreqs(POut, uEdgeCount);
793 	ValidateProf(POut, uEdgeCount);
794 
795 	*ptrPOut = POut;
796 	*ptruLengthOut = uEdgeCount;
797 
798 #if	TRACE
799 	Log("AlignTwoProfsGivenPath:\n");
800 	ListProfile(POut, uEdgeCount, 0);
801 #endif
802 	}
803