1 #include <U2Core/disable-warnings.h>
2 U2_DISABLE_WARNINGS
3 
4 #include "muscle.h"
5 #include "msa.h"
6 #include "pwpath.h"
7 #include "profile.h"
8 #include "muscle_context.h"
9 
10 //static void LogPP(const ProfPos &PP)
11 //	{
12 //    char *g_LetterToChar = getMuscleContext()->alpha.g_LetterToChar;
13 //
14 //	Log("ResidueGroup   %u\n", PP.m_uResidueGroup);
15 //	Log("AllGaps      %d\n", PP.m_bAllGaps);
16 //	Log("Occ          %.3g\n", PP.m_fOcc);
17 //	Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
18 //	Log("Freqs        ");
19 //	for (unsigned i = 0; i < 20; ++i)
20 //		if (PP.m_fcCounts[i] > 0)
21 //			Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
22 //	Log("\n");
23 //	}
24 
AssertProfPosEq(const ProfPos * PA,const ProfPos * PB,unsigned i)25 static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
26 	{
27 	const ProfPos &PPA = PA[i];
28 	const ProfPos &PPB = PB[i];
29 #define	eq(x)	if (PPA.m_##x != PPB.m_##x) { /*LogPP(PPA); LogPP(PPB);*/ Quit("AssertProfPosEq." #x); }
30 #define be(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) { /*LogPP(PPA); LogPP(PPB);*/ Quit("AssertProfPosEq." #x); }
31 	eq(bAllGaps)
32 	eq(uResidueGroup)
33 
34 	be(LL)
35 	be(LG)
36 	be(GL)
37 	be(GG)
38 	be(fOcc)
39 	be(scoreGapOpen)
40 	be(scoreGapClose)
41 
42 	for (unsigned j = 0; j < 20; ++j)
43 		{
44 #define	eqj(x)	if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
45 #define bej(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
46 		bej(fcCounts[j]);
47 //		eqj(uSortOrder[j]) // may differ due to ties, don't check?
48 		bej(AAScores[j])
49 #undef eqj
50 #undef bej
51 		}
52 #undef eq
53 #undef be
54 	}
55 
AssertProfsEq(const ProfPos * PA,unsigned uLengthA,const ProfPos * PB,unsigned uLengthB)56 void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
57   unsigned uLengthB)
58 	{
59 	if (uLengthA != uLengthB)
60 		Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
61 	for (unsigned i = 0; i < uLengthB; ++i)
62 		AssertProfPosEq(PA, PB, i);
63 	}
64 
65 #if	DEBUG
ValidateProf(const ProfPos * Prof,unsigned uLength)66 static void ValidateProf(const ProfPos *Prof, unsigned uLength)
67 	{
68 	for (unsigned i = 0; i < uLength; ++i)
69 		{
70 		const ProfPos &PP = Prof[i];
71 
72 		FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;
73 		assert(BTEq(s1, 1.0));
74 
75 		if (i > 0)
76 			{
77 			const ProfPos &PPPrev = Prof[i-1];
78 			FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;
79 			FCOUNT s3 = PP.m_LL + PP.m_LG;
80 			assert(BTEq(s2, s3));
81 			}
82 		if (i < uLength - 1)
83 			{
84 			const ProfPos &PPNext = Prof[i+1];
85 			FCOUNT s4 = PP.m_LL + PP.m_GL;
86 			FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
87 			assert(BTEq(s4, s5));
88 			}
89 		}
90 	}
91 #else
92 #define ValidateProf(Prof, Length)	/* empty */
93 #endif
94 
ScoresFromFreqsPos(ProfPos * Prof,unsigned uLength,unsigned uPos)95 static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
96 	{
97     MuscleContext *ctx = getMuscleContext();
98     unsigned &g_AlphaSize = ctx->alpha.g_AlphaSize;
99     PTR_SCOREMATRIX &g_ptrScoreMatrix = ctx->params.g_ptrScoreMatrix;
100     SCORE &g_scoreGapOpen = ctx->params.g_scoreGapOpen;
101     //SCORE &g_scoreGapExtend = ctx->params.g_scoreGapExtend;
102     //SCORE &g_scoreGapOpen2 = ctx->params.g_scoreGapOpen2;
103     //SCORE &g_scoreGapExtend2 = ctx->params.g_scoreGapExtend2;
104 
105 	ProfPos &PP = Prof[uPos];
106 	SortCounts(PP.m_fcCounts, PP.m_uSortOrder, g_AlphaSize);
107 	PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
108 
109 // "Occupancy"
110 	PP.m_fOcc = PP.m_LL + PP.m_GL;
111 
112 // Frequency of gap-opens in this position (i)
113 // Gap open 	= letter in i-1 and gap in i
114 //				= iff LG in i
115 	FCOUNT fcOpen = PP.m_LG;
116 
117 // Frequency of gap-closes in this position
118 // Gap close	= gap in i and letter in i+1
119 //				= iff GL in i+1
120 	FCOUNT fcClose;
121 	if (uPos + 1 < uLength)
122 		fcClose = Prof[uPos + 1].m_GL;
123 	else
124 		fcClose = PP.m_GG + PP.m_LG;
125 
126 	PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
127 	PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
128 #if	DOUBLE_AFFINE
129 	PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
130 	PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
131 #endif
132 
133 	for (unsigned i = 0; i < g_AlphaSize; ++i)
134 		{
135 		SCORE scoreSum = 0;
136 		for (unsigned j = 0; j < g_AlphaSize; ++j)
137 			scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
138 		PP.m_AAScores[i] = scoreSum;
139 		}
140 	}
141 
ProfScoresFromFreqs(ProfPos * Prof,unsigned uLength)142 void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
143 	{
144 	for (unsigned i = 0; i < uLength; ++i)
145 		ScoresFromFreqsPos(Prof, uLength, i);
146 	}
147 
AppendDelete(const MSA & msaA,unsigned & uColIndexA,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)148 static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
149   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
150   unsigned &uColIndexCombined)
151 	{
152 #if	TRACE
153 	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
154 	  uColIndexA, uColIndexCombined);
155 #endif
156 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
157 		{
158 		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
159 		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
160 		}
161 
162 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
163 		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
164 
165 	++uColIndexCombined;
166 	++uColIndexA;
167 	}
168 
AppendInsert(const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)169 static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
170   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
171   unsigned &uColIndexCombined)
172 	{
173 #if	TRACE
174 	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
175 	  uColIndexB, uColIndexCombined);
176 #endif
177 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
178 		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
179 
180 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
181 		{
182 		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
183 		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
184 		}
185 
186 	++uColIndexCombined;
187 	++uColIndexB;
188 	}
189 
AppendTplInserts(const MSA & msaA,unsigned & uColIndexA,unsigned uColCountA,const MSA & msaB,unsigned & uColIndexB,unsigned uColCountB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)190 static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
191   const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
192   unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
193 	{
194     MuscleContext *ctx = getMuscleContext();
195     char *g_UnalignChar = ctx->alpha.g_UnalignChar;
196 #if	TRACE
197 	Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
198 	  uColIndexA, uColIndexB, uColIndexCombined);
199 #endif
200 //	const unsigned uLengthA = msaA.GetColCount();
201 //	const unsigned uLengthB = msaB.GetColCount();
202 
203 	unsigned uNewColCount = uColCountA;
204 	if (uColCountB > uNewColCount)
205 		uNewColCount = uColCountB;
206 
207 	for (unsigned n = 0; n < uColCountA; ++n)
208 		{
209 		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
210 			{
211 			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
212 			c = UnalignChar(c);
213 			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
214 			}
215 		}
216 	for (unsigned n = uColCountA; n < uNewColCount; ++n)
217 		{
218 		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
219 			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
220 		}
221 
222 	for (unsigned n = 0; n < uColCountB; ++n)
223 		{
224 		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
225 			{
226 			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
227 			c = UnalignChar(c);
228 			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
229 			}
230 		}
231 	for (unsigned n = uColCountB; n < uNewColCount; ++n)
232 		{
233 		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
234 			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
235 		}
236 
237 	uColIndexCombined += uNewColCount;
238 	uColIndexA += uColCountA;
239 	uColIndexB += uColCountB;
240 	}
241 
AppendMatch(const MSA & msaA,unsigned & uColIndexA,const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)242 static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
243   unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
244   MSA &msaCombined, unsigned &uColIndexCombined)
245 	{
246 #if	TRACE
247 	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
248 	  uColIndexA, uColIndexB, uColIndexCombined);
249 #endif
250 
251 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
252 		{
253 		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
254 		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
255 		}
256 
257 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
258 		{
259 		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
260 		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
261 		}
262 
263 	++uColIndexA;
264 	++uColIndexB;
265 	++uColIndexCombined;
266 	}
267 
AlignTwoMSAsGivenPath(const PWPath & Path,const MSA & msaA,const MSA & msaB,MSA & msaCombined)268 void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
269   MSA &msaCombined)
270 	{
271 	msaCombined.Clear();
272 
273 #if	TRACE
274 	Log("FastAlignProfiles\n");
275 	Log("Template A:\n");
276 	msaA.LogMe();
277 	Log("Template B:\n");
278 	msaB.LogMe();
279 #endif
280 
281 	const unsigned uColCountA = msaA.GetColCount();
282 	const unsigned uColCountB = msaB.GetColCount();
283 
284 	const unsigned uSeqCountA = msaA.GetSeqCount();
285 	const unsigned uSeqCountB = msaB.GetSeqCount();
286 
287 	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
288 
289 // Copy sequence names into combined MSA
290 	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
291 		{
292 		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
293 		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
294 		}
295 
296 	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
297 		{
298 		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
299 		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
300 		}
301 
302 	unsigned uColIndexA = 0;
303 	unsigned uColIndexB = 0;
304 	unsigned uColIndexCombined = 0;
305 	const unsigned uEdgeCount = Path.GetEdgeCount();
306 	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
307 		{
308 		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
309 #if	TRACE
310 		Log("\nEdge %u %c%u.%u\n",
311 		  uEdgeIndex,
312 		  Edge.cType,
313 		  Edge.uPrefixLengthA,
314 		  Edge.uPrefixLengthB);
315 #endif
316 		const char cType = Edge.cType;
317 		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
318 		unsigned uColCountA = 0;
319 		if (uPrefixLengthA > 0)
320 			{
321 			const unsigned uNodeIndexA = uPrefixLengthA - 1;
322 			const unsigned uTplColIndexA = uNodeIndexA;
323 			if (uTplColIndexA > uColIndexA)
324 				uColCountA = uTplColIndexA - uColIndexA;
325 			}
326 
327 		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
328 		unsigned uColCountB = 0;
329 		if (uPrefixLengthB > 0)
330 			{
331 			const unsigned uNodeIndexB = uPrefixLengthB - 1;
332 			const unsigned uTplColIndexB = uNodeIndexB;
333 			if (uTplColIndexB > uColIndexB)
334 				uColCountB = uTplColIndexB - uColIndexB;
335 			}
336 
337 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
338 		assert(uColCountA == 0);
339 		assert(uColCountB == 0);
340 		AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
341 		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
342 
343 		switch (cType)
344 			{
345 		case 'M':
346 			{
347 			assert(uPrefixLengthA > 0);
348 			assert(uPrefixLengthB > 0);
349 			const unsigned uColA = uPrefixLengthA - 1;
350 			const unsigned uColB = uPrefixLengthB - 1;
351 			assert(uColIndexA == uColA);
352 			assert(uColIndexB == uColB);
353 			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
354 			  msaCombined, uColIndexCombined);
355 			break;
356 			}
357 		case 'D':
358 			{
359 			assert(uPrefixLengthA > 0);
360 			const unsigned uColA = uPrefixLengthA - 1;
361 			assert(uColIndexA == uColA);
362 			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
363 			break;
364 			}
365 		case 'I':
366 			{
367 			assert(uPrefixLengthB > 0);
368 			const unsigned uColB = uPrefixLengthB - 1;
369 			assert(uColIndexB == uColB);
370 			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
371 			break;
372 			}
373 		default:
374 			assert(false);
375 			}
376 		}
377 	unsigned uInsertColCountA = uColCountA - uColIndexA;
378 	unsigned uInsertColCountB = uColCountB - uColIndexB;
379 
380 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
381 	assert(uInsertColCountA == 0);
382 	assert(uInsertColCountB == 0);
383 	AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
384 	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
385 
386 	assert(msaCombined.GetColCount() == uEdgeCount);
387 	}
388 
389 static const ProfPos PPStart =
390 	{
391 	false,		//m_bAllGaps;
392 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
393 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
394 	1.0,	// m_LL;
395 	0.0,	// m_LG;
396 	0.0,	// m_GL;
397 	0.0,	// m_GG;
398 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores
399 	0,		// m_uResidueGroup;
400 	1.0,	// m_fOcc;
401 	0.0,	// m_fcStartOcc;
402 	0.0,	// m_fcEndOcc;
403 	0.0,	// m_scoreGapOpen;
404 	0.0,	// m_scoreGapClose;
405 	};
406 
407 // MM
408 //  Ai�1	Ai		Out
409 //  X		X	LL	LL
410 //  X		-	LG	LG
411 //  -		X	GL	GL
412 //  -		-	GG	GG
413 //
414 //  Bj�1	Bj
415 //  X		X	LL	LL
416 //  X		-	LG	LG
417 //  -		X	GL	GL
418 //  -		-	GG	GG
SetGapsMM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)419 static void SetGapsMM(
420   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
421   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
422   ProfPos *POut, unsigned uColIndexOut)
423 	{
424 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
425 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
426 	ProfPos &PPO = POut[uColIndexOut];
427 
428 	PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
429 	PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
430 	PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
431 	PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
432 	}
433 
434 // MD
435 //  Ai�1	Ai		Out
436 //  X		X	LL	LL
437 //  X		-	LG	LG
438 //  -		X	GL	GL
439 //  -		-	GG	GG
440 //
441 //  Bj		(-)
442 //  X		-	?L	LG
443 //  -		-	?G	GG
SetGapsMD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)444 static void SetGapsMD(
445   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
446   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
447   ProfPos *POut, unsigned uColIndexOut)
448 	{
449 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
450 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
451 	ProfPos &PPO = POut[uColIndexOut];
452 
453 	PPO.m_LL = wA*PPA.m_LL;
454 	PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
455 	PPO.m_GL = wA*PPA.m_GL;
456 	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
457 	}
458 
459 // DD
460 //  Ai�1	Ai		Out
461 //  X		X	LL	LL
462 //  X		-	LG	LG
463 //  -		X	GL	GL
464 //  -		-	GG	GG
465 //
466 //  (-)		(-)
467 //  -		-	??	GG
SetGapsDD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)468 static void SetGapsDD(
469   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
470   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
471   ProfPos *POut, unsigned uColIndexOut)
472 	{
473 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
474 	ProfPos &PPO = POut[uColIndexOut];
475 
476 	PPO.m_LL = wA*PPA.m_LL;
477 	PPO.m_LG = wA*PPA.m_LG;
478 	PPO.m_GL = wA*PPA.m_GL;
479 	PPO.m_GG = wA*PPA.m_GG + wB;
480 	}
481 
482 // MI
483 //  Ai		(-)		Out
484 //  X		-	?L	LG
485 //  -		-	?G	GG
486 
487 //  Bj�1	Bj
488 //  X		X	LL	LL
489 //  X		-	LG	LG
490 //  -		X	GL	GL
491 //  -		-	GG	GG
SetGapsMI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)492 static void SetGapsMI(
493   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
494   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
495   ProfPos *POut, unsigned uColIndexOut)
496 	{
497 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
498 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
499 	ProfPos &PPO = POut[uColIndexOut];
500 
501 	PPO.m_LL = wB*PPB.m_LL;
502 	PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
503 	PPO.m_GL = wB*PPB.m_GL;
504 	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
505 	}
506 
507 // DM
508 //  Ai�1	Ai		Out
509 //  X		X	LL	LL
510 //  X		-	LG	LG
511 //  -		X	GL	GL
512 //  -		-	GG	GG
513 //
514 //  (-)		Bj
515 //  -		X		?L	GL
516 //  -		-		?G	GG
SetGapsDM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)517 static void SetGapsDM(
518   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
519   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
520   ProfPos *POut, unsigned uColIndexOut)
521 	{
522 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
523 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
524 	ProfPos &PPO = POut[uColIndexOut];
525 
526 	PPO.m_LL = wA*PPA.m_LL;
527 	PPO.m_LG = wA*PPA.m_LG;
528 	PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
529 	PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
530 	}
531 
532 // IM
533 //  (-)		Ai		Out
534 //  -		X	?L	GL
535 //  -		-	?G	GG
536 
537 //  Bj�1	Bj
538 //  X		X	LL	LL
539 //  X		-	LG	LG
540 //  -		X	GL	GL
541 //  -		-	GG	GG
SetGapsIM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)542 static void SetGapsIM(
543   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
544   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
545   ProfPos *POut, unsigned uColIndexOut)
546 	{
547 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
548 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
549 	ProfPos &PPO = POut[uColIndexOut];
550 
551 	PPO.m_LL = wB*PPB.m_LL;
552 	PPO.m_LG = wB*PPB.m_LG;
553 	PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
554 	PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
555 	}
556 
557 // ID
558 //  (-)		Ai		Out
559 //  -		X	?L	GL
560 //  -		-	?G	GG
561 
562 //  Bj		(-)
563 //  X		-	?L	LG
564 //  -		-	?G	GG
SetGapsID(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)565 static void SetGapsID(
566   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
567   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
568   ProfPos *POut, unsigned uColIndexOut)
569 	{
570 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
571 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
572 	ProfPos &PPO = POut[uColIndexOut];
573 
574 	PPO.m_LL = 0;
575 	PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
576 	PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
577 	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
578 	}
579 
580 // DI
581 //  Ai		(-)		Out
582 //  X		-	?L	LG
583 //  -		-	?G	GG
584 
585 //  (-)		Bj
586 //  -		X	?L	GL
587 //  -		-	?G	GG
SetGapsDI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)588 static void SetGapsDI(
589   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
590   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
591   ProfPos *POut, unsigned uColIndexOut)
592 	{
593 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
594 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
595 	ProfPos &PPO = POut[uColIndexOut];
596 
597 	PPO.m_LL = 0;
598 	PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
599 	PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
600 	PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
601 	}
602 
603 // II
604 //  (-)		(-)		Out
605 //  -		-	??	GG
606 
607 //  Bj�1	Bj
608 //  X		X	LL	LL
609 //  X		-	LG	LG
610 //  -		X	GL	GL
611 //  -		-	GG	GG
SetGapsII(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)612 static void SetGapsII(
613   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
614   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
615   ProfPos *POut, unsigned uColIndexOut)
616 	{
617 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
618 	ProfPos &PPO = POut[uColIndexOut];
619 
620 	PPO.m_LL = wB*PPB.m_LL;
621 	PPO.m_LG = wB*PPB.m_LG;
622 	PPO.m_GL = wB*PPB.m_GL;
623 	PPO.m_GG = wB*PPB.m_GG + wA;
624 	}
625 
SetFreqs(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)626 static void SetFreqs(
627   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
628   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
629   ProfPos *POut, unsigned uColIndexOut)
630 	{
631 	const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
632 	const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
633 	ProfPos &PPO = POut[uColIndexOut];
634 
635 	if (getMuscleContext()->params.g_bNormalizeCounts)
636 		{
637 		const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
638 		const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
639 		FCOUNT fTotal = 0;
640 		for (unsigned i = 0; i < 20; ++i)
641 			{
642 			const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
643 			PPO.m_fcCounts[i] = f;
644 			fTotal += f;
645 			}
646 		if (fTotal > 0)
647 			for (unsigned i = 0; i < 20; ++i)
648 				PPO.m_fcCounts[i] /= fTotal;
649 		}
650 	else
651 		{
652 		for (unsigned i = 0; i < 20; ++i)
653 			PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
654 		}
655 	}
656 
AlignTwoProfsGivenPath(const PWPath & Path,const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos ** ptrPOut,unsigned * ptruLengthOut)657 void AlignTwoProfsGivenPath(const PWPath &Path,
658   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
659   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
660   ProfPos **ptrPOut, unsigned *ptruLengthOut)
661 	{
662 #if	TRACE
663 	Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
664 	Path.LogMe();
665 #endif
666 	assert(BTEq(wA + wB, 1.0));
667 
668 	unsigned uColIndexA = 0;
669 	unsigned uColIndexB = 0;
670 	unsigned uColIndexOut = 0;
671 	const unsigned uEdgeCount = Path.GetEdgeCount();
672 	ProfPos *POut = new ProfPos[uEdgeCount];
673 	char cPrevType = 'M';
674 	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
675 		{
676 		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
677 		const char cType = Edge.cType;
678 
679 		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
680 		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
681 
682 #if	TRACE
683 		Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
684 		  uEdgeIndex,
685 		  Edge.cType,
686 		  Edge.uPrefixLengthA,
687 		  Edge.uPrefixLengthB,
688 		  uColIndexA,
689 		  uColIndexB);
690 #endif
691 
692 		POut[uColIndexOut].m_bAllGaps = false;
693 		switch (cType)
694 			{
695 		case 'M':
696 			{
697 			assert(uPrefixLengthA > 0);
698 			assert(uPrefixLengthB > 0);
699 			SetFreqs(
700 			  PA, uPrefixLengthA, wA,
701 			  PB, uPrefixLengthB, wB,
702 			  POut, uColIndexOut);
703 			switch (cPrevType)
704 				{
705 			case 'M':
706 				SetGapsMM(
707 				  PA, uPrefixLengthA, wA,
708 				  PB, uPrefixLengthB, wB,
709 				  POut, uColIndexOut);
710 			  break;
711 			case 'D':
712 				SetGapsDM(
713 				  PA, uPrefixLengthA, wA,
714 				  PB, uPrefixLengthB, wB,
715 				  POut, uColIndexOut);
716 				break;
717 			case 'I':
718 				SetGapsIM(
719 				  PA, uPrefixLengthA, wA,
720 				  PB, uPrefixLengthB, wB,
721 				  POut, uColIndexOut);
722 				break;
723 			default:
724 				Quit("Bad cPrevType");
725 				}
726 			++uColIndexA;
727 			++uColIndexB;
728 			++uColIndexOut;
729 			break;
730 			}
731 		case 'D':
732 			{
733 			assert(uPrefixLengthA > 0);
734 			SetFreqs(
735 			  PA, uPrefixLengthA, wA,
736 			  PB, uPrefixLengthB, 0,
737 			  POut, uColIndexOut);
738 			switch (cPrevType)
739 				{
740 			case 'M':
741 				SetGapsMD(
742 				  PA, uPrefixLengthA, wA,
743 				  PB, uPrefixLengthB, wB,
744 				  POut, uColIndexOut);
745 			  break;
746 			case 'D':
747 				SetGapsDD(
748 				  PA, uPrefixLengthA, wA,
749 				  PB, uPrefixLengthB, wB,
750 				  POut, uColIndexOut);
751 				break;
752 			case 'I':
753 				SetGapsID(
754 				  PA, uPrefixLengthA, wA,
755 				  PB, uPrefixLengthB, wB,
756 				  POut, uColIndexOut);
757 				break;
758 			default:
759 				Quit("Bad cPrevType");
760 				}
761 			++uColIndexA;
762 			++uColIndexOut;
763 			break;
764 			}
765 		case 'I':
766 			{
767 			assert(uPrefixLengthB > 0);
768 			SetFreqs(
769 			  PA, uPrefixLengthA, 0,
770 			  PB, uPrefixLengthB, wB,
771 			  POut, uColIndexOut);
772 			switch (cPrevType)
773 				{
774 			case 'M':
775 				SetGapsMI(
776 				  PA, uPrefixLengthA, wA,
777 				  PB, uPrefixLengthB, wB,
778 				  POut, uColIndexOut);
779 			  break;
780 			case 'D':
781 				SetGapsDI(
782 				  PA, uPrefixLengthA, wA,
783 				  PB, uPrefixLengthB, wB,
784 				  POut, uColIndexOut);
785 				break;
786 			case 'I':
787 				SetGapsII(
788 				  PA, uPrefixLengthA, wA,
789 				  PB, uPrefixLengthB, wB,
790 				  POut, uColIndexOut);
791 				break;
792 			default:
793 				Quit("Bad cPrevType");
794 				}
795 			++uColIndexB;
796 			++uColIndexOut;
797 			break;
798 			}
799 		default:
800 			assert(false);
801 			}
802 		cPrevType = cType;
803 		}
804 	assert(uColIndexOut == uEdgeCount);
805 
806 	ProfScoresFromFreqs(POut, uEdgeCount);
807 	ValidateProf(POut, uEdgeCount);
808 
809 	*ptrPOut = POut;
810 	*ptruLengthOut = uEdgeCount;
811 
812 #if	TRACE
813 	Log("AlignTwoProfsGivenPath:\n");
814 	ListProfile(POut, uEdgeCount, 0);
815 #endif
816 	}
817