1 #include <U2Core/disable-warnings.h>
2 U2_DISABLE_WARNINGS
3
4 #include "muscle.h"
5 #include "msa.h"
6 #include "pwpath.h"
7 #include "profile.h"
8 #include "muscle_context.h"
9
10 //static void LogPP(const ProfPos &PP)
11 // {
12 // char *g_LetterToChar = getMuscleContext()->alpha.g_LetterToChar;
13 //
14 // Log("ResidueGroup %u\n", PP.m_uResidueGroup);
15 // Log("AllGaps %d\n", PP.m_bAllGaps);
16 // Log("Occ %.3g\n", PP.m_fOcc);
17 // Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
18 // Log("Freqs ");
19 // for (unsigned i = 0; i < 20; ++i)
20 // if (PP.m_fcCounts[i] > 0)
21 // Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
22 // Log("\n");
23 // }
24
AssertProfPosEq(const ProfPos * PA,const ProfPos * PB,unsigned i)25 static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
26 {
27 const ProfPos &PPA = PA[i];
28 const ProfPos &PPB = PB[i];
29 #define eq(x) if (PPA.m_##x != PPB.m_##x) { /*LogPP(PPA); LogPP(PPB);*/ Quit("AssertProfPosEq." #x); }
30 #define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { /*LogPP(PPA); LogPP(PPB);*/ Quit("AssertProfPosEq." #x); }
31 eq(bAllGaps)
32 eq(uResidueGroup)
33
34 be(LL)
35 be(LG)
36 be(GL)
37 be(GG)
38 be(fOcc)
39 be(scoreGapOpen)
40 be(scoreGapClose)
41
42 for (unsigned j = 0; j < 20; ++j)
43 {
44 #define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
45 #define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
46 bej(fcCounts[j]);
47 // eqj(uSortOrder[j]) // may differ due to ties, don't check?
48 bej(AAScores[j])
49 #undef eqj
50 #undef bej
51 }
52 #undef eq
53 #undef be
54 }
55
AssertProfsEq(const ProfPos * PA,unsigned uLengthA,const ProfPos * PB,unsigned uLengthB)56 void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
57 unsigned uLengthB)
58 {
59 if (uLengthA != uLengthB)
60 Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
61 for (unsigned i = 0; i < uLengthB; ++i)
62 AssertProfPosEq(PA, PB, i);
63 }
64
65 #if DEBUG
ValidateProf(const ProfPos * Prof,unsigned uLength)66 static void ValidateProf(const ProfPos *Prof, unsigned uLength)
67 {
68 for (unsigned i = 0; i < uLength; ++i)
69 {
70 const ProfPos &PP = Prof[i];
71
72 FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;
73 assert(BTEq(s1, 1.0));
74
75 if (i > 0)
76 {
77 const ProfPos &PPPrev = Prof[i-1];
78 FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;
79 FCOUNT s3 = PP.m_LL + PP.m_LG;
80 assert(BTEq(s2, s3));
81 }
82 if (i < uLength - 1)
83 {
84 const ProfPos &PPNext = Prof[i+1];
85 FCOUNT s4 = PP.m_LL + PP.m_GL;
86 FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
87 assert(BTEq(s4, s5));
88 }
89 }
90 }
91 #else
92 #define ValidateProf(Prof, Length) /* empty */
93 #endif
94
ScoresFromFreqsPos(ProfPos * Prof,unsigned uLength,unsigned uPos)95 static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
96 {
97 MuscleContext *ctx = getMuscleContext();
98 unsigned &g_AlphaSize = ctx->alpha.g_AlphaSize;
99 PTR_SCOREMATRIX &g_ptrScoreMatrix = ctx->params.g_ptrScoreMatrix;
100 SCORE &g_scoreGapOpen = ctx->params.g_scoreGapOpen;
101 //SCORE &g_scoreGapExtend = ctx->params.g_scoreGapExtend;
102 //SCORE &g_scoreGapOpen2 = ctx->params.g_scoreGapOpen2;
103 //SCORE &g_scoreGapExtend2 = ctx->params.g_scoreGapExtend2;
104
105 ProfPos &PP = Prof[uPos];
106 SortCounts(PP.m_fcCounts, PP.m_uSortOrder, g_AlphaSize);
107 PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
108
109 // "Occupancy"
110 PP.m_fOcc = PP.m_LL + PP.m_GL;
111
112 // Frequency of gap-opens in this position (i)
113 // Gap open = letter in i-1 and gap in i
114 // = iff LG in i
115 FCOUNT fcOpen = PP.m_LG;
116
117 // Frequency of gap-closes in this position
118 // Gap close = gap in i and letter in i+1
119 // = iff GL in i+1
120 FCOUNT fcClose;
121 if (uPos + 1 < uLength)
122 fcClose = Prof[uPos + 1].m_GL;
123 else
124 fcClose = PP.m_GG + PP.m_LG;
125
126 PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
127 PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
128 #if DOUBLE_AFFINE
129 PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
130 PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
131 #endif
132
133 for (unsigned i = 0; i < g_AlphaSize; ++i)
134 {
135 SCORE scoreSum = 0;
136 for (unsigned j = 0; j < g_AlphaSize; ++j)
137 scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
138 PP.m_AAScores[i] = scoreSum;
139 }
140 }
141
ProfScoresFromFreqs(ProfPos * Prof,unsigned uLength)142 void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
143 {
144 for (unsigned i = 0; i < uLength; ++i)
145 ScoresFromFreqsPos(Prof, uLength, i);
146 }
147
AppendDelete(const MSA & msaA,unsigned & uColIndexA,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)148 static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
149 unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
150 unsigned &uColIndexCombined)
151 {
152 #if TRACE
153 Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
154 uColIndexA, uColIndexCombined);
155 #endif
156 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
157 {
158 char c = msaA.GetChar(uSeqIndexA, uColIndexA);
159 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
160 }
161
162 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
163 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
164
165 ++uColIndexCombined;
166 ++uColIndexA;
167 }
168
AppendInsert(const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)169 static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
170 unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
171 unsigned &uColIndexCombined)
172 {
173 #if TRACE
174 Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
175 uColIndexB, uColIndexCombined);
176 #endif
177 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
178 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
179
180 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
181 {
182 char c = msaB.GetChar(uSeqIndexB, uColIndexB);
183 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
184 }
185
186 ++uColIndexCombined;
187 ++uColIndexB;
188 }
189
AppendTplInserts(const MSA & msaA,unsigned & uColIndexA,unsigned uColCountA,const MSA & msaB,unsigned & uColIndexB,unsigned uColCountB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)190 static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
191 const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
192 unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
193 {
194 MuscleContext *ctx = getMuscleContext();
195 char *g_UnalignChar = ctx->alpha.g_UnalignChar;
196 #if TRACE
197 Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
198 uColIndexA, uColIndexB, uColIndexCombined);
199 #endif
200 // const unsigned uLengthA = msaA.GetColCount();
201 // const unsigned uLengthB = msaB.GetColCount();
202
203 unsigned uNewColCount = uColCountA;
204 if (uColCountB > uNewColCount)
205 uNewColCount = uColCountB;
206
207 for (unsigned n = 0; n < uColCountA; ++n)
208 {
209 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
210 {
211 char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
212 c = UnalignChar(c);
213 msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
214 }
215 }
216 for (unsigned n = uColCountA; n < uNewColCount; ++n)
217 {
218 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
219 msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
220 }
221
222 for (unsigned n = 0; n < uColCountB; ++n)
223 {
224 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
225 {
226 char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
227 c = UnalignChar(c);
228 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
229 }
230 }
231 for (unsigned n = uColCountB; n < uNewColCount; ++n)
232 {
233 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
234 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
235 }
236
237 uColIndexCombined += uNewColCount;
238 uColIndexA += uColCountA;
239 uColIndexB += uColCountB;
240 }
241
AppendMatch(const MSA & msaA,unsigned & uColIndexA,const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)242 static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
243 unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
244 MSA &msaCombined, unsigned &uColIndexCombined)
245 {
246 #if TRACE
247 Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
248 uColIndexA, uColIndexB, uColIndexCombined);
249 #endif
250
251 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
252 {
253 char c = msaA.GetChar(uSeqIndexA, uColIndexA);
254 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
255 }
256
257 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
258 {
259 char c = msaB.GetChar(uSeqIndexB, uColIndexB);
260 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
261 }
262
263 ++uColIndexA;
264 ++uColIndexB;
265 ++uColIndexCombined;
266 }
267
AlignTwoMSAsGivenPath(const PWPath & Path,const MSA & msaA,const MSA & msaB,MSA & msaCombined)268 void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
269 MSA &msaCombined)
270 {
271 msaCombined.Clear();
272
273 #if TRACE
274 Log("FastAlignProfiles\n");
275 Log("Template A:\n");
276 msaA.LogMe();
277 Log("Template B:\n");
278 msaB.LogMe();
279 #endif
280
281 const unsigned uColCountA = msaA.GetColCount();
282 const unsigned uColCountB = msaB.GetColCount();
283
284 const unsigned uSeqCountA = msaA.GetSeqCount();
285 const unsigned uSeqCountB = msaB.GetSeqCount();
286
287 msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
288
289 // Copy sequence names into combined MSA
290 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
291 {
292 msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
293 msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
294 }
295
296 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
297 {
298 msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
299 msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
300 }
301
302 unsigned uColIndexA = 0;
303 unsigned uColIndexB = 0;
304 unsigned uColIndexCombined = 0;
305 const unsigned uEdgeCount = Path.GetEdgeCount();
306 for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
307 {
308 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
309 #if TRACE
310 Log("\nEdge %u %c%u.%u\n",
311 uEdgeIndex,
312 Edge.cType,
313 Edge.uPrefixLengthA,
314 Edge.uPrefixLengthB);
315 #endif
316 const char cType = Edge.cType;
317 const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
318 unsigned uColCountA = 0;
319 if (uPrefixLengthA > 0)
320 {
321 const unsigned uNodeIndexA = uPrefixLengthA - 1;
322 const unsigned uTplColIndexA = uNodeIndexA;
323 if (uTplColIndexA > uColIndexA)
324 uColCountA = uTplColIndexA - uColIndexA;
325 }
326
327 const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
328 unsigned uColCountB = 0;
329 if (uPrefixLengthB > 0)
330 {
331 const unsigned uNodeIndexB = uPrefixLengthB - 1;
332 const unsigned uTplColIndexB = uNodeIndexB;
333 if (uTplColIndexB > uColIndexB)
334 uColCountB = uTplColIndexB - uColIndexB;
335 }
336
337 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
338 assert(uColCountA == 0);
339 assert(uColCountB == 0);
340 AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
341 uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
342
343 switch (cType)
344 {
345 case 'M':
346 {
347 assert(uPrefixLengthA > 0);
348 assert(uPrefixLengthB > 0);
349 const unsigned uColA = uPrefixLengthA - 1;
350 const unsigned uColB = uPrefixLengthB - 1;
351 assert(uColIndexA == uColA);
352 assert(uColIndexB == uColB);
353 AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
354 msaCombined, uColIndexCombined);
355 break;
356 }
357 case 'D':
358 {
359 assert(uPrefixLengthA > 0);
360 const unsigned uColA = uPrefixLengthA - 1;
361 assert(uColIndexA == uColA);
362 AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
363 break;
364 }
365 case 'I':
366 {
367 assert(uPrefixLengthB > 0);
368 const unsigned uColB = uPrefixLengthB - 1;
369 assert(uColIndexB == uColB);
370 AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
371 break;
372 }
373 default:
374 assert(false);
375 }
376 }
377 unsigned uInsertColCountA = uColCountA - uColIndexA;
378 unsigned uInsertColCountB = uColCountB - uColIndexB;
379
380 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
381 assert(uInsertColCountA == 0);
382 assert(uInsertColCountB == 0);
383 AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
384 uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
385
386 assert(msaCombined.GetColCount() == uEdgeCount);
387 }
388
389 static const ProfPos PPStart =
390 {
391 false, //m_bAllGaps;
392 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
393 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
394 1.0, // m_LL;
395 0.0, // m_LG;
396 0.0, // m_GL;
397 0.0, // m_GG;
398 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores
399 0, // m_uResidueGroup;
400 1.0, // m_fOcc;
401 0.0, // m_fcStartOcc;
402 0.0, // m_fcEndOcc;
403 0.0, // m_scoreGapOpen;
404 0.0, // m_scoreGapClose;
405 };
406
407 // MM
408 // Ai�1 Ai Out
409 // X X LL LL
410 // X - LG LG
411 // - X GL GL
412 // - - GG GG
413 //
414 // Bj�1 Bj
415 // X X LL LL
416 // X - LG LG
417 // - X GL GL
418 // - - GG GG
SetGapsMM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)419 static void SetGapsMM(
420 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
421 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
422 ProfPos *POut, unsigned uColIndexOut)
423 {
424 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
425 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
426 ProfPos &PPO = POut[uColIndexOut];
427
428 PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
429 PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
430 PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
431 PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
432 }
433
434 // MD
435 // Ai�1 Ai Out
436 // X X LL LL
437 // X - LG LG
438 // - X GL GL
439 // - - GG GG
440 //
441 // Bj (-)
442 // X - ?L LG
443 // - - ?G GG
SetGapsMD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)444 static void SetGapsMD(
445 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
446 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
447 ProfPos *POut, unsigned uColIndexOut)
448 {
449 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
450 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
451 ProfPos &PPO = POut[uColIndexOut];
452
453 PPO.m_LL = wA*PPA.m_LL;
454 PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
455 PPO.m_GL = wA*PPA.m_GL;
456 PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
457 }
458
459 // DD
460 // Ai�1 Ai Out
461 // X X LL LL
462 // X - LG LG
463 // - X GL GL
464 // - - GG GG
465 //
466 // (-) (-)
467 // - - ?? GG
SetGapsDD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)468 static void SetGapsDD(
469 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
470 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
471 ProfPos *POut, unsigned uColIndexOut)
472 {
473 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
474 ProfPos &PPO = POut[uColIndexOut];
475
476 PPO.m_LL = wA*PPA.m_LL;
477 PPO.m_LG = wA*PPA.m_LG;
478 PPO.m_GL = wA*PPA.m_GL;
479 PPO.m_GG = wA*PPA.m_GG + wB;
480 }
481
482 // MI
483 // Ai (-) Out
484 // X - ?L LG
485 // - - ?G GG
486
487 // Bj�1 Bj
488 // X X LL LL
489 // X - LG LG
490 // - X GL GL
491 // - - GG GG
SetGapsMI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)492 static void SetGapsMI(
493 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
494 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
495 ProfPos *POut, unsigned uColIndexOut)
496 {
497 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
498 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
499 ProfPos &PPO = POut[uColIndexOut];
500
501 PPO.m_LL = wB*PPB.m_LL;
502 PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
503 PPO.m_GL = wB*PPB.m_GL;
504 PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
505 }
506
507 // DM
508 // Ai�1 Ai Out
509 // X X LL LL
510 // X - LG LG
511 // - X GL GL
512 // - - GG GG
513 //
514 // (-) Bj
515 // - X ?L GL
516 // - - ?G GG
SetGapsDM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)517 static void SetGapsDM(
518 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
519 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
520 ProfPos *POut, unsigned uColIndexOut)
521 {
522 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
523 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
524 ProfPos &PPO = POut[uColIndexOut];
525
526 PPO.m_LL = wA*PPA.m_LL;
527 PPO.m_LG = wA*PPA.m_LG;
528 PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
529 PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
530 }
531
532 // IM
533 // (-) Ai Out
534 // - X ?L GL
535 // - - ?G GG
536
537 // Bj�1 Bj
538 // X X LL LL
539 // X - LG LG
540 // - X GL GL
541 // - - GG GG
SetGapsIM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)542 static void SetGapsIM(
543 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
544 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
545 ProfPos *POut, unsigned uColIndexOut)
546 {
547 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
548 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
549 ProfPos &PPO = POut[uColIndexOut];
550
551 PPO.m_LL = wB*PPB.m_LL;
552 PPO.m_LG = wB*PPB.m_LG;
553 PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
554 PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
555 }
556
557 // ID
558 // (-) Ai Out
559 // - X ?L GL
560 // - - ?G GG
561
562 // Bj (-)
563 // X - ?L LG
564 // - - ?G GG
SetGapsID(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)565 static void SetGapsID(
566 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
567 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
568 ProfPos *POut, unsigned uColIndexOut)
569 {
570 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
571 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
572 ProfPos &PPO = POut[uColIndexOut];
573
574 PPO.m_LL = 0;
575 PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
576 PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
577 PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
578 }
579
580 // DI
581 // Ai (-) Out
582 // X - ?L LG
583 // - - ?G GG
584
585 // (-) Bj
586 // - X ?L GL
587 // - - ?G GG
SetGapsDI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)588 static void SetGapsDI(
589 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
590 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
591 ProfPos *POut, unsigned uColIndexOut)
592 {
593 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
594 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
595 ProfPos &PPO = POut[uColIndexOut];
596
597 PPO.m_LL = 0;
598 PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
599 PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
600 PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
601 }
602
603 // II
604 // (-) (-) Out
605 // - - ?? GG
606
607 // Bj�1 Bj
608 // X X LL LL
609 // X - LG LG
610 // - X GL GL
611 // - - GG GG
SetGapsII(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)612 static void SetGapsII(
613 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
614 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
615 ProfPos *POut, unsigned uColIndexOut)
616 {
617 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
618 ProfPos &PPO = POut[uColIndexOut];
619
620 PPO.m_LL = wB*PPB.m_LL;
621 PPO.m_LG = wB*PPB.m_LG;
622 PPO.m_GL = wB*PPB.m_GL;
623 PPO.m_GG = wB*PPB.m_GG + wA;
624 }
625
SetFreqs(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)626 static void SetFreqs(
627 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
628 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
629 ProfPos *POut, unsigned uColIndexOut)
630 {
631 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
632 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
633 ProfPos &PPO = POut[uColIndexOut];
634
635 if (getMuscleContext()->params.g_bNormalizeCounts)
636 {
637 const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
638 const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
639 FCOUNT fTotal = 0;
640 for (unsigned i = 0; i < 20; ++i)
641 {
642 const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
643 PPO.m_fcCounts[i] = f;
644 fTotal += f;
645 }
646 if (fTotal > 0)
647 for (unsigned i = 0; i < 20; ++i)
648 PPO.m_fcCounts[i] /= fTotal;
649 }
650 else
651 {
652 for (unsigned i = 0; i < 20; ++i)
653 PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
654 }
655 }
656
AlignTwoProfsGivenPath(const PWPath & Path,const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos ** ptrPOut,unsigned * ptruLengthOut)657 void AlignTwoProfsGivenPath(const PWPath &Path,
658 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
659 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
660 ProfPos **ptrPOut, unsigned *ptruLengthOut)
661 {
662 #if TRACE
663 Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
664 Path.LogMe();
665 #endif
666 assert(BTEq(wA + wB, 1.0));
667
668 unsigned uColIndexA = 0;
669 unsigned uColIndexB = 0;
670 unsigned uColIndexOut = 0;
671 const unsigned uEdgeCount = Path.GetEdgeCount();
672 ProfPos *POut = new ProfPos[uEdgeCount];
673 char cPrevType = 'M';
674 for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
675 {
676 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
677 const char cType = Edge.cType;
678
679 const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
680 const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
681
682 #if TRACE
683 Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
684 uEdgeIndex,
685 Edge.cType,
686 Edge.uPrefixLengthA,
687 Edge.uPrefixLengthB,
688 uColIndexA,
689 uColIndexB);
690 #endif
691
692 POut[uColIndexOut].m_bAllGaps = false;
693 switch (cType)
694 {
695 case 'M':
696 {
697 assert(uPrefixLengthA > 0);
698 assert(uPrefixLengthB > 0);
699 SetFreqs(
700 PA, uPrefixLengthA, wA,
701 PB, uPrefixLengthB, wB,
702 POut, uColIndexOut);
703 switch (cPrevType)
704 {
705 case 'M':
706 SetGapsMM(
707 PA, uPrefixLengthA, wA,
708 PB, uPrefixLengthB, wB,
709 POut, uColIndexOut);
710 break;
711 case 'D':
712 SetGapsDM(
713 PA, uPrefixLengthA, wA,
714 PB, uPrefixLengthB, wB,
715 POut, uColIndexOut);
716 break;
717 case 'I':
718 SetGapsIM(
719 PA, uPrefixLengthA, wA,
720 PB, uPrefixLengthB, wB,
721 POut, uColIndexOut);
722 break;
723 default:
724 Quit("Bad cPrevType");
725 }
726 ++uColIndexA;
727 ++uColIndexB;
728 ++uColIndexOut;
729 break;
730 }
731 case 'D':
732 {
733 assert(uPrefixLengthA > 0);
734 SetFreqs(
735 PA, uPrefixLengthA, wA,
736 PB, uPrefixLengthB, 0,
737 POut, uColIndexOut);
738 switch (cPrevType)
739 {
740 case 'M':
741 SetGapsMD(
742 PA, uPrefixLengthA, wA,
743 PB, uPrefixLengthB, wB,
744 POut, uColIndexOut);
745 break;
746 case 'D':
747 SetGapsDD(
748 PA, uPrefixLengthA, wA,
749 PB, uPrefixLengthB, wB,
750 POut, uColIndexOut);
751 break;
752 case 'I':
753 SetGapsID(
754 PA, uPrefixLengthA, wA,
755 PB, uPrefixLengthB, wB,
756 POut, uColIndexOut);
757 break;
758 default:
759 Quit("Bad cPrevType");
760 }
761 ++uColIndexA;
762 ++uColIndexOut;
763 break;
764 }
765 case 'I':
766 {
767 assert(uPrefixLengthB > 0);
768 SetFreqs(
769 PA, uPrefixLengthA, 0,
770 PB, uPrefixLengthB, wB,
771 POut, uColIndexOut);
772 switch (cPrevType)
773 {
774 case 'M':
775 SetGapsMI(
776 PA, uPrefixLengthA, wA,
777 PB, uPrefixLengthB, wB,
778 POut, uColIndexOut);
779 break;
780 case 'D':
781 SetGapsDI(
782 PA, uPrefixLengthA, wA,
783 PB, uPrefixLengthB, wB,
784 POut, uColIndexOut);
785 break;
786 case 'I':
787 SetGapsII(
788 PA, uPrefixLengthA, wA,
789 PB, uPrefixLengthB, wB,
790 POut, uColIndexOut);
791 break;
792 default:
793 Quit("Bad cPrevType");
794 }
795 ++uColIndexB;
796 ++uColIndexOut;
797 break;
798 }
799 default:
800 assert(false);
801 }
802 cPrevType = cType;
803 }
804 assert(uColIndexOut == uEdgeCount);
805
806 ProfScoresFromFreqs(POut, uEdgeCount);
807 ValidateProf(POut, uEdgeCount);
808
809 *ptrPOut = POut;
810 *ptruLengthOut = uEdgeCount;
811
812 #if TRACE
813 Log("AlignTwoProfsGivenPath:\n");
814 ListProfile(POut, uEdgeCount, 0);
815 #endif
816 }
817