1 #include "muscle.h"
2 #include "msa.h"
3 #include "pwpath.h"
4 #include "profile.h"
5
6 #define TRACE 0
7
LogPP(const ProfPos & PP)8 static void LogPP(const ProfPos &PP)
9 {
10 Log("ResidueGroup %u\n", PP.m_uResidueGroup);
11 Log("AllGaps %d\n", PP.m_bAllGaps);
12 Log("Occ %.3g\n", PP.m_fOcc);
13 Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
14 Log("Freqs ");
15 for (unsigned i = 0; i < 20; ++i)
16 if (PP.m_fcCounts[i] > 0)
17 Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
18 Log("\n");
19 }
20
AssertProfPosEq(const ProfPos * PA,const ProfPos * PB,unsigned i)21 static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
22 {
23 const ProfPos &PPA = PA[i];
24 const ProfPos &PPB = PB[i];
25 #define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
26 #define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
27 eq(bAllGaps)
28 eq(uResidueGroup)
29
30 be(LL)
31 be(LG)
32 be(GL)
33 be(GG)
34 be(fOcc)
35 be(scoreGapOpen)
36 be(scoreGapClose)
37
38 for (unsigned j = 0; j < 20; ++j)
39 {
40 #define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
41 #define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
42 bej(fcCounts[j]);
43 // eqj(uSortOrder[j]) // may differ due to ties, don't check?
44 bej(AAScores[j])
45 #undef eqj
46 #undef bej
47 }
48 #undef eq
49 #undef be
50 }
51
AssertProfsEq(const ProfPos * PA,unsigned uLengthA,const ProfPos * PB,unsigned uLengthB)52 void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
53 unsigned uLengthB)
54 {
55 if (uLengthA != uLengthB)
56 Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
57 for (unsigned i = 0; i < uLengthB; ++i)
58 AssertProfPosEq(PA, PB, i);
59 }
60
61 #if DEBUG
ValidateProf(const ProfPos * Prof,unsigned uLength)62 static void ValidateProf(const ProfPos *Prof, unsigned uLength)
63 {
64 for (unsigned i = 0; i < uLength; ++i)
65 {
66 const ProfPos &PP = Prof[i];
67
68 FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;
69 assert(BTEq(s1, 1.0));
70
71 if (i > 0)
72 {
73 const ProfPos &PPPrev = Prof[i-1];
74 FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;
75 FCOUNT s3 = PP.m_LL + PP.m_LG;
76 assert(BTEq(s2, s3));
77 }
78 if (i < uLength - 1)
79 {
80 const ProfPos &PPNext = Prof[i+1];
81 FCOUNT s4 = PP.m_LL + PP.m_GL;
82 FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
83 assert(BTEq(s4, s5));
84 }
85 }
86 }
87 #else
88 #define ValidateProf(Prof, Length) /* empty */
89 #endif
90
ScoresFromFreqsPos(ProfPos * Prof,unsigned uLength,unsigned uPos)91 static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
92 {
93 ProfPos &PP = Prof[uPos];
94 SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
95 PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
96
97 // "Occupancy"
98 PP.m_fOcc = PP.m_LL + PP.m_GL;
99
100 // Frequency of gap-opens in this position (i)
101 // Gap open = letter in i-1 and gap in i
102 // = iff LG in i
103 FCOUNT fcOpen = PP.m_LG;
104
105 // Frequency of gap-closes in this position
106 // Gap close = gap in i and letter in i+1
107 // = iff GL in i+1
108 FCOUNT fcClose;
109 if (uPos + 1 < uLength)
110 fcClose = Prof[uPos + 1].m_GL;
111 else
112 fcClose = PP.m_GG + PP.m_LG;
113
114 PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
115 PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
116 #if DOUBLE_AFFINE
117 PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
118 PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
119 #endif
120
121 for (unsigned i = 0; i < g_AlphaSize; ++i)
122 {
123 SCORE scoreSum = 0;
124 for (unsigned j = 0; j < g_AlphaSize; ++j)
125 scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
126 PP.m_AAScores[i] = scoreSum;
127 }
128 }
129
ProfScoresFromFreqs(ProfPos * Prof,unsigned uLength)130 void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
131 {
132 for (unsigned i = 0; i < uLength; ++i)
133 ScoresFromFreqsPos(Prof, uLength, i);
134 }
135
AppendDelete(const MSA & msaA,unsigned & uColIndexA,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)136 static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
137 unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
138 unsigned &uColIndexCombined)
139 {
140 #if TRACE
141 Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
142 uColIndexA, uColIndexCombined);
143 #endif
144 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
145 {
146 char c = msaA.GetChar(uSeqIndexA, uColIndexA);
147 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
148 }
149
150 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
151 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');
152
153 ++uColIndexCombined;
154 ++uColIndexA;
155 }
156
AppendInsert(const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)157 static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
158 unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
159 unsigned &uColIndexCombined)
160 {
161 #if TRACE
162 Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
163 uColIndexB, uColIndexCombined);
164 #endif
165 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
166 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');
167
168 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
169 {
170 char c = msaB.GetChar(uSeqIndexB, uColIndexB);
171 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
172 }
173
174 ++uColIndexCombined;
175 ++uColIndexB;
176 }
177
AppendTplInserts(const MSA & msaA,unsigned & uColIndexA,unsigned uColCountA,const MSA & msaB,unsigned & uColIndexB,unsigned uColCountB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)178 static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
179 const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
180 unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
181 {
182 #if TRACE
183 Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
184 uColIndexA, uColIndexB, uColIndexCombined);
185 #endif
186 const unsigned uLengthA = msaA.GetColCount();
187 const unsigned uLengthB = msaB.GetColCount();
188
189 unsigned uNewColCount = uColCountA;
190 if (uColCountB > uNewColCount)
191 uNewColCount = uColCountB;
192
193 for (unsigned n = 0; n < uColCountA; ++n)
194 {
195 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
196 {
197 char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
198 c = UnalignChar(c);
199 msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
200 }
201 }
202 for (unsigned n = uColCountA; n < uNewColCount; ++n)
203 {
204 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
205 msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
206 }
207
208 for (unsigned n = 0; n < uColCountB; ++n)
209 {
210 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
211 {
212 char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
213 c = UnalignChar(c);
214 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
215 }
216 }
217 for (unsigned n = uColCountB; n < uNewColCount; ++n)
218 {
219 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
220 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
221 }
222
223 uColIndexCombined += uNewColCount;
224 uColIndexA += uColCountA;
225 uColIndexB += uColCountB;
226 }
227
AppendMatch(const MSA & msaA,unsigned & uColIndexA,const MSA & msaB,unsigned & uColIndexB,unsigned uSeqCountA,unsigned uSeqCountB,MSA & msaCombined,unsigned & uColIndexCombined)228 static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
229 unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
230 MSA &msaCombined, unsigned &uColIndexCombined)
231 {
232 #if TRACE
233 Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
234 uColIndexA, uColIndexB, uColIndexCombined);
235 #endif
236
237 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
238 {
239 char c = msaA.GetChar(uSeqIndexA, uColIndexA);
240 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
241 }
242
243 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
244 {
245 char c = msaB.GetChar(uSeqIndexB, uColIndexB);
246 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
247 }
248
249 ++uColIndexA;
250 ++uColIndexB;
251 ++uColIndexCombined;
252 }
253
AlignTwoMSAsGivenPath(const PWPath & Path,const MSA & msaA,const MSA & msaB,MSA & msaCombined)254 void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
255 MSA &msaCombined)
256 {
257 msaCombined.Clear();
258
259 #if TRACE
260 Log("FastAlignProfiles\n");
261 Log("Template A:\n");
262 msaA.LogMe();
263 Log("Template B:\n");
264 msaB.LogMe();
265 #endif
266
267 const unsigned uColCountA = msaA.GetColCount();
268 const unsigned uColCountB = msaB.GetColCount();
269
270 const unsigned uSeqCountA = msaA.GetSeqCount();
271 const unsigned uSeqCountB = msaB.GetSeqCount();
272
273 msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);
274
275 // Copy sequence names into combined MSA
276 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
277 {
278 msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
279 msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
280 }
281
282 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
283 {
284 msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
285 msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
286 }
287
288 unsigned uColIndexA = 0;
289 unsigned uColIndexB = 0;
290 unsigned uColIndexCombined = 0;
291 const unsigned uEdgeCount = Path.GetEdgeCount();
292 for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
293 {
294 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
295 #if TRACE
296 Log("\nEdge %u %c%u.%u\n",
297 uEdgeIndex,
298 Edge.cType,
299 Edge.uPrefixLengthA,
300 Edge.uPrefixLengthB);
301 #endif
302 const char cType = Edge.cType;
303 const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
304 unsigned uColCountA = 0;
305 if (uPrefixLengthA > 0)
306 {
307 const unsigned uNodeIndexA = uPrefixLengthA - 1;
308 const unsigned uTplColIndexA = uNodeIndexA;
309 if (uTplColIndexA > uColIndexA)
310 uColCountA = uTplColIndexA - uColIndexA;
311 }
312
313 const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
314 unsigned uColCountB = 0;
315 if (uPrefixLengthB > 0)
316 {
317 const unsigned uNodeIndexB = uPrefixLengthB - 1;
318 const unsigned uTplColIndexB = uNodeIndexB;
319 if (uTplColIndexB > uColIndexB)
320 uColCountB = uTplColIndexB - uColIndexB;
321 }
322
323 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
324 assert(uColCountA == 0);
325 assert(uColCountB == 0);
326 AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
327 uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
328
329 switch (cType)
330 {
331 case 'M':
332 {
333 assert(uPrefixLengthA > 0);
334 assert(uPrefixLengthB > 0);
335 const unsigned uColA = uPrefixLengthA - 1;
336 const unsigned uColB = uPrefixLengthB - 1;
337 assert(uColIndexA == uColA);
338 assert(uColIndexB == uColB);
339 AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
340 msaCombined, uColIndexCombined);
341 break;
342 }
343 case 'D':
344 {
345 assert(uPrefixLengthA > 0);
346 const unsigned uColA = uPrefixLengthA - 1;
347 assert(uColIndexA == uColA);
348 AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
349 break;
350 }
351 case 'I':
352 {
353 assert(uPrefixLengthB > 0);
354 const unsigned uColB = uPrefixLengthB - 1;
355 assert(uColIndexB == uColB);
356 AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
357 break;
358 }
359 default:
360 assert(false);
361 }
362 }
363 unsigned uInsertColCountA = uColCountA - uColIndexA;
364 unsigned uInsertColCountB = uColCountB - uColIndexB;
365
366 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?
367 assert(uInsertColCountA == 0);
368 assert(uInsertColCountB == 0);
369 AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
370 uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
371
372 assert(msaCombined.GetColCount() == uEdgeCount);
373 }
374
375 static const ProfPos PPStart =
376 {
377 false, //m_bAllGaps;
378 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
379 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
380 1.0, // m_LL;
381 0.0, // m_LG;
382 0.0, // m_GL;
383 0.0, // m_GG;
384 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores
385 0, // m_uResidueGroup;
386 1.0, // m_fOcc;
387 0.0, // m_fcStartOcc;
388 0.0, // m_fcEndOcc;
389 0.0, // m_scoreGapOpen;
390 0.0, // m_scoreGapClose;
391 };
392
393 // MM
394 // Ai�1 Ai Out
395 // X X LL LL
396 // X - LG LG
397 // - X GL GL
398 // - - GG GG
399 //
400 // Bj�1 Bj
401 // X X LL LL
402 // X - LG LG
403 // - X GL GL
404 // - - GG GG
SetGapsMM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)405 static void SetGapsMM(
406 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
407 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
408 ProfPos *POut, unsigned uColIndexOut)
409 {
410 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
411 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
412 ProfPos &PPO = POut[uColIndexOut];
413
414 PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;
415 PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;
416 PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;
417 PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;
418 }
419
420 // MD
421 // Ai�1 Ai Out
422 // X X LL LL
423 // X - LG LG
424 // - X GL GL
425 // - - GG GG
426 //
427 // Bj (-)
428 // X - ?L LG
429 // - - ?G GG
SetGapsMD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)430 static void SetGapsMD(
431 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
432 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
433 ProfPos *POut, unsigned uColIndexOut)
434 {
435 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
436 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
437 ProfPos &PPO = POut[uColIndexOut];
438
439 PPO.m_LL = wA*PPA.m_LL;
440 PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);
441 PPO.m_GL = wA*PPA.m_GL;
442 PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
443 }
444
445 // DD
446 // Ai�1 Ai Out
447 // X X LL LL
448 // X - LG LG
449 // - X GL GL
450 // - - GG GG
451 //
452 // (-) (-)
453 // - - ?? GG
SetGapsDD(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)454 static void SetGapsDD(
455 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
456 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
457 ProfPos *POut, unsigned uColIndexOut)
458 {
459 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
460 ProfPos &PPO = POut[uColIndexOut];
461
462 PPO.m_LL = wA*PPA.m_LL;
463 PPO.m_LG = wA*PPA.m_LG;
464 PPO.m_GL = wA*PPA.m_GL;
465 PPO.m_GG = wA*PPA.m_GG + wB;
466 }
467
468 // MI
469 // Ai (-) Out
470 // X - ?L LG
471 // - - ?G GG
472
473 // Bj�1 Bj
474 // X X LL LL
475 // X - LG LG
476 // - X GL GL
477 // - - GG GG
SetGapsMI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)478 static void SetGapsMI(
479 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
480 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
481 ProfPos *POut, unsigned uColIndexOut)
482 {
483 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
484 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
485 ProfPos &PPO = POut[uColIndexOut];
486
487 PPO.m_LL = wB*PPB.m_LL;
488 PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);
489 PPO.m_GL = wB*PPB.m_GL;
490 PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
491 }
492
493 // DM
494 // Ai�1 Ai Out
495 // X X LL LL
496 // X - LG LG
497 // - X GL GL
498 // - - GG GG
499 //
500 // (-) Bj
501 // - X ?L GL
502 // - - ?G GG
SetGapsDM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)503 static void SetGapsDM(
504 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
505 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
506 ProfPos *POut, unsigned uColIndexOut)
507 {
508 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
509 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
510 ProfPos &PPO = POut[uColIndexOut];
511
512 PPO.m_LL = wA*PPA.m_LL;
513 PPO.m_LG = wA*PPA.m_LG;
514 PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);
515 PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);
516 }
517
518 // IM
519 // (-) Ai Out
520 // - X ?L GL
521 // - - ?G GG
522
523 // Bj�1 Bj
524 // X X LL LL
525 // X - LG LG
526 // - X GL GL
527 // - - GG GG
SetGapsIM(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)528 static void SetGapsIM(
529 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
530 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
531 ProfPos *POut, unsigned uColIndexOut)
532 {
533 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
534 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
535 ProfPos &PPO = POut[uColIndexOut];
536
537 PPO.m_LL = wB*PPB.m_LL;
538 PPO.m_LG = wB*PPB.m_LG;
539 PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);
540 PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);
541 }
542
543 // ID
544 // (-) Ai Out
545 // - X ?L GL
546 // - - ?G GG
547
548 // Bj (-)
549 // X - ?L LG
550 // - - ?G GG
SetGapsID(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)551 static void SetGapsID(
552 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
553 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
554 ProfPos *POut, unsigned uColIndexOut)
555 {
556 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
557 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
558 ProfPos &PPO = POut[uColIndexOut];
559
560 PPO.m_LL = 0;
561 PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;
562 PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;
563 PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
564 }
565
566 // DI
567 // Ai (-) Out
568 // X - ?L LG
569 // - - ?G GG
570
571 // (-) Bj
572 // - X ?L GL
573 // - - ?G GG
SetGapsDI(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)574 static void SetGapsDI(
575 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
576 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
577 ProfPos *POut, unsigned uColIndexOut)
578 {
579 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
580 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
581 ProfPos &PPO = POut[uColIndexOut];
582
583 PPO.m_LL = 0;
584 PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;
585 PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;
586 PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);
587 }
588
589 // II
590 // (-) (-) Out
591 // - - ?? GG
592
593 // Bj�1 Bj
594 // X X LL LL
595 // X - LG LG
596 // - X GL GL
597 // - - GG GG
SetGapsII(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)598 static void SetGapsII(
599 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
600 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
601 ProfPos *POut, unsigned uColIndexOut)
602 {
603 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
604 ProfPos &PPO = POut[uColIndexOut];
605
606 PPO.m_LL = wB*PPB.m_LL;
607 PPO.m_LG = wB*PPB.m_LG;
608 PPO.m_GL = wB*PPB.m_GL;
609 PPO.m_GG = wB*PPB.m_GG + wA;
610 }
611
SetFreqs(const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos * POut,unsigned uColIndexOut)612 static void SetFreqs(
613 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
614 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
615 ProfPos *POut, unsigned uColIndexOut)
616 {
617 const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;
618 const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;
619 ProfPos &PPO = POut[uColIndexOut];
620
621 if (g_bNormalizeCounts)
622 {
623 const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);
624 const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);
625 FCOUNT fTotal = 0;
626 for (unsigned i = 0; i < 20; ++i)
627 {
628 const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];
629 PPO.m_fcCounts[i] = f;
630 fTotal += f;
631 }
632 if (fTotal > 0)
633 for (unsigned i = 0; i < 20; ++i)
634 PPO.m_fcCounts[i] /= fTotal;
635 }
636 else
637 {
638 for (unsigned i = 0; i < 20; ++i)
639 PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];
640 }
641 }
642
AlignTwoProfsGivenPath(const PWPath & Path,const ProfPos * PA,unsigned uPrefixLengthA,WEIGHT wA,const ProfPos * PB,unsigned uPrefixLengthB,WEIGHT wB,ProfPos ** ptrPOut,unsigned * ptruLengthOut)643 void AlignTwoProfsGivenPath(const PWPath &Path,
644 const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,
645 const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,
646 ProfPos **ptrPOut, unsigned *ptruLengthOut)
647 {
648 #if TRACE
649 Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);
650 Path.LogMe();
651 #endif
652 assert(BTEq(wA + wB, 1.0));
653
654 unsigned uColIndexA = 0;
655 unsigned uColIndexB = 0;
656 unsigned uColIndexOut = 0;
657 const unsigned uEdgeCount = Path.GetEdgeCount();
658 ProfPos *POut = new ProfPos[uEdgeCount];
659 char cPrevType = 'M';
660 for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
661 {
662 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
663 const char cType = Edge.cType;
664
665 const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
666 const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
667
668 #if TRACE
669 Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",
670 uEdgeIndex,
671 Edge.cType,
672 Edge.uPrefixLengthA,
673 Edge.uPrefixLengthB,
674 uColIndexA,
675 uColIndexB);
676 #endif
677
678 POut[uColIndexOut].m_bAllGaps = false;
679 switch (cType)
680 {
681 case 'M':
682 {
683 assert(uPrefixLengthA > 0);
684 assert(uPrefixLengthB > 0);
685 SetFreqs(
686 PA, uPrefixLengthA, wA,
687 PB, uPrefixLengthB, wB,
688 POut, uColIndexOut);
689 switch (cPrevType)
690 {
691 case 'M':
692 SetGapsMM(
693 PA, uPrefixLengthA, wA,
694 PB, uPrefixLengthB, wB,
695 POut, uColIndexOut);
696 break;
697 case 'D':
698 SetGapsDM(
699 PA, uPrefixLengthA, wA,
700 PB, uPrefixLengthB, wB,
701 POut, uColIndexOut);
702 break;
703 case 'I':
704 SetGapsIM(
705 PA, uPrefixLengthA, wA,
706 PB, uPrefixLengthB, wB,
707 POut, uColIndexOut);
708 break;
709 default:
710 Quit("Bad cPrevType");
711 }
712 ++uColIndexA;
713 ++uColIndexB;
714 ++uColIndexOut;
715 break;
716 }
717 case 'D':
718 {
719 assert(uPrefixLengthA > 0);
720 SetFreqs(
721 PA, uPrefixLengthA, wA,
722 PB, uPrefixLengthB, 0,
723 POut, uColIndexOut);
724 switch (cPrevType)
725 {
726 case 'M':
727 SetGapsMD(
728 PA, uPrefixLengthA, wA,
729 PB, uPrefixLengthB, wB,
730 POut, uColIndexOut);
731 break;
732 case 'D':
733 SetGapsDD(
734 PA, uPrefixLengthA, wA,
735 PB, uPrefixLengthB, wB,
736 POut, uColIndexOut);
737 break;
738 case 'I':
739 SetGapsID(
740 PA, uPrefixLengthA, wA,
741 PB, uPrefixLengthB, wB,
742 POut, uColIndexOut);
743 break;
744 default:
745 Quit("Bad cPrevType");
746 }
747 ++uColIndexA;
748 ++uColIndexOut;
749 break;
750 }
751 case 'I':
752 {
753 assert(uPrefixLengthB > 0);
754 SetFreqs(
755 PA, uPrefixLengthA, 0,
756 PB, uPrefixLengthB, wB,
757 POut, uColIndexOut);
758 switch (cPrevType)
759 {
760 case 'M':
761 SetGapsMI(
762 PA, uPrefixLengthA, wA,
763 PB, uPrefixLengthB, wB,
764 POut, uColIndexOut);
765 break;
766 case 'D':
767 SetGapsDI(
768 PA, uPrefixLengthA, wA,
769 PB, uPrefixLengthB, wB,
770 POut, uColIndexOut);
771 break;
772 case 'I':
773 SetGapsII(
774 PA, uPrefixLengthA, wA,
775 PB, uPrefixLengthB, wB,
776 POut, uColIndexOut);
777 break;
778 default:
779 Quit("Bad cPrevType");
780 }
781 ++uColIndexB;
782 ++uColIndexOut;
783 break;
784 }
785 default:
786 assert(false);
787 }
788 cPrevType = cType;
789 }
790 assert(uColIndexOut == uEdgeCount);
791
792 ProfScoresFromFreqs(POut, uEdgeCount);
793 ValidateProf(POut, uEdgeCount);
794
795 *ptrPOut = POut;
796 *ptruLengthOut = uEdgeCount;
797
798 #if TRACE
799 Log("AlignTwoProfsGivenPath:\n");
800 ListProfile(POut, uEdgeCount, 0);
801 #endif
802 }
803