1 #include "muscle.h"
2 #include "msa.h"
3 #include "profile.h"
4 #include "objscore.h"
5
6 #if DOUBLE_AFFINE
7
8 #define TRACE 0
9 #define TEST_SPFAST 0
10
GapPenalty(unsigned uLength,bool Term,SCORE g,SCORE e)11 static SCORE GapPenalty(unsigned uLength, bool Term, SCORE g, SCORE e)
12 {
13 //if (Term)
14 // {
15 // switch (g_TermGap)
16 // {
17 // case TERMGAP_Full:
18 // return g + (uLength - 1)*e;
19
20 // case TERMGAP_Half:
21 // return g/2 + (uLength - 1)*e;
22
23 // case TERMGAP_Ext:
24 // return uLength*e;
25 // }
26 // Quit("Bad termgap");
27 // }
28 //else
29 // return g + (uLength - 1)*e;
30 //return MINUS_INFINITY;
31 return g + (uLength - 1)*e;
32 }
33
GapPenalty(unsigned uLength,bool Term)34 static SCORE GapPenalty(unsigned uLength, bool Term)
35 {
36 SCORE s1 = GapPenalty(uLength, Term, g_scoreGapOpen, g_scoreGapExtend);
37 #if DOUBLE_AFFINE
38 SCORE s2 = GapPenalty(uLength, Term, g_scoreGapOpen2, g_scoreGapExtend2);
39 if (s1 > s2)
40 return s1;
41 return s2;
42 #else
43 return s1;
44 #endif
45 }
46
47 static const MSA *g_ptrMSA1;
48 static const MSA *g_ptrMSA2;
49 static unsigned g_uSeqIndex1;
50 static unsigned g_uSeqIndex2;
51
LogGap(unsigned uStart,unsigned uEnd,unsigned uGapLength,bool bNTerm,bool bCTerm)52 static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength,
53 bool bNTerm, bool bCTerm)
54 {
55 Log("%16.16s ", "");
56 for (unsigned i = 0; i < uStart; ++i)
57 Log(" ");
58 unsigned uMyLength = 0;
59 for (unsigned i = uStart; i <= uEnd; ++i)
60 {
61 bool bGap1 = g_ptrMSA1->IsGap(g_uSeqIndex1, i);
62 bool bGap2 = g_ptrMSA2->IsGap(g_uSeqIndex2, i);
63 if (!bGap1 && !bGap2)
64 Quit("Error -- neither gapping");
65 if (bGap1 && bGap2)
66 Log(".");
67 else
68 {
69 ++uMyLength;
70 Log("-");
71 }
72 }
73 SCORE s = GapPenalty(uGapLength, bNTerm || bCTerm);
74 Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, s);
75 Log("\n");
76 if (uMyLength != uGapLength)
77 Quit("Lengths differ");
78
79 }
80
ScoreSeqPair(const MSA & msa1,unsigned uSeqIndex1,const MSA & msa2,unsigned uSeqIndex2,SCORE * ptrLetters,SCORE * ptrGaps)81 static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
82 const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
83 {
84 g_ptrMSA1 = &msa1;
85 g_ptrMSA2 = &msa2;
86 g_uSeqIndex1 = uSeqIndex1;
87 g_uSeqIndex2 = uSeqIndex2;
88
89 const unsigned uColCount = msa1.GetColCount();
90 const unsigned uColCount2 = msa2.GetColCount();
91 if (uColCount != uColCount2)
92 Quit("ScoreSeqPair, different lengths");
93
94 #if TRACE
95 Log("ScoreSeqPair\n");
96 Log("%16.16s ", msa1.GetSeqName(uSeqIndex1));
97 for (unsigned i = 0; i < uColCount; ++i)
98 Log("%c", msa1.GetChar(uSeqIndex1, i));
99 Log("\n");
100 Log("%16.16s ", msa2.GetSeqName(uSeqIndex2));
101 for (unsigned i = 0; i < uColCount; ++i)
102 Log("%c", msa1.GetChar(uSeqIndex2, i));
103 Log("\n");
104 #endif
105
106 SCORE scoreTotal = 0;
107
108 // Substitution scores
109 unsigned uFirstLetter1 = uInsane;
110 unsigned uFirstLetter2 = uInsane;
111 unsigned uLastLetter1 = uInsane;
112 unsigned uLastLetter2 = uInsane;
113 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
114 {
115 bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
116 bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
117 bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
118 bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);
119
120 if (!bGap1)
121 {
122 if (uInsane == uFirstLetter1)
123 uFirstLetter1 = uColIndex;
124 uLastLetter1 = uColIndex;
125 }
126 if (!bGap2)
127 {
128 if (uInsane == uFirstLetter2)
129 uFirstLetter2 = uColIndex;
130 uLastLetter2 = uColIndex;
131 }
132
133 if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
134 continue;
135
136 unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
137 unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);
138
139 SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
140 scoreTotal += scoreMatch;
141 #if TRACE
142 Log("%c <-> %c = %7.1f %10.1f\n",
143 msa1.GetChar(uSeqIndex1, uColIndex),
144 msa2.GetChar(uSeqIndex2, uColIndex),
145 scoreMatch,
146 scoreTotal);
147 #endif
148 }
149
150 *ptrLetters = scoreTotal;
151
152 // Gap penalties
153 unsigned uGapLength = uInsane;
154 unsigned uGapStartCol = uInsane;
155 bool bGapping1 = false;
156 bool bGapping2 = false;
157
158 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
159 {
160 bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
161 bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
162
163 if (bGap1 && bGap2)
164 continue;
165
166 if (bGapping1)
167 {
168 if (bGap1)
169 ++uGapLength;
170 else
171 {
172 bGapping1 = false;
173 bool bNTerm = (uFirstLetter2 == uGapStartCol);
174 bool bCTerm = (uLastLetter2 + 1 == uColIndex);
175 SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
176 scoreTotal += scoreGap;
177 #if TRACE
178 LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
179 Log("GAP %7.1f %10.1f\n",
180 scoreGap,
181 scoreTotal);
182 #endif
183 }
184 continue;
185 }
186 else
187 {
188 if (bGap1)
189 {
190 uGapStartCol = uColIndex;
191 bGapping1 = true;
192 uGapLength = 1;
193 continue;
194 }
195 }
196
197 if (bGapping2)
198 {
199 if (bGap2)
200 ++uGapLength;
201 else
202 {
203 bGapping2 = false;
204 bool bNTerm = (uFirstLetter1 == uGapStartCol);
205 bool bCTerm = (uLastLetter1 + 1 == uColIndex);
206 SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
207 scoreTotal += scoreGap;
208 #if TRACE
209 LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
210 Log("GAP %7.1f %10.1f\n",
211 scoreGap,
212 scoreTotal);
213 #endif
214 }
215 }
216 else
217 {
218 if (bGap2)
219 {
220 uGapStartCol = uColIndex;
221 bGapping2 = true;
222 uGapLength = 1;
223 }
224 }
225 }
226
227 if (bGapping1 || bGapping2)
228 {
229 SCORE scoreGap = GapPenalty(uGapLength, true);
230 scoreTotal += scoreGap;
231 #if TRACE
232 LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
233 Log("GAP %7.1f %10.1f\n",
234 scoreGap,
235 scoreTotal);
236 #endif
237 }
238 *ptrGaps = scoreTotal - *ptrLetters;
239 return scoreTotal;
240 }
241
242 // The usual sum-of-pairs objective score: sum the score
243 // of the alignment of each pair of sequences.
ObjScoreDA(const MSA & msa,SCORE * ptrLetters,SCORE * ptrGaps)244 SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
245 {
246 const unsigned uSeqCount = msa.GetSeqCount();
247 SCORE scoreTotal = 0;
248 unsigned uPairCount = 0;
249 #if TRACE
250 msa.LogMe();
251 Log(" Score Weight Weight Total\n");
252 Log("---------- ------ ------ ----------\n");
253 #endif
254 SCORE TotalLetters = 0;
255 SCORE TotalGaps = 0;
256 for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
257 {
258 const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
259 for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
260 {
261 const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
262 const WEIGHT w = w1*w2;
263 SCORE Letters;
264 SCORE Gaps;
265 SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
266 &Letters, &Gaps);
267 scoreTotal += w1*w2*scorePair;
268 TotalLetters += w1*w2*Letters;
269 TotalGaps += w1*w2*Gaps;
270 ++uPairCount;
271 #if TRACE
272 Log("%10.2f %6.3f %6.3f %10.2f %d=%s %d=%s\n",
273 scorePair,
274 w1,
275 w2,
276 scorePair*w1*w2,
277 uSeqIndex1,
278 msa.GetSeqName(uSeqIndex1),
279 uSeqIndex2,
280 msa.GetSeqName(uSeqIndex2));
281 #endif
282 }
283 }
284 *ptrLetters = TotalLetters;
285 *ptrGaps = TotalGaps;
286 return scoreTotal;
287 }
288
289 #endif // DOUBLE_AFFINE
290