1 #include "muscle.h"
2 #include "msa.h"
3 #include "profile.h"
4 #include "objscore.h"
5 
6 #if	DOUBLE_AFFINE
7 
8 #define TRACE			0
9 #define TEST_SPFAST		0
10 
GapPenalty(unsigned uLength,bool Term,SCORE g,SCORE e)11 static SCORE GapPenalty(unsigned uLength, bool Term, SCORE g, SCORE e)
12 	{
13 	//if (Term)
14 	//	{
15 	//	switch (g_TermGap)
16 	//		{
17 	//	case TERMGAP_Full:
18 	//		return g + (uLength - 1)*e;
19 
20 	//	case TERMGAP_Half:
21 	//		return g/2 + (uLength - 1)*e;
22 
23 	//	case TERMGAP_Ext:
24 	//		return uLength*e;
25 	//		}
26 	//	Quit("Bad termgap");
27 	//	}
28 	//else
29 	//	return g + (uLength - 1)*e;
30 	//return MINUS_INFINITY;
31 	return g + (uLength - 1)*e;
32 	}
33 
GapPenalty(unsigned uLength,bool Term)34 static SCORE GapPenalty(unsigned uLength, bool Term)
35 	{
36 	SCORE s1 = GapPenalty(uLength, Term, g_scoreGapOpen, g_scoreGapExtend);
37 #if	DOUBLE_AFFINE
38 	SCORE s2 = GapPenalty(uLength, Term, g_scoreGapOpen2, g_scoreGapExtend2);
39 	if (s1 > s2)
40 		return s1;
41 	return s2;
42 #else
43 	return s1;
44 #endif
45 	}
46 
47 static const MSA *g_ptrMSA1;
48 static const MSA *g_ptrMSA2;
49 static unsigned g_uSeqIndex1;
50 static unsigned g_uSeqIndex2;
51 
LogGap(unsigned uStart,unsigned uEnd,unsigned uGapLength,bool bNTerm,bool bCTerm)52 static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength,
53   bool bNTerm, bool bCTerm)
54 	{
55 	Log("%16.16s  ", "");
56 	for (unsigned i = 0; i < uStart; ++i)
57 		Log(" ");
58 	unsigned uMyLength = 0;
59 	for (unsigned i = uStart; i <= uEnd; ++i)
60 		{
61 		bool bGap1 = g_ptrMSA1->IsGap(g_uSeqIndex1, i);
62 		bool bGap2 = g_ptrMSA2->IsGap(g_uSeqIndex2, i);
63 		if (!bGap1 && !bGap2)
64 			Quit("Error -- neither gapping");
65 		if (bGap1 && bGap2)
66 			Log(".");
67 		else
68 			{
69 			++uMyLength;
70 			Log("-");
71 			}
72 		}
73 	SCORE s = GapPenalty(uGapLength, bNTerm || bCTerm);
74 	Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, s);
75 	Log("\n");
76 	if (uMyLength != uGapLength)
77 		Quit("Lengths differ");
78 
79 	}
80 
ScoreSeqPair(const MSA & msa1,unsigned uSeqIndex1,const MSA & msa2,unsigned uSeqIndex2,SCORE * ptrLetters,SCORE * ptrGaps)81 static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
82   const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
83 	{
84 	g_ptrMSA1 = &msa1;
85 	g_ptrMSA2 = &msa2;
86 	g_uSeqIndex1 = uSeqIndex1;
87 	g_uSeqIndex2 = uSeqIndex2;
88 
89 	const unsigned uColCount = msa1.GetColCount();
90 	const unsigned uColCount2 = msa2.GetColCount();
91 	if (uColCount != uColCount2)
92 		Quit("ScoreSeqPair, different lengths");
93 
94 #if	TRACE
95 	Log("ScoreSeqPair\n");
96 	Log("%16.16s  ", msa1.GetSeqName(uSeqIndex1));
97 	for (unsigned i = 0; i < uColCount; ++i)
98 		Log("%c", msa1.GetChar(uSeqIndex1, i));
99 	Log("\n");
100 	Log("%16.16s  ", msa2.GetSeqName(uSeqIndex2));
101 	for (unsigned i = 0; i < uColCount; ++i)
102 		Log("%c", msa1.GetChar(uSeqIndex2, i));
103 	Log("\n");
104 #endif
105 
106 	SCORE scoreTotal = 0;
107 
108 // Substitution scores
109 	unsigned uFirstLetter1 = uInsane;
110 	unsigned uFirstLetter2 = uInsane;
111 	unsigned uLastLetter1 = uInsane;
112 	unsigned uLastLetter2 = uInsane;
113 	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
114 		{
115 		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
116 		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
117 		bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
118 		bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);
119 
120 		if (!bGap1)
121 			{
122 			if (uInsane == uFirstLetter1)
123 				uFirstLetter1 = uColIndex;
124 			uLastLetter1 = uColIndex;
125 			}
126 		if (!bGap2)
127 			{
128 			if (uInsane == uFirstLetter2)
129 				uFirstLetter2 = uColIndex;
130 			uLastLetter2 = uColIndex;
131 			}
132 
133 		if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
134 			continue;
135 
136 		unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
137 		unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);
138 
139 		SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
140 		scoreTotal += scoreMatch;
141 #if	TRACE
142 		Log("%c <-> %c = %7.1f  %10.1f\n",
143 		  msa1.GetChar(uSeqIndex1, uColIndex),
144 		  msa2.GetChar(uSeqIndex2, uColIndex),
145 		  scoreMatch,
146 		  scoreTotal);
147 #endif
148 		}
149 
150 	*ptrLetters = scoreTotal;
151 
152 // Gap penalties
153 	unsigned uGapLength = uInsane;
154 	unsigned uGapStartCol = uInsane;
155 	bool bGapping1 = false;
156 	bool bGapping2 = false;
157 
158 	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
159 		{
160 		bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
161 		bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
162 
163 		if (bGap1 && bGap2)
164 			continue;
165 
166 		if (bGapping1)
167 			{
168 			if (bGap1)
169 				++uGapLength;
170 			else
171 				{
172 				bGapping1 = false;
173 				bool bNTerm = (uFirstLetter2 == uGapStartCol);
174 				bool bCTerm = (uLastLetter2 + 1 == uColIndex);
175 				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
176 				scoreTotal += scoreGap;
177 #if	TRACE
178 				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
179 				Log("GAP         %7.1f  %10.1f\n",
180 				  scoreGap,
181 				  scoreTotal);
182 #endif
183 				}
184 			continue;
185 			}
186 		else
187 			{
188 			if (bGap1)
189 				{
190 				uGapStartCol = uColIndex;
191 				bGapping1 = true;
192 				uGapLength = 1;
193 				continue;
194 				}
195 			}
196 
197 		if (bGapping2)
198 			{
199 			if (bGap2)
200 				++uGapLength;
201 			else
202 				{
203 				bGapping2 = false;
204 				bool bNTerm = (uFirstLetter1 == uGapStartCol);
205 				bool bCTerm = (uLastLetter1 + 1 == uColIndex);
206 				SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
207 				scoreTotal += scoreGap;
208 #if	TRACE
209 				LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
210 				Log("GAP         %7.1f  %10.1f\n",
211 				  scoreGap,
212 				  scoreTotal);
213 #endif
214 				}
215 			}
216 		else
217 			{
218 			if (bGap2)
219 				{
220 				uGapStartCol = uColIndex;
221 				bGapping2 = true;
222 				uGapLength = 1;
223 				}
224 			}
225 		}
226 
227 	if (bGapping1 || bGapping2)
228 		{
229 		SCORE scoreGap = GapPenalty(uGapLength, true);
230 		scoreTotal += scoreGap;
231 #if	TRACE
232 		LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
233 		Log("GAP         %7.1f  %10.1f\n",
234 		  scoreGap,
235 		  scoreTotal);
236 #endif
237 		}
238 	*ptrGaps = scoreTotal - *ptrLetters;
239 	return scoreTotal;
240 	}
241 
242 // The usual sum-of-pairs objective score: sum the score
243 // of the alignment of each pair of sequences.
ObjScoreDA(const MSA & msa,SCORE * ptrLetters,SCORE * ptrGaps)244 SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
245 	{
246 	const unsigned uSeqCount = msa.GetSeqCount();
247 	SCORE scoreTotal = 0;
248 	unsigned uPairCount = 0;
249 #if	TRACE
250 	msa.LogMe();
251 	Log("     Score  Weight  Weight       Total\n");
252 	Log("----------  ------  ------  ----------\n");
253 #endif
254 	SCORE TotalLetters = 0;
255 	SCORE TotalGaps = 0;
256 	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
257 		{
258 		const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
259 		for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
260 			{
261 			const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
262 			const WEIGHT w = w1*w2;
263 			SCORE Letters;
264 			SCORE Gaps;
265 			SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
266 			  &Letters, &Gaps);
267 			scoreTotal += w1*w2*scorePair;
268 			TotalLetters += w1*w2*Letters;
269 			TotalGaps += w1*w2*Gaps;
270 			++uPairCount;
271 #if	TRACE
272 			Log("%10.2f  %6.3f  %6.3f  %10.2f  %d=%s %d=%s\n",
273 			  scorePair,
274 			  w1,
275 			  w2,
276 			  scorePair*w1*w2,
277 			  uSeqIndex1,
278 			  msa.GetSeqName(uSeqIndex1),
279 			  uSeqIndex2,
280 			  msa.GetSeqName(uSeqIndex2));
281 #endif
282 			}
283 		}
284 	*ptrLetters = TotalLetters;
285 	*ptrGaps = TotalGaps;
286 	return scoreTotal;
287 	}
288 
289 #endif	// DOUBLE_AFFINE
290