1 #include <U2Core/disable-warnings.h>
2 U2_DISABLE_WARNINGS
3 
4 #include "muscle.h"
5 #include "profile.h"
6 #include "diaglist.h"
7 #include "muscle_context.h"
8 
9 #define TRACE	0
10 
11 #define pow4(i)	(1 << (2*i))	// 4^i = 2^(2*i)
12 
13 const unsigned K = MuscleContext::finddiagsn_struct::K;
14 const unsigned KTUPS = MuscleContext::finddiagsn_struct::KTUPS;
15 
16 
17 /*static char *TupleToStr(int t)
18 	{
19     char *g_LetterToChar = getMuscleContext()->alpha.g_LetterToChar;
20 	static char s[K];
21 
22     for (unsigned i = 0; i < K; ++i)
23 		{
24 		unsigned Letter = (t/(pow4(i)))%4;
25 //		assert(Letter >= 0 && Letter < 4);
26 		s[K-i-1] = LetterToChar(Letter);
27 		}
28 
29 	return s;
30         }*/
31 
GetTuple(const ProfPos * PP,unsigned uPos)32 static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
33 	{
34 	unsigned t = 0;
35 
36 	for (unsigned i = 0; i < K; ++i)
37 		{
38 		const unsigned uLetter = PP[uPos+i].m_uResidueGroup;
39 		if (RESIDUE_GROUP_MULTIPLE == uLetter)
40 			return EMPTY;
41 		t = t*4 + uLetter;
42 		}
43 
44 	return t;
45 	}
46 
FindDiagsNuc(const ProfPos * PX,unsigned uLengthX,const ProfPos * PY,unsigned uLengthY,DiagList & DL)47 void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
48   unsigned uLengthY, DiagList &DL)
49 	{
50     MuscleContext *ctx = getMuscleContext();
51     ALPHA &g_Alpha = ctx->alpha.g_Alpha;
52     unsigned* TuplePos = ctx->finddiagsn.TuplePos;
53     unsigned &g_uMinDiagLength = ctx->params.g_uMinDiagLength;
54 
55 	if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
56 		Quit("FindDiagsNuc: requires nucleo alphabet");
57 
58 	DL.Clear();
59 
60 // 16 is arbitrary slop, no principled reason for this.
61 	if (uLengthX < K + 16 || uLengthY < K + 16)
62 		return;
63 
64 // Set A to shorter profile, B to longer
65 	const ProfPos *PA;
66 	const ProfPos *PB;
67 	unsigned uLengthA;
68 	unsigned uLengthB;
69 	bool bSwap;
70 	if (uLengthX < uLengthY)
71 		{
72 		bSwap = false;
73 		PA = PX;
74 		PB = PY;
75 		uLengthA = uLengthX;
76 		uLengthB = uLengthY;
77 		}
78 	else
79 		{
80 		bSwap = true;
81 		PA = PY;
82 		PB = PX;
83 		uLengthA = uLengthY;
84 		uLengthB = uLengthX;
85 		}
86 
87 #if	TRACE
88 	Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
89 #endif
90 
91 // Build tuple map for the longer profile, B
92 	if (uLengthB < K)
93 		Quit("FindDiags: profile too short");
94 
95 	memset(TuplePos, EMPTY, sizeof(TuplePos));
96 
97 	for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
98 		{
99 		const unsigned uTuple = GetTuple(PB, uPos);
100 		if (EMPTY == uTuple)
101 			continue;
102 		TuplePos[uTuple] = uPos;
103 		}
104 
105 // Find matches
106 	for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
107 		{
108 		const unsigned uTuple = GetTuple(PA, uPosA);
109 		if (EMPTY == uTuple)
110 			continue;
111 		const unsigned uPosB = TuplePos[uTuple];
112 		if (EMPTY == uPosB)
113 			continue;
114 
115 	// This tuple is found in both profiles
116 		unsigned uStartPosA = uPosA;
117 		unsigned uStartPosB = uPosB;
118 
119 	// Try to extend the match forwards
120 		unsigned uEndPosA = uPosA + K - 1;
121 		unsigned uEndPosB = uPosB + K - 1;
122 		for (;;)
123 			{
124 			if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
125 				break;
126 			const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
127 			if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
128 				break;
129 			const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
130 			if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
131 				break;
132 			if (uAAGroupA != uAAGroupB)
133 				break;
134 			++uEndPosA;
135 			++uEndPosB;
136 			}
137 		uPosA = uEndPosA;
138 
139 #if	TRACE
140 		{
141 		Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);
142 		for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
143 			Log("%c", LetterToChar(PA[n].m_uResidueGroup));
144 		Log("\n");
145 		Log("       B %4u-%4u   ", uStartPosB, uEndPosB);
146 		for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
147 			Log("%c", LetterToChar(PB[n].m_uResidueGroup));
148 		Log("\n");
149 		}
150 #endif
151 
152 		const unsigned uLength = uEndPosA - uStartPosA + 1;
153 		assert(uEndPosB - uStartPosB + 1 == uLength);
154 
155 		if (uLength >= g_uMinDiagLength)
156 			{
157 			if (bSwap)
158 				DL.Add(uStartPosB, uStartPosA, uLength);
159 			else
160 				DL.Add(uStartPosA, uStartPosB, uLength);
161 			}
162 		}
163 	}
164