1 #include <U2Core/disable-warnings.h>
2 U2_DISABLE_WARNINGS
3
4 #include "muscle.h"
5 #include "profile.h"
6 #include "diaglist.h"
7 #include "muscle_context.h"
8
9 #define TRACE 0
10
11 #define pow4(i) (1 << (2*i)) // 4^i = 2^(2*i)
12
13 const unsigned K = MuscleContext::finddiagsn_struct::K;
14 const unsigned KTUPS = MuscleContext::finddiagsn_struct::KTUPS;
15
16
17 /*static char *TupleToStr(int t)
18 {
19 char *g_LetterToChar = getMuscleContext()->alpha.g_LetterToChar;
20 static char s[K];
21
22 for (unsigned i = 0; i < K; ++i)
23 {
24 unsigned Letter = (t/(pow4(i)))%4;
25 // assert(Letter >= 0 && Letter < 4);
26 s[K-i-1] = LetterToChar(Letter);
27 }
28
29 return s;
30 }*/
31
GetTuple(const ProfPos * PP,unsigned uPos)32 static unsigned GetTuple(const ProfPos *PP, unsigned uPos)
33 {
34 unsigned t = 0;
35
36 for (unsigned i = 0; i < K; ++i)
37 {
38 const unsigned uLetter = PP[uPos+i].m_uResidueGroup;
39 if (RESIDUE_GROUP_MULTIPLE == uLetter)
40 return EMPTY;
41 t = t*4 + uLetter;
42 }
43
44 return t;
45 }
46
FindDiagsNuc(const ProfPos * PX,unsigned uLengthX,const ProfPos * PY,unsigned uLengthY,DiagList & DL)47 void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,
48 unsigned uLengthY, DiagList &DL)
49 {
50 MuscleContext *ctx = getMuscleContext();
51 ALPHA &g_Alpha = ctx->alpha.g_Alpha;
52 unsigned* TuplePos = ctx->finddiagsn.TuplePos;
53 unsigned &g_uMinDiagLength = ctx->params.g_uMinDiagLength;
54
55 if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)
56 Quit("FindDiagsNuc: requires nucleo alphabet");
57
58 DL.Clear();
59
60 // 16 is arbitrary slop, no principled reason for this.
61 if (uLengthX < K + 16 || uLengthY < K + 16)
62 return;
63
64 // Set A to shorter profile, B to longer
65 const ProfPos *PA;
66 const ProfPos *PB;
67 unsigned uLengthA;
68 unsigned uLengthB;
69 bool bSwap;
70 if (uLengthX < uLengthY)
71 {
72 bSwap = false;
73 PA = PX;
74 PB = PY;
75 uLengthA = uLengthX;
76 uLengthB = uLengthY;
77 }
78 else
79 {
80 bSwap = true;
81 PA = PY;
82 PB = PX;
83 uLengthA = uLengthY;
84 uLengthB = uLengthX;
85 }
86
87 #if TRACE
88 Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);
89 #endif
90
91 // Build tuple map for the longer profile, B
92 if (uLengthB < K)
93 Quit("FindDiags: profile too short");
94
95 memset(TuplePos, EMPTY, sizeof(TuplePos));
96
97 for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)
98 {
99 const unsigned uTuple = GetTuple(PB, uPos);
100 if (EMPTY == uTuple)
101 continue;
102 TuplePos[uTuple] = uPos;
103 }
104
105 // Find matches
106 for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)
107 {
108 const unsigned uTuple = GetTuple(PA, uPosA);
109 if (EMPTY == uTuple)
110 continue;
111 const unsigned uPosB = TuplePos[uTuple];
112 if (EMPTY == uPosB)
113 continue;
114
115 // This tuple is found in both profiles
116 unsigned uStartPosA = uPosA;
117 unsigned uStartPosB = uPosB;
118
119 // Try to extend the match forwards
120 unsigned uEndPosA = uPosA + K - 1;
121 unsigned uEndPosB = uPosB + K - 1;
122 for (;;)
123 {
124 if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)
125 break;
126 const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;
127 if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)
128 break;
129 const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;
130 if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)
131 break;
132 if (uAAGroupA != uAAGroupB)
133 break;
134 ++uEndPosA;
135 ++uEndPosB;
136 }
137 uPosA = uEndPosA;
138
139 #if TRACE
140 {
141 Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);
142 for (unsigned n = uStartPosA; n <= uEndPosA; ++n)
143 Log("%c", LetterToChar(PA[n].m_uResidueGroup));
144 Log("\n");
145 Log(" B %4u-%4u ", uStartPosB, uEndPosB);
146 for (unsigned n = uStartPosB; n <= uEndPosB; ++n)
147 Log("%c", LetterToChar(PB[n].m_uResidueGroup));
148 Log("\n");
149 }
150 #endif
151
152 const unsigned uLength = uEndPosA - uStartPosA + 1;
153 assert(uEndPosB - uStartPosB + 1 == uLength);
154
155 if (uLength >= g_uMinDiagLength)
156 {
157 if (bSwap)
158 DL.Add(uStartPosB, uStartPosA, uLength);
159 else
160 DL.Add(uStartPosA, uStartPosB, uLength);
161 }
162 }
163 }
164