#include "uhmmsearch_opt.h"

#include <U2Core/SequenceWalkerTask.h>
#include <U2Core/U2Region.h>
#include <U2Core/Timer.h>
#include <hmmer2/funcs.h>

#include <assert.h>
#include <emmintrin.h>

using namespace U2;

namespace {

#define CHUNK_SIZE (10 * 1024)
const int ALIGNMENT_MEM = 128;

#define ALIGNED(ptr, base) ( (unsigned char*) ( (((quintptr)(ptr))+((base)-1)) &~((base)-1) ) )
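// ALIGNED rounds ptr up to the next multiple of base; base must be a power
// of two for the mask arithmetic to work. All callers below pass 16 or 128.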

struct hmm_opt {
    static const int ALIGNMENT_MEM = 128;    // alignment of mem
    static const int ALIGNMENT_SCORES = 16;  // alignment of tsc, msc, etc.
    int M;       // Length of this HMM in states
    int* mem;    // honestly allocated memory. All pointers below are mem + some offset.

    int* tsc[7];        // transition scores [0..6][1..M-1]
    int* msc[MAXCODE];  // M state emission probs. [0..MAXCODE-1][1..M]
    int* isc[MAXCODE];  // I state emission probs. [0..MAXCODE-1][1..M-1]
    int* xsc[4];        // N,E,C,J transition scores [0..3][0..1]
    int* bsc;           // B state transition probs. [1..M]
    int* esc;           // E state transition probs. [1..M]
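
    // All tables are carved out of a single malloc'd block. Tables that
    // viterbiSSE() reads through (__m128i*)(table + 1) — bsc, esc, msc, isc,
    // tsc[TMI], tsc[TII] — are shifted 3 ints past a 16-byte boundary so that
    // element 1 (scores are 1-based) is vector-aligned; the remaining tsc
    // rows are read from element 0 and stay on the boundary itself.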

    hmm_opt( plan7_s * hmm ) {
        assert( NULL != hmm );
        M = hmm->M;

        qint64 tsc_sz = 7 * ( M + 16 );
        qint64 msc_sz = MAXCODE * ( M + 1 + 16 );
        qint64 isc_sz = MAXCODE * ( M + 16 );
        qint64 xsc_sz = 50;
        qint64 bsc_sz = M + 1 + 12;
        qint64 esc_sz = M + 1 + 12;

        qint64 bsc_real_sz = M + 1;
        qint64 esc_real_sz = M + 1;

        qint64 total_sz = ( tsc_sz + msc_sz + isc_sz + xsc_sz + bsc_sz + esc_sz ) * sizeof( int ) +
            ALIGNMENT_MEM + 0x10 * ( 15 + 2 * MAXCODE ) * sizeof(int);

        mem = (int*)malloc( total_sz );
        memset( mem, 0, total_sz );
        int* mem_aligned = (int*)ALIGNED( mem, ALIGNMENT_MEM );

        bsc = mem_aligned;
        bsc += 3;
        memcpy( bsc, hmm->bsc_mem, bsc_real_sz * sizeof(int) );

        esc = (int*)ALIGNED( bsc + 4 + bsc_sz, ALIGNMENT_SCORES ) + 0x10;
        esc += 3;
        memcpy( esc, hmm->esc_mem, esc_real_sz * sizeof(int) );
        // pad the tail so a full-vector read past esc[M] yields -INFTY
        // and never wins a max
        for( int i = 1; i < 8; i++ ) {
            esc[M+i] = -INFTY;
        }

        int* tsc_tmp = (int*)ALIGNED( esc + esc_sz, ALIGNMENT_SCORES ) + 0x10;
        for( int i = 0; i < 7; ++i ) {
            tsc[i] = tsc_tmp;
            tsc_tmp = (int*)ALIGNED( tsc_tmp + M, ALIGNMENT_SCORES ) + 0x10;
        }
        // tsc[TMI] and tsc[TII] are later read as (base + 1); shift them by
        // 3 ints so that base + 1 stays 16-byte aligned
        tsc[1] += 3;
        tsc[4] += 3;
        for( int i = 0; i < 7; ++i ) {
            memcpy( tsc[i], hmm->tsc[i], ( M ) * sizeof(int) );
        }

        int* msc_tmp = (int*)ALIGNED( tsc[0] + tsc_sz, ALIGNMENT_SCORES );
        for( int i = 0; i < MAXCODE; ++i ) {
            msc[i] = msc_tmp;
            msc[i] += 3;
            msc_tmp = (int*)ALIGNED( msc_tmp + M + 1, ALIGNMENT_SCORES ) + 0x10;
            memcpy( msc[i], hmm->msc[i], ( M + 1 ) * sizeof(int) );
        }

        int* isc_tmp = (int*)ALIGNED( msc[0] + msc_sz, ALIGNMENT_SCORES ) + 0x10;
        for( int i = 0; i < MAXCODE; ++i ) {
            isc[i] = isc_tmp;
            isc[i] += 3;
            isc_tmp = (int*)ALIGNED( isc_tmp + M, ALIGNMENT_SCORES ) + 0x10;
            memcpy( isc[i], hmm->isc[i], ( M ) * sizeof(int) );
        }

        int* xsc_tmp = (int*)ALIGNED( isc[0] + isc_sz, ALIGNMENT_SCORES ) + 0x10;
        for( int i = 0; i < 4; ++i ) {
            xsc[i] = xsc_tmp;
            xsc_tmp = (int*)ALIGNED( xsc_tmp + 2, ALIGNMENT_SCORES ) + 0x10;
            for( int j = 0; j < 2; ++j ) {
                xsc[i][j] = hmm->xsc[i][j];
            }
        }
    }

    ~hmm_opt() {
        free(mem);
    }

    // debug helper: print every score table
    void dump() {
        int i = 0;
        int j = 0;
        printf("--START HMM OPT DUMP (M=%d)\n", M);
        for (i=0;i<M;i++){
            printf("HMM->ESC[%d]=%d\n",i, esc[i]);
        }
        for (i=0;i<M;i++){
            printf("HMM->BSC[%d]=%d\n",i, bsc[i]);
        }
        for (i=0;i<MAXCODE;i++){
            for (j=0;j<=M;j++){
                printf("HMM->MSC[%d][%d]=%d\n",i,j, msc[i][j]);
            }
        }
        for (i=0;i<4;i++){
            for (j=0;j<2;j++){
                printf("HMM->XSC[%d][%d]=%d\n",i,j, xsc[i][j]);
            }
        }
        for (i=0;i<7;i++){
            for (j=0;j<M;j++){
                printf("HMM->TSC[%d][%d]=%d\n",i,j, tsc[i][j]);
            }
        }
        for (i=0;i<MAXCODE;i++){
            for (j=0;j<M;j++){
                printf("HMM->ISC[%d][%d]=%d\n",i,j, isc[i][j]);
            }
        }
        printf("--END HMM OPT DUMP (M=%d)\n", M);
    }
};

inline int at0( __m128i from ) {
    int a = _mm_extract_epi16( from, 0 ) & 0xFFFF;
    int b = _mm_extract_epi16( from, 1 ) & 0xFFFF;
    return (a | (b << 16));
}

inline int at1( __m128i from ) {
    int a = _mm_extract_epi16( from, 2 ) & 0xFFFF;
    int b = _mm_extract_epi16( from, 3 ) & 0xFFFF;
    return (a | (b << 16));
}

inline int at2( __m128i from ) {
    int a = _mm_extract_epi16( from, 4 ) & 0xFFFF;
    int b = _mm_extract_epi16( from, 5 ) & 0xFFFF;
    return (a | (b << 16));
}

inline int at3( __m128i from ) {
    int a = _mm_extract_epi16( from, 6 ) & 0xFFFF;
    int b = _mm_extract_epi16( from, 7 ) & 0xFFFF;
    return (a | (b << 16));
}
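
// at0..at3 read the n-th 32-bit lane of a vector. SSE2 has no
// _mm_extract_epi32 (that is SSE4.1), so each lane is rebuilt from two
// 16-bit extracts; the & 0xFFFF keeps the low half from sign-extending
// into the high bits when the halves are recombined.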

inline __m128i shufflePMask( __m128i a, __m128i b ) {
    return _mm_set_epi32( at2(a), at1(a), at0(a), at3(b) );
}

inline __m128i shuffleD0Mask( const __m128i& a, const __m128i& b ) {
    return _mm_set_epi32( at3(b), at2(b), at1(a), at0(a) );
}

inline __m128i shuffleD1Mask( const __m128i& a, const __m128i& b ) {
    return _mm_set_epi32( at0(b), at0(b), at0(b), at0(a) );
}

inline __m128i shuffleD2Mask( const __m128i& a, const __m128i& b ) {
    return _mm_set_epi32( at1(b), at1(b), at1(a), at0(a) );
}

inline __m128i shuffleD3Mask( const __m128i& a, const __m128i& b ) {
    return _mm_set_epi32( at2(b), at2(a), at1(a), at0(a) );
}
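
// shufflePMask builds the "position - 1" view of a vector: given the current
// block a = {a0,a1,a2,a3} and the preceding block b, it returns
// {b3,a0,a1,a2}, i.e. a shifted up one lane with b's top lane shifted in.
// The shuffleD*Mask helpers produce the partially shifted vectors used to
// resolve the serial D->D dependency chain inside a 4-lane block.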

inline __m128i insert0I32( const __m128i& a, int b ) {
    __m128i mask_a = _mm_set_epi32( 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0 );
    __m128i mask_b = _mm_set_epi32( 0, 0, 0, b );
    __m128i res = _mm_and_si128( a, mask_a );
    res = _mm_or_si128( res, mask_b );
    return res;
}

inline __m128i insert3I32( const __m128i& a, int b ) {
    __m128i mask_a = _mm_set_epi32( 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF );
    __m128i mask_b = _mm_set_epi32( b, 0, 0, 0 );
    __m128i res = _mm_and_si128( a, mask_a );
    res = _mm_or_si128( res, mask_b );
    return res;
}

inline __m128i selectBits( const __m128i& a, const __m128i& b, const __m128i& pattern ) {
    __m128i tmpB = _mm_and_si128( pattern, b );
    __m128i tmpA = _mm_andnot_si128( pattern, a );
    return _mm_or_si128( tmpA, tmpB );
}
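
// selectBits is a branchless per-bit select: bits set in pattern come from b,
// the rest from a. With pattern produced by _mm_cmpgt_epi32 (all-ones or
// all-zeros per lane) it implements a lane-wise max together with the
// compare — the SSE2 stand-in for SSE4.1's _mm_blendv_epi8.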

// horizontal max over the four 32-bit lanes
inline int maxComponent( const __m128i& a ) {
    int max = at0(a);
    int e1 = at1(a);
    int e2 = at2(a);
    int e3 = at3(a);

    max = max > e1 ? max : e1;
    max = max > e2 ? max : e2;
    max = max > e3 ? max : e3;
    return max;
}

//void p ( const char * name, __m128i arg ) {
//    printf( "%s: {%d, %d, %d, %d}\n", name,
//            at0(arg),
//            at1(arg),
//            at2(arg),
//            at3(arg) );
//}

float viterbiSSE( hmm_opt* hmm, const unsigned char* seq, int seqLength, int* mscoreC, int* mscoreP, int* iscoreC, int* iscoreP, int* dscoreC, int* dscoreP ) {
    assert( NULL != hmm );
    assert( NULL != seq && 0 < seqLength );
    assert( NULL != mscoreC && NULL != mscoreP );
    assert( NULL != iscoreC && NULL != iscoreP );
    assert( NULL != dscoreC && NULL != dscoreP );
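
    // Scalar sketch of the Plan7 Viterbi recurrence this routine vectorizes
    // (P7Viterbi in hmmer2 is the reference implementation):
    //
    //   M[i][j] = msc[seq[i]][j] + max( M[i-1][j-1] + tsc[TMM][j-1],
    //                                   I[i-1][j-1] + tsc[TIM][j-1],
    //                                   D[i-1][j-1] + tsc[TDM][j-1],
    //                                   B[i-1]      + bsc[j] )
    //   D[i][j] = max( M[i][j-1] + tsc[TMD][j-1], D[i][j-1] + tsc[TDD][j-1] )
    //   I[i][j] = isc[seq[i]][j] + max( M[i-1][j] + tsc[TMI][j],
    //                                   I[i-1][j] + tsc[TII][j] )
    //   E[i]    = max over j of ( M[i][j] + esc[j] )
    //
    // Rows are processed four consecutive model positions per __m128i, two
    // vectors (eight positions) per inner-loop iteration; only the current
    // and previous rows are kept and swapped at the end of each row.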

    __m128i itV1, itV2, itV3, itV4, itV5, itV6, itV7, itV8;
    __m128i ipV, ipV1, dpV, dpV1, mpV, mpV1;
    __m128i dt1, dt2, dt3, dt4, dt, mt1, mt2, mt3, mt4;
    __m128i xscXTN = _mm_set1_epi32( hmm->xsc[XTN][MOVE] );
    __m128i xscPXMB = _mm_set1_epi32( hmm->xsc[XTN][MOVE] );
    __m128i hmmxscXTNLOOP = _mm_set1_epi32( hmm->xsc[XTN][LOOP] );
    __m128i hmmxscXTCMOVE = _mm_set1_epi32( hmm->xsc[XTC][MOVE] );
    __m128i hmmxscXTCLOOP = _mm_set1_epi32( hmm->xsc[XTC][LOOP] );
    __m128i hmmxscXTJMOVE = _mm_set1_epi32( hmm->xsc[XTJ][MOVE] );
    __m128i hmmxscXTJLOOP = _mm_set1_epi32( hmm->xsc[XTJ][LOOP] );
    __m128i hmmxscXTEMOVE = _mm_set1_epi32( hmm->xsc[XTE][MOVE] );
    __m128i hmmxscXTELOOP = _mm_set1_epi32( hmm->xsc[XTE][LOOP] );
    __m128i zeroVEC = _mm_set1_epi32( 0 );
    __m128i INFTY_VEC = _mm_set1_epi32( -INFTY );
    __m128i xscCXME = _mm_set1_epi32( -INFTY );
    __m128i xscCXMC = _mm_set1_epi32( -INFTY );
    __m128i xscCXMJ = _mm_set1_epi32( -INFTY );
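
    // Special-state bookkeeping: the N score never needs separate storage.
    // xscXTN starts at xsc[XTN][MOVE] and accumulates xsc[XTN][LOOP] once per
    // row, so at row i it already equals N[i] + t(N->B); xscPXMB holds the
    // previous row's B score.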

    __m128i *hmmtscTII = (__m128i*)( hmm->tsc[TII] + 1 );
    __m128i *hmmtscTMM = (__m128i*) hmm->tsc[TMM];
    __m128i *hmmtscTIM = (__m128i*) hmm->tsc[TIM];
    __m128i *hmmtscTDM = (__m128i*) hmm->tsc[TDM];
    __m128i *hmmtscTMI = (__m128i*)( hmm->tsc[TMI] + 1 );
    __m128i *hmmbsc = (__m128i*)( hmm->bsc + 1 );
    __m128i *hmmesc = (__m128i*)( hmm->esc + 1 );

    __m128i *mscCV = (__m128i*) ( mscoreC + 1 );
    __m128i *iscCV = (__m128i*) ( iscoreC + 1 );
    __m128i *dscCV = (__m128i*) ( dscoreC + 1 );
    __m128i *mscPV = (__m128i*) ( mscoreP + 1 );
    __m128i *iscPV = (__m128i*) ( iscoreP + 1 );
    __m128i *dscPV = (__m128i*) ( dscoreP + 1 );
    __m128i *Mdt = (__m128i*) hmm->tsc[TMD] - 1;

    __m128i *mscDSQ, *iscDSQ, *tmpMV, *tmpIV, *tmpDV;
    __m128i cmp1, cmp2, cmp3;

    float result = 0;
    int i = 0, j = 0, k = 0, sc = 0, M4th = 0;
    int *tmpM = NULL, *tmpD = NULL, *hmmtscTDDip = hmm->tsc[TDD];

    // number of 4-wide vectors needed to cover M model positions
    M4th = ( hmm->M + 3 ) >> 2;
    for( k = 0; k <= M4th; ++k ) {
        mscPV[k] = INFTY_VEC;
        iscPV[k] = INFTY_VEC;
        dscPV[k] = INFTY_VEC;
    }
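
    // Row loop over the 1-based digitized sequence. Each inner iteration
    // advances two score vectors (k, k+1), i.e. eight model positions;
    // j indexes the scalar tsc[TDD] row in step with them.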

    for( i = 1; i <= seqLength; ++i ) {
        xscCXME = INFTY_VEC;

        mscDSQ = (__m128i*)( hmm->msc[(int)seq[i]] + 1 );
        iscDSQ = (__m128i*)( hmm->isc[(int)seq[i]] + 1 );
        for( k = 1, j = 1; k <= M4th; j += 8, k += 2 ) {
            // previous-row vectors realigned to position j-1
            ipV = shufflePMask( iscPV[k-1], iscPV[k-2] );
            ipV1 = shufflePMask( iscPV[k], iscPV[k-1] );
            mpV = shufflePMask( mscPV[k-1], mscPV[k-2] );
            mpV1 = shufflePMask( mscPV[k], mscPV[k-1] );
            dpV = shufflePMask( dscPV[k-1], dscPV[k-2] );
            dpV1 = shufflePMask( dscPV[k], dscPV[k-1] );

            // M state: max of the I->M, M->M, D->M and B->M entries
            itV2 = _mm_add_epi32( ipV, hmmtscTIM[k-1] );
            itV6 = _mm_add_epi32( ipV1, hmmtscTIM[k] );
            itV1 = _mm_add_epi32( mpV, hmmtscTMM[k-1] );
            itV5 = _mm_add_epi32( mpV1, hmmtscTMM[k] );
            itV4 = _mm_add_epi32( dpV, hmmtscTDM[k-1] );
            itV8 = _mm_add_epi32( dpV1, hmmtscTDM[k] );
            itV3 = _mm_add_epi32( xscPXMB, hmmbsc[k-1] );
            itV7 = _mm_add_epi32( xscPXMB, hmmbsc[k] );

            // The D-state chain (dt*) is interleaved with the M/I updates
            // to keep the pipeline busy.
            cmp1 = _mm_cmpgt_epi32( itV1, itV2 );
            itV1 = selectBits( itV2, itV1, cmp1 );

            cmp2 = _mm_cmpgt_epi32( itV3, itV4 );
            dt4 = insert3I32( zeroVEC, hmmtscTDDip[j+2] );
            itV2 = selectBits( itV4, itV3, cmp2 );
            dt3 = _mm_set1_epi32( hmmtscTDDip[j+1] );
            cmp1 = _mm_cmpgt_epi32( itV1, itV2 );
            dt3 = _mm_add_epi32( dt3, dt4 );
            itV3 = selectBits( itV2, itV1, cmp1 );
            cmp2 = _mm_cmpgt_epi32( itV5, itV6 );
            dt3 = shuffleD0Mask( zeroVEC, dt3 );
            itV1 = selectBits( itV6, itV5, cmp2 );

            cmp1 = _mm_cmpgt_epi32( itV7, itV8 );
            dt2 = _mm_set1_epi32( hmmtscTDDip[j] );
            itV2 = selectBits( itV8, itV7, cmp1 );
            dt2 = _mm_add_epi32( dt2, dt3 );
            cmp2 = _mm_cmpgt_epi32( itV1, itV2 );
            mscCV[k-1] = _mm_add_epi32( itV3, mscDSQ[k-1] );
            itV4 = selectBits( itV2, itV1, cmp2 );
            dt2 = insert0I32( dt2, 0 );
            mscCV[k] = _mm_add_epi32( itV4, mscDSQ[k] );
            dt1 = _mm_set1_epi32( hmmtscTDDip[j-1] );
            mpV = shufflePMask( mscCV[k-1], mscCV[k-2] );
            dt1 = _mm_add_epi32( dt1, dt2 );
            mt1 = _mm_add_epi32( mpV, Mdt[k] );
            dt = _mm_set1_epi32( dscoreC[j-1] );
            mt2 = shuffleD1Mask( INFTY_VEC, mt1 );
            dt1 = _mm_add_epi32( dt, dt1 );
            mt3 = shuffleD2Mask( INFTY_VEC, mt1 );
            dt2 = _mm_add_epi32( mt2, dt2 );
            mt4 = shuffleD3Mask( INFTY_VEC, mt1 );
            dt3 = _mm_add_epi32( mt3, dt3 );
            cmp1 = _mm_cmpgt_epi32( dt1, mt1 );
            dt4 = _mm_add_epi32( mt4, dt4 );
            cmp2 = _mm_cmpgt_epi32( dt2, dt3 );
            dt1 = selectBits( mt1, dt1, cmp1 );
            cmp3 = _mm_cmpgt_epi32( dt1, dt4 );
            dt2 = selectBits( dt3, dt2, cmp2 );
            dt1 = selectBits( dt4, dt1, cmp3 );
            cmp2 = _mm_cmpgt_epi32( dt1, dt2 );
            dt4 = insert3I32( zeroVEC, hmmtscTDDip[j+6] );
            dscCV[k-1] = selectBits( dt2, dt1, cmp2 );
            dt3 = _mm_set1_epi32( hmmtscTDDip[j+5] );
            itV1 = _mm_add_epi32( mscPV[k-1], hmmtscTMI[k-1] );
            dt3 = _mm_add_epi32( dt3, dt4 );
            dt3 = shuffleD0Mask( zeroVEC, dt3 );
            dt2 = _mm_set1_epi32( hmmtscTDDip[j+4] );
            itV2 = _mm_add_epi32( iscPV[k-1], hmmtscTII[k-1] );
            dt2 = _mm_add_epi32( dt2, dt3 );
            dt1 = _mm_set1_epi32( hmmtscTDDip[j+3] );
            dt2 = insert0I32( dt2, 0 );
            cmp1 = _mm_cmpgt_epi32( itV1, itV2 );
            dt1 = _mm_add_epi32( dt1, dt2 );
            mpV = shufflePMask( mscCV[k], mscCV[k-1] );
            mt1 = _mm_add_epi32( mpV, Mdt[k+1] );
            dt = _mm_set1_epi32( dscoreC[j+3] );
            itV3 = selectBits( itV2, itV1, cmp1 );
            dt1 = _mm_add_epi32( dt, dt1 );
            mt2 = shuffleD1Mask( INFTY_VEC, mt1 );
            itV1 = _mm_add_epi32( mscPV[k], hmmtscTMI[k] );
            dt2 = _mm_add_epi32( mt2, dt2 );
            mt3 = shuffleD2Mask( INFTY_VEC, mt1 );
            itV2 = _mm_add_epi32( iscPV[k], hmmtscTII[k] );
            dt3 = _mm_add_epi32( mt3, dt3 );
            mt4 = shuffleD3Mask( INFTY_VEC, mt1 );
            cmp3 = _mm_cmpgt_epi32( itV1, itV2 );
            dt4 = _mm_add_epi32( mt4, dt4 );
            cmp1 = _mm_cmpgt_epi32( dt1, mt1 );
            itV4 = selectBits( itV2, itV1, cmp3 );
            cmp2 = _mm_cmpgt_epi32( dt2, dt3 );
            dt1 = selectBits( mt1, dt1, cmp1 );
            dt2 = selectBits( dt3, dt2, cmp2 );
            cmp1 = _mm_cmpgt_epi32( dt1, dt4 );
            dt1 = selectBits( dt4, dt1, cmp1 );
            cmp2 = _mm_cmpgt_epi32( dt1, dt2 );
            iscCV[k-1] = _mm_add_epi32( itV3, iscDSQ[k-1] );
            dscCV[k] = selectBits( dt2, dt1, cmp2 );
            // E state: fold M[i][j] + esc[j] into the running row maximum
            itV1 = _mm_add_epi32( mscCV[k-1], hmmesc[k-1] );
            itV2 = _mm_add_epi32( mscCV[k], hmmesc[k] );
            cmp1 = _mm_cmpgt_epi32( itV1, xscCXME );
            cmp2 = _mm_cmpgt_epi32( itV2, xscCXME );
            xscCXME = selectBits( xscCXME, itV1, cmp1 );
            iscCV[k] = _mm_add_epi32( itV4, iscDSQ[k] );
            xscCXME = selectBits( xscCXME, itV2, cmp2 );
        }
        // reduce the per-lane E maxima, then update the J, B and C specials
        xscCXME = _mm_set1_epi32( maxComponent( xscCXME ) );
        itV2 = _mm_add_epi32( xscCXMJ, hmmxscXTJLOOP );
        itV3 = _mm_add_epi32( xscCXME, hmmxscXTELOOP );
        itV1 = _mm_add_epi32( xscCXMJ, hmmxscXTJMOVE );

        cmp1 = _mm_cmpgt_epi32( itV2, itV3 );
        xscXTN = _mm_add_epi32( xscXTN, hmmxscXTNLOOP );
        xscCXMJ = selectBits( itV3, itV2, cmp1 );
        itV2 = _mm_add_epi32( xscCXMC, hmmxscXTCLOOP );
        cmp1 = _mm_cmpgt_epi32( xscXTN, itV1 );
        itV3 = _mm_add_epi32( xscCXME, hmmxscXTEMOVE );
        xscPXMB = selectBits( itV1, xscXTN, cmp1 );
        cmp2 = _mm_cmpgt_epi32( itV2, itV3 );
        xscCXMC = selectBits( itV3, itV2, cmp2 );
        // swap current and previous row buffers
        tmpMV = mscCV;
        tmpIV = iscCV;
        tmpDV = dscCV;
        mscCV = mscPV;
        iscCV = iscPV;
        dscCV = dscPV;
        mscPV = tmpMV;
        iscPV = tmpIV;
        dscPV = tmpDV;
        tmpM = mscoreC;
        tmpD = dscoreC;
        mscoreC = mscoreP;
        dscoreC = dscoreP;
        mscoreP = tmpM;
        dscoreP = tmpD;
    }
    // final score: C plus the C->T move, converted from scaled integer
    // log-odds back to bits
    itV1 = _mm_add_epi32( xscCXMC, hmmxscXTCMOVE );
    sc = at0(itV1);
    result = (float) sc / INTSCALE;
    return result;
}

} // anonymous namespace

// assuming dsq is aligned
QList<float> sseScoring( unsigned char* dsq, int seqlen, plan7_s* hmm, HMMSeqGranulation* gr, TaskStateInfo& ti ) {

    assert( dsq );
    assert( gr );
    assert( seqlen > 0 );

    const int M4th = ( hmm->M + 3 ) >> 2;
    const int memSz = 4 + (M4th+1) * 16 + 12 + ALIGNMENT_MEM;
    char* mscCurMem = (char*)malloc( memSz );
    char* iscCurMem = (char*)malloc( memSz );
    char* dscCurMem = (char*)malloc( memSz );
    char* mscPrevMem = (char*)malloc( memSz );
    char* iscPrevMem = (char*)malloc( memSz );
    char* dscPrevMem = (char*)malloc( memSz );

    memset( mscCurMem, 0, memSz );
    memset( iscCurMem, 0, memSz );
    memset( dscCurMem, 0, memSz );
    memset( mscPrevMem, 0, memSz );
    memset( iscPrevMem, 0, memSz );
    memset( dscPrevMem, 0, memSz );
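
    // The + 0xC (12 byte) offset places score[1] on a 16-byte boundary:
    // viterbiSSE() reads the rows through (__m128i*)(score + 1), so the
    // 1-based DP columns must line up with the vector lanes.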
    int* mscCur = (int*)( ALIGNED( mscCurMem, ALIGNMENT_MEM ) + 0xC );
    int* iscCur = (int*)( ALIGNED( iscCurMem, ALIGNMENT_MEM ) + 0xC );
    int* dscCur = (int*)( ALIGNED( dscCurMem, ALIGNMENT_MEM ) + 0xC );
    int* mscPrev = (int*)( ALIGNED( mscPrevMem, ALIGNMENT_MEM ) + 0xC );
    int* iscPrev = (int*)( ALIGNED( iscPrevMem, ALIGNMENT_MEM ) + 0xC );
    int* dscPrev = (int*)( ALIGNED( dscPrevMem, ALIGNMENT_MEM ) + 0xC );

    QList<float> results;
    U2Region range( 0, seqlen );
    // overlap consecutive chunks by 2*M positions, presumably so that a match
    // crossing a chunk border still fits whole inside one of the two chunks
    gr->overlap = 2 * hmm->M;
    gr->exOverlap = 0;
    gr->chunksize = qBound( gr->overlap + 1, CHUNK_SIZE, seqlen );
    gr->regions = SequenceWalkerTask::splitRange( range, gr->chunksize, gr->overlap, gr->exOverlap, false );
    const QVector<U2Region>& regions = gr->regions;

    int regionsSz = regions.size();
    int regionsPassed = 0;
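
    // One hmm_opt (the re-aligned copy of the model) and one set of DP row
    // buffers are reused across all chunks; viterbiSSE() re-initializes the
    // previous-row vectors itself at the start of each call.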
    hmm_opt hmmOpt( hmm );
    foreach( U2Region chunk, regions ) {
        unsigned char* curSeqAddr = dsq + chunk.startPos;
        int curSeqLen = chunk.length;
        float sc = viterbiSSE( &hmmOpt, curSeqAddr, curSeqLen, mscCur, mscPrev, iscCur, iscPrev, dscCur, dscPrev );
        results.push_back( sc );

        ti.progress = (int)( ( 100.0 * ( ++regionsPassed ) ) / regionsSz );
        if( ti.cancelFlag ) {
            break;
        }
    }

    free( mscCurMem );
    free( iscCurMem );
    free( dscCurMem );
    free( mscPrevMem );
    free( iscPrevMem );
    free( dscPrevMem );

    return results;
}