1 /* Routines for the P7_OPROFILE structure:
2 * a search profile in an optimized implementation.
3 *
4 * Contents:
5 * 1. The P7_OPROFILE object: allocation, initialization, destruction.
6 * 2. Conversion from generic P7_PROFILE to optimized P7_OPROFILE
7 * 3. Conversion from optimized P7_OPROFILE to compact score arrays
8 * 4. Debugging and development utilities.
9 * 5. Benchmark driver.
10 * 6. Unit tests.
11 * 7. Test driver.
12 * 8. Example.
13 */
14 #include "p7_config.h"
15
16 #include <stdio.h>
17 #include <string.h>
18 #include <math.h> /* roundf() */
19
20 #include <xmmintrin.h> /* SSE */
21 #include <emmintrin.h> /* SSE2 */
22
23 #include "easel.h"
24 #include "esl_random.h"
25 #include "esl_sse.h"
26 #include "esl_vectorops.h"
27
28 #include "hmmer.h"
29 #include "impl_sse.h"
30
31 static uint8_t unbiased_byteify(P7_OPROFILE *om, float sc);
32 static uint8_t biased_byteify(P7_OPROFILE *om, float sc);
33 static int16_t wordify(P7_OPROFILE *om, float sc);
34 static int sf_conversion(P7_OPROFILE *om);
35
36 /*****************************************************************
37 * 1. The P7_OPROFILE structure: a score profile.
38 *****************************************************************/
39
40 /* Function: p7_oprofile_Create()
41 * Synopsis: Allocate an optimized profile structure.
42 * Incept: SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
43 *
44 * Purpose: Allocate for profiles of up to <allocM> nodes for digital alphabet <abc>.
45 *
46 * Throws: <NULL> on allocation error.
47 */
48 P7_OPROFILE *
p7_oprofile_Create(int allocM,const ESL_ALPHABET * abc)49 p7_oprofile_Create(int allocM, const ESL_ALPHABET *abc)
50 {
51 int status;
52 P7_OPROFILE *om = NULL;
53 int nqb = p7O_NQB(allocM); /* # of uchar vectors needed for query */
54 int nqw = p7O_NQW(allocM); /* # of sword vectors needed for query */
55 int nqf = p7O_NQF(allocM); /* # of float vectors needed for query */
56 int nqs = nqb + p7O_EXTRA_SB;
57 int x;
58
59 /* level 0 */
60 ESL_ALLOC(om, sizeof(P7_OPROFILE));
61 om->rbv_mem = NULL;
62 om->sbv_mem = NULL;
63 om->rwv_mem = NULL;
64 om->twv_mem = NULL;
65 om->rfv_mem = NULL;
66 om->tfv_mem = NULL;
67 om->rbv = NULL;
68 om->sbv = NULL;
69 om->rwv = NULL;
70 om->twv = NULL;
71 om->rfv = NULL;
72 om->tfv = NULL;
73 om->clone = 0;
74
75 /* level 1 */
76 ESL_ALLOC(om->rbv_mem, sizeof(__m128i) * nqb * abc->Kp +15); /* +15 is for manual 16-byte alignment */
77 ESL_ALLOC(om->sbv_mem, sizeof(__m128i) * nqs * abc->Kp +15);
78 ESL_ALLOC(om->rwv_mem, sizeof(__m128i) * nqw * abc->Kp +15);
79 ESL_ALLOC(om->twv_mem, sizeof(__m128i) * nqw * p7O_NTRANS +15);
80 ESL_ALLOC(om->rfv_mem, sizeof(__m128) * nqf * abc->Kp +15);
81 ESL_ALLOC(om->tfv_mem, sizeof(__m128) * nqf * p7O_NTRANS +15);
82
83 ESL_ALLOC(om->rbv, sizeof(__m128i *) * abc->Kp);
84 ESL_ALLOC(om->sbv, sizeof(__m128i *) * abc->Kp);
85 ESL_ALLOC(om->rwv, sizeof(__m128i *) * abc->Kp);
86 ESL_ALLOC(om->rfv, sizeof(__m128 *) * abc->Kp);
87
88 /* align vector memory on 16-byte boundaries */
89 om->rbv[0] = (__m128i *) (((unsigned long int) om->rbv_mem + 15) & (~0xf));
90 om->sbv[0] = (__m128i *) (((unsigned long int) om->sbv_mem + 15) & (~0xf));
91 om->rwv[0] = (__m128i *) (((unsigned long int) om->rwv_mem + 15) & (~0xf));
92 om->twv = (__m128i *) (((unsigned long int) om->twv_mem + 15) & (~0xf));
93 om->rfv[0] = (__m128 *) (((unsigned long int) om->rfv_mem + 15) & (~0xf));
94 om->tfv = (__m128 *) (((unsigned long int) om->tfv_mem + 15) & (~0xf));
95
96 /* set the rest of the row pointers for match emissions */
97 for (x = 1; x < abc->Kp; x++) {
98 om->rbv[x] = om->rbv[0] + (x * nqb);
99 om->sbv[x] = om->sbv[0] + (x * nqs);
100 om->rwv[x] = om->rwv[0] + (x * nqw);
101 om->rfv[x] = om->rfv[0] + (x * nqf);
102 }
103 om->allocQ16 = nqb;
104 om->allocQ8 = nqw;
105 om->allocQ4 = nqf;
106
107 /* Remaining initializations */
108 om->tbm_b = 0;
109 om->tec_b = 0;
110 om->tjb_b = 0;
111 om->scale_b = 0.0f;
112 om->base_b = 0;
113 om->bias_b = 0;
114
115 om->scale_w = 0.0f;
116 om->base_w = 0;
117 om->ddbound_w = 0;
118 om->ncj_roundoff = 0.0f;
119
120 for (x = 0; x < p7_NOFFSETS; x++) om->offs[x] = -1;
121 for (x = 0; x < p7_NEVPARAM; x++) om->evparam[x] = p7_EVPARAM_UNSET;
122 for (x = 0; x < p7_NCUTOFFS; x++) om->cutoff[x] = p7_CUTOFF_UNSET;
123 for (x = 0; x < p7_MAXABET; x++) om->compo[x] = p7_COMPO_UNSET;
124
125 om->name = NULL;
126 om->acc = NULL;
127 om->desc = NULL;
128
129 /* in a P7_OPROFILE, we always allocate for the optional RF, CS annotation.
130 * we only rely on the leading \0 to signal that it's unused, but
131 * we initialize all this memory to zeros to shut valgrind up about
132 * fwrite'ing uninitialized memory in the io functions.
133 */
134 ESL_ALLOC(om->rf, sizeof(char) * (allocM+2));
135 ESL_ALLOC(om->mm, sizeof(char) * (allocM+2));
136 ESL_ALLOC(om->cs, sizeof(char) * (allocM+2));
137 ESL_ALLOC(om->consensus, sizeof(char) * (allocM+2));
138 memset(om->rf, '\0', sizeof(char) * (allocM+2));
139 memset(om->mm, '\0', sizeof(char) * (allocM+2));
140 memset(om->cs, '\0', sizeof(char) * (allocM+2));
141 memset(om->consensus,'\0', sizeof(char) * (allocM+2));
142
143 om->abc = abc;
144 om->L = 0;
145 om->M = 0;
146 om->max_length = -1;
147 om->allocM = allocM;
148 om->mode = p7_NO_MODE;
149 om->nj = 0.0f;
150 return om;
151
152 ERROR:
153 p7_oprofile_Destroy(om);
154 return NULL;
155 }
156
157 /* Function: p7_oprofile_IsLocal()
158 * Synopsis: Returns TRUE if profile is in local alignment mode.
159 * Incept: SRE, Sat Aug 16 08:46:00 2008 [Janelia]
160 */
161 int
p7_oprofile_IsLocal(const P7_OPROFILE * om)162 p7_oprofile_IsLocal(const P7_OPROFILE *om)
163 {
164 if (om->mode == p7_LOCAL || om->mode == p7_UNILOCAL) return TRUE;
165 return FALSE;
166 }
167
168
169
170 /* Function: p7_oprofile_Destroy()
171 * Synopsis: Frees an optimized profile structure.
172 * Incept: SRE, Sun Nov 25 12:22:21 2007 [Casa de Gatos]
173 */
174 void
p7_oprofile_Destroy(P7_OPROFILE * om)175 p7_oprofile_Destroy(P7_OPROFILE *om)
176 {
177 if (om == NULL) return;
178
179 if (om->clone == 0)
180 {
181 if (om->rbv_mem != NULL) free(om->rbv_mem);
182 if (om->sbv_mem != NULL) free(om->sbv_mem);
183 if (om->rwv_mem != NULL) free(om->rwv_mem);
184 if (om->twv_mem != NULL) free(om->twv_mem);
185 if (om->rfv_mem != NULL) free(om->rfv_mem);
186 if (om->tfv_mem != NULL) free(om->tfv_mem);
187 if (om->rbv != NULL) free(om->rbv);
188 if (om->sbv != NULL) free(om->sbv);
189 if (om->rwv != NULL) free(om->rwv);
190 if (om->rfv != NULL) free(om->rfv);
191 if (om->name != NULL) free(om->name);
192 if (om->acc != NULL) free(om->acc);
193 if (om->desc != NULL) free(om->desc);
194 if (om->rf != NULL) free(om->rf);
195 if (om->mm != NULL) free(om->mm);
196 if (om->cs != NULL) free(om->cs);
197 if (om->consensus != NULL) free(om->consensus);
198 }
199
200 free(om);
201 }
202
203 /* Function: p7_oprofile_Sizeof()
204 * Synopsis: Return the allocated size of a <P7_OPROFILE>.
205 * Incept: SRE, Wed Mar 2 10:09:21 2011 [Janelia]
206 *
207 * Purpose: Returns the allocated size of a <P7_OPROFILE>,
208 * in bytes.
209 */
210 size_t
p7_oprofile_Sizeof(P7_OPROFILE * om)211 p7_oprofile_Sizeof(P7_OPROFILE *om)
212 {
213 size_t n = 0;
214 int nqb = om->allocQ16; /* # of uchar vectors needed for query */
215 int nqw = om->allocQ8; /* # of sword vectors needed for query */
216 int nqf = om->allocQ4; /* # of float vectors needed for query */
217 int nqs = nqb + p7O_EXTRA_SB;
218
219 /* Stuff below exactly mirrors the malloc()'s in
220 * p7_oprofile_Create(); so even though we could
221 * write this more compactly, leave it like this
222 * w/ one:one correspondence to _Create(), for
223 * maintainability and clarity.
224 */
225 n += sizeof(P7_OPROFILE);
226 n += sizeof(__m128i) * nqb * om->abc->Kp +15; /* om->rbv_mem */
227 n += sizeof(__m128i) * nqs * om->abc->Kp +15; /* om->sbv_mem */
228 n += sizeof(__m128i) * nqw * om->abc->Kp +15; /* om->rwv_mem */
229 n += sizeof(__m128i) * nqw * p7O_NTRANS +15; /* om->twv_mem */
230 n += sizeof(__m128) * nqf * om->abc->Kp +15; /* om->rfv_mem */
231 n += sizeof(__m128) * nqf * p7O_NTRANS +15; /* om->tfv_mem */
232
233 n += sizeof(__m128i *) * om->abc->Kp; /* om->rbv */
234 n += sizeof(__m128i *) * om->abc->Kp; /* om->sbv */
235 n += sizeof(__m128i *) * om->abc->Kp; /* om->rwv */
236 n += sizeof(__m128 *) * om->abc->Kp; /* om->rfv */
237
238 n += sizeof(char) * (om->allocM+2); /* om->rf */
239 n += sizeof(char) * (om->allocM+2); /* om->mm */
240 n += sizeof(char) * (om->allocM+2); /* om->cs */
241 n += sizeof(char) * (om->allocM+2); /* om->consensus */
242
243 return n;
244 }
245
246
247 /* TODO: this is not following the _Copy interface guidelines; it's a _Clone */
248 /* TODO: its documentation header is a cut/paste of _Create; FIXME */
249 /* Function: p7_oprofile_Copy()
250 * Synopsis: Allocate an optimized profile structure.
251 * Incept: SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
252 *
253 * Purpose: Allocate for profiles of up to <allocM> nodes for digital alphabet <abc>.
254 *
255 * Throws: <NULL> on allocation error.
256 */
257 P7_OPROFILE *
p7_oprofile_Copy(P7_OPROFILE * om1)258 p7_oprofile_Copy(P7_OPROFILE *om1)
259 {
260 int x, y;
261 int status;
262
263 int nqb = p7O_NQB(om1->allocM); /* # of uchar vectors needed for query */
264 int nqw = p7O_NQW(om1->allocM); /* # of sword vectors needed for query */
265 int nqf = p7O_NQF(om1->allocM); /* # of float vectors needed for query */
266 int nqs = nqb + p7O_EXTRA_SB;
267
268 size_t size = sizeof(char) * (om1->allocM+2);
269
270 P7_OPROFILE *om2 = NULL;
271
272 const ESL_ALPHABET *abc = om1->abc;
273
274 /* level 0 */
275 ESL_ALLOC(om2, sizeof(P7_OPROFILE));
276 om2->rbv_mem = NULL;
277 om2->sbv_mem = NULL;
278 om2->rwv_mem = NULL;
279 om2->twv_mem = NULL;
280 om2->rfv_mem = NULL;
281 om2->tfv_mem = NULL;
282 om2->rbv = NULL;
283 om2->sbv = NULL;
284 om2->rwv = NULL;
285 om2->twv = NULL;
286 om2->rfv = NULL;
287 om2->tfv = NULL;
288
289 /* level 1 */
290 ESL_ALLOC(om2->rbv_mem, sizeof(__m128i) * nqb * abc->Kp +15); /* +15 is for manual 16-byte alignment */
291 ESL_ALLOC(om2->sbv_mem, sizeof(__m128i) * nqs * abc->Kp +15);
292 ESL_ALLOC(om2->rwv_mem, sizeof(__m128i) * nqw * abc->Kp +15);
293 ESL_ALLOC(om2->twv_mem, sizeof(__m128i) * nqw * p7O_NTRANS +15);
294 ESL_ALLOC(om2->rfv_mem, sizeof(__m128) * nqf * abc->Kp +15);
295 ESL_ALLOC(om2->tfv_mem, sizeof(__m128) * nqf * p7O_NTRANS +15);
296
297 ESL_ALLOC(om2->rbv, sizeof(__m128i *) * abc->Kp);
298 ESL_ALLOC(om2->sbv, sizeof(__m128i *) * abc->Kp);
299 ESL_ALLOC(om2->rwv, sizeof(__m128i *) * abc->Kp);
300 ESL_ALLOC(om2->rfv, sizeof(__m128 *) * abc->Kp);
301
302 /* align vector memory on 16-byte boundaries */
303 om2->rbv[0] = (__m128i *) (((unsigned long int) om2->rbv_mem + 15) & (~0xf));
304 om2->sbv[0] = (__m128i *) (((unsigned long int) om2->sbv_mem + 15) & (~0xf));
305 om2->rwv[0] = (__m128i *) (((unsigned long int) om2->rwv_mem + 15) & (~0xf));
306 om2->twv = (__m128i *) (((unsigned long int) om2->twv_mem + 15) & (~0xf));
307 om2->rfv[0] = (__m128 *) (((unsigned long int) om2->rfv_mem + 15) & (~0xf));
308 om2->tfv = (__m128 *) (((unsigned long int) om2->tfv_mem + 15) & (~0xf));
309
310 /* copy the vector data */
311 memcpy(om2->rbv[0], om1->rbv[0], sizeof(__m128i) * nqb * abc->Kp);
312 memcpy(om2->sbv[0], om1->sbv[0], sizeof(__m128i) * nqs * abc->Kp);
313 memcpy(om2->rwv[0], om1->rwv[0], sizeof(__m128i) * nqw * abc->Kp);
314 memcpy(om2->rfv[0], om1->rfv[0], sizeof(__m128i) * nqf * abc->Kp);
315
316 /* set the rest of the row pointers for match emissions */
317 for (x = 1; x < abc->Kp; x++) {
318 om2->rbv[x] = om2->rbv[0] + (x * nqb);
319 om2->sbv[x] = om2->sbv[0] + (x * nqs);
320 om2->rwv[x] = om2->rwv[0] + (x * nqw);
321 om2->rfv[x] = om2->rfv[0] + (x * nqf);
322 }
323 om2->allocQ16 = nqb;
324 om2->allocQ8 = nqw;
325 om2->allocQ4 = nqf;
326
327 /* Remaining initializations */
328 om2->tbm_b = om1->tbm_b;
329 om2->tec_b = om1->tec_b;
330 om2->tjb_b = om1->tjb_b;
331 om2->scale_b = om1->scale_b;
332 om2->base_b = om1->base_b;
333 om2->bias_b = om1->bias_b;
334
335 om2->scale_w = om1->scale_w;
336 om2->base_w = om1->base_w;
337 om2->ddbound_w = om1->ddbound_w;
338 om2->ncj_roundoff = om1->ncj_roundoff;
339
340 for (x = 0; x < p7_NOFFSETS; x++) om2->offs[x] = om1->offs[x];
341 for (x = 0; x < p7_NEVPARAM; x++) om2->evparam[x] = om1->evparam[x];
342 for (x = 0; x < p7_NCUTOFFS; x++) om2->cutoff[x] = om1->cutoff[x];
343 for (x = 0; x < p7_MAXABET; x++) om2->compo[x] = om1->compo[x];
344
345 for (x = 0; x < nqw * p7O_NTRANS; ++x) om2->twv[x] = om1->twv[x];
346 for (x = 0; x < nqf * p7O_NTRANS; ++x) om2->tfv[x] = om1->tfv[x];
347
348 for (x = 0; x < p7O_NXSTATES; x++)
349 for (y = 0; y < p7O_NXTRANS; y++)
350 {
351 om2->xw[x][y] = om1->xw[x][y];
352 om2->xf[x][y] = om1->xf[x][y];
353 }
354
355 if ((status = esl_strdup(om1->name, -1, &om2->name)) != eslOK) goto ERROR;
356 if ((status = esl_strdup(om1->acc, -1, &om2->acc)) != eslOK) goto ERROR;
357 if ((status = esl_strdup(om1->desc, -1, &om2->desc)) != eslOK) goto ERROR;
358
359 /* in a P7_OPROFILE, we always allocate for the optional RF, CS annotation.
360 * we only rely on the leading \0 to signal that it's unused, but
361 * we initialize all this memory to zeros to shut valgrind up about
362 * fwrite'ing uninitialized memory in the io functions.
363 */
364 ESL_ALLOC(om2->rf, size);
365 ESL_ALLOC(om2->mm, size);
366 ESL_ALLOC(om2->cs, size);
367 ESL_ALLOC(om2->consensus, size);
368
369 memcpy(om2->rf, om1->rf, size);
370 memcpy(om2->mm, om1->mm, size);
371 memcpy(om2->cs, om1->cs, size);
372 memcpy(om2->consensus, om1->consensus, size);
373
374 om2->abc = om1->abc;
375 om2->L = om1->L;
376 om2->M = om1->M;
377 om2->allocM = om1->allocM;
378 om2->mode = om1->mode;
379 om2->nj = om1->nj;
380 om2->max_length = om1->max_length;
381
382 om2->clone = om1->clone;
383
384 return om2;
385
386 ERROR:
387 p7_oprofile_Destroy(om2);
388 return NULL;
389 }
390
391 /* Function: p7_oprofile_Clone()
392 * Synopsis: Allocate a cloned copy of the optimized profile structure. All
393 * allocated memory from the original profile is not reallocated.
394 * The cloned copy will point to the same memory as the original.
395 * Incept: SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
396 *
397 * Purpose: Quick copy of an optimized profile used in mutiple threads.
398 *
399 * Throws: <NULL> on allocation error.
400 */
401 P7_OPROFILE *
p7_oprofile_Clone(const P7_OPROFILE * om1)402 p7_oprofile_Clone(const P7_OPROFILE *om1)
403 {
404 int status;
405
406 P7_OPROFILE *om2 = NULL;
407
408 ESL_ALLOC(om2, sizeof(P7_OPROFILE));
409 memcpy(om2, om1, sizeof(P7_OPROFILE));
410
411 om2->clone = 1;
412
413 return om2;
414
415 ERROR:
416 p7_oprofile_Destroy(om2);
417 return NULL;
418 }
419
420
421 /* Function: p7_oprofile_UpdateFwdEmissionScores()
422 * Synopsis: Update the Forward/Backward part of the optimized profile
423 * match emissions to account for new background distribution.
424 *
425 * Purpose: This implementation re-orders the loops used to access/modify
426 * the rfv array relative to how it's accessed for example in
427 * fb_conversion(), to minimize the required size of sc_arr.
428 *
429 * Args: om - optimized profile to be updated.
430 * bg - the new bg distribution
431 * fwd_emissions - precomputed Fwd (float) residue emission
432 * probabilities in serial order (gathered from
433 * the optimized striped <om> with
434 * p7_oprofile_GetFwdEmissionArray() ).
435 * sc_arr Preallocated array of at least Kp*4 floats
436 */
437 int
p7_oprofile_UpdateFwdEmissionScores(P7_OPROFILE * om,P7_BG * bg,float * fwd_emissions,float * sc_arr)438 p7_oprofile_UpdateFwdEmissionScores(P7_OPROFILE *om, P7_BG *bg, float *fwd_emissions, float *sc_arr)
439 {
440 int M = om->M; /* length of the query */
441 int k, q, x, z;
442 int nq = p7O_NQF(M); /* segment length; total # of striped vectors needed */
443 int K = om->abc->K;
444 int Kp = om->abc->Kp;
445 union { __m128 v; float x[4]; } tmp; /* used to align and load simd minivectors */
446
447
448 for (k = 1, q = 0; q < nq; q++, k++) {
449
450 //First compute the core characters of the alphabet
451 for (x = 0; x < K; x++) {
452 for (z = 0; z < 4; z++) {
453 if (k+ z*nq <= M) sc_arr[z*Kp + x] = (om->mm && om->mm[(k+z*nq)]=='m') ? 0 : log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]);
454 else sc_arr[z*Kp + x] = -eslINFINITY;
455
456 tmp.x[z] = sc_arr[z*Kp + x];
457 }
458 om->rfv[x][q] = esl_sse_expf(tmp.v);
459
460 }
461
462 /* gap, nonresidue, and missing data residue codes don't get set by FExpectScVec(),
463 * so do them
464 */
465 for (z = 0; z < 4; z++)
466 {
467 sc_arr[z*Kp + K] = -eslINFINITY; /* gap char - */
468 sc_arr[z*Kp + (Kp - 2)] = -eslINFINITY; /* nonresidue * */
469 sc_arr[z*Kp + (Kp - 1)] = -eslINFINITY; /* missing data ~ */
470 }
471
472 // Then compute corresponding scores for ambiguity codes.
473 for (z = 0; z < 4; z++)
474 esl_abc_FExpectScVec(om->abc, sc_arr+(z*Kp), bg->f);
475
476 //finish off the interleaved values
477 for (x = K; x < Kp; x++) {
478 for (z = 0; z < 4; z++)
479 tmp.x[z] = sc_arr[z*Kp + x]; // computed in FExpectScVec call above
480 om->rfv[x][q] = esl_sse_expf(tmp.v);
481 }
482 }
483
484 return eslOK;
485 }
486
487
488 /* Function: p7_oprofile_UpdateVitEmissionScores()
489 * Synopsis: Update the Viterbi part of the optimized profile match
490 * emissions to account for new background distribution.
491 *.
492 * Purpose: This implementation re-orders the loops used to access/modify
493 * the rmv array relative to how it's accessed for example in
494 * vf_conversion(), to minimize the required size of sc_arr.
495 *
496 * Args: om - optimized profile to be updated.
497 * bg - the new bg distribution
498 * fwd_emissions - precomputed Fwd (float) residue emission
499 * probabilities in serial order (gathered from
500 * the optimized striped <om> with
501 * p7_oprofile_GetFwdEmissionArray() ).
502 * sc_arr Preallocated array of at least Kp*8 floats
503 */
504 int
p7_oprofile_UpdateVitEmissionScores(P7_OPROFILE * om,P7_BG * bg,float * fwd_emissions,float * sc_arr)505 p7_oprofile_UpdateVitEmissionScores(P7_OPROFILE *om, P7_BG *bg, float *fwd_emissions, float *sc_arr)
506 {
507 int M = om->M; /* length of the query */
508 int k, q, x, z;
509 int nq = p7O_NQW(M); /* segment length; total # of striped vectors needed */
510 int K = om->abc->K;
511 int Kp = om->abc->Kp;
512 int idx;
513 union { __m128i v; int16_t i[8]; } tmp; /* used to align and load simd minivectors */
514
515 for (k = 1, q = 0; q < nq; q++, k++) {
516
517 //First compute the core characters of the alphabet
518 for (x = 0; x < K; x++) {
519 for (z = 0; z < 8; z++) {
520 idx = z*Kp + x;
521 if (k+ z*nq <= M) {
522 sc_arr[idx] = (om->mm && om->mm[(k+z*nq)]=='m') ? 0 : log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]);
523 tmp.i[z] = wordify(om, sc_arr[idx]);
524 }
525 else
526 {
527 sc_arr[idx] = -eslINFINITY;
528 tmp.i[z] = -32768;
529 }
530
531 }
532 om->rwv[x][q] = tmp.v;
533
534 }
535
536 // Then compute corresponding scores for ambiguity codes.
537 for (z = 0; z < 8; z++)
538 esl_abc_FExpectScVec(om->abc, sc_arr+(z*Kp), bg->f);
539
540 //finish off the interleaved values
541 for (x = K; x < Kp; x++) {
542 for (z = 0; z < 8; z++) {
543 idx = z*Kp + x;
544 if (x==K || x>Kp-3 || sc_arr[idx] == -eslINFINITY)
545 tmp.i[z] = -32768;
546 else
547 tmp.i[z] = wordify(om, sc_arr[idx]);
548 }
549 om->rwv[x][q] = tmp.v;
550 }
551 }
552
553 return eslOK;
554 }
555
556
557
558 /* Function: p7_oprofile_UpdateMSVEmissionScores()
559 * Synopsis: Update the MSV part of the optimized profile match
560 * emissions to account for new background distribution.
561 *.
562 * Purpose: This implementation re-orders the loops used to access/modify
563 * the rbv array relative to how it's accessed for example in
564 * mf_conversion(), to minimize the required size of sc_arr.
565 *
566 * Args: om - optimized profile to be updated.
567 * bg - the new bg distribution
568 * fwd_emissions - precomputed Fwd (float) residue emission
569 * probabilities in serial order (gathered from
570 * the optimized striped <om> with
571 * p7_oprofile_GetFwdEmissionArray() ).
572 * sc_arr Preallocated array of at least Kp*16 floats
573 */
574 int
p7_oprofile_UpdateMSVEmissionScores(P7_OPROFILE * om,P7_BG * bg,float * fwd_emissions,float * sc_arr)575 p7_oprofile_UpdateMSVEmissionScores(P7_OPROFILE *om, P7_BG *bg, float *fwd_emissions, float *sc_arr)
576 {
577 int M = om->M; /* length of the query */
578 int k, q, x, z;
579 int nq = p7O_NQB(M); /* segment length; total # of striped vectors needed */
580 int K = om->abc->K;
581 int Kp = om->abc->Kp;
582 int idx;
583 float max = 0.0; /* maximum residue score: used for unsigned emission score bias */
584 union { __m128i v; uint8_t i[16]; } tmp; /* used to align and load simd minivectors */
585
586
587 /* First we determine the basis for the limited-precision MSVFilter scoring system.
588 * Default: 1/3 bit units, base offset 190: range 0..255 => -190..65 => -63.3..21.7 bits
589 * See J2/66, J4/138 for analysis.
590 * This depends on having computed scores. I do this in a first pass, to get the max
591 * score ... then re-compute those scores so they can be converted to 8bit scores
592 */
593 for (k = 1, q = 0; q < nq; q++, k++) {
594 for (x = 0; x < K; x++) {
595 for (z = 0; z < 16; z++) {
596 idx = z*Kp + x;
597 if (k+ z*nq <= M && !(om->mm && om->mm[(k+z*nq)]=='m'))
598 max = ESL_MAX(max, log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]));
599 }
600 }
601 }
602 om->scale_b = 3.0 / eslCONST_LOG2; /* scores in units of third-bits */
603 om->base_b = 190;
604 om->bias_b = unbiased_byteify(om, -1.0 * max);
605
606 for (k = 1, q = 0; q < nq; q++, k++) {
607
608 //First compute the core characters of the alphabet
609 for (x = 0; x < K; x++) {
610 for (z = 0; z < 16; z++) {
611 idx = z*Kp + x;
612 if (k+ z*nq <= M) {
613 sc_arr[idx] = (om->mm && om->mm[(k+z*nq)]=='m') ? 0 : log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]);
614 tmp.i[z] = biased_byteify(om, sc_arr[idx]);
615 }
616 else
617 {
618 sc_arr[idx] = -eslINFINITY;
619 tmp.i[z] = 255;
620 }
621
622 }
623 om->rbv[x][q] = tmp.v;
624
625 }
626
627 // Then compute corresponding scores for ambiguity codes.
628 for (z = 0; z < 16; z++)
629 esl_abc_FExpectScVec(om->abc, sc_arr+(z*Kp), bg->f);
630
631 //finish off the interleaved values
632 for (x = K; x < Kp; x++) {
633 for (z = 0; z < 16; z++) {
634 idx = z*Kp + x;
635 if (x==K || x>Kp-3 || sc_arr[idx] == -eslINFINITY)
636 tmp.i[z] = 255;
637 else
638 tmp.i[z] = biased_byteify(om, sc_arr[idx]);
639 }
640 om->rbv[x][q] = tmp.v;
641 }
642 }
643
644 sf_conversion(om);
645
646 return eslOK;
647 }
648
649
650 /*----------------- end, P7_OPROFILE structure ------------------*/
651
652
653
654 /*****************************************************************
655 * 2. Conversion from generic P7_PROFILE to optimized P7_OPROFILE
656 *****************************************************************/
657
658 /* biased_byteify()
659 * Converts original log-odds residue score to a rounded biased uchar cost.
660 * Match emission scores for MSVFilter get this treatment.
661 * e.g. a score of +3.2, with scale 3.0 and bias 12, becomes 2.
662 * 3.2*3 = 9.6; rounded = 10; bias-10 = 2.
663 * When used, we add the bias, then subtract this cost.
664 * (A cost of +255 is our -infinity "prohibited event")
665 */
666 static uint8_t
biased_byteify(P7_OPROFILE * om,float sc)667 biased_byteify(P7_OPROFILE *om, float sc)
668 {
669 uint8_t b;
670
671 sc = -1.0f * roundf(om->scale_b * sc); /* ugh. sc is now an integer cost represented in a float... */
672 b = (sc > 255 - om->bias_b) ? 255 : (uint8_t) sc + om->bias_b; /* and now we cast, saturate, and bias it to an unsigned char cost... */
673 return b;
674 }
675
676 /* unbiased_byteify()
677 * Convert original transition score to a rounded uchar cost
678 * Transition scores for MSVFilter get this treatment.
679 * e.g. a score of -2.1, with scale 3.0, becomes a cost of 6.
680 * (A cost of +255 is our -infinity "prohibited event")
681 */
682 static uint8_t
unbiased_byteify(P7_OPROFILE * om,float sc)683 unbiased_byteify(P7_OPROFILE *om, float sc)
684 {
685 uint8_t b;
686
687 sc = -1.0f * roundf(om->scale_b * sc); /* ugh. sc is now an integer cost represented in a float... */
688 b = (sc > 255.) ? 255 : (uint8_t) sc; /* and now we cast and saturate it to an unsigned char cost... */
689 return b;
690 }
691
692 /* wordify()
693 * Converts log probability score to a rounded signed 16-bit integer cost.
694 * Both emissions and transitions for ViterbiFilter get this treatment.
695 * No bias term needed, because we use signed words.
696 * e.g. a score of +3.2, with scale 500.0, becomes +1600.
697 */
698 static int16_t
wordify(P7_OPROFILE * om,float sc)699 wordify(P7_OPROFILE *om, float sc)
700 {
701 sc = roundf(om->scale_w * sc);
702 if (sc >= 32767.0) return 32767;
703 else if (sc <= -32768.0) return -32768;
704 else return (int16_t) sc;
705 }
706
707
708 /* sf_conversion():
709 * Author: Bjarne Knudsen
710 *
711 * Generates the SSVFilter() parts of the profile <om> scores
712 * from the completed MSV score. This includes calculating
713 * special versions of the match scores for using the the
714 * ssv filter.
715 *
716 * Returns: <eslOK> on success.
717 *
718 * Throws: (no abnormal error conditions)
719 */
720 static int
sf_conversion(P7_OPROFILE * om)721 sf_conversion(P7_OPROFILE *om)
722 {
723 int M = om->M; /* length of the query */
724 int nq = p7O_NQB(M); /* segment length; total # of striped vectors needed */
725 int x; /* counter over residues */
726 int q; /* q counts over total # of striped vectors, 0..nq-1 */
727 __m128i tmp;
728 __m128i tmp2;
729
730 /* We now want to fill out om->sbv with om->rbv - bias for use in the
731 * SSV filter. The only challenge is that the om->rbv values are
732 * unsigned and generally use the whole scale while the om->sbv
733 * values are signed. To solve that problem we perform the following
734 * calculation:
735 *
736 * ((127 + bias) - rbv) ^ 127
737 *
738 * where the subtraction is unsigned saturated and the addition is
739 * unsigned (it will not overflow, since bias is a small positive
740 * number). The f(x) = x ^ 127 combined with a change from unsigned
741 * to signed numbers have the same effect as f(x) = -x + 127. So if
742 * we regard the above as signed instead of unsigned it is equal to:
743 *
744 * -((127 + bias) - rbv) + 127 = rbv - bias
745 *
746 * which is what we want. The reason for this slightly complex idea
747 * is that we wish the transformation to be fast, especially for
748 * hmmscan where many models are loaded.
749 */
750
751 tmp = _mm_set1_epi8((int8_t) (om->bias_b + 127));
752 tmp2 = _mm_set1_epi8(127);
753
754 for (x = 0; x < om->abc->Kp; x++)
755 {
756 for (q = 0; q < nq; q++) om->sbv[x][q] = _mm_xor_si128(_mm_subs_epu8(tmp, om->rbv[x][q]), tmp2);
757 for (q = nq; q < nq + p7O_EXTRA_SB; q++) om->sbv[x][q] = om->sbv[x][q % nq];
758 }
759
760 return eslOK;
761 }
762
763 /* mf_conversion():
764 *
765 * This builds the MSVFilter() parts of the profile <om>, scores
766 * in lspace uchars (16-way parallel), by rescaling, rounding, and
767 * casting the scores in <gm>.
768 *
769 * Returns <eslOK> on success;
770 * throws <eslEINVAL> if <om> hasn't been allocated properly.
771 */
772 static int
mf_conversion(const P7_PROFILE * gm,P7_OPROFILE * om)773 mf_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
774 {
775 int M = gm->M; /* length of the query */
776 int nq = p7O_NQB(M); /* segment length; total # of striped vectors needed */
777 float max = 0.0; /* maximum residue score: used for unsigned emission score bias */
778 int x; /* counter over residues */
779 int q; /* q counts over total # of striped vectors, 0..nq-1 */
780 int k; /* the usual counter over model nodes 1..M */
781 int z; /* counter within elements of one SIMD minivector */
782 union { __m128i v; uint8_t i[16]; } tmp; /* used to align and load simd minivectors */
783
784 if (nq > om->allocQ16) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
785
786 /* First we determine the basis for the limited-precision MSVFilter scoring system.
787 * Default: 1/3 bit units, base offset 190: range 0..255 => -190..65 => -63.3..21.7 bits
788 * See J2/66, J4/138 for analysis.
789 */
790 for (x = 0; x < gm->abc->K; x++) max = ESL_MAX(max, esl_vec_FMax(gm->rsc[x], (M+1)*2));
791 om->scale_b = 3.0 / eslCONST_LOG2; /* scores in units of third-bits */
792 om->base_b = 190;
793 om->bias_b = unbiased_byteify(om, -1.0 * max);
794
795 /* striped match costs: start at k=1. */
796 for (x = 0; x < gm->abc->Kp; x++)
797 {
798 for (q = 0, k = 1; q < nq; q++, k++)
799 {
800 for (z = 0; z < 16; z++) tmp.i[z] = ((k+ z*nq <= M) ? biased_byteify(om, p7P_MSC(gm, k+z*nq, x)) : 255);
801 om->rbv[x][q] = tmp.v;
802 }
803 }
804
805 /* transition costs */
806 om->tbm_b = unbiased_byteify(om, logf(2.0f / ((float) gm->M * (float) (gm->M+1)))); /* constant B->Mk penalty */
807 om->tec_b = unbiased_byteify(om, logf(0.5f)); /* constant multihit E->C = E->J */
808 om->tjb_b = unbiased_byteify(om, logf(3.0f / (float) (gm->L+3))); /* this adopts the L setting of the parent profile */
809
810 sf_conversion(om);
811
812 return eslOK;
813 }
814
815
816 /* vf_conversion():
817 *
818 * This builds the ViterbiFilter() parts of the profile <om>, scores
819 * in lspace swords (8-way parallel), by rescaling, rounding, and
820 * casting the scores in <gm>.
821 *
822 * Returns <eslOK> on success;
823 * throws <eslEINVAL> if <om> hasn't been allocated properly.
824 */
825 static int
vf_conversion(const P7_PROFILE * gm,P7_OPROFILE * om)826 vf_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
827 {
828 int M = gm->M; /* length of the query */
829 int nq = p7O_NQW(M); /* segment length; total # of striped vectors needed */
830 int x; /* counter over residues */
831 int q; /* q counts over total # of striped vectors, 0..nq-1 */
832 int k; /* the usual counter over model nodes 1..M */
833 int kb; /* possibly offset base k for loading om's TSC vectors */
834 int z; /* counter within elements of one SIMD minivector */
835 int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
836 int tg; /* transition index in gm */
837 int j; /* counter in interleaved vector arrays in the profile */
838 int ddtmp; /* used in finding worst DD transition bound */
839 int16_t maxval; /* used to prevent zero cost II */
840 int16_t val;
841 union { __m128i v; int16_t i[8]; } tmp; /* used to align and load simd minivectors */
842
843 if (nq > om->allocQ8) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
844
845 /* First set the basis for the limited-precision scoring system.
846 * Default: 1/500 bit units, base offset 12000: range -32768..32767 => -44768..20767 => -89.54..41.53 bits
847 * See J4/138 for analysis.
848 */
849 om->scale_w = 500.0 / eslCONST_LOG2;
850 om->base_w = 12000;
851
852 /* striped match scores */
853 for (x = 0; x < gm->abc->Kp; x++)
854 for (k = 1, q = 0; q < nq; q++, k++)
855 {
856 for (z = 0; z < 8; z++) tmp.i[z] = ((k+ z*nq <= M) ? wordify(om, p7P_MSC(gm, k+z*nq, x)) : -32768);
857 om->rwv[x][q] = tmp.v;
858 }
859
860 /* Transition costs, all but the DD's. */
861 for (j = 0, k = 1, q = 0; q < nq; q++, k++)
862 {
863 for (t = p7O_BM; t <= p7O_II; t++) /* this loop of 7 transitions depends on the order in p7o_tsc_e */
864 {
865 switch (t) {
866 case p7O_BM: tg = p7P_BM; kb = k-1; maxval = 0; break; /* gm has tBMk stored off by one! start from k=0 not 1 */
867 case p7O_MM: tg = p7P_MM; kb = k-1; maxval = 0; break; /* MM, DM, IM vectors are rotated by -1, start from k=0 */
868 case p7O_IM: tg = p7P_IM; kb = k-1; maxval = 0; break;
869 case p7O_DM: tg = p7P_DM; kb = k-1; maxval = 0; break;
870 case p7O_MD: tg = p7P_MD; kb = k; maxval = 0; break; /* the remaining ones are straight up */
871 case p7O_MI: tg = p7P_MI; kb = k; maxval = 0; break;
872 case p7O_II: tg = p7P_II; kb = k; maxval = -1; break;
873 }
874
875 for (z = 0; z < 8; z++) {
876 val = ((kb+ z*nq < M) ? wordify(om, p7P_TSC(gm, kb+ z*nq, tg)) : -32768);
877 tmp.i[z] = (val <= maxval) ? val : maxval; /* do not allow an II transition cost of 0, or hell may occur. */
878 }
879 om->twv[j++] = tmp.v;
880 }
881 }
882
883 /* Finally the DD's, which are at the end of the optimized tsc vector; (j is already sitting there) */
884 for (k = 1, q = 0; q < nq; q++, k++)
885 {
886 for (z = 0; z < 8; z++) tmp.i[z] = ((k+ z*nq < M) ? wordify(om, p7P_TSC(gm, k+ z*nq, p7P_DD)) : -32768);
887 om->twv[j++] = tmp.v;
888 }
889
890 /* Specials. (Actually in same order in om and gm, but we copy in general form anyway.) */
891 /* VF CC,NN,JJ transitions hardcoded zero; -3.0 nat approximation used instead; this papers
892 * over a length independence problem, where the approximation weirdly outperforms the
893 * exact solution, probably indicating that the model's Pascal distribution is problematic,
894 * and the "approximation" is in fact closer to the One True Model, the mythic H4 supermodel.
895 * [xref J5/36]
896 */
897 om->xw[p7O_E][p7O_LOOP] = wordify(om, gm->xsc[p7P_E][p7P_LOOP]);
898 om->xw[p7O_E][p7O_MOVE] = wordify(om, gm->xsc[p7P_E][p7P_MOVE]);
899 om->xw[p7O_N][p7O_MOVE] = wordify(om, gm->xsc[p7P_N][p7P_MOVE]);
900 om->xw[p7O_N][p7O_LOOP] = 0; /* was wordify(om, gm->xsc[p7P_N][p7P_LOOP]); */
901 om->xw[p7O_C][p7O_MOVE] = wordify(om, gm->xsc[p7P_C][p7P_MOVE]);
902 om->xw[p7O_C][p7O_LOOP] = 0; /* was wordify(om, gm->xsc[p7P_C][p7P_LOOP]); */
903 om->xw[p7O_J][p7O_MOVE] = wordify(om, gm->xsc[p7P_J][p7P_MOVE]);
904 om->xw[p7O_J][p7O_LOOP] = 0; /* was wordify(om, gm->xsc[p7P_J][p7P_LOOP]); */
905
906 om->ncj_roundoff = 0.0; /* goes along with NN=CC=JJ=0, -3.0 nat approximation */
907 /* otherwise, would be = om->scale_w * gm->xsc[p7P_N][p7P_LOOP] - om->xw[p7O_N][p7O_LOOP]; */
908 /* see J4/150 for discussion of VF error suppression, superceded by the -3.0 nat approximation */
909
910 /* Transition score bound for "lazy F" DD path evaluation (xref J2/52) */
911 om->ddbound_w = -32768;
912 for (k = 2; k < M-1; k++)
913 {
914 ddtmp = (int) wordify(om, p7P_TSC(gm, k, p7P_DD));
915 ddtmp += (int) wordify(om, p7P_TSC(gm, k+1, p7P_DM));
916 ddtmp -= (int) wordify(om, p7P_TSC(gm, k+1, p7P_BM));
917 om->ddbound_w = ESL_MAX(om->ddbound_w, ddtmp);
918 }
919
920 return eslOK;
921 }
922
923
924 /* fb_conversion()
925 * This builds the Forward/Backward part of the optimized profile <om>,
926 * where we use odds ratios (not log-odds scores).
927 */
928 static int
fb_conversion(const P7_PROFILE * gm,P7_OPROFILE * om)929 fb_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
930 {
931 int M = gm->M; /* length of the query */
932 int nq = p7O_NQF(M); /* segment length; total # of striped vectors needed */
933 int x; /* counter over residues */
934 int q; /* q counts over total # of striped vectors, 0..nq-1 */
935 int k; /* the usual counter over model nodes 1..M */
936 int kb; /* possibly offset base k for loading om's TSC vectors */
937 int z; /* counter within elements of one SIMD minivector */
938 int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
939 int tg; /* transition index in gm */
940 int j; /* counter in interleaved vector arrays in the profile */
941 union { __m128 v; float x[4]; } tmp; /* used to align and load simd minivectors */
942
943 if (nq > om->allocQ4) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
944
945 /* striped match scores: start at k=1 */
946 for (x = 0; x < gm->abc->Kp; x++)
947 for (k = 1, q = 0; q < nq; q++, k++)
948 {
949 for (z = 0; z < 4; z++) tmp.x[z] = (k+ z*nq <= M) ? p7P_MSC(gm, k+z*nq, x) : -eslINFINITY;
950 om->rfv[x][q] = esl_sse_expf(tmp.v);
951 }
952
953
954 /* Transition scores, all but the DD's. */
955 for (j = 0, k = 1, q = 0; q < nq; q++, k++)
956 {
957 for (t = p7O_BM; t <= p7O_II; t++) /* this loop of 7 transitions depends on the order in the definition of p7o_tsc_e */
958 {
959 switch (t) {
960 case p7O_BM: tg = p7P_BM; kb = k-1; break; /* gm has tBMk stored off by one! start from k=0 not 1 */
961 case p7O_MM: tg = p7P_MM; kb = k-1; break; /* MM, DM, IM quads are rotated by -1, start from k=0 */
962 case p7O_IM: tg = p7P_IM; kb = k-1; break;
963 case p7O_DM: tg = p7P_DM; kb = k-1; break;
964 case p7O_MD: tg = p7P_MD; kb = k; break; /* the remaining ones are straight up */
965 case p7O_MI: tg = p7P_MI; kb = k; break;
966 case p7O_II: tg = p7P_II; kb = k; break;
967 }
968
969 for (z = 0; z < 4; z++) tmp.x[z] = (kb+z*nq < M) ? p7P_TSC(gm, kb+z*nq, tg) : -eslINFINITY;
970 om->tfv[j++] = esl_sse_expf(tmp.v);
971 }
972 }
973
974 /* And finally the DD's, which are at the end of the optimized tfv vector; (j is already there) */
975 for (k = 1, q = 0; q < nq; q++, k++)
976 {
977 for (z = 0; z < 4; z++) tmp.x[z] = (k+z*nq < M) ? p7P_TSC(gm, k+z*nq, p7P_DD) : -eslINFINITY;
978 om->tfv[j++] = esl_sse_expf(tmp.v);
979 }
980
981 /* Specials. (These are actually in exactly the same order in om and
982 * gm, but we copy in general form anyway.)
983 */
984 om->xf[p7O_E][p7O_LOOP] = expf(gm->xsc[p7P_E][p7P_LOOP]);
985 om->xf[p7O_E][p7O_MOVE] = expf(gm->xsc[p7P_E][p7P_MOVE]);
986 om->xf[p7O_N][p7O_LOOP] = expf(gm->xsc[p7P_N][p7P_LOOP]);
987 om->xf[p7O_N][p7O_MOVE] = expf(gm->xsc[p7P_N][p7P_MOVE]);
988 om->xf[p7O_C][p7O_LOOP] = expf(gm->xsc[p7P_C][p7P_LOOP]);
989 om->xf[p7O_C][p7O_MOVE] = expf(gm->xsc[p7P_C][p7P_MOVE]);
990 om->xf[p7O_J][p7O_LOOP] = expf(gm->xsc[p7P_J][p7P_LOOP]);
991 om->xf[p7O_J][p7O_MOVE] = expf(gm->xsc[p7P_J][p7P_MOVE]);
992
993 return eslOK;
994 }
995
996
997 /* Function: p7_oprofile_Convert()
998 * Synopsis: Converts standard profile to an optimized one.
999 * Incept: SRE, Mon Nov 26 07:38:57 2007 [Janelia]
1000 *
1001 * Purpose: Convert a standard profile <gm> to an optimized profile <om>,
1002 * where <om> has already been allocated for a profile of at
1003 * least <gm->M> nodes and the same emission alphabet <gm->abc>.
1004 *
1005 * Args: gm - profile to optimize
1006 * om - allocated optimized profile for holding the result.
1007 *
1008 * Returns: <eslOK> on success.
1009 *
1010 * Throws: <eslEINVAL> if <gm>, <om> aren't compatible.
1011 * <eslEMEM> on allocation failure.
1012 */
1013 int
p7_oprofile_Convert(const P7_PROFILE * gm,P7_OPROFILE * om)1014 p7_oprofile_Convert(const P7_PROFILE *gm, P7_OPROFILE *om)
1015 {
1016 int status, z;
1017
1018 /* Set these first so they are available in the following calls */
1019 om->mode = gm->mode;
1020 om->L = gm->L;
1021 om->M = gm->M;
1022 om->nj = gm->nj;
1023 om->max_length = gm->max_length;
1024
1025 if (gm->abc->type != om->abc->type) ESL_EXCEPTION(eslEINVAL, "alphabets of the two profiles don't match");
1026 if (gm->M > om->allocM) ESL_EXCEPTION(eslEINVAL, "oprofile is too small");
1027
1028 if ((status = mf_conversion(gm, om)) != eslOK) return status; /* MSVFilter()'s information */
1029 if ((status = vf_conversion(gm, om)) != eslOK) return status; /* ViterbiFilter()'s information */
1030 if ((status = fb_conversion(gm, om)) != eslOK) return status; /* ForwardFilter()'s information */
1031
1032 if (om->name != NULL) free(om->name);
1033 if (om->acc != NULL) free(om->acc);
1034 if (om->desc != NULL) free(om->desc);
1035 if ((status = esl_strdup(gm->name, -1, &(om->name))) != eslOK) goto ERROR;
1036 if ((status = esl_strdup(gm->acc, -1, &(om->acc))) != eslOK) goto ERROR;
1037 if ((status = esl_strdup(gm->desc, -1, &(om->desc))) != eslOK) goto ERROR;
1038 strcpy(om->rf, gm->rf);
1039 strcpy(om->mm, gm->mm);
1040 strcpy(om->cs, gm->cs);
1041 strcpy(om->consensus, gm->consensus);
1042 for (z = 0; z < p7_NEVPARAM; z++) om->evparam[z] = gm->evparam[z];
1043 for (z = 0; z < p7_NCUTOFFS; z++) om->cutoff[z] = gm->cutoff[z];
1044 for (z = 0; z < p7_MAXABET; z++) om->compo[z] = gm->compo[z];
1045
1046 return eslOK;
1047
1048 ERROR:
1049 return status;
1050 }
1051
1052 /* Function: p7_oprofile_ReconfigLength()
1053 * Synopsis: Set the target sequence length of a model.
1054 * Incept: SRE, Thu Dec 20 09:56:40 2007 [Janelia]
1055 *
1056 * Purpose: Given an already configured model <om>, quickly reset its
1057 * expected length distribution for a new mean target sequence
1058 * length of <L>.
1059 *
1060 * This doesn't affect the length distribution of the null
1061 * model. That must also be reset, using <p7_bg_SetLength()>.
1062 *
1063 * We want this routine to run as fast as possible, because
1064 * this call is in the critical path: it must be called at
1065 * each new target sequence in a database search.
1066 *
1067 * Returns: <eslOK> on success. Costs/scores for N,C,J transitions are set
1068 * here.
1069 */
1070 int
p7_oprofile_ReconfigLength(P7_OPROFILE * om,int L)1071 p7_oprofile_ReconfigLength(P7_OPROFILE *om, int L)
1072 {
1073 int status;
1074 if ((status = p7_oprofile_ReconfigMSVLength (om, L)) != eslOK) return status;
1075 if ((status = p7_oprofile_ReconfigRestLength(om, L)) != eslOK) return status;
1076 return eslOK;
1077 }
1078
1079 /* Function: p7_oprofile_ReconfigMSVLength()
1080 * Synopsis: Set the target sequence length of the MSVFilter part of the model.
1081 * Incept: SRE, Tue Dec 16 13:39:17 2008 [Janelia]
1082 *
1083 * Purpose: Given an already configured model <om>, quickly reset its
1084 * expected length distribution for a new mean target sequence
1085 * length of <L>, only for the part of the model that's used
1086 * for the accelerated MSV filter.
1087 *
1088 * The acceleration pipeline uses this to defer reconfiguring the
1089 * length distribution of the main model, mostly because hmmscan
1090 * reads the model in two pieces, MSV part first, then the rest.
1091 *
1092 * Returns: <eslOK> on success.
1093 */
1094 int
p7_oprofile_ReconfigMSVLength(P7_OPROFILE * om,int L)1095 p7_oprofile_ReconfigMSVLength(P7_OPROFILE *om, int L)
1096 {
1097 om->tjb_b = unbiased_byteify(om, logf(3.0f / (float) (L+3)));
1098 return eslOK;
1099 }
1100
1101 /* Function: p7_oprofile_ReconfigRestLength()
1102 * Synopsis: Set the target sequence length of the main profile.
1103 * Incept: SRE, Tue Dec 16 13:41:30 2008 [Janelia]
1104 *
1105 * Purpose: Given an already configured model <om>, quickly reset its
1106 * expected length distribution for a new mean target sequence
1107 * length of <L>, for everything except the MSV filter part
1108 * of the model.
1109 *
1110 * Calling <p7_oprofile_ReconfigMSVLength()> then
1111 * <p7_oprofile_ReconfigRestLength()> is equivalent to
1112 * just calling <p7_oprofile_ReconfigLength()>. The two
1113 * part version is used in the acceleration pipeline.
1114 *
1115 * Returns: <eslOK> on success.
1116 */
1117 int
p7_oprofile_ReconfigRestLength(P7_OPROFILE * om,int L)1118 p7_oprofile_ReconfigRestLength(P7_OPROFILE *om, int L)
1119 {
1120 float pmove, ploop;
1121
1122 pmove = (2.0f + om->nj) / ((float) L + 2.0f + om->nj); /* 2/(L+2) for sw; 3/(L+3) for fs */
1123 ploop = 1.0f - pmove;
1124
1125 /* ForwardFilter() parameters: pspace floats */
1126 om->xf[p7O_N][p7O_LOOP] = om->xf[p7O_C][p7O_LOOP] = om->xf[p7O_J][p7O_LOOP] = ploop;
1127 om->xf[p7O_N][p7O_MOVE] = om->xf[p7O_C][p7O_MOVE] = om->xf[p7O_J][p7O_MOVE] = pmove;
1128
1129 /* ViterbiFilter() parameters: lspace signed 16-bit ints */
1130 om->xw[p7O_N][p7O_MOVE] = om->xw[p7O_C][p7O_MOVE] = om->xw[p7O_J][p7O_MOVE] = wordify(om, logf(pmove));
1131 /* om->xw[p7O_N][p7O_LOOP] = om->xw[p7O_C][p7O_LOOP] = om->xw[p7O_J][p7O_LOOP] = wordify(om, logf(ploop)); */ /* 3nat approx in force: these stay 0 */
1132 /* om->ncj_roundoff = (om->scale_w * logf(ploop)) - om->xw[p7O_N][p7O_LOOP]; */ /* and this does too */
1133
1134 om->L = L;
1135 return eslOK;
1136 }
1137
1138
1139 /* Function: p7_oprofile_ReconfigMultihit()
1140 * Synopsis: Quickly reconfig model into multihit mode for target length <L>.
1141 * Incept: SRE, Thu Aug 21 10:04:07 2008 [Janelia]
1142 *
1143 * Purpose: Given a profile <om> that's already been configured once,
1144 * quickly reconfigure it into a multihit mode for target
1145 * length <L>.
1146 *
1147 * This gets called in domain definition, when we need to
1148 * flip the model in and out of unihit mode to
1149 * process individual domains.
1150 *
1151 * Note: You can't just flip uni/multi mode alone, because that
1152 * parameterization also affects target length
1153 * modeling. You need to make sure uni vs. multi choice is
1154 * made before the length model is set, and you need to
1155 * make sure the length model is recalculated if you change
1156 * the uni/multi mode. Hence, these functions call
1157 * <p7_oprofile_ReconfigLength()>.
1158 */
1159 int
p7_oprofile_ReconfigMultihit(P7_OPROFILE * om,int L)1160 p7_oprofile_ReconfigMultihit(P7_OPROFILE *om, int L)
1161 {
1162 om->xf[p7O_E][p7O_MOVE] = 0.5;
1163 om->xf[p7O_E][p7O_LOOP] = 0.5;
1164 om->nj = 1.0f;
1165
1166 om->xw[p7O_E][p7O_MOVE] = wordify(om, -eslCONST_LOG2);
1167 om->xw[p7O_E][p7O_LOOP] = wordify(om, -eslCONST_LOG2);
1168
1169 return p7_oprofile_ReconfigLength(om, L);
1170 }
1171
1172 /* Function: p7_oprofile_ReconfigUnihit()
1173 * Synopsis: Quickly reconfig model into unihit mode for target length <L>.
1174 * Incept: SRE, Thu Aug 21 10:10:32 2008 [Janelia]
1175 *
1176 * Purpose: Given a profile <om> that's already been configured once,
1177 * quickly reconfigure it into a unihit mode for target
1178 * length <L>.
1179 *
1180 * This gets called in domain definition, when we need to
1181 * flip the model in and out of unihit <L=0> mode to
1182 * process individual domains.
1183 */
1184 int
p7_oprofile_ReconfigUnihit(P7_OPROFILE * om,int L)1185 p7_oprofile_ReconfigUnihit(P7_OPROFILE *om, int L)
1186 {
1187 om->xf[p7O_E][p7O_MOVE] = 1.0f;
1188 om->xf[p7O_E][p7O_LOOP] = 0.0f;
1189 om->nj = 0.0f;
1190
1191 om->xw[p7O_E][p7O_MOVE] = 0;
1192 om->xw[p7O_E][p7O_LOOP] = -32768;
1193
1194 return p7_oprofile_ReconfigLength(om, L);
1195 }
1196 /*------------ end, conversions to P7_OPROFILE ------------------*/
1197
1198 /*******************************************************************
1199 * 3. Conversion from optimized P7_OPROFILE to compact score arrays
1200 *******************************************************************/
1201
1202 /* Function: p7_oprofile_GetFwdTransitionArray()
1203 * Synopsis: Retrieve full 32-bit float transition probabilities from an
1204 * optimized profile into a flat array
1205 *
1206 * Purpose: Extract an array of <type> (e.g. p7O_II) transition probabilities
1207 * from the underlying <om> profile. In SIMD implementations,
1208 * these are striped and interleaved, making them difficult to
1209 * directly access.
1210 *
1211 * Args: <om> - optimized profile, containing transition information
1212 * <type> - transition type (e.g. p7O_II)
1213 * <arr> - preallocated array into which floats will be placed
1214 *
1215 * Returns: <eslOK> on success.
1216 *
1217 * Throws: (no abnormal error conditions)
1218 */
1219 int
p7_oprofile_GetFwdTransitionArray(const P7_OPROFILE * om,int type,float * arr)1220 p7_oprofile_GetFwdTransitionArray(const P7_OPROFILE *om, int type, float *arr )
1221 {
1222 int nq = p7O_NQF(om->M); /* # of striped vectors needed */
1223 int i, j;
1224 union { __m128 v; float x[4]; } tmp; /* used to align and read simd minivectors */
1225
1226
1227 for (i=0; i<nq; i++) {
1228 // because DD transitions are held at the end of the tfv array
1229 tmp.v = om->tfv[ (type==p7O_DD ? nq*7+i : type+7*i) ];
1230 for (j=0; j<4; j++)
1231 if ( i+1+ j*nq < om->M+1)
1232 arr[i+1+ j*nq] = tmp.x[j];
1233 }
1234
1235 return eslOK;
1236
1237 }
1238
1239 /* Function: p7_oprofile_GetSSVEmissionScoreArray()
1240 * Synopsis: Retrieve MSV residue emission scores from an optimized
1241 * profile into an array
1242 *
1243 * Purpose: Extract an implicitly 2D array of 8-bit int SSV residue
1244 * emission scores from an optimized profile <om>. <arr> must
1245 * be allocated by the calling function to be of size
1246 * ( om->abc->Kp * ( om->M + 1 )), and indexing into the array
1247 * is done as [om->abc->Kp * i + c ] for character c at
1248 * position i.
1249 *
1250 * In SIMD implementations, the residue scores are striped
1251 * and interleaved, making them somewhat difficult to
1252 * directly access. Faster access is desired, for example,
1253 * in SSV back-tracking of a high-scoring diagonal
1254 *
1255 * Args: <om> - optimized profile, containing transition information
1256 * <arr> - preallocated array into which scores will be placed
1257 *
1258 * Returns: <eslOK> on success.
1259 *
1260 * Throws: (no abnormal error conditions)
1261 */
1262 int
p7_oprofile_GetSSVEmissionScoreArray(const P7_OPROFILE * om,uint8_t * arr)1263 p7_oprofile_GetSSVEmissionScoreArray(const P7_OPROFILE *om, uint8_t *arr )
1264 {
1265 int x, q, z, k;
1266 union { __m128i v; uint8_t i[16]; } tmp; /* used to align and read simd minivectors */
1267 int M = om->M; /* length of the query */
1268 int K = om->abc->Kp;
1269 int nq = p7O_NQB(M); /* segment length; total # of striped vectors needed */
1270 int cell_cnt = (om->M + 1) * K;
1271
1272 for (x = 0; x < K ; x++) {
1273 for (q = 0, k = 1; q < nq; q++, k++) {
1274 tmp.v = om->rbv[x][q];
1275 for (z=0; z<16; z++)
1276 if ( (K * (k+z*nq) + x) < cell_cnt)
1277 arr[ K * (k+z*nq) + x ] = tmp.i[z];
1278 }
1279 }
1280
1281 return eslOK;
1282 }
1283
1284
1285 /* Function: p7_oprofile_GetFwdEmissionScoreArray()
1286 * Synopsis: Retrieve Fwd (float) residue emission scores from an optimized
1287 * profile into an array
1288 *
1289 * Purpose: Extract an implicitly 2D array of 32-bit float Fwd residue
1290 * emission scores from an optimized profile <om>. <arr> must
1291 * be allocated by the calling function to be of size
1292 * ( om->abc->Kp * ( om->M + 1 )), and indexing into the array
1293 * is done as [om->abc->Kp * i + c ] for character c at
1294 * position i.
1295 *
1296 * In SIMD implementations, the residue scores are striped
1297 * and interleaved, making them somewhat difficult to
1298 * directly access.
1299 *
1300 * Args: <om> - optimized profile, containing transition information
1301 * <arr> - preallocated array into which scores will be placed
1302 *
1303 * Returns: <eslOK> on success.
1304 *
1305 * Throws: (no abnormal error conditions)
1306 */
1307 int
p7_oprofile_GetFwdEmissionScoreArray(const P7_OPROFILE * om,float * arr)1308 p7_oprofile_GetFwdEmissionScoreArray(const P7_OPROFILE *om, float *arr )
1309 {
1310 int x, q, z, k;
1311 union { __m128 v; float f[4]; } tmp; /* used to align and read simd minivectors */
1312 int M = om->M; /* length of the query */
1313 int K = om->abc->Kp;
1314 int nq = p7O_NQF(M); /* segment length; total # of striped vectors needed */
1315 int cell_cnt = (om->M + 1) * K;
1316
1317 for (x = 0; x < K; x++) {
1318 for (q = 0, k = 1; q < nq; q++, k++) {
1319 tmp.v = esl_sse_logf(om->rfv[x][q]);
1320 for (z = 0; z < 4; z++)
1321 if ( (K * (k+z*nq) + x) < cell_cnt)
1322 arr[ K * (k+z*nq) + x ] = tmp.f[z];
1323 }
1324 }
1325
1326 return eslOK;
1327 }
1328
1329 /* Function: p7_oprofile_GetFwdEmissionArray()
1330 * Synopsis: Retrieve Fwd (float) residue emission values from an optimized
1331 * profile into an array
1332 *
1333 * Purpose: Extract an implicitly 2D array of 32-bit float Fwd residue
1334 * emission values from an optimized profile <om>, converting
1335 * back to emission values based on the background. <arr> must
1336 * be allocated by the calling function to be of size
1337 * ( om->abc->Kp * ( om->M + 1 )), and indexing into the array
1338 * is done as [om->abc->Kp * i + c ] for character c at
1339 * position i.
1340 *
1341 * In SIMD implementations, the residue scores are striped
1342 * and interleaved, making them somewhat difficult to
1343 * directly access.
1344 *
1345 * Args: <om> - optimized profile, containing transition information
1346 * <bg> - background frequencies
1347 * <arr> - preallocated array into which scores will be placed
1348 *
1349 * Returns: <eslOK> on success.
1350 *
1351 * Throws: (no abnormal error conditions)
1352 */
1353 int
p7_oprofile_GetFwdEmissionArray(const P7_OPROFILE * om,P7_BG * bg,float * arr)1354 p7_oprofile_GetFwdEmissionArray(const P7_OPROFILE *om, P7_BG *bg, float *arr )
1355 {
1356 int x, q, z, k;
1357 union { __m128 v; float f[4]; } tmp; /* used to align and read simd minivectors */
1358 int M = om->M; /* length of the query */
1359 int Kp = om->abc->Kp;
1360 int K = om->abc->K;
1361 int nq = p7O_NQF(M); /* segment length; total # of striped vectors needed */
1362 int cell_cnt = (om->M + 1) * Kp;
1363
1364 for (x = 0; x < K; x++) {
1365 for (q = 0, k = 1; q < nq; q++, k++) {
1366 tmp.v = om->rfv[x][q];
1367 for (z = 0; z < 4; z++)
1368 if ( (Kp * (k+z*nq) + x) < cell_cnt)
1369 arr[ Kp * (k+z*nq) + x ] = tmp.f[z] * bg->f[x];
1370 }
1371 }
1372
1373 //degeneracy emissions for each position
1374 for (x = 0; x <= M; x++)
1375 esl_abc_FExpectScVec(om->abc, arr+Kp*x, bg->f);
1376
1377 return eslOK;
1378 }
1379
1380
1381 /*------------ end, conversions from P7_OPROFILE ------------------*/
1382
1383
1384 /*****************************************************************
1385 * 4. Debugging and development utilities.
1386 *****************************************************************/
1387
1388
1389 /* oprofile_dump_mf()
1390 *
1391 * Dump the MSVFilter part of a profile <om> to <stdout>.
1392 */
1393 static int
oprofile_dump_mf(FILE * fp,const P7_OPROFILE * om)1394 oprofile_dump_mf(FILE *fp, const P7_OPROFILE *om)
1395 {
1396 int M = om->M; /* length of the query */
1397 int nq = p7O_NQB(M); /* segment length; total # of striped vectors needed */
1398 int x; /* counter over residues */
1399 int q; /* q counts over total # of striped vectors, 0..nq-1 */
1400 int k; /* counter over nodes 1..M */
1401 int z; /* counter within elements of one SIMD minivector */
1402 union { __m128i v; uint8_t i[16]; } tmp; /* used to align and read simd minivectors */
1403
1404 /* Header (rearranged column numbers, in the vectors) */
1405 fprintf(fp, " ");
1406 for (k =1, q = 0; q < nq; q++, k++)
1407 {
1408 fprintf(fp, "[ ");
1409 for (z = 0; z < 16; z++)
1410 if (k+z*nq <= M) fprintf(fp, "%4d ", k+z*nq);
1411 else fprintf(fp, "%4s ", "xx");
1412 fprintf(fp, "]");
1413 }
1414 fprintf(fp, "\n");
1415
1416 /* Table of residue emissions */
1417 for (x = 0; x < om->abc->Kp; x++)
1418 {
1419 fprintf(fp, "(%c): ", om->abc->sym[x]);
1420
1421 for (q = 0; q < nq; q++)
1422 {
1423 fprintf(fp, "[ ");
1424 _mm_store_si128(&tmp.v, om->rbv[x][q]);
1425 for (z = 0; z < 16; z++) fprintf(fp, "%4d ", tmp.i[z]);
1426 fprintf(fp, "]");
1427 }
1428 fprintf(fp, "\n");
1429 }
1430 fprintf(fp, "\n");
1431
1432 fprintf(fp, "t_EC,EJ: %4d\n", om->tec_b);
1433 fprintf(fp, "t_NB,JB,CT: %4d\n", om->tjb_b);
1434 fprintf(fp, "t_BMk: %4d\n", om->tbm_b);
1435 fprintf(fp, "scale: %.2f\n", om->scale_b);
1436 fprintf(fp, "base: %4d\n", om->base_b);
1437 fprintf(fp, "bias: %4d\n", om->bias_b);
1438 fprintf(fp, "Q: %4d\n", nq);
1439 fprintf(fp, "M: %4d\n", M);
1440 return eslOK;
1441 }
1442
1443
1444
1445 /* oprofile_dump_vf()
1446 *
1447 * Dump the ViterbiFilter part of a profile <om> to <stdout>.
1448 */
1449 static int
oprofile_dump_vf(FILE * fp,const P7_OPROFILE * om)1450 oprofile_dump_vf(FILE *fp, const P7_OPROFILE *om)
1451 {
1452 int M = om->M; /* length of the query */
1453 int nq = p7O_NQW(M); /* segment length; total # of striped vectors needed */
1454 int x; /* counter over residues */
1455 int q; /* q counts over total # of striped vectors, 0..nq-1 */
1456 int k; /* the usual counter over model nodes 1..M */
1457 int kb; /* possibly offset base k for loading om's TSC vectors */
1458 int z; /* counter within elements of one SIMD minivector */
1459 int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
1460 int j; /* counter in interleaved vector arrays in the profile */
1461 union { __m128i v; int16_t i[8]; } tmp; /* used to align and read simd minivectors */
1462
1463 /* Emission score header (rearranged column numbers, in the vectors) */
1464 fprintf(fp, " ");
1465 for (k =1, q = 0; q < nq; q++, k++)
1466 {
1467 fprintf(fp, "[ ");
1468 for (z = 0; z < 8; z++)
1469 if (k+z*nq <= M) fprintf(fp, "%6d ", k+z*nq);
1470 else fprintf(fp, "%6s ", "xx");
1471 fprintf(fp, "]");
1472 }
1473 fprintf(fp, "\n");
1474
1475 /* Table of residue emissions */
1476 for (x = 0; x < om->abc->Kp; x++)
1477 {
1478 fprintf(fp, "(%c): ", om->abc->sym[x]);
1479
1480 /* Match emission scores (insert emissions are assumed zero by design) */
1481 for (q = 0; q < nq; q++)
1482 {
1483 fprintf(fp, "[ ");
1484 _mm_store_si128(&tmp.v, om->rwv[x][q]);
1485 for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
1486 fprintf(fp, "]");
1487 }
1488 fprintf(fp, "\n");
1489 }
1490 fprintf(fp, "\n");
1491
1492 /* Transitions */
1493 for (t = p7O_BM; t <= p7O_II; t++)
1494 {
1495 switch (t) {
1496 case p7O_BM: fprintf(fp, "\ntBM: "); break;
1497 case p7O_MM: fprintf(fp, "\ntMM: "); break;
1498 case p7O_IM: fprintf(fp, "\ntIM: "); break;
1499 case p7O_DM: fprintf(fp, "\ntDM: "); break;
1500 case p7O_MD: fprintf(fp, "\ntMD: "); break;
1501 case p7O_MI: fprintf(fp, "\ntMI: "); break;
1502 case p7O_II: fprintf(fp, "\ntII: "); break;
1503 }
1504
1505 for (k = 1, q = 0; q < nq; q++, k++)
1506 {
1507 switch (t) {
1508 case p7O_BM: kb = k; break;
1509 case p7O_MM: kb = (1 + (nq+k-2)) % nq; break; /* MM, DM, IM quads rotated by +1 */
1510 case p7O_IM: kb = (1 + (nq+k-2)) % nq; break;
1511 case p7O_DM: kb = (1 + (nq+k-2)) % nq; break;
1512 case p7O_MD: kb = k; break; /* the remaining ones are straight up */
1513 case p7O_MI: kb = k; break;
1514 case p7O_II: kb = k; break;
1515 }
1516 fprintf(fp, "[ ");
1517 for (z = 0; z < 8; z++)
1518 if (kb+z*nq <= M) fprintf(fp, "%6d ", kb+z*nq);
1519 else fprintf(fp, "%6s ", "xx");
1520 fprintf(fp, "]");
1521 }
1522 fprintf(fp, "\n ");
1523 for (q = 0; q < nq; q++)
1524 {
1525 fprintf(fp, "[ ");
1526 _mm_store_si128(&tmp.v, om->twv[q*7 + t]);
1527 for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
1528 fprintf(fp, "]");
1529 }
1530 fprintf(fp, "\n");
1531 }
1532
1533 /* DD transitions */
1534 fprintf(fp, "\ntDD: ");
1535 for (k =1, q = 0; q < nq; q++, k++)
1536 {
1537 fprintf(fp, "[ ");
1538 for (z = 0; z < 8; z++)
1539 if (k+z*nq <= M) fprintf(fp, "%6d ", k+z*nq);
1540 else fprintf(fp, "%6s ", "xx");
1541 fprintf(fp, "]");
1542 }
1543 fprintf(fp, "\n ");
1544 for (j = nq*7, q = 0; q < nq; q++, j++)
1545 {
1546 fprintf(fp, "[ ");
1547 _mm_store_si128(&tmp.v, om->twv[j]);
1548 for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
1549 fprintf(fp, "]");
1550 }
1551 fprintf(fp, "\n");
1552
1553 fprintf(fp, "E->C: %6d E->J: %6d\n", om->xw[p7O_E][p7O_MOVE], om->xw[p7O_E][p7O_LOOP]);
1554 fprintf(fp, "N->B: %6d N->N: %6d\n", om->xw[p7O_N][p7O_MOVE], om->xw[p7O_N][p7O_LOOP]);
1555 fprintf(fp, "J->B: %6d J->J: %6d\n", om->xw[p7O_J][p7O_MOVE], om->xw[p7O_J][p7O_LOOP]);
1556 fprintf(fp, "C->T: %6d C->C: %6d\n", om->xw[p7O_C][p7O_MOVE], om->xw[p7O_C][p7O_LOOP]);
1557
1558 fprintf(fp, "scale: %6.2f\n", om->scale_w);
1559 fprintf(fp, "base: %6d\n", om->base_w);
1560 fprintf(fp, "bound: %6d\n", om->ddbound_w);
1561 fprintf(fp, "Q: %6d\n", nq);
1562 fprintf(fp, "M: %6d\n", M);
1563 return eslOK;
1564 }
1565
1566
1567 /* oprofile_dump_fb()
1568 *
1569 * Dump the Forward/Backward part of a profile <om> to <stdout>.
1570 * <width>, <precision> control the floating point output:
1571 * 8,5 is a reasonable choice for prob space,
1572 * 5,2 is reasonable for log space.
1573 */
1574 static int
oprofile_dump_fb(FILE * fp,const P7_OPROFILE * om,int width,int precision)1575 oprofile_dump_fb(FILE *fp, const P7_OPROFILE *om, int width, int precision)
1576 {
1577 int M = om->M; /* length of the query */
1578 int nq = p7O_NQF(M); /* segment length; total # of striped vectors needed */
1579 int x; /* counter over residues */
1580 int q; /* q counts over total # of striped vectors, 0..nq-1 */
1581 int k; /* the usual counter over model nodes 1..M */
1582 int kb; /* possibly offset base k for loading om's TSC vectors */
1583 int z; /* counter within elements of one SIMD minivector */
1584 int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
1585 int j; /* counter in interleaved vector arrays in the profile */
1586 union { __m128 v; float x[4]; } tmp; /* used to align and read simd minivectors */
1587
1588 /* Residue emissions */
1589 for (x = 0; x < om->abc->Kp; x++)
1590 {
1591 fprintf(fp, "(%c): ", om->abc->sym[x]);
1592 for (k =1, q = 0; q < nq; q++, k++)
1593 {
1594 fprintf(fp, "[ ");
1595 for (z = 0; z < 4; z++)
1596 if (k+z*nq <= M) fprintf(fp, "%*d ", width, k+z*nq);
1597 else fprintf(fp, "%*s ", width, "xx");
1598 fprintf(fp, "]");
1599 }
1600 fprintf(fp, "\nmat: ");
1601 for (q = 0; q < nq; q++)
1602 {
1603 fprintf(fp, "[ ");
1604 tmp.v = om->rfv[x][q];
1605 for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
1606 fprintf(fp, "]");
1607 }
1608 fprintf(fp, "\n\n");
1609 }
1610
1611 /* Transitions */
1612 for (t = p7O_BM; t <= p7O_II; t++)
1613 {
1614 switch (t) {
1615 case p7O_BM: fprintf(fp, "\ntBM: "); break;
1616 case p7O_MM: fprintf(fp, "\ntMM: "); break;
1617 case p7O_IM: fprintf(fp, "\ntIM: "); break;
1618 case p7O_DM: fprintf(fp, "\ntDM: "); break;
1619 case p7O_MD: fprintf(fp, "\ntMD: "); break;
1620 case p7O_MI: fprintf(fp, "\ntMI: "); break;
1621 case p7O_II: fprintf(fp, "\ntII: "); break;
1622 }
1623 for (k = 1, q = 0; q < nq; q++, k++)
1624 {
1625 switch (t) {
1626 case p7O_MM:/* MM, DM, IM quads rotated by +1 */
1627 case p7O_IM:
1628 case p7O_DM:
1629 kb = (1 + (nq+k-2)) % nq;
1630 break;
1631 case p7O_BM:/* the remaining ones are straight up */
1632 case p7O_MD:
1633 case p7O_MI:
1634 case p7O_II:
1635 kb = k;
1636 break;
1637 }
1638 fprintf(fp, "[ ");
1639 for (z = 0; z < 4; z++)
1640 if (kb+z*nq <= M) fprintf(fp, "%*d ", width, kb+z*nq);
1641 else fprintf(fp, "%*s ", width, "xx");
1642 fprintf(fp, "]");
1643 }
1644 fprintf(fp, "\n ");
1645 for (q = 0; q < nq; q++)
1646 {
1647 fprintf(fp, "[ ");
1648 tmp.v = om->tfv[q*7 + t];
1649 for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
1650 fprintf(fp, "]");
1651 }
1652 fprintf(fp, "\n");
1653 }
1654
1655 /* DD transitions */
1656 fprintf(fp, "\ntDD: ");
1657 for (k =1, q = 0; q < nq; q++, k++)
1658 {
1659 fprintf(fp, "[ ");
1660 for (z = 0; z < 4; z++)
1661 if (k+z*nq <= M) fprintf(fp, "%*d ", width, k+z*nq);
1662 else fprintf(fp, "%*s ", width, "xx");
1663 fprintf(fp, "]");
1664 }
1665 fprintf(fp, "\n ");
1666 for (j = nq*7, q = 0; q < nq; q++, j++)
1667 {
1668 fprintf(fp, "[ ");
1669 tmp.v = om->tfv[j];
1670 for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
1671 fprintf(fp, "]");
1672 }
1673 fprintf(fp, "\n");
1674
1675 /* Specials */
1676 fprintf(fp, "E->C: %*.*f E->J: %*.*f\n", width, precision, om->xf[p7O_E][p7O_MOVE], width, precision, om->xf[p7O_E][p7O_LOOP]);
1677 fprintf(fp, "N->B: %*.*f N->N: %*.*f\n", width, precision, om->xf[p7O_N][p7O_MOVE], width, precision, om->xf[p7O_N][p7O_LOOP]);
1678 fprintf(fp, "J->B: %*.*f J->J: %*.*f\n", width, precision, om->xf[p7O_J][p7O_MOVE], width, precision, om->xf[p7O_J][p7O_LOOP]);
1679 fprintf(fp, "C->T: %*.*f C->C: %*.*f\n", width, precision, om->xf[p7O_C][p7O_MOVE], width, precision, om->xf[p7O_C][p7O_LOOP]);
1680 fprintf(fp, "Q: %d\n", nq);
1681 fprintf(fp, "M: %d\n", M);
1682 return eslOK;
1683 }
1684
1685
1686 /* Function: p7_oprofile_Dump()
1687 * Synopsis: Dump internals of a <P7_OPROFILE>
1688 * Incept: SRE, Thu Dec 13 08:49:30 2007 [Janelia]
1689 *
1690 * Purpose: Dump the internals of <P7_OPROFILE> structure <om>
1691 * to stream <fp>; generally for testing or debugging
1692 * purposes.
1693 *
1694 * Args: fp - output stream (often stdout)
1695 * om - optimized profile to dump
1696 *
1697 * Returns: <eslOK> on success.
1698 *
1699 * Throws: (no abnormal error conditions)
1700 */
1701 int
p7_oprofile_Dump(FILE * fp,const P7_OPROFILE * om)1702 p7_oprofile_Dump(FILE *fp, const P7_OPROFILE *om)
1703 {
1704 int status;
1705
1706 fprintf(fp, "Dump of a <P7_OPROFILE> ::\n");
1707
1708 fprintf(fp, "\n -- float part, odds ratios for Forward/Backward:\n");
1709 if ((status = oprofile_dump_fb(fp, om, 8, 5)) != eslOK) return status;
1710
1711 fprintf(fp, "\n -- sword part, log odds for ViterbiFilter(): \n");
1712 if ((status = oprofile_dump_vf(fp, om)) != eslOK) return status;
1713
1714 fprintf(fp, "\n -- uchar part, log odds for MSVFilter(): \n");
1715 if ((status = oprofile_dump_mf(fp, om)) != eslOK) return status;
1716
1717 return eslOK;
1718 }
1719
1720
1721 /* Function: p7_oprofile_Sample()
1722 * Synopsis: Sample a random profile.
1723 * Incept: SRE, Wed Jul 30 13:11:52 2008 [Janelia]
1724 *
1725 * Purpose: Sample a random profile of <M> nodes for alphabet <abc>,
1726 * using <r> as the source of random numbers. Parameterize
1727 * it for generation of target sequences of mean length
1728 * <L>. Calculate its log-odds scores using background
1729 * model <bg>.
1730 *
1731 * Args: r - random number generator
1732 * abc - emission alphabet
1733 * bg - background frequency model
1734 * M - size of sampled profile, in nodes
1735 * L - configured target seq mean length
1736 * opt_hmm - optRETURN: sampled HMM
1737 * opt_gm - optRETURN: sampled normal profile
1738 * opt_om - RETURN: optimized profile
1739 *
1740 * Returns: <eslOK> on success.
1741 *
1742 * Throws: (no abnormal error conditions)
1743 */
1744 int
p7_oprofile_Sample(ESL_RANDOMNESS * r,const ESL_ALPHABET * abc,const P7_BG * bg,int M,int L,P7_HMM ** opt_hmm,P7_PROFILE ** opt_gm,P7_OPROFILE ** ret_om)1745 p7_oprofile_Sample(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, const P7_BG *bg, int M, int L,
1746 P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_OPROFILE **ret_om)
1747 {
1748 P7_HMM *hmm = NULL;
1749 P7_PROFILE *gm = NULL;
1750 P7_OPROFILE *om = NULL;
1751 int status;
1752
1753 if ((gm = p7_profile_Create (M, abc)) == NULL) { status = eslEMEM; goto ERROR; }
1754 if ((om = p7_oprofile_Create(M, abc)) == NULL) { status = eslEMEM; goto ERROR; }
1755
1756 if ((status = p7_hmm_Sample(r, M, abc, &hmm)) != eslOK) goto ERROR;
1757 if ((status = p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)) != eslOK) goto ERROR;
1758 if ((status = p7_oprofile_Convert(gm, om)) != eslOK) goto ERROR;
1759 if ((status = p7_oprofile_ReconfigLength(om, L)) != eslOK) goto ERROR;
1760
1761 if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm);
1762 if (opt_gm != NULL) *opt_gm = gm; else p7_profile_Destroy(gm);
1763 *ret_om = om;
1764 return eslOK;
1765
1766 ERROR:
1767 if (opt_hmm != NULL) *opt_hmm = NULL;
1768 if (opt_gm != NULL) *opt_gm = NULL;
1769 *ret_om = NULL;
1770 return status;
1771 }
1772
1773
1774 /* Function: p7_oprofile_Compare()
1775 * Synopsis: Compare two optimized profiles for equality.
1776 * Incept: SRE, Wed Jan 21 13:29:10 2009 [Janelia]
1777 *
1778 * Purpose: Compare the contents of <om1> and <om2>; return
1779 * <eslOK> if they are effectively identical profiles,
1780 * or <eslFAIL> if not.
1781 *
1782 * Floating point comparisons are done to a tolerance
1783 * of <tol> using <esl_FCompare()>.
1784 *
1785 * If a comparison fails, an informative error message is
1786 * left in <errmsg> to indicate why.
1787 *
1788 * Internal allocation sizes are not compared, only the
1789 * data.
1790 *
1791 * Args: om1 - one optimized profile to compare
1792 * om2 - the other
1793 * tol - floating point comparison tolerance; see <esl_FCompare()>
1794 * errmsg - ptr to array of at least <eslERRBUFSIZE> characters.
1795 *
1796 * Returns: <eslOK> on effective equality; <eslFAIL> on difference.
1797 */
1798 int
p7_oprofile_Compare(const P7_OPROFILE * om1,const P7_OPROFILE * om2,float tol,char * errmsg)1799 p7_oprofile_Compare(const P7_OPROFILE *om1, const P7_OPROFILE *om2, float tol, char *errmsg)
1800 {
1801 int Q4 = p7O_NQF(om1->M);
1802 int Q8 = p7O_NQW(om1->M);
1803 int Q16 = p7O_NQB(om1->M);
1804 int q, r, x, y;
1805 union { __m128i v; uint8_t c[16]; } a16, b16;
1806 union { __m128i v; int16_t w[8]; } a8, b8;
1807 union { __m128 v; float x[4]; } a4, b4;
1808
1809 if (om1->mode != om2->mode) ESL_FAIL(eslFAIL, errmsg, "comparison failed: mode");
1810 if (om1->L != om2->L) ESL_FAIL(eslFAIL, errmsg, "comparison failed: L");
1811 if (om1->M != om2->M) ESL_FAIL(eslFAIL, errmsg, "comparison failed: M");
1812 if (om1->nj != om2->nj) ESL_FAIL(eslFAIL, errmsg, "comparison failed: nj");
1813 if (om1->abc->type != om2->abc->type) ESL_FAIL(eslFAIL, errmsg, "comparison failed: alphabet type");
1814
1815 /* MSVFilter part */
1816 for (x = 0; x < om1->abc->Kp; x++)
1817 for (q = 0; q < Q16; q++)
1818 {
1819 a16.v = om1->rbv[x][q]; b16.v = om2->rbv[x][q];
1820 for (r = 0; r < 16; r++) if (a16.c[r] != b16.c[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rb[%d] elem %d", q, r);
1821 }
1822 if (om1->tbm_b != om2->tbm_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tbm_b");
1823 if (om1->tec_b != om2->tec_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tec_b");
1824 if (om1->tjb_b != om2->tjb_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tjb_b");
1825 if (om1->scale_b != om2->scale_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: scale_b");
1826 if (om1->base_b != om2->base_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: base_b");
1827 if (om1->bias_b != om2->bias_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: bias_b");
1828
1829 /* ViterbiFilter() part */
1830 for (x = 0; x < om1->abc->Kp; x++)
1831 for (q = 0; q < Q8; q++)
1832 {
1833 a8.v = om1->rwv[x][q]; b8.v = om2->rwv[x][q];
1834 for (r = 0; r < 8; r++) if (a8.w[r] != b8.w[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rw[%d] elem %d", q, r);
1835 }
1836 for (q = 0; q < 8*Q16; q++)
1837 {
1838 a8.v = om1->twv[q]; b8.v = om2->twv[q];
1839 for (r = 0; r < 8; r++) if (a8.w[r] != b8.w[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tw[%d] elem %d", q, r);
1840 }
1841 for (x = 0; x < p7O_NXSTATES; x++)
1842 for (y = 0; y < p7O_NXTRANS; y++)
1843 if (om1->xw[x][y] != om2->xw[x][y]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: xw[%d][%d]", x, y);
1844
1845 if (om1->scale_w != om2->scale_w) ESL_FAIL(eslFAIL, errmsg, "comparison failed: scale");
1846 if (om1->base_w != om2->base_w) ESL_FAIL(eslFAIL, errmsg, "comparison failed: base");
1847 if (om1->ddbound_w != om2->ddbound_w) ESL_FAIL(eslFAIL, errmsg, "comparison failed: ddbound_w");
1848
1849 /* Forward/Backward part */
1850 for (x = 0; x < om1->abc->Kp; x++)
1851 for (q = 0; q < Q4; q++)
1852 {
1853 a4.v = om1->rfv[x][q]; b4.v = om2->rfv[x][q];
1854 for (r = 0; r < 4; r++) if (esl_FCompare(a4.x[r], b4.x[r], tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rf[%d] elem %d", q, r);
1855 }
1856 for (q = 0; q < 8*Q4; q++)
1857 {
1858 a4.v = om1->tfv[q]; b4.v = om2->tfv[q];
1859 for (r = 0; r < 4; r++) if (a4.x[r] != b4.x[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tf[%d] elem %d", q, r);
1860 }
1861 for (x = 0; x < p7O_NXSTATES; x++)
1862 if (esl_vec_FCompare(om1->xf[x], om2->xf[x], p7O_NXTRANS, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: xf[%d] vector", x);
1863
1864 for (x = 0; x < p7_NOFFSETS; x++)
1865 if (om1->offs[x] != om2->offs[x]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: offs[%d]", x);
1866
1867 if (esl_strcmp(om1->name, om2->name) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: name");
1868 if (esl_strcmp(om1->acc, om2->acc) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: acc");
1869 if (esl_strcmp(om1->desc, om2->desc) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: desc");
1870 if (esl_strcmp(om1->rf, om2->rf) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: ref");
1871 if (esl_strcmp(om1->mm, om2->mm) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: mm");
1872 if (esl_strcmp(om1->cs, om2->cs) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: cs");
1873 if (esl_strcmp(om1->consensus, om2->consensus) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: consensus");
1874
1875 if (esl_vec_FCompare(om1->evparam, om2->evparam, p7_NEVPARAM, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: evparam vector");
1876 if (esl_vec_FCompare(om1->cutoff, om2->cutoff, p7_NCUTOFFS, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: cutoff vector");
1877 if (esl_vec_FCompare(om1->compo, om2->compo, p7_MAXABET, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: compo vector");
1878
1879 return eslOK;
1880 }
1881
1882
1883 /* Function: p7_profile_SameAsMF()
1884 * Synopsis: Set a generic profile's scores to give MSV scores.
1885 * Incept: SRE, Wed Jul 30 13:42:49 2008 [Janelia]
1886 *
1887 * Purpose: Set a generic profile's scores so that the normal <dp_generic> DP
1888 * algorithms will give the same score as <p7_MSVFilter()>:
1889 * all t_MM scores = 0; all other core transitions = -inf;
1890 * multihit local mode; all <t_BMk> entries uniformly <log 2/(M(M+1))>;
1891 * <tCC, tNN, tJJ> scores 0; total approximated later as -3;
1892 * rounded in the same way as the 8-bit limited precision.
1893 *
1894 * Returns: <eslOK> on success.
1895 */
1896 int
p7_profile_SameAsMF(const P7_OPROFILE * om,P7_PROFILE * gm)1897 p7_profile_SameAsMF(const P7_OPROFILE *om, P7_PROFILE *gm)
1898 {
1899 int k,x;
1900 float tbm = roundf(om->scale_b * (log(2.0f / ((float) gm->M * (float) (gm->M+1)))));
1901
1902 /* Transitions */
1903 esl_vec_FSet(gm->tsc, p7P_NTRANS * gm->M, -eslINFINITY);
1904 for (k = 1; k < gm->M; k++) p7P_TSC(gm, k, p7P_MM) = 0.0f;
1905 for (k = 0; k < gm->M; k++) p7P_TSC(gm, k, p7P_BM) = tbm;
1906
1907 /* Emissions */
1908 for (x = 0; x < gm->abc->Kp; x++)
1909 for (k = 0; k <= gm->M; k++)
1910 {
1911 gm->rsc[x][k*2] = (gm->rsc[x][k*2] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_b * gm->rsc[x][k*2]);
1912 gm->rsc[x][k*2+1] = 0; /* insert score: VF makes it zero no matter what. */
1913 }
1914
1915 /* Specials */
1916 for (k = 0; k < p7P_NXSTATES; k++)
1917 for (x = 0; x < p7P_NXTRANS; x++)
1918 gm->xsc[k][x] = (gm->xsc[k][x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_b * gm->xsc[k][x]);
1919
1920 /* NN, CC, JJ hardcoded 0 in limited precision */
1921 gm->xsc[p7P_N][p7P_LOOP] = gm->xsc[p7P_J][p7P_LOOP] = gm->xsc[p7P_C][p7P_LOOP] = 0;
1922
1923 return eslOK;
1924 }
1925
1926
1927 /* Function: p7_profile_SameAsVF()
1928 * Synopsis: Round a generic profile to match ViterbiFilter scores.
1929 * Incept: SRE, Wed Jul 30 13:37:48 2008 [Janelia]
1930 *
1931 * Purpose: Round all the scores in a generic (lspace) <P7_PROFILE> <gm> in
1932 * exactly the same way that the scores in the
1933 * <P7_OPROFILE> <om> were rounded. Then we can test that two profiles
1934 * give identical internal scores in testing, say,
1935 * <p7_ViterbiFilter()> against <p7_GViterbi()>.
1936 *
1937 * The 3nat approximation is used; NN=CC=JJ=0, and 3 nats are
1938 * subtracted at the end to account for their contribution.
1939 *
1940 * To convert a generic Viterbi score <gsc> calculated with this profile
1941 * to a nat score that should match ViterbiFilter() exactly,
1942 * do <(gsc / om->scale_w) - 3.0>.
1943 *
1944 * <gm> must be the same profile that <om> was constructed from.
1945 *
1946 * <gm> is irrevocably altered by this call.
1947 *
1948 * Do not call this more than once on any given <gm>!
1949 *
1950 * Args: <om> - optimized profile, containing scale information.
1951 * <gm> - generic profile that <om> was built from.
1952 *
1953 * Returns: <eslOK> on success.
1954 *
1955 * Throws: (no abnormal error conditions)
1956 */
1957 int
p7_profile_SameAsVF(const P7_OPROFILE * om,P7_PROFILE * gm)1958 p7_profile_SameAsVF(const P7_OPROFILE *om, P7_PROFILE *gm)
1959 {
1960 int k;
1961 int x;
1962
1963 /* Transitions */
1964 /* <= -eslINFINITY test is used solely to silence compiler. really testing == -eslINFINITY */
1965 for (x = 0; x < gm->M*p7P_NTRANS; x++)
1966 gm->tsc[x] = (gm->tsc[x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->tsc[x]);
1967
1968 /* Enforce the rule that no II can be 0; max of -1 */
1969 for (x = p7P_II; x < gm->M*p7P_NTRANS; x += p7P_NTRANS)
1970 if (gm->tsc[x] == 0.0) gm->tsc[x] = -1.0;
1971
1972 /* Emissions */
1973 for (x = 0; x < gm->abc->Kp; x++)
1974 for (k = 0; k <= gm->M; k++)
1975 {
1976 gm->rsc[x][k*2] = (gm->rsc[x][k*2] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->rsc[x][k*2]);
1977 gm->rsc[x][k*2+1] = 0.0; /* insert score: VF makes it zero no matter what. */
1978 }
1979
1980 /* Specials */
1981 for (k = 0; k < p7P_NXSTATES; k++)
1982 for (x = 0; x < p7P_NXTRANS; x++)
1983 gm->xsc[k][x] = (gm->xsc[k][x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->xsc[k][x]);
1984
1985 /* 3nat approximation: NN, CC, JJ hardcoded 0 in limited precision */
1986 gm->xsc[p7P_N][p7P_LOOP] = gm->xsc[p7P_J][p7P_LOOP] = gm->xsc[p7P_C][p7P_LOOP] = 0.0;
1987
1988 return eslOK;
1989 }
1990 /*------------ end, P7_OPROFILE debugging tools ----------------*/
1991
1992
1993
1994 /*****************************************************************
1995 * 5. Benchmark driver.
1996 *****************************************************************/
1997
1998 #ifdef p7OPROFILE_BENCHMARK
1999 /* Timing profile conversion.
2000 gcc -o benchmark-oprofile -std=gnu99 -g -Wall -msse2 -I.. -L.. -I../../easel -L../../easel -Dp7OPROFILE_BENCHMARK\
2001 p7_oprofile.c -lhmmer -leasel -lm
2002 icc -o benchmark-oprofile -O3 -static -I.. -L.. -I../../easel -L../../easel -Dp7OPROFILE_BENCHMARK p7_oprofile.c -lhmmer -leasel -lm
2003 ./benchmark-sse <hmmfile> runs benchmark
2004 */
2005 #include "p7_config.h"
2006
2007 #include "easel.h"
2008 #include "esl_alphabet.h"
2009 #include "esl_getopts.h"
2010 #include "esl_random.h"
2011 #include "esl_stopwatch.h"
2012
2013 #include "hmmer.h"
2014 #include "impl_sse.h"
2015
2016 static ESL_OPTIONS options[] = {
2017 /* name type default env range toggles reqs incomp help docgroup*/
2018 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
2019 { "-L", eslARG_INT, "400", NULL, NULL, NULL, NULL, NULL, "length of target sequence", 0 },
2020 { "-N", eslARG_INT, "100000", NULL, NULL, NULL, NULL, NULL, "number of conversions to time", 0 },
2021 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
2022 };
2023 static char usage[] = "[-options] <hmmfile>";
2024 static char banner[] = "benchmark driver for the generic implementation";
2025
2026 int
main(int argc,char ** argv)2027 main(int argc, char **argv)
2028 {
2029 ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
2030 char *hmmfile = esl_opt_GetArg(go, 1);
2031 ESL_STOPWATCH *w = esl_stopwatch_Create();
2032 ESL_ALPHABET *abc = NULL;
2033 P7_HMMFILE *hfp = NULL;
2034 P7_HMM *hmm = NULL;
2035 P7_BG *bg = NULL;
2036 P7_PROFILE *gm = NULL;
2037 P7_OPROFILE *om = NULL;
2038 int L = esl_opt_GetInteger(go, "-L");
2039 int N = esl_opt_GetInteger(go, "-N");
2040 int i;
2041
2042 if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
2043 if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM");
2044
2045 bg = p7_bg_Create(abc);
2046 p7_bg_SetLength(bg, L);
2047 gm = p7_profile_Create(hmm->M, abc);
2048 p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
2049 om = p7_oprofile_Create(gm->M, abc);
2050
2051 esl_stopwatch_Start(w);
2052 for (i = 0; i < N; i++)
2053 p7_oprofile_Convert(gm, om);
2054 esl_stopwatch_Stop(w);
2055 esl_stopwatch_Display(stdout, w, "# CPU time: ");
2056 printf("# M = %d\n", gm->M);
2057
2058 p7_oprofile_Destroy(om);
2059 p7_profile_Destroy(gm);
2060 p7_bg_Destroy(bg);
2061 p7_hmm_Destroy(hmm);
2062 p7_hmmfile_Close(hfp);
2063 esl_alphabet_Destroy(abc);
2064 esl_stopwatch_Destroy(w);
2065 esl_getopts_Destroy(go);
2066 return 0;
2067 }
2068 #endif /*p7OPROFILE_BENCHMARK*/
2069 /*---------------- end, benchmark driver ------------------------*/
2070
2071
2072
2073
2074
2075 /*****************************************************************
2076 * 6. Unit tests
2077 *****************************************************************/
2078 #ifdef p7OPROFILE_TESTDRIVE
2079
2080
2081 #endif /*p7OPROFILE_TESTDRIVE*/
2082 /*------------------- end, unit tests ---------------------------*/
2083
2084
2085
2086
2087 /*****************************************************************
2088 * 7. Test driver
2089 *****************************************************************/
2090 #ifdef p7OPROFILE_TESTDRIVE
2091
2092
2093 #endif /*p7OPROFILE_TESTDRIVE*/
2094 /*------------------- end, test driver --------------------------*/
2095
2096
2097 /*****************************************************************
2098 * 8. Example
2099 *****************************************************************/
2100 #ifdef p7OPROFILE_EXAMPLE
2101 /* gcc -std=gnu99 -g -Wall -Dp7OPROFILE_EXAMPLE -I.. -I../../easel -L.. -L../../easel -o p7_oprofile_example p7_oprofile.c -lhmmer -leasel -lm
2102 * ./p7_oprofile_example <hmmfile>
2103 */
2104 #include "p7_config.h"
2105 #include <stdlib.h>
2106 #include "easel.h"
2107 #include "hmmer.h"
2108
2109 int
main(int argc,char ** argv)2110 main(int argc, char **argv)
2111 {
2112 char *hmmfile = argv[1];
2113 ESL_ALPHABET *abc = NULL;
2114 P7_HMMFILE *hfp = NULL;
2115 P7_HMM *hmm = NULL;
2116 P7_BG *bg = NULL;
2117 P7_PROFILE *gm = NULL;
2118 P7_OPROFILE *om1 = NULL;
2119 P7_OPROFILE *om2 = NULL;
2120 int status;
2121 char errbuf[eslERRBUFSIZE];
2122
2123 status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
2124 if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
2125 else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
2126 else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf);
2127
2128 status = p7_hmmfile_Read(hfp, &abc, &hmm);
2129 if (status == eslEFORMAT) p7_Fail("Bad file format in HMM file %s:\n%s\n", hfp->fname, hfp->errbuf);
2130 else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type));
2131 else if (status == eslEOF) p7_Fail("Empty HMM file %s? No HMM data found.\n", hfp->fname);
2132 else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s\n", hfp->fname);
2133
2134 bg = p7_bg_Create(abc);
2135 gm = p7_profile_Create(hmm->M, abc);
2136 om1 = p7_oprofile_Create(hmm->M, abc);
2137 p7_ProfileConfig(hmm, bg, gm, 400, p7_LOCAL);
2138 p7_oprofile_Convert(gm, om1);
2139
2140 p7_oprofile_Dump(stdout, om1);
2141
2142 om2 = p7_oprofile_Copy(om1);
2143 if (p7_oprofile_Compare(om1, om2, 0.001f, errbuf) != eslOK) printf ("ERROR %s\n", errbuf);
2144
2145 p7_oprofile_Destroy(om1);
2146 p7_oprofile_Destroy(om2);
2147 p7_profile_Destroy(gm);
2148 p7_bg_Destroy(bg);
2149 p7_hmm_Destroy(hmm);
2150 p7_hmmfile_Close(hfp);
2151 esl_alphabet_Destroy(abc);
2152 return eslOK;
2153 }
2154 #endif /*p7OPROFILE_EXAMPLE*/
2155 /*----------------------- end, example --------------------------*/
2156
2157
2158