1 /* Routines for the P7_OPROFILE structure:
2  * a search profile in an optimized implementation.
3  *
4  * Contents:
5  *   1. The P7_OPROFILE object: allocation, initialization, destruction.
6  *   2. Conversion from generic P7_PROFILE to optimized P7_OPROFILE
7  *   3. Conversion from optimized P7_OPROFILE to compact score arrays
8  *   4. Debugging and development utilities.
9  *   5. Benchmark driver.
10  *   6. Unit tests.
11  *   7. Test driver.
12  *   8. Example.
13  */
14 #include "p7_config.h"
15 
16 #include <stdio.h>
17 #include <string.h>
18 #include <math.h>		/* roundf() */
19 
20 #include <xmmintrin.h>		/* SSE  */
21 #include <emmintrin.h>		/* SSE2 */
22 
23 #include "easel.h"
24 #include "esl_random.h"
25 #include "esl_sse.h"
26 #include "esl_vectorops.h"
27 
28 #include "hmmer.h"
29 #include "impl_sse.h"
30 
31 static uint8_t unbiased_byteify(P7_OPROFILE *om, float sc);
32 static uint8_t biased_byteify(P7_OPROFILE *om, float sc);
33 static int16_t wordify(P7_OPROFILE *om, float sc);
34 static int     sf_conversion(P7_OPROFILE *om);
35 
36 /*****************************************************************
37  * 1. The P7_OPROFILE structure: a score profile.
38  *****************************************************************/
39 
40 /* Function:  p7_oprofile_Create()
41  * Synopsis:  Allocate an optimized profile structure.
42  * Incept:    SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
43  *
44  * Purpose:   Allocate for profiles of up to <allocM> nodes for digital alphabet <abc>.
45  *
46  * Throws:    <NULL> on allocation error.
47  */
48 P7_OPROFILE *
p7_oprofile_Create(int allocM,const ESL_ALPHABET * abc)49 p7_oprofile_Create(int allocM, const ESL_ALPHABET *abc)
50 {
51   int          status;
52   P7_OPROFILE *om  = NULL;
53   int          nqb = p7O_NQB(allocM); /* # of uchar vectors needed for query */
54   int          nqw = p7O_NQW(allocM); /* # of sword vectors needed for query */
55   int          nqf = p7O_NQF(allocM); /* # of float vectors needed for query */
56   int          nqs = nqb + p7O_EXTRA_SB;
57   int          x;
58 
59   /* level 0 */
60   ESL_ALLOC(om, sizeof(P7_OPROFILE));
61   om->rbv_mem = NULL;
62   om->sbv_mem = NULL;
63   om->rwv_mem = NULL;
64   om->twv_mem = NULL;
65   om->rfv_mem = NULL;
66   om->tfv_mem = NULL;
67   om->rbv     = NULL;
68   om->sbv     = NULL;
69   om->rwv     = NULL;
70   om->twv     = NULL;
71   om->rfv     = NULL;
72   om->tfv     = NULL;
73   om->clone   = 0;
74 
75   /* level 1 */
76   ESL_ALLOC(om->rbv_mem, sizeof(__m128i) * nqb  * abc->Kp          +15); /* +15 is for manual 16-byte alignment */
77   ESL_ALLOC(om->sbv_mem, sizeof(__m128i) * nqs  * abc->Kp          +15);
78   ESL_ALLOC(om->rwv_mem, sizeof(__m128i) * nqw  * abc->Kp          +15);
79   ESL_ALLOC(om->twv_mem, sizeof(__m128i) * nqw  * p7O_NTRANS       +15);
80   ESL_ALLOC(om->rfv_mem, sizeof(__m128)  * nqf  * abc->Kp          +15);
81   ESL_ALLOC(om->tfv_mem, sizeof(__m128)  * nqf  * p7O_NTRANS       +15);
82 
83   ESL_ALLOC(om->rbv, sizeof(__m128i *) * abc->Kp);
84   ESL_ALLOC(om->sbv, sizeof(__m128i *) * abc->Kp);
85   ESL_ALLOC(om->rwv, sizeof(__m128i *) * abc->Kp);
86   ESL_ALLOC(om->rfv, sizeof(__m128  *) * abc->Kp);
87 
88   /* align vector memory on 16-byte boundaries */
89   om->rbv[0] = (__m128i *) (((unsigned long int) om->rbv_mem + 15) & (~0xf));
90   om->sbv[0] = (__m128i *) (((unsigned long int) om->sbv_mem + 15) & (~0xf));
91   om->rwv[0] = (__m128i *) (((unsigned long int) om->rwv_mem + 15) & (~0xf));
92   om->twv    = (__m128i *) (((unsigned long int) om->twv_mem + 15) & (~0xf));
93   om->rfv[0] = (__m128  *) (((unsigned long int) om->rfv_mem + 15) & (~0xf));
94   om->tfv    = (__m128  *) (((unsigned long int) om->tfv_mem + 15) & (~0xf));
95 
96   /* set the rest of the row pointers for match emissions */
97   for (x = 1; x < abc->Kp; x++) {
98     om->rbv[x] = om->rbv[0] + (x * nqb);
99     om->sbv[x] = om->sbv[0] + (x * nqs);
100     om->rwv[x] = om->rwv[0] + (x * nqw);
101     om->rfv[x] = om->rfv[0] + (x * nqf);
102   }
103   om->allocQ16  = nqb;
104   om->allocQ8   = nqw;
105   om->allocQ4   = nqf;
106 
107   /* Remaining initializations */
108   om->tbm_b     = 0;
109   om->tec_b     = 0;
110   om->tjb_b     = 0;
111   om->scale_b   = 0.0f;
112   om->base_b    = 0;
113   om->bias_b    = 0;
114 
115   om->scale_w      = 0.0f;
116   om->base_w       = 0;
117   om->ddbound_w    = 0;
118   om->ncj_roundoff = 0.0f;
119 
120   for (x = 0; x < p7_NOFFSETS; x++) om->offs[x]    = -1;
121   for (x = 0; x < p7_NEVPARAM; x++) om->evparam[x] = p7_EVPARAM_UNSET;
122   for (x = 0; x < p7_NCUTOFFS; x++) om->cutoff[x]  = p7_CUTOFF_UNSET;
123   for (x = 0; x < p7_MAXABET;  x++) om->compo[x]   = p7_COMPO_UNSET;
124 
125   om->name      = NULL;
126   om->acc       = NULL;
127   om->desc      = NULL;
128 
129   /* in a P7_OPROFILE, we always allocate for the optional RF, CS annotation.
130    * we only rely on the leading \0 to signal that it's unused, but
131    * we initialize all this memory to zeros to shut valgrind up about
132    * fwrite'ing uninitialized memory in the io functions.
133    */
134   ESL_ALLOC(om->rf,          sizeof(char) * (allocM+2));
135   ESL_ALLOC(om->mm,          sizeof(char) * (allocM+2));
136   ESL_ALLOC(om->cs,          sizeof(char) * (allocM+2));
137   ESL_ALLOC(om->consensus,   sizeof(char) * (allocM+2));
138   memset(om->rf,       '\0', sizeof(char) * (allocM+2));
139   memset(om->mm,       '\0', sizeof(char) * (allocM+2));
140   memset(om->cs,       '\0', sizeof(char) * (allocM+2));
141   memset(om->consensus,'\0', sizeof(char) * (allocM+2));
142 
143   om->abc        = abc;
144   om->L          = 0;
145   om->M          = 0;
146   om->max_length = -1;
147   om->allocM     = allocM;
148   om->mode       = p7_NO_MODE;
149   om->nj         = 0.0f;
150   return om;
151 
152  ERROR:
153   p7_oprofile_Destroy(om);
154   return NULL;
155 }
156 
157 /* Function:  p7_oprofile_IsLocal()
158  * Synopsis:  Returns TRUE if profile is in local alignment mode.
159  * Incept:    SRE, Sat Aug 16 08:46:00 2008 [Janelia]
160  */
161 int
p7_oprofile_IsLocal(const P7_OPROFILE * om)162 p7_oprofile_IsLocal(const P7_OPROFILE *om)
163 {
164   if (om->mode == p7_LOCAL || om->mode == p7_UNILOCAL) return TRUE;
165   return FALSE;
166 }
167 
168 
169 
170 /* Function:  p7_oprofile_Destroy()
171  * Synopsis:  Frees an optimized profile structure.
172  * Incept:    SRE, Sun Nov 25 12:22:21 2007 [Casa de Gatos]
173  */
174 void
p7_oprofile_Destroy(P7_OPROFILE * om)175 p7_oprofile_Destroy(P7_OPROFILE *om)
176 {
177   if (om == NULL) return;
178 
179   if (om->clone == 0)
180     {
181       if (om->rbv_mem   != NULL) free(om->rbv_mem);
182       if (om->sbv_mem   != NULL) free(om->sbv_mem);
183       if (om->rwv_mem   != NULL) free(om->rwv_mem);
184       if (om->twv_mem   != NULL) free(om->twv_mem);
185       if (om->rfv_mem   != NULL) free(om->rfv_mem);
186       if (om->tfv_mem   != NULL) free(om->tfv_mem);
187       if (om->rbv       != NULL) free(om->rbv);
188       if (om->sbv       != NULL) free(om->sbv);
189       if (om->rwv       != NULL) free(om->rwv);
190       if (om->rfv       != NULL) free(om->rfv);
191       if (om->name      != NULL) free(om->name);
192       if (om->acc       != NULL) free(om->acc);
193       if (om->desc      != NULL) free(om->desc);
194       if (om->rf        != NULL) free(om->rf);
195       if (om->mm        != NULL) free(om->mm);
196       if (om->cs        != NULL) free(om->cs);
197       if (om->consensus != NULL) free(om->consensus);
198     }
199 
200   free(om);
201 }
202 
203 /* Function:  p7_oprofile_Sizeof()
204  * Synopsis:  Return the allocated size of a <P7_OPROFILE>.
205  * Incept:    SRE, Wed Mar  2 10:09:21 2011 [Janelia]
206  *
207  * Purpose:   Returns the allocated size of a <P7_OPROFILE>,
208  *            in bytes.
209  */
210 size_t
p7_oprofile_Sizeof(P7_OPROFILE * om)211 p7_oprofile_Sizeof(P7_OPROFILE *om)
212 {
213   size_t n   = 0;
214   int    nqb = om->allocQ16;	/* # of uchar vectors needed for query */
215   int    nqw = om->allocQ8;     /* # of sword vectors needed for query */
216   int    nqf = om->allocQ4;     /* # of float vectors needed for query */
217   int    nqs = nqb + p7O_EXTRA_SB;
218 
219   /* Stuff below exactly mirrors the malloc()'s in
220    * p7_oprofile_Create(); so even though we could
221    * write this more compactly, leave it like this
222    * w/ one:one correspondence to _Create(), for
223    * maintainability and clarity.
224    */
225   n  += sizeof(P7_OPROFILE);
226   n  += sizeof(__m128i) * nqb  * om->abc->Kp +15; /* om->rbv_mem   */
227   n  += sizeof(__m128i) * nqs  * om->abc->Kp +15; /* om->sbv_mem   */
228   n  += sizeof(__m128i) * nqw  * om->abc->Kp +15; /* om->rwv_mem   */
229   n  += sizeof(__m128i) * nqw  * p7O_NTRANS  +15; /* om->twv_mem   */
230   n  += sizeof(__m128)  * nqf  * om->abc->Kp +15; /* om->rfv_mem   */
231   n  += sizeof(__m128)  * nqf  * p7O_NTRANS  +15; /* om->tfv_mem   */
232 
233   n  += sizeof(__m128i *) * om->abc->Kp;          /* om->rbv       */
234   n  += sizeof(__m128i *) * om->abc->Kp;          /* om->sbv       */
235   n  += sizeof(__m128i *) * om->abc->Kp;          /* om->rwv       */
236   n  += sizeof(__m128  *) * om->abc->Kp;          /* om->rfv       */
237 
238   n  += sizeof(char) * (om->allocM+2);            /* om->rf        */
239   n  += sizeof(char) * (om->allocM+2);            /* om->mm        */
240   n  += sizeof(char) * (om->allocM+2);            /* om->cs        */
241   n  += sizeof(char) * (om->allocM+2);            /* om->consensus */
242 
243   return n;
244 }
245 
246 
247 /* TODO: this is not following the _Copy interface guidelines; it's a _Clone */
248 /* TODO: its documentation header is a cut/paste of _Create; FIXME */
249 /* Function:  p7_oprofile_Copy()
250  * Synopsis:  Allocate an optimized profile structure.
251  * Incept:    SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
252  *
253  * Purpose:   Allocate for profiles of up to <allocM> nodes for digital alphabet <abc>.
254  *
255  * Throws:    <NULL> on allocation error.
256  */
257 P7_OPROFILE *
p7_oprofile_Copy(P7_OPROFILE * om1)258 p7_oprofile_Copy(P7_OPROFILE *om1)
259 {
260   int           x, y;
261   int           status;
262 
263   int           nqb  = p7O_NQB(om1->allocM); /* # of uchar vectors needed for query */
264   int           nqw  = p7O_NQW(om1->allocM); /* # of sword vectors needed for query */
265   int           nqf  = p7O_NQF(om1->allocM); /* # of float vectors needed for query */
266   int           nqs  = nqb + p7O_EXTRA_SB;
267 
268   size_t        size = sizeof(char) * (om1->allocM+2);
269 
270   P7_OPROFILE  *om2  = NULL;
271 
272   const ESL_ALPHABET *abc = om1->abc;
273 
274   /* level 0 */
275   ESL_ALLOC(om2, sizeof(P7_OPROFILE));
276   om2->rbv_mem = NULL;
277   om2->sbv_mem = NULL;
278   om2->rwv_mem = NULL;
279   om2->twv_mem = NULL;
280   om2->rfv_mem = NULL;
281   om2->tfv_mem = NULL;
282   om2->rbv     = NULL;
283   om2->sbv     = NULL;
284   om2->rwv     = NULL;
285   om2->twv     = NULL;
286   om2->rfv     = NULL;
287   om2->tfv     = NULL;
288 
289   /* level 1 */
290   ESL_ALLOC(om2->rbv_mem, sizeof(__m128i) * nqb  * abc->Kp    +15);	/* +15 is for manual 16-byte alignment */
291   ESL_ALLOC(om2->sbv_mem, sizeof(__m128i) * nqs  * abc->Kp    +15);
292   ESL_ALLOC(om2->rwv_mem, sizeof(__m128i) * nqw  * abc->Kp    +15);
293   ESL_ALLOC(om2->twv_mem, sizeof(__m128i) * nqw  * p7O_NTRANS +15);
294   ESL_ALLOC(om2->rfv_mem, sizeof(__m128)  * nqf  * abc->Kp    +15);
295   ESL_ALLOC(om2->tfv_mem, sizeof(__m128)  * nqf  * p7O_NTRANS +15);
296 
297   ESL_ALLOC(om2->rbv, sizeof(__m128i *) * abc->Kp);
298   ESL_ALLOC(om2->sbv, sizeof(__m128i *) * abc->Kp);
299   ESL_ALLOC(om2->rwv, sizeof(__m128i *) * abc->Kp);
300   ESL_ALLOC(om2->rfv, sizeof(__m128  *) * abc->Kp);
301 
302   /* align vector memory on 16-byte boundaries */
303   om2->rbv[0] = (__m128i *) (((unsigned long int) om2->rbv_mem + 15) & (~0xf));
304   om2->sbv[0] = (__m128i *) (((unsigned long int) om2->sbv_mem + 15) & (~0xf));
305   om2->rwv[0] = (__m128i *) (((unsigned long int) om2->rwv_mem + 15) & (~0xf));
306   om2->twv    = (__m128i *) (((unsigned long int) om2->twv_mem + 15) & (~0xf));
307   om2->rfv[0] = (__m128  *) (((unsigned long int) om2->rfv_mem + 15) & (~0xf));
308   om2->tfv    = (__m128  *) (((unsigned long int) om2->tfv_mem + 15) & (~0xf));
309 
310   /* copy the vector data */
311   memcpy(om2->rbv[0], om1->rbv[0], sizeof(__m128i) * nqb  * abc->Kp);
312   memcpy(om2->sbv[0], om1->sbv[0], sizeof(__m128i) * nqs  * abc->Kp);
313   memcpy(om2->rwv[0], om1->rwv[0], sizeof(__m128i) * nqw  * abc->Kp);
314   memcpy(om2->rfv[0], om1->rfv[0], sizeof(__m128i) * nqf  * abc->Kp);
315 
316   /* set the rest of the row pointers for match emissions */
317   for (x = 1; x < abc->Kp; x++) {
318     om2->rbv[x] = om2->rbv[0] + (x * nqb);
319     om2->sbv[x] = om2->sbv[0] + (x * nqs);
320     om2->rwv[x] = om2->rwv[0] + (x * nqw);
321     om2->rfv[x] = om2->rfv[0] + (x * nqf);
322   }
323   om2->allocQ16  = nqb;
324   om2->allocQ8   = nqw;
325   om2->allocQ4   = nqf;
326 
327   /* Remaining initializations */
328   om2->tbm_b     = om1->tbm_b;
329   om2->tec_b     = om1->tec_b;
330   om2->tjb_b     = om1->tjb_b;
331   om2->scale_b   = om1->scale_b;
332   om2->base_b    = om1->base_b;
333   om2->bias_b    = om1->bias_b;
334 
335   om2->scale_w      = om1->scale_w;
336   om2->base_w       = om1->base_w;
337   om2->ddbound_w    = om1->ddbound_w;
338   om2->ncj_roundoff = om1->ncj_roundoff;
339 
340   for (x = 0; x < p7_NOFFSETS; x++) om2->offs[x]    = om1->offs[x];
341   for (x = 0; x < p7_NEVPARAM; x++) om2->evparam[x] = om1->evparam[x];
342   for (x = 0; x < p7_NCUTOFFS; x++) om2->cutoff[x]  = om1->cutoff[x];
343   for (x = 0; x < p7_MAXABET;  x++) om2->compo[x]   = om1->compo[x];
344 
345   for (x = 0; x < nqw  * p7O_NTRANS; ++x) om2->twv[x] = om1->twv[x];
346   for (x = 0; x < nqf  * p7O_NTRANS; ++x) om2->tfv[x] = om1->tfv[x];
347 
348   for (x = 0; x < p7O_NXSTATES; x++)
349     for (y = 0; y < p7O_NXTRANS; y++)
350       {
351 	om2->xw[x][y] = om1->xw[x][y];
352 	om2->xf[x][y] = om1->xf[x][y];
353       }
354 
355   if ((status = esl_strdup(om1->name, -1, &om2->name)) != eslOK) goto ERROR;
356   if ((status = esl_strdup(om1->acc,  -1, &om2->acc))  != eslOK) goto ERROR;
357   if ((status = esl_strdup(om1->desc, -1, &om2->desc)) != eslOK) goto ERROR;
358 
359   /* in a P7_OPROFILE, we always allocate for the optional RF, CS annotation.
360    * we only rely on the leading \0 to signal that it's unused, but
361    * we initialize all this memory to zeros to shut valgrind up about
362    * fwrite'ing uninitialized memory in the io functions.
363    */
364   ESL_ALLOC(om2->rf,          size);
365   ESL_ALLOC(om2->mm,          size);
366   ESL_ALLOC(om2->cs,          size);
367   ESL_ALLOC(om2->consensus,   size);
368 
369   memcpy(om2->rf,        om1->rf,        size);
370   memcpy(om2->mm,        om1->mm,        size);
371   memcpy(om2->cs,        om1->cs,        size);
372   memcpy(om2->consensus, om1->consensus, size);
373 
374   om2->abc       = om1->abc;
375   om2->L         = om1->L;
376   om2->M         = om1->M;
377   om2->allocM    = om1->allocM;
378   om2->mode      = om1->mode;
379   om2->nj        = om1->nj;
380   om2->max_length   = om1->max_length;
381 
382   om2->clone     = om1->clone;
383 
384   return om2;
385 
386  ERROR:
387   p7_oprofile_Destroy(om2);
388   return NULL;
389 }
390 
391 /* Function:  p7_oprofile_Clone()
392  * Synopsis:  Allocate a cloned copy of the optimized profile structure.  All
393  *            allocated memory from the original profile is not reallocated.
394  *            The cloned copy will point to the same memory as the original.
395  * Incept:    SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
396  *
397  * Purpose:   Quick copy of an optimized profile used in mutiple threads.
398  *
399  * Throws:    <NULL> on allocation error.
400  */
401 P7_OPROFILE *
p7_oprofile_Clone(const P7_OPROFILE * om1)402 p7_oprofile_Clone(const P7_OPROFILE *om1)
403 {
404   int           status;
405 
406   P7_OPROFILE  *om2  = NULL;
407 
408   ESL_ALLOC(om2, sizeof(P7_OPROFILE));
409   memcpy(om2, om1, sizeof(P7_OPROFILE));
410 
411   om2->clone  = 1;
412 
413   return om2;
414 
415  ERROR:
416   p7_oprofile_Destroy(om2);
417   return NULL;
418 }
419 
420 
421 /* Function:  p7_oprofile_UpdateFwdEmissionScores()
422  * Synopsis:  Update the Forward/Backward part of the optimized profile
423  *            match emissions to account for new background distribution.
424  *
425  * Purpose:   This implementation re-orders the loops used to access/modify
426  *            the rfv array relative to how it's accessed for example in
427  *            fb_conversion(), to minimize the required size of sc_arr.
428  *
429  * Args:      om              - optimized profile to be updated.
430  *            bg              - the new bg distribution
431  *            fwd_emissions   - precomputed Fwd (float) residue emission
432  *                              probabilities in serial order (gathered from
433  *                              the optimized striped <om> with
434  *                              p7_oprofile_GetFwdEmissionArray() ).
435  *            sc_arr            Preallocated array of at least Kp*4 floats
436  */
437 int
p7_oprofile_UpdateFwdEmissionScores(P7_OPROFILE * om,P7_BG * bg,float * fwd_emissions,float * sc_arr)438 p7_oprofile_UpdateFwdEmissionScores(P7_OPROFILE *om, P7_BG *bg, float *fwd_emissions, float *sc_arr)
439 {
440   int     M   = om->M;    /* length of the query                                          */
441   int     k, q, x, z;
442   int     nq  = p7O_NQF(M);     /* segment length; total # of striped vectors needed            */
443   int     K   = om->abc->K;
444   int     Kp  = om->abc->Kp;
445   union   { __m128 v; float x[4]; } tmp; /* used to align and load simd minivectors               */
446 
447 
448   for (k = 1, q = 0; q < nq; q++, k++) {
449 
450     //First compute the core characters of the alphabet
451     for (x = 0; x < K; x++) {
452       for (z = 0; z < 4; z++) {
453         if (k+ z*nq <= M)  sc_arr[z*Kp + x] =  (om->mm && om->mm[(k+z*nq)]=='m') ? 0 : log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]);
454         else               sc_arr[z*Kp + x] =  -eslINFINITY;
455 
456         tmp.x[z] = sc_arr[z*Kp + x];
457       }
458       om->rfv[x][q] = esl_sse_expf(tmp.v);
459 
460     }
461 
462     /* gap, nonresidue, and missing data residue codes don't get set by FExpectScVec(),
463      * so do them
464      */
465     for (z = 0; z < 4; z++)
466       {
467 	sc_arr[z*Kp + K]        = -eslINFINITY; /* gap char -     */
468 	sc_arr[z*Kp + (Kp - 2)] = -eslINFINITY; /* nonresidue *   */
469 	sc_arr[z*Kp + (Kp - 1)] = -eslINFINITY; /* missing data ~ */
470       }
471 
472     // Then compute corresponding scores for ambiguity codes.
473     for (z = 0; z < 4; z++)
474       esl_abc_FExpectScVec(om->abc, sc_arr+(z*Kp), bg->f);
475 
476     //finish off the interleaved values
477     for (x = K; x < Kp; x++) {
478       for (z = 0; z < 4; z++)
479          tmp.x[z] = sc_arr[z*Kp + x];  // computed in FExpectScVec call above
480       om->rfv[x][q] = esl_sse_expf(tmp.v);
481     }
482   }
483 
484   return eslOK;
485 }
486 
487 
488 /* Function:  p7_oprofile_UpdateVitEmissionScores()
489  * Synopsis:  Update the Viterbi part of the optimized profile match
490  *            emissions to account for new background distribution.
491  *.
492  * Purpose:   This implementation re-orders the loops used to access/modify
493  *            the rmv array relative to how it's accessed for example in
494  *            vf_conversion(), to minimize the required size of sc_arr.
495  *
496  * Args:      om              - optimized profile to be updated.
497  *            bg              - the new bg distribution
498  *            fwd_emissions   - precomputed Fwd (float) residue emission
499  *                              probabilities in serial order (gathered from
500  *                              the optimized striped <om> with
501  *                              p7_oprofile_GetFwdEmissionArray() ).
502  *            sc_arr            Preallocated array of at least Kp*8 floats
503  */
504 int
p7_oprofile_UpdateVitEmissionScores(P7_OPROFILE * om,P7_BG * bg,float * fwd_emissions,float * sc_arr)505 p7_oprofile_UpdateVitEmissionScores(P7_OPROFILE *om, P7_BG *bg, float *fwd_emissions, float *sc_arr)
506 {
507   int     M   = om->M;    /* length of the query                                          */
508   int     k, q, x, z;
509   int     nq  = p7O_NQW(M);     /* segment length; total # of striped vectors needed            */
510   int     K   = om->abc->K;
511   int     Kp  = om->abc->Kp;
512   int     idx;
513   union   { __m128i v; int16_t i[8]; } tmp; /* used to align and load simd minivectors            */
514 
515   for (k = 1, q = 0; q < nq; q++, k++) {
516 
517     //First compute the core characters of the alphabet
518     for (x = 0; x < K; x++) {
519       for (z = 0; z < 8; z++) {
520         idx = z*Kp + x;
521         if (k+ z*nq <= M)  {
522           sc_arr[idx] = (om->mm && om->mm[(k+z*nq)]=='m') ? 0 : log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]);
523           tmp.i[z]    = wordify(om, sc_arr[idx]);
524         }
525         else
526         {
527           sc_arr[idx] =  -eslINFINITY;
528           tmp.i[z]    =  -32768;
529         }
530 
531       }
532       om->rwv[x][q] = tmp.v;
533 
534     }
535 
536     // Then compute corresponding scores for ambiguity codes.
537     for (z = 0; z < 8; z++)
538       esl_abc_FExpectScVec(om->abc, sc_arr+(z*Kp), bg->f);
539 
540     //finish off the interleaved values
541     for (x = K; x < Kp; x++) {
542       for (z = 0; z < 8; z++) {
543         idx = z*Kp + x;
544         if (x==K || x>Kp-3 || sc_arr[idx] == -eslINFINITY)
545           tmp.i[z]  =  -32768;
546         else
547           tmp.i[z]  = wordify(om, sc_arr[idx]);
548       }
549       om->rwv[x][q] = tmp.v;
550     }
551   }
552 
553   return eslOK;
554 }
555 
556 
557 
558 /* Function:  p7_oprofile_UpdateMSVEmissionScores()
559  * Synopsis:  Update the MSV part of the optimized profile match
560  *            emissions to account for new background distribution.
561  *.
562  * Purpose:   This implementation re-orders the loops used to access/modify
563  *            the rbv array relative to how it's accessed for example in
564  *            mf_conversion(), to minimize the required size of sc_arr.
565  *
566  * Args:      om              - optimized profile to be updated.
567  *            bg              - the new bg distribution
568  *            fwd_emissions   - precomputed Fwd (float) residue emission
569  *                              probabilities in serial order (gathered from
570  *                              the optimized striped <om> with
571  *                              p7_oprofile_GetFwdEmissionArray() ).
572  *            sc_arr            Preallocated array of at least Kp*16 floats
573  */
574 int
p7_oprofile_UpdateMSVEmissionScores(P7_OPROFILE * om,P7_BG * bg,float * fwd_emissions,float * sc_arr)575 p7_oprofile_UpdateMSVEmissionScores(P7_OPROFILE *om, P7_BG *bg, float *fwd_emissions, float *sc_arr)
576 {
577   int     M   = om->M;    /* length of the query                                          */
578   int     k, q, x, z;
579   int     nq  = p7O_NQB(M);     /* segment length; total # of striped vectors needed            */
580   int     K   = om->abc->K;
581   int     Kp  = om->abc->Kp;
582   int     idx;
583   float   max = 0.0;    /* maximum residue score: used for unsigned emission score bias */
584   union   { __m128i v; uint8_t i[16]; } tmp; /* used to align and load simd minivectors           */
585 
586 
587   /* First we determine the basis for the limited-precision MSVFilter scoring system.
588    * Default: 1/3 bit units, base offset 190:  range 0..255 => -190..65 => -63.3..21.7 bits
589    * See J2/66, J4/138 for analysis.
590    * This depends on having computed scores. I do this in a first pass, to get the max
591    * score ... then re-compute those scores so they can be converted to 8bit scores
592    */
593   for (k = 1, q = 0; q < nq; q++, k++) {
594     for (x = 0; x < K; x++) {
595       for (z = 0; z < 16; z++) {
596         idx = z*Kp + x;
597         if (k+ z*nq <= M && !(om->mm && om->mm[(k+z*nq)]=='m'))
598           max = ESL_MAX(max, log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]));
599       }
600     }
601   }
602   om->scale_b = 3.0 / eslCONST_LOG2;                    /* scores in units of third-bits */
603   om->base_b  = 190;
604   om->bias_b  = unbiased_byteify(om, -1.0 * max);
605 
606   for (k = 1, q = 0; q < nq; q++, k++) {
607 
608     //First compute the core characters of the alphabet
609     for (x = 0; x < K; x++) {
610       for (z = 0; z < 16; z++) {
611         idx = z*Kp + x;
612         if (k+ z*nq <= M)  {
613           sc_arr[idx] = (om->mm && om->mm[(k+z*nq)]=='m') ? 0 : log( (double)(fwd_emissions[Kp * (k+z*nq) + x])/bg->f[x]);
614           tmp.i[z]    = biased_byteify(om, sc_arr[idx]);
615         }
616         else
617         {
618           sc_arr[idx] =  -eslINFINITY;
619           tmp.i[z]    =  255;
620         }
621 
622       }
623       om->rbv[x][q] = tmp.v;
624 
625     }
626 
627     // Then compute corresponding scores for ambiguity codes.
628     for (z = 0; z < 16; z++)
629       esl_abc_FExpectScVec(om->abc, sc_arr+(z*Kp), bg->f);
630 
631     //finish off the interleaved values
632     for (x = K; x < Kp; x++) {
633       for (z = 0; z < 16; z++) {
634         idx = z*Kp + x;
635         if (x==K || x>Kp-3 || sc_arr[idx] == -eslINFINITY)
636           tmp.i[z]  =  255;
637         else
638           tmp.i[z]  = biased_byteify(om, sc_arr[idx]);
639       }
640       om->rbv[x][q] = tmp.v;
641     }
642   }
643 
644   sf_conversion(om);
645 
646   return eslOK;
647 }
648 
649 
650 /*----------------- end, P7_OPROFILE structure ------------------*/
651 
652 
653 
654 /*****************************************************************
655  * 2. Conversion from generic P7_PROFILE to optimized P7_OPROFILE
656  *****************************************************************/
657 
658 /* biased_byteify()
659  * Converts original log-odds residue score to a rounded biased uchar cost.
660  * Match emission scores for MSVFilter get this treatment.
661  * e.g. a score of +3.2, with scale 3.0 and bias 12, becomes 2.
662  *    3.2*3 = 9.6; rounded = 10; bias-10 = 2.
663  * When used, we add the bias, then subtract this cost.
664  * (A cost of +255 is our -infinity "prohibited event")
665  */
666 static uint8_t
biased_byteify(P7_OPROFILE * om,float sc)667 biased_byteify(P7_OPROFILE *om, float sc)
668 {
669   uint8_t b;
670 
671   sc  = -1.0f * roundf(om->scale_b * sc);                          /* ugh. sc is now an integer cost represented in a float...           */
672   b   = (sc > 255 - om->bias_b) ? 255 : (uint8_t) sc + om->bias_b; /* and now we cast, saturate, and bias it to an unsigned char cost... */
673   return b;
674 }
675 
676 /* unbiased_byteify()
677  * Convert original transition score to a rounded uchar cost
678  * Transition scores for MSVFilter get this treatment.
679  * e.g. a score of -2.1, with scale 3.0, becomes a cost of 6.
680  * (A cost of +255 is our -infinity "prohibited event")
681  */
682 static uint8_t
unbiased_byteify(P7_OPROFILE * om,float sc)683 unbiased_byteify(P7_OPROFILE *om, float sc)
684 {
685   uint8_t b;
686 
687   sc  = -1.0f * roundf(om->scale_b * sc);       /* ugh. sc is now an integer cost represented in a float...    */
688   b   = (sc > 255.) ? 255 : (uint8_t) sc;	/* and now we cast and saturate it to an unsigned char cost... */
689   return b;
690 }
691 
692 /* wordify()
693  * Converts log probability score to a rounded signed 16-bit integer cost.
694  * Both emissions and transitions for ViterbiFilter get this treatment.
695  * No bias term needed, because we use signed words.
696  *   e.g. a score of +3.2, with scale 500.0, becomes +1600.
697  */
698 static int16_t
wordify(P7_OPROFILE * om,float sc)699 wordify(P7_OPROFILE *om, float sc)
700 {
701   sc  = roundf(om->scale_w * sc);
702   if      (sc >=  32767.0) return  32767;
703   else if (sc <= -32768.0) return -32768;
704   else return (int16_t) sc;
705 }
706 
707 
708 /* sf_conversion():
709  * Author: Bjarne Knudsen
710  *
711  * Generates the SSVFilter() parts of the profile <om> scores
712  * from the completed MSV score.  This includes calculating
713  * special versions of the match scores for using the the
714  * ssv filter.
715  *
716  * Returns:   <eslOK> on success.
717  *
718  * Throws:    (no abnormal error conditions)
719  */
720 static int
sf_conversion(P7_OPROFILE * om)721 sf_conversion(P7_OPROFILE *om)
722 {
723   int     M   = om->M;		/* length of the query                                          */
724   int     nq  = p7O_NQB(M);     /* segment length; total # of striped vectors needed            */
725   int     x;			/* counter over residues                                        */
726   int     q;			/* q counts over total # of striped vectors, 0..nq-1            */
727   __m128i tmp;
728   __m128i tmp2;
729 
730   /* We now want to fill out om->sbv with om->rbv - bias for use in the
731    * SSV filter. The only challenge is that the om->rbv values are
732    * unsigned and generally use the whole scale while the om->sbv
733    * values are signed. To solve that problem we perform the following
734    * calculation:
735    *
736    *   ((127 + bias) - rbv) ^ 127
737    *
738    * where the subtraction is unsigned saturated and the addition is
739    * unsigned (it will not overflow, since bias is a small positive
740    * number). The f(x) = x ^ 127 combined with a change from unsigned
741    * to signed numbers have the same effect as f(x) = -x + 127. So if
742    * we regard the above as signed instead of unsigned it is equal to:
743    *
744    *   -((127 + bias) - rbv) + 127 = rbv - bias
745    *
746    * which is what we want. The reason for this slightly complex idea
747    * is that we wish the transformation to be fast, especially for
748    * hmmscan where many models are loaded.
749    */
750 
751   tmp = _mm_set1_epi8((int8_t) (om->bias_b + 127));
752   tmp2  = _mm_set1_epi8(127);
753 
754   for (x = 0; x < om->abc->Kp; x++)
755     {
756       for (q = 0;  q < nq;            q++) om->sbv[x][q] = _mm_xor_si128(_mm_subs_epu8(tmp, om->rbv[x][q]), tmp2);
757       for (q = nq; q < nq + p7O_EXTRA_SB; q++) om->sbv[x][q] = om->sbv[x][q % nq];
758     }
759 
760   return eslOK;
761 }
762 
763 /* mf_conversion():
764  *
765  * This builds the MSVFilter() parts of the profile <om>, scores
766  * in lspace uchars (16-way parallel), by rescaling, rounding, and
767  * casting the scores in <gm>.
768  *
769  * Returns <eslOK> on success;
770  * throws <eslEINVAL> if <om> hasn't been allocated properly.
771  */
772 static int
mf_conversion(const P7_PROFILE * gm,P7_OPROFILE * om)773 mf_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
774 {
775   int     M   = gm->M;		/* length of the query                                          */
776   int     nq  = p7O_NQB(M);     /* segment length; total # of striped vectors needed            */
777   float   max = 0.0;		/* maximum residue score: used for unsigned emission score bias */
778   int     x;			/* counter over residues                                        */
779   int     q;			/* q counts over total # of striped vectors, 0..nq-1            */
780   int     k;			/* the usual counter over model nodes 1..M                      */
781   int     z;			/* counter within elements of one SIMD minivector               */
782   union { __m128i v; uint8_t i[16]; } tmp; /* used to align and load simd minivectors           */
783 
784   if (nq > om->allocQ16) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
785 
786   /* First we determine the basis for the limited-precision MSVFilter scoring system.
787    * Default: 1/3 bit units, base offset 190:  range 0..255 => -190..65 => -63.3..21.7 bits
788    * See J2/66, J4/138 for analysis.
789    */
790   for (x = 0; x < gm->abc->K; x++)  max = ESL_MAX(max, esl_vec_FMax(gm->rsc[x], (M+1)*2));
791   om->scale_b = 3.0 / eslCONST_LOG2;                    /* scores in units of third-bits */
792   om->base_b  = 190;
793   om->bias_b  = unbiased_byteify(om, -1.0 * max);
794 
795   /* striped match costs: start at k=1.  */
796   for (x = 0; x < gm->abc->Kp; x++)
797   {
798     for (q = 0, k = 1; q < nq; q++, k++)
799     {
800       for (z = 0; z < 16; z++) tmp.i[z] = ((k+ z*nq <= M) ? biased_byteify(om, p7P_MSC(gm, k+z*nq, x)) : 255);
801       om->rbv[x][q]   = tmp.v;
802     }
803   }
804 
805   /* transition costs */
806   om->tbm_b = unbiased_byteify(om, logf(2.0f / ((float) gm->M * (float) (gm->M+1)))); /* constant B->Mk penalty        */
807   om->tec_b = unbiased_byteify(om, logf(0.5f));                                       /* constant multihit E->C = E->J */
808   om->tjb_b = unbiased_byteify(om, logf(3.0f / (float) (gm->L+3))); /* this adopts the L setting of the parent profile */
809 
810   sf_conversion(om);
811 
812   return eslOK;
813 }
814 
815 
816 /* vf_conversion():
817  *
818  * This builds the ViterbiFilter() parts of the profile <om>, scores
819  * in lspace swords (8-way parallel), by rescaling, rounding, and
820  * casting the scores in <gm>.
821  *
822  * Returns <eslOK> on success;
823  * throws <eslEINVAL> if <om> hasn't been allocated properly.
824  */
825 static int
vf_conversion(const P7_PROFILE * gm,P7_OPROFILE * om)826 vf_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
827 {
828   int     M   = gm->M;		/* length of the query                                          */
829   int     nq  = p7O_NQW(M);     /* segment length; total # of striped vectors needed            */
830   int     x;			/* counter over residues                                        */
831   int     q;			/* q counts over total # of striped vectors, 0..nq-1            */
832   int     k;			/* the usual counter over model nodes 1..M                      */
833   int     kb;			/* possibly offset base k for loading om's TSC vectors          */
834   int     z;			/* counter within elements of one SIMD minivector               */
835   int     t;			/* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
836   int     tg;			/* transition index in gm                                       */
837   int     j;			/* counter in interleaved vector arrays in the profile          */
838   int     ddtmp;		/* used in finding worst DD transition bound                    */
839   int16_t  maxval;		/* used to prevent zero cost II                                 */
840   int16_t  val;
841   union { __m128i v; int16_t i[8]; } tmp; /* used to align and load simd minivectors            */
842 
843   if (nq > om->allocQ8) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
844 
845   /* First set the basis for the limited-precision scoring system.
846    * Default: 1/500 bit units, base offset 12000:  range -32768..32767 => -44768..20767 => -89.54..41.53 bits
847    * See J4/138 for analysis.
848    */
849   om->scale_w = 500.0 / eslCONST_LOG2;
850   om->base_w  = 12000;
851 
852   /* striped match scores */
853   for (x = 0; x < gm->abc->Kp; x++)
854     for (k = 1, q = 0; q < nq; q++, k++)
855       {
856 	for (z = 0; z < 8; z++) tmp.i[z] = ((k+ z*nq <= M) ? wordify(om, p7P_MSC(gm, k+z*nq, x)) : -32768);
857 	om->rwv[x][q]   = tmp.v;
858       }
859 
860   /* Transition costs, all but the DD's. */
861   for (j = 0, k = 1, q = 0; q < nq; q++, k++)
862     {
863       for (t = p7O_BM; t <= p7O_II; t++) /* this loop of 7 transitions depends on the order in p7o_tsc_e */
864 	{
865 	  switch (t) {
866 	  case p7O_BM: tg = p7P_BM;  kb = k-1; maxval =  0; break; /* gm has tBMk stored off by one! start from k=0 not 1   */
867 	  case p7O_MM: tg = p7P_MM;  kb = k-1; maxval =  0; break; /* MM, DM, IM vectors are rotated by -1, start from k=0  */
868 	  case p7O_IM: tg = p7P_IM;  kb = k-1; maxval =  0; break;
869 	  case p7O_DM: tg = p7P_DM;  kb = k-1; maxval =  0; break;
870 	  case p7O_MD: tg = p7P_MD;  kb = k;   maxval =  0; break; /* the remaining ones are straight up  */
871 	  case p7O_MI: tg = p7P_MI;  kb = k;   maxval =  0; break;
872 	  case p7O_II: tg = p7P_II;  kb = k;   maxval = -1; break;
873 	  }
874 
875 	  for (z = 0; z < 8; z++) {
876 	    val      = ((kb+ z*nq < M) ? wordify(om, p7P_TSC(gm, kb+ z*nq, tg)) : -32768);
877 	    tmp.i[z] = (val <= maxval) ? val : maxval; /* do not allow an II transition cost of 0, or hell may occur. */
878 	  }
879 	  om->twv[j++] = tmp.v;
880 	}
881     }
882 
883   /* Finally the DD's, which are at the end of the optimized tsc vector; (j is already sitting there) */
884   for (k = 1, q = 0; q < nq; q++, k++)
885     {
886       for (z = 0; z < 8; z++) tmp.i[z] = ((k+ z*nq < M) ? wordify(om, p7P_TSC(gm, k+ z*nq, p7P_DD)) : -32768);
887       om->twv[j++] = tmp.v;
888     }
889 
890   /* Specials. (Actually in same order in om and gm, but we copy in general form anyway.)  */
891   /* VF CC,NN,JJ transitions hardcoded zero; -3.0 nat approximation used instead; this papers
892    * over a length independence problem, where the approximation weirdly outperforms the
893    * exact solution, probably indicating that the model's Pascal distribution is problematic,
894    * and the "approximation" is in fact closer to the One True Model, the mythic H4 supermodel.
895    * [xref J5/36]
896    */
897   om->xw[p7O_E][p7O_LOOP] = wordify(om, gm->xsc[p7P_E][p7P_LOOP]);
898   om->xw[p7O_E][p7O_MOVE] = wordify(om, gm->xsc[p7P_E][p7P_MOVE]);
899   om->xw[p7O_N][p7O_MOVE] = wordify(om, gm->xsc[p7P_N][p7P_MOVE]);
900   om->xw[p7O_N][p7O_LOOP] = 0;                                        /* was wordify(om, gm->xsc[p7P_N][p7P_LOOP]); */
901   om->xw[p7O_C][p7O_MOVE] = wordify(om, gm->xsc[p7P_C][p7P_MOVE]);
902   om->xw[p7O_C][p7O_LOOP] = 0;                                        /* was wordify(om, gm->xsc[p7P_C][p7P_LOOP]); */
903   om->xw[p7O_J][p7O_MOVE] = wordify(om, gm->xsc[p7P_J][p7P_MOVE]);
904   om->xw[p7O_J][p7O_LOOP] = 0;                                        /* was wordify(om, gm->xsc[p7P_J][p7P_LOOP]); */
905 
906   om->ncj_roundoff = 0.0; /* goes along with NN=CC=JJ=0, -3.0 nat approximation */
907                           /* otherwise, would be = om->scale_w * gm->xsc[p7P_N][p7P_LOOP] -  om->xw[p7O_N][p7O_LOOP];   */
908 			  /* see J4/150 for discussion of VF error suppression, superceded by the -3.0 nat approximation */
909 
910   /* Transition score bound for "lazy F" DD path evaluation (xref J2/52) */
911   om->ddbound_w = -32768;
912   for (k = 2; k < M-1; k++)
913     {
914       ddtmp         = (int) wordify(om, p7P_TSC(gm, k,   p7P_DD));
915       ddtmp        += (int) wordify(om, p7P_TSC(gm, k+1, p7P_DM));
916       ddtmp        -= (int) wordify(om, p7P_TSC(gm, k+1, p7P_BM));
917       om->ddbound_w = ESL_MAX(om->ddbound_w, ddtmp);
918     }
919 
920   return eslOK;
921 }
922 
923 
924 /* fb_conversion()
925  * This builds the Forward/Backward part of the optimized profile <om>,
926  * where we use odds ratios (not log-odds scores).
927  */
928 static int
fb_conversion(const P7_PROFILE * gm,P7_OPROFILE * om)929 fb_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
930 {
931   int     M   = gm->M;		/* length of the query                                          */
932   int     nq  = p7O_NQF(M);     /* segment length; total # of striped vectors needed            */
933   int     x;			/* counter over residues                                        */
934   int     q;			/* q counts over total # of striped vectors, 0..nq-1            */
935   int     k;			/* the usual counter over model nodes 1..M                      */
936   int     kb;			/* possibly offset base k for loading om's TSC vectors          */
937   int     z;			/* counter within elements of one SIMD minivector               */
938   int     t;			/* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
939   int     tg;			/* transition index in gm                                       */
940   int     j;			/* counter in interleaved vector arrays in the profile          */
941   union { __m128 v; float x[4]; } tmp; /* used to align and load simd minivectors               */
942 
943   if (nq > om->allocQ4) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
944 
945   /* striped match scores: start at k=1 */
946   for (x = 0; x < gm->abc->Kp; x++)
947     for (k = 1, q = 0; q < nq; q++, k++)
948       {
949 	for (z = 0; z < 4; z++) tmp.x[z] = (k+ z*nq <= M) ? p7P_MSC(gm, k+z*nq, x) : -eslINFINITY;
950 	om->rfv[x][q] = esl_sse_expf(tmp.v);
951       }
952 
953 
954   /* Transition scores, all but the DD's. */
955   for (j = 0, k = 1, q = 0; q < nq; q++, k++)
956     {
957       for (t = p7O_BM; t <= p7O_II; t++) /* this loop of 7 transitions depends on the order in the definition of p7o_tsc_e */
958 	{
959 	  switch (t) {
960 	  case p7O_BM: tg = p7P_BM;  kb = k-1; break; /* gm has tBMk stored off by one! start from k=0 not 1 */
961 	  case p7O_MM: tg = p7P_MM;  kb = k-1; break; /* MM, DM, IM quads are rotated by -1, start from k=0  */
962 	  case p7O_IM: tg = p7P_IM;  kb = k-1; break;
963 	  case p7O_DM: tg = p7P_DM;  kb = k-1; break;
964 	  case p7O_MD: tg = p7P_MD;  kb = k;   break; /* the remaining ones are straight up  */
965 	  case p7O_MI: tg = p7P_MI;  kb = k;   break;
966 	  case p7O_II: tg = p7P_II;  kb = k;   break;
967 	  }
968 
969 	  for (z = 0; z < 4; z++) tmp.x[z] = (kb+z*nq < M) ? p7P_TSC(gm, kb+z*nq, tg) : -eslINFINITY;
970 	  om->tfv[j++] = esl_sse_expf(tmp.v);
971 	}
972     }
973 
974   /* And finally the DD's, which are at the end of the optimized tfv vector; (j is already there) */
975   for (k = 1, q = 0; q < nq; q++, k++)
976     {
977       for (z = 0; z < 4; z++) tmp.x[z] = (k+z*nq < M) ? p7P_TSC(gm, k+z*nq, p7P_DD) : -eslINFINITY;
978       om->tfv[j++] = esl_sse_expf(tmp.v);
979     }
980 
981   /* Specials. (These are actually in exactly the same order in om and
982    *  gm, but we copy in general form anyway.)
983    */
984   om->xf[p7O_E][p7O_LOOP] = expf(gm->xsc[p7P_E][p7P_LOOP]);
985   om->xf[p7O_E][p7O_MOVE] = expf(gm->xsc[p7P_E][p7P_MOVE]);
986   om->xf[p7O_N][p7O_LOOP] = expf(gm->xsc[p7P_N][p7P_LOOP]);
987   om->xf[p7O_N][p7O_MOVE] = expf(gm->xsc[p7P_N][p7P_MOVE]);
988   om->xf[p7O_C][p7O_LOOP] = expf(gm->xsc[p7P_C][p7P_LOOP]);
989   om->xf[p7O_C][p7O_MOVE] = expf(gm->xsc[p7P_C][p7P_MOVE]);
990   om->xf[p7O_J][p7O_LOOP] = expf(gm->xsc[p7P_J][p7P_LOOP]);
991   om->xf[p7O_J][p7O_MOVE] = expf(gm->xsc[p7P_J][p7P_MOVE]);
992 
993   return eslOK;
994 }
995 
996 
997 /* Function:  p7_oprofile_Convert()
998  * Synopsis:  Converts standard profile to an optimized one.
999  * Incept:    SRE, Mon Nov 26 07:38:57 2007 [Janelia]
1000  *
1001  * Purpose:   Convert a standard profile <gm> to an optimized profile <om>,
1002  *            where <om> has already been allocated for a profile of at
1003  *            least <gm->M> nodes and the same emission alphabet <gm->abc>.
1004  *
1005  * Args:      gm - profile to optimize
1006  *            om - allocated optimized profile for holding the result.
1007  *
1008  * Returns:   <eslOK> on success.
1009  *
1010  * Throws:    <eslEINVAL> if <gm>, <om> aren't compatible.
1011  *            <eslEMEM> on allocation failure.
1012  */
1013 int
p7_oprofile_Convert(const P7_PROFILE * gm,P7_OPROFILE * om)1014 p7_oprofile_Convert(const P7_PROFILE *gm, P7_OPROFILE *om)
1015 {
1016   int status, z;
1017 
1018   /* Set these first so they are available in the following calls */
1019   om->mode       = gm->mode;
1020   om->L          = gm->L;
1021   om->M          = gm->M;
1022   om->nj         = gm->nj;
1023   om->max_length = gm->max_length;
1024 
1025   if (gm->abc->type != om->abc->type)  ESL_EXCEPTION(eslEINVAL, "alphabets of the two profiles don't match");
1026   if (gm->M         >  om->allocM)     ESL_EXCEPTION(eslEINVAL, "oprofile is too small");
1027 
1028   if ((status =  mf_conversion(gm, om)) != eslOK) return status;   /* MSVFilter()'s information     */
1029   if ((status =  vf_conversion(gm, om)) != eslOK) return status;   /* ViterbiFilter()'s information */
1030   if ((status =  fb_conversion(gm, om)) != eslOK) return status;   /* ForwardFilter()'s information */
1031 
1032   if (om->name != NULL) free(om->name);
1033   if (om->acc  != NULL) free(om->acc);
1034   if (om->desc != NULL) free(om->desc);
1035   if ((status = esl_strdup(gm->name, -1, &(om->name))) != eslOK) goto ERROR;
1036   if ((status = esl_strdup(gm->acc,  -1, &(om->acc)))  != eslOK) goto ERROR;
1037   if ((status = esl_strdup(gm->desc, -1, &(om->desc))) != eslOK) goto ERROR;
1038   strcpy(om->rf,        gm->rf);
1039   strcpy(om->mm,        gm->mm);
1040   strcpy(om->cs,        gm->cs);
1041   strcpy(om->consensus, gm->consensus);
1042   for (z = 0; z < p7_NEVPARAM; z++) om->evparam[z] = gm->evparam[z];
1043   for (z = 0; z < p7_NCUTOFFS; z++) om->cutoff[z]  = gm->cutoff[z];
1044   for (z = 0; z < p7_MAXABET;  z++) om->compo[z]   = gm->compo[z];
1045 
1046   return eslOK;
1047 
1048  ERROR:
1049   return status;
1050 }
1051 
1052 /* Function:  p7_oprofile_ReconfigLength()
1053  * Synopsis:  Set the target sequence length of a model.
1054  * Incept:    SRE, Thu Dec 20 09:56:40 2007 [Janelia]
1055  *
1056  * Purpose:   Given an already configured model <om>, quickly reset its
1057  *            expected length distribution for a new mean target sequence
1058  *            length of <L>.
1059  *
1060  *            This doesn't affect the length distribution of the null
1061  *            model. That must also be reset, using <p7_bg_SetLength()>.
1062  *
1063  *            We want this routine to run as fast as possible, because
1064  *            this call is in the critical path: it must be called at
1065  *            each new target sequence in a database search.
1066  *
1067  * Returns:   <eslOK> on success. Costs/scores for N,C,J transitions are set
1068  *            here.
1069  */
1070 int
p7_oprofile_ReconfigLength(P7_OPROFILE * om,int L)1071 p7_oprofile_ReconfigLength(P7_OPROFILE *om, int L)
1072 {
1073   int status;
1074   if ((status = p7_oprofile_ReconfigMSVLength (om, L)) != eslOK) return status;
1075   if ((status = p7_oprofile_ReconfigRestLength(om, L)) != eslOK) return status;
1076   return eslOK;
1077 }
1078 
1079 /* Function:  p7_oprofile_ReconfigMSVLength()
1080  * Synopsis:  Set the target sequence length of the MSVFilter part of the model.
1081  * Incept:    SRE, Tue Dec 16 13:39:17 2008 [Janelia]
1082  *
1083  * Purpose:   Given an  already configured model <om>, quickly reset its
1084  *            expected length distribution for a new mean target sequence
1085  *            length of <L>, only for the part of the model that's used
1086  *            for the accelerated MSV filter.
1087  *
1088  *            The acceleration pipeline uses this to defer reconfiguring the
1089  *            length distribution of the main model, mostly because hmmscan
1090  *            reads the model in two pieces, MSV part first, then the rest.
1091  *
1092  * Returns:   <eslOK> on success.
1093  */
1094 int
p7_oprofile_ReconfigMSVLength(P7_OPROFILE * om,int L)1095 p7_oprofile_ReconfigMSVLength(P7_OPROFILE *om, int L)
1096 {
1097   om->tjb_b = unbiased_byteify(om, logf(3.0f / (float) (L+3)));
1098   return eslOK;
1099 }
1100 
1101 /* Function:  p7_oprofile_ReconfigRestLength()
1102  * Synopsis:  Set the target sequence length of the main profile.
1103  * Incept:    SRE, Tue Dec 16 13:41:30 2008 [Janelia]
1104  *
1105  * Purpose:   Given an  already configured model <om>, quickly reset its
1106  *            expected length distribution for a new mean target sequence
1107  *            length of <L>, for everything except the MSV filter part
1108  *            of the model.
1109  *
1110  *            Calling <p7_oprofile_ReconfigMSVLength()> then
1111  *            <p7_oprofile_ReconfigRestLength()> is equivalent to
1112  *            just calling <p7_oprofile_ReconfigLength()>. The two
1113  *            part version is used in the acceleration pipeline.
1114  *
1115  * Returns:   <eslOK> on success.
1116  */
1117 int
p7_oprofile_ReconfigRestLength(P7_OPROFILE * om,int L)1118 p7_oprofile_ReconfigRestLength(P7_OPROFILE *om, int L)
1119 {
1120   float pmove, ploop;
1121 
1122   pmove = (2.0f + om->nj) / ((float) L + 2.0f + om->nj); /* 2/(L+2) for sw; 3/(L+3) for fs */
1123   ploop = 1.0f - pmove;
1124 
1125   /* ForwardFilter() parameters: pspace floats */
1126   om->xf[p7O_N][p7O_LOOP] =  om->xf[p7O_C][p7O_LOOP] = om->xf[p7O_J][p7O_LOOP] = ploop;
1127   om->xf[p7O_N][p7O_MOVE] =  om->xf[p7O_C][p7O_MOVE] = om->xf[p7O_J][p7O_MOVE] = pmove;
1128 
1129   /* ViterbiFilter() parameters: lspace signed 16-bit ints */
1130   om->xw[p7O_N][p7O_MOVE] =  om->xw[p7O_C][p7O_MOVE] = om->xw[p7O_J][p7O_MOVE] = wordify(om, logf(pmove));
1131   /* om->xw[p7O_N][p7O_LOOP] =  om->xw[p7O_C][p7O_LOOP] = om->xw[p7O_J][p7O_LOOP] = wordify(om, logf(ploop)); */ /* 3nat approx in force: these stay 0 */
1132   /* om->ncj_roundoff        = (om->scale_w * logf(ploop)) - om->xw[p7O_N][p7O_LOOP];                         */ /* and this does too                  */
1133 
1134   om->L = L;
1135   return eslOK;
1136 }
1137 
1138 
1139 /* Function:  p7_oprofile_ReconfigMultihit()
1140  * Synopsis:  Quickly reconfig model into multihit mode for target length <L>.
1141  * Incept:    SRE, Thu Aug 21 10:04:07 2008 [Janelia]
1142  *
1143  * Purpose:   Given a profile <om> that's already been configured once,
1144  *            quickly reconfigure it into a multihit mode for target
1145  *            length <L>.
1146  *
1147  *            This gets called in domain definition, when we need to
1148  *            flip the model in and out of unihit mode to
1149  *            process individual domains.
1150  *
1151  * Note:      You can't just flip uni/multi mode alone, because that
1152  *            parameterization also affects target length
1153  *            modeling. You need to make sure uni vs. multi choice is
1154  *            made before the length model is set, and you need to
1155  *            make sure the length model is recalculated if you change
1156  *            the uni/multi mode. Hence, these functions call
1157  *            <p7_oprofile_ReconfigLength()>.
1158  */
1159 int
p7_oprofile_ReconfigMultihit(P7_OPROFILE * om,int L)1160 p7_oprofile_ReconfigMultihit(P7_OPROFILE *om, int L)
1161 {
1162   om->xf[p7O_E][p7O_MOVE] = 0.5;
1163   om->xf[p7O_E][p7O_LOOP] = 0.5;
1164   om->nj = 1.0f;
1165 
1166   om->xw[p7O_E][p7O_MOVE] = wordify(om, -eslCONST_LOG2);
1167   om->xw[p7O_E][p7O_LOOP] = wordify(om, -eslCONST_LOG2);
1168 
1169   return p7_oprofile_ReconfigLength(om, L);
1170 }
1171 
1172 /* Function:  p7_oprofile_ReconfigUnihit()
1173  * Synopsis:  Quickly reconfig model into unihit mode for target length <L>.
1174  * Incept:    SRE, Thu Aug 21 10:10:32 2008 [Janelia]
1175  *
1176  * Purpose:   Given a profile <om> that's already been configured once,
1177  *            quickly reconfigure it into a unihit mode for target
1178  *            length <L>.
1179  *
1180  *            This gets called in domain definition, when we need to
1181  *            flip the model in and out of unihit <L=0> mode to
1182  *            process individual domains.
1183  */
1184 int
p7_oprofile_ReconfigUnihit(P7_OPROFILE * om,int L)1185 p7_oprofile_ReconfigUnihit(P7_OPROFILE *om, int L)
1186 {
1187   om->xf[p7O_E][p7O_MOVE] = 1.0f;
1188   om->xf[p7O_E][p7O_LOOP] = 0.0f;
1189   om->nj = 0.0f;
1190 
1191   om->xw[p7O_E][p7O_MOVE] = 0;
1192   om->xw[p7O_E][p7O_LOOP] = -32768;
1193 
1194   return p7_oprofile_ReconfigLength(om, L);
1195 }
1196 /*------------ end, conversions to P7_OPROFILE ------------------*/
1197 
1198 /*******************************************************************
1199 *   3. Conversion from optimized P7_OPROFILE to compact score arrays
1200  *******************************************************************/
1201 
1202 /* Function:  p7_oprofile_GetFwdTransitionArray()
1203  * Synopsis:  Retrieve full 32-bit float transition probabilities from an
1204  *            optimized profile into a flat array
1205  *
1206  * Purpose:   Extract an array of <type> (e.g. p7O_II) transition probabilities
1207  *            from the underlying <om> profile. In SIMD implementations,
1208  *            these are striped and interleaved, making them difficult to
1209  *            directly access.
1210  *
1211  * Args:      <om>   - optimized profile, containing transition information
1212  *            <type> - transition type (e.g. p7O_II)
1213  *            <arr>  - preallocated array into which floats will be placed
1214  *
1215  * Returns:   <eslOK> on success.
1216  *
1217  * Throws:    (no abnormal error conditions)
1218  */
1219 int
p7_oprofile_GetFwdTransitionArray(const P7_OPROFILE * om,int type,float * arr)1220 p7_oprofile_GetFwdTransitionArray(const P7_OPROFILE *om, int type, float *arr )
1221 {
1222   int     nq  = p7O_NQF(om->M);     /* # of striped vectors needed            */
1223   int i, j;
1224   union { __m128 v; float x[4]; } tmp; /* used to align and read simd minivectors               */
1225 
1226 
1227   for (i=0; i<nq; i++) {
1228     // because DD transitions are held at the end of the tfv array
1229     tmp.v = om->tfv[ (type==p7O_DD ?  nq*7+i :  type+7*i) ];
1230     for (j=0; j<4; j++)
1231       if ( i+1+ j*nq < om->M+1)
1232         arr[i+1+ j*nq]      = tmp.x[j];
1233   }
1234 
1235   return eslOK;
1236 
1237 }
1238 
1239 /* Function:  p7_oprofile_GetSSVEmissionScoreArray()
1240  * Synopsis:  Retrieve MSV residue emission scores from an optimized
1241  *            profile into an array
1242  *
1243  * Purpose:   Extract an implicitly 2D array of 8-bit int SSV residue
1244  *            emission scores from an optimized profile <om>. <arr> must
1245  *            be allocated by the calling function to be of size
1246  *            ( om->abc->Kp * ( om->M  + 1 )), and indexing into the array
1247  *            is done as  [om->abc->Kp * i +  c ] for character c at
1248  *            position i.
1249  *
1250  *            In SIMD implementations, the residue scores are striped
1251  *            and interleaved, making them somewhat difficult to
1252  *            directly access. Faster access is desired, for example,
1253  *            in SSV back-tracking of a high-scoring diagonal
1254  *
1255  * Args:      <om>   - optimized profile, containing transition information
1256  *            <arr>  - preallocated array into which scores will be placed
1257  *
1258  * Returns:   <eslOK> on success.
1259  *
1260  * Throws:    (no abnormal error conditions)
1261  */
1262 int
p7_oprofile_GetSSVEmissionScoreArray(const P7_OPROFILE * om,uint8_t * arr)1263 p7_oprofile_GetSSVEmissionScoreArray(const P7_OPROFILE *om, uint8_t *arr )
1264 {
1265   int x, q, z, k;
1266   union { __m128i v; uint8_t i[16]; } tmp; /* used to align and read simd minivectors           */
1267   int      M   = om->M;    /* length of the query                                          */
1268   int      K   = om->abc->Kp;
1269   int      nq  = p7O_NQB(M);     /* segment length; total # of striped vectors needed            */
1270   int cell_cnt = (om->M + 1) * K;
1271 
1272   for (x = 0; x < K ; x++) {
1273     for (q = 0, k = 1; q < nq; q++, k++) {
1274       tmp.v = om->rbv[x][q];
1275       for (z=0; z<16; z++)
1276         if (  (K * (k+z*nq) + x) < cell_cnt)
1277           arr[ K * (k+z*nq) + x ] = tmp.i[z];
1278     }
1279   }
1280 
1281   return eslOK;
1282 }
1283 
1284 
1285 /* Function:  p7_oprofile_GetFwdEmissionScoreArray()
1286  * Synopsis:  Retrieve Fwd (float) residue emission scores from an optimized
1287  *            profile into an array
1288  *
1289  * Purpose:   Extract an implicitly 2D array of 32-bit float Fwd residue
1290  *            emission scores from an optimized profile <om>. <arr> must
1291  *            be allocated by the calling function to be of size
1292  *            ( om->abc->Kp * ( om->M  + 1 )), and indexing into the array
1293  *            is done as  [om->abc->Kp * i +  c ] for character c at
1294  *            position i.
1295  *
1296  *            In SIMD implementations, the residue scores are striped
1297  *            and interleaved, making them somewhat difficult to
1298  *            directly access.
1299  *
1300  * Args:      <om>   - optimized profile, containing transition information
1301  *            <arr>  - preallocated array into which scores will be placed
1302  *
1303  * Returns:   <eslOK> on success.
1304  *
1305  * Throws:    (no abnormal error conditions)
1306  */
1307 int
p7_oprofile_GetFwdEmissionScoreArray(const P7_OPROFILE * om,float * arr)1308 p7_oprofile_GetFwdEmissionScoreArray(const P7_OPROFILE *om, float *arr )
1309 {
1310   int x, q, z, k;
1311   union { __m128 v; float f[4]; } tmp; /* used to align and read simd minivectors               */
1312   int      M   = om->M;    /* length of the query                                          */
1313   int      K   = om->abc->Kp;
1314   int      nq  = p7O_NQF(M);     /* segment length; total # of striped vectors needed            */
1315   int cell_cnt = (om->M + 1) * K;
1316 
1317   for (x = 0; x < K; x++) {
1318       for (q = 0, k = 1; q < nq; q++, k++) {
1319         tmp.v = esl_sse_logf(om->rfv[x][q]);
1320         for (z = 0; z < 4; z++)
1321           if (  (K * (k+z*nq) + x) < cell_cnt)
1322             arr[ K * (k+z*nq) + x ] = tmp.f[z];
1323       }
1324   }
1325 
1326   return eslOK;
1327 }
1328 
1329 /* Function:  p7_oprofile_GetFwdEmissionArray()
1330  * Synopsis:  Retrieve Fwd (float) residue emission values from an optimized
1331  *            profile into an array
1332  *
1333  * Purpose:   Extract an implicitly 2D array of 32-bit float Fwd residue
1334  *            emission values from an optimized profile <om>, converting
1335  *            back to emission values based on the background. <arr> must
1336  *            be allocated by the calling function to be of size
1337  *            ( om->abc->Kp * ( om->M  + 1 )), and indexing into the array
1338  *            is done as  [om->abc->Kp * i +  c ] for character c at
1339  *            position i.
1340  *
1341  *            In SIMD implementations, the residue scores are striped
1342  *            and interleaved, making them somewhat difficult to
1343  *            directly access.
1344  *
1345  * Args:      <om>   - optimized profile, containing transition information
1346  *            <bg>   - background frequencies
1347  *            <arr>  - preallocated array into which scores will be placed
1348  *
1349  * Returns:   <eslOK> on success.
1350  *
1351  * Throws:    (no abnormal error conditions)
1352  */
1353 int
p7_oprofile_GetFwdEmissionArray(const P7_OPROFILE * om,P7_BG * bg,float * arr)1354 p7_oprofile_GetFwdEmissionArray(const P7_OPROFILE *om, P7_BG *bg, float *arr )
1355 {
1356   int x, q, z, k;
1357   union { __m128 v; float f[4]; } tmp; /* used to align and read simd minivectors               */
1358   int      M   = om->M;    /* length of the query                                          */
1359   int      Kp  = om->abc->Kp;
1360   int      K   = om->abc->K;
1361   int      nq  = p7O_NQF(M);     /* segment length; total # of striped vectors needed            */
1362   int cell_cnt = (om->M + 1) * Kp;
1363 
1364   for (x = 0; x < K; x++) {
1365       for (q = 0, k = 1; q < nq; q++, k++) {
1366         tmp.v = om->rfv[x][q];
1367         for (z = 0; z < 4; z++)
1368           if (  (Kp * (k+z*nq) + x) < cell_cnt)
1369             arr[ Kp * (k+z*nq) + x ] = tmp.f[z] * bg->f[x];
1370       }
1371   }
1372 
1373   //degeneracy emissions for each position
1374   for (x = 0; x <= M; x++)
1375     esl_abc_FExpectScVec(om->abc, arr+Kp*x, bg->f);
1376 
1377   return eslOK;
1378 }
1379 
1380 
1381 /*------------ end, conversions from P7_OPROFILE ------------------*/
1382 
1383 
1384 /*****************************************************************
1385  * 4. Debugging and development utilities.
1386  *****************************************************************/
1387 
1388 
1389 /* oprofile_dump_mf()
1390  *
1391  * Dump the MSVFilter part of a profile <om> to <stdout>.
1392  */
1393 static int
oprofile_dump_mf(FILE * fp,const P7_OPROFILE * om)1394 oprofile_dump_mf(FILE *fp, const P7_OPROFILE *om)
1395 {
1396   int     M   = om->M;		/* length of the query                                          */
1397   int     nq  = p7O_NQB(M);     /* segment length; total # of striped vectors needed            */
1398   int     x;			/* counter over residues                                        */
1399   int     q;			/* q counts over total # of striped vectors, 0..nq-1            */
1400   int     k;			/* counter over nodes 1..M                                      */
1401   int     z;			/* counter within elements of one SIMD minivector               */
1402   union { __m128i v; uint8_t i[16]; } tmp; /* used to align and read simd minivectors           */
1403 
1404   /* Header (rearranged column numbers, in the vectors)  */
1405   fprintf(fp, "     ");
1406   for (k =1, q = 0; q < nq; q++, k++)
1407     {
1408       fprintf(fp, "[ ");
1409       for (z = 0; z < 16; z++)
1410 	if (k+z*nq <= M) fprintf(fp, "%4d ", k+z*nq);
1411 	else             fprintf(fp, "%4s ", "xx");
1412       fprintf(fp, "]");
1413     }
1414   fprintf(fp, "\n");
1415 
1416   /* Table of residue emissions */
1417   for (x = 0; x < om->abc->Kp; x++)
1418     {
1419       fprintf(fp, "(%c): ", om->abc->sym[x]);
1420 
1421       for (q = 0; q < nq; q++)
1422 	{
1423 	  fprintf(fp, "[ ");
1424 	  _mm_store_si128(&tmp.v, om->rbv[x][q]);
1425 	  for (z = 0; z < 16; z++) fprintf(fp, "%4d ", tmp.i[z]);
1426 	  fprintf(fp, "]");
1427 	}
1428       fprintf(fp, "\n");
1429     }
1430   fprintf(fp, "\n");
1431 
1432   fprintf(fp, "t_EC,EJ:    %4d\n",  om->tec_b);
1433   fprintf(fp, "t_NB,JB,CT: %4d\n",  om->tjb_b);
1434   fprintf(fp, "t_BMk:      %4d\n",  om->tbm_b);
1435   fprintf(fp, "scale:      %.2f\n", om->scale_b);
1436   fprintf(fp, "base:       %4d\n",  om->base_b);
1437   fprintf(fp, "bias:       %4d\n",  om->bias_b);
1438   fprintf(fp, "Q:          %4d\n",  nq);
1439   fprintf(fp, "M:          %4d\n",  M);
1440   return eslOK;
1441 }
1442 
1443 
1444 
1445 /* oprofile_dump_vf()
1446  *
1447  * Dump the ViterbiFilter part of a profile <om> to <stdout>.
1448  */
1449 static int
oprofile_dump_vf(FILE * fp,const P7_OPROFILE * om)1450 oprofile_dump_vf(FILE *fp, const P7_OPROFILE *om)
1451 {
1452   int     M   = om->M;		/* length of the query                                          */
1453   int     nq  = p7O_NQW(M);     /* segment length; total # of striped vectors needed            */
1454   int     x;			/* counter over residues                                        */
1455   int     q;			/* q counts over total # of striped vectors, 0..nq-1            */
1456   int     k;			/* the usual counter over model nodes 1..M                      */
1457   int     kb;			/* possibly offset base k for loading om's TSC vectors          */
1458   int     z;			/* counter within elements of one SIMD minivector               */
1459   int     t;			/* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
1460   int     j;			/* counter in interleaved vector arrays in the profile          */
1461   union { __m128i v; int16_t i[8]; } tmp; /* used to align and read simd minivectors           */
1462 
1463   /* Emission score header (rearranged column numbers, in the vectors)  */
1464   fprintf(fp, "     ");
1465   for (k =1, q = 0; q < nq; q++, k++)
1466     {
1467       fprintf(fp, "[ ");
1468       for (z = 0; z < 8; z++)
1469 	if (k+z*nq <= M) fprintf(fp, "%6d ", k+z*nq);
1470 	else             fprintf(fp, "%6s ", "xx");
1471       fprintf(fp, "]");
1472     }
1473   fprintf(fp, "\n");
1474 
1475   /* Table of residue emissions */
1476   for (x = 0; x < om->abc->Kp; x++)
1477     {
1478       fprintf(fp, "(%c): ", om->abc->sym[x]);
1479 
1480       /* Match emission scores (insert emissions are assumed zero by design) */
1481       for (q = 0; q < nq; q++)
1482 	{
1483 	  fprintf(fp, "[ ");
1484 	  _mm_store_si128(&tmp.v, om->rwv[x][q]);
1485 	  for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
1486 	  fprintf(fp, "]");
1487 	}
1488       fprintf(fp, "\n");
1489     }
1490   fprintf(fp, "\n");
1491 
1492   /* Transitions */
1493   for (t = p7O_BM; t <= p7O_II; t++)
1494     {
1495       switch (t) {
1496       case p7O_BM: fprintf(fp, "\ntBM: "); break;
1497       case p7O_MM: fprintf(fp, "\ntMM: "); break;
1498       case p7O_IM: fprintf(fp, "\ntIM: "); break;
1499       case p7O_DM: fprintf(fp, "\ntDM: "); break;
1500       case p7O_MD: fprintf(fp, "\ntMD: "); break;
1501       case p7O_MI: fprintf(fp, "\ntMI: "); break;
1502       case p7O_II: fprintf(fp, "\ntII: "); break;
1503       }
1504 
1505       for (k = 1, q = 0; q < nq; q++, k++)
1506 	{
1507 	  switch (t) {
1508 	  case p7O_BM: kb = k;                 break;
1509 	  case p7O_MM: kb = (1 + (nq+k-2)) % nq; break; /* MM, DM, IM quads rotated by +1  */
1510 	  case p7O_IM: kb = (1 + (nq+k-2)) % nq; break;
1511 	  case p7O_DM: kb = (1 + (nq+k-2)) % nq; break;
1512 	  case p7O_MD: kb = k;                 break; /* the remaining ones are straight up  */
1513 	  case p7O_MI: kb = k;                 break;
1514 	  case p7O_II: kb = k;                 break;
1515 	  }
1516 	  fprintf(fp, "[ ");
1517 	  for (z = 0; z < 8; z++)
1518 	    if (kb+z*nq <= M) fprintf(fp, "%6d ", kb+z*nq);
1519 	    else              fprintf(fp, "%6s ", "xx");
1520 	  fprintf(fp, "]");
1521 	}
1522       fprintf(fp, "\n     ");
1523       for (q = 0; q < nq; q++)
1524 	{
1525 	  fprintf(fp, "[ ");
1526 	  _mm_store_si128(&tmp.v, om->twv[q*7 + t]);
1527 	  for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
1528 	  fprintf(fp, "]");
1529 	}
1530       fprintf(fp, "\n");
1531     }
1532 
1533   /* DD transitions */
1534   fprintf(fp, "\ntDD: ");
1535   for (k =1, q = 0; q < nq; q++, k++)
1536     {
1537       fprintf(fp, "[ ");
1538       for (z = 0; z < 8; z++)
1539 	if (k+z*nq <= M) fprintf(fp, "%6d ", k+z*nq);
1540 	else             fprintf(fp, "%6s ", "xx");
1541       fprintf(fp, "]");
1542     }
1543   fprintf(fp, "\n     ");
1544   for (j = nq*7, q = 0; q < nq; q++, j++)
1545     {
1546       fprintf(fp, "[ ");
1547       _mm_store_si128(&tmp.v, om->twv[j]);
1548       for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
1549       fprintf(fp, "]");
1550     }
1551   fprintf(fp, "\n");
1552 
1553   fprintf(fp, "E->C: %6d    E->J: %6d\n", om->xw[p7O_E][p7O_MOVE], om->xw[p7O_E][p7O_LOOP]);
1554   fprintf(fp, "N->B: %6d    N->N: %6d\n", om->xw[p7O_N][p7O_MOVE], om->xw[p7O_N][p7O_LOOP]);
1555   fprintf(fp, "J->B: %6d    J->J: %6d\n", om->xw[p7O_J][p7O_MOVE], om->xw[p7O_J][p7O_LOOP]);
1556   fprintf(fp, "C->T: %6d    C->C: %6d\n", om->xw[p7O_C][p7O_MOVE], om->xw[p7O_C][p7O_LOOP]);
1557 
1558   fprintf(fp, "scale: %6.2f\n", om->scale_w);
1559   fprintf(fp, "base:  %6d\n",   om->base_w);
1560   fprintf(fp, "bound: %6d\n",   om->ddbound_w);
1561   fprintf(fp, "Q:     %6d\n",   nq);
1562   fprintf(fp, "M:     %6d\n",   M);
1563   return eslOK;
1564 }
1565 
1566 
1567 /* oprofile_dump_fb()
1568  *
1569  * Dump the Forward/Backward part of a profile <om> to <stdout>.
1570  * <width>, <precision> control the floating point output:
1571  *  8,5 is a reasonable choice for prob space,
1572  *  5,2 is reasonable for log space.
1573  */
1574 static int
oprofile_dump_fb(FILE * fp,const P7_OPROFILE * om,int width,int precision)1575 oprofile_dump_fb(FILE *fp, const P7_OPROFILE *om, int width, int precision)
1576 {
1577   int     M   = om->M;		/* length of the query                                          */
1578   int     nq  = p7O_NQF(M);     /* segment length; total # of striped vectors needed            */
1579   int     x;			/* counter over residues                                        */
1580   int     q;			/* q counts over total # of striped vectors, 0..nq-1            */
1581   int     k;			/* the usual counter over model nodes 1..M                      */
1582   int     kb;			/* possibly offset base k for loading om's TSC vectors          */
1583   int     z;			/* counter within elements of one SIMD minivector               */
1584   int     t;			/* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
1585   int     j;			/* counter in interleaved vector arrays in the profile          */
1586   union { __m128 v; float x[4]; } tmp; /* used to align and read simd minivectors               */
1587 
1588   /* Residue emissions */
1589   for (x = 0; x < om->abc->Kp; x++)
1590     {
1591       fprintf(fp, "(%c): ", om->abc->sym[x]);
1592       for (k =1, q = 0; q < nq; q++, k++)
1593 	{
1594 	  fprintf(fp, "[ ");
1595 	  for (z = 0; z < 4; z++)
1596 	    if (k+z*nq <= M) fprintf(fp, "%*d ", width, k+z*nq);
1597 	    else             fprintf(fp, "%*s ", width, "xx");
1598 	  fprintf(fp, "]");
1599 	}
1600       fprintf(fp, "\nmat: ");
1601       for (q = 0; q < nq; q++)
1602 	{
1603 	  fprintf(fp, "[ ");
1604 	  tmp.v = om->rfv[x][q];
1605 	  for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
1606 	  fprintf(fp, "]");
1607 	}
1608       fprintf(fp, "\n\n");
1609     }
1610 
1611   /* Transitions */
1612   for (t = p7O_BM; t <= p7O_II; t++)
1613     {
1614       switch (t) {
1615       case p7O_BM: fprintf(fp, "\ntBM: "); break;
1616       case p7O_MM: fprintf(fp, "\ntMM: "); break;
1617       case p7O_IM: fprintf(fp, "\ntIM: "); break;
1618       case p7O_DM: fprintf(fp, "\ntDM: "); break;
1619       case p7O_MD: fprintf(fp, "\ntMD: "); break;
1620       case p7O_MI: fprintf(fp, "\ntMI: "); break;
1621       case p7O_II: fprintf(fp, "\ntII: "); break;
1622       }
1623       for (k = 1, q = 0; q < nq; q++, k++)
1624 	{
1625 	  switch (t) {
1626 	  case p7O_MM:/* MM, DM, IM quads rotated by +1  */
1627 	  case p7O_IM:
1628 	  case p7O_DM:
1629 		  kb = (1 + (nq+k-2)) % nq;
1630 		  break;
1631 	  case p7O_BM:/* the remaining ones are straight up  */
1632 	  case p7O_MD:
1633 	  case p7O_MI:
1634 	  case p7O_II:
1635 		  kb = k;
1636 		  break;
1637 	  }
1638 	  fprintf(fp, "[ ");
1639 	  for (z = 0; z < 4; z++)
1640 	    if (kb+z*nq <= M) fprintf(fp, "%*d ", width, kb+z*nq);
1641 	    else              fprintf(fp, "%*s ", width, "xx");
1642 	  fprintf(fp, "]");
1643 	}
1644       fprintf(fp, "\n     ");
1645       for (q = 0; q < nq; q++)
1646 	{
1647 	  fprintf(fp, "[ ");
1648 	  tmp.v = om->tfv[q*7 + t];
1649 	  for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
1650 	  fprintf(fp, "]");
1651 	}
1652       fprintf(fp, "\n");
1653     }
1654 
1655   /* DD transitions */
1656   fprintf(fp, "\ntDD: ");
1657   for (k =1, q = 0; q < nq; q++, k++)
1658     {
1659       fprintf(fp, "[ ");
1660       for (z = 0; z < 4; z++)
1661 	if (k+z*nq <= M) fprintf(fp, "%*d ", width, k+z*nq);
1662 	else             fprintf(fp, "%*s ", width, "xx");
1663       fprintf(fp, "]");
1664     }
1665   fprintf(fp, "\n     ");
1666   for (j = nq*7, q = 0; q < nq; q++, j++)
1667     {
1668       fprintf(fp, "[ ");
1669       tmp.v = om->tfv[j];
1670       for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
1671       fprintf(fp, "]");
1672     }
1673   fprintf(fp, "\n");
1674 
1675   /* Specials */
1676   fprintf(fp, "E->C: %*.*f    E->J: %*.*f\n", width, precision, om->xf[p7O_E][p7O_MOVE], width, precision, om->xf[p7O_E][p7O_LOOP]);
1677   fprintf(fp, "N->B: %*.*f    N->N: %*.*f\n", width, precision, om->xf[p7O_N][p7O_MOVE], width, precision, om->xf[p7O_N][p7O_LOOP]);
1678   fprintf(fp, "J->B: %*.*f    J->J: %*.*f\n", width, precision, om->xf[p7O_J][p7O_MOVE], width, precision, om->xf[p7O_J][p7O_LOOP]);
1679   fprintf(fp, "C->T: %*.*f    C->C: %*.*f\n", width, precision, om->xf[p7O_C][p7O_MOVE], width, precision, om->xf[p7O_C][p7O_LOOP]);
1680   fprintf(fp, "Q:     %d\n",   nq);
1681   fprintf(fp, "M:     %d\n",   M);
1682   return eslOK;
1683 }
1684 
1685 
1686 /* Function:  p7_oprofile_Dump()
1687  * Synopsis:  Dump internals of a <P7_OPROFILE>
1688  * Incept:    SRE, Thu Dec 13 08:49:30 2007 [Janelia]
1689  *
1690  * Purpose:   Dump the internals of <P7_OPROFILE> structure <om>
1691  *            to stream <fp>; generally for testing or debugging
1692  *            purposes.
1693  *
1694  * Args:      fp   - output stream (often stdout)
1695  *            om   - optimized profile to dump
1696  *
1697  * Returns:   <eslOK> on success.
1698  *
1699  * Throws:    (no abnormal error conditions)
1700  */
1701 int
p7_oprofile_Dump(FILE * fp,const P7_OPROFILE * om)1702 p7_oprofile_Dump(FILE *fp, const P7_OPROFILE *om)
1703 {
1704   int status;
1705 
1706   fprintf(fp, "Dump of a <P7_OPROFILE> ::\n");
1707 
1708   fprintf(fp, "\n  -- float part, odds ratios for Forward/Backward:\n");
1709   if ((status = oprofile_dump_fb(fp, om, 8, 5)) != eslOK) return status;
1710 
1711   fprintf(fp, "\n  -- sword part, log odds for ViterbiFilter(): \n");
1712   if ((status = oprofile_dump_vf(fp, om))       != eslOK) return status;
1713 
1714   fprintf(fp, "\n  -- uchar part, log odds for MSVFilter(): \n");
1715   if ((status = oprofile_dump_mf(fp, om))       != eslOK) return status;
1716 
1717   return eslOK;
1718 }
1719 
1720 
1721 /* Function:  p7_oprofile_Sample()
1722  * Synopsis:  Sample a random profile.
1723  * Incept:    SRE, Wed Jul 30 13:11:52 2008 [Janelia]
1724  *
1725  * Purpose:   Sample a random profile of <M> nodes for alphabet <abc>,
1726  *            using <r> as the source of random numbers. Parameterize
1727  *            it for generation of target sequences of mean length
1728  *            <L>. Calculate its log-odds scores using background
1729  *            model <bg>.
1730  *
1731  * Args:      r       - random number generator
1732  *            abc     - emission alphabet
1733  *            bg      - background frequency model
1734  *            M       - size of sampled profile, in nodes
1735  *            L       - configured target seq mean length
1736  *            opt_hmm - optRETURN: sampled HMM
1737  *            opt_gm  - optRETURN: sampled normal profile
1738  *            opt_om  - RETURN: optimized profile
1739  *
1740  * Returns:   <eslOK> on success.
1741  *
1742  * Throws:    (no abnormal error conditions)
1743  */
1744 int
p7_oprofile_Sample(ESL_RANDOMNESS * r,const ESL_ALPHABET * abc,const P7_BG * bg,int M,int L,P7_HMM ** opt_hmm,P7_PROFILE ** opt_gm,P7_OPROFILE ** ret_om)1745 p7_oprofile_Sample(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, const P7_BG *bg, int M, int L,
1746 		   P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_OPROFILE **ret_om)
1747 {
1748   P7_HMM         *hmm  = NULL;
1749   P7_PROFILE     *gm   = NULL;
1750   P7_OPROFILE    *om   = NULL;
1751   int             status;
1752 
1753   if ((gm = p7_profile_Create (M, abc)) == NULL)  { status = eslEMEM; goto ERROR; }
1754   if ((om = p7_oprofile_Create(M, abc)) == NULL)  { status = eslEMEM; goto ERROR; }
1755 
1756   if ((status = p7_hmm_Sample(r, M, abc, &hmm))             != eslOK) goto ERROR;
1757   if ((status = p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)) != eslOK) goto ERROR;
1758   if ((status = p7_oprofile_Convert(gm, om))                != eslOK) goto ERROR;
1759   if ((status = p7_oprofile_ReconfigLength(om, L))          != eslOK) goto ERROR;
1760 
1761   if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm);
1762   if (opt_gm  != NULL) *opt_gm  = gm;  else p7_profile_Destroy(gm);
1763   *ret_om = om;
1764   return eslOK;
1765 
1766  ERROR:
1767   if (opt_hmm != NULL) *opt_hmm = NULL;
1768   if (opt_gm  != NULL) *opt_gm  = NULL;
1769   *ret_om = NULL;
1770   return status;
1771 }
1772 
1773 
1774 /* Function:  p7_oprofile_Compare()
1775  * Synopsis:  Compare two optimized profiles for equality.
1776  * Incept:    SRE, Wed Jan 21 13:29:10 2009 [Janelia]
1777  *
1778  * Purpose:   Compare the contents of <om1> and <om2>; return
1779  *            <eslOK> if they are effectively identical profiles,
1780  *            or <eslFAIL> if not.
1781  *
1782  *            Floating point comparisons are done to a tolerance
1783  *            of <tol> using <esl_FCompare()>.
1784  *
1785  *            If a comparison fails, an informative error message is
1786  *            left in <errmsg> to indicate why.
1787  *
1788  *            Internal allocation sizes are not compared, only the
1789  *            data.
1790  *
1791  * Args:      om1    - one optimized profile to compare
1792  *            om2    - the other
1793  *            tol    - floating point comparison tolerance; see <esl_FCompare()>
1794  *            errmsg - ptr to array of at least <eslERRBUFSIZE> characters.
1795  *
1796  * Returns:   <eslOK> on effective equality;  <eslFAIL> on difference.
1797  */
1798 int
p7_oprofile_Compare(const P7_OPROFILE * om1,const P7_OPROFILE * om2,float tol,char * errmsg)1799 p7_oprofile_Compare(const P7_OPROFILE *om1, const P7_OPROFILE *om2, float tol, char *errmsg)
1800 {
1801   int Q4  = p7O_NQF(om1->M);
1802   int Q8  = p7O_NQW(om1->M);
1803   int Q16 = p7O_NQB(om1->M);
1804   int q, r, x, y;
1805   union { __m128i v; uint8_t c[16]; } a16, b16;
1806   union { __m128i v; int16_t w[8];  } a8,  b8;
1807   union { __m128  v; float   x[4];  } a4,  b4;
1808 
1809   if (om1->mode      != om2->mode)      ESL_FAIL(eslFAIL, errmsg, "comparison failed: mode");
1810   if (om1->L         != om2->L)         ESL_FAIL(eslFAIL, errmsg, "comparison failed: L");
1811   if (om1->M         != om2->M)         ESL_FAIL(eslFAIL, errmsg, "comparison failed: M");
1812   if (om1->nj        != om2->nj)        ESL_FAIL(eslFAIL, errmsg, "comparison failed: nj");
1813   if (om1->abc->type != om2->abc->type) ESL_FAIL(eslFAIL, errmsg, "comparison failed: alphabet type");
1814 
1815   /* MSVFilter part */
1816   for (x = 0; x < om1->abc->Kp; x++)
1817     for (q = 0; q < Q16; q++)
1818       {
1819 	a16.v = om1->rbv[x][q]; b16.v = om2->rbv[x][q];
1820 	for (r = 0; r < 16; r++) if (a16.c[r] != b16.c[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rb[%d] elem %d", q, r);
1821       }
1822   if (om1->tbm_b     != om2->tbm_b)     ESL_FAIL(eslFAIL, errmsg, "comparison failed: tbm_b");
1823   if (om1->tec_b     != om2->tec_b)     ESL_FAIL(eslFAIL, errmsg, "comparison failed: tec_b");
1824   if (om1->tjb_b     != om2->tjb_b)     ESL_FAIL(eslFAIL, errmsg, "comparison failed: tjb_b");
1825   if (om1->scale_b   != om2->scale_b)   ESL_FAIL(eslFAIL, errmsg, "comparison failed: scale_b");
1826   if (om1->base_b    != om2->base_b)    ESL_FAIL(eslFAIL, errmsg, "comparison failed: base_b");
1827   if (om1->bias_b    != om2->bias_b)    ESL_FAIL(eslFAIL, errmsg, "comparison failed: bias_b");
1828 
1829   /* ViterbiFilter() part */
1830   for (x = 0; x < om1->abc->Kp; x++)
1831     for (q = 0; q < Q8; q++)
1832       {
1833 	a8.v = om1->rwv[x][q]; b8.v = om2->rwv[x][q];
1834 	for (r = 0; r < 8; r++) if (a8.w[r] != b8.w[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rw[%d] elem %d", q, r);
1835       }
1836   for (q = 0; q < 8*Q16; q++)
1837     {
1838       a8.v = om1->twv[q]; b8.v = om2->twv[q];
1839       for (r = 0; r < 8; r++) if (a8.w[r] != b8.w[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tw[%d] elem %d", q, r);
1840     }
1841   for (x = 0; x < p7O_NXSTATES; x++)
1842     for (y = 0; y < p7O_NXTRANS; y++)
1843       if (om1->xw[x][y] != om2->xw[x][y]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: xw[%d][%d]", x, y);
1844 
1845   if (om1->scale_w   != om2->scale_w)   ESL_FAIL(eslFAIL, errmsg, "comparison failed: scale");
1846   if (om1->base_w    != om2->base_w)    ESL_FAIL(eslFAIL, errmsg, "comparison failed: base");
1847   if (om1->ddbound_w != om2->ddbound_w) ESL_FAIL(eslFAIL, errmsg, "comparison failed: ddbound_w");
1848 
1849   /* Forward/Backward part */
1850   for (x = 0; x < om1->abc->Kp; x++)
1851     for (q = 0; q < Q4; q++)
1852       {
1853 	a4.v = om1->rfv[x][q]; b4.v = om2->rfv[x][q];
1854 	for (r = 0; r < 4; r++) if (esl_FCompare(a4.x[r], b4.x[r], tol) != eslOK)  ESL_FAIL(eslFAIL, errmsg, "comparison failed: rf[%d] elem %d", q, r);
1855       }
1856   for (q = 0; q < 8*Q4; q++)
1857     {
1858       a4.v = om1->tfv[q]; b4.v = om2->tfv[q];
1859       for (r = 0; r < 4; r++) if (a4.x[r] != b4.x[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tf[%d] elem %d", q, r);
1860     }
1861   for (x = 0; x < p7O_NXSTATES; x++)
1862     if (esl_vec_FCompare(om1->xf[x], om2->xf[x], p7O_NXTRANS, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: xf[%d] vector", x);
1863 
1864    for (x = 0; x < p7_NOFFSETS; x++)
1865      if (om1->offs[x] != om2->offs[x]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: offs[%d]", x);
1866 
1867    if (esl_strcmp(om1->name,      om2->name)      != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: name");
1868    if (esl_strcmp(om1->acc,       om2->acc)       != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: acc");
1869    if (esl_strcmp(om1->desc,      om2->desc)      != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: desc");
1870    if (esl_strcmp(om1->rf,        om2->rf)        != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: ref");
1871    if (esl_strcmp(om1->mm,        om2->mm)        != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: mm");
1872    if (esl_strcmp(om1->cs,        om2->cs)        != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: cs");
1873    if (esl_strcmp(om1->consensus, om2->consensus) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: consensus");
1874 
1875    if (esl_vec_FCompare(om1->evparam, om2->evparam, p7_NEVPARAM, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: evparam vector");
1876    if (esl_vec_FCompare(om1->cutoff,  om2->cutoff,  p7_NCUTOFFS, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: cutoff vector");
1877    if (esl_vec_FCompare(om1->compo,   om2->compo,   p7_MAXABET,  tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: compo vector");
1878 
1879    return eslOK;
1880 }
1881 
1882 
1883 /* Function:  p7_profile_SameAsMF()
1884  * Synopsis:  Set a generic profile's scores to give MSV scores.
1885  * Incept:    SRE, Wed Jul 30 13:42:49 2008 [Janelia]
1886  *
1887  * Purpose:   Set a generic profile's scores so that the normal <dp_generic> DP
1888  *            algorithms will give the same score as <p7_MSVFilter()>:
1889  *            all t_MM scores = 0; all other core transitions = -inf;
1890  *            multihit local mode; all <t_BMk> entries uniformly <log 2/(M(M+1))>;
1891  *            <tCC, tNN, tJJ> scores 0; total approximated later as -3;
1892  *            rounded in the same way as the 8-bit limited precision.
1893  *
1894  * Returns:   <eslOK> on success.
1895  */
1896 int
p7_profile_SameAsMF(const P7_OPROFILE * om,P7_PROFILE * gm)1897 p7_profile_SameAsMF(const P7_OPROFILE *om, P7_PROFILE *gm)
1898 {
1899   int    k,x;
1900   float  tbm = roundf(om->scale_b * (log(2.0f / ((float) gm->M * (float) (gm->M+1)))));
1901 
1902   /* Transitions */
1903   esl_vec_FSet(gm->tsc, p7P_NTRANS * gm->M, -eslINFINITY);
1904   for (k = 1; k <  gm->M; k++) p7P_TSC(gm, k, p7P_MM) = 0.0f;
1905   for (k = 0; k <  gm->M; k++) p7P_TSC(gm, k, p7P_BM) = tbm;
1906 
1907   /* Emissions */
1908   for (x = 0; x < gm->abc->Kp; x++)
1909     for (k = 0; k <= gm->M; k++)
1910       {
1911 	gm->rsc[x][k*2]   = (gm->rsc[x][k*2] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_b * gm->rsc[x][k*2]);
1912 	gm->rsc[x][k*2+1] = 0;	/* insert score: VF makes it zero no matter what. */
1913       }
1914 
1915    /* Specials */
1916   for (k = 0; k < p7P_NXSTATES; k++)
1917     for (x = 0; x < p7P_NXTRANS; x++)
1918       gm->xsc[k][x] = (gm->xsc[k][x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_b * gm->xsc[k][x]);
1919 
1920   /* NN, CC, JJ hardcoded 0 in limited precision */
1921   gm->xsc[p7P_N][p7P_LOOP] =  gm->xsc[p7P_J][p7P_LOOP] =  gm->xsc[p7P_C][p7P_LOOP] = 0;
1922 
1923   return eslOK;
1924 }
1925 
1926 
1927 /* Function:  p7_profile_SameAsVF()
1928  * Synopsis:  Round a generic profile to match ViterbiFilter scores.
1929  * Incept:    SRE, Wed Jul 30 13:37:48 2008 [Janelia]
1930  *
1931  * Purpose:   Round all the scores in a generic (lspace) <P7_PROFILE> <gm> in
1932  *            exactly the same way that the scores in the
1933  *            <P7_OPROFILE> <om> were rounded. Then we can test that two profiles
1934  *            give identical internal scores in testing, say,
1935  *            <p7_ViterbiFilter()> against <p7_GViterbi()>.
1936  *
1937  *            The 3nat approximation is used; NN=CC=JJ=0, and 3 nats are
1938  *            subtracted at the end to account for their contribution.
1939  *
1940  *            To convert a generic Viterbi score <gsc> calculated with this profile
1941  *            to a nat score that should match ViterbiFilter() exactly,
1942  *            do <(gsc / om->scale_w) - 3.0>.
1943  *
1944  *            <gm> must be the same profile that <om> was constructed from.
1945  *
1946  *            <gm> is irrevocably altered by this call.
1947  *
1948  *            Do not call this more than once on any given <gm>!
1949  *
1950  * Args:      <om>  - optimized profile, containing scale information.
1951  *            <gm>  - generic profile that <om> was built from.
1952  *
1953  * Returns:   <eslOK> on success.
1954  *
1955  * Throws:    (no abnormal error conditions)
1956  */
1957 int
p7_profile_SameAsVF(const P7_OPROFILE * om,P7_PROFILE * gm)1958 p7_profile_SameAsVF(const P7_OPROFILE *om, P7_PROFILE *gm)
1959 {
1960   int k;
1961   int x;
1962 
1963   /* Transitions */
1964   /* <= -eslINFINITY test is used solely to silence compiler. really testing == -eslINFINITY */
1965   for (x = 0; x < gm->M*p7P_NTRANS; x++)
1966     gm->tsc[x] = (gm->tsc[x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->tsc[x]);
1967 
1968   /* Enforce the rule that no II can be 0; max of -1 */
1969   for (x = p7P_II; x < gm->M*p7P_NTRANS; x += p7P_NTRANS)
1970     if (gm->tsc[x] == 0.0) gm->tsc[x] = -1.0;
1971 
1972   /* Emissions */
1973   for (x = 0; x < gm->abc->Kp; x++)
1974     for (k = 0; k <= gm->M; k++)
1975       {
1976 	gm->rsc[x][k*2]   = (gm->rsc[x][k*2]   <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->rsc[x][k*2]);
1977 	gm->rsc[x][k*2+1] = 0.0;	/* insert score: VF makes it zero no matter what. */
1978       }
1979 
1980   /* Specials */
1981   for (k = 0; k < p7P_NXSTATES; k++)
1982     for (x = 0; x < p7P_NXTRANS; x++)
1983       gm->xsc[k][x] = (gm->xsc[k][x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->xsc[k][x]);
1984 
1985   /* 3nat approximation: NN, CC, JJ hardcoded 0 in limited precision */
1986   gm->xsc[p7P_N][p7P_LOOP] =  gm->xsc[p7P_J][p7P_LOOP] =  gm->xsc[p7P_C][p7P_LOOP] = 0.0;
1987 
1988   return eslOK;
1989 }
1990 /*------------ end, P7_OPROFILE debugging tools  ----------------*/
1991 
1992 
1993 
1994 /*****************************************************************
1995  * 5. Benchmark driver.
1996  *****************************************************************/
1997 
1998 #ifdef p7OPROFILE_BENCHMARK
1999 /* Timing profile conversion.
2000    gcc -o benchmark-oprofile -std=gnu99 -g -Wall -msse2 -I.. -L.. -I../../easel -L../../easel -Dp7OPROFILE_BENCHMARK\
2001       p7_oprofile.c -lhmmer -leasel -lm
2002    icc -o benchmark-oprofile -O3 -static -I.. -L.. -I../../easel -L../../easel -Dp7OPROFILE_BENCHMARK p7_oprofile.c -lhmmer -leasel -lm
2003    ./benchmark-sse <hmmfile>         runs benchmark
2004  */
2005 #include "p7_config.h"
2006 
2007 #include "easel.h"
2008 #include "esl_alphabet.h"
2009 #include "esl_getopts.h"
2010 #include "esl_random.h"
2011 #include "esl_stopwatch.h"
2012 
2013 #include "hmmer.h"
2014 #include "impl_sse.h"
2015 
2016 static ESL_OPTIONS options[] = {
2017   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
2018   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
2019   { "-L",        eslARG_INT,    "400", NULL, NULL,  NULL,  NULL, NULL, "length of target sequence",                        0 },
2020   { "-N",        eslARG_INT, "100000", NULL, NULL,  NULL,  NULL, NULL, "number of conversions to time",                    0 },
2021   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
2022 };
2023 static char usage[]  = "[-options] <hmmfile>";
2024 static char banner[] = "benchmark driver for the generic implementation";
2025 
2026 int
main(int argc,char ** argv)2027 main(int argc, char **argv)
2028 {
2029   ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
2030   char           *hmmfile = esl_opt_GetArg(go, 1);
2031   ESL_STOPWATCH  *w       = esl_stopwatch_Create();
2032   ESL_ALPHABET   *abc     = NULL;
2033   P7_HMMFILE     *hfp     = NULL;
2034   P7_HMM         *hmm     = NULL;
2035   P7_BG          *bg      = NULL;
2036   P7_PROFILE     *gm      = NULL;
2037   P7_OPROFILE    *om      = NULL;
2038   int             L       = esl_opt_GetInteger(go, "-L");
2039   int             N       = esl_opt_GetInteger(go, "-N");
2040   int             i;
2041 
2042   if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
2043   if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
2044 
2045   bg = p7_bg_Create(abc);
2046   p7_bg_SetLength(bg, L);
2047   gm = p7_profile_Create(hmm->M, abc);
2048   p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
2049   om = p7_oprofile_Create(gm->M, abc);
2050 
2051   esl_stopwatch_Start(w);
2052   for (i = 0; i < N; i++)
2053     p7_oprofile_Convert(gm, om);
2054   esl_stopwatch_Stop(w);
2055   esl_stopwatch_Display(stdout, w, "# CPU time: ");
2056   printf("# M = %d\n", gm->M);
2057 
2058   p7_oprofile_Destroy(om);
2059   p7_profile_Destroy(gm);
2060   p7_bg_Destroy(bg);
2061   p7_hmm_Destroy(hmm);
2062   p7_hmmfile_Close(hfp);
2063   esl_alphabet_Destroy(abc);
2064   esl_stopwatch_Destroy(w);
2065   esl_getopts_Destroy(go);
2066   return 0;
2067 }
2068 #endif /*p7OPROFILE_BENCHMARK*/
2069 /*---------------- end, benchmark driver ------------------------*/
2070 
2071 
2072 
2073 
2074 
2075 /*****************************************************************
2076  * 6. Unit tests
2077  *****************************************************************/
2078 #ifdef p7OPROFILE_TESTDRIVE
2079 
2080 
2081 #endif /*p7OPROFILE_TESTDRIVE*/
2082 /*------------------- end, unit tests ---------------------------*/
2083 
2084 
2085 
2086 
2087 /*****************************************************************
2088  * 7. Test driver
2089  *****************************************************************/
2090 #ifdef p7OPROFILE_TESTDRIVE
2091 
2092 
2093 #endif /*p7OPROFILE_TESTDRIVE*/
2094 /*------------------- end, test driver --------------------------*/
2095 
2096 
2097 /*****************************************************************
2098  * 8. Example
2099  *****************************************************************/
2100 #ifdef p7OPROFILE_EXAMPLE
2101 /* gcc -std=gnu99 -g -Wall -Dp7OPROFILE_EXAMPLE -I.. -I../../easel -L.. -L../../easel -o p7_oprofile_example p7_oprofile.c -lhmmer -leasel -lm
2102  * ./p7_oprofile_example <hmmfile>
2103  */
2104 #include "p7_config.h"
2105 #include <stdlib.h>
2106 #include "easel.h"
2107 #include "hmmer.h"
2108 
2109 int
main(int argc,char ** argv)2110 main(int argc, char **argv)
2111 {
2112   char         *hmmfile = argv[1];
2113   ESL_ALPHABET *abc     = NULL;
2114   P7_HMMFILE   *hfp     = NULL;
2115   P7_HMM       *hmm     = NULL;
2116   P7_BG        *bg      = NULL;
2117   P7_PROFILE   *gm      = NULL;
2118   P7_OPROFILE  *om1     = NULL;
2119   P7_OPROFILE  *om2     = NULL;
2120   int           status;
2121   char          errbuf[eslERRBUFSIZE];
2122 
2123   status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
2124   if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
2125   else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                hmmfile, errbuf);
2126   else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",               status, hmmfile, errbuf);
2127 
2128   status = p7_hmmfile_Read(hfp, &abc, &hmm);
2129   if      (status == eslEFORMAT)   p7_Fail("Bad file format in HMM file %s:\n%s\n",          hfp->fname, hfp->errbuf);
2130   else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type));
2131   else if (status == eslEOF)       p7_Fail("Empty HMM file %s? No HMM data found.\n",        hfp->fname);
2132   else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s\n",     hfp->fname);
2133 
2134   bg  = p7_bg_Create(abc);
2135   gm  = p7_profile_Create(hmm->M, abc);
2136   om1 = p7_oprofile_Create(hmm->M, abc);
2137   p7_ProfileConfig(hmm, bg, gm, 400, p7_LOCAL);
2138   p7_oprofile_Convert(gm, om1);
2139 
2140   p7_oprofile_Dump(stdout, om1);
2141 
2142   om2 = p7_oprofile_Copy(om1);
2143   if (p7_oprofile_Compare(om1, om2, 0.001f, errbuf) != eslOK)    printf ("ERROR %s\n", errbuf);
2144 
2145   p7_oprofile_Destroy(om1);
2146   p7_oprofile_Destroy(om2);
2147   p7_profile_Destroy(gm);
2148   p7_bg_Destroy(bg);
2149   p7_hmm_Destroy(hmm);
2150   p7_hmmfile_Close(hfp);
2151   esl_alphabet_Destroy(abc);
2152   return eslOK;
2153 }
2154 #endif /*p7OPROFILE_EXAMPLE*/
2155 /*----------------------- end, example --------------------------*/
2156 
2157 
2158