1 /* VMX implementation of Forward and Backward algorithms.
2  *
3  * Both profile and DP matrix are striped and interleaved, for fast
4  * SIMD vector operations. Calculations are in probability space
5  * (scaled odds ratios, actually) rather than log probabilities,
6  * allowing fast multiply/add operations rather than slow Logsum()
7  * calculations. Sparse rescaling is used to achieve full dynamic
8  * range of scores.
9  *
10  * The Forward and Backward implementations may be used either in a
11  * full O(ML) mode that keeps an entire DP matrix, or in a O(M+L)
12  * linear memory "parsing" mode that only keeps one row of memory for
13  * the main MDI states and one column 0..L for the special states
14  * B,E,N,C,J. Keeping a full matrix allows subsequent stochastic
15  * traceback or posterior decoding of any state. In parsing mode, we
16  * can do posterior decoding on the special states and determine
17  * regions of the target sequence that are generated by the
18  * nonhomology states (NCJ) versus not -- thus, identifying where
19  * high-probability "regions" are, the first step of identifying the
20  * domain structure of a target sequence.
21  *
22  * Contents:
23  *   1. Forward/Backward wrapper API
24  *   2. Forward and Backward engine implementations
25  *   4. Benchmark driver.
26  *   5. Unit tests.
27  *   6. Test driver.
28  *   7. Example.
29  *
30  * SRE, Thu Jul 31 08:43:20 2008 [Janelia]
31  */
32 #include "p7_config.h"
33 
34 #include <stdio.h>
35 #include <math.h>
36 
37 #ifndef __APPLE_ALTIVEC__
38 #include <altivec.h>
39 #endif
40 
41 #include "easel.h"
42 #include "esl_vmx.h"
43 
44 #include "hmmer.h"
45 #include "impl_vmx.h"
46 
47 static int forward_engine (int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om,                    P7_OMX *fwd, float *opt_sc);
48 static int backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc);
49 
50 
51 /*****************************************************************
52  * 1. Forward/Backward API.
53  *****************************************************************/
54 
55 /* Function:  p7_Forward()
56  * Synopsis:  The Forward algorithm, full matrix fill version.
57  * Incept:    SRE, Fri Aug 15 18:59:43 2008 [Casa de Gatos]
58  *
59  * Purpose:   Calculates the Forward algorithm for sequence <dsq> of
60  *            length <L> residues, using optimized profile <om>, and a
61  *            preallocated DP matrix <ox>. Upon successful return, <ox>
62  *            contains the filled Forward matrix, and <*opt_sc>
63  *            optionally contains the raw Forward score in nats.
64  *
65  *            This calculation requires $O(ML)$ memory and time.
66  *            The caller must provide a suitably allocated full <ox>
67  *            by calling <ox = p7_omx_Create(M, L, L)> or
68  *            <p7_omx_GrowTo(ox, M, L, L)>.
69  *
70  *            The model <om> must be configured in local alignment
71  *            mode. The sparse rescaling method used to keep
72  *            probability values within single-precision floating
73  *            point dynamic range cannot be safely applied to models in
74  *            glocal or global mode.
75  *
76  * Args:      dsq     - digital target sequence, 1..L
77  *            L       - length of dsq in residues
78  *            om      - optimized profile
79  *            ox      - RETURN: Forward DP matrix
80  *            opt_sc  - RETURN: Forward score (in nats)
81  *
82  * Returns:   <eslOK> on success.
83  *
84  * Throws:    <eslEINVAL> if <ox> allocation is too small, or if the profile
85  *            isn't in local alignment mode.
86  *            <eslERANGE> if the score exceeds the limited range of
87  *            a probability-space odds ratio.
88  *            In either case, <*opt_sc> is undefined.
89  */
90 int
p7_Forward(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,P7_OMX * ox,float * opt_sc)91 p7_Forward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
92 {
93 #if eslDEBUGLEVEL > 0
94   if (om->M >  ox->allocQ4*4)    ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
95   if (L     >= ox->validR)       ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
96   if (L     >= ox->allocXR)      ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
97   if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
98 #endif
99 
100   return forward_engine(TRUE, dsq, L, om, ox, opt_sc);
101 }
102 
103 /* Function:  p7_ForwardParser()
104  * Synopsis:  The Forward algorithm, linear memory parsing version.
105  * Incept:    SRE, Fri Aug 15 19:05:26 2008 [Casa de Gatos]
106  *
107  * Purpose:   Same as <p7_Forward() except that the full matrix isn't
108  *            kept. Instead, a linear $O(M+L)$ memory algorithm is
109  *            used, keeping only the DP matrix values for the special
110  *            (BENCJ) states. These are sufficient to do posterior
111  *            decoding to identify high-probability regions where
112  *            domains are.
113  *
114  *            The caller must provide a suitably allocated "parsing"
115  *            <ox> by calling <ox = p7_omx_Create(M, 0, L)> or
116  *            <p7_omx_GrowTo(ox, M, 0, L)>.
117  *
118  * Args:      dsq     - digital target sequence, 1..L
119  *            L       - length of dsq in residues
120  *            om      - optimized profile
121  *            ox      - RETURN: Forward DP matrix
122  *            ret_sc  - RETURN: Forward score (in nats)
123  *
124  * Returns:   <eslOK> on success.
125  *
126  * Throws:    <eslEINVAL> if <ox> allocation is too small, or if the profile
127  *            isn't in local alignment mode.
128  *            <eslERANGE> if the score exceeds the limited range of
129  *            a probability-space odds ratio.
130  *            In either case, <*opt_sc> is undefined.
131  */
132 int
p7_ForwardParser(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,P7_OMX * ox,float * opt_sc)133 p7_ForwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
134 {
135 #if eslDEBUGLEVEL > 0
136   if (om->M >  ox->allocQ4*4)    ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
137   if (ox->validR < 1)            ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
138   if (L     >= ox->allocXR)      ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
139   if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
140 #endif
141 
142   return forward_engine(FALSE, dsq, L, om, ox, opt_sc);
143 }
144 
145 
146 
147 /* Function:  p7_Backward()
148  * Synopsis:  The Backward algorithm; full matrix fill version.
149  * Incept:    SRE, Sat Aug 16 08:34:22 2008 [Janelia]
150  *
151  * Purpose:   Calculates the Backward algorithm for sequence <dsq> of
152  *            length <L> residues, using optimized profile <om>, and a
153  *            preallocated DP matrix <bck>. A filled Forward matrix
154  *            must also be provided in <fwd>, because we need to use
155  *            the same sparse scaling factors that Forward used. The
156  *            <bck> matrix is filled in, and the Backward score (in
157  *            nats) is optionally returned in <opt_sc>.
158  *
159  *            This calculation requires $O(ML)$ memory and time. The
160  *            caller must provide a suitably allocated full <bck> by
161  *            calling <bck = p7_omx_Create(M, L, L)> or
162  *            <p7_omx_GrowTo(bck, M, L, L)>.
163  *
164  *            Because only the sparse scaling factors are needed from
165  *            the <fwd> matrix, the caller may have this matrix
166  *            calculated either in full or parsing mode.
167  *
168  *            The model <om> must be configured in local alignment
169  *            mode. The sparse rescaling method used to keep
170  *            probability values within single-precision floating
171  *            point dynamic range cannot be safely applied to models in
172  *            glocal or global mode.
173  *
174  * Args:      dsq     - digital target sequence, 1..L
175  *            L       - length of dsq in residues
176  *            om      - optimized profile
177  *            fwd     - filled Forward DP matrix, for scale factors
178  *            do_full - TRUE=full matrix; FALSE=linear memory parse mode
179  *            bck     - RETURN: filled Backward matrix
180  *            opt_sc  - optRETURN: Backward score (in nats)
181  *
182  * Returns:   <eslOK> on success.
183  *
184  * Throws:    <eslEINVAL> if <ox> allocation is too small, or if the profile
185  *            isn't in local alignment mode.
186  *            <eslERANGE> if the score exceeds the limited range of
187  *            a probability-space odds ratio.
188  *            In either case, <*opt_sc> is undefined.
189  */
190 int
p7_Backward(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,const P7_OMX * fwd,P7_OMX * bck,float * opt_sc)191 p7_Backward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
192 {
193 #if eslDEBUGLEVEL > 0
194   if (om->M >  bck->allocQ4*4)    ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
195   if (L     >= bck->validR)       ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
196   if (L     >= bck->allocXR)      ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
197   if (L     != fwd->L)            ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L");
198   if (! p7_oprofile_IsLocal(om))  ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
199 #endif
200 
201  return backward_engine(TRUE, dsq, L, om, fwd, bck, opt_sc);
202 }
203 
204 
205 
206 /* Function:  p7_BackwardParser()
207  * Synopsis:  The Backward algorithm, linear memory parsing version.
208  * Incept:    SRE, Sat Aug 16 08:34:13 2008 [Janelia]
209  *
210  * Purpose:   Same as <p7_Backward()> except that the full matrix isn't
211  *            kept. Instead, a linear $O(M+L)$ memory algorithm is
212  *            used, keeping only the DP matrix values for the special
213  *            (BENCJ) states. These are sufficient to do posterior
214  *            decoding to identify high-probability regions where
215  *            domains are.
216  *
217  *            The caller must provide a suitably allocated "parsing"
218  *            <bck> by calling <bck = p7_omx_Create(M, 0, L)> or
219  *            <p7_omx_GrowTo(bck, M, 0, L)>.
220  *
221  * Args:      dsq     - digital target sequence, 1..L
222  *            L       - length of dsq in residues
223  *            om      - optimized profile
224  *            fwd     - filled Forward DP matrix, for scale factors
225  *            bck     - RETURN: filled Backward matrix
226  *            opt_sc  - optRETURN: Backward score (in nats)
227  *
228  * Returns:   <eslOK> on success.
229  *
230  * Throws:    <eslEINVAL> if <ox> allocation is too small, or if the profile
231  *            isn't in local alignment mode.
232  *            <eslERANGE> if the score exceeds the limited range of
233  *            a probability-space odds ratio.
234  *            In either case, <*opt_sc> is undefined.
235  */
236 int
p7_BackwardParser(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,const P7_OMX * fwd,P7_OMX * bck,float * opt_sc)237 p7_BackwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
238 {
239 #if eslDEBUGLEVEL > 0
240   if (om->M >  bck->allocQ4*4)    ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
241   if (bck->validR < 1)            ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
242   if (L     >= bck->allocXR)      ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
243   if (L     != fwd->L)            ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L");
244   if (! p7_oprofile_IsLocal(om))  ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
245 #endif
246 
247   return backward_engine(FALSE, dsq, L, om, fwd, bck, opt_sc);
248 }
249 
250 
251 
252 /*****************************************************************
253  * 2. Forward/Backward engine implementations (called thru API)
254  *****************************************************************/
255 
256 static int
forward_engine(int do_full,const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,P7_OMX * ox,float * opt_sc)257 forward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
258 {
259   vector float mpv, dpv, ipv;      /* previous row values                                       */
260   vector float sv;		   /* temp storage of 1 curr row value in progress              */
261   vector float dcv;		   /* delayed storage of D(i,q+1)                               */
262   vector float xEv;		   /* E state: keeps max for Mk->E as we go                     */
263   vector float xBv;		   /* B state: splatted vector of B[i-1] for B->Mk calculations */
264   vector float zerov;		   /* splatted 0.0's in a vector                                */
265   float    xN, xE, xB, xC, xJ;	   /* special states' scores                                    */
266   int i;			   /* counter over sequence positions 1..L                      */
267   int q;			   /* counter over quads 0..nq-1                                */
268   int j;			   /* counter over DD iterations (4 is full serialization)      */
269   int Q       = p7O_NQF(om->M);	   /* segment length: # of vectors                              */
270   vector float *dpc = ox->dpf[0];  /* current row, for use in {MDI}MO(dpp,q) access macro       */
271   vector float *dpp;               /* previous row, for use in {MDI}MO(dpp,q) access macro      */
272   vector float *rp;		   /* will point at om->rfv[x] for residue x[i]                 */
273   vector float *tp;		   /* will point into (and step thru) om->tfv                   */
274 
275   /* Initialization. */
276   ox->M  = om->M;
277   ox->L  = L;
278   ox->has_own_scales = TRUE; 	/* all forward matrices control their own scalefactors */
279   zerov = (vector float) vec_splat_u32(0);
280   for (q = 0; q < Q; q++)
281     MMO(dpc,q) = IMO(dpc,q) = DMO(dpc,q) = zerov;
282   xE    = ox->xmx[p7X_E] = 0.;
283   xN    = ox->xmx[p7X_N] = 1.;
284   xJ    = ox->xmx[p7X_J] = 0.;
285   xB    = ox->xmx[p7X_B] = om->xf[p7O_N][p7O_MOVE];
286   xC    = ox->xmx[p7X_C] = 0.;
287 
288   ox->xmx[p7X_SCALE] = 1.0;
289   ox->totscale       = 0.0;
290 
291 #if eslDEBUGLEVEL > 0
292   if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, 0, 9, 5, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=0, width=8, precision=5*/
293 #endif
294 
295   for (i = 1; i <= L; i++)
296     {
297       dpp   = dpc;
298       dpc   = ox->dpf[do_full * i];     /* avoid conditional, use do_full as kronecker delta */
299       rp    = om->rfv[dsq[i]];
300       tp    = om->tfv;
301       dcv   = (vector float) vec_splat_u32(0);
302       xEv   = (vector float) vec_splat_u32(0);
303       xBv   = esl_vmx_set_float(xB);
304 
305       /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12.  Shift zeros on. */
306       mpv   = vec_sld(zerov, MMO(dpp,Q-1), 12);
307       dpv   = vec_sld(zerov, DMO(dpp,Q-1), 12);
308       ipv   = vec_sld(zerov, IMO(dpp,Q-1), 12);
309 
310       for (q = 0; q < Q; q++)
311 	{
312 	  /* Calculate new MMO(i,q); don't store it yet, hold it in sv. */
313 	  sv   = (vector float) vec_splat_u32(0);
314 	  sv   = vec_madd(xBv, *tp, sv);     tp++;
315 	  sv   = vec_madd(mpv, *tp, sv);     tp++;
316 	  sv   = vec_madd(ipv, *tp, sv);     tp++;
317 	  sv   = vec_madd(dpv, *tp, sv);     tp++;
318 	  sv   = vec_madd(sv,  *rp, zerov);  rp++;
319 	  xEv  = vec_add(xEv, sv);
320 
321 	  /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
322 	   * {MDI}MX(q) is then the current, not the prev row
323 	   */
324 	  mpv = MMO(dpp,q);
325 	  dpv = DMO(dpp,q);
326 	  ipv = IMO(dpp,q);
327 
328 	  /* Do the delayed stores of {MD}(i,q) now that memory is usable */
329 	  MMO(dpc,q) = sv;
330 	  DMO(dpc,q) = dcv;
331 
332 	  /* Calculate the next D(i,q+1) partially: M->D only;
333            * delay storage, holding it in dcv
334 	   */
335 	  dcv   = vec_madd(sv, *tp, zerov); tp++;
336 
337 	  /* Calculate and store I(i,q); assumes odds ratio for emission is 1.0 */
338 	  sv         = vec_madd(mpv, *tp, zerov);  tp++;
339 	  IMO(dpc,q) = vec_madd(ipv, *tp, sv);     tp++;
340 	}
341 
342       /* Now the DD paths. We would rather not serialize them but
343        * in an accurate Forward calculation, we have few options.
344        */
345       /* dcv has carried through from end of q loop above; store it
346        * in first pass, we add M->D and D->D path into DMX
347        */
348       /* We're almost certainly're obligated to do at least one complete
349        * DD path to be sure:
350        */
351       dcv        = vec_sld(zerov, dcv, 12);
352       DMO(dpc,0) = (vector float) vec_splat_u32(0);
353       tp         = om->tfv + 7*Q;	/* set tp to start of the DD's */
354       for (q = 0; q < Q; q++)
355 	{
356 	  DMO(dpc,q) = vec_add(dcv, DMO(dpc,q));
357 	  dcv        = vec_madd(DMO(dpc,q), *tp, zerov); tp++; /* extend DMO(q), so we include M->D and D->D paths */
358 	}
359 
360       /* now. on small models, it seems best (empirically) to just go
361        * ahead and serialize. on large models, we can do a bit better,
362        * by testing for when dcv (DD path) accrued to DMO(q) is below
363        * machine epsilon for all q, in which case we know DMO(q) are all
364        * at their final values. The tradeoff point is (empirically) somewhere around M=100,
365        * at least on my desktop. We don't worry about the conditional here;
366        * it's outside any inner loops.
367        */
368       if (om->M < 100)
369 	{			/* Fully serialized version */
370 	  for (j = 1; j < 4; j++)
371 	    {
372 	      dcv = vec_sld(zerov, dcv, 12);
373 	      tp  = om->tfv + 7*Q;	/* set tp to start of the DD's */
374 	      for (q = 0; q < Q; q++)
375 		{ /* note, extend dcv, not DMO(q); only adding DD paths now */
376 		  DMO(dpc,q) = vec_add(dcv, DMO(dpc,q));
377 		  dcv        = vec_madd(dcv, *tp, zerov);   tp++;
378 		}
379 	    }
380 	}
381       else
382 	{			/* Slightly parallelized version, but which incurs some overhead */
383 	  for (j = 1; j < 4; j++)
384 	    {
385 	      vector bool int cv;	/* keeps track of whether any DD's change DMO(q) */
386 
387 	      dcv = vec_sld(zerov, dcv, 12);
388 	      tp  = om->tfv + 7*Q;	/* set tp to start of the DD's */
389 	      cv  = (vector bool int) vec_splat_u32(0);
390 	      for (q = 0; q < Q; q++)
391 		{ /* using cmpgt below tests if DD changed any DMO(q) *without* conditional branch */
392 		  sv         = vec_add(dcv, DMO(dpc,q));
393 		  cv         = vec_or(cv, vec_cmpgt(sv, DMO(dpc,q)));
394 		  DMO(dpc,q) = sv;	                               /* store new DMO(q) */
395 		  dcv        = vec_madd(dcv, *tp, zerov);   tp++;      /* note, extend dcv, not DMO(q) */
396 		}
397 	      /* DD's didn't change any DMO(q)? Then done, break out. */
398 	      if (vec_all_eq(cv, (vector bool int)zerov)) break;
399 	    }
400 	}
401 
402       /* Add D's to xEv */
403       for (q = 0; q < Q; q++) xEv = vec_add(DMO(dpc,q), xEv);
404 
405       /* Finally the "special" states, which start from Mk->E (->C, ->J->B) */
406       /* The following incantation is a horizontal sum of xEv's elements  */
407       /* These must follow DD calculations, because D's contribute to E in Forward
408        * (as opposed to Viterbi)
409        */
410       xE = esl_vmx_hsum_float(xEv);
411 
412       xN =  xN * om->xf[p7O_N][p7O_LOOP];
413       xC = (xC * om->xf[p7O_C][p7O_LOOP]) +  (xE * om->xf[p7O_E][p7O_MOVE]);
414       xJ = (xJ * om->xf[p7O_J][p7O_LOOP]) +  (xE * om->xf[p7O_E][p7O_LOOP]);
415       xB = (xJ * om->xf[p7O_J][p7O_MOVE]) +  (xN * om->xf[p7O_N][p7O_MOVE]);
416       /* and now xB will carry over into next i, and xC carries over after i=L */
417 
418       /* Sparse rescaling. xE above threshold? trigger a rescaling event.            */
419       if (xE > 1.0e4)	/* that's a little less than e^10, ~10% of our dynamic range */
420 	{
421 	  xN  = xN / xE;
422 	  xC  = xC / xE;
423 	  xJ  = xJ / xE;
424 	  xB  = xB / xE;
425 	  xEv = esl_vmx_set_float(1.0 / xE);
426 	  for (q = 0; q < Q; q++)
427 	    {
428 	      MMO(dpc,q) = vec_madd(MMO(dpc,q), xEv, zerov);
429 	      DMO(dpc,q) = vec_madd(DMO(dpc,q), xEv, zerov);
430 	      IMO(dpc,q) = vec_madd(IMO(dpc,q), xEv, zerov);
431 	    }
432 	  ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = xE;
433 	  ox->totscale += log(xE);
434 	  xE = 1.0;
435 	}
436       else ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = 1.0;
437 
438       /* Storage of the specials.  We could've stored these already
439        * but using xE, etc. variables makes it easy to convert this
440        * code to O(M) memory versions just by deleting storage steps.
441        */
442       ox->xmx[i*p7X_NXCELLS+p7X_E] = xE;
443       ox->xmx[i*p7X_NXCELLS+p7X_N] = xN;
444       ox->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
445       ox->xmx[i*p7X_NXCELLS+p7X_B] = xB;
446       ox->xmx[i*p7X_NXCELLS+p7X_C] = xC;
447 
448 #if eslDEBUGLEVEL > 0
449       if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, i, 9, 5, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=i, width=8, precision=5*/
450 #endif
451     } /* end loop over sequence residues 1..L */
452 
453   /* finally C->T, and flip total score back to log space (nats) */
454   /* On overflow, xC is inf or nan (nan arises because inf*0 = nan). */
455   /* On an underflow (which shouldn't happen), we counterintuitively return infinity:
456    * the effect of this is to force the caller to rescore us with full range.
457    */
458   if       (isnan(xC))        ESL_EXCEPTION(eslERANGE, "forward score is NaN");
459   else if  (L>0 && xC == 0.0) ESL_EXCEPTION(eslERANGE, "forward score underflow (is 0.0)");     /* [J5/118] */
460   else if  (isinf(xC) == 1)   ESL_EXCEPTION(eslERANGE, "forward score overflow (is infinity)");
461 
462   if (opt_sc != NULL) *opt_sc = ox->totscale + log(xC * om->xf[p7O_C][p7O_MOVE]);
463   return eslOK;
464 }
465 
466 
467 
468 static int
backward_engine(int do_full,const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,const P7_OMX * fwd,P7_OMX * bck,float * opt_sc)469 backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
470 {
471   vector float mpv, ipv, dpv;         /* previous row values                                       */
472   vector float mcv, dcv;              /* current row values                                        */
473   vector float tmmv, timv, tdmv;      /* tmp vars for accessing rotated transition scores          */
474   vector float xBv;		      /* collects B->Mk components of B(i)                         */
475   vector float xEv;	              /* splatted E(i)                                             */
476   vector float zerov;		      /* splatted 0.0's in a vector                                */
477   float    xN, xE, xB, xC, xJ;	      /* special states' scores                                    */
478   int      i;			      /* counter over sequence positions 0,1..L                    */
479   int      q;			      /* counter over quads 0..Q-1                                 */
480   int      Q       = p7O_NQF(om->M);  /* segment length: # of vectors                              */
481   int      j;			      /* DD segment iteration counter (4 = full serialization)     */
482   vector float  *dpc;                 /* current DP row                                            */
483   vector float  *dpp;	              /* next ("previous") DP row                                  */
484   vector float  *rp;		      /* will point into om->rfv[x] for residue x[i+1]             */
485   vector float  *tp;		      /* will point into (and step thru) om->tfv transition scores */
486 
487   /* initialize the L row. */
488   bck->M = om->M;
489   bck->L = L;
490   bck->has_own_scales = FALSE;	/* backwards scale factors are *usually* given by <fwd> */
491   dpc    = bck->dpf[L * do_full];
492   xJ     = 0.0;
493   xB     = 0.0;
494   xN     = 0.0;
495   xC     = om->xf[p7O_C][p7O_MOVE];      /* C<-T */
496   xE     = xC * om->xf[p7O_E][p7O_MOVE]; /* E<-C, no tail */
497   xEv    = esl_vmx_set_float(xE);
498   zerov  = (vector float) vec_splat_u32(0);
499   dcv    = (vector float) vec_splat_u32(0);;		/* solely to silence a compiler warning */
500   for (q = 0; q < Q; q++) MMO(dpc,q) = DMO(dpc,q) = xEv;
501   for (q = 0; q < Q; q++) IMO(dpc,q) = zerov;
502 
503   /* init row L's DD paths, 1) first segment includes xE, from DMO(q) */
504   tp  = om->tfv + 8*Q - 1;	                        /* <*tp> now the [4 8 12 x] TDD quad         */
505   dpv = vec_sld(DMO(dpc,Q-1), zerov, 4);
506   for (q = Q-1; q >= 1; q--)
507     {
508       DMO(dpc,q) = vec_madd(dpv, *tp, DMO(dpc,q));      tp--;
509       dpv        = DMO(dpc,q);
510     }
511   dcv        = vec_madd(dpv, *tp, zerov);
512   DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
513 
514   /* 2) three more passes, only extending DD component (dcv only; no xE contrib from DMO(q)) */
515   for (j = 1; j < 4; j++)
516     {
517       tp  = om->tfv + 8*Q - 1;	                        /* <*tp> now the [4 8 12 x] TDD quad         */
518       dcv = vec_sld(dcv, zerov, 4);
519       for (q = Q-1; q >= 0; q--)
520 	{
521 	  dcv        = vec_madd(dcv, *tp, zerov); tp--;
522 	  DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
523 	}
524     }
525 
526   /* now MD init */
527   tp  = om->tfv + 7*Q - 3;	                        /* <*tp> now the [4 8 12 x] Mk->Dk+1 quad    */
528   dcv = vec_sld(DMO(dpc,0), zerov, 4);
529   for (q = Q-1; q >= 0; q--)
530     {
531       MMO(dpc,q) = vec_madd(dcv, *tp, MMO(dpc,q)); tp -= 7;
532       dcv        = DMO(dpc,q);
533     }
534 
535   /* Sparse rescaling: same scale factors as fwd matrix */
536   if (fwd->xmx[L*p7X_NXCELLS+p7X_SCALE] > 1.0)
537     {
538       xE  = xE / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
539       xN  = xN / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
540       xC  = xC / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
541       xJ  = xJ / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
542       xB  = xB / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
543       xEv = esl_vmx_set_float(1.0 / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE]);
544       for (q = 0; q < Q; q++) {
545 	MMO(dpc,q) = vec_madd(MMO(dpc,q), xEv, zerov);
546 	DMO(dpc,q) = vec_madd(DMO(dpc,q), xEv, zerov);
547 	IMO(dpc,q) = vec_madd(IMO(dpc,q), xEv, zerov);
548       }
549     }
550   bck->xmx[L*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
551   bck->totscale                     = log(bck->xmx[L*p7X_NXCELLS+p7X_SCALE]);
552 
553   /* Stores */
554   bck->xmx[L*p7X_NXCELLS+p7X_E] = xE;
555   bck->xmx[L*p7X_NXCELLS+p7X_N] = xN;
556   bck->xmx[L*p7X_NXCELLS+p7X_J] = xJ;
557   bck->xmx[L*p7X_NXCELLS+p7X_B] = xB;
558   bck->xmx[L*p7X_NXCELLS+p7X_C] = xC;
559 
560 #if eslDEBUGLEVEL > 0
561   if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, L, 9, 4, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=L, width=9, precision=4*/
562 #endif
563 
564   /* main recursion */
565   for (i = L-1; i >= 1; i--)	/* backwards stride */
566     {
567       /* phase 1. B(i) collected. Old row destroyed, new row contains
568        *    complete I(i,k), partial {MD}(i,k) w/ no {MD}->{DE} paths yet.
569        */
570       dpc = bck->dpf[i     * do_full];
571       dpp = bck->dpf[(i+1) * do_full];
572       rp  = om->rfv[dsq[i+1]] + Q-1; /* <*rp> is now the [4 8 12 x] match emission quad */
573       tp  = om->tfv + 7*Q - 1;	    /* <*tp> is now the [4 8 12 x] TII transition quad  */
574 
575       /* leftshift the first transition quads */
576       tmmv = vec_sld(om->tfv[1], zerov, 4);
577       timv = vec_sld(om->tfv[2], zerov, 4);
578       tdmv = vec_sld(om->tfv[3], zerov, 4);
579 
580       mpv = vec_madd(MMO(dpp,0), om->rfv[dsq[i+1]][0], zerov); /* precalc M(i+1,k+1)*e(M_k+1,x_{i+1}) */
581       mpv = vec_sld(mpv, zerov, 4);
582 
583       xBv = zerov;
584       for (q = Q-1; q >= 0; q--)     /* backwards stride */
585 	{
586 	  vector float t1;
587 
588 	  ipv = IMO(dpp,q); /* assumes emission odds ratio of 1.0; i+1's IMO(q) now free */
589 	  t1         = vec_madd(mpv, timv, zerov);
590 	  IMO(dpc,q) = vec_madd(ipv, *tp,  t1);            tp--;
591 	  DMO(dpc,q) = vec_madd(mpv, tdmv, zerov);
592 	  t1         = vec_madd(mpv, tmmv, zerov);
593 	  mcv        = vec_madd(ipv, *tp,  t1);            tp -= 2;
594 
595 	  /* obtain mpv for next q. i+1's MMO(q) is freed  */
596 	  mpv        = vec_madd(MMO(dpp,q), *rp, zerov);   rp--;
597 	  MMO(dpc,q) = mcv;
598 
599 	  tdmv = *tp;   tp--;
600 	  timv = *tp;   tp--;
601 	  tmmv = *tp;   tp--;
602 
603 	  xBv = vec_madd(mpv, *tp, xBv); tp--;
604 	}
605 
606       /* phase 2: now that we have accumulated the B->Mk transitions in xBv, we can do the specials */
607       xB = esl_vmx_hsum_float(xBv);
608 
609       xC =  xC * om->xf[p7O_C][p7O_LOOP];
610       xJ = (xB * om->xf[p7O_J][p7O_MOVE]) + (xJ * om->xf[p7O_J][p7O_LOOP]); /* must come after xB */
611       xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]); /* must come after xB */
612       xE = (xC * om->xf[p7O_E][p7O_MOVE]) + (xJ * om->xf[p7O_E][p7O_LOOP]); /* must come after xJ, xC */
613       xEv = esl_vmx_set_float(xE);	/* splat */
614 
615 
616       /* phase 3: {MD}->E paths and one step of the D->D paths */
617       tp  = om->tfv + 8*Q - 1;	/* <*tp> now the [4 8 12 x] TDD quad */
618       dpv = vec_add(DMO(dpc,0), xEv);
619       dpv = vec_sld(dpv, zerov, 4);
620       for (q = Q-1; q >= 1; q--)
621 	{
622 	  dcv        = vec_madd(dpv, *tp, xEv);    tp--;
623 	  DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
624 	  dpv        = DMO(dpc,q);
625 	  MMO(dpc,q) = vec_add(MMO(dpc,q), xEv);
626 	}
627       dcv        = vec_madd(dpv, *tp, zerov);
628       DMO(dpc,q) = vec_add(DMO(dpc,q), vec_add(dcv, xEv));
629       MMO(dpc,q) = vec_add(MMO(dpc,q), xEv);
630 
631       /* phase 4: finish extending the DD paths */
632       /* fully serialized for now */
633       for (j = 1; j < 4; j++)	/* three passes: we've already done 1 segment, we need 4 total */
634 	{
635 	  dcv = vec_sld(dcv, zerov, 4);
636 	  tp  = om->tfv + 8*Q - 1;	/* <*tp> now the [4 8 12 x] TDD quad */
637 	  for (q = Q-1; q >= 0; q--)
638 	    {
639 	      dcv        = vec_madd(dcv, *tp, zerov); tp--;
640 	      DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
641 	    }
642 	}
643 
644       /* phase 5: add M->D paths */
645       dcv = vec_sld(DMO(dpc,0), zerov, 4);
646       tp  = om->tfv + 7*Q - 3;	/* <*tp> is now the [4 8 12 x] Mk->Dk+1 quad */
647       for (q = Q-1; q >= 0; q--)
648 	{
649 	  MMO(dpc,q) = vec_madd(dcv, *tp, MMO(dpc,q)); tp -= 7;
650 	  dcv        = DMO(dpc,q);
651 	}
652 
653       /* Sparse rescaling  */
654 
655       /* In rare cases [J3/119] scale factors from <fwd> are
656        * insufficient and backwards will overflow. In this case, we
657        * switch on the fly to using our own scale factors, different
658        * from those in <fwd>. This will complicate subsequent
659        * posterior decoding routines.
660        */
661       if (xB > 1.0e16) bck->has_own_scales = TRUE;
662 
663       if      (bck->has_own_scales)  bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = (xB > 1.0e4) ? xB : 1.0;
664       else                           bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[i*p7X_NXCELLS+p7X_SCALE];
665 
666       if (bck->xmx[i*p7X_NXCELLS+p7X_SCALE] > 1.0)
667 	{
668 	  xE /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
669 	  xN /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
670 	  xJ /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
671 	  xB /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
672 	  xC /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
673 	  xBv = esl_vmx_set_float(1.0 / bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
674 	  for (q = 0; q < Q; q++) {
675 	    MMO(dpc,q) = vec_madd(MMO(dpc,q), xBv, zerov);
676 	    DMO(dpc,q) = vec_madd(DMO(dpc,q), xBv, zerov);
677 	    IMO(dpc,q) = vec_madd(IMO(dpc,q), xBv, zerov);
678 	  }
679 	  bck->totscale += log(bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
680 	}
681 
682       /* Stores are separate only for pedagogical reasons: easy to
683        * turn this into a more memory efficient version just by
684        * deleting the stores.
685        */
686       bck->xmx[i*p7X_NXCELLS+p7X_E] = xE;
687       bck->xmx[i*p7X_NXCELLS+p7X_N] = xN;
688       bck->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
689       bck->xmx[i*p7X_NXCELLS+p7X_B] = xB;
690       bck->xmx[i*p7X_NXCELLS+p7X_C] = xC;
691 
692 #if eslDEBUGLEVEL > 0
693       if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, i, 9, 4, xE, xN, xJ, xB, xC);	/* logify=TRUE, <rowi>=i, width=9, precision=4*/
694 #endif
695     } /* thus ends the loop over sequence positions i */
696 
697   /* Termination at i=0, where we can only reach N,B states. */
698   dpp = bck->dpf[1 * do_full];
699   tp  = om->tfv;	        /* <*tp> is now the [1 5 9 13] TBMk transition quad  */
700   rp  = om->rfv[dsq[1]];	/* <*rp> is now the [1 5 9 13] match emission quad   */
701   xBv = (vector float) vec_splat_u32(0);
702   for (q = 0; q < Q; q++)
703     {
704       mpv = vec_madd(MMO(dpp,q), *rp, zerov);  rp++;
705       xBv = vec_madd(mpv,        *tp, xBv);    tp += 7;
706     }
707   /* horizontal sum of xBv */
708   xB = esl_vmx_hsum_float(xBv);
709 
710   xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]);
711 
712   bck->xmx[p7X_B]     = xB;
713   bck->xmx[p7X_C]     = 0.0;
714   bck->xmx[p7X_J]     = 0.0;
715   bck->xmx[p7X_N]     = xN;
716   bck->xmx[p7X_E]     = 0.0;
717   bck->xmx[p7X_SCALE] = 1.0;
718 
719 #if eslDEBUGLEVEL > 0
720   dpc = bck->dpf[0];
721   for (q = 0; q < Q; q++) /* Not strictly necessary, but if someone's looking at DP matrices, this is nice to do: */
722     MMO(dpc,q) = DMO(dpc,q) = IMO(dpc,q) = zerov;
723   if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, 0, 9, 4, bck->xmx[p7X_E], bck->xmx[p7X_N],  bck->xmx[p7X_J], bck->xmx[p7X_B],  bck->xmx[p7X_C]);	/* logify=TRUE, <rowi>=0, width=9, precision=4*/
724 #endif
725 
726   if       (isnan(xN))         ESL_EXCEPTION(eslERANGE, "backward score is NaN");
727   else if  (L>0 && xN == 0.0)  ESL_EXCEPTION(eslERANGE, "backward score underflow (is 0.0)");    /* [J5/118] */
728   else if  (isinf(xN) == 1)    ESL_EXCEPTION(eslERANGE, "backward score overflow (is infinity)");
729 
730   if (opt_sc != NULL) *opt_sc = bck->totscale + log(xN);
731   return eslOK;
732 }
733 /*-------------- end, forward/backward engines  -----------------*/
734 
735 
736 
737 
738 
739 /*****************************************************************
740  * 4. Benchmark driver.
741  *****************************************************************/
742 #ifdef p7FWDBACK_BENCHMARK
743 /* -c, -x options are for debugging and testing: see fwdfilter.c for explanation */
744 /*
745    icc  -O3 -static -o fwdback_benchmark -I.. -L.. -I../../easel -L../../easel -Dp7FWDBACK_BENCHMARK fwdback.c -lhmmer -leasel -lm
746 
747    ./fwdback_benchmark <hmmfile>           runs benchmark on both Forward and Backward parser
748    ./fwdback_benchmark -c -N100 <hmmfile>  compare scores of VMX to generic impl
749    ./fwdback_benchmark -x -N100 <hmmfile>  test that scores match trusted implementation.
750  */
751 #include "p7_config.h"
752 
753 #include "easel.h"
754 #include "esl_alphabet.h"
755 #include "esl_getopts.h"
756 #include "esl_random.h"
757 #include "esl_randomseq.h"
758 #include "esl_stopwatch.h"
759 
760 #include "hmmer.h"
761 #include "impl_vmx.h"
762 
763 static ESL_OPTIONS options[] = {
764   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
765   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
766   { "-c",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, "-x", "compare scores to generic implementation (debug)", 0 },
767   { "-s",        eslARG_INT,     "42", NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
768   { "-x",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, "-c", "equate scores to trusted implementation (debug)",  0 },
769   { "-L",        eslARG_INT,    "400", NULL, "n>0", NULL,  NULL, NULL, "length of random target seqs",                     0 },
770   { "-N",        eslARG_INT,  "50000", NULL, "n>0", NULL,  NULL, NULL, "number of random target seqs",                     0 },
771   { "-F",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, "-B", "only benchmark Forward",                           0 },
772   { "-B",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, "-F", "only benchmark Backward",                          0 },
773   { "-P",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "benchmark parsing version, not full version",      0 },
774   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
775 };
776 static char usage[]  = "[-options] <hmmfile>";
777 static char banner[] = "benchmark driver for Forward, Backward implementations";
778 
779 int
main(int argc,char ** argv)780 main(int argc, char **argv)
781 {
782   ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
783   char           *hmmfile = esl_opt_GetArg(go, 1);
784   ESL_STOPWATCH  *w       = esl_stopwatch_Create();
785   ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
786   ESL_ALPHABET   *abc     = NULL;
787   P7_HMMFILE     *hfp     = NULL;
788   P7_HMM         *hmm     = NULL;
789   P7_BG          *bg      = NULL;
790   P7_PROFILE     *gm      = NULL;
791   P7_OPROFILE    *om      = NULL;
792   P7_GMX         *gx      = NULL;
793   P7_OMX         *fwd     = NULL;
794   P7_OMX         *bck     = NULL;
795   int             L       = esl_opt_GetInteger(go, "-L");
796   int             N       = esl_opt_GetInteger(go, "-N");
797   ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
798   int             i;
799   float           fsc, bsc;
800   float           fsc2, bsc2;
801   double          base_time, bench_time, Mcs;
802 
803   p7_FLogsumInit();
804 
805   if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
806   if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
807 
808   bg = p7_bg_Create(abc);
809   p7_bg_SetLength(bg, L);
810   gm = p7_profile_Create(hmm->M, abc);
811   p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
812   om = p7_oprofile_Create(gm->M, abc);
813   p7_oprofile_Convert(gm, om);
814   p7_oprofile_ReconfigLength(om, L);
815 
816   if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001)
817     p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode");
818 
819   if (esl_opt_GetBoolean(go, "-P")) {
820     fwd = p7_omx_Create(gm->M, 0, L);
821     bck = p7_omx_Create(gm->M, 0, L);
822   } else {
823     fwd = p7_omx_Create(gm->M, L, L);
824     bck = p7_omx_Create(gm->M, L, L);
825   }
826   gx  = p7_gmx_Create(gm->M, L);
827 
828   /* Get a baseline time: how long it takes just to generate the sequences */
829   esl_stopwatch_Start(w);
830   for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
831   esl_stopwatch_Stop(w);
832   base_time = w->user;
833 
834   esl_stopwatch_Start(w);
835   for (i = 0; i < N; i++)
836     {
837       esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
838       if (esl_opt_GetBoolean(go, "-P")) {
839 	if (! esl_opt_GetBoolean(go, "-B"))  p7_ForwardParser (dsq, L, om,      fwd, &fsc);
840 	if (! esl_opt_GetBoolean(go, "-F"))  p7_BackwardParser(dsq, L, om, fwd, bck, &bsc);
841       } else {
842 	if (! esl_opt_GetBoolean(go, "-B"))  p7_Forward (dsq, L, om,      fwd, &fsc);
843 	if (! esl_opt_GetBoolean(go, "-F"))  p7_Backward(dsq, L, om, fwd, bck, &bsc);
844       }
845 
846       if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x"))
847 	{
848 	  p7_GForward (dsq, L, gm, gx, &fsc2);
849 	  p7_GBackward(dsq, L, gm, gx, &bsc2);
850 	  printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2);
851 	}
852     }
853   esl_stopwatch_Stop(w);
854   bench_time = w->user - base_time;
855   Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
856   esl_stopwatch_Display(stdout, w, "# CPU time: ");
857   printf("# M    = %d\n",   gm->M);
858   printf("# %.1f Mc/s\n", Mcs);
859 
860   free(dsq);
861   p7_omx_Destroy(bck);
862   p7_omx_Destroy(fwd);
863   p7_gmx_Destroy(gx);
864   p7_oprofile_Destroy(om);
865   p7_profile_Destroy(gm);
866   p7_bg_Destroy(bg);
867   p7_hmm_Destroy(hmm);
868   p7_hmmfile_Close(hfp);
869   esl_alphabet_Destroy(abc);
870   esl_stopwatch_Destroy(w);
871   esl_randomness_Destroy(r);
872   esl_getopts_Destroy(go);
873   return 0;
874 }
875 #endif /*p7FWDBACK_BENCHMARK*/
876 /*------------------- end, benchmark driver ---------------------*/
877 
878 
879 
880 
881 
882 /*****************************************************************
883  * 5. Unit tests.
884  *****************************************************************/
885 #ifdef p7FWDBACK_TESTDRIVE
886 #include "esl_random.h"
887 #include "esl_randomseq.h"
888 
889 /*
890  * compare to GForward() scores.
891  */
892 static void
utest_fwdback(ESL_RANDOMNESS * r,ESL_ALPHABET * abc,P7_BG * bg,int M,int L,int N)893 utest_fwdback(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N)
894 {
895   char        *msg = "forward/backward unit test failed";
896   P7_HMM      *hmm = NULL;
897   P7_PROFILE  *gm  = NULL;
898   P7_OPROFILE *om  = NULL;
899   ESL_DSQ     *dsq = malloc(sizeof(ESL_DSQ) * (L+2));
900   P7_OMX      *fwd = p7_omx_Create(M, 0, L);
901   P7_OMX      *bck = p7_omx_Create(M, 0, L);
902   P7_OMX      *oxf = p7_omx_Create(M, L, L);
903   P7_OMX      *oxb = p7_omx_Create(M, L, L);
904   P7_GMX      *gx  = p7_gmx_Create(M, L);
905   float tolerance;
906   float fsc1, fsc2;
907   float bsc1, bsc2;
908   float generic_sc;
909 
910   p7_FLogsumInit();
911   if (p7_FLogsumError(-0.4, -0.5) > 0.0001) tolerance = 1.0;  /* weaker test against GForward()   */
912   else tolerance = 0.0001;   /* stronger test: FLogsum() is in slow exact mode. */
913 
914   p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om);
915   while (N--)
916     {
917       esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
918 
919       p7_Forward       (dsq, L, om, oxf,      &fsc1);
920       p7_Backward      (dsq, L, om, oxf, oxb, &bsc1);
921       p7_ForwardParser (dsq, L, om, fwd,      &fsc2);
922       p7_BackwardParser(dsq, L, om, fwd, bck, &bsc2);
923       p7_GForward      (dsq, L, gm, gx,  &generic_sc);
924 
925       /* Forward and Backward scores should agree with high tolerance */
926       if (fabs(fsc1-bsc1) > 0.0001)    esl_fatal(msg);
927       if (fabs(fsc2-bsc2) > 0.0001)    esl_fatal(msg);
928       if (fabs(fsc1-fsc2) > 0.0001)    esl_fatal(msg);
929 
930       /* GForward scores should approximate Forward scores,
931        * with tolerance that depends on how logsum.c was compiled
932        */
933       if (fabs(fsc1-generic_sc) > tolerance) esl_fatal(msg);
934     }
935 
936   free(dsq);
937   p7_hmm_Destroy(hmm);
938   p7_omx_Destroy(oxb);
939   p7_omx_Destroy(oxf);
940   p7_omx_Destroy(bck);
941   p7_omx_Destroy(fwd);
942   p7_gmx_Destroy(gx);
943   p7_profile_Destroy(gm);
944   p7_oprofile_Destroy(om);
945 }
946 #endif /*p7FWDBACK_TESTDRIVE*/
947 /*---------------------- end, unit tests ------------------------*/
948 
949 
950 
951 
952 /*****************************************************************
953  * 6. Test driver
954  *****************************************************************/
955 #ifdef p7FWDBACK_TESTDRIVE
956 /*
957    gcc -g -Wall -maltivec -std=gnu99 -o fwdback_utest -I.. -L.. -I../../easel -L../../easel -Dp7FWDBACK_TESTDRIVE fwdback.c -lhmmer -leasel -lm
958    ./fwdback_utest
959  */
960 #include "p7_config.h"
961 
962 #include "easel.h"
963 #include "esl_alphabet.h"
964 #include "esl_getopts.h"
965 #include "esl_random.h"
966 
967 #include "hmmer.h"
968 #include "impl_vmx.h"
969 
970 static ESL_OPTIONS options[] = {
971   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
972   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",           0 },
973   { "-s",        eslARG_INT,     "42", NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                  0 },
974   { "-L",        eslARG_INT,    "200", NULL, NULL,  NULL,  NULL, NULL, "size of random sequences to sample",             0 },
975   { "-M",        eslARG_INT,    "145", NULL, NULL,  NULL,  NULL, NULL, "size of random models to sample",                0 },
976   { "-N",        eslARG_INT,    "100", NULL, NULL,  NULL,  NULL, NULL, "number of random sequences to sample",           0 },
977   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
978 };
979 static char usage[]  = "[-options]";
980 static char banner[] = "test driver for VMX Forward, Backward implementations";
981 
982 int
main(int argc,char ** argv)983 main(int argc, char **argv)
984 {
985   ESL_GETOPTS    *go   = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
986   ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
987   ESL_ALPHABET   *abc  = NULL;
988   P7_BG          *bg   = NULL;
989   int             M    = esl_opt_GetInteger(go, "-M");
990   int             L    = esl_opt_GetInteger(go, "-L");
991   int             N    = esl_opt_GetInteger(go, "-N");
992 
993   /* First round of tests for DNA alphabets.  */
994   if ((abc = esl_alphabet_Create(eslDNA)) == NULL)  esl_fatal("failed to create alphabet");
995   if ((bg = p7_bg_Create(abc))            == NULL)  esl_fatal("failed to create null model");
996 
997   utest_fwdback(r, abc, bg, M, L, N);   /* normal sized models */
998   utest_fwdback(r, abc, bg, 1, L, 10);  /* size 1 models       */
999   utest_fwdback(r, abc, bg, M, 1, 10);  /* size 1 sequences    */
1000 
1001   esl_alphabet_Destroy(abc);
1002   p7_bg_Destroy(bg);
1003 
1004   /* Second round of tests for amino alphabets.  */
1005   if ((abc = esl_alphabet_Create(eslAMINO)) == NULL)  esl_fatal("failed to create alphabet");
1006   if ((bg = p7_bg_Create(abc))              == NULL)  esl_fatal("failed to create null model");
1007 
1008   utest_fwdback(r, abc, bg, M, L, N);
1009   utest_fwdback(r, abc, bg, 1, L, 10);
1010   utest_fwdback(r, abc, bg, M, 1, 10);
1011 
1012   esl_alphabet_Destroy(abc);
1013   p7_bg_Destroy(bg);
1014 
1015   esl_getopts_Destroy(go);
1016   esl_randomness_Destroy(r);
1017   return eslOK;
1018 }
1019 #endif /*p7FWDBACK_TESTDRIVE*/
1020 /*--------------------- end, test driver ------------------------*/
1021 
1022 
1023 
1024 /*****************************************************************
1025  * 7. Example
1026  *****************************************************************/
1027 #ifdef p7FWDBACK_EXAMPLE
1028 /* Useful for debugging on small HMMs and sequences.
1029  *
1030  * Compares to GForward().
1031  *
1032    gcc -g -Wall -maltivec -std=gnu99 -o fwdback_example -I.. -L.. -I../../easel -L../../easel -Dp7FWDBACK_EXAMPLE fwdback.c -lhmmer -leasel -lm
1033    ./fwdback_example <hmmfile> <seqfile>
1034  */
1035 #include "p7_config.h"
1036 
1037 #include "easel.h"
1038 #include "esl_alphabet.h"
1039 #include "esl_exponential.h"
1040 #include "esl_getopts.h"
1041 #include "esl_sq.h"
1042 #include "esl_sqio.h"
1043 
1044 #include "hmmer.h"
1045 #include "impl_vmx.h"
1046 
1047 static ESL_OPTIONS options[] = {
1048   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
1049   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
1050   { "-1",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "output in one line awkable format",                0 },
1051   { "-P",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "output in profmark format",                        0 },
1052   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
1053 };
1054 static char usage[]  = "[-options] <hmmfile> <seqfile>";
1055 static char banner[] = "example of Forward/Backward (VMX versions)";
1056 
1057 int
main(int argc,char ** argv)1058 main(int argc, char **argv)
1059 {
1060   ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
1061   char           *hmmfile = esl_opt_GetArg(go, 1);
1062   char           *seqfile = esl_opt_GetArg(go, 2);
1063   ESL_ALPHABET   *abc     = NULL;
1064   P7_HMMFILE     *hfp     = NULL;
1065   P7_HMM         *hmm     = NULL;
1066   P7_BG          *bg      = NULL;
1067   P7_PROFILE     *gm      = NULL;
1068   P7_OPROFILE    *om      = NULL;
1069   P7_GMX         *gx      = NULL;
1070   P7_OMX         *fwd     = NULL;
1071   P7_OMX         *bck     = NULL;
1072   ESL_SQ         *sq      = NULL;
1073   ESL_SQFILE     *sqfp    = NULL;
1074   int             format  = eslSQFILE_UNKNOWN;
1075   float           fraw, braw, nullsc, fsc;
1076   float           gfraw, gbraw, gfsc;
1077   double          P, gP;
1078   int             status;
1079 
1080   /* Read in one HMM */
1081   if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
1082   if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
1083 
1084   /* Open sequence file for reading */
1085   sq     = esl_sq_CreateDigital(abc);
1086   status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
1087   if      (status == eslENOTFOUND) p7_Fail("No such file.");
1088   else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
1089   else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
1090   else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);
1091 
1092   /* create default null model, then create and optimize profile */
1093   bg = p7_bg_Create(abc);
1094   p7_bg_SetLength(bg, sq->n);
1095   gm = p7_profile_Create(hmm->M, abc);
1096   p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
1097   om = p7_oprofile_Create(gm->M, abc);
1098   p7_oprofile_Convert(gm, om);
1099 
1100   /* p7_oprofile_Dump(stdout, om);  */
1101 
1102   /* allocate DP matrices for O(M+L) parsers */
1103   fwd = p7_omx_Create(gm->M, 0, sq->n);
1104   bck = p7_omx_Create(gm->M, 0, sq->n);
1105   gx  = p7_gmx_Create(gm->M,    sq->n);
1106 
1107   /* allocate DP matrices for O(ML) fills */
1108   /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */
1109   /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */
1110 
1111   /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */     /* makes the fast DP algorithms dump their matrices */
1112   /* p7_omx_SetDumpMode(stdout, bck, TRUE); */
1113 
1114   while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
1115     {
1116       p7_oprofile_ReconfigLength(om, sq->n);
1117       p7_ReconfigLength(gm,          sq->n);
1118       p7_bg_SetLength(bg,            sq->n);
1119       p7_omx_GrowTo(fwd, om->M, 0,   sq->n);
1120       p7_omx_GrowTo(bck, om->M, 0,   sq->n);
1121       p7_gmx_GrowTo(gx,  gm->M,      sq->n);
1122 
1123       p7_bg_NullOne  (bg, sq->dsq, sq->n, &nullsc);
1124 
1125       p7_ForwardParser (sq->dsq, sq->n, om,      fwd, &fraw);
1126       p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw);
1127 
1128       /* p7_Forward (sq->dsq, sq->n, om,      fwd, &fsc);        printf("forward:              %.2f nats\n", fsc);  */
1129       /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc);        printf("backward:             %.2f nats\n", bsc);  */
1130 
1131       /* Comparison to other F/B implementations */
1132       p7_GForward     (sq->dsq, sq->n, gm, gx,  &gfraw);
1133       p7_GBackward    (sq->dsq, sq->n, gm, gx,  &gbraw);
1134 
1135       /* p7_gmx_Dump(stdout, gx, p7_DEFAULT);  */
1136 
1137       fsc  =  (fraw-nullsc) / eslCONST_LOG2;
1138       gfsc = (gfraw-nullsc) / eslCONST_LOG2;
1139       P  = esl_exp_surv(fsc,   om->evparam[p7_FTAU],  om->evparam[p7_FLAMBDA]);
1140       gP = esl_exp_surv(gfsc,  gm->evparam[p7_FTAU],  gm->evparam[p7_FLAMBDA]);
1141 
1142       if (esl_opt_GetBoolean(go, "-1"))
1143 	{
1144 	  printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc);
1145 	}
1146       else if (esl_opt_GetBoolean(go, "-P"))
1147 	{ /* output suitable for direct use in profmark benchmark postprocessors: */
1148 	  printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name);
1149 	}
1150       else
1151 	{
1152 	  printf("target sequence:      %s\n",        sq->name);
1153 	  printf("fwd filter raw score: %.2f nats\n", fraw);
1154 	  printf("bck filter raw score: %.2f nats\n", braw);
1155 	  printf("null score:           %.2f nats\n", nullsc);
1156 	  printf("per-seq score:        %.2f bits\n", fsc);
1157 	  printf("P-value:              %g\n",        P);
1158 	  printf("GForward raw score:   %.2f nats\n", gfraw);
1159 	  printf("GBackward raw score:  %.2f nats\n", gbraw);
1160 	  printf("GForward seq score:   %.2f bits\n", gfsc);
1161 	  printf("GForward P-value:     %g\n",        gP);
1162 	}
1163 
1164       esl_sq_Reuse(sq);
1165     }
1166 
1167   /* cleanup */
1168   esl_sq_Destroy(sq);
1169   esl_sqfile_Close(sqfp);
1170   p7_omx_Destroy(bck);
1171   p7_omx_Destroy(fwd);
1172   p7_gmx_Destroy(gx);
1173   p7_oprofile_Destroy(om);
1174   p7_profile_Destroy(gm);
1175   p7_bg_Destroy(bg);
1176   p7_hmm_Destroy(hmm);
1177   p7_hmmfile_Close(hfp);
1178   esl_alphabet_Destroy(abc);
1179   esl_getopts_Destroy(go);
1180   return 0;
1181 }
1182 #endif /*p7FWDBACK_EXAMPLE*/
1183 
1184 
1185