1 /* VMX implementation of Forward and Backward algorithms.
2 *
3 * Both profile and DP matrix are striped and interleaved, for fast
4 * SIMD vector operations. Calculations are in probability space
5 * (scaled odds ratios, actually) rather than log probabilities,
6 * allowing fast multiply/add operations rather than slow Logsum()
7 * calculations. Sparse rescaling is used to achieve full dynamic
8 * range of scores.
9 *
10 * The Forward and Backward implementations may be used either in a
11 * full O(ML) mode that keeps an entire DP matrix, or in a O(M+L)
12 * linear memory "parsing" mode that only keeps one row of memory for
13 * the main MDI states and one column 0..L for the special states
14 * B,E,N,C,J. Keeping a full matrix allows subsequent stochastic
15 * traceback or posterior decoding of any state. In parsing mode, we
16 * can do posterior decoding on the special states and determine
17 * regions of the target sequence that are generated by the
18 * nonhomology states (NCJ) versus not -- thus, identifying where
19 * high-probability "regions" are, the first step of identifying the
20 * domain structure of a target sequence.
21 *
22 * Contents:
23 * 1. Forward/Backward wrapper API
24 * 2. Forward and Backward engine implementations
25 * 4. Benchmark driver.
26 * 5. Unit tests.
27 * 6. Test driver.
28 * 7. Example.
29 *
30 * SRE, Thu Jul 31 08:43:20 2008 [Janelia]
31 */
32 #include "p7_config.h"
33
34 #include <stdio.h>
35 #include <math.h>
36
37 #ifndef __APPLE_ALTIVEC__
38 #include <altivec.h>
39 #endif
40
41 #include "easel.h"
42 #include "esl_vmx.h"
43
44 #include "hmmer.h"
45 #include "impl_vmx.h"
46
47 static int forward_engine (int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *fwd, float *opt_sc);
48 static int backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc);
49
50
51 /*****************************************************************
52 * 1. Forward/Backward API.
53 *****************************************************************/
54
55 /* Function: p7_Forward()
56 * Synopsis: The Forward algorithm, full matrix fill version.
57 * Incept: SRE, Fri Aug 15 18:59:43 2008 [Casa de Gatos]
58 *
59 * Purpose: Calculates the Forward algorithm for sequence <dsq> of
60 * length <L> residues, using optimized profile <om>, and a
61 * preallocated DP matrix <ox>. Upon successful return, <ox>
62 * contains the filled Forward matrix, and <*opt_sc>
63 * optionally contains the raw Forward score in nats.
64 *
65 * This calculation requires $O(ML)$ memory and time.
66 * The caller must provide a suitably allocated full <ox>
67 * by calling <ox = p7_omx_Create(M, L, L)> or
68 * <p7_omx_GrowTo(ox, M, L, L)>.
69 *
70 * The model <om> must be configured in local alignment
71 * mode. The sparse rescaling method used to keep
72 * probability values within single-precision floating
73 * point dynamic range cannot be safely applied to models in
74 * glocal or global mode.
75 *
76 * Args: dsq - digital target sequence, 1..L
77 * L - length of dsq in residues
78 * om - optimized profile
79 * ox - RETURN: Forward DP matrix
80 * opt_sc - RETURN: Forward score (in nats)
81 *
82 * Returns: <eslOK> on success.
83 *
84 * Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
85 * isn't in local alignment mode.
86 * <eslERANGE> if the score exceeds the limited range of
87 * a probability-space odds ratio.
88 * In either case, <*opt_sc> is undefined.
89 */
90 int
p7_Forward(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,P7_OMX * ox,float * opt_sc)91 p7_Forward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
92 {
93 #if eslDEBUGLEVEL > 0
94 if (om->M > ox->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
95 if (L >= ox->validR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
96 if (L >= ox->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
97 if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
98 #endif
99
100 return forward_engine(TRUE, dsq, L, om, ox, opt_sc);
101 }
102
103 /* Function: p7_ForwardParser()
104 * Synopsis: The Forward algorithm, linear memory parsing version.
105 * Incept: SRE, Fri Aug 15 19:05:26 2008 [Casa de Gatos]
106 *
107 * Purpose: Same as <p7_Forward() except that the full matrix isn't
108 * kept. Instead, a linear $O(M+L)$ memory algorithm is
109 * used, keeping only the DP matrix values for the special
110 * (BENCJ) states. These are sufficient to do posterior
111 * decoding to identify high-probability regions where
112 * domains are.
113 *
114 * The caller must provide a suitably allocated "parsing"
115 * <ox> by calling <ox = p7_omx_Create(M, 0, L)> or
116 * <p7_omx_GrowTo(ox, M, 0, L)>.
117 *
118 * Args: dsq - digital target sequence, 1..L
119 * L - length of dsq in residues
120 * om - optimized profile
121 * ox - RETURN: Forward DP matrix
122 * ret_sc - RETURN: Forward score (in nats)
123 *
124 * Returns: <eslOK> on success.
125 *
126 * Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
127 * isn't in local alignment mode.
128 * <eslERANGE> if the score exceeds the limited range of
129 * a probability-space odds ratio.
130 * In either case, <*opt_sc> is undefined.
131 */
132 int
p7_ForwardParser(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,P7_OMX * ox,float * opt_sc)133 p7_ForwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
134 {
135 #if eslDEBUGLEVEL > 0
136 if (om->M > ox->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
137 if (ox->validR < 1) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
138 if (L >= ox->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
139 if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
140 #endif
141
142 return forward_engine(FALSE, dsq, L, om, ox, opt_sc);
143 }
144
145
146
147 /* Function: p7_Backward()
148 * Synopsis: The Backward algorithm; full matrix fill version.
149 * Incept: SRE, Sat Aug 16 08:34:22 2008 [Janelia]
150 *
151 * Purpose: Calculates the Backward algorithm for sequence <dsq> of
152 * length <L> residues, using optimized profile <om>, and a
153 * preallocated DP matrix <bck>. A filled Forward matrix
154 * must also be provided in <fwd>, because we need to use
155 * the same sparse scaling factors that Forward used. The
156 * <bck> matrix is filled in, and the Backward score (in
157 * nats) is optionally returned in <opt_sc>.
158 *
159 * This calculation requires $O(ML)$ memory and time. The
160 * caller must provide a suitably allocated full <bck> by
161 * calling <bck = p7_omx_Create(M, L, L)> or
162 * <p7_omx_GrowTo(bck, M, L, L)>.
163 *
164 * Because only the sparse scaling factors are needed from
165 * the <fwd> matrix, the caller may have this matrix
166 * calculated either in full or parsing mode.
167 *
168 * The model <om> must be configured in local alignment
169 * mode. The sparse rescaling method used to keep
170 * probability values within single-precision floating
171 * point dynamic range cannot be safely applied to models in
172 * glocal or global mode.
173 *
174 * Args: dsq - digital target sequence, 1..L
175 * L - length of dsq in residues
176 * om - optimized profile
177 * fwd - filled Forward DP matrix, for scale factors
178 * do_full - TRUE=full matrix; FALSE=linear memory parse mode
179 * bck - RETURN: filled Backward matrix
180 * opt_sc - optRETURN: Backward score (in nats)
181 *
182 * Returns: <eslOK> on success.
183 *
184 * Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
185 * isn't in local alignment mode.
186 * <eslERANGE> if the score exceeds the limited range of
187 * a probability-space odds ratio.
188 * In either case, <*opt_sc> is undefined.
189 */
190 int
p7_Backward(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,const P7_OMX * fwd,P7_OMX * bck,float * opt_sc)191 p7_Backward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
192 {
193 #if eslDEBUGLEVEL > 0
194 if (om->M > bck->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
195 if (L >= bck->validR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
196 if (L >= bck->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
197 if (L != fwd->L) ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L");
198 if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
199 #endif
200
201 return backward_engine(TRUE, dsq, L, om, fwd, bck, opt_sc);
202 }
203
204
205
206 /* Function: p7_BackwardParser()
207 * Synopsis: The Backward algorithm, linear memory parsing version.
208 * Incept: SRE, Sat Aug 16 08:34:13 2008 [Janelia]
209 *
210 * Purpose: Same as <p7_Backward()> except that the full matrix isn't
211 * kept. Instead, a linear $O(M+L)$ memory algorithm is
212 * used, keeping only the DP matrix values for the special
213 * (BENCJ) states. These are sufficient to do posterior
214 * decoding to identify high-probability regions where
215 * domains are.
216 *
217 * The caller must provide a suitably allocated "parsing"
218 * <bck> by calling <bck = p7_omx_Create(M, 0, L)> or
219 * <p7_omx_GrowTo(bck, M, 0, L)>.
220 *
221 * Args: dsq - digital target sequence, 1..L
222 * L - length of dsq in residues
223 * om - optimized profile
224 * fwd - filled Forward DP matrix, for scale factors
225 * bck - RETURN: filled Backward matrix
226 * opt_sc - optRETURN: Backward score (in nats)
227 *
228 * Returns: <eslOK> on success.
229 *
230 * Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
231 * isn't in local alignment mode.
232 * <eslERANGE> if the score exceeds the limited range of
233 * a probability-space odds ratio.
234 * In either case, <*opt_sc> is undefined.
235 */
236 int
p7_BackwardParser(const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,const P7_OMX * fwd,P7_OMX * bck,float * opt_sc)237 p7_BackwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
238 {
239 #if eslDEBUGLEVEL > 0
240 if (om->M > bck->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
241 if (bck->validR < 1) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
242 if (L >= bck->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
243 if (L != fwd->L) ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L");
244 if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
245 #endif
246
247 return backward_engine(FALSE, dsq, L, om, fwd, bck, opt_sc);
248 }
249
250
251
252 /*****************************************************************
253 * 2. Forward/Backward engine implementations (called thru API)
254 *****************************************************************/
255
256 static int
forward_engine(int do_full,const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,P7_OMX * ox,float * opt_sc)257 forward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc)
258 {
259 vector float mpv, dpv, ipv; /* previous row values */
260 vector float sv; /* temp storage of 1 curr row value in progress */
261 vector float dcv; /* delayed storage of D(i,q+1) */
262 vector float xEv; /* E state: keeps max for Mk->E as we go */
263 vector float xBv; /* B state: splatted vector of B[i-1] for B->Mk calculations */
264 vector float zerov; /* splatted 0.0's in a vector */
265 float xN, xE, xB, xC, xJ; /* special states' scores */
266 int i; /* counter over sequence positions 1..L */
267 int q; /* counter over quads 0..nq-1 */
268 int j; /* counter over DD iterations (4 is full serialization) */
269 int Q = p7O_NQF(om->M); /* segment length: # of vectors */
270 vector float *dpc = ox->dpf[0]; /* current row, for use in {MDI}MO(dpp,q) access macro */
271 vector float *dpp; /* previous row, for use in {MDI}MO(dpp,q) access macro */
272 vector float *rp; /* will point at om->rfv[x] for residue x[i] */
273 vector float *tp; /* will point into (and step thru) om->tfv */
274
275 /* Initialization. */
276 ox->M = om->M;
277 ox->L = L;
278 ox->has_own_scales = TRUE; /* all forward matrices control their own scalefactors */
279 zerov = (vector float) vec_splat_u32(0);
280 for (q = 0; q < Q; q++)
281 MMO(dpc,q) = IMO(dpc,q) = DMO(dpc,q) = zerov;
282 xE = ox->xmx[p7X_E] = 0.;
283 xN = ox->xmx[p7X_N] = 1.;
284 xJ = ox->xmx[p7X_J] = 0.;
285 xB = ox->xmx[p7X_B] = om->xf[p7O_N][p7O_MOVE];
286 xC = ox->xmx[p7X_C] = 0.;
287
288 ox->xmx[p7X_SCALE] = 1.0;
289 ox->totscale = 0.0;
290
291 #if eslDEBUGLEVEL > 0
292 if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, 0, 9, 5, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=0, width=8, precision=5*/
293 #endif
294
295 for (i = 1; i <= L; i++)
296 {
297 dpp = dpc;
298 dpc = ox->dpf[do_full * i]; /* avoid conditional, use do_full as kronecker delta */
299 rp = om->rfv[dsq[i]];
300 tp = om->tfv;
301 dcv = (vector float) vec_splat_u32(0);
302 xEv = (vector float) vec_splat_u32(0);
303 xBv = esl_vmx_set_float(xB);
304
305 /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. Shift zeros on. */
306 mpv = vec_sld(zerov, MMO(dpp,Q-1), 12);
307 dpv = vec_sld(zerov, DMO(dpp,Q-1), 12);
308 ipv = vec_sld(zerov, IMO(dpp,Q-1), 12);
309
310 for (q = 0; q < Q; q++)
311 {
312 /* Calculate new MMO(i,q); don't store it yet, hold it in sv. */
313 sv = (vector float) vec_splat_u32(0);
314 sv = vec_madd(xBv, *tp, sv); tp++;
315 sv = vec_madd(mpv, *tp, sv); tp++;
316 sv = vec_madd(ipv, *tp, sv); tp++;
317 sv = vec_madd(dpv, *tp, sv); tp++;
318 sv = vec_madd(sv, *rp, zerov); rp++;
319 xEv = vec_add(xEv, sv);
320
321 /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
322 * {MDI}MX(q) is then the current, not the prev row
323 */
324 mpv = MMO(dpp,q);
325 dpv = DMO(dpp,q);
326 ipv = IMO(dpp,q);
327
328 /* Do the delayed stores of {MD}(i,q) now that memory is usable */
329 MMO(dpc,q) = sv;
330 DMO(dpc,q) = dcv;
331
332 /* Calculate the next D(i,q+1) partially: M->D only;
333 * delay storage, holding it in dcv
334 */
335 dcv = vec_madd(sv, *tp, zerov); tp++;
336
337 /* Calculate and store I(i,q); assumes odds ratio for emission is 1.0 */
338 sv = vec_madd(mpv, *tp, zerov); tp++;
339 IMO(dpc,q) = vec_madd(ipv, *tp, sv); tp++;
340 }
341
342 /* Now the DD paths. We would rather not serialize them but
343 * in an accurate Forward calculation, we have few options.
344 */
345 /* dcv has carried through from end of q loop above; store it
346 * in first pass, we add M->D and D->D path into DMX
347 */
348 /* We're almost certainly're obligated to do at least one complete
349 * DD path to be sure:
350 */
351 dcv = vec_sld(zerov, dcv, 12);
352 DMO(dpc,0) = (vector float) vec_splat_u32(0);
353 tp = om->tfv + 7*Q; /* set tp to start of the DD's */
354 for (q = 0; q < Q; q++)
355 {
356 DMO(dpc,q) = vec_add(dcv, DMO(dpc,q));
357 dcv = vec_madd(DMO(dpc,q), *tp, zerov); tp++; /* extend DMO(q), so we include M->D and D->D paths */
358 }
359
360 /* now. on small models, it seems best (empirically) to just go
361 * ahead and serialize. on large models, we can do a bit better,
362 * by testing for when dcv (DD path) accrued to DMO(q) is below
363 * machine epsilon for all q, in which case we know DMO(q) are all
364 * at their final values. The tradeoff point is (empirically) somewhere around M=100,
365 * at least on my desktop. We don't worry about the conditional here;
366 * it's outside any inner loops.
367 */
368 if (om->M < 100)
369 { /* Fully serialized version */
370 for (j = 1; j < 4; j++)
371 {
372 dcv = vec_sld(zerov, dcv, 12);
373 tp = om->tfv + 7*Q; /* set tp to start of the DD's */
374 for (q = 0; q < Q; q++)
375 { /* note, extend dcv, not DMO(q); only adding DD paths now */
376 DMO(dpc,q) = vec_add(dcv, DMO(dpc,q));
377 dcv = vec_madd(dcv, *tp, zerov); tp++;
378 }
379 }
380 }
381 else
382 { /* Slightly parallelized version, but which incurs some overhead */
383 for (j = 1; j < 4; j++)
384 {
385 vector bool int cv; /* keeps track of whether any DD's change DMO(q) */
386
387 dcv = vec_sld(zerov, dcv, 12);
388 tp = om->tfv + 7*Q; /* set tp to start of the DD's */
389 cv = (vector bool int) vec_splat_u32(0);
390 for (q = 0; q < Q; q++)
391 { /* using cmpgt below tests if DD changed any DMO(q) *without* conditional branch */
392 sv = vec_add(dcv, DMO(dpc,q));
393 cv = vec_or(cv, vec_cmpgt(sv, DMO(dpc,q)));
394 DMO(dpc,q) = sv; /* store new DMO(q) */
395 dcv = vec_madd(dcv, *tp, zerov); tp++; /* note, extend dcv, not DMO(q) */
396 }
397 /* DD's didn't change any DMO(q)? Then done, break out. */
398 if (vec_all_eq(cv, (vector bool int)zerov)) break;
399 }
400 }
401
402 /* Add D's to xEv */
403 for (q = 0; q < Q; q++) xEv = vec_add(DMO(dpc,q), xEv);
404
405 /* Finally the "special" states, which start from Mk->E (->C, ->J->B) */
406 /* The following incantation is a horizontal sum of xEv's elements */
407 /* These must follow DD calculations, because D's contribute to E in Forward
408 * (as opposed to Viterbi)
409 */
410 xE = esl_vmx_hsum_float(xEv);
411
412 xN = xN * om->xf[p7O_N][p7O_LOOP];
413 xC = (xC * om->xf[p7O_C][p7O_LOOP]) + (xE * om->xf[p7O_E][p7O_MOVE]);
414 xJ = (xJ * om->xf[p7O_J][p7O_LOOP]) + (xE * om->xf[p7O_E][p7O_LOOP]);
415 xB = (xJ * om->xf[p7O_J][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_MOVE]);
416 /* and now xB will carry over into next i, and xC carries over after i=L */
417
418 /* Sparse rescaling. xE above threshold? trigger a rescaling event. */
419 if (xE > 1.0e4) /* that's a little less than e^10, ~10% of our dynamic range */
420 {
421 xN = xN / xE;
422 xC = xC / xE;
423 xJ = xJ / xE;
424 xB = xB / xE;
425 xEv = esl_vmx_set_float(1.0 / xE);
426 for (q = 0; q < Q; q++)
427 {
428 MMO(dpc,q) = vec_madd(MMO(dpc,q), xEv, zerov);
429 DMO(dpc,q) = vec_madd(DMO(dpc,q), xEv, zerov);
430 IMO(dpc,q) = vec_madd(IMO(dpc,q), xEv, zerov);
431 }
432 ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = xE;
433 ox->totscale += log(xE);
434 xE = 1.0;
435 }
436 else ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = 1.0;
437
438 /* Storage of the specials. We could've stored these already
439 * but using xE, etc. variables makes it easy to convert this
440 * code to O(M) memory versions just by deleting storage steps.
441 */
442 ox->xmx[i*p7X_NXCELLS+p7X_E] = xE;
443 ox->xmx[i*p7X_NXCELLS+p7X_N] = xN;
444 ox->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
445 ox->xmx[i*p7X_NXCELLS+p7X_B] = xB;
446 ox->xmx[i*p7X_NXCELLS+p7X_C] = xC;
447
448 #if eslDEBUGLEVEL > 0
449 if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, i, 9, 5, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=i, width=8, precision=5*/
450 #endif
451 } /* end loop over sequence residues 1..L */
452
453 /* finally C->T, and flip total score back to log space (nats) */
454 /* On overflow, xC is inf or nan (nan arises because inf*0 = nan). */
455 /* On an underflow (which shouldn't happen), we counterintuitively return infinity:
456 * the effect of this is to force the caller to rescore us with full range.
457 */
458 if (isnan(xC)) ESL_EXCEPTION(eslERANGE, "forward score is NaN");
459 else if (L>0 && xC == 0.0) ESL_EXCEPTION(eslERANGE, "forward score underflow (is 0.0)"); /* [J5/118] */
460 else if (isinf(xC) == 1) ESL_EXCEPTION(eslERANGE, "forward score overflow (is infinity)");
461
462 if (opt_sc != NULL) *opt_sc = ox->totscale + log(xC * om->xf[p7O_C][p7O_MOVE]);
463 return eslOK;
464 }
465
466
467
468 static int
backward_engine(int do_full,const ESL_DSQ * dsq,int L,const P7_OPROFILE * om,const P7_OMX * fwd,P7_OMX * bck,float * opt_sc)469 backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc)
470 {
471 vector float mpv, ipv, dpv; /* previous row values */
472 vector float mcv, dcv; /* current row values */
473 vector float tmmv, timv, tdmv; /* tmp vars for accessing rotated transition scores */
474 vector float xBv; /* collects B->Mk components of B(i) */
475 vector float xEv; /* splatted E(i) */
476 vector float zerov; /* splatted 0.0's in a vector */
477 float xN, xE, xB, xC, xJ; /* special states' scores */
478 int i; /* counter over sequence positions 0,1..L */
479 int q; /* counter over quads 0..Q-1 */
480 int Q = p7O_NQF(om->M); /* segment length: # of vectors */
481 int j; /* DD segment iteration counter (4 = full serialization) */
482 vector float *dpc; /* current DP row */
483 vector float *dpp; /* next ("previous") DP row */
484 vector float *rp; /* will point into om->rfv[x] for residue x[i+1] */
485 vector float *tp; /* will point into (and step thru) om->tfv transition scores */
486
487 /* initialize the L row. */
488 bck->M = om->M;
489 bck->L = L;
490 bck->has_own_scales = FALSE; /* backwards scale factors are *usually* given by <fwd> */
491 dpc = bck->dpf[L * do_full];
492 xJ = 0.0;
493 xB = 0.0;
494 xN = 0.0;
495 xC = om->xf[p7O_C][p7O_MOVE]; /* C<-T */
496 xE = xC * om->xf[p7O_E][p7O_MOVE]; /* E<-C, no tail */
497 xEv = esl_vmx_set_float(xE);
498 zerov = (vector float) vec_splat_u32(0);
499 dcv = (vector float) vec_splat_u32(0);; /* solely to silence a compiler warning */
500 for (q = 0; q < Q; q++) MMO(dpc,q) = DMO(dpc,q) = xEv;
501 for (q = 0; q < Q; q++) IMO(dpc,q) = zerov;
502
503 /* init row L's DD paths, 1) first segment includes xE, from DMO(q) */
504 tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
505 dpv = vec_sld(DMO(dpc,Q-1), zerov, 4);
506 for (q = Q-1; q >= 1; q--)
507 {
508 DMO(dpc,q) = vec_madd(dpv, *tp, DMO(dpc,q)); tp--;
509 dpv = DMO(dpc,q);
510 }
511 dcv = vec_madd(dpv, *tp, zerov);
512 DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
513
514 /* 2) three more passes, only extending DD component (dcv only; no xE contrib from DMO(q)) */
515 for (j = 1; j < 4; j++)
516 {
517 tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
518 dcv = vec_sld(dcv, zerov, 4);
519 for (q = Q-1; q >= 0; q--)
520 {
521 dcv = vec_madd(dcv, *tp, zerov); tp--;
522 DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
523 }
524 }
525
526 /* now MD init */
527 tp = om->tfv + 7*Q - 3; /* <*tp> now the [4 8 12 x] Mk->Dk+1 quad */
528 dcv = vec_sld(DMO(dpc,0), zerov, 4);
529 for (q = Q-1; q >= 0; q--)
530 {
531 MMO(dpc,q) = vec_madd(dcv, *tp, MMO(dpc,q)); tp -= 7;
532 dcv = DMO(dpc,q);
533 }
534
535 /* Sparse rescaling: same scale factors as fwd matrix */
536 if (fwd->xmx[L*p7X_NXCELLS+p7X_SCALE] > 1.0)
537 {
538 xE = xE / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
539 xN = xN / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
540 xC = xC / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
541 xJ = xJ / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
542 xB = xB / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
543 xEv = esl_vmx_set_float(1.0 / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE]);
544 for (q = 0; q < Q; q++) {
545 MMO(dpc,q) = vec_madd(MMO(dpc,q), xEv, zerov);
546 DMO(dpc,q) = vec_madd(DMO(dpc,q), xEv, zerov);
547 IMO(dpc,q) = vec_madd(IMO(dpc,q), xEv, zerov);
548 }
549 }
550 bck->xmx[L*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
551 bck->totscale = log(bck->xmx[L*p7X_NXCELLS+p7X_SCALE]);
552
553 /* Stores */
554 bck->xmx[L*p7X_NXCELLS+p7X_E] = xE;
555 bck->xmx[L*p7X_NXCELLS+p7X_N] = xN;
556 bck->xmx[L*p7X_NXCELLS+p7X_J] = xJ;
557 bck->xmx[L*p7X_NXCELLS+p7X_B] = xB;
558 bck->xmx[L*p7X_NXCELLS+p7X_C] = xC;
559
560 #if eslDEBUGLEVEL > 0
561 if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, L, 9, 4, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=L, width=9, precision=4*/
562 #endif
563
564 /* main recursion */
565 for (i = L-1; i >= 1; i--) /* backwards stride */
566 {
567 /* phase 1. B(i) collected. Old row destroyed, new row contains
568 * complete I(i,k), partial {MD}(i,k) w/ no {MD}->{DE} paths yet.
569 */
570 dpc = bck->dpf[i * do_full];
571 dpp = bck->dpf[(i+1) * do_full];
572 rp = om->rfv[dsq[i+1]] + Q-1; /* <*rp> is now the [4 8 12 x] match emission quad */
573 tp = om->tfv + 7*Q - 1; /* <*tp> is now the [4 8 12 x] TII transition quad */
574
575 /* leftshift the first transition quads */
576 tmmv = vec_sld(om->tfv[1], zerov, 4);
577 timv = vec_sld(om->tfv[2], zerov, 4);
578 tdmv = vec_sld(om->tfv[3], zerov, 4);
579
580 mpv = vec_madd(MMO(dpp,0), om->rfv[dsq[i+1]][0], zerov); /* precalc M(i+1,k+1)*e(M_k+1,x_{i+1}) */
581 mpv = vec_sld(mpv, zerov, 4);
582
583 xBv = zerov;
584 for (q = Q-1; q >= 0; q--) /* backwards stride */
585 {
586 vector float t1;
587
588 ipv = IMO(dpp,q); /* assumes emission odds ratio of 1.0; i+1's IMO(q) now free */
589 t1 = vec_madd(mpv, timv, zerov);
590 IMO(dpc,q) = vec_madd(ipv, *tp, t1); tp--;
591 DMO(dpc,q) = vec_madd(mpv, tdmv, zerov);
592 t1 = vec_madd(mpv, tmmv, zerov);
593 mcv = vec_madd(ipv, *tp, t1); tp -= 2;
594
595 /* obtain mpv for next q. i+1's MMO(q) is freed */
596 mpv = vec_madd(MMO(dpp,q), *rp, zerov); rp--;
597 MMO(dpc,q) = mcv;
598
599 tdmv = *tp; tp--;
600 timv = *tp; tp--;
601 tmmv = *tp; tp--;
602
603 xBv = vec_madd(mpv, *tp, xBv); tp--;
604 }
605
606 /* phase 2: now that we have accumulated the B->Mk transitions in xBv, we can do the specials */
607 xB = esl_vmx_hsum_float(xBv);
608
609 xC = xC * om->xf[p7O_C][p7O_LOOP];
610 xJ = (xB * om->xf[p7O_J][p7O_MOVE]) + (xJ * om->xf[p7O_J][p7O_LOOP]); /* must come after xB */
611 xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]); /* must come after xB */
612 xE = (xC * om->xf[p7O_E][p7O_MOVE]) + (xJ * om->xf[p7O_E][p7O_LOOP]); /* must come after xJ, xC */
613 xEv = esl_vmx_set_float(xE); /* splat */
614
615
616 /* phase 3: {MD}->E paths and one step of the D->D paths */
617 tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
618 dpv = vec_add(DMO(dpc,0), xEv);
619 dpv = vec_sld(dpv, zerov, 4);
620 for (q = Q-1; q >= 1; q--)
621 {
622 dcv = vec_madd(dpv, *tp, xEv); tp--;
623 DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
624 dpv = DMO(dpc,q);
625 MMO(dpc,q) = vec_add(MMO(dpc,q), xEv);
626 }
627 dcv = vec_madd(dpv, *tp, zerov);
628 DMO(dpc,q) = vec_add(DMO(dpc,q), vec_add(dcv, xEv));
629 MMO(dpc,q) = vec_add(MMO(dpc,q), xEv);
630
631 /* phase 4: finish extending the DD paths */
632 /* fully serialized for now */
633 for (j = 1; j < 4; j++) /* three passes: we've already done 1 segment, we need 4 total */
634 {
635 dcv = vec_sld(dcv, zerov, 4);
636 tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
637 for (q = Q-1; q >= 0; q--)
638 {
639 dcv = vec_madd(dcv, *tp, zerov); tp--;
640 DMO(dpc,q) = vec_add(DMO(dpc,q), dcv);
641 }
642 }
643
644 /* phase 5: add M->D paths */
645 dcv = vec_sld(DMO(dpc,0), zerov, 4);
646 tp = om->tfv + 7*Q - 3; /* <*tp> is now the [4 8 12 x] Mk->Dk+1 quad */
647 for (q = Q-1; q >= 0; q--)
648 {
649 MMO(dpc,q) = vec_madd(dcv, *tp, MMO(dpc,q)); tp -= 7;
650 dcv = DMO(dpc,q);
651 }
652
653 /* Sparse rescaling */
654
655 /* In rare cases [J3/119] scale factors from <fwd> are
656 * insufficient and backwards will overflow. In this case, we
657 * switch on the fly to using our own scale factors, different
658 * from those in <fwd>. This will complicate subsequent
659 * posterior decoding routines.
660 */
661 if (xB > 1.0e16) bck->has_own_scales = TRUE;
662
663 if (bck->has_own_scales) bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = (xB > 1.0e4) ? xB : 1.0;
664 else bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[i*p7X_NXCELLS+p7X_SCALE];
665
666 if (bck->xmx[i*p7X_NXCELLS+p7X_SCALE] > 1.0)
667 {
668 xE /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
669 xN /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
670 xJ /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
671 xB /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
672 xC /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
673 xBv = esl_vmx_set_float(1.0 / bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
674 for (q = 0; q < Q; q++) {
675 MMO(dpc,q) = vec_madd(MMO(dpc,q), xBv, zerov);
676 DMO(dpc,q) = vec_madd(DMO(dpc,q), xBv, zerov);
677 IMO(dpc,q) = vec_madd(IMO(dpc,q), xBv, zerov);
678 }
679 bck->totscale += log(bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
680 }
681
682 /* Stores are separate only for pedagogical reasons: easy to
683 * turn this into a more memory efficient version just by
684 * deleting the stores.
685 */
686 bck->xmx[i*p7X_NXCELLS+p7X_E] = xE;
687 bck->xmx[i*p7X_NXCELLS+p7X_N] = xN;
688 bck->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
689 bck->xmx[i*p7X_NXCELLS+p7X_B] = xB;
690 bck->xmx[i*p7X_NXCELLS+p7X_C] = xC;
691
692 #if eslDEBUGLEVEL > 0
693 if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, i, 9, 4, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=i, width=9, precision=4*/
694 #endif
695 } /* thus ends the loop over sequence positions i */
696
697 /* Termination at i=0, where we can only reach N,B states. */
698 dpp = bck->dpf[1 * do_full];
699 tp = om->tfv; /* <*tp> is now the [1 5 9 13] TBMk transition quad */
700 rp = om->rfv[dsq[1]]; /* <*rp> is now the [1 5 9 13] match emission quad */
701 xBv = (vector float) vec_splat_u32(0);
702 for (q = 0; q < Q; q++)
703 {
704 mpv = vec_madd(MMO(dpp,q), *rp, zerov); rp++;
705 xBv = vec_madd(mpv, *tp, xBv); tp += 7;
706 }
707 /* horizontal sum of xBv */
708 xB = esl_vmx_hsum_float(xBv);
709
710 xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]);
711
712 bck->xmx[p7X_B] = xB;
713 bck->xmx[p7X_C] = 0.0;
714 bck->xmx[p7X_J] = 0.0;
715 bck->xmx[p7X_N] = xN;
716 bck->xmx[p7X_E] = 0.0;
717 bck->xmx[p7X_SCALE] = 1.0;
718
719 #if eslDEBUGLEVEL > 0
720 dpc = bck->dpf[0];
721 for (q = 0; q < Q; q++) /* Not strictly necessary, but if someone's looking at DP matrices, this is nice to do: */
722 MMO(dpc,q) = DMO(dpc,q) = IMO(dpc,q) = zerov;
723 if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, 0, 9, 4, bck->xmx[p7X_E], bck->xmx[p7X_N], bck->xmx[p7X_J], bck->xmx[p7X_B], bck->xmx[p7X_C]); /* logify=TRUE, <rowi>=0, width=9, precision=4*/
724 #endif
725
726 if (isnan(xN)) ESL_EXCEPTION(eslERANGE, "backward score is NaN");
727 else if (L>0 && xN == 0.0) ESL_EXCEPTION(eslERANGE, "backward score underflow (is 0.0)"); /* [J5/118] */
728 else if (isinf(xN) == 1) ESL_EXCEPTION(eslERANGE, "backward score overflow (is infinity)");
729
730 if (opt_sc != NULL) *opt_sc = bck->totscale + log(xN);
731 return eslOK;
732 }
733 /*-------------- end, forward/backward engines -----------------*/
734
735
736
737
738
739 /*****************************************************************
740 * 4. Benchmark driver.
741 *****************************************************************/
742 #ifdef p7FWDBACK_BENCHMARK
743 /* -c, -x options are for debugging and testing: see fwdfilter.c for explanation */
744 /*
745 icc -O3 -static -o fwdback_benchmark -I.. -L.. -I../../easel -L../../easel -Dp7FWDBACK_BENCHMARK fwdback.c -lhmmer -leasel -lm
746
747 ./fwdback_benchmark <hmmfile> runs benchmark on both Forward and Backward parser
748 ./fwdback_benchmark -c -N100 <hmmfile> compare scores of VMX to generic impl
749 ./fwdback_benchmark -x -N100 <hmmfile> test that scores match trusted implementation.
750 */
751 #include "p7_config.h"
752
753 #include "easel.h"
754 #include "esl_alphabet.h"
755 #include "esl_getopts.h"
756 #include "esl_random.h"
757 #include "esl_randomseq.h"
758 #include "esl_stopwatch.h"
759
760 #include "hmmer.h"
761 #include "impl_vmx.h"
762
763 static ESL_OPTIONS options[] = {
764 /* name type default env range toggles reqs incomp help docgroup*/
765 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
766 { "-c", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, "-x", "compare scores to generic implementation (debug)", 0 },
767 { "-s", eslARG_INT, "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
768 { "-x", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, "-c", "equate scores to trusted implementation (debug)", 0 },
769 { "-L", eslARG_INT, "400", NULL, "n>0", NULL, NULL, NULL, "length of random target seqs", 0 },
770 { "-N", eslARG_INT, "50000", NULL, "n>0", NULL, NULL, NULL, "number of random target seqs", 0 },
771 { "-F", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, "-B", "only benchmark Forward", 0 },
772 { "-B", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, "-F", "only benchmark Backward", 0 },
773 { "-P", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "benchmark parsing version, not full version", 0 },
774 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
775 };
776 static char usage[] = "[-options] <hmmfile>";
777 static char banner[] = "benchmark driver for Forward, Backward implementations";
778
779 int
main(int argc,char ** argv)780 main(int argc, char **argv)
781 {
782 ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
783 char *hmmfile = esl_opt_GetArg(go, 1);
784 ESL_STOPWATCH *w = esl_stopwatch_Create();
785 ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
786 ESL_ALPHABET *abc = NULL;
787 P7_HMMFILE *hfp = NULL;
788 P7_HMM *hmm = NULL;
789 P7_BG *bg = NULL;
790 P7_PROFILE *gm = NULL;
791 P7_OPROFILE *om = NULL;
792 P7_GMX *gx = NULL;
793 P7_OMX *fwd = NULL;
794 P7_OMX *bck = NULL;
795 int L = esl_opt_GetInteger(go, "-L");
796 int N = esl_opt_GetInteger(go, "-N");
797 ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2));
798 int i;
799 float fsc, bsc;
800 float fsc2, bsc2;
801 double base_time, bench_time, Mcs;
802
803 p7_FLogsumInit();
804
805 if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
806 if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM");
807
808 bg = p7_bg_Create(abc);
809 p7_bg_SetLength(bg, L);
810 gm = p7_profile_Create(hmm->M, abc);
811 p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
812 om = p7_oprofile_Create(gm->M, abc);
813 p7_oprofile_Convert(gm, om);
814 p7_oprofile_ReconfigLength(om, L);
815
816 if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001)
817 p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode");
818
819 if (esl_opt_GetBoolean(go, "-P")) {
820 fwd = p7_omx_Create(gm->M, 0, L);
821 bck = p7_omx_Create(gm->M, 0, L);
822 } else {
823 fwd = p7_omx_Create(gm->M, L, L);
824 bck = p7_omx_Create(gm->M, L, L);
825 }
826 gx = p7_gmx_Create(gm->M, L);
827
828 /* Get a baseline time: how long it takes just to generate the sequences */
829 esl_stopwatch_Start(w);
830 for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
831 esl_stopwatch_Stop(w);
832 base_time = w->user;
833
834 esl_stopwatch_Start(w);
835 for (i = 0; i < N; i++)
836 {
837 esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
838 if (esl_opt_GetBoolean(go, "-P")) {
839 if (! esl_opt_GetBoolean(go, "-B")) p7_ForwardParser (dsq, L, om, fwd, &fsc);
840 if (! esl_opt_GetBoolean(go, "-F")) p7_BackwardParser(dsq, L, om, fwd, bck, &bsc);
841 } else {
842 if (! esl_opt_GetBoolean(go, "-B")) p7_Forward (dsq, L, om, fwd, &fsc);
843 if (! esl_opt_GetBoolean(go, "-F")) p7_Backward(dsq, L, om, fwd, bck, &bsc);
844 }
845
846 if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x"))
847 {
848 p7_GForward (dsq, L, gm, gx, &fsc2);
849 p7_GBackward(dsq, L, gm, gx, &bsc2);
850 printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2);
851 }
852 }
853 esl_stopwatch_Stop(w);
854 bench_time = w->user - base_time;
855 Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
856 esl_stopwatch_Display(stdout, w, "# CPU time: ");
857 printf("# M = %d\n", gm->M);
858 printf("# %.1f Mc/s\n", Mcs);
859
860 free(dsq);
861 p7_omx_Destroy(bck);
862 p7_omx_Destroy(fwd);
863 p7_gmx_Destroy(gx);
864 p7_oprofile_Destroy(om);
865 p7_profile_Destroy(gm);
866 p7_bg_Destroy(bg);
867 p7_hmm_Destroy(hmm);
868 p7_hmmfile_Close(hfp);
869 esl_alphabet_Destroy(abc);
870 esl_stopwatch_Destroy(w);
871 esl_randomness_Destroy(r);
872 esl_getopts_Destroy(go);
873 return 0;
874 }
875 #endif /*p7FWDBACK_BENCHMARK*/
876 /*------------------- end, benchmark driver ---------------------*/
877
878
879
880
881
882 /*****************************************************************
883 * 5. Unit tests.
884 *****************************************************************/
885 #ifdef p7FWDBACK_TESTDRIVE
886 #include "esl_random.h"
887 #include "esl_randomseq.h"
888
889 /*
890 * compare to GForward() scores.
891 */
892 static void
utest_fwdback(ESL_RANDOMNESS * r,ESL_ALPHABET * abc,P7_BG * bg,int M,int L,int N)893 utest_fwdback(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N)
894 {
895 char *msg = "forward/backward unit test failed";
896 P7_HMM *hmm = NULL;
897 P7_PROFILE *gm = NULL;
898 P7_OPROFILE *om = NULL;
899 ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2));
900 P7_OMX *fwd = p7_omx_Create(M, 0, L);
901 P7_OMX *bck = p7_omx_Create(M, 0, L);
902 P7_OMX *oxf = p7_omx_Create(M, L, L);
903 P7_OMX *oxb = p7_omx_Create(M, L, L);
904 P7_GMX *gx = p7_gmx_Create(M, L);
905 float tolerance;
906 float fsc1, fsc2;
907 float bsc1, bsc2;
908 float generic_sc;
909
910 p7_FLogsumInit();
911 if (p7_FLogsumError(-0.4, -0.5) > 0.0001) tolerance = 1.0; /* weaker test against GForward() */
912 else tolerance = 0.0001; /* stronger test: FLogsum() is in slow exact mode. */
913
914 p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om);
915 while (N--)
916 {
917 esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
918
919 p7_Forward (dsq, L, om, oxf, &fsc1);
920 p7_Backward (dsq, L, om, oxf, oxb, &bsc1);
921 p7_ForwardParser (dsq, L, om, fwd, &fsc2);
922 p7_BackwardParser(dsq, L, om, fwd, bck, &bsc2);
923 p7_GForward (dsq, L, gm, gx, &generic_sc);
924
925 /* Forward and Backward scores should agree with high tolerance */
926 if (fabs(fsc1-bsc1) > 0.0001) esl_fatal(msg);
927 if (fabs(fsc2-bsc2) > 0.0001) esl_fatal(msg);
928 if (fabs(fsc1-fsc2) > 0.0001) esl_fatal(msg);
929
930 /* GForward scores should approximate Forward scores,
931 * with tolerance that depends on how logsum.c was compiled
932 */
933 if (fabs(fsc1-generic_sc) > tolerance) esl_fatal(msg);
934 }
935
936 free(dsq);
937 p7_hmm_Destroy(hmm);
938 p7_omx_Destroy(oxb);
939 p7_omx_Destroy(oxf);
940 p7_omx_Destroy(bck);
941 p7_omx_Destroy(fwd);
942 p7_gmx_Destroy(gx);
943 p7_profile_Destroy(gm);
944 p7_oprofile_Destroy(om);
945 }
946 #endif /*p7FWDBACK_TESTDRIVE*/
947 /*---------------------- end, unit tests ------------------------*/
948
949
950
951
952 /*****************************************************************
953 * 6. Test driver
954 *****************************************************************/
955 #ifdef p7FWDBACK_TESTDRIVE
956 /*
957 gcc -g -Wall -maltivec -std=gnu99 -o fwdback_utest -I.. -L.. -I../../easel -L../../easel -Dp7FWDBACK_TESTDRIVE fwdback.c -lhmmer -leasel -lm
958 ./fwdback_utest
959 */
960 #include "p7_config.h"
961
962 #include "easel.h"
963 #include "esl_alphabet.h"
964 #include "esl_getopts.h"
965 #include "esl_random.h"
966
967 #include "hmmer.h"
968 #include "impl_vmx.h"
969
970 static ESL_OPTIONS options[] = {
971 /* name type default env range toggles reqs incomp help docgroup*/
972 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
973 { "-s", eslARG_INT, "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
974 { "-L", eslARG_INT, "200", NULL, NULL, NULL, NULL, NULL, "size of random sequences to sample", 0 },
975 { "-M", eslARG_INT, "145", NULL, NULL, NULL, NULL, NULL, "size of random models to sample", 0 },
976 { "-N", eslARG_INT, "100", NULL, NULL, NULL, NULL, NULL, "number of random sequences to sample", 0 },
977 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
978 };
979 static char usage[] = "[-options]";
980 static char banner[] = "test driver for VMX Forward, Backward implementations";
981
982 int
main(int argc,char ** argv)983 main(int argc, char **argv)
984 {
985 ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
986 ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
987 ESL_ALPHABET *abc = NULL;
988 P7_BG *bg = NULL;
989 int M = esl_opt_GetInteger(go, "-M");
990 int L = esl_opt_GetInteger(go, "-L");
991 int N = esl_opt_GetInteger(go, "-N");
992
993 /* First round of tests for DNA alphabets. */
994 if ((abc = esl_alphabet_Create(eslDNA)) == NULL) esl_fatal("failed to create alphabet");
995 if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model");
996
997 utest_fwdback(r, abc, bg, M, L, N); /* normal sized models */
998 utest_fwdback(r, abc, bg, 1, L, 10); /* size 1 models */
999 utest_fwdback(r, abc, bg, M, 1, 10); /* size 1 sequences */
1000
1001 esl_alphabet_Destroy(abc);
1002 p7_bg_Destroy(bg);
1003
1004 /* Second round of tests for amino alphabets. */
1005 if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet");
1006 if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model");
1007
1008 utest_fwdback(r, abc, bg, M, L, N);
1009 utest_fwdback(r, abc, bg, 1, L, 10);
1010 utest_fwdback(r, abc, bg, M, 1, 10);
1011
1012 esl_alphabet_Destroy(abc);
1013 p7_bg_Destroy(bg);
1014
1015 esl_getopts_Destroy(go);
1016 esl_randomness_Destroy(r);
1017 return eslOK;
1018 }
1019 #endif /*p7FWDBACK_TESTDRIVE*/
1020 /*--------------------- end, test driver ------------------------*/
1021
1022
1023
1024 /*****************************************************************
1025 * 7. Example
1026 *****************************************************************/
1027 #ifdef p7FWDBACK_EXAMPLE
1028 /* Useful for debugging on small HMMs and sequences.
1029 *
1030 * Compares to GForward().
1031 *
1032 gcc -g -Wall -maltivec -std=gnu99 -o fwdback_example -I.. -L.. -I../../easel -L../../easel -Dp7FWDBACK_EXAMPLE fwdback.c -lhmmer -leasel -lm
1033 ./fwdback_example <hmmfile> <seqfile>
1034 */
1035 #include "p7_config.h"
1036
1037 #include "easel.h"
1038 #include "esl_alphabet.h"
1039 #include "esl_exponential.h"
1040 #include "esl_getopts.h"
1041 #include "esl_sq.h"
1042 #include "esl_sqio.h"
1043
1044 #include "hmmer.h"
1045 #include "impl_vmx.h"
1046
1047 static ESL_OPTIONS options[] = {
1048 /* name type default env range toggles reqs incomp help docgroup*/
1049 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
1050 { "-1", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "output in one line awkable format", 0 },
1051 { "-P", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "output in profmark format", 0 },
1052 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
1053 };
1054 static char usage[] = "[-options] <hmmfile> <seqfile>";
1055 static char banner[] = "example of Forward/Backward (VMX versions)";
1056
1057 int
main(int argc,char ** argv)1058 main(int argc, char **argv)
1059 {
1060 ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
1061 char *hmmfile = esl_opt_GetArg(go, 1);
1062 char *seqfile = esl_opt_GetArg(go, 2);
1063 ESL_ALPHABET *abc = NULL;
1064 P7_HMMFILE *hfp = NULL;
1065 P7_HMM *hmm = NULL;
1066 P7_BG *bg = NULL;
1067 P7_PROFILE *gm = NULL;
1068 P7_OPROFILE *om = NULL;
1069 P7_GMX *gx = NULL;
1070 P7_OMX *fwd = NULL;
1071 P7_OMX *bck = NULL;
1072 ESL_SQ *sq = NULL;
1073 ESL_SQFILE *sqfp = NULL;
1074 int format = eslSQFILE_UNKNOWN;
1075 float fraw, braw, nullsc, fsc;
1076 float gfraw, gbraw, gfsc;
1077 double P, gP;
1078 int status;
1079
1080 /* Read in one HMM */
1081 if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
1082 if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM");
1083
1084 /* Open sequence file for reading */
1085 sq = esl_sq_CreateDigital(abc);
1086 status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
1087 if (status == eslENOTFOUND) p7_Fail("No such file.");
1088 else if (status == eslEFORMAT) p7_Fail("Format unrecognized.");
1089 else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz.");
1090 else if (status != eslOK) p7_Fail("Open failed, code %d.", status);
1091
1092 /* create default null model, then create and optimize profile */
1093 bg = p7_bg_Create(abc);
1094 p7_bg_SetLength(bg, sq->n);
1095 gm = p7_profile_Create(hmm->M, abc);
1096 p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL);
1097 om = p7_oprofile_Create(gm->M, abc);
1098 p7_oprofile_Convert(gm, om);
1099
1100 /* p7_oprofile_Dump(stdout, om); */
1101
1102 /* allocate DP matrices for O(M+L) parsers */
1103 fwd = p7_omx_Create(gm->M, 0, sq->n);
1104 bck = p7_omx_Create(gm->M, 0, sq->n);
1105 gx = p7_gmx_Create(gm->M, sq->n);
1106
1107 /* allocate DP matrices for O(ML) fills */
1108 /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */
1109 /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */
1110
1111 /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */ /* makes the fast DP algorithms dump their matrices */
1112 /* p7_omx_SetDumpMode(stdout, bck, TRUE); */
1113
1114 while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
1115 {
1116 p7_oprofile_ReconfigLength(om, sq->n);
1117 p7_ReconfigLength(gm, sq->n);
1118 p7_bg_SetLength(bg, sq->n);
1119 p7_omx_GrowTo(fwd, om->M, 0, sq->n);
1120 p7_omx_GrowTo(bck, om->M, 0, sq->n);
1121 p7_gmx_GrowTo(gx, gm->M, sq->n);
1122
1123 p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc);
1124
1125 p7_ForwardParser (sq->dsq, sq->n, om, fwd, &fraw);
1126 p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw);
1127
1128 /* p7_Forward (sq->dsq, sq->n, om, fwd, &fsc); printf("forward: %.2f nats\n", fsc); */
1129 /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc); printf("backward: %.2f nats\n", bsc); */
1130
1131 /* Comparison to other F/B implementations */
1132 p7_GForward (sq->dsq, sq->n, gm, gx, &gfraw);
1133 p7_GBackward (sq->dsq, sq->n, gm, gx, &gbraw);
1134
1135 /* p7_gmx_Dump(stdout, gx, p7_DEFAULT); */
1136
1137 fsc = (fraw-nullsc) / eslCONST_LOG2;
1138 gfsc = (gfraw-nullsc) / eslCONST_LOG2;
1139 P = esl_exp_surv(fsc, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]);
1140 gP = esl_exp_surv(gfsc, gm->evparam[p7_FTAU], gm->evparam[p7_FLAMBDA]);
1141
1142 if (esl_opt_GetBoolean(go, "-1"))
1143 {
1144 printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc);
1145 }
1146 else if (esl_opt_GetBoolean(go, "-P"))
1147 { /* output suitable for direct use in profmark benchmark postprocessors: */
1148 printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name);
1149 }
1150 else
1151 {
1152 printf("target sequence: %s\n", sq->name);
1153 printf("fwd filter raw score: %.2f nats\n", fraw);
1154 printf("bck filter raw score: %.2f nats\n", braw);
1155 printf("null score: %.2f nats\n", nullsc);
1156 printf("per-seq score: %.2f bits\n", fsc);
1157 printf("P-value: %g\n", P);
1158 printf("GForward raw score: %.2f nats\n", gfraw);
1159 printf("GBackward raw score: %.2f nats\n", gbraw);
1160 printf("GForward seq score: %.2f bits\n", gfsc);
1161 printf("GForward P-value: %g\n", gP);
1162 }
1163
1164 esl_sq_Reuse(sq);
1165 }
1166
1167 /* cleanup */
1168 esl_sq_Destroy(sq);
1169 esl_sqfile_Close(sqfp);
1170 p7_omx_Destroy(bck);
1171 p7_omx_Destroy(fwd);
1172 p7_gmx_Destroy(gx);
1173 p7_oprofile_Destroy(om);
1174 p7_profile_Destroy(gm);
1175 p7_bg_Destroy(bg);
1176 p7_hmm_Destroy(hmm);
1177 p7_hmmfile_Close(hfp);
1178 esl_alphabet_Destroy(abc);
1179 esl_getopts_Destroy(go);
1180 return 0;
1181 }
1182 #endif /*p7FWDBACK_EXAMPLE*/
1183
1184
1185