1 /* Viterbi algorithm; generic (non-SIMD) version.
2  *
3  * This implementation is modified from an optimized implementation
4  * contributed by Jeremy D. Buhler (Washington University in
5  * St. Louis).
6  *
7  * Relative to the implementation in HMMER2, Jeremy rearranged data
8  * structures to reduce the number of registers needed in the inner
9  * loop; eliminated branches from the inner loop by unrolling the Mth
10  * iteration in Viterbi and by replacing a bunch of "if" tests with
11  * MAX; and exposed opportunities for hoisting and strength reduction
12  * to the compiler. (The preceding sentence is nearly verbatim from
13  * Jeremy's notes.) I then uplifted the JB code to H3, most notably
14  * involving conversion from H2's scaled integers to H3's floating
15  * point calculations.
16  *
17  * Contents:
18  *    1. Viterbi implementation.
19  *    2. Benchmark driver.
20  *    3. Unit tests.
21  *    4. Test driver.
22  *    5. Example.
23  */
24 #include "p7_config.h"
25 
26 #include "easel.h"
27 #include "esl_alphabet.h"
28 #include "esl_gumbel.h"
29 
30 #include "hmmer.h"
31 
32 /*****************************************************************
33  * 1. Viterbi implementation.
34  *****************************************************************/
35 
36 /* Function:  p7_GViterbi()
37  * Synopsis:  The Viterbi algorithm.
38  * Incept:    SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
39  *
40  * Purpose:   The standard Viterbi dynamic programming algorithm.
41  *
42  *            Given a digital sequence <dsq> of length <L>, a profile
43  *            <gm>, and DP matrix <gx> allocated for at least <L>
44  *            by <gm->M> cells; calculate the maximum scoring path by
45  *            Viterbi; return the Viterbi score in <ret_sc>, and the
46  *            Viterbi matrix is in <gx>.
47  *
48  *            The caller may then retrieve the Viterbi path by calling
49  *            <p7_GTrace()>.
50  *
51  *            The Viterbi lod score is returned in nats. The caller
52  *            needs to subtract a null model lod score, then convert
53  *            to bits.
54  *
55  * Args:      dsq    - sequence in digitized form, 1..L
56  *            L      - length of dsq
57  *            gm     - profile.
58  *            gx     - DP matrix with room for an MxL alignment
59  *            opt_sc - optRETURN: Viterbi lod score in nats
60  *
61  * Return:   <eslOK> on success.
62  */
63 int
p7_GViterbi(const ESL_DSQ * dsq,int L,const P7_PROFILE * gm,P7_GMX * gx,float * opt_sc)64 p7_GViterbi(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
65 {
66   float const *tsc  = gm->tsc;
67   float      **dp   = gx->dp;
68   float       *xmx  = gx->xmx;
69   int          M    = gm->M;
70   int          i,k;
71   float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;
72 
73   /* Initialization of the zero row.  */
74   XMX(0,p7G_N) = 0;                                           /* S->N, p=1            */
75   XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];                    /* S->N->B, no N-tail   */
76   XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY;  /* need seq to get here */
77   for (k = 0; k <= gm->M; k++)
78     MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;            /* need seq to get here */
79 
80   /* DP recursion */
81   for (i = 1; i <= L; i++)
82     {
83       float const *rsc = gm->rsc[dsq[i]];
84       float sc;
85 
86       MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
87       XMX(i,p7G_E) = -eslINFINITY;
88 
89       for (k = 1; k < gm->M; k++)
90 	{
91   	  /* match state */
92 	  sc       = ESL_MAX(    MMX(i-1,k-1)   + TSC(p7P_MM,k-1),
93 				 IMX(i-1,k-1)   + TSC(p7P_IM,k-1));
94 	  sc       = ESL_MAX(sc, DMX(i-1,k-1)   + TSC(p7P_DM,k-1));
95 	  sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
96 	  MMX(i,k) = sc + MSC(k);
97 
98 	  /* E state update */
99 	  XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);
100 	  /* in Viterbi alignments, Dk->E can't win in local mode (and
101 	   * isn't possible in glocal mode), so don't bother
102 	   * looking. */
103 
104 	  /* insert state */
105 	  sc = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k),
106 		       IMX(i-1,k) + TSC(p7P_II,k));
107 	  IMX(i,k) = sc + ISC(k);
108 
109 	  /* delete state */
110 	  DMX(i,k) =  ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1),
111 			      DMX(i,k-1) + TSC(p7P_DD,k-1));
112 	}
113 
114       /* Unrolled match state M. */
115       sc       = ESL_MAX(    MMX(i-1,M-1)   + TSC(p7P_MM,M-1),
116 			     IMX(i-1,M-1)   + TSC(p7P_IM,M-1));
117       sc       = ESL_MAX(sc, DMX(i-1,M-1 )  + TSC(p7P_DM,M-1));
118       sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
119       MMX(i,M) = sc + MSC(M);
120 
121       /* Unrolled delete state D_M
122        * (Unlike internal Dk->E transitions that can never appear in
123        * Viterbi alignments, D_M->E is possible in glocal mode.)
124        */
125       DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1),
126 			 DMX(i,M-1) + TSC(p7P_DD,M-1));
127 
128       /* E state update; transition from M_M scores 0 by def'n */
129       sc  =          ESL_MAX(XMX(i,p7G_E), MMX(i,M));
130       XMX(i,p7G_E) = ESL_MAX(sc,           DMX(i,M));
131 
132       /* Now the special states. E must already be done, and B must follow N,J.
133        * remember, N, C and J emissions are zero score by definition.
134        */
135       /* J state */
136       sc           =             XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];   /* J->J */
137       XMX(i,p7G_J) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);  /* E->J is E's "loop" */
138 
139       /* C state */
140       sc           =             XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
141       XMX(i,p7G_C) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);
142 
143       /* N state */
144       XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];
145 
146       /* B state */
147       sc           =             XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE];   /* N->B is N's move */
148       XMX(i,p7G_B) = ESL_MAX(sc, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);  /* J->B is J's move */
149     }
150 
151   /* T state (not stored) */
152   if (opt_sc != NULL) *opt_sc = XMX(L,p7G_C) + gm->xsc[p7P_C][p7P_MOVE];
153   gx->M = gm->M;
154   gx->L = L;
155   return eslOK;
156 }
157 
158 
159 /*-------------------- end, viterbi -----------------------------*/
160 
161 
162 
163 /* Function:  p7_GViterbi_longtarget()
164  * Synopsis:  The Viterbi algorithm.
165  * Incept:    SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
166  *
167  * Purpose:   Given a digital sequence <dsq> of length <L>, a profile
168  *            <gm>, and DP matrix <gx> allocated for at least <L>
169  *            by <gm->M> cells; calculates the Viterbi score for
170  *            regions of <dsq>, and captures the positions at which
171  *            such regions exceed the score required to be
172  *            significant in the eyes of the calling function
173  *            (usually p=0.001).
174  *
175  * Args:      dsq    - sequence in digitized form, 1..L
176  *            L      - length of dsq
177  *            gm     - profile.
178  *            gx     - DP matrix with room for an MxL alignment
179  *            filtersc   - null or bias correction, required for translating a P-value threshold into a score threshold
180  *            P       - p-value below which a region is captured as being above threshold
181  *            windowlist - RETURN: array of hit windows (start and end of diagonal) for the above-threshold areas
182  *
183  * Return:   <eslOK> on success.
184  */
185 int
p7_GViterbi_longtarget(const ESL_DSQ * dsq,int L,const P7_PROFILE * gm,P7_GMX * gx,float filtersc,double P,P7_HMM_WINDOWLIST * windowlist)186 p7_GViterbi_longtarget(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx,
187                        float filtersc, double P, P7_HMM_WINDOWLIST *windowlist)
188 {
189   float const *tsc  = gm->tsc;
190   float      **dp   = gx->dp;
191   float       *xmx  = gx->xmx;
192   int          M    = gm->M;
193   int          i,k;
194   float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;
195 
196   int16_t sc_thresh;
197   float invP;
198 
199   /* Initialization of the zero row.  */
200   XMX(0,p7G_N) = 0;                                           /* S->N, p=1            */
201   XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];                    /* S->N->B, no N-tail   */
202   XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY;  /* need seq to get here */
203   for (k = 0; k <= gm->M; k++)
204     MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;            /* need seq to get here */
205 
206 
207   /*
208    *  In p7_ViterbiFilter, converting from a scaled int Viterbi score
209    *  S (aka xE the score getting to state E) to a probability
210    *  goes like this:
211    *    S =  XMX(i,p7G_E)
212    *    vsc =  S + gm->xsc[p7P_E][p7P_MOVE] +  gm->xsc[p7P_C][p7P_MOVE];
213    *    P  = esl_gumbel_surv((vfsc - filtersc) / eslCONST_LOG2  ,  gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA]);
214    *  and we're computing the threshold vsc, so invert it:
215    *    (vsc - filtersc) /  eslCONST_LOG2 = esl_gumbel_invsurv( P, gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA])
216    *    vsc = filtersc + eslCONST_LOG2 * esl_gumbel_invsurv( P, gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA])
217    *    S = vsc - gm->xsc[p7P_E][p7P_MOVE] -  gm->xsc[p7P_C][p7P_MOVE]
218    */
219     invP = esl_gumbel_invsurv(P, gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA]);
220     sc_thresh =   (int) ceil (filtersc + (eslCONST_LOG2 * invP)
221                   - gm->xsc[p7P_E][p7P_MOVE] -  gm->xsc[p7P_C][p7P_MOVE] );
222 
223 
224   /* DP recursion */
225   for (i = 1; i <= L; i++)
226   {
227       float const *rsc = gm->rsc[dsq[i]];
228       float sc;
229 
230       MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
231       XMX(i,p7G_E) = -eslINFINITY;
232 
233       for (k = 1; k < gm->M; k++)
234       {
235           /* match state */
236         sc       = ESL_MAX(    MMX(i-1,k-1)   + TSC(p7P_MM,k-1),
237              IMX(i-1,k-1)   + TSC(p7P_IM,k-1));
238         sc       = ESL_MAX(sc, DMX(i-1,k-1)   + TSC(p7P_DM,k-1));
239         sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
240         MMX(i,k) = sc + MSC(k);
241 
242         /* E state update */
243         XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);
244         /* in Viterbi alignments, Dk->E can't win in local mode (and
245          * isn't possible in glocal mode), so don't bother
246          * looking. */
247 
248         /* insert state */
249         sc = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k),
250                IMX(i-1,k) + TSC(p7P_II,k));
251         IMX(i,k) = sc + ISC(k);
252 
253         /* delete state */
254         DMX(i,k) =  ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1),
255                 DMX(i,k-1) + TSC(p7P_DD,k-1));
256       }
257 
258       /* Unrolled match state M. */
259       sc       = ESL_MAX(    MMX(i-1,M-1)   + TSC(p7P_MM,M-1),
260            IMX(i-1,M-1)   + TSC(p7P_IM,M-1));
261       sc       = ESL_MAX(sc, DMX(i-1,M-1 )  + TSC(p7P_DM,M-1));
262       sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
263       MMX(i,M) = sc + MSC(M);
264 
265       /* Unrolled delete state D_M
266        * (Unlike internal Dk->E transitions that can never appear in
267        * Viterbi alignments, D_M->E is possible in glocal mode.)
268        */
269       DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1),
270        DMX(i,M-1) + TSC(p7P_DD,M-1));
271 
272       /* E state update; transition from M_M scores 0 by def'n */
273       sc  =          ESL_MAX(XMX(i,p7G_E), MMX(i,M));
274       XMX(i,p7G_E) = ESL_MAX(sc,           DMX(i,M));
275 
276 
277       if (XMX(i,p7G_E) >= sc_thresh) {
278         //hit score threshold. Add a window to the list, then reset scores.
279 
280         for (k = 1; k <= gm->M; k++) {
281           if (MMX(i,k) == XMX(i,p7G_E)) {
282             p7_hmmwindow_new(windowlist, 0, i, 0, k, 1, 0.0, p7_NOCOMPLEMENT, L);
283           }
284           MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
285         }
286       } else {
287 
288         /* Now the special states. E must already be done, and B must follow N,J.
289          * remember, N, C and J emissions are zero score by definition.
290          */
291         /* J state */
292         sc           =             XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];   /* J->J */
293         XMX(i,p7G_J) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);  /* E->J is E's "loop" */
294 
295         /* C state */
296         sc           =             XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
297         XMX(i,p7G_C) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);
298 
299         /* N state */
300         XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];
301 
302         /* B state */
303         sc           =             XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE];   /* N->B is N's move */
304         XMX(i,p7G_B) = ESL_MAX(sc, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);  /* J->B is J's move */
305 
306       }
307    }
308 
309   /* T state (not stored) */
310   gx->M = gm->M;
311   gx->L = L;
312   return eslOK;
313 }
314 
315 /*-------------------- end, p7_GViterbi_longtarget -----------------------------*/
316 
317 /*****************************************************************
318  * 2. Benchmark driver.
319  *****************************************************************/
320 #ifdef p7GENERIC_VITERBI_BENCHMARK
321 /*
322    gcc -g -O2      -o generic_viterbi_benchmark -I. -L. -I../easel -L../easel -Dp7GENERIC_VITERBI_BENCHMARK generic_viterbi.c -lhmmer -leasel -lm
323    icc -O3 -static -o generic_viterbi_benchmark -I. -L. -I../easel -L../easel -Dp7GENERIC_VITERBI_BENCHMARK generic_viterbi.c -lhmmer -leasel -lm
324    ./benchmark-generic-viterbi <hmmfile>
325  */
326 /* As of Fri Dec 28 14:48:39 2007
327  *    Viterbi  = 61.8 Mc/s
328  *    Forward  =  8.6 Mc/s
329  *   Backward  =  7.1 Mc/s
330  *        MSV  = 55.9 Mc/s
331  * (gcc -g -O2, 3.2GHz Xeon, N=50K, L=400, M=72 RRM_1 model)
332  */
333 #include "p7_config.h"
334 
335 #include "easel.h"
336 #include "esl_alphabet.h"
337 #include "esl_getopts.h"
338 #include "esl_random.h"
339 #include "esl_randomseq.h"
340 #include "esl_stopwatch.h"
341 
342 #include "hmmer.h"
343 
344 static ESL_OPTIONS options[] = {
345   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
346   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",           0 },
347   { "-s",        eslARG_INT,     "42", NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                  0 },
348   { "-L",        eslARG_INT,    "400", NULL, "n>0", NULL,  NULL, NULL, "length of random target seqs",                   0 },
349   { "-N",        eslARG_INT,  "20000", NULL, "n>0", NULL,  NULL, NULL, "number of random target seqs",                   0 },
350   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
351 };
352 static char usage[]  = "[-options] <hmmfile>";
353 static char banner[] = "benchmark driver for generic Viterbi";
354 
355 int
main(int argc,char ** argv)356 main(int argc, char **argv)
357 {
358   ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
359   char           *hmmfile = esl_opt_GetArg(go, 1);
360   ESL_STOPWATCH  *w       = esl_stopwatch_Create();
361   ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
362   ESL_ALPHABET   *abc     = NULL;
363   P7_HMMFILE     *hfp     = NULL;
364   P7_HMM         *hmm     = NULL;
365   P7_BG          *bg      = NULL;
366   P7_PROFILE     *gm      = NULL;
367   P7_GMX         *gx      = NULL;
368   int             L       = esl_opt_GetInteger(go, "-L");
369   int             N       = esl_opt_GetInteger(go, "-N");
370   ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
371   int             i;
372   float           sc;
373   double          base_time, bench_time, Mcs;
374 
375   if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
376   if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
377 
378   bg = p7_bg_Create(abc);
379   p7_bg_SetLength(bg, L);
380   gm = p7_profile_Create(hmm->M, abc);
381   p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL);
382   gx = p7_gmx_Create(gm->M, L);
383 
384   /* Baseline time. */
385   esl_stopwatch_Start(w);
386   for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
387   esl_stopwatch_Stop(w);
388   base_time = w->user;
389 
390   /* Benchmark time. */
391   esl_stopwatch_Start(w);
392   for (i = 0; i < N; i++)
393     {
394       esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
395       p7_GViterbi     (dsq, L, gm, gx, &sc);
396     }
397   esl_stopwatch_Stop(w);
398   bench_time = w->user - base_time;
399   Mcs        = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
400   esl_stopwatch_Display(stdout, w, "# CPU time: ");
401   printf("# M    = %d\n",   gm->M);
402   printf("# %.1f Mc/s\n", Mcs);
403 
404 
405   free(dsq);
406   p7_gmx_Destroy(gx);
407   p7_profile_Destroy(gm);
408   p7_bg_Destroy(bg);
409   p7_hmm_Destroy(hmm);
410   p7_hmmfile_Close(hfp);
411   esl_alphabet_Destroy(abc);
412   esl_stopwatch_Destroy(w);
413   esl_randomness_Destroy(r);
414   esl_getopts_Destroy(go);
415   return 0;
416 }
417 #endif /*p7GENERIC_VITERBI_BENCHMARK*/
418 /*----------------- end, benchmark ------------------------------*/
419 
420 
421 
422 /*****************************************************************
423  * 3. Unit tests
424  *****************************************************************/
425 #ifdef p7GENERIC_VITERBI_TESTDRIVE
426 #include <string.h>
427 #include "esl_getopts.h"
428 #include "esl_msa.h"
429 #include "esl_msafile.h"
430 #include "esl_random.h"
431 #include "esl_randomseq.h"
432 
433 /* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence,
434  * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging.
435  */
436 static void
utest_basic(ESL_GETOPTS * go)437 utest_basic(ESL_GETOPTS *go)
438 {
439   char           *query= "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
440   int             fmt  = eslMSAFILE_STOCKHOLM;
441   char           *targ = "GAATTC";
442   ESL_ALPHABET   *abc  = NULL;
443   ESL_MSA        *msa  = NULL;
444   P7_HMM         *hmm  = NULL;
445   P7_PROFILE     *gm   = NULL;
446   P7_BG          *bg   = NULL;
447   P7_PRIOR       *pri  = NULL;
448   ESL_DSQ        *dsq  = NULL;
449   P7_GMX         *gx   = NULL;
450   P7_TRACE        *tr  = NULL;
451   int             L    = strlen(targ);
452   float           vsc, vsc2, fsc;
453 
454   if ((abc = esl_alphabet_Create(eslDNA))          == NULL)  esl_fatal("failed to create alphabet");
455   if ((pri = p7_prior_CreateNucleic())             == NULL)  esl_fatal("failed to create prior");
456   if ((msa = esl_msa_CreateFromString(query, fmt)) == NULL)  esl_fatal("failed to create MSA");
457   if (esl_msa_Digitize(abc, msa, NULL)             != eslOK) esl_fatal("failed to digitize MSA");
458   if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK) esl_fatal("failed to create GAATTC model");
459   if (p7_ParameterEstimation(hmm, pri)             != eslOK) esl_fatal("failed to parameterize GAATTC model");
460   if (p7_hmm_SetConsensus(hmm, NULL)               != eslOK) esl_fatal("failed to make consensus");
461   if ((bg = p7_bg_Create(abc))                     == NULL)  esl_fatal("failed to create DNA null model");
462   if ((gm = p7_profile_Create(hmm->M, abc))        == NULL)  esl_fatal("failed to create GAATTC profile");
463   if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)!= eslOK) esl_fatal("failed to config profile");
464   if (p7_profile_Validate(gm, NULL, 0.0001)        != eslOK) esl_fatal("whoops, profile is bad!");
465   if (esl_abc_CreateDsq(abc, targ, &dsq)           != eslOK) esl_fatal("failed to create GAATTC digital sequence");
466   if ((gx = p7_gmx_Create(gm->M, L))               == NULL)  esl_fatal("failed to create DP matrix");
467   if ((tr = p7_trace_Create())                     == NULL)  esl_fatal("trace creation failed");
468 
469   p7_GViterbi   (dsq, L, gm, gx, &vsc);
470   if (esl_opt_GetBoolean(go, "-v")) printf("Viterbi score: %.4f\n", vsc);
471   if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);
472 
473   p7_GTrace     (dsq, L, gm, gx, tr);
474   p7_trace_Score(tr, dsq, gm, &vsc2);
475   if (esl_opt_GetBoolean(go, "-v")) p7_trace_Dump(stdout, tr, gm, dsq);
476 
477   if (esl_FCompare(vsc, vsc2, 1e-5) != eslOK)  esl_fatal("trace score and Viterbi score don't agree.");
478 
479   p7_GForward   (dsq, L, gm, gx, &fsc);
480   if (esl_opt_GetBoolean(go, "-v")) printf("Forward score: %.4f\n", fsc);
481   if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);
482 
483   p7_trace_Destroy(tr);
484   p7_gmx_Destroy(gx);
485   free(dsq);
486   p7_profile_Destroy(gm);
487   p7_bg_Destroy(bg);
488   p7_hmm_Destroy(hmm);
489   esl_msa_Destroy(msa);
490   p7_prior_Destroy(pri);
491   esl_alphabet_Destroy(abc);
492   return;
493 }
494 
495 /* Viterbi validation is done by comparing the returned score
496  * to the score of the optimal trace. Not foolproof, but catches
497  * many kinds of errors.
498  *
499  * Another check is that the average score should be <= 0,
500  * since the random sequences are drawn from the null model.
501  */
502 static void
utest_viterbi(ESL_GETOPTS * go,ESL_RANDOMNESS * r,ESL_ALPHABET * abc,P7_BG * bg,P7_PROFILE * gm,int nseq,int L)503 utest_viterbi(ESL_GETOPTS *go, ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, P7_PROFILE *gm, int nseq, int L)
504 {
505   float     avg_sc = 0.;
506   char      errbuf[eslERRBUFSIZE];
507   ESL_DSQ  *dsq = NULL;
508   P7_GMX   *gx  = NULL;
509   P7_TRACE *tr  = NULL;
510   int       idx;
511   float     sc1, sc2;
512 
513   if ((dsq    = malloc(sizeof(ESL_DSQ) *(L+2))) == NULL)  esl_fatal("malloc failed");
514   if ((tr     = p7_trace_Create())              == NULL)  esl_fatal("trace creation failed");
515   if ((gx     = p7_gmx_Create(gm->M, L))        == NULL)  esl_fatal("matrix creation failed");
516 
517   for (idx = 0; idx < nseq; idx++)
518     {
519       if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal("seq generation failed");
520       if (p7_GViterbi(dsq, L, gm, gx, &sc1)       != eslOK) esl_fatal("viterbi failed");
521       if (p7_GTrace  (dsq, L, gm, gx, tr)         != eslOK) esl_fatal("trace failed");
522       if (p7_trace_Validate(tr, abc, dsq, errbuf) != eslOK) esl_fatal("trace invalid:\n%s", errbuf);
523       if (p7_trace_Score(tr, dsq, gm, &sc2)       != eslOK) esl_fatal("trace score failed");
524       if (esl_FCompare(sc1, sc2, 1e-6)            != eslOK) esl_fatal("Trace score != Viterbi score");
525       if (p7_bg_NullOne(bg, dsq, L, &sc2)         != eslOK) esl_fatal("null score failed");
526 
527       avg_sc += (sc1 - sc2);
528 
529       if (esl_opt_GetBoolean(go, "--vv"))
530 	printf("utest_viterbi: Viterbi score: %.4f (null %.4f) (total so far: %.4f)\n", sc1, sc2, avg_sc);
531 
532       p7_trace_Reuse(tr);
533     }
534 
535   avg_sc /= (float) nseq;
536   if (avg_sc > 0.) esl_fatal("Viterbi scores have positive expectation (%f nats)", avg_sc);
537 
538   p7_gmx_Destroy(gx);
539   p7_trace_Destroy(tr);
540   free(dsq);
541   return;
542 }
543 
544 #endif /*p7GENERIC_VITERBI_TESTDRIVE*/
545 /*----------------- end, unit tests -----------------------------*/
546 
547 
548 /*****************************************************************
549  * 4. Test driver.
550  *****************************************************************/
551 /* gcc -g -Wall -Dp7GENERIC_VITERBI_TESTDRIVE -I. -I../easel -L. -L../easel -o generic_viterbi_utest generic_viterbi.c -lhmmer -leasel -lm
552  */
553 #ifdef p7GENERIC_VITERBI_TESTDRIVE
554 #include "easel.h"
555 #include "esl_getopts.h"
556 #include "esl_msa.h"
557 
558 #include "p7_config.h"
559 #include "hmmer.h"
560 
561 static ESL_OPTIONS options[] = {
562   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
563   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",           0 },
564   { "-s",        eslARG_INT,     "42", NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                  0 },
565   { "-v",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "be verbose",                                     0 },
566   { "--vv",      eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "be very verbose",                                0 },
567   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
568 };
569 static char usage[]  = "[-options]";
570 static char banner[] = "unit test driver for the generic Viterbi implementation";
571 
572 int
main(int argc,char ** argv)573 main(int argc, char **argv)
574 {
575   ESL_GETOPTS    *go    = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
576   ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
577   ESL_ALPHABET   *abc  = NULL;
578   P7_HMM         *hmm  = NULL;
579   P7_PROFILE     *gm   = NULL;
580   P7_BG          *bg   = NULL;
581   int             M    = 100;
582   int             L    = 200;
583   int             nseq = 20;
584   char            errbuf[eslERRBUFSIZE];
585 
586   if ((abc = esl_alphabet_Create(eslAMINO))         == NULL)  esl_fatal("failed to create alphabet");
587   if (p7_hmm_Sample(r, M, abc, &hmm)                != eslOK) esl_fatal("failed to sample an HMM");
588   if ((bg = p7_bg_Create(abc))                      == NULL)  esl_fatal("failed to create null model");
589   if ((gm = p7_profile_Create(hmm->M, abc))         == NULL)  esl_fatal("failed to create profile");
590   if (p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)    != eslOK) esl_fatal("failed to config profile");
591   if (p7_hmm_Validate    (hmm, errbuf, 0.0001)      != eslOK) esl_fatal("whoops, HMM is bad!: %s", errbuf);
592   if (p7_profile_Validate(gm,  errbuf, 0.0001)      != eslOK) esl_fatal("whoops, profile is bad!: %s", errbuf);
593 
594   utest_basic  (go);
595   utest_viterbi(go, r, abc, bg, gm, nseq, L);
596 
597   p7_profile_Destroy(gm);
598   p7_bg_Destroy(bg);
599   p7_hmm_Destroy(hmm);
600   esl_alphabet_Destroy(abc);
601   esl_randomness_Destroy(r);
602   esl_getopts_Destroy(go);
603   return 0;
604 }
605 #endif /*p7GENERIC_VITERBI_TESTDRIVE*/
606 /*-------------------- end, test driver -------------------------*/
607 
608 
609 
610 
611 /*****************************************************************
612  * 5. Example
613  *****************************************************************/
614 /* This is essentially identical to the vtrace example. */
615 #ifdef p7GENERIC_VITERBI_EXAMPLE
616 /*
617    gcc -g -O2 -Dp7GENERIC_VITERBI_EXAMPLE -I. -I../easel -L. -L../easel -o generic_viterbi_example generic_viterbi.c -lhmmer -leasel -lm
618  */
619 #include "p7_config.h"
620 
621 #include "easel.h"
622 #include "esl_alphabet.h"
623 #include "esl_getopts.h"
624 #include "esl_sq.h"
625 #include "esl_sqio.h"
626 
627 #include "hmmer.h"
628 
629 static ESL_OPTIONS options[] = {
630   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
631   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
632   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
633 };
634 static char usage[]  = "[-options] <hmmfile> <seqfile>";
635 static char banner[] = "example of generic Viterbi";
636 
637 
638 int
main(int argc,char ** argv)639 main(int argc, char **argv)
640 {
641   ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
642   char           *hmmfile = esl_opt_GetArg(go, 1);
643   char           *seqfile = esl_opt_GetArg(go, 2);
644   ESL_ALPHABET   *abc     = NULL;
645   P7_HMMFILE     *hfp     = NULL;
646   P7_HMM         *hmm     = NULL;
647   P7_BG          *bg      = NULL;
648   P7_PROFILE     *gm      = NULL;
649   P7_GMX         *fwd     = NULL;
650   ESL_SQ         *sq      = NULL;
651   ESL_SQFILE     *sqfp    = NULL;
652   P7_TRACE       *tr      = NULL;
653   int             format  = eslSQFILE_UNKNOWN;
654   char            errbuf[eslERRBUFSIZE];
655   float           sc;
656   int             d;
657   int             status;
658 
659   /* Read in one HMM */
660   if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
661   if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
662   p7_hmmfile_Close(hfp);
663 
664   /* Read in one sequence */
665   sq     = esl_sq_CreateDigital(abc);
666   status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
667   if      (status == eslENOTFOUND) p7_Fail("No such file.");
668   else if (status == eslEFORMAT)   p7_Fail("Format unrecognized.");
669   else if (status == eslEINVAL)    p7_Fail("Can't autodetect stdin or .gz.");
670   else if (status != eslOK)        p7_Fail("Open failed, code %d.", status);
671   if  (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");
672   esl_sqfile_Close(sqfp);
673 
674   /* Configure a profile from the HMM */
675   bg = p7_bg_Create(abc);
676   p7_bg_SetLength(bg, sq->n);
677   gm = p7_profile_Create(hmm->M, abc);
678   p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
679 
680   /* Allocate matrix and a trace */
681   fwd = p7_gmx_Create(gm->M, sq->n);
682   tr  = p7_trace_Create();
683 
684   /* Run Viterbi; do traceback */
685   p7_GViterbi (sq->dsq, sq->n, gm, fwd, &sc);
686   p7_GTrace   (sq->dsq, sq->n, gm, fwd, tr);
687 
688   /* Dump and validate the trace. */
689   p7_trace_Dump(stdout, tr, gm, sq->dsq);
690   if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace fails validation:\n%s\n", errbuf);
691 
692   /* Domain info in the trace. */
693   p7_trace_Index(tr);
694   printf("# Viterbi: %d domains : ", tr->ndom);
695   for (d = 0; d < tr->ndom; d++) printf("%6d %6d %6d %6d  ", tr->sqfrom[d], tr->sqto[d], tr->hmmfrom[d], tr->hmmto[d]);
696   printf("\n");
697 
698   /* Cleanup */
699   p7_trace_Destroy(tr);
700   p7_gmx_Destroy(fwd);
701   p7_profile_Destroy(gm);
702   p7_bg_Destroy(bg);
703   p7_hmm_Destroy(hmm);
704   esl_sq_Destroy(sq);
705   esl_alphabet_Destroy(abc);
706   esl_getopts_Destroy(go);
707   return 0;
708 }
709 #endif /*p7GENERIC_VITERBI_EXAMPLE*/
710 /*-------------------- end, example -----------------------------*/
711 
712 
713