1 /* Viterbi algorithm; generic (non-SIMD) version.
2 *
3 * This implementation is modified from an optimized implementation
4 * contributed by Jeremy D. Buhler (Washington University in
5 * St. Louis).
6 *
7 * Relative to the implementation in HMMER2, Jeremy rearranged data
8 * structures to reduce the number of registers needed in the inner
9 * loop; eliminated branches from the inner loop by unrolling the Mth
10 * iteration in Viterbi and by replacing a bunch of "if" tests with
11 * MAX; and exposed opportunities for hoisting and strength reduction
12 * to the compiler. (The preceding sentence is nearly verbatim from
13 * Jeremy's notes.) I then uplifted the JB code to H3, most notably
14 * involving conversion from H2's scaled integers to H3's floating
15 * point calculations.
16 *
17 * Contents:
18 * 1. Viterbi implementation.
19 * 2. Benchmark driver.
20 * 3. Unit tests.
21 * 4. Test driver.
22 * 5. Example.
23 */
24 #include "p7_config.h"
25
26 #include "easel.h"
27 #include "esl_alphabet.h"
28 #include "esl_gumbel.h"
29
30 #include "hmmer.h"
31
32 /*****************************************************************
33 * 1. Viterbi implementation.
34 *****************************************************************/
35
36 /* Function: p7_GViterbi()
37 * Synopsis: The Viterbi algorithm.
38 * Incept: SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
39 *
40 * Purpose: The standard Viterbi dynamic programming algorithm.
41 *
42 * Given a digital sequence <dsq> of length <L>, a profile
43 * <gm>, and DP matrix <gx> allocated for at least <L>
44 * by <gm->M> cells; calculate the maximum scoring path by
45 * Viterbi; return the Viterbi score in <ret_sc>, and the
46 * Viterbi matrix is in <gx>.
47 *
48 * The caller may then retrieve the Viterbi path by calling
49 * <p7_GTrace()>.
50 *
51 * The Viterbi lod score is returned in nats. The caller
52 * needs to subtract a null model lod score, then convert
53 * to bits.
54 *
55 * Args: dsq - sequence in digitized form, 1..L
56 * L - length of dsq
57 * gm - profile.
58 * gx - DP matrix with room for an MxL alignment
59 * opt_sc - optRETURN: Viterbi lod score in nats
60 *
61 * Return: <eslOK> on success.
62 */
63 int
p7_GViterbi(const ESL_DSQ * dsq,int L,const P7_PROFILE * gm,P7_GMX * gx,float * opt_sc)64 p7_GViterbi(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
65 {
66 float const *tsc = gm->tsc;
67 float **dp = gx->dp;
68 float *xmx = gx->xmx;
69 int M = gm->M;
70 int i,k;
71 float esc = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;
72
73 /* Initialization of the zero row. */
74 XMX(0,p7G_N) = 0; /* S->N, p=1 */
75 XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE]; /* S->N->B, no N-tail */
76 XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY; /* need seq to get here */
77 for (k = 0; k <= gm->M; k++)
78 MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY; /* need seq to get here */
79
80 /* DP recursion */
81 for (i = 1; i <= L; i++)
82 {
83 float const *rsc = gm->rsc[dsq[i]];
84 float sc;
85
86 MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
87 XMX(i,p7G_E) = -eslINFINITY;
88
89 for (k = 1; k < gm->M; k++)
90 {
91 /* match state */
92 sc = ESL_MAX( MMX(i-1,k-1) + TSC(p7P_MM,k-1),
93 IMX(i-1,k-1) + TSC(p7P_IM,k-1));
94 sc = ESL_MAX(sc, DMX(i-1,k-1) + TSC(p7P_DM,k-1));
95 sc = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
96 MMX(i,k) = sc + MSC(k);
97
98 /* E state update */
99 XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);
100 /* in Viterbi alignments, Dk->E can't win in local mode (and
101 * isn't possible in glocal mode), so don't bother
102 * looking. */
103
104 /* insert state */
105 sc = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k),
106 IMX(i-1,k) + TSC(p7P_II,k));
107 IMX(i,k) = sc + ISC(k);
108
109 /* delete state */
110 DMX(i,k) = ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1),
111 DMX(i,k-1) + TSC(p7P_DD,k-1));
112 }
113
114 /* Unrolled match state M. */
115 sc = ESL_MAX( MMX(i-1,M-1) + TSC(p7P_MM,M-1),
116 IMX(i-1,M-1) + TSC(p7P_IM,M-1));
117 sc = ESL_MAX(sc, DMX(i-1,M-1 ) + TSC(p7P_DM,M-1));
118 sc = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
119 MMX(i,M) = sc + MSC(M);
120
121 /* Unrolled delete state D_M
122 * (Unlike internal Dk->E transitions that can never appear in
123 * Viterbi alignments, D_M->E is possible in glocal mode.)
124 */
125 DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1),
126 DMX(i,M-1) + TSC(p7P_DD,M-1));
127
128 /* E state update; transition from M_M scores 0 by def'n */
129 sc = ESL_MAX(XMX(i,p7G_E), MMX(i,M));
130 XMX(i,p7G_E) = ESL_MAX(sc, DMX(i,M));
131
132 /* Now the special states. E must already be done, and B must follow N,J.
133 * remember, N, C and J emissions are zero score by definition.
134 */
135 /* J state */
136 sc = XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP]; /* J->J */
137 XMX(i,p7G_J) = ESL_MAX(sc, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_LOOP]); /* E->J is E's "loop" */
138
139 /* C state */
140 sc = XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
141 XMX(i,p7G_C) = ESL_MAX(sc, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);
142
143 /* N state */
144 XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];
145
146 /* B state */
147 sc = XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE]; /* N->B is N's move */
148 XMX(i,p7G_B) = ESL_MAX(sc, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]); /* J->B is J's move */
149 }
150
151 /* T state (not stored) */
152 if (opt_sc != NULL) *opt_sc = XMX(L,p7G_C) + gm->xsc[p7P_C][p7P_MOVE];
153 gx->M = gm->M;
154 gx->L = L;
155 return eslOK;
156 }
157
158
159 /*-------------------- end, viterbi -----------------------------*/
160
161
162
163 /* Function: p7_GViterbi_longtarget()
164 * Synopsis: The Viterbi algorithm.
165 * Incept: SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
166 *
167 * Purpose: Given a digital sequence <dsq> of length <L>, a profile
168 * <gm>, and DP matrix <gx> allocated for at least <L>
169 * by <gm->M> cells; calculates the Viterbi score for
170 * regions of <dsq>, and captures the positions at which
171 * such regions exceed the score required to be
172 * significant in the eyes of the calling function
173 * (usually p=0.001).
174 *
175 * Args: dsq - sequence in digitized form, 1..L
176 * L - length of dsq
177 * gm - profile.
178 * gx - DP matrix with room for an MxL alignment
179 * filtersc - null or bias correction, required for translating a P-value threshold into a score threshold
180 * P - p-value below which a region is captured as being above threshold
181 * windowlist - RETURN: array of hit windows (start and end of diagonal) for the above-threshold areas
182 *
183 * Return: <eslOK> on success.
184 */
185 int
p7_GViterbi_longtarget(const ESL_DSQ * dsq,int L,const P7_PROFILE * gm,P7_GMX * gx,float filtersc,double P,P7_HMM_WINDOWLIST * windowlist)186 p7_GViterbi_longtarget(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx,
187 float filtersc, double P, P7_HMM_WINDOWLIST *windowlist)
188 {
189 float const *tsc = gm->tsc;
190 float **dp = gx->dp;
191 float *xmx = gx->xmx;
192 int M = gm->M;
193 int i,k;
194 float esc = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;
195
196 int16_t sc_thresh;
197 float invP;
198
199 /* Initialization of the zero row. */
200 XMX(0,p7G_N) = 0; /* S->N, p=1 */
201 XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE]; /* S->N->B, no N-tail */
202 XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY; /* need seq to get here */
203 for (k = 0; k <= gm->M; k++)
204 MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY; /* need seq to get here */
205
206
207 /*
208 * In p7_ViterbiFilter, converting from a scaled int Viterbi score
209 * S (aka xE the score getting to state E) to a probability
210 * goes like this:
211 * S = XMX(i,p7G_E)
212 * vsc = S + gm->xsc[p7P_E][p7P_MOVE] + gm->xsc[p7P_C][p7P_MOVE];
213 * P = esl_gumbel_surv((vfsc - filtersc) / eslCONST_LOG2 , gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA]);
214 * and we're computing the threshold vsc, so invert it:
215 * (vsc - filtersc) / eslCONST_LOG2 = esl_gumbel_invsurv( P, gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA])
216 * vsc = filtersc + eslCONST_LOG2 * esl_gumbel_invsurv( P, gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA])
217 * S = vsc - gm->xsc[p7P_E][p7P_MOVE] - gm->xsc[p7P_C][p7P_MOVE]
218 */
219 invP = esl_gumbel_invsurv(P, gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA]);
220 sc_thresh = (int) ceil (filtersc + (eslCONST_LOG2 * invP)
221 - gm->xsc[p7P_E][p7P_MOVE] - gm->xsc[p7P_C][p7P_MOVE] );
222
223
224 /* DP recursion */
225 for (i = 1; i <= L; i++)
226 {
227 float const *rsc = gm->rsc[dsq[i]];
228 float sc;
229
230 MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
231 XMX(i,p7G_E) = -eslINFINITY;
232
233 for (k = 1; k < gm->M; k++)
234 {
235 /* match state */
236 sc = ESL_MAX( MMX(i-1,k-1) + TSC(p7P_MM,k-1),
237 IMX(i-1,k-1) + TSC(p7P_IM,k-1));
238 sc = ESL_MAX(sc, DMX(i-1,k-1) + TSC(p7P_DM,k-1));
239 sc = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
240 MMX(i,k) = sc + MSC(k);
241
242 /* E state update */
243 XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);
244 /* in Viterbi alignments, Dk->E can't win in local mode (and
245 * isn't possible in glocal mode), so don't bother
246 * looking. */
247
248 /* insert state */
249 sc = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k),
250 IMX(i-1,k) + TSC(p7P_II,k));
251 IMX(i,k) = sc + ISC(k);
252
253 /* delete state */
254 DMX(i,k) = ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1),
255 DMX(i,k-1) + TSC(p7P_DD,k-1));
256 }
257
258 /* Unrolled match state M. */
259 sc = ESL_MAX( MMX(i-1,M-1) + TSC(p7P_MM,M-1),
260 IMX(i-1,M-1) + TSC(p7P_IM,M-1));
261 sc = ESL_MAX(sc, DMX(i-1,M-1 ) + TSC(p7P_DM,M-1));
262 sc = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
263 MMX(i,M) = sc + MSC(M);
264
265 /* Unrolled delete state D_M
266 * (Unlike internal Dk->E transitions that can never appear in
267 * Viterbi alignments, D_M->E is possible in glocal mode.)
268 */
269 DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1),
270 DMX(i,M-1) + TSC(p7P_DD,M-1));
271
272 /* E state update; transition from M_M scores 0 by def'n */
273 sc = ESL_MAX(XMX(i,p7G_E), MMX(i,M));
274 XMX(i,p7G_E) = ESL_MAX(sc, DMX(i,M));
275
276
277 if (XMX(i,p7G_E) >= sc_thresh) {
278 //hit score threshold. Add a window to the list, then reset scores.
279
280 for (k = 1; k <= gm->M; k++) {
281 if (MMX(i,k) == XMX(i,p7G_E)) {
282 p7_hmmwindow_new(windowlist, 0, i, 0, k, 1, 0.0, p7_NOCOMPLEMENT, L);
283 }
284 MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
285 }
286 } else {
287
288 /* Now the special states. E must already be done, and B must follow N,J.
289 * remember, N, C and J emissions are zero score by definition.
290 */
291 /* J state */
292 sc = XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP]; /* J->J */
293 XMX(i,p7G_J) = ESL_MAX(sc, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_LOOP]); /* E->J is E's "loop" */
294
295 /* C state */
296 sc = XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
297 XMX(i,p7G_C) = ESL_MAX(sc, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);
298
299 /* N state */
300 XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];
301
302 /* B state */
303 sc = XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE]; /* N->B is N's move */
304 XMX(i,p7G_B) = ESL_MAX(sc, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]); /* J->B is J's move */
305
306 }
307 }
308
309 /* T state (not stored) */
310 gx->M = gm->M;
311 gx->L = L;
312 return eslOK;
313 }
314
315 /*-------------------- end, p7_GViterbi_longtarget -----------------------------*/
316
317 /*****************************************************************
318 * 2. Benchmark driver.
319 *****************************************************************/
320 #ifdef p7GENERIC_VITERBI_BENCHMARK
321 /*
322 gcc -g -O2 -o generic_viterbi_benchmark -I. -L. -I../easel -L../easel -Dp7GENERIC_VITERBI_BENCHMARK generic_viterbi.c -lhmmer -leasel -lm
323 icc -O3 -static -o generic_viterbi_benchmark -I. -L. -I../easel -L../easel -Dp7GENERIC_VITERBI_BENCHMARK generic_viterbi.c -lhmmer -leasel -lm
324 ./benchmark-generic-viterbi <hmmfile>
325 */
326 /* As of Fri Dec 28 14:48:39 2007
327 * Viterbi = 61.8 Mc/s
328 * Forward = 8.6 Mc/s
329 * Backward = 7.1 Mc/s
330 * MSV = 55.9 Mc/s
331 * (gcc -g -O2, 3.2GHz Xeon, N=50K, L=400, M=72 RRM_1 model)
332 */
333 #include "p7_config.h"
334
335 #include "easel.h"
336 #include "esl_alphabet.h"
337 #include "esl_getopts.h"
338 #include "esl_random.h"
339 #include "esl_randomseq.h"
340 #include "esl_stopwatch.h"
341
342 #include "hmmer.h"
343
344 static ESL_OPTIONS options[] = {
345 /* name type default env range toggles reqs incomp help docgroup*/
346 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
347 { "-s", eslARG_INT, "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
348 { "-L", eslARG_INT, "400", NULL, "n>0", NULL, NULL, NULL, "length of random target seqs", 0 },
349 { "-N", eslARG_INT, "20000", NULL, "n>0", NULL, NULL, NULL, "number of random target seqs", 0 },
350 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
351 };
352 static char usage[] = "[-options] <hmmfile>";
353 static char banner[] = "benchmark driver for generic Viterbi";
354
355 int
main(int argc,char ** argv)356 main(int argc, char **argv)
357 {
358 ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
359 char *hmmfile = esl_opt_GetArg(go, 1);
360 ESL_STOPWATCH *w = esl_stopwatch_Create();
361 ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
362 ESL_ALPHABET *abc = NULL;
363 P7_HMMFILE *hfp = NULL;
364 P7_HMM *hmm = NULL;
365 P7_BG *bg = NULL;
366 P7_PROFILE *gm = NULL;
367 P7_GMX *gx = NULL;
368 int L = esl_opt_GetInteger(go, "-L");
369 int N = esl_opt_GetInteger(go, "-N");
370 ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2));
371 int i;
372 float sc;
373 double base_time, bench_time, Mcs;
374
375 if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
376 if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM");
377
378 bg = p7_bg_Create(abc);
379 p7_bg_SetLength(bg, L);
380 gm = p7_profile_Create(hmm->M, abc);
381 p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL);
382 gx = p7_gmx_Create(gm->M, L);
383
384 /* Baseline time. */
385 esl_stopwatch_Start(w);
386 for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
387 esl_stopwatch_Stop(w);
388 base_time = w->user;
389
390 /* Benchmark time. */
391 esl_stopwatch_Start(w);
392 for (i = 0; i < N; i++)
393 {
394 esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
395 p7_GViterbi (dsq, L, gm, gx, &sc);
396 }
397 esl_stopwatch_Stop(w);
398 bench_time = w->user - base_time;
399 Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time;
400 esl_stopwatch_Display(stdout, w, "# CPU time: ");
401 printf("# M = %d\n", gm->M);
402 printf("# %.1f Mc/s\n", Mcs);
403
404
405 free(dsq);
406 p7_gmx_Destroy(gx);
407 p7_profile_Destroy(gm);
408 p7_bg_Destroy(bg);
409 p7_hmm_Destroy(hmm);
410 p7_hmmfile_Close(hfp);
411 esl_alphabet_Destroy(abc);
412 esl_stopwatch_Destroy(w);
413 esl_randomness_Destroy(r);
414 esl_getopts_Destroy(go);
415 return 0;
416 }
417 #endif /*p7GENERIC_VITERBI_BENCHMARK*/
418 /*----------------- end, benchmark ------------------------------*/
419
420
421
422 /*****************************************************************
423 * 3. Unit tests
424 *****************************************************************/
425 #ifdef p7GENERIC_VITERBI_TESTDRIVE
426 #include <string.h>
427 #include "esl_getopts.h"
428 #include "esl_msa.h"
429 #include "esl_msafile.h"
430 #include "esl_random.h"
431 #include "esl_randomseq.h"
432
433 /* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence,
434 * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging.
435 */
436 static void
utest_basic(ESL_GETOPTS * go)437 utest_basic(ESL_GETOPTS *go)
438 {
439 char *query= "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
440 int fmt = eslMSAFILE_STOCKHOLM;
441 char *targ = "GAATTC";
442 ESL_ALPHABET *abc = NULL;
443 ESL_MSA *msa = NULL;
444 P7_HMM *hmm = NULL;
445 P7_PROFILE *gm = NULL;
446 P7_BG *bg = NULL;
447 P7_PRIOR *pri = NULL;
448 ESL_DSQ *dsq = NULL;
449 P7_GMX *gx = NULL;
450 P7_TRACE *tr = NULL;
451 int L = strlen(targ);
452 float vsc, vsc2, fsc;
453
454 if ((abc = esl_alphabet_Create(eslDNA)) == NULL) esl_fatal("failed to create alphabet");
455 if ((pri = p7_prior_CreateNucleic()) == NULL) esl_fatal("failed to create prior");
456 if ((msa = esl_msa_CreateFromString(query, fmt)) == NULL) esl_fatal("failed to create MSA");
457 if (esl_msa_Digitize(abc, msa, NULL) != eslOK) esl_fatal("failed to digitize MSA");
458 if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK) esl_fatal("failed to create GAATTC model");
459 if (p7_ParameterEstimation(hmm, pri) != eslOK) esl_fatal("failed to parameterize GAATTC model");
460 if (p7_hmm_SetConsensus(hmm, NULL) != eslOK) esl_fatal("failed to make consensus");
461 if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create DNA null model");
462 if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal("failed to create GAATTC profile");
463 if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)!= eslOK) esl_fatal("failed to config profile");
464 if (p7_profile_Validate(gm, NULL, 0.0001) != eslOK) esl_fatal("whoops, profile is bad!");
465 if (esl_abc_CreateDsq(abc, targ, &dsq) != eslOK) esl_fatal("failed to create GAATTC digital sequence");
466 if ((gx = p7_gmx_Create(gm->M, L)) == NULL) esl_fatal("failed to create DP matrix");
467 if ((tr = p7_trace_Create()) == NULL) esl_fatal("trace creation failed");
468
469 p7_GViterbi (dsq, L, gm, gx, &vsc);
470 if (esl_opt_GetBoolean(go, "-v")) printf("Viterbi score: %.4f\n", vsc);
471 if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);
472
473 p7_GTrace (dsq, L, gm, gx, tr);
474 p7_trace_Score(tr, dsq, gm, &vsc2);
475 if (esl_opt_GetBoolean(go, "-v")) p7_trace_Dump(stdout, tr, gm, dsq);
476
477 if (esl_FCompare(vsc, vsc2, 1e-5) != eslOK) esl_fatal("trace score and Viterbi score don't agree.");
478
479 p7_GForward (dsq, L, gm, gx, &fsc);
480 if (esl_opt_GetBoolean(go, "-v")) printf("Forward score: %.4f\n", fsc);
481 if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);
482
483 p7_trace_Destroy(tr);
484 p7_gmx_Destroy(gx);
485 free(dsq);
486 p7_profile_Destroy(gm);
487 p7_bg_Destroy(bg);
488 p7_hmm_Destroy(hmm);
489 esl_msa_Destroy(msa);
490 p7_prior_Destroy(pri);
491 esl_alphabet_Destroy(abc);
492 return;
493 }
494
495 /* Viterbi validation is done by comparing the returned score
496 * to the score of the optimal trace. Not foolproof, but catches
497 * many kinds of errors.
498 *
499 * Another check is that the average score should be <= 0,
500 * since the random sequences are drawn from the null model.
501 */
502 static void
utest_viterbi(ESL_GETOPTS * go,ESL_RANDOMNESS * r,ESL_ALPHABET * abc,P7_BG * bg,P7_PROFILE * gm,int nseq,int L)503 utest_viterbi(ESL_GETOPTS *go, ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, P7_PROFILE *gm, int nseq, int L)
504 {
505 float avg_sc = 0.;
506 char errbuf[eslERRBUFSIZE];
507 ESL_DSQ *dsq = NULL;
508 P7_GMX *gx = NULL;
509 P7_TRACE *tr = NULL;
510 int idx;
511 float sc1, sc2;
512
513 if ((dsq = malloc(sizeof(ESL_DSQ) *(L+2))) == NULL) esl_fatal("malloc failed");
514 if ((tr = p7_trace_Create()) == NULL) esl_fatal("trace creation failed");
515 if ((gx = p7_gmx_Create(gm->M, L)) == NULL) esl_fatal("matrix creation failed");
516
517 for (idx = 0; idx < nseq; idx++)
518 {
519 if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal("seq generation failed");
520 if (p7_GViterbi(dsq, L, gm, gx, &sc1) != eslOK) esl_fatal("viterbi failed");
521 if (p7_GTrace (dsq, L, gm, gx, tr) != eslOK) esl_fatal("trace failed");
522 if (p7_trace_Validate(tr, abc, dsq, errbuf) != eslOK) esl_fatal("trace invalid:\n%s", errbuf);
523 if (p7_trace_Score(tr, dsq, gm, &sc2) != eslOK) esl_fatal("trace score failed");
524 if (esl_FCompare(sc1, sc2, 1e-6) != eslOK) esl_fatal("Trace score != Viterbi score");
525 if (p7_bg_NullOne(bg, dsq, L, &sc2) != eslOK) esl_fatal("null score failed");
526
527 avg_sc += (sc1 - sc2);
528
529 if (esl_opt_GetBoolean(go, "--vv"))
530 printf("utest_viterbi: Viterbi score: %.4f (null %.4f) (total so far: %.4f)\n", sc1, sc2, avg_sc);
531
532 p7_trace_Reuse(tr);
533 }
534
535 avg_sc /= (float) nseq;
536 if (avg_sc > 0.) esl_fatal("Viterbi scores have positive expectation (%f nats)", avg_sc);
537
538 p7_gmx_Destroy(gx);
539 p7_trace_Destroy(tr);
540 free(dsq);
541 return;
542 }
543
544 #endif /*p7GENERIC_VITERBI_TESTDRIVE*/
545 /*----------------- end, unit tests -----------------------------*/
546
547
548 /*****************************************************************
549 * 4. Test driver.
550 *****************************************************************/
551 /* gcc -g -Wall -Dp7GENERIC_VITERBI_TESTDRIVE -I. -I../easel -L. -L../easel -o generic_viterbi_utest generic_viterbi.c -lhmmer -leasel -lm
552 */
553 #ifdef p7GENERIC_VITERBI_TESTDRIVE
554 #include "easel.h"
555 #include "esl_getopts.h"
556 #include "esl_msa.h"
557
558 #include "p7_config.h"
559 #include "hmmer.h"
560
561 static ESL_OPTIONS options[] = {
562 /* name type default env range toggles reqs incomp help docgroup*/
563 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
564 { "-s", eslARG_INT, "42", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
565 { "-v", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "be verbose", 0 },
566 { "--vv", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "be very verbose", 0 },
567 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
568 };
569 static char usage[] = "[-options]";
570 static char banner[] = "unit test driver for the generic Viterbi implementation";
571
572 int
main(int argc,char ** argv)573 main(int argc, char **argv)
574 {
575 ESL_GETOPTS *go = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
576 ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
577 ESL_ALPHABET *abc = NULL;
578 P7_HMM *hmm = NULL;
579 P7_PROFILE *gm = NULL;
580 P7_BG *bg = NULL;
581 int M = 100;
582 int L = 200;
583 int nseq = 20;
584 char errbuf[eslERRBUFSIZE];
585
586 if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create alphabet");
587 if (p7_hmm_Sample(r, M, abc, &hmm) != eslOK) esl_fatal("failed to sample an HMM");
588 if ((bg = p7_bg_Create(abc)) == NULL) esl_fatal("failed to create null model");
589 if ((gm = p7_profile_Create(hmm->M, abc)) == NULL) esl_fatal("failed to create profile");
590 if (p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL) != eslOK) esl_fatal("failed to config profile");
591 if (p7_hmm_Validate (hmm, errbuf, 0.0001) != eslOK) esl_fatal("whoops, HMM is bad!: %s", errbuf);
592 if (p7_profile_Validate(gm, errbuf, 0.0001) != eslOK) esl_fatal("whoops, profile is bad!: %s", errbuf);
593
594 utest_basic (go);
595 utest_viterbi(go, r, abc, bg, gm, nseq, L);
596
597 p7_profile_Destroy(gm);
598 p7_bg_Destroy(bg);
599 p7_hmm_Destroy(hmm);
600 esl_alphabet_Destroy(abc);
601 esl_randomness_Destroy(r);
602 esl_getopts_Destroy(go);
603 return 0;
604 }
605 #endif /*p7GENERIC_VITERBI_TESTDRIVE*/
606 /*-------------------- end, test driver -------------------------*/
607
608
609
610
611 /*****************************************************************
612 * 5. Example
613 *****************************************************************/
614 /* This is essentially identical to the vtrace example. */
615 #ifdef p7GENERIC_VITERBI_EXAMPLE
616 /*
617 gcc -g -O2 -Dp7GENERIC_VITERBI_EXAMPLE -I. -I../easel -L. -L../easel -o generic_viterbi_example generic_viterbi.c -lhmmer -leasel -lm
618 */
619 #include "p7_config.h"
620
621 #include "easel.h"
622 #include "esl_alphabet.h"
623 #include "esl_getopts.h"
624 #include "esl_sq.h"
625 #include "esl_sqio.h"
626
627 #include "hmmer.h"
628
629 static ESL_OPTIONS options[] = {
630 /* name type default env range toggles reqs incomp help docgroup*/
631 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
632 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
633 };
634 static char usage[] = "[-options] <hmmfile> <seqfile>";
635 static char banner[] = "example of generic Viterbi";
636
637
638 int
main(int argc,char ** argv)639 main(int argc, char **argv)
640 {
641 ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
642 char *hmmfile = esl_opt_GetArg(go, 1);
643 char *seqfile = esl_opt_GetArg(go, 2);
644 ESL_ALPHABET *abc = NULL;
645 P7_HMMFILE *hfp = NULL;
646 P7_HMM *hmm = NULL;
647 P7_BG *bg = NULL;
648 P7_PROFILE *gm = NULL;
649 P7_GMX *fwd = NULL;
650 ESL_SQ *sq = NULL;
651 ESL_SQFILE *sqfp = NULL;
652 P7_TRACE *tr = NULL;
653 int format = eslSQFILE_UNKNOWN;
654 char errbuf[eslERRBUFSIZE];
655 float sc;
656 int d;
657 int status;
658
659 /* Read in one HMM */
660 if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
661 if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM");
662 p7_hmmfile_Close(hfp);
663
664 /* Read in one sequence */
665 sq = esl_sq_CreateDigital(abc);
666 status = esl_sqfile_Open(seqfile, format, NULL, &sqfp);
667 if (status == eslENOTFOUND) p7_Fail("No such file.");
668 else if (status == eslEFORMAT) p7_Fail("Format unrecognized.");
669 else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz.");
670 else if (status != eslOK) p7_Fail("Open failed, code %d.", status);
671 if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");
672 esl_sqfile_Close(sqfp);
673
674 /* Configure a profile from the HMM */
675 bg = p7_bg_Create(abc);
676 p7_bg_SetLength(bg, sq->n);
677 gm = p7_profile_Create(hmm->M, abc);
678 p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);
679
680 /* Allocate matrix and a trace */
681 fwd = p7_gmx_Create(gm->M, sq->n);
682 tr = p7_trace_Create();
683
684 /* Run Viterbi; do traceback */
685 p7_GViterbi (sq->dsq, sq->n, gm, fwd, &sc);
686 p7_GTrace (sq->dsq, sq->n, gm, fwd, tr);
687
688 /* Dump and validate the trace. */
689 p7_trace_Dump(stdout, tr, gm, sq->dsq);
690 if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace fails validation:\n%s\n", errbuf);
691
692 /* Domain info in the trace. */
693 p7_trace_Index(tr);
694 printf("# Viterbi: %d domains : ", tr->ndom);
695 for (d = 0; d < tr->ndom; d++) printf("%6d %6d %6d %6d ", tr->sqfrom[d], tr->sqto[d], tr->hmmfrom[d], tr->hmmto[d]);
696 printf("\n");
697
698 /* Cleanup */
699 p7_trace_Destroy(tr);
700 p7_gmx_Destroy(fwd);
701 p7_profile_Destroy(gm);
702 p7_bg_Destroy(bg);
703 p7_hmm_Destroy(hmm);
704 esl_sq_Destroy(sq);
705 esl_alphabet_Destroy(abc);
706 esl_getopts_Destroy(go);
707 return 0;
708 }
709 #endif /*p7GENERIC_VITERBI_EXAMPLE*/
710 /*-------------------- end, example -----------------------------*/
711
712
713