1 /* cm_dpalign.c
2  *
3  * DP functions for standard (non-truncated) HMM banded and
4  * non-banded, non-D&C CM alignment of a full target sequence.
5  *
6  * All functions use a DP matrix and or shadow matrix, either
7  * non-banded (CM_MX, CM_SHADOW_MX) or HMM banded (CM_HB_MX,
8  * CM_HB_SHADOW_MX).  The HMM banded matrices only have cells within
 * bands allocated. The bands are derived from an HMM Forward/Backward
 * alignment of the target sequence and are stored in a CP9Bands_t
 * object, a pointer to which must exist in the cm (CM_t object).
12  *
13  * The non-banded, non-D&C alignment functions are mainly useful for
14  * understanding and/or debugging the HMM banded versions.  These are
15  * consistent (same logic/code organization) with their HMM banded
16  * counterparts. They are memory intensive. For small memory
17  * non-banded alignment functions see cm_dpsmall.c. For truncated
18  * alignment functions (both non-banded and HMM banded) see
19  * cm_dpalign_trunc.c.
20  *
21  * List of functions:
22  * non-banded version        HMM banded version
23  * -----------------------   ------------------------
24  * cm_alignT()               cm_alignT_hb()
25  * cm_AlignSizeNeeded()      cm_AlignSizeNeededHB()
26  * cm_Align()                cm_AlignHB()
27  * cm_CYKInsideAlign()       cm_CYKInsideAlignHB()
28  * cm_InsideAlign()          cm_InsideAlignHB()
29  * cm_OptAccAlign()          cm_OptAccAlignHB()
30  * cm_CYKOutsideAlign()*     cm_CYKOutsideAlignHB()*
31  * cm_OutsideAlign()         cm_OutsideAlignHB()
32  * cm_Posterior()            cm_PosteriorHB()
33  *
 * * cm_CYKOutsideAlign() and cm_CYKOutsideAlignHB() are for reference
 * and debugging only; they're not called by any of the main Infernal
 * programs, only by test programs.
37  *
38  * EPN, Wed Sep 14 05:31:02 2011 Note: post version 1.0.2, the
39  * 'Fast'/'fast_' prefix was dropped from many of these functions and
40  * the cm_ prefix was added. Also 'optimal_accuracy' was shortened to
41  * 'optacc'. At the same time, CM_MX and CM_SHADOW_MX data structures
42  * were introduced to replace the multidimensional float/void arrays
43  * previously used in the non-banded functions.
44  *
45  * EPN, Thu Sep 29 10:01:48 2011 Note: post version 1.0.2, all
46  * functions were simplified to take the target sequence length L
47  * instead of start and end positions i0 and j0. Now, i0 is implicitly
48  * 1 and j0 is implicitly L. To align a subsequence i..j of a larger
49  * sequence the caller need only pass dsq+i as dsq and j-i+1 as L.
50  * The old method of passing i0 and j0 is leftover from the D&C
51  * functions in cm_dpsmall.c upon which many of the functions here
52  * were based.
53  *
54  * EPN, Thu Sep 29 10:44:19 2011
55  */
56 
57 #include "esl_config.h"
58 #include "p7_config.h"
59 #include "config.h"
60 
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <assert.h>
64 #include <math.h>
65 
66 #include "easel.h"
67 #include "esl_sqio.h"
68 #include "esl_stack.h"
69 #include "esl_stopwatch.h"
70 #include "esl_vectorops.h"
71 
72 #include "hmmer.h"
73 
74 #include "infernal.h"
75 
76 static int   cm_alignT   (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, CM_MX    *mx, CM_SHADOW_MX    *shmx, CM_EMIT_MX    *emit_mx, Parsetree_t **ret_tr, float *ret_sc_or_pp);
77 static int   cm_alignT_hb(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, CM_HB_EMIT_MX *emit_mx, Parsetree_t **ret_tr, float *ret_sc_or_pp);
78 
79 
80 /* Function: cm_alignT()
81  * Date:     EPN, Sun Nov 18 19:21:30 2007
82  *
83  * Note:     Based on insideT() [SRE, Fri Aug 11 12:08:18 2000 [Pittsburgh]]
84  *           Renamed from fast_alignT() [EPN, Wed Sep 14 06:04:39 2011].
85  *
86  * Purpose:  Call either cm_CYKInsideAlign() (if !<do_optacc>),
87  *           or cm_OptAccAlign()  (if  <do_optacc>),
88  *           get vjd shadow matrix; then trace back and
89  *           append to an existing but empty parsetree tr.
90  *           The full sequence 1..L will be aligned.
91  *
92  *           If (<do_optacc>) then emit_mx must != NULL.
93  *
94  *           Very similar to cm_dpsmall.c:insideT() in case of
95  *           CYK alignment, but uses more efficient implementation
96  *           of CYK alignment (cm_CYKInsideAlign()) as opposed to
97  *           inside()).
98  *
99  * Args:     cm           - the model
100  *           errbuf       - char buffer for reporting errors
101  *           dsq          - the digitized sequence [1..L]
102  *           L            - length of the dsq to align
103  *           size_limit   - max size in Mb for DP matrix
104  *           do_optacc    - TRUE to align with optimal accuracy, else use CYK
105  *           mx           - the DP matrix to fill in
106  *           shmx         - the shadow matrix to fill in
107  *           emit_mx      - the pre-filled emit matrix, must be non-NULL if do_optacc
108  *           ret_tr       - RETURN: the optimal parsetree
109  *           ret_sc_or_pp - RETURN: optimal score (CYK if !do_optacc, else avg PP of all 1..L residues)
110  *
111  * Returns:  <eslOK>     on success.
112  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>, in
113  *                       this case, alignment has been aborted, ret_* variables are not valid
114  *           <eslEINVAL> on traceback problem: bogus state
115  */
116 int
cm_alignT(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_optacc,CM_MX * mx,CM_SHADOW_MX * shmx,CM_EMIT_MX * emit_mx,Parsetree_t ** ret_tr,float * ret_sc_or_pp)117 cm_alignT(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc,
118 	  CM_MX *mx, CM_SHADOW_MX *shmx, CM_EMIT_MX *emit_mx, Parsetree_t **ret_tr, float *ret_sc_or_pp)
119 {
120   int       status;
121   Parsetree_t *tr = NULL;       /* the parsetree */
122   float     sc;			/* the score of the CYK alignment */
123   float     pp;			/* avg pp of all emitted residues in optacc alignment */
124   ESL_STACK *pda;               /* stack that tracks bifurc parent of a right start */
125   int       v,j,d,i;		/* indices for state, j, subseq len */
126   int       k;			/* subseq len for bifurcs */
127   int       y, yoffset;         /* child state y, it's offset */
128   int       bifparent;          /* B_st parent */
129   int       b;                  /* local begin state */
130 
131   if(do_optacc) { if((status = cm_OptAccAlign   (cm, errbuf, dsq, L, size_limit, mx, shmx, emit_mx, &b, &pp)) != eslOK) return status; }
132   else          { if((status = cm_CYKInsideAlign(cm, errbuf, dsq, L, size_limit, mx, shmx,          &b, &sc)) != eslOK) return status; };
133 
134   /* Create and initialize the parsetree */
135   tr = CreateParsetree(100);
136   InsertTraceNode(tr, -1, TRACE_LEFT_CHILD, 1, L, 0); /* init: attach the root S */
137 
138   pda = esl_stack_ICreate();
139   if(pda == NULL) goto ERROR;
140   v = 0;
141   i = 1;
142   j = d = L;
143 
144   while (1) {
145     if (cm->sttype[v] == B_st) {
146       k = shmx->kshadow[v][j][d];   /* k = len of right fragment */
147 
148       /* Store info about the right fragment that we'll retrieve later:
149        */
150       /* remember the end j */
151       if((status = esl_stack_IPush(pda, j))       != eslOK) goto ERROR;	/* remember the end j    */
152       if((status = esl_stack_IPush(pda, k))       != eslOK) goto ERROR;	/* remember the subseq length k */
153       if((status = esl_stack_IPush(pda, tr->n-1)) != eslOK) goto ERROR;	/* remember the trace index of the parent B state */
154 
155       /* Deal with attaching left start state.
156        */
157       j = j-k;
158       d = d-k;
159       i = j-d+1;
160       y = cm->cfirst[v];
161       InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
162       v = y;
163     } else if (cm->sttype[v] == E_st || cm->sttype[v] == EL_st) {
164       /* We don't trace back from an E or EL. Instead, we're done with the
165        * left branch of the tree, and we try to swing over to the right
166        * branch by popping a right start off the stack and attaching
167        * it. If the stack is empty, then we're done with the
168        * traceback altogether. This is the only way to break the
169        * while (1) loop.
170        */
171       if (esl_stack_IPop(pda, &bifparent) == eslEOD) break;
172       esl_stack_IPop(pda, &d);
173       esl_stack_IPop(pda, &j);
174       v = tr->state[bifparent];	/* recover state index of B */
175       y = cm->cnum[v];		/* find state index of right S */
176       i = j-d+1;
177 				/* attach the S to the right */
178       InsertTraceNode(tr, bifparent, TRACE_RIGHT_CHILD, i, j, y);
179       v = y;
180     } else {
181       yoffset = shmx->yshadow[v][j][d];
182 
183       /*printf("v : %d | r : %d | z : %d | i0 : %d | \n", v, r, z, i0);*/
184       /*printf("\tyoffset : %d\n", yoffset);*/
185       switch (cm->sttype[v]) {
186       case D_st:            break;
187       case MP_st: i++; j--; break;
188       case ML_st: i++;      break;
189       case MR_st:      j--; break;
190       case IL_st: i++;      break;
191       case IR_st:      j--; break;
192       case S_st:            break;
193       default:    ESL_FAIL(eslEINVAL, errbuf, "bogus state type in cm_alignT()");
194       }
195       d = j-i+1;
196 
197       if (yoffset == USED_EL)
198 	{	/* a local alignment end */
199 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, cm->M);
200 	  v = cm->M;		/* now we're in EL. */
201 	}
202       else if (yoffset == USED_LOCAL_BEGIN)
203 	{ /* local begin; can only happen once, from root */
204 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, b);
205 	  v = b;
206 	}
207       else
208 	{
209 	  y = cm->cfirst[v] + yoffset;
210 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
211 	  v = y;
212 	}
213     }
214   }
215   esl_stack_Destroy(pda);  /* it should be empty; we could check; naaah. */
216 
217   if(ret_tr       != NULL) *ret_tr = tr; else FreeParsetree(tr);
218   if(ret_sc_or_pp != NULL) *ret_sc_or_pp = do_optacc ? pp : sc;
219   return eslOK;
220 
221  ERROR:
222   ESL_FAIL(status, errbuf, "out of memory");
223   return status; /* NEVERREACHED */
224 }
225 
226 
227 /* Function: cm_alignT_hb()
228  * Date:     EPN 03.29.06
229  *
230  * Note:     Based on insideT() [SRE, Fri Aug 11 12:08:18 2000 [Pittsburgh]]
231  *           Renamed from fast_alignT_hb() [EPN, Wed Sep 14 06:00:51 2011].
232  *
233  * Purpose: Call either cm_CYKInsideAlignHB() (if !<do_optacc>), or
234  *           cm_OptAccAlignHB() (if <do_optacc>), fill banded vjd
235  *           shadow matrix in <shmx>; then trace back.  Append the
236  *           trace to a given traceback, which already has state 0 at
237  *           tr->n-1.
238  *
239  *           If (<do_optacc>) then emit_mx must != NULL.
240  *
241  * Args:     cm           - the model
242  *           errbuf       - char buffer for reporting errors
243  *           dsq          - the digitized sequence [1..L]
244  *           L            - length of the dsq to align
245  *           size_limit   - max size in Mb for DP matrix
246  *           do_optacc    - TRUE to align with optimal accuracy, else use CYK
247  *           mx           - the DP matrix to fill in
248  *           shmx         - the shadow matrix to fill in
249  *           emit_mx      - the pre-filled emit matrix, must be non-NULL if do_optacc
250  *           ret_tr       - RETURN: the optimal parsetree
251  *           ret_sc_or_pp - RETURN: optimal score (CYK if !do_optacc, else avg PP of all 1..L residues)
252  *
253  *
254  * Throws:  <eslOK>     on success
255  *          <eslERANGE> if required CM_HB_MX exceeds <size_limit>
256  *          <eslEINVAL> on traceback problem: bogus state
257  */
258 int
cm_alignT_hb(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_optacc,CM_HB_MX * mx,CM_HB_SHADOW_MX * shmx,CM_HB_EMIT_MX * emit_mx,Parsetree_t ** ret_tr,float * ret_sc_or_pp)259 cm_alignT_hb(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc,
260 	     CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, CM_HB_EMIT_MX *emit_mx, Parsetree_t **ret_tr, float *ret_sc_or_pp)
261 {
262   int       status;
263   Parsetree_t *tr = NULL;       /* the parsetree */
264   float     sc;			/* the score of the CYK alignment */
265   float     pp;			/* avg pp of all emitted residues in optacc alignment */
266   ESL_STACK *pda;               /* stack that tracks bifurc parent of a right start */
267   int       v,j,d,i;		/* indices for state, j, subseq len */
268   int       k;			/* subseq len for bifurcs */
269   /*int       z;*/              /* state index */
270   int       y, yoffset;         /* child state y, it's offset */
271   int       bifparent;          /* B_st parent */
272   int       b;                  /* local begin state */
273   int       jp_v;               /* j-jmin[v] for current j, and current v */
274   int       dp_v;               /* d-hdmin[v][jp_v] for current j, current v, current d*/
275   int       allow_S_local_end;  /* set to true to allow d==0 BEGL_S and BEGR_S local ends if(do_optacc) */
276 
277   /* pointers to cp9b data for convenience */
278   CP9Bands_t  *cp9b = cm->cp9b;
279   int         *jmin = cp9b->jmin;
280   int         *jmax = cp9b->jmax;
281   int       **hdmin = cp9b->hdmin;
282   int       **hdmax = cp9b->hdmax;
283 
284   if(do_optacc) { if((status = cm_OptAccAlignHB   (cm, errbuf, dsq, L, size_limit, mx, shmx, emit_mx, &b, &pp)) != eslOK) return status; }
285   else          { if((status = cm_CYKInsideAlignHB(cm, errbuf, dsq, L, size_limit, mx, shmx,	      &b, &sc)) != eslOK) return status; }
286 
287   /* Create and initialize the parsetree */
288   tr = CreateParsetree(100);
289   InsertTraceNode(tr, -1, TRACE_LEFT_CHILD, 1, L, 0); /* init: attach the root S */
290 
291   pda = esl_stack_ICreate();
292   if(pda == NULL) goto ERROR;
293   v = 0;
294   i = 1;
295   j = d = L;
296 
297   while (1) {
298     /* special case for HMM banded optimal accuracy, explained below, after the crazy if */
299     if(do_optacc && d == 0 && (cm->stid[v] == BEGL_S || cm->stid[v] == BEGR_S) &&
300        ((j < jmin[v]             || j > jmax[v]) ||              /* j is outside v's j band */
301 	(d < hdmin[v][j-jmin[v]] || d > hdmax[v][j-jmin[v]]))) { /* j is within v's j band, but d is outside j's d band */
302       /* special case: doing optimal accuracy and v is a BEGL_S or
303        * BEGR_S and d is 0 and j is outside v's j band or j is within
304        * the band but d is outside j's d band.  We allow this case
305        * because although this implies a cell outside the bands, in
306        * optimal accuracy only emissions add to the score and we to
307        * initialize all cells to IMPOSSIBLE. This means when d==0, we
308        * have no way of distinguishing those cells that have been
309        * reset to IMPOSSIBLE because they correspond to a valid cell
310        * (with valid cells in the B deck, BEGL_S and BEGR_S decks) and
311        * those that do not correspond to a valid cell and were never
312        * changed since initialization (i.e. this case). So we allow it
313        * to prevent an out-of-bounds error. We have to catch it though
314        * so we don't try to determine jp_v and dp_v below. We even use
315        * USED_EL here if we're not in local mode. You could argue
316        * either way whether we should or shouldn't allow this (e.g. we
317        * already allow illegal parsetrees in optimal accuracy), but a
318        * big reason I decided to allow it is that it is difficult
319        * implement a way of disallowing it. Plus the goal of optimal
320        * accuracy is to show the alignment that has the maximum
321        * average PP on emitted residues within the bands. By allowing
322        * this, we also consider a few possible alignments that violate
323        * the bands, which I think is okay.
324        */
325       allow_S_local_end = TRUE; /* this sets yoffset to USED_LOCAL_END in the final 'else' of below code block */
326     }
327     else if (cm->sttype[v] != EL_st) { /* normal case, determine jp_v, dp_v, j, d offset values given bands */
328       jp_v = j - jmin[v];
329       dp_v = d - hdmin[v][jp_v];
330       allow_S_local_end = FALSE;
331       assert(j >= jmin[v]        && j <= jmax[v]);
332       assert(d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]);
333       ESL_DASSERT1((j >= jmin[v]        && j <= jmax[v]));
334       ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
335     }
336 
337     if (cm->sttype[v] == B_st) {
338       k = shmx->kshadow[v][jp_v][dp_v];   /* k = offset len of right fragment */
339       /*z = cm->cnum[v];*/
340 
341       /* Store info about the right fragment that we'll retrieve later:
342        */
343       if((status = esl_stack_IPush(pda, j)) != eslOK)       goto ERROR;	/* remember the end j    */
344       if((status = esl_stack_IPush(pda, k)) != eslOK)       goto ERROR;	/* remember the subseq length k */
345       if((status = esl_stack_IPush(pda, tr->n-1)) != eslOK) goto ERROR; /* remember the trace index of the parent B state */
346       /* Deal with attaching left start state.
347        */
348       j = j-k;
349       d = d-k;
350       i = j-d+1;
351       y = cm->cfirst[v];
352       InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
353       v = y;
354     }
355     else if (cm->sttype[v] == E_st || cm->sttype[v] == EL_st) {
356       /* We don't trace back from an E or EL. Instead, we're done with the
357        * left branch of the tree, and we try to swing over to the right
358        * branch by popping a right start off the stack and attaching
359        * it. If the stack is empty, then we're done with the
360        * traceback altogether. This is the only way to break the
361        * while (1) loop.
362        */
363       if (esl_stack_IPop(pda, &bifparent) == eslEOD) break;
364       esl_stack_IPop(pda, &d);
365       esl_stack_IPop(pda, &j);
366       v = tr->state[bifparent];	/* recover state index of B */
367       y = cm->cnum[v];		/* find state index of right S */
368       i = j-d+1;
369 				/* attach the S to the right */
370       InsertTraceNode(tr, bifparent, TRACE_RIGHT_CHILD, i, j, y);
371       v = y;
372     }
373     else {
374       /* get yoffset */
375       if (allow_S_local_end) {
376 	yoffset = USED_EL;
377       }
378       else {
379 	yoffset = shmx->yshadow[v][jp_v][dp_v];
380       }
381       switch (cm->sttype[v]) {
382       case D_st:            break;
383       case MP_st: i++; j--; break;
384       case ML_st: i++;      break;
385       case MR_st:      j--; break;
386       case IL_st: i++;      break;
387       case IR_st:      j--; break;
388       case S_st:            break;
389       default:    ESL_FAIL(eslEINVAL, errbuf, "Bogus state type in cm_alignT_hb()");
390       }
391       d = j-i+1;
392 
393       if (yoffset == USED_EL)
394 	{	/* a local alignment end */
395 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, cm->M);
396 	  v = cm->M;		/* now we're in EL. */
397 	}
398       else if (yoffset == USED_LOCAL_BEGIN)
399 	{ /* local begin; can only happen once, from root */
400 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, b);
401 	  v = b;
402 	}
403       else
404 	{
405 	  y = cm->cfirst[v] + yoffset;
406 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
407 	  v = y;
408 	}
409       /*ParsetreeDump(stdout, tr, cm, dsq);*/
410     }
411   }
412   esl_stack_Destroy(pda);  /* it should be empty; we could check; naaah. */
413 
414   /*ParsetreeDump(stdout, tr, cm, dsq);*/
415 
416   if(ret_tr       != NULL) *ret_tr = tr; else FreeParsetree(tr);
417   if(ret_sc_or_pp != NULL) *ret_sc_or_pp = do_optacc ? pp : sc;
418   return eslOK;
419 
420  ERROR:
421   ESL_FAIL(eslEMEM, errbuf, "out of memory");
422   return status; /* NEVERREACHED */
423 }
424 
425 /* Function: cm_AlignSizeNeeded()
426  * Date:     EPN, Thu Jan 12 09:51:11 2012
427  *
428  * Purpose:  Determine size in Mb required to successfully call
429  *           cm_Align() for a given model <cm>, sequence length
430  *           <L> and alignment options in <do_sample> and <do_post>.
431  *
432  *           Return <eslERANGE> if required size exceeds size_limit.
433  *
434  * Args:     cm         - the covariance model
435  *           errbuf     - char buffer for reporting errors
436  *           L          - length of sequence
437  *           size_limit - max size in Mb for all required matrices, return eslERANGE if exceeded
438  *           do_sample  - TRUE to sample a parsetree from the Inside matrix
439  *           do_post    - TRUE to do posteriors
440  *           ret_mxmb   - RETURN: size in Mb of required CM_MX (we'll need 2 of these if do_post)
441  *           ret_emxmb  - RETURN: size in Mb of required CM_EMIT_MX   (0. if we won't need one)
442  *           ret_shmxmb - RETURN: size in Mb of required CM_SHADOW_MX (0. if we won't need one)
443  *           ret_totmb  - RETURN: size in Mb of all required matrices
444  *
445  * Returns: <eslOK> on success.
446  *
447  * Throws:  <eslEINVAL> on contract violation
448  *          <eslERANGE> if total size of all matrices exceeds <size_limit>
449  */
450 int
cm_AlignSizeNeeded(CM_t * cm,char * errbuf,int L,float size_limit,int do_sample,int do_post,float * ret_mxmb,float * ret_emxmb,float * ret_shmxmb,float * ret_totmb)451 cm_AlignSizeNeeded(CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post,
452 		   float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb)
453 {
454   int          status;
455   float        totmb    = 0.;  /* total Mb required for all matrices (that must be simultaneously in memory) */
456   float        mxmb     = 0.;  /* Mb required for CM_MX */
457   float        emxmb    = 0.;  /* Mb required for CM_EMIT_MX */
458   float        shmxmb   = 0.;  /* Mb required for CM_SHADOW_MX */
459 
460   /* we pass NULL values to the *_mx_SizeNeeded() functions because we don't care about cell counts */
461 
462   /* we will always need an Inside or CYK matrix */
463   if((status = cm_mx_SizeNeeded(cm, errbuf, L, NULL, &mxmb)) != eslOK) return status;
464   totmb = mxmb;
465 
466   /* if calc'ing posteriors, we'll also need an Outside matrix (which
467    * we'll reuse as the Posterior matrix, so only count it once) and
468    * an emit matrix.
469    */
470   if(do_post) {
471     totmb += mxmb;
472     if((status = cm_emit_mx_SizeNeeded(cm, errbuf, L, NULL, NULL, &emxmb)) != eslOK) return status;
473     totmb += emxmb;
474   }
475 
476   /* if we're not sampling an alignment, we'll also need a shadow
477    * matrix for the traceback.
478    */
479   if(! do_sample) { /* if do_sample, we won't need a shadow matrix */
480     if((status = cm_shadow_mx_SizeNeeded(cm, errbuf, L, NULL, NULL, &shmxmb)) != eslOK) return status;
481     totmb += shmxmb;
482   }
483 
484   if (ret_mxmb   != NULL) *ret_mxmb    = mxmb;
485   if (ret_emxmb  != NULL) *ret_emxmb   = emxmb;
486   if (ret_shmxmb != NULL) *ret_shmxmb  = shmxmb;
487   if (ret_totmb  != NULL) *ret_totmb   = totmb;
488 
489   if(totmb > size_limit) ESL_FAIL(eslERANGE, errbuf, "non-banded standard alignment mxes need %.2f Mb > %.2f Mb limit.\nUse --mxsize, --maxtau or --tau.", totmb, (float) size_limit);
490 
491   return eslOK;
492 }
493 
494 /* Function: cm_AlignSizeNeededHB()
495  * Date:     EPN, Thu Jan 12 10:06:20 2012
496  *
497  * Purpose:  Determine size in Mb required to successfully call
498  *           cm_AlignHB() for a given model <cm>, sequence length
499  *           <L>, HMM bands <cm->cp9b> and alignment options
500  *           in <do_sample> and <do_post>.
501  *
502  *           Return <eslERANGE> if required size exceeds size_limit.
503  *
504  * Args:     cm         - the covariance model
505  *           errbuf     - char buffer for reporting errors
506  *           L          - length of sequence
507  *           size_limit - max size in Mb for all required matrices, return eslERANGE if exceeded
508  *           do_sample  - TRUE to sample a parsetree from the Inside matrix
509  *           do_post    - TRUE to do posteriors
510  *           ret_mxmb   - RETURN: size in Mb of required CM_HB_MX (we'll need 2 of these if do_post)
511  *           ret_emxmb  - RETURN: size in Mb of required CM_HB_EMIT_MX   (0. if we won't need one)
512  *           ret_shmxmb - RETURN: size in Mb of required CM_HB_SHADOW_MX (0. if we won't need one)
513  *           ret_totmb  - RETURN: size in Mb of all required matrices
514  *
515  * Returns: <eslOK> on success.
516  *
517  * Throws:  <eslEINVAL> on contract violation
518  *          <eslERANGE> if total size of all matrices exceeds <size_limit>
519  */
520 int
cm_AlignSizeNeededHB(CM_t * cm,char * errbuf,int L,float size_limit,int do_sample,int do_post,float * ret_mxmb,float * ret_emxmb,float * ret_shmxmb,float * ret_totmb)521 cm_AlignSizeNeededHB(CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post,
522 		     float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb)
523 {
524   int          status;
525   float        totmb    = 0.;  /* total Mb required for all matrices (that must be simultaneously in memory) */
526   float        mxmb     = 0.;  /* Mb required for CM_MX */
527   float        emxmb    = 0.;  /* Mb required for CM_EMIT_MX */
528   float        shmxmb   = 0.;  /* Mb required for CM_SHADOW_MX */
529 
530   /* we pass NULL values to the *_mx_SizeNeeded() functions because we don't care about cell counts */
531 
532   /* we will always need an Inside or CYK matrix */
533   if((status = cm_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, &mxmb)) != eslOK) return status;
534   totmb = mxmb;
535 
536   /* if calc'ing posteriors, we'll also need an Outside matrix (which
537    * we'll reuse as the Posterior matrix, so only count it once) and
538    * an emit matrix.
539    */
540   if(do_post) {
541     totmb += mxmb;
542     if((status = cm_hb_emit_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, NULL, &emxmb)) != eslOK) return status;
543     totmb += emxmb;
544   }
545 
546   /* if we're not sampling an alignment, we'll also need a shadow
547    * matrix for the traceback.
548    */
549   if(! do_sample) {
550     if((status = cm_hb_shadow_mx_SizeNeeded(cm, errbuf, cm->cp9b, NULL, NULL, &shmxmb)) != eslOK) return status;
551     totmb += shmxmb;
552   }
553 
554   if (ret_mxmb   != NULL) *ret_mxmb    = mxmb;
555   if (ret_emxmb  != NULL) *ret_emxmb   = emxmb;
556   if (ret_shmxmb != NULL) *ret_shmxmb  = shmxmb;
557   if (ret_totmb  != NULL) *ret_totmb   = totmb;
558 
559 #if eslDEBUGLEVEL >= 1
560   printf("#DEBUG: cm_AlignSizeNeededHB()\n");
561   printf("#DEBUG: \t mxmb:  %.2f\n", mxmb);
562   printf("#DEBUG: \t emxmb: %.2f\n", emxmb);
563   printf("#DEBUG: \t shmxmb:%.2f\n", shmxmb);
564   printf("#DEBUG: \t totmb: %.2f\n", totmb);
565   printf("#DEBUG: \t limit: %.2f\n", size_limit);
566 #endif
567 
568   if(totmb > size_limit) ESL_FAIL(eslERANGE, errbuf, "HMM banded standard alignment mxes need %.2f Mb > %.2f Mb limit.\nUse --mxsize, --maxtau or --tau.", totmb, (float) size_limit);
569 
570   return eslOK;
571 }
572 
573 /* Function: cm_Align()
574  * Date:     EPN, Sun Nov 18 19:26:45 2007
575  *
576  * Note:     Very similar to cm_dpsmall.c:CYKInside() for case
577  *           of CYK alignment, but uses slightly more efficient
578  *           implementation (cm_CYKInsideAlign() instead of inside()).
579  *           Renamed from FastAlign() [EPN, Wed Sep 14 06:12:46 2011].
580  *
581  * Purpose: Wrapper for the cm_alignT() routine - solve a full
582  *           alignment problem either by CYK, using optimal accuracy,
583  *           or sampling, and return the traceback and the score,
584  *           without dividing & conquering. Optionally return a
585  *           posterior code string.
586  *
587  *           Input arguments allow this function to be run in 6 'modes':
588  *
589  *           mode      returns                 arguments
590  *           ----  ---------------  ----------------------------------------
591  *                 tr        ppstr  do_optacc  do_sample post_mx   ret_ppstr
592  *                 ---------------  ----------------------------------------
593  *              1. CYK       no      FALSE      FALSE      NULL      NULL
594  *              2. CYK       yes     FALSE      FALSE     !NULL     !NULL
595  *              3. Opt acc   no      TRUE       FALSE     !NULL      NULL
596  *              4. Opt acc   yes     TRUE       FALSE     !NULL     !NULL
597  *              5. sampled   no      FALSE      TRUE       NULL      NULL
598  *              6. sampled   yes     FALSE      TRUE      !NULL     !NULL
599  *
 *           CYK parsetrees are the most likely parsetrees, 'Opt acc'
 *           parsetrees are Holmes/Durbin optimally accurate
 *           parsetrees, the parse that maximizes the summed posterior
 *           probability of emitted residues. A sampled parsetree
 *           is a parsetree sampled from an Inside matrix based on
 *           its probability.
606  *
607  * Args:     cm        - the covariance model
608  *           errbuf    - char buffer for reporting errors
609  *           dsq       - the digitized sequence, 1..L
610  *           L         - length of sequence
611  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
612  *           do_optacc - TRUE: do optimal accuracy alignment, not CYK, requires post_mx != NULL
613  *           do_sample - TRUE to sample a parsetree from the Inside matrix
614  *           mx        - the main dp matrix, grown and filled here, must be non-NULL
615  *           shmx      - the shadow matrix, grown and filled here
616  *           post_mx   - dp matrix for posterior calculation, grown and filled here, can be NULL only if !do_optacc
617  *           emit_mx   - emit matrix to fill
618  *           r         - source of randomness, must be non-NULL only if do_sample==TRUE
619  *           ret_ppstr - RETURN: posterior code 1, (pass NULL if not wanted, must be NULL if post_mx == NULL)
620  *           ret_tr    - RETURN: traceback (pass NULL if trace isn't wanted)
621  *           ret_avgpp - RETURN: avg PP of emitted residues in parsetree (CYK or optacc) if ret_ppstr == NULL, set as 0.
622  *           ret_sc    - RETURN: score of the alignment in bits (Inside score if do_optacc)
623  *
624  * Returns: <eslOK> on success.
625  *
626  * Throws:  <eslEINVAL> on contract violation
627  *          <eslERANGE> if required CM_MX for Inside/Outside/CYK/Posterior exceeds <size_limit>
628  */
629 int
cm_Align(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_optacc,int do_sample,CM_MX * mx,CM_SHADOW_MX * shmx,CM_MX * post_mx,CM_EMIT_MX * emit_mx,ESL_RANDOMNESS * r,char ** ret_ppstr,Parsetree_t ** ret_tr,float * ret_avgpp,float * ret_sc)630 cm_Align(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, int do_sample,
631 	 CM_MX *mx, CM_SHADOW_MX *shmx, CM_MX *post_mx, CM_EMIT_MX *emit_mx, ESL_RANDOMNESS *r,
632 	 char **ret_ppstr, Parsetree_t **ret_tr, float *ret_avgpp, float *ret_sc)
633 {
634   int          status;
635   Parsetree_t *tr = NULL;
636   float        sc       = 0.;
637   float        avgpp    = 0.;
638   float        ins_sc   = 0.;
639   int          do_post;
640   char        *ppstr = NULL;
641   int          have_ppstr;
642 
643   have_ppstr = (ret_ppstr != NULL)       ? TRUE : FALSE;
644   do_post    = (do_optacc || have_ppstr) ? TRUE : FALSE;
645 
646   /* Contract check */
647   if(do_optacc && do_sample)         ESL_FAIL(eslEINCOMPAT, errbuf, "cm_Align(), do_optacc and do_sample are both TRUE.");
648   if(do_optacc && post_mx == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_Align(), do_optacc is TRUE, but post_mx == NULL.\n");
649   if(do_sample && r       == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_Align(), do_sample but r is NULL.");
650 
651   /* if do_post:   fill Inside, Outside, Posterior matrices, in that order.
652    * if do_sample: fill Inside and sample from it.
653    */
654   if(do_post || do_sample) {
655     if((status = cm_InsideAlign (cm, errbuf, dsq, L, size_limit, mx,  &ins_sc)) != eslOK) return status;
656     if(do_sample) {
657       if((status = cm_StochasticParsetree(cm, errbuf, dsq, L, mx, r, &tr, &sc)) != eslOK) return status;
658     }
659     if(do_post) { /* Inside was called above, now do Outside, then Posterior */
660       if((status = cm_OutsideAlign(cm, errbuf, dsq, L, size_limit, ((cm->align_opts & CM_ALIGN_CHECKINOUT) && (! (cm->flags & CMH_LOCAL_END))), post_mx, mx, NULL)) != eslOK) return status;
661       /* Note: we can only check the posteriors in cm_OutsideAlign() if local begin/ends are off */
662       if((status = cm_Posterior       (cm, errbuf, L, size_limit, mx, post_mx, post_mx)) != eslOK) return status;
663       if((status = cm_EmitterPosterior(cm, errbuf, L, size_limit, post_mx, emit_mx, (cm->align_opts & CM_ALIGN_CHECKINOUT))) != eslOK) return status;
664     }
665   }
666 
667   if(!do_sample) { /* if do_sample, we already have a parsetree */
668     if((status = cm_alignT(cm, errbuf, dsq, L, size_limit, do_optacc, mx, shmx, emit_mx, &tr, (do_optacc) ? NULL : &sc)) != eslOK) return status;
669   }
670 
671   if(have_ppstr || do_optacc) { /* call cm_PostCode to get average PP and optionally a PP string (if have_ppstr) */
672     if((status = cm_PostCode(cm, errbuf, L, emit_mx, tr, (have_ppstr) ? &ppstr : NULL, &avgpp)) != eslOK) return status;
673   }
674 
675   if (ret_ppstr  != NULL) *ret_ppstr  = ppstr; else free(ppstr);
676   if (ret_tr     != NULL) *ret_tr     = tr;    else FreeParsetree(tr);
677   if (ret_avgpp  != NULL) *ret_avgpp  = avgpp;
678   if (ret_sc     != NULL) *ret_sc     = (do_optacc) ? ins_sc : sc;
679 
680   ESL_DPRINTF1(("#DEBUG: returning from cm_Align() sc : %f\n", sc));
681   return eslOK;
682 }
683 
684 
685 
686 /* Function: cm_AlignHB()
687  * Incept:   EPN, Fri Oct 26 09:31:43 2007
688  *
689  * Note:     Based on CYKInside_b_jd() [11.04.05] which was based on CYKInside_b()
690  *           which was based on CYKInside() [SRE, Sun Jun  3 19:48:33 2001 [St. Louis]]
691  *           Renamed from cm_AlignHB() [EPN, Wed Sep 14 06:09:51 2011].
692  *
693  * Purpose: Wrapper for the cm_alignT() routine - solve a full
694  *           alignment problem either by CYK, using optimal accuracy,
695  *           or sampling, and return the traceback and the score,
696  *           without dividing & conquering. Optionally return a
697  *           posterior code string.
698  *
699  *           Identical to cm_Align() but HMM bands are used here.
700  *           See that function's 'Purpose' for more details.
701  *
702  * Args:     cm        - the covariance model
703  *           errbuf    - char buffer for reporting errors
704  *           dsq       - the digitized sequence, 1..L
705  *           L         - length of sequence
706  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
707  *           do_optacc - TRUE: do optimal accuracy alignment, not CYK, requires post_mx != NULL
708  *           do_sample - TRUE: sample a parsetree from the Inside matrix
709  *           mx        - the main dp matrix, grown and filled here, must be non-NULL
710  *           shmx      - the shadow matrix, grown and filled here
711  *           post_mx   - dp matrix for posterior calculation, grown and filled here, can be NULL only if !do_optacc
712  *           emit_mx   - emit matrix to fill
713  *           r         - source of randomness, must be non-NULL only if do_sample==TRUE
714  *           ret_ppstr - RETURN: posterior code 1, (pass NULL if not wanted, must be NULL if post_mx == NULL)
715  *           ret_tr    - RETURN: traceback (pass NULL if trace isn't wanted)
716  *           ret_avgpp - RETURN: avg PP of emitted residues in parsetree (CYK or optacc) if ret_ppstr == NULL, set as 0.
717  *           ret_sc    - RETURN: score of the alignment in bits (Inside score if do_optacc)
718  *
719  * Returns: <eslOK> on success
720  *
721  * Throws:  <eslEINVAL> on contract violation
722  *          <eslERANGE> if required CM_HB_MX for Inside/Outside/CYK/Posterior exceeds <size_limit>
723  */
724 
725 int
cm_AlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_optacc,int do_sample,CM_HB_MX * mx,CM_HB_SHADOW_MX * shmx,CM_HB_MX * post_mx,CM_HB_EMIT_MX * emit_mx,ESL_RANDOMNESS * r,char ** ret_ppstr,Parsetree_t ** ret_tr,float * ret_avgpp,float * ret_sc)726 cm_AlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, int do_sample,
727 	   CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, CM_HB_MX *post_mx, CM_HB_EMIT_MX *emit_mx, ESL_RANDOMNESS *r,
728 	   char **ret_ppstr, Parsetree_t **ret_tr, float *ret_avgpp, float *ret_sc)
729 {
730   int          status;
731   Parsetree_t *tr = NULL;
732   float        sc     = 0.;
733   float        avgpp  = 0.;
734   float        ins_sc = 0.;
735   int          do_post;
736   char        *ppstr = NULL;
737   int          have_ppstr;
738 
739   have_ppstr = (ret_ppstr != NULL)       ? TRUE : FALSE;
740   do_post    = (do_optacc || have_ppstr) ? TRUE : FALSE;
741 
742   /* Contract check */
743   if(do_optacc && do_sample)         ESL_FAIL(eslEINCOMPAT, errbuf, "cm_AlignHB(), do_optacc and do_sample are both TRUE.");
744   if(do_optacc && post_mx == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_AlignHB(), do_optacc is TRUE, but post_mx == NULL.\n");
745   if(do_sample && r       == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_AlignHB(), do_sample but r is NULL.");
746 
747   /* PrintDPCellsSaved_jd(cm, cm->cp9b->jmin, cm->cp9b->jmax, cm->cp9b->hdmin, cm->cp9b->hdmax, L); */
748 
749   /* if do_post:   fill Inside, Outside, Posterior matrices, in that order.
750    * if do_sample: fill Inside and sample from it.
751    */
752   if(do_post || do_sample) {
753     if((status = cm_InsideAlignHB (cm, errbuf, dsq, L, size_limit, mx, &ins_sc)) != eslOK) return status;
754     if(do_sample) {
755       if((status = cm_StochasticParsetreeHB(cm, errbuf, dsq, L, mx, r, &tr, &sc)) != eslOK) return status;
756     }
757     if(do_post) { /* Inside was called above, now do Outside, then Posterior */
758       if((status = cm_OutsideAlignHB(cm, errbuf, dsq, L, size_limit, ((cm->align_opts & CM_ALIGN_CHECKINOUT) && (! (cm->flags & CMH_LOCAL_END))), post_mx, mx, NULL)) != eslOK) return status;
759       /* Note: we can only check the posteriors in cm_OutsideAlignHB() if local begin/ends are off */
760       if((status = cm_PosteriorHB       (cm, errbuf, L, size_limit, mx, post_mx, post_mx)) != eslOK) return status;
761       if((status = cm_EmitterPosteriorHB(cm, errbuf, L, size_limit, post_mx, emit_mx, (cm->align_opts & CM_ALIGN_CHECKINOUT))) != eslOK) return status;
762     }
763   }
764 
765   if(!do_sample) { /* if do_sample, we already have a parsetree */
766     if((status = cm_alignT_hb(cm, errbuf, dsq, L, size_limit, do_optacc, mx, shmx, emit_mx, &tr, (do_optacc) ? NULL : &sc)) != eslOK) return status;
767   }
768 
769   if(have_ppstr || do_optacc) {
770     if((status = cm_PostCodeHB(cm, errbuf, L, emit_mx, tr, (have_ppstr) ? &ppstr : NULL, &avgpp)) != eslOK) return status;
771   }
772 
773   /* Uncomment to dump emit map and parse tree */
774   /* CMEmitMap_t *emap;
775      emap = CreateEmitMap(cm);
776      DumpEmitMap(stdout, emap, cm);
777      FreeEmitMap(emap);
778      ParsetreeDump(stdout, tr, cm, dsq);
779   */
780 
781   if (ret_ppstr  != NULL) *ret_ppstr  = ppstr; else free(ppstr);
782   if (ret_tr     != NULL) *ret_tr     = tr;    else FreeParsetree(tr);
783   if (ret_avgpp  != NULL) *ret_avgpp  = avgpp;
784   if (ret_sc     != NULL) *ret_sc     = (do_optacc) ? ins_sc : sc;
785 
786   ESL_DPRINTF1(("#DEBUG: returning from cm_AlignHB() sc : %f\n", sc));
787   return eslOK;
788 }
789 
790 /* Function: cm_CYKInsideAlign()
791  * Date:     EPN, Sun Nov 18 19:37:39 2007
792  *
793  * Purpose:  Run the inside phase of a CYK alignment. Non-banded
794  *           version. See cm_CYKInsideAlignHB() for HMM banded version.
795  *
796  *           This function must perform a complete alignment, aligning
797  *           the full sequence 1..L to the ROOT_S state 0 of the model.
798  *
799  *           We deal with local begins by keeping track of the optimal
800  *           state that we could enter and account for the whole target
801  *           sequence: b = argmax_v  alpha_v(1,L) + log t_0(v),
802  *           and bsc is the score for that.
803  *
804  *           If local begins are on (cm->flags & CMH_LOCAL_BEGIN), the
805  *           optimal alignment must use a local begin transition,
806  *           0->b, and we have to be able to trace that back. If local
807  *           begins are on, we return a valid b (the optimal 0->b
808  *           choice), yshad[0][L][L] will be USE_LOCAL_BEGIN, telling
809  *           cm_alignT() to check b and start with a local 0->b entry
810  *           transition.
811  *
812  *           Note on history of this function: It was previously
813  *           fast_cyk_align() (up to Infernal 1.0.2), which was
814  *           based on inside() from cm_dpsmall.c.
815  *
816  * Args:     cm        - the model
817  *           errbuf    - char buffer for reporting errors
818  *           dsq       - the digitized sequence [1..L]
819  *           L         - length of the dsq to align
820  *           size_limit- max size in Mb for DP matrix
821  *           mx        - the DP matrix to fill in
822  *           shmx      - the shadow matrix to fill in
823  *           ret_b     - RETURN: local begin state if local begins are on
824  *           ret_sc    - RETURN: score of optimal, CYK parsetree
825  *
826  * Returns:  <eslOK> on success.
827  *
828  * Throws:   <eslERANGE> if required mx or shmx size exceeds <size_limit>
829  *           In this case alignment has been aborted, <ret_*> variables are not valid
830  */
831 int
cm_CYKInsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,CM_MX * mx,CM_SHADOW_MX * shmx,int * ret_b,float * ret_sc)832 cm_CYKInsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit,
833 		  CM_MX *mx, CM_SHADOW_MX *shmx, int *ret_b, float *ret_sc)
834 {
835   int      status;
836   int      v,y,z;	/* indices for states  */
837   int      j,d,i,k;	/* indices in sequence dimensions */
838   float    sc;		/* a temporary variable holding a score */
839   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
840   int      b;		/* best local begin state */
841   float    bsc;		/* score for using the best local begin state */
842   float   *el_scA;      /* [0..d..W-1] probability of local end emissions of length d */
843   int      sd;          /* StateDelta(cm->sttype[v]) */
844   int      sdr;         /* StateRightDelta(cm->sttype[v] */
845   int      j_sdr;       /* j - sdr */
846   int      d_sd;        /* d - sd */
847   float    tsc;         /* a transition score */
848 
849   /* the DP matrix */
850   float ***alpha   = mx->dp;        /* pointer to the alpha DP matrix */
851   char  ***yshadow = shmx->yshadow; /* pointer to the yshadow matrix */
852   int   ***kshadow = shmx->kshadow; /* pointer to the kshadow matrix */
853 
854   /* Allocations and initializations  */
855   b   = -1;
856   bsc = IMPOSSIBLE;
857 
858   /* grow the matrices based on the current sequence */
859   if((status = cm_mx_GrowTo       (cm,   mx, errbuf, L, size_limit)) != eslOK) return status;
860   if((status = cm_shadow_mx_GrowTo(cm, shmx, errbuf, L, size_limit)) != eslOK) return status;
861 
862   /* initialize all cells of the matrix to IMPOSSIBLE, all cells of shadow matrix to USED_EL */
863   esl_vec_FSet(mx->dp_mem, mx->ncells_valid, IMPOSSIBLE);
864   for(i = 0; i < shmx->y_ncells_valid; i++) shmx->yshadow_mem[i] = USED_EL;
865   esl_vec_ISet(shmx->kshadow_mem, shmx->k_ncells_valid, USED_EL);
866 
867   /* precalcuate all possible local end scores, for local end emits of 1..L residues */
868   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
869   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
870 
871   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores */
872   if(cm->flags & CMH_LOCAL_END) {
873     for (j = 0; j <= L; j++) {
874       for (d = 0;  d <= j; d++) alpha[cm->M][j][d] = el_scA[d];
875     }
876   }
877 
878   /* Main recursion */
879   for (v = cm->M-1; v >= 0; v--) {
880     float const *esc_v = cm->oesc[v]; /* emission scores for state v */
881     float const *tsc_v = cm->tsc[v];  /* transition scores for state v */
882     sd   = StateDelta(cm->sttype[v]);
883     sdr  = StateRightDelta(cm->sttype[v]);
884 
885     /* re-initialize the J deck if we can do a local end from v */
886     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
887       for (j = 0; j <= L; j++) {
888 	for (d = sd; d <= j; d++) {
889 	  alpha[v][j][d] = el_scA[d-sd] + cm->endsc[v];
890 	}
891       }
892     }
893     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
894 
895     if(cm->sttype[v] == E_st) {
896       for (j = 0; j <= L; j++) {
897 	alpha[v][j][0] = 0.;
898 	/* rest of deck remains IMPOSSIBLE */
899       }
900     }
901     else if(cm->sttype[v] == IL_st) {
902       /* update alpha[v][j][d] cells, for IL states, loop nesting order is:
903        * for j { for d { for y { } } } because they can self transit, and a
904        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
905        * before can start calc'ing for alpha[v][j][d+1] */
906       for (j = sdr; j <= L; j++) {
907 	j_sdr = j - sdr;
908 	for (d = sd; d <= j; d++) {
909 	  d_sd = d - sd;
910 	  i    = j - d + 1;
911 	  for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
912 	    y = cm->cfirst[v] + yoffset;
913 	    if ((sc = alpha[y][j_sdr][d_sd] + tsc_v[yoffset]) > alpha[v][j][d]) {
914 	      alpha[v][j][d] = sc;
915 	      yshadow[v][j][d]    = yoffset;
916 	    }
917 	  }
918 	  alpha[v][j][d] += esc_v[dsq[i--]];
919 	  alpha[v][j][d]  = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
920 	}
921       }
922     }
923     else if(cm->sttype[v] == IR_st) {
924       /* update alpha[v][j][d] cells, for IR states, loop nesting order is:
925        * for j { for d { for y { } } } because they can self transit, and a
926        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
927        * before can start calc'ing for alpha[v][j][d+1] */
928       for (j = sdr; j <= L; j++) {
929 	j_sdr = j - sdr;
930 	for (d = sd; d <= j; d++) {
931 	  d_sd = d - sd;
932 	  i = j - d + 1;
933 	  for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
934 	    y = cm->cfirst[v] + yoffset;
935 	    if ((sc = alpha[y][j_sdr][d_sd] + tsc_v[yoffset]) > alpha[v][j][d]) {
936 	      alpha[v][j][d] = sc;
937 	      yshadow[v][j][d]    = yoffset;
938 	    }
939 	  }
940 	  alpha[v][j][d] += esc_v[dsq[j]];
941 	  alpha[v][j][d]  = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
942 	}
943       }
944     }
945     else if(cm->sttype[v] != B_st) { /* entered if state v is (! IL && ! IR && ! B) */
946       /* ML, MP, MR, D, S, E states cannot self transit, this means that all cells
947        * in alpha[v] are independent of each other, only depending on alpha[y] for previously calc'ed y.
948        * We can do the for loops in any nesting order, this implementation does what I think is most efficient:
949        * for y { for j { for d { } } }
950        */
951       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
952 	yoffset = y - cm->cfirst[v];
953 	tsc = tsc_v[yoffset];
954 
955 	for (j = sdr; j <= L; j++) {
956 	  j_sdr = j - sdr;
957 
958 	  for (d = sd; d <= j; d++) {
959 	    if((sc = alpha[y][j_sdr][d - sd] + tsc) > alpha[v][j][d]) {
960 	      alpha[v][j][d] = sc;
961 	      yshadow[v][j][d]    = yoffset;
962 	    }
963 	  }
964 	}
965       }
966       /* add in emission score, if any */
967       switch(cm->sttype[v]) {
968       case ML_st:
969 	for (j = 0; j <= L; j++) {
970 	  i = j - 1;
971 	  for (d = sd; d <= j; d++)
972 	    alpha[v][j][d] += esc_v[dsq[j-d+1]];
973 	}
974 	break;
975       case MR_st:
976 	for (j = 0; j <= L; j++) {
977 	  for (d = sd; d <= j; d++)
978 	    alpha[v][j][d] += esc_v[dsq[j]];
979 	}
980 	break;
981       case MP_st:
982 	for (j = 0; j <= L; j++) {
983 	  i = j - 1;
984 	  for (d = sd; d <= j; d++)
985 	    alpha[v][j][d] += esc_v[dsq[i--]*cm->abc->Kp+dsq[j]];
986 	}
987       default:
988 	break;
989       }
990       /* ensure all cells are >= IMPOSSIBLE */
991       for (j = 0; j <= L; j++) {
992 	for (d = 0; d <= j; d++)
993 	  alpha[v][j][d] = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
994       }
995     }
996     else { /* B_st */
997       y = cm->cfirst[v]; /* left  subtree */
998       z = cm->cnum[v];   /* right subtree */
999 
1000       for (j = 0; j <= L; j++) {
1001 	for (d = 0; d <= j; d++) {
1002 	  for (k = 0; k <= d; k++) {
1003 	    if ((sc = alpha[y][j-k][d-k] + alpha[z][j][k]) > alpha[v][j][d]) {
1004 	      alpha[v][j][d] = sc;
1005 	      kshadow[v][j][d] = k;
1006 	    }
1007 	  }
1008 	}
1009       }
1010     }
1011 
1012     /* allow local begins, if nec */
1013     if ((cm->flags & CMH_LOCAL_BEGIN) &&
1014 	(NOT_IMPOSSIBLE(cm->beginsc[v])) &&
1015 	(alpha[v][L][L] + cm->beginsc[v] > bsc)) {
1016       b   = v;
1017       bsc = alpha[v][L][L] + cm->beginsc[v];
1018     }
1019   } /* finished calculating deck v. */
1020 
1021   /* Check for whether we need to store an optimal local begin score
1022    * as the optimal overall score, and if we need to put a flag
1023    * in the shadow matrix telling cm_alignT() to use the b we return.
1024    */
1025   if (bsc > alpha[0][L][L]) {
1026     alpha[0][L][L] = bsc;
1027     yshadow[0][L][L] = USED_LOCAL_BEGIN;
1028   }
1029 
1030 #if eslDEBUGLEVEL >= 3
1031   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
1032   /* FILE *fp1; fp1 = fopen("tmp.std_cykmx",   "w"); cm_mx_Dump(fp1, mx); fclose(fp1); */
1033   /* FILE *fp2; fp2 = fopen("tmp.std_cykshmx", "w"); cm_shadow_mx_Dump(fp2, cm, shmx); fclose(fp2); */
1034 #endif
1035 
1036   sc = alpha[0][L][L];
1037 
1038   free(el_scA);
1039 
1040   if (ret_b   != NULL) *ret_b  = b;    /* b is -1 if local begins are off */
1041   if (ret_sc  != NULL) *ret_sc = sc;
1042 
1043   ESL_DPRINTF1(("#DEBUG: cm_CYKInsideAlign return sc: %f\n", sc));
1044   return eslOK;
1045 
1046  ERROR:
1047   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
1048 }
1049 
1050 /* Function: cm_CYKInsideAlignHB()
1051  * Date:     EPN 03.29.06 [EPN started]
1052  *           SRE, Mon Aug  7 13:15:37 2000 [St. Louis]
1053  *
1054  * Purpose:  Run the inside phase of a CYK alignment using bands
1055  *           in the j and d dimensions of the DP matrix. Bands
1056  *           were obtained from an HMM Forward-Backward parse
1057  *           of the target sequence. Uses float log odds scores.
1058  *           Otherwise, (meant to be) identical to cm_CYKInsideAlign()
1059  *           see that function for more information.
1060  *
1061  *           A CM_HB_MX DP matrix must be passed in. Only cells valid
1062  *           within the bands given in the CP9Bands_t <cm->cp9b> will
1063  *           be valid.
1064  *
1065  *           Note on history of this function: It was previously
1066  *           fast_cyk_align_hb() (up to Infernal 1.0.2), which was
1067  *           based on inside_b_me() which was based on inside().
1068  *
1069  * Args:     cm        - the model
1070  *           errbuf    - char buffer for reporting errors
1071  *           dsq       - the digitized sequence [1..L]
1072  *           L         - length of the dsq to align
1073  *           size_limit- max size in Mb for DP matrix
1074  *           mx        - the DP matrix to fill in, only cells within bands are valid
1075  *           shmx      - the shadow matrix to fill in, only cells within bands are valid
1076  *           ret_b     - RETURN: best local begin state, or NULL if unwanted
1077  *           ret_sc    - RETURN: score of optimal, CYK parsetree
1078  *
1079  * Returns: <eslOK> on success.
1080  *
1081  * Throws:  <eslERANGE> if required CM_HB_MX size exceeds <size_limit>
1082  *          <eslEINVAL> if the full sequence is not within the bands for state 0
1083  *          In either case alignment has been aborted, ret_* variables are not valid
1084  *
1085  */
1086 int
cm_CYKInsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,CM_HB_MX * mx,CM_HB_SHADOW_MX * shmx,int * ret_b,float * ret_sc)1087 cm_CYKInsideAlignHB(CM_t *cm, char *errbuf,  ESL_DSQ *dsq, int L, float size_limit,
1088 		    CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, int *ret_b, float *ret_sc)
1089 {
1090   int      status;
1091   int      v,y,z;	/* indices for states  */
1092   int      j,d,i,k;	/* indices in sequence dimensions */
1093   float    sc;		/* a temporary variable holding a score */
1094   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
1095   int      b;		/* best local begin state */
1096   float    bsc;		/* score for using the best local begin state */
1097   int     *yvalidA;     /* [0..MAXCONNECT-1] TRUE if v->yoffset is legal transition (within bands) */
1098   float   *el_scA;      /* [0..d..L-1] probability of local end emissions of length d */
1099   int      sd;          /* StateDelta(cm->sttype[v]) */
1100   int      sdr;         /* StateRightDelta(cm->sttype[v] */
1101   int      j_sdr;              /* j - sdr */
1102 
1103   /* indices used for handling band-offset issues, and in the depths of the DP recursion */
1104   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
1105   int      jp_y_sdr;           /* jp_y - sdr */
1106   int      jn, jx;             /* current minimum/maximum j allowed */
1107   int      jpn, jpx;           /* minimum/maximum jp_v */
1108   int      dp_v, dp_y;         /* d index for state v/y in alpha w/mem eff bands */
1109   int      dn, dx;             /* current minimum/maximum d allowed */
1110   int      dp_y_sd;            /* dp_y - sd */
1111   int      dpn, dpx;           /* minimum/maximum dp_v */
1112   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
1113   int      kn, kx;             /* current minimum/maximum k value */
1114   int      Lp;                 /* L index also changes depending on state */
1115   float    tsc;                /* a transition score */
1116   int      yvalid_idx;         /* for keeping track of which children are valid */
1117   int      yvalid_ct;          /* for keeping track of which children are valid */
1118   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
1119   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
1120 
1121   /* variables used for memory efficient bands */
1122   /* ptrs to cp9b info, for convenience */
1123   CP9Bands_t *cp9b    = cm->cp9b;
1124   int        *jmin    = cp9b->jmin;
1125   int        *jmax    = cp9b->jmax;
1126   int       **hdmin   = cp9b->hdmin;
1127   int       **hdmax   = cp9b->hdmax;
1128   float    ***alpha   = mx->dp;        /* pointer to the alpha DP matrix */
1129   char     ***yshadow = shmx->yshadow; /* pointer to the yshadow matrix */
1130   int      ***kshadow = shmx->kshadow; /* pointer to the kshadow matrix */
1131 
1132   /* Allocations and initializations  */
1133   b   = -1;
1134   bsc = IMPOSSIBLE;
1135   /* ensure a full alignment to ROOT_S (v==0) is allowed by the bands */
1136   if (cp9b->jmin[0] > L || cp9b->jmax[0] < L)
1137     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cp9b->jmin[0], cp9b->jmax[0]);
1138   jp_0 = L - jmin[0];
1139   if (cp9b->hdmin[0][jp_0] > L || cp9b->hdmax[0][jp_0] < L)
1140     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cp9b->hdmin[0][jp_0], cp9b->hdmax[0][jp_0]);
1141   Lp_0 = L - hdmin[0][jp_0];
1142 
1143   /* grow the matrices based on the current sequence and bands */
1144   if((status = cm_hb_mx_GrowTo       (cm,   mx, errbuf, cp9b, L, size_limit)) != eslOK) return status;
1145   if((status = cm_hb_shadow_mx_GrowTo(cm, shmx, errbuf, cp9b, L, size_limit)) != eslOK) return status;
1146 
1147   /* precalcuate all possible local end scores, for local end emits of 1..L residues */
1148   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
1149   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
1150 
1151   /* yvalidA[0..cnum[v]] will hold TRUE for states y for which a transition is legal
1152    * (some transitions are impossible due to the bands)
1153    */
1154   ESL_ALLOC(yvalidA, sizeof(int) * MAXCONNECT);
1155   esl_vec_ISet(yvalidA, MAXCONNECT, FALSE);
1156 
1157   /* initialize all cells of the matrix to IMPOSSIBLE */
1158   esl_vec_FSet(alpha[0][0], mx->ncells_valid, IMPOSSIBLE);
1159   if(shmx->y_ncells_valid > 0) for(i = 0; i < shmx->y_ncells_valid; i++) shmx->yshadow_mem[i] = USED_EL;
1160   /* for B states, shadow matrix holds k, length of right fragment, this will be overwritten */
1161   if(shmx->k_ncells_valid > 0) esl_vec_ISet(shmx->kshadow_mem, shmx->k_ncells_valid, 0);
1162 
1163   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores,
1164    * Note: we could optimize by skipping this step and using el_scA[d] to
1165    * initialize ELs for each state in the first step of the main recursion
1166    * below. We fill in the EL deck here for completeness and so that
1167    * a check of this alpha matrix with a CYKOutside matrix will pass.
1168    */
1169   if(cm->flags & CMH_LOCAL_END) {
1170     for (j = 0; j <= L; j++) {
1171       for (d = 0;  d <= j; d++) alpha[cm->M][j][d] = el_scA[d];
1172     }
1173   }
1174 
1175   /* Main recursion */
1176   for (v = cm->M-1; v >= 0; v--) {
1177     float const *esc_v = cm->oesc[v]; /* emission scores for state v */
1178     float const *tsc_v = cm->tsc[v];  /* transition scores for state v */
1179     sd   = StateDelta(cm->sttype[v]);
1180     sdr  = StateRightDelta(cm->sttype[v]);
1181     jn   = jmin[v];
1182     jx   = jmax[v];
1183 
1184     /* re-initialize if we can do a local end from v */
1185     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
1186       for (j = jmin[v]; j <= jmax[v]; j++) {
1187 	jp_v  = j - jmin[v];
1188 	if(hdmin[v][jp_v] >= sd) {
1189 	  d    = hdmin[v][jp_v];
1190 	  dp_v = 0;
1191 	}
1192 	else {
1193 	  d    = sd;
1194 	  dp_v = sd - hdmin[v][jp_v];
1195 	}
1196 	for (; d <= hdmax[v][jp_v]; dp_v++, d++) {
1197 	  if(d >= sd) {
1198 	    alpha[v][jp_v][dp_v] = alpha[cm->M][j][d-sd] + cm->endsc[v];
1199 	    /* If we optimize by skipping the filling of the
1200 	     * EL deck the above line would become:
1201 	     * 'alpha[v][jp_v][dp_v] = el_scA[d-sd] + cm->endsc[v];'
1202 	     */
1203 	  }
1204 	}
1205       }
1206     }
1207     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
1208 
1209     if(cm->sttype[v] == E_st) {
1210       for (j = jmin[v]; j <= jmax[v]; j++) {
1211 	jp_v = j-jmin[v];
1212 	ESL_DASSERT1((hdmin[v][jp_v] == 0));
1213 	ESL_DASSERT1((hdmax[v][jp_v] == 0));
1214 	alpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
1215       }
1216     }
1217     else if(cm->sttype[v] == IL_st) {
1218       /* update alpha[v][jp_v][dp_v] cells, for IL states, loop nesting order is:
1219        * for j { for d { for y { } } } because they can self transit, and a
1220        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1221        * before can start calc'ing for alpha[v][j][d+1] */
1222       for (j = jmin[v]; j <= jmax[v]; j++) {
1223 	ESL_DASSERT1((j >= 0 && j <= L));
1224 	jp_v = j - jmin[v];
1225 	yvalid_ct = 0;
1226 	j_sdr = j - sdr;
1227 
1228 	/* determine which children y we can legally transit to for v, j */
1229 	for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
1230 	  if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr valid for state y? */
1231 
1232 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
1233 	  i = j - d + 1;
1234 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
1235 	  for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
1236 	    yoffset = yvalidA[yvalid_idx];
1237 	    y = cm->cfirst[v] + yoffset;
1238 	    jp_y_sdr = j - jmin[y] - sdr;
1239 
1240 	    if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
1241 	      dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
1242 	      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
1243 	      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
1244 	      if ((sc = alpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]) > alpha[v][jp_v][dp_v])
1245 		{
1246 		  alpha[v][jp_v][dp_v] = sc;
1247 		  yshadow[v][jp_v][dp_v]    = yoffset;
1248 		}
1249 	    }
1250 	  }
1251 	  alpha[v][jp_v][dp_v] += esc_v[dsq[i--]];
1252 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
1253 	}
1254       }
1255     }
1256     else if(cm->sttype[v] == IR_st) {
1257       /* update alpha[v][jp_v][dp_v] cells, for IR states, loop nesting order is:
1258        * for j { for d { for y { } } } because they can self transit, and a
1259        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1260        * before can start calc'ing for alpha[v][j][d+1] */
1261       for (j = jmin[v]; j <= jmax[v]; j++) {
1262 	ESL_DASSERT1((j >= 0 && j <= L));
1263 	jp_v = j - jmin[v];
1264 	yvalid_ct = 0;
1265 	j_sdr = j - sdr;
1266 
1267 	/* determine which children y we can legally transit to for v, j */
1268 	for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
1269 	  if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr is valid for state y? */
1270 
1271 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
1272 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
1273 	  for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
1274 	    yoffset = yvalidA[yvalid_idx];
1275 	    y = cm->cfirst[v] + yoffset;
1276 	    jp_y_sdr = j - jmin[y] - sdr;
1277 
1278 	    if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
1279 	      dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
1280 	      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
1281 	      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
1282 	      if ((sc = alpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]) > alpha[v][jp_v][dp_v])
1283 		{
1284 		  alpha[v][jp_v][dp_v] = sc;
1285 		  yshadow[v][jp_v][dp_v]    = yoffset;
1286 		}
1287 	    }
1288 	  }
1289 	  alpha[v][jp_v][dp_v] += esc_v[dsq[j]];
1290 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
1291 	}
1292       }
1293     }
1294     else if(cm->sttype[v] != B_st) { /* entered if state v is (! IL && ! IR && ! B) */
1295       /* ML, MP, MR, D, S, E states cannot self transit, this means that all cells
1296        * in alpha[v] are independent of each other, only depending on alpha[y] for previously calc'ed y.
1297        * We can do the for loops in any nesting order, this implementation does what I think is most efficient:
1298        * for y { for j { for d { } } }
1299        */
1300       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
1301 	yoffset = y - cm->cfirst[v];
1302 	tsc = tsc_v[yoffset];
1303 
1304 	/* j must satisfy:
1305 	 * j >= jmin[v]
1306 	 * j >= jmin[y]+sdr (follows from (j-sdr >= jmin[y]))
1307 	 * j <= jmax[v]
1308 	 * j <= jmax[y]+sdr (follows from (j-sdr <= jmax[y]))
1309 	 * this reduces to two ESL_MAX calls
1310 	 */
1311 	jn = ESL_MAX(jmin[v], jmin[y]+sdr);
1312 	jx = ESL_MIN(jmax[v], jmax[y]+sdr);
1313 
1314 	jpn = jn - jmin[v];
1315 	jpx = jx - jmin[v];
1316 	jp_y_sdr = jn - jmin[y] - sdr;
1317 
1318 	for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++) {
1319 	  ESL_DASSERT1((jp_v     >= 0 && jp_v     <= (jmax[v]-jmin[v])));
1320 	  ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
1321 
1322 	/* d must satisfy:
1323 	 * d >= hdmin[v][jp_v]
1324 	 * d >= hdmin[y][jp_y_sdr]+sd (follows from (d-sd >= hdmin[y][jp_y_sdr]))
1325 	 * d <= hdmax[v][jp_v]
1326 	 * d <= hdmax[y][jp_y_sdr]+sd (follows from (d-sd <= hdmax[y][jp_y_sdr]))
1327 	 * this reduces to two ESL_MAX calls
1328 	 */
1329 	  dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
1330 	  dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
1331 	  dpn     = dn - hdmin[v][jp_v];
1332 	  dpx     = dx - hdmin[v][jp_v];
1333 	  dp_y_sd = dn - hdmin[y][jp_y_sdr] - sd;
1334 
1335 	  for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
1336 	    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
1337 	    ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
1338 	    if((sc = alpha[y][jp_y_sdr][dp_y_sd] + tsc) > alpha[v][jp_v][dp_v]) {
1339 	      alpha[v][jp_v][dp_v] = sc;
1340 	      yshadow[v][jp_v][dp_v]    = yoffset;
1341 	    }
1342 	  }
1343 	}
1344       }
1345       /* add in emission score, if any */
1346       switch(cm->sttype[v]) {
1347       case ML_st:
1348 	for (j = jmin[v]; j <= jmax[v]; j++) {
1349 	  jp_v  = j - jmin[v];
1350 	  i     = j - hdmin[v][jp_v] + 1;
1351 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
1352 	    alpha[v][jp_v][dp_v] += esc_v[dsq[i--]];
1353 	}
1354 	break;
1355       case MR_st:
1356 	for (j = jmin[v]; j <= jmax[v]; j++) {
1357 	  jp_v  = j - jmin[v];
1358 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
1359 	    alpha[v][jp_v][dp_v] += esc_v[dsq[j]];
1360 	}
1361 	break;
1362       case MP_st:
1363 	for (j = jmin[v]; j <= jmax[v]; j++) {
1364 	  jp_v  = j - jmin[v];
1365 	  i     = j - hdmin[v][jp_v] + 1;
1366 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
1367 	    alpha[v][jp_v][dp_v] += esc_v[dsq[i--]*cm->abc->Kp+dsq[j]];
1368 	}
1369       default:
1370 	break;
1371       }
1372       /* ensure all cells are >= IMPOSSIBLE */
1373       for (j = jmin[v]; j <= jmax[v]; j++) {
1374 	jp_v  = j - jmin[v];
1375 	for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
1376 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
1377       }
1378     }
1379     else { /* B_st */
1380       y = cm->cfirst[v]; /* left  subtree */
1381       z = cm->cnum[v];   /* right subtree */
1382 
1383       /* Any valid j must be within both state v and state z's j band
1384        * I think jmin[v] <= jmin[z] is guaranteed by the way bands are
1385        * constructed, but we'll check anyway.
1386        */
1387       jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
1388       jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
1389       /* the main j loop */
1390       for (j = jn; j <= jx; j++) {
1391 	jp_v = j - jmin[v];
1392 	jp_y = j - jmin[y];
1393 	jp_z = j - jmin[z];
1394 	kn = ((j-jmax[y]) > (hdmin[z][jp_z])) ? (j-jmax[y]) : hdmin[z][jp_z];
1395         kn = ESL_MAX(kn, 0); /* kn must be non-negative, added with fix to bug i36 */
1396         /* kn satisfies inequalities (1) and (3) (listed below)*/
1397 	kx = ( jp_y       < (hdmax[z][jp_z])) ?  jp_y       : hdmax[z][jp_z];
	/* kx satisfies inequalities (2) and (4) (listed below)*/
1399 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
1400 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
1401 
1402 	  /* Find the first k value that implies a valid cell in the y and z decks.
1403 	   * This k must satisfy the following 6 inequalities (some may be redundant):
1404 	   * (1) k >= j-jmax[y];
1405 	   * (2) k <= j-jmin[y];
1406 	   *     1 and 2 guarantee (j-k) is within state y's j band
1407 	   *
1408 	   * (3) k >= hdmin[z][j-jmin[z]];
1409 	   * (4) k <= hdmax[z][j-jmin[z]];
1410 	   *     3 and 4 guarantee k is within z's j=(j), d band
1411 	   *
1412 	   * (5) k >= d-hdmax[y][j-jmin[y]-k];
1413 	   * (6) k <= d-hdmin[y][j-jmin[y]-k];
1414 	   *     5 and 6 guarantee (d-k) is within state y's j=(j-k) d band
1415 	   *
1416 	   * kn and kx were set above (outside (for (dp_v...) loop) that
1417 	   * satisfy 1-4 (b/c 1-4 are d-independent and k-independent)
1418 	   * RHS of inequalities 5 and 6 are dependent on k, so we check
1419 	   * for these within the next for loop.
1420 	   */
1421 	  for(k = kn; k <= kx; k++) {
1422 	    if((k >= d - hdmax[y][jp_y-k]) && k <= d - hdmin[y][jp_y-k]) {
1423 	      /* for current k, all 6 inequalities have been satisified
1424 	       * so we know the cells corresponding to the platonic
1425 	       * matrix cells alpha[v][j][d], alpha[y][j-k][d-k], and
1426 	       * alpha[z][j][k] are all within the bands. These
1427 	       * cells correspond to alpha[v][jp_v][dp_v],
1428 	       * alpha[y][jp_y-k][d-hdmin[jp_y-k]-k],
1429 	       * and alpha[z][jp_z][k-hdmin[jp_z]];
1430 	       */
1431 	      kp_z = k-hdmin[z][jp_z];
1432 	      dp_y = d-hdmin[y][jp_y-k];
1433 
1434 	      if ((sc = alpha[y][jp_y-k][dp_y - k] + alpha[z][jp_z][kp_z])
1435 		  > alpha[v][jp_v][dp_v]) {
1436 		alpha[v][jp_v][dp_v] = sc;
1437 		kshadow[v][jp_v][dp_v] = k;
1438 	      }
1439 	    }
1440 	  }
1441 	}
1442       }
1443     } /* finished calculating deck v. */
1444 
1445     /* allow local begins, if nec */
1446     if(cm->flags & CMH_LOCAL_BEGIN) {
1447       if(L >= jmin[v] && L <= jmax[v]) {
1448 	jp_v = L - jmin[v];
1449 	Lp   = L - hdmin[v][jp_v];
1450 	if(L >= hdmin[v][jp_v] && L <= hdmax[v][jp_v]) {
1451 	/* If we get here alpha[v][jp_v][Lp] is a valid cell
1452 	 * in the banded alpha matrix, corresponding to
1453 	 * alpha[v][L][L] in the platonic matrix.
1454 	 */
1455 	/* Check for local begin getting us to the root.
1456 	 * This is "off-shadow": if/when we trace back, we'll handle this
1457 	 * case separately (and we'll know to do it because we'll immediately
1458 	 * see a USED_LOCAL_BEGIN flag in the shadow matrix, telling us
1459 	 * to jump right to state b; see below)
1460 	 */
1461 	  if (NOT_IMPOSSIBLE(cm->beginsc[v]) &&
1462 	      (alpha[v][jp_v][Lp] + cm->beginsc[v] > bsc)) {
1463 	    b   = v;
1464 	    bsc = alpha[v][jp_v][Lp] + cm->beginsc[v];
1465 	  }
1466 	}
1467       }
1468     }
1469   } /* end loop over all v */
1470   /* Check for whether we need to store an optimal local begin score
1471    * as the optimal overall score, and if we need to put a flag
1472    * in the shadow matrix telling cm_alignT() to use the b we return.
1473    */
1474   if (NOT_IMPOSSIBLE(bsc) && (bsc > alpha[0][jp_0][Lp_0])) {
1475     alpha[0][jp_0][Lp_0] = bsc;
1476     yshadow[0][jp_0][Lp_0] = USED_LOCAL_BEGIN;
1477   }
1478 
1479 #if eslDEBUGLEVEL >= 3
1480   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
1481   /* FILE *fp1; fp1 = fopen("tmp.std_cykhbmx", "w");   cm_hb_mx_Dump(fp1, mx); fclose(fp1); */
1482   /* FILE *fp2; fp2 = fopen("tmp.std_cykhbshmx", "w"); cm_hb_shadow_mx_Dump(fp2, cm, shmx); fclose(fp2); */
1483 #endif
1484 
1485   sc = alpha[0][jp_0][Lp_0];
1486 
1487   free(el_scA);
1488   free(yvalidA);
1489 
1490   if (ret_b != NULL)  *ret_b   = b;    /* b is -1 if local begins are off */
1491   if (ret_sc != NULL) *ret_sc = sc;
1492 
1493   ESL_DPRINTF1(("#DEBUG: cm_CYKInsideAlignHB return sc: %f\n", sc));
1494   return eslOK;
1495 
1496  ERROR:
1497   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
1498 }
1499 
1500 /* Function: cm_InsideAlign()
1501  * Date:     EPN, Mon Nov 19 06:21:51 2007
1502  *
1503  * Purpose:  Run the inside algorithm on a target sequence
1504  *           without using bands. The full target sequence
1505  *           1..L is aligned (only full alignments will
1506  *           contribute to the Inside score).
1507  *
1508  *           Identical to cm_InsideAlignHB() but no bands
1509  *           are used.
1510  *
 *           Very similar to cm_CYKInsideAlign(); see the 'Purpose'
 *           of that function for more details. The only differences
 *           from that function are:
1514  *           - we do Inside, not CYK
1515  *           - can't return a shadow matrix (we're not aligning)
1516  *           - doesn't return bsc, b info about local begins
1517  *
1518  *           This function complements cm_OutsideAlign().
1519  *
1520  *           Note: renamed from FastInsideAlign() [EPN, Wed Sep 14 06:13:37 2011].
1521  *
1522  * Args:     cm         - the model
1523  *           errbuf     - char buffer for reporting errors
1524  *           dsq        - the digitized sequence
1525  *           L          - target sequence length
1526  *           size_limit - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
1527  *           mx         - the dp matrix, grown and filled here
1528  *           ret_sc     - RETURN: log P(S|M)/P(S|R), as a bit score
1529  *
1530  * Returns:  <eslOK> on success.
1531  *
1532  * Throws:   <eslERANGE> if required CM_MX size exceeds <size_limit>
1533  *           In this case alignment has been aborted, ret_sc is not valid
1534  */
1535 int
cm_InsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,CM_MX * mx,float * ret_sc)1536 cm_InsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_MX *mx, float *ret_sc)
1537 {
1538   int      status;
1539   int      v,y,z;	/* indices for states  */
1540   int      j,d,i,k;	/* indices in sequence dimensions */
1541   float    sc;		/* the final score */
1542   float    tsc;         /* a temporary variable holding a transition score */
1543   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
1544   float    bsc;		/* summed score for using all local begins */
1545   float   *el_scA;      /* [0..d..L-1] probability of local end emissions of length d */
1546   int      sd;          /* StateDelta(cm->sttype[v]) */
1547   int      sdl;         /* StateLeftDelta(cm->sttype[v] */
1548   int      sdr;         /* StateRightDelta(cm->sttype[v] */
1549   int      j_sdr;       /* j - sdr */
1550   int      d_sd;        /* d - sd */
1551 
1552   /* the DP matrix */
1553   float ***alpha = mx->dp;     /* pointer to the alpha DP matrix */
1554 
1555   /* Allocations and initializations */
1556   bsc = IMPOSSIBLE;
1557 
1558   /* grow the matrix based on the current sequence */
1559   if((status = cm_mx_GrowTo(cm, mx, errbuf, L, size_limit)) != eslOK) return status;
1560 
1561   /* initialize all cells of the matrix to IMPOSSIBLE */
1562   esl_vec_FSet(alpha[0][0], mx->ncells_valid, IMPOSSIBLE);
1563 
1564   /* precalcuate all possible local end scores, for local end emits of 1..L residues */
1565   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
1566   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
1567 
1568   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores */
1569   if(cm->flags & CMH_LOCAL_END) {
1570     for (j = 0; j <= L; j++) {
1571       for (d = 0;  d <= j; d++) alpha[cm->M][j][d] = el_scA[d];
1572     }
1573   }
1574 
1575   /* Main recursion  */
1576   for (v = cm->M-1; v >= 0; v--) {
1577     float const *esc_v = cm->oesc[v];
1578     float const *tsc_v = cm->tsc[v];
1579     sd   = StateDelta(cm->sttype[v]);
1580     sdl  = StateLeftDelta(cm->sttype[v]);
1581     sdr  = StateRightDelta(cm->sttype[v]);
1582 
1583     /* re-initialize the J deck if we can do a local end from v */
1584     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
1585       for (j = 0; j <= L; j++) {
1586 	for (d = sd; d <= j; d++) alpha[v][j][d] = el_scA[d-sd] + cm->endsc[v];
1587       }
1588     }
1589     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
1590 
1591     /* E_st: easy, no children, and d must be 0 for all valid j */
1592     if(cm->sttype[v] == E_st) {
1593       for (j = 0; j <= L; j++) {
1594 	alpha[v][j][0] = 0.;
1595 	/* rest of deck remains IMPOSSIBLE */
1596       }
1597     }
1598     else if(cm->sttype[v] == IL_st) {
1599       /* update alpha[v][jp_v][dp_v] cells, for IL states, loop nesting order is:
1600        * for j { for d { for y { } } } because they can self transit, and a
1601        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1602        * before can start calc'ing for alpha[v][j][d+1] */
1603       for (j = sdr; j <= L; j++) {
1604 	j_sdr = j - sdr;
1605 	for (d = sd; d <= j; d++) {
1606 	  d_sd = d - sd;
1607 	  i    = j - d + 1;
1608 	  for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
1609 	    y = cm->cfirst[v] + yoffset;
1610 	    alpha[v][j][d] = FLogsum(alpha[v][j][d], alpha[y][j_sdr][d_sd] + tsc_v[yoffset]);
1611 	  }
1612 	  alpha[v][j][d] += esc_v[dsq[i--]];
1613 	  alpha[v][j][d]  = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
1614 	}
1615       }
1616     }
1617     else if(cm->sttype[v] == IR_st) {
1618       /* update alpha[v][jp_v][dp_v] cells, for IR states, loop nesting order is:
1619        * for j { for d { for y { } } } because they can self transit, and a
1620        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1621        * before can start calc'ing for alpha[v][j][d+1] */
1622       for (j = sdr; j <= L; j++) {
1623 	j_sdr = j - sdr;
1624 	for (d = sd; d <= j; d++) {
1625 	  d_sd = d - sd;
1626 	  i    = j - d + 1;
1627 	  for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
1628 	    y = cm->cfirst[v] + yoffset;
1629 	    alpha[v][j][d] = FLogsum(alpha[v][j][d], alpha[y][j_sdr][d_sd] + tsc_v[yoffset]);
1630 	  }
1631 	  alpha[v][j][d] += esc_v[dsq[j]];
1632 	  alpha[v][j][d] = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
1633 	}
1634       }
1635     }
1636     else if(cm->sttype[v] != B_st) { /* entered if state v is (! IL && ! IR && ! B) */
1637       /* ML, MP, MR, D, S, E states cannot self transit, this means that all cells
1638        * in alpha[v] are independent of each other, only depending on alpha[y] for previously calc'ed y.
1639        * We can do the for loops in any nesting order, this implementation does what I think is most efficient:
1640        * for y { for j { for d { } } }
1641        */
1642       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
1643 	yoffset = y - cm->cfirst[v];
1644 	tsc = tsc_v[yoffset];
1645 
1646 	for (j = sdr; j <= L; j++) {
1647 	  j_sdr = j - sdr;
1648 	  for (d = sd; d <= j; d++) {
1649 	    alpha[v][j][d] = FLogsum(alpha[v][j][d], (alpha[y][j_sdr][d-sd] + tsc));;
1650 	  }
1651 	}
1652       }
1653       /* add in emission score, if any */
1654       switch(cm->sttype[v]) {
1655       case ML_st:
1656 	for (j = 0; j <= L; j++) {
1657 	  i = j - sdl;
1658 	  for (d = sd; d <= j; d++)
1659 	    alpha[v][j][d] += esc_v[dsq[j-d+1]];
1660 	}
1661 	break;
1662       case MR_st:
1663 	for (j = 0; j <= L; j++) {
1664 	  for (d = sd; d <= j; d++)
1665 	    alpha[v][j][d] += esc_v[dsq[j]];
1666 	}
1667 	break;
1668       case MP_st:
1669 	for (j = 0; j <= L; j++) {
1670 	  i = j - sdl;
1671 	  for (d = sd; d <= j; d++)
1672 	    alpha[v][j][d] += esc_v[dsq[i--]*cm->abc->Kp+dsq[j]];
1673 	}
1674       default:
1675 	break;
1676       }
1677       /* ensure all cells are >= IMPOSSIBLE */
1678       for (j = 0; j <= L; j++) {
1679 	for (d = 0; d <= j; d++)
1680 	  alpha[v][j][d] = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
1681       }
1682     }
1683     else { /* B_st */
1684       y = cm->cfirst[v]; /* left  subtree */
1685       z = cm->cnum[v];   /* right subtree */
1686 
1687       for (j = 0; j <= L; j++) {
1688 	for (d = 0; d <= j; d++) {
1689 	  for (k = 0; k <= d; k++) {
1690 	    alpha[v][j][d] = FLogsum(alpha[v][j][d], alpha[y][j-k][d-k] + alpha[z][j][k]);
1691 	  }
1692 	}
1693       }
1694     }
1695 
1696     /* allow local begins, if nec */
1697     if ((cm->flags & CMH_LOCAL_BEGIN) &&
1698 	(NOT_IMPOSSIBLE(cm->beginsc[v]))) {
1699       /* add in score for local begin getting us to the root. */
1700       bsc = FLogsum(bsc, alpha[v][L][L] + cm->beginsc[v]);
1701     }
1702   } /* finished calculating deck v. */
1703 
1704   /* include the bsc as part of alpha[0][L][L] */
1705   alpha[0][L][L] = FLogsum(alpha[0][L][L], bsc);
1706 
1707 #if eslDEBUGLEVEL >= 3
1708   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
1709   /* FILE *fp1; fp1 = fopen("tmp.std_imx", "w");   cm_mx_Dump(fp1, mx); fclose(fp1); */
1710 #endif
1711 
1712   sc =  alpha[0][L][L];
1713 
1714   free(el_scA);
1715 
1716   if(ret_sc != NULL) *ret_sc = sc;
1717 
1718   ESL_DPRINTF1(("#DEBUG: cm_InsideAlign() return sc: %f\n", sc));
1719   return eslOK;
1720 
1721  ERROR:
1722   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
1723 }
1724 
1725 /* Function: cm_InsideAlignHB()
1726  * Date:     EPN, Thu Nov  8 18:24:41 2007
1727  *
1728  * Purpose:  Run the inside algorithm on a target sequence using bands
1729  *           in the j and d dimensions of the DP matrix. Bands
1730  *           were obtained from an HMM Forward-Backward parse
1731  *           of the target sequence. Uses float log odds scores.
1732  *           The full target sequence 1..L is aligned (only full
1733  *           alignments will contribute to the Inside score).
1734  *
 *           Very similar to cm_CYKInsideAlignHB(); see the 'Purpose'
 *           of that function for more details. The only differences
 *           from that function are:
1738  *           - we do Inside, not CYK
1739  *           - can't return a shadow matrix (we're not aligning)
1740  *           - doesn't return bsc, b info about local begins
1741  *
1742  *           This function complements cm_OutsideAlignHB().
1743  *
1744  *           Note: renamed from FastInsideAlignHB() [EPN, Wed Sep 14 06:13:08 2011].
1745  *
1746  * Args:     cm         - the model
1747  *           errbuf     - char buffer for reporting errors
1748  *           dsq        - the digitized sequence
1749  *           L          - target sequence length
1750  *           size_limit - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
1751  *           mx         - the dp matrix, only cells within bands in cp9b will be valid
1752  *           ret_sc     - RETURN: log P(S|M)/P(S|R) (given bands), as a bit score
1753  *
1754  * Returns:  <eslOK> on success.
1755  *
1756  * Throws:  <eslERANGE> if required CM_HB_MX size exceeds <size_limit>
1757  *          <eslEINVAL> if the full sequence is not within the bands for state 0
1758  *          In either case alignment has been aborted, ret_sc is not valid
1759  */
1760 int
cm_InsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,CM_HB_MX * mx,float * ret_sc)1761 cm_InsideAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_HB_MX *mx, float *ret_sc)
1762 {
1763   int      status;
1764   int      v,y,z;	/* indices for states  */
1765   int      j,d,i,k;	/* indices in sequence dimensions */
1766   float    sc;		/* the final score */
1767   float    tsc;         /* a temporary variable holding a transition score */
1768   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
1769   float    bsc;		/* summed score for using all local begins */
1770   float   *el_scA;      /* [0..d..L-1] probability of local end emissions of length d */
1771   int      sd;          /* StateDelta(cm->sttype[v]) */
1772   int      sdr;         /* StateRightDelta(cm->sttype[v] */
1773   int      j_sdr;       /* j - sdr */
1774 
1775   /* indices used for handling band-offset issues, and in the depths of the DP recursion */
1776   int     *yvalidA;            /* [0..MAXCONNECT-1] TRUE if v->yoffset is legal transition (within bands) */
1777   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
1778   int      jp_y_sdr;           /* jp_y - sdr */
1779   int      jn, jx;             /* current minimum/maximum j allowed */
1780   int      jpn, jpx;           /* minimum/maximum jp_v */
1781   int      dp_v, dp_y;         /* d index for state v/y in alpha w/mem eff bands */
1782   int      dn, dx;             /* current minimum/maximum d allowed */
1783   int      dp_y_sd;            /* dp_y - sd */
1784   int      dpn, dpx;           /* minimum/maximum dp_v */
1785   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
1786   int      kn, kx;             /* current minimum/maximum k value */
1787   int      Lp;                 /* L also changes depending on state */
1788   int      yvalid_idx;         /* for keeping track of which children are valid */
1789   int      yvalid_ct;          /* for keeping track of which children are valid */
1790   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
1791   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
1792 
1793   /* ptrs to cp9b info, for convenience */
1794   CP9Bands_t *cp9b = cm->cp9b;
1795   int     *jmin  = cp9b->jmin;
1796   int     *jmax  = cp9b->jmax;
1797   int    **hdmin = cp9b->hdmin;
1798   int    **hdmax = cp9b->hdmax;
1799 
1800   /* the DP matrix */
1801   float ***alpha = mx->dp;     /* pointer to the alpha DP matrix */
1802 
1803   /* Allocations and initializations */
1804   bsc = IMPOSSIBLE;
1805   /* ensure a full alignment to ROOT_S (v==0) is allowed by the bands */
1806   if (cp9b->jmin[0] > L || cp9b->jmax[0] < L)
1807     ESL_FAIL(eslEINVAL, errbuf, "cm_InsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cp9b->jmin[0], cp9b->jmax[0]);
1808   jp_0 = L - jmin[0];
1809   if (cp9b->hdmin[0][jp_0] > L || cp9b->hdmax[0][jp_0] < L)
1810     ESL_FAIL(eslEINVAL, errbuf, "cm_InsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cp9b->hdmin[0][jp_0], cp9b->hdmax[0][jp_0]);
1811   Lp_0 = L - hdmin[0][jp_0];
1812 
1813   /* grow the matrix based on the current sequence and bands */
1814   if((status = cm_hb_mx_GrowTo(cm, mx, errbuf, cp9b, L, size_limit)) != eslOK) return status;
1815 
1816   /* initialize all cells of the matrix to IMPOSSIBLE */
1817   esl_vec_FSet(alpha[0][0], mx->ncells_valid, IMPOSSIBLE);
1818 
1819   /* precalcuate all possible local end scores, for local end emits of 1..W residues */
1820   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
1821   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
1822 
1823   /* yvalidA[0..cnum[v]] will hold TRUE for states y for which a transition is legal
1824    * (some transitions are impossible due to the bands)
1825    */
1826   ESL_ALLOC(yvalidA, sizeof(int) * MAXCONNECT);
1827   esl_vec_ISet(yvalidA, MAXCONNECT, FALSE);
1828 
1829 
1830   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores,
1831    * Note: we could optimize by skipping this step and using el_scA[d] to
1832    * initialize ELs for each state in the first step of the main recursion
1833    * below. We fill in the EL deck here for completeness and so that
1834    * a check of this alpha matrix with a CYKOutside matrix will pass.
1835    */
1836   if(cm->flags & CMH_LOCAL_END) {
1837     for (j = 0; j <= L; j++) {
1838       for (d = 0;  d <= j; d++) alpha[cm->M][j][d] = el_scA[d];
1839     }
1840   }
1841 
1842   /* Main recursion  */
1843   for (v = cm->M-1; v >= 0; v--) {
1844     float const *esc_v = cm->oesc[v];
1845     float const *tsc_v = cm->tsc[v];
1846     sd   = StateDelta(cm->sttype[v]);
1847     sdr  = StateRightDelta(cm->sttype[v]);
1848     jn   = jmin[v];
1849     jx   = jmax[v];
1850 
1851     /* re-initialize the J deck if we can do a local end from v */
1852     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
1853       for (j = jmin[v]; j <= jmax[v]; j++) {
1854 	ESL_DASSERT1((j >= 0 && j <= L));
1855 	jp_v  = j - jmin[v];
1856 	for (dp_v = 0, d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; dp_v++, d++)
1857 	  alpha[v][jp_v][dp_v] = el_scA[d-sd] + cm->endsc[v];
1858       }
1859     }
1860     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
1861 
1862     /* E_st: easy, no children, and d must be 0 for all valid j */
1863     if(cm->sttype[v] == E_st) {
1864       for (j = jmin[v]; j <= jmax[v]; j++) {
1865 	jp_v = j-jmin[v];
1866 	ESL_DASSERT1((hdmin[v][jp_v] == 0));
1867 	ESL_DASSERT1((hdmax[v][jp_v] == 0));
1868 	alpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
1869 	/* rest of deck remains IMPOSSIBLE */
1870       }
1871     }
1872     else if(cm->sttype[v] == IL_st) {
1873       /* update alpha[v][jp_v][dp_v] cells, for IL states, loop nesting order is:
1874        * for j { for d { for y { } } } because they can self transit, and a
1875        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1876        * before can start calc'ing for alpha[v][j][d+1] */
1877       for (j = jmin[v]; j <= jmax[v]; j++) {
1878 	ESL_DASSERT1((j >= 0 && j <= L));
1879 	jp_v = j - jmin[v];
1880 	yvalid_ct = 0;
1881 	j_sdr = j - sdr;
1882 
1883 	/* determine which children y we can legally transit to for v, j */
1884 	for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
1885 	  if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr is valid for state y? */
1886 
1887 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
1888 	  i = j - d + 1;
1889 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
1890 	  for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
1891 	    yoffset = yvalidA[yvalid_idx];
1892 	    y = cm->cfirst[v] + yoffset;
1893 	    jp_y_sdr = j - jmin[y] - sdr;
1894 
1895 	    if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
1896 	      dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
1897 	      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
1898 	      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
1899 	      alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], alpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]);
1900 	    }
1901 	  }
1902 	  alpha[v][jp_v][dp_v] += esc_v[dsq[i--]];
1903 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
1904 	}
1905       }
1906     }
1907     else if(cm->sttype[v] == IR_st) {
1908       /* update alpha[v][jp_v][dp_v] cells, for IR states, loop nesting order is:
1909        * for j { for d { for y { } } } because they can self transit, and a
1910        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1911        * before can start calc'ing for alpha[v][j][d+1] */
1912       for (j = jmin[v]; j <= jmax[v]; j++) {
1913 	ESL_DASSERT1((j >= 0 && j <= L));
1914 	jp_v = j - jmin[v];
1915 	yvalid_ct = 0;
1916 	j_sdr = j - sdr;
1917 
1918 	/* determine which children y we can legally transit to for v, j */
1919 	for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
1920 	  if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr is valid for state y? */
1921 
1922 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
1923 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
1924 	  for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
1925 	    yoffset = yvalidA[yvalid_idx];
1926 	    y = cm->cfirst[v] + yoffset;
1927 	    jp_y_sdr = j - jmin[y] - sdr;
1928 
1929 	    if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
1930 	      dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
1931 	      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
1932 	      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
1933 	      alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], alpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]);
1934 	    }
1935 	  }
1936 	  alpha[v][jp_v][dp_v] += esc_v[dsq[j]];
1937 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
1938 	}
1939       }
1940     }
1941     else if(cm->sttype[v] != B_st) { /* entered if state v is (! IL && ! IR && ! B) */
1942       /* ML, MP, MR, D, S, E states cannot self transit, this means that all cells
1943        * in alpha[v] are independent of each other, only depending on alpha[y] for previously calc'ed y.
1944        * We can do the for loops in any nesting order, this implementation does what I think is most efficient:
1945        * for y { for j { for d { } } }
1946        */
1947       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
1948 	yoffset = y - cm->cfirst[v];
1949 	tsc = tsc_v[yoffset];
1950 
1951 	jn = ESL_MAX(jmin[v], jmin[y]+sdr);
1952 	jx = ESL_MIN(jmax[v], jmax[y]+sdr);
1953 	jpn = jn - jmin[v];
1954 	jpx = jx - jmin[v];
1955 	jp_y_sdr = jn - jmin[y] - sdr;
1956 
1957 	for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++) {
1958 	  ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
1959 	  ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
1960 
1961 	  dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
1962 	  dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
1963 	  dpn     = dn - hdmin[v][jp_v];
1964 	  dpx     = dx - hdmin[v][jp_v];
1965 	  dp_y_sd = dn - hdmin[y][jp_y_sdr] - sd;
1966 
1967 	  for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
1968 	    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
1969 	    ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
1970 	    alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], (alpha[y][jp_y_sdr][dp_y_sd] + tsc));;
1971 	  }
1972 	}
1973       }
1974       /* add in emission score, if any */
1975       switch(cm->sttype[v]) {
1976       case ML_st:
1977 	for (j = jmin[v]; j <= jmax[v]; j++) {
1978 	  jp_v  = j - jmin[v];
1979 	  i     = j - hdmin[v][jp_v] + 1;
1980 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
1981 	    alpha[v][jp_v][dp_v] += esc_v[dsq[i--]];
1982 	}
1983 	break;
1984       case MR_st:
1985 	for (j = jmin[v]; j <= jmax[v]; j++) {
1986 	  jp_v  = j - jmin[v];
1987 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
1988 	    alpha[v][jp_v][dp_v] += esc_v[dsq[j]];
1989 	}
1990 	break;
1991       case MP_st:
1992 	for (j = jmin[v]; j <= jmax[v]; j++) {
1993 	  jp_v  = j - jmin[v];
1994 	  i     = j - hdmin[v][jp_v] + 1;
1995 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
1996 	    alpha[v][jp_v][dp_v] += esc_v[dsq[i--]*cm->abc->Kp+dsq[j]];
1997 	  }
1998       default: /* no emission */
1999 	break;
2000       }
2001       /* ensure all cells are >= IMPOSSIBLE */
2002       for (j = jmin[v]; j <= jmax[v]; j++) {
2003 	ESL_DASSERT1((j >= 0 && j <= L));
2004 	jp_v  = j - jmin[v];
2005 	for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
2006 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
2007       }
2008     }
2009     else { /* B_st */
2010       y = cm->cfirst[v]; /* left  subtree */
2011       z = cm->cnum[v];   /* right subtree */
2012 
2013       /* Any valid j must be within both state v and state z's j band
2014        * I think jmin[v] <= jmin[z] is guaranteed by the way bands are
2015        * constructed, but we'll check anyway.
2016        */
2017       jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
2018       jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
2019       /* the main j loop */
2020       for (j = jn; j <= jx; j++) {
2021 	ESL_DASSERT1((j >= 0 && j <= L));
2022 	jp_v = j - jmin[v];
2023 	jp_y = j - jmin[y];
2024 	jp_z = j - jmin[z];
2025 	kn = ((j-jmax[y]) > (hdmin[z][jp_z])) ? (j-jmax[y]) : hdmin[z][jp_z];
2026         kn = ESL_MAX(kn, 0); /* kn must be non-negative, added with fix to bug i36 */
2027 	/* kn satisfies inequalities (1) and (3) (listed below)*/
2028 	kx = ( jp_y       < (hdmax[z][jp_z])) ?  jp_y       : hdmax[z][jp_z];
2029 	/* kn satisfies inequalities (2) and (4) (listed below)*/
2030 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
2031 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
2032 
2033 	  /* Find the first k value that implies a valid cell in the y and z decks.
2034 	   * This k must satisfy the following 6 inequalities (some may be redundant):
2035 	   * (1) k >= j-jmax[y];
2036 	   * (2) k <= j-jmin[y];
2037 	   *     1 and 2 guarantee (j-k) is within state y's j band
2038 	   *
2039 	   * (3) k >= hdmin[z][j-jmin[z]];
2040 	   * (4) k <= hdmax[z][j-jmin[z]];
2041 	   *     3 and 4 guarantee k is within z's j=(j), d band
2042 	   *
2043 	   * (5) k >= d-hdmax[y][j-jmin[y]-k];
2044 	   * (6) k <= d-hdmin[y][j-jmin[y]-k];
2045 	   *     5 and 6 guarantee (d-k) is within state y's j=(j-k) d band
2046 	   *
2047 	   * kn and kx were set above (outside (for (dp_v...) loop) that
2048 	   * satisfy 1-4 (b/c 1-4 are d-independent and k-independent)
2049 	   * RHS of inequalities 5 and 6 are dependent on k, so we check
2050 	   * for these within the next for loop.
2051 	   */
2052 	  for(k = kn; k <= kx; k++) {
2053 	    if((k >= d - hdmax[y][jp_y-k]) && k <= d - hdmin[y][jp_y-k]) {
2054 	      /* for current k, all 6 inequalities have been satisified
2055 	       * so we know the cells corresponding to the platonic
2056 	       * matrix cells alpha[v][j][d], alpha[y][j-k][d-k], and
2057 	       * alpha[z][j][k] are all within the bands. These
2058 	       * cells correspond to alpha[v][jp_v][dp_v],
2059 	       * alpha[y][jp_y-k][d-hdmin[jp_y-k]-k],
2060 	       * and alpha[z][jp_z][k-hdmin[jp_z]];
2061 	       */
2062 	      kp_z = k-hdmin[z][jp_z];
2063 	      dp_y = d-hdmin[y][jp_y-k];
2064 
2065 	      alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], alpha[y][jp_y-k][dp_y - k] + alpha[z][jp_z][kp_z]);
2066 	    }
2067 	  }
2068 	}
2069       }
2070     }
2071 
2072     /* allow local begins, if nec */
2073     if((cm->flags & CMH_LOCAL_BEGIN) &&
2074        NOT_IMPOSSIBLE(cm->beginsc[v])) {
2075       if(L >= jmin[v] && L <= jmax[v]) {
2076 	jp_v = L - jmin[v];
2077 	Lp   = L - hdmin[v][jp_v];
2078 	if(L >= hdmin[v][jp_v] && L <= hdmax[v][jp_v]) {
2079 	  /* If we get here alpha[v][jp_v][Lp] is a valid cell
2080 	   * in the banded alpha matrix, corresponding to
2081 	   * alpha[v][L][L] in the platonic matrix.
2082 	   */
2083 	  /* Check for local begin getting us to the root.
2084 	   */
2085 	  bsc = FLogsum(bsc, (alpha[v][jp_v][Lp] + cm->beginsc[v]));
2086 	}
2087       }
2088     }
2089   } /* end loop over all v */
2090 
2091   /* include the bsc as part of alpha[0][jp_0][Lp_0] */
2092   if (NOT_IMPOSSIBLE(bsc)) {
2093     alpha[0][jp_0][Lp_0] = FLogsum(alpha[0][jp_0][Lp_0], bsc);
2094   }
2095 
2096 #if eslDEBUGLEVEL >= 3
2097   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
2098   /* FILE *fp; fp = fopen("tmp.std_ihbmx", "w"); cm_hb_mx_Dump(fp, mx); fclose(fp); */
2099 #endif
2100 
2101   sc = alpha[0][jp_0][Lp_0];
2102 
2103   free(el_scA);
2104   free(yvalidA);
2105 
2106   if(ret_sc != NULL) *ret_sc = sc;
2107 
2108   ESL_DPRINTF1(("#DEBUG: cm_InsideAlignHB() return sc: %f\n", sc));
2109   return eslOK;
2110 
2111  ERROR:
2112   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
2113 }
2114 
2115 /* Function: cm_OptAccAlign()
2116  * Date:     EPN, Sun Nov 18 20:45:22 2007
2117  *           EPN, Sat Oct  1 05:57:49 2011 (updated to use emit matrices)
2118  *
2119  * Purpose:  Run the Holmes/Durbin optimal accuracy algorithm
2120  *           on a full target sequence 1..L, given a pre-filled
2121  *           posterior matrix. Uses float log odds scores.
2122  *           Non-banded version. See cm_OptAccAlignHB() for
2123  *           HMM banded version.
2124  *
2125  *           A CM_EMIT_MX matrix <emit_mx> must be passed in, filled by
2126  *           cm_EmitterPosterior(), with values:
2127  *
2128  *           l_pp[v][i]: log of the posterior probability that state v
2129  *           emitted residue i leftwise either at (if a match state)
2130  *           or *after* (if an insert state) the left consensus
2131  *           position modeled by state v's node.
2132  *
2133  *           r_pp[v][i]: log of the posterior probability that state v
2134  *           emitted residue i rightwise either at (if a match
2135  *           state) or *before* (if an insert state) the right
2136  *           consensus position modeled by state v's node.
2137  *
2138  *           l_pp[v] is NULL for states that do not emit leftwise
2139  *           r_pp[v] is NULL for states that do not emit rightwise
2140  *
2141  *           Additionally, a CM_MX DP matrix <mx> and CM_SHADOW_MX
2142  *           <shmx> must be passed in. <shmx> will be expanded and
2143  *           filled here with traceback pointers to allow the
2144  *           optimally accurate parsetree to be recovered in
2145  *           cm_alignT() and <mx> will be expanded and filled with the
2146  *           optimal accuracy scores, where:
2147  *
2148  *           mx->dp[v][j][d]: log of the sum of the posterior
2149  *           probabilities of emitting residues i..j in the subtree
2150  *           rooted at v.
2151  *
2152  *           The optimally accurate parsetree, i.e. the parsetree that
2153  *           maximizes the sum of the posterior probabilities of all
2154  *           1..L emitted residues, will be found.
2155  *
2156  *           Previously (infernal versions 1.0->1.0.2) this function
2157  *           (then named optimal_accuracy_align()) used the posterior
2158  *           matrix instead of the emit matrices used here, and thus
2159  *           did not determine (or at least was not guaranteed to
2160  *           determine) the optimally accurate parsetree as defined
2161  *           above. Instead it determined the parsetree that maximized
2162  *           the probability mass that passed through emitting states.
2163  *
2164  *           Local begins are handled the same as they are in
2165  *           cm_CYKInsideAlign(), see that function's purpose for specifics.
2166  *
2167  *           Note: Renamed from optimal_accuracy_align() [EPN, Wed Sep
2168  *	     14 06:16:38 2011].  Corrected to use emit matrices
2169  *	     instead of a posterior matrix [EPN, Sat Oct 1 06:04:34
2170  *	     2011].
2171  *
2172  * Args:     cm        - the model
2173  *           errbuf    - char buffer for reporting errors
 *           dsq       - the digitized sequence [1..L]
2175  *           L         - length of the dsq
2176  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
2177  *           mx        - the DP matrix to fill in
2178  *           shmx      - the shadow matrix to fill in
2179  *           emit_mx   - pre-filled emit matrix
2180  *           ret_b     - RETURN: local begin state if local begins are on
2181  *           ret_pp    - RETURN: average posterior probability of aligned residues
2182  *                       in the optimally accurate parsetree
2183  *
2184  * Returns: <eslOK>     on success.
 * Throws:  <eslERANGE> if required CM_MX size exceeds <size_limit>
2186  *          If !eslOK: alignment has been aborted, ret_* variables are not valid
2187  */
2188 int
cm_OptAccAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,CM_MX * mx,CM_SHADOW_MX * shmx,CM_EMIT_MX * emit_mx,int * ret_b,float * ret_pp)2189 cm_OptAccAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_MX *mx, CM_SHADOW_MX *shmx,
2190 	       CM_EMIT_MX *emit_mx, int *ret_b, float *ret_pp)
2191 {
2192   int      status;
2193   int      v,y,z;	/* indices for states  */
2194   int      j,d,i,k;	/* indices in sequence dimensions */
2195   float    sc;		/* a temporary variable holding a score */
2196   float    pp;		/* average posterior probability of all emitted residues */
2197   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
2198   int      b;		/* best local begin state */
2199   float    bsc;		/* score for using the best local begin state */
2200   int      sd;          /* StateDelta(cm->sttype[v]) */
2201   int      sdr;         /* StateRightDelta(cm->sttype[v] */
2202   int      j_sdr;       /* j - sdr */
2203   int      d_sd;        /* d - sd */
2204   int      have_el;     /* TRUE if CM has local ends on, otherwise FALSE */
2205 
2206   /* the DP matrices */
2207   float ***alpha   = mx->dp;       /* pointer to the alpha DP matrix, we'll store optimal parse in  */
2208   float  **l_pp    = emit_mx->l_pp; /* pointer to the prefilled posterior values for left  emitters */
2209   float  **r_pp    = emit_mx->r_pp; /* pointer to the prefilled posterior values for right emitters */
2210   char  ***yshadow = shmx->yshadow; /* pointer to the yshadow matrix */
2211   int   ***kshadow = shmx->kshadow; /* pointer to the kshadow matrix */
2212 
2213   /* Allocations and initializations  */
2214   b   = -1;
2215   bsc = IMPOSSIBLE;
2216 
2217   /* grow the matrices based on the current sequence and bands */
2218   if((status = cm_mx_GrowTo       (cm, mx,   errbuf, L, size_limit)) != eslOK) return status;
2219   if((status = cm_shadow_mx_GrowTo(cm, shmx, errbuf, L, size_limit)) != eslOK) return status;
2220 
2221   /* initialize all cells of the matrix */
2222   if(  mx->ncells_valid   > 0) esl_vec_FSet(mx->dp_mem, mx->ncells_valid, IMPOSSIBLE);
2223   if(shmx->y_ncells_valid > 0) for(i = 0; i < shmx->y_ncells_valid; i++) shmx->yshadow_mem[i] = USED_EL;
2224   /* for B states, shadow matrix holds k, length of right fragment, this will almost certainly be overwritten */
2225   if(shmx->k_ncells_valid > 0) esl_vec_ISet(shmx->kshadow_mem, shmx->k_ncells_valid, 0);
2226 
2227   /* a special optimal accuracy specific step, initialize yshadow intelligently for d == 0
2228    * (necessary b/c zero length parsetees have 0 emits and so always score IMPOSSIBLE)
2229    */
2230   if((status = cm_InitializeOptAccShadowDZero(cm, errbuf, yshadow, L)) != eslOK) return status;
2231 
2232   /* start with the EL state */
2233   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
2234   if(have_el && l_pp[cm->M] != NULL) {
2235     for (j = 0; j <= L; j++) {
2236       alpha[cm->M][j][0] = l_pp[cm->M][0];
2237       i = j;
2238       for (d = 1; d <= j; d++) {
2239 	alpha[cm->M][j][d] = FLogsum(alpha[cm->M][j][d-1], l_pp[cm->M][i--]);
2240       }
2241     }
2242   }
2243 
2244   /* Main recursion */
2245   for (v = cm->M-1; v >= 0; v--) {
2246     sd   = StateDelta(cm->sttype[v]);
2247     sdr  = StateRightDelta(cm->sttype[v]);
2248 
2249     /* re-initialize if we can do a local end from v */
2250     if(have_el && NOT_IMPOSSIBLE(cm->endsc[v])) {
2251       for (j = 0; j <= L; j++) {
2252 	/* copy values from saved EL deck */
2253 	for (d = sd; d <= j; d++) {
2254 	  alpha[v][j][d] = alpha[cm->M][j-sdr][d-sd];
2255 	  /* yshadow[v][j][d] remains USED_EL */
2256 	}
2257       }
2258     }
2259     /* note there's no E state update here, those cells all remain IMPOSSIBLE */
2260 
2261     /* we have to separate out IL_st and IR_st because IL use emit_mx->l_pp and IR use emit_mx->r_pp */
2262     if(cm->sttype[v] == IL_st) {
2263       /* update alpha[v][j][d] cells, for IL states, loop nesting order is:
2264        * for j { for d { for y { } } } because they can self transit, and a
2265        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
2266        * before can start calc'ing for alpha[v][j][d+1] */
2267       for (j = 1; j <= L; j++) {
2268 	i    = j;
2269 	d_sd = 0;
2270 	for (d = 1; d <= j; d++, d_sd++, i--) {
2271 	  for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
2272 	    y = cm->cfirst[v] + yoffset;
2273 	    if ((sc = alpha[y][j][d_sd]) > alpha[v][j][d]) {
2274 	      alpha[v][j][d]   = sc;
2275 	      yshadow[v][j][d] = yoffset;
2276 	    }
2277 	  }
2278 	  alpha[v][j][d] = FLogsum(alpha[v][j][d], l_pp[v][i]);
2279 	  alpha[v][j][d] = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
2280 	  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
2281 	   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
2282 	   */
2283 	  if((! have_el) && yshadow[v][j][d] == USED_EL && d > sd) {
2284 	    alpha[v][j][d] = IMPOSSIBLE;
2285 	  }
2286 	}
2287       }
2288     }
2289     if(cm->sttype[v] == IR_st) {
2290       /* IR: same loop nesting order as for IL for same reason, see IL comment above */
2291       j_sdr = 0;
2292       for (j = 1; j <= L; j++, j_sdr++) {
2293 	d_sd  = 0;
2294 	for (d = 1; d <= j; d++, d_sd++) {
2295 	  for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
2296 	    y = cm->cfirst[v] + yoffset;
2297 	    if ((sc = alpha[y][j_sdr][d_sd]) > alpha[v][j][d]) {
2298 	      alpha[v][j][d]   = sc;
2299 	      yshadow[v][j][d] = yoffset;
2300 	    }
2301 	  }
2302 	  alpha[v][j][d] = FLogsum(alpha[v][j][d], r_pp[v][j]);
2303 	  alpha[v][j][d] = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
2304 	  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
2305 	   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
2306 	   */
2307 	  if((! have_el) && yshadow[v][j][d] == USED_EL && d > sd) {
2308 	    alpha[v][j][d] = IMPOSSIBLE;
2309 	  }
2310 	}
2311       }
2312     }
2313     else if(cm->sttype[v] != B_st) { /* entered if state v is (! IL && ! IR && ! B) */
2314       /* ML, MP, MR, D, S, E states cannot self transit, so all cells
2315        * in alpha[v] are independent of each other, only depending on
2316        * alpha[y] for previously calc'ed y.  We can do the for loops
2317        * in any nesting order, this implementation does what I think
2318        * is most efficient: for y { for j { for d { } } }.
2319        */
2320       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
2321 	yoffset = y - cm->cfirst[v];
2322 	j_sdr = 0;
2323 	for (j = sdr; j <= L; j++, j_sdr++) {
2324 	  d_sd = 0;
2325 	  for (d = sd; d <= j; d++, d_sd++) {
2326 	    if((sc = alpha[y][j_sdr][d_sd]) > alpha[v][j][d]) {
2327 	      alpha[v][j][d] = sc;
2328 	      yshadow[v][j][d] = yoffset;
2329 	    }
2330 	  }
2331 	}
2332       }
2333       /* add in emission score, if any */
2334       switch(cm->sttype[v]) {
2335       case ML_st:
2336 	for (j = 1; j <= L; j++) {
2337 	  i = j;
2338 	  for (d = sd; d <= j; d++, i--) {
2339 	    alpha[v][j][d] = FLogsum(alpha[v][j][d], l_pp[v][i]);
2340 	  }
2341 	}
2342 	break;
2343       case MR_st:
2344 	for (j = 1; j <= L; j++) {
2345 	  for (d = sd; d <= j; d++) {
2346 	    alpha[v][j][d] = FLogsum(alpha[v][j][d], r_pp[v][j]);
2347 	  }
2348 	}
2349 	break;
2350       case MP_st:
2351 	for (j = 2; j <= L; j++) {
2352 	  i = j-1;
2353 	  for (d = sd; d <= j; d++, i--) {
2354 	    alpha[v][j][d] = FLogsum(alpha[v][j][d], FLogsum(l_pp[v][i], r_pp[v][j]));
2355 	  }
2356 	}
2357 	break;
2358       default:
2359 	break;
2360       }
2361       /* ensure all cells are >= IMPOSSIBLE */
2362       for (j = 0; j <= L; j++) {
2363 	for (d = 0; d <= j; d++)
2364 	  alpha[v][j][d] = ESL_MAX(alpha[v][j][d], IMPOSSIBLE);
2365       }
2366       /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
2367        * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
2368        */
2369       if(! have_el && sd > 0) { /* this is only necessary for emitters (MP, ML, MR in this context) */
2370 	for (j = 0; j <= L; j++) {
2371 	  for (d = sd+1; d <= j; d++) {
2372 	    if(yshadow[v][j][d] == USED_EL) alpha[v][j][d] = IMPOSSIBLE;
2373 	  }
2374 	}
2375       }
2376     }
2377     else { /* B_st */
2378       y = cm->cfirst[v]; /* left  subtree */
2379       z = cm->cnum[v];   /* right subtree */
2380       for (j = 0; j <= L; j++) {
2381 	for (d = 0; d <= j; d++) {
2382 	  for (k = 0; k <= d; k++) {
2383 	    if ((sc = FLogsum(alpha[y][j-k][d-k], alpha[z][j][k])) > alpha[v][j][d])
2384 	      {
2385 		if(((d == k) || (NOT_IMPOSSIBLE(alpha[y][j-k][d-k]))) && /* left  subtree can only be IMPOSSIBLE if it has length 0 (in which case d==k, and d-k=0) */
2386 		   ((k == 0) || (NOT_IMPOSSIBLE(alpha[z][j][k]))))       /* right subtree can only be IMPOSSIBLE if it has length 0 (in which case k==0) */
2387 		  {
2388 		    alpha[v][j][d]   = sc;
2389 		    kshadow[v][j][d] = k;
2390 
2391 		    /* Note: we take the logsum here, because we're keeping track of the
2392 		     * log of the summed probability of emitting all residues up to this
2393 		     * point, (from i..j) from left subtree (i=j-d+1..j-k) and from the
2394 		     * right subtree. (j-k+1..j)
2395 		     *
2396 		     * EPN, Tue Nov 17 10:53:13 2009 Bug fix post infernal-1.0.2 release in "if(((sc = FLogsum..."
2397 		     * statement above.  This is i15 in BUGTRAX, fixed as of svn revision 3056 in infernal 1.0 release
2398 		     * branch, and revision 3057 in infernal trunk.  Bug description: See analogous section and comment
2399 		     * in cm_OptAccAlignHB() above. In that function, in very rare cases (1 case in the 1.1 million SSU
2400 		     * sequences in release 10_15 of RDP), this step will add two alpha values (alpha[y][j-k][d-k] for
2401 		     * left subtree, and alpha[z][j][k] for right subtree) where one of them is IMPOSSIBLE and the
2402 		     * corresponding subtree length ('d-k' in left subtree, or 'k' if right subtree) is non-zero, yet
2403 		     * their FLogsum (which equals the value of the non-IMPOSSIBLE cell) is sufficiently high to be part
2404 		     * of the optimally accurate traceback. This will probably cause a seg fault later b/c it implies a
2405 		     * left or right subtree that is IMPOSSIBLE. It is okay if an IMPOSSIBLE scoring subtree has length
2406 		     * 0 b/c 0 residues will contribute nothing to the summed log probability (nothing corresponds to a
2407 		     * score of IMPOSSIBLE). We handle this case here by explicitly checking if either left or right
2408 		     * subtree cell is IMPOSSIBLE with non-zero length before reassigning alpha[v][j][d].  I'm not sure
2409 		     * if this is even possible in the non-banded function (this function), but I included the analogous
2410 		     * fix here (the NOT_IMPOSSIBLE() calls) in case it was ever possible. This will slow down the
2411 		     * implementation, but I'd rather err on the side of caution here, since we don't care so much about
2412 		     * speed in the non-banded function, and b/c finding this bug again if the non-banded function can
2413 		     * have the bug would be a pain in the ass.
2414 		     */
2415 		  }
2416 	      }
2417 	  }
2418 	}
2419       }
2420     }
2421     /* allow local begins, if nec */
2422     if((cm->flags & CMH_LOCAL_BEGIN) && (NOT_IMPOSSIBLE(cm->beginsc[v]))) {
2423       if (alpha[v][L][L] > bsc) {
2424 	b   = v;
2425 	bsc = alpha[v][L][L];
2426       }
2427     }
2428   } /* finished calculating deck v. */
2429 
2430   /* If local begins are on, the only way out of ROOT_S is via a local
2431    * begin, so update the optimal score and put a flag in the shadow
2432    * matrix telling cm_alignT() to use the b we return.
2433    *
2434    * Note that because we're in OptAcc alpha[0][L][L] will already be
2435    * equal to bsc because transition scores (and thus impossible
2436    * transitions out of ROOT_S) have no effect on the score, so
2437    * whereas we can check to see if 'bsc > alpha[0][L][L]' at an
2438    * analogous point in CYK before setting the USED_LOCAL_BEGIN
2439    * flag, we can't here because it would be FALSE.
2440    */
2441   if(NOT_IMPOSSIBLE(bsc) && (cm->flags & CMH_LOCAL_BEGIN)) {
2442     alpha[0][L][L]   = bsc;
2443     yshadow[0][L][L] = USED_LOCAL_BEGIN;
2444   }
2445 
2446 #if eslDEBUGLEVEL >= 3
2447   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
2448   /* FILE *fp1; fp1 = fopen("tmp.std_oamx",   "w"); cm_mx_Dump(fp1, mx); fclose(fp1); */
2449   /* FILE *fp2; fp2 = fopen("tmp.std_oashmx", "w"); cm_shadow_mx_Dump(fp2, cm, shmx); fclose(fp2); */
2450 #endif
2451 
2452   sc = alpha[0][L][L];
2453 
2454   /* convert sc, a log probability, into the average posterior probability of all L aligned residues */
2455   pp = sreEXP2(sc) / (float) L;
2456 
2457   if(ret_b != NULL)  *ret_b  = b;    /* b is -1 if local ends are off */
2458   if(ret_pp != NULL) *ret_pp = pp;
2459 
2460   ESL_DPRINTF1(("#DEBUG: cm_OptAccAlign return pp: %f\n", pp));
2461   return eslOK;
2462 }
2463 
2464 
2465 /* Function: cm_OptAccAlignHB()
2466  * Date:     EPN, Thu Nov 15 10:48:37 2007
2467  *
2468  * Purpose:  Same as cm_OptAccAlign() but HMM bands are used.
2469  *           See cm_OptAccAlign()'s Purpose for more information.
2470  *
2471  *           Note: Renamed from optimal_accuracy_align_hb() [EPN, Wed Sep 14 06:16:06 2011].
2472  *
2473  * Args:     cm        - the model
2474  *           errbuf    - char buffer for reporting errors
 *           dsq       - the digitized sequence [i0..j0]
2476  *           L         - length of the dsq
2477  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
2478  *           mx        - the DP matrix to fill in
2479  *           shmx      - the shadow matrix to fill in
2480  *           emit_mx   - pre-filled emit matrix
2481  *           ret_b     - RETURN: local begin state if local begins are on
2482  *           ret_pp    - RETURN: average posterior probability of aligned residues
2483  *                       in the optimally accurate parsetree
2484  *
2485  * Returns: <eslOK> on success.
2486  *
2487  * Throws:  <eslERANGE> if required CM_HB_MX size exceeds <size_limit>
2488  *          If !eslOK: alignment has been aborted, ret_* variables are not valid
2489  */
2490 int
cm_OptAccAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,CM_HB_MX * mx,CM_HB_SHADOW_MX * shmx,CM_HB_EMIT_MX * emit_mx,int * ret_b,float * ret_pp)2491 cm_OptAccAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx,
2492 		 CM_HB_EMIT_MX *emit_mx, int *ret_b, float *ret_pp)
2493 {
2494   int      status;
2495   int      v,y,z;	/* indices for states  */
2496   int      j,d,i,k;	/* indices in sequence dimensions */
2497   float    sc;		/* a temporary variable holding a score */
2498   float    pp;		/* average posterior probability of all emitted residues */
2499   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
2500   int      b;		/* best local begin state */
2501   float    bsc;		/* score for using the best local begin state */
2502   int     *yvalidA;     /* [0..MAXCONNECT-1] TRUE if v->yoffset is legal transition (within bands) */
2503   int      jp_0;        /* L offset in ROOT_S's (v==0) j band */
2504   int      Lp_0;        /* L offset in ROOT_S's (v==0) d band */
2505   int      Lp;          /* L offset in any state v's d band */
2506   int      sd;          /* StateDelta(cm->sttype[v]) */
2507   int      sdr;         /* StateRightDelta(cm->sttype[v] */
2508   int      have_el;     /* TRUE if CM has local ends on, otherwise FALSE */
2509 
2510   /* indices used for handling band-offset issues, and in the depths of the DP recursion */
2511   int      ip_v;               /* offset i index for state v */
2512   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
2513   int      jp_y_sdr;           /* jp_y - sdr */
2514   int      j_sdr;              /* j - sdr */
2515   int      jn, jx;             /* current minimum/maximum j allowed */
2516   int      jpn, jpx;           /* minimum/maximum jp_v */
2517   int      dp_v, dp_y;         /* d index for state v/y in alpha w/mem eff bands */
2518   int      dn, dx;             /* current minimum/maximum d allowed */
2519   int      dp_y_sd;            /* dp_y - sd */
2520   int      dpn, dpx;           /* minimum/maximum dp_v */
2521   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
2522   int      jp_y_minus_k;       /* jp_y - k, used in one loop, stored to avoid calc'ing twice */
2523   int      dp_y_minus_k;       /* dp_y - k, used in one loop, stored to avoid calc'ing twice */
2524   int      kn, kx;             /* current minimum/maximum k value */
2525   int      yvalid_idx;         /* for keeping track of which children are valid */
2526   int      yvalid_ct;          /* for keeping track of which children are valid */
2527 
2528   /* variables used for memory efficient bands */
2529   /* ptrs to cp9b info, for convenience */
2530   CP9Bands_t *cp9b  = cm->cp9b;
2531   int        *imin  = cp9b->imin;
2532   int        *imax  = cp9b->imax;
2533   int        *jmin  = cp9b->jmin;
2534   int        *jmax  = cp9b->jmax;
2535   int       **hdmin = cp9b->hdmin;
2536   int       **hdmax = cp9b->hdmax;
2537 
2538   /* the DP matrices */
2539   float ***alpha   = mx->dp;        /* pointer to the alpha DP matrix, we'll store optimal parse in  */
2540   float  **l_pp    = emit_mx->l_pp; /* pointer to the prefilled posterior values for left  emitters */
2541   float  **r_pp    = emit_mx->r_pp; /* pointer to the prefilled posterior values for right emitters */
2542   char  ***yshadow = shmx->yshadow; /* pointer to the yshadow matrix */
2543   int   ***kshadow = shmx->kshadow; /* pointer to the kshadow matrix */
2544 
2545   /* Allocations and initializations  */
2546   b   = -1;
2547   bsc = IMPOSSIBLE;
2548   if (cp9b->jmin[0] > L || cp9b->jmax[0] < L)
2549     ESL_FAIL(eslEINVAL, errbuf, "cm_OptAccAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cp9b->jmin[0], cp9b->jmax[0]);
2550   jp_0 = L - jmin[0];
2551   if (cp9b->hdmin[0][jp_0] > L || cp9b->hdmax[0][jp_0] < L)
2552     ESL_FAIL(eslEINVAL, errbuf, "cm_OptAccAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cp9b->hdmin[0][jp_0], cp9b->hdmax[0][jp_0]);
2553   Lp_0 = L - hdmin[0][jp_0];
2554 
2555   /* grow the matrices based on the current sequence and bands */
2556   if((status = cm_hb_mx_GrowTo       (cm, mx,   errbuf, cp9b, L, size_limit)) != eslOK) return status;
2557   if((status = cm_hb_shadow_mx_GrowTo(cm, shmx, errbuf, cp9b, L, size_limit)) != eslOK) return status;
2558 
2559   /* initialize all cells of the matrix */
2560   if(  mx->ncells_valid   > 0) esl_vec_FSet(mx->dp_mem, mx->ncells_valid, IMPOSSIBLE);
2561   if(shmx->y_ncells_valid > 0) for(i = 0; i < shmx->y_ncells_valid; i++) shmx->yshadow_mem[i] = USED_EL;
2562   /* for B states, shadow matrix holds k, length of right fragment, this will almost certainly be overwritten */
2563   if(shmx->k_ncells_valid > 0) esl_vec_ISet(shmx->kshadow_mem, shmx->k_ncells_valid, 0);
2564 
2565   /* a special optimal accuracy specific step, initialize yshadow intelligently for d == 0
2566    * (necessary b/c zero length parsetees have 0 emits and so always score IMPOSSIBLE)
2567    */
2568   if((status = cm_InitializeOptAccShadowDZeroHB(cm, cp9b, errbuf, yshadow, L)) != eslOK) return status;
2569 
2570   /* start with the EL state (remember, cm->M deck is non-banded) */
2571   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
2572   if(have_el && l_pp[cm->M] != NULL) {
2573     for (j = 0; j <= L; j++) {
2574       alpha[cm->M][j][0] = l_pp[cm->M][0];
2575       i = j;
2576       for (d = 1; d <= j; d++) {
2577 	alpha[cm->M][j][d] = FLogsum(alpha[cm->M][j][d-1], l_pp[cm->M][i--]);
2578       }
2579     }
2580   }
2581 
2582   /* yvalidA[0..cnum[v]] will hold TRUE for states y for which a transition is legal
2583    * (some transitions are impossible due to the bands) */
2584   ESL_ALLOC(yvalidA, sizeof(int) * MAXCONNECT);
2585   esl_vec_ISet(yvalidA, MAXCONNECT, FALSE);
2586 
2587   /* Main recursion */
2588   for (v = cm->M-1; v >= 0; v--) {
2589     sd   = StateDelta(cm->sttype[v]);
2590     sdr  = StateRightDelta(cm->sttype[v]);
2591 
2592     /* re-initialize if we can do a local end from v */
2593     if(have_el && NOT_IMPOSSIBLE(cm->endsc[v])) {
2594       for (j = jmin[v]; j <= jmax[v]; j++) {
2595 	jp_v  = j - jmin[v];
2596 	/* copy values from saved EL deck */
2597 	for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
2598 	  dp_v = d - hdmin[v][jp_v];
2599 	  alpha[v][jp_v][dp_v] = alpha[cm->M][j-sdr][d-sd];
2600 	  /* yshadow[v][jp_v][dp_v] remains USED_EL */
2601 	}
2602       }
2603     }
2604     /* note there's no E state update here, those cells all remain IMPOSSIBLE */
2605 
2606     /* we could separate out IL_st and IR_st, but I don't it makes a significant difference in run time */
2607     if(cm->sttype[v] == IL_st || cm->sttype[v] == IR_st) {
2608       /* update alpha[v][jp_v][dp_v] cells, for IL/IR states, loop nesting order is:
2609        * for j { for d { for y { } } } because they can self transit, and a
2610        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
2611        * before can start calc'ing for alpha[v][j][d+1] */
2612       for (j = jmin[v]; j <= jmax[v]; j++) {
2613 	jp_v = j - jmin[v];
2614 	yvalid_ct = 0;
2615 	j_sdr = j - sdr;
2616 
2617 	/* determine which children y we can legally transit to for v, j */
2618 	for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
2619 	  if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr valid for state y? */
2620 
2621 	i = j - hdmin[v][jp_v] + 1;
2622 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++, i--) { /* for each valid d for v, j */
2623 	  /*printf("v: %4d j: %4d (%4d..%4d) d: %4d (%4d..%4d) i: %4d (%4d..%4d)\n",
2624 	    v, j, jmin[v], jmax[v], d, hdmin[v][jp_v], hdmax[v][jp_v], i, imin[v], imax[v]);*/
2625 	  assert(i >= imin[v] && i <= imax[v]);
2626 	  ESL_DASSERT1((i >= imin[v] && i <= imax[v]));
2627 	  ip_v = i - imin[v];         /* i index for state v in emit_mx->l_pp */
2628 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
2629 	  for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
2630 	    yoffset = yvalidA[yvalid_idx];
2631 	    y = cm->cfirst[v] + yoffset;
2632 	    jp_y_sdr = j - jmin[y] - sdr;
2633 
2634 	    if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
2635 	      dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
2636 	      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
2637 	      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2638 	      if ((sc = alpha[y][jp_y_sdr][dp_y_sd]) > alpha[v][jp_v][dp_v])
2639 		{
2640 		  alpha[v][jp_v][dp_v] = sc;
2641 		  yshadow[v][jp_v][dp_v]    = yoffset;
2642 		}
2643 	    }
2644 	  }
2645 	  if(cm->sttype[v] == IL_st) alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], l_pp[v][ip_v]);
2646 	  else                       alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], r_pp[v][jp_v]);
2647 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
2648 	  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
2649 	   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
2650 	   */
2651 	  if((! have_el) && yshadow[v][jp_v][dp_v] == USED_EL && d > sd) {
2652 	    alpha[v][jp_v][dp_v] = IMPOSSIBLE;
2653 	  }
2654 	}
2655       }
2656     }
2657     else if(cm->sttype[v] != B_st) { /* entered if state v is (! IL && ! IR && ! B) */
2658       /* ML, MP, MR, D, S, E states cannot self transit, this means that all cells
2659        * in alpha[v] are independent of each other, only depending on alpha[y] for previously calc'ed y.
2660        * We can do the for loops in any nesting order, this implementation does what I think is most efficient:
2661        * for y { for j { for d { } } }
2662        */
2663       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
2664 	yoffset = y - cm->cfirst[v];
2665 
2666 	jn = ESL_MAX(jmin[v], jmin[y]+sdr);
2667 	jx = ESL_MIN(jmax[v], jmax[y]+sdr);
2668 	jpn = jn - jmin[v];
2669 	jpx = jx - jmin[v];
2670 	jp_y_sdr = jn - jmin[y] - sdr;
2671 
2672 	for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++) {
2673 	  ESL_DASSERT1((jp_v     >= 0 && jp_v     <= (jmax[v]-jmin[v])));
2674 	  ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
2675 
2676 	  dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
2677 	  dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
2678 	  dpn     = dn - hdmin[v][jp_v];
2679 	  dpx     = dx - hdmin[v][jp_v];
2680 	  dp_y_sd = dn - hdmin[y][jp_y_sdr] - sd;
2681 
2682 	  for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
2683 	    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
2684 	    ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2685 	    if((sc = alpha[y][jp_y_sdr][dp_y_sd]) > alpha[v][jp_v][dp_v]) {
2686 	      alpha[v][jp_v][dp_v] = sc;
2687 	      yshadow[v][jp_v][dp_v] = yoffset;
2688 	    }
2689 	  }
2690 	}
2691       }
2692       /* add in emission score, if any */
2693       switch(cm->sttype[v]) {
2694       case ML_st:
2695 	for (j = jmin[v]; j <= jmax[v]; j++) {
2696 	  jp_v = j - jmin[v];
2697 	  i    = j - hdmin[v][jp_v] + 1;
2698 	  ip_v = i - imin[v];
2699 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++, ip_v--) {
2700 	    /*printf("v: %4d j: %4d (%4d..%4d) d: %4d (%4d..%4d) i: %4d (%4d..%4d)\n",
2701 	      v, j, jmin[v], jmax[v], d, hdmin[v][jp_v], hdmax[v][jp_v], i, imin[v], imax[v]);*/
2702 	    assert(ip_v >= 0 && ip_v <= (imax[v] - imin[v]));
2703 	    ESL_DASSERT1((ip_v >= 0 && ip_v <= (imax[v] - imin[v])));
2704 	    alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], l_pp[v][ip_v]);
2705 	  }
2706 	}
2707 	break;
2708       case MR_st:
2709 	for (j = jmin[v]; j <= jmax[v]; j++) {
2710 	  jp_v  = j - jmin[v];
2711 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
2712 	    alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], r_pp[v][jp_v]);
2713 	  }
2714 	}
2715 	break;
2716       case MP_st:
2717 	for (j = jmin[v]; j <= jmax[v]; j++) {
2718 	  jp_v = j - jmin[v];
2719 	  i    = j - hdmin[v][jp_v] + 1;
2720 	  ip_v = i - imin[v];
2721 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++, ip_v--) {
2722 	    assert(ip_v >= 0 && ip_v <= (imax[v] - imin[v]));
2723 	    ESL_DASSERT1((ip_v >= 0 && ip_v <= (imax[v] - imin[v])));
2724 	    alpha[v][jp_v][dp_v] = FLogsum(alpha[v][jp_v][dp_v], FLogsum(l_pp[v][ip_v], r_pp[v][jp_v]));
2725 	  }
2726 	}
2727 	break;
2728       default:
2729 	break;
2730       }
2731       /* ensure all cells are >= IMPOSSIBLE */
2732       for (j = jmin[v]; j <= jmax[v]; j++) {
2733 	jp_v = j - jmin[v];
2734 	for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++)
2735 	  alpha[v][jp_v][dp_v] = ESL_MAX(alpha[v][jp_v][dp_v], IMPOSSIBLE);
2736       }
2737       /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
2738        * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
2739        */
2740       if(! have_el && sd > 0) { /* this is only necessary for emitters (MP, ML, MR in this context) */
2741 	for (j = jmin[v]; j <= jmax[v]; j++) {
2742 	  jp_v = j - jmin[v];
2743 	  d = ESL_MAX(sd+1, hdmin[v][jp_v]);
2744 	  dp_v = d - hdmin[v][jp_v];
2745 	  for (; d <= hdmax[v][jp_v]; d++) {
2746 	    if(yshadow[v][jp_v][dp_v] == USED_EL) alpha[v][jp_v][dp_v] = IMPOSSIBLE;
2747 	    dp_v++;
2748 	  }
2749 	}
2750       }
2751     }
2752     else { /* B_st */
2753       y = cm->cfirst[v]; /* left  subtree */
2754       z = cm->cnum[v];   /* right subtree */
2755 
2756       /* Any valid j must be within both state v and state z's j band
2757        * I think jmin[v] <= jmin[z] is guaranteed by the way bands are
2758        * constructed, but we'll check anyway.
2759        */
2760       jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
2761       jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
2762       /* the main j loop */
2763       for (j = jn; j <= jx; j++) {
2764 	jp_v = j - jmin[v];
2765 	jp_y = j - jmin[y];
2766 	jp_z = j - jmin[z];
2767 	kn = ((j-jmax[y]) > (hdmin[z][jp_z])) ? (j-jmax[y]) : hdmin[z][jp_z];
2768         kn = ESL_MAX(kn, 0); /* kn must be non-negative, added with fix to bug i36 */
2769 	/* kn satisfies inequalities (1) and (3) (listed below)*/
2770 	kx = ( jp_y       < (hdmax[z][jp_z])) ?  jp_y       : hdmax[z][jp_z];
2771 	/* kn satisfies inequalities (2) and (4) (listed below)*/
2772 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
2773 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
2774 
2775 	  /* Find the first k value that implies a valid cell in the y and z decks.
2776 	   * This k must satisfy the following 6 inequalities (some may be redundant):
2777 	   * (1) k >= j-jmax[y];
2778 	   * (2) k <= j-jmin[y];
2779 	   *     1 and 2 guarantee (j-k) is within state y's j band
2780 	   *
2781 	   * (3) k >= hdmin[z][j-jmin[z]];
2782 	   * (4) k <= hdmax[z][j-jmin[z]];
2783 	   *     3 and 4 guarantee k is within z's j=(j), d band
2784 	   *
2785 	   * (5) k >= d-hdmax[y][j-jmin[y]-k];
2786 	   * (6) k <= d-hdmin[y][j-jmin[y]-k];
2787 	   *     5 and 6 guarantee (d-k) is within state y's j=(j-k) d band
2788 	   *
2789 	   * kn and kx were set above (outside (for (dp_v...) loop) that
2790 	   * satisfy 1-4 (b/c 1-4 are d-independent and k-independent)
2791 	   * RHS of inequalities 5 and 6 are dependent on k, so we check
2792 	   * for these within the next for loop.
2793 	   */
2794 	  for(k = kn; k <= kx; k++) {
2795 	    if((k >= d - hdmax[y][jp_y-k]) && k <= d - hdmin[y][jp_y-k]) {
2796 	      /* for current k, all 6 inequalities have been satisified
2797 	       * so we know the cells corresponding to the platonic
2798 	       * matrix cells alpha[v][j][d], alpha[y][j-k][d-k], and
2799 	       * alpha[z][j][k] are all within the bands. These
2800 	       * cells correspond to alpha[v][jp_v][dp_v],
2801 	       * alpha[y][jp_y-k][d-hdmin[jp_y-k]-k],
2802 	       * and alpha[z][jp_z][k-hdmin[jp_z]];
2803 	       */
2804 	      kp_z = k-hdmin[z][jp_z];
2805 	      dp_y = d-hdmin[y][jp_y-k];
2806 	      jp_y_minus_k = jp_y-k;
2807 	      dp_y_minus_k = dp_y-k;
2808 
2809 	      if((sc = FLogsum(alpha[y][jp_y_minus_k][dp_y_minus_k], alpha[z][jp_z][kp_z])) > alpha[v][jp_v][dp_v])
2810 		{
2811 		  if(((d == k) || (NOT_IMPOSSIBLE(alpha[y][jp_y_minus_k][dp_y_minus_k]))) && /* left subtree can only be IMPOSSIBLE if it has length 0 (in which case d==k, and d-k=0) */
2812 		     ((k == 0) || (NOT_IMPOSSIBLE(alpha[z][jp_z][kp_z]))))                   /* right subtree can only be IMPOSSIBLE if it has length 0 (in which case k==0) */
2813 		    {
2814 		      alpha[v][jp_v][dp_v] = sc;
2815 		      kshadow[v][jp_v][dp_v] = k;
2816 		      /* Note: we take the logsum here, because we're
2817 		       * keeping track of the log of the summed probability
2818 		       * of emitting all residues up to this point, (from
2819 		       * i..j) from left subtree (i=j-d+1..j-k) and from the
2820 		       * right subtree. (j-k+1..j).
2821 		       *
2822 		       * EPN, Tue Nov 17 09:57:59 2009:
2823 		       * Bug fix post infernal-1.0.2 release.
2824 		       * Addition of 2-line if statement beginning
2825 		       * "if(((d == k...)"  This is i15 in BUGTRAX,
2826 		       * fixed as of svn revision 3056 in infernal 1.0
2827 		       * release branch, and revision 3057 in infernal
2828 		       * trunk.
2829 		       * Bug description: In very rare cases (1 case
2830 		       * in the 1.1 million SSU sequences in release
2831 		       * 10_15 of RDP), this step will add two alpha
2832 		       * values (alpha[y][jp_y_minus_k][dp_y_minus_k]
2833 		       * for left subtree, and alpha[z][jp_z][kp_z]
2834 		       * for right subtree) where one of them is
2835 		       * IMPOSSIBLE and the corresponding subtree
2836 		       * length ('d-k' in left subtree, or 'k' if right
2837 		       * subtree) is non-zero, yet their FLogsum
2838 		       * (which equals the value of the non-IMPOSSIBLE
2839 		       * cell) is sufficiently high to be part of the
2840 		       * optimally accurate traceback. This will
2841 		       * probably cause a seg fault later b/c it
2842 		       * implies a left or right subtree that is
2843 		       * IMPOSSIBLE. It is okay if an IMPOSSIBLE
2844 		       * scoring subtree has length 0 b/c 0 residues
2845 		       * will contribute nothing to the summed log
2846 		       * probability (nothing corresponds to a score
2847 		       * of IMPOSSIBLE). We handle this case here by
2848 		       * explicitly checking if either left or right
2849 		       * subtree cell is IMPOSSIBLE with non-zero
2850 		       * length before reassigning
2851 		       * alpha[v][jp_v][dp_v].
2852 		       */
2853 		    }
2854 		}
2855 	    }
2856 	  }
2857 	}
2858       }
2859     }
2860     /* allow local begins, if nec */
2861     if((cm->flags & CMH_LOCAL_BEGIN) && (NOT_IMPOSSIBLE(cm->beginsc[v]))) {
2862       if(L >= jmin[v] && L <= jmax[v]) {
2863 	jp_v = L - jmin[v];
2864 	Lp   = L - hdmin[v][jp_v];
2865 	if(L >= hdmin[v][jp_v] && L <= hdmax[v][jp_v]) {
2866 	/* If we get here alpha[v][jp_v][Wp] is a valid cell
2867 	 * in the banded alpha matrix, corresponding to
2868 	 * alpha[v][j0][W] in the platonic matrix.
2869 	 */
2870 	/* Check for local begin getting us to the root.
2871 	 * This is "off-shadow": if/when we trace back, we'll handle this
2872 	 * case separately (and we'll know to do it because we'll immediately
2873 	 * see a USED_LOCAL_BEGIN flag in the shadow matrix, telling us
2874 	 * to jump right to state b; see below)
2875 	 */
2876 	  if (alpha[v][jp_v][Lp] > bsc) {
2877 	    b   = v;
2878 	    bsc = alpha[v][jp_v][Lp];
2879 	  }
2880 	}
2881       }
2882     }
2883   } /* end loop over all v */
2884 
2885   /* If local begins are on, the only way out of ROOT_S is via a local
2886    * begin, so update the optimal score and put a flag in the shadow
2887    * matrix telling cm_alignT() to use the b we return.
2888    *
2889    * Note that because we're in OptAcc alpha[0][L][L] will already be
2890    * equal to bsc because transition scores (and thus impossible
2891    * transitions out of ROOT_S) have no effect on the score, so
2892    * whereas we can check to see if 'bsc > alpha[0][L][L]' at an
2893    * analogous point in CYK before setting the USED_LOCAL_BEGIN
2894    * flag, we can't here because it would be FALSE.
2895    */
2896   if(NOT_IMPOSSIBLE(bsc) && (cm->flags & CMH_LOCAL_BEGIN)) {
2897     alpha[0][jp_0][Lp_0]   = bsc;
2898     yshadow[0][jp_0][Lp_0] = USED_LOCAL_BEGIN;
2899   }
2900 
2901 #if eslDEBUGLEVEL >= 3
2902   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
2903   /* FILE *fp1; fp1 = fopen("tmp.std_oahbmx",   "w"); cm_hb_mx_Dump(fp1, mx); fclose(fp1); */
2904   /* FILE *fp2; fp2 = fopen("tmp.std_oahbshmx", "w"); cm_hb_shadow_mx_Dump(fp2, cm, shmx); fclose(fp2); */
2905 #endif
2906 
2907   sc = alpha[0][jp_0][Lp_0];
2908 
2909   /* convert sc, a log probability, into the average posterior probability of all L aligned residues */
2910   pp = sreEXP2(sc) / (float) L;
2911 
2912   free(yvalidA);
2913 
2914   if (ret_b  != NULL)   *ret_b  = b;   /* b is -1 if local begins are off */
2915   if (ret_pp != NULL)   *ret_pp = pp;
2916 
2917   ESL_DPRINTF1(("#DEBUG: cm_OptAccAlignHB return pp: %f\n", pp));
2918   return eslOK;
2919 
2920  ERROR:
2921   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
2922   return status; /* never reached */
2923 }
2924 
2925 /* Function: cm_CYKOutsideAlign()
2926  * Date:     EPN, Wed Sep 14 14:01:36 2011
2927  *
2928  * Purpose:  Run the outside CYK algorithm on a target sequence.
2929  *           Non-banded version. See cm_CYKOutsideAlignHB() for
2930  *           the HMM banded version. The full target sequence
2931  *           1..L is aligned.
2932  *
2933  *           Very similar to cm_OutsideAlign() but calculates
2934  *           beta[v][j][d]: log probability of the most likely parse
2935  *           that emits 1..i-1 and j+1..L and passes through v at j,d
2936  *           (where i = j-d+1) instead of the log of the summed
2937  *           probability of all such parses. This means max operations
2938  *           are used instead of logsums.
2939  *
2940  *           This function complements cm_CYKInsideAlign() but is
2941  *           mainly useful for testing and reference. It can be used
2942  *           with do_check=TRUE to verify that the implementation of
2943  *           CYKInside and CYKOutside are consistent.  Because the
2944  *           structure of CYKInside and Inside, and CYKOutside and
2945  *           Outside are so similar and the CYK variants are easier to
2946  *           debug (because only the optimal parsetree is considered
2947  *           instead of all possible parsetrees) this function can be
2948  *           useful for finding bugs in Outside.  It is currently not
2949  *           hooked up to any of the main Infernal programs.
2950  *
2951  * Args:     cm        - the model
2952  *           errbuf    - char buffer for reporting errors
2953  *           dsq       - the digitized sequence
2954  *           L         - length of the dsq to align
2955  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
2956  *           do_check  - TRUE to attempt to check
2957  *           mx        - the dp matrix, grown and filled here
2958  *           inscyk_mx - the pre-filled dp matrix from the CYK Inside calculation
2959  *                       (performed by cm_CYKInsideAlign(), required)
2960  *           ret_sc    - RETURN: log P(S|M)/P(S|R), as a bit score, this is from
2961  *                       inscyk_mx IF local ends are on (see comments towards
2962  *                       end of function).
2963  *
2964  * Returns:  <eslOK> on success.
2965  *
2966  * Throws:   <eslERANGE> if required CM_HB_MX size exceeds <size_limit>
2967  *           <eslEMEM>   if we run out of memory
2968  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
2969  *           In any of these cases, alignment has been aborted, ret_sc is not valid.
2970  */
2971 int
cm_CYKOutsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_check,CM_MX * mx,CM_MX * inscyk_mx,float * ret_sc)2972 cm_CYKOutsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check,
2973 		   CM_MX *mx, CM_MX *inscyk_mx, float *ret_sc)
2974 {
2975   int      status;
2976   int      v,y,z;	       /* indices for states */
2977   int      j,d,i,k;	       /* indices in sequence dimensions */
2978   float    sc;     	       /* a temporary score */
2979   float  **esc_vAA;            /* ptr to cm->oesc, optimized emission scores */
2980   float    escore;	       /* an emission score, tmp variable */
2981   int      voffset;	       /* index of v in t_v(y) transition scores */
2982   int      sd;                 /* StateDelta(cm->sttype[y]) */
2983   int      sdr;                /* StateRightDelta(cm->sttype[y] */
2984 
2985   /* variables used only if do_check==TRUE */
2986   int      fail1_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 1*/
2987   int      fail2_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 2*/
2988   int      fail3_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 3*/
2989   int      n;                  /* counter over nodes, used only if do_check = TRUE */
2990   int      num_split_states;   /* temp variable used only if do_check = TRUE */
2991   float    diff;               /* temp variable used only if do_check = TRUE */
2992   int      vmax;               /* i, offset in the matrix */
2993   float    tol;                /* tolerance for differences in bit scores */
2994   int     *optseen = NULL;     /* [1..i..W] TRUE is residue i is accounted for in optimal parse */
2995 
2996   /* the DP matrices */
2997   float ***beta  = mx->dp;        /* pointer to the Oustide DP mx */
2998   float ***alpha = inscyk_mx->dp; /* pointer to the CYK Inside DP mx (already calc'ed and passed in) */
2999 
3000   /* Allocations and initializations */
3001   esc_vAA = cm->oesc;            /* a ptr to the optimized emission scores */
3002 
3003   /* grow the matrix based on the current sequence */
3004   if((status = cm_mx_GrowTo(cm, mx, errbuf, L, size_limit)) != eslOK) return status;
3005 
3006   /* initialize all cells of the matrix to IMPOSSIBLE */
3007   esl_vec_FSet(beta[0][0], mx->ncells_valid, IMPOSSIBLE);
3008 
3009   /* now set beta[0][L][L] to 0., all (valid) parses must end there */
3010   beta[0][L][L] = 0.;
3011 
3012   /* initialize local begin cells for emitting full seq (j==L && d == L) */
3013   if (cm->flags & CMH_LOCAL_BEGIN) {
3014     for (v = 1; v < cm->M; v++)
3015       beta[v][L][L] = cm->beginsc[v];
3016   }
3017 
3018   /* Main recursion */
3019   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
3020     sd  = StateDelta(cm->sttype[v]);
3021     sdr = StateRightDelta(cm->sttype[v]);
3022 
3023     if (cm->stid[v] == BEGL_S) { /* BEGL_S */
3024       y = cm->plast[v];	/* the parent bifurcation    */
3025       z = cm->cnum[y];	/* the other (right) S state */
3026       for(j = 0; j <= L; j++) {
3027 	for (d = 0; d <= j; d++) {
3028 	  for (k = 0; k <= (L-j); k++) {
3029 	    beta[v][j][d] = ESL_MAX(beta[v][j][d], (beta[y][j+k][d+k] + alpha[z][j+k][k]));
3030 	  }
3031 	}
3032       }
3033     } /* end of 'if (cm->stid[v] == BEGL_S */
3034     else if (cm->stid[v] == BEGR_S) {
3035       y = cm->plast[v];	  /* the parent bifurcation    */
3036       z = cm->cfirst[y];  /* the other (left) S state  */
3037       for(j = 0; j <= L; j++) {
3038 	for (d = 0; d <= j; d++) {
3039 	  for (k = 0; k <= (j-d); k++) {
3040  	    beta[v][j][d] = ESL_MAX(beta[v][j][d], (beta[y][j][d+k] + alpha[z][j-d][k]));
3041 	  }
3042 	}
3043       }
3044     } /* end of 'else if (cm->stid[v] == BEGR_S */
3045     else { /* (cm->sttype[v] != BEGL_S && cm->sttype[v] != BEGR_S */
3046       for (j = L; j >= 0; j--) {
3047 	i = 1;
3048 	for (d = j; d >= 0; d--, i++) {
3049 	  for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
3050 	    voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
3051 	    sd  = StateDelta(cm->sttype[y]);
3052 	    sdr = StateRightDelta(cm->sttype[y]);
3053 	    switch(cm->sttype[y]) {
3054 	      case MP_st:
3055 		if (j == L || d == j) continue; /* boundary condition */
3056 		escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
3057 		beta[v][j][d] = ESL_MAX(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore));
3058 		break;
3059 
3060 	      case ML_st:
3061 	      case IL_st:
3062 		if (d == j) continue;	/* boundary condition (note when j=0, d=0*/
3063 		escore = esc_vAA[y][dsq[i-1]];
3064 		beta[v][j][d] = ESL_MAX(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore));
3065 		break;
3066 
3067 	      case MR_st:
3068 	      case IR_st:
3069 		if (j == L) continue;
3070 		escore = esc_vAA[y][dsq[j+1]];
3071 		beta[v][j][d] = ESL_MAX(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore));
3072 		break;
3073 
3074 	      case S_st:
3075 	      case E_st:
3076 	      case D_st:
3077 		beta[v][j][d] = ESL_MAX(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset]));
3078 		break;
3079 	    } /* end of switch(cm->sttype[y] */
3080 	  } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
3081 	  if (beta[v][j][d] < IMPOSSIBLE) beta[v][j][d] = IMPOSSIBLE;
3082 	} /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
3083       } /* end loop over j. We know beta for this whole state */
3084     } /* end of 'else if cm->sttype[v] != BEGL_S, BEGR_S */
3085     /* we're done calculating deck v for everything but local begins */
3086 
3087     /* deal with local alignment end transitions v->EL (EL = deck at M.) */
3088     if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
3089       sdr = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
3090       sd  = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
3091 
3092       for (j = 0; j <= L; j++) {
3093 	for (d = 0; d <= j; d++) {
3094 	  i = j-d+1;
3095 	  switch (cm->sttype[v]) {
3096 	  case MP_st:
3097 	    if (j == L || d == j) continue; /* boundary condition */
3098 	    escore = esc_vAA[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
3099 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]
3100 							    + escore));
3101 	    break;
3102 	  case ML_st:
3103 	  case IL_st:
3104 	    if (d == j) continue;
3105 	    escore = esc_vAA[v][dsq[i-1]];
3106 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]
3107 							    + escore));
3108 	    break;
3109 	  case MR_st:
3110 	  case IR_st:
3111 	    if (j == L) continue;
3112 	    escore = esc_vAA[v][dsq[j+1]];
3113 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]
3114 							    + escore));
3115 	    break;
3116 	  case S_st:
3117 	  case D_st:
3118 	  case B_st:
3119 	  case E_st:
3120 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]));
3121 	    break;
3122 	  }
3123 	}
3124       }
3125     }
3126   }
3127   /* Deal with last step needed for local alignment
3128    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
3129    */
3130   if (cm->flags & CMH_LOCAL_END) {
3131     for (j = L; j > 0; j--) { /* careful w/ boundary here */
3132       for (d = j-1; d >= 0; d--) /* careful w/ boundary here */
3133 	beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[cm->M][j][d+1] + cm->el_selfsc));
3134     }
3135   }
3136 
3137 #if eslDEBUGLEVEL >= 3
3138   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
3139   /* FILE *fp1; fp1 = fopen("tmp.std_ocykmx", "w");   cm_mx_Dump(fp1, mx); fclose(fp1); */
3140 #endif
3141 
3142   fail1_flag = FALSE;
3143   fail2_flag = FALSE;
3144   fail3_flag = FALSE;
3145   if(do_check) {
3146     /* Check for consistency between the Inside alpha matrix and the
3147      * Outside beta matrix. We assume the Inside CYK parse score
3148      * (optsc) is the optimal score, so for all v,j,d:
3149      *
3150      * Jalpha[v][j][d] + Jbeta[v][j][d] <= optsc
3151      *
3152      * Further, we know that each residue must be emitted by a state
3153      * in the optimal parse. So as we do the above check, we determine
3154      * when we're in a cell that may be involved in the optimal parse
3155      * (the sum of the Inside and Outside scores are equal to the
3156      * optimal parse score), if that cell corresponds to a left
3157      * emitter emitting position i, we know an emitted i has been
3158      * observed in an optimal parse and set optseen[i] to TRUE.
3159      * Likewise, if that cell corresponds to a right emitter emitting
3160      * position j, we update optseen[j] to TRUE. At the end of the
3161      * check optseen[i] should be TRUE for all i in the range
3162      * [1..L].
3163      *
3164      * Note that we don't ensure that all of our presumed optimal
3165      * cells make up a valid parse, so it is possible we could pass
3166      * this check even if the Inside and Outside matrices are
3167      * inconsistent (i.e. there's a bug in the implementation of one
3168      * and or the other) but that should be extremely unlikely.  If we
3169      * do this test many times for many different models and pass, we
3170      * should be confident we have consistent implementations.
3171      *
3172      * This is an expensive check and should only be done while
3173      * debugging.
3174      *
3175      * Another test we could do but do not is to determine the CYK
3176      * parse by tracing back the CYK Inside matrix, then ensure that
3177      * for each cell in that parse alpha[v][j][d]+beta[v][j][d] ==
3178      * optsc.
3179      */
3180     ESL_ALLOC(optseen, sizeof(int) * (L+1));
3181     esl_vec_ISet(optseen, L+1, FALSE);
3182     vmax = (cm->flags & CMH_LOCAL_END) ? cm->M : cm->M-1;
3183     /* define bit score difference tolerance, somewhat arbitrarily:
3184      * clen <= 200: tolerance is 0.001; then a function of clen:
3185      * clen == 1000 tolerance is 0.005,
3186      * clen == 2000, tolerance is 0.01.
3187      *
3188      * I did this b/c with tests with SSU_rRNA_eukarya I noticed
3189      * failures with bit score differences up to 0.004 or so.  This
3190      * could mean a bug, but I couldn't get any average sized model to
3191      * fail with a difference above 0.001, so I blamed it on
3192      * precision. I'm not entirely convinced it isn't a bug but
3193      * until I see a failure on a smaller model it seems precision
3194      * is the most likely explanation, right?
3195      */
3196     tol = ESL_MAX(1e-3, (float) cm->clen / 200000.);
3197     for(v = 0; v <= vmax; v++) {
3198       for(j = 1; j <= L; j++) {
3199 	for(d = 0; d <= j; d++) {
3200 	  sc  = (alpha[v][j][d] + beta[v][j][d]) - alpha[0][L][L];
3201 	  if(sc > tol) {
3202 	    printf("Check 1 failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
3203 		   v, j, d, alpha[v][j][d], beta[v][j][d], alpha[v][j][d] + beta[v][j][d], alpha[0][L][L]);
3204 	    fail1_flag = TRUE;
3205 	  }
3206 	  if(fabs(sc) < tol) { /* this cell is involved in a parse with the optimal score */
3207 	    i  = j-d+1;
3208 	    if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st || (cm->sttype[v] == EL_st && d >0)) {
3209 	      /* i is accounted for by a parse with an optimal score */
3210 	      optseen[i] = TRUE;
3211 	      /*printf("\tResidue %4d possibly accounted for by Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);*/
3212 	    }
3213 	    if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
3214 	      /* j is accounted for by a parse with an optimal score */
3215 	      optseen[j] = TRUE;
3216 	      /*printf("\tResidue %4d possibly accounted for by Right emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", j, Statetype(cm->sttype[v]), v, j, d);*/
3217 	    }
3218 	  }
3219 	}
3220       }
3221     }
3222     for(j = 1; j <= L; j++) {
3223       if(optseen[j] == FALSE) {
3224 	printf("Check 2 failure: residue %d not emitted in the optimal parsetree\n", j);
3225 	fail2_flag = TRUE;
3226       }
3227     }
3228     free(optseen);
3229   }
3230   /* Another test that we can only do if local ends are OFF */
3231   if(do_check && (!(cm->flags & CMH_LOCAL_END))) {
3232     /* Local ends make the following test invalid because it is not true that
3233      * exactly 1 state in each node's split set must be visited in each parse.
3234      *
3235      * Determine P(pi, S|M) / P(S|R) (probability of the sequence and most likely parse
3236      * tree pi given the model)
3237      * using both the Outside (beta) and Inside (alpha) matrices,
3238      * and ensure they're consistent with P(pi, S|M) / P(S|R) from the Inside calculation.
3239      * For all v in each split set: Max_v [ Max_j,(d<=j) ( alpha[v][j][d] * beta[v][j][d] ) ]
3240      *                                                = P(pi, S|M) / P(S|R)
3241      */
3242     for(n = 0; n < cm->nodes; n++) {
3243       sc = IMPOSSIBLE;
3244       num_split_states = SplitStatesInNode(cm->ndtype[n]);
3245       for(v = cm->nodemap[n]; v < cm->nodemap[n] + num_split_states; v++) {
3246 	for (j = 0; j <= L; j++) {
3247 	  for (d = 0; d <= j; d++) {
3248 	    sc = ESL_MAX(sc, (alpha[v][j][d] + beta[v][j][d]));
3249 	  }
3250 	}
3251       }
3252       /*printf("checking node: %d | sc: %.6f\n", n, sc);*/
3253       diff = sc - alpha[0][L][L];
3254       if(diff > 0.01 || diff < -0.01) {
3255 	fail3_flag = TRUE;
3256 	printf("ERROR: node %d P(S|M): %.5f inconsistent with Inside P(S|M): %.5f (diff: %.5f)\n",
3257 	       n, sc, alpha[0][L][L], diff);
3258       }
3259     }
3260   }
3261   /* Finally, calculate the optimal score, but this only works if
3262    * we're not in local mode:
3263    *
3264    * If local ends are off, we know the optimal parse MUST visit each END_E state,
3265    * we pick final END_E state state cm->M-1 (though any END_E could be used here):
3266    *
3267    * Max_j=0 to L (alpha[M-1][j][0] * beta[M-1][j][0]) = P(S|M) / P(S|R)
3268    *
3269    * Note: alpha[M-1][j][0] = 0.0 for all j
3270    *       because all parse subtrees rooted at an END_E must have d=0, (2^0 = 1.0)
3271    * therefore:
3272    * Max_j=0 to L (beta[M-1][j][0]) = P(S|M) / P(S|R)
3273    *
3274    * *** If local ends are on, each parse MUST visit either each END_E state with d=0
3275    * or the EL state but d can vary, so we can't use this test (believe me I tried
3276    * to get a similar test working, but I'm convinced you need alpha to get P(S|M)
3277    * in local mode).
3278    */
3279   if(!(cm->flags & CMH_LOCAL_END)) {
3280     sc = IMPOSSIBLE;
3281     v = cm->M-1;
3282     for (j = 0; j <= L; j++) {
3283       sc = ESL_MAX(sc, (beta[v][j][0]));
3284       /*printf("\talpha[%3d][%3d][%3d]: %5.2f | beta[%3d][%3d][%3d]: %5.2f\n", (cm->M-1), (j), 0, alpha[(cm->M-1)][j][0], (cm->M-1), (j), 0, beta[(cm->M-1)][j][0]);*/
3285     }
3286   }
3287   else { /* return sc = P(S|M) / P(S|R) from Inside() */
3288     sc = alpha[0][L][L];
3289   }
3290 
3291   if(do_check) {
3292     if     (fail1_flag) ESL_FAIL(eslFAIL, errbuf, "CYK Inside/Outside check1 FAILED.");
3293     else if(fail2_flag) ESL_FAIL(eslFAIL, errbuf, "CYK Inside/Outside check2 FAILED.");
3294     else if(fail3_flag) ESL_FAIL(eslFAIL, errbuf, "CYK Inside/Outside check3 FAILED.");
3295     ESL_DPRINTF1(("#DEBUG: SUCCESS! CYK Inside/Outside checks PASSED.\n"));
3296   }
3297 
3298   if(!(cm->flags & CMH_LOCAL_END)) ESL_DPRINTF1(("#DEBUG: \tcm_CYKOutsideAlign() sc : %f\n", sc));
3299   else                             ESL_DPRINTF1(("#DEBUG: \tcm_CYKOutsideAlign() sc : %f (LOCAL mode; sc is from Inside)\n", sc));
3300 
3301   if(ret_sc != NULL) *ret_sc = sc;
3302 
3303   return eslOK;
3304 
3305  ERROR:
3306   ESL_FAIL(status, errbuf, "Out of memory");
3307   return status; /* NEVER REACHED */
3308 }
3309 
3310 /* Function: cm_CYKOutsideAlignHB()
3311  * Date:     EPN, Fri Sep 30 10:12:51 2011
3312  *
3313  * Purpose:  Run the outside CYK algorithm on a target sequence.
3314  *           HMM banded version. See cm_CYKOutsideAlign() for
3315  *           the non-banded version. The full target sequence
3316  *           1..L is aligned.
3317  *
3318  *           Very similar to cm_OutsideAlignHB() but calculates
3319  *           beta[v][j][d]: log probability of the most likely parse
3320  *           that emits 1..i-1 and j+1..L and passes through v at j,d
3321  *           (where i = j-d+1) instead of the log of the summed
3322  *           probability of all such parses. This means max operations
3323  *           are used instead of logsums.
3324  *
3325  *           This function complements cm_CYKInsideAlignHB() but is
3326  *           mainly useful for testing and reference. It can be used
3327  *           with do_check=TRUE to verify that the implementation of
3328  *           CYKInsideHB and CYKOutsideHB are consistent.  Because the
3329  *           structure of CYKInsideHB and InsideHB, and CYKOutsideHB
3330  *           and OutsideHB are so similar and the CYK variants are
3331  *           easier to debug (because only the optimal parsetree is
3332  *           considered instead of all possible parsetrees) this
3333  *           function can be useful for finding bugs in OutsideHB.  It
3334  *           is currently not hooked up to any of the main Infernal
3335  *           programs.
3336  *
3337  * Args:     cm        - the model
3338  *           errbuf    - char buffer for reporting errors
3339  *           dsq       - the digitized sequence
3340  *           L         - length of the dsq to align
3341  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
3342  *           do_check  - TRUE to attempt to check
3343  *           mx        - the dp matrix, only cells within bands in cp9b will be valid
3344  *           ins_mx    - the dp matrix from the Inside run calculation (required)
3345  *           ret_sc    - RETURN: log P(S|M)/P(S|R), as a bit score, this is from ins_mx IF local
3346  *                       ends are on (see *** comment towards end of function).
3347  *
3348  * Returns:  <eslOK> on success
3349  *
3350  * Throws:   <eslERANGE> if required CM_HB_MX size exceeds <size_limit>
3351  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
3352  *           In either of these cases, alignment has been aborted, ret_sc is not valid.
3353  */
3354 int
cm_CYKOutsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_check,CM_HB_MX * mx,CM_HB_MX * ins_mx,float * ret_sc)3355 cm_CYKOutsideAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check,
3356 		     CM_HB_MX *mx, CM_HB_MX *ins_mx, float *ret_sc)
3357 {
3358   int      status;
3359   int      v,y,z;	       /* indices for states */
3360   int      j,d,i,k;	       /* indices in sequence dimensions */
3361   float  **esc_vAA;            /* ptr to cm->oesc, optimized emission scores */
3362   float    sc;		       /* a temporary score */
3363   float    escore;	       /* an emission score, tmp variable */
3364   int      voffset;	       /* index of v in t_v(y) transition scores */
3365   int      emitmode;           /* EMITLEFT, EMITRIGHT, EMITPAIR, EMITNONE, for state y */
3366   int      sd;                 /* StateDelta(cm->sttype[y]) */
3367   int      sdr;                /* StateRightDelta(cm->sttype[y] */
3368 
3369   /* variables used only if do_check */
3370   int      fail1_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 1*/
3371   int      fail2_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 2*/
3372   int      fail3_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 3*/
3373   int      n;                  /* counter over nodes, used only if do_check = TRUE */
3374   int      num_split_states;   /* temp variable used only if do_check = TRUE */
3375   float    diff;               /* temp variable used only if do_check = TRUE */
3376   int      vmax;               /* i, offset in the matrix */
3377   float    tol;                /* tolerance for differences in bit scores */
3378   int     *optseen = NULL;     /* [1..i..W] TRUE is residue i is accounted for in optimal parse */
3379 
3380   /* band related variables */
3381   int      dp_v;               /* d index for state v in alpha w/mem eff bands */
3382   int      dp_y;               /* d index for state y in alpha w/mem eff bands */
3383   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
3384   int      Lp;                 /* L index also changes depending on state */
3385   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
3386   int      kmin, kmax;         /* temporary minimum/maximum allowed k */
3387   int      jn, jx;             /* current minimum/maximum j allowed */
3388   int      dn, dx;             /* current minimum/maximum d allowed */
3389   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
3390   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
3391 
3392   /* the DP matrices */
3393   float ***beta  = mx->dp;     /* pointer to the Oustide DP mx */
3394   float ***alpha = ins_mx->dp; /* pointer to the Inside DP mx (already calc'ed and passed in) */
3395 
3396   /* ptrs to cp9b info, for convenience */
3397   int     *jmin  = cm->cp9b->jmin;
3398   int     *jmax  = cm->cp9b->jmax;
3399   int    **hdmin = cm->cp9b->hdmin;
3400   int    **hdmax = cm->cp9b->hdmax;
3401 
3402   /* Allocations and initializations */
3403   esc_vAA = cm->oesc;            /* a ptr to the optimized emission scores */
3404 
3405   /* grow the matrix based on the current sequence and bands */
3406   if((status = cm_hb_mx_GrowTo(cm, mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
3407 
3408   /* initialize all cells of the matrix to IMPOSSIBLE */
3409   esl_vec_FSet(beta[0][0], mx->ncells_valid, IMPOSSIBLE);
3410 
3411   /* ensure a full alignment to ROOT_S (v==0) is allowed by the bands */
3412   if (jmin[0] > L || jmax[0] < L)
3413     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, jmin[0], jmax[0]);
3414   jp_0 = L - jmin[0];
3415   if (hdmin[0][jp_0] > L || hdmax[0][jp_0] < L)
3416     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, hdmin[0][jp_0], hdmax[0][jp_0]);
3417   Lp_0 = L - hdmin[0][jp_0];
3418   /* set the offset banded cell corresponding to beta[0][L][L] to 0., all parses must end there */
3419   beta[0][jp_0][Lp_0] = 0.;
3420 
3421   /* If we can do a local begin into v, overwrite IMPOSSIBLE with the local begin score. */
3422   if (cm->flags & CMH_LOCAL_BEGIN) {
3423     for (v = 1; v < cm->M; v++) {
3424       if(NOT_IMPOSSIBLE(cm->beginsc[v])) {
3425 	if((L >= jmin[v]) && (L <= jmax[v])) {
3426 	  jp_v = L - jmin[v];
3427 	  if((L >= hdmin[v][jp_v]) && L <= hdmax[v][jp_v]) {
3428 	    Lp = L - hdmin[v][jp_v];
3429 	    beta[v][jp_v][Lp] = cm->beginsc[v];
3430 	  }
3431 	}
3432       }
3433     }
3434   }
3435   /* done allocation/initialization */
3436 
3437   /* Recursion: main loop down through the decks */
3438   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
3439     if (cm->stid[v] == BEGL_S) { /* BEGL_S */
3440       y = cm->plast[v];	/* the parent bifurcation    */
3441       z = cm->cnum[y];	/* the other (right) S state */
3442       for (j = jmax[v]; j >= jmin[v]; j--) {
3443 	ESL_DASSERT1((j >= 0 && j <= L));
3444 	jp_v = j - jmin[v];
3445 	jp_y = j - jmin[y];
3446 	jp_z = j - jmin[z];
3447 	i = j-d+1;
3448 	for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
3449 	  dp_v = d - hdmin[v][jp_v];
3450 	  /* Find the first k value that implies a valid cell in the y and z decks.
3451 	   * This k must satisfy the following 8 inequalities (some may be redundant):
3452 	   * NOTE: these are different from those in Inside() (for one thing, v and y
3453 	   *       (BEGL_S and BIF_B here respectively) are switched relative to Inside.
3454 	   *
3455 	   * (1) k <= jmax[y] - j;
3456 	   * (2) k >= jmin[y] - j;
3457 	   * (3) k <= jmax[z] - j;
3458 	   * (4) k >= jmin[z] - j;
3459 	   *     1 and 2 guarantee (j+k) is within state y's j band
3460 	   *     3 and 4 guarantee (j+k) is within state z's j band
3461 	   *
3462 	   * (5) k >= hdmin[y][j-jmin[y]+k] - d;
3463 	   * (6) k <= hdmax[y][j-jmin[y]+k] - d;
3464 	   *     5 and 6 guarantee k+d is within y's j=(j+k), d band
3465 	   *
3466 	   * (7) k >= hdmin[z][j-jmin[z]+k];
3467 	   * (8) k <= hdmax[z][j-jmin[z]+k];
3468 	   *     5 and 6 guarantee k is within state z's j=(j+k) d band
3469 	   */
3470 	  kmin = ESL_MAX(jmin[y], jmin[z]) - j;
3471 	  kmax = ESL_MIN(jmax[y], jmax[z]) - j;
3472 	  /* kmin and kmax satisfy inequalities (1-4) */
3473 	  /* RHS of inequalities 5-8 are dependent on k, so we check
3474 	   * for these within the next for loop. */
3475 	  for(k = kmin; k <= kmax; k++) {
3476 	    if(k < (hdmin[y][jp_y+k] - d) || k > (hdmax[y][jp_y+k] - d)) continue;
3477 	    /* above line continues if inequality 5 or 6 is violated */
3478 	    if(k < (hdmin[z][jp_z+k])     || k > (hdmax[z][jp_z+k]))     continue;
3479 	    /* above line continues if inequality 7 or 8 is violated */
3480 
3481 	    /* if we get here for current k, all 8 inequalities have been satisified
3482 	     * so we know the cells corresponding to the platonic
3483 	     * matrix cells alpha[v][j][d], alpha[y][j+k][d+k], and
3484 	     * alpha[z][j+k][k] are all within the bands. These
3485 	     * cells correspond to beta[v][jp_v][dp_v],
3486 	     * beta[y][jp_y+k][d-hdmin[y][jp_y+k]+k],
3487 	     * and alpha[z][jp_z][k-hdmin[z][jp_z+k]];
3488 	     */
3489 	    kp_z = k-hdmin[z][jp_z+k];
3490 	    dp_y = d-hdmin[y][jp_y+k];
3491 	    beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y+k][dp_y+k] + alpha[z][jp_z+k][kp_z]));
3492 	  }
3493 	}
3494       }
3495     } /* end of 'if (cm->stid[v] == BEGL_S */
3496     else if (cm->stid[v] == BEGR_S) {
3497       y = cm->plast[v];	  /* the parent bifurcation    */
3498       z = cm->cfirst[y];  /* the other (left) S state  */
3499       jn = ESL_MAX(jmin[v], jmin[y]);
3500       jx = ESL_MIN(jmax[v], jmax[y]);
3501       for (j = jx; j >= jn; j--) {
3502 	ESL_DASSERT1((j >= 0 && j <= L));
3503 	jp_v = j - jmin[v];
3504 	jp_y = j - jmin[y];
3505 	jp_z = j - jmin[z];
3506 	i = j-d+1;
3507 
3508 	dn = ESL_MAX(hdmin[v][jp_v], j-jmax[z]);
3509 	dx = ESL_MIN(hdmax[v][jp_v], jp_z);
3510 	/* above makes sure that j,d are valid for state z: (jmin[z] + d) >= j >= (jmax[z] + d) */
3511 	for (d = dx; d >= dn; d--) {
3512 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
3513 	  /* Find the first k value that implies a valid cell in the y and z decks.
3514 	   * This k must satisfy the following 4 inequalities (some may be redundant):
3515 	   * NOTE: these are different from those in Inside() (for one thing, v and y
3516 	   *       (BEGR_S and BIF_B here respectively) are switched relative to Inside.
3517 	   *
3518 	   * (1) k >= hdmin[y][j-jmin[y]] - d;
3519 	   * (2) k <= hdmax[y][j-jmin[y]] - d;
3520 	   *     1 and 2 guarantee (d+k) is within state y's j=(j) d band
3521 	   *
3522 	   * (3) k >= hdmin[z][j-jmin[z]-d];
3523 	   * (4) k <= hdmax[z][j-jmin[z]-d];
3524 	   *     3 and 4 guarantee k is within z's j=(j-d) d band
3525 	   *
3526 	   */
3527 	  kmin = ESL_MAX((hdmin[y][jp_y]-d), (hdmin[z][jp_z-d]));
3528 	  kmax = ESL_MIN((hdmax[y][jp_y]-d), (hdmax[z][jp_z-d]));
3529 	  /* kmin and kmax satisfy inequalities (1-4) */
3530 	  for(k = kmin; k <= kmax; k++) {
3531 	    /* for current k, all 4 inequalities have been satisified
3532 	     * so we know the cells corresponding to the platonic
3533 	     * matrix cells beta[v][j][d], beta[y][j][d+k], and
3534 	     * alpha[z][j-d][k] are all within the bands. These
3535 	     * cells correspond to beta[v][jp_v][dp_v],
3536 	     * beta[y][jp_y+k][d-hdmin[y][jp_y]+k],
3537 	     * and alpha[z][jp_z-d][k-hdmin[z][jp_z-d]];
3538 	     */
3539 	    kp_z = k-hdmin[z][jp_z-d];
3540 	    dp_y = d-hdmin[y][jp_y];
3541 	    beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y][dp_y+k]
3542 								+ alpha[z][jp_z-d][kp_z]));
3543 	  }
3544 	}
3545       }
3546     } /* end of 'else if (cm->stid[v] == BEGR_S */
3547     else if (cm->sttype[v] == IL_st || cm->sttype[v] == IR_st) {
3548       /* ILs and IRs can self transit, this means that beta[v][j][d] must be fully calculated
3549        * before beta[v][j][d+1] can be started to be calculated, forcing the following nesting order:
3550        * for j { for d { for y { } } }
3551        * for non-self-transitioners, we can do a more efficient nesting order (see below)
3552        */
3553       for (j = jmax[v]; j >= jmin[v]; j--) {
3554 	ESL_DASSERT1((j >= 0 && j <= L));
3555 	jp_v = j - jmin[v];
3556 	for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
3557 	  i = j-d+1;
3558 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
3559 
3560 	  for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
3561 	    voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
3562 
3563 	    /* Note: this looks like it can be optimized, I tried but my 'optimization' slowed the code, so I reverted [EPN] */
3564 	    switch(cm->sttype[y]) {
3565 	    case MP_st:
3566 	      if (j == L || d == j) continue; /* boundary condition */
3567 	      if ((j+1) < jmin[y] || (j+1) > jmax[y]) continue; /* enforces j is valid for state y */
3568 	      jp_y = j - jmin[y];
3569 	      if ((d+2) < hdmin[y][(jp_y+1)] || (d+2) > hdmax[y][(jp_y+1)]) continue; /* enforces d is valid for state y */
3570 	      /* if we get here alpha[y][jp_y+1][dp_y+2] is a valid alpha cell
3571 	       * corresponding to alpha[y][j+1][d+2] in the platonic matrix.
3572 		   */
3573 	      dp_y = d - hdmin[y][jp_y+1];  /* d index for state y */
3574 	      escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
3575 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y+1][dp_y+2]
3576 								  + cm->tsc[y][voffset] + escore));
3577 	      break;
3578 
3579 	    case ML_st:
3580 	    case IL_st:
3581 	      if (d == j) continue;	/* boundary condition (note when j=0, d=0)*/
3582 	      if (j < jmin[y] || j > jmax[y]) continue; /* enforces j is valid for state y */
3583 	      jp_y = j - jmin[y];
3584 	      if ((d+1) < hdmin[y][jp_y] || (d+1) > hdmax[y][jp_y]) continue; /* enforces d is valid for state y */
3585 	      /* if we get here alpha[y][jp_y][dp_y+1] is a valid alpha cell
3586 	       * corresponding to alpha[y][j][d+1] in the platonic matrix.
3587 	       */
3588 	      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
3589 	      escore = esc_vAA[y][dsq[i-1]];
3590 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y][dp_y+1]
3591 								  + cm->tsc[y][voffset] + escore));
3592 	      break;
3593 
3594 	    case MR_st:
3595 	    case IR_st:
3596 	      if (j == L) continue;
3597 	      if ((j+1) < jmin[y] || (j+1) > jmax[y]) continue; /* enforces j is valid for state y */
3598 	      jp_y = j - jmin[y];
3599 	      if ((d+1) < hdmin[y][(jp_y+1)] || (d+1) > hdmax[y][(jp_y+1)]) continue; /* enforces d is valid for state y */
3600 	      /* if we get here alpha[y][jp_y+1][dp_y+1] is a valid alpha cell
3601 	       * corresponding to alpha[y][j+1][d+1] in the platonic matrix.
3602 	       */
3603 	      dp_y = d - hdmin[y][(jp_y+1)];  /* d index for state y */
3604 	      escore = esc_vAA[y][dsq[j+1]];
3605 	      /*printf("j: %d | jmin[y]: %d | jmax[y]: %d | jp_v: %d | dp_v: %d | jp_y: %d | dp_y: %d\n", j, jmin[y], jmax[y], jp_v, dp_v, jp_y, dp_y);*/
3606 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y+1][dp_y+1]
3607 								  + cm->tsc[y][voffset] + escore));
3608 	      break;
3609 
3610 	    case S_st:
3611 	    case E_st:
3612 	    case D_st:
3613 	      if (j < jmin[y] || j > jmax[y]) continue; /* enforces j is valid for state y */
3614 	      jp_y = j - jmin[y];
3615 	      if (d < hdmin[y][jp_y] || d > hdmax[y][jp_y]) continue; /* enforces d is valid for state y */
3616 	      /* if we get here alpha[y][jp_y][dp_y] is a valid alpha cell
3617 	       * corresponding to alpha[y][j][d] in the platonic matrix.
3618 	       */
3619 	      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
3620 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y][dp_y] + cm->tsc[y][voffset]));
3621 	      break;
3622 	    } /* end of switch(cm->sttype[y] */
3623 	  } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
3624 	  if (beta[v][jp_v][dp_v] < IMPOSSIBLE) beta[v][jp_v][dp_v] = IMPOSSIBLE;
3625 	} /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
3626       } /* end loop over jp. We know beta for this whole state */
3627     } /* end of 'else if cm->sttype[v] == IL_st || cm->sttype[v] == IR_st' */
3628     else { /* state v is not BEGL_S, BEGL_R IL nor IR (must be ML, MP, MR, D, S, B or E) */
3629       /* ML, MP, MR, D, S, B, E states cannot self transit, this means that all cells
3630        * in beta[v] are independent of each other, only depending on beta[y] for previously calc'ed y.
3631        * We can do the for loops in any nesting order, this implementation does what I think is most efficient:
3632        * for y { for j { for d { } } }
3633        */
3634       for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
3635 	voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
3636 	sdr = StateRightDelta(cm->sttype[y]);
3637 	sd  = StateDelta(cm->sttype[y]);
3638 	emitmode = Emitmode(cm->sttype[y]);
3639 	/* determine min j (jn) and max j (jx) that are valid for v and y */
3640 	jn = ESL_MAX(jmin[v], jmin[y]-sdr);
3641 	jx = ESL_MIN(jmax[v], jmax[y]-sdr);
3642 	for (j = jx; j >= jn; j--) {
3643 	  ESL_DASSERT1((j >= 0 && j <= L));
3644 	  jp_v = j - jmin[v];
3645 	  jp_y = j - jmin[y];
3646 	  ESL_DASSERT1((j+sdr >= jmin[y] && j+sdr <= jmax[y]));
3647 
3648 	  /* determine min d (dn) and max d (dx) that are valid for v and y and j */
3649 	  dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y + sdr] - sd);
3650 	  dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y + sdr] - sd);
3651 	  dp_v = dx - hdmin[v][jp_v];
3652 	  dp_y = dx - hdmin[y][jp_y + sdr];
3653 	  i    = j-dx+1;
3654 
3655 	  /* for each emit mode, update beta[v][jp_v][dp_v] for all valid d = dp_v */
3656 	  switch(emitmode) {
3657 	  case EMITPAIR:  /* MP_st */
3658 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--, i++) {
3659 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
3660 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
3661 	      escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
3662 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
3663 								  + cm->tsc[y][voffset] + escore));
3664 	    }
3665 	    break;
3666 	  case EMITLEFT:  /* ML_st, IL_st */
3667 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--, i++) {
3668 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
3669 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
3670 	      escore = esc_vAA[y][dsq[i-1]];
3671 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
3672 								  + cm->tsc[y][voffset] + escore));
3673 	    }
3674 	    break;
3675 	  case EMITRIGHT:  /* MR_st, IR_st */
3676 	    escore = esc_vAA[y][dsq[j+1]]; /* not dependent on i */
3677 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--) {
3678 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
3679 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
3680 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
3681 								  + cm->tsc[y][voffset] + escore));
3682 	    }
3683 	    break;
3684 	  case EMITNONE:  /* D_st, S_st, E_st*/
3685 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--) {
3686 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
3687 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
3688 	      beta[v][jp_v][dp_v] = ESL_MAX(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
3689 								  + cm->tsc[y][voffset]));
3690 	    }
3691 	    break;
3692 	  } /* end of switch(emitmode) */
3693 	} /* end of for j = jx; j >= jn; j-- */
3694       } /* end of for y = plast[v]... */
3695     } /* ends else entered for non-BEGL_S/BEGR_S/IL/IR states*/
3696     /* we're done calculating deck v for everything but local begins */
3697 
3698     /* deal with local alignment end transitions v->EL (EL = deck at M.) */
3699     if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
3700       sdr      = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
3701       sd       = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
3702       emitmode = Emitmode(cm->sttype[v]);        /* note emitmode is for state v */
3703 
3704       jn = jmin[v] - sdr;
3705       jx = jmax[v] - sdr;
3706       for (j = jn; j <= jx; j++) {
3707 	jp_v =  j - jmin[v];
3708 	dn   = hdmin[v][jp_v + sdr] - sd;
3709 	dx   = hdmax[v][jp_v + sdr] - sd;
3710 	i    = j-dn+1;                     /* we'll decrement this in for (d... loops inside switch below */
3711 	dp_v = dn - hdmin[v][jp_v + sdr];  /* we'll increment this in for (d... loops inside switch below */
3712 
3713 	switch (emitmode) {
3714 	case EMITPAIR:
3715 	  for (d = dn; d <= dx; d++, dp_v++, i--) {
3716 	    escore = esc_vAA[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
3717 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]
3718 								    + escore));
3719 	  }
3720 	  break;
3721 	case EMITLEFT:
3722 	  for (d = dn; d <= dx; d++, dp_v++, i--) {
3723 	    escore = esc_vAA[v][dsq[i-1]];
3724 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]
3725 								    + escore));
3726 	  }
3727 	  break;
3728 
3729 	case EMITRIGHT:
3730 	  escore = esc_vAA[v][dsq[j+1]];
3731 	  for (d = dn; d <= dx; d++, dp_v++) {
3732 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]
3733 								    + escore));
3734 	  }
3735 	  break;
3736 
3737 	case EMITNONE:
3738 	  for (d = dn; d <= dx; d++, dp_v++) {
3739 	    beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]));
3740 	  }
3741 	  break;
3742 	}
3743       }
3744     }
3745   } /* end loop over decks v. */
3746 
3747   /* Deal with last step needed for local alignment
3748    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
3749    */
3750   if (cm->flags & CMH_LOCAL_END) {
3751     for (j = L; j > 0; j--) { /* careful w/ boundary here */
3752       for (d = j-1; d >= 0; d--) /* careful w/ boundary here */
3753 	beta[cm->M][j][d] = ESL_MAX(beta[cm->M][j][d], (beta[cm->M][j][d+1] + cm->el_selfsc));
3754     }
3755   }
3756 
3757 #if eslDEBUGLEVEL >= 3
3758   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
3759   /* FILE *fp1; fp1 = fopen("tmp.stdocykhbmx", "w");   cm_hb_mx_Dump(fp1, mx); fclose(fp1); */
3760 #endif
3761 
3762   fail1_flag = FALSE;
3763   fail2_flag = FALSE;
3764   fail3_flag = FALSE;
3765   printf("DO CHECK: %d\n", do_check);
3766   if(do_check) {
3767     /* Check for consistency between the Inside alpha matrix and the
3768      * Outside beta matrix. We assume the Inside CYK parse score
3769      * (optsc) is the optimal score, so for all v,j,d:
3770      *
3771      * Jalpha[v][j][d] + Jbeta[v][j][d] <= optsc
3772      *
3773      * Further, we know that each residue must be emitted by a state
3774      * in the optimal parse. So as we do the above check, we determine
3775      * when we're in a cell that may be involved in the optimal parse
3776      * (the sum of the Inside and Outside scores are equal to the
3777      * optimal parse score), if that cell corresponds to a left
3778      * emitter emitting position i, we know an emitted i has been
3779      * observed in an optimal parse and set optseen[i] to TRUE.
3780      * Likewise, if that cell corresponds to a right emitter emitting
3781      * position j, we update optseen[j] to TRUE. At the end of the
3782      * check optseen[i] should be TRUE for all i in the range
3783      * [1..L].
3784      *
3785      * Note that we don't ensure that all of our presumed optimal
3786      * cells make up a valid parse, so it is possible we could pass
3787      * this check even if the Inside and Outside matrices are
3788      * inconsistent (i.e. there's a bug in the implementation of one
3789      * and or the other) but that should be extremely unlikely.  If we
3790      * do this test many times for many different models and pass, we
3791      * should be confident we have consistent implementations.
3792      *
3793      * This is an expensive check and should only be done while
3794      * debugging.
3795      *
3796      * Another test we could do but do not is to determine the CYK
3797      * parse by tracing back the CYK Inside matrix, then ensure that
3798      * for each cell in that parse alpha[v][j][d]+beta[v][j][d] ==
3799      * optsc.
3800      */
3801     ESL_ALLOC(optseen, sizeof(int) * (L+1));
3802     esl_vec_ISet(optseen, L+1, FALSE);
3803     vmax = (cm->flags & CMH_LOCAL_END) ? cm->M : cm->M-1;
3804     /* define bit score difference tolerance, somewhat arbitrarily:
3805      * clen <= 200: tolerance is 0.001; then a function of clen:
3806      * clen == 1000 tolerance is 0.005,
3807      * clen == 2000, tolerance is 0.01.
3808      *
3809      * I did this b/c with tests with SSU_rRNA_eukarya I noticed
3810      * failures with bit score differences up to 0.004 or so.  This
3811      * could mean a bug, but I couldn't get any average sized model to
3812      * fail with a difference above 0.001, so I blamed it on
3813      * precision. I'm not entirely convinced it isn't a bug but
3814      * until I see a failure on a smaller model it seems precision
3815      * is the most likely explanation, right?
3816      */
3817     tol = ESL_MAX(1e-3, (float) cm->clen / 200000.);
3818     for(v = 0; v <= vmax; v++) {
3819       jn = (v == cm->M) ? 1 : jmin[v];
3820       jx = (v == cm->M) ? L : jmax[v];
3821       for(j = jn; j <= jx; j++) {
3822 	jp_v = (v == cm->M) ? j : j - jmin[v];
3823 	dn   = (v == cm->M) ? 0 : hdmin[v][jp_v];
3824 	dx   = (v == cm->M) ? j : hdmax[v][jp_v];
3825 	for(d = dn; d <= dx; d++) {
3826 	  dp_v = (v == cm->M) ? d : d - hdmin[v][jp_v];
3827 	  sc  = (alpha[v][jp_v][dp_v] + beta[v][jp_v][dp_v]) - alpha[0][jp_0][Lp_0];
3828 	  if(sc > tol) {
3829 	    printf("Check 1 failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
3830 		   v, j, d, alpha[v][jp_v][dp_v], beta[v][jp_v][dp_v], alpha[v][jp_v][dp_v] + beta[v][jp_v][dp_v], alpha[0][jp_0][Lp_0]);
3831 	    fail1_flag = TRUE;
3832 	  }
3833 	  if(fabs(sc) < tol) { /* this cell is involved in a parse with the optimal score */
3834 	    i  = j-d+1;
3835 	    if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st || (cm->sttype[v] == EL_st && d >0)) {
3836 	      /* i is accounted for by a parse with an optimal score */
3837 	      optseen[i] = TRUE;
3838 	      /*printf("\tResidue %4d possibly accounted for by Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);*/
3839 	    }
3840 	    if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
3841 	      /* j is accounted for by a parse with an optimal score */
3842 	      optseen[j] = TRUE;
3843 	      /*printf("\tResidue %4d possibly accounted for by Right emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", j, Statetype(cm->sttype[v]), v, j, d);*/
3844 	    }
3845 	  }
3846 	}
3847       }
3848     }
3849     for(j = 1; j <= L; j++) {
3850       if(optseen[j] == FALSE) {
3851 	printf("Check 2 failure: residue %d not emitted in the optimal parsetree\n", j);
3852 	fail2_flag = TRUE;
3853       }
3854     }
3855     free(optseen);
3856   }
3857   /* Another test that we can only do if local ends are OFF */
3858   if(do_check && (!(cm->flags & CMH_LOCAL_END))) {
3859     /* Local ends make the following test invalid because it is not true that
3860      * exactly 1 state in each node's split set must be visited in each parse.
3861      *
3862      * Determine P(pi, S|M) / P(S|R) (probability of the sequence and most likely parse
3863      * tree pi given the model)
3864      * using both the Outside (beta) and Inside (alpha) matrices,
3865      * and ensure they're consistent with P(pi, S|M) / P(S|R) from the Inside calculation.
3866      * For all v in each split set: Max_v [ Max_j,(d<=j) ( alpha[v][j][d] * beta[v][j][d] ) ]
3867      *                                                = P(pi, S|M) / P(S|R)
3868      */
3869     for(n = 0; n < cm->nodes; n++) {
3870       sc = IMPOSSIBLE;
3871       num_split_states = SplitStatesInNode(cm->ndtype[n]);
3872       for(v = cm->nodemap[n]; v < cm->nodemap[n] + num_split_states; v++) {
3873 	for (j = jmin[v]; j <= jmax[v]; j++) {
3874 	  jp_v = j - jmin[v];
3875 	  for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
3876 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
3877 	    sc = ESL_MAX(sc, (alpha[v][jp_v][dp_v] + beta[v][jp_v][dp_v]));
3878 	  }
3879 	}
3880       }
3881       /*printf("checking node: %d | sc: %.6f\n", n, sc);*/
3882       diff = sc - alpha[0][jp_0][Lp_0];
3883       if(diff > 0.01 || diff < -0.01) {
3884 	fail3_flag = TRUE;
3885 	printf("ERROR: node %d P(S|M): %.5f inconsistent with Inside P(S|M): %.5f (diff: %.5f)\n",
3886 	       n, sc, alpha[0][jp_0][Lp_0], diff);
3887       }
3888     }
3889   }
3890 
3891   /* Finally, calculate the optimal score, but this only works if
3892    * we're not in local mode:
3893    *
3894    * If local ends are off, we know the optimal parse MUST visit each END_E state,
3895    * we pick final END_E state state cm->M-1 (though any END_E could be used here):
3896    *
3897    * Max_j=0 to L (alpha[M-1][j][0] * beta[M-1][j][0]) = P(S|M) / P(S|R)
3898    *
3899    * Note: alpha[M-1][j][0] = 0.0 for all j
3900    *       because all parse subtrees rooted at an END_E must have d=0, (2^0 = 1.0)
3901    * therefore:
3902    * Max_j=0 to L (beta[M-1][j][0]) = P(S|M) / P(S|R)
3903    *
3904    * *** If local ends are on, each parse MUST visit either each END_E state with d=0
3905    * or the EL state but d can vary, so we can't use this test (believe me I tried
3906    * to get a similar test working, but I'm convinced you need alpha to get P(S|M)
3907    * in local mode).
3908    */
3909   if(!(cm->flags & CMH_LOCAL_END)) {
3910     sc = IMPOSSIBLE;
3911     v = cm->M-1;
3912     for (j = jmin[v]; j <= jmax[v]; j++) {
3913       jp_v = j - jmin[v];
3914       assert(hdmin[v][jp_v] == 0);
3915       sc = ESL_MAX(sc, (beta[v][jp_v][0]));
3916       /* printf("\talpha[%3d][%3d][%3d]: %5.2f | beta[%3d][%3d][%3d]: %5.2f\n", (cm->M-1), (j), 0, alpha[(cm->M-1)][j][0], (cm->M-1), (j), 0, beta[(cm->M-1)][j][0]);*/
3917     }
3918   }
3919   else { /* return sc = P(S|M) / P(S|R) from Inside() */
3920     sc = alpha[0][jp_0][Lp_0];
3921   }
3922 
3923 #if eslDEBUGLEVEL >= 3
3924   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
3925   /* FILE *fp; fp = fopen("tmp.std_ocykhbmx", "w"); cm_hb_mx_Dump(fp, mx); fclose(fp); */
3926 #endif
3927 
3928   if(do_check) {
3929     if     (fail1_flag) ESL_FAIL(eslFAIL, errbuf, "CYK Inside/Outside HB check1 FAILED.");
3930     else if(fail2_flag) ESL_FAIL(eslFAIL, errbuf, "CYK Inside/Outside HB check2 FAILED.");
3931     else if(fail3_flag) ESL_FAIL(eslFAIL, errbuf, "CYK Inside/Outside HB check3 FAILED.");
3932     ESL_DPRINTF1(("#DEBUG: SUCCESS! CYK Inside/Outside HB checks PASSED.\n"));
3933   }
3934 
3935   if(!(cm->flags & CMH_LOCAL_END)) ESL_DPRINTF1(("#DEBUG: \tcm_CYKOutsideAlignHB() sc : %f\n", sc));
3936   else                             ESL_DPRINTF1(("#DEBUG: \tcm_CYKOutsideAlignHB() sc : %f (LOCAL mode; sc is from Inside)\n", sc));
3937 
3938   if(ret_sc != NULL) *ret_sc = sc;
3939 
3940   return eslOK;
3941 
3942  ERROR:
3943   ESL_FAIL(status, errbuf, "Out of memory");
3944   return status; /* NEVER REACHED */
3945 }
3946 
3947 /* Function: cm_OutsideAlign()
3948  * Date:     EPN, Mon Nov 19 07:00:37 2007
3949  *
3950  * Purpose:  Run the outside algorithm on a target sequence.
3951  *           Non-banded version. See cm_OutsideAlignHB() for
3952  *           the HMM banded version. The full target sequence
3953  *           1..L is aligned.
3954  *
3955  *           Very similar to cm_CYKOutsideAlign() but calculates
3956  *           beta[v][j][d]: log of the summed probability of all
3957  *           parsetrees that emits 1..i-1 and j+1..L and pass through
3958  *           v at j,d (where i = j-d+1) instead of the log of the
3959  *           probability of the most likely (CYK) parse. This means
3960  *           logsum operations are used instead of max operations.
3961  *
3962  *           For debugging this function, the cm_CYKOutsideAlign() can
3963  *           be useful, because it has a very similar organization but
3964  *           is easier to debug because only the most likely parsetree
3965  *           is considered. cm_CYKOutsideAlign() also allows a more
3966  *           stringent test for the consistency of the CYKInside and
3967  *           CYKOutside matrices.
3968  *
3969  *           If <do_check> is TRUE (and the CM is not in local mode)
3970  *           we check that the outside matrix values are consistent
3971  *           with the inside matrix values (in ins_mx).  This check is
3972  *           described in comments towards the end of the function.
3973  *
3974  *           Note: renamed from FastOutsideAlign() [EPN, Wed Sep 14 06:13:53 2011].
3975  *
3976  * Args:     cm        - the model
3977  *           errbuf    - char buffer for reporting errors
3978  *           dsq       - the digitized sequence
3979  *           L         - length of the dsq to align
3980  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
3981  *           do_check  - TRUE to attempt to check
3982  *           mx        - the dp matrix, grown and filled here
3983  *           ins_mx    - the pre-filled dp matrix from the Inside run calculation (required)
3984  *           ret_sc    - RETURN: log P(S|M)/P(S|R), as a bit score, this is from ins_mx IF local
3985  *                       ends are on (see *** comment towards end of function).
3986  *
3987  * Returns:  <eslOK> on success
3988  *
3989  * Throws:   <eslERANGE> if required CM_HB_MX size exceeds <size_limit>
3990  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
3991  *           In either of these cases, alignment has been aborted, ret_sc is not valid.
3992 
3993  */
3994 int
cm_OutsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_check,CM_MX * mx,CM_MX * ins_mx,float * ret_sc)3995 cm_OutsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check,
3996 		CM_MX *mx, CM_MX *ins_mx, float *ret_sc)
3997 {
3998   int      status;
3999   int      v,y,z;	       /* indices for states */
4000   int      j,d,i,k;	       /* indices in sequence dimensions */
4001   float    sc;     	       /* a temporary score */
4002   float  **esc_vAA;            /* ptr to cm->oesc, optimized emission scores */
4003   float    escore;	       /* an emission score, tmp variable */
4004   int      voffset;	       /* index of v in t_v(y) transition scores */
4005   int      sd;                 /* StateDelta(cm->sttype[y]) */
4006   int      sdr;                /* StateRightDelta(cm->sttype[y] */
4007 
4008   /* variables used only if do_check==TRUE */
4009   int      n;                  /* counter over nodes, used only if do_check = TRUE */
4010   int      num_split_states;   /* temp variable used only if do_check = TRUE */
4011   float    diff;               /* temp variable used only if do_check = TRUE */
4012   int      fail_flag = FALSE;  /* set to TRUE if do_check and we see a problem */
4013 
4014   /* the DP matrices */
4015   float ***beta  = mx->dp;     /* pointer to the Oustide DP mx */
4016   float ***alpha = ins_mx->dp; /* pointer to the Inside DP mx (already calc'ed and passed in) */
4017 
4018   /* Allocations and initializations */
4019   esc_vAA = cm->oesc;            /* a ptr to the optimized emission scores */
4020 
4021   /* grow the matrix based on the current sequence */
4022   if((status = cm_mx_GrowTo(cm, mx, errbuf, L, size_limit)) != eslOK) return status;
4023 
4024   /* initialize all cells of the matrix to IMPOSSIBLE */
4025   esl_vec_FSet(beta[0][0], mx->ncells_valid, IMPOSSIBLE);
4026 
4027   /* now set beta[0][L][L] to 0., all parses must end there */
4028   beta[0][L][L] = 0.;
4029 
4030   /* initialize local begin cells for emitting full seq (j==L && d == L) */
4031   if (cm->flags & CMH_LOCAL_BEGIN) {
4032     for (v = 1; v < cm->M; v++)
4033       beta[v][L][L] = cm->beginsc[v];
4034   }
4035 
4036   /* Main recursion */
4037   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
4038     sd  = StateDelta(cm->sttype[v]);
4039     sdr = StateRightDelta(cm->sttype[v]);
4040 
4041     if (cm->stid[v] == BEGL_S) { /* BEGL_S */
4042       y = cm->plast[v];	/* the parent bifurcation    */
4043       z = cm->cnum[y];	/* the other (right) S state */
4044       for(j = 0; j <= L; j++) {
4045 	for (d = 0; d <= j; d++) {
4046 	  for (k = 0; k <= (L-j); k++) {
4047 	    beta[v][j][d] = FLogsum(beta[v][j][d], (beta[y][j+k][d+k] + alpha[z][j+k][k]));
4048 	  }
4049 	}
4050       }
4051     } /* end of 'if (cm->stid[v] == BEGL_S */
4052     else if (cm->stid[v] == BEGR_S) {
4053       y = cm->plast[v];	  /* the parent bifurcation    */
4054       z = cm->cfirst[y];  /* the other (left) S state  */
4055       for(j = 0; j <= L; j++) {
4056 	for (d = 0; d <= j; d++) {
4057 	  for (k = 0; k <= (j-d); k++) {
4058  	    beta[v][j][d] = FLogsum(beta[v][j][d], (beta[y][j][d+k] + alpha[z][j-d][k]));
4059 	  }
4060 	}
4061       }
4062     } /* end of 'else if (cm->stid[v] == BEGR_S */
4063     else { /* (cm->sttype[v] != BEGL_S && cm->sttype[v] != BEGR_S */
4064       for (j = L; j >= 0; j--) {
4065 	i = 1;
4066 	for (d = j; d >= 0; d--, i++) {
4067 	  for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
4068 	    voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
4069 	    sd  = StateDelta(cm->sttype[y]);
4070 	    sdr = StateRightDelta(cm->sttype[y]);
4071 	    switch(cm->sttype[y]) {
4072 	      case MP_st:
4073 		if (j == L || d == j) continue; /* boundary condition */
4074 		escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
4075 		beta[v][j][d] = FLogsum(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore));
4076 		break;
4077 
4078 	      case ML_st:
4079 	      case IL_st:
4080 		if (d == j) continue;	/* boundary condition (note when j=0, d=0*/
4081 		escore = esc_vAA[y][dsq[i-1]];
4082 		beta[v][j][d] = FLogsum(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore));
4083 		break;
4084 
4085 	      case MR_st:
4086 	      case IR_st:
4087 		if (j == L) continue;
4088 		escore = esc_vAA[y][dsq[j+1]];
4089 		beta[v][j][d] = FLogsum(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore));
4090 		break;
4091 
4092 	      case S_st:
4093 	      case E_st:
4094 	      case D_st:
4095 		beta[v][j][d] = FLogsum(beta[v][j][d], (beta[y][j+sdr][d+sd] + cm->tsc[y][voffset]));
4096 		break;
4097 	    } /* end of switch(cm->sttype[y] */
4098 	  } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
4099 	  if (beta[v][j][d] < IMPOSSIBLE) beta[v][j][d] = IMPOSSIBLE;
4100 	} /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
4101       } /* end loop over j. We know beta for this whole state */
4102     } /* end of 'else if cm->sttype[v] != BEGL_S, BEGR_S */
4103     /* we're done calculating deck v for everything but local begins */
4104 
4105     /* deal with local alignment end transitions v->EL (EL = deck at M.) */
4106     if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
4107       sdr = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
4108       sd  = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
4109 
4110       for (j = 0; j <= L; j++) {
4111 	for (d = 0; d <= j; d++) {
4112 	  i = j-d+1;
4113 	  switch (cm->sttype[v]) {
4114 	  case MP_st:
4115 	    if (j == L || d == j) continue; /* boundary condition */
4116 	    escore = esc_vAA[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
4117 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]
4118 							    + escore));
4119 	    break;
4120 	  case ML_st:
4121 	  case IL_st:
4122 	    if (d == j) continue;
4123 	    escore = esc_vAA[v][dsq[i-1]];
4124 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]
4125 							    + escore));
4126 	    break;
4127 	  case MR_st:
4128 	  case IR_st:
4129 	    if (j == L) continue;
4130 	    escore = esc_vAA[v][dsq[j+1]];
4131 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]
4132 							    + escore));
4133 	    break;
4134 	  case S_st:
4135 	  case D_st:
4136 	  case B_st:
4137 	  case E_st:
4138 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][j+sdr][d+sd] + cm->endsc[v]));
4139 	    break;
4140 	  }
4141 	}
4142       }
4143     }
4144   }
4145   /* Deal with last step needed for local alignment
4146    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
4147    */
4148   if (cm->flags & CMH_LOCAL_END) {
4149     for (j = L; j > 0; j--) { /* careful w/ boundary here */
4150       for (d = j-1; d >= 0; d--) /* careful w/ boundary here */
4151 	beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[cm->M][j][d+1] + cm->el_selfsc));
4152     }
4153   }
4154 
4155   if(do_check && (!(cm->flags & CMH_LOCAL_END))) {
4156     /* Local ends make the following test invalid because it is not true that
4157      * exactly 1 state in each node's split set must be visited in each parse.
4158      *
4159      * Determine P(S|M) / P(S|R) (probability of the sequence given the model)
4160      * using both the Outside (beta) and Inside (alpha) matrices,
4161      * and ensure they're consistent with P(S|M) / P(S|R) from the Inside calculation.
4162      * For all v in each split set: Sum_v [ Sum_j,(d<=j) ( alpha[v][j][d] * beta[v][j][d] ) ]
4163      *                                                = P(S|M) / P(S|R)
4164      */
4165     for(n = 0; n < cm->nodes; n++) {
4166       sc = IMPOSSIBLE;
4167       num_split_states = SplitStatesInNode(cm->ndtype[n]);
4168       for(v = cm->nodemap[n]; v < cm->nodemap[n] + num_split_states; v++) {
4169 	for (j = 0; j <= L; j++) {
4170 	  for (d = 0; d <= j; d++) {
4171 	    sc = FLogsum(sc, (alpha[v][j][d] + beta[v][j][d]));
4172 	  }
4173 	}
4174       }
4175       /*printf("checking node: %d | sc: %.6f\n", n, sc);*/
4176       diff = sc - alpha[0][L][L];
4177       if(diff > 0.01 || diff < -0.01) {
4178 	fail_flag = TRUE;
4179 	printf("ERROR: node %d P(S|M): %.5f inconsistent with Inside P(S|M): %.5f (diff: %.5f)\n",
4180 	       n, sc, alpha[0][L][L], diff);
4181       }
4182     }
4183     if(! fail_flag) {
4184       ESL_DPRINTF1(("#DEBUG: SUCCESS! all nodes passed error check (cm_OutsideAlign())\n"));
4185     }
4186   }
4187 
4188   /* Finally, calculate the optimal score, but this only works if
4189    * we're not in local mode:
4190    *
4191    * IF local ends are off, we know each parse MUST visit each END_E state,
4192    * we pick final END_E state state cm->M-1 (though any END_E could be used here):
4193    *
4194    * Sum_j=0 to L (alpha[M-1][j][0] * beta[M-1][j][0]) = P(S|M) / P(S|R)
4195    *
4196    * Note: alpha[M-1][j][0] = 0.0 for all j
4197    *       because all parse subtrees rooted at an END_E must have d=0, (2^0 = 1.0)
4198    * therefore:
4199    * Sum_j=0 to L (beta[M-1][j][0]) = P(S|M) / P(S|R)
4200    *
4201    * *** If local ends are on, each parse MUST visit either each END_E state with d=0
4202    * or the EL state but d can vary, so we can't use this test (believe me I tried
4203    * to get a similar test working, but I'm convinced you need alpha to get P(S|M)
4204    * in local mode).
4205    */
4206   if(!(cm->flags & CMH_LOCAL_END)) {
4207     sc = IMPOSSIBLE;
4208     v = cm->M-1;
4209     for (j = 0; j <= L; j++) {
4210       sc = FLogsum(sc, (beta[v][j][0]));
4211       /*printf("\talpha[%3d][%3d][%3d]: %5.2f | beta[%3d][%3d][%3d]: %5.2f\n", (cm->M-1), (j), 0, alpha[(cm->M-1)][j][0], (cm->M-1), (j), 0, beta[(cm->M-1)][j][0]);*/
4212     }
4213   }
4214   else { /* sc = P(S|M) / P(S|R) from Inside() */
4215     sc = alpha[0][L][L];
4216   }
4217 
4218   if(fail_flag) ESL_FAIL(eslFAIL, errbuf, "Not all nodes passed posterior check.");
4219 
4220 #if eslDEBUGLEVEL >= 3
4221   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
4222   /* FILE *fp1; fp1 = fopen("tmp.std_omx", "w");   cm_mx_Dump(fp1, mx); fclose(fp1); */
4223 #endif
4224 
4225   if(!(cm->flags & CMH_LOCAL_END)) ESL_DPRINTF1(("#DEBUG: \tcm_OutsideAlign() sc : %f\n", sc));
4226   else                             ESL_DPRINTF1(("#DEBUG: \tcm_OutsideAlign() sc : %f (LOCAL mode; sc is from Inside)\n", sc));
4227 
4228   if(ret_sc != NULL) *ret_sc = sc;
4229 
4230   return eslOK;
4231 }
4232 
4233 /* Function: cm_OutsideAlignHB()
4234  * Date:     EPN, Thu Nov  8 18:40:05 2007
4235  *
4236  * Purpose:  Run the outside algorithm on a target sequence.
4237  *           HMM banded version. See cm_OutsideAlign() for
4238  *           the non-banded version. The full target sequence
4239  *           1..L is aligned.
4240  *
4241  *           Very similar to cm_CYKOutsideAlignHB() but calculates
4242  *           beta[v][j][d]: log of the summed probability of all
4243  *           parsetrees that emits 1..i-1 and j+1..L and pass through
4244  *           v at j,d (where i = j-d+1) instead of the log of the
4245  *           probability of the most likely (CYK) parse. This means
4246  *           logsum operations are used instead of max operations.
4247  *
4248  *           For debugging this function, the cm_CYKOutsideAlign() can
4249  *           be useful, because it has a very similar organization but
4250  *           is easier to debug because only the most likely parsetree
4251  *           is considered. cm_CYKOutsideAlign() also allows a more
4252  *           stringent test for the consistency of the CYKInside and
4253  *           CYKOutside matrices.
 *
4255  *           The DP recursion has been 'optimized' for all state types
4256  *           except IL, IR, BEGL_S, BEGR_S. The main optimization
4257  *           is a change in nesting order of the for loops:
4258  *           optimized order:     for v { for y { for j { for d {}}}}
4259  *           non-optimized order: for v { for j { for d { for y {}}}}
4260  *
4261  *           ILs and IRs are not optimized because they can self
4262  *           transit so mx[v][j][d] must be fully calc'ed before
4263  *           mx[v][j][d+1] can be calced. BEGL_S and BEGR_S are not
4264  *           optimized b/c they require searching for optimal d and k,
4265  *           which complicates the enforcement of the bands and makes
4266  *           this optimization strategy impossible.
4267  *
4268  *           If <do_check> is TRUE (and the CM is not in local mode)
4269  *           we check that the outside matrix values are consistent
4270  *           with the inside matrix values (in ins_mx).  This check is
4271  *           described in comments towards the end of the function.
4272  *
4273  *           Note: renamed from FastOutsideAlignHB() [EPN, Wed Sep 14 06:13:53 2011].
4274  *
4275  * Args:     cm        - the model
4276  *           errbuf    - char buffer for reporting errors
4277  *           dsq       - the digitized sequence
4278  *           L         - length of the dsq to align
4279  *           size_limit- max number of Mb for DP matrix, if matrix is bigger return eslERANGE
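 *           do_check  - TRUE to check that the outside matrix values are consistent with the
 *                       inside matrix values in ins_mx (only done if the CM is not in local mode)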
4281  *           mx        - the dp matrix, only cells within bands in cp9b will be valid
4282  *           ins_mx    - the dp matrix from the Inside run calculation (required)
4283  *           ret_sc    - RETURN: log P(S|M)/P(S|R), as a bit score, this is from ins_mx IF local
4284  *                       ends are on (see *** comment towards end of function).
4285  *
4286  * Returns:  <eslOK> on success
4287  *
4288  * Throws:   <eslERANGE> if required CM_HB_MX size exceeds <size_limit>
4289  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
4290  *           In either of these cases, alignment has been aborted, ret_sc is not valid.
4291  */
4292 int
cm_OutsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,int do_check,CM_HB_MX * mx,CM_HB_MX * ins_mx,float * ret_sc)4293 cm_OutsideAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check,
4294 		  CM_HB_MX *mx, CM_HB_MX *ins_mx, float *ret_sc)
4295 {
4296   int      status;
4297   int      v,y,z;	       /* indices for states */
4298   int      j,d,i,k;	       /* indices in sequence dimensions */
4299   float  **esc_vAA;            /* ptr to cm->oesc, optimized emission scores */
4300   float    sc;		       /* a temporary score */
4301   float    escore;	       /* an emission score, tmp variable */
4302   int      voffset;	       /* index of v in t_v(y) transition scores */
4303   int      emitmode;           /* EMITLEFT, EMITRIGHT, EMITPAIR, EMITNONE, for state y */
4304   int      sd;                 /* StateDelta(cm->sttype[y]) */
4305   int      sdr;                /* StateRightDelta(cm->sttype[y] */
4306 
4307   /* variables used only if do_check */
4308   int      fail_flag = FALSE;  /* set to TRUE if do_check and we see a problem */
4309   int      n;                  /* counter over nodes, used only if do_check = TRUE */
4310   int      num_split_states;   /* temp variable used only if do_check = TRUE */
4311   float    diff;               /* temp variable used only if do_check = TRUE */
4312 
4313   /* band related variables */
4314   int      dp_v;               /* d index for state v in alpha w/mem eff bands */
4315   int      dp_y;               /* d index for state y in alpha w/mem eff bands */
4316   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
4317   int      Lp;                 /* L index also changes depending on state */
4318   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
4319   int      kmin, kmax;         /* temporary minimum/maximum allowed k */
4320   int      jn, jx;             /* current minimum/maximum j allowed */
4321   int      dn, dx;             /* current minimum/maximum d allowed */
4322   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
4323   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
4324 
4325   /* the DP matrices */
4326   float ***beta  = mx->dp;     /* pointer to the Oustide DP mx */
4327   float ***alpha = ins_mx->dp; /* pointer to the Inside DP mx (already calc'ed and passed in) */
4328 
4329   /* ptrs to cp9b info, for convenience */
4330   int     *jmin  = cm->cp9b->jmin;
4331   int     *jmax  = cm->cp9b->jmax;
4332   int    **hdmin = cm->cp9b->hdmin;
4333   int    **hdmax = cm->cp9b->hdmax;
4334 
4335   /* Allocations and initializations */
4336   esc_vAA = cm->oesc;            /* a ptr to the optimized emission scores */
4337 
4338   /* grow the matrix based on the current sequence and bands */
4339   if((status = cm_hb_mx_GrowTo(cm, mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
4340 
4341   /* initialize all cells of the matrix to IMPOSSIBLE */
4342   esl_vec_FSet(beta[0][0], mx->ncells_valid, IMPOSSIBLE);
4343 
4344   /* ensure a full alignment to ROOT_S (v==0) is allowed by the bands */
4345   if (jmin[0] > L || jmax[0] < L)
4346     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, jmin[0], jmax[0]);
4347   jp_0 = L - jmin[0];
4348   if (hdmin[0][jp_0] > L || hdmax[0][jp_0] < L)
4349     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, hdmin[0][jp_0], hdmax[0][jp_0]);
4350   Lp_0 = L - hdmin[0][jp_0];
4351   /* set the offset banded cell corresponding to beta[0][L][L] to 0., all parses must end there */
4352   beta[0][jp_0][Lp_0] = 0.;
4353 
4354   /* If we can do a local begin into v, overwrite IMPOSSIBLE with the local begin score. */
4355   if (cm->flags & CMH_LOCAL_BEGIN) {
4356     for (v = 1; v < cm->M; v++) {
4357       if(NOT_IMPOSSIBLE(cm->beginsc[v])) {
4358 	if((L >= jmin[v]) && (L <= jmax[v])) {
4359 	  jp_v = L - jmin[v];
4360 	  if((L >= hdmin[v][jp_v]) && L <= hdmax[v][jp_v]) {
4361 	    Lp = L - hdmin[v][jp_v];
4362 	    beta[v][jp_v][Lp] = cm->beginsc[v];
4363 	  }
4364 	}
4365       }
4366     }
4367   }
4368   /* done allocation/initialization */
4369 
4370   /* Recursion: main loop down through the decks */
4371   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
4372     if (cm->stid[v] == BEGL_S) { /* BEGL_S */
4373       y = cm->plast[v];	/* the parent bifurcation    */
4374       z = cm->cnum[y];	/* the other (right) S state */
4375       for (j = jmax[v]; j >= jmin[v]; j--) {
4376 	ESL_DASSERT1((j >= 0 && j <= L));
4377 	jp_v = j - jmin[v];
4378 	jp_y = j - jmin[y];
4379 	jp_z = j - jmin[z];
4380 	i = j-d+1;
4381 	for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
4382 	  dp_v = d - hdmin[v][jp_v];
4383 	  /* Find the first k value that implies a valid cell in the y and z decks.
4384 	   * This k must satisfy the following 8 inequalities (some may be redundant):
4385 	   * NOTE: these are different from those in Inside() (for one thing, v and y
4386 	   *       (BEGL_S and BIF_B here respectively) are switched relative to Inside.
4387 	   *
4388 	   * (1) k <= jmax[y] - j;
4389 	   * (2) k >= jmin[y] - j;
4390 	   * (3) k <= jmax[z] - j;
4391 	   * (4) k >= jmin[z] - j;
4392 	   *     1 and 2 guarantee (j+k) is within state y's j band
4393 	   *     3 and 4 guarantee (j+k) is within state z's j band
4394 	   *
4395 	   * (5) k >= hdmin[y][j-jmin[y]+k] - d;
4396 	   * (6) k <= hdmax[y][j-jmin[y]+k] - d;
4397 	   *     5 and 6 guarantee k+d is within y's j=(j+k), d band
4398 	   *
4399 	   * (7) k >= hdmin[z][j-jmin[z]+k];
4400 	   * (8) k <= hdmax[z][j-jmin[z]+k];
4401 	   *     5 and 6 guarantee k is within state z's j=(j+k) d band
4402 	   */
4403 	  kmin = ESL_MAX(jmin[y], jmin[z]) - j;
4404 	  kmax = ESL_MIN(jmax[y], jmax[z]) - j;
4405 	  /* kmin and kmax satisfy inequalities (1-4) */
4406 	  /* RHS of inequalities 5-8 are dependent on k, so we check
4407 	   * for these within the next for loop. */
4408 	  for(k = kmin; k <= kmax; k++) {
4409 	    if(k < (hdmin[y][jp_y+k] - d) || k > (hdmax[y][jp_y+k] - d)) continue;
4410 	    /* above line continues if inequality 5 or 6 is violated */
4411 	    if(k < (hdmin[z][jp_z+k])     || k > (hdmax[z][jp_z+k]))     continue;
4412 	    /* above line continues if inequality 7 or 8 is violated */
4413 
4414 	    /* if we get here for current k, all 8 inequalities have been satisified
4415 	     * so we know the cells corresponding to the platonic
4416 	     * matrix cells alpha[v][j][d], alpha[y][j+k][d+k], and
4417 	     * alpha[z][j+k][k] are all within the bands. These
4418 	     * cells correspond to beta[v][jp_v][dp_v],
4419 	     * beta[y][jp_y+k][d-hdmin[y][jp_y+k]+k],
4420 	     * and alpha[z][jp_z][k-hdmin[z][jp_z+k]];
4421 	     */
4422 	    kp_z = k-hdmin[z][jp_z+k];
4423 	    dp_y = d-hdmin[y][jp_y+k];
4424 	    beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y+k][dp_y+k] + alpha[z][jp_z+k][kp_z]));
4425 	  }
4426 	}
4427       }
4428     } /* end of 'if (cm->stid[v] == BEGL_S */
4429     else if (cm->stid[v] == BEGR_S) {
4430       y = cm->plast[v];	  /* the parent bifurcation    */
4431       z = cm->cfirst[y];  /* the other (left) S state  */
4432       jn = ESL_MAX(jmin[v], jmin[y]);
4433       jx = ESL_MIN(jmax[v], jmax[y]);
4434       for (j = jx; j >= jn; j--) {
4435 	ESL_DASSERT1((j >= 0 && j <= L));
4436 	jp_v = j - jmin[v];
4437 	jp_y = j - jmin[y];
4438 	jp_z = j - jmin[z];
4439 	i = j-d+1;
4440 
4441 	dn = ESL_MAX(hdmin[v][jp_v], j-jmax[z]);
4442 	dx = ESL_MIN(hdmax[v][jp_v], jp_z);
4443 	/* above makes sure that j,d are valid for state z: (jmin[z] + d) >= j >= (jmax[z] + d) */
4444 	for (d = dx; d >= dn; d--) {
4445 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
4446 	  /* Find the first k value that implies a valid cell in the y and z decks.
4447 	   * This k must satisfy the following 4 inequalities (some may be redundant):
4448 	   * NOTE: these are different from those in Inside() (for one thing, v and y
4449 	   *       (BEGR_S and BIF_B here respectively) are switched relative to Inside.
4450 	   *
4451 	   * (1) k >= hdmin[y][j-jmin[y]] - d;
4452 	   * (2) k <= hdmax[y][j-jmin[y]] - d;
4453 	   *     1 and 2 guarantee (d+k) is within state y's j=(j) d band
4454 	   *
4455 	   * (3) k >= hdmin[z][j-jmin[z]-d];
4456 	   * (4) k <= hdmax[z][j-jmin[z]-d];
4457 	   *     3 and 4 guarantee k is within z's j=(j-d) d band
4458 	   *
4459 	   */
4460 	  kmin = ESL_MAX((hdmin[y][jp_y]-d), (hdmin[z][jp_z-d]));
4461 	  kmax = ESL_MIN((hdmax[y][jp_y]-d), (hdmax[z][jp_z-d]));
4462 	  /* kmin and kmax satisfy inequalities (1-4) */
4463 	  for(k = kmin; k <= kmax; k++) {
4464 	    /* for current k, all 4 inequalities have been satisified
4465 	     * so we know the cells corresponding to the platonic
4466 	     * matrix cells beta[v][j][d], beta[y][j][d+k], and
4467 	     * alpha[z][j-d][k] are all within the bands. These
4468 	     * cells correspond to beta[v][jp_v][dp_v],
4469 	     * beta[y][jp_y+k][d-hdmin[y][jp_y]+k],
4470 	     * and alpha[z][jp_z-d][k-hdmin[z][jp_z-d]];
4471 	     */
4472 	    kp_z = k-hdmin[z][jp_z-d];
4473 	    dp_y = d-hdmin[y][jp_y];
4474 	    beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y][dp_y+k]
4475 								+ alpha[z][jp_z-d][kp_z]));
4476 	  }
4477 	}
4478       }
4479     } /* end of 'else if (cm->stid[v] == BEGR_S */
4480     else if (cm->sttype[v] == IL_st || cm->sttype[v] == IR_st) {
4481       /* ILs and IRs can self transit, this means that beta[v][j][d] must be fully calculated
4482        * before beta[v][j][d+1] can be started to be calculated, forcing the following nesting order:
4483        * for j { for d { for y { } } }
4484        * for non-self-transitioners, we can do a more efficient nesting order (see below)
4485        */
4486       for (j = jmax[v]; j >= jmin[v]; j--) {
4487 	ESL_DASSERT1((j >= 0 && j <= L));
4488 	jp_v = j - jmin[v];
4489 	for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
4490 	  i = j-d+1;
4491 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
4492 
4493 	  for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
4494 	    voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
4495 
4496 	    /* Note: this looks like it can be optimized, I tried but my 'optimization' slowed the code, so I reverted [EPN] */
4497 	    switch(cm->sttype[y]) {
4498 	    case MP_st:
4499 	      if (j == L || d == j) continue; /* boundary condition */
4500 	      if ((j+1) < jmin[y] || (j+1) > jmax[y]) continue; /* enforces j is valid for state y */
4501 	      jp_y = j - jmin[y];
4502 	      if ((d+2) < hdmin[y][(jp_y+1)] || (d+2) > hdmax[y][(jp_y+1)]) continue; /* enforces d is valid for state y */
4503 	      /* if we get here alpha[y][jp_y+1][dp_y+2] is a valid alpha cell
4504 	       * corresponding to alpha[y][j+1][d+2] in the platonic matrix.
4505 		   */
4506 	      dp_y = d - hdmin[y][jp_y+1];  /* d index for state y */
4507 	      escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
4508 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y+1][dp_y+2]
4509 								  + cm->tsc[y][voffset] + escore));
4510 	      break;
4511 
4512 	    case ML_st:
4513 	    case IL_st:
4514 	      if (d == j) continue;	/* boundary condition (note when j=0, d=0)*/
4515 	      if (j < jmin[y] || j > jmax[y]) continue; /* enforces j is valid for state y */
4516 	      jp_y = j - jmin[y];
4517 	      if ((d+1) < hdmin[y][jp_y] || (d+1) > hdmax[y][jp_y]) continue; /* enforces d is valid for state y */
4518 	      /* if we get here alpha[y][jp_y][dp_y+1] is a valid alpha cell
4519 	       * corresponding to alpha[y][j][d+1] in the platonic matrix.
4520 	       */
4521 	      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
4522 	      escore = esc_vAA[y][dsq[i-1]];
4523 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y][dp_y+1]
4524 								  + cm->tsc[y][voffset] + escore));
4525 	      break;
4526 
4527 	    case MR_st:
4528 	    case IR_st:
4529 	      if (j == L) continue;
4530 	      if ((j+1) < jmin[y] || (j+1) > jmax[y]) continue; /* enforces j is valid for state y */
4531 	      jp_y = j - jmin[y];
4532 	      if ((d+1) < hdmin[y][(jp_y+1)] || (d+1) > hdmax[y][(jp_y+1)]) continue; /* enforces d is valid for state y */
4533 	      /* if we get here alpha[y][jp_y+1][dp_y+1] is a valid alpha cell
4534 	       * corresponding to alpha[y][j+1][d+1] in the platonic matrix.
4535 	       */
4536 	      dp_y = d - hdmin[y][(jp_y+1)];  /* d index for state y */
4537 	      escore = esc_vAA[y][dsq[j+1]];
4538 	      /*printf("j: %d | jmin[y]: %d | jmax[y]: %d | jp_v: %d | dp_v: %d | jp_y: %d | dp_y: %d\n", j, jmin[y], jmax[y], jp_v, dp_v, jp_y, dp_y);*/
4539 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y+1][dp_y+1]
4540 								  + cm->tsc[y][voffset] + escore));
4541 	      break;
4542 
4543 	    case S_st:
4544 	    case E_st:
4545 	    case D_st:
4546 	      if (j < jmin[y] || j > jmax[y]) continue; /* enforces j is valid for state y */
4547 	      jp_y = j - jmin[y];
4548 	      if (d < hdmin[y][jp_y] || d > hdmax[y][jp_y]) continue; /* enforces d is valid for state y */
4549 	      /* if we get here alpha[y][jp_y][dp_y] is a valid alpha cell
4550 	       * corresponding to alpha[y][j][d] in the platonic matrix.
4551 	       */
4552 	      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
4553 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y][dp_y] + cm->tsc[y][voffset]));
4554 	      break;
4555 	    } /* end of switch(cm->sttype[y] */
4556 	  } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
4557 	  if (beta[v][jp_v][dp_v] < IMPOSSIBLE) beta[v][jp_v][dp_v] = IMPOSSIBLE;
4558 	} /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
4559       } /* end loop over jp. We know beta for this whole state */
4560     } /* end of 'else if cm->sttype[v] == IL_st || cm->sttype[v] == IR_st' */
4561     else { /* state v is not BEGL_S, BEGL_R IL nor IR (must be ML, MP, MR, D, S, B or E) */
4562       /* ML, MP, MR, D, S, B, E states cannot self transit, this means that all cells
4563        * in beta[v] are independent of each other, only depending on beta[y] for previously calc'ed y.
4564        * We can do the for loops in any nesting order, this implementation does what I think is most efficient:
4565        * for y { for j { for d { } } }
4566        */
4567       for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
4568 	voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
4569 	sdr = StateRightDelta(cm->sttype[y]);
4570 	sd  = StateDelta(cm->sttype[y]);
4571 	emitmode = Emitmode(cm->sttype[y]);
4572 	/* determine min j (jn) and max j (jx) that are valid for v and y */
4573 	jn = ESL_MAX(jmin[v], jmin[y]-sdr);
4574 	jx = ESL_MIN(jmax[v], jmax[y]-sdr);
4575 	for (j = jx; j >= jn; j--) {
4576 	  ESL_DASSERT1((j >= 0 && j <= L));
4577 	  jp_v = j - jmin[v];
4578 	  jp_y = j - jmin[y];
4579 	  ESL_DASSERT1((j+sdr >= jmin[y] && j+sdr <= jmax[y]));
4580 
4581 	  /* determine min d (dn) and max d (dx) that are valid for v and y and j */
4582 	  dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y + sdr] - sd);
4583 	  dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y + sdr] - sd);
4584 	  dp_v = dx - hdmin[v][jp_v];
4585 	  dp_y = dx - hdmin[y][jp_y + sdr];
4586 	  i    = j-dx+1;
4587 
4588 	  /* for each emit mode, update beta[v][jp_v][dp_v] for all valid d = dp_v */
4589 	  switch(emitmode) {
4590 	  case EMITPAIR:  /* MP_st */
4591 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--, i++) {
4592 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
4593 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
4594 	      escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
4595 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
4596 								  + cm->tsc[y][voffset] + escore));
4597 	    }
4598 	    break;
4599 	  case EMITLEFT:  /* ML_st, IL_st */
4600 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--, i++) {
4601 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
4602 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
4603 	      escore = esc_vAA[y][dsq[i-1]];
4604 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
4605 								  + cm->tsc[y][voffset] + escore));
4606 	    }
4607 	    break;
4608 	  case EMITRIGHT:  /* MR_st, IR_st */
4609 	    escore = esc_vAA[y][dsq[j+1]]; /* not dependent on i */
4610 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--) {
4611 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
4612 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
4613 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
4614 								  + cm->tsc[y][voffset] + escore));
4615 	    }
4616 	    break;
4617 	  case EMITNONE:  /* D_st, S_st, E_st*/
4618 	    for (d = dx; d >= dn; d--, dp_v--, dp_y--) {
4619 	      ESL_DASSERT1((  d       >= hdmin[v][jp_v]        &&   d       <= hdmax[v][jp_v]));
4620 	      ESL_DASSERT1((((d + sd) >= hdmin[y][jp_y + sdr]) && ((d + sd) <= hdmax[y][jp_y + sdr])));
4621 	      beta[v][jp_v][dp_v] = FLogsum(beta[v][jp_v][dp_v], (beta[y][jp_y + sdr][dp_y + sd]
4622 								  + cm->tsc[y][voffset]));
4623 	    }
4624 	    break;
4625 	  } /* end of switch(emitmode) */
4626 	} /* end of for j = jx; j >= jn; j-- */
4627       } /* end of for y = plast[v]... */
4628     } /* ends else entered for non-BEGL_S/BEGR_S/IL/IR states*/
4629     /* we're done calculating deck v for everything but local begins */
4630 
4631     /* deal with local alignment end transitions v->EL (EL = deck at M.) */
4632     if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
4633       sdr      = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
4634       sd       = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
4635       emitmode = Emitmode(cm->sttype[v]);        /* note emitmode is for state v */
4636 
4637       jn = jmin[v] - sdr;
4638       jx = jmax[v] - sdr;
4639       for (j = jn; j <= jx; j++) {
4640 	jp_v =  j - jmin[v];
4641 	dn   = hdmin[v][jp_v + sdr] - sd;
4642 	dx   = hdmax[v][jp_v + sdr] - sd;
4643 	i    = j-dn+1;                     /* we'll decrement this in for (d... loops inside switch below */
4644 	dp_v = dn - hdmin[v][jp_v + sdr];  /* we'll increment this in for (d... loops inside switch below */
4645 
4646 	switch (emitmode) {
4647 	case EMITPAIR:
4648 	  for (d = dn; d <= dx; d++, dp_v++, i--) {
4649 	    escore = esc_vAA[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
4650 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]
4651 								    + escore));
4652 	  }
4653 	  break;
4654 	case EMITLEFT:
4655 	  for (d = dn; d <= dx; d++, dp_v++, i--) {
4656 	    escore = esc_vAA[v][dsq[i-1]];
4657 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]
4658 								    + escore));
4659 	  }
4660 	  break;
4661 
4662 	case EMITRIGHT:
4663 	  escore = esc_vAA[v][dsq[j+1]];
4664 	  for (d = dn; d <= dx; d++, dp_v++) {
4665 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]
4666 								    + escore));
4667 	  }
4668 	  break;
4669 
4670 	case EMITNONE:
4671 	  for (d = dn; d <= dx; d++, dp_v++) {
4672 	    beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]));
4673 	  }
4674 	  break;
4675 	}
4676       }
4677     }
4678   } /* end loop over decks v. */
4679 
4680   /* Deal with last step needed for local alignment
4681    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
4682    */
4683   if (cm->flags & CMH_LOCAL_END) {
4684     for (j = L; j > 0; j--) { /* careful w/ boundary here */
4685       for (d = j-1; d >= 0; d--) /* careful w/ boundary here */
4686 	beta[cm->M][j][d] = FLogsum(beta[cm->M][j][d], (beta[cm->M][j][d+1] + cm->el_selfsc));
4687     }
4688   }
4689 
4690   if(do_check && (!(cm->flags & CMH_LOCAL_END))) {
4691     /* Local ends make the following test invalid because it is not true that
4692      * exactly 1 state in each node's split set must be visited in each parse.
4693      *
4694      * Determine P(S|M) / P(S|R) (probability of the sequence given the model)
4695      * using both the Outside (beta) and Inside (alpha) matrices,
4696      * and ensure they're consistent with P(S|M) / P(S|R) from the Inside calculation.
4697      * For all v in each split set: Sum_v [ Sum_j,(d<=j) ( alpha[v][j][d] * beta[v][j][d] ) ]
4698      *                                                    = P(S|M) / P(S|R)
4699      */
4700 
4701     for(n = 0; n < cm->nodes; n++) {
4702       sc = IMPOSSIBLE;
4703       num_split_states = SplitStatesInNode(cm->ndtype[n]);
4704       for(v = cm->nodemap[n]; v < cm->nodemap[n] + num_split_states; v++) {
4705 	for (j = jmin[v]; j <= jmax[v]; j++) {
4706 	  jp_v = j - jmin[v];
4707 	  for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
4708 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
4709 	    sc = FLogsum(sc, (alpha[v][jp_v][dp_v] + beta[v][jp_v][dp_v]));
4710 	    /*printf("node %d | adding alpha beta: v: %d | jp_v: %d | dp_v: %d| j: %d | d: %d\n", n, v, jp_v, dp_v, j, d);
4711 	      printf("\talpha: %f | beta: %f\n", alpha[v][jp_v][dp_v], beta[v][jp_v][dp_v]);*/
4712 	  }
4713 	}
4714       }
4715       /*printf("checking node: %d | sc: %.6f\n", n, sc);*/
4716       diff = sc - alpha[0][jp_0][Lp_0];
4717       if(diff > 0.01 || diff < -0.01) {
4718 	fail_flag = TRUE;
4719 	printf("ERROR: node %d P(S|M): %.5f inconsistent with Inside P(S|M): %.5f (diff: %.5f)\n",
4720 	       n, sc, alpha[0][jp_0][Lp_0], diff);
4721       }
4722     }
4723   }
4724 
4725   /* If not in local mode, we can calculate P(S|M) / P(S|R) given only the
4726    * beta matrix as follows:
4727    *
4728    * IF local ends are off, we know each parse MUST visit each END_E state,
   * we pick the final END_E state, cm->M-1 (though any END_E could be used here):
4730    *
4731    * Sum_j=0 to W (alpha[M-1][j][0] * beta[M-1][j][0]) = P(S|M) / P(S|R)
4732    *
4733    * Note: alpha[M-1][j][0] = 0.0 for all j
4734    *       because all parse subtrees rooted at an END_E must have d=0, (2^0 = 1.0)
4735    * therefore:
4736    * Sum_j=0 to W (beta[M-1][j][0]) = P(S|M) / P(S|R)
4737    *
4738    * *** If local ends are on, each parse MUST visit either each END_E state with d=0
4739    * or the EL state but d can vary, so we can't use this test (believe me I tried
4740    * to get a similar test working, but I'm convinced you need alpha to get P(S|M)
4741    * in local mode).
4742    */
4743   if(!(cm->flags & CMH_LOCAL_END)) {
4744     sc = IMPOSSIBLE;
4745     v = cm->M-1;
4746     for (j = jmin[v]; j <= jmax[v]; j++) {
4747       jp_v = j - jmin[v];
4748       assert(hdmin[v][jp_v] == 0);
4749       sc = FLogsum(sc, (beta[v][jp_v][0]));
4750       /* printf("\talpha[%3d][%3d][%3d]: %5.2f | beta[%3d][%3d][%3d]: %5.2f\n", (cm->M-1), (j), 0, alpha[(cm->M-1)][j][0], (cm->M-1), (j), 0, beta[(cm->M-1)][j][0]);*/
4751     }
4752   }
4753   else { /* return_sc = P(S|M) / P(S|R) from Inside() */
4754     sc = alpha[0][jp_0][Lp_0];
4755   }
4756 
4757   if(fail_flag) ESL_FAIL(eslFAIL, errbuf, "Not all nodes passed posterior check.");
4758 
4759 #if eslDEBUGLEVEL >= 3
4760   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
4761   /* FILE *fp1; fp1 = fopen("tmp.std_ohbmx", "w");   cm_hb_mx_Dump(fp1, mx); fclose(fp1); */
4762 #endif
4763 
4764 
4765   if(!(cm->flags & CMH_LOCAL_END)) ESL_DPRINTF1(("#DEBUG: \tcm_OutsideAlignHB() sc : %f\n", sc));
4766   else                             ESL_DPRINTF1(("#DEBUG: \tcm_OutsideAlignHB() sc : %f (LOCAL mode; sc is from Inside)\n", sc));
4767 
4768   if (ret_sc != NULL) *ret_sc = sc;
4769   return eslOK;
4770 }
4771 
4772 /* Function: cm_Posterior()
4773  * Date:     EPN, Mon Nov 19 09:02:12 2007
4774  * Note:     based on Ian Holmes' P7EmitterPosterior() from HMMER's 2.x postprob.c
4775  *           Renamed from CMPosterior() [EPN, Wed Sep 14 06:15:22 2011].
4776  *
4777  * Purpose: Combines non-banded Inside and Outside matrices into a
4778  *           posterior probability matrix. The value in post[v][j][d]
4779  *           is the log of the posterior probability of a parse
4780  *           subtree rooted at v emitting the subsequence i..j
4781  *           (i=j-d+1).  The caller must provide a <post> float
4782  *           matrix, but this matrix may be the same matrix as that
4783  *           provided as Outside <out_mx>, (overwriting it will not
4784  *           compromise the algorithm). Posteriors are calculated
4785  *           for the full sequence 1..L.
4786  *
4787  *
4788  * Args:     cm         - the model
4789  *           errbuf     - char buffer for reporting errors
4790  *           L          - length of the dsq to align
4791  *           size_limit - max number of Mb for DP matrix
4792  *           ins_mx     - pre-calculated Inside matrix
4793  *           out_mx     - pre-calculated Outside matrix
4794  *           post_mx    - pre-allocated matrix for Posteriors
4795  *
4796  * Returns:  <eslOK>     on success.
4797  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>, in
4798  *                       this case, post_mx is not filled.
4799  */
4800 int
cm_Posterior(CM_t * cm,char * errbuf,int L,float size_limit,CM_MX * ins_mx,CM_MX * out_mx,CM_MX * post_mx)4801 cm_Posterior(CM_t *cm, char *errbuf, int L, float size_limit, CM_MX *ins_mx, CM_MX *out_mx, CM_MX *post_mx)
4802 {
4803   int   status;
4804   int   v, j, d; /* state, position, subseq length */
4805   int   vmax;    /* cm->M if local ends on, else cm->M-1 */
4806   float sc;      /* optimal score, from Inside matrix */
4807 
4808   /* the DP matrices */
4809   float ***alpha = ins_mx->dp; /* pointer to the alpha DP matrix */
4810   float ***beta  = out_mx->dp; /* pointer to the beta DP matrix */
4811   float ***post  = post_mx->dp; /* pointer to the post DP matrix */
4812 
4813   /* grow our post matrix, but only if isn't also our out_mx in which
4814    * case we know we're already big enought (also in that case we
4815    * don't want to call GrowTo b/c it can potentially free the DP
4816    * matrix memory and reallocate it, which would be bad b/c we
4817    * need the out_mx!)
4818    */
4819   if(post_mx != out_mx) {
4820     if((status = cm_mx_GrowTo(cm, post_mx, errbuf, L, size_limit)) != eslOK) return status;
4821   }
4822 
4823   sc = ins_mx->dp[0][L][L];
4824 
4825   /* If local ends are on, start with the EL state (cm->M), otherwise
4826    * its not a valid deck.
4827    */
4828   vmax = (cm->flags & CMH_LOCAL_END) ? cm->M : cm->M-1;
4829   for (v = vmax; v >= 0; v--) {
4830     for (j = 0; j <= L; j++) {
4831       for (d = 0; d <= j; d++) {
4832 	post[v][j][d] = alpha[v][j][d] + beta[v][j][d] - sc;
4833       }
4834     }
4835   }
4836 
4837 #if eslDEBUGLEVEL >= 3
4838   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
4839   /* FILE *fp1; fp1 = fopen("tmp.std_pmx", "w");   cm_mx_Dump(fp1, post_mx); fclose(fp1); */
4840 #endif
4841 
4842   return eslOK;
4843 }
4844 
4845 /* Function: cm_PosteriorHB()
4846  * Date:     EPN 05.27.06
4847  * Note:     based on Ian Holmes' P7EmitterPosterior() from HMMER's 2.x postprob.c
4848  *           Renamed from CMPosteriorHB() [EPN, Wed Sep 14 06:14:48 2011].
4849  *
4850  * Purpose: Combines HMM banded Inside and Outside matrices into a
4851  *           posterior probability matrix. Any cells outside of HMM
4852  *           bands do not exist in memory. The value in
4853  *           post[v][jp_v][dp_v] is the log of the posterior
4854  *           probability of a parse subtree rooted at v emitting the
4855  *           subsequence i..j (i=j-d+1). Where j = jp_v + jmin[v], and
4856  *           d = dp_v + hdmin[v][jp_v]. The caller must provide a
4857  *           <post> CM_HB_MX matrix, but this matrix may be the same
4858  *           matrix as that provided as Outside <out_mx>, (overwriting
4859  *           it will not compromise the algorithm). Posteriors are
4860  *           calculated for the full sequence 1..L.
4861  *
4862  * Args:     cm         - the model
4863  *           errbuf     - char buffer for reporting errors
4864  *           L          - length of the dsq to align
4865  *           size_limit - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
4866  *           ins_mx     - pre-calculated Inside matrix
4867  *           out_mx     - pre-calculated Outside matrix
4868  *           post_mx    - pre-allocated matrix for Posteriors
4869  *
4870  * Returns:  <eslOK>     on success.
4871  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>
4872  *           <eslEINVAL> if the full sequence is not within the bands for state 0
4873  *           In either case the post_mx is not filled
4874  */
4875 int
cm_PosteriorHB(CM_t * cm,char * errbuf,int L,float size_limit,CM_HB_MX * ins_mx,CM_HB_MX * out_mx,CM_HB_MX * post_mx)4876 cm_PosteriorHB(CM_t *cm, char *errbuf, int L, float size_limit, CM_HB_MX *ins_mx, CM_HB_MX *out_mx, CM_HB_MX *post_mx)
4877 {
4878   int      status;
4879   int      v, j, d; /* state, position, position, subseq length */
4880   float    sc;      /* total score, the log probability of the current seq  */
4881   int      jp_v;    /* j index for state v in alpha/beta with HMM bands */
4882   int      dp_v;    /* d index for state v in alpha/beta with HMM bands */
4883   int      jp_0;        /* L offset in ROOT_S's (v==0) j band */
4884   int      Lp_0;        /* L offset in ROOT_S's (v==0) d band */
4885 
4886   /* the DP matrices */
4887   float ***alpha = ins_mx->dp; /* pointer to the alpha DP matrix */
4888   float ***beta  = out_mx->dp; /* pointer to the beta DP matrix */
4889   float ***post  = post_mx->dp; /* pointer to the post DP matrix */
4890 
4891   /* ptrs to cp9b info, for convenience */
4892   int     *jmin  = cm->cp9b->jmin;
4893   int     *jmax  = cm->cp9b->jmax;
4894   int    **hdmin = cm->cp9b->hdmin;
4895   int    **hdmax = cm->cp9b->hdmax;
4896 
4897   /* ensure a full alignment to ROOT_S (v==0) is allowed by the bands */
4898   if (cm->cp9b->jmin[0] > L || cm->cp9b->jmax[0] < L)
4899     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cm->cp9b->jmin[0], cm->cp9b->jmax[0]);
4900   jp_0 = L - jmin[0];
4901   if (cm->cp9b->hdmin[0][jp_0] > L || cm->cp9b->hdmax[0][jp_0] < L)
4902     ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cm->cp9b->hdmin[0][jp_0], cm->cp9b->hdmax[0][jp_0]);
4903   Lp_0 = L - hdmin[0][jp_0];
4904 
4905   sc = alpha[0][jp_0][Lp_0];
4906 
4907   /* grow our post matrix, but only if isn't also our out_mx in which
4908    * case we know we're already big enought (also in that case we
4909    * don't want to call GrowTo b/c it can potentially free the DP
4910    * matrix memory and reallocate it, which would be bad b/c we
4911    * need the out_mx!)
4912    */
4913   if(post_mx != out_mx) {
4914     if((status = cm_hb_mx_GrowTo(cm, post_mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
4915   }
4916 
4917   /* If local ends are on, start with the EL state (cm->M), otherwise
4918    * M deck is not valid. Note: there are no bands on the EL state
4919    */
4920   if (cm->flags & CMH_LOCAL_END) {
4921     for(j = 0; j <= L; j++) {
4922       for (d = 0; d <= j; d++) {
4923 	post[cm->M][j][d] = alpha[cm->M][j][d] + beta[cm->M][j][d] - sc;
4924       }
4925     }
4926   }
4927 
4928   for (v = (cm->M-1); v >= 0; v--) {
4929     for (j = jmin[v]; j <= jmax[v]; j++) {
4930       ESL_DASSERT1((j >= 0 && j <= L));
4931       jp_v = j - jmin[v];
4932       for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
4933 	dp_v = d - hdmin[v][jp_v];
4934 	post[v][jp_v][dp_v] = alpha[v][jp_v][dp_v] + beta[v][jp_v][dp_v] - sc;
4935 	/*printf("v: %3d | jp_v: %3d | dp_v: %3d | alpha: %5.2f | beta: %5.2f\n", v, jp_v, dp_v, alpha[v][jp_v][dp_v], beta[v][jp_v][dp_v]);*/
4936       }
4937     }
4938   }
4939   return eslOK;
4940 }
4941 
4942 /* Function: cm_EmitterPosterior()
4943  * Date:     EPN, Fri Sep 30 13:53:57 2011
4944  *
4945  * Purpose: Given a posterior probability cube, where the value in
4946  *           post[v][j][d] is the log of the posterior probability of
4947  *           a parse subtree rooted at v emitting the subsequence i..j
4948  *           (i=j-d+1), fill a CM_EMIT_MX <emit_mx> with two 2-dimensional
4949  *           matrices with values:
4950  *
4951  *           emit_mx->l_pp[v][i]: log of the posterior probability that
4952  *           state v emitted residue i leftwise either at (if a match
4953  *           state) or *after* (if an insert state) the left consensus
4954  *           position modeled by state v's node.
4955  *
4956  *           emit_mx->r_pp[v][i]: log of the posterior probability that
4957  *           state v emitted residue i rightwise either at (if a match
4958  *           state) or *before* (if an insert state) the right
4959  *           consensus position modeled by state v's node.
4960  *
4961  *           l_pp[v] is NULL for states that do not emit leftwise
4962  *           r_pp[v] is NULL for states that do not emit rightwise
4963  *
4964  *          This is done in 3 steps:
4965  *          1. Fill l_pp[v][i] and r_pp[v][i] with the posterior
4966  *             probability that state v emitted residue i either
4967  *             leftwise (l_pp) or rightwise (r_pp).
4968  *
4969  *          2. Normalize l_pp and r_pp so that probability that
4970  *             each residue was emitted by any state is exactly
4971  *             1.0.
4972  *
4973  *          3. Combine l_pp values for MATP_MP (v) and MATP_ML (y=v+1)
4974  *             states in the same node so they give the value defined
4975  *             above (i.e. l_pp[v] == l_pp[y] = the PP that either v
4976  *             or y emitted residue i) instead of l_pp[v] = PP that v
4977  *             emitted i, and l_pp[y] = PP that y emitted i.  And
4978  *             combine r_pp values for MATP_MP (v) and MATP_MR (y=v+2)
4979  *             states in an analogous way.
4980  *
4981  *          If <do_check> we check to make sure the summed probability
 *          of any residue is > 0.98 and < 1.02 prior to the step 2
4983  *          normalization, and throw eslFAIL if not.
4984  *
4985  *          Note: A failure of this test does not necessarily mean a
4986  *          bug in the code, because this check is known to fail for
4987  *          some cases with parsetrees that contain inserts of 100s of
4988  *          residues from the same IL or IR state (that utilize 100s
4989  *          of IL->IL or IR->IR self transitions). These cases were
4990  *          looked at in detail to determine if they were due to a bug
4991  *          in the DP code. This was logged in
4992  *          ~nawrockie/notebook/8_1016_inf-1rc3_bug_alignment/00LOG.
4993  *          The conclusion was that the failure of the posterior check
4994  *          is due completely to lack of precision in the float scores
4995  *          (not just in the logsum look-up table but also with using
4996  *          real log() and exp() calls). If this function returns an
4997  *          error, please check to see if the parsetree has a large
4998  *          insertion in it, if so you can expect probabilities up to
4999  *          1.03 due solely to this precision issue. See the notebook
 *          00LOG for more, including a check I performed to change the
5001  *          relevant IL->IL transition probability by very small
5002  *          values (~0.0001) and you can observe the posteriors change
5003  *          dramatically which demonstrates that precision of floats
 *          is the culprit.  (EPN, Sun Oct 26 14:54:31 2008;
 *          originally added to the 'Purpose' of a cm_Posterior()
 *          function which no longer exists, having been replaced by
 *          this function.)
5008  *
5009  *
5010  * Args:     cm         - the model
5011  *           errbuf     - for error messages
5012  *           L          - length of the sequence
5013  *           size_limit - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
5014  *           post       - pre-filled posterior cube
5015  *           emit_mx     - pre-allocated emit matrix, grown and filled-in here
 *           do_check   - if TRUE, return eslFAIL if summed prob of any residue
5017  *                        (before normalization) is < 0.98 or > 1.02.
5018  *
5019  * Returns:  <eslOK>     on success.
5020  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>
5021  *           <eslFAIL>   if (do_check) and any residue check fails
5022  *           <eslEMEM>   if we run out of memory.
5023  *           If !eslOK the l_pp and r_pp values are invalid.
5024  */
5025 int
cm_EmitterPosterior(CM_t * cm,char * errbuf,int L,float size_limit,CM_MX * post,CM_EMIT_MX * emit_mx,int do_check)5026 cm_EmitterPosterior(CM_t *cm, char *errbuf, int L, float size_limit, CM_MX *post, CM_EMIT_MX *emit_mx, int do_check)
5027 {
5028   int    status;
5029   int    v, j, d; /* state, position, subseq length */
5030   int    i;       /* sequence position */
5031   int    sd;      /* StateDelta(v) */
5032 
5033   /* grow the emit matrices based on the current sequence */
5034   if((status = cm_emit_mx_GrowTo(cm, emit_mx, errbuf, L, size_limit)) != eslOK) return status;
5035 
5036   /* initialize all cells of the emit matrices to IMPOSSIBLE */
5037   esl_vec_FSet(emit_mx->l_pp_mem, emit_mx->l_ncells_valid, IMPOSSIBLE);
5038   esl_vec_FSet(emit_mx->r_pp_mem, emit_mx->r_ncells_valid, IMPOSSIBLE);
5039 
5040   /* Step 1. Fill l_pp[v][i] and r_pp[v][i] with the posterior
5041    *         probability that state v emitted residue i either
5042    *         leftwise (l_pp) or rightwise (r_pp).
5043    */
5044   for(v = 0; v < cm->M; v++) {
5045     sd = StateDelta(cm->sttype[v]);
5046     if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
5047       for(j = 1; j <= L; j++) {
5048 	i = j-sd+1;
5049 	for(d = sd; d <= j; d++, i--) {
5050 	  emit_mx->l_pp[v][i] = FLogsum(emit_mx->l_pp[v][i], post->dp[v][j][d]);
5051 	}
5052       }
5053     }
5054     if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
5055       for(j = 1; j <= L; j++) {
5056 	sd = StateDelta(cm->sttype[v]);
5057 	for(d = sd; d <= j; d++) {
5058 	  emit_mx->r_pp[v][j] = FLogsum(emit_mx->r_pp[v][j], post->dp[v][j][d]);
5059 	}
5060       }
5061     }
5062   }
5063   /* factor in contribution of local ends, the EL state may have emitted this residue. */
5064   if (cm->flags & CMH_LOCAL_END) {
5065     for (j = 1; j <= L; j++) {
5066       i = j;
5067       for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
5068 	emit_mx->l_pp[cm->M][i] = FLogsum(emit_mx->l_pp[cm->M][i], post->dp[cm->M][j][d]);
5069       }
5070     }
5071   }
5072 
5073 #if eslDEBUGLEVEL >= 3
5074   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
5075   /* FILE *fp1; fp1 = fopen("tmp.std_unnorm_emitmx",  "w"); cm_emit_mx_Dump(fp1, cm, emit_mx); fclose(fp1); */
5076 #endif
5077 
5078   /* Step 2. Normalize l_pp and r_pp so that probability that
5079    *         each residue was emitted by any state is exactly
5080    *         1.0.
5081    */
5082   esl_vec_FSet(emit_mx->sum, (L+1), IMPOSSIBLE);
5083   for(v = 0; v <= cm->M; v++) {
5084     if(emit_mx->l_pp[v] != NULL) {
5085       for(i = 1; i <= L; i++) {
5086 	emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->l_pp[v][i]);
5087       }
5088     }
5089     if(emit_mx->r_pp[v] != NULL) {
5090       for(j = 1; j <= L; j++) {
5091 	emit_mx->sum[j] = FLogsum(emit_mx->sum[j], emit_mx->r_pp[v][j]);
5092       }
5093     }
5094   }
5095   /* perform the check, if nec */
5096   if(do_check) {
5097     for(i = 1; i <= L; i++) {
5098       if((sreEXP2(emit_mx->sum[i]) < 0.98) || (sreEXP2(emit_mx->sum[i]) > 1.02)) {
5099 	ESL_FAIL(eslFAIL, errbuf, "residue %d has summed prob of %5.4f (2^%5.4f).\nMay not be a DP coding bug, see 'Note:' on precision in cm_EmitterPosterior().\n", i, (sreEXP2(emit_mx->sum[i])), emit_mx->sum[i]);
5100       }
5101       printf("i: %d | total: %10.4f\n", i, (sreEXP2(emit_mx->sum[i])));
5102     }
5103     ESL_DPRINTF1(("#DEBUG: cm_EmitterPosterior() check passed, all residues have summed probability of emission of between 0.98 and 1.02.\n"));
5104   }
5105 
5106   /* normalize, using the sum vector */
5107   for(v = 0; v <= cm->M; v++) {
5108     if(emit_mx->l_pp[v] != NULL) {
5109       for(i = 1; i <= L; i++) {
5110 	emit_mx->l_pp[v][i] -= emit_mx->sum[i];
5111       }
5112     }
5113     if(emit_mx->r_pp[v] != NULL) {
5114       for(j = 1; j <= L; j++) {
5115 	emit_mx->r_pp[v][j] -= emit_mx->sum[j];
5116       }
5117     }
5118   }
5119 
5120   /* Step 3. Combine l_pp values for MATP_MP (v) and MATP_ML (y=v+1)
5121    *         states in the same node so they give the value defined
5122    *         above (i.e. l_pp[v] == l_pp[y] = the PP that either v or
5123    *         y emitted residue i) instead of l_pp[v] = PP that v
5124    *         emitted i, and l_pp[y] = PP that y emitted i.  And
5125    *         combine r_pp values for MATP_MP (v) and MATP_MR (y=v+2)
5126    *         states in an analogous way.
5127    */
5128   for(v = 0; v <= cm->M; v++) {
5129     if(cm->sttype[v] == MP_st) {
5130       for(i = 1; i <= L; i++) {
5131 	emit_mx->l_pp[v][i]   = FLogsum(emit_mx->l_pp[v][i], emit_mx->l_pp[v+1][i]);
5132 	emit_mx->l_pp[v+1][i] = emit_mx->l_pp[v][i];
5133       }
5134       for(j = 1; j <= L; j++) {
5135 	emit_mx->r_pp[v][j]   = FLogsum(emit_mx->r_pp[v][j], emit_mx->r_pp[v+2][j]);
5136 	emit_mx->r_pp[v+2][j] = emit_mx->r_pp[v][j];
5137       }
5138     }
5139   }
5140 
5141 #if eslDEBUGLEVEL >= 3
5142   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
5143   /* FILE *fp2; fp2 = fopen("tmp.std_emitmx",  "w"); cm_emit_mx_Dump(fp2, cm, emit_mx); fclose(fp2); */
5144 #endif
5145 
5146   return eslOK;
5147 }
5148 
5149 
5150 /* Function: cm_EmitterPosteriorHB()
5151  * Date:     EPN, Thu Oct  6 06:59:53 2011
5152  *
5153  * Purpose: Same as cm_EmitterPosterior() except HMM banded matrices
5154  *          are used. The main difference is that we have to be careful
5155  *          to stay within the bands because matrix cells outside
5156  *          the bands do not exist (are not allocated). This requires
5157  *          keeping careful track of our offsets between the sequence
5158  *          position index and the corresponding indices in the matrix.
5159  *
5160  * Args:     cm         - the model
5161  *           errbuf     - for error messages
5162  *           L          - length of the sequence
5163  *           size_limit - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
5164  *           post       - pre-filled posterior cube
5165  *           emit_mx     - pre-allocated emit matrix, grown and filled-in here
 *           do_check   - if TRUE, return eslFAIL if summed prob of any residue
5167  *                        (before normalization) is < 0.98 or > 1.02.
5168  *
5169  * Returns:  <eslOK>     on success.
5170  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>
5171  *           <eslFAIL>   if (do_check) and any residue check fails
5172  *           <eslEMEM>   if we run out of memory.
5173  *           If !eslOK the l_pp and r_pp values are invalid.
5174  */
5175 int
cm_EmitterPosteriorHB(CM_t * cm,char * errbuf,int L,float size_limit,CM_HB_MX * post,CM_HB_EMIT_MX * emit_mx,int do_check)5176 cm_EmitterPosteriorHB(CM_t *cm, char *errbuf, int L, float size_limit, CM_HB_MX *post, CM_HB_EMIT_MX *emit_mx, int do_check)
5177 {
5178   int    status;
5179   int    v, j, d; /* state, position, subseq length */
5180   int    i;       /* sequence position */
5181   int    ip_v;    /* offset i in banded matrix */
5182   int    ip_v2;   /* another offset i in banded matrix */
5183   int    jp_v;    /* offset j in banded matrix */
5184   int    jp_v2;   /* another offset j in banded matrix */
5185   int    dp_v;    /* offset d in banded matrix */
5186   int    in, ix;  /* temp min/max i */
5187   int    jn, jx;  /* temp min/max j */
5188 
5189   /* ptrs to band info, for convenience */
5190   int     *imin  = cm->cp9b->imin;
5191   int     *imax  = cm->cp9b->imax;
5192   int     *jmin  = cm->cp9b->jmin;
5193   int     *jmax  = cm->cp9b->jmax;
5194   int    **hdmin = cm->cp9b->hdmin;
5195   int    **hdmax = cm->cp9b->hdmax;
5196 
5197   /* grow the emit matrices based on the current sequence */
5198   if((status = cm_hb_emit_mx_GrowTo(cm, emit_mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
5199 
5200   /* initialize all cells of the emit matrices to IMPOSSIBLE */
5201   esl_vec_FSet(emit_mx->l_pp_mem, emit_mx->l_ncells_valid, IMPOSSIBLE);
5202   esl_vec_FSet(emit_mx->r_pp_mem, emit_mx->r_ncells_valid, IMPOSSIBLE);
5203 
5204   /* Step 1. Fill l_pp[v][i] and r_pp[v][i] with the posterior
5205    *         probability that state v emitted residue i either
5206    *         leftwise (l_pp) or rightwise (r_pp).
5207    */
5208   for(v = 0; v < cm->M; v++) {
5209     if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
5210       for(j = jmin[v]; j <= jmax[v]; j++) {
5211 	jp_v = j - jmin[v];
5212 	for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
5213 	  dp_v = d-hdmin[v][jp_v];
5214 	  i    = j-d+1;
5215 	  assert(i >= imin[v] && i <= imax[v]);
5216 	  ip_v = i - imin[v];
5217 	  emit_mx->l_pp[v][ip_v] = FLogsum(emit_mx->l_pp[v][ip_v], post->dp[v][jp_v][dp_v]);
5218 	}
5219       }
5220     }
5221     if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
5222       for(j = jmin[v]; j <= jmax[v]; j++) {
5223 	jp_v = j - jmin[v];
5224 	for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
5225 	  dp_v = d-hdmin[v][jp_v];
5226 	  emit_mx->r_pp[v][jp_v] = FLogsum(emit_mx->r_pp[v][jp_v], post->dp[v][jp_v][dp_v]);
5227 	}
5228       }
5229     }
5230   }
5231   /* factor in contribution of local ends, the EL state may have emitted this residue.
5232    * Note, the M deck is non-banded
5233    */
5234   if (cm->flags & CMH_LOCAL_END) {
5235     for (j = 1; j <= L; j++) {
5236       i = j;
5237       for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
5238 	emit_mx->l_pp[cm->M][i] = FLogsum(emit_mx->l_pp[cm->M][i], post->dp[cm->M][j][d]);
5239       }
5240     }
5241   }
5242 
5243 #if eslDEBUGLEVEL >= 3
5244   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
5245   /* FILE *fp1; fp1 = fopen("tmp.std_unnorm_hbemitmx",  "w"); cm_hb_emit_mx_Dump(fp1, cm, emit_mx); fclose(fp1); */
5246 #endif
5247 
5248   /* Step 2. Normalize l_pp and r_pp so that probability that
5249    *         each residue was emitted by any state is exactly
5250    *         1.0.
5251    */
5252   esl_vec_FSet(emit_mx->sum, (L+1), IMPOSSIBLE);
5253   for(v = 0; v < cm->M; v++) { /* we'll handle EL special */
5254     if(emit_mx->l_pp[v] != NULL) {
5255       for(i = imin[v]; i <= imax[v]; i++) {
5256 	ip_v = i - imin[v];
5257 	emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->l_pp[v][ip_v]);
5258       }
5259     }
5260     if(emit_mx->r_pp[v] != NULL) {
5261       for(j = jmin[v]; j <= jmax[v]; j++) {
5262 	jp_v = j - jmin[v];
5263 	emit_mx->sum[j] = FLogsum(emit_mx->sum[j], emit_mx->r_pp[v][jp_v]);
5264       }
5265     }
5266   }
5267   /* Handle EL deck, remember it is non-banded */
5268   if(emit_mx->l_pp[cm->M] != NULL) {
5269     for(i = 1; i <= L; i++) {
5270       emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->l_pp[v][i]);
5271     }
5272   }
5273 
5274   /* perform the check, if nec */
5275   if(do_check) {
5276     for(i = 1; i <= L; i++) {
5277       if((sreEXP2(emit_mx->sum[i]) < 0.98) || (sreEXP2(emit_mx->sum[i]) > 1.02)) {
5278 	ESL_FAIL(eslFAIL, errbuf, "residue %d has summed prob of %5.4f (2^%5.4f).\nMay not be a DP coding bug, see 'Note:' on precision in cm_EmitterPosterior().\n", i, (sreEXP2(emit_mx->sum[i])), emit_mx->sum[i]);
5279       }
5280       printf("HB i: %d | total: %10.4f\n", i, (sreEXP2(emit_mx->sum[i])));
5281     }
5282     ESL_DPRINTF1(("#DEBUG: cm_EmitterPosteriorHB() check passed, all residues have summed probability of emission of between 0.98 and 1.02.\n"));
5283   }
5284 
5285   /* normalize, using the sum vector */
5286   for(v = 0; v < cm->M; v++) {
5287     if(emit_mx->l_pp[v] != NULL) {
5288       for(i = imin[v]; i <= imax[v]; i++) {
5289 	ip_v = i - imin[v];
5290 	emit_mx->l_pp[v][ip_v] -= emit_mx->sum[i];
5291       }
5292     }
5293     if(emit_mx->r_pp[v] != NULL) {
5294       for(j = jmin[v]; j <= jmax[v]; j++) {
5295 	jp_v = j - jmin[v];
5296 	emit_mx->r_pp[v][jp_v] -= emit_mx->sum[j];
5297       }
5298     }
5299   }
5300   /* Handle EL deck, remember it is non-banded */
5301   if(emit_mx->l_pp[cm->M] != NULL) {
5302     for(i = 1; i <= L; i++) {
5303       emit_mx->l_pp[cm->M][i] -= emit_mx->sum[i];
5304     }
5305   }
5306 
5307   /* Step 3. Combine l_pp values for MATP_MP (v) and MATP_ML (y=v+1)
5308    *         states in the same node so they give the value defined
5309    *         above (i.e. l_pp[v] == l_pp[y] = the PP that either v or
5310    *         y emitted residue i) instead of l_pp[v] = PP that v
5311    *         emitted i, and l_pp[y] = PP that y emitted i.  And
5312    *         combine r_pp values for MATP_MP (v) and MATP_MR (y=v+2)
5313    *         states in an analogous way.
5314    */
5315   for(v = 0; v <= cm->M; v++) {
5316     if(cm->sttype[v] == MP_st) {
5317       /* we only change l_pp[v][i] and l_pp[v+1][i] if i is within both
5318        * state v and v+1's i band.
5319        */
5320       if(imax[v] >= 1 && imax[v+1] >= 1) {
5321 	in = ESL_MAX(imin[v], imin[v+1]);
5322 	ix = ESL_MIN(imax[v], imax[v+1]);
5323 	for(i = in; i <= ix; i++) {
5324 	  ip_v  = i - imin[v];
5325 	  ip_v2 = i - imin[v+1];
5326 	  emit_mx->l_pp[v][ip_v]    = FLogsum(emit_mx->l_pp[v][ip_v], emit_mx->l_pp[v+1][ip_v2]);
5327 	  emit_mx->l_pp[v+1][ip_v2] = emit_mx->l_pp[v][ip_v];
5328 	}
5329       }
5330       /* we only change r_pp[v][j] and r_pp[v+2][j] if j is within both
5331        * state v and v+2's j band.
5332        */
5333       if(jmax[v] >= 1 && jmax[v+2] >= 1) {
5334 	jn = ESL_MAX(jmin[v], jmin[v+2]);
5335 	jx = ESL_MIN(jmax[v], jmax[v+2]);
5336 	for(j = jn; j <= jx; j++) {
5337 	  jp_v  = j - jmin[v];
5338 	  jp_v2 = j - jmin[v+2];
5339 	  emit_mx->r_pp[v][jp_v]    = FLogsum(emit_mx->r_pp[v][jp_v], emit_mx->r_pp[v+2][jp_v2]);
5340 	  emit_mx->r_pp[v+2][jp_v2] = emit_mx->r_pp[v][jp_v];
5341 	}
5342       }
5343     }
5344   }
5345 
5346 #if eslDEBUGLEVEL >= 3
5347   /* Uncomment to dump matrix to file. This could be very large, so be careful. */
5348   /* FILE *fp2; fp2 = fopen("tmp.std_hbemitmx",  "w"); cm_hb_emit_mx_Dump(fp2, cm, emit_mx); fclose(fp2); */
5349 #endif
5350 
5351   return eslOK;
5352 }
5353 
5354 /* Function: cm_PostCode()
5355  * Date:     EPN 05.25.06 based on SRE's Postcode()
5356  *           from HMMER's postprob.c
5357  *
 * Purpose:  Given a parse tree and a filled emit matrix, calculate a
 *           string that represents the confidence value of each
 *           aligned residue in the sequence.
5361  *
5362  *           The emit_mx values are:
5363  *           l_pp[v][i]: log of the posterior probability that state v emitted
5364  *                       residue i leftwise either at (if a match state) or
5365  *                       *after* (if an insert state) the left consensus
5366  *                       position modeled by state v's node.
5367  *
5368  *           r_pp[v][i]: log of the posterior probability that state v emitted
5369  *                       residue i rightwise either at (if a match state) or
5370  *                       *before* (if an insert state) the right consensus
5371  *                       position modeled by state v's node.
5372  *
5373  *           l_pp[v] is NULL for states that do not emit leftwise  (B,S,D,E,IR,MR)
5374  *           r_pp[v] is NULL for states that do not emit rightwise (B,S,D,E,IL,ML)
5375  *
 *           The PP string is 0..L-1  (L = len of target seq),
 *           so it's in the coordinate system of the sequence string;
 *           off by one from dsq.
5379  *
5380  *           Values are 0,1,2,3,4,5,6,7,8,9,*:
5381  *           '0' = [0.00-0.05)
5382  *           '1' = [0.05-0.15)
5383  *           '2' = [0.15-0.25)
5384  *           '3' = [0.25-0.35)
5385  *           '4' = [0.35-0.45)
5386  *           '5' = [0.45-0.55)
5387  *           '6' = [0.55-0.65)
5388  *           '7' = [0.65-0.75)
5389  *           '8' = [0.75-0.85)
5390  *           '9' = [0.85-0.95)
 *           '*' = [0.95-1.00]
5392  *
5393  *           cm_PostCodeHB() is nearly the same function with the
5394  *           difference that HMM bands were used for the alignment,
5395  *           so we have to deal with offset issues.
5396  *
5397  *           Renamed from CMPostCode() [EPN, Wed Sep 14 06:20:35 2011].
5398  *
5399  * Args:     cm         - the model
5400  *           errbuf     - char buffer for reporting errors
5401  *           dsq        - the digitized sequence [1..L]
5402  *           L          - length of the dsq to align
5403  *           emit_mx    - the pre-filled emit matrix, must be non-NULL if do_optacc
 *           tr         - the parsetree with the emissions we're setting PPs for
5405  *           ret_ppstr  - RETURN: a string of the PP code values (0..L-1)
5406  *           ret_avgp   - RETURN: the average PP of all aligned residues
5407  *
5408  * Returns:  <eslOK>     on success.
5409  * Throws:   <eslEINVAL> if a posterior probability is > 1.01 or less than -0.01.
5410  */
/* Function: Fscore2postcode()
 *
 * Purpose:  Map the score <sc> to a single posterior probability code
 *           character. <sc> is first turned into a probability p with
 *           FScore2Prob(sc, 1.). If p + 0.05 reaches 1.0 the code is
 *           '*'; otherwise it is the digit obtained by truncating
 *           (p + 0.05) * 10 and offsetting from '0'.
 */
char
Fscore2postcode(float sc)
{
  float prob = FScore2Prob(sc, 1.);

  /* top bin: anything that rounds up to 1.0 */
  if (prob + 0.05 >= 1.0) return '*';

  /* the cast truncates toward zero, giving the bin index 0..9 */
  return (char) ((prob + 0.05) * 10.0) + '0';
}
5417 
/* Function: FScore2Prob()
 *
 * Purpose:  Convert the float log_2 odds score <sc> back to a
 *           probability. The null model probability <null> is
 *           needed for the conversion; callers in this file that
 *           have no null model pass 1.
 *
 * Returns:  0. when <sc> does not pass the NOT_IMPOSSIBLE() test,
 *           otherwise <null> * sreEXP2(<sc>).
 */
float
FScore2Prob(float sc, float null)
{
  if (NOT_IMPOSSIBLE(sc)) return (null * sreEXP2(sc));
  return 0.;
}
5430 
5431 int
cm_PostCode(CM_t * cm,char * errbuf,int L,CM_EMIT_MX * emit_mx,Parsetree_t * tr,char ** ret_ppstr,float * ret_avgp)5432 cm_PostCode(CM_t *cm, char *errbuf, int L, CM_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp)
5433 {
5434   int   status;
5435   int   x, v, i, j, r; /* counters */
5436   char *ppstr;       /* the PP string, created here */
5437   float p;           /* a probability */
5438   float sum_logp;    /* log of summed probability of all residues emitted thus far */
5439 
5440   ESL_ALLOC(ppstr, (L+1) * sizeof(char));
5441   sum_logp = IMPOSSIBLE;
5442 
5443   /* go through each node of the parsetree and determine post code for emissions */
5444   for (x = 0; x < tr->n; x++)
5445     {
5446       v = tr->state[x];
5447       i = tr->emitl[x];
5448       j = tr->emitr[x];
5449 
5450       /* Only P, L, R, and EL states have emissions. */
5451       if(cm->sttype[v] == EL_st) { /* EL state, we have to handle this guy special */
5452 	for(r = i; r <= j; r++) { /* we have to annotate from residues i..j */
5453 	  ppstr[r-1] = Fscore2postcode(emit_mx->l_pp[v][r]);
5454 	  sum_logp   = FLogsum(sum_logp, emit_mx->l_pp[v][r]);
5455 	  /* make sure we've got a valid probability */
5456 	  p = FScore2Prob(emit_mx->l_pp[v][r], 1.);
5457 	  if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for EL state v: %d residue r: %d > 1.00 (%.2f)", v, r, p);
5458 	  if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for EL state v: %d residue r: %d < 0.00 (%.2f)", v, r, p);
5459 	}
5460       }
5461       if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
5462 	ppstr[i-1] = Fscore2postcode(emit_mx->l_pp[v][i]);
5463 	sum_logp   = FLogsum(sum_logp, emit_mx->l_pp[v][i]);
5464 	/* make sure we've got a valid probability */
5465 	p = FScore2Prob(emit_mx->l_pp[v][i], 1.);
5466 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for left state v: %d residue i: %d > 1.00 (%.2f)", v, i, p);
5467 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for left state v: %d residue i: %d < 0.00 (%.2f)", v, i, p);
5468       }
5469       if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
5470 	ppstr[j-1] = Fscore2postcode(emit_mx->r_pp[v][j]);
5471 	sum_logp   = FLogsum(sum_logp, emit_mx->r_pp[v][j]);
5472 	/* make sure we've got a valid probability */
5473 	p = FScore2Prob(emit_mx->r_pp[v][j], 1.);
5474 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for right state v: %d residue i: %d > 1.00 (%.2f)", v, j, p);
5475 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for right state v: %d residue i: %d < 0.00 (%.2f)", v, j, p);
5476       }
5477     }
5478   ppstr[L] = '\0';
5479 
5480   if(ret_ppstr != NULL) *ret_ppstr = ppstr; else free(ppstr);
5481   if(ret_avgp  != NULL) *ret_avgp  = sreEXP2(sum_logp) / (float) L;
5482   return eslOK;
5483 
5484  ERROR:
5485   ESL_FAIL(eslEMEM, errbuf, "cm_Postcode(): Memory allocation error.");
5486   return status; /* never reached */
5487 }
5488 
5489 
5490 int
cm_PostCodeHB(CM_t * cm,char * errbuf,int L,CM_HB_EMIT_MX * emit_mx,Parsetree_t * tr,char ** ret_ppstr,float * ret_avgp)5491 cm_PostCodeHB(CM_t *cm, char *errbuf, int L, CM_HB_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp)
5492 {
5493   int   status;
5494   int   x, v, i, j, r; /* counters */
5495   char *ppstr;       /* the PP string, created here */
5496   float p;           /* a probability */
5497   float sum_logp;    /* log of summed probability of all residues emitted thus far */
5498 
5499   /* variables used for HMM bands */
5500   int ip_v, jp_v; /* i, j offset within bands */
5501   /* ptrs to cp9b info, for convenience */
5502   CP9Bands_t *cp9b = cm->cp9b;
5503   int     *imin  = cp9b->imin;
5504   int     *imax  = cp9b->imax;
5505   int     *jmin  = cp9b->jmin;
5506   int     *jmax  = cp9b->jmax;
5507 
5508   ESL_ALLOC(ppstr, (L+1) * sizeof(char));
5509   sum_logp = IMPOSSIBLE;
5510 
5511   /* go through each node of the parsetree and determine post code for emissions */
5512   for (x = 0; x < tr->n; x++)
5513     {
5514       v = tr->state[x];
5515       i = tr->emitl[x];
5516       j = tr->emitr[x];
5517 
5518       /* Only P, L, R, and EL states have emissions. */
5519       if(cm->sttype[v] == EL_st) { /* EL state, we have to handle this guy special */
5520 	for(r = i; r <= j; r++) { /* we have to annotate from residues i..j */
5521 	  /* remember the EL deck is non-banded */
5522 	  ppstr[r-1] = Fscore2postcode(emit_mx->l_pp[v][r]);
5523 	  sum_logp   = FLogsum(sum_logp, emit_mx->l_pp[v][r]);
5524 	  /* make sure we've got a valid probability */
5525 	  p = FScore2Prob(emit_mx->l_pp[v][r], 1.);
5526 	  if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for EL state v: %d residue r: %d > 1.00 (%.2f)", v, r, p);
5527 	  if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for EL state v: %d residue r: %d < 0.00 (%.2f)", v, r, p);
5528 	}
5529       }
5530       if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
5531 	ip_v = i - imin[v];
5532 	assert(i >= imin[v] && i <= imax[v]);
5533 	ESL_DASSERT1((i >= imin[v] && i <= imax[v]));
5534 	ppstr[i-1] = Fscore2postcode(emit_mx->l_pp[v][ip_v]);
5535 	sum_logp   = FLogsum(sum_logp, emit_mx->l_pp[v][ip_v]);
5536 	/* make sure we've got a valid probability */
5537 	p = FScore2Prob(emit_mx->l_pp[v][ip_v], 1.);
5538 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for left state v: %d residue i: %d > 1.00 (%.2f)", v, i, p);
5539 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for left state v: %d residue i: %d < 0.00 (%.2f)", v, i, p);
5540       }
5541       if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
5542 	jp_v = j - jmin[v];
5543 	assert(j >= jmin[v] && j <= jmax[v]);
5544 	ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
5545 	ppstr[j-1] = Fscore2postcode(emit_mx->r_pp[v][jp_v]);
5546 	sum_logp   = FLogsum(sum_logp, emit_mx->r_pp[v][jp_v]);
5547 	/* make sure we've got a valid probability */
5548 	p = FScore2Prob(emit_mx->r_pp[v][jp_v], 1.);
5549 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for right state v: %d residue i: %d > 1.00 (%.2f)", v, j, p);
5550 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_PostCode(): probability for right state v: %d residue i: %d < 0.00 (%.2f)", v, j, p);
5551       }
5552     }
5553   ppstr[L] = '\0';
5554 
5555   if(ret_ppstr != NULL) *ret_ppstr = ppstr; else free(ppstr);
5556   if(ret_avgp  != NULL) *ret_avgp  = sreEXP2(sum_logp) / (float) L;
5557   ESL_DPRINTF1(("#DEBUG: cm_PostcodeHB(): average pp %.4f\n", sreEXP2(sum_logp) / (float) L));
5558   /*printf("cm_PostcodeHB(): average pp %.4f\n", sreEXP2(sum_logp) / (float) L);*/
5559 
5560   return eslOK;
5561 
5562  ERROR:
5563   ESL_FAIL(eslEMEM, errbuf, "cm_PostcodeHB(): Memory allocation error.");
5564   return status; /* never reached */
5565 }
5566 
5567 /* Function: cm_InitializeOptAccShadowDZero()
5568  * Date:     EPN, Fri Nov 11 13:09:14 2011
5569  *
 * Purpose:  Initialize an optimal accuracy shadow (traceback) matrix
5571  *           for d == 0, based on transition scores. Optimal accuracy
5572  *           traceback matrices are special when d==0 because only
5573  *           emissions contribute to score so the value when d==0 is
5574  *           always IMPOSSIBLE. So d==0 cells are never modified
5575  *           during the OA DP recursion.
5576  *
5577  *           In this function we determine the appropriate state to
5578  *           traceback to for d==0 for all states v and endpoints
5579  *           j. If local ends are off, this is trivial; it is simply
5580  *           the child state y for which StateDelta(y) == 0 (there is
5581  *           always exactly 1 such child state for each v). If any
5582  *           such state is entered for d == 0 in the optimally
5583  *           accurate parsetree, the parse will continue along
5584  *           delete->delete transitions (all with d==0) until an E_st
5585  *           (or E_st's if we go through a B_st) is reached.
5586  *
 *           If local ends are on, it is more complex because we
 *           could do a local end instead of a string of deletes until
 *           an end is reached. We determine the score of the
 *           transitions from the current state v through y to the
 *           nearest E_st(s); if the score for entering an EL state is
 *           strictly greater than that score we set the shadow matrix
 *           to USED_EL, else we set it to y.
5594  *
5595  *           In some cases, we initialize to USED_EL for states v for
5596  *           which ELs are illegal (not a MATP_MP, MATL_ML, MATR_MR,
5597  *           BEGL_S or BEGR_nd). This means that the eventual optimal
5598  *           accuracy parsetree may contain an illegal EL, but I think
5599  *           this is unavoidable.
5600  *
5601  *           Upon entrance, yshadow should be initialized to USED_EL
5602  *           for all values.
5603  *
5604  *           Note that if we didn't call this function, the optimally
5605  *           accurate parsetree would not be affected, nor its score.
5606  *           This function is only useful because it affects the
5607  *           output of the parsetree's alignment by only using a zero
5608  *           length EL transitions only when it is less expensive than
5609  *           a string of deletes.
5610  *
5611  *           If called by a truncated optimal accuracy function
5612  *           (cm_TrOptAccAlign()), yshadow is really a <Jyshadow>
5613  *           matrix from a CM_TR_SHADOW_MX object. Otherwise it is a
5614  *           <yshadow> matrix from a CM_SHADOW_MX object.
5615  *
 * Args:     cm         - the model; its flags, state types, child indices,
 *                        transition scores and local end scores are read
5617  *           errbuf     - for reporting errors
 *           yshadow    - the shadow matrix to update, only cells for which
 *                        d==StateDelta(v) (so the child's d is 0) will be modified.
5620  *           L          - length of the sequence we're aligning
5621  *
5622  *
5623  * Returns:  eslOK on success
5624  *
5625  * Throws:   eslEMEM on memory error.
5626  */
5627 int
cm_InitializeOptAccShadowDZero(CM_t * cm,char * errbuf,char *** yshadow,int L)5628 cm_InitializeOptAccShadowDZero(CM_t *cm, char *errbuf, char ***yshadow, int L)
5629 {
5630   int   status;
5631   float *esc;  /* [0..v..M-1] summed transition score for getting from v to nearest E_st(s)
5632 		* through only delete states */
5633   float endsc; /* score for transitioning to an EL state */
5634   int have_el; /* are local ends on? */
5635   int v;       /* state counter */
5636   int j;       /* sequence position */
5637   int y, z;    /* BEGL_S and BEGR_S states */
5638   int sd;      /* StateDelta(v) */
5639   int yoffset; /* child state index */
5640 
5641   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
5642   if(have_el) {
5643     ESL_ALLOC(esc, sizeof(float) * cm->M);
5644     esl_vec_FSet(esc, cm->M, 0.);
5645     /* determine score for transitioning to an EL (same for all legal states) */
5646     v = 0; while(! NOT_IMPOSSIBLE(cm->endsc[v])) v++;
5647     endsc = cm->endsc[v];
5648     /*printf("endsc: %.4f end %.4f\n", endsc, cm->end[v]);*/
5649   }
5650   else {
5651     esc = NULL;
5652     endsc = IMPOSSIBLE;
5653   }
5654 
5655   for(v = cm->M-1; v >= 0; v--) {
5656     sd = StateDelta(cm->sttype[v]);
5657     if(cm->sttype[v] == E_st) {
5658       if(esc != NULL) esc[v] = 0.;
5659     }
5660     else if(cm->sttype[v] == B_st) {
5661       if(esc != NULL) {
5662 	y = cm->cfirst[v]; /* left  subtree */
5663 	z = cm->cnum[v];   /* right subtree */
5664 	esc[v] = esc[y] + esc[z];
5665       }
5666     }
5667     else {
5668       /* determine the one and only child state y for which StateDelta(y) == 0 */
5669       y = cm->cfirst[v];
5670       while(StateDelta(cm->sttype[y]) != 0) y++;
5671       yoffset = y-cm->cfirst[v];
5672       assert(cm->ndidx[v] == (cm->ndidx[y]-1));
5673       if(esc != NULL) {
5674 	esc[v] = esc[y] + cm->tsc[v][yoffset];
5675 	if(endsc > esc[v]) yoffset = USED_EL;
5676 	/* else yoffset is not changed */
5677 
5678 	/*printf("EL: %10.4f  d->d->e %10.4f  ", endsc, esc[v]);
5679 	  if(yoffset != USED_EL) printf("  path for v: %4d %4s %2s is through deletes!\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]));
5680 	  else printf("\n");
5681 	*/
5682       }
5683       for(j = sd; j <= L; j++) yshadow[v][j][sd] = yoffset;
5684     }
5685   }
5686 
5687   if(esc != NULL) free(esc);
5688   return eslOK;
5689 
5690  ERROR:
5691   ESL_FAIL(eslEMEM, errbuf, "Out of memory");
5692 }
5693 
5694 
5695 /* Function: cm_InitializeOptAccShadowDZeroHB()
5696  * Date:     EPN, Fri Nov 11 14:00:55 2011
5697  *
5698  * Purpose:  Same as cm_InitializeOptAccShadowDZero() but for HMM
5699  *           banded matrices, see that function for more information.
5700  *
 * Args:     cm         - the model; its flags, state types, child indices,
 *                        transition scores and local end scores are read
5702  *           cp9b       - CP9 Bands for current sequence
5703  *           errbuf     - for reporting errors
 *           yshadow    - the shadow matrix to update, only cells for which
 *                        d==StateDelta(v) (so the child's d is 0) will be modified.
5706  *           L          - length of the sequence we're aligning
5707  *
5708  *
5709  * Returns:  eslOK on success
5710  *
5711  * Throws:   eslEMEM on memory error.
5712  */
5713 int
cm_InitializeOptAccShadowDZeroHB(CM_t * cm,CP9Bands_t * cp9b,char * errbuf,char *** yshadow,int L)5714 cm_InitializeOptAccShadowDZeroHB(CM_t *cm, CP9Bands_t *cp9b, char *errbuf, char ***yshadow, int L)
5715 {
5716   int   status;
5717   float *esc;  /* [0..v..M-1] summed transition score for getting from v to nearest E_st(s)
5718 		* through only delete states */
5719   float endsc; /* score for transitioning to an EL state */
5720   int have_el; /* are local ends on? */
5721   int v;       /* state counter */
5722   int j;       /* sequence position */
5723   int y, z;    /* BEGL_S and BEGR_S states */
5724   int sd;      /* StateDelta(v) */
5725   int yoffset; /* child state index */
5726 
5727   /* variables needed because we've got HMM bands */
5728   int sdr;     /* StateRightDelta(v) */
5729   int jp_v;    /* j offset for state v given HMM bands */
5730   int jp_y;    /* j offset for state y given HMM bands */
5731   int dp_v;    /* d offset for state v given HMM bands */
5732 
5733   /* pointers to cp9b data for convenience */
5734   int         *jmin = cp9b->jmin;
5735   int         *jmax = cp9b->jmax;
5736   int       **hdmin = cp9b->hdmin;
5737   int       **hdmax = cp9b->hdmax;
5738 
5739   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
5740   if(have_el) {
5741     ESL_ALLOC(esc, sizeof(float) * cm->M);
5742     esl_vec_FSet(esc, cm->M, IMPOSSIBLE);
5743     /* determine score for transitioning to an EL (same for all legal states) */
5744     v = 0; while(! NOT_IMPOSSIBLE(cm->endsc[v])) v++;
5745     endsc = cm->endsc[v];
5746     /*printf("endsc: %.4f end %.4f\n", endsc, cm->end[v]);*/
5747   }
5748   else {
5749     esc = NULL;
5750     endsc = IMPOSSIBLE;
5751   }
5752 
5753   for(v = cm->M-1; v >= 0; v--) {
5754     if(cm->cp9b->Jvalid[v]) { /* only valid v values will have non-impossible esc[v] values */
5755       sd  = StateDelta(cm->sttype[v]);
5756       sdr = StateRightDelta(cm->sttype[v]);
5757       if(cm->sttype[v] == E_st) {
5758 	if(esc != NULL) esc[v] = 0.;
5759       }
5760       else if(cm->sttype[v] == B_st) {
5761 	if(esc != NULL) {
5762 	  y = cm->cfirst[v]; /* left  subtree */
5763 	  z = cm->cnum[v];   /* right subtree */
5764 	  esc[v] = esc[y] + esc[z];
5765 	}
5766       }
5767       else {
5768 	/* determine the one and only child state y for which StateDelta(y) == 0 */
5769 	y = cm->cfirst[v];
5770 	while(StateDelta(cm->sttype[y]) != 0) y++;
5771 	yoffset = y-cm->cfirst[v];
5772 	assert(cm->ndidx[v] == (cm->ndidx[y]-1));
5773 	if(esc != NULL) {
5774 	esc[v] = esc[y] + cm->tsc[v][yoffset];
5775 	if(endsc > esc[v]) yoffset = USED_EL;
5776 	/* else yoffset is not changed */
5777 
5778 #if 0
5779 	printf("EL: %10.4f  d->d->e %10.4f  ", endsc, esc[v]);
5780 	if(yoffset != USED_EL) printf("  path for v %4d %4s %2s is through deletes!\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]));
5781 	else printf("\n");
5782 #endif
5783 
5784 	}
5785 	for(j = ESL_MAX(sd, jmin[v]); j <= jmax[v]; j++) {
5786 	  jp_v = j-jmin[v];
5787 	  if(hdmin[v][jp_v] <= hdmax[v][jp_v]) { /* at least one valid d exists for this v and j */
5788 	    if((j-sdr) >= jmin[y] && (j-sdr) <= jmax[y]) { /* j-sdr is valid for state y */
5789 	      jp_y = j - sdr - jmin[y];
5790 	      if(sd >= hdmin[v][jp_v] && sd <= hdmax[v][jp_v] && /* d==sd is valid for state v and end posn j */
5791 		 0  >= hdmin[y][jp_y] &&  0 <= hdmax[y][jp_y]) { /* d==0  is valid for state y and end posn j-sdr */
5792 		dp_v = sd - hdmin[v][jp_v];
5793 		yshadow[v][jp_v][dp_v] = yoffset;
5794 	      }
5795 	    }
5796 	  }
5797 	}
5798       }
5799     }
5800   }
5801 
5802   if(esc != NULL) free(esc);
5803   return eslOK;
5804 
5805  ERROR:
5806   ESL_FAIL(eslEMEM, errbuf, "Out of memory");
5807 }
5808 
5809 
5810 /*****************************************************************
5811  * Benchmark driver
5812  *****************************************************************/
5813 #ifdef IMPL_ALIGN_BENCHMARK
5814 /* Next line is not optimized (debugging on) on MacBook Pro:
5815  * gcc   -o benchmark-align -std=gnu99 -g -Wall -I. -L. -I../hmmer/src -L../hmmer/src -I../easel -L../easel -DIMPL_ALIGN_BENCHMARK cm_dpalign.c -linfernal -lhmmer -leasel -lm
5816  * Next line is optimized (debugging not on) on wyvern:
5817  * gcc   -o benchmark-align -std=gnu99 -O3 -fomit-frame-pointer -malign-double -fstrict-aliasing -pthread -I. -L. -I../hmmer/src -L../hmmer/src -I../easel -L../easel -DIMPL_ALIGN_BENCHMARK cm_dpalign.c -linfernal -lhmmer -leasel -lm
5818  * ./benchmark-align <cmfile>
5819  */
5820 
5821 #include "esl_config.h"
5822 #include "config.h"
5823 
5824 #include <stdio.h>
5825 #include <stdlib.h>
5826 #include <string.h>
5827 #include <time.h>
5828 
5829 #include "easel.h"
5830 #include <esl_getopts.h>
5831 #include <esl_histogram.h>
5832 #include <esl_sqio.h>
5833 #include <esl_stats.h>
5834 #include <esl_stopwatch.h>
5835 #include <esl_vectorops.h>
5836 #include <esl_wuss.h>
5837 
5838 #include "hmmer.h"
5839 
5840 #include "infernal.h"
5841 
5842 static ESL_OPTIONS options[] = {
5843   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
5844   { "-h",        eslARG_NONE,    NULL, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",           0 },
5845   { "-l",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "configure CM/HMM for local alignment", 0 },
5846   { "--cykout",  eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "run CYKOutside, to make sure it agrees with CYK (Inside)", 0 },
5847   { "--sums",    eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "use posterior sums during HMM band calculation (widens bands)", 0 },
5848   { "--dlev",    eslARG_INT,    "0",   NULL, "0<=n<=3",NULL,NULL,NULL, "set verbosity of debugging print statements to <n>", 0 },
5849   { "--hmmcheck",eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "check that HMM posteriors are correctly calc'ed", 0 },
5850   { "--cmcheck", eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "check that CM posteriors are correctly calc'ed", 0 },
5851   { "--optacc",  eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "also execute optimal accuracy HMM banded alignment alg", 0 },
5852   { "--tau",     eslARG_REAL,   "5e-6",NULL, "0<x<1",NULL, NULL, NULL, "set tail loss prob for HMM bands to <x>", 0 },
5853   { "--post",   eslARG_NONE,    FALSE, NULL, NULL,  NULL,  NULL, NULL, "also execute fast float HMM banded Inside/Outside alignment algs", 0 },
5854   { "--mxsize",  eslARG_REAL, "256.0", NULL, "x>0.",NULL,  NULL, NULL, "set maximum allowable DP matrix size to <x> (Mb)", 0 },
5855   { "--nonbanded",eslARG_NONE,  FALSE, NULL, NULL,  NULL,  NULL, NULL, "also execute non-banded alignment algorithms", 0 },
5856   { "--tr",       eslARG_NONE,  FALSE, NULL, NULL,  NULL,  NULL, NULL, "dump parsetrees to stdout", 0 },
5857   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
5858 };
5859 static char usage[]  = "[-options] <cmfile> <seqfile>";
5860 static char banner[] = "benchmark driver for fast HMM banded CYK alignment and scanning algorithm";
5861 
5862 int
main(int argc,char ** argv)5863 main(int argc, char **argv)
5864 {
5865   int status;
5866   ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
5867   CM_t           *cm;
5868   ESL_STOPWATCH  *w       = esl_stopwatch_Create();
5869   ESL_ALPHABET   *abc     = NULL;
5870   int             i;
5871   float           sc;
5872   float           pp;
5873   char           *cmfile  = esl_opt_GetArg(go, 1);
5874   char           *seqfile = esl_opt_GetArg(go, 2);
5875   CM_FILE        *cmfp  = NULL;  /* open input CM file stream */
5876   ESL_SQFILE     *sqfp  = NULL;  /* open sequence input file stream */
5877   ESL_SQ         *sq    = NULL;  /* a sequence */
5878   int             L;             /* length of sequence */
5879   char            errbuf[eslERRBUFSIZE];
5880   float           size_limit = esl_opt_GetReal(go, "--mxsize");
5881   int             do_check   = esl_opt_GetBoolean(go, "--cmcheck");
5882   float           parsetree_sc, parsetree_struct_sc;
5883   Parsetree_t    *tr    = NULL;
5884 
5885   /* open CM file */
5886   if ((status = cm_file_Open(cmfile, NULL, FALSE, &(cmfp), errbuf)) != eslOK) cm_Fail(errbuf);
5887   if ((status = cm_file_Read(cmfp, TRUE, &abc, &cm))                != eslOK) cm_Fail(cmfp->errbuf);
5888   cm_file_Close(cmfp);
5889 
5890   /* open the sequence file */
5891   status = esl_sqfile_OpenDigital(cm->abc, seqfile, eslSQFILE_UNKNOWN, NULL, &sqfp);
5892   if (status == eslENOTFOUND)    esl_fatal("File %s doesn't exist or is not readable\n", seqfile);
5893   else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of sequence file %s\n", seqfile);
5894   else if (status == eslEINVAL)  esl_fatal("Can't autodetect stdin or .gz.");
5895   else if (status != eslOK)      esl_fatal("Sequence file open failed with error %d.\n", status);
5896 
5897   /* configure CM */
5898   cm->align_opts  |= CM_ALIGN_HBANDED;
5899   if(esl_opt_GetBoolean(go, "--sums")) cm->align_opts |= CM_ALIGN_SUMS;
5900   if(esl_opt_GetBoolean(go, "-l")) {
5901     cm->config_opts  |= CM_CONFIG_LOCAL;
5902     cm->config_opts  |= CM_CONFIG_HMMLOCAL;
5903     cm->config_opts  |= CM_CONFIG_HMMEL;
5904   }
5905   if(esl_opt_GetBoolean(go, "--hmmcheck")) cm->align_opts |= CM_ALIGN_CHECKFB;
5906   if(esl_opt_GetBoolean(go, "--cmcheck"))  cm->align_opts |= CM_ALIGN_CHECKINOUT;
5907   cm->tau = esl_opt_GetReal(go, "--tau");
5908 
5909   if((status = cm_Configure(cm, errbuf, -1)) != eslOK) cm_Fail(errbuf);
5910 
5911   /* setup logsum lookups (could do this only if nec based on options, but this is safer) */
5912   init_ilogsum();
5913   FLogsumInit();
5914 
5915   i = 0;
5916   sq = esl_sq_CreateDigital(cm->abc);
5917   while((status = esl_sqio_Read(sqfp, sq)) == eslOK) {
5918     i++;
5919     L = sq->n;
5920 
5921     esl_stopwatch_Start(w);
5922     if((status = cp9_Seq2Bands(cm, errbuf, cm->cp9_mx, cm->cp9_bmx, cm->cp9_bmx, sq->dsq, 1, L, cm->cp9b, FALSE, PLI_PASS_STD_ANY, 0)) != eslOK) cm_Fail(errbuf);
5923     esl_stopwatch_Stop(w);
5924     printf("%4d %-30s %17s", i, "Exptl Band calc:", "");
5925     esl_stopwatch_Display(stdout, w, "CPU time: ");
5926 
5927     esl_stopwatch_Start(w);
5928     if((status = cm_AlignHB(cm, errbuf, sq->dsq, L, size_limit, FALSE, FALSE, cm->hb_mx, cm->hb_shmx, NULL, NULL, NULL, NULL, &tr, &pp, &sc)) != eslOK) cm_Fail(errbuf);
5929     printf("%4d %-30s %10.4f bits ", (i), "cm_AlignHB() CYK:", sc);
5930     esl_stopwatch_Stop(w);
5931     esl_stopwatch_Display(stdout, w, " CPU time: ");
5932 
5933     if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, sq->dsq);
5934     ParsetreeScore(cm, NULL, NULL, tr, sq->dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
5935     FreeParsetree(tr);
5936     printf("Parsetree score      : %.4f           (FULL LENGTH CYK)\n", parsetree_sc);
5937 
5938     if(esl_opt_GetBoolean(go, "--cykout")) {
5939       esl_stopwatch_Start(w);
5940       if((status = cm_CYKOutsideAlignHB(cm, errbuf, sq->dsq, L, size_limit, TRUE, cm->hb_omx, cm->hb_mx, &sc)) != eslOK) cm_Fail(errbuf);
5941       printf("%4d %-30s %10.4f bits ", (i), "cm_Align() CYK:", sc);
5942       esl_stopwatch_Stop(w);
5943       esl_stopwatch_Display(stdout, w, " CPU time: ");
5944     }
5945 
5946     if(esl_opt_GetBoolean(go, "--nonbanded")) {
5947       esl_stopwatch_Start(w);
5948       if((status = cm_Align(cm, errbuf, sq->dsq, L, size_limit, FALSE, FALSE, cm->nb_mx, cm->nb_shmx, NULL, cm->nb_emx, NULL, NULL, &tr, &pp, &sc)) != eslOK) cm_Fail(errbuf);
5949       printf("%4d %-30s %10.4f bits ", (i), "cm_Align() CYK:", sc);
5950       esl_stopwatch_Stop(w);
5951       esl_stopwatch_Display(stdout, w, " CPU time: ");
5952 
5953       if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, sq->dsq);
5954       ParsetreeScore(cm, NULL, NULL, tr, sq->dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
5955       FreeParsetree(tr);
5956       printf("Parsetree score      : %.4f           (FULL LENGTH CYK)\n", parsetree_sc);
5957 
5958       if(esl_opt_GetBoolean(go, "--cykout")) {
5959 	esl_stopwatch_Start(w);
5960 	if((status = cm_CYKOutsideAlign(cm, errbuf, sq->dsq, L, size_limit, TRUE, cm->nb_omx, cm->nb_mx, &sc)) != eslOK) cm_Fail(errbuf);
5961 	printf("%4d %-30s %10.4f bits ", (i), "cm_Align() CYK:", sc);
5962 	esl_stopwatch_Stop(w);
5963 	esl_stopwatch_Display(stdout, w, " CPU time: ");
5964       }
5965     }
5966     printf("\n");
5967 
5968     if(esl_opt_GetBoolean(go, "--post")) {
5969       esl_stopwatch_Start(w);
5970       /* need alpha matrix from Inside to do Outside */
5971       if((status = cm_InsideAlignHB(cm, errbuf, sq->dsq, L, size_limit, cm->hb_mx, &sc)) != eslOK) cm_Fail(errbuf);
5972       printf("%4d %-30s %10.4f bits ", (i), "cm_InsideAlignHB():", sc);
5973       esl_stopwatch_Stop(w);
5974       esl_stopwatch_Display(stdout, w, " CPU time: ");
5975 
5976       esl_stopwatch_Start(w);
5977       /* need alpha matrix from Inside to do Outside */
5978       if((status = cm_OutsideAlignHB(cm, errbuf, sq->dsq, L, size_limit, do_check, cm->hb_omx, cm->hb_mx, &sc)) != eslOK) cm_Fail(errbuf);
5979       printf("%4d %-30s %10.4f bits ", (i), "cm_OutsideAlignHB():", sc);
5980       esl_stopwatch_Stop(w);
5981       esl_stopwatch_Display(stdout, w, " CPU time: ");
5982 
5983       if(esl_opt_GetBoolean(go, "--nonbanded")) {
5984 	esl_stopwatch_Start(w);
5985 	/* need alpha matrix from Inside to do Outside */
5986 	if((status = cm_InsideAlign(cm, errbuf, sq->dsq, L, size_limit, cm->nb_mx, &sc)) != eslOK) cm_Fail(errbuf);
5987 	printf("%4d %-30s %10.4f bits ", (i), "cm_InsideAlign():", sc);
5988 	esl_stopwatch_Stop(w);
5989 	esl_stopwatch_Display(stdout, w, " CPU time: ");
5990 
5991 	esl_stopwatch_Start(w);
5992 	/* need alpha matrix from Inside to do Outside */
5993 	if((status = cm_OutsideAlign(cm, errbuf, sq->dsq, L, size_limit, do_check, cm->nb_omx, cm->nb_mx, &sc)) != eslOK) cm_Fail(errbuf);
5994 	printf("%4d %-30s %10.4f bits ", (i), "cm_OutsideAlign():", sc);
5995 	esl_stopwatch_Stop(w);
5996 	esl_stopwatch_Display(stdout, w, " CPU time: ");
5997       }
5998     }
5999 
6000     if(esl_opt_GetBoolean(go, "--optacc")) {
6001       esl_stopwatch_Start(w);
6002       if((status = cm_AlignHB(cm, errbuf, sq->dsq, L, size_limit, TRUE, FALSE, cm->hb_mx, cm->hb_shmx, cm->hb_omx, cm->hb_emx, NULL, NULL, &tr, &pp, &sc)) != eslOK) cm_Fail(errbuf);
6003       printf("%4d %-30s %10.4f avgpp ", (i), "cm_AlignHB() OA:", pp);
6004       esl_stopwatch_Stop(w);
6005       esl_stopwatch_Display(stdout, w, " CPU time: ");
6006 
6007       if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, sq->dsq);
6008       ParsetreeScore(cm, NULL, NULL, tr, sq->dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
6009       FreeParsetree(tr);
6010       printf("Parsetree score      : %.4f           (FULL LENGTH OPTACC)\n", parsetree_sc);
6011 
6012       if(esl_opt_GetBoolean(go, "--nonbanded")) {
6013 	esl_stopwatch_Start(w);
6014 	if((status = cm_Align(cm, errbuf, sq->dsq, L, size_limit, TRUE, FALSE, cm->nb_mx, cm->nb_shmx, cm->nb_omx, cm->nb_emx, NULL, NULL, &tr, &pp, &sc)) != eslOK) cm_Fail(errbuf);
6015 	printf("%4d %-30s %10.4f avgpp ", (i), "cm_Align() OA:", sc);
6016 	esl_stopwatch_Stop(w);
6017 	esl_stopwatch_Display(stdout, w, " CPU time: ");
6018 
6019 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, sq->dsq);
6020 	ParsetreeScore(cm, NULL, NULL, tr, sq->dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
6021 	FreeParsetree(tr);
6022 	printf("Parsetree score      : %.4f           (FULL LENGTH OPTACC)\n", parsetree_sc);
6023       }
6024     }
6025     printf("\n");
6026     esl_sq_Reuse(sq);
6027   }
6028   if(status != eslEOF) cm_Fail("ERROR reading sequence file, sequence number %d\n", i);
6029 
6030   FreeCM(cm);
6031   esl_sq_Destroy(sq);
6032   esl_alphabet_Destroy(abc);
6033   esl_stopwatch_Destroy(w);
6034   esl_getopts_Destroy(go);
6035   esl_sqfile_Close(sqfp);
6036 
6037   return 0;
6038 }
6039 #endif /*IMPL_ALIGN_BENCHMARK*/
6040