1 /* cm_dpalign_trunc.c
2  *
3  * DP functions for truncated HMM banded and non-banded, non-D&C CM
4  * alignment of a full target sequence.
5  *
6  * All functions use a DP matrix and/or shadow matrix, either
7  * non-banded (CM_TR_MX, CM_TR_SHADOW_MX) or HMM banded (CM_TR_HB_MX,
8  * CM_TR_HB_SHADOW_MX).  The HMM banded matrices only have cells
9  * within bands allocated, and further for each state v, J, L, R, or T
10  * matrix cells are only available if cp9b->do_{J,L,R,T}[v]. The bands
11  * are derived from a HMM Forward/Backward alignment of the target
12  * sequence and are stored in a CP9Bands_t object, a pointer to which
13  * must exist in the cm (CM_t object).
14  *
15  * The non-banded, non-D&C alignment functions are mainly useful for
16  * understanding and/or debugging the HMM banded versions.  These are
17  * consistent (same logic/code organization) with their HMM banded
18  * counterparts. They are memory intensive. For small memory
19  * non-banded alignment functions see truncyk.c.
20  *
21  * Many of the functions here were based on analogous ones for
22  * standard (non-truncated) CYK/Inside/Outside alignment in
23  * cm_dpalign.c.
24  *
25  * List of functions:
26  * non-banded version          HMM banded version
27  * -------------------------   -----------------------
28  * cm_tr_alignT()              cm_tr_alignT_hb()
29  * cm_TrAlignSizeNeeded()      cm_TrAlignSizeNeededHB()
30  * cm_TrAlign()                cm_TrAlignHB()
31  * cm_TrCYKInsideAlign()       cm_TrCYKInsideAlignHB()
32  * cm_TrInsideAlign()          cm_TrInsideAlignHB()
33  * cm_TrOptAccAlign()          cm_TrOptAccAlignHB()
34  * cm_TrCYKOutsideAlign()*     cm_TrCYKOutsideAlignHB()*
35  * cm_TrOutsideAlign()         cm_TrOutsideAlignHB()
36  * cm_TrPosterior()            cm_TrPosteriorHB()
37  * cm_TrEmitterPosterior()     cm_TrEmitterPosteriorHB()
38  *
39  * * cm_TrCYKOutsideAlign() and cm_TrCYKOutsideAlignHB() are for
40  * reference and debugging only. They're not called by any of the main
41  * Infernal programs, only by test programs.
42  *
43  * Notes specific to truncated alignment:
44  *
45  * In truncated alignment, four matrices (J, L, R, and T) exist where
46  * there was only one in standard alignment (which is essentially
47  * implicitly the J matrix).  For each mode, TrCYKInside and TrInside
48  * functions will find the optimal alignment for that mode and store
49  * it in {J,L,R,T}alpha[0][L][L] (or the HMM banded equivalent
50  * {J,L,R,T}alpha[0][jp_0][Lp_0]). The overall optimal alignment is
51  * the highest scoring optimal alignment in any mode. That is, in
52  * TrInside we have to choose the mode and then the Inside score is
53  * the score of all alignments in that mode, not the score of all
54  * alignments in any mode.
55  *
56  * Naively, in non-banded mode we have to fill in all four matrices,
57  * but in many cases we already know the marginal mode of the entire
58  * alignment. This will always be TRUE in TrOutside, TrPosterior and
59  * TrEmitterPosterior because we must have already done TrCYKInside or
60  * TrInside and so we already know the optimal mode. In TrOutside it
61  * is vital that we only consider alignments in the optimal mode or
62  * else the TrPosterior values will be invalid (see ELN3 p.10-11). In
63  * some cases we will know the optimal mode when we enter
64  * TrCYKInside or TrInside functions, i.e. if we're in a search
65  * pipeline and we've already determined there is a hit in a
66  * particular marginal mode by running a scanning TrCYK or TrInside
67  * function (see cm_dpsearch_trunc.c).
68  *
69  * If we know the optimal mode, stored in <optimal_mode>, upon
70  * entering any of these functions we can usually save time by only
71  * filling in a subset of the four matrices, this is controlled within
72  * the functions by the <fill_J>, <fill_L>, <fill_R> and <fill_T>
73  * parameters. Specifically:
74  *
75  * <optimal_mode>       <fill_J>  <fill_L>  <fill_R>  <fill_T>
76  * ---------------  --------  --------  --------  --------
77  * TRMODE_J         TRUE      FALSE     FALSE     FALSE
78  * TRMODE_L         TRUE      TRUE      FALSE     FALSE
79  * TRMODE_R         TRUE      FALSE     TRUE      FALSE
80  * TRMODE_T         TRUE      TRUE      TRUE      TRUE
81  * TRMODE_UNKNOWN   TRUE      TRUE      TRUE      TRUE
82  *
83  * The <fill_*> values are set in cm_TrFillFromMode(). We then use the
84  * <fill_*> variables to save time in the DP recursions by skipping
85  * unnecessary matrices. Since fill_J is always TRUE we don't actually
86  * need a fill_J parameter.  (For Outside functions fill_T
87  * is implicitly TRUE but we only have to set Tbeta values for a
88  * single cell per B state, the one corresponding to a full alignment
89  * of all 1..L residues at that state. This is done when initializing
90  * the Tbeta matrix.)
91  *
92  * <fill_*> values are used in both non-banded and HMM banded
93  * matrices. In HMM banded functions we have similar per-state
94  * information stored in cp9b->Jvalid[], cp9b->Lvalid[],
95  * cp9b->Rvalid[], cp9b->Tvalid[] arrays which dictate if we need to
96  * fill in J,L,R,T decks for each state. These were determined based
97  * on the HMM posterior values for the start and end positions of the
98  * alignment (see
99  * hmmband.c:cp9_MarginalCandidatesFromStartEndPositions()).  These
100  * values can be used in combination with the fill_* values, that is,
101  * both are relevant. For example, if fill_L is FALSE, then we never
102  * fill in L matrix values for any state. But if it is TRUE, we only
103  * fill in L matrix values for those states for which cp9b->Lvalid[]
104  * is TRUE.
105  *
106  * EPN, Wed Sep  7 12:13:00 2011
107  */
108 
109 #include "esl_config.h"
110 #include "p7_config.h"
111 #include "config.h"
112 
113 #include <stdio.h>
114 #include <stdlib.h>
115 #include <assert.h>
116 #include <math.h>
117 
118 #include "easel.h"
119 #include "esl_sqio.h"
120 #include "esl_stack.h"
121 #include "esl_stopwatch.h"
122 #include "esl_vectorops.h"
123 
124 #include "hmmer.h"
125 
126 #include "infernal.h"
127 
128 static int   cm_tr_alignT   (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char optimal_mode, int pass_idx, int do_optacc, CM_TR_MX    *mx, CM_TR_SHADOW_MX    *shmx, CM_TR_EMIT_MX    *emit_mx, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc_or_pp);
129 static int   cm_tr_alignT_hb(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char optimal_mode, int pass_idx, int do_optacc, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_EMIT_MX *emit_mx, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc_or_pp);
130 
131 /* Function: cm_tr_alignT()
132  * Date:     EPN, Sat Sep 10 11:25:37 2011
133  *           EPN, Sun Nov 18 19:21:30 2007 [cm_alignT()]
134  *
135  * Note:     based on insideT() [SRE, Fri Aug 11 12:08:18 2000 [Pittsburgh]]
136  *
137  * Purpose: Call either cm_TrCYKInsideAlign() (if !<do_optacc>), or
138  *           cm_TrOptAccAlign() (if <do_optacc>), get vjd shadow
139  *           matrix; then trace back and append to an existing but
140  *           empty parsetree tr.  The full sequence 1..L will be
141  *           aligned. This function is nearly identical to
142  *           cm_tr_alignT_hb() with the important difference that HMM
143  *           bands are not used here.
144  *
145  *           If <do_optacc>==TRUE then <emit_mx> must != NULL and
146  *           <optimal_mode> must not be TRMODE_UNKNOWN, it will have been
147  *           determined by caller from a cm_TrInsideAlign() call and
148  *           passed in. If <do_optacc> is FALSE, then we're doing CYK
149  *           alignment and we may or may not know the truncation mode
150  *           of the alignment yet. If we know (e.g. if we're being
151  *           called from a search/scan pipeline that already ran a
152  *           scanning trCYK) then <optimal_mode> will be TRMODE_J,
153  *           TRMODE_L, TRMODE_R or TRMODE_T, if not (e.g. if we're
154  *           being called for 'cmalign') then <optimal_mode> will be
155  *           TRMODE_UNKNOWN and we'll determine it via CYK.
156  *
157  * Args:     cm           - the model
158  *           errbuf       - char buffer for reporting errors
159  *           dsq          - the digitized sequence [1..L]
160  *           L            - length of the dsq to align
161  *           size_limit   - max size in Mb for DP matrix
162  *           optimal_mode - the optimal alignment mode, TRMODE_UNKNOWN if unknown
163  *           pass_idx     - pipeline pass index, indicates what truncation penalty to use
164  *           do_optacc    - TRUE to align with optimal accuracy, else use CYK
165  *           mx           - the DP matrix to fill in
166  *           shmx         - the shadow matrix to fill in
167  *           emit_mx      - the pre-filled emit matrix, must be non-NULL if do_optacc
168  *           ret_tr       - RETURN: the optimal parsetree
169  *           ret_mode     - RETURN: mode of optimal alignment (TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T)
170  *           ret_sc_or_pp - RETURN: optimal score (CYK if !do_optacc, else avg PP of all 1..L residues)
171  *
172  * Returns:  <eslOK>     on success.
173  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>, in
174  *                       this case, alignment has been aborted, ret_* variables are not valid
175  *           <eslEINVAL> on invalid tro or traceback problem: bogus state
176  */
177 int
cm_tr_alignT(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char optimal_mode,int pass_idx,int do_optacc,CM_TR_MX * mx,CM_TR_SHADOW_MX * shmx,CM_TR_EMIT_MX * emit_mx,Parsetree_t ** ret_tr,char * ret_mode,float * ret_sc_or_pp)178 cm_tr_alignT(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char optimal_mode, int pass_idx, int do_optacc,
179              CM_TR_MX *mx, CM_TR_SHADOW_MX *shmx, CM_TR_EMIT_MX *emit_mx, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc_or_pp)
180 {
181   int       status;
182   Parsetree_t *tr = NULL;       /* the parsetree */
183   float     sc;			/* the score of the CYK alignment */
184   float     pp;			/* avg pp of all emitted residues in optacc alignment */
185   ESL_STACK *pda_i;             /* stack that tracks bifurc parent of a right start */
186   ESL_STACK *pda_c;             /* stack that tracks mode of bifurc parent of a right start */
187   int       v,j,d,i;		/* indices for state, seq positions */
188   int       k;			/* right subtree len for bifurcs */
189   int       y, yoffset;         /* child state y, it's offset */
190   int       bifparent;          /* B_st parent */
191   /* variables specific to truncated version */
192   char      mode;               /* current truncation mode: TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T */
193   char      prvmode, nxtmode;   /* previous, next truncation mode */
194   int       b;                  /* local entry state for best overall alignment */
195   int       pty_idx;            /* index for truncation penalty, determined by pass_idx */
196 
197   if(do_optacc) {
198     if((status = cm_TrOptAccAlign(cm, errbuf, dsq, L,
199 				  size_limit,   /* max size of DP matrix */
200 				  optimal_mode, /* marginal mode of optimal alignment */
201 				  pass_idx,     /* truncation penalty index */
202 				  mx,	        /* the DP matrix, to expand and fill-in */
203 				  shmx,	        /* the shadow matrix, to expand and fill-in */
204 				  emit_mx,      /* pre-calc'ed emit matrix */
205 				  &b,           /* the entry point for optimal alignment */
206 				  &pp))         /* avg post prob of all emissions in optimally accurate parsetree */
207        != eslOK) return status;
208     mode  = optimal_mode;
209   }
210   else {
211     if((status = cm_TrCYKInsideAlign(cm, errbuf, dsq, L,
212 				     size_limit,         /* max size of DP matrix */
213 				     optimal_mode,       /* marginal mode of optimal alignment, TRMODE_UNKNOWN if unknown */
214 				     pass_idx,           /* truncation penalty index */
215 				     mx,                 /* the HMM banded mx */
216 				     shmx,	         /* the HMM banded shadow matrix */
217 				     &b,                 /* entry point for optimal alignment */
218 				     &mode, &sc))        /* mode (J,L,R or T) and score of CYK parsetree */
219        != eslOK) return status;
220     optimal_mode = mode;
221   }
222 
223   /* Create and initialize the parsetree */
224   tr = CreateParsetree(100);
225   /* set the 2 truncation-specific parsetree values:
226    * is_std is always set to FALSE for truncation mode,
227    * trpenalty is truncation penalty assessed in DP function, differs if we're local or global
228    * and if we allowed 5' OR 3' truncations or 5' AND 3' truncations
229    */
230   tr->is_std = FALSE; /* lower is_std flag, now we'll know this parsetree was created by a truncated (non-standard) alignment function */
231   tr->pass_idx = pass_idx;
232   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_tr_alignT(), unexpected pass idx: %d", pass_idx);
233   tr->trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][b] : cm->trp->g_ptyAA[pty_idx][b];
234   InsertTraceNodewithMode(tr, -1, TRACE_LEFT_CHILD, 1, L, 0, mode); /* init: attach the root S */
235 
236   pda_i = esl_stack_ICreate();
237   pda_c = esl_stack_CCreate();
238   if(pda_i == NULL) goto ERROR;
239   if(pda_c == NULL) goto ERROR;
240   v = 0;
241   i = 1;
242   j = d = L;
243 
244   while (1) {
245 #if eslDEBUGLEVEL >= 1
246     if(cm->sttype[v] != EL_st) printf("#DEBUG: v: %4d  mode: %4d  j: %4d d: %4d\n", v, mode, j, d);
247     else                       printf("#DEBUG: v: %4d  mode: %4d  j: %4d d: %4d EL\n", v, mode, j, d);
248 #endif
249 
250     if (cm->sttype[v] == B_st) {
251       /* get k, the len of right fragment */
252       if     (mode == TRMODE_J) k = shmx->Jkshadow[v][j][d];
253       else if(mode == TRMODE_L) k = shmx->Lkshadow[v][j][d];
254       else if(mode == TRMODE_R) k = shmx->Rkshadow[v][j][d];
255       else if(mode == TRMODE_T) k = shmx->Tkshadow[v][j][d];
256       else                      ESL_FAIL(eslEINVAL, errbuf, "bogus truncation mode for B state: %d\n", mode);
257       /* if k is 0, right fragment is of length 0 */
258       /* determine mode of right child */
259       prvmode = mode;
260       if     (mode == TRMODE_J) ; /* do nothing, in J mode, right child mode remains TRMODE_J */
261       else if(mode == TRMODE_L) mode = TRMODE_L; /* in TRMODE_L, right child is always Left marginal */
262       else if(mode == TRMODE_R) mode = shmx->Rkmode[v][j][d];
263       else if(mode == TRMODE_T) mode = TRMODE_L; /* in TRMODE_T, right child is always Left marginal */
264 
265       /* Store info about the right fragment that we'll retrieve later:
266        */
267       if((status = esl_stack_CPush(pda_c, mode))    != eslOK) goto ERROR;  /* remember the mode of right child */
268       if((status = esl_stack_IPush(pda_i, j))       != eslOK) goto ERROR;  /* remember the end j    */
269       if((status = esl_stack_IPush(pda_i, k))       != eslOK) goto ERROR;  /* remember the subseq length k for right child */
270       if((status = esl_stack_IPush(pda_i, tr->n-1)) != eslOK) goto ERROR;  /* remember the trace index of the parent B state */
271 
272       /* Determine mode of left start state */
273       if     (prvmode == TRMODE_J) mode = TRMODE_J;
274       else if(prvmode == TRMODE_L) mode = shmx->Lkmode[v][j][d];
275       else if(prvmode == TRMODE_R) mode = TRMODE_R; /* for R mode, left child is always Right marginal */
276       else if(prvmode == TRMODE_T) mode = TRMODE_R; /* for T mode, left child is always Right marginal */
277 
278       /* Deal with attaching left start state.
279        */
280       j = j-k;
281       d = d-k;
282       i = j-d+1;
283 
284       y = cm->cfirst[v];
285       InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y, mode);
286       v = y;
287 
288 #if eslDEBUGLEVEL >= 2
289       /* Uncomment to dump parsetree */
290       /* printf("added BEGL_S, dumping parsetree (prvmode: %d mode: %d:\n", prvmode, mode); */
291       /* ParsetreeDump(stdout, tr, cm, dsq); */
292 #endif
293     } else if (cm->sttype[v] == E_st || cm->sttype[v] == EL_st) {
294       /* We don't trace back from an E or EL. Instead, we're done with the
295        * left branch of the tree, and we try to swing over to the right
296        * branch by popping a right start off the stack and attaching
297        * it. If the stack is empty, then we're done with the
298        * traceback altogether. This is the only way to break the
299        * while (1) loop.
300        */
301       if (esl_stack_IPop(pda_i, &bifparent) == eslEOD) break;
302       esl_stack_IPop(pda_i, &d);
303       esl_stack_IPop(pda_i, &j);
304       esl_stack_CPop(pda_c, &mode);
305       v = tr->state[bifparent];	/* recover state index of B */
306       y = cm->cnum[v];		/* find state index of right S */
307       i = j-d+1;
308 				/* attach the S to the right */
309       InsertTraceNodewithMode(tr, bifparent, TRACE_RIGHT_CHILD, i, j, y, mode);
310 
311 #if eslDEBUGLEVEL >= 2
312       /* Uncomment to dump parsetree */
313       /* printf("added E or EL, dumping parsetree:\n"); */
314       /* ParsetreeDump(stdout, tr, cm, dsq); */
315 #endif
316 
317       v = y;
318     }
319     else {
320       /* get yoffset */
321       if     (mode == TRMODE_J) yoffset = shmx->Jyshadow[v][j][d];
322       else if(mode == TRMODE_L) yoffset = shmx->Lyshadow[v][j][d];
323       else if(mode == TRMODE_R) yoffset = shmx->Ryshadow[v][j][d];
324       else if(mode == TRMODE_T) {
325 	/* v==0 is a special case, must be a local begin (shmx->Tyshadow[] doesn't exist) */
326 	if(v == 0) yoffset = USED_TRUNC_BEGIN;
327 	else       ESL_FAIL(eslEINVAL, errbuf, "truncation mode T for non B, not ROOT_S state");
328       }
329       else {
330         ESL_FAIL(eslEINVAL, errbuf, "bogus truncation mode %d\n", mode);
331       }
332 #if eslDEBUGLEVEL >= 2
333       printf("#DEBUG: v: %d std mode: %d yoffset: %d ", v, mode, yoffset);
334 #endif
335       /* determine nxtmode, and correct yoffset */
336       if     (yoffset == USED_TRUNC_BEGIN) { yoffset = USED_TRUNC_BEGIN; nxtmode = mode; } /* yoffset, mode don't change */
337       else if(yoffset == USED_TRUNC_END)   { yoffset = USED_TRUNC_END;   } /* nxtmode is irrelevant in this case */
338       else if(yoffset == USED_EL)          { yoffset = USED_EL;          } /* nxtmode is irrelevant in this case */
339       else if(yoffset >= TRMODE_R_OFFSET)  { nxtmode = TRMODE_R; yoffset -= TRMODE_R_OFFSET; }
340       else if(yoffset >= TRMODE_L_OFFSET)  { nxtmode = TRMODE_L; yoffset -= TRMODE_L_OFFSET; }
341       else if(yoffset >= TRMODE_J_OFFSET)  { nxtmode = TRMODE_J; yoffset -= TRMODE_J_OFFSET; }
342       else                                  ESL_FAIL(eslEINVAL, errbuf, "yoffset out of bounds: %d\n", yoffset);
343 #if eslDEBUGLEVEL >= 2
344       printf("new yoffset: %d nxtmode: %d\n", yoffset, nxtmode);
345       if(mode == TRMODE_J) printf("HEYA J v: %4d j: %4d d: %4d mode: %4d yoffset: %4d nxtmode: %4d\n", v, j, d, mode, yoffset, nxtmode);
346       if(mode == TRMODE_L) printf("HEYA L v: %4d j: %4d d: %4d mode: %4d yoffset: %4d nxtmode: %4d\n", v, j, d, mode, yoffset, nxtmode);
347       if(mode == TRMODE_R) printf("HEYA R v: %4d j: %4d d: %4d mode: %4d yoffset: %4d nxtmode: %4d\n", v, j, d, mode, yoffset, nxtmode);
348 #endif
349       switch (cm->sttype[v]) {
350       case  D_st:
351 	break;
352       case MP_st:
353 	if ( mode == TRMODE_J )          i++;
354 	if ( mode == TRMODE_L && d > 0 ) i++;
355 	if ( mode == TRMODE_J )          j--;
356 	if ( mode == TRMODE_R && d > 0 ) j--;
357 	break;
358       case ML_st:
359 	if ( mode == TRMODE_J )          i++;
360 	if ( mode == TRMODE_L && d > 0 ) i++;
361 	break;
362       case MR_st:
363 	if ( mode == TRMODE_J )          j--;
364 	if ( mode == TRMODE_R && d > 0 ) j--;
365 	break;
366       case IL_st:
367 	if ( mode == TRMODE_J )          i++;
368 	if ( mode == TRMODE_L && d > 0 ) i++;
369 	break;
370       case IR_st:
371 	if ( mode == TRMODE_J )          j--;
372 	if ( mode == TRMODE_R && d > 0 ) j--;
373 	break;
374       case  S_st:
375 	break;
376       default: ESL_FAIL(eslEINVAL, errbuf, "bogus state type %d \n", cm->sttype[v]);
377       }
378       d = j-i+1;
379 
380       if (yoffset == USED_EL || yoffset == USED_TRUNC_END)
381 	{	/* a local alignment end  or a truncation end */
382 	  if(yoffset == USED_EL) {
383 	    InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, cm->M, mode);
384 #if eslDEBUGLEVEL >= 2
385             /* Uncomment to dump parsetree */
386 	    /* printf("added USED_EL or USED_TRUNC_END, dumping parsetree:\n"); */
387 	    /* ParsetreeDump(stdout, tr, cm, dsq); */
388 #endif
389 	  }
390 	  v = cm->M;		/* now we're in EL (if USED_TRUNC_END, we act like we are) */
391 	}
392       else if (yoffset == USED_TRUNC_BEGIN)
393 	{ /* local begin; can only happen once, from root */
394 	  InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, b, mode);
395 	  v = b;
396 	}
397       else
398 	{
399 	  mode = nxtmode;
400 	  y = cm->cfirst[v] + yoffset;
401 	  InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y, mode);
402 #if eslDEBUGLEVEL >= 2
403           /* Uncomment to dump parsetree */
404 	  /* printf("STD yoffset: %d\n", yoffset); */
405 	  /* printf("added standard, dumping parsetree:\n"); */
406 	  /* ParsetreeDump(stdout, tr, cm, dsq); */
407 #endif
408 	  v = y;
409 	}
410     }
411   }
412   esl_stack_Destroy(pda_i);  /* it should be empty; we could check; naaah. */
413   esl_stack_Destroy(pda_c);  /* it should be empty; we could check; naaah. */
414 
415   if(ret_tr       != NULL) *ret_tr   = tr; else FreeParsetree(tr);
416   if(ret_mode     != NULL) *ret_mode = optimal_mode;
417   if(ret_sc_or_pp != NULL) *ret_sc_or_pp = do_optacc ? pp : sc;
418 
419   return eslOK;
420 
421  ERROR:
422   ESL_FAIL(status, errbuf, "out of memory");
423   return status; /* NEVERREACHED */
424 }
425 
426 /* Function: cm_tr_alignT_hb()
427  * Date:     EPN, Thu Sep  8 07:59:10 2011
428  *           EPN 03.29.06 (cm_alignT_hb())
429  *
430  * Purpose:  Call either cm_TrCYKInsideAlignHB() (if !<do_optacc>), or
431  *           cm_TrOptAccAlignHB() (if <do_optacc>), get vjd shadow
432  *           matrix; then trace back and append to an existing but
433  *           empty parsetree tr.  The full sequence 1..L will be
434  *           aligned. This function is nearly identical to
435  *           cm_tr_alignT() with the important difference that HMM
436  *           bands are used here.
437  *
438  *           If <do_optacc>==TRUE then <emit_mx> must != NULL and
439  *           <optimal_mode> must not be TRMODE_UNKNOWN, it will have been
440  *           determined by caller from a cm_TrInsideAlign() call and
441  *           passed in. If <do_optacc> is FALSE, then we're doing CYK
442  *           alignment and we may or may not know the truncation mode
443  *           of the alignment yet. If we know (e.g. if we're being
444  *           called from a search/scan pipeline that already ran a
445  *           scanning trCYK) then <optimal_mode> will be TRMODE_J,
446  *           TRMODE_L, TRMODE_R or TRMODE_T, if not (e.g. if we're
447  *           being called for 'cmalign') then <optimal_mode> will be
448  *           TRMODE_UNKNOWN and we'll determine it via CYK.
449  *
450  * Args:     cm           - the model
451  *           errbuf       - char buffer for reporting errors
452  *           dsq          - the digitized sequence [1..L]
453  *           L            - length of the dsq to align
454  *           size_limit   - max size in Mb for DP matrix
455  *           optimal_mode - the optimal alignment mode, TRMODE_UNKNOWN if unknown
456  *           pass_idx     - pipeline pass index, indicates what truncation penalty to use
457  *           do_optacc    - TRUE to align with optimal accuracy, else use CYK
458  *           mx           - the DP matrix to fill in
459  *           shmx         - the shadow matrix to fill in
460  *           emit_mx      - the pre-filled emit matrix, must be non-NULL if do_optacc
461  *           ret_tr       - RETURN: the optimal parsetree
462  *           ret_mode     - RETURN: mode of optimal alignment (TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T)
463  *           ret_sc_or_pp - RETURN: optimal score (CYK if !do_optacc, else avg PP of all 1..L residues)
464  *
465  * Returns:  <eslOK>     on success.
466  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>, in
467  *                       this case, alignment has been aborted, ret_* variables are not valid
468  *           <eslEINVAL> on invalid tro or traceback problem: bogus state
469  */
470 int
cm_tr_alignT_hb(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char optimal_mode,int pass_idx,int do_optacc,CM_TR_HB_MX * mx,CM_TR_HB_SHADOW_MX * shmx,CM_TR_HB_EMIT_MX * emit_mx,Parsetree_t ** ret_tr,char * ret_mode,float * ret_sc_or_pp)471 cm_tr_alignT_hb(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char optimal_mode, int pass_idx, int do_optacc,
472                 CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_EMIT_MX *emit_mx, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc_or_pp)
473 {
474   int       status;
475   Parsetree_t *tr = NULL;       /* the parsetree */
476   float     sc;			/* the score of the CYK alignment */
477   float     pp;			/* avg pp of all emitted residues in optacc alignment */
478   ESL_STACK *pda_i;             /* stack that tracks bifurc parent of a right start */
479   ESL_STACK *pda_c;             /* stack that tracks mode of bifurc parent of a right start */
480   int       v,j,d,i;		/* indices for state, seq positions */
481   int       k;			/* right subtree len for bifurcs */
482   int       y, yoffset;         /* child state y, it's offset */
483   int       bifparent;          /* B_st parent */
484   int       jp_v;               /* j-jmin[v] for current j, and current v */
485   int       dp_v;               /* d-hdmin[v][jp_v] for current j, current v, current d*/
486   char      mode;               /* current truncation mode: TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T */
487   char      prvmode, nxtmode;   /* previous, next truncation mode */
488   int       allow_S_trunc_end;  /* set to true to allow d==0 BEGL_S and BEGR_S truncated ends */
489   int       allow_S_local_end;  /* set to true to allow d==0 BEGL_S and BEGR_S local ends if(do_optacc) */
490   int       b;                  /* local entry state for best overall alignment */
491   int       pty_idx;            /* index for truncation penalty, determined by pass_idx */
492 
493   /* pointers to cp9b data for convenience */
494   CP9Bands_t  *cp9b = cm->cp9b;
495   int         *jmin = cp9b->jmin;
496   int         *jmax = cp9b->jmax;
497   int       **hdmin = cp9b->hdmin;
498   int       **hdmax = cp9b->hdmax;
499 
500   /* ensure full sequence is within bands */
501   if (cp9b->jmin[0]             > L || cp9b->jmax[0]             < L) ESL_FAIL(eslEINVAL, errbuf, "cm_tr_alignT_hb(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cp9b->jmin[0], cp9b->jmax[0]);
502   if (cp9b->hdmin[0][L-jmin[0]] > L || cp9b->hdmax[0][L-jmin[0]] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_tr_alignT_hb(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cp9b->hdmin[0][L-jmin[0]], cp9b->hdmax[0][L-jmin[0]]);
503 
504   if(do_optacc) {
505     if((status = cm_TrOptAccAlignHB(cm, errbuf, dsq, L,
506 				    size_limit,   /* max size of DP matrix */
507 				    optimal_mode, /* marginal mode of optimal alignment */
508 				    pass_idx,     /* truncation penalty index */
509 				    mx,	          /* the DP matrix, to expand and fill-in */
510 				    shmx,	  /* the shadow matrix, to expand and fill-in */
511 				    emit_mx,      /* pre-calc'ed emit matrix */
512 				    &b,           /* the entry point for optimal alignment */
513 				    &pp))         /* avg post prob of all emissions in optimally accurate parsetree */
514        != eslOK) return status;
515     mode = optimal_mode;
516   }
517   else {
518     if((status = cm_TrCYKInsideAlignHB(cm, errbuf, dsq, L,
519 				       size_limit,         /* max size of DP matrix */
520 				       optimal_mode,       /* marginal mode of optimal alignment, TRMODE_UNKNOWN if unknown */
521 				       pass_idx,           /* truncation penalty index */
522 				       mx,                 /* the HMM banded mx */
523 				       shmx,	           /* the HMM banded shadow matrix */
524 				       &b,                 /* entry point for optimal alignment */
525 				       &mode, &sc))        /* mode (J,L,R or T) and score of CYK parsetree */
526        != eslOK) return status;
527     optimal_mode = mode;
528   }
529 
530   /* Create and initialize the parsetree */
531   tr = CreateParsetree(100);
532   /* set the 2 truncation-specific parsetree values:
533    * is_std is always set to FALSE for truncation mode,
534    * trpenalty is truncation penalty assessed in DP function, differs if we're local or global
535    * and if we allowed 5' OR 3' truncations or 5' AND 3' truncations
536    */
537   tr->is_std = FALSE; /* lower is_std flag, now we'll know this parsetree was created by a truncated (non-standard) alignment function */
538   tr->pass_idx = pass_idx;
539   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_tr_alignT_hb(), unexpected pass idx: %d", pass_idx);
540   tr->trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][b] : cm->trp->g_ptyAA[pty_idx][b];
541   InsertTraceNodewithMode(tr, -1, TRACE_LEFT_CHILD, 1, L, 0, mode); /* init: attach the root S */
542 
543   pda_i = esl_stack_ICreate();
544   pda_c = esl_stack_CCreate();
545   if(pda_i == NULL) goto ERROR;
546   if(pda_c == NULL) goto ERROR;
547   v = 0;
548   i = 1;
549   j = d = L;
550 
551   while (1) {
552 #if eslDEBUGLEVEL >= 1
553     if(cm->sttype[v] != EL_st) printf("#DEBUG: v: %4d  mode: %4d  j: %4d (%4d..%4d)  d: %4d\n", v, mode, j, jmin[v], jmax[v], d);
554     else                       printf("#DEBUG: v: %4d  mode: %4d  j: %4d             d: %4d EL\n", v, mode, j, d);
555 #endif
556     /* super special case for HMM banded truncated mode, explained below, after the crazy if */
557     if((cm->stid[v] == BEGL_S || cm->stid[v] == BEGR_S) && d == 0 &&
558        ((mode == TRMODE_J && (! cp9b->Jvalid[v]))  ||            /* J mode, but J mode is disallowed for state v */
559 	(mode == TRMODE_L && (! cp9b->Lvalid[v]))  ||            /* L mode, but L mode is disallowed for state v */
560 	(mode == TRMODE_R && (! cp9b->Rvalid[v]))  ||            /* R mode, but R mode is disallowed for state v */
561 	(j < jmin[v]             || j > jmax[v]) ||              /* j is outside v's j band */
562 	(d < hdmin[v][j-jmin[v]] || d > hdmax[v][j-jmin[v]]))) { /* j is within v's j band, but d is outside j's d band */
563       /* special case: v is a BEGL_S or BEGR_S and either we're in a
564        * truncated alignment mode for v that is disallowed by the
565        * bands or j is outside v's j band or j is within the band but
566        * d is outside j's d band.  We allow this case if d == 0 b/c
567        * we're doing a truncated end out of this state immediately,
568        * i.e. we're not really using the state at all we're just using
569        * it so we can use its parent B state and its sister left or
570        * right start state. This only occurs if the parent bif state
571        * emitted the full sequence via the other child (BEGR_S or
572        * BEGL_S).
573        *
574        * This will usually occur if v is a BEGL_S and we're in R mode,
575        * or v is a BEGR_S and we're in L mode, but not always. We need
576        * to also allow a similar case that also occurs in
577        * *non-truncated* optimal accuracy alignment. See
578        * cm_dpalign.c::cm_alignT_hb() at the analogous point in that
579        * function for details.
580        */
581       ESL_DASSERT1(((cm->stid[v] == BEGL_S && mode == TRMODE_R) || (cm->stid[v] == BEGR_S && mode == TRMODE_L)));
582       if((cm->stid[v] == BEGL_S && mode == TRMODE_R) || (cm->stid[v] == BEGR_S && mode == TRMODE_L)) {
583 	allow_S_trunc_end = TRUE; /* this sets yoffset to USED_TRUNC_END in the final 'else' of below code block */
584 	allow_S_local_end = FALSE;
585       }
586       else if (do_optacc) {
587 	allow_S_local_end = TRUE; /* this sets yoffset to USED_EL in the final 'else' of below code block */
588 	allow_S_trunc_end = FALSE;
589       }
590     }
591     else if (cm->sttype[v] != EL_st) { /* normal case, determine jp_v, dp_v; j, d offset values given bands */
592       jp_v = j - jmin[v];
593       dp_v = d - hdmin[v][jp_v];
594       allow_S_trunc_end = FALSE;
595       allow_S_local_end = FALSE;
596       assert(j >= jmin[v]        && j <= jmax[v]);
597       assert(d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]);
598       ESL_DASSERT1((j >= jmin[v]        && j <= jmax[v]));
599       ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
600     }
601 
602     if (cm->sttype[v] == B_st) {
603       /* get k, the len of right fragment */
604       if     (mode == TRMODE_J) k = shmx->Jkshadow[v][jp_v][dp_v];
605       else if(mode == TRMODE_L) k = shmx->Lkshadow[v][jp_v][dp_v];
606       else if(mode == TRMODE_R) k = shmx->Rkshadow[v][jp_v][dp_v];
607       else if(mode == TRMODE_T) k = shmx->Tkshadow[v][jp_v][dp_v];
608       else                           ESL_FAIL(eslEINVAL, errbuf, "bogus truncation mode for B state: %d\n", mode);
609       /* if k is 0, right fragment is of length 0 */
610       /* determine mode of right child */
611       prvmode = mode;
612       if     (mode == TRMODE_J) ; /* do nothing, in J mode, right child mode remains TRMODE_J */
613       else if(mode == TRMODE_L) mode = TRMODE_L; /* in TRMODE_L, right child is always Left marginal */
614       else if(mode == TRMODE_R) mode = shmx->Rkmode[v][jp_v][dp_v];
615       else if(mode == TRMODE_T) mode = TRMODE_L; /* in TRMODE_T, right child is always Left marginal */
616 
617       /* Store info about the right fragment that we'll retrieve later:
618        */
619       if((status = esl_stack_CPush(pda_c, mode))    != eslOK) goto ERROR;  /* remember the mode of right child */
620       if((status = esl_stack_IPush(pda_i, j))       != eslOK) goto ERROR;  /* remember the end j    */
621       if((status = esl_stack_IPush(pda_i, k))       != eslOK) goto ERROR;  /* remember the subseq length k for right child */
622       if((status = esl_stack_IPush(pda_i, tr->n-1)) != eslOK) goto ERROR;  /* remember the trace index of the parent B state */
623 
624       /* Determine mode of left start state */
625       if     (prvmode == TRMODE_J) mode = TRMODE_J;
626       else if(prvmode == TRMODE_L) mode = shmx->Lkmode[v][jp_v][dp_v];
627       else if(prvmode == TRMODE_R) mode = TRMODE_R; /* for R mode, left child is always Right marginal */
628       else if(prvmode == TRMODE_T) mode = TRMODE_R; /* for T mode, left child is always Right marginal */
629 
630       /* Deal with attaching left start state.
631        */
632       j = j-k;
633       d = d-k;
634       i = j-d+1;
635 
636       y = cm->cfirst[v];
637       InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y, mode);
638       v = y;
639 
640 #if eslDEBUGLEVEL >= 2
641       /* Uncomment to dump parsetree */
642       /* printf("added BEGL_S, dumping parsetree (prvmode: %d mode: %d:\n", prvmode, mode); */
643       /* ParsetreeDump(stdout, tr, cm, dsq); */
644 #endif
645     }
646     else if (cm->sttype[v] == E_st || cm->sttype[v] == EL_st) {
647       /* We don't trace back from an E or EL. Instead, we're done with the
648        * left branch of the tree, and we try to swing over to the right
649        * branch by popping a right start off the stack and attaching
650        * it. If the stack is empty, then we're done with the
651        * traceback altogether. This is the only way to break the
652        * while (1) loop.
653        */
654       if (esl_stack_IPop(pda_i, &bifparent) == eslEOD) break;
655       esl_stack_IPop(pda_i, &d);
656       esl_stack_IPop(pda_i, &j);
657       esl_stack_CPop(pda_c, &mode);
658       v = tr->state[bifparent];	/* recover state index of B */
659       y = cm->cnum[v];		/* find state index of right S */
660       i = j-d+1;
661 				/* attach the S to the right */
662       InsertTraceNodewithMode(tr, bifparent, TRACE_RIGHT_CHILD, i, j, y, mode);
663 #if eslDEBUGLEVEL >= 2
664       /* Uncomment to dump parsetree */
665       /* printf("added E or EL, dumping parsetree:\n"); */
666       /* ParsetreeDump(stdout, tr, cm, dsq); */
667 #endif
668 
669       v = y;
670     }
671     else {
672       /* get yoffset */
673       if(allow_S_trunc_end) {
674 	yoffset = USED_TRUNC_END; /* nxt mode is irrelevant in this case */
675       }
676       else if(allow_S_local_end) {
677 	yoffset = USED_EL; /* nxt mode is irrelevant in this case */
678       }
679       else {
680 	if     (mode == TRMODE_J) yoffset = shmx->Jyshadow[v][jp_v][dp_v];
681 	else if(mode == TRMODE_L) yoffset = shmx->Lyshadow[v][jp_v][dp_v];
682 	else if(mode == TRMODE_R) yoffset = shmx->Ryshadow[v][jp_v][dp_v];
683 	else if(mode == TRMODE_T) {
684 	  /* v==0 is a special case, must be a local begin (shmx->Tyshadow[] doesn't exist) */
685 	  if(v == 0) yoffset = USED_TRUNC_BEGIN;
686 	  else       ESL_FAIL(eslEINVAL, errbuf, "truncation mode T for non B, not ROOT_S state");
687 	}
688 	else {
689 	  ESL_FAIL(eslEINVAL, errbuf, "bogus truncation mode %d\n", mode);
690 	}
691       }
692 #if eslDEBUGLEVEL >= 2
693       printf("#DEBUG: v: %d std mode: %d yoffset: %d ", v, mode, yoffset);
694 #endif
695       /* determine nxtmode, and correct yoffset */
696       if     (yoffset == USED_TRUNC_BEGIN) { yoffset = USED_TRUNC_BEGIN; nxtmode = mode; } /* yoffset, mode don't change */
697       else if(yoffset == USED_TRUNC_END)   { yoffset = USED_TRUNC_END; } /* nxtmode is irrelevant in this case */
698       else if(yoffset == USED_EL)          { yoffset = USED_EL;        } /* nxtmode is irrelevant in this case */
699       else if(yoffset >= TRMODE_R_OFFSET)  { nxtmode = TRMODE_R; yoffset -= TRMODE_R_OFFSET; }
700       else if(yoffset >= TRMODE_L_OFFSET)  { nxtmode = TRMODE_L; yoffset -= TRMODE_L_OFFSET; }
701       else if(yoffset >= TRMODE_J_OFFSET)  { nxtmode = TRMODE_J; yoffset -= TRMODE_J_OFFSET; }
702       else                                  ESL_FAIL(eslEINVAL, errbuf, "yoffset out of bounds: %d\n", yoffset);
703 #if eslDEBUGLEVEL >= 2
704       printf("new yoffset: %d nxtmode: %d\n", yoffset, nxtmode);
705       if(mode == TRMODE_J) printf("HEYA J v: %4d j: %4d d: %4d mode: %4d yoffset: %4d nxtmode: %4d\n", v, j, d, mode, yoffset, nxtmode);
706       if(mode == TRMODE_L) printf("HEYA L v: %4d j: %4d d: %4d mode: %4d yoffset: %4d nxtmode: %4d\n", v, j, d, mode, yoffset, nxtmode);
707       if(mode == TRMODE_R) printf("HEYA R v: %4d j: %4d d: %4d mode: %4d yoffset: %4d nxtmode: %4d\n", v, j, d, mode, yoffset, nxtmode);
708 #endif
709       switch (cm->sttype[v]) {
710       case  D_st:
711 	break;
712       case MP_st:
713 	if ( mode == TRMODE_J )          i++;
714 	if ( mode == TRMODE_L && d > 0 ) i++;
715 	if ( mode == TRMODE_J )          j--;
716 	if ( mode == TRMODE_R && d > 0 ) j--;
717 	break;
718       case ML_st:
719 	if ( mode == TRMODE_J )          i++;
720 	if ( mode == TRMODE_L && d > 0 ) i++;
721 	break;
722       case MR_st:
723 	if ( mode == TRMODE_J )          j--;
724 	if ( mode == TRMODE_R && d > 0 ) j--;
725 	break;
726       case IL_st:
727 	if ( mode == TRMODE_J )          i++;
728 	if ( mode == TRMODE_L && d > 0 ) i++;
729 	break;
730       case IR_st:
731 	if ( mode == TRMODE_J )          j--;
732 	if ( mode == TRMODE_R && d > 0 ) j--;
733 	break;
734       case  S_st:
735 	break;
736       default: ESL_FAIL(eslEINVAL, errbuf, "bogus state type %d \n", cm->sttype[v]);
737       }
738       d = j-i+1;
739 
740       if (yoffset == USED_EL || yoffset == USED_TRUNC_END)
741 	{	/* a local alignment end  or a truncation end */
742 	  if(yoffset == USED_EL) {
743 	    InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, cm->M, mode);
744 #if eslDEBUGLEVEL >= 2
745             /* Uncomment to dump parsetree */
746 	    /* printf("added USED_EL or USED_TRUNC_END, dumping parsetree:\n"); */
747 	    /* ParsetreeDump(stdout, tr, cm, dsq); */
748 #endif
749 	  }
750 	  v = cm->M; /* now we're in EL (if USED_TRUNC_END, we act like we are) */
751 	}
752       else if (yoffset == USED_TRUNC_BEGIN)
753 	{ /* local begin; can only happen once, from root */
754 	  InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, b, mode);
755 	  v = b;
756 	}
757       else
758 	{
759 	  mode = nxtmode;
760 	  y = cm->cfirst[v] + yoffset;
761 	  InsertTraceNodewithMode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y, mode);
762 #if eslDEBUGLEVEL >= 2
763           /* Uncomment to dump parsetree */
764 	  /* printf("STD yoffset: %d\n", yoffset); */
765 	  /* printf("added standard, dumping parsetree:\n"); */
766 	  /* ParsetreeDump(stdout, tr, cm, dsq); */
767 #endif
768 	  v    = y;
769 	}
770     }
771     /*ParsetreeDump(stdout, tr, cm, dsq);*/
772   }
773   esl_stack_Destroy(pda_i);  /* it should be empty; we could check; naaah. */
774   esl_stack_Destroy(pda_c);  /* it should be empty; we could check; naaah. */
775 
776   if(ret_tr       != NULL) *ret_tr   = tr; else FreeParsetree(tr);
777   if(ret_mode     != NULL) *ret_mode = optimal_mode;
778   if(ret_sc_or_pp != NULL) *ret_sc_or_pp = do_optacc ? pp : sc;
779 
780   return eslOK;
781 
782  ERROR:
783   ESL_FAIL(status, errbuf, "out of memory");
784   return status; /* NEVERREACHED */
785 }
786 
787 
788 /* Function: cm_TrAlignSizeNeeded()
789  * Date:     EPN, Thu Jan 12 10:15:08 2012
790  *
791  * Purpose:  Determine size in Mb required to successfully call
792  *           cm_TrAlign() for a given model <cm>, sequence length
793  *           <L> and alignment options in <do_sample> and <do_post>.
794  *
795  *           We are ignorant of any preset marginal alignment mode,
796  *           because that doesn't affect how the matrices are
797  *           allocated (although it can affect which cells are filled
798  *           in).
799  *
800  *           Return <eslERANGE> if required size exceeds size_limit.
801  *
802  * Args:     cm         - the covariance model
803  *           errbuf     - char buffer for reporting errors
804  *           L          - length of sequence
805  *           size_limit - max size in Mb for all required matrices, return eslERANGE if exceeded
806  *           do_sample  - TRUE to sample a parsetree from the Inside matrix
807  *           do_post    - TRUE to do posteriors
808  *           ret_mxmb   - RETURN: size in Mb of required CM_TR_MX (we'll need 2 of these if do_post)
809  *           ret_emxmb  - RETURN: size in Mb of required CM_TR_EMIT_MX   (0. if we won't need one)
810  *           ret_shmxmb - RETURN: size in Mb of required CM_TR_SHADOW_MX (0. if we won't need one)
811  *           ret_totmb  - RETURN: size in Mb of all required matrices
812  *
813  * Returns: <eslOK> on success.
814  *
815  * Throws:  <eslEINVAL> on contract violation
816  *          <eslERANGE> if total size of all matrices exceeds <size_limit>
817  */
818 int
cm_TrAlignSizeNeeded(CM_t * cm,char * errbuf,int L,float size_limit,int do_sample,int do_post,float * ret_mxmb,float * ret_emxmb,float * ret_shmxmb,float * ret_totmb)819 cm_TrAlignSizeNeeded(CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post,
820 		     float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb)
821 {
822   int          status;
823   float        totmb    = 0.;  /* total Mb required for all matrices (that must be simultaneously in memory) */
824   float        mxmb     = 0.;  /* Mb required for CM_MX */
825   float        emxmb    = 0.;  /* Mb required for CM_EMIT_MX */
826   float        shmxmb   = 0.;  /* Mb required for CM_SHADOW_MX */
827 
828   /* we pass NULL values to the *_mx_SizeNeeded() functions because we don't care about cell counts */
829 
830   /* we will always need an Inside or CYK matrix */
831   if((status = cm_tr_mx_SizeNeeded(cm, errbuf, L, NULL, NULL, NULL, NULL, &mxmb)) != eslOK) return status;
832   totmb = mxmb;
833 
834   /* if calc'ing posteriors, we'll also need an Outside matrix (which
835    * we'll reuse as the Posterior matrix, so only count it once) and
836    * an emit matrix.
837    */
838   if(do_post) {
839     totmb += mxmb;
840     if((status = cm_tr_emit_mx_SizeNeeded(cm, errbuf, L, NULL, NULL, &emxmb)) != eslOK) return status;
841     totmb += emxmb;
842   }
843 
844   /* if we're not sampling an alignment, we'll also need a shadow
845    * matrix for the traceback.
846    */
847   if(! do_sample) {
848     if((status = cm_tr_shadow_mx_SizeNeeded(cm, errbuf, L, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &shmxmb)) != eslOK) return status;
849     totmb += shmxmb;
850   }
851 
852   if (ret_mxmb   != NULL) *ret_mxmb    = mxmb;
853   if (ret_emxmb  != NULL) *ret_emxmb   = emxmb;
854   if (ret_shmxmb != NULL) *ret_shmxmb  = shmxmb;
855   if (ret_totmb  != NULL) *ret_totmb   = totmb;
856 
857 #if eslDEBUGLEVEL >= 1
858   printf("#DEBUG: cm_TrAlignSizeNeeded()\n");
859   printf("#DEBUG: \t mxmb:  %.2f\n", mxmb);
860   printf("#DEBUG: \t emxmb: %.2f\n", emxmb);
861   printf("#DEBUG: \t shmxmb:%.2f\n", shmxmb);
862   printf("#DEBUG: \t totmb: %.2f\n", totmb);
863   printf("#DEBUG: \t limit: %.2f\n", size_limit);
864 #endif
865 
866   if(totmb > size_limit) ESL_FAIL(eslERANGE, errbuf, "non-banded truncated alignment mxes need %.2f Mb > %.2f Mb limit.\nUse --mxsize, --maxtau or --tau.", totmb, (float) size_limit);
867 
868   return eslOK;
869 }
870 
871 /* Function: cm_TrAlignSizeNeededHB()
872  * Date:     EPN, Thu Jan 12 10:24:20 2012
873  *
874  * Purpose:  Determine size in Mb required to successfully call
875  *           cm_TrAlignHB() for a given model <cm>, sequence length
876  *           <L>, HMM bands <cm->cp9b> and alignment options
877  *           in <do_sample> and <do_post>.
878  *
879  *           We are ignorant of any preset marginal alignment mode,
880  *           because that doesn't affect how the matrices are
881  *           allocated (although it can affect which cells are filled
882  *           in).
883  *
884  *           Return <eslERANGE> if required size exceeds size_limit.
885  *
886  * Args:     cm         - the covariance model
887  *           errbuf     - char buffer for reporting errors
888  *           L          - length of sequence
889  *           size_limit - max size in Mb for all required matrices, return eslERANGE if exceeded
890  *           do_sample  - TRUE to sample a parsetree from the Inside matrix
891  *           do_post    - TRUE to do posteriors
892  *           ret_mxmb   - RETURN: size in Mb of required CM_TR_HB_MX (we'll need 2 of these if do_post)
893  *           ret_emxmb  - RETURN: size in Mb of required CM_TR_HB_EMIT_MX   (0. if we won't need one)
894  *           ret_shmxmb - RETURN: size in Mb of required CM_TR_HB_SHADOW_MX (0. if we won't need one)
895  *           ret_totmb  - RETURN: size in Mb of all required matrices
896  *
897  * Returns: <eslOK> on success.
898  *
899  * Throws:  <eslEINVAL> on contract violation
900  *          <eslERANGE> if total size of all matrices exceeds <size_limit>
901  */
902 int
cm_TrAlignSizeNeededHB(CM_t * cm,char * errbuf,int L,float size_limit,int do_sample,int do_post,float * ret_mxmb,float * ret_emxmb,float * ret_shmxmb,float * ret_totmb)903 cm_TrAlignSizeNeededHB(CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post,
904 		       float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb)
905 {
906   int          status;
907   float        totmb    = 0.;  /* total Mb required for all matrices (that must be simultaneously in memory) */
908   float        mxmb     = 0.;  /* Mb required for CM_MX */
909   float        emxmb    = 0.;  /* Mb required for CM_EMIT_MX */
910   float        shmxmb   = 0.;  /* Mb required for CM_SHADOW_MX */
911 
912   /* we pass NULL values to the *_mx_SizeNeeded() functions because we don't care about cell counts */
913 
914   /* we will always need an Inside or CYK matrix */
915   if((status = cm_tr_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, NULL, NULL, NULL, &mxmb)) != eslOK) return status;
916   totmb = mxmb;
917 
918   /* if calc'ing posteriors, we'll also need an Outside matrix (which
919    * we'll reuse as the Posterior matrix, so only count it once) and
920    * an emit matrix.
921    */
922   if(do_post) {
923     totmb += mxmb;
924     if((status = cm_tr_hb_emit_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, NULL, &emxmb)) != eslOK) return status;
925     totmb += emxmb;
926   }
927 
928   /* if we're not sampling an alignment, we'll also need a shadow
929    * matrix for the traceback.
930    */
931   if(! do_sample) {
932     if((status = cm_tr_hb_shadow_mx_SizeNeeded(cm, errbuf, cm->cp9b, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &shmxmb)) != eslOK) return status;
933     totmb += shmxmb;
934   }
935 
936   if (ret_mxmb   != NULL) *ret_mxmb    = mxmb;
937   if (ret_emxmb  != NULL) *ret_emxmb   = emxmb;
938   if (ret_shmxmb != NULL) *ret_shmxmb  = shmxmb;
939   if (ret_totmb  != NULL) *ret_totmb   = totmb;
940 
941 #if eslDEBUGLEVEL >= 1
942   printf("#DEBUG: cm_TrAlignSizeNeededHB()\n");
943   printf("#DEBUG: \t mxmb:  %.2f\n", mxmb);
944   printf("#DEBUG: \t emxmb: %.2f\n", emxmb);
945   printf("#DEBUG: \t shmxmb:%.2f\n", shmxmb);
946   printf("#DEBUG: \t totmb: %.2f\n", totmb);
947   printf("#DEBUG: \t limit: %.2f\n", size_limit);
948 #endif
949 
950   /*printf("cm_TrAlignSizeNeededHB() returning %.2f\n", mxmb);*/
951 
952   if(totmb > size_limit) ESL_FAIL(eslERANGE, errbuf, "HMM banded truncated alignment mxes need %.2f Mb > %.2f Mb limit.\nUse --mxsize, --maxtau or --tau.", totmb, (float) size_limit);
953 
954   return eslOK;
955 }
956 
957 /* Function: cm_TrAlign()
958  * Incept:   EPN, Sat Sep 10 12:58:09 2011
959  *
960  * Purpose: Wrapper for the cm_tr_alignTb() routine - solve a full
961  *           alignment problem using trCYK, truncated optimal accuracy
962  *           or sampling and return the traceback and the score.
963  *
964  *           Identical to cm_TrAlignHB() but HMM bands are not used here.
965  *
966  *           Input arguments allow this function to be run in 6 'modes':
967  *
968  *           mode      returns                 arguments
969  *           ----  ----------------  ----------------------------------------
970  *                 tr        ppstrs  do_optacc  do_sample post_mx   ret_ppstr
971  *                 ----------------  ----------------------------------------
972  *              1. CYK       no      FALSE      FALSE      NULL      NULL
973  *              2. CYK       yes     FALSE      FALSE     !NULL     !NULL
974  *              3. Opt acc   no      TRUE       FALSE     !NULL      NULL
975  *              4. Opt acc   yes     TRUE       FALSE     !NULL     !NULL
976  *              5. sampled   no      FALSE      TRUE       NULL      NULL
977  *              6. sampled   yes     FALSE      TRUE      !NULL     !NULL
978  *
979  *           CYK parsetrees are most the likely parsetree, 'Opt acc'
980  *           parsetrees are Holmes/Durbin optimally accurate
981  *           parsetrees, the parse that maximizes the summed posterior
982  *           probability of emitted residues. A sampled parsetree
983  *           is a parsetree sampled from an Inside matrix based on
984  *           its probability.
985  *
986  *           We can enforce that the parsetree found be in a
987  *           particular marginal alignment mode via a <preset_mode>
988  *           value other than TRMODE_UNKNOWN. This can be useful we're
989  *           called from a search/scan pipeline and a scanning
990  *           truncated DP search algorithm has already determined the
991  *           optimal truncation mode for the alignment, as we'll save
992  *           time by only performing the required DP calculations for
993  *           that mode in the DP functions we call here. In that case,
994  *           <ret_mode> will necessarily be equal to <preset_mode>.
995  *           Alternatively, if <preset_mode> is TRMODE_UNKNOWN then
996  *           we'll determine it here and return a known mode (TRMODE_J
997  *           | TRMODE_L | TRMODE_R | TRMODE_T) in <ret_mode>.
998  *
999  * Args:     cm          - the covariance model
1000  *           errbuf      - char buffer for reporting errors
1001  *           dsq         - the digitized sequence, 1..L
1002  *           L           - length of sequence
1003  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
1004  *           preset_mode - the alignment mode to enforce, if TRMODE_UNKNOWN we determine mode here
1005  *           pass_idx     - pipeline pass index, indicates what truncation penalty to use
1006  *           do_optacc   - TRUE to not do CYK alignment, determine the Holmes/Durbin optimally
1007  *                         accurate parsetree in ret_tr, requires post_mx != NULL
1008  *           do_sample   - TRUE to sample a parsetree from the Inside matrix
1009  *           mx          - the main dp matrix, only cells within bands in cm->cp9b will be valid.
1010  *           shmx        - the HMM banded shadow matrix to fill in and traceback, same cells as mx are valid.
1011  *           post_mx     - dp matrix for posterior calculation, can be NULL only if !do_optacc
1012  *           emit_mx     - emit matrix to fill
1013  *           r           - source of randomness, must be non-NULL only if do_sample==TRUE
1014  *           ret_ppstr   - RETURN: posterior code 1, (pass NULL if not wanted, must be NULL if post_mx == NULL)
1015  *           ret_tr      - RETURN: parsetree (either optimal or sampled, pass NULL if not wanted)
1016  *           ret_mode    - RETURN: mode of ret_tr, will be <preset_mode> unless that was TRMODE_UNKNOWN
1017  *           ret_avgpp   - RETURN: avg PP of emitted residues in parsetree (CYK or optacc) if ret_ppstr == NULL, set as 0.
1018  *           ret_sc      - RETURN: score of the alignment in bits (Inside score if do_optacc)
1019  *
1020  * Returns: <eslOK> on success.
1021  *
1022  * Throws:  <eslEINVAL> on contract violation
1023  *          <eslERANGE> if required CM_TR_MX for Inside/Outside/CYK/Posterior exceeds <size_limit>
1024  */
1025 int
cm_TrAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,int do_optacc,int do_sample,CM_TR_MX * mx,CM_TR_SHADOW_MX * shmx,CM_TR_MX * post_mx,CM_TR_EMIT_MX * emit_mx,ESL_RANDOMNESS * r,char ** ret_ppstr,Parsetree_t ** ret_tr,char * ret_mode,float * ret_avgpp,float * ret_sc)1026 cm_TrAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
1027 	   int do_optacc, int do_sample, CM_TR_MX *mx, CM_TR_SHADOW_MX *shmx, CM_TR_MX *post_mx,
1028 	   CM_TR_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr,
1029 	   char *ret_mode, float *ret_avgpp, float *ret_sc)
1030 {
1031   int          status;
1032   Parsetree_t *tr = NULL;
1033   float        sc     = 0.;
1034   float        avgpp  = 0.;
1035   float        ins_sc = 0.;
1036   int          do_post;
1037   char        *ppstr = NULL;
1038   int          have_ppstr;
1039   char         mode = TRMODE_UNKNOWN;  /* mode of tr, <ret_mode> set as this */
1040 
1041   have_ppstr = (ret_ppstr != NULL)       ? TRUE : FALSE;
1042   do_post    = (do_optacc || have_ppstr) ? TRUE : FALSE;
1043 
1044   /* Contract check */
1045   if(do_optacc && do_sample)         ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrAlign(), do_optacc and do_sample are both TRUE.");
1046   if(do_optacc && post_mx == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrAlign(), do_optacc is TRUE, but post_mx == NULL.\n");
1047   if(do_sample && r       == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrAlign(), do_sample but r is NULL.");
1048 
1049   /* if do_post:   fill Inside, Outside, Posterior matrices, in that order.
1050    * if do_sample: fill Inside and sample from it.
1051    */
1052   if(do_post || do_sample) {
1053     if((status = cm_TrInsideAlign(cm, errbuf, dsq, L, size_limit, preset_mode, pass_idx, mx, &mode, &ins_sc)) != eslOK) return status;
1054     /* mode will equal preset_mode unless preset_mode is TRMODE_UNKNOWN, in which case it will be mode that gives max inside score  */
1055     if(do_sample) {
1056       if((status = cm_TrStochasticParsetree(cm, errbuf, dsq, L, preset_mode, pass_idx, mx, r, &tr, &mode, &sc)) != eslOK) return status;
1057       /* mode may be changed if preset_mode is TRMODE_UNKNOWN, else it will equal preset mode */
1058     }
1059     if(do_post) { /* Inside was called above, now do Outside, then Posterior */
1060       if((status = cm_TrOutsideAlign    (cm, errbuf, dsq, L, size_limit, mode, pass_idx, (cm->align_opts & CM_ALIGN_CHECKINOUT), post_mx, mx)) != eslOK) return status;
1061       if((status = cm_TrPosterior       (cm, errbuf,      L, size_limit, mode, mx, post_mx, post_mx)) != eslOK) return status;
1062       if((status = cm_TrEmitterPosterior(cm, errbuf,      L, size_limit, mode, (cm->align_opts & CM_ALIGN_CHECKINOUT), post_mx, emit_mx)) != eslOK) return status;
1063     }
1064   }
1065   else {
1066     mode = preset_mode; /* this allows us to pass <mode> (not <preset_mode>) into cm_tr_alignT() below for all cases */
1067   }
1068 
1069   if(!do_sample) { /* if do_sample, we already have a parsetree */
1070     if((status = cm_tr_alignT(cm, errbuf, dsq, L, size_limit, mode, pass_idx, do_optacc, mx, shmx, emit_mx, &tr, &mode, (do_optacc) ? NULL : &sc)) != eslOK) return status;
1071   }
1072 
1073   if(have_ppstr || do_optacc) { /* call cm_PostCode to get average PP and optionally a PP string (if have_ppstr) */
1074     if((status = cm_TrPostCode(cm, errbuf, L, emit_mx, tr, (have_ppstr) ? &ppstr : NULL, &avgpp)) != eslOK) return status;
1075   }
1076 
1077   if (ret_ppstr  != NULL) *ret_ppstr  = ppstr; else if(ppstr != NULL) free(ppstr);
1078   if (ret_tr     != NULL) *ret_tr     = tr;    else if(tr    != NULL) FreeParsetree(tr);
1079   if (ret_mode   != NULL) *ret_mode   = mode;
1080   if (ret_avgpp  != NULL) *ret_avgpp  = avgpp;
1081   if (ret_sc     != NULL) *ret_sc     = (do_optacc) ? ins_sc : sc;
1082 
1083   ESL_DPRINTF1(("#DEBUG: returning from cm_TrAlign() sc : %f\n", sc));
1084   return eslOK;
1085 }
1086 
1087 /* Function: cm_TrAlignHB()
1088  * Incept:   EPN, Thu Sep  8 08:55:26 2011
1089  *           EPN, Fri Oct 26 09:31:43 2007 [FastAlignHB()]
1090  *
1091  * Purpose: Wrapper for the cm_tr_alignT_hb() routine - solve a full
1092  *           alignment problem using trCYK, truncated optimal accuracy
1093  *           or sampling and return the traceback and the score,
1094  *           without dividing & conquering, but by using bands on the
1095  *           j and d dimensions of the DP matrix.  Bands derived by
1096  *           HMM Forward/Backward runs. Optionally return a posterior
1097  *           code string.
1098  *
1099  *           Identical to cm_TrAlign() but HMM bands are used here.
1100  *           See that function's 'Purpose' for more details.
1101  *
1102  * Args:     cm          - the covariance model
1103  *           errbuf      - char buffer for reporting errors
1104  *           dsq         - the digitized sequence, 1..L
1105  *           L           - length of sequence
1106  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
1107  *           preset_mode - the alignment mode to enforce, if TRMODE_UNKNOWN we determine mode here
1108  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
1109  *           do_optacc   - TRUE to not do CYK alignment, determine the Holmes/Durbin optimally
1110  *                         accurate parsetree in ret_tr, requires post_mx != NULL
1111  *           do_sample   - TRUE to sample a parsetree from the Inside matrix
1112  *           mx          - the main dp matrix, only cells within bands in cm->cp9b will be valid.
1113  *           shmx        - the HMM banded shadow matrix to fill in and traceback, same cells as mx are valid.
1114  *           post_mx     - dp matrix for posterior calculation, can be NULL only if !do_optacc
1115  *           emit_mx     - emit matrix to fill
1116  *           r           - source of randomness, must be non-NULL only if do_sample==TRUE
1117  *           ret_ppstr   - RETURN: posterior code 1, (pass NULL if not wanted, must be NULL if post_mx == NULL)
 *           ret_tr      - RETURN: traceback (pass NULL if trace isn't wanted)
 *           ret_mode    - RETURN: mode of ret_tr, will be <preset_mode> unless that was TRMODE_UNKNOWN
 *           ret_avgpp   - RETURN: avg PP of emitted residues in parsetree, see cm_TrAlign()
 *           ret_sc      - RETURN: score of the alignment in bits (Inside score if do_optacc)
 *
 * Returns: <eslOK> on success; results are passed back in <ret_tr>,
 *          <ret_ppstr>, <ret_sc>, see 'Args' section.
 *
 * Throws:  <eslEINCOMPAT> on contract violation
 *          <eslERANGE> if required CM_TR_HB_MX for Inside/Outside/CYK/Posterior exceeds <size_limit>
1130  */
1131 int
cm_TrAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
	     int do_optacc, int do_sample, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_MX *post_mx,
	     CM_TR_HB_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr,
	     char *ret_mode, float *ret_avgpp, float *ret_sc)
{
  /* Driver for HMM banded truncated alignment. Depending on the flags it:
   *   - runs Inside (needed for posteriors and/or sampling),
   *   - optionally samples a parsetree from the Inside matrix (<do_sample>),
   *   - optionally runs Outside/Posterior/EmitterPosterior (<do_optacc> or <ret_ppstr> != NULL),
   *   - otherwise (or additionally, if not sampling) derives the parsetree with cm_tr_alignT_hb(),
   *   - optionally computes the posterior code string and average posterior probability.
   *
   * Returns eslOK on success, eslEINCOMPAT on a contract violation, or the
   * status of the first failing callee. On failure the <ret_*> arguments are
   * left untouched and any parsetree this function already owned is freed.
   */
  int          status;
  Parsetree_t *tr = NULL;
  float        sc     = 0.;
  float        avgpp  = 0.;
  float        ins_sc = 0.;
  int          do_post;
  char        *ppstr = NULL;
  int          have_ppstr;
  char         mode = TRMODE_UNKNOWN;  /* mode of tr, <ret_mode> set as this */

  have_ppstr = (ret_ppstr != NULL)       ? TRUE : FALSE;
  do_post    = (do_optacc || have_ppstr) ? TRUE : FALSE;

  /* Contract check */
  if(do_optacc && do_sample)         ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrAlignHB(), do_optacc and do_sample are both TRUE.");
  if(do_optacc && post_mx == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrAlignHB(), do_optacc is TRUE, but post_mx == NULL.\n");
  if(do_sample && r       == NULL)   ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrAlignHB(), do_sample but r is NULL.");
  /* posteriors are also computed when only a posterior string is requested, so
   * the posterior and emit matrices are required in that case as well */
  if(do_post && (post_mx == NULL || emit_mx == NULL)) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrAlignHB(), posteriors are required but post_mx or emit_mx is NULL.");

  /* if do_post, fill Inside, Outside, Posterior matrices, in that order */
  /* if do_sample (and !do_post) fill Inside and sample from it */
  if(do_post || do_sample) {
    if((status = cm_TrInsideAlignHB (cm, errbuf, dsq, L, size_limit, preset_mode, pass_idx, mx, &mode, &ins_sc)) != eslOK) return status;
    /* mode will equal preset_mode unless preset_mode is TRMODE_UNKNOWN, in which case it will be mode that gives max inside score  */
    if(do_sample) {
      /* <tr> is not owned by us until this call succeeds, so a plain return is correct on failure */
      if((status = cm_TrStochasticParsetreeHB(cm, errbuf, dsq, L, preset_mode, pass_idx, mx, r, &tr, &mode, &sc)) != eslOK) return status;
      /* mode may be changed if preset_mode is TRMODE_UNKNOWN, else it will equal preset mode */
    }
    if(do_post) { /* Inside was called above, now do Outside, then Posterior, then EmitterPosterior */
      /* from here on <tr> may already exist (do_sample), so failures must go through ERROR to free it */
      if((status = cm_TrOutsideAlignHB    (cm, errbuf, dsq, L, size_limit, mode, pass_idx, (cm->align_opts & CM_ALIGN_CHECKINOUT), post_mx, mx)) != eslOK) goto ERROR;
      if((status = cm_TrPosteriorHB       (cm, errbuf,      L, size_limit, mode, mx, post_mx, post_mx)) != eslOK) goto ERROR;
      if((status = cm_TrEmitterPosteriorHB(cm, errbuf,      L, size_limit, mode, (cm->align_opts & CM_ALIGN_CHECKINOUT), post_mx, emit_mx)) != eslOK) goto ERROR;
    }
  }
  else {
    mode = preset_mode; /* this allows us to pass <mode> (not <preset_mode>) into cm_tr_alignT() below for all cases */
  }

  if(!do_sample) { /* if do_sample, we already have a parsetree */
    /* in optimal accuracy mode the score we report is the Inside score, so no score is requested here */
    if((status = cm_tr_alignT_hb(cm, errbuf, dsq, L, size_limit, mode, pass_idx, do_optacc, mx, shmx, emit_mx, &tr, &mode, (do_optacc) ? NULL : &sc)) != eslOK) return status;
  }

  if(have_ppstr || do_optacc) {
    /* <tr> exists at this point, free it if the posterior code cannot be computed */
    if((status = cm_TrPostCodeHB(cm, errbuf, L, emit_mx, tr, (have_ppstr) ? &ppstr : NULL, &avgpp)) != eslOK) goto ERROR;
  }

#if eslDEBUGLEVEL >= 2
  /* Uncomment to dump emitmap and parsetree */
  /* CMEmitMap_t *emap; */
  /* emap = CreateEmitMap(cm); */
  /* DumpEmitMap(stdout, emap, cm); */
  /* FreeEmitMap(emap); */
  /* ParsetreeDump(stdout, tr, cm, dsq); */
#endif

  /* hand results to the caller, or release what the caller did not ask for */
  if (ret_ppstr  != NULL) *ret_ppstr  = ppstr; else if(ppstr != NULL) free(ppstr);
  if (ret_tr     != NULL) *ret_tr     = tr;    else if(tr    != NULL) FreeParsetree(tr);
  if (ret_mode   != NULL) *ret_mode   = mode;
  if (ret_avgpp  != NULL) *ret_avgpp  = avgpp;
  if (ret_sc     != NULL) *ret_sc     = (do_optacc) ? ins_sc : sc;

  ESL_DPRINTF1(("#DEBUG: returning from cm_TrAlignHB() sc : %f\n", sc));
  return eslOK;

 ERROR:
  /* errbuf has already been filled by the failing callee */
  if(tr != NULL) FreeParsetree(tr);
  return status;
}
1200 
1201 /* Function: cm_TrCYKInsideAlign()
1202  * based on cm_CYKInsideAlign()
1203  *
1204  * Date:     EPN, Fri Sep  9 15:35:06 2011
1205  *
1206  * Note:     Very similar to inside(), but slightly more efficient.
1207  *           Identical to cm_TrCYKInsideAlignHB() but HMM bands are not
1208  *           used.
1209  *
1210  * Purpose:  Perform trCYK alignment on a full sequence 1..L
1211  *           rooted at state 0. Very similar to cm_CYKInsideAlign()
1212  *           except we're doing truncated alignment and marginal
1213  *           alignment modes are possible.
1214  *
1215  *           The caller may already know the mode of the optimal
1216  *           alignment, passed in as <preset_mode>. This will happen if
1217  *           we're being called from within a search pipeline, for
1218  *           example. If the caller does not know the optimal mode yet
1219  *           (e.g. if we're being called for 'cmalign'), <preset_mode>
1220  *           will be TRMODE_UNKNOWN. In this case, we allow all modes.
1221  *
1222  *           The mode of the optimal parsetree is returned in <ret_mode>,
1223  *           it has score <ret_sc>.
1224  *
1225  *           We deal with truncated begins by keeping track of the
1226  *           optimal state that we could enter and account for the
1227  *           whole target sequence in each mode: {J,L,R,T}b = argmax_v
1228  *           {J,L,R,T}alpha_v(1,L) + log t_0(v), and
1229  *           {J,L,R,T}alpha[0][L][L] is the score for that. For the
1230  *           mode that gives the optimal alignment, <ret_b> is that
1231  *           mode's b and <ret_sc> is that modes alpha[0][L][L]
1232  *           sc. For example if Jalpha[0][L][L] is the optimal score,
1233  *           a local alignment into state Jb in joint marginal mode is
1234  *           optimal and <ret_b> = Jb and <ret_sc> = Jbsc.
1235  *
1236  *           All alignments must use truncated begins when computing
1237  *           truncated alignments. The penalty for the begin is
1238  *           different depending on if we're in local mode or not and
1239  *           what the value of <pass_idx> is.
1240  *
1241  * Args:     cm          - the model    [0..M-1]
1242  *           errbuf      - char buffer for reporting errors
1243  *           dsq         - the digitaized sequence [1..L]
1244  *           L           - length of target sequence, we align 1..L
1245  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
1246  *           preset_mode - the pre-determined alignment mode, or TRMODE_UNKNOWN to allow any mode
1247  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
1248  *           mx          - dp matrix
1249  *           shmx        - shadow matrix
1250  *           ret_b       - RETURN: best internal entry state for optimal mode, if local begins are on
1251  *           ret_mode    - RETURN: mode of optimal CYK parsetree (TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T)
1252  *           ret_sc      - RETURN: score of optimal, CYK parsetree in any mode (max of mx->{J,L,R,T}alpha[0][L][L])
1253  *
1254  * Returns:  <eslOK> on success.
1255  *
1256  * Throws:   <eslERANGE> if required mx or shmx size exceeds <size_limit>
1257  *           In this case alignment has been aborted, <ret_*> variables are not valid
1258  */
1259 int
cm_TrCYKInsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,CM_TR_MX * mx,CM_TR_SHADOW_MX * shmx,int * ret_b,char * ret_mode,float * ret_sc)1260 cm_TrCYKInsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
1261 		    CM_TR_MX *mx, CM_TR_SHADOW_MX *shmx, int *ret_b, char *ret_mode, float *ret_sc)
1262 {
1263   int      status;          /* easel status code */
1264   int      v,y,z;	    /* indices for states  */
1265   int      j,d,i,k;	    /* indices in sequence dimensions */
1266   float    sc;		    /* a temporary variable holding a score */
1267   int      yoffset;	    /* y=base+offset -- counter in child states that v can transit to */
1268   float   *el_scA;          /* [0..d..W-1] probability of local end emissions of length d */
1269   int      sd;              /* StateDelta(cm->sttype[v]) */
1270   int      sdl;             /* StateLeftDelta(cm->sttype[v] */
1271   int      sdr;             /* StateRightDelta(cm->sttype[v] */
1272   int      j_sdr;           /* j - sdr */
1273   int      d_sd;            /* d - sd */
1274   int      d_sdl;           /* d - sdl */
1275   int      d_sdr;           /* d - sdr */
1276   float    tsc;             /* a transition score */
1277 
1278   /* other variables used in truncated version, but not standard version (not in cm_CYKInsideAlign()) */
1279   int   b, Jb, Lb, Rb, Tb;      /* local entry state rooting overall and {J,L,R,T} optimal parsetrees using */
1280   char  mode = TRMODE_UNKNOWN;  /* truncation mode for obtaining optimal score <ret_sc> */
1281   int   Lyoffset0;              /* first yoffset to use for updating L matrix in IR/MR states, 1 if IR, 0 if MR */
1282   int   Ryoffset0;              /* first yoffset to use for updating R matrix in IL/ML states, 1 if IL, 0 if ML */
1283   int   fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
1284   int   pty_idx;                /* index for truncation penalty, determined by pass_idx */
1285   float trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
1286 
1287   /* the DP matrix */
1288   float ***Jalpha  = mx->Jdp; /* pointer to the Jalpha DP matrix */
1289   float ***Lalpha  = mx->Ldp; /* pointer to the Lalpha DP matrix */
1290   float ***Ralpha  = mx->Rdp; /* pointer to the Ralpha DP matrix */
1291   float ***Talpha  = mx->Tdp; /* pointer to the Talpha DP matrix */
1292 
1293   char  ***Jyshadow = shmx->Jyshadow; /* pointer to the Jyshadow matrix */
1294   char  ***Lyshadow = shmx->Lyshadow; /* pointer to the Lyshadow matrix */
1295   char  ***Ryshadow = shmx->Ryshadow; /* pointer to the Ryshadow matrix */
1296   int   ***Jkshadow = shmx->Jkshadow; /* pointer to the Jkshadow matrix */
1297   int   ***Lkshadow = shmx->Lkshadow; /* pointer to the Lkshadow matrix */
1298   int   ***Rkshadow = shmx->Rkshadow; /* pointer to the Rkshadow matrix */
1299   int   ***Tkshadow = shmx->Tkshadow; /* pointer to the Tkshadow matrix */
1300   char  ***Lkmode   = shmx->Lkmode;   /* pointer to the Lkmode matrix */
1301   char  ***Rkmode   = shmx->Rkmode;   /* pointer to the Rkmode matrix */
1302 
1303   /* Determine which matrices we need to fill in, based on <preset_mode>, if TRMODE_UNKNOWN, fill_L, fill_R, fill_T will all be set as TRUE */
1304   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrCYKInsideAlign(), bogus mode: %d", preset_mode);
1305 
1306   /* Determine the truncation penalty index, from the pass_idx */
1307   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrCYKInsideAlign(), unexpected pass idx: %d", pass_idx);
1308 
1309   /* Allocations and initializations  */
1310   Jb   = Lb   = Rb   = Tb   = b = 0; /* will be unchanged if local begins are off, *ret_b will be set as 0 */
1311 
1312   /* grow the matrices based on the current sequence and bands */
1313   if((status = cm_tr_mx_GrowTo       (cm, mx,   errbuf, L, size_limit)) != eslOK) return status;
1314   if((status = cm_tr_shadow_mx_GrowTo(cm, shmx, errbuf, L, size_limit)) != eslOK) return status;
1315 
1316   /* precalcuate all possible local end scores, for local end emits of 1..L residues */
1317   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
1318   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
1319 
1320   /* initialize all cells of the matrix to IMPOSSIBLE */
1321   if(  mx->Jncells_valid   > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
1322   if(  mx->Lncells_valid   > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
1323   if(  mx->Rncells_valid   > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
1324   if(  mx->Tncells_valid   > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
1325   if(shmx->Jy_ncells_valid > 0)           for(i = 0; i < shmx->Jy_ncells_valid; i++) shmx->Jyshadow_mem[i] = USED_EL;
1326   if(shmx->Ly_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Ly_ncells_valid; i++) shmx->Lyshadow_mem[i] = USED_EL;
1327   if(shmx->Ry_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Ry_ncells_valid; i++) shmx->Ryshadow_mem[i] = USED_EL;
1328   /* for B states, shadow matrix holds k, length of right fragment, this will almost certainly be overwritten */
1329   if(shmx->Jk_ncells_valid > 0)           esl_vec_ISet(shmx->Jkshadow_mem, shmx->Jk_ncells_valid, 0);
1330   if(shmx->Lk_ncells_valid > 0 && fill_L) esl_vec_ISet(shmx->Lkshadow_mem, shmx->Lk_ncells_valid, 0);
1331   if(shmx->Rk_ncells_valid > 0 && fill_R) esl_vec_ISet(shmx->Rkshadow_mem, shmx->Rk_ncells_valid, 0);
1332   if(shmx->Tk_ncells_valid > 0 && fill_T) esl_vec_ISet(shmx->Tkshadow_mem, shmx->Tk_ncells_valid, 0);
1333   if(shmx->Lk_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Lk_ncells_valid; i++) shmx->Lkmode_mem[i] = TRMODE_J;
1334   if(shmx->Rk_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Rk_ncells_valid; i++) shmx->Rkmode_mem[i] = TRMODE_J;
1335 
1336   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores */
1337   if(cm->flags & CMH_LOCAL_END) {
1338     for (j = 0; j <= L; j++) {
1339       for (d = 0;  d <= j; d++) {
1340 	Jalpha[cm->M][j][d] = el_scA[d];
1341       }
1342     }
1343     if(fill_L) {
1344       for (j = 0; j <= L; j++) {
1345 	for (d = 0;  d <= j; d++) {
1346 	  Lalpha[cm->M][j][d] = el_scA[d];
1347 	}
1348       }
1349     }
1350     if(fill_R) {
1351       for (j = 0; j <= L; j++) {
1352 	for (d = 0;  d <= j; d++) {
1353 	  Ralpha[cm->M][j][d] = el_scA[d];
1354 	}
1355       }
1356     }
1357   }
1358 
1359   /* Main recursion */
1360   for (v = cm->M-1; v > 0; v--) { /* almost to ROOT_S, we handle that differently */
1361     float const *esc_v = cm->oesc[v]; /* emission scores for state v */
1362     float const *tsc_v = cm->tsc[v];  /* transition scores for state v */
1363     float const *lmesc_v = cm->lmesc[v]; /* marginal left  emission scores for state v */
1364     float const *rmesc_v = cm->rmesc[v]; /* marginal right emission scores for state v */
1365     sd   = StateDelta(cm->sttype[v]);
1366     sdl  = StateLeftDelta(cm->sttype[v]);
1367     sdr  = StateRightDelta(cm->sttype[v]);
1368 
1369     /* re-initialize the J, L and R deck if we can do a local end from v */
1370     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
1371       for (j = 0; j <= L; j++) {
1372 	for (d = sd; d <= j; d++) {
1373 	  Jalpha[v][j][d] = el_scA[d-sd] + cm->endsc[v];
1374 	}
1375       }
1376       if(fill_L) {
1377 	for (j = 0; j <= L; j++) {
1378 	  for (d = sdl; d <= j; d++) {
1379 	    Lalpha[v][j][d] = el_scA[d-sdl] + cm->endsc[v];
1380 	  }
1381 	}
1382       }
1383       if(fill_R) {
1384 	for (j = 0; j <= L; j++) {
1385 	  for (d = sdr; d <= j; d++) {
1386 	    Ralpha[v][j][d] = el_scA[d-sdr] + cm->endsc[v];
1387 	  }
1388 	}
1389       }
1390     }
1391     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
1392 
1393     if(cm->sttype[v] == E_st) {
1394       for (j = 0; j <= L; j++) {
1395 	Jalpha[v][j][0] = 0.;
1396 	if(fill_L) Lalpha[v][j][0] = 0.;
1397 	if(fill_R) Ralpha[v][j][0] = 0.;
1398 	/* rest of deck remains IMPOSSIBLE */
1399       }
1400     }
1401     else if(cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) {
1402       /* update alpha[v][j][d] cells, for IL states, loop nesting order is:
1403        * for j { for d { for y { } } } because they can self transit, and a
1404        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1405        * before can start calc'ing for alpha[v][j][d+1]
1406        * We do ML states as well as IL states b/c they follow the same rules,
1407        * and we're not worried about efficiency here.
1408        */
1409 
1410       /* In TrCYK: we need to treat R differently from and J and L
1411        * here, by doing separate 'for (yoffset...' loops for J and R
1412        * because we have to fully calculate Jalpha[v][j][d]) before we
1413        * can start to calculate Ralpha[v][j][d].
1414        */
1415 
1416       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
1417 	Ryoffset0 = cm->sttype[v] == IL_st ? 1 : 0; /* don't allow IL self transits in R mode */
1418 	for (j = sdr; j <= L; j++) {
1419 	  j_sdr = j - sdr;
1420 	  for (d = sd; d <= j; d++) {
1421 	    d_sd = d - sd;
1422 	    i    = j - d + 1;
1423 	    for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
1424 	      y = cm->cfirst[v] + yoffset;
1425 	      if ((sc = Jalpha[y][j_sdr][d_sd] + tsc_v[yoffset]) > Jalpha[v][j][d]) {
1426 		Jalpha[v][j][d]   = sc;
1427 		Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
1428 	      }
1429 	      if (fill_L && (sc = Lalpha[y][j_sdr][d_sd] + tsc_v[yoffset]) > Lalpha[v][j][d]) {
1430 		Lalpha[v][j][d]   = sc;
1431 		Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
1432 	      }
1433 	    }
1434 	    Jalpha[v][j][d] += esc_v[dsq[i]];
1435 	    Jalpha[v][j][d]  = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
1436 	    if(fill_L) {
1437 	      if(d >= 2) {
1438 		Lalpha[v][j][d] += esc_v[dsq[i]];
1439 	      }
1440 	      else {
1441 		Lalpha[v][j][d]   = esc_v[dsq[i]];
1442 		Lyshadow[v][j][d] = USED_TRUNC_END;
1443 	      }
1444 	      Lalpha[v][j][d] = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
1445 	    }
1446 	    i--;
1447 
1448 	    /* handle R separately */
1449 	    if(fill_R) {
1450 	      /* note we use 'd', not 'd_sd' (which we used in the corresponding loop for J,L above) */
1451 	      for (yoffset = Ryoffset0; yoffset < cm->cnum[v]; yoffset++) { /* using Ryoffset0 instead of 0 disallows IL self transits in R mode */
1452 		y = cm->cfirst[v] + yoffset;
1453 		if ((sc = Jalpha[y][j_sdr][d] + tsc_v[yoffset]) > Ralpha[v][j][d]) {
1454 		  Ralpha[v][j][d] = sc;
1455 		  Ryshadow[v][j][d]= yoffset + TRMODE_J_OFFSET;
1456 		}
1457 		if ((sc = Ralpha[y][j_sdr][d] + tsc_v[yoffset]) > Ralpha[v][j][d]) {
1458 		  Ralpha[v][j][d] = sc;
1459 		  Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
1460 		}
1461 	      }
1462 	      Ralpha[v][j][d] = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
1463 	    }
1464 	  }
1465 	}
1466       } /* end of if(! StateIsDetached(cm,v )) */
1467     }
1468     else if(cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) {
1469       /* update alpha[v][j][d] cells, for IR states, loop nesting order is:
1470        * for j { for d { for y { } } } because they can self transit, and a
1471        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
1472        * before can start calc'ing for alpha[v][j][d+1].
1473        * We do MR states as well as IR states b/c they follow the same rules,
1474        * and we're not worried about efficiency here.
1475        */
1476 
1477       /* In TrCYK: we need to treat L differently from and J and R
1478        * here, by doing separate 'for (yoffset...' loops for J and R
1479        * because we have to fully calculate Jalpha[v][j][d]) before we
1480        * can start to calculate Lalpha[v][j][d].
1481        */
1482 
1483       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
1484 	Lyoffset0 = cm->sttype[v] == IR_st ? 1 : 0; /* don't allow IR self transits in L mode */
1485 	for (j = sdr; j <= L; j++) {
1486 	  j_sdr = j - sdr;
1487 	  for (d = sd; d <= j; d++) {
1488 	    d_sd = d - sd;
1489 	    i = j - d + 1;
1490 	    for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
1491 	      y = cm->cfirst[v] + yoffset;
1492 	      if ((sc = Jalpha[y][j_sdr][d_sd] + tsc_v[yoffset]) > Jalpha[v][j][d]) {
1493 		Jalpha[v][j][d]   = sc;
1494 		Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
1495 	      }
1496 	      if (fill_R && (sc = Ralpha[y][j_sdr][d_sd] + tsc_v[yoffset]) > Ralpha[v][j][d]) {
1497 		Ralpha[v][j][d]   = sc;
1498 		Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
1499 	      }
1500 	    }
1501 	    Jalpha[v][j][d] += esc_v[dsq[j]];
1502 	    Jalpha[v][j][d]  = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
1503 	    if(fill_R) {
1504 	      if(d >= 2) {
1505 		Ralpha[v][j][d] += esc_v[dsq[j]];
1506 	      }
1507 	      else {
1508 		Ralpha[v][j][d]   = esc_v[dsq[j]];
1509 		Ryshadow[v][j][d] = USED_TRUNC_END;
1510 	      }
1511 	      Ralpha[v][j][d] = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
1512 	    }
1513 
1514 	    /* handle L separately */
1515 	    if(fill_L) {
1516 	      /* note we use 'j' and 'd', not 'j_sdr' and 'd_sd' (which we used in the corresponding loop for J,R above) */
1517 	      for (yoffset = Lyoffset0; yoffset < cm->cnum[v]; yoffset++) { /* using Lyoffset0, instead of 0 disallows IR self transits in L mode */
1518 		y = cm->cfirst[v] + yoffset;
1519 		if ((sc = Jalpha[y][j][d] + tsc_v[yoffset]) > Lalpha[v][j][d]) {
1520 		  Lalpha[v][j][d] = sc;
1521 		  Lyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
1522 		}
1523 		if ((sc = Lalpha[y][j][d] + tsc_v[yoffset]) > Lalpha[v][j][d]) {
1524 		  Lalpha[v][j][d] = sc;
1525 		  Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
1526 		}
1527 	      }
1528 	      Lalpha[v][j][d] = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
1529 	    }
1530 	  }
1531 	}
1532       } /* end of if(! StateIsDetached(cm, v)) */
1533     }
1534     else if(cm->sttype[v] == MP_st) {
1535       /* MP states cannot self transit, this means that all cells in
1536        * alpha[v] are independent of each other, only depending on
1537        * alpha[y] for previously calc'ed y.  We can do the for loops
1538        * in any nesting order, this implementation does what I think
1539        * is most efficient: for y { for j { for d { } } }
1540        */
1541       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
1542 	yoffset = y - cm->cfirst[v];
1543 	tsc = tsc_v[yoffset];
1544 
1545 	for (j = sdr; j <= L; j++) {
1546 	  j_sdr = j - sdr;
1547 
1548 	  for (d = sd; d <= j; d++) { /* sd == 2 for MP state */
1549 	    d_sd = d-sd;
1550 	    if((sc = Jalpha[y][j_sdr][d_sd] + tsc) > Jalpha[v][j][d]) {
1551 	      Jalpha[v][j][d]   = sc;
1552 	      Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
1553 	    }
1554 	  }
1555 	  if(fill_L) {
1556 	    /* note we use 'j' and 'd_sdl' not 'j_sdr' for 'd_sd' for L, plus minimum d is sdl (1) */
1557 	    for (d = sdl; d <= j; d++) { /* sdl == 1 for MP state */
1558 	      d_sdl = d-sdl;
1559 	      if((sc = Jalpha[y][j][d_sdl] + tsc) > Lalpha[v][j][d]) {
1560 		Lalpha[v][j][d]   = sc;
1561 		Lyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
1562 	      }
1563 	      if((sc = Lalpha[y][j][d_sdl] + tsc) > Lalpha[v][j][d]) {
1564 		Lalpha[v][j][d]   = sc;
1565 		Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
1566 	      }
1567 	    }
1568 	  }
1569 	  if(fill_R) {
1570 	    /* note we use 'd_sdr' not 'd_sd' for R, plus minimum d is sdr (1) */
1571 	    for (d = sdr; d <= j; d++) { /* sdr == 1 for MP state */
1572 	      d_sdr = d - sdr;
1573 	      if((sc = Jalpha[y][j_sdr][d_sdr] + tsc) > Ralpha[v][j][d]) {
1574 		Ralpha[v][j][d]   = sc;
1575 		Ryshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
1576 	      }
1577 	      if((sc = Ralpha[y][j_sdr][d_sdr] + tsc) > Ralpha[v][j][d]) {
1578 		Ralpha[v][j][d]   = sc;
1579 		Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
1580 	      }
1581 	    }
1582 	  }
1583 	}
1584       }
1585       /* add in emission score */
1586       for (j = 0; j <= L; j++) {
1587 	i = j;
1588 	Jalpha[v][j][1] = IMPOSSIBLE;
1589 	if(fill_L) {
1590 	  Lalpha[v][j][1] = lmesc_v[dsq[i]];
1591 	  Lyshadow[v][j][1] = USED_TRUNC_END;
1592 	}
1593 	if(fill_R) {
1594 	  Ralpha[v][j][1] = rmesc_v[dsq[j]];
1595 	  Ryshadow[v][j][1] = USED_TRUNC_END;
1596 	}
1597 	i--;
1598 	for (d = 2; d <= j; d++) {
1599 	  Jalpha[v][j][d] += esc_v[dsq[i]*cm->abc->Kp+dsq[j]];
1600 	  if(fill_L) Lalpha[v][j][d] += lmesc_v[dsq[i]];
1601 	  if(fill_R) Ralpha[v][j][d] += rmesc_v[dsq[j]];
1602 	  i--;
1603 	}
1604       }
1605       /* ensure all cells are >= IMPOSSIBLE */
1606       for (j = 0; j <= L; j++) {
1607 	for (d = 1; d <= j; d++) {
1608 	  Jalpha[v][j][d] = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
1609 	  if(fill_L) Lalpha[v][j][d] = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
1610 	  if(fill_R) Ralpha[v][j][d] = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
1611 	}
1612       }
1613     }
1614     else if(cm->sttype[v] != B_st) { /* entered if state v is D or S */
1615       /* D, S states cannot self transit, this means that all cells in
1616        * alpha[v] are independent of each other, only depending on
1617        * alpha[y] for previously calc'ed y.  We can do the for loops
1618        * in any nesting order, this implementation does what I think
1619        * is most efficient: for y { for j { for d { } } }
1620        */
1621       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
1622 	yoffset = y - cm->cfirst[v];
1623 	tsc = tsc_v[yoffset];
1624 	for (j = sdr; j <= L; j++) {
1625 	  j_sdr = j - sdr;
1626 
1627 	  for (d = sd; d <= j; d++) {
1628 	    d_sd = d-sd;
1629 	    if((sc = Jalpha[y][j_sdr][d_sd] + tsc) > Jalpha[v][j][d]) {
1630 	      Jalpha[v][j][d]   = sc;
1631 	      Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
1632 	    }
1633 	    if(fill_L && (sc = Lalpha[y][j_sdr][d_sd] + tsc) > Lalpha[v][j][d]) {
1634 	      Lalpha[v][j][d]   = sc;
1635 	      Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
1636 	    }
1637 	    if(fill_R && (sc = Ralpha[y][j_sdr][d_sd] + tsc) > Ralpha[v][j][d]) {
1638 	      Ralpha[v][j][d]   = sc;
1639 	      Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
1640 	    }
1641 	  }
1642 	  /* an easy to overlook case: if d == 0, ensure L and R values are IMPOSSIBLE */
1643 	  if(fill_L) Lalpha[v][j][0] = IMPOSSIBLE;
1644 	  if(fill_R) Ralpha[v][j][0] = IMPOSSIBLE;
1645 	  /* And another special case for BEGL_S and BEGR_S states,
1646 	   * reset shadow matrix values for d == 0 (which were
1647 	   * initialized to USED_EL above), even though the score of
1648 	   * these cells is impossible we may use them as a
1649 	   * zero-length left or right half of a BIF_B subtree during
1650 	   * construction of the parsetree.
1651 	   */
1652 	  if(cm->sttype[v] == S_st) {
1653 	    if(fill_L) Lyshadow[v][j][0] = USED_TRUNC_END;
1654 	    if(fill_R) Ryshadow[v][j][0] = USED_TRUNC_END;
1655 	  }
1656 	}
1657       }
1658       /* no emission score to add */
1659     }
1660     else { /* B_st */
1661       assert(cm->sttype[v] == B_st);
1662       y = cm->cfirst[v]; /* left  subtree */
1663       z = cm->cnum[v];   /* right subtree */
1664 
1665       for (j = 0; j <= L; j++) {
1666 	for (d = 0; d <= j; d++) {
1667 	  for (k = 0; k <= d; k++) {
1668 	    if((sc = Jalpha[y][j-k][d-k] + Jalpha[z][j][k]) > Jalpha[v][j][d]) {
1669 	      Jalpha[v][j][d]   = sc;
1670 	      Jkshadow[v][j][d] = k;
1671 	    }
1672 	    if(fill_L && (sc = Jalpha[y][j-k][d-k] + Lalpha[z][j][k]) > Lalpha[v][j][d]) {
1673 	      Lalpha[v][j][d]   = sc;
1674 	      Lkshadow[v][j][d] = k;
1675 	      Lkmode[v][j][d]   = TRMODE_J;
1676 	    }
1677 	    if(fill_R && (sc = Ralpha[y][j-k][d-k] + Jalpha[z][j][k]) > Ralpha[v][j][d]) {
1678 	      Ralpha[v][j][d]   = sc;
1679 	      Rkshadow[v][j][d] = k;
1680 	      Rkmode[v][j][d]   = TRMODE_J;
1681 	    }
1682 	  }
1683 	  if(fill_T) {
1684 	    for(k = 1; k < d; k++) { /* special boundary case for T matrix */
1685 	      if(fill_T && (sc = Ralpha[y][j-k][d-k] + Lalpha[z][j][k]) > Talpha[v][j][d]) {
1686 		Talpha[v][j][d]   = sc;
1687 		Tkshadow[v][j][d] = k;
1688 	      }
1689 	    }
1690 	  }
1691 	  /* two additional special cases in trCYK (these are not in standard CYK) */
1692 	  /* special case 1: k == 0 (full sequence aligns to BEGL_S left child */
1693 	  if(fill_L) {
1694 	    if((sc = Jalpha[y][j][d]) > Lalpha[v][j][d]) {
1695 	      Lalpha[v][j][d]   = sc;
1696 	      Lkshadow[v][j][d] = 0; /* k == 0 for this case, full sequence is on left */
1697 	      Lkmode[v][j][d]   = TRMODE_J;
1698 	    }
1699 	    if((sc = Lalpha[y][j][d]) > Lalpha[v][j][d]) {
1700 	      Lalpha[v][j][d]   = sc;
1701 	      Lkshadow[v][j][d] = 0; /* k == 0 for this case, full sequence is on left */
1702 	      Lkmode[v][j][d]   = TRMODE_L;
1703 	    }
1704 	  }
1705 	  /* special case 2: k == d (full sequence aligns to BEGR_S right child */
1706 	  if(fill_R) {
1707 	    if((sc = Jalpha[z][j][d]) > Ralpha[v][j][d]) {
1708 	      Ralpha[v][j][d]   = sc;
1709 	      Rkshadow[v][j][d] = d; /* k == d in this case, full sequence is on right */
1710 	      Rkmode[v][j][d]   = TRMODE_J;
1711 	    }
1712 	    if((sc = Ralpha[z][j][d]) > Ralpha[v][j][d]) {
1713 	      Ralpha[v][j][d]   = sc;
1714 	      Rkshadow[v][j][d] = d; /* k == d in this case, full sequence is on right */
1715 	      Rkmode[v][j][d]   = TRMODE_R;
1716 	    }
1717 	  }
1718 	}
1719       }
1720     } /* end of B_st recursion */
1721 
1722     /* Now handle from ROOT_S, state 0. So far we haven't touched
1723      * the {J,L,R,T}alpha[0] decks at all since initialization and here
1724      * we'll only update at most 1 cell in each, the one pertaining
1725      * to a full alignment [0][L][L].
1726      *
1727      * In truncated alignment the only way out of ROOT_S in local or
1728      * global mode is via a 'truncated begin' with a score (penalty)
1729      * from cm->trp into any emitting state. The penalty was
1730      * calculated in cm_tr_penalties_Create() and differs depending on
1731      * whether we are in local or global mode and the value of
1732      * 'pty_idx' which was passed in.
1733      */
1734     trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
1735     if(NOT_IMPOSSIBLE(trpenalty)) {
1736       /* check if we have a new optimally scoring Joint alignment in J matrix */
1737       sc = Jalpha[v][L][L] + trpenalty;
1738       if (sc > Jalpha[0][L][L]) {
1739 	Jalpha[0][L][L] = sc;
1740 	Jb = v;
1741       }
1742       /* check if we have a new optimally scoring Left alignment in L matrix */
1743       if(fill_L) {
1744 	sc = Lalpha[v][L][L] + trpenalty;
1745 	if (sc > Lalpha[0][L][L]) {
1746 	  Lalpha[0][L][L] = sc;
1747 	  Lb = v;
1748 	}
1749       }
1750       /* check if we have a new optimally scoring Right alignment in R matrix */
1751       if(fill_R) {
1752 	sc = Ralpha[v][L][L] + trpenalty;
1753 	if (sc > Ralpha[0][L][L]) {
1754 	  Ralpha[0][L][L] = sc;
1755 	  Rb = v;
1756 	}
1757       }
1758       /* check if we have a new optimally scoring Terminal alignment in T matrix */
1759       if(fill_T && cm->sttype[v] == B_st) {
1760 	sc = Talpha[v][L][L] + trpenalty;
1761 	if (sc > Talpha[0][L][L]) {
1762 	  Talpha[0][L][L] = sc;
1763 	  Tb = v;
1764 	}
1765       }
1766     }
1767   } /* end loop for (v = cm->M-1; v > 0; v--) */
1768 
1769   /* all valid alignments must use a truncated begin */
1770   Jyshadow[0][L][L] = USED_TRUNC_BEGIN;
1771   if(fill_L) Lyshadow[0][L][L] = USED_TRUNC_BEGIN;
1772   if(fill_R) Ryshadow[0][L][L] = USED_TRUNC_BEGIN;
1773   /* Tyshadow[0] doesn't exist, caller must know how to deal */
1774 
1775   /* determine mode of optimal alignment, if it was preset then use that */
1776   if(preset_mode == TRMODE_J) {
1777     sc   = Jalpha[0][L][L];
1778     mode = TRMODE_J;
1779     b    = Jb;
1780   }
1781   else if(preset_mode == TRMODE_L) {
1782     sc   = Lalpha[0][L][L];
1783     mode = TRMODE_L;
1784     b    = Lb;
1785   }
1786   else if(preset_mode == TRMODE_R) {
1787     sc   = Ralpha[0][L][L];
1788     mode = TRMODE_R;
1789     b    = Rb;
1790   }
1791   else if(preset_mode == TRMODE_T) {
1792     sc   = Talpha[0][L][L];
1793     mode = TRMODE_T;
1794     b    = Tb;
1795   }
1796   else { /* preset_mode was unknown, max score determines mode */
1797     sc   = Jalpha[0][L][L];
1798     mode = TRMODE_J;
1799     b    = Jb;
1800     if (fill_L && Lalpha[0][L][L] > sc) {
1801       sc   = Lalpha[0][L][L];
1802       mode = TRMODE_L;
1803       b    = Lb;
1804     }
1805     if (fill_R && Ralpha[0][L][L] > sc) {
1806       sc   = Ralpha[0][L][L];
1807       mode = TRMODE_R;
1808       b    = Rb;
1809     }
1810     if (fill_T && Talpha[0][L][L] > sc) {
1811       sc   = Talpha[0][L][L];
1812       mode = TRMODE_T;
1813       b    = Tb;
1814     }
1815   }
1816 
1817 #if eslDEBUGLEVEL >= 2
1818   /* Uncomment to dump matrix to file. Careful...this could be very large. */
1819   /* FILE *fp1; fp1 = fopen("tmp.tru_cykmx", "w");   cm_tr_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1); */
1820   /* FILE *fp2; fp2 = fopen("tmp.tru_cykshmx", "w"); cm_tr_shadow_mx_Dump(fp2, cm, shmx, preset_mode, TRUE); fclose(fp2); */
1821 #endif
1822 
1823   if(ret_b    != NULL) *ret_b    = b;
1824   if(ret_mode != NULL) *ret_mode = mode;
1825   if(ret_sc   != NULL) *ret_sc   = sc;
1826 
1827   free(el_scA);
1828 
1829   ESL_DPRINTF1(("#DEBUG: cm_TrCYKInsideAlign return sc: %f\n", sc));
1830   return eslOK;
1831 
1832  ERROR:
1833   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
1834 }
1835 
1836 /* Function: cm_TrCYKInsideAlignHB()
1837  *
1838  * Date:     EPN, Wed Sep  7 12:13:43 2011
1839  *
1840  * Purpose: Run the inside phase of a trCYK alignment using bands in
1841  *           the j and d dimensions of the DP matrix. Bands were
1842  *           obtained from an HMM Forward-Backward parse of the target
1843  *           sequence. Uses float log odds scores.
1844  *
1845  *           A CM_TR_HB_MX DP matrix must be passed in. Only cells
1846  *           within the bands given in the CP9Bands_t <cm->cp9b>
1847  *           will be valid.
1848  *
1849  *           Otherwise, the same as cm_TrCYKInsideAlign(), see that
1850  *           function's 'Purpose' for more information, including
1851  *           important caveats regarding handling local begins.
1852  *
1853  * Args:     cm          - the model    [0..M-1]
1854  *           errbuf      - char buffer for reporting errors
1855  *           dsq         - the digitized sequence [1..L]
1856  *           L           - length of target sequence, we align 1..L
1857  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
1858  *           preset_mode - the pre-determined alignment mode, TRMODE_UNKNOWN to allow any mode
1859  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
1860  *           mx          - the dp matrix, only cells within bands in cm->cp9b will be valid.
1861  *           shmx        - the HMM banded shadow matrix to fill in, only cells within bands are valid
1862  *           ret_b       - RETURN: best internal entry state for optimal mode, if local begins are on
1863  *           ret_mode    - RETURN: mode of optimal CYK parsetree (TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T)
1864  *           ret_sc      - RETURN: score of optimal, CYK parsetree in any mode (max of mx->{J,L,R,T}alpha[0][L][L])
1865  *
1866  * Returns:  <eslOK> on success.
1867  *
1868  * Throws:   <eslERANGE>     if required mx or shmx size exceeds <size_limit>
1869  *           <eslEINVAL>     if the full sequence is not within the bands for state 0
1870  *           <eslEAMBIGUOUS> if no valid alignment is possible due to bands (score of sequence is IMPOSSIBLE)
1871  *           In any of these three cases, alignment has been aborted, ret variables are not valid.
1872  */
1873 int
cm_TrCYKInsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,CM_TR_HB_MX * mx,CM_TR_HB_SHADOW_MX * shmx,int * ret_b,char * ret_mode,float * ret_sc)1874 cm_TrCYKInsideAlignHB(CM_t *cm, char *errbuf,  ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
1875 		      CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, int *ret_b, char *ret_mode, float *ret_sc)
1876 {
1877   int      status;
1878   int      v,y,z;	/* indices for states  */
1879   int      j,d,i,k;	/* indices in sequence dimensions */
1880   float    sc;          /* temporary score */
1881   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
1882   int     *yvalidA;     /* [0..MAXCONNECT-1] TRUE if v->yoffset is legal transition (within bands) */
1883   float   *el_scA;      /* [0..d..W-1] probability of local end emissions of length d */
1884   int      sd;          /* StateDelta(cm->sttype[v]) */
1885   int      sdl;         /* StateLeftDelta(cm->sttype[v]) */
1886   int      sdr;         /* StateRightDelta(cm->sttype[v]) */
1887   int      j_sdr;       /* j - sdr */
1888 
1889   /* indices used for handling band-offset issues, and in the depths of the DP recursion */
1890   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
1891   int      jp_y_sdr;           /* jp_y - sdr */
1892   int      jn, jx;             /* current minimum/maximum j allowed */
1893   int      jpn, jpx;           /* minimum/maximum jp_v */
1894   int      dp_v, dp_y, dp_z;   /* d index for state v/y/z in alpha */
1895   int      dn, dx;             /* current minimum/maximum d allowed */
1896   int      dp_y_sd;            /* dp_y - sd */
1897   int      dp_y_sdr;           /* dp_y - sdr */
1898   int      dp_y_sdl;           /* dp_y - sdl */
1899   int      dpn, dpx;           /* minimum/maximum dp_v */
1900   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
1901   int      kn, kx;             /* current minimum/maximum k value */
1902   int      Lp;                 /* L index also changes depending on state */
1903   float    tsc;                /* a transition score */
1904   int      yvalid_idx;         /* for keeping track of which children are valid */
1905   int      yvalid_ct;          /* for keeping track of which children are valid */
1906   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
1907   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
1908 
1909   /* variables related to truncated alignment (not in cm_CYKInsideAlignHB() */
1910   int      b, Jb, Lb, Rb, Tb;      /* local entry state rooting overall and {J,L,R,T} optimal parsetrees using */
1911   char     mode = TRMODE_UNKNOWN;  /* truncation mode for obtaining optimal score <ret_sc> */
1912   int      fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
1913   int      do_J_v, do_J_y, do_J_z; /* must we fill J matrix deck for state v, y, z? */
1914   int      do_L_v, do_L_y, do_L_z; /* must we fill L matrix deck for state v, y, z? */
1915   int      do_R_v, do_R_y, do_R_z; /* must we fill R matrix deck for state v, y, z? */
1916   int      do_T_v;                 /* must we fill T matrix deck for state v? */
1917   int      pty_idx;                /* index for truncation penalty, determined by pass_idx */
1918   float    trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
1919 
1920   /* variables used for memory efficient bands */
1921   /* ptrs to cp9b info, for convenience */
1922   CP9Bands_t *cp9b = cm->cp9b;
1923   int     *jmin  = cp9b->jmin;
1924   int     *jmax  = cp9b->jmax;
1925   int    **hdmin = cp9b->hdmin;
1926   int    **hdmax = cp9b->hdmax;
1927 
1928   /* the DP matrix */
1929   float ***Jalpha  = mx->Jdp; /* pointer to the Jalpha DP matrix */
1930   float ***Lalpha  = mx->Ldp; /* pointer to the Lalpha DP matrix */
1931   float ***Ralpha  = mx->Rdp; /* pointer to the Ralpha DP matrix */
1932   float ***Talpha  = mx->Tdp; /* pointer to the Talpha DP matrix */
1933 
1934   char  ***Jyshadow = shmx->Jyshadow; /* pointer to the Jyshadow matrix */
1935   char  ***Lyshadow = shmx->Lyshadow; /* pointer to the Lyshadow matrix */
1936   char  ***Ryshadow = shmx->Ryshadow; /* pointer to the Ryshadow matrix */
1937   int   ***Jkshadow = shmx->Jkshadow; /* pointer to the Jkshadow matrix */
1938   int   ***Lkshadow = shmx->Lkshadow; /* pointer to the Lkshadow matrix */
1939   int   ***Rkshadow = shmx->Rkshadow; /* pointer to the Rkshadow matrix */
1940   int   ***Tkshadow = shmx->Tkshadow; /* pointer to the Tkshadow matrix */
1941   char  ***Lkmode   = shmx->Lkmode;   /* pointer to the Lkmode matrix */
1942   char  ***Rkmode   = shmx->Rkmode;   /* pointer to the Rkmode matrix */
1943 
1944   /* Determine which matrices we need to fill in, based on <preset_mode>, if TRMODE_UNKNOWN, fill_L, fill_R, fill_T will all be set as TRUE */
1945   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrCYKInsideAlignHB(), bogus mode: %d", preset_mode);
1946 
1947   /* Determine the truncation penalty index, from the pass_idx */
1948   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrCYKInsideAlignHB(), unexpected pass idx: %d", pass_idx);
1949 
1950   /* Allocations and initializations  */
1951   Jb   = Lb   = Rb   = Tb   = b = 0; /* will be unchanged if local begins are off, *ret_b will be set as 0 */
1952 
1953   /* ensure a full alignment to ROOT_S (v==0) is possible, remember In CYK <preset_mode> may be known or unknown */
1954   if (preset_mode == TRMODE_J && (! cp9b->Jvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKInsideAlignHB(): preset_mode is J mode, but cp9b->Jvalid[v] is FALSE");
1955   if (preset_mode == TRMODE_L && (! cp9b->Lvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKInsideAlignHB(): preset_mode is L mode, but cp9b->Lvalid[v] is FALSE");
1956   if (preset_mode == TRMODE_R && (! cp9b->Rvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKInsideAlignHB(): preset_mode is R mode, but cp9b->Rvalid[v] is FALSE");
1957   if (preset_mode == TRMODE_T && (! cp9b->Tvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKInsideAlignHB(): preset_mode is T mode, but cp9b->Tvalid[v] is FALSE");
1958   if (preset_mode == TRMODE_UNKNOWN && (! (cp9b->Jvalid[0] || cp9b->Lvalid[0] || cp9b->Rvalid[0] || cp9b->Tvalid[0]))) {
1959     ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKInsideAlignHB(): no marginal mode is allowed for state 0");
1960   }
1961   if (cp9b->jmin[0] > L || cp9b->jmax[0] < L)               ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKInsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cp9b->jmin[0], cp9b->jmax[0]);
1962   jp_0 = L - jmin[0];
1963   if (cp9b->hdmin[0][jp_0] > L || cp9b->hdmax[0][jp_0] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKInsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cp9b->hdmin[0][jp_0], cp9b->hdmax[0][jp_0]);
1964   Lp_0 = L - hdmin[0][jp_0];
1965 
1966   /* grow the matrices based on the current sequence and bands */
1967   if((status = cm_tr_hb_mx_GrowTo       (cm,   mx, errbuf, cp9b, L, size_limit)) != eslOK) return status;
1968   if((status = cm_tr_hb_shadow_mx_GrowTo(cm, shmx, errbuf, cp9b, L, size_limit)) != eslOK) return status;
1969 
1970   /* precalcuate all possible local end scores, for local end emits of 1..L residues */
1971   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
1972   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
1973 
1974   /* yvalidA[0..cnum[v]] will hold TRUE for states y for which a transition is legal
1975    * (some transitions are impossible due to the bands) */
1976   ESL_ALLOC(yvalidA, sizeof(int) * MAXCONNECT);
1977   esl_vec_ISet(yvalidA, MAXCONNECT, FALSE);
1978 
1979   /* initialize all cells of the matrix to IMPOSSIBLE, all cells of shadow matrix to USED_EL or USED_TRUNC_END */
1980   if(  mx->Jncells_valid   > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
1981   if(  mx->Lncells_valid   > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
1982   if(  mx->Rncells_valid   > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
1983   if(  mx->Tncells_valid   > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
1984   if(shmx->Jy_ncells_valid > 0)           for(i = 0; i < shmx->Jy_ncells_valid; i++) shmx->Jyshadow_mem[i] = USED_EL;
1985   if(shmx->Ly_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Ly_ncells_valid; i++) shmx->Lyshadow_mem[i] = USED_EL;
1986   if(shmx->Ry_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Ry_ncells_valid; i++) shmx->Ryshadow_mem[i] = USED_EL;
1987   /* for B states, shadow matrix holds k, length of right fragment, this will be overwritten */
1988   if(shmx->Jk_ncells_valid > 0)           esl_vec_ISet(shmx->Jkshadow_mem, shmx->Jk_ncells_valid, 0);
1989   if(shmx->Lk_ncells_valid > 0 && fill_L) esl_vec_ISet(shmx->Lkshadow_mem, shmx->Lk_ncells_valid, 0);
1990   if(shmx->Rk_ncells_valid > 0 && fill_R) esl_vec_ISet(shmx->Rkshadow_mem, shmx->Rk_ncells_valid, 0);
1991   if(shmx->Tk_ncells_valid > 0 && fill_T) esl_vec_ISet(shmx->Tkshadow_mem, shmx->Tk_ncells_valid, 0);
1992   if(shmx->Lk_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Lk_ncells_valid; i++) shmx->Lkmode_mem[i] = TRMODE_J;
1993   if(shmx->Rk_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Rk_ncells_valid; i++) shmx->Rkmode_mem[i] = TRMODE_J;
1994 
1995   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores,
1996    * Note: we could optimize by skipping this step and using el_scA[d] to
1997    * initialize ELs for each state in the first step of the main recursion
1998    * below. We fill in the EL deck here for completeness and so that
1999    * a check of this alpha matrix with a CYKOutside matrix will pass.
2000    */
2001   if(cm->flags & CMH_LOCAL_END) {
2002     if(cp9b->Jvalid[cm->M]) {
2003       for (j = 0; j <= L; j++) {
2004 	for (d = 0;  d <= j; d++) Jalpha[cm->M][j][d] = el_scA[d];
2005       }
2006     }
2007     if(fill_L && cp9b->Lvalid[cm->M]) {
2008       for (j = 0; j <= L; j++) {
2009 	for (d = 0;  d <= j; d++) Lalpha[cm->M][j][d] = el_scA[d];
2010       }
2011     }
2012     if(fill_R && cp9b->Rvalid[cm->M]) {
2013       for (j = 0; j <= L; j++) {
2014 	for (d = 0;  d <= j; d++) Ralpha[cm->M][j][d] = el_scA[d];
2015       }
2016     }
2017   }
2018 
2019   /* Main recursion */
2020   for (v = cm->M-1; v > 0; v--) { /* almost to ROOT_S, we handle that differently */
2021     float const *esc_v   = cm->oesc[v];  /* emission scores for state v */
2022     float const *tsc_v   = cm->tsc[v];   /* transition scores for state v */
2023     float const *lmesc_v = cm->lmesc[v]; /* marginal left  emission scores for state v */
2024     float const *rmesc_v = cm->rmesc[v]; /* marginal right emission scores for state v */
2025     sd   = StateDelta(cm->sttype[v]);
2026     sdl  = StateLeftDelta(cm->sttype[v]);
2027     sdr  = StateRightDelta(cm->sttype[v]);
2028     jn   = jmin[v];
2029     jx   = jmax[v];
2030     do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
2031     do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
2032     do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
2033     do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
2034     /* re-initialize the J, L and R decks if we can do a local end from v */
2035     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
2036       if(do_J_v && cp9b->Jvalid[cm->M]) {
2037 	for (j = jmin[v]; j <= jmax[v]; j++) {
2038 	  jp_v  = j - jmin[v];
2039 	  if(hdmin[v][jp_v] >= sd) {
2040 	    d    = hdmin[v][jp_v];
2041 	    dp_v = 0;
2042 	  }
2043 	  else {
2044 	    d    = sd;
2045 	    dp_v = sd - hdmin[v][jp_v];
2046 	  }
2047 	  for (; d <= hdmax[v][jp_v]; dp_v++, d++) {
2048 	    Jalpha[v][jp_v][dp_v] = Jalpha[cm->M][j][d-sd] + cm->endsc[v];
2049 	    /* If we optimize by skipping the filling of the
2050 	     * EL deck the above line would become:
2051 	     * 'Jalpha[v][jp_v][dp_v] = el_scA[d-sd] + cm->endsc[v];'
2052 	     */
2053 	  }
2054 	}
2055       }
2056       if(do_L_v && cp9b->Lvalid[cm->M]) {
2057 	for (j = jmin[v]; j <= jmax[v]; j++) {
2058 	  jp_v  = j - jmin[v];
2059 	  if(hdmin[v][jp_v] >= sdl) {
2060 	    d    = hdmin[v][jp_v];
2061 	    dp_v = 0;
2062 	  }
2063 	  else {
2064 	    d    = sdl;
2065 	    dp_v = sdl - hdmin[v][jp_v];
2066 	  }
2067 	  for (; d <= hdmax[v][jp_v]; dp_v++, d++) {
2068 	    Lalpha[v][jp_v][dp_v] = Lalpha[cm->M][j][d-sdl] + cm->endsc[v];
2069 	  }
2070 	}
2071       }
2072       if(do_R_v && cp9b->Rvalid[cm->M]) {
2073 	for (j = jmin[v]; j <= jmax[v]; j++) {
2074 	  jp_v  = j - jmin[v];
2075 	  if(hdmin[v][jp_v] >= sdr) {
2076 	    d    = hdmin[v][jp_v];
2077 	    dp_v = 0;
2078 	  }
2079 	  else {
2080 	    d    = sdr;
2081 	    dp_v = sdr - hdmin[v][jp_v];
2082 	  }
2083 	  for (; d <= hdmax[v][jp_v]; dp_v++, d++) {
2084 	    Ralpha[v][jp_v][dp_v] = Ralpha[cm->M][j][d-sdr] + cm->endsc[v];
2085 	  }
2086 	}
2087       }
2088     }
2089     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
2090 
2091     if(cm->sttype[v] == E_st) {
2092       for (j = jmin[v]; j <= jmax[v]; j++) {
2093 	jp_v = j-jmin[v];
2094 	ESL_DASSERT1((hdmin[v][jp_v] == 0));
2095 	ESL_DASSERT1((hdmax[v][jp_v] == 0));
2096 	if(do_J_v) Jalpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
2097 	if(do_L_v) Lalpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
2098 	if(do_R_v) Ralpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
2099       }
2100     }
2101     else if(cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) {
2102       /* update {J,L,R}alpha[v][jp_v][dp_v] cells, for IL states, loop
2103        * nesting order is: for j { for d { for y { } } } because they
2104        * can self transit, and a {J,L,R}alpha[v][j][d] cell must be
2105        * complete (that is we must have looked at all children y)
2106        * before can start calc'ing for {J,L,R}alpha[v][j][d+1]
2107        * We could be slightly more efficient if we separated out
2108        * MR from IR b/c self-transits in MRs are impossible, but
2109        * we don't do that here. */
2110       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
2111 	for (j = jmin[v]; j <= jmax[v]; j++) {
2112 	  jp_v = j - jmin[v];
2113 	  yvalid_ct = 0;
2114 	  j_sdr = j - sdr;
2115 
2116 	  /* determine which children y we can legally transit to for v, j */
2117 	  for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
2118 	    if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr valid for state y? */
2119 
2120 	  for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
2121 	    i    = j - d + 1;
2122 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
2123 
2124 	    /* We need to treat R differently from and J and L here, by
2125 	     * doing separate 'for (yoffset...' loops for J and R
2126 	     * because we have to fully calculate Jalpha[v][jp_v][dp_v])
2127 	     * before we can start to calculate Ralpha[v][jp_v][dp_v].
2128 	     */
2129 	    /* Handle J and L first */
2130 	    if(do_J_v || do_L_v) {
2131 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
2132 		yoffset = yvalidA[yvalid_idx];
2133 		y = cm->cfirst[v] + yoffset;
2134 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
2135 		do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
2136 		if(do_J_y || do_L_y) {
2137 		  jp_y_sdr = j - jmin[y] - sdr;
2138 
2139 		  if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
2140 		    dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
2141 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
2142 		    ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2143 		    if((do_J_v && do_J_y) &&
2144 		       ((sc = Jalpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]) > Jalpha[v][jp_v][dp_v])) {
2145 		      Jalpha[v][jp_v][dp_v]   = sc;
2146 		      Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2147 		    }
2148 		    if((do_L_v && do_L_y) &&
2149 		       ((sc = Lalpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]) > Lalpha[v][jp_v][dp_v])) {
2150 		      Lalpha[v][jp_v][dp_v]   = sc;
2151 		      Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
2152 		    }
2153 		  }
2154 		}
2155 	      }
2156 	      if(do_J_v) {
2157 		Jalpha[v][jp_v][dp_v] += esc_v[dsq[i]];
2158 		Jalpha[v][jp_v][dp_v] = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
2159 	      }
2160 	      if(do_L_v) {
2161 		if(d >= 2) {
2162 		  Lalpha[v][jp_v][dp_v] += esc_v[dsq[i]];
2163 		}
2164 		else {
2165 		  Lalpha[v][jp_v][dp_v]   = esc_v[dsq[i]];
2166 		  Lyshadow[v][jp_v][dp_v] = USED_TRUNC_END;
2167 		}
2168 		Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
2169 	      }
2170 	      i--;
2171 	    }
2172 
2173 	    if(do_R_v) {
2174 	      /* Handle R separately */
2175 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
2176 		yoffset = yvalidA[yvalid_idx];
2177 		y = cm->cfirst[v] + yoffset;
2178 		do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
2179 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
2180 		if((do_J_y || do_R_y) && (y != v)) { /* (y != v) part is to disallow IL self transits in R mode */
2181 		  jp_y_sdr = j - jmin[y] - sdr;
2182 
2183 		  /* we use 'd' and 'dp_y' here, not 'd-sd' and 'dp_y_sd' (which we used in the corresponding loop for J,L above) */
2184 		  if((d) >= hdmin[y][jp_y_sdr] && (d) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
2185 		    dp_y = d - hdmin[y][jp_y_sdr];
2186 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
2187 		    ESL_DASSERT1((dp_y    >= 0 && dp_y     <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2188 
2189 		    if(do_J_y &&
2190 		       ((sc = Jalpha[y][jp_y_sdr][dp_y] + tsc_v[yoffset]) > Ralpha[v][jp_v][dp_v])) {
2191 		      Ralpha[v][jp_v][dp_v] = sc;
2192 		      Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2193 		    }
2194 		    if(do_R_y &&
2195 		       ((sc = Ralpha[y][jp_y_sdr][dp_y] + tsc_v[yoffset]) > Ralpha[v][jp_v][dp_v])) {
2196 		      Ralpha[v][jp_v][dp_v] = sc;
2197 		      Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
2198 		    }
2199 		  }
2200 		}
2201 	      } /* end of for (yvalid_idx = 0... loop */
2202 	    }
2203 	  }
2204 	}
2205       } /* end of if(! StateIsDetached(cm, v)) */
2206     }
2207     else if(cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) {
2208       /* update {J,L,R}alpha[v][jp_v][dp_v] cells, for IR states, loop
2209        * nesting order is: for j { for d { for y { } } } because they
2210        * can self transit, and a {J,L,R}alpha[v][j][d] cell must be
2211        * complete (that is we must have looked at all children y)
2212        * before can start calc'ing for {J,L,R}alpha[v][j][d+1].
2213        * We could be slightly more efficient if we separated out
2214        * MR from IR b/c self-transits in MRs are impossible, but
2215        * we don't do that here. */
2216 
2217       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
2218 	/* The first MR_st/IR_st 'for (j...' loop is for J and R matrices which use the same set of j values */
2219 	if(do_J_v || do_R_v) {
2220 	  for (j = jmin[v]; j <= jmax[v]; j++) {
2221 	    jp_v = j - jmin[v];
2222 	    yvalid_ct = 0;
2223 	    j_sdr = j - sdr;
2224 
2225 	    /* determine which children y we can legally transit to for v, j */
2226 	    for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
2227 	      if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr is valid for state y? */
2228 
2229 	    for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
2230 	      dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
2231 
2232 	      /* We need to treat L differently from and J and R here, by
2233 	       * doing separate 'for (yoffset...' loops for J because we
2234 	       * have to fully calculate Jalpha[v][jp_v][dp_v]) before we
2235 	       * can start to calculate Lalpha[v][jp_v][dp_v].
2236 	       */
2237 	      /* Handle J and R first */
2238 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
2239 		yoffset = yvalidA[yvalid_idx];
2240 		y = cm->cfirst[v] + yoffset;
2241 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
2242 		do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
2243 		if(do_J_y || do_R_y) {
2244 		  jp_y_sdr = j - jmin[y] - sdr;
2245 
2246 		  if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
2247 		    dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
2248 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
2249 		    ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2250 
2251 		    if((do_J_v && do_J_y) &&
2252 		       ((sc = Jalpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]) > Jalpha[v][jp_v][dp_v])) {
2253 		      Jalpha[v][jp_v][dp_v]   = sc;
2254 		      Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2255 		    }
2256 		    if((do_R_v && do_R_y) &&
2257 		       ((sc = Ralpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]) > Ralpha[v][jp_v][dp_v])) {
2258 		      Ralpha[v][jp_v][dp_v]   = sc;
2259 		      Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
2260 		    }
2261 		  }
2262 		}
2263 	      }
2264 	      if(do_J_v) {
2265 		Jalpha[v][jp_v][dp_v] += esc_v[dsq[j]];
2266 		Jalpha[v][jp_v][dp_v] = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
2267 	      }
2268 	      if(do_R_v) {
2269 		if(d >= 2) {
2270 		  Ralpha[v][jp_v][dp_v] += esc_v[dsq[j]];
2271 		}
2272 		else {
2273 		  Ralpha[v][jp_v][dp_v]   = esc_v[dsq[j]];
2274 		  Ryshadow[v][jp_v][dp_v] = USED_TRUNC_END;
2275 		}
2276 		Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
2277 	      }
2278 	    }
2279 	  }
2280 	}
2281 	/* Handle L separately */
2282 	if(do_L_v) {
2283 	  /* The second MR_st/IR_st 'for (j...' loop is for the L matrix which use a different set of j values */
2284 	  for (j = jmin[v]; j <= jmax[v]; j++) {
2285 	    jp_v = j - jmin[v];
2286 	    yvalid_ct = 0;
2287 
2288 	    /* determine which children y we can legally transit to for v, j */
2289 	    /* we use 'j' and not 'j_sdr' here for the L matrix, differently from J and R matrices above */
2290 	    for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
2291 	      if(j >= jmin[y] && j <= jmax[y]) yvalidA[yvalid_ct++] = yoffset; /* is j is valid for state y? */
2292 
2293 	    for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
2294 	      dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
2295 
2296 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
2297 		/* Note if we're an IL state, we can't self transit in R mode, this was ensured above when we set up yvalidA[] (xref:ELN3,p5)*/
2298 		yoffset = yvalidA[yvalid_idx];
2299 		y = cm->cfirst[v] + yoffset;
2300 		do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
2301 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
2302 		if((do_J_y || do_L_y) && (y != v)) { /* (y != v) part is to disallow IR self transits in L mode */
2303 
2304 		  /* we use 'jp_y=j-min[y]' here, not 'jp_y_sdr=j-jmin[y]-sdr' (which we used in the corresponding loop for J,R above) */
2305 		  jp_y = j - jmin[y];
2306 
2307 		  /* we use 'd' and 'dp_y' here, not 'd-sd' and 'dp_y_sd' (which we used in the corresponding loop for J,R above) */
2308 		  if((d) >= hdmin[y][jp_y] && (d) <= hdmax[y][jp_y]) { /* make sure d is valid for this v, j and y */
2309 		    dp_y = d - hdmin[y][jp_y];
2310 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
2311 		    ESL_DASSERT1((dp_y    >= 0 && dp_y     <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
2312 
2313 		    if(do_J_y &&
2314 		       (sc = Jalpha[y][jp_y][dp_y] + tsc_v[yoffset]) > Lalpha[v][jp_v][dp_v]) {
2315 		      Lalpha[v][jp_v][dp_v] = sc;
2316 		      Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2317 		    }
2318 		    if(do_L_y &&
2319 		       (sc = Lalpha[y][jp_y][dp_y] + tsc_v[yoffset]) > Lalpha[v][jp_v][dp_v]) {
2320 		      Lalpha[v][jp_v][dp_v] = sc;
2321 		      Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
2322 		    }
2323 		  }
2324 		}
2325 	      } /* end of for (yvalid_idx = 0... loop */
2326 	    }
2327 	  }
2328 	}
2329       } /* end of if(! StateIsDetached(cm, v) */
2330     }
2331     else if(cm->sttype[v] == MP_st) {
2332       /* MP states cannot self transit, this means that all cells in
2333        * alpha[v] are independent of each other, only depending on
2334        * alpha[y] for previously calc'ed y.  We can do the for loops
2335        * in any nesting order, this implementation does what I think
2336        * is most efficient: for y { for j { for d { } } }
2337        */
2338       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
2339 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
2340 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
2341 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
2342 	yoffset = y - cm->cfirst[v];
2343 	tsc = tsc_v[yoffset];
2344 
2345 	/* The first MP_st 'for (jp_v...' loop is for J and R matrices which use the same set of j values */
2346 	/* j must satisfy:
2347 	 * j >= jmin[v]
2348 	 * j >= jmin[y]+sdr (follows from (j-sdr >= jmin[y]))
2349 	 * j <= jmax[v]
2350 	 * j <= jmax[y]+sdr (follows from (j-sdr <= jmax[y]))
2351 	 * this reduces to two ESL_MAX calls
2352 	 */
2353 	jn = ESL_MAX(jmin[v], jmin[y]+sdr);
2354 	jx = ESL_MIN(jmax[v], jmax[y]+sdr);
2355 	jpn = jn - jmin[v];
2356 	jpx = jx - jmin[v];
2357 	jp_y_sdr = jn - jmin[y] - sdr;
2358 	/* for Lalpha, we use 'jp_y=j-min[y]' instead of 'jp_y_sdr=j-jmin[y]-sdr' */
2359 
2360 	if((do_J_v && do_J_y) || (do_R_v && (do_J_y || do_R_y))) {
2361 	  for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++, jp_y++) {
2362 	    ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
2363 	    ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
2364 
2365 	    if(do_J_v && do_J_y) {
2366 	      /* J matrix: */
2367 	      /* d must satisfy:
2368 	       * d >= hdmin[v][jp_v]
2369 	       * d >= hdmin[y][jp_y_sdr]+sd (follows from (d-sd >= hdmin[y][jp_y_sdr]))
2370 	       * d <= hdmax[v][jp_v]
2371 	       * d <= hdmax[y][jp_y_sdr]+sd (follows from (d-sd <= hdmax[y][jp_y_sdr]))
2372 	       * this reduces to two ESL_MAX calls
2373 	       */
2374 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
2375 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
2376 	      dpn       = dn - hdmin[v][jp_v];
2377 	      dpx       = dx - hdmin[v][jp_v];
2378 	      dp_y_sd   = dn - hdmin[y][jp_y_sdr] - sd;
2379 
2380 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
2381 		ESL_DASSERT1((dp_v      >= 0 && dp_v       <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
2382 		ESL_DASSERT1((dp_y_sd   >= 0 && dp_y_sd    <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2383 
2384 		if((sc = Jalpha[y][jp_y_sdr][dp_y_sd] + tsc) > Jalpha[v][jp_v][dp_v]) {
2385 		  Jalpha[v][jp_v][dp_v]   = sc;
2386 		  Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2387 		}
2388 	      }
2389 	    }
2390 
2391 	    if(do_R_v && (do_R_y || do_J_y)) {
2392 	      /* R matrix: */
2393 	      /* d must satisfy:
2394 	       * d >= hdmin[v][jp_v]
2395 	       * d >= hdmin[y][jp_y_sd]+sd (follows from (d-sd >= hdmin[y][jp_y_sd]))
2396 	       * d <= hdmax[v][jp_v]
2397 	       * d <= hdmax[y][jp_y_sd]+sd (follows from (d-sd <= hdmax[y][jp_y_sd]))
2398 	       * this reduces to two ESL_MAX calls
2399 	       */
2400 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sdr);
2401 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sdr);
2402 	      dpn       = dn - hdmin[v][jp_v];
2403 	      dpx       = dx - hdmin[v][jp_v];
2404 	      dp_y_sdr  = dn - hdmin[y][jp_y_sdr] - sdr;
2405 	      /* for {L,R}alpha, we use 'dp_y_sdr' instead of 'dy_y_sd' */
2406 
2407 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdr++) {
2408 		/* we use 'dp_y_sdr' here, not 'dp_y_sd' (which we used in the corresponding loop for J above) */
2409 		ESL_DASSERT1((dp_y_sdr  >= 0 && dp_y_sdr   <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2410 		if(do_J_y &&
2411 		   ((sc = Jalpha[y][jp_y_sdr][dp_y_sdr] + tsc) > Ralpha[v][jp_v][dp_v])) {
2412 		  Ralpha[v][jp_v][dp_v]   = sc;
2413 		  Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2414 		}
2415 		if(do_R_y &&
2416 		   ((sc = Ralpha[y][jp_y_sdr][dp_y_sdr] + tsc) > Ralpha[v][jp_v][dp_v])) {
2417 		  Ralpha[v][jp_v][dp_v]   = sc;
2418 		  Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
2419 		}
2420 	      }
2421 	    }
2422 	  }
2423 	}
2424 
2425 	if(do_L_v && (do_L_y || do_J_y)) {
2426 	  /* The second MP_st 'for (jp_v...' loop is for L matrix, which uses a different set of j values from J and R */
2427 	  /* j must satisfy:
2428 	   * j >= jmin[v]
2429 	   * j >= jmin[y] (follows from (j >= jmin[y]))
2430 	   * j <= jmax[v]
2431 	   * j <= jmax[y] (follows from (j <= jmax[y]))
2432 	   * this reduces to two ESL_MAX calls
2433 	   */
2434 	  jn = ESL_MAX(jmin[v], jmin[y]);
2435 	  jx = ESL_MIN(jmax[v], jmax[y]);
2436 	  jpn = jn - jmin[v];
2437 	  jpx = jx - jmin[v];
2438 	  jp_y = jn - jmin[y];
2439 	  /* for Lalpha, we use 'jp_y=j-min[y]' instead of 'jp_y_sdr=j-jmin[y]-sdr' */
2440 
2441 	  for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y++) {
2442 	    ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
2443 
2444 	    /* d must satisfy:
2445 	     * d >= hdmin[v][jp_v]
2446 	     * d >= hdmin[y][jp_y]+sdl (follows from (d-sdl >= hdmin[y][jp_y]))
2447 	     * d <= hdmax[v][jp_v]
2448 	     * d <= hdmax[y][jp_y]+sdl (follows from (d-sdl <= hdmax[y][jp_y]))
2449 	     * this reduces to two ESL_MAX calls
2450 	     */
2451 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y] + sdl);
2452 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y] + sdl);
2453 	    dpn       = dn - hdmin[v][jp_v];
2454 	    dpx       = dx - hdmin[v][jp_v];
2455 	    dp_y_sdl  = dn - hdmin[y][jp_y] - sdl;
2456 	    /* for Lalpha, we use 'dp_y_sdl' instead of 'dy_y_sd' */
2457 
2458 	    for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdl++) {
2459 	      /* we use 'dp_y_sdl' here, not 'dp_y_sd' (which we used in the corresponding loop for J above) */
2460 	      ESL_DASSERT1((dp_y_sdl >= 0 && dp_y_sdl <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
2461 	      if(do_J_y &&
2462 		 ((sc = Jalpha[y][jp_y][dp_y_sdl] + tsc) > Lalpha[v][jp_v][dp_v])) {
2463 		Lalpha[v][jp_v][dp_v]  = sc;
2464 		Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2465 	      }
2466 	      if(do_L_y &&
2467 		 ((sc = Lalpha[y][jp_y][dp_y_sdl] + tsc) > Lalpha[v][jp_v][dp_v])) {
2468 		Lalpha[v][jp_v][dp_v]  = sc;
2469 		Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
2470 	      }
2471 	    }
2472 	  }
2473 	}
2474       }
2475       /* add in emission score */
2476       for (j = jmin[v]; j <= jmax[v]; j++) {
2477 	jp_v  = j - jmin[v];
2478 	i     = j - hdmin[v][jp_v] + 1;
2479 	for (d = hdmin[v][jp_v], dp_v = 0; d <= hdmax[v][jp_v]; d++, dp_v++)
2480 	  {
2481 	    if(d >= 2) {
2482 	      if(do_J_v) Jalpha[v][jp_v][dp_v] += esc_v[dsq[i]*cm->abc->Kp+dsq[j]];
2483 	      if(do_L_v) Lalpha[v][jp_v][dp_v] += lmesc_v[dsq[i]];
2484 	      if(do_R_v) Ralpha[v][jp_v][dp_v] += rmesc_v[dsq[j]];
2485 	    }
2486 	    else {
2487 	      if(do_J_v) { Jalpha[v][jp_v][dp_v] = IMPOSSIBLE; }
2488 	      if(do_L_v) { Lalpha[v][jp_v][dp_v] = lmesc_v[dsq[i]]; Lyshadow[v][jp_v][dp_v] = USED_TRUNC_END; }
2489 	      if(do_R_v) { Ralpha[v][jp_v][dp_v] = rmesc_v[dsq[j]]; Ryshadow[v][jp_v][dp_v] = USED_TRUNC_END; }
2490 	    }
2491 	    i--;
2492 	  }
2493       }
2494       /* ensure all cells are >= IMPOSSIBLE */
2495       for (j = jmin[v]; j <= jmax[v]; j++) {
2496 	jp_v  = j - jmin[v];
2497 	for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
2498 	  if(do_J_v) Jalpha[v][jp_v][dp_v] = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
2499 	  if(do_L_v) Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
2500 	  if(do_R_v) Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
2501 	}
2502       }
2503     }
2504     else if(cm->sttype[v] != B_st) { /* entered if state v is D or S */
2505       /* D, S states cannot self transit, this means that all cells in
2506        * alpha[v] are independent of each other, only depending on
2507        * alpha[y] for previously calc'ed y.  We can do the for loops
2508        * in any nesting order, this implementation does what I think
2509        * is most efficient: for y { for j { for d { } } }
2510        */
2511       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
2512 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
2513 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
2514 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
2515 	yoffset = y - cm->cfirst[v];
2516 	tsc = tsc_v[yoffset];
2517 
2518 	if((do_J_v && do_J_y) || (do_L_v && do_L_y) || (do_R_v && do_R_y)) {
2519 	  /* j must satisfy:
2520 	   * j >= jmin[v]
2521 	   * j >= jmin[y]+sdr (follows from (j-sdr >= jmin[y]))
2522 	   * j <= jmax[v]
2523 	   * j <= jmax[y]+sdr (follows from (j-sdr <= jmax[y]))
2524 	   * this reduces to two ESL_MAX calls
2525 	   */
2526 	  jn = ESL_MAX(jmin[v], jmin[y]+sdr);
2527 	  jx = ESL_MIN(jmax[v], jmax[y]+sdr);
2528 	  jpn = jn - jmin[v];
2529 	  jpx = jx - jmin[v];
2530 	  jp_y_sdr = jn - jmin[y] - sdr;
2531 
2532 	  for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++) {
2533 	    ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
2534 	    ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
2535 
2536 	    /* d must satisfy:
2537 	     * d >= hdmin[v][jp_v]
2538 	     * d >= hdmin[y][jp_y_sdr]+sd (follows from (d-sd >= hdmin[y][jp_y_sdr]))
2539 	     * d <= hdmax[v][jp_v]
2540 	     * d <= hdmax[y][jp_y_sdr]+sd (follows from (d-sd <= hdmax[y][jp_y_sdr]))
2541 	     * this reduces to two ESL_MAX calls
2542 	     */
2543 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
2544 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
2545 	    dpn     = dn - hdmin[v][jp_v];
2546 	    dpx     = dx - hdmin[v][jp_v];
2547 	    dp_y_sd = dn - hdmin[y][jp_y_sdr] - sd;
2548 
2549 	    for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
2550 	      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
2551 	      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
2552 
2553 	      if((do_J_v && do_J_y) &&
2554 		 ((sc = Jalpha[y][jp_y_sdr][dp_y_sd] + tsc) > Jalpha[v][jp_v][dp_v])) {
2555 		Jalpha[v][jp_v][dp_v]  = sc;
2556 		Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
2557 	      }
2558 	      if((do_L_v && do_L_y) &&
2559 		 ((sc = Lalpha[y][jp_y_sdr][dp_y_sd] + tsc) > Lalpha[v][jp_v][dp_v])) {
2560 		Lalpha[v][jp_v][dp_v]  = sc;
2561 		Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
2562 	      }
2563 	      if((do_R_v && do_R_y) &&
2564 		 ((sc = Ralpha[y][jp_y_sdr][dp_y_sd] + tsc) > Ralpha[v][jp_v][dp_v])) {
2565 		Ralpha[v][jp_v][dp_v]  = sc;
2566 		Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
2567 	      }
2568 	      /* an easy to overlook case: if d == 0, ensure L and R values are IMPOSSIBLE */
2569 	      if(dp_v == dpn && dn == 0) { /* d is 0 */
2570 		if(do_L_v) Lalpha[v][jp_v][dp_v] = IMPOSSIBLE;
2571 		if(do_R_v) Ralpha[v][jp_v][dp_v] = IMPOSSIBLE;
2572 		/* And another special case for BEGL_S and BEGR_S states,
2573 		 * reset shadow matrix values for d == 0 (which were
2574 		 * initialized to USED_EL above), even though the score of
2575 		 * these cells is impossible we may use them as a
2576 		 * zero-length left or right half of a BIF_B subtree during
2577 		 * construction of the parsetree.
2578 		 */
2579 		if(cm->sttype[v] == S_st) {
2580 		  if(do_L_v) Lyshadow[v][jp_v][dp_v] = USED_TRUNC_END;
2581 		  if(do_R_v) Ryshadow[v][jp_v][dp_v] = USED_TRUNC_END;
2582 		}
2583 	      }
2584 	    }
2585 	  }
2586 	}
2587       }
2588       /* no emission score to add */
2589     }
2590     else { /* B_st */
2591       y = cm->cfirst[v]; /* left  subtree */
2592       z = cm->cnum[v];   /* right subtree */
2593 
2594       do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
2595       do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
2596       do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
2597 
2598       do_J_z = cp9b->Jvalid[z]           ? TRUE : FALSE;
2599       do_L_z = cp9b->Lvalid[z] && fill_L ? TRUE : FALSE;
2600       do_R_z = cp9b->Rvalid[z] && fill_R ? TRUE : FALSE;
2601 
2602       /* Any valid j must be within both state v and state z's j band
2603        * I think jmin[v] <= jmin[z] is guaranteed by the way bands are
2604        * constructed, but we'll check anyway.
2605        */
2606       jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
2607       jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
2608       /* the main j loop */
2609       for (j = jn; j <= jx; j++) {
2610 	jp_v = j - jmin[v];
2611 	jp_y = j - jmin[y];
2612 	jp_z = j - jmin[z];
2613 	kn = ((j-jmax[y]) > (hdmin[z][jp_z])) ? (j-jmax[y]) : hdmin[z][jp_z];
2614         kn = ESL_MAX(kn, 0); /* kn must be non-negative, added with fix to bug i36 */
2615 	/* kn satisfies inequalities (1) and (3) (listed below)*/
2616 	kx = ( jp_y       < (hdmax[z][jp_z])) ?  jp_y       : hdmax[z][jp_z];
2617 	/* kn satisfies inequalities (2) and (4) (listed below)*/
2618 	i = j - hdmin[v][jp_v] + 1;
2619 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++, i--) {
2620 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
2621 
2622 	  /* Find the first k value that implies a valid cell in the {J,L,R} matrix y and z decks.
2623 	   * This k must satisfy the following 6 inequalities (some may be redundant):
2624 	   * (1) k >= j-jmax[y];
2625 	   * (2) k <= j-jmin[y];
2626 	   *     1 and 2 guarantee (j-k) is within state y's j band
2627 	   *
2628 	   * (3) k >= hdmin[z][j-jmin[z]];
2629 	   * (4) k <= hdmax[z][j-jmin[z]];
2630 	   *     3 and 4 guarantee k is within z's j=(j), d band
2631 	   *
2632 	   * (5) k >= d-hdmax[y][j-jmin[y]-k];
2633 	   * (6) k <= d-hdmin[y][j-jmin[y]-k];
2634 	   *     5 and 6 guarantee (d-k) is within state y's j=(j-k) d band
2635 	   *
2636 	   * kn and kx were set above (outside (for (dp_v...) loop) that
2637 	   * satisfy 1-4 (b/c 1-4 are d-independent and k-independent)
2638 	   * RHS of inequalities 5 and 6 are dependent on k, so we check
2639 	   * for these within the next for loop.
2640 	   *
2641 	   * To update a cell in the T matrix with a sum of an R matrix value for y
2642 	   * and a L matrix value for z, there are 2 additional inequalities to satisfy:
2643 	   * (7) k != 0
2644 	   * (8) k != d
2645 	   * We ensure 7 and 8 in the loop below.
2646 	   */
2647 	  for(k = kn; k <= kx; k++) {
2648 	    if((k >= d - hdmax[y][jp_y-k]) && k <= d - hdmin[y][jp_y-k]) {
2649 	      /* for current k, all 6 inequalities have been satisified
2650 	       * so we know the cells corresponding to the platonic
2651 	       * matrix cells alpha[v][j][d], alpha[y][j-k][d-k], and
2652 	       * alpha[z][j][k] are all within the bands. These
2653 	       * cells correspond to alpha[v][jp_v][dp_v],
2654 	       * alpha[y][jp_y-k][d-hdmin[jp_y-k]-k],
2655 	       * and alpha[z][jp_z][k-hdmin[jp_z]];
2656 	       */
2657 	      kp_z = k-hdmin[z][jp_z];
2658 	      dp_y = d-hdmin[y][jp_y-k];
2659 	      if((do_J_v && do_J_y && do_J_z) &&
2660 		 ((sc = Jalpha[y][jp_y-k][dp_y - k] + Jalpha[z][jp_z][kp_z]) > Jalpha[v][jp_v][dp_v])) {
2661 		Jalpha[v][jp_v][dp_v]   = sc;
2662 		Jkshadow[v][jp_v][dp_v] = k;
2663 	      }
2664 	      if((do_L_v && do_J_y && do_L_z) &&
2665 		 ((sc = Jalpha[y][jp_y-k][dp_y - k] + Lalpha[z][jp_z][kp_z]) > Lalpha[v][jp_v][dp_v])) {
2666 		Lalpha[v][jp_v][dp_v]   = sc;
2667 		Lkshadow[v][jp_v][dp_v] = k;
2668 		Lkmode[v][jp_v][dp_v]   = TRMODE_J;
2669 	      }
2670 	      if((do_R_v && do_R_y && do_J_z) &&
2671 		 ((sc = Ralpha[y][jp_y-k][dp_y - k] + Jalpha[z][jp_z][kp_z]) > Ralpha[v][jp_v][dp_v])) {
2672 		Ralpha[v][jp_v][dp_v]   = sc;
2673 		Rkshadow[v][jp_v][dp_v] = k;
2674 		Rkmode[v][jp_v][dp_v]   = TRMODE_J;
2675 	      }
2676 	      if(k != 0 && k != d) {
2677 		if((do_T_v && do_R_y && do_L_z) &&
2678 		   ((sc = Ralpha[y][jp_y-k][dp_y - k] + Lalpha[z][jp_z][kp_z]) > Talpha[v][jp_v][dp_v])) {
2679 		  Talpha[v][jp_v][dp_v]   = sc;
2680 		  Tkshadow[v][jp_v][dp_v] = k;
2681 		}
2682 	      }
2683 	    }
2684 	  }
2685 	}
2686       }
2687 
2688       /* two additional special cases in trCYK (these are not in standard CYK).
2689        * we do these in their own for(j.. { for(d.. { } } loops b/c one
2690        * is independent of z, the other of y, unlike the above loop which is dependent
2691        * on both.
2692        */
2693       if(do_L_v && (do_J_y || do_L_y)) {
2694 	jn = (jmin[v] > jmin[y]) ? jmin[v] : jmin[y];
2695 	jx = (jmax[v] < jmax[y]) ? jmax[v] : jmax[y];
2696 	for (j = jn; j <= jx; j++) {
2697 	  jp_v = j - jmin[v];
2698 	  jp_y = j - jmin[y];
2699 	  ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
2700 	  ESL_DASSERT1((j >= jmin[y] && j <= jmax[y]));
2701 	  dn = (hdmin[v][jp_v] > hdmin[y][jp_y]) ? hdmin[v][jp_v] : hdmin[y][jp_y];
2702 	  dx = (hdmax[v][jp_v] < hdmax[y][jp_y]) ? hdmax[v][jp_v] : hdmax[y][jp_y];
2703 	  for(d = dn; d <= dx; d++) {
2704 	    dp_v = d - hdmin[v][jp_v];
2705 	    dp_y = d - hdmin[y][jp_y];
2706 	    ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
2707 	    ESL_DASSERT1((d >= hdmin[y][jp_y] && d <= hdmax[y][jp_y]));
2708 	    if(do_J_y &&
2709 	       ((sc = Jalpha[y][jp_y][dp_y]) > Lalpha[v][jp_v][dp_v])) {
2710 	      Lalpha[v][jp_v][dp_v]   = sc;
2711 	      Lkshadow[v][jp_v][dp_v] = 0; /* k == 0 for this case, full sequence is on left */
2712 	      Lkmode[v][jp_v][dp_v]   = TRMODE_J;
2713 	      /* consider making a different mode here, to let the traceback know that right child emits 0 residues,
2714 	       * this should then effect the alignment display, no? it is a different case from the
2715 	       * >0 residues from right child TRMODE_J case for Lalpha checked for in (for k) loop above.
2716 	       */
2717 	    }
2718 	    if(do_L_y &&
2719 	       ((sc = Lalpha[y][jp_y][dp_y]) > Lalpha[v][jp_v][dp_v])) {
2720 	      Lalpha[v][jp_v][dp_v]   = sc;
2721 	      Lkshadow[v][jp_v][dp_v] = 0; /* k == 0 for this case, full sequence is on left */
2722 	      Lkmode[v][jp_v][dp_v]   = TRMODE_L;
2723 	      /* consider making a different mode here, to let the traceback know that right child emits 0 residues,
2724 	       * this should then effect the alignment display, no? it is a different case from the
2725 	       * >0 residues from right child TRMODE_L case for Lalpha checked for in (for k) loop above.
2726 	       */
2727 	    }
2728 	  }
2729 	}
2730       }
2731       if(do_R_v && (do_J_z || do_R_z)) {
2732 	jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
2733 	jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
2734 	for (j = jn; j <= jx; j++) {
2735 	  jp_v = j - jmin[v];
2736 	  jp_z = j - jmin[z];
2737 	  ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
2738 	  ESL_DASSERT1((j >= jmin[z] && j <= jmax[z]));
2739 	  dn = (hdmin[v][jp_v] > hdmin[z][jp_z]) ? hdmin[v][jp_v] : hdmin[z][jp_z];
2740 	  dx = (hdmax[v][jp_v] < hdmax[z][jp_z]) ? hdmax[v][jp_v] : hdmax[z][jp_z];
2741 	  for(d = dn; d <= dx; d++) {
2742 	    dp_v = d - hdmin[v][jp_v];
2743 	    dp_z = d - hdmin[z][jp_z];
2744 	    ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
2745 	    ESL_DASSERT1((d >= hdmin[z][jp_z] && d <= hdmax[z][jp_z]));
2746 	    if(do_J_z &&
2747 	       ((sc = Jalpha[z][jp_z][dp_z]) > Ralpha[v][jp_v][dp_v])) {
2748 	      Ralpha[v][jp_v][dp_v]   = sc;
2749 	      Rkshadow[v][jp_v][dp_v] = d; /* k == d in this case, full sequence is on right */
2750 	      Rkmode[v][jp_v][dp_v]   = TRMODE_J;
2751 	      /* consider making a different mode here, to let the traceback know that left child emits 0 residues,
2752 	       * this should then effect the alignment display, no? it is a different case from the
2753 	       * >0 residues from left child TRMODE_J case for Ralpha checked for in (for k) loop above.
2754 	       */
2755 	    }
2756 	    if(do_R_z &&
2757 	       ((sc = Ralpha[z][jp_z][dp_z]) > Ralpha[v][jp_v][dp_v])) {
2758 	      Ralpha[v][jp_v][dp_v]   = sc;
2759 	      Rkshadow[v][jp_v][dp_v] = d; /* k == d in this case, full sequence is on right */
2760 	      Rkmode[v][jp_v][dp_v]   = TRMODE_R;
2761 	      /* consider making a different mode here, to let the traceback know that left child emits 0 residues,
2762 	       * this should then effect the alignment display, no? it is a different case from the
2763 	       * >0 residues from left child TRMODE_R case for Ralpha checked for in (for k) loop above.
2764 	       */
2765 	    }
2766 	  }
2767 	}
2768       }
2769     } /* end of B_st recursion */
2770 
2771     /* Now handle from ROOT_S, state 0. So far we haven't touched
2772      * the {J,L,R,T}alpha[0] decks at all since initialization and here
2773      * we'll only update at most 1 cell in each, the one pertaining
2774      * to a full alignment [0][L][L].
2775      *
2776      * In truncated alignment the only way out of ROOT_S in local or
2777      * global mode is via a 'truncated begin' with a score (penalty)
2778      * from cm->trp into any emitting state. The penalty was
2779      * calculated in cm_tr_penalties_Create() and differs depending on
2780      * whether we are in local or global mode and the value of
2781      * 'pty_idx' which was passed in.
2782      */
2783     if(L >= jmin[v] && L <= jmax[v]) {
2784       jp_v = L - jmin[v];
2785       Lp   = L - hdmin[v][jp_v];
2786       if(L >= hdmin[v][jp_v] && L <= hdmax[v][jp_v]) {
2787 
2788 	/* If we get here alpha[v][jp_v][Lp] and alpha[0][jp_0][Lp_0]
2789 	 * are valid cells in the banded alpha matrix, corresponding to
2790 	 * alpha[v][L][L] and alpha[0][L][L] in the platonic matrix.
2791 	 * (Le've already made sure alpha[0][jp_0][Lp_0] was valid
2792 	 * at the beginning of the function.)
2793 	 */
2794 	trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
2795 	if(NOT_IMPOSSIBLE(trpenalty)) {
2796 	  /* check if we have a new optimally scoring Joint alignment in J matrix */
2797 	  if(do_J_v && cp9b->Jvalid[0]) {
2798 	    sc = Jalpha[v][jp_v][Lp] + trpenalty;
2799 	    if (sc > Jalpha[0][jp_0][Lp_0]) {
2800 	      Jalpha[0][jp_0][Lp_0] = sc;
2801 	      Jb = v;
2802 	    }
2803 	  }
2804 	  /* check if we have a new optimally scoring Left alignment in L matrix */
2805 	  if(do_L_v && cp9b->Lvalid[0]) {
2806 	    sc = Lalpha[v][jp_v][Lp] + trpenalty;
2807 	    if (sc > Lalpha[0][jp_0][Lp_0]) {
2808 	      Lalpha[0][jp_0][Lp_0] = sc;
2809 	      Lb = v;
2810 	    }
2811 	  }
2812 	  /* check if we have a new optimally scoring Right alignment in R matrix */
2813 	  if(do_R_v && cp9b->Rvalid[0]) {
2814 	    sc = Ralpha[v][jp_v][Lp] + trpenalty;
2815 	    if (sc > Ralpha[0][jp_0][Lp_0]) {
2816 	      Ralpha[0][jp_0][Lp_0] = sc;
2817 	      Rb = v;
2818 	    }
2819 	  }
2820 	  /* check if we have a new optimally scoring Terminal alignment in T matrix */
2821 	  if(do_T_v && cp9b->Tvalid[0]) {
2822 	    sc = Talpha[v][jp_v][Lp] + trpenalty;
2823 	    if (sc > Talpha[0][jp_0][Lp_0]) {
2824 	      Talpha[0][jp_0][Lp_0] = sc;
2825 	      Tb = v;
2826 	    }
2827 	  }
2828 	}
2829       }
2830     }
2831   } /* end loop for (v = cm->M-1; v > 0; v--) */
2832 
2833   /* all valid alignments must use a truncated begin */
2834   if (          cp9b->Jvalid[0]) Jyshadow[0][jp_0][Lp_0] = USED_TRUNC_BEGIN;
2835   if (fill_L && cp9b->Lvalid[0]) Lyshadow[0][jp_0][Lp_0] = USED_TRUNC_BEGIN;
2836   if (fill_R && cp9b->Rvalid[0]) Ryshadow[0][jp_0][Lp_0] = USED_TRUNC_BEGIN;
2837   /* Tyshadow[0] doesn't exist, caller must know how to deal */
2838 
2839   /* determine mode of optimal alignment, if it was preset then use that */
2840   if(preset_mode == TRMODE_J) {
2841     sc   = Jalpha[0][jp_0][Lp_0];
2842     mode = TRMODE_J;
2843     b    = Jb;
2844   }
2845   else if(preset_mode == TRMODE_L) {
2846     sc   = Lalpha[0][jp_0][Lp_0];
2847     mode = TRMODE_L;
2848     b    = Lb;
2849   }
2850   else if(preset_mode == TRMODE_R) {
2851     sc   = Ralpha[0][jp_0][Lp_0];
2852     mode = TRMODE_R;
2853     b    = Rb;
2854   }
2855   else if(preset_mode == TRMODE_T) {
2856     sc   = Talpha[0][jp_0][Lp_0];
2857     mode = TRMODE_T;
2858     b    = Tb;
2859   }
2860   else { /* preset_mode was unknown, max score determines mode */
2861     sc   = IMPOSSIBLE;
2862     mode = TRMODE_UNKNOWN;
2863     if (cp9b->Jvalid[0] && Jalpha[0][jp_0][Lp_0] > sc) {
2864       sc   = Jalpha[0][jp_0][Lp_0];
2865       mode = TRMODE_J;
2866       b    = Jb;
2867     }
2868     if (fill_L && cp9b->Lvalid[0] && Lalpha[0][jp_0][Lp_0] > sc) {
2869       sc   = Lalpha[0][jp_0][Lp_0];
2870       mode = TRMODE_L;
2871       b    = Lb;
2872     }
2873     if (fill_R && cp9b->Rvalid[0] && Ralpha[0][jp_0][Lp_0] > sc) {
2874       sc   = Ralpha[0][jp_0][Lp_0];
2875       mode = TRMODE_R;
2876       b    = Rb;
2877     }
2878     if (fill_T && cp9b->Tvalid[0] && Talpha[0][jp_0][Lp_0] > sc) {
2879       sc   = Talpha[0][jp_0][Lp_0];
2880       mode = TRMODE_T;
2881       b    = Tb;
2882     }
2883   }
2884 
2885 #if eslDEBUGLEVEL >= 2
2886   /* Uncomment to dump matrix to file. Careful...this could be very large. */
2887   /*FILE *fp1; fp1 = fopen("tmp.tru_cykhbmx", "w");   cm_tr_hb_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1);*/
2888   /*FILE *fp2; fp2 = fopen("tmp.tru_cykhbshmx", "w"); cm_tr_hb_shadow_mx_Dump(fp2, cm, shmx, preset_mode, TRUE); fclose(fp2);*/
2889 #endif
2890 
2891   if(ret_b    != NULL) *ret_b    = b;
2892   if(ret_mode != NULL) *ret_mode = mode;
2893   if(ret_sc   != NULL) *ret_sc   = sc;
2894 
2895   free(el_scA);
2896   free(yvalidA);
2897 
2898   ESL_DPRINTF1(("#DEBUG: cm_TrCYKInsideAlignHB return sc: %f\n", sc));
2899 
2900   if(*ret_mode == TRMODE_UNKNOWN) ESL_FAIL(eslEAMBIGUOUS, errbuf, "cm_TrCYKInsideAlignHB() no valid parsetree found");
2901 
2902   return eslOK;
2903 
2904  ERROR:
2905   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
2906 }
2907 
2908 /* Function: cm_TrInsideAlign()
2909  * Date:     EPN, Mon Sep 12 04:31:43 2011
2910  *
2911  * Purpose: Run the truncated inside algorithm on a target sequence
2912  *          without using bands. The full target sequence 1..L is
2913  *          aligned (only full alignments will contribute to the
2914  *          Inside score).
2915  *
2916  *          Identical to cm_TrInsideAlignHB() but no bands are used.
2917  *
2918  *          Very similar to cm_TrCYKInsideAlign(); see 'Purpose'
2919  *          of that function for more details. The only differences
2920  *          from that function are:
2921  *           - we do TrInside, not TrCYK
2922  *           - can't return a shadow matrix (we're not aligning)
2923  *           - doesn't return bsc, b info about truncated begins
2924  *
2925  *          The caller may already know the mode of the alignment,
2926  *          passed in as <preset_mode>. This will happen if we're
2927  *          being called from within a search pipeline, for
2928  *          example. If the caller does not know the optimal mode yet
2929  *          (e.g. if we're being called for 'cmalign'), <preset_mode>
2930  *          will be TRMODE_UNKNOWN.
2931  *
2932  *          This function complements cm_TrOutsideAlign().
2933  *
2934  * Args:     cm          - the model
2935  *           errbuf      - char buffer for reporting errors
2936  *           dsq         - the digitized sequence
2937  *           L           - target sequence length
2938  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
2939  *           preset_mode - the pre-determined alignment mode, TRMODE_UNKNOWN to allow any mode
2940  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
2941  *           mx          - the dp matrix, grown and filled here
2942  *           ret_mode    - RETURN: optimal truncation mode, TRMODE_{J,L,R,T} if {J,L,R,T}alpha[0][L][L] is max scoring.
2943  *           ret_sc      - RETURN: log P(S|M)/P(S|R), as a bit score
2944  *                         NOTE: we don't sum over different marginal modes; we pick the highest scoring
2945  *                         one (J, L, R, or T) and return {J,L,R,T}alpha[0][L][L], the sum of all complete
2946  *                         J, L, R, or T alignments.
2947  *
2948  * Returns:  <eslOK> on success.
2949  *
2950  * Throws:   <eslERANGE> if required CM_TR_MX size exceeds <size_limit>
2951  *           In this case alignment has been aborted, ret_sc is not valid
2952  */
2953 int
cm_TrInsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,CM_TR_MX * mx,char * ret_mode,float * ret_sc)2954 cm_TrInsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
2955 		 CM_TR_MX *mx, char *ret_mode, float *ret_sc)
2956 {
2957   int      status;          /* easel status code */
2958   int      v,y,z;	    /* indices for states  */
2959   int      j,d,i,k;	    /* indices in sequence dimensions */
2960   float    sc;		    /* a temporary variable holding a score */
2961   int      yoffset;	    /* y=base+offset -- counter in child states that v can transit to */
2962   float   *el_scA;          /* [0..d..W-1] probability of local end emissions of length d */
2963   int      sd;              /* StateDelta(cm->sttype[v]) */
2964   int      sdl;             /* StateLeftDelta(cm->sttype[v] */
2965   int      sdr;             /* StateRightDelta(cm->sttype[v] */
2966   int      j_sdr;           /* j - sdr */
2967   int      d_sd;            /* d - sd */
2968   int      d_sdl;           /* d - sdl */
2969   int      d_sdr;           /* d - sdr */
2970   float    tsc;             /* a transition score */
2971 
2972   /* other variables used in truncated version, but not standard version (not in cm_CYKInsideAlign()) */
2973   char     mode = TRMODE_UNKNOWN;  /* truncation mode for obtaining optimal score <ret_sc> */
2974   int      Lyoffset0;              /* first yoffset to use for updating L matrix in IR/MR states, 1 if IR, 0 if MR */
2975   int      Ryoffset0;              /* first yoffset to use for updating R matrix in IL/ML states, 1 if IL, 0 if ML */
2976   int      fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
2977   int      pty_idx;                /* index for truncation penalty, determined by pass_idx */
2978   float    trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
2979 
2980   /* the DP matrix */
2981   float ***Jalpha  = mx->Jdp; /* pointer to the Jalpha DP matrix */
2982   float ***Lalpha  = mx->Ldp; /* pointer to the Lalpha DP matrix */
2983   float ***Ralpha  = mx->Rdp; /* pointer to the Ralpha DP matrix */
2984   float ***Talpha  = mx->Tdp; /* pointer to the Talpha DP matrix */
2985 
2986   /* Determine which matrices we need to fill in, based on <preset_mode>, if TRMODE_UNKNOWN, fill_L, fill_R, fill_T will all be set as TRUE */
2987   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrInsideAlign(), bogus mode: %d", preset_mode);
2988 
2989   /* Determine the truncation penalty index, from the pass_idx */
2990   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrInsideAlign(), unexpected pass idx: %d", pass_idx);
2991 
2992   /* Allocations and initializations  */
2993 
2994   /* grow the matrices for current sequence */
2995   if((status = cm_tr_mx_GrowTo(cm, mx,   errbuf, L, size_limit)) != eslOK) return status;
2996 
2997   /* precalcuate all possible local end scores, for local end emits of 1..L residues */
2998   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
2999   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
3000 
3001   /* initialize all cells of the matrix to IMPOSSIBLE */
3002   if(mx->Jncells_valid > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
3003   if(mx->Lncells_valid > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
3004   if(mx->Rncells_valid > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
3005   if(mx->Tncells_valid > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
3006 
3007   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores */
3008   if(cm->flags & CMH_LOCAL_END) {
3009     for (j = 0; j <= L; j++) {
3010       for (d = 0;  d <= j; d++) {
3011 	Jalpha[cm->M][j][d] = el_scA[d];
3012       }
3013     }
3014     if(fill_L) {
3015       for (j = 0; j <= L; j++) {
3016 	for (d = 0;  d <= j; d++) {
3017 	  Lalpha[cm->M][j][d] = el_scA[d];
3018 	}
3019       }
3020     }
3021     if(fill_R) {
3022       for (j = 0; j <= L; j++) {
3023 	for (d = 0;  d <= j; d++) {
3024 	  Ralpha[cm->M][j][d] = el_scA[d];
3025 	}
3026       }
3027     }
3028   }
3029 
3030   /* Main recursion */
3031   for (v = cm->M-1; v > 0; v--) { /* almost to ROOT_S, we handle that differently */
3032     float const *esc_v = cm->oesc[v]; /* emission scores for state v */
3033     float const *tsc_v = cm->tsc[v];  /* transition scores for state v */
3034     float const *lmesc_v = cm->lmesc[v]; /* marginal left  emission scores for state v */
3035     float const *rmesc_v = cm->rmesc[v]; /* marginal right emission scores for state v */
3036     sd   = StateDelta(cm->sttype[v]);
3037     sdl  = StateLeftDelta(cm->sttype[v]);
3038     sdr  = StateRightDelta(cm->sttype[v]);
3039 
3040     /* re-initialize the J, L and R decks if we can do a local end from v */
3041     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
3042       for (j = 0; j <= L; j++) {
3043 	for (d = sd; d <= j; d++) {
3044 	  Jalpha[v][j][d] = Jalpha[cm->M][j][d-sd] + cm->endsc[v];
3045 	}
3046       }
3047       if(fill_L) {
3048 	for (j = 0; j <= L; j++) {
3049 	  for (d = sdl; d <= j; d++) {
3050 	    Lalpha[v][j][d] = Lalpha[cm->M][j][d-sdl] + cm->endsc[v];
3051 	  }
3052 	}
3053       }
3054       if(fill_R) {
3055 	for (j = 0; j <= L; j++) {
3056 	  for (d = sdr; d <= j; d++) {
3057 	    Ralpha[v][j][d] = Ralpha[cm->M][j][d-sdr] + cm->endsc[v];
3058 	  }
3059 	}
3060       }
3061     }
3062     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
3063 
3064     if(cm->sttype[v] == E_st) {
3065       for (j = 0; j <= L; j++) {
3066 	Jalpha[v][j][0] = 0.;
3067 	if(fill_L) Lalpha[v][j][0] = 0.;
3068 	if(fill_R) Ralpha[v][j][0] = 0.;
3069 	/* rest of deck remains IMPOSSIBLE */
3070       }
3071     }
3072     else if(cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) {
3073       /* update alpha[v][j][d] cells, for IL states, loop nesting order is:
3074        * for j { for d { for y { } } } because they can self transit, and a
3075        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
3076        * before can start calc'ing for alpha[v][j][d+1]
3077        * We do ML states as well as IL states b/c they follow the same rules,
3078        * and we're not worried about efficiency here.
3079        */
3080 
3081       /* In TrCYK: we need to treat R differently from and J and L
3082        * here, by doing separate 'for (yoffset...' loops for J and R
3083        * because we have to fully calculate Jalpha[v][j][d]) before we
3084        * can start to calculate Ralpha[v][j][d].
3085        */
3086       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
3087 	Ryoffset0 = cm->sttype[v] == IL_st ? 1 : 0; /* don't allow IL self transits in R mode */
3088 	for (j = sdr; j <= L; j++) {
3089 	  j_sdr = j - sdr;
3090 	  for (d = sd; d <= j; d++) {
3091 	    d_sd = d - sd;
3092 	    i    = j - d + 1;
3093 	    for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
3094 	      y = cm->cfirst[v] + yoffset;
3095 	      Jalpha[v][j][d] = FLogsum(Jalpha[v][j][d], Jalpha[y][j_sdr][d_sd] + tsc_v[yoffset]);
3096 	      if(fill_L) Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Lalpha[y][j_sdr][d_sd] + tsc_v[yoffset]);
3097 	    }
3098 	    Jalpha[v][j][d] += esc_v[dsq[i]];
3099 	    if(fill_L) Lalpha[v][j][d]  = (d >= 2) ? Lalpha[v][j][d] + esc_v[dsq[i]] : esc_v[dsq[i]];
3100 
3101 	    Jalpha[v][j][d]  = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
3102 	    if(fill_L) Lalpha[v][j][d]  = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
3103 	    i--;
3104 
3105 	    /* handle R separately */
3106 	    if(fill_R) {
3107 	      /* note we use 'd', not 'd_sd' (which we used in the corresponding loop for J,L above) */
3108 	      for (yoffset = Ryoffset0; yoffset < cm->cnum[v]; yoffset++) { /* using Ryoffset0 instead of 0 disallows IL self transits in R mode */
3109 		y = cm->cfirst[v] + yoffset;
3110 		Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Jalpha[y][j_sdr][d] + tsc_v[yoffset]);
3111 		Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Ralpha[y][j_sdr][d] + tsc_v[yoffset]);
3112 	      }
3113 	      Ralpha[v][j][d] = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
3114 	    }
3115 	  }
3116 	}
3117       } /* end of if(! StateIsDetached(cm, v)) */
3118     }
3119     else if(cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) {
3120       /* update alpha[v][j][d] cells, for IR states, loop nesting order is:
3121        * for j { for d { for y { } } } because they can self transit, and a
3122        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
3123        * before can start calc'ing for alpha[v][j][d+1].
3124        * We do MR states as well as IR states b/c they follow the same rules,
3125        * and we're not worried about efficiency here.
3126        */
3127 
3128       /* In TrCYK: we need to treat L differently from J and R
3129        * here, by doing separate 'for (yoffset...' loops for J and L
3130        * because we have to fully calculate Jalpha[v][j][d] before we
3131        * can start to calculate Lalpha[v][j][d].
3132        */
3133       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
3134 	Lyoffset0 = cm->sttype[v] == IR_st ? 1 : 0; /* don't allow IR self transits in L mode */
3135 	for (j = sdr; j <= L; j++) {
3136 	  j_sdr = j - sdr;
3137 	  for (d = sd; d <= j; d++) {
3138 	    d_sd = d - sd;
3139 	    i = j - d + 1;
3140 	    for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++) {
3141 	      y = cm->cfirst[v] + yoffset;
3142 	      Jalpha[v][j][d] = FLogsum(Jalpha[v][j][d], Jalpha[y][j_sdr][d_sd] + tsc_v[yoffset]);
3143 	      if(fill_R) Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Ralpha[y][j_sdr][d_sd] + tsc_v[yoffset]);
3144 	    }
3145 
3146 	    Jalpha[v][j][d] += esc_v[dsq[j]];
3147 	    if(fill_R) Ralpha[v][j][d]  = (d >= 2) ? Ralpha[v][j][d] + esc_v[dsq[j]] : esc_v[dsq[j]];
3148 
3149 	    Jalpha[v][j][d]  = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
3150 	    if(fill_R) Ralpha[v][j][d]  = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
3151 
3152 	    /* handle L separately */
3153 	    if(fill_L) {
3154 	      /* note we use 'j' and 'd', not 'j_sdr' and 'd_sd' (which we used in the corresponding loop for J,R above) */
3155 	      for (yoffset = Lyoffset0; yoffset < cm->cnum[v]; yoffset++) { /* using Lyoffset0, instead of 0 disallows IR self transits in L mode */
3156 		y = cm->cfirst[v] + yoffset;
3157 		Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Jalpha[y][j][d] + tsc_v[yoffset]);
3158 		Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Lalpha[y][j][d] + tsc_v[yoffset]);
3159 	      }
3160 	      Lalpha[v][j][d] = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
3161 	    }
3162 	  }
3163 	}
3164       } /* end of if(! StateIsDetached(cm, v)) */
3165     }
3166     else if(cm->sttype[v] == MP_st) {
3167       /* MP states cannot self transit, this means that all cells in
3168        * alpha[v] are independent of each other, only depending on
3169        * alpha[y] for previously calc'ed y.  We can do the for loops
3170        * in any nesting order, this implementation does what I think
3171        * is most efficient: for y { for j { for d { } } }
3172        */
3173       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
3174 	yoffset = y - cm->cfirst[v];
3175 	tsc = tsc_v[yoffset];
3176 
3177 	for (j = sdr; j <= L; j++) {
3178 	  j_sdr = j - sdr;
3179 
3180 	  for (d = sd; d <= j; d++) { /* sd == 2 for MP state */
3181 	    d_sd = d - sd;
3182 	    Jalpha[v][j][d] = FLogsum(Jalpha[v][j][d], Jalpha[y][j_sdr][d_sd] + tsc_v[yoffset]);
3183 	  }
3184 	  if(fill_L) {
3185 	    /* note we use 'j' and 'd_sdl' not 'j_sdr' for 'd_sd' for L, plus minimum d is sdl (1) */
3186 	    for (d = sdl; d <= j; d++) { /* sdl == 1 for MP state */
3187 	      d_sdl = d-sdl;
3188 	      Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Jalpha[y][j][d_sdl] + tsc_v[yoffset]);
3189 	      Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Lalpha[y][j][d_sdl] + tsc_v[yoffset]);
3190 	    }
3191 	  }
3192 	  if(fill_R) {
3193 	    /* note we use 'd_sdr' not 'd_sd' for R, plus minimum d is sdr (1) */
3194 	    for (d = sdr; d <= j; d++) { /* sdr == 1 for MP state */
3195 	      d_sdr = d - sdr;
3196 	      Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Jalpha[y][j_sdr][d_sdr] + tsc_v[yoffset]);
3197 	      Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Ralpha[y][j_sdr][d_sdr] + tsc_v[yoffset]);
3198 	    }
3199 	  }
3200 	}
3201       }
3202       /* add in emission score */
3203       for (j = 0; j <= L; j++) {
3204 	i = j;
3205 	Jalpha[v][j][1] = IMPOSSIBLE;
3206 	if(fill_L) Lalpha[v][j][1] = lmesc_v[dsq[i]];
3207 	if(fill_R) Ralpha[v][j][1] = rmesc_v[dsq[j]];
3208 	i--;
3209 	for (d = 2; d <= j; d++) {
3210 	  Jalpha[v][j][d] += esc_v[dsq[i]*cm->abc->Kp+dsq[j]];
3211 	  if(fill_L) Lalpha[v][j][d] += lmesc_v[dsq[i]];
3212 	  if(fill_R) Ralpha[v][j][d] += rmesc_v[dsq[j]];
3213 	  i--;
3214 	}
3215       }
3216       /* ensure all cells are >= IMPOSSIBLE */
3217       for (j = 0; j <= L; j++) {
3218 	for (d = 1; d <= j; d++) {
3219 	  Jalpha[v][j][d] = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
3220 	  if(fill_L) Lalpha[v][j][d] = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
3221 	  if(fill_R) Ralpha[v][j][d] = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
3222 	}
3223       }
3224     }
3225     else if(cm->sttype[v] != B_st) { /* entered if state v is D or S */
3226       /* D, S states cannot self transit, this means that all cells in
3227        * alpha[v] are independent of each other, only depending on
3228        * alpha[y] for previously calc'ed y.  We can do the for loops
3229        * in any nesting order, this implementation does what I think
3230        * is most efficient: for y { for j { for d { } } }
3231        */
3232       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
3233 	yoffset = y - cm->cfirst[v];
3234 	tsc = tsc_v[yoffset];
3235 
3236 	for (j = sdr; j <= L; j++) {
3237 	  j_sdr = j - sdr;
3238 
3239 	  for (d = sd; d <= j; d++) {
3240 	    d_sd = d-sd;
3241 	    Jalpha[v][j][d] = FLogsum(Jalpha[v][j][d], Jalpha[y][j_sdr][d_sd] + tsc);
3242 	    if(fill_L) Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Lalpha[y][j_sdr][d_sd] + tsc);
3243 	    if(fill_R) Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Ralpha[y][j_sdr][d_sd] + tsc);
3244 	  }
3245 	  /* an easy to overlook case: if d == 0, ensure L and R values are IMPOSSIBLE */
3246 	  if(fill_L) Lalpha[v][j][0] = IMPOSSIBLE;
3247 	  if(fill_R) Ralpha[v][j][0] = IMPOSSIBLE;
3248 	}
3249       }
3250       /* no emission score to add */
3251     }
3252     else { /* B_st */
3253       assert(cm->sttype[v] == B_st);
3254       y = cm->cfirst[v]; /* left  subtree */
3255       z = cm->cnum[v];   /* right subtree */
3256 
3257       for (j = 0; j <= L; j++) {
3258 	for (d = 0; d <= j; d++) {
3259 	  for (k = 0; k <= d; k++) {
3260 	    Jalpha[v][j][d] = FLogsum(Jalpha[v][j][d], Jalpha[y][j-k][d-k] + Jalpha[z][j][k]);
3261 	    if(fill_L) Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Jalpha[y][j-k][d-k] + Lalpha[z][j][k]);
3262 	    if(fill_R) Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Ralpha[y][j-k][d-k] + Jalpha[z][j][k]);
3263 	  }
3264 	  if(fill_T) {
3265 	    for(k = 1; k < d; k++) { /* special boundary case for T matrix */
3266 	      Talpha[v][j][d] = FLogsum(Talpha[v][j][d], Ralpha[y][j-k][d-k] + Lalpha[z][j][k]);
3267 	    }
3268 	  }
3269 	  /* two additional special cases in trCYK (these are not in standard CYK) */
3270 	  /* special case 1: k == 0 (full sequence aligns to BEGL_S left child */
3271 	  if(fill_L) {
3272 	    Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Jalpha[y][j][d]);
3273 	    Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Lalpha[y][j][d]);
3274 	  }
3275 	  /* special case 2: k == d (full sequence aligns to BEGR_S right child */
3276 	  if(fill_R) {
3277 	    Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Jalpha[z][j][d]);
3278 	    Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Ralpha[z][j][d]);
3279 	  }
3280 	}
3281       }
3282     } /* end of B_st recursion */
3283 
3284     /* Now handle from ROOT_S, state 0. So far we haven't touched
3285      * the {J,L,R,T}alpha[0] decks at all since initialization and here
3286      * we'll only update at most 1 cell in each, the one pertaining
3287      * to a full alignment [0][L][L].
3288      *
3289      * In truncated alignment the only way out of ROOT_S in local or
3290      * global mode is via a 'truncated begin' with a score (penalty)
3291      * from cm->trp into any emitting state. The penalty was
3292      * calculated in cm_tr_penalties_Create() and differs depending on
3293      * whether we are in local or global mode and the value of
3294      * 'pty_idx' which was passed in.
3295      */
3296     trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
3297     if(NOT_IMPOSSIBLE(trpenalty)) {
3298       /* include full length hits in J matrix */
3299       Jalpha[0][L][L] = FLogsum(Jalpha[0][L][L], Jalpha[v][L][L] + trpenalty);
3300       /* include full length hits in L matrix */
3301       if(fill_L) {
3302 	Lalpha[0][L][L] = FLogsum(Lalpha[0][L][L], Lalpha[v][L][L] + trpenalty);
3303       }
3304       /* include full length hits in R matrix */
3305       if(fill_R) {
3306 	Ralpha[0][L][L] = FLogsum(Ralpha[0][L][L], Ralpha[v][L][L] + trpenalty);
3307       }
3308       /* include full length hits in T matrix */
3309       if(fill_T && cm->sttype[v] == B_st) {
3310 	Talpha[0][L][L] = FLogsum(Talpha[0][L][L], Talpha[v][L][L] + trpenalty);
3311       }
3312     }
3313   } /* end of for (v = cm->M-1; v > 0; v--) */
3314 
3315   /* determine mode of optimal alignment, if it was preset then use that */
3316   if(preset_mode == TRMODE_J) {
3317     sc   = Jalpha[0][L][L];
3318     mode = TRMODE_J;
3319   }
3320   else if(preset_mode == TRMODE_L) {
3321     sc   = Lalpha[0][L][L];
3322     mode = TRMODE_L;
3323   }
3324   else if(preset_mode == TRMODE_R) {
3325     sc   = Ralpha[0][L][L];
3326     mode = TRMODE_R;
3327   }
3328   else if(preset_mode == TRMODE_T) {
3329     sc   = Talpha[0][L][L];
3330     mode = TRMODE_T;
3331   }
3332   else { /* preset_mode was unknown, max score determines mode */
3333     sc   = Jalpha[0][L][L];
3334     mode = TRMODE_J;
3335     if (fill_L && Lalpha[0][L][L] > sc) {
3336       sc   = Lalpha[0][L][L];
3337       mode = TRMODE_L;
3338     }
3339     if (fill_R && Ralpha[0][L][L] > sc) {
3340       sc   = Ralpha[0][L][L];
3341       mode = TRMODE_R;
3342     }
3343     if (fill_T && Talpha[0][L][L] > sc) {
3344       sc   = Talpha[0][L][L];
3345       mode = TRMODE_T;
3346     }
3347   }
3348 
3349 #if eslDEBUGLEVEL >= 2
3350   /* Uncomment to dump matrix to file. Careful...this could be very large. */
3351   /* FILE *fp1; fp1 = fopen("tmp.tru_imx", "w");   cm_tr_mx_Dump(fp1, mx, mode, TRUE); fclose(fp1); */
3352 #endif
3353 
3354   if(ret_mode != NULL) *ret_mode = mode;
3355   if(ret_sc   != NULL) *ret_sc   = sc;
3356 
3357   free(el_scA);
3358 
3359   ESL_DPRINTF1(("#DEBUG: cm_TrInsideAlign() return sc: %f\n", sc));
3360   return eslOK;
3361 
3362  ERROR:
3363   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
3364 }
3365 
3366 
3367 /* Function: cm_TrInsideAlignHB()
3368  * Date:     EPN, Mon Sep 12 04:32:00 2011
3369  *
3370  * Purpose: Run the truncated inside algorithm on a target sequence
3371  *           using bands in the j and d dimensions of the DP
3372  *           matrix. Bands were obtained from an HMM Forward-Backward
3373  *           parse of the target sequence. Uses float log odds scores.
3374  *           The full target sequence 1..L is aligned (only full
3375  *           alignments will contribute to the Inside score).
3376  *
3377  *           Very similar to cm_TrCYKInsideAlignHB(); see 'Purpose'
3378  *           of that function for more details. The only differences
3379  *           from that function are:
3380  *           - we do TrInside, not TrCYK
3381  *           - can't return a shadow matrix (we're not aligning)
3382  *           - doesn't return b, info about local begins
3383  *
3384  *           The caller may already know the mode of the alignment,
3385  *           passed in as <preset_mode>. This will happen if we're
3386  *           being called from within a search pipeline, for
3387  *           example. If the caller does not know the optimal mode yet
3388  *           (e.g. if we're being called for 'cmalign'), <preset_mode>
3389  *           will be TRMODE_UNKNOWN.
3390  *
3391  *           This function complements cm_TrOutsideAlignHB().
3392  *
3393  * Args:     cm          - the model    [0..M-1]
3394  *           errbuf      - char buffer for reporting errors
3395  *           dsq         - the digitized sequence
3396  *           L           - target sequence length
3397  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
3398  *           preset_mode - the pre-determined alignment mode, TRMODE_UNKNOWN to allow any mode
3399  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
3400  *           mx          - the dp matrix, only cells within bands in cp9b will be valid
3401  *           ret_mode    - RETURN: optimal truncation mode, TRMODE_{J,L,R,T} if {J,L,R,T}alpha[0][L][L] is max scoring.
3402  *           ret_sc      - RETURN: log P(S|M)/P(S|R), as a bit score
3403  *                         NOTE: we don't sum over different marginal modes, we pick the highest scoring
3404  *                         one (J,L,R or T) and return {J,L,R,T}alpha[0][L][L] the sum of all complete
3405  *                         J,L,R, or T alignments.
3406  *
3407  * Returns:  <eslOK> on success.
3408  *
3409  * Throws:   <eslERANGE>     if required CM_TR_HB_MX size exceeds <size_limit>
3410  *           <eslEINVAL>     if the full sequence is not within the bands for state 0
3411  *           <eslEAMBIGUOUS> if no valid alignment is possible due to bands (score of sequence is IMPOSSIBLE)
3412  *           In any of these three cases, alignment has been aborted, ret variables are not valid.
3413  */
3414 int
cm_TrInsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,CM_TR_HB_MX * mx,char * ret_mode,float * ret_sc)3415 cm_TrInsideAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
3416 		   CM_TR_HB_MX *mx, char *ret_mode, float *ret_sc)
3417 {
3418   int      status;
3419   int      v,y,z;	/* indices for states  */
3420   int      j,d,i,k;	/* indices in sequence dimensions */
3421   float    sc;          /* temporary score */
3422   float    tsc;         /* a temporary variable holding a transition score */
3423   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
3424   int      sd;          /* StateDelta(cm->sttype[v]) */
3425   int      sdl;         /* StateLeftDelta(cm->sttype[v]) */
3426   int      sdr;         /* StateRightDelta(cm->sttype[v]) */
3427   int     *yvalidA;     /* [0..MAXCONNECT-1] TRUE if v->yoffset is legal transition (within bands) */
3428   float   *el_scA;      /* [0..d..W-1] probability of local end emissions of length d */
3429 
3430   /* indices used for handling band-offset issues, and in the depths of the DP recursion */
3431   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
3432   int      jp_y_sdr;           /* jp_y - sdr */
3433   int      j_sdr;              /* j - sdr */
3434   int      jn, jx;             /* current minimum/maximum j allowed */
3435   int      jpn, jpx;           /* minimum/maximum jp_v */
3436   int      dp_v, dp_y, dp_z;   /* d index for state v/y/z in alpha w/mem eff bands */
3437   int      dn, dx;             /* current minimum/maximum d allowed */
3438   int      dp_y_sd;            /* dp_y - sd */
3439   int      dp_y_sdl;           /* dp_y - sdl */
3440   int      dp_y_sdr;           /* dp_y - sdr */
3441   int      dpn, dpx;           /* minimum/maximum dp_v */
3442   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
3443   int      kn, kx;             /* current minimum/maximum k value */
3444   int      Lp;                 /* L also changes depending on state */
3445   int      yvalid_idx;         /* for keeping track of which children are valid */
3446   int      yvalid_ct;          /* for keeping track of which children are valid */
3447   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
3448   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
3449 
3450   /* variables related to truncated alignment (not in cm_InsideAlignHB()) */
3451   char     mode = TRMODE_UNKNOWN;  /* truncation mode for obtaining optimal score <ret_sc> */
3452   int      fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
3453   int      do_J_v, do_J_y, do_J_z; /* must we fill J matrix deck for state v, y, z? */
3454   int      do_L_v, do_L_y, do_L_z; /* must we fill L matrix deck for state v, y, z? */
3455   int      do_R_v, do_R_y, do_R_z; /* must we fill R matrix deck for state v, y, z? */
3456   int      do_T_v;                 /* must we fill T matrix deck for state v?       */
3457   int      pty_idx;                /* index for truncation penalty, determined by pass_idx */
3458   float    trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
3459 
3460   /* ptrs to cp9b info, for convenience */
3461   CP9Bands_t *cp9b = cm->cp9b;
3462   int     *jmin  = cp9b->jmin;
3463   int     *jmax  = cp9b->jmax;
3464   int    **hdmin = cp9b->hdmin;
3465   int    **hdmax = cp9b->hdmax;
3466 
3467   /* the DP matrix */
3468   float ***Jalpha  = mx->Jdp; /* pointer to the Jalpha DP matrix */
3469   float ***Lalpha  = mx->Ldp; /* pointer to the Lalpha DP matrix */
3470   float ***Ralpha  = mx->Rdp; /* pointer to the Ralpha DP matrix */
3471   float ***Talpha  = mx->Tdp; /* pointer to the Talpha DP matrix */
3472 
3473   /* Determine which matrices we need to fill in, based on <preset_mode>, if TRMODE_UNKNOWN, fill_L, fill_R, fill_T will all be set as TRUE */
3474   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrInsideAlignHB(), bogus mode: %d", preset_mode);
3475 
3476   /* Determine the truncation penalty index, from the pass_idx */
3477   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrInsideAlignHB(), unexpected pass idx: %d", pass_idx);
3478 
3479   /* Allocations and initializations */
3480 
3481   /* ensure a full alignment to ROOT_S (v==0) is possible; remember, in Inside <preset_mode> may be known or unknown */
3482   if (preset_mode == TRMODE_J && (! cp9b->Jvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrInsideAlignHB(): preset_mode is J mode, but cp9b->Jvalid[v] is FALSE");
3483   if (preset_mode == TRMODE_L && (! cp9b->Lvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrInsideAlignHB(): preset_mode is L mode, but cp9b->Lvalid[v] is FALSE");
3484   if (preset_mode == TRMODE_R && (! cp9b->Rvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrInsideAlignHB(): preset_mode is R mode, but cp9b->Rvalid[v] is FALSE");
3485   if (preset_mode == TRMODE_T && (! cp9b->Tvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrInsideAlignHB(): preset_mode is T mode, but cp9b->Tvalid[v] is FALSE");
3486   if (preset_mode == TRMODE_UNKNOWN && (! (cp9b->Jvalid[0] || cp9b->Lvalid[0] || cp9b->Rvalid[0] || cp9b->Tvalid[0]))) {
3487     ESL_FAIL(eslEINVAL, errbuf, "cm_TrInsideAlignHB(): no marginal mode is allowed for state 0");
3488   }
3489   if (cp9b->jmin[0] > L || cp9b->jmax[0] < L)               ESL_FAIL(eslEINVAL, errbuf, "cm_TrInsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cp9b->jmin[0], cp9b->jmax[0]);
3490   jp_0 = L - jmin[0];
3491   if (cp9b->hdmin[0][jp_0] > L || cp9b->hdmax[0][jp_0] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_TrInsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cp9b->hdmin[0][jp_0], cp9b->hdmax[0][jp_0]);
3492   Lp_0 = L - hdmin[0][jp_0];
3493 
3494   /* grow the matrix based on the current sequence and bands */
3495   if((status = cm_tr_hb_mx_GrowTo(cm, mx, errbuf, cp9b, L, size_limit)) != eslOK) return status;
3496 
3497   /* precalculate all possible local end scores, for local end emits of 1..L residues */
3498   ESL_ALLOC(el_scA, sizeof(float) * (L+1));
3499   for(d = 0; d <= L; d++) el_scA[d] = cm->el_selfsc * d;
3500 
3501   /* yvalidA[0..cnum[v]] will hold TRUE for states y for which a transition is legal
3502    * (some transitions are impossible due to the bands) */
3503   ESL_ALLOC(yvalidA, sizeof(int) * MAXCONNECT);
3504   esl_vec_ISet(yvalidA, MAXCONNECT, FALSE);
3505 
3506   /* initialize all cells of the matrix to IMPOSSIBLE */
3507   if(mx->Jncells_valid > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
3508   if(mx->Lncells_valid > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
3509   if(mx->Rncells_valid > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
3510   if(mx->Tncells_valid > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
3511 
3512   /* if local ends are on, replace the EL deck IMPOSSIBLEs with EL scores,
3513    * Note: we could optimize by skipping this step and using el_scA[d] to
3514    * initialize ELs for each state in the first step of the main recursion
3515    * below. We fill in the EL deck here for completeness and so that
3516    * a check of this alpha matrix with a Outside matrix will pass.
3517    */
3518   if(cm->flags & CMH_LOCAL_END) {
3519     if(cp9b->Jvalid[cm->M]) {
3520       for (j = 0; j <= L; j++) {
3521 	for (d = 0;  d <= j; d++) Jalpha[cm->M][j][d] = el_scA[d];
3522 	/* remember, the EL deck is non-banded */
3523       }
3524     }
3525     if(fill_L && cp9b->Lvalid[cm->M]) {
3526       for (j = 0; j <= L; j++) {
3527 	for (d = 0;  d <= j; d++) Lalpha[cm->M][j][d] = el_scA[d];
3528       }
3529     }
3530     if(fill_R && cp9b->Rvalid[cm->M]) {
3531       for (j = 0; j <= L; j++) {
3532 	for (d = 0;  d <= j; d++) Ralpha[cm->M][j][d] = el_scA[d];
3533       }
3534     }
3535   }
3536 
3537   /* Main recursion */
3538   for (v = cm->M-1; v > 0; v--) { /* almost to ROOT_S, we handle that differently */
3539     float const *esc_v   = cm->oesc[v]; /* emission scores for state v */
3540     float const *tsc_v   = cm->tsc[v];  /* transition scores for state v */
3541     float const *lmesc_v = cm->lmesc[v];
3542     float const *rmesc_v = cm->rmesc[v];
3543     sd     = StateDelta(cm->sttype[v]);
3544     sdl    = StateLeftDelta(cm->sttype[v]);
3545     sdr    = StateRightDelta(cm->sttype[v]);
3546     jn     = jmin[v];
3547     jx     = jmax[v];
3548     do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
3549     do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
3550     do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
3551     do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
3552 
3553     /* re-initialize the J, L or R decks if we can do a local end from v */
3554     if(NOT_IMPOSSIBLE(cm->endsc[v])) {
3555       for (j = jmin[v]; j <= jmax[v]; j++) {
3556 	jp_v  = j - jmin[v];
3557 	if(do_J_v && cp9b->Jvalid[cm->M]) {
3558 	  if(hdmin[v][jp_v] >= sd) {
3559 	    d    = hdmin[v][jp_v];
3560 	    dp_v = 0;
3561 	  }
3562 	  else {
3563 	    d    = sd;
3564 	    dp_v = sd - hdmin[v][jp_v];
3565 	  }
3566 	  for (; d <= hdmax[v][jp_v]; dp_v++, d++) {
3567 	    Jalpha[v][jp_v][dp_v] = el_scA[d-sd] + cm->endsc[v];
3568 	  }
3569 	}
3570 
3571 	if(do_L_v && cp9b->Lvalid[cm->M]) {
3572 	  if(hdmin[v][jp_v] >= sdl) {
3573 	    d    = hdmin[v][jp_v];
3574 	    dp_v = 0;
3575 	  }
3576 	  else {
3577 	    d    = sdl;
3578 	    dp_v = sdl - hdmin[v][jp_v];
3579 	  }
3580 	  for (; d <= hdmax[v][jp_v]; dp_v++, d++) {
3581 	    Lalpha[v][jp_v][dp_v] = el_scA[d-sdl] + cm->endsc[v];
3582 	  }
3583 	}
3584 
3585 	if(do_R_v && cp9b->Rvalid[cm->M]) {
3586 	  if(hdmin[v][jp_v] >= sdr) {
3587 	    d    = hdmin[v][jp_v];
3588 	    dp_v = 0;
3589 	  }
3590 	  else {
3591 	    d    = sdr;
3592 	    dp_v = sdr - hdmin[v][jp_v];
3593 	  }
3594 	  for (; d <= hdmax[v][jp_v]; dp_v++, d++) {
3595 	    Ralpha[v][jp_v][dp_v] = el_scA[d-sdr] + cm->endsc[v];
3596 	  }
3597 	}
3598       }
3599     }
3600     /* otherwise this state's deck has already been initialized to IMPOSSIBLE */
3601 
3602     if(cm->sttype[v] == E_st) {
3603       for (j = jmin[v]; j <= jmax[v]; j++) {
3604 	jp_v = j-jmin[v];
3605 	ESL_DASSERT1((hdmin[v][jp_v] == 0));
3606 	ESL_DASSERT1((hdmax[v][jp_v] == 0));
3607 	if(do_J_v) Jalpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
3608 	if(do_L_v) Lalpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
3609 	if(do_R_v) Ralpha[v][jp_v][0] = 0.; /* for End states, d must be 0 */
3610       }
3611     }
3612     else if(cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) {
3613       /* update {J,L,R}alpha[v][jp_v][dp_v] cells, for IL states, loop
3614        * nesting order is: for j { for d { for y { } } } because they
3615        * can self transit, and a {J,L,R}alpha[v][j][d] cell must be
3616        * complete (that is we must have looked at all children y)
3617        * before can start calc'ing for {J,L,R}alpha[v][j][d+1]
3618        * We could be slightly more efficient if we separated out
3619        * ML from IL b/c self-transits in MLs are impossible, but
3620        * we don't do that here. */
3621       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
3622 	for (j = jmin[v]; j <= jmax[v]; j++) {
3623 	  jp_v = j - jmin[v];
3624 	  yvalid_ct = 0;
3625 	  j_sdr = j - sdr;
3626 
3627 	  /* determine which children y we can legally transit to for v, j */
3628 	  for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
3629 	    if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr valid for state y? */
3630 
3631 	  for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
3632 	    i    = j - d + 1;
3633 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
3634 
3635 	    /* We need to treat R differently from and J and L here, by
3636 	     * doing separate 'for (yoffset...' loops for J and R
3637 	     * because we have to fully calculate Jalpha[v][jp_v][dp_v])
3638 	     * before we can start to calculate Ralpha[v][jp_v][dp_v].
3639 	     */
3640 	    /* Handle J and L first */
3641 	    if(do_J_v || do_L_v) {
3642 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
3643 		yoffset = yvalidA[yvalid_idx];
3644 		y = cm->cfirst[v] + yoffset;
3645 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
3646 		do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
3647 		if(do_J_y || do_L_y) {
3648 		  jp_y_sdr = j - jmin[y] - sdr;
3649 
3650 		  if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
3651 		    dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
3652 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
3653 		    ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
3654 		    if(do_J_v && do_J_y) Jalpha[v][jp_v][dp_v] = FLogsum(Jalpha[v][jp_v][dp_v], Jalpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]);
3655 		    if(do_L_v && do_L_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Lalpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]);
3656 		  }
3657 		}
3658 	      }
3659 	      if(do_J_v) {
3660 		Jalpha[v][jp_v][dp_v] += esc_v[dsq[i]];
3661 		Jalpha[v][jp_v][dp_v] = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
3662 	      }
3663 	      if(do_L_v) {
3664 		Lalpha[v][jp_v][dp_v] = (d >= 2) ? Lalpha[v][jp_v][dp_v] + esc_v[dsq[i]] : esc_v[dsq[i]];
3665 		Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
3666 	      }
3667 	      i--;
3668 	    }
3669 
3670 	    if(do_R_v) {
3671 	      /* Handle R separately */
3672 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
3673 		yoffset = yvalidA[yvalid_idx];
3674 		y = cm->cfirst[v] + yoffset;
3675 		do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
3676 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
3677 		if((do_J_y || do_R_y) && (y != v)) { /* (y != v) part is to disallow IL self transits in R mode */
3678 		  jp_y_sdr = j - jmin[y] - sdr;
3679 
3680 		  /* we use 'd' and 'dp_y' here, not 'd-sd' and 'dp_y_sd' (which we used in the corresponding loop for J,L above) */
3681 		  if((d) >= hdmin[y][jp_y_sdr] && (d) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
3682 		    dp_y = d - hdmin[y][jp_y_sdr];
3683 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
3684 		    ESL_DASSERT1((dp_y    >= 0 && dp_y     <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
3685 		    if(do_J_y) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Jalpha[y][jp_y_sdr][dp_y] + tsc_v[yoffset]);
3686 		    if(do_R_y) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Ralpha[y][jp_y_sdr][dp_y] + tsc_v[yoffset]);
3687 		  }
3688 		}
3689 	      } /* end of for (yvalid_idx = 0... loop */
3690 	      Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
3691 	    }
3692 	  }
3693 	}
3694       } /* end of if(! StateIsDetached(cm, v) */
3695     }
3696     else if(cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) {
3697       /* update {J,L,R}alpha[v][jp_v][dp_v] cells, for IR states, loop
3698        * nesting order is: for j { for d { for y { } } } because they
3699        * can self transit, and a {J,L,R}alpha[v][j][d] cell must be
3700        * complete (that is we must have looked at all children y)
3701        * before can start calc'ing for {J,L,R}alpha[v][j][d+1].
3702        * We could be slightly more efficient if we separated out
3703        * MR from IR b/c self-transits in MRs are impossible, but
3704        * we don't do that here. */
3705 
3706       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
3707       /* The first MR_st/IR_st 'for (j...' loop is for J and R matrices which use the same set of j values */
3708 	if(do_J_v || do_R_v) {
3709 	  for (j = jmin[v]; j <= jmax[v]; j++) {
3710 	    jp_v = j - jmin[v];
3711 	    yvalid_ct = 0;
3712 	    j_sdr = j - sdr;
3713 
3714 	    /* determine which children y we can legally transit to for v, j */
3715 	    for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
3716 	      if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr is valid for state y? */
3717 
3718 	    for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
3719 	      dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
3720 
3721 	      /* We need to treat L differently from and J and R here, by
3722 	       * doing separate 'for (yoffset...' loops for J because we
3723 	       * have to fully calculate Jalpha[v][jp_v][dp_v]) before we
3724 	       * can start to calculate Lalpha[v][jp_v][dp_v].
3725 	       */
3726 	      /* Handle J and R first */
3727 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
3728 		yoffset = yvalidA[yvalid_idx];
3729 		y = cm->cfirst[v] + yoffset;
3730 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
3731 		do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
3732 		if(do_J_y || do_R_y) {
3733 		  jp_y_sdr = j - jmin[y] - sdr;
3734 
3735 		  if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
3736 		    dp_y_sd = d - sd - hdmin[y][jp_y_sdr];
3737 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
3738 		    ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
3739 		    if(do_J_v && do_J_y) Jalpha[v][jp_v][dp_v] = FLogsum(Jalpha[v][jp_v][dp_v], Jalpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]);
3740 		    if(do_R_v && do_R_y) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Ralpha[y][jp_y_sdr][dp_y_sd] + tsc_v[yoffset]);
3741 		  }
3742 		}
3743 	      }
3744 	      if(do_J_v) {
3745 		Jalpha[v][jp_v][dp_v] += esc_v[dsq[j]];
3746 		Jalpha[v][jp_v][dp_v] = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
3747 	      }
3748 	      if(do_R_v) {
3749 		Ralpha[v][jp_v][dp_v] = (d >= 2) ? Ralpha[v][jp_v][dp_v] + esc_v[dsq[j]] : esc_v[dsq[j]];
3750 		Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
3751 	      }
3752 	    }
3753 	  }
3754 	}
3755 	/* Handle L separately */
3756 	if(do_L_v) {
3757 	  /* The second MR_st/IR_st 'for (j...' loop is for the L matrix which use a different set of j values */
3758 	  for (j = jmin[v]; j <= jmax[v]; j++) {
3759 	    jp_v = j - jmin[v];
3760 	    yvalid_ct = 0;
3761 
3762 	    /* determine which children y we can legally transit to for v, j */
3763 	    /* we use 'j' and not 'j_sdr' here for the L matrix, differently from J and R matrices above */
3764 	    for (y = cm->cfirst[v], yoffset = 0; y < (cm->cfirst[v] + cm->cnum[v]); y++, yoffset++)
3765 	      if(j >= jmin[y] && j <= jmax[y]) yvalidA[yvalid_ct++] = yoffset; /* is j is valid for state y? */
3766 
3767 	    for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
3768 	      dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
3769 
3770 	      for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
3771 		/* Note if we're an IL state, we can't self transit in R mode, this was ensured above when we set up yvalidA[] (xref:ELN3,p5)*/
3772 		yoffset = yvalidA[yvalid_idx];
3773 		y = cm->cfirst[v] + yoffset;
3774 		do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
3775 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
3776 		if((do_J_y || do_L_y) && (y != v)) { /* (y != v) part is to disallow IR self transits in L mode */
3777 		  /* we use 'jp_y=j-min[y]' here, not 'jp_y_sdr=j-jmin[y]-sdr' (which we used in the corresponding loop for J,R above) */
3778 		  jp_y = j - jmin[y];
3779 
3780 		  /* we use 'd' and 'dp_y' here, not 'd-sd' and 'dp_y_sd' (which we used in the corresponding loop for J,R above) */
3781 		  if((d) >= hdmin[y][jp_y] && (d) <= hdmax[y][jp_y]) { /* make sure d is valid for this v, j and y */
3782 		    dp_y = d - hdmin[y][jp_y];
3783 		    ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
3784 		    ESL_DASSERT1((dp_y    >= 0 && dp_y     <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
3785 		    if(do_J_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Jalpha[y][jp_y][dp_y] + tsc_v[yoffset]);
3786 		    if(do_L_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Lalpha[y][jp_y][dp_y] + tsc_v[yoffset]);
3787 		  }
3788 		}
3789 	      } /* end of for (yvalid_idx = 0... loop */
3790 	      Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
3791 	    }
3792 	  }
3793 	}
3794       } /* end of if(! StateIsDetached(cm, v)) */
3795     }
3796     else if(cm->sttype[v] == MP_st) {
3797       /* MP states cannot self transit, this means that all cells in
3798        * alpha[v] are independent of each other, only depending on
3799        * alpha[y] for previously calc'ed y.  We can do the for loops
3800        * in any nesting order, this implementation does what I think
3801        * is most efficient: for y { for j { for d { } } }
3802        */
3803       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
3804 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
3805 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
3806 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
3807 	yoffset = y - cm->cfirst[v];
3808 	tsc = tsc_v[yoffset];
3809 
3810 	/* The first MP_st 'for (jp_v...' loop is for J and R matrices which use the same set of j values */
3811 	/* j must satisfy:
3812 	 * j >= jmin[v]
3813 	 * j >= jmin[y]+sdr (follows from (j-sdr >= jmin[y]))
3814 	 * j <= jmax[v]
3815 	 * j <= jmax[y]+sdr (follows from (j-sdr <= jmax[y]))
3816 	 * this reduces to two ESL_MAX calls
3817 	 */
3818 	jn = ESL_MAX(jmin[v], jmin[y]+sdr);
3819 	jx = ESL_MIN(jmax[v], jmax[y]+sdr);
3820 	jpn = jn - jmin[v];
3821 	jpx = jx - jmin[v];
3822 	jp_y_sdr = jn - jmin[y] - sdr;
3823 	/* for Lalpha, we use 'jp_y=j-min[y]' instead of 'jp_y_sdr=j-jmin[y]-sdr' */
3824 
3825 	if((do_J_v && do_J_y) || (do_R_v && (do_J_y || do_R_y))) {
3826 	  for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++, jp_y++) {
3827 	    ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
3828 	    ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
3829 
3830 	    if(do_J_v && do_J_y) {
3831 	      /* J matrix: */
3832 	      /* d must satisfy:
3833 	       * d >= hdmin[v][jp_v]
3834 	       * d >= hdmin[y][jp_y_sdr]+sd (follows from (d-sd >= hdmin[y][jp_y_sdr]))
3835 	       * d <= hdmax[v][jp_v]
3836 	       * d <= hdmax[y][jp_y_sdr]+sd (follows from (d-sd <= hdmax[y][jp_y_sdr]))
3837 	       * this reduces to two ESL_MAX calls
3838 	       */
3839 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
3840 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
3841 	      dpn       = dn - hdmin[v][jp_v];
3842 	      dpx       = dx - hdmin[v][jp_v];
3843 	      dp_y_sd   = dn - hdmin[y][jp_y_sdr] - sd;
3844 
3845 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
3846 		ESL_DASSERT1((dp_v      >= 0 && dp_v       <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
3847 		ESL_DASSERT1((dp_y_sd   >= 0 && dp_y_sd    <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
3848 		Jalpha[v][jp_v][dp_v] = FLogsum(Jalpha[v][jp_v][dp_v], Jalpha[y][jp_y_sdr][dp_y_sd] + tsc);
3849 	      }
3850 	    }
3851 
3852 	    if(do_R_v && (do_R_y || do_J_y)) {
3853 	      /* R matrix: */
3854 	      /* d must satisfy:
3855 	       * d >= hdmin[v][jp_v]
3856 	       * d >= hdmin[y][jp_y_sd]+sd (follows from (d-sd >= hdmin[y][jp_y_sd]))
3857 	       * d <= hdmax[v][jp_v]
3858 	       * d <= hdmax[y][jp_y_sd]+sd (follows from (d-sd <= hdmax[y][jp_y_sd]))
3859 	       * this reduces to two ESL_MAX calls
3860 	       */
3861 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sdr);
3862 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sdr);
3863 	      dpn       = dn - hdmin[v][jp_v];
3864 	      dpx       = dx - hdmin[v][jp_v];
3865 	      dp_y_sdr  = dn - hdmin[y][jp_y_sdr] - sdr;
3866 	      /* for {L,R}alpha, we use 'dp_y_sdr' instead of 'dy_y_sd' */
3867 
3868 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdr++) {
3869 		/* we use 'dp_y_sdr' here, not 'dp_y_sd' (which we used in the corresponding loop for J above) */
3870 		ESL_DASSERT1((dp_y_sdr  >= 0 && dp_y_sdr   <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
3871 		if(do_J_y) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Jalpha[y][jp_y_sdr][dp_y_sdr] + tsc);
3872 		if(do_R_y) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Ralpha[y][jp_y_sdr][dp_y_sdr] + tsc);
3873 	      }
3874 	    }
3875 	  }
3876 	}
3877 
3878 	if(do_L_v && (do_L_y || do_J_y)) {
3879 	  /* The second MP_st 'for (jp_v...' loop is for L matrix, which uses a different set of j values from J and R */
3880 	  /* j must satisfy:
3881 	   * j >= jmin[v]
3882 	   * j >= jmin[y] (follows from (j >= jmin[y]))
3883 	   * j <= jmax[v]
3884 	   * j <= jmax[y] (follows from (j <= jmax[y]))
3885 	   * this reduces to two ESL_MAX calls
3886 	   */
3887 	  jn = ESL_MAX(jmin[v], jmin[y]);
3888 	  jx = ESL_MIN(jmax[v], jmax[y]);
3889 	  jpn = jn - jmin[v];
3890 	  jpx = jx - jmin[v];
3891 	  jp_y = jn - jmin[y];
3892 	  /* for Lalpha, we use 'jp_y=j-min[y]' instead of 'jp_y_sdr=j-jmin[y]-sdr' */
3893 
3894 	  for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y++) {
3895 	    ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
3896 
3897 	    /* d must satisfy:
3898 	     * d >= hdmin[v][jp_v]
3899 	     * d >= hdmin[y][jp_y_sd]+sd (follows from (d-sd >= hdmin[y][jp_y_sd]))
3900 	     * d <= hdmax[v][jp_v]
3901 	     * d <= hdmax[y][jp_y_sd]+sd (follows from (d-sd <= hdmax[y][jp_y_sd]))
3902 	     * this reduces to two ESL_MAX calls
3903 	     */
3904 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y] + sdr);
3905 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y] + sdr);
3906 	    dpn       = dn - hdmin[v][jp_v];
3907 	    dpx       = dx - hdmin[v][jp_v];
3908 	    dp_y_sdl  = dn - hdmin[y][jp_y] - sdl;
3909 	    /* for Lalpha, we use 'dp_y_sdl' instead of 'dy_y_sd' */
3910 
3911 	    for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdl++) {
3912 	      /* we use 'dp_y_sdl' here, not 'dp_y_sd' (which we used in the corresponding loop for J above) */
3913 	      ESL_DASSERT1((dp_y_sdl >= 0 && dp_y_sdl  <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
3914 	      if(do_J_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Jalpha[y][jp_y][dp_y_sdl] + tsc);
3915 	      if(do_L_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Lalpha[y][jp_y][dp_y_sdl] + tsc);
3916 	    }
3917 	  }
3918 	}
3919       }
3920       /* add in emission score */
3921       for (j = jmin[v]; j <= jmax[v]; j++) {
3922 	jp_v  = j - jmin[v];
3923 	i     = j - hdmin[v][jp_v] + 1;
3924 	for (d = hdmin[v][jp_v], dp_v = 0; d <= hdmax[v][jp_v]; d++, dp_v++)
3925 	  {
3926 	    if(d >= 2) {
3927 	      if(do_J_v) Jalpha[v][jp_v][dp_v] += esc_v[dsq[i]*cm->abc->Kp+dsq[j]];
3928 	      if(do_L_v) Lalpha[v][jp_v][dp_v] += lmesc_v[dsq[i]];
3929 	      if(do_R_v) Ralpha[v][jp_v][dp_v] += rmesc_v[dsq[j]];
3930 	    }
3931 	    else {
3932 	      if(do_J_v) Jalpha[v][jp_v][dp_v] = IMPOSSIBLE;
3933 	      if(do_L_v) Lalpha[v][jp_v][dp_v] = lmesc_v[dsq[i]];
3934 	      if(do_R_v) Ralpha[v][jp_v][dp_v] = rmesc_v[dsq[j]];
3935 	    }
3936 	    i--;
3937 	  }
3938       }
3939       /* ensure all cells are >= IMPOSSIBLE */
3940       for (j = jmin[v]; j <= jmax[v]; j++) {
3941 	jp_v  = j - jmin[v];
3942 	for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
3943 	  if(do_J_v) Jalpha[v][jp_v][dp_v] = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
3944 	  if(do_L_v) Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
3945 	  if(do_R_v) Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
3946 	}
3947       }
3948     }
3949     else if(cm->sttype[v] != B_st) { /* entered if state v is D or S (! E && ! B && ! ML && ! IL && ! MR && ! IR) */
3950       /* D, S states cannot self transit, this means that all cells in
3951        * alpha[v] are independent of each other, only depending on
3952        * alpha[y] for previously calc'ed y.  We can do the for loops
3953        * in any nesting order, this implementation does what I think
3954        * is most efficient: for y { for j { for d { } } }
3955        */
3956       for (y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
3957 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
3958 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
3959 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
3960 	yoffset = y - cm->cfirst[v];
3961 	tsc = tsc_v[yoffset];
3962 
3963 	if((do_J_v && do_J_y) || (do_L_v && do_L_y) || (do_R_v && do_R_y)) {
3964 	  /* j must satisfy:
3965 	   * j >= jmin[v]
3966 	   * j >= jmin[y]+sdr (follows from (j-sdr >= jmin[y]))
3967 	   * j <= jmax[v]
3968 	   * j <= jmax[y]+sdr (follows from (j-sdr <= jmax[y]))
3969 	   * this reduces to two ESL_MAX calls
3970 	   */
3971 	  jn = ESL_MAX(jmin[v], jmin[y]+sdr);
3972 	  jx = ESL_MIN(jmax[v], jmax[y]+sdr);
3973 	  jpn = jn - jmin[v];
3974 	  jpx = jx - jmin[v];
3975 	  jp_y_sdr = jn - jmin[y] - sdr;
3976 
3977 	  for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++) {
3978 	    ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
3979 	    ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
3980 
3981 	    /* d must satisfy:
3982 	     * d >= hdmin[v][jp_v]
3983 	     * d >= hdmin[y][jp_y_sdr]+sd (follows from (d-sd >= hdmin[y][jp_y_sdr]))
3984 	     * d <= hdmax[v][jp_v]
3985 	     * d <= hdmax[y][jp_y_sdr]+sd (follows from (d-sd <= hdmax[y][jp_y_sdr]))
3986 	     * this reduces to two ESL_MAX calls
3987 	     */
3988 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
3989 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
3990 	    dpn     = dn - hdmin[v][jp_v];
3991 	    dpx     = dx - hdmin[v][jp_v];
3992 	    dp_y_sd = dn - hdmin[y][jp_y_sdr] - sd;
3993 
3994 	    for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
3995 	      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
3996 	      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
3997 	      if(do_J_v && do_J_y) Jalpha[v][jp_v][dp_v] = FLogsum(Jalpha[v][jp_v][dp_v], Jalpha[y][jp_y_sdr][dp_y_sd] + tsc);
3998 	      if(do_L_v && do_L_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Lalpha[y][jp_y_sdr][dp_y_sd] + tsc);
3999 	      if(do_R_v && do_R_y) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Ralpha[y][jp_y_sdr][dp_y_sd] + tsc);
4000 
4001 	      /* an easy to overlook case: if d == 0, set L and R values to IMPOSSIBLE */
4002 	      if(dp_v == dpn && dn == 0) { /* d is 0 */
4003 		if(do_L_v) Lalpha[v][jp_v][dp_v] = IMPOSSIBLE;
4004 		if(do_R_v) Ralpha[v][jp_v][dp_v] = IMPOSSIBLE;
4005 	      }
4006 	    }
4007 	  }
4008 	}
4009       }
4010       /* no emission score to add */
4011     }
4012     else { /* B_st */
4013       y = cm->cfirst[v]; /* left  subtree */
4014       z = cm->cnum[v];   /* right subtree */
4015 
4016       do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
4017       do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
4018       do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
4019 
4020       do_J_z = cp9b->Jvalid[z]           ? TRUE : FALSE;
4021       do_L_z = cp9b->Lvalid[z] && fill_L ? TRUE : FALSE;
4022       do_R_z = cp9b->Rvalid[z] && fill_R ? TRUE : FALSE;
4023 
4024       /* Any valid j must be within both state v and state z's j band
4025        * I think jmin[v] <= jmin[z] is guaranteed by the way bands are
4026        * constructed, but we'll check anyway.
4027        */
4028       jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
4029       jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
4030       /* the main j loop */
4031       for (j = jn; j <= jx; j++) {
4032 	jp_v = j - jmin[v];
4033 	jp_y = j - jmin[y];
4034 	jp_z = j - jmin[z];
4035 	kn = ((j-jmax[y]) > (hdmin[z][jp_z])) ? (j-jmax[y]) : hdmin[z][jp_z];
4036         kn = ESL_MAX(kn, 0); /* kn must be non-negative, added with fix to bug i36 */
4037 	/* kn satisfies inequalities (1) and (3) (listed below)*/
4038 	kx = ( jp_y       < (hdmax[z][jp_z])) ?  jp_y       : hdmax[z][jp_z];
4039 	/* kn satisfies inequalities (2) and (4) (listed below)*/
4040 	i = j - hdmin[v][jp_v] + 1;
4041 	for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++, i--) {
4042 	  dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
4043 
4044 	  /* Find the first k value that implies a valid cell in the {J,L,R} matrix y and z decks.
4045 	   * This k must satisfy the following 6 inequalities (some may be redundant):
4046 	   * (1) k >= j-jmax[y];
4047 	   * (2) k <= j-jmin[y];
4048 	   *     1 and 2 guarantee (j-k) is within state y's j band
4049 	   *
4050 	   * (3) k >= hdmin[z][j-jmin[z]];
4051 	   * (4) k <= hdmax[z][j-jmin[z]];
4052 	   *     3 and 4 guarantee k is within z's j=(j), d band
4053 	   *
4054 	   * (5) k >= d-hdmax[y][j-jmin[y]-k];
4055 	   * (6) k <= d-hdmin[y][j-jmin[y]-k];
4056 	   *     5 and 6 guarantee (d-k) is within state y's j=(j-k) d band
4057 	   *
4058 	   * kn and kx were set above (outside (for (dp_v...) loop) that
4059 	   * satisfy 1-4 (b/c 1-4 are d-independent and k-independent)
4060 	   * RHS of inequalities 5 and 6 are dependent on k, so we check
4061 	   * for these within the next for loop.
4062 	   *
4063 	   * To update a cell in the T matrix with a sum of an R matrix value for y
4064 	   * and a L matrix value for z, there are 2 additional inequalities to satisfy:
4065 	   * (7) k != 0
4066 	   * (8) k != d
4067 	   * We ensure 7 and 8 in the loop below.
4068 	   */
4069 	  for(k = kn; k <= kx; k++) {
4070 	    if((k >= d - hdmax[y][jp_y-k]) && k <= d - hdmin[y][jp_y-k]) {
4071 	      /* for current k, all 6 inequalities have been satisified
4072 	       * so we know the cells corresponding to the platonic
4073 	       * matrix cells alpha[v][j][d], alpha[y][j-k][d-k], and
4074 	       * alpha[z][j][k] are all within the bands. These
4075 	       * cells correspond to alpha[v][jp_v][dp_v],
4076 	       * alpha[y][jp_y-k][d-hdmin[jp_y-k]-k],
4077 	       * and alpha[z][jp_z][k-hdmin[jp_z]];
4078 	       */
4079 	      kp_z = k-hdmin[z][jp_z];
4080 	      dp_y = d-hdmin[y][jp_y-k];
4081 	      if(do_J_v && do_J_y && do_J_z) Jalpha[v][jp_v][dp_v] = FLogsum(Jalpha[v][jp_v][dp_v], Jalpha[y][jp_y-k][dp_y - k] + Jalpha[z][jp_z][kp_z]);
4082 	      if(do_L_v && do_J_y && do_L_z) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Jalpha[y][jp_y-k][dp_y - k] + Lalpha[z][jp_z][kp_z]);
4083 	      if(do_R_v && do_R_y && do_J_z) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Ralpha[y][jp_y-k][dp_y - k] + Jalpha[z][jp_z][kp_z]);
4084 	      if((k != 0) && (k != d)) {
4085 		if(do_T_v && do_R_y && do_L_z) Talpha[v][jp_v][dp_v] = FLogsum(Talpha[v][jp_v][dp_v], Ralpha[y][jp_y-k][dp_y - k] + Lalpha[z][jp_z][kp_z]);
4086 	      }
4087 	    }
4088 	  }
4089 	}
4090       }
4091 
4092       /* two additional special cases in trCYK (these are not in standard CYK).
4093        * we do these in their own for(j.. { for(d.. { } } loops b/c one
4094        * is independent of z, the other of y, unlike the above loop which is dependent
4095        * on both.
4096        */
4097       if(do_L_v && (do_J_y || do_L_y)) {
4098 	jn = (jmin[v] > jmin[y]) ? jmin[v] : jmin[y];
4099 	jx = (jmax[v] < jmax[y]) ? jmax[v] : jmax[y];
4100 	for (j = jn; j <= jx; j++) {
4101 	  jp_v = j - jmin[v];
4102 	  jp_y = j - jmin[y];
4103 	  ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
4104 	  ESL_DASSERT1((j >= jmin[y] && j <= jmax[y]));
4105 	  dn = (hdmin[v][jp_v] > hdmin[y][jp_y]) ? hdmin[v][jp_v] : hdmin[y][jp_y];
4106 	  dx = (hdmax[v][jp_v] < hdmax[y][jp_y]) ? hdmax[v][jp_v] : hdmax[y][jp_y];
4107 	  for(d = dn; d <= dx; d++) {
4108 	    dp_v = d - hdmin[v][jp_v];
4109 	    dp_y = d - hdmin[y][jp_y];
4110 	    ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
4111 	    ESL_DASSERT1((d >= hdmin[y][jp_y] && d <= hdmax[y][jp_y]));
4112 	    if(do_J_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Jalpha[y][jp_y][dp_y]);
4113 	    if(do_L_y) Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Lalpha[y][jp_y][dp_y]);
4114 	  }
4115 	}
4116       }
4117       if(do_R_v && (do_J_z || do_R_z)) {
4118 	jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
4119 	jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
4120 	for (j = jn; j <= jx; j++) {
4121 	  jp_v = j - jmin[v];
4122 	  jp_z = j - jmin[z];
4123 	  ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
4124 	  ESL_DASSERT1((j >= jmin[z] && j <= jmax[z]));
4125 	  dn = (hdmin[v][jp_v] > hdmin[z][jp_z]) ? hdmin[v][jp_v] : hdmin[z][jp_z];
4126 	  dx = (hdmax[v][jp_v] < hdmax[z][jp_z]) ? hdmax[v][jp_v] : hdmax[z][jp_z];
4127 	  for(d = dn; d <= dx; d++) {
4128 	    dp_v = d - hdmin[v][jp_v];
4129 	    dp_z = d - hdmin[z][jp_z];
4130 	    ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
4131 	    ESL_DASSERT1((d >= hdmin[z][jp_z] && d <= hdmax[z][jp_z]));
4132 	    if(do_J_z) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Jalpha[z][jp_z][dp_z]);
4133 	    if(do_R_z) Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Ralpha[z][jp_z][dp_z]);
4134 	  }
4135 	}
4136       }
4137     } /* end of B_st recursion */
4138 
4139     /* Now handle from ROOT_S, state 0. So far we haven't touched
4140      * the {J,L,R,T}alpha[0] decks at all since initialization and here
4141      * we'll only update at most 1 cell in each, the one pertaining
4142      * to a full alignment [0][L][L].
4143      *
4144      * In truncated alignment the only way out of ROOT_S in local or
4145      * global mode is via a 'truncated begin' with a score (penalty)
4146      * from cm->trp into any emitting state. The penalty was
4147      * calculated in cm_tr_penalties_Create() and differs depending on
4148      * whether we are in local or global mode and the value of
4149      * 'pty_idx' which was passed in.
4150      */
4151     if(L >= jmin[v] && L <= jmax[v]) {
4152       jp_v = L - jmin[v];
4153       Lp   = L - hdmin[v][jp_v];
4154       if(L >= hdmin[v][jp_v] && L <= hdmax[v][jp_v]) {
4155 	/* If we get here alpha[v][jp_v][Lp] and alpha[0][jp_0][Lp0]
4156 	 * are valid cells in the banded alpha matrix, corresponding to
4157 	 * alpha[v][L][L] and alpha[0][L][L] in the platonic matrix.
4158 	 * (We've already made sure alpha[0][jp_0][Lp_0] was valid
4159 	 * at the beginning of the function.)
4160 	 */
4161 	trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
4162 	if(NOT_IMPOSSIBLE(trpenalty)) {
4163 	  /* include full length hits in J matrix */
4164 	  if(do_J_v && cp9b->Jvalid[0]) {
4165 	    Jalpha[0][jp_0][Lp_0] = FLogsum(Jalpha[0][jp_0][Lp_0], Jalpha[v][jp_v][Lp] + trpenalty);
4166 	  }
4167 	  /* include full length hits in L matrix */
4168 	  if(do_L_v && cp9b->Lvalid[0]) {
4169 	    Lalpha[0][jp_0][Lp_0] = FLogsum(Lalpha[0][jp_0][Lp_0], Lalpha[v][jp_v][Lp] + trpenalty);
4170 	  }
4171 	  /* include full length hits in R matrix */
4172 	  if(do_R_v && cp9b->Rvalid[0]) {
4173 	    Ralpha[0][jp_0][Lp_0] = FLogsum(Ralpha[0][jp_0][Lp_0], Ralpha[v][jp_v][Lp] + trpenalty);
4174 	  }
4175 	  /* include full length hits in T matrix */
4176 	  if(do_T_v && cp9b->Tvalid[0]) {
4177 	    Talpha[0][jp_0][Lp_0] = FLogsum(Talpha[0][jp_0][Lp_0], Talpha[v][jp_v][Lp] + trpenalty);
4178 	  }
4179 	}
4180       }
4181     }
4182   } /* end of for (v = cm->M-1; v > 0; v--) */
4183 
4184   /* determine mode of optimal alignment, if it was preset then use that */
4185   if(preset_mode == TRMODE_J) {
4186     sc   = Jalpha[0][jp_0][Lp_0];
4187     mode = TRMODE_J;
4188   }
4189   else if(preset_mode == TRMODE_L) {
4190     sc   = Lalpha[0][jp_0][Lp_0];
4191     mode = TRMODE_L;
4192   }
4193   else if(preset_mode == TRMODE_R) {
4194     sc   = Ralpha[0][jp_0][Lp_0];
4195     mode = TRMODE_R;
4196   }
4197   else if(preset_mode == TRMODE_T) {
4198     sc   = Talpha[0][jp_0][Lp_0];
4199     mode = TRMODE_T;
4200   }
4201   else { /* preset_mode was unknown, max score determines mode */
4202     sc = IMPOSSIBLE;
4203     mode = TRMODE_UNKNOWN;
4204 
4205     if (cp9b->Jvalid[0] && Jalpha[0][jp_0][Lp_0] > sc) {
4206       sc   = Jalpha[0][jp_0][Lp_0];
4207       mode = TRMODE_J;
4208     }
4209     if (fill_L && cp9b->Lvalid[0] && Lalpha[0][jp_0][Lp_0] > sc) {
4210       sc   = Lalpha[0][jp_0][Lp_0];
4211       mode = TRMODE_L;
4212     }
4213     if (fill_R && cp9b->Rvalid[0] && Ralpha[0][jp_0][Lp_0] > sc) {
4214       sc   = Ralpha[0][jp_0][Lp_0];
4215       mode = TRMODE_R;
4216     }
4217     if (fill_T && cp9b->Tvalid[0] && Talpha[0][jp_0][Lp_0] > sc) {
4218       sc   = Talpha[0][jp_0][Lp_0];
4219       mode = TRMODE_T;
4220     }
4221   }
4222 
4223 
4224 #if eslDEBUGLEVEL >= 2
4225   /* Uncomment to dump matrix to file. Careful...this could be very large. */
4226   /* FILE *fp1; fp1 = fopen("tmp.tru_ihbmx", "w");   cm_tr_hb_mx_Dump(fp1, mx, mode, TRUE); fclose(fp1); */
4227 #endif
4228 
4229   if(ret_mode != NULL) *ret_mode = mode;
4230   if(ret_sc   != NULL) *ret_sc   = sc;
4231 
4232   free(el_scA);
4233   free(yvalidA);
4234 
4235   if(ret_sc != NULL) *ret_sc = sc;
4236 
4237   ESL_DPRINTF1(("#DEBUG: cm_TrInsideAlignHB() return sc: %f\n", sc));
4238 
4239   if(*ret_mode == TRMODE_UNKNOWN) ESL_FAIL(eslEAMBIGUOUS, errbuf, "cm_TrInsideAlignHB() no valid parsetree found");
4240 
4241   return eslOK;
4242 
4243  ERROR:
4244   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
4245 }
4246 
4247 /* Function: cm_TrOptAccAlign()
4248  * based on cm_OptAccAlign()
4249  *
4250  * Date:     EPN, Wed Sep 28 13:16:12 2011
4251  *
4252  * Purpose: Run the truncated version of the Holmes/Durbin optimal
4253  *           accuracy algorithm on a full target sequence 1..L, given
4254  *           a pre-filled posterior matrix. Uses float log odds
4255  *           scores.  Non-banded version. See cm_TrOptAccAlignHB() for
4256  *           HMM banded version.
4257  *
4258  *           A CM_TR_EMIT_MX matrix <emit_mx> must be passed in,
4259  *           filled by cm_TrEmitterPosterior(), with values:
4260  *
4261  *           {J,L}l_pp[v][i]: log of the posterior probability that
4262  *           state v emitted residue i leftwise either at (if a match
4263  *           state) or *after* (if an insert state) the left consensus
4264  *           position modeled by state v's node, either in Joint
4265  *           marginal or Left marginal mode {J or L}.
4266  *
4267  *           {J,R}r_pp[v][i]: log of the posterior probability that
4268  *           state v emitted residue i rightwise either at (if a match
4269  *           state) or *before* (if an insert state) the right
4270  *           consensus position modeled by state v's node, either in
4271  *           Joint marginal or Right marginal mode {J or R}.
4272  *
4273  *           {J,L}l_pp[v] is NULL for states that do not emit leftwise
4274  *           {J,R}r_pp[v] is NULL for states that do not emit rightwise
4275  *
4276  *           Additionally, a CM_TR_MX DP matrix <mx> and
4277  *           CM_TR_SHADOW_MX <shmx> must be passed in. <shmx> will be
4278  *           expanded and filled here with traceback pointers to allow
4279  *           the optimally accurate parsetree to be recovered in
4280  *           cm_alignT() and <mx> will be expanded and filled with the
4281  *           optimal accuracy scores, where:
4282  *
4283  *           mx->{J,L,R,T}dp[v][j][d]: log of the sum of the posterior
4284  *           probabilities of emitting residues i..j in the subtree
4285  *           rooted at v given that v is in marginal mode J,L,R, or T.
4286  *
4287  *           The optimally accurate parsetree in marginal mode
4288  *           <preset_mode>, i.e. the parsetree that maximizes the sum
4289  *           of the posterior probabilities of all 1..L emitted
4290  *           residues, will be found. The average posterior probability
4291  *           of its aligned residues is returned in <ret_pp>. The optimal
4292  *           truncated entry state is returned in <ret_b>, regardless of
4293  *           whether we're in global or local mode, because all truncated
4294  *           alignments must use a truncated begin.
4295  *
4296  * Args:     cm          - the model
4297  *           errbuf      - char buffer for reporting errors
4298  *           dsq         - the digitized sequence [1..L]
4299  *           L           - length of the dsq
4300  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
4301  *           preset_mode - the pre-determined alignment mode, must not be TRMODE_UNKNOWN
4302  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
4303  *           mx          - the DP matrix to fill in
4304  *           shmx        - the shadow matrix to fill in
4305  *           emit_mx     - pre-filled emit matrix
4306  *           ret_b       - optimal entry point (state) for the alignment
4307  *           ret_pp      - RETURN: average posterior probability of aligned residues
4308  *                         in the optimally accurate parsetree
4309  *
4310  * Returns: <eslOK>     on success.
4311  * Throws:  <eslERANGE> if required CM_TR_MX size exceeds <size_limit>
4312  *          If !eslOK: alignment has been aborted, ret_* variables are not valid
4313  */
4314 int
cm_TrOptAccAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,CM_TR_MX * mx,CM_TR_SHADOW_MX * shmx,CM_TR_EMIT_MX * emit_mx,int * ret_b,float * ret_pp)4315 cm_TrOptAccAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
4316 		 CM_TR_MX *mx, CM_TR_SHADOW_MX *shmx, CM_TR_EMIT_MX *emit_mx, int *ret_b, float *ret_pp)
4317 {
4318   int      status;          /* easel status code */
4319   int      v,y,z;	    /* indices for states  */
4320   int      j,d,i,k;	    /* indices in sequence dimensions */
4321   float    sc;		    /* temporary log odds score */
4322   float    pp;		    /* average posterior probability of all emitted residues */
4323   int      yoffset;	    /* y=base+offset -- counter in child states that v can transit to */
4324   int      sd;              /* StateDelta(cm->sttype[v]) */
4325   int      sdl;             /* StateLeftDelta(cm->sttype[v] */
4326   int      sdr;             /* StateRightDelta(cm->sttype[v] */
4327   int      j_sdr;           /* j - sdr */
4328   int      d_sd;            /* d - sd */
4329   int      d_sdl;           /* d - sdl */
4330   int      d_sdr;           /* d - sdr */
4331   int      have_el;         /* TRUE if local ends are on in the CM, otherwise FALSE */
4332 
4333   /* other variables used in truncated version, but not standard version (not in cm_OptAccAlign()) */
4334   int   b = 0;		    /* best truncated entry state */
4335   int   Lyoffset0;          /* first yoffset to use for updating L matrix in IR/MR states, 1 if IR, 0 if MR */
4336   int   Ryoffset0;          /* first yoffset to use for updating R matrix in IL/ML states, 1 if IL, 0 if ML */
4337   int   fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
4338   int   pty_idx;            /* index for truncation penalty, determined by pass_idx */
4339   float trpenalty;          /* truncation penalty, differs based on pty_idx and if we're local or global */
4340   int   nins_v;             /* number of insert states reachable from current state */
4341   int   yctr;               /* used for special for(y) loops in TrOptAcc (see code) */
4342 
4343   /* the DP matrices */
4344   float ***Jalpha  = mx->Jdp; /* pointer to the Jalpha DP matrix */
4345   float ***Lalpha  = mx->Ldp; /* pointer to the Lalpha DP matrix */
4346   float ***Ralpha  = mx->Rdp; /* pointer to the Ralpha DP matrix */
4347   float ***Talpha  = mx->Tdp; /* pointer to the Talpha DP matrix */
4348 
4349   char  ***Jyshadow = shmx->Jyshadow; /* pointer to the Jyshadow matrix */
4350   char  ***Lyshadow = shmx->Lyshadow; /* pointer to the Lyshadow matrix */
4351   char  ***Ryshadow = shmx->Ryshadow; /* pointer to the Ryshadow matrix */
4352   int   ***Jkshadow = shmx->Jkshadow; /* pointer to the Jkshadow matrix */
4353   int   ***Lkshadow = shmx->Lkshadow; /* pointer to the Lkshadow matrix */
4354   int   ***Rkshadow = shmx->Rkshadow; /* pointer to the Rkshadow matrix */
4355   int   ***Tkshadow = shmx->Tkshadow; /* pointer to the Tkshadow matrix */
4356   char  ***Lkmode   = shmx->Lkmode;   /* pointer to the Lkmode matrix */
4357   char  ***Rkmode   = shmx->Rkmode;   /* pointer to the Rkmode matrix */
4358 
4359   float  **Jl_pp    = emit_mx->Jl_pp; /* pointer to the prefilled posterior values for left  emitters in Joint mode */
4360   float  **Ll_pp    = emit_mx->Ll_pp; /* pointer to the prefilled posterior values for left  emitters in Left  mode */
4361   float  **Jr_pp    = emit_mx->Jr_pp; /* pointer to the prefilled posterior values for right emitters in Joint mode */
4362   float  **Rr_pp    = emit_mx->Rr_pp; /* pointer to the prefilled posterior values for right emitters in Right mode */
4363 
4364   /* Determine which matrices we need to fill in, based on <preset_mode> */
4365   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlign(): preset_mode is not J, L, R, or T");
4366   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrOptAccAlign(), bogus mode: %d", preset_mode);
4367 
4368   /* Determine the truncation penalty index, from the pass_idx */
4369   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrOptAccAlign(), unexpected pass idx: %d", pass_idx);
4370 
4371   /* we need an emitmap in this function */
4372   if(cm->emap == NULL) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlign(), emit map is NULL");
4373 
4374   /* Allocations and initializations  */
4375   /* grow the matrices based on the current sequence and bands */
4376   if((status = cm_tr_mx_GrowTo       (cm, mx,   errbuf, L, size_limit)) != eslOK) return status;
4377   if((status = cm_tr_shadow_mx_GrowTo(cm, shmx, errbuf, L, size_limit)) != eslOK) return status;
4378 
4379   /* initialize all cells of the matrix to IMPOSSIBLE */
4380   if(  mx->Jncells_valid   > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
4381   if(  mx->Lncells_valid   > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
4382   if(  mx->Rncells_valid   > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
4383   if(  mx->Tncells_valid   > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
4384   if(shmx->Jy_ncells_valid > 0)           for(i = 0; i < shmx->Jy_ncells_valid; i++) shmx->Jyshadow_mem[i] = USED_EL;
4385   if(shmx->Ly_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Ly_ncells_valid; i++) shmx->Lyshadow_mem[i] = USED_EL;
4386   if(shmx->Ry_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Ry_ncells_valid; i++) shmx->Ryshadow_mem[i] = USED_EL;
4387   /* for B states, shadow matrix holds k, length of right fragment, this will almost certainly be overwritten */
4388   if(shmx->Jk_ncells_valid > 0)           esl_vec_ISet(shmx->Jkshadow_mem, shmx->Jk_ncells_valid, 0);
4389   if(shmx->Lk_ncells_valid > 0 && fill_L) esl_vec_ISet(shmx->Lkshadow_mem, shmx->Lk_ncells_valid, 0);
4390   if(shmx->Rk_ncells_valid > 0 && fill_R) esl_vec_ISet(shmx->Rkshadow_mem, shmx->Rk_ncells_valid, 0);
4391   if(shmx->Tk_ncells_valid > 0 && fill_T) esl_vec_ISet(shmx->Tkshadow_mem, shmx->Tk_ncells_valid, 0);
4392   if(shmx->Lk_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Lk_ncells_valid; i++) shmx->Lkmode_mem[i] = TRMODE_J;
4393   if(shmx->Rk_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Rk_ncells_valid; i++) shmx->Rkmode_mem[i] = TRMODE_J;
4394 
4395   /* a special optimal accuracy specific step, initialize Jyshadow intelligently for d == 0
4396    * (necessary b/c zero length parsetees have 0 emits and so always score IMPOSSIBLE)
4397    */
4398   if((status = cm_InitializeOptAccShadowDZero(cm, errbuf, Jyshadow, L)) != eslOK) return status;
4399 
4400   /* start with the EL state */
4401   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
4402   if(have_el) {
4403     for (j = 0; j <= L; j++) {
4404       if(Jl_pp[cm->M] != NULL)           Jalpha[cm->M][j][0] = Jl_pp[cm->M][0];
4405       if(Ll_pp[cm->M] != NULL && fill_L) Lalpha[cm->M][j][0] = Ll_pp[cm->M][0];
4406       if(Rr_pp[cm->M] != NULL && fill_R) Ralpha[cm->M][j][0] = Rr_pp[cm->M][0];
4407       if(Jl_pp[cm->M] != NULL) {
4408 	i = j;
4409 	for (d = 1; d <= j; d++) Jalpha[cm->M][j][d] = FLogsum(Jalpha[cm->M][j][d-1], Jl_pp[cm->M][i--]);
4410       }
4411       if(Ll_pp[cm->M] != NULL && fill_L) {
4412 	i = j;
4413 	for (d = 1; d <= j; d++) Lalpha[cm->M][j][d] = FLogsum(Lalpha[cm->M][j][d-1], Ll_pp[cm->M][i--]);
4414       }
4415       if(Rr_pp[cm->M] != NULL && fill_R) {
4416 	i = j;
4417 	for (d = 1; d <= j; d++) Ralpha[cm->M][j][d] = FLogsum(Ralpha[cm->M][j][d-1], Rr_pp[cm->M][i--]);
4418       }
4419     }
4420   }
4421 
4422   /* Main recursion */
4423   for (v = cm->M-1; v > 0; v--) { /* almost done to ROOT_S, we handle that differently */
4424     sd     = StateDelta(cm->sttype[v]);
4425     sdl    = StateLeftDelta(cm->sttype[v]);
4426     sdr    = StateRightDelta(cm->sttype[v]);
4427     nins_v = NumReachableInserts(cm->stid[v]);
4428 
4429     /* re-initialize if we can do a local end from v */
4430     if(have_el && NOT_IMPOSSIBLE(cm->endsc[v])) {
4431       for (j = 0; j <= L; j++) {
4432 	/* copy values from saved EL deck */
4433 	for (d = sd;  d <= j; d++) Jalpha[v][j][d] = Jalpha[cm->M][j-sdr][d-sd];
4434 	for (d = sdl; d <= j; d++) Lalpha[v][j][d] = Lalpha[cm->M][j][d-sdl];
4435 	for (d = sdr; d <= j; d++) Ralpha[v][j][d] = Ralpha[cm->M][j-sdr][d-sdr];
4436       }
4437     }
4438     /* note there's no E state update here, those cells all remain IMPOSSIBLE */
4439 
4440     if(cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) {
4441       /* update alpha[v][j][d] cells, for IL states, loop nesting order is:
4442        * for j { for d { for y { } } } because they can self transit, and a
4443        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
4444        * before can start calc'ing for alpha[v][j][d+1]
4445        * We do ML states as well as IL states b/c they follow the same rules,
4446        * and we're not worried about efficiency here.
4447        */
4448 
4449       /* In TrCYK: we need to treat R differently from J and L
4450        * here, by doing separate 'for (yctr...' loops for J,L and for R
4451        * because we have to fully calculate Jalpha[v][j][d] before we
4452        * can start to calculate Ralpha[v][j][d].
4453        */
4454 
4455       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
4456 	Ryoffset0 = cm->sttype[v] == IL_st ? 1 : 0; /* don't allow IL self transits in R mode */
4457 	for (j = sdr; j <= L; j++) {
4458 	  i = j-sd+1;
4459 	  j_sdr = j - sdr;
4460 	  for (d = sd; d <= j; d++, i--) {
4461 	    d_sd = d - sd;
4462 	    for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
4463 	      yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
4464 	      y = cm->cfirst[v] + yoffset;
4465 	      if ((sc = Jalpha[y][j_sdr][d_sd]) > Jalpha[v][j][d]) {
4466 		Jalpha[v][j][d]   = sc;
4467 		Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
4468 	      }
4469 	      if (fill_L && ((sc = Lalpha[y][j_sdr][d_sd]) > Lalpha[v][j][d])) {
4470 		Lalpha[v][j][d]   = sc;
4471 		Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
4472 	      }
4473 	    }
4474 	    Jalpha[v][j][d]  = FLogsum(Jalpha[v][j][d], Jl_pp[v][i]);
4475 	    Jalpha[v][j][d]  = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
4476 	    /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
4477 	     * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
4478 	     */
4479 	    if((! have_el) && Jyshadow[v][j][d] == USED_EL && d > sd) {
4480 	      Jalpha[v][j][d] = IMPOSSIBLE;
4481 	    }
4482 
4483 	    if(fill_L) {
4484 	      if(d >= 2) {
4485 		Lalpha[v][j][d]  = FLogsum(Lalpha[v][j][d], Ll_pp[v][i]);
4486 		/* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions *
4487 		 * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
4488 		 */
4489 		if((! have_el) && Lyshadow[v][j][d] == USED_EL) {
4490 		  Lalpha[v][j][d] = IMPOSSIBLE;
4491 		}
4492 	      }
4493 	      else {
4494 		Lalpha[v][j][d]   = Ll_pp[v][i]; /* actually I think this will give the same value as d >= 2 case above */
4495 		Lyshadow[v][j][d] = USED_TRUNC_END;
4496 	      }
4497 	      Lalpha[v][j][d]  = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
4498 	    }
4499 
4500 	    /* handle R separately */
4501 	    if(fill_R) {
4502 	      /* note we use 'd', not 'd_sd' (which we used in the corresponding loop for J,L above) */
4503 	      for (yctr = Ryoffset0; yctr < cm->cnum[v]; yctr++) { /* using Ryoffset0 instead of 0 disallows IL self transits in R mode */
4504 		yoffset = (yctr + nins_v - Ryoffset0) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
4505 		y = cm->cfirst[v] + yoffset;
4506 		if ((sc = Jalpha[y][j_sdr][d]) > Ralpha[v][j][d]) {
4507 		  Ralpha[v][j][d] = sc;
4508 		  Ryshadow[v][j][d]= yoffset + TRMODE_J_OFFSET;
4509 		}
4510 		if ((sc = Ralpha[y][j_sdr][d]) > Ralpha[v][j][d]) {
4511 		  Ralpha[v][j][d] = sc;
4512 		  Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
4513 		}
4514 	      }
4515 	      /* no residue was emitted if we're in R mode */
4516 	      Ralpha[v][j][d] = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
4517 	    }
4518 	  }
4519 	}
4520       } /* end of if(! StateIsDetached(cm,v )) */
4521     }
4522     else if(cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) {
4523       /* update alpha[v][j][d] cells, for IR states, loop nesting order is:
4524        * for j { for d { for y { } } } because they can self transit, and a
4525        * alpha[v][j][d] cell must be complete (that is we must have looked at all children y)
4526        * before can start calc'ing for alpha[v][j][d+1].
4527        * We do MR states as well as IR states b/c they follow the same rules,
4528        * and we're not worried about efficiency here.
4529        */
4530 
4531       /* In TrCYK: we need to treat L differently from J and R
4532        * here, by doing separate 'for (yctr...' loops for J,R and for L
4533        * because we have to fully calculate Jalpha[v][j][d] before we
4534        * can start to calculate Lalpha[v][j][d].
4535        */
4536 
4537       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
4538 	Lyoffset0 = cm->sttype[v] == IR_st ? 1 : 0; /* don't allow IR self transits in L mode */
4539 	for (j = sdr; j <= L; j++) {
4540 	  j_sdr = j - sdr;
4541 	  for (d = sd; d <= j; d++) {
4542 	    d_sd = d - sd;
4543 	    for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
4544 	      yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
4545 	      y = cm->cfirst[v] + yoffset;
4546 	      if ((sc = Jalpha[y][j_sdr][d_sd]) > Jalpha[v][j][d]) {
4547 		Jalpha[v][j][d]   = sc;
4548 		Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
4549 	      }
4550 	      if (fill_R && ((sc = Ralpha[y][j_sdr][d_sd]) > Ralpha[v][j][d])) {
4551 		Ralpha[v][j][d]   = sc;
4552 		Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
4553 	      }
4554 	    }
4555 	    Jalpha[v][j][d]  = FLogsum(Jalpha[v][j][d], Jr_pp[v][j]);
4556 	    Jalpha[v][j][d]  = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
4557 	    /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
4558 	     * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
4559 	     */
4560 	    if((! have_el) && Jyshadow[v][j][d] == USED_EL && d > sd) {
4561 	      Jalpha[v][j][d] = IMPOSSIBLE;
4562 	    }
4563 
4564 	    if(fill_R) {
4565 	      if(d >= 2) {
4566 		Ralpha[v][j][d]  = FLogsum(Ralpha[v][j][d], Rr_pp[v][j]);
4567 		/* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
4568 		* (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
4569 		*/
4570 		if((! have_el) && Ryshadow[v][j][d] == USED_EL) {
4571 		  Ralpha[v][j][d] = IMPOSSIBLE;
4572 		}
4573 	      }
4574 	      else {
4575 		Ralpha[v][j][d]   = Rr_pp[v][j]; /* actually I think this will give the same value as d >= 2 case above */
4576 		Ryshadow[v][j][d] = USED_TRUNC_END;
4577 	      }
4578 	      Ralpha[v][j][d]  = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
4579 	    }
4580 
4581 	    /* handle L separately */
4582 	    if(fill_L) {
4583 	      /* note we use 'j' and 'd', not 'j_sdr' and 'd_sd' (which we used in the corresponding loop for J,R above) */
4584 	      for (yctr = Lyoffset0; yctr < cm->cnum[v]; yctr++) { /* using Lyoffset0, instead of 0 disallows IR self transits in L mode */
4585 		yoffset = (yctr + nins_v - Lyoffset0) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
4586 		y = cm->cfirst[v] + yoffset;
4587 		if ((sc = Jalpha[y][j][d]) > Lalpha[v][j][d]) {
4588 		  Lalpha[v][j][d] = sc;
4589 		  Lyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
4590 		}
4591 		if ((sc = Lalpha[y][j][d]) > Lalpha[v][j][d]) {
4592 		  Lalpha[v][j][d] = sc;
4593 		  Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
4594 		}
4595 	      }
4596 	      /* no residue was emitted if we're in L mode */
4597 	      Lalpha[v][j][d] = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
4598 	    }
4599 	  }
4600 	}
4601       } /* end of if(! StateIsDetached(cm, v)) */
4602     }
4603     else if(cm->sttype[v] == MP_st) {
4604       /* MP states cannot self transit, this means that all cells in
4605        * alpha[v] are independent of each other, only depending on
4606        * alpha[y] for previously calc'ed y.  We can do the for loops
4607        * in any nesting order, this implementation does what I think
4608        * is most efficient: for y { for j { for d { } } }
4609        */
4610       for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
4611 	yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
4612 	y = cm->cfirst[v] + yoffset;
4613 
4614 	for (j = sdr; j <= L; j++) {
4615 	  j_sdr = j - sdr;
4616 	  for (d = sd; d <= j; d++) { /* sd == 2 for MP state */
4617 	    d_sd = d-sd;
4618 	    if((sc = Jalpha[y][j_sdr][d_sd]) > Jalpha[v][j][d]) {
4619 	      Jalpha[v][j][d]   = sc;
4620 	      Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
4621 	    }
4622 	  }
4623 	  if(fill_L) {
4624 	    /* note we use 'j' and 'd_sdl', not 'j_sdr' and 'd_sd' for L, plus minimum d is sdl (1) */
4625 	    for (d = sdl; d <= j; d++) { /* sdl == 1 for MP state */
4626 	      d_sdl = d-sdl;
4627 	      if((sc = Jalpha[y][j][d_sdl]) > Lalpha[v][j][d]) {
4628 		Lalpha[v][j][d]   = sc;
4629 		Lyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
4630 	      }
4631 	      if((sc = Lalpha[y][j][d_sdl]) > Lalpha[v][j][d]) {
4632 		Lalpha[v][j][d]   = sc;
4633 		Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
4634 	      }
4635 	    }
4636 	  }
4637 	  if(fill_R) {
4638 	    /* note we use 'd_sdr' not 'd_sd' for R, plus minimum d is sdr (1) */
4639 	    for (d = sdr; d <= j; d++) { /* sdr == 1 for MP state */
4640 	      d_sdr = d - sdr;
4641 	      if((sc = Jalpha[y][j_sdr][d_sdr]) > Ralpha[v][j][d]) {
4642 		Ralpha[v][j][d]   = sc;
4643 		Ryshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
4644 	      }
4645 	      if((sc = Ralpha[y][j_sdr][d_sdr]) > Ralpha[v][j][d]) {
4646 		Ralpha[v][j][d]   = sc;
4647 		Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
4648 	      }
4649 	    }
4650 	  }
4651 	}
4652       }
4653       /* add in emission score */
4654       for (j = 0; j <= L; j++) {
4655 	Jalpha[v][j][1] = IMPOSSIBLE;
4656 	if(fill_L) {
4657 	  i = j-1+1;
4658 	  Lalpha[v][j][1]   = Ll_pp[v][i];
4659 	  Lyshadow[v][j][1] = USED_TRUNC_END;
4660 	}
4661 	if(fill_R) {
4662 	  Ralpha[v][j][1]   = Rr_pp[v][j];
4663 	  Ryshadow[v][j][1] = USED_TRUNC_END;
4664 	}
4665 	i = j-2+1;
4666 	for (d = 2; d <= j; d++, i--) {
4667 	  Jalpha[v][j][d] = FLogsum(Jalpha[v][j][d], FLogsum(Jl_pp[v][i], Jr_pp[v][j]));
4668 	}
4669 	if(fill_L) {
4670 	  i = j-2+1;
4671 	  for (d = 2; d <= j; d++, i--) {
4672 	    Lalpha[v][j][d] = FLogsum(Lalpha[v][j][d], Ll_pp[v][i]);
4673 	  }
4674 	}
4675 	if(fill_R) {
4676 	  for (d = 2; d <= j; d++) {
4677 	    Ralpha[v][j][d] = FLogsum(Ralpha[v][j][d], Rr_pp[v][j]);
4678 	  }
4679 	}
4680       }
4681       /* ensure all cells are >= IMPOSSIBLE */
4682       for (j = 0; j <= L; j++) {
4683 	for (d = 1; d <= j; d++) Jalpha[v][j][d] = ESL_MAX(Jalpha[v][j][d], IMPOSSIBLE);
4684 	if(fill_L) for (d = 1; d <= j; d++) Lalpha[v][j][d] = ESL_MAX(Lalpha[v][j][d], IMPOSSIBLE);
4685 	if(fill_R) for (d = 1; d <= j; d++) Ralpha[v][j][d] = ESL_MAX(Ralpha[v][j][d], IMPOSSIBLE);
4686       }
4687       /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
4688        * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
4689        */
4690       if(! have_el) {
4691 	for (j = 0; j <= L; j++) {
4692 	  for (d = sd+1; d <= j; d++) {
4693 	    if(Jyshadow[v][j][d] == USED_EL) Jalpha[v][j][d] = IMPOSSIBLE;
4694 	  }
4695 	  if(fill_L) {
4696 	    for (d = sdl+1; d <= j; d++) {
4697 	      if(Lyshadow[v][j][d] == USED_EL) Lalpha[v][j][d] = IMPOSSIBLE;
4698 	    }
4699 	  }
4700 	  if(fill_R) {
4701 	    for (d = sdr+1; d <= j; d++) {
4702 	      if(Ryshadow[v][j][d] == USED_EL) Ralpha[v][j][d] = IMPOSSIBLE;
4703 	    }
4704 	  }
4705 	}
4706       }
4707     }
4708     else if(cm->sttype[v] != B_st) { /* entered if state v is D or S */
4709       /* D, S states cannot self transit, this means that all cells in
4710        * alpha[v] are independent of each other, only depending on
4711        * alpha[y] for previously calc'ed y.  We can do the for loops
4712        * in any nesting order, this implementation does what I think
4713        * is most efficient: for y { for j { for d { } } }
4714        */
4715       for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
4716 	yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
4717 	y = cm->cfirst[v] + yoffset;
4718 
4719 	for (j = sdr; j <= L; j++) {
4720 	  j_sdr = j - sdr;
4721 
4722 	  for (d = sd; d <= j; d++) {
4723 	    d_sd = d-sd;
4724 	    if((sc = Jalpha[y][j_sdr][d_sd]) > Jalpha[v][j][d]) {
4725 	      Jalpha[v][j][d]   = sc;
4726 	      Jyshadow[v][j][d] = yoffset + TRMODE_J_OFFSET;
4727 	    }
4728 	  }
4729 	  if(fill_L) {
4730 	    for (d = sd; d <= j; d++) {
4731 	      d_sd = d-sd;
4732 	      if((sc = Lalpha[y][j_sdr][d_sd]) > Lalpha[v][j][d]) {
4733 		Lalpha[v][j][d]   = sc;
4734 		Lyshadow[v][j][d] = yoffset + TRMODE_L_OFFSET;
4735 	      }
4736 	    }
4737 	  }
4738 	  if(fill_R) {
4739 	    for (d = sd; d <= j; d++) {
4740 	      d_sd = d-sd;
4741 	      if((sc = Ralpha[y][j_sdr][d_sd]) > Ralpha[v][j][d]) {
4742 		Ralpha[v][j][d]   = sc;
4743 		Ryshadow[v][j][d] = yoffset + TRMODE_R_OFFSET;
4744 	      }
4745 	    }
4746 	  }
4747 	  /* an easy to overlook case: if d == 0, ensure L and R values are IMPOSSIBLE */
4748 	  if(fill_L) Lalpha[v][j][0] = IMPOSSIBLE;
4749 	  if(fill_R) Ralpha[v][j][0] = IMPOSSIBLE;
4750 	  /* And another special case for BEGL_S and BEGR_S states,
4751 	   * reset shadow matrix values for d == 0 (which were
4752 	   * initialized to USED_EL above), even though the score of
4753 	   * these cells is impossible we may use them as a
4754 	   * zero-length left or right half of a BIF_B subtree during
4755 	   * construction of the parsetree.
4756 	   */
4757 	  if(cm->sttype[v] == S_st) {
4758 	    if(fill_L) Lyshadow[v][j][0] = USED_TRUNC_END;
4759 	    if(fill_R) Ryshadow[v][j][0] = USED_TRUNC_END;
4760 	  }
4761 	}
4762       }
4763       /* no emission score to add */
4764     }
4765     else { /* B_st */
4766       assert(cm->sttype[v] == B_st);
4767       y = cm->cfirst[v]; /* left  subtree */
4768       z = cm->cnum[v];   /* right subtree */
4769 
4770       for (j = 0; j <= L; j++) {
4771 	for (d = 0; d <= j; d++) {
4772 	  for (k = 0; k <= d; k++) {
4773 	    if((NOT_IMPOSSIBLE(Jalpha[y][j-k][d-k]) || d == k) && /* left  subtree is not IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
4774 	       (NOT_IMPOSSIBLE(Jalpha[z][j][k])     || k == 0) && /* right subtree is not IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
4775 	       ((sc = FLogsum(Jalpha[y][j-k][d-k], Jalpha[z][j][k])) > Jalpha[v][j][d])) {
4776 	      Jalpha[v][j][d]   = sc;
4777 	      Jkshadow[v][j][d] = k;
4778 	    }
4779 	  }
4780 	  if(fill_L) {
4781 	    for (k = 0; k <= d; k++) {
4782 	      if((NOT_IMPOSSIBLE(Jalpha[y][j-k][d-k]) || d == k) && /* left  subtree is not IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
4783 		 (NOT_IMPOSSIBLE(Lalpha[z][j][k])     || k == 0) && /* right subtree is not IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
4784 		 ((sc = FLogsum(Jalpha[y][j-k][d-k], Jalpha[z][j][k])) > Jalpha[v][j][d])) {
4785 		Lalpha[v][j][d]   = sc;
4786 		Lkshadow[v][j][d] = k;
4787 		Lkmode[v][j][d]   = TRMODE_J;
4788 	      }
4789 	    }
4790 	  }
4791 	  if(fill_R) {
4792 	    for (k = 0; k <= d; k++) {
4793 	      if((NOT_IMPOSSIBLE(Ralpha[y][j-k][d-k]) || d == k) && /* left  subtree is not IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
4794 		 (NOT_IMPOSSIBLE(Jalpha[z][j][k])     || k == 0) && /* right subtree is not IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
4795 		 ((sc = FLogsum(Ralpha[y][j-k][d-k], Jalpha[z][j][k])) > Ralpha[v][j][d])) {
4796 		Ralpha[v][j][d]   = sc;
4797 		Rkshadow[v][j][d] = k;
4798 		Rkmode[v][j][d]   = TRMODE_J;
4799 	      }
4800 	    }
4801 	  }
4802 	  if(fill_T) {
4803 	    for (k = 1; k < d; k++) { /* special boundary case for T matrix */
4804 	      if((NOT_IMPOSSIBLE(Ralpha[y][j-k][d-k])) && /* left  subtree is not IMPOSSIBLE (no check for 'd==k' b/c of special T mx boundary case (see for loop above)) */
4805 		 (NOT_IMPOSSIBLE(Lalpha[z][j][k]))     && /* right subtree is not IMPOSSIBLE (no check for 'k==0' b/c of special T mx boundary case (see for loop above)) */
4806 		 ((sc = FLogsum(Ralpha[y][j-k][d-k], Lalpha[z][j][k])) > Talpha[v][j][d])) {
4807 		Talpha[v][j][d]   = sc;
4808 		Tkshadow[v][j][d] = k;
4809 	      }
4810 	    }
4811 	  }
4812 	  /* two additional special cases in trCYK (these are not in standard CYK) */
4813 	  /* special case 1: k == 0 (full sequence aligns to BEGL_S left child */
4814 	  if(fill_L) {
4815 	    if((sc = Jalpha[y][j][d]) > Lalpha[v][j][d]) {
4816 	      Lalpha[v][j][d]   = sc;
4817 	      Lkshadow[v][j][d] = 0; /* k == 0 for this case, full sequence is on left */
4818 	      Lkmode[v][j][d]   = TRMODE_J;
4819 	    }
4820 	    if((sc = Lalpha[y][j][d]) > Lalpha[v][j][d]) {
4821 	      Lalpha[v][j][d]   = sc;
4822 	      Lkshadow[v][j][d] = 0; /* k == 0 for this case, full sequence is on left */
4823 	      Lkmode[v][j][d]   = TRMODE_L;
4824 	    }
4825 	  }
4826 	  /* special case 2: k == d (full sequence aligns to BEGR_S right child */
4827 	  if(fill_R) {
4828 	    if((sc = Jalpha[z][j][d]) > Ralpha[v][j][d]) {
4829 	      Ralpha[v][j][d]   = sc;
4830 	      Rkshadow[v][j][d] = d; /* k == d in this case, full sequence is on right */
4831 	      Rkmode[v][j][d]   = TRMODE_J;
4832 	    }
4833 	    if((sc = Ralpha[z][j][d]) > Ralpha[v][j][d]) {
4834 	      Ralpha[v][j][d]   = sc;
4835 	      Rkshadow[v][j][d] = d; /* k == d in this case, full sequence is on right */
4836 	      Rkmode[v][j][d]   = TRMODE_R;
4837 	    }
4838 	  }
4839 	}
4840       }
4841     }/* end of B_st recursion */
4842 
4843     /* Now handle from ROOT_S, state 0. So far we haven't touched
4844      * the {J,L,R,T}alpha[0] decks at all since initialization and here
4845      * we'll only update at most 1 cell in each, the one pertaining
4846      * to a full alignment [0][L][L].
4847      *
4848      * In truncated alignment the only way out of ROOT_S in local or
4849      * global mode is via a 'truncated begin' with a score (penalty)
4850      * from cm->trp into any emitting state. The penalty was
4851      * calculated in cm_tr_penalties_Create() and differs depending on
4852      * whether we are in local or global mode and the value of
4853      * 'pty_idx' which was passed in.
4854      *
4855      * Since we're in OptAcc alignment we don't assess the
4856      * penalty but we still need to know if it's non-IMPOSSIBLE,
4857      * to know which states we're allowed to do a truncated
4858      * begin into.
4859      */
4860     trpenalty = (have_el) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
4861     if(NOT_IMPOSSIBLE(trpenalty)) {
4862       if(preset_mode == TRMODE_J) {
4863 	if(Jalpha[v][L][L] > Jalpha[0][L][L]) {
4864 	  Jalpha[0][L][L] = Jalpha[v][L][L];
4865 	  b = v;
4866 	}
4867       }
4868       else if(preset_mode == TRMODE_L) {
4869 	if(Lalpha[v][L][L] > Lalpha[0][L][L]) {
4870 	  Lalpha[0][L][L] = Lalpha[v][L][L];
4871 	  b = v;
4872 	}
4873       }
4874       else if(preset_mode == TRMODE_R) {
4875 	if(Ralpha[v][L][L] > Ralpha[0][L][L]) {
4876 	  Ralpha[0][L][L] = Ralpha[v][L][L];
4877 	  b = v;
4878 	}
4879       }
4880       else if(preset_mode == TRMODE_T) {
4881 	if(cm->sttype[v] == B_st) {
4882 	  if(Talpha[v][L][L] > Talpha[0][L][L]) {
4883 	    Talpha[0][L][L] = Talpha[v][L][L];
4884 	    b = v;
4885 	  }
4886 	}
4887       }
4888     }
4889   } /* end loop for (v = cm->M-1; v > 0; v--) */
4890 
4891   /* all valid alignments must use a truncated begin */
4892   Jyshadow[0][L][L] = USED_TRUNC_BEGIN;
4893   if(fill_L) Lyshadow[0][L][L] = USED_TRUNC_BEGIN;
4894   if(fill_R) Ryshadow[0][L][L] = USED_TRUNC_BEGIN;
4895   /* Tyshadow[0] doesn't exist, caller must know how to deal */
4896 
4897   if (preset_mode == TRMODE_J) sc = Jalpha[0][L][L];
4898   if (preset_mode == TRMODE_L) sc = Lalpha[0][L][L];
4899   if (preset_mode == TRMODE_R) sc = Ralpha[0][L][L];
4900   if (preset_mode == TRMODE_T) sc = Talpha[0][L][L];
4901 
4902   /* convert sc, a log probability, into pp, the average posterior probability of all L aligned residues */
4903   pp = sreEXP2(sc) / (float) L;
4904 
4905 #if eslDEBUGLEVEL >= 2
4906   /* Uncomment to dump matrix to file. Careful...this could be very large. */
4907   /* FILE *fp1; fp1 = fopen("tmp.tru_oamx", "w");   cm_tr_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1); */
4908   /* FILE *fp2; fp2 = fopen("tmp.tru_oashmx", "w"); cm_tr_shadow_mx_Dump(fp2, cm, shmx, preset_mode, TRUE); fclose(fp2); */
4909 #endif
4910 
4911   if(ret_b  != NULL) *ret_b  = b;
4912   if(ret_pp != NULL) *ret_pp = pp;
4913 
4914   ESL_DPRINTF1(("#DEBUG: cm_TrOptAccAlign() return pp: %f\n", pp));
4915   return eslOK;
4916 }
4917 
4918 
4919 /* Function: cm_TrOptAccAlignHB()
4920  * Date:     EPN, Tue Oct 11 10:05:24 2011
4921  *
4922  * Purpose: Run the truncated version of the Holmes/Durbin optimal
4923  *           accuracy algorithm on a full target sequence 1..L, given
4924  *           a pre-filled posterior matrix. Uses float log odds
4925  *           scores. HMM banded version. cm_TrOptAccAlign() is the
4926  *           non-banded version. See that function's 'Purpose' for
4927  *           more information. The only difference is that we use
4928  *           HMM bands from cm->cp9b here. All cells outside the
4929  *           bands don't exist in memory, so we have to be careful
4930  *           with offset issues.
4931  *
4932  * Args:     cm          - the model
4933  *           errbuf      - char buffer for reporting errors
4934  *           dsq         - the digitized sequence [1..L]
4935  *           L           - length of the dsq
4936  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
4937  *           preset_mode - the pre-determined alignment mode, can't be TRMODE_UNKNOWN
4938  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
4939  *           mx          - the DP matrix to fill in
4940  *           shmx        - the shadow matrix to fill in
4941  *           emit_mx     - pre-filled emit matrix
4942  *           ret_b       - optimal entry point (state) for the alignment
4943  *           ret_pp      - RETURN: average posterior probability of aligned residues
4944  *                         in the optimally accurate parsetree
4945  *
4946  * Returns: <eslOK>     on success.
4947  * Throws:  <eslERANGE> if required CM_TR_HB_MX size exceeds <size_limit>
4948  *          If !eslOK: alignment has been aborted, ret_* variables are not valid
4949  */
4950 int
cm_TrOptAccAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,CM_TR_HB_MX * mx,CM_TR_HB_SHADOW_MX * shmx,CM_TR_HB_EMIT_MX * emit_mx,int * ret_b,float * ret_pp)4951 cm_TrOptAccAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
4952 		   CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_EMIT_MX *emit_mx, int *ret_b, float *ret_pp)
4953 {
4954   int      status;          /* easel status code */
4955   int      v,y,z;	    /* indices for states  */
4956   int      j,d,i,k;	    /* indices in sequence dimensions */
4957   float    sc;		    /* temporary log odds score */
4958   float    pp;		    /* average posterior probability of all emitted residues */
4959   int      yoffset;	    /* y=base+offset -- counter in child states that v can transit to */
4960   int      sd;              /* StateDelta(cm->sttype[v]) */
4961   int      sdl;             /* StateLeftDelta(cm->sttype[v] */
4962   int      sdr;             /* StateRightDelta(cm->sttype[v] */
4963   int      j_sdr;           /* j - sdr */
4964   int      have_el;         /* TRUE if local ends are on in the CM, otherwise FALSE */
4965 
4966   /* indices used for handling band-offset issues, and in the depths of the DP recursion */
4967   int      ip_v;               /* offset i index for state v */
4968   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
4969   int      jp_y_sdr;           /* jp_y - sdr */
4970   int      jn, jx;             /* current minimum/maximum j allowed */
4971   int      jpn, jpx;           /* minimum/maximum jp_v */
4972   int      dp_y_sd;            /* dp_y - sd */
4973   int      dp_y_sdl;           /* dp_y - sdl */
4974   int      dp_y_sdr;           /* dp_y - sdr */
4975   int      dp_v, dp_y, dp_z;   /* d index for state v,y,z in HMM banded matrix */
4976   int      dn, dx;             /* current minimum/maximum d allowed */
4977   int      dpn, dpx;           /* minimum/maximum dp_v */
4978   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
4979   int      kn, kx;             /* current minimum/maximum k value */
4980   int     *yvalidA = NULL;     /* [0..MAXCONNECT-1] TRUE if v->yoffset is legal transition (within bands) */
4981   int      yvalid_idx;         /* for keeping track of which children are valid */
4982   int      yvalid_ct;          /* for keeping track of which children are valid */
4983   int      Lp;                 /* L index also changes depending on state */
4984   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
4985   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
4986 
4987   /* other variables used in truncated version, but not standard version (not in cm_OptAccAlign()) */
4988   int   b = 0;		    /* best truncated entry state */
4989   int   fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
4990   int   pty_idx;            /* index for truncation penalty, determined by pass_idx */
4991   float trpenalty;          /* truncation penalty, differs based on pty_idx and if we're local or global */
4992   int   nins_v;             /* number of insert states reachable from current state */
4993   int   yctr;               /* used for special for(y) loops in TrOptAcc (see code) */
4994 
4995   /* variables related to truncated alignment (not in cm_OptAccAlignHB() */
4996   int      do_J_v, do_J_y, do_J_z; /* must we fill J matrix deck for state v, y, z? */
4997   int      do_L_v, do_L_y, do_L_z; /* must we fill L matrix deck for state v, y, z? */
4998   int      do_R_v, do_R_y, do_R_z; /* must we fill R matrix deck for state v, y, z? */
4999   int      do_T_v;                 /* must we fill T matrix deck for state v?       */
5000 
5001   /* variables used for memory efficient bands */
5002   /* ptrs to cp9b info, for convenience */
5003   CP9Bands_t *cp9b  = cm->cp9b;
5004   int        *imin  = cp9b->imin;
5005   int        *imax  = cp9b->imax;
5006   int        *jmin  = cp9b->jmin;
5007   int        *jmax  = cp9b->jmax;
5008   int       **hdmin = cp9b->hdmin;
5009   int       **hdmax = cp9b->hdmax;
5010 
5011   /* the DP matrices */
5012   float ***Jalpha  = mx->Jdp; /* pointer to the Jalpha DP matrix */
5013   float ***Lalpha  = mx->Ldp; /* pointer to the Lalpha DP matrix */
5014   float ***Ralpha  = mx->Rdp; /* pointer to the Ralpha DP matrix */
5015   float ***Talpha  = mx->Tdp; /* pointer to the Talpha DP matrix */
5016 
5017   char  ***Jyshadow = shmx->Jyshadow; /* pointer to the Jyshadow matrix */
5018   char  ***Lyshadow = shmx->Lyshadow; /* pointer to the Lyshadow matrix */
5019   char  ***Ryshadow = shmx->Ryshadow; /* pointer to the Ryshadow matrix */
5020   int   ***Jkshadow = shmx->Jkshadow; /* pointer to the Jkshadow matrix */
5021   int   ***Lkshadow = shmx->Lkshadow; /* pointer to the Lkshadow matrix */
5022   int   ***Rkshadow = shmx->Rkshadow; /* pointer to the Rkshadow matrix */
5023   int   ***Tkshadow = shmx->Tkshadow; /* pointer to the Tkshadow matrix */
5024   char  ***Lkmode   = shmx->Lkmode;   /* pointer to the Lkmode matrix */
5025   char  ***Rkmode   = shmx->Rkmode;   /* pointer to the Rkmode matrix */
5026 
5027   float  **Jl_pp    = emit_mx->Jl_pp; /* pointer to the prefilled posterior values for left  emitters in Joint mode */
5028   float  **Ll_pp    = emit_mx->Ll_pp; /* pointer to the prefilled posterior values for left  emitters in Left  mode */
5029   float  **Jr_pp    = emit_mx->Jr_pp; /* pointer to the prefilled posterior values for right emitters in Joint mode */
5030   float  **Rr_pp    = emit_mx->Rr_pp; /* pointer to the prefilled posterior values for right emitters in Right mode */
5031 
5032   /* Determine which matrices we need to fill in, based on <preset_mode> */
5033   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrOptAccAlignHB(), bogus mode: %d", preset_mode);
5034 
5035   /* Determine the truncation penalty index, from the pass_idx */
5036   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrOptAccAlignHB(), unexpected pass idx: %d", pass_idx);
5037 
5038   /* Allocations and initializations  */
5039   /* In OptAcc <preset_mode> must be known, ensure a full alignment to ROOT_S (v==0) in the optimal mode is allowed by the bands */
5040   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlignHB(): preset_mode is not J, L, R, or T");
5041   if (preset_mode == TRMODE_J && (! cp9b->Jvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlignHB(): preset_mode is J mode, but cp9b->Jvalid[v] is FALSE");
5042   if (preset_mode == TRMODE_L && (! cp9b->Lvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlignHB(): preset_mode is L mode, but cp9b->Lvalid[v] is FALSE");
5043   if (preset_mode == TRMODE_R && (! cp9b->Rvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlignHB(): preset_mode is R mode, but cp9b->Rvalid[v] is FALSE");
5044   if (preset_mode == TRMODE_T && (! cp9b->Tvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlignHB(): preset_mode is T mode, but cp9b->Tvalid[v] is FALSE");
5045   if (cp9b->jmin[0] > L || cp9b->jmax[0] < L)             ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cp9b->jmin[0], cp9b->jmax[0]);
5046   jp_0 = L - jmin[0];
5047   if (cp9b->hdmin[0][jp_0] > L || cp9b->hdmax[0][jp_0] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOptAccAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cp9b->hdmin[0][jp_0], cp9b->hdmax[0][jp_0]);
5048   Lp_0 = L - hdmin[0][jp_0];
5049 
5050   /* grow the matrices based on the current sequence and bands */
5051   if((status = cm_tr_hb_mx_GrowTo       (cm, mx,   errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
5052   if((status = cm_tr_hb_shadow_mx_GrowTo(cm, shmx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
5053 
5054   /* initialize all cells of the matrix to IMPOSSIBLE, all cells of shadow matrix to USED_EL or USED_TRUNC_END */
5055   if(  mx->Jncells_valid   > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
5056   if(  mx->Lncells_valid   > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
5057   if(  mx->Rncells_valid   > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
5058   if(  mx->Tncells_valid   > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
5059   if(shmx->Jy_ncells_valid > 0)           for(i = 0; i < shmx->Jy_ncells_valid; i++) shmx->Jyshadow_mem[i] = USED_EL;
5060   if(shmx->Ly_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Ly_ncells_valid; i++) shmx->Lyshadow_mem[i] = USED_EL;
5061   if(shmx->Ry_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Ry_ncells_valid; i++) shmx->Ryshadow_mem[i] = USED_EL;
5062   /* for B states, shadow matrix holds k, length of right fragment, this will be overwritten */
5063   if(shmx->Jk_ncells_valid > 0)           esl_vec_ISet(shmx->Jkshadow_mem, shmx->Jk_ncells_valid, 0);
5064   if(shmx->Lk_ncells_valid > 0 && fill_L) esl_vec_ISet(shmx->Lkshadow_mem, shmx->Lk_ncells_valid, 0);
5065   if(shmx->Rk_ncells_valid > 0 && fill_R) esl_vec_ISet(shmx->Rkshadow_mem, shmx->Rk_ncells_valid, 0);
5066   if(shmx->Tk_ncells_valid > 0 && fill_T) esl_vec_ISet(shmx->Tkshadow_mem, shmx->Tk_ncells_valid, 0);
5067   if(shmx->Lk_ncells_valid > 0 && fill_L) for(i = 0; i < shmx->Lk_ncells_valid; i++) shmx->Lkmode_mem[i] = TRMODE_J;
5068   if(shmx->Rk_ncells_valid > 0 && fill_R) for(i = 0; i < shmx->Rk_ncells_valid; i++) shmx->Rkmode_mem[i] = TRMODE_J;
5069 
5070   /* a special optimal accuracy specific step, initialize Jyshadow intelligently for d == 0
5071    * (necessary b/c zero length parsetrees have 0 emits and so always score IMPOSSIBLE)
5072    */
5073   if((status = cm_InitializeOptAccShadowDZeroHB(cm, cp9b, errbuf, Jyshadow, L)) != eslOK) return status;
5074 
5075   /* start with the EL state (remember the EL deck is non-banded) */
5076   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
5077   if(have_el) {
5078     do_J_v = (cp9b->Jvalid[cm->M] && Jl_pp[cm->M] != NULL)           ? TRUE : FALSE;
5079     do_L_v = (cp9b->Lvalid[cm->M] && Ll_pp[cm->M] != NULL && fill_L) ? TRUE : FALSE;
5080     do_R_v = (cp9b->Rvalid[cm->M] && Rr_pp[cm->M] != NULL && fill_R) ? TRUE : FALSE;
5081     for (j = 0; j <= L; j++) {
5082       if(do_J_v) Jalpha[cm->M][j][0] = Jl_pp[cm->M][0];
5083       if(do_L_v) Lalpha[cm->M][j][0] = Ll_pp[cm->M][0];
5084       if(do_R_v) Ralpha[cm->M][j][0] = Rr_pp[cm->M][0];
5085       if(do_J_v) {
5086 	i = j;
5087 	for (d = 1; d <= j; d++) Jalpha[cm->M][j][d] = FLogsum(Jalpha[cm->M][j][d-1], Jl_pp[cm->M][i--]);
5088       }
5089       if(do_L_v) {
5090 	i = j;
5091 	for (d = 1; d <= j; d++) Lalpha[cm->M][j][d] = FLogsum(Lalpha[cm->M][j][d-1], Ll_pp[cm->M][i--]);
5092       }
5093       if(do_R_v) {
5094 	i = j;
5095 	for (d = 1; d <= j; d++) Ralpha[cm->M][j][d] = FLogsum(Ralpha[cm->M][j][d-1], Rr_pp[cm->M][i--]);
5096       }
5097     }
5098   }
5099 
5100   /* yvalidA[0..yvalid_ct-1] will hold the yoffsets of the child states y of v for which
5101    * a transition is legal (some transitions are impossible due to the bands)
5102    */
5103   ESL_ALLOC(yvalidA, sizeof(int) * MAXCONNECT);
5104   esl_vec_ISet(yvalidA, MAXCONNECT, FALSE);
5105 
5106   /* Main recursion */
5107   for (v = cm->M-1; v > 0; v--) { /* almost to ROOT_S, we handle that differently */
5108     sd     = StateDelta(cm->sttype[v]);
5109     sdl    = StateLeftDelta(cm->sttype[v]);
5110     sdr    = StateRightDelta(cm->sttype[v]);
5111     nins_v = NumReachableInserts(cm->stid[v]);
5112     do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
5113     do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
5114     do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
5115     do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
5116 
5117     /* re-initialize if we can do a local end from v,
5118      * copy values from saved EL deck.
5119      * shadow values remain as initialized: USED_EL
5120      */
5121     if(have_el && NOT_IMPOSSIBLE(cm->endsc[v])) {
5122       if(do_J_v && cp9b->Jvalid[cm->M]) {
5123 	for (j = jmin[v]; j <= jmax[v]; j++) {
5124 	  jp_v  = j - jmin[v];
5125 	  for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
5126 	    dp_v = d - hdmin[v][jp_v];
5127 	    Jalpha[v][jp_v][dp_v] = Jalpha[cm->M][j-sdr][d-sd];
5128 	  }
5129 	}
5130       }
5131       if(do_L_v && cp9b->Lvalid[cm->M]) {
5132 	for (j = jmin[v]; j <= jmax[v]; j++) {
5133 	  jp_v  = j - jmin[v];
5134 	  for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
5135 	    dp_v = d - hdmin[v][jp_v];
5136 	    Lalpha[v][jp_v][dp_v] = Lalpha[cm->M][j][d-sdl];
5137 	  }
5138 	}
5139       }
5140       if(do_R_v && cp9b->Rvalid[cm->M]) {
5141 	for (j = jmin[v]; j <= jmax[v]; j++) {
5142 	  jp_v  = j - jmin[v];
5143 	  for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
5144 	    dp_v = d - hdmin[v][jp_v];
5145 	    Ralpha[v][jp_v][dp_v] = Ralpha[cm->M][j-sdr][d-sdr];
5146 	  }
5147 	}
5148       }
5149     }
5150     /* note there's no E state update here, those cells all remain IMPOSSIBLE */
5151 
5152     if(cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) {
5153       /* update alpha[v][j][d] cells, for IL and ML states, loop
5154        * nesting order is: for j { for d { for y { } } } because they
5155        * can self transit, and an alpha[v][j][d] cell must be complete
5156        * (that is we must have looked at all children y) before we can
5157        * start calc'ing for alpha[v][j][d+1]. We do ML states as well
5158        * as IL states b/c they follow the same rules. We could possibly
5159        * separate them out and get a small speedup, but I don't think
5160        * it's worth further complicating the code.
5161        */
5162 
5163       /* In TrCYK: we need to treat R differently from J and L
5164        * here, by doing separate 'for (d...' loops for J and R
5165        * because we have to fully calculate Jalpha[v][j][d] before we
5166        * can start to calculate Ralpha[v][j][d].
5167        */
5168       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
5169 	if(do_J_v || do_L_v || do_R_v) {
5170 	  for (j = jmin[v]; j <= jmax[v]; j++) {
5171 	    jp_v = j - jmin[v];
5172 
5173 	    /* determine which children y we can legally transit to for v, j in J and L mode */
5174 	    yvalid_ct = 0;
5175 	    for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
5176 	      yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
5177 	      y = cm->cfirst[v] + yoffset;
5178 	      if(j >= jmin[y] && j <= jmax[y]) yvalidA[yvalid_ct++] = yoffset; /* is j valid for state y? */
5179 	    }
5180 
5181 	    if(do_J_v || do_L_v) {
5182 	      i = j - hdmin[v][jp_v] + 1;
5183 	      for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++, i--) { /* for each valid d for v, j */
5184 		assert(i >= imin[v] && i <= imax[v]);
5185 		ESL_DASSERT1((i >= imin[v] && i <= imax[v]));
5186 		ip_v = i - imin[v];         /* i index for state v in emit_mx->{J,L}l_pp */
5187 		dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
5188 
5189 		for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
5190 		  yoffset = yvalidA[yvalid_idx];
5191 		  y = cm->cfirst[v] + yoffset;
5192 		  jp_y = j - jmin[y];
5193 		  do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
5194 		  do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
5195 
5196 		  if(do_J_y || do_L_y) {
5197 		    if((d-sd) >= hdmin[y][jp_y] && (d-sd) <= hdmax[y][jp_y]) { /* make sure d is valid for this v, j and y */
5198 		      dp_y_sd = d - hdmin[y][jp_y] - sd;
5199 		      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5200 		      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
5201 		      if(do_J_v && do_J_y) {
5202 			if ((sc = Jalpha[y][jp_y][dp_y_sd]) > Jalpha[v][jp_v][dp_v]) {
5203 			  Jalpha[v][jp_v][dp_v]   = sc;
5204 			  Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5205 			}
5206 		      }
5207 		      if(do_L_v && do_L_y) {
5208 			if ((sc = Lalpha[y][jp_y][dp_y_sd]) > Lalpha[v][jp_v][dp_v]) {
5209 			  Lalpha[v][jp_v][dp_v]   = sc;
5210 			  Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
5211 			}
5212 		      }
5213 		    }
5214 		  }
5215 		} /* end of 'for (yvalid_idx = 0'... */
5216 		/* add emission PP */
5217 		if(do_J_v) {
5218 		  Jalpha[v][jp_v][dp_v]  = FLogsum(Jalpha[v][jp_v][dp_v], Jl_pp[v][ip_v]);
5219 		  Jalpha[v][jp_v][dp_v]  = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
5220 		  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
5221 		   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
5222 		   */
5223 		  if((! have_el) && Jyshadow[v][jp_v][dp_v] == USED_EL && d > sd) {
5224 		    Jalpha[v][jp_v][dp_v] = IMPOSSIBLE;
5225 		  }
5226 		}
5227 		if(do_L_v) {
5228 		  if(d >= 2) {
5229 		    Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Ll_pp[v][ip_v]);
5230 		    /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions *
5231 		     * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
5232 		     */
5233 		    if((! have_el) && Lyshadow[v][jp_v][dp_v] == USED_EL) {
5234 		      Lalpha[v][jp_v][dp_v] = IMPOSSIBLE;
5235 		    }
5236 		  }
5237 		  else {
5238 		    Lalpha[v][jp_v][dp_v]   = Ll_pp[v][ip_v]; /* actually I think this will give the same value as d >= 2 case above */
5239 		    Lyshadow[v][jp_v][dp_v] = USED_TRUNC_END;
5240 		  }
5241 		  Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
5242 		}
5243 	      }
5244 	    }
5245 	    /* handle R separately */
5246 	    if(do_R_v) {
5247 	      for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
5248 		dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
5249 
5250 		for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
5251 		  yoffset = yvalidA[yvalid_idx];
5252 		  y = cm->cfirst[v] + yoffset;
5253 		  jp_y = j - jmin[y];
5254 		  do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
5255 		  do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
5256 
5257 		  if((do_J_y || do_R_y) && (y != v)) { /* (y != v) part is to disallow IL self transits in R mode */
5258 		    /* note we use 'd', not 'd_sd' (which we used in the corresponding loop for J,L above) */
5259 		    if(d >= hdmin[y][jp_y] && d <= hdmax[y][jp_y]) { /* make sure d is valid for this v, j and y */
5260 		      dp_y = d - hdmin[y][jp_y];
5261 		      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5262 		      if(do_J_y) {
5263 			if ((sc = Jalpha[y][jp_y][dp_y]) > Ralpha[v][jp_v][dp_v]) {
5264 			  Ralpha[v][jp_v][dp_v]   = sc;
5265 			  Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5266 			}
5267 		      }
5268 		      if(do_R_y) {
5269 			if ((sc = Ralpha[y][jp_y][dp_y]) > Ralpha[v][jp_v][dp_v]) {
5270 			  Ralpha[v][jp_v][dp_v]   = sc;
5271 			  Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
5272 			}
5273 		      }
5274 		    }
5275 		  }
5276 		}
5277 		/* no residue was emitted if we're in R mode */
5278 		Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
5279 	      }
5280 	    }
5281 	  } /* end of for j loop */
5282 	}
5283       } /* end of if(! StateIsDetached(cm,v )) */
5284     } /* end of if IL/ML state */
5285     else if(cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) {
5286       /* update alpha[v][j][d] cells, for IR and MR states, loop
5287        * nesting order is: for j { for d { for y { } } } because they
5288        * can self transit, and an alpha[v][j][d] cell must be complete
5289        * (that is we must have looked at all children y) before we can
5290        * start calc'ing for alpha[v][j][d+1].  We do MR states as well
5291        * as IR states here b/c they follow the same rules. We could
5292        * possibly separate them out and get a small speedup, but I
5293        * don't think it's worth further complicating the code, and
5294        * we're not worried about efficiency here.
5295        */
5296 
5297       /* In TrCYK: we need to treat L differently from J and R
5298        * here, by doing separate 'for (d..' loops for J and L
5299        * because we have to fully calculate Jalpha[v][j][d] before we
5300        * can start to calculate Lalpha[v][j][d].
5301        */
5302 
5303       if(! StateIsDetached(cm, v)) { /* if we're detached (unreachable), leave all {J,L,R}alpha values as they were initialized, as IMPOSSIBLE */
5304 	if(do_J_v || do_L_v || do_R_v) {
5305 	  for (j = jmin[v]; j <= jmax[v]; j++) {
5306 	    jp_v = j - jmin[v];
5307 	    j_sdr = j - sdr;
5308 
5309 	    /* determine which children y we can legally transit to for v, j in J and R mode */
5310 	    yvalid_ct = 0;
5311 	    for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
5312 	      yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
5313 	      y = cm->cfirst[v] + yoffset;
5314 	      if((j_sdr) >= jmin[y] && ((j_sdr) <= jmax[y])) yvalidA[yvalid_ct++] = yoffset; /* is j-sdr valid for state y? */
5315 	    }
5316 
5317 	    if(do_J_v || do_R_v) {
5318 	      for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
5319 		dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
5320 		for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
5321 		  yoffset = yvalidA[yvalid_idx];
5322 		  y = cm->cfirst[v] + yoffset;
5323 		  jp_y_sdr = j - jmin[y] - sdr;
5324 		  do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
5325 		  do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
5326 
5327 		  if(do_J_y || do_R_y) {
5328 		    if((d-sd) >= hdmin[y][jp_y_sdr] && (d-sd) <= hdmax[y][jp_y_sdr]) { /* make sure d is valid for this v, j and y */
5329 		      dp_y_sd = d - hdmin[y][jp_y_sdr] - sd;
5330 		      ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
5331 		      ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
5332 		      if(do_J_v && do_J_y) {
5333 			if ((sc = Jalpha[y][jp_y_sdr][dp_y_sd]) > Jalpha[v][jp_v][dp_v]) {
5334 			  Jalpha[v][jp_v][dp_v]   = sc;
5335 			  Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5336 			}
5337 		      }
5338 		      if(do_R_v && do_R_y) {
5339 			if ((sc = Ralpha[y][jp_y_sdr][dp_y_sd]) > Ralpha[v][jp_v][dp_v]) {
5340 			  Ralpha[v][jp_v][dp_v]   = sc;
5341 			  Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
5342 			}
5343 		      }
5344 		    }
5345 		  }
5346 		} /* end of 'for (yvalid_idx = 0'... */
5347 		/* add emission PP */
5348 		if(do_J_v) {
5349 		  Jalpha[v][jp_v][dp_v]  = FLogsum(Jalpha[v][jp_v][dp_v], Jr_pp[v][jp_v]);
5350 		  Jalpha[v][jp_v][dp_v]  = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
5351 		  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
5352 		   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
5353 		   */
5354 		  if((! have_el) && Jyshadow[v][jp_v][dp_v] == USED_EL && d > sd) {
5355 		    Jalpha[v][jp_v][dp_v] = IMPOSSIBLE;
5356 		  }
5357 		}
5358 		if(do_R_v) {
5359 		  if(d >= 2) {
5360 		    Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Rr_pp[v][jp_v]);
5361 		    /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
5362 		     * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
5363 		     */
5364 		    if((! have_el) && Ryshadow[v][jp_v][dp_v] == USED_EL) {
5365 		      Ralpha[v][jp_v][dp_v] = IMPOSSIBLE;
5366 		    }
5367 		  }
5368 		  else {
5369 		    Ralpha[v][jp_v][dp_v]   = Rr_pp[v][jp_v]; /* actually I think this will give the same value as d >= 2 case above */
5370 		    Ryshadow[v][jp_v][dp_v] = USED_TRUNC_END;
5371 		  }
5372 		  Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
5373 		}
5374 	      } /* end of for(d... */
5375 	    } /* end of if(do_J_v || do_R_v) */
5376 	    /* handle L separately */
5377 	    if(do_L_v) {
5378 	      /* determine which children y we can legally transit to for v, j, this is different for L, b/c j is different,
5379 	       * note we use 'j' and not 'j_sdr' because IR and MR are silent in L marginal mode
5380 	       */
5381 	      yvalid_ct = 0;
5382 	      for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
5383 		yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
5384 		y = cm->cfirst[v] + yoffset;
5385 		if(j >= jmin[y] && j <= jmax[y]) yvalidA[yvalid_ct++] = yoffset; /* is j valid for state y? */
5386 	      }
5387 
5388 	      for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) { /* for each valid d for v, j */
5389 		dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha */
5390 		for (yvalid_idx = 0; yvalid_idx < yvalid_ct; yvalid_idx++) { /* for each valid child y, for v, j */
5391 		  yoffset = yvalidA[yvalid_idx];
5392 		  y = cm->cfirst[v] + yoffset;
5393 		  jp_y   = j - jmin[y];
5394 		  do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
5395 		  do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
5396 
5397 		  /* note we use 'd' and not 'd-sd' below because IR/MR are silent in L marginal mode */
5398 		  if((do_J_y || do_L_y) && (y != v)) { /* (y != v) part is to disallow IR self transits in L mode */
5399 		    if(d >= hdmin[y][jp_y] && d <= hdmax[y][jp_y]) { /* make sure d is valid for this v, j and y */
5400 		      dp_y = d - hdmin[y][jp_y] ;
5401 		      ESL_DASSERT1((dp_v >= 0 && dp_v  <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5402 		      ESL_DASSERT1((dp_y >= 0 && dp_y  <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
5403 		      if(do_J_y) {
5404 			if ((sc = Jalpha[y][jp_y][dp_y]) > Lalpha[v][jp_v][dp_v]) {
5405 			  Lalpha[v][jp_v][dp_v]   = sc;
5406 			  Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5407 			}
5408 		      }
5409 		      if(do_L_y) {
5410 			if ((sc = Lalpha[y][jp_y][dp_y]) > Lalpha[v][jp_v][dp_v]) {
5411 			  Lalpha[v][jp_v][dp_v]   = sc;
5412 			  Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
5413 			}
5414 		      }
5415 		    }
5416 		  }
5417 		} /* end of 'for (yvalid_idx = 0'... */
5418 		/* no residue was emitted if we're in L mode */
5419 		Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
5420 	      }
5421 	    }
5422 	  }
5423 	}
5424       } /* end of if(! StateIsDetached(cm, v)) */
5425     } /* end of if IR/MR state */
5426     else if(cm->sttype[v] == MP_st) {
5427       /* MP states cannot self transit, this means that all cells in
5428        * alpha[v] are independent of each other, only depending on
5429        * alpha[y] for previously calc'ed y.  We can do the for loops
5430        * in any nesting order, this implementation does what I think
5431        * is most efficient: for y { for j { for d { } } }
5432        */
5433       if(do_J_v || do_L_v || do_R_v) {
5434 	for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
5435 	  yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
5436 	  y = cm->cfirst[v] + yoffset;
5437 	  do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
5438 	  do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
5439 	  do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
5440 
5441 	  if(do_J_v && do_J_y) {
5442 	    jn = ESL_MAX(jmin[v], jmin[y]+sdr);
5443 	    jx = ESL_MIN(jmax[v], jmax[y]+sdr);
5444 	    jpn = jn - jmin[v];
5445 	    jpx = jx - jmin[v];
5446 	    jp_y_sdr = jn - jmin[y] - sdr;
5447 	    for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++) {
5448 	      ESL_DASSERT1((jp_v >= 0     && jp_v     <= (jmax[v]-jmin[v])));
5449 	      ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
5450 
5451 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sd);
5452 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sd);
5453 	      dpn     = dn - hdmin[v][jp_v];
5454 	      dpx     = dx - hdmin[v][jp_v];
5455 	      dp_y_sd = dn - hdmin[y][jp_y_sdr] - sd;
5456 
5457 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sd++) {
5458 		ESL_DASSERT1((dp_v    >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
5459 		ESL_DASSERT1((dp_y_sd >= 0 && dp_y_sd  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
5460 		if((sc = Jalpha[y][jp_y_sdr][dp_y_sd]) > Jalpha[v][jp_v][dp_v]) {
5461 		  Jalpha[v][jp_v][dp_v]   = sc;
5462 		  Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5463 		}
5464 	      }
5465 	    }
5466 	  }
5467 	  if(do_L_v && (do_J_y || do_L_y)) {
5468 	    /* note we use 'j' and 'd_sdl', not 'j_sdr' and 'd_sd', for L */
5469 	    jn = ESL_MAX(jmin[v], jmin[y]);
5470 	    jx = ESL_MIN(jmax[v], jmax[y]);
5471 	    jpn = jn - jmin[v];
5472 	    jpx = jx - jmin[v];
5473 	    jp_y = jn - jmin[y];
5474 	    for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y++) {
5475 	      ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
5476 	      ESL_DASSERT1((jp_y >= 0 && jp_y <= (jmax[y]-jmin[y])));
5477 
5478 	      dn  = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y] + sdl);
5479 	      dx  = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y] + sdl);
5480 	      dpn = dn - hdmin[v][jp_v];
5481 	      dpx = dx - hdmin[v][jp_v];
5482 
5483 	      if (do_J_y) {
5484 		dp_y_sdl = dn - hdmin[y][jp_y] - sdl;
5485 		for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdl++) {
5486 		  ESL_DASSERT1((dp_v     >= 0 && dp_v     <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5487 		  ESL_DASSERT1((dp_y_sdl >= 0 && dp_y_sdl <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
5488 		  if((sc = Jalpha[y][jp_y][dp_y_sdl]) > Lalpha[v][jp_v][dp_v]) {
5489 		    Lalpha[v][jp_v][dp_v]   = sc;
5490 		    Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5491 		  }
5492 		}
5493 	      }
5494 	      if (do_L_y) {
5495 		dp_y_sdl = dn - hdmin[y][jp_y] - sdl;
5496 		for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdl++) {
5497 		  ESL_DASSERT1((dp_v     >= 0 && dp_v     <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5498 		  ESL_DASSERT1((dp_y_sdl >= 0 && dp_y_sdl <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
5499 		  if((sc = Lalpha[y][jp_y][dp_y_sdl]) > Lalpha[v][jp_v][dp_v]) {
5500 		    Lalpha[v][jp_v][dp_v]   = sc;
5501 		    Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
5502 		  }
5503 		}
5504 	      }
5505 	    }
5506 	  }
5507 	  if(do_R_v && (do_J_y || do_R_y)) {
5508 	    /* note we use 'd_sdr' not 'd_sd' for R, plus minimum d is sdr (1) */
5509 	    jn = ESL_MAX(jmin[v], jmin[y]+sdr);
5510 	    jx = ESL_MIN(jmax[v], jmax[y]+sdr);
5511 	    jpn = jn - jmin[v];
5512 	    jpx = jx - jmin[v];
5513 	    jp_y_sdr = jn - jmin[y] - sdr;
5514 	    for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y_sdr++) {
5515 	      ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
5516 	      ESL_DASSERT1((jp_y_sdr >= 0 && jp_y_sdr <= (jmax[y]-jmin[y])));
5517 
5518 	      dn  = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y_sdr] + sdr);
5519 	      dx  = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y_sdr] + sdr);
5520 	      dpn = dn - hdmin[v][jp_v];
5521 	      dpx = dx - hdmin[v][jp_v];
5522 
5523 	      if (do_J_y) {
5524 		dp_y_sdr = dn - hdmin[y][jp_y_sdr] - sdr;
5525 		for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdr++) {
5526 		  ESL_DASSERT1((dp_v     >= 0 && dp_v      <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
5527 		  ESL_DASSERT1((dp_y_sdr >= 0 && dp_y_sdr  <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
5528 		  if((sc = Jalpha[y][jp_y_sdr][dp_y_sdr]) > Ralpha[v][jp_v][dp_v]) {
5529 		    Ralpha[v][jp_v][dp_v]   = sc;
5530 		    Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5531 		  }
5532 		}
5533 	      }
5534 	      if (do_R_y) {
5535 		dp_y_sdr = dn - hdmin[y][jp_y_sdr] - sdr;
5536 		for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y_sdr++) {
5537 		  ESL_DASSERT1((dp_v     >= 0 && dp_v     <= (hdmax[v][jp_v]     - hdmin[v][jp_v])));
5538 		  ESL_DASSERT1((dp_y_sdr >= 0 && dp_y_sdr <= (hdmax[y][jp_y_sdr] - hdmin[y][jp_y_sdr])));
5539 		  if((sc = Ralpha[y][jp_y_sdr][dp_y_sdr]) > Ralpha[v][jp_v][dp_v]) {
5540 		    Ralpha[v][jp_v][dp_v]   = sc;
5541 		    Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
5542 		  }
5543 		}
5544 	      }
5545 	    }
5546 	  }
5547 	}
5548       }
5549       /* add in emission score */
5550       if(do_J_v) {
5551 	for (j = jmin[v]; j <= jmax[v]; j++) {
5552 	  jp_v  = j - jmin[v];
5553 	  i     = j - hdmin[v][jp_v] + 1;
5554 	  ip_v  = i - imin[v];
5555 	  for (d = hdmin[v][jp_v], dp_v = 0; d <= hdmax[v][jp_v]; d++, dp_v++, ip_v--) {
5556 	    if(d >= 2) {
5557 	      Jalpha[v][jp_v][dp_v] = FLogsum(Jalpha[v][jp_v][dp_v], FLogsum(Jl_pp[v][ip_v], Jr_pp[v][jp_v]));
5558 	    }
5559 	    else {
5560 	      Jalpha[v][jp_v][dp_v] = IMPOSSIBLE;
5561 	    }
5562 	  }
5563 	}
5564       }
5565       if(do_L_v) {
5566 	for (j = jmin[v]; j <= jmax[v]; j++) {
5567 	  jp_v  = j - jmin[v];
5568 	  i     = j - hdmin[v][jp_v] + 1;
5569 	  ip_v  = i - imin[v];
5570 	  for (d = hdmin[v][jp_v], dp_v = 0; d <= hdmax[v][jp_v]; d++, dp_v++, ip_v--) {
5571 	    if(d >= 2) {
5572 	      Lalpha[v][jp_v][dp_v] = FLogsum(Lalpha[v][jp_v][dp_v], Ll_pp[v][ip_v]);
5573 	    }
5574 	    else {
5575 	      Lalpha[v][jp_v][dp_v]   = Ll_pp[v][ip_v];
5576 	      Lyshadow[v][jp_v][dp_v] = USED_TRUNC_END;
5577 	    }
5578 	  }
5579 	}
5580       }
5581       if(do_R_v) {
5582 	for (j = jmin[v]; j <= jmax[v]; j++) {
5583 	  jp_v  = j - jmin[v];
5584 	  for (d = hdmin[v][jp_v], dp_v = 0; d <= hdmax[v][jp_v]; d++, dp_v++) {
5585 	    if(d >= 2) {
5586 	      Ralpha[v][jp_v][dp_v] = FLogsum(Ralpha[v][jp_v][dp_v], Rr_pp[v][jp_v]);
5587 	    }
5588 	    else {
5589 	      Ralpha[v][jp_v][dp_v]   = Rr_pp[v][jp_v];
5590 	      Ryshadow[v][jp_v][dp_v] = USED_TRUNC_END;
5591 	    }
5592 	  }
5593 	}
5594       }
5595       /* ensure all cells are >= IMPOSSIBLE */
5596       if(do_J_v) {
5597 	for (j = jmin[v]; j <= jmax[v]; j++) {
5598 	  jp_v  = j - jmin[v];
5599 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++, d++) {
5600 	    Jalpha[v][jp_v][dp_v] = ESL_MAX(Jalpha[v][jp_v][dp_v], IMPOSSIBLE);
5601 	  }
5602 	  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
5603 	   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
5604 	   */
5605 	  if(! have_el) {
5606 	    d = hdmin[v][jp_v];
5607 	    for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
5608 	      if(Jyshadow[v][jp_v][dp_v] == USED_EL && d > sd) Jalpha[v][jp_v][dp_v] = IMPOSSIBLE;
5609 	      d++;
5610 	    }
5611 	  }
5612 	}
5613       }
5614       if(do_L_v) {
5615 	for (j = jmin[v]; j <= jmax[v]; j++) {
5616 	  jp_v  = j - jmin[v];
5617 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
5618 	    Lalpha[v][jp_v][dp_v] = ESL_MAX(Lalpha[v][jp_v][dp_v], IMPOSSIBLE);
5619 	  }
5620 	  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
5621 	   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
5622 	   */
5623 	  if(! have_el) {
5624 	    d = hdmin[v][jp_v];
5625 	    for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
5626 	      if(Lyshadow[v][jp_v][dp_v] == USED_EL && d > sdl) Lalpha[v][jp_v][dp_v] = IMPOSSIBLE;
5627 	      d++;
5628 	    }
5629 	  }
5630 	}
5631       }
5632       if(do_R_v) {
5633 	for (j = jmin[v]; j <= jmax[v]; j++) {
5634 	  jp_v  = j - jmin[v];
5635 	  for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
5636 	    Ralpha[v][jp_v][dp_v] = ESL_MAX(Ralpha[v][jp_v][dp_v], IMPOSSIBLE);
5637 	  }
5638 	  /* special case if local ends are off: explicitly disallow transitions to EL that require EL emissions
5639 	   * (we do allow an 'illegal' transition to EL in OptAcc but only if no EL emissions are req'd)
5640 	   */
5641 	  if(! have_el) {
5642 	    d = hdmin[v][jp_v];
5643 	    for (dp_v = 0; dp_v <= (hdmax[v][jp_v] - hdmin[v][jp_v]); dp_v++) {
5644 	      if(Ryshadow[v][jp_v][dp_v] == USED_EL && d > sdr) Ralpha[v][jp_v][dp_v] = IMPOSSIBLE;
5645 	      d++;
5646 	    }
5647 	  }
5648 	}
5649       }
5650     }
5651     else if(cm->sttype[v] != B_st) { /* entered if state v is D or S */
5652       /* D, S states cannot self transit, this means that all cells in
5653        * alpha[v] are independent of each other, only depending on
5654        * alpha[y] for previously calc'ed y.  We can do the for loops
5655        * in any nesting order, this implementation does what I think
5656        * is most efficient: for y { for j { for d { } } }
5657        */
5658       if(do_J_v || do_L_v || do_R_v) {
5659 	for (yctr = 0; yctr < cm->cnum[v]; yctr++) {
5660 	  yoffset = (yctr + nins_v) % cm->cnum[v]; /* special y ordering for TrOptAcc, consider consensus state first, not inserts */
5661 	  y = cm->cfirst[v] + yoffset;
5662 	  do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
5663 	  do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
5664 	  do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
5665 
5666 	  /*printf("v: %4d y: %4d yoffset: %4d\n", v, y, yoffset);*/
5667 	  if(do_J_v && do_J_y) {
5668 	    jn = ESL_MAX(jmin[v], jmin[y]);
5669 	    jx = ESL_MIN(jmax[v], jmax[y]);
5670 	    jpn = jn - jmin[v];
5671 	    jpx = jx - jmin[v];
5672 	    jp_y = jn - jmin[y];
5673 
5674 	    for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y++) {
5675 	      ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
5676 	      ESL_DASSERT1((jp_y >= 0 && jp_y <= (jmax[y]-jmin[y])));
5677 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
5678 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
5679 	      dpn  = dn - hdmin[v][jp_v];
5680 	      dpx  = dx - hdmin[v][jp_v];
5681 	      dp_y = dn - hdmin[y][jp_y];
5682 
5683 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y++) {
5684 		ESL_DASSERT1((dp_v >= 0 && dp_v  <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5685 		ESL_DASSERT1((dp_y >= 0 && dp_y  <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
5686 		if((sc = Jalpha[y][jp_y][dp_y]) > Jalpha[v][jp_v][dp_v]) {
5687 		  Jalpha[v][jp_v][dp_v]   = sc;
5688 		  Jyshadow[v][jp_v][dp_v] = yoffset + TRMODE_J_OFFSET;
5689 		}
5690 	      }
5691 	    }
5692 	  }
5693 	  if(do_L_v && do_L_y) {
5694 	    jn = ESL_MAX(jmin[v], jmin[y]);
5695 	    jx = ESL_MIN(jmax[v], jmax[y]);
5696 	    jpn = jn - jmin[v];
5697 	    jpx = jx - jmin[v];
5698 	    jp_y = jn - jmin[y];
5699 
5700 	    for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y++) {
5701 	      ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
5702 	      ESL_DASSERT1((jp_y >= 0 && jp_y <= (jmax[y]-jmin[y])));
5703 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
5704 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
5705 	      dpn  = dn - hdmin[v][jp_v];
5706 	      dpx  = dx - hdmin[v][jp_v];
5707 	      dp_y = dn - hdmin[y][jp_y];
5708 
5709 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y++) {
5710 		ESL_DASSERT1((dp_v >= 0 && dp_v  <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5711 		ESL_DASSERT1((dp_y >= 0 && dp_y  <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
5712 		if((sc = Lalpha[y][jp_y][dp_y]) > Lalpha[v][jp_v][dp_v]) {
5713 		  Lalpha[v][jp_v][dp_v]   = sc;
5714 		  Lyshadow[v][jp_v][dp_v] = yoffset + TRMODE_L_OFFSET;
5715 		}
5716 	      }
5717 	    }
5718 	  }
5719 	  /* a couple of special considerations for d == 0 */
5720 	  if(do_L_v) {
5721 	    jn = jmin[v];
5722 	    jx = jmax[v];
5723 	    jpn = jn - jmin[v];
5724 	    jpx = jx - jmin[v];
5725 	    for (jp_v = jpn; jp_v <= jpx; jp_v++) {
5726 	      /* an easy to overlook case: if d == 0, ensure L value is IMPOSSIBLE */
5727 	      if(hdmin[v][jp_v] == 0) {
5728 		Lalpha[v][jp_v][0] = IMPOSSIBLE;
5729 		/* And another special case for BEGL_S states,
5730 		 * reset shadow matrix values for d == 0 (which were
5731 		 * initialized to USED_EL above), even though the score of
5732 		 * these cells is impossible we may use them as a
5733 		 * zero-length left half of a BIF_B subtree during
5734 		 * construction of the parsetree.
5735 		 */
5736 		if(cm->sttype[v] == S_st) Lyshadow[v][jp_v][0] = USED_TRUNC_END;
5737 	      }
5738 	    }
5739 	  }
5740 	  if(do_R_v && do_R_y) {
5741 	    jn = ESL_MAX(jmin[v], jmin[y]);
5742 	    jx = ESL_MIN(jmax[v], jmax[y]);
5743 	    jpn = jn - jmin[v];
5744 	    jpx = jx - jmin[v];
5745 	    jp_y = jn - jmin[y];
5746 
5747 	    for (jp_v = jpn; jp_v <= jpx; jp_v++, jp_y++) {
5748 	      ESL_DASSERT1((jp_v >= 0 && jp_v <= (jmax[v]-jmin[v])));
5749 	      ESL_DASSERT1((jp_y >= 0 && jp_y <= (jmax[y]-jmin[y])));
5750 	      dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
5751 	      dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
5752 	      dpn  = dn - hdmin[v][jp_v];
5753 	      dpx  = dx - hdmin[v][jp_v];
5754 	      dp_y = dn - hdmin[y][jp_y];
5755 
5756 	      for (dp_v = dpn; dp_v <= dpx; dp_v++, dp_y++) {
5757 		ESL_DASSERT1((dp_v >= 0 && dp_v  <= (hdmax[v][jp_v] - hdmin[v][jp_v])));
5758 		ESL_DASSERT1((dp_y >= 0 && dp_y  <= (hdmax[y][jp_y] - hdmin[y][jp_y])));
5759 		if((sc = Ralpha[y][jp_y][dp_y]) > Ralpha[v][jp_v][dp_v]) {
5760 		  Ralpha[v][jp_v][dp_v]   = sc;
5761 		  Ryshadow[v][jp_v][dp_v] = yoffset + TRMODE_R_OFFSET;
5762 		}
5763 	      }
5764 	    }
5765 	  }
5766 	  /* a couple of special considerations for d == 0 */
5767 	  if(do_R_v) {
5768 	    jn = jmin[v];
5769 	    jx = jmax[v];
5770 	    jpn = jn - jmin[v];
5771 	    jpx = jx - jmin[v];
5772 	    for (jp_v = jpn; jp_v <= jpx; jp_v++) {
5773 	      /* an easy to overlook case: if d == 0, ensure R value is IMPOSSIBLE */
5774 	      if(hdmin[v][jp_v] == 0) {
5775 		Ralpha[v][jp_v][0] = IMPOSSIBLE;
5776 		/* And another special case for BEGL_S states,
5777 		 * reset shadow matrix values for d == 0 (which were
5778 		 * initialized to USED_EL above), even though the score of
5779 		 * these cells is impossible we may use them as a
5780 		 * zero-length left half of a BIF_B subtree during
5781 		 * construction of the parsetree.
5782 		 */
5783 		if(cm->sttype[v] == S_st) Ryshadow[v][jp_v][0] = USED_TRUNC_END;
5784 	      }
5785 	    }
5786 	  }
5787 	}
5788       }
5789       /* no emission score to add */
5790     } /* end of 'else if(cm->sttype[v] != B_st)' which is entered for S and D states */
5791     else { /* B_st */
5792       if(do_J_v || do_L_v || do_R_v || do_T_v) {
5793 	y = cm->cfirst[v]; /* left  subtree */
5794 	z = cm->cnum[v];   /* right subtree */
5795 
5796 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
5797 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
5798 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
5799 
5800 	do_J_z = cp9b->Jvalid[z]           ? TRUE : FALSE;
5801 	do_L_z = cp9b->Lvalid[z] && fill_L ? TRUE : FALSE;
5802 	do_R_z = cp9b->Rvalid[z] && fill_R ? TRUE : FALSE;
5803 
5804 	/* Any valid j must be within both state v and state z's j band
5805 	 * I think jmin[v] <= jmin[z] is guaranteed by the way bands are
5806 	 * constructed, but we'll check anyway.
5807 	 */
5808 	jn = (jmin[v] > jmin[z]) ? jmin[v] : jmin[z];
5809 	jx = (jmax[v] < jmax[z]) ? jmax[v] : jmax[z];
5810 	/* the main j loop */
5811 	for (j = jn; j <= jx; j++) {
5812 	  jp_v = j - jmin[v];
5813 	  jp_y = j - jmin[y];
5814 	  jp_z = j - jmin[z];
5815 	  kn = ((j-jmax[y]) > (hdmin[z][jp_z])) ? (j-jmax[y]) : hdmin[z][jp_z];
5816           kn = ESL_MAX(kn, 0); /* kn must be non-negative, added with fix to bug i36 */
5817 	  /* kn satisfies inequalities (1) and (3) (listed below)*/
5818 	  kx = ( jp_y       < (hdmax[z][jp_z])) ?  jp_y       : hdmax[z][jp_z];
5819 	  /* kn satisfies inequalities (2) and (4) (listed below)*/
5820 	  i = j - hdmin[v][jp_v] + 1;
5821 	  for (d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++, i--) {
5822 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
5823 
5824 	    /* Find the first k value that implies a valid cell in the {J,L,R} matrix y and z decks.
5825 	     * This k must satisfy the following 6 inequalities (some may be redundant):
5826 	     * (1) k >= j-jmax[y];
5827 	     * (2) k <= j-jmin[y];
5828 	     *     1 and 2 guarantee (j-k) is within state y's j band
5829 	     *
5830 	     * (3) k >= hdmin[z][j-jmin[z]];
5831 	     * (4) k <= hdmax[z][j-jmin[z]];
5832 	     *     3 and 4 guarantee k is within z's j=(j), d band
5833 	     *
5834 	     * (5) k >= d-hdmax[y][j-jmin[y]-k];
5835 	     * (6) k <= d-hdmin[y][j-jmin[y]-k];
5836 	     *     5 and 6 guarantee (d-k) is within state y's j=(j-k) d band
5837 	     *
5838 	     * kn and kx were set above (outside (for (dp_v...) loop) that
5839 	     * satisfy 1-4 (b/c 1-4 are d-independent and k-independent)
5840 	     * RHS of inequalities 5 and 6 are dependent on k, so we check
5841 	     * for these within the next for loop.
5842 	     *
5843 	     * To update a cell in the T matrix with a sum of an R matrix value for y
5844 	     * and a L matrix value for z, there are 2 additional inequalities to satisfy:
5845 	     * (7) k != 0
5846 	     * (8) k != d
5847 	     * We ensure 7 and 8 in the loop below.
5848 	     */
5849 	    for(k = kn; k <= kx; k++) {
5850 	      if((k >= d - hdmax[y][jp_y-k]) && k <= d - hdmin[y][jp_y-k]) {
5851 		/* for current k, all 6 inequalities have been satisified
5852 		 * so we know the cells corresponding to the platonic
5853 		 * matrix cells alpha[v][j][d], alpha[y][j-k][d-k], and
5854 		 * alpha[z][j][k] are all within the bands. These
5855 		 * cells correspond to alpha[v][jp_v][dp_v],
5856 		 * alpha[y][jp_y-k][d-hdmin[jp_y-k]-k],
5857 		 * and alpha[z][jp_z][k-hdmin[jp_z]];
5858 		 */
5859 		kp_z = k-hdmin[z][jp_z];
5860 		dp_y = d-hdmin[y][jp_y-k];
5861 		if(do_J_v && do_J_y && do_J_z &&
5862 		   (NOT_IMPOSSIBLE(Jalpha[y][jp_y-k][dp_y-k]) || d == k) && /* left  subtree is not-IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
5863 		   (NOT_IMPOSSIBLE(Jalpha[z][jp_z][kp_z])     || k == 0) && /* right subtree is not-IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
5864 		   (sc = FLogsum(Jalpha[y][jp_y-k][dp_y-k], Jalpha[z][jp_z][kp_z])) > Jalpha[v][jp_v][dp_v]) {
5865 		  Jalpha[v][jp_v][dp_v]   = sc;
5866 		  Jkshadow[v][jp_v][dp_v] = k;
5867 		}
5868 		if(do_L_v && do_J_y && do_L_z &&
5869 		   (NOT_IMPOSSIBLE(Jalpha[y][jp_y-k][dp_y-k]) || d == k) && /* left  subtree is not-IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
5870 		   (NOT_IMPOSSIBLE(Lalpha[z][jp_z][kp_z])     || k == 0) && /* right subtree is not-IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
5871 		   (sc = FLogsum(Jalpha[y][jp_y-k][dp_y-k], Lalpha[z][jp_z][kp_z])) > Lalpha[v][jp_v][dp_v]) {
5872 		  Lalpha[v][jp_v][dp_v]   = sc;
5873 		  Lkshadow[v][jp_v][dp_v] = k;
5874 		}
5875 		if(do_R_v && do_R_y && do_J_z &&
5876 		   (NOT_IMPOSSIBLE(Ralpha[y][jp_y-k][dp_y-k]) || d == k) && /* left  subtree is not-IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
5877 		   (NOT_IMPOSSIBLE(Jalpha[z][jp_z][kp_z])     || k == 0) && /* right subtree is not-IMPOSSIBLE or IMPOSSIBLE w/length 0 (latter is ok b/c only emits contribute to score) */
5878 		   (sc = FLogsum(Ralpha[y][jp_y-k][dp_y-k], Jalpha[z][jp_z][kp_z])) > Ralpha[v][jp_v][dp_v]) {
5879 		  Ralpha[v][jp_v][dp_v]   = sc;
5880 		  Rkshadow[v][jp_v][dp_v] = k;
5881 		}
5882 		if((k != 0) && (k != d)) { /* special boundary case for T matrix */
5883 		  if(do_T_v && do_R_y && do_L_z &&
5884 		     (NOT_IMPOSSIBLE(Ralpha[y][jp_y-k][dp_y-k])) &&  /* left  subtree is not-IMPOSSIBLE (no check for 'd==k' b/c of special T mx boundary case (see 3 lines up)) */
5885 		     (NOT_IMPOSSIBLE(Lalpha[z][jp_z][kp_z]))      && /* right subtree is not-IMPOSSIBLE (no check for 'k==0' b/c of special T mx boundary case (see 3 lines up)) */
5886 		     (sc = FLogsum(Ralpha[y][jp_y-k][dp_y-k], Lalpha[z][jp_z][kp_z])) > Talpha[v][jp_v][dp_v]) {
5887 		    Talpha[v][jp_v][dp_v]   = sc;
5888 		    Tkshadow[v][jp_v][dp_v] = k;
5889 		  }
5890 		}
5891 	      }
5892 	    }
5893 	  }
5894 	}
5895       }
5896       /* two additional special cases in trCYK (these are not in standard CYK).
5897        * we do these in their own for(j.. { for(d.. { } } loops b/c one
5898        * is independent of z, the other of y, unlike the above loop which is dependent
5899        * on both.
5900        */
5901       if(do_L_v && (do_J_y || do_L_y)) {
5902 	jn = ESL_MAX(jmin[v], jmin[y]);
5903 	jx = ESL_MIN(jmax[v], jmax[y]);
5904 	for (j = jn; j <= jx; j++) {
5905 	  jp_v = j - jmin[v];
5906 	  jp_y = j - jmin[y];
5907 	  ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
5908 	  ESL_DASSERT1((j >= jmin[y] && j <= jmax[y]));
5909 	  dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
5910 	  dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
5911 	  for(d = dn; d <= dx; d++) {
5912 	    dp_v = d - hdmin[v][jp_v];
5913 	    dp_y = d - hdmin[y][jp_y];
5914 	    ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
5915 	    ESL_DASSERT1((d >= hdmin[y][jp_y] && d <= hdmax[y][jp_y]));
5916 	    if(do_J_y && (sc = Jalpha[y][jp_y][dp_y]) > Lalpha[v][jp_v][dp_v]) {
5917 	      Lalpha[v][jp_v][dp_v]   = sc;
5918 	      Lkshadow[v][jp_v][dp_v] = 0; /* k == 0 for this case, full sequence is on left */
5919 	      Lkmode[v][jp_v][dp_v]   = TRMODE_J;
5920 	    }
5921 	    if(do_L_y && (sc = Lalpha[y][jp_y][dp_y]) > Lalpha[v][jp_v][dp_v]) {
5922 	      Lalpha[v][jp_v][dp_v]   = sc;
5923 	      Lkshadow[v][jp_v][dp_v] = 0; /* k == 0 for this case, full sequence is on left */
5924 	      Lkmode[v][jp_v][dp_v]   = TRMODE_L;
5925 	    }
5926 	  }
5927 	}
5928       }
5929       if(do_R_v && (do_J_z || do_R_z)) {
5930 	jn = ESL_MAX(jmin[v], jmin[z]);
5931 	jx = ESL_MIN(jmax[v], jmax[z]);
5932 	for (j = jn; j <= jx; j++) {
5933 	  jp_v = j - jmin[v];
5934 	  jp_z = j - jmin[z];
5935 	  ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
5936 	  ESL_DASSERT1((j >= jmin[z] && j <= jmax[z]));
5937 	  dn = ESL_MAX(hdmin[v][jp_v], hdmin[z][jp_z]);
5938 	  dx = ESL_MIN(hdmax[v][jp_v], hdmax[z][jp_z]);
5939 	  for(d = dn; d <= dx; d++) {
5940 	    dp_v = d - hdmin[v][jp_v];
5941 	    dp_z = d - hdmin[z][jp_z];
5942 	    ESL_DASSERT1((d >= hdmin[v][jp_v] && d <= hdmax[v][jp_v]));
5943 	    ESL_DASSERT1((d >= hdmin[z][jp_z] && d <= hdmax[z][jp_z]));
5944 	    if(do_J_z && (sc = Jalpha[z][jp_z][dp_z]) > Ralpha[v][jp_v][dp_v]) {
5945 	      Ralpha[v][jp_v][dp_v]   = sc;
5946 	      Rkshadow[v][jp_v][dp_v] = d; /* k == d for this case, full sequence is on right */
5947 	      Rkmode[v][jp_v][dp_v]   = TRMODE_J;
5948 	    }
5949 	    if(do_R_z && (sc = Ralpha[z][jp_z][dp_z]) > Ralpha[v][jp_v][dp_v]) {
5950 	      Ralpha[v][jp_v][dp_v]   = sc;
5951 	      Rkshadow[v][jp_v][dp_v] = d; /* k == d for this case, full sequence is on right */
5952 	      Rkmode[v][jp_v][dp_v]   = TRMODE_R;
5953 	    }
5954 	  }
5955 	}
5956       }
5957     } /* end of 'else' that is entered if v is a B st */
5958 
5959     /* Now handle from ROOT_S, state 0. So far we haven't touched
5960      * the {J,L,R,T}alpha[0] decks at all since initialization and here
5961      * we'll only update at most 1 cell in each, the one pertaining
5962      * to a full alignment [0][L][L].
5963      *
5964      * In truncated alignment the only way out of ROOT_S in local or
5965      * global mode is via a 'truncated begin' with a score (penalty)
5966      * from cm->trp into any emitting state. The penalty was
5967      * calculated in cm_tr_penalties_Create() and differs depending on
5968      * whether we are in local or global mode and the value of
5969      * 'pty_idx' which was passed in.
5970      *
5971      * Since we're in OptAcc alignment we don't assess the
5972      * penalty but we still need to know if it's non-IMPOSSIBLE,
5973      * to know which states we're allowed to do a truncated
5974      * begin into.
5975      */
5976     if(L >= jmin[v] && L <= jmax[v]) {
5977       jp_v = L - jmin[v];
5978       Lp   = L - hdmin[v][jp_v];
5979       if(L >= hdmin[v][jp_v] && L <= hdmax[v][jp_v]) {
5980 	/* If we get here alpha[v][jp_v][Lp] and alpha[0][jp_0][Lp_0]
5981 	 * are valid cells in the banded alpha matrix, corresponding to
5982 	 * alpha[v][L][L] and alpha[0][L][L] in the platonic matrix.
5983 	 * (We've already made sure alpha[0][jp_0][Lp_0] was valid
5984 	 * at the beginning of the function.)
5985 	 */
5986 	trpenalty = have_el ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
5987 	if(NOT_IMPOSSIBLE(trpenalty)) {
5988 	  if(preset_mode == TRMODE_J && do_J_v) {
5989 	    if(Jalpha[v][jp_v][Lp] > Jalpha[0][jp_0][Lp_0]) {
5990 	      Jalpha[0][jp_0][Lp_0] = Jalpha[v][jp_v][Lp];
5991 	      b = v;
5992 	    }
5993 	  }
5994 	  if(preset_mode == TRMODE_L && do_L_v) {
5995 	    if(Lalpha[v][jp_v][Lp] > Lalpha[0][jp_0][Lp_0]) {
5996 	      Lalpha[0][jp_0][Lp_0] = Lalpha[v][jp_v][Lp];
5997 	      b = v;
5998 	    }
5999 	  }
6000 	  if(preset_mode == TRMODE_R && do_R_v) {
6001 	    if(Ralpha[v][jp_v][Lp] > Ralpha[0][jp_0][Lp_0]) {
6002 	      Ralpha[0][jp_0][Lp_0] = Ralpha[v][jp_v][Lp];
6003 	      b = v;
6004 	    }
6005 	  }
6006 	  if(preset_mode == TRMODE_T && do_T_v && cm->sttype[v] == B_st) {
6007 	    if(Talpha[v][jp_v][Lp] > Talpha[0][jp_0][Lp_0]) {
6008 	      Talpha[0][jp_0][Lp_0] = Talpha[v][jp_v][Lp];
6009 	      b = v;
6010 	    }
6011 	  }
6012 	}
6013       }
6014     }
6015   } /* end loop for (v = cm->M-1; v > 0; v--) */
6016 
6017   /* all valid alignments must use a truncated begin */
6018   if (          cp9b->Jvalid[0]) Jyshadow[0][jp_0][Lp_0] = USED_TRUNC_BEGIN;
6019   if (fill_L && cp9b->Lvalid[0]) Lyshadow[0][jp_0][Lp_0] = USED_TRUNC_BEGIN;
6020   if (fill_R && cp9b->Rvalid[0]) Ryshadow[0][jp_0][Lp_0] = USED_TRUNC_BEGIN;
6021   /* Tyshadow[0] doesn't exist, caller must know how to deal */
6022 
6023   if (preset_mode == TRMODE_J) sc = Jalpha[0][jp_0][Lp_0];
6024   if (preset_mode == TRMODE_L) sc = Lalpha[0][jp_0][Lp_0];
6025   if (preset_mode == TRMODE_R) sc = Ralpha[0][jp_0][Lp_0];
6026   if (preset_mode == TRMODE_T) sc = Talpha[0][jp_0][Lp_0];
6027 
6028   /* convert sc, a log probability, into the average posterior probability of all L aligned residues */
6029   pp = sreEXP2(sc) / (float) L;
6030 
6031 #if eslDEBUGLEVEL >= 2
6032   /* Uncomment to dump matrix to file. Careful...this could be very large. */
6033   /* FILE *fp1; fp1 = fopen("tmp.tru_oahbmx", "w");   cm_tr_hb_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1); */
6034   /* FILE *fp2; fp2 = fopen("tmp.tru_oahbshmx", "w"); cm_tr_hb_shadow_mx_Dump(fp2, cm, shmx, preset_mode, TRUE); fclose(fp2); */
6035 #endif
6036 
6037   if(ret_b  != NULL) *ret_b  = b;
6038   if(ret_pp != NULL) *ret_pp = pp;
6039 
6040   free(yvalidA);
6041 
6042   ESL_DPRINTF1(("#DEBUG: cm_TrOptAccAlignHB() return pp: %f\n", pp));
6043   return eslOK;
6044 
6045  ERROR:
6046   ESL_FAIL(status, errbuf, "out of memory");
6047   return status; /* NEVERREACHED */
6048 }
6049 
6050 /* Function: cm_TrCYKOutsideAlign()
6051  * Date:     EPN, Wed Sep 14 14:20:20 2011
6052  *
6053  * Purpose:  Run the outside TrCYK algorithm on a target sequence.
6054  *           Non-banded version. See cm_TrCYKOutsideAlignHB() for
6055  *           the HMM banded version. The full target sequence
6056  *           1..L is aligned.
6057  *
6058  *           Very similar to cm_TrOutsideAlign() but calculates
6059  *           beta[v][j][d]: log probability of the most likely parse
6060  *           that emits 1..i-1 and j+1..L and passes through v at j,d
6061  *           (where i = j-d+1) instead of the log of the summed
6062  *           probability of all such parses. This means max operations
6063  *           are used instead of logsums.
6064  *
6065  *           Meaning of cells:
6066  *
6067  *           Jbeta[v][j][d]: log prob of the most likely parse that
6068  *                           emits 1..i-1 and j+1..L and passes through
6069  *                           v in Joint marginal mode at j,d.
6070  *           Lbeta[v][j][d]: log prob of the most likely parse that
6071  *                           emits 1..i-1 and j+1..L and passes through
6072  *                           v in Left marginal mode at j,d.
6073  *           Rbeta[v][j][d]: log prob of the most likely parse that
6074  *                           emits 1..i-1 and j+1..L and passes through
6075  *                           v in Right marginal mode at j,d.
6076  *
6077  *           This function complements cm_TrCYKInsideAlign() but is
6078  *           mainly useful for testing and reference. It can be used
6079  *           with do_check=TRUE to verify that the implementations of
6080  *           TrCYKInside and TrCYKOutside are consistent.  Because the
6081  *           structures of TrCYKInside and TrInside, and TrCYKOutside
6082  *           and TrOutside are so similar and the CYK variants are
6083  *           easier to debug (because only the optimal parsetree is
6084  *           considered instead of all possible parsetrees) this
6085  *           function can be useful for finding bugs in Outside.  It
6086  *           is currently not hooked up to any of the main Infernal
6087  *           programs.
6088  *
6089  * Args:     cm          - the model
6090  *           errbuf      - char buffer for reporting errors
6091  *           dsq         - the digitized sequence
6092  *           L           - length of the dsq to align
6093  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
6094  *           do_check    - TRUE to check that TrCYK Inside and TrCYK Outside are consistent
6095  *           preset_mode - TRMODE_J, TRMODE_L, TRMODE_R, or TRMODE_T, the pre-determined
6096  *                         alignment mode, we'll only allow alignments in this mode.
6097  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
6098  *           mx          - the dp matrix, grown and filled here
6099  *           inscyk_mx   - the pre-filled dp matrix from the TrCYK Inside calculation
6100  *                         (performed by cm_TrCYKInsideAlign(), required)
6101  *
6102  * Returns:  <eslOK> on success.
6103  *
6104  * Throws:   <eslERANGE> if required CM_TR_MX size exceeds <size_limit>
6105  *           <eslEMEM>   if we run out of memory
6106  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
6107  */
6108 int
cm_TrCYKOutsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,int do_check,CM_TR_MX * mx,CM_TR_MX * inscyk_mx)6109 cm_TrCYKOutsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_MX *mx, CM_TR_MX *inscyk_mx)
6110 {
6111   int      status;
6112   int      v,y,z;	       /* indices for states */
6113   float    Jsc,Lsc,Rsc,Tsc;    /* a temporary variable holding a float score */
6114   int      j,d,i,k;	       /* indices in sequence dimensions */
6115   float    optsc;              /* the optimal score from the Inside matrix */
6116   float    escore;	       /* an emission score, tmp variable */
6117   int      voffset;	       /* index of v in t_v(y) transition scores */
6118   int      sd;                 /* StateDelta(cm->sttype[y]) */
6119   int      sdl;                /* StateLeftDelta(cm->sttype[y] */
6120   int      sdr;                /* StateRightDelta(cm->sttype[y] */
6121 
6122   /* variables used only if do_check */
6123   int      fail1_flag = FALSE; /* set to TRUE if do_check and we see a problem in check 1 */
6124   int      fail2_flag = FALSE; /* set to TRUE if do_check and we see a problem in check 2 */
6125   int      vmax;               /* i, offset in the matrix */
6126   float    tol;                /* tolerance for differences in bit scores */
6127   int     *optseen = NULL;     /* [1..i..L] TRUE is residue i is accounted for in optimal parse */
6128 
6129   /* other variables used in truncated version, but not standard version (not in cm_CYKOutsideAlign()) */
6130   int      fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
6131   int      pty_idx;                /* index for truncation penalty, determined by pass_idx */
6132   float    trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
6133 
6134   /* DP matrix variables */
6135   float ***Jbeta   = mx->Jdp;     /* pointer to the outside Jbeta DP matrix */
6136   float ***Lbeta   = mx->Ldp;     /* pointer to the outside Lbeta DP matrix */
6137   float ***Rbeta   = mx->Rdp;     /* pointer to the outside Rbeta DP matrix */
6138   float ***Tbeta   = mx->Tdp;     /* pointer to the outside Tbeta DP matrix */
6139 
6140   float ***Jalpha  = inscyk_mx->Jdp; /* pointer to the precalc'ed inside Jalpha DP matrix */
6141   float ***Lalpha  = inscyk_mx->Ldp; /* pointer to the precalc'ed inside Lalpha DP matrix */
6142   float ***Ralpha  = inscyk_mx->Rdp; /* pointer to the precalc'ed inside Ralpha DP matrix */
6143   float ***Talpha  = inscyk_mx->Tdp; /* pointer to the precalc'ed inside Talpha DP matrix, only used to possibly get optsc */
6144 
6145   /* Allocations and initializations */
6146 
6147   /* Determine which matrices we need to fill in, based on <preset_mode> */
6148   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlign(): preset_mode is not J, L, R, or T");
6149   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrCYKOutsideAlign(), bogus mode: %d", preset_mode);
6150 
6151   /* Determine the truncation penalty index, from the pass_idx */
6152   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrCYKOussideAlign(), unexpected pass idx: %d", pass_idx);
6153 
6154   /* grow the matrices based on the current sequence and bands */
6155   if((status = cm_tr_mx_GrowTo(cm, mx, errbuf, L, size_limit)) != eslOK) return status;
6156 
6157   /* initialize all cells of the matrix to IMPOSSIBLE */
6158   if(mx->Jncells_valid > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
6159   if(mx->Lncells_valid > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
6160   if(mx->Rncells_valid > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
6161   if(mx->Tncells_valid > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
6162 
6163   /* initialize cells in the special ROOT_S deck corresponding to full sequence alignments to 0.0 */
6164   if     (preset_mode == TRMODE_J) Jbeta[0][L][L] = 0.; /* a full Joint    alignment is outside this cell */
6165   else if(preset_mode == TRMODE_L) Lbeta[0][L][L] = 0.; /* a full Left     alignment is outside this cell */
6166   else if(preset_mode == TRMODE_R) Rbeta[0][L][L] = 0.; /* a full Right    alignment is outside this cell */
6167   else if(preset_mode == TRMODE_T) Tbeta[0][L][L] = 0.; /* a full Terminal alignment is outside this cell */
6168   else ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlign() preset_mode %d is invalid", preset_mode);
6169 
6170   /* set cells corresponding to legal truncated begin entry states to
6171    * the appropriate penalty. In truncated alignment the only way out
6172    * of ROOT_S in local or global mode is via a 'truncated begin' with
6173    * a score (penalty) from cm->trp into any emitting state. The
6174    * penalty was calculated in cm_tr_penalties_Create() and differs
6175    * depending on whether we are in local or global mode and the value
6176    * of 'pty_idx' which was passed in.
6177    */
6178   for(v = 0; v < cm->M; v++) {
6179     trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
6180     if(NOT_IMPOSSIBLE(trpenalty)) {
6181       if(preset_mode == TRMODE_J) Jbeta[v][L][L] = trpenalty; /* a full Joint alignment is outside this cell */
6182       if(preset_mode == TRMODE_L) Lbeta[v][L][L] = trpenalty; /* a full Left  alignment is outside this cell */
6183       if(preset_mode == TRMODE_R) Rbeta[v][L][L] = trpenalty; /* a full Right alignment is outside this cell */
6184       if(preset_mode == TRMODE_T && cm->sttype[v] == B_st) {
6185 	Tbeta[v][L][L] = trpenalty; /* a full Terminal alignment is outside this cell */
6186       }
6187     }
6188   }
6189 
6190   /* main loop down through the decks */
6191   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
6192     if(! StateIsDetached(cm, v)) {
6193       sd  = StateDelta(cm->sttype[v]);
6194       sdr = StateRightDelta(cm->sttype[v]);
6195 
6196       if (cm->stid[v] == BEGL_S) { /* BEGL_S */
6197 	y = cm->plast[v];	/* the parent bifurcation    */
6198 	z = cm->cnum[y];	/* the other (right) S state */
6199 	for(j = 0; j <= L; j++) {
6200 	  for (d = 0; d <= j; d++) {
6201 	    for (k = 0; k <= (L-j); k++) {
6202 	      Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Jbeta[y][j+k][d+k] + Jalpha[z][j+k][k]); /* A */
6203 	      if(fill_L) {
6204 		Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Lbeta[y][j+k][d+k] + Lalpha[z][j+k][k]); /* B */
6205 	      }
6206 	      if(fill_R) {
6207 		Rbeta[v][j][d] = ESL_MAX(Rbeta[v][j][d], Rbeta[y][j+k][d+k] + Jalpha[z][j+k][k]); /* C */
6208 		if(fill_T && fill_L && d == j && (j+k) == L) {
6209 		  Rbeta[v][j][d] = ESL_MAX(Rbeta[v][j][d], Tbeta[y][j+k][d+k] + Lalpha[z][j+k][k]); /* D */
6210 		  /* Note: Tbeta[y][j+k==L][d+k==L] will be 0.0 or
6211 		   * IMPOSSIBLE because it was initialized that
6212 		   * way. That T cell includes the full target 1..L
6213 		   * (any valid T alignment must because we must
6214 		   * account for the full target) rooted at a B state,
6215 		   * and a transition from that B state to this BEGL_S
6216 		   * is always probability 1.0.
6217 		   */
6218 		}
6219 	      }
6220 	    } /* end of for k loop */
6221 	    if(fill_L) {
6222 	      Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Lbeta[y][j][d]); /* entire sequence on left, no sequence on right, k == 0 */
6223 	      Lbeta[v][j][d] = ESL_MAX(Lbeta[v][j][d], Lbeta[y][j][d]); /* entire sequence on left, no sequence on right, k == 0 */
6224 	    }
6225 	  }
6226 	}
6227       } /* end of 'if (cm->stid[v] == BEGL_S */
6228       else if (cm->stid[v] == BEGR_S) {
6229 	y = cm->plast[v];	  /* the parent bifurcation    */
6230 	z = cm->cfirst[y];  /* the other (left) S state  */
6231 	for(j = 0; j <= L; j++) {
6232 	  for (d = 0; d <= j; d++) {
6233 	    i = j-d+1;
6234 	    for (k = 0; k <= (j-d); k++) {
6235 	      Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Jbeta[y][j][d+k] + Jalpha[z][j-d][k]); /* A */
6236 	      if(fill_R) {
6237 		Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Rbeta[y][j][d+k] + Ralpha[z][j-d][k]); /* C */
6238 	      }
6239 	      if(fill_L) {
6240 		Lbeta[v][j][d] = ESL_MAX(Lbeta[v][j][d], Lbeta[y][j][d+k] + Jalpha[z][j-d][k]); /* B */
6241 		if(fill_T && fill_R && k == (i-1) && j == L) {
6242 		  Lbeta[v][j][d] = ESL_MAX(Lbeta[v][j][d], Tbeta[y][j][d+k] + Ralpha[z][j-d][k]); /* D */
6243 		  /* Note: Tbeta[y][j==L][d+k==L] will be 0.0 or
6244 		   * IMPOSSIBLE because it was initialized that
6245 		   * way. That T cell includes the full target 1..L (any
6246 		   * valid T alignment must because we must account for
6247 		   * the full target) rooted at a B state, and a
6248 		   * transition from that B state to this BEGR_S is
6249 		   * always probability 1.0.
6250 		   */
6251 		}
6252 	      }
6253 	    }
6254 	    if(fill_R) {
6255 	      Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Rbeta[y][j][d]); /* entire sequence on right, no sequence on left, k == 0 */
6256 	      Rbeta[v][j][d] = ESL_MAX(Rbeta[v][j][d], Rbeta[y][j][d]); /* entire sequence on right, no sequence on left, k == 0 */
6257 	    }
6258 	  }
6259 	}
6260       } /* end of 'else if (cm->stid[v] == BEGR_S */
6261       else { /* (cm->sttype[v] != BEGL_S && cm->sttype[v] != BEGR_S */
6262 	for (j = L; j >= 0; j--) {
6263 	  i = 1;
6264 	  for (d = j; d >= 0; d--, i++) {
6265 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
6266 	      /* mind the following sneaky if statement: in truncated
6267 	       * aln, the only way out of state 0 is through a
6268 	       * truncated begin, which we handled above (search for
6269 	       * 'trpenalty'). If we're in local mode transitions out
6270 	       * of 0 will have IMPOSSIBLE scores, but NOT if we're in
6271 	       * glocal mode, so we need this 'if'.
6272 	       */
6273 	      if(y != 0) {
6274 		voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
6275 		sd  = StateDelta(cm->sttype[y]);
6276 		sdl = StateLeftDelta(cm->sttype[y]);
6277 		sdr = StateRightDelta(cm->sttype[y]);
6278 		switch(cm->sttype[y]) {
6279 		case MP_st:
6280 		  if(j != L && d != j) {
6281 		    escore = cm->oesc[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
6282 		    Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Jbeta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore);
6283 		  }
6284 		  if(fill_L && j == L && d != j) { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
6285 		    escore = cm->lmesc[y][dsq[i-1]];
6286 		    Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Lbeta[y][j][d+sdl] + cm->tsc[y][voffset] + escore);
6287 		    Lbeta[v][j][d] = ESL_MAX(Lbeta[v][j][d], Lbeta[y][j][d+sdl] + cm->tsc[y][voffset] + escore);
6288 		  }
6289 		  if(fill_R && i == 1 && j != L) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
6290 		    escore = cm->rmesc[y][dsq[j+1]];
6291 		    Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Rbeta[y][j+sdr][d+sdr] + cm->tsc[y][voffset] + escore);
6292 		    Rbeta[v][j][d] = ESL_MAX(Rbeta[v][j][d], Rbeta[y][j+sdr][d+sdr] + cm->tsc[y][voffset] + escore);
6293 		  }
6294 		  break;
6295 		case ML_st:
6296 		case IL_st:
6297 		  if (d != j) {
6298 		    escore = cm->oesc[y][dsq[i-1]];
6299 		    Jbeta[v][j][d]            = ESL_MAX(Jbeta[v][j][d], Jbeta[y][j][d+sd] + cm->tsc[y][voffset] + escore);
6300 		    if(fill_L) Lbeta[v][j][d] = ESL_MAX(Lbeta[v][j][d], Lbeta[y][j][d+sd] + cm->tsc[y][voffset] + escore);
6301 		  }
6302 		  if(fill_R && i == 1 && /* only allow transition from R if we're emitting first residue 1 from y  */
6303 		     v != y) {           /* will only happen if v == IL, we don't allow silent self transitions from IL->IL */
6304 		    Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Rbeta[y][j][d] + cm->tsc[y][voffset]);
6305 		    Rbeta[v][j][d] = ESL_MAX(Rbeta[v][j][d], Rbeta[y][j][d] + cm->tsc[y][voffset]);
6306 		  }
6307 		  break;
6308 		case MR_st:
6309 		case IR_st:
6310 		  if (j != L) {
6311 		    escore = cm->oesc[y][dsq[j+1]];
6312 		    Jbeta[v][j][d]            = ESL_MAX(Jbeta[v][j][d], Jbeta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore);
6313 		    if(fill_R) Rbeta[v][j][d] = ESL_MAX(Rbeta[v][j][d], Rbeta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore);
6314 		  }
6315 		  if(fill_L && j == L && /* only allow transition from L if we're emitting final residue L from y */
6316 		     v != y) {           /* will only happen if v == IR, we don't allow silent self transitions from IR->IR */
6317 		    Jbeta[v][j][d] = ESL_MAX(Jbeta[v][j][d], Lbeta[y][j][d] + cm->tsc[y][voffset]);
6318 		    Lbeta[v][j][d] = ESL_MAX(Lbeta[v][j][d], Lbeta[y][j][d] + cm->tsc[y][voffset]);
6319 		  }
6320 		  break;
6321 		case S_st:
6322 		case E_st:
6323 		case D_st:
6324 		  Jbeta[v][j][d]            = ESL_MAX(Jbeta[v][j][d], Jbeta[y][j][d] + cm->tsc[y][voffset]);
6325 		  if(fill_L) Lbeta[v][j][d] = ESL_MAX(Lbeta[v][j][d], Lbeta[y][j][d] + cm->tsc[y][voffset]);
6326 		  if(fill_R) Rbeta[v][j][d] = ESL_MAX(Rbeta[v][j][d], Rbeta[y][j][d] + cm->tsc[y][voffset]);
6327 		  break;
6328 		} /* end of switch(cm->sttype[y] */
6329 	      } /* end of sneaky if y != 0 */
6330 	    } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
6331 	    if (Jbeta[v][j][d] < IMPOSSIBLE) Jbeta[v][j][d] = IMPOSSIBLE;
6332 	  } /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
6333 	} /* end loop over j. We know beta for this whole state */
6334       } /* end of 'else' (if cm->sttype[v] != BEGL_S, BEGR_S) */
6335     } /* end of 'if(! StateIsDetached(cm, v))' */
6336     /* we're done calculating deck v for everything but local ends */
6337 
6338     /* deal with local alignment end transitions v->EL J matrix only (EL = deck at M.) */
6339     if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
6340       sd  = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
6341       sdl = StateLeftDelta(cm->sttype[v]);  /* note sdl is for state v */
6342       sdr = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
6343 
6344       for (j = 0; j <= L; j++) {
6345 	for (d = 0; d <= j; d++) {
6346 	  i = j-d+1;
6347 	  switch (cm->sttype[v]) {
6348 	  case MP_st:
6349 	    if (j != L && d != j) {
6350 	      escore = cm->oesc[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
6351 	      Jbeta[cm->M][j][d] = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][j+sdr][d+sd] + cm->endsc[v] + escore));
6352 	    }
6353 	    if(fill_L && j == L && d != j) { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
6354 	      escore = cm->lmesc[v][dsq[i-1]];
6355 	      Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[v][j][d+sdl] + cm->endsc[v] + escore));
6356 	    }
6357 	    if(fill_R && i == 1 && j != L) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
6358 	      escore = cm->rmesc[v][dsq[j+1]];
6359 	      Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[v][j+sdr][d+sdr] + cm->endsc[v] + escore));
6360 	    }
6361 	    break;
6362 	  case ML_st:
6363 	  case IL_st:
6364 	    if (d != j) {
6365 	      escore = cm->oesc[v][dsq[i-1]];
6366 	      Jbeta[cm->M][j][d]            = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][j][d+sd] + cm->endsc[v] + escore));
6367 	      if(fill_L) Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[v][j][d+sd] + cm->endsc[v] + escore));
6368 	    }
6369 	    if(fill_R && i == 1) { /* only allow transition from R if we haven't emitted any residues leftwise (i == 1) */
6370 	      Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], Rbeta[v][j][d] + cm->endsc[v]);
6371 	    }
6372 	    break;
6373 	  case MR_st:
6374 	  case IR_st:
6375 	    if(j != L) {
6376 	      escore = cm->oesc[v][dsq[j+1]];
6377 	      Jbeta[cm->M][j][d]            = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][j+sdr][d+sd] + cm->endsc[v] + escore));
6378 	      if(fill_R) Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[v][j+sdr][d+sd] + cm->endsc[v] + escore));
6379 	    }
6380 	    if(fill_L && j == L) { /* only allow transition from L if we haven't emitted any residues rightwise (j == L) */
6381 	      Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], Lbeta[v][j][d] + cm->endsc[v]);
6382 	    }
6383 	    break;
6384 	  case S_st:
6385 	  case D_st:
6386 	  case E_st:
6387 	    Jbeta[cm->M][j][d]            = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][j+sdr][d+sd] + cm->endsc[v]));
6388 	    if(fill_L) Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[v][j+sdr][d+sd] + cm->endsc[v]));
6389 	    if(fill_R) Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[v][j+sdr][d+sd] + cm->endsc[v]));
6390 	    break;
6391 	  }
6392 	}
6393       }
6394     }
6395   }
6396   /* Deal with last step needed for local alignment
6397    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
6398    */
6399   if (cm->flags & CMH_LOCAL_END) {
6400     for (j = L; j > 0; j--) { /* careful w/ boundary here */
6401       for (d = j-1; d >= 0; d--) { /* careful w/ boundary here */
6402 	Jbeta[cm->M][j][d]            = ESL_MAX(Jbeta[cm->M][j][d], Jbeta[cm->M][j][d+1] + cm->el_selfsc);
6403 	if(fill_L) Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], Lbeta[cm->M][j][d+1] + cm->el_selfsc);
6404 	if(fill_R) Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], Rbeta[cm->M][j][d+1] + cm->el_selfsc);
6405       }
6406     }
6407   }
6408 
6409   fail1_flag = FALSE;
6410   fail2_flag = FALSE;
6411   if(do_check) {
6412     /* Check for consistency between the Inside alpha matrix and the
6413      * Outside beta matrix. we assume the Inside CYK parse score
6414      * (optsc) is the optimal score, so for all v,j,d:
6415      *
6416      * Jalpha[v][j][d] + Jbeta[v][j][d] <= optsc
6417      * Lalpha[v][j][d] + Lbeta[v][j][d] <= optsc
6418      * Ralpha[v][j][d] + Rbeta[v][j][d] <= optsc
6419      *
6420      * Further, we know that each residue must be emitted by a state
6421      * in the optimal parse. So as we do the above check, we determine
6422      * when we're in a cell that may be involved in the optimal parse
6423      * (the sum of the Inside and Outside scores are equal to the
6424      * optimal parse score), if that cell corresponds to a left
6425      * emitter emitting position i, we know an emitted i has been
6426      * observed in an optimal parse and set optseen[i] to TRUE.
6427      * Likewise, if that cell corresponds to a right emitter emitting
6428      * position j, we update optseen[j] to TRUE. At the end of the
6429      * check optseen[i] should be TRUE for all i in the range
6430      * [1..L].
6431      *
6432      * Note that we don't ensure that all of our presumed optimal
6433      * cells make up a valid parse, so it is possible we could pass
6434      * this check even if the Inside and Outside matrices are
6435      * inconsistent (i.e. there's a bug in the implementation of one
6436      * and/or the other) but that should be extremely unlikely.  If we
6437      * do this test many times for many different models and pass, we
6438      * should be confident we have consistent implementations.
6439      *
6440      * Note that we don't check fill_L and fill_R variables
6441      * here, although they will have dictated whether we've filled
6442      * in the L and R matrices. If they're FALSE, those matrices
6443      * should remain as they've been initialized as all IMPOSSIBLE
6444      * values, so they won't cause us to fail our tests here.
6445      *
6446      * This is an expensive check and should only be done while
6447      * debugging.
6448      */
6449     ESL_ALLOC(optseen, sizeof(int) * (L+1));
6450     esl_vec_ISet(optseen, L+1, FALSE);
6451     vmax = (cm->flags & CMH_LOCAL_END) ? cm->M : cm->M-1;
6452     if     (preset_mode == TRMODE_J) optsc = Jalpha[0][L][L];
6453     else if(preset_mode == TRMODE_L) optsc = Lalpha[0][L][L];
6454     else if(preset_mode == TRMODE_R) optsc = Ralpha[0][L][L];
6455     else if(preset_mode == TRMODE_T) optsc = Talpha[0][L][L];
6456     /* define bit score difference tolerance, somewhat arbitrarily:
6457      * clen <= 200: tolerance is 0.001; then a function of clen:
6458      * clen == 1000 tolerance is 0.005,
6459      * clen == 2000, tolerance is 0.01.
6460      *
6461      * I did this b/c with tests with SSU_rRNA_eukarya I noticed
6462      * failures with bit score differences up to 0.004 or so.  This
6463      * could mean a bug, but I couldn't get any average sized model to
6464      * fail with a difference above 0.001, so I blamed it on
6465      * precision. I'm not entirely convinced it isn't a bug but
6466      * until I see a failure on a smaller model it seems precision
6467      * is the most likely explanation, right?
6468      */
6469     tol = ESL_MAX(1e-3, (float) cm->clen / 200000.);
6470     for(v = 0; v <= vmax; v++) {
6471       for(j = 1; j <= L; j++) {
6472 	for(d = 0; d <= j; d++) {
6473 	  Jsc  = Jalpha[v][j][d] + Jbeta[v][j][d] - optsc;
6474 	  Lsc  = (fill_L) ? Lalpha[v][j][d] + Lbeta[v][j][d] - optsc : IMPOSSIBLE;
6475 	  Rsc  = (fill_R) ? Ralpha[v][j][d] + Rbeta[v][j][d] - optsc : IMPOSSIBLE;
6476 	  Tsc  = (fill_T && cm->sttype[v] == B_st) ? Talpha[v][j][d] + Tbeta[v][j][d] - optsc : IMPOSSIBLE;
6477 	  if(Jsc > tol) {
6478 	    printf("Check 1 J failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
6479 		   v, j, d, Jalpha[v][j][d], Jbeta[v][j][d], Jalpha[v][j][d] + Jbeta[v][j][d], optsc);
6480 	    fail1_flag = TRUE;
6481 	  }
6482 	  if(Lsc > tol) {
6483 	    printf("Check 1 L failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
6484 		   v, j, d, Lalpha[v][j][d], Lbeta[v][j][d], Lalpha[v][j][d] + Lbeta[v][j][d], optsc);
6485 	    fail1_flag = TRUE;
6486 	  }
6487 	  if(Rsc > tol) {
6488 	    printf("Check 1 R failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
6489 		   v, j, d, Ralpha[v][j][d], Rbeta[v][j][d], Ralpha[v][j][d] + Rbeta[v][j][d], optsc);
6490 	    fail1_flag = TRUE;
6491 	  }
6492 	  if(cm->sttype[v] == B_st && Tsc > tol) {
6493 	    printf("Check 1 T failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
6494 		   v, j, d, Talpha[v][j][d], Tbeta[v][j][d], Talpha[v][j][d] + Tbeta[v][j][d], optsc);
6495 	    fail1_flag = TRUE;
6496 	  }
6497 	  if(((fabs(Jsc) < tol || fabs(Lsc) < tol) &&
6498 	      (cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st || (cm->sttype[v] == EL_st && d >0))) ||
6499 	     ((fabs(Rsc) < tol && cm->sttype[v] == EL_st && d >0))) {
6500 	    i = j-d+1;
6501 	    /* i is accounted for by a parse with an optimal score */
6502 	    optseen[i] = TRUE;
6503 	    /*
6504 	      if     (fabs(Jsc) < tol) printf("\tResidue %4d possibly accounted for by J matrix Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);
6505 	      else if(fabs(Lsc) < tol) printf("\tResidue %4d possibly accounted for by L matrix Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);
6506 	      else if(fabs(Rsc) < tol) printf("\tResidue %4d possibly accounted for by R matrix Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);
6507 	    */
6508 	  }
6509 	  if((fabs(Jsc) < tol || fabs(Rsc) < tol) &&
6510 	     (cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st)) {
6511 	    /* j is accounted for by a parse with an optimal score */
6512 	    optseen[j] = TRUE;
6513 	    /*
6514 	       if     (fabs(Jsc) < tol) printf("\tResidue %4d possibly accounted for by J matrix Right emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", j, Statetype(cm->sttype[v]), v, j, d);
6515 	       else if(fabs(Rsc) < tol) printf("\tResidue %4d possibly accounted for by R matrix Right emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", j, Statetype(cm->sttype[v]), v, j, d);
6516 	    */
6517 	  }
6518 	}
6519       }
6520     }
6521     for(j = 1; j <= L; j++) {
6522       if(optseen[j] == FALSE) {
6523 	printf("Check 2 failure: residue %d not emitted in the optimal parsetree\n", j);
6524 	fail2_flag = TRUE;
6525       }
6526     }
6527     free(optseen);
6528   }
6529   if(fail1_flag || fail2_flag) for(j = 1; j <= L; j++) printf("dsq[%4d]: %4d\n", j, dsq[j]);
6530 
6531 #if eslDEBUGLEVEL >= 2
6532   /* Uncomment to dump matrix to file. Careful...this could be very large. */
6533   /* FILE *fp1; fp1 = fopen("tmp.tru_ocykmx", "w");   cm_tr_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1); */
6534 #endif
6535 
6536   if(do_check) {
6537     if     (fail1_flag) ESL_FAIL(eslFAIL, errbuf, "TrCYK Inside/Outside check1 FAILED.");
6538     else if(fail2_flag) ESL_FAIL(eslFAIL, errbuf, "TrCYK Inside/Outside check2 FAILED.");
6539     /*else                printf("SUCCESS! TrCYK Inside/Outside checks PASSED.\n");*/
6540   }
6541 
6542   if     (preset_mode == TRMODE_J) optsc = Jalpha[0][L][L];
6543   else if(preset_mode == TRMODE_L) optsc = Lalpha[0][L][L];
6544   else if(preset_mode == TRMODE_R) optsc = Ralpha[0][L][L];
6545   else if(preset_mode == TRMODE_T) optsc = Talpha[0][L][L];
6546   ESL_DPRINTF1(("#DEBUG: \tcm_TrCYKOutsideAlign() sc : %f (sc is from Inside!)\n", optsc));
6547 
6548   return eslOK;
6549 
6550  ERROR:
6551   ESL_FAIL(status, errbuf, "Out of memory");
6552   return status; /* NEVER REACHED */
6553 }
6554 
6555 /* Function: cm_TrCYKOutsideAlignHB()
6556  * Date:     EPN, Sat Oct  8 15:42:48 2011
6557  *
6558  * Purpose:  Run the outside TrCYK algorithm on a target sequence.
6559  *           HMM banded version. See cm_TrCYKOutsideAlign() for
6560  *           the non-banded version. The full target sequence
6561  *           1..L is aligned.
6562  *
6563  *           Very similar to cm_TrOutsideAlignHB() but calculates
6564  *           beta[v][j][d]: log probability of the most likely parse
6565  *           that emits 1..i-1 and j+1..L and passes through v at j,d
6566  *           (where i = j-d+1) instead of the log of the summed
6567  *           probability of all such parses. This means max operations
6568  *           are used instead of logsums.
6569  *
6570  *           This function complements cm_TrCYKInsideAlignHB() but is
6571  *           mainly useful for testing and reference. It can be used
6572  *           with do_check=TRUE to verify that the implementation of
6573  *           TrCYKInsideAlignHB and TrCYKOutsideAlignHB are
6574  *           consistent.  Because the structure of TrCYKInsideAlignHB
6575  *           and TrInsideAlignHB, and TrCYKOutsideAlignHB and
6576  *           TrOutsideAlignHB are so similar and the TrCYK variants
6577  *           are easier to debug (because only the optimal parsetree
6578  *           is considered instead of all possible parsetrees) this
6579  *           function can be useful for finding bugs in
6580  *           TrOutsideAlignHB. It is currently not hooked up to any of
6581  *           the main Infernal programs.
6582  *
6583  * Args:     cm          - the model
6584  *           errbuf      - char buffer for reporting errors
6585  *           dsq         - the digitized sequence
6586  *           L           - length of the dsq to align
6587  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
6588  *           do_check    - TRUE to check consistency between the Inside and Outside matrices (for debugging)
6589  *           mx          - the dp matrix, only cells within bands in cp9b will be valid
6590  *           preset_mode - TRMODE_J, TRMODE_L, TRMODE_R, or TRMODE_T, the pre-determined
6591  *                         alignment mode, we'll only allow alignments in this mode.
6592  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
6593  *           inscyk_mx   - the precalculated dp matrix from the CYK Inside calculation (required)
6594  *
6595  * Returns:  <eslOK> on success
6596  *
6597  * Throws:   <eslERANGE> if required CM_TR_HB_MX size exceeds <size_limit>
6598  *           <eslEMEM>   if we run out of memory
6599  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
6600  *           In any of these cases, alignment has been aborted.
6601  */
6602 int
cm_TrCYKOutsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,int do_check,CM_TR_HB_MX * mx,CM_TR_HB_MX * inscyk_mx)6603 cm_TrCYKOutsideAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
6604 		       int do_check, CM_TR_HB_MX *mx, CM_TR_HB_MX *inscyk_mx)
6605 {
6606   int      status;
6607   int      v,y,z;	       /* indices for states */
6608   float    Jsc,Lsc,Rsc,Tsc;    /* temporary variables holding a float score */
6609   int      j,d,i,k;	       /* indices in sequence dimensions */
6610   float  **esc_vAA;            /* ptr to cm->oesc, optimized emission scores */
6611   float    optsc;              /* optimal score in <preset_mode>, from Inside */
6612   float    escore;	       /* an emission score, tmp variable */
6613   int      voffset;	       /* index of v in t_v(y) transition scores */
6614   int      emitmode;           /* EMITLEFT, EMITRIGHT, EMITPAIR, EMITNONE, for state y */
6615   int      sd;                 /* StateDelta(cm->sttype[y]) */
6616   int      sdl;                /* StateLeftDelta(cm->sttype[y] */
6617   int      sdr;                /* StateRightDelta(cm->sttype[y] */
6618 
6619   /* variables used only if do_check */
6620   int      fail1_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 1*/
6621   int      fail2_flag = FALSE; /* set to TRUE if do_check and we see a problem with check 2*/
6622   int      vmax;               /* i, offset in the matrix */
6623   float    tol;                /* tolerance for differences in bit scores */
6624   int     *optseen = NULL;     /* [1..i..W] TRUE is residue i is accounted for in optimal parse */
6625 
6626   /* band related variables */
6627   int      dp_v;               /* d index for state v in alpha w/mem eff bands */
6628   int      dp_y;               /* d index for state y in alpha w/mem eff bands */
6629   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
6630   int      Lp;                 /* L index also changes depending on state */
6631   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
6632   int      kmin, kmax;         /* temporary minimum/maximum allowed k */
6633   int      jn, jx;             /* current minimum/maximum j allowed */
6634   int      dn, dx;             /* current minimum/maximum d allowed */
6635   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
6636   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
6637 
6638   /* variables related to truncated alignment (not in cm_CYKInsideAlignHB() */
6639   int      fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
6640   int      do_J_v, do_J_y, do_J_z; /* must we fill J matrix deck for state v, y, z? */
6641   int      do_L_v, do_L_y, do_L_z; /* must we fill L matrix deck for state v, y, z? */
6642   int      do_R_v, do_R_y, do_R_z; /* must we fill R matrix deck for state v, y, z? */
6643   int      do_T_v, do_T_y;         /* is T matrix valid for state v, y?    */
6644   int      pty_idx;                /* index for truncation penalty, determined by pass_idx */
6645   float    trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
6646 
6647   /* DP matrix variables */
6648   float ***Jbeta   = mx->Jdp;     /* pointer to the outside Jbeta DP matrix */
6649   float ***Lbeta   = mx->Ldp;     /* pointer to the outside Lbeta DP matrix */
6650   float ***Rbeta   = mx->Rdp;     /* pointer to the outside Rbeta DP matrix */
6651   float ***Tbeta   = mx->Tdp;     /* pointer to the outside Tbeta DP matrix */
6652 
6653   float ***Jalpha  = inscyk_mx->Jdp; /* pointer to the precalc'ed inside Jalpha DP matrix */
6654   float ***Lalpha  = inscyk_mx->Ldp; /* pointer to the precalc'ed inside Lalpha DP matrix */
6655   float ***Ralpha  = inscyk_mx->Rdp; /* pointer to the precalc'ed inside Ralpha DP matrix */
6656   float ***Talpha  = inscyk_mx->Tdp; /* pointer to the precalc'ed inside Talpha DP matrix, only used to possibly get optsc */
6657 
6658   /* ptrs to cp9b info, for convenience */
6659   CP9Bands_t *cp9b = cm->cp9b;
6660   int     *jmin    = cm->cp9b->jmin;
6661   int     *jmax    = cm->cp9b->jmax;
6662   int    **hdmin   = cm->cp9b->hdmin;
6663   int    **hdmax   = cm->cp9b->hdmax;
6664 
6665   /* Allocations and initializations */
6666   esc_vAA = cm->oesc;            /* a ptr to the optimized emission scores */
6667 
6668   /* Determine which matrices we need to fill in, based on <preset_mode> */
6669   if(preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB(): preset_mode is not J, L, R, or T");
6670   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrCYKOutsideAlignHB(), bogus mode: %d", preset_mode);
6671 
6672   /* Determine the truncation penalty index, from the pass_idx */
6673   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrCYKOutsideAlignHB(), unexpected pass idx: %d", pass_idx);
6674 
6675   /* grow the matrix based on the current sequence and bands */
6676   if((status = cm_tr_hb_mx_GrowTo(cm, mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
6677 
6678   /* initialize all cells of the matrix to IMPOSSIBLE */
6679   if(mx->Jncells_valid > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
6680   if(mx->Lncells_valid > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
6681   if(mx->Rncells_valid > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
6682   if(mx->Tncells_valid > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
6683 
6684   /* ensure a full alignment in <preset_mode> to ROOT_S (v==0) is allowed by the bands */
6685   if      (preset_mode == TRMODE_J && (! cp9b->Jvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB() preset_mode is J but cp9b->Jvalid[0] is FALSE");
6686   else if (preset_mode == TRMODE_L && (! cp9b->Lvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB() preset_mode is L but cp9b->Lvalid[0] is FALSE");
6687   else if (preset_mode == TRMODE_R && (! cp9b->Rvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB() preset_mode is R but cp9b->Rvalid[0] is FALSE");
6688   else if (preset_mode == TRMODE_T && (! cp9b->Tvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB() preset_mode is T but cp9b->Tvalid[0] is FALSE");
6689   if (jmin[0] > L        || jmax[0] < L)        ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, jmin[0], jmax[0]);
6690   jp_0 = L - jmin[0];
6691   if (hdmin[0][jp_0] > L || hdmax[0][jp_0] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, hdmin[0][jp_0], hdmax[0][jp_0]);
6692   Lp_0 = L - hdmin[0][jp_0];
6693 
6694   /* initialize cells in the special ROOT_S deck corresponding to full sequence alignments to 0.0 */
6695   if     (preset_mode == TRMODE_J) Jbeta[0][jp_0][Lp_0] = 0.; /* a full Joint    alignment is outside this cell */
6696   else if(preset_mode == TRMODE_L) Lbeta[0][jp_0][Lp_0] = 0.; /* a full Left     alignment is outside this cell */
6697   else if(preset_mode == TRMODE_R) Rbeta[0][jp_0][Lp_0] = 0.; /* a full Right    alignment is outside this cell */
6698   else if(preset_mode == TRMODE_T) Tbeta[0][jp_0][Lp_0] = 0.; /* a full Terminal alignment is outside this cell */
6699   else ESL_FAIL(eslEINVAL, errbuf, "cm_TrCYKOutsideAlignHB() preset_mode %d is invalid", preset_mode);
6700 
6701   /* set cells corresponding to legal truncated begin entry states to
6702    * the appropriate penalty. In truncated alignment the only way out
6703    * of ROOT_S in local or global mode is via a 'truncated begin' with
6704    * a score (penalty) from cm->trp into any emitting state. The
6705    * penalty was calculated in cm_tr_penalties_Create() and differs
6706    * depending on whether we are in local or global mode and the value
6707    * of 'pty_idx' which was passed in.
6708    */
6709   for(v = 0; v < cm->M; v++) {
6710     if((L >= jmin[v]) && (L <= jmax[v])) {
6711       jp_v = L - jmin[v];
6712       if((L >= hdmin[v][jp_v]) && L <= hdmax[v][jp_v]) {
6713 	Lp = L - hdmin[v][jp_v];
6714 
6715 	trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
6716 	if(NOT_IMPOSSIBLE(trpenalty)) {
6717 	  do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
6718 	  do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
6719 	  do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
6720 	  do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
6721 	  if(preset_mode == TRMODE_J && do_J_v) Jbeta[v][jp_v][Lp] = trpenalty; /* a full Joint alignment is outside this cell */
6722 	  if(preset_mode == TRMODE_L && do_L_v) Lbeta[v][jp_v][Lp] = trpenalty; /* a full Left  alignment is outside this cell */
6723 	  if(preset_mode == TRMODE_R && do_R_v) Rbeta[v][jp_v][Lp] = trpenalty; /* a full Right alignment is outside this cell */
6724 	  if(preset_mode == TRMODE_T && do_T_v && cm->sttype[v] == B_st) {
6725 	    Tbeta[v][jp_v][Lp] = trpenalty; /* a full Terminal alignment is outside this cell */
6726 	  }
6727 	}
6728       }
6729     }
6730   }
6731   /* done allocation/initialization */
6732 
6733   /* Recursion: main loop down through the decks */
6734   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
6735     if(! StateIsDetached(cm, v)) {
6736       sd  = StateDelta(cm->sttype[v]);
6737       sdr = StateRightDelta(cm->sttype[v]);
6738       do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
6739       do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
6740       do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
6741       do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
6742 
6743       /* if the v deck is invalid in J, L R and T mode, all states for v will remain impossible */
6744       if(! (do_J_v || do_L_v || do_R_v || do_T_v)) continue;
6745 
6746       if (cm->stid[v] == BEGL_S) { /* BEGL_S */
6747 	y = cm->plast[v];	/* the parent bifurcation    */
6748 	z = cm->cnum[y];	/* the other (right) S state */
6749 
6750 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
6751 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
6752 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
6753 	do_T_y = cp9b->Tvalid[y] && fill_T ? TRUE : FALSE; /* will be FALSE, y is not a B_st */
6754 
6755 	do_J_z = cp9b->Jvalid[z]           ? TRUE : FALSE;
6756 	do_L_z = cp9b->Lvalid[z] && fill_L ? TRUE : FALSE;
6757 	do_R_z = cp9b->Rvalid[z] && fill_R ? TRUE : FALSE;
6758 
6759 	for (j = jmax[v]; j >= jmin[v]; j--) {
6760 	  ESL_DASSERT1((j >= 0 && j <= L));
6761 	  jp_v = j - jmin[v];
6762 	  jp_y = j - jmin[y];
6763 	  jp_z = j - jmin[z];
6764 	  i = j-d+1;
6765 	  for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
6766 	    dp_v = d - hdmin[v][jp_v];
6767 	    /* Find the first k value that implies a valid cell in the y and z decks.
6768 	     * This k must satisfy the following 8 inequalities (some may be redundant):
6769 	     * NOTE: these are different from those in Inside() (for one thing, v and y
6770 	     *       (BEGL_S and BIF_B here respectively) are switched relative to Inside.
6771 	     *
6772 	     * (1) k <= jmax[y] - j;
6773 	     * (2) k >= jmin[y] - j;
6774 	     * (3) k <= jmax[z] - j;
6775 	     * (4) k >= jmin[z] - j;
6776 	     *     1 and 2 guarantee (j+k) is within state y's j band
6777 	     *     3 and 4 guarantee (j+k) is within state z's j band
6778 	     *
6779 	     * (5) k >= hdmin[y][j-jmin[y]+k] - d;
6780 	     * (6) k <= hdmax[y][j-jmin[y]+k] - d;
6781 	     *     5 and 6 guarantee k+d is within y's j=(j+k), d band
6782 	     *
6783 	     * (7) k >= hdmin[z][j-jmin[z]+k];
6784 	     * (8) k <= hdmax[z][j-jmin[z]+k];
6785 	     *     7 and 8 guarantee k is within state z's j=(j+k) d band
6786 	     */
6787 	    kmin = ESL_MAX(jmin[y], jmin[z]) - j;
6788 	    kmax = ESL_MIN(jmax[y], jmax[z]) - j;
6789 	    /* kmin and kmax satisfy inequalities (1-4) */
6790 	    /* RHS of inequalities 5-8 are dependent on k, so we check
6791 	     * for these within the next for loop. */
6792 	    for(k = kmin; k <= kmax; k++) {
6793 	      if(k < (hdmin[y][jp_y+k] - d) || k > (hdmax[y][jp_y+k] - d)) continue;
6794 	      /* above line continues if inequality 5 or 6 is violated */
6795 	      if(k < (hdmin[z][jp_z+k])     || k > (hdmax[z][jp_z+k]))     continue;
6796 	      /* above line continues if inequality 7 or 8 is violated */
6797 
6798 	      /* if we get here for current k, all 8 inequalities have been satisfied
6799 	       * so we know the cells corresponding to the platonic
6800 	       * matrix cells beta[v][j][d], beta[y][j+k][d+k], and
6801 	       * alpha[z][j+k][k] are all within the bands. These
6802 	       * cells correspond to beta[v][jp_v][dp_v],
6803 	       * beta[y][jp_y+k][d-hdmin[y][jp_y+k]+k],
6804 	       * and alpha[z][jp_z+k][k-hdmin[z][jp_z+k]];
6805 	       */
6806 	      kp_z = k-hdmin[z][jp_z+k];
6807 	      dp_y = d-hdmin[y][jp_y+k];
6808 
6809 	      if(do_J_v && do_J_y && do_J_z) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y+k][dp_y+k] + Jalpha[z][jp_z+k][kp_z]); /* A */
6810 	      if(do_J_v && do_L_y && do_L_z) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y+k][dp_y+k] + Lalpha[z][jp_z+k][kp_z]); /* B */
6811 	      if(do_R_v && do_R_y && do_J_z) Rbeta[v][jp_v][dp_v] = ESL_MAX(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y+k][dp_y+k] + Jalpha[z][jp_z+k][kp_z]); /* C */
6812 	      if(d == j && (j+k) == L &&
6813 		 do_R_v && do_T_y && do_L_z) Rbeta[v][jp_v][dp_v] = ESL_MAX(Rbeta[v][jp_v][dp_v], Tbeta[y][jp_y+k][dp_y+k] + Lalpha[z][jp_z+k][kp_z]); /* D */
6814 	      /* Note: Tbeta[y][j+k==L][d+k==L] will be 0.0 because it
6815 	       * was initialized that way. That T cell includes the
6816 	       * full target 1..L (any valid T alignment must because
6817 	       * we must account for the full target) rooted at a B
6818 	       * state, and a transition from that B state to this
6819 	       * BEGL_S is always probability 1.0.
6820 	       */
6821 	    } /* end of for k loop */
6822 	  } /* end of for d loop */
6823 	} /* end of for j loop */
6824 	/* Two more special cases in truncated alignment, we have to
6825 	 * do these within their own for j and for d loops because j
6826 	 * and d have different restrictions than they do in the
6827 	 * above for j and for d loops we just closed.
6828 	 */
6829 	if(do_L_y && (do_J_v || do_L_v)) {
6830 	  jn = ESL_MAX(jmin[v], jmin[y]);
6831 	  jx = ESL_MIN(jmax[v], jmax[y]);
6832 	  for (j = jx; j >= jn; j--) {
6833 	    jp_v = j - jmin[v];
6834 	    jp_y = j - jmin[y];
6835 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
6836 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
6837 	    for (d = dx; d >= dn; d--) {
6838 	      dp_v = d-hdmin[v][jp_v];
6839 	      dp_y = d-hdmin[y][jp_y];
6840 	      if(do_J_v) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y]); /* entire sequence on left, no sequence on right, k == 0 */
6841 	      if(do_L_v) Lbeta[v][jp_v][dp_v] = ESL_MAX(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y]); /* entire sequence on left, no sequence on right, k == 0 */
6842 	    }
6843 	  }
6844 	}
6845       } /* end of 'if (cm->stid[v] == BEGL_S */
6846       else if (cm->stid[v] == BEGR_S) {
6847 	y = cm->plast[v];   /* the parent bifurcation    */
6848 	z = cm->cfirst[y];  /* the other (left) S state  */
6849 
6850 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
6851 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
6852 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
6853 	do_T_y = cp9b->Tvalid[y] && fill_T ? TRUE : FALSE;
6854 
6855 	do_J_z = cp9b->Jvalid[z]           ? TRUE : FALSE;
6856 	do_L_z = cp9b->Lvalid[z] && fill_L ? TRUE : FALSE;
6857 	do_R_z = cp9b->Rvalid[z] && fill_R ? TRUE : FALSE;
6858 
6859 	jn = ESL_MAX(jmin[v], jmin[y]);
6860 	jx = ESL_MIN(jmax[v], jmax[y]);
6861 	for (j = jx; j >= jn; j--) {
6862 	  ESL_DASSERT1((j >= 0 && j <= L));
6863 	  jp_v = j - jmin[v];
6864 	  jp_y = j - jmin[y];
6865 	  jp_z = j - jmin[z];
6866 
6867 	  dn = ESL_MAX(hdmin[v][jp_v], j-jmax[z]);
6868 	  dx = ESL_MIN(hdmax[v][jp_v], jp_z);
6869 	  /* above makes sure that j-d is within state z's j band: (jmin[z] + d) <= j <= (jmax[z] + d) */
6870 	  i = j-dx+1;
6871 	  for (d = dx; d >= dn; d--, i++) {
6872 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
6873 	    /* Find the first k value that implies a valid cell in the y and z decks.
6874 	     * This k must satisfy the following 4 inequalities (some may be redundant):
6875 	     * NOTE: these are different from those in Inside() (for one thing, v and y
6876 	     *       (BEGR_S and BIF_B here respectively) are switched relative to Inside).
6877 	     *
6878 	     * (1) k >= hdmin[y][j-jmin[y]] - d;
6879 	     * (2) k <= hdmax[y][j-jmin[y]] - d;
6880 	     *     1 and 2 guarantee (d+k) is within state y's j=(j) d band
6881 	     *
6882 	     * (3) k >= hdmin[z][j-jmin[z]-d];
6883 	     * (4) k <= hdmax[z][j-jmin[z]-d];
6884 	     *     3 and 4 guarantee k is within z's j=(j-d) d band
6885 	     *
6886 	     */
6887 	    kmin = ESL_MAX((hdmin[y][jp_y]-d), (hdmin[z][jp_z-d]));
6888 	    kmax = ESL_MIN((hdmax[y][jp_y]-d), (hdmax[z][jp_z-d]));
6889 	    /* kmin and kmax satisfy inequalities (1-4) */
6890 	    for(k = kmin; k <= kmax; k++) {
6891 	      /* for current k, all 4 inequalities have been satisfied
6892 	       * so we know the cells corresponding to the platonic
6893 	       * matrix cells beta[v][j][d], beta[y][j][d+k], and
6894 	       * alpha[z][j-d][k] are all within the bands. These
6895 	       * cells correspond to beta[v][jp_v][dp_v],
6896 	       * beta[y][jp_y][d-hdmin[y][jp_y]+k],
6897 	       * and alpha[z][jp_z-d][k-hdmin[z][jp_z-d]];
6898 	       */
6899 	      kp_z = k-hdmin[z][jp_z-d];
6900 	      dp_y = d-hdmin[y][jp_y];
6901 
6902 	      if(do_J_v && do_J_y && do_J_z) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y][dp_y+k] + Jalpha[z][jp_z-d][kp_z]); /* A */
6903 	      if(do_J_v && do_R_y && do_R_z) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y+k] + Ralpha[z][jp_z-d][kp_z]); /* C */
6904 	      if(do_L_v && do_L_y && do_J_z) Lbeta[v][jp_v][dp_v] = ESL_MAX(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+k] + Jalpha[z][jp_z-d][kp_z]); /* B */
6905 	      if(k == (i-1) && j == L &&
6906 		 do_L_v && do_T_y && do_R_z) Lbeta[v][jp_v][dp_v] = ESL_MAX(Lbeta[v][jp_v][dp_v], Tbeta[y][jp_y][dp_y+k] + Ralpha[z][jp_z-d][kp_z]); /* D */
6907 	      /* Note: Tbeta[y][j==L][d+k==L] will be 0.0 because it
6908 	       * was initialized that way. That T cell includes the
6909 	       * full target 1..L (any valid T alignment must because
6910 	       * we must account for the full target) rooted at a B
6911 	       * state, and a transition from that B state to this
6912 	       * BEGR_S is always probability 1.0.
6913 	       */
6914 	    } /* end of for k loop */
6915 	  } /* end of for d loop */
6916 	  /* Two more special cases in truncated alignment, we have to
6917 	   * do these within their own for d loop because d has
6918 	   * different restrictions than it does in the above for d
6919 	   * loop we just closed. j's restrictions are the same
6920 	   * though, so we stay inside the for j loop.
6921 	   */
6922 	  if(do_R_y && (do_J_v || do_R_v)) {
6923 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
6924 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
6925 	    for (d = dx; d >= dn; d--) {
6926 	      dp_v = d-hdmin[v][jp_v];
6927 	      dp_y = d-hdmin[y][jp_y];
6928 	      if(do_J_v) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y]); /* entire sequence on right, no sequence on left, k == 0 */
6929 	      if(do_R_v) Rbeta[v][jp_v][dp_v] = ESL_MAX(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y]); /* entire sequence on right, no sequence on left, k == 0 */
6930 	    }
6931 	  }
6932 	} /* end of for j loop */
6933       } /* end of 'else if (cm->stid[v] == BEGR_S */
6934       else { /* (cm->sttype[v] != BEGL_S && cm->sttype[v] != BEGR_S */
6935 	/* in cm_CYKOutsideAlignHB(), IL and IR states are separated
6936 	 * out from the other states at this stage because only they
6937 	 * can self-transit, making it slightly more efficient to
6938 	 * handle non-inserts differently. In truncated mode there's
6939 	 * more special cases so I've decided to collapse all states
6940 	 * together here. An analogous form of the following block is
6941 	 * used only for IL/IR states in cm_CYKOutsideAlignHB().
6942 	 *
6943 	 * ILs and IRs can self transit, this means that
6944 	 * {J,L,R}beta[v][j][d] must be fully calculated before
6945 	 * {J,L,R}beta[v][j][d+1] can be started to be calculated,
6946 	 * forcing the following nesting order: for j { for d { for y
6947 	 * { } } } for non-self-transitioners, we could do a more
6948 	 * efficient nesting order (you can see it in
6949 	 * cm_CYKOutsideAlignHB() but we don't here because truncation
6950 	 * makes it more complex).
6951 	 */
6952 	for (j = jmax[v]; j >= jmin[v]; j--) {
6953 	  ESL_DASSERT1((j >= 0 && j <= L));
6954 	  jp_v = j - jmin[v];
6955 	  for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
6956 	    i = j-d+1;
6957 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
6958 
6959 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
6960 	      /* mind the following sneaky if statement: in truncated
6961 	       * aln, the only way out of state 0 is through a
6962 	       * truncated begin, which we handled above (search for
6963 	       * 'trpenalty'). If we're in local mode transitions out
6964 	       * of 0 will have IMPOSSIBLE scores, but NOT if we're in
6965 	       * glocal mode, so we need this 'if'.
6966 	       */
6967 	      if(y != 0) {
6968 		voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
6969 		sd  = StateDelta(cm->sttype[y]);
6970 		sdl = StateLeftDelta(cm->sttype[y]);
6971 		sdr = StateRightDelta(cm->sttype[y]);
6972 
6973 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
6974 		do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
6975 		do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
6976 		do_T_y = cp9b->Tvalid[y] && fill_T ? TRUE : FALSE; /* will be FALSE, y is not a B_st */
6977 
6978 		/* if the y deck is invalid in J, L and R mode, we don't have to update v based on transitions from y */
6979 		if (! (do_J_y || do_L_y || do_R_y)) continue;
6980 
6981 		/* Note: this looks like it can be optimized, I tried but my 'optimization' slowed the code, so I reverted [EPN] */
6982 		switch(cm->sttype[y]) {
6983 		case MP_st:
6984 		  jp_y = j - jmin[y];
6985 		  if(j != L && d != j &&                                           /* boundary condition */
6986 		     do_J_v && do_J_y &&                                           /* J deck is valid for v and y */
6987 		     (j+sdr >= jmin[y]            && j+sdr <= jmax[y]) &&          /* j+sdr is within y's j band */
6988 		     (d+sd  >= hdmin[y][jp_y+sdr] && d+sd  <= hdmax[y][jp_y+sdr])) /* d+sd  is within y's d band for j+sdr */
6989 		    {
6990 		      dp_y = d - hdmin[y][jp_y+sdr];  /* d index for state y */
6991 		      escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
6992 		      Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y+sdr][dp_y+sd] + cm->tsc[y][voffset] + escore);
6993 		    }
6994 		  if(j == L && d != j &&                                           /* boundary condition, only allow transition from L if we haven't emitted any residues rightwise (j==L) */
6995 		     do_L_y &&                                                     /* L deck is valid for y */
6996 		     (j     >= jmin[y]        && j     <= jmax[y]) &&              /* j is within y's j band */
6997 		     (d+sdl >= hdmin[y][jp_y] && d+sdl <= hdmax[y][jp_y]))         /* d+sdl is within y's d band for j */
6998 		    {
6999 		      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
7000 		      escore = cm->lmesc[y][dsq[i-1]];
7001 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+sdl] + cm->tsc[y][voffset] + escore);
7002 		      if(do_L_v) Lbeta[v][jp_v][dp_v] = ESL_MAX(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+sdl] + cm->tsc[y][voffset] + escore);
7003 		    }
7004 		  if(i == 1 && j != L &&                                           /* boundary condition, only allow transition from R if we haven't emitted any residues leftwise (i==1) */
7005 		     do_R_y &&                                                     /* R deck is valid for y */
7006 		     (j+sdr >= jmin[y]            && j+sdr <= jmax[y]) &&          /* j+sdr is within y's j band */
7007 		     (d+sdr >= hdmin[y][jp_y+sdr] && d+sdr <= hdmax[y][jp_y+sdr])) /* d+sdr is within y's d band for j+sdr */
7008 		    {
7009 		      dp_y = d - hdmin[y][jp_y+sdr];  /* d index for state y */
7010 		      escore = cm->rmesc[y][dsq[j+1]];
7011 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y+sdr][dp_y+sdr] + cm->tsc[y][voffset] + escore);
7012 		      if(do_R_v) Rbeta[v][jp_v][dp_v] = ESL_MAX(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y+sdr][dp_y+sdr] + cm->tsc[y][voffset] + escore);
7013 		    }
7014 		  break;
7015 
7016 		case ML_st:
7017 		case IL_st:
7018 		  jp_y = j - jmin[y];
7019 		  if(d != j &&                                              /* boundary case */
7020 		     (j     >= jmin[y]        && j     <= jmax[y]) &&       /* j is within y's j band */
7021 		     (d+sdl >= hdmin[y][jp_y] && d+sdl <= hdmax[y][jp_y]))  /* d+sdl is within y's d band for j */
7022 		    {
7023 		      dp_y = d - hdmin[y][jp_y];
7024 		      escore = cm->oesc[y][dsq[i-1]];
7025 		      if(do_J_v && do_J_y) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y][dp_y+sd] + cm->tsc[y][voffset] + escore);
7026 		      if(do_L_v && do_L_y) Lbeta[v][jp_v][dp_v] = ESL_MAX(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+sd] + cm->tsc[y][voffset] + escore);
7027 		    }
7028 		  if(i == 1 &&                                              /* boundary condition, only allow transition from R if we're emitting first residue 1 from y  */
7029 		     v != y &&                                              /* will only happen if v == IL, we don't allow silent self transitions from IL->IL */
7030 		     do_R_y &&                                              /* R deck is valid for y */
7031 		     (j     >= jmin[y]        && j     <= jmax[y]) &&       /* j is within y's j band */
7032 		     (d     >= hdmin[y][jp_y] && d     <= hdmax[y][jp_y]))  /* d+sdr(==d) is within y's d band for j */
7033 		    {
7034 		      dp_y = d - hdmin[y][jp_y];
7035 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
7036 		      if(do_R_v) Rbeta[v][jp_v][dp_v] = ESL_MAX(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
7037 		    }
7038 		  break;
7039 
7040 		case MR_st:
7041 		case IR_st:
7042 		  jp_y = j - jmin[y];
7043 		  if (j != L &&                                                    /* boundary condition */
7044 		      (j+sdr >= jmin[y]            && j+sdr <= jmax[y]) &&          /* j+sdr is within y's j band */
7045 		      (d+sd  >= hdmin[y][jp_y+sdr] && d+sd  <= hdmax[y][jp_y+sdr])) /* d+sd is within y's d band for j+sdr */
7046 		    {
7047 		      dp_y = d - hdmin[y][jp_y+sdr];
7048 		      escore = cm->oesc[y][dsq[j+1]];
7049 		      if(do_J_v && do_J_y) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y+sdr][dp_y+sd] + cm->tsc[y][voffset] + escore);
7050 		      if(do_R_v && do_R_y) Rbeta[v][jp_v][dp_v] = ESL_MAX(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y+sdr][dp_y+sd] + cm->tsc[y][voffset] + escore);
7051 		    }
7052 		  if(j == L &&                                                     /* boundary condition, only allow transition from L if we're emitting final residue L from y */
7053 		     v != y &&                                                     /* will only happen if v == IR, we don't allow silent self transitions from IR->IR */
7054 		     do_L_y &&                                                     /* L deck is valid for y */
7055 		     (j     >= jmin[y]           && j      <= jmax[y]) &&          /* j is within y's j band */
7056 		     (d     >= hdmin[y][jp_y]    && d      <= hdmax[y][jp_y]))     /* d+sdl(==d) is within y's d band for j */
7057 		    {
7058 		      dp_y = d - hdmin[y][jp_y];
7059 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
7060 		      if(do_L_v) Lbeta[v][jp_v][dp_v] = ESL_MAX(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
7061 		    }
7062 		  break;
7063 		case S_st:
7064 		case E_st:
7065 		case D_st:
7066 		  jp_y = j - jmin[y];
7067 		  if((j >= jmin[y]        && j <= jmax[y]) &&
7068 		     (d >= hdmin[y][jp_y] && d <= hdmax[y][jp_y]))
7069 		    {
7070 		      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
7071 		      if(do_J_v && do_J_y) Jbeta[v][jp_v][dp_v] = ESL_MAX(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
7072 		      if(do_L_v && do_L_y) Lbeta[v][jp_v][dp_v] = ESL_MAX(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
7073 		      if(do_R_v && do_R_y) Rbeta[v][jp_v][dp_v] = ESL_MAX(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
7074 		    }
7075 		  break;
7076 		} /* end of switch(cm->sttype[y] */
7077 	      } /* end of sneaky if y != 0 */
7078 	    } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
7079 	    if (do_J_v && Jbeta[v][jp_v][dp_v] < IMPOSSIBLE) Jbeta[v][jp_v][dp_v] = IMPOSSIBLE;
7080 	    if (do_L_v && Lbeta[v][jp_v][dp_v] < IMPOSSIBLE) Lbeta[v][jp_v][dp_v] = IMPOSSIBLE;
7081 	    if (do_R_v && Rbeta[v][jp_v][dp_v] < IMPOSSIBLE) Rbeta[v][jp_v][dp_v] = IMPOSSIBLE;
7082 	  } /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
7083 	} /* end loop over jp. We know beta for this whole state */
7084       } /* end of 'else' (entered if cm->sttype[v] != BEGL_S nor BEGR_S */
7085       /* we're done calculating deck v for everything but local ends */
7086 
7087       /* deal with local alignment end transitions v->EL (EL = deck at M.) */
7088       if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
7089 	sd       = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
7090 	sdl      = StateLeftDelta(cm->sttype[v]);  /* note sdl is for state v */
7091 	sdr      = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
7092 	emitmode = Emitmode(cm->sttype[v]);        /* note emitmode is for state v */
7093 
7094 	/* we handle all three possible modes (J,L,R) differently because they have different boundary conditions */
7095 
7096 	/* J mode */
7097 	if(do_J_v && cp9b->Jvalid[cm->M]) {
7098 	  jn = jmin[v] - sdr;
7099 	  jx = jmax[v] - sdr;
7100 	  for (j = jn; j <= jx; j++) {
7101 	    jp_v = j - jmin[v];
7102 	    dn   = hdmin[v][jp_v + sdr] - sd;
7103 	    dx   = hdmax[v][jp_v + sdr] - sd;
7104 	    i    = j-dn+1;                     /* we'll decrement this in for (d... loops inside switch below */
7105 	    dp_v = dn - hdmin[v][jp_v + sdr];  /* we'll increment this in for (d... loops inside switch below */
7106 
7107 	    switch (emitmode) {
7108 	    case EMITPAIR:
7109 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
7110 		escore = esc_vAA[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
7111 		Jbeta[cm->M][j][d] = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v] + escore));
7112 	      }
7113 	      break;
7114 
7115 	    case EMITLEFT:
7116 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
7117 		escore = esc_vAA[v][dsq[i-1]];
7118 		Jbeta[cm->M][j][d] = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v] + escore));
7119 	      }
7120 	      break;
7121 
7122 	    case EMITRIGHT:
7123 	      escore = esc_vAA[v][dsq[j+1]];
7124 	      for (d = dn; d <= dx; d++, dp_v++) {
7125 		Jbeta[cm->M][j][d] = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v] + escore));
7126 	      }
7127 	      break;
7128 
7129 	    case EMITNONE:
7130 	      for (d = dn; d <= dx; d++, dp_v++) {
7131 		Jbeta[cm->M][j][d] = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]));
7132 	      }
7133 	      break;
7134 	    }
7135 	  }
7136 	}
7137 
7138 	/* L mode: again, this code is inefficient, but I chose not to try to optimize lest it get more complex */
7139 	if(do_L_v && cp9b->Lvalid[cm->M]) {
7140 	  jn = jmin[v];
7141 	  jx = jmax[v];
7142 	  for (j = jn; j <= jx; j++) {
7143 	    jp_v = j - jmin[v];
7144 	    dn   = hdmin[v][jp_v] - sdl;
7145 	    dx   = hdmax[v][jp_v] - sdl;
7146 	    i    = j-dn+1;               /* we'll decrement this in for (d... loops inside switch below */
7147 	    dp_v = dn - hdmin[v][jp_v];  /* we'll increment this in for (d... loops inside switch below */
7148 
7149 	    switch (emitmode) {
7150 	    case EMITPAIR:
7151 	      if(j == L) { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
7152 		for (d = dn; d <= dx; d++, dp_v++, i--) {
7153 		  escore = cm->lmesc[v][dsq[i-1]];
7154 		  Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v+sdl] + cm->endsc[v] + escore));
7155 		}
7156 	      }
7157 	      break;
7158 
7159 	    case EMITLEFT:
7160 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
7161 		escore = esc_vAA[v][dsq[i-1]];
7162 		Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v+sdl] + cm->endsc[v] + escore));
7163 	      }
7164 	      break;
7165 
7166 	    case EMITRIGHT:
7167 	      if(j == L) { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
7168 		for (d = dn; d <= dx; d++, dp_v++) {
7169 		  Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v] + cm->endsc[v]));
7170 		}
7171 	      }
7172 	      break;
7173 
7174 	    case EMITNONE:
7175 	      for (d = dn; d <= dx; d++, dp_v++) {
7176 		Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v] + cm->endsc[v]));
7177 	      }
7178 	      break;
7179 	    }
7180 	  }
7181 	} /* end of if(do_L_v) */
7182 
7183 	/* R mode: again, this code is inefficient, but I chose not to try to optimize lest it get more complex */
7184 	if(do_R_v && cp9b->Rvalid[cm->M]) {
7185 	  jn = jmin[v] - sdr;
7186 	  jx = jmax[v] - sdr;
7187 	  for (j = jn; j <= jx; j++) {
7188 	    jp_v = j - jmin[v];
7189 	    dn   = hdmin[v][jp_v + sdr] - sdr;
7190 	    dx   = hdmax[v][jp_v + sdr] - sdr;
7191 	    i    = j-dn+1;                     /* we'll decrement this in for (d... loops inside switch below */
7192 	    dp_v = dn - hdmin[v][jp_v + sdr];  /* we'll increment this in for (d... loops inside switch below */
7193 
7194 	    switch (emitmode) {
7195 	    case EMITPAIR:
7196 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
7197 		if(i == 1) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
7198 		  escore = cm->rmesc[v][dsq[j+1]];
7199 		  Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[v][jp_v+sdr][dp_v+sdr] + cm->endsc[v] + escore));
7200 		}
7201 	      }
7202 	      break;
7203 	    case EMITLEFT:
7204 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
7205 		if(i == 1) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
7206 		  Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[v][jp_v][dp_v] + cm->endsc[v]));
7207 		}
7208 	      }
7209 	      break;
7210 
7211 	    case EMITRIGHT:
7212 	      escore = esc_vAA[v][dsq[j+1]];
7213 	      for (d = dn; d <= dx; d++, dp_v++) {
7214 		Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[v][jp_v+sdr][dp_v+sdr] + cm->endsc[v] + escore));
7215 	      }
7216 	      break;
7217 
7218 	    case EMITNONE:
7219 	      for (d = dn; d <= dx; d++, dp_v++) {
7220 	      Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[v][jp_v+sdr][dp_v+sdr] + cm->endsc[v]));
7221 	      }
7222 	      break;
7223 	    }
7224 	  }
7225 	} /* end of if(do_R_v) */
7226       } /* end of calculating EL scores */
7227     } /* end of if !StateIsDetached() */
7228   } /* end loop over decks v. */
7229 
7230   /* Deal with last step needed for local alignment
7231    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
7232    */
7233   if (cm->flags & CMH_LOCAL_END) {
7234     if(cp9b->Jvalid[cm->M]) {
7235       for (j = L; j > 0; j--) { /* careful w/ boundary here */
7236 	for (d = j-1; d >= 0; d--) { /* careful w/ boundary here */
7237 	  Jbeta[cm->M][j][d] = ESL_MAX(Jbeta[cm->M][j][d], (Jbeta[cm->M][j][d+1] + cm->el_selfsc));
7238 	}
7239       }
7240     }
7241     if(fill_L && cp9b->Lvalid[cm->M]) {
7242       for (j = L; j > 0; j--) { /* careful w/ boundary here */
7243 	for (d = j-1; d >= 0; d--) { /* careful w/ boundary here */
7244 	  Lbeta[cm->M][j][d] = ESL_MAX(Lbeta[cm->M][j][d], (Lbeta[cm->M][j][d+1] + cm->el_selfsc));
7245 	}
7246       }
7247     }
7248     if(fill_R && cp9b->Rvalid[cm->M]) {
7249       for (j = L; j > 0; j--) { /* careful w/ boundary here */
7250 	for (d = j-1; d >= 0; d--) { /* careful w/ boundary here */
7251 	  Rbeta[cm->M][j][d] = ESL_MAX(Rbeta[cm->M][j][d], (Rbeta[cm->M][j][d+1] + cm->el_selfsc));
7252 	}
7253       }
7254     }
7255   }
7256 
7257   fail1_flag = FALSE;
7258   fail2_flag = FALSE;
7259   if(do_check) {
7260     /* Check for consistency between the Inside alpha matrix and the
7261      * Outside beta matrix. we assume the Inside CYK parse score
7262      * (optsc) is the optimal score, so for all v,j,d:
7263      *
7264      * Jalpha[v][j][d] + Jbeta[v][j][d] <= optsc
7265      * Lalpha[v][j][d] + Lbeta[v][j][d] <= optsc
7266      * Ralpha[v][j][d] + Rbeta[v][j][d] <= optsc
7267      *
7268      * Further, we know that each residue must be emitted by a state
7269      * in the optimal parse. So as we do the above check, we determine
7270      * when we're in a cell that may be involved in the optimal parse
7271      * (the sum of the Inside and Outside scores is equal to the
7272      * optimal parse score), if that cell corresponds to a left
7273      * emitter emitting position i, we know an emitted i has been
7274      * observed in an optimal parse and set optseen[i] to TRUE.
7275      * Likewise, if that cell corresponds to a right emitter emitting
7276      * position j, we update optseen[j] to TRUE. At the end of the
7277      * check optseen[i] should be TRUE for all i in the range
7278      * [1..L].
7279      *
7280      * Note that we don't ensure that all of our presumed optimal
7281      * cells make up a valid parse, so it is possible we could pass
7282      * this check even if the Inside and Outside matrices are
7283      * inconsistent (i.e. there's a bug in the implementation of one
7284      * and/or the other) but that should be extremely unlikely.  If we
7285      * do this test many times for many different models and pass, we
7286      * should be confident we have consistent implementations.
7287      *
7288      * This is an expensive check and should only be done while
7289      * debugging.
7290      */
7291     ESL_ALLOC(optseen, sizeof(int) * (L+1));
7292     esl_vec_ISet(optseen, L+1, FALSE);
7293     vmax  = (cm->flags & CMH_LOCAL_END) ? cm->M : cm->M-1;
7294     if     (preset_mode == TRMODE_J) optsc = Jalpha[0][jp_0][Lp_0];
7295     else if(preset_mode == TRMODE_L) optsc = Lalpha[0][jp_0][Lp_0];
7296     else if(preset_mode == TRMODE_R) optsc = Ralpha[0][jp_0][Lp_0];
7297     else if(preset_mode == TRMODE_T) optsc = Talpha[0][jp_0][Lp_0];
7298     /* define bit score difference tolerance, somewhat arbitrarily:
7299      * clen <= 200: tolerance is 0.001; then a function of clen:
7300      * clen == 1000 tolerance is 0.005,
7301      * clen == 2000, tolerance is 0.01.
7302      *
7303      * I did this b/c with tests with SSU_rRNA_eukarya I noticed
7304      * failures with bit score differences up to 0.004 or so.  This
7305      * could mean a bug, but I couldn't get any average sized model to
7306      * fail with a difference above 0.001, so I blamed it on
7307      * precision. I'm not entirely convinced it isn't a bug but
7308      * until I see a failure on a smaller model it seems precision
7309      * is the most likely explanation, right?
7310      */
7311     tol = ESL_MAX(1e-3, (float) cm->clen / 200000.);
7312     for(v = 0; v <= vmax; v++) {
7313       do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
7314       do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
7315       do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
7316       do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
7317       jn = (v == cm->M) ? 1 : jmin[v];
7318       jx = (v == cm->M) ? L : jmax[v];
7319       for(j = jn; j <= jx; j++) {
7320 	jp_v = (v == cm->M) ? j : j - jmin[v];
7321 	dn   = (v == cm->M) ? 0 : hdmin[v][jp_v];
7322 	dx   = (v == cm->M) ? j : hdmax[v][jp_v];
7323 	for(d = dn; d <= dx; d++) {
7324 	  dp_v = (v == cm->M) ? d : d - hdmin[v][jp_v];
7325 	  Jsc  = (do_J_v) ? Jalpha[v][jp_v][dp_v] + Jbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
7326 	  Lsc  = (do_L_v) ? Lalpha[v][jp_v][dp_v] + Lbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
7327 	  Rsc  = (do_R_v) ? Ralpha[v][jp_v][dp_v] + Rbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
7328 	  Tsc  = (do_T_v) ? Talpha[v][jp_v][dp_v] + Tbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
7329 	  if(Jsc > tol) {
7330 	    printf("v: %d j: %d d: %d\n", v, j, d);
7331 	    printf("Check 1 J failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7332 		   v, j, d, Jalpha[v][jp_v][dp_v], Jbeta[v][jp_v][dp_v], Jalpha[v][jp_v][dp_v] + Jbeta[v][jp_v][dp_v], optsc);
7333 	    fail1_flag = TRUE;
7334 	  }
7335 	  if(Lsc > tol) {
7336 	    printf("Check 1 L failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7337 		   v, j, d, Lalpha[v][jp_v][dp_v], Lbeta[v][jp_v][dp_v], Lalpha[v][jp_v][dp_v] + Lbeta[v][jp_v][dp_v], optsc);
7338 	    fail1_flag = TRUE;
7339 	  }
7340 	  if(Rsc > tol) {
7341 	    printf("Check 1 R failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7342 		   v, j, d, Ralpha[v][jp_v][dp_v], Rbeta[v][jp_v][dp_v], Ralpha[v][jp_v][dp_v] + Rbeta[v][jp_v][dp_v], optsc);
7343 	    fail1_flag = TRUE;
7344 	  }
7345 	  if(Tsc > tol) {
7346 	    printf("Check 1 T failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7347 		   v, j, d, Talpha[v][jp_v][dp_v], Tbeta[v][jp_v][dp_v], Talpha[v][jp_v][dp_v] + Tbeta[v][jp_v][dp_v], optsc);
7348 	    fail1_flag = TRUE;
7349 	  }
7350 	  if((((do_J_v && fabs(Jsc) < tol) || (do_L_v && fabs(Lsc) < tol)) &&
7351 	      (cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st || (cm->sttype[v] == EL_st && d >0))) ||
7352 	     ((do_R_v && fabs(Rsc) < tol) && cm->sttype[v] == EL_st && d >0)) {
7353 	    i = j-d+1;
7354 	    /* i is accounted for by a parse with an optimal score */
7355 	    optseen[i] = TRUE;
7356 	    /*
7357 	      if     (fabs(Jsc) < tol) printf("\tResidue %4d possibly accounted for by J matrix Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);
7358 	      else if(fabs(Lsc) < tol) printf("\tResidue %4d possibly accounted for by L matrix Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);
7359 	      else if(fabs(Rsc) < tol) printf("\tResidue %4d possibly accounted for by R matrix Left  emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", i, Statetype(cm->sttype[v]), v, j, d);
7360 	    */
7361 	  }
7362 	  if(((do_J_v && fabs(Jsc) < tol) || (do_R_v && fabs(Rsc) < tol)) &&
7363 	     (cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st)) {
7364 	    /* j is accounted for by a parse with an optimal score */
7365 	    optseen[j] = TRUE;
7366 	    /*
7367 	       if     (fabs(Jsc) < tol) printf("\tResidue %4d possibly accounted for by J matrix Right emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", j, Statetype(cm->sttype[v]), v, j, d);
7368 	       else if(fabs(Rsc) < tol) printf("\tResidue %4d possibly accounted for by R matrix Right emitter %2s cell [v:%4d][j:%4d][d:%4d]\n", j, Statetype(cm->sttype[v]), v, j, d);
7369 	    */
7370 	  }
7371 	}
7372       }
7373     }
7374     for(j = 1; j <= L; j++) {
7375       if(optseen[j] == FALSE) {
7376 	printf("Check 2 failure: residue %d not emitted in the optimal parsetree\n", j);
7377 	fail2_flag = TRUE;
7378       }
7379     }
7380     free(optseen);
7381   }
7382   if(fail1_flag || fail2_flag) for(j = 1; j <= L; j++) printf("dsq[%4d]: %4d\n", j, dsq[j]);
7383 
7384 #if eslDEBUGLEVEL >= 2
7385   /* Uncomment to dump matrix to file. Careful...this could be very large. */
7386   /* FILE *fp1; fp1 = fopen("tmp.tru_ocykhbmx", "w");   cm_tr_hb_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1); */
7387 #endif
7388 
7389   if(do_check) {
7390     if     (fail1_flag) ESL_FAIL(eslFAIL, errbuf, "TrCYKHB Inside/Outside check1 FAILED.");
7391     else if(fail2_flag) ESL_FAIL(eslFAIL, errbuf, "TrCYKHB Inside/Outside check2 FAILED.");
7392     ESL_DPRINTF1(("#DEBUG: SUCCESS! TrCYKHB Inside/Outside checks PASSED.\n"));
7393   }
7394 
7395   if     (preset_mode == TRMODE_J) optsc = Jalpha[0][jp_0][Lp_0];
7396   else if(preset_mode == TRMODE_L) optsc = Lalpha[0][jp_0][Lp_0];
7397   else if(preset_mode == TRMODE_R) optsc = Ralpha[0][jp_0][Lp_0];
7398   else if(preset_mode == TRMODE_T) optsc = Talpha[0][jp_0][Lp_0];
7399   ESL_DPRINTF1(("#DEBUG: \tcm_TrCYKOutsideAlignHB() sc : %f (sc is from Inside!)\n", optsc));
7400 
7401   return eslOK;
7402 
7403  ERROR:
7404   ESL_FAIL(status, errbuf, "Out of memory");
7405   return status; /* NEVER REACHED */
7406 }
7407 
7408 /* Function: cm_TrOutsideAlign()
7409  * Date:     EPN, Mon Sep 19 14:48:30 2011
7410  *
7411  * Purpose: Run the truncated outside algorithm. Non-banded version.
7412  *           A CM_TR_MX DP matrix must be passed in.  Very similar to
7413  *           cm_TrCYKOutsideAlign() but calculates summed log probs of
7414  *           all likely parses instead of the most likely parse.
7415  *           i.e. uses log sum operations (FLogsum()) instead of max's.
7416  *           Meaning of cells (with i = j-d+1 throughout):
7417  *
7418  *           Jbeta[v][j][d]: summed log prob of all parsetrees that
7419  *                           emit 1..i-1 and j+1..L and pass through
7420  *                           v in Joint marginal mode at j,d.
7421  *           Lbeta[v][j][d]: summed log prob of all parsetrees that
7422  *                           emit 1..i-1 and j+1..L and pass through
7423  *                           v in Left marginal mode at j,d.
7424  *           Rbeta[v][j][d]: summed log prob of all parsetrees that
7425  *                           emit 1..i-1 and j+1..L and pass through
7426  *                           v in Right marginal mode at j,d.
7427  *           Tbeta[v][j][d]: only initialized here (cell [L][L] of ROOT_S and of B states when preset_mode is T); the recursion reads it but never updates it.
7428  * Args:     cm          - the model    [0..M-1]
7429  *           errbuf      - char buffer for reporting errors
7430  *           dsq         - the digitized sequence
7431  *           L           - length of the dsq to align
7432  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
7433  *           preset_mode - TRMODE_J, TRMODE_L, TRMODE_R, or TRMODE_T, the pre-determined
7434  *                         alignment mode, we'll only allow alignments in this mode.
7435  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
7436  *           do_check    - TRUE to attempt to check matrices for correctness
7437  *           mx          - the outside dp matrix to fill (non-banded), grown here to fit L via cm_tr_mx_GrowTo()
7438  *           ins_mx      - the already filled dp matrix from the Inside calculation (required);
7439  *                         presumably from cm_TrInsideAlign(), since the do_check test treats its scores as log sums -- TODO confirm
7440  *
7441  * Returns:  <eslOK> on success
7442  *
7443  * Throws:   <eslERANGE> if required CM_TR_MX size exceeds <size_limit> (the status of cm_tr_mx_GrowTo() is returned as is)
7444  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
7445  *           <eslEINVAL> if <preset_mode> is invalid, <eslEINCOMPAT> if <pass_idx> is unexpected. In all of these cases, alignment has been aborted.
7446  */
7447 int
cm_TrOutsideAlign(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,int do_check,CM_TR_MX * mx,CM_TR_MX * ins_mx)7448 cm_TrOutsideAlign(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
7449 		  int do_check, CM_TR_MX *mx, CM_TR_MX *ins_mx)
7450 {
7451   int      status;
7452   int      v,y,z;	       /* indices for states */
7453   int      j,d,i,k;	       /* indices in sequence dimensions */
7454   float    Jsc,Lsc,Rsc,Tsc;    /* temporary variables holding a float score */
7455   float    optsc;              /* the Inside score */
7456   float    escore;	       /* an emission score, tmp variable */
7457   int      voffset;	       /* index of v in t_v(y) transition scores */
7458 
7459   /* variables used only if do_check */
7460   int      fail_flag = FALSE; /* set to TRUE if do_check and we see a problem */
7461   int      vmax;              /* highest state index checked: cm->M (EL deck) if local ends are on, else cm->M-1 */
7462   float    tol;                /* tolerance for differences in bit scores */
7463 
7464   /* indices used in the depths of the DP recursion */
7465   int      sd;                 /* StateDelta() of parent y in the main recursion, of v in the local end step */
7466   int      sdl;                /* StateLeftDelta() of parent y in the main recursion, of v in the local end step */
7467   int      sdr;                /* StateRightDelta() of parent y in the main recursion, of v in the local end step */
7468 
7469   /* other variables used in truncated version, but not standard version (not in cm_OutsideAlign()) */
7470   int   fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
7471   int   pty_idx;                /* index for truncation penalty, determined by pass_idx */
7472   float trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
7473 
7474   /* DP matrix variables */
7475   float ***Jbeta   = mx->Jdp;     /* pointer to the outside Jbeta DP matrix */
7476   float ***Lbeta   = mx->Ldp;     /* pointer to the outside Lbeta DP matrix */
7477   float ***Rbeta   = mx->Rdp;     /* pointer to the outside Rbeta DP matrix */
7478   float ***Tbeta   = mx->Tdp;     /* pointer to the outside Tbeta DP matrix */
7479 
7480   float ***Jalpha  = ins_mx->Jdp; /* pointer to the precalc'ed inside Jalpha DP matrix */
7481   float ***Lalpha  = ins_mx->Ldp; /* pointer to the precalc'ed inside Lalpha DP matrix */
7482   float ***Ralpha  = ins_mx->Rdp; /* pointer to the precalc'ed inside Ralpha DP matrix */
7483   float ***Talpha  = ins_mx->Tdp; /* pointer to the precalc'ed inside Talpha DP matrix, only used to possibly get optsc */
7484 
7485   /* Validate preset_mode and determine which of the L, R and T matrices must be filled */
7486   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlign(): preset_mode is not J, L, R, or T");
7487   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrOutsideAlign(), bogus mode: %d", preset_mode);
7488 
7489   /* Determine the truncation penalty index, from the pass_idx */
7490   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrOutsideAlign(), unexpected pass idx: %d", pass_idx);
7491 
7492   /* grow the matrix for the current sequence length L (non-banded), any failure status is returned unchanged */
7493   if((status = cm_tr_mx_GrowTo(cm, mx, errbuf, L, size_limit)) != eslOK) return status;
7494 
7495   /* initialize all cells of the matrix to IMPOSSIBLE (L, R and T only if we will fill them) */
7496   if(mx->Jncells_valid > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
7497   if(mx->Lncells_valid > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
7498   if(mx->Rncells_valid > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
7499   if(mx->Tncells_valid > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
7500 
7501   /* initialize cells in the special ROOT_S deck corresponding to full sequence alignments to 0.0 */
7502   if     (preset_mode == TRMODE_J) Jbeta[0][L][L] = 0.; /* a full Joint    alignment is outside this cell */
7503   else if(preset_mode == TRMODE_L) Lbeta[0][L][L] = 0.; /* a full Left     alignment is outside this cell */
7504   else if(preset_mode == TRMODE_R) Rbeta[0][L][L] = 0.; /* a full Right    alignment is outside this cell */
7505   else if(preset_mode == TRMODE_T) Tbeta[0][L][L] = 0.; /* a full Terminal alignment is outside this cell */
7506   else ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlign() preset_mode %d is invalid", preset_mode); /* defensive only: preset_mode was already validated above */
7507 
7508   /* set cells corresponding to legal truncated begin entry states to
7509    * the appropriate penalty. In truncated alignment the only way out
7510    * of ROOT_S in local or global mode is via a 'truncated begin' with
7511    * a score (penalty) from cm->trp into any emitting state. The
7512    * penalty was calculated in cm_tr_penalties_Create() and differs
7513    * depending on whether we are in local or global mode and the value
7514    * of 'pty_idx' which was derived from the passed in 'pass_idx'.
7515    */
7516   for(v = 0; v < cm->M; v++) {
7517     trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
7518     if(NOT_IMPOSSIBLE(trpenalty)) {
7519       if(preset_mode == TRMODE_J) Jbeta[v][L][L] = trpenalty; /* a full Joint alignment is outside this cell */
7520       if(preset_mode == TRMODE_L) Lbeta[v][L][L] = trpenalty; /* a full Left  alignment is outside this cell */
7521       if(preset_mode == TRMODE_R) Rbeta[v][L][L] = trpenalty; /* a full Right alignment is outside this cell */
7522       if(preset_mode == TRMODE_T && cm->sttype[v] == B_st) {
7523 	Tbeta[v][L][L] = trpenalty; /* a full Terminal alignment is outside this cell */
7524       }
7525     }
7526   }
7527 
7528   /* main loop down through the decks */
7529   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
7530     if(! StateIsDetached(cm, v)) { /* skip detached inserts, their cells will remain IMPOSSIBLE */
7531       sd  = StateDelta(cm->sttype[v]);      /* note: sd  is reassigned (for y, or again for v) before it is read below */
7532       sdr = StateRightDelta(cm->sttype[v]); /* note: sdr is reassigned (for y, or again for v) before it is read below */
7533 
7534       if (cm->stid[v] == BEGL_S) { /* BEGL_S */
7535 	y = cm->plast[v];	/* the parent bifurcation    */
7536 	z = cm->cnum[y];	/* the other (right) S state */
7537 	for(j = 0; j <= L; j++) {
7538 	  for (d = 0; d <= j; d++) {
7539 	    for (k = 0; k <= (L-j); k++) { /* k: length of the subsequence j+1..j+k accounted for by right sibling z */
7540 	      Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Jbeta[y][j+k][d+k] + Jalpha[z][j+k][k]); /* A: y in J, z in J */
7541 	      if(fill_L) {
7542 		Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Lbeta[y][j+k][d+k] + Lalpha[z][j+k][k]); /* B: y in L, z in L, adds to the J cell of v */
7543 	      }
7544 	      if(fill_R) {
7545 		Rbeta[v][j][d] = FLogsum(Rbeta[v][j][d], Rbeta[y][j+k][d+k] + Jalpha[z][j+k][k]); /* C: y in R, z in J */
7546 		if(fill_T && fill_L && d == j && (j+k) == L) {
7547 		  Rbeta[v][j][d] = FLogsum(Rbeta[v][j][d], Tbeta[y][j+k][d+k] + Lalpha[z][j+k][k]); /* D: y in T, z in L, only for the cell of y covering 1..L */
7548 		  /* Note: Tbeta[y][j+k==L][d+k==L] will be 0.0 or
7549 		   * IMPOSSIBLE because it was initialized that
7550 		   * way. That T cell includes the full target 1..L
7551 		   * (any valid T alignment must because we must
7552 		   * account for the full target) rooted at a B state,
7553 		   * and a transition from that B state to this BEGL_S
7554 		   * is always probability 1.0.
7555 		   */
7556 		}
7557 	      }
7558 	    }
7559 	    if(fill_L) {
7560 	      Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Lbeta[y][j][d]); /* entire sequence on left, no sequence on right, k == 0 */
7561 	      Lbeta[v][j][d] = FLogsum(Lbeta[v][j][d], Lbeta[y][j][d]); /* entire sequence on left, no sequence on right, k == 0 */
7562 	    }
7563 	  }
7564 	}
7565       } /* end of 'if (cm->stid[v] == BEGL_S */
7566       else if (cm->stid[v] == BEGR_S) {
7567 	y = cm->plast[v];   /* the parent bifurcation    */
7568 	z = cm->cfirst[y];  /* the other (left) S state  */
7569 	for(j = 0; j <= L; j++) {
7570 	  for (d = 0; d <= j; d++) {
7571 	    i = j-d+1;
7572 	    for (k = 0; k <= (j-d); k++) { /* k: length of the subsequence ending at i-1 (== j-d) accounted for by left sibling z */
7573 	      Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Jbeta[y][j][d+k] + Jalpha[z][j-d][k]); /* A: y in J, z in J */
7574 	      if(fill_R) {
7575 		Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Rbeta[y][j][d+k] + Ralpha[z][j-d][k]); /* C: y in R, z in R, adds to the J cell of v */
7576 	      }
7577 	      if(fill_L) {
7578 		Lbeta[v][j][d] = FLogsum(Lbeta[v][j][d], Lbeta[y][j][d+k] + Jalpha[z][j-d][k]); /* B: y in L, z in J */
7579 		if(fill_R && fill_T && k == (i-1) && j == L) {
7580 		  Lbeta[v][j][d] = FLogsum(Lbeta[v][j][d], Tbeta[y][j][d+k] + Ralpha[z][j-d][k]); /* D: y in T, z in R, only for the cell of y covering 1..L */
7581 		  /* Note: Tbeta[y][j==L][d+k==L] will be 0.0 or
7582 		   * IMPOSSIBLE because it was initialized that
7583 		   * way. That T cell includes the full target 1..L
7584 		   * (any valid T alignment must because we must
7585 		   * account for the full target) rooted at a B state,
7586 		   * and a transition from that B state to this BEGR_S
7587 		   * is always probability 1.0.
7588 		   */
7589 		}
7590 	      }
7591 	    }
7592 	    if(fill_R) {
7593 	      Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Rbeta[y][j][d]); /* entire sequence on right, no sequence on left, k == 0 */
7594 	      Rbeta[v][j][d] = FLogsum(Rbeta[v][j][d], Rbeta[y][j][d]); /* entire sequence on right, no sequence on left, k == 0 */
7595 	    }
7596 	  }
7597 	}
7598       } /* end of 'else if (cm->stid[v] == BEGR_S */
7599       else { /* (cm->stid[v] != BEGL_S && cm->stid[v] != BEGR_S) */
7600 	for (j = L; j >= 0; j--) {
7601 	  i = 1; /* d starts at j below, so i == j-d+1 is kept in step by the i++ in the d loop */
7602 	  for (d = j; d >= 0; d--, i++) {
7603 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
7604 	      /* mind the following sneaky if statement: in truncated
7605 	       * aln, the only way out of state 0 is through a
7606 	       * truncated begin, which we handled above (search for
7607 	       * 'trpenalty'). If we're in local mode transitions out
7608 	       * of 0 will have IMPOSSIBLE scores, but NOT if we're in
7609 	       * glocal mode, so we need this 'if'.
7610 	       */
7611 	      if(y != 0) {
7612 		voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
7613 		sd  = StateDelta(cm->sttype[y]);
7614 		sdl = StateLeftDelta(cm->sttype[y]);
7615 		sdr = StateRightDelta(cm->sttype[y]);
7616 		switch(cm->sttype[y]) {
7617 		case MP_st:
7618 		  if(j != L && d != j) { /* need a residue on each side: i-1 >= 1 and j+1 <= L */
7619 		    escore = cm->oesc[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
7620 		    Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Jbeta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore);
7621 		  }
7622 		  if(fill_L && j == L && d != j)  { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
7623 		    escore = cm->lmesc[y][dsq[i-1]];
7624 		    Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Lbeta[y][j][d+sdl]     + cm->tsc[y][voffset] + escore);
7625 		    Lbeta[v][j][d] = FLogsum(Lbeta[v][j][d], Lbeta[y][j][d+sdl]     + cm->tsc[y][voffset] + escore);
7626 		  }
7627 		  if(fill_R && i == 1 && j != L) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
7628 		    escore = cm->rmesc[y][dsq[j+1]];
7629 		    Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Rbeta[y][j+sdr][d+sdr] + cm->tsc[y][voffset] + escore);
7630 		    Rbeta[v][j][d] = FLogsum(Rbeta[v][j][d], Rbeta[y][j+sdr][d+sdr] + cm->tsc[y][voffset] + escore);
7631 		  }
7632 		  break;
7633 		case ML_st:
7634 		case IL_st:
7635 		  if (d != j) { /* need a residue on the left: i-1 >= 1 */
7636 		    escore = cm->oesc[y][dsq[i-1]];
7637 		    Jbeta[v][j][d]            = FLogsum(Jbeta[v][j][d], Jbeta[y][j][d+sd]     + cm->tsc[y][voffset] + escore);
7638 		    if(fill_L) Lbeta[v][j][d] = FLogsum(Lbeta[v][j][d], Lbeta[y][j][d+sd]     + cm->tsc[y][voffset] + escore);
7639 		  }
7640 		  if(fill_R && i == 1 && /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
7641 		     v != y ) {          /* will only happen if v == IL, we don't allow silent self transitions from IL->IL */
7642 		    Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Rbeta[y][j][d]        + cm->tsc[y][voffset]);
7643 		    Rbeta[v][j][d] = FLogsum(Rbeta[v][j][d], Rbeta[y][j][d]        + cm->tsc[y][voffset]);
7644 		  }
7645 		  break;
7646 		case MR_st:
7647 		case IR_st:
7648 		  if (j != L) { /* need a residue on the right: j+1 <= L */
7649 		    escore = cm->oesc[y][dsq[j+1]];
7650 		    Jbeta[v][j][d]            = FLogsum(Jbeta[v][j][d], Jbeta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore);
7651 		    if(fill_R) Rbeta[v][j][d] = FLogsum(Rbeta[v][j][d], Rbeta[y][j+sdr][d+sd] + cm->tsc[y][voffset] + escore);
7652 		  }
7653 		  if(fill_L && j == L && /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
7654 		     v != y) {           /* will only happen if v == IR, we don't allow silent self transitions from IR->IR */
7655 		    Jbeta[v][j][d] = FLogsum(Jbeta[v][j][d], Lbeta[y][j][d]        + cm->tsc[y][voffset]);
7656 		    Lbeta[v][j][d] = FLogsum(Lbeta[v][j][d], Lbeta[y][j][d]        + cm->tsc[y][voffset]);
7657 		  }
7658 		  break;
7659 		case S_st:
7660 		case E_st:
7661 		case D_st:
7662 		  Jbeta[v][j][d]            = FLogsum(Jbeta[v][j][d], Jbeta[y][j][d] + cm->tsc[y][voffset]);
7663 		  if(fill_L) Lbeta[v][j][d] = FLogsum(Lbeta[v][j][d], Lbeta[y][j][d] + cm->tsc[y][voffset]);
7664 		  if(fill_R) Rbeta[v][j][d] = FLogsum(Rbeta[v][j][d], Rbeta[y][j][d] + cm->tsc[y][voffset]);
7665 		  break;
7666 		} /* end of switch(cm->sttype[y] */
7667 	      } /* end of sneaky if y != 0 */
7668 	    }  /* ends for loop over parent states. we now know beta[v][j][d] for this d */
7669 	    if (Jbeta[v][j][d] < IMPOSSIBLE) Jbeta[v][j][d] = IMPOSSIBLE; /* floor the J cell at IMPOSSIBLE (only the J matrix is floored here) */
7670 	  } /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
7671 	} /* end loop over j. We know beta for this whole state */
7672       } /* end of 'else' (if cm->stid[v] != BEGL_S, BEGR_S) */
7673     } /* end of 'if(! StateIsDetached(cm, v))' */
7674     /* we're done calculating deck v for everything but local ends */
7675 
7676     /* deal with local end transitions v->EL for the J matrix, and for L and R if we're filling them (EL = deck at M.) */
7677     if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
7678       sd  = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
7679       sdl = StateLeftDelta(cm->sttype[v]);  /* note sdl is for state v */
7680       sdr = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
7681 
7682       for (j = 0; j <= L; j++) {
7683 	for (d = 0; d <= j; d++) {
7684 	  i = j-d+1;
7685 	  switch (cm->sttype[v]) {
7686 	  case MP_st:
7687 	    if (j != L && d != j) {
7688 	      escore = cm->oesc[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
7689 	      Jbeta[cm->M][j][d] = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][j+sdr][d+sd] + cm->endsc[v] + escore));
7690 	    }
7691 	    if(fill_L && j == L && d != j) { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
7692 	      escore = cm->lmesc[v][dsq[i-1]];
7693 	      Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[v][j][d+sdl] + cm->endsc[v] + escore));
7694 	    }
7695 	    if(fill_R && i == 1 && j != L) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
7696 	      escore = cm->rmesc[v][dsq[j+1]];
7697 	      Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[v][j+sdr][d+sdr] + cm->endsc[v] + escore));
7698 	    }
7699 	    break;
7700 	  case ML_st:
7701 	  case IL_st:
7702 	    if (d != j) {
7703 	      escore = cm->oesc[v][dsq[i-1]];
7704 	      Jbeta[cm->M][j][d]            = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][j][d+sd]  + cm->endsc[v] + escore));
7705 	      if(fill_L) Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[v][j][d+sdl] + cm->endsc[v] + escore));
7706 	    }
7707 	    if(fill_R && i == 1) { /* only allow transition from R if we haven't emitted any residues leftwise (i == 1) */
7708 	      Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], Rbeta[v][j][d] + cm->endsc[v]);
7709 	    }
7710 	    break;
7711 	  case MR_st:
7712 	  case IR_st:
7713 	    if(j != L) {
7714 	      escore = cm->oesc[v][dsq[j+1]];
7715 	      Jbeta[cm->M][j][d]            = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][j+sdr][d+sd]  + cm->endsc[v] + escore));
7716 	      if(fill_R) Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[v][j+sdr][d+sdr] + cm->endsc[v] + escore));
7717 	    }
7718 	    if(fill_L && j == L) { /* only allow transition from L if we haven't emitted any residues rightwise (j == L) */
7719 	      Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], Lbeta[v][j][d] + cm->endsc[v]);
7720 	    }
7721 	    break;
7722 	  case S_st:
7723 	  case D_st:
7724 	  case E_st:
7725 	    Jbeta[cm->M][j][d]            = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][j+sdr][d+sd] + cm->endsc[v]));
7726 	    if(fill_L) Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[v][j+sdr][d+sd] + cm->endsc[v]));
7727 	    if(fill_R) Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[v][j+sdr][d+sd] + cm->endsc[v]));
7728 	    break;
7729 	  }
7730 	}
7731       }
7732     }
7733   }
7734   /* Deal with last step needed for local alignment
7735    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
7736    */
7737   if (cm->flags & CMH_LOCAL_END) {
7738     for (j = L; j > 0; j--) { /* careful w/ boundary here */
7739       for (d = j-1; d >= 0; d--) { /* careful w/ boundary here: d starts at j-1 so that d+1 <= j, each cell builds on the d+1 cell just updated */
7740 	Jbeta[cm->M][j][d]            = FLogsum(Jbeta[cm->M][j][d], Jbeta[cm->M][j][d+1] + cm->el_selfsc);
7741 	if(fill_L) Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], Lbeta[cm->M][j][d+1] + cm->el_selfsc);
7742 	if(fill_R) Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], Rbeta[cm->M][j][d+1] + cm->el_selfsc);
7743       }
7744     }
7745   }
7746 
7747   fail_flag = FALSE; /* already FALSE from its initializer, nothing above changes it */
7748   if(do_check) {
7749     /* Check for consistency between the Inside alpha matrix and the
7750      * Outside beta matrix. If the Inside score (optsc) really is
7751      * the log sum of all possible parsetrees that emit the full
7752      * target sequence 1..L, then for all v,j,d:
7753      *
7754      * Jalpha[v][j][d] + Jbeta[v][j][d] <= optsc
7755      * Lalpha[v][j][d] + Lbeta[v][j][d] <= optsc
7756      * Ralpha[v][j][d] + Rbeta[v][j][d] <= optsc
7757      * Talpha[v][j][d] + Tbeta[v][j][d] <= optsc
7758      *
7759      * We do a more extensive check in cm_TrCYKOutsideAlign(), but
7760      * it doesn't apply here, because we've summed all parsetrees
7761      * instead of finding only the optimal one.
7762      *
7763      * Note that if fill_L, fill_R or fill_T is FALSE (or, for T,
7764      * if v is not a B state) the corresponding score is set to
7765      * IMPOSSIBLE below instead of being read from the matrices,
7766      * so matrices we didn't fill can't cause us to fail our
7767      * tests here.
7768      *
7769      * This is an expensive check and should only be done while
7770      * debugging.
7771      */
7772     vmax = (cm->flags & CMH_LOCAL_END) ? cm->M : cm->M-1;
7773     if     (preset_mode == TRMODE_J) optsc = Jalpha[0][L][L];
7774     else if(preset_mode == TRMODE_L) optsc = Lalpha[0][L][L];
7775     else if(preset_mode == TRMODE_R) optsc = Ralpha[0][L][L];
7776     else if(preset_mode == TRMODE_T) optsc = Talpha[0][L][L];
7777     /* define bit score difference tolerance, somewhat arbitrarily:
7778      * clen <= 200: tolerance is 0.001; then a function of clen:
7779      * clen == 1000 tolerance is 0.005,
7780      * clen == 2000, tolerance is 0.01.
7781      *
7782      * I did this b/c with tests with SSU_rRNA_eukarya I noticed
7783      * failures with bit score differences up to 0.004 or so.  This
7784      * could mean a bug, but I couldn't get any average sized model to
7785      * fail with a difference above 0.001, so I blamed it on
7786      * precision. I'm not entirely convinced it isn't a bug but
7787      * until I see a failure on a smaller model it seems precision
7788      * is the most likely explanation, right?
7789      */
7790     tol = ESL_MAX(1e-3, (float) cm->clen / 200000.);
7791     for(v = 0; v <= vmax; v++) {
7792       for(j = 1; j <= L; j++) {
7793 	for(d = 0; d <= j; d++) {
7794 	  Jsc  = Jalpha[v][j][d] + Jbeta[v][j][d] - optsc;
7795 	  Lsc  = (fill_L) ? Lalpha[v][j][d] + Lbeta[v][j][d] - optsc : IMPOSSIBLE;
7796 	  Rsc  = (fill_R) ? Ralpha[v][j][d] + Rbeta[v][j][d] - optsc : IMPOSSIBLE;
7797 	  Tsc  = (fill_T && cm->sttype[v] == B_st) ? Talpha[v][j][d] + Tbeta[v][j][d] - optsc : IMPOSSIBLE;
7798 	  if(Jsc > tol) {
7799 	    printf("Check J failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7800 		   v, j, d, Jalpha[v][j][d], Jbeta[v][j][d], Jalpha[v][j][d] + Jbeta[v][j][d], optsc);
7801 	    fail_flag = TRUE;
7802 	  }
7803 	  if(Lsc > tol) {
7804 	    printf("Check L failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7805 		   v, j, d, Lalpha[v][j][d], Lbeta[v][j][d], Lalpha[v][j][d] + Lbeta[v][j][d], optsc);
7806 	    fail_flag = TRUE;
7807 	  }
7808 	  if(Rsc > tol) {
7809 	    printf("Check R failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7810 		   v, j, d, Ralpha[v][j][d], Rbeta[v][j][d], Ralpha[v][j][d] + Rbeta[v][j][d], optsc);
7811 	    fail_flag = TRUE;
7812 	  }
7813 	  if(cm->sttype[v] == B_st && Tsc > tol) { /* the T matrices are only read for B states */
7814 	    printf("Check 1 T failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
7815 		   v, j, d, ins_mx->Tdp[v][j][d], Tbeta[v][j][d], ins_mx->Tdp[v][j][d] + Tbeta[v][j][d], optsc);
7816 	    fail_flag = TRUE;
7817 	  }
7818 	}
7819       }
7820     }
7821     if(fail_flag) for(j = 1; j <= L; j++) printf("dsq[%4d]: %4d\n", j, dsq[j]); /* on failure, dump the sequence to help debugging */
7822   }
7823 
7824 #if eslDEBUGLEVEL >= 2
7825   /* Uncomment to dump matrix to file. Careful...this could be very large. */
7826   /* FILE *fp1; fp1 = fopen("tmp.tru_omx", "w");   cm_tr_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1); */
7827 #endif
7828 
7829   if(do_check) {
7830     if  (fail_flag) ESL_FAIL(eslFAIL, errbuf, "Tr Inside/Outside check FAILED.");
7831     ESL_DPRINTF1(("#DEBUG: SUCCESS! Tr Inside/Outside check PASSED.\n"));
7832     /*printf("SUCCESS! Tr Inside/Outside check PASSED.\n");*/
7833   }
7834 
7835   if     (preset_mode == TRMODE_J) optsc = Jalpha[0][L][L]; /* fetch the Inside score (again, if do_check) for the debugging printout below */
7836   else if(preset_mode == TRMODE_L) optsc = Lalpha[0][L][L];
7837   else if(preset_mode == TRMODE_R) optsc = Ralpha[0][L][L];
7838   else if(preset_mode == TRMODE_T) optsc = Talpha[0][L][L];
7839   ESL_DPRINTF1(("#DEBUG: \tcm_TrOutsideAlign() sc : %f (sc is from Inside!)\n", optsc));
7840 
7841   return eslOK;
7842 }
7843 
7844 
7845 /* Function: cm_TrOutsideAlignHB()
7846  * Date:     EPN, Tue Oct 11 09:13:17 2011
7847  *
7848  * Purpose: Run the truncated outside algorithm. HMM banded version.
7849  *           See cm_TrOutsideAlign() for the non-banded version. The
7850  *           full target sequence 1..L is aligned.
7851  *
7852  *           A CM_TR_HB_MX DP matrix must be passed in.  Very similar to
7853  *           cm_TrCYKOutsideAlignHB() but calculates summed log probs of
7854  *           all likely parses instead of the most likely parse.
7855  *           i.e. uses log sum operations instead of max's.  Meaning of
7856  *           cells:
7857  *
7858  *           Jbeta[v][jp_v][dp_v]: summed log prob of all parsetrees that
7859  *                           emit 1..i-1 and j+1..L and pass through
7860  *                           v in Joint marginal mode at j,d.
7861  *           Lbeta[v][jp_v][dp_v]: summed log prob of all parsetrees that
7862  *                           emit 1..i-1 and j+1..L and pass through
7863  *                           v in Left marginal mode at j,d.
7864  *           Rbeta[v][jp_v][dp_v]: summed log prob of all parsetrees that
7865  *                           emit 1..i-1 and j+1..L and pass through
7866  *                           v in Right marginal mode at j,d.
7867  *
7868  *           Where jp_v = j-jmin[v] and dp_v = d-hdmin[v][jp_v];
7869  *
7870  * Args:     cm          - the model
7871  *           errbuf      - char buffer for reporting errors
7872  *           dsq         - the digitized sequence
7873  *           L           - length of the dsq to align
7874  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
7875  *           preset_mode - TRMODE_J, TRMODE_L, TRMODE_R, or TRMODE_T, the pre-determined
7876  *                         alignment mode, we'll only allow alignments in this mode.
7877  *           pass_idx    - pipeline pass index, indicates what truncation penalty to use
7878  *           do_check    - TRUE to attempt to check matrices for correctness
7879  *           mx          - the dp matrix, only cells within bands in cp9b will be valid
7880  *           ins_mx      - the dp matrix from the Inside run calculation (required)
7881  *
7882  * Returns:  <eslOK> on success
7883  *
7884  * Throws:   <eslERANGE> if required CM_TR_HB_MX size exceeds <size_limit>
7885  *           <eslFAIL>   if <do_check>==TRUE and we fail a test
7886  *           In either of these cases, alignment has been aborted.
7887  */
7888 int
cm_TrOutsideAlignHB(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int L,float size_limit,char preset_mode,int pass_idx,int do_check,CM_TR_HB_MX * mx,CM_TR_HB_MX * ins_mx)7889 cm_TrOutsideAlignHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx,
7890 		    int do_check, CM_TR_HB_MX *mx, CM_TR_HB_MX *ins_mx)
7891 {
7892   int      status;
7893   int      v,y,z;	       /* indices for states */
7894   float    Jsc,Lsc,Rsc,Tsc;    /* temporary variables holding a float score */
7895   int      j,d,i,k;	       /* indices in sequence dimensions */
7896   float  **esc_vAA;            /* ptr to cm->oesc, optimized emission scores */
7897   float    optsc;              /* optimal score in <preset_mode>, from Inside */
7898   float    escore;	       /* an emission score, tmp variable */
7899   int      voffset;	       /* index of v in t_v(y) transition scores */
7900   int      emitmode;           /* EMITLEFT, EMITRIGHT, EMITPAIR, EMITNONE, for state y */
7901   int      sd;                 /* StateDelta(cm->sttype[y]) */
7902   int      sdl;                /* StateLeftDelta(cm->sttype[y] */
7903   int      sdr;                /* StateRightDelta(cm->sttype[y] */
7904 
7905   /* variables used only if do_check */
7906   int      fail_flag = FALSE; /* set to TRUE if do_check and we see a problem */
7907   int      vmax;              /* i, offset in the matrix */
7908   float    tol;                /* tolerance for differences in bit scores */
7909 
7910   /* band related variables */
7911   int      dp_v;               /* d index for state v in alpha w/mem eff bands */
7912   int      dp_y;               /* d index for state y in alpha w/mem eff bands */
7913   int      kp_z;               /* k (in the d dim) index for state z in alpha w/mem eff bands */
7914   int      Lp;                 /* L index also changes depending on state */
7915   int      jp_v, jp_y, jp_z;   /* offset j index for states v, y, z */
7916   int      kmin, kmax;         /* temporary minimum/maximum allowed k */
7917   int      jn, jx;             /* current minimum/maximum j allowed */
7918   int      dn, dx;             /* current minimum/maximum d allowed */
7919   int      jp_0;               /* L offset in ROOT_S's (v==0) j band */
7920   int      Lp_0;               /* L offset in ROOT_S's (v==0) d band */
7921 
7922   /* variables related to truncated alignment (not in cm_CYKInsideAlignHB() */
7923   int      fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
7924   int      do_J_v, do_J_y, do_J_z; /* must we fill J matrix deck for state v, y, z? */
7925   int      do_L_v, do_L_y, do_L_z; /* must we fill L matrix deck for state v, y, z? */
7926   int      do_R_v, do_R_y, do_R_z; /* must we fill R matrix deck for state v, y, z? */
7927   int      do_T_v, do_T_y;         /* is T matrix valid for state v, y?    */
7928   int      pty_idx;                /* index for truncation penalty, determined by pass_idx */
7929   float    trpenalty;              /* truncation penalty, differs based on pty_idx and if we're local or global */
7930 
7931   /* DP matrix variables */
7932   float ***Jbeta   = mx->Jdp;     /* pointer to the outside Jbeta DP matrix */
7933   float ***Lbeta   = mx->Ldp;     /* pointer to the outside Lbeta DP matrix */
7934   float ***Rbeta   = mx->Rdp;     /* pointer to the outside Rbeta DP matrix */
7935   float ***Tbeta   = mx->Tdp;     /* pointer to the outside Tbeta DP matrix */
7936 
7937   float ***Jalpha  = ins_mx->Jdp; /* pointer to the precalc'ed inside Jalpha DP matrix */
7938   float ***Lalpha  = ins_mx->Ldp; /* pointer to the precalc'ed inside Lalpha DP matrix */
7939   float ***Ralpha  = ins_mx->Rdp; /* pointer to the precalc'ed inside Ralpha DP matrix */
7940   float ***Talpha  = ins_mx->Tdp; /* pointer to the precalc'ed inside Talpha DP matrix, only used to possibly get optsc */
7941 
7942   /* ptrs to cp9b info, for convenience */
7943   CP9Bands_t *cp9b = cm->cp9b;
7944   int     *jmin    = cm->cp9b->jmin;
7945   int     *jmax    = cm->cp9b->jmax;
7946   int    **hdmin   = cm->cp9b->hdmin;
7947   int    **hdmax   = cm->cp9b->hdmax;
7948 
7949   /* Allocations and initializations */
7950   esc_vAA = cm->oesc;            /* a ptr to the optimized emission scores */
7951 
7952   /* Determine which matrices we need to fill in, based on <preset_mode> */
7953   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB(): preset_mode is not J, L, R, or T");
7954   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrOutsideAlignHB(), bogus mode: %d", preset_mode);
7955 
7956   /* Determine the truncation penalty index, from the pass_idx */
7957   if((pty_idx = cm_tr_penalties_IdxForPass(pass_idx)) == -1) ESL_FAIL(eslEINCOMPAT, errbuf, "cm_TrOutsideAlignHB(), unexpected pass idx: %d", pass_idx);
7958 
7959   /* grow the matrix based on the current sequence and bands */
7960   if((status = cm_tr_hb_mx_GrowTo(cm, mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
7961 
7962   /* initialize all cells of the matrix to IMPOSSIBLE */
7963   if(mx->Jncells_valid > 0)           esl_vec_FSet(mx->Jdp_mem, mx->Jncells_valid, IMPOSSIBLE);
7964   if(mx->Lncells_valid > 0 && fill_L) esl_vec_FSet(mx->Ldp_mem, mx->Lncells_valid, IMPOSSIBLE);
7965   if(mx->Rncells_valid > 0 && fill_R) esl_vec_FSet(mx->Rdp_mem, mx->Rncells_valid, IMPOSSIBLE);
7966   if(mx->Tncells_valid > 0 && fill_T) esl_vec_FSet(mx->Tdp_mem, mx->Tncells_valid, IMPOSSIBLE);
7967 
7968   /* ensure a full alignment in <preset_mode> to ROOT_S (v==0) is allowed by the bands */
7969   if      (preset_mode == TRMODE_J && (! cp9b->Jvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB() preset_mode is J but cp9b->Jvalid[0] is FALSE");
7970   else if (preset_mode == TRMODE_L && (! cp9b->Lvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB() preset_mode is L but cp9b->Lvalid[0] is FALSE");
7971   else if (preset_mode == TRMODE_R && (! cp9b->Rvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB() preset_mode is R but cp9b->Rvalid[0] is FALSE");
7972   else if (preset_mode == TRMODE_T && (! cp9b->Tvalid[0])) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB() preset_mode is T but cp9b->Tvalid[0] is FALSE");
7973 
7974   if (jmin[0] > L        || jmax[0] < L)        ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, jmin[0], jmax[0]);
7975   jp_0 = L - jmin[0];
7976   if (hdmin[0][jp_0] > L || hdmax[0][jp_0] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, hdmin[0][jp_0], hdmax[0][jp_0]);
7977   Lp_0 = L - hdmin[0][jp_0];
7978 
7979   /* initialize cells in the special ROOT_S deck corresponding to full sequence alignments to 0.0 */
7980   if     (preset_mode == TRMODE_J) Jbeta[0][jp_0][Lp_0] = 0.; /* a full Joint    alignment is outside this cell */
7981   else if(preset_mode == TRMODE_L) Lbeta[0][jp_0][Lp_0] = 0.; /* a full Left     alignment is outside this cell */
7982   else if(preset_mode == TRMODE_R) Rbeta[0][jp_0][Lp_0] = 0.; /* a full Right    alignment is outside this cell */
7983   else if(preset_mode == TRMODE_T) Tbeta[0][jp_0][Lp_0] = 0.; /* a full Terminal alignment is outside this cell */
7984   else ESL_FAIL(eslEINVAL, errbuf, "cm_TrOutsideAlignHB() preset_mode %d is invalid", preset_mode);
7985 
7986   /* set cells corresponding to legal truncated begin entry states to
7987    * the appropriate penalty. In truncated alignment the only way out
7988    * of ROOT_S in local or global mode is via a 'truncated begin' with
7989    * a score (penalty) from cm->trp into any emitting state. The
7990    * penalty was calculated in cm_tr_penalties_Create() and differs
7991    * depending on whether we are in local or global mode and the value
7992    * of 'pty_idx' which was passed in.
7993    */
7994   for(v = 0; v < cm->M; v++) {
7995     if((L >= jmin[v]) && (L <= jmax[v])) {
7996       jp_v = L - jmin[v];
7997       if((L >= hdmin[v][jp_v]) && L <= hdmax[v][jp_v]) {
7998 	Lp = L - hdmin[v][jp_v];
7999 
8000 	trpenalty = (cm->flags & CMH_LOCAL_BEGIN) ? cm->trp->l_ptyAA[pty_idx][v] : cm->trp->g_ptyAA[pty_idx][v];
8001 	if(NOT_IMPOSSIBLE(trpenalty)) {
8002 	  do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
8003 	  do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
8004 	  do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
8005 	  do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
8006 	  if(preset_mode == TRMODE_J && do_J_v) Jbeta[v][jp_v][Lp] = trpenalty; /* a full Joint alignment is outside this cell */
8007 	  if(preset_mode == TRMODE_L && do_L_v) Lbeta[v][jp_v][Lp] = trpenalty; /* a full Left  alignment is outside this cell */
8008 	  if(preset_mode == TRMODE_R && do_R_v) Rbeta[v][jp_v][Lp] = trpenalty; /* a full Right alignment is outside this cell */
8009 	  if(preset_mode == TRMODE_T && do_T_v && cm->sttype[v] == B_st) {
8010 	    Tbeta[v][jp_v][Lp] = trpenalty; /* a full Terminal alignment is outside this cell */
8011 	  }
8012 	}
8013       }
8014     }
8015   }
8016   /* done allocation/initialization */
8017 
8018   /* Recursion: main loop down through the decks */
8019   for (v = 1; v < cm->M; v++) { /* start at state 1 because we set all values for ROOT_S state 0 above */
8020     if(! StateIsDetached(cm, v)) {
8021       sd  = StateDelta(cm->sttype[v]);
8022       sdr = StateRightDelta(cm->sttype[v]);
8023       do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
8024       do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
8025       do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
8026       do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
8027 
8028       /* if the v deck is invalid in J, L R and T mode, all states for v will remain impossible */
8029       if(! (do_J_v || do_L_v || do_R_v || do_T_v)) continue;
8030 
8031       if (cm->stid[v] == BEGL_S) { /* BEGL_S */
8032 	y = cm->plast[v];	/* the parent bifurcation    */
8033 	z = cm->cnum[y];	/* the other (right) S state */
8034 
8035 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
8036 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
8037 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
8038 	do_T_y = cp9b->Tvalid[y] && fill_T ? TRUE : FALSE; /* will be FALSE, y is not a B_st */
8039 
8040 	do_J_z = cp9b->Jvalid[z]           ? TRUE : FALSE;
8041 	do_L_z = cp9b->Lvalid[z] && fill_L ? TRUE : FALSE;
8042 	do_R_z = cp9b->Rvalid[z] && fill_R ? TRUE : FALSE;
8043 
8044 	for (j = jmax[v]; j >= jmin[v]; j--) {
8045 	  ESL_DASSERT1((j >= 0 && j <= L));
8046 	  jp_v = j - jmin[v];
8047 	  jp_y = j - jmin[y];
8048 	  jp_z = j - jmin[z];
8049 	  i = j-d+1;
8050 	  for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
8051 	    dp_v = d - hdmin[v][jp_v];
8052 	    /* Find the first k value that implies a valid cell in the y and z decks.
8053 	     * This k must satisfy the following 8 inequalities (some may be redundant):
8054 	     * NOTE: these are different from those in Inside() (for one thing, v and y
8055 	     *       (BEGL_S and BIF_B here respectively) are switched relative to Inside.
8056 	     *
8057 	     * (1) k <= jmax[y] - j;
8058 	     * (2) k >= jmin[y] - j;
8059 	     * (3) k <= jmax[z] - j;
8060 	     * (4) k >= jmin[z] - j;
8061 	     *     1 and 2 guarantee (j+k) is within state y's j band
8062 	     *     3 and 4 guarantee (j+k) is within state z's j band
8063 	     *
8064 	     * (5) k >= hdmin[y][j-jmin[y]+k] - d;
8065 	     * (6) k <= hdmax[y][j-jmin[y]+k] - d;
8066 	     *     5 and 6 guarantee k+d is within y's j=(j+k), d band
8067 	     *
8068 	     * (7) k >= hdmin[z][j-jmin[z]+k];
8069 	     * (8) k <= hdmax[z][j-jmin[z]+k];
	     *     7 and 8 guarantee k is within state z's j=(j+k) d band
8071 	     */
8072 	    kmin = ESL_MAX(jmin[y], jmin[z]) - j;
8073 	    kmax = ESL_MIN(jmax[y], jmax[z]) - j;
8074 	    /* kmin and kmax satisfy inequalities (1-4) */
8075 	    /* RHS of inequalities 5-8 are dependent on k, so we check
8076 	     * for these within the next for loop. */
8077 	    for(k = kmin; k <= kmax; k++) {
8078 	      if(k < (hdmin[y][jp_y+k] - d) || k > (hdmax[y][jp_y+k] - d)) continue;
8079 	      /* above line continues if inequality 5 or 6 is violated */
8080 	      if(k < (hdmin[z][jp_z+k])     || k > (hdmax[z][jp_z+k]))     continue;
8081 	      /* above line continues if inequality 7 or 8 is violated */
8082 
	      /* if we get here for current k, all 8 inequalities have been satisfied
	       * so we know the cells corresponding to the platonic
	       * matrix cells beta[v][j][d], beta[y][j+k][d+k], and
	       * alpha[z][j+k][k] are all within the bands. These
	       * cells correspond to beta[v][jp_v][dp_v],
	       * beta[y][jp_y+k][d-hdmin[y][jp_y+k]+k],
	       * and alpha[z][jp_z+k][k-hdmin[z][jp_z+k]];
	       */
8091 	      kp_z = k-hdmin[z][jp_z+k];
8092 	      dp_y = d-hdmin[y][jp_y+k];
8093 
8094 	      if(do_J_v && do_J_y && do_J_z) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y+k][dp_y+k] + Jalpha[z][jp_z+k][kp_z]); /* A */
8095 	      if(do_J_v && do_L_y && do_L_z) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y+k][dp_y+k] + Lalpha[z][jp_z+k][kp_z]); /* B */
8096 	      if(do_R_v && do_R_y && do_J_z) Rbeta[v][jp_v][dp_v] = FLogsum(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y+k][dp_y+k] + Jalpha[z][jp_z+k][kp_z]); /* C */
8097 	      if(d == j && (j+k) == L &&
8098 		 do_R_v && do_T_y && do_L_z) Rbeta[v][jp_v][dp_v] = FLogsum(Rbeta[v][jp_v][dp_v], Tbeta[y][jp_y+k][dp_y+k] + Lalpha[z][jp_z+k][kp_z]); /* D */
8099 	      /* Note: Tbeta[y][j+k==L][d+k==L] will be 0.0 because it
8100 	       * was initialized that way. That T cell includes the
8101 	       * full target 1..L (any valid T alignment must because
8102 	       * we must account for the full target) rooted at a B
8103 	       * state, and a transition from that B state to this
8104 	       * BEGL_S is always probability 1.0.
8105 	       */
8106 	    } /* end of for k loop */
8107 	  } /* end of for d loop */
8108 	} /* end of for j loop */
	/* Two more special cases in truncated alignment. We have to
	 * do these within their own for j and for d loops because j
	 * and d have different restrictions than they do in the
	 * above for j and for d loops we just closed.
	 */
8114 	if(do_L_y && (do_J_v || do_L_v)) {
8115 	  jn = ESL_MAX(jmin[v], jmin[y]);
8116 	  jx = ESL_MIN(jmax[v], jmax[y]);
8117 	  for (j = jx; j >= jn; j--) {
8118 	    jp_v = j - jmin[v];
8119 	    jp_y = j - jmin[y];
8120 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
8121 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
8122 	    for (d = dx; d >= dn; d--) {
8123 	      dp_v = d-hdmin[v][jp_v];
8124 	      dp_y = d-hdmin[y][jp_y];
8125 	      if(do_J_v) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y]); /* entire sequence on left, no sequence on right, k == 0 */
8126 	      if(do_L_v) Lbeta[v][jp_v][dp_v] = FLogsum(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y]); /* entire sequence on left, no sequence on right, k == 0 */
8127 	    }
8128 	  }
8129 	}
8130       } /* end of 'if (cm->stid[v] == BEGL_S */
8131       else if (cm->stid[v] == BEGR_S) {
8132 	y = cm->plast[v];   /* the parent bifurcation    */
8133 	z = cm->cfirst[y];  /* the other (left) S state  */
8134 
8135 	do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
8136 	do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
8137 	do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
8138 	do_T_y = cp9b->Tvalid[y] && fill_T ? TRUE : FALSE;
8139 
8140 	do_J_z = cp9b->Jvalid[z]           ? TRUE : FALSE;
8141 	do_L_z = cp9b->Lvalid[z] && fill_L ? TRUE : FALSE;
8142 	do_R_z = cp9b->Rvalid[z] && fill_R ? TRUE : FALSE;
8143 
8144 	jn = ESL_MAX(jmin[v], jmin[y]);
8145 	jx = ESL_MIN(jmax[v], jmax[y]);
8146 	for (j = jx; j >= jn; j--) {
8147 	  ESL_DASSERT1((j >= 0 && j <= L));
8148 	  jp_v = j - jmin[v];
8149 	  jp_y = j - jmin[y];
8150 	  jp_z = j - jmin[z];
8151 
8152 	  dn = ESL_MAX(hdmin[v][jp_v], j-jmax[z]);
8153 	  dx = ESL_MIN(hdmax[v][jp_v], jp_z);
	  /* above makes sure that j,d are valid for state z: (jmin[z] + d) <= j <= (jmax[z] + d) */
8155 	  i = j-dx+1;
8156 	  for (d = dx; d >= dn; d--, i++) {
8157 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
8158 	    /* Find the first k value that implies a valid cell in the y and z decks.
8159 	     * This k must satisfy the following 4 inequalities (some may be redundant):
8160 	     * NOTE: these are different from those in Inside() (for one thing, v and y
8161 	     *       (BEGR_S and BIF_B here respectively) are switched relative to Inside.
8162 	     *
8163 	     * (1) k >= hdmin[y][j-jmin[y]] - d;
8164 	     * (2) k <= hdmax[y][j-jmin[y]] - d;
8165 	     *     1 and 2 guarantee (d+k) is within state y's j=(j) d band
8166 	     *
8167 	     * (3) k >= hdmin[z][j-jmin[z]-d];
8168 	     * (4) k <= hdmax[z][j-jmin[z]-d];
8169 	     *     3 and 4 guarantee k is within z's j=(j-d) d band
8170 	     *
8171 	     */
8172 	    kmin = ESL_MAX((hdmin[y][jp_y]-d), (hdmin[z][jp_z-d]));
8173 	    kmax = ESL_MIN((hdmax[y][jp_y]-d), (hdmax[z][jp_z-d]));
8174 	    /* kmin and kmax satisfy inequalities (1-4) */
8175 	    for(k = kmin; k <= kmax; k++) {
	      /* for current k, all 4 inequalities have been satisfied
	       * so we know the cells corresponding to the platonic
	       * matrix cells beta[v][j][d], beta[y][j][d+k], and
	       * alpha[z][j-d][k] are all within the bands. These
	       * cells correspond to beta[v][jp_v][dp_v],
	       * beta[y][jp_y][d-hdmin[y][jp_y]+k],
	       * and alpha[z][jp_z-d][k-hdmin[z][jp_z-d]];
	       */
8184 	      kp_z = k-hdmin[z][jp_z-d];
8185 	      dp_y = d-hdmin[y][jp_y];
8186 
8187 	      if(do_J_v && do_J_y && do_J_z) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y][dp_y+k] + Jalpha[z][jp_z-d][kp_z]); /* A */
8188 	      if(do_J_v && do_R_y && do_R_z) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y+k] + Ralpha[z][jp_z-d][kp_z]); /* C */
8189 	      if(do_L_v && do_L_y && do_J_z) Lbeta[v][jp_v][dp_v] = FLogsum(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+k] + Jalpha[z][jp_z-d][kp_z]); /* B */
8190 	      if(k == (i-1) && j == L &&
8191 		 do_L_v && do_T_y && do_R_z) Lbeta[v][jp_v][dp_v] = FLogsum(Lbeta[v][jp_v][dp_v], Tbeta[y][jp_y][dp_y+k] + Ralpha[z][jp_z-d][kp_z]); /* D */
8192 	      /* Note: Tbeta[y][j==L][d+k==L] will be 0.0 because it
8193 	       * was initialized that way. That T cell includes the
8194 	       * full target 1..L (any valid T alignment must because
8195 	       * we must account for the full target) rooted at a B
8196 	       * state, and a transition from that B state to this
8197 	       * BEGR_S is always probability 1.0.
8198 	       */
8199 	    } /* end of for k loop */
8200 	  } /* end of for d loop */
8201 	  /* Two more special cases in truncated alignment, we have to
8202 	   * do these within their own for d loop because d has
8203 	   * different restrictions than it does in the above for d
8204 	   * loop we just closed. j's restrictions are the same
8205 	   * though, so we stay inside the for j loop.
8206 	   */
8207 	  if(do_R_y && (do_J_v || do_R_v)) {
8208 	    dn = ESL_MAX(hdmin[v][jp_v], hdmin[y][jp_y]);
8209 	    dx = ESL_MIN(hdmax[v][jp_v], hdmax[y][jp_y]);
8210 	    for (d = dx; d >= dn; d--) {
8211 	      dp_v = d-hdmin[v][jp_v];
8212 	      dp_y = d-hdmin[y][jp_y];
8213 	      if(do_J_v) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y]); /* entire sequence on right, no sequence on left, k == 0 */
8214 	      if(do_R_v) Rbeta[v][jp_v][dp_v] = FLogsum(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y]); /* entire sequence on right, no sequence on left, k == 0 */
8215 	    }
8216 	  }
8217 	} /* end of for j loop */
8218       } /* end of 'else if (cm->stid[v] == BEGR_S */
8219       else { /* (cm->sttype[v] != BEGL_S && cm->sttype[v] != BEGR_S */
8220 	/* in cm_CYKOutsideAlignHB(), IL and IR states are separated
8221 	 * out from the other states at this stage because only they
8222 	 * can self-transit, making it slightly more efficient to
8223 	 * handle non-inserts differently. In truncated mode there's
8224 	 * more special cases so I've decided to collapse all states
8225 	 * together here. An analogous form of the following block is
8226 	 * used only for IL/IR states in cm_CYKOutsideAlignHB().
8227 	 *
8228 	 * ILs and IRs can self transit, this means that
8229 	 * {J,L,R}beta[v][j][d] must be fully calculated before
8230 	 * {J,L,R}beta[v][j][d+1] can be started to be calculated,
8231 	 * forcing the following nesting order: for j { for d { for y
8232 	 * { } } } for non-self-transitioners, we could do a more
8233 	 * efficient nesting order (you can see it in
8234 	 * cm_CYKOutsideAlignHB() but we don't here because truncation
8235 	 * makes it more complex.
8236 	 */
8237 	for (j = jmax[v]; j >= jmin[v]; j--) {
8238 	  ESL_DASSERT1((j >= 0 && j <= L));
8239 	  jp_v = j - jmin[v];
8240 	  for (d = hdmax[v][jp_v]; d >= hdmin[v][jp_v]; d--) {
8241 	    i = j-d+1;
8242 	    dp_v = d - hdmin[v][jp_v];  /* d index for state v in alpha w/mem eff bands */
8243 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
8244 	      /* mind the following sneaky if statement: in truncated
8245 	       * aln, the only way out of state 0 is through a
8246 	       * truncated begin, which we handled above (search for
8247 	       * 'trpenalty'). If we're in local mode transitions out
8248 	       * of 0 will have IMPOSSIBLE scores, but NOT if we're in
8249 	       * glocal mode, so we need this 'if'.
8250 	       */
8251 	      if(y != 0) {
8252 		voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
8253 		sd  = StateDelta(cm->sttype[y]);
8254 		sdl = StateLeftDelta(cm->sttype[y]);
8255 		sdr = StateRightDelta(cm->sttype[y]);
8256 
8257 		do_J_y = cp9b->Jvalid[y]           ? TRUE : FALSE;
8258 		do_L_y = cp9b->Lvalid[y] && fill_L ? TRUE : FALSE;
8259 		do_R_y = cp9b->Rvalid[y] && fill_R ? TRUE : FALSE;
8260 		do_T_y = cp9b->Tvalid[y] && fill_T ? TRUE : FALSE; /* will be FALSE, y is not a B_st */
8261 
8262 		/* if the y deck is invalid in J, L and R mode, we don't have to update v based on transitions from y */
8263 		if (! (do_J_y || do_L_y || do_R_y)) continue;
8264 
8265 		/* Note: this looks like it can be optimized, I tried but my 'optimization' slowed the code, so I reverted [EPN] */
8266 		switch(cm->sttype[y]) {
8267 		case MP_st:
8268 		  jp_y = j - jmin[y];
8269 		  if(j != L && d != j &&                                           /* boundary condition */
8270 		     do_J_v && do_J_y &&                                           /* J deck is valid for v and y */
8271 		     (j+sdr >= jmin[y]            && j+sdr <= jmax[y]) &&          /* j+sdr is within y's j band */
8272 		     (d+sd  >= hdmin[y][jp_y+sdr] && d+sd  <= hdmax[y][jp_y+sdr])) /* d+sd  is within y's d band for j+sdr */
8273 		    {
8274 		      dp_y = d - hdmin[y][jp_y+sdr];  /* d index for state y */
8275 		      escore = esc_vAA[y][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
8276 		      Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y+sdr][dp_y+sd] + cm->tsc[y][voffset] + escore);
8277 		    }
8278 		  if(j == L && d != j &&                                           /* boundary condition, only allow transition from L if we haven't emitted any residues rightwise (j==L) */
8279 		     do_L_y &&                                                     /* L deck is valid for y */
8280 		     (j     >= jmin[y]        && j     <= jmax[y]) &&              /* j is within y's j band */
8281 		     (d+sdl >= hdmin[y][jp_y] && d+sdl <= hdmax[y][jp_y]))         /* d+sdl is within y's d band for j */
8282 		    {
8283 		      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
8284 		      escore = cm->lmesc[y][dsq[i-1]];
8285 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+sdl] + cm->tsc[y][voffset] + escore);
8286 		      if(do_L_v) Lbeta[v][jp_v][dp_v] = FLogsum(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+sdl] + cm->tsc[y][voffset] + escore);
8287 		    }
8288 		  if(i == 1 && j != L &&                                           /* boundary condition, only allow transition from R if we haven't emitted any residues leftwise (i==1) */
8289 		     do_R_y &&                                                     /* R deck is valid for y */
8290 		     (j+sdr >= jmin[y]            && j+sdr <= jmax[y]) &&          /* j+sdr is within y's j band */
8291 		     (d+sdr >= hdmin[y][jp_y+sdr] && d+sdr <= hdmax[y][jp_y+sdr])) /* d+sdr is within y's d band for j+sdr */
8292 		    {
8293 		      dp_y = d - hdmin[y][jp_y+sdr];  /* d index for state y */
8294 		      escore = cm->rmesc[y][dsq[j+1]];
8295 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y+sdr][dp_y+sdr] + cm->tsc[y][voffset] + escore);
8296 		      if(do_R_v) Rbeta[v][jp_v][dp_v] = FLogsum(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y+sdr][dp_y+sdr] + cm->tsc[y][voffset] + escore);
8297 		    }
8298 		  break;
8299 
8300 		case ML_st:
8301 		case IL_st:
8302 		  jp_y = j - jmin[y];
8303 		  if(d != j &&                                              /* boundary case */
8304 		     (j     >= jmin[y]        && j     <= jmax[y]) &&       /* j is within y's j band */
8305 		     (d+sdl >= hdmin[y][jp_y] && d+sdl <= hdmax[y][jp_y]))  /* d+sdl is within y's d band for j */
8306 		    {
8307 		      dp_y = d - hdmin[y][jp_y];
8308 		      escore = cm->oesc[y][dsq[i-1]];
8309 		      if(do_J_v && do_J_y) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y][dp_y+sd] + cm->tsc[y][voffset] + escore);
8310 		      if(do_L_v && do_L_y) Lbeta[v][jp_v][dp_v] = FLogsum(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y+sd] + cm->tsc[y][voffset] + escore);
8311 		    }
8312 		  if(i == 1 &&                                              /* boundary condition, only allow transition from R if we're emitting first residue 1 from y  */
8313 		     v != y &&                                              /* will only happen if v == IL, we don't allow silent self transitions from IL->IL */
8314 		     do_R_y &&                                              /* R deck is valid for y */
8315 		     (j     >= jmin[y]        && j     <= jmax[y]) &&       /* j is within y's j band */
8316 		     (d     >= hdmin[y][jp_y] && d     <= hdmax[y][jp_y]))  /* d+sdr(==d) is within y's d band for j */
8317 		    {
8318 		      dp_y = d - hdmin[y][jp_y];
8319 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
8320 		      if(do_R_v) Rbeta[v][jp_v][dp_v] = FLogsum(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
8321 		    }
8322 		  break;
8323 
8324 		case MR_st:
8325 		case IR_st:
8326 		  jp_y = j - jmin[y];
8327 		  if (j != L &&                                                    /* boundary condition */
8328 		      (j+sdr >= jmin[y]            && j+sdr <= jmax[y]) &&          /* j+sdr is within y's j band */
8329 		      (d+sd  >= hdmin[y][jp_y+sdr] && d+sd  <= hdmax[y][jp_y+sdr])) /* d+sd is within y's d band for j+sdr */
8330 		    {
8331 		      dp_y = d - hdmin[y][jp_y+sdr];
8332 		      escore = cm->oesc[y][dsq[j+1]];
8333 		      if(do_J_v && do_J_y) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y+sdr][dp_y+sd] + cm->tsc[y][voffset] + escore);
8334 		      if(do_R_v && do_R_y) Rbeta[v][jp_v][dp_v] = FLogsum(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y+sdr][dp_y+sd] + cm->tsc[y][voffset] + escore);
8335 		    }
8336 		  if(j == L &&                                                     /* boundary condition, only allow transition from L if we're emitting final residue L from y */
8337 		     v != y &&                                                     /* will only happen if v == IR, we don't allow silent self transitions from IR->IR */
8338 		     do_L_y &&                                                     /* L deck is valid for y */
8339 		     (j     >= jmin[y]           && j      <= jmax[y]) &&          /* j is within y's j band */
8340 		     (d     >= hdmin[y][jp_y]    && d      <= hdmax[y][jp_y]))     /* d+sdl(==d) is within y's d band for j */
8341 		    {
8342 		      dp_y = d - hdmin[y][jp_y];
8343 		      if(do_J_v) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
8344 		      if(do_L_v) Lbeta[v][jp_v][dp_v] = FLogsum(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
8345 		    }
8346 		  break;
8347 		case S_st:
8348 		case E_st:
8349 		case D_st:
8350 		  jp_y = j - jmin[y];
8351 		  if((j >= jmin[y]        && j <= jmax[y]) &&
8352 		     (d >= hdmin[y][jp_y] && d <= hdmax[y][jp_y]))
8353 		    {
8354 		      dp_y = d - hdmin[y][jp_y];  /* d index for state y */
8355 		      if(do_J_v && do_J_y) Jbeta[v][jp_v][dp_v] = FLogsum(Jbeta[v][jp_v][dp_v], Jbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
8356 		      if(do_L_v && do_L_y) Lbeta[v][jp_v][dp_v] = FLogsum(Lbeta[v][jp_v][dp_v], Lbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
8357 		      if(do_R_v && do_R_y) Rbeta[v][jp_v][dp_v] = FLogsum(Rbeta[v][jp_v][dp_v], Rbeta[y][jp_y][dp_y] + cm->tsc[y][voffset]);
8358 		    }
8359 		  break;
8360 		} /* end of switch(cm->sttype[y] */
8361 	      } /* end of sneaky if y != 0 */
8362 	    } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
8363 	    if (do_J_v && Jbeta[v][jp_v][dp_v] < IMPOSSIBLE) Jbeta[v][jp_v][dp_v] = IMPOSSIBLE;
8364 	    if (do_L_v && Lbeta[v][jp_v][dp_v] < IMPOSSIBLE) Lbeta[v][jp_v][dp_v] = IMPOSSIBLE;
8365 	    if (do_R_v && Rbeta[v][jp_v][dp_v] < IMPOSSIBLE) Rbeta[v][jp_v][dp_v] = IMPOSSIBLE;
8366 	  } /* ends loop over d. We know all beta[v][j][d] in this row j and state v */
8367 	} /* end loop over jp. We know beta for this whole state */
8368       } /* end of 'else' (entered if cm->sttype[v] != BEGL_S nor BEGR_S */
8369       /* we're done calculating deck v for everything but local ends */
8370 
8371       /* deal with local alignment end transitions v->EL (EL = deck at M.) */
8372       if ((cm->flags & CMH_LOCAL_END) && NOT_IMPOSSIBLE(cm->endsc[v])) {
8373 	sd       = StateDelta(cm->sttype[v]);      /* note sd  is for state v */
8374 	sdl      = StateLeftDelta(cm->sttype[v]);  /* note sdl is for state v */
8375 	sdr      = StateRightDelta(cm->sttype[v]); /* note sdr is for state v */
8376 	emitmode = Emitmode(cm->sttype[v]);        /* note emitmode is for state v */
8377 
8378 	/* we handle all three possible modes (J,L,R) differently because they have different boundary conditions */
8379 
8380 	/* J mode */
8381 	if(do_J_v && cp9b->Jvalid[cm->M]) {
8382 	  jn = jmin[v] - sdr;
8383 	  jx = jmax[v] - sdr;
8384 	  for (j = jn; j <= jx; j++) {
8385 	    jp_v = j - jmin[v];
8386 	    dn   = hdmin[v][jp_v + sdr] - sd;
8387 	    dx   = hdmax[v][jp_v + sdr] - sd;
8388 	    i    = j-dn+1;                     /* we'll decrement this in for (d... loops inside switch below */
8389 	    dp_v = dn - hdmin[v][jp_v + sdr];  /* we'll increment this in for (d... loops inside switch below */
8390 
8391 	    switch (emitmode) {
8392 	    case EMITPAIR:
8393 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
8394 		escore = esc_vAA[v][dsq[i-1]*cm->abc->Kp+dsq[j+1]];
8395 		Jbeta[cm->M][j][d] = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v] + escore));
8396 	      }
8397 	      break;
8398 	    case EMITLEFT:
8399 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
8400 		escore = esc_vAA[v][dsq[i-1]];
8401 		Jbeta[cm->M][j][d] = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v] + escore));
8402 	      }
8403 	      break;
8404 
8405 	    case EMITRIGHT:
8406 	      escore = esc_vAA[v][dsq[j+1]];
8407 	      for (d = dn; d <= dx; d++, dp_v++) {
8408 		Jbeta[cm->M][j][d] = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v] + escore));
8409 	      }
8410 	      break;
8411 
8412 	    case EMITNONE:
8413 	      for (d = dn; d <= dx; d++, dp_v++) {
8414 		Jbeta[cm->M][j][d] = FLogsum(Jbeta[cm->M][j][d], (Jbeta[v][jp_v+sdr][dp_v+sd] + cm->endsc[v]));
8415 	      }
8416 	      break;
8417 	    }
8418 	  }
8419 	}
8420 
8421 	/* L mode: again, this code is inefficient, but I chose not to try to optimize lest it get more complex */
8422 	if(do_L_v && cp9b->Lvalid[cm->M]) {
8423 	  jn = jmin[v];
8424 	  jx = jmax[v];
8425 	  for (j = jn; j <= jx; j++) {
8426 	    jp_v = j - jmin[v];
8427 	    dn   = hdmin[v][jp_v] - sdl;
8428 	    dx   = hdmax[v][jp_v] - sdl;
8429 	    i    = j-dn+1;               /* we'll decrement this in for (d... loops inside switch below */
8430 	    dp_v = dn - hdmin[v][jp_v];  /* we'll increment this in for (d... loops inside switch below */
8431 
8432 	    switch (emitmode) {
8433 	    case EMITPAIR:
8434 	      if(j == L) { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
8435 		for (d = dn; d <= dx; d++, dp_v++, i--) {
8436 		  escore = cm->lmesc[v][dsq[i-1]];
8437 		  Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v+sdl] + cm->endsc[v] + escore));
8438 		}
8439 	      }
8440 	      break;
8441 
8442 	    case EMITLEFT:
8443 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
8444 		escore = esc_vAA[v][dsq[i-1]];
8445 		Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v+sdl] + cm->endsc[v] + escore));
8446 	      }
8447 	      break;
8448 
8449 	    case EMITRIGHT:
8450 	      if(j == L) { /* only allow transition from L if we haven't emitted any residues rightwise (j==L) */
8451 		for (d = dn; d <= dx; d++, dp_v++) {
8452 		  Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v] + cm->endsc[v]));
8453 		}
8454 	      }
8455 	      break;
8456 
8457 	    case EMITNONE:
8458 	      for (d = dn; d <= dx; d++, dp_v++) {
8459 		Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[v][jp_v][dp_v] + cm->endsc[v]));
8460 	      }
8461 	      break;
8462 	    }
8463 	  }
8464 	} /* end of if(do_L_v) */
8465 
8466 	/* R mode: again, this code is inefficient, but I chose not to try to optimize lest it get more complex */
8467 	if(do_R_v && cp9b->Rvalid[cm->M]) {
8468 	  jn = jmin[v] - sdr;
8469 	  jx = jmax[v] - sdr;
8470 	  for (j = jn; j <= jx; j++) {
8471 	    jp_v = j - jmin[v];
8472 	    dn   = hdmin[v][jp_v + sdr] - sdr;
8473 	    dx   = hdmax[v][jp_v + sdr] - sdr;
8474 	    i    = j-dn+1;                     /* we'll decrement this in for (d... loops inside switch below */
8475 	    dp_v = dn - hdmin[v][jp_v + sdr];  /* we'll increment this in for (d... loops inside switch below */
8476 
8477 	    switch (emitmode) {
8478 	    case EMITPAIR:
8479 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
8480 		if(i == 1) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
8481 		  escore = cm->rmesc[v][dsq[j+1]];
8482 		  Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[v][jp_v+sdr][dp_v+sdr] + cm->endsc[v] + escore));
8483 		}
8484 	      }
8485 	      break;
8486 	    case EMITLEFT:
8487 	      for (d = dn; d <= dx; d++, dp_v++, i--) {
8488 		if(i == 1) { /* only allow transition from R if we haven't emitted any residues leftwise (i==1) */
8489 		  Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[v][jp_v][dp_v] + cm->endsc[v]));
8490 		}
8491 	      }
8492 	      break;
8493 
8494 	    case EMITRIGHT:
8495 	      escore = esc_vAA[v][dsq[j+1]];
8496 	      for (d = dn; d <= dx; d++, dp_v++) {
8497 		Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[v][jp_v+sdr][dp_v+sdr] + cm->endsc[v] + escore));
8498 	      }
8499 	      break;
8500 
8501 	    case EMITNONE:
8502 	      for (d = dn; d <= dx; d++, dp_v++) {
8503 		Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[v][jp_v+sdr][dp_v+sdr] + cm->endsc[v]));
8504 	      }
8505 	      break;
8506 	    }
8507 	  }
8508 	} /* end of if(do_R_v) */
8509       } /* end of calculating EL scores */
8510     } /* end of if !StateIsDetached() */
8511   } /* end loop over decks v. */
8512 
8513   /* Deal with last step needed for local alignment
8514    * w.r.t. ends: left-emitting, EL->EL transitions. (EL = deck at M.)
8515    */
8516   if (cm->flags & CMH_LOCAL_END) {
8517     if(cp9b->Jvalid[cm->M]) {
8518       for (j = L; j > 0; j--) { /* careful w/ boundary here */
8519 	for (d = j-1; d >= 0; d--) { /* careful w/ boundary here */
8520 	  Jbeta[cm->M][j][d] = FLogsum(Jbeta[cm->M][j][d], (Jbeta[cm->M][j][d+1] + cm->el_selfsc));
8521 	}
8522       }
8523     }
8524     if(fill_L && cp9b->Lvalid[cm->M]) {
8525       for (j = L; j > 0; j--) { /* careful w/ boundary here */
8526 	for (d = j-1; d >= 0; d--) { /* careful w/ boundary here */
8527 	  Lbeta[cm->M][j][d] = FLogsum(Lbeta[cm->M][j][d], (Lbeta[cm->M][j][d+1] + cm->el_selfsc));
8528 	}
8529       }
8530     }
8531     if(fill_R && cp9b->Rvalid[cm->M]) {
8532       for (j = L; j > 0; j--) { /* careful w/ boundary here */
8533 	for (d = j-1; d >= 0; d--) { /* careful w/ boundary here */
8534 	  Rbeta[cm->M][j][d] = FLogsum(Rbeta[cm->M][j][d], (Rbeta[cm->M][j][d+1] + cm->el_selfsc));
8535 	}
8536       }
8537     }
8538   }
8539   fail_flag = FALSE;
8540   if(do_check) {
8541     /* Check for consistency between the Inside alpha matrix and the
8542      * Outside beta matrix. we assume the Inside CYK parse score
8543      * (optsc) is the optimal score, so for all v,j,d:
8544      *
8545      * Jalpha[v][j][d] + Jbeta[v][j][d] <= optsc
8546      * Lalpha[v][j][d] + Lbeta[v][j][d] <= optsc
8547      * Ralpha[v][j][d] + Rbeta[v][j][d] <= optsc
8548      *
8549      * We do a more extensive check in cm_TrCYKOutsideAlignHB(), but
8550      * it doesn't apply here, because we've summed all parsetrees
8551      * instead of finding only the optimal one.
8552      *
8553      * This is an expensive check and should only be done while
8554      * debugging.
8555      */
8556     vmax  = (cm->flags & CMH_LOCAL_END) ? cm->M : cm->M-1;
8557     if     (preset_mode == TRMODE_J) optsc = Jalpha[0][jp_0][Lp_0];
8558     else if(preset_mode == TRMODE_L) optsc = Lalpha[0][jp_0][Lp_0];
8559     else if(preset_mode == TRMODE_R) optsc = Ralpha[0][jp_0][Lp_0];
8560     else if(preset_mode == TRMODE_T) optsc = Talpha[0][jp_0][Lp_0];
8561     /* define bit score difference tolerance, somewhat arbitrarily:
8562      * clen <= 200: tolerance is 0.001; then a function of clen:
8563      * clen == 1000 tolerance is 0.005,
8564      * clen == 2000, tolerance is 0.01.
8565      *
8566      * I did this b/c with tests with SSU_rRNA_eukarya I noticed
8567      * failures with bit score differences up to 0.004 or so.  This
8568      * could mean a bug, but I couldn't get any average sized model to
8569      * fail with a difference above 0.001, so I blamed it on
8570      * precision. I'm not entirely convinced it isn't a bug but
8571      * until I see a failure on a smaller model it seems precision
8572      * is the most likely explanation, right?
8573      */
8574     tol = ESL_MAX(1e-3, (float) cm->clen / 200000.);
8575     for(v = 0; v <= vmax; v++) {
8576       do_J_v = cp9b->Jvalid[v]           ? TRUE : FALSE;
8577       do_L_v = cp9b->Lvalid[v] && fill_L ? TRUE : FALSE;
8578       do_R_v = cp9b->Rvalid[v] && fill_R ? TRUE : FALSE;
8579       do_T_v = cp9b->Tvalid[v] && fill_T ? TRUE : FALSE;
8580       jn = (v == cm->M) ? 1 : jmin[v];
8581       jx = (v == cm->M) ? L : jmax[v];
8582       for(j = jn; j <= jx; j++) {
8583 	jp_v = (v == cm->M) ? j : j - jmin[v];
8584 	dn   = (v == cm->M) ? 0 : hdmin[v][jp_v];
8585 	dx   = (v == cm->M) ? j : hdmax[v][jp_v];
8586 	for(d = dn; d <= dx; d++) {
8587 	  dp_v = (v == cm->M) ? d : d - hdmin[v][jp_v];
8588 	  Jsc  = (do_J_v) ? Jalpha[v][jp_v][dp_v] + Jbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
8589 	  Lsc  = (do_L_v) ? Lalpha[v][jp_v][dp_v] + Lbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
8590 	  Rsc  = (do_R_v) ? Ralpha[v][jp_v][dp_v] + Rbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
8591 	  Tsc  = (do_T_v) ? Talpha[v][jp_v][dp_v] + Tbeta[v][jp_v][dp_v] - optsc : IMPOSSIBLE;
8592 	  if(Jsc > tol) {
8593 	    printf("Check 1 J failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
8594 		   v, j, d, Jalpha[v][jp_v][dp_v], Jbeta[v][jp_v][dp_v], Jalpha[v][jp_v][dp_v] + Jbeta[v][jp_v][dp_v], optsc);
8595 	    fail_flag = TRUE;
8596 	  }
8597 	  if(Lsc > tol) {
8598 	    printf("Check 1 L failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
8599 		   v, j, d, Lalpha[v][jp_v][dp_v], Lbeta[v][jp_v][dp_v], Lalpha[v][jp_v][dp_v] + Lbeta[v][jp_v][dp_v], optsc);
8600 	    fail_flag = TRUE;
8601 	  }
8602 	  if(Rsc > tol) {
8603 	    printf("Check 1 R failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
8604 		   v, j, d, Ralpha[v][jp_v][dp_v], Rbeta[v][jp_v][dp_v], Ralpha[v][jp_v][dp_v] + Rbeta[v][jp_v][dp_v], optsc);
8605 	    fail_flag = TRUE;
8606 	  }
8607 	  if(Tsc > tol) {
8608 	    printf("Check 1 T failure: v: %4d j: %4d d: %4d (%.4f + %.4f) %.4f > %.4f\n",
8609 		   v, j, d, Talpha[v][jp_v][dp_v], Tbeta[v][jp_v][dp_v], Talpha[v][jp_v][dp_v] + Tbeta[v][jp_v][dp_v], optsc);
8610 	    fail_flag = TRUE;
8611 	  }
8612 	}
8613       }
8614     }
8615   }
8616   if(fail_flag) for(j = 1; j <= L; j++) printf("dsq[%4d]: %4d\n", j, dsq[j]);
8617 
8618 #if eslDEBUGLEVEL >= 2
8619   /* Uncomment to dump matrix to file. Careful...this could be very large. */
8620   /* FILE *fp1; fp1 = fopen("tmp.tru_ohbmx", "w");   cm_tr_hb_mx_Dump(fp1, mx, preset_mode, TRUE); fclose(fp1); */
8621 #endif
8622 
8623   if(do_check) {
8624     if(fail_flag) ESL_FAIL(eslFAIL, errbuf, "Tr Inside/Outside HB check FAILED.");
8625     ESL_DPRINTF1(("#DEBUG: SUCCESS! Tr Inside/Outside HB check PASSED.\n"));
8626     printf("SUCCESS! Tr Inside/Outside HB check PASSED.\n");
8627   }
8628 
8629   if     (preset_mode == TRMODE_J) optsc = Jalpha[0][jp_0][Lp_0];
8630   else if(preset_mode == TRMODE_L) optsc = Lalpha[0][jp_0][Lp_0];
8631   else if(preset_mode == TRMODE_R) optsc = Ralpha[0][jp_0][Lp_0];
8632   else if(preset_mode == TRMODE_T) optsc = Talpha[0][jp_0][Lp_0];
8633   ESL_DPRINTF1(("#DEBUG: \tcm_TrOutsideAlignHB() sc : %f (sc is from Inside!)\n", optsc));
8634 
8635   return eslOK;
8636 }
8637 
8638 /* Function: cm_TrPosterior()
8639  * Date:     EPN, Tue Sep 13 16:18:25 2011
8640  * Note:     based on Ian Holmes' P7EmitterPosterior() from HMMER's 2.x postprob.c
8641  *
8642  * Purpose: Combines non-banded cm_TrInside and cm_TrOutside matrices
8643  *           into a posterior probability matrix. The value in
8644  *           post->{J,L,R}[v][j][d] is the log of the posterior
8645  *           probability of a parse subtree rooted at v emitting the
8646  *           subsequence i..j (i=j-d+1) and being in J, L, or R mode
8647  *           at at state v.  The caller must provide a <post> float
8648  *           matrix, but this matrix may be the same matrix as that
8649  *           provided as Outside <out_mx>, (overwriting it will not
8650  *           compromise the algorithm).
8651  *
8652  * Args:     cm          - the model
8653  *           errbuf      - char buffer for reporting errors
8654  *           L           - length of the target sequence we're aligning
8655  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
8656  *           preset_mode - mode of alignment: TRMODE_J, TRMODE_L, TRMODE_R, or TRMODE_T
8657  *           ins_mx      - pre-calculated Inside matrix
8658  *           out_mx      - pre-calculated Outside matrix
8659  *           post_mx     - pre-allocated matrix for Posteriors
8660  *
8661  * Return:   eslOK on success, eslEINCOMPAT on contract violation
8662  */
8663 int
cm_TrPosterior(CM_t * cm,char * errbuf,int L,float size_limit,char preset_mode,CM_TR_MX * ins_mx,CM_TR_MX * out_mx,CM_TR_MX * post_mx)8664 cm_TrPosterior(CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode,
8665 	       CM_TR_MX *ins_mx, CM_TR_MX *out_mx, CM_TR_MX *post_mx)
8666 {
8667   int   status;   /* Easel status code */
8668   int   v;        /* state index */
8669   int   j;        /* position */
8670   int   d;        /* subsequence length */
8671   float sc;       /* optimal Inside score */
8672   int   fill_L, fill_R, fill_T; /* should we fill-in values for L, R, T? (we always fill in J) */
8673 
8674   /* Determine which matrices we need to fill-in, and the optimal score */
8675   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPosterior(): preset_mode is not J, L, R, or T");
8676   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrPosterior, bogus preset_mode: %d", preset_mode);
8677   if(preset_mode == TRMODE_J) sc = ins_mx->Jdp[0][L][L];
8678   if(preset_mode == TRMODE_L) sc = ins_mx->Ldp[0][L][L];
8679   if(preset_mode == TRMODE_R) sc = ins_mx->Rdp[0][L][L];
8680   if(preset_mode == TRMODE_T) sc = ins_mx->Tdp[0][L][L];
8681 
8682   /* grow our post matrix, but only if isn't also our out_mx in which
8683    * case we know we're already big enought (also in that case we
8684    * don't want to call GrowTo b/c it can potentially free the DP
8685    * matrix memory and reallocate it, which would be bad b/c we
8686    * need the out_mx!)
8687    */
8688   if(post_mx != out_mx) {
8689     if((status = cm_tr_mx_GrowTo(cm, post_mx, errbuf, L, size_limit)) != eslOK) return status;
8690   }
8691 
8692   /* If local ends are on, start with the EL state (cm->M), otherwise
8693    * it's not a valid deck. */
8694   if(cm->flags & CMH_LOCAL_END) {
8695     for (j = 0; j <= L; j++) {
8696       for (d = 0; d <= j; d++) {
8697 	post_mx->Jdp[cm->M][j][d] = ins_mx->Jdp[cm->M][j][d] + out_mx->Jdp[cm->M][j][d] - sc;
8698       }
8699     }
8700     if(fill_L) {
8701       for (j = 0; j <= L; j++) {
8702 	for (d = 0; d <= j; d++) {
8703 	  post_mx->Ldp[cm->M][j][d] = ins_mx->Ldp[cm->M][j][d] + out_mx->Ldp[cm->M][j][d] - sc;
8704 	}
8705       }
8706     }
8707     if(fill_R) {
8708       for (j = 0; j <= L; j++) {
8709 	for (d = 0; d <= j; d++) {
8710 	  post_mx->Rdp[cm->M][j][d] = ins_mx->Rdp[cm->M][j][d] + out_mx->Rdp[cm->M][j][d] - sc;
8711 	}
8712       }
8713     }
8714   }
8715 
8716   /* Fill in the rest of the matrices */
8717   for (v = cm->M-1; v >= 0; v--) {
8718     for (j = 0; j <= L; j++) {
8719       for (d = 0; d <= j; d++) {
8720 	post_mx->Jdp[v][j][d] = ins_mx->Jdp[v][j][d] + out_mx->Jdp[v][j][d] - sc;
8721       }
8722     }
8723   }
8724   if (fill_L) {
8725     for (v = cm->M-1; v >= 0; v--) {
8726       for (j = 0; j <= L; j++) {
8727 	for (d = 0; d <= j; d++) {
8728 	  post_mx->Ldp[v][j][d] = ins_mx->Ldp[v][j][d] + out_mx->Ldp[v][j][d] - sc;
8729 	}
8730       }
8731     }
8732   }
8733   if (fill_R) {
8734     for (v = cm->M-1; v >= 0; v--) {
8735       for (j = 0; j <= L; j++) {
8736 	for (d = 0; d <= j; d++) {
8737 	  post_mx->Rdp[v][j][d] = ins_mx->Rdp[v][j][d] + out_mx->Rdp[v][j][d] - sc;
8738 	}
8739       }
8740     }
8741   }
8742   if (fill_T) {
8743     for (v = cm->M-1; v >= 0; v--) {
8744       if (v == 0 || cm->sttype[v] == B_st) {
8745 	for (j = 0; j <= L; j++) {
8746 	  for (d = 0; d <= j; d++) {
8747 	    post_mx->Tdp[v][j][d] = ins_mx->Tdp[v][j][d] + out_mx->Tdp[v][j][d] - sc;
8748 	  }
8749 	}
8750       }
8751     }
8752   }
8753 #if eslDEBUGLEVEL >= 2
8754   /* Uncomment to dump matrix to file. Careful...this could be very large. */
8755   /* FILE *fp1; fp1 = fopen("tmp.tru_pmx", "w");   cm_tr_mx_Dump(fp1, post_mx, preset_mode, TRUE); fclose(fp1); */
8756 #endif
8757 
8758   return eslOK;
8759 }
8760 
8761 
8762 /* Function: cm_TrPosteriorHB()
8763  * Date:     EPN, Tue Oct 11 09:24:07 2011
8764  * Note:     based on Ian Holmes' P7EmitterPosterior() from HMMER's 2.x postprob.c
8765  *
8766  * Purpose: Combines HMM banded cm_TrInside and cm_TrOutside matrices
8767  *           into a posterior probability matrix. Any cells outside of
8768  *           the HMM bands do not exist in memory. The value in
8769  *           post->{J,L,R}[v][jp_v][dp_v] is the log of the posterior
8770  *           probability of a parse subtree rooted at v emitting the
8771  *           subsequence i..j (i=j-d+1) and being in J, L, or R mode
8772  *           at at state v, with jp_v = j-jmin[v] and dp_v =
8773  *           d-hdmin[v][jp_v].  The caller must provide a <post> float
8774  *           matrix, but this matrix may be the same matrix as that
8775  *           provided as Outside <out_mx>, (overwriting it will not
8776  *           compromise the algorithm).
8777  *
8778  * Args:     cm          - the model
8779  *           errbuf      - char buffer for reporting errors
8780  *           L           - length of the target sequence we're aligning
8781  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
8782  *           preset_mode - mode of alignment: TRMODE_J, TRMODE_L, TRMODE_R, or TRMODE_T
8783  *           ins_mx      - pre-calculated Inside matrix
8784  *           out_mx      - pre-calculated Outside matrix
8785  *           post_mx     - pre-allocated matrix for Posteriors
8786  *
8787  * Return:   <eslOK> on success.
8788  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>
8789  *           <eslEINVAL> if the full sequence is not within the bands for state 0
8790  *           In either case the post_mx is not filled
8791  */
8792 int
cm_TrPosteriorHB(CM_t * cm,char * errbuf,int L,float size_limit,char preset_mode,CM_TR_HB_MX * ins_mx,CM_TR_HB_MX * out_mx,CM_TR_HB_MX * post_mx)8793 cm_TrPosteriorHB(CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode,
8794 		 CM_TR_HB_MX *ins_mx, CM_TR_HB_MX *out_mx, CM_TR_HB_MX *post_mx)
8795 {
8796   int   status;   /* Easel status code */
8797   int   v;        /* state index */
8798   int   j;        /* position */
8799   int   d;        /* subsequence length */
8800   int   jp_v;     /* j offset in HMM banded matrix */
8801   int   dp_v;     /* d offset in HMM banded matrix */
8802   int   jx;       /* max j */
8803   int   dx;       /* max d */
8804   int   jp_0;     /* L offset in ROOT_S's (v==0) j band */
8805   int   Lp_0;     /* L offset in ROOT_S's (v==0) d band */
8806   float sc;       /* optimal Inside score */
8807   int   fill_L, fill_R, fill_T; /* must we fill in the L, R, and T matrices? */
8808 
8809   /* ptrs to cp9b info, for convenience */
8810   CP9Bands_t *cp9b = cm->cp9b;
8811   int     *jmin  = cp9b->jmin;
8812   int     *jmax  = cp9b->jmax;
8813   int    **hdmin = cp9b->hdmin;
8814   int    **hdmax = cp9b->hdmax;
8815 
8816   /* Determine which matrices we need to fill in, based on <preset_mode> */
8817   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPosteriorHB(): preset_mode is not J, L, R, or T");
8818   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, &fill_T)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrPosteriorHB(), bogus mode: %d", preset_mode);
8819 
8820   /* ensure a full alignment to ROOT_S (v==0) is allowed by the bands */
8821   if (cm->cp9b->jmin[0] > L || cm->cp9b->jmax[0] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's j band (%d..%d)\n", L, cm->cp9b->jmin[0], cm->cp9b->jmax[0]);
8822   jp_0 = L - jmin[0];
8823   if (cm->cp9b->hdmin[0][jp_0] > L || cm->cp9b->hdmax[0][jp_0] < L) ESL_FAIL(eslEINVAL, errbuf, "cm_CYKInsideAlignHB(): L (%d) is outside ROOT_S's d band (%d..%d)\n", L, cm->cp9b->hdmin[0][jp_0], cm->cp9b->hdmax[0][jp_0]);
8824   Lp_0 = L - hdmin[0][jp_0];
8825 
8826   /* Determine the optimal score */
8827   if(preset_mode == TRMODE_J) sc = ins_mx->Jdp[0][jp_0][Lp_0];
8828   if(preset_mode == TRMODE_L) sc = ins_mx->Ldp[0][jp_0][Lp_0];
8829   if(preset_mode == TRMODE_R) sc = ins_mx->Rdp[0][jp_0][Lp_0];
8830   if(preset_mode == TRMODE_T) sc = ins_mx->Tdp[0][jp_0][Lp_0];
8831 
8832   /* grow our post matrix, but only if isn't also our out_mx in which
8833    * case we know we're already big enought (also in that case we
8834    * don't want to call GrowTo b/c it can potentially free the DP
8835    * matrix memory and reallocate it, which would be bad b/c we
8836    * need the out_mx!)
8837    */
8838   if(post_mx != out_mx) {
8839     if((status = cm_tr_hb_mx_GrowTo(cm, post_mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
8840   }
8841 
8842   /* If local ends are on, start with the non-banded EL state (cm->M), otherwise it's not a valid deck. */
8843   if(cm->flags & CMH_LOCAL_END) {
8844     if(cp9b->Jvalid[cm->M]) {
8845       for (j = 0; j <= L; j++) {
8846 	for (d = 0; d <= j; d++) {
8847 	  post_mx->Jdp[cm->M][j][d] = ins_mx->Jdp[cm->M][j][d] + out_mx->Jdp[cm->M][j][d] - sc;
8848 	}
8849       }
8850     }
8851     if(fill_L && cp9b->Lvalid[cm->M]) {
8852       for (j = 0; j <= L; j++) {
8853 	for (d = 0; d <= j; d++) {
8854 	  post_mx->Ldp[cm->M][j][d] = ins_mx->Ldp[cm->M][j][d] + out_mx->Ldp[cm->M][j][d] - sc;
8855 	}
8856       }
8857     }
8858     if(fill_R && cp9b->Rvalid[cm->M]) {
8859       for (j = 0; j <= L; j++) {
8860 	for (d = 0; d <= j; d++) {
8861 	  post_mx->Rdp[cm->M][j][d] = ins_mx->Rdp[cm->M][j][d] + out_mx->Rdp[cm->M][j][d] - sc;
8862 	}
8863       }
8864     }
8865   }
8866   /* Fill in the rest of the matrices */
8867   for (v = cm->M-1; v >= 0; v--) {
8868     if(cp9b->Jvalid[v]) {
8869       jx = jmax[v]-jmin[v];
8870       for (jp_v = 0; jp_v <= jx; jp_v++) {
8871 	dx = hdmax[v][jp_v]-hdmin[v][jp_v];
8872 	for (dp_v = 0; dp_v <= dx; dp_v++) {
8873 	  post_mx->Jdp[v][jp_v][dp_v] = ins_mx->Jdp[v][jp_v][dp_v] + out_mx->Jdp[v][jp_v][dp_v] - sc;
8874 	}
8875       }
8876     }
8877   }
8878   if(fill_L) {
8879     for (v = cm->M-1; v >= 0; v--) {
8880       if(cp9b->Lvalid[v]) {
8881 	jx = jmax[v]-jmin[v];
8882 	for (jp_v = 0; jp_v <= jx; jp_v++) {
8883 	  dx = hdmax[v][jp_v]-hdmin[v][jp_v];
8884 	  for (dp_v = 0; dp_v <= dx; dp_v++) {
8885 	    post_mx->Ldp[v][jp_v][dp_v] = ins_mx->Ldp[v][jp_v][dp_v] + out_mx->Ldp[v][jp_v][dp_v] - sc;
8886 	  }
8887 	}
8888       }
8889     }
8890   }
8891   if(fill_R) {
8892     for (v = cm->M-1; v >= 0; v--) {
8893       if(cp9b->Rvalid[v]) {
8894 	jx = jmax[v]-jmin[v];
8895 	for (jp_v = 0; jp_v <= jx; jp_v++) {
8896 	  dx = hdmax[v][jp_v]-hdmin[v][jp_v];
8897 	  for (dp_v = 0; dp_v <= dx; dp_v++) {
8898 	    post_mx->Rdp[v][jp_v][dp_v] = ins_mx->Rdp[v][jp_v][dp_v] + out_mx->Rdp[v][jp_v][dp_v] - sc;
8899 	  }
8900 	}
8901       }
8902     }
8903   }
8904   if(fill_T) {
8905     for (v = cm->M-1; v >= 0; v--) {
8906       if(cp9b->Tvalid[v]) {
8907 	jx = jmax[v]-jmin[v];
8908 	for (jp_v = 0; jp_v <= jx; jp_v++) {
8909 	  dx = hdmax[v][jp_v]-hdmin[v][jp_v];
8910 	  for (dp_v = 0; dp_v <= dx; dp_v++) {
8911 	    post_mx->Tdp[v][jp_v][dp_v] = ins_mx->Tdp[v][jp_v][dp_v] + out_mx->Tdp[v][jp_v][dp_v] - sc;
8912 	  }
8913 	}
8914       }
8915     }
8916   }
8917 #if eslDEBUGLEVEL >= 2
8918   /* Uncomment to dump matrix to file. Careful...this could be very large. */
8919   /* FILE *fp1; fp1 = fopen("tmp.tru_phbmx", "w");   cm_tr_hb_mx_Dump(fp1, post_mx, preset_mode, TRUE); fclose(fp1); */
8920 #endif
8921   return eslOK;
8922 }
8923 
8924 /* Function: cm_TrEmitterPosterior()
8925  * Date:     EPN, Fri Oct  7 05:30:31 2011
8926  *
8927  * Purpose: Given a posterior probability cube, where the value in
8928  *           post[v][j][d] is the log of the posterior probability of
8929  *           a parse subtree rooted at v emitting the subsequence i..j
8930  *           (i=j-d+1), fill a CM_EMIT_MX <emit_mx> with
8931  *           matrices with values:
8932  *
8933  *           emit_mx->*l_pp[v][i]: log of the posterior probability
8934  *           that state v emitted residue i leftwise while in * (J or
8935  *           L, Joint or Left) marginal mode either at (if a match
8936  *           state) or *after* (if an insert state) the left consensus
8937  *           position modeled by state v's node.
8938  *
8939  *           emit_mx->*r_pp[v][i]: log of the posterior probability
8940  *           that state v emitted residue i rightwise while in * (J
8941  *           or R, Joint or Right) marginal mode either at (if a match
8942  *           state) or *before* (if an insert state) the right
8943  *           consensus position modeled by state v's node.
8944  *
8945  *           *l_pp[v] is NULL for states that do not emit leftwise
8946  *           *r_pp[v] is NULL for states that do not emit rightwise
8947  *
8948  *           We only need to fill a subset of the *l_pp and *r_pp
8949  *           matrices, depending on the <preset_mode> of the alignment
8950  *           which is known and passed in:
8951  *           <preset_mode> == TRMODE_J, fill Jl_pp, Jr_pp
8952  *           <preset_mode> == TRMODE_L, fill Jl_pp, Jr_pp and Ll_pp
8953  *           <preset_mode> == TRMODE_R, fill Jl_pp, Jr_pp and Rr_pp
8954  *           <preset_mode> == TRMODE_T, fill Jl_pp, Jr_pp, Ll_pp, and Rr_pp
8955  *
8956  *          This is done in 3 steps:
8957  *          1. Fill *l_pp[v][i] and *r_pp[v][i] with the posterior
8958  *             probability that state v emitted residue i either
8959  *             leftwise (l_pp) or rightwise (r_pp).
8960  *
8961  *          2. Normalize *l_pp and *r_pp so that probability that
8962  *             each residue was emitted by any state is exactly
8963  *             1.0.
8964  *
8965  *          3. Combine *l_pp values for MATP_MP (v) and MATP_ML (y=v+1)
8966  *             states in the same node so they give the value defined
8967  *             above (i.e. *l_pp[v] == *l_pp[y] = the PP that either v
8968  *             or y emitted residue i) instead of *l_pp[v] = PP that v
8969  *             emitted i, and *l_pp[y] = PP that y emitted i.  And
8970  *             combine *r_pp values for MATP_MP (v) and MATP_MR (y=v+2)
8971  *             states in an analogous way.
8972  *
8973  *          If <do_check> we check to make sure the summed probability
8974  *          of any residue is > 0.98 and < 1.02 prior to the step 2
8975  *          normalization, and throw eslFAIL if not.
8976  *
8977  *          Note: A failure of this test does not necessarily mean a
8978  *          bug in the code, because this check is known to fail for
8979  *          some cases with parsetrees that contain inserts of 100s of
8980  *          residues from the same IL or IR state (that utilize 100s
8981  *          of IL->IL or IR->IR self transitions). These cases were
8982  *          looked at in detail to determine if they were due to a bug
8983  *          in the DP code. This was logged in
8984  *          ~nawrockie/notebook/8_1016_inf-1rc3_bug_alignment/00LOG.
8985  *          The conclusion was that the failure of the posterior check
8986  *          is due completely to lack of precision in the float scores
8987  *          (not just in the logsum look-up table but also with using
8988  *          real log() and exp() calls). If this function returns an
8989  *          error, please check to see if the parsetree has a large
8990  *          insertion in it, if so you can expect probabilities up to
8991  *          1.03 due solely to this precision issue. See the notebook
8992  *          00LOG for more, including a check I performed to change the
8993  *          relevant IL->IL transition probability by very small
8994  *          values (~0.0001) and you can observe the posteriors change
8995  *          dramatically which demonstrates that precision of floats
8996  *          is the culprit.  (EPN, Sun Oct 26 14:54:31 2008;
8997  *          originally added to the 'Purpose' section of the
8998  *          cm_Posterior() function, which no longer exists, having
8999  *          been replaced by this function.)
9000  *
9001  * Args:     cm          - the model
9002  *           errbuf      - for error messages
9003  *           L           - length of the sequence
9004  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
9005  *           post        - pre-filled posterior cube
9006  *           emit_mx     - pre-allocated emit matrix, grown and filled-in here
9007  *           preset_mode - known (pre-determined) mode of the alignment
9008  *           do_check    - if TRUE, return eslFAIL if summed prob of any residue
9009  *                         (before normalization) is < 0.98 or > 1.02.
9010  *
9011  * Returns:  <eslOK>     on success.
9012  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>
9013  *           <eslFAIL>   if (do_check) and any residue check fails
9014  *           <eslEMEM>   if we run out of memory.
9015  *           If !eslOK the l_pp and r_pp values are invalid.
9016  */
9017 int
cm_TrEmitterPosterior(CM_t * cm,char * errbuf,int L,float size_limit,char preset_mode,int do_check,CM_TR_MX * post,CM_TR_EMIT_MX * emit_mx)9018 cm_TrEmitterPosterior(CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode, int do_check, CM_TR_MX *post, CM_TR_EMIT_MX *emit_mx)
9019 {
9020   int    status;
9021   int    v, j, d; /* state, position, subseq length */
9022   int    i;       /* sequence position */
9023   int    sd;      /* StateDelta(v) */
9024   int    sdl;     /* StateLeftDelta(v) */
9025   int    sdr;     /* StateRightDelta(v) */
9026   int    fill_L, fill_R; /* do we need to fill Ll_pp/Rr_pp matrices? */
9027 
9028   /* determine which matrices we need to fill in, based on <preset_mode> */
9029   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrEmitterPosterior(): preset_mode is not J, L, R, or T");
9030   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, NULL)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrCheckFromPosterior, bogus mode: %d", preset_mode);
9031 
9032   /* grow the emit matrices based on the current sequence */
9033   if((status = cm_tr_emit_mx_GrowTo(cm, emit_mx, errbuf, L, size_limit)) != eslOK) return status;
9034 
9035   /* initialize all cells of the emit matrices to IMPOSSIBLE */
9036   esl_vec_FSet(emit_mx->Jl_pp_mem, emit_mx->l_ncells_valid, IMPOSSIBLE);
9037   if(fill_L) esl_vec_FSet(emit_mx->Ll_pp_mem, emit_mx->l_ncells_valid, IMPOSSIBLE);
9038   esl_vec_FSet(emit_mx->Jr_pp_mem, emit_mx->r_ncells_valid, IMPOSSIBLE);
9039   if(fill_R) esl_vec_FSet(emit_mx->Rr_pp_mem, emit_mx->r_ncells_valid, IMPOSSIBLE);
9040 
9041   /* Step 1. Fill *l_pp[v][i] and *r_pp[v][i] with the posterior
9042    *         probability that state v emitted residue i either
9043    *         leftwise (*l_pp) or rightwise (*r_pp).
9044    */
9045   for(v = 0; v < cm->M; v++) {
9046     sd  = StateDelta(cm->sttype[v]);
9047     sdl = StateLeftDelta(cm->sttype[v]);
9048     sdr = StateRightDelta(cm->sttype[v]);
9049     if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
9050       for(j = 1; j <= L; j++) {
9051 	i = j-sd+1;
9052 	for(d = sd; d <= j; d++, i--) {
9053 	  emit_mx->Jl_pp[v][i] = FLogsum(emit_mx->Jl_pp[v][i], post->Jdp[v][j][d]);
9054 	}
9055 	if(fill_L) {
9056 	  i = j-sdl+1; /* careful, use sdl, not sd */
9057 	  for(d = sdl; d <= j; d++, i--) {
9058 	    emit_mx->Ll_pp[v][i] = FLogsum(emit_mx->Ll_pp[v][i], post->Ldp[v][j][d]);
9059 	  }
9060 	}
9061       }
9062     }
9063     if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
9064       for(j = 1; j <= L; j++) {
9065 	for(d = sd; d <= j; d++) {
9066 	  emit_mx->Jr_pp[v][j] = FLogsum(emit_mx->Jr_pp[v][j], post->Jdp[v][j][d]);
9067 	}
9068 	if(fill_R) {
9069 	  for(d = sdr; d <= j; d++) { /* careful, use sdr, not sd */
9070 	    emit_mx->Rr_pp[v][j] = FLogsum(emit_mx->Rr_pp[v][j], post->Rdp[v][j][d]);
9071 	  }
9072 	}
9073       }
9074     }
9075   }
9076   /* factor in contribution of local ends, the EL state may have emitted this residue. */
9077   if (cm->flags & CMH_LOCAL_END) {
9078     for (j = 1; j <= L; j++) {
9079       i = j;
9080       for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
9081 	emit_mx->Jl_pp[cm->M][i] = FLogsum(emit_mx->Jl_pp[cm->M][i], post->Jdp[cm->M][j][d]);
9082       }
9083     }
9084     if(fill_L) {
9085       for (j = 1; j <= L; j++) {
9086 	i = j;
9087 	for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
9088 	  emit_mx->Ll_pp[cm->M][i] = FLogsum(emit_mx->Ll_pp[cm->M][i], post->Ldp[cm->M][j][d]);
9089 	}
9090       }
9091     }
9092     if(fill_R) {
9093       for (j = 1; j <= L; j++) {
9094 	i = j;
9095 	for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
9096 	  emit_mx->Rr_pp[cm->M][i] = FLogsum(emit_mx->Rr_pp[cm->M][i], post->Rdp[cm->M][j][d]);
9097 	}
9098       }
9099     }
9100   }
9101 #if eslDEBUGLEVEL >= 2
9102   /* Uncomment to dump matrix to file. Careful...this could be very large. */
9103   /* FILE *fp1; fp1 = fopen("tmp.tru_unnorm_emitmx",  "w"); cm_tr_emit_mx_Dump(fp1, cm, emit_mx, preset_mode, TRUE); fclose(fp1); */
9104 #endif
9105 
9106   /* Step 2. Normalize *l_pp and *r_pp so that probability that
9107    *         each residue was emitted by any state is exactly
9108    *         1.0.
9109    */
9110   esl_vec_FSet(emit_mx->sum, (L+1), IMPOSSIBLE);
9111   for(v = 0; v <= cm->M; v++) {
9112     if(emit_mx->Jl_pp[v] != NULL) {
9113       for(i = 1; i <= L; i++) {
9114 	emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->Jl_pp[v][i]);
9115       }
9116     }
9117     if(emit_mx->Ll_pp[v] != NULL && fill_L) {
9118       for(i = 1; i <= L; i++) {
9119 	emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->Ll_pp[v][i]);
9120       }
9121     }
9122     if(emit_mx->Jr_pp[v] != NULL) {
9123       for(j = 1; j <= L; j++) {
9124 	emit_mx->sum[j] = FLogsum(emit_mx->sum[j], emit_mx->Jr_pp[v][j]);
9125       }
9126     }
9127     if(emit_mx->Rr_pp[v] != NULL && fill_R) {
9128       for(j = 1; j <= L; j++) {
9129 	emit_mx->sum[j] = FLogsum(emit_mx->sum[j], emit_mx->Rr_pp[v][j]);
9130       }
9131     }
9132   }
9133   /* perform the check, if nec */
9134   if(do_check) {
9135     for(i = 1; i <= L; i++) {
9136       if((sreEXP2(emit_mx->sum[i]) < 0.98) || (sreEXP2(emit_mx->sum[i]) > 1.02)) {
9137 	ESL_FAIL(eslFAIL, errbuf, "residue %d has summed prob of %5.4f (2^%5.4f).\nMay not be a DP coding bug, see 'Note:' on precision in cm_TrEmitterPosterior", i, (sreEXP2(emit_mx->sum[i])), emit_mx->sum[i]);
9138       }
9139       /*printf("i: %d | total: %10.4f\n", i, (sreEXP2(emit_mx->sum[i])));*/
9140     }
9141     ESL_DPRINTF1(("#DEBUG: cm_TrEmitterPosterior() check passed, all residues have summed probability of emission of between 0.98 and 1.02.\n"));
9142   }
9143 
9144   /* normalize, using the sum vector */
9145   for(v = 0; v <= cm->M; v++) {
9146     if(emit_mx->Jl_pp[v] != NULL) {
9147       for(i = 1; i <= L; i++) emit_mx->Jl_pp[v][i] -= emit_mx->sum[i];
9148     }
9149     if(emit_mx->Ll_pp[v] != NULL && fill_L) {
9150       for(i = 1; i <= L; i++) emit_mx->Ll_pp[v][i] -= emit_mx->sum[i];
9151     }
9152     if(emit_mx->Jr_pp[v] != NULL) {
9153       for(j = 1; j <= L; j++) emit_mx->Jr_pp[v][j] -= emit_mx->sum[j];
9154     }
9155     if(emit_mx->Rr_pp[v] != NULL && fill_R) {
9156       for(j = 1; j <= L; j++) emit_mx->Rr_pp[v][j] -= emit_mx->sum[j];
9157     }
9158   }
9159 
9160   /* Step 3. Combine *l_pp values for MATP_MP (v) and MATP_ML (y=v+1)
9161    *         states in the same node so they give the value defined
9162    *         above (i.e. *l_pp[v] == *l_pp[y] = the PP that either v or
9163    *         y emitted residue i) instead of *l_pp[v] = PP that v
9164    *         emitted i, and *l_pp[y] = PP that y emitted i.  And
9165    *         combine *r_pp values for MATP_MP (v) and MATP_MR (y=v+2)
9166    *         states in an analogous way.
9167    */
9168   for(v = 0; v <= cm->M; v++) {
9169     if(cm->sttype[v] == MP_st) {
9170       for(i = 1; i <= L; i++) {
9171 	emit_mx->Jl_pp[v][i]   = FLogsum(emit_mx->Jl_pp[v][i], emit_mx->Jl_pp[v+1][i]);
9172 	emit_mx->Jl_pp[v+1][i] = emit_mx->Jl_pp[v][i];
9173       }
9174       if(fill_L) {
9175 	for(i = 1; i <= L; i++) {
9176 	  emit_mx->Ll_pp[v][i]   = FLogsum(emit_mx->Ll_pp[v][i], emit_mx->Ll_pp[v+1][i]);
9177 	  emit_mx->Ll_pp[v+1][i] = emit_mx->Ll_pp[v][i];
9178 	}
9179       }
9180       for(j = 1; j <= L; j++) {
9181 	emit_mx->Jr_pp[v][j]   = FLogsum(emit_mx->Jr_pp[v][j], emit_mx->Jr_pp[v+2][j]);
9182 	emit_mx->Jr_pp[v+2][j] = emit_mx->Jr_pp[v][j];
9183       }
9184       if(fill_R){
9185 	for(j = 1; j <= L; j++) {
9186 	  emit_mx->Rr_pp[v][j]   = FLogsum(emit_mx->Rr_pp[v][j], emit_mx->Rr_pp[v+2][j]);
9187 	  emit_mx->Rr_pp[v+2][j] = emit_mx->Rr_pp[v][j];
9188 	}
9189       }
9190     }
9191   }
9192 #if eslDEBUGLEVEL >= 2
9193   /* Uncomment to dump matrix to file. Careful...this could be very large. */
9194   /* FILE *fp2; fp2 = fopen("tmp.tru_emitmx",  "w"); cm_tr_emit_mx_Dump(fp2, cm, emit_mx, preset_mode, TRUE); fclose(fp2); */
9195 #endif
9196 
9197   return eslOK;
9198 }
9199 
9200 
9201 /* Function: cm_TrEmitterPosteriorHB()
9202  * Date:     EPN, Tue Oct 11 09:36:55 2011
9203  *
9204  * Purpose: Same as cm_TrEmitterPosterior() except HMM banded matrices
9205  *          are used. The main difference is that we have to be careful
9206  *          to stay within the bands because matrix cells outside
9207  *          the bands do not exist (are not allocated). This requires
9208  *          keeping careful track of our offsets between the sequence
9209  *          position index and the corresponding indices in the matrix.
9210  *
9211  * Args:     cm          - the model
9212  *           errbuf      - for error messages
9213  *           L           - length of the sequence
9214  *           size_limit  - max number of Mb for DP matrix, if matrix is bigger return eslERANGE
9215  *           post        - pre-filled posterior cube
9216  *           emit_mx     - pre-allocated emit matrix, grown and filled-in here
9217  *           preset_mode - known optimal mode of the alignment
9218  *           do_check    - if TRUE, return eslEFAIL if summed prob of any residue
9219  *                         (before normalization) is < 0.98 or > 1.02.
9220  *
9221  * Returns:  <eslOK>     on success.
9222  * Throws:   <eslERANGE> if required DP matrix size exceeds <size_limit>
9223  *           <eslFAIL>   if (do_check) and any residue check fails
9224  *           <eslEMEM>   if we run out of memory.
9225  *           If !eslOK the *l_pp and *r_pp values are invalid.
9226  */
9227 int
cm_TrEmitterPosteriorHB(CM_t * cm,char * errbuf,int L,float size_limit,char preset_mode,int do_check,CM_TR_HB_MX * post,CM_TR_HB_EMIT_MX * emit_mx)9228 cm_TrEmitterPosteriorHB(CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode, int do_check, CM_TR_HB_MX *post, CM_TR_HB_EMIT_MX *emit_mx)
9229 {
9230   int    status;
9231   int    v, j, d; /* state, position, subseq length */
9232   int    i;       /* sequence position */
9233   int    fill_L, fill_R; /* do we need to fill Ll_pp/Rr_pp matrices? */
9234   int    jp_v;    /* j-jmin[v] for current j, and current v */
9235   int    jp_v2;   /* another offset j in banded matrix */
9236   int    ip_v;    /* i-imin[v] for current i, and current v */
9237   int    ip_v2;   /* another offset i in banded matrix */
9238   int    dp_v;    /* d-hdmin[v][jp_v] for current j, current v, current d*/
9239   int    in, ix;  /* temp min/max i */
9240   int    jn, jx;  /* temp min/max j */
9241 
9242   /* ptrs to band info, for convenience */
9243   int     *imin  = cm->cp9b->imin;
9244   int     *imax  = cm->cp9b->imax;
9245   int     *jmin  = cm->cp9b->jmin;
9246   int     *jmax  = cm->cp9b->jmax;
9247   int    **hdmin = cm->cp9b->hdmin;
9248   int    **hdmax = cm->cp9b->hdmax;
9249 
9250   /* determine which matrices we need to fill in, based on <preset_mode> */
9251   if (preset_mode != TRMODE_J && preset_mode != TRMODE_L && preset_mode != TRMODE_R && preset_mode != TRMODE_T) ESL_FAIL(eslEINVAL, errbuf, "cm_TrEmitterPosteriorHB(): preset_mode is not J, L, R, or T");
9252   if((status = cm_TrFillFromMode(preset_mode, &fill_L, &fill_R, NULL)) != eslOK) ESL_FAIL(status, errbuf, "cm_TrCheckFromPosterior, bogus mode: %d", preset_mode);
9253 
9254   /* grow the emit matrices based on the current sequence */
9255   if((status = cm_tr_hb_emit_mx_GrowTo(cm, emit_mx, errbuf, cm->cp9b, L, size_limit)) != eslOK) return status;
9256 
9257   /* initialize all cells of the emit matrices to IMPOSSIBLE */
9258   esl_vec_FSet(emit_mx->Jl_pp_mem, emit_mx->l_ncells_valid, IMPOSSIBLE);
9259   if(fill_L) esl_vec_FSet(emit_mx->Ll_pp_mem, emit_mx->l_ncells_valid, IMPOSSIBLE);
9260   esl_vec_FSet(emit_mx->Jr_pp_mem, emit_mx->r_ncells_valid, IMPOSSIBLE);
9261   if(fill_R) esl_vec_FSet(emit_mx->Rr_pp_mem, emit_mx->r_ncells_valid, IMPOSSIBLE);
9262 
9263   /* Step 1. Fill *l_pp[v][i] and *r_pp[v][i] with the posterior
9264    *         probability that state v emitted residue i either
9265    *         leftwise (*l_pp) or rightwise (*r_pp).
9266    */
9267   for(v = 0; v < cm->M; v++) {
9268     if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
9269       if(cm->cp9b->Jvalid[v]) {
9270 	for(j = jmin[v]; j <= jmax[v]; j++) {
9271 	  jp_v = j - jmin[v];
9272 	  for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
9273 	    dp_v = d-hdmin[v][jp_v];
9274 	    i    = j-d+1;
9275 	    assert(i >= imin[v] && i <= imax[v]);
9276 	    ip_v = i - imin[v];
9277 	    emit_mx->Jl_pp[v][ip_v] = FLogsum(emit_mx->Jl_pp[v][ip_v], post->Jdp[v][jp_v][dp_v]);
9278 	  }
9279 	}
9280       }
9281       if(cm->cp9b->Lvalid[v] && fill_L) {
9282 	for(j = jmin[v]; j <= jmax[v]; j++) {
9283 	  jp_v = j - jmin[v];
9284 	  for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
9285 	    dp_v = d-hdmin[v][jp_v];
9286 	    i    = j-d+1;
9287 	    assert(i >= imin[v] && i <= imax[v]);
9288 	    ip_v = i - imin[v];
9289 	    emit_mx->Ll_pp[v][ip_v] = FLogsum(emit_mx->Ll_pp[v][ip_v], post->Ldp[v][jp_v][dp_v]);
9290 	  }
9291 	}
9292       }
9293     }
9294     if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
9295       if(cm->cp9b->Jvalid[v]) {
9296 	for(j = jmin[v]; j <= jmax[v]; j++) {
9297 	  jp_v = j - jmin[v];
9298 	  for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
9299 	    dp_v = d-hdmin[v][jp_v];
9300 	    emit_mx->Jr_pp[v][jp_v] = FLogsum(emit_mx->Jr_pp[v][jp_v], post->Jdp[v][jp_v][dp_v]);
9301 	  }
9302 	}
9303       }
9304       if(cm->cp9b->Rvalid[v] && fill_R) {
9305 	for(j = jmin[v]; j <= jmax[v]; j++) {
9306 	  jp_v = j - jmin[v];
9307 	  for(d = hdmin[v][jp_v]; d <= hdmax[v][jp_v]; d++) {
9308 	    dp_v = d-hdmin[v][jp_v];
9309 	    emit_mx->Rr_pp[v][jp_v] = FLogsum(emit_mx->Rr_pp[v][jp_v], post->Rdp[v][jp_v][dp_v]);
9310 	  }
9311 	}
9312       }
9313     }
9314   }
9315   /* factor in contribution of local ends, the EL state may have emitted this residue. */
9316   /* Remember, the EL deck is non-banded */
9317   if (cm->flags & CMH_LOCAL_END) {
9318     if(cm->cp9b->Jvalid[cm->M]) {
9319       for (j = 1; j <= L; j++) {
9320 	i = j;
9321 	for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
9322 	  emit_mx->Jl_pp[cm->M][i] = FLogsum(emit_mx->Jl_pp[cm->M][i], post->Jdp[cm->M][j][d]);
9323 	}
9324       }
9325     }
9326     if(fill_L && cm->cp9b->Lvalid[cm->M]) {
9327       for (j = 1; j <= L; j++) {
9328 	i = j;
9329 	for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
9330 	  emit_mx->Ll_pp[cm->M][i] = FLogsum(emit_mx->Ll_pp[cm->M][i], post->Ldp[cm->M][j][d]);
9331 	}
9332       }
9333     }
9334     if(fill_R && cm->cp9b->Rvalid[cm->M]) {
9335       for (j = 1; j <= L; j++) {
9336 	i = j;
9337 	for (d = 1; d <= j; d++, i--) { /* note: d >= 1, b/c EL emits 1 residue */
9338 	  emit_mx->Rr_pp[cm->M][i] = FLogsum(emit_mx->Rr_pp[cm->M][i], post->Rdp[cm->M][j][d]);
9339 	}
9340       }
9341     }
9342   }
9343 #if eslDEBUGLEVEL >= 2
9344   /* Uncomment to dump matrix to file. Careful...this could be very large. */
9345   /* FILE *fp1; fp1 = fopen("tmp.tru_unnorm_hbemitmx",  "w"); cm_tr_hb_emit_mx_Dump(fp1, cm, emit_mx, preset_mode, TRUE); fclose(fp1); */
9346 #endif
9347 
9348   /* Step 2. Normalize *l_pp and *r_pp so that probability that
9349    *         each residue was emitted by any state is exactly
9350    *         1.0.
9351    */
9352   esl_vec_FSet(emit_mx->sum, (L+1), IMPOSSIBLE);
9353   for(v = 0; v < cm->M; v++) { /* we'll handle EL special */
9354     if(emit_mx->Jl_pp[v] != NULL && cm->cp9b->Jvalid[v]) {
9355       in = ESL_MAX(imin[v], 1);
9356       ix = ESL_MIN(imax[v], L);
9357       for(i = in; i <= ix; i++) {
9358 	ip_v = i - imin[v];
9359 	emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->Jl_pp[v][ip_v]);
9360       }
9361     }
9362     if(emit_mx->Ll_pp[v] != NULL && cm->cp9b->Lvalid[v] && fill_L) {
9363       in = ESL_MAX(imin[v], 1);
9364       ix = ESL_MIN(imax[v], L);
9365       for(i = in; i <= ix; i++) {
9366 	ip_v = i - imin[v];
9367 	emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->Ll_pp[v][ip_v]);
9368       }
9369     }
9370     if(emit_mx->Jr_pp[v] != NULL && cm->cp9b->Jvalid[v]) {
9371       jn = ESL_MAX(jmin[v], 1);
9372       jx = ESL_MIN(jmax[v], L);
9373       for(j = jn; j <= jx; j++) {
9374 	jp_v = j - jmin[v];
9375 	emit_mx->sum[j] = FLogsum(emit_mx->sum[j], emit_mx->Jr_pp[v][jp_v]);
9376       }
9377     }
9378     if(emit_mx->Rr_pp[v] != NULL && cm->cp9b->Rvalid[v] && fill_R) {
9379       jn = ESL_MAX(jmin[v], 1);
9380       jx = ESL_MIN(jmax[v], L);
9381       for(j = jn; j <= jx; j++) {
9382 	jp_v = j - jmin[v];
9383 	emit_mx->sum[j] = FLogsum(emit_mx->sum[j], emit_mx->Rr_pp[v][jp_v]);
9384       }
9385     }
9386   }
9387   /* Handle EL deck, remember it is non-banded, and only valid for Jl_pp, Ll_pp and Rr_pp */
9388   if(emit_mx->Jl_pp[cm->M] != NULL && cm->cp9b->Jvalid[cm->M]) {
9389     for(i = 1; i <= L; i++) {
9390       emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->Jl_pp[cm->M][i]);
9391     }
9392   }
9393   if(emit_mx->Ll_pp[cm->M] != NULL && cm->cp9b->Lvalid[cm->M] && fill_L) {
9394     for(i = 1; i <= L; i++) {
9395       emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->Ll_pp[cm->M][i]);
9396     }
9397   }
9398   if(emit_mx->Rr_pp[cm->M] != NULL && cm->cp9b->Rvalid[cm->M] && fill_R) {
9399     for(i = 1; i <= L; i++) {
9400       emit_mx->sum[i] = FLogsum(emit_mx->sum[i], emit_mx->Rr_pp[cm->M][i]);
9401     }
9402   }
9403 
9404   /* perform the check, if nec */
9405   if(do_check) {
9406     for(i = 1; i <= L; i++) {
9407       if((sreEXP2(emit_mx->sum[i]) < 0.98) || (sreEXP2(emit_mx->sum[i]) > 1.02)) {
9408 	ESL_FAIL(eslFAIL, errbuf, "residue %d has summed prob of %5.4f (2^%5.4f).\nMay not be a DP coding bug, see 'Note:' on precision in cm_TrEmitterPosterior().\n", i, (sreEXP2(emit_mx->sum[i])), emit_mx->sum[i]);
9409       }
9410       /*printf("i: %d | total: %10.4f\n", i, (sreEXP2(emit_mx->sum[i])));*/
9411     }
9412     ESL_DPRINTF1(("#DEBUG: cm_TrEmitterPosteriorHB() check passed, all residues have summed probability of emission of between 0.98 and 1.02.\n"));
9413   }
9414 
9415   /* normalize, using the sum vector */
9416   for(v = 0; v < cm->M; v++) {
9417     if(emit_mx->Jl_pp[v] != NULL && cm->cp9b->Jvalid[v]) {
9418       in = ESL_MAX(imin[v], 1);
9419       ix = ESL_MIN(imax[v], L);
9420       for(i = in; i <= ix; i++) {
9421 	ip_v = i - imin[v];
9422 	emit_mx->Jl_pp[v][ip_v] -= emit_mx->sum[i];
9423       }
9424     }
9425     if(emit_mx->Ll_pp[v] != NULL && cm->cp9b->Lvalid[v] && fill_L) {
9426       in = ESL_MAX(imin[v], 1);
9427       ix = ESL_MIN(imax[v], L);
9428       for(i = in; i <= ix; i++) {
9429 	ip_v = i - imin[v];
9430 	emit_mx->Ll_pp[v][ip_v] -= emit_mx->sum[i];
9431       }
9432     }
9433     if(emit_mx->Jr_pp[v] != NULL && cm->cp9b->Jvalid[v]) {
9434       jn = ESL_MAX(jmin[v], 1);
9435       jx = ESL_MIN(jmax[v], L);
9436       for(j = jn; j <= jx; j++) {
9437 	jp_v = j - jmin[v];
9438 	emit_mx->Jr_pp[v][jp_v] -= emit_mx->sum[j];
9439       }
9440     }
9441     if(emit_mx->Rr_pp[v] != NULL && cm->cp9b->Rvalid[v] && fill_R) {
9442       jn = ESL_MAX(jmin[v], 1);
9443       jx = ESL_MIN(jmax[v], L);
9444       for(j = jn; j <= jx; j++) {
9445 	jp_v = j - jmin[v];
9446 	emit_mx->Rr_pp[v][jp_v] -= emit_mx->sum[j];
9447       }
9448     }
9449   }
9450   /* Handle EL deck, remember it is non-banded */
9451   if(emit_mx->Jl_pp[cm->M] != NULL && cm->cp9b->Jvalid[cm->M]) {
9452     for(i = 1; i <= L; i++) {
9453       emit_mx->Jl_pp[cm->M][i] -= emit_mx->sum[i];
9454     }
9455   }
9456   if(emit_mx->Ll_pp[cm->M] != NULL && cm->cp9b->Lvalid[cm->M] && fill_L) {
9457     for(i = 1; i <= L; i++) {
9458       emit_mx->Ll_pp[cm->M][i] -= emit_mx->sum[i];
9459     }
9460   }
9461   if(emit_mx->Rr_pp[cm->M] != NULL && cm->cp9b->Rvalid[cm->M] && fill_R) {
9462     for(i = 1; i <= L; i++) {
9463       emit_mx->Rr_pp[cm->M][i] -= emit_mx->sum[i];
9464     }
9465   }
9466   /* Step 3. Combine *l_pp values for MATP_MP (v) and MATP_ML (y=v+1)
9467    *         states in the same node so they give the value defined
9468    *         above (i.e. *l_pp[v] == *l_pp[y] = the PP that either v or
9469    *         y emitted residue i) instead of *l_pp[v] = PP that v
9470    *         emitted i, and *l_pp[y] = PP that y emitted i.  And
9471    *         combine *r_pp values for MATP_MP (v) and MATP_MR (y=v+2)
9472    *         states in an analogous way.
9473    */
9474   for(v = 0; v <= cm->M; v++) {
9475     if(cm->sttype[v] == MP_st) {
9476       /* we only change {J,L}l_pp[v][i] and {J,L}l_pp[v+1][i] if i is within both
9477        * state v and v+1's i band.
9478        */
9479       if(cm->cp9b->Jvalid[v]) {
9480 	if(imax[v] >= 1 && imax[v+1] >= 1) {
9481 	  in = ESL_MAX(imin[v], imin[v+1]);
9482 	  ix = ESL_MIN(imax[v], imax[v+1]);
9483 	  for(i = in; i <= ix; i++) {
9484 	    ip_v  = i - imin[v];
9485 	    ip_v2 = i - imin[v+1];
9486 	    emit_mx->Jl_pp[v][ip_v]    = FLogsum(emit_mx->Jl_pp[v][ip_v], emit_mx->Jl_pp[v+1][ip_v2]);
9487 	    emit_mx->Jl_pp[v+1][ip_v2] = emit_mx->Jl_pp[v][ip_v];
9488 	  }
9489 	}
9490       }
9491       if(cm->cp9b->Lvalid[v] && fill_L) {
9492 	if(imax[v] >= 1 && imax[v+1] >= 1) {
9493 	  in = ESL_MAX(imin[v], imin[v+1]);
9494 	  ix = ESL_MIN(imax[v], imax[v+1]);
9495 	  for(i = in; i <= ix; i++) {
9496 	    ip_v  = i - imin[v];
9497 	    ip_v2 = i - imin[v+1];
9498 	    emit_mx->Ll_pp[v][ip_v]    = FLogsum(emit_mx->Ll_pp[v][ip_v], emit_mx->Ll_pp[v+1][ip_v2]);
9499 	    emit_mx->Ll_pp[v+1][ip_v2] = emit_mx->Ll_pp[v][ip_v];
9500 	  }
9501 	}
9502       }
9503       /* we only change {J,R}r_pp[v][j] and {J,R}r_pp[v+2][j] if j is within both
9504        * state v and v+2's j band.
9505        */
9506       if(cm->cp9b->Jvalid[v]) {
9507 	if(jmax[v] >= 1 && jmax[v+2] >= 1) {
9508 	  jn = ESL_MAX(jmin[v], jmin[v+2]);
9509 	  jx = ESL_MIN(jmax[v], jmax[v+2]);
9510 	  for(j = jn; j <= jx; j++) {
9511 	    jp_v  = j - jmin[v];
9512 	    jp_v2 = j - jmin[v+2];
9513 	    emit_mx->Jr_pp[v][jp_v]    = FLogsum(emit_mx->Jr_pp[v][jp_v], emit_mx->Jr_pp[v+2][jp_v2]);
9514 	    emit_mx->Jr_pp[v+2][jp_v2] = emit_mx->Jr_pp[v][jp_v];
9515 	  }
9516 	}
9517       }
9518       if(cm->cp9b->Rvalid[v] && fill_R) {
9519 	if(jmax[v] >= 1 && jmax[v+2] >= 1) {
9520 	  jn = ESL_MAX(jmin[v], jmin[v+2]);
9521 	  jx = ESL_MIN(jmax[v], jmax[v+2]);
9522 	  for(j = jn; j <= jx; j++) {
9523 	    jp_v  = j - jmin[v];
9524 	    jp_v2 = j - jmin[v+2];
9525 	    emit_mx->Rr_pp[v][jp_v]    = FLogsum(emit_mx->Rr_pp[v][jp_v], emit_mx->Rr_pp[v+2][jp_v2]);
9526 	    emit_mx->Rr_pp[v+2][jp_v2] = emit_mx->Rr_pp[v][jp_v];
9527 	  }
9528 	}
9529       }
9530     }
9531   }
9532 #if eslDEBUGLEVEL >= 2
9533   /* Uncomment to dump matrix to file. Careful...this could be very large. */
9534   /* FILE *fp2; fp2 = fopen("tmp.tru_hbemitmx",  "w"); cm_tr_hb_emit_mx_Dump(fp2, cm, emit_mx, preset_mode, TRUE); fclose(fp2); */
9535 #endif
9536 
9537   return eslOK;
9538 }
9539 
9540 /* Function: cm_TrPostCode()
9541  * Date:     EPN, Fri Oct  7 14:30:32 2011
9542  *
9543  * Purpose: Given a parse tree and a filled emit matrix calculate two
9544  *           strings that represents the confidence values on each
9545  *           aligned residue in the sequence.
9546  *
9547  *           The emit_mx values are:
9548  *           {J,L}l_pp[v][i]: log of the posterior probability that state v emitted
9549  *                            residue i leftwise either at (if a match state) or
9550  *                            *after* (if an insert state) the left consensus
9551  *                            position modeled by state v's node in Joint marginal
9552  *                            mode (for Jl_pp) or Left marginal mode (for Ll_pp).
9553  *
9554  *           {J,R}r_pp[v][i]: log of the posterior probability that state v emitted
9555  *                            residue i rightwise either at (if a match state) or
9556  *                            *before* (if an insert state) the right consensus
9557  *                            position modeled by state v's node in Joint marginal
9558  *                            mode (for Jr_pp) or Right marginal mode (for Rr_pp).
9559  *
9560  *           {J,L}l_pp[v] is NULL for states that do not emit leftwise  (B,S,D,E,IR,MR)
9561  *           {J,R}r_pp[v] is NULL for states that do not emit rightwise (B,S,D,E,IL,ML)
9562  *
9563  *           The PP string is 0..L-1  (L = len of target seq),
9564  *           so its in the coordinate system of the sequence string;
9565  *           off by one from dsq.
9566  *
9567  *           Values are 0,1,2,3,4,5,6,7,8,9,*:
9568  *           '0' = [0.00-0.05)
9569  *           '1' = [0.05-0.15)
9570  *           '2' = [0.15-0.25)
9571  *           '3' = [0.25-0.35)
9572  *           '4' = [0.35-0.45)
9573  *           '5' = [0.45-0.55)
9574  *           '6' = [0.55-0.65)
9575  *           '7' = [0.65-0.75)
9576  *           '8' = [0.75-0.85)
9577  *           '9' = [0.85-0.95)
9578  *           '*' = [0.95-1.00)
9579  *
9580  *           cm_TrPostCodeHB() is nearly the same function with the
9581  *           difference that HMM bands were used for the alignment,
9582  *           so we have to deal with offset issues.
9583  *
9584  *           Renamed from CMPostCode() [EPN, Wed Sep 14 06:20:35 2011].
9585  *
9586  * Args:     cm         - the model
9587  *           errbuf     - char buffer for reporting errors
9588  *           dsq        - the digitized sequence [1..L]
9589  *           L          - length of the dsq to align
9590  *           emit_mx    - the pre-filled emit matrix
9591  *           tr         - the parstree with the emissions we're setting PPs for
9592  *           ret_ppstr  - RETURN: a string of the PP code values (0..L-1)
9593  *           ret_avgp   - RETURN: the average PP of all aligned residues
9594  *
9595  * Returns:  <eslOK>     on success.
9596  * Throws:   <eslEINVAL> if a posterior probability is > 1.01 or less than -0.01.
9597  *                       or if we get a marginal mode in the parsetree that doesn't
9598  *                       make sense.
9599  */
9600 int
cm_TrPostCode(CM_t * cm,char * errbuf,int L,CM_TR_EMIT_MX * emit_mx,Parsetree_t * tr,char ** ret_ppstr,float * ret_avgp)9601 cm_TrPostCode(CM_t *cm, char *errbuf, int L, CM_TR_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp)
9602 {
9603   int   status;
9604   int   x, v, i, j, r; /* counters */
9605   char *ppstr;       /* the PP string, created here */
9606   float p;           /* a probability */
9607   float sum_logp;    /* log of summed probability of all residues emitted thus far */
9608   float cur_log_pp;  /* current log probability of emitting a residue */
9609   char  mode;        /* marginal mode: TRMODE_J, TRMODE_L or TRMODE_R */
9610   int   have_el;     /* TRUE if CM has local ends, otherwise FALSE */
9611 
9612   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
9613 
9614   ESL_ALLOC(ppstr, (L+1) * sizeof(char));
9615   sum_logp = IMPOSSIBLE;
9616 
9617   /* go through each node of the parsetree and determine post code for emissions */
9618   for (x = 0; x < tr->n; x++) {
9619     v    = tr->state[x];
9620     i    = tr->emitl[x];
9621     j    = tr->emitr[x];
9622     mode = tr->mode[x];
9623 
9624     /* Only P, L, R, and EL states have emissions. */
9625     if(cm->sttype[v] == EL_st) { /* EL state, we have to handle this guy special */
9626       if(mode == TRMODE_J || mode == TRMODE_L || mode == TRMODE_R) {
9627 	for(r = i; r <= j; r++) { /* we have to annotate from residues i..j */
9628 	  if(! have_el) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode() using EL state to emit residue %d, but ELs are turned off!\n", r);
9629 	  switch (mode) {
9630 	  case TRMODE_J: cur_log_pp = emit_mx->Jl_pp[v][r]; break;
9631 	  case TRMODE_L: cur_log_pp = emit_mx->Ll_pp[v][r]; break;
9632 	  case TRMODE_R: cur_log_pp = emit_mx->Rr_pp[v][r]; break;
9633 	  }
9634 	  ppstr[r-1] = Fscore2postcode(cur_log_pp);
9635 	  sum_logp   = FLogsum(sum_logp, cur_log_pp);
9636 	  /* make sure we've got a valid probability */
9637 	  p = FScore2Prob(cur_log_pp, 1.);
9638 	  if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): probability for EL state v: %d residue r: %d > 1.00 (%.2f)", v, r, p);
9639 	  if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): probability for EL state v: %d residue r: %d < 0.00 (%.2f)", v, r, p);
9640 	}
9641       }
9642       else ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): invalid mode for EL state in the parsetree: %d\n", mode);
9643     }
9644     if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
9645       if(mode == TRMODE_J || mode == TRMODE_L) {
9646 	cur_log_pp = (mode == TRMODE_J) ? emit_mx->Jl_pp[v][i] : emit_mx->Ll_pp[v][i];
9647 	ppstr[i-1] = Fscore2postcode(cur_log_pp);
9648 	sum_logp   = FLogsum(sum_logp, cur_log_pp);
9649 	/* make sure we've got a valid probability */
9650 	p = FScore2Prob(cur_log_pp, 1.);
9651 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): probability for left state v: %d residue i: %d > 1.00 (%.2f)", v, i, p);
9652 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): probability for left state v: %d residue i: %d < 0.00 (%.2f)", v, i, p);
9653       }
9654       else if(mode != TRMODE_R) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): non-sensical mode for MP, ML, IL state: %d", mode);
9655     }
9656     if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
9657       if(mode == TRMODE_J || mode == TRMODE_R) {
9658 	cur_log_pp = (mode == TRMODE_J) ? emit_mx->Jr_pp[v][j] : emit_mx->Rr_pp[v][j];
9659 	ppstr[j-1] = Fscore2postcode(cur_log_pp);
9660 	sum_logp   = FLogsum(sum_logp, cur_log_pp);
9661 	/* make sure we've got a valid probability */
9662 	p = FScore2Prob(cur_log_pp, 1.);
9663 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): probability for right state v: %d residue i: %d > 1.00 (%.2f)", v, j, p);
9664 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): probability for right state v: %d residue i: %d < 0.00 (%.2f)", v, j, p);
9665       }
9666       else if(mode != TRMODE_L) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): non-sensical mode for MP, MR, IR state: %d", mode);
9667     }
9668   }
9669   ppstr[L] = '\0';
9670 
9671   if(ret_ppstr != NULL) *ret_ppstr = ppstr; else free(ppstr);
9672   if(ret_avgp  != NULL) *ret_avgp  = sreEXP2(sum_logp) / (float) L;
9673   return eslOK;
9674 
9675  ERROR:
9676   ESL_FAIL(eslEMEM, errbuf, "cm_TrPostcode(): Memory allocation error.");
9677   return status; /* never reached */
9678 }
9679 
9680 /* Function: cm_TrPostCodeHB()
9681  * Date:     EPN, Tue Oct 11 09:58:47 2011
9682  *
9683  * Purpose: Same as cm_TrPostCode() except HMM banded matrices are
9684  *          used. The main difference is that we have to be careful to
9685  *          stay within the bands because matrix cells outside the
9686  *          bands do not exist (are not allocated). This requires
9687  *          keeping careful track of our offsets between the sequence
9688  *          position index and the corresponding indices in the
9689  *          matrix.
9690  *
9691  * Args:     cm         - the model
9692  *           errbuf     - char buffer for reporting errors
9693  *           dsq        - the digitized sequence [1..L]
9694  *           L          - length of the dsq to align
9695  *           emit_mx    - the pre-filled emit matrix
9696  *           tr         - the parstree with the emissions we're setting PPs for
9697  *           ret_ppstr  - RETURN: a string of the PP code values (0..L-1)
9698  *           ret_avgp   - RETURN: the average PP of all aligned residues
9699  *
9700  * Returns:  <eslOK>     on success.
9701  * Throws:   <eslEINVAL> if a posterior probability is > 1.01 or less than -0.01.
9702  *                       or if we get a marginal mode in the parsetree that doesn't
9703  *                       make sense.
9704  */
9705 int
cm_TrPostCodeHB(CM_t * cm,char * errbuf,int L,CM_TR_HB_EMIT_MX * emit_mx,Parsetree_t * tr,char ** ret_ppstr,float * ret_avgp)9706 cm_TrPostCodeHB(CM_t *cm, char *errbuf, int L, CM_TR_HB_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp)
9707 {
9708   int   status;
9709   int   x, v, i, j, r; /* counters */
9710   char *ppstr;       /* the PP string, created here */
9711   float p;           /* a probability */
9712   float sum_logp;    /* log of summed probability of all residues emitted thus far */
9713   float cur_log_pp;  /* current log probability of emitting a residue */
9714   char  mode;        /* marginal mode: TRMODE_J, TRMODE_L or TRMODE_R */
9715   int   have_el;     /* TRUE if CM has local ends, otherwise FALSE */
9716 
9717   /* variables used for HMM bands */
9718   int ip_v, jp_v; /* i, j offset within bands */
9719   /* ptrs to cp9b info, for convenience */
9720   CP9Bands_t *cp9b = cm->cp9b;
9721   int     *imin  = cp9b->imin;
9722   int     *imax  = cp9b->imax;
9723   int     *jmin  = cp9b->jmin;
9724   int     *jmax  = cp9b->jmax;
9725 
9726   have_el = (cm->flags & CMH_LOCAL_END) ? TRUE : FALSE;
9727 
9728   ESL_ALLOC(ppstr, (L+1) * sizeof(char));
9729   sum_logp = IMPOSSIBLE;
9730 
9731   /* go through each node of the parsetree and determine post code for emissions */
9732   for (x = 0; x < tr->n; x++) {
9733     v    = tr->state[x];
9734     i    = tr->emitl[x];
9735     j    = tr->emitr[x];
9736     mode = tr->mode[x];
9737 
9738     /* Only P, L, R, and EL states have emissions. */
9739     if(cm->sttype[v] == EL_st) { /* EL state, we have to handle this guy special */
9740       /* Note it is possible to use an EL state in an HMM banded
9741        * truncated optimal accuracy parsetree if d == 0 (no EL
9742        * emissions) even when local ends are off! (This is also true
9743        * in non-truncated HMM banded OA, see the note in
9744        * cm_dpalign.c:cm_alignT_hb() regarding the special case
9745        * involving allow_S_local_end'.) This means we don't fail
9746        * if local ends are off and we see EL unless we see that we've
9747        * emitted >= 1 residues from EL.
9748        */
9749       if(mode == TRMODE_J || mode == TRMODE_L || mode == TRMODE_R) {
9750 	for(r = i; r <= j; r++) { /* we have to annotate from residues i..j */
9751 	  if(! have_el) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB() using EL state to emit residue %d, but ELs are turned off!\n", r);
9752 	  /* remember the EL deck is non-banded */
9753 	  switch (mode) {
9754 	  case TRMODE_J: cur_log_pp = emit_mx->Jl_pp[v][r]; break;
9755 	  case TRMODE_L: cur_log_pp = emit_mx->Ll_pp[v][r]; break;
9756 	  case TRMODE_R: cur_log_pp = emit_mx->Rr_pp[v][r]; break;
9757 	  }
9758 	  ppstr[r-1] = Fscore2postcode(cur_log_pp);
9759 	  sum_logp   = FLogsum(sum_logp, cur_log_pp);
9760 	  /* make sure we've got a valid probability */
9761 	  p = FScore2Prob(cur_log_pp, 1.);
9762 	  if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): probability for EL state v: %d residue r: %d > 1.00 (%.2f)", v, r, p);
9763 	  if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): probability for EL state v: %d residue r: %d < 0.00 (%.2f)", v, r, p);
9764 	}
9765       }
9766       else ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): invalid mode for EL state in the parsetree: %d\n", mode);
9767     }
9768     if(cm->sttype[v] == MP_st || cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
9769       if(mode == TRMODE_J || mode == TRMODE_L) {
9770 	ip_v = i - imin[v];
9771 	assert(i >= imin[v] && i <= imax[v]);
9772 	ESL_DASSERT1((i >= imin[v] && i <= imax[v]));
9773 	cur_log_pp = (mode == TRMODE_J) ? emit_mx->Jl_pp[v][ip_v] : emit_mx->Ll_pp[v][ip_v];
9774 	ppstr[i-1] = Fscore2postcode(cur_log_pp);
9775 	sum_logp   = FLogsum(sum_logp, cur_log_pp);
9776 	/* make sure we've got a valid probability */
9777 	p = FScore2Prob(cur_log_pp, 1.);
9778 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): probability for left state v: %d residue i: %d > 1.00 (%.2f)", v, i, p);
9779 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): probability for left state v: %d residue i: %d < 0.00 (%.2f)", v, i, p);
9780       }
9781       else if(mode != TRMODE_R) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): non-sensical mode for MP, ML, IL state: %d", mode);
9782     }
9783     if(cm->sttype[v] == MP_st || cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
9784       if(mode == TRMODE_J || mode == TRMODE_R) {
9785 	jp_v = j - jmin[v];
9786 	assert(j >= jmin[v] && j <= jmax[v]);
9787 	ESL_DASSERT1((j >= jmin[v] && j <= jmax[v]));
9788 	cur_log_pp = (mode == TRMODE_J) ? emit_mx->Jr_pp[v][jp_v] : emit_mx->Rr_pp[v][jp_v];
9789 	ppstr[j-1] = Fscore2postcode(cur_log_pp);
9790 	sum_logp   = FLogsum(sum_logp, cur_log_pp);
9791 	/* make sure we've got a valid probability */
9792 	p = FScore2Prob(cur_log_pp, 1.);
9793 	if(p >  1.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): probability for right state v: %d residue i: %d > 1.00 (%.2f)", v, j, p);
9794 	if(p < -0.01) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCodeHB(): probability for right state v: %d residue i: %d < 0.00 (%.2f)", v, j, p);
9795       }
9796       else if(mode != TRMODE_L) ESL_FAIL(eslEINVAL, errbuf, "cm_TrPostCode(): non-sensical mode for MP, MR, IR state: %d", mode);
9797     }
9798   }
9799   ppstr[L] = '\0';
9800 
9801   /*printf("cm_TrPostCodeHB() return avgpp: %f\n", sreEXP2(sum_logp) / (float) L);*/
9802   ESL_DPRINTF1(("#DEBUG: cm_TrPostCodeHB() return avgpp: %f\n", sreEXP2(sum_logp) / (float) L));
9803 
9804   if(ret_ppstr != NULL) *ret_ppstr = ppstr; else free(ppstr);
9805   if(ret_avgp  != NULL) *ret_avgp  = sreEXP2(sum_logp) / (float) L;
9806   return eslOK;
9807 
9808  ERROR:
9809   ESL_FAIL(eslEMEM, errbuf, "cm_TrPostcodeHB(): Memory allocation error.");
9810   return status; /* never reached */
9811 }
9812 
9813 
9814 /* Function: cm_TrFillFromMode()
9815  * Date:     EPN, Wed Sep 28 05:29:19 2011
9816  *
9817  * Purpose: Given an optimal marginal alignment mode
9818  *          (could be TRMODE_UNKNOWN), determine which
9819  *          of the marginal matrices we need to fill
9820  *          in to find the alignment in that mode.
9821  *
9822  *          If mode == TRMODE_J: fill J matrix only
9823  *          If mode == TRMODE_L: fill J and L matrices only
9824  *          If mode == TRMODE_R: fill J and R  matrices only
9825  *          If mode == TRMODE_T: fill J, L, R, and T matrices
9826  *          If mode == TRMODE_UNKNOWN: fill J, L, R, and T matrices
9827  *
9828  *          Return TRUE/FALSE values in <ret_fill_{L,R,T}>.
9829  *          Note that we always must fill in J matrices so a fill_J
9830  *          value is unnecessary, it's implicitly true.
9831  *
9832  * Args:     mode       - optimal mode
9833  *           ret_fill_L - RETURN: should we fill in L based on <ret_mode>?
9834  *           ret_fill_R - RETURN: should we fill in R based on <ret_mode>?
9835  *           ret_fill_T - RETURN: should we fill in T based on <ret_mode>?
9836  *
9837  * Throws:   eslEINVAL if mode is not TRMODE_J, TRMODE_L, TRMODE_R, TRMODE_T nor TRMODE_UNKNOWN.
9838  */
9839 int
cm_TrFillFromMode(char mode,int * ret_fill_L,int * ret_fill_R,int * ret_fill_T)9840 cm_TrFillFromMode(char mode, int *ret_fill_L, int *ret_fill_R, int *ret_fill_T)
9841 {
9842   int fill_L, fill_R, fill_T;
9843   int invalid_mode = FALSE;
9844 
9845   fill_L = fill_R = fill_T = FALSE;
9846   switch(mode) {
9847   case TRMODE_J:
9848     break;
9849   case TRMODE_L:
9850     fill_L = TRUE;
9851     break;
9852   case TRMODE_R:
9853     fill_R = TRUE;
9854     break;
9855   case TRMODE_T:
9856   case TRMODE_UNKNOWN:
9857     fill_L = fill_R = fill_T = TRUE;
9858     break;
9859   default:
9860     invalid_mode = TRUE;
9861     break;
9862   }
9863 
9864   if(ret_fill_L != NULL) *ret_fill_L = fill_L;
9865   if(ret_fill_R != NULL) *ret_fill_R = fill_R;
9866   if(ret_fill_T != NULL) *ret_fill_T = fill_T;
9867 
9868   if(invalid_mode) return eslEINVAL;
9869   return eslOK;
9870 }
9871 
9872 /*****************************************************************
9873  * Benchmark driver
9874  *****************************************************************/
9875 #ifdef IMPL_TRUNC_ALIGN_BENCHMARK
/* Next line is not optimized (debugging on) on MacBook Pro:
9877  * gcc   -o benchmark-trunc-align -std=gnu99 -g -Wall -I. -L. -I../hmmer/src -L../hmmer/src -I../easel -L../easel -DIMPL_TRUNC_ALIGN_BENCHMARK cm_dpalign_trunc.c -linfernal -lhmmer -leasel -lm
9878  * Next line is optimized (debugging not on) on wyvern:
9879  * gcc   -o benchmark-trunc-align -std=gnu99 -O3 -fomit-frame-pointer -malign-double -fstrict-aliasing -pthread -I. -L. -I../hmmer/src -L../hmmer/src -I../easel -L../easel -DIMPL_TRUNC_ALIGN_BENCHMARK cm_dpalign_trunc.c -linfernal -lhmmer -leasel -lm
 * ./benchmark-trunc-align <cmfile> <seqfile>
9881  */
9882 
9883 #include "esl_config.h"
9884 #include "p7_config.h"
9885 #include "config.h"
9886 
9887 #include <stdio.h>
9888 #include <stdlib.h>
9889 #include <string.h>
9890 #include <time.h>
9891 
9892 #include "easel.h"
9893 #include <esl_getopts.h>
9894 #include <esl_histogram.h>
9895 #include <esl_sqio.h>
9896 #include <esl_stats.h>
9897 #include <esl_stopwatch.h>
9898 #include <esl_vectorops.h>
9899 #include <esl_wuss.h>
9900 
9901 #include "hmmer.h"
9902 
9903 #include "infernal.h"
9904 
9905 static ESL_OPTIONS options[] = {
9906   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
9907   { "-h",        eslARG_NONE,    NULL, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",           0 },
9908   { "-l",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "configure CM/HMM for local alignment", 0 },
9909   { "--cykout",  eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL,"--optacc", "run TrCYKOutside, to make sure it agrees with TrCYKInside", 0 },
9910   { "--std",     eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "also do standard (non-truncated) alignments",    0},
9911   { "--orig",    eslARG_NONE,   FALSE, NULL, NULL,  NULL,"--search", NULL, "also do search with original trCYK",         0},
9912   { "--hb",      eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "also do HMM banded alignments",                   0},
9913   { "--failok",  eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "allow failures of Inside vs Outside checks",      0},
9914   { "--search",  eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "also run search algorithms",                   0},
9915   { "--noqdb",   eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "don't use QDBs", 0},
9916   { "--sums",    eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "use posterior sums during HMM band calculation (widens bands)", 0},
9917   { "--onlyhb",  eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "only run HMM banded scanning trCYK", 0},
9918   { "--optacc",  eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "run optimal accuracy alignment instead of CYK", 0},
9919   { "--compacc", eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "run optimal accuracy and CYK and ensure OA avg pp >= CYK avg pp", 0},
9920   { "--tau",     eslARG_REAL,   "5e-6",NULL, "0<x<1",NULL, NULL, NULL, "set tail loss prob for HMM bands to <x>", 0},
9921   { "--cp9noel", eslARG_NONE,   FALSE, NULL, NULL,  NULL,  "-l", NULL,         "turn OFF local ends in cp9 HMMs", 0},
9922   { "--cp9gloc", eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, "--cp9noel",  "configure CP9 HMM in glocal mode", 0},
9923   { "--thresh1", eslARG_REAL,  "0.01", NULL, NULL,  NULL,  NULL,  NULL, "set HMM bands thresh1 to <x>", 0},
9924   { "--thresh2", eslARG_REAL,  "0.99", NULL, NULL,  NULL,  NULL,  NULL, "set HMM bands thresh2 to <x>", 0},
9925   { "--mxsize",  eslARG_REAL, "128.", NULL, "x>0", NULL,  NULL,  NULL, "set maximum allowed size of HB matrices to <x> Mb", 0},
9926   { "--tr",      eslARG_NONE,  FALSE,  NULL, NULL,  NULL,  NULL,  NULL, "dump parsetrees to stdout", 0},
9927   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
9928 };
9929 static char usage[]  = "[-options] <cmfile> <seqfile>";
9930 static char banner[] = "benchmark driver for truncated alignment implementations";
9931 
9932 int
main(int argc,char ** argv)9933 main(int argc, char **argv)
9934 {
9935   int                status;
9936   ESL_GETOPTS       *go      = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
9937   CM_t              *cm;
9938   ESL_STOPWATCH     *w       = esl_stopwatch_Create();
9939   ESL_ALPHABET      *abc     = NULL;
9940   ESL_DSQ           *dsq;
9941   int                i;
9942   float              sc, sc_oa, sc_cyk;
9943   float              pp_oa, pp_cyk;
9944   char              *cmfile  = esl_opt_GetArg(go, 1);
9945   char              *seqfile = esl_opt_GetArg(go, 2);
9946   CM_FILE           *cmfp;	/* open input CM file stream */
9947   int                L;         /* length of sequence */
9948   char               errbuf[eslERRBUFSIZE];
9949   int                pass_idx  = PLI_PASS_5P_AND_3P_ANY; /* this only affects the truncation penalty, not really impt */
9950   ESL_SQFILE        *sqfp      = NULL;        /* open sequence input file stream */
9951   ESL_SQ            *sq        = NULL;  /* a sequence */
9952   Parsetree_t       *tr        = NULL;
9953   float              size_limit = esl_opt_GetReal(go, "--mxsize");
9954   float              save_tau, save_cp9b_thresh1, save_cp9b_thresh2;
9955   float              hbmx_Mb, trhbmx_Mb;
9956   float              parsetree_sc, parsetree_struct_sc;
9957   char               mode;
9958   int                qdbidx;
9959   int                do_optacc = (   esl_opt_GetBoolean(go, "--optacc")  || esl_opt_GetBoolean(go, "--compacc")) ? TRUE : FALSE;
9960   int                do_cyk    = ((! esl_opt_GetBoolean(go, "--optacc")) || esl_opt_GetBoolean(go, "--compacc")) ? TRUE : FALSE;
9961   int                do_compacc = esl_opt_GetBoolean(go, "--compacc") ? TRUE : FALSE;
9962   char              *ppstr = NULL; /* just so cm_*Align*() will return avg PP for CYK */
9963 
9964   /* open CM file */
9965   if ((status = cm_file_Open(cmfile, NULL, FALSE, &(cmfp), errbuf)) != eslOK) cm_Fail(errbuf);
9966   if ((status = cm_file_Read(cmfp, TRUE, &abc, &cm))                != eslOK) cm_Fail(cmfp->errbuf);
9967   cm_file_Close(cmfp);
9968 
9969   /* open the sequence file */
9970   status = esl_sqfile_OpenDigital(cm->abc, seqfile, eslSQFILE_UNKNOWN, NULL, &sqfp);
9971   if (status == eslENOTFOUND)    esl_fatal("File %s doesn't exist or is not readable\n", seqfile);
9972   else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of sequence file %s\n", seqfile);
9973   else if (status == eslEINVAL)  esl_fatal("Can't autodetect stdin or .gz.");
9974   else if (status != eslOK)      esl_fatal("Sequence file open failed with error %d.\n", status);
9975 
9976   cm->config_opts |= CM_CONFIG_TRUNC;
9977   cm->align_opts  |= CM_ALIGN_HBANDED;
9978   if(esl_opt_GetBoolean(go, "--sums")) cm->align_opts |= CM_ALIGN_SUMS;
9979 
9980   if(esl_opt_GetBoolean(go, "-l")) {
9981     cm->config_opts |= CM_CONFIG_LOCAL;
9982     if(! esl_opt_GetBoolean(go, "--cp9gloc")) {
9983       cm->config_opts |= CM_CONFIG_HMMLOCAL;
9984       if(! esl_opt_GetBoolean(go, "--cp9noel")) cm->config_opts |= CM_CONFIG_HMMEL;
9985     }
9986   }
9987   if(esl_opt_GetBoolean(go, "--search")) {
9988     cm->config_opts |= CM_CONFIG_SCANMX;
9989     cm->config_opts |= CM_CONFIG_TRSCANMX;
9990   }
9991 
9992   cm->align_opts |= CM_ALIGN_CHECKINOUT;
9993 
9994   if((status = cm_Configure(cm, errbuf, -1)) != eslOK) cm_Fail(errbuf);
9995 
9996   /* setup logsum lookups (could do this only if nec based on options, but this is safer) */
9997   init_ilogsum();
9998   FLogsumInit();
9999 
10000   /* create nonbanded matrices if nec */
10001   if(esl_opt_GetBoolean(go, "--std")) {
10002     cm->nb_mx   = cm_mx_Create(cm->M);
10003     cm->nb_omx  = cm_mx_Create(cm->M);
10004     cm->nb_shmx = cm_shadow_mx_Create(cm);
10005     cm->nb_emx  = cm_emit_mx_Create(cm);
10006   }
10007 
10008   if (esl_opt_IsUsed(go, "--thresh1")) { cm->cp9b->thresh1 = esl_opt_GetReal(go, "--thresh1"); }
10009   if (esl_opt_IsUsed(go, "--thresh2")) { cm->cp9b->thresh2 = esl_opt_GetReal(go, "--thresh2"); }
10010 
10011   if (esl_opt_GetBoolean(go, "--noqdb")) {
10012     cm->search_opts |= CM_SEARCH_NONBANDED; /* don't use QDB to search */
10013     qdbidx = SMX_NOQDB;
10014   }
10015   else {
10016     qdbidx = SMX_QDB1_TIGHT;
10017   }
10018 
10019   cm->tau = esl_opt_GetReal(go, "--tau");
10020 
10021   if(! esl_opt_GetBoolean(go, "--onlyhb")) {
10022     printf("%-30s", "Creating tr matrix...");
10023     fflush(stdout);
10024     esl_stopwatch_Start(w);
10025     cm->trnb_mx   = cm_tr_mx_Create(cm);
10026     cm->trnb_omx  = cm_tr_mx_Create(cm);
10027     cm->trnb_emx  = cm_tr_emit_mx_Create(cm);
10028     cm->trnb_shmx = cm_tr_shadow_mx_Create(cm);
10029     printf("done.  ");
10030     fflush(stdout);
10031     esl_stopwatch_Stop(w);
10032     esl_stopwatch_Display(stdout, w, " CPU time: ");
10033   }
10034 
10035   save_tau = cm->tau;
10036   save_cp9b_thresh1 = cm->cp9b->thresh1;
10037   save_cp9b_thresh2 = cm->cp9b->thresh2;
10038 
10039   i = 0;
10040   sq = esl_sq_CreateDigital(cm->abc);
10041   while((status = esl_sqio_Read(sqfp, sq)) == eslOK) {
10042     i++;
10043     L = sq->n;
10044     dsq = sq->dsq;
10045     cm->search_opts &= ~CM_SEARCH_INSIDE;
10046 
10047     cm->tau = save_tau;
10048     cm->cp9b->thresh1 = save_cp9b_thresh1;
10049     cm->cp9b->thresh2 = save_cp9b_thresh2;
10050 
10051     cm->align_opts  |= CM_ALIGN_HBANDED;
10052 
10053     /* 1. non-banded truncated alignment, unless --onlyhb
10054      * 2. non-banded standard  alignment, if requested
10055      * 3. HMM banded truncated alignment, if requested
10056      * 4. HMM banded standard  alignment, if requested
10057      * 5. non-banded truncated search,    if requested
10058      * 6. non-banded standard  search,    if requested
10059      * 7. HMM banded truncated search,    if requested
10060      * 8. HMM banded standard  search,    if requested
10061      */
10062 
10063     /* 1. non-banded truncated alignment, unless --onlyhb */
10064     if(! esl_opt_GetBoolean(go, "--onlyhb")) {
10065       /*********************Begin cm_TrAlign****************************/
10066       if(do_optacc) {
10067 	esl_stopwatch_Start(w);
10068 	if((status = cm_TrAlign(cm, errbuf, dsq, L, size_limit, TRMODE_UNKNOWN, pass_idx, TRUE, FALSE, cm->trnb_mx, cm->trnb_shmx, cm->trnb_omx, cm->trnb_emx, NULL, &ppstr, &tr, &mode, &pp_oa, &sc_oa)) != eslOK) cm_Fail(errbuf);
10069 	printf("%4d %-30s %10.4f PP (mode: %s)  (FULL LENGTH OPTACC)\n", i, "cm_TrAlign(): ", pp_oa, MarginalMode(mode));
10070 	esl_stopwatch_Stop(w);
10071 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10072 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10073 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10074 	FreeParsetree(tr);
10075 	free(ppstr); ppstr = NULL;
10076 	printf("Parsetree score      : %.4f           (FULL LENGTH OPTACC)\n", parsetree_sc);
10077       }
10078       if(do_cyk) {
10079 	esl_stopwatch_Start(w);
10080 	if((status = cm_TrAlign(cm, errbuf, dsq, L, size_limit, TRMODE_UNKNOWN, pass_idx, FALSE, FALSE, cm->trnb_mx, cm->trnb_shmx, cm->trnb_omx, cm->trnb_emx, NULL,
10081 				(do_compacc) ? &ppstr : NULL,
10082 				&tr, &mode, &pp_cyk, &sc_cyk)) != eslOK) cm_Fail(errbuf);
10083 	printf("%4d %-30s %10.4f pp %10.4f bits (mode: %s) (FULL LENGTH CYK)\n", i, "cm_TrAlign(): ", pp_cyk, sc_cyk, MarginalMode(mode));
10084 	esl_stopwatch_Stop(w);
10085 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10086 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10087 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10088 	FreeParsetree(tr);
10089 	if(ppstr != NULL) { free(ppstr); ppstr = NULL; }
10090 	printf("Parsetree score      : %.4f           (FULL LENGTH CYK)\n", parsetree_sc);
10091       }
10092       if(do_compacc) {
10093 	if((pp_oa - pp_cyk) < -0.0001) cm_Fail("ERROR OA PP: %f < CYK PP: %f\n", pp_oa, pp_cyk);
10094       }
10095       /*********************End cm_TrAlign****************************/
10096 
10097       if(esl_opt_GetBoolean(go, "--cykout")) {
10098 	/*********************Begin cm_TrCYKOutsideAlign****************************/
10099 	esl_stopwatch_Start(w);
10100 	status = cm_TrCYKOutsideAlign(cm, errbuf, dsq,  L, size_limit, mode, pass_idx, TRUE, cm->trnb_omx, cm->trnb_mx);
10101 	if     (status != eslOK && esl_opt_GetBoolean(go, "--failok")) printf("%s\nError detected, but continuing thanks to --failok\n", errbuf);
10102 	else if(status != eslOK)                                       cm_Fail(errbuf);
10103 	printf("%4d %-30s %10s bits ", i, "cm_TrCYKOutsideAlign() CYK:", "?");
10104 	esl_stopwatch_Stop(w);
10105 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10106 	/*********************End cm_TrCYKOutsideAlign****************************/
10107       }
10108 
10109       /*********************Begin cm_TrInsideAlign()****************************/
10110       if((status = cm_TrInsideAlign(cm, errbuf, dsq, L, size_limit, TRMODE_UNKNOWN, pass_idx, cm->trnb_mx, NULL, &sc)) != eslOK) cm_Fail(errbuf);
10111       printf("%4d %-30s %10.4f bits (FULL LENGTH INSIDE)", i, "cm_TrInsideAlign(): ", sc);
10112       esl_stopwatch_Stop(w);
10113       esl_stopwatch_Display(stdout, w, " CPU time: ");
10114       /*********************End cm_TrInsideAlign*****************************/
10115     }
10116 
10117     /* 2. non-banded standard (non-truncated) alignment, if requested */
10118     if(esl_opt_GetBoolean(go, "--std") && (! esl_opt_GetBoolean(go, "--onlyhb"))) {
10119       /*********************Begin cm_Align()****************************/
10120       if(do_optacc) {
10121 	esl_stopwatch_Start(w);
10122 	if((status = cm_Align  (cm, errbuf, dsq, L, size_limit, TRUE, FALSE, cm->nb_mx, cm->nb_shmx, cm->nb_omx, cm->nb_emx, NULL, &ppstr, &tr, &pp_oa, &sc_oa)) != eslOK) return status;
10123 	esl_stopwatch_Stop(w);
10124 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10125 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10126 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10127 	mode = ParsetreeMode(tr);
10128 	FreeParsetree(tr);
10129 	free(ppstr); ppstr = NULL;
10130 	printf("%4d %-30s %10.4f pp   (FULL LENGTH OPTACC)\n", i, "cm_Align(): ", pp_oa);
10131 	printf("Parsetree score      : %.4f           (FULL LENGTH OPTACC)\n", parsetree_sc);
10132       }
10133       if(do_cyk) {
10134 	esl_stopwatch_Start(w);
10135 	if((status = cm_Align  (cm, errbuf, dsq, L, size_limit, FALSE, FALSE, cm->nb_mx, cm->nb_shmx, cm->nb_omx, cm->nb_emx, NULL,
10136 				(do_compacc) ? &ppstr : NULL,
10137 				&tr, &pp_cyk, &sc_cyk)) != eslOK) return status;
10138 	esl_stopwatch_Stop(w);
10139 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10140 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10141 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10142 	mode = ParsetreeMode(tr);
10143 	FreeParsetree(tr);
10144 	if(ppstr != NULL) { free(ppstr); ppstr = NULL; }
10145 	printf("%4d %-30s %10.4f pp %10.4f bits  (FULL LENGTH OPTACC)\n", i, "cm_Align(): ", pp_cyk, sc_cyk);
10146       	printf("Parsetree score      : %.4f           (FULL LENGTH CYK)\n", parsetree_sc);
10147       }
10148       if(do_compacc) {
10149 	if((pp_oa - pp_cyk) < -0.0001) cm_Fail("ERROR OA PP: %f < CYK PP: %f\n", pp_oa, pp_cyk);
10150       }
10151       /*********************End cm_Align*****************************/
10152 
10153       if(esl_opt_GetBoolean(go, "--cykout")) {
10154 	/*********************Begin cm_CYKOutsideAlign****************************/
10155 	esl_stopwatch_Start(w);
10156 	status = cm_CYKOutsideAlign(cm, errbuf, dsq, L, size_limit, TRUE, cm->nb_omx, cm->nb_mx, &sc);
10157 	if     (status != eslOK && esl_opt_GetBoolean(go, "--failok")) printf("%s\nError detected, but continuing thanks to --failok\n", errbuf);
10158 	else if(status != eslOK)                                       cm_Fail(errbuf);
10159 	printf("%4d %-30s %10.4f bits ", i, "cm_CYKOutsideAlign() CYK:", sc);
10160 	esl_stopwatch_Stop(w);
10161 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10162 	/*********************End cm_CYKOutsideAlign****************************/
10163       }
10164 
10165 
10166       /*********************Begin cm_InsideAlign()****************************/
10167       esl_stopwatch_Start(w);
10168       if((status = cm_InsideAlign (cm, errbuf, dsq, L, size_limit, cm->nb_mx, &sc)) != eslOK) return status;
10169       printf("%4d %-30s %10.4f bits (FULL LENGTH INSIDE)", i, "cm_InsideAlign(): ", sc);
10170       esl_stopwatch_Stop(w);
10171       esl_stopwatch_Display(stdout, w, " CPU time: ");
10172       /*********************End cm_InsideAlign*****************************/
10173 
10174       /*********************Begin cm_OutsideAlign*****************************/
10175       esl_stopwatch_Start(w);
10176       if((status = cm_OutsideAlign(cm, errbuf, dsq, L, size_limit, TRUE, cm->nb_omx, cm->nb_mx, &sc)) != eslOK) return status;
10177       printf("%4d %-30s %10.4f bits (FULL LENGTH OUTSIDE)", i, "cm_OutsideAlign(): ", sc);
10178       esl_stopwatch_Stop(w);
10179       esl_stopwatch_Display(stdout, w, " CPU time: ");
10180       /*********************End cm_OutsideAlign*****************************/
10181       if((status = cm_Posterior(cm, errbuf, L, size_limit, cm->nb_mx, cm->nb_omx, cm->nb_omx)) != eslOK) return status;
10182     }
10183 
10184     /* 3. HMM banded truncated alignment, if requested */
10185     if(esl_opt_GetBoolean(go, "--hb") || esl_opt_GetBoolean(go, "--onlyhb")) {
10186       /*********************Begin cm_TrAlignHB()****************************/
10187       esl_stopwatch_Start(w);
10188       /* Calculate HMM bands. We'll tighten tau and recalculate bands until
10189        * the resulting HMM banded matrix is under our size limit.
10190        */
10191       cm->tau = save_tau;
10192       while(1) {
10193 	if((status = cp9_Seq2Bands(cm, errbuf, cm->cp9_mx, cm->cp9_bmx, cm->cp9_bmx, dsq, 1, L, cm->cp9b,
10194 				   FALSE, /* doing search? */
10195 				   pass_idx, 0)) != eslOK) cm_Fail(errbuf);
10196 	if((status = cm_tr_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, NULL, NULL, NULL, &trhbmx_Mb)) != eslOK) return status;
10197 	if(trhbmx_Mb < size_limit) break; /* our matrix will be small enough, break out of while(1) */
10198 	if(cm->tau > 0.01)         cm_Fail("tau reached limit, unable to create matrix smaller than size limit of %.2f Mb\n", size_limit);
10199 	printf("TrCYK 0 tau: %10g  thresh1: %10g  thresh2: %10g  trhbmx_Mb: %10.2f\n", cm->tau, cm->cp9b->thresh1, cm->cp9b->thresh2, trhbmx_Mb);
10200 	cm->tau *= 2.;
10201 	cm->cp9b->thresh1 *= 2.;
10202 	cm->cp9b->thresh2 -= (1.0-cm->cp9b->thresh2);
10203 	cm->cp9b->thresh1 = ESL_MIN(0.25, cm->cp9b->thresh1);
10204 	cm->cp9b->thresh2 = ESL_MAX(0.25, cm->cp9b->thresh2);
10205       }
10206       printf("TrCYK 1 tau: %10g  thresh1: %10g  thresh2: %10g  trhbmx_Mb: %10.2f\n", cm->tau, cm->cp9b->thresh1, cm->cp9b->thresh2, trhbmx_Mb);
10207       esl_stopwatch_Stop(w);
10208       printf("%4d %-30s %17s", i, "HMM Band calc:", "");
10209       esl_stopwatch_Display(stdout, w, "CPU time: ");
10210 
10211       /*PrintDPCellsSaved_jd(cm, cm->cp9b->jmin, cm->cp9b->jmax, cm->cp9b->hdmin, cm->cp9b->hdmax, L);*/
10212 
10213 
10214       if(do_optacc) {
10215 	esl_stopwatch_Start(w);
10216 	if((status = cm_TrAlignHB(cm, errbuf, dsq, L, size_limit, TRMODE_UNKNOWN, pass_idx, TRUE, FALSE, cm->trhb_mx, cm->trhb_shmx, cm->trhb_omx, cm->trhb_emx, NULL, &ppstr, &tr, &mode, &pp_oa, &sc_oa)) != eslOK) cm_Fail(errbuf);
10217 	printf("%4d %-30s %10.4f PP  (mode: %s)  (FULL LENGTH OPTACC)", i, "cm_TrAlignHB(): ", pp_oa, MarginalMode(mode));
10218 	esl_stopwatch_Stop(w);
10219 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10220 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10221 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10222 	mode = ParsetreeMode(tr);
10223 	FreeParsetree(tr);
10224 	free(ppstr); ppstr = NULL;
10225 	printf("Parsetree score      : %.4f           (FULL LENGTH OPTACC)\n", parsetree_sc);
10226       }
10227       if(do_cyk) {
10228 	esl_stopwatch_Start(w);
10229 	if((status = cm_TrAlignHB(cm, errbuf, dsq, L, size_limit, TRMODE_UNKNOWN, pass_idx, FALSE, FALSE, cm->trhb_mx, cm->trhb_shmx, cm->trhb_omx, cm->trhb_emx, NULL,
10230 				  (do_compacc) ? &ppstr : NULL,
10231 				  &tr, &mode, &pp_cyk, &sc_cyk)) != eslOK) cm_Fail(errbuf);
10232 	printf("%4d %-30s %10.4f pp %10.4f bits (mode: %s)  (FULL LENGTH CYK)", i, "cm_TrAlignHB(): ", pp_cyk, sc_cyk, MarginalMode(mode));
10233 	esl_stopwatch_Stop(w);
10234 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10235 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10236 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10237 	mode = ParsetreeMode(tr);
10238 	FreeParsetree(tr);
10239 	if(ppstr != NULL) { free(ppstr); ppstr = NULL; }
10240 	printf("Parsetree score      : %.4f           (FULL LENGTH CYK)\n", parsetree_sc);
10241       }
10242       if(do_compacc) {
10243 	if((pp_oa - pp_cyk) < -0.0001) cm_Fail("ERROR OA PP: %f < CYK PP: %f\n", pp_oa, pp_cyk);
10244       }
10245       /*********************End cm_TrAlignHB*****************************/
10246 
10247       if(esl_opt_GetBoolean(go, "--cykout")) {
10248 	/*********************Begin cm_TrCYKOutsideAlignHB****************************/
10249 	esl_stopwatch_Start(w);
10250 	status = cm_TrCYKOutsideAlignHB(cm, errbuf, dsq, L, size_limit, mode, pass_idx, TRUE, cm->trhb_omx, cm->trhb_mx);
10251 	if     (status != eslOK && esl_opt_GetBoolean(go, "--failok")) printf("%s\nError detected, but continuing thanks to --failok\n", errbuf);
10252 	else if(status != eslOK)                                       cm_Fail(errbuf);
10253 	printf("%4d %-30s %10s bits ", i, "cm_TrCYKOutsideAlignHB() CYK:", "?");
10254 	esl_stopwatch_Stop(w);
10255 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10256 	/*********************End cm_TrCYKOutsideAlignHB****************************/
10257       }
10258 
10259       /*********************Begin cm_TrInsideAlignHB()****************************/
10260       esl_stopwatch_Start(w);
10261       if((status = cm_TrInsideAlignHB(cm, errbuf, dsq, L, size_limit, TRMODE_UNKNOWN, pass_idx, cm->trhb_mx, NULL, &sc)) != eslOK) cm_Fail(errbuf);
10262       printf("%4d %-30s %10.4f bits (FULL LENGTH INSIDE)", i, "cm_TrInsideAlignHB(): ", sc);
10263       esl_stopwatch_Stop(w);
10264       esl_stopwatch_Display(stdout, w, " CPU time: ");
10265       /*********************End cm_TrInsideAlignHB*****************************/
10266     }
10267 
10268     /* 4. HMM banded standard alignment, if requested */
10269     if(esl_opt_GetBoolean(go, "--std") && (esl_opt_GetBoolean(go, "--hb") || esl_opt_GetBoolean(go, "--onlyhb"))) {
10270       /*********************Begin cm_AlignHB()***************************/
10271       esl_stopwatch_Start(w);
10272       while(1) {
10273 	if((status = cp9_Seq2Bands(cm, errbuf, cm->cp9_mx, cm->cp9_bmx, cm->cp9_bmx, dsq, 1, L, cm->cp9b,
10274 				   FALSE,  /* doing search? */
10275 				   PLI_PASS_STD_ANY,  /* we are not allowing truncated alignments */
10276 				   0)) != eslOK) cm_Fail(errbuf);
10277 	if((status = cm_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, &hbmx_Mb)) != eslOK) return status;
10278 	if(hbmx_Mb < size_limit) break; /* our matrix will be small enough, break out of while(1) */
10279 	if(cm->tau > 0.01)         cm_Fail("tau reached limit, unable to create matrix smaller than size limit of %.2f Mb\n", size_limit);
10280 	printf("  CYK 0 tau: %10g  hbmx_Mb: %10.2f\n", cm->tau, hbmx_Mb);
10281 	cm->tau *= 2.;
10282       }
10283 
10284       esl_stopwatch_Stop(w);
10285       printf("%4d %-30s %17s", i+1, "HMM Band calc:", "");
10286       esl_stopwatch_Display(stdout, w, "CPU time: ");
10287 
10288       /*PrintDPCellsSaved_jd(cm, cm->cp9b->jmin, cm->cp9b->jmax, cm->cp9b->hdmin, cm->cp9b->hdmax, L);*/
10289 
10290       if(do_optacc) {
10291 	esl_stopwatch_Start(w);
10292 	if((status = cm_AlignHB(cm, errbuf, dsq, L, size_limit, TRUE, FALSE, cm->hb_mx, cm->hb_shmx, cm->hb_omx, cm->hb_emx, NULL, &ppstr, &tr, &pp_oa, &sc_oa)) != eslOK) cm_Fail(errbuf);
10293 	esl_stopwatch_Stop(w);
10294 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10295 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10296 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10297 	FreeParsetree(tr);
10298 	free(ppstr);
10299 	printf("%4d %-30s %10.4f pp   (FULL LENGTH OPTACC)\n", i, "cm_AlignHB(): ", pp_oa);
10300 	printf("Parsetree score      : %.4f           (FULL LENGTH OPTACC)\n", parsetree_sc);
10301       }
10302       if(do_cyk) {
10303 	if((status = cm_AlignHB(cm, errbuf, dsq, L, size_limit, FALSE, FALSE, cm->hb_mx, cm->hb_shmx, cm->hb_omx, cm->hb_emx, NULL,
10304 				(do_compacc) ? &ppstr : NULL, &tr, &pp_cyk, &sc_cyk)) != eslOK) cm_Fail(errbuf);
10305 	esl_stopwatch_Stop(w);
10306 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10307 	if(esl_opt_GetBoolean(go, "--tr")) ParsetreeDump(stdout, tr, cm, dsq);
10308 	ParsetreeScore(cm, NULL, NULL, tr, dsq, FALSE, &parsetree_sc, &parsetree_struct_sc, NULL, NULL, NULL);
10309 	FreeParsetree(tr);
10310 	if(ppstr != NULL) { free(ppstr); ppstr = NULL; }
10311 	printf("%4d %-30s %10.4f pp %10.4f bits (FULL LENGTH CYK)\n", i, "cm_AlignHB(): ", pp_cyk, sc_cyk);
10312 	printf("Parsetree score      : %.4f           (FULL LENGTH CYK)\n", parsetree_sc);
10313       }
10314       if(do_compacc) {
10315 	if((pp_oa - pp_cyk) < -0.0001) cm_Fail("ERROR OA PP: %f < CYK PP: %f\n", pp_oa, pp_cyk);
10316       }
10317       /*********************End cm_AlignHB()***************************/
10318     }
10319 
10320     if(esl_opt_GetBoolean(go, "--search")) {
10321       /* 5. non-banded truncated search, if requested */
10322       /*********************Begin RefTrCYKScan****************************/
10323       esl_stopwatch_Start(w);
10324       if((status = RefTrCYKScan(cm, errbuf, cm->trsmx, qdbidx, pass_idx, dsq, 1, L, 0., NULL, FALSE, 0., NULL, NULL, NULL, &mode, &sc)) != eslOK) cm_Fail(errbuf);
10325       printf("%4d %-30s %10.4f bits (mode: %s)", i, "RefTrCYKScan(): ", sc, MarginalMode(mode));
10326       esl_stopwatch_Stop(w);
10327       esl_stopwatch_Display(stdout, w, " CPU time: ");
10328       /*********************End RefTrCYKScan****************************/
10329 
10330       /*********************Begin RefITrInsideScan****************************/
10331       cm->search_opts |= CM_SEARCH_INSIDE;
10332       esl_stopwatch_Start(w);
10333       if((status = RefITrInsideScan(cm, errbuf, cm->trsmx, qdbidx, pass_idx, dsq, 1, L, 0., NULL, FALSE, 0., NULL, NULL, NULL, &mode, &sc)) != eslOK) cm_Fail(errbuf);
10334       printf("%4d %-30s %10.4f bits (mode: %s)", i, "RefITrInsideScan(): ", sc, MarginalMode(mode));
10335       esl_stopwatch_Stop(w);
10336       esl_stopwatch_Display(stdout, w, " CPU time: ");
10337       cm->search_opts &= ~CM_SEARCH_INSIDE;
10338       /*********************End RefITrInsideScan****************************/
10339 
10340       if(esl_opt_GetBoolean(go, "--orig")) {
10341 	/*********************Begin TrCYK_Inside****************************/
10342 	esl_stopwatch_Start(w);
10343 	sc = TrCYK_Inside(cm, dsq, L, 0, 1, L, pass_idx, FALSE, FALSE, NULL);
10344 	printf("%4d %-30s %10.4f bits ", i, "TrCYK_Inside():   ", sc);
10345 	esl_stopwatch_Stop(w);
10346 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10347 	/*********************End TrCYK_Inside****************************/
10348       }
10349 
10350       /* 6. non-banded standard search, if requested */
10351       /*********************Begin FastCYKScan****************************/
10352       esl_stopwatch_Start(w);
10353       if((status = FastCYKScan(cm, errbuf, cm->smx, qdbidx, dsq, 1, L, 0., NULL, FALSE, 0., NULL, NULL, NULL, &sc)) != eslOK) cm_Fail(errbuf);
10354       printf("%4d %-30s %10.4f bits ", i, "FastCYKScan(): ", sc);
10355       esl_stopwatch_Stop(w);
10356       esl_stopwatch_Display(stdout, w, " CPU time: ");
10357       /*********************End FastCYKScan****************************/
10358 
10359       /*********************Begin FastIInsideScan****************************/
10360       cm->search_opts |= CM_SEARCH_INSIDE;
10361       esl_stopwatch_Start(w);
10362       if((status = FastIInsideScan(cm, errbuf, cm->smx, qdbidx, dsq, 1, L, 0., NULL, FALSE, 0., NULL, NULL, NULL, &sc)) != eslOK) cm_Fail(errbuf);
10363       printf("%4d %-30s %10.4f bits ", i, "FastIInsideScan(): ", sc);
10364       esl_stopwatch_Stop(w);
10365       esl_stopwatch_Display(stdout, w, " CPU time: ");
10366       cm->search_opts &= ~CM_SEARCH_INSIDE;
      /*********************End FastIInsideScan****************************/
10368 
10369       /* 7. HMM banded truncated search, if requested */
10370       if(esl_opt_GetBoolean(go, "--hb") || esl_opt_GetBoolean(go, "--onlyhb")) {
10371 	/*********************Begin TrCYKScanHB****************************/
10372 	esl_stopwatch_Start(w);
10373 	/* Calculate HMM bands. We'll tighten tau and recalculate bands until
10374 	 * the resulting HMM banded matrix is under our size limit.
10375 	 */
10376 	cm->tau = save_tau;
10377 	while(1) {
10378 	  if((status = cp9_Seq2Bands(cm, errbuf, cm->cp9_mx, cm->cp9_bmx, cm->cp9_bmx, dsq, 1, L, cm->cp9b,
10379 				     TRUE,  /* doing search? */
10380 				     pass_idx, 0)) != eslOK) cm_Fail(errbuf);
10381 	  if((status = cm_tr_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, NULL, NULL, NULL, &trhbmx_Mb)) != eslOK) return status;
10382 	  if(trhbmx_Mb < size_limit) break; /* our matrix will be small enough, break out of while(1) */
10383 	  if(cm->tau > 0.01)         cm_Fail("tau reached limit, unable to create matrix smaller than size limit of %.2f Mb\n", size_limit);
10384 	  printf("TrCYK 0 tau: %10g  thresh1: %10g  thresh2: %10g  trhbmx_Mb: %10.2f\n", cm->tau, cm->cp9b->thresh1, cm->cp9b->thresh2, trhbmx_Mb);
10385 	  cm->tau *= 2.;
10386 	  cm->cp9b->thresh1 *= 2.;
10387 	  cm->cp9b->thresh2 -= (1.0-cm->cp9b->thresh2);
10388 	  cm->cp9b->thresh1 = ESL_MIN(0.25, cm->cp9b->thresh1);
10389 	  cm->cp9b->thresh2 = ESL_MAX(0.25, cm->cp9b->thresh2);
10390 	}
10391 	printf("TrCYK 1 tau: %10g  thresh1: %10g  thresh2: %10g  trhbmx_Mb: %10.2f\n", cm->tau, cm->cp9b->thresh1, cm->cp9b->thresh2, trhbmx_Mb);
10392 	esl_stopwatch_Stop(w);
10393 	printf("%4d %-30s %17s", i+1, "HMM Band calc:", "");
10394 	esl_stopwatch_Display(stdout, w, "CPU time: ");
10395 
10396 	/*PrintDPCellsSaved_jd(cm, cm->cp9b->jmin, cm->cp9b->jmax, cm->cp9b->hdmin, cm->cp9b->hdmax, L);*/
10397 
10398 	esl_stopwatch_Start(w);
10399 	if((status = TrCYKScanHB(cm, errbuf, cm->trhb_mx, size_limit, pass_idx, dsq, 1, L, 0., NULL, FALSE, 0.,  NULL, NULL, &mode, &sc)) != eslOK) cm_Fail(errbuf);
10400 	printf("%4d %-30s %10.4f bits (mode: %s)", i, "TrCYKScanHB(): ", sc, MarginalMode(mode));
10401 	esl_stopwatch_Stop(w);
10402 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10403 	/*********************End TrCYKScanHB****************************/
10404 
10405 	/*********************Begin FTrInsideScanHB****************************/
10406 	esl_stopwatch_Start(w);
10407 	if((status = FTrInsideScanHB(cm, errbuf, cm->trhb_mx, size_limit, pass_idx, dsq, 1, L, 0., NULL, FALSE, 0.,  NULL, NULL, &mode, &sc)) != eslOK) cm_Fail(errbuf);
10408 	printf("%4d %-30s %10.4f bits (mode: %s)", i, "FTrInsideScanHB(): ", sc, MarginalMode(mode));
10409 	esl_stopwatch_Stop(w);
10410 	esl_stopwatch_Display(stdout, w, " CPU time: ");
10411 	/*********************End FTrInsideScanHB***********************/
10412 
10413 	/* 8. HMM banded standard search, if requested */
10414 	if(esl_opt_GetBoolean(go, "--std") && (esl_opt_GetBoolean(go, "--hb") || esl_opt_GetBoolean(go, "--onlyhb"))) {
10415 	  /*********************Begin FastCYKScanHB****************************/
10416 	  esl_stopwatch_Start(w);
10417 	  cm->tau = save_tau;
10418 	  while(1) {
10419 	    if((status = cp9_Seq2Bands(cm, errbuf, cm->cp9_mx, cm->cp9_bmx, cm->cp9_bmx, dsq, 1, L, cm->cp9b,
10420 				       TRUE,  /* doing search? */
10421 				       PLI_PASS_STD_ANY,  /* we are not allowing truncated alignments */
10422 				       0)) != eslOK) cm_Fail(errbuf);
10423 	    if((status = cm_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, L, NULL, &hbmx_Mb)) != eslOK) return status;
10424 	    if(hbmx_Mb < size_limit) break; /* our matrix will be small enough, break out of while(1) */
10425 	    if(cm->tau > 0.01)         cm_Fail("tau reached limit, unable to create matrix smaller than size limit of %.2f Mb\n", size_limit);
10426 	    printf("  CYK 0 tau: %10g  hbmx_Mb: %10.2f\n", cm->tau, hbmx_Mb);
10427 	    cm->tau *= 2.;
10428 	  }
10429 
10430 	  esl_stopwatch_Stop(w);
10431 	  printf("%4d %-30s %17s", i+1, "HMM Band calc:", "");
10432 	  esl_stopwatch_Display(stdout, w, "CPU time: ");
10433 
10434 	  /*PrintDPCellsSaved_jd(cm, cm->cp9b->jmin, cm->cp9b->jmax, cm->cp9b->hdmin, cm->cp9b->hdmax, L);*/
10435 
10436 	  esl_stopwatch_Start(w);
10437 	  if((status = FastCYKScanHB(cm, errbuf, cm->hb_mx, size_limit, dsq, 1, L, 0., NULL, FALSE, 0., NULL, NULL, &sc)) != eslOK) cm_Fail(errbuf);
10438 	  printf("%4d %-30s %10.4f bits ", i, "FastCYKScanHB(): ", sc);
10439 	  esl_stopwatch_Stop(w);
10440 	  esl_stopwatch_Display(stdout, w, " CPU time: ");
10441 	  /*********************End FastCYKScanHB****************************/
10442 	}
10443       }
10444     }
10445     printf("\n");
10446     esl_sq_Reuse(sq);
10447   }
10448   if(status != eslEOF) cm_Fail("ERROR reading sequence file, sequence number %d\n", i);
10449 
10450   FreeCM(cm);
10451   esl_sq_Destroy(sq);
10452   esl_alphabet_Destroy(abc);
10453   esl_stopwatch_Destroy(w);
10454   esl_getopts_Destroy(go);
10455   esl_sqfile_Close(sqfp);
10456 
10457   return 0;
10458 }
10459 #endif /*IMPL_TRUNC_ALIGN_BENCHMARK*/
10460