1 /* hmmband.c
2  * EPN 12.16.05
3  *
4  * Functions to support deriving bands for a constrained CM
5  * parse of a target sequence using CM. Bands are derived
6  * from CM plan 9 HMM (CP9 HMM) Forward/Backward parses of
7  * the target.
8  */
9 
10 #include "esl_config.h"
11 #include "p7_config.h"
12 #include "config.h"
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <ctype.h>
18 #include <float.h>
19 #include <limits.h>
20 #include <math.h>
21 
22 #include "easel.h"
23 #include "esl_stack.h"
24 #include "esl_vectorops.h"
25 
26 #include "hmmer.h"
27 
28 #include "infernal.h"
29 
30 
31 static int          cp9_FB2HMMBands        (CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
32 				            int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level);
33 static int          cp9_FB2HMMBandsWithSums(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
34 					    int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level);
35 static void         cp9_Posterior(ESL_DSQ *dsq, int i0, int j0, CP9_t *hmm, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *mx, int did_fwd_scan);
36 static void         cp9_IFillPostSums(CP9_MX *post, CP9Bands_t *cp9, int i0, int j0);
37 static int          HMMBandsEnforceValidParse(CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, char *errbuf, int i0, int j0, int doing_search, int *ret_did_expand,
38 					      int **ret_r_mn, int **ret_r_mx, int **ret_r_in,  int **ret_r_ix, int **ret_r_dn, int **ret_r_dx,
39 					      int **ret_r_nn_i, int **ret_r_nx_i, int **ret_r_nn_j, int **ret_r_nx_j);
40 static int          HMMBandsFixUnreachable(CP9Bands_t *cp9b, char *errbuf, int k, int r_prv_min, int r_prv_max, int r_insert_prv_min);
41 static int          HMMBandsFillGap(CP9Bands_t *cp9b, char *errbuf, int k, int min1, int max1, int min2, int max2, int prv_nd_r_mn, int prv_nd_r_dn);
42 #if eslDEBUGLEVEL >= 1
43 static int          CMBandsCheckValidParse(CM_t *cm, CP9Bands_t *cp9b, char *errbuf, int i0, int j0, int doing_search);
44 #endif
45 
46 /* EPN 10.28.06
47  * Function: AllocCP9Bands()
48  *
49  * Purpose:  Allocate the arrays needed for creating i and j
50  *           bands on a CM based on a CP9 parse. See infernal.h
51  *           for description of this structure.
52  *
53  * Args:
54  * cm_M                - number of states in the CM
55  * hmm_M               - number of nodes in the CP9 HMM for the CM
56  * Returns: (void)
57  *
58  */
59 
60 CP9Bands_t *
AllocCP9Bands(int cm_M,int hmm_M)61 AllocCP9Bands(int cm_M, int hmm_M)
62 {
63   int status;
64   CP9Bands_t  *cp9bands;
65 
66   ESL_ALLOC(cp9bands, sizeof(CP9Bands_t));
67 
68   cp9bands->cm_M  = cm_M;
69   cp9bands->hmm_M = hmm_M;
70 
71   cp9bands->sp1 = cp9bands->sp2 = cp9bands->ep1 = cp9bands->ep2 = -1;
72   cp9bands->thresh1          = DEFAULT_CP9BANDS_THRESH1;    /* 0.01 */
73   cp9bands->thresh2          = DEFAULT_CP9BANDS_THRESH2;    /* 0.98 */
74   cp9bands->Rmarg_imin = cp9bands->Lmarg_jmin = -1;
75   cp9bands->Rmarg_imax = cp9bands->Lmarg_jmax = -2;
76 
77   ESL_ALLOC(cp9bands->Jvalid, sizeof(int) * (cm_M+1));
78   ESL_ALLOC(cp9bands->Lvalid, sizeof(int) * (cm_M+1));
79   ESL_ALLOC(cp9bands->Rvalid, sizeof(int) * (cm_M+1));
80   ESL_ALLOC(cp9bands->Tvalid, sizeof(int) * (cm_M+1));
81   esl_vec_ISet(cp9bands->Jvalid, cm_M+1, TRUE);
82   esl_vec_ISet(cp9bands->Lvalid, cm_M+1, TRUE);
83   esl_vec_ISet(cp9bands->Rvalid, cm_M+1, TRUE);
84   esl_vec_ISet(cp9bands->Tvalid, cm_M, TRUE);
85   cp9bands->Tvalid[cm_M] = FALSE;
86 
87   ESL_ALLOC(cp9bands->pn_min_m, sizeof(int) * (cp9bands->hmm_M+1));
88   ESL_ALLOC(cp9bands->pn_max_m, sizeof(int) * (cp9bands->hmm_M+1));
89   ESL_ALLOC(cp9bands->pn_min_i, sizeof(int) * (cp9bands->hmm_M+1));
90   ESL_ALLOC(cp9bands->pn_max_i, sizeof(int) * (cp9bands->hmm_M+1));
91   ESL_ALLOC(cp9bands->pn_min_d, sizeof(int) * (cp9bands->hmm_M+1));
92   ESL_ALLOC(cp9bands->pn_max_d, sizeof(int) * (cp9bands->hmm_M+1));
93   ESL_ALLOC(cp9bands->isum_pn_m,sizeof(int) * (cp9bands->hmm_M+1));
94   ESL_ALLOC(cp9bands->isum_pn_i,sizeof(int) * (cp9bands->hmm_M+1));
95   ESL_ALLOC(cp9bands->isum_pn_d,sizeof(int) * (cp9bands->hmm_M+1));
96 
97   ESL_ALLOC(cp9bands->imin,       sizeof(int)   * cp9bands->cm_M);
98   ESL_ALLOC(cp9bands->imax,       sizeof(int)   * cp9bands->cm_M);
99   ESL_ALLOC(cp9bands->jmin,       sizeof(int)   * cp9bands->cm_M);
100   ESL_ALLOC(cp9bands->jmax,       sizeof(int)   * cp9bands->cm_M);
101   ESL_ALLOC(cp9bands->safe_hdmin, sizeof(int)   * cp9bands->cm_M);
102   ESL_ALLOC(cp9bands->safe_hdmax, sizeof(int)   * cp9bands->cm_M);
103   ESL_ALLOC(cp9bands->hdmin,      sizeof(int *) * cp9bands->cm_M);
104   ESL_ALLOC(cp9bands->hdmax,      sizeof(int *) * cp9bands->cm_M);
105   cp9bands->hdmin_mem = NULL;
106   cp9bands->hdmax_mem = NULL;
107   /* NOTE: cp9bands->hdmin and hdmax are 2D arrays, the ptrs are
108    * alloc'ed here, but the actually memory is alloc'ed by
109    * hmmband.c:cp9_Seq2Bands() with a call to hmmband.c:cp9_GrowHDBands().
110    */
111   cp9bands->hd_needed  = 0;
112   cp9bands->hd_alloced = 0;
113 
114   cp9bands->tau        = -1.; /* invalid, reset each time bands are calculated */
115   return cp9bands;
116 
117  ERROR:
118   cm_Fail("Memory allocation error.\n");
119   return NULL; /* never reached */
120 }
121 
122 /* Function: SizeofCP9Bands()
123  * Returns:  Size (Mb) of cp9b.
124  */
125 float
SizeofCP9Bands(CP9Bands_t * cp9b)126 SizeofCP9Bands(CP9Bands_t *cp9b)
127 {
128   float bytes = 0.;
129 
130   bytes += sizeof(CP9Bands_t);
131 
132   /* following from AllocCP9Bands() */
133   bytes += sizeof(int) * (cp9b->cm_M+1); /* Jvalid */
134   bytes += sizeof(int) * (cp9b->cm_M+1); /* Lvalid */
135   bytes += sizeof(int) * (cp9b->cm_M+1); /* Rvalid */
136   bytes += sizeof(int) * (cp9b->cm_M+1); /* Tvalid */
137 
138   bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_min_m */
139   bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_max_m */
140   bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_min_i */
141   bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_max_i */
142   bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_min_d */
143   bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_max_d */
144   bytes += sizeof(int) * (cp9b->hmm_M+1); /* isum_pn_m */
145   bytes += sizeof(int) * (cp9b->hmm_M+1); /* isum_pn_i */
146   bytes += sizeof(int) * (cp9b->hmm_M+1); /* isum_pn_d */
147 
148   bytes += sizeof(int) *  cp9b->cm_M; /* imin */
149   bytes += sizeof(int) *  cp9b->cm_M; /* imax */
150   bytes += sizeof(int) *  cp9b->cm_M; /* jmin */
151   bytes += sizeof(int) *  cp9b->cm_M; /* jmax */
152   bytes += sizeof(int) *  cp9b->cm_M; /* safe_hdmin */
153   bytes += sizeof(int) *  cp9b->cm_M; /* safe_hdmax */
154   bytes += sizeof(int *) *  cp9b->cm_M; /* hdmin */
155   bytes += sizeof(int *) *  cp9b->cm_M; /* hdmax */
156 
157   bytes += sizeof(int) * cp9b->hd_alloced; /* hdmin */
158   bytes += sizeof(int) * cp9b->hd_alloced; /* hdmax */
159 
160   return bytes / 1000000.;
161 }
162 
163 /* Function: FreeCP9Bands()
164  * Returns: (void)
165  */
166 void
FreeCP9Bands(CP9Bands_t * cp9bands)167 FreeCP9Bands(CP9Bands_t *cp9bands)
168 {
169   free(cp9bands->imin);
170   free(cp9bands->imax);
171   free(cp9bands->jmin);
172   free(cp9bands->jmax);
173   free(cp9bands->safe_hdmin);
174   free(cp9bands->safe_hdmax);
175   if(cp9bands->hdmin_mem != NULL)
176     free(cp9bands->hdmin_mem); /* all v were malloc'ed as a block */
177   if(cp9bands->hdmax_mem != NULL)
178     free(cp9bands->hdmax_mem); /* all v were malloc'ed as a block */
179   free(cp9bands->hdmin);
180   free(cp9bands->hdmax);
181 
182   free(cp9bands->pn_min_m);
183   free(cp9bands->pn_max_m);
184   free(cp9bands->pn_min_i);
185   free(cp9bands->pn_max_i);
186   free(cp9bands->pn_min_d);
187   free(cp9bands->pn_max_d);
188   free(cp9bands->isum_pn_m);
189   free(cp9bands->isum_pn_i);
190   free(cp9bands->isum_pn_d);
191 
192   free(cp9bands->Jvalid);
193   free(cp9bands->Lvalid);
194   free(cp9bands->Rvalid);
195   free(cp9bands->Tvalid);
196 
197   free(cp9bands);
198 }
199 
200 /* Function: cp9_Seq2Bands
201  * Date    : EPN, Mon Jan  8 07:23:34 2007
202  *           EPN, Wed Oct 17 04:53:58 2007  [updated/optimized]
203  *
204  * Purpose:  Given a CM with precalc'ed CP9 HMM and CP9Map, a sequence and
205  *           a CP9Bands_t structure, calculate the HMM bands and store them
206  *           in the CP9Bands_t structure.
207  *
208  * Args:     cm           - the covariance model
209  *           errbuf       - char buffer for reporting errors
210  *           fmx          - CP9 dp matrix for Forward()
211  *           bmx          - CP9 dp matrix for Backward()
212  *           pmx          - CP9 dp matrix to fill with posteriors, can == bmx
213  *           dsq          - sequence in digitized form
214  *           i0           - start of target subsequence (often 1, beginning of sq)
215  *           j0           - end of target subsequence (often L, end of sq)
216  *           cp9b         - PRE-ALLOCATED, the HMM bands for this sequence, filled here.
217  *           doing_search - TRUE if we're going to use these HMM bands for search, not alignment
218  *           pass_idx     - pipeline pass index, tells us which truncation modes to allow, if any
219  *           debug_level  - verbosity level for debugging printf()s
220  *
221  * Return:  eslOK on success;
222  *
223  */
224 int
cp9_Seq2Bands(CM_t * cm,char * errbuf,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,ESL_DSQ * dsq,int i0,int j0,CP9Bands_t * cp9b,int doing_search,int pass_idx,int debug_level)225 cp9_Seq2Bands(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, CP9Bands_t *cp9b, int doing_search, int pass_idx, int debug_level)
226 {
227   int   status;
228   int   use_sums;     /* TRUE to fill and use posterior sums during HMM band calc, yields wider bands  */
229   float sc;
230   int do_old_hmm2ij;
231   int do_trunc;       /* are we allowing truncated alignments (either L or R)? */
232   int do_fwd_scan;    /* run Forward  in scanning mode? (see long comment on this below by assignment of do_fwd_scan) */
233   int do_bck_scan;    /* run Backward in scanning mode? (see long comment on this below by assignment of do_fwd_scan) */
234   CP9_t *cp9 = NULL;  /* ptr to cp9 HMM (cm->cp9, cm->Lcp9, cm->Rcp9, or cm->Tcp9) we'll use for deriving bands */
235 
236   /* Contract checks */
237   if(cm->cp9map == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, but cm->cp9map is NULL.\n");
238   if(dsq == NULL)        ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, dsq is NULL.");
239   if(i0 > j0)            ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, i0: %d > j0: %d\n", i0, j0);
240   if(cm->tau > 0.5)      ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, cm->tau (%f) > 0.5, we can't deal.", cm->tau);
241 
242   use_sums      = ((cm->align_opts & CM_ALIGN_SUMS)      || (cm->search_opts & CM_SEARCH_SUMS))      ? TRUE : FALSE;
243   do_old_hmm2ij = ((cm->align_opts & CM_ALIGN_HMM2IJOLD) || (cm->search_opts & CM_SEARCH_HMM2IJOLD)) ? TRUE : FALSE;
244 
245   /* Determine which cp9 HMM to use and whether or not we're doing
246    * truncated alignment, based on value of pass_idx.
247    */
248   switch(pass_idx) {
249   case PLI_PASS_5P_ONLY_FORCE:   do_trunc = TRUE;  cp9 = cm->Rcp9; break;
250   case PLI_PASS_3P_ONLY_FORCE:   do_trunc = TRUE;  cp9 = cm->Lcp9; break;
251   case PLI_PASS_5P_AND_3P_FORCE: do_trunc = TRUE;  cp9 = cm->Tcp9; break;
252   case PLI_PASS_5P_AND_3P_ANY:   do_trunc = TRUE;  cp9 = cm->Tcp9; break;
253   default:                       do_trunc = FALSE; cp9 = cm->cp9;  break;
254   }
255   if(cp9 == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, relevant cp9 is NULL.\n");
256 
257   /* Determine if we should do Forward and Backward in scan mode.
258    * When in scan mode, Forward will allow parses to start at any
259    * position, else they must start at i0 (first res).
260    * When in scan mode, Backward will allow parses to end at any
261    * position, else they must end at j0 (final res).
262    *
263    * We should only scan in Forward if i0 does not need to be in any
264    * eventual CM parsetree we derive using these bands. This is only
265    * true if we'll use these bands for a CM search
266    * (doing_search==TRUE) and that search won't be a special truncated
267    * search where i0 must be in any valid parsetree. We will be doing
268    * a truncated search enforcing i0 inclusion if pass_idx is either
269    * PLI_PASS_5P_AND_3P or PLI_PASS_5P_ONLY.
270    *
271    * Likewise, we should only scan in Backward if j0 does not need to
272    * be in any eventual CM parsetree we derive using these bands.
273    * This is only true if we'll use these bands for a CM search
274    * (doing_search==TRUE) and that search won't be a special truncated
275    * search where j0 must be in any valid parsetree. We will be doing
276    * a truncated search enforcing j0 inclusion if pass_idx is either
277    * PLI_PASS_5P_AND_3P or PLI_PASS_3P_ONLY.
278    */
279   if((! doing_search) || (cm->search_opts & CM_SEARCH_HMMALNBANDS)) {
280     do_fwd_scan = do_bck_scan = FALSE;
281   }
282   else {
283     do_fwd_scan = cm_pli_PassEnforcesFirstRes(pass_idx) ? FALSE : TRUE;
284     do_bck_scan = cm_pli_PassEnforcesFinalRes(pass_idx) ? FALSE : TRUE;
285   }
286 
287   /* Step 1: Get HMM Forward/Backward DP matrices.
288    * Step 2: F/B -> HMM bands.
289    * Step 3: Calculate candidate states for truncated alignments
290    * Step 4: HMM bands -> CM bands.
291    */
292 
293   /* Step 1: Get HMM Forward/Backward DP matrices. */
294   if((status = cp9_Forward(cp9, errbuf, fmx, dsq, i0, j0,
295 			   do_fwd_scan,      /* allow parses to start at any posn? */
296 			   (! doing_search), /* are we going to use bands to align? */
297 			   FALSE,            /* don't be memory efficient */
298 			   NULL, NULL,
299 			   &sc)) != eslOK) return status;
300 
301   if((status = cp9_Backward(cp9, errbuf, bmx, dsq, i0, j0,
302 			    do_bck_scan,       /* allow parses to end at any posn? */
303 			    (! doing_search),  /* are we going to use posteriors to align? */
304 			    FALSE,             /* don't be memory efficient */
305 			    NULL, NULL,
306 			    &sc)) != eslOK) return status;
307 
308   if(cm->align_opts & CM_ALIGN_CHECKFB) {
309     if((status = cp9_CheckFB(fmx, bmx, cp9, errbuf, sc, i0, j0, dsq)) != eslOK) return status;
310     printf("Forward/Backward matrices checked.\n");
311   }
312 
313   /* Step 2: F/B -> HMM bands. */
314   if(use_sums){
315     if((status = cp9_FB2HMMBandsWithSums(cp9, errbuf, dsq, fmx, bmx, pmx, cp9b, i0, j0, cp9b->hmm_M,
316 					 (1.-cm->tau), do_fwd_scan, do_bck_scan, do_old_hmm2ij, debug_level)) != eslOK) return status;
317   }
318   else {
319     if((status = cp9_FB2HMMBands(cp9, errbuf, dsq, fmx, bmx, pmx, cp9b, i0, j0, cp9b->hmm_M,
320 				 (1.-cm->tau), do_fwd_scan, do_bck_scan, do_old_hmm2ij, debug_level)) != eslOK) return status;
321   }
322   if(debug_level > 0) cp9_DebugPrintHMMBands(stdout, j0, cp9b, cm->tau, 1);
323   cp9b->tau = cm->tau;
324 
325   /* Step 3: (only if truncated alignments are possible)
326    * Calculate occupancy and candidate states for marginal alignments
327    */
328   if(do_trunc) {
329     cp9_PredictStartAndEndPositions(pmx, cp9b, i0, j0);
330     if((status = cp9_MarginalCandidatesFromStartEndPositions(cm, cp9b, pass_idx, errbuf)) != eslOK) return status;
331     /* xref: ELN2 notebook, p.146-147; ~nawrockie/notebook/11_0816_inf_banded_trcyk/00LOG */
332   }
333   else {
334     /* reset all Jvalid values to TRUE */
335     esl_vec_ISet(cp9b->Jvalid, cm->M+1, TRUE);
336     /* and all {L,R,T}valid values to FALSE */
337     esl_vec_ISet(cp9b->Lvalid, cm->M+1, FALSE);
338     esl_vec_ISet(cp9b->Rvalid, cm->M+1, FALSE);
339     esl_vec_ISet(cp9b->Tvalid, cm->M+1, FALSE);
340   }
341 
342   /* Step 4: HMM bands -> CM bands. */
343   if(do_old_hmm2ij) {
344     if((status = cp9_HMM2ijBands_OLD(cm, errbuf, cp9b, cm->cp9map, i0, j0, doing_search, debug_level)) != eslOK) return status;
345   }
346   else {
347     if((status = cp9_HMM2ijBands(cm, errbuf, cp9, cp9b, cm->cp9map, i0, j0, doing_search, do_trunc, debug_level)) != eslOK) return status;
348   }
349 
350   /* Use the CM bands on i and j to get bands on d, specific to j. */
351   /* cp9_GrowHDBands() must be called before ij2d_bands() so hdmin, hdmax are adjusted for new seq */
352   if((status = cp9_GrowHDBands(cp9b, errbuf)) != eslOK) return status;
353   ij2d_bands(cm, (j0-i0+1), cp9b->imin, cp9b->imax, cp9b->jmin, cp9b->jmax, cp9b->hdmin, cp9b->hdmax, do_trunc, debug_level);
354 
355 #if eslDEBUGLEVEL >= 1
356   if((status = cp9_ValidateBands(cm, errbuf, cp9b, i0, j0, do_trunc)) != eslOK) return status;
357   ESL_DPRINTF1(("#DEBUG: bands validated.\n"));
358 #endif
359   if(debug_level > 0) debug_print_ij_bands(cm);
360   if(debug_level > 0) PrintDPCellsSaved_jd(cm, cp9b->jmin, cp9b->jmax, cp9b->hdmin, cp9b->hdmax, (j0-i0+1));
361 
362   return eslOK;
363 }
364 
365 /* Function:  cp9_IterateSeq2Bands()
366  * Incept:    EPN, Thu Mar  1 17:56:42 2012
367  *
368  * Purpose:   Increase cm->tau (tighten HMM bands) by multiplying it
369  *            by TAU_MULTIPLIER (2.0) until required HMM banded matrix
370  *            size is below <size_limit> Mb, or cm->tau is greater than
371  *            <maxtau>.
372  *
373  *            If we're doing a truncated alignment (which we can figure
374  *            out based on the value of <pass_idx>) then we also increase
375  *            cp9b->thresh1 and decrease cp9b->thresh2 by a hard-coded
376  *            value into the maximum/minimum is reached for them as
377  *            wel..
378  *
379  *            Since we can't determine the required size of a HB
380  *            matrix unless we have filled a CP9Bands_t object
381  *            (cm->cp9b), we need to recalculate bands each time tau,
382  *            (and possibly thresh1 and thresh2) are modified and then
383  *            check size of resulting matrix given the bands.
384  *
385  *            Upon returning cm->tau, cm->cp9b->tau, cm->cp9b->thresh1
386  *            and cm->cp9b->thresh2 may have been changed.
387  *
388  * Args       cm           - the CM
389  *            errbuf       - for error messages
390  *            dsq          - sequence we're aligning
391  *            i0           - first position in dsq to align (usually 1)
392  *            j0           - final position in dsq to align (usually sq->n)
393  *            pass_idx     - pipeline pass index
394  *            size_limit   - max allowed size of an HB mx, in Mb
395  *            doing_search - TRUE if we're going to use these HMM bands for search, not alignment
396  *            do_sample    - TRUE if bands will eventually be used for sampling a parsetree
397  *            do_post      - TRUE if bands will eventually be used for posterior alignment
398  *            do_iterate   - TRUE to attempt to iteratively tighten bands until matrix is small enough
399  *            maxtau       - max value allowed for cm->tau
400  *            xtau         - we multiply tau by this at each iteration (must be > 1.1)
401  *            ret_Mb       - RETURN: required Mb for HB mx for cm->tau upon exit.
402  *
403  * Returns:   <eslOK> on success.
404  *            <eslERANGE> if required matrix size is > <size_limit>,
405  *            for cm->tau = maxtau.
406  *            A different error code upon an error, errbuf is filled.
407  */
408 int
cp9_IterateSeq2Bands(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int64_t i0,int64_t j0,int pass_idx,float size_limit,int doing_search,int do_sample,int do_post,int do_iterate,double maxtau,float * ret_Mb)409 cp9_IterateSeq2Bands(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int64_t i0, int64_t j0, int pass_idx, float size_limit, int doing_search, int do_sample, int do_post, int do_iterate, double maxtau, float *ret_Mb)
410 {
411   int   status;
412   int   do_trunc = cm_pli_PassAllowsTruncation(pass_idx);
413   float hbmx_Mb;  /* approximate size in Mb required for HMM banded matrix */
414   int   tau_at_limit     = FALSE;
415   int   thresh1_at_limit = (do_trunc) ? FALSE : TRUE;
416   int   thresh2_at_limit = (do_trunc) ? FALSE : TRUE;
417 
418   while(1) {
419     if((status = cp9_Seq2Bands(cm, errbuf, cm->cp9_mx, cm->cp9_bmx, cm->cp9_bmx, dsq, i0, j0, cm->cp9b, doing_search, pass_idx, 0)) != eslOK) goto ERROR;
420     if(doing_search) {
421       if(do_trunc) { if((status = cm_tr_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, j0-i0+1, NULL, NULL, NULL, NULL, &hbmx_Mb)) != eslOK) goto ERROR; }
422       else         { if((status = cm_hb_mx_SizeNeeded   (cm, errbuf, cm->cp9b, j0-i0+1, NULL, &hbmx_Mb)) != eslOK) goto ERROR; }
423     }
424     else {
425       if(do_trunc) { status = cm_TrAlignSizeNeededHB(cm, errbuf, j0-i0+1, size_limit, do_sample, do_post, NULL, NULL, NULL, &hbmx_Mb); }
426       else         { status = cm_AlignSizeNeededHB  (cm, errbuf, j0-i0+1, size_limit, do_sample, do_post, NULL, NULL, NULL, &hbmx_Mb); }
427       if(status != eslOK && status != eslERANGE) return status;
428     }
429     /*printf("cm->tau: %10.2g thresh1: %4.2f thresh2: %4.2f mxsize: %.2f\n", cm->tau, cm->cp9b->thresh1, cm->cp9b->thresh2, hbmx_Mb);*/
430     /* check if we can stop iterating, three ways we can
431      * case 1: matrix is now smaller than our limit.
432      * case 2: do_iterate == FALSE
433      * case 3: do_trunc == FALSE && tau has reached its limit
434      * case 4: do_trunc == TRUE  && tau, thresh1 and thresh have all reached their limits
435      */
436     if(hbmx_Mb <  size_limit) {
437       break; /* our matrix will be small enough, break out of while(1) */
438     }
439     if(! do_iterate) {
440       break; /* do_iterate is FALSE */
441     }
442     if(tau_at_limit && thresh1_at_limit && thresh2_at_limit) { /* if do_trunc is FALSE, thresh{1,2}_at_limit were init'ed as TRUE */
443       break; /* tau, thresh1 and thresh2 have all reached their limits, break out of while (1) */
444     }
445     if(! tau_at_limit) {
446       cm->tau *= TAU_MULTIPLIER;
447       if(cm->tau >= maxtau) { cm->tau = maxtau; tau_at_limit = TRUE; }
448     }
449     if(! thresh1_at_limit) {
450       cm->cp9b->thresh1 += DELTA_CP9BANDS_THRESH1;
451       if(cm->cp9b->thresh1 >= MAX_CP9BANDS_THRESH1) { cm->cp9b->thresh1 = MAX_CP9BANDS_THRESH1; thresh1_at_limit = TRUE; }
452     }
453     if(! thresh2_at_limit) {
454       cm->cp9b->thresh2 -= DELTA_CP9BANDS_THRESH2;
455       if(cm->cp9b->thresh2 <= MIN_CP9BANDS_THRESH2) { cm->cp9b->thresh2 = MIN_CP9BANDS_THRESH2; thresh2_at_limit = TRUE; }
456     }
457   }
458 
459   if(ret_Mb != NULL) *ret_Mb = hbmx_Mb;
460 
461   if(hbmx_Mb > size_limit) return eslERANGE;
462 
463   return eslOK;
464 
465  ERROR:
466   if(ret_Mb != NULL) *ret_Mb = 0.;
467   return status;
468 }
469 
470 /* Function: cp9_Seq2Posteriors
471  * Date    : EPN, Mon Jan  8 07:27:21 2007
472  *
473  * Purpose:  Given a CM with precalc'ed CP9 HMM and CP9Map, and a sequence,
474  *           run HMM Forward and Backward algorithms, and return a CP9 posterior
475  *           matrix.
476  *
477  *           Note: this function was never updated to handle
478  *           truncated alignment (b/c it's no longer hooked up
479  *           to any of the Infernal applications).
480  *
481  * Args:     cm           - the covariance model
482  *           errbuf       - char buffer for error messages
483  *           fmx          - CP9 dp matrix for Forward()
484  *           bmx          - CP9 dp matrix for Backward()
485  *           pmx          - CP9 dp matrix to fill with posteriors, can == bmx
486  *           dsq          - sequence in digitized form
487  *           i0           - start of target subsequence (often 1, beginning of dsq)
488  *           j0           - end of target subsequence (often L, end of dsq)
489  *           debug_level  - verbosity level for debugging printf()s
490  *
491  * Return:  eslOK on success
492  */
493 int
cp9_Seq2Posteriors(CM_t * cm,char * errbuf,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,ESL_DSQ * dsq,int i0,int j0,int debug_level)494 cp9_Seq2Posteriors(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, int debug_level)
495 {
496   int status;
497   float sc;
498   CP9_t *cp9 = NULL;  /* ptr to cp9 HMM (this could be Lcp9, Rcp9, Tcp9 if we update this function to possibly handle truncated alignment) */
499 
500   /* Contract checks */
501   if(dsq == NULL)        ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors(), dsq is NULL.");
502   if(cm->cp9    == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors, but cm->cp9 is NULL.\n");
503   if(cm->cp9map == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors, but cm->cp9map is NULL.\n");
504   if((cm->search_opts & CM_SEARCH_HMMALNBANDS) && (! (cm->search_opts & CM_SEARCH_HBANDED)))
505     ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors, CM_SEARCH_HMMALNBANDS flag raised, but not CM_SEARCH_HBANDED flag, this doesn't make sense\n");
506 
507   cp9 = cm->cp9;
508   if(cp9 == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Posteriors, relevant cp9 is NULL.\n");
509 
510   /* Step 1: Get HMM posteriors.*/
511   if((status = cp9_Forward(cp9, errbuf, fmx, dsq, i0, j0,
512 			   FALSE,     /* don't use scanning Forward/Backward */
513 			   TRUE,      /* we are going to use posteriors to align */
514 			   FALSE,     /* don't be memory efficient */
515 			   NULL, NULL,
516 			   &sc)) != eslOK) return status;
517   if(debug_level > 0) printf("CP9 Forward  score : %.4f\n", sc);
518   if((status = cp9_Backward(cp9, errbuf, bmx, dsq, i0, j0,
519 			    FALSE, /* don't use scanning Forward/Backward */
520 			    TRUE,  /* we are going to use posteriors to align */
521 			    FALSE, /* don't be memory efficient */
522 			    NULL, NULL,
523 			    &sc)) != eslOK) return status;
524   if(debug_level > 0) printf("CP9 Backward  score : %.4f\n", sc);
525 
526   if(cm->align_opts & CM_ALIGN_CHECKFB) {
527     if((status = cp9_CheckFB(fmx, bmx, cp9, errbuf, sc, i0, j0, dsq)) != eslOK) return status;
528     printf("Forward/Backward matrices checked.\n");
529   }
530 
531   /* Get posteriors */
532   cp9_Posterior(dsq, i0, j0, cp9, fmx, bmx, pmx, FALSE);
533 
534   return eslOK;
535 }
536 
537 
538 /* Function: cp9_FB2HMMBands()
539  * Date:     EPN, 04.03.06
540  *           EPN, Mon Oct 15 18:20:42 2007 [updated/optimized]
541  *
542  * Purpose: Determine the band on all HMM states given a Forward and
543  *          Backward matrix. Do this by calculating and summing log posterior
544  *          probabilities that each state emitted/was visited at each posn,
 *          starting at the sequence ends, and creeping in, until half of the
 *          maximum allowable excluded probability is reached on each side.
547  *
548  * Args:
549  *
550  * CP9_t hmm        the HMM
551  * errbuf           char buffer for error messages
552  * CP9_MX fmx:      forward DP matrix, already calc'ed
553  * CP9_MX bmx:      backward DP matrix, already calc'ed
554  * CP9_MX pmx:      DP matrix for posteriors, filled here, can == bmx
555  * dsq              the digitized sequence
556  * CP9Bands_t cp9b  CP9 bands data structure
557  * int i0           start of target subsequence (often 1, beginning of dsq)
558  * int j0           end of target subsequence (often L, end of dsq)
559  * int   M          number of nodes in HMM (num columns of pmx matrix)
560  * double p_thresh  the probability mass we're requiring is within each band
561  * int did_fwd_scan  TRUE if Forward was run in 'scan mode' (parses could start anywhere)
562  * int did_bck_scan  TRUE if Backward was run in 'scan mode' (parses could end anywhere)
563  * int do_old_hmm2ij TRUE if we'll use old cp9_HMM2ijBands_OLD() function downstream
564  * int debug_level  [0..3] tells the function what level of debugging print
565  *                  statements to print.
566  *
567  * Returns: eslOK on success;
568  */
569 int
cp9_FB2HMMBands(CP9_t * hmm,char * errbuf,ESL_DSQ * dsq,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,CP9Bands_t * cp9b,int i0,int j0,int M,double p_thresh,int did_fwd_scan,int did_bck_scan,int do_old_hmm2ij,int debug_level)570 cp9_FB2HMMBands(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
571 		int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level)
572 {
573   int status;
574   int k;                                  /* counter over nodes of the model */
575   int L = j0-i0+1;                        /* length of sequence */
576   int thresh = Prob2Score(((1. - p_thresh)/2.), 1.); /* allowable prob mass excluded on each side */
577   int max;                                /* temporary max value */
578   int pnmax;                              /* position that gives max */
579 
580   /* *_m = match, *_i = insert, *_d = delete */
581   int *kthresh_m, *kthresh_i, *kthresh_d; /* [0..k..hmm->M], individual thresholds for each state */
582   int *nset_m, *nset_i, *nset_d;          /* [0..k..hmm->M], has minimum been set for this state? */
583   int *xset_m, *xset_i, *xset_d;          /* [0..k..hmm->M], has maximum been set for this state? */
584   int *mass_m, *mass_i, *mass_d;          /* [0..k..hmm->M], summed log prob of pmx->mx[i][k] from 0..k or k..L */
585   int i, ip;                              /* actual position and relative position in sequence, ip = i-i0+1 */
586   int sc;                                 /* summed score of all parses (derived from backward matrix)
587 					   * if(cm->search_opts & CM_SEARCH_HMMALNBANDS) Forward and Backward
588 					   * were run in 'scan mode' where each residue can be begin/end of a parse,
589 					   * so we have to sum up parses that end at each posn,
590 					   * if ! (cm->search_opts & CM_SEARCH_HMMALNBANDS) we know we have
591 					   * to start at residue i0 and end at residue j0, so sc is simply bmx->mmx[0][0]
592 					   */
593   int hmm_is_localized;                   /* TRUE if HMM has local begins, ends or ELs on */
594   hmm_is_localized = ((hmm->flags & CPLAN9_LOCAL_BEGIN) || (hmm->flags & CPLAN9_LOCAL_END) || (hmm->flags & CPLAN9_EL)) ? TRUE : FALSE;
595 
596   if(bmx != pmx) GrowCP9Matrix(pmx, errbuf, L, M, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
597 
598   /* allocations and initializations */
599   ESL_ALLOC(nset_m, sizeof(int) * (M+1));
600   ESL_ALLOC(nset_i, sizeof(int) * (M+1));
601   ESL_ALLOC(nset_d, sizeof(int) * (M+1));
602   ESL_ALLOC(xset_m, sizeof(int) * (M+1));
603   ESL_ALLOC(xset_i, sizeof(int) * (M+1));
604   ESL_ALLOC(xset_d, sizeof(int) * (M+1));
605   ESL_ALLOC(mass_m, sizeof(int) * (M+1));
606   ESL_ALLOC(mass_i, sizeof(int) * (M+1));
607   ESL_ALLOC(mass_d, sizeof(int) * (M+1));
608   ESL_ALLOC(kthresh_m, sizeof(int) * (M+1));
609   ESL_ALLOC(kthresh_i, sizeof(int) * (M+1));
610   ESL_ALLOC(kthresh_d, sizeof(int) * (M+1));
611 
612   esl_vec_ISet(mass_m, M+1, -INFTY);
613   esl_vec_ISet(mass_i, M+1, -INFTY);
614   esl_vec_ISet(mass_d, M+1, -INFTY);
615   esl_vec_ISet(nset_m, M+1, FALSE);
616   esl_vec_ISet(nset_i, M+1, FALSE);
617   esl_vec_ISet(nset_d, M+1, FALSE);
618   esl_vec_ISet(xset_m, M+1, FALSE);
619   esl_vec_ISet(xset_i, M+1, FALSE);
620   esl_vec_ISet(xset_d, M+1, FALSE);
621 
622   if(did_fwd_scan) { /* parses were allowed to begin anywhere */
623     sc = -INFTY;
624     for (ip = 0; ip <= L; ip++) {
625       /*printf("bmx->mmx[i:%d][0]: %d\n", ip+i0-1, bmx->mmx[ip][0]); */
626       sc = ILogsum(sc, (bmx->mmx[ip][0]));
627     }
628   }
629   else sc = bmx->mmx[0][0]; /* Forward/Backward run in 'align mode' parses must start at i0, end at j0 */
630   /* sc is summed log prob of all possible parses of seq i0..j0 */
631 
632   /* note boundary conditions, ip = 0, i = i0-1 */
633   pmx->mmx[0][0] = fmx->mmx[0][0] + bmx->mmx[0][0] - sc; /* fmx->mmx[0][0] is 0, bmx->mmx[0][0] is overall score */
634   pmx->imx[0][0] = -INFTY; /*need seq to get here*/
635   pmx->dmx[0][0] = -INFTY; /*D_0 does not exist*/
636   if((mass_m[0] = pmx->mmx[0][0]) > thresh) {
637     cp9b->pn_min_m[0] = ESL_MAX(i0-1, 0);
638     nset_m[0] = TRUE;
639   }
640   mass_i[0] = -INFTY; /* b/c pmx->imx[0][0] is -INFTY, set above */
641   mass_d[0] = -INFTY; /* b/c pmx->dmx[0][0] is -INFTY, set above */
642 
643   for (k = 1; k <= M; k++) {
644     pmx->mmx[0][k] = -INFTY; /*need seq to get here*/
645     pmx->imx[0][k] = -INFTY; /*need seq to get here*/
646     pmx->dmx[0][k] = fmx->dmx[0][k] + bmx->dmx[0][k] - sc;
647     /* mass_m[k] doesn't change b/c pmx->mmx[0][k] is -INFTY */
648     /* mass_i[k] doesn't change b/c pmx->imx[0][k] is -INFTY */
649     if((mass_d[k] = pmx->dmx[0][k]) > thresh) {
650       cp9b->pn_min_d[k] = ESL_MAX(i0-1, 0);
651       nset_d[k] = TRUE;
652     }
653   }
654 
655   /* Find minimum position in band for each state (M,I,D) of each node (0..M) */
656   for (ip = 1; ip <= L; ip++) /* ip is the relative position in the seq */
657     {
658       i = i0+ip-1;		/* e.g. i is actual index in dsq, runs from i0 to j0 */
659       k = 0;
660       /* new block EPN, Wed Feb 13 11:58:52 2008 */
661       pmx->mmx[ip][0] = ESL_MAX(fmx->mmx[ip][0] + bmx->mmx[ip][0] - sc, -INFTY); /* M_0 doesn't emit */
662       if(! nset_m[0]) {
663 	if((mass_m[0] = ILogsum(mass_m[0], pmx->mmx[ip][0])) > thresh) {
664 	  cp9b->pn_min_m[0] = i;
665 	  nset_m[0] = TRUE;
666 	}
667       }
668       /* end of new block, old line used to be: pmx->mmx[ip][0] = -INFTY; */
669 
670       pmx->imx[ip][0] = ESL_MAX(fmx->imx[ip][0] + bmx->imx[ip][0] - hmm->isc[dsq[i]][0] - sc, -INFTY);
671       /*hmm->isc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
672       if(! nset_i[0]) {
673 	if((mass_i[0] = ILogsum(mass_i[0], pmx->imx[ip][0])) > thresh) {
674 	  cp9b->pn_min_i[0] = i;
675 	  nset_i[0] = TRUE;
676 	}
677       }
678       pmx->dmx[ip][0] = -INFTY; /* D_0 doesn't exist */
679 
680       for(k = 1; k <= M; k++)
681 	{
682 	  pmx->mmx[ip][k] = ESL_MAX(fmx->mmx[ip][k] + bmx->mmx[ip][k] - hmm->msc[dsq[i]][k] - sc, -INFTY);
683 	  /*hmm->msc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
684 	  pmx->imx[ip][k] = ESL_MAX(fmx->imx[ip][k] + bmx->imx[ip][k] - hmm->isc[dsq[i]][k] - sc, -INFTY);
685 	  /*hmm->isc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
686 	  pmx->dmx[ip][k] = ESL_MAX(fmx->dmx[ip][k] + bmx->dmx[ip][k] - sc, -INFTY);
687 
688 	  if(! nset_m[k]) {
689 	    if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > thresh) {
690 	      cp9b->pn_min_m[k] = i;
691 	      nset_m[k] = TRUE;
692 	    }
693 	  }
694 	  if(! nset_i[k]) {
695 	    if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > thresh) {
696 	      cp9b->pn_min_i[k] = i;
697 	      nset_i[k] = TRUE;
698 	    }
699 	  }
700 	  if(! nset_d[k]) {
701 	    if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > thresh) {
702 	      cp9b->pn_min_d[k] = i;
703 	      nset_d[k] = TRUE;
704 	    }
705 	  }
706 	}
707     }
708   esl_vec_ISet(mass_m, M+1, -INFTY);
709   esl_vec_ISet(mass_i, M+1, -INFTY);
710   esl_vec_ISet(mass_d, M+1, -INFTY);
711   /* Find maximum position in band for each state (M,I,D) of each node (0..M)
712    * by moving from L down to 1 */
713   for (ip = L; ip >= 1; ip--) /* ip is the relative position in the seq */
714     {
715       i = i0+ip-1;		/* e.g. i is actual index in dsq, runs from i0 to j0 */
716       for(k = 0; k <= M; k++)
717 	{
718 	  if(! xset_m[k]) {
719 	    if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > thresh) {
720 	      cp9b->pn_max_m[k] = i;
721 	      xset_m[k] = TRUE;
722 	    }
723 	  }
724 	  if(! xset_i[k]) {
725 	    if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > thresh) {
726 	      cp9b->pn_max_i[k] = i;
727 	      xset_i[k] = TRUE;
728 	    }
729 	  }
730 	  if(! xset_d[k]) {
731 	    if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > thresh) {
732 	      cp9b->pn_max_d[k] = i;
733 	      xset_d[k] = TRUE;
734 	    }
735 	  }
736 	}
737     }
738   ip = 0;
739   i  = i0-1;
740   /* note boundary conditions, ip = 0, i = i0-1 */
741   if(! xset_m[0]) {
742     if((mass_m[0] = ILogsum(mass_m[0], pmx->mmx[0][0])) > thresh) {
743       cp9b->pn_max_m[0] = ESL_MAX(i0-1, 0);
744       xset_m[0] = TRUE;
745     }
746   }
747   /* mass_i[0] is unchanged because b/c pmx->imx[0][0] is -INFTY, set above */
748   /* mass_d[0] is unchanged because b/c pmx->dmx[0][0] is -INFTY, set above */
749   for (k = 1; k <= M; k++) {
750     /* mass_m[k] doesn't change b/c pmx->mmx[0][k] is -INFTY */
751     /* mass_i[k] doesn't change b/c pmx->mmx[0][k] is -INFTY */
752     if(!xset_d[k]) {
753       if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[0][k])) > thresh) {
754 	cp9b->pn_max_d[k] = ESL_MAX(i0-1, 0);
755 	xset_d[k] = TRUE;
756       }
757     }
758   }
759 
760   if(! do_old_hmm2ij) {
761     /* new way as of EPN, Sun Jan 27 08:48:34 2008 */
762     /* Some states may not have had their min/max set. This occurs if the entire
763      * state is outside the band (i.e. the summed probablity the state is entered for ANY i
764      * is less than our threshold. Current strategy in this situation is to set the
765      * pn_min_* and pn_max_* values as special flags, (-2) so the function that
766      * uses them to derive i and j bands knows this is the case and handles it
767      * accordingly.
768      */
769     int mset;
770     int dset;
771     for(k = 0; k <= M; k++)
772       {
773 	mset = dset = TRUE;
774 	/* theoretically either nset_*[k] and xset_*[k] should be either both TRUE or both
775 	 * FALSE, but I'm slightly worried about rare precision issues, so we check if one
776 	 * or the other is unset, and if so, we set both to argmax position */
777 	if(((! nset_m[k])) || (! xset_m[k]) || (cp9b->pn_max_m[k] < cp9b->pn_min_m[k])) {
778 	  cp9b->pn_min_m[k] = cp9b->pn_max_m[k] = -1;
779 	  mset = FALSE;
780 	}
781 	if(((! nset_i[k])) || (! xset_i[k]) || (cp9b->pn_max_i[k] < cp9b->pn_min_i[k])) {
782 	  cp9b->pn_min_i[k] = cp9b->pn_max_i[k] = -1;
783 	}
784 	if(((! nset_d[k])) || (! xset_d[k]) || (cp9b->pn_max_d[k] < cp9b->pn_min_d[k])) {
785 	  cp9b->pn_min_d[k] = cp9b->pn_max_d[k] = -1;
786 	  dset = FALSE;
787 	}
788 	if((!hmm_is_localized && !did_fwd_scan && !did_bck_scan) && (mset == FALSE && dset == FALSE)) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "node: %d match nor delete HMM state bands were set in non-localized, non-scanning HMM, lower tau (should be << 0.5).\n", k);
789       }
790   }
791   else {
792     /* old way, prior to Sun Jan 27 08:46:16 2008 */
793     /* Some states may not have had their min/max set. This occurs if the entire
794      * state is outside the band (i.e. the summed probablity the state is entered for ANY i
795      * is less than our threshold. Current strategy in this situation is to set the
796      * band to width 1 of the most likely position for that state, but to do that we
797      * need to find what the most likely posn is, we could do this in the loop above,
798      * but this is a rare situation, and so that turns out to be wasteful.
799      *
800      * Note: the off-by-one issue mentioned below is dealt with differently with the
801      * new code, when we're setting i and j CM bands using the HMM bands.
802      */
803     for(k = 0; k <= M; k++)
804       {
805 	/* comment *: off-by-one issue with non-emitters (includes all D states and M_0):
806 	 * pn_min_d[k] = i, means posn i was last residue emitted
807 	 * prior to entering node k's delete state. However, for a CM,
808 	 * if a delete states sub-parsetree is bounded by i' and j', then
809 	 * positions i' and j' HAVE YET TO BE EMITTED.
810 	 * For M_0, so we don't have to check each node to see if k == 0, we
811 	 * do the off-by-one correction at the end of the function.
812 	 */
813 	if(k != 0) {
814 	  if(cp9b->pn_min_d[k] != -1) cp9b->pn_min_d[k]++;
815 	  if(cp9b->pn_min_d[k] != -1) cp9b->pn_max_d[k]++;
816 	}
817 	/* theoretically either nset_*[k] and xset_*[k] should be either both TRUE or both
818 	 * FALSE, but I'm slightly worried about rare precision issues, so we check if one
819 	 * or the other is unset, and if so, we set both to argmax position */
820 	if((! nset_m[k]) || (! xset_m[k])) {
821 	  max = pmx->mmx[0][k];
822 	  for(ip = 1; ip <= L; ip++)
823 	    if(pmx->mmx[ip][k] > max) { pnmax = i0+ip-1; max = pmx->mmx[ip][k]; } /* i = i0+ip-1 */
824 	  cp9b->pn_min_m[k] = cp9b->pn_max_m[k] = pnmax;
825 	}
826 	if((! nset_i[k]) || (! xset_i[k])) {
827 	  max = pmx->imx[0][k];
828 	  for(ip = 1; ip <= L; ip++)
829 	    if(pmx->imx[ip][k] > max) { pnmax = i0+ip-1; max = pmx->imx[ip][k]; } /* i = i0+ip-1 */
830 	  cp9b->pn_min_i[k] = cp9b->pn_max_i[k] = pnmax;
831 	}
832 	if((! nset_d[k]) || (! xset_d[k])) {
833 	  max = pmx->dmx[0][k];
834 	  for(ip = 1; ip <= L; ip++)
835 	    if(pmx->dmx[ip][k] > max) { pnmax = i0+ip-1; max = pmx->dmx[ip][k]; } /* i = i0+ip-1 */
836 	  cp9b->pn_min_d[k] = cp9b->pn_max_d[k] = pnmax;
837 	}
838       }
839     cp9b->pn_min_m[0]++; /* non emitter */
840     cp9b->pn_max_m[0]++; /* non emitter */
841   }
842 
843   cp9b->pn_min_d[0] = -1; /* D_0 doesn't exist */
844   cp9b->pn_max_d[0] = -1; /* D_0 doesn't exist */
845 
846   if(debug_level > 0) cp9_DebugPrintHMMBands(stdout, j0, cp9b, (1.-p_thresh), 1);
847 
848   free(mass_m);
849   free(mass_i);
850   free(mass_d);
851   free(nset_m);
852   free(nset_i);
853   free(nset_d);
854   free(xset_m);
855   free(xset_i);
856   free(xset_d);
857   free(kthresh_m);
858   free(kthresh_i);
859   free(kthresh_d);
860 
861   return eslOK;
862 
863  ERROR:
864   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
865 }
866 
867 
868 /* Function: cp9_FB2HMMBandsWithSums()
869  * Date:     EPN, Wed Oct 17 10:22:44 2007
870  *
871  * Purpose: Determine the band on all HMM states given a Forward and
872  *          Backward matrix. Do this by calculating and summing log posterior
873  *          probabilities that each state emitted/was visited at each posn,
874  *          starting at the sequence ends, and creeping in, until the half the
875  *          maximum allowable probability excluded is reached on each side.
876  *
877  * CP9_t hmm        the HMM
878  * errbuf           char buffer for error messages
879  * CP9_MX fmx:      forward DP matrix, already calc'ed
880  * CP9_MX bmx:      backward DP matrix, already calc'ed
881  * CP9_MX pmx:      DP matrix for posteriors, filled here, can == bmx
882  * dsq              the digitized sequence
883  * CP9Bands_t cp9b  CP9 bands data structure
884  * int i0           start of target subsequence (often 1, beginning of dsq)
885  * int j0           end of target subsequence (often L, end of dsq)
886  * int   M          number of nodes in HMM (num columns of post matrix)
887  * double p_thresh  the probability mass we're requiring is within each band
888  * int did_fwd_scan TRUE if Forward was run in 'scan mode'  (parses could start at any posn)
889  * int did_bck_scan TRUE if Backward was run in 'scan mode' (parses could end  at any posn)
890  * int do_old_hmm2ij TRUE if we'll use old cp9_HMM2ijBands_OLD() function downstream
891  * int debug_level  [0..3] tells the function what level of debugging print
892  *                  statements to print.
893  *
894  * Returns: eslOK on success;
895  */
896 int
cp9_FB2HMMBandsWithSums(CP9_t * hmm,char * errbuf,ESL_DSQ * dsq,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,CP9Bands_t * cp9b,int i0,int j0,int M,double p_thresh,int did_fwd_scan,int did_bck_scan,int do_old_hmm2ij,int debug_level)897 cp9_FB2HMMBandsWithSums(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
898 			int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level)
899 {
900   int status;
901   int k;                                  /* counter over nodes of the model */
902   int L = j0-i0+1;                        /* length of sequence */
903   int thresh = Prob2Score(((1. - p_thresh)/2.), 1.); /* allowable prob mass excluded on each side */
904 
905   /* *_m = match, *_i = insert, *_d = delete */
906   int i, ip;                              /* actual position and relative position in sequence, ip = i-i0+1 */
907   int *kthresh_m, *kthresh_i, *kthresh_d; /* [0..k..hmm->M], individual thresholds for each state */
908   int *nset_m, *nset_i, *nset_d;          /* [0..k..hmm->M], has minimum been set for this state? */
909   int *xset_m, *xset_i, *xset_d;          /* [0..k..hmm->M], has maximum been set for this state? */
910   int *mass_m, *mass_i, *mass_d;          /* [0..k..hmm->M], summed log prob of pmx->mx[i][k] from 0..k or k..L */
911 
912   if(bmx != pmx) GrowCP9Matrix(pmx, errbuf, L, M, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
913 
914   /* allocations and initializations */
915   ESL_ALLOC(nset_m, sizeof(int) * (M+1));
916   ESL_ALLOC(nset_i, sizeof(int) * (M+1));
917   ESL_ALLOC(nset_d, sizeof(int) * (M+1));
918   ESL_ALLOC(xset_m, sizeof(int) * (M+1));
919   ESL_ALLOC(xset_i, sizeof(int) * (M+1));
920   ESL_ALLOC(xset_d, sizeof(int) * (M+1));
921   ESL_ALLOC(mass_m, sizeof(int) * (M+1));
922   ESL_ALLOC(mass_i, sizeof(int) * (M+1));
923   ESL_ALLOC(mass_d, sizeof(int) * (M+1));
924   ESL_ALLOC(kthresh_m, sizeof(int) * (M+1));
925   ESL_ALLOC(kthresh_i, sizeof(int) * (M+1));
926   ESL_ALLOC(kthresh_d, sizeof(int) * (M+1));
927 
928   esl_vec_ISet(mass_m, M+1, -INFTY);
929   esl_vec_ISet(mass_i, M+1, -INFTY);
930   esl_vec_ISet(mass_d, M+1, -INFTY);
931   esl_vec_ISet(nset_m, M+1, FALSE);
932   esl_vec_ISet(nset_i, M+1, FALSE);
933   esl_vec_ISet(nset_d, M+1, FALSE);
934   esl_vec_ISet(xset_m, M+1, FALSE);
935   esl_vec_ISet(xset_i, M+1, FALSE);
936   esl_vec_ISet(xset_d, M+1, FALSE);
937 
938   /* get the posterior matrix first, we need it b/c each state will have a different log prob threshold */
939   cp9_Posterior(dsq, i0, j0, hmm, fmx, bmx, pmx, did_fwd_scan);
940 
941   /* fill ipost_sums in cp9bands data structure */
942   cp9_IFillPostSums(pmx, cp9b, i0, j0);
943 
944   /* set state dependent cutoff thresholds for log prob mass we need on each side (this is unique to
945    * WithSums() function */
946   for(k = 0; k <= M; k++) {
947     kthresh_m[k] = thresh + cp9b->isum_pn_m[k];
948     kthresh_i[k] = thresh + cp9b->isum_pn_i[k];
949     kthresh_d[k] = thresh + cp9b->isum_pn_d[k];
950   }
951 
952   /* Find minimum position in band for each state (M,I,D) of each node (0..M) */
953   for (ip = 0; ip <= L; ip++) /* ip is the relative position in the seq */
954     {
955       i = i0+ip-1;		/* e.g. i is actual index in dsq, runs from i0 to j0 */
956       for(k = 0; k <= M; k++)
957 	{
958 	  if(! nset_m[k]) {
959 	    if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > kthresh_m[k]) {
960 	      cp9b->pn_min_m[k] = i;
961 	      nset_m[k] = TRUE;
962 	    }
963 	  }
964 	  if(! nset_i[k]) {
965 	    if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > kthresh_i[k]) {
966 	      cp9b->pn_min_i[k] = i;
967 	      nset_i[k] = TRUE;
968 	    }
969 	  }
970 	  if(! nset_d[k]) {
971 	    if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > kthresh_d[k]) {
972 	      cp9b->pn_min_d[k] = i;
973 	      nset_d[k] = TRUE;
974 	    }
975 	  }
976 	}
977     }
978   /* Find maximum position in band for each state (M,I,D) of each node (0..M)
979    * by moving from L down to 0 */
980   /* reset mass_* arrays */
981   esl_vec_ISet(mass_m, M+1, -INFTY);
982   esl_vec_ISet(mass_i, M+1, -INFTY);
983   esl_vec_ISet(mass_d, M+1, -INFTY);
984   for (ip = L; ip >= 0; ip--) /* ip is the relative position in the seq */
985     {
986       i = i0+ip-1;		/* e.g. i is actual index in dsq, runs from i0 to j0 */
987       for(k = 0; k <= M; k++)
988 	{
989 	  if(! xset_m[k]) {
990 	    if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > kthresh_m[k]) {
991 	      cp9b->pn_max_m[k] = i;
992 	      xset_m[k] = TRUE;
993 	    }
994 	  }
995 	  if(! xset_i[k]) {
996 	    if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > kthresh_i[k]) {
997 	      cp9b->pn_max_i[k] = i;
998 	      xset_i[k] = TRUE;
999 	    }
1000 	  }
1001 	  if(! xset_d[k]) {
1002 	    if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > kthresh_d[k]) {
1003 	      cp9b->pn_max_d[k] = i;
1004 	      xset_d[k] = TRUE;
1005 	    }
1006 	  }
1007 	}
1008     }
1009 
1010   if(do_old_hmm2ij) { /* we have to correct for an off-by-one to be consistent with the 'old' way code */
1011     for(k = 1; k <= M; k++)
1012       {
1013 	/* comment *: off-by-one issue with non-emitters (includes all D states and M_0):
1014 	 * pn_min_d[k] = i, means posn i was last residue emitted
1015 	 * prior to entering node k's delete state. However, for a CM,
1016 	 * if a delete states sub-parsetree is bounded by i' and j', then
1017 	 * positions i' and j' HAVE YET TO BE EMITTED.
1018 	 * For M_0, so we don't have to check each node to see if k == 0, we
1019 	 * do the off-by-one correction at the end of the function.
1020 	 */
1021 	  if(cp9b->pn_min_d[k] != -1) cp9b->pn_min_d[k]++;
1022 	  if(cp9b->pn_min_d[k] != -1) cp9b->pn_max_d[k]++;
1023       }
1024     cp9b->pn_min_m[0]++; /* non-emitter */
1025     cp9b->pn_max_m[0]++; /* non-emitter */
1026   }
1027 
1028 #if eslDEBUGLEVEL >= 1
1029   /* all states should have their min/max set because we've normalized the probability
1030    * of entering each state to 1.0, so we assert this to be true */
1031   ESL_DASSERT1((nset_m[0]));
1032   ESL_DASSERT1((nset_i[0]));
1033   ESL_DASSERT1((xset_m[0]));
1034   ESL_DASSERT1((xset_i[0]));
1035   /* D_0 state does not exist */
1036   for(k = 1; k <= M; k++)
1037     {
1038       ESL_DASSERT1((nset_m[k]));
1039       ESL_DASSERT1((nset_i[k]));
1040       ESL_DASSERT1((nset_d[k]));
1041       ESL_DASSERT1((xset_m[k]));
1042       ESL_DASSERT1((xset_i[k]));
1043       ESL_DASSERT1((xset_d[k]));
1044     }
1045 #endif
1046 
1047   cp9b->pn_min_d[0] = -1; /* D_0 doesn't exist */
1048   cp9b->pn_max_d[0] = -1; /* D_0 doesn't exist */
1049 
1050   if(debug_level > 0) cp9_DebugPrintHMMBands(stdout, j0, cp9b, (1.-p_thresh), 1);
1051 
1052   free(mass_m);
1053   free(mass_i);
1054   free(mass_d);
1055   free(nset_m);
1056   free(nset_i);
1057   free(nset_d);
1058   free(xset_m);
1059   free(xset_i);
1060   free(xset_d);
1061   free(kthresh_m);
1062   free(kthresh_i);
1063   free(kthresh_d);
1064 
1065   return eslOK;
1066 
1067  ERROR:
1068   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
1069 }
1070 
1071 /* Function: cp9_Posterior()
1072  * based on Ian Holmes' hmmer/src/postprob.c::P7EmitterPosterior()
1073  *
1074  * Purpose:  Combines Forward and Backward matrices into a posterior
1075  *           probability matrix. For emitters (match and inserts) the
1076  *           entries in row i of this matrix are the logs of the posterior
1077  *           probabilities of each state emitting symbol i of the sequence.
1078  *           For non-emitters the entries in row i of this matrix are the
1079  *           logs of the posterior probabilities of each state being 'visited'
1080  *           when the last emitted residue in the parse was symbol i of the
1081  *           sequence.
1082  *           The last point distinguishes this function from P7EmitterPosterior()
1083  *           which set all posterior values for for non-emitting states to -INFTY.
1084  *           The caller must allocate space for the matrix, although the
1085  *           backward matrix can be used instead (overwriting it will not
1086  *           compromise the algorithm).
1087  *
1088  *           if(did_fwd_scan == TRUE) forward was run in scan mode, which allowed
1089  *           parses to start at any position of sequence, this changes how
1090  *           we calculate summed prob of all parses (calculation of 'sc', see code).
1091  *
1092  * Args:     dsq      - sequence in digitized form
1093  *           i0       - start of target subsequence (often 1, beginning of dsq)
1094  *           j0       - end of target subsequence (often L, end of dsq)
1095  *           hmm      - the model
1096  *           forward  - pre-calculated forward matrix
1097  *           backward - pre-calculated backward matrix
1098  *           mx       - pre-allocated dynamic programming matrix
1099  *           did_fwd_scan - TRUE if Forward was run in 'scan' mode, which means
1100  *                          parses can start at any position of the sequence
1101  *
1102  * Return:   void
1103  */
1104 void
cp9_Posterior(ESL_DSQ * dsq,int i0,int j0,CP9_t * hmm,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * mx,int did_fwd_scan)1105 cp9_Posterior(ESL_DSQ *dsq, int i0, int j0, CP9_t *hmm, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *mx, int did_fwd_scan)
1106 {
1107   if(dsq == NULL) cm_Fail("in cp9_posterior(), dsq is NULL.");
1108 
1109   int i;
1110   int k;
1111   int sc;
1112   int L;		/* subsequence length */
1113   int ip;		/* i': relative position in the subsequence  */
1114   /*float temp_sc;*/
1115 
1116   L  = j0-i0+1;		/* the length of the subsequence */
1117 
1118   if(did_fwd_scan) { /* parses could start/stop anywhere */
1119     sc = -INFTY;
1120     for (ip = 0; ip <= L; ip++) {
1121       /*printf("bmx->mmx[i:%d][0]: %d\n", i, bmx->mmx[ip][0]);*/
1122       sc = ILogsum(sc, (bmx->mmx[ip][0]));
1123     }
1124   } /* parses must start/stop at (i = i0)/(j = j0) */
1125   else sc = bmx->mmx[0][0];
1126 
1127   /* note boundary conditions, case by case by case... */
1128   mx->mmx[0][0] = fmx->mmx[0][0] + bmx->mmx[0][0] - sc; /* fmx->mmx[0][0] is 0, bmx->mmx[1][0] is overall score */
1129   mx->imx[0][0] = -INFTY; /*need seq to get here*/
1130   mx->dmx[0][0] = -INFTY; /*D_0 does not exist*/
1131   for (k = 1; k <= hmm->M; k++) {
1132       mx->mmx[0][k] = -INFTY; /*need seq to get here*/
1133       mx->imx[0][k] = -INFTY; /*need seq to get here*/
1134       mx->dmx[0][k] = fmx->dmx[0][k] + bmx->dmx[0][k] - sc;
1135   }
1136 
1137   for (ip = 1; ip <= L; ip++) /* ip is the relative position in the seq */
1138     {
1139       i = i0+ip-1;		/* e.g. i is actual index in dsq, runs from i0 to j0 */
1140       mx->mmx[ip][0] = -INFTY; /*M_0 does not emit*/
1141       mx->imx[ip][0] = fmx->imx[ip][0] + bmx->imx[ip][0] - hmm->isc[dsq[i]][0] - sc;
1142       /*hmm->isc[dsq[i]][0] will have been counted in both fmx->imx and bmx->imx*/
1143       mx->dmx[ip][0] = -INFTY; /*D_0 does not exist*/
1144 
1145       /*printf("fmx->mmx[ip:%d][0]: %d\n bmx->mmx[ip:%d][0]: %d\n", ip, fmx->mmx[ip][0], ip, bmx->mmx[ip][0]);
1146 	printf("fmx->imx[ip:%d][0]: %d\n bmx->imx[ip:%d][0]: %d\n", ip, fmx->imx[ip][0], ip, bmx->imx[ip][0]);
1147 	printf("fmx->dmx[ip:%d][0]: %d\n bmx->dmx[ip:%d][0]: %d\n", ip, fmx->dmx[ip][0], ip, bmx->dmx[ip][0]);*/
1148       for (k = 1; k <= hmm->M; k++)
1149 	{
1150 	  mx->mmx[ip][k] = ESL_MAX(fmx->mmx[ip][k] + bmx->mmx[ip][k] - hmm->msc[dsq[i]][k] - sc, -INFTY);
1151 	  /*hmm->msc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
1152 	  mx->imx[ip][k] = ESL_MAX(fmx->imx[ip][k] + bmx->imx[ip][k] - hmm->isc[dsq[i]][k] - sc, -INFTY);
1153 	  /*hmm->isc[dsq[i]][k] will have been counted in both fmx->imx and bmx->imx*/
1154 	  mx->dmx[ip][k] = ESL_MAX(fmx->dmx[ip][k] + bmx->dmx[ip][k] - sc, -INFTY);
1155 	  /*printf("fmx->mmx[ip:%d][%d]: %d\n bmx->mmx[ip:%d][%d]: %d\n", ip, k, fmx->mmx[ip][k], ip, k, bmx->mmx[ip][k]);
1156 	  printf("fmx->imx[ip:%d][%d]: %d\n bmx->imx[ip:%d][%d]: %d\n", ip, k, fmx->imx[ip][k], ip, k, bmx->imx[ip][k]);
1157 	  printf("fmx->dmx[ip:%d][%d]: %d\n bmx->dmx[ip:%d][%d]: %d\n\n", ip, k, fmx->dmx[ip][k], ip, k, bmx->dmx[ip][k]);*/
1158 	}
1159     }
1160 
1161   /*
1162     float temp_sc;
1163     for(i = 0; i <= L; i++)
1164     {
1165     for(k = 0; k <= hmm->M; k++)
1166     {
1167     temp_sc = Score2Prob(mx->mmx[i][k], 1.);
1168     if(temp_sc > .0001)
1169     printf("mx->mmx[%3d][%3d]: %9d | %8f\n", i, k, mx->mmx[i][k], temp_sc);
1170     temp_sc = Score2Prob(mx->imx[i][k], 1.);
1171     if(temp_sc > .0001)
1172     printf("mx->imx[%3d][%3d]: %9d | %8f\n", i, k, mx->imx[i][k], temp_sc);
1173     temp_sc = Score2Prob(mx->dmx[i][k], 1.);
1174     if(temp_sc > .0001)
1175     printf("mx->dmx[%3d][%3d]: %9d | %8f\n", i, k, mx->dmx[i][k], temp_sc);
1176     }
1177     }*/
1178 }
1179 
1180 /*****************************************************************************
1181  * EPN 03.23.06
1182  * Function: cp9_IFillPostSums()
1183  * based on: ifill_post_sums_del() (deprecated) 11.23.05
1184  *
1185  * Purpose:  Given a posterior matrix post, where post->mmx[i][k]
1186  *           is the log odds score of the probability that
1187  *           match state k emitted position i of the sequence,
1188  *           sum the log probabilities that each state emitted
1189  *           each position. Do this for inserts, matches, and
1190  *           and deletes.
1191  *
1192  * arguments:
1193  * cp9_dpmatrix_s *post  dpmatrix_s posterior matrix, xmx, mmx, imx, dmx
1194  *                       2D int arrays. [0.1..N][0.1..M]
1195  * CP9Bands_t *cp9b - the cp9 bands data structure
1196  * int  i0          start of target subsequence (often 1, beginning of dsq)
1197  * int  j0          end of target subsequence (often L, end of dsq)
1198  *****************************************************************************/
1199 void
cp9_IFillPostSums(CP9_MX * post,CP9Bands_t * cp9b,int i0,int j0)1200 cp9_IFillPostSums(CP9_MX *post, CP9Bands_t *cp9b, int i0, int j0)
1201 {
1202   int i;            /* counter over positions of the sequence */
1203   int k;            /* counter over nodes of the model */
1204   int L;	    /* subsequence length */
1205   int M;            /* consensus length of cp9 */
1206   M = cp9b->hmm_M;
1207   L  = j0-i0+1;		/* the length of the subsequence */
1208 
1209   /* step through each node, fill the post sum structures */
1210   for(k = 0; k <= M; k++)
1211     {
1212       cp9b->isum_pn_m[k] = -INFTY;
1213       cp9b->isum_pn_i[k] = -INFTY;
1214       cp9b->isum_pn_d[k] = -INFTY;
1215       for(i = 0; i <= L; i++) {
1216 	cp9b->isum_pn_m[k] = ILogsum(cp9b->isum_pn_m[k], post->mmx[i][k]);
1217 	cp9b->isum_pn_i[k] = ILogsum(cp9b->isum_pn_i[k], post->imx[i][k]);
1218 	cp9b->isum_pn_d[k] = ILogsum(cp9b->isum_pn_d[k], post->dmx[i][k]);
1219       }
1220     }
1221 }
1222 
1223 /* Function: cp9_ValidateBands()
1224  * Incept:   EPN, Wed Nov 14 15:49:08 2007
1225  * Purpose:  Validate the info in CP9Bands_t data structure is internally
1226  *           consistent.
1227  *
1228  * Args:     cm     the cm
1229  *           errbuf char buffer for error message
1230  *           cp9b   the CP9 bands object
1231  *           i0     first residue we can possibly allow as valid j
1232  *           j0     final residue we can possibly allow as valid j
1233  *
1234  * Returns: eslOK, or, if error, other status code and filled errbuf
1235  */
1236 int
cp9_ValidateBands(CM_t * cm,char * errbuf,CP9Bands_t * cp9b,int i0,int j0,int do_trunc)1237 cp9_ValidateBands(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int i0, int j0, int do_trunc)
1238 {
1239   int v;            /* counter over states of the CM */
1240   int jp;           /* counter over valid j's, but offset. jp+jmin[v] = actual j */
1241   int sd;           /* minimum d allowed for a state, ex: MP_st = 2, ML_st = 1. etc. */
1242   int max_sdl_sdr;  /* maximum of StateLeftDelta, StateRightDelta for a state */
1243   int dn;           /* max_sdl_sdr if do_trunc, else sd */
1244   int hd_needed;
1245   int j;
1246 
1247 
1248   if(cm->M    != cp9b->cm_M)  ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cm->M != cp9b->cm_M\n");
1249   if(cm->clen != cp9b->hmm_M) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cm->clen != cp9b->hmm_M\n");
1250 
1251   hd_needed = 0;
1252   for(v = 0; v < cp9b->cm_M; v++) {
1253     hd_needed += cp9b->jmax[v] - cp9b->jmin[v] + 1;
1254   }
1255   if(hd_needed != cp9b->hd_needed) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hd_needed inconsistent.");
1256 
1257   for(v = 0; v < cm->M; v++) {
1258     assert((cp9b->imin[v] == -1 && cp9b->imax[v] == -2) || (cp9b->imin[v] >= 0 && cp9b->imax[v] >= 0));
1259     assert((cp9b->jmin[v] == -1 && cp9b->jmax[v] == -2) || (cp9b->jmin[v] >= 0 && cp9b->jmax[v] >= 0));
1260   }
1261 
1262   for(v = 0; v < cm->M; v++) {
1263     sd          = StateDelta(cm->sttype[v]);
1264     max_sdl_sdr = ESL_MAX(StateLeftDelta(cm->sttype[v]), StateRightDelta(cm->sttype[v]));
1265     dn          = do_trunc ? max_sdl_sdr : sd;
1266     /* if (do_trunc) d can be 1 for MP states, this is why we use dn
1267      * here.  Note: d can't be 0 for ML/IL in R mode, MR/IR in L
1268      * mode even though you might think it could be. We'll always do
1269      * a truncated begin with d=1 for L,R marginal alignments. */
1270     if(cm->sttype[v] == E_st) {
1271       for(jp = 0; jp <= (cp9b->jmax[v]-cp9b->jmin[v]); jp++) {
1272 	if(cp9b->hdmin[v][jp] != 0) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin for E state is inconsistent.");
1273 	if(cp9b->hdmax[v][jp] != 0) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin for E state is inconsistent.");
1274       }
1275     }
1276     else {
1277       if(cp9b->jmin[v] != -1) {
1278 	for(jp = 0; jp <= (cp9b->jmax[v]-cp9b->jmin[v]); jp++) {
1279 	  j = jp+cp9b->jmin[v];
1280           if(cp9b->hdmin[v][jp] == -1) {
1281             if(cp9b->hdmax[v][jp] != -2) { ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin is -1 for state %d, j: %d, but hdmax is not -2 (it's %d).",  v, j, cp9b->hdmax[v][jp]); }
1282           }
1283           else {
1284             if(cp9b->hdmin[v][jp] != ESL_MAX((j - cp9b->imax[v] + 1), dn)) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin %d (dn: %d) for state %d, j: %d imax[v]: %d is inconsistent.", cp9b->hdmin[v][jp], dn, v, j, cp9b->imax[v]);
1285             if(cp9b->hdmax[v][jp] != ESL_MAX((j - cp9b->imin[v] + 1), dn)) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmax %d (dn: %d) for state %d, j: %d imin[v]: %d is inconsistent.", cp9b->hdmax[v][jp], dn, v, j, cp9b->imin[v]);
1286           }
1287         }
1288       }
1289     }
1290     /* get rid of StateIsDetached once old band construction method is deprecated */
1291     if(cp9b->imin[v] == -1 && !StateIsDetached(cm, v)) { /* ensure all unreachable states have 0 width bands */
1292       if(cp9b->imax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] == -1, but imax[v] != -2 but rather %d\n", v, cp9b->imax[v]);
1293       if(cp9b->jmin[v] != -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] == -1, but jmin[v] != -1 but rather %d\n", v, cp9b->jmin[v]);
1294       if(cp9b->jmax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] == -1, but jmax[v] != -2 but rather %d\n", v, cp9b->jmax[v]);
1295     }
1296     else if(!StateIsDetached(cm, v)){
1297       if(cp9b->imax[v] == -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] != -1, but imax[v] == -2!\n", v);
1298       if(cp9b->jmin[v] == -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] != -1, but jmin[v] == -1!\n", v);
1299       if(cp9b->jmax[v] == -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] != -1, but jmax[v] == -2!\n", v);
1300     }
1301 
1302     if(i0 == j0 && cm->sttype[v] == MP_st) { /* special case, MPs are impossible in this case */
1303       if(cp9b->imin[v] != -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but imin[v]: %d != -1\n", i0, v, cp9b->imin[v]);
1304       if(cp9b->imax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but imax[v]: %d != -2\n", i0, v, cp9b->imax[v]);
1305       if(cp9b->jmin[v] != -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but jmin[v]: %d != -1\n", i0, v, cp9b->jmin[v]);
1306       if(cp9b->jmax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but jmax[v]: %d != -2\n", i0, v, cp9b->jmax[v]);
1307     }
1308     else {
1309       if(cp9b->jmin[v] != -1) {
1310 	for(j = cp9b->jmin[v]; j <= cp9b->jmax[v]; j++) {
1311 	  if(j < (i0-1)) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), j: %d outside i0-1:%d..j0:%d is within v's j band: jmin[%d]: %d jmax[%d]: %d\n", j, i0-1, j0, v, cp9b->jmin[v], v, cp9b->jmax[v]);
1312 	  if(j > j0)     ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), j: %d outside i0-1:%d..j0:%d is within v's j band: jmin[%d]: %d jmax[%d]: %d\n", j, i0-1, j0, v, cp9b->jmin[v], v, cp9b->jmax[v]);
1313           if(cp9b->hdmin[v][(j-cp9b->jmin[v])] == -1) {
1314             if(cp9b->hdmax[v][(j-cp9b->jmin[v])] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d j: %d hdmin[v][jp_v:%d] == -1, but hdmax[v][jp_v:%d] != -2 (it's %d)\n", v, j, (j-cp9b->jmin[v]), (j-cp9b->jmin[v]), cp9b->hdmax[v][(j-cp9b->jmin[v])]);
1315           }
1316           else {
1317             if(cp9b->hdmin[v][(j-cp9b->jmin[v])] < dn) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d j: %d hdmin[v][jp_v:%d] : %d less than StateDelta for v: %d\n", v, j, (j-cp9b->jmin[v]), cp9b->hdmin[v][(j-cp9b->jmin[v])], dn);
1318             if(cp9b->hdmax[v][(j-cp9b->jmin[v])] < dn) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d j: %d hdmax[v][jp_v:%d] : %d less than StateDelta for v: %d\n", v, j, (j-cp9b->jmin[v]), cp9b->hdmax[v][(j-cp9b->jmin[v])], dn);
1319           }
1320         }
1321 	if(cp9b->jmax[v] > cp9b->jmax[0]) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), jmax[v:%d]:%d > jmax[0]:%d.", v, cp9b->jmax[v], cp9b->jmax[0]);
1322 	if(cp9b->imin[v] < cp9b->imin[0]) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), imin[v:%d]:%d < imin[0]:%d, i0:%d j0:%d jmin[v]:%d jmax[v]:%d jmin[0]:%d jmax[0]:%d imax[v]:%d", v, cp9b->imin[v], cp9b->imin[0], i0, j0, cp9b->jmin[v], cp9b->jmax[v], cp9b->jmin[0], cp9b->jmax[0], cp9b->imax[v]);
1323       }
1324     }
1325   }
1326   return eslOK;
1327 }
1328 
1329 /*
1330  * Function: cp9_GrowHDBands()
1331  *
1332  * Incept:   EPN, Thu Oct 25 13:24:29 2007
1333  * Purpose:  Rearrange CP9 hdmin and hdmax pointers for a new sequence
1334  *           based on j bands (jmin and jmax). If the currently allocated
1335  *           size for hdmin, hdmax is not big enough, reallocate them.
1336  *
1337  * Args:
1338  * CP9Bands_t cp9b    the CP9 Bands object.
1339  * errbuf   char buffer for error messages
1340  *
1341  * Returns: eslOK on success, eslEMEM if memory allocation error
1342  */
1343 int
cp9_GrowHDBands(CP9Bands_t * cp9b,char * errbuf)1344 cp9_GrowHDBands(CP9Bands_t *cp9b, char *errbuf)
1345 {
1346   int status;
1347   int v;
1348   int cur_size = 0;
1349   int jbw;
1350 
1351   /* count size we need for hdmin/hdmax given current jmin, jmax */
1352   cp9b->hd_needed = 0; /* we'll rewrite this */
1353   for(v = 0; v < cp9b->cm_M; v++) {
1354     cp9b->hd_needed += cp9b->jmax[v] - cp9b->jmin[v] + 1;
1355     /* printf("hd needed v: %4d bw: %4d total: %5d\n", v, cp9b->jmax[v] - cp9b->jmin[v] + 1, cp9b->hd_needed);  */
1356   }
1357   if(cp9b->hd_alloced < cp9b->hd_needed) {
1358     void *tmp;
1359     if(cp9b->hdmin_mem == NULL) ESL_ALLOC(cp9b->hdmin_mem, sizeof(int) * cp9b->hd_needed);
1360     else                        ESL_RALLOC(cp9b->hdmin_mem, tmp, sizeof(int) * cp9b->hd_needed);
1361     if(cp9b->hdmax_mem == NULL) ESL_ALLOC(cp9b->hdmax_mem, sizeof(int) * cp9b->hd_needed);
1362     else                        ESL_RALLOC(cp9b->hdmax_mem, tmp, sizeof(int) * cp9b->hd_needed);
1363   }
1364 
1365   /* set pointers */
1366   cur_size = 0;
1367   for(v = 0; v < cp9b->cm_M; v++) {
1368     cp9b->hdmin[v] = cp9b->hdmin_mem + cur_size;
1369     cp9b->hdmax[v] = cp9b->hdmax_mem + cur_size;
1370     jbw = cp9b->jmax[v] - cp9b->jmin[v] + 1;
1371     assert(jbw >= 0);
1372     ESL_DASSERT1((jbw >= 0));
1373     cur_size += jbw;
1374   }
1375   cp9b->hd_alloced = cur_size;
1376   ESL_DASSERT1((cp9b->hd_alloced == cp9b->hd_needed));
1377   return eslOK;
1378 
1379  ERROR:
1380   ESL_FAIL(status, errbuf, "Memory allocation error.");
1381 }
1382 
1383 
1384 /*****************************************************************************
1385  * EPN 11.03.05
1386  * Function: ij2d_bands()
1387  *
1388  * Purpose:  Determine the band for each cm state v on d (the band on the
1389  *           length of the subsequence emitted from the subtree rooted
1390  *           at state v). These are easily calculated given the bands on i
1391  *           and j.
1392  *
1393  * arguments:
1394  *
1395  * CM_t *cm         the CM
1396  * int  W           length of sequence we're aligning
1397  * int *imin        imin[v] = first position in band on i for state v
1398  * int *imax        imax[v] = last position in band on i for state v
1399  * int *jmin        jmin[v] = first position in band on j for state v
1400  * int *jmax        jmax[v] = last position in band on j for state v
1401  * int **hdmin      hdmin[v][jp] = first position in band on d for state v
1402  *                                 and j position: j = jp+jmin[v].
1403  *                  Filled in this function.
1404  * int **hdmax      hdmax[v][jp] = last position in band on d for state v
1405  *                                 and j position: j = jp+jmin[v].
1406  *                  Filled in this function.
1407  * int do_trunc     TRUE if we'll use these bands in a truncated version of CYK/Inside/Outside
1408  * int debug_level  [0..3] tells the function what level of debugging print
1409  *                  statements to print.
1410  *****************************************************************************/
1411 void
ij2d_bands(CM_t * cm,int W,int * imin,int * imax,int * jmin,int * jmax,int ** hdmin,int ** hdmax,int do_trunc,int debug_level)1412 ij2d_bands(CM_t *cm, int W, int *imin, int *imax, int *jmin, int *jmax,
1413 	   int **hdmin, int **hdmax, int do_trunc, int debug_level)
1414 {
1415   int v;            /* counter over states of the CM */
1416   int jp;           /* counter over valid j's, but offset. jp+jmin[v] = actual j */
1417   int j;            /* actual j */
1418   int sd;           /* minimum d allowed for a state, ex: MP_st = 2, ML_st = 1. etc. */
1419   int max_sdl_sdr;  /* maximum of StateLeftDelta, StateRightDelta for a state */
1420   int dn;           /* max_sdl_sdr if do_trunc, else sd */
1421   int hdn, hdx;     /* temporary hdmin/hdmax */
1422   for(v = 0; v < cm->M; v++) {
1423     if(cm->sttype[v] == E_st) {
1424       for(jp = 0; jp <= (jmax[v]-jmin[v]); jp++) {
1425 	hdmin[v][jp] = 0;
1426 	hdmax[v][jp] = 0;
1427       }
1428     }
1429     else {
1430       sd          = StateDelta(cm->sttype[v]);
1431       max_sdl_sdr = ESL_MAX(StateLeftDelta(cm->sttype[v]), StateRightDelta(cm->sttype[v]));
1432       dn          = do_trunc ? max_sdl_sdr : sd;
1433       /* if (do_trunc) d can be 1 for MP states, this is why we use dn
1434        * here.  Note: d can't be 0 for ML/IL in R mode, MR/IR in L
1435        * mode even though you might think it could be. We'll always do
1436        * a truncated begin with d=1 for L,R marginal alignments. */
1437 
1438       for(jp = 0; jp <= (jmax[v]-jmin[v]); jp++) {
1439 	j   = jp+jmin[v];
1440 	hdn = j-imax[v]+1;
1441 	hdx = j-imin[v]+1;
1442 	if(hdx < dn) {
1443 	  hdmin[v][jp] = -1;
1444 	  hdmax[v][jp] = -2;
1445 	}
1446 	else {
1447 	  hdmin[v][jp] = ESL_MAX(hdn, dn);
1448 	  hdmax[v][jp] = hdx;
1449 	}
1450 	/* printf("hd[%d][j=%d]: min: %d | max: %d\n", v, (jp+jmin[v]), hdmin[v][jp], hdmax[v][jp]); */
1451       }
1452     }
1453   }
1454 }
1455 
1456 /* Function: cp9_HMM2ijBands()
1457  * Synopsis: Derive bands on i and j for all CM states given HMM bands.
1458  * Incept:   EPN, Thu Feb  7 12:05:01 2008
1459  *
1460  * Purpose:  Given HMM bands, determine the corresponding bands on the
1461  *           CM. Both for i: the left border of the subsequence emitted
1462  *           from the subtree rooted at v, the band is imin[v]..imax[v]
1463  *           inclusive. And also for j: the right border of the subseq
1464  *           emitted from the subtree rooted at v, the band is
1465  *           jmin[v]..jmax[v] inclusive.
1466  *
1467  *           This is done by first enforcing that the HMM bands allow
1468  *           at least 1 possible HMM parse. A valid parse given the
1469  *           HMM bands is not guaranteed, although it's nearly always
1470  *           likely even for relatively high values of tau (the
1471  *           probability mass allowed outside the band for each state,
1472  *           relatively high is 0.01). With very tight bands, for
1473  *           example from a tau of 0.49, the chance that all parses
1474  *           are impossible given the bands is much more likely (especially
1475  *           with non-homologous sequences). *If* the HMM bands exclude
1476  *           all possible HMM parses, they are expanded in a greedy,
1477  *           stupid way to allow at least 1 parse (we could be smarter,
1478  *           but this case only arises for impractical tau values, in
1479  *           fact I only implemented it to verify the rest of the HMM
1480  *           banding implementation is robust, and will always work
1481  *           for tau values up to 0.5).
1482  *
1483  *           Once we know an HMM parse is possible given the HMM bands,
1484  *           we also know if we impose those exact bands on the CM
1485  *           we will also have a valid CM parse, b/c there is a 1:1
1486  *           mapping between HMM parses and CM parsetrees. So, we
1487  *           impose the HMM bands onto the CM to get the i and j
1488  *           bands using a stack and mapping 'explicit' bands,
1489  *           the i or j bands of CM states that map to an HMM
1490  *            state (for example the i band of MATL_ML states,
1491  *            or the j bands of MATR_MR states). The other bands
1492  *           that are not explicitly set (ex: the j band of a
1493  *           MATL_ML state and the i band of a MATR_MR state), are
1494  *           implicitly set based on the explicit ones.
1495  *
1496  *           Note: This code is ugly, even more than usual for me.
1497  *           There's a plethora of special cases, which are maddening
1498  *           during development/debugging. The code starts out simple
1499  *           and balloons as you add code to handle the special cases.
1500  *           [EPN, Thu Feb  7 12:17:53 2008].
1501  *
1502  * Args:     <cm>     - the model
1503  *           <errbuf> - for returning error messages
1504  *           <cp9>    - the CP9 HMM used to determine the bands
1505  *           <cp9b>   - the bands data structure
1506  *           <cp9map> - map between the CM and HMM
1507  *           <i0>     - first position in the sequence we're considering
1508  *           <j0>     - final position in the sequence we're considering
1509  *           <doing_search> - TRUE if we're searching the target sequence, not aligning it,
1510  *                            relevant b/c iff we're aligning the parsetree *must* span i0..j0
1511  *           <do_trunc>     - TRUE if we're going to use these bands for truncated CYK/Inside/Outside
1512  *           <debug_level>  - verbosity level for debugging printf() statements
1513  *
1514  * Returns:  <eslOK> on success.
1515  *
1516  * Throws:   <eslEINCOMPAT> on contract violation
1517  *           <eslEMEM> on memory error
1518  */
1519 int
cp9_HMM2ijBands(CM_t * cm,char * errbuf,CP9_t * cp9,CP9Bands_t * cp9b,CP9Map_t * cp9map,int i0,int j0,int doing_search,int do_trunc,int debug_level)1520 cp9_HMM2ijBands(CM_t *cm, char *errbuf, CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int do_trunc, int debug_level)
1521 {
1522 
1523   int status;
1524   int v;
1525 
1526   /* ptrs to cp9b data, for convenience */
1527   int *imin;          /* imin[v] = first position in band on i for state v to be filled in this function. [1..M] */
1528   int *imax;          /* imax[v] = last position in band on i for state v to be filled in this function. [1..M] */
1529   int *jmin;          /* jmin[v] = first position in band on j for state v to be filled in this function. [1..M] */
1530   int *jmax;          /* jmax[v] = last position in band on j for state v to be filled in this function. [1..M] */
1531 
1532   int nd;                  /* counter over CM nodes. */
1533   int y;                   /* counters over children states */
1534   int hmm_M;               /* number of nodes in the HMM */
1535   ESL_STACK   *nd_pda;     /* used to traverse the CM from left to right in consensus positions, cpos = 0..clen */
1536   ESL_STACK   *lpos_pda;   /* used to store lpos for BIF nodes */
1537   int          on_right;   /* TRUE if we're on the right for current node during our CM traversal */
1538   int          w;          /* a state index */
1539   int          lpos, rpos; /* left/right border of subtree for current node */
1540   /*int          k;*/          /* counter of HMM nodes, for debugging print statements, currently not used */
1541   int hmm_is_localized;      /* TRUE if HMM has local begins, ends or ELs on */
1542   int cm_is_fully_localized; /* TRUE if CM has local begins and ends on */
1543 
1544   /* r_* arrays, these are filled in HMMBandsEnforceValidParse(), they are the band on 'reachable'
1545    * residues for each HMM state as we move from left to right through the HMM.
1546    * For example, r_mn[k] = 3, r_mx[k] = 5, means that for all possible HMM parses within the bands
1547    * in the cp9b pn_* arrays that reach the match state of node k, the residue emitted by that match
1548    * must be either 3, 4, or 5.
1549    */
1550   int *r_mn;   /* [0..k..hmm_M] minimal residue position for which we can reach M_k (match state of node k) */
1551   int *r_mx;   /* [0..k..hmm_M] maximal residue position for which we can reach M_k */
1552   int *r_in;   /* [0..k..hmm_M] minimal residue position for which we can reach I_k (insert state of node k) */
1553   int *r_ix;   /* [0..k..hmm_M] maximal residue position for which we can reach I_k */
1554   int *r_dn;   /* [0..k..hmm_M] minimal residue position for which we can reach D_k (delete state of node k) */
1555   int *r_dx;   /* [0..k..hmm_M] maximal residue position for which we can reach D_k */
1556   int *r_nn_i; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1557   int *r_nx_i; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1558   int *r_nn_j; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1559   int *r_nx_j; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1560   /* r_nn_i and r_nx_i are used when setting i bands, and r_nn_j and r_nx_j are used when setting j bands.
1561    * The values can differ because of an off-by-one issue with the non-emitting (delete and M_0) states of the HMM:
1562    * pn_min_d[k] = i, means posn i was last residue emitted prior to entering node k's delete state. However, for a CM,
1563    * if a delete state's sub-parsetree is bounded by i' and j', this means positions i' and j' HAVE YET TO BE EMITTED.
1564    * For i states this means we have to add 1 to the delete band positions, but for j states we do not, the off-by-one
1565    * is taken care of because the HMM is moving left to right, while j positions move right to left (confusing as hell,
1566    * bad explanation, i know... write out an example, it's the only way to get it).
1567    */
1568 
1569   /* Contract checks */
1570   if (cp9b == NULL)                                                                   ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands(), cp9b is NULL.\n");
1571   if(i0 < 1) ESL_FAIL(eslEINCOMPAT,  errbuf, "cp9_HMM2ijBands(), i0 < 1: %d\n", i0);
1572   if(j0 < 1) ESL_FAIL(eslEINCOMPAT,  errbuf, "cp9_HMM2ijBands(), j0 < 1: %d\n", j0);
1573   if(j0 < i0) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands(), i0 (%d) < j0 (%d)\n", i0, j0);
1574   hmm_is_localized      = ((cp9->flags & CPLAN9_LOCAL_BEGIN) || (cp9->flags & CPLAN9_LOCAL_END) || (cp9->flags & CPLAN9_EL)) ? TRUE : FALSE;
1575   cm_is_fully_localized = ((cm->flags & CMH_LOCAL_BEGIN) && (cm->flags & CMH_LOCAL_END)) ? TRUE : FALSE;
1576 
1577   /* ptrs to cp9b arrays, for convenience */
1578   imin     = cp9b->imin;
1579   imax     = cp9b->imax;
1580   jmin     = cp9b->jmin;
1581   jmax     = cp9b->jmax;
1582   hmm_M    = cp9b->hmm_M;
1583   /* Initialize all bands to -1 */
1584   esl_vec_ISet(imin, cm->M, -1);
1585   esl_vec_ISet(imax, cm->M, -2);
1586   esl_vec_ISet(jmin, cm->M, -1);
1587   esl_vec_ISet(jmax, cm->M, -2);
1588 
1589   /* Step 1: Check for valid HMM parse within the HMM bands, if there isn't one messily expand the bands so that there is one */
1590   if((status = HMMBandsEnforceValidParse(cp9, cp9b, cp9map, errbuf, i0, j0, doing_search, NULL,
1591 					 &r_mn, &r_mx, &r_in, &r_ix, &r_dn, &r_dx, &r_nn_i, &r_nx_i, &r_nn_j, &r_nx_j)) != eslOK) return status;
1592 
1593   /* debugging printf block */
1594   /*
1595     for(k = 0; k <= cp9b->hmm_M;k ++) {
1596     printf("k: %4d  %4d %4d  %4d %4d  %4d %4d  %4d %4d  %4d  %4d\n", k, r_mn[k], r_mx[k], r_in[k], r_ix[k], r_dn[k], r_dx[k], r_nn_i[k], r_nx_i[k], r_nn_j[k], r_nx_j[k]);
1597     }
1598     cp9_DebugPrintHMMBands(stdout, j0, cp9b, cm->tau, 1);
1599   */
1600 
1601   /* Step 2: Traverse the CM from left to right in consensus position coordinates. Fill in the
1602    *         i and j bands (imin, imax, jmin, jmax) for all states as we go. The CM is traversed
1603    *         using a stack, each node is visited twice (this is based on Sean's cleaner:
1604    *         display.c::CreateEmitMap()). The first time a node <nd> is visited we're 'on the left'
1605    *         and then we push it back to the stack, and visit it again 'on the right' later. We
1606    *         are moving around the perimeter of the guide tree, stepping one position at a time
1607    *         in the consensus sequence coordinates, from left to right. We mainly set bands
1608    *         when we're 'on the right', with the exception of the i bands of MATP_nd and MATL_nd
1609    *         states, which are set when we're 'on the left'. All emitting states and delete states v have either
1610    *         i, or j or both bands that can be set 'explicitly' based on the HMM bands for
1611    *         the HMM state that maps to v. For example we can set the i bands for MATL_ML
1612    *         states, and the j bands for MATR_MR states. All other bands (and both i
1613    *         and j bands for S states, B states, E states) are set 'implicitly' based on the
1614    *         explicit bands, and the r_* data structures we filled in HMMBandsEnforceValidParse().
1615    *         The goal was to make this function as clean and simple as possible, and although
1616    *         it doesn't look it, this is as good as I can get it. There are many special
1617    *         cases that make an elegant implementation beyond me.
1618    */
1619   if(! doing_search) {
1620     assert(r_mn[0] == (i0-1));
1621     if(!hmm_is_localized) assert(r_mx[hmm_M] == j0 || r_ix[hmm_M] == j0 || r_dx[hmm_M] == j0);
1622   }
1623   nd   = 0;
1624   lpos = 0;
1625   rpos = 0;
1626 
1627   if ((nd_pda    = esl_stack_ICreate())      == NULL)  goto ERROR;
1628   if ((lpos_pda  = esl_stack_ICreate())      == NULL)  goto ERROR;
1629   if ((status = esl_stack_IPush(nd_pda, 0))  != eslOK) goto ERROR;		/* 0 = left side. 1 would = right side. */
1630   if ((status = esl_stack_IPush(nd_pda, nd)) != eslOK) goto ERROR;
1631   while (esl_stack_IPop(nd_pda, &nd) != eslEOD)
1632     {
1633       esl_stack_IPop(nd_pda, &on_right);
1634       if (on_right) {
1635 	switch(cm->ndtype[nd]) { /* this is a massive switch, we set i and j bands for almost all
1636 				  * states here when we're on the right (sole exceptions are i bands for
1637 				  * MATP_nd states (except MATP_IR), and MATL_nd states) */
1638 
1639 	case BIF_nd: /* special case, set i bands based on left child, j bands based on right child */
1640 	  v = cm->nodemap[nd];
1641 	  w = cm->cfirst[v]; /* BEGL_S */
1642 	  y = cm->cnum[v];   /* BEGR_S */
1643 
1644 	  /* set v's i band based on left child w, and v's j band based on right child y */
1645 	  imin[v] = (imin[w] != -1) ? imin[w] : imin[y]; /* if imin[w] == imin[y] == -1, then imin[v] will be set as -1 */
1646 	  imax[v] = (imax[w] != -2) ? imax[w] : imax[y]; /* if imax[w] == imax[y] == -2, then imax[v] will be set as -2 */
1647 	  jmin[v] = (jmin[y] != -1) ? jmin[y] : jmin[w]; /* if jmin[y] == jmin[w] == -1, then jmin[v] will be set as -1 */
1648 	  jmax[v] = (jmax[y] != -2) ? jmax[y] : jmax[w]; /* if jmax[y] == jmax[w] == -2, then jmax[v] will be set as -2 */
1649 
1650 	  if(! do_trunc) {
1651 	    /* check for possibility that either child is not reachable, will only possibly happen with local on */
1652 	    if(imin[w] == -1 || jmin[w] == -1 || imin[y] == -1 || jmin[y] == -1 ||
1653 	       imax[w] == -2 || imax[w] == -2 || jmax[y] == -2 || jmax[y] == -2) {
1654 	      /* either the left child, or right child is not reachable, make them both unreachable as well as the BIF state */
1655 	      imin[v] = imin[w] = imin[y] = jmin[v] = jmin[w] = jmin[y] = -1;
1656 	      imax[v] = imax[w] = imax[y] = jmax[v] = jmax[w] = jmax[y] = -2;
1657 	      /* also make the BEGR_IL unreachable */
1658 	      imin[y+1] = jmin[y+1] = -1;
1659 	      imax[y+1] = jmax[y+1] = -2;
1660 	    }
1661 	  }
1662 	  break;
1663 
1664 	case MATP_nd:
1665 	  lpos = cp9map->nd2lpos[nd];
1666 	  rpos = cp9map->nd2rpos[nd];
1667 
1668 	  v = cm->nodemap[nd]; /* v is MATP_MP */
1669 	  jmin[v] = r_mn[rpos];
1670 	  jmax[v] = r_mx[rpos];
1671 
1672 	  v++; /* v is MATP_ML */
1673 	  jmin[v] = r_dn[rpos];
1674 	  jmax[v] = r_dx[rpos];
1675 
1676 	  v++; /* v is MATP_MR */
1677 	  jmin[v] = r_mn[rpos];
1678 	  jmax[v] = r_mx[rpos];
1679 
1680 	  v++; /* v is MATP_D */
1681 	  jmin[v] = r_dn[rpos];
1682 	  jmax[v] = r_dx[rpos];
1683 
1684 	  v++; /* v is MATP_IL */
1685 	  jmin[v] = r_nn_j[rpos-1];
1686 	  jmax[v] = r_nx_j[rpos-1];
1687 
1688 	  v++; /* v is MATP_IR */
1689 	  jmin[v] = r_in[rpos-1];
1690 	  jmax[v] = r_ix[rpos-1];
1691 	  imin[v] = r_nn_i[lpos+1]; /* look at band on lpos *+1* b/c we enter MATP_IR AFTER the MATP_MP, MATP_MR, MATP_ML, or MATP_IL insert (if any) */
1692 	  imax[v] = r_nx_i[lpos+1]; /* look at band on lpos *+1* b/c we enter MATP_IR AFTER the MATP_MP, MATP_MR, MATP_ML, or MATP_IL insert (if any) */
1693 	  ESL_DASSERT1(((lpos+1) <= cm->clen)); /* note: we know lpos+1 <= cm->clen b/c we're in a MATP node, and the ccol the right half of the node maps to
1694 						 *       must be to the right of the ccol the left half of the node maps to */
1695 	  if(imin[v] == 0) { cm_Fail("v: %d lpos: %d\n", v, lpos); }
1696 	  break; /* case MATP_nd */
1697 
1698        	case MATL_nd: /* i bands were set when we were on the left, non-right emitter, set implicit j bands */
1699 	  lpos = cp9map->nd2lpos[nd];
1700 
1701 	  v = cm->nodemap[nd]; /* v is MATL_ML */
1702 	  jmin[v] = r_nn_j[rpos];
1703 	  jmax[v] = r_nx_j[rpos];
1704 
1705 	  v++; /* v is MATL_D, the MATL_ML and MATL_IL concerns don't apply, D's don't emit */
1706 	  jmin[v] = r_nn_j[rpos];
1707 	  jmax[v] = r_nx_j[rpos];
1708 
1709 	  v++; /* v is MATL_IL */
1710 	  jmin[v] = r_nn_j[rpos];
1711 	  jmax[v] = r_nx_j[rpos];
1712 	  break;
1713 
1714 	case MATR_nd: /* set j bands explicitly from HMM bands, i bands implicitly */
1715 	  rpos = cp9map->nd2rpos[nd];
1716 	  v = cm->nodemap[nd]; /* v is MATR_MR */
1717 	  jmin[v] = r_mn[rpos];
1718 	  jmax[v] = r_mx[rpos];
1719 	  imin[v] = r_nn_i[lpos];
1720 	  imax[v] = r_nx_i[lpos];
1721 
1722 	  v++; /* v is MATR_D */
1723 	  jmin[v] = r_dn[rpos];
1724 	  jmax[v] = r_dx[rpos];
1725 	  imin[v] = r_nn_i[lpos];
1726 	  imax[v] = r_nx_i[lpos];
1727 
1728 	  v++; /* v is MATR_IR */
1729 	  jmin[v] = r_in[rpos-1];
1730 	  jmax[v] = r_ix[rpos-1];
1731 	  imin[v] = r_nn_i[lpos];
1732 	  imax[v] = r_nx_i[lpos];
1733 	  break;
1734 
1735 	case BEGL_nd:
1736 	case BEGR_nd: /* set i and j bands implicitly, except for BEGR_IL, whose i bands are set explicitly based on HMM */
1737 	  v = cm->nodemap[nd]; /* set i and j band for BEG{L,R}_S based on children */
1738 	  imin[v] = jmin[v] = INT_MAX;
1739 	  imax[v] = jmax[v] = INT_MIN;
1740 	  for(y = cm->cfirst[v]; y < cm->cfirst[v]+cm->cnum[v]; y++) {
1741 	    /* if y is reachable, make sure we can get there from v */
1742 	    if(imin[y] != -1) {
1743 	      imin[v] = ESL_MIN(imin[v], imin[y]);
1744 	      imax[v] = ESL_MAX(imax[v], imax[y]);
1745 	    }
1746 	    if(jmin[y] != -1) {
1747 	      jmin[v] = ESL_MIN(jmin[v], jmin[y]);
1748 	      jmax[v] = ESL_MAX(jmax[v], jmax[y]);
1749 	    }
1750 	  }
1751 	  if(imin[v] == INT_MAX) {
1752 	    imin[v] = -1;
1753 	    imax[v] = -2;
1754 	  }
1755 	  if(jmin[v] == INT_MAX) {
1756 	    jmin[v] = -1;
1757 	    jmax[v] = -2;
1758 	  }
1759 
1760 	  /* set BEGR_IL's i and j band */
1761 	  if(cm->ndtype[nd] == BEGR_nd) {
1762 	    v++;
1763 	    imin[v] = r_in[lpos-1]; /* BEGR_IL emits before lpos */
1764 	    imax[v] = r_ix[lpos-1];
1765 	    if(imin[v-1] != -1 && imin[v] != -1) { /* if BEGR_S and BEGR_IL is reachable */
1766 	      imin[v-1] = ESL_MIN(imin[v-1], imin[v]); /* expand BEGR_S so it can reach BEGR_IL */
1767 	      jmin[v] = (jmin[v-1] == -1) ? -1 : ESL_MAX(jmin[v-1], i0); /* can't get to a BEGR_IL without emitting at least i0 */
1768 	      jmax[v] = jmax[v-1];
1769 	    }
1770 	    else {
1771 	      imin[v] = jmin[v] = -1;
1772 	      imax[v] = jmax[v] = -2;
1773 	    }
1774 	    esl_stack_IPop(lpos_pda, &lpos); /* pop the remembered lpos from our sister BEGL_nd to use for parent BIF_nd and above */
1775 	  }
1776 	  else { /* BEGL_nd */
1777 	    if ((status = esl_stack_IPush(lpos_pda, lpos)) != eslOK) goto ERROR;
1778 	    lpos = rpos+1; /* next node we pop from stack will be our BEGR sister, on the right, switch lpos to rpos+1 */
1779 	  }
1780 	  break;
1781 
1782 	case END_nd:
1783 	  v = cm->nodemap[nd]; /* v is END_E */
1784 	  imin[v] = r_nn_i[lpos];
1785 	  imax[v] = (r_nx_i[lpos] == -2) ? r_nx_i[lpos] : ESL_MIN(r_nx_i[lpos]+1, j0+1); /* +1 is for StateDelta */
1786 	  if(r_in[lpos] != -1) { /* we could come from an IR above us (tricky case) */
1787 	    imin[v] = ESL_MIN(imin[v], ESL_MAX(r_in[lpos] - 1, i0));
1788 	    imax[v] = ESL_MAX(imax[v], ESL_MAX(r_ix[lpos] - 1, i0));
1789 	  }
1790 	  rpos = lpos;
1791 	  if(imin[v] != -1) {
1792 	    jmin[v] = imin[v]-1; /* E must emit d = 0 residues, so j ==i-1 */
1793 	    jmax[v] = imax[v]-1; /* E must emit d = 0 residues, so j ==i-1 */
1794 	  }
1795 	  break;
1796 
1797 	case ROOT_nd: /* ROOT is a special case, set i and j bands */
1798 	  /* lpos == 1 and rpos == hmm_M */
1799 	  assert(lpos == 1);
1800 	  assert(rpos == hmm_M);
1801 	  v = cm->nodemap[nd]; /* v is ROOT_S */
1802 	  imin[v] = r_nn_i[1];
1803 	  imax[v] = r_nx_i[1];
1804 	  jmin[v] = r_nn_j[hmm_M];
1805 	  jmax[v] = r_nx_j[hmm_M];
1806 
1807 	  v++; /* v is ROOT_IL */
1808 	  imin[v] = r_in[0]; /* ROOT_IL maps to HMM insert state of HMM node 0 */
1809 	  imax[v] = r_ix[0]; /* ROOT_IL maps to HMM insert state of HMM node 0 */
1810 	  /* ROOT_IL's j bands will be same as ROOT_S's, after ensuring state delta of 1 is respected */
1811 	  jmin[v] = (r_nn_j[hmm_M] == -1) ? -1 : ESL_MAX(r_nn_j[hmm_M], i0); /* can't get to ROOT_IL without emitting at least i0 */
1812 	  jmax[v] = r_nx_j[hmm_M];
1813 	  if(r_in[hmm_M] != -1) {
1814 	    jmin[v] = ESL_MIN(jmin[v], r_in[hmm_M]);
1815 	    jmax[v] = ESL_MIN(jmax[v], r_ix[hmm_M]);
1816 	  }
1817 
1818 	  v++; /* v is ROOT_IR */
1819 	  if(r_in[hmm_M] != -1) { /* if r_in[hmm_M] == -1, this state is unreachable */
1820 	    imin[v] = r_nn_i[1]; /* HMM state M_0 is silent */
1821 	    imax[v] = r_nx_i[1]; /* HMM state M_0 is silent */
1822 	    if(imin[v-1] != -1) {
1823 	      imin[v] = ESL_MIN(imin[v], imin[v-1]+1);
1824 	      imax[v] = ESL_MAX(imax[v], imax[v-1]+1);
1825 	    }
1826 	    jmin[v] = r_in[hmm_M]; /* ROOT_IR maps to HMM insert state of HMM node hmm_M */
1827 	    jmax[v] = r_ix[hmm_M]; /* ROOT_IR maps to HMM insert state of HMM node hmm_M */
1828 	  }
1829 	  break;
1830 	} /* end of switch(cm->ndtype[nd]) */
1831       } /* end of if(on_right) */
1832 
1833       else { /* on left */
1834 	/* set i bands for MATP_nd, MATL_nd only */
1835 	switch(cm->ndtype[nd]) {
1836 	case MATP_nd:
1837 	  lpos = cp9map->nd2lpos[nd];
1838 	  v = cm->nodemap[nd]; /* v is MATP_MP */
1839 	  imin[v] = r_mn[lpos];
1840 	  imax[v] = r_mx[lpos];
1841 	  v++; /* v is MATP_ML */
1842 	  imin[v] = r_mn[lpos];
1843 	  imax[v] = r_mx[lpos];
1844 	  v++; /* v is MATP_MR */
1845 	  imin[v] = r_dn[lpos] == -1 ? -1 : r_dn[lpos]+1;
1846 	  imax[v] = r_dx[lpos] == -2 ? -2 : r_dx[lpos]+1;
1847 	  v++; /* v is MATP_D */
1848 	  imin[v] = r_dn[lpos] == -1 ? -1 : r_dn[lpos]+1;
1849 	  imax[v] = r_dx[lpos] == -2 ? -2 : r_dx[lpos]+1;
1850 	  v++; /* v is MATP_IL */
1851 	  imin[v] = r_in[lpos];
1852 	  imax[v] = r_ix[lpos];
1853 	  /* we deal with setting imin/imax for MATP_IR when we're on the right */
1854 	  break;
1855 
1856 	case MATL_nd:
1857 	  lpos = cp9map->nd2lpos[nd];
1858 	  v = cm->nodemap[nd]; /* v is MATL_ML */
1859 	  imin[v] = r_mn[lpos];
1860 	  imax[v] = r_mx[lpos];
1861 	  v++; /* v is MATL_D */
1862 	  imin[v] = r_dn[lpos] == -1 ? -1 : r_dn[lpos]+1;
1863 	  imax[v] = r_dx[lpos] == -2 ? -2 : r_dx[lpos]+1;
1864 	  v++; /* v is MATL_IL */
1865 	  imin[v] = r_in[lpos];
1866 	  imax[v] = r_ix[lpos];
1867 	  break;
1868 	} /* end of switch(cm->ndtype[nd]) */
1869 
1870 	if(cm->ndtype[nd] == BIF_nd) {
1871 	  /* push the BIF back on for its right side  */
1872 	  if ((status = esl_stack_IPush(nd_pda, 1)) != eslOK) goto ERROR;
1873 	  if ((status = esl_stack_IPush(nd_pda, nd)) != eslOK) goto ERROR;
1874 	  /* push node index for right child */
1875 	  if ((status = esl_stack_IPush(nd_pda, 0)) != eslOK) goto ERROR;
1876 	  if ((status = esl_stack_IPush(nd_pda, cm->ndidx[cm->cnum[cm->nodemap[nd]]])) != eslOK) goto ERROR;
1877 	  /* push node index for left child */
1878 	  if ((status = esl_stack_IPush(nd_pda, 0)) != eslOK) goto ERROR;
1879 	  if ((status = esl_stack_IPush(nd_pda, cm->ndidx[cm->cfirst[cm->nodemap[nd]]])) != eslOK) goto ERROR;
1880 	}
1881 	else {
1882 	  /* push the node back on for right side */
1883 	  if ((status = esl_stack_IPush(nd_pda, 1)) != eslOK) goto ERROR;
1884 	  if ((status = esl_stack_IPush(nd_pda, nd)) != eslOK) goto ERROR;
1885 	  /* push child node on */
1886 	  if (cm->ndtype[nd] != END_nd) {
1887 	    if ((status = esl_stack_IPush(nd_pda, 0)) != eslOK) goto ERROR;
1888 	    if ((status = esl_stack_IPush(nd_pda, nd+1)) != eslOK) goto ERROR;
1889 	  }
1890 	}
1891       }
1892     }
1893 
1894   /* If we're allowing truncated alignments, do a final pass through all states, expanding bands to allow for
1895    * L and/or R and/or T marginal alignments, as necessary,
1896    * cp9b->{L,R}marg_{i,j}{min,max} were defined in cp9_PredictStartAndEndPositions().
1897    */
1898   if(do_trunc) {
1899     for(v = 0; v < cm->M; v++) {
1900       if(cp9b->Lvalid[v] || cp9b->Tvalid[v]) { /* allow for left marginal alignment by expanding j band */
1901 	jmin[v] = (jmin[v] == -1) ? cp9b->Lmarg_jmin : ESL_MIN(jmin[v], cp9b->Lmarg_jmin);
1902 	jmax[v] = (jmax[v] == -2) ? cp9b->Lmarg_jmax : ESL_MAX(jmax[v], cp9b->Lmarg_jmax);
1903       }
1904       if(cp9b->Rvalid[v] || cp9b->Tvalid[v]) { /* allow for right marginal alignment by expanding i band */
1905 	imin[v] = (imin[v] == -1) ? cp9b->Rmarg_imin : ESL_MIN(imin[v], cp9b->Rmarg_imin);
1906 	imax[v] = (imax[v] == -2) ? cp9b->Rmarg_imax : ESL_MAX(imax[v], cp9b->Rmarg_imax);
1907       }
1908     }
1909   }
1910 
1911   if(! doing_search) { /* if we're aligning the full seq must be aligned at the root state */
1912     imin[0] = i0;                   /* first residue must be in subtree of ROOT_S */
1913     if(imin[1] != -1) imin[1] = i0; /* first residue must be in subtree of ROOT_IL, if it is used */
1914     jmax[0] = j0;                   /* final residue must be in subtree of ROOT_S */
1915     if(jmin[1] != -1) jmax[1] = j0; /* final residue must be in subtree of ROOT_IL if it is used */
1916     if(jmin[2] != -1) jmax[2] = j0; /* final residue must be in subtree of ROOT_IR if it is used */
1917   }
1918 
1919   /* Final pass through all states:
1920    * 1. if any band value implies a state is unreachable, make it so by setting imin[v]=jmin[v]=-1, imax[v]=jmax[v]=-2;
1921    * 2. set detached inserts unreachable.
1922    * 3. if(!do_trunc) for left emitters enforce jmin/jmax allow at least 1 residue to be emitted
1923    * 4. if(!do_trunc) for MP states, enforce jmin/jmax allow at least 2 residues to be emitted
1924    */
1925   for(v = 0; v < cm->M; v++) {
1926     ESL_DASSERT1(((imin[v] == -1 && imax[v] == -2) || (imin[v] >= 0 && imax[v] >= 0)));
1927     ESL_DASSERT1(((jmin[v] == -1 && jmax[v] == -2) || (jmin[v] >= 0 && jmax[v] >= 0)));
1928     if(imin[v] == -1 || jmin[v] == -1) {
1929       imin[v] = jmin[v] = -1;
1930       imax[v] = jmax[v] = -2;
1931     }
1932     if(StateIsDetached(cm, v)) {
1933       imin[v] = jmin[v] = -1;
1934       imax[v] = jmax[v] = -2;
1935     }
1936     if(! do_trunc) {
1937       if(cm->sttype[v] == MP_st) {
1938 	if(jmax[v] == i0) { /* HMM tells us right half of MP state must emit first residue in the sequence, we know better, make state unreachable */
1939 	  ESL_DASSERT1((jmin[v] == i0));
1940 	  imin[v] = jmin[v] = -1; /* ignore hmm */
1941 	  imax[v] = jmax[v] = -2; /* ignore hmm */
1942 	}
1943 	else if (jmin[v] == i0) { /* HMM tells us right half of MP state could possibly first residue (i0), but we know it can't (see comment above). */
1944 	  jmin[v]++;              /* pad 1 onto what the hmm thought, make first emittable residue i0+1 */
1945 	  /* leave jmax[v] alone, we know it's not == i0, we checked for that case above */
1946 	}
1947       }
1948       /* if a left emitter, enforce jmin/jmax require at least 1 residue is emitted */
1949       if((StateLeftDelta(cm->sttype[v]) == 1) && imin[v] != -1) {
1950 	if(jmax[v] == (i0-1)) { /* HMM bands implied state must be entered after emitting exactly 0 residues, we know better, make it unreachable */
1951 	  ESL_DASSERT1((jmin[v] == (i0-1)));
1952 	  imin[v] = jmin[v] = -1; /* ignore hmm */
1953 	  imax[v] = jmax[v] = -2; /* ignore hmm */
1954 	}
1955 	else if (jmin[v] == (i0-1)) {
1956 	  jmin[v] = i0; /* pad 1 onto what the hmm thought */
1957 	  /* leave jmax[v] alone, we know it's not == i0-1, we checked for that case above */
1958 	}
1959       }
1960     }
1961   }
1962 
1963 #if 0
1964   if(do_trunc) {
1965     for(v = 0; v < cm->M; v++) {
1966       printf("dotrunc: %d ijband v: %4d nd: %4d  %4s  %2s  i: %5d - %5d  j: %5d - %5d\n", do_trunc, v, cm->ndidx[v],
1967 	     Nodetype(cm->ndtype[cm->ndidx[v]]),
1968 	     Statetype(cm->sttype[v]),
1969 	     imin[v], imax[v], jmin[v], jmax[v]);
1970     }
1971   }
1972 #endif
1973 
1974   /* A final, brutal hack. If the hmm used to derive bands has local
1975    * begins, ends and ELs on, it's possible (but extremely rare
1976    * empirically, even with very high tau values (0.49!)) that no
1977    * valid CM parse exists within the i and j bands. To avoid this, if
1978    * the CM has local begins and ends on then we use a brutal hack
1979    * here to enable at least one valid parse from the ROOT_S to a
1980    * state from which the EL can be reached and able to emit all
1981    * residues.
1982    *
1983    * There's 2 relevant cases.
1984    *
1985    * Case 1: node 1 is a MATP, MATR, or MATL node (this is the easier case)
1986    * Case 2: node 1 is a BIF node
1987    *
1988    * Case 1: node 1 is a MATP, MATR, or MATL node (this is the easier case)
1989    * A. assert CM local begins and ends are on (they should be if we're using a localized HMM to get bands).
1990    *    and we can do a local begin into and a local end out of node 1. This will be TRUE unless there
1991    *    are only 3 nodes in the CM (which is impossible, cmbuild won't build a 3 node CM - the reason is that
1992    *    such a CM would suck at local alignment b/c no local ends are possible (not to mention they're too small
1993    *    to be useful, and that if node 1 == MATL the CM can only emit/align 1 residue in local mode b/c the
1994    *    ROOT_IL, ROOT_IR are unreachable and the MATL_IL is detached!).
1995    *
1996    * B. if we're doing alignment (full target must be accounted for):
1997    *    v = cm->nodemap[nd]
1998    *    set imin[v] = ESL_MIN(imin[v], i0)
1999    *        imax[v] = ESL_MAX(imax[v], i0)
2000    *        jmin[v] = ESL_MIN(jmin[v], j0)
2001    *        jmax[v] = ESL_MAX(jmax[v], j0)
2002    *    else if we're doing search and v is unreachable, make it reachable by setting
2003    *        imin[v] = imin[0];
2004    *        imax[v] = imax[0];
2005    *        jmin[v] = jmin[0];
2006    *        jmax[v] = jmax[0];
2007    *    then we'll be able to emit some residues from v, (so we're guaranteed a valid parse.)
2008    *
2009    * Case 2: node 1 is a BIF node
2010    * v = cm->nodemap[nd] (the BIF_B state)
2011    * if v is reachable and we're doing alignment, expand its bands so that it can
2012    * account for the full seq:
2013    *    set imin[v] = ESL_MIN(imin[v], i0)
2014    *        imax[v] = ESL_MAX(imax[v], i0)
2015    *        jmin[v] = ESL_MIN(jmin[v], j0)
2016    *        jmax[v] = ESL_MAX(jmax[v], j0)
2017    * else if v is reachable and we're doing search, ensure that one contiguous chunk of
2018    * seq can be emitted by BIF's children (see code)
2019    *
2020    * if v is not reachable (whether we're doing search or not), we enforce 1 valid parse:
2021    * the BIF must emit the full target, residues i0..j0-1 from its BEGL_S's EL state, and
2022    * residue j0 from its BEGR_S's EL state.
2023    */
2024   if(hmm_is_localized && cm_is_fully_localized) {
2025     if(do_trunc) cp9b->Jvalid[0] = TRUE;
2026     if(imin[0] == -1) { /* ROOT_S is unreachable, uhh... */
2027       imin[0] = imax[0] = i0;
2028       jmin[0] = jmax[0] = j0;
2029     }
2030     if(cm->nodes == 3) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "cp9_HMM2ijBands(), cm/hmm are locally configured, only 3 nodes in the CM, this is an illegal CM b/c local ENDs are impossible.");
2031     nd = 1;
2032     if(i0 == j0) {
2033       while((nd < cm->nodes) && (cm->ndtype[nd] == MATP_nd)) nd++; /* a local begin into a MATP_MP state can't happen when the target is 1 residue, it must emit 2 residues */
2034       if(cm->ndtype[nd] == END_nd) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "cp9_HMM2ijBands(), CM has no MATL, MATR or BIF nodes, this shouldn't happen (cmbuild forbids it)!\n");
2035     }
2036     if(cm->ndtype[nd] == BIF_nd) {
2037       v = cm->nodemap[nd];
2038       w = cm->cfirst[v]; /* BEGL_S */
2039       y = cm->cnum[v];   /* BEGR_S */
2040       if(do_trunc) {
2041 	cp9b->Jvalid[v] = TRUE;
2042 	cp9b->Jvalid[w] = TRUE;
2043 	cp9b->Jvalid[y] = TRUE;
2044       }
2045       if(imin[v] != -1 && imin[w] != -1 && imin[y] != -1) { /* v and its children w and y are all reachable */
2046 	if(!doing_search) { /* we need to be able to account for the full sequence */
2047 	  imin[v] = ESL_MIN(imin[v], i0);
2048 	  imax[v] = ESL_MAX(imax[v], i0);
2049 	  jmin[v] = ESL_MIN(jmin[v], j0);
2050 	  jmax[v] = ESL_MAX(jmax[v], j0);
2051 	  imin[w] = imin[v];
2052 	  imax[w] = imax[v];
2053 	  jmax[w] = ESL_MAX(jmax[w], ESL_MIN(j0, imax[w]));
2054 	  jmin[y] = jmin[v];
2055 	  jmax[y] = jmax[v];
2056 	  imin[y] = ESL_MIN(imin[y], ESL_MAX(i0, jmin[y]));
2057 	  /* now ensure that imin[y] <= jmax[w]+1, so we can definitely emit the full seq */
2058 	  imin[y] = ESL_MIN(imin[y], ESL_MAX(i0, jmax[w]+1));
2059 	  imax[y] = ESL_MAX(imin[y], imax[y]);
2060 	}
2061 	else { /* doing search, we only need to be able to emit some range of residues from BEGL and BEGR's EL states */
2062 	  imin[y] = ESL_MIN(imin[y], ESL_MAX(i0, jmax[w]+1));
2063 	  imax[y] = ESL_MAX(imin[y], imax[y]);
2064 	}
2065       } /* end of if(imin[v] != -1) */
2066       else { /* v, w or y are unreachable, make them reachable */
2067 	if(! doing_search) {
2068 	  /* if we're doing alignment, we enforce that the full seq must be emittable
2069 	   * by BIF and it's children's (BEGL_S and BEGR_S) EL states */
2070 	  imin[v] = i0;
2071 	  imax[v] = i0;
2072 	  jmin[v] = j0;
2073 	  jmax[v] = j0;
2074 	  imin[w] = i0; /* w will emit i0..j0-1 (which may be 0 residues if i0==j0) */
2075 	  imax[w] = i0;
2076 	  jmin[w] = j0-1;
2077 	  jmax[w] = j0-1;
2078 	  imin[y] = j0; /* y will emit only j0 */
2079 	  imax[y] = j0;
2080 	  jmin[y] = j0;
2081 	  jmax[y] = j0;
2082 	}
2083 	else {
2084 	  /* if we're doing search we enforce that the residues from imin[0]..jmax[0] are emittable
2085 	   * by BIF and it's children's (BEGL_S and BEGR_S) EL states */
2086 	  imin[v] = imin[0];
2087 	  imax[v] = imin[0];
2088 	  jmin[v] = jmax[0];
2089 	  jmax[v] = jmax[0];
2090 	  imin[w] = imin[0]; /* w will emit imin[0]..jmax[0]-1 (which may be 0 residues if imin[0]==jmax[0]) */
2091 	  imax[w] = imin[0];
2092 	  jmin[w] = jmax[0]-1;
2093 	  jmax[w] = jmax[0]-1;
2094 	  imin[y] = jmax[0]; /* y will emit only jmax[0] */
2095 	  imax[y] = jmax[0]; /* y will emit only jmax[0] */
2096 	  jmin[y] = jmax[0];
2097 	  jmax[y] = jmax[0];
2098 	}
2099       }
2100     } /* end of if(cm->ndtype[nd] == BIF_nd) */
2101     else {
2102       /* node nd is a MATL, MATR or MATP */
2103       ESL_DASSERT1((cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATR_nd));
2104       assert(cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATR_nd);
2105       v = cm->nodemap[nd];
2106       if(do_trunc) cp9b->Jvalid[v] = TRUE;
2107       /* we can do a local begin into and local end out of v */
2108       ESL_DASSERT1((NOT_IMPOSSIBLE(cm->beginsc[v])));
2109       ESL_DASSERT1((NOT_IMPOSSIBLE(cm->endsc[v])));
2110       assert(NOT_IMPOSSIBLE(cm->beginsc[v]));
2111       assert(NOT_IMPOSSIBLE(cm->endsc[v]));
2112       if(!doing_search) { /* we need to be able to account for the full sequence */
2113 	if(imin[v] == -1) { /* v is unreachable, make it reachable only for emitting the full seq */
2114 	  imin[v] = imax[v] = i0;
2115 	  jmin[v] = jmax[v] = j0;
2116 	}
2117 	else { /* v is reachable, expand it's band so it can emit the full seq */
2118 	  imin[v] = ESL_MIN(imin[v], i0);
2119 	  imax[v] = ESL_MAX(imax[v], i0);
2120 	  jmin[v] = ESL_MIN(jmin[v], j0);
2121 	  jmax[v] = ESL_MAX(jmax[v], j0);
2122 	}
2123       }
2124       else { /* doing search, do not need to account for full target sequence, make it so we can reach v for some i and j (this will guarantee >= 1 valid parse) */
2125 	if(imin[v] == -1) { /* v is unreachable */
2126 	  imin[v] = imin[0];
2127 	  imax[v] = imax[0];
2128 	  jmin[v] = jmin[0];
2129 	  jmax[v] = jmax[0];
2130 	}
2131 	else { /* v is reachable, make sure it's reachable from the ROOT_S state, expand the ROOT_S band */
2132 	  imin[0] = ESL_MIN(imin[0], imin[v]);
2133 	  imax[0] = ESL_MAX(imax[0], imax[v]);
2134 	  jmin[0] = ESL_MIN(jmin[0], jmin[v]);
2135 	  jmax[0] = ESL_MAX(jmax[0], jmax[v]);
2136 	}
2137       }
2138     }
2139   }
2140   /* end of brutal hack */
2141 #if eslDEBUGLEVEL >= 1
2142   /* check for valid CM parse, there should be one, unless do_trunc is true, then we may not... */
2143   if((status = CMBandsCheckValidParse(cm, cp9b, errbuf, i0, j0, doing_search)) != eslOK) return status;
2144 #endif
2145 
2146   esl_stack_Destroy(nd_pda);
2147   esl_stack_Destroy(lpos_pda);
2148   free(r_mn);
2149   free(r_mx);
2150   free(r_dn);
2151   free(r_dx);
2152   free(r_in);
2153   free(r_ix);
2154   free(r_nn_i);
2155   free(r_nx_i);
2156   free(r_nn_j);
2157   free(r_nx_j);
2158 
2159   return eslOK;
2160 
2161  ERROR:
2162   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
2163 }
2164 
2165 /* Function: HMMBandsEnforceValidParse()
2166  * Incept:   EPN, Fri Feb  1 16:46:50 2008
2167  *
2168  * Purpose:  Given bands on HMM states for a target sequence,
2169  *           check for a valid HMM parse within those bands.
2170  *           If no valid parse exists, expand the bands such that
2171  *           one does exist, in a greedy manner.
2172  *
2173  *           Bands are expanded using the HMMBandsFixUnreachable()
2174  *           function. These take a node that is unreachable
2175  *           and modify bands on current node and nearby nodes
2176  *           to make it reachable. This is awful hack number 1.
2177  *           (see HMMBandsFixUnreachable() for details).
2178  *           Note: the technique used for expanding the bands was
2179  *           selected for its relative simplicity. It does not
2180  *           expand the bands in any smart way that is aware of
2181  *           probability mass or score of the newly possible parses
2182  *           during the band expansion. You could try to do that,
2183  *           but it's not likely to be worth it, when the default
2184  *           bands before expansion do not allow a single parse,
2185  *           the real solution is to lower tau, the tail loss parameter
2186  *           used during band calculation. This function is really
2187  *           only necessary so the HMM banding technique is robust to
2188  *           high values of tau, higher than any reasonable application
2189  *           should use.
2190  *
2191  *           Awful hack #2 occurs when two different transitions to the
2192  *           same state imply reachable bands that have a 'gap' in the
2193  *           middle. For example if node D_3 can reach node M_4 with
2194  *           i = 3 or 4, and node M_3 can reach node M_4 with i equal
2195  *           to 6 or 7. This means that node M_4 cannot be reached for
2196  *           i == 5, but this implementation is much easier if we can
2197  *           just set the reachable band for M_4 to 3..7. So, that's
2198  *           what we do, and we doctor the band of I_3 so that M_4 *can*
2199  *           be reached for i == 5. This is done in HMMBandsFillGap().
2200  *           See that function for details. This hack is only performed
2201  *           for models NOT in local mode. If we are in local mode,
2202  *           this 'gap' situation comes up much more often, but when
2203  *           we're in local mode, we can use an EL state in the CM
2204  *           to basically always get a valid parse, so we're not
2205  *           so worried about enforcing a valid parse and we skip
2206  *           this hack.
2207  *
2208  * Args:     cp9    - the HMM the bands were derived from
2209  *           cp9b   - the CP9 bands object
2210  *           cp9map - map from CM to cp9
2211  *           errbuf - for error messages
2212  *           i0     - first residue of sequence we're using bands for
2213  *           j0     - final residue of sequence we're using bands for
2214  *
2215  * Returns:  eslOK on success
2216  *           eslEINCONCEIVABLE if we can't expand the bands to make a valid parse (shouldn't happen)
2217  *           eslEMEM if a memory allocation error occurs
2218  *           <ret_did_expand> set to TRUE if we had to expand the HMM bands, FALSE if not
2219  */
2220 int
HMMBandsEnforceValidParse(CP9_t * cp9,CP9Bands_t * cp9b,CP9Map_t * cp9map,char * errbuf,int i0,int j0,int doing_search,int * ret_did_expand,int ** ret_r_mn,int ** ret_r_mx,int ** ret_r_in,int ** ret_r_ix,int ** ret_r_dn,int ** ret_r_dx,int ** ret_r_nn_i,int ** ret_r_nx_i,int ** ret_r_nn_j,int ** ret_r_nx_j)2221 HMMBandsEnforceValidParse(CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, char *errbuf, int i0, int j0, int doing_search, int *ret_did_expand,
2222 			  int **ret_r_mn, int **ret_r_mx, int **ret_r_in,  int **ret_r_ix, int **ret_r_dn, int **ret_r_dx,
2223 			  int **ret_r_nn_i, int **ret_r_nx_i, int **ret_r_nn_j, int **ret_r_nx_j)
2224 {
2225   int status;
2226   /* r_* arrays, these are the bands on 'reachable' residues for each HMM state as we move
2227    * from left to right through the HMM.
2228    * For example, r_mn[k] = 3, r_mx[k] = 5, means that for all possible HMM parses within the bands
2229    * in the cp9b pn_* arrays that reach the match state of node k, the residue emitted by that match
2230    * must be either 3, 4, or 5.
2231    */
2232   int *r_mn;   /* [0..k..hmm_M] minimal residue position for which we can reach M_k (match state of node k) */
2233   int *r_mx;   /* [0..k..hmm_M] maximal residue position for which we can reach M_k */
2234   int *r_in;   /* [0..k..hmm_M] minimal residue position for which we can reach I_k (insert state of node k) */
2235   int *r_ix;   /* [0..k..hmm_M] maximal residue position for which we can reach I_k */
2236   int *r_dn;   /* [0..k..hmm_M] minimal residue position for which we can reach D_k (delete state of node k) */
2237   int *r_dx;   /* [0..k..hmm_M] maximal residue position for which we can reach D_k */
2238   int r_begn;  /*  minimal first residue position for which we can exit the BEGIN state */
2239   int r_begx;  /*  minimal first residue position for which we can exit the BEGIN state */
2240   int r_endn;  /*  minimal final residue position for which we can reach the END state */
2241   int r_endx;  /*  maximal final residue position for which we can reach the END state */
2242   int *r_nn_i; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2243   int *r_nx_i; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2244   int *r_nn_j; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2245   int *r_nx_j; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2246   /* r_nn_i and r_nx_i are used when setting i bands, and r_nn_j and r_nx_j are used when setting j bands .
2247    * The values can differ because of an off-by-one issue with the non-emitting (delete and M_0) states of the HMM:
2248    * pn_min_d[k] = i, means posn i was last residue emitted prior to entering node k's delete state. However, for a CM,
2249    * if a delete states sub-parsetree is bounded by i' and j', this means positions i' and j' HAVE YET TO BE EMITTED.
2250    * For i states this means we have to add 1 to the delete band positions, but for j states we do not, the off-by-one
2251    * is taken care of because the HMM is moving left to right, while j positions move right to left (confusing as hell,
2252    * bad explanation, i know... write out an example, it's the only way to get it).
2253    */
2254   int *r_nn_hmm;   /* [0..k..hmm_M] min reachable position i in HMM node k from the HMM's perspective */
2255   int *r_nx_hmm;   /* [0..k..hmm_M] max reachable position i in HMM node k from the HMM's perspective  */
2256   int *was_unr;    /* [0..k..hmm_M] TRUE if node k was unreachable, then we expanded bands, now it should be reachable */
2257   int *filled_gap; /* [0..k..hmm_M] TRUE if we filled a gap in the reachable bands for node k */
2258   int  just_filled_gap; /* TRUE if we filled a gap for the current node */
2259   int  hmm_M = cp9b->hmm_M; /* number of nodes in the model */
2260   int  k, kp;      /* node counters */
2261   int  n;          /* a temporary minimum residue position */
2262   int  x;          /* a temporary maximum residue position */
2263   int  c;          /* counter */
2264   int sd;          /* state delta, number of emissions for each state */
2265   int local_begins_ends_on; /* TRUE if HMM has local begins (M_0(B) -> M_k for k = 1..M and local ends (M_k -> E) for k = 1..M-1 */
2266   /*int j0_is_reachable = FALSE; */ /* TRUE if we can reach j0 for some node */
2267   /* ptrs to cp9b data, for convenience */
2268   int *pn_min_m;      /* pn_min_m[k] = first position in HMM band for match state of HMM node k */
2269   int *pn_max_m;      /* pn_max_m[k] = final position in HMM band for match state of HMM node k */
2270   int *pn_min_i;      /* pn_min_i[k] = first position in HMM band for insert state of HMM node k */
2271   int *pn_max_i;      /* pn_max_i[k] = final position in HMM band for insert state of HMM node k */
2272   int *pn_min_d;      /* pn_min_d[k] = first position in HMM band for delete state of HMM node k */
2273   int *pn_max_d;      /* pn_max_d[k] = final position in HMM band for delete state of HMM node k */
2274 
2275   if((cp9->flags & CPLAN9_LOCAL_BEGIN) && (! (cp9->flags & CPLAN9_LOCAL_END))) ESL_FAIL(eslEINCOMPAT, errbuf, "HMMBandsEnforceValidParse(), HMM has local begins ON but local ends OFF. Both must be on, or both must be off.");
2276   local_begins_ends_on = ((cp9->flags & CPLAN9_LOCAL_BEGIN) && (cp9->flags & CPLAN9_LOCAL_END)) ? TRUE : FALSE;
2277 
2278   pn_min_m = cp9b->pn_min_m;
2279   pn_max_m = cp9b->pn_max_m;
2280   pn_min_i = cp9b->pn_min_i;
2281   pn_max_i = cp9b->pn_max_i;
2282   pn_min_d = cp9b->pn_min_d;
2283   pn_max_d = cp9b->pn_max_d;
2284 
2285   /* allocate and initialize */
2286   ESL_ALLOC(r_mn, sizeof(int) * (hmm_M+1));
2287   ESL_ALLOC(r_mx, sizeof(int) * (hmm_M+1));
2288   ESL_ALLOC(r_in, sizeof(int) * (hmm_M+1));
2289   ESL_ALLOC(r_ix, sizeof(int) * (hmm_M+1));
2290   ESL_ALLOC(r_dn, sizeof(int) * (hmm_M+1));
2291   ESL_ALLOC(r_dx, sizeof(int) * (hmm_M+1));
2292   ESL_ALLOC(r_nn_i, sizeof(int) * (hmm_M+1));
2293   ESL_ALLOC(r_nx_i, sizeof(int) * (hmm_M+1));
2294   ESL_ALLOC(r_nn_j, sizeof(int) * (hmm_M+1));
2295   ESL_ALLOC(r_nx_j, sizeof(int) * (hmm_M+1));
2296   ESL_ALLOC(r_nn_hmm, sizeof(int) * (hmm_M+1));
2297   ESL_ALLOC(r_nx_hmm, sizeof(int) * (hmm_M+1));
2298 
2299   for(k = 0; k <= hmm_M; k++) {
2300     r_mn[k] = r_in[k] = r_dn[k] = r_nn_i[k] = r_nn_j[k] = r_nn_hmm[k] = INT_MAX;
2301     r_mx[k] = r_ix[k] = r_dx[k] = r_nx_i[k] = r_nx_j[k] = r_nx_hmm[k] = INT_MIN;
2302   }
2303   r_begn = INT_MAX;
2304   r_begx = INT_MIN;
2305   r_endn = INT_MAX;
2306   r_endx = INT_MIN;
2307 
2308   ESL_ALLOC(was_unr,    sizeof(int) * (hmm_M+1));
2309   ESL_ALLOC(filled_gap, sizeof(int) * (hmm_M+1));
2310   esl_vec_ISet(was_unr,    (hmm_M+1), FALSE);
2311   esl_vec_ISet(filled_gap, (hmm_M+1), FALSE);
2312 
2313   /* Note on comment nomenclature:
2314    * M_k: match  state of node k
2315    * I_k: insert state of node k
2316    * D_k: delete state of node k
2317    */
2318 
2319   if(! doing_search) assert(pn_min_m[0] == (i0-1));
2320   if(pn_min_m[0] != -1) {
2321     r_mn[0] = pn_min_m[0]; /* initialize min reachable residue for M_0 as pn_min_m[0] */
2322     r_mx[0] = pn_max_m[0]; /* initialize min reachable residue for M_0 as pn_max_m[0] */
2323   }
2324 
2325   /* The main loop: for each node, for each state, determine which residues are reachable given
2326    * the reachable residues for the states in the previous node and current node.
2327    * The order is important: first we account for all transitions to the insert state of the same
2328    * node, as the reachable band on the insert will affect later transitions.
2329    * Then we do all transitions to the match of the next node, and finally to the delete of the
2330    * next node.
2331    */
2332   for(k = 0; k <= hmm_M; k++) {
2333     if(pn_min_m[k] == -1) ESL_DASSERT1((pn_max_m[k] == -1));
2334     if(pn_min_i[k] == -1) ESL_DASSERT1((pn_max_i[k] == -1));
2335     if(pn_min_d[k] == -1) ESL_DASSERT1((pn_max_d[k] == -1));
2336     just_filled_gap = FALSE;
2337 
2338     /* transitions to insert of node k (I_k) */
2339     if(r_mn[k] <= r_mx[k]) { /* M_k is reachable */
2340       /* M_k->I_k transition */
2341       if(pn_min_i[k] != -1) {
2342 	n = r_mn[k]+1;
2343 	x = r_mx[k]+1;
2344 	if((ESL_MIN(x, pn_max_i[k]) - ESL_MAX(n, pn_min_i[k])) >= 0) { /* TRUE if n..x overlaps with pn_min_i[k]..pn_max_i[k] by at least 1 residue */
2345 	  n = ESL_MAX(n, pn_min_i[k]); /* n can't be less than pn_min_i[k] */
2346 	  n = ESL_MIN(n, pn_max_i[k]); /* n can't be more than pn_max_i[k] */
2347 	  x = ESL_MIN(x, pn_max_i[k]); /* x can't be more than pn_max_i[k] */
2348 	  /* no need to check if we need to fill a gap, not an issue for inserts which can self-transit and fill their own gaps */
2349 	  r_in[k] = ESL_MIN(r_in[k], n);
2350 	  r_ix[k] = ESL_MAX(r_ix[k], x);
2351 	  ESL_DASSERT1((r_in[k] <= r_ix[k]));
2352 	}
2353       }
2354     }
2355     if(r_dn[k] <= r_dx[k]) {
2356       /* D_k->I_k transition */
2357       if(pn_min_i[k] != -1) {
2358 	n = r_dn[k]+1;
2359 	x = r_dx[k]+1;
2360 	if((ESL_MIN(x, pn_max_i[k]) - ESL_MAX(n, pn_min_i[k])) >= 0) { /* TRUE if n..x overlaps with pn_min_i[k]..pn_max_i[k] by at least 1 residue */
2361 	  n = ESL_MAX(n, pn_min_i[k]); /* n can't be less than pn_min_i[k] */
2362 	  n = ESL_MIN(n, pn_max_i[k]); /* n can't be more than pn_max_i[k] */
2363 	  x = ESL_MIN(x, pn_max_i[k]); /* x can't be more than pn_max_i[k] */
2364 	  /* no need to check if we need to fill a gap, not an issue for inserts which can self-transit and fill their own gaps */
2365 	  r_in[k] = ESL_MIN(r_in[k], n);
2366 	  r_ix[k] = ESL_MAX(r_ix[k], x);
2367 	  ESL_DASSERT1((r_in[k] <= r_ix[k]));
2368 	}
2369       }
2370     }
2371     if(r_in[k] <= r_ix[k]) {
2372       /* I_k -> I_k transition */
2373       ESL_DASSERT1((r_ix[k] <= pn_max_i[k]));
2374       /* I_k->I_k   transition (first b/c self transitions are possible) */
2375       if(pn_min_i[k] != -1) { /* special case, self emitter, if we can enter this INSERT state, for any valid residue, we can emit residues until we reach pn_max_i[k] */
2376 	if(r_in[k] <= pn_max_i[k]) { /* we can reach this insert for i == r_in[k], then emit until pn_max_i[k] */
2377 	  r_ix[k] = pn_max_i[k];
2378 	}
2379 	else {
2380 	  r_in[k] = INT_MAX;
2381 	  r_ix[k] = INT_MIN;
2382 	}
2383       }
2384     }
2385     /* done with transitions to I_k */
2386 
2387     /* transitions to match of node k+1 (M_k+1) */
2388     if(k < hmm_M) { /* state M_M+1 is special, it's the END state, we deal with that below */
2389       if(r_mn[k] <= r_mx[k]) {
2390 	/* M_k->M_k+1 transition */
2391 	if(pn_min_m[k+1] != -1) {
2392 	  n = r_mn[k]+1;
2393 	  x = r_mx[k]+1;
2394 	  if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2395 	    n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2396 	    n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2397 	    x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2398 	    if(r_mn[k+1] != INT_MAX) {
2399 	      if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2400 		/* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2401 		if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2402 		just_filled_gap = TRUE;
2403 	      }
2404 	    }
2405 	    r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2406 	    r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2407 	    ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2408 	  }
2409 	}
2410       }
2411       /* D_k->M_k+1 transition */
2412       if(r_dn[k] <= r_dx[k]) {
2413 	if(pn_min_m[k+1] != -1) {
2414 	  n = r_dn[k]+1;
2415 	  x = r_dx[k]+1;
2416 	  if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2417 	    n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2418 	    n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2419 	    x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2420 	    if(r_mn[k+1] != INT_MAX) {
2421 	      if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2422 		/* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2423 		ESL_DASSERT1((k != 0));
2424 		if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2425 		just_filled_gap = TRUE;
2426 	      }
2427 	    }
2428 	    r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2429 	    r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2430 	    ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2431 	  }
2432 	}
2433       }
2434       /* I_k->M_k+1transition */
2435       if(r_in[k] <= r_ix[k]) {
2436 	if(pn_min_m[k+1] != -1) {
2437 	  n = r_in[k]+1;
2438 	  x = r_ix[k]+1;
2439 	  if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2440 	    n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2441 	    n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2442 	    x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2443 	    if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2444 	      /* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2445 	      ESL_DASSERT1((k != 0));
2446 	      if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2447 	      just_filled_gap = TRUE;
2448 	    }
2449 	    r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2450 	    r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2451 	    ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2452 	  }
2453 	}
2454       }
2455       /* EL_kp->M_k+1 transition, we could have come from 1 or more EL states */
2456       if(cp9->flags & CPLAN9_EL) {
2457 	if(pn_min_m[k+1] != -1) {
2458 	  for(c = 0; c < cp9->el_from_ct[k+1]; c++) { /* el_from_ct[k+1] holds # ELs that can go to k+1 */
2459 	    kp = cp9->el_from_idx[k+1][c];
2460 	    if(r_mn[kp] <= r_mx[kp]) {
2461 	      n = r_mn[kp]; /* EL's can emit 0 or more residues */
2462 	      x = j0;       /* EL's can emit 0 or more residues */
2463 	      if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2464 		n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2465 		n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2466 		x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2467 		if(r_mn[k+1] != INT_MAX) {
2468 		  if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2469 		    /* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2470 		    ESL_DASSERT1((k != 0));
2471 		    if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2472 		    just_filled_gap = TRUE;
2473 		  }
2474 		}
2475 		r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2476 		r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2477 		ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2478 	      }
2479 	    }
2480 	  }
2481 	}
2482       }
2483       /* Begin ->M_k+1 transition, if local begins are on, we could go B->M_k+1, this is always true if M_k+1 is reachable and (doing_search),
2484        * if we're doing alignment this is true only if the first residue is within the band on M_k+1
2485        */
2486       if(local_begins_ends_on) {
2487 	if(pn_min_m[k+1] != -1) {
2488 	  if(doing_search) {
2489 	    n = pn_min_m[k+1];
2490 	    x = pn_max_m[k+1];
2491 	    r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2492 	    r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2493 	  }
2494 	  else { /* doing alignment, we can only do a local begin into M_k+1 if the first residue is within it's band */
2495 	    if(pn_min_m[k+1] == r_mn[0]+1) {
2496 	      r_mn[k+1] = ESL_MIN(r_mn[k+1], r_mn[0]+1);
2497 	      r_mx[k+1] = ESL_MAX(r_mx[k+1], r_mn[0]+1); /* not a typo, r_mx[0] == r_mn[0] (it's the begin state) */
2498 	    }
2499 	  }
2500 	}
2501       }
2502     } /* end of if(k < hmm_M) */
2503     /* transitions to END state */
2504     if(k == hmm_M || local_begins_ends_on) { /* handle transitions from M_k to END */
2505       if(r_mn[k] <= r_mx[k] && cp9->esc[k] != -INFTY) { /* if M_k is reachable and we're allowed to transit to E */
2506 	/* M_k->E transition */
2507 	n = r_mn[k];
2508 	x = r_mx[k];
2509 	/* note: we don't have to worry about filling gaps here (that is if gap of >= 1 residue between [n..x] and [r_endn..r_endx]
2510 	 *       because end state is last state we care about, if we can reach it for residue in n..x or r_endn..r_endx, set band to
2511 	 *       include all those residues (min(n, r_end_n)..max(x, r_endx)) is harmless, a CM parse WILL exist for some residue in that range
2512 	 *       and the CM will be able to find it */
2513 	r_endn = ESL_MIN(r_endn, n);
2514 	r_endx = ESL_MAX(r_endx, x);
2515 	/*////printf("0 r_endn,x: %d..%d (k: %d) \n", r_endn, r_endx, k);*/
2516 	ESL_DASSERT1((r_endn <= r_endx));
2517       }
2518     }
2519     if(k == hmm_M) { /* if we're at the last node, we could also get to END from D_k, or I_k */
2520       if(r_dn[k] <= r_dx[k] && cp9->tsc[CTDM][k] != -INFTY) { /* if D_k is reachable and we're allowed to transit to E */
2521 	/* D_M->E transition */
2522 	n = r_dn[k];
2523 	x = r_dx[k];
2524 	/* note: we don't have to worry about filling gaps here (see more verbose comment above for M_k->E transition) */
2525 	r_endn = ESL_MIN(r_endn, n);
2526 	r_endx = ESL_MAX(r_endx, x);
2527 	/*////printf("1 r_endn,x: %d..%d\n", r_endn, r_endx);*/
2528 	ESL_DASSERT1((r_endn <= r_endx));
2529       }
2530       if(r_in[k] <= r_ix[k] && cp9->tsc[CTIM][k] != -INFTY) { /* if I_k is reachable and we're allowed to transit to E */
2531 	/* I_M->E transition */
2532 	n = r_in[k];
2533 	x = r_in[k];
2534 	/* note: we don't have to worry about filling gaps here (see more verbose comment above for M_k->E transition) */
2535 	r_endn = ESL_MIN(r_endn, n);
2536 	r_endx = ESL_MAX(r_endx, x);
2537 	/*////printf("2 r_endn,x: %d..%d\n", r_endn, r_endx);*/
2538 	ESL_DASSERT1((r_endn <= r_endx));
2539       }
2540       /* finally, deal with the possibility that we go to E from an EL state */
2541       if(cp9->flags & CMH_LOCAL_END) {
2542 	for(c = 0; c < cp9->el_from_ct[k+1]; c++) { /* el_from_ct[k+1] holds # ELs that can go to k+1 */
2543 	  kp = cp9->el_from_idx[k+1][c];
2544 	  if(r_mn[kp] <= r_mx[kp]) {
2545 	    n = r_mn[kp]; /* EL's can emit 0 or more residues */
2546 	    x = j0;
2547 	    r_endn = ESL_MIN(r_endn, n);
2548 	    r_endx = ESL_MAX(r_endx, x);
2549 	    /*////printf("3 c: %d..%d r_endn: %d\n", c, r_endn, r_endx);*/
2550 	    ESL_DASSERT1((r_endn <= r_endx));
2551 	  }
2552 	}
2553       }
2554     } /* end of if k == hmm_M, done with transitions to match of node k+1 */
2555 
2556     /* transitions to delete of node k+1 (D_k+1)*/
2557     if(k < hmm_M) {
2558       /* M_k -> D_k+1 transition */
2559       if(r_mn[k] <= r_mx[k]) {
2560 	if(pn_min_d[k+1] != -1) {
2561 	  n = r_mn[k];
2562 	  x = r_mx[k];
2563 	  if((ESL_MIN(x, pn_max_d[k+1]) - ESL_MAX(n, pn_min_d[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_d[k+1]..pn_max_d[k+1] by at least 1 residue */
2564 	    n = ESL_MAX(n, pn_min_d[k+1]); /* n can't be less than pn_min_d[k+1] */
2565 	    n = ESL_MIN(n, pn_max_d[k+1]); /* n can't be more than pn_max_d[k+1] */
2566 	    x = ESL_MIN(x, pn_max_d[k+1]); /* x can't be more than pn_max_d[k+1] */
2567 	    if(r_dn[k+1] != INT_MAX) {
2568 	      if(!local_begins_ends_on && ESL_MIN(x, r_dx[k+1]) - ESL_MAX(n, r_dn[k+1]) < -1) {
2569 		/* there's a 'gap' of >= 1 residue between n..x and r_dn[k+1].._r_dx[k+1], fill the gap by expanding band of I_k */
2570 		ESL_DASSERT1((k != 0));
2571 		if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_dn[k+1], r_dx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2572 		just_filled_gap = TRUE;
2573 	      }
2574 	    }
2575 	    r_dn[k+1] = ESL_MIN(r_dn[k+1], n);
2576 	    r_dx[k+1] = ESL_MAX(r_dx[k+1], x);
2577 	    ESL_DASSERT1((r_dn[k+1] <= r_dx[k+1]));
2578 	  }
2579 	}
2580       }
2581       /* I_k -> D_k+1 transition */
2582       if(r_in[k] <= r_ix[k]) {
2583 	/* I_k->D_k+1 transition */
2584 	if(pn_min_d[k+1] != -1) {
2585 	  n = r_in[k];
2586 	  x = r_ix[k];
2587 	  if((ESL_MIN(x, pn_max_d[k+1]) - ESL_MAX(n, pn_min_d[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_d[k+1]..pn_max_d[k+1] by at least 1 residue */
2588 	    n = ESL_MAX(n, pn_min_d[k+1]); /* n can't be less than pn_min_d[k+1] */
2589 	    n = ESL_MIN(n, pn_max_d[k+1]); /* n can't be more than pn_max_d[k+1] */
2590 	    x = ESL_MIN(x, pn_max_d[k+1]); /* x can't be more than pn_max_d[k+1] */
2591 	    if(r_dn[k+1] != INT_MAX) {
2592 	      if(!local_begins_ends_on && ESL_MIN(x, r_dx[k+1]) - ESL_MAX(n, r_dn[k+1]) < -1) {
2593 		/* there's a 'gap' of >= 1 residue between n..x and r_dn[k+1].._r_dx[k+1], fill the gap by expanding band of I_k */
2594 		ESL_DASSERT1((k != 0));
2595 		if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_dn[k+1], r_dx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2596 		just_filled_gap = TRUE;
2597 	      }
2598 	    }
2599 	    r_dn[k+1] = ESL_MIN(r_dn[k+1], n);
2600 	    r_dx[k+1] = ESL_MAX(r_dx[k+1], x);
2601 	    ESL_DASSERT1((r_dn[k+1] <= r_dx[k+1]));
2602 	  }
2603 	}
2604       }
2605       /* D_k -> D_k+1 */
2606       if(r_dn[k] <= r_dx[k]) {
2607 	if(pn_min_d[k+1] != -1) {
2608 	  n = r_dn[k];
2609 	  x = r_dx[k];
2610 	  if((ESL_MIN(x, pn_max_d[k+1]) - ESL_MAX(n, pn_min_d[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_d[k+1]..pn_max_d[k+1] by at least 1 residue */
2611 	    n = ESL_MAX(n, pn_min_d[k+1]); /* n can't be less than pn_min_d[k+1] */
2612 	    n = ESL_MIN(n, pn_max_d[k+1]); /* n can't be more than pn_max_d[k+1] */
2613 	    x = ESL_MIN(x, pn_max_d[k+1]); /* x can't be more than pn_max_d[k+1] */
2614 	    if(r_dn[k+1] != INT_MAX) {
2615 	      if(!local_begins_ends_on && ESL_MIN(x, r_dx[k+1]) - ESL_MAX(n, r_dn[k+1]) < -1) { /* FALSE if n..x overlaps with r_mn[k+1].._r_mx[k+1] by at least 1 residue, if FAILs we have to pick to either NOT change r_mn, r_mx, or change them to n and x */
2616 		ESL_DASSERT1((k != 0));
2617 		if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_dn[k+1], r_dx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2618 		just_filled_gap = TRUE;
2619 	      }
2620 	    }
2621 	    r_dn[k+1] = ESL_MIN(r_dn[k+1], n);
2622 	    r_dx[k+1] = ESL_MAX(r_dx[k+1], x);
2623 	    ESL_DASSERT1((r_dn[k+1] <= r_dx[k+1]));
2624 	  }
2625 	}
2626       }
2627     }
2628 
2629     /* update the reachable-by-node bands, which residues can we reach this node for?
2630      * inside the following if's we don't have to check if r_*n[k], r_*x[k] == INT_MAX or
2631      * INT_MIN, b/c we only enter the ifs if r_*n[k] <= r_*x[k]
2632      */
2633     if(r_mn[k] <= r_mx[k]) { /* M_k is reachable for i = r_mn[k]..r_mx[k] */
2634       r_nn_hmm[k] = ESL_MIN(r_nn_hmm[k], r_mn[k]);
2635       r_nx_hmm[k] = ESL_MAX(r_nx_hmm[k], r_mx[k]);
2636 
2637       sd = 1;
2638       if(k != hmm_M) {
2639 	r_nn_i[k+1] = ESL_MIN(r_nn_i[k+1], r_mn[k]+sd);
2640 	r_nx_i[k+1] = ESL_MAX(r_nx_i[k+1], r_mx[k]+sd);
2641       }
2642       if(k != 0) {
2643 	r_nn_j[k-1] = ESL_MIN(r_nn_j[k-1], r_mn[k]-sd);
2644 	r_nx_j[k-1] = ESL_MAX(r_nx_j[k-1], r_mx[k]-sd);
2645       }
2646       if((local_begins_ends_on && k > 0) || k == hmm_M) { /* we can go to end from M_k with i from r_mn[k]..r_mx[k] */
2647 	if(doing_search) {
2648 	  r_nn_j[k] = ESL_MIN(r_nn_j[k], r_mn[k]);
2649 	  r_nx_j[k] = ESL_MAX(r_nx_j[k], r_mx[k]);
2650 	}
2651 	else { /* have to emit j0 from last match state visited */
2652 	  if(r_mx[k] == j0) {
2653 	    r_nn_j[k] = ESL_MIN(r_nn_j[k], j0);
2654 	    r_nx_j[k] = ESL_MAX(r_nx_j[k], j0);
2655 	  }
2656 	}
2657       }
2658       if((local_begins_ends_on && k > 0) || k == 1) { /* we can go from begin to M_k with i to r_mn[k]..r_mx[k] */
2659 	if(doing_search) {
2660 	  r_nn_i[k] = ESL_MIN(r_nn_i[k], r_mn[k]);
2661 	  r_nx_i[k] = ESL_MAX(r_nx_i[k], r_mx[k]);
2662 	  /* superfluous */
2663 	  r_begn = ESL_MIN(r_begn, r_mn[k]);
2664 	  r_begx = ESL_MAX(r_begx, r_mx[k]);
2665 	}
2666 	else { /* have to emit i0 from first match state entered */
2667 	  if(r_mn[k] == i0) {
2668 	    r_nn_i[k] = ESL_MIN(r_nn_i[k], i0);
2669 	    r_nx_i[k] = ESL_MAX(r_nx_i[k], i0);
2670 	  }
2671 	}
2672       }
2673     }
2674     if(r_in[k] <= r_ix[k]) { /* I_k is reachable for i = r_in[k]..r_ix[k] */
2675       r_nn_hmm[k] = ESL_MIN(r_nn_hmm[k], r_in[k]);
2676       r_nx_hmm[k] = ESL_MAX(r_nx_hmm[k], r_ix[k]);
2677 
2678       sd = 1;
2679       if(k != hmm_M) {
2680 	r_nn_i[k+1] = ESL_MIN(r_nn_i[k+1], r_in[k]+sd);
2681 	r_nx_i[k+1] = ESL_MAX(r_nx_i[k+1], r_ix[k]+sd);
2682       }
2683       r_nn_j[k] = ESL_MIN(r_nn_j[k], r_in[k]-sd);
2684       r_nx_j[k] = ESL_MAX(r_nx_j[k], r_ix[k]-sd);
2685 
2686       /* superfluous */
2687       if(k == 0) {
2688 	r_begn = ESL_MIN(r_begn, r_in[k]);
2689 	r_begx = ESL_MAX(r_begx, r_ix[k]);
2690       }
2691     }
2692     if(r_dn[k] <= r_dx[k]) { /* D_k is reachable for i = r_dn[k]..r_dx[k] */
2693       r_nn_hmm[k] = ESL_MIN(r_nn_hmm[k], r_dn[k]);
2694       r_nx_hmm[k] = ESL_MAX(r_nx_hmm[k], r_dx[k]);
2695 
2696       sd = 0;
2697       if(k != hmm_M) {
2698 	r_nn_i[k+1] = ESL_MIN(r_nn_i[k+1], r_dn[k]+1); /* off-by-one */
2699 	r_nx_i[k+1] = ESL_MAX(r_nx_i[k+1], r_dx[k]+1); /* off-by-one */
2700       }
2701       if(k != 0) {
2702 	r_nn_j[k-1] = ESL_MIN(r_nn_j[k-1], r_dn[k]);
2703 	r_nx_j[k-1] = ESL_MAX(r_nx_j[k-1], r_dx[k]);
2704       }
2705       if(k == 1) {
2706 	r_begn = ESL_MIN(r_begn, r_dn[k]+1);
2707 	r_begx = ESL_MAX(r_begx, r_dx[k]+1);
2708       }
2709     }
2710     /* is the node reachable? (it doesn't matter if we're in local mode) */
2711     if((!local_begins_ends_on) && (r_mn[k] > r_mx[k]) && (r_dn[k] > r_dx[k])) {
2712       assert(k != 0);
2713       ESL_DASSERT1((just_filled_gap == FALSE));
2714       ESL_DPRINTF1(("#DEBUG: ! HMM node %d is unreachable hmm!\n", k));
2715       if(was_unr[k]) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "HMMBandsEnforceValidParse() node k %d was determined unreachable in second pass! Shouldn't happen (coding error).\n", k);
2716       was_unr[k] = TRUE;
2717       /* expand the bands so k becomes reachable, using a greedy technique */
2718       if((status = HMMBandsFixUnreachable(cp9b, errbuf, k, r_nn_hmm[k-1], r_nx_hmm[k-1], r_in[k-1])) != eslOK) return status;
2719       /* to ensure we can now reach node k, we simply decrement k by 2, then
2720        * we'll reenter the loop above for k=k-1, and check if k is reachable with
2721        * new band on I_k-1. This is unnecessary if the code is right, used here just
2722        * to check.
2723        */
2724       k -= 2;
2725     }
2726     else if(just_filled_gap == TRUE) {
2727       ESL_DPRINTF1(("#DEBUG: ! HMM node %d filled a gap!\n", k));
2728       if(filled_gap[k] == TRUE) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "HMMBandsEnforceValidParse() node k %d needed a gap filled in second pass! Shouldn't happen (coding error).\n", k);
2729       filled_gap[k] = TRUE;
2730       /* to ensure we can now reach node k, we simply decrement k by 2, then
2731        * we'll reenter the loop above for k=k, and check if k is reachable with
2732        * new band on I_k. This is unnecessary if the code is right, used here just
2733        * to check.
2734        */
2735       k -= 1;
2736     }
2737     /*else if(r_nx_hmm[k] == j0) j0_is_reachable = TRUE;*/
2738   }
2739   /* final check, if we're doing alignment, the first residue i0, must be first emitted
2740    * residue, and the final residue, j0 must be final emittable residue. Enforce it.
2741    */
2742   if(! doing_search) {
2743     r_begn = i0;
2744     r_begx = i0;
2745     r_endn = j0;
2746     r_endx = j0;
2747   }
2748 
2749   /* A hack! set r_nn_j[hmm_M] to rend_n and r_nx_j[hmm_M] to rend_x, b/c we
2750    * only use r_nn_j[hmm_M] and r_nx_j[hmm_M] to set j bands on states of non-right
2751    * emitting CM nodes (non-MATR MATP nodes) and we need the ones above all non
2752    * emitters (where rpos == hmm_M) to have the j bands equal to the band on the
2753    * HMM END state. This is a hack b/c there should be a band on the E state itself,
2754    * which should map to right half of ROOT_S, but I didn't implement it that way.
2755    */
2756   r_nn_i[1]     = ESL_MIN(r_nn_i[1], r_begn);
2757   r_nx_i[1]     = ESL_MAX(r_nx_i[1], r_begx);
2758   r_nn_j[hmm_M] = ESL_MIN(r_nn_j[hmm_M], r_endn);
2759   r_nx_j[hmm_M] = ESL_MAX(r_nx_j[hmm_M], r_endx);
2760 
2761   for(k = 0; k <= hmm_M; k++) {
2762     if(r_mn[k]  == INT_MAX) r_mn[k] = -1;
2763     if(r_mx[k]  == INT_MIN) r_mx[k] = -2;
2764     if(r_in[k]  == INT_MAX) r_in[k] = -1;
2765     if(r_ix[k]  == INT_MIN) r_ix[k] = -2;
2766     if(r_dn[k]  == INT_MAX) r_dn[k] = -1;
2767     if(r_dx[k]  == INT_MIN) r_dx[k] = -2;
2768 
2769     if(!local_begins_ends_on) {
2770       ESL_DASSERT1((r_nn_i[k]  != INT_MAX || k == 0));
2771       ESL_DASSERT1((r_nx_i[k]  != INT_MIN || k == 0));
2772       ESL_DASSERT1((r_nn_j[k]  != INT_MAX));
2773       ESL_DASSERT1((r_nx_j[k]  != INT_MIN));
2774     }
2775     else {
2776       if(r_nn_i[k]  == INT_MAX) r_nn_i[k] = -1;
2777       if(r_nx_i[k]  == INT_MIN) r_nx_i[k] = -2;
2778       if(r_nn_j[k]  == INT_MAX) r_nn_j[k] = -1;
2779       if(r_nx_j[k]  == INT_MIN) r_nx_j[k] = -2;
2780     }
2781   }
2782 
2783 
2784   *ret_r_mn = r_mn;
2785   *ret_r_mx = r_mx;
2786   *ret_r_in = r_in;
2787   *ret_r_ix = r_ix;
2788   *ret_r_dn = r_dn;
2789   *ret_r_dx = r_dx;
2790   *ret_r_nn_i = r_nn_i;
2791   *ret_r_nx_i = r_nx_i;
2792   *ret_r_nn_j = r_nn_j;
2793   *ret_r_nx_j = r_nx_j;
2794   free(was_unr);
2795   free(filled_gap);
2796   free(r_nn_hmm);
2797   free(r_nx_hmm);
2798 
2799   return eslOK;
2800 
2801  ERROR:
2802   ESL_FAIL(status, errbuf, "HMMBandsEnforceValidParse(): memory allocation error.");
2803   return eslOK; /* neverreached */
2804 }
2805 
2806 /* Function: HMMBandsFixUnreachable()
2807  * Incept:   EPN, Fri Feb  1 17:12:55 2008
2808  *
2809  * Purpose:  Expand the HMM bands such that a parse becomes
2810  *           possible up through node <k>. We know that a parse
2811  *           is possible up through node <k-1>, the reachable
2812  *           range of residues for all possible parses up to
2813  *           node <k-1> is from <r_prv_min> to <r_prv_max>.
2814  *
2815  *           Note: The technique used for expanding the bands was
2816  *           selected for it's *relative* simplicity. It does not
2817  *           expand the bands in any smart way that is aware of
2818  *           probability mass or score of the newly possible parses
2819  *           during the band expansion. You could try to do that,
2820  *           but I don't think it's worth it. This function is only
2821  *           entered if the default bands (prior to expansion)
2822  *           do not allow a single parse, in which case the bands are
2823  *           too tight, and the smart solution is to lower tau,
2824  *           the tail loss parameter. In other words this function is
2825  *           only very rarely used for reasonable values of tau
2826  *           ('reasonable' determined from empirical expts, and
2827  *             enforced by getopts). This function is necessary
2828  *           for the HMM banding technique to be robust though,
2829  *           otherwise it's possible that the HMM bands make all
2830  *           parses impossible, which is bad, because that means
2831  *           all CM parses are impossible too.
2832  *
2833  *           There are two possible scenarios for why node k
2834  *           is unreachable, each with a different solution
2835  *           this function determines which scenario node k is
2836  *           in and then fixes it. The scenarios are described
2837  *           in comments in the code below.a
2838  *
2839  * Args:     cp9b      - the CP9 bands object
2840  *           errbuf    - for error messages
2841  *           k         - the node we want to make reachable
2842  *           r_prv_min - minimal possible residue index accounted for in any parse up to and including node k-1
2843  *           r_prv_max - maximal possible residue index accounted for in any parse up to and including node k-1
2844  *           r_insert_prv_min - minimal possible residue index accounted for in any parse up to and state I_k-1
2845  *
2846  * Returns:  eslOK on success
2847  *           eslEMEM if a memory allocation error occurs
2848  */
2849 int
HMMBandsFixUnreachable(CP9Bands_t * cp9b,char * errbuf,int k,int r_prv_min,int r_prv_max,int r_insert_prv_min)2850 HMMBandsFixUnreachable(CP9Bands_t *cp9b, char *errbuf, int k, int r_prv_min, int r_prv_max, int r_insert_prv_min)
2851 {
2852 
2853   int kp;    /* k prime, a node counter */
2854   int nxt_m; /* minimal possible residue index we must account for before entering M_k */
2855   int nxt_d; /* minimal possible residue index we must account for before entering D_k */
2856   int nxt_n; /* minimal possible residue index we must account for before entering either M_k or D_k */
2857 
2858   ESL_DASSERT1((k != 0));
2859   ESL_DASSERT1((r_prv_min !=  INT_MAX));
2860   ESL_DASSERT1((r_prv_max != INT_MIN));
2861   ESL_DASSERT1((r_prv_min <= r_prv_max));
2862 
2863   /* scenario 1: there's a 'hole' of at least 1 residue between the residue posns that can be reached
2864    *             for node k-1 (these are r_prv_min..r_prv_max) and by node k's match or delete state.
2865    *             our solution is to allow the I_k-1 (node k-1 insert state) to emit the residues in
2866    *             the 'hole', then we know we can reach either node k's match or delete.
2867    */
2868   /* check if we're in scenario 1 */
2869 
2870   /* initialize, if neither nxt_m nor nxt_d doesn't change, we know we're not in scenario 1 */
2871   nxt_m = -1;
2872   nxt_d = -1;
2873   if(cp9b->pn_min_m[k] != -1 && cp9b->pn_max_m[k] != -1) {
2874     ESL_DASSERT1((cp9b->pn_max_m[k] >= cp9b->pn_min_m[k]));
2875     if(cp9b->pn_max_m[k]-1 > r_prv_min) { /* if we go from I_k-1 to M_k, we have to emit 1 residue from M_k, that's
2876 				     * why we have cp9b->pn_max_m[k]-1 (i.e. the -1 is for the StateDelta) */
2877       nxt_m = ESL_MAX(cp9b->pn_min_m[k]-1, r_prv_min); /* we could get from node k-1 to node k's match state by using I_k-1 to fill the 'hole' */
2878     }
2879   }
2880   if(cp9b->pn_min_d[k] != -1 && cp9b->pn_max_d[k] != -1) {
2881     ESL_DASSERT1((cp9b->pn_max_d[k] >= cp9b->pn_min_d[k]));
2882     if(cp9b->pn_max_d[k] > r_prv_min) { /* if we go from I_k-1 to D_k, we don't emit from D_k so there's no -1 as above with M_k */
2883       nxt_d = ESL_MAX(cp9b->pn_min_d[k], r_prv_min);/* we could get from node k-1 to node k's delete state by using I_k-1 to fill the 'hole' */
2884     }
2885   }
2886   if(nxt_m != -1 || nxt_d != -1) {
2887     /* we're in scenario 1, there's a 'hole' of missing residues we have to account for before entering node k,
2888      * determine the easier route, to M_k or D_k?  (pick route with less required I_k-1 emissions)  */
2889     if      (nxt_m == -1) nxt_n = nxt_d;
2890     else if (nxt_d == -1) nxt_n = nxt_m;
2891     else                  nxt_n = ESL_MIN(nxt_m, nxt_d);
2892 
2893     /* now doctor I_k-1's bands so that:
2894      * (a) I_k-1 is reachable from at least one of M_k-1, D_k-1
2895      * (b) I_k-1 can transit to M_k or D_k
2896      */
2897     if(cp9b->pn_min_i[k-1] != -1) cp9b->pn_min_i[k-1] = ESL_MIN(cp9b->pn_min_i[k-1], r_prv_min+1);
2898     else                          cp9b->pn_min_i[k-1] = r_prv_min+1;
2899     if(cp9b->pn_max_i[k-1] != -1) cp9b->pn_max_i[k-1] = ESL_MAX(cp9b->pn_max_i[k-1], nxt_n);
2900     else                          cp9b->pn_max_i[k-1] = nxt_n;
2901     ESL_DASSERT1((cp9b->pn_max_i[k-1] >= cp9b->pn_min_i[k-1]));
2902     ESL_DPRINTF1(("#DEBUG: scenario 1 reset k from %d to %d\n", k+2, k));
2903   }
2904   else {
2905     /* scenario 2: the opposite of scenario 1. All possible parses that reach node k-1 have already emitted too many
2906      *             residues to reach node k. In other words, the maximal residue in the HMM band on node k's match
2907      *             and delete states has been already been emitted by all possible parses that end at node k-1.
2908      *             We have to use the delete states of nodes k...kp, where kp is the leftmost node that we can reach
2909      *             M_kp and emit residue i==r_prv_min+1 or visit D_kp with i == r_prv_min.
2910      */
2911     kp = k;
2912     while(kp <= cp9b->hmm_M && ((cp9b->pn_max_m[kp] < (r_prv_min+1)) && (cp9b->pn_max_d[kp] < (r_prv_min)))) { /* note cp9b->pn_max_{m,d}[kp] may be == -1, that's okay */
2913       cp9b->pn_min_d[kp] = cp9b->pn_max_d[kp] = r_prv_min; /* enforce this delete state is used */
2914       kp++;
2915     }
2916     ESL_DPRINTF1(("#DEBUG: scenario 2 reset k from %d to %d (kp: %d r_prv_min: %d (+1=%d for match))\n", k, k-2, kp, r_prv_min, r_prv_min+1));
2917   }
2918   return eslOK;
2919 }
2920 
2921 /* Function: HMMBandsFillGap()
2922  * Incept:   EPN, Fri Feb  1 17:12:55 2008
2923  *
2924  * Purpose:  In HMMBandsEnforceValidParse() it's possible (but rare) that two
2925  *           different transitions to the same state imply reachable bands that have
2926  *           a 'gap' in the middle. For example if node D_3 can reach node M_4 with
2927  *           i = 3 or 4, and node M_3 can reach node M_4 with i equal to 6 or 7.
2928  *           This means that node M_4 cannot be reached for
2929  *           i == 5, but the HMMBandsEnforceValidParse() implementation is
2930  *           much easier if we can just set the reachable band for M_4 to
2931  *           3..7. So, that's what we do, and we doctor the band of I_3 so
2932  *           that M_4 *can* be reached for i == 5. This band doctoring is
2933  *           done in this function.
2934  *
2935  * Args:     cp9b - the CP9 bands object
2936  *           errbuf - for error messages
2937  *           k    - the node we want to make reachable
2938  *           min1 - min in the reachable band for first of the two relevant transitions
2939  *           max1 - max in the reachable band for first of the two relevant transitions
2940  *           min2 - min in the reachable band for second of the two relevant transitions
2941  *           max2 - max in the reachable band for second of the two relevant transitions
2942  *           prv_nd_r_mn - min residue posn in reachable band of M_k-1, -1 if M_k-1 is unreachable
2943  *           prv_nd_r_dn - min residue posn in reachable band of D_k-1, -1 if D_k-1 is unreachable
2944  *
2945  * Returns:  eslOK on success
2946  */
2947 int
HMMBandsFillGap(CP9Bands_t * cp9b,char * errbuf,int k,int min1,int max1,int min2,int max2,int prv_nd_r_mn,int prv_nd_r_dn)2948 HMMBandsFillGap(CP9Bands_t *cp9b, char *errbuf, int k, int min1, int max1, int min2, int max2, int prv_nd_r_mn, int prv_nd_r_dn)
2949 {
2950   int left_max;              /* min1/max1 if min1 <= min2, else min2/max2 */
2951   int right_min;             /* min2/max2 if min1 <= min2, else min1/max1 */
2952   int in, ix;                /* min/max residue for I_k, calc'ed here */
2953 
2954   ESL_DASSERT1((k != 0));
2955   ESL_DASSERT1((max1 >= min1));
2956   ESL_DASSERT1((max2 >= min2));
2957 
2958   if (min1 <= min2) { left_max = max1;  right_min = min2; }
2959   else              { left_max = max2;  right_min = min1; }
2960   ESL_DASSERT1((right_min - left_max > 1));
2961 
2962   /* determine in and ix */
2963   in = INT_MAX;
2964   if(prv_nd_r_mn != INT_MAX) in = ESL_MIN(in, prv_nd_r_mn+1);
2965   if(prv_nd_r_dn != INT_MAX) in = ESL_MIN(in, prv_nd_r_dn);
2966   ESL_DASSERT1((in != INT_MAX));
2967   assert(in != INT_MAX);
2968   ix = right_min-1;
2969 
2970   /* doctor I_k's bands so that it:
2971    * (a) I_k is reachable from at M_k or D_k (whichever has leftmost reachable band)
2972    * (b) I_k can transit to M_k+1 or D_k+1   (whichever has rightmost reachable band)
2973    */
2974   if(cp9b->pn_min_i[k] != -1) cp9b->pn_min_i[k] = ESL_MIN(cp9b->pn_min_i[k], in);
2975   else                        cp9b->pn_min_i[k] = in;
2976   if(cp9b->pn_max_i[k] != -1) cp9b->pn_max_i[k] = ESL_MAX(cp9b->pn_max_i[k], ix);
2977   else                        cp9b->pn_max_i[k] = ix;
2978   assert(cp9b->pn_min_i[k] <= cp9b->pn_max_i[k]);
2979   ESL_DASSERT1((cp9b->pn_min_i[k] <= cp9b->pn_max_i[k]));
2980 
2981   return eslOK;
2982 }
2983 
2984 #if eslDEBUGLEVEL >= 1
2985 /* Function: CMBandsCheckValidParse()
2986  * Incept:   EPN, Tue Feb  5 07:59:48 2008
2987  *
2988  * Purpose:  Given bands on CM states for a target sequence,
2989  *           check for a valid CM parse within those bands.
2990  *           Return eslFAIL if there is no valid parse.
2991  *
2992  * Args:     cm     - the model
2993  *           cp9b   - the CP9 bands object
2994  *           errbuf - for error messages
2995  *           i0     - first residue we're concerned with in target sequence
2996  *           j0     - final residue we're concerned with in target sequence
2997  *           doing_search - TRUE if we're searching, and a local hit is okay,
2998  *                          if FALSE, the full sequence i0..j0 must be in the subtree of ROOT_S
2999  *
3000  * Returns:  eslOK on success
3001  *           eslEINCOMPAT if contract is violated
3002  *           eslFAIL if no valid parse exists within the i and j bands
3003  *           eslEMEM if a memory allocation error occurs
3004  */
3005 int
CMBandsCheckValidParse(CM_t * cm,CP9Bands_t * cp9b,char * errbuf,int i0,int j0,int doing_search)3006 CMBandsCheckValidParse(CM_t *cm, CP9Bands_t *cp9b, char *errbuf, int i0, int j0, int doing_search)
3007 {
3008   int status;                 /* easel status code */
3009   int v, w, y;                /* state indices */
3010   int nd;                     /* nd counter */
3011   int sd, sdl, sdr;           /* state deltas, number of residues emitted by current state, total, to the left, and to the right */
3012   int *imin, *imax;           /* [0..v..M-1] i band for state v, min/max i position allowed for state v */
3013   int *jmin, *jmax;           /* [0..v..M-1] j band for state v, min/max j position allowed for state v */
3014   int child_imin, child_imax; /* imin, imax for child of current state, after accouting for emissions (state deltas) */
3015   int child_jmin, child_jmax; /* jmin, jmax for child of current state, after accouting for emissions (state deltas) */
3016   int *v_is_r;                /* [0..v..M-1] TRUE if state v is reachable for at least one i,j pair */
3017   int *nd_is_r;               /* [0..nd..cm->nodes-1] TRUE if any state (incl. insert) in node nd is reachable for at least one i,j pair */
3018   int *r_imin, *r_imax;       /* [0..v..M-1] reachable i bands, for which i positions can we reach state v */
3019   int *r_jmin, *r_jmax;       /* [0..v..M-1] reachable j bands, for which j positions can we reach state v */
3020   int *nd_r_imin, *nd_r_imax; /* [0..nd..M-1] reachable i bands, for which i positions can we reach at least 1 state (incl. insert) in nd */
3021   int *nd_r_jmin, *nd_r_jmax; /* [0..nd..M-1] reachable j bands, for which j positions can we reach at least 1 state (incl. insert) in nd */
3022   int y_nd, w_nd;             /* node index */
3023   int cm_is_localized;        /* TRUE if local begins and ends are on, if we can reach a state v with a non-impossible endsc[v], we can finish the parse for any i,j reachable for v */
3024 
3025   /*printf("TEMP in CMBandsCheckValidParse() i0: %d j0: %d\n", i0, j0);*/
3026 
3027   if((cm->flags & CMH_LOCAL_BEGIN) && (! (cm->flags & CMH_LOCAL_END))) ESL_FAIL(eslEINCOMPAT, errbuf, "CMBandsCheckValidParse(), cm flag CMH_LOCAL_BEGIN is up and cm flag CMH_LOCAL_END is down. This is unexpected, we can't deal.");
3028   if((! (cm->flags & CMH_LOCAL_BEGIN)) && ((cm->flags & CMH_LOCAL_END))) ESL_FAIL(eslEINCOMPAT, errbuf, "CMBandsCheckValidParse(), cm flag CMH_LOCAL_BEGIN is down and cm flag CMH_LOCAL_END is up. This is unexpected, we can't deal.");
3029 
3030   cm_is_localized = ((cm->flags & CMH_LOCAL_BEGIN) && (cm->flags & CMH_LOCAL_END)) ? TRUE : FALSE;
3031 
3032   /* pointers to cp9b arrays, for convenience */
3033   imin     = cp9b->imin;
3034   imax     = cp9b->imax;
3035   jmin     = cp9b->jmin;
3036   jmax     = cp9b->jmax;
3037 
3038   /* allocate and initialize */
3039   ESL_ALLOC(v_is_r,    sizeof(int) * cm->M);
3040   ESL_ALLOC(r_imin,    sizeof(int) * cm->M);
3041   ESL_ALLOC(r_imax,    sizeof(int) * cm->M);
3042   ESL_ALLOC(r_jmin,    sizeof(int) * cm->M);
3043   ESL_ALLOC(r_jmax,    sizeof(int) * cm->M);
3044   ESL_ALLOC(nd_is_r,   sizeof(int) * cm->nodes);
3045   ESL_ALLOC(nd_r_imin, sizeof(int) * cm->nodes);
3046   ESL_ALLOC(nd_r_imax, sizeof(int) * cm->nodes);
3047   ESL_ALLOC(nd_r_jmin, sizeof(int) * cm->nodes);
3048   ESL_ALLOC(nd_r_jmax, sizeof(int) * cm->nodes);
3049 
3050   esl_vec_ISet(v_is_r, cm->M, FALSE);
3051   esl_vec_ISet(nd_is_r, cm->nodes, FALSE);
3052 
3053   for (v = 0; v < cm->M; v++) {
3054     r_imin[v] = INT_MAX;
3055     r_imax[v] = INT_MIN;
3056     r_jmin[v] = INT_MAX;
3057     r_jmax[v] = INT_MIN;
3058   }
3059   for (nd = 0; nd < cm->nodes; nd++) {
3060     nd_r_imin[nd] = INT_MAX;
3061     nd_r_imax[nd] = INT_MIN;
3062     nd_r_jmin[nd] = INT_MAX;
3063     nd_r_jmax[nd] = INT_MIN;
3064   }
3065 
3066   nd_is_r[0] = TRUE;
3067   v_is_r[0]  = TRUE;
3068   r_imin[0] = nd_r_imin[0] = imin[0];
3069   r_imax[0] = nd_r_imax[0] = imax[0];
3070   r_jmin[0] = nd_r_jmin[0] = jmin[0];
3071   r_jmax[0] = nd_r_jmax[0] = jmax[0];
3072 
3073   if(! doing_search) { /* we're aligning the full sequence from i0..j0, that means imin[0] must == i0 and jmax[0] must == j0, if not we can't align the full seq */
3074     if(imin[0] != i0) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), doing_search is FALSE, but imin[0] == %d, it should be i0 (%d)\n", imin[0], i0);
3075     if(jmax[0] != j0) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), doing_search is FALSE, but jmax[0] == %d, it should be j0 (%d)\n", jmax[0], j0);
3076   }
3077 
3078   /* deal with local begins, if they're active, we can jump into any local begin state with:
3079    * i within imin[0]..imax[0] and j within jmin[0]..jmax[0], as long as i,j are within
3080    * imin[v]..imax[v] and jmin[v]..jmax[v].
3081    */
3082   if(cm->flags & CMH_LOCAL_BEGIN) {
3083     for(v = 0; v < cm->M; v++) {
3084       if(NOT_IMPOSSIBLE(cm->beginsc[v])) {
3085 	if(imin[v] != -1 && jmin[v] != -1) {
3086 	  if(((ESL_MIN(imax[v], imax[0]) - ESL_MAX(imin[v], imin[0])) >= 0) && /* TRUE if imin[v]..imax[v] overlaps with imin[0]..imax[0] by at least 1 residue */
3087 	     ((ESL_MIN(jmax[v], jmax[0]) - ESL_MAX(jmin[v], jmin[0])) >= 0)) {  /* TRUE if jmin[v]..jmax[v] overlaps with jmin[0]..jmax[0] by at least 1 residue */
3088 	    r_imin[v] = ESL_MAX(imin[v], imin[0]);
3089 	    r_imax[v] = ESL_MIN(imax[v], imax[0]);
3090 	    r_jmin[v] = ESL_MAX(jmin[v], jmin[0]);
3091 	    r_jmax[v] = ESL_MIN(jmax[v], jmax[0]);
3092 	    v_is_r[v]  = TRUE;
3093 	    nd_is_r[cm->ndidx[v]] = TRUE;
3094 	  }
3095 	}
3096 	ESL_DASSERT1(((cm->stid[v] == MATP_MP) || (cm->stid[v] == MATR_MR) || (cm->stid[v] == MATL_ML) || (cm->stid[v] == BIF_B)));
3097 	assert((cm->stid[v] == MATP_MP) || (cm->stid[v] == MATR_MR) || (cm->stid[v] == MATL_ML) || (cm->stid[v] == BIF_B));
3098       }
3099     }
3100   }
3101 
3102   /* The main loop: step through the CM, node by node, state by state,
3103    * for reachable-states v, determine which i,j residues are reachable for each child state of v
3104    */
3105   for (nd = 0; nd < cm->nodes; nd++) {
3106     for (v = cm->nodemap[nd]; v < (cm->nodemap[nd] + TotalStatesInNode(cm->ndtype[nd])); v++) {
3107       if(! StateIsDetached(cm, v)) {
3108 	if(cm->sttype[v] == E_st) {
3109 	  if((r_imin[v] <= r_imax[v] && r_jmin[v] <= r_jmax[v]) && ((r_imax[v] - r_jmin[v] - 1) >= 0)) {
3110 	    /* END state v is reachable for some i, j such that j-i+1 = d = 0 (which is required for E states) */
3111 	    v_is_r[v] = TRUE;
3112 	    nd_is_r[nd] = TRUE;
3113 	  }
3114 	}
3115 	else if (cm->sttype[v] == B_st) {
3116 	  /* same loop as if v != B_st, (the else case below) but we know sdl = sdr = 0, and we have two children BEGL_S and BEGR_S */
3117 	  if((r_imin[v] <= r_imax[v] && r_jmin[v] <= r_jmax[v]) && ((r_jmax[v] - r_imin[v] + 1) >= sd)) {
3118 	    /* v is reachable for some i, j */
3119 	    v_is_r[v] = TRUE;
3120 	    nd_is_r[nd] = TRUE;
3121 	    w = cm->cfirst[v]; /* BEGL_S */
3122 	    y = cm->cnum[v];   /* BEGR_S */
3123 
3124 	    /* only way to get to a BEGL_S is through its BIF parent, even with local begins (no local begin in BEGL_S) */
3125 	    r_imin[w] = ESL_MAX(imin[w], imin[v]);
3126 	    r_imax[w] = ESL_MIN(imax[w], imax[v]);
3127 	    r_jmin[w] = jmin[w];
3128 	    r_jmax[w] = jmax[w];
3129 	    w_nd = cm->ndidx[w];
3130 	    nd_r_imin[w_nd] = r_imin[w];
3131 	    nd_r_imax[w_nd] = r_imax[w];
3132 	    nd_r_jmin[w_nd] = r_jmin[w];
3133 	    nd_r_jmax[w_nd] = r_jmax[w];
3134 
3135 	    /* only way to get to a BEGR_S is through its BIF parent, even with local begins (no local begin in BEGR_S) */
3136 	    r_imin[y] = imin[y];
3137 	    r_imax[y] = imax[y];
3138 	    r_jmin[y] = ESL_MAX(jmin[y], jmin[v]);
3139 	    r_jmax[y] = ESL_MIN(jmax[y], jmax[v]);
3140 	    y_nd = cm->ndidx[y];
3141 	    nd_r_imin[y_nd] = r_imin[y];
3142 	    nd_r_imax[y_nd] = r_imax[y];
3143 	    nd_r_jmin[y_nd] = r_jmin[y];
3144 	    nd_r_jmax[y_nd] = r_jmax[y];
3145 	  }
3146 	}
3147 	else { /* state is not a B_st nor an E_st */
3148 	  sdl = StateLeftDelta(cm->sttype[v]);
3149 	  sdr = StateRightDelta(cm->sttype[v]);
3150 	  sd  = sdl + sdr;
3151 
3152 	  if((r_imin[v] <= r_imax[v] && r_jmin[v] <= r_jmax[v]) && ((r_jmax[v] - r_imin[v] + 1) >= sd)) {
3153 	    /* v is reachable for some i, j */
3154 	    ///if(NOT_IMPOSSIBLE(cm->endsc[v])) {
3155 
3156 	    v_is_r[v] = TRUE;
3157 	    nd_is_r[nd] = TRUE;
3158 	    child_imin = r_imin[v] + sdl;
3159 	    child_imax = r_imax[v] + sdl;
3160 	    child_jmin = r_jmin[v] - sdr;
3161 	    child_jmax = r_jmax[v] - sdr;
3162 	    if(cm->sttype[v] == IL_st) child_imax = ESL_MAX(child_imax, (imax[v]+1));
3163 	    if(cm->sttype[v] == IR_st) child_jmin = ESL_MIN(child_jmin, (jmin[v]-1));
3164 	    ///printf("\nv: %4d %4s %2s (%4d %4d    %4d %4d)\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), r_imin[v], r_imax[v], r_jmin[v], r_jmax[v]);
3165 	    for(y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
3166 	      if(imin[y] != -1) {
3167 		r_imin[y] = ESL_MIN(r_imin[y], ESL_MAX(imin[y], child_imin));
3168 		r_imax[y] = ESL_MAX(r_imax[y], ESL_MIN(imax[y], child_imax));
3169 		r_jmin[y] = ESL_MIN(r_jmin[y], ESL_MAX(jmin[y], child_jmin));
3170 		r_jmax[y] = ESL_MAX(r_jmax[y], ESL_MIN(jmax[y], child_jmax));
3171 
3172 		if((r_imin[y] <= r_imax[y] && r_jmin[y] <= r_jmax[y]) && ((r_jmax[y] - r_imin[y] + 1) >= StateDelta(cm->sttype[y]))) {
3173 		  ///printf("y: %4d %4s %2s (%4d %4d    %4d %4d)\n", y, Nodetype(cm->ndtype[cm->ndidx[y]]), Statetype(cm->sttype[y]), r_imin[y], r_imax[y], r_jmin[y], r_jmax[y]);
3174 		  y_nd = cm->ndidx[y];
3175 		  nd_r_imin[y_nd] = ESL_MIN(nd_r_imin[y_nd], r_imin[y]);
3176 		  nd_r_imax[y_nd] = ESL_MAX(nd_r_imax[y_nd], r_imax[y]);
3177 		  nd_r_jmin[y_nd] = ESL_MIN(nd_r_jmin[y_nd], r_jmin[y]);
3178 		  nd_r_jmax[y_nd] = ESL_MAX(nd_r_jmax[y_nd], r_jmax[y]);
3179 		}
3180 		else {
3181 		  r_imin[y] =  INT_MAX;
3182 		  r_imax[y] = INT_MIN;
3183 		  r_jmin[y] =  INT_MAX;
3184 		  r_jmax[y] = INT_MIN;
3185 		}
3186 	      }
3187 	    }
3188 	  }
3189 	} /* end of else that's entered if v != E_st nor B_st */
3190       } /* end of if(!StateIsDetached) */
3191 	/*////if(v_is_r[v]) printf("ck v  %4s %2s %4d R %d (%11d %11d  %11d %11d) (HMM nd: %4d %4d)\n", Nodetype(cm->ndtype[nd]), Statetype(cm->sttype[v]), v, v_is_r[v], r_imin[v], r_imax[v], r_jmin[v], r_jmax[v], cm->cp9map->cs2hn[v][0], cm->cp9map->cs2hn[v][1]);*/
3192     } /* end of for (v) loop */
3193 
3194     /*////printf("ck nd %4s    %4d R %d (%11d %11d  %11d %11d)\n\n", Nodetype(cm->ndtype[nd]), nd, nd_is_r[nd], nd_r_imin[nd], nd_r_imax[nd], nd_r_jmin[nd], nd_r_jmax[nd]); */
3195   }
3196   /* now we know what states are reachable for what i and j,  check if a valid parse exists */
3197   if(! cm_is_localized) { /* local begins/ends are off, all nodes must be reachable to get a valid parse */
3198     for(nd = 0; nd < cm->nodes; nd++) {
3199       if(nd_is_r[nd] == FALSE) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), CM is not locally configured and node %d (%4s) is unreachable\n", nd, Nodetype(cm->ndtype[nd]));
3200       if(cm->ndtype[nd] == BIF_nd) {
3201 	v = cm->nodemap[nd];
3202 	w = cm->cfirst[v]; /* BEGL_S */
3203 	y = cm->cnum[v];   /* BEGR_S */
3204 	if(r_jmax[w] < (r_imin[y]-1)) {
3205 	  ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), CM not local, BEGL_S w:%d nd:%d & BEGR_S y:%d nd:%d bands don't touch, res %d..%d unemittable!\n", w, w_nd, y, y_nd, r_jmax[w]+1, r_imin[y]-1);
3206 	}
3207       }
3208     }
3209   }
3210   else if(doing_search && cm_is_localized) { /* we're doing a local search, we have a valid parse if any state from which a local end is possible is reachable */
3211     v = 0;
3212     while(v < cm->M && !(v_is_r[v] && NOT_IMPOSSIBLE(cm->endsc[v]))) v++; /* increment v until we come to a state that is reachable and can go to EL, or we run out of states */
3213     if(v == cm->M && i0 != j0) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), doing_search=TRUE, CM is local, i0 != j0, but no CM state is reachable from which an EL is possible.\n");
3214   }
3215 
3216   free(v_is_r);
3217   free(r_imin);
3218   free(r_imax);
3219   free(r_jmin);
3220   free(r_jmax);
3221   free(nd_is_r);
3222   free(nd_r_imin);
3223   free(nd_r_imax);
3224   free(nd_r_jmin);
3225   free(nd_r_jmax);
3226   return eslOK;
3227 
3228  ERROR:
3229   ESL_FAIL(status, errbuf, "CMBandsCheckValidParse(), memory allocation error.");
3230   return status; /* NEVER REACHED */
3231 }
3232 #endif
3233 
3234 /**************************************************************************
3235  * cp9_HMM2ijBands_OLD() and helper functions.
3236  * This was how bands were calculated up until revision 2318 (02.07.2008)
3237  *
3238  */
3239 /* helper functions for cp9_HMM2ijBands_OLD() */
3240 static void hmm2ij_prestate_step0_initialize(int n, int *nss_max_imin, int *nss_min_jmax, int i0, int j0); /* per-node step 0: first helper the caller invokes in its MATP_nd and MATL_nd cases */
3241 static void hmm2ij_prestate_step1_set_node_inserts(int n, int *nis_imin, int *nis_imax,
3242 						   int *nis_jmin, int *nis_jmax,
3243 						   int *nss_imin, int *nss_imax,
3244 						   int *nss_jmin, int *nss_jmax,
3245 						   int *pn_min_i, int *pn_max_i,
3246 						   CP9Map_t *cp9map); /* per-node step 1: called right after step 0 with the node insert-set (nis_*) and split-set (nss_*) arrays and the HMM insert bands */
3247 static void hmm2ij_prestate_step2_determine_safe(int n,
3248 						 int nss_max_imin_np1, int nss_min_jmax_np1,
3249 						 int nis_imin_n,
3250 						 int nis_jmax_n,
3251 						 int *safe_imax, int *safe_jmin); /* per-node step 2: caller passes values from node n+1 and node n's insert set; safe_imax/safe_jmin are presumably outputs (caller later hands them to hmm2ij_state_step2_enforce_safe_trans()) */
3252 static void hmm2ij_prestate_step3_preset_node_splits(int n, int *nis_imin, int *nis_imax,
3253 						     int *nis_jmin, int *nis_jmax,
3254 						     int *nss_imin, int *nss_imax,
3255 						     int *nss_jmin, int *nss_jmax,
3256 						     int *pn_min_m, int *pn_max_m,
3257 						     int *pn_min_d, int *pn_max_d,
3258 						     CP9Map_t *cp9map); /* per-node step 3: last of the prestate helpers called per node; receives the HMM match and delete bands */
3259 static void hmm2ij_split_state_step1_set_state_bands(int v, int n,
3260 						     int tmp_imin, int tmp_imax,
3261 						     int tmp_jmin, int tmp_jmax,
3262 						     int *imin, int *imax, int *jmin, int *jmax,
3263 						     int *nss_imin, int *nss_imax,
3264 						     int *nss_jmin, int *nss_jmax); /* per-state step 1 for split-set states: the caller uses it for MATP_MP, MATP_ML, MATP_MR and MATP_D with the tmp_* bands it derived from the HMM */
3265 static void hmm2ij_insert_state_step1_set_state_bands(int v,
3266 						      int tmp_imin, int tmp_imax,
3267 						      int tmp_jmin, int tmp_jmax,
3268 						      int *imin, int *imax, int *jmin, int *jmax); /* per-state step 1 for insert states: the caller uses it for MATP_IL and MATP_IR */
3269 static void hmm2ij_state_step2_enforce_safe_trans(CM_t *cm, int v, int n, int *imax, int *jmin,
3270 						  int *nss_imax, int *nss_jmin,
3271 						  int safe_imax, int safe_jmin); /* per-state step 2: per the caller's comments, makes sure at least one child state y of v is reachable from v */
3272 static void hmm2ij_state_step3_enforce_state_delta(CM_t *cm, int v, int *jmin, int *jmax); /* per-state step 3: called after step 2 for each state; presumably enforces the StateDelta(v) constraint on jmin[v]/jmax[v] described in the caller's comments */
3273 static void hmm2ij_state_step4_update_safe_holders(int v, int n, int imin_v, int jmax_v, int *nss_max_imin,
3274 						   int *nss_min_jmax); /* per-state step 4: called for split-set states only, with imin[v] and jmax[v]; presumably updates nss_max_imin[n]/nss_min_jmax[n] - confirm in definition */
3275 static void hmm2ij_state_step5_non_emitter_d0_hack(int v, int imax_v, int *jmin); /* per-state step 5: in the caller code visible here it is applied only to MATP_D */
3276 
3277 /*****************************************************************************
3278  * Functions to go from HMM bands to i and j bands on a CM
3279  * cp9_HMM2ijBands_OLD()
3280  */
3281 /*
3282  * Function: cp9_HMM2ijBands_OLD()
3283  *           EPN 12.21.05
3284  *
3285  * Purpose:  Determine the band for each cm state v on i (the band on the
3286  *           starting index in the subsequence emitted from the subtree rooted
3287  *           at state v), and on j (the band on the ending index in the
3288  *           subsequence emitted from the subtree rooted at state v).
3289  *
3290  *           Some i and d bands are calculated from HMM bands on match and insert
3291  *           and delete states from each node of the HMM that maps to a left emitting
3292  *           node of the CM (including MATP nodes). The HMM bands were
3293  *           calculated previously from the posterior matrices for mmx,
3294  *           imx and dmx from a CP9 HMM.
3295  *
3296  *           Some j bands are calculated from HMM bands on match and insert and
3297  *           delete states from each node of the HMM that maps to a right emitting
3298  *           node of the CM (including MATP nodes).
3299  *
3300  *           i and j bands that cannot be directly determined from the
3301  *           HMM bands are inferred based on the constraints imposed
3302  *           on them by the i and j bands that CAN be determined from
3303  *           the HMM bands.
3304  *
3305  *           Our strategy is to set i and j bands for each state v
3306  *           such that at least one state y (y \in C_v (y is reachable
3307  *           from v)) can be reached from v while staying within the i
3308  *           and j bands for v and y.  This constraint is enforced by
3309  *           determining the min and max i and j bands across all
3310  *           states y (into safe* data structures) for a given v, and
3311  *           then enforcing that at least one cell in the i and j
3312  *           bands of v can transit to at least one cell in a band for
3313  *           a y state after accounting for the direction specific
3314  *           StateDelta() values for v.
3315  *
3316  *           This function needs to be called only once; it determines
3317  *           bands for ALL states. It's unclear how best to handle
3318  *           any states that don't have an explicit mapping to an HMM
3319  *           state that we have a band on (i.e. all delete states, and
3320  *           ROOT_IR, ROOT_IL, BEGR_IL, BIF_B, and start states).
3321  *           (11.02.05) I take a simple approach, and set the bands on i
3322  *           for such states to the same as those for states in a close
3323  *           proximity. (see code for exact definitions)
3324  *
3325  *           This function uses HMM derived bands on delete states.
3326  *
3327  * arguments:
3328  *
3329  * CM_t *cm         the CM, must have valid cp9b (CP9 bands object)
3330  * errbuf           char buffer for error messages
3331  * CP9Bands_t *cp9b the CP9 bands object, usually cm->cp9b
3332  * CP9Map_t *cp9map map from CM to CP9 HMM and vice versa
3333  * int i0           start of target subsequence (often 1, beginning of dsq)
3334  * int j0           end of target subsequence (often L, end of dsq)
3335  * int doing_search TRUE if the bands will be used for a scanning CYK/Inside
3336  * int debug_level  [0..3] tells the function what level of debugging print
3337  *                  statements to print.
3338  *
3339  * Returns: eslOK on success;
3340  */
3341 int
cp9_HMM2ijBands_OLD(CM_t * cm,char * errbuf,CP9Bands_t * cp9b,CP9Map_t * cp9map,int i0,int j0,int doing_search,int debug_level)3342 cp9_HMM2ijBands_OLD(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int debug_level)
3343 {
3344   int v;              /* counter over states of the CM */
3345 
3346   int status;
3347   int safe_imax;
3348   int safe_jmin;
3349 
3350   int tmp_imin;
3351   int tmp_imax;
3352   int tmp_jmin;
3353   int tmp_jmax;
3354 
3355   /* ptrs to cp9b data, for convenience */
3356   int *pn_min_m;      /* pn_min_m[k] = first position in HMM band for match state of HMM node k */
3357   int *pn_max_m;      /* pn_max_m[k] = last position in HMM band for match state of HMM node k */
3358   int *pn_min_i;      /* pn_min_i[k] = first position in HMM band for insert state of HMM node k */
3359   int *pn_max_i;      /* pn_max_i[k] = last position in HMM band for insert state of HMM node k */
3360   int *pn_min_d;      /* pn_min_d[k] = first position in HMM band for delete state of HMM node k */
3361   int *pn_max_d;      /* pn_max_d[k] = last position in HMM band for delete state of HMM node k */
3362   int *imin;          /* imin[v] = first position in band on i for state v to be filled in this function. [1..M] */
3363   int *imax;          /* imax[v] = last position in band on i for state v to be filled in this function. [1..M] */
3364   int *jmin;          /* jmin[v] = first position in band on j for state v to be filled in this function. [1..M] */
3365   int *jmax;          /* jmax[v] = last position in band on j for state v to be filled in this function. [1..M] */
3366 
3367   int *nss_imin;      /* nss_imin[n] = imin of each split set state in node n*/
3368   int *nss_imax;      /* nss_imax[n] = imax of each split set state in node n*/
3369   int *nss_jmin;      /* nss_jmin[n] = jmin of each split set state in node n*/
3370   int *nss_jmax;      /* nss_jmax[n] = jmax of each split set state in node n*/
3371 
3372   int *nis_imin;      /* nss_imin[n] = imin of each insert set state in node n*/
3373   int *nis_imax;      /* nss_imax[n] = imax of each insert set state in node n*/
3374   int *nis_jmin;      /* nss_jmin[n] = jmin of each insert set state in node n*/
3375   int *nis_jmax;      /* nss_jmax[n] = jmax of each insert set state in node n*/
3376 
3377   int *nss_max_imin;  /* nss_max_imin[n] = max imin over split set states in node n*/
3378   int *nss_min_jmax;  /* nss_min_jmax[n] = min jmax over split set states in node n*/
3379 
3380   int n;            /* counter over CM nodes. */
3381   int y, yoffset;   /* counters over children states */
3382 
3383   /* Contract checks */
3384   if (cp9b == NULL)                                                                   ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), cp9b is NULL.\n");
3385   if(i0 < 1) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), i0 < 1: %d\n", i0);
3386   if(j0 < 1) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), j0 < 1: %d\n", j0);
3387   if(j0 < i0) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), i0 (%d) < j0 (%d)\n", i0, j0);
3388 
3389   /* set pointers to cp9b data
3390    * note: these arrays used to be allocated here, but that was wasteful, now it's allocated
3391    * once per model (instead of once per sequence) in AllocCP9Bands()
3392    */
3393 
3394   pn_min_m = cp9b->pn_min_m;
3395   pn_max_m = cp9b->pn_max_m;
3396   pn_min_i = cp9b->pn_min_i;
3397   pn_max_i = cp9b->pn_max_i;
3398   pn_min_d = cp9b->pn_min_d;
3399   pn_max_d = cp9b->pn_max_d;
3400   imin     = cp9b->imin;
3401   imax     = cp9b->imax;
3402   jmin     = cp9b->jmin;
3403   jmax     = cp9b->jmax;
3404 
3405   ESL_ALLOC(nss_imin, sizeof(int) * cm->nodes);
3406   ESL_ALLOC(nss_imax, sizeof(int) * cm->nodes);
3407   ESL_ALLOC(nss_jmin, sizeof(int) * cm->nodes);
3408   ESL_ALLOC(nss_jmax, sizeof(int) * cm->nodes);
3409 
3410   ESL_ALLOC(nis_imin, sizeof(int) * cm->nodes);
3411   ESL_ALLOC(nis_imax, sizeof(int) * cm->nodes);
3412   ESL_ALLOC(nis_jmin, sizeof(int) * cm->nodes);
3413   ESL_ALLOC(nis_jmax, sizeof(int) * cm->nodes);
3414 
3415   ESL_ALLOC(nss_max_imin, sizeof(int) * cm->nodes);
3416   ESL_ALLOC(nss_min_jmax, sizeof(int) * cm->nodes);
3417 
3418   esl_vec_ISet(nss_imin, cm->nodes, -1);
3419   esl_vec_ISet(nss_imax, cm->nodes, -1);
3420   esl_vec_ISet(nss_jmin, cm->nodes, -1);
3421   esl_vec_ISet(nss_jmax, cm->nodes, -1);
3422 
3423   esl_vec_ISet(nis_imin, cm->nodes, -1);
3424   esl_vec_ISet(nis_imax, cm->nodes, -1);
3425   esl_vec_ISet(nis_jmin, cm->nodes, -1);
3426   esl_vec_ISet(nis_jmax, cm->nodes, -1);
3427 
3428   esl_vec_ISet(nss_max_imin, cm->nodes, -1);
3429   esl_vec_ISet(nss_min_jmax, cm->nodes, -1);
3430 
3431   /* Initialize all bands to -1. */
3432   esl_vec_ISet(imin, cm->M, -1);
3433   esl_vec_ISet(imax, cm->M, -1);
3434   esl_vec_ISet(jmin, cm->M, -1);
3435   esl_vec_ISet(jmax, cm->M, -1);
3436 
3437   /* We go node by node, bottom up, and fill in the bands on each
3438    * state for each node. Keeping track of the node split set min and max i's
3439    * and j's, as well as the node insert set's
3440    * also because they influence all nodes above (until a BEGL or BEGR at least).
3441    */
3442 
3443   /* For match nodes (MATP, MATL, MATR):
3444    * First calc the split set node mins and maxes, then impose these
3445    * on each state v in the split set of the node, requiring that any valid
3446    * d resulting from the i and j bands on state v
3447    * is at least dv = StateDelta(v).
3448    * This is done by ensuring that jmin[v] >= dv & jmax[v] >= dv.
3449    * (We don't have to worry about i as we check again when we create
3450    *  the d bands from the i and j bands in ij2d_bands()).
3451    * We really only have to enforce the StateDelta issue here so we
3452    * don't run into d band on j that is 0 cells in ij2d_bands().
3453    * Alternatively, we could ignore the StateDelta() issue here, and
3454    * allow ij2d_bands() to modify j bands when it enforces the StateDelta()
3455    * issue.
3456    */
3457 
3458    for(n = (cm->nodes-1); n >= 0; n--) {
3459      switch (cm->ndtype[n]) {
3460      case END_nd:
3461        /* Special case, we need to know the bands on the states
3462 	* in the node ABOVE this one. Node above MUST be MATP, MATL
3463 	* or MATR. For END states, the band on i = the band on j,
3464 	* this is because d must be 0, so i must be (j+1), so its pointless
3465 	* to allow an i value that (j+1) is not allowed to be or vice versa.
3466 	* If the node above is MATL, we use the HMM band that maps
3467 	* to the ML state - these correspond to bands on i. If its a MATR,
3468 	* we use the HMM band that maps to the MR state - these correspond
3469 	* to bands on j. If its a MATP, we get fancy (see below).
3470 	*/
3471        v = cm->nodemap[n];
3472        if(cm->ndtype[n-1] == MATL_nd) {
3473 	 /* tricky. we keep the n_*m** structures ignorant of the fact that we're in
3474 	  * an end state, i.e. we don't force a d=0 (j-i+1=0). This way when
3475 	  * the node immediately above the end (the MATL) looks at it when its determining
3476 	  * the correct bands on i, it doesn't get screwed up (as it would if j < i).
3477 	  */
3478 
3479 	 /*minimum of delete and match states of node above*/
3480 	 nss_imin[n] = (pn_min_m[cp9map->nd2lpos[n-1]] <= (pn_min_d[cp9map->nd2lpos[n-1]])) ?
3481 	   pn_min_m[cp9map->nd2lpos[n-1]] : (pn_min_d[cp9map->nd2lpos[n-1]]);
3482 	 /*for the max, we must allow possibility of inserts and deletes.*/
3483 	 nss_imax[n] = (pn_max_m[cp9map->nd2lpos[n-1]] >= pn_max_i[cp9map->nd2lpos[n-1]]) ?
3484 	   pn_max_m[cp9map->nd2lpos[n-1]] : pn_max_i[cp9map->nd2lpos[n-1]];
3485 	 /* deletes max bands may always be less than match max bands...(not sure)*/
3486 	 if(nss_imax[n] < (pn_max_d[cp9map->nd2lpos[n-1]]))
3487 	   nss_imax[n] = (pn_max_d[cp9map->nd2lpos[n-1]]);
3488 
3489 	 nss_jmin[n] = nss_imin[n];
3490 	 nss_jmax[n] = nss_imax[n];
3491 
3492 	 imin[v] = nss_imin[n];
3493 	 imax[v] = nss_imax[n] + 1; /* we add 1 because we have to figure in the emission
3494 				     * of the MATL_ML (or final MATL_IL), which would increase
3495 				     * i by 1 potentially relative to the imax of that state.
3496 				     */
3497 	 jmin[v] = imin[v] - 1; /* d must be 0 for end states. */
3498 	 jmax[v] = imax[v] - 1; /* d must be 0 for end states. */
3499 
3500 	 nss_max_imin[n] = imin[v];
3501 	 nss_min_jmax[n] = jmax[v];
3502        }
3503        else if(cm->ndtype[n-1] == MATR_nd) {
3504 	 /* tricky. we keep the nss_*m** structures ignorant of the fact that we're in
3505 	  * an end state, i.e. we don't force a d=0 (j-i+1=0). This way when
3506 	  * the node immediately above the end (the MATR) looks at it when its determining
3507 	  * the correct bands on i, it doesn't get screwed up (as it would if j < i).
3508 	  */
3509 
3510 	 /*minimum of delete and match states of node above */
3511 	 nss_jmin[n] = (pn_min_m[cp9map->nd2rpos[n-1]] <= pn_min_d[cp9map->nd2rpos[n-1]]) ?
3512 	   pn_min_m[cp9map->nd2rpos[n-1]] : pn_min_d[cp9map->nd2rpos[n-1]];
3513 	 /*for the max, we must allow possibility of inserts.*/
3514 	 nss_jmax[n] = (pn_max_m[cp9map->nd2rpos[n-1]] >= pn_max_i[cp9map->nd2rpos[n-1]]) ?
3515 	   pn_max_m[cp9map->nd2rpos[n-1]] : pn_max_i[cp9map->nd2rpos[n-1]];
3516 	 /* deletes max bands may always be less than match max bands...(not sure)*/
3517 	 if(nss_jmax[n] < pn_max_d[cp9map->nd2rpos[n-1]])
3518 	   nss_jmax[n] = pn_max_d[cp9map->nd2rpos[n-1]];
3519 	 nss_imin[n] = nss_jmin[n];
3520 	 nss_imax[n] = nss_jmax[n];
3521 
3522 	 jmin[v] = nss_jmin[v] - 1; /* we subtract 1 because of we have to figure
3523 				     * in the emission of the MATR_MR (or final MATR_IR), which would
3524 				     * decrease j by 1 potentially relative to jmin of that state.
3525 				     */
3526 	 jmax[v] = nss_jmax[n];
3527 	 imin[v] = jmin[v] + 1; /*d (j-i+1) must be 0 for end states*/
3528 	 imax[v] = jmax[v] + 1; /*d (j-i+1) must be 0 for end states*/
3529 
3530 	 nss_max_imin[n] = imin[v];
3531 	 nss_min_jmax[n] = jmax[v];
3532        }
3533        else if(cm->ndtype[n-1] == MATP_nd) {
3534 	 /* Very rare case, only if the last bp in a stem is the last left consensus
3535 	  * column (respecting gap_thresh) in that alignment. Does happen though,
3536 	  * (at least in RFAM 6.1) because the training counts for transition priors
3537 	  * had counts for MATP_* state -> END_nd transition sets.
3538 	  */
3539 
3540 	 /* tricky. we keep the nss_*m** structures ignorant of the fact that we're in
3541 	  * an end state, i.e. we don't force a d=0 (j-i+1=0). This way when
3542 	  * the node immediately above the end (the MATP) looks at it when its determining
3543 	  * the correct bands on j, it doesn't get screwed up (as it would if j < i).
3544 	  */
3545 	 /*minimum of delete and match states of node above*/
3546 	 nss_imin[n] = (pn_min_m[cp9map->nd2lpos[n-1]] <= (pn_min_d[cp9map->nd2lpos[n-1]])) ?
3547 	   pn_min_m[cp9map->nd2lpos[n-1]] : (pn_min_d[cp9map->nd2lpos[n-1]]);
3548 	 /*for the max, we must allow possibility of inserts and deletes.*/
3549 	 nss_imax[n] = (pn_max_m[cp9map->nd2lpos[n-1]] >= pn_max_i[cp9map->nd2lpos[n-1]]) ?
3550 	   pn_max_m[cp9map->nd2lpos[n-1]] : pn_max_i[cp9map->nd2lpos[n-1]];
3551 	 /* deletes max bands may always be less than match max bands...(not sure)*/
3552 	 if(nss_imax[n] < (pn_max_d[cp9map->nd2lpos[n-1]]))
3553 	   nss_imax[n] = (pn_max_d[cp9map->nd2lpos[n-1]]);
3554 
3555 	 /*minimum of delete and match states of node above*/
3556 	 nss_jmin[n] = (pn_min_m[cp9map->nd2rpos[n-1]] <= pn_min_d[cp9map->nd2rpos[n-1]]) ?
3557 	   pn_min_m[cp9map->nd2rpos[n-1]] : pn_min_d[cp9map->nd2rpos[n-1]];
3558 	 /*for the max, we must allow possibility of inserts.*/
3559 	 nss_jmax[n] = (pn_max_m[cp9map->nd2rpos[n-1]] >= pn_max_i[cp9map->nd2rpos[n-1]]) ?
3560 	   pn_max_m[cp9map->nd2rpos[n-1]] : pn_max_i[cp9map->nd2rpos[n-1]];
3561 	 /* deletes max bands may always be less than match max bands...(not sure)*/
3562 	 if(nss_jmax[n] < pn_max_d[cp9map->nd2rpos[n-1]])
3563 	   nss_jmax[n] = pn_max_d[cp9map->nd2rpos[n-1]];
3564 
3565 	 /* unique situation. end's d must be 0, so we are constrained on what
3566 	  * i can be relative to j, and j can be relative to i, but what we want
3567 	  * are the constraints on what i can be, and j can be.
3568 	  * because d=0 => j-i+1 = 0. then imin should equal = jmin + 1 and imax = jmax + 1.
3569 	  * so we really just want to know a min over i and j, and a max over i and j.
3570 	  * below we take min of imin and jmin (should always be imin i think) as the min,
3571 	  * and max of imax and jmax (should always be jmax i think) after accounting for
3572 	  * the possibility that a single base was just emitted left and/or right.
3573 	  */
3574 	 imax[v] = ((nss_imax[n] + 1) > nss_jmax[n]) ?
3575 	   (nss_imax[n] + 1) : nss_jmax[n];
3576 	 imin[v] = ((nss_imin[n]) < (nss_jmin[n] - 1)) ?
3577 	   (nss_imin[n]) : (nss_jmin[n] - 1);
3578 	 /* we can't have an i < i0 */
3579 	 imin[v] = ESL_MAX(imin[v], i0);
3580 	 imax[v] = ESL_MAX(imax[v], i0);
3581 	 jmin[v] = imin[v] - 1; /* d must be 0 for end states. */
3582 	 jmax[v] = imax[v] - 1; /* d must be 0 for end states. */
3583 
3584 	 nss_max_imin[n] = imin[v];
3585 	 nss_min_jmax[n] = jmax[v];
3586        }
3587        break;
3588 
3589 	case MATP_nd:
3590 	  hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3591 	  hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3592 						 nss_imin, nss_imax, nss_jmin, nss_jmax,
3593 						 pn_min_i, pn_max_i, cp9map);
3594 
3595 	  hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3596 					       nis_imin[n], nis_jmax[n],
3597 					       &safe_imax, &safe_jmin);
3598 	  hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3599 						   nss_imin, nss_imax, nss_jmin, nss_jmax,
3600 						   pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3601 						   cp9map);
3602 	  /* 6 states MATP_MP, MATP_ML, MATP_MR, MATP_D, MATP_IL, MATP_IR */
3603 	  v = cm->nodemap[n]; /* MATP_MP */
3604 	  /* Determine implied v bands using hmm for mapped 'direction(s)' and
3605 	   * next node's bands for non-mapped direction(s).
3606 	   */
3607 	  tmp_imin = pn_min_m[cp9map->nd2lpos[n]];
3608 	  tmp_imax = pn_max_m[cp9map->nd2lpos[n]];
3609 	  tmp_jmin = pn_min_m[cp9map->nd2rpos[n]];
3610 	  tmp_jmax = pn_max_m[cp9map->nd2rpos[n]];
3611 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3612 						   tmp_jmax, imin, imax, jmin, jmax,
3613 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3614 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3615 	  					safe_jmin);
3616 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3617 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3618 
3619 	  v++; /*MATP_ML*/
3620 	  /* Determine implied v bands using hmm for mapped 'direction(s)' and
3621 	   * next node's bands for non-mapped direction(s).
3622 	   */
3623 	  tmp_imin = pn_min_m[cp9map->nd2lpos[n]];
3624 	  tmp_imax = pn_max_m[cp9map->nd2lpos[n]];
3625 	  /* 12.19.05 - trying to deal with the right delete off-by-one
3626 	   * inverted relative to left delete issue.
3627 	   */
3628 	  tmp_jmin = (pn_min_d[cp9map->nd2rpos[n]] < nss_jmin[n+1]) ?
3629 	    pn_min_d[cp9map->nd2rpos[n]] : nss_jmin[n+1];
3630 	  tmp_jmax = (pn_max_d[cp9map->nd2rpos[n]] > nss_jmax[n+1]) ?
3631 	    pn_max_d[cp9map->nd2rpos[n]] : nss_jmax[n+1];
3632 
3633 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3634 						   tmp_jmax, imin, imax, jmin, jmax,
3635 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3636 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3637 	  					safe_jmin);
3638 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3639 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3640 
3641 	  v++; /*MATP_MR*/
3642 	  /* this D-left state gets the delete band from the HMM node
3643 	   * that maps to the left side.
3644 	   */
3645 	  tmp_imin = pn_min_d[cp9map->nd2lpos[n]];
3646 	  tmp_imax = pn_max_d[cp9map->nd2lpos[n]];
3647 	  tmp_jmin = pn_min_m[cp9map->nd2rpos[n]];
3648 	  tmp_jmax = pn_max_m[cp9map->nd2rpos[n]];
3649 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3650 						   tmp_jmax, imin, imax, jmin, jmax,
3651 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3652 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3653 	  					safe_jmin);
3654 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3655 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3656 
3657 	  v++; /*MATP_D*/
3658 	  tmp_imin = pn_min_d[cp9map->nd2lpos[n]];
3659 	  tmp_imax = pn_max_d[cp9map->nd2lpos[n]];
3660 	  /* 12.19.05 - trying to deal with the right delete off-by-one
3661 	   * inverted relative to left delete issue.
3662 	   */
3663 	  tmp_jmin = (pn_min_d[cp9map->nd2rpos[n]] < nss_jmin[n+1]) ?
3664 	    pn_min_d[cp9map->nd2rpos[n]] : nss_jmin[n+1];
3665 	  tmp_jmax = (pn_max_d[cp9map->nd2rpos[n]] > nss_jmax[n+1]) ?
3666 	    pn_max_d[cp9map->nd2rpos[n]] : nss_jmax[n+1];
3667 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3668 						   tmp_jmax, imin, imax, jmin, jmax,
3669 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3670 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3671 	  					safe_jmin);
3672 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3673 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3674 	  hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3675 
3676 	  v++; /*MATP_IL*/
3677 	  /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
3678 	  tmp_imin = pn_min_i[cp9map->cs2hn[v][0]]; /* insert states can only map to 1 HMM node */
3679 	  tmp_imax = pn_max_i[cp9map->cs2hn[v][0]]; /* insert states can only map to 1 HMM node */
3680 	  tmp_jmin = nss_jmin[n];
3681 	  tmp_jmax = nss_jmax[n];
3682 	  hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3683 						    tmp_jmax, imin, imax, jmin, jmax);
3684 	  /* Enforce safe transitions, this makes sure that at least one state
3685 	   * y \in C_v is reachable from v. And further (special case for inserts)
3686 	   * make sure that we don't consider v as a possible y.  IF we did, we might
3687 	   * be faced with a situation where v could only transit to itself, and then
3688 	   * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3689 	   * including to itself.
3690 	   */
3691 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3692 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3693 
3694 	  v++; /*MATP_IR*/
3695 	  /* skip detached inserts */
3696 	  if(cp9map->cs2hn[v][0] == -1)
3697 	    continue;
3698 	  /* Special case, one of only two situations (other is ROOT_IR)
3699 	   * we could have come where v is an insert, and a possible
3700 	   * state x that we came from is an insert, but x != y (x can be the MATP_IL).
3701 	   * So we have to determine imin and imax carefully.
3702 	   */
3703 	  tmp_imin = (nss_imin[n] < imin[v-1]) ?
3704 	    nss_imin[n] : imin[v-1];
3705 	  tmp_imax = (nss_imax[n] > imax[v-1]) ?
3706 	    nss_imax[n] : imax[v-1];
3707 	  /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
3708 	  tmp_jmin = pn_min_i[cp9map->cs2hn[v][0]];
3709 	  tmp_jmax = pn_max_i[cp9map->cs2hn[v][0]];
3710 	  hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3711 						    tmp_jmax, imin, imax, jmin, jmax);
3712 	  /* Enforce safe transitions, this makes sure that at least one state
3713 	   * y \in C_v is reachable from v. And further (special case for inserts)
3714 	   * make sure that we don't consider v as a possible y.  IF we did, we might
3715 	   * be faced with a situation where v could only transit to itself, and then
3716 	   * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3717 	   * including to itself.
3718 	   */
3719 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3720 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3721 	  break;
3722 
3723 	case MATL_nd:
3724 	  hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3725 	  hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3726 						 nss_imin, nss_imax, nss_jmin, nss_jmax,
3727 						 pn_min_i, pn_max_i, cp9map);
3728 
3729 	  hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3730 					       nis_imin[n], nis_jmax[n],
3731 					       &safe_imax, &safe_jmin);
3732 	  hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3733 						   nss_imin, nss_imax, nss_jmin, nss_jmax,
3734 						   pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3735 						   cp9map);
3736 
3737 	  /* 3 states MATL_ML, MATL_D, MATL_IL */
3738 	  v = cm->nodemap[n]; /* MATL_ML */
3739 	  tmp_imin = pn_min_m[cp9map->nd2lpos[n]];
3740 	  tmp_imax = pn_max_m[cp9map->nd2lpos[n]];
3741 	  tmp_jmin = nss_jmin[n+1];
3742 	  tmp_jmax = nss_jmax[n+1];
3743 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3744 						   tmp_jmax, imin, imax, jmin, jmax,
3745 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3746 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3747 	  					safe_jmin);
3748 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3749 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3750 
3751 	  v++; /*MATL_D*/
3752 	  /* this D-left state gets the delete band from the HMM node
3753 	   * that maps to the left side.
3754 	   */
3755 	  tmp_imin = pn_min_d[cp9map->nd2lpos[n]];
3756 	  tmp_imax = pn_max_d[cp9map->nd2lpos[n]];
3757 	  tmp_jmin = nss_jmin[n+1];
3758 	  tmp_jmax = nss_jmax[n+1];
3759 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3760 						   tmp_jmax, imin, imax, jmin, jmax,
3761 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3762 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3763 	  					safe_jmin);
3764 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3765 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3766 	  hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3767 
3768 	  v++; /*MATL_IL*/
3769 	  /* skip detached inserts */
3770 	  if(cp9map->cs2hn[v][0] == -1)
3771 	    continue;
3772 	  /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
3773 	  tmp_imin = pn_min_i[cp9map->cs2hn[v][0]];
3774 	  tmp_imax = pn_max_i[cp9map->cs2hn[v][0]];
3775 	  tmp_jmin = nss_jmin[n];
3776 	  tmp_jmax = nss_jmax[n];
3777 
3778 	  hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3779 						    tmp_jmax, imin, imax, jmin, jmax);
3780 	  /* Enforce safe transitions, this makes sure that at least one state
3781 	   * y \in C_v is reachable from v. And further (special case for inserts)
3782 	   * make sure that we don't consider v as a possible y.  IF we did, we might
3783 	   * be faced with a situation where v could only transit to itself, and then
3784 	   * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3785 	   * including to itself.
3786 	   */
3787 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3788 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3789 	  break;
3790 
3791 	case MATR_nd:
3792 	  hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3793 	  hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3794 						 nss_imin, nss_imax, nss_jmin, nss_jmax,
3795 						 pn_min_i, pn_max_i, cp9map);
3796 
3797 	  hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3798 					       nis_imin[n], nis_jmax[n],
3799 					       &safe_imax, &safe_jmin);
3800 	  hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3801 						   nss_imin, nss_imax, nss_jmin, nss_jmax,
3802 						   pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3803 						   cp9map);
3804 
3805 	  /* 3 states MATR_MR, MATR_D, MATR_IR */
3806 	  v = cm->nodemap[n]; /* MATR_MR */
3807 	  tmp_imin = nss_imin[n+1];
3808 	  tmp_imax = nss_imax[n+1];
3809 	  tmp_jmin = pn_min_m[cp9map->nd2rpos[n]];
3810 	  tmp_jmax = pn_max_m[cp9map->nd2rpos[n]];
3811 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3812 						   tmp_jmax, imin, imax, jmin, jmax,
3813 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3814 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3815 	  					safe_jmin);
3816 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3817 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3818 
3819 	  v++; /*MATR_D*/
	  /* this D-right state gets the delete band from the HMM node
	   * that maps to the right side.
	   */
3823 	  tmp_imin = nss_imin[n+1];
3824 	  tmp_imax = nss_imax[n+1];
3825 	  /* 12.19.05 - trying to deal with the right delete off-by-one
3826 	   * inverted relative to left delete issue.
3827 	   */
3828 	  tmp_jmin = (pn_min_d[cp9map->nd2rpos[n]] < nss_jmin[n+1]) ?
3829 	    pn_min_d[cp9map->nd2rpos[n]] : nss_jmin[n+1];
3830 	  tmp_jmax = (pn_max_d[cp9map->nd2rpos[n]] > nss_jmax[n+1]) ?
3831 	    pn_max_d[cp9map->nd2rpos[n]] : nss_jmax[n+1];
3832 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3833 						   tmp_jmax, imin, imax, jmin, jmax,
3834 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3835 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3836 	  					safe_jmin);
3837 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3838 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3839 	  hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3840 
3841 	  v++; /*MATR_IR*/
3842 	  /* skip detached inserts */
3843 	  if(cp9map->cs2hn[v][0] == -1)
3844 	    continue;
3845 	  tmp_imin = nss_imin[n];
3846 	  tmp_imax = nss_imax[n];
3847 	  /* This state maps to the insert state of HMM node cshn_map[v]*/
3848 	  tmp_jmin = pn_min_i[cp9map->cs2hn[v][0]];
3849 	  tmp_jmax = pn_max_i[cp9map->cs2hn[v][0]];
3850 	  hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3851 						    tmp_jmax, imin, imax, jmin, jmax);
3852 	  /* Enforce safe transitions, this makes sure that at least one state
3853 	   * y \in C_v is reachable from v. And further (special case for inserts)
3854 	   * make sure that we don't consider v as a possible y.  IF we did, we might
3855 	   * be faced with a situation where v could only transit to itself, and then
3856 	   * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3857 	   * including to itself.
3858 	   */
3859 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3860 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3861 	  break;
3862 
3863 	case ROOT_nd:
3864 	  hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3865 	  hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3866 						 nss_imin, nss_imax, nss_jmin, nss_jmax,
3867 						 pn_min_i, pn_max_i, cp9map);
3868 
3869 	  hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3870 					       nis_imin[n], nis_jmax[n],
3871 					       &safe_imax, &safe_jmin);
3872 	  hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3873 						   nss_imin, nss_imax, nss_jmin, nss_jmax,
3874 						   pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3875 						   cp9map);
3876 	  /* 3 states, ROOT_S, ROOT_IL, and ROOT_IR*/
3877 	  v = cm->nodemap[n]; /* ROOT_S SPECIAL CASE */
3878 	  if(doing_search) { /* we're doing search, ROOT_S doesn't necessarily emit full sequence */
3879 	    tmp_imin = nss_imin[n+1];
3880 	    tmp_imax = nss_imax[n+1];
3881 	    tmp_jmin = nss_jmin[n+1];
3882 	    tmp_jmax = nss_jmax[n+1];
3883 	  }
3884 	  else { /* we're doing alignment, enforce ROOT_S emits full sequence */
3885 	    /* for now, enforce ROOT_S emits full sequence at end of the function, we'll relax this if doing_search==TRUE */
3886 	    tmp_imin = i0;
3887 	    tmp_imax = i0;
3888 	    tmp_jmin = j0;
3889 	    tmp_jmax = j0;
3890 	  }
3891 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3892 						   tmp_jmax, imin, imax, jmin, jmax,
3893 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3894 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3895 	  					safe_jmin);
3896 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3897 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3898 
3899 	  v++; /*ROOT_IL SPECIAL CASE*/
3900 	  /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0], which is HMM node 0*/
3901 	  if(doing_search)
3902 	  tmp_imin =  pn_min_i[cp9map->cs2hn[v][0]]; /* should this be imin[0]? */
3903 	  else
3904 	    tmp_imin =  i0; /* Have to be able to transit here from ROOT_S */
3905 	  tmp_imax = nss_imax[n+1];
3906 	  if(doing_search) {
3907 	    tmp_jmin = nss_jmin[n+1];
3908 	    tmp_jmax = nss_jmax[n+1];
3909 	  }
3910 	  else {
3911 	    tmp_jmin = j0; /* we never emit to the right in this state */
3912 	    tmp_jmax = j0; /* we never emit to the right in this state */
3913 	  }
3914 	  hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3915 						    tmp_jmax, imin, imax, jmin, jmax);
3916 	  /* Enforce safe transitions, this makes sure that at least one state
3917 	   * y \in C_v is reachable from v. And further (special case for inserts)
3918 	   * make sure that we don't consider v as a possible y.  IF we did, we might
3919 	   * be faced with a situation where v could only transit to itself, and then
3920 	   * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3921 	   * including to itself.
3922 	   */
3923 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3924 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3925 
3926 	  v++; /*ROOT_IR SPECIAL CASE analagous to ROOT_IL*/
3927 	  if(doing_search)
3928 	    tmp_imin = nss_imin[n+1]; /* same tmp_imin as ROOT_S */
3929 	  else
3930 	    tmp_imin = i0; /* we never emit to the left in this state */
3931 	  tmp_imax = nss_imax[n+1];
3932 	  tmp_jmin = nss_jmin[n+1];
3933 	  tmp_jmax = j0; /* Have to be able to transit here from ROOT_S */
3934 	  hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3935 						    tmp_jmax, imin, imax, jmin, jmax);
3936 	  /* Enforce safe transitions, this makes sure that at least one state
3937 	   * y \in C_v is reachable from v. And further (special case for inserts)
3938 	   * make sure that we don't consider v as a possible y.  IF we did, we might
3939 	   * be faced with a situation where v could only transit to itself, and then
3940 	   * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3941 	   * including to itself.
3942 	   */
3943 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3944 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3945 	  break;
3946 
3947 	case BEGL_nd:
3948 	  hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3949 	  hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3950 						 nss_imin, nss_imax, nss_jmin, nss_jmax,
3951 						 pn_min_i, pn_max_i, cp9map);
3952 
3953 	  hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3954 					       nis_imin[n], nis_jmax[n],
3955 					       &safe_imax, &safe_jmin);
3956 	  hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3957 						   nss_imin, nss_imax, nss_jmin, nss_jmax,
3958 						   pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3959 						   cp9map);
3960 	  /* 1 state BEGL_S */
3961 	  v = cm->nodemap[n];
3962 	  /* The next node MUST be a match node (MATP
3963 	   * specifically due to model building
3964 	   * algorithm) or a BIF node. We derive imin, imax,
3965 	   * jmin and jmax from that node.
3966 	   */
3967 	  /* Use the next nodes split set band, which
3968 	   * will be wider of match and delete states bands
3969 	   * for split set states in next node.
3970 	   */
3971 	  tmp_imin = nss_imin[n+1];
3972 	  tmp_imax = nss_imax[n+1];
3973 	  tmp_jmin = nss_jmin[n+1];
3974 	  tmp_jmax = nss_jmax[n+1];
3975 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3976 						   tmp_jmax, imin, imax, jmin, jmax,
3977 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
3978 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3979 	  					safe_jmin);
3980 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3981 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3982 	  hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3983 	  break;
3984 
3985 	case BEGR_nd:
3986 	  hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3987 	  hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3988 						 nss_imin, nss_imax, nss_jmin, nss_jmax,
3989 						 pn_min_i, pn_max_i, cp9map);
3990 
3991 	  hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3992 					       nis_imin[n], nis_jmax[n],
3993 					       &safe_imax, &safe_jmin);
3994 	  hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3995 						   nss_imin, nss_imax, nss_jmin, nss_jmax,
3996 						   pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3997 						   cp9map);
3998 	  /* 2 states BEGR_S and BEGR_IL */
3999 	  v = cm->nodemap[n]; /*BEGR_S*/
4000 	  /* Use either the next nodes split set band, which
4001 	   * will be wider of match and delete states bands
4002 	   * for split set states in next node OR
4003 	   * the band on the insert state that maps to the
4004 	   * BEGR_IL, erring on the safe side (wider band).
4005 	   */
4006 	  tmp_imin = nss_imin[n+1];
4007 	  tmp_imax = nss_imax[n+1];
4008 	  if(pn_min_i[cp9map->cs2hn[v+1][0]] < tmp_imin)
4009 	    tmp_imin = pn_min_i[cp9map->cs2hn[v+1][0]];
4010 	  if(pn_max_i[cp9map->cs2hn[v+1][0]] > tmp_imax)
4011 	    tmp_imax = pn_max_i[cp9map->cs2hn[v+1][0]];
4012 	  tmp_jmin = nss_jmin[n+1];
4013 	  tmp_jmax = nss_jmax[n+1];
4014 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
4015 						   tmp_jmax, imin, imax, jmin, jmax,
4016 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
4017 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
4018 	  					safe_jmin);
4019 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
4020 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
4021 	  hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
4022 
4023 	  v++; /*BEGR_IL*/
4024 	  /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
4025 	  tmp_imin = pn_min_i[cp9map->cs2hn[v][0]];
4026 	  tmp_imax = pn_max_i[cp9map->cs2hn[v][0]];
4027 	  tmp_jmin = nss_jmin[n+1];
4028 	  tmp_jmax = nss_jmax[n+1];
4029 	  hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
4030 						    tmp_jmax, imin, imax, jmin, jmax);
4031 	  /* Enforce safe transitions, this makes sure that at least one state
4032 	   * y \in C_v is reachable from v. And further (special case for inserts)
4033 	   * make sure that we don't consider v as a possible y.  IF we did, we might
4034 	   * be faced with a situation where v could only transit to itself, and then
4035 	   * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
4036 	   * including to itself.
4037 	   */
4038 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
4039 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
4040 	  break;
4041 
4042 	case BIF_nd:
4043 	  hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
4044 
4045 	  /* 1 state BIF_B */
4046 	  v = cm->nodemap[n]; /*BIF_B*/
4047 	  /* The only two connected states are BEGL_S and BEGR_S.
4048 	   * We can derive our imin, imax, jmin, and jmax from
4049 	   * those two states.
4050 	   * cm->cfirst[v] is the state index of the left child.
4051 	   * cm->cnum[v] is the state index of the right child.
4052 	   */
4053 	  nis_imin[n] = imin[cm->cfirst[v]];
4054 	  nis_imax[n] = imax[cm->cfirst[v]];
4055 	  nis_jmin[n] = jmin[cm->cnum[v]];
4056 	  nis_jmax[n] = jmax[cm->cnum[v]];
4057 
4058 	  nss_imin[n] = imin[cm->cfirst[v]];
4059 	  nss_imax[n] = imax[cm->cfirst[v]];
4060 	  nss_jmin[n] = jmin[cm->cnum[v]];
4061 	  nss_jmax[n] = jmax[cm->cnum[v]];
4062 
4063 	  hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
4064 					       nis_imin[n], nis_jmax[n],
4065 					       &safe_imax, &safe_jmin);
4066 	  tmp_imin = imin[cm->cfirst[v]];
4067 	  tmp_imax = imax[cm->cfirst[v]];
4068 	  tmp_jmin = jmin[cm->cnum[v]];
4069 	  tmp_jmax = jmax[cm->cnum[v]];
4070 	  hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
4071 						   tmp_jmax, imin, imax, jmin, jmax,
4072 						   nss_imin, nss_imax, nss_jmin, nss_jmax);
4073 	  hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
4074 	  					safe_jmin);
4075 	  hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
4076 	  hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
4077 	  hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
4078 	  break;
4079 	}
4080     }
4081 
4082    /* Tie up some loose ends:
4083     * 1. Ensure that all valid i are >= i0 and all valid j are <= j0
4084     * 2. Ensure all bands have bandwidth >= 0 (see code)
4085     * 3. Set detached inserts states to imin=imax=jmin=jmax=i0 to avoid
4086     *    problems in downstream functions. These states WILL NEVER BE ENTERED
4087     * 4. Do a quick check to make sure we've assigned the bands
4088     *    on i and j for all states to positive values (none were
4089     *    left as -1 EXCEPT for end states which should have i bands left as -1).
4090     * 5. Ensure imin[0] <= imin[v] for all v and jmax[0] >= jmax[v] for all v.
4091     * 6. If doing_search==TRUE, rewrite the bands on the
4092     *    ROOT_S state so they allow any possible transition to a child
4093     *    that the child's bands would allow.
4094     */
4095 
4096    /* 1. Ensure that all valid i are >= i0 and all valid j are <= j0 */
4097    for(v = 0; v < cm->M; v++) {
4098      imin[v] = ESL_MAX(imin[v], i0); /* imin[v] can't be less than i0 */
4099      imax[v] = ESL_MAX(imax[v], i0); /* imax[v] can't be less than i0 */
4100 
4101      imin[v] = ESL_MIN(imin[v], j0); /* imin[v] can't be more than j0 */
4102      imax[v] = ESL_MIN(imax[v], j0); /* imax[v] can't be more than j0 */
4103 
4104      imax[v] = ESL_MIN(imax[v], j0); /* imax[v] can't be more than j0 */
4105 
4106      jmin[v] = ESL_MIN(jmin[v], j0); /* jmin[v] can't be more than j0 */
4107      jmax[v] = ESL_MIN(jmax[v], j0); /* jmax[v] can't be more than j0 */
4108 
4109      jmin[v] = ESL_MAX(jmin[v], i0); /* jmin[v] can't be less than i0 */
4110      jmax[v] = ESL_MAX(jmax[v], i0); /* jmax[v] can't be less than i0 */
4111 
4112      /* 2. Ensure all bands have bandwidth >= 0
4113       * Ensure: jmax[v] - jmin[v] + 1 >= 0
4114       *         imax[v] - imin[v] + 1 >= 0
4115       * jmax[v] - jmin[v] + 1 == 0 means there are no valid j's for state v,
4116       * so state v is not allowed to be in the parse, we allow this (maybe we shouldn't)
4117       */
4118      imax[v] = ESL_MAX(imax[v], imin[v]-1);
4119      jmin[v] = ESL_MIN(jmin[v], jmax[v]+1);
4120 
4121      /* 3. Set detached inserts states to imin=imax=jmin=jmax=i0 to avoid
4122       *    problems in downstream functions. These states WILL NEVER BE ENTERED
4123       */
4124      if(cm->sttype[v+1] == E_st) imin[v] = imax[v] = jmin[v] = jmax[v] = i0;
4125 
4126      /* 4. Do a quick check to make sure we've assigned the bands
4127       *    on i and j for all states to positive values (none were
4128       *    left as -1 EXCEPT for end states which should have i bands left as -1).
4129       */
4130      ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imin[v] == -1))));
4131      ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imax[v] == -1))));
4132      ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmin[v] == -1))));
4133      ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmax[v] == -1))));
4134 
4135      /* 5. Ensure imin[0] <= imin[v] for all v and jmax[0] >= jmax[v] for all v. */
4136      imin[0] = ESL_MIN(imin[0], imin[v]);
4137      jmax[0] = ESL_MAX(jmax[0], jmax[v]);
4138    }
4139 
4140    /* 6. If doing_search==TRUE, rewrite the bands on the
4141     *    ROOT_S state so they allow any possible transition to a child
4142     *    that the child's bands would allow.
4143     */
4144    if(doing_search) {
4145      /* First look at children of 0 (these probs will be 0. if local begins on, but it doesn't matter for our purposes here) */
4146      for (yoffset = 0; yoffset < cm->cnum[0]; yoffset++) {
4147        y = cm->cnum[0] + yoffset;
4148        imin[0] = ESL_MIN(imin[0], imin[y]);
4149        imax[0] = ESL_MAX(imax[0], imax[y]);
4150        jmin[0] = ESL_MIN(jmin[0], jmin[y]);
4151        jmax[0] = ESL_MAX(jmax[0], jmax[y]);
4152      }
4153      /* now for possible local begins */
4154      if(cm->flags & CMH_LOCAL_BEGIN) {
4155        for (y = 1; y < cm->M; y++) {
4156 	 if(NOT_IMPOSSIBLE(cm->beginsc[y])) {
4157 	   imin[0] = ESL_MIN(imin[0], imin[y]);
4158 	   imax[0] = ESL_MAX(imax[0], imax[y]);
4159 	   jmin[0] = ESL_MIN(jmin[0], jmin[y]);
4160 	   jmax[0] = ESL_MAX(jmax[0], jmax[y]);
4161 	 }
4162        }
4163      }
4164    }
4165   /* Final, exceedingly rare, special case */
4166   if(i0 == j0) { /* special case that breaks DP recursion for MP states
4167 		  * b/c target seq is length 1, and all MPs are impossible,
4168 		  * yet above code just forced jmin[v] <= j0 and jmax[v] <= j0,
4169 		  * which says that MPs are possible.
4170 		  */
4171     for(v = 0; v < cm->M; v++) {
4172       if(cm->sttype[v] == MP_st) {
4173 	jmin[v] = j0+1;
4174 	jmax[v] = j0;
4175 	/* now 'for (j = jmin[v]; j <= jmax[v]; j++)' { loops will never be entered, b/c jmin[v] == 2, jmax[v] == 1 */
4176       }
4177     }
4178   }
4179 
4180 #if 0
4181   /* OLD CODE EPN, Fri Dec 21 09:14:32 2007 */
4182    /* Tie up some loose ends:
4183     * 1. Set detached inserts states to imin=imax=jmin=jmax=i0 to avoid
4184     *    problems in downstream functions. These states WILL NEVER BE ENTERED
4185     * 2. Do a quick check to make sure we've assigned the bands
4186     *    on i and j for all states to positive values (none were
4187     *    left as -1 EXCEPT for end states which should have i bands left as -1).
4188     * 3. Ensure that all *max[v] and *min[v] values are <= L, values greater
4189     *    than this don't make sense.
4190     */
4191 
4192   for(v = 0; v < cm->M; v++) {
4193     /* set bands for detached inserts */
4194     if(cm->sttype[v+1] == E_st) imin[v] = imax[v] = jmin[v] = jmax[v] = i0;
4195 
4196     /* Ensure: for all i imin[v]..i..imax[v]
4197      *             i0 <= i <= j0+1
4198      *         for all j jmin[v]..j..jmax[v]
4199      *             i0 <= j <= j0
4200      * Note: i can be j0+1 to allow delete states to be entered with
4201      * d = 0, after the entire seq has been emitted.
4202      */
4203     imin[v] = ESL_MAX(imin[v], i0);
4204     imin[v] = ESL_MIN(imin[v], j0+1);
4205     imax[v] = ESL_MAX(imax[v], i0);
4206     imax[v] = ESL_MIN(imax[v], j0+1);
4207     jmin[v] = ESL_MAX(jmin[v], i0);
4208     jmin[v] = ESL_MIN(jmin[v], j0);
4209     jmax[v] = ESL_MAX(jmax[v], i0);
4210     jmax[v] = ESL_MIN(jmax[v], j0);
4211 
4212     /* Ensure: for all v imin[v] >= imin[0],
4213      *                   jmax[v] <= jmax[0].
4214      */
4215     imin[v] = ESL_MAX(imin[v], imin[0]);
4216     imax[v] = ESL_MAX(imax[v], imin[0]);
4217     jmax[v] = ESL_MIN(jmax[v], jmax[0]);
4218     jmin[v] = ESL_MIN(jmin[v], jmax[0]);
4219 
4220     /* Ensure: jmax[v] - jmin[v] + 1 >= 0
4221      *         imax[v] - imin[v] + 1 >= 0
4222      * jmax[v] - jmin[v] + 1 == 0 means there are no valid j's for state v,
4223      * so state v is not allowed to be in the parse, we allow this (maybe we shouldn't)
4224      */
4225     imin[v] = ESL_MIN(imin[v], imax[v]+1);
4226     jmin[v] = ESL_MIN(jmin[v], jmax[v]+1);
4227 
4228     ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imin[v] == -1))));
4229     ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imax[v] == -1))));
4230     ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmin[v] == -1))));
4231     ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmax[v] == -1))));
4232   }
4233 #endif
4234 
4235   /* debug_print_ij_bands(cm); */
4236 
4237   free(nss_imin);
4238   free(nss_imax);
4239   free(nss_jmin);
4240   free(nss_jmax);
4241   free(nis_imin);
4242   free(nis_imax);
4243   free(nis_jmin);
4244   free(nis_jmax);
4245   free(nss_max_imin);
4246   free(nss_min_jmax);
4247   return eslOK;
4248 
4249  ERROR:
4250   ESL_FAIL(status, errbuf, "Memory allocation error.\n");
4251 }
4252 
/**************************************************************************
 * Helper functions for *_cp9_HMM2ijBands_OLD()
 *  hmm2ij_prestate_step0_initialize()
 *  hmm2ij_prestate_step1_set_node_inserts()
 *  hmm2ij_prestate_step2_determine_safe()
 *  hmm2ij_prestate_step3_preset_node_splits()
 *  hmm2ij_split_state_step1_set_state_bands()
 *  hmm2ij_insert_state_step1_set_state_bands()
 *  hmm2ij_state_step2_enforce_safe_trans()
 *  hmm2ij_state_step3_enforce_state_delta()
 *  hmm2ij_state_step4_update_safe_holders()
 *  hmm2ij_state_step5_non_emitter_d0_hack()
 */
4264 
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_prestate_step0_initialize
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
 *           Resets the two per-node holder entries for node <n> before
 *           the states of that node are processed:
 *             nss_max_imin[n] is set to i0-1,
 *             nss_min_jmax[n] is set to j0.
 *
 * Args:     n            - index of the node being initialized
 *           nss_max_imin - per-node array; only element [n] is written
 *           nss_min_jmax - per-node array; only element [n] is written
 *           i0, j0       - bounds of the target subsequence (the caller
 *                          clamps all i to >= i0 and all j to <= j0)
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_prestate_step0_initialize(int n, int *nss_max_imin, int *nss_min_jmax, int i0, int j0)
{
  int *max_imin_slot = nss_max_imin + n;
  int *min_jmax_slot = nss_min_jmax + n;

  /* i0-1 is one position below the smallest i the caller allows */
  *max_imin_slot = i0 - 1;
  /* j0 is the largest j the caller allows */
  *min_jmax_slot = j0;
}
4278 
4279 /*****************************************************************************
4280  * EPN 12.21.05
4281  * Function: hmm2ij_prestate_step1_set_node_inserts
4282  *
4283  * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
4284  *
4285  *****************************************************************************/
4286 void
hmm2ij_prestate_step1_set_node_inserts(int n,int * nis_imin,int * nis_imax,int * nis_jmin,int * nis_jmax,int * nss_imin,int * nss_imax,int * nss_jmin,int * nss_jmax,int * pn_min_i,int * pn_max_i,CP9Map_t * cp9map)4287 hmm2ij_prestate_step1_set_node_inserts(int n, int *nis_imin, int *nis_imax,
4288 				       int *nis_jmin, int *nis_jmax,
4289 				       int *nss_imin, int *nss_imax,
4290 				       int *nss_jmin, int *nss_jmax,
4291 				       int *pn_min_i, int *pn_max_i,
4292 				       CP9Map_t *cp9map)
4293 
4294 {
4295   if(cp9map->nd2lpos[n] != -1)
4296     {
4297       nis_imin[n] = pn_min_i[cp9map->nd2lpos[n]];
4298       nis_imax[n] = pn_max_i[cp9map->nd2lpos[n]];
4299     }
4300   else
4301     {
4302       nis_imin[n] = nss_imin[n+1];
4303       nis_imax[n] = nss_imax[n+1];
4304     }
4305   if(cp9map->nd2rpos[n] != -1)
4306     {
4307       nis_jmin[n] = pn_min_i[cp9map->nd2rpos[n]];
4308       nis_jmax[n] = pn_max_i[cp9map->nd2rpos[n]];
4309     }
4310   else
4311     {
4312       nis_jmin[n] = nss_jmin[n+1];
4313       nis_jmax[n] = nss_jmax[n+1];
4314     }
4315 }
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_prestate_step2_determine_safe
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
 *           Computes the two "safe" limits for node <n>:
 *             *safe_imax = the smaller of nss_max_imin[n+1] and nis_imin[n]
 *             *safe_jmin = the larger  of nss_min_jmax[n+1] and nis_jmax[n]
 *           The caller hands these to
 *           hmm2ij_state_step2_enforce_safe_trans() for the split set
 *           states of the node.
 *
 * Args:     n                - node index (not used by this function)
 *           nss_max_imin_np1 - nss_max_imin[n+1]
 *           nss_min_jmax_np1 - nss_min_jmax[n+1]
 *           nis_imin_n       - nis_imin[n]
 *           nis_jmax_n       - nis_jmax[n]
 *           safe_imax        - RETURN: see above
 *           safe_jmin        - RETURN: see above
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_prestate_step2_determine_safe(int n,
				     int nss_max_imin_np1, int nss_min_jmax_np1,
				     int nis_imin_n,
				     int nis_jmax_n,
				     int *safe_imax, int *safe_jmin)
{
  int lowest_i  = nis_imin_n;
  int highest_j = nis_jmax_n;

  if(nss_max_imin_np1 < lowest_i)
    lowest_i = nss_max_imin_np1;
  if(nss_min_jmax_np1 > highest_j)
    highest_j = nss_min_jmax_np1;

  *safe_imax = lowest_i;
  *safe_jmin = highest_j;
}
4335 /*****************************************************************************
4336  * EPN 12.21.05
4337  * Function: hmm2ij_prestate_step1_set_node_inserts
4338  *
4339  * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
4340  *
4341  *****************************************************************************/
4342 void
hmm2ij_prestate_step3_preset_node_splits(int n,int * nis_imin,int * nis_imax,int * nis_jmin,int * nis_jmax,int * nss_imin,int * nss_imax,int * nss_jmin,int * nss_jmax,int * pn_min_m,int * pn_max_m,int * pn_min_d,int * pn_max_d,CP9Map_t * cp9map)4343 hmm2ij_prestate_step3_preset_node_splits(int n, int *nis_imin, int *nis_imax,
4344 					 int *nis_jmin, int *nis_jmax,
4345 					 int *nss_imin, int *nss_imax,
4346 					 int *nss_jmin, int *nss_jmax,
4347 					 int *pn_min_m, int *pn_max_m,
4348 					 int *pn_min_d, int *pn_max_d,
4349 					 CP9Map_t *cp9map)
4350 {
4351   if(cp9map->nd2lpos[n] != -1)
4352     {
4353       nss_imin[n] = (pn_min_m[cp9map->nd2lpos[n]] < (pn_min_d[cp9map->nd2lpos[n]])) ?
4354 	pn_min_m[cp9map->nd2lpos[n]] : (pn_min_d[cp9map->nd2lpos[n]]);
4355       nss_imax[n] = (pn_max_m[cp9map->nd2lpos[n]] > (pn_max_d[cp9map->nd2lpos[n]])) ?
4356 	pn_max_m[cp9map->nd2lpos[n]] : (pn_max_d[cp9map->nd2lpos[n]]);
4357     }
4358   else
4359     {
4360       nss_imin[n] = nss_imin[n+1];
4361       nss_imax[n] = nss_imax[n+1];
4362     }
4363   if(cp9map->nd2rpos[n] != -1)
4364     {
4365       nss_jmin[n] = (pn_min_m[cp9map->nd2rpos[n]] < pn_min_d[cp9map->nd2rpos[n]]) ?
4366 	pn_min_m[cp9map->nd2rpos[n]] : pn_min_d[cp9map->nd2rpos[n]];
4367       nss_jmax[n] = (pn_max_m[cp9map->nd2rpos[n]] > pn_max_d[cp9map->nd2rpos[n]]) ?
4368 	pn_max_m[cp9map->nd2rpos[n]] : pn_max_d[cp9map->nd2rpos[n]];
4369     }
4370   else
4371     {
4372       nss_jmin[n] = nss_jmin[n+1];
4373       nss_jmax[n] = nss_jmax[n+1];
4374     }
4375 }
4376 
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_split_state_step1_set_state_bands
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
 *
 *           Stores the candidate bands tmp_{i,j}{min,max} as the i and j
 *           bands of state <v>, then widens the bands of node <n>
 *           (nss_{i,j}{min,max}[n]) wherever state v's band extends
 *           beyond them, so the node bands always enclose the bands of
 *           every state set through this function.
 *
 *****************************************************************************/
void
hmm2ij_split_state_step1_set_state_bands(int v, int n,
					 int tmp_imin, int tmp_imax,
					 int tmp_jmin, int tmp_jmax,
					 int *imin, int *imax, int *jmin, int *jmax,
					 int *nss_imin, int *nss_imax,
					 int *nss_jmin, int *nss_jmax)
{
  /* state v takes the candidate bands as is */
  imin[v] = tmp_imin;
  imax[v] = tmp_imax;
  jmin[v] = tmp_jmin;
  jmax[v] = tmp_jmax;

  /* lower bounds of node n can only move down, upper bounds only up */
  nss_imin[n] = (imin[v] < nss_imin[n]) ? imin[v] : nss_imin[n];
  nss_imax[n] = (imax[v] > nss_imax[n]) ? imax[v] : nss_imax[n];
  nss_jmin[n] = (jmin[v] < nss_jmin[n]) ? jmin[v] : nss_jmin[n];
  nss_jmax[n] = (jmax[v] > nss_jmax[n]) ? jmax[v] : nss_jmax[n];
}
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_insert_state_step1_set_state_bands
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
 *
 *           Stores the candidate bands tmp_{i,j}{min,max} as the i and j
 *           bands of state <v>. Unlike the split state version, no
 *           node-level bands are touched.
 *
 *****************************************************************************/
void
hmm2ij_insert_state_step1_set_state_bands(int v,
					  int tmp_imin, int tmp_imax,
					  int tmp_jmin, int tmp_jmax,
					  int *imin, int *imax, int *jmin, int *jmax)
{
  /* band on i (left end point) */
  imin[v] = tmp_imin;
  imax[v] = tmp_imax;

  /* band on j (right end point) */
  jmin[v] = tmp_jmin;
  jmax[v] = tmp_jmax;
}
4423 /*****************************************************************************
4424  * EPN 12.21.05
4425  * Function: hmm2ij_state_step2_enforce_safe_trans
4426  *
4427  * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
4428  *
4429  *****************************************************************************/
4430 void
hmm2ij_state_step2_enforce_safe_trans(CM_t * cm,int v,int n,int * imax,int * jmin,int * nss_imax,int * nss_jmin,int safe_imax,int safe_jmin)4431 hmm2ij_state_step2_enforce_safe_trans(CM_t *cm, int v, int n, int *imax, int *jmin,
4432 				      int *nss_imax, int *nss_jmin,
4433 				      int safe_imax, int safe_jmin)
4434 {
4435   int dv_l;
4436   int dv_r;
4437   if((cm->sttype[v] == ML_st) ||
4438      (cm->sttype[v] == IL_st) ||
4439      (cm->sttype[v] == MP_st))
4440     dv_l = 1;
4441   else
4442     dv_l = 0;
4443   if((cm->sttype[v] == MR_st) ||
4444      (cm->sttype[v] == IR_st) ||
4445      (cm->sttype[v] == MP_st))
4446     dv_r = 1;
4447   else
4448     dv_r = 0;
4449   if(imax[v] < safe_imax - dv_l)
4450     {
4451       imax[v] = safe_imax - dv_l;
4452       if(imax[v] > nss_imax[n])
4453 	nss_imax[n] = imax[v];
4454     }
4455   if(jmin[v] > safe_jmin + dv_r)
4456     {
4457       jmin[v] = safe_jmin + dv_r;
4458       if(jmin[v] < nss_jmin[n])
4459 	nss_jmin[n] = jmin[v];
4460     }
4461 }
4462 
4463 /*****************************************************************************
4464  * EPN 12.21.05
4465  * Function: hmm2ij_state_step3_enforce_state_delta
4466  *
4467  * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
4468  *
4469  *****************************************************************************/
4470 void
hmm2ij_state_step3_enforce_state_delta(CM_t * cm,int v,int * jmin,int * jmax)4471 hmm2ij_state_step3_enforce_state_delta(CM_t *cm, int v, int *jmin, int *jmax)
4472 {
4473   int dv_l;
4474   int dv_r;
4475   if((cm->sttype[v] == ML_st) ||
4476      (cm->sttype[v] == IL_st) ||
4477      (cm->sttype[v] == MP_st))
4478     dv_l = 1;
4479   else
4480     dv_l = 0;
4481   if((cm->sttype[v] == MR_st) ||
4482      (cm->sttype[v] == IR_st) ||
4483      (cm->sttype[v] == MP_st))
4484     dv_r = 1;
4485   else
4486     dv_r = 0;
4487   if(jmin[v] < (dv_l + dv_r))
4488      jmin[v] = dv_l + dv_r;
4489   if(jmax[v] < (dv_l + dv_r))
4490     jmax[v] = dv_l + dv_r;
4491 }
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_state_step4_update_safe_holders
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
 *
 *           Folds one state's imin and jmax into the running per-node
 *           extremes: nss_max_imin[n] becomes the larger of itself and
 *           <imin_v>, nss_min_jmax[n] the smaller of itself and <jmax_v>.
 *           <v> is not read.
 *
 *****************************************************************************/
void
hmm2ij_state_step4_update_safe_holders(int v, int n, int imin_v, int jmax_v, int *nss_max_imin,
				       int *nss_min_jmax)
{
  int *max_imin = &nss_max_imin[n];   /* largest imin seen so far for node n  */
  int *min_jmax = &nss_min_jmax[n];   /* smallest jmax seen so far for node n */

  if(*max_imin < imin_v) { *max_imin = imin_v; }
  if(*min_jmax > jmax_v) { *min_jmax = jmax_v; }
}
4508 
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_state_step5_non_emitter_d0_hack
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() function helper function.
 *
 *           Allows for the possibility that d=0 for delete states:
 *           jmin[v] is decremented by one when it is positive and not
 *           greater than <imax_v>; otherwise it is left unchanged.
 *
 *           The original code also carried a second, commented-out
 *           decrement keyed on imax; it was never active and is not
 *           applied here.
 *
 *****************************************************************************/
void
hmm2ij_state_step5_non_emitter_d0_hack(int v, int imax_v, int *jmin)
{
  if(jmin[v] > imax_v) return;   /* j band already starts past imax */
  if(jmin[v] <= 0)     return;   /* nothing left to give up */

  jmin[v]--;
}
4526 
4527 /* Function: cp9_ShiftCMBands()
4528  *
4529  * Description: Given a CM with a valid cm->cp9b CP9 bands object
4530  *              calculated for a sequence in coordinates 1..i..j..L,
4531  *              subtract a fixed offset (i-1) from all CM positions
4532  *              in cp9b (cp9b->imin, cp9b->imax, cp9b->jmin, cp9b->jmax)
4533  *              so the bands will now pertain to the same hit if
4534  *              its coordinates were shifted to 1..j-i+1. This is used
4535  *              prior to alignment of a pre-defined hit from i..j
4536  *              using bands calculated when i..j was within its larger
4537  *              context of 1..i..j..L. During alignment hits always
4538  *              start at position 1.
4539  *
4540  *              Because only positions i..j are possible in the subsequent
4541  *              alignment, bands that allow residues before i or after
4542  *              j are tightened to only include within i..j. This will
4543  *              make states that were possible to reach only with residues
4544  *              before i or after j now impossible to reach.
4545  *
4546  *              Once all i and j bands are updated, ij2d_bands()
4547  *              is used to update the d bands.
4548  *
4549  *              NOTE: after calling this function cp9b will fail
4550  *              a cp9_ValidateBands() function call.
4551  *
4552  * Args:        CM - the CM, with a valid cm->cp9b CP9Bands_t object
4553  *              i  - first position of hit, this is the offset
4554  *              j  - final position of hit, used only to determine hit length
4555  * Returns: (void)
4556  */
4557 void
cp9_ShiftCMBands(CM_t * cm,int i,int j,int do_trunc)4558 cp9_ShiftCMBands(CM_t *cm, int i, int j, int do_trunc)
4559 {
4560   int v;
4561   int ip = i-1;
4562   int Lp = j-i+1;
4563   int sd, sdl, sdr;
4564   int min_i, max_i, min_j, max_j;
4565 
4566 #if eslDEBUGLEVEL >= 1
4567   printf("#DEBUG: cp9_ShiftCMBands(), i: %d j: %d Lp: %d\n", i, j, Lp);
4568 #endif
4569 
4570   for(v = 0; v < cm->M; v++) {
4571     sd  = StateDelta(cm->sttype[v]);
4572     sdl = StateLeftDelta(cm->sttype[v]);
4573     sdr = StateRightDelta(cm->sttype[v]);
4574     if(cm->cp9b->imin[v] > 0) { /* state is currently possible to reach */
4575       min_i = 1;
4576       max_i = do_trunc ? Lp+1-ESL_MAX(sdl, sdr) : Lp+1-sd;
4577       /* careful! if do_trunc, d can be 1 for MP states, so i can be
4578        * at most Lp. Note: d can't be 0 for ML/IL in R mode, MR/IR in
4579        * L mode even though you might think it could be. We'll always
4580        * do a truncated begin with d=1 for L,R marginal alignments.
4581        *
4582        * If ! do_trunc, then d must be at least sd for all states, hence
4583        * the max i of Lp+1-sd.
4584        *
4585        * This is bug i37, one of the two bugs in 1.1rc2, which previously
4586        * had this as 'max_i = Lp'.
4587        */
4588 
4589       min_j = do_trunc ? ESL_MAX(sdl, sdr) : sd;
4590       max_j = ESL_MAX(Lp, min_j);
4591       /* if (do_trunc) d can be 1 for MP states, this is why we use
4592        * ESL_MAX() call for min_j above.  Note: d can't be 0 for ML/IL
4593        * in R mode, MR/IR in L mode even though you might think it
4594        * could be. We'll always do a truncated begin with d=1 for L,R
4595        * marginal alignments. */
4596 
4597       cm->cp9b->imin[v] = ESL_MAX(cm->cp9b->imin[v] - ip, min_i);
4598       cm->cp9b->imax[v] = ESL_MIN(cm->cp9b->imax[v] - ip, max_i);
4599 
4600       cm->cp9b->jmin[v] = ESL_MAX(cm->cp9b->jmin[v] - ip, min_j);
4601       cm->cp9b->jmax[v] = ESL_MIN(cm->cp9b->jmax[v] - ip, max_j);
4602 
4603       if(cm->cp9b->imax[v] <  min_i || cm->cp9b->jmax[v] < min_j ||
4604 	 cm->cp9b->imin[v] >  max_i || cm->cp9b->jmin[v] > max_j) {
4605 	/* this state is now impossible to reach */
4606 	cm->cp9b->imin[v] = cm->cp9b->jmin[v] = -1;
4607 	cm->cp9b->imax[v] = cm->cp9b->jmax[v] = -2;
4608       }
4609     }
4610   }
4611   ij2d_bands(cm, Lp, cm->cp9b->imin, cm->cp9b->imax, cm->cp9b->jmin, cm->cp9b->jmax, cm->cp9b->hdmin, cm->cp9b->hdmax, do_trunc, 0);
4612   /* Note that this will not update hdmin bands that are no longer within jmin..jmax, that's okay */
4613 
4614   return;
4615 }
4616 
4617 /* Function: cp9_CloneBands()
4618  *
4619  * Description: Clone a CP9Bands_t *cp9b object and return it.
4620  *
4621  * Args:    cp9b - the CP9Bands_t object to clone
4622  *
4623  * Returns: the clone CP9Bands_t object.
4624  */
4625 CP9Bands_t *
cp9_CloneBands(CP9Bands_t * src_cp9b,char * errbuf)4626 cp9_CloneBands(CP9Bands_t *src_cp9b, char *errbuf)
4627 {
4628   int status;
4629   CP9Bands_t *dest_cp9b = NULL;
4630   dest_cp9b = AllocCP9Bands(src_cp9b->cm_M, src_cp9b->hmm_M);
4631 
4632   esl_vec_ICopy(src_cp9b->pn_min_m,  src_cp9b->hmm_M+1, dest_cp9b->pn_min_m);
4633   esl_vec_ICopy(src_cp9b->pn_max_m,  src_cp9b->hmm_M+1, dest_cp9b->pn_max_m);
4634   esl_vec_ICopy(src_cp9b->pn_min_i,  src_cp9b->hmm_M+1, dest_cp9b->pn_min_i);
4635   esl_vec_ICopy(src_cp9b->pn_max_i,  src_cp9b->hmm_M+1, dest_cp9b->pn_max_i);
4636   esl_vec_ICopy(src_cp9b->pn_min_d,  src_cp9b->hmm_M+1, dest_cp9b->pn_min_d);
4637   esl_vec_ICopy(src_cp9b->pn_max_d,  src_cp9b->hmm_M+1, dest_cp9b->pn_max_d);
4638   esl_vec_ICopy(src_cp9b->isum_pn_m, src_cp9b->hmm_M+1, dest_cp9b->isum_pn_m);
4639   esl_vec_ICopy(src_cp9b->isum_pn_i, src_cp9b->hmm_M+1, dest_cp9b->isum_pn_i);
4640   esl_vec_ICopy(src_cp9b->isum_pn_d, src_cp9b->hmm_M+1, dest_cp9b->isum_pn_d);
4641 
4642   dest_cp9b->sp1 = src_cp9b->sp1;
4643   dest_cp9b->ep1 = src_cp9b->ep1;
4644   dest_cp9b->sp2 = src_cp9b->sp2;
4645   dest_cp9b->ep2 = src_cp9b->ep2;
4646 
4647   dest_cp9b->thresh1 = src_cp9b->thresh1;
4648   dest_cp9b->thresh2 = src_cp9b->thresh2;
4649 
4650   dest_cp9b->Rmarg_imin = src_cp9b->Rmarg_imin;
4651   dest_cp9b->Rmarg_imax = src_cp9b->Rmarg_imax;
4652   dest_cp9b->Lmarg_jmin = src_cp9b->Lmarg_jmin;
4653   dest_cp9b->Lmarg_jmax = src_cp9b->Lmarg_jmax;
4654 
4655   esl_vec_ICopy(src_cp9b->Jvalid, (src_cp9b->cm_M+1), dest_cp9b->Jvalid);
4656   esl_vec_ICopy(src_cp9b->Lvalid, (src_cp9b->cm_M+1), dest_cp9b->Lvalid);
4657   esl_vec_ICopy(src_cp9b->Rvalid, (src_cp9b->cm_M+1), dest_cp9b->Rvalid);
4658   esl_vec_ICopy(src_cp9b->Tvalid, (src_cp9b->cm_M+1), dest_cp9b->Tvalid);
4659 
4660   esl_vec_ICopy(src_cp9b->imin, src_cp9b->cm_M, dest_cp9b->imin);
4661   esl_vec_ICopy(src_cp9b->imax, src_cp9b->cm_M, dest_cp9b->imax);
4662   esl_vec_ICopy(src_cp9b->jmin, src_cp9b->cm_M, dest_cp9b->jmin);
4663   esl_vec_ICopy(src_cp9b->jmax, src_cp9b->cm_M, dest_cp9b->jmax);
4664 
4665   if(src_cp9b->hd_alloced > 0) {
4666     /* set hdmin, hdmax ptrs and hd_needed and hd_alloced (all set in cp9GrowHDBands()) */
4667     if((status = cp9_GrowHDBands(dest_cp9b, errbuf)) != eslOK) goto ERROR;
4668     esl_vec_ICopy(src_cp9b->hdmin_mem, dest_cp9b->hd_alloced, dest_cp9b->hdmin_mem);
4669     esl_vec_ICopy(src_cp9b->hdmax_mem, dest_cp9b->hd_alloced, dest_cp9b->hdmax_mem);
4670   }
4671 
4672   esl_vec_ICopy(src_cp9b->safe_hdmin, src_cp9b->cm_M, dest_cp9b->safe_hdmin);
4673   esl_vec_ICopy(src_cp9b->safe_hdmax, src_cp9b->cm_M, dest_cp9b->safe_hdmax);
4674 
4675   dest_cp9b->tau = src_cp9b->tau;
4676 
4677   return dest_cp9b;
4678 
4679  ERROR:
4680   if(dest_cp9b != NULL) FreeCP9Bands(dest_cp9b);
4681   return NULL;
4682 }
4683 
4684 /* Function: cp9_PredictStartAndEndPositions()
4685  * Date:     EPN, Tue Sep  6 11:43:18 2011
4686  *
4687  * Purpose: Given a filled HMM posterior matrix and a CP9Bands_t
4688  *          object with valid pn_{min,max}{m,i,d} bands, determine the
4689  *          first and final HMM nodes that have a probability of being
4690  *          occupied that exceeds <cp9b->thresh1> and <cp9b->thresh2>.
4691  *          Store these four values in:
4692  *          <cp9b->sp1>: minimum position that might be used       (p > cp9b->thresh1, typically 0.01)
4693  *          <cp9b->sp2>: minimum position that will likely be used (p > cp9b->thresh2, typically 0.98)
4694  *          <cp9b->ep1>: maximum position that might be used       (p > cp9b->thresh1, typically 0.01)
4695  *          <cp9b->ep2>: maximum position that will likely be used (p > cp9b->thresh2, typically 0.98)
4696  *
4697  *          If no HMM node has an occupancy probability that exceeds
4698  *          <cp9b->thresh2> then sp2 and ep2 are set as out-of-bounds
4699  *          values M+1 and 0 respectively.
4700  *
4701  *          If no HMM node has an occupancy probability that exceeds
4702  *          <cp9b->thresh1> then sp1 and ep1 are set as out-of-bounds
4703  *          values M+1 and 0 respectively, though this should be
4704  *          very rare.
4705  *
4706  *          Using out-of-bounds values means we can't get any
4707  *          information about where the alignment starts and ends from
4708  *          the HMM. This has the effect that in a downstream call to
4709  *          cp9_MarginalCandidatesFromStartEndPositions() all marginal
4710  *          modes will be possible for all states and the eventual
4711  *          alignment will essentially mimic a non-banded one.
4712  *
4713  *          Also determine the CM bands on i and j that
4714  *          will be used to allow for marginal alignments,
4715  *          store these in <cp9b->{L,R}marg{i,j}_{min,max}.
4716  *
4717  * CP9_MX pmx:      DP matrix for posteriors, already calc'ed
4718  * CP9Bands_t cp9b: the cp9 bands
4719  * int i0           start of target subsequence (often 1, beginning of dsq)
4720  * int j0           end of target subsequence (often L, end of dsq)
4721  *
4722  * Returns: void
4723  *
4724  * xref: ELN2 notebook, p.146-147; ~nawrockie/notebook/11_0816_inf_banded_trcyk/00LOG
4725  */
4726 void
cp9_PredictStartAndEndPositions(CP9_MX * pmx,CP9Bands_t * cp9b,int i0,int j0)4727 cp9_PredictStartAndEndPositions(CP9_MX *pmx, CP9Bands_t *cp9b, int i0, int j0)
4728 {
4729   int i;
4730   int k;                                  /* counter over nodes of the model */
4731   int L = j0-i0+1;                        /* length of sequence */
4732   int   iocc;       /* occupancy probability, scaled int form */
4733   float pocc;       /* occupancy probability, probability form */
4734 
4735   /* Calculate minimum start positions: */
4736   k = 1;
4737   cp9b->sp1 = cp9b->sp2 = -1;
4738   while(k <= cp9b->hmm_M && (cp9b->sp1 == -1 || cp9b->sp2 == -1)) {
4739     if(cp9b->pn_min_m[k] == -1 && cp9b->pn_min_i[k] == -1 && cp9b->pn_min_d[k] == -1) {
4740       /*printf("k: %4d pocc IRRELEVANT (k unreachable, skipping)\n", k);*/
4741       k++;
4742       /* M, I, D states in node k are unreachable (no posterior cells had more than
4743        * cm->tau probability mass), k won't be our sp1 or sp2 */
4744     }
4745     else {
4746       iocc = -INFTY;
4747       for(i = 0; i <= L; i++) {
4748 	iocc = ILogsum(iocc, ILogsum(pmx->mmx[i][k], pmx->dmx[i][k]));
4749       }
4750       pocc = Score2Prob(iocc, 1.);
4751       /*printf("k: %4d pocc: %.4f\n", k, pocc);*/
4752       if((cp9b->sp1 == -1) && (pocc > cp9b->thresh1)) cp9b->sp1 = k;
4753       if((cp9b->sp2 == -1) && (pocc > cp9b->thresh2)) cp9b->sp2 = k;
4754       k++;
4755     }
4756   }
4757   if(k == cp9b->hmm_M+1) {
4758     if(cp9b->sp1 == -1) { cp9b->sp1 = cp9b->hmm_M+1; } /* no node k has occupancy > thresh1, set as out-of-bounds value M+1 */
4759     if(cp9b->sp2 == -1) { cp9b->sp2 = cp9b->hmm_M+1; } /* no node k has occupancy > thresh2,  set as out-of-bounds value M+1 */
4760   }
4761 
4762   /* Calculate maximum end positions: */
4763   if((cp9b->sp1 == cp9b->hmm_M+1) &&
4764      (cp9b->sp2 == cp9b->hmm_M+1)) {
4765     /* we already know that there's no nodes that satisfy either thresh1 or thresh2, we can save time here */
4766     cp9b->ep1 = 0;
4767     cp9b->ep2 = 0;
4768   }
4769   else {
4770     cp9b->ep1 = cp9b->ep2 = -1;
4771     k = cp9b->hmm_M;
4772     while(k >= 1 && (cp9b->ep1 == -1 || cp9b->ep2 == -1)) {
4773       if(cp9b->pn_min_m[k] == -1 && cp9b->pn_min_i[k] == -1 && cp9b->pn_min_d[k] == -1) {
4774 	/*printf("k: %4d pocc IRRELEVANT (k unreachable, skipping)\n", k);*/
4775 	k--;
4776 	/* M, I, D states in node k are unreachable (no posterior cells had more than
4777 	 * cm->tau probability mass), k won't be our ep1 or ep2 */
4778       }
4779       else {
4780 	iocc = -INFTY;
4781 	for(i = 0; i <= L; i++) {
4782 	  iocc = ILogsum(iocc, ILogsum(pmx->mmx[i][k], pmx->dmx[i][k]));
4783 	}
4784 	pocc = Score2Prob(iocc, 1.);
4785 	/*printf("k: %4d pocc: %.4f\n", k, pocc);*/
4786 	if((cp9b->ep1 == -1) && (pocc > cp9b->thresh1)) cp9b->ep1 = k;
4787 	if((cp9b->ep2 == -1) && (pocc > cp9b->thresh2)) cp9b->ep2 = k;
4788 	k--;
4789       }
4790     }
4791     if(k == 0) {
4792       if(cp9b->ep1 == -1) { cp9b->ep1 = 0; } /* no node k has occupancy > thresh1, set as out-of-bounds value 0 */
4793       if(cp9b->ep2 == -1) { cp9b->ep2 = 0; } /* no node k has occupancy > thresh2, set as out-of-bounds value 0 */
4794     }
4795   }
4796 
4797   /* determine cp9b->{R,L}marg_{i,j}{min,max}, the i and j bands that will be used to allow for marginal left (Lmarg_j{min,max}
4798    * and marginal right (Rmarg_i{min,max} alignment. */
4799    /* set cp9b->Rmarg_imin */
4800   if(cp9b->sp1 == cp9b->hmm_M+1) { cp9b->Rmarg_imin = i0; }
4801   else {
4802     cp9b->Rmarg_imin = INT_MAX;
4803     if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_min_m[cp9b->sp1] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_m[cp9b->sp1]);
4804     if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_min_i[cp9b->sp1] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_i[cp9b->sp1]);
4805     if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_min_d[cp9b->sp1] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_d[cp9b->sp1]);
4806     if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_min_m[cp9b->sp2] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_m[cp9b->sp2]);
4807     if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_min_i[cp9b->sp2] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_i[cp9b->sp2]);
4808     if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_min_d[cp9b->sp2] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_d[cp9b->sp2]);
4809     if(cp9b->Rmarg_imin == INT_MAX || cp9b->sp1 == (cp9b->hmm_M+1) || cp9b->sp2 == (cp9b->hmm_M+1)) cp9b->Rmarg_imin = i0;
4810     cp9b->Rmarg_imin = ESL_MAX(i0,   cp9b->Rmarg_imin); /* i can't be less than i0 */
4811     cp9b->Rmarg_imin = ESL_MIN(j0+1, cp9b->Rmarg_imin); /* i can't be more than j0+1 */
4812   }
4813 
4814   /* set cp9b->Rmarg_imax */
4815   if(cp9b->sp1 == cp9b->hmm_M+1) { cp9b->Rmarg_imax = j0; }
4816   else {
4817     cp9b->Rmarg_imax = INT_MIN;
4818     if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_max_m[cp9b->sp1] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_m[cp9b->sp1]);
4819     if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_max_i[cp9b->sp1] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_i[cp9b->sp1]);
4820     if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_max_d[cp9b->sp1] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_d[cp9b->sp1]);
4821     if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_max_m[cp9b->sp2] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_m[cp9b->sp2]);
4822     if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_max_i[cp9b->sp2] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_i[cp9b->sp2]);
4823     if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_max_d[cp9b->sp2] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_d[cp9b->sp2]);
4824     if(cp9b->Rmarg_imax == INT_MIN || cp9b->sp1 == (cp9b->hmm_M+1) || cp9b->sp2 == (cp9b->hmm_M+1)) cp9b->Rmarg_imax = j0+1;
4825     cp9b->Rmarg_imax = ESL_MAX(i0,   cp9b->Rmarg_imax); /* i can't be less than i0 */
4826     cp9b->Rmarg_imax = ESL_MIN(j0+1, cp9b->Rmarg_imax); /* i can't be more than j0+1 */
4827   }
4828 
4829   /* set cp9b->Lmarg_jmin */
4830   if(cp9b->ep1 == 0) { cp9b->Lmarg_jmin = i0-1; }
4831   else {
4832     cp9b->Lmarg_jmin = INT_MAX;
4833     if(cp9b->ep1 != 0 && cp9b->pn_min_m[cp9b->ep1] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_m[cp9b->ep1]);
4834     if(cp9b->ep1 != 0 && cp9b->pn_min_i[cp9b->ep1] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_i[cp9b->ep1]);
4835     if(cp9b->ep1 != 0 && cp9b->pn_min_d[cp9b->ep1] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_d[cp9b->ep1]-1); /* off-by-one with deletes in HMM vs CM */
4836     if(cp9b->ep2 != 0 && cp9b->pn_min_m[cp9b->ep2] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_m[cp9b->ep2]);
4837     if(cp9b->ep2 != 0 && cp9b->pn_min_i[cp9b->ep2] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_i[cp9b->ep2]);
4838     if(cp9b->ep2 != 0 && cp9b->pn_min_d[cp9b->ep2] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_d[cp9b->ep2]-1); /* off-by-one with deletes in HMM vs CM */
4839     if(cp9b->Lmarg_jmin == INT_MAX || cp9b->ep1 == 0 || cp9b->ep2 == 0) cp9b->Lmarg_jmin = i0-1;
4840     cp9b->Lmarg_jmin = ESL_MAX(i0-1, cp9b->Lmarg_jmin); /* j can't be less than i0-1 */
4841     cp9b->Lmarg_jmin = ESL_MIN(j0,   cp9b->Lmarg_jmin); /* j can't be more than j0 */
4842   }
4843 
4844   /* set cp9b->Lmarg_jmax */
4845   if(cp9b->ep1 == 0) { cp9b->Lmarg_jmax = j0; }
4846   else {
4847     cp9b->Lmarg_jmax = INT_MIN;
4848     if(cp9b->ep1 != 0 && cp9b->pn_max_m[cp9b->ep1] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_m[cp9b->ep1]);
4849     if(cp9b->ep1 != 0 && cp9b->pn_max_i[cp9b->ep1] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_i[cp9b->ep1]);
4850     if(cp9b->ep1 != 0 && cp9b->pn_max_d[cp9b->ep1] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_d[cp9b->ep1]-1); /* off-by-one with deletes in HMM vs CM */
4851     if(cp9b->ep2 != 0 && cp9b->pn_max_m[cp9b->ep2] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_m[cp9b->ep2]);
4852     if(cp9b->ep2 != 0 && cp9b->pn_max_i[cp9b->ep2] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_i[cp9b->ep2]);
4853     if(cp9b->ep2 != 0 && cp9b->pn_max_d[cp9b->ep2] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_d[cp9b->ep2]-1); /* off-by-one with deletes in HMM vs CM */
4854     if(cp9b->Lmarg_jmax == INT_MIN || cp9b->ep1 == 0 || cp9b->ep2 == 0) cp9b->Lmarg_jmax = j0;
4855     cp9b->Lmarg_jmax = ESL_MAX(i0-1, cp9b->Lmarg_jmax); /* j can't be less than i0-1 */
4856     cp9b->Lmarg_jmax = ESL_MIN(j0,   cp9b->Lmarg_jmax); /* j can't be more than j0 */
4857   }
4858 
4859 #if 0
4860   printf("HEYA Returning from cp9_PredictStartAndEndPositions():\n\t");
4861     printf("sp1: %4d\n\t", cp9b->sp1);
4862     printf("sp2: %4d\n\t", cp9b->sp2);
4863     printf("ep2: %4d\n\t", cp9b->ep2);
4864     printf("ep1: %4d\n\t", cp9b->ep1);
4865     printf("Ljn: %4d\n\t", cp9b->Lmarg_jmin);
4866     printf("Ljx: %4d\n\t", cp9b->Lmarg_jmax);
4867     printf("Rin: %4d\n\t", cp9b->Rmarg_imin);
4868     printf("Rix: %4d\n\n", cp9b->Rmarg_imax);
4869 #endif
4870 
4871   return;
4872 }
4873 
4874 
4875 /* Function: cp9_MarginalCandidatesFromStartEndPositions()
4876  * Date:     EPN, Tue Sep  6 14:50:16 2011
4877  *
4878  * Purpose: Given a CP9Bands_t object with valid sp1, sp2, ep1, and
4879  *          ep2 values from cp9_PredictStartAndEndPositions(),
4880  *          determine for each CM state v, whether a joint (J), left
4881  *          marginal (L) right marginal (R), or terminal marginal
4882  *          alignment that includes v should be allowed. For any
4883  *          disallowed type of alignment we will be able to skip the
4884  *          corresponding calculations in a trCYK/trInside/trOutside
4885  *          DP recursion.  And we won't have to allocate memory for
4886  *          that state in the corresponding (J,L,R,T) DP matrix.
4887  *
4888  *          We can determine from passed-in <pass_idx>, which type of
4889  *          marginal alignments will be allowed. If L alignments
4890  *          are not allowed, Lvalid[] will be FALSE for all v.
4891  *          Likewise for R alignments and Rvalid[] and T alignments
4892  *          and Tvalid[].
4893  *
4894  * Args:      cm       - the model
4895  *            cp9b     - the cp9 bands
4896  *            pass_idx - the pipeline pass index we're on, dictates
4897  *                       which modes of marginal alns to allow
4898  *            errbuf   - for error messages
4899  *
4900  * Returns: eslOK on success; eslEINVAL if pass_idx is invalid (errbuf filled).
4901  *
4902  * xref: ELN2 notebook, p.146-147; ~nawrockie/notebook/11_0816_inf_banded_trcyk/00LOG
4903  */
4904 int
cp9_MarginalCandidatesFromStartEndPositions(CM_t * cm,CP9Bands_t * cp9b,int pass_idx,char * errbuf)4905 cp9_MarginalCandidatesFromStartEndPositions(CM_t *cm, CP9Bands_t *cp9b, int pass_idx, char *errbuf)
4906 {
4907   int status;
4908   int v;
4909   int nd;
4910   int lpos = 1;
4911   int rpos = cm->clen;
4912   int allow_L, allow_R, allow_T; /* will we allow L, R, and T alignments? */
4913 
4914   if((status = cm_TrFillFromPassIdx(pass_idx, &allow_L, &allow_R, &allow_T)) != eslOK) ESL_FAIL(status, errbuf, "cp9_MarginalCandidatesFromStartEndPositions(), unexpected pass idx: %d", pass_idx);
4915 
4916   for(v = 0; v < cp9b->cm_M; v++) {
4917     nd = cm->ndidx[v];
4918     /* Careful, emitmap is off-by-one for our purposes for lpos if v is not MATP_MP or MATL_ML, and rpos if v is not MATP_MP or MATR_MR */
4919     lpos = (cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd) ? cm->emap->lpos[nd] : cm->emap->lpos[nd]+1;
4920     rpos = (cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATR_nd) ? cm->emap->rpos[nd] : cm->emap->rpos[nd]-1;
4921 
4922     /* below: 'possibly' means probability > cp9b->thresh1 (typically 0.01) */
4923     /*        'probably' means probability > cp9b->thresh2 (typically 0.98) */
4924 
4925     /* Jvalid if both lpos and rpos are possibly used */
4926     cp9b->Jvalid[v] = ((lpos >= cp9b->sp1) && (rpos <= cp9b->ep1)) ? TRUE : FALSE;
4927 
4928     /* Lvalid if lpos is possibly used and rpos is possibly not used */
4929     cp9b->Lvalid[v] = (allow_L && (lpos >= cp9b->sp1 && lpos <= cp9b->ep1) && (rpos > cp9b->ep2)) ? TRUE : FALSE;
4930 
4931     /* Rvalid if rpos is possibly used and lpos is possibly not used */
4932     cp9b->Rvalid[v] = (allow_R && (rpos <= cp9b->ep1 && rpos >= cp9b->sp1) && (lpos < cp9b->sp2)) ? TRUE : FALSE;
4933 
4934     if(cm->sttype[v] == B_st) {
4935       /* Tvalid if lpos and rpos are possibly not used */
4936       cp9b->Tvalid[v] = (allow_T && (lpos < cp9b->sp2) && (rpos > cp9b->ep2)) ? TRUE : FALSE;
4937     }
4938     else {
4939       cp9b->Tvalid[v] = FALSE;
4940     }
4941 #if eslDEBUGLEVEL >= 1
4942     printf("#DEBUG: v: %4d [%4d..%4d] %4s %2s %d%d%d%d\n", v, lpos, rpos, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]),
4943 	   cp9b->Jvalid[v], cp9b->Lvalid[v], cp9b->Rvalid[v], cp9b->Tvalid[v]);
4944 #endif
4945   }
4946 
4947   /* The ROOT_S state is special, all hits are rooted there, if we can do a
4948    * truncated {J,L,R,T} begin into v, we need to set do_{J,L,R,T}[0] to TRUE.
4949    */
4950   for(v = 0; v < cp9b->cm_M; v++) {
4951     switch(cm->sttype[v]) {
4952     case B_st:
4953       if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4954       if(cp9b->Lvalid[v]) cp9b->Lvalid[0] = TRUE;
4955       if(cp9b->Rvalid[v]) cp9b->Rvalid[0] = TRUE;
4956       if(cp9b->Tvalid[v]) cp9b->Tvalid[0] = TRUE;
4957       break;
4958     case MP_st:
4959       if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4960       if(cp9b->Lvalid[v]) cp9b->Lvalid[0] = TRUE;
4961       if(cp9b->Rvalid[v]) cp9b->Rvalid[0] = TRUE;
4962       break;
4963     case ML_st:
4964     case IL_st:
4965       if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4966       if(cp9b->Lvalid[v]) cp9b->Lvalid[0] = TRUE;
4967       break;
4968     case MR_st:
4969     case IR_st:
4970       if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4971       if(cp9b->Rvalid[v]) cp9b->Rvalid[0] = TRUE;
4972       break;
4973     }
4974     if(cp9b->Jvalid[0] &&
4975        cp9b->Lvalid[0] &&
4976        cp9b->Rvalid[0] &&
4977        cp9b->Tvalid[0]) {
4978       v = cp9b->cm_M;
4979     }
4980   }
4981 
4982   /* The EL state is special, if local ends are on, make J, L and R
4983    * modes all valid. (We could only make those modes valid for
4984    * which there's a local end possible (e.g. make cm->M invalid for
4985    * R if R is not valid for any states), but empirically this is
4986    * rare, so I've opted to always allow all types to avoid allowing
4987    * the possibility that we turn {J,L,R}valid[cm->M] on and off
4988    * as we process seqs, thus avoiding all the possible complications
4989    * of doing that.
4990    */
4991   if(cm->flags & CMH_LOCAL_END) {
4992     cp9b->Jvalid[cm->M] = TRUE;
4993     cp9b->Lvalid[cm->M] = TRUE;
4994     cp9b->Rvalid[cm->M] = TRUE;
4995   }
4996 
4997   return eslOK;
4998 }
4999 
5000 
5001 
/****************************************************************************
 * Debugging print functions
 *
 * cp9_DebugPrintHMMBands()
 * PrintDPCellsSaved_jd()
 * debug_print_ij_bands()
 *
 * Currently not compiled (#if 0'ed out) but saved for reference:
 * ijBandedTraceInfoDump()
 * ijdBandedTraceInfoDump()
 * debug_print_hd_bands()
 * debug_print_parsetree_and_ij_bands()
 * cp9_RelaxRootBandsForSearch()
 *
 */
5016 #if 0
5017 static void         ijBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5018 					  int *jmin, int *jmax, int debug_level);
5019 static void         ijdBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5020 					   int *jmin, int *jmax, int **hdmin, int **hdmax,
5021 					   int debug_level);
5022 static void         debug_print_hd_bands(CM_t *cm, int **hdmin, int **hdmax, int *jmin, int *jmax);
5023 static void         debug_print_parsetree_and_ij_bands(FILE *fp, Parsetree_t *tr, CM_t *cm, ESL_DSQ *dsq, CP9Bands_t *cp9b);
5024 static void         cp9_RelaxRootBandsForSearch(CM_t *cm, int i0, int j0, int *imin, int *imax, int *jmin, int *jmax);
5025 #endif
5026 
5027 /* EPN 12.18.05
5028  * cp9_DebugPrintHMMBands()
5029  * based loosely on: cmbuild.c's
5030  * Function: model_trace_info_dump
5031  *
5032  * Purpose:  Print out the bands derived from the posteriors for the
5033  *           insert and match states of each HMM node.
5034  *
5035  * Args:
5036  * FILE *ofp      - filehandle to print to (can by STDOUT)
5037  * int L          - length of sequence
5038  * CP9Bands_t     - the CP9 bands data structure
5039  * double hmm_bandp - fraction of probability mass allowed outside each band.
5040  * int debug_level  [0..3] tells the function what level of debugging print
5041  *                  statements to print.
5042  * Returns: (void)
5043  */
5044 
5045 void
cp9_DebugPrintHMMBands(FILE * ofp,int L,CP9Bands_t * cp9b,double hmm_bandp,int debug_level)5046 cp9_DebugPrintHMMBands(FILE *ofp, int L, CP9Bands_t *cp9b, double hmm_bandp, int debug_level)
5047 {
5048   int M;
5049   int k;
5050   int cells_in_bands_m; /* number of cells within all the bands for match states*/
5051   int cells_in_bands_i; /* number of cells within all the bands for insert states*/
5052   int cells_in_bands_d; /* number of cells within all the bands for delete states*/
5053   int cells_in_bands_all; /* number of cells within all the bands for match and insert states*/
5054   int bw;               /* band width of current band */
5055 
5056   M = cp9b->hmm_M;
5057   cells_in_bands_m = cells_in_bands_i = cells_in_bands_d = cells_in_bands_all = 0;
5058 
5059   /* first print the bands on the match states */
5060   fprintf(ofp, "***********************************************************\n");
5061   if(debug_level > 0)
5062     fprintf(ofp, "printing hmm bands\n");
5063   fprintf(ofp, "hmm_bandp: %f\n", hmm_bandp);
5064   if(debug_level > 0)
5065     {
5066       fprintf(ofp, "\n");
5067       fprintf(ofp, "match states\n");
5068     }
5069   for(k = 0; k <= cp9b->hmm_M; k++)
5070     {
5071       bw = (cp9b->pn_min_m[k] == -1) ? 0 : cp9b->pn_max_m[k] - cp9b->pn_min_m[k] + 1;
5072       if(debug_level > 0 || debug_level == -1)
5073 	fprintf(ofp, "M node: %3d | min %3d | max %3d | w %3d \n", k, cp9b->pn_min_m[k], cp9b->pn_max_m[k], bw);
5074       cells_in_bands_m += bw;
5075     }
5076   if(debug_level > 0)
5077     fprintf(ofp, "\n");
5078   if(debug_level > 0)
5079     fprintf(ofp, "insert states\n");
5080   for(k = 0; k <= cp9b->hmm_M; k++)
5081     {
5082       bw = (cp9b->pn_min_i[k] == -1) ? 0 : cp9b->pn_max_i[k] - cp9b->pn_min_i[k] + 1;
5083       if(debug_level > 0 || debug_level == -1)
5084 	fprintf(ofp, "I node: %3d | min %3d | max %3d | w %3d\n", k, cp9b->pn_min_i[k], cp9b->pn_max_i[k], bw);
5085       cells_in_bands_i += bw;
5086     }
5087   if(debug_level > 0)
5088     fprintf(ofp, "\n");
5089   if(debug_level > 0)
5090     fprintf(ofp, "delete states\n");
5091   for(k = 1; k <= cp9b->hmm_M; k++)
5092     {
5093       bw = (cp9b->pn_min_d[k] == -1) ? 0 : cp9b->pn_max_d[k] - cp9b->pn_min_d[k] + 1;
5094       if(debug_level > 0 || debug_level == -1)
5095 	fprintf(ofp, "D node: %3d | min %3d | max %3d | w %3d\n", k, cp9b->pn_min_d[k], cp9b->pn_max_d[k], bw);
5096       cells_in_bands_d += bw;
5097     }
5098   if(debug_level > 0)
5099     {
5100       fprintf(ofp, "\n");
5101       printf("cells_in_bands_m : %d\n", cells_in_bands_m);
5102       printf("cells_in_bands_i : %d\n", cells_in_bands_i);
5103       printf("cells_in_bands_d : %d\n", cells_in_bands_d);
5104     }
5105 
5106   cells_in_bands_all = cells_in_bands_m + cells_in_bands_i + cells_in_bands_d;
5107   printf("fraction match excluded  : %f\n", (1 - ((float) cells_in_bands_m / (M * L))));
5108   printf("fraction insert excluded : %f\n", (1 - ((float) cells_in_bands_i / ((M-1) * L))));
5109   printf("fraction delete excluded : %f\n", (1 - ((float) cells_in_bands_d / ((M-1) * L))));
5110   printf("fraction total excluded  : %f\n", (1 - ((float) (cells_in_bands_all) / (((M-1) * L) + ((M-1) * L) + (M *L)))));
5111   fprintf(ofp, "***********************************************************\n");
5112 
5113 }
5114 
5115 /* Function: PrintDPCellsSaved_jd()
5116  * Prints out an estimate of the speed up due to j and d bands */
5117 void
PrintDPCellsSaved_jd(CM_t * cm,int * jmin,int * jmax,int ** hdmin,int ** hdmax,int W)5118 PrintDPCellsSaved_jd(CM_t *cm, int *jmin, int *jmax, int **hdmin, int **hdmax,
5119 		     int W)
5120 {
5121   int v;
5122   int j;
5123   int max;
5124   int64_t after, before;
5125 
5126   printf("Printing DP cells saved using j and d bands:\n");
5127   before = after = 0;
5128   for (v = 0; v < cm->M; v++)
5129     {
5130       for(j = 0; j <= W; j++)
5131 	if (cm->sttype[v] != E_st)
5132 	  before += j + 1;
5133       for(j = jmin[v]; j <= jmax[v]; j++)
5134 	if (cm->sttype[v] != E_st)
5135 	  {
5136 	    max = (j < hdmax[v][j-jmin[v]]) ? j : hdmax[v][j-jmin[v]];
5137 	    after += max - hdmin[v][j-jmin[v]] + 1;
5138 	  }
5139     }
5140   printf("Before:  something like %" PRId64 "\n", before);
5141   printf("After:   something like %" PRId64 "\n", after);
5142   printf("Speedup: maybe %.2f fold\n\n", (double) before / (double) after);
5143 }
5144 
5145 /* Function: debug_print_ij_bands
5146  *
5147  * Purpose:  Print out i and j bands for all states v.
5148  *
5149  */
5150 void
debug_print_ij_bands(CM_t * cm)5151 debug_print_ij_bands(CM_t *cm)
5152 {
5153   int v;
5154   printf("%5s  %-7s    %5s  %5s    %5s  %5s  %4s\n", "v",     "type",    "imin",  "imax",  "jmin",  "jmax", "JLRT");
5155   printf("%5s  %-7s    %5s  %5s    %5s  %5s  %4s\n", "-----", "-------", "-----", "-----", "-----", "-----", "----");
5156   for(v = 0; v < cm->M; v++)
5157     printf("%5d  %-7s    %5d  %5d    %5d  %5d  %d%d%d%d\n", v, CMStateid(cm->stid[v]), cm->cp9b->imin[v], cm->cp9b->imax[v], cm->cp9b->jmin[v], cm->cp9b->jmax[v],
5158 	   cm->cp9b->Jvalid[v], cm->cp9b->Lvalid[v], cm->cp9b->Rvalid[v], cm->cp9b->Tvalid[v]);
5159   return;
5160 }
5161 
5162 
5163 #if 0
5164 /* EPN 11.03.05
5165  * Function: ijBandedTraceInfoDump()
5166  *
5167  * Purpose:  Experimental HMMERNAL function used in development.
5168  *           This function determines how close the
5169  *           trace was to the bands for i and j at each state in the trace,
5170  *           and prints out that information in differing levels
5171  *           of verbosity depending on an input parameter
5172  *           (debug_level).
5173  *
5174  * Args:    cm       - the CM (useful for determining which states are E states)
5175  *          tr       - the parsetree (trace)
5176  *          imin     - minimum i bound for each state v; [0..v..M-1]
5177  *          imax     - maximum i bound for each state v; [0..v..M-1]
5178  *          jmin     - minimum j bound for each state v; [0..v..M-1]
5179  *          jmax     - maximum j bound for each state v; [0..v..M-1]
5180  *          debug_level - level of verbosity
5181  * Returns: (void)
5182  */
5183 
5184 void
5185 ijBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5186 		      int *jmin, int *jmax, int debug_level)
5187 {
5188   int v, i, j, d, tpos;
5189   int imindiff;            /* i - imin[v] */
5190   int imaxdiff;            /* imax[v] - i */
5191   int jmindiff;            /* j - jmin[v] */
5192   int jmaxdiff;            /* jmax[v] - j */
5193   int imin_out;
5194   int imax_out;
5195   int jmin_out;
5196   int jmax_out;
5197 
5198   imin_out = 0;
5199   imax_out = 0;
5200   jmin_out = 0;
5201   jmax_out = 0;
5202 
5203   debug_level = 2;
5204 
5205   for (tpos = 0; tpos < tr->n; tpos++)
5206     {
5207       v  = tr->state[tpos];
5208       i = tr->emitl[tpos];
5209       j = tr->emitr[tpos];
5210       d = j-i+1;
5211       imindiff = i-imin[v];
5212       imaxdiff = imax[v]-i;
5213       jmindiff = j-jmin[v];
5214       jmaxdiff = jmax[v]-j;
5215       if(cm->sttype[v] != E_st)
5216 	{
5217 	  if(imindiff < 0)
5218 	    imin_out++;
5219 	  if(imaxdiff < 0)
5220 	    imax_out++;
5221 	  if(jmindiff < 0)
5222 	    jmin_out++;
5223 	  if(jmaxdiff < 0)
5224 	    jmax_out++;
5225 
5226 	  if(debug_level > 1 || ((imindiff < 0) || (imaxdiff < 0) || (jmindiff < 0) || (jmaxdiff < 0)))
5227 	    {
5228 	      printf("v: %4d %-4s %-2s | d: %4d | i: %4d | in: %4d | ix: %4d | %3d | %3d |\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), d, i, imin[v], imax[v], imindiff, imaxdiff);
5229 	      printf("                          | j: %4d | jn: %4d | jx: %4d | %3d | %3d |\n", j, jmin[v], jmax[v], jmindiff, jmaxdiff);
5230 
5231 	    }
5232 	}
5233       else if(cm->sttype[v] == E_st)
5234 	{
5235 	  if(debug_level > 1)
5236 	    {
5237 	      printf("v: %4d %-4s %-2s | d: %4d | i: %4d | in: %4d | ix: %4d | %3d | %3d |\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), d, i, imin[v], imax[v], imindiff, imaxdiff);
5238 	      printf("                          | j: %4d | jn: %4d | jx: %4d | %3d | %3d |\n", j, jmin[v], jmax[v], jmindiff, jmaxdiff);
5239 	    }
5240 	}
5241     }
5242   printf("\nimin out: %d\n", imin_out);
5243   printf("imax out: %d\n", imax_out);
5244   printf("jmin out: %d\n", jmin_out);
5245   printf("jmax out: %d\n", jmax_out);
5246 
5247   if((imin_out + imax_out + jmin_out + jmax_out) > 0)
5248     {
5249       printf("ERROR, some of the i and j bands are going to prevent optimal alignment. Sorry.\n");
5250     }
5251 
5252   return;
5253 }
5254 
5255 
5256 /* EPN 11.03.05
5257  * Function: ijdBandedTraceInfoDump()
5258  *
5259  * Purpose:  Experimental HMMERNAL function used in development.
5260  *           This function determines how close the
5261  *           trace was to the bands for i and j and d at each state in the trace,
5262  *           and prints out that information in differing levels
5263  *           of verbosity depending on an input parameter
5264  *           (debug_level).
5265  *
5266  * Args:    cm       - the CM (useful for determining which states are E states)
5267  *          tr       - the parsetree (trace)
5268  *          imin     - minimum i bound for each state v; [0..v..M-1]
5269  *          imax     - maximum i bound for each state v; [0..v..M-1]
5270  *          jmin     - minimum j bound for each state v; [0..v..M-1]
5271  *          jmax     - maximum j bound for each state v; [0..v..M-1]
5272  *          hdmin    - minimum d bound for each state v and offset j;
5273  *                     [0..v..M-1][0..(jmax[v]-jmin[v])]
5274  *          hdmax    - maximum d bound for each state v and offset j;
5275  *                     [0..v..M-1][0..(jmax[v]-jmin[v])]
5276  *          debug_level - level of verbosity
5277  * Returns: (void)
5278  */
5279 
5280 void
5281 ijdBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5282 		       int *jmin, int *jmax, int **hdmin, int **hdmax, int debug_level)
5283 {
5284   int v, i, j, d, tpos;
5285   int imindiff;            /* i - imin[v] */
5286   int imaxdiff;            /* imax[v] - i */
5287   int jmindiff;            /* j - jmin[v] */
5288   int jmaxdiff;            /* jmax[v] - j */
5289   int hdmindiff;           /* d - hdmin[v][j] */
5290   int hdmaxdiff;           /* hdmax[v][j] - d */
5291 
5292   int imin_out;
5293   int imax_out;
5294   int jmin_out;
5295   int jmax_out;
5296   int hdmin_out;
5297   int hdmax_out;
5298   int local_used;
5299 
5300   imin_out = 0;
5301   imax_out = 0;
5302   jmin_out = 0;
5303   jmax_out = 0;
5304   hdmin_out = 0;
5305   hdmax_out = 0;
5306   local_used = 0;
5307 
5308   debug_level = 2;
5309 
5310   for (tpos = 0; tpos < tr->n; tpos++)
5311     {
5312       v  = tr->state[tpos];
5313       i = tr->emitl[tpos];
5314       j = tr->emitr[tpos];
5315       d = j-i+1;
5316       if(cm->sttype[v] == EL_st) /*END LOCAL state*/
5317 	{
5318 	  if(debug_level > 1)
5319 	    {
5320 	      printf("v: %4d NA   %-2s (  NA) | d: %4d | i: %4d | in: NA    | ix: NA   | NA  | NA  |\n", v, Statetype(cm->sttype[v]), d, i);
5321 	      printf("                                 | j: %4d | jn: NA   | jx: NA  | NA  | NA  |\n", j);
5322 	      printf("                                 | d: %4d | dn: NA   | dx: NA   | NA  | NA  |\n", d);
5323 
5324 	      local_used++;
5325 	    }
5326 	}
5327       else
5328 	{
5329 	  imindiff = i-imin[v];
5330 	  imaxdiff = imax[v]-i;
5331 	  jmindiff = j-jmin[v];
5332 	  jmaxdiff = jmax[v]-j;
5333 	  if(j >= jmin[v] && j <= jmax[v])
5334 	    {
5335 	      hdmindiff = d - hdmin[v][j-jmin[v]];
5336 	      hdmaxdiff = hdmax[v][j-jmin[v]] - d;
5337 	    }
5338 	  else
5339 	    {
5340 	      hdmindiff = -1000;
5341 	      hdmaxdiff = -1000;
5342 	    }
5343 	  if(imindiff < 0)
5344 	    imin_out++;
5345 	  if(imaxdiff < 0)
5346 	    imax_out++;
5347 	  if(jmindiff < 0)
5348 	    jmin_out++;
5349 	  if(jmaxdiff < 0)
5350 	    jmax_out++;
5351 	  if(hdmindiff < 0)
5352 	    hdmin_out++;
5353 	  if(hdmaxdiff < 0)
5354 	    hdmax_out++;
5355 
5356 	  if(debug_level > 1 || ((imindiff < 0) || (imaxdiff < 0) || (jmindiff < 0) || (jmaxdiff < 0) ||
5357 				 (hdmindiff < 0) || (hdmaxdiff < 0)))
5358 	    {
5359 	      printf("v: %4d %-4s %-2s (%4d) | d: %4d | i: %4d | in: %4d | ix: %4d | %3d | %3d |\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), cm->ndidx[v], d, i, imin[v], imax[v], imindiff, imaxdiff);
5360 	      printf("                                 | j: %4d | jn: %4d | jx: %4d | %3d | %3d |\n", j, jmin[v], jmax[v], jmindiff, jmaxdiff);
5361 	      if(j >= jmin[v] && j <= jmax[v])
5362 		{
5363 		  printf("                                 | d: %4d | dn: %4d | dx: %4d | %3d | %3d |\n", d, hdmin[v][j-jmin[v]], hdmax[v][j-jmin[v]], hdmindiff, hdmaxdiff);
5364 		}
5365 	      else
5366 		{
5367 		  printf("                                 | d: %4d | dn: jout | dx: jout | %3d | %3d |\n", d, hdmindiff, hdmaxdiff);
5368 		}
5369 	    }
5370 	}
5371     }
5372   printf("\nimin out  : %d\n", imin_out);
5373   printf("imax out  : %d\n", imax_out);
5374   printf("jmin out  : %d\n", jmin_out);
5375   printf("jmax out  : %d\n", jmax_out);
5376   printf("hdmin out : %d\n", hdmin_out);
5377   printf("hdmax out : %d\n", hdmax_out);
5378   printf("local used: %d\n", local_used);
5379 
5380   if((imin_out + imax_out + jmin_out + jmax_out) > 0)
5381     {
5382       printf("ERROR, some of the i and j bands are going to prevent optimal alignment. Sorry.\n");
5383     }
5384 
5385   return;
5386 }
5387 
5388 /* EPN 01.18.06
5389  * Function: debug_print_hd_bands
5390  *
5391  * Purpose:  Print out the v and j dependent hd bands.
5392  */
5393 void
5394 debug_print_hd_bands(CM_t *cm, int **hdmin, int **hdmax, int *jmin, int *jmax)
5395 {
5396   int v, j;
5397 
5398   printf("\nPrinting hd bands :\n");
5399   printf("****************\n");
5400   for(v = 0; v < cm->M; v++)
5401    {
5402      for(j = jmin[v]; j <= jmax[v]; j++)
5403        {
5404 	 printf("band v:%d j:%d n:%d %-4s %-2s min:%d max:%d\n", v, j, cm->ndidx[v], Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), hdmin[v][j-jmin[v]], hdmax[v][j-jmin[v]]);
5405        }
5406      printf("\n");
5407    }
5408   printf("****************\n\n");
5409 
5410   return;
5411 }
5412 
5413 /* Function: debug_print_parsetree_and_ij_bands()
5414  * Date:     EPN, Sun Jan 27 16:38:14 2008
5415  *
5416  * Purpose:  Print a parsetree a la ParseTreeDump() but supplement it
5417  *           with details on where the parsetree violates i and j bands
5418  *           (if at all) from a cp9bands data structure.
5419  *
5420  * Args:    fp    - FILE to write output to.
5421  *          tr    - parsetree to examine.
5422  *          cm    - model that was aligned to dsq to generate the parsetree
5423  *          dsq   - digitized sequence that was aligned to cm to generate the parsetree
5424  *          gamma - cumulative subsequence length probability distributions
5425  *                  used to generate the bands; from BandDistribution(); [0..v..M-1][0..W]
5426  *          W     - maximum window length W (gamma distributions range up to this)
5427  *          cp9b  - CP9 bands object with i and j bands
5428  *
5429  * Returns:  (void)
5430  */
5431 void
5432 debug_print_parsetree_and_ij_bands(FILE *fp, Parsetree_t *tr, CM_t *cm, ESL_DSQ *dsq, CP9Bands_t *cp9b)
5433 {
5434   int   x;
5435   char  syml, symr;
5436   float tsc;
5437   float esc;
5438   int   v,y;
5439   char  mode;
5440 
5441   /* Contract check */
5442   if(dsq == NULL)  cm_Fail("In debug_print_parsetree_and_ij_bands(), dsq is NULL");
5443 
5444   fprintf(fp, "%5s %6s %6s %7s %5s %5s %5s %5s %5s   %5s %5s %5s    %5s %5s %5s\n",
5445 	  " idx ", "emitl", "emitr", "state", " nxtl", " nxtr", " prv ", " tsc ", " esc ",
5446 	  " imin", " imax", "idiff", "jmin", "jmax", "jdiff");
5447   fprintf(fp, "%5s %6s %6s %7s %5s %5s %5s %5s %5s   %5s %5s %5s    %5s %5s %5s\n",
5448 	  "-----", "------", "------", "-------", "-----","-----", "-----","-----", "-----",
5449 	  "-----", "-----", "-----", "-----", "-----", "-----");
5450   for (x = 0; x < tr->n; x++)
5451     {
5452       v = tr->state[x];
5453       mode = tr->mode[x];
5454 
5455       /* Set syml, symr: one char representation of what we emit, or ' '.
5456        * Set esc:        emission score, or 0.
5457        * Only P, L, R states have emissions.
5458        */
5459       syml = symr = ' ';
5460       esc = 0.;
5461       if (cm->sttype[v] == MP_st) {
5462 	if (mode == TRMODE_J || mode == TRMODE_L) syml = cm->abc->sym[dsq[tr->emitl[x]]];
5463 	if (mode == TRMODE_J || mode == TRMODE_R) symr = cm->abc->sym[dsq[tr->emitr[x]]];
5464 	if      (mode == TRMODE_J) esc = DegeneratePairScore(cm->abc, cm->esc[v], dsq[tr->emitl[x]], dsq[tr->emitr[x]]);
5465         else if (mode == TRMODE_L) esc = cm->lmesc[v][dsq[tr->emitl[x]]];
5466         else if (mode == TRMODE_R) esc = cm->rmesc[v][dsq[tr->emitr[x]]];
5467       } else if ( (cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) && (mode == TRMODE_J || mode == TRMODE_L) ) {
5468 	syml = cm->abc->sym[dsq[tr->emitl[x]]];
5469 	esc  = esl_abc_FAvgScore(cm->abc, dsq[tr->emitl[x]], cm->esc[v]);
5470       } else if ( (cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) && (mode == TRMODE_J || mode == TRMODE_R) ) {
5471 	symr = cm->abc->sym[dsq[tr->emitr[x]]];
5472 	esc  = esl_abc_FAvgScore(cm->abc, dsq[tr->emitr[x]], cm->esc[v]);
5473       }
5474 
5475       /* Set tsc: transition score, or 0.
5476        * B, E, and the special EL state (M, local end) have no transitions.
5477        */
5478       tsc = 0.;
5479       if (v != cm->M && cm->sttype[v] != B_st && cm->sttype[v] != E_st) {
5480 	y = tr->state[tr->nxtl[x]];
5481 
5482         if (tr->nxtl[x] == -1)
5483           ;
5484 	else if (v == 0 && (cm->flags & CMH_LOCAL_BEGIN))
5485 	  tsc = cm->beginsc[y];
5486 	else if (y == cm->M) /* CMH_LOCAL_END is presumably set, else this wouldn't happen */
5487 	  tsc = cm->endsc[v] + (cm->el_selfsc * (tr->emitr[x] - tr->emitl[x] + 1 - StateDelta(cm->sttype[v])));
5488 	else 		/* y - cm->first[v] gives us the offset in the transition vector */
5489 	  tsc = cm->tsc[v][y - cm->cfirst[v]];
5490       }
5491 
5492       /* Print the info line for this state
5493        */
5494       fprintf(fp, "%5d %5d%c %5d%c %5d%-2s %5d %5d %5d %5.2f %5.2f ",
5495 	      x, tr->emitl[x], syml, tr->emitr[x], symr, tr->state[x],
5496 	      Statetype(cm->sttype[v]), tr->nxtl[x], tr->nxtr[x], tr->prv[x], tsc, esc);
5497       if(tr->emitl[x] < cp9b->imin[tr->state[x]]) {
5498 	fprintf(fp, "%5d %5d %5d   ",
5499 		cp9b->imin[tr->state[x]], cp9b->imax[tr->state[x]], (tr->emitl[x] - cp9b->imin[tr->state[x]]));
5500       }
5501       else if(tr->emitl[x] > cp9b->imax[tr->state[x]]) {
5502 	fprintf(fp, "%5d %5d %5d   ",
5503 		cp9b->imin[tr->state[x]], cp9b->imax[tr->state[x]], (tr->emitl[x] - cp9b->imax[tr->state[x]]));
5504       }
5505       else {
5506 	fprintf(fp, "%5d %5d %5s   ",
5507 		cp9b->imin[tr->state[x]], cp9b->imax[tr->state[x]], "");
5508       }
5509       if(tr->emitr[x] < cp9b->jmin[tr->state[x]]) {
5510 	fprintf(fp, "%5d %5d %5d\n",
5511 		cp9b->jmin[tr->state[x]], cp9b->jmax[tr->state[x]], (tr->emitr[x] - cp9b->jmin[tr->state[x]]));
5512       }
5513       else if(tr->emitr[x] > cp9b->jmax[tr->state[x]]) {
5514 	fprintf(fp, "%5d %5d %5d\n",
5515 		cp9b->jmin[tr->state[x]], cp9b->jmax[tr->state[x]], (tr->emitr[x] - cp9b->jmax[tr->state[x]]));
5516       }
5517       else {
5518 	fprintf(fp, "%5d %5d %5s\n",
5519 		cp9b->jmin[tr->state[x]], cp9b->jmax[tr->state[x]], "");
5520       }
5521     }
5522 
5523   fprintf(fp, "%5s %6s %6s %7s %5s %5s %5s %5s %5s %5s %5s %5s %5s    %5s %5s %5s\n",
5524 	  "-----", "------", "------", "-------", "-----","-----", "-----","-----", "-----",
5525 	  "-----", "-----", "-----", "-----", "-----", "-----", "-----");
5526 
5527   fflush(fp);
5528 }
5529 
5530 /*********************************************************************
5531  * Function: cp9_RelaxRootBandsForSearch()
5532  *
5533  * Purpose:  In cp9_HMM2ijBands_OLD(), ROOT_S (state 0) sets imin[0]=imax[0]=i0,
5534  *           and jmin[0]=jmax[0]=j0, which is important for alignment,
5535  *           but during search enforces that the optimal alignment start
5536  *           at i0 and end at j0, but when searching we want to relax this
5537  *           requirement in case a higher scoring parse has different endpoints.
5538  *           See code for details.
5539  *
5540  * Args:
5541  * cm               the cm
5542  * i0               first position of seq
5543  * j0               last position of seq
5544  * int *imin        imin[v] = first position in band on i for state v
5545  * int *imax        imax[v] = last position in band on i for state v
5546  * int *jmin        jmin[v] = first position in band on j for state v
5547  * int *jmax        jmax[v] = last position in band on j for state v
5548  */
5549 void
5550 cp9_RelaxRootBandsForSearch(CM_t *cm, int i0, int j0, int *imin, int *imax, int *jmin, int *jmax)
5551 {
5552   int y, yoffset;
5553 
5554   if(i0 == j0) return; /* this is a special vanishingly rare case, we've set otherwise illegal jmin, jmax values for MP states
5555 			* b/c all MPs are impossible for a length 1 seq, do nothing in this case.
5556 			*/
5557   /* look at all children y of ROOT_S (v == 0) and set:
5558    * imin[0] = min_y imin[y];
5559    * imax[0] = max_y imax[y];
5560    * jmin[0] = min_y jmin[y];
5561    * jmax[0] = max_y jmax[y];
5562    */
5563   /* First look at children of 0 (these probs will be 0. if local begins on, but it doesn't matter for our purposes here) */
5564   for (yoffset = 0; yoffset < cm->cnum[0]; yoffset++) {
5565     y = cm->cnum[0] + yoffset;
5566     imin[0] = ESL_MIN(imin[0], imin[y]);
5567     imax[0] = ESL_MAX(imax[0], imax[y]);
5568     jmin[0] = ESL_MIN(jmin[0], jmin[y]);
5569     jmax[0] = ESL_MAX(jmax[0], jmax[y]);
5570   }
5571   /* now for possible local begins */
5572   if(cm->flags & CMH_LOCAL_BEGIN) {
5573     for (y = 1; y < cm->M; y++) {
5574       if(NOT_IMPOSSIBLE(cm->beginsc[y])) {
5575 	imin[0] = ESL_MIN(imin[0], imin[y]);
5576 	imax[0] = ESL_MAX(imax[0], imax[y]);
5577 	jmin[0] = ESL_MIN(jmin[0], jmin[y]);
5578 	jmax[0] = ESL_MAX(jmax[0], jmax[y]);
5579       }
5580     }
5581   }
5582 }
5583 
5584 #endif
5585 
5586