1 /* hmmband.c
2 * EPN 12.16.05
3 *
4 * Functions to support deriving bands for a constrained CM
5 * parse of a target sequence using CM. Bands are derived
6 * from CM plan 9 HMM (CP9 HMM) Forward/Backward parses of
7 * the target.
8 */
9
10 #include "esl_config.h"
11 #include "p7_config.h"
12 #include "config.h"
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <ctype.h>
18 #include <float.h>
19 #include <limits.h>
20 #include <math.h>
21
22 #include "easel.h"
23 #include "esl_stack.h"
24 #include "esl_vectorops.h"
25
26 #include "hmmer.h"
27
28 #include "infernal.h"
29
30
31 static int cp9_FB2HMMBands (CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
32 int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level);
33 static int cp9_FB2HMMBandsWithSums(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
34 int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level);
35 static void cp9_Posterior(ESL_DSQ *dsq, int i0, int j0, CP9_t *hmm, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *mx, int did_fwd_scan);
36 static void cp9_IFillPostSums(CP9_MX *post, CP9Bands_t *cp9, int i0, int j0);
37 static int HMMBandsEnforceValidParse(CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, char *errbuf, int i0, int j0, int doing_search, int *ret_did_expand,
38 int **ret_r_mn, int **ret_r_mx, int **ret_r_in, int **ret_r_ix, int **ret_r_dn, int **ret_r_dx,
39 int **ret_r_nn_i, int **ret_r_nx_i, int **ret_r_nn_j, int **ret_r_nx_j);
40 static int HMMBandsFixUnreachable(CP9Bands_t *cp9b, char *errbuf, int k, int r_prv_min, int r_prv_max, int r_insert_prv_min);
41 static int HMMBandsFillGap(CP9Bands_t *cp9b, char *errbuf, int k, int min1, int max1, int min2, int max2, int prv_nd_r_mn, int prv_nd_r_dn);
42 #if eslDEBUGLEVEL >= 1
43 static int CMBandsCheckValidParse(CM_t *cm, CP9Bands_t *cp9b, char *errbuf, int i0, int j0, int doing_search);
44 #endif
45
46 /* EPN 10.28.06
47 * Function: AllocCP9Bands()
48 *
49 * Purpose: Allocate the arrays needed for creating i and j
50 * bands on a CM based on a CP9 parse. See infernal.h
51 * for description of this structure.
52 *
53 * Args:
54 * cm_M - number of states in the CM
55 * hmm_M - number of nodes in the CP9 HMM for the CM
56 * Returns: (void)
57 *
58 */
59
60 CP9Bands_t *
AllocCP9Bands(int cm_M,int hmm_M)61 AllocCP9Bands(int cm_M, int hmm_M)
62 {
63 int status;
64 CP9Bands_t *cp9bands;
65
66 ESL_ALLOC(cp9bands, sizeof(CP9Bands_t));
67
68 cp9bands->cm_M = cm_M;
69 cp9bands->hmm_M = hmm_M;
70
71 cp9bands->sp1 = cp9bands->sp2 = cp9bands->ep1 = cp9bands->ep2 = -1;
72 cp9bands->thresh1 = DEFAULT_CP9BANDS_THRESH1; /* 0.01 */
73 cp9bands->thresh2 = DEFAULT_CP9BANDS_THRESH2; /* 0.98 */
74 cp9bands->Rmarg_imin = cp9bands->Lmarg_jmin = -1;
75 cp9bands->Rmarg_imax = cp9bands->Lmarg_jmax = -2;
76
77 ESL_ALLOC(cp9bands->Jvalid, sizeof(int) * (cm_M+1));
78 ESL_ALLOC(cp9bands->Lvalid, sizeof(int) * (cm_M+1));
79 ESL_ALLOC(cp9bands->Rvalid, sizeof(int) * (cm_M+1));
80 ESL_ALLOC(cp9bands->Tvalid, sizeof(int) * (cm_M+1));
81 esl_vec_ISet(cp9bands->Jvalid, cm_M+1, TRUE);
82 esl_vec_ISet(cp9bands->Lvalid, cm_M+1, TRUE);
83 esl_vec_ISet(cp9bands->Rvalid, cm_M+1, TRUE);
84 esl_vec_ISet(cp9bands->Tvalid, cm_M, TRUE);
85 cp9bands->Tvalid[cm_M] = FALSE;
86
87 ESL_ALLOC(cp9bands->pn_min_m, sizeof(int) * (cp9bands->hmm_M+1));
88 ESL_ALLOC(cp9bands->pn_max_m, sizeof(int) * (cp9bands->hmm_M+1));
89 ESL_ALLOC(cp9bands->pn_min_i, sizeof(int) * (cp9bands->hmm_M+1));
90 ESL_ALLOC(cp9bands->pn_max_i, sizeof(int) * (cp9bands->hmm_M+1));
91 ESL_ALLOC(cp9bands->pn_min_d, sizeof(int) * (cp9bands->hmm_M+1));
92 ESL_ALLOC(cp9bands->pn_max_d, sizeof(int) * (cp9bands->hmm_M+1));
93 ESL_ALLOC(cp9bands->isum_pn_m,sizeof(int) * (cp9bands->hmm_M+1));
94 ESL_ALLOC(cp9bands->isum_pn_i,sizeof(int) * (cp9bands->hmm_M+1));
95 ESL_ALLOC(cp9bands->isum_pn_d,sizeof(int) * (cp9bands->hmm_M+1));
96
97 ESL_ALLOC(cp9bands->imin, sizeof(int) * cp9bands->cm_M);
98 ESL_ALLOC(cp9bands->imax, sizeof(int) * cp9bands->cm_M);
99 ESL_ALLOC(cp9bands->jmin, sizeof(int) * cp9bands->cm_M);
100 ESL_ALLOC(cp9bands->jmax, sizeof(int) * cp9bands->cm_M);
101 ESL_ALLOC(cp9bands->safe_hdmin, sizeof(int) * cp9bands->cm_M);
102 ESL_ALLOC(cp9bands->safe_hdmax, sizeof(int) * cp9bands->cm_M);
103 ESL_ALLOC(cp9bands->hdmin, sizeof(int *) * cp9bands->cm_M);
104 ESL_ALLOC(cp9bands->hdmax, sizeof(int *) * cp9bands->cm_M);
105 cp9bands->hdmin_mem = NULL;
106 cp9bands->hdmax_mem = NULL;
107 /* NOTE: cp9bands->hdmin and hdmax are 2D arrays, the ptrs are
108 * alloc'ed here, but the actually memory is alloc'ed by
109 * hmmband.c:cp9_Seq2Bands() with a call to hmmband.c:cp9_GrowHDBands().
110 */
111 cp9bands->hd_needed = 0;
112 cp9bands->hd_alloced = 0;
113
114 cp9bands->tau = -1.; /* invalid, reset each time bands are calculated */
115 return cp9bands;
116
117 ERROR:
118 cm_Fail("Memory allocation error.\n");
119 return NULL; /* never reached */
120 }
121
122 /* Function: SizeofCP9Bands()
123 * Returns: Size (Mb) of cp9b.
124 */
125 float
SizeofCP9Bands(CP9Bands_t * cp9b)126 SizeofCP9Bands(CP9Bands_t *cp9b)
127 {
128 float bytes = 0.;
129
130 bytes += sizeof(CP9Bands_t);
131
132 /* following from AllocCP9Bands() */
133 bytes += sizeof(int) * (cp9b->cm_M+1); /* Jvalid */
134 bytes += sizeof(int) * (cp9b->cm_M+1); /* Lvalid */
135 bytes += sizeof(int) * (cp9b->cm_M+1); /* Rvalid */
136 bytes += sizeof(int) * (cp9b->cm_M+1); /* Tvalid */
137
138 bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_min_m */
139 bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_max_m */
140 bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_min_i */
141 bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_max_i */
142 bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_min_d */
143 bytes += sizeof(int) * (cp9b->hmm_M+1); /* pn_max_d */
144 bytes += sizeof(int) * (cp9b->hmm_M+1); /* isum_pn_m */
145 bytes += sizeof(int) * (cp9b->hmm_M+1); /* isum_pn_i */
146 bytes += sizeof(int) * (cp9b->hmm_M+1); /* isum_pn_d */
147
148 bytes += sizeof(int) * cp9b->cm_M; /* imin */
149 bytes += sizeof(int) * cp9b->cm_M; /* imax */
150 bytes += sizeof(int) * cp9b->cm_M; /* jmin */
151 bytes += sizeof(int) * cp9b->cm_M; /* jmax */
152 bytes += sizeof(int) * cp9b->cm_M; /* safe_hdmin */
153 bytes += sizeof(int) * cp9b->cm_M; /* safe_hdmax */
154 bytes += sizeof(int *) * cp9b->cm_M; /* hdmin */
155 bytes += sizeof(int *) * cp9b->cm_M; /* hdmax */
156
157 bytes += sizeof(int) * cp9b->hd_alloced; /* hdmin */
158 bytes += sizeof(int) * cp9b->hd_alloced; /* hdmax */
159
160 return bytes / 1000000.;
161 }
162
163 /* Function: FreeCP9Bands()
164 * Returns: (void)
165 */
166 void
FreeCP9Bands(CP9Bands_t * cp9bands)167 FreeCP9Bands(CP9Bands_t *cp9bands)
168 {
169 free(cp9bands->imin);
170 free(cp9bands->imax);
171 free(cp9bands->jmin);
172 free(cp9bands->jmax);
173 free(cp9bands->safe_hdmin);
174 free(cp9bands->safe_hdmax);
175 if(cp9bands->hdmin_mem != NULL)
176 free(cp9bands->hdmin_mem); /* all v were malloc'ed as a block */
177 if(cp9bands->hdmax_mem != NULL)
178 free(cp9bands->hdmax_mem); /* all v were malloc'ed as a block */
179 free(cp9bands->hdmin);
180 free(cp9bands->hdmax);
181
182 free(cp9bands->pn_min_m);
183 free(cp9bands->pn_max_m);
184 free(cp9bands->pn_min_i);
185 free(cp9bands->pn_max_i);
186 free(cp9bands->pn_min_d);
187 free(cp9bands->pn_max_d);
188 free(cp9bands->isum_pn_m);
189 free(cp9bands->isum_pn_i);
190 free(cp9bands->isum_pn_d);
191
192 free(cp9bands->Jvalid);
193 free(cp9bands->Lvalid);
194 free(cp9bands->Rvalid);
195 free(cp9bands->Tvalid);
196
197 free(cp9bands);
198 }
199
200 /* Function: cp9_Seq2Bands
201 * Date : EPN, Mon Jan 8 07:23:34 2007
202 * EPN, Wed Oct 17 04:53:58 2007 [updated/optimized]
203 *
204 * Purpose: Given a CM with precalc'ed CP9 HMM and CP9Map, a sequence and
205 * a CP9Bands_t structure, calculate the HMM bands and store them
206 * in the CP9Bands_t structure.
207 *
208 * Args: cm - the covariance model
209 * errbuf - char buffer for reporting errors
210 * fmx - CP9 dp matrix for Forward()
211 * bmx - CP9 dp matrix for Backward()
212 * pmx - CP9 dp matrix to fill with posteriors, can == bmx
213 * dsq - sequence in digitized form
214 * i0 - start of target subsequence (often 1, beginning of sq)
215 * j0 - end of target subsequence (often L, end of sq)
216 * cp9b - PRE-ALLOCATED, the HMM bands for this sequence, filled here.
217 * doing_search - TRUE if we're going to use these HMM bands for search, not alignment
218 * pass_idx - pipeline pass index, tells us which truncation modes to allow, if any
219 * debug_level - verbosity level for debugging printf()s
220 *
221 * Return: eslOK on success;
222 *
223 */
224 int
cp9_Seq2Bands(CM_t * cm,char * errbuf,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,ESL_DSQ * dsq,int i0,int j0,CP9Bands_t * cp9b,int doing_search,int pass_idx,int debug_level)225 cp9_Seq2Bands(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, CP9Bands_t *cp9b, int doing_search, int pass_idx, int debug_level)
226 {
227 int status;
228 int use_sums; /* TRUE to fill and use posterior sums during HMM band calc, yields wider bands */
229 float sc;
230 int do_old_hmm2ij;
231 int do_trunc; /* are we allowing truncated alignments (either L or R)? */
232 int do_fwd_scan; /* run Forward in scanning mode? (see long comment on this below by assignment of do_fwd_scan) */
233 int do_bck_scan; /* run Backward in scanning mode? (see long comment on this below by assignment of do_fwd_scan) */
234 CP9_t *cp9 = NULL; /* ptr to cp9 HMM (cm->cp9, cm->Lcp9, cm->Rcp9, or cm->Tcp9) we'll use for deriving bands */
235
236 /* Contract checks */
237 if(cm->cp9map == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, but cm->cp9map is NULL.\n");
238 if(dsq == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, dsq is NULL.");
239 if(i0 > j0) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, i0: %d > j0: %d\n", i0, j0);
240 if(cm->tau > 0.5) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, cm->tau (%f) > 0.5, we can't deal.", cm->tau);
241
242 use_sums = ((cm->align_opts & CM_ALIGN_SUMS) || (cm->search_opts & CM_SEARCH_SUMS)) ? TRUE : FALSE;
243 do_old_hmm2ij = ((cm->align_opts & CM_ALIGN_HMM2IJOLD) || (cm->search_opts & CM_SEARCH_HMM2IJOLD)) ? TRUE : FALSE;
244
245 /* Determine which cp9 HMM to use and whether or not we're doing
246 * truncated alignment, based on value of pass_idx.
247 */
248 switch(pass_idx) {
249 case PLI_PASS_5P_ONLY_FORCE: do_trunc = TRUE; cp9 = cm->Rcp9; break;
250 case PLI_PASS_3P_ONLY_FORCE: do_trunc = TRUE; cp9 = cm->Lcp9; break;
251 case PLI_PASS_5P_AND_3P_FORCE: do_trunc = TRUE; cp9 = cm->Tcp9; break;
252 case PLI_PASS_5P_AND_3P_ANY: do_trunc = TRUE; cp9 = cm->Tcp9; break;
253 default: do_trunc = FALSE; cp9 = cm->cp9; break;
254 }
255 if(cp9 == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Bands, relevant cp9 is NULL.\n");
256
257 /* Determine if we should do Forward and Backward in scan mode.
258 * When in scan mode, Forward will allow parses to start at any
259 * position, else they must start at i0 (first res).
260 * When in scan mode, Backward will allow parses to end at any
261 * position, else they must end at j0 (final res).
262 *
263 * We should only scan in Forward if i0 does not need to be in any
264 * eventual CM parsetree we derive using these bands. This is only
265 * true if we'll use these bands for a CM search
266 * (doing_search==TRUE) and that search won't be a special truncated
267 * search where i0 must be in any valid parsetree. We will be doing
268 * a truncated search enforcing i0 inclusion if pass_idx is either
269 * PLI_PASS_5P_AND_3P or PLI_PASS_5P_ONLY.
270 *
271 * Likewise, we should only scan in Backward if j0 does not need to
272 * be in any eventual CM parsetree we derive using these bands.
273 * This is only true if we'll use these bands for a CM search
274 * (doing_search==TRUE) and that search won't be a special truncated
275 * search where j0 must be in any valid parsetree. We will be doing
276 * a truncated search enforcing j0 inclusion if pass_idx is either
277 * PLI_PASS_5P_AND_3P or PLI_PASS_3P_ONLY.
278 */
279 if((! doing_search) || (cm->search_opts & CM_SEARCH_HMMALNBANDS)) {
280 do_fwd_scan = do_bck_scan = FALSE;
281 }
282 else {
283 do_fwd_scan = cm_pli_PassEnforcesFirstRes(pass_idx) ? FALSE : TRUE;
284 do_bck_scan = cm_pli_PassEnforcesFinalRes(pass_idx) ? FALSE : TRUE;
285 }
286
287 /* Step 1: Get HMM Forward/Backward DP matrices.
288 * Step 2: F/B -> HMM bands.
289 * Step 3: Calculate candidate states for truncated alignments
290 * Step 4: HMM bands -> CM bands.
291 */
292
293 /* Step 1: Get HMM Forward/Backward DP matrices. */
294 if((status = cp9_Forward(cp9, errbuf, fmx, dsq, i0, j0,
295 do_fwd_scan, /* allow parses to start at any posn? */
296 (! doing_search), /* are we going to use bands to align? */
297 FALSE, /* don't be memory efficient */
298 NULL, NULL,
299 &sc)) != eslOK) return status;
300
301 if((status = cp9_Backward(cp9, errbuf, bmx, dsq, i0, j0,
302 do_bck_scan, /* allow parses to end at any posn? */
303 (! doing_search), /* are we going to use posteriors to align? */
304 FALSE, /* don't be memory efficient */
305 NULL, NULL,
306 &sc)) != eslOK) return status;
307
308 if(cm->align_opts & CM_ALIGN_CHECKFB) {
309 if((status = cp9_CheckFB(fmx, bmx, cp9, errbuf, sc, i0, j0, dsq)) != eslOK) return status;
310 printf("Forward/Backward matrices checked.\n");
311 }
312
313 /* Step 2: F/B -> HMM bands. */
314 if(use_sums){
315 if((status = cp9_FB2HMMBandsWithSums(cp9, errbuf, dsq, fmx, bmx, pmx, cp9b, i0, j0, cp9b->hmm_M,
316 (1.-cm->tau), do_fwd_scan, do_bck_scan, do_old_hmm2ij, debug_level)) != eslOK) return status;
317 }
318 else {
319 if((status = cp9_FB2HMMBands(cp9, errbuf, dsq, fmx, bmx, pmx, cp9b, i0, j0, cp9b->hmm_M,
320 (1.-cm->tau), do_fwd_scan, do_bck_scan, do_old_hmm2ij, debug_level)) != eslOK) return status;
321 }
322 if(debug_level > 0) cp9_DebugPrintHMMBands(stdout, j0, cp9b, cm->tau, 1);
323 cp9b->tau = cm->tau;
324
325 /* Step 3: (only if truncated alignments are possible)
326 * Calculate occupancy and candidate states for marginal alignments
327 */
328 if(do_trunc) {
329 cp9_PredictStartAndEndPositions(pmx, cp9b, i0, j0);
330 if((status = cp9_MarginalCandidatesFromStartEndPositions(cm, cp9b, pass_idx, errbuf)) != eslOK) return status;
331 /* xref: ELN2 notebook, p.146-147; ~nawrockie/notebook/11_0816_inf_banded_trcyk/00LOG */
332 }
333 else {
334 /* reset all Jvalid values to TRUE */
335 esl_vec_ISet(cp9b->Jvalid, cm->M+1, TRUE);
336 /* and all {L,R,T}valid values to FALSE */
337 esl_vec_ISet(cp9b->Lvalid, cm->M+1, FALSE);
338 esl_vec_ISet(cp9b->Rvalid, cm->M+1, FALSE);
339 esl_vec_ISet(cp9b->Tvalid, cm->M+1, FALSE);
340 }
341
342 /* Step 4: HMM bands -> CM bands. */
343 if(do_old_hmm2ij) {
344 if((status = cp9_HMM2ijBands_OLD(cm, errbuf, cp9b, cm->cp9map, i0, j0, doing_search, debug_level)) != eslOK) return status;
345 }
346 else {
347 if((status = cp9_HMM2ijBands(cm, errbuf, cp9, cp9b, cm->cp9map, i0, j0, doing_search, do_trunc, debug_level)) != eslOK) return status;
348 }
349
350 /* Use the CM bands on i and j to get bands on d, specific to j. */
351 /* cp9_GrowHDBands() must be called before ij2d_bands() so hdmin, hdmax are adjusted for new seq */
352 if((status = cp9_GrowHDBands(cp9b, errbuf)) != eslOK) return status;
353 ij2d_bands(cm, (j0-i0+1), cp9b->imin, cp9b->imax, cp9b->jmin, cp9b->jmax, cp9b->hdmin, cp9b->hdmax, do_trunc, debug_level);
354
355 #if eslDEBUGLEVEL >= 1
356 if((status = cp9_ValidateBands(cm, errbuf, cp9b, i0, j0, do_trunc)) != eslOK) return status;
357 ESL_DPRINTF1(("#DEBUG: bands validated.\n"));
358 #endif
359 if(debug_level > 0) debug_print_ij_bands(cm);
360 if(debug_level > 0) PrintDPCellsSaved_jd(cm, cp9b->jmin, cp9b->jmax, cp9b->hdmin, cp9b->hdmax, (j0-i0+1));
361
362 return eslOK;
363 }
364
365 /* Function: cp9_IterateSeq2Bands()
366 * Incept: EPN, Thu Mar 1 17:56:42 2012
367 *
368 * Purpose: Increase cm->tau (tighten HMM bands) by multiplying it
369 * by TAU_MULTIPLIER (2.0) until required HMM banded matrix
370 * size is below <size_limit> Mb, or cm->tau is greater than
371 * <maxtau>.
372 *
373 * If we're doing a truncated alignment (which we can figure
374 * out based on the value of <pass_idx>) then we also increase
375 * cp9b->thresh1 and decrease cp9b->thresh2 by a hard-coded
376 * value into the maximum/minimum is reached for them as
377 * wel..
378 *
379 * Since we can't determine the required size of a HB
380 * matrix unless we have filled a CP9Bands_t object
381 * (cm->cp9b), we need to recalculate bands each time tau,
382 * (and possibly thresh1 and thresh2) are modified and then
383 * check size of resulting matrix given the bands.
384 *
385 * Upon returning cm->tau, cm->cp9b->tau, cm->cp9b->thresh1
386 * and cm->cp9b->thresh2 may have been changed.
387 *
388 * Args cm - the CM
389 * errbuf - for error messages
390 * dsq - sequence we're aligning
391 * i0 - first position in dsq to align (usually 1)
392 * j0 - final position in dsq to align (usually sq->n)
393 * pass_idx - pipeline pass index
394 * size_limit - max allowed size of an HB mx, in Mb
395 * doing_search - TRUE if we're going to use these HMM bands for search, not alignment
396 * do_sample - TRUE if bands will eventually be used for sampling a parsetree
397 * do_post - TRUE if bands will eventually be used for posterior alignment
398 * do_iterate - TRUE to attempt to iteratively tighten bands until matrix is small enough
399 * maxtau - max value allowed for cm->tau
400 * xtau - we multiply tau by this at each iteration (must be > 1.1)
401 * ret_Mb - RETURN: required Mb for HB mx for cm->tau upon exit.
402 *
403 * Returns: <eslOK> on success.
404 * <eslERANGE> if required matrix size is > <size_limit>,
405 * for cm->tau = maxtau.
406 * A different error code upon an error, errbuf is filled.
407 */
408 int
cp9_IterateSeq2Bands(CM_t * cm,char * errbuf,ESL_DSQ * dsq,int64_t i0,int64_t j0,int pass_idx,float size_limit,int doing_search,int do_sample,int do_post,int do_iterate,double maxtau,float * ret_Mb)409 cp9_IterateSeq2Bands(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int64_t i0, int64_t j0, int pass_idx, float size_limit, int doing_search, int do_sample, int do_post, int do_iterate, double maxtau, float *ret_Mb)
410 {
411 int status;
412 int do_trunc = cm_pli_PassAllowsTruncation(pass_idx);
413 float hbmx_Mb; /* approximate size in Mb required for HMM banded matrix */
414 int tau_at_limit = FALSE;
415 int thresh1_at_limit = (do_trunc) ? FALSE : TRUE;
416 int thresh2_at_limit = (do_trunc) ? FALSE : TRUE;
417
418 while(1) {
419 if((status = cp9_Seq2Bands(cm, errbuf, cm->cp9_mx, cm->cp9_bmx, cm->cp9_bmx, dsq, i0, j0, cm->cp9b, doing_search, pass_idx, 0)) != eslOK) goto ERROR;
420 if(doing_search) {
421 if(do_trunc) { if((status = cm_tr_hb_mx_SizeNeeded(cm, errbuf, cm->cp9b, j0-i0+1, NULL, NULL, NULL, NULL, &hbmx_Mb)) != eslOK) goto ERROR; }
422 else { if((status = cm_hb_mx_SizeNeeded (cm, errbuf, cm->cp9b, j0-i0+1, NULL, &hbmx_Mb)) != eslOK) goto ERROR; }
423 }
424 else {
425 if(do_trunc) { status = cm_TrAlignSizeNeededHB(cm, errbuf, j0-i0+1, size_limit, do_sample, do_post, NULL, NULL, NULL, &hbmx_Mb); }
426 else { status = cm_AlignSizeNeededHB (cm, errbuf, j0-i0+1, size_limit, do_sample, do_post, NULL, NULL, NULL, &hbmx_Mb); }
427 if(status != eslOK && status != eslERANGE) return status;
428 }
429 /*printf("cm->tau: %10.2g thresh1: %4.2f thresh2: %4.2f mxsize: %.2f\n", cm->tau, cm->cp9b->thresh1, cm->cp9b->thresh2, hbmx_Mb);*/
430 /* check if we can stop iterating, three ways we can
431 * case 1: matrix is now smaller than our limit.
432 * case 2: do_iterate == FALSE
433 * case 3: do_trunc == FALSE && tau has reached its limit
434 * case 4: do_trunc == TRUE && tau, thresh1 and thresh have all reached their limits
435 */
436 if(hbmx_Mb < size_limit) {
437 break; /* our matrix will be small enough, break out of while(1) */
438 }
439 if(! do_iterate) {
440 break; /* do_iterate is FALSE */
441 }
442 if(tau_at_limit && thresh1_at_limit && thresh2_at_limit) { /* if do_trunc is FALSE, thresh{1,2}_at_limit were init'ed as TRUE */
443 break; /* tau, thresh1 and thresh2 have all reached their limits, break out of while (1) */
444 }
445 if(! tau_at_limit) {
446 cm->tau *= TAU_MULTIPLIER;
447 if(cm->tau >= maxtau) { cm->tau = maxtau; tau_at_limit = TRUE; }
448 }
449 if(! thresh1_at_limit) {
450 cm->cp9b->thresh1 += DELTA_CP9BANDS_THRESH1;
451 if(cm->cp9b->thresh1 >= MAX_CP9BANDS_THRESH1) { cm->cp9b->thresh1 = MAX_CP9BANDS_THRESH1; thresh1_at_limit = TRUE; }
452 }
453 if(! thresh2_at_limit) {
454 cm->cp9b->thresh2 -= DELTA_CP9BANDS_THRESH2;
455 if(cm->cp9b->thresh2 <= MIN_CP9BANDS_THRESH2) { cm->cp9b->thresh2 = MIN_CP9BANDS_THRESH2; thresh2_at_limit = TRUE; }
456 }
457 }
458
459 if(ret_Mb != NULL) *ret_Mb = hbmx_Mb;
460
461 if(hbmx_Mb > size_limit) return eslERANGE;
462
463 return eslOK;
464
465 ERROR:
466 if(ret_Mb != NULL) *ret_Mb = 0.;
467 return status;
468 }
469
470 /* Function: cp9_Seq2Posteriors
471 * Date : EPN, Mon Jan 8 07:27:21 2007
472 *
473 * Purpose: Given a CM with precalc'ed CP9 HMM and CP9Map, and a sequence,
474 * run HMM Forward and Backward algorithms, and return a CP9 posterior
475 * matrix.
476 *
477 * Note: this function was never updated to handle
478 * truncated alignment (b/c it's no longer hooked up
479 * to any of the Infernal applications).
480 *
481 * Args: cm - the covariance model
482 * errbuf - char buffer for error messages
483 * fmx - CP9 dp matrix for Forward()
484 * bmx - CP9 dp matrix for Backward()
485 * pmx - CP9 dp matrix to fill with posteriors, can == bmx
486 * dsq - sequence in digitized form
487 * i0 - start of target subsequence (often 1, beginning of dsq)
488 * j0 - end of target subsequence (often L, end of dsq)
489 * debug_level - verbosity level for debugging printf()s
490 *
491 * Return: eslOK on success
492 */
493 int
cp9_Seq2Posteriors(CM_t * cm,char * errbuf,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,ESL_DSQ * dsq,int i0,int j0,int debug_level)494 cp9_Seq2Posteriors(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, int debug_level)
495 {
496 int status;
497 float sc;
498 CP9_t *cp9 = NULL; /* ptr to cp9 HMM (this could be Lcp9, Rcp9, Tcp9 if we update this function to possibly handle truncated alignment) */
499
500 /* Contract checks */
501 if(dsq == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors(), dsq is NULL.");
502 if(cm->cp9 == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors, but cm->cp9 is NULL.\n");
503 if(cm->cp9map == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors, but cm->cp9map is NULL.\n");
504 if((cm->search_opts & CM_SEARCH_HMMALNBANDS) && (! (cm->search_opts & CM_SEARCH_HBANDED)))
505 ESL_FAIL(eslEINCOMPAT, errbuf, "in cp9_Seq2Posteriors, CM_SEARCH_HMMALNBANDS flag raised, but not CM_SEARCH_HBANDED flag, this doesn't make sense\n");
506
507 cp9 = cm->cp9;
508 if(cp9 == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_Seq2Posteriors, relevant cp9 is NULL.\n");
509
510 /* Step 1: Get HMM posteriors.*/
511 if((status = cp9_Forward(cp9, errbuf, fmx, dsq, i0, j0,
512 FALSE, /* don't use scanning Forward/Backward */
513 TRUE, /* we are going to use posteriors to align */
514 FALSE, /* don't be memory efficient */
515 NULL, NULL,
516 &sc)) != eslOK) return status;
517 if(debug_level > 0) printf("CP9 Forward score : %.4f\n", sc);
518 if((status = cp9_Backward(cp9, errbuf, bmx, dsq, i0, j0,
519 FALSE, /* don't use scanning Forward/Backward */
520 TRUE, /* we are going to use posteriors to align */
521 FALSE, /* don't be memory efficient */
522 NULL, NULL,
523 &sc)) != eslOK) return status;
524 if(debug_level > 0) printf("CP9 Backward score : %.4f\n", sc);
525
526 if(cm->align_opts & CM_ALIGN_CHECKFB) {
527 if((status = cp9_CheckFB(fmx, bmx, cp9, errbuf, sc, i0, j0, dsq)) != eslOK) return status;
528 printf("Forward/Backward matrices checked.\n");
529 }
530
531 /* Get posteriors */
532 cp9_Posterior(dsq, i0, j0, cp9, fmx, bmx, pmx, FALSE);
533
534 return eslOK;
535 }
536
537
538 /* Function: cp9_FB2HMMBands()
539 * Date: EPN, 04.03.06
540 * EPN, Mon Oct 15 18:20:42 2007 [updated/optimized]
541 *
542 * Purpose: Determine the band on all HMM states given a Forward and
543 * Backward matrix. Do this by calculating and summing log posterior
544 * probabilities that each state emitted/was visited at each posn,
 *          starting at the sequence ends, and creeping in, until half of the
 *          maximum allowable excluded probability mass is reached on each side.
547 *
548 * Args:
549 *
550 * CP9_t hmm the HMM
551 * errbuf char buffer for error messages
552 * CP9_MX fmx: forward DP matrix, already calc'ed
553 * CP9_MX bmx: backward DP matrix, already calc'ed
554 * CP9_MX pmx: DP matrix for posteriors, filled here, can == bmx
555 * dsq the digitized sequence
556 * CP9Bands_t cp9b CP9 bands data structure
557 * int i0 start of target subsequence (often 1, beginning of dsq)
558 * int j0 end of target subsequence (often L, end of dsq)
559 * int M number of nodes in HMM (num columns of pmx matrix)
560 * double p_thresh the probability mass we're requiring is within each band
561 * int did_fwd_scan TRUE if Forward was run in 'scan mode' (parses could start anywhere)
562 * int did_bck_scan TRUE if Backward was run in 'scan mode' (parses could end anywhere)
563 * int do_old_hmm2ij TRUE if we'll use old cp9_HMM2ijBands_OLD() function downstream
564 * int debug_level [0..3] tells the function what level of debugging print
565 * statements to print.
566 *
567 * Returns: eslOK on success;
568 */
569 int
cp9_FB2HMMBands(CP9_t * hmm,char * errbuf,ESL_DSQ * dsq,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,CP9Bands_t * cp9b,int i0,int j0,int M,double p_thresh,int did_fwd_scan,int did_bck_scan,int do_old_hmm2ij,int debug_level)570 cp9_FB2HMMBands(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
571 int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level)
572 {
573 int status;
574 int k; /* counter over nodes of the model */
575 int L = j0-i0+1; /* length of sequence */
576 int thresh = Prob2Score(((1. - p_thresh)/2.), 1.); /* allowable prob mass excluded on each side */
577 int max; /* temporary max value */
578 int pnmax; /* position that gives max */
579
580 /* *_m = match, *_i = insert, *_d = delete */
581 int *kthresh_m, *kthresh_i, *kthresh_d; /* [0..k..hmm->M], individual thresholds for each state */
582 int *nset_m, *nset_i, *nset_d; /* [0..k..hmm->M], has minimum been set for this state? */
583 int *xset_m, *xset_i, *xset_d; /* [0..k..hmm->M], has maximum been set for this state? */
584 int *mass_m, *mass_i, *mass_d; /* [0..k..hmm->M], summed log prob of pmx->mx[i][k] from 0..k or k..L */
585 int i, ip; /* actual position and relative position in sequence, ip = i-i0+1 */
586 int sc; /* summed score of all parses (derived from backward matrix)
587 * if(cm->search_opts & CM_SEARCH_HMMALNBANDS) Forward and Backward
588 * were run in 'scan mode' where each residue can be begin/end of a parse,
589 * so we have to sum up parses that end at each posn,
590 * if ! (cm->search_opts & CM_SEARCH_HMMALNBANDS) we know we have
591 * to start at residue i0 and end at residue j0, so sc is simply bmx->mmx[0][0]
592 */
593 int hmm_is_localized; /* TRUE if HMM has local begins, ends or ELs on */
594 hmm_is_localized = ((hmm->flags & CPLAN9_LOCAL_BEGIN) || (hmm->flags & CPLAN9_LOCAL_END) || (hmm->flags & CPLAN9_EL)) ? TRUE : FALSE;
595
596 if(bmx != pmx) GrowCP9Matrix(pmx, errbuf, L, M, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
597
598 /* allocations and initializations */
599 ESL_ALLOC(nset_m, sizeof(int) * (M+1));
600 ESL_ALLOC(nset_i, sizeof(int) * (M+1));
601 ESL_ALLOC(nset_d, sizeof(int) * (M+1));
602 ESL_ALLOC(xset_m, sizeof(int) * (M+1));
603 ESL_ALLOC(xset_i, sizeof(int) * (M+1));
604 ESL_ALLOC(xset_d, sizeof(int) * (M+1));
605 ESL_ALLOC(mass_m, sizeof(int) * (M+1));
606 ESL_ALLOC(mass_i, sizeof(int) * (M+1));
607 ESL_ALLOC(mass_d, sizeof(int) * (M+1));
608 ESL_ALLOC(kthresh_m, sizeof(int) * (M+1));
609 ESL_ALLOC(kthresh_i, sizeof(int) * (M+1));
610 ESL_ALLOC(kthresh_d, sizeof(int) * (M+1));
611
612 esl_vec_ISet(mass_m, M+1, -INFTY);
613 esl_vec_ISet(mass_i, M+1, -INFTY);
614 esl_vec_ISet(mass_d, M+1, -INFTY);
615 esl_vec_ISet(nset_m, M+1, FALSE);
616 esl_vec_ISet(nset_i, M+1, FALSE);
617 esl_vec_ISet(nset_d, M+1, FALSE);
618 esl_vec_ISet(xset_m, M+1, FALSE);
619 esl_vec_ISet(xset_i, M+1, FALSE);
620 esl_vec_ISet(xset_d, M+1, FALSE);
621
622 if(did_fwd_scan) { /* parses were allowed to begin anywhere */
623 sc = -INFTY;
624 for (ip = 0; ip <= L; ip++) {
625 /*printf("bmx->mmx[i:%d][0]: %d\n", ip+i0-1, bmx->mmx[ip][0]); */
626 sc = ILogsum(sc, (bmx->mmx[ip][0]));
627 }
628 }
629 else sc = bmx->mmx[0][0]; /* Forward/Backward run in 'align mode' parses must start at i0, end at j0 */
630 /* sc is summed log prob of all possible parses of seq i0..j0 */
631
632 /* note boundary conditions, ip = 0, i = i0-1 */
633 pmx->mmx[0][0] = fmx->mmx[0][0] + bmx->mmx[0][0] - sc; /* fmx->mmx[0][0] is 0, bmx->mmx[0][0] is overall score */
634 pmx->imx[0][0] = -INFTY; /*need seq to get here*/
635 pmx->dmx[0][0] = -INFTY; /*D_0 does not exist*/
636 if((mass_m[0] = pmx->mmx[0][0]) > thresh) {
637 cp9b->pn_min_m[0] = ESL_MAX(i0-1, 0);
638 nset_m[0] = TRUE;
639 }
640 mass_i[0] = -INFTY; /* b/c pmx->imx[0][0] is -INFTY, set above */
641 mass_d[0] = -INFTY; /* b/c pmx->dmx[0][0] is -INFTY, set above */
642
643 for (k = 1; k <= M; k++) {
644 pmx->mmx[0][k] = -INFTY; /*need seq to get here*/
645 pmx->imx[0][k] = -INFTY; /*need seq to get here*/
646 pmx->dmx[0][k] = fmx->dmx[0][k] + bmx->dmx[0][k] - sc;
647 /* mass_m[k] doesn't change b/c pmx->mmx[0][k] is -INFTY */
648 /* mass_i[k] doesn't change b/c pmx->imx[0][k] is -INFTY */
649 if((mass_d[k] = pmx->dmx[0][k]) > thresh) {
650 cp9b->pn_min_d[k] = ESL_MAX(i0-1, 0);
651 nset_d[k] = TRUE;
652 }
653 }
654
655 /* Find minimum position in band for each state (M,I,D) of each node (0..M) */
656 for (ip = 1; ip <= L; ip++) /* ip is the relative position in the seq */
657 {
658 i = i0+ip-1; /* e.g. i is actual index in dsq, runs from i0 to j0 */
659 k = 0;
660 /* new block EPN, Wed Feb 13 11:58:52 2008 */
661 pmx->mmx[ip][0] = ESL_MAX(fmx->mmx[ip][0] + bmx->mmx[ip][0] - sc, -INFTY); /* M_0 doesn't emit */
662 if(! nset_m[0]) {
663 if((mass_m[0] = ILogsum(mass_m[0], pmx->mmx[ip][0])) > thresh) {
664 cp9b->pn_min_m[0] = i;
665 nset_m[0] = TRUE;
666 }
667 }
668 /* end of new block, old line used to be: pmx->mmx[ip][0] = -INFTY; */
669
670 pmx->imx[ip][0] = ESL_MAX(fmx->imx[ip][0] + bmx->imx[ip][0] - hmm->isc[dsq[i]][0] - sc, -INFTY);
671 /*hmm->isc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
672 if(! nset_i[0]) {
673 if((mass_i[0] = ILogsum(mass_i[0], pmx->imx[ip][0])) > thresh) {
674 cp9b->pn_min_i[0] = i;
675 nset_i[0] = TRUE;
676 }
677 }
678 pmx->dmx[ip][0] = -INFTY; /* D_0 doesn't exist */
679
680 for(k = 1; k <= M; k++)
681 {
682 pmx->mmx[ip][k] = ESL_MAX(fmx->mmx[ip][k] + bmx->mmx[ip][k] - hmm->msc[dsq[i]][k] - sc, -INFTY);
683 /*hmm->msc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
684 pmx->imx[ip][k] = ESL_MAX(fmx->imx[ip][k] + bmx->imx[ip][k] - hmm->isc[dsq[i]][k] - sc, -INFTY);
685 /*hmm->isc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
686 pmx->dmx[ip][k] = ESL_MAX(fmx->dmx[ip][k] + bmx->dmx[ip][k] - sc, -INFTY);
687
688 if(! nset_m[k]) {
689 if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > thresh) {
690 cp9b->pn_min_m[k] = i;
691 nset_m[k] = TRUE;
692 }
693 }
694 if(! nset_i[k]) {
695 if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > thresh) {
696 cp9b->pn_min_i[k] = i;
697 nset_i[k] = TRUE;
698 }
699 }
700 if(! nset_d[k]) {
701 if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > thresh) {
702 cp9b->pn_min_d[k] = i;
703 nset_d[k] = TRUE;
704 }
705 }
706 }
707 }
708 esl_vec_ISet(mass_m, M+1, -INFTY);
709 esl_vec_ISet(mass_i, M+1, -INFTY);
710 esl_vec_ISet(mass_d, M+1, -INFTY);
711 /* Find maximum position in band for each state (M,I,D) of each node (0..M)
712 * by moving from L down to 1 */
713 for (ip = L; ip >= 1; ip--) /* ip is the relative position in the seq */
714 {
715 i = i0+ip-1; /* e.g. i is actual index in dsq, runs from i0 to j0 */
716 for(k = 0; k <= M; k++)
717 {
718 if(! xset_m[k]) {
719 if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > thresh) {
720 cp9b->pn_max_m[k] = i;
721 xset_m[k] = TRUE;
722 }
723 }
724 if(! xset_i[k]) {
725 if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > thresh) {
726 cp9b->pn_max_i[k] = i;
727 xset_i[k] = TRUE;
728 }
729 }
730 if(! xset_d[k]) {
731 if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > thresh) {
732 cp9b->pn_max_d[k] = i;
733 xset_d[k] = TRUE;
734 }
735 }
736 }
737 }
738 ip = 0;
739 i = i0-1;
740 /* note boundary conditions, ip = 0, i = i0-1 */
741 if(! xset_m[0]) {
742 if((mass_m[0] = ILogsum(mass_m[0], pmx->mmx[0][0])) > thresh) {
743 cp9b->pn_max_m[0] = ESL_MAX(i0-1, 0);
744 xset_m[0] = TRUE;
745 }
746 }
747 /* mass_i[0] is unchanged because b/c pmx->imx[0][0] is -INFTY, set above */
748 /* mass_d[0] is unchanged because b/c pmx->dmx[0][0] is -INFTY, set above */
749 for (k = 1; k <= M; k++) {
750 /* mass_m[k] doesn't change b/c pmx->mmx[0][k] is -INFTY */
751 /* mass_i[k] doesn't change b/c pmx->mmx[0][k] is -INFTY */
752 if(!xset_d[k]) {
753 if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[0][k])) > thresh) {
754 cp9b->pn_max_d[k] = ESL_MAX(i0-1, 0);
755 xset_d[k] = TRUE;
756 }
757 }
758 }
759
760 if(! do_old_hmm2ij) {
761 /* new way as of EPN, Sun Jan 27 08:48:34 2008 */
762 /* Some states may not have had their min/max set. This occurs if the entire
763 * state is outside the band (i.e. the summed probablity the state is entered for ANY i
764 * is less than our threshold. Current strategy in this situation is to set the
765 * pn_min_* and pn_max_* values as special flags, (-2) so the function that
766 * uses them to derive i and j bands knows this is the case and handles it
767 * accordingly.
768 */
769 int mset;
770 int dset;
771 for(k = 0; k <= M; k++)
772 {
773 mset = dset = TRUE;
774 /* theoretically either nset_*[k] and xset_*[k] should be either both TRUE or both
775 * FALSE, but I'm slightly worried about rare precision issues, so we check if one
776 * or the other is unset, and if so, we set both to argmax position */
777 if(((! nset_m[k])) || (! xset_m[k]) || (cp9b->pn_max_m[k] < cp9b->pn_min_m[k])) {
778 cp9b->pn_min_m[k] = cp9b->pn_max_m[k] = -1;
779 mset = FALSE;
780 }
781 if(((! nset_i[k])) || (! xset_i[k]) || (cp9b->pn_max_i[k] < cp9b->pn_min_i[k])) {
782 cp9b->pn_min_i[k] = cp9b->pn_max_i[k] = -1;
783 }
784 if(((! nset_d[k])) || (! xset_d[k]) || (cp9b->pn_max_d[k] < cp9b->pn_min_d[k])) {
785 cp9b->pn_min_d[k] = cp9b->pn_max_d[k] = -1;
786 dset = FALSE;
787 }
788 if((!hmm_is_localized && !did_fwd_scan && !did_bck_scan) && (mset == FALSE && dset == FALSE)) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "node: %d match nor delete HMM state bands were set in non-localized, non-scanning HMM, lower tau (should be << 0.5).\n", k);
789 }
790 }
791 else {
792 /* old way, prior to Sun Jan 27 08:46:16 2008 */
793 /* Some states may not have had their min/max set. This occurs if the entire
794 * state is outside the band (i.e. the summed probablity the state is entered for ANY i
795 * is less than our threshold. Current strategy in this situation is to set the
796 * band to width 1 of the most likely position for that state, but to do that we
797 * need to find what the most likely posn is, we could do this in the loop above,
798 * but this is a rare situation, and so that turns out to be wasteful.
799 *
800 * Note: the off-by-one issue mentioned below is dealt with differently with the
801 * new code, when we're setting i and j CM bands using the HMM bands.
802 */
803 for(k = 0; k <= M; k++)
804 {
805 /* comment *: off-by-one issue with non-emitters (includes all D states and M_0):
806 * pn_min_d[k] = i, means posn i was last residue emitted
807 * prior to entering node k's delete state. However, for a CM,
808 * if a delete states sub-parsetree is bounded by i' and j', then
809 * positions i' and j' HAVE YET TO BE EMITTED.
810 * For M_0, so we don't have to check each node to see if k == 0, we
811 * do the off-by-one correction at the end of the function.
812 */
813 if(k != 0) {
814 if(cp9b->pn_min_d[k] != -1) cp9b->pn_min_d[k]++;
815 if(cp9b->pn_min_d[k] != -1) cp9b->pn_max_d[k]++;
816 }
817 /* theoretically either nset_*[k] and xset_*[k] should be either both TRUE or both
818 * FALSE, but I'm slightly worried about rare precision issues, so we check if one
819 * or the other is unset, and if so, we set both to argmax position */
820 if((! nset_m[k]) || (! xset_m[k])) {
821 max = pmx->mmx[0][k];
822 for(ip = 1; ip <= L; ip++)
823 if(pmx->mmx[ip][k] > max) { pnmax = i0+ip-1; max = pmx->mmx[ip][k]; } /* i = i0+ip-1 */
824 cp9b->pn_min_m[k] = cp9b->pn_max_m[k] = pnmax;
825 }
826 if((! nset_i[k]) || (! xset_i[k])) {
827 max = pmx->imx[0][k];
828 for(ip = 1; ip <= L; ip++)
829 if(pmx->imx[ip][k] > max) { pnmax = i0+ip-1; max = pmx->imx[ip][k]; } /* i = i0+ip-1 */
830 cp9b->pn_min_i[k] = cp9b->pn_max_i[k] = pnmax;
831 }
832 if((! nset_d[k]) || (! xset_d[k])) {
833 max = pmx->dmx[0][k];
834 for(ip = 1; ip <= L; ip++)
835 if(pmx->dmx[ip][k] > max) { pnmax = i0+ip-1; max = pmx->dmx[ip][k]; } /* i = i0+ip-1 */
836 cp9b->pn_min_d[k] = cp9b->pn_max_d[k] = pnmax;
837 }
838 }
839 cp9b->pn_min_m[0]++; /* non emitter */
840 cp9b->pn_max_m[0]++; /* non emitter */
841 }
842
843 cp9b->pn_min_d[0] = -1; /* D_0 doesn't exist */
844 cp9b->pn_max_d[0] = -1; /* D_0 doesn't exist */
845
846 if(debug_level > 0) cp9_DebugPrintHMMBands(stdout, j0, cp9b, (1.-p_thresh), 1);
847
848 free(mass_m);
849 free(mass_i);
850 free(mass_d);
851 free(nset_m);
852 free(nset_i);
853 free(nset_d);
854 free(xset_m);
855 free(xset_i);
856 free(xset_d);
857 free(kthresh_m);
858 free(kthresh_i);
859 free(kthresh_d);
860
861 return eslOK;
862
863 ERROR:
864 ESL_FAIL(status, errbuf, "Memory allocation error.\n");
865 }
866
867
868 /* Function: cp9_FB2HMMBandsWithSums()
869 * Date: EPN, Wed Oct 17 10:22:44 2007
870 *
871 * Purpose: Determine the band on all HMM states given a Forward and
872 * Backward matrix. Do this by calculating and summing log posterior
873 * probabilities that each state emitted/was visited at each posn,
874 * starting at the sequence ends, and creeping in, until the half the
875 * maximum allowable probability excluded is reached on each side.
876 *
877 * CP9_t hmm the HMM
878 * errbuf char buffer for error messages
879 * CP9_MX fmx: forward DP matrix, already calc'ed
880 * CP9_MX bmx: backward DP matrix, already calc'ed
881 * CP9_MX pmx: DP matrix for posteriors, filled here, can == bmx
882 * dsq the digitized sequence
883 * CP9Bands_t cp9b CP9 bands data structure
884 * int i0 start of target subsequence (often 1, beginning of dsq)
885 * int j0 end of target subsequence (often L, end of dsq)
886 * int M number of nodes in HMM (num columns of post matrix)
887 * double p_thresh the probability mass we're requiring is within each band
888 * int did_fwd_scan TRUE if Forward was run in 'scan mode' (parses could start at any posn)
889 * int did_bck_scan TRUE if Backward was run in 'scan mode' (parses could end at any posn)
890 * int do_old_hmm2ij TRUE if we'll use old cp9_HMM2ijBands_OLD() function downstream
891 * int debug_level [0..3] tells the function what level of debugging print
892 * statements to print.
893 *
894 * Returns: eslOK on success;
895 */
896 int
cp9_FB2HMMBandsWithSums(CP9_t * hmm,char * errbuf,ESL_DSQ * dsq,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * pmx,CP9Bands_t * cp9b,int i0,int j0,int M,double p_thresh,int did_fwd_scan,int did_bck_scan,int do_old_hmm2ij,int debug_level)897 cp9_FB2HMMBandsWithSums(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b,
898 int i0, int j0, int M, double p_thresh, int did_fwd_scan, int did_bck_scan, int do_old_hmm2ij, int debug_level)
899 {
900 int status;
901 int k; /* counter over nodes of the model */
902 int L = j0-i0+1; /* length of sequence */
903 int thresh = Prob2Score(((1. - p_thresh)/2.), 1.); /* allowable prob mass excluded on each side */
904
905 /* *_m = match, *_i = insert, *_d = delete */
906 int i, ip; /* actual position and relative position in sequence, ip = i-i0+1 */
907 int *kthresh_m, *kthresh_i, *kthresh_d; /* [0..k..hmm->M], individual thresholds for each state */
908 int *nset_m, *nset_i, *nset_d; /* [0..k..hmm->M], has minimum been set for this state? */
909 int *xset_m, *xset_i, *xset_d; /* [0..k..hmm->M], has maximum been set for this state? */
910 int *mass_m, *mass_i, *mass_d; /* [0..k..hmm->M], summed log prob of pmx->mx[i][k] from 0..k or k..L */
911
912 if(bmx != pmx) GrowCP9Matrix(pmx, errbuf, L, M, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
913
914 /* allocations and initializations */
915 ESL_ALLOC(nset_m, sizeof(int) * (M+1));
916 ESL_ALLOC(nset_i, sizeof(int) * (M+1));
917 ESL_ALLOC(nset_d, sizeof(int) * (M+1));
918 ESL_ALLOC(xset_m, sizeof(int) * (M+1));
919 ESL_ALLOC(xset_i, sizeof(int) * (M+1));
920 ESL_ALLOC(xset_d, sizeof(int) * (M+1));
921 ESL_ALLOC(mass_m, sizeof(int) * (M+1));
922 ESL_ALLOC(mass_i, sizeof(int) * (M+1));
923 ESL_ALLOC(mass_d, sizeof(int) * (M+1));
924 ESL_ALLOC(kthresh_m, sizeof(int) * (M+1));
925 ESL_ALLOC(kthresh_i, sizeof(int) * (M+1));
926 ESL_ALLOC(kthresh_d, sizeof(int) * (M+1));
927
928 esl_vec_ISet(mass_m, M+1, -INFTY);
929 esl_vec_ISet(mass_i, M+1, -INFTY);
930 esl_vec_ISet(mass_d, M+1, -INFTY);
931 esl_vec_ISet(nset_m, M+1, FALSE);
932 esl_vec_ISet(nset_i, M+1, FALSE);
933 esl_vec_ISet(nset_d, M+1, FALSE);
934 esl_vec_ISet(xset_m, M+1, FALSE);
935 esl_vec_ISet(xset_i, M+1, FALSE);
936 esl_vec_ISet(xset_d, M+1, FALSE);
937
938 /* get the posterior matrix first, we need it b/c each state will have a different log prob threshold */
939 cp9_Posterior(dsq, i0, j0, hmm, fmx, bmx, pmx, did_fwd_scan);
940
941 /* fill ipost_sums in cp9bands data structure */
942 cp9_IFillPostSums(pmx, cp9b, i0, j0);
943
944 /* set state dependent cutoff thresholds for log prob mass we need on each side (this is unique to
945 * WithSums() function */
946 for(k = 0; k <= M; k++) {
947 kthresh_m[k] = thresh + cp9b->isum_pn_m[k];
948 kthresh_i[k] = thresh + cp9b->isum_pn_i[k];
949 kthresh_d[k] = thresh + cp9b->isum_pn_d[k];
950 }
951
952 /* Find minimum position in band for each state (M,I,D) of each node (0..M) */
953 for (ip = 0; ip <= L; ip++) /* ip is the relative position in the seq */
954 {
955 i = i0+ip-1; /* e.g. i is actual index in dsq, runs from i0 to j0 */
956 for(k = 0; k <= M; k++)
957 {
958 if(! nset_m[k]) {
959 if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > kthresh_m[k]) {
960 cp9b->pn_min_m[k] = i;
961 nset_m[k] = TRUE;
962 }
963 }
964 if(! nset_i[k]) {
965 if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > kthresh_i[k]) {
966 cp9b->pn_min_i[k] = i;
967 nset_i[k] = TRUE;
968 }
969 }
970 if(! nset_d[k]) {
971 if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > kthresh_d[k]) {
972 cp9b->pn_min_d[k] = i;
973 nset_d[k] = TRUE;
974 }
975 }
976 }
977 }
978 /* Find maximum position in band for each state (M,I,D) of each node (0..M)
979 * by moving from L down to 0 */
980 /* reset mass_* arrays */
981 esl_vec_ISet(mass_m, M+1, -INFTY);
982 esl_vec_ISet(mass_i, M+1, -INFTY);
983 esl_vec_ISet(mass_d, M+1, -INFTY);
984 for (ip = L; ip >= 0; ip--) /* ip is the relative position in the seq */
985 {
986 i = i0+ip-1; /* e.g. i is actual index in dsq, runs from i0 to j0 */
987 for(k = 0; k <= M; k++)
988 {
989 if(! xset_m[k]) {
990 if((mass_m[k] = ILogsum(mass_m[k], pmx->mmx[ip][k])) > kthresh_m[k]) {
991 cp9b->pn_max_m[k] = i;
992 xset_m[k] = TRUE;
993 }
994 }
995 if(! xset_i[k]) {
996 if((mass_i[k] = ILogsum(mass_i[k], pmx->imx[ip][k])) > kthresh_i[k]) {
997 cp9b->pn_max_i[k] = i;
998 xset_i[k] = TRUE;
999 }
1000 }
1001 if(! xset_d[k]) {
1002 if((mass_d[k] = ILogsum(mass_d[k], pmx->dmx[ip][k])) > kthresh_d[k]) {
1003 cp9b->pn_max_d[k] = i;
1004 xset_d[k] = TRUE;
1005 }
1006 }
1007 }
1008 }
1009
1010 if(do_old_hmm2ij) { /* we have to correct for an off-by-one to be consistent with the 'old' way code */
1011 for(k = 1; k <= M; k++)
1012 {
1013 /* comment *: off-by-one issue with non-emitters (includes all D states and M_0):
1014 * pn_min_d[k] = i, means posn i was last residue emitted
1015 * prior to entering node k's delete state. However, for a CM,
1016 * if a delete states sub-parsetree is bounded by i' and j', then
1017 * positions i' and j' HAVE YET TO BE EMITTED.
1018 * For M_0, so we don't have to check each node to see if k == 0, we
1019 * do the off-by-one correction at the end of the function.
1020 */
1021 if(cp9b->pn_min_d[k] != -1) cp9b->pn_min_d[k]++;
1022 if(cp9b->pn_min_d[k] != -1) cp9b->pn_max_d[k]++;
1023 }
1024 cp9b->pn_min_m[0]++; /* non-emitter */
1025 cp9b->pn_max_m[0]++; /* non-emitter */
1026 }
1027
1028 #if eslDEBUGLEVEL >= 1
1029 /* all states should have their min/max set because we've normalized the probability
1030 * of entering each state to 1.0, so we assert this to be true */
1031 ESL_DASSERT1((nset_m[0]));
1032 ESL_DASSERT1((nset_i[0]));
1033 ESL_DASSERT1((xset_m[0]));
1034 ESL_DASSERT1((xset_i[0]));
1035 /* D_0 state does not exist */
1036 for(k = 1; k <= M; k++)
1037 {
1038 ESL_DASSERT1((nset_m[k]));
1039 ESL_DASSERT1((nset_i[k]));
1040 ESL_DASSERT1((nset_d[k]));
1041 ESL_DASSERT1((xset_m[k]));
1042 ESL_DASSERT1((xset_i[k]));
1043 ESL_DASSERT1((xset_d[k]));
1044 }
1045 #endif
1046
1047 cp9b->pn_min_d[0] = -1; /* D_0 doesn't exist */
1048 cp9b->pn_max_d[0] = -1; /* D_0 doesn't exist */
1049
1050 if(debug_level > 0) cp9_DebugPrintHMMBands(stdout, j0, cp9b, (1.-p_thresh), 1);
1051
1052 free(mass_m);
1053 free(mass_i);
1054 free(mass_d);
1055 free(nset_m);
1056 free(nset_i);
1057 free(nset_d);
1058 free(xset_m);
1059 free(xset_i);
1060 free(xset_d);
1061 free(kthresh_m);
1062 free(kthresh_i);
1063 free(kthresh_d);
1064
1065 return eslOK;
1066
1067 ERROR:
1068 ESL_FAIL(status, errbuf, "Memory allocation error.\n");
1069 }
1070
1071 /* Function: cp9_Posterior()
1072 * based on Ian Holmes' hmmer/src/postprob.c::P7EmitterPosterior()
1073 *
1074 * Purpose: Combines Forward and Backward matrices into a posterior
1075 * probability matrix. For emitters (match and inserts) the
1076 * entries in row i of this matrix are the logs of the posterior
1077 * probabilities of each state emitting symbol i of the sequence.
1078 * For non-emitters the entries in row i of this matrix are the
1079 * logs of the posterior probabilities of each state being 'visited'
1080 * when the last emitted residue in the parse was symbol i of the
1081 * sequence.
1082 * The last point distinguishes this function from P7EmitterPosterior()
1083 * which set all posterior values for for non-emitting states to -INFTY.
1084 * The caller must allocate space for the matrix, although the
1085 * backward matrix can be used instead (overwriting it will not
1086 * compromise the algorithm).
1087 *
1088 * if(did_fwd_scan == TRUE) forward was run in scan mode, which allowed
1089 * parses to start at any position of sequence, this changes how
1090 * we calculate summed prob of all parses (calculation of 'sc', see code).
1091 *
1092 * Args: dsq - sequence in digitized form
1093 * i0 - start of target subsequence (often 1, beginning of dsq)
1094 * j0 - end of target subsequence (often L, end of dsq)
1095 * hmm - the model
1096 * forward - pre-calculated forward matrix
1097 * backward - pre-calculated backward matrix
1098 * mx - pre-allocated dynamic programming matrix
1099 * did_fwd_scan - TRUE if Forward was run in 'scan' mode, which means
1100 * parses can start at any position of the sequence
1101 *
1102 * Return: void
1103 */
1104 void
cp9_Posterior(ESL_DSQ * dsq,int i0,int j0,CP9_t * hmm,CP9_MX * fmx,CP9_MX * bmx,CP9_MX * mx,int did_fwd_scan)1105 cp9_Posterior(ESL_DSQ *dsq, int i0, int j0, CP9_t *hmm, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *mx, int did_fwd_scan)
1106 {
1107 if(dsq == NULL) cm_Fail("in cp9_posterior(), dsq is NULL.");
1108
1109 int i;
1110 int k;
1111 int sc;
1112 int L; /* subsequence length */
1113 int ip; /* i': relative position in the subsequence */
1114 /*float temp_sc;*/
1115
1116 L = j0-i0+1; /* the length of the subsequence */
1117
1118 if(did_fwd_scan) { /* parses could start/stop anywhere */
1119 sc = -INFTY;
1120 for (ip = 0; ip <= L; ip++) {
1121 /*printf("bmx->mmx[i:%d][0]: %d\n", i, bmx->mmx[ip][0]);*/
1122 sc = ILogsum(sc, (bmx->mmx[ip][0]));
1123 }
1124 } /* parses must start/stop at (i = i0)/(j = j0) */
1125 else sc = bmx->mmx[0][0];
1126
1127 /* note boundary conditions, case by case by case... */
1128 mx->mmx[0][0] = fmx->mmx[0][0] + bmx->mmx[0][0] - sc; /* fmx->mmx[0][0] is 0, bmx->mmx[1][0] is overall score */
1129 mx->imx[0][0] = -INFTY; /*need seq to get here*/
1130 mx->dmx[0][0] = -INFTY; /*D_0 does not exist*/
1131 for (k = 1; k <= hmm->M; k++) {
1132 mx->mmx[0][k] = -INFTY; /*need seq to get here*/
1133 mx->imx[0][k] = -INFTY; /*need seq to get here*/
1134 mx->dmx[0][k] = fmx->dmx[0][k] + bmx->dmx[0][k] - sc;
1135 }
1136
1137 for (ip = 1; ip <= L; ip++) /* ip is the relative position in the seq */
1138 {
1139 i = i0+ip-1; /* e.g. i is actual index in dsq, runs from i0 to j0 */
1140 mx->mmx[ip][0] = -INFTY; /*M_0 does not emit*/
1141 mx->imx[ip][0] = fmx->imx[ip][0] + bmx->imx[ip][0] - hmm->isc[dsq[i]][0] - sc;
1142 /*hmm->isc[dsq[i]][0] will have been counted in both fmx->imx and bmx->imx*/
1143 mx->dmx[ip][0] = -INFTY; /*D_0 does not exist*/
1144
1145 /*printf("fmx->mmx[ip:%d][0]: %d\n bmx->mmx[ip:%d][0]: %d\n", ip, fmx->mmx[ip][0], ip, bmx->mmx[ip][0]);
1146 printf("fmx->imx[ip:%d][0]: %d\n bmx->imx[ip:%d][0]: %d\n", ip, fmx->imx[ip][0], ip, bmx->imx[ip][0]);
1147 printf("fmx->dmx[ip:%d][0]: %d\n bmx->dmx[ip:%d][0]: %d\n", ip, fmx->dmx[ip][0], ip, bmx->dmx[ip][0]);*/
1148 for (k = 1; k <= hmm->M; k++)
1149 {
1150 mx->mmx[ip][k] = ESL_MAX(fmx->mmx[ip][k] + bmx->mmx[ip][k] - hmm->msc[dsq[i]][k] - sc, -INFTY);
1151 /*hmm->msc[dsq[i]][k] will have been counted in both fmx->mmx and bmx->mmx*/
1152 mx->imx[ip][k] = ESL_MAX(fmx->imx[ip][k] + bmx->imx[ip][k] - hmm->isc[dsq[i]][k] - sc, -INFTY);
1153 /*hmm->isc[dsq[i]][k] will have been counted in both fmx->imx and bmx->imx*/
1154 mx->dmx[ip][k] = ESL_MAX(fmx->dmx[ip][k] + bmx->dmx[ip][k] - sc, -INFTY);
1155 /*printf("fmx->mmx[ip:%d][%d]: %d\n bmx->mmx[ip:%d][%d]: %d\n", ip, k, fmx->mmx[ip][k], ip, k, bmx->mmx[ip][k]);
1156 printf("fmx->imx[ip:%d][%d]: %d\n bmx->imx[ip:%d][%d]: %d\n", ip, k, fmx->imx[ip][k], ip, k, bmx->imx[ip][k]);
1157 printf("fmx->dmx[ip:%d][%d]: %d\n bmx->dmx[ip:%d][%d]: %d\n\n", ip, k, fmx->dmx[ip][k], ip, k, bmx->dmx[ip][k]);*/
1158 }
1159 }
1160
1161 /*
1162 float temp_sc;
1163 for(i = 0; i <= L; i++)
1164 {
1165 for(k = 0; k <= hmm->M; k++)
1166 {
1167 temp_sc = Score2Prob(mx->mmx[i][k], 1.);
1168 if(temp_sc > .0001)
1169 printf("mx->mmx[%3d][%3d]: %9d | %8f\n", i, k, mx->mmx[i][k], temp_sc);
1170 temp_sc = Score2Prob(mx->imx[i][k], 1.);
1171 if(temp_sc > .0001)
1172 printf("mx->imx[%3d][%3d]: %9d | %8f\n", i, k, mx->imx[i][k], temp_sc);
1173 temp_sc = Score2Prob(mx->dmx[i][k], 1.);
1174 if(temp_sc > .0001)
1175 printf("mx->dmx[%3d][%3d]: %9d | %8f\n", i, k, mx->dmx[i][k], temp_sc);
1176 }
1177 }*/
1178 }
1179
1180 /*****************************************************************************
1181 * EPN 03.23.06
1182 * Function: cp9_IFillPostSums()
1183 * based on: ifill_post_sums_del() (deprecated) 11.23.05
1184 *
1185 * Purpose: Given a posterior matrix post, where post->mmx[i][k]
1186 * is the log odds score of the probability that
1187 * match state k emitted position i of the sequence,
1188 * sum the log probabilities that each state emitted
1189 * each position. Do this for inserts, matches, and
1190 * and deletes.
1191 *
1192 * arguments:
1193 * cp9_dpmatrix_s *post dpmatrix_s posterior matrix, xmx, mmx, imx, dmx
1194 * 2D int arrays. [0.1..N][0.1..M]
1195 * CP9Bands_t *cp9b - the cp9 bands data structure
1196 * int i0 start of target subsequence (often 1, beginning of dsq)
1197 * int j0 end of target subsequence (often L, end of dsq)
1198 *****************************************************************************/
1199 void
cp9_IFillPostSums(CP9_MX * post,CP9Bands_t * cp9b,int i0,int j0)1200 cp9_IFillPostSums(CP9_MX *post, CP9Bands_t *cp9b, int i0, int j0)
1201 {
1202 int i; /* counter over positions of the sequence */
1203 int k; /* counter over nodes of the model */
1204 int L; /* subsequence length */
1205 int M; /* consensus length of cp9 */
1206 M = cp9b->hmm_M;
1207 L = j0-i0+1; /* the length of the subsequence */
1208
1209 /* step through each node, fill the post sum structures */
1210 for(k = 0; k <= M; k++)
1211 {
1212 cp9b->isum_pn_m[k] = -INFTY;
1213 cp9b->isum_pn_i[k] = -INFTY;
1214 cp9b->isum_pn_d[k] = -INFTY;
1215 for(i = 0; i <= L; i++) {
1216 cp9b->isum_pn_m[k] = ILogsum(cp9b->isum_pn_m[k], post->mmx[i][k]);
1217 cp9b->isum_pn_i[k] = ILogsum(cp9b->isum_pn_i[k], post->imx[i][k]);
1218 cp9b->isum_pn_d[k] = ILogsum(cp9b->isum_pn_d[k], post->dmx[i][k]);
1219 }
1220 }
1221 }
1222
1223 /* Function: cp9_ValidateBands()
1224 * Incept: EPN, Wed Nov 14 15:49:08 2007
1225 * Purpose: Validate the info in CP9Bands_t data structure is internally
1226 * consistent.
1227 *
1228 * Args: cm the cm
1229 * errbuf char buffer for error message
1230 * cp9b the CP9 bands object
1231 * i0 first residue we can possibly allow as valid j
1232 * j0 final residue we can possibly allow as valid j
1233 *
1234 * Returns: eslOK, or, if error, other status code and filled errbuf
1235 */
1236 int
cp9_ValidateBands(CM_t * cm,char * errbuf,CP9Bands_t * cp9b,int i0,int j0,int do_trunc)1237 cp9_ValidateBands(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int i0, int j0, int do_trunc)
1238 {
1239 int v; /* counter over states of the CM */
1240 int jp; /* counter over valid j's, but offset. jp+jmin[v] = actual j */
1241 int sd; /* minimum d allowed for a state, ex: MP_st = 2, ML_st = 1. etc. */
1242 int max_sdl_sdr; /* maximum of StateLeftDelta, StateRightDelta for a state */
1243 int dn; /* max_sdl_sdr if do_trunc, else sd */
1244 int hd_needed;
1245 int j;
1246
1247
1248 if(cm->M != cp9b->cm_M) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cm->M != cp9b->cm_M\n");
1249 if(cm->clen != cp9b->hmm_M) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cm->clen != cp9b->hmm_M\n");
1250
1251 hd_needed = 0;
1252 for(v = 0; v < cp9b->cm_M; v++) {
1253 hd_needed += cp9b->jmax[v] - cp9b->jmin[v] + 1;
1254 }
1255 if(hd_needed != cp9b->hd_needed) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hd_needed inconsistent.");
1256
1257 for(v = 0; v < cm->M; v++) {
1258 assert((cp9b->imin[v] == -1 && cp9b->imax[v] == -2) || (cp9b->imin[v] >= 0 && cp9b->imax[v] >= 0));
1259 assert((cp9b->jmin[v] == -1 && cp9b->jmax[v] == -2) || (cp9b->jmin[v] >= 0 && cp9b->jmax[v] >= 0));
1260 }
1261
1262 for(v = 0; v < cm->M; v++) {
1263 sd = StateDelta(cm->sttype[v]);
1264 max_sdl_sdr = ESL_MAX(StateLeftDelta(cm->sttype[v]), StateRightDelta(cm->sttype[v]));
1265 dn = do_trunc ? max_sdl_sdr : sd;
1266 /* if (do_trunc) d can be 1 for MP states, this is why we use dn
1267 * here. Note: d can't be 0 for ML/IL in R mode, MR/IR in L
1268 * mode even though you might think it could be. We'll always do
1269 * a truncated begin with d=1 for L,R marginal alignments. */
1270 if(cm->sttype[v] == E_st) {
1271 for(jp = 0; jp <= (cp9b->jmax[v]-cp9b->jmin[v]); jp++) {
1272 if(cp9b->hdmin[v][jp] != 0) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin for E state is inconsistent.");
1273 if(cp9b->hdmax[v][jp] != 0) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin for E state is inconsistent.");
1274 }
1275 }
1276 else {
1277 if(cp9b->jmin[v] != -1) {
1278 for(jp = 0; jp <= (cp9b->jmax[v]-cp9b->jmin[v]); jp++) {
1279 j = jp+cp9b->jmin[v];
1280 if(cp9b->hdmin[v][jp] == -1) {
1281 if(cp9b->hdmax[v][jp] != -2) { ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin is -1 for state %d, j: %d, but hdmax is not -2 (it's %d).", v, j, cp9b->hdmax[v][jp]); }
1282 }
1283 else {
1284 if(cp9b->hdmin[v][jp] != ESL_MAX((j - cp9b->imax[v] + 1), dn)) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmin %d (dn: %d) for state %d, j: %d imax[v]: %d is inconsistent.", cp9b->hdmin[v][jp], dn, v, j, cp9b->imax[v]);
1285 if(cp9b->hdmax[v][jp] != ESL_MAX((j - cp9b->imin[v] + 1), dn)) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), cp9b->hdmax %d (dn: %d) for state %d, j: %d imin[v]: %d is inconsistent.", cp9b->hdmax[v][jp], dn, v, j, cp9b->imin[v]);
1286 }
1287 }
1288 }
1289 }
1290 /* get rid of StateIsDetached once old band construction method is deprecated */
1291 if(cp9b->imin[v] == -1 && !StateIsDetached(cm, v)) { /* ensure all unreachable states have 0 width bands */
1292 if(cp9b->imax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] == -1, but imax[v] != -2 but rather %d\n", v, cp9b->imax[v]);
1293 if(cp9b->jmin[v] != -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] == -1, but jmin[v] != -1 but rather %d\n", v, cp9b->jmin[v]);
1294 if(cp9b->jmax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] == -1, but jmax[v] != -2 but rather %d\n", v, cp9b->jmax[v]);
1295 }
1296 else if(!StateIsDetached(cm, v)){
1297 if(cp9b->imax[v] == -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] != -1, but imax[v] == -2!\n", v);
1298 if(cp9b->jmin[v] == -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] != -1, but jmin[v] == -1!\n", v);
1299 if(cp9b->jmax[v] == -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d imin[v] != -1, but jmax[v] == -2!\n", v);
1300 }
1301
1302 if(i0 == j0 && cm->sttype[v] == MP_st) { /* special case, MPs are impossible in this case */
1303 if(cp9b->imin[v] != -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but imin[v]: %d != -1\n", i0, v, cp9b->imin[v]);
1304 if(cp9b->imax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but imax[v]: %d != -2\n", i0, v, cp9b->imax[v]);
1305 if(cp9b->jmin[v] != -1) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but jmin[v]: %d != -1\n", i0, v, cp9b->jmin[v]);
1306 if(cp9b->jmax[v] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), exceedingly rare case, i0==j0==%d v: %d is MP but jmax[v]: %d != -2\n", i0, v, cp9b->jmax[v]);
1307 }
1308 else {
1309 if(cp9b->jmin[v] != -1) {
1310 for(j = cp9b->jmin[v]; j <= cp9b->jmax[v]; j++) {
1311 if(j < (i0-1)) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), j: %d outside i0-1:%d..j0:%d is within v's j band: jmin[%d]: %d jmax[%d]: %d\n", j, i0-1, j0, v, cp9b->jmin[v], v, cp9b->jmax[v]);
1312 if(j > j0) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), j: %d outside i0-1:%d..j0:%d is within v's j band: jmin[%d]: %d jmax[%d]: %d\n", j, i0-1, j0, v, cp9b->jmin[v], v, cp9b->jmax[v]);
1313 if(cp9b->hdmin[v][(j-cp9b->jmin[v])] == -1) {
1314 if(cp9b->hdmax[v][(j-cp9b->jmin[v])] != -2) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d j: %d hdmin[v][jp_v:%d] == -1, but hdmax[v][jp_v:%d] != -2 (it's %d)\n", v, j, (j-cp9b->jmin[v]), (j-cp9b->jmin[v]), cp9b->hdmax[v][(j-cp9b->jmin[v])]);
1315 }
1316 else {
1317 if(cp9b->hdmin[v][(j-cp9b->jmin[v])] < dn) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d j: %d hdmin[v][jp_v:%d] : %d less than StateDelta for v: %d\n", v, j, (j-cp9b->jmin[v]), cp9b->hdmin[v][(j-cp9b->jmin[v])], dn);
1318 if(cp9b->hdmax[v][(j-cp9b->jmin[v])] < dn) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), v: %d j: %d hdmax[v][jp_v:%d] : %d less than StateDelta for v: %d\n", v, j, (j-cp9b->jmin[v]), cp9b->hdmax[v][(j-cp9b->jmin[v])], dn);
1319 }
1320 }
1321 if(cp9b->jmax[v] > cp9b->jmax[0]) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), jmax[v:%d]:%d > jmax[0]:%d.", v, cp9b->jmax[v], cp9b->jmax[0]);
1322 if(cp9b->imin[v] < cp9b->imin[0]) ESL_FAIL(eslEINVAL, errbuf, "cp9_ValidateBands(), imin[v:%d]:%d < imin[0]:%d, i0:%d j0:%d jmin[v]:%d jmax[v]:%d jmin[0]:%d jmax[0]:%d imax[v]:%d", v, cp9b->imin[v], cp9b->imin[0], i0, j0, cp9b->jmin[v], cp9b->jmax[v], cp9b->jmin[0], cp9b->jmax[0], cp9b->imax[v]);
1323 }
1324 }
1325 }
1326 return eslOK;
1327 }
1328
1329 /*
1330 * Function: cp9_GrowHDBands()
1331 *
1332 * Incept: EPN, Thu Oct 25 13:24:29 2007
1333 * Purpose: Rearrange CP9 hdmin and hdmax pointers for a new sequence
1334 * based on j bands (jmin and jmax). If the currently allocated
1335 * size for hdmin, hdmax is not big enough, reallocate them.
1336 *
1337 * Args:
1338 * CP9Bands_t cp9b the CP9 Bands object.
1339 * errbuf char buffer for error messages
1340 *
1341 * Returns: eslOK on success, eslEMEM if memory allocation error
1342 */
1343 int
cp9_GrowHDBands(CP9Bands_t * cp9b,char * errbuf)1344 cp9_GrowHDBands(CP9Bands_t *cp9b, char *errbuf)
1345 {
1346 int status;
1347 int v;
1348 int cur_size = 0;
1349 int jbw;
1350
1351 /* count size we need for hdmin/hdmax given current jmin, jmax */
1352 cp9b->hd_needed = 0; /* we'll rewrite this */
1353 for(v = 0; v < cp9b->cm_M; v++) {
1354 cp9b->hd_needed += cp9b->jmax[v] - cp9b->jmin[v] + 1;
1355 /* printf("hd needed v: %4d bw: %4d total: %5d\n", v, cp9b->jmax[v] - cp9b->jmin[v] + 1, cp9b->hd_needed); */
1356 }
1357 if(cp9b->hd_alloced < cp9b->hd_needed) {
1358 void *tmp;
1359 if(cp9b->hdmin_mem == NULL) ESL_ALLOC(cp9b->hdmin_mem, sizeof(int) * cp9b->hd_needed);
1360 else ESL_RALLOC(cp9b->hdmin_mem, tmp, sizeof(int) * cp9b->hd_needed);
1361 if(cp9b->hdmax_mem == NULL) ESL_ALLOC(cp9b->hdmax_mem, sizeof(int) * cp9b->hd_needed);
1362 else ESL_RALLOC(cp9b->hdmax_mem, tmp, sizeof(int) * cp9b->hd_needed);
1363 }
1364
1365 /* set pointers */
1366 cur_size = 0;
1367 for(v = 0; v < cp9b->cm_M; v++) {
1368 cp9b->hdmin[v] = cp9b->hdmin_mem + cur_size;
1369 cp9b->hdmax[v] = cp9b->hdmax_mem + cur_size;
1370 jbw = cp9b->jmax[v] - cp9b->jmin[v] + 1;
1371 assert(jbw >= 0);
1372 ESL_DASSERT1((jbw >= 0));
1373 cur_size += jbw;
1374 }
1375 cp9b->hd_alloced = cur_size;
1376 ESL_DASSERT1((cp9b->hd_alloced == cp9b->hd_needed));
1377 return eslOK;
1378
1379 ERROR:
1380 ESL_FAIL(status, errbuf, "Memory allocation error.");
1381 }
1382
1383
1384 /*****************************************************************************
1385 * EPN 11.03.05
1386 * Function: ij2d_bands()
1387 *
1388 * Purpose: Determine the band for each cm state v on d (the band on the
1389 * length of the subsequence emitted from the subtree rooted
1390 * at state v). These are easily calculated given the bands on i
1391 * and j.
1392 *
1393 * arguments:
1394 *
1395 * CM_t *cm the CM
1396 * int W length of sequence we're aligning
1397 * int *imin imin[v] = first position in band on i for state v
1398 * int *imax imax[v] = last position in band on i for state v
1399 * int *jmin jmin[v] = first position in band on j for state v
1400 * int *jmax jmax[v] = last position in band on j for state v
1401 * int **hdmin hdmin[v][jp] = first position in band on d for state v
1402 * and j position: j = jp+jmin[v].
1403 * Filled in this function.
1404 * int **hdmax hdmax[v][jp] = last position in band on d for state v
1405 * and j position: j = jp+jmin[v].
1406 * Filled in this function.
1407 * int do_trunc TRUE if we'll use these bands in a truncated version of CYK/Inside/Outside
1408 * int debug_level [0..3] tells the function what level of debugging print
1409 * statements to print.
1410 *****************************************************************************/
1411 void
ij2d_bands(CM_t * cm,int W,int * imin,int * imax,int * jmin,int * jmax,int ** hdmin,int ** hdmax,int do_trunc,int debug_level)1412 ij2d_bands(CM_t *cm, int W, int *imin, int *imax, int *jmin, int *jmax,
1413 int **hdmin, int **hdmax, int do_trunc, int debug_level)
1414 {
1415 int v; /* counter over states of the CM */
1416 int jp; /* counter over valid j's, but offset. jp+jmin[v] = actual j */
1417 int j; /* actual j */
1418 int sd; /* minimum d allowed for a state, ex: MP_st = 2, ML_st = 1. etc. */
1419 int max_sdl_sdr; /* maximum of StateLeftDelta, StateRightDelta for a state */
1420 int dn; /* max_sdl_sdr if do_trunc, else sd */
1421 int hdn, hdx; /* temporary hdmin/hdmax */
1422 for(v = 0; v < cm->M; v++) {
1423 if(cm->sttype[v] == E_st) {
1424 for(jp = 0; jp <= (jmax[v]-jmin[v]); jp++) {
1425 hdmin[v][jp] = 0;
1426 hdmax[v][jp] = 0;
1427 }
1428 }
1429 else {
1430 sd = StateDelta(cm->sttype[v]);
1431 max_sdl_sdr = ESL_MAX(StateLeftDelta(cm->sttype[v]), StateRightDelta(cm->sttype[v]));
1432 dn = do_trunc ? max_sdl_sdr : sd;
1433 /* if (do_trunc) d can be 1 for MP states, this is why we use dn
1434 * here. Note: d can't be 0 for ML/IL in R mode, MR/IR in L
1435 * mode even though you might think it could be. We'll always do
1436 * a truncated begin with d=1 for L,R marginal alignments. */
1437
1438 for(jp = 0; jp <= (jmax[v]-jmin[v]); jp++) {
1439 j = jp+jmin[v];
1440 hdn = j-imax[v]+1;
1441 hdx = j-imin[v]+1;
1442 if(hdx < dn) {
1443 hdmin[v][jp] = -1;
1444 hdmax[v][jp] = -2;
1445 }
1446 else {
1447 hdmin[v][jp] = ESL_MAX(hdn, dn);
1448 hdmax[v][jp] = hdx;
1449 }
1450 /* printf("hd[%d][j=%d]: min: %d | max: %d\n", v, (jp+jmin[v]), hdmin[v][jp], hdmax[v][jp]); */
1451 }
1452 }
1453 }
1454 }
1455
1456 /* Function: cp9_HMM2ijBands()
1457 * Synopsis: Derive bands on i and j for all CM states given HMM bands.
1458 * Incept: EPN, Thu Feb 7 12:05:01 2008
1459 *
1460 * Purpose: Given HMM bands, determine the corresponding bands on the
1461 * CM. Both for i: the left border of the subsequence emitted
1462 * from the subtree rooted at v, the band is imin[v]..imax[v]
1463 * inclusive. And also for j: the right border of the subseq
1464 * emitted from the subtree rooted at v, the band is
1465 * jmin[v]..jmax[v] inclusive.
1466 *
1467 * This is done by first enforcing that the HMM bands allow
1468 * at least 1 possible HMM parse. A valid parse given the
1469 * HMM bands is not guaranteed, although it's nearly always
1470 * likely even for relatively high values of tau (the
1471 * probability mass allowed outside the band for each state,
1472 * relatively high is 0.01). With very tight bands, for
1473 * example from a tau of 0.49, the chance that all parses
1474 * are impossible given the bands is much more likely (especially
1475 * with non-homologous sequences). *If* the HMM bands exclude
1476 * all possible HMM parses, they are expanded in a greedy,
1477 * stupid way to allow at least 1 parse (we could be smarter,
1478 * but this case only arises for impractical tau values, in
1479 * fact I only implemented it to verify the rest of the HMM
1480 * banding implementation is robust, and will always work
1481 * for tau values up to 0.5).
1482 *
1483 * Once we know an HMM parse is possible given the HMM bands,
1484 * we also know if we impose those exact bands on the CM
1485 * we will also have a valid CM parse, b/c there is a 1:1
1486 * mapping between HMM parses and CM parsetrees. So, we
1487 * impose the HMM bands onto the CM to get the i and j
1488 * bands using a stack and mapping 'explicit' bands,
1489 * the i or j bands of CM states that map to an HMM
1490 * state (for example the i band of MATL_ML states,
1491 * or the j bands of MATR_MR states). The other bands
1492 * that are not explicitly set (ex: the j band of a
1493 * MATL_ML state and the i band of a MATR_MR state), are
1494 * implicitly set based on the explicit ones.
1495 *
1496 * Note: This code is ugly, even more than usual for me.
1497 * There's a plethora of special cases, which are maddening
1498 * during development/debugging. The code starts out simple
1499 * and balloons as you add code to handle the special cases.
1500 * [EPN, Thu Feb 7 12:17:53 2008].
1501 *
1502 * Args: <cm> - the model
1503 * <errbuf> - for returning error messages
1504 * <cp9> - the CP9 HMM used to determine the bands
1505 * <cp9b> - the bands data structure
1506 * <cp9map> - map between the CM and HMM
1507 * <i0> - first position in the sequence we're considering
1508 * <j0> - final position in the sequence we're considering
1509 * <doing_search> - TRUE if we're searching the target sequence, not aligning it,
1510 * relevant b/c iff we're aligning the parsetree *must* span i0..j0
1511 * <do_trunc> - TRUE if we're going to use these bands for truncated CYK/Inside/Outside
1512 * <debug_level> - verbosity level for debugging printf() statements
1513 *
1514 * Returns: <eslOK> on success.
1515 *
1516 * Throws: <eslEINCOMPAT> on contract violation
1517 * <eslEMEM> on memory error
1518 */
1519 int
cp9_HMM2ijBands(CM_t * cm,char * errbuf,CP9_t * cp9,CP9Bands_t * cp9b,CP9Map_t * cp9map,int i0,int j0,int doing_search,int do_trunc,int debug_level)1520 cp9_HMM2ijBands(CM_t *cm, char *errbuf, CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int do_trunc, int debug_level)
1521 {
1522
1523 int status;
1524 int v;
1525
1526 /* ptrs to cp9b data, for convenience */
1527 int *imin; /* imin[v] = first position in band on i for state v to be filled in this function. [1..M] */
1528 int *imax; /* imax[v] = last position in band on i for state v to be filled in this function. [1..M] */
1529 int *jmin; /* jmin[v] = first position in band on j for state v to be filled in this function. [1..M] */
1530 int *jmax; /* jmax[v] = last position in band on j for state v to be filled in this function. [1..M] */
1531
1532 int nd; /* counter over CM nodes. */
1533 int y; /* counters over children states */
1534 int hmm_M; /* number of nodes in the HMM */
1535 ESL_STACK *nd_pda; /* used to traverse the CM from left to right in consensus positions, cpos = 0..clen */
1536 ESL_STACK *lpos_pda; /* used to store lpos for BIF nodes */
1537 int on_right; /* TRUE if we're on the right for current node during our CM traversal */
1538 int w; /* a state index */
1539 int lpos, rpos; /* left/right border of subtree for current node */
1540 /*int k;*/ /* counter of HMM nodes, for debugging print statements, currently not used */
1541 int hmm_is_localized; /* TRUE if HMM has local begins, ends or ELs on */
1542 int cm_is_fully_localized; /* TRUE if CM has local begins and ends on */
1543
1544 /* r_* arrays, these are filled in HMMBandsEnforceValidParse(), they are the band on 'reachable'
1545 * residues for each HMM state as we move from left to right through the HMM.
1546 * For example, r_mn[k] = 3, r_mx[k] = 5, means that for all possible HMM parses within the bands
1547 * in the cp9b pn_* arrays that reach the match state of node k, the residue emitted by that match
1548 * must be either 3, 4, or 5.
1549 */
1550 int *r_mn; /* [0..k..hmm_M] minimal residue position for which we can reach M_k (match state of node k) */
1551 int *r_mx; /* [0..k..hmm_M] maximal residue position for which we can reach M_k */
1552 int *r_in; /* [0..k..hmm_M] minimal residue position for which we can reach I_k (insert state of node k) */
1553 int *r_ix; /* [0..k..hmm_M] maximal residue position for which we can reach I_k */
1554 int *r_dn; /* [0..k..hmm_M] minimal residue position for which we can reach D_k (delete state of node k) */
1555 int *r_dx; /* [0..k..hmm_M] maximal residue position for which we can reach D_k */
1556 int *r_nn_i; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1557 int *r_nx_i; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1558 int *r_nn_j; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1559 int *r_nx_j; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
1560 /* r_nn_i and r_nx_i are used when setting i bands, and r_nn_j and r_nx_j are used when setting j bands .
1561 * the values can differ because of an off-by-one issue with the non-emitting (delete and M_0) states of the HMM:
1562 * pn_min_d[k] = i, means posn i was last residue emitted prior to entering node k's delete state. However, for a CM,
1563 * if a delete states sub-parsetree is bounded by i' and j', this means positions i' and j' HAVE YET TO BE EMITTED.
1564 * For i states this means we have to add 1 to the delete band positions, but for j states we do not, the off-by-one
1565 * is taken care of because the HMM is moving left to right, while j positions move right to left (confusing as hell,
1566 * bad explanation, i know... write out an example, its the only way to get it).
1567 */
1568
1569 /* Contract checks */
1570 if (cp9b == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands(), cp9b is NULL.\n");
1571 if(i0 < 1) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands(), i0 < 1: %d\n", i0);
1572 if(j0 < 1) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands(), j0 < 1: %d\n", j0);
1573 if(j0 < i0) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands(), i0 (%d) < j0 (%d)\n", i0, j0);
1574 hmm_is_localized = ((cp9->flags & CPLAN9_LOCAL_BEGIN) || (cp9->flags & CPLAN9_LOCAL_END) || (cp9->flags & CPLAN9_EL)) ? TRUE : FALSE;
1575 cm_is_fully_localized = ((cm->flags & CMH_LOCAL_BEGIN) && (cm->flags & CMH_LOCAL_END)) ? TRUE : FALSE;
1576
1577 /* ptrs to cp9b arrays, for convenience */
1578 imin = cp9b->imin;
1579 imax = cp9b->imax;
1580 jmin = cp9b->jmin;
1581 jmax = cp9b->jmax;
1582 hmm_M = cp9b->hmm_M;
1583 /* Initialize all bands to -1 */
1584 esl_vec_ISet(imin, cm->M, -1);
1585 esl_vec_ISet(imax, cm->M, -2);
1586 esl_vec_ISet(jmin, cm->M, -1);
1587 esl_vec_ISet(jmax, cm->M, -2);
1588
1589 /* Step 1: Check for valid HMM parse within the HMM bands, if there isn't one messily expand the bands so that there is one */
1590 if((status = HMMBandsEnforceValidParse(cp9, cp9b, cp9map, errbuf, i0, j0, doing_search, NULL,
1591 &r_mn, &r_mx, &r_in, &r_ix, &r_dn, &r_dx, &r_nn_i, &r_nx_i, &r_nn_j, &r_nx_j)) != eslOK) return status;
1592
1593 /* debugging printf block */
1594 /*
1595 for(k = 0; k <= cp9b->hmm_M;k ++) {
1596 printf("k: %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d %4d\n", k, r_mn[k], r_mx[k], r_in[k], r_ix[k], r_dn[k], r_dx[k], r_nn_i[k], r_nx_i[k], r_nn_j[k], r_nx_j[k]);
1597 }
1598 cp9_DebugPrintHMMBands(stdout, j0, cp9b, cm->tau, 1);
1599 */
1600
1601 /* Step 2: Traverse the CM from left to right in consensus position coordinates. Fill in the
1602 * i and j bands (imin, imax, jmin, jmax) for all states as we go. The CM is traversed
1603 * using a stack, each node is visited twice (this is based on Sean's cleaner:
1604 * display.c::CreateEmitMap(). The first time a node <nd> is visited we're 'on the left'
1605 * and then we push it back to the stack, and visit it again 'on the right' later. We
1606 * are moving around the perimeter of the guide tree, stepping one position at a time
1607 * in the consensus sequence coordinates, from left to right. We mainly set bands
1608 * when we're 'on the right', with the exception of Left emitting states, which are
1609 * set when we're on the HMM. All emitting states and delete states v have either
1610 * i, or j or both bands that can be set 'explicitly' based on the HMM bands for
1611 * the HMM state that maps to v. For example we can set the i bands for MATL_ML
1612 * states, and the j bands for MATR_MR states. All other bands (and both i
1613 * and j bands for S states, B states, E states) are set 'implicitly based on the
1614 * explicit bands, and the r_* data structures we filled in HMMBandsEnforceValidParse().
1615 * The goal was to make this function as clean and simple as possible, and although
1616 * it doesn't look it, this is as good as I can get it. There are many special
1617 * cases that make an elegant implementation beyond me.
1618 */
1619 if(! doing_search) {
1620 assert(r_mn[0] == (i0-1));
1621 if(!hmm_is_localized) assert(r_mx[hmm_M] == j0 || r_ix[hmm_M] == j0 || r_dx[hmm_M] == j0);
1622 }
1623 nd = 0;
1624 lpos = 0;
1625 rpos = 0;
1626
1627 if ((nd_pda = esl_stack_ICreate()) == NULL) goto ERROR;
1628 if ((lpos_pda = esl_stack_ICreate()) == NULL) goto ERROR;
1629 if ((status = esl_stack_IPush(nd_pda, 0)) != eslOK) goto ERROR; /* 0 = left side. 1 would = right side. */
1630 if ((status = esl_stack_IPush(nd_pda, nd)) != eslOK) goto ERROR;
1631 while (esl_stack_IPop(nd_pda, &nd) != eslEOD)
1632 {
1633 esl_stack_IPop(nd_pda, &on_right);
1634 if (on_right) {
1635 switch(cm->ndtype[nd]) { /* this is a massive switch, we set i and j bands for almost all
1636 * states here when we're on the right (sole exceptions are i bands for
1637 * MATP_nd states (except MATP_IR), and MATL_nd states) */
1638
1639 case BIF_nd: /* special case, set i bands based on left child, j bands based on right child */
1640 v = cm->nodemap[nd];
1641 w = cm->cfirst[v]; /* BEGL_S */
1642 y = cm->cnum[v]; /* BEGR_S */
1643
1644 /* set v's i band based on left child w, and v's j band based on right child y */
1645 imin[v] = (imin[w] != -1) ? imin[w] : imin[y]; /* if imin[w] == imin[y] == -1, then imin[v] will be set as -1 */
1646 imax[v] = (imax[w] != -2) ? imax[w] : imax[y]; /* if imax[w] == imax[y] == -2, then imax[v] will be set as -2 */
1647 jmin[v] = (jmin[y] != -1) ? jmin[y] : jmin[w]; /* if jmin[y] == jmin[w] == -1, then jmin[v] will be set as -1 */
1648 jmax[v] = (jmax[y] != -2) ? jmax[y] : jmax[w]; /* if jmax[y] == jmax[w] == -2, then jmax[v] will be set as -2 */
1649
1650 if(! do_trunc) {
1651 /* check for possibility that either child is not reachable, will only possibly happen with local on */
1652 if(imin[w] == -1 || jmin[w] == -1 || imin[y] == -1 || jmin[y] == -1 ||
1653 imax[w] == -2 || imax[w] == -2 || jmax[y] == -2 || jmax[y] == -2) {
1654 /* either the left child, or right child is not reachable, make them both unreachable as well as the BIF state */
1655 imin[v] = imin[w] = imin[y] = jmin[v] = jmin[w] = jmin[y] = -1;
1656 imax[v] = imax[w] = imax[y] = jmax[v] = jmax[w] = jmax[y] = -2;
1657 /* also make the BEGR_IL unreachable */
1658 imin[y+1] = jmin[y+1] = -1;
1659 imax[y+1] = jmax[y+1] = -2;
1660 }
1661 }
1662 break;
1663
1664 case MATP_nd:
1665 lpos = cp9map->nd2lpos[nd];
1666 rpos = cp9map->nd2rpos[nd];
1667
1668 v = cm->nodemap[nd]; /* v is MATP_MP */
1669 jmin[v] = r_mn[rpos];
1670 jmax[v] = r_mx[rpos];
1671
1672 v++; /* v is MATP_ML */
1673 jmin[v] = r_dn[rpos];
1674 jmax[v] = r_dx[rpos];
1675
1676 v++; /* v is MATP_MR */
1677 jmin[v] = r_mn[rpos];
1678 jmax[v] = r_mx[rpos];
1679
1680 v++; /* v is MATP_D */
1681 jmin[v] = r_dn[rpos];
1682 jmax[v] = r_dx[rpos];
1683
1684 v++; /* v is MATP_IL */
1685 jmin[v] = r_nn_j[rpos-1];
1686 jmax[v] = r_nx_j[rpos-1];
1687
1688 v++; /* v is MATP_IR */
1689 jmin[v] = r_in[rpos-1];
1690 jmax[v] = r_ix[rpos-1];
1691 imin[v] = r_nn_i[lpos+1]; /* look at band on lpos *+1* b/c we enter MATP_IR AFTER the MATP_MP, MATP_MR, MATP_ML, or MATP_IL insert (if any) */
1692 imax[v] = r_nx_i[lpos+1]; /* look at band on lpos *+1* b/c we enter MATP_IR AFTER the MATP_MP, MATP_MR, MATP_ML, or MATP_IL insert (if any) */
1693 ESL_DASSERT1(((lpos+1) <= cm->clen)); /* note: we know lpos+1 <= cm->clen b/c we're in a MATP node, and the ccol the right half of the node maps to
1694 * must be to the right of the ccol the left half of the node maps to */
1695 if(imin[v] == 0) { cm_Fail("v: %d lpos: %d\n", v, lpos); }
1696 break; /* case MATP_nd */
1697
1698 case MATL_nd: /* i bands were set when we were on the left, non-right emitter, set implicit j bands */
1699 lpos = cp9map->nd2lpos[nd];
1700
1701 v = cm->nodemap[nd]; /* v is MATL_ML */
1702 jmin[v] = r_nn_j[rpos];
1703 jmax[v] = r_nx_j[rpos];
1704
1705 v++; /* v is MATL_D, the MATL_ML and MATL_IL concerns don't apply, D's don't emit */
1706 jmin[v] = r_nn_j[rpos];
1707 jmax[v] = r_nx_j[rpos];
1708
1709 v++; /* v is MATL_IL */
1710 jmin[v] = r_nn_j[rpos];
1711 jmax[v] = r_nx_j[rpos];
1712 break;
1713
1714 case MATR_nd: /* set j bands explicitly from HMM bands, i bands implicitly */
1715 rpos = cp9map->nd2rpos[nd];
1716 v = cm->nodemap[nd]; /* v is MATR_MR */
1717 jmin[v] = r_mn[rpos];
1718 jmax[v] = r_mx[rpos];
1719 imin[v] = r_nn_i[lpos];
1720 imax[v] = r_nx_i[lpos];
1721
1722 v++; /* v is MATR_D */
1723 jmin[v] = r_dn[rpos];
1724 jmax[v] = r_dx[rpos];
1725 imin[v] = r_nn_i[lpos];
1726 imax[v] = r_nx_i[lpos];
1727
1728 v++; /* v is MATR_IR */
1729 jmin[v] = r_in[rpos-1];
1730 jmax[v] = r_ix[rpos-1];
1731 imin[v] = r_nn_i[lpos];
1732 imax[v] = r_nx_i[lpos];
1733 break;
1734
1735 case BEGL_nd:
1736 case BEGR_nd: /* set i and j bands implicitly, except for BEGR_IL, whose i bands are set explicitly based on HMM */
1737 v = cm->nodemap[nd]; /* set i and j band for BEG{L,R}_S based on children */
1738 imin[v] = jmin[v] = INT_MAX;
1739 imax[v] = jmax[v] = INT_MIN;
1740 for(y = cm->cfirst[v]; y < cm->cfirst[v]+cm->cnum[v]; y++) {
1741 /* if y is reachable, make sure we can get there from v */
1742 if(imin[y] != -1) {
1743 imin[v] = ESL_MIN(imin[v], imin[y]);
1744 imax[v] = ESL_MAX(imax[v], imax[y]);
1745 }
1746 if(jmin[y] != -1) {
1747 jmin[v] = ESL_MIN(jmin[v], jmin[y]);
1748 jmax[v] = ESL_MAX(jmax[v], jmax[y]);
1749 }
1750 }
1751 if(imin[v] == INT_MAX) {
1752 imin[v] = -1;
1753 imax[v] = -2;
1754 }
1755 if(jmin[v] == INT_MAX) {
1756 jmin[v] = -1;
1757 jmax[v] = -2;
1758 }
1759
1760 /* set BEGR_IL's i and j band */
1761 if(cm->ndtype[nd] == BEGR_nd) {
1762 v++;
1763 imin[v] = r_in[lpos-1]; /* BEGR_IL emits before lpos */
1764 imax[v] = r_ix[lpos-1];
1765 if(imin[v-1] != -1 && imin[v] != -1) { /* if BEGR_S and BEGR_IL is reachable */
1766 imin[v-1] = ESL_MIN(imin[v-1], imin[v]); /* expand BEGR_S so it can reach BEGR_IL */
1767 jmin[v] = (jmin[v-1] == -1) ? -1 : ESL_MAX(jmin[v-1], i0); /* can't get to a BEGR_IL without emitting at least i0 */
1768 jmax[v] = jmax[v-1];
1769 }
1770 else {
1771 imin[v] = jmin[v] = -1;
1772 imax[v] = jmax[v] = -2;
1773 }
1774 esl_stack_IPop(lpos_pda, &lpos); /* pop the remembered lpos from our sister BEGL_nd to use for parent BIF_nd and above */
1775 }
1776 else { /* BEGL_nd */
1777 if ((status = esl_stack_IPush(lpos_pda, lpos)) != eslOK) goto ERROR;
1778 lpos = rpos+1; /* next node we pop from stack will be our BEGR sister, on the right, switch lpos to rpos+1 */
1779 }
1780 break;
1781
1782 case END_nd:
1783 v = cm->nodemap[nd]; /* v is END_E */
1784 imin[v] = r_nn_i[lpos];
1785 imax[v] = (r_nx_i[lpos] == -2) ? r_nx_i[lpos] : ESL_MIN(r_nx_i[lpos]+1, j0+1); /* +1 is for StateDelta */
1786 if(r_in[lpos] != -1) { /* we could come from an IR above us (tricky case) */
1787 imin[v] = ESL_MIN(imin[v], ESL_MAX(r_in[lpos] - 1, i0));
1788 imax[v] = ESL_MAX(imax[v], ESL_MAX(r_ix[lpos] - 1, i0));
1789 }
1790 rpos = lpos;
1791 if(imin[v] != -1) {
1792 jmin[v] = imin[v]-1; /* E must emit d = 0 residues, so j ==i-1 */
1793 jmax[v] = imax[v]-1; /* E must emit d = 0 residues, so j ==i-1 */
1794 }
1795 break;
1796
1797 case ROOT_nd: /* ROOT is a special case, set i and j bands */
1798 /* lpos == 1 and rpos == hmm_M */
1799 assert(lpos == 1);
1800 assert(rpos == hmm_M);
1801 v = cm->nodemap[nd]; /* v is ROOT_S */
1802 imin[v] = r_nn_i[1];
1803 imax[v] = r_nx_i[1];
1804 jmin[v] = r_nn_j[hmm_M];
1805 jmax[v] = r_nx_j[hmm_M];
1806
1807 v++; /* v is ROOT_IL */
1808 imin[v] = r_in[0]; /* ROOT_IL maps to HMM insert state of HMM node 0 */
1809 imax[v] = r_ix[0]; /* ROOT_IL maps to HMM insert state of HMM node 0 */
1810 /* ROOT_IL's j bands will be same as ROOT_S's, after ensuring state delta of 1 is respected */
1811 jmin[v] = (r_nn_j[hmm_M] == -1) ? -1 : ESL_MAX(r_nn_j[hmm_M], i0); /* can't get to ROOT_IL without emitting at least i0 */
1812 jmax[v] = r_nx_j[hmm_M];
1813 if(r_in[hmm_M] != -1) {
1814 jmin[v] = ESL_MIN(jmin[v], r_in[hmm_M]);
1815 jmax[v] = ESL_MIN(jmax[v], r_ix[hmm_M]);
1816 }
1817
1818 v++; /* v is ROOT_IR */
1819 if(r_in[hmm_M] != -1) { /* if r_in[hmm_M] == -1, this state is unreachable */
1820 imin[v] = r_nn_i[1]; /* HMM state M_0 is silent */
1821 imax[v] = r_nx_i[1]; /* HMM state M_0 is silent */
1822 if(imin[v-1] != -1) {
1823 imin[v] = ESL_MIN(imin[v], imin[v-1]+1);
1824 imax[v] = ESL_MAX(imax[v], imax[v-1]+1);
1825 }
1826 jmin[v] = r_in[hmm_M]; /* ROOT_IR maps to HMM insert state of HMM node hmm_M */
1827 jmax[v] = r_ix[hmm_M]; /* ROOT_IR maps to HMM insert state of HMM node hmm_M */
1828 }
1829 break;
1830 } /* end of switch(cm->ndtype[nd]) */
1831 } /* end of if(on_right) */
1832
1833 else { /* on left */
1834 /* set i bands for MATP_nd, MATL_nd only */
1835 switch(cm->ndtype[nd]) {
1836 case MATP_nd:
1837 lpos = cp9map->nd2lpos[nd];
1838 v = cm->nodemap[nd]; /* v is MATP_MP */
1839 imin[v] = r_mn[lpos];
1840 imax[v] = r_mx[lpos];
1841 v++; /* v is MATP_ML */
1842 imin[v] = r_mn[lpos];
1843 imax[v] = r_mx[lpos];
1844 v++; /* v is MATP_MR */
1845 imin[v] = r_dn[lpos] == -1 ? -1 : r_dn[lpos]+1;
1846 imax[v] = r_dx[lpos] == -2 ? -2 : r_dx[lpos]+1;
1847 v++; /* v is MATP_D */
1848 imin[v] = r_dn[lpos] == -1 ? -1 : r_dn[lpos]+1;
1849 imax[v] = r_dx[lpos] == -2 ? -2 : r_dx[lpos]+1;
1850 v++; /* v is MATP_IL */
1851 imin[v] = r_in[lpos];
1852 imax[v] = r_ix[lpos];
1853 /* we deal with setting imin/imax for MATP_IR when we're on the right */
1854 break;
1855
1856 case MATL_nd:
1857 lpos = cp9map->nd2lpos[nd];
1858 v = cm->nodemap[nd]; /* v is MATL_ML */
1859 imin[v] = r_mn[lpos];
1860 imax[v] = r_mx[lpos];
1861 v++; /* v is MATL_D */
1862 imin[v] = r_dn[lpos] == -1 ? -1 : r_dn[lpos]+1;
1863 imax[v] = r_dx[lpos] == -2 ? -2 : r_dx[lpos]+1;
1864 v++; /* v is MATL_IL */
1865 imin[v] = r_in[lpos];
1866 imax[v] = r_ix[lpos];
1867 break;
1868 } /* end of switch(cm->ndtype[nd]) */
1869
1870 if(cm->ndtype[nd] == BIF_nd) {
1871 /* push the BIF back on for its right side */
1872 if ((status = esl_stack_IPush(nd_pda, 1)) != eslOK) goto ERROR;
1873 if ((status = esl_stack_IPush(nd_pda, nd)) != eslOK) goto ERROR;
1874 /* push node index for right child */
1875 if ((status = esl_stack_IPush(nd_pda, 0)) != eslOK) goto ERROR;
1876 if ((status = esl_stack_IPush(nd_pda, cm->ndidx[cm->cnum[cm->nodemap[nd]]])) != eslOK) goto ERROR;
1877 /* push node index for left child */
1878 if ((status = esl_stack_IPush(nd_pda, 0)) != eslOK) goto ERROR;
1879 if ((status = esl_stack_IPush(nd_pda, cm->ndidx[cm->cfirst[cm->nodemap[nd]]])) != eslOK) goto ERROR;
1880 }
1881 else {
1882 /* push the node back on for right side */
1883 if ((status = esl_stack_IPush(nd_pda, 1)) != eslOK) goto ERROR;
1884 if ((status = esl_stack_IPush(nd_pda, nd)) != eslOK) goto ERROR;
1885 /* push child node on */
1886 if (cm->ndtype[nd] != END_nd) {
1887 if ((status = esl_stack_IPush(nd_pda, 0)) != eslOK) goto ERROR;
1888 if ((status = esl_stack_IPush(nd_pda, nd+1)) != eslOK) goto ERROR;
1889 }
1890 }
1891 }
1892 }
1893
1894 /* If we're allowing truncated alignments, do a final pass through all states, expanding bands to allow for
1895 * L and/or R and/or T marginal alignments, as necessary,
1896 * cp9b->{L,R}marg_{i,j}{min,max} were defined in cp9_PredictStartAndEndPositions().
1897 */
1898 if(do_trunc) {
1899 for(v = 0; v < cm->M; v++) {
1900 if(cp9b->Lvalid[v] || cp9b->Tvalid[v]) { /* allow for left marginal alignment by expanding j band */
1901 jmin[v] = (jmin[v] == -1) ? cp9b->Lmarg_jmin : ESL_MIN(jmin[v], cp9b->Lmarg_jmin);
1902 jmax[v] = (jmax[v] == -2) ? cp9b->Lmarg_jmax : ESL_MAX(jmax[v], cp9b->Lmarg_jmax);
1903 }
1904 if(cp9b->Rvalid[v] || cp9b->Tvalid[v]) { /* allow for right marginal alignment by expanding i band */
1905 imin[v] = (imin[v] == -1) ? cp9b->Rmarg_imin : ESL_MIN(imin[v], cp9b->Rmarg_imin);
1906 imax[v] = (imax[v] == -2) ? cp9b->Rmarg_imax : ESL_MAX(imax[v], cp9b->Rmarg_imax);
1907 }
1908 }
1909 }
1910
1911 if(! doing_search) { /* if we're aligning the full seq must be aligned at the root state */
1912 imin[0] = i0; /* first residue must be in subtree of ROOT_S */
1913 if(imin[1] != -1) imin[1] = i0; /* first residue must be in subtree of ROOT_IL, if it is used */
1914 jmax[0] = j0; /* final residue must be in subtree of ROOT_S */
1915 if(jmin[1] != -1) jmax[1] = j0; /* final residue must be in subtree of ROOT_IL if it is used */
1916 if(jmin[2] != -1) jmax[2] = j0; /* final residue must be in subtree of ROOT_IR if it is used */
1917 }
1918
1919 /* Final pass through all states:
1920 * 1. if any band value implies a state is unreachable, make it so by setting imin[v]=jmin[v]=-1, imax[v]=jmax[v]=-2;
1921 * 2. set detached inserts unreachable.
1922 * 3. if(!do_trunc) for left emitters enforce jmin/jmax allow at least 1 residue to be emitted
1923 * 4. if(!do_trunc) for MP states, enforce jmin/jmax allow at least 2 residues to be emitted
1924 */
1925 for(v = 0; v < cm->M; v++) {
1926 ESL_DASSERT1(((imin[v] == -1 && imax[v] == -2) || (imin[v] >= 0 && imax[v] >= 0)));
1927 ESL_DASSERT1(((jmin[v] == -1 && jmax[v] == -2) || (jmin[v] >= 0 && jmax[v] >= 0)));
1928 if(imin[v] == -1 || jmin[v] == -1) {
1929 imin[v] = jmin[v] = -1;
1930 imax[v] = jmax[v] = -2;
1931 }
1932 if(StateIsDetached(cm, v)) {
1933 imin[v] = jmin[v] = -1;
1934 imax[v] = jmax[v] = -2;
1935 }
1936 if(! do_trunc) {
1937 if(cm->sttype[v] == MP_st) {
1938 if(jmax[v] == i0) { /* HMM tells us right half of MP state must emit first residue in the sequence, we know better, make state unreachable */
1939 ESL_DASSERT1((jmin[v] == i0));
1940 imin[v] = jmin[v] = -1; /* ignore hmm */
1941 imax[v] = jmax[v] = -2; /* ignore hmm */
1942 }
1943 else if (jmin[v] == i0) { /* HMM tells us right half of MP state could possibly first residue (i0), but we know it can't (see comment above). */
1944 jmin[v]++; /* pad 1 onto what the hmm thought, make first emittable residue i0+1 */
1945 /* leave jmax[v] alone, we know it's not == i0, we checked for that case above */
1946 }
1947 }
1948 /* if a left emitter, enforce jmin/jmax require at least 1 residue is emitted */
1949 if((StateLeftDelta(cm->sttype[v]) == 1) && imin[v] != -1) {
1950 if(jmax[v] == (i0-1)) { /* HMM bands implied state must be entered after emitting exactly 0 residues, we know better, make it unreachable */
1951 ESL_DASSERT1((jmin[v] == (i0-1)));
1952 imin[v] = jmin[v] = -1; /* ignore hmm */
1953 imax[v] = jmax[v] = -2; /* ignore hmm */
1954 }
1955 else if (jmin[v] == (i0-1)) {
1956 jmin[v] = i0; /* pad 1 onto what the hmm thought */
1957 /* leave jmax[v] alone, we know it's not == i0-1, we checked for that case above */
1958 }
1959 }
1960 }
1961 }
1962
1963 #if 0
1964 if(do_trunc) {
1965 for(v = 0; v < cm->M; v++) {
1966 printf("dotrunc: %d ijband v: %4d nd: %4d %4s %2s i: %5d - %5d j: %5d - %5d\n", do_trunc, v, cm->ndidx[v],
1967 Nodetype(cm->ndtype[cm->ndidx[v]]),
1968 Statetype(cm->sttype[v]),
1969 imin[v], imax[v], jmin[v], jmax[v]);
1970 }
1971 }
1972 #endif
1973
1974 /* A final, brutal hack. If the hmm used to derive bands has local
1975 * begins, ends and ELs on, it's possible (but extremely rare
1976 * empirically, even with very high tau values (0.49!)) that no
1977 * valid CM parse exists within the i and j bands. To avoid this, if
1978 * the CM has local begins and ends on then we use a brutal hack
1979 * here to enable at least one valid parse from the ROOT_S to a
1980 * state from which the EL can be reached and able to emit all
1981 * residues.
1982 *
1983 * There's 2 relevant cases.
1984 *
1985 * Case 1: node 1 is a MATP, MATR, or MATL node (this is the easier case)
1986 * Case 2: node 1 is a BIF node
1987 *
1988 * Case 1: node 1 is a MATP, MATR, or MATL node (this is the easier case)
1989 * A. assert CM local begins and ends are on (they should be if we're using a localized HMM to get bands).
1990 * and we can do a local begin into and a local end out of node 1. This will be TRUE unless there
1991 * are only 3 nodes in the CM (which is impossible, cmbuild won't build a 3 node CM - the reason is that
1992 * such a CM would suck at local alignment b/c no local ends are possible (not to mention they're too small
1993 * to be useful, and that if node 1 == MATL the CM can only emit/align 1 residue in local mode b/c the
1994 * ROOT_IL, ROOT_IR are unreachable and the MATL_IL is detached!).
1995 *
1996 * B. if we're doing alignment (full target must be accounted for):
1997 * v = cm->nodemap[nd]
1998 * set imin[v] = ESL_MIN(imin[v], i0)
1999 * imax[v] = ESL_MAX(imax[v], i0)
2000 * jmin[v] = ESL_MIN(jmin[v], j0)
2001 * jmax[v] = ESL_MAX(jmax[v], j0)
2002 * else if we're doing search and v is unreachable, make it reachable by setting
2003 * imin[v] = imin[0];
2004 * imax[v] = imax[0];
2005 * jmin[v] = jmin[0];
2006 * jmax[v] = jmax[0];
2007 * then we'll be able to emit some residues from v, (so we're guaranteed a valid parse.)
2008 *
2009 * Case 2: node 1 is a BIF node
2010 * v = cm->nodemap[nd] (the BIF_B state)
2011 * if v is reachable and we're doing alignment, expand it's bands so that it can
2012 * account for the full seq:
2013 * set imin[v] = ESL_MIN(imin[v], i0)
2014 * imax[v] = ESL_MAX(imax[v], i0)
2015 * jmin[v] = ESL_MIN(jmin[v], j0)
2016 * jmax[v] = ESL_MAX(jmax[v], j0)
2017 * else if v is reachable and we're doing search, ensure that one contiguous chunk of
2018 * seq can be emitted by BIF's children (see code)
2019 *
2020 * if v is not reachable (if we're doing search or not), we enforce 1 valid parse,
2021 * the BIF must emit the full target, residues i0..j0-1 from its' BEGL_S's EL state, and
2022 * residue j0 from it's BEGR_S EL state.
2023 */
2024 if(hmm_is_localized && cm_is_fully_localized) {
2025 if(do_trunc) cp9b->Jvalid[0] = TRUE;
2026 if(imin[0] == -1) { /* ROOT_S is unreachable, uhh... */
2027 imin[0] = imax[0] = i0;
2028 jmin[0] = jmax[0] = j0;
2029 }
2030 if(cm->nodes == 3) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "cp9_HMM2ijBands(), cm/hmm are locally configured, only 3 nodes in the CM, this is an illegal CM b/c local ENDs are impossible.");
2031 nd = 1;
2032 if(i0 == j0) {
2033 while((nd < cm->nodes) && (cm->ndtype[nd] == MATP_nd)) nd++; /* a local begin into a MATP_MP state can't happen when the target is 1 residue, it must emit 2 residues */
2034 if(cm->ndtype[nd] == END_nd) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "cp9_HMM2ijBands(), CM has no MATL, MATR or BIF nodes, this shouldn't happen (cmbuild forbids it)!\n");
2035 }
2036 if(cm->ndtype[nd] == BIF_nd) {
2037 v = cm->nodemap[nd];
2038 w = cm->cfirst[v]; /* BEGL_S */
2039 y = cm->cnum[v]; /* BEGR_S */
2040 if(do_trunc) {
2041 cp9b->Jvalid[v] = TRUE;
2042 cp9b->Jvalid[w] = TRUE;
2043 cp9b->Jvalid[y] = TRUE;
2044 }
2045 if(imin[v] != -1 && imin[w] != -1 && imin[y] != -1) { /* v and its children w and y are all reachable */
2046 if(!doing_search) { /* we need to be able to account for the full sequence */
2047 imin[v] = ESL_MIN(imin[v], i0);
2048 imax[v] = ESL_MAX(imax[v], i0);
2049 jmin[v] = ESL_MIN(jmin[v], j0);
2050 jmax[v] = ESL_MAX(jmax[v], j0);
2051 imin[w] = imin[v];
2052 imax[w] = imax[v];
2053 jmax[w] = ESL_MAX(jmax[w], ESL_MIN(j0, imax[w]));
2054 jmin[y] = jmin[v];
2055 jmax[y] = jmax[v];
2056 imin[y] = ESL_MIN(imin[y], ESL_MAX(i0, jmin[y]));
2057 /* now ensure that imin[y] <= jmax[w]+1, so we can definitely emit the full seq */
2058 imin[y] = ESL_MIN(imin[y], ESL_MAX(i0, jmax[w]+1));
2059 imax[y] = ESL_MAX(imin[y], imax[y]);
2060 }
2061 else { /* doing search, we only need to be able to emit some range of residues from BEGL and BEGR's EL states */
2062 imin[y] = ESL_MIN(imin[y], ESL_MAX(i0, jmax[w]+1));
2063 imax[y] = ESL_MAX(imin[y], imax[y]);
2064 }
2065 } /* end of if(imin[v] != -1) */
2066 else { /* v, w or y are unreachable, make them reachable */
2067 if(! doing_search) {
2068 /* if we're doing alignment, we enforce that the full seq must be emittable
2069 * by BIF and it's children's (BEGL_S and BEGR_S) EL states */
2070 imin[v] = i0;
2071 imax[v] = i0;
2072 jmin[v] = j0;
2073 jmax[v] = j0;
2074 imin[w] = i0; /* w will emit i0..j0-1 (which may be 0 residues if i0==j0) */
2075 imax[w] = i0;
2076 jmin[w] = j0-1;
2077 jmax[w] = j0-1;
2078 imin[y] = j0; /* y will emit only j0 */
2079 imax[y] = j0;
2080 jmin[y] = j0;
2081 jmax[y] = j0;
2082 }
2083 else {
2084 /* if we're doing search we enforce that the residues from imin[0]..jmax[0] are emittable
2085 * by BIF and its children's (BEGL_S and BEGR_S) EL states */
2086 imin[v] = imin[0];
2087 imax[v] = imin[0];
2088 jmin[v] = jmax[0];
2089 jmax[v] = jmax[0];
2090 imin[w] = imin[0]; /* w will emit imin[0]..jmax[0]-1 (which may be 0 residues if imin[0]==jmax[0]) */
2091 imax[w] = imin[0];
2092 jmin[w] = jmax[0]-1;
2093 jmax[w] = jmax[0]-1;
2094 imin[y] = jmax[0]; /* y will emit only jmax[0] */
2095 imax[y] = jmax[0]; /* y will emit only jmax[0] */
2096 jmin[y] = jmax[0];
2097 jmax[y] = jmax[0];
2098 }
2099 }
2100 } /* end of if(cm->ndtype[nd] == BIF_nd) */
2101 else {
2102 /* node nd is a MATL, MATR or MATP */
2103 ESL_DASSERT1((cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATR_nd));
2104 assert(cm->ndtype[nd] == MATL_nd || cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATR_nd);
2105 v = cm->nodemap[nd];
2106 if(do_trunc) cp9b->Jvalid[v] = TRUE;
2107 /* we can do a local begin into and local end out of v */
2108 ESL_DASSERT1((NOT_IMPOSSIBLE(cm->beginsc[v])));
2109 ESL_DASSERT1((NOT_IMPOSSIBLE(cm->endsc[v])));
2110 assert(NOT_IMPOSSIBLE(cm->beginsc[v]));
2111 assert(NOT_IMPOSSIBLE(cm->endsc[v]));
2112 if(!doing_search) { /* we need to be able to account for the full sequence */
2113 if(imin[v] == -1) { /* v is unreachable, make it reachable only for emitting the full seq */
2114 imin[v] = imax[v] = i0;
2115 jmin[v] = jmax[v] = j0;
2116 }
2117 else { /* v is reachable, expand it's band so it can emit the full seq */
2118 imin[v] = ESL_MIN(imin[v], i0);
2119 imax[v] = ESL_MAX(imax[v], i0);
2120 jmin[v] = ESL_MIN(jmin[v], j0);
2121 jmax[v] = ESL_MAX(jmax[v], j0);
2122 }
2123 }
2124 else { /* doing search, do not need to account for full target sequence, make it so we can reach v for some i and j (this will guarantee >= 1 valid parse) */
2125 if(imin[v] == -1) { /* v is unreachable */
2126 imin[v] = imin[0];
2127 imax[v] = imax[0];
2128 jmin[v] = jmin[0];
2129 jmax[v] = jmax[0];
2130 }
2131 else { /* v is reachable, make sure it's reachable from the ROOT_S state, expand the ROOT_S band */
2132 imin[0] = ESL_MIN(imin[0], imin[v]);
2133 imax[0] = ESL_MAX(imax[0], imax[v]);
2134 jmin[0] = ESL_MIN(jmin[0], jmin[v]);
2135 jmax[0] = ESL_MAX(jmax[0], jmax[v]);
2136 }
2137 }
2138 }
2139 }
2140 /* end of brutal hack */
2141 #if eslDEBUGLEVEL >= 1
2142 /* check for valid CM parse, there should be one, unless do_trunc is true, then we may not... */
2143 if((status = CMBandsCheckValidParse(cm, cp9b, errbuf, i0, j0, doing_search)) != eslOK) return status;
2144 #endif
2145
2146 esl_stack_Destroy(nd_pda);
2147 esl_stack_Destroy(lpos_pda);
2148 free(r_mn);
2149 free(r_mx);
2150 free(r_dn);
2151 free(r_dx);
2152 free(r_in);
2153 free(r_ix);
2154 free(r_nn_i);
2155 free(r_nx_i);
2156 free(r_nn_j);
2157 free(r_nx_j);
2158
2159 return eslOK;
2160
2161 ERROR:
2162 ESL_FAIL(status, errbuf, "Memory allocation error.\n");
2163 }
2164
2165 /* Function: HMMBandsEnforceValidParse()
2166 * Incept: EPN, Fri Feb 1 16:46:50 2008
2167 *
2168 * Purpose: Given bands on HMM states for a target sequence,
2169 * check for a valid HMM parse within those bands.
2170 * If no valid parse exists, expand the bands such that
2171 * one does exist, in a greedy manner.
2172 *
2173 * Bands are expanded using the HMMBandsFixUnreachable()
2174 * function. These take a node that is unreachable
2175 * and modify bands on current node and nearby nodes
2176 * to make it reachable. This is awful hack number 1.
2177 * (see HMMBandsFixUnreachable() for details).
2178 * Note: the technique used for expanding the bands was
2179 * selected for its relative simplicity. It does not
2180 * expand the bands in any smart way that is aware of
2181 * probability mass or score of the newly possible parses
2182 * during the band expansion. You could try to do that,
2183 * but it's not likely to be worth it: when the default
2184 * bands before expansion do not allow a single parse,
2185 * the real solution is to lower tau, the tail loss parameter
2186 * used during band calculation. This function is really
2187 * only necessary so the HMM banding technique is robust to
2188 * high values of tau, higher than any reasonable application
2189 * should use.
2190 *
2191 * Awful hack #2 occurs when two different transitions to the
2192 * same state imply reachable bands that have a 'gap' in the
2193 * middle. For example if node D_3 can reach node M_4 with
2194 * i = 3 or 4, and node M_3 can reach node M_4 with i equal
2195 * to 6 or 7. This means that node M_4 cannot be reached for
2196 * i == 5, but this implementation is much easier if we can
2197 * just set the reachable band for M_4 to 3..7. So, that's
2198 * what we do, and we doctor the band of I_3 so that M_4 *can*
2199 * be reached for i == 5. This is done in HMMBandsFillGap().
2200 * See that function for details. This hack is only performed
2201 * for models NOT in local mode. If we are in local mode,
2202 * this 'gap' situation comes up much more often, but when
2203 * we're in local mode, we can use an EL state in the CM
2204 * to basically always get a valid parse, so we're not
2205 * so worried about enforcing a valid parse and we skip
2206 * this hack.
2207 *
2208 * Args: cp9 - the HMM the bands were derived from
2209 * cp9b - the CP9 bands object
2210 * cp9map - map from CM to cp9
2211 * errbuf - for error messages
2212 * i0 - first residue of sequence we're using bands for
2213 * j0 - final residue of sequence we're using bands for
2214 *
2215 * Returns: eslOK on success
2216 * eslEINCONCEIVABLE if we can't expand the bands to make a valid parse (shouldn't happen)
2217 * eslEMEM if a memory allocation error occurs
2218 * <ret_did_expand> set to TRUE if we had to expand the HMM bands, FALSE if not
2219 */
2220 int
HMMBandsEnforceValidParse(CP9_t * cp9,CP9Bands_t * cp9b,CP9Map_t * cp9map,char * errbuf,int i0,int j0,int doing_search,int * ret_did_expand,int ** ret_r_mn,int ** ret_r_mx,int ** ret_r_in,int ** ret_r_ix,int ** ret_r_dn,int ** ret_r_dx,int ** ret_r_nn_i,int ** ret_r_nx_i,int ** ret_r_nn_j,int ** ret_r_nx_j)2221 HMMBandsEnforceValidParse(CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, char *errbuf, int i0, int j0, int doing_search, int *ret_did_expand,
2222 int **ret_r_mn, int **ret_r_mx, int **ret_r_in, int **ret_r_ix, int **ret_r_dn, int **ret_r_dx,
2223 int **ret_r_nn_i, int **ret_r_nx_i, int **ret_r_nn_j, int **ret_r_nx_j)
2224 {
2225 int status;
2226 /* r_* arrays, these are the bands on 'reachable' residues for each HMM state as we move
2227 * from left to right through the HMM.
2228 * For example, r_mn[k] = 3, r_mx[k] = 5, means that for all possible HMM parses within the bands
2229 * in the cp9b pn_* arrays that reach the match state of node k, the residue emitted by that match
2230 * must be either 3, 4, or 5.
2231 */
2232 int *r_mn; /* [0..k..hmm_M] minimal residue position for which we can reach M_k (match state of node k) */
2233 int *r_mx; /* [0..k..hmm_M] maximal residue position for which we can reach M_k */
2234 int *r_in; /* [0..k..hmm_M] minimal residue position for which we can reach I_k (insert state of node k) */
2235 int *r_ix; /* [0..k..hmm_M] maximal residue position for which we can reach I_k */
2236 int *r_dn; /* [0..k..hmm_M] minimal residue position for which we can reach D_k (delete state of node k) */
2237 int *r_dx; /* [0..k..hmm_M] maximal residue position for which we can reach D_k */
2238 int r_begn; /* minimal first residue position for which we can exit the BEGIN state */
2239 int r_begx; /* minimal first residue position for which we can exit the BEGIN state */
2240 int r_endn; /* minimal final residue position for which we can reach the END state */
2241 int r_endx; /* maximal final residue position for which we can reach the END state */
2242 int *r_nn_i; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2243 int *r_nx_i; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2244 int *r_nn_j; /* [0..k..hmm_M] minimal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2245 int *r_nx_j; /* [0..k..hmm_M] maximal residue position for which we can reach node k (any of M_k, I_k, D_k) */
2246 /* r_nn_i and r_nx_i are used when setting i bands, and r_nn_j and r_nx_j are used when setting j bands.
2247 * The values can differ because of an off-by-one issue with the non-emitting (delete and M_0) states of the HMM:
2248 * pn_min_d[k] = i, means posn i was last residue emitted prior to entering node k's delete state. However, for a CM,
2249 * if a delete state's sub-parsetree is bounded by i' and j', this means positions i' and j' HAVE YET TO BE EMITTED.
2250 * For i states this means we have to add 1 to the delete band positions, but for j states we do not, the off-by-one
2251 * is taken care of because the HMM is moving left to right, while j positions move right to left (confusing as hell,
2252 * bad explanation, i know... write out an example, it's the only way to get it).
2253 */
2254 int *r_nn_hmm; /* [0..k..hmm_M] min reachable position i in HMM node k from the HMM's perspective */
2255 int *r_nx_hmm; /* [0..k..hmm_M] max reachable position i in HMM node k from the HMM's perspective */
2256 int *was_unr; /* [0..k..hmm_M] TRUE if node k was unreachable, then we expanded bands, now it should be reachable */
2257 int *filled_gap; /* [0..k..hmm_M] TRUE if we filled a gap in the reachable bands for node k */
2258 int just_filled_gap; /* TRUE if we filled a gap for the current node */
2259 int hmm_M = cp9b->hmm_M; /* number of nodes in the model */
2260 int k, kp; /* node counters */
2261 int n; /* a temporary minimum residue position */
2262 int x; /* a temporary maximum residue position */
2263 int c; /* counter */
2264 int sd; /* state delta, number of emissions for each state */
2265 int local_begins_ends_on; /* TRUE if HMM has local begins (M_0(B) -> M_k for k = 1..M and local ends (M_k -> E) for k = 1..M-1 */
2266 /*int j0_is_reachable = FALSE; */ /* TRUE if we can reach j0 for some node */
2267 /* ptrs to cp9b data, for convenience */
2268 int *pn_min_m; /* pn_min_m[k] = first position in HMM band for match state of HMM node k */
2269 int *pn_max_m; /* pn_max_m[k] = final position in HMM band for match state of HMM node k */
2270 int *pn_min_i; /* pn_min_i[k] = first position in HMM band for insert state of HMM node k */
2271 int *pn_max_i; /* pn_max_i[k] = final position in HMM band for insert state of HMM node k */
2272 int *pn_min_d; /* pn_min_d[k] = first position in HMM band for delete state of HMM node k */
2273 int *pn_max_d; /* pn_max_d[k] = final position in HMM band for delete state of HMM node k */
2274
2275 if((cp9->flags & CPLAN9_LOCAL_BEGIN) && (! (cp9->flags & CPLAN9_LOCAL_END))) ESL_FAIL(eslEINCOMPAT, errbuf, "HMMBandsEnforceValidParse(), HMM has local begins ON but local ends OFF. Both must be on, or both must be off.");
2276 local_begins_ends_on = ((cp9->flags & CPLAN9_LOCAL_BEGIN) && (cp9->flags & CPLAN9_LOCAL_END)) ? TRUE : FALSE;
2277
2278 pn_min_m = cp9b->pn_min_m;
2279 pn_max_m = cp9b->pn_max_m;
2280 pn_min_i = cp9b->pn_min_i;
2281 pn_max_i = cp9b->pn_max_i;
2282 pn_min_d = cp9b->pn_min_d;
2283 pn_max_d = cp9b->pn_max_d;
2284
2285 /* allocate and initialize */
2286 ESL_ALLOC(r_mn, sizeof(int) * (hmm_M+1));
2287 ESL_ALLOC(r_mx, sizeof(int) * (hmm_M+1));
2288 ESL_ALLOC(r_in, sizeof(int) * (hmm_M+1));
2289 ESL_ALLOC(r_ix, sizeof(int) * (hmm_M+1));
2290 ESL_ALLOC(r_dn, sizeof(int) * (hmm_M+1));
2291 ESL_ALLOC(r_dx, sizeof(int) * (hmm_M+1));
2292 ESL_ALLOC(r_nn_i, sizeof(int) * (hmm_M+1));
2293 ESL_ALLOC(r_nx_i, sizeof(int) * (hmm_M+1));
2294 ESL_ALLOC(r_nn_j, sizeof(int) * (hmm_M+1));
2295 ESL_ALLOC(r_nx_j, sizeof(int) * (hmm_M+1));
2296 ESL_ALLOC(r_nn_hmm, sizeof(int) * (hmm_M+1));
2297 ESL_ALLOC(r_nx_hmm, sizeof(int) * (hmm_M+1));
2298
2299 for(k = 0; k <= hmm_M; k++) {
2300 r_mn[k] = r_in[k] = r_dn[k] = r_nn_i[k] = r_nn_j[k] = r_nn_hmm[k] = INT_MAX;
2301 r_mx[k] = r_ix[k] = r_dx[k] = r_nx_i[k] = r_nx_j[k] = r_nx_hmm[k] = INT_MIN;
2302 }
2303 r_begn = INT_MAX;
2304 r_begx = INT_MIN;
2305 r_endn = INT_MAX;
2306 r_endx = INT_MIN;
2307
2308 ESL_ALLOC(was_unr, sizeof(int) * (hmm_M+1));
2309 ESL_ALLOC(filled_gap, sizeof(int) * (hmm_M+1));
2310 esl_vec_ISet(was_unr, (hmm_M+1), FALSE);
2311 esl_vec_ISet(filled_gap, (hmm_M+1), FALSE);
2312
2313 /* Note on comment nomenclature:
2314 * M_k: match state of node k
2315 * I_k: insert state of node k
2316 * D_k: delete state of node k
2317 */
2318
2319 if(! doing_search) assert(pn_min_m[0] == (i0-1));
2320 if(pn_min_m[0] != -1) {
2321 r_mn[0] = pn_min_m[0]; /* initialize min reachable residue for M_0 as pn_min_m[0] */
2322 r_mx[0] = pn_max_m[0]; /* initialize min reachable residue for M_0 as pn_max_m[0] */
2323 }
2324
2325 /* The main loop: for each node, for each state, determine which residues are reachable given
2326 * the reachable residues for the states in the previous node and current node.
2327 * The order is important: first we account for all transitions to the insert state of the same
2328 * node, as the reachable band on the insert will affect later transitions.
2329 * Then we do all transitions to the match of the next node, and finally to the delete of the
2330 * next node.
2331 */
2332 for(k = 0; k <= hmm_M; k++) {
2333 if(pn_min_m[k] == -1) ESL_DASSERT1((pn_max_m[k] == -1));
2334 if(pn_min_i[k] == -1) ESL_DASSERT1((pn_max_i[k] == -1));
2335 if(pn_min_d[k] == -1) ESL_DASSERT1((pn_max_d[k] == -1));
2336 just_filled_gap = FALSE;
2337
2338 /* transitions to insert of node k (I_k) */
2339 if(r_mn[k] <= r_mx[k]) { /* M_k is reachable */
2340 /* M_k->I_k transition */
2341 if(pn_min_i[k] != -1) {
2342 n = r_mn[k]+1;
2343 x = r_mx[k]+1;
2344 if((ESL_MIN(x, pn_max_i[k]) - ESL_MAX(n, pn_min_i[k])) >= 0) { /* TRUE if n..x overlaps with pn_min_i[k]..pn_max_i[k] by at least 1 residue */
2345 n = ESL_MAX(n, pn_min_i[k]); /* n can't be less than pn_min_i[k] */
2346 n = ESL_MIN(n, pn_max_i[k]); /* n can't be more than pn_max_i[k] */
2347 x = ESL_MIN(x, pn_max_i[k]); /* x can't be more than pn_max_i[k] */
2348 /* no need to check if we need to fill a gap, not an issue for inserts which can self-transit and fill their own gaps */
2349 r_in[k] = ESL_MIN(r_in[k], n);
2350 r_ix[k] = ESL_MAX(r_ix[k], x);
2351 ESL_DASSERT1((r_in[k] <= r_ix[k]));
2352 }
2353 }
2354 }
2355 if(r_dn[k] <= r_dx[k]) {
2356 /* D_k->I_k transition */
2357 if(pn_min_i[k] != -1) {
2358 n = r_dn[k]+1;
2359 x = r_dx[k]+1;
2360 if((ESL_MIN(x, pn_max_i[k]) - ESL_MAX(n, pn_min_i[k])) >= 0) { /* TRUE if n..x overlaps with pn_min_i[k]..pn_max_i[k] by at least 1 residue */
2361 n = ESL_MAX(n, pn_min_i[k]); /* n can't be less than pn_min_i[k] */
2362 n = ESL_MIN(n, pn_max_i[k]); /* n can't be more than pn_max_i[k] */
2363 x = ESL_MIN(x, pn_max_i[k]); /* x can't be more than pn_max_i[k] */
2364 /* no need to check if we need to fill a gap, not an issue for inserts which can self-transit and fill their own gaps */
2365 r_in[k] = ESL_MIN(r_in[k], n);
2366 r_ix[k] = ESL_MAX(r_ix[k], x);
2367 ESL_DASSERT1((r_in[k] <= r_ix[k]));
2368 }
2369 }
2370 }
2371 if(r_in[k] <= r_ix[k]) {
2372 /* I_k -> I_k transition */
2373 ESL_DASSERT1((r_ix[k] <= pn_max_i[k]));
2374 /* I_k->I_k transition (first b/c self transitions are possible) */
2375 if(pn_min_i[k] != -1) { /* special case, self emitter, if we can enter this INSERT state, for any valid residue, we can emit residues until we reach pn_max_i[k] */
2376 if(r_in[k] <= pn_max_i[k]) { /* we can reach this insert for i == r_in[k], then emit until pn_max_i[k] */
2377 r_ix[k] = pn_max_i[k];
2378 }
2379 else {
2380 r_in[k] = INT_MAX;
2381 r_ix[k] = INT_MIN;
2382 }
2383 }
2384 }
2385 /* done with transitions to I_k */
2386
2387 /* transitions to match of node k+1 (M_k+1) */
2388 if(k < hmm_M) { /* state M_M+1 is special, it's the END state, we deal with that below */
2389 if(r_mn[k] <= r_mx[k]) {
2390 /* M_k->M_k+1 transition */
2391 if(pn_min_m[k+1] != -1) {
2392 n = r_mn[k]+1;
2393 x = r_mx[k]+1;
2394 if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2395 n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2396 n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2397 x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2398 if(r_mn[k+1] != INT_MAX) {
2399 if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2400 /* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2401 if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2402 just_filled_gap = TRUE;
2403 }
2404 }
2405 r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2406 r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2407 ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2408 }
2409 }
2410 }
2411 /* D_k->M_k+1 transition */
2412 if(r_dn[k] <= r_dx[k]) {
2413 if(pn_min_m[k+1] != -1) {
2414 n = r_dn[k]+1;
2415 x = r_dx[k]+1;
2416 if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2417 n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2418 n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2419 x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2420 if(r_mn[k+1] != INT_MAX) {
2421 if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2422 /* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2423 ESL_DASSERT1((k != 0));
2424 if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2425 just_filled_gap = TRUE;
2426 }
2427 }
2428 r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2429 r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2430 ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2431 }
2432 }
2433 }
2434 /* I_k->M_k+1transition */
2435 if(r_in[k] <= r_ix[k]) {
2436 if(pn_min_m[k+1] != -1) {
2437 n = r_in[k]+1;
2438 x = r_ix[k]+1;
2439 if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2440 n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2441 n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2442 x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2443 if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2444 /* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2445 ESL_DASSERT1((k != 0));
2446 if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2447 just_filled_gap = TRUE;
2448 }
2449 r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2450 r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2451 ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2452 }
2453 }
2454 }
2455 /* EL_kp->M_k+1 transition, we could have come from 1 or more EL states */
2456 if(cp9->flags & CPLAN9_EL) {
2457 if(pn_min_m[k+1] != -1) {
2458 for(c = 0; c < cp9->el_from_ct[k+1]; c++) { /* el_from_ct[k+1] holds # ELs that can go to k+1 */
2459 kp = cp9->el_from_idx[k+1][c];
2460 if(r_mn[kp] <= r_mx[kp]) {
2461 n = r_mn[kp]; /* EL's can emit 0 or more residues */
2462 x = j0; /* EL's can emit 0 or more residues */
2463 if((ESL_MIN(x, pn_max_m[k+1]) - ESL_MAX(n, pn_min_m[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_m[k+1]..pn_max_m[k+1] by at least 1 residue */
2464 n = ESL_MAX(n, pn_min_m[k+1]); /* n can't be less than pn_min_m[k+1] */
2465 n = ESL_MIN(n, pn_max_m[k+1]); /* n can't be more than pn_max_m[k+1] */
2466 x = ESL_MIN(x, pn_max_m[k+1]); /* x can't be more than pn_max_m[k+1] */
2467 if(r_mn[k+1] != INT_MAX) {
2468 if(!local_begins_ends_on && ESL_MIN(x, r_mx[k+1]) - ESL_MAX(n, r_mn[k+1]) < -1) {
2469 /* there's a 'gap' of >= 1 residue between n..x and r_mn[k+1].._r_mx[k+1], fill the gap by expanding band of I_k */
2470 ESL_DASSERT1((k != 0));
2471 if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_mn[k+1], r_mx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2472 just_filled_gap = TRUE;
2473 }
2474 }
2475 r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2476 r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2477 ESL_DASSERT1((r_mn[k+1] <= r_mx[k+1]));
2478 }
2479 }
2480 }
2481 }
2482 }
2483 /* Begin ->M_k+1 transition, if local begins are on, we could go B->M_k+1, this is always true if M_k+1 is reachable and (doing_search),
2484 * if we're doing alignment this is true only if the first residue is within the band on M_k+1
2485 */
2486 if(local_begins_ends_on) {
2487 if(pn_min_m[k+1] != -1) {
2488 if(doing_search) {
2489 n = pn_min_m[k+1];
2490 x = pn_max_m[k+1];
2491 r_mn[k+1] = ESL_MIN(r_mn[k+1], n);
2492 r_mx[k+1] = ESL_MAX(r_mx[k+1], x);
2493 }
2494 else { /* doing alignment, we can only do a local begin into M_k+1 if the first residue is within it's band */
2495 if(pn_min_m[k+1] == r_mn[0]+1) {
2496 r_mn[k+1] = ESL_MIN(r_mn[k+1], r_mn[0]+1);
2497 r_mx[k+1] = ESL_MAX(r_mx[k+1], r_mn[0]+1); /* not a typo, r_mx[0] == r_mn[0] (it's the begin state) */
2498 }
2499 }
2500 }
2501 }
2502 } /* end of if(k < hmm_M) */
2503 /* transitions to END state */
2504 if(k == hmm_M || local_begins_ends_on) { /* handle transitions from M_k to END */
2505 if(r_mn[k] <= r_mx[k] && cp9->esc[k] != -INFTY) { /* if M_k is reachable and we're allowed to transit to E */
2506 /* M_k->E transition */
2507 n = r_mn[k];
2508 x = r_mx[k];
2509 /* note: we don't have to worry about filling gaps here (that is if gap of >= 1 residue between [n..x] and [r_endn..r_endx]
2510 * because end state is last state we care about, if we can reach it for residue in n..x or r_endn..r_endx, set band to
2511 * include all those residues (min(n, r_end_n)..max(x, r_endx)) is harmless, a CM parse WILL exist for some residue in that range
2512 * and the CM will be able to find it */
2513 r_endn = ESL_MIN(r_endn, n);
2514 r_endx = ESL_MAX(r_endx, x);
2515 /*////printf("0 r_endn,x: %d..%d (k: %d) \n", r_endn, r_endx, k);*/
2516 ESL_DASSERT1((r_endn <= r_endx));
2517 }
2518 }
2519 if(k == hmm_M) { /* if we're at the last node, we could also get to END from D_k, or I_k */
2520 if(r_dn[k] <= r_dx[k] && cp9->tsc[CTDM][k] != -INFTY) { /* if D_k is reachable and we're allowed to transit to E */
2521 /* D_M->E transition */
2522 n = r_dn[k];
2523 x = r_dx[k];
2524 /* note: we don't have to worry about filling gaps here (see more verbose comment above for M_k->E transition) */
2525 r_endn = ESL_MIN(r_endn, n);
2526 r_endx = ESL_MAX(r_endx, x);
2527 /*////printf("1 r_endn,x: %d..%d\n", r_endn, r_endx);*/
2528 ESL_DASSERT1((r_endn <= r_endx));
2529 }
2530 if(r_in[k] <= r_ix[k] && cp9->tsc[CTIM][k] != -INFTY) { /* if I_k is reachable and we're allowed to transit to E */
2531 /* I_M->E transition */
2532 n = r_in[k];
2533 x = r_in[k];
2534 /* note: we don't have to worry about filling gaps here (see more verbose comment above for M_k->E transition) */
2535 r_endn = ESL_MIN(r_endn, n);
2536 r_endx = ESL_MAX(r_endx, x);
2537 /*////printf("2 r_endn,x: %d..%d\n", r_endn, r_endx);*/
2538 ESL_DASSERT1((r_endn <= r_endx));
2539 }
2540 /* finally, deal with the possibility that we go to E from an EL state */
2541 if(cp9->flags & CMH_LOCAL_END) {
2542 for(c = 0; c < cp9->el_from_ct[k+1]; c++) { /* el_from_ct[k+1] holds # ELs that can go to k+1 */
2543 kp = cp9->el_from_idx[k+1][c];
2544 if(r_mn[kp] <= r_mx[kp]) {
2545 n = r_mn[kp]; /* EL's can emit 0 or more residues */
2546 x = j0;
2547 r_endn = ESL_MIN(r_endn, n);
2548 r_endx = ESL_MAX(r_endx, x);
2549 /*////printf("3 c: %d..%d r_endn: %d\n", c, r_endn, r_endx);*/
2550 ESL_DASSERT1((r_endn <= r_endx));
2551 }
2552 }
2553 }
2554 } /* end of if k == hmm_M, done with transitions to match of node k+1 */
2555
2556 /* transitions to delete of node k+1 (D_k+1)*/
2557 if(k < hmm_M) {
2558 /* M_k -> D_k+1 transition */
2559 if(r_mn[k] <= r_mx[k]) {
2560 if(pn_min_d[k+1] != -1) {
2561 n = r_mn[k];
2562 x = r_mx[k];
2563 if((ESL_MIN(x, pn_max_d[k+1]) - ESL_MAX(n, pn_min_d[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_d[k+1]..pn_max_d[k+1] by at least 1 residue */
2564 n = ESL_MAX(n, pn_min_d[k+1]); /* n can't be less than pn_min_d[k+1] */
2565 n = ESL_MIN(n, pn_max_d[k+1]); /* n can't be more than pn_max_d[k+1] */
2566 x = ESL_MIN(x, pn_max_d[k+1]); /* x can't be more than pn_max_d[k+1] */
2567 if(r_dn[k+1] != INT_MAX) {
2568 if(!local_begins_ends_on && ESL_MIN(x, r_dx[k+1]) - ESL_MAX(n, r_dn[k+1]) < -1) {
2569 /* there's a 'gap' of >= 1 residue between n..x and r_dn[k+1].._r_dx[k+1], fill the gap by expanding band of I_k */
2570 ESL_DASSERT1((k != 0));
2571 if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_dn[k+1], r_dx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2572 just_filled_gap = TRUE;
2573 }
2574 }
2575 r_dn[k+1] = ESL_MIN(r_dn[k+1], n);
2576 r_dx[k+1] = ESL_MAX(r_dx[k+1], x);
2577 ESL_DASSERT1((r_dn[k+1] <= r_dx[k+1]));
2578 }
2579 }
2580 }
2581 /* I_k -> D_k+1 transition */
2582 if(r_in[k] <= r_ix[k]) {
2583 /* I_k->D_k+1 transition */
2584 if(pn_min_d[k+1] != -1) {
2585 n = r_in[k];
2586 x = r_ix[k];
2587 if((ESL_MIN(x, pn_max_d[k+1]) - ESL_MAX(n, pn_min_d[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_d[k+1]..pn_max_d[k+1] by at least 1 residue */
2588 n = ESL_MAX(n, pn_min_d[k+1]); /* n can't be less than pn_min_d[k+1] */
2589 n = ESL_MIN(n, pn_max_d[k+1]); /* n can't be more than pn_max_d[k+1] */
2590 x = ESL_MIN(x, pn_max_d[k+1]); /* x can't be more than pn_max_d[k+1] */
2591 if(r_dn[k+1] != INT_MAX) {
2592 if(!local_begins_ends_on && ESL_MIN(x, r_dx[k+1]) - ESL_MAX(n, r_dn[k+1]) < -1) {
2593 /* there's a 'gap' of >= 1 residue between n..x and r_dn[k+1].._r_dx[k+1], fill the gap by expanding band of I_k */
2594 ESL_DASSERT1((k != 0));
2595 if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_dn[k+1], r_dx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2596 just_filled_gap = TRUE;
2597 }
2598 }
2599 r_dn[k+1] = ESL_MIN(r_dn[k+1], n);
2600 r_dx[k+1] = ESL_MAX(r_dx[k+1], x);
2601 ESL_DASSERT1((r_dn[k+1] <= r_dx[k+1]));
2602 }
2603 }
2604 }
2605 /* D_k -> D_k+1 */
2606 if(r_dn[k] <= r_dx[k]) {
2607 if(pn_min_d[k+1] != -1) {
2608 n = r_dn[k];
2609 x = r_dx[k];
2610 if((ESL_MIN(x, pn_max_d[k+1]) - ESL_MAX(n, pn_min_d[k+1])) >= 0) { /* TRUE if n..x overlaps with pn_min_d[k+1]..pn_max_d[k+1] by at least 1 residue */
2611 n = ESL_MAX(n, pn_min_d[k+1]); /* n can't be less than pn_min_d[k+1] */
2612 n = ESL_MIN(n, pn_max_d[k+1]); /* n can't be more than pn_max_d[k+1] */
2613 x = ESL_MIN(x, pn_max_d[k+1]); /* x can't be more than pn_max_d[k+1] */
2614 if(r_dn[k+1] != INT_MAX) {
2615 if(!local_begins_ends_on && ESL_MIN(x, r_dx[k+1]) - ESL_MAX(n, r_dn[k+1]) < -1) { /* FALSE if n..x overlaps with r_mn[k+1].._r_mx[k+1] by at least 1 residue, if FAILs we have to pick to either NOT change r_mn, r_mx, or change them to n and x */
2616 ESL_DASSERT1((k != 0));
2617 if((status = HMMBandsFillGap(cp9b, errbuf, k, n, x, r_dn[k+1], r_dx[k+1], r_mn[k-1], r_dn[k-1])) != eslOK) return status;
2618 just_filled_gap = TRUE;
2619 }
2620 }
2621 r_dn[k+1] = ESL_MIN(r_dn[k+1], n);
2622 r_dx[k+1] = ESL_MAX(r_dx[k+1], x);
2623 ESL_DASSERT1((r_dn[k+1] <= r_dx[k+1]));
2624 }
2625 }
2626 }
2627 }
2628
2629 /* update the reachable-by-node bands, which residues can we reach this node for?
2630 * inside the following if's we don't have to check if r_*n[k], r_*x[k] == INT_MAX or
2631 * INT_MIN, b/c we only enter the ifs if r_*n[k] <= r_*x[k]
2632 */
2633 if(r_mn[k] <= r_mx[k]) { /* M_k is reachable for i = r_mn[k]..r_mx[k] */
2634 r_nn_hmm[k] = ESL_MIN(r_nn_hmm[k], r_mn[k]);
2635 r_nx_hmm[k] = ESL_MAX(r_nx_hmm[k], r_mx[k]);
2636
2637 sd = 1;
2638 if(k != hmm_M) {
2639 r_nn_i[k+1] = ESL_MIN(r_nn_i[k+1], r_mn[k]+sd);
2640 r_nx_i[k+1] = ESL_MAX(r_nx_i[k+1], r_mx[k]+sd);
2641 }
2642 if(k != 0) {
2643 r_nn_j[k-1] = ESL_MIN(r_nn_j[k-1], r_mn[k]-sd);
2644 r_nx_j[k-1] = ESL_MAX(r_nx_j[k-1], r_mx[k]-sd);
2645 }
2646 if((local_begins_ends_on && k > 0) || k == hmm_M) { /* we can go to end from M_k with i from r_mn[k]..r_mx[k] */
2647 if(doing_search) {
2648 r_nn_j[k] = ESL_MIN(r_nn_j[k], r_mn[k]);
2649 r_nx_j[k] = ESL_MAX(r_nx_j[k], r_mx[k]);
2650 }
2651 else { /* have to emit j0 from last match state visited */
2652 if(r_mx[k] == j0) {
2653 r_nn_j[k] = ESL_MIN(r_nn_j[k], j0);
2654 r_nx_j[k] = ESL_MAX(r_nx_j[k], j0);
2655 }
2656 }
2657 }
2658 if((local_begins_ends_on && k > 0) || k == 1) { /* we can go from begin to M_k with i to r_mn[k]..r_mx[k] */
2659 if(doing_search) {
2660 r_nn_i[k] = ESL_MIN(r_nn_i[k], r_mn[k]);
2661 r_nx_i[k] = ESL_MAX(r_nx_i[k], r_mx[k]);
2662 /* superfluous */
2663 r_begn = ESL_MIN(r_begn, r_mn[k]);
2664 r_begx = ESL_MAX(r_begx, r_mx[k]);
2665 }
2666 else { /* have to emit i0 from first match state entered */
2667 if(r_mn[k] == i0) {
2668 r_nn_i[k] = ESL_MIN(r_nn_i[k], i0);
2669 r_nx_i[k] = ESL_MAX(r_nx_i[k], i0);
2670 }
2671 }
2672 }
2673 }
2674 if(r_in[k] <= r_ix[k]) { /* I_k is reachable for i = r_in[k]..r_ix[k] */
2675 r_nn_hmm[k] = ESL_MIN(r_nn_hmm[k], r_in[k]);
2676 r_nx_hmm[k] = ESL_MAX(r_nx_hmm[k], r_ix[k]);
2677
2678 sd = 1;
2679 if(k != hmm_M) {
2680 r_nn_i[k+1] = ESL_MIN(r_nn_i[k+1], r_in[k]+sd);
2681 r_nx_i[k+1] = ESL_MAX(r_nx_i[k+1], r_ix[k]+sd);
2682 }
2683 r_nn_j[k] = ESL_MIN(r_nn_j[k], r_in[k]-sd);
2684 r_nx_j[k] = ESL_MAX(r_nx_j[k], r_ix[k]-sd);
2685
2686 /* superfluous */
2687 if(k == 0) {
2688 r_begn = ESL_MIN(r_begn, r_in[k]);
2689 r_begx = ESL_MAX(r_begx, r_ix[k]);
2690 }
2691 }
2692 if(r_dn[k] <= r_dx[k]) { /* D_k is reachable for i = r_dn[k]..r_dx[k] */
2693 r_nn_hmm[k] = ESL_MIN(r_nn_hmm[k], r_dn[k]);
2694 r_nx_hmm[k] = ESL_MAX(r_nx_hmm[k], r_dx[k]);
2695
2696 sd = 0;
2697 if(k != hmm_M) {
2698 r_nn_i[k+1] = ESL_MIN(r_nn_i[k+1], r_dn[k]+1); /* off-by-one */
2699 r_nx_i[k+1] = ESL_MAX(r_nx_i[k+1], r_dx[k]+1); /* off-by-one */
2700 }
2701 if(k != 0) {
2702 r_nn_j[k-1] = ESL_MIN(r_nn_j[k-1], r_dn[k]);
2703 r_nx_j[k-1] = ESL_MAX(r_nx_j[k-1], r_dx[k]);
2704 }
2705 if(k == 1) {
2706 r_begn = ESL_MIN(r_begn, r_dn[k]+1);
2707 r_begx = ESL_MAX(r_begx, r_dx[k]+1);
2708 }
2709 }
2710 /* is the node reachable? (it doesn't matter if we're in local mode) */
2711 if((!local_begins_ends_on) && (r_mn[k] > r_mx[k]) && (r_dn[k] > r_dx[k])) {
2712 assert(k != 0);
2713 ESL_DASSERT1((just_filled_gap == FALSE));
2714 ESL_DPRINTF1(("#DEBUG: ! HMM node %d is unreachable hmm!\n", k));
2715 if(was_unr[k]) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "HMMBandsEnforceValidParse() node k %d was determined unreachable in second pass! Shouldn't happen (coding error).\n", k);
2716 was_unr[k] = TRUE;
2717 /* expand the bands so k becomes reachable, using a greedy technique */
2718 if((status = HMMBandsFixUnreachable(cp9b, errbuf, k, r_nn_hmm[k-1], r_nx_hmm[k-1], r_in[k-1])) != eslOK) return status;
2719 /* to ensure we can now reach node k, we simply decrement k by 2, then
2720 * we'll reenter the loop above for k=k-1, and check if k is reachable with
2721 * new band on I_k-1. This is unnecessary if the code is right, used here just
2722 * to check.
2723 */
2724 k -= 2;
2725 }
2726 else if(just_filled_gap == TRUE) {
2727 ESL_DPRINTF1(("#DEBUG: ! HMM node %d filled a gap!\n", k));
2728 if(filled_gap[k] == TRUE) ESL_FAIL(eslEINCONCEIVABLE, errbuf, "HMMBandsEnforceValidParse() node k %d needed a gap filled in second pass! Shouldn't happen (coding error).\n", k);
2729 filled_gap[k] = TRUE;
2730 /* to ensure we can now reach node k, we simply decrement k by 1, then
2731 * we'll reenter the loop above for k=k, and check if k is reachable with
2732 * new band on I_k. This is unnecessary if the code is right, used here just
2733 * to check.
2734 */
2735 k -= 1;
2736 }
2737 /*else if(r_nx_hmm[k] == j0) j0_is_reachable = TRUE;*/
2738 }
2739 /* final check, if we're doing alignment, the first residue i0, must be first emitted
2740 * residue, and the final residue, j0 must be final emittable residue. Enforce it.
2741 */
2742 if(! doing_search) {
2743 r_begn = i0;
2744 r_begx = i0;
2745 r_endn = j0;
2746 r_endx = j0;
2747 }
2748
2749 /* A hack! set r_nn_j[hmm_M] to rend_n and r_nx_j[hmm_M] to rend_x, b/c we
2750 * only use r_nn_j[hmm_M] and r_nx_j[hmm_M] to set j bands on states of non-right
2751 * emitting CM nodes (non-MATR MATP nodes) and we need the ones above all non
2752 * emitters (where rpos == hmm_M) to have the j bands equal to the band on the
2753 * HMM END state. This is a hack b/c there should be a band on the E state itself,
2754 * which should map to right half of ROOT_S, but I didn't implement it that way.
2755 */
2756 r_nn_i[1] = ESL_MIN(r_nn_i[1], r_begn);
2757 r_nx_i[1] = ESL_MAX(r_nx_i[1], r_begx);
2758 r_nn_j[hmm_M] = ESL_MIN(r_nn_j[hmm_M], r_endn);
2759 r_nx_j[hmm_M] = ESL_MAX(r_nx_j[hmm_M], r_endx);
2760
2761 for(k = 0; k <= hmm_M; k++) {
2762 if(r_mn[k] == INT_MAX) r_mn[k] = -1;
2763 if(r_mx[k] == INT_MIN) r_mx[k] = -2;
2764 if(r_in[k] == INT_MAX) r_in[k] = -1;
2765 if(r_ix[k] == INT_MIN) r_ix[k] = -2;
2766 if(r_dn[k] == INT_MAX) r_dn[k] = -1;
2767 if(r_dx[k] == INT_MIN) r_dx[k] = -2;
2768
2769 if(!local_begins_ends_on) {
2770 ESL_DASSERT1((r_nn_i[k] != INT_MAX || k == 0));
2771 ESL_DASSERT1((r_nx_i[k] != INT_MIN || k == 0));
2772 ESL_DASSERT1((r_nn_j[k] != INT_MAX));
2773 ESL_DASSERT1((r_nx_j[k] != INT_MIN));
2774 }
2775 else {
2776 if(r_nn_i[k] == INT_MAX) r_nn_i[k] = -1;
2777 if(r_nx_i[k] == INT_MIN) r_nx_i[k] = -2;
2778 if(r_nn_j[k] == INT_MAX) r_nn_j[k] = -1;
2779 if(r_nx_j[k] == INT_MIN) r_nx_j[k] = -2;
2780 }
2781 }
2782
2783
2784 *ret_r_mn = r_mn;
2785 *ret_r_mx = r_mx;
2786 *ret_r_in = r_in;
2787 *ret_r_ix = r_ix;
2788 *ret_r_dn = r_dn;
2789 *ret_r_dx = r_dx;
2790 *ret_r_nn_i = r_nn_i;
2791 *ret_r_nx_i = r_nx_i;
2792 *ret_r_nn_j = r_nn_j;
2793 *ret_r_nx_j = r_nx_j;
2794 free(was_unr);
2795 free(filled_gap);
2796 free(r_nn_hmm);
2797 free(r_nx_hmm);
2798
2799 return eslOK;
2800
2801 ERROR:
2802 ESL_FAIL(status, errbuf, "HMMBandsEnforceValidParse(): memory allocation error.");
2803 return eslOK; /* neverreached */
2804 }
2805
2806 /* Function: HMMBandsFixUnreachable()
2807 * Incept: EPN, Fri Feb 1 17:12:55 2008
2808 *
2809 * Purpose: Expand the HMM bands such that a parse becomes
2810 * possible up through node <k>. We know that a parse
2811 * is possible up through node <k-1>, the reachable
2812 * range of residues for all possible parses up to
2813 * node <k-1> is from <r_prv_min> to <r_prv_max>.
2814 *
2815 * Note: The technique used for expanding the bands was
2816 * selected for it's *relative* simplicity. It does not
2817 * expand the bands in any smart way that is aware of
2818 * probability mass or score of the newly possible parses
2819 * during the band expansion. You could try to do that,
2820 * but I don't think it's worth it. This function is only
2821 * entered if the default bands (prior to expansion)
2822 * do not allow a single parse, in which case the bands are
2823 * too tight, and the smart solution is to lower tau,
2824 * the tail loss parameter. In other words this function is
2825 * only very rarely used for reasonable values of tau
2826 * ('reasonable' determined from empirical expts, and
2827 * enforced by getopts). This function is necessary
2828 * for the HMM banding technique to be robust though,
2829 * otherwise it's possible that the HMM bands make all
2830 * parses impossible, which is bad, because that means
2831 * all CM parses are impossible too.
2832 *
2833 * There are two possible scenarios for why node k
2834 * is unreachable, each with a different solution
2835 * this function determines which scenario node k is
2836 * in and then fixes it. The scenarios are described
2837 * in comments in the code below.a
2838 *
2839 * Args: cp9b - the CP9 bands object
2840 * errbuf - for error messages
2841 * k - the node we want to make reachable
2842 * r_prv_min - minimal possible residue index accounted for in any parse up to and including node k-1
2843 * r_prv_max - maximal possible residue index accounted for in any parse up to and including node k-1
2844 * r_insert_prv_min - minimal possible residue index accounted for in any parse up to and state I_k-1
2845 *
2846 * Returns: eslOK on success
2847 * eslEMEM if a memory allocation error occurs
2848 */
2849 int
HMMBandsFixUnreachable(CP9Bands_t * cp9b,char * errbuf,int k,int r_prv_min,int r_prv_max,int r_insert_prv_min)2850 HMMBandsFixUnreachable(CP9Bands_t *cp9b, char *errbuf, int k, int r_prv_min, int r_prv_max, int r_insert_prv_min)
2851 {
2852
2853 int kp; /* k prime, a node counter */
2854 int nxt_m; /* minimal possible residue index we must account for before entering M_k */
2855 int nxt_d; /* minimal possible residue index we must account for before entering D_k */
2856 int nxt_n; /* minimal possible residue index we must account for before entering either M_k or D_k */
2857
2858 ESL_DASSERT1((k != 0));
2859 ESL_DASSERT1((r_prv_min != INT_MAX));
2860 ESL_DASSERT1((r_prv_max != INT_MIN));
2861 ESL_DASSERT1((r_prv_min <= r_prv_max));
2862
2863 /* scenario 1: there's a 'hole' of at least 1 residue between the residue posns that can be reached
2864 * for node k-1 (these are r_prv_min..r_prv_max) and by node k's match or delete state.
2865 * our solution is to allow the I_k-1 (node k-1 insert state) to emit the residues in
2866 * the 'hole', then we know we can reach either node k's match or delete.
2867 */
2868 /* check if we're in scenario 1 */
2869
2870 /* initialize, if neither nxt_m nor nxt_d doesn't change, we know we're not in scenario 1 */
2871 nxt_m = -1;
2872 nxt_d = -1;
2873 if(cp9b->pn_min_m[k] != -1 && cp9b->pn_max_m[k] != -1) {
2874 ESL_DASSERT1((cp9b->pn_max_m[k] >= cp9b->pn_min_m[k]));
2875 if(cp9b->pn_max_m[k]-1 > r_prv_min) { /* if we go from I_k-1 to M_k, we have to emit 1 residue from M_k, that's
2876 * why we have cp9b->pn_max_m[k]-1 (i.e. the -1 is for the StateDelta) */
2877 nxt_m = ESL_MAX(cp9b->pn_min_m[k]-1, r_prv_min); /* we could get from node k-1 to node k's match state by using I_k-1 to fill the 'hole' */
2878 }
2879 }
2880 if(cp9b->pn_min_d[k] != -1 && cp9b->pn_max_d[k] != -1) {
2881 ESL_DASSERT1((cp9b->pn_max_d[k] >= cp9b->pn_min_d[k]));
2882 if(cp9b->pn_max_d[k] > r_prv_min) { /* if we go from I_k-1 to D_k, we don't emit from D_k so there's no -1 as above with M_k */
2883 nxt_d = ESL_MAX(cp9b->pn_min_d[k], r_prv_min);/* we could get from node k-1 to node k's delete state by using I_k-1 to fill the 'hole' */
2884 }
2885 }
2886 if(nxt_m != -1 || nxt_d != -1) {
2887 /* we're in scenario 1, there's a 'hole' of missing residues we have to account for before entering node k,
2888 * determine the easier route, to M_k or D_k? (pick route with less required I_k-1 emissions) */
2889 if (nxt_m == -1) nxt_n = nxt_d;
2890 else if (nxt_d == -1) nxt_n = nxt_m;
2891 else nxt_n = ESL_MIN(nxt_m, nxt_d);
2892
2893 /* now doctor I_k-1's bands so that:
2894 * (a) I_k-1 is reachable from at least one of M_k-1, D_k-1
2895 * (b) I_k-1 can transit to M_k or D_k
2896 */
2897 if(cp9b->pn_min_i[k-1] != -1) cp9b->pn_min_i[k-1] = ESL_MIN(cp9b->pn_min_i[k-1], r_prv_min+1);
2898 else cp9b->pn_min_i[k-1] = r_prv_min+1;
2899 if(cp9b->pn_max_i[k-1] != -1) cp9b->pn_max_i[k-1] = ESL_MAX(cp9b->pn_max_i[k-1], nxt_n);
2900 else cp9b->pn_max_i[k-1] = nxt_n;
2901 ESL_DASSERT1((cp9b->pn_max_i[k-1] >= cp9b->pn_min_i[k-1]));
2902 ESL_DPRINTF1(("#DEBUG: scenario 1 reset k from %d to %d\n", k+2, k));
2903 }
2904 else {
2905 /* scenario 2: the opposite of scenario 1. All possible parses that reach node k-1 have already emitted too many
2906 * residues to reach node k. In other words, the maximal residue in the HMM band on node k's match
2907 * and delete states has been already been emitted by all possible parses that end at node k-1.
2908 * We have to use the delete states of nodes k...kp, where kp is the leftmost node that we can reach
2909 * M_kp and emit residue i==r_prv_min+1 or visit D_kp with i == r_prv_min.
2910 */
2911 kp = k;
2912 while(kp <= cp9b->hmm_M && ((cp9b->pn_max_m[kp] < (r_prv_min+1)) && (cp9b->pn_max_d[kp] < (r_prv_min)))) { /* note cp9b->pn_max_{m,d}[kp] may be == -1, that's okay */
2913 cp9b->pn_min_d[kp] = cp9b->pn_max_d[kp] = r_prv_min; /* enforce this delete state is used */
2914 kp++;
2915 }
2916 ESL_DPRINTF1(("#DEBUG: scenario 2 reset k from %d to %d (kp: %d r_prv_min: %d (+1=%d for match))\n", k, k-2, kp, r_prv_min, r_prv_min+1));
2917 }
2918 return eslOK;
2919 }
2920
2921 /* Function: HMMBandsFillGap()
2922 * Incept: EPN, Fri Feb 1 17:12:55 2008
2923 *
2924 * Purpose: In HMMBandsEnforceValidParse() it's possible (but rare) that two
2925 * different transitions to the same state imply reachable bands that have
2926 * a 'gap' in the middle. For example if node D_3 can reach node M_4 with
2927 * i = 3 or 4, and node M_3 can reach node M_4 with i equal to 6 or 7.
2928 * This means that node M_4 cannot be reached for
2929 * i == 5, but the HMMBandsEnforceValidParse() implementation is
2930 * much easier if we can just set the reachable band for M_4 to
2931 * 3..7. So, that's what we do, and we doctor the band of I_3 so
2932 * that M_4 *can* be reached for i == 5. This band doctoring is
2933 * done in this function.
2934 *
2935 * Args: cp9b - the CP9 bands object
2936 * errbuf - for error messages
2937 * k - the node we want to make reachable
2938 * min1 - min in the reachable band for first of the two relevant transitions
2939 * max1 - max in the reachable band for first of the two relevant transitions
2940 * min2 - min in the reachable band for second of the two relevant transitions
2941 * max2 - max in the reachable band for second of the two relevant transitions
2942 * prv_nd_r_mn - min residue posn in reachable band of M_k-1, -1 if M_k-1 is unreachable
2943 * prv_nd_r_dn - min residue posn in reachable band of D_k-1, -1 if D_k-1 is unreachable
2944 *
2945 * Returns: eslOK on success
2946 */
2947 int
HMMBandsFillGap(CP9Bands_t * cp9b,char * errbuf,int k,int min1,int max1,int min2,int max2,int prv_nd_r_mn,int prv_nd_r_dn)2948 HMMBandsFillGap(CP9Bands_t *cp9b, char *errbuf, int k, int min1, int max1, int min2, int max2, int prv_nd_r_mn, int prv_nd_r_dn)
2949 {
2950 int left_max; /* min1/max1 if min1 <= min2, else min2/max2 */
2951 int right_min; /* min2/max2 if min1 <= min2, else min1/max1 */
2952 int in, ix; /* min/max residue for I_k, calc'ed here */
2953
2954 ESL_DASSERT1((k != 0));
2955 ESL_DASSERT1((max1 >= min1));
2956 ESL_DASSERT1((max2 >= min2));
2957
2958 if (min1 <= min2) { left_max = max1; right_min = min2; }
2959 else { left_max = max2; right_min = min1; }
2960 ESL_DASSERT1((right_min - left_max > 1));
2961
2962 /* determine in and ix */
2963 in = INT_MAX;
2964 if(prv_nd_r_mn != INT_MAX) in = ESL_MIN(in, prv_nd_r_mn+1);
2965 if(prv_nd_r_dn != INT_MAX) in = ESL_MIN(in, prv_nd_r_dn);
2966 ESL_DASSERT1((in != INT_MAX));
2967 assert(in != INT_MAX);
2968 ix = right_min-1;
2969
2970 /* doctor I_k's bands so that it:
2971 * (a) I_k is reachable from at M_k or D_k (whichever has leftmost reachable band)
2972 * (b) I_k can transit to M_k+1 or D_k+1 (whichever has rightmost reachable band)
2973 */
2974 if(cp9b->pn_min_i[k] != -1) cp9b->pn_min_i[k] = ESL_MIN(cp9b->pn_min_i[k], in);
2975 else cp9b->pn_min_i[k] = in;
2976 if(cp9b->pn_max_i[k] != -1) cp9b->pn_max_i[k] = ESL_MAX(cp9b->pn_max_i[k], ix);
2977 else cp9b->pn_max_i[k] = ix;
2978 assert(cp9b->pn_min_i[k] <= cp9b->pn_max_i[k]);
2979 ESL_DASSERT1((cp9b->pn_min_i[k] <= cp9b->pn_max_i[k]));
2980
2981 return eslOK;
2982 }
2983
#if eslDEBUGLEVEL >= 1
/* Function: CMBandsCheckValidParse()
 * Incept:   EPN, Tue Feb  5 07:59:48 2008
 *
 * Purpose:  Given bands on CM states for a target sequence,
 *           check for a valid CM parse within those bands.
 *           Return eslFAIL if there is no valid parse.
 *
 *           The check propagates 'reachable' i and j bands from
 *           state 0 down through the model: for every reachable
 *           state v, each child y gets the part of its own band
 *           that can be entered from v after accounting for the
 *           residues v emits. With local begins on, any state with
 *           a possible begin score is additionally seeded from the
 *           overlap of its band with the band of state 0.
 *
 *           The function only reads <cm> and <cp9b>; all work arrays
 *           are private and are freed on every return path once they
 *           have been allocated.
 *
 * Args:     cm           - the model
 *           cp9b         - the CP9 bands object (imin, imax, jmin, jmax are read)
 *           errbuf       - for error messages
 *           i0           - first residue we're concerned with in target sequence
 *           j0           - final residue we're concerned with in target sequence
 *           doing_search - TRUE if we're searching, and a local hit is okay,
 *                          if FALSE, the full sequence i0..j0 must be in the subtree of ROOT_S
 *
 * Returns:  eslOK on success
 *           eslEINCOMPAT if exactly one of CMH_LOCAL_BEGIN/CMH_LOCAL_END is set
 *           eslFAIL if no valid parse exists within the i and j bands
 *           eslEMEM if a memory allocation error occurs
 */
static int
CMBandsCheckValidParse(CM_t *cm, CP9Bands_t *cp9b, char *errbuf, int i0, int j0, int doing_search)
{
  int  status;                  /* easel status code */
  int  v, w, y;                 /* state indices */
  int  nd;                      /* nd counter */
  int  sd, sdl, sdr;            /* state deltas, number of residues emitted by current state, total, to the left, and to the right */
  int *imin, *imax;             /* [0..v..M-1] i band for state v, min/max i position allowed for state v */
  int *jmin, *jmax;             /* [0..v..M-1] j band for state v, min/max j position allowed for state v */
  int  child_imin, child_imax;  /* imin, imax for child of current state, after accounting for emissions (state deltas) */
  int  child_jmin, child_jmax;  /* jmin, jmax for child of current state, after accounting for emissions (state deltas) */
  int *v_is_r    = NULL;        /* [0..v..M-1] TRUE if state v is reachable for at least one i,j pair */
  int *nd_is_r   = NULL;        /* [0..nd..cm->nodes-1] TRUE if any state (incl. insert) in node nd is reachable for at least one i,j pair */
  int *r_imin    = NULL;        /* [0..v..M-1] reachable i bands, for which i positions can we reach state v */
  int *r_imax    = NULL;
  int *r_jmin    = NULL;        /* [0..v..M-1] reachable j bands, for which j positions can we reach state v */
  int *r_jmax    = NULL;
  int *nd_r_imin = NULL;        /* [0..nd..cm->nodes-1] reachable i bands, for which i positions can we reach at least 1 state (incl. insert) in nd */
  int *nd_r_imax = NULL;
  int *nd_r_jmin = NULL;        /* [0..nd..cm->nodes-1] reachable j bands, for which j positions can we reach at least 1 state (incl. insert) in nd */
  int *nd_r_jmax = NULL;
  int  y_nd, w_nd;              /* node index */
  int  cm_is_localized;         /* TRUE if local begins and ends are on, if we can reach a state v with a non-impossible endsc[v], we can finish the parse for any i,j reachable for v */
  /* what (if anything) went wrong in the final check; reported only after the work arrays are freed */
  int  unr_nd       = -1;       /* first unreachable node, -1 if none found */
  int  gap_w        = -1;       /* BEGL_S of first BIF whose children's bands don't touch, -1 if none found */
  int  gap_y        = -1;       /* BEGR_S of that BIF */
  int  gap_first    = 0;        /* first unemittable residue between the two children's bands */
  int  gap_last     = 0;        /* final unemittable residue between the two children's bands */
  int  no_local_end = FALSE;    /* TRUE if searching locally and no reachable state can go to EL */

  if((cm->flags & CMH_LOCAL_BEGIN) && (! (cm->flags & CMH_LOCAL_END))) ESL_FAIL(eslEINCOMPAT, errbuf, "CMBandsCheckValidParse(), cm flag CMH_LOCAL_BEGIN is up and cm flag CMH_LOCAL_END is down. This is unexpected, we can't deal.");
  if((! (cm->flags & CMH_LOCAL_BEGIN)) && ((cm->flags & CMH_LOCAL_END))) ESL_FAIL(eslEINCOMPAT, errbuf, "CMBandsCheckValidParse(), cm flag CMH_LOCAL_BEGIN is down and cm flag CMH_LOCAL_END is up. This is unexpected, we can't deal.");

  cm_is_localized = ((cm->flags & CMH_LOCAL_BEGIN) && (cm->flags & CMH_LOCAL_END)) ? TRUE : FALSE;

  /* pointers to cp9b arrays, for convenience */
  imin = cp9b->imin;
  imax = cp9b->imax;
  jmin = cp9b->jmin;
  jmax = cp9b->jmax;

  /* This check needs none of the work arrays, so do it before allocating anything
   * (ESL_FAIL returns immediately and would leak them). */
  if(! doing_search) { /* we're aligning the full sequence from i0..j0, that means imin[0] must == i0 and jmax[0] must == j0, if not we can't align the full seq */
    if(imin[0] != i0) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), doing_search is FALSE, but imin[0] == %d, it should be i0 (%d)\n", imin[0], i0);
    if(jmax[0] != j0) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), doing_search is FALSE, but jmax[0] == %d, it should be j0 (%d)\n", jmax[0], j0);
  }

  /* allocate and initialize */
  ESL_ALLOC(v_is_r,    sizeof(int) * cm->M);
  ESL_ALLOC(r_imin,    sizeof(int) * cm->M);
  ESL_ALLOC(r_imax,    sizeof(int) * cm->M);
  ESL_ALLOC(r_jmin,    sizeof(int) * cm->M);
  ESL_ALLOC(r_jmax,    sizeof(int) * cm->M);
  ESL_ALLOC(nd_is_r,   sizeof(int) * cm->nodes);
  ESL_ALLOC(nd_r_imin, sizeof(int) * cm->nodes);
  ESL_ALLOC(nd_r_imax, sizeof(int) * cm->nodes);
  ESL_ALLOC(nd_r_jmin, sizeof(int) * cm->nodes);
  ESL_ALLOC(nd_r_jmax, sizeof(int) * cm->nodes);

  esl_vec_ISet(v_is_r,  cm->M,     FALSE);
  esl_vec_ISet(nd_is_r, cm->nodes, FALSE);

  /* an 'empty' reachable band is min == INT_MAX, max == INT_MIN, so that min > max */
  for (v = 0; v < cm->M; v++) {
    r_imin[v] = INT_MAX;
    r_imax[v] = INT_MIN;
    r_jmin[v] = INT_MAX;
    r_jmax[v] = INT_MIN;
  }
  for (nd = 0; nd < cm->nodes; nd++) {
    nd_r_imin[nd] = INT_MAX;
    nd_r_imax[nd] = INT_MIN;
    nd_r_jmin[nd] = INT_MAX;
    nd_r_jmax[nd] = INT_MIN;
  }

  /* state 0 is reachable for its whole band */
  nd_is_r[0] = TRUE;
  v_is_r[0]  = TRUE;
  r_imin[0]  = nd_r_imin[0] = imin[0];
  r_imax[0]  = nd_r_imax[0] = imax[0];
  r_jmin[0]  = nd_r_jmin[0] = jmin[0];
  r_jmax[0]  = nd_r_jmax[0] = jmax[0];

  /* deal with local begins, if they're active, we can jump into any local begin state with:
   * i within imin[0]..imax[0] and j within jmin[0]..jmax[0], as long as i,j are within
   * imin[v]..imax[v] and jmin[v]..jmax[v].
   */
  if(cm->flags & CMH_LOCAL_BEGIN) {
    for(v = 0; v < cm->M; v++) {
      if(NOT_IMPOSSIBLE(cm->beginsc[v])) {
        if(imin[v] != -1 && jmin[v] != -1) {
          if(((ESL_MIN(imax[v], imax[0]) - ESL_MAX(imin[v], imin[0])) >= 0) && /* TRUE if imin[v]..imax[v] overlaps with imin[0]..imax[0] by at least 1 residue */
             ((ESL_MIN(jmax[v], jmax[0]) - ESL_MAX(jmin[v], jmin[0])) >= 0)) { /* TRUE if jmin[v]..jmax[v] overlaps with jmin[0]..jmax[0] by at least 1 residue */
            r_imin[v] = ESL_MAX(imin[v], imin[0]);
            r_imax[v] = ESL_MIN(imax[v], imax[0]);
            r_jmin[v] = ESL_MAX(jmin[v], jmin[0]);
            r_jmax[v] = ESL_MIN(jmax[v], jmax[0]);
            v_is_r[v] = TRUE;
            nd_is_r[cm->ndidx[v]] = TRUE;
          }
        }
        /* this function is only compiled when eslDEBUGLEVEL >= 1, so this assertion is always active here */
        ESL_DASSERT1(((cm->stid[v] == MATP_MP) || (cm->stid[v] == MATR_MR) || (cm->stid[v] == MATL_ML) || (cm->stid[v] == BIF_B)));
      }
    }
  }

  /* The main loop: step through the CM, node by node, state by state,
   * for reachable-states v, determine which i,j residues are reachable for each child state of v
   */
  for (nd = 0; nd < cm->nodes; nd++) {
    for (v = cm->nodemap[nd]; v < (cm->nodemap[nd] + TotalStatesInNode(cm->ndtype[nd])); v++) {
      if(StateIsDetached(cm, v)) continue;
      /* nothing can be done with v unless both of its reachable bands are non-empty */
      if(r_imin[v] > r_imax[v] || r_jmin[v] > r_jmax[v]) continue;

      if(cm->sttype[v] == E_st) {
        if((r_imax[v] - r_jmin[v] - 1) >= 0) {
          /* END state v is reachable for some i, j such that j-i+1 = d = 0 (which is required for E states) */
          v_is_r[v]   = TRUE;
          nd_is_r[nd] = TRUE;
        }
      }
      else if (cm->sttype[v] == B_st) {
        /* same test as for the non-B, non-E states below, but a B state emits nothing (sdl = sdr = 0),
         * so the required subsequence length is 0, and we have two children, BEGL_S and BEGR_S */
        if((r_jmax[v] - r_imin[v] + 1) >= 0) {
          /* v is reachable for some i, j */
          v_is_r[v]   = TRUE;
          nd_is_r[nd] = TRUE;
          w = cm->cfirst[v]; /* BEGL_S */
          y = cm->cnum[v];   /* BEGR_S */

          /* only way to get to a BEGL_S is through its BIF parent, even with local begins (no local begin in BEGL_S) */
          r_imin[w] = ESL_MAX(imin[w], imin[v]);
          r_imax[w] = ESL_MIN(imax[w], imax[v]);
          r_jmin[w] = jmin[w];
          r_jmax[w] = jmax[w];
          w_nd = cm->ndidx[w];
          nd_r_imin[w_nd] = r_imin[w];
          nd_r_imax[w_nd] = r_imax[w];
          nd_r_jmin[w_nd] = r_jmin[w];
          nd_r_jmax[w_nd] = r_jmax[w];

          /* only way to get to a BEGR_S is through its BIF parent, even with local begins (no local begin in BEGR_S) */
          r_imin[y] = imin[y];
          r_imax[y] = imax[y];
          r_jmin[y] = ESL_MAX(jmin[y], jmin[v]);
          r_jmax[y] = ESL_MIN(jmax[y], jmax[v]);
          y_nd = cm->ndidx[y];
          nd_r_imin[y_nd] = r_imin[y];
          nd_r_imax[y_nd] = r_imax[y];
          nd_r_jmin[y_nd] = r_jmin[y];
          nd_r_jmax[y_nd] = r_jmax[y];
        }
      }
      else { /* state is not a B_st nor an E_st */
        sdl = StateLeftDelta(cm->sttype[v]);
        sdr = StateRightDelta(cm->sttype[v]);
        sd  = sdl + sdr;

        if((r_jmax[v] - r_imin[v] + 1) >= sd) {
          /* v is reachable for some i, j */
          v_is_r[v]   = TRUE;
          nd_is_r[nd] = TRUE;
          /* bands a child sees once v has emitted its residues */
          child_imin = r_imin[v] + sdl;
          child_imax = r_imax[v] + sdl;
          child_jmin = r_jmin[v] - sdr;
          child_jmax = r_jmax[v] - sdr;
          /* inserts self-loop, so they can push i (IL) or j (IR) across their whole band before exiting */
          if(cm->sttype[v] == IL_st) child_imax = ESL_MAX(child_imax, (imax[v]+1));
          if(cm->sttype[v] == IR_st) child_jmin = ESL_MIN(child_jmin, (jmin[v]-1));

          for(y = cm->cfirst[v]; y < (cm->cfirst[v] + cm->cnum[v]); y++) {
            if(imin[y] == -1) continue; /* y has no band */

            /* union of what was already reachable for y with what is reachable via v */
            r_imin[y] = ESL_MIN(r_imin[y], ESL_MAX(imin[y], child_imin));
            r_imax[y] = ESL_MAX(r_imax[y], ESL_MIN(imax[y], child_imax));
            r_jmin[y] = ESL_MIN(r_jmin[y], ESL_MAX(jmin[y], child_jmin));
            r_jmax[y] = ESL_MAX(r_jmax[y], ESL_MIN(jmax[y], child_jmax));

            if((r_imin[y] <= r_imax[y] && r_jmin[y] <= r_jmax[y]) && ((r_jmax[y] - r_imin[y] + 1) >= StateDelta(cm->sttype[y]))) {
              y_nd = cm->ndidx[y];
              nd_r_imin[y_nd] = ESL_MIN(nd_r_imin[y_nd], r_imin[y]);
              nd_r_imax[y_nd] = ESL_MAX(nd_r_imax[y_nd], r_imax[y]);
              nd_r_jmin[y_nd] = ESL_MIN(nd_r_jmin[y_nd], r_jmin[y]);
              nd_r_jmax[y_nd] = ESL_MAX(nd_r_jmax[y_nd], r_jmax[y]);
            }
            else { /* y is (still) unreachable, put its bands back to 'empty' */
              r_imin[y] = INT_MAX;
              r_imax[y] = INT_MIN;
              r_jmin[y] = INT_MAX;
              r_jmax[y] = INT_MIN;
            }
          }
        }
      } /* end of else that's entered if v != E_st nor B_st */
    } /* end of for (v) loop */
  }

  /* now we know what states are reachable for what i and j, check if a valid parse exists;
   * only record the first problem here, it is reported below once the work arrays are freed */
  if(! cm_is_localized) { /* local begins/ends are off, all nodes must be reachable to get a valid parse */
    for(nd = 0; nd < cm->nodes; nd++) {
      if(nd_is_r[nd] == FALSE) { unr_nd = nd; break; }
      if(cm->ndtype[nd] == BIF_nd) {
        v = cm->nodemap[nd];
        w = cm->cfirst[v]; /* BEGL_S */
        y = cm->cnum[v];   /* BEGR_S */
        if(r_jmax[w] < (r_imin[y]-1)) {
          gap_w     = w;
          gap_y     = y;
          gap_first = r_jmax[w]+1;
          gap_last  = r_imin[y]-1;
          break;
        }
      }
    }
  }
  else if(doing_search && cm_is_localized) { /* we're doing a local search, we have a valid parse if any state from which a local end is possible is reachable */
    v = 0;
    while(v < cm->M && !(v_is_r[v] && NOT_IMPOSSIBLE(cm->endsc[v]))) v++; /* increment v until we come to a state that is reachable and can go to EL, or we run out of states */
    if(v == cm->M && i0 != j0) no_local_end = TRUE;
  }

  free(v_is_r);
  free(r_imin);
  free(r_imax);
  free(r_jmin);
  free(r_jmax);
  free(nd_is_r);
  free(nd_r_imin);
  free(nd_r_imax);
  free(nd_r_jmin);
  free(nd_r_jmax);

  if(unr_nd != -1) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), CM is not locally configured and node %d (%4s) is unreachable\n", unr_nd, Nodetype(cm->ndtype[unr_nd]));
  if(gap_w  != -1) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), CM not local, BEGL_S w:%d nd:%d & BEGR_S y:%d nd:%d bands don't touch, res %d..%d unemittable!\n", gap_w, cm->ndidx[gap_w], gap_y, cm->ndidx[gap_y], gap_first, gap_last);
  if(no_local_end) ESL_FAIL(eslFAIL, errbuf, "CMBandsCheckValidParse(), doing_search=TRUE, CM is local, i0 != j0, but no CM state is reachable from which an EL is possible.\n");
  return eslOK;

 ERROR:
  /* an allocation failed part way through; arrays not yet allocated are still NULL and free(NULL) is a no-op */
  free(v_is_r);
  free(r_imin);
  free(r_imax);
  free(r_jmin);
  free(r_jmax);
  free(nd_is_r);
  free(nd_r_imin);
  free(nd_r_imax);
  free(nd_r_jmin);
  free(nd_r_jmax);
  ESL_FAIL(status, errbuf, "CMBandsCheckValidParse(), memory allocation error.");
  return status; /* NEVER REACHED */
}
#endif
3233
3234 /**************************************************************************
3235 * cp9_HMM2ijBands_OLD() and helper functions.
3236 * This was how bands were calculated up until revision 2318 (02.07.2008)
3237 *
3238 */
3239 /* helper functions for cp9_HMM2ijBands_OLD() */
3240 static void hmm2ij_prestate_step0_initialize(int n, int *nss_max_imin, int *nss_min_jmax, int i0, int j0);
3241 static void hmm2ij_prestate_step1_set_node_inserts(int n, int *nis_imin, int *nis_imax,
3242 int *nis_jmin, int *nis_jmax,
3243 int *nss_imin, int *nss_imax,
3244 int *nss_jmin, int *nss_jmax,
3245 int *pn_min_i, int *pn_max_i,
3246 CP9Map_t *cp9map);
3247 static void hmm2ij_prestate_step2_determine_safe(int n,
3248 int nss_max_imin_np1, int nss_min_jmax_np1,
3249 int nis_imin_n,
3250 int nis_jmax_n,
3251 int *safe_imax, int *safe_jmin);
3252 static void hmm2ij_prestate_step3_preset_node_splits(int n, int *nis_imin, int *nis_imax,
3253 int *nis_jmin, int *nis_jmax,
3254 int *nss_imin, int *nss_imax,
3255 int *nss_jmin, int *nss_jmax,
3256 int *pn_min_m, int *pn_max_m,
3257 int *pn_min_d, int *pn_max_d,
3258 CP9Map_t *cp9map);
3259 static void hmm2ij_split_state_step1_set_state_bands(int v, int n,
3260 int tmp_imin, int tmp_imax,
3261 int tmp_jmin, int tmp_jmax,
3262 int *imin, int *imax, int *jmin, int *jmax,
3263 int *nss_imin, int *nss_imax,
3264 int *nss_jmin, int *nss_jmax);
3265 static void hmm2ij_insert_state_step1_set_state_bands(int v,
3266 int tmp_imin, int tmp_imax,
3267 int tmp_jmin, int tmp_jmax,
3268 int *imin, int *imax, int *jmin, int *jmax);
3269 static void hmm2ij_state_step2_enforce_safe_trans(CM_t *cm, int v, int n, int *imax, int *jmin,
3270 int *nss_imax, int *nss_jmin,
3271 int safe_imax, int safe_jmin);
3272 static void hmm2ij_state_step3_enforce_state_delta(CM_t *cm, int v, int *jmin, int *jmax);
3273 static void hmm2ij_state_step4_update_safe_holders(int v, int n, int imin_v, int jmax_v, int *nss_max_imin,
3274 int *nss_min_jmax);
3275 static void hmm2ij_state_step5_non_emitter_d0_hack(int v, int imax_v, int *jmin);
3276
3277 /*****************************************************************************
3278 * Functions to go from HMM bands to i and j bands on a CM
3279 * cp9_HMM2ijBands_OLD()
3280 */
3281 /*
3282 * Function: cp9_HMM2ijBands_OLD()
3283 * EPN 12.21.05
3284 *
3285 * Purpose: Determine the band for each cm state v on i (the band on the
3286 * starting index in the subsequence emitted from the subtree rooted
3287 * at state v), and on j (the band on the ending index in the
3288 * subsequence emitted from the subtree rooted at state v).
3289 *
3290 * Some i and d bands are calculated from HMM bands on match and insert
3291 * and delete states from each node of the HMM that maps to a left emitting
3292 * node of the CM (including MATP nodes). The HMM bands were
3293 * calculated previously from the posterior matrices for mmx,
3294 * imx and dmx from a CP9 HMM.
3295 *
3296 * Some j bands are calculated from HMM bands on match and insert and
3297 * delete states from each node of the HMM that maps to a right emitting
3298 * node of the CM (including MATP nodes).
3299 *
3300 * i and j bands that cannot be directly determined from the
3301 * HMM bands are inferred based on the constraints imposed
3302 * on them by the i and j bands that CAN be determined from
3303 * the HMM bands.
3304 *
3305 * Our strategy is to set i and j bands for each state v
3306 * such that at least one state y (y \in C_v (y is reachable
3307 * from v)) can be reached from v while staying within the i
3308 * and j bands for v and y. This constraint is enforced by
3309 * determining the min and max i and j bands across all
3310 * states y (into safe* data structures) for a given v, and
3311 * then enforcing that at least one cell in the i and j
3312 * bands of v can transit to at least one cell in a band for
3313 * a y state after accounting for the direction specific
3314 * StateDelta() values for v.
3315 *
3316 * This function needs to be called only once, it determines
3317 * bands for ALL states. Its unclear the best way to handle
3318 * any states that don't have an explicit mapping to an HMM
3319 * state that we have a band on (i.e. all delete states, and
3320 * ROOT_IR, ROOT_IL, BEGR_IL, BIF_B, and start states).
3321 * (11.02.05) I take a simple approach, and set the bands on i
3322 * for such states to the same as those for states in a close
3323 * proximity. (see code for exact definitions)
3324 *
3325 * This function uses HMM derived bands on delete states.
3326 *
3327 * arguments:
3328 *
3329 * CM_t *cm the CM, must have valid cp9b (CP9 bands object)
3330 * errbuf char buffer for error messages
3331 * CP9Bands_t *cp9b the CP9 bands object, usually cm->cp9b
3332 * CP9Map_t *cp9map map from CM to CP9 HMM and vice versa
3333 * int i0 start of target subsequence (often 1, beginning of dsq)
3334 * int j0 end of target subsequence (often L, end of dsq)
3335 * int doing_search TRUE if the bands will be used for a scanning CYK/Inside
3336 * int debug_level [0..3] tells the function what level of debugging print
3337 * statements to print.
3338 *
3339 * Returns: eslOK on success;
3340 */
3341 int
cp9_HMM2ijBands_OLD(CM_t * cm,char * errbuf,CP9Bands_t * cp9b,CP9Map_t * cp9map,int i0,int j0,int doing_search,int debug_level)3342 cp9_HMM2ijBands_OLD(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int debug_level)
3343 {
3344 int v; /* counter over states of the CM */
3345
3346 int status;
3347 int safe_imax;
3348 int safe_jmin;
3349
3350 int tmp_imin;
3351 int tmp_imax;
3352 int tmp_jmin;
3353 int tmp_jmax;
3354
3355 /* ptrs to cp9b data, for convenience */
3356 int *pn_min_m; /* pn_min_m[k] = first position in HMM band for match state of HMM node k */
3357 int *pn_max_m; /* pn_max_m[k] = last position in HMM band for match state of HMM node k */
3358 int *pn_min_i; /* pn_min_i[k] = first position in HMM band for insert state of HMM node k */
3359 int *pn_max_i; /* pn_max_i[k] = last position in HMM band for insert state of HMM node k */
3360 int *pn_min_d; /* pn_min_d[k] = first position in HMM band for delete state of HMM node k */
3361 int *pn_max_d; /* pn_max_d[k] = last position in HMM band for delete state of HMM node k */
3362 int *imin; /* imin[v] = first position in band on i for state v to be filled in this function. [1..M] */
3363 int *imax; /* imax[v] = last position in band on i for state v to be filled in this function. [1..M] */
3364 int *jmin; /* jmin[v] = first position in band on j for state v to be filled in this function. [1..M] */
3365 int *jmax; /* jmax[v] = last position in band on j for state v to be filled in this function. [1..M] */
3366
3367 int *nss_imin; /* nss_imin[n] = imin of each split set state in node n*/
3368 int *nss_imax; /* nss_imax[n] = imax of each split set state in node n*/
3369 int *nss_jmin; /* nss_jmin[n] = jmin of each split set state in node n*/
3370 int *nss_jmax; /* nss_jmax[n] = jmax of each split set state in node n*/
3371
3372 int *nis_imin; /* nss_imin[n] = imin of each insert set state in node n*/
3373 int *nis_imax; /* nss_imax[n] = imax of each insert set state in node n*/
3374 int *nis_jmin; /* nss_jmin[n] = jmin of each insert set state in node n*/
3375 int *nis_jmax; /* nss_jmax[n] = jmax of each insert set state in node n*/
3376
3377 int *nss_max_imin; /* nss_max_imin[n] = max imin over split set states in node n*/
3378 int *nss_min_jmax; /* nss_min_jmax[n] = min jmax over split set states in node n*/
3379
3380 int n; /* counter over CM nodes. */
3381 int y, yoffset; /* counters over children states */
3382
3383 /* Contract checks */
3384 if (cp9b == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), cp9b is NULL.\n");
3385 if(i0 < 1) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), i0 < 1: %d\n", i0);
3386 if(j0 < 1) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), j0 < 1: %d\n", j0);
3387 if(j0 < i0) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_HMM2ijBands_OLD(), i0 (%d) < j0 (%d)\n", i0, j0);
3388
3389 /* set pointers to cp9b data
3390 * note: these arrays used to be allocated here, but that was wasteful, now it's allocated
3391 * once per model (instead of once per sequence) in AllocCP9Bands()
3392 */
3393
3394 pn_min_m = cp9b->pn_min_m;
3395 pn_max_m = cp9b->pn_max_m;
3396 pn_min_i = cp9b->pn_min_i;
3397 pn_max_i = cp9b->pn_max_i;
3398 pn_min_d = cp9b->pn_min_d;
3399 pn_max_d = cp9b->pn_max_d;
3400 imin = cp9b->imin;
3401 imax = cp9b->imax;
3402 jmin = cp9b->jmin;
3403 jmax = cp9b->jmax;
3404
3405 ESL_ALLOC(nss_imin, sizeof(int) * cm->nodes);
3406 ESL_ALLOC(nss_imax, sizeof(int) * cm->nodes);
3407 ESL_ALLOC(nss_jmin, sizeof(int) * cm->nodes);
3408 ESL_ALLOC(nss_jmax, sizeof(int) * cm->nodes);
3409
3410 ESL_ALLOC(nis_imin, sizeof(int) * cm->nodes);
3411 ESL_ALLOC(nis_imax, sizeof(int) * cm->nodes);
3412 ESL_ALLOC(nis_jmin, sizeof(int) * cm->nodes);
3413 ESL_ALLOC(nis_jmax, sizeof(int) * cm->nodes);
3414
3415 ESL_ALLOC(nss_max_imin, sizeof(int) * cm->nodes);
3416 ESL_ALLOC(nss_min_jmax, sizeof(int) * cm->nodes);
3417
3418 esl_vec_ISet(nss_imin, cm->nodes, -1);
3419 esl_vec_ISet(nss_imax, cm->nodes, -1);
3420 esl_vec_ISet(nss_jmin, cm->nodes, -1);
3421 esl_vec_ISet(nss_jmax, cm->nodes, -1);
3422
3423 esl_vec_ISet(nis_imin, cm->nodes, -1);
3424 esl_vec_ISet(nis_imax, cm->nodes, -1);
3425 esl_vec_ISet(nis_jmin, cm->nodes, -1);
3426 esl_vec_ISet(nis_jmax, cm->nodes, -1);
3427
3428 esl_vec_ISet(nss_max_imin, cm->nodes, -1);
3429 esl_vec_ISet(nss_min_jmax, cm->nodes, -1);
3430
3431 /* Initialize all bands to -1. */
3432 esl_vec_ISet(imin, cm->M, -1);
3433 esl_vec_ISet(imax, cm->M, -1);
3434 esl_vec_ISet(jmin, cm->M, -1);
3435 esl_vec_ISet(jmax, cm->M, -1);
3436
3437 /* We go node by node, bottom up, and fill in the bands on each
3438 * state for each node, keeping track of each node's split set min and max i's
3439 * and j's, as well as each node's insert set min and max i's and j's,
3440 * because they influence all nodes above (until a BEGL or BEGR at least).
3441 */
3442
3443 /* For match nodes (MATP, MATL, MATR):
3444 * First calc the split set node mins and maxes, then impose these
3445 * on each state v in the split set of the node, requiring that any valid
3446 * d resulting from the i and j bands on state v
3447 * is at least dv = StateDelta(v).
3448 * This is done by ensuring that jmin[v] >= dv and jmax[v] >= dv.
3449 * (We don't have to worry about i as we check again when we create
3450 * the d bands from the i and j bands in ij2d_bands()).
3451 * We really only have to enforce the StateDelta issue here so we
3452 * don't run into d band on j that is 0 cells in ij2d_bands().
3453 * Alternatively, we could ignore the StateDelta() issue here, and
3454 * allow ij2d_bands() to modify j bands when it enforces the StateDelta()
3455 * issue.
3456 */
3457
3458 for(n = (cm->nodes-1); n >= 0; n--) {
3459 switch (cm->ndtype[n]) {
3460 case END_nd:
3461 /* Special case, we need to know the bands on the states
3462 * in the node ABOVE this one. Node above MUST be MATP, MATL
3463 * or MATR. For END states, the band on i = the band on j;
3464 * this is because d must be 0, so i must be (j+1), so it's pointless
3465 * to allow an i value that (j+1) is not allowed to be or vice versa.
3466 * If the node above is MATL, we use the HMM band that maps
3467 * to the ML state - these correspond to bands on i. If it's a MATR,
3468 * we use the HMM band that maps to the MR state - these correspond
3469 * to bands on j. If it's a MATP, we get fancy (see below).
3470 */
3471 v = cm->nodemap[n];
3472 if(cm->ndtype[n-1] == MATL_nd) {
3473 /* tricky. we keep the nss_*m** structures ignorant of the fact that we're in
3474 * an end state, i.e. we don't force a d=0 (j-i+1=0). This way when
3475 * the node immediately above the end (the MATL) looks at it when it's determining
3476 * the correct bands on i, it doesn't get screwed up (as it would if j < i).
3477 */
3478
3479 /*minimum of delete and match states of node above*/
3480 nss_imin[n] = (pn_min_m[cp9map->nd2lpos[n-1]] <= (pn_min_d[cp9map->nd2lpos[n-1]])) ?
3481 pn_min_m[cp9map->nd2lpos[n-1]] : (pn_min_d[cp9map->nd2lpos[n-1]]);
3482 /*for the max, we must allow possibility of inserts and deletes.*/
3483 nss_imax[n] = (pn_max_m[cp9map->nd2lpos[n-1]] >= pn_max_i[cp9map->nd2lpos[n-1]]) ?
3484 pn_max_m[cp9map->nd2lpos[n-1]] : pn_max_i[cp9map->nd2lpos[n-1]];
3485 /* deletes max bands may always be less than match max bands...(not sure)*/
3486 if(nss_imax[n] < (pn_max_d[cp9map->nd2lpos[n-1]]))
3487 nss_imax[n] = (pn_max_d[cp9map->nd2lpos[n-1]]);
3488
3489 nss_jmin[n] = nss_imin[n];
3490 nss_jmax[n] = nss_imax[n];
3491
3492 imin[v] = nss_imin[n];
3493 imax[v] = nss_imax[n] + 1; /* we add 1 because we have to figure in the emission
3494 * of the MATL_ML (or final MATL_IL), which would increase
3495 * i by 1 potentially relative to the imax of that state.
3496 */
3497 jmin[v] = imin[v] - 1; /* d must be 0 for end states. */
3498 jmax[v] = imax[v] - 1; /* d must be 0 for end states. */
3499
3500 nss_max_imin[n] = imin[v];
3501 nss_min_jmax[n] = jmax[v];
3502 }
3503 else if(cm->ndtype[n-1] == MATR_nd) {
3504 /* tricky. we keep the nss_*m** structures ignorant of the fact that we're in
3505 * an end state, i.e. we don't force a d=0 (j-i+1=0). This way when
3506 * the node immediately above the end (the MATR) looks at it when its determining
3507 * the correct bands on i, it doesn't get screwed up (as it would if j < i).
3508 */
3509
3510 /*minimum of delete and match states of node above */
3511 nss_jmin[n] = (pn_min_m[cp9map->nd2rpos[n-1]] <= pn_min_d[cp9map->nd2rpos[n-1]]) ?
3512 pn_min_m[cp9map->nd2rpos[n-1]] : pn_min_d[cp9map->nd2rpos[n-1]];
3513 /*for the max, we must allow possibility of inserts.*/
3514 nss_jmax[n] = (pn_max_m[cp9map->nd2rpos[n-1]] >= pn_max_i[cp9map->nd2rpos[n-1]]) ?
3515 pn_max_m[cp9map->nd2rpos[n-1]] : pn_max_i[cp9map->nd2rpos[n-1]];
3516 /* deletes max bands may always be less than match max bands...(not sure)*/
3517 if(nss_jmax[n] < pn_max_d[cp9map->nd2rpos[n-1]])
3518 nss_jmax[n] = pn_max_d[cp9map->nd2rpos[n-1]];
3519 nss_imin[n] = nss_jmin[n];
3520 nss_imax[n] = nss_jmax[n];
3521
3522 jmin[v] = nss_jmin[v] - 1; /* we subtract 1 because of we have to figure
3523 * in the emission of the MATR_MR (or final MATR_IR), which would
3524 * decrease j by 1 potentially relative to jmin of that state.
3525 */
3526 jmax[v] = nss_jmax[n];
3527 imin[v] = jmin[v] + 1; /*d (j-i+1) must be 0 for end states*/
3528 imax[v] = jmax[v] + 1; /*d (j-i+1) must be 0 for end states*/
3529
3530 nss_max_imin[n] = imin[v];
3531 nss_min_jmax[n] = jmax[v];
3532 }
3533 else if(cm->ndtype[n-1] == MATP_nd) {
3534 /* Very rare case, only if the last bp in a stem is the last left consensus
3535 * column (respecting gap_thresh) in that alignment. Does happen though,
3536 * (at least in RFAM 6.1) because the training counts for transition priors
3537 * had counts for MATP_* state -> END_nd transition sets.
3538 */
3539
3540 /* tricky. we keep the nss_*m** structures ignorant of the fact that we're in
3541 * an end state, i.e. we don't force a d=0 (j-i+1=0). This way when
3542 * the node immediately above the end (the MATP) looks at it when its determining
3543 * the correct bands on j, it doesn't get screwed up (as it would if j < i).
3544 */
3545 /*minimum of delete and match states of node above*/
3546 nss_imin[n] = (pn_min_m[cp9map->nd2lpos[n-1]] <= (pn_min_d[cp9map->nd2lpos[n-1]])) ?
3547 pn_min_m[cp9map->nd2lpos[n-1]] : (pn_min_d[cp9map->nd2lpos[n-1]]);
3548 /*for the max, we must allow possibility of inserts and deletes.*/
3549 nss_imax[n] = (pn_max_m[cp9map->nd2lpos[n-1]] >= pn_max_i[cp9map->nd2lpos[n-1]]) ?
3550 pn_max_m[cp9map->nd2lpos[n-1]] : pn_max_i[cp9map->nd2lpos[n-1]];
3551 /* deletes max bands may always be less than match max bands...(not sure)*/
3552 if(nss_imax[n] < (pn_max_d[cp9map->nd2lpos[n-1]]))
3553 nss_imax[n] = (pn_max_d[cp9map->nd2lpos[n-1]]);
3554
3555 /*minimum of delete and match states of node above*/
3556 nss_jmin[n] = (pn_min_m[cp9map->nd2rpos[n-1]] <= pn_min_d[cp9map->nd2rpos[n-1]]) ?
3557 pn_min_m[cp9map->nd2rpos[n-1]] : pn_min_d[cp9map->nd2rpos[n-1]];
3558 /*for the max, we must allow possibility of inserts.*/
3559 nss_jmax[n] = (pn_max_m[cp9map->nd2rpos[n-1]] >= pn_max_i[cp9map->nd2rpos[n-1]]) ?
3560 pn_max_m[cp9map->nd2rpos[n-1]] : pn_max_i[cp9map->nd2rpos[n-1]];
3561 /* deletes max bands may always be less than match max bands...(not sure)*/
3562 if(nss_jmax[n] < pn_max_d[cp9map->nd2rpos[n-1]])
3563 nss_jmax[n] = pn_max_d[cp9map->nd2rpos[n-1]];
3564
3565 /* unique situation. end's d must be 0, so we are constrained on what
3566 * i can be relative to j, and j can be relative to i, but what we want
3567 * are the constraints on what i can be, and j can be.
3568 * because d=0 => j-i+1 = 0, imin should equal jmin + 1 and imax should equal jmax + 1.
3569 * so we really just want to know a min over i and j, and a max over i and j.
3570 * below we take min of imin and jmin (should always be imin i think) as the min,
3571 * and max of imax and jmax (should always be jmax i think) after accounting for
3572 * the possibility that a single base was just emitted left and/or right.
3573 */
3574 imax[v] = ((nss_imax[n] + 1) > nss_jmax[n]) ?
3575 (nss_imax[n] + 1) : nss_jmax[n];
3576 imin[v] = ((nss_imin[n]) < (nss_jmin[n] - 1)) ?
3577 (nss_imin[n]) : (nss_jmin[n] - 1);
3578 /* we can't have an i < i0 */
3579 imin[v] = ESL_MAX(imin[v], i0);
3580 imax[v] = ESL_MAX(imax[v], i0);
3581 jmin[v] = imin[v] - 1; /* d must be 0 for end states. */
3582 jmax[v] = imax[v] - 1; /* d must be 0 for end states. */
3583
3584 nss_max_imin[n] = imin[v];
3585 nss_min_jmax[n] = jmax[v];
3586 }
3587 break;
3588
3589 case MATP_nd:
3590 hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3591 hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3592 nss_imin, nss_imax, nss_jmin, nss_jmax,
3593 pn_min_i, pn_max_i, cp9map);
3594
3595 hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3596 nis_imin[n], nis_jmax[n],
3597 &safe_imax, &safe_jmin);
3598 hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3599 nss_imin, nss_imax, nss_jmin, nss_jmax,
3600 pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3601 cp9map);
3602 /* 6 states MATP_MP, MATP_ML, MATP_MR, MATP_D, MATP_IL, MATP_IR */
3603 v = cm->nodemap[n]; /* MATP_MP */
3604 /* Determine implied v bands using hmm for mapped 'direction(s)' and
3605 * next node's bands for non-mapped direction(s).
3606 */
3607 tmp_imin = pn_min_m[cp9map->nd2lpos[n]];
3608 tmp_imax = pn_max_m[cp9map->nd2lpos[n]];
3609 tmp_jmin = pn_min_m[cp9map->nd2rpos[n]];
3610 tmp_jmax = pn_max_m[cp9map->nd2rpos[n]];
3611 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3612 tmp_jmax, imin, imax, jmin, jmax,
3613 nss_imin, nss_imax, nss_jmin, nss_jmax);
3614 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3615 safe_jmin);
3616 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3617 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3618
3619 v++; /*MATP_ML*/
3620 /* Determine implied v bands using hmm for mapped 'direction(s)' and
3621 * next node's bands for non-mapped direction(s).
3622 */
3623 tmp_imin = pn_min_m[cp9map->nd2lpos[n]];
3624 tmp_imax = pn_max_m[cp9map->nd2lpos[n]];
3625 /* 12.19.05 - trying to deal with the right delete off-by-one
3626 * inverted relative to left delete issue.
3627 */
3628 tmp_jmin = (pn_min_d[cp9map->nd2rpos[n]] < nss_jmin[n+1]) ?
3629 pn_min_d[cp9map->nd2rpos[n]] : nss_jmin[n+1];
3630 tmp_jmax = (pn_max_d[cp9map->nd2rpos[n]] > nss_jmax[n+1]) ?
3631 pn_max_d[cp9map->nd2rpos[n]] : nss_jmax[n+1];
3632
3633 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3634 tmp_jmax, imin, imax, jmin, jmax,
3635 nss_imin, nss_imax, nss_jmin, nss_jmax);
3636 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3637 safe_jmin);
3638 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3639 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3640
3641 v++; /*MATP_MR*/
3642 /* this D-left state gets the delete band from the HMM node
3643 * that maps to the left side.
3644 */
3645 tmp_imin = pn_min_d[cp9map->nd2lpos[n]];
3646 tmp_imax = pn_max_d[cp9map->nd2lpos[n]];
3647 tmp_jmin = pn_min_m[cp9map->nd2rpos[n]];
3648 tmp_jmax = pn_max_m[cp9map->nd2rpos[n]];
3649 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3650 tmp_jmax, imin, imax, jmin, jmax,
3651 nss_imin, nss_imax, nss_jmin, nss_jmax);
3652 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3653 safe_jmin);
3654 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3655 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3656
3657 v++; /*MATP_D*/
3658 tmp_imin = pn_min_d[cp9map->nd2lpos[n]];
3659 tmp_imax = pn_max_d[cp9map->nd2lpos[n]];
3660 /* 12.19.05 - trying to deal with the right delete off-by-one
3661 * inverted relative to left delete issue.
3662 */
3663 tmp_jmin = (pn_min_d[cp9map->nd2rpos[n]] < nss_jmin[n+1]) ?
3664 pn_min_d[cp9map->nd2rpos[n]] : nss_jmin[n+1];
3665 tmp_jmax = (pn_max_d[cp9map->nd2rpos[n]] > nss_jmax[n+1]) ?
3666 pn_max_d[cp9map->nd2rpos[n]] : nss_jmax[n+1];
3667 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3668 tmp_jmax, imin, imax, jmin, jmax,
3669 nss_imin, nss_imax, nss_jmin, nss_jmax);
3670 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3671 safe_jmin);
3672 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3673 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3674 hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3675
3676 v++; /*MATP_IL*/
3677 /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
3678 tmp_imin = pn_min_i[cp9map->cs2hn[v][0]]; /* insert states can only map to 1 HMM node */
3679 tmp_imax = pn_max_i[cp9map->cs2hn[v][0]]; /* insert states can only map to 1 HMM node */
3680 tmp_jmin = nss_jmin[n];
3681 tmp_jmax = nss_jmax[n];
3682 hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3683 tmp_jmax, imin, imax, jmin, jmax);
3684 /* Enforce safe transitions, this makes sure that at least one state
3685 * y \in C_v is reachable from v. And further (special case for inserts)
3686 * make sure that we don't consider v as a possible y. IF we did, we might
3687 * be faced with a situation where v could only transit to itself, and then
3688 * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3689 * including to itself.
3690 */
3691 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3692 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3693
3694 v++; /*MATP_IR*/
3695 /* skip detached inserts */
3696 if(cp9map->cs2hn[v][0] == -1)
3697 continue;
3698 /* Special case, one of only two situations (other is ROOT_IR)
3699 * we could have come where v is an insert, and a possible
3700 * state x that we came from is an insert, but x != y (x can be the MATP_IL).
3701 * So we have to determine imin and imax carefully.
3702 */
3703 tmp_imin = (nss_imin[n] < imin[v-1]) ?
3704 nss_imin[n] : imin[v-1];
3705 tmp_imax = (nss_imax[n] > imax[v-1]) ?
3706 nss_imax[n] : imax[v-1];
3707 /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
3708 tmp_jmin = pn_min_i[cp9map->cs2hn[v][0]];
3709 tmp_jmax = pn_max_i[cp9map->cs2hn[v][0]];
3710 hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3711 tmp_jmax, imin, imax, jmin, jmax);
3712 /* Enforce safe transitions, this makes sure that at least one state
3713 * y \in C_v is reachable from v. And further (special case for inserts)
3714 * make sure that we don't consider v as a possible y. IF we did, we might
3715 * be faced with a situation where v could only transit to itself, and then
3716 * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3717 * including to itself.
3718 */
3719 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3720 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3721 break;
3722
3723 case MATL_nd:
3724 hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3725 hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3726 nss_imin, nss_imax, nss_jmin, nss_jmax,
3727 pn_min_i, pn_max_i, cp9map);
3728
3729 hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3730 nis_imin[n], nis_jmax[n],
3731 &safe_imax, &safe_jmin);
3732 hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3733 nss_imin, nss_imax, nss_jmin, nss_jmax,
3734 pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3735 cp9map);
3736
3737 /* 3 states MATL_ML, MATL_D, MATL_IL */
3738 v = cm->nodemap[n]; /* MATL_ML */
3739 tmp_imin = pn_min_m[cp9map->nd2lpos[n]];
3740 tmp_imax = pn_max_m[cp9map->nd2lpos[n]];
3741 tmp_jmin = nss_jmin[n+1];
3742 tmp_jmax = nss_jmax[n+1];
3743 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3744 tmp_jmax, imin, imax, jmin, jmax,
3745 nss_imin, nss_imax, nss_jmin, nss_jmax);
3746 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3747 safe_jmin);
3748 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3749 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3750
3751 v++; /*MATL_D*/
3752 /* this D-left state gets the delete band from the HMM node
3753 * that maps to the left side.
3754 */
3755 tmp_imin = pn_min_d[cp9map->nd2lpos[n]];
3756 tmp_imax = pn_max_d[cp9map->nd2lpos[n]];
3757 tmp_jmin = nss_jmin[n+1];
3758 tmp_jmax = nss_jmax[n+1];
3759 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3760 tmp_jmax, imin, imax, jmin, jmax,
3761 nss_imin, nss_imax, nss_jmin, nss_jmax);
3762 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3763 safe_jmin);
3764 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3765 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3766 hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3767
3768 v++; /*MATL_IL*/
3769 /* skip detached inserts */
3770 if(cp9map->cs2hn[v][0] == -1)
3771 continue;
3772 /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
3773 tmp_imin = pn_min_i[cp9map->cs2hn[v][0]];
3774 tmp_imax = pn_max_i[cp9map->cs2hn[v][0]];
3775 tmp_jmin = nss_jmin[n];
3776 tmp_jmax = nss_jmax[n];
3777
3778 hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3779 tmp_jmax, imin, imax, jmin, jmax);
3780 /* Enforce safe transitions, this makes sure that at least one state
3781 * y \in C_v is reachable from v. And further (special case for inserts)
3782 * make sure that we don't consider v as a possible y. IF we did, we might
3783 * be faced with a situation where v could only transit to itself, and then
3784 * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3785 * including to itself.
3786 */
3787 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3788 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3789 break;
3790
3791 case MATR_nd:
3792 hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3793 hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3794 nss_imin, nss_imax, nss_jmin, nss_jmax,
3795 pn_min_i, pn_max_i, cp9map);
3796
3797 hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3798 nis_imin[n], nis_jmax[n],
3799 &safe_imax, &safe_jmin);
3800 hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3801 nss_imin, nss_imax, nss_jmin, nss_jmax,
3802 pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3803 cp9map);
3804
3805 /* 3 states MATR_MR, MATR_D, MATR_IR */
3806 v = cm->nodemap[n]; /* MATR_MR */
3807 tmp_imin = nss_imin[n+1];
3808 tmp_imax = nss_imax[n+1];
3809 tmp_jmin = pn_min_m[cp9map->nd2rpos[n]];
3810 tmp_jmax = pn_max_m[cp9map->nd2rpos[n]];
3811 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3812 tmp_jmax, imin, imax, jmin, jmax,
3813 nss_imin, nss_imax, nss_jmin, nss_jmax);
3814 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3815 safe_jmin);
3816 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3817 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3818
3819 v++; /*MATR_D*/
3820 /* this D state gets its j band from the delete band of the HMM node
3821 * that maps to the right side (widened by the next node's split set j band below).
3822 */
3823 tmp_imin = nss_imin[n+1];
3824 tmp_imax = nss_imax[n+1];
3825 /* 12.19.05 - trying to deal with the right delete off-by-one
3826 * inverted relative to left delete issue.
3827 */
3828 tmp_jmin = (pn_min_d[cp9map->nd2rpos[n]] < nss_jmin[n+1]) ?
3829 pn_min_d[cp9map->nd2rpos[n]] : nss_jmin[n+1];
3830 tmp_jmax = (pn_max_d[cp9map->nd2rpos[n]] > nss_jmax[n+1]) ?
3831 pn_max_d[cp9map->nd2rpos[n]] : nss_jmax[n+1];
3832 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3833 tmp_jmax, imin, imax, jmin, jmax,
3834 nss_imin, nss_imax, nss_jmin, nss_jmax);
3835 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3836 safe_jmin);
3837 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3838 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3839 hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3840
3841 v++; /*MATR_IR*/
3842 /* skip detached inserts */
3843 if(cp9map->cs2hn[v][0] == -1)
3844 continue;
3845 tmp_imin = nss_imin[n];
3846 tmp_imax = nss_imax[n];
3847 /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
3848 tmp_jmin = pn_min_i[cp9map->cs2hn[v][0]];
3849 tmp_jmax = pn_max_i[cp9map->cs2hn[v][0]];
3850 hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3851 tmp_jmax, imin, imax, jmin, jmax);
3852 /* Enforce safe transitions, this makes sure that at least one state
3853 * y \in C_v is reachable from v. And further (special case for inserts)
3854 * make sure that we don't consider v as a possible y. IF we did, we might
3855 * be faced with a situation where v could only transit to itself, and then
3856 * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3857 * including to itself.
3858 */
3859 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3860 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3861 break;
3862
3863 case ROOT_nd:
3864 hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3865 hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3866 nss_imin, nss_imax, nss_jmin, nss_jmax,
3867 pn_min_i, pn_max_i, cp9map);
3868
3869 hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3870 nis_imin[n], nis_jmax[n],
3871 &safe_imax, &safe_jmin);
3872 hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3873 nss_imin, nss_imax, nss_jmin, nss_jmax,
3874 pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3875 cp9map);
3876 /* 3 states, ROOT_S, ROOT_IL, and ROOT_IR*/
3877 v = cm->nodemap[n]; /* ROOT_S SPECIAL CASE */
3878 if(doing_search) { /* we're doing search, ROOT_S doesn't necessarily emit full sequence */
3879 tmp_imin = nss_imin[n+1];
3880 tmp_imax = nss_imax[n+1];
3881 tmp_jmin = nss_jmin[n+1];
3882 tmp_jmax = nss_jmax[n+1];
3883 }
3884 else { /* we're doing alignment, enforce ROOT_S emits full sequence */
3885 /* for now, enforce ROOT_S emits full sequence at end of the function, we'll relax this if doing_search==TRUE */
3886 tmp_imin = i0;
3887 tmp_imax = i0;
3888 tmp_jmin = j0;
3889 tmp_jmax = j0;
3890 }
3891 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3892 tmp_jmax, imin, imax, jmin, jmax,
3893 nss_imin, nss_imax, nss_jmin, nss_jmax);
3894 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3895 safe_jmin);
3896 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3897 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3898
3899 v++; /*ROOT_IL SPECIAL CASE*/
3900 /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0], which is HMM node 0*/
3901 if(doing_search)
3902 tmp_imin = pn_min_i[cp9map->cs2hn[v][0]]; /* should this be imin[0]? */
3903 else
3904 tmp_imin = i0; /* Have to be able to transit here from ROOT_S */
3905 tmp_imax = nss_imax[n+1];
3906 if(doing_search) {
3907 tmp_jmin = nss_jmin[n+1];
3908 tmp_jmax = nss_jmax[n+1];
3909 }
3910 else {
3911 tmp_jmin = j0; /* we never emit to the right in this state */
3912 tmp_jmax = j0; /* we never emit to the right in this state */
3913 }
3914 hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3915 tmp_jmax, imin, imax, jmin, jmax);
3916 /* Enforce safe transitions, this makes sure that at least one state
3917 * y \in C_v is reachable from v. And further (special case for inserts)
3918 * make sure that we don't consider v as a possible y. IF we did, we might
3919 * be faced with a situation where v could only transit to itself, and then
3920 * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3921 * including to itself.
3922 */
3923 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3924 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3925
3926 v++; /*ROOT_IR SPECIAL CASE analagous to ROOT_IL*/
3927 if(doing_search)
3928 tmp_imin = nss_imin[n+1]; /* same tmp_imin as ROOT_S */
3929 else
3930 tmp_imin = i0; /* we never emit to the left in this state */
3931 tmp_imax = nss_imax[n+1];
3932 tmp_jmin = nss_jmin[n+1];
3933 tmp_jmax = j0; /* Have to be able to transit here from ROOT_S */
3934 hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
3935 tmp_jmax, imin, imax, jmin, jmax);
3936 /* Enforce safe transitions, this makes sure that at least one state
3937 * y \in C_v is reachable from v. And further (special case for inserts)
3938 * make sure that we don't consider v as a possible y. IF we did, we might
3939 * be faced with a situation where v could only transit to itself, and then
3940 * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
3941 * including to itself.
3942 */
3943 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
3944 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3945 break;
3946
3947 case BEGL_nd:
3948 hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3949 hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3950 nss_imin, nss_imax, nss_jmin, nss_jmax,
3951 pn_min_i, pn_max_i, cp9map);
3952
3953 hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3954 nis_imin[n], nis_jmax[n],
3955 &safe_imax, &safe_jmin);
3956 hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3957 nss_imin, nss_imax, nss_jmin, nss_jmax,
3958 pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3959 cp9map);
3960 /* 1 state BEGL_S */
3961 v = cm->nodemap[n];
3962 /* The next node MUST be a match node (MATP
3963 * specifically due to model building
3964 * algorithm) or a BIF node. We derive imin, imax,
3965 * jmin and jmax from that node.
3966 */
3967 /* Use the next nodes split set band, which
3968 * will be wider of match and delete states bands
3969 * for split set states in next node.
3970 */
3971 tmp_imin = nss_imin[n+1];
3972 tmp_imax = nss_imax[n+1];
3973 tmp_jmin = nss_jmin[n+1];
3974 tmp_jmax = nss_jmax[n+1];
3975 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
3976 tmp_jmax, imin, imax, jmin, jmax,
3977 nss_imin, nss_imax, nss_jmin, nss_jmax);
3978 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
3979 safe_jmin);
3980 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
3981 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
3982 hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
3983 break;
3984
3985 case BEGR_nd:
3986 hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
3987 hmm2ij_prestate_step1_set_node_inserts(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3988 nss_imin, nss_imax, nss_jmin, nss_jmax,
3989 pn_min_i, pn_max_i, cp9map);
3990
3991 hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
3992 nis_imin[n], nis_jmax[n],
3993 &safe_imax, &safe_jmin);
3994 hmm2ij_prestate_step3_preset_node_splits(n, nis_imin, nis_imax, nis_jmin, nis_jmax,
3995 nss_imin, nss_imax, nss_jmin, nss_jmax,
3996 pn_min_m, pn_max_m, pn_min_d, pn_max_d,
3997 cp9map);
3998 /* 2 states BEGR_S and BEGR_IL */
3999 v = cm->nodemap[n]; /*BEGR_S*/
4000 /* Use either the next nodes split set band, which
4001 * will be wider of match and delete states bands
4002 * for split set states in next node OR
4003 * the band on the insert state that maps to the
4004 * BEGR_IL, erring on the safe side (wider band).
4005 */
4006 tmp_imin = nss_imin[n+1];
4007 tmp_imax = nss_imax[n+1];
4008 if(pn_min_i[cp9map->cs2hn[v+1][0]] < tmp_imin)
4009 tmp_imin = pn_min_i[cp9map->cs2hn[v+1][0]];
4010 if(pn_max_i[cp9map->cs2hn[v+1][0]] > tmp_imax)
4011 tmp_imax = pn_max_i[cp9map->cs2hn[v+1][0]];
4012 tmp_jmin = nss_jmin[n+1];
4013 tmp_jmax = nss_jmax[n+1];
4014 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
4015 tmp_jmax, imin, imax, jmin, jmax,
4016 nss_imin, nss_imax, nss_jmin, nss_jmax);
4017 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
4018 safe_jmin);
4019 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
4020 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
4021 hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
4022
4023 v++; /*BEGR_IL*/
4024 /* This state maps to the insert state of HMM node cp9map->cs2hn[v][0]*/
4025 tmp_imin = pn_min_i[cp9map->cs2hn[v][0]];
4026 tmp_imax = pn_max_i[cp9map->cs2hn[v][0]];
4027 tmp_jmin = nss_jmin[n+1];
4028 tmp_jmax = nss_jmax[n+1];
4029 hmm2ij_insert_state_step1_set_state_bands(v, tmp_imin, tmp_imax, tmp_jmin,
4030 tmp_jmax, imin, imax, jmin, jmax);
4031 /* Enforce safe transitions, this makes sure that at least one state
4032 * y \in C_v is reachable from v. And further (special case for inserts)
4033 * make sure that we don't consider v as a possible y. IF we did, we might
4034 * be faced with a situation where v could only transit to itself, and then
4035 * we'd be in the same situation, but we may no longer be able to transit ANYWHERE
4036 * including to itself.
4037 */
4038 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, (nss_max_imin[n+1]), nss_min_jmax[n+1]);
4039 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
4040 break;
4041
4042 case BIF_nd:
4043 hmm2ij_prestate_step0_initialize(n, nss_max_imin, nss_min_jmax, i0, j0);
4044
4045 /* 1 state BIF_B */
4046 v = cm->nodemap[n]; /*BIF_B*/
4047 /* The only two connected states are BEGL_S and BEGR_S.
4048 * We can derive our imin, imax, jmin, and jmax from
4049 * those two states.
4050 * cm->cfirst[v] is the state index of the left child.
4051 * cm->cnum[v] is the state index of the right child.
4052 */
4053 nis_imin[n] = imin[cm->cfirst[v]];
4054 nis_imax[n] = imax[cm->cfirst[v]];
4055 nis_jmin[n] = jmin[cm->cnum[v]];
4056 nis_jmax[n] = jmax[cm->cnum[v]];
4057
4058 nss_imin[n] = imin[cm->cfirst[v]];
4059 nss_imax[n] = imax[cm->cfirst[v]];
4060 nss_jmin[n] = jmin[cm->cnum[v]];
4061 nss_jmax[n] = jmax[cm->cnum[v]];
4062
4063 hmm2ij_prestate_step2_determine_safe(n, nss_max_imin[n+1], nss_min_jmax[n+1],
4064 nis_imin[n], nis_jmax[n],
4065 &safe_imax, &safe_jmin);
4066 tmp_imin = imin[cm->cfirst[v]];
4067 tmp_imax = imax[cm->cfirst[v]];
4068 tmp_jmin = jmin[cm->cnum[v]];
4069 tmp_jmax = jmax[cm->cnum[v]];
4070 hmm2ij_split_state_step1_set_state_bands(v, n, tmp_imin, tmp_imax, tmp_jmin,
4071 tmp_jmax, imin, imax, jmin, jmax,
4072 nss_imin, nss_imax, nss_jmin, nss_jmax);
4073 hmm2ij_state_step2_enforce_safe_trans(cm, v, n, imax, jmin, nss_imax, nss_jmin, safe_imax,
4074 safe_jmin);
4075 hmm2ij_state_step3_enforce_state_delta(cm, v, jmin, jmax);
4076 hmm2ij_state_step4_update_safe_holders(v, n, imin[v], jmax[v], nss_max_imin, nss_min_jmax);
4077 hmm2ij_state_step5_non_emitter_d0_hack(v, imax[v], jmin);
4078 break;
4079 }
4080 }
4081
4082 /* Tie up some loose ends:
4083 * 1. Ensure that all valid i are >= i0 and all valid j are <= j0
4084 * 2. Ensure all bands have bandwidth >= 0 (see code)
4085 * 3. Set detached inserts states to imin=imax=jmin=jmax=i0 to avoid
4086 * problems in downstream functions. These states WILL NEVER BE ENTERED
4087 * 4. Do a quick check to make sure we've assigned the bands
4088 * on i and j for all states to positive values (none were
4089 * left as -1 EXCEPT for end states which should have i bands left as -1).
4090 * 5. Ensure imin[0] <= imin[v] for all v and jmax[0] >= jmax[v] for all v.
4091 * 6. If doing_search==TRUE, rewrite the bands on the
4092 * ROOT_S state so they allow any possible transition to a child
4093 * that the child's bands would allow.
4094 */
4095
4096 /* 1. Ensure that all valid i are >= i0 and all valid j are <= j0 */
4097 for(v = 0; v < cm->M; v++) {
4098 imin[v] = ESL_MAX(imin[v], i0); /* imin[v] can't be less than i0 */
4099 imax[v] = ESL_MAX(imax[v], i0); /* imax[v] can't be less than i0 */
4100
4101 imin[v] = ESL_MIN(imin[v], j0); /* imin[v] can't be more than j0 */
4102 imax[v] = ESL_MIN(imax[v], j0); /* imax[v] can't be more than j0 */
4103
4104 imax[v] = ESL_MIN(imax[v], j0); /* imax[v] can't be more than j0 */
4105
4106 jmin[v] = ESL_MIN(jmin[v], j0); /* jmin[v] can't be more than j0 */
4107 jmax[v] = ESL_MIN(jmax[v], j0); /* jmax[v] can't be more than j0 */
4108
4109 jmin[v] = ESL_MAX(jmin[v], i0); /* jmin[v] can't be less than i0 */
4110 jmax[v] = ESL_MAX(jmax[v], i0); /* jmax[v] can't be less than i0 */
4111
4112 /* 2. Ensure all bands have bandwidth >= 0
4113 * Ensure: jmax[v] - jmin[v] + 1 >= 0
4114 * imax[v] - imin[v] + 1 >= 0
4115 * jmax[v] - jmin[v] + 1 == 0 means there are no valid j's for state v,
4116 * so state v is not allowed to be in the parse, we allow this (maybe we shouldn't)
4117 */
4118 imax[v] = ESL_MAX(imax[v], imin[v]-1);
4119 jmin[v] = ESL_MIN(jmin[v], jmax[v]+1);
4120
4121 /* 3. Set detached inserts states to imin=imax=jmin=jmax=i0 to avoid
4122 * problems in downstream functions. These states WILL NEVER BE ENTERED
4123 */
4124 if(cm->sttype[v+1] == E_st) imin[v] = imax[v] = jmin[v] = jmax[v] = i0;
4125
4126 /* 4. Do a quick check to make sure we've assigned the bands
4127 * on i and j for all states to positive values (none were
4128 * left as -1 EXCEPT for end states which should have i bands left as -1).
4129 */
4130 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imin[v] == -1))));
4131 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imax[v] == -1))));
4132 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmin[v] == -1))));
4133 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmax[v] == -1))));
4134
4135 /* 5. Ensure imin[0] <= imin[v] for all v and jmax[0] >= jmax[v] for all v. */
4136 imin[0] = ESL_MIN(imin[0], imin[v]);
4137 jmax[0] = ESL_MAX(jmax[0], jmax[v]);
4138 }
4139
4140 /* 6. If doing_search==TRUE, rewrite the bands on the
4141 * ROOT_S state so they allow any possible transition to a child
4142 * that the child's bands would allow.
4143 */
4144 if(doing_search) {
4145 /* First look at children of 0 (these probs will be 0. if local begins on, but it doesn't matter for our purposes here) */
4146 for (yoffset = 0; yoffset < cm->cnum[0]; yoffset++) {
4147 y = cm->cnum[0] + yoffset;
4148 imin[0] = ESL_MIN(imin[0], imin[y]);
4149 imax[0] = ESL_MAX(imax[0], imax[y]);
4150 jmin[0] = ESL_MIN(jmin[0], jmin[y]);
4151 jmax[0] = ESL_MAX(jmax[0], jmax[y]);
4152 }
4153 /* now for possible local begins */
4154 if(cm->flags & CMH_LOCAL_BEGIN) {
4155 for (y = 1; y < cm->M; y++) {
4156 if(NOT_IMPOSSIBLE(cm->beginsc[y])) {
4157 imin[0] = ESL_MIN(imin[0], imin[y]);
4158 imax[0] = ESL_MAX(imax[0], imax[y]);
4159 jmin[0] = ESL_MIN(jmin[0], jmin[y]);
4160 jmax[0] = ESL_MAX(jmax[0], jmax[y]);
4161 }
4162 }
4163 }
4164 }
4165 /* Final, exceedingly rare, special case */
4166 if(i0 == j0) { /* special case that breaks DP recursion for MP states
4167 * b/c target seq is length 1, and all MPs are impossible,
4168 * yet above code just forced jmin[v] <= j0 and jmax[v] <= j0,
4169 * which says that MPs are possible.
4170 */
4171 for(v = 0; v < cm->M; v++) {
4172 if(cm->sttype[v] == MP_st) {
4173 jmin[v] = j0+1;
4174 jmax[v] = j0;
4175 /* now 'for (j = jmin[v]; j <= jmax[v]; j++)' { loops will never be entered, b/c jmin[v] == 2, jmax[v] == 1 */
4176 }
4177 }
4178 }
4179
4180 #if 0
4181 /* OLD CODE EPN, Fri Dec 21 09:14:32 2007 */
4182 /* Tie up some loose ends:
4183 * 1. Set detached inserts states to imin=imax=jmin=jmax=i0 to avoid
4184 * problems in downstream functions. These states WILL NEVER BE ENTERED
4185 * 2. Do a quick check to make sure we've assigned the bands
4186 * on i and j for all states to positive values (none were
4187 * left as -1 EXCEPT for end states which should have i bands left as -1).
4188 * 3. Ensure that all *max[v] and *min[v] values are <= L, values greater
4189 * than this don't make sense.
4190 */
4191
4192 for(v = 0; v < cm->M; v++) {
4193 /* set bands for detached inserts */
4194 if(cm->sttype[v+1] == E_st) imin[v] = imax[v] = jmin[v] = jmax[v] = i0;
4195
4196 /* Ensure: for all i imin[v]..i..imax[v]
4197 * i0 <= i <= j0+1
4198 * for all j jmin[v]..j..jmax[v]
4199 * i0 <= j <= j0
4200 * Note: i can be j0+1 to allow delete states to be entered with
4201 * d = 0, after the entire seq has been emitted.
4202 */
4203 imin[v] = ESL_MAX(imin[v], i0);
4204 imin[v] = ESL_MIN(imin[v], j0+1);
4205 imax[v] = ESL_MAX(imax[v], i0);
4206 imax[v] = ESL_MIN(imax[v], j0+1);
4207 jmin[v] = ESL_MAX(jmin[v], i0);
4208 jmin[v] = ESL_MIN(jmin[v], j0);
4209 jmax[v] = ESL_MAX(jmax[v], i0);
4210 jmax[v] = ESL_MIN(jmax[v], j0);
4211
4212 /* Ensure: for all v imin[v] >= imin[0],
4213 * jmax[v] <= jmax[0].
4214 */
4215 imin[v] = ESL_MAX(imin[v], imin[0]);
4216 imax[v] = ESL_MAX(imax[v], imin[0]);
4217 jmax[v] = ESL_MIN(jmax[v], jmax[0]);
4218 jmin[v] = ESL_MIN(jmin[v], jmax[0]);
4219
4220 /* Ensure: jmax[v] - jmin[v] + 1 >= 0
4221 * imax[v] - imin[v] + 1 >= 0
4222 * jmax[v] - jmin[v] + 1 == 0 means there are no valid j's for state v,
4223 * so state v is not allowed to be in the parse, we allow this (maybe we shouldn't)
4224 */
4225 imin[v] = ESL_MIN(imin[v], imax[v]+1);
4226 jmin[v] = ESL_MIN(jmin[v], jmax[v]+1);
4227
4228 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imin[v] == -1))));
4229 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (imax[v] == -1))));
4230 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmin[v] == -1))));
4231 ESL_DASSERT1((! ((cm->sttype[v] != E_st) && (jmax[v] == -1))));
4232 }
4233 #endif
4234
4235 /* debug_print_ij_bands(cm); */
4236
4237 free(nss_imin);
4238 free(nss_imax);
4239 free(nss_jmin);
4240 free(nss_jmax);
4241 free(nis_imin);
4242 free(nis_imax);
4243 free(nis_jmin);
4244 free(nis_jmax);
4245 free(nss_max_imin);
4246 free(nss_min_jmax);
4247 return eslOK;
4248
4249 ERROR:
4250 ESL_FAIL(status, errbuf, "Memory allocation error.\n");
4251 }
4252
/**************************************************************************
 * Helper functions for *_cp9_HMM2ijBands_OLD()
 *  hmm2ij_prestate_step0_initialize()
 *  hmm2ij_prestate_step1_set_node_inserts()
 *  hmm2ij_prestate_step2_determine_safe()
 *  hmm2ij_prestate_step3_preset_node_splits()
 *  hmm2ij_split_state_step1_set_state_bands()
 *  hmm2ij_insert_state_step1_set_state_bands()
 *  hmm2ij_state_step2_enforce_safe_trans()
 *  hmm2ij_state_step3_enforce_state_delta()
 *  hmm2ij_state_step4_update_safe_holders()
 *  hmm2ij_state_step5_non_emitter_d0_hack()
 */
4264
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_prestate_step0_initialize
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() helper function.
 *           Seeds the two per-node "safe holder" entries for node <n>:
 *           nss_max_imin[n] is set to i0-1 and nss_min_jmax[n] to j0.
 *           These are the starting values that
 *           hmm2ij_state_step4_update_safe_holders() later raises
 *           (max of imin) and lowers (min of jmax).
 *
 * Args:     n            - node index to initialize
 *           nss_max_imin - array indexed by node; element [n] is written
 *           nss_min_jmax - array indexed by node; element [n] is written
 *           i0           - first position of the target subsequence
 *           j0           - final position of the target subsequence
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_prestate_step0_initialize(int n, int *nss_max_imin, int *nss_min_jmax, int i0, int j0)
{
  int *max_imin_slot = nss_max_imin + n;
  int *min_jmax_slot = nss_min_jmax + n;

  *max_imin_slot = i0 - 1;
  *min_jmax_slot = j0;
}
4278
4279 /*****************************************************************************
4280 * EPN 12.21.05
4281 * Function: hmm2ij_prestate_step1_set_node_inserts
4282 *
4283 * Purpose: cp9_HMM2ijBands_OLD*() function helper function.
4284 *
4285 *****************************************************************************/
4286 void
hmm2ij_prestate_step1_set_node_inserts(int n,int * nis_imin,int * nis_imax,int * nis_jmin,int * nis_jmax,int * nss_imin,int * nss_imax,int * nss_jmin,int * nss_jmax,int * pn_min_i,int * pn_max_i,CP9Map_t * cp9map)4287 hmm2ij_prestate_step1_set_node_inserts(int n, int *nis_imin, int *nis_imax,
4288 int *nis_jmin, int *nis_jmax,
4289 int *nss_imin, int *nss_imax,
4290 int *nss_jmin, int *nss_jmax,
4291 int *pn_min_i, int *pn_max_i,
4292 CP9Map_t *cp9map)
4293
4294 {
4295 if(cp9map->nd2lpos[n] != -1)
4296 {
4297 nis_imin[n] = pn_min_i[cp9map->nd2lpos[n]];
4298 nis_imax[n] = pn_max_i[cp9map->nd2lpos[n]];
4299 }
4300 else
4301 {
4302 nis_imin[n] = nss_imin[n+1];
4303 nis_imax[n] = nss_imax[n+1];
4304 }
4305 if(cp9map->nd2rpos[n] != -1)
4306 {
4307 nis_jmin[n] = pn_min_i[cp9map->nd2rpos[n]];
4308 nis_jmax[n] = pn_max_i[cp9map->nd2rpos[n]];
4309 }
4310 else
4311 {
4312 nis_jmin[n] = nss_jmin[n+1];
4313 nis_jmax[n] = nss_jmax[n+1];
4314 }
4315 }
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_prestate_step2_determine_safe
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() helper function.
 *           Computes the two "safe" limits for a node:
 *             *safe_imax = the smaller of nss_max_imin_np1 and nis_imin_n
 *             *safe_jmin = the larger  of nss_min_jmax_np1 and nis_jmax_n
 *
 * Args:     n                - node index (not used by this function)
 *           nss_max_imin_np1 - nss_max_imin value for node n+1
 *           nss_min_jmax_np1 - nss_min_jmax value for node n+1
 *           nis_imin_n       - nis_imin value for node n
 *           nis_jmax_n       - nis_jmax value for node n
 *           safe_imax        - RETURN: min(nss_max_imin_np1, nis_imin_n)
 *           safe_jmin        - RETURN: max(nss_min_jmax_np1, nis_jmax_n)
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_prestate_step2_determine_safe(int n,
                                     int nss_max_imin_np1, int nss_min_jmax_np1,
                                     int nis_imin_n,
                                     int nis_jmax_n,
                                     int *safe_imax, int *safe_jmin)
{
  if (nss_max_imin_np1 < nis_imin_n) {
    *safe_imax = nss_max_imin_np1;
  } else {
    *safe_imax = nis_imin_n;
  }

  if (nss_min_jmax_np1 > nis_jmax_n) {
    *safe_jmin = nss_min_jmax_np1;
  } else {
    *safe_jmin = nis_jmax_n;
  }
}
4335 /*****************************************************************************
4336 * EPN 12.21.05
4337 * Function: hmm2ij_prestate_step1_set_node_inserts
4338 *
4339 * Purpose: cp9_HMM2ijBands_OLD*() function helper function.
4340 *
4341 *****************************************************************************/
4342 void
hmm2ij_prestate_step3_preset_node_splits(int n,int * nis_imin,int * nis_imax,int * nis_jmin,int * nis_jmax,int * nss_imin,int * nss_imax,int * nss_jmin,int * nss_jmax,int * pn_min_m,int * pn_max_m,int * pn_min_d,int * pn_max_d,CP9Map_t * cp9map)4343 hmm2ij_prestate_step3_preset_node_splits(int n, int *nis_imin, int *nis_imax,
4344 int *nis_jmin, int *nis_jmax,
4345 int *nss_imin, int *nss_imax,
4346 int *nss_jmin, int *nss_jmax,
4347 int *pn_min_m, int *pn_max_m,
4348 int *pn_min_d, int *pn_max_d,
4349 CP9Map_t *cp9map)
4350 {
4351 if(cp9map->nd2lpos[n] != -1)
4352 {
4353 nss_imin[n] = (pn_min_m[cp9map->nd2lpos[n]] < (pn_min_d[cp9map->nd2lpos[n]])) ?
4354 pn_min_m[cp9map->nd2lpos[n]] : (pn_min_d[cp9map->nd2lpos[n]]);
4355 nss_imax[n] = (pn_max_m[cp9map->nd2lpos[n]] > (pn_max_d[cp9map->nd2lpos[n]])) ?
4356 pn_max_m[cp9map->nd2lpos[n]] : (pn_max_d[cp9map->nd2lpos[n]]);
4357 }
4358 else
4359 {
4360 nss_imin[n] = nss_imin[n+1];
4361 nss_imax[n] = nss_imax[n+1];
4362 }
4363 if(cp9map->nd2rpos[n] != -1)
4364 {
4365 nss_jmin[n] = (pn_min_m[cp9map->nd2rpos[n]] < pn_min_d[cp9map->nd2rpos[n]]) ?
4366 pn_min_m[cp9map->nd2rpos[n]] : pn_min_d[cp9map->nd2rpos[n]];
4367 nss_jmax[n] = (pn_max_m[cp9map->nd2rpos[n]] > pn_max_d[cp9map->nd2rpos[n]]) ?
4368 pn_max_m[cp9map->nd2rpos[n]] : pn_max_d[cp9map->nd2rpos[n]];
4369 }
4370 else
4371 {
4372 nss_jmin[n] = nss_jmin[n+1];
4373 nss_jmax[n] = nss_jmax[n+1];
4374 }
4375 }
4376
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_split_state_step1_set_state_bands
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() helper function.
 *           Assigns the i and j bands of state <v> from the tmp_* values,
 *           then widens the split set bands of node <n> (nss_*[n]) where
 *           needed so that they enclose the bands just assigned to <v>.
 *
 * Args:     v                    - state index
 *           n                    - node index
 *           tmp_imin, tmp_imax   - band on i to assign to state v
 *           tmp_jmin, tmp_jmax   - band on j to assign to state v
 *           imin, imax,
 *           jmin, jmax           - per-state band arrays, written at [v]
 *           nss_imin, nss_imax,
 *           nss_jmin, nss_jmax   - per-node split set bands, possibly
 *                                  widened at [n]
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_split_state_step1_set_state_bands(int v, int n,
                                         int tmp_imin, int tmp_imax,
                                         int tmp_jmin, int tmp_jmax,
                                         int *imin, int *imax, int *jmin, int *jmax,
                                         int *nss_imin, int *nss_imax,
                                         int *nss_jmin, int *nss_jmax)
{
  /* state bands */
  imin[v] = tmp_imin;
  imax[v] = tmp_imax;
  jmin[v] = tmp_jmin;
  jmax[v] = tmp_jmax;

  /* node split set bands must enclose the state's bands */
  if (nss_imin[n] > imin[v]) { nss_imin[n] = imin[v]; }
  if (nss_imax[n] < imax[v]) { nss_imax[n] = imax[v]; }
  if (nss_jmin[n] > jmin[v]) { nss_jmin[n] = jmin[v]; }
  if (nss_jmax[n] < jmax[v]) { nss_jmax[n] = jmax[v]; }
}
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_insert_state_step1_set_state_bands
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() helper function.
 *           Assigns the i and j bands of insert state <v> directly from
 *           the tmp_* values. Unlike the split state version, no per-node
 *           bands are touched.
 *
 * Args:     v                    - state index
 *           tmp_imin, tmp_imax   - band on i to assign to state v
 *           tmp_jmin, tmp_jmax   - band on j to assign to state v
 *           imin, imax,
 *           jmin, jmax           - per-state band arrays, written at [v]
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_insert_state_step1_set_state_bands(int v,
                                          int tmp_imin, int tmp_imax,
                                          int tmp_jmin, int tmp_jmax,
                                          int *imin, int *imax, int *jmin, int *jmax)
{
  *(imin + v) = tmp_imin;
  *(imax + v) = tmp_imax;
  *(jmin + v) = tmp_jmin;
  *(jmax + v) = tmp_jmax;
}
4423 /*****************************************************************************
4424 * EPN 12.21.05
4425 * Function: hmm2ij_state_step2_enforce_safe_trans
4426 *
4427 * Purpose: cp9_HMM2ijBands_OLD*() function helper function.
4428 *
4429 *****************************************************************************/
4430 void
hmm2ij_state_step2_enforce_safe_trans(CM_t * cm,int v,int n,int * imax,int * jmin,int * nss_imax,int * nss_jmin,int safe_imax,int safe_jmin)4431 hmm2ij_state_step2_enforce_safe_trans(CM_t *cm, int v, int n, int *imax, int *jmin,
4432 int *nss_imax, int *nss_jmin,
4433 int safe_imax, int safe_jmin)
4434 {
4435 int dv_l;
4436 int dv_r;
4437 if((cm->sttype[v] == ML_st) ||
4438 (cm->sttype[v] == IL_st) ||
4439 (cm->sttype[v] == MP_st))
4440 dv_l = 1;
4441 else
4442 dv_l = 0;
4443 if((cm->sttype[v] == MR_st) ||
4444 (cm->sttype[v] == IR_st) ||
4445 (cm->sttype[v] == MP_st))
4446 dv_r = 1;
4447 else
4448 dv_r = 0;
4449 if(imax[v] < safe_imax - dv_l)
4450 {
4451 imax[v] = safe_imax - dv_l;
4452 if(imax[v] > nss_imax[n])
4453 nss_imax[n] = imax[v];
4454 }
4455 if(jmin[v] > safe_jmin + dv_r)
4456 {
4457 jmin[v] = safe_jmin + dv_r;
4458 if(jmin[v] < nss_jmin[n])
4459 nss_jmin[n] = jmin[v];
4460 }
4461 }
4462
4463 /*****************************************************************************
4464 * EPN 12.21.05
4465 * Function: hmm2ij_state_step3_enforce_state_delta
4466 *
4467 * Purpose: cp9_HMM2ijBands_OLD*() function helper function.
4468 *
4469 *****************************************************************************/
4470 void
hmm2ij_state_step3_enforce_state_delta(CM_t * cm,int v,int * jmin,int * jmax)4471 hmm2ij_state_step3_enforce_state_delta(CM_t *cm, int v, int *jmin, int *jmax)
4472 {
4473 int dv_l;
4474 int dv_r;
4475 if((cm->sttype[v] == ML_st) ||
4476 (cm->sttype[v] == IL_st) ||
4477 (cm->sttype[v] == MP_st))
4478 dv_l = 1;
4479 else
4480 dv_l = 0;
4481 if((cm->sttype[v] == MR_st) ||
4482 (cm->sttype[v] == IR_st) ||
4483 (cm->sttype[v] == MP_st))
4484 dv_r = 1;
4485 else
4486 dv_r = 0;
4487 if(jmin[v] < (dv_l + dv_r))
4488 jmin[v] = dv_l + dv_r;
4489 if(jmax[v] < (dv_l + dv_r))
4490 jmax[v] = dv_l + dv_r;
4491 }
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_state_step4_update_safe_holders
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() helper function.
 *           Updates the per-node "safe holder" values for node <n> with
 *           the bands of one of its states:
 *             nss_max_imin[n] = max(nss_max_imin[n], imin_v)
 *             nss_min_jmax[n] = min(nss_min_jmax[n], jmax_v)
 *
 * Args:     v            - state index (not used by this function)
 *           n            - node index
 *           imin_v       - imin of the state
 *           jmax_v       - jmax of the state
 *           nss_max_imin - per-node array, possibly raised at [n]
 *           nss_min_jmax - per-node array, possibly lowered at [n]
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_state_step4_update_safe_holders(int v, int n, int imin_v, int jmax_v, int *nss_max_imin,
                                       int *nss_min_jmax)
{
  int *max_imin_slot = &nss_max_imin[n];
  int *min_jmax_slot = &nss_min_jmax[n];

  if (*max_imin_slot < imin_v) { *max_imin_slot = imin_v; }
  if (*min_jmax_slot > jmax_v) { *min_jmax_slot = jmax_v; }
}
4508
/*****************************************************************************
 * EPN 12.21.05
 * Function: hmm2ij_state_step5_non_emitter_d0_hack
 *
 * Purpose:  cp9_HMM2ijBands_OLD*() helper function.
 *           Allows for the possibility that d=0 for delete states:
 *           if jmin[v] is positive and not greater than imax_v, jmin[v]
 *           is decremented by one. Otherwise jmin[v] is left alone.
 *
 * Args:     v       - state index
 *           imax_v  - imax of state v
 *           jmin    - per-state band array on j, possibly decremented at [v]
 *
 * Returns:  (void)
 *****************************************************************************/
void
hmm2ij_state_step5_non_emitter_d0_hack(int v, int imax_v, int *jmin)
{
  if (jmin[v] > imax_v) return;  /* j band already starts past the i band */
  if (jmin[v] <= 0)     return;  /* nothing left to give up */

  jmin[v] -= 1;
}
4526
4527 /* Function: cp9_ShiftCMBands()
4528 *
4529 * Description: Given a CM with a valid cm->cp9b CP9 bands object
4530 * calculated for a sequence in coordinates 1..i..j..L,
4531 * subtract a fixed offset (i-1) from all CM positions
4532 * in cp9b (cp9b->imin, cp9b->imax, cp9b->jmin, cp9b->jmax)
4533 * so the bands will now pertain to the same hit if
4534 * its coordinates were shifted to 1..j-i+1. This is used
4535 * prior to alignment of a pre-defined hit from i..j
4536 * using bands calculated when i..j was within its larger
4537 * context of 1..i..j..L. During alignment hits always
4538 * start at position 1.
4539 *
4540 * Because only positions i..j are possible in the subsequent
4541 * alignment, bands that allow residues before i or after
4542 * j are tightened to only include within i..j. This will
4543 * make states that were possible to reach only with residues
4544 * before i or after j now impossible to reach.
4545 *
4546 * Once all i and j bands are updated, ij2d_bands()
4547 * is used to update the d bands.
4548 *
4549 * NOTE: after calling this function cp9b will fail
4550 * a cp9_ValidateBands() function call.
4551 *
4552 * Args: CM - the CM, with a valid cm->cp9b CP9Bands_t object
4553 * i - first position of hit, this is the offset
4554 * j - final position of hit, used only to determine hit length
4555 * Returns: (void)
4556 */
4557 void
cp9_ShiftCMBands(CM_t * cm,int i,int j,int do_trunc)4558 cp9_ShiftCMBands(CM_t *cm, int i, int j, int do_trunc)
4559 {
4560 int v;
4561 int ip = i-1;
4562 int Lp = j-i+1;
4563 int sd, sdl, sdr;
4564 int min_i, max_i, min_j, max_j;
4565
4566 #if eslDEBUGLEVEL >= 1
4567 printf("#DEBUG: cp9_ShiftCMBands(), i: %d j: %d Lp: %d\n", i, j, Lp);
4568 #endif
4569
4570 for(v = 0; v < cm->M; v++) {
4571 sd = StateDelta(cm->sttype[v]);
4572 sdl = StateLeftDelta(cm->sttype[v]);
4573 sdr = StateRightDelta(cm->sttype[v]);
4574 if(cm->cp9b->imin[v] > 0) { /* state is currently possible to reach */
4575 min_i = 1;
4576 max_i = do_trunc ? Lp+1-ESL_MAX(sdl, sdr) : Lp+1-sd;
4577 /* careful! if do_trunc, d can be 1 for MP states, so i can be
4578 * at most Lp. Note: d can't be 0 for ML/IL in R mode, MR/IR in
4579 * L mode even though you might think it could be. We'll always
4580 * do a truncated begin with d=1 for L,R marginal alignments.
4581 *
4582 * If ! do_trunc, then d must be at least sd for all states, hence
4583 * the max i of Lp+1-sd.
4584 *
4585 * This is bug i37, one of the two bugs in 1.1rc2, which previously
4586 * had this as 'max_i = Lp'.
4587 */
4588
4589 min_j = do_trunc ? ESL_MAX(sdl, sdr) : sd;
4590 max_j = ESL_MAX(Lp, min_j);
4591 /* if (do_trunc) d can be 1 for MP states, this is why we use
4592 * ESL_MAX() call for min_j above. Note: d can't be 0 for ML/IL
4593 * in R mode, MR/IR in L mode even though you might think it
4594 * could be. We'll always do a truncated begin with d=1 for L,R
4595 * marginal alignments. */
4596
4597 cm->cp9b->imin[v] = ESL_MAX(cm->cp9b->imin[v] - ip, min_i);
4598 cm->cp9b->imax[v] = ESL_MIN(cm->cp9b->imax[v] - ip, max_i);
4599
4600 cm->cp9b->jmin[v] = ESL_MAX(cm->cp9b->jmin[v] - ip, min_j);
4601 cm->cp9b->jmax[v] = ESL_MIN(cm->cp9b->jmax[v] - ip, max_j);
4602
4603 if(cm->cp9b->imax[v] < min_i || cm->cp9b->jmax[v] < min_j ||
4604 cm->cp9b->imin[v] > max_i || cm->cp9b->jmin[v] > max_j) {
4605 /* this state is now impossible to reach */
4606 cm->cp9b->imin[v] = cm->cp9b->jmin[v] = -1;
4607 cm->cp9b->imax[v] = cm->cp9b->jmax[v] = -2;
4608 }
4609 }
4610 }
4611 ij2d_bands(cm, Lp, cm->cp9b->imin, cm->cp9b->imax, cm->cp9b->jmin, cm->cp9b->jmax, cm->cp9b->hdmin, cm->cp9b->hdmax, do_trunc, 0);
4612 /* Note that this will not update hdmin bands that are no longer within jmin..jmax, that's okay */
4613
4614 return;
4615 }
4616
4617 /* Function: cp9_CloneBands()
4618 *
4619 * Description: Clone a CP9Bands_t *cp9b object and return it.
4620 *
4621 * Args: cp9b - the CP9Bands_t object to clone
4622 *
4623 * Returns: the clone CP9Bands_t object.
4624 */
4625 CP9Bands_t *
cp9_CloneBands(CP9Bands_t * src_cp9b,char * errbuf)4626 cp9_CloneBands(CP9Bands_t *src_cp9b, char *errbuf)
4627 {
4628 int status;
4629 CP9Bands_t *dest_cp9b = NULL;
4630 dest_cp9b = AllocCP9Bands(src_cp9b->cm_M, src_cp9b->hmm_M);
4631
4632 esl_vec_ICopy(src_cp9b->pn_min_m, src_cp9b->hmm_M+1, dest_cp9b->pn_min_m);
4633 esl_vec_ICopy(src_cp9b->pn_max_m, src_cp9b->hmm_M+1, dest_cp9b->pn_max_m);
4634 esl_vec_ICopy(src_cp9b->pn_min_i, src_cp9b->hmm_M+1, dest_cp9b->pn_min_i);
4635 esl_vec_ICopy(src_cp9b->pn_max_i, src_cp9b->hmm_M+1, dest_cp9b->pn_max_i);
4636 esl_vec_ICopy(src_cp9b->pn_min_d, src_cp9b->hmm_M+1, dest_cp9b->pn_min_d);
4637 esl_vec_ICopy(src_cp9b->pn_max_d, src_cp9b->hmm_M+1, dest_cp9b->pn_max_d);
4638 esl_vec_ICopy(src_cp9b->isum_pn_m, src_cp9b->hmm_M+1, dest_cp9b->isum_pn_m);
4639 esl_vec_ICopy(src_cp9b->isum_pn_i, src_cp9b->hmm_M+1, dest_cp9b->isum_pn_i);
4640 esl_vec_ICopy(src_cp9b->isum_pn_d, src_cp9b->hmm_M+1, dest_cp9b->isum_pn_d);
4641
4642 dest_cp9b->sp1 = src_cp9b->sp1;
4643 dest_cp9b->ep1 = src_cp9b->ep1;
4644 dest_cp9b->sp2 = src_cp9b->sp2;
4645 dest_cp9b->ep2 = src_cp9b->ep2;
4646
4647 dest_cp9b->thresh1 = src_cp9b->thresh1;
4648 dest_cp9b->thresh2 = src_cp9b->thresh2;
4649
4650 dest_cp9b->Rmarg_imin = src_cp9b->Rmarg_imin;
4651 dest_cp9b->Rmarg_imax = src_cp9b->Rmarg_imax;
4652 dest_cp9b->Lmarg_jmin = src_cp9b->Lmarg_jmin;
4653 dest_cp9b->Lmarg_jmax = src_cp9b->Lmarg_jmax;
4654
4655 esl_vec_ICopy(src_cp9b->Jvalid, (src_cp9b->cm_M+1), dest_cp9b->Jvalid);
4656 esl_vec_ICopy(src_cp9b->Lvalid, (src_cp9b->cm_M+1), dest_cp9b->Lvalid);
4657 esl_vec_ICopy(src_cp9b->Rvalid, (src_cp9b->cm_M+1), dest_cp9b->Rvalid);
4658 esl_vec_ICopy(src_cp9b->Tvalid, (src_cp9b->cm_M+1), dest_cp9b->Tvalid);
4659
4660 esl_vec_ICopy(src_cp9b->imin, src_cp9b->cm_M, dest_cp9b->imin);
4661 esl_vec_ICopy(src_cp9b->imax, src_cp9b->cm_M, dest_cp9b->imax);
4662 esl_vec_ICopy(src_cp9b->jmin, src_cp9b->cm_M, dest_cp9b->jmin);
4663 esl_vec_ICopy(src_cp9b->jmax, src_cp9b->cm_M, dest_cp9b->jmax);
4664
4665 if(src_cp9b->hd_alloced > 0) {
4666 /* set hdmin, hdmax ptrs and hd_needed and hd_alloced (all set in cp9GrowHDBands()) */
4667 if((status = cp9_GrowHDBands(dest_cp9b, errbuf)) != eslOK) goto ERROR;
4668 esl_vec_ICopy(src_cp9b->hdmin_mem, dest_cp9b->hd_alloced, dest_cp9b->hdmin_mem);
4669 esl_vec_ICopy(src_cp9b->hdmax_mem, dest_cp9b->hd_alloced, dest_cp9b->hdmax_mem);
4670 }
4671
4672 esl_vec_ICopy(src_cp9b->safe_hdmin, src_cp9b->cm_M, dest_cp9b->safe_hdmin);
4673 esl_vec_ICopy(src_cp9b->safe_hdmax, src_cp9b->cm_M, dest_cp9b->safe_hdmax);
4674
4675 dest_cp9b->tau = src_cp9b->tau;
4676
4677 return dest_cp9b;
4678
4679 ERROR:
4680 if(dest_cp9b != NULL) FreeCP9Bands(dest_cp9b);
4681 return NULL;
4682 }
4683
4684 /* Function: cp9_PredictStartAndEndPositions()
4685 * Date: EPN, Tue Sep 6 11:43:18 2011
4686 *
4687 * Purpose: Given a filled HMM posterior matrix and a CP9Bands_t
4688 * object with valid pn_{min,max}{m,i,d} bands, determine the
4689 * first and final HMM nodes that have a probability of being
4690 * occupied that exceeds <cp9b->thresh1> and <cp9b->thresh2>.
4691 * Store these four values in:
4692 * <cp9b->sp1>: minimum position that might be used (p > cp9b->thresh1, typically 0.01)
4693 * <cp9b->sp2>: minimum position that will likely be used (p > cp9b->thresh2, typically 0.98)
4694 * <cp9b->ep1>: maximum position that might be used (p > cp9b->thresh1, typically 0.01)
4695 * <cp9b->ep2>: maximum position that will likely be used (p > cp9b->thresh2, typically 0.98)
4696 *
4697 * If no HMM node has an occupancy probability that exceeds
4698 * <cp9b->thresh2> then sp2 and ep2 are set as out-of-bounds
4699 * values M+1 and 0 respectively.
4700 *
4701 * If no HMM node has an occupancy probability that exceeds
4702 * <cp9b->thresh1> then sp1 and ep1 are set as out-of-bounds
4703 * values M+1 and 0 respectively, though this should be
4704 * very rare.
4705 *
4706 * Using out-of-bounds values means we can't get any
4707 * information about where the alignment starts and ends from
4708 * the HMM. This has the effect that in a downstream call to
4709 * cp9_MarginalCandidatesFromStartEndPositions() all marginal
4710 * modes will be possible for all states and the eventual
4711 * alignment will essentially mimic a non-banded one.
4712 *
4713 * Also determine the CM bands on i and j that
4714 * will be used to allow for marginal alignments,
4715 * store these in <cp9b->{L,R}marg{i,j}_{min,max}.
4716 *
4717 * CP9_MX pmx: DP matrix for posteriors, already calc'ed
4718 * CP9Bands_t cp9b: the cp9 bands
4719 * int i0 start of target subsequence (often 1, beginning of dsq)
4720 * int j0 end of target subsequence (often L, end of dsq)
4721 *
4722 * Returns: void
4723 *
4724 * xref: ELN2 notebook, p.146-147; ~nawrockie/notebook/11_0816_inf_banded_trcyk/00LOG
4725 */
4726 void
cp9_PredictStartAndEndPositions(CP9_MX * pmx,CP9Bands_t * cp9b,int i0,int j0)4727 cp9_PredictStartAndEndPositions(CP9_MX *pmx, CP9Bands_t *cp9b, int i0, int j0)
4728 {
4729 int i;
4730 int k; /* counter over nodes of the model */
4731 int L = j0-i0+1; /* length of sequence */
4732 int iocc; /* occupancy probability, scaled int form */
4733 float pocc; /* occupancy probability, probability form */
4734
4735 /* Calculate minimum start positions: */
4736 k = 1;
4737 cp9b->sp1 = cp9b->sp2 = -1;
4738 while(k <= cp9b->hmm_M && (cp9b->sp1 == -1 || cp9b->sp2 == -1)) {
4739 if(cp9b->pn_min_m[k] == -1 && cp9b->pn_min_i[k] == -1 && cp9b->pn_min_d[k] == -1) {
4740 /*printf("k: %4d pocc IRRELEVANT (k unreachable, skipping)\n", k);*/
4741 k++;
4742 /* M, I, D states in node k are unreachable (no posterior cells had more than
4743 * cm->tau probability mass), k won't be our sp1 or sp2 */
4744 }
4745 else {
4746 iocc = -INFTY;
4747 for(i = 0; i <= L; i++) {
4748 iocc = ILogsum(iocc, ILogsum(pmx->mmx[i][k], pmx->dmx[i][k]));
4749 }
4750 pocc = Score2Prob(iocc, 1.);
4751 /*printf("k: %4d pocc: %.4f\n", k, pocc);*/
4752 if((cp9b->sp1 == -1) && (pocc > cp9b->thresh1)) cp9b->sp1 = k;
4753 if((cp9b->sp2 == -1) && (pocc > cp9b->thresh2)) cp9b->sp2 = k;
4754 k++;
4755 }
4756 }
4757 if(k == cp9b->hmm_M+1) {
4758 if(cp9b->sp1 == -1) { cp9b->sp1 = cp9b->hmm_M+1; } /* no node k has occupancy > thresh1, set as out-of-bounds value M+1 */
4759 if(cp9b->sp2 == -1) { cp9b->sp2 = cp9b->hmm_M+1; } /* no node k has occupancy > thresh2, set as out-of-bounds value M+1 */
4760 }
4761
4762 /* Calculate maximum end positions: */
4763 if((cp9b->sp1 == cp9b->hmm_M+1) &&
4764 (cp9b->sp2 == cp9b->hmm_M+1)) {
4765 /* we already know that there's no nodes that satisfy either thresh1 or thresh2, we can save time here */
4766 cp9b->ep1 = 0;
4767 cp9b->ep2 = 0;
4768 }
4769 else {
4770 cp9b->ep1 = cp9b->ep2 = -1;
4771 k = cp9b->hmm_M;
4772 while(k >= 1 && (cp9b->ep1 == -1 || cp9b->ep2 == -1)) {
4773 if(cp9b->pn_min_m[k] == -1 && cp9b->pn_min_i[k] == -1 && cp9b->pn_min_d[k] == -1) {
4774 /*printf("k: %4d pocc IRRELEVANT (k unreachable, skipping)\n", k);*/
4775 k--;
4776 /* M, I, D states in node k are unreachable (no posterior cells had more than
4777 * cm->tau probability mass), k won't be our ep1 or ep2 */
4778 }
4779 else {
4780 iocc = -INFTY;
4781 for(i = 0; i <= L; i++) {
4782 iocc = ILogsum(iocc, ILogsum(pmx->mmx[i][k], pmx->dmx[i][k]));
4783 }
4784 pocc = Score2Prob(iocc, 1.);
4785 /*printf("k: %4d pocc: %.4f\n", k, pocc);*/
4786 if((cp9b->ep1 == -1) && (pocc > cp9b->thresh1)) cp9b->ep1 = k;
4787 if((cp9b->ep2 == -1) && (pocc > cp9b->thresh2)) cp9b->ep2 = k;
4788 k--;
4789 }
4790 }
4791 if(k == 0) {
4792 if(cp9b->ep1 == -1) { cp9b->ep1 = 0; } /* no node k has occupancy > thresh1, set as out-of-bounds value 0 */
4793 if(cp9b->ep2 == -1) { cp9b->ep2 = 0; } /* no node k has occupancy > thresh2, set as out-of-bounds value 0 */
4794 }
4795 }
4796
4797 /* determine cp9b->{R,L}marg_{i,j}{min,max}, the i and j bands that will be used to allow for marginal left (Lmarg_j{min,max}
4798 * and marginal right (Rmarg_i{min,max} alignment. */
4799 /* set cp9b->Rmarg_imin */
4800 if(cp9b->sp1 == cp9b->hmm_M+1) { cp9b->Rmarg_imin = i0; }
4801 else {
4802 cp9b->Rmarg_imin = INT_MAX;
4803 if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_min_m[cp9b->sp1] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_m[cp9b->sp1]);
4804 if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_min_i[cp9b->sp1] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_i[cp9b->sp1]);
4805 if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_min_d[cp9b->sp1] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_d[cp9b->sp1]);
4806 if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_min_m[cp9b->sp2] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_m[cp9b->sp2]);
4807 if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_min_i[cp9b->sp2] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_i[cp9b->sp2]);
4808 if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_min_d[cp9b->sp2] >= 0) cp9b->Rmarg_imin = ESL_MIN(cp9b->Rmarg_imin, cp9b->pn_min_d[cp9b->sp2]);
4809 if(cp9b->Rmarg_imin == INT_MAX || cp9b->sp1 == (cp9b->hmm_M+1) || cp9b->sp2 == (cp9b->hmm_M+1)) cp9b->Rmarg_imin = i0;
4810 cp9b->Rmarg_imin = ESL_MAX(i0, cp9b->Rmarg_imin); /* i can't be less than i0 */
4811 cp9b->Rmarg_imin = ESL_MIN(j0+1, cp9b->Rmarg_imin); /* i can't be more than j0+1 */
4812 }
4813
4814 /* set cp9b->Rmarg_imax */
4815 if(cp9b->sp1 == cp9b->hmm_M+1) { cp9b->Rmarg_imax = j0; }
4816 else {
4817 cp9b->Rmarg_imax = INT_MIN;
4818 if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_max_m[cp9b->sp1] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_m[cp9b->sp1]);
4819 if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_max_i[cp9b->sp1] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_i[cp9b->sp1]);
4820 if(cp9b->sp1 != (cp9b->hmm_M+1) && cp9b->pn_max_d[cp9b->sp1] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_d[cp9b->sp1]);
4821 if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_max_m[cp9b->sp2] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_m[cp9b->sp2]);
4822 if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_max_i[cp9b->sp2] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_i[cp9b->sp2]);
4823 if(cp9b->sp2 != (cp9b->hmm_M+1) && cp9b->pn_max_d[cp9b->sp2] >= 0) cp9b->Rmarg_imax = ESL_MAX(cp9b->Rmarg_imax, cp9b->pn_max_d[cp9b->sp2]);
4824 if(cp9b->Rmarg_imax == INT_MIN || cp9b->sp1 == (cp9b->hmm_M+1) || cp9b->sp2 == (cp9b->hmm_M+1)) cp9b->Rmarg_imax = j0+1;
4825 cp9b->Rmarg_imax = ESL_MAX(i0, cp9b->Rmarg_imax); /* i can't be less than i0 */
4826 cp9b->Rmarg_imax = ESL_MIN(j0+1, cp9b->Rmarg_imax); /* i can't be more than j0+1 */
4827 }
4828
4829 /* set cp9b->Lmarg_jmin */
4830 if(cp9b->ep1 == 0) { cp9b->Lmarg_jmin = i0-1; }
4831 else {
4832 cp9b->Lmarg_jmin = INT_MAX;
4833 if(cp9b->ep1 != 0 && cp9b->pn_min_m[cp9b->ep1] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_m[cp9b->ep1]);
4834 if(cp9b->ep1 != 0 && cp9b->pn_min_i[cp9b->ep1] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_i[cp9b->ep1]);
4835 if(cp9b->ep1 != 0 && cp9b->pn_min_d[cp9b->ep1] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_d[cp9b->ep1]-1); /* off-by-one with deletes in HMM vs CM */
4836 if(cp9b->ep2 != 0 && cp9b->pn_min_m[cp9b->ep2] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_m[cp9b->ep2]);
4837 if(cp9b->ep2 != 0 && cp9b->pn_min_i[cp9b->ep2] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_i[cp9b->ep2]);
4838 if(cp9b->ep2 != 0 && cp9b->pn_min_d[cp9b->ep2] >= 0) cp9b->Lmarg_jmin = ESL_MIN(cp9b->Lmarg_jmin, cp9b->pn_min_d[cp9b->ep2]-1); /* off-by-one with deletes in HMM vs CM */
4839 if(cp9b->Lmarg_jmin == INT_MAX || cp9b->ep1 == 0 || cp9b->ep2 == 0) cp9b->Lmarg_jmin = i0-1;
4840 cp9b->Lmarg_jmin = ESL_MAX(i0-1, cp9b->Lmarg_jmin); /* j can't be less than i0-1 */
4841 cp9b->Lmarg_jmin = ESL_MIN(j0, cp9b->Lmarg_jmin); /* j can't be more than j0 */
4842 }
4843
4844 /* set cp9b->Lmarg_jmax */
4845 if(cp9b->ep1 == 0) { cp9b->Lmarg_jmax = j0; }
4846 else {
4847 cp9b->Lmarg_jmax = INT_MIN;
4848 if(cp9b->ep1 != 0 && cp9b->pn_max_m[cp9b->ep1] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_m[cp9b->ep1]);
4849 if(cp9b->ep1 != 0 && cp9b->pn_max_i[cp9b->ep1] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_i[cp9b->ep1]);
4850 if(cp9b->ep1 != 0 && cp9b->pn_max_d[cp9b->ep1] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_d[cp9b->ep1]-1); /* off-by-one with deletes in HMM vs CM */
4851 if(cp9b->ep2 != 0 && cp9b->pn_max_m[cp9b->ep2] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_m[cp9b->ep2]);
4852 if(cp9b->ep2 != 0 && cp9b->pn_max_i[cp9b->ep2] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_i[cp9b->ep2]);
4853 if(cp9b->ep2 != 0 && cp9b->pn_max_d[cp9b->ep2] >= 0) cp9b->Lmarg_jmax = ESL_MAX(cp9b->Lmarg_jmax, cp9b->pn_max_d[cp9b->ep2]-1); /* off-by-one with deletes in HMM vs CM */
4854 if(cp9b->Lmarg_jmax == INT_MIN || cp9b->ep1 == 0 || cp9b->ep2 == 0) cp9b->Lmarg_jmax = j0;
4855 cp9b->Lmarg_jmax = ESL_MAX(i0-1, cp9b->Lmarg_jmax); /* j can't be less than i0-1 */
4856 cp9b->Lmarg_jmax = ESL_MIN(j0, cp9b->Lmarg_jmax); /* j can't be more than j0 */
4857 }
4858
4859 #if 0
4860 printf("HEYA Returning from cp9_PredictStartAndEndPositions():\n\t");
4861 printf("sp1: %4d\n\t", cp9b->sp1);
4862 printf("sp2: %4d\n\t", cp9b->sp2);
4863 printf("ep2: %4d\n\t", cp9b->ep2);
4864 printf("ep1: %4d\n\t", cp9b->ep1);
4865 printf("Ljn: %4d\n\t", cp9b->Lmarg_jmin);
4866 printf("Ljx: %4d\n\t", cp9b->Lmarg_jmax);
4867 printf("Rin: %4d\n\t", cp9b->Rmarg_imin);
4868 printf("Rix: %4d\n\n", cp9b->Rmarg_imax);
4869 #endif
4870
4871 return;
4872 }
4873
4874
4875 /* Function: cp9_MarginalCandidatesFromStartEndPositions()
4876 * Date: EPN, Tue Sep 6 14:50:16 2011
4877 *
4878 * Purpose: Given a CP9Bands_t object with valid sp1, sp2, ep1, and
4879 * ep2 values from cp9_PredictStartAndEndPositions(),
4880 * determine for each CM state v, whether a joint (J), left
4881 * marginal (L) right marginal (R), or terminal marginal
4882 * alignment that includes v should be allowed. For any
4883 * disallowed type of alignment we will be able to skip the
4884 * corresponding calculations in a trCYK/trInside/trOutside
4885 * DP recursion. And we won't have to allocate memory for
4886 * that state in the corresponding (J,L,R,T) DP matrix.
4887 *
4888 * We can determine from passed-in <pass_idx>, which type of
4889 * marginal alignments will be allowed. If L alignments
4890 * are not allowed, Lvalid[] will be FALSE for all v.
4891 * Likewise for R alignments and Rvalid[] and T alignments
4892 * and Tvalid[].
4893 *
4894 * Args: cm - the model
4895 * cp9b - the cp9 bands
4896 * pass_idx - the pipeline pass index we're on, dictates
4897 * which modes of marginal alns to allow
4898 * errbuf - for error messages
4899 *
4900 * Returns: eslOK on success; eslEINVAL if pass_idx is invalid (errbuf filled).
4901 *
4902 * xref: ELN2 notebook, p.146-147; ~nawrockie/notebook/11_0816_inf_banded_trcyk/00LOG
4903 */
4904 int
cp9_MarginalCandidatesFromStartEndPositions(CM_t * cm,CP9Bands_t * cp9b,int pass_idx,char * errbuf)4905 cp9_MarginalCandidatesFromStartEndPositions(CM_t *cm, CP9Bands_t *cp9b, int pass_idx, char *errbuf)
4906 {
4907 int status;
4908 int v;
4909 int nd;
4910 int lpos = 1;
4911 int rpos = cm->clen;
4912 int allow_L, allow_R, allow_T; /* will we allow L, R, and T alignments? */
4913
4914 if((status = cm_TrFillFromPassIdx(pass_idx, &allow_L, &allow_R, &allow_T)) != eslOK) ESL_FAIL(status, errbuf, "cp9_MarginalCandidatesFromStartEndPositions(), unexpected pass idx: %d", pass_idx);
4915
4916 for(v = 0; v < cp9b->cm_M; v++) {
4917 nd = cm->ndidx[v];
4918 /* Careful, emitmap is off-by-one for our purposes for lpos if v is not MATP_MP or MATL_ML, and rpos if v is not MATP_MP or MATR_MR */
4919 lpos = (cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATL_nd) ? cm->emap->lpos[nd] : cm->emap->lpos[nd]+1;
4920 rpos = (cm->ndtype[nd] == MATP_nd || cm->ndtype[nd] == MATR_nd) ? cm->emap->rpos[nd] : cm->emap->rpos[nd]-1;
4921
4922 /* below: 'possibly' means probability > cp9b->thresh1 (typically 0.01) */
4923 /* 'probably' means probability > cp9b->thresh2 (typically 0.98) */
4924
4925 /* Jvalid if both lpos and rpos are possibly used */
4926 cp9b->Jvalid[v] = ((lpos >= cp9b->sp1) && (rpos <= cp9b->ep1)) ? TRUE : FALSE;
4927
4928 /* Lvalid if lpos is possibly used and rpos is possibly not used */
4929 cp9b->Lvalid[v] = (allow_L && (lpos >= cp9b->sp1 && lpos <= cp9b->ep1) && (rpos > cp9b->ep2)) ? TRUE : FALSE;
4930
4931 /* Rvalid if rpos is possibly used and lpos is possibly not used */
4932 cp9b->Rvalid[v] = (allow_R && (rpos <= cp9b->ep1 && rpos >= cp9b->sp1) && (lpos < cp9b->sp2)) ? TRUE : FALSE;
4933
4934 if(cm->sttype[v] == B_st) {
4935 /* Tvalid if lpos and rpos are possibly not used */
4936 cp9b->Tvalid[v] = (allow_T && (lpos < cp9b->sp2) && (rpos > cp9b->ep2)) ? TRUE : FALSE;
4937 }
4938 else {
4939 cp9b->Tvalid[v] = FALSE;
4940 }
4941 #if eslDEBUGLEVEL >= 1
4942 printf("#DEBUG: v: %4d [%4d..%4d] %4s %2s %d%d%d%d\n", v, lpos, rpos, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]),
4943 cp9b->Jvalid[v], cp9b->Lvalid[v], cp9b->Rvalid[v], cp9b->Tvalid[v]);
4944 #endif
4945 }
4946
4947 /* The ROOT_S state is special, all hits are rooted there, if we can do a
4948 * truncated {J,L,R,T} begin into v, we need to set do_{J,L,R,T}[0] to TRUE.
4949 */
4950 for(v = 0; v < cp9b->cm_M; v++) {
4951 switch(cm->sttype[v]) {
4952 case B_st:
4953 if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4954 if(cp9b->Lvalid[v]) cp9b->Lvalid[0] = TRUE;
4955 if(cp9b->Rvalid[v]) cp9b->Rvalid[0] = TRUE;
4956 if(cp9b->Tvalid[v]) cp9b->Tvalid[0] = TRUE;
4957 break;
4958 case MP_st:
4959 if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4960 if(cp9b->Lvalid[v]) cp9b->Lvalid[0] = TRUE;
4961 if(cp9b->Rvalid[v]) cp9b->Rvalid[0] = TRUE;
4962 break;
4963 case ML_st:
4964 case IL_st:
4965 if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4966 if(cp9b->Lvalid[v]) cp9b->Lvalid[0] = TRUE;
4967 break;
4968 case MR_st:
4969 case IR_st:
4970 if(cp9b->Jvalid[v]) cp9b->Jvalid[0] = TRUE;
4971 if(cp9b->Rvalid[v]) cp9b->Rvalid[0] = TRUE;
4972 break;
4973 }
4974 if(cp9b->Jvalid[0] &&
4975 cp9b->Lvalid[0] &&
4976 cp9b->Rvalid[0] &&
4977 cp9b->Tvalid[0]) {
4978 v = cp9b->cm_M;
4979 }
4980 }
4981
4982 /* The EL state is special, if local ends are on, make J, L and R
4983 * modes all valid. (We could only make those modes valid for
4984 * which there's a local end possible (e.g. make cm->M invalid for
4985 * R if R is not valid for any states), but empirically this is
4986 * rare, so I've opted to always allow all types to avoid allowing
4987 * the possibility that we turn {J,L,R}valid[cm->M] on and off
4988 * as we process seqs, thus avoiding all the possible complications
4989 * of doing that.
4990 */
4991 if(cm->flags & CMH_LOCAL_END) {
4992 cp9b->Jvalid[cm->M] = TRUE;
4993 cp9b->Lvalid[cm->M] = TRUE;
4994 cp9b->Rvalid[cm->M] = TRUE;
4995 }
4996
4997 return eslOK;
4998 }
4999
5000
5001
/****************************************************************************
 * Debugging print functions
 *
 * Compiled:
 *   cp9_DebugPrintHMMBands()
 *   PrintDPCellsSaved_jd()
 *   debug_print_ij_bands()
 *
 * Currently not compiled (#if 0'ed out) but saved for reference:
 *   ijBandedTraceInfoDump()
 *   ijdBandedTraceInfoDump()
 *   debug_print_hd_bands()
 *   debug_print_parsetree_and_ij_bands()
 *   cp9_RelaxRootBandsForSearch()
 *
 */
5016 #if 0
5017 static void ijBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5018 int *jmin, int *jmax, int debug_level);
5019 static void ijdBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5020 int *jmin, int *jmax, int **hdmin, int **hdmax,
5021 int debug_level);
5022 static void debug_print_hd_bands(CM_t *cm, int **hdmin, int **hdmax, int *jmin, int *jmax);
5023 static void debug_print_parsetree_and_ij_bands(FILE *fp, Parsetree_t *tr, CM_t *cm, ESL_DSQ *dsq, CP9Bands_t *cp9b);
5024 static void cp9_RelaxRootBandsForSearch(CM_t *cm, int i0, int j0, int *imin, int *imax, int *jmin, int *jmax);
5025 #endif
5026
5027 /* EPN 12.18.05
5028 * cp9_DebugPrintHMMBands()
5029 * based loosely on: cmbuild.c's
5030 * Function: model_trace_info_dump
5031 *
5032 * Purpose: Print out the bands derived from the posteriors for the
5033 * insert and match states of each HMM node.
5034 *
5035 * Args:
5036 * FILE *ofp - filehandle to print to (can by STDOUT)
5037 * int L - length of sequence
5038 * CP9Bands_t - the CP9 bands data structure
5039 * double hmm_bandp - fraction of probability mass allowed outside each band.
5040 * int debug_level [0..3] tells the function what level of debugging print
5041 * statements to print.
5042 * Returns: (void)
5043 */
5044
5045 void
cp9_DebugPrintHMMBands(FILE * ofp,int L,CP9Bands_t * cp9b,double hmm_bandp,int debug_level)5046 cp9_DebugPrintHMMBands(FILE *ofp, int L, CP9Bands_t *cp9b, double hmm_bandp, int debug_level)
5047 {
5048 int M;
5049 int k;
5050 int cells_in_bands_m; /* number of cells within all the bands for match states*/
5051 int cells_in_bands_i; /* number of cells within all the bands for insert states*/
5052 int cells_in_bands_d; /* number of cells within all the bands for delete states*/
5053 int cells_in_bands_all; /* number of cells within all the bands for match and insert states*/
5054 int bw; /* band width of current band */
5055
5056 M = cp9b->hmm_M;
5057 cells_in_bands_m = cells_in_bands_i = cells_in_bands_d = cells_in_bands_all = 0;
5058
5059 /* first print the bands on the match states */
5060 fprintf(ofp, "***********************************************************\n");
5061 if(debug_level > 0)
5062 fprintf(ofp, "printing hmm bands\n");
5063 fprintf(ofp, "hmm_bandp: %f\n", hmm_bandp);
5064 if(debug_level > 0)
5065 {
5066 fprintf(ofp, "\n");
5067 fprintf(ofp, "match states\n");
5068 }
5069 for(k = 0; k <= cp9b->hmm_M; k++)
5070 {
5071 bw = (cp9b->pn_min_m[k] == -1) ? 0 : cp9b->pn_max_m[k] - cp9b->pn_min_m[k] + 1;
5072 if(debug_level > 0 || debug_level == -1)
5073 fprintf(ofp, "M node: %3d | min %3d | max %3d | w %3d \n", k, cp9b->pn_min_m[k], cp9b->pn_max_m[k], bw);
5074 cells_in_bands_m += bw;
5075 }
5076 if(debug_level > 0)
5077 fprintf(ofp, "\n");
5078 if(debug_level > 0)
5079 fprintf(ofp, "insert states\n");
5080 for(k = 0; k <= cp9b->hmm_M; k++)
5081 {
5082 bw = (cp9b->pn_min_i[k] == -1) ? 0 : cp9b->pn_max_i[k] - cp9b->pn_min_i[k] + 1;
5083 if(debug_level > 0 || debug_level == -1)
5084 fprintf(ofp, "I node: %3d | min %3d | max %3d | w %3d\n", k, cp9b->pn_min_i[k], cp9b->pn_max_i[k], bw);
5085 cells_in_bands_i += bw;
5086 }
5087 if(debug_level > 0)
5088 fprintf(ofp, "\n");
5089 if(debug_level > 0)
5090 fprintf(ofp, "delete states\n");
5091 for(k = 1; k <= cp9b->hmm_M; k++)
5092 {
5093 bw = (cp9b->pn_min_d[k] == -1) ? 0 : cp9b->pn_max_d[k] - cp9b->pn_min_d[k] + 1;
5094 if(debug_level > 0 || debug_level == -1)
5095 fprintf(ofp, "D node: %3d | min %3d | max %3d | w %3d\n", k, cp9b->pn_min_d[k], cp9b->pn_max_d[k], bw);
5096 cells_in_bands_d += bw;
5097 }
5098 if(debug_level > 0)
5099 {
5100 fprintf(ofp, "\n");
5101 printf("cells_in_bands_m : %d\n", cells_in_bands_m);
5102 printf("cells_in_bands_i : %d\n", cells_in_bands_i);
5103 printf("cells_in_bands_d : %d\n", cells_in_bands_d);
5104 }
5105
5106 cells_in_bands_all = cells_in_bands_m + cells_in_bands_i + cells_in_bands_d;
5107 printf("fraction match excluded : %f\n", (1 - ((float) cells_in_bands_m / (M * L))));
5108 printf("fraction insert excluded : %f\n", (1 - ((float) cells_in_bands_i / ((M-1) * L))));
5109 printf("fraction delete excluded : %f\n", (1 - ((float) cells_in_bands_d / ((M-1) * L))));
5110 printf("fraction total excluded : %f\n", (1 - ((float) (cells_in_bands_all) / (((M-1) * L) + ((M-1) * L) + (M *L)))));
5111 fprintf(ofp, "***********************************************************\n");
5112
5113 }
5114
5115 /* Function: PrintDPCellsSaved_jd()
5116 * Prints out an estimate of the speed up due to j and d bands */
5117 void
PrintDPCellsSaved_jd(CM_t * cm,int * jmin,int * jmax,int ** hdmin,int ** hdmax,int W)5118 PrintDPCellsSaved_jd(CM_t *cm, int *jmin, int *jmax, int **hdmin, int **hdmax,
5119 int W)
5120 {
5121 int v;
5122 int j;
5123 int max;
5124 int64_t after, before;
5125
5126 printf("Printing DP cells saved using j and d bands:\n");
5127 before = after = 0;
5128 for (v = 0; v < cm->M; v++)
5129 {
5130 for(j = 0; j <= W; j++)
5131 if (cm->sttype[v] != E_st)
5132 before += j + 1;
5133 for(j = jmin[v]; j <= jmax[v]; j++)
5134 if (cm->sttype[v] != E_st)
5135 {
5136 max = (j < hdmax[v][j-jmin[v]]) ? j : hdmax[v][j-jmin[v]];
5137 after += max - hdmin[v][j-jmin[v]] + 1;
5138 }
5139 }
5140 printf("Before: something like %" PRId64 "\n", before);
5141 printf("After: something like %" PRId64 "\n", after);
5142 printf("Speedup: maybe %.2f fold\n\n", (double) before / (double) after);
5143 }
5144
5145 /* Function: debug_print_ij_bands
5146 *
5147 * Purpose: Print out i and j bands for all states v.
5148 *
5149 */
5150 void
debug_print_ij_bands(CM_t * cm)5151 debug_print_ij_bands(CM_t *cm)
5152 {
5153 int v;
5154 printf("%5s %-7s %5s %5s %5s %5s %4s\n", "v", "type", "imin", "imax", "jmin", "jmax", "JLRT");
5155 printf("%5s %-7s %5s %5s %5s %5s %4s\n", "-----", "-------", "-----", "-----", "-----", "-----", "----");
5156 for(v = 0; v < cm->M; v++)
5157 printf("%5d %-7s %5d %5d %5d %5d %d%d%d%d\n", v, CMStateid(cm->stid[v]), cm->cp9b->imin[v], cm->cp9b->imax[v], cm->cp9b->jmin[v], cm->cp9b->jmax[v],
5158 cm->cp9b->Jvalid[v], cm->cp9b->Lvalid[v], cm->cp9b->Rvalid[v], cm->cp9b->Tvalid[v]);
5159 return;
5160 }
5161
5162
5163 #if 0
5164 /* EPN 11.03.05
5165 * Function: ijBandedTraceInfoDump()
5166 *
5167 * Purpose: Experimental HMMERNAL function used in development.
5168 * This function determines how close the
5169 * trace was to the bands for i and j at each state in the trace,
5170 * and prints out that information in differing levels
5171 * of verbosity depending on an input parameter
5172 * (debug_level).
5173 *
5174 * Args: cm - the CM (useful for determining which states are E states)
5175 * tr - the parsetree (trace)
5176 * imin - minimum i bound for each state v; [0..v..M-1]
5177 * imax - maximum i bound for each state v; [0..v..M-1]
5178 * jmin - minimum j bound for each state v; [0..v..M-1]
5179 * jmax - maximum j bound for each state v; [0..v..M-1]
5180 * debug_level - level of verbosity
5181 * Returns: (void)
5182 */
5183
5184 void
5185 ijBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5186 int *jmin, int *jmax, int debug_level)
5187 {
5188 int v, i, j, d, tpos;
5189 int imindiff; /* i - imin[v] */
5190 int imaxdiff; /* imax[v] - i */
5191 int jmindiff; /* j - jmin[v] */
5192 int jmaxdiff; /* jmax[v] - j */
5193 int imin_out;
5194 int imax_out;
5195 int jmin_out;
5196 int jmax_out;
5197
5198 imin_out = 0;
5199 imax_out = 0;
5200 jmin_out = 0;
5201 jmax_out = 0;
5202
5203 debug_level = 2;
5204
5205 for (tpos = 0; tpos < tr->n; tpos++)
5206 {
5207 v = tr->state[tpos];
5208 i = tr->emitl[tpos];
5209 j = tr->emitr[tpos];
5210 d = j-i+1;
5211 imindiff = i-imin[v];
5212 imaxdiff = imax[v]-i;
5213 jmindiff = j-jmin[v];
5214 jmaxdiff = jmax[v]-j;
5215 if(cm->sttype[v] != E_st)
5216 {
5217 if(imindiff < 0)
5218 imin_out++;
5219 if(imaxdiff < 0)
5220 imax_out++;
5221 if(jmindiff < 0)
5222 jmin_out++;
5223 if(jmaxdiff < 0)
5224 jmax_out++;
5225
5226 if(debug_level > 1 || ((imindiff < 0) || (imaxdiff < 0) || (jmindiff < 0) || (jmaxdiff < 0)))
5227 {
5228 printf("v: %4d %-4s %-2s | d: %4d | i: %4d | in: %4d | ix: %4d | %3d | %3d |\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), d, i, imin[v], imax[v], imindiff, imaxdiff);
5229 printf(" | j: %4d | jn: %4d | jx: %4d | %3d | %3d |\n", j, jmin[v], jmax[v], jmindiff, jmaxdiff);
5230
5231 }
5232 }
5233 else if(cm->sttype[v] == E_st)
5234 {
5235 if(debug_level > 1)
5236 {
5237 printf("v: %4d %-4s %-2s | d: %4d | i: %4d | in: %4d | ix: %4d | %3d | %3d |\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), d, i, imin[v], imax[v], imindiff, imaxdiff);
5238 printf(" | j: %4d | jn: %4d | jx: %4d | %3d | %3d |\n", j, jmin[v], jmax[v], jmindiff, jmaxdiff);
5239 }
5240 }
5241 }
5242 printf("\nimin out: %d\n", imin_out);
5243 printf("imax out: %d\n", imax_out);
5244 printf("jmin out: %d\n", jmin_out);
5245 printf("jmax out: %d\n", jmax_out);
5246
5247 if((imin_out + imax_out + jmin_out + jmax_out) > 0)
5248 {
5249 printf("ERROR, some of the i and j bands are going to prevent optimal alignment. Sorry.\n");
5250 }
5251
5252 return;
5253 }
5254
5255
5256 /* EPN 11.03.05
5257 * Function: ijdBandedTraceInfoDump()
5258 *
5259 * Purpose: Experimental HMMERNAL function used in development.
5260 * This function determines how close the
5261 * trace was to the bands for i and j and d at each state in the trace,
5262 * and prints out that information in differing levels
5263 * of verbosity depending on an input parameter
5264 * (debug_level).
5265 *
5266 * Args: cm - the CM (useful for determining which states are E states)
5267 * tr - the parsetree (trace)
5268 * imin - minimum i bound for each state v; [0..v..M-1]
5269 * imax - maximum i bound for each state v; [0..v..M-1]
5270 * jmin - minimum j bound for each state v; [0..v..M-1]
5271 * jmax - maximum j bound for each state v; [0..v..M-1]
5272 * hdmin - minimum d bound for each state v and offset j;
5273 * [0..v..M-1][0..(jmax[v]-jmin[v])]
5274 * hdmax - maximum d bound for each state v and offset j;
5275 * [0..v..M-1][0..(jmax[v]-jmin[v])]
5276 * debug_level - level of verbosity
5277 * Returns: (void)
5278 */
5279
5280 void
5281 ijdBandedTraceInfoDump(CM_t *cm, Parsetree_t *tr, int *imin, int *imax,
5282 int *jmin, int *jmax, int **hdmin, int **hdmax, int debug_level)
5283 {
5284 int v, i, j, d, tpos;
5285 int imindiff; /* i - imin[v] */
5286 int imaxdiff; /* imax[v] - i */
5287 int jmindiff; /* j - jmin[v] */
5288 int jmaxdiff; /* jmax[v] - j */
5289 int hdmindiff; /* d - hdmin[v][j] */
5290 int hdmaxdiff; /* hdmax[v][j] - d */
5291
5292 int imin_out;
5293 int imax_out;
5294 int jmin_out;
5295 int jmax_out;
5296 int hdmin_out;
5297 int hdmax_out;
5298 int local_used;
5299
5300 imin_out = 0;
5301 imax_out = 0;
5302 jmin_out = 0;
5303 jmax_out = 0;
5304 hdmin_out = 0;
5305 hdmax_out = 0;
5306 local_used = 0;
5307
5308 debug_level = 2;
5309
5310 for (tpos = 0; tpos < tr->n; tpos++)
5311 {
5312 v = tr->state[tpos];
5313 i = tr->emitl[tpos];
5314 j = tr->emitr[tpos];
5315 d = j-i+1;
5316 if(cm->sttype[v] == EL_st) /*END LOCAL state*/
5317 {
5318 if(debug_level > 1)
5319 {
5320 printf("v: %4d NA %-2s ( NA) | d: %4d | i: %4d | in: NA | ix: NA | NA | NA |\n", v, Statetype(cm->sttype[v]), d, i);
5321 printf(" | j: %4d | jn: NA | jx: NA | NA | NA |\n", j);
5322 printf(" | d: %4d | dn: NA | dx: NA | NA | NA |\n", d);
5323
5324 local_used++;
5325 }
5326 }
5327 else
5328 {
5329 imindiff = i-imin[v];
5330 imaxdiff = imax[v]-i;
5331 jmindiff = j-jmin[v];
5332 jmaxdiff = jmax[v]-j;
5333 if(j >= jmin[v] && j <= jmax[v])
5334 {
5335 hdmindiff = d - hdmin[v][j-jmin[v]];
5336 hdmaxdiff = hdmax[v][j-jmin[v]] - d;
5337 }
5338 else
5339 {
5340 hdmindiff = -1000;
5341 hdmaxdiff = -1000;
5342 }
5343 if(imindiff < 0)
5344 imin_out++;
5345 if(imaxdiff < 0)
5346 imax_out++;
5347 if(jmindiff < 0)
5348 jmin_out++;
5349 if(jmaxdiff < 0)
5350 jmax_out++;
5351 if(hdmindiff < 0)
5352 hdmin_out++;
5353 if(hdmaxdiff < 0)
5354 hdmax_out++;
5355
5356 if(debug_level > 1 || ((imindiff < 0) || (imaxdiff < 0) || (jmindiff < 0) || (jmaxdiff < 0) ||
5357 (hdmindiff < 0) || (hdmaxdiff < 0)))
5358 {
5359 printf("v: %4d %-4s %-2s (%4d) | d: %4d | i: %4d | in: %4d | ix: %4d | %3d | %3d |\n", v, Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), cm->ndidx[v], d, i, imin[v], imax[v], imindiff, imaxdiff);
5360 printf(" | j: %4d | jn: %4d | jx: %4d | %3d | %3d |\n", j, jmin[v], jmax[v], jmindiff, jmaxdiff);
5361 if(j >= jmin[v] && j <= jmax[v])
5362 {
5363 printf(" | d: %4d | dn: %4d | dx: %4d | %3d | %3d |\n", d, hdmin[v][j-jmin[v]], hdmax[v][j-jmin[v]], hdmindiff, hdmaxdiff);
5364 }
5365 else
5366 {
5367 printf(" | d: %4d | dn: jout | dx: jout | %3d | %3d |\n", d, hdmindiff, hdmaxdiff);
5368 }
5369 }
5370 }
5371 }
5372 printf("\nimin out : %d\n", imin_out);
5373 printf("imax out : %d\n", imax_out);
5374 printf("jmin out : %d\n", jmin_out);
5375 printf("jmax out : %d\n", jmax_out);
5376 printf("hdmin out : %d\n", hdmin_out);
5377 printf("hdmax out : %d\n", hdmax_out);
5378 printf("local used: %d\n", local_used);
5379
5380 if((imin_out + imax_out + jmin_out + jmax_out) > 0)
5381 {
5382 printf("ERROR, some of the i and j bands are going to prevent optimal alignment. Sorry.\n");
5383 }
5384
5385 return;
5386 }
5387
5388 /* EPN 01.18.06
5389 * Function: debug_print_hd_bands
5390 *
5391 * Purpose: Print out the v and j dependent hd bands.
5392 */
5393 void
5394 debug_print_hd_bands(CM_t *cm, int **hdmin, int **hdmax, int *jmin, int *jmax)
5395 {
5396 int v, j;
5397
5398 printf("\nPrinting hd bands :\n");
5399 printf("****************\n");
5400 for(v = 0; v < cm->M; v++)
5401 {
5402 for(j = jmin[v]; j <= jmax[v]; j++)
5403 {
5404 printf("band v:%d j:%d n:%d %-4s %-2s min:%d max:%d\n", v, j, cm->ndidx[v], Nodetype(cm->ndtype[cm->ndidx[v]]), Statetype(cm->sttype[v]), hdmin[v][j-jmin[v]], hdmax[v][j-jmin[v]]);
5405 }
5406 printf("\n");
5407 }
5408 printf("****************\n\n");
5409
5410 return;
5411 }
5412
5413 /* Function: debug_print_parsetree_and_ij_bands()
5414 * Date: EPN, Sun Jan 27 16:38:14 2008
5415 *
5416 * Purpose: Print a parsetree a la ParseTreeDump() but supplement it
5417 * with details on where the parsetree violates i and j bands
5418 * (if at all) from a cp9bands data structure.
5419 *
5420 * Args: fp - FILE to write output to.
5421 * tr - parsetree to examine.
5422 * cm - model that was aligned to dsq to generate the parsetree
5423 * dsq - digitized sequence that was aligned to cm to generate the parsetree
5424 * gamma - cumulative subsequence length probability distributions
5425 * used to generate the bands; from BandDistribution(); [0..v..M-1][0..W]
5426 * W - maximum window length W (gamma distributions range up to this)
5427 * cp9b - CP9 bands object with i and j bands
5428 *
5429 * Returns: (void)
5430 */
5431 void
5432 debug_print_parsetree_and_ij_bands(FILE *fp, Parsetree_t *tr, CM_t *cm, ESL_DSQ *dsq, CP9Bands_t *cp9b)
5433 {
5434 int x;
5435 char syml, symr;
5436 float tsc;
5437 float esc;
5438 int v,y;
5439 char mode;
5440
5441 /* Contract check */
5442 if(dsq == NULL) cm_Fail("In debug_print_parsetree_and_ij_bands(), dsq is NULL");
5443
5444 fprintf(fp, "%5s %6s %6s %7s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n",
5445 " idx ", "emitl", "emitr", "state", " nxtl", " nxtr", " prv ", " tsc ", " esc ",
5446 " imin", " imax", "idiff", "jmin", "jmax", "jdiff");
5447 fprintf(fp, "%5s %6s %6s %7s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n",
5448 "-----", "------", "------", "-------", "-----","-----", "-----","-----", "-----",
5449 "-----", "-----", "-----", "-----", "-----", "-----");
5450 for (x = 0; x < tr->n; x++)
5451 {
5452 v = tr->state[x];
5453 mode = tr->mode[x];
5454
5455 /* Set syml, symr: one char representation of what we emit, or ' '.
5456 * Set esc: emission score, or 0.
5457 * Only P, L, R states have emissions.
5458 */
5459 syml = symr = ' ';
5460 esc = 0.;
5461 if (cm->sttype[v] == MP_st) {
5462 if (mode == TRMODE_J || mode == TRMODE_L) syml = cm->abc->sym[dsq[tr->emitl[x]]];
5463 if (mode == TRMODE_J || mode == TRMODE_R) symr = cm->abc->sym[dsq[tr->emitr[x]]];
5464 if (mode == TRMODE_J) esc = DegeneratePairScore(cm->abc, cm->esc[v], dsq[tr->emitl[x]], dsq[tr->emitr[x]]);
5465 else if (mode == TRMODE_L) esc = cm->lmesc[v][dsq[tr->emitl[x]]];
5466 else if (mode == TRMODE_R) esc = cm->rmesc[v][dsq[tr->emitr[x]]];
5467 } else if ( (cm->sttype[v] == IL_st || cm->sttype[v] == ML_st) && (mode == TRMODE_J || mode == TRMODE_L) ) {
5468 syml = cm->abc->sym[dsq[tr->emitl[x]]];
5469 esc = esl_abc_FAvgScore(cm->abc, dsq[tr->emitl[x]], cm->esc[v]);
5470 } else if ( (cm->sttype[v] == IR_st || cm->sttype[v] == MR_st) && (mode == TRMODE_J || mode == TRMODE_R) ) {
5471 symr = cm->abc->sym[dsq[tr->emitr[x]]];
5472 esc = esl_abc_FAvgScore(cm->abc, dsq[tr->emitr[x]], cm->esc[v]);
5473 }
5474
5475 /* Set tsc: transition score, or 0.
5476 * B, E, and the special EL state (M, local end) have no transitions.
5477 */
5478 tsc = 0.;
5479 if (v != cm->M && cm->sttype[v] != B_st && cm->sttype[v] != E_st) {
5480 y = tr->state[tr->nxtl[x]];
5481
5482 if (tr->nxtl[x] == -1)
5483 ;
5484 else if (v == 0 && (cm->flags & CMH_LOCAL_BEGIN))
5485 tsc = cm->beginsc[y];
5486 else if (y == cm->M) /* CMH_LOCAL_END is presumably set, else this wouldn't happen */
5487 tsc = cm->endsc[v] + (cm->el_selfsc * (tr->emitr[x] - tr->emitl[x] + 1 - StateDelta(cm->sttype[v])));
5488 else /* y - cm->first[v] gives us the offset in the transition vector */
5489 tsc = cm->tsc[v][y - cm->cfirst[v]];
5490 }
5491
5492 /* Print the info line for this state
5493 */
5494 fprintf(fp, "%5d %5d%c %5d%c %5d%-2s %5d %5d %5d %5.2f %5.2f ",
5495 x, tr->emitl[x], syml, tr->emitr[x], symr, tr->state[x],
5496 Statetype(cm->sttype[v]), tr->nxtl[x], tr->nxtr[x], tr->prv[x], tsc, esc);
5497 if(tr->emitl[x] < cp9b->imin[tr->state[x]]) {
5498 fprintf(fp, "%5d %5d %5d ",
5499 cp9b->imin[tr->state[x]], cp9b->imax[tr->state[x]], (tr->emitl[x] - cp9b->imin[tr->state[x]]));
5500 }
5501 else if(tr->emitl[x] > cp9b->imax[tr->state[x]]) {
5502 fprintf(fp, "%5d %5d %5d ",
5503 cp9b->imin[tr->state[x]], cp9b->imax[tr->state[x]], (tr->emitl[x] - cp9b->imax[tr->state[x]]));
5504 }
5505 else {
5506 fprintf(fp, "%5d %5d %5s ",
5507 cp9b->imin[tr->state[x]], cp9b->imax[tr->state[x]], "");
5508 }
5509 if(tr->emitr[x] < cp9b->jmin[tr->state[x]]) {
5510 fprintf(fp, "%5d %5d %5d\n",
5511 cp9b->jmin[tr->state[x]], cp9b->jmax[tr->state[x]], (tr->emitr[x] - cp9b->jmin[tr->state[x]]));
5512 }
5513 else if(tr->emitr[x] > cp9b->jmax[tr->state[x]]) {
5514 fprintf(fp, "%5d %5d %5d\n",
5515 cp9b->jmin[tr->state[x]], cp9b->jmax[tr->state[x]], (tr->emitr[x] - cp9b->jmax[tr->state[x]]));
5516 }
5517 else {
5518 fprintf(fp, "%5d %5d %5s\n",
5519 cp9b->jmin[tr->state[x]], cp9b->jmax[tr->state[x]], "");
5520 }
5521 }
5522
5523 fprintf(fp, "%5s %6s %6s %7s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n",
5524 "-----", "------", "------", "-------", "-----","-----", "-----","-----", "-----",
5525 "-----", "-----", "-----", "-----", "-----", "-----", "-----");
5526
5527 fflush(fp);
5528 }
5529
5530 /*********************************************************************
5531 * Function: cp9_RelaxRootBandsForSearch()
5532 *
5533 * Purpose: In cp9_HMM2ijBands_OLD(), ROOT_S (state 0) sets imin[0]=imax[0]=i0,
5534 * and jmin[0]=jmax[0]=j0, which is important for alignment,
5535 * but during search enforces that the optimal alignment start
5536 * at i0 and end at j0, but when searching we want to relax this
5537 * requirement in case a higher scoring parse has different endpoints.
5538 * See code for details.
5539 *
5540 * Args:
5541 * cm the cm
5542 * i0 first position of seq
5543 * j0 last position of seq
5544 * int *imin imin[v] = first position in band on i for state v
5545 * int *imax imax[v] = last position in band on i for state v
5546 * int *jmin jmin[v] = first position in band on j for state v
5547 * int *jmax jmax[v] = last position in band on j for state v
5548 */
5549 void
5550 cp9_RelaxRootBandsForSearch(CM_t *cm, int i0, int j0, int *imin, int *imax, int *jmin, int *jmax)
5551 {
5552 int y, yoffset;
5553
5554 if(i0 == j0) return; /* this is a special vanishingly rare case, we've set otherwise illegal jmin, jmax values for MP states
5555 * b/c all MPs are impossible for a length 1 seq, do nothing in this case.
5556 */
5557 /* look at all children y of ROOT_S (v == 0) and set:
5558 * imin[0] = min_y imin[y];
5559 * imax[0] = max_y imax[y];
5560 * jmin[0] = min_y jmin[y];
5561 * jmax[0] = max_y jmax[y];
5562 */
5563 /* First look at children of 0 (these probs will be 0. if local begins on, but it doesn't matter for our purposes here) */
5564 for (yoffset = 0; yoffset < cm->cnum[0]; yoffset++) {
5565 y = cm->cnum[0] + yoffset;
5566 imin[0] = ESL_MIN(imin[0], imin[y]);
5567 imax[0] = ESL_MAX(imax[0], imax[y]);
5568 jmin[0] = ESL_MIN(jmin[0], jmin[y]);
5569 jmax[0] = ESL_MAX(jmax[0], jmax[y]);
5570 }
5571 /* now for possible local begins */
5572 if(cm->flags & CMH_LOCAL_BEGIN) {
5573 for (y = 1; y < cm->M; y++) {
5574 if(NOT_IMPOSSIBLE(cm->beginsc[y])) {
5575 imin[0] = ESL_MIN(imin[0], imin[y]);
5576 imax[0] = ESL_MAX(imax[0], imax[y]);
5577 jmin[0] = ESL_MIN(jmin[0], jmin[y]);
5578 jmax[0] = ESL_MAX(jmax[0], jmax[y]);
5579 }
5580 }
5581 }
5582 }
5583
5584 #endif
5585
5586