1 /* cm_dpsmall.c  (formerly smallcyk.c)
2  * SRE, Wed Aug  2 08:42:49 2000 [St. Louis]
3  *
4  * Alignment of a CM to a target (sub)sequence.
5  *
6  * Implementation of the CM divide and conquer alignment algorithm
7  * described in [Eddy02]. Also implements standard CYK/Inside
8  * optimal alignment by dynamic programming [Durbin98].
9  *
 * These algorithms align to the entire target (sub)sequence
 * (i.e. global alignment). For sequence-local alignment, see
 * scancyk.c.
13  */
14 
15 /*################################################################
16  * smallcyk's external API:
17  *
18  * CYKDivideAndConquer()    - The divide and conquer algorithm. Align
19  *                            a model to a (sub)sequence.
20  * CYKInside()              - Align model to (sub)sequence, using normal
21  *                            CYK/Inside algorithm.
22  * CYKInsideScore()         - Calculate the CYK/Inside score of optimal
23  *                            alignment, without recovering the alignment;
24  *                            allows timing CYK/Inside without blowing
25  *                            out memory, for large target RNAs.
26  *
27  * CYKDemands()             - Print a bunch of info comparing predicted d&c
28  *                            time/memory requirements to standard CYK/inside
29  *                            time/memory requirements.
30  *
31  * All of these functions can take query dependent bands (dmin
32  * and dmax) or have them passed as NULL.
33  *################################################################
34  */
35 
36 #include "esl_config.h"
37 #include "p7_config.h"
38 #include "config.h"
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 #include "easel.h"
44 #include "esl_stack.h"
45 #include "esl_vectorops.h"
46 
47 #include "hmmer.h"
48 
49 #include "infernal.h"
50 
51 /* The dividers and conquerors.
52  */
53 static float generic_splitter(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
54 			      int r, int vend, int i0, int j0);
55 static float wedge_splitter(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
56 			    int r, int z, int i0, int j0);
57 static void  v_splitter(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
58 			int r, int z, int i0, int i1, int j1, int j0, int useEL);
59 
60 /* The alignment engines.
61  */
62 static float inside(CM_t *cm, ESL_DSQ *dsq, int L,
63 		    int r, int z, int i0, int j0, int do_full,
64 		    float ***alpha, float ****ret_alpha,
65 		    struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
66 		    void ****ret_shadow, int allow_begin, int *ret_b, float *ret_bsc);
67 static void  outside(CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0,
68 		     int do_full, float ***beta, float ****ret_beta,
69 		     struct deckpool_s *dpool, struct deckpool_s **ret_dpool);
70 static float vinside(CM_t *cm, ESL_DSQ *dsq, int L,
71 		     int r, int z, int i0, int i1, int j1, int j0, int useEL,
72 		     int do_full, float ***a, float ****ret_a,
73 		     struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
74 		     char ****ret_shadow,
75 		     int allow_begin, int *ret_b, float *ret_bsc);
76 static void  voutside(CM_t *cm, ESL_DSQ *dsq, int L,
77 		      int r, int z, int i0, int i1, int j1, int j0, int useEL,
78 		      int do_full, float ***beta, float ****ret_beta,
79 		      struct deckpool_s *dpool, struct deckpool_s **ret_dpool);
80 
81 /* The traceback routines.
82  */
83 static float insideT(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
84 		     int r, int z, int i0, int j0, int allow_begin,
85 		     int *dmin, int *dmax);
86 static float vinsideT(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
87 		      int r, int z, int i0, int i1, int j1, int j0, int useEL,
88 		      int allow_begin, int *dmin, int *dmax);
89 
90 /* The size calculators.
91  */
92 float insideT_size(CM_t *cm, int L, int r, int z, int i0, int j0);
93 float vinsideT_size(CM_t *cm, int r, int z, int i0, int i1, int j1, int j0);
94 static int   cyk_deck_count(CM_t *cm, int r, int z);
95 static int   cyk_extra_decks(CM_t *cm);
96 
97 /* The memory management routines are in infernal.h so hmmband.c can access them
98  */
99 
100 /*******************************************************************************
101  * EPN: Banded functions are named *_b()
102  * Functions that I don't think need a banded version are indicated with a U
103  * before their names.
104  *
105  * To change *most* of the following code from banded to normal versions, two
106  * 'replace-string's would be done :
107  * (1) replace '_b(' with '(' : to replace all banded function calls with calls
108  *     to their non-banded versions.
109  * (2) replace ', dmin, dmax)' with ')' : all banded functions have exactly
110  *     two extra variables passed in, dmin a pointer to an int array with minimum
111  *     bands, and dmax, a pointer to an int array with maximum bands.  Further,
112  *     these are always the last two variables passed into a function.
113  *
114  * There are two classes of changes that were made to the original functions
115  * to make (what I think are) functioning banded versions (*_b()).
116  *
117  * Class 1 : vjd deck changes - using dmin and dmax as bands
118  * Class 2 : vji deck changes - using imin and imax (derived from dmin and dmax)
119  *
120  * Class 2 changes occur only within v problems, only functions : v_splitter_b(),
121  * vinside_b(), and voutside_b().
122  *
123  * The class 1 changes are more straightforward relative to the class 2 changes.
124  * This is completely due to the fact that the vjd coordinate system directly
125  * uses d (distance of subsequence in parse tree rooted at state v) which
126  * corresponds conveniently with dmin and dmax.
127  *
128  * Class 1 changes are usually involved with a for loop that involves
129  * the d index in either the alpha or the beta matrix.  The original for loops
130  * are simply replaced with a new for loop that enforces the bands.
131  *
132  * Class 2 changes that involve the vji decks involve several offset variables
133  * because the implicit d value for a given vji cell has to be calculated.  The
134  * formula for that conversion is simple :   d = j-i+1
135  * in the code however, jp and ip are used where jp = j-j1 and ip = i-i0.
136  * so we have :  d = (jp+j1) - (ip+i0) + 1
137  *
 * The way this is handled is only one possible way (and not necessarily the best way)
 * but saves some calculations from being repeated and is somewhat consistent with
 * analogous code elsewhere.  Also the way it's handled here is somewhat general
 * and could be easily changed.
142  *
 * That approach is to use an imin[] and imax[] vector, somewhat analogous to
 * dmin[] and dmax[], indexed by states where states in the imin
 * and imax vectors are offset (usually by r or w1) because v problems don't involve
 * the entire set of 0..M-1 states.  Because determining a d for a given vji
 * cell depends on both jp and ip, we can't calculate the bands for a given
 * state (vji deck) independent of jp.  Therefore, imin[] and imax[] are calculated
 * independent of jp, and jp must be added within a for(jp...) loop to determine
 * the actual band in the i dimension.
151  *
152  * So imin[v-r] = j1-i0-dmax[v]+1;
153  *    imax[v-r] = j1-i0-dmin[v]+1;
154  *
155  * Here's an example of using imin and imax within a for(jp ... ) loop :
156  *	  for (jp = 0; jp <= j0-j1; jp++)
157  *	    {
158  * 	      if((imax[v-r]+jp) > (i1-i0)) ip = (i1-i0);
159  *	      else ip = imax[v-r] + jp;
160  * 	      for(; ip >= imin[v-r]+jp && ip >= 0; ip--) {
161  *
162  * Code where bands are used in the vji deck are marked with "Bands used ip X" where X
163  * is a number (1-19).  Some of these sections have been commented out as I slowly
164  * realized they were mistakes or unnecessary.  There are, admittedly scattered, notes
165  * on how I arrived at each of these in :
166  * ~nawrocki/lab/rRNA/inf/infernal_0426/banded_testing_0207/00LOG
167  *
168  * Other changes of both class 1 and 2 involves imposing the bands during
169  * the initialization step of the alpha matrix.  These changes
170  * add additional code that sets all cells outside the bands to IMPOSSIBLE.
171  *
172  *******************************************************************************/
173 
174 /* The banded dividers and conquerors.
175  */
176 static float generic_splitter_b(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
177 				int r, int vend, int i0, int j0, int *dmin, int *dmax);
178 static float wedge_splitter_b(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
179 			      int r, int z, int i0, int j0, int *dmin, int *dmax);
180 static void  v_splitter_b(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
181 			  int r, int z, int i0, int i1, int j1, int j0, int useEL,
182 			  int *dmin, int *dmax);
183 
184 /* The banded alignment engines.
185  */
186 static float inside_b(CM_t *cm, ESL_DSQ *dsq, int L,
187 		      int r, int z, int i0, int j0,
188 		      int do_full,
189 		      float ***alpha, float ****ret_alpha,
190 		      struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
191 		      void ****ret_shadow,
192 		      int allow_begin, int *ret_b, float *ret_bsc,
193 		      int *dmin, int *dmax);
194 static void  outside_b(CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0,
195 		       int do_full, float ***beta, float ****ret_beta,
196 		       struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
197 		       int *dmin, int *dmax);
198 static float vinside_b(CM_t *cm, ESL_DSQ *dsq, int L,
199 		       int r, int z, int i0, int i1, int j1, int j0, int useEL,
200 		       int do_full, float ***a, float ****ret_a,
201 		       struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
202 		       char ****ret_shadow,
203 		       int allow_begin, int *ret_b, float *ret_bsc,
204 		       int *dmin, int *dmax);
205 static void  voutside_b(CM_t *cm, ESL_DSQ *dsq, int L,
206 			int r, int z, int i0, int i1, int j1, int j0, int useEL,
207 			int do_full, float ***beta, float ****ret_beta,
208 			struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
209 			int *dmin, int *dmax);
210 
211 /* No banded versions of the traceback routines because the non-banded
212  * functions can be used.*/
213 
214 /* No banded size calculators right now. */
215 
216 /*******************************************************************************
217  * 05.24.05
218  * EPN MEMORY EFFICIENT BANDED VERSIONS OF SELECTED FUNCTIONS
219  * Memory efficient banded functions are named *_b_me()
220  *
221  * These functions are modified from their originals to make the memory
222  * efficient banded FULL (not D&C) CYK implementation work.  These functions
223  * are dubbed 'memory efficient' because they only allocate cells of the
224  * alpha or shadow matrix which are within the bands.  The non-memory efficient
225  * functions (*_b()) still allocate the same memory as the non-banded functions,
226  * but only use the cells within the bands, here we actually don't even allocate
227  * unnecessary cells.  The only real difficulty implementing memory efficient
228  * bands is in being able to determine what cell alpha[v][j][d] from the
229  * non-memory efficient code corresponds to in the memory-efficient code (we'll
230  * call the corresponding cell a[v'][j'][d'] or a[vp][jp][dp]).  The reason
231  * v != v'; j != j' and d != d' is because the primes are offset due to the
232  * fact that some of the original alpha matrix deck (a[v]) has not been allocated
233  * due to the bands.  Therefore all of the differences between the *_b_me() functions
234  * and their *_b() versions is to deal with the offset issue.
235  *
236  * All changes from the original (non-memory efficient) banded code have been
237  * marked with comments beginning 'CYK Full ME Bands Used'.
238  *
 * There are only two functions that need separate _b_me() versions, because
 * the non D&C alignment algorithm only involves three functions, CYKInside(),
 * inside(), and insideT(), and the CYKInside() is really only a wrapper,
 * for which the memory efficient implementation has no effect, so all we
 * need is inside_b_me() and insideT_b_me().
244  *
245  *******************************************************************************/
246 
247 /* The alignment engines.
248  */
249 static float inside_b_me(CM_t *cm, ESL_DSQ *dsq, int L,
250 			 int r, int z, int i0, int j0,
251 			 int do_full,
252 			 float ***alpha, float ****ret_alpha,
253 			 void ****ret_shadow,
254 			 int allow_begin, int *ret_b, float *ret_bsc,
255 			 int *dmin, int *dmax);
256 
257 /* The traceback routines.
258  * At first, it wasn't immediately obvious that a *_me version of
259  * this function was needed, but there's some crazy offset issues. [EPN]
260  */
261 
262 static float insideT_b_me(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
263 			  int r, int z, int i0, int j0, int allow_begin,
264 			  int *dmin, int *dmax);
265 
266 
267 /* Function: CYKDivideAndConquer()
268  * Date:     SRE, Sun Jun  3 19:32:14 2001 [St. Louis]
269  *
270  * Purpose:  Align a CM to a (sub)sequence using the divide and conquer
271  *           algorithm. Return the score (in bits) and a traceback
272  *           structure.
273  *
274  *           The simplest call to this, for a model cm and a sequence
275  *           dsq of length L and no bands on d:
276  *               CYKDivideAndConquer(cm, dsq, L, 0, 1, &tr, NULL, NULL);
277  *           which will align the model to the entire sequence. (The alignment
278  *           will be global w.r.t the sequence.)
279  *
280  *           Sometimes we already know the second state in the traceback:
281  *           a CYKScan() will tell us r, for a 0->r local begin transition.
282  *           (It also tells us i0, j0: the bounds of a high-scoring subsequence
283  *           hit in the target sequence.)  We take all this information in
284  *           as a shortcut. The 0->r transition is still counted
285  *           towards the score. That is, CYKDivideAndConquer() always
286  *           gives a parsetree rooted at state 0, the root, and the sc
287  *           we return is the score for that complete parse tree.
288  *
289  * Args:     cm     - the covariance model
290  *           dsq    - the digitized sequence, 1..L
291  *           L      - length of sequence
292  *           r      - root of subgraph to align to target subseq (usually 0, the model's root)
293  *           i0     - start of target subsequence (often 1, beginning of sq)
294  *           j0     - end of target subsequence (often L, end of sq)
295  *           ret_tr - RETURN: traceback (pass NULL if trace isn't wanted)
296  *           dmin   - minimum d bound for each state v; [0..v..M-1] (NULL if non-banded)
297  *           dmax   - maximum d bound for each state v; [0..v..M-1] (NULL if non-banded)
298  *
299  * Returns: score of the alignment in bits.
300  */
301 float
CYKDivideAndConquer(CM_t * cm,ESL_DSQ * dsq,int L,int r,int i0,int j0,Parsetree_t ** ret_tr,int * dmin,int * dmax)302 CYKDivideAndConquer(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, Parsetree_t **ret_tr,
303 		    int *dmin, int *dmax)
304 {
305   Parsetree_t *tr;
306   float        sc;
307   int          z;
308 
309   /*printf("alignment strategy:CYKDivideAndConquer:nb:small\n");*/
310   /* Trust, but verify.
311    * Check out input parameters.
312    */
313   if (cm->stid[r] != ROOT_S) {
314     if (! (cm->flags & CMH_LOCAL_BEGIN)) cm_Fail("internal error: we're not in local mode, but r is not root");
315     if (cm->stid[r] != MATP_MP && cm->stid[r] != MATL_ML &&
316 	cm->stid[r] != MATR_MR && cm->stid[r] != BIF_B)
317       cm_Fail("internal error: trying to do a local begin at a non-mainline start");
318   }
319 
320   /* Create a parse tree structure.
321    * The traceback machinery expects to build on a start state already
322    * in the parsetree, so initialize by adding the root state.
323    */
324   tr = CreateParsetree(100);
325   InsertTraceNode(tr, -1, TRACE_LEFT_CHILD, i0, j0, 0); /* init: attach the root S */
326   z  = cm->M-1;
327   sc = 0.;
328 
329   /* If r != 0, we already know we're starting with a local entry transition 0->r;
330    * add that node too, and count the begin transition towards the score. We have
331    * just done our one allowed local begin, so allow_begin becomes FALSE.
332    */
333   if (r != 0)
334     {
335       InsertTraceNode(tr, 0,  TRACE_LEFT_CHILD, i0, j0, r);
336       z  =  CMSubtreeFindEnd(cm, r);
337       sc =  cm->beginsc[r];
338     }
339 
340   /* Start the divide and conquer recursion: call the generic_splitter()
341    * or generic_splitter_b() on the whole DP cube.
342    */
343   if(dmin == NULL && dmax == NULL)
344     sc += generic_splitter(cm, dsq, L, tr, r, z, i0, j0);
345   else
346     sc += generic_splitter_b(cm, dsq, L, tr, r, z, i0, j0, dmin, dmax);
347 
348   /* Free memory and return
349    */
350   if (ret_tr != NULL) *ret_tr = tr; else FreeParsetree(tr);
351   ESL_DPRINTF1(("#DEBUG: returning from CYKDivideAndConquer() sc : %f\n", sc));
352   return sc;
353 }
354 
355 /* Function: CYKInside()
356  * Date:     SRE, Sun Jun  3 19:48:33 2001 [St. Louis]
357  *
358  * Purpose:  Wrapper for the insideT() routine - solve
359  *           a full alignment problem, return the traceback
360  *           and the score, without dividing & conquering.
361  *
362  *           Analogous to CYKDivideAndConquer() in many respects;
363  *           see the more extensive comments in that function for
364  *           more details on shared aspects.
365  *
366  * Args:     cm     - the covariance model
367  *           sq    - the sequence, 1..L
368  *           r      - root of subgraph to align to target subseq (usually 0, the model's root)
369  *           i0     - start of target subsequence (often 1, beginning of sq)
370  *           j0     - end of target subsequence (often L, end of sq)
371  *           ret_tr - RETURN: traceback (pass NULL if trace isn't wanted)
372  *           dmin   - minimum d bound for each state v; [0..v..M-1] (NULL if non-banded)
373  *           dmax   - maximum d bound for each state v; [0..v..M-1] (NULL if non-banded)
374  *
375  * Returns:  score of the alignment in bits.
376  */
377 float
CYKInside(CM_t * cm,ESL_DSQ * dsq,int L,int r,int i0,int j0,Parsetree_t ** ret_tr,int * dmin,int * dmax)378 CYKInside(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, Parsetree_t **ret_tr,
379 	  int *dmin, int *dmax)
380 {
381   Parsetree_t *tr;
382   int          z;
383   float        sc;
384 
385   /* Trust, but verify.
386    * Check out input parameters.
387    */
388   if (cm->stid[r] != ROOT_S) {
389     if (! (cm->flags & CMH_LOCAL_BEGIN)) cm_Fail("internal error: we're not in local mode, but r is not root");
390     if (cm->stid[r] != MATP_MP && cm->stid[r] != MATL_ML &&
391 	cm->stid[r] != MATR_MR && cm->stid[r] != BIF_B)
392       cm_Fail("internal error: trying to do a local begin at a non-mainline start");
393   }
394 
395   /* Create the parse tree, and initialize.
396    */
397   tr = CreateParsetree(100);
398   InsertTraceNode(tr, -1, TRACE_LEFT_CHILD, 1, L, 0); /* init: attach the root S */
399   z  = cm->M-1;
400   sc = 0.;
401 
402   /* Deal with case where we already know a local entry transition 0->r
403    */
404   if (r != 0)
405     {
406       InsertTraceNode(tr, 0,  TRACE_LEFT_CHILD, i0, j0, r);
407       z  =  CMSubtreeFindEnd(cm, r);
408       sc =  cm->beginsc[r];
409     }
410 
411   /* Solve the whole thing with one call to insideT.
412    */
413   /* if we're non-banded use the original function */
414   if(dmin == NULL && dmax == NULL)
415     sc += insideT(cm, dsq, L, tr, r, z, i0, j0, (r==0),
416 		  dmin, dmax);
417   /* if we're using query dependent bands, call the
418    * memory efficient QDB alignment version.
419    */
420   else
421     sc += insideT_b_me(cm, dsq, L, tr, r, z, i0, j0, (r==0),
422       dmin, dmax);
423   /* To call the non-memory efficient version, uncomment
424    * the following line: */
425   /*sc += insideT(cm, dsq, L, tr, r, z, i0, j0, (r==0),    dmin, dmax);*/
426 
427   if (ret_tr != NULL) *ret_tr = tr; else FreeParsetree(tr);
428   return sc;
429 }
430 
431 /* Function: CYKInsideScore()
432  * Date:     SRE, Tue Apr  9 05:21:22 2002 [St. Louis]
433  *
434  * Purpose:  Wrapper for the inside() routine. Solve
435  *           a full alignment problem in one pass of inside,
436  *           in memory-saving mode, returning only the score.
437  *
438  *           Fairly useless. Written just to obtain timings
439  *           for SSU and LSU alignments, for comparison to
440  *           divide and conquer.
441  *
442  *           Analogous to CYKDivideAndConquer() in many respects;
443  *           see the more extensive comments in that function for
444  *           more details on shared aspects.
445  *
446  * Args:     cm     - the covariance model
447  *           dsq    - the sequence, 1..L
448  *           L      - length of sequence
449  *           r      - root of subgraph to align to target subseq (usually 0, the model's root)
450  *           i0     - start of target subsequence (often 1, beginning of sq)
451  *           j0     - end of target subsequence (often L, end of sq)
452  *           dmin   - minimum d bound for each state v; [0..v..M-1] (NULL if non-banded)
453  *           dmax   - maximum d bound for each state v; [0..v..M-1] (NULL if non-banded)
454  *
455  * Returns:  score of the alignment in bits.
456  */
457 float
CYKInsideScore(CM_t * cm,ESL_DSQ * dsq,int L,int r,int i0,int j0,int * dmin,int * dmax)458 CYKInsideScore(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, int *dmin, int *dmax)
459 {
460   int    z;
461   float  sc;
462 
463   z           = cm->M-1;
464   sc          = 0.;
465 
466   if (r != 0)
467     {
468       z  =  CMSubtreeFindEnd(cm, r);
469       sc =  cm->beginsc[r];
470     }
471 
472   if(dmin == NULL && dmax == NULL)
473     sc +=  inside(cm, dsq, L, r, z, i0, j0, FALSE,
474 		  NULL, NULL, NULL, NULL, NULL,
475 		  (r==0), NULL, NULL);
476   else
477     sc +=  inside_b(cm, dsq, L, r, z, i0, j0, FALSE,
478 		    NULL, NULL, NULL, NULL, NULL,
479 		    (r==0), NULL, NULL, dmin, dmax);
480 
481   return sc;
482 }
483 
484 
485 /* Function: CYKDemands()
486  * Date:     SRE, Sun Jun  3 20:00:54 2001 [St. Louis]
487  *
488  * Purpose:  Print out information on the computational
489  *           complexity of an alignment problem for divide
490  *           and conquer versus the full CYK.
491  *
492  * Args:     cm     - the model
493  *           L      - length of sequence.
494  *           dmin   - minimum d bound for each state v; [0..v..M-1] (NULL if non-banded)
495  *           dmax   - maximum d bound for each state v; [0..v..M-1] (NULL if non-banded)
496  *           be_quiet - TRUE to not print info, just return number of DP calcs
497  *
498  * Returns: (float) the total number of DP calculations, either using QDB (if
499  *                  dmin & dmax are non-NULL) or not using QDB.
500  */
501 float
CYKDemands(CM_t * cm,int L,int * dmin,int * dmax,int be_quiet)502 CYKDemands(CM_t *cm, int L, int *dmin, int *dmax, int be_quiet)
503 {
504   float Mb_per_deck;    /* megabytes per deck */
505   int   bif_decks;	/* bifurcation decks  */
506   int   nends;		/* end decks (only need 1, even for multiple E's */
507   int   maxdecks;	/* maximum # of decks needed by CYKInside() */
508   int   extradecks;     /* max # of extra decks needed for bifurcs */
509   float smallmemory;	/* how much memory small version of CYKInside() needs */
510   float bigmemory;	/* how much memory a full CYKInside() would take */
511   float dpcells;	/* # of dp cells */
512   float bifcalcs;	/* # of inner loops executed for bifurcation calculations */
513   float bifcalcs_b;	/* # of inner loops executed for bifurcation calculations in QDB */
514   float dpcalcs;	/* # of inner loops executed for non-bif calculations */
515   float dpcalcs_b;	/* # of inner loops executed for bifurcation calculations in QDB */
516   int   j;
517   float avg_Mb_per_banded_deck;    /* average megabytes per deck in mem efficient big mode */
518   int   v, y, z, d, kmin, kmax; /* for QDB calculations */
519 
520   Mb_per_deck = size_vjd_deck(L, 1, L);
521   bif_decks   = CMCountStatetype(cm, B_st);
522   nends       = CMCountStatetype(cm, E_st);
523   maxdecks    = cyk_deck_count(cm, 0, cm->M-1);
524   extradecks  = cyk_extra_decks(cm);
525   smallmemory = (float) maxdecks * Mb_per_deck;
526   bifcalcs = 0.;
527   for (j = 0; j <= L; j++)
528     bifcalcs += (float)(j+1)*(float)(j+2)/2.;
529   bifcalcs *= (float) bif_decks;
530   dpcalcs = (float) (L+2)*(float)(L+1)*0.5*(float) (cm->M - bif_decks - nends +1);
531   if(dmin == NULL && dmax == NULL)
532     {
533       bigmemory   = (float) (cm->M - nends +1) * Mb_per_deck;
534       dpcells     = (float) (L+2)*(float)(L+1)*0.5*(float) (cm->M - nends +1);
535       avg_Mb_per_banded_deck = 0.; /* irrelevant */
536     }
537   else
538     {
539       dpcells = 0.;
540       dpcalcs_b = 0.;
541       for(v = 0; v < cm->M; v++)
542 	{
543 	  dpcells   += (float) (L+1) * (float) (dmax[v] - dmin[v] + 1.);
544 	  if(cm->sttype[v] != B_st)
545 	    dpcalcs_b   += (float) (L+1) * (float) (dmax[v] - dmin[v] + 1.);
546 	  for(d = dmin[v]; d <= dmax[v]; d++)
547 	    {
548 	      dpcells -= (float) d; /* subtract out cells for which d <= j */
549 	      if(cm->sttype[v] != B_st)
550 		dpcalcs_b   -= (float) d;
551 	    }
552 	}
553       bigmemory   = (sizeof(float) * dpcells) / 1000000.;
554       avg_Mb_per_banded_deck = bigmemory / ((float) cm->M -nends + 1);
555       /* bigmemory and avg_Mb_per_banded_deck should be treated as approximates,
556        * I'm not sure if they're exactly correct. EPN, Mon Nov  6 07:56:13 2006 */
557 
558       /* for QDB, to get bifcalcs, we need to count all the cells within the bands on
559        * left and right childs y and z of v, that are consistent with band on v
560        * there's probably a more efficient way of doing this. */
561       bifcalcs_b = 0.;
562       for (v = 0; v < cm->M; v++)
563 	{
564 	  if(cm->sttype[v] == B_st)
565 	    {
566 	      y = cm->cfirst[v];
567 	      z = cm->cnum[v];
568 	      for (j = 0; j <= L; j++)
569 		{
570 		  for (d = dmin[v]; d <= dmax[v] && d <= j; d++)
571 		    {
572 		      if(dmin[z] > (d-dmax[y])) kmin = dmin[z];
573 		      else kmin = d-dmax[y];
574 		      if(kmin < 0) kmin = 0;
575 		      if(dmax[z] < (d-dmin[y])) kmax = dmax[z];
576 		      else kmax = d-dmin[y];
577 		      if(kmin <= kmax)
578 			bifcalcs_b += (float)(kmax - kmin + 1);
579 		    }
580 		}
581 	    }
582 	}
583     }
584 
585   if(dmin == NULL && dmax == NULL)
586     {
587       if(!be_quiet)
588 	{
589 	  printf("CYK cpu/memory demand estimates:\n");
590 	  printf("Mb per cyk deck:                  %.4f\n", Mb_per_deck);
591 	  printf("# of decks (M):                   %d\n",   cm->M);
592 	  printf("# of decks needed in small CYK:   %d\n",   maxdecks);
593 	  printf("# of extra decks needed:          %d\n",   extradecks);
594 	  printf("RAM needed for full CYK, Mb:      %.2f\n", bigmemory);
595 	  printf("RAM needed for small CYK, Mb:     %.2f\n", smallmemory);
596 	  printf("# of dp cells, total:             %.3g\n", dpcells);
597 	  printf("# of non-bifurc dp cells:         %.3g\n", dpcalcs);
598 	  printf("# of bifurcations:                %d\n",   bif_decks);
599 	  printf("# of bifurc dp inner loop calcs:  %.3g\n", bifcalcs);
600 	  printf("# of dp inner loops:              %.3g\n", dpcalcs+bifcalcs);
601 	}
602       return (dpcalcs + bifcalcs);
603     }
604   else /* QDB */
605     {
606       if(!be_quiet)
607 	{
608 	  printf("QDB CYK cpu/memory demand estimates:\n");
609 	  printf("Mb per cyk deck:                     %.4f\n", Mb_per_deck);
610 	  printf("Avg Mb per QDB cyk deck:             %.4f\n", avg_Mb_per_banded_deck);
611 	  printf("# of decks (M):                      %d\n",   cm->M);
612 	  printf("# of decks needed in small QDB CYK:  %d\n",   maxdecks);
613 	  printf("# of extra decks needed:             %d\n",   extradecks);
614 	  printf("RAM needed for full QDB CYK, Mb:     %.2f\n", bigmemory);
615 	  printf("RAM needed for small QDB CYK, Mb:    %.2f\n", smallmemory);
616 	  printf("# of QDB dp cells, total:            %.3g\n", dpcells);
617 	  printf("# of QDB non-bifurc dp cells:        %.3g\n", dpcalcs_b);
618 	  printf("# of bifurcations:                   %d\n",   bif_decks);
619 	  printf("# of QDB bifurc dp inner loop calcs: %.3g\n", bifcalcs_b);
620 	  printf("# of QDB dp inner loops:             %.3g\n", dpcalcs_b+bifcalcs_b);
621 	  printf("Estimated small CYK QDB aln speedup: %.4f\n", ((dpcalcs+bifcalcs)/(dpcalcs_b+bifcalcs_b)));
622 	}
623       return (dpcalcs_b + bifcalcs_b);
624     }
625 }
626 
627 
628 /* Function: CYKNonQDBSmallMbNeeded()
629  * Date:     EPN, Fri May 27 11:43:56 2011
630  *
631  * Purpose:  Return number of Mb needed for non-QDB
632  *           divide and conquer CYK.
633  *
634  * Args:     cm     - the model
635  *           L      - length of sequence.
636  *
637  * Returns: Number of Mb required.
638  */
639 float
CYKNonQDBSmallMbNeeded(CM_t * cm,int L)640 CYKNonQDBSmallMbNeeded(CM_t *cm, int L)
641 {
642   float Mb_per_deck;    /* megabytes per deck */
643   int   maxdecks;	/* maximum # of decks needed by CYKInside() */
644   float smallmemory;	/* how much memory small version of CYKInside() needs */
645 
646   Mb_per_deck = size_vjd_deck(L, 1, L);
647   maxdecks    = cyk_deck_count(cm, 0, cm->M-1);
648   smallmemory = (float) maxdecks * Mb_per_deck;
649   return smallmemory;
650 }
651 
652 /*################################################################
653  * The dividers and conquerors.
654  *################################################################*/
655 
656 /* Function: generic_splitter()
657  * Date:     SRE, Sat May 12 15:08:38 2001 [CSHL]
658  *
659  * Purpose:  Solve a "generic problem": best parse of
660  *           a possibly bifurcated subgraph cm^r_z to
661  *           a substring sq->sq[i0..j0]. r is usually a start
662  *           state (S_st) but may be any non-end state type in
663  *           the case of local alignment begins (ROOT 0->r).
664  *           z is always an end state (E_st).
665  *
666  *           Given: a cm subgraph from r..z
667  *                  a subsequence from i0..j0
668  *           Attaches the optimal trace T{r..z}, exclusive of r
669  *           and inclusive of z, to tr.
670  *
671  *           A full divide & conquer never terminates
672  *           in generic_splitter; the recursion must
673  *           terminate in v_splitter and wedge_splitter;
674  *           so we don't test an end-of-recursion boundary.
675  *
676  * Args:     cm          - model
677  *           sq          - sequence, digitized, 1..L
678  *           tr          - the traceback we're adding on to.
679  *           r           - index of the root state of this problem in the model
680  *           z           - index of an end state (E_st) in the model
681  *           i0          - start in the sequence (1..L)
682  *           j0          - end in the sequence (1..L)
683  *
684  * Returns:  score of the optimal parse of sq(i0..j0) with cm^r_z
685  */
686 static float
generic_splitter(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int j0)687 generic_splitter(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
688 		 int r, int z, int i0, int j0)
689 {
690   float ***alpha;
691   float ***beta;
692   struct deckpool_s *pool;
693   int      v,w,y;		/* state indices */
694   int      wend, yend;		/* indices for end of subgraphs rooted at w,y */
695   int      jp;			/* j': relative position in subseq, 0..W */
696   int      W;			/* length of subseq i0..j0 */
697   float    sc;			/* tmp variable for a score */
698   int      j,d,k;		/* sequence indices */
699   float    best_sc;		/* optimal score at the optimal split point */
700   int      best_k;		/* optimal k for the optimal split */
701   int      best_d;		/* optimal d for the optimal split */
702   int      best_j;		/* optimal j for the optimal split */
703   int      tv;			/* remember the position of a bifurc in the trace. */
704   int      b1,b2;		/* argmax_v for 0->v local begin transitions */
705   float    b1_sc, b2_sc;	/* max_v scores for 0->v local begin transitions */
706 
707   /* 1. If the generic problem is small enough, solve it with inside^T,
708    *    and append the trace to tr.
709    */
710   if (insideT_size(cm, L, r, z, i0, j0) < RAMLIMIT) {
711     ESL_DPRINTF2(("#DEBUG: Solving a generic w/ insideT - G%d[%s]..%d[%s], %d..%d\n",
712 		  r, UniqueStatetype(cm->stid[r]),
713 		  z, UniqueStatetype(cm->stid[z]),
714 		  i0, j0));
715     sc = insideT(cm, dsq, L, tr, r, z, i0, j0, (r==0),
716 		NULL, NULL); /* two NULLs mean 'don't use bands' */
717 
718     return sc;
719   }
720 
721   /* 2. Traverse down from r, find first bifurc.
722    *    The lowest a bifurc could be: B-S-E/S-IL-E = vend-5
723    *
724    */
725   for (v = r; v <= z-5; v++)
726     if (cm->sttype[v] == B_st) break; /* found the first bifurcation, now v */
727 
728   /* 3. If there was no bifurcation, this is a wedge problem; solve it
729    *    with wedge_splitter.
730    */
731   if (v > z-5) {		/* no bifurc? it's a wedge problem  */
732     if (cm->sttype[z] != E_st) cm_Fail("inconceivable.");
733     sc = wedge_splitter(cm, dsq, L, tr, r, z, i0, j0);
734     return sc;
735   }
736 
737   /* Set up the state quartet r,v,w,y for a divide and conquer
738    * solution of the generic problem.
739    */
740   w = cm->cfirst[v];		/* index of left S  */
741   y = cm->cnum[v];		/* index right S    */
742   if (w < y) { wend = y-1; yend = z; }
743   else       { yend = w-1; wend = z; }
744 
745   /* Calculate alpha[w] deck and alpha[y] deck.
746    * We also get b1: best choice for 0->b local begin. b1_sc is the score if we do this.
747    * Analogous for b2, b2_sc on the other side.
748    */
749   inside(cm, dsq, L, w, wend, i0, j0, BE_EFFICIENT, NULL,  &alpha, NULL, &pool, NULL,
750 	 (r==0), &b1, &b1_sc);
751   inside(cm, dsq, L, y, yend, i0, j0, BE_EFFICIENT, alpha, &alpha, pool, &pool, NULL,
752 	 (r==0), &b2, &b2_sc);
753 
754   /* Calculate beta[v] deck (stick it in alpha). Let the pool get free'd.
755    * (If we're doing local alignment, deck M is the beta[EL] deck.)
756    */
757   outside(cm, dsq, L, r, v, i0, j0, BE_EFFICIENT, alpha, &beta, pool, NULL);
758 
759   /* Find the optimal split at the B.
760    */
761   W = j0-i0+1;
762   best_sc = IMPOSSIBLE;
763   for (jp = 0; jp <= W; jp++)
764     {
765       j = i0-1+jp;
766       for (d = 0; d <= jp; d++)
767 	for (k = 0; k <= d; k++)
768 	  if ((sc = alpha[w][j-k][d-k] + alpha[y][j][k] + beta[v][j][d]) > best_sc)
769 	    {
770 	      best_sc = sc;
771 	      best_k  = k;
772 	      best_j  = j;
773 	      best_d  = d;
774 	    }
775     }
776 
777   /* Local alignment only: maybe we're better off in EL?
778    */
779   if (cm->flags & CMH_LOCAL_END) {
780     for (jp = 0; jp <= W; jp++)
781       {
782 	j = i0-1+jp;
783 	for (d = jp; d >= 0; d--)
784 	  if ((sc = beta[cm->M][j][d]) > best_sc) {
785 	    best_sc = sc;
786 	    best_k  = -1;	/* special flag for local end, EL. */
787 	    best_j  = j;
788 	    best_d  = d;
789 	  }
790       }
791   }
792 
793   /* Local alignment only: maybe we're better off in ROOT?
794    */
795   if (r == 0 && cm->flags & CMH_LOCAL_BEGIN) {
796     if (b1_sc > best_sc) {
797       best_sc = b1_sc;
798       best_k  = -2;		/* flag for using local begin into left wedge w..wend */
799       best_j  = j0;
800       best_d  = W;
801     }
802     if (b2_sc > best_sc) {
803       best_sc = b2_sc;
804       best_k  = -3;		/* flag for using local begin into right wedge y..yend */
805       best_j  = j0;
806       best_d  = W;
807     }
808   }
809 
810   /* Free now, before recursing.
811    * The two alpha matrices and the beta matrix
812    * actually all point to the same memory, since no
813    * decks in Inside and Outside needed to overlap.
814    * Free 'em all in one call.
815    */
816   free_vjd_matrix(alpha, cm->M, i0, j0);
817 
818   /* If we're in EL, instead of B, the optimal alignment is entirely
819    * in a V problem that's still above us. The TRUE flag sets useEL.
820    */
821   if (best_k == -1) {
822     v_splitter(cm, dsq, L, tr, r, v, i0, best_j-best_d+1, best_j, j0, TRUE);
823     return best_sc;
824   }
825 
826   /* Else: if we're in the root 0, we know which r we did our local begin into.
827    * We have a generic problem rooted there. The FALSE flag disallows
828    * any further local begins.
829    */
830   if (best_k == -2) {
831     InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b1);
832     z = CMSubtreeFindEnd(cm, b1);
833     generic_splitter(cm, dsq, L, tr, b1, z, i0, j0);
834     return best_sc;
835   }
836   if (best_k == -3) {
837     InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b2);
838     z = CMSubtreeFindEnd(cm, b2);
839     generic_splitter(cm, dsq, L, tr, b2, z, i0, j0);
840     return best_sc;
841   }
842 
843   /* Else (the usual case), ok, we did use B in the optimal split.
844    * Split now into a V problem and two generic problems, and recurse
845    * left fragment: i1 = j-d+1, j1 = j-k, vroot = w, vend = wend
846    * right frag:    i2 = j-k+1, j2 = j,   vroot = y, vend = yend
847    *
848    * The problems must be solved in a particular order, since we're
849    * constructing the trace in a postorder traversal.
850    */
851   ESL_DPRINTF2(("#DEBUG: Generic splitter:\n"));
852   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
853 		r, UniqueStatetype(cm->stid[r]),
854 		v, UniqueStatetype(cm->stid[v]),
855 		i0, best_j-best_d+1, best_j, j0));
856   ESL_DPRINTF2(("#DEBUG:    generic: G%d[%s]..%d[%s], %d..%d\n",
857 		w,    UniqueStatetype(cm->stid[w]),
858 		wend, UniqueStatetype(cm->stid[wend]),
859 		best_j-best_d+1, best_j-best_k));
860   ESL_DPRINTF2(("#DEBUG:    generic: G%d[%s]..%d[%s], %d..%d\n",
861 		y,    UniqueStatetype(cm->stid[y]),
862 		yend, UniqueStatetype(cm->stid[yend]),
863 		best_j-best_k+1, best_j));
864 
865   v_splitter(cm, dsq, L, tr, r, v, i0, best_j-best_d+1, best_j, j0, FALSE);
866   tv = tr->n-1;
867 
868   InsertTraceNode(tr, tv, TRACE_LEFT_CHILD, best_j-best_d+1, best_j-best_k, w);
869   generic_splitter(cm, dsq, L, tr, w, wend, best_j-best_d+1, best_j-best_k);
870   InsertTraceNode(tr, tv, TRACE_RIGHT_CHILD, best_j-best_k+1, best_j, y);
871   generic_splitter(cm, dsq, L, tr, y, yend, best_j-best_k+1, best_j);
872 
873   return best_sc;
874 }
875 
876 /* Function: wedge_splitter()
877  * Date:     SRE, Sun May 13 08:44:15 2001 [CSHL genome mtg]
878  *
879  * Purpose:  Solve a "wedge problem": best parse of an
880  *           unbifurcated subgraph cm^r..z to a substring
881  *           sq->sq[i0..j0]. r may be a start state (when
882  *           the wedge problem comes from being a special case
883  *           of a generic problem) or a non-insert state
884  *           (D, MP, ML, MR) (when the wedge comes from a
885  *           previous wedge_splitter), or indeed, any non-end
886  *           state (when wedge comes from a local begin).
887  *           z, however, is always an end state.
888  *
889  *           Attaches the optimal trace T(r..z), exclusive
890  *           of r and inclusive of z, to the growing trace tr.
891  *
892  *           Deal with a divide and conquer boundary condition:
893  *           the next non-insert state after r is the end state z.
894  *           All remaining sequence of i0..j0 that r doesn't emit
895  *           must be dealt with by insert states.
896  *
897  * Args:     cm          - model
898  *           sq          - digitized sequence 1..L
899  *           tr          - the traceback we're adding on to.
900  *           r           - index of the first state in the subgraph
901  *           z           - index of an end state (E_st) in the model
902  *           i0          - start in the sequence (1..L)
903  *           j0          - end in the sequence (1..L)
904  *
905  * Returns:  The score of the best parse in bits.
906  */
907 static float
wedge_splitter(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int j0)908 wedge_splitter(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr, int r, int z, int i0, int j0)
909 {
910   float ***alpha;
911   float ***beta;
912   struct deckpool_s *pool;
913   float sc;
914   float best_sc;
915   int   v,w,y;
916   int   W;
917   int   d, jp, j;
918   int   best_v, best_d, best_j;
919   int   midnode;
920   int   b;	/* optimal local begin: b = argmax_v alpha_v(i0,j0) + t_0(v) */
921   float bsc;	/* score for optimal local begin      */
922 
923   /* 1. If the wedge problem is either a boundary condition,
924    *    or small enough, solve it with inside^T and append
925    *    the trace to tr.
926    *    It's formally possible that someone could set RAMLIMIT
927    *    to something so small that even the boundary condition
928    *    couldn't be done with inside^T - but that'd be a silly
929    *    thing to do, so we ignore RAMLIMIT in that case.
930    */
931   if (cm->ndidx[z] == cm->ndidx[r] + 1 ||
932       insideT_size(cm, L, r, z, i0, j0) < RAMLIMIT)
933     {
934       ESL_DPRINTF2(("#DEBUG: Solving a wedge:   G%d[%s]..%d[%s], %d..%d\n",
935 		r, UniqueStatetype(cm->stid[r]),
936 		z, UniqueStatetype(cm->stid[z]),
937 		i0,j0));
938       sc = insideT(cm, dsq, L, tr, r, z, i0, j0, (r==0),
939 		   NULL, NULL); /* two NULLs mean 'don't use bands' */
940 
941       return sc;
942     }
943 
944   /* 2. Find our split set, w..y
945    *    We choose the node in the middle.
946    *    This can't be a BIF_nd (we're a wedge), or an END_nd (midnode
947    *    can't be z) but it could be any other node including
948    *    begin nodes (i.e. it might be that w==y).
949    */
950   midnode = cm->ndidx[r] + ((cm->ndidx[z] - cm->ndidx[r]) / 2);
951   w = cm->nodemap[midnode];
952   y = cm->cfirst[w]-1;
953 
954   /* 3. Calculate inside up to w, and outside down to y.
955    *    We rely on a side effect of how deallocation works
956    *    in these routines; the w..y decks are guaranteed
957    *    to be retained.
958    *    b will contain the optimal 0->v state for a local begin, and bsc
959    *    is the score for using it.
960    *    beta[cm->M] will contain the EL deck, if needed for local ends.
961    */
962   inside(cm, dsq, L, w, z, i0, j0, BE_EFFICIENT,
963 	 NULL, &alpha, NULL, &pool, NULL,
964 	 (r==0), &b, &bsc);
965   outside(cm, dsq, L, r, y, i0, j0, BE_EFFICIENT, NULL, &beta, pool, NULL);
966 
967   /* 4. Find the optimal split at the split set: best_v, best_d, best_j
968    */
969   W = j0-i0+1;
970   best_sc = IMPOSSIBLE;
971   for (v = w; v <= y; v++)
972     for (jp = 0; jp <= W; jp++)
973       {
974 	j = i0-1+jp;
975 	for (d = 0; d <= jp; d++)
976 	  if ((sc = alpha[v][j][d] + beta[v][j][d]) > best_sc)
977 	    {
978 	      best_sc = sc;
979 	      best_v  = v;
980 	      best_d  = d;
981 	      best_j  = j;
982 	    }
983       }
984 
985   /* Local alignment ends only: maybe we're better off in EL,
986    * not in the split set?
987    */
988   if (cm->flags & CMH_LOCAL_END) {
989     for (jp = 0; jp <= W; jp++)
990       {
991 	j = i0-1+jp;
992 	for (d = 0; d <= jp; d++)
993 	  if ((sc = beta[cm->M][j][d]) > best_sc) {
994 	    best_sc = sc;
995 	    best_v  = -1;	/* flag for local alignment. */
996 	    best_j  = j;
997 	    best_d  = d;
998 	  }
999       }
1000   }
1001 
1002   /* Local alignment begins only: maybe we're better off in the root.
1003    */
1004   if (r==0 && (cm->flags & CMH_LOCAL_BEGIN)) {
1005     if (bsc > best_sc) {
1006       best_sc = bsc;
1007       best_v  = -2;		/* flag for local alignment */
1008       best_j  = j0;
1009       best_d  = W;
1010     }
1011   }
1012 
1013   /* free now, before recursing!
1014    */
1015   free_vjd_matrix(alpha, cm->M, i0, j0);
1016   free_vjd_matrix(beta,  cm->M, i0, j0);
1017 
1018   /* If we're in EL, instead of the split set, the optimal alignment
1019    * is entirely in a V problem that's still above us. The TRUE
1020    * flag sets useEL. It doesn't matter which state in the split
1021    * set w..y we use as the end of the graph; vinside() will have to
1022    * initialize the whole thing to IMPOSSIBLE anyway.
1023    */
1024   if (best_v == -1) {
1025     v_splitter(cm, dsq, L, tr, r, w, i0, best_j-best_d+1, best_j, j0, TRUE);
1026     return best_sc;
1027   }
1028 
1029   /* If we're in the root because of a local begin, the local alignment
1030    * is entirely in a wedge problem that's still below us, rooted at b.
1031    * The FALSE flag prohibits any more local begins in this and subsequent
1032    * problems.
1033    */
1034   if (best_v == -2) {
1035     InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b);
1036     wedge_splitter(cm, dsq, L, tr, b, z, i0, j0);
1037     return best_sc;
1038   }
1039 
1040   /* Else (usual case): the optimal split into a V problem and a wedge problem:
1041    *    i1 = best_j-best_d+1, j1 = best_j
1042    *    the V problem:     r..v, i0..i1, j1..j0
1043    *    the wedge problem: v..z, i1..j1
1044    *
1045    *    These have to solved in the order given because we're
1046    *    constructing the trace in postorder traversal.
1047    */
1048   ESL_DPRINTF2(("#DEBUG: Wedge splitter:\n"));
1049   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
1050 		r, UniqueStatetype(cm->stid[r]),
1051 		best_v, UniqueStatetype(cm->stid[best_v]),
1052 		i0, best_j-best_d+1, best_j, j0));
1053   ESL_DPRINTF2(("#DEBUG:    wedge:   G%d[%s]..%d[%s], %d..%d\n",
1054 		best_v, UniqueStatetype(cm->stid[best_v]),
1055 		z, UniqueStatetype(cm->stid[z]),
1056 		best_j-best_d+1, best_j));
1057 
1058   v_splitter(cm, dsq, L, tr, r, best_v, i0, best_j-best_d+1, best_j, j0, FALSE);
1059   wedge_splitter(cm, dsq, L, tr, best_v, z, best_j-best_d+1, best_j);
1060   return best_sc;
1061 }
1062 
1063 /* Function: v_splitter()
1064  * Date:     SRE, Thu May 31 19:47:57 2001 [Kaldi's]
1065  *
1066  * Purpose:  Solve a "V problem": best parse of an unbifurcated
1067  *           subgraph cm^r..z to a one-hole subsequence
1068  *           i0..i1 // j1..j0.
1069  *
1070  *           Attaches the optimal trace T(r..z), exclusive of
1071  *           r, inclusive of z, to the growing trace tr.
1072  *
1073  *           r and z can be any non-insert state.
1074  *
1075  * Args:     cm          -  model
1076  *           sq          - digitized sequence 1..L
1077  *           tr          - the traceback we're adding on to.
1078  *           r           - index of the first state in the subgraph
1079  *           z           - index of the last state in the subgraph
1080  *           i0,i1       - first part of the subsequence (1..L)
1081  *           j1,j0       - second part of the subsequence (1..L)
1082  *           useEL       - TRUE if i1,j1 aligned to EL, not z
1083  *
1084  * Returns:  (void)
1085  */
1086 static void
v_splitter(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int i1,int j1,int j0,int useEL)1087 v_splitter(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
1088 	   int r, int z, int i0, int i1, int j1, int j0,
1089 	   int useEL)
1090 {
1091   float ***alpha, ***beta;      /* inside and outside matrices */
1092   struct deckpool_s *pool;      /* pool for holding alloced decks */
1093   float sc;			/* tmp variable holding a score */
1094   int   v,w,y;			/* state indexes */
1095   int   ip,jp;
1096   int   best_v;
1097   int   best_i, best_j;		/* optimal i', j' split point */
1098   float best_sc;		/* score at optimal split point */
1099   int   midnode;
1100   int   b;			/* optimal choice for a 0->b local begin  */
1101   float bsc;			/* score if we use the local begin */
1102 
1103   /* 1. If the V problem is either a boundary condition, or small
1104    *    enough, solve it with v_inside^T and append the trace to tr.
1105    *    (With local alignment, we might even see a lone B state
1106    *     get handed to v_splitter(); hence the r==z case.)
1107    */
1108    if (cm->ndidx[z] == cm->ndidx[r] + 1 || r == z ||
1109       vinsideT_size(cm, r, z, i0, i1, j1, j0) < RAMLIMIT)
1110     {
1111       ESL_DPRINTF2(("#DEBUG: Solving a V:   G%d[%s]..%d[%s], %d..%d//%d..%d\n",
1112 		r, UniqueStatetype(cm->stid[r]),
1113 		z, UniqueStatetype(cm->stid[z]),
1114 		i0,j1,j1,j0));
1115       vinsideT(cm, dsq, L, tr, r, z, i0, i1, j1, j0, useEL, (r==0),
1116 		NULL, NULL); /* two NULLs mean 'don't use bands' */
1117       return;
1118     }
1119 
1120   /* 2. Find our split set, w..y.
1121    *    Choose the node in the middle.
1122    */
1123   midnode = cm->ndidx[r] + ((cm->ndidx[z] - cm->ndidx[r]) / 2);
1124   w = cm->nodemap[midnode];
1125   y = cm->cfirst[w]-1;
1126 
1127   /* 3. Calculate v_inside up to w, and v_outside down to y.
1128    *    As with wedge_splitter(), we rely on a side effect of how
1129    *    deallocation works, so the w..y decks are retained
1130    *    in alpha and beta even though we're in small memory mode.
1131    *    beta[cm->M] is the EL deck, needed for local ends.
1132    */
1133   vinside (cm, dsq, L, w, z, i0, i1, j1, j0, useEL, BE_EFFICIENT,
1134 	   NULL, &alpha, NULL, &pool, NULL, (r==0), &b, &bsc);
1135   voutside(cm, dsq, L, r, y, i0, i1, j1, j0, useEL, BE_EFFICIENT,
1136 	   NULL, &beta,  pool, NULL);
1137 
1138   /* 4. Find the optimal split: v, ip, jp.
1139    */
1140   best_sc = IMPOSSIBLE;
1141   for (v = w; v <= y; v++)
1142     for (ip = 0; ip <= i1-i0; ip++)
1143       for (jp = 0; jp <= j0-j1; jp++)
1144 	if ((sc = alpha[v][jp][ip] + beta[v][jp][ip]) > best_sc)
1145 	  {
1146 	    best_sc = sc;
1147 	    best_v  = v;
1148 	    best_i  = ip + i0;
1149 	    best_j  = jp + j1;
1150 	  }
1151 
1152   /* Local alignment ends: maybe we're better off in EL, not
1153    * the split set?
1154    */
1155   if (useEL && (cm->flags & CMH_LOCAL_END)) {
1156     for (ip = 0; ip <= i1-i0; ip++)
1157       for (jp = 0; jp <= j0-j1; jp++)
1158 	if ((sc = beta[cm->M][jp][ip]) > best_sc) {
1159 	  best_sc = sc;
1160 	  best_v  = -1;
1161 	  best_i  = ip + i0;
1162 	  best_j  = jp + j1;
1163 	}
1164   }
1165 
1166   /* Local alignment begins: maybe we're better off in root...
1167    */
1168   if (r==0 && (cm->flags & CMH_LOCAL_BEGIN)) {
1169     if (bsc > best_sc) {
1170       best_sc = bsc;
1171       best_v  = -2;
1172       best_i  = i0;
1173       best_j  = j0;
1174     }
1175   }
1176 
1177   /* Free now, before recursing!
1178    */
1179   free_vji_matrix(alpha, cm->M, j1, j0);
1180   free_vji_matrix(beta,  cm->M, j1, j0);
1181 
1182   /* If we're in EL, instead of the split set, the optimal
1183    * alignment is entirely in a V problem that's still above us.
1184    * The TRUE flag sets useEL; we propagate allow_begin.
1185    */
1186   if (best_v == -1) {
1187     v_splitter(cm, dsq, L, tr, r, w, i0, best_i, best_j, j0, TRUE);
1188     return;
1189   }
1190 
1191   /* If we used a local begin, the optimal alignment is
1192    * entirely in a V problem that's still below us, rooted
1193    * at b, for the entire one-hole sequence. The FALSE
1194    * flag prohibits more local begin transitions; we propagate
1195    * useEL.
1196    */
1197   if (best_v == -2) {
1198     if (b != z)
1199       {
1200 	InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b);
1201       }
1202     v_splitter(cm, dsq, L, tr, b, z, i0, i1, j1, j0, useEL);
1203     return;
1204   }
1205 
1206   /* The optimal split into two V problems:
1207    *    V:   r..v, i0..i', j'..j0
1208    *    V:   v..z, i'..i1, j1..j'
1209    * Solve in this order, because we're constructing the
1210    * trace in postorder traversal.
1211    */
1212   ESL_DPRINTF2(("#DEBUG: V splitter:\n"));
1213   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
1214 		r, UniqueStatetype(cm->stid[r]),
1215 		best_v, UniqueStatetype(cm->stid[best_v]),
1216 		i0, best_i, best_j, j0));
1217   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
1218 		best_v, UniqueStatetype(cm->stid[best_v]),
1219 		z, UniqueStatetype(cm->stid[z]),
1220 		best_i, i1, j1, best_j));
1221 
1222   v_splitter(cm, dsq, L, tr, r,      best_v, i0,     best_i, best_j, j0, FALSE);
1223   v_splitter(cm, dsq, L, tr, best_v, z,      best_i, i1,     j1,     best_j, useEL);
1224   return;
1225 }
1226 
1227 
1228 /*****************************************************************
1229  * The alignment engines:
1230  *     inside   - given generic or wedge problem G^r_z to i0..j0, return score and matrix
1231  *     outside  - given unbifurcated G^r_z to i0..j0, return matrix
1232  *
1233  *     vinside  - given V problem G^r_z to i0..i1//j1..j0, return score and matrix
1234  *     voutside - given unbifurcated G^r_z to i0..i1//j1..j0, return matrix
1235  ******************************************************************/
1236 
1237 /* Function: inside()
1238  * Date:     SRE, Mon Aug  7 13:15:37 2000 [St. Louis]
1239  *
1240  * Purpose:  Run the inside phase of a CYK alignment algorithm, on a
1241  *           subsequence from i0..j0, using a subtree of a model
1242  *           anchored at a start state vroot, and ending at an end
1243  *           state vend. (It is a feature of the model layout in
1244  *           a CM structure that all subtrees are contiguous in the
1245  *           model.)
1246  *
1247  *           A note on the loop conventions. We're going to keep the
1248  *           sequence (sq) and the matrix (alpha) in the full coordinate
1249  *           system: [0..v..M-1][0..j..L][0..d..j]. However, we're
1250  *           only calculating a part of that matrix: only vroot..vend
1251  *           in the decks, i0-1..j in the rows, and up to j0-i0+1 in
1252  *           the columns (d dimension). Where this is handled the most
1253  *           is in two variables: W, which is the length of the subsequence
1254  *           (j0-i0+1), and is oft used in place of L in the usual CYK;
1255  *           and jp (read: j'), which is the *relative* j w.r.t. the
1256  *           subsequence, ranging from 0..W, and then d ranges from
1257  *           0 to jp, and j is calculated from jp (i0-1+jp).
1258  *
1259  *           The caller is allowed to provide us with a preexisting
1260  *           matrix and/or deckpool (thru "alpha" and "dpool"), or
1261  *           have them newly created by passing NULL. If we pass in an
1262  *           alpha, we expect that alpha[vroot..vend] are all NULL
1263  *           decks already; any other decks <vroot and >vend will
1264  *           be preserved. If we pass in a dpool, the decks *must* be
1265  *           sized for the same subsequence i0,j0.
1266  *
1267  *           Note that the (alpha, ret_alpha) calling idiom allows the
1268  *           caller to provide an existing matrix or not, and to
1269  *           retrieve the calculated matrix or not, in any combination.
1270  *
1271  *           We also deal with local begins, by keeping track of the optimal
1272  *           state that we could enter and account for the whole target
1273  *           sequence: b = argmax_v  alpha_v(i0,j0) + log t_0(v),
1274  *           and bsc is the score for that.
1275  *
1276  *           If vroot==0, i0==1, and j0==L (e.g. a complete alignment),
1277  *           the optimal alignment might use a local begin transition, 0->b,
1278  *           and we'd have to be able to trace that back. For any
1279  *           problem where the caller sets allow_begin, we return a valid b
1280  *           (the optimal 0->b choice) and bsc (the score if 0->b is used).
1281  *           If a local begin is part of the optimal parse tree, the optimal
1282  *           alignment score returned by inside() will be bsc and yshad[0][L][L]
1283  *           will be USE_LOCAL_BEGIN, telling insideT() to check b and
1284  *           start with a local 0->b entry transition. When inside()
1285  *           is called on smaller subproblems (v != 0 || i0 > 1 || j0
1286  *           < L), we're using inside() as an engine in divide &
1287  *           conquer, and we don't use the overall return score nor
1288  *           shadow matrices, but we do need allow_begin, b, and bsc for
1289  *           divide&conquer to sort out where a local begin might be used.
1290  *
1291  * Args:     cm        - the model    [0..M-1]
1292  *           sq        - the sequence [1..L]
1293  *           vroot     - first start state of subtree (0, for whole model)
1294  *           vend      - last end state of subtree (cm->M-1, for whole model)
1295  *           i0        - first position in subseq to align (1, for whole seq)
1296  *           j0        - last position in subseq to align (L, for whole seq)
1297  *           do_full   - if TRUE, we save all the decks in alpha, instead of
1298  *                       working in our default memory-efficient mode where
1299  *                       we reuse decks and only the uppermost deck (vroot) is valid
1300  *                       at the end.
1301  *           alpha     - if non-NULL, this is an existing matrix, with NULL
1302  *                       decks for vroot..vend, and we'll fill in those decks
1303  *                       appropriately instead of creating a new matrix
1304  *           ret_alpha - if non-NULL, return the matrix with one or more
1305  *                       decks available for examination (see "do_full")
1306  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
1307  *                       but usually containing one or more allocated decks sized
1308  *                       for this subsequence i0..j0.
1309  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
1310  *                       *only* be valid on exactly the same i0..j0 subseq,
1311  *                       because of the size of the subseq decks.
1312  *           ret_shadow- if non-NULL, the caller wants a shadow matrix, because
1313  *                       he intends to do a traceback.
1314  *           allow_begin- TRUE to allow 0->b local alignment begin transitions.
1315  *           ret_b     - best local begin state, or NULL if unwanted
1316  *           ret_bsc   - score for using ret_b, or NULL if unwanted
1317  *
1318  *
1319  * Returns: Score of the optimal alignment.
1320  */
1321 static float
inside(CM_t * cm,ESL_DSQ * dsq,int L,int vroot,int vend,int i0,int j0,int do_full,float *** alpha,float **** ret_alpha,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool,void **** ret_shadow,int allow_begin,int * ret_b,float * ret_bsc)1322 inside(CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0, int do_full,
1323        float ***alpha, float ****ret_alpha,
1324        struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
1325        void ****ret_shadow,
1326        int allow_begin, int *ret_b, float *ret_bsc)
1327 {
1328   int      status;
1329   float  **end;         /* we re-use the end deck. */
1330   int      nends;       /* counter that tracks when we can release end deck to the pool */
1331   int     *touch;       /* keeps track of how many higher decks still need this deck */
1332   int      v,y,z;	/* indices for states  */
1333   int      j,d,i,k;	/* indices in sequence dimensions */
1334   float    sc;		/* a temporary variable holding a score */
1335   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
1336   int      W;		/* subsequence length */
1337   int      jp;		/* j': relative position in the subsequence  */
1338   void  ***shadow;      /* shadow matrix for tracebacks */
1339   int    **kshad;       /* a shadow deck for bifurcations */
1340   char   **yshad;       /* a shadow deck for every other kind of state */
1341   int      b;		/* best local begin state */
1342   float    bsc;		/* score for using the best local begin state */
1343 
1344   /* Allocations and initializations
1345    */
1346   b   = -1;
1347   bsc = IMPOSSIBLE;
1348   W   = j0-i0+1;		/* the length of the subsequence -- used in many loops  */
1349 				/* if caller didn't give us a deck pool, make one */
1350   if (dpool == NULL) dpool = deckpool_create();
1351   if (! deckpool_pop(dpool, &end))
1352     end = alloc_vjd_deck(L, i0, j0);
1353   nends = CMSubtreeCountStatetype(cm, vroot, E_st);
1354   for (jp = 0; jp <= W; jp++) {
1355     j = i0+jp-1;		/* e.g. j runs from 0..L on whole seq */
1356     end[j][0] = 0.;
1357     for (d = 1; d <= jp; d++) end[j][d] = IMPOSSIBLE;
1358   }
1359 
1360   /* if caller didn't give us a matrix, make one.
1361    * It's important to allocate for M+1 decks (deck M is for EL, local
1362    * alignment) - even though Inside doesn't need EL, Outside does,
1363    * and we might reuse this memory in a call to Outside.
1364    */
1365   if (alpha == NULL) {
1366     ESL_ALLOC(alpha, sizeof(float **) * (cm->M+1));
1367     for (v = 0; v <= cm->M; v++) alpha[v] = NULL;
1368   }
1369 
1370   ESL_ALLOC(touch, sizeof(int) * (cm->M+1));
1371   for (v = 0;     v < vroot; v++) touch[v] = 0;
1372   for (v = vroot; v <= vend; v++) touch[v] = cm->pnum[v];
1373   for (v = vend+1;v < cm->M; v++) touch[v] = 0;
1374 
1375   /* The shadow matrix, if caller wants a traceback.
1376    * We do some pointer tricks here to save memory. The shadow matrix
1377    * is a void ***. Decks may either be char ** (usually) or
1378    * int ** (for bifurcation decks). Watch out for the casts.
1379    * For most states we only need
1380    * to keep y as traceback info, and y <= 6. For bifurcations,
1381    * we need to keep k, and k <= L, and L might be fairly big.
1382    * (We could probably limit k to an unsigned short ... anyone
1383    * aligning an RNA > 65536 would need a big computer... but
1384    * we'll hold off on that for now. We could also pack more
1385    * traceback pointers into a smaller space since we only really
1386    * need 3 bits, not 8.)
1387    */
1388   if (ret_shadow != NULL) {
1389     ESL_ALLOC(shadow, sizeof(void **) * cm->M);
1390     for (v = 0; v < cm->M; v++) shadow[v] = NULL;
1391   }
1392 
1393   /* Main recursion
1394    */
1395   for (v = vend; v >= vroot; v--)
1396     {
1397       /* First we need a deck to fill in.
1398        * 1. if we're an E, reuse the end deck (and it's already calculated)
1399        * 2. else, see if we can take something from the pool
1400        * 3. else, allocate a new deck.
1401        */
1402       if (cm->sttype[v] == E_st) {
1403 	alpha[v] = end; continue;
1404       }
1405       if (! deckpool_pop(dpool, &(alpha[v])))
1406 	alpha[v] = alloc_vjd_deck(L, i0, j0);
1407 
1408       if (ret_shadow != NULL) {
1409 	if (cm->sttype[v] == B_st) {
1410 	  kshad     = alloc_vjd_kshadow_deck(L, i0, j0);
1411 	  shadow[v] = (void **) kshad;
1412 	} else {
1413 	  yshad     = alloc_vjd_yshadow_deck(L, i0, j0);
1414 	  shadow[v] = (void **) yshad;
1415 	}
1416       }
1417 
1418       if (cm->sttype[v] == D_st || cm->sttype[v] == S_st)
1419 	{
1420 	  for (jp = 0; jp <= W; jp++) {
1421 	    j = i0-1+jp;
1422 	    for (d = 0; d <= jp; d++)
1423 	      {
1424 		y = cm->cfirst[v];
1425 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
1426 		/* treat EL as emitting only on self transition */
1427 		if (ret_shadow != NULL) yshad[j][d]  = USED_EL;
1428 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
1429 		  if ((sc = alpha[y+yoffset][j][d] + cm->tsc[v][yoffset]) >  alpha[v][j][d]) {
1430 		    alpha[v][j][d] = sc;
1431 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
1432 		  }
1433 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
1434 	      }
1435 //printf("j%2d v%2d ",j,v);
1436 //for (d = 0; d <= W && d <= j; d++) { printf("%10.2e ",alpha[v][j][d]); }
1437 //printf("\n");
1438 	  }
1439 	}
1440       else if (cm->sttype[v] == B_st)
1441 	{
1442 	  for (jp = 0; jp <= W; jp++) {
1443 	    j = i0-1+jp;
1444 	    for (d = 0; d <= jp; d++)
1445 	      {
1446 		y = cm->cfirst[v];
1447 		z = cm->cnum[v];
1448 
1449 		alpha[v][j][d] = alpha[y][j][d] + alpha[z][j][0];
1450 		if (ret_shadow != NULL) kshad[j][d] = 0;
1451 		for (k = 1; k <= d; k++)
1452 		  if ((sc = alpha[y][j-k][d-k] + alpha[z][j][k]) > alpha[v][j][d]) {
1453 		    alpha[v][j][d] = sc;
1454 		    if (ret_shadow != NULL) kshad[j][d] = k;
1455 		  }
1456 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
1457 	      }
1458 //printf("j%2d v%2d ",j,v);
1459 //for (d = 0; d <= W && d <= j; d++) { printf("%10.2e ",alpha[v][j][d]); }
1460 //printf("\n");
1461 	  }
1462 	}
1463       else if (cm->sttype[v] == MP_st)
1464 	{
1465 	  for (jp = 0; jp <= W; jp++) {
1466 	    j = i0-1+jp;
1467 	    alpha[v][j][0] = IMPOSSIBLE;
1468 	    if (jp > 0) alpha[v][j][1] = IMPOSSIBLE;
1469 	    for (d = 2; d <= jp; d++)
1470 	      {
1471 		y = cm->cfirst[v];
1472 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
1473 		/* treat EL as emitting only on self transition */
1474 		if (ret_shadow != NULL) yshad[j][d] = USED_EL;
1475 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
1476 		  if ((sc = alpha[y+yoffset][j-1][d-2] + cm->tsc[v][yoffset]) >  alpha[v][j][d]) {
1477 		    alpha[v][j][d] = sc;
1478 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
1479 		  }
1480 
1481 		i = j-d+1;
1482 		if (dsq[i] < cm->abc->K && dsq[j] < cm->abc->K)
1483 		  alpha[v][j][d] += cm->esc[v][(int) (dsq[i]*cm->abc->K+dsq[j])];
1484 		else
1485 		  alpha[v][j][d] += DegeneratePairScore(cm->abc, cm->esc[v], dsq[i], dsq[j]);
1486 
1487 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
1488 	      }
1489 //printf("j%2d v%2d ",j,v);
1490 //for (d = 0; d <= W && d <= j; d++) { printf("%10.2e ",alpha[v][j][d]); }
1491 //printf("\n");
1492 	  }
1493 	}
1494       else if (cm->sttype[v] == IL_st || cm->sttype[v] == ML_st)
1495 	{
1496 	  for (jp = 0; jp <= W; jp++) {
1497 	    j = i0-1+jp;
1498 	    alpha[v][j][0] = IMPOSSIBLE;
1499 	    for (d = 1; d <= jp; d++)
1500 	      {
1501 		y = cm->cfirst[v];
1502 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
1503 		/* treat EL as emitting only on self transition */
1504 		if (ret_shadow != NULL) yshad[j][d] = USED_EL;
1505 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
1506 		  if ((sc = alpha[y+yoffset][j][d-1] + cm->tsc[v][yoffset]) >  alpha[v][j][d]) {
1507 		    alpha[v][j][d] = sc;
1508 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
1509 		  }
1510 
1511 		i = j-d+1;
1512 		if (dsq[i] < cm->abc->K)
1513 		  alpha[v][j][d] += cm->esc[v][dsq[i]];
1514 		else
1515 		  alpha[v][j][d] += esl_abc_FAvgScore(cm->abc, dsq[i], cm->esc[v]);
1516 
1517 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
1518 	      }
1519 //printf("j%2d v%2d ",j,v);
1520 //for (d = 0; d <= W && d <= j; d++) { printf("%10.2e ",alpha[v][j][d]); }
1521 //printf("\n");
1522 	  }
1523 	}
1524       else if (cm->sttype[v] == IR_st || cm->sttype[v] == MR_st)
1525 	{
1526 	  for (jp = 0; jp <= W; jp++) {
1527 	    j = i0-1+jp;
1528 	    alpha[v][j][0] = IMPOSSIBLE;
1529 	    for (d = 1; d <= jp; d++)
1530 	      {
1531 		y = cm->cfirst[v];
1532 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
1533 		/* treat EL as emitting only on self transition */
1534 		if (ret_shadow != NULL) yshad[j][d] = USED_EL;
1535 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
1536 		  if ((sc = alpha[y+yoffset][j-1][d-1] + cm->tsc[v][yoffset]) > alpha[v][j][d]) {
1537 		    alpha[v][j][d] = sc;
1538 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
1539 		  }
1540 		if (dsq[j] < cm->abc->K)
1541 		  alpha[v][j][d] += cm->esc[v][dsq[j]];
1542 		else
1543 		  alpha[v][j][d] += esl_abc_FAvgScore(cm->abc, dsq[j], cm->esc[v]);
1544 
1545 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
1546 	      }
1547 //printf("j%2d v%2d ",j,v);
1548 //for (d = 0; d <= W && d <= j; d++) { printf("%10.2e ",alpha[v][j][d]); }
1549 //printf("\n");
1550 	  }
1551 	}				/* finished calculating deck v. */
1552 
1553       /* Check for local begin getting us to the root.
1554        * This is "off-shadow": if/when we trace back, we'll handle this
1555        * case separately (and we'll know to do it because we'll immediately
1556        * see a USED_LOCAL_BEGIN flag in the shadow matrix, telling us
1557        * to jump right to state b; see below)
1558        */
1559       if (allow_begin && alpha[v][j0][W] + cm->beginsc[v] > bsc)
1560 	{
1561 	  b   = v;
1562 	  bsc = alpha[v][j0][W] + cm->beginsc[v];
1563 	}
1564 
1565       /* Check for whether we need to store an optimal local begin score
1566        * as the optimal overall score, and if we need to put a flag
1567        * in the shadow matrix telling insideT() to use the b we return.
1568        */
1569       if (allow_begin && v == 0 && bsc > alpha[0][j0][W]) {
1570 	alpha[0][j0][W] = bsc;
1571 	if (ret_shadow != NULL) yshad[j0][W] = USED_LOCAL_BEGIN;
1572       }
1573 
1574       /* Now, if we're trying to reuse memory in our normal mode (e.g. ! do_full):
1575        * Look at our children; if they're fully released, take their deck
1576        * into the pool for reuse.
1577        */
1578       if (! do_full) {
1579 	if (cm->sttype[v] == B_st)
1580 	  { /* we can definitely release the S children of a bifurc. */
1581 	    y = cm->cfirst[v]; deckpool_push(dpool, alpha[y]); alpha[y] = NULL;
1582 	    z = cm->cnum[v];   deckpool_push(dpool, alpha[z]); alpha[z] = NULL;
1583 	  }
1584 	else
1585 	  {
1586 	    for (y = cm->cfirst[v]; y < cm->cfirst[v]+cm->cnum[v]; y++)
1587 	      {
1588 		touch[y]--;
1589 		if (touch[y] == 0)
1590 		  {
1591 		    if (cm->sttype[y] == E_st) {
1592 		      nends--;
1593 		      if (nends == 0) { deckpool_push(dpool, end); end = NULL;}
1594 		    } else
1595 		      deckpool_push(dpool, alpha[y]);
1596 		    alpha[y] = NULL;
1597 		  }
1598 	      }
1599 	  }
1600       }
1601   } /* end loop over all v */
1602 
1603   /* debug_print_alpha(alpha, cm, L);*/
1604 
1605   /* Now we free our memory.
1606    * if we've got do_full set, all decks vroot..vend are now valid (end is shared).
1607    * else, only vroot deck is valid now and all others vroot+1..vend are NULL,
1608    * and end is NULL.
1609    * We could check this status to be sure (and we used to) but now we trust.
1610    */
1611   sc       = alpha[vroot][j0][W];
1612   if (ret_b != NULL)   *ret_b   = b;    /* b is -1 if allow_begin is FALSE. */
1613   if (ret_bsc != NULL) *ret_bsc = bsc;  /* bsc is IMPOSSIBLE if allow_begin is FALSE */
1614 
1615   /* If the caller doesn't want the matrix, free it (saving the decks in the pool!)
1616    * Else, pass it back to him.
1617    */
1618   if (ret_alpha == NULL) {
1619     for (v = vroot; v <= vend; v++) /* be careful of our reuse of the end deck -- free it only once */
1620       if (alpha[v] != NULL) {
1621 	if (cm->sttype[v] != E_st) { deckpool_push(dpool, alpha[v]); alpha[v] = NULL; }
1622 	else end = alpha[v];
1623       }
1624     if (end != NULL) { deckpool_push(dpool, end); end = NULL; }
1625     free(alpha);
1626   } else *ret_alpha = alpha;
1627 
1628   /* If the caller doesn't want the deck pool, free it.
1629    * Else, pass it back to him.
1630    */
1631   if (ret_dpool == NULL) {
1632     while (deckpool_pop(dpool, &end)) free_vjd_deck(end, i0, j0);
1633     deckpool_free(dpool);
1634   } else {
1635     *ret_dpool = dpool;
1636   }
1637 
1638   free(touch);
1639   if (ret_shadow != NULL) *ret_shadow = shadow;
1640   return sc;
1641 
1642  ERROR:
1643   cm_Fail("Memory allocation error.\n");
1644   return 0.; /* never reached */
1645 }
1646 
1647 
1648 /* Function: outside()
1649  * Date:     SRE, Tue Aug  8 10:42:52 2000 [St. Louis]
1650  *
1651  * Purpose:  Run the outside version of a CYK alignment algorithm,
1652  *           on a subsequence i0..j0 of a digitized sequence dsq [1..L],
1653  *           using a linear segment of a model anchored at a start state
1654  *           (possibly the absolute root, 0) or (MP,ML,MR,D) and ending at an end
1655  *           state, bifurcation state, or (MP|ML|MR|D) vend. There must be no
1656  *           start, end, or bifurcation states in the path other than
1657  *           these termini: this is not a full Outside implementation,
1658  *           it is only the bit that's necessary in the divide
1659  *           and conquer alignment algorithm.
1660  *
1661  *           Much of the behavior in calling conventions, etc., is
1662  *           analogous to the cyk_inside_engine(); see its preface
1663  *           for more info.
1664  *
1665  *           At the end of the routine, the bottom deck (vend) is valid; the function itself returns nothing (see ret_beta, ret_dpool).
1666  *
1667  * Args:     cm        - the model    [0..M-1]
1668  *           dsq       - the digitized sequence [1..L]; L (the next argument) is its length
1669  *           vroot     - first state of linear model segment (S; MP|ML|MR|D)
1670  *           vend      - last state of linear model segment  (B; E; MP|ML|MR|D)
1671  *           i0        - first position in subseq to align (1, for whole seq)
1672  *           j0        - last position in subseq to align (L, for whole seq)
1673  *           do_full   - if TRUE, we save all the decks in beta, instead of
1674  *                       working in our default memory-efficient mode where
1675  *                       we reuse decks and only the lowermost deck (vend) is valid
1676  *                       at the end.
1677  *           beta      - if non-NULL, this is an existing matrix, with NULL
1678  *                       decks for vroot..vend, and we'll fill in those decks
1679  *                       appropriately instead of creating a new matrix
1680  *           ret_beta  - if non-NULL, return the matrix with one or more
1681  *                       decks available for examination (see "do_full")
1682  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
1683  *                       but usually containing one or more allocated decks sized
1684  *                       for this subsequence i0..j0.
1685  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
1686  *                       *only* be valid on exactly the same i0..j0 subseq,
1687  *                       because of the size of the subseq decks.
1688  */
1689 static void
outside(CM_t * cm,ESL_DSQ * dsq,int L,int vroot,int vend,int i0,int j0,int do_full,float *** beta,float **** ret_beta,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool)1690 outside(CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0,
1691 	int do_full, float ***beta, float ****ret_beta,
1692 	struct deckpool_s *dpool, struct deckpool_s **ret_dpool)
1693 {
1694   int      status;		/* not referenced directly here; presumably set by the ESL_ALLOC macro -- confirm */
1695   int      v,y;			/* indices for states */
1696   int      j,d,i;		/* indices in sequence dimensions */
1697   float    sc;			/* a temporary variable holding a score */
1698   int     *touch;               /* keeps track of how many lower decks still need this deck */
1699   float    escore;		/* an emission score, tmp variable */
1700   int      W;			/* subsequence length */
1701   int      jp;			/* j': relative position in the subsequence, 0..W */
1702   int      voffset;		/* index of v in t_v(y) transition scores */
1703   int      w1,w2;		/* bounds of split set */
1704 
1705   /* Allocations and initializations
1706    */
1707   W = j0-i0+1;		/* the length of the subsequence: used in many loops */
1708 
1709   			/* if caller didn't give us a deck pool, make one */
1710   if (dpool == NULL) dpool = deckpool_create();
1711 
1712   /* if caller didn't give us a matrix, make one.
1713    * Allocate room for M+1 decks because we might need the EL deck (M)
1714    * if we're doing local alignment.
1715    */
1716   if (beta == NULL) {
1717     ESL_ALLOC(beta, sizeof(float **) * (cm->M+1));
1718     for (v = 0; v < cm->M+1; v++) beta[v] = NULL;
1719   }
1720 
1721   /* Initialize the root deck.
1722    * If the root is in a split set, initialize the whole split set.
1723    */
1724   w1 = cm->nodemap[cm->ndidx[vroot]]; /* first state in split set */
1725   if (cm->sttype[vroot] == B_st) {    /* special boundary case of Outside on a single B state. */
1726     w2 = w1;
1727     if (vend != vroot) cm_Fail("oh no. not again."); /* a B root is only accepted when the segment is that single state */
1728   } else
1729     w2 = cm->cfirst[w1]-1;	      /* last state in split set w1<=vroot<=w2 */
1730 
1731   for (v = w1; v <= w2; v++) {
1732     if (! deckpool_pop(dpool, &(beta[v])))
1733       beta[v] = alloc_vjd_deck(L, i0, j0);
1734     for (jp = 0; jp <= W; jp++) {
1735       j = i0-1+jp;
1736       for (d = 0; d <= jp; d++)
1737 	beta[v][j][d] = IMPOSSIBLE;
1738     }
1739   }
1740   beta[vroot][j0][W] = 0; /* boundary condition: the only non-IMPOSSIBLE cell in the split set is vroot spanning all of i0..j0 */
1741 
1742   /* Initialize the EL deck at M, if we're doing local alignment w.r.t. ends.
1743    */
1744   if (cm->flags & CMH_LOCAL_END) {
1745     if (! deckpool_pop(dpool, &(beta[cm->M])))
1746       beta[cm->M] = alloc_vjd_deck(L, i0, j0);
1747     for (jp = 0; jp <= W; jp++) {
1748       j = i0-1+jp;
1749       for (d = 0; d <= jp; d++)
1750 	beta[cm->M][j][d] = IMPOSSIBLE;
1751     }
1752 
1753     /* We have to worry about vroot -> EL transitions,
1754      * since we start the main recursion at w2+1. This requires a
1755      * laborious partial unroll of the main recursion, grabbing
1756      * the stuff relevant to a beta[EL] calculation for just the
1757      * vroot->EL transition.
1758      */
1759     if (NOT_IMPOSSIBLE(cm->endsc[vroot])) {
1760       switch (cm->sttype[vroot]) {
1761       case MP_st:
1762 	if (W < 2) break;
1763 	if (dsq[i0] < cm->abc->K && dsq[j0] < cm->abc->K)
1764 	  escore = cm->esc[vroot][(int) (dsq[i0]*cm->abc->K+dsq[j0])];
1765 	else
1766 	  escore = DegeneratePairScore(cm->abc, cm->esc[vroot], dsq[i0], dsq[j0]);
1767 	beta[cm->M][j0-1][W-2] = cm->endsc[vroot] +
1768 	  (cm->el_selfsc * (W-2)) + escore;
1769 
1770 	if (beta[cm->M][j0-1][W-2] < IMPOSSIBLE) beta[cm->M][j0-1][W-2] = IMPOSSIBLE;
1771 	break;
1772       case ML_st:
1773       case IL_st:
1774 	if (W < 1) break;
1775 	if (dsq[i0] < cm->abc->K)
1776 	  escore = cm->esc[vroot][(int) dsq[i0]];
1777 	else
1778 	  escore = esl_abc_FAvgScore(cm->abc, dsq[i0], cm->esc[vroot]);
1779 	beta[cm->M][j0][W-1] = cm->endsc[vroot] +
1780 	  (cm->el_selfsc * (W-1)) + escore;
1781 
1782 	if (beta[cm->M][j0][W-1] < IMPOSSIBLE) beta[cm->M][j0][W-1] = IMPOSSIBLE;
1783 	break;
1784       case MR_st:
1785       case IR_st:
1786 	if (W < 1) break;
1787 	if (dsq[j0] < cm->abc->K)
1788 	  escore = cm->esc[vroot][(int) dsq[j0]];
1789 	else
1790 	  escore = esl_abc_FAvgScore(cm->abc, dsq[j0], cm->esc[vroot]);
1791 	beta[cm->M][j0-1][W-1] = cm->endsc[vroot] +
1792 	  (cm->el_selfsc * (W-1)) + escore;
1793 
1794 	if (beta[cm->M][j0-1][W-1] < IMPOSSIBLE) beta[cm->M][j0-1][W-1] = IMPOSSIBLE;
1795 	break;
1796       case S_st:
1797       case D_st:
1798 	beta[cm->M][j0][W] = cm->endsc[vroot] +
1799 	  (cm->el_selfsc * W);
1800 	if (beta[cm->M][j0][W] < IMPOSSIBLE) beta[cm->M][j0][W] = IMPOSSIBLE;
1801 	break;
1802       case B_st:		/* can't start w/ bifurcation at vroot. */
1803       default: cm_Fail("bogus parent state %d\n", cm->sttype[vroot]);
1804       }
1805     }
1806   }
1807 
1808   ESL_ALLOC(touch, sizeof(int) * cm->M);
1809   for (v = 0;      v < w1; v++) touch[v] = 0; /* note: top of split set w1, not vroot */
1810 
1811   for (v = vend+1; v < cm->M; v++) touch[v] = 0;
1812   for (v = w1; v <= vend; v++) {
1813     if (cm->sttype[v] == B_st) touch[v] = 2; /* well, we'll never use this, but set it anyway. */
1814     else                       touch[v] = cm->cnum[v]; /* one count per child; decremented below as each child deck is finished */
1815   }
1816 
1817 
1818 
1819   /* Main loop down through the decks
1820    */
1821   for (v = w2+1; v <= vend; v++)
1822     {
1823       /* First we need to fetch a deck of memory to fill in;
1824        * we try to reuse a deck but if one's not available we allocate
1825        * a fresh one.
1826        */
1827       if (! deckpool_pop(dpool, &(beta[v])))
1828 	beta[v] = alloc_vjd_deck(L, i0, j0);
1829 
1830       /* Init the whole deck to IMPOSSIBLE
1831        */
1832       for (jp = W; jp >= 0; jp--) {
1833 	j = i0-1+jp;
1834 	for (d = jp; d >= 0; d--)
1835 	  beta[v][j][d] = IMPOSSIBLE;
1836       }
1837 
1838       /* If we can do a local begin into v, also init with that.
1839        * By definition, beta[0][j0][W] == 0.
1840        */
1841       if (vroot == 0 && i0 == 1 && j0 == L && (cm->flags & CMH_LOCAL_BEGIN))
1842 	beta[v][j0][W] = cm->beginsc[v];
1843 
1844       /* main recursion:
1845        */
1846       for (jp = W; jp >= 0; jp--) {
1847 	j = i0-1+jp;
1848 	for (d = jp; d >= 0; d--)
1849 	  {
1850 	    i = j-d+1;
1851 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
1852 	      if (y < vroot) continue; /* deal with split sets */
1853 	      voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
1854 
1855 	      switch(cm->sttype[y]) {
1856 	      case MP_st:
1857 		if (j == j0 || d == jp) continue; /* boundary condition: j+1 > j0 or i-1 < i0; skips to the next parent y */
1858 
1859 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
1860 		  escore = cm->esc[y][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
1861 		else
1862 		  escore = DegeneratePairScore(cm->abc, cm->esc[y], dsq[i-1], dsq[j+1]);
1863 
1864 		if ((sc = beta[y][j+1][d+2] + cm->tsc[y][voffset] + escore) > beta[v][j][d])
1865 		  beta[v][j][d] = sc;
1866 		break;
1867 
1868 	      case ML_st:
1869 	      case IL_st:
1870 		if (d == jp) continue;	/* boundary condition: i-1 < i0 (note when jp=0, d=0) */
1871 
1872 		if (dsq[i-1] < cm->abc->K)
1873 		  escore = cm->esc[y][(int) dsq[i-1]];
1874 		else
1875 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[y]);
1876 
1877 		if ((sc = beta[y][j][d+1] + cm->tsc[y][voffset] + escore) > beta[v][j][d])
1878 		  beta[v][j][d] = sc;
1879 		break;
1880 
1881 	      case MR_st:
1882 	      case IR_st:
1883 		if (j == j0) continue; /* boundary condition: j+1 > j0 */
1884 
1885 		if (dsq[j+1] < cm->abc->K)
1886 		  escore = cm->esc[y][(int) dsq[j+1]];
1887 		else
1888 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[y]);
1889 
1890 		if ((sc = beta[y][j+1][d+1] + cm->tsc[y][voffset] + escore) > beta[v][j][d])
1891 		  beta[v][j][d] = sc;
1892 		break;
1893 
1894 	      case S_st:
1895 	      case E_st:
1896 	      case D_st:
1897 		if ((sc = beta[y][j][d] + cm->tsc[y][voffset]) > beta[v][j][d])
1898 		  beta[v][j][d] = sc;
1899 		break;
1900 
1901 	      default: cm_Fail("bogus child state %d\n", cm->sttype[y]);
1902 	      }/* end switch over states*/
1903 	    } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
1904 	    if (beta[v][j][d] < IMPOSSIBLE) beta[v][j][d] = IMPOSSIBLE;
1905 
1906 
1907 	  } /* ends loop over d. We know all beta[v][j][d] in this row j*/
1908       }/* end loop over jp. We know the beta's for the whole deck.*/
1909 
1910 
1911       /* Deal with local alignment end transitions v->EL
1912        * (EL = deck at M.)
1913        */
1914       if (NOT_IMPOSSIBLE(cm->endsc[v])) {
1915 	for (jp = 0; jp <= W; jp++) {
1916 	  j = i0-1+jp;
1917 	  for (d = 0; d <= jp; d++)
1918 	    {
1919 	      i = j-d+1;
1920 	      switch (cm->sttype[v]) {
1921 	      case MP_st:
1922 		if (j == j0 || d == jp) continue; /* boundary condition: j+1 > j0 or i-1 < i0; skips to the next d */
1923 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
1924 		  escore = cm->esc[v][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
1925 		else
1926 		  escore = DegeneratePairScore(cm->abc, cm->esc[v], dsq[i-1], dsq[j+1]);
1927 		if ((sc = beta[v][j+1][d+2] + cm->endsc[v] +
1928 		     (cm->el_selfsc * d) + escore) > beta[cm->M][j][d])
1929 		  beta[cm->M][j][d] = sc;
1930 		break;
1931 	      case ML_st:
1932 	      case IL_st:
1933 		if (d == jp) continue; /* boundary condition: i-1 < i0 */
1934 		if (dsq[i-1] < cm->abc->K)
1935 		  escore = cm->esc[v][(int) dsq[i-1]];
1936 		else
1937 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[v]);
1938 		if ((sc = beta[v][j][d+1] + cm->endsc[v] +
1939 		     (cm->el_selfsc * d) + escore) > beta[cm->M][j][d])
1940 		  /*(cm->el_selfsc * (d+1)) + escore) > beta[cm->M][j][d])*/
1941 		  beta[cm->M][j][d] = sc;
1942 		break;
1943 	      case MR_st:
1944 	      case IR_st:
1945 		if (j == j0) continue; /* boundary condition: j+1 > j0 */
1946 		if (dsq[j+1] < cm->abc->K)
1947 		  escore = cm->esc[v][(int) dsq[j+1]];
1948 		else
1949 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[v]);
1950 		if ((sc = beta[v][j+1][d+1] + cm->endsc[v] +
1951 		     (cm->el_selfsc * d) + escore) > beta[cm->M][j][d])
1952 		     /*(cm->el_selfsc * (d+1)) + escore) > beta[cm->M][j][d])*/
1953 		  beta[cm->M][j][d] = sc;
1954 		break;
1955 	      case S_st:
1956 	      case D_st:
1957 	      case E_st:
1958 		if ((sc = beta[v][j][d] + cm->endsc[v] +
1959 		     (cm->el_selfsc * d)) > beta[cm->M][j][d])
1960 		  beta[cm->M][j][d] = sc;
1961 		break;
1962 	      case B_st:
1963 	      default: cm_Fail("bogus parent state %d\n", cm->sttype[v]);
1964 		/* note that although B is a valid vend for a segment we'd do
1965                    outside on, B->EL is set to be impossible, by the local alignment
1966                    config. There's no point in having a B->EL because B is a nonemitter
1967                    (indeed, it would introduce an alignment ambiguity). The same
1968 		   alignment case is handled by the X->EL transition where X is the
1969 		   parent consensus state (S, MP, ML, or MR) above the B. Thus,
1970 		   this code is relying on the NOT_IMPOSSIBLE() test, above,
1971 		   to make sure the sttype[vend]=B case never gets into this switch.
1972 		*/
1973 	      } /* end switch over parent state type v */
1974 	    } /* end inner loop over d */
1975 	} /* end outer loop over jp */
1976       } /* end conditional section for dealing w/ v->EL local end transitions */
1977 
1978       /* Look at v's parents; if we're reusing memory (! do_full)
1979        * push the parents that we don't need any more into the pool.
1980        */
1981       if (! do_full) {
1982 	for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
1983 	  touch[y]--;
1984 	  if (touch[y] == 0) { deckpool_push(dpool, beta[y]); beta[y] = NULL; } /* count reached zero: recycle parent deck y */
1985 	}
1986       }
1987     } /* end loop over decks v. */
1988 
1989 #if 0
1990   /* SRE: this code is superfluous, yes??? */
1991   /* Deal with last step needed for local alignment
1992    * w.r.t. ends: left-emitting, zero-scoring EL->EL transitions.
1993    * (EL = deck at M.)
1994    */
1995   if (cm->flags & CMH_LOCAL_END) {
1996     for (jp = W; jp > 0; jp--) { /* careful w/ boundary here */
1997       j = i0-1+jp;
1998       for (d = jp-1; d >= 0; d--) /* careful w/ boundary here */
1999 	if ((sc = beta[cm->M][j][d+1]) > beta[cm->M][j][d])
2000 	  beta[cm->M][j][d] = sc;
2001     }
2002   }
2003 #endif
2004 
2005   /* If the caller doesn't want the matrix, free it.
2006    * (though it would be *stupid* for the caller not to want the
2007    * matrix in the current implementation...)
2008    */
2009   if (ret_beta == NULL) {
2010     for (v = w1; v <= vend; v++) /* start at w1 - top of split set - not vroot */
2011       if (beta[v] != NULL) { deckpool_push(dpool, beta[v]); beta[v] = NULL; }
2012     if (cm->flags & CMH_LOCAL_END) {
2013       deckpool_push(dpool, beta[cm->M]);
2014       beta[cm->M] = NULL;
2015     }
2016     free(beta); /* NOTE(review): this also frees a matrix the caller passed in via beta -- confirm callers expect that */
2017   } else *ret_beta = beta;
2018 
2019   /* If the caller doesn't want the deck pool, free it.
2020    * Else, pass it back to him.
2021    */
2022   if (ret_dpool == NULL) {
2023     float **a;
2024     while (deckpool_pop(dpool, &a)) free_vjd_deck(a, i0, j0);
2025     deckpool_free(dpool);
2026   } else {
2027     *ret_dpool = dpool;
2028   }
2029   free(touch);
2030   return;
2031 
2032  ERROR: /* no explicit goto targets this label here; presumably reached from ESL_ALLOC on allocation failure -- confirm */
2033   cm_Fail("Memory allocation error.\n");
2034 }
2035 
2036 
2037 /* Function: vinside()
2038  * Date:     SRE, Sat Jun  2 09:24:51 2001 [Kaldi's]
2039  *
2040  * Purpose:  Run the inside phase of the CYK alignment algorithm for
2041  *           a V problem: an unbifurcated CM subgraph from
2042  *           r..z, aligned to a one-hole subsequence
2043  *           i0..i1 // j1..j0, exclusive of z,i1,j1.
2044  *
2045  *           This is done in the vji coord system, where
2046  *           both our j and i coordinates are transformed.
2047  *           The Platonic matrix runs [j1..j0][i0..i1].
2048  *           The actual matrix runs [0..j0-j1][0..i1-i0].
2049  *           To transform a sequence coord i to a transformed
2050  *           coord i', subtract i0; to transform i' to i,
2051  *           add i0.
2052  *
2053  *           The conventions for alpha and dpool are the
2054  *           same as cyk_inside_engine().
2055  *
2056  * Args:     cm        - the model    [0..M-1]
2057  *           dsq       - the sequence [1..L]
2058  *           L         - length of the dsq
2059  *           r         - first start state of subtree (0, for whole model)
2060  *           z         - last end state of subtree (cm->M-1, for whole model)
2061  *           i0,i1     - first subseq part of the V problem
2062  *           j1,j0     - second subseq part
2063  *           useEL     - if TRUE, V problem ends at EL/i1/j1, not z/i1/j1
2064  *           do_full   - if TRUE, we save all the decks in alpha, instead of
2065  *                       working in our default memory-efficient mode where
2066  *                       we reuse decks and only the uppermost deck (r) is valid
2067  *                       at the end.
2068  *           a         - if non-NULL, this is an existing matrix, with NULL
2069  *                       decks for r..z, and we'll fill in those decks
2070  *                       appropriately instead of creating a new matrix
2071  *           ret_a     - if non-NULL, return the matrix with one or more
2072  *                       decks available for examination (see "do_full")
2073  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
2074  *                       but usually containing one or more allocated vji decks sized
2075  *                       for this subsequence i0..i1//j1..j0.
2076  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
2077  *                       *only* be valid on exactly the same i0..i1//j1..j0 subseq
2078  *                       because of the size of the subseq decks.
2079  *           ret_shadow- if non-NULL, the caller wants a shadow matrix, because
2080  *                       he intends to do a traceback.
2081  *           allow_begin- TRUE to allow 0->b local alignment begin transitions.
2082  *           ret_b     - best local begin state, or NULL if unwanted
2083  *           ret_bsc   - score for using ret_b, or NULL if unwanted
2084 
2085  *
2086  * Returns:  score.
2087  */
2088 static float
vinside(CM_t * cm,ESL_DSQ * dsq,int L,int r,int z,int i0,int i1,int j1,int j0,int useEL,int do_full,float *** a,float **** ret_a,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool,char **** ret_shadow,int allow_begin,int * ret_b,float * ret_bsc)2089 vinside(CM_t *cm, ESL_DSQ *dsq, int L,
2090 	int r, int z, int i0, int i1, int j1, int j0, int useEL,
2091 	int do_full, float ***a, float ****ret_a,
2092 	struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
2093 	char ****ret_shadow,
2094 	int allow_begin, int *ret_b, float *ret_bsc)
2095 {
2096   int      status;
2097   char  ***shadow;              /* the shadow matrix -- traceback ptrs -- memory is kept */
2098   int     v,i,j;
2099   int     w1,w2;		/* bounds of the split set */
2100   int     jp, ip;		/* j' and i' -- in the matrix coords */
2101   int    *touch;                /* keeps track of whether we can free a deck yet or not */
2102   int     y, yoffset;
2103   float   sc;			/* tmp variable holding a score */
2104   int      b;			/* best local begin state */
2105   float    bsc;			/* score for using the best local begin state */
2106 
2107   /*printf("***in vinside()****\n");
2108     printf("\tr  : %d\n", r);
2109     printf("\tz  : %d\n", z);
2110     printf("\ti0 : %d\n", i0);
2111     printf("\ti1 : %d\n", i1);
2112     printf("\tj1 : %d\n", j1);
2113     printf("\tj0 : %d\n", j0);
2114   */
2115 
2116   /* Allocations, initializations.
2117    * Remember to allocate for M+1 decks, in case we reuse this
2118    * memory for a local alignment voutside() calculation.
2119    */
2120   b   = -1;
2121   bsc = IMPOSSIBLE;
2122   if (dpool == NULL) dpool = deckpool_create();
2123   if (a == NULL) {
2124     ESL_ALLOC(a, sizeof(float **) * (cm->M+1));
2125     for (v = 0; v <= cm->M; v++) a[v] = NULL;
2126   }
2127 				/* the whole split set w<=z<=y must be initialized */
2128   w1 = cm->nodemap[cm->ndidx[z]];
2129   w2 = cm->cfirst[w1]-1;
2130   for (v = w1; v <= w2; v++) {
2131     if (! deckpool_pop(dpool, &(a[v])))
2132       a[v] = alloc_vji_deck(i0, i1, j1, j0);
2133     for (jp = 0; jp <= j0-j1; jp++)
2134       for (ip = 0; ip <= i1-i0; ip++)
2135 	a[v][jp][ip] = IMPOSSIBLE;
2136   }
2137 
2138   if (ret_shadow != NULL) {
2139     ESL_ALLOC(shadow, sizeof(char **) * cm->M);
2140     for (v = 0; v < cm->M; v++) shadow[v] = NULL;
2141   }
2142 
2143   /* Initialize the one non-IMPOSSIBLE cell as a boundary
2144    * condition.
2145    * If local alignment (useEL=1), we must connect z to EL;
2146    * we would init a[EL][0][i1-i0] = 0. But, we're not explicitly
2147    * keeping an EL deck, we're swallowing it into the recursion.
2148    * So, we unroll a chunk of the main recursion;
2149    * we have to laboriously figure out from the statetype z
2150    * and our position where and what our initialization is.
2151    * Else, for global alignments, we simply connect to z,0,i1-i0.
2152    */
2153   ip = i1-i0;
2154   jp = 0;
2155   if (! useEL)
2156     a[z][jp][ip] = 0.;
2157   else
2158     {
2159       if (ret_shadow != NULL)
2160 	shadow[z] = alloc_vji_shadow_deck(i0,i1,j1,j0);
2161 
2162       switch (cm->sttype[z]) {
2163       case D_st:
2164       case S_st:
2165 	/*a[z][jp][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
2166 	a[z][jp][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
2167 	if (ret_shadow != NULL) shadow[z][jp][ip] = USED_EL;
2168 	break;
2169       case MP_st:
2170 	if (i0 == i1 || j1 == j0) break;
2171 	/*a[z][jp+1][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
2172 	a[z][jp+1][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
2173 	if (dsq[i1-1] < cm->abc->K && dsq[j1+1] < cm->abc->K)
2174 	  a[z][jp+1][ip-1] += cm->esc[z][(int) (dsq[i1-1]*cm->abc->K+dsq[j1+1])];
2175 	else
2176 	  a[z][jp+1][ip-1] += DegeneratePairScore(cm->abc, cm->esc[z], dsq[i1-1], dsq[j1+1]);
2177 	if (ret_shadow != NULL) shadow[z][jp+1][ip-1] = USED_EL;
2178 	if (a[z][jp+1][ip-1] < IMPOSSIBLE) a[z][jp+1][ip-1] = IMPOSSIBLE;
2179 	break;
2180       case ML_st:
2181       case IL_st:
2182 	if (i0==i1) break;
2183 	/*a[z][jp][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
2184 	a[z][jp][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
2185 	if (dsq[i1-1] < cm->abc->K)
2186 	  a[z][jp][ip-1] += cm->esc[z][(int) dsq[i1-1]];
2187 	else
2188 	  a[z][jp][ip-1] += esl_abc_FAvgScore(cm->abc, dsq[i1-1], cm->esc[z]);
2189 	if (ret_shadow != NULL) shadow[z][jp][ip-1] = USED_EL;
2190 	if (a[z][jp][ip-1] < IMPOSSIBLE) a[z][jp][ip-1] = IMPOSSIBLE;
2191 	break;
2192       case MR_st:
2193       case IR_st:
2194 	if (j1==j0) break;
2195 	/*a[z][jp+1][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
2196 	a[z][jp+1][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
2197 	if (dsq[j1+1] < cm->abc->K)
2198 	  a[z][jp+1][ip] += cm->esc[z][(int) dsq[j1+1]];
2199 	else
2200 	  a[z][jp+1][ip] += esl_abc_FAvgScore(cm->abc, dsq[j1+1], cm->esc[z]);
2201 	if (ret_shadow != NULL) shadow[z][jp+1][ip] = USED_EL;
2202 	if (a[z][jp+1][ip] < IMPOSSIBLE) a[z][jp+1][ip] = IMPOSSIBLE;
2203 	break;
2204       }
2205 
2206     } /* done initializing the appropriate cell for useEL=TRUE */
2207 
2208   ESL_ALLOC(touch, sizeof(int) * cm->M);
2209   for (v = 0;   v < r;  v++) touch[v] = 0;
2210   for (v = r;   v <= w2; v++) touch[v] = cm->pnum[v]; /* note w2 not z: to bottom of split set */
2211   for (v = w2+1; v < cm->M; v++) touch[v] = 0;
2212 
2213   /* A special case. If vinside() is called on empty sequences,
2214    * we might do a begin transition right into z.
2215    */
2216   if (allow_begin && j0-j1 == 0 && i1-i0 == 0)
2217     {
2218       b   = z;
2219       bsc = a[z][0][0] + cm->beginsc[z];
2220       if (z == 0) {
2221 	a[0][0][0] = bsc;
2222 	if (ret_shadow != NULL) shadow[0][0][0] = USED_LOCAL_BEGIN;
2223       }
2224     }
2225 
2226   /* Main recursion
2227    */
2228   for (v = w1-1; v >= r; v--)
2229     {
2230       /* Get a deck and a shadow deck.
2231        */
2232       if (! deckpool_pop(dpool, &(a[v])))
2233 	a[v] = alloc_vji_deck(i0,i1,j1,j0);
2234       if (ret_shadow != NULL)
2235 	shadow[v] = alloc_vji_shadow_deck(i0,i1,j1,j0);
2236 				/* reassert our definition of a V problem */
2237       if (cm->sttype[v] == E_st || cm->sttype[v] == B_st || (cm->sttype[v] == S_st && v > r))
2238 	cm_Fail("you told me you wouldn't ever do that again.");
2239 
2240       if (cm->sttype[v] == D_st || cm->sttype[v] == S_st)
2241 	{
2242 	  for (jp = 0; jp <= j0-j1; jp++)
2243 	    for (ip = i1-i0; ip >= 0; ip--) {
2244 	      /*printf("D S jp : %d | ip : %d\n", jp, ip);*/
2245 	      y = cm->cfirst[v];
2246 	      a[v][jp][ip]      = a[y][jp][ip] + cm->tsc[v][0];
2247 	      /*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2248 	      if (ret_shadow != NULL) shadow[v][jp][ip] = (char) 0;
2249 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
2250 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
2251 		  > a[v][jp][ip])) {
2252 		a[v][jp][ip]      = cm->endsc[v] +
2253 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
2254 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
2255 	      }
2256 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
2257 		if ((sc = a[y+yoffset][jp][ip] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
2258 		  {
2259 		    a[v][jp][ip] = sc;
2260 		    /*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2261 		    if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
2262 		  }
2263 	      if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
2264 	    }
2265 	} else if (cm->sttype[v] == MP_st) {
2266 	  for (ip = i1-i0; ip >= 0; ip--) a[v][0][ip] = IMPOSSIBLE; /* boundary condition */
2267 
2268 	  for (jp = 1; jp <= j0-j1; jp++) {
2269 	    j = jp+j1;
2270 	    a[v][jp][i1-i0] = IMPOSSIBLE; /* boundary condition */
2271 	    for (ip = i1-i0-1; ip >= 0; ip--) {
2272 	      /*printf("MP jp : %d | ip : %d\n", jp, ip);*/
2273 	      i = ip+i0;
2274 	      y = cm->cfirst[v];
2275 	      a[v][jp][ip] = a[y][jp-1][ip+1] + cm->tsc[v][0];
2276 	      /*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2277 	      if (ret_shadow != NULL) shadow[v][jp][ip] = (char) 0;
2278 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
2279 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
2280 		  > a[v][jp][ip])) {
2281 		a[v][jp][ip]      = cm->endsc[v] +
2282 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
2283 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
2284 	      }
2285 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
2286 		if ((sc = a[y+yoffset][jp-1][ip+1] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
2287 		   {
2288 		     a[v][jp][ip] = sc;
2289 		     /*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2290 		     if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
2291 		   }
2292 	      if (dsq[i] < cm->abc->K && dsq[j] < cm->abc->K)
2293 		a[v][jp][ip] += cm->esc[v][(int) (dsq[i]*cm->abc->K+dsq[j])];
2294 	      else
2295 		a[v][jp][ip] += DegeneratePairScore(cm->abc, cm->esc[v], dsq[i], dsq[j]);
2296 	      if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
2297 	    }
2298 	  }
2299 	} else if (cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
2300 
2301 	  for (jp = 0; jp <= j0-j1; jp++) {
2302 	    a[v][jp][i1-i0] = IMPOSSIBLE; /* boundary condition */
2303 	    for (ip = i1-i0-1; ip >= 0; ip--) {
2304 	      /*printf("ML IL jp : %d | ip : %d\n", jp, ip);*/
2305 	      i = ip+i0;
2306 	      y = cm->cfirst[v];
2307 	      a[v][jp][ip] = a[y][jp][ip+1] + cm->tsc[v][0];
2308 	      if (ret_shadow != NULL) shadow[v][jp][ip] = 0;
2309 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
2310 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
2311 		  > a[v][jp][ip])) {
2312 		a[v][jp][ip]      = cm->endsc[v] +
2313 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
2314 		/*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2315 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
2316 	      }
2317 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
2318 		if ((sc = a[y+yoffset][jp][ip+1] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
2319 		  {
2320 		    a[v][jp][ip] = sc;
2321 		    /*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2322 		    if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
2323 		  }
2324 
2325 	      if (dsq[i] < cm->abc->K)
2326 		a[v][jp][ip] += cm->esc[v][dsq[i]];
2327 	      else
2328 		a[v][jp][ip] += esl_abc_FAvgScore(cm->abc, dsq[i], cm->esc[v]);
2329 	      if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
2330 	    }
2331 	  }
2332 	} else if (cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
2333 	  for (ip = i1-i0; ip >= 0; ip--) a[v][0][ip] = IMPOSSIBLE; /* boundary condition */
2334 
2335 	  for (jp = 1; jp <= j0-j1; jp++) {
2336 	    j = jp+j1;
2337 	    for (ip = i1-i0; ip >= 0; ip--) {
2338 	      /*printf("MR IR jp : %d | ip : %d\n", jp, ip);*/
2339 	      y = cm->cfirst[v];
2340 	      a[v][jp][ip]      = a[y][jp-1][ip] + cm->tsc[v][0];
2341 	      /*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2342 	      if (ret_shadow != NULL) shadow[v][jp][ip] = 0;
2343 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
2344 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
2345 		  > a[v][jp][ip])) {
2346 		a[v][jp][ip] = cm->endsc[v] +
2347 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
2348 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
2349 	      }
2350 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
2351 		if ((sc = a[y+yoffset][jp-1][ip] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
2352 		  {
2353 		    a[v][jp][ip] = sc;
2354 		    /*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
2355 		    if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
2356 		  }
2357 
2358 	      if (dsq[j] < cm->abc->K)
2359 		a[v][jp][ip] += cm->esc[v][dsq[j]];
2360 	      else
2361 		a[v][jp][ip] += esl_abc_FAvgScore(cm->abc, dsq[j], cm->esc[v]);
2362 	      if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
2363 	    }
2364 	  }
2365 	} /* finished calculating deck v */
2366 
2367       /* Check for local begin getting us to the root.
2368        */
2369       if (allow_begin && a[v][j0-j1][0] + cm->beginsc[v] > bsc)
2370 	{
2371 	  b   = v;
2372 	  bsc = a[v][j0-j1][0] + cm->beginsc[v];
2373 	}
2374 
2375       /* Check whether we need to store the local begin score
2376        * for a possible traceback.
2377        */
2378       if (allow_begin && v == 0 && bsc > a[0][j0-j1][0])
2379 	{
2380 	  a[0][j0-j1][0] = bsc;
2381 	  if (ret_shadow != NULL) shadow[v][j0-j1][0] = USED_LOCAL_BEGIN;
2382 	}
2383 
2384 
2385       /* Now, try to reuse memory under v.
2386        */
2387       if (! do_full) {
2388 	for (y = cm->cfirst[v]; y < cm->cfirst[v]+cm->cnum[v]; y++)
2389 	  {
2390 	    touch[y]--;
2391 	    if (touch[y] == 0) {
2392 	      deckpool_push(dpool, a[y]);
2393 	      a[y] = NULL;
2394 	    }
2395 	  }
2396       }
2397     } /* end loop over v; we now have a complete matrix */
2398 
2399   /* Keep the score.
2400    */
2401   sc = a[r][j0-j1][0];
2402   if (ret_b != NULL)   *ret_b   = b;    /* b is -1 if allow_begin is FALSE. */
2403   if (ret_bsc != NULL) *ret_bsc = bsc;  /* bsc is IMPOSSIBLE if allow_begin is FALSE */
2404 
2405 
2406   /* If the caller doesn't want the score matrix back, blow
2407    * it away (saving decks in the pool). Else, pass it back.
2408    */
2409   if (ret_a == NULL) {
2410     for (v = r; v <= w2; v++)	/* note: go all the way to the bottom of the split set */
2411       if (a[v] != NULL) {
2412 	deckpool_push(dpool, a[v]);
2413 	a[v] = NULL;
2414       }
2415     free(a);
2416   } else *ret_a = a;
2417 
2418   /* If caller doesn't want the deck pool, blow it away.
2419    * Else, pass it back.
2420    */
2421   if (ret_dpool == NULL) {
2422     float **foo;
2423     while (deckpool_pop(dpool, &foo))
2424       free_vji_deck(foo, j1,j0);
2425     deckpool_free(dpool);
2426   } else *ret_dpool = dpool;
2427 
2428   free(touch);
2429   if (ret_shadow != NULL) *ret_shadow = shadow;
2430   return sc;
2431 
2432  ERROR:
2433   cm_Fail("Memory allocation error.\n");
2434   return 0.; /* never reached */
2435 }
2436 
2437 /* Function: voutside()
2438  * Date:     SRE, Sun Jun  3 15:44:41 2001 [St. Louis]
2439  *
2440  * Purpose:  Run the outside version of a CYK alignment algorithm for
2441  *           a V problem: an unbifurcated CM subgraph from r..z, aligned
2442  *           to a one-whole subsequence i0..i1//j1..j0, exclusive of
2443  *           z, i1, j1.
2444  *
2445  *           This is done in the vji coordinate system, where both
2446  *           our j and i coordinates are transformed. The Platonic
2447  *           ideal matrix runs [j1..j0][i0..i1]. The implemented
2448  *           matrix runs [0..j0-j1][0..i1-i0].
2449  *
2450  *           Much of the behavior in calling conventions, etc., is
2451  *           analogous to inside() and vinside(); see their prefaces
2452  *           for more info. Unlike the inside engines, we never
2453  *           need to calculate a shadow matrix - outside engines are
2454  *           only used for divide and conquer steps.
2455  *
2456  * Args:     cm        - the model    [0..M-1]
2457  *           dsq       - the sequence [1..L]
2458  *           L         - length of the dsq
2459  *           r         - first state of linear model segment (S; MP, ML, MR, or D)
2460  *           z         - last state of linear model segment (B; MP, ML, MR, or D)
2461  *           i0,i1     - subsequence before the hole  (1..L)
2462  *           j1,j0     - subsequence after the hole (1..L)
2463  *           useEL     - if TRUE, worry about local alignment.
2464  *           do_full   - if TRUE, we save all the decks in beta, instead of
2465  *                       working in our default memory-efficient mode where
2466  *                       we reuse decks and only the lowermost decks (inc. z) are valid
2467  *                       at the end.
2468  *           beta      - if non-NULL, this is an existing matrix, with NULL
2469  *                       decks for r..z, and we'll fill in those decks
2470  *                       appropriately instead of creating a new matrix
2471  *           ret_beta  - if non-NULL, return the matrix with one or more
2472  *                       decks available for examination (see "do_full")
2473  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
2474  *                       but usually containing one or more allocated vji decks sized
2475  *                       for this subsequence i0..i1//j1..j0.
2476  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
2477  *                       *only* be valid on exactly the same i0..i1//j1..j0 subseq,
2478  *                       because of the size of the subseq decks.
2479  */
2480 static void
voutside(CM_t * cm,ESL_DSQ * dsq,int L,int r,int z,int i0,int i1,int j1,int j0,int useEL,int do_full,float *** beta,float **** ret_beta,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool)2481 voutside(CM_t *cm, ESL_DSQ *dsq, int L,
2482 	 int r, int z, int i0, int i1, int j1, int j0, int useEL,
2483 	 int do_full, float ***beta, float ****ret_beta,
2484 	 struct deckpool_s *dpool, struct deckpool_s **ret_dpool)
2485 {
2486   int      status;
2487   int      v,y;			/* indices for states */
2488   int      i,j;			/* indices in sequence dimensions */
2489   int      ip, jp;		/* transformed sequence indices */
2490   float    sc;			/* a temporary variable holding a score */
2491   int     *touch;               /* keeps track of how many lower decks still need this deck */
2492   float    escore;		/* an emission score, tmp variable */
2493   int      voffset;		/* index of v in t_v(y) transition scores */
2494 
2495 
2496   /* Allocations and initializations
2497    */
2498   			/* if caller didn't give us a deck pool, make one */
2499   if (dpool == NULL) dpool = deckpool_create();
2500 
2501   /* If caller didn't give us a matrix, make one.
2502    * Remember to allow for deck M, the EL deck, for local alignments.
2503    */
2504   if (beta == NULL) {
2505     ESL_ALLOC(beta, sizeof(float **) * (cm->M+1));
2506     for (v = 0; v <= cm->M; v++) beta[v] = NULL;
2507   }
2508   /* Initialize the root deck. This probably isn't the most efficient way to do it.
2509    */
2510   if (! deckpool_pop(dpool, &(beta[r])))
2511     beta[r] = alloc_vji_deck(i0,i1,j1,j0);
2512   for (jp = 0; jp <= j0-j1; jp++) {
2513     for (ip = 0; ip <= i1-i0; ip++)
2514       beta[r][jp][ip] = IMPOSSIBLE;
2515   }
2516   beta[r][j0-j1][0] = 0;
2517 
2518   /* Initialize the EL deck, if we're in local mode w.r.t. ends.
2519    * Deal with the special initialization case of the root state r
2520    * immediately transitioning to EL, if we're supposed to use EL.
2521    */
2522   if (useEL && cm->flags & CMH_LOCAL_END) {
2523     if (! deckpool_pop(dpool, &(beta[cm->M])))
2524       beta[cm->M] = alloc_vji_deck(i0,i1,j1,j0);
2525     for (jp = 0; jp <= j0-j1; jp++) {
2526       for (ip = 0; ip <= i1-i0; ip++)
2527 	beta[cm->M][jp][ip] = IMPOSSIBLE;
2528     }
2529   }
2530   if (useEL && NOT_IMPOSSIBLE(cm->endsc[r])) {
2531     switch(cm->sttype[r]) {
2532     case MP_st:
2533       if (i0 == i1 || j1 == j0) break;
2534       if (dsq[i0] < cm->abc->K && dsq[j0] < cm->abc->K)
2535 	escore = cm->esc[r][(int) (dsq[i0]*cm->abc->K+dsq[j0])];
2536       else
2537 	escore = DegeneratePairScore(cm->abc, cm->esc[r], dsq[i0], dsq[j0]);
2538       beta[cm->M][j0-j1-1][1] = cm->endsc[r] +
2539 	(cm->el_selfsc * ((j0-1)-(i0+1)+1)) + escore;
2540       break;
2541     case ML_st:
2542     case IL_st:
2543       if (i0 == i1) break;
2544       if (dsq[i0] < cm->abc->K)
2545 	escore = cm->esc[r][(int) dsq[i0]];
2546       else
2547 	escore = esl_abc_FAvgScore(cm->abc, dsq[i0], cm->esc[r]);
2548       beta[cm->M][j0-j1][1] = cm->endsc[r] +
2549 	(cm->el_selfsc * ((j0)-(i0+1)+1)) + escore;
2550       break;
2551     case MR_st:
2552     case IR_st:
2553       if (j0==j1) break;
2554       if (dsq[j0] < cm->abc->K)
2555 	escore = cm->esc[r][(int) dsq[j0]];
2556       else
2557 	escore = esl_abc_FAvgScore(cm->abc, dsq[j0], cm->esc[r]);
2558       beta[cm->M][j0-j1-1][0] = cm->endsc[r] +
2559 	(cm->el_selfsc * ((j0-1)-(i0)+1)) + escore;
2560       break;
2561     case S_st:
2562     case D_st:
2563       beta[cm->M][j0-j1][0] = cm->endsc[r] +
2564 	(cm->el_selfsc * ((j0)-(i0)+1));
2565       break;
2566     default:  cm_Fail("bogus parent state %d\n", cm->sttype[r]);
2567     }
2568   }
2569 
2570   /* Initialize the "touch" array, used for figuring out
2571    * when a deck is no longer touched, so it can be free'd.
2572    */
2573   ESL_ALLOC(touch, sizeof(int) * cm->M);
2574   for (v = 0;   v < r;     v++) touch[v] = 0;
2575   for (v = z+1; v < cm->M; v++) touch[v] = 0;
2576   for (v = r;   v <= z;    v++) {
2577     if (cm->sttype[v] == B_st) touch[v] = 2; /* well, we never use this, but be complete */
2578     else                       touch[v] = cm->cnum[v];
2579   }
2580 
2581 
2582   /* Main loop down through the decks
2583    */
2584   for (v = r+1; v <= z; v++)
2585     {
2586       /* First we need to fetch a deck of memory to fill in;
2587        * we try to reuse a deck but if one's not available we allocate
2588        * a fresh one.
2589        */
2590       if (! deckpool_pop(dpool, &(beta[v])))
2591 	beta[v] = alloc_vji_deck(i0,i1,j1,j0);
2592 
2593       /* Init the whole deck to IMPOSSIBLE.
2594        */
2595       for (jp = j0-j1; jp >= 0; jp--)
2596 	for (ip = 0; ip <= i1-i0; ip++)
2597 	  beta[v][jp][ip] = IMPOSSIBLE;
2598 
2599       /* If we can get into deck v by a local begin transition, do an init
2600        * with that.
2601        */
2602       if (r == 0 && i0 == 1 && j0 == L && (cm->flags & CMH_LOCAL_BEGIN))
2603 	{
2604 	  if (cm->beginsc[v] > beta[v][j0-j1][0])
2605 	    beta[v][j0-j1][0] = cm->beginsc[v];
2606 	}
2607 
2608       /* main recursion:
2609        */
2610       for (jp = j0-j1; jp >= 0; jp--) {
2611 	j = jp+j1;
2612 	for (ip = 0; ip <= i1-i0; ip++)
2613 	  {
2614 	    i = ip+i0;
2615 
2616 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
2617 	      if (y < r) continue; /* deal with split sets */
2618 	      voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
2619 
2620 	      switch(cm->sttype[y]) {
2621 	      case MP_st:
2622 		if (j == j0 || i == i0) continue; /* boundary condition */
2623 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
2624 		  escore = cm->esc[y][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
2625 		else
2626 		  escore = DegeneratePairScore(cm->abc, cm->esc[y], dsq[i-1], dsq[j+1]);
2627 
2628 		if ((sc = beta[y][jp+1][ip-1]+cm->tsc[y][voffset]+escore) > beta[v][jp][ip])
2629 		  beta[v][jp][ip] = sc;
2630 		break;
2631 
2632 	      case ML_st:
2633 	      case IL_st:
2634 		if (i == i0) continue;	/* boundary condition */
2635 
2636 		if (dsq[i-1] < cm->abc->K)
2637 		  escore = cm->esc[y][(int) dsq[i-1]];
2638 		else
2639 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[y]);
2640 
2641 		if ((sc = beta[y][jp][ip-1]+cm->tsc[y][voffset]+escore) > beta[v][jp][ip])
2642 		  beta[v][jp][ip] = sc;
2643 		break;
2644 
2645 	      case MR_st:
2646 	      case IR_st:
2647 		if (j == j0) continue;
2648 
2649 		if (dsq[j+1] < cm->abc->K)
2650 		  escore = cm->esc[y][(int) dsq[j+1]];
2651 		else
2652 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[y]);
2653 
2654 		if ((sc = beta[y][jp+1][ip]+cm->tsc[y][voffset]+escore) > beta[v][jp][ip])
2655 		  beta[v][jp][ip] = sc;
2656 		break;
2657 
2658 	      case S_st:
2659 	      case E_st:
2660 	      case D_st:
2661 		if ((sc = beta[y][jp][ip] + cm->tsc[y][voffset]) > beta[v][jp][ip])
2662 		  beta[v][jp][ip] = sc;
2663 		break;
2664 
2665 	      default: cm_Fail("bogus parent state %d\n", cm->sttype[y]);
2666 	      }/* end switch over states*/
2667 	    } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
2668 	    if (beta[v][jp][ip] < IMPOSSIBLE) beta[v][jp][ip] = IMPOSSIBLE;
2669 
2670 	  } /* ends loop over ip. We know all beta[v][jp][ip] in this row jp */
2671 
2672       }/* end loop over jp. We know the beta's for the whole deck.*/
2673 
2674       /* Deal with local alignment
2675        * transitions v->EL, if we're doing local alignment and there's a
2676        * possible transition.
2677        */
2678       if (useEL && NOT_IMPOSSIBLE(cm->endsc[v])) {
2679 	for (jp = j0-j1; jp >= 0; jp--) {
2680 	  j = jp+j1;
2681 	  for (ip = 0; ip <= i1-i0; ip++)
2682 	    {
2683 	      i = ip+i0;
2684 	      switch (cm->sttype[v]) {
2685 	      case MP_st:
2686 		if (j == j0 || i == i0) continue; /* boundary condition */
2687 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
2688 		  escore = cm->esc[v][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
2689 		else
2690 		  escore = DegeneratePairScore(cm->abc, cm->esc[v], dsq[i-1], dsq[j+1]);
2691 		if ((sc = beta[v][jp+1][ip-1] + cm->endsc[v] +
2692 		     (cm->el_selfsc * (j-i+1)) + escore) > beta[cm->M][jp][ip])
2693 		  beta[cm->M][jp][ip] = sc;
2694 		break;
2695 	      case ML_st:
2696 	      case IL_st:
2697 		if (i == i0) continue;
2698 		if (dsq[i-1] < cm->abc->K)
2699 		  escore = cm->esc[v][(int) dsq[i-1]];
2700 		else
2701 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[v]);
2702 		if ((sc = beta[v][jp][ip-1] + cm->endsc[v] +
2703 		     (cm->el_selfsc * (j-i+1)) + escore) > beta[cm->M][jp][ip])
2704 		  beta[cm->M][jp][ip] = sc;
2705 		break;
2706 	      case MR_st:
2707 	      case IR_st:
2708 		if (j == j0) continue;
2709 		if (dsq[j+1] < cm->abc->K)
2710 		  escore = cm->esc[v][(int) dsq[j+1]];
2711 		else
2712 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[v]);
2713 		if ((sc = beta[v][jp+1][ip] + cm->endsc[v] +
2714 		     (cm->el_selfsc * (j-i+1)) + escore) > beta[cm->M][jp][ip])
2715 		  beta[cm->M][jp][ip] = sc;
2716 		break;
2717 	      case S_st:
2718 	      case D_st:
2719 	      case E_st:
2720 		if ((sc = beta[v][jp][ip] + cm->endsc[v] +
2721 		     (cm->el_selfsc * (j-i+1))) > beta[cm->M][jp][ip])
2722 		    beta[cm->M][jp][ip] = sc;
2723 		break;
2724 	      default:  cm_Fail("bogus parent state %d\n", cm->sttype[y]);
2725 	      } /* end switch over parent v state type */
2726 	    } /* end loop over ip */
2727 	} /* end loop over jp */
2728       }
2729 
2730       /* Finished deck v.
2731        * now look at its parents; if we're reusing memory (! do_full)
2732        * push the parents that we don't need any more into the pool.
2733        */
2734       if (! do_full) {
2735 	for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
2736 	  touch[y]--;
2737 	  if (touch[y] == 0) {
2738 	    deckpool_push(dpool, beta[y]);
2739 	    beta[y] = NULL;
2740 	  }
2741 	}
2742       }
2743 
2744     } /* end loop over decks v. */
2745 
2746 #if 0
2747   /* superfluous code, I think...*/
2748   /* Deal with the last step needed for local alignment
2749    * w.r.t. ends: left-emitting, zero-scoring EL->EL transitions.
2750    */
2751   if (useEL && cm->flags & CMH_LOCAL_END) {
2752     for (jp = j0-j1; jp >= 0; jp--)
2753       for (ip = 1; ip <= i1-i0; ip++) /* careful w/ boundary here */
2754 	if ((sc = beta[cm->M][jp][ip-1]) > beta[cm->M][jp][ip])
2755 	  beta[cm->M][jp][ip] = sc;
2756   }
2757 #endif
2758 
2759   /* If the caller doesn't want the matrix, free it.
2760    * (though it would be *stupid* for the caller not to want the
2761    * matrix in the current implementation!)
2762    */
2763   if (ret_beta == NULL) {
2764     for (v = r; v <= z; v++)
2765       if (beta[v] != NULL) { deckpool_push(dpool, beta[v]); beta[v] = NULL; }
2766     if (cm->flags & CMH_LOCAL_END) {
2767       deckpool_push(dpool, beta[cm->M]);
2768       beta[cm->M] = NULL;
2769     }
2770     free(beta);
2771   } else *ret_beta = beta;
2772 
2773   /* If the caller doesn't want the deck pool, free it.
2774    * Else, pass it back to him.
2775    */
2776   if (ret_dpool == NULL) {
2777     float **a;
2778     while (deckpool_pop(dpool, &a))
2779       free_vji_deck(a,j1,j0);
2780     deckpool_free(dpool);
2781   } else *ret_dpool = dpool;
2782 
2783   free(touch);
2784   return;
2785 
2786  ERROR:
2787   cm_Fail("Memory allocation error.\n");
2788 }
2789 
2790 /*****************************************************************
2791  * The traceback routines
2792  *   insideT  - run inside(), append trace in postorder traversal
2793  *   vinsideT - run vinside(), append trace in postorder traversal
2794  *****************************************************************/
2795 
2796 /* Function: insideT()
2797  * Date:     SRE, Fri Aug 11 12:08:18 2000 [Pittsburgh]
2798  *
2799  * Purpose:  Call inside, get vjd shadow matrix;
2800  *           then trace back. Append the trace to a given
2801  *           traceback, which already has state r at tr->n-1.
2802  *
2803  *           If we're not in banded mode, dmin and dmax should
2804  *           be passed in as NULL.
2805  */
2806 static float
insideT(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int j0,int allow_begin,int * dmin,int * dmax)2807 insideT(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
2808 	int r, int z, int i0, int j0,
2809 	int allow_begin, int *dmin, int *dmax)
2810 {
2811 
2812   int       status;
2813   void   ***shadow;             /* the traceback shadow matrix */
2814   float     sc;			/* the score of the CYK alignment */
2815   ESL_STACK *pda;                /* stack that tracks bifurc parent of a right start */
2816   int       v,j,d,i;		/* indices for state, j, subseq len */
2817   int       k;
2818   int       y, yoffset;
2819   int       bifparent;
2820   int       b;
2821   float     bsc;
2822 
2823   if(dmin == NULL && dmax == NULL)
2824     {
2825       sc = inside(cm, dsq, L, r, z, i0, j0,
2826 		  BE_EFFICIENT,	/* memory-saving mode */
2827 		  NULL, NULL,	/* manage your own matrix, I don't want it */
2828 		  NULL, NULL,	/* manage your own deckpool, I don't want it */
2829 		  &shadow,	/* return a shadow matrix to me. */
2830 		  allow_begin,  /* TRUE to allow local begins */
2831 		  &b, &bsc);	/* if allow_begin is TRUE, gives info on optimal b */
2832     }
2833   else
2834     {
2835       sc = inside_b(cm, dsq, L, r, z, i0, j0,
2836 		    BE_EFFICIENT,/* memory-saving mode */
2837 		    NULL, NULL,	 /* manage your own matrix, I don't want it */
2838 		    NULL, NULL,	 /* manage your own deckpool, I don't want it */
2839 		    &shadow,	 /* return a shadow matrix to me. */
2840 		    allow_begin, /* TRUE to allow local begins */
2841 		    &b, &bsc,	 /* if allow_begin is TRUE, gives info on optimal b */
2842 		    dmin, dmax); /* the bands */
2843     }
2844 
2845   pda = esl_stack_ICreate();
2846   if(pda == NULL) goto ERROR;
2847   v = r;
2848   j = j0;
2849   i = i0;
2850   d = j0-i0+1;
2851 
2852   /*printf("Starting traceback in insideT()\n");*/
2853   while (1) {
2854     if (cm->sttype[v] == B_st) {
2855       k = ((int **) shadow[v])[j][d];   /* k = len of right fragment */
2856 
2857       /* Store info about the right fragment that we'll retrieve later:
2858        */
2859       if((status = esl_stack_IPush(pda, j)) != eslOK) goto ERROR;	/* remember the end j    */
2860       if((status = esl_stack_IPush(pda, k)) != eslOK) goto ERROR;	/* remember the subseq length k */
2861       if((status = esl_stack_IPush(pda, tr->n-1)) != eslOK) goto ERROR;	/* remember the trace index of the parent B state */
2862 
2863       /* Deal with attaching left start state.
2864        */
2865       j = j-k;
2866       d = d-k;
2867       i = j-d+1;
2868       y = cm->cfirst[v];
2869       InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
2870       v = y;
2871     } else if (cm->sttype[v] == E_st || cm->sttype[v] == EL_st) {
2872       /* We don't trace back from an E or EL. Instead, we're done with the
2873        * left branch of the tree, and we try to swing over to the right
2874        * branch by popping a right start off the stack and attaching
2875        * it. If the stack is empty, then we're done with the
2876        * traceback altogether. This is the only way to break the
2877        * while (1) loop.
2878        */
2879       if (esl_stack_IPop(pda, &bifparent) == eslEOD) break;
2880       esl_stack_IPop(pda, &d);
2881       esl_stack_IPop(pda, &j);
2882       v = tr->state[bifparent];	/* recover state index of B */
2883       y = cm->cnum[v];		/* find state index of right S */
2884       i = j-d+1;
2885 				/* attach the S to the right */
2886       InsertTraceNode(tr, bifparent, TRACE_RIGHT_CHILD, i, j, y);
2887       v = y;
2888     } else {
2889       yoffset = ((char **) shadow[v])[j][d];
2890 
2891       /*printf("v : %d | r : %d | z : %d | i0 : %d | \n", v, r, z, i0);*/
2892       /*printf("\tyoffset : %d\n", yoffset);*/
2893       switch (cm->sttype[v]) {
2894       case D_st:            break;
2895       case MP_st: i++; j--; break;
2896       case ML_st: i++;      break;
2897       case MR_st:      j--; break;
2898       case IL_st: i++;      break;
2899       case IR_st:      j--; break;
2900       case S_st:            break;
2901       default:    cm_Fail("'Inconceivable!'\n'You keep using that word...'");
2902       }
2903       d = j-i+1;
2904 
2905       if (yoffset == USED_EL)
2906 	{	/* a local alignment end */
2907 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, cm->M);
2908 	  v = cm->M;		/* now we're in EL. */
2909 	}
2910       else if (yoffset == USED_LOCAL_BEGIN)
2911 	{ /* local begin; can only happen once, from root */
2912 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, b);
2913 	  v = b;
2914 	}
2915       else
2916 	{
2917 	  y = cm->cfirst[v] + yoffset;
2918 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
2919 	  v = y;
2920 	}
2921     }
2922   }
2923   esl_stack_Destroy(pda);  /* it should be empty; we could check; naaah. */
2924   free_vjd_shadow_matrix(shadow, cm, i0, j0);
2925   return sc;
2926 
2927  ERROR:
2928   cm_Fail("Memory allocation error.");
2929   return 0.; /* NEVERREACHED */
2930 }
2931 
2932 /* Function: vinsideT()
2933  * Date:     SRE, Sat Jun  2 14:40:13 2001 [St. Louis]
2934  *
2935  * Purpose:  Call vinside(), get vji shadow matrix for a V problem;
2936  *           then trace back. Append the trace to a
2937  *           given traceback, which has state r already at
2938  *           t->n-1.
2939  *
2940  *           If we're not in banded mode, dmin and dmax should
2941  *           be passed in as NULL.
2942  */
2943 static float
vinsideT(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int i1,int j1,int j0,int useEL,int allow_begin,int * dmin,int * dmax)2944 vinsideT(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
2945 	 int r, int z, int i0, int i1, int j1, int j0, int useEL,
2946 	 int allow_begin, int *dmin, int *dmax)
2947 {
2948   char ***shadow;
2949   float   sc;
2950   int     v,y;
2951   int     j,i;
2952   int     jp,ip;
2953   int     yoffset;
2954   int     b;
2955   float   bsc;
2956 
2957   /* If we can deduce the traceback unambiguously without
2958    * doing any DP... do it.
2959    */
2960   if (r == z) {
2961     InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, r);
2962     return 0.;
2963   }
2964 
2965   if(dmin == NULL && dmax == NULL)
2966     {
2967       sc = vinside(cm, dsq, L, r, z, i0, i1, j1, j0, useEL,
2968 		   BE_EFFICIENT,	/* memory-saving mode */
2969 		   NULL, NULL,	/* manage your own matrix, I don't want it */
2970 		   NULL, NULL,	/* manage your own deckpool, I don't want it */
2971 		   &shadow,      	/* return a shadow matrix to me. */
2972 		   allow_begin,     /* TRUE to allow local begin transitions */
2973 		   &b, &bsc);       /* info on optimal local begin */
2974     }
2975   else
2976     {
2977       sc = vinside_b(cm, dsq, L, r, z, i0, i1, j1, j0, useEL,
2978 		     BE_EFFICIENT,	/* memory-saving mode */
2979 		     NULL, NULL,	/* manage your own matrix, I don't want it */
2980 		     NULL, NULL,	/* manage your own deckpool, I don't want it */
2981 		     &shadow,      	/* return a shadow matrix to me. */
2982 		     allow_begin,       /* TRUE to allow local begin transitions */
2983 		     &b, &bsc,          /* info on optimal local begin */
2984 		     dmin, dmax);
2985     }
2986   /* We've got a complete shadow matrix. Trace it back. We know
2987    * that the trace will begin with the start state r, at i0,j0
2988    * (e.g. jp=j0-j1, ip=0)
2989    */
2990   v = r;
2991   j = j0;
2992   i = i0;
2993 
2994   /*printf("Starting traceback in vinsideT()\n");*/
2995   while (1) {
2996     jp = j-j1;
2997     ip = i-i0;
2998 
2999     /* 1. figure out the next state (deck) in the shadow matrix.
3000      */
3001     /*printf("v : %d | jp : %d | ip : %d | i0 : %d | \n", v, jp, ip, i0);*/
3002     yoffset = shadow[v][jp][ip];
3003     /*printf("\tyoffset : %d\n", yoffset);*/
3004 
3005     /* 2. figure out the i,j for state y, which is dependent
3006      *    on what v emits (if anything)
3007      */
3008     switch (cm->sttype[v]) {
3009     case D_st:            break;
3010     case MP_st: i++; j--; break;
3011     case ML_st: i++;      break;
3012     case MR_st:      j--; break;
3013     case IL_st: i++;      break;
3014     case IR_st:      j--; break;
3015     case S_st:            break;
3016     default:    cm_Fail("'Inconceivable!'\n'You keep using that word...'");
3017     }
3018 
3019     /* If the traceback pointer (yoffset) is -1, that's a special
3020      * flag for a local alignment end, e.g. transition to EL (state "M").
3021      */
3022     if (yoffset == USED_EL)
3023       {
3024 	InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, cm->M);
3025 	break;			/* one way out of the while loop */
3026       }
3027     else if (yoffset == USED_LOCAL_BEGIN)
3028       {
3029 	InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, b);
3030 	v = b;
3031 	if (! useEL && v == z) break; /* the other way out of the while loop */
3032       }
3033     else
3034       {
3035 	/*    Attach y,i,j to the trace. This new node always attaches
3036 	 *    to the end of the growing trace -- e.g. trace node
3037 	 *    tr->n-1.
3038 	 */
3039 	y = cm->cfirst[v] + yoffset;
3040 	InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
3041 	v = y;
3042 	if (! useEL && v == z) break; /* the other way out of the while loop */
3043       }
3044   }
3045 
3046   /* We're done. Our traceback has just ended. We have just attached
3047    * state z for i1,j1; it is in the traceback at node tr->n-1.
3048    */
3049   free_vji_shadow_matrix(shadow, cm->M, j1, j0);
3050   return sc;
3051 }
3052 
3053 
3054 /*****************************************************************
3055  * The size calculators:
3056  *    insideT_size()   - Mb required by insideT
3057  *    vinsideT_size()  - Mb required by vinsideT
3058  *****************************************************************/
3059 
3060 /* Function: insideT_size()
3061  * Date:     SRE, Sun Jun  3 17:56:08 2001 [St. Louis]
3062  *
3063  * Purpose:  Calculate the # of Mb required to run insideT()
3064  *           and solve a generic or wedge problem without any
3065  *           more divide/conquer.
3066  */
3067 float
insideT_size(CM_t * cm,int L,int r,int z,int i0,int j0)3068 insideT_size(CM_t *cm, int L, int r, int z, int i0, int j0)
3069 {
3070   float Mb;
3071   int   maxdecks;
3072   int   nends;
3073   int   nbif;
3074 
3075   nends = CMSegmentCountStatetype(cm, r, z, E_st);
3076   nbif  = CMSegmentCountStatetype(cm, r, z, B_st);
3077   maxdecks = cyk_deck_count(cm, r, z);
3078 
3079   Mb = (float) (sizeof(float **) * cm->M) / 1000000.;  /* the score matrix */
3080   Mb += (float) maxdecks * size_vjd_deck(L, i0, j0);
3081   Mb += (float) (sizeof(int) * cm->M) / 1000000.;      /* the touch array */
3082 
3083   Mb += (float) (sizeof(void **) * cm->M) / 1000000.;
3084   Mb += (float) (z-r+1-nends-nbif) * size_vjd_yshadow_deck(L, i0, j0);
3085   Mb += (float) nbif * size_vjd_kshadow_deck(L, i0, j0);
3086 
3087   return Mb;
3088 }
3089 
3090 float
vinsideT_size(CM_t * cm,int r,int z,int i0,int i1,int j1,int j0)3091 vinsideT_size(CM_t *cm, int r, int z, int i0, int i1, int j1, int j0)
3092 {
3093   float Mb;
3094   int   maxdecks;
3095 
3096   Mb = (float) (sizeof(float **) * cm->M) / 1000000.;
3097   maxdecks = cyk_deck_count(cm, r, z);
3098   Mb += maxdecks * size_vji_deck(i0,i1,j1,j0);
3099   Mb += (float)(z-r) * size_vji_shadow_deck(i0,i1,j1,j0);
3100   return Mb;
3101 }
3102 
3103 /* Function: cyk_deck_count()
3104  * Date:     SRE, Sun Jun  3 20:05:18 2001 [St. Louis]
3105  *
3106  * Purpose:  calculate and return the maximum number of
3107  *           decks that would be required in memory to
3108  *           solve an alignment problem involving a CM
3109  *           subgraph from r..z.
3110  *
3111  *           For a whole model, except for trivially small models with no
3112  *           stacked base pairs, this is almost invariably
3113  *           10+1+cyk_extra_decks(): MATP-MATP connections require
3114  *           10 decks (6 states in current node, 4 states in connected
3115  *           split set of next node). We share 1 end state deck. All
3116  *           other decks are retained S decks, needed for bifurcation
3117  *           calculations.
3118  */
3119 static int
cyk_deck_count(CM_t * cm,int r,int z)3120 cyk_deck_count(CM_t *cm, int r, int z)
3121 {
3122   int       status;
3123   ESL_STACK *pda;	/* pushdown stack simulating the deck pool */
3124   int       v,w,y;	/* state indices */
3125   int       nends;
3126   int       ndecks;
3127   int      *touch;	/* keeps track of how many higher decks still need this deck */
3128 
3129   /* Initializations, mirroring key parts of CYKInside()
3130    */
3131   ndecks = 1;			/* deck z, which we always need to start with. */
3132   nends  = CMSegmentCountStatetype(cm, r, z, E_st);
3133   pda    = esl_stack_ICreate();
3134   if(pda == NULL) goto ERROR;
3135 
3136   ESL_ALLOC(touch, sizeof(int) * cm->M);
3137   for (v = 0; v < r;     v++) touch[v] = 0;
3138   for (v = r; v < z;     v++) touch[v] = cm->pnum[v];
3139   for (v = z; v < cm->M; v++) touch[v] = 0;
3140 
3141   for (v = z; v >= r; v--)
3142     {
3143       if (cm->sttype[v] != E_st) {
3144 	if (esl_stack_IPop(pda, &y) == eslEOD) ndecks++; /* simulated allocation of a new deck */
3145       }
3146 
3147       if (cm->sttype[v] == B_st) { /* release both S children of a bifurc */
3148 	w = cm->cfirst[v];
3149 	y = cm->cnum[v];
3150 	if((status =esl_stack_IPush(pda, w)) != eslOK) goto ERROR;
3151 	if((status = esl_stack_IPush(pda, y)) != eslOK) goto ERROR;
3152       } else {
3153 	for (w = cm->cfirst[v]; w < cm->cfirst[v]+cm->cnum[v]; w++)
3154 	  {
3155 	    touch[w]--;
3156 	    if (touch[w] == 0)
3157 	      {
3158 		if (cm->sttype[w] == E_st) {
3159 		  nends--;
3160 		  if (nends == 0) { if((status = esl_stack_IPush(pda, cm->M-1)) != eslOK) goto ERROR; }
3161 		} else
3162 		  if((status = esl_stack_IPush(pda, w)) != eslOK) goto ERROR;
3163 	      }
3164 	  }
3165       }
3166     }
3167   free(touch);
3168   esl_stack_Destroy(pda);
3169   return ndecks;
3170 
3171  ERROR:
3172   cm_Fail("Memory allocation error.\n");
3173   return 0; /* never reached */
3174 }
3175 
3176 /* Function: cyk_extra_decks()
3177  * Date:     SRE, Sun Apr  7 14:42:48 2002 [St. Louis]
3178  *
3179  * Purpose:  Calculate the number of extra
3180  *           decks that will be needed to accommodate bifurc
3181  *           calculations.
3182  *
3183  * Args:     cm - the model.
3184  *
3185  * Returns:  # of extra decks.
3186  */
3187 static int
cyk_extra_decks(CM_t * cm)3188 cyk_extra_decks(CM_t *cm)
3189 {
3190   int  max;
3191   int  x;
3192   int  v;
3193 
3194   max = x = 0;
3195   for (v = cm->M-1; v >= 0; v--)
3196     {
3197       if      (cm->sttype[v] == S_st) x++;
3198       else if (cm->sttype[v] == B_st) x-=2;
3199       if (x > max) max = x;
3200     }
3201   return max-1;			/* discount ROOT S */
3202 }
3203 
3204 /*################################################################
3205  * The memory management routines.
3206  ################################################################*/
3207 
3208 /*################################################################*/
3209 /* Functions: deckpool_*()
3210  * Date:      SRE, Wed Aug  2 10:43:17 2000 [St. Louis]
3211  *
3212  * Purpose:   Implementation of a pushdown stack for storing decks
3213  *            of the inside or outside dynamic programming matrices, with the
3214  *            usual _create, _push, _pop, and _free API.
3215  *
3216  *            The deck pool allows us to efficiently reuse memory,
3217  *            so long as our DP algorithms step through the decks
3218  *            as their outermost loop.
3219  *
3220  *            Works for either coordinate system (vjd or vji)
3221  *            and subseq variants, because it's simply managing
3222  *            a deck as a float **.
3223  */
3224 struct deckpool_s *
deckpool_create(void)3225 deckpool_create(void)
3226 {
3227   int status;
3228   struct deckpool_s *dpool;
3229 
3230   ESL_ALLOC(dpool, sizeof(struct deckpool_s));
3231   dpool->block  = 10;		/* configurable if you want */
3232   ESL_ALLOC(dpool->pool, sizeof(float **) * dpool->block);
3233   dpool->nalloc = dpool->block;;
3234   dpool->n      = 0;
3235   return dpool;
3236  ERROR:
3237   cm_Fail("Memory allocation error.\n");
3238   return NULL; /* never reached */
3239 }
3240 void
deckpool_push(struct deckpool_s * dpool,float ** deck)3241 deckpool_push(struct deckpool_s *dpool, float **deck)
3242 {
3243   int   status;
3244   void *tmp;
3245   if (dpool->n == dpool->nalloc) {
3246     dpool->nalloc += dpool->block;
3247     ESL_RALLOC(dpool->pool, tmp, sizeof(float **) * dpool->nalloc);
3248   }
3249   dpool->pool[dpool->n] = deck;
3250   dpool->n++;
3251   ESL_DPRINTF3(("#DEBUG: deckpool_push\n"));
3252   return;
3253  ERROR:
3254   cm_Fail("Memory reallocation error.\n");
3255 }
3256 int
deckpool_pop(struct deckpool_s * d,float *** ret_deck)3257 deckpool_pop(struct deckpool_s *d, float ***ret_deck)
3258 {
3259   if (d->n == 0) { *ret_deck = NULL; return 0;}
3260   d->n--;
3261   *ret_deck = d->pool[d->n];
3262   ESL_DPRINTF3(("#DEBUG: deckpool_pop\n"));
3263   return 1;
3264 }
3265 void
deckpool_free(struct deckpool_s * d)3266 deckpool_free(struct deckpool_s *d)
3267 {
3268   free(d->pool);
3269   free(d);
3270 }
3271 /*================================================================*/
3272 
3273 
3274 /*################################################################*/
3275 /* Functions: *_vjd_*
3276  * Date:     SRE, Sat Aug 12 16:27:37 2000 [Titusville]
3277  *
3278  * Purpose:  Allocation and freeing of 3D matrices and 2D decks
3279  *           in the vjd coord system. These can be called on
3280  *           subsequences i..j, not just the full sequence 1..L,
3281  *           so they need i,j... if you're doing the full sequence
3282  *           just pass 1,L.
3283  *
3284  *           Also deal with shadow matrices and shadow decks in the
3285  *           vjd coordinate system. Note that bifurcation shadow decks
3286  *           need more dynamic range than other shadow decks, hence
3287  *           a separation into "kshadow" (BIFURC) and "yshadow" (other
3288  *           states) decks, and some casting shenanigans in
3289  *           a full ***shadow matrix.
3290  *
3291  *           Values in yshad are offsets to the next connected state,
3292  *           or a flag for local alignment. Possible offsets range from
3293  *           0..5 (maximum of 6 connected states). The flags are
3294  *           USED_LOCAL_BEGIN (101) and USED_EL (102), defined at
3295  *           the top of this file. Only yshad[0][L][L] (e.g. root state 0,
3296  *           aligned to the whole sequence) may be set to USED_LOCAL_BEGIN.
3297  *           (Remember that the dynamic range of yshad, as a char, is
3298  *           0..127, in ANSI C; we don't know if a machine will make it
3299  *           signed or unsigned.)
3300  */
/* alloc_vjd_deck()
 * Allocate a score deck for subsequence i..j of a length-L sequence.
 * The row-pointer array always spans 0..L; only rows i-1..j get
 * storage (row i-1 holds 1 float, growing by one per row up to
 * j-i+2 floats in row j), and every other row is NULL.
 * Dies via cm_Fail() on allocation failure.
 */
float **
alloc_vjd_deck(int L, int i, int j)
{
  int     status;
  float **a;
  int     row;			/* absolute row index, 0..L */

  ESL_DPRINTF3(("#DEBUG: alloc_vjd_deck : %.4f\n", size_vjd_deck(L,i,j)));
  ESL_ALLOC(a, sizeof(float *) * (L+1)); /* always alloc 0..L rows, some of which are NULL */

  for (row = 0;   row < i-1; row++) a[row] = NULL;	/* rows before the subsequence */
  for (row = j+1; row <= L;  row++) a[row] = NULL;	/* rows after the subsequence  */
  for (row = i-1; row <= j;  row++) ESL_ALLOC(a[row], sizeof(float) * (row-i+2));
  return a;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
/* size_vjd_deck()
 * Return the size, in Mb (10^6 bytes), of the deck that
 * alloc_vjd_deck(L,i,j) allocates: L+1 row pointers, plus rows
 * of 1, 2, ..., j-i+2 floats.
 */
float
size_vjd_deck(int L, int i, int j)
{
  const int longest = j-i+2;	/* number of floats in the last row */
  float     bytes   = (float) (sizeof(float *) * (L+1));
  int       len;

  for (len = 1; len <= longest; len++)
    bytes += (float) (sizeof(float) * len);
  return (bytes / 1000000.);
}
/* free_vjd_deck()
 * Free a score deck allocated for subsequence i..j: rows i-1..j,
 * then the row-pointer array itself. Rows that are NULL are
 * harmless, since free(NULL) does nothing.
 */
void
free_vjd_deck(float **a, int i, int j)
{
  int row;

  for (row = i-1; row <= j; row++)
    free(a[row]);
  free(a);
}
/* free_vjd_matrix()
 * Free every non-NULL deck in a[0..M] (note: M+1 slots), then the
 * array of deck pointers. Each slot is set to NULL once its deck
 * has been freed.
 */
void
free_vjd_matrix(float ***a, int M, int i, int j)
{
  int v;

  for (v = 0; v <= M; v++)
    {
      if (a[v] == NULL) continue;	/* protect against double free's of reused decks (ends) */
      free_vjd_deck(a[v], i, j);
      a[v] = NULL;
    }
  free(a);
}
/* alloc_vjd_yshadow_deck()
 * Allocate a char shadow deck with the same layout as
 * alloc_vjd_deck(): row pointers 0..L, storage only for rows
 * i-1..j (1, 2, ..., j-i+2 chars), NULL everywhere else.
 * Dies via cm_Fail() on allocation failure.
 */
char **
alloc_vjd_yshadow_deck(int L, int i, int j)
{
  int    status;
  char **a;
  int    row;			/* absolute row index, 0..L */

  ESL_ALLOC(a, sizeof(char *) * (L+1)); /* always alloc 0..L rows, same as alloc_deck */

  for (row = 0;   row < i-1; row++) a[row] = NULL;
  for (row = j+1; row <= L;  row++) a[row] = NULL;
  for (row = i-1; row <= j;  row++) ESL_ALLOC(a[row], sizeof(char) * (row-i+2));
  return a;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
/* size_vjd_yshadow_deck()
 * Return the size, in Mb (10^6 bytes), of the deck that
 * alloc_vjd_yshadow_deck(L,i,j) allocates: L+1 row pointers, plus
 * rows of 1, 2, ..., j-i+2 chars.
 */
float
size_vjd_yshadow_deck(int L, int i, int j)
{
  const int longest = j-i+2;	/* number of chars in the last row */
  float     bytes   = (float) (sizeof(char *) * (L+1));
  int       len;

  for (len = 1; len <= longest; len++)
    bytes += (float) (sizeof(char) * len);
  return bytes / 1000000.;
}
/* free_vjd_yshadow_deck()
 * Free a char shadow deck allocated for subsequence i..j: rows
 * i-1..j, then the row-pointer array. NULL rows are harmless,
 * since free(NULL) does nothing.
 */
void
free_vjd_yshadow_deck(char **a, int i, int j)
{
  int row;

  for (row = i-1; row <= j; row++)
    free(a[row]);
  free(a);
}
/* alloc_vjd_kshadow_deck()
 * Allocate an int shadow deck (used for bifurcation states) with
 * the same layout as alloc_vjd_deck(): row pointers 0..L, storage
 * only for rows i-1..j (1, 2, ..., j-i+2 ints), NULL everywhere else.
 * Dies via cm_Fail() on allocation failure.
 */
int **
alloc_vjd_kshadow_deck(int L, int i, int j)
{
  int   status;
  int **a;
  int   row;			/* absolute row index, 0..L */

  ESL_ALLOC(a, sizeof(int *) * (L+1)); /* always alloc 0..L rows, same as alloc_deck */

  for (row = 0;   row < i-1; row++) a[row] = NULL;
  for (row = i-1; row <= j;  row++) ESL_ALLOC(a[row], sizeof(int) * (row-i+2));
  for (row = j+1; row <= L;  row++) a[row] = NULL;
  return a;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
/* size_vjd_kshadow_deck()
 * Return the size, in Mb (10^6 bytes), of the deck that
 * alloc_vjd_kshadow_deck(L,i,j) allocates: L+1 row pointers, plus
 * rows of 1, 2, ..., j-i+2 ints.
 */
float
size_vjd_kshadow_deck(int L, int i, int j)
{
  const int longest = j-i+2;	/* number of ints in the last row */
  float     bytes   = (float) (sizeof(int *) * (L+1));
  int       len;

  for (len = 1; len <= longest; len++)
    bytes += (float) (sizeof(int) * len);
  return bytes / 1000000.;
}
/* free_vjd_kshadow_deck()
 * Free an int shadow deck allocated by alloc_vjd_kshadow_deck()
 * for subsequence i..j: rows i-1..j, then the row-pointer array.
 *
 * The rows must be addressed exactly as the allocator does, i.e.
 * a[jp+i-1] for jp = 0..j-i+1. Earlier versions freed a[jp] and then
 * a[jp-i+1], which is only right when i == 1; for i > 1 that read
 * (and freed) slots outside the allocated rows, including negative
 * indices, and leaked the real rows.
 */
void
free_vjd_kshadow_deck(int **a, int i, int j)
{
  int jp;

  for (jp = 0; jp <= j-i+1; jp++)
    free(a[jp+i-1]);		/* free(NULL) is a no-op, so no guard is needed */
  free(a);
}
3409 void
free_vjd_shadow_matrix(void *** shadow,CM_t * cm,int i,int j)3410 free_vjd_shadow_matrix(void ***shadow, CM_t *cm, int i, int j)
3411 {
3412   int v;
3413   for (v = 0; v < cm->M; v++)
3414     if (shadow[v] != NULL) {
3415       if (cm->sttype[v] == B_st) free_vjd_kshadow_deck((int **)  shadow[v], i, j);
3416       else                       free_vjd_yshadow_deck((char **) shadow[v], i, j);
3417       shadow[v] = NULL;
3418     }
3419   free(shadow);
3420 }
3421 /*================================================================*/
3422 
3423 
3424 /*################################################################*/
3425 /* Functions: *_vji_*
3426  * Date:     SRE, Sat Aug 12 16:44:55 2000 [Titusville]
3427  *
3428  * Purpose:  Allocation and freeing of 3D matrices and 2D decks
3429  *           in the vji coordinate system. Since these are used
3430  *           only for solving V problems, they work only
3431  *           on a defined cube in the 3D matrix: they need
3432  *           two triplets (r, i0, j0), (z, i1, j1)
3433  *           defining the known optimal endpoints of a segment from
3434  *           an S state to a B state.
3435  *
3436  *           By definition of V problems, there's no B states
3437  *           in between, so the shadow matrix doesn't need any
3438  *           special casting tricks the way the more generally
3439  *           used vjd system does.
3440  */
/* alloc_vji_deck()
 * Allocate a rectangular score deck in the vji coordinate system:
 * j0-j1+1 rows of i1-i0+1 floats each.
 * Dies via cm_Fail() on allocation failure.
 */
float **
alloc_vji_deck(int i0, int i1, int j1, int j0)
{
  int     status;
  float **a;
  int     nrows = j0-j1+1;
  int     ncols = i1-i0+1;
  int     jp;

  ESL_DPRINTF3(("#DEBUG: alloc_vji_deck : %.4f\n", size_vji_deck(i0,i1,j1,j0)));
  ESL_ALLOC(a, sizeof(float *) * nrows);
  for (jp = 0; jp < nrows; jp++)
    ESL_ALLOC(a[jp], sizeof(float) * ncols);
  return a;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
/* size_vji_deck()
 * Return the size, in Mb (10^6 bytes), of the deck that
 * alloc_vji_deck(i0,i1,j1,j0) allocates: j0-j1+1 row pointers plus
 * that many rows of i1-i0+1 floats.
 */
float
size_vji_deck(int i0, int i1, int j1, int j0)
{
  const int nrows = j0-j1+1;
  float     bytes = (float)(sizeof(float *) * nrows);
  int       row;

  for (row = 0; row < nrows; row++)
    bytes += (float)(sizeof(float)*(i1-i0+1));
  return bytes / 1000000.;
}
/* free_vji_deck()
 * Free a vji score deck: rows 0..j0-j1, then the row-pointer array.
 * NULL rows are harmless, since free(NULL) does nothing.
 */
void
free_vji_deck(float **a, int j1, int j0)
{
  const int nrows = j0-j1+1;
  int       row;

  ESL_DPRINTF3(("#DEBUG: free_vji_deck called\n"));
  for (row = 0; row < nrows; row++)
    free(a[row]);
  free(a);
}
/* free_vji_matrix()
 * Free every non-NULL deck in a[0..M] (note: M+1 slots), then the
 * array of deck pointers.
 */
void
free_vji_matrix(float ***a, int M, int j1, int j0)
{
  int v;

  /* Walk the whole matrix even if only a subset of decks was used:
   * all initialization routines set decks 0..M to NULL, so the
   * unused slots are simply skipped. (see bug #i2).
   */
  for (v = 0; v <= M; v++)
    {
      if (a[v] == NULL) continue;
      free_vji_deck(a[v], j1, j0);
      a[v] = NULL;
    }
  free(a);
}
/* alloc_vji_shadow_deck()
 * Allocate a traceback pointer (shadow) deck in the vji coordinate
 * system: j0-j1+1 rows of i1-i0+1 chars each.
 * Dies via cm_Fail() on allocation failure.
 */
char **
alloc_vji_shadow_deck(int i0, int i1, int j1, int j0)
{
  int    status;
  char **a;
  int    nrows = j0-j1+1;
  int    ncols = i1-i0+1;
  int    jp;

  ESL_ALLOC(a, sizeof(char *) * nrows);
  for (jp = 0; jp < nrows; jp++)
    ESL_ALLOC(a[jp], sizeof(char) * ncols);
  return a;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
/* size_vji_shadow_deck()
 * Return the size, in Mb (10^6 bytes), of the traceback pointer
 * (shadow) deck that alloc_vji_shadow_deck(i0,i1,j1,j0) allocates:
 * j0-j1+1 row pointers plus that many rows of i1-i0+1 chars.
 */
float
size_vji_shadow_deck(int i0, int i1, int j1, int j0)
{
  const int nrows = j0-j1+1;
  float     bytes = (float)(sizeof(char *) * nrows);
  int       row;

  for (row = 0; row < nrows; row++)
    bytes += (float)(sizeof(char)*(i1-i0+1));
  return bytes / 1000000;
}
/* free_vji_shadow_deck()
 * Free a vji shadow deck: rows 0..j0-j1, then the row-pointer array.
 * NULL rows are harmless, since free(NULL) does nothing.
 */
void
free_vji_shadow_deck(char **a, int j1, int j0)
{
  const int nrows = j0-j1+1;
  int       row;

  for (row = 0; row < nrows; row++)
    free(a[row]);
  free(a);
}
/* free_vji_shadow_matrix()
 * Free every non-NULL shadow deck in a[0..M-1], then the array of
 * deck pointers.
 */
void
free_vji_shadow_matrix(char ***a, int M, int j1, int j0)
{
  int v;

  for (v = 0; v < M; v++)
    {
      if (a[v] == NULL) continue;
      free_vji_shadow_deck(a[v], j1, j0);
      a[v] = NULL;
    }
  free(a);
}
3527 
3528 
3529 /*################################################################
3530  * Unused code -
3531  *     a reference implementation of the real Outside() algorithm,
3532  *     including bifurcations.
3533  *################################################################*/
#if 0
/* Function: CYKOutside()
 * Date:     SRE, Mon Aug  7 07:45:37 2000 [St. Louis]
 *
 * Purpose:  Reference implementation of the Outside algorithm,
 *           including bifurcations. Compiled out (#if 0) and kept
 *           for reference only.
 *
 *           Fills beta decks for states 1..M-1 over the whole
 *           sequence 1..L, using an already filled Inside matrix
 *           <alpha> for the sibling subtree at bifurcations. Decks
 *           are recycled through a deck pool once all the children
 *           of a state have been computed.
 *
 * Args:     cm    - the model
 *           dsq   - digitized sequence 1..L
 *           L     - length of dsq
 *           alpha - filled Inside matrix, read as alpha[v][j][d]
 *
 * Returns:  (void). NOTE(review): beta and dpool are neither
 *           returned nor freed (see the placeholder comments at the
 *           end), so as written the result is lost and the memory
 *           leaks; decide on ownership before enabling this code.
 */
void
CYKOutside(CM_t *cm, ESL_DSQ *dsq, int L, float ***alpha)
{
  int      status;
  float ***beta;		/* the scoring cube [v=0..M-1][j=0..L][d=0..j]*/
  int      v,y,z;		/* indices for states */
  int      j,d,i,k;		/* indices in sequence dimensions */
  float    sc;			/* a temporary variable holding a score */
  struct deckpool_s *dpool;     /* a pool of decks for beta that we can reuse */
  int     *touch;               /* keeps track of how many lower decks still need this deck */
  float    escore;		/* an emission score, tmp variable */

  /* Allocations and initializations
   */
  ESL_ALLOC(beta, (sizeof(float **) * cm->M));
  for (v = 0; v < cm->M; v++) beta[v] = NULL;

  dpool = deckpool_create();

  ESL_ALLOC(touch, sizeof(int) * cm->M);
  for (v = 0; v < cm->M; v++)
    if (cm->sttype[v] == B_st) touch[v] = 2;
    else                       touch[v] = cm->cnum[v];

  /* The root deck has to exist before it can be initialized; the
   * main loop below only fetches decks for v >= 1.
   */
  beta[0] = alloc_vjd_deck(L, 1, L);
  for (j = 0; j <= L; j++)
    for (d = 0; d <= j; d++)	/* inner loop steps d (it used to step j and never terminate) */
      beta[0][j][d] = IMPOSSIBLE; /* can prob speed this initialization up */
  beta[0][L][L] = 0;

  /* Main loop down through the decks
   */
  /* EPN bug fix 05.25.06. Durbin et. al. p.287 CM Outside alg uses state
   * indices 1..M, with state 1 = ROOT_S, so there's an off-by-one
   * w.r.t this implementation. Following loop followed Durbin convention,
   * but should follow implemented convention:
   * OLD LINE: for (v = 2; v < cm->M; v++)
   */
  for (v = 1; v < cm->M; v++)
    {
      /* First we need to fetch a deck of memory to fill in;
       * we try to reuse a deck but if one's not available we allocate
       * a fresh one.
       */
      if (! deckpool_pop(dpool, &(beta[v])))
	beta[v] = alloc_vjd_deck(L, 1, L);

      /* main recursion:
       */
      for (j = L; j >= 0; j--)
	for (d = j; d >= 0; d--)
	  {
	    if (cm->stid[v] == BEGL_S)
	      {
		y = cm->plast[v];	/* the parent bifurcation    */
		z = cm->cnum[y];	/* the other (right) S state */

		beta[v][j][d] = beta[y][j][d] + alpha[z][j][0]; /* init on k=0 */
		for (k = 1; k <= L-j; k++)
		  if ((sc = beta[y][j+k][d+k] + alpha[z][j+k][k]) > beta[v][j][d])
		    beta[v][j][d] = sc;
	      }
	    else if (cm->stid[v] == BEGR_S)
	      {
		y = cm->plast[v];	        /* the parent bifurcation    */
		z = cm->cfirst[y];	/* the other (left) S state */

		beta[v][j][d] = beta[y][j][d] + alpha[z][j-d][0];	/* init on k=0 */
		for (k = 1; k <= j-d; k++)
		  if ((sc = beta[y][j][d+k] + alpha[z][j-d][k]) > beta[v][j][d])
		    beta[v][j][d] = sc;
	      }
	    else
	      {
		/* Start the max over parents from IMPOSSIBLE. This must
		 * reset the beta cell being filled; it used to overwrite
		 * the caller's alpha cell and leave beta uninitialized.
		 */
		beta[v][j][d] = IMPOSSIBLE;
		i = j-d+1;
		for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
		  /* Dispatch on the type of the parent state y (this used
		   * to index sttype with the sequence position j).
		   * NOTE(review): S_st is not listed, so a start state
		   * acting as a parent would hit the default and die;
		   * confirm whether S_st belongs with the non-emitters.
		   */
		  switch(cm->sttype[y]) {
		  case MP_st:
		    /* NOTE(review): unlike the MR/IR case, j == L is not
		     * excluded here although dsq[j+1] and beta[y][j+1]
		     * are read; confirm this boundary condition.
		     */
		    if (d == j || d == j-1) continue; /* boundary condition */

		    if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
		      escore = cm->esc[y][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
		    else
		      escore = DegeneratePairScore(cm->abc, cm->esc[y], dsq[i-1], dsq[j+1]);

		    if ((sc = beta[y][j+1][d+2] + cm->tsc[y][v] + escore) > beta[v][j][d])
		      beta[v][j][d] = sc;
		    break;

		  case ML_st:
		  case IL_st:
		    if (d == j) continue;	/* boundary condition (note when j=0, d=0*/

		    if (dsq[i-1] < cm->abc->K)
		      escore = cm->esc[y][(int) dsq[i-1]];
		    else
		      escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[y]);

		    if ((sc = beta[y][j][d+1] + cm->tsc[y][v] + escore) > beta[v][j][d])
		      beta[v][j][d] = sc;
		    break;

		  case MR_st:
		  case IR_st:
		    if (d == j || j == L) continue;

		    if (dsq[j+1] < cm->abc->K)
		      escore = cm->esc[y][(int) dsq[j+1]];
		    else
		      escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[y]);

		    if ((sc = beta[y][j+1][d+1] + cm->tsc[y][v] + escore) > beta[v][j][d])
		      beta[v][j][d] = sc;
		    break;

		  case B_st:
		  case E_st:
		  case D_st:
		    if ((sc = beta[y][j][d] + cm->tsc[y][v]) > beta[v][j][d])
		      beta[v][j][d] = sc;
		    break;

		  default: cm_Fail("bogus parent state %d\n", cm->sttype[y]);
		  }/* end switch over states*/
		}
	      }/*ends our handling of beta[v][j][d] */
	    if (beta[v][j][d] < IMPOSSIBLE) beta[v][j][d] = IMPOSSIBLE;
	  }

      /* Finished deck v.
       * now worry about reuse of memory in beta:
       */
      for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--)
	{
	  touch[y]--;
	  if (touch[y] == 0) {
	    deckpool_push(dpool, beta[y]);
	    beta[y] = NULL;
	  }
	}
    } /* end loop over decks v. */

  free(touch);
  /*dpool*/
  /*beta*/
  return;
 ERROR:
  cm_Fail("Memory allocation error.");
}
#endif
3688 
3689 /*################################################################
3690  * The banded dividers and conquerors.
3691  *################################################################*/
3692 
3693 /* Function: generic_splitter_b()
3694  *           EPN 05.19.05
3695  * *based on generic_splitter(), only difference is bands are used :
3696  * Date:     SRE, Sat May 12 15:08:38 2001 [CSHL]
3697  *
3698  * Purpose:  Solve a "generic problem": best parse of
3699  *           a possibly bifurcated subgraph cm^r_z to
3700  *           a substring dsq[i0..j0]. r is usually a start
3701  *           state (S_st) but may be any non-end state type in
3702  *           the case of local alignment begins (ROOT 0->r).
3703  *           z is always an end state (E_st).
3704  *
3705  *           Given: a cm subgraph from r..z
3706  *                  a subsequence from i0..j0
3707  *           Attaches the optimal trace T{r..z}, exclusive of r
3708  *           and inclusive of z, to tr.
3709  *
3710  *           A full divide & conquer never terminates
3711  *           in generic_splitter; the recursion must
3712  *           terminate in v_splitter and wedge_splitter;
3713  *           so we don't test an end-of-recursion boundary.
3714  *
3715  * Args:     cm          - model
 *           dsq         - digitized sequence 1..L
 *           L           - length of dsq
3717  *           tr          - the traceback we're adding on to.
3718  *           r           - index of the root state of this problem in the model
3719  *           z           - index of an end state (E_st) in the model
3720  *           i0          - start in the sequence (1..L)
3721  *           j0          - end in the sequence (1..L)
3722  *           dmin   - minimum d bound for each state v; [0..v..M-1]
3723  *           dmax   - maximum d bound for each state v; [0..v..M-1]
3724  *
3725  * Returns:  score of the optimal parse of dsq(i0..j0) with cm^r_z
3726  */
3727 static float
generic_splitter_b(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int j0,int * dmin,int * dmax)3728 generic_splitter_b(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
3729 		 int r, int z, int i0, int j0, int *dmin, int *dmax)
3730 {
3731   float ***alpha;
3732   float ***beta;
3733   struct deckpool_s *pool;
3734   int      v,w,y;		/* state indices */
3735   int      wend, yend;		/* indices for end of subgraphs rooted at w,y */
3736   int      jp;			/* j': relative position in subseq, 0..W */
3737   int      W;			/* length of subseq i0..j0 */
3738   float    sc;			/* tmp variable for a score */
3739   int      j,d,k;		/* sequence indices */
3740   float    best_sc;		/* optimal score at the optimal split point */
3741   int      best_k;		/* optimal k for the optimal split */
3742   int      best_d;		/* optimal d for the optimal split */
3743   int      best_j;		/* optimal j for the optimal split */
3744   int      tv;			/* remember the position of a bifurc in the trace. */
3745   int      b1,b2;		/* argmax_v for 0->v local begin transitions */
3746   float    b1_sc, b2_sc;	/* max_v scores for 0->v local begin transitions */
3747 
3748   /* 1. If the generic problem is small enough, solve it with insideT,
3749    *    and append the trace to tr.
3750    */
3751   if (insideT_size(cm, L, r, z, i0, j0) < RAMLIMIT) {
3752     ESL_DPRINTF2(("#DEBUG: Solving a generic w/ insideT - G%d[%s]..%d[%s], %d..%d\n",
3753 		  r, UniqueStatetype(cm->stid[r]),
3754 		  z, UniqueStatetype(cm->stid[z]),
3755 		  i0, j0));
3756     sc = insideT(cm, dsq, L, tr, r, z, i0, j0, (r==0), dmin, dmax);
3757     return sc;
3758   }
3759 
3760   /* 2. Traverse down from r, find first bifurc.
3761    *    The lowest a bifurc could be: B-S-E/S-IL-E = vend-5
3762    *
3763    */
3764   for (v = r; v <= z-5; v++)
3765     if (cm->sttype[v] == B_st) break; /* found the first bifurcation, now v */
3766 
3767   /* 3. If there was no bifurcation, this is a wedge problem; solve it
3768    *    with wedge_splitter.
3769    */
3770   if (v > z-5) {		/* no bifurc? it's a wedge problem  */
3771     if (cm->sttype[z] != E_st) cm_Fail("inconceivable.");
3772     sc = wedge_splitter_b(cm, dsq, L, tr, r, z, i0, j0, dmin, dmax);
3773     return sc;
3774   }
3775 
3776   /* Set up the state quartet r,v,w,y for a divide and conquer
3777    * solution of the generic problem.
3778    */
3779   w = cm->cfirst[v];		/* index of left S  */
3780   y = cm->cnum[v];		/* index right S    */
3781   if (w < y) { wend = y-1; yend = z; }
3782   else       { yend = w-1; wend = z; }
3783 
3784   /* Calculate alpha[w] deck and alpha[y] deck.
3785    * We also get b1: best choice for 0->b local begin. b1_sc is the score if we do this.
3786    * Analogous for b2, b2_sc on the other side.
3787    */
3788   inside_b(cm, dsq, L, w, wend, i0, j0, BE_EFFICIENT, NULL,  &alpha, NULL, &pool, NULL,
3789 	   (r==0), &b1, &b1_sc, dmin, dmax);
3790   inside_b(cm, dsq, L, y, yend, i0, j0, BE_EFFICIENT, alpha, &alpha, pool, &pool, NULL,
3791 	   (r==0), &b2, &b2_sc, dmin, dmax);
3792 
3793   /* Calculate beta[v] deck (stick it in alpha). Let the pool get free'd.
3794    * (If we're doing local alignment, deck M is the beta[EL] deck.)
3795    */
3796   outside_b(cm, dsq, L, r, v, i0, j0, BE_EFFICIENT, alpha, &beta, pool, NULL, dmin, dmax);
3797 
3798   /* Find the optimal split at the B.
3799    */
3800   W = j0-i0+1;
3801   best_sc = IMPOSSIBLE;
3802   for (jp = 0; jp <= W; jp++)
3803     {
3804       j = i0-1+jp;
3805       /* Bands used */
3806       /* old line : for (d = 0; d <= jp; d++) */
3807       for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
3808 	for (k = 0; k <= d; k++)
3809 	  if ((sc = alpha[w][j-k][d-k] + alpha[y][j][k] + beta[v][j][d]) > best_sc)
3810 	    {
3811 	      best_sc = sc;
3812 	      best_k  = k;
3813 	      best_j  = j;
3814 	      best_d  = d;
3815 	    }
3816     }
3817 
3818   /* Local alignment only: maybe we're better off in EL?
3819    */
3820   if (cm->flags & CMH_LOCAL_END) {
3821     for (jp = 0; jp <= W; jp++)
3822       {
3823 	j = i0-1+jp;
3824 	/* There is no band on the EL state */
3825 	for (d = 0; d <= jp; d++)
3826 	  if ((sc = beta[cm->M][j][d]) > best_sc) {
3827 	    best_sc = sc;
3828 	    best_k  = -1;	/* special flag for local end, EL. */
3829 	    best_j  = j;
3830 	    best_d  = d;
3831 	  }
3832       }
3833   }
3834 
3835   /* Local alignment only: maybe we're better off in ROOT?
3836    */
3837   if (r == 0 && cm->flags & CMH_LOCAL_BEGIN) {
3838     if (b1_sc > best_sc) {
3839       best_sc = b1_sc;
3840       best_k  = -2;		/* flag for using local begin into left wedge w..wend */
3841       best_j  = j0;
3842       best_d  = W;
3843     }
3844     if (b2_sc > best_sc) {
3845       best_sc = b2_sc;
3846       best_k  = -3;		/* flag for using local begin into right wedge y..yend */
3847       best_j  = j0;
3848       best_d  = W;
3849     }
3850   }
3851 
3852   /* Free now, before recursing.
3853    * The two alpha matrices and the beta matrix
3854    * actually all point to the same memory, since no
3855    * decks in Inside and Outside needed to overlap.
3856    * Free 'em all in one call.
3857    */
3858   free_vjd_matrix(alpha, cm->M, i0, j0);
3859 
3860   /* If we're in EL, instead of B, the optimal alignment is entirely
3861    * in a V problem that's still above us. The TRUE flag sets useEL.
3862    */
3863   if (best_k == -1) {
3864     v_splitter_b(cm, dsq, L, tr, r, v, i0, best_j-best_d+1, best_j, j0, TRUE, dmin, dmax);
3865     return best_sc;
3866   }
3867 
3868   /* Else: if we're in the root 0, we know which r we did our local begin into.
3869    * We have a generic problem rooted there. The FALSE flag disallows
3870    * any further local begins.
3871    */
3872   if (best_k == -2) {
3873     InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b1);
3874     z = CMSubtreeFindEnd(cm, b1);
3875     generic_splitter_b(cm, dsq, L, tr, b1, z, i0, j0, dmin, dmax);
3876     return best_sc;
3877   }
3878   if (best_k == -3) {
3879     InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b2);
3880     z = CMSubtreeFindEnd(cm, b2);
3881     generic_splitter_b(cm, dsq, L, tr, b2, z, i0, j0, dmin, dmax);
3882     return best_sc;
3883   }
3884 
3885   /* Else (the usual case), ok, we did use B in the optimal split.
3886    * Split now into a V problem and two generic problems, and recurse
3887    * left fragment: i1 = j-d+1, j1 = j-k, vroot = w, vend = wend
3888    * right frag:    i2 = j-k+1, j2 = j,   vroot = y, vend = yend
3889    *
3890    * The problems must be solved in a particular order, since we're
3891    * constructing the trace in a postorder traversal.
3892    */
3893   ESL_DPRINTF2(("#DEBUG: Generic splitter:\n"));
3894   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
3895 		r, UniqueStatetype(cm->stid[r]),
3896 		v, UniqueStatetype(cm->stid[v]),
3897 		i0, best_j-best_d+1, best_j, j0));
3898   ESL_DPRINTF2(("#DEBUG:    generic: G%d[%s]..%d[%s], %d..%d\n",
3899 		w,    UniqueStatetype(cm->stid[w]),
3900 		wend, UniqueStatetype(cm->stid[wend]),
3901 		best_j-best_d+1, best_j-best_k));
3902   ESL_DPRINTF2(("#DEBUG:    generic: G%d[%s]..%d[%s], %d..%d\n",
3903 		y,    UniqueStatetype(cm->stid[y]),
3904 		yend, UniqueStatetype(cm->stid[yend]),
3905 		best_j-best_k+1, best_j));
3906 
3907   v_splitter_b(cm, dsq, L, tr, r, v, i0, best_j-best_d+1, best_j, j0, FALSE, dmin, dmax);
3908   tv = tr->n-1;
3909 
3910   InsertTraceNode(tr, tv, TRACE_LEFT_CHILD, best_j-best_d+1, best_j-best_k, w);
3911   generic_splitter_b(cm, dsq, L, tr, w, wend, best_j-best_d+1, best_j-best_k, dmin, dmax);
3912   InsertTraceNode(tr, tv, TRACE_RIGHT_CHILD, best_j-best_k+1, best_j, y);
3913   generic_splitter_b(cm, dsq, L, tr, y, yend, best_j-best_k+1, best_j, dmin, dmax);
3914 
3915   return best_sc;
3916 }
3917 
3918 /* Function: wedge_splitter_b()
3919  *           EPN 05.19.05
3920  * *based on wedge_splitter(), only difference is bands are used :
3921  * Date:     SRE, Sun May 13 08:44:15 2001 [CSHL genome mtg]
3922  *
3923  * Purpose:  Solve a "wedge problem": best parse of an
3924  *           unbifurcated subgraph cm^r..z to a substring
3925  *           dsq[i0..j0]. r may be a start state (when
3926  *           the wedge problem comes from being a special case
3927  *           of a generic problem) or a non-insert state
3928  *           (D, MP, ML, MR) (when the wedge comes from a
3929  *           previous wedge_splitter), or indeed, any non-end
3930  *           state (when wedge comes from a local begin).
3931  *           z, however, is always an end state.
3932  *
3933  *           Attaches the optimal trace T(r..z), exclusive
3934  *           of r and inclusive of z, to the growing trace tr.
3935  *
3936  *           Deal with a divide and conquer boundary condition:
3937  *           the next non-insert state after r is the end state z.
3938  *           All remaining sequence of i0..j0 that r doesn't emit
3939  *           must be dealt with by insert states.
3940  *
3941  * Args:     cm          - model
3942  *           dsq         - digitized sequence 1..L
3943  *           L           - length of dsq
3944  *           tr          - the traceback we're adding on to.
3945  *           r           - index of the first state in the subgraph
3946  *           z           - index of an end state (E_st) in the model
3947  *           i0          - start in the sequence (1..L)
3948  *           j0          - end in the sequence (1..L)
3949  *           dmin   - minimum d bound for each state v; [0..v..M-1]
3950  *           dmax   - maximum d bound for each state v; [0..v..M-1]
3951  *
3952  * Returns:  The score of the best parse in bits.
3953  */
3954 static float
wedge_splitter_b(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int j0,int * dmin,int * dmax)3955 wedge_splitter_b(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr, int r, int z, int i0, int j0,
3956 		 int *dmin, int *dmax)
3957 {
3958   float ***alpha;
3959   float ***beta;
3960   struct deckpool_s *pool;
3961   float sc;
3962   float best_sc;
3963   int   v,w,y;
3964   int   W;
3965   int   d, jp, j;
3966   int   best_v, best_d, best_j;
3967   int   midnode;
3968   int   b;	/* optimal local begin: b = argmax_v alpha_v(i0,j0) + t_0(v) */
3969   float bsc;	/* score for optimal local begin      */
3970 
3971   /* 1. If the wedge problem is either a boundary condition,
3972    *    or small enough, solve it with inside^T and append
3973    *    the trace to tr.
3974    *    It's formally possible that someone could set RAMLIMIT
3975    *    to something so small that even the boundary condition
3976    *    couldn't be done with inside^T - but that'd be a silly
3977    *    thing to do, so we ignore RAMLIMIT in that case.
3978    */
3979   if (cm->ndidx[z] == cm->ndidx[r] + 1 ||
3980       insideT_size(cm, L, r, z, i0, j0) < RAMLIMIT)
3981     {
3982       ESL_DPRINTF2(("#DEBUG: Solving a wedge:   G%d[%s]..%d[%s], %d..%d\n",
3983 		r, UniqueStatetype(cm->stid[r]),
3984 		z, UniqueStatetype(cm->stid[z]),
3985 		i0,j0));
3986       sc = insideT(cm, dsq, L, tr, r, z, i0, j0, (r==0), dmin, dmax);
3987       return sc;
3988     }
3989 
3990   /* 2. Find our split set, w..y
3991    *    We choose the node in the middle.
3992    *    This can't be a BIF_nd (we're a wedge), or an END_nd (midnode
3993    *    can't be z) but it could be any other node including
3994    *    begin nodes (i.e. it might be that w==y).
3995    */
3996   midnode = cm->ndidx[r] + ((cm->ndidx[z] - cm->ndidx[r]) / 2);
3997   w = cm->nodemap[midnode];
3998   y = cm->cfirst[w]-1;
3999 
4000   /* 3. Calculate inside up to w, and outside down to y.
4001    *    We rely on a side effect of how deallocation works
4002    *    in these routines; the w..y decks are guaranteed
4003    *    to be retained.
4004    *    b will contain the optimal 0->v state for a local begin, and bsc
4005    *    is the score for using it.
4006    *    beta[cm->M] will contain the EL deck, if needed for local ends.
4007    */
4008   inside_b(cm, dsq, L, w, z, i0, j0, BE_EFFICIENT,
4009 	   NULL, &alpha, NULL, &pool, NULL,
4010 	   (r==0), &b, &bsc, dmin, dmax);
4011   outside_b(cm, dsq, L, r, y, i0, j0, BE_EFFICIENT, NULL, &beta, pool, NULL,
4012   dmin, dmax);
4013 
4014   /* 4. Find the optimal split at the split set: best_v, best_d, best_j
4015    */
4016   W = j0-i0+1;
4017   best_sc = IMPOSSIBLE;
4018   for (v = w; v <= y; v++)
4019     for (jp = 0; jp <= W; jp++)
4020       {
4021 	j = i0-1+jp;
4022 	for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
4023 	  if ((sc = alpha[v][j][d] + beta[v][j][d]) > best_sc)
4024 	    {
4025 	      best_sc = sc;
4026 	      best_v  = v;
4027 	      best_d  = d;
4028 	      best_j  = j;
4029 	    }
4030       }
4031 
4032   /* Local alignment ends only: maybe we're better off in EL,
4033    * not in the split set?
4034    */
4035   if (cm->flags & CMH_LOCAL_END) {
4036     for (jp = 0; jp <= W; jp++)
4037       {
4038 	j = i0-1+jp;
4039 	/* There is no band on the EL state */
4040 	for (d = 0; d <= jp; d++)
4041 	  if ((sc = beta[cm->M][j][d]) > best_sc) {
4042 	    best_sc = sc;
4043 	    best_v  = -1;	/* flag for local alignment. */
4044 	    best_j  = j;
4045 	    best_d  = d;
4046 	  }
4047       }
4048   }
4049 
4050   /* Local alignment begins only: maybe we're better off in the root.
4051    */
4052   if (r==0 && (cm->flags & CMH_LOCAL_BEGIN)) {
4053     if (bsc > best_sc) {
4054       best_sc = bsc;
4055       best_v  = -2;		/* flag for local alignment */
4056       best_j  = j0;
4057       best_d  = W;
4058     }
4059   }
4060 
4061   /* free now, before recursing!
4062    */
4063   free_vjd_matrix(alpha, cm->M, i0, j0);
4064   free_vjd_matrix(beta,  cm->M, i0, j0);
4065 
4066   /* If we're in EL, instead of the split set, the optimal alignment
4067    * is entirely in a V problem that's still above us. The TRUE
4068    * flag sets useEL. It doesn't matter which state in the split
4069    * set w..y we use as the end of the graph; vinside() will have to
4070    * initialize the whole thing to IMPOSSIBLE anyway.
4071    */
4072   if (best_v == -1) {
4073     v_splitter_b(cm, dsq, L, tr, r, w, i0, best_j-best_d+1, best_j, j0, TRUE, dmin, dmax);
4074     return best_sc;
4075   }
4076 
4077   /* If we're in the root because of a local begin, the local alignment
4078    * is entirely in a wedge problem that's still below us, rooted at b.
4079    * The FALSE flag prohibits any more local begins in this and subsequent
4080    * problems.
4081    */
4082   if (best_v == -2) {
4083     InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b);
4084     wedge_splitter_b(cm, dsq, L, tr, b, z, i0, j0, dmin, dmax);
4085     return best_sc;
4086   }
4087 
4088   /* Else (usual case): the optimal split into a V problem and a wedge problem:
4089    *    i1 = best_j-best_d+1, j1 = best_j
4090    *    the V problem:     r..v, i0..i1, j1..j0
4091    *    the wedge problem: v..z, i1..j1
4092    *
4093    *    These have to solved in the order given because we're
4094    *    constructing the trace in postorder traversal.
4095    */
4096   ESL_DPRINTF2(("#DEBUG: Wedge splitter:\n"));
4097   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
4098 		r, UniqueStatetype(cm->stid[r]),
4099 		best_v, UniqueStatetype(cm->stid[best_v]),
4100 		i0, best_j-best_d+1, best_j, j0));
4101   ESL_DPRINTF2(("#DEBUG:    wedge:   G%d[%s]..%d[%s], %d..%d\n",
4102 		best_v, UniqueStatetype(cm->stid[best_v]),
4103 		z, UniqueStatetype(cm->stid[z]),
4104 		best_j-best_d+1, best_j));
4105 
4106   v_splitter_b(cm, dsq, L, tr, r, best_v, i0, best_j-best_d+1, best_j, j0, FALSE,
4107 	       dmin, dmax);
4108   wedge_splitter_b(cm, dsq, L, tr, best_v, z, best_j-best_d+1, best_j, dmin, dmax);
4109   return best_sc;
4110 }
4111 
4112 
4113 
4114 /* Function: vsplitter_b()
4115  *           EPN 05.19.05
4116  * *based on vsplitter(), only difference is bands are used :
4117  *
4118  * Date:     SRE, Thu May 31 19:47:57 2001 [Kaldi's]
4119  *
4120  * Purpose:  Solve a "V problem": best parse of an unbifurcated
4121  *           subgraph cm^r..z to a one-hole subsequence
4122  *           i0..i1 // j1..j0.
4123  *
4124  *           Attaches the optimal trace T(r..z), exclusive of
4125  *           r, inclusive of z, to the growing trace tr.
4126  *
4127  *           r and z can be any non-insert state.
4128  *
4129  * Args:     cm          -  model
4130  *           dsq         - digitized sequence 1..L
4131  *           L           - length of dsq
4132  *           tr          - the traceback we're adding on to.
4133  *           r           - index of the first state in the subgraph
4134  *           z           - index of the last state in the subgraph
4135  *           i0,i1       - first part of the subsequence (1..L)
4136  *           j1,j0       - second part of the subsequence (1..L)
4137  *           useEL       - TRUE if i1,j1 aligned to EL, not z
4138  *           dmin        - minimum d bound for each state v; [0..v..M-1]
4139  *           dmax        - maximum d bound for each state v; [0..v..M-1]
4140  *
4141  * Returns:  (void)
4142  */
4143 static void
v_splitter_b(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int i1,int j1,int j0,int useEL,int * dmin,int * dmax)4144 v_splitter_b(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
4145 	   int r, int z, int i0, int i1, int j1, int j0,
4146 	   int useEL, int *dmin, int *dmax)
4147 {
4148   float ***alpha, ***beta;      /* inside and outside matrices */
4149   struct deckpool_s *pool;      /* pool for holding alloced decks */
4150   float sc;			/* tmp variable holding a score */
4151   int   v,w,y;			/* state indexes */
4152   int   ip,jp;
4153   int   best_v;
4154   int   best_i, best_j;		/* optimal i', j' split point */
4155   float best_sc;		/* score at optimal split point */
4156   int   midnode;
4157   int   b;			/* optimal choice for a 0->b local begin  */
4158   float bsc;			/* score if we use the local begin */
4159   int     *imin;                /* minimum i bound for each state v; [0..y-w]
4160                                  * calculated using *dmin; offset from v, the
4161 				 * band that corresponds to state v, is imin[v-w] */
4162   int     *imax;                /* maximum i bound for each state v; [0..y-w]
4163                                  * calculated using *dmax; offset from v, the
4164 				 * band that corresponds to state v, is imax[v-w] */
4165 
4166 
4167   /* 1. If the V problem is either a boundary condition, or small
4168    *    enough, solve it with v_inside^T and append the trace to tr.
4169    *    (With local alignment, we might even see a lone B state
4170    *     get handed to v_splitter(); hence the r==z case.)
4171    */
4172    if (cm->ndidx[z] == cm->ndidx[r] + 1 || r == z ||
4173       vinsideT_size(cm, r, z, i0, i1, j1, j0) < RAMLIMIT)
4174     {
4175       ESL_DPRINTF2(("#DEBUG: Solving a V:   G%d[%s]..%d[%s], %d..%d//%d..%d\n",
4176 		r, UniqueStatetype(cm->stid[r]),
4177 		z, UniqueStatetype(cm->stid[z]),
4178 		i0,j1,j1,j0));
4179       vinsideT(cm, dsq, L, tr, r, z, i0, i1, j1, j0, useEL, (r==0), dmin, dmax);
4180       return;
4181     }
4182 
4183   /* 2. Find our split set, w..y.
4184    *    Choose the node in the middle.
4185    */
4186   midnode = cm->ndidx[r] + ((cm->ndidx[z] - cm->ndidx[r]) / 2);
4187   w = cm->nodemap[midnode];
4188   y = cm->cfirst[w]-1;
4189 
4190   /* 3. Calculate v_inside up to w, and v_outside down to y.
4191    *    As with wedge_splitter(), we rely on a side effect of how
4192    *    deallocation works, so the w..y decks are retained
4193    *    in alpha and beta even though we're in small memory mode.
4194    *    beta[cm->M] is the EL deck, needed for local ends.
4195    */
4196   vinside_b (cm, dsq, L, w, z, i0, i1, j1, j0, useEL, BE_EFFICIENT,
4197 	     NULL, &alpha, NULL, &pool, NULL, (r==0), &b, &bsc,
4198 	     dmin, dmax);
4199   voutside_b(cm, dsq, L, r, y, i0, i1, j1, j0, useEL, BE_EFFICIENT,
4200 	     NULL, &beta,  pool, NULL, dmin, dmax);
4201 
4202   /* 4. Find the optimal split: v, ip, jp.
4203    */
4204   /* Bands used ip 1A */
4205   imin = malloc(sizeof (int) * (y-w+1));
4206   imax = malloc(sizeof (int) * (y-w+1));
4207 
4208   best_sc = IMPOSSIBLE;
4209   for (v = w; v <= y; v++)
4210     {
4211       /* Bands used ip 1B */
4212 
4213       /* Fill imin[v-w] and imax[v-w] as we go, one of many ways to handle imin and imax
4214        * Remember state indices in imin and imax are offset from v because imin and
4215        * imax run [0..y-w], ==> dmin[v] corresponds to imin[v-w]
4216        */
4217 
4218       imin[v-w] = j1-i0-dmax[v]+1;
4219       imax[v-w] = j1-i0-dmin[v]+1;
4220 
4221       /*orig lines : for (ip = 0; ip <= i1-i0; ip++)
4222        *                    for (jp = 0; jp <= j0-j1; jp++)
4223        *the order is switched here because the band on ip depends
4224        *on jp.
4225        */
4226       for (jp = 0; jp <= j0-j1; jp++)
4227 	{
4228 	  if((imin[v-w]+jp) < 0) ip = 0;
4229 	  else ip = imin[v-w]+jp;
4230 	  for (; (ip <= imax[v-w]+jp) && ip <= (i1-i0); ip++)
4231 	    if ((sc = alpha[v][jp][ip] + beta[v][jp][ip]) > best_sc)
4232 	      {
4233 		best_sc = sc;
4234 		best_v  = v;
4235 		best_i  = ip + i0;
4236 		best_j  = jp + j1;
4237 	      }
4238 	}
4239     }
4240   /* Local alignment ends: maybe we're better off in EL, not
4241    * the split set?
4242    */
4243   if (useEL && (cm->flags & CMH_LOCAL_END)) {
4244     /* There is no band on the EL state */
4245     for (ip = 0; ip <= i1-i0; ip++)
4246       for (jp = 0; jp <= j0-j1; jp++)
4247 	if ((sc = beta[cm->M][jp][ip]) > best_sc) {
4248 	  best_sc = sc;
4249 	  best_v  = -1;
4250 	  best_i  = ip + i0;
4251 	  best_j  = jp + j1;
4252 	}
4253   }
4254 
4255   /* Local alignment begins: maybe we're better off in root...
4256    */
4257   if (r==0 && (cm->flags & CMH_LOCAL_BEGIN)) {
4258     if (bsc > best_sc) {
4259       best_sc = bsc;
4260       best_v  = -2;
4261       best_i  = i0;
4262       best_j  = j0;
4263     }
4264   }
4265 
4266   /* Free now, before recursing!
4267    */
4268   free_vji_matrix(alpha, cm->M, j1, j0);
4269   free_vji_matrix(beta,  cm->M, j1, j0);
4270 
4271   /* If we're in EL, instead of the split set, the optimal
4272    * alignment is entirely in a V problem that's still above us.
4273    * The TRUE flag sets useEL; we propagate allow_begin.
4274    */
4275   if (best_v == -1) {
4276     v_splitter_b(cm, dsq, L, tr, r, w, i0, best_i, best_j, j0, TRUE, dmin, dmax);
4277     return;
4278   }
4279 
4280   /* If we used a local begin, the optimal alignment is
4281    * entirely in a V problem that's still below us, rooted
4282    * at b, for the entire one-hole sequence. The FALSE
4283    * flag prohibits more local begin transitions; we propagate
4284    * useEL.
4285    */
4286   if (best_v == -2) {
4287     if (b != z)
4288       InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i0, j0, b);
4289     v_splitter_b(cm, dsq, L, tr, b, z, i0, i1, j1, j0, useEL, dmin, dmax);
4290     return;
4291   }
4292 
4293   /* The optimal split into two V problems:
4294    *    V:   r..v, i0..i', j'..j0
4295    *    V:   v..z, i'..i1, j1..j'
4296    * Solve in this order, because we're constructing the
4297    * trace in postorder traversal.
4298    */
4299   ESL_DPRINTF2(("#DEBUG: V splitter:\n"));
4300   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
4301 		r, UniqueStatetype(cm->stid[r]),
4302 		best_v, UniqueStatetype(cm->stid[best_v]),
4303 		i0, best_i, best_j, j0));
4304   ESL_DPRINTF2(("#DEBUG:    V:       G%d[%s]..%d[%s], %d..%d//%d..%d\n",
4305 		best_v, UniqueStatetype(cm->stid[best_v]),
4306 		z, UniqueStatetype(cm->stid[z]),
4307 		best_i, i1, j1, best_j));
4308 
4309   v_splitter_b(cm, dsq, L, tr, r,      best_v, i0,     best_i, best_j, j0, FALSE,
4310 	       dmin, dmax);
4311   v_splitter_b(cm, dsq, L, tr, best_v, z,      best_i, i1,     j1,     best_j, useEL,
4312 	       dmin, dmax);
4313 
4314   free(imax);
4315   free(imin);
4316   return;
4317 }
4318 
4319 
4320 /*****************************************************************
4321  * The alignment engines, using bands:
4322  *     inside_b   - given generic or wedge problem G^r_z to i0..j0, return score and matrix
4323  *     outside_b  - given unbifurcated G^r_z to i0..j0, return matrix
4324  *
4325  *     vinside_b  - given V problem G^r_z to i0..i1//j1..j0, return score and matrix
4326  *     voutside_b - given unbifurcated G^r_z to i0..i1//j1..j0, return matrix
4327  ******************************************************************/
4328 
4329 /* Function: inside_b()
4330  *           EPN 05.19.05
4331  * *based on inside(), only difference is bands are used :
4332  * Date:     SRE, Mon Aug  7 13:15:37 2000 [St. Louis]
4333  *
4334  * Purpose:  (See inside())
4335  *
4336  * Args:     cm        - the model    [0..M-1]
4337  *           dsq       - the sequence [1..L]
4338  *           L         - length of the dsq
4339  *           vroot     - first start state of subtree (0, for whole model)
4340  *           vend      - last end state of subtree (cm->M-1, for whole model)
4341  *           i0        - first position in subseq to align (1, for whole seq)
4342  *           j0        - last position in subseq to align (L, for whole seq)
4343  *           do_full   - if TRUE, we save all the decks in alpha, instead of
4344  *                       working in our default memory-efficient mode where
4345  *                       we reuse decks and only the uppermost deck (vroot) is valid
4346  *                       at the end.
4347  *           alpha     - if non-NULL, this is an existing matrix, with NULL
4348  *                       decks for vroot..vend, and we'll fill in those decks
4349  *                       appropriately instead of creating a new matrix
4350  *           ret_alpha - if non-NULL, return the matrix with one or more
4351  *                       decks available for examination (see "do_full")
4352  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
4353  *                       but usually containing one or more allocated decks sized
4354  *                       for this subsequence i0..j0.
4355  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
4356  *                       *only* be valid on exactly the same i0..j0 subseq,
4357  *                       because of the size of the subseq decks.
4358  *           ret_shadow- if non-NULL, the caller wants a shadow matrix, because
4359  *                       he intends to do a traceback.
4360  *           allow_begin- TRUE to allow 0->b local alignment begin transitions.
4361  *           ret_b     - best local begin state, or NULL if unwanted
4362  *           ret_bsc   - score for using ret_b, or NULL if unwanted
4363  *           dmin   - minimum d bound for each state v; [0..v..M-1]
4364  *           dmax   - maximum d bound for each state v; [0..v..M-1]
4365  *
4366  * Returns: Score of the optimal alignment.
4367  */
4368 static float
inside_b(CM_t * cm,ESL_DSQ * dsq,int L,int vroot,int vend,int i0,int j0,int do_full,float *** alpha,float **** ret_alpha,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool,void **** ret_shadow,int allow_begin,int * ret_b,float * ret_bsc,int * dmin,int * dmax)4369 inside_b(CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0, int do_full,
4370 	 float ***alpha, float ****ret_alpha,
4371 	 struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
4372 	 void ****ret_shadow,
4373 	 int allow_begin, int *ret_b, float *ret_bsc,
4374 	 int *dmin, int *dmax)
4375 {
4376   int      status;
4377   float  **end;         /* we re-use the end deck. */
4378   int      nends;       /* counter that tracks when we can release end deck to the pool */
4379   int     *touch;       /* keeps track of how many higher decks still need this deck */
4380   int      v,y,z;	/* indices for states  */
4381   int      j,d,i,k;	/* indices in sequence dimensions */
4382   float    sc;		/* a temporary variable holding a score */
4383   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
4384   int      W;		/* subsequence length */
4385   int      jp;		/* j': relative position in the subsequence  */
4386   void  ***shadow;      /* shadow matrix for tracebacks */
4387   int    **kshad;       /* a shadow deck for bifurcations */
4388   char   **yshad;       /* a shadow deck for every other kind of state */
4389   int      b;		/* best local begin state */
4390   float    bsc;		/* score for using the best local begin state */
4391   int      kmax;        /* for B_st's, maximum k value consistent with bands*/
4392 
4393   /* Allocations and initializations
4394    */
4395   b   = -1;
4396   bsc = IMPOSSIBLE;
4397   W   = j0-i0+1;		/* the length of the subsequence -- used in many loops  */
4398 				/* if caller didn't give us a deck pool, make one */
4399   if (dpool == NULL) dpool = deckpool_create();
4400   if (! deckpool_pop(dpool, &end))
4401     end = alloc_vjd_deck(L, i0, j0);
4402   nends = CMSubtreeCountStatetype(cm, vroot, E_st);
4403   for (jp = 0; jp <= W; jp++) {
4404     j = i0+jp-1;		/* e.g. j runs from 0..L on whole seq */
4405     end[j][0] = 0.;
4406     for (d = 1; d <= jp; d++) end[j][d] = IMPOSSIBLE;
4407   }
4408 
4409   /* if caller didn't give us a matrix, make one.
4410    * It's important to allocate for M+1 decks (deck M is for EL, local
4411    * alignment) - even though Inside doesn't need EL, Outside does,
4412    * and we might reuse this memory in a call to Outside.
4413    */
4414   if (alpha == NULL) {
4415     ESL_ALLOC(alpha, sizeof(float **) * (cm->M+1));
4416     for (v = 0; v <= cm->M; v++) alpha[v] = NULL;
4417   }
4418 
4419   ESL_ALLOC(touch,  sizeof(int) * cm->M);
4420   for (v = 0;     v < vroot; v++) touch[v] = 0;
4421   for (v = vroot; v <= vend; v++) touch[v] = cm->pnum[v];
4422   for (v = vend+1;v < cm->M; v++) touch[v] = 0;
4423 
4424   /* The shadow matrix, if caller wants a traceback.
4425    * We do some pointer tricks here to save memory. The shadow matrix
4426    * is a void ***. Decks may either be char ** (usually) or
4427    * int ** (for bifurcation decks). Watch out for the casts.
4428    * For most states we only need
4429    * to keep y as traceback info, and y <= 6. For bifurcations,
4430    * we need to keep k, and k <= L, and L might be fairly big.
4431    * (We could probably limit k to an unsigned short ... anyone
4432    * aligning an RNA > 65536 would need a big computer... but
4433    * we'll hold off on that for now. We could also pack more
4434    * traceback pointers into a smaller space since we only really
4435    * need 3 bits, not 8.)
4436    */
4437   if (ret_shadow != NULL) {
4438     ESL_ALLOC(shadow, sizeof(void **) * cm->M);
4439     for (v = 0; v < cm->M; v++) shadow[v] = NULL;
4440   }
4441 
4442   /* Main recursion
4443    */
4444   for (v = vend; v >= vroot; v--)
4445     {
4446       /* First we need a deck to fill in.
4447        * 1. if we're an E, reuse the end deck (and it's already calculated)
4448        * 2. else, see if we can take something from the pool
4449        * 3. else, allocate a new deck.
4450        */
4451       if (cm->sttype[v] == E_st) {
4452 	alpha[v] = end; continue;
4453       }
4454       if (! deckpool_pop(dpool, &(alpha[v])))
4455 	alpha[v] = alloc_vjd_deck(L, i0, j0);
4456 
4457       if (ret_shadow != NULL) {
4458 	if (cm->sttype[v] == B_st) {
4459 	  kshad     = alloc_vjd_kshadow_deck(L, i0, j0);
4460 	  shadow[v] = (void **) kshad;
4461 	} else {
4462 	  yshad     = alloc_vjd_yshadow_deck(L, i0, j0);
4463 	  shadow[v] = (void **) yshad;
4464 	}
4465       }
4466 
4467       /* Impose bands by setting all cells outside the bands to 0
4468        * This is independent of state type so we do it outside
4469        * the following set of if then statements.
4470        */
4471 
4472       for (jp = 0; jp <= W; jp++) {
4473 	j = i0-1+jp;
4474 	for (d = 0; d < dmin[v] && d <= jp; d++)
4475 	  alpha[v][j][d] = IMPOSSIBLE;
4476 	for (d = dmax[v]+1; d <= jp;     d++)
4477 	  alpha[v][j][d] = IMPOSSIBLE;
4478       }
4479 
4480       if (cm->sttype[v] == D_st || cm->sttype[v] == S_st)
4481 	{
4482 	  for (jp = 0; jp <= W; jp++) {
4483 	    j = i0-1+jp;
4484 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
4485 	      {
4486 		y = cm->cfirst[v];
4487 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
4488 		/* treat EL as emitting only on self transition */
4489 		if (ret_shadow != NULL) yshad[j][d]  = USED_EL;
4490 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
4491 		  if ((sc = alpha[y+yoffset][j][d] + cm->tsc[v][yoffset]) >  alpha[v][j][d]) {
4492 		    alpha[v][j][d] = sc;
4493 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
4494 		  }
4495 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
4496 	      }
4497 	  }
4498 	}
4499       else if (cm->sttype[v] == B_st)
4500 	{
4501 	  for (jp = 0; jp <= W; jp++) {
4502 	    j = i0-1+jp;
4503 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
4504 	      {
4505 		y = cm->cfirst[v];
4506 		z = cm->cnum[v];
4507 		/* Careful, in qdb, we only want to look at alpha cells that are
4508 		 * within the bands for all states involved (v, y and z) */
4509 		/* k is the length of the right fragment */
4510 		if(dmin[z] > (d-dmax[y])) k = dmin[z];
4511 		else k = d-dmax[y];
4512 		if(k < 0) k = 0;
4513 
4514 		if(dmax[z] < (d-dmin[y])) kmax = dmax[z];
4515 		else kmax = d-dmin[y];
4516 
4517 		if(k <= kmax)
4518 		  {
4519 		    alpha[v][j][d] = alpha[y][j-k][d-k] + alpha[z][j][k];
4520 		    if (ret_shadow != NULL) kshad[j][d] = k;
4521 		    for (k=k+1; k <= kmax; k++)
4522 		      {
4523 			if ((sc = alpha[y][j-k][d-k] + alpha[z][j][k]) > alpha[v][j][d]) {
4524 			  alpha[v][j][d] = sc;
4525 			  if (ret_shadow != NULL) kshad[j][d] = k;
4526 			}
4527 		      }
4528 		  }
4529 		else alpha[v][j][d] = IMPOSSIBLE;
4530 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
4531 	      }
4532 	  }
4533 	}
4534       else if (cm->sttype[v] == MP_st)
4535 	{
4536 	  for (jp = 0; jp <= W; jp++) {
4537 	    j = i0-1+jp;
4538 	    alpha[v][j][0] = IMPOSSIBLE;
4539 	    if (jp > 0) alpha[v][j][1] = IMPOSSIBLE;
4540 	    /* dmin[v] must be >= 2 */
4541 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
4542 	      {
4543 		y = cm->cfirst[v];
4544 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
4545 		/* treat EL as emitting only on self transition */
4546 		if (ret_shadow != NULL) yshad[j][d] = USED_EL;
4547 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
4548 		  if ((sc = alpha[y+yoffset][j-1][d-2] + cm->tsc[v][yoffset]) >  alpha[v][j][d]) {
4549 		    alpha[v][j][d] = sc;
4550 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
4551 		  }
4552 
4553 		i = j-d+1;
4554 		if (dsq[i] < cm->abc->K && dsq[j] < cm->abc->K)
4555 		  alpha[v][j][d] += cm->esc[v][(int) (dsq[i]*cm->abc->K+dsq[j])];
4556 		else
4557 		  alpha[v][j][d] += DegeneratePairScore(cm->abc, cm->esc[v], dsq[i], dsq[j]);
4558 
4559 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
4560 	      }
4561 	  }
4562 	}
4563       else if (cm->sttype[v] == IL_st || cm->sttype[v] == ML_st)
4564 	{
4565 	  for (jp = 0; jp <= W; jp++) {
4566 	    j = i0-1+jp;
4567 	    alpha[v][j][0] = IMPOSSIBLE;
4568 	    /* dmin[v] must be >= 1 */
4569 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
4570 	      {
4571 		y = cm->cfirst[v];
4572 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
4573 		/* treat EL as emitting only on self transition */
4574 		if (ret_shadow != NULL) yshad[j][d] = USED_EL;
4575 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
4576 		  if ((sc = alpha[y+yoffset][j][d-1] + cm->tsc[v][yoffset]) >  alpha[v][j][d]) {
4577 		    alpha[v][j][d] = sc;
4578 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
4579 		  }
4580 
4581 		i = j-d+1;
4582 		if (dsq[i] < cm->abc->K)
4583 		  alpha[v][j][d] += cm->esc[v][dsq[i]];
4584 		else
4585 		  alpha[v][j][d] += esl_abc_FAvgScore(cm->abc, dsq[i], cm->esc[v]);
4586 
4587 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
4588 	      }
4589 	  }
4590 	}
4591       else if (cm->sttype[v] == IR_st || cm->sttype[v] == MR_st)
4592 	{
4593 	  for (jp = 0; jp <= W; jp++) {
4594 	    j = i0-1+jp;
4595 	    alpha[v][j][0] = IMPOSSIBLE;
4596 	    /* dmin[v] must be >= 1 */
4597 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
4598 	      {
4599 		y = cm->cfirst[v];
4600 		alpha[v][j][d] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
4601 		/* treat EL as emitting only on self transition */
4602 		if (ret_shadow != NULL) yshad[j][d] = USED_EL;
4603 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
4604 		  if ((sc = alpha[y+yoffset][j-1][d-1] + cm->tsc[v][yoffset]) > alpha[v][j][d]) {
4605 		    alpha[v][j][d] = sc;
4606 		    if (ret_shadow != NULL) yshad[j][d] = yoffset;
4607 		  }
4608 		if (dsq[j] < cm->abc->K)
4609 		  alpha[v][j][d] += cm->esc[v][dsq[j]];
4610 		else
4611 		  alpha[v][j][d] += esl_abc_FAvgScore(cm->abc, dsq[j], cm->esc[v]);
4612 
4613 		if (alpha[v][j][d] < IMPOSSIBLE) alpha[v][j][d] = IMPOSSIBLE;
4614 	      }
4615 	  }
4616 	}				/* finished calculating deck v. */
4617 
4618       /* Check for local begin getting us to the root.
4619        * This is "off-shadow": if/when we trace back, we'll handle this
4620        * case separately (and we'll know to do it because we'll immediately
4621        * see a USED_LOCAL_BEGIN flag in the shadow matrix, telling us
4622        * to jump right to state b; see below)
4623        */
4624       if (allow_begin && alpha[v][j0][W] + cm->beginsc[v] > bsc)
4625 	{
4626 	  b   = v;
4627 	  bsc = alpha[v][j0][W] + cm->beginsc[v];
4628 	}
4629 
4630       /* Check for whether we need to store an optimal local begin score
4631        * as the optimal overall score, and if we need to put a flag
4632        * in the shadow matrix telling insideT() to use the b we return.
4633        */
4634       if (allow_begin && v == 0 && bsc > alpha[0][j0][W]) {
4635 	alpha[0][j0][W] = bsc;
4636 	if (ret_shadow != NULL) yshad[j0][W] = USED_LOCAL_BEGIN;
4637       }
4638 
4639       /* Now, if we're trying to reuse memory in our normal mode (e.g. ! do_full):
4640        * Look at our children; if they're fully released, take their deck
4641        * into the pool for reuse.
4642        */
4643       if (! do_full) {
4644 	if (cm->sttype[v] == B_st)
4645 	  { /* we can definitely release the S children of a bifurc. */
4646 	    y = cm->cfirst[v]; deckpool_push(dpool, alpha[y]); alpha[y] = NULL;
4647 	    z = cm->cnum[v];   deckpool_push(dpool, alpha[z]); alpha[z] = NULL;
4648 	  }
4649 	else
4650 	  {
4651 	    for (y = cm->cfirst[v]; y < cm->cfirst[v]+cm->cnum[v]; y++)
4652 	      {
4653 		touch[y]--;
4654 		if (touch[y] == 0)
4655 		  {
4656 		    if (cm->sttype[y] == E_st) {
4657 		      nends--;
4658 		      if (nends == 0) { deckpool_push(dpool, end); end = NULL;}
4659 		    } else
4660 		      deckpool_push(dpool, alpha[y]);
4661 		    alpha[y] = NULL;
4662 		  }
4663 	      }
4664 	  }
4665       }
4666   } /* end loop over all v */
4667 
4668   /* Now we free our memory.
4669    * if we've got do_full set, all decks vroot..vend are now valid (end is shared).
4670    * else, only vroot deck is valid now and all others vroot+1..vend are NULL,
4671    * and end is NULL.
4672    * We could check this status to be sure (and we used to) but now we trust.
4673    */
4674   sc       = alpha[vroot][j0][W];
4675   if (ret_b != NULL)   *ret_b   = b;    /* b is -1 if allow_begin is FALSE. */
4676   if (ret_bsc != NULL) *ret_bsc = bsc;  /* bsc is IMPOSSIBLE if allow_begin is FALSE */
4677 
4678   /* If the caller doesn't want the matrix, free it (saving the decks in the pool!)
4679    * Else, pass it back to him.
4680    */
4681   if (ret_alpha == NULL) {
4682     for (v = vroot; v <= vend; v++) /* be careful of our reuse of the end deck -- free it only once */
4683       if (alpha[v] != NULL) {
4684 	if (cm->sttype[v] != E_st) { deckpool_push(dpool, alpha[v]); alpha[v] = NULL; }
4685 	else end = alpha[v];
4686       }
4687     if (end != NULL) { deckpool_push(dpool, end); end = NULL; }
4688     free(alpha);
4689   } else *ret_alpha = alpha;
4690 
4691   /* If the caller doesn't want the deck pool, free it.
4692    * Else, pass it back to him.
4693    */
4694   if (ret_dpool == NULL) {
4695     while (deckpool_pop(dpool, &end)) free_vjd_deck(end, i0, j0);
4696     deckpool_free(dpool);
4697   } else {
4698     *ret_dpool = dpool;
4699   }
4700 
4701   free(touch);
4702   if (ret_shadow != NULL) *ret_shadow = shadow;
4703   return sc;
4704 
4705  ERROR:
4706   cm_Fail("Memory allocation error.");
4707   return 0.; /* never reached */
4708 }
4709 
4710 
4711 /* Function: outside_b()
4712  *           EPN 05.19.05
4713  * *based on outside(), only difference is bands are used :
4714  *
4715  * Date:     SRE, Tue Aug  8 10:42:52 2000 [St. Louis]
4716  * Purpose:  (See outside())
4717  *
4718  * Args:     cm        - the model    [0..M-1]
4719  *           dsq       - the sequence [1..L]
4720  *           L         - length of the dsq
4721  *           vroot     - first state of linear model segment (S; MP|ML|MR|D)
4722  *           vend      - last state of linear model segment  (B; E; MP|ML|MR|D)
4723  *           i0        - first position in subseq to align (1, for whole seq)
4724  *           j0        - last position in subseq to align (L, for whole seq)
4725  *           do_full   - if TRUE, we save all the decks in beta, instead of
4726  *                       working in our default memory-efficient mode where
4727  *                       we reuse decks and only the lowermost deck (vend) is valid
4728  *                       at the end.
4729  *           beta      - if non-NULL, this is an existing matrix, with NULL
4730  *                       decks for vroot..vend, and we'll fill in those decks
4731  *                       appropriately instead of creating a new matrix
4732  *           ret_beta  - if non-NULL, return the matrix with one or more
4733  *                       decks available for examination (see "do_full")
4734  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
4735  *                       but usually containing one or more allocated decks sized
4736  *                       for this subsequence i0..j0.
4737  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
4738  *                       *only* be valid on exactly the same i0..j0 subseq,
4739  *                       because of the size of the subseq decks.
4740  *           dmin      - minimum d bound for each state v; [0..v..M-1]
4741  *           dmax      - maximum d bound for each state v; [0..v..M-1]
4742  */
4743 static void
outside_b(CM_t * cm,ESL_DSQ * dsq,int L,int vroot,int vend,int i0,int j0,int do_full,float *** beta,float **** ret_beta,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool,int * dmin,int * dmax)4744 outside_b(CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0,
4745 	  int do_full, float ***beta, float ****ret_beta,
4746 	  struct deckpool_s *dpool, struct deckpool_s **ret_dpool, int *dmin, int *dmax)
4747 {
4748   int      status;
4749   int      v,y;			/* indices for states */
4750   int      j,d,i;		/* indices in sequence dimensions */
4751   float    sc;			/* a temporary variable holding a score */
4752   int     *touch;               /* keeps track of how many lower decks still need this deck */
4753   float    escore;		/* an emission score, tmp variable */
4754   int      W;			/* subsequence length */
4755   int      jp;			/* j': relative position in the subsequence, 0..W */
4756   int      voffset;		/* index of v in t_v(y) transition scores */
4757   int      w1,w2;		/* bounds of split set */
4758   int      dv;                  /* StateDelta() for state v */
4759 
4760   /* Allocations and initializations
4761    */
4762   W = j0-i0+1;		/* the length of the subsequence: used in many loops */
4763 
4764   			/* if caller didn't give us a deck pool, make one */
4765   if (dpool == NULL) dpool = deckpool_create();
4766 
4767   /* if caller didn't give us a matrix, make one.
4768    * Allocate room for M+1 decks because we might need the EL deck (M)
4769    * if we're doing local alignment.
4770    */
4771   if (beta == NULL) {
4772     ESL_ALLOC(beta, sizeof(float **) * (cm->M+1));
4773     for (v = 0; v < cm->M+1; v++) beta[v] = NULL;
4774   }
4775 
4776   /* Initialize the root deck.
4777    * If the root is in a split set, initialize the whole split set.
4778    */
4779   w1 = cm->nodemap[cm->ndidx[vroot]]; /* first state in split set */
4780   if (cm->sttype[vroot] == B_st) {    /* special boundary case of Outside on a single B state. */
4781     w2 = w1;
4782     if (vend != vroot) cm_Fail("oh no. not again.");
4783   } else
4784     w2 = cm->cfirst[w1]-1;	      /* last state in split set w1<=vroot<=w2 */
4785 
4786   for (v = w1; v <= w2; v++) {
4787     if (! deckpool_pop(dpool, &(beta[v])))
4788       beta[v] = alloc_vjd_deck(L, i0, j0);
4789     for (jp = 0; jp <= W; jp++) {
4790       j = i0-1+jp;
4791       for (d = 0; d <= jp; d++)
4792 	beta[v][j][d] = IMPOSSIBLE;
4793     }
4794   }
4795   beta[vroot][j0][W] = 0;
4796 
4797   /* Initialize the EL deck at M, if we're doing local alignment w.r.t. ends.
4798    */
4799   if (cm->flags & CMH_LOCAL_END) {
4800     if (! deckpool_pop(dpool, &(beta[cm->M])))
4801       beta[cm->M] = alloc_vjd_deck(L, i0, j0);
4802     for (jp = 0; jp <= W; jp++) {
4803       j = i0-1+jp;
4804       for (d = 0; d <= jp; d++)
4805 	beta[cm->M][j][d] = IMPOSSIBLE;
4806     }
4807 
4808     /* We have to worry about vroot -> EL transitions.
4809      * since we start the main recursion at w2+1. This requires a
4810      * laborious partial unroll of the main recursion, grabbing
4811      * the stuff relevant to a beta[EL] calculation for just the
4812      * vroot->EL transition.
4813      */
4814     if (NOT_IMPOSSIBLE(cm->endsc[vroot])) {
4815       switch (cm->sttype[vroot]) {
4816       case MP_st:
4817 	if (W < 2) break;
4818 	if (dsq[i0] < cm->abc->K && dsq[j0] < cm->abc->K)
4819 	  escore = cm->esc[vroot][(int) (dsq[i0]*cm->abc->K+dsq[j0])];
4820 	else
4821 	  escore = DegeneratePairScore(cm->abc, cm->esc[vroot], dsq[i0], dsq[j0]);
4822 	beta[cm->M][j0-1][W-2] = cm->endsc[vroot] +
4823 	  (cm->el_selfsc * (W-2)) + escore;
4824 	if (beta[cm->M][j0-1][W-2] < IMPOSSIBLE) beta[cm->M][j0-1][W-2] = IMPOSSIBLE;
4825 	break;
4826       case ML_st:
4827       case IL_st:
4828 	if (W < 1) break;
4829 	if (dsq[i0] < cm->abc->K)
4830 	  escore = cm->esc[vroot][(int) dsq[i0]];
4831 	else
4832 	  escore = esl_abc_FAvgScore(cm->abc, dsq[i0], cm->esc[vroot]);
4833 	beta[cm->M][j0][W-1] = cm->endsc[vroot] +
4834 	  (cm->el_selfsc * (W-1)) + escore;
4835 	if (beta[cm->M][j0][W-1] < IMPOSSIBLE) beta[cm->M][j0][W-1] = IMPOSSIBLE;
4836 	break;
4837       case MR_st:
4838       case IR_st:
4839 	if (W < 1) break;
4840 	if (dsq[j0] < cm->abc->K)
4841 	  escore = cm->esc[vroot][(int) dsq[j0]];
4842 	else
4843 	  escore = esl_abc_FAvgScore(cm->abc, dsq[j0], cm->esc[vroot]);
4844 	beta[cm->M][j0-1][W-1] = cm->endsc[vroot] +
4845 	  (cm->el_selfsc * (W-1)) + escore;
4846 	if (beta[cm->M][j0-1][W-1] < IMPOSSIBLE) beta[cm->M][j0-1][W-1] = IMPOSSIBLE;
4847 	break;
4848       case S_st:
4849       case D_st:
4850 	beta[cm->M][j0][W] = cm->endsc[vroot] +
4851 	  (cm->el_selfsc * W);
4852 	if (beta[cm->M][j0][W] < IMPOSSIBLE) beta[cm->M][j0][W] = IMPOSSIBLE;
4853 	break;
4854       case B_st:		/* can't start w/ bifurcation at vroot. */
4855       default: cm_Fail("bogus parent state %d\n", cm->sttype[vroot]);
4856       }
4857     }
4858   }
4859 
4860   ESL_ALLOC(touch, sizeof(int) * cm->M);
4861   for (v = 0;      v < w1; v++) touch[v] = 0; /* note: top of split set w1, not vroot */
4862   for (v = vend+1; v < cm->M; v++) touch[v] = 0;
4863   for (v = w1; v <= vend; v++) {
4864     if (cm->sttype[v] == B_st) touch[v] = 2; /* well, we'll never use this, but set it anyway. */
4865     else                       touch[v] = cm->cnum[v];
4866   }
4867 
4868   /* Main loop down through the decks
4869    */
4870   for (v = w2+1; v <= vend; v++)
4871     {
4872       /* First we need to fetch a deck of memory to fill in;
4873        * we try to reuse a deck but if one's not available we allocate
4874        * a fresh one.
4875        */
4876       if (! deckpool_pop(dpool, &(beta[v])))
4877 	beta[v] = alloc_vjd_deck(L, i0, j0);
4878 
4879       /* Init the whole deck to IMPOSSIBLE
4880        */
4881       for (jp = W; jp >= 0; jp--) {
4882 	j = i0-1+jp;
4883 	for (d = jp; d >= 0; d--)
4884 	  beta[v][j][d] = IMPOSSIBLE;
4885       }
4886 
4887       /* If we can do a local begin into v, also init with that.
4888        * By definition, beta[0][j0][W] == 0.
4889        */
4890       if ((vroot == 0 && i0 == 1 && j0 == L && (cm->flags & CMH_LOCAL_BEGIN))
4891 	  && (dmin[v] <= W && dmax[v] >= W))
4892 	  beta[v][j0][W] = cm->beginsc[v];
4893 
4894       /* main recursion:
4895        */
4896       for (jp = W; jp >= 0; jp--) {
4897 	j = i0-1+jp;
4898 	if((dmax[v]) > jp) d = jp;
4899 	else d = (dmax[v]);
4900 	for (; d >= (dmin[v]); d--)
4901 	  {
4902 	    i = j-d+1;
4903 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
4904 	      if (y < vroot) continue; /* deal with split sets */
4905 	      voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
4906 
4907 	      switch(cm->sttype[y]) {
4908 	      case MP_st:
4909 		if (j == j0 || d == jp) continue; /* boundary condition */
4910 
4911 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
4912 		  escore = cm->esc[y][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
4913 		else
4914 		  escore = DegeneratePairScore(cm->abc, cm->esc[y], dsq[i-1], dsq[j+1]);
4915 
4916 		if ((sc = beta[y][j+1][d+2] + cm->tsc[y][voffset] + escore) > beta[v][j][d])
4917 		  beta[v][j][d] = sc;
4918 		break;
4919 
4920 	      case ML_st:
4921 	      case IL_st:
4922 		if (d == jp) continue;	/* boundary condition (note when j=0, d=0*/
4923 
4924 		if (dsq[i-1] < cm->abc->K)
4925 		  escore = cm->esc[y][(int) dsq[i-1]];
4926 		else
4927 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[y]);
4928 
4929 		if ((sc = beta[y][j][d+1] + cm->tsc[y][voffset] + escore) > beta[v][j][d])
4930 		  beta[v][j][d] = sc;
4931 		break;
4932 
4933 	      case MR_st:
4934 	      case IR_st:
4935 		if (j == j0) continue;
4936 
4937 		if (dsq[j+1] < cm->abc->K)
4938 		  escore = cm->esc[y][(int) dsq[j+1]];
4939 		else
4940 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[y]);
4941 
4942 		if ((sc = beta[y][j+1][d+1] + cm->tsc[y][voffset] + escore) > beta[v][j][d])
4943 		  beta[v][j][d] = sc;
4944 		break;
4945 
4946 	      case S_st:
4947 	      case E_st:
4948 	      case D_st:
4949 		if ((sc = beta[y][j][d] + cm->tsc[y][voffset]) > beta[v][j][d])
4950 		  beta[v][j][d] = sc;
4951 		break;
4952 
4953 	      default: cm_Fail("bogus child state %d\n", cm->sttype[y]);
4954 	      }/* end switch over states*/
4955 	    } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
4956 	    if (beta[v][j][d] < IMPOSSIBLE) beta[v][j][d] = IMPOSSIBLE;
4957 
4958 
4959 	  } /* ends loop over d. We know all beta[v][j][d] in this row j*/
4960       }/* end loop over jp. We know the beta's for the whole deck.*/
4961 
4962       /* Deal with local alignment end transitions v->EL
4963        * (EL = deck at M.)
4964        */
4965       if (NOT_IMPOSSIBLE(cm->endsc[v])) {
4966 	for (jp = 0; jp <= W; jp++) {
4967 	  j = i0-1+jp;
4968 	  /* Careful here, we're filling in beta[cm->M][j][d] which is unbanded
4969 	   * by adding beta[v][j+{0,1}][d+dv] to endsc[v], and we know there's a
4970 	   * band on v, so we can save time here as follows:
4971 	   */
4972 	  dv = StateDelta(cm->sttype[v]);
4973 	  for (d = (dmin[v]-dv); d <= (dmax[v]-dv) && d <= jp; d++)
4974 	    {
4975 	      i = j-d+1;
4976 	      switch (cm->sttype[v]) {
4977 	      case MP_st:
4978 		if (j == j0 || d == jp) continue; /* boundary condition */
4979 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
4980 		  escore = cm->esc[v][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
4981 		else
4982 		  escore = DegeneratePairScore(cm->abc, cm->esc[v], dsq[i-1], dsq[j+1]);
4983 		if ((sc = beta[v][j+1][d+2] + cm->endsc[v] +
4984 		     (cm->el_selfsc * d) + escore) > beta[cm->M][j][d])
4985 		  beta[cm->M][j][d] = sc;
4986 		break;
4987 	      case ML_st:
4988 	      case IL_st:
4989 		if (d == jp) continue;
4990 		if (dsq[i-1] < cm->abc->K)
4991 		  escore = cm->esc[v][(int) dsq[i-1]];
4992 		else
4993 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[v]);
4994 		if ((sc = beta[v][j][d+1] + cm->endsc[v] +
4995 		     (cm->el_selfsc * d) + escore) > beta[cm->M][j][d])
4996 		  beta[cm->M][j][d] = sc;
4997 		break;
4998 	      case MR_st:
4999 	      case IR_st:
5000 		if (j == j0) continue;
5001 		if (dsq[j+1] < cm->abc->K)
5002 		  escore = cm->esc[v][(int) dsq[j+1]];
5003 		else
5004 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[v]);
5005 		if ((sc = beta[v][j+1][d+1] + cm->endsc[v] +
5006 		     (cm->el_selfsc * d) + escore) > beta[cm->M][j][d])
5007 		  beta[cm->M][j][d] = sc;
5008 		break;
5009 	      case S_st:
5010 	      case D_st:
5011 	      case E_st:
5012 		if ((sc = beta[v][j][d] + cm->endsc[v] +
5013 		     (cm->el_selfsc * d)) > beta[cm->M][j][d])
5014 		  beta[cm->M][j][d] = sc;
5015 		break;
5016 	      case B_st:
5017 	      default: cm_Fail("bogus parent state %d\n", cm->sttype[v]);
5018 		/* note that although B is a valid vend for a segment we'd do
5019                    outside on, B->EL is set to be impossible, by the local alignment
5020                    config. There's no point in having a B->EL because B is a nonemitter
5021                    (indeed, it would introduce an alignment ambiguity). The same
5022 		   alignment case is handled by the X->EL transition where X is the
5023 		   parent consensus state (S, MP, ML, or MR) above the B. Thus,
5024 		   this code is relying on the NOT_IMPOSSIBLE() test, above,
5025 		   to make sure the sttype[vend]=B case gets into this switch.
5026 		*/
5027 	      } /* end switch over parent state type v */
5028 	    } /* end inner loop over d */
5029 	} /* end outer loop over jp */
5030       } /* end conditional section for dealing w/ v->EL local end transitions */
5031 
5032 	/* Look at v's parents; if we're reusing memory (! do_full)
5033 	 * push the parents that we don't need any more into the pool.
5034 	 */
5035 	if (! do_full) {
5036 	for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
5037 	  touch[y]--;
5038 	  if (touch[y] == 0) { deckpool_push(dpool, beta[y]); beta[y] = NULL; }
5039 	}
5040       }
5041     } /* end loop over decks v. */
5042 
5043 #if 0
5044   /* SRE: this code is superfluous, yes??? */
5045   /* Deal with last step needed for local alignment
5046    * w.r.t. ends: left-emitting, zero-scoring EL->EL transitions.
5047    * (EL = deck at M.)
5048    */
5049   if (cm->flags & CMH_LOCAL_END) {
5050     for (jp = W; jp > 0; jp--) { /* careful w/ boundary here */
5051       j = i0-1+jp;
5052       /* There is no band on the EL state */
5053       for (d = jp-1; d >= 0; d--)
5054 	if ((sc = beta[cm->M][j][d+1]) > beta[cm->M][j][d])
5055 	  beta[cm->M][j][d] = sc;
5056     }
5057   }
5058 #endif
5059 
5060   /* If the caller doesn't want the matrix, free it.
5061    * (though it would be *stupid* for the caller not to want the
5062    * matrix in the current implementation...)
5063    */
5064   if (ret_beta == NULL) {
5065     for (v = w1; v <= vend; v++) /* start at w1 - top of split set - not vroot */
5066       if (beta[v] != NULL) { deckpool_push(dpool, beta[v]); beta[v] = NULL; }
5067     if (cm->flags & CMH_LOCAL_END) {
5068       deckpool_push(dpool, beta[cm->M]);
5069       beta[cm->M] = NULL;
5070     }
5071     free(beta);
5072   } else *ret_beta = beta;
5073 
5074   /* If the caller doesn't want the deck pool, free it.
5075    * Else, pass it back to him.
5076    */
5077   if (ret_dpool == NULL) {
5078     float **a;
5079     while (deckpool_pop(dpool, &a)) free_vjd_deck(a, i0, j0);
5080     deckpool_free(dpool);
5081   } else {
5082     *ret_dpool = dpool;
5083   }
5084   free(touch);
5085   return;
5086  ERROR:
5087   cm_Fail("Memory allocation error.");
5088 }
5089 
5090 
5091 /* Function: vinside_b()
5092  *           EPN 05.19.05
5093  * *based on vinside(), only difference is bands are used :
5094  *
5095  * Date:     SRE, Sat Jun  2 09:24:51 2001 [Kaldi's]
5096  *
5097  * Purpose:  Run the inside phase of the CYK alignment algorithm for
5098  *           a V problem: an unbifurcated CM subgraph from
5099  *           r..z, aligned to a one-hole subsequence
5100  *           i0..i1 // j1..j0, exclusive of z,i1,j1.
5101  *
5102  *           This is done in the vji coord system, where
5103  *           both our j and i coordinates are transformed.
5104  *           The Platonic matrix runs [j1..j0][i0..i1].
5105  *           The actual matrix runs [0..j0-j1][0..i1-i0].
5106  *           To transform a sequence coord i to a transformed
5107  *           coord i', subtract i0; to transform i' to i,
5108  *           add i0.
5109  *
5110  *           The conventions for alpha and dpool are the
5111  *           same as cyk_inside_engine().
5112  *
5113  * Args:     cm        - the model    [0..M-1]
5114  *           dsq       - the sequence [1..L]
5115  *           L         - length of the dsq
5116  *           r         - first start state of subtree (0, for whole model)
5117  *           z         - last end state of subtree (cm->M-1, for whole model)
5118  *           i0,i1     - first subseq part of the V problem
5119  *           j1,j0     - second subseq part
5120  *           useEL     - if TRUE, V problem ends at EL/i1/j1, not z/i1/j1
5121  *           do_full   - if TRUE, we save all the decks in alpha, instead of
5122  *                       working in our default memory-efficient mode where
5123  *                       we reuse decks and only the uppermost deck (r) is valid
5124  *                       at the end.
5125  *           a         - if non-NULL, this is an existing matrix, with NULL
5126  *                       decks for r..z, and we'll fill in those decks
5127  *                       appropriately instead of creating a new matrix
5128  *           ret_a     - if non-NULL, return the matrix with one or more
5129  *                       decks available for examination (see "do_full")
5130  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
5131  *                       but usually containing one or more allocated vji decks sized
5132  *                       for this subsequence i0..i1//j0..j1.
5133  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
5134  *                       *only* be valid on exactly the same i0..i1//j0..j1 subseq
5135  *                       because of the size of the subseq decks.
5136  *           ret_shadow- if non-NULL, the caller wants a shadow matrix, because
5137  *                       he intends to do a traceback.
5138  *           allow_begin- TRUE to allow 0->b local alignment begin transitions.
5139  *           ret_b     - best local begin state, or NULL if unwanted
5140  *           ret_bsc   - score for using ret_b, or NULL if unwanted
5141  *           dmin      - minimum d bound for each state v; [0..v..M-1]
5142  *           dmax      - maximum d bound for each state v; [0..v..M-1]
5143  *
5144  * Returns:  score.
5145  */
5146 static float
vinside_b(CM_t * cm,ESL_DSQ * dsq,int L,int r,int z,int i0,int i1,int j1,int j0,int useEL,int do_full,float *** a,float **** ret_a,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool,char **** ret_shadow,int allow_begin,int * ret_b,float * ret_bsc,int * dmin,int * dmax)5147 vinside_b(CM_t *cm, ESL_DSQ *dsq, int L,
5148 	int r, int z, int i0, int i1, int j1, int j0, int useEL,
5149 	int do_full, float ***a, float ****ret_a,
5150 	struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
5151 	char ****ret_shadow,
5152 	int allow_begin, int *ret_b, float *ret_bsc, int *dmin, int *dmax)
5153 {
5154   int     status;
5155   char  ***shadow;              /* the shadow matrix -- traceback ptrs -- memory is kept */
5156   int     v,i,j;
5157   int     w1,w2;		/* bounds of the split set */
5158   int     jp, ip;		/* j' and i' -- in the matrix coords */
5159   int    *touch;                /* keeps track of whether we can free a deck yet or not */
5160   int     y, yoffset;
5161   float   sc;			/* tmp variable holding a score */
5162   int      b;			/* best local begin state */
5163   float    bsc;			/* score for using the best local begin state */
5164   int     *imin;                /* minimum i bound for each state v; [0..w1-r]
5165                                  * calculated using *dmin; offset from v, the
5166 				 * band that corresponds to state v, is imin[v-r] */
5167   int     *imax;                /* maximum i bound for each state v; [0..w1-r]
5168                                  * calculated using *dmax; offset from v, the
5169 				 * band that corresponds to state v, is imax[v-r] */
5170 
5171   /*debugging block*/
5172   /*printf("***in vinside_b()****\n");
5173   printf("\tr  : %d\n", r);
5174   printf("\tz  : %d\n", z);
5175   printf("\ti0 : %d\n", i0);
5176   printf("\ti1 : %d\n", i1);
5177   printf("\tj1 : %d\n", j1);
5178   printf("\tj0 : %d\n", j0);
5179   */
5180 
5181   /* Allocations, initializations.
5182    * Remember to allocate for M+1 decks, in case we reuse this
5183    * memorry for a local alignment voutside() calculation.
5184    */
5185   b   = -1;
5186   bsc = IMPOSSIBLE;
5187   if (dpool == NULL) dpool = deckpool_create();
5188   if (a == NULL) {
5189     ESL_ALLOC(a, sizeof(float **) * (cm->M+1));
5190     for (v = 0; v <= cm->M; v++) a[v] = NULL;
5191   }
5192 				/* the whole split set w<=z<=y must be initialized */
5193   w1 = cm->nodemap[cm->ndidx[z]];
5194   w2 = cm->cfirst[w1]-1;
5195 
5196   /* Bands used ip 3 */
5197   /* Allocate imin and imax */
5198 
5199   imin = malloc(sizeof (int) * (w1-r+1));
5200   imax = malloc(sizeof (int) * (w1-r+1));
5201 
5202   for (v = w1; v <= w2; v++) {
5203     if (! deckpool_pop(dpool, &(a[v])))
5204       a[v] = alloc_vji_deck(i0, i1, j1, j0);
5205     for (jp = 0; jp <= j0-j1; jp++)
5206       for (ip = 0; ip <= i1-i0; ip++)
5207 	a[v][jp][ip] = IMPOSSIBLE;
5208   }
5209 
5210   if (ret_shadow != NULL) {
5211     ESL_ALLOC(shadow, sizeof(char **) * cm->M);
5212     for (v = 0; v < cm->M; v++) shadow[v] = NULL;
5213   }
5214 
5215   /* Initialize the one non-IMPOSSIBLE cell as a boundary
5216    * condition.
5217    * If local alignment (useEL=1), we must connect z to EL;
5218    * we would init a[EL][0][i1-i0] = 0. But, we're not explicitly
5219    * keeping an EL deck, we're swallowing it into the recursion.
5220    * So, we unroll a chunk of the main recursion;
5221    * we have to laboriously figure out from the statetype z
5222    * and our position where and what our initialization is.
5223    * Else, for global alignments, we simply connect to z,0,i1-i0.
5224    */
5225 
5226   ip = i1-i0;
5227   jp = 0;
5228 
5229   if (! useEL)
5230     a[z][jp][ip] = 0.;
5231   else
5232     {
5233       if (ret_shadow != NULL)
5234 	shadow[z] = alloc_vji_shadow_deck(i0,i1,j1,j0);
5235 
5236       switch (cm->sttype[z]) {
5237       case D_st:
5238       case S_st:
5239 	/*a[z][jp][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
5240 	a[z][jp][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
5241 	if (ret_shadow != NULL) shadow[z][jp][ip] = USED_EL;
5242 	break;
5243       case MP_st:
5244 	if (i0 == i1 || j1 == j0) break;
5245 	/*a[z][jp+1][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
5246 	a[z][jp+1][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
5247 
5248 	if (dsq[i1-1] < cm->abc->K && dsq[j1+1] < cm->abc->K)
5249 	  a[z][jp+1][ip-1] += cm->esc[z][(int) (dsq[i1-1]*cm->abc->K+dsq[j1+1])];
5250 	else
5251 	  a[z][jp+1][ip-1] += DegeneratePairScore(cm->abc, cm->esc[z], dsq[i1-1], dsq[j1+1]);
5252 	if (ret_shadow != NULL) shadow[z][jp+1][ip-1] = USED_EL;
5253 	if (a[z][jp+1][ip-1] < IMPOSSIBLE) a[z][jp+1][ip-1] = IMPOSSIBLE;
5254 	break;
5255       case ML_st:
5256       case IL_st:
5257 	if (i0==i1) break;
5258 	/*a[z][jp][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
5259 	a[z][jp][ip-1] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
5260 
5261 	if (dsq[i1-1] < cm->abc->K)
5262 	  a[z][jp][ip-1] += cm->esc[z][(int) dsq[i1-1]];
5263 	else
5264 	  a[z][jp][ip-1] += esl_abc_FAvgScore(cm->abc, dsq[i1-1], cm->esc[z]);
5265 	if (ret_shadow != NULL) shadow[z][jp][ip-1] = USED_EL;
5266 	if (a[z][jp][ip-1] < IMPOSSIBLE) a[z][jp][ip-1] = IMPOSSIBLE;
5267 	break;
5268       case MR_st:
5269       case IR_st:
5270 	if (j1==j0) break;
5271 	/*a[z][jp+1][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1 - StateDelta(cm->sttype[z])));*/
5272 	a[z][jp+1][ip] = cm->endsc[z] + (cm->el_selfsc * ((jp+j1)-(ip+i0)+1));
5273 
5274 	if (dsq[j1+1] < cm->abc->K)
5275 	  a[z][jp+1][ip] += cm->esc[z][(int) dsq[j1+1]];
5276 	else
5277 	  a[z][jp+1][ip] += esl_abc_FAvgScore(cm->abc, dsq[j1+1], cm->esc[z]);
5278 	if (ret_shadow != NULL) shadow[z][jp+1][ip] = USED_EL;
5279 	if (a[z][jp+1][ip] < IMPOSSIBLE) a[z][jp+1][ip] = IMPOSSIBLE;
5280 	break;
5281       }
5282     } /* done initializing the appropriate cell for useEL=TRUE */
5283 
5284   ESL_ALLOC(touch, sizeof(int) * cm->M);
5285   for (v = 0;   v < r;  v++) touch[v] = 0;
5286   for (v = r;   v <= w2; v++) touch[v] = cm->pnum[v]; /* note w2 not z: to bottom of split set */
5287   for (v = w2+1; v < cm->M; v++) touch[v] = 0;
5288 
5289   /* A special case. If vinside() is called on empty sequences,
5290    * we might do a begin transition right into z.
5291    */
5292 
5293   /* EPN 05.19.05
5294      We are setting alpha cells in the following block, we should make
5295      sure they're within the bands */
5296 
5297   if (allow_begin && j0-j1 == 0 && i1-i0 == 0)
5298     {
5299       b   = z;
5300       bsc = a[z][0][0] + cm->beginsc[z];
5301       if (z == 0) {
5302 	a[0][0][0] = bsc;
5303 	if (ret_shadow != NULL) shadow[0][0][0] = USED_LOCAL_BEGIN;
5304       }
5305     }
5306 
5307   /* Main recursion
5308    */
5309   for (v = w1-1; v >= r; v--)
5310     {
5311       /* Get a deck and a shadow deck.
5312        */
5313       if (! deckpool_pop(dpool, &(a[v])))
5314 	a[v] = alloc_vji_deck(i0,i1,j1,j0);
5315       if (ret_shadow != NULL)
5316 	shadow[v] = alloc_vji_shadow_deck(i0,i1,j1,j0);
5317 
5318       /* Bands used ip 8 */
5319       /* First fill imin[v] and imax[v] */
5320 
5321       /* debugging block */
5322       /*
5323       if((dmin[v] > (j0-i0+1)) || (dmax[v] < (j1-i1+1)))
5324 	{
5325 	  printf("ERROR vinside_b() whole deck is outside bands\n");
5326 	  printf("v : %d\n", v);
5327 	  printf("dmin[v] : %d\n", dmin[v]);
5328 	  printf("dmax[v] : %d\n", dmax[v]);
5329 	  printf("i0 : %d\n", i0);
5330 	  printf("i1 : %d\n", i1);
5331 	  printf("j1 : %d\n", j1);
5332 	  printf("j0 : %d\n", j0);
5333 	}
5334       */
5335 
5336       imin[v-r] = j1-i0-dmax[v]+1;
5337       imax[v-r] = j1-i0-dmin[v]+1;
5338 
5339       /* Bands used ip 8 continued */
5340       /* Impose bands by setting all cells outside the bands to IMPOSSIBLE
5341        * This is independent of state type so we do it outside
5342        * the following set of if then statements.
5343        * Alternatively, it could be done within each of the following
5344        * if(cm->sttype[v] == *) statements - matter of style I suppose.
5345        */
5346 
5347       for (jp = 0; jp <= j0-j1; jp++) {
5348 	for (ip = 0; ip < (imin[v-r]+jp) && ip<=(i1-i0); ip++)
5349 	  {
5350 	    a[v][jp][ip] = IMPOSSIBLE;
5351 	  }
5352 	if((imax[v-r]+jp) > (i1-i0)) ip = (i1-i0+1);
5353 	else ip = imax[v-r]+jp+1;
5354 	if(ip < 0) ip = 0;
5355 	for (; ip <= (i1-i0); ip++)
5356 	  {
5357 	    a[v][jp][ip] = IMPOSSIBLE;
5358 	  }
5359       }
5360       /* reassert our definition of a V problem */
5361       if (cm->sttype[v] == E_st || cm->sttype[v] == B_st || (cm->sttype[v] == S_st && v > r))
5362 	cm_Fail("you told me you wouldn't ever do that again.");
5363 
5364       if (cm->sttype[v] == D_st || cm->sttype[v] == S_st)
5365 	{
5366 	  for (jp = 0; jp <= j0-j1; jp++)
5367 	    {
5368 	      /* Bands used ip 9 */
5369 	      /* old line :  for (ip = i1-i0; ip >= 0; ip--) { */
5370 	      /* Use the imin[v-r] and imax[v-r] we have already set (see Bands used ip 3B) */
5371 	      /* Remember 'state' indices in imin and imax are offset from v because imin and
5372 		 imax run [0..z-r], ==> dmin[v] corresponds to imin[v-r] */
5373 	      if((imax[v-r]+jp) > (i1-i0)) ip = (i1-i0);
5374 	      else ip = imax[v-r] + jp;
5375 	      for(; ip >= imin[v-r]+jp && ip >= 0; ip--) {
5376 		y = cm->cfirst[v];
5377 		a[v][jp][ip]      = a[y][jp][ip] + cm->tsc[v][0];
5378 		if (ret_shadow != NULL) shadow[v][jp][ip] = (char) 0;
5379 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
5380 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
5381 		   > a[v][jp][ip])) {
5382 		a[v][jp][ip]      = cm->endsc[v] +
5383 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
5384 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
5385 	      }
5386 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
5387 		  if ((sc = a[y+yoffset][jp][ip] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
5388 		    {
5389 		      a[v][jp][ip] = sc;
5390 		      if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
5391 		    }
5392 		if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
5393 	      }
5394 	    }
5395 	} else if (cm->sttype[v] == MP_st) {
5396 
5397 	  /* EPN following line redundant? are these cells already IMPOSSIBLE
5398 	     due to band imposition */
5399 
5400 	  for (ip = i1-i0; ip >= 0; ip--) a[v][0][ip] = IMPOSSIBLE; /* boundary condition */
5401 
5402 	  for (jp = 1; jp <= j0-j1; jp++) {
5403 	    j = jp+j1;
5404 	    a[v][jp][i1-i0] = IMPOSSIBLE; /* boundary condition */
5405 	    /* Bands used ip 10 */
5406 	    /* old line :  for (ip = i1-i0-1; ip >= 0; ip--) { */
5407 	    /* Use the imin[v-w1] and imax[v-w1] we have already set (see Bands used ip 3B) */
5408 	    /* Remember 'state' indices in imin and imax are offset from v because imin and
5409 	       imax run [0..z-r], ==> dmin[v] corresponds to imin[v-r] */
5410 	    if((imax[v-r]+jp) > (i1-i0-1)) ip = (i1-i0-1);
5411 	    else ip = imax[v-r] + jp;
5412 	    for(; ip >= imin[v-r]+jp && ip >= 0; ip--) {
5413 	      i = ip+i0;
5414 	      y = cm->cfirst[v];
5415 	      a[v][jp][ip] = a[y][jp-1][ip+1] + cm->tsc[v][0];
5416 	      if (ret_shadow != NULL) shadow[v][jp][ip] = (char) 0;
5417 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
5418 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
5419 		  > a[v][jp][ip])) {
5420 		a[v][jp][ip]      = cm->endsc[v] +
5421 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
5422 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
5423 	      }
5424 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
5425 		if ((sc = a[y+yoffset][jp-1][ip+1] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
5426 		  {
5427 		    a[v][jp][ip] = sc;
5428 		    if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
5429 		  }
5430 	      if (dsq[i] < cm->abc->K && dsq[j] < cm->abc->K)
5431 		a[v][jp][ip] += cm->esc[v][(int) (dsq[i]*cm->abc->K+dsq[j])];
5432 	      else
5433 		a[v][jp][ip] += DegeneratePairScore(cm->abc, cm->esc[v], dsq[i], dsq[j]);
5434 	      if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
5435 	    }
5436 	  }
5437 	} else if (cm->sttype[v] == ML_st || cm->sttype[v] == IL_st) {
5438 
5439 	  for (jp = 0; jp <= j0-j1; jp++) {
5440 	    a[v][jp][i1-i0] = IMPOSSIBLE; /* boundary condition */
5441 	    /* Bands used ip 11 */
5442 	    /* old line :  for (ip = i1-i0-1; ip >= 0; ip--) { */
5443 	    /* Use the imin[v-w1] and imax[v-w1] we have already set (see Bands used ip 3B) */
5444 	    /* Remember 'state' indices in imin and imax are offset from v because imin and
5445 	       imax run [0..z-r], ==> dmin[v] corresponds to imin[v-r] */
5446 	    if((imax[v-r]+jp) > (i1-i0-1)) ip = (i1-i0-1);
5447 	    else ip = imax[v-r] + jp;
5448 	    for(; ip >= imin[v-r]+jp && ip >= 0; ip--) {
5449 	      i = ip+i0;
5450 	      y = cm->cfirst[v];
5451 	      a[v][jp][ip] = a[y][jp][ip+1] + cm->tsc[v][0];
5452 	      if (ret_shadow != NULL) shadow[v][jp][ip] = 0;
5453 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
5454 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
5455 		  > a[v][jp][ip])) {
5456 		a[v][jp][ip]      = cm->endsc[v] +
5457 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
5458 		/*printf("set a[%d][%d][%d] to %f\n", v, jp, ip, sc);*/
5459 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
5460 	      }
5461 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
5462 		if ((sc = a[y+yoffset][jp][ip+1] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
5463 		  {
5464 		    a[v][jp][ip] = sc;
5465 		    if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
5466 		  }
5467 
5468 	      if (dsq[i] < cm->abc->K)
5469 		a[v][jp][ip] += cm->esc[v][dsq[i]];
5470 	      else
5471 		a[v][jp][ip] += esl_abc_FAvgScore(cm->abc, dsq[i], cm->esc[v]);
5472 	      if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
5473 	    }
5474 	  }
5475 	} else if (cm->sttype[v] == MR_st || cm->sttype[v] == IR_st) {
5476 	  /* EPN following line redundant? are these cells already IMPOSSIBLE
5477 	     due to band imposition */
5478 	  for (ip = i1-i0; ip >= 0; ip--) a[v][0][ip] = IMPOSSIBLE; /* boundary condition */
5479 
5480 	  for (jp = 1; jp <= j0-j1; jp++) {
5481 	    j = jp+j1;
5482 	    /* Bands used ip 12 */
5483 	    /* old line :  for (ip = i1-i0; ip >= 0; ip--) { */
5484 	    /* Use the imin[v-w1] and imax[v] we have already set (see Bands used ip 3B) */
5485 	    /* Remember 'state' indices in imin and imax are offset from v because imin and
5486 	       imax run [0..z-r], ==> dmin[v] corresponds to imin[v-r] */
5487 	    /*05.20 for (ip = imax[v-r]; ip >= imin[v-r]; ip--) {		*/
5488 	    if((imax[v-r]+jp) > (i1-i0)) ip = (i1-i0);
5489 	    else ip = imax[v-r] + jp;
5490 	    for(; ip >= imin[v-r]+jp && ip >= 0; ip--) {
5491 	      y = cm->cfirst[v];
5492 	      a[v][jp][ip]      = a[y][jp-1][ip] + cm->tsc[v][0];
5493 	      if (ret_shadow != NULL) shadow[v][jp][ip] = 0;
5494 	      if (useEL && NOT_IMPOSSIBLE(cm->endsc[v]) &&
5495 		  ((cm->endsc[v] + (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v]))))
5496 		  > a[v][jp][ip])) {
5497 		a[v][jp][ip] = cm->endsc[v] +
5498 		  (cm->el_selfsc * (((jp+j1)-(ip+i0)+1) - StateDelta(cm->sttype[v])));
5499 		if (ret_shadow != NULL) shadow[v][jp][ip] = USED_EL;
5500 	      }
5501 	      for (yoffset = 1; yoffset < cm->cnum[v]; yoffset++)
5502 		if ((sc = a[y+yoffset][jp-1][ip] + cm->tsc[v][yoffset]) >  a[v][jp][ip])
5503 		  {
5504 		    a[v][jp][ip] = sc;
5505 		    if (ret_shadow != NULL) shadow[v][jp][ip] = (char) yoffset;
5506 		  }
5507 
5508 	      if (dsq[j] < cm->abc->K)
5509 		a[v][jp][ip] += cm->esc[v][dsq[j]];
5510 	      else
5511 		a[v][jp][ip] += esl_abc_FAvgScore(cm->abc, dsq[j], cm->esc[v]);
5512 	      if (a[v][jp][ip] < IMPOSSIBLE) a[v][jp][ip] = IMPOSSIBLE;
5513 	    }
5514 	  }
5515 	} /* finished calculating deck v */
5516 
5517       /* Check for local begin getting us to the root.
5518        */
5519       if (allow_begin && a[v][j0-j1][0] + cm->beginsc[v] > bsc)
5520 	{
5521 	  b   = v;
5522 	  bsc = a[v][j0-j1][0] + cm->beginsc[v];
5523 	}
5524 
5525       /* Check whether we need to store the local begin score
5526        * for a possible traceback.
5527        */
5528       if (allow_begin && v == 0 && bsc > a[0][j0-j1][0])
5529 	{
5530 	  a[0][j0-j1][0] = bsc;
5531 	  if (ret_shadow != NULL) shadow[v][j0-j1][0] = USED_LOCAL_BEGIN;
5532 	}
5533 
5534 
5535       /* Now, try to reuse memory under v.
5536        */
5537       if (! do_full) {
5538 	for (y = cm->cfirst[v]; y < cm->cfirst[v]+cm->cnum[v]; y++)
5539 	  {
5540 	    touch[y]--;
5541 	    if (touch[y] == 0) {
5542 	      deckpool_push(dpool, a[y]);
5543 	      a[y] = NULL;
5544 	    }
5545 	  }
5546       }
5547     } /* end loop over v; we now have a complete matrix */
5548 
5549   /* Keep the score.
5550    */
5551   sc = a[r][j0-j1][0];
5552   if (ret_b != NULL)   *ret_b   = b;    /* b is -1 if allow_begin is FALSE. */
5553   if (ret_bsc != NULL) *ret_bsc = bsc;  /* bsc is IMPOSSIBLE if allow_begin is FALSE */
5554 
5555 
5556   /* If the caller doesn't want the score matrix back, blow
5557    * it away (saving decks in the pool). Else, pass it back.
5558    */
5559   if (ret_a == NULL) {
5560     for (v = r; v <= w2; v++)	/* note: go all the way to the bottom of the split set */
5561       if (a[v] != NULL) {
5562 	deckpool_push(dpool, a[v]);
5563 	a[v] = NULL;
5564       }
5565     free(a);
5566   } else *ret_a = a;
5567 
5568   /* If caller doesn't want the deck pool, blow it away.
5569    * Else, pass it back.
5570    */
5571   if (ret_dpool == NULL) {
5572     float **foo;
5573     while (deckpool_pop(dpool, &foo))
5574       free_vji_deck(foo, j1,j0);
5575     deckpool_free(dpool);
5576   } else *ret_dpool = dpool;
5577 
5578   free(touch);
5579   free(imax);
5580   free(imin);
5581   if (ret_shadow != NULL) *ret_shadow = shadow;
5582   return sc;
5583 
5584  ERROR:
5585   cm_Fail("Memory allocation error.");
5586   return 0.; /* never reached */
5587 }
5588 
5589 
5590 /* Function: voutside_b()
5591  *           EPN 05.19.05
5592  * *based on voutside(), only difference is bands are used :
5593  *
5594  * Date:     SRE, Sun Jun  3 15:44:41 2001 [St. Louis]
5595  *
5596  * Purpose:  Run the outside version of a CYK alignment algorithm for
5597  *           a V problem: an unbifurcated CM subgraph from r..z, aligned
 *           to a one-hole subsequence i0..i1//j1..j0, exclusive of
5599  *           z, i1, j1.
5600  *
5601  *           This is done in the vji coordinate system, where both
5602  *           our j and i coordinates are transformed. The Platonic
5603  *           ideal matrix runs [j1..j0][i0..i1]. The implemented
5604  *           matrix runs [0..j0-j1][0..i1-i0].
5605  *
5606  *           Much of the behavior in calling conventions, etc., is
5607  *           analogous to inside() and vinside(); see their prefaces
5608  *           for more info. Unlike the inside engines, we never
5609  *           need to calculate a shadow matrix - outside engines are
5610  *           only used for divide and conquer steps.
5611  *
5612  * Args:     cm        - the model    [0..M-1]
5613  *           dsq       - the sequence [1..L]
5614  *           L         - length of the dsq
5615  *           r         - first state of linear model segment (S; MP, ML, MR, or D)
5616  *           z         - last state of linear model segment (B; MP, ML, MR, or D)
5617  *           i0,i1     - subsequence before the hole  (1..L)
5618  *           j1,j0     - subsequence after the hole (1..L)
5619  *           useEL     - if TRUE, worry about local alignment.
5620  *           do_full   - if TRUE, we save all the decks in beta, instead of
5621  *                       working in our default memory-efficient mode where
5622  *                       we reuse decks and only the lowermost decks (inc. z) are valid
5623  *                       at the end.
5624  *           beta      - if non-NULL, this is an existing matrix, with NULL
5625  *                       decks for r..z, and we'll fill in those decks
5626  *                       appropriately instead of creating a new matrix
5627  *           ret_beta  - if non-NULL, return the matrix with one or more
5628  *                       decks available for examination (see "do_full")
5629  *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
5630  *                       but usually containing one or more allocated vji decks sized
5631  *                       for this subsequence i0..i1//j1..j0.
5632  *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
5633  *                       *only* be valid on exactly the same i0..i1//j1..j0 subseq,
5634  *                       because of the size of the subseq decks.
5635  *           dmin      - minimum d bound for each state v; [0..v..M-1]
5636  *           dmax      - maximum d bound for each state v; [0..v..M-1]
5637  *
5638  */
5639 static void
voutside_b(CM_t * cm,ESL_DSQ * dsq,int L,int r,int z,int i0,int i1,int j1,int j0,int useEL,int do_full,float *** beta,float **** ret_beta,struct deckpool_s * dpool,struct deckpool_s ** ret_dpool,int * dmin,int * dmax)5640 voutside_b(CM_t *cm, ESL_DSQ *dsq, int L,
5641 	   int r, int z, int i0, int i1, int j1, int j0, int useEL,
5642 	   int do_full, float ***beta, float ****ret_beta,
5643 	   struct deckpool_s *dpool, struct deckpool_s **ret_dpool,
5644 	   int *dmin, int *dmax)
5645 {
5646   int      status;
5647   int      v,y;			/* indices for states */
5648   int      i,j;			/* indices in sequence dimensions */
5649   int      ip, jp;		/* transformed sequence indices */
5650   float    sc;			/* a temporary variable holding a score */
5651   int     *touch;               /* keeps track of how many lower decks still need this deck */
5652   float    escore;		/* an emission score, tmp variable */
5653   int      voffset;		/* index of v in t_v(y) transition scores */
5654   int     *imin;                /* minimum i bound for each state v; [0..r-z]
5655                                  * calculated using *dmin; offset from v, the
5656 				 * band that corresponds to state v, is imin[v-r] */
5657   int     *imax;                /* maximum i bound for each state v; [0..r-z]
5658                                  * calculated using *dmax; offset from v, the
5659 				 * band that corresponds to state v, is imax[v-r] */
5660   int      dv;                  /* state delta */
5661 
5662   /* Allocations and initializations
5663    */
5664   			/* if caller didn't give us a deck pool, make one */
5665   if (dpool == NULL) dpool = deckpool_create();
5666 
5667   /* If caller didn't give us a matrix, make one.
5668    * Remember to allow for deck M, the EL deck, for local alignments.
5669    */
5670   if (beta == NULL) {
5671     ESL_ALLOC(beta, sizeof(float **) * (cm->M+1));
5672     for (v = 0; v <= cm->M; v++) beta[v] = NULL;
5673   }
5674   /* Initialize the root deck. This probably isn't the most efficient way to do it.
5675    */
5676   if (! deckpool_pop(dpool, &(beta[r])))
5677     beta[r] = alloc_vji_deck(i0,i1,j1,j0);
5678   for (jp = 0; jp <= j0-j1; jp++) {
5679     for (ip = 0; ip <= i1-i0; ip++)
5680       beta[r][jp][ip] = IMPOSSIBLE;
5681   }
5682   /* Bands used ip 15 */
5683   /* We want to make sure that imin[0] <= 0; but we don't have imin[0] */
5684   /* First calculate imin[0], then assert its less than 0, not sure
5685      if this is necessary, imin[0] == 0 may be guaranteed, I'll use
5686      the assert here to be safe*/
5687   /* Note imin[0] corresponds to state r */
5688 
5689   imin = malloc(sizeof (int) * (z-r+1));
5690   imax = malloc(sizeof (int) * (z-r+1));
5691 
5692   /* debugging block */
5693   /*
5694   if((dmin[r] > (j0-i0)) || (dmax[r] < (j1-i1)))
5695     {
5696       printf("ERROR voutside_b()\n");
5697       printf("v : %d\n", r);
5698       printf("dmin[v] : %d\n", dmin[r]);
5699       printf("dmax[v] : %d\n", dmax[r]);
5700       printf("i0 : %d\n", i0);
5701       printf("i1 : %d\n", i1);
5702       printf("j1 : %d\n", j1);
5703       printf("j0 : %d\n", j0);
5704     }
5705   */
5706 
5707   assert(dmin[r] <= (j0-i0)+1);
5708   assert(dmax[r] >= (j1-i1)+1);
5709 
5710   imin[0] = j1-i0-dmax[r]+1;
5711   imax[0] = j1-i0-dmin[r]+1;
5712 
5713   assert(imin[0] <= 0);
5714 
5715   beta[r][j0-j1][0] = 0;
5716 
5717   /* Initialize the EL deck, if we're in local mode w.r.t. ends.
5718    * Deal with the special initialization case of the root state r
5719    * immediately transitioning to EL, if we're supposed to use EL.
5720    */
5721 
5722   if (useEL && cm->flags & CMH_LOCAL_END) {
5723     if (! deckpool_pop(dpool, &(beta[cm->M])))
5724       beta[cm->M] = alloc_vji_deck(i0,i1,j1,j0);
5725     for (jp = 0; jp <= j0-j1; jp++) {
5726       for (ip = 0; ip <= i1-i0; ip++)
5727 	beta[cm->M][jp][ip] = IMPOSSIBLE;
5728     }
5729   }
5730   if (useEL && NOT_IMPOSSIBLE(cm->endsc[r])) {
5731     switch(cm->sttype[r]) {
5732     case MP_st:
5733       if (i0 == i1 || j1 == j0) break;
5734       if (dsq[i0] < cm->abc->K && dsq[j0] < cm->abc->K)
5735 	escore = cm->esc[r][(int) (dsq[i0]*cm->abc->K+dsq[j0])];
5736       else
5737 	escore = DegeneratePairScore(cm->abc, cm->esc[r], dsq[i0], dsq[j0]);
5738       beta[cm->M][j0-j1-1][1] = cm->endsc[r] +
5739 	(cm->el_selfsc * ((j0-1)-(i0+1)+1)) + escore;
5740       break;
5741     case ML_st:
5742     case IL_st:
5743       if (i0 == i1) break;
5744       if (dsq[i0] < cm->abc->K)
5745 	escore = cm->esc[r][(int) dsq[i0]];
5746       else
5747 	escore = esl_abc_FAvgScore(cm->abc, dsq[i0], cm->esc[r]);
5748       beta[cm->M][j0-j1][1] = cm->endsc[r] +
5749 	(cm->el_selfsc * ((j0)-(i0+1)+1)) + escore;
5750       break;
5751     case MR_st:
5752     case IR_st:
5753       if (j0==j1) break;
5754       if (dsq[j0] < cm->abc->K)
5755 	escore = cm->esc[r][(int) dsq[j0]];
5756       else
5757 	escore = esl_abc_FAvgScore(cm->abc, dsq[j0], cm->esc[r]);
5758       beta[cm->M][j0-j1-1][0] = cm->endsc[r] +
5759 	(cm->el_selfsc * ((j0-1)-(i0)+1)) + escore;
5760       break;
5761     case S_st:
5762     case D_st:
5763       beta[cm->M][j0-j1][0] = cm->endsc[r] +
5764 	(cm->el_selfsc * ((j0)-(i0)+1));
5765       break;
5766     default:  cm_Fail("bogus parent state %d\n", cm->sttype[r]);
5767     }
5768   }
5769 
5770   /* Initialize the "touch" array, used for figuring out
5771    * when a deck is no longer touched, so it can be free'd.
5772    */
5773   ESL_ALLOC(touch, sizeof(int) * cm->M);
5774   for (v = 0;   v < r;     v++) touch[v] = 0;
5775   for (v = z+1; v < cm->M; v++) touch[v] = 0;
5776   for (v = r;   v <= z;    v++) {
5777     if (cm->sttype[v] == B_st) touch[v] = 2; /* well, we never use this, but be complete */
5778     else                       touch[v] = cm->cnum[v];
5779   }
5780 
5781 
5782   /* Main loop down through the decks
5783    */
5784   for (v = r+1; v <= z; v++)
5785     {
5786       /* Bands used ip 16 */
5787       /* Fill imin[v-r+1] and imax[v-r+1] as we go, one of many ways to handle imin and imax */
5788       /* Remember 'state' indices in imin and imax are offset from v because imin and
5789 	 imax run [0..z-r+1], ==> dmin[v] corresponds to imin[v-r] */
5790 
5791       imin[v-r] = j1-i0-dmax[v]+1;
5792       imax[v-r] = j1-i0-dmin[v]+1;
5793 
5794       /* An awkward situation here.  If dmin[v] > i1, imin[v-r] will be 0
5795 	 however, we don't want to query ANY cells (in other words
5796 	 none of the following for(ip*) loops should ever be entered)
5797 	 because in this case the whole vji deck is outside the bands, so
5798 	 the bestsc we want is IMPOSSIBLE (which was set before the
5799 	 for (v = w; v <= y; v++) loop).  There is probably a better
5800          way to do this but I'll explicitly check for this situation.
5801          Note - it's okay if dmax < i0 (which also means the entire
5802          deck is outside the bands) because this will make the
5803          for(ip*) loops always evaluate to false because imin[v-r] will
5804          be 0 and imax[v-r] will be < 0.*/
5805       /* This situation is recapitulated in v_splitter_b() */
5806 
5807       /* unnecssary 05.22
5808 	 05.20 code : if(dmin[v] > i1) imin[v-r] = imax[v-r]+1;  */
5809 	 /* now the for(ip) loops
5810 						    will never be entered
5811 						    (see above comments) */
5812 
5813       /* First we need to fetch a deck of memory to fill in;
5814        * we try to reuse a deck but if one's not available we allocate
5815        * a fresh one.
5816        */
5817       if (! deckpool_pop(dpool, &(beta[v])))
5818 	beta[v] = alloc_vji_deck(i0,i1,j1,j0);
5819 
5820       /* Init the whole deck to IMPOSSIBLE.
5821        */
5822       for (jp = j0-j1; jp >= 0; jp--)
5823 	for (ip = 0; ip <= i1-i0; ip++)
5824 	  beta[v][jp][ip] = IMPOSSIBLE;
5825 
5826       /* We've set the whole matrix to impossible, everything outside bands must be impossible */
5827 
5828       /* If we can get into deck v by a local begin transition, do an init
5829        * with that.
5830        */
5831       if (r == 0 && i0 == 1 && j0 == L && (cm->flags & CMH_LOCAL_BEGIN))
5832 	{
5833 	  if (cm->beginsc[v] > beta[v][j0-j1][0])
5834 	    beta[v][j0-j1][0] = cm->beginsc[v];
5835 	}
5836 
5837       /* main recursion:
5838        */
5839       for (jp = j0-j1; jp >= 0; jp--) {
5840 	j = jp+j1;
5841 	/* Bands used ip 17 */
5842 	/* old line :	for (ip = 0; ip <= i1-i0; ip++) */
5843 	/* Remember 'state' indices in imin and imax are offset from v because imin and
5844 	   imax run [0..z-r+1], ==> dmin[v] corresponds to imin[v-r] */
5845 	/* 05.20 for (ip = imin[v-r]; ip <= imax[v-r]; ip++) */
5846 
5847 	if((imin[v-r]+jp) < 0) ip = 0;
5848         else ip = imin[v-r]+jp;
5849         for(; ip <= imax[v-r] + jp && ip <= (i1-i0); ip++)
5850 	  {
5851 	    i = ip+i0;
5852 
5853 	    for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
5854 	      if (y < r) continue; /* deal with split sets */
5855 	      voffset = v - cm->cfirst[y]; /* gotta calculate the transition score index for t_y(v) */
5856 
5857 	      switch(cm->sttype[y]) {
5858 	      case MP_st:
5859 		if (j == j0 || i == i0) continue; /* boundary condition */
5860 
5861 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
5862 		  escore = cm->esc[y][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
5863 		else
5864 		  escore = DegeneratePairScore(cm->abc, cm->esc[y], dsq[i-1], dsq[j+1]);
5865 
5866 		if ((sc = beta[y][jp+1][ip-1]+cm->tsc[y][voffset]+escore) > beta[v][jp][ip])
5867 		  beta[v][jp][ip] = sc;
5868 		break;
5869 
5870 	      case ML_st:
5871 	      case IL_st:
5872 		if (i == i0) continue;	/* boundary condition */
5873 
5874 		if (dsq[i-1] < cm->abc->K)
5875 		  escore = cm->esc[y][(int) dsq[i-1]];
5876 		else
5877 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[y]);
5878 
5879 		if ((sc = beta[y][jp][ip-1]+cm->tsc[y][voffset]+escore) > beta[v][jp][ip])
5880 		  beta[v][jp][ip] = sc;
5881 		break;
5882 
5883 	      case MR_st:
5884 	      case IR_st:
5885 		if (j == j0) continue;
5886 
5887 		if (dsq[j+1] < cm->abc->K)
5888 		  escore = cm->esc[y][(int) dsq[j+1]];
5889 		else
5890 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[y]);
5891 
5892 		if ((sc = beta[y][jp+1][ip]+cm->tsc[y][voffset]+escore) > beta[v][jp][ip])
5893 		  beta[v][jp][ip] = sc;
5894 		break;
5895 
5896 	      case S_st:
5897 	      case E_st:
5898 	      case D_st:
5899 		if ((sc = beta[y][jp][ip] + cm->tsc[y][voffset]) > beta[v][jp][ip])
5900 		  beta[v][jp][ip] = sc;
5901 		break;
5902 
5903 	      default: cm_Fail("bogus parent state %d\n", cm->sttype[y]);
5904 	      }/* end switch over states*/
5905 	    } /* ends for loop over parent states. we now know beta[v][j][d] for this d */
5906 	    if (beta[v][jp][ip] < IMPOSSIBLE) beta[v][jp][ip] = IMPOSSIBLE;
5907 
5908 	  } /* ends loop over ip. We know all beta[v][jp][ip] in this row jp */
5909 
5910       }/* end loop over jp. We know the beta's for the whole deck.*/
5911 
5912       /* Deal with local alignment
5913        * transitions v->EL, if we're doing local alignment and there's a
5914        * possible transition.
5915        */
5916       if (useEL && NOT_IMPOSSIBLE(cm->endsc[v])) {
5917 	for (jp = j0-j1; jp >= 0; jp--) {
5918 	  j = jp+j1;
5919 	  /* Careful here, we're filling in beta[cm->M][jp][ip] which is unbanded
5920 	   * by adding beta[v][jp+{0,1}][ip-{0,1}] to endsc[v], and we know there's a
5921 	   * i band on v (imin[v-r]..imax[v-r], so we can save time here as follows:
5922 	   */
5923 	  dv = StateDelta(cm->sttype[v]);
5924 	  if((imin[v-r]+jp+dv) < 0) ip = 0;
5925 	  else ip = imin[v-r]+jp+dv;
5926 	  for(; (ip<=imax[v-r]+jp+dv) && ip <= (i1-i0); ip++)
5927 	    {
5928 	      i = ip+i0;
5929 	      switch (cm->sttype[v]) {
5930 	      case MP_st:
5931 		if (j == j0 || i == i0) continue; /* boundary condition */
5932 		if (dsq[i-1] < cm->abc->K && dsq[j+1] < cm->abc->K)
5933 		  escore = cm->esc[v][(int) (dsq[i-1]*cm->abc->K+dsq[j+1])];
5934 		else
5935 		  escore = DegeneratePairScore(cm->abc, cm->esc[v], dsq[i-1], dsq[j+1]);
5936 		if ((sc = beta[v][jp+1][ip-1] + cm->endsc[v] +
5937 		     (cm->el_selfsc * (j-i+1))
5938 		     + escore) > beta[cm->M][jp][ip])
5939 		  beta[cm->M][jp][ip] = sc;
5940 		break;
5941 	      case ML_st:
5942 	      case IL_st:
5943 		if (i == i0) continue;
5944 		if (dsq[i-1] < cm->abc->K)
5945 		  escore = cm->esc[v][(int) dsq[i-1]];
5946 		else
5947 		  escore = esl_abc_FAvgScore(cm->abc, dsq[i-1], cm->esc[v]);
5948 		if ((sc = beta[v][jp][ip-1] + cm->endsc[v] +
5949 		     (cm->el_selfsc * (j-i+1))
5950 		     + escore) > beta[cm->M][jp][ip])
5951 		  beta[cm->M][jp][ip] = sc;
5952 		break;
5953 	      case MR_st:
5954 	      case IR_st:
5955 		if (j == j0) continue;
5956 		if (dsq[j+1] < cm->abc->K)
5957 		  escore = cm->esc[v][(int) dsq[j+1]];
5958 		else
5959 		  escore = esl_abc_FAvgScore(cm->abc, dsq[j+1], cm->esc[v]);
5960 		if ((sc = beta[v][jp+1][ip] + cm->endsc[v] +
5961 		     (cm->el_selfsc * (j-i+1))
5962 		     + escore) > beta[cm->M][jp][ip])
5963 		  beta[cm->M][jp][ip] = sc;
5964 		break;
5965 	      case S_st:
5966 	      case D_st:
5967 	      case E_st:
5968 		if ((sc = beta[v][jp][ip] + cm->endsc[v] +
5969 		     (cm->el_selfsc * (j-i+1)))
5970 		     > beta[cm->M][jp][ip])
5971 		  beta[cm->M][jp][ip] = sc;
5972 		break;
5973 	      default:  cm_Fail("bogus parent state %d\n", cm->sttype[y]);
5974 	      } /* end switch over parent v state type */
5975 	    } /* end loop over ip */
5976 	} /* end loop over jp */
5977       }
5978 
5979       /* Finished deck v.
5980        * now look at its parents; if we're reusing memory (! do_full)
5981        * push the parents that we don't need any more into the pool.
5982        */
5983       if (! do_full) {
5984 	for (y = cm->plast[v]; y > cm->plast[v]-cm->pnum[v]; y--) {
5985 	  touch[y]--;
5986 	  if (touch[y] == 0) {
5987 	    deckpool_push(dpool, beta[y]);
5988 	    beta[y] = NULL;
5989 	  }
5990 	}
5991       }
5992 
5993     } /* end loop over decks v. */
5994 
5995 #if 0
5996   /* superfluous code, I think...*/
5997   /* Deal with the last step needed for local alignment
5998    * w.r.t. ends: left-emitting, zero-scoring EL->EL transitions.
5999    */
6000   if (useEL && cm->flags & CMH_LOCAL_END) {
6001     for (jp = j0-j1; jp >= 0; jp--)
6002       {
6003 	/* Bands used ip 19 */
6004 	/* Actually the bands are not used here, because there are no bands for
6005 	   state cm->M.  I'll just leave the unbanded code alone here.  Not sure
6006 	   how to think about bands in terms of local alignment??? */
6007 	for (ip = 1; ip <= i1-i0; ip++) /* careful w/boundary here */
6008 	  if ((sc = beta[cm->M][jp][ip-1]) > beta[cm->M][jp][ip])
6009 	    beta[cm->M][jp][ip] = sc;
6010       }
6011   }
6012 #endif
6013 
6014   /* If the caller doesn't want the matrix, free it.
6015    * (though it would be *stupid* for the caller not to want the
6016    * matrix in the current implementation!)
6017    */
6018   if (ret_beta == NULL) {
6019     for (v = r; v <= z; v++)
6020       if (beta[v] != NULL) { deckpool_push(dpool, beta[v]); beta[v] = NULL; }
6021     if (cm->flags & CMH_LOCAL_END) {
6022       deckpool_push(dpool, beta[cm->M]);
6023       beta[cm->M] = NULL;
6024     }
6025     free(beta);
6026   } else *ret_beta = beta;
6027 
6028   /* If the caller doesn't want the deck pool, free it.
6029    * Else, pass it back to him.
6030    */
6031   if (ret_dpool == NULL) {
6032     float **a;
6033     while (deckpool_pop(dpool, &a))
6034       free_vji_deck(a,j1,j0);
6035     deckpool_free(dpool);
6036   } else *ret_dpool = dpool;
6037 
6038   free(touch);
6039   free(imax);
6040   free(imin);
6041   return;
6042  ERROR:
6043   cm_Fail("Memory allocation error.");
6044 }
6045 
6046 
6047 /* For the Full CYK memory efficient banded implementation we need
6048  *  banded versions of some of the memory management routines
6049  *
6050  * The D&C banded implementation is not memory efficient, in that
6051  * it requires the same amount of memory as the non-banded D&C implementation.
6052  * This means that we still allocate the same memory as we would without bands,
6053  * we just set all cells of alpha or beta that are outside of the bands to
6054  * IMPOSSIBLE.  Because of this we should be able to use the same memory management
6055  * routines as the non-banded implementation.
6056  *
6057  * Therefore we can use the D&C memory routines for banded D&C.
6058  */
6059 
6060 /*################################################################*/
6061 /* EPN *_banded_vjd_*
6062    adapted from *_vjd_* from SRE*/
6063 
6064 /* Functions: *_vjd_*
6065  * Date:     SRE, Sat Aug 12 16:27:37 2000 [Titusville]
6066  *
6067  * Purpose:  Allocation and freeing of 3D matrices and 2D decks
6068  *           in the vjd coord system. These can be called on
6069  *           subsequences i..j, not just the full sequence 1..L,
6070  *           so they need i,j... if you're doing the full sequence
6071  *           just pass 1,L.
6072  *
6073  *           Also deal with shadow matrices and shadow decks in the
6074  *           vjd coordinate system. Note that bifurcation shadow decks
6075  *           need more dynamic range than other shadow decks, hence
6076  *           a separation into "kshadow" (BIFURC) and "yshadow" (other
6077  *           states) decks, and some casting shenanigans in
6078  *           a full ***shadow matrix.
6079  *
6080  *           Values in yshad are offsets to the next connected state,
6081  *           or a flag for local alignment. Possible offsets range from
6082  *           0..5 (maximum of 6 connected states). The flags are
6083  *           USED_LOCAL_BEGIN (101) and USED_EL (102), defined at
6084  *           the top of this file. Only yshad[0][L][L] (e.g. root state 0,
6085  *           aligned to the whole sequence) may be set to USED_LOCAL_BEGIN.
6086  *           (Remember that the dynamic range of yshad, as a char, is
6087  *           0..127, in ANSI C; we don't know if a machine will make it
6088  *           signed or unsigned.)
6089  */
float **
alloc_banded_vjd_deck(int L, int i, int j, int min, int max)
{
  int     status;   /* set by ESL_ALLOC on failure */
  float **deck;     /* the deck being built: L+1 row pointers */
  int     row;      /* absolute row index, 0..L */
  int     off;      /* row offset relative to row i-1 */
  int     width;    /* number of cells inside the band for the current row */

  ESL_DPRINTF3(("#DEBUG: alloc_vjd_deck : %.4f\n", size_vjd_deck(L,i,j)));

  /* Row pointers always span 0..L; every row starts out NULL and only the
   * rows i-1..j that have a non-empty band receive storage below.
   */
  ESL_ALLOC(deck, sizeof(float *) * (L+1));
  for (row = 0; row <= L; row++)
    deck[row] = NULL;

  for (off = 0; off <= j-i+1; off++)
    {
      /* Band width for this row: the upper end is capped at max,
       * the lower end is min.
       */
      width = ((off > max) ? max : off) - min + 1;
      if (width <= 0)
        continue;               /* empty band: leave the row NULL */

      ESL_ALLOC(deck[off+i-1], sizeof(float) * width);
    }
  return deck;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
6129 
/* Function: alloc_banded_vjd_yshadow_deck()
 *
 * Purpose:  Allocate one banded char shadow deck in the vjd coordinate
 *           system for subsequence i..j of a sequence of length L.
 *           Row layout is the same as in alloc_banded_vjd_deck(): row
 *           pointers cover 0..L, rows outside i-1..j are NULL, and row
 *           (i-1)+jp has one char per d from <min> up to the smaller
 *           of jp and <max> (NULL if there is no such d).
 *
 * Args:     L    - length of the full sequence
 *           i    - first position of the subsequence
 *           j    - last position of the subsequence
 *           min  - smallest d inside the band
 *           max  - largest d inside the band
 *
 * Returns:  The newly allocated deck; its cells are not initialized.
 *           Calls cm_Fail() if an allocation fails.
 */
char **
alloc_banded_vjd_yshadow_deck(int L, int i, int j, int min, int max)
{
  int    status;    /* used by the ESL_ALLOC macro */
  char **ydeck;     /* the shadow deck being built */
  int    row;       /* absolute row index into ydeck[] */
  int    jp;        /* j': row position relative to the subsequence */
  int    bandlen;   /* band width for the current row; depends on jp */

  ESL_ALLOC(ydeck, sizeof(char *) * (L+1)); /* rows 0..L, as for a score deck */

  for (row = 0;   row < i-1; row++) ydeck[row] = NULL;
  for (row = j+1; row <= L;  row++) ydeck[row] = NULL;

  for (jp = 0; jp <= j-i+1; jp++)
    {
      row     = jp+i-1;
      bandlen = ((jp > max) ? max : jp) - min + 1;

      ydeck[row] = NULL;      /* stays NULL when the band is empty here */
      if (bandlen > 0)
        {
          ESL_ALLOC(ydeck[row], sizeof(char) * (bandlen));
        }
    }
  return ydeck;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
/* Function: alloc_banded_vjd_kshadow_deck()
 *
 * Purpose:  Allocate one banded int shadow deck in the vjd coordinate
 *           system for subsequence i..j of a sequence of length L.
 *           Row layout is the same as in alloc_banded_vjd_deck(): row
 *           pointers cover 0..L, rows outside i-1..j are NULL, and row
 *           (i-1)+jp has one int per d from <min> up to the smaller of
 *           jp and <max> (NULL if there is no such d).
 *
 * Args:     L    - length of the full sequence
 *           i    - first position of the subsequence
 *           j    - last position of the subsequence
 *           min  - smallest d inside the band
 *           max  - largest d inside the band
 *
 * Returns:  The newly allocated deck; its cells are not initialized.
 *           Calls cm_Fail() if an allocation fails.
 */
int **
alloc_banded_vjd_kshadow_deck(int L, int i, int j, int min, int max)
{
  int   status;     /* used by the ESL_ALLOC macro */
  int **kdeck;      /* the shadow deck being built */
  int   row;        /* absolute row index into kdeck[] */
  int   jp;         /* j': row position relative to the subsequence */
  int   upper;      /* largest d usable in the current row */
  int   width;      /* band width for the current row; depends on jp */

  ESL_ALLOC(kdeck, sizeof(int *) * (L+1)); /* rows 0..L, as for a score deck */

  for (row = 0;   row <  i-1; row++) kdeck[row] = NULL;
  for (row = j+1; row <= L;   row++) kdeck[row] = NULL;

  for (jp = 0; jp <= j-i+1; jp++)
    {
      row = jp+i-1;

      if (jp > max) upper = max;
      else          upper = jp;
      width = upper - min + 1;

      if (width > 0)
        {
          ESL_ALLOC(kdeck[row], sizeof(int) * width);
        }
      else
        {
          kdeck[row] = NULL;
        }
    }

  return kdeck;

 ERROR:
  cm_Fail("Memory allocation error.");
  return NULL; /* never reached */
}
6189 
6190 /******************************************************************/
6191 /* The below functions were written during debugging, and print
6192    out either the shadow or alpha matrix.  They are kept
6193    here just in case they're needed again.  Note : the functions
6194    that print out the entire matrix are really only useful
6195    when the BE_PARANOID flag is set, meaning that decks are
6196    never freed until the end.
6197 */
6198 /*================================================================*/
6199 /* EPN 05.09.05
6200    debug_print_shadow()
6201  * Function: debug_print_shadow
6202  *
6203  * Purpose:  Print shadow matrix
6204  */
6205 
6206 void
debug_print_shadow(void *** shadow,CM_t * cm,int L)6207 debug_print_shadow(void ***shadow, CM_t *cm, int L)
6208 {
6209   int v, j, d;
6210   int yoffset;
6211 
6212   printf("\nPrinting alpha matrix :\n");
6213   printf("************************************\n");
6214   for(v = 0; v < cm->M; v++)
6215     {
6216       printf("====================================\n");
6217       for(j = 0; j <= L; j++)
6218 	{
6219 	  printf("------------------------------------\n");
6220 	  for(d = 0; d <= j; d++)
6221 	    {
6222 	      if(cm->sttype[v] == E_st)
6223 		{
6224 		  printf("END state\n");
6225 		}
6226 	      else
6227 		{
6228 		  if(cm->sttype[v] == B_st)
6229 		    {
6230 		      yoffset = ((int **) shadow[v])[j][d];
6231 		      printf("INT  shadow[%2d][%2d][%2d] : %d\n", v, j, d, yoffset);
6232 		    }
6233 		  else
6234 		    {
6235 		      yoffset = ((int **) shadow[v])[j][d];
6236 		      printf("CHAR shadow[%2d][%2d][%2d] : %d\n", v, j, d, yoffset);
6237 		    }
6238 		}
6239 	    }
6240 	}
6241     }
6242   printf("****************\n\n");
6243 }
6244 
6245 /* EPN 05.16.05
6246    debug_print_shadow_banded()
6247  * Function: debug_print_shadow_banded
6248  *
6249  * Purpose:  Print banded shadow matrix
6250  */
6251 
6252 void
debug_print_shadow_banded(void *** shadow,CM_t * cm,int L,int * dmin,int * dmax)6253 debug_print_shadow_banded(void ***shadow, CM_t *cm, int L, int *dmin, int *dmax)
6254 {
6255   int v, j, d, vdp;
6256   int yoffset;
6257 
6258   printf("\nPrinting banded shadow matrix :\n");
6259   printf("************************************\n");
6260   for(v = 0; v < cm->M; v++)
6261     {
6262       printf("====================================\n");
6263       for(j = 0; j <= L; j++)
6264 	{
6265 	  printf("------------------------------------\n");
6266 	  /* there may be a problem with using j and not jp */
6267 	  for (d = dmin[v]; d <= dmax[v] && d <= j; d++)
6268 	    {
6269 	      vdp = d - dmin[v]; /* d index for state v in alpha w/mem eff bands */
6270 	      if(cm->sttype[v] == E_st)
6271 		{
6272 		  printf("END state\n");
6273 		}
6274 	      else
6275 		{
6276 		  if(cm->sttype[v] == B_st)
6277 		    {
6278 		      yoffset = ((int **) shadow[v])[j][vdp];
6279 		      printf("INT  shadow[%2d][%2d][%2d] : %d | d is %d\n", v, j, vdp, yoffset, d);
6280 		    }
6281 		  else
6282 		    {
6283 		      yoffset = ((int **) shadow[v])[j][vdp];
6284 		      printf("CHAR shadow[%2d][%2d][%2d] : %d | d is %d\n", v, j, vdp, yoffset, d);
6285 		    }
6286 		}
6287 	    }
6288 	}
6289     }
6290   printf("****************\n\n");
6291 }
6292 
6293 /* EPN 05.16.05
6294    debug_print_shadow_banded_deck()
6295  * Function: debug_print_shadow_banded_deck
6296  *
6297  * Purpose:  Print banded shadow matrix deck
6298  */
6299 
6300 void
debug_print_shadow_banded_deck(int v,void *** shadow,CM_t * cm,int L,int * dmin,int * dmax)6301 debug_print_shadow_banded_deck(int v, void ***shadow, CM_t *cm, int L, int *dmin, int *dmax)
6302 {
6303   int j, d, vdp;
6304   int yoffset;
6305 
6306   printf("\nPrinting banded shadow matrix deck for v : %d:\n", v);
6307   printf("====================================\n");
6308   for(j = 0; j <= L; j++)
6309     {
6310       printf("------------------------------------\n");
6311       /* there may be a problem with using j and not jp*/
6312       for (d = dmin[v]; d <= dmax[v] && d <= j; d++)
6313 	{
6314 	  vdp = d - dmin[v]; /* d index for state v in alpha w/mem eff bands */
6315 
6316 	  if(cm->sttype[v] == E_st)
6317 	    {
6318 	      printf("END state\n");
6319 	    }
6320 	  else
6321 	    {
6322 	      yoffset = ((char **) shadow[v])[j][vdp];
6323 	      printf("shadow_banded[%2d][%2d][%2d] : %d| d is %d\n", v, j, vdp, yoffset, d);
6324 	    }
6325 	}
6326     }
6327 }
6328 
6329 
6330 
6331 /* EPN 05.09.05
6332    debug_print_alpha_banded()
6333  * Function: debug_print_alpha_banded
6334  *
6335  * Purpose:  Print alpha matrix
6336  */
6337 void
debug_print_alpha_banded(float *** alpha,CM_t * cm,int L,int * dmin,int * dmax)6338 debug_print_alpha_banded(float ***alpha, CM_t *cm, int L, int *dmin, int *dmax)
6339 {
6340   int v, j, d, vdp, max_v;
6341 
6342   printf("\nPrinting banded alpha matrix :\n");
6343   printf("************************************\n");
6344   max_v = cm->M-1;
6345   if(cm->flags & CMH_LOCAL_BEGIN)
6346     {
6347       max_v = cm->M;
6348     }
6349   for(v = 0; v <= max_v; v++)
6350     {
6351       printf("====================================\n");
6352       for(j = 0; j <= L; j++)
6353 	{
6354 	  printf("------------------------------------\n");
6355 	  for (d = dmin[v]; d <= dmax[v] && d <= j; d++)
6356 	    {
6357 	      vdp = d - dmin[v]; /* d index for state v in alpha w/mem eff bands */
6358 	      printf("alpha[%2d][%2d][%2d] : %6.2f | d is %d\n", v, j, vdp, alpha[v][j][vdp], d);
6359 	    }
6360 	}
6361     }
6362   printf("****************\n\n");
6363 }
6364 
6365 /* EPN 05.09.05
6366    debug_print_alpha()
6367  * Function: debug_print_alpha
6368  *
6369  * Purpose:  Print alpha matrix
6370  */
6371 
6372 void
debug_print_alpha(float *** alpha,CM_t * cm,int L)6373 debug_print_alpha(float ***alpha, CM_t *cm, int L)
6374 {
6375   int v, j, d, max_v;
6376 
6377   printf("\nPrinting alpha matrix :\n");
6378   printf("************************************\n");
6379   max_v = cm->M-1;
6380   if(cm->flags & CMH_LOCAL_BEGIN)
6381     {
6382       max_v = cm->M;
6383     }
6384   for(v = 0; v <= max_v; v++)
6385     {
6386       printf("====================================\n");
6387       for(j = 0; j <= L; j++)
6388 	{
6389 	  printf("------------------------------------\n");
6390 	  for(d = 0; d <= j; d++)
6391 	    {
6392 	      printf("alpha[%2d][%2d][%2d] : %6.2f\n", v, j, d, alpha[v][j][d]);
6393 	    }
6394 	}
6395     }
6396   printf("****************\n\n");
6397 }
6398 
6399 
6400 /* EPN Memory efficient banded functions */
6401 /* Function: inside_b_me()
6402  *
6403  * Based on inside(), only difference is bands are used :
6404  * further the bands are used in a memory-efficient way
6405  * Another big difference is that we can't employ the deck
6406  * reuse strategy because the size of each deck depends
6407  * on the band for that state, so each deck can be different.
6408  *
6409  * Comments below are from inside():
6410  *
6411  * Date:     SRE, Mon Aug  7 13:15:37 2000 [St. Louis]
6412  *
6413  * Purpose:  Run the inside phase of a CYK alignment algorithm, on a
6414  *           subsequence from i0..j0, using a subtree of a model
6415  *           anchored at a start state vroot, and ending at an end
6416  *           state vend. (It is a feature of the model layout in
6417  *           a CM structure that all subtrees are contiguous in the
6418  *           model.)
6419  *
6420  *           A note on the loop conventions. We're going to keep the
6421  *           sequence (dsq) and the matrix (alpha) in the full coordinate
6422  *           system: [0..v..M-1][0..j..L][0..d..j]. However, we're
6423  *           only calculating a part of that matrix: only vroot..vend
6424  *           in the decks, i0-1..j in the rows, and up to j0-i0+1 in
6425  *           the columns (d dimension). Where this is handled the most
6426  *           is in two variables: W, which is the length of the subsequence
6427  *           (j0-i0+1), and is oft used in place of L in the usual CYK;
6428  *           and jp (read: j'), which is the *relative* j w.r.t. the
6429  *           subsequence, ranging from 0..W, and then d ranges from
6430  *           0 to jp, and j is calculated from jp (i0-1+jp).
6431  *
6432  *           The caller is allowed to provide us with a preexisting
6433  *           matrix and/or deckpool (thru "alpha" and "dpool"), or
6434  *           have them newly created by passing NULL. If we pass in an
6435  *           alpha, we expect that alpha[vroot..vend] are all NULL
6436  *           decks already; any other decks <vroot and >vend will
6437  *           be preserved. If we pass in a dpool, the decks *must* be
6438  *           sized for the same subsequence i0,j0.
6439  *
6440  *           Note that the (alpha, ret_alpha) calling idiom allows the
6441  *           caller to provide an existing matrix or not, and to
6442  *           retrieve the calculated matrix or not, in any combination.
6443  *
6444  *           We also deal with local begins, by keeping track of the optimal
6445  *           state that we could enter and account for the whole target
6446  *           sequence: b = argmax_v  alpha_v(i0,j0) + log t_0(v),
6447  *           and bsc is the score for that.
6448  *
6449  *           If vroot==0, i0==1, and j0==L (e.g. a complete alignment),
6450  *           the optimal alignment might use a local begin transition, 0->b,
6451  *           and we'd have to be able to trace that back. For any
6452  *           problem where the caller sets allow_begin, we return a valid b
6453  *           (the optimal 0->b choice) and bsc (the score if 0->b is used).
6454  *           If a local begin is part of the optimal parse tree, the optimal
6455  *           alignment score returned by inside() will be bsc and yshad[0][L][L]
 *           will be USED_LOCAL_BEGIN, telling insideT() to check b and
6457  *           start with a local 0->b entry transition. When inside()
6458  *           is called on smaller subproblems (v != 0 || i0 > 1 || j0
6459  *           < L), we're using inside() as an engine in divide &
6460  *           conquer, and we don't use the overall return score nor
6461  *           shadow matrices, but we do need allow_begin, b, and bsc for
6462  *           divide&conquer to sort out where a local begin might be used.
6463  *
6464  * Args:     cm        - the model    [0..M-1]
6465  *           dsq       - the sequence [1..L]
6466  *           L         - length of the dsq
6467  *           vroot     - first start state of subtree (0, for whole model)
6468  *           vend      - last end state of subtree (cm->M-1, for whole model)
6469  *           i0        - first position in subseq to align (1, for whole seq)
6470  *           j0        - last position in subseq to align (L, for whole seq)
6471  *           do_full   - if TRUE, we save all the decks in alpha, instead of
6472  *                       working in our default memory-efficient mode where
6473  *                       we reuse decks and only the uppermost deck (vroot) is valid
6474  *                       at the end.
6475  *           alpha     - if non-NULL, this is an existing matrix, with NULL
6476  *                       decks for vroot..vend, and we'll fill in those decks
6477  *                       appropriately instead of creating a new matrix
6478  *           ret_alpha - if non-NULL, return the matrix with one or more
6479  *                       decks available for examination (see "do_full")
 *           dpool     - if non-NULL, this is an existing deck pool, possibly empty,
 *                       but usually containing one or more allocated decks sized
 *                       for this subsequence i0..j0.
 *           ret_dpool - if non-NULL, return the deck pool for reuse -- these will
 *                       *only* be valid on exactly the same i0..j0 subseq,
 *                       because of the size of the subseq decks.
 *                       (dpool and ret_dpool are described here because these
 *                       comments come from inside(); inside_b_me() itself takes
 *                       no deck pool arguments.)
6486  *           ret_shadow- if non-NULL, the caller wants a shadow matrix, because
6487  *                       he intends to do a traceback.
6488  *           allow_begin- TRUE to allow 0->b local alignment begin transitions.
6489  *           ret_b     - best local begin state, or NULL if unwanted
6490  *           ret_bsc   - score for using ret_b, or NULL if unwanted
6491  *           dmin      - minimum d bound for each state v; [0..v..M-1]
6492  *           dmax      - maximum d bound for each state v; [0..v..M-1]
6493  *
6494  * Returns: Score of the optimal alignment.
6495  */
6496 static float
inside_b_me(CM_t * cm,ESL_DSQ * dsq,int L,int vroot,int vend,int i0,int j0,int do_full,float *** alpha,float **** ret_alpha,void **** ret_shadow,int allow_begin,int * ret_b,float * ret_bsc,int * dmin,int * dmax)6497 inside_b_me(CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0, int do_full,
6498 	    float ***alpha, float ****ret_alpha,
6499 	    void ****ret_shadow,
6500 	    int allow_begin, int *ret_b, float *ret_bsc,
6501 	    int *dmin, int *dmax)
6502 {
6503   int      status;
6504   float  **end;         /* we re-use the end deck. */
6505   int      nends;       /* counter that tracks when we can release end deck to the pool */
6506   int     *touch;       /* keeps track of how many higher decks still need this deck */
6507   int      v,y,z;	/* indices for states  */
6508   int      j,d,i;	/* indices in sequence dimensions */
6509   float    sc;		/* a temporary variable holding a score */
6510   int      yoffset;	/* y=base+offset -- counter in child states that v can transit to */
6511   int      W;		/* subsequence length */
6512   int      jp;		/* j': relative position in the subsequence  */
6513   void  ***shadow;      /* shadow matrix for tracebacks */
6514   int    **kshad;       /* a shadow deck for bifurcations */
6515   char   **yshad;       /* a shadow deck for every other kind of state */
6516   int      b;		/* best local begin state */
6517   float    bsc;		/* score for using the best local begin state */
6518 
6519   /* variables used for memory efficient bands */
6520   int      dp_v;           /* d index for state v in alpha w/mem eff bands */
6521   int      dp_y;           /* d index for state y in alpha w/mem eff bands */
6522   int      kp;             /* k' - what k should be, now that we're banded */
6523   int      Wp;             /* W also changes depending on state */
6524 
6525   /* Allocations and initializations
6526    */
6527   b   = -1;
6528   bsc = IMPOSSIBLE;
6529   W   = j0-i0+1;		/* the length of the subsequence -- used in many loops  */
6530 				/* if caller didn't give us a deck pool, make one */
6531   end = alloc_vjd_deck(L, i0, j0);
6532   nends = CMSubtreeCountStatetype(cm, vroot, E_st);
6533   for (jp = 0; jp <= W; jp++) {
6534     j = i0+jp-1;		/* e.g. j runs from 0..L on whole seq */
6535     end[j][0] = 0.;
6536     for (d = 1; d <= jp; d++) end[j][d] = IMPOSSIBLE;
6537   }
6538 
6539   /* if caller didn't give us a matrix, make one.
6540    * It's important to allocate for M+1 decks (deck M is for EL, local
6541    * alignment) - even though Inside doesn't need EL, Outside does,
6542    * and we might reuse this memory in a call to Outside.
6543    */
6544   if (alpha == NULL) {
6545     ESL_ALLOC(alpha, sizeof(float **) * (cm->M+1));
6546     for (v = 0; v <= cm->M; v++) alpha[v] = NULL;
6547   }
6548 
6549   ESL_ALLOC(touch, (sizeof(int) * cm->M));
6550   for (v = 0;     v < vroot; v++) touch[v] = 0;
6551   for (v = vroot; v <= vend; v++) touch[v] = cm->pnum[v];
6552   for (v = vend+1;v < cm->M; v++) touch[v] = 0;
6553 
6554   /* The shadow matrix, if caller wants a traceback.
6555    * We do some pointer tricks here to save memory. The shadow matrix
6556    * is a void ***. Decks may either be char ** (usually) or
6557    * int ** (for bifurcation decks). Watch out for the casts.
6558    * For most states we only need
6559    * to keep y as traceback info, and y <= 6. For bifurcations,
6560    * we need to keep k, and k <= L, and L might be fairly big.
6561    * (We could probably limit k to an unsigned short ... anyone
6562    * aligning an RNA > 65536 would need a big computer... but
6563    * we'll hold off on that for now. We could also pack more
6564    * traceback pointers into a smaller space since we only really
6565    * need 3 bits, not 8.)
6566    */
6567   if (ret_shadow != NULL) {
6568     ESL_ALLOC(shadow, sizeof(void **) * cm->M);
6569     for (v = 0; v < cm->M; v++) shadow[v] = NULL;
6570   }
6571 
6572   /* Main recursion
6573    */
6574   for (v = vend; v >= vroot; v--)
6575     {
6576       /* First we need a deck to fill in.
6577        * 1. if we're an E, reuse the end deck (and it's already calculated)
6578        * 2. else, see if we can take something from the pool
6579        * 3. else, allocate a new deck.
6580        */
6581       if (cm->sttype[v] == E_st) {
6582 	alpha[v] = end; continue;
6583       }
6584       alpha[v] = alloc_banded_vjd_deck(L, i0, j0, dmin[v], dmax[v]);
6585 
6586       if (ret_shadow != NULL) {
6587 	if (cm->sttype[v] == B_st) {
6588 	  kshad     = alloc_banded_vjd_kshadow_deck(L, i0, j0, dmin[v], dmax[v]);
6589 	  shadow[v] = (void **) kshad;
6590 	} else {
6591 	  yshad     = alloc_banded_vjd_yshadow_deck(L, i0, j0, dmin[v], dmax[v]);
6592 	  shadow[v] = (void **) yshad;
6593 	}
6594       }
6595 
6596       if (cm->sttype[v] == D_st || cm->sttype[v] == S_st)
6597 	{
6598 	  for (jp = 0; jp <= W; jp++) {
6599 	    j = i0-1+jp;
6600 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
6601 	      {
6602 		y = cm->cfirst[v];
6603 		dp_v = d - dmin[v];  /* d index for state v in alpha w/mem eff bands */
6604 
6605 		alpha[v][j][dp_v] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
6606 		/* treat EL as emitting only on self transition */
6607 		if (ret_shadow != NULL) yshad[j][dp_v]  = USED_EL;
6608 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
6609 		  {
6610 		    dp_y = d - dmin[y+yoffset];  /* d index for state (y+yoffset)
6611 						   in alpha w/mem eff bands */
6612 		    /* check to make sure the cell we're about to query is within the
6613 		       bands for state y; this might be more complex than necessary */
6614 		    if((dp_y >= 0) && ((dp_y < (jp - (dmin[y+yoffset]) + 1))
6615 				       && (dp_y < (dmax[y+yoffset] - dmin[y+yoffset] + 1))))
6616 		      {
6617 			if ((sc = alpha[y+yoffset][j][dp_y] + cm->tsc[v][yoffset]) >  alpha[v][j][dp_v]) {
6618 			  alpha[v][j][dp_v] = sc;
6619 			  if (ret_shadow != NULL) yshad[j][dp_v] = yoffset;
6620 			}
6621 		      }
6622 		  }
6623 		if (alpha[v][j][dp_v] < IMPOSSIBLE) alpha[v][j][dp_v] = IMPOSSIBLE;
6624 	      }
6625 	  }
6626 	}
6627       else if (cm->sttype[v] == B_st)
6628 	{
6629 	  for (jp = 0; jp <= W; jp++) {
6630 	    j = i0-1+jp;
6631 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
6632 	      {
6633 		y = cm->cfirst[v];
6634 		z = cm->cnum[v];
6635 
6636 		/* The changes made to this section of code in the memory efficient
6637 		 * banded implementation are the most complex changes necessary to
6638 		 * get memory efficiency.  The reason is because there are indices in
6639 		 * two other states for a B_st, y and z (instead of just y).  This
6640 		 * means that when we're dealing with a dp_v that is d minus a v-state
6641 		 * specific offset, we also have to worry about the y-state offset
6642 		 * and z-state offset.
6643 		 * Let's set kp as the equivalent of k from the old code, but
6644 		 * now we have to take into account the offsets.  To remain as
6645 		 * consistent as possible with the old code, we will keep the
6646 		 * indexing in z the same in the recursion, and figure out what
6647 		 * the corresponding indices involving state y are.
6648 		 * So the old recursion code is :
6649 		 *
6650 		 * for (jp = 0; jp <= W; jp++) {
6651 		 * j = i0-1+jp;
6652 		 * for (d = 0; d <= jp; d++)
6653 		 * {
6654 		 *   alpha[v][j][d] = alpha[y][j][d] + alpha[z][j][0]; *INIT*
6655 		 *   if (ret_shadow != NULL) kshad[j][d] = 0;
6656 		 *   for (k = 1; k <= d; k++)
6657 		 *   *RECURSION*
6658 		 *   if ((sc = alpha[y][j-k][d-k] + alpha[z][j][k]) > alpha[v][j][d]) {
6659 		 *     alpha[v][j][d] = sc;
6660 		 *     if (ret_shadow != NULL) kshad[j][d] = k; }
6661 		 *
6662 		 * So we'll minimally change alpha[z][j][k] to alpha[z][j][kp]
6663 		 * The INIT may change because although alpha[z][j][0] MUST be
6664 		 * within the bands (because dmin[z] >= 0), the corresponding
6665 		 * cell in alpha[y] might not be within the bands for y.
6666 		 * That cell is alpha[y][j-dmin[z]-kp][d-dmin[y]-dmin[z]-kp]
6667 		 * because k = kp + dmin[z] (it probably takes some time writing
6668 		 * down the new and old equations, and staring and thinking for a
6669 		 * while - I would write down more here - but this is already pretty
6670 		 * verbose ... ).
6671 		 *
6672 		 * Therefore we can't just start with k (or kp)  = 0
6673 		 * (like the old code did), because that might not be valid.
6674 		 *
6675 		 * First we need to determine the smallest kp for which we can
6676 		 * do a valid traceback, which means the alpha cell for both the y
6677 		 * state and z state are within the bands.  For a kp to be valid given
6678 		 * the following code, the following three inequalities have to be
6679 		 * true.
6680 		 *
6681 		 * (1) d-dmin[z]-kp <= dmax[y]
6682 		 * (2) d-dmin[z]-kp >= dmin[y]
6683 		 * (3) kp <= dmax[z]-dmin[z]
6684 		 *
6685 		 * (1) and (2) need to be satisified to guarantee that the cell we
6686 		 * are going to access in the alpha[y] deck is within the bands for
6687 		 * state y.  (3) is necessary to guarantee that the cell we are
6688 		 * going to access in the alpha[z] deck is within the bands for
6689 		 * state z.
6690 		 * We can rearrange 1 and 2 :
6691 		 *
6692 		 * (1) kp >= d-dmax[y]-dmin[z]
6693 		 * (2) kp <= d-dmin[y]-dmin[z]
6694 		 *
6695 		 * First to check to see if ANY kp is valid, we can first
6696 		 * check to make sure that (d-dmin[y]-dmin[z]) (RHS of (2))
6697 		 * is >= 0.  If not, then kp can never be 0 or greater.
6698 		 * So it can never be valid. So we check for this at
6699 		 * the beginning.
6700 		 *
6701 		 * So, to find the minimal kp that satisfies (1), (2) and (3)
6702 		 * I set kp = d-dmax[y]-dmin[z], and then check that it kp >= 0
6703 		 * If kp < 0, we set it to 0.  Then we check to make sure kp
6704 		 * satisfies (3) (It has to satisfy (2) if it satisfies (1)
6705 		 * because dmax[y] >= dmin[y]).  This is our *INIT* assignment.
6706 		 * Next we incrementally step through all valid kp values, we'll need
6707 		 * a for loop with two conditions to check in the 'while' portion.
6708 		 * Namely, that kp satisfies inequalities (2) and (3), that is
6709 		 * kp <= (d-dmin[y]-dmin[z]) and kp <= (dmax[z]-dmin[z])
6710 		 * This is marked in the code by *RECUR*
6711 		 *
6712 		 * Also, we want to make sure the while statement from the
6713 		 * original for loop (non-banded) is also satisfied.  This
6714 		 * statement is k <= d.  We're dealing with kp, and k = kp+dmin[z]
6715 		 * so this statement becomes kp <= d-dmin[z].  However, inequality
6716 		 * (2) (kp <= d-dmin[y]-dmin[z]) takes care of this because dmin[y] >= 0
6717 		 *
6718 		 */
6719 		dp_v = d - dmin[v];  /* d index for state v in alpha w/mem eff bands */
6720 		dp_y = d - dmin[y];  /* d index for state y in alpha w/mem eff bands */
6721 
6722 		/* First make sure we have any valid kp, we know from inequality (2)
6723 		   that kp <= d-dmin[y]-dmin[z] so if this is < 0 then no kp
6724 		   is valid (see notes above) */
6725 
6726 		if((d-dmin[y]-dmin[z]) >= 0)
6727 		{
6728 		  if(jp < dmax[y]) kp = d-dmin[z]-jp;
6729 		  else kp = d-dmin[z]-dmax[y];
6730 		  if(kp < 0) kp = 0;
6731 		  if(kp <= dmax[z] - dmin[z]) /* make sure its valid in deck alpha[z] */
6732 		    {
6733 		      alpha[v][j][dp_v] = alpha[y][j-dmin[z]-kp][d-dmin[y]-dmin[z]-kp]
6734 			+ alpha[z][j][kp];
6735 		      if (ret_shadow != NULL) kshad[j][dp_v] = kp;
6736 		      for (kp = kp+1; kp <= (d-dmin[y]-dmin[z]) && kp <= (dmax[z]-dmin[z]);
6737 			   kp++)
6738 			{
6739 			  /* the following if statement ensures that the alpha cell for
6740 			     state y and the cell for state z that we are about to query
6741 			     is in fact within the bands for state y and state z respectively*/
6742 			  if ((sc = alpha[y][j-dmin[z]-kp][d-dmin[y]-dmin[z]-kp]
6743 			       + alpha[z][j][kp]) > alpha[v][j][dp_v])
6744 			    {
6745 			      alpha[v][j][dp_v] = sc;
6746 			      if (ret_shadow != NULL) kshad[j][dp_v] = kp;
6747 			    }
6748 			}
6749 		    }
6750 		}
6751 		else alpha[v][j][dp_v] = IMPOSSIBLE;
6752 		/*else cm_Fail("cell in alpha matrix was not filled in due to bands.\n");*/
6753 		if (alpha[v][j][dp_v] < IMPOSSIBLE) alpha[v][j][dp_v] = IMPOSSIBLE;
6754 	      }
6755 	  }
6756 	}
6757       else if (cm->sttype[v] == MP_st)
6758 	{
6759 	  for (jp = 0; jp <= W; jp++) {
6760 	    j = i0-1+jp;
6761 	    /* We assume dmin[v] >= 2 (it has to be) */
6762 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
6763 	      {
6764 		y = cm->cfirst[v];
6765 		dp_v = d - dmin[v]; /* d index for state v in alpha w/mem eff bands */
6766 		alpha[v][j][dp_v] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
6767 		/* treat EL as emitting only on self transition */
6768 		if(ret_shadow != NULL) yshad[j][dp_v] = USED_EL;
6769 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
6770 		  {
6771 		    dp_y = d - dmin[y+yoffset];  /* d index for state (y+yoffset)
6772 						   in alpha w/mem eff bands */
6773 		    /* the following if statement ensures that the alpha cell for
6774 		       state y that we are about to query is in fact within the
6775 		       bands for state y */
6776 		    if(((dp_y-2) >= 0) && (((dp_y-2) < (jp - (dmin[y+yoffset]) + 1))
6777 					   && ((dp_y-2) < (dmax[y+yoffset] - dmin[y+yoffset] + 1))))
6778 		      {
6779 			if ((sc = alpha[y+yoffset][j-1][dp_y-2] + cm->tsc[v][yoffset]) >  alpha[v][j][dp_v])
6780 			  {
6781 			    alpha[v][j][dp_v] = sc;
6782 			    if (ret_shadow != NULL) yshad[j][dp_v] = yoffset;
6783 			  }
6784 		      }
6785 		  }
6786 		i = j-d+1;
6787 		if (dsq[i] < cm->abc->K && dsq[j] < cm->abc->K)
6788 		  alpha[v][j][dp_v] += cm->esc[v][(int) (dsq[i]*cm->abc->K+dsq[j])];
6789 		else
6790 		  alpha[v][j][dp_v] += DegeneratePairScore(cm->abc, cm->esc[v], dsq[i], dsq[j]);
6791 
6792 		if (alpha[v][j][dp_v] < IMPOSSIBLE) alpha[v][j][dp_v] = IMPOSSIBLE;
6793 		/* CYK Full ME Bands used 7 end block */
6794 	      }
6795 	  }
6796 	}
6797       else if (cm->sttype[v] == IL_st || cm->sttype[v] == ML_st)
6798 	{
6799 	  for (jp = 0; jp <= W; jp++) {
6800 	    j = i0-1+jp;
6801 
6802 	    /* we assume dmin[v] >= 1, it has to be */
6803 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
6804 	      {
6805 		y = cm->cfirst[v];
6806 		dp_v = d - dmin[v]; /* d index for state v in alpha w/mem eff bands */
6807 		alpha[v][j][dp_v] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
6808 		/* treat EL as emitting only on self transition */
6809 		if (ret_shadow != NULL) yshad[j][dp_v] = USED_EL;
6810 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
6811 		  {
6812 		    dp_y = d - dmin[y+yoffset];  /* d index for state (y+yoffset)
6813 						   in alpha w/mem eff bands */
6814 		    /* the following if statement ensures that the alpha cell for
6815 		       state y that we are about to query is in fact within the
6816 		       bands for state y */
6817 		    if(((dp_y-1) >= 0) && (((dp_y-1) < (jp - (dmin[y+yoffset]) + 1))
6818 				      && ((dp_y-1) < (dmax[y+yoffset] - dmin[y+yoffset] + 1))))
6819 		      {
6820 			if ((sc = alpha[y+yoffset][j][dp_y-1] + cm->tsc[v][yoffset]) >  alpha[v][j][dp_v])
6821 			  {
6822 			    alpha[v][j][dp_v] = sc;
6823 			    if (ret_shadow != NULL) yshad[j][dp_v] = yoffset;
6824 			  }
6825 		      }
6826 		  }
6827 		i = j-d+1;
6828 		if (dsq[i] < cm->abc->K)
6829 		  alpha[v][j][dp_v] += cm->esc[v][dsq[i]];
6830 		else
6831 		  alpha[v][j][dp_v] += esl_abc_FAvgScore(cm->abc, dsq[i], cm->esc[v]);
6832 		if (alpha[v][j][dp_v] < IMPOSSIBLE) alpha[v][j][dp_v] = IMPOSSIBLE;
6833 		/* CYK Full ME Bands used 9 end block */
6834 	      }
6835 	  }
6836 	}
6837       else if (cm->sttype[v] == IR_st || cm->sttype[v] == MR_st)
6838 	{
6839 	  for (jp = 0; jp <= W; jp++) {
6840 	    j = i0-1+jp;
6841 	    for (d = dmin[v]; d <= dmax[v] && d <= jp; d++)
6842 	      {
6843 		y = cm->cfirst[v];
6844 		dp_v = d - dmin[v]; /* d index for state v in alpha w/mem eff bands */
6845 		alpha[v][j][dp_v] = cm->endsc[v] + (cm->el_selfsc * (d-StateDelta(cm->sttype[v])));
6846 		/* treat EL as emitting only on self transition */
6847 		if (ret_shadow != NULL) yshad[j][dp_v] = USED_EL;
6848 		for (yoffset = 0; yoffset < cm->cnum[v]; yoffset++)
6849 		  {
6850 		    dp_y = d - dmin[y+yoffset];  /* d index for state (y+yoffset)
6851 						   in alpha w/mem eff bands */
6852 		    /* the following if statement ensures that the alpha cell for
6853 		       state y that we are about to query is in fact within the
6854 		       bands for state y */
6855 		    if(((dp_y-1) >= 0) && (((dp_y-1) < (jp - (dmin[y+yoffset]) + 1))
6856 				      && ((dp_y-1) < (dmax[y+yoffset] - dmin[y+yoffset] + 1))))
6857 		      {
6858 			if ((sc = alpha[y+yoffset][j-1][dp_y-1] + cm->tsc[v][yoffset]) > alpha[v][j][dp_v])
6859 			  {
6860 			    alpha[v][j][dp_v] = sc;
6861 			    if (ret_shadow != NULL) yshad[j][dp_v] = yoffset;
6862 			  }
6863 		      }
6864 		  }
6865 		if (dsq[j] < cm->abc->K)
6866 		  alpha[v][j][dp_v] += cm->esc[v][dsq[j]];
6867 		else
6868 		  alpha[v][j][dp_v] += esl_abc_FAvgScore(cm->abc, dsq[j], cm->esc[v]);
6869 
6870 		if (alpha[v][j][dp_v] < IMPOSSIBLE) alpha[v][j][dp_v] = IMPOSSIBLE;
6871 		/* CYK Full ME Bands used 11 end block */
6872 	      }
6873 	  }
6874 	}				/* finished calculating deck v. */
6875 
6876       /* The following loops originally access alpha[v][j0][W] but the index W will be
6877 	 in different positions due to the bands */
6878 
6879       Wp = W - dmin[v];
6880       /* We need to make sure that Wp is within the bands */
6881       if(Wp >= 0 && Wp <= (dmax[v] - dmin[v]))
6882 	{
6883 	  /* Check for local begin getting us to the root.
6884 	   * This is "off-shadow": if/when we trace back, we'll handle this
6885 	   * case separately (and we'll know to do it because we'll immediately
6886 	   * see a USED_LOCAL_BEGIN flag in the shadow matrix, telling us
6887 	   * to jump right to state b; see below)
6888 	   */
6889 	  if (allow_begin && alpha[v][j0][Wp] + cm->beginsc[v] > bsc)
6890 	    {
6891 	      b   = v;
6892 	      bsc = alpha[v][j0][Wp] + cm->beginsc[v];
6893 	    }
6894 
6895 	  /* Check for whether we need to store an optimal local begin score
6896 	   * as the optimal overall score, and if we need to put a flag
6897 	   * in the shadow matrix telling insideT() to use the b we return.
6898 	   */
6899 	  if (allow_begin && v == 0 && bsc > alpha[0][j0][Wp]) {
6900 	    alpha[0][j0][Wp] = bsc;
6901 	    if (ret_shadow != NULL) yshad[j0][Wp] = USED_LOCAL_BEGIN;
6902 	  }
6903 	}
6904       /* In the non-banded code, we used the deck reuse strategy, however, here
6905 	 we can't do that, because for each state, the bands are different, so
6906 	 we can't use old decks, but rather must allocate a new one, and free
6907 	 the old one. */
6908 
6909       if (! do_full) {
6910 	if (cm->sttype[v] == B_st)
6911 	  {
6912 	    /* we can definitely release the S children of a bifurc. */
6913 	    y = cm->cfirst[v];
6914 	    z = cm->cnum[v];
6915 	    free_vjd_deck(alpha[y], i0, j0);
6916 	    alpha[y] = NULL;
6917 	    free_vjd_deck(alpha[z], i0, j0);
6918 	    alpha[z] = NULL;
6919 	  }
6920 	else
6921 	  {
6922 	    for (y = cm->cfirst[v]; y < cm->cfirst[v]+cm->cnum[v]; y++)
6923 	      {
6924 		touch[y]--;
6925 		if (touch[y] == 0)
6926 		  {
6927 		    if (cm->sttype[y] == E_st) {
6928 		      nends--;
6929 		      /* Original code : if (nends == 0) { deckpool_push(dpool, end); end = NULL;} */
6930 		      /* ME code deletes the previous line, we don't mess with end, because
6931 			 it is used later */
6932 		    } else
6933 		      free_vjd_deck(alpha[y], i0, j0);
6934 		    alpha[y] = NULL;
6935 		  }
6936 	      }
6937 	  }
6938       }
6939   } /* end loop over all v */
6940 
6941   /* Now we free our memory.
6942    * if we've got do_full set, all decks vroot..vend are now valid (end is shared).
6943    * else, only vroot deck is valid now and all others vroot+1..vend are NULL,
6944    * and end is NULL.
6945    * We could check this status to be sure (and we used to) but now we trust.
6946    */
6947 
6948   /* CYK Full ME Bands used 14 */
6949   /* original line :  sc       = alpha[vroot][j0][W];*/
6950   Wp = W - dmin[vroot];
6951   sc       = alpha[vroot][j0][Wp];
6952 
6953   if (ret_b != NULL)   *ret_b   = b;    /* b is -1 if allow_begin is FALSE. */
6954   if (ret_bsc != NULL) *ret_bsc = bsc;  /* bsc is IMPOSSIBLE if allow_begin is FALSE */
6955 
6956   /* If the caller doesn't want the matrix, free it (saving the decks in the pool!)
6957    * Else, pass it back to him.
6958    */
6959   if (ret_alpha == NULL) {
6960     for (v = vroot; v <= vend; v++) /* be careful of our reuse of the end deck -- free it only once */
6961       if (alpha[v] != NULL) {
6962 	if (cm->sttype[v] != E_st) { free_vjd_deck(alpha[v], i0, j0); alpha[v] = NULL; }
6963 	else end = alpha[v];
6964       }
6965     if (end != NULL) { free_vjd_deck(end, i0, j0); end = NULL; }
6966     free(alpha);
6967   } else *ret_alpha = alpha;
6968 
6969   free(touch);
6970   if (ret_shadow != NULL) *ret_shadow = shadow;
6971   return sc;
6972 
6973  ERROR:
6974   cm_Fail("Memory allocation error.");
6975   return 0.; /* never reached */
6976 }
6977 
6978 /* Function: insideT_b_me()
6979  *           EPN 05.24.05
6980  * *based on insideT(), only difference is memory efficient bands are used :
6981  *
6982  * Date:     SRE, Fri Aug 11 12:08:18 2000 [Pittsburgh]
6983  *
6984  * Purpose:  Call inside, get vjd shadow matrix;
6985  *           then trace back. Append the trace to a given
6986  *           traceback, which already has state r at tr->n-1.
6987  */
6988 static float
insideT_b_me(CM_t * cm,ESL_DSQ * dsq,int L,Parsetree_t * tr,int r,int z,int i0,int j0,int allow_begin,int * dmin,int * dmax)6989 insideT_b_me(CM_t *cm, ESL_DSQ *dsq, int L, Parsetree_t *tr,
6990 	     int r, int z, int i0, int j0,
6991 	     int allow_begin, int *dmin, int *dmax)
6992 {
6993   int       status;
6994   void   ***shadow;             /* the traceback shadow matrix */
6995   float     sc;			/* the score of the CYK alignment */
6996   ESL_STACK *pda;                /* stack that tracks bifurc parent of a right start */
6997   int       v,j,d,i;		/* indices for state, j, subseq len */
6998   int       k;
6999   int       y, yoffset;
7000   int       bifparent;
7001   int       b;
7002   float     bsc;
7003   int       dp;                 /* dp: d' d offset in current state v's band; dp = d - dmin[v] */
7004   int       kp;                 /* dp: k' k offset in current state v's band; kp = k - dmin[v] */
7005 
7006   sc = inside_b_me(cm, dsq, L, r, z, i0, j0,
7007 		   BE_EFFICIENT,	/* memory-saving mode */
7008 		   NULL, NULL,	        /* manage your own matrix, I don't want it */
7009 		   &shadow,		/* return a shadow matrix to me. */
7010 		   allow_begin,         /* TRUE to allow local begins */
7011 		   &b, &bsc,	        /* if allow_begin is TRUE, gives info on optimal b */
7012 		   dmin, dmax);
7013 
7014   pda = esl_stack_ICreate();
7015   if(pda == NULL) goto ERROR;
7016   v = r;
7017   j = j0;
7018   i = i0;
7019   d = j0-i0+1;
7020 
7021   while (1) {
7022     if(v == cm->M)
7023       dp = d;
7024     else
7025       dp = d - dmin[v];
7026     if(v != cm->M)
7027       {
7028 	assert(d <= dmax[v]);
7029 	assert(d >= dmin[v]);
7030       }
7031     if (cm->sttype[v] == B_st) {
7032       assert(v >= 0);
7033       kp = ((int **) shadow[v])[j][dp];   /* kp = offset len of right fragment */
7034       z = cm->cnum[v];
7035       k = kp + dmin[z];  /* k = len of right fragment */
7036 
7037       /* Store info about the right fragment that we'll retrieve later:
7038        */
7039       if((status = esl_stack_IPush(pda, j)) != eslOK) goto ERROR;	/* remember the end j    */
7040       if((status = esl_stack_IPush(pda, k)) != eslOK) goto ERROR;	/* remember the subseq length k */
7041       if((status = esl_stack_IPush(pda, tr->n-1)) != eslOK) goto ERROR;	/* remember the trace index of the parent B state */
7042       /* Deal with attaching left start state.
7043        */
7044       j = j-k;
7045       d = d-k;
7046       i = j-d+1;
7047       y = cm->cfirst[v];
7048       InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
7049       v = y;
7050     } else if (cm->sttype[v] == E_st || cm->sttype[v] == EL_st) {
7051       /* We don't trace back from an E or EL. Instead, we're done with the
7052        * left branch of the tree, and we try to swing over to the right
7053        * branch by popping a right start off the stack and attaching
7054        * it. If the stack is empty, then we're done with the
7055        * traceback altogether. This is the only way to break the
7056        * while (1) loop.
7057        */
7058       if (esl_stack_IPop(pda, &bifparent) == eslEOD) break;
7059       /* Note: we don't pop dp below, but d, because we're either in an E state
7060        * in which case d must be 0, or the EL state, which has no
7061        * dmin and dmax band, so if we pop dp and add dmin[v] to get d,
7062        * we'll f*** everything up, as Sam Griffiths-Jones found
7063        * when preparing Rfam 8.0 on 08.04.06.
7064        */
7065       esl_stack_IPop(pda, &d);
7066       esl_stack_IPop(pda, &j);
7067       v = tr->state[bifparent];	/* recover state index of B */
7068       y = cm->cnum[v];		/* find state index of right S */
7069       i = j-d+1;
7070 				/* attach the S to the right */
7071       InsertTraceNode(tr, bifparent, TRACE_RIGHT_CHILD, i, j, y);
7072       v = y;
7073     } else {
7074       yoffset = ((char **) shadow[v])[j][dp];
7075       if((((int) yoffset) != USED_LOCAL_BEGIN) && (((int) yoffset) != USED_EL))
7076 	{
7077 	  if(!((yoffset >= 0) && yoffset <= cm->M))
7078 	    y = cm->cfirst[v] + yoffset;
7079 	}
7080       if((yoffset != USED_LOCAL_BEGIN) && (yoffset != USED_EL))
7081 	assert(yoffset >= 0 &&  yoffset <= cm->M);
7082       switch (cm->sttype[v]) {
7083       case D_st:            break;
7084       case MP_st: i++; j--; break;
7085       case ML_st: i++;      break;
7086       case MR_st:      j--; break;
7087       case IL_st: i++;      break;
7088       case IR_st:      j--; break;
7089       case S_st:            break;
7090       default:    cm_Fail("'Inconceivable!'\n'You keep using that word...'");
7091       }
7092       d = j-i+1;
7093 
7094       if (yoffset == USED_EL)
7095 	{	/* a local alignment end */
7096 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, cm->M);
7097 	  v = cm->M;		/* now we're in EL. */
7098 	}
7099       else if (yoffset == USED_LOCAL_BEGIN)
7100 	{ /* local begin; can only happen once, from root */
7101 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, b);
7102 	  v = b;
7103 	}
7104       else
7105 	{
7106 	  y = cm->cfirst[v] + yoffset;
7107 	  InsertTraceNode(tr, tr->n-1, TRACE_LEFT_CHILD, i, j, y);
7108 	  v = y;
7109 	}
7110     }
7111   }
7112   esl_stack_Destroy(pda);  /* it should be empty; we could check; naaah. */
7113   free_vjd_shadow_matrix(shadow, cm, i0, j0);
7114   return sc;
7115 
7116  ERROR:
7117   cm_Fail("Memory allocation error.");
7118   return 0.; /* NEVERREACHED */
7119 }
7120 
7121