1 /* The all-encompassing include file for INFERNAL.
2  * All-encompassing because there's a lot of crossdependency.
3  * Previously (versions 1.0.2 and earlier) this
4  * was broken down into structs.h and funcs.h
5  *
6  *    1. Default values for various parameters, and other constants
7  *    2. Parsetree_t:        binary tree structure for storing a traceback of an alignment
8  *    3. CMConsensus_t:      consensus information on a CM
9  *    4. Fancyali_t:         alignment of CM to a target sequence (largely replaced by CM_ALIDISPLAY)
10  *    5. CMEmitMap_t:        map of model nodes to consensus positions.
11  *    6. deckpool_s:         divide and conquer DP matrix state deck
12  *    7. GammaHitMx_t:       semi-HMM used for optimal hit resolution of a CM scan
13  *    8. Prior_t:            Dirichlet priors on all CM parameters
14  *    9. ExpInfo_t:          exponential tail information for E-values
15  *   10. CP9_t:              a CM plan 9 HMM
16  *   11. CP9_MX:             a dynamic programming matrix for a CM Plan 9 HMM
17  *   12. CP9Bands_t:         sequence and CM specific HMM bands
18  *   13. CP9trace_t:         traceback structure for CP9 HMMs
19  *   14. CP9Map_t:           map from a CM to a CP9 HMM and vice versa
20  *   15. CMSubMap_t:         map of a template CM to a sub CM and vice versa
21  *   16. CMSubInfo_t:        sub CM information, used for validating the sub CM construction procedure
22  *   17. RSEARCH constants
23  *   18. CM_MX:              CM dynamic programming matrix; non-banded, non-truncated
24  *   19. CM_TR_MX:           CM dynamic programming matrix; non-banded, truncated
25  *   20. CM_HB_MX:           CM dynamic programming matrix; HMM banded, non-truncated
26  *   21. CM_TR_HB_MX:        CM dynamic programming matrix; HMM banded, truncated
27  *   22. CM_SHADOW_MX:       CM shadow matrix, for DP tracebacks; non-banded, non-truncated
28  *   23. CM_TR_SHADOW_MX:    CM shadow matrix, for DP tracebacks; non-banded, truncated
29  *   24. CM_HB_SHADOW_MX:    CM shadow matrix, for DP tracebacks; HMM banded, non-truncated
30  *   25. CM_TR_HB_SHADOW_MX: CM shadow matrix, for DP tracebacks; HMM banded, truncated
31  *   26. CM_EMIT_MX:         CM emit matrix, info on PP of emitted residues; non-banded, non-truncated
32  *   27. CM_TR_EMIT_MX:      CM emit matrix, info on PP of emitted residues; non-banded, truncated
33  *   28. CM_HB_EMIT_MX:      CM emit matrix, info on PP of emitted residues; HMM banded, non-truncated
34  *   29. CM_TR_HB_EMIT_MX:   CM emit matrix, info on PP of emitted residues; HMM banded, truncated
35  *   30. CM_QDBINFO:         model specific QDB information, including 2 sets of bands
36  *   31. CM_SCAN_MX:         matrices used for scanning CM DP algorithms; non-truncated
37  *   32. CM_TR_SCAN_MX:      matrices used for scanning CM DP algorithms; truncated
38  *   33. CM_TR_PENALTIES:    pass, state and locality-specific truncated alignment penalties
39  *   34. CM_t:               a covariance model
40  *   35. CM_FILE:            a CM save file or database, open for reading
41  *   36. CM_PLI_ACCT:        pass specific statistics for a search/scan pipeline
42  *   37. CM_PIPELINE:        the accelerated seq/profile comparison pipeline
43  *   38. CM_ALIDISPLAY:      an alignment formatted for printing (replaces FancyAli_t)
44  *   39. CM_HIT:             a hit between a CM and a sequence
45  *   40. CM_TOPHITS:         ranked list of top-scoring hits
46  *   41. CM_P7_OM_BLOCK:     block of P7_OPROFILEs and related info, for cmscan
47  *   42. CM_ALNDATA:         information for alignment of a sequence to a CM
48  *   43. Routines in Infernal's exposed API
49  *
50  * Also, see impl_{sse,vmx}/impl_{sse,vmx}.h for additional API
51  * specific to the acceleration layer.
52  */
53 #ifndef INFERNALH_INCLUDED
54 #define INFERNALH_INCLUDED
55 
56 #include "esl_config.h"
57 #include "p7_config.h"
58 #include "config.h"
59 
60 #include "easel.h"
61 #include "esl_alphabet.h"
62 #include "esl_mixdchlet.h"
63 #include "esl_random.h"
64 #include "esl_msa.h"
65 #include "esl_sq.h"
66 #include "esl_sqio.h"
67 
68 #include "hmmer.h"
69 
70 #ifdef HAVE_MPI
71 #include "mpi.h"
72 #endif
73 
74 /***********************************************************************************
75  * 1. Default values for various parameters, and other constant definitions.
76  ***********************************************************************************/
77 
78 #define DEFAULT_BETA_W              1E-7
79 #define DEFAULT_BETA_QDB1           1E-7
80 #define DEFAULT_BETA_QDB2           1E-15
81 #define DEFAULT_TAU                 1E-7
82 #define DEFAULT_PBEGIN              0.05           /* EPN 06.29.07 (formerly 0.5) */
83 #define DEFAULT_PEND                0.05           /* EPN 06.29.07 (formerly 0.5) */
84 #define DEFAULT_ETARGET             0.59           /* EPN 07.10.07 (formerly (v0.7->v0.8)= 2.-0.54 = 1.46 */
85 #define DEFAULT_ETARGET_HMMFILTER   0.38           /* EPN 04.16.12 */
86 #define DEFAULT_NULL2_OMEGA         0.000015258791 /* 1/(2^16), the hard-coded prior probability of the null2 model */
87 #define DEFAULT_NULL3_OMEGA         0.000015258791 /* 1/(2^16), the hard-coded prior probability of the null3 model */
88 #define V1P0_NULL2_OMEGA            0.03125        /* 1/(2^5),  the prior probability of the null2 model for v0.56->v1.0.2 */
89 #define V1P0_NULL3_OMEGA            0.03125        /* 1/(2^5),  the prior probability of the null3 model for v0.56->v1.0.2 */
90 #define DEFAULT_EL_SELFPROB         0.94
91 #define DEFAULT_MAXTAU              0.1            /* default cm->maxtau, max allowed tau value during HMM band tightening */
92 #define DEFAULT_CP9BANDS_THRESH1    0.01           /* default for CP9Bands_t thresh1, if occ[k] > thresh1 HMM posn k 'maybe used'  */
93 #define DEFAULT_CP9BANDS_THRESH2    0.98           /* default for CP9Bands_t thresh2, if occ[k] > thresh2 HMM posn k 'likely used' */
94 #define DEFAULT_HB_MXSIZE_MAX_MB    512.           /* maximum for auto-determined maximum HMM banded matrix size */
95 #define DEFAULT_HB_MXSIZE_MAX_W     3000.          /* a CM window size (cm->W) of this or higher results in max matrix size (DEFAULT_HB_MXSIZE_MAX_MB) */
96 #define DEFAULT_HB_MXSIZE_MIN_MB    128.           /* minimum for auto-determined maximum HMM banded matrix size */
97 #define DEFAULT_HB_MXSIZE_MIN_W     1000.          /* a CM window size (cm->W) of this or lower results in min matrix size (DEFAULT_HB_MXSIZE_MIN_MB) */
98 
99 /* Hard-coded values (not changeable by command-line options).
100  * All of these are related to HMM band tightening to reduce
101  * the required size of DP matrices for alignments to below
102  * a maximum limit.
103  */
104 #define TAU_MULTIPLIER              2.0            /* value to multiply tau by during HMM band tightening */
105 #define MAX_CP9BANDS_THRESH1        0.50           /* maximum value allowed for cp9b->thresh1 during HMM band tightening */
106 #define MIN_CP9BANDS_THRESH2        0.75           /* minimum value allowed for cp9b->thresh2 during HMM band tightening */
107 #define DELTA_CP9BANDS_THRESH1      0.02           /* value to increment cp9b->thresh1 by during HMM band tightening */
108 #define DELTA_CP9BANDS_THRESH2      0.01           /* value to decrement cp9b->thresh2 by during HMM band tightening */
109 
110 /* number of possible integer GC contents, example 40 = 0.40 GC */
111 #define GC_SEGMENTS 101
112 
113 /* length of a sequence 'chunk' that gets processed in cmsearch/cmscan
114  * sequences larger than this get broken up into overlapping chunks of this size.
115  */
116 #define CM_MAX_RESIDUE_COUNT 100000 /* differs from HMMER's default which is MAX_RESIDUE_COUNT from esl_sqio_(ascii|ncbi).c */
117 
118 /* P7 HMM E-value parameters, borrowed from HMMER, but with 2 new additions: CM_p7_GMU and CM_p7_GLAMBDA */
119 #define CM_p7_NEVPARAM 8  /* number of statistical parameters stored in cm->p7_evparam */
120 enum cm_p7_evparams_e {CM_p7_LMMU  = 0, CM_p7_LMLAMBDA = 1, CM_p7_LVMU = 2,  CM_p7_LVLAMBDA = 3, CM_p7_LFTAU = 4, CM_p7_LFLAMBDA = 5, CM_p7_GFMU = 6, CM_p7_GFLAMBDA = 7 };
121 #define CM_p7_EVPARAM_UNSET -99999.0f  /* if p7_evparam[0] is unset, then all unset */
122 
123 /* BE_EFFICIENT and BE_PARANOID are alternative (exclusive) settings
124  * for the do_full? argument to the alignment engines.
125  */
126 #define BE_EFFICIENT  0		/* setting for do_full: small memory mode */
127 #define BE_PARANOID   1		/* setting for do_full: keep whole matrix, perhaps for debugging */
128 
129 /* We're moderately paranoid about underflow and overflow errors, so
130  * we do some checking on the magnitude of the scores.
131  *
132  * IMPOSSIBLE, the "-infinity" value in a DP matrix must be > -FLT_MAX/3, so that
133  *   we can add three of them together (alpha + tsc + esc) and not get an
134  *   underflow error. ANSI guarantees us FLT_MAX >= 1e37.
135  *
136  * MAXSCOREVAL is the maximum value we allow in any alpha cell, tsc, or esc.
137  *
138  * IMPROBABLE must be > IMPOSSIBLE + 2* MAXSCOREVAL; such that adding
139  *  any two valid score values (say, an alpha and a tsc) to IMPOSSIBLE
140  *  gives us a number < IMPROBABLE, and we can reset it to IMPOSSIBLE.
141  *
142  * NOT_IMPOSSIBLE() exists because we can't compare floating point by
143  * equality.
144  *
145  * sreLOG2() exists because we want to work in bits, and we will need
146  * to take log(0).
147 */
148 #define IMPOSSIBLE  -1e36
149 #define MAXSCOREVAL  1e35
150 #define IMPROBABLE  -5e35
151 #define NOT_IMPOSSIBLE(x)  ((x) > -9.999e35)
152 #define NOT_IMPROBABLE(x)  ((x) > -4.999e35)
153 #define sreLOG2(x)  ((x) > 0 ? log(x) * 1.44269504 : IMPOSSIBLE)
154 #define sreEXP2(x)  (exp((x) * 0.69314718 ))
155 #define epnEXP10(x) (exp((x) * 2.30258509 ))
156 #define NOTZERO(x)  (fabs(x - 0.) > -1e6)
157 #define INFTY       987654321   /* infinity for purposes of integer DP cells       */
158 
159 /* EPN, Fri Sep  7 16:49:43 2007
160  * From HMMER3's p7_config.h:
161  *
162  * Sean's notes (verbatim):
163  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
164  * In Forward algorithm implementations, we use a table lookup in
165  * p7_FLogsum() to calculate summed probabilities in log
166  * space. p7_INTSCALE defines the precision of the calculation; the
167  * default of 1000.0 means rounding differences to the nearest 0.001
168  * nat. p7_LOGSUM_TBL defines the size of the lookup table; the
169  * default of 16000 means entries are calculated for differences of 0
170  * to 16.000 nats (when p7_INTSCALE is 1000.0).  e^{-p7_LOGSUM_TBL /
171  * p7_INTSCALE} should be on the order of the machine FLT_EPSILON,
172  * typically 1.2e-7.
173  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174  * EPN: Infernal uses bits, not nats. 1.2e-7 =~ 2^-23 =~ e^-16.
175  *      And I've removed the p7_ prefixes.
176  */
177 #define INTSCALE     1000.0f
178 #define LOGSUM_TBL   23000
179 
180 enum emitmode_e {
181   EMITLEFT  = 0,
182   EMITRIGHT = 1,
183   EMITPAIR  = 2,
184   EMITNONE  = 3
185 };
186 #define nEMITMODES 4
187 
188 
189 /* CM State types. (cm->sttype[])
190  */
191 #define MAXCONNECT 6            /* maximum number of states per node */
192 
193 #define D_st     0		/* delete       */
194 #define MP_st    1		/* match-pair   */
195 #define ML_st    2		/* match-left   */
196 #define MR_st    3		/* match-right  */
197 #define IL_st    4		/* insert-left  */
198 #define IR_st    5		/* insert-right */
199 #define S_st     6		/* start        */
200 #define E_st     7		/* end          */
201 #define B_st     8		/* bifurcation  */
202 #define EL_st    9              /* local end    */
203 
204 /* CM Node types (8) (cm->ndtype[])
205  */
206 #define NODETYPES 8
207 
208 #define DUMMY_nd -1
209 #define BIF_nd    0
210 #define MATP_nd   1
211 #define MATL_nd   2
212 #define MATR_nd   3
213 #define BEGL_nd   4
214 #define BEGR_nd   5
215 #define ROOT_nd   6
216 #define END_nd    7
217 
218 /* CM Unique state identifiers  (cm->stid[])
219  */
220 #define UNIQUESTATES 21
221 
222 #define DUMMY   -1
223 #define ROOT_S  0
224 #define ROOT_IL 1
225 #define ROOT_IR 2
226 #define BEGL_S  3
227 #define BEGR_S  4
228 #define BEGR_IL 5
229 #define MATP_MP 6
230 #define MATP_ML 7
231 #define MATP_MR 8
232 #define MATP_D  9
233 #define MATP_IL 10
234 #define MATP_IR 11
235 #define MATL_ML 12
236 #define MATL_D  13
237 #define MATL_IL 14
238 #define MATR_MR 15
239 #define MATR_D  16
240 #define MATR_IR 17
241 #define END_E   18
242 #define BIF_B   19
243 #define END_EL  20
244 
245 /* Flags used in InsertTraceNode()
246  */
247 #define TRACE_LEFT_CHILD  1
248 #define TRACE_RIGHT_CHILD 2
249 
250 /* Flags used to define PDA moves,
251  * in display.c and emit.c (if not elsewhere)
252  *
253  */
254 #define PDA_RESIDUE 0
255 #define PDA_STATE   1
256 #define PDA_MARKER  2
257 
258 /*************************************************************************************
259  *  2. Parsetree_t: binary tree structure for storing a traceback of an alignment.
260  *************************************************************************************/
261 
262 /* Structure: Parsetree_t
263  * Incept:    SRE 29 Feb 2000 [Seattle]
264  *
265  * Binary tree structure for storing a traceback of an alignment.
266  *
267  * Also used for tracebacks of model constructions. Then,
268  * "state" is misused for a node (not state) index.
269  *
270  * Example of a traceback (from ParsetreeDump(), from a tRNA
271  * model:
272  *
273  * > DF6280
274  * idx   emitl  emitr   state  nxtl  nxtr  prv   tsc   esc
275  * ----- ------ ------ ------- ----- ----- ----- ----- -----
276  *    0     1     74      0S      1    -1    -1 -0.58  0.00
277  *    1     1     74A     3MR     2    -1     0 -0.74  0.41
278  *    2     1G    73C     6MP     3    -1     1 -0.87  1.58
279  * ...<snip>...
280  *   11    10     66     54B     12    43    10  0.00  0.00
281  *   12    10     44    124S     13    -1    11  0.00  0.00
282  *   13    10     44    125B     14    28    12  0.00  0.00
283  * ...<snip>...
284  *   60    61U    61    120ML    61    -1    59 -0.22  0.87
285  *   61    -1     -1    123E     -1    -1    60  0.00  0.00
286  * ----- ------ ------ ------- ----- ----- ----- ----- -----
287  *
288  * That is, emitl and emitr are always valid and always represent
289  * the bounds of the subsequence accounted for by the parse
290  * subtree rooted at this state. (Except for end states, which
291  * are -1,-1). nxtl is always a valid state (again except for E
292  * states, which are -1. nxtr is only != -1 for bifurcation states.
293  *
294  * For reasons of malloc() efficiency, the binary tree is organized
295  * in a set of arrays.
296  */
297 typedef struct parsetree_s {
298   int  *emitl;		/* i position in seq or ali (1..L or alen) */
299   int  *emitr;		/* j position in seq or ali (1..L or alen) */
300   int  *state;		/* y of state (0..M-1)                     */
301   char *mode;		/* mode of state, used in marginal alignment   *
302                          *  (TRMODE_J, TRMODE_L, TRMODE_R or TRMODE_T) */
303 
304   int *nxtl;		/* index in trace of left child            */
305   int *nxtr;		/* index in trace of right child           */
306   int *prv;		/* index in trace of parent                */
307 
308   int  n;		/* number of elements in use so far        */
309   int  nalloc;		/* number of elements allocated for        */
310   int  memblock;	/* size of malloc() chunk, # of elems      */
311   int  is_std;          /* TRUE if parsetree was determined using standard  CYK/optacc,
312 			 * FALSE if parsetree was determined using truncated CYK/optacc */
313   int  pass_idx;        /* pipeline pass the hit the parsetree is for was found in */
314   float trpenalty;      /* truncated alignment score penalty, 0.0 if is_std is TRUE. */
315 } Parsetree_t;
316 
317 /* Special flags for use in shadow (traceback) matrices, instead of
318  * offsets to connected states. When yshad[0][][] is USED_LOCAL_BEGIN,
319  * the b value returned by inside() is the best connected state (a 0->b
320  * local entry). When yshad[v][][] is USED_EL, there is a v->EL transition
321  * and the remaining subsequence is aligned to the EL state.
322  */
323 #define USED_LOCAL_BEGIN  101
324 #define USED_EL           102
325 #define USED_TRUNC_BEGIN  103
326 #define USED_TRUNC_END    104
327 
328 /* Constants for alignment truncation modes, used during alignment
329  * traceback. We use TRMODE{J,L,R}_OFFSET as a way of determining the
330  * marginal alignment mode solely from the yoffset stored in the
331  * {J,L,R}shadow matrices, by adding the appropriate offset to yoffset
332  * depending on the truncation mode. A crucial fact is that yoffset
333  * ranges from 0..MAXCONNECT-1 for normal states, but it can also be
334  * USED_LOCAL_BEGIN, USED_EL, USED_TRUNC_BEGIN and USED_TRUNC_END, so
335  * we have to make sure that adding 0..MAXCONNECT-1 to any of the
336  * TRMODE_{J,L,R}_OFFSET values does not add up to USED_LOCAL_BEGIN,
337  * USED_EL, USED_TRUNC_BEGIN, USED_TRUNC_END (defined above). And
338  * remember that these values have to be able to be stored in a char
339  * (must be 0..255).
340  */
341 #define TRMODE_UNKNOWN  4
342 #define TRMODE_J        3
343 #define TRMODE_L        2
344 #define TRMODE_R        1
345 #define TRMODE_T        0
346 #define TRMODE_J_OFFSET 0
347 #define TRMODE_L_OFFSET 10
348 #define TRMODE_R_OFFSET 20
349 
350 
351 /*************************************************************************************
352  *  3. CMConsensus_t: consensus information on a CM
353  *************************************************************************************/
354 
355 /* Structure: CMConsensus_t
356  * Incept:    SRE, Thu May 23 16:55:04 2002 [St. Louis]
357  *
358  * Created by display.c:CreateCMConsensus().
359  * Preprocesses a CM into consensus information that is needed by
360  * display.c:CreateFancyAli().
361  *
362  *   ct[x]:  Zuker-style ct map.
363  *           indicates the pairing partner for consensus position ct[x].
364  *           x can be 0..clen-1
365  *           ct[x] is -1 (no partner) or 0..clen-1 (coord of partner)
366  *
367  *   (lpos, rpos may be redundant w/ CMEmitMap_t now.)
368  *   (off-by-one w.r.t. CMEmitMap_t; 1..clen is better)
369  */
370 typedef struct consensus_s {
371   char *cseq;           /* consensus sequence display string; 0..clen-1     */
372   char *cstr;		/* consensus structure display string; 0..clen-1    */
373   int  *ct;             /* Zuker-style ct pairing map; [0..clen-1]          */
374   int  *lpos;		/* maps node->consensus position; 0..nodes-1        */
375   int  *rpos;		/* maps node->consensus position; 0..nodes-1        */
376   int   clen;		/* length of cseq, cstr                             */
377 } CMConsensus_t;
378 
379 /*************************************************************************************
380  *  4. Fancyali_t: alignment of CM to a target sequence (largely replaced by CM_ALIDISPLAY)
381  *************************************************************************************/
382 
383 /* Structure: Fancyali_t
384  * Incept:    SRE, May 2002 [St. Louis]
385  *
386  * See display.c:CreateFancyAli().
387  * An alignment of a CM to a target sequence, formatted for display.
388  */
389 typedef struct fancyali_s {
390   char *rf;             /* reference annotation line (NULL if unavail) */
391   char *cstr;		/* CM consensus structure line                 */
392   char *cseq;		/* CM consensus sequence line                  */
393   char *mid;		/* alignment identity middle line              */
394   char *top;		/* optional, non-compensatory 'x' top line     */
395   char *aseq;		/* aligned target sequence                     */
396   char *pcode;          /* aligned posteriors 'ones' place (9 in 93)   */
397   int  *scoord;		/* coords 1..L for aligned dsq chars           */
398   int  *ccoord;	        /* coords 1..clen for aligned consensus chars  */
399   int   len;		/* len of the strings above                    */
400   int   cfrom, cto;	/* max bounds in ccoord                        */
401   int   sqfrom, sqto;	/* max bounds in scoord                        */
402 
403   char *hmmname;		/* name of HMM                          */
404   char *hmmacc;			/* accession of HMM; or [0]='\0'        */
405   char *hmmdesc;		/* description of HMM; or [0]='\0'      */
406 
407 } Fancyali_t;
408 
409 
410 /*************************************************************************************
411  *  5. CMEmitMap_t: map of model nodes to consensus positions.
412  *************************************************************************************/
413 
414 /* Structure: CMEmitMap_t
415  * Incept:    SRE, Thu Aug  8 12:47:49 2002 [St. Louis]
416  *
417  * Maps model nodes to consensus positions.
418  *    Consensus positions are indexed 1..clen.
419  *    Each array (lpos, rpos, epos) is 0..nodes-1.
420  *    Residues from an MP go into lpos and rpos in the consensus.
421  *    Residues from an IL follow lpos.
422  *    Residues from an IR precede rpos.
423  *    Residues from an EL follow epos[nd] for the nd that went to EL.
424  *    For nonemitters, rpos and lpos are a non-inclusive bound: for
425  *      example, rpos[0], lpos[0] are 0,clen+1.
426  *    There are no dummy values; all rpos, lpos, epos are valid coords
427  *      0..clen+1 in the consensus.
428  *
429  * See emitmap.c for implementation and more documentation.
430  */
431 typedef struct emitmap_s {
432   int *lpos;           /* left bound of consensus for subtree under nd   */
433   int *rpos;           /* right bound of consensus for subtree under nd  */
434   int *epos;           /* EL inserts come after this consensus pos */
435   int  clen;           /* consensus length */
436 } CMEmitMap_t;
437 
438 /*************************************************************************************
439  * 6. deckpool_s:  divide and conquer DP matrix state deck.
440  *************************************************************************************/
441 
442 struct deckpool_s {
443   float ***pool;
444   int      n;
445   int      nalloc;
446   int      block;
447 };
448 
449 /*************************************************************************************
450  * 7. GammaHitMx_t: semi-HMM used for optimal hit resolution of a CM scan.
451  *************************************************************************************/
452 
453 typedef struct gammahitmx_s {
454   int       L;                  /* length of sequence, arrays are size L+1 */
455   float    *mx;                 /* [0..L] SHMM DP matrix for optimum nonoverlap resolution */
456   int      *gback;              /* [0..L] traceback pointers for SHMM */
457   float    *savesc;             /* [0..L] saves score of hit added to best parse at j */
458   int      *saver;		/* [0..L] saves initial non-ROOT state of best parse ended at j */
459   int      *savemode;		/* [0..L] saves mode best parse ended at j (TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T) */
460   float     cutoff;             /* minimum score to report */
461   int       i0;                 /* position of first residue in sequence, in actual sequence
462 				 * coords (gamma->mx[0] corresponds to this residue)
463 				 */
464 } GammaHitMx_t;
465 
466 
467 /*************************************************************************************
468  * 8. Prior_t:  Dirichlet priors on all CM parameters.
469  *************************************************************************************/
470 
471 typedef struct {
472   /* transition priors */
473   int    tsetnum;                           /* number of transition sets to read in */
474   int    tsetmap[UNIQUESTATES][NODETYPES];  /* tsetmap[a][b] is for transition set from ustate a to node b */
475   ESL_MIXDCHLET **t;	                    /* array of transition priors, 0..tsetnum-1 */
476 
477   /* emission priors */
478   ESL_MIXDCHLET *mbp;		/* consensus base pair emission prior */
479   ESL_MIXDCHLET *mnt;		/* consensus singlet emission prior */
480   ESL_MIXDCHLET *i;		/* nonconsensus singlet emission prior */
481 
482   /* bookkeeping */
483   int  maxnq;			/* maximum # of components in any prior */
484   int  maxnalpha;		/* maximum # of parameters in any prior */
485 } Prior_t;
486 
487 /*************************************************************************************
488  * 10. ExpInfo_t: exponential tail information for E-values
489  *************************************************************************************/
490 
491 /* Info on an exponential tail that describes score distribution in
492  * random sequence of a given algorithm, model configuration (can be 1
493  * of 4 modes, local/glocal of cyk or inside). Fit in cmcalibrate and
494  * stored in the cm file. All values with the sole exception of
495  * <cur_eff_dbsize> are never changed once read, and some of the
496  * params are actually unnecessary downstream of cmcalibrate, but are
497  * potentially informative to the user.
498  */
499 typedef struct expinfo_s {
500   double cur_eff_dbsize;/* the total number of possible hits we expect for current database search,
501 			 * cur_eff_dbsize = (current dbsize) / <dbsize> * <nrandhits>*/
502   double lambda;	/* scale param exponential tail */
503   double mu_extrap;	/* offset/location param for exponential tail extrapolated to include all <nrandhits> from cmcalibrate,
504 			 * mu_corrected = mu_orig - log(1./tailp) / lambda */
505   double mu_orig;	/* offset/location param for exponential tail's original fit to tailp of rand seq score histogram in cmcalibrate */
506   double dbsize;        /* db size in residues that was used in cmcalibrate */
507   int    nrandhits;     /* total number of hits in random sequence database in cmcalibrate */
508   double tailp;         /* fractional tail mass threshold for hit histogram in random sequences in cmcalibrate */
509   int    is_valid;      /* TRUE if this expinfo_s object is valid (it's parameters have been set), FALSE if not */
510 } ExpInfo_t;
511 
512 /* Exponential tail statistics modes, a different exp tail fit exists for each mode
513  * 0..EXP_NMODES-1 are first dimension of cm->expA
514  * order is important, to cmcalibrate at least
515  */
516 #define EXP_CM_GC  0
517 #define EXP_CM_GI  1
518 #define EXP_CM_LC  2
519 #define EXP_CM_LI  3
520 #define EXP_NMODES 4
521 
522 /***********************************************************************************
523  * 11. CP9_t: a CM Plan 9 HMM
524  ***********************************************************************************/
525 
526 /* Declaration of a CM Plan 9 profile-HMM structure.  Modified from a
527  * HMMER2 plan 7 (with (hopefully) minimal change) to mirror a CM as
528  * closely as possible.
529  *
530  * The model has two forms:
531  * 1. The "core" model has 0..M nodes, node 0 is special, its "match" state
532  *    is really state B (which is forced silent by having hmm->mat[0] = NULL and
533  *    hmm->msc[0] = NULL), its "insert" state is really state N (with emission
534  *    probs hmm->ins[0]), and it has NO DELETE STATE.
535  *
536  *    hmm->t[0][CTMM]: 0. (B->M_1 transition is hmm->begin[1])
537  *    hmm->t[0][CTMI]: transition from B to N (I_0);
538  *    hmm->t[0][CTMD]: transition from B to D_1;
539  *    hmm->t[0][CTME]: null (transition from B to an EL state is impossible)
540  *    hmm->t[0][CTIM]: transition from N to M_1;
541  *    hmm->t[0][CTII]: N self transition;
542  *    hmm->t[0][CTID]: N -> D_1
543  *    hmm->t[0][CTDM]: null
544  *    hmm->t[0][CTDI]: null
545  *    hmm->t[0][CTDD]: null
546  *
547  *    t[0..M] are the state transition probs. t[k][CTME] is an
548  *    end-local probability, the EL states can only be reached by a
549  *    subset of match states, this probability is -INFTY for states
550  *    that can't reach the EL.
551  *
552  *    t[M] are special, because this node transits to the end (E
553  *    state). The E state is (sort-of) treated as match state M+1, as
554  *    t[M][CTIM] is the transition from I_M to E, t[M][CTDM] is the
555  *    transition from D_M to E. However, t[M][CTMM] is always 0.0,
556  *    the transition from M_M to E is end[hmm->M]; t[M][CTMD],
557  *    t[M][CTDD], t[M][CTDI] are set as 0.0.
558  *
559  *    mat[1..M] are match emission probs.
560  *    ins[0..M] are insert emission probs.  (ins[0] is state N emission probs)
561  *
562  *    The CM_PLAN9_HASPROB flag is up when these all correspond to a fully normalized
563  *    profile HMM.
564  *
565  * 2. The "logoddsified" model is the configured model, converted to
566  *    integer score form and ready for alignment algorithms.
567  *    bsc, esc scores correspond to begin, and end probabilities.
568  *
569  *    The CPLAN9_HASBITS flag is up when both of these are ready for
570  *    alignment.
571  *
572  */
573 typedef struct cplan9_s {
574   /* The main model in probability form: data-dependent probabilities.
575    * Transition probabilities are usually accessed as a
576    *   two-D array: hmm->t[k][CTMM], for instance. They are allocated
577    *   such that they can also be stepped through in 1D by pointer
578    *   manipulations, for efficiency in DP algorithms.
579    * CPLAN9_HASPROB flag is raised when these probs are all valid.
580    */
581   const ESL_ALPHABET *abc;      /* pointer to the alphabet, usually points to cm->abc */
582   int     M;                    /* length of the model (# nodes)        +*/
583   float **t;                    /* transition prob's. t[0..M][0..9]   +*/
584   float **mat;                  /* match emissions.  mat[1..M][0..3]   +*/
585   float **ins;                  /* insert emissions. ins[0..M][0..3] +*/
586 
587   /* The unique states of CM Plan 9 in probability form.
588    * These are the algorithm-dependent, data-independent probabilities.
589    * Some parts of the code may briefly use a trick of copying tbd1
590    *   into begin[0]; this makes it easy to call FChoose() or FNorm()
591    *   on the resulting vector. However, in general begin[0] is not
592    *   a valid number.
593    */
594   float  *begin;                /* 1..M B->M state transitions                +*/
595   float  *end;                  /* 1..M M->E state transitions (!= a dist!)   +*/
596 
597   /* The model's log-odds score form.
598    * These are created from the probabilities by CP9Logoddsify().
599    * By definition, null[] emission scores are all zero.
600    * Note that emission distributions are over possible alphabet symbols,
601    * not just the unambiguous DNA alphabet: we
602    * precalculate the scores for all IUPAC degenerate symbols we
603    * may see.
604    *
605    * Note the reversed indexing on msc, isc, tsc -- for efficiency reasons.
606    * They're not probability vectors any more so we can reorder them
607    * without wildly complicating our life.
608    *
609    * The _mem ptrs are where the real memory is alloc'ed and free'd,
610    * as opposed to where it is accessed.
611    * This came in with Erik Lindahl's altivec port; it allows alignment on
612    * 16-byte boundaries. In the non-altivec code, this is just a little
613    * redundancy; tsc and tsc_mem point to the same thing, for example.
614    *
615    * The otsc are reordered transition scores [0..k..M][0..cp9O_NTRANS-1],
616    * for efficiency in DP calculations. This reordering is based on HMMER3.
617    *
618    * CPLAN9_HASBITS flag is up when these scores are valid.
619    */
620   int  **tsc;                   /* transition scores     [0.9][0.M]       +*/
621   int  **msc;                   /* match emission scores [0.Kp-1][1.M] +*/
622   int  **isc;                   /* ins emission scores   [0.Kp-1][0.M] +*/
623   int   *bsc;                   /* begin transitions     [1.M]              +*/
624   int   *esc;			/* end transitions       [1.M]              +*/
625   int   *tsc_mem, *msc_mem, *isc_mem, *bsc_mem, *esc_mem;
626   int   *otsc;                  /* transition scores [0..M][0..9], special ordering */
627 
628   /* The null model probabilities.
629    */
630   float  *null;                    /* "random sequence" emission prob's     +*/
631   float  null2_omega;              /* prior probability of null2 model, copied from CM */
632   float  null3_omega;              /* prior probability of null3 model, copied from CM */
633   float  p1;                       /* null model loop probability           +*/
634   /* local end, EL state parameters */
635   float  el_self;                  /* EL transition self loop probability    */
636   int    el_selfsc;                /* EL transition self loop score          */
637   int   *has_el;                   /* has_el[k] is TRUE if node k has an EL state */
638   int   *el_from_ct;               /* [0..M+1] el_from_ct[k] is the number of HMM nodes kp
639 				    * where a transition from kp's EL state to k's
640 				    * match state is valid. */
641   int  **el_from_idx;              /* [0..M+1][] el_from_idx[k] is an array of
642 				    * size el_from_ct[k] each element is a node
643 				    * kp where a transition from kp's EL state
644 				    * to k's match state is allowed */
645   int  **el_from_cmnd;             /* [0..M+1][] el_from_cmnd[k] is an array of
646 				    * size el_from_ct[k] element i is the CM
647 				    * node that the EL transition to k to
648 				    * el_from_idx[k][i] corresponds with, used
649 				    * only for building alignments from traces. */
650   int flags;                       /* bit flags indicating state of HMM, valid data +*/
651 } CP9_t;
652 
653 /* Flag codes for cplan9->flags.
654  */
655 #define CPLAN9_HASBITS     (1<<0)    /* raised if model has log-odds scores      */
656 #define CPLAN9_HASPROB     (1<<1)    /* raised if model has probabilities        */
657 #define CPLAN9_LOCAL_BEGIN (1<<2)    /* raised if model has local begins turned on */
658 #define CPLAN9_LOCAL_END   (1<<3)    /* raised if model has S/W local ends turned on */
659 #define CPLAN9_EL          (1<<4)    /* raised if model has EL local ends turned on */
660 
661 /* Indices for CM Plan9 main model state transitions.
662  * Used for indexing hmm->t[k][]
663  * mnemonic: Cm plan 9 Transition from Match to Match = CTMM
664  */
665 #define CTMM  0
666 #define CTMI  1
667 #define CTMD  2
668 #define CTMEL 3
669 #define CTIM  4
670 #define CTII  5
671 #define CTID  6
672 #define CTDM  7
673 #define CTDI  8
674 #define CTDD  9
675 #define cp9_NTRANS 10
676 
677 #define cp9_TRANS_MATCH_OFFSET  0 /* hmm->t[k][0] is first transition out of match */
678 #define cp9_TRANS_INSERT_OFFSET 4 /* hmm->t[k][4] is first transition out of insert */
679 #define cp9_TRANS_DELETE_OFFSET 7 /* hmm->t[k][7] is first transition out of delete */
680 #define cp9_TRANS_NMATCH        4 /* there are 4 transitions out of match */
681 #define cp9_TRANS_NINSERT       3 /* there are 3 transitions out of insert */
682 #define cp9_TRANS_NDELETE       3 /* there are 3 transitions out of delete */
683 
684 /* Indices for transition scores tsc[k][] */
685 /* order is optimized for dynamic programming */
686 enum cp9o_tsc_e {
687   cp9O_MM = 0,
688   cp9O_IM = 1,
689   cp9O_DM = 2,
690   cp9O_BM = 3,
691   cp9O_MI = 4,
692   cp9O_II = 5,
693   cp9O_DI = 6,
694   cp9O_MD = 7,
695   cp9O_ID = 8,
696   cp9O_DD = 9,
697   cp9O_ME =10,
698   cp9O_MEL=11
699 };
700 #define cp9O_NTRANS 12
701 
702 /* used by CM Plan 9 HMM structures */
703 #define HMMMATCH  0
704 #define HMMINSERT 1
705 #define HMMDELETE 2
706 #define NHMMSTATETYPES 3
707 
708 /***********************************************************************************
709  * 12. CP9_MX: a dynamic programming matrix for a CM Plan 9 HMM
710  ***********************************************************************************/
711 
712 /* Declaration of CM Plan9 dynamic programming matrix structure.
713  */
714 typedef struct cp9_mx_s {
715   int **mmx;			/* match scores  [0.1..N][0..M] */
716   int **imx;			/* insert scores [0.1..N][0..M] */
717   int **dmx;			/* delete scores [0.1..N][0..M] */
718   int **elmx;			/* end local scores [0.1..N][0..M] */
719   int  *erow;                   /* score for E state [0.1..N] */
720   /* Hidden ptrs where the real memory is kept */
721   int *mmx_mem, *imx_mem, *dmx_mem, *elmx_mem;
722 
723   int    M;             /* number of nodes in HMM this mx corresponds to, never changes */
724   int    rows;          /* generally L or 2, # of DP rows in seq dimension, where L is length of seq,
725 			 * == 2 if we're scanning in mem efficient mode,
726 			 * never shrinks, but can increase to 'grow' the matrix
727 			 */
728   float  size_Mb;       /* current size of matrix in Megabytes */
729 
730   /* variables added for HMMER3 p7 HMM banding of CP9 HMM dp algorithms */
731   int *kmin;            /* OPTIONAL (can be null) [0.1..i..rows] = k, minimum node for residue i is k */
732   int *kmax;            /* OPTIONAL (can be null) [0.1..i..rows] = k, maximum node for residue i is k */
733   int  ncells_allocated; /* number of cells allocated in matrix */
734   int  ncells_valid;     /* number of cells currently valid in the matrix */
735 
736 } CP9_MX;
737 
738 
739 /*************************************************************************************
740  * 13. CP9Bands_t:  sequence and CM specific HMM bands.
741  *************************************************************************************/
742 
743 /* Structure: CP9Bands_t
744  * Incept:    EPN, 10.27.06
745  *
746  * CP9 HMM bands and the resulting CM bands for HMM banded
747  * CYK (or Inside or Outside) algorithms.
748  *
749  */
750 typedef struct cp9bands_s {
751   int      hmm_M;             /* Number of nodes in CP9 HMM */
752   int      cm_M;              /* Number of nodes in CM, the CP9 HMM was built from */
753 
754   /* data structures for hmm bands (bands on the hmm states) */
755   int     *pn_min_m;          /* HMM band: minimum position node k match state will emit  */
756   int     *pn_max_m;          /* HMM band: maximum position node k match state will emit  */
757   int     *pn_min_i;          /* HMM band: minimum position node k insert state will emit */
758   int     *pn_max_i;          /* HMM band: maximum position node k insert state will emit */
759   int     *pn_min_d;          /* HMM band: minimum position that was emitted prior to entering
760 			       * node k delete state */
761   int     *pn_max_d;          /* HMM band: maximum position that was emitted prior to entering
762 			       * node k delete state */
763   int     *isum_pn_m;         /* [0..k..hmm_M] sum over i of log post probs from post->mmx[i][k]*/
764   int     *isum_pn_i;         /* [0..k..hmm_M] sum over i of log post probs from post->imx[i][k]*/
765   int     *isum_pn_d;         /* [0..k..hmm_M] sum over i of log post probs from post->dmx[i][k]*/
766 
767   /* Predicted first and final consensus positions that might be
768    * (sp1/ep1) and are likely to be (sp2/ep2) involved in the parse of
769    * the sequence based on the HMM posterior probabilities. These are
770    * used to determine what types of marginal alignments should be
771    * allowed from each state (the {J,L,R,T}valid arrays)
772    */
773   int sp1;                    /* minimum cpos for which occupancy probability exceeds thresh1, first 'maybe used'  */
774   int ep1;                    /* maximum cpos for which occupancy probability exceeds thresh1, final 'maybe used'  */
775   int sp2;                    /* minimum cpos for which occupancy probability exceeds thresh2, first 'likely used' */
776   int ep2;                    /* maximum cpos for which occupancy probability exceeds thresh2, final 'likely used' */
777 
778   float thresh1;              /* probability threshold for sp1, ep1 (typically 0.01), 'maybe used' */
779   float thresh2;              /* probability threshold for sp2, ep2 (typically 0.98), 'likely used' */
780 
781   int Rmarg_imin;             /* for Right marginal alignments, minimum target sequence position that can align to CM as i */
782   int Rmarg_imax;             /* for Right marginal alignments, maximum target sequence position that can align to CM as i */
783   int Lmarg_jmin;             /* for Left  marginal alignments, minimum target sequence position that can align to CM as j */
784   int Lmarg_jmax;             /* for Left  marginal alignments, maximum target sequence position that can align to CM as j */
785 
786   /* {J,L,R,T}valid [0..cm_M] for trCYK/trInside/trOutside: are {J,L,R,T} do DP matrix cells exist for state v? */
787   int *Jvalid;                /* [0..v..cm_M] TRUE to calculate J DP matrix cells for state v, FALSE not to */
788   int *Lvalid;                /* [0..v..cm_M] TRUE to calculate L DP matrix cells for state v, FALSE not to */
789   int *Rvalid;                /* [0..v..cm_M] TRUE to calculate R DP matrix cells for state v, FALSE not to */
790   int *Tvalid;                /* [0..v..cm_M] TRUE to calculate T DP matrix cells for state v, FALSE not to */
791 
792   /* arrays for CM state bands, derived from HMM bands */
793   int *imin;                  /* [0..cm_M-1] imin[v] = first position in band on i for state v*/
794   int *imax;                  /* [0..cm_M-1] imax[v] = last position in band on i for state v*/
795   int *jmin;                  /* [0..cm_M-1] jmin[v] = first position in band on j for state v*/
796   int *jmax;                  /* [0..cm_M-1] jmax[v] = last position in band on j for state v*/
797   int **hdmin;                /* [0..cm_M-1][0..(jmax[v]-jmin[v])]
798 			       * hdmin[v][j0] = first position in band on d for state v, and position
799 			       * j = jmin[v] + j0. */
800   int **hdmax;                /* [0..cm_M-1][0..(jmax[v]-jmin[v])]
801 			       * hdmin[v][j0] = last position in band on d for state v, and position
802 			       * j = jmin[v] + j0.*/
803   int *hdmin_mem;             /* actual memory for hdmin */
804   int *hdmax_mem;             /* actual memory for hdmax */
805   int *safe_hdmin;            /* [0..cm_M-1] safe_hdmin[v] = min_d (hdmin[v][j0]) (over all valid j0) */
806   int *safe_hdmax;            /* [0..cm_M-1] safe_hdmax[v] = max_d (hdmax[v][j0]) (over all valid j0) */
807 
808   /* info on size of bands */
809   int hd_needed;              /* Sum_v cp9b->jmax[v] - cp9b->jmin[v] + 1, number of hd arrays needed */
810   int hd_alloced;             /* number of hd arrays currently alloc'ed */
811 
812   double   tau;               /* tau used to calculate current bands */
813 
814 } CP9Bands_t;
815 
816 /*************************************************************************************
817  * 14. CP9trace_t: traceback structure for CP9 HMMs.
818  *************************************************************************************/
819 
820 /* CM Plan 9 model state types
821  * used in traceback structure.
822  */
823 #define CSTBOGUS 0
824 #define CSTM     1
825 #define CSTD     2
826 #define CSTI     3
827 #define CSTB     4  /* M_0 the B state */
828 #define CSTE     5  /* the end state, M_(k+1) */
829 #define CSTEL    6  /* an EL (end local) state */
830 /* Structure: cp9trace_s
831  *
832  * Traceback structure for alignments of model to sequence.
833  * Each array in a trace_s is 0..tlen-1.
834  * Element 0 is always to M_0 (match state of node 0)
835  * Element tlen-1 is always to the E_st
836  */
837 typedef struct cp9trace_s {
838   int   tlen;                   /* length of traceback                           */
839   char *statetype;              /* state type used for alignment                 */
840   int  *nodeidx;                /* idx of aligned node, 0..M if M or I 1..M if D */
841   int  *pos;                    /* position in dsq, 1..L, or 0 if none           */
842 } CP9trace_t;
843 
844 /*************************************************************************************
845  * 15. CP9Map_t: map from a CM to a CP9 HMM and vice versa.
846  *************************************************************************************/
847 
848 /* Structure: CP9Map_t
849  * Incept:    EPN, 10.23.06
850  *
851  * Maps a CM to a CM plan 9 HMM and vice versa.
852  *    Consensus positions are indexed 1..hmm_M.
853  *
854  * The lpos and rpos arrays are somewhat redundant with
855  * CMEmitMap_t, but they're not identical. I didn't realize emitmap existed
856  * prior to implementing CP9 HMMs - if I had I would have used emitmaps, but
857  * it's difficult to go back and use emitmaps now.
858  *
859  * See emitmap.c for implementation and more documentation.
860  */
861 typedef struct cp9map_s {
862   int   *nd2lpos;   /* [0..cm_nodes] left bound of consensus for subtree under this nd,
863 		     *               -1 for non-{MATL|MATR|MATP} nodes */
864   int   *nd2rpos;   /* [0..cm_nodes] right bound of consensus for subtree under this nd
865 		     *               -1 for non-{MATL|MATR|MATP} nodes */
866   int   *pos2nd;    /* [1..clen], the MATL, MATR or MATP CM node that maps to this
867 		     *            consensus column */
868   int  **cs2hn;     /* [0..cm_M][0..1], 1 or 2 HMM nodes that maps to this CM state
869 		     *                  [x][1] is -1 if state x maps to only 1 HMM node */
870   int  **cs2hs;     /* [0..cm_M][0..1], 1 or 2 HMM states (0=MATCH, 1=INSERT, 2=DELETE)
871 		     *            that maps to this CM state
872 		     *            [x][0] corresponds to the HMM node in cs2hn[x][0],
873 		     *            [x][1] corresponds to the HMM node in cs2hn[x][1] (or -1) */
874   int ***hns2cs;    /* [0..clen][0..2][0..1]
875 		     * hns2cs[x][y][0] 1st CM state that maps to HMM node x state type y (0,1,2)
876 		     * hns2cs[x][y][1] 2nd CM state that maps to HMM node x state type y (0,1,2)
877 		     *                 -1 if only 1 CM state maps to HMM node x state y */
878   int    hmm_M;     /* consensus length, the number of HMM nodes */
879   int    cm_M;      /* number of states in the CM this HMM maps to */
880   int    cm_nodes;  /* number of nodes in the CM this HMM maps to */
881 } CP9Map_t;
882 
883 
884 /*************************************************************************************
885  * 16. CMSubMap_t: map of a template CM to a sub CM and vice versa.
886  *************************************************************************************/
887 
888 /* Structure: CMSubMap_t
889  * Incept:    EPN, 10.23.06
890  *
891  * Maps a template CM to a sub CM and vice versa.
892  *    Consensus positions are indexed 1..clen.
893  *
894  * The *node_cc_left and *node_cc_right arrays are redundant with CMEmitMap_t.
895  * See emitmap.c for implementation and more documentation.
896  */
897 typedef struct submap_s {
898   int spos;            /* first consensus column this sub_cm models */
899   int epos;            /* final consensus column this sub_cm models */
900   int sstruct;         /* first consensus column this sub_cm models structure of */
901   int estruct;         /* final consensus column this sub_cm models structure of */
902 
903   int **s2o_smap;      /* v = [0..sub_M-1] [0..1], orig_cm state(s) that maps to v */
904   int **o2s_smap;      /* v = [0..orig_M-1][0..1], sub_cm  state(s) that maps to v */
905   int  *s2o_id;        /* v = [0..sub_M-1] TRUE if sub_cm state v maps identically *
906                         * to a orig_cm state (this will be s2o_smap[v][0])         */
907 
908   int  sub_clen;       /* consensus length orig_cm */
909   int  orig_clen;      /* consensus length sub_cm  */
910 
911   int sub_M;           /* number of states in the sub CM */
912   int orig_M;          /* number of states in the original CM */
913 } CMSubMap_t;
914 
915 
916 /*************************************************************************************
917  * 17. CMSubInfo_t: sub CM information, used for validating the sub CM construction procedure.
918  *************************************************************************************/
919 
920 /* Structure: CMSubInfo_t
921  * Incept:    EPN, 10.23.06
922  *
923  * Information on a sub CM, used for checking the sub CM
924  * construction procedure works.
925  *    Consensus positions are indexed 1..clen.
926  *
927  */
928 typedef struct subinfo_s {
929   int  *imp_cc;         /* [0..(epos-spos+1)] ret_imp_cc[k] != 0 if it is
930 			* impossible for CP9 node (consensus column) k to be
931 			* calculated in the sub_cm to have distros to match the
932 			* corresponding CP9 node in the original CM - due to
933 			* topological differences in the architecture of the
934 			* sub_cm and orig_cm.
935 			*/
936   int  *apredict_ct;   /* For an analytical test, the number of times we
937 			* predict we'll fail the test for an HMM node for each
938 			* of 6 cases - 6 different reasons we predict we'll fail.
939 			*/
940   int  *spredict_ct;   /* For as sampling test, the number of times we
941 			* predict we'll fail the test for an HMM node for each
942 			* of 6 cases - 6 different reasons we predict we'll fail.
943 			*/
944   int  *awrong_ct;     /* Subset of cases in apredict_ct that were incorrectly
945 			* predicted. */
946   int  *swrong_ct;     /* Subset of cases in spredict_ct that were incorrectly
947 			* predicted. */
948 } CMSubInfo_t;
949 
950 
951 /*************************************************************************************
952  * 18. RSEARCH constants.
953  *************************************************************************************/
954 
955 /* RSEARCH defaults defined here */
956 #define DEFAULT_RMATRIX "RIBOSUM85-60"
957 #define DEFAULT_RALPHA 10.
958 #define DEFAULT_RBETA 5.
959 #define DEFAULT_RALPHAP 0.
960 #define DEFAULT_RBETAP 15.
961 /* the RSEARCH default is below, it was changed b/c
962  * with no local begin penalty, a glocal hit is ALWAYS
963  * going to be decomposed into it's subtrees.
964  *#define DEFAULT_RBEGINSC 0.  */
965 #define DEFAULT_RBEGINSC -0.01
966 #define DEFAULT_RENDSC -15.
967 
968 /* The six classes of states in RSEARCH */
969 #define M_cl 0
970 #define IL_cl 1
971 #define DL_cl 2
972 #define IR_cl 3
973 #define DR_cl 4
974 #define DB_cl 5
975 
976 /* coordinate -- macro that checks if it's reverse complement and if so
977    returns coordinate in original strand
978    a = true if revcomp, false if not
979    b = the position in current seq
980    c = length of the seq
981 */
982 #define COORDINATE(a,b,c) ( a ? -1*b+c+1 : b)
983 #define RNAPAIR_ALPHABET "AAAACCCCGGGGUUUU"
984 #define RNAPAIR_ALPHABET2 "ACGUACGUACGUACGU"
985 /* Returns true if pos. C of seq B of msa A is a gap */
986 #define is_rna_gap(A, B, C) (esl_abc_CIsGap(A->abc, A->aseq[B][C]))
987 /* Returns true if position C of digitized sequence B of msa A is a canonical */
988 #define is_defined_rna_nucleotide(A, B, C) (esl_abc_CIsCanonical(A->abc, A->aseq[B][C]))
989 #define unpairedmat_size (matrix_index(3,3) + 1)
990 #define pairedmat_size (matrix_index (15,15) + 1)
991 /* Maps to index of matrix, using binary representation of
992  * nucleotides (unsorted).
993  * See lab book 7, p. 3-4 for details of mapping function (RJK) */
994 #define matrix_index(X,Y) ((X>Y) ? X*(X+1)/2+Y: Y*(Y+1)/2+X)
995 /* Matrix type
996  * Contains array in one dimension (to be indexed later), matrix size,
997  * H, and E.
998  */
999 typedef struct _matrix_t {
1000   double *matrix;
1001   int edge_size;         /* Size of one edge, e.g. 4 for 4x4 matrix */
1002   int full_size;         /* Num of elements, e.g. 10 for 4x4 matirx */
1003   double H;
1004   double E;
1005 } matrix_t;
1006 
1007 /* Full matrix definition, includes the g background freq vector (g added by EPN). */
1008 typedef struct _fullmat_t {
1009   const ESL_ALPHABET *abc;/* alphabet, we enforce it's eslRNA */
1010   matrix_t *unpaired;
1011   matrix_t *paired;
1012   char     *name;
1013   float    *g;           /* EPN: the background distro, g vector in RSEARCH paper
1014 			  * this now appears in the RIBOSUM matrix files */
1015   int       scores_flag; /* TRUE if matrix values are log odds scores, FALSE if
1016 			  * they're target probs, or unfilled */
1017   int       probs_flag;  /* TRUE if matrix values are target probs, FALSE if
1018 			  * they're log odds scores, or unfilled */
1019 } fullmat_t;
1020 
1021 /***********************************************************************************
1022  * 19. CM_MX: CM dynamic programming matrix; non-banded, non-truncated.
1023  ***********************************************************************************/
1024 
1025 /* Declaration of CM dynamic programming matrices for alignment.
1026  * There are eight matrices here, four DP matrices for DP calculations
1027  * and four shadow matrices for alignment tracebacks. There is one DP
1028  * matrix and one shadow matrix for each combination of standard vs
1029  * truncated and non-banded vs HMM-banded alignment.  All matrices are
1030  * setup in 'vjd' (state idx, aln posn, subseq len) coordinates.  HMM
1031  * banded variants of all matrices are banded in j and d dimensions
1032  * with only cells within bands allocated. HMM banded DP variants are
1033  * also used for DP search functions.
1034  *
1035  * CM_MX:                non-banded standard  CYK/Inside/Outside DP matrix
1036  * CM_TR_MX:             non-banded truncated CYK/Inside/Outside DP matrix
1037  * CM_HB_MX:             HMM banded standard  CYK/Inside/Outside DP matrix
1038  * CM_TR_HB_MX:          HMM banded truncated CYK/Inside/Outside DP matrix
1039  *
1040  * CM_SHADOW_MX:         non-banded standard  CYK/optacc shadow matrix
1041  * CM_SHADOW_TR_MX:      non-banded truncated CYK/optacc shadow matrix
1042  * CM_SHADOW_HB_MX:      HMM banded standard  CYK/optacc shadow matrix
1043  * CM_SHADOW_TR_HB_MX:   HMM banded truncated CYK/optacc shadow matrix
1044  */
1045 typedef struct cm_mx_s {
1046   int  M;		/* number of states (1st dim ptrs) in current mx */
1047   int  L;               /* length of sequence the matrix currently corresponds to */
1048 
1049   int64_t    ncells_alloc;  /* current cell allocation limit for dp */
1050   int64_t    ncells_valid;  /* current number of valid cells for dp */
1051   float      size_Mb;        /* current size of matrix in Megabytes */
1052 
1053   float  ***dp;          /* matrix, [0..v..M][0..j..L][0..j] */
1054   float    *dp_mem;      /* the actual mem, points to dp[0][0][0] */
1055 } CM_MX;
1056 
1057 /***********************************************************************************
1058  * 20. CM_TR_MX: CM dynamic programming matrix; non-banded, truncated.
1059  ***********************************************************************************/
1060 
1061 typedef struct cm_tr_mx_s {
1062   int  M;		/* number of states (1st dim ptrs) in current mx */
1063   int  B;               /* number of B states in current mx */
1064   int  L;               /* length of sequence the matrix currently corresponds to */
1065 
1066   int64_t    Jncells_alloc;  /* current cell allocation limit for Jdp */
1067   int64_t    Jncells_valid;  /* current number of valid cells for Jdp */
1068   int64_t    Lncells_alloc;  /* current cell allocation limit for Ldp */
1069   int64_t    Lncells_valid;  /* current number of valid cells for Ldp */
1070   int64_t    Rncells_alloc;  /* current cell allocation limit for Rdp, same as for Ldp */
1071   int64_t    Rncells_valid;  /* current number of valid cells for Rdp, same as for Ldp  */
1072   int64_t    Tncells_alloc;  /* current cell allocation limit for Tdp */
1073   int64_t    Tncells_valid;  /* current number of valid cells for Tdp */
1074   float      size_Mb;        /* current size of matrix in Megabytes */
1075 
1076   float  ***Jdp;          /* J matrix, [0..v..M][0..j..L][0..j] */
1077   float    *Jdp_mem;      /* the actual mem, points to Jdp[0][0][0] */
1078   float  ***Ldp;          /* L matrix, [0..v..M][0..j..L][0..j] */
1079   float    *Ldp_mem;      /* the actual mem, points to Ldp[0][0][0] */
1080   float  ***Rdp;          /* R matrix, [0..v..M][0..j..L][0..j] */
1081   float    *Rdp_mem;      /* the actual mem, points to Rdp[0][0][0] */
1082   float  ***Tdp;          /* T matrix, [0..v..M][0..j..L][0..j] */
1083   float    *Tdp_mem;      /* the actual mem, points to Tdp[0][0][0] */
1084 } CM_TR_MX;
1085 
1086 /***********************************************************************************
1087  * 21. CM_HB_MX: CM dynamic programming matrix; HMM banded, non-truncated.
1088  ***********************************************************************************/
1089 
1090 typedef struct cm_hb_mx_s {
1091   int  M;		/* number of states (1st dim ptrs) in current mx */
1092   int  L;               /* length of sequence the matrix currently corresponds to */
1093 
1094   int64_t    ncells_alloc;  /* current cell allocation limit */
1095   int64_t    ncells_valid;  /* current number of valid cells */
1096   float      size_Mb;       /* current size of matrix in Megabytes */
1097 
1098   int   *nrowsA;        /* [0..v..M] current number allocated rows for deck v */
1099 
1100   float ***dp;          /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1101   float   *dp_mem;      /* the actual mem, points to dp[0][0][0] */
1102 
1103   CP9Bands_t *cp9b;     /* the CP9Bands_t object associated with this
1104 			 * matrix, which defines j, d, bands for each
1105 			 * state, only a reference, so don't free
1106 			 * it when mx is freed. */
1107 } CM_HB_MX;
1108 
1109 /***********************************************************************************
1110  * 22. CM_TR_HB_MX: CM dynamic programming matrix; HMM banded, truncated.
1111  ***********************************************************************************/
1112 
1113 typedef struct cm_tr_hb_mx_s {
1114   int  M;		/* number of states (1st dim ptrs) in current mx */
1115   int  B;		/* number of BIF_B states (1st dim ptrs) in current mx (valid, non-null Tdp states) */
1116   int  L;               /* length of sequence the matrix currently corresponds to */
1117 
1118   int64_t    Jncells_alloc;  /* current cell allocation limit for Jdp */
1119   int64_t    Lncells_alloc;  /* current cell allocation limit for Ldp */
1120   int64_t    Rncells_alloc;  /* current cell allocation limit for Rdp */
1121   int64_t    Tncells_alloc;  /* current cell allocation limit for Tdp */
1122   int64_t    Jncells_valid;  /* current number of valid cells in Jdp */
1123   int64_t    Lncells_valid;  /* current number of valid cells in Ldp */
1124   int64_t    Rncells_valid;  /* current number of valid cells in Rdp */
1125   int64_t    Tncells_valid;  /* current number of valid cells in Tdp */
1126   float      size_Mb;        /* current size of full matrix (J,L,R&T) in Megabytes */
1127 
1128   int   *JnrowsA;       /* [0..v..M] current number allocated rows for deck v in J matrix */
1129   int   *LnrowsA;       /* [0..v..M] current number allocated rows for deck v in L matrix */
1130   int   *RnrowsA;       /* [0..v..M] current number allocated rows for deck v in R matrix */
1131   int   *TnrowsA;       /* [0..v..M] current number allocated rows for deck v in T matrix */
1132 
1133   float ***Jdp;         /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1134   float   *Jdp_mem;     /* the actual mem, points to Jdp[0][0][0] */
1135   float ***Ldp;         /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1136   float   *Ldp_mem;     /* the actual mem, points to Ldp[0][0][0] */
1137   float ***Rdp;         /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1138   float   *Rdp_mem;     /* the actual mem, points to Rdp[0][0][0] */
1139   float ***Tdp;         /* B states only: [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1140   float   *Tdp_mem;     /* the actual mem, points to Tdp[0][0][0] */
1141 
1142   CP9Bands_t *cp9b;     /* the CP9Bands_t object associated with this
1143 			 * matrix, which defines j, d, bands for each
1144 			 * state, only a reference, so don't free
1145 			 * it when mx is freed. */
1146 } CM_TR_HB_MX;
1147 
1148 
1149 /***********************************************************************************
1150  * 23. CM_SHADOW_MX: CM shadow matrix, for DP tracebacks; non-banded, non-truncated.
1151  ***********************************************************************************/
1152 
1153 typedef struct cm_shadow_mx_s {
1154   int  M;		/* number of states (1st dim ptrs) in current mx */
1155   int  L;               /* length of sequence the matrix currently corresponds to */
1156   int  B;		/* number of BIF_B states (1st dim ptrs) in current mx */
1157 
1158   int64_t y_ncells_alloc;  /* current cell allocation limit in yshadow*/
1159   int64_t k_ncells_alloc;  /* current cell allocation limit in kshadow*/
1160   int64_t y_ncells_valid;  /* current number of valid cells in yshadow */
1161   int64_t k_ncells_valid;  /* current number of valid cells in kshadow */
1162 
1163   float  size_Mb;         /* current size of matrix in Megabytes */
1164 
1165   /* yshadow holds the shadow matrix for all non-BIF_B states, yshadow[v] == NULL if cm->sttype[v] == B_st */
1166   char ***yshadow;       /* [0..v..M][0..j..L][0..d..j] */
1167   char   *yshadow_mem;   /* the actual mem, points to yshadow[0][0][0] */
1168 
1169   /* kshadow holds the shadow matrix for all BIF_B states, kshadow[v] == NULL if cm->sttype[v] != B_st */
1170   int ***kshadow;       /* [0..v..M][0..j..L][0..d..j] */
1171   int   *kshadow_mem;   /* the actual mem, points to Jkshadow[0][0][0] */
1172 } CM_SHADOW_MX;
1173 
1174 
1175 /***********************************************************************************
1176  * 24. CM_TR_SHADOW_MX: CM shadow matrix, for DP tracebacks; non-banded, truncated.
1177  ***********************************************************************************/
1178 
1179 typedef struct cm_tr_shadow_mx_s {
1180   int  M;		/* number of states (1st dim ptrs) in current mx */
1181   int  L;               /* length of sequence the matrix currently corresponds to */
1182   int  B;		/* number of BIF_B states (1st dim ptrs) in current mx */
1183 
1184   int64_t Jy_ncells_alloc;  /* current cell allocation limit in Jyshadow*/
1185   int64_t Ly_ncells_alloc;  /* current cell allocation limit in Lyshadow*/
1186   int64_t Ry_ncells_alloc;  /* current cell allocation limit in Ryshadow*/
1187 
1188   int64_t Jk_ncells_alloc;  /* current cell allocation limit in Jkshadow*/
1189   int64_t Lk_ncells_alloc;  /* current cell allocation limit in Lkshadow/Lkmode*/
1190   int64_t Rk_ncells_alloc;  /* current cell allocation limit in Rkshadow/Rkmode*/
1191   int64_t Tk_ncells_alloc;  /* current cell allocation limit in Tkshadow*/
1192 
1193   int64_t Jy_ncells_valid;  /* current number of valid cells in Jyshadow */
1194   int64_t Ly_ncells_valid;  /* current number of valid cells in Lyshadow */
1195   int64_t Ry_ncells_valid;  /* current number of valid cells in Ryshadow */
1196 
1197   int64_t Jk_ncells_valid;  /* current number of valid cells in Jkshadow */
1198   int64_t Lk_ncells_valid;  /* current number of valid cells in Lkshadow/Lkmode */
1199   int64_t Rk_ncells_valid;  /* current number of valid cells in Rkshadow/Rkmode */
1200   int64_t Tk_ncells_valid;  /* current number of valid cells in Tkshadow */
1201 
1202   float  size_Mb;         /* current size of matrix in Megabytes */
1203 
1204   /* {J,L,R}yshadow holds the shadow matrix for all non-BIF_B states, {J,L,R}yshadow[v] == NULL if cm->sttype[v] == B_st */
1205   char ***Jyshadow;       /* [0..v..M][0..j..L][0..d..j] */
1206   char   *Jyshadow_mem;   /* the actual mem, points to Jyshadow[0][0][0] */
1207   char ***Lyshadow;       /* [0..v..M][0..j..L][0..d..j] */
1208   char   *Lyshadow_mem;   /* the actual mem, points to Lyshadow[0][0][0] */
1209   char ***Ryshadow;       /* [0..v..M][0..j..L][0..d..j] */
1210   char   *Ryshadow_mem;   /* the actual mem, points to Ryshadow[0][0][0] */
1211 
1212   /* {J,L,R,T}kshadow holds the shadow matrix for all BIF_B states, {J,L,R,T}kshadow[v] == NULL if cm->sttype[v] != B_st */
1213   int ***Jkshadow;       /* [0..v..M][0..j..L][0..d..j] */
1214   int   *Jkshadow_mem;   /* the actual mem, points to Jkshadow[0][0][0] */
1215   int ***Lkshadow;       /* [0..v..M][0..j..L][0..d..j] */
1216   int   *Lkshadow_mem;   /* the actual mem, points to Lkshadow[0][0][0] */
1217   int ***Rkshadow;       /* [0..v..M][0..j..L][0..d..j] */
1218   int   *Rkshadow_mem;   /* the actual mem, points to Rkshadow[0][0][0] */
1219   int ***Tkshadow;       /* [0..v..M][0..j..L][0..d..j] */
1220   int   *Tkshadow_mem;   /* the actual mem, points to Tkshadow[0][0][0] */
1221 
1222   /* {L,R}kmode holds the alignment mode for all BIF_B states {L,R}kmode == NULL for non-B states */
1223   char ***Lkmode;        /* [0..v..M][0..j..L][0..d..j] */
1224   char   *Lkmode_mem;    /* the actual mem, points to Lkmode[0][0][0] */
1225   char ***Rkmode;        /* [0..v..M][0..j..L][0..d..j] */
1226   char   *Rkmode_mem;    /* the actual mem, points to Rkmode[0][0][0] */
1227 } CM_TR_SHADOW_MX;
1228 
1229 /***********************************************************************************
1230  * 25. CM_HB_SHADOW_MX: CM shadow matrix, for DP tracebacks; HMM banded, non-truncated.
1231  ***********************************************************************************/
1232 
1233 typedef struct cm_hb_shadow_mx_s {
1234   int  M;		/* number of states (1st dim ptrs) in current mx */
1235   int  L;               /* length of sequence the matrix currently corresponds to */
1236   int  B;               /* number of B_st's in the cm this mx was created for */
1237 
1238   int64_t y_ncells_alloc;  /* current cell allocation limit in yshadow*/
1239   int64_t y_ncells_valid;  /* current number of valid cells in yshadow */
1240   int64_t k_ncells_alloc;  /* current cell allocation limit in kshadow*/
1241   int64_t k_ncells_valid;  /* current number of valid cells in kshadow */
1242   float  size_Mb;         /* current size of matrix in Megabytes */
1243 
1244   int   *nrowsA;          /* [0..v..M] current number allocated rows for deck v */
1245 
1246   /* yshadow holds the shadow matrix for all non-BIF_B states, yshadow[v] == NULL if cm->sttype[v] == B_st */
1247   char ***yshadow;       /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1248   char   *yshadow_mem;   /* the actual mem, points to yshadow[0][0][0] */
1249 
1250   /* kshadow holds the shadow matrix for all BIF_B states, kshadow[v] == NULL if cm->sttype[v] != B_st */
1251   int ***kshadow;       /*  [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1252   int   *kshadow_mem;   /* the actual mem, points to kshadow[0][0][0] */
1253 
1254   CP9Bands_t *cp9b;     /* the CP9Bands_t object associated with this
1255 			 * matrix, which defines j, d, bands for each
1256 			 * state, only a reference, so don't free
1257 			 * it when mx is freed. */
1258 } CM_HB_SHADOW_MX;
1259 
1260 
1261 /***********************************************************************************
1262  * 26. CM_TR_HB_SHADOW_MX: CM shadow matrix, for DP tracebacks; HMM banded, truncated.
1263  ***********************************************************************************/
1264 
1265 typedef struct cm_tr_hb_shadow_mx_s {
1266   int  M;		/* number of states (1st dim ptrs) in current mx */
1267   int  L;               /* length of sequence the matrix currently corresponds to */
1268   int  B;		/* number of BIF_B states (1st dim ptrs) in current mx */
1269 
1270   int64_t Jy_ncells_alloc;  /* current cell allocation limit in Jyshadow*/
1271   int64_t Ly_ncells_alloc;  /* current cell allocation limit in Lyshadow*/
1272   int64_t Ry_ncells_alloc;  /* current cell allocation limit in Ryshadow*/
1273 
1274   int64_t Jk_ncells_alloc;  /* current cell allocation limit in Jkshadow*/
1275   int64_t Lk_ncells_alloc;  /* current cell allocation limit in Lkshadow/Lkmode*/
1276   int64_t Rk_ncells_alloc;  /* current cell allocation limit in Rkshadow/Rkmode*/
1277   int64_t Tk_ncells_alloc;  /* current cell allocation limit in Tkshadow*/
1278 
1279   int64_t Jy_ncells_valid;  /* current number of valid cells in Jyshadow */
1280   int64_t Ly_ncells_valid;  /* current number of valid cells in Lyshadow */
1281   int64_t Ry_ncells_valid;  /* current number of valid cells in Ryshadow */
1282 
1283   int64_t Jk_ncells_valid;  /* current number of valid cells in Jkshadow */
1284   int64_t Lk_ncells_valid;  /* current number of valid cells in Lkshadow/Lkmode */
1285   int64_t Rk_ncells_valid;  /* current number of valid cells in Rkshadow/Rkmode */
1286   int64_t Tk_ncells_valid;  /* current number of valid cells in Tkshadow */
1287 
1288   float  size_Mb;         /* current size of matrix in Megabytes */
1289 
1290   int   *JnrowsA;         /* [0..v..M] current number allocated rows for deck v */
1291   int   *LnrowsA;         /* [0..v..M] current number allocated rows for deck v */
1292   int   *RnrowsA;         /* [0..v..M] current number allocated rows for deck v */
1293   int   *TnrowsA;         /* [0..v..M] current number allocated rows for deck v */
1294 
1295   /* {J,L,R}yshadow holds the shadow matrix for all non-BIF_B states, {J,L,R}yshadow[v] == NULL if cm->sttype[v] == B_st */
1296   char ***Jyshadow;       /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1297   char   *Jyshadow_mem;   /* the actual mem, points to Jyshadow[0][0][0] */
1298   char ***Lyshadow;       /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1299   char   *Lyshadow_mem;   /* the actual mem, points to Lyshadow[0][0][0] */
1300   char ***Ryshadow;       /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1301   char   *Ryshadow_mem;   /* the actual mem, points to Ryshadow[0][0][0] */
1302 
1303   /* {J,L,R,T}kshadow holds the shadow matrix for all BIF_B states, {J,L,R,T}kshadow[v] == NULL if cm->sttype[v] != B_st */
1304   int ***Jkshadow;       /*  [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1305   int   *Jkshadow_mem;   /* the actual mem, points to Jkshadow[0][0][0] */
1306   int ***Lkshadow;       /*  [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1307   int   *Lkshadow_mem;   /* the actual mem, points to Lkshadow[0][0][0] */
1308   int ***Rkshadow;       /*  [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1309   int   *Rkshadow_mem;   /* the actual mem, points to Rkshadow[0][0][0] */
1310   int ***Tkshadow;       /*  [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1311   int   *Tkshadow_mem;   /* the actual mem, points to Tkshadow[0][0][0] */
1312 
1313   /* {L,R}kmode holds the alignment mode for all BIF_B states {L,R}kmode == NULL for non-B states */
1314   char ***Lkmode;        /*  [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1315   char   *Lkmode_mem;    /* the actual mem, points to Lkmode[0][0][0] */
1316   char ***Rkmode;        /*  [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */
1317   char   *Rkmode_mem;    /* the actual mem, points to Rkmode[0][0][0] */
1318 
1319   CP9Bands_t *cp9b;     /* the CP9Bands_t object associated with this
1320 			 * matrix, which defines j, d, bands for each
1321 			 * state, only a reference, so don't free
1322 			 * it when mx is freed. */
1323 } CM_TR_HB_SHADOW_MX;
1324 
1325 /***********************************************************************************
1326  * 27. CM_EMIT_MX: CM emit matrix, info on PP of emitted residues; non-banded, non-truncated.
1327  ***********************************************************************************/
1328 
1329 /* CM_EMIT_MX: Two 2-dimensional matrices <l_pp> and <r_pp>
1330  * where:
1331  *
1332  * l_pp[v][i]: log of the posterior probability that state v emitted
1333  *             residue i leftwise either at (if a match state) or
1334  *             *after* (if an insert state) the left consensus
1335  *             position modeled by state v's node.
1336  *
1337  * r_pp[v][i]: log of the posterior probability that state v emitted
1338  *             residue i rightwise either at (if a match state) or
1339  *             *before* (if an insert state) the right consensus
1340  *             position modeled by state v's node.
1341  *
1342  * l_pp[v] is NULL for states that do not emit leftwise  (B,S,D,E,IR,MR)
1343  * r_pp[v] is NULL for states that do not emit rightwise (B,S,D,E,IL,ML)
1344  *
1345  * Importantly the definition above is not: "l_pp[v][i] is the posterior
1346  * probability that residue i was emitted from state v", although that
1347  * is true for MATL_ML and all IL states. It is not true however for
1348  * MATP_MP states and MATP_MP states because we want to combine the
1349  * posterior probability that either the MATP_MP or the MATP_ML states
1350  * from the same node emitted each residue i, it is the sum that is stored
1351  * in l_pp[v][i]. The same is true for the analogous case in r_pp with
1352  * MATP_MP and MATP_MR states.
1353  */
1354 typedef struct cm_emit_mx_s {
1355   int      M;		    /* number of states (1st dim ptrs) in current mx */
1356   int      L;               /* length of sequence the matrix currently corresponds to */
1357   int64_t  l_ncells_alloc;  /* current cell allocation limit for l_pp */
1358   int64_t  l_ncells_valid;  /* current number of valid cells for l_pp */
1359   int64_t  r_ncells_alloc;  /* current cell allocation limit for r_pp */
1360   int64_t  r_ncells_valid;  /* current number of valid cells for r_pp */
1361   float    size_Mb;         /* current size of matrix in Megabytes  */
1362 
1363   float   **l_pp;         /* matrix: [0..v..M][0..1..i..L], l_pp[v][0] is
1364 			   * always IMPOSSIBLE l_pp[v] == NULL if v is
1365 			   * not a left emitter.
1366 			   */
1367   float   **r_pp;         /* matrix: [0..v..M][0..1..i..L], r_pp[v][0] is
1368 			   * always IMPOSSIBLE r_pp[v] == NULL if v is
1369 			   * not a right emitter.
1370 			   */
1371   float    *l_pp_mem;     /* the actual mem for l_pp, points to
1372 			   * l_pp[v][0], where v is min v for which
1373 			   * l_pp != NULL */
1374   float    *r_pp_mem;     /* the actual mem for r_pp, points to
1375 			   * r_pp[v][0], where v is min v for which
1376 			   * r_pp != NULL */
1377   float    *sum;          /* [0..1..i..L] log of the summed posterior
1378 			   * probability that residue i was emitted
1379 			   * either leftwise or rightwise by any state.
1380 			   * Used for normalizing l_pp and r_pp.
1381 			   */
1382 } CM_EMIT_MX;
1383 
1384 
1385 /***********************************************************************************
1386  * 28. CM_TR_EMIT_MX: CM emit matrix, info on PP of emitted residues; non-banded, truncated.
1387  ***********************************************************************************/
1388 
1389 /* CM_TR_EMIT_MX: Same as CM_EMIT_MX except extended for truncated
1390  * alignment. Two l_pp and r_pp matrices exist:
1391  *
1392  * Jl_pp, Ll_pp, and Jr_pp, Rr_pp.
1393  *
1394  * Each as defined above for CM_EMIT_MX with the caveat that
1395  * residue i for a [v][i] cell in a 'J', 'L', or 'R' matrix
1396  * indicates that i was emitted in Joint (J), Left (L), or
1397  * Right (R) marginal alignment mode.
1398  *
1399  * Note that we don't need a Lr_pp or Rl_pp matrix because
1400  * in Left marginal mode we can't emit rightwise, and
1401  * in Right marginal mode we can't emit leftwise. Also,
1402  * no Terminal mode matrices are necessary because only
1403  * B states can be in Terminal mode, and they don't emit.
1404  *
1405  * When we're filling a CM_TR_EMIT_MX matrix we'll know the optimal
1406  * alignment mode <mode>, which dictates which of the *pp matrices
1407  * need be filled:
1408  *
1409  * <mode> == TRMODE_J, fill Jl_pp, Jr_pp
1410  * <mode> == TRMODE_L, fill Jl_pp, Jr_pp and Ll_pp
1411  * <mode> == TRMODE_R, fill Jl_pp, Jr_pp and Rr_pp
1412  * <mode> == TRMODE_T, fill Jl_pp, Jr_pp, Ll_pp, and Rr_pp.
1413  *
1414  * However, we don't store <mode> in the CM_TR_EMIT_MX because, as
1415  * it's implemented, it will not affect how the matrix is allocated.
1416  * We always allocate for all four *pp matrices because space is
1417  * not a big concern because we're using 2D matrices, and this
1418  * way prevents future reallocations when the marginal mode
1419  * switches in subsequent sequence alignment.
1420  */
1421 typedef struct cm_tr_emit_mx_s {
1422   int      M;		     /* number of states (1st dim ptrs) in current mx */
1423   int      L;                /* length of sequence the matrix currently corresponds to */
1424   int64_t  l_ncells_alloc;   /* current cell allocation limit for Jl_pp, Ll_pp */
1425   int64_t  l_ncells_valid;   /* current number of valid cells for Jl_pp, Ll_pp */
1426   int64_t  r_ncells_alloc;   /* current cell allocation limit for Jr_pp, Rr_pp */
1427   int64_t  r_ncells_valid;   /* current number of valid cells for Jr_pp, Rr_pp */
1428   float    size_Mb;          /* current size of matrix in Megabytes  */
1429 
1430   /* for all *_pp matrices, *_pp[v][0] is always IMPOSSIBLE,
1431    * *l_pp[v] == NULL for non-left emitters,
1432    * *r_pp[v] == NULL for non-right emitters.
1433    */
1434   float   **Jl_pp;         /* matrix: [0..v..M][0..1..i..L], Joint mode */
1435   float   **Ll_pp;         /* matrix: [0..v..M][0..1..i..L], Left mode */
1436   float   **Jr_pp;         /* matrix: [0..v..M][0..1..i..L], Joint mode */
1437   float   **Rr_pp;         /* matrix: [0..v..M][0..1..i..L], Right mode */
1438   float    *Jl_pp_mem;     /* the actual mem for Jl_pp */
1439   float    *Ll_pp_mem;     /* the actual mem for Ll_pp */
1440   float    *Jr_pp_mem;     /* the actual mem for Jr_pp */
1441   float    *Rr_pp_mem;     /* the actual mem for Rr_pp */
1442   float    *sum;           /* [0..1..i..L] log of the summed posterior
1443 		 	    * probability that residue i was emitted
1444 			    * either leftwise or rightwise by any state.
1445 			    * Used for normalizing *l_pp and *r_pp.
1446 			    */
1447 } CM_TR_EMIT_MX;
1448 
1449 
1450 /***********************************************************************************
1451  * 29. CM_HB_EMIT_MX: CM emit matrix, info on PP of emitted residues; HMM banded, non-truncated.
1452  ***********************************************************************************/
1453 
1454 /* CM_HB_EMIT_MX: HMM-banded version of CM_EMIT_MX (see the
1455  * description of that structure above). The only difference is that
1456  * now bands are enforced by only allocating l_pp and r_pp for
1457  * residues within the bands. A pointer to the CP9Bands_t object is in
1458  * <cp9b>.  The bandwidth of the l_pp rows are defined by cp9b->imin
1459  * and cp9b->imax and for r_pp rows by cp9b->jmin and cp9b->jmax.
1460  */
1461 typedef struct cm_hb_emit_mx_s {
1462   int      M;		    /* number of states (1st dim ptrs) in current mx */
1463   int      L;               /* length of sequence the matrix currently corresponds to */
1464   int64_t  l_ncells_alloc;  /* current cell allocation limit for dp */
1465   int64_t  l_ncells_valid;  /* current number of valid cells for dp */
1466   int64_t  r_ncells_alloc;  /* current cell allocation limit for dp */
1467   int64_t  r_ncells_valid;  /* current number of valid cells for dp */
1468   float    size_Mb;         /* current size of matrix in Megabytes  */
1469 
1470   float   **l_pp;         /* matrix: [0..v..M][0..i..(lmax[v]-lmin[v])],
1471 			   * l_pp[v][i] corresponds to residue i+lmin[v].
1472 			   * l_pp[v] == NULL if v is not a left emitter.
1473 			   */
1474   float   **r_pp;         /* matrix: [0..v..M][0..i..(rmax[v]-rmin[v])],
1475 			   * r_pp[v][i] corresponds to residue i+rmin[v].
1476 			   * r_pp[v] == NULL if v is not a right emitter.
1477 			   */
1478   float    *l_pp_mem;     /* the actual mem for l_pp, points to
1479 			   * l_pp[v][0], where v is min v for which
1480 			   * l_pp != NULL */
1481   float    *r_pp_mem;     /* the actual mem for r_pp, points to
1482 			   * r_pp[v][0], where v is min v for which
1483 			   * r_pp != NULL */
1484   float    *sum;          /* [0..1..i..L] log of the summed posterior
1485 			   * probability that residue i was emitted
1486 			   * either leftwise or rightwise by any state.
1487 			   * Used for normalizing l_pp and r_pp.
1488 			   */
1489   CP9Bands_t *cp9b;       /* the CP9Bands_t object associated with
1490 	  		   * this matrix. We use the imin and imax
1491 	  		   * arrays as bands on l_pp and jmin and jmax
1492 	  		   * arrays as bands on r_pp. Only a
1493 	  		   * reference, so don't free it when mx is
1494 	  		   * freed. */
1495 } CM_HB_EMIT_MX;
1496 
1497 
1498 /***********************************************************************************
1499  * 30. CM_TR_HB_EMIT_MX: CM emit matrix, info on PP of emitted residues; HMM banded, truncated.
1500  ***********************************************************************************/
1501 
1502 /* CM_TR_HB_EMIT_MX: HMM-banded version of CM_TR_EMIT_MX (see the
1503  * description of that structure above). The only difference is that
1504  * now bands are enforced by only allocating Jl_pp, Ll_pp, Jr_pp, and
1505  * Rr_pp for residues within the bands. A pointer to the CP9Bands_t
1506  * object is in <cp9b>. The bandwidth of the {J,L}l_pp rows are
1507  * defined by cp9b->imin and cp9b->imax and for {J,R}r_pp rows by
1508  * cp9b->jmin and cp9b->jmax.
1509  */
1510 typedef struct cm_tr_hb_emit_mx_s {
1511   int      M;		     /* number of states (1st dim ptrs) in current mx */
1512   int      L;                /* length of sequence the matrix currently corresponds to */
1513   int64_t  l_ncells_alloc;   /* current cell allocation limit for Jl_pp, Ll_pp */
1514   int64_t  l_ncells_valid;   /* current number of valid cells for Jl_pp, Ll_pp */
1515   int64_t  r_ncells_alloc;   /* current cell allocation limit for Jr_pp, Rr_pp */
1516   int64_t  r_ncells_valid;   /* current number of valid cells for Jr_pp, Rr_pp */
1517   float    size_Mb;          /* current size of matrix in Megabytes  */
1518 
1519   /* for all *_pp matrices, *_pp[v][0] is always IMPOSSIBLE,
1520    * *l_pp[v] == NULL for non-left emitters,
1521    * *r_pp[v] == NULL for non-right emitters.
1522    */
1523   float   **Jl_pp;         /* matrix: [0..v..M][0..1..i..L], Joint mode */
1524   float   **Ll_pp;         /* matrix: [0..v..M][0..1..i..L], Left mode */
1525   float   **Jr_pp;         /* matrix: [0..v..M][0..1..i..L], Joint mode */
1526   float   **Rr_pp;         /* matrix: [0..v..M][0..1..i..L], Right mode */
1527   float    *Jl_pp_mem;     /* the actual mem for Jl_pp */
1528   float    *Ll_pp_mem;     /* the actual mem for Ll_pp */
1529   float    *Jr_pp_mem;     /* the actual mem for Jr_pp */
1530   float    *Rr_pp_mem;     /* the actual mem for Rr_pp */
1531   float    *sum;           /* [0..1..i..L] log of the summed posterior
1532 		 	    * probability that residue i was emitted
1533 			    * either leftwise or rightwise by any state.
1534 			    * Used for normalizing *l_pp and *r_pp.
1535 			    */
1536   CP9Bands_t *cp9b;        /* the CP9Bands_t object associated with
1537 			    * this matrix. We use the imin and imax
1538 			    * arrays as bands on l_pp and jmin and jmax
1539 			    * arrays as bands on r_pp. Only a
1540 			    * reference, so don't free it when mx is
1541 			    * freed. */
1542 } CM_TR_HB_EMIT_MX;
1543 
1544 
1545 /***********************************************************************************
1546  * 31. CM_QDBINFO: model specific QDB information, including 2 sets of bands.
1547  ***********************************************************************************/
1548 
1549 enum cm_qdbinfo_setby_e    { CM_QDBINFO_SETBY_INIT = 0, CM_QDBINFO_SETBY_CMFILE = 1, CM_QDBINFO_SETBY_BANDCALC = 2, CM_QDBINFO_SETBY_SUBINIT };
1550 enum cm_w_setby_e          { CM_W_SETBY_INIT = 0, CM_W_SETBY_CMFILE = 1, CM_W_SETBY_BANDCALC = 2, CM_W_SETBY_CMDLINE = 3, CM_W_SETBY_SUBCOPY };
1551 
1552 /* Structure CM_QDBINFO: Per-model information on QDBs including
1553  * two sets of dmin/dmax values and the beta values used to
1554  * calculate them.
1555  */
1556 typedef struct cm_qdbinfo_s {
1557   int    M;       /* number of states in CM these QDBs are for, size of dmin/dmax arrays */
1558   double beta1;   /* tail loss probability for calculating QDBs dmin1/dmax1 */
1559   int   *dmin1;   /* [0..cm_M-1] minimum subsequence length d allowed for state v */
1560   int   *dmax1;   /* [0..cm_M-1] maximum subsequence length d allowed for state v */
1561   double beta2;   /* tail loss probability for calculating QDBs dmin2/dmax2 */
1562   int   *dmin2;   /* [0..cm_M-1] minimum subsequence length d allowed for state v */
1563   int   *dmax2;   /* [0..cm_M-1] maximum subsequence length d allowed for state v */
1564   enum cm_qdbinfo_setby_e setby; /* how current dmin/dmax values were set:
1565 				  * CM_QDBINFO_SETBY_INIT | CM_QDBINFO_SETBY_CMFILE | CM_QDBINFO_SETBY_BANDCALC */
1566 
1567 } CM_QDBINFO;
1568 
1569 /***********************************************************************************
1570  * 32. CM_SCAN_MX: matrices used for scanning CM DP algorithms; non-truncated.
1571  ***********************************************************************************/
1572 
1573 /* Structure CM_SCAN_MX:
1574  *
1575  * Information used by all CYK/Inside scanning
1576  * functions, compiled together into one data structure for
1577  * convenience. The matrix is allocated to allow either non-banded or
1578  * query-dependent banded (QDB) scans.
1579  *
1580  * QDB information, including two sets of dmin/dmax values, W, and
1581  * the three beta values used to calculate those numbers are in
1582  * <qdbinfo> (see that structure definition for more information).
1583  *
1584  * The CM_SCAN_MX contains three sets of dn and dx values for each
1585  * state and each possible endpoint in the sequence. The first set
1586  * dnAAA[0]/dxAAA[0] does not use bands. The second set
1587  * dnAAA[1]/dxAAA[1] uses the tighter set of bands in <qdbinfo> (which
1588  * is qdbinfo->dmin1/dmax1) and the third set dnAAA[2]/dxAAA[2] uses
1589  * the looser set of bands in <qdbinfo> (which is
1590  * qdbinfo->dmin2/dmax2). Each one of these sets is itself a
1591  * two-dimensional array indexed [0..j..W] (first dim) and [0..v..M-1]
1592  * (second dim), indicating the minimum and maximum d value to
1593  * be considered by a DP scanner for the sequence position j and
1594  * CM state v (if the position is > W, then the value for W is used).
1595  *
1596  * This set of three bands is necessary so we can call any DP scanner
1597  * function and specify the use of any of these three sets of bands,
1598  * and all the function has to do is point to the appropriate
1599  * dnAAA/dxAAA two-dimensional array.
1600  *
1601  * Additionally, each matrix can have valid float matrices, int
1602  * matrices or both. The status of each is stored in the <floats_valid>
1603  * and <ints_valid> parameters.
1604  */
1605 
1606 /* indexes for first-dimension of dnAAA/dxAAA in a CM_SCAN_MX or CM_TR_SCAN_MX */
1607 #define SMX_NOQDB       0
1608 #define SMX_QDB1_TIGHT  1
1609 #define SMX_QDB2_LOOSE  2
1610 #define NSMX_QDB_IDX 3
1611 
1612 typedef struct cm_scan_mx_s {
1613   /* general info about the model/search */
1614   CM_QDBINFO *qdbinfo;   /* a pointer to the qdbinfo related to the matrix */
1615   int      M;            /* copy of cm->M from cm this CM_SCAN_MX is associated with */
1616   int      W;            /* copy of cm->W from cm this CM_SCAN_MX is associated with */
1617   int   ***dnAAA;        /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] min d value allowed for posn j, state v using QDB set n */
1618   int   ***dxAAA;        /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] max d value allowed for posn j, state v using QDB set n */
1619   int     *bestr;        /* auxil info: best root state v at alpha[0][cur][d] (0->v local begin used if v != 0)*/
1620   float   *bestsc;       /* auxil info: best score for parsetree at alpha[0][cur][d] in mode bestmode[d] */
1621   int      floats_valid; /* TRUE if float alpha matrices are valid, FALSE if not */
1622   int      ints_valid;   /* TRUE if int   alpha matrices are valid, FALSE if not */
1623   float    size_Mb;      /* size of matrix in Megabytes */
1624 
1625   /* falpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for float implementations of CYK/Inside */
1626   float ***falpha;          /* non-BEGL_S states for float versions of CYK/Inside */
1627   float ***falpha_begl;     /*     BEGL_S states for float versions of CYK/Inside */
1628   float   *falpha_mem;      /* ptr to the actual memory for falpha */
1629   float   *falpha_begl_mem; /* ptr to the actual memory for falpha_begl */
1630 
1631   /* ialpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for integer implementations of CYK/Inside */
1632   int   ***ialpha;          /* non-BEGL_S states for int   versions of CYK/Inside */
1633   int   ***ialpha_begl;     /*     BEGL_S states for int   versions of CYK/Inside */
1634   int     *ialpha_mem;      /* ptr to the actual memory for ialpha */
1635   int     *ialpha_begl_mem; /* ptr to the actual memory for ialpha_begl */
1636 
1637   int64_t  ncells_alpha;      /* number of alloc'ed, valid cells for falpha and ialpha matrices, alloc'ed as contiguous block */
1638   int64_t  ncells_alpha_begl; /* number of alloc'ed, valid cells for falpha_begl and ialpha_begl matrices, alloc'ed as contiguous block */
1639 } CM_SCAN_MX;
1640 
1641 
1642 /***********************************************************************************
1643  * 33. CM_TR_SCAN_MX: matrices used for scanning CM DP algorithms; truncated.
1644  ***********************************************************************************/
1645 
1646 /* Structure CM_TR_SCAN_MX:
1647  *
1648  * Similar to CM_SCAN_MX except that additional matrices are allocated
1649  * for L and R marginal type (truncated) alignments. Also, dmin values cannot be
1650  * enforced for any state because in truncated alignments we assume a
1651  * truncation is possible anywhere so enforcing minimum subsequence lengths
1652  * is illogical. Enforcing maximum lengths (dmax) still makes sense
1653  * though and these are enforced. See the explanation of CM_SCAN_MX
1654  * above for more information.
1655  */
1656 typedef struct cm_tr_scan_mx_s {
1657   /* general info about the model/search */
1658   CM_QDBINFO *qdbinfo;   /* a pointer to the qdbinfo related to the matrix */
1659   int      M;            /* copy of cm->M from cm this CM_TR_SCAN_MX is associated with */
1660   int      W;            /* copy of cm->W from cm this CM_TR_SCAN_MX is associated with */
1661   int   ***dnAAA;        /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] min d value allowed for posn j, state v using QDB set n */
1662   int   ***dxAAA;        /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] max d value allowed for posn j, state v using QDB set n */
1663   int     *bestr;        /* auxil info: best root state v at alpha[0][cur][d] (0->v truncated begin used if v != 0)*/
1664   float   *bestsc;       /* auxil info: best score for parsetree at alpha[0][cur][d] in mode bestmode[d] */
1665   char    *bestmode;     /* auxil info: best mode for parsetree at alpha[0][cur][d], gives score in bestsc[d] */
1666   int      floats_valid; /* TRUE if float alpha matrices are valid, FALSE if not */
1667   int      ints_valid;   /* TRUE if int   alpha matrices are valid, FALSE if not */
1668   float    size_Mb;      /* size of matrix in Megabytes */
1669 
1670   /* f{J,L,R,T}alpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for float implementations of CYK/Inside */
1671   float ***fJalpha;          /* non-BEGL_S states for float versions of CYK/Inside */
1672   float ***fJalpha_begl;     /*     BEGL_S states for float versions of CYK/Inside */
1673   float   *fJalpha_mem;      /* ptr to the actual memory for fJalpha */
1674   float   *fJalpha_begl_mem; /* ptr to the actual memory for fJalpha_begl */
1675 
1676   float ***fLalpha;          /* non-BEGL_S states for float versions of CYK/Inside */
1677   float ***fLalpha_begl;     /*     BEGL_S states for float versions of CYK/Inside */
1678   float   *fLalpha_mem;      /* ptr to the actual memory for fLalpha */
1679   float   *fLalpha_begl_mem; /* ptr to the actual memory for fLalpha_begl */
1680 
1681   float ***fRalpha;          /* non-BEGL_S states for float versions of CYK/Inside */
1682   float ***fRalpha_begl;     /*     BEGL_S states for float versions of CYK/Inside */
1683   float   *fRalpha_mem;      /* ptr to the actual memory for fRalpha */
1684   float   *fRalpha_begl_mem; /* ptr to the actual memory for fRalpha_begl */
1685 
1686   float ***fTalpha;          /* BIF states for float versions of CYK/Inside */
1687   float   *fTalpha_mem;      /* ptr to the actual memory for fTalpha */
1688 
1689   /* i{J,L,R,T}alpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for integer implementations of CYK/Inside */
1690   int   ***iJalpha;          /* non-BEGL_S states for int   versions of CYK/Inside */
1691   int   ***iJalpha_begl;     /*     BEGL_S states for int   versions of CYK/Inside */
1692   int     *iJalpha_mem;      /* ptr to the actual memory for iJalpha */
1693   int     *iJalpha_begl_mem; /* ptr to the actual memory for iJalpha_begl */
1694 
1695   int   ***iLalpha;          /* non-BEGL_S states for int   versions of CYK/Inside */
1696   int   ***iLalpha_begl;     /*     BEGL_S states for int   versions of CYK/Inside */
1697   int     *iLalpha_mem;      /* ptr to the actual memory for iLalpha */
1698   int     *iLalpha_begl_mem; /* ptr to the actual memory for iLalpha_begl */
1699 
1700   int   ***iRalpha;          /* non-BEGL_S states for int   versions of CYK/Inside */
1701   int   ***iRalpha_begl;     /*     BEGL_S states for int   versions of CYK/Inside */
1702   int     *iRalpha_mem;      /* ptr to the actual memory for iRalpha */
1703   int     *iRalpha_begl_mem; /* ptr to the actual memory for iRalpha_begl */
1704 
1705   int   ***iTalpha;          /* BIF states for int   versions of CYK/Inside */
1706   int     *iTalpha_mem;      /* ptr to the actual memory for iTalpha */
1707 
1708   int64_t  ncells_alpha;      /* number of alloc'ed, valid cells for f{J,L,R}alpha and i{J,L,R}alpha matrices, alloc'ed as contiguous block */
1709   int64_t  ncells_alpha_begl; /* number of alloc'ed, valid cells for f{J,L,R}alpha_begl and i{J,L,R}alpha_begl matrices, alloc'ed as contiguous block */
1710   int64_t  ncells_Talpha;     /* number of alloc'ed, valid cells for fTalpha and iTalpha matrices, alloc'ed as contiguous block */
1711 } CM_TR_SCAN_MX;
1712 
1713 
1714 
1715 /*************************************************************************************
1716  * 34. CM_TR_PENALTIES: pass, state and locality-specific truncated alignment penalties.
1717  *************************************************************************************/
1718 
1719 /* Truncation penalty parameters, we either allow 5' truncation, 3' truncation or both.
1720  * These allow truncated DP aligners and scanners (cm_dpalign_trunc.c and cm_dpsearch_trunc.c)
1721  * to know which truncation penalty to apply.
1722  */
1723 #define TRPENALTY_5P_AND_3P 0
1724 #define TRPENALTY_5P_ONLY   1
1725 #define TRPENALTY_3P_ONLY   2
1726 #define NTRPENALTY          3
1727 #define TRPENALTY_NONE     -1
1728 
1729 typedef struct cm_tr_penalties_s {
1730   int     M;          /* number of states in the CM this object is associated with */
1731   int     ignored_inserts; /* TRUE if inserts were ignored, FALSE if not (normally this is FALSE) */
1732   float **g_ptyAA;    /* g_ptyAA[TRPENALTY_5P_AND_3][0..v..M-1] sc penalty for global aln if we allowed 5' and 3' truncation
1733 		       * g_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1]  sc penalty for global aln if we allowed 5' truncation only
1734 		       * g_ptyAA[TRPENALTY_3P_ONLY][0..v..M-1]  sc penalty for global aln if we allowed 3' truncation only */
1735   float **l_ptyAA;    /* l_ptyAA[TRPENALTY_5P_AND_3][0..v..M-1] sc penalty for local aln if we allowed 5' and 3' truncation
1736 		       * l_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1]  sc penalty for local aln if we allowed 5' truncation only
1737 		       * l_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1]  sc penalty for local aln if we allowed 3' truncation only */
1738   int  **ig_ptyAA;    /* same as g_ptyAA but scaled int scores */
1739   int  **il_ptyAA;    /* same as l_ptyAA but scaled int scores */
1740 } CM_TR_PENALTIES;
1741 
1742 
1743 /*****************************************************************
1744  * 35. CM_t: a covariance model
1745  *****************************************************************/
1746 
1747 /* Structure: CM_t
1748  * Incept:    SRE, 9 Mar 2000 [San Carlos CA]
1749  *
1750  * A covariance model. M states, arranged logically as a directed graph
1751  * (on a binary tree backbone); arranged physically as a set of arrays 0..M-1.
1752  *
1753  * State 0 is always the root state. State M-1 is always an end state.
1754  *
1755  * EPN 12.19.06: added arrays to hold integer log-odds scores for faster
1756  * inside/outside
1757  *
1758  */
1759 typedef struct cm_s {
1760 			/* General information about the model:                          */
1761   char    *name;        /* name of the model                            (mandatory)      */ /* String, \0-terminated   */
1762   char    *acc;	        /* accession number of model (Rfam)             (CMH_ACC)        */ /* String, \0-terminated   */
1763   char    *desc;        /* brief (1-line) description of model          (CMH_DESC))      */ /* String, \0-terminated   */
1764   char    *rf;          /* reference line from alignment    1..clen     (CMH_RF)         */ /* String; 0=' ', clen+1='\0' */
1765   char    *consensus;   /* consensus residue line           1..clen     (CMH_CONS)       */ /* String; 0=' ', clen+1='\0' */
1766   uint32_t checksum;    /* checksum of training sequences               (CMH_CHKSUM)     */
1767   int     *map;         /* map of alignment cols onto model 1..clen     (CMH_MAP)        */ /* Array; map[0]=0 */
1768 
1769   /* new as of v1.0 */
1770   char  *comlog;        /*   command line(s) that built, modified model (optional: NULL) */ /* String, \0-terminated   */
1771   char  *ctime;	        /*   creation date                              (optional: NULL) */
1772   int    nseq;		/*   number of training sequences               (mandatory)      */
1773   float  eff_nseq;	/*   effective number of seqs (<= nseq)         (mandatory)      */
1774   float  ga;	        /*   per-seq gathering thresholds (bits)        (CMH_GA)         */
1775   float  tc;            /*   per-seq trusted cutoff (bits)              (CMH_TC)         */
1776   float  nc;	        /*   per-seq noise cutoff (bits)                (CMH_NC)         */
1777 
1778 			/* Information about the null model:                     */
1779   float *null;          /*   residue probabilities [0..3]                        */
1780 
1781 			/* Information about the state type:                     */
1782   int   M;		/*   number of states in the model                       */
1783   int   clen;		/*   consensus length (2*MATP+MATL+MATR)                 */
1784   char *sttype;		/*   type of state this is; e.g. MP_st                   */
1785   int  *ndidx;		/*   index of node this state belongs to                 */
1786   char *stid;		/*   unique state identifier; e.g. MATP_MP               */
1787 
1788 			/* Information about its connectivity in CM:             */
1789   int  *cfirst;		/*   index of left child state                           */
1790   int  *cnum;		/*   overloaded: for non-BIF: # connections;             */
1791 			/*               for BIF: right child S_st               */
1792   int  *plast;          /*   index to first parent state                         */
1793   int  *pnum;           /*   number of parent connections                        */
1794 
1795 			/* Information mapping nodes->states                     */
1796   int   nodes;		/*   number of nodes in the model                        */
1797   int  *nodemap;        /*   nodemap[5] = idx first state, node 5                */
1798   char *ndtype;		/*   type of node, e.g. MATP_nd                          */
1799 
1800                         /* Parameters of the probabilistic model:                */
1801   float **t;		/*   Transition prob's [0..M-1][0..MAXCONNECT-1]         */
1802   float **e;		/*   Emission probabilities.  [0..M-1][0..15]            */
1803   float  *begin;	/*   Local alignment start probabilities [0..M-1]        */
1804   float  *end;		/*   Local alignment ending probabilities [0..M-1]       */
1805 
1806 			/* Parameters of the log odds model:                     */
1807   float **tsc;		/*   Transition score vector, log odds                   */
1808   float **esc;		/*   Emission score vector, log odds                     */
1809   float **oesc;         /*   Optimized emission score log odds float vec         */
1810   float **lmesc;        /*   Left marginal emission scores (log odds)            */
1811   float **rmesc;        /*   Right marginal emission scores (log odds)           */
1812   float *beginsc;	/*   Score for ROOT_S -> state v (local alignment)       */
1813   float *endsc;   	/*   Score for state_v -> EL (local alignment)           */
1814 
1815 			/* Scaled int parameters of the log odds model:          */
1816   int  **itsc;		/*   Transition score vector, scaled log odds int        */
1817   int  **iesc;		/*   Emission score vector, scaled log odds int          */
1818   int  **ioesc;         /*   Optimized emission score log odds int vector        */
1819   int  **ilmesc;        /*   Left marginal emission scores (log odds int)        */
1820   int  **irmesc;        /*   Right marginal emission scores (log odds int)       */
1821   int   *ibeginsc;      /*   Score for ROOT_S -> state v (local alignment)       */
1822   int   *iendsc;  	/*   Score for state_v -> EL (local alignment)           */
1823 
1824   float  pbegin;        /* local begin prob to spread across internal nodes for local mode    */
1825   float  pend;          /* local end prob to spread across internal nodes for local mode      */
1826 
1827   float  null2_omega;   /* prior probability of the null2 model (if it is used) */
1828   float  null3_omega;   /* prior probability of the null3 model (if it is used) */
1829 
1830   int    flags;		/* status flags                                    */
1831 
1832   /* W and query dependent bands (QDBs) on subsequence lengths at each state */
1833   int    W;             /* max d: max size of a hit (EPN 08.18.05)                 */
1834   double beta_W;        /* tail loss probability for QDB calculation used to set W */
1835   enum cm_w_setby_e W_setby; /* how current W value was set:
1836 			      * CM_W_SETBY_INIT | CM_W_SETBY_CMFILE | CM_W_SETBY_BANDCALC | CM_W_SETBY_CMDLINE | CM_W_SETBY_SUBCOPY */
1837   /* regarding W: if W_setby is CM_W_SETBY_INIT or CM_W_SETBY_CMDLINE, then beta_W does not correspond
1838    * to a band calculation beta value used to compute W (set as dmax[0]). Otherwise, it does. */
1839 
1840   CM_QDBINFO *qdbinfo;  /* two sets of QDBs and the beta values used to calc them  */
1841 
1842   double  tau;          /* tail loss probability for HMM target dependent banding             */
1843   double  maxtau;       /* maximum allowed tau value for HMM band tightening                  */
1844 
1845   int         config_opts;/* model configuration options                                        */
1846   int         align_opts; /* alignment options                                                  */
1847   int         search_opts;/* search options                                                     */
1848   float      *root_trans; /* transition probs from state 0, saved IFF zeroed in ConfigLocal()   */
1849 
1850   float  el_selfsc;     /* score of a self transition in the EL state
1851 			 * the EL state emits only on self transition (EPN 11.15.05)*/
1852   int   iel_selfsc;     /* scaled int version of el_selfsc         */
1853 
1854   /* CP9 HMMs and associated data structures. These are built and
1855    * configured in cm_modelconfig.c:ConfigCM(). <cp9> is always built
1856    * and is configured (local begins/ ends and ELs) to match the CM.
1857    * <Lcp9>, <Rcp9> and <Tcp9> are for truncated alignment, one each
1858    * for L, R and T modes. These are built only if the
1859    * CM_CONFIG_TRUNC flag is raised. They are configured to match
1860    * their alignment mode. For example, Lcp9 has a global-like begin
1861    * (prob of ~1.0 into match state 1) but local-like ends
1862    * (equiprobable end points) to match the possibility of a 3'
1863    * truncation (L mode alignment). If the CM_CONFIG_TRUNC_NOFORCE
1864    * flag is raised then Lcp9, Rcp9, Tcp9 are all configured
1865    * identically with equiprobable begin/ends (this is wasteful,
1866    * we only need one in this case, but its easier to deal with
1867    * this way and its non-default).
1868    */
1869   CP9_t      *cp9;        /* a CM Plan 9 HMM, built from and configured to match CM */
1870   CP9_t      *Lcp9;       /* a CM Plan 9 HMM for L mode truncated alignment (global begins, equiprobable local ends) */
1871   CP9_t      *Rcp9;       /* a CM Plan 9 HMM for R mode truncated alignment (equiprobable local begins, global ends) */
1872   CP9_t      *Tcp9;       /* a CM Plan 9 HMM for T mode truncated alignment (equiprobable local begin/ends) */
1873   CP9Map_t   *cp9map;     /* the map from the Plan 9 HMM to the CM and vice versa   */
1874   CP9Bands_t *cp9b;       /* the CP9 bands                                          */
1875 
1876   /* DP matrices and some auxiliary info for DP algorithms */
1877   /* for standard HMM banded CM alignment/search */
1878   CM_HB_MX           *hb_mx;     /* growable HMM banded float matrix */
1879   CM_HB_MX           *hb_omx;    /* another, growable HMM banded float matrix for Outside/Posterior calcs */
1880   CM_HB_EMIT_MX      *hb_emx;    /* growable HMM banded emit matrix for residue posterior probability calcs */
1881   CM_HB_SHADOW_MX    *hb_shmx;   /* growable HMM banded shadow matrix, for alignment tracebacks */
1882   /* for truncated HMM banded CM alignment/search */
1883   CM_TR_HB_MX        *trhb_mx;   /* growable truncated HMM banded float matrix */
1884   CM_TR_HB_MX        *trhb_omx;  /* another, growable truncated HMM banded float matrix for Outside/Posterior calcs */
1885   CM_TR_HB_EMIT_MX   *trhb_emx;  /* growable truncated HMM banded emit matrix for residue posterior probability calcs */
1886   CM_TR_HB_SHADOW_MX *trhb_shmx; /* growable truncated HMM banded shadow matrix, for alignment tracebacks */
1887 
1888   /* for standard non-banded CM alignment/search (these will usually stay unallocated, as NULL) */
1889   CM_MX              *nb_mx;     /* growable non-banded float matrix */
1890   CM_MX              *nb_omx;    /* another, growable non-banded float matrix for Outside/Posterior calcs */
1891   CM_EMIT_MX         *nb_emx;    /* growable non-banded emit matrix for residue posterior probability calcs */
1892   CM_SHADOW_MX       *nb_shmx;   /* growable non-banded shadow matrix, for alignment tracebacks */
1893   /* for truncated HMM banded CM alignment/search (these will usually stay unallocated, as NULL) */
1894   CM_TR_MX           *trnb_mx;   /* growable truncated non-banded float matrix */
1895   CM_TR_MX           *trnb_omx;  /* another, growable truncated non-banded float matrix for Outside/Posterior calcs */
1896   CM_TR_EMIT_MX      *trnb_emx;  /* growable truncated non-banded emit matrix for residue posterior probability calcs */
1897   CM_TR_SHADOW_MX    *trnb_shmx; /* growable truncated non-banded shadow matrix, for alignment tracebacks */
1898 
1899   /* for standard non-HMM banded CM search */
1900   CM_SCAN_MX         *smx;      /* matrices, info for CYK/Inside scans with this CM */
1901   /* for truncated non-HMM banded CM search */
1902   CM_TR_SCAN_MX      *trsmx;    /* matrices, info for CYK/Inside scans with this CM */
1903   /* for CP9 HMM search/alignment */
1904   CP9_MX             *cp9_mx;   /* growable CP9 DP matrix */
1905   CP9_MX             *cp9_bmx;  /* another growable CP9 DP matrix, 'b' is for backward,
1906 				 * only alloc'ed to any significant size if we do Forward,Backward->Posteriors */
1907 
1908   /* statistics */
1909   ExpInfo_t       **expA;  /* Exponential tail stats, [0..EXP_NMODES-1]  */
1910 
1911   /* p7 hmms, added 08.05.08 */
1912   P7_HMM       *mlp7;         /* the maximum likelihood p7 HMM, built from the CM  */
1913   P7_HMM       *fp7;          /* the filter p7 HMM, read from CM file */
1914   float         fp7_evparam[CM_p7_NEVPARAM]; /* E-value params (CMH_FP7_STATS) */
1915 
1916   const  ESL_ALPHABET *abc; /* ptr to alphabet info (cm->abc->K is alphabet size)*/
1917   off_t    offset;          /* CM record offset on disk                              */
1918 
1919   /* additional data added for convenience, post-v1.0.2, pre-v1.1) */
1920   CMEmitMap_t     *emap;   /* maps model nodes to consensus positions */
1921   CMConsensus_t   *cmcons; /* consensus information for CM_ALIDISPLAY */
1922   CM_TR_PENALTIES *trp;    /* stores truncation bit score penalties */
1923 
1924 } CM_t;
1925 
1926 /* status flags, cm->flags */
1927 #define CMH_BITS                (1<<0)  /* CM has valid log odds scores             */
1928 #define CMH_ACC                 (1<<1)  /* accession number is available            */
1929 #define CMH_DESC                (1<<2)  /* description exists                       */
1930 #define CMH_RF                  (1<<3)  /* reference exists                         */
1931 #define CMH_GA                  (1<<4)  /* gathering threshold exists               */
1932 #define CMH_TC                  (1<<5)  /* trusted cutoff exists                    */
1933 #define CMH_NC                  (1<<6)  /* noise cutoff exists                      */
1934 #define CMH_CHKSUM              (1<<7)  /* checksum exists                          */
1935 #define CMH_MAP                 (1<<8)  /* alignment map exists                     */
1936 #define CMH_CONS                (1<<9)  /* consensus sequence exists                */
1937 #define CMH_LOCAL_BEGIN         (1<<10) /* Begin distribution is active (local ali) */
1938 #define CMH_LOCAL_END           (1<<11) /* End distribution is active (local ali)   */
1939 #define CMH_EXPTAIL_STATS       (1<<12) /* exponential tail stats set               */
1940 #define CMH_CP9                 (1<<13) /* CP9 HMM is valid in cm->cp9              */
1941 #define CMH_CP9_TRUNC           (1<<14) /* cm->Lcp9, cm->Rcp9, cm->Tcp9 are valid   */
1942 #define CMH_MLP7                (1<<15) /* 'maximum likelihood' cm->mlp7 is valid   */
1943 #define CMH_FP7                 (1<<16) /* filter p7 is valid in cm->fp7            */
1944 #define CM_IS_SUB               (1<<17) /* the CM is a sub CM                       */
1945 #define CM_IS_RSEARCH           (1<<18) /* the CM was parameterized a la RSEARCH    */
1946 #define CM_RSEARCHTRANS         (1<<19) /* CM has/will have RSEARCH transitions     */
1947 #define CM_RSEARCHEMIT          (1<<20) /* CM has/will have RSEARCH emissions       */
1948 #define CM_EMIT_NO_LOCAL_BEGINS (1<<21) /* emitted parsetrees will never have local begins */
1949 #define CM_EMIT_NO_LOCAL_ENDS   (1<<22) /* emitted parsetrees will never have local ends   */
1950 #define CM_IS_CONFIGURED        (1<<23) /* TRUE if CM has been configured in some way */
1951 
1952 /* model configuration options, cm->config_opts */
1953 #define CM_CONFIG_LOCAL         (1<<0)  /* configure the model for local alignment */
1954 #define CM_CONFIG_HMMLOCAL      (1<<1)  /* configure the CP9 for local alignment   */
1955 #define CM_CONFIG_HMMEL         (1<<2)  /* configure the CP9 for EL local aln      */
1956 #define CM_CONFIG_QDB           (1<<3)  /* recalculate query dependent bands       */
1957 #define CM_CONFIG_W_BETA        (1<<4)  /* recalculate W using band calculation    */
1958 #define CM_CONFIG_TRUNC         (1<<5)  /* set up for truncated alignment (cm->{L,R,T}cp9 will be built */
1959 #define CM_CONFIG_SCANMX        (1<<6)  /* create a CM_SCAN_MX in cm->smx          */
1960 #define CM_CONFIG_TRSCANMX      (1<<7)  /* create a CM_TR_SCAN_MX in cm->trsmx     */
1961 #define CM_CONFIG_SUB           (1<<8)  /* set up for submodel alignment (cm->cp9 gets equiprobable begin/ends) */
1962 #define CM_CONFIG_NONBANDEDMX   (1<<9)  /* set up for non-banded alignment (cm->*nb*mx will be created) */
1963 
1964 /* alignment options, cm->align_opts */
1965 #define CM_ALIGN_HBANDED       (1<<0)  /* use CP9 HMM bands                        */
1966 #define CM_ALIGN_P7BANDED      (1<<1)  /* use p7 HMM bands to band the CP9 HMM (CAUTION: only partially implemented) */
1967 #define CM_ALIGN_NONBANDED     (1<<2)  /* do not use HMM bands                     */
1968 #define CM_ALIGN_CYK           (1<<3)  /* aln wwith CYK algorithm                  */
1969 #define CM_ALIGN_OPTACC        (1<<4)  /* aln w/Holmes/Durbin opt accuracy         */
1970 #define CM_ALIGN_SAMPLE        (1<<5)  /* sample parsetrees from the inside matrix */
1971 #define CM_ALIGN_POST          (1<<6)  /* do inside/outside and append posteriors  */
1972 #define CM_ALIGN_SMALL         (1<<7)  /* use small CYK D&C                        */
1973 #define CM_ALIGN_SUMS          (1<<8)  /* if using HMM bands, use posterior sums   */
1974 #define CM_ALIGN_SUB           (1<<9)  /* build a sub CM for each seq to align     */
1975 #define CM_ALIGN_HMMVITERBI    (1<<10) /* use a CP9 HMM only to align, w/viterbi   */
1976 #define CM_ALIGN_CHECKINOUT    (1<<11) /* check inside/outside calculations        */
1977 #define CM_ALIGN_CHECKPARSESC  (1<<12) /* check parsetree score against aln alg sc */
1978 #define CM_ALIGN_PRINTTREES    (1<<13) /* print parsetrees to stdout               */
1979 #define CM_ALIGN_HMMSAFE       (1<<14) /* realign seqs w/HMM banded CYK bit sc < 0 */
1980 #define CM_ALIGN_SCOREONLY     (1<<15) /* do full CYK/inside to get score only     */
1981 #define CM_ALIGN_FLUSHINSERTS  (1<<16) /* flush inserts L/R like pre 1.0 infernal  */
1982 #define CM_ALIGN_CHECKFB       (1<<17) /* check forward/backward CP9 HMM calcs     */
1983 #define CM_ALIGN_HMM2IJOLD     (1<<18) /* use old hmm2ij band calculation alg      */
1984 #define CM_ALIGN_QDB           (1<<19) /* align with QDBs                          */
1985 #define CM_ALIGN_INSIDE        (1<<20) /* use Inside algorithm                     */
1986 #define CM_ALIGN_TRUNC         (1<<21) /* use truncated alignment algorithms       */
1987 #define CM_ALIGN_XTAU          (1<<22) /* multiply tau until banded mx size < limit*/
1988 
1989 /* search options, cm->search_opts */
1990 #define CM_SEARCH_HBANDED      (1<<0)  /* use HMM bands to search (default)        */
1991 #define CM_SEARCH_QDB          (1<<1)  /* use QDBs to search                       */
1992 #define CM_SEARCH_NONBANDED    (1<<2)  /* do not use QDBs or HMM bands for search  */
1993 #define CM_SEARCH_HMMALNBANDS  (1<<3)  /* force full aln when deriving HMM bands   */
1994 #define CM_SEARCH_SUMS         (1<<4)  /* if using HMM bands, use posterior sums   */
1995 #define CM_SEARCH_INSIDE       (1<<5)  /* scan with Inside, not CYK                */
1996 #define CM_SEARCH_NOALIGN      (1<<6)  /* don't align hits, just report locations  */
1997 #define CM_SEARCH_RSEARCH      (1<<7)  /* use RSEARCH parameterized CM             */
1998 #define CM_SEARCH_CMNOTGREEDY  (1<<8)  /* don't use greedy alg to resolve CM overlaps */
1999 #define CM_SEARCH_HMM2IJOLD    (1<<9)  /* use old hmm2ij band calculation alg      */
2000 #define CM_SEARCH_NULL2        (1<<10) /* use NULL2 score correction               */
2001 #define CM_SEARCH_NULL3        (1<<11) /* use NULL3 score correction               */
2002 
2003 /*****************************************************************
2004  * 36. CM_FILE:  a CM save file or database, open for reading.
2005  *****************************************************************/
2006 /* These tags need to be in temporal order, so we can do tests
2007  * like "if (format >= CM_FILE_1a) ..."
2008  */
2009 enum cm_file_formats_e {
2010   CM_FILE_1  = 0, /* Infernal v1.0->v1.0.2 */
2011   CM_FILE_1a = 1,
2012 };
2013 
2014 typedef struct cm_file_s {
2015   FILE         *f;		 /* pointer to stream for reading models                 */
2016   char         *fname;	         /* (fully qualified) name of the CM file; [STDIN] if -  */
2017   ESL_SSI      *ssi;		 /* open SSI index for model file <f>; NULL if none.     */
2018 
2019   int           do_gzip;	/* TRUE if f is "gzip -dc |" (will pclose(f))           */
2020   int           do_stdin;       /* TRUE if f is stdin (won't close f)                   */
2021   int           newly_opened;	/* TRUE if we just opened the stream (and parsed magic) */
2022   int           is_binary;	/* TRUE if a binary file (output with WriteBinary)      */
2023   int           is_pressed;	/* TRUE if a pressed CM database file (Rfam or equiv)   */
2024 
2025   int            format;	/* CM file format code */
2026   int           (*parser)(struct cm_file_s *, int, ESL_ALPHABET **, CM_t **);
2027   ESL_FILEPARSER *efp;
2028 
2029   P7_HMMFILE   *hfp;            /* for reading p7 HMMs within the CM file */
2030 
2031   /* If <is_pressed>, we can read HMM filters directly, via: */
2032   FILE         *ffp;		/* MSV part of the optimized profile HMM */
2033   FILE         *pfp;		/* rest of the optimized profile HMM     */
2034 
2035 #ifdef HMMER_THREADS
2036   int              syncRead;
2037   pthread_mutex_t  readMutex;
2038 #endif
2039 
2040   char          errbuf[eslERRBUFSIZE];
2041 } CM_FILE;
2042 
2043 /* note on <fname>, above:
2044  * this is the actual name of the CM file being read.
2045  *
2046  * The way cm_file_Open() works, it will preferentially look for
2047  * cmpress'ed binary files. If you open "foo", it will first try to
2048  * open "foo.i1m" and <fname> will be "foo.i1m". "foo" does not even
2049  * have to exist. If a parsing error occurs, you want <fname> to
2050  * be "foo.i1m", so error messages report blame correctly.
2051  * In the special case of reading from stdin, <fname> is "[STDIN]".
2052  */
2053 
2054 
2055 /***********************************************************************************
2056  * 37. CM_PLI_ACCT: pass specific statistics for a search/scan pipeline.
2057  ***********************************************************************************/
2058 
2059 /* Pipeline pass indices. The pipeline potentially does multiple
2060  * passes over each sequence. Don't muck with the order here, it'll
2061  * screw up things in strange ways. For example, PLI_PASS_STD_ANY must
2062  * come before the 5P and 3P truncated stages, cm_Pipeline() depends
2063  * on it.
2064  *
2065  * Not all passes are performed in a pipeline. If pli->do_trunc_ends,
2066  * passes 1,2,3,4 are performed. If pli->do_trunc_5p_ends, passes
2067  * 1 and 2 are performed. If pli->do_trunc_3p_ends, passes 1
2068  * and 3 are performed. If pli->do_trunc_any, passes 1 and 5 are
2069  * performed. If pli->do_trunc_only, only pass 5 is performed.
2070  * If pli->do_only_trunc_5p_and_3p_ends, only pass 4 is
2071  * performed. If pli->do_hmmonly_cur, only pass 6 is performed. If
2072  * none of these flags is TRUE only pass 1 is performed.
2073  *
2074  * These values have two interrelated but different roles:
2075  *
2076  * 1. [1..6] are flags indicating which type of truncated alignment
2077  * is/was allowed in/by a CM DP scanner/alignment function. Each pass of
2078  * the pipeline allows a different combination of 5' and/or 3'
2079  * truncated alignment and differs in whether it enforces the
2080  * first and/or final residue be included (_FORCE suffixed) or
2081  * not (_ANY suffixed). For the PLI_PASS_HMM_ONLY_ANY no CM
2082  * algorithms will be called but _ANY is used as a suffix because
2083  * HMM local alignment algorithms allow 5' and 3' truncation.
2084  *
2085  * A wrinkle is that these indices are used for DP truncated alignment
2086  * functions called for 'cmalign' (either PLI_PASS_5P_AND_3P_FORCE or
2087  * PLI_PASS_STD_ANY) even though those functions are not called as
2088  * part of a search/scan pipeline. In this case, the pass index is
2089  * still relevant in informing the alignment function which truncation
2090  * penalty score to apply to any resulting alignment score.
2091  *
2092  * 2. [0..6] are indices in cm->pli->acct[], accounting states for each
2093  * pass of the pipeline.
2094  */
2095 #define PLI_PASS_CM_SUMMED       0
2096 #define PLI_PASS_STD_ANY         1  /* only standard alns allowed, no truncated ones, any subseq */
2097 #define PLI_PASS_5P_ONLY_FORCE   2  /* only 5' truncated alns allowed, first (i0) residue must be included */
2098 #define PLI_PASS_3P_ONLY_FORCE   3  /* only 3' truncated alns allowed, final (j0) residue must be included */
2099 #define PLI_PASS_5P_AND_3P_FORCE 4  /* 5' and 3' truncated alns allowed, first & final (i0 & j0) residue must be included */
2100 #define PLI_PASS_5P_AND_3P_ANY   5  /* 5' and 3' truncated alns allowed, any subseq can comprise hit */
2101 #define PLI_PASS_HMM_ONLY_ANY    6  /* HMM only pass, all types of truncated hits are allowed in local HMM algs */
2102 #define NPLI_PASSES              7
2103 
2104 typedef struct cm_pipeline_accounting_s {
2105   /* CM_PIPELINE accounting. (reduceable in threaded/MPI parallel version)
2106    * Each pipeline pass keeps track of its own accounting, so we know how
2107    * many residues were searched in each pass.
2108    *
2109    * <npli_top> and <npli_bot> keep track of number of times pipeline
2110    * was run for each pass, this is not the same as the number of
2111    * sequences searched in *any* pass (that's <pli->nseq>), but is
2112    * equal to the number of sequences searched in the
2113    * PLI_PASS_5P_AND_3P_FORCE pass, since that pass only takes place
2114    * for sequences shorter or equal to <pli->maxW>.
2115    */
2116   uint64_t      npli_top;          /* # of times pipeline called on top strand */
2117   uint64_t      npli_bot;          /* # of times pipeline called on bot strand */
2118   uint64_t      nres_top;	   /* # of residues searched on top strand     */
2119   uint64_t      nres_bot;	   /* # of residues searched on bottom strand  */
2120   uint64_t      n_past_msv;	   /* # windows that pass MSVFilter()          */
2121   uint64_t      n_past_vit;	   /* # windows that pass ViterbiFilter()      */
2122   uint64_t      n_past_fwd;	   /* # windows that pass ForwardFilter()      */
2123   uint64_t      n_past_gfwd;	   /* # windows that pass glocal GForward()    */
2124   uint64_t      n_past_edef;	   /* # envelopes that pass envelope definition */
2125   uint64_t      n_past_cyk;	   /* # windows that pass CYK filter           */
2126   uint64_t      n_past_ins;	   /* # windows that pass Inside               */
2127   uint64_t      n_output;	   /* # alignments that make it to the final output */
2128   uint64_t      n_past_msvbias;	   /* # windows that pass MSV bias filter      */
2129   uint64_t      n_past_vitbias;	   /* # windows that pass Vit bias filter      */
2130   uint64_t      n_past_fwdbias;	   /* # windows that pass Fwd bias filter      */
2131   uint64_t      n_past_gfwdbias;   /* # windows that pass gFwd bias filter     */
2132   uint64_t      n_past_edefbias;   /* # envelopes that pass env bias filter    */
2133   uint64_t      pos_past_msv;	   /* # positions that pass MSVFilter()        */
2134   uint64_t      pos_past_vit;	   /* # positions that pass ViterbiFilter()    */
2135   uint64_t      pos_past_fwd;	   /* # positions that pass ForwardFilter()    */
2136   uint64_t      pos_past_gfwd;	   /* # positions that pass glocal GForward()  */
2137   uint64_t      pos_past_edef;	   /* # positions that pass env definition     */
2138   uint64_t      pos_past_cyk;	   /* # positions that pass CYK filter         */
2139   uint64_t      pos_past_ins;      /* # positions that pass Inside             */
2140   uint64_t      pos_output;	   /* # positions that make it to the final output */
2141   uint64_t      pos_past_msvbias;  /* # positions that pass MSV bias filter */
2142   uint64_t      pos_past_vitbias;  /* # positions that pass Vit bias filter */
2143   uint64_t      pos_past_fwdbias;  /* # positions that pass Fwd bias filter */
2144   uint64_t      pos_past_gfwdbias; /* # positions that pass gFwd bias filter*/
2145   uint64_t      pos_past_edefbias; /* # positions that pass dom def bias filter */
2146   uint64_t      n_overflow_fcyk;   /* # hits that couldn't use an HMM banded mx in CYK filter stage */
2147   uint64_t      n_overflow_final;  /* # hits that couldn't use an HMM banded mx in final stage */
2148   uint64_t      n_aln_hb;          /* # HMM banded alignments computed */
2149   uint64_t      n_aln_dccyk;       /* # nonbanded divide and conquer CYK alignments computed */
2150 } CM_PLI_ACCT;
2151 
2152 
2153 /***********************************************************************************
2154  * 38. CM_PIPELINE: the accelerated seq/profile comparison pipeline
2155  ***********************************************************************************/
2156 
2157 enum cm_pipemodes_e     { CM_SEARCH_SEQS = 0, CM_SCAN_MODELS = 1 };
2158 enum cm_newmodelmodes_e { CM_NEWMODEL_MSV = 0, CM_NEWMODEL_CM = 1 };
2159 enum cm_zsetby_e        { CM_ZSETBY_SSIINFO = 0, CM_ZSETBY_SSI_AND_QLENGTH = 1, CM_ZSETBY_FILEREAD = 2, CM_ZSETBY_OPTION = 3, CM_ZSETBY_FILEINFO = 4};
2160 
2161 typedef struct cm_pipeline_s {
2162   /* Dynamic programming matrices                                           */
2163   P7_OMX       *oxf;		/* one-row Forward matrix, accel pipe       */
2164   P7_OMX       *oxb;		/* one-row Backward matrix, accel pipe      */
2165   P7_OMX       *fwd;		/* full Fwd matrix for envelopes            */
2166   P7_OMX       *bck;		/* full Bck matrix for envelopes            */
2167   P7_GMX       *gxf;		/* generic Forward matrix                   */
2168   P7_GMX       *gxb;		/* generic Backward matrix                  */
2169   P7_GMX       *gfwd;		/* generic full Fwd matrix for envelopes    */
2170   P7_GMX       *gbck;		/* generic full Bck matrix for envelopes    */
2171 
2172   enum cm_pipemodes_e mode;    	/* CM_SCAN_MODELS | CM_SEARCH_SEQS           */
2173   ESL_ALPHABET *abc;            /* ptr to alphabet info */
2174   CM_FILE      *cmfp;		/* COPY of open CM database (if scan mode, else NULl) */
2175   char          errbuf[eslERRBUFSIZE];
2176 
2177   /* Model-dependent parameters                                             */
2178   int 		maxW;           /* # residues to overlap in adjacent windows*/
2179   int 		cmW;            /* CM's window length                       */
2180   int 		clen;           /* CM's consensus length of model           */
2181   int64_t       cur_cm_idx;     /* model index currently being used         */
2182   int           cur_clan_idx;   /* clan index currently being used, -1 if none */
2183 
2184   int64_t       cur_seq_idx;    /* sequence index currently being searched */
2185   int64_t       cur_pass_idx;   /* pipeline pass index currently underway */
2186 
2187   /* Accounting. (reduceable in threaded/MPI parallel version)              */
2188   uint64_t      nseqs;	           /* # of sequences searched               */
2189   uint64_t      nmodels;           /* # of models searched, CM mode         */
2190   uint64_t      nnodes;	           /* # of model nodes searched, CM mode    */
2191   uint64_t      nmodels_hmmonly;   /* # of models searched, HMM only mode   */
2192   uint64_t      nnodes_hmmonly;	   /* # of model nodes, HMM only mode       */
2193   CM_PLI_ACCT   acct[NPLI_PASSES];
2194 
2195   /* Domain/envelope postprocessing                                         */
2196   ESL_RANDOMNESS *r;		/* random number generator                  */
2197   int             do_reseeding; /* TRUE: reseed for reproducible results    */
2198   P7_DOMAINDEF   *ddef;		/* domain definition workflow               */
2199 
2200   /* miscellaneous parameters */
2201   float         mxsize_limit;       /* maximum size in Mb allowed for HB alignment           */
2202   int           mxsize_set;         /* TRUE if mxsize_limit was set by user (default: FALSE) */
2203   int           be_verbose;         /* TRUE for verbose reporting mode          */
2204   int           do_top;             /* TRUE to do top    strand (usually TRUE)  */
2205   int           do_bot;             /* TRUE to do bottom strand (usually TRUE)  */
2206   int           show_accessions;    /* TRUE to output accessions not names      */
2207   int           show_alignments;    /* TRUE to compute and output alignments (default)*/
2208   double        maxtau;             /* max tau when tightening bands            */
2209   int           do_wcx;             /* TRUE to set cm->W as cm->clen * wcx      */
2210   float         wcx;                /* set W as cm->clen * wcx, ignoring W from CM file */
2211   int           do_one_cmpass;      /* TRUE to only use CM for best scoring HMM pass if envelope encompasses full sequence */
2212   int           do_one_cmpass_olap; /* TRUE to only use CM for best scoring HMM pass if all envelopes overlap > 50% */
2213   int           do_not_iterate;     /* TRUE to *not* iteratively tighten bands to get alignment that will fit in the matrix */
2214   /* these are all currently hard-coded, in cm_pipeline_Create() */
2215   float         smult;          /* 2.0;  W multiplier for window splitting */
2216   float         wmult;          /* 1.0;  maxW will be max of wmult * cm->W and cmult * cm->clen */
2217   float         cmult;          /* 1.25; maxW will be max of wmult * cm->W and cmult * cm->clen */
2218   float         mlmult;         /* 0.10; om->max_length multiplier for MSV window defn */
2219   /* flags for timing experiments */
2220   int           do_time_F1;      /* TRUE to abort after Stage 1 MSV, for timing expts */
2221   int           do_time_F2;      /* TRUE to abort after Stage 2 Vit, for timing expts */
2222   int           do_time_F3;      /* TRUE to abort after Stage 3 Fwd, for timing expts */
2223   int           do_time_F4;      /* TRUE to abort after Stage 4 glocal Fwd, for timing expts */
2224   int           do_time_F5;      /* TRUE to abort after Stage 5 env def, for timing expts */
2225   int           do_time_F6;      /* TRUE to abort after Stage 6 CYK, for timing expts */
2226   /* flag for terminating after a stage and outputting surviving windows (currently only F3 is possible) */
2227   int           do_trm_F3;       /* TRUE to abort after Stage 3 Fwd and output surviving windows */
2228 
2229   /* Reporting threshold settings                                           */
2230   int     by_E;		        /* TRUE to cut per-target report off by E   */
2231   double  E;	                /* per-target E-value threshold             */
2232   double  T;	                /* per-target bit score threshold           */
2233   int     use_bit_cutoffs;      /* (FALSE | CMH_GA | CMH_TC | CMH_NC)       */
2234 
2235   /* Inclusion threshold settings                                           */
2236   int     inc_by_E;		/* TRUE to threshold inclusion by E-values  */
2237   double  incE;			/* per-target inclusion E-value threshold   */
2238   double  incT;			/* per-target inclusion score threshold     */
2239 
2240   /* Tracking search space sizes for E value calculations                   *
2241    * NOTE: Definition of Z here differs markedly from HMMER, where Z is     *
2242    * number of target sequences (SEARCH) or models (SCAN).                  */
2243   double  Z;   /* database size, defn differs between SEARCH/SCAN mode      *
2244                 * SEARCH: # of residues in target sequence database         *
2245 		* SCAN:   # of models in target database multiplied by the  *
2246 		*         # of residues in current query seq                */
2247   enum cm_zsetby_e Z_setby;   	/* how Z was set                            */
2248 
2249   /* Threshold settings for pipeline                                        */
2250   /* non-default filter strategies */
2251   int     do_max;	        /* TRUE to skip all filters                 */
2252   int     do_nohmm;	        /* TRUE to skip all HMM filters             */
2253   int     do_mid;	        /* TRUE to skip MSV and Viterbi filters     */
2254   int     do_rfam;	        /* TRUE to run in fast, rfam mode           */
2255   /* filter thresholds */
2256   double  F1;		        /* MSV filter threshold                     */
2257   double  F2;		        /* Viterbi filter threshold                 */
2258   double  F3;		        /* uncorrected Forward filter threshold     */
2259   double  F4;		        /* glocal Forward filter thr                */
2260   double  F5;		        /* glocal env def filter thr                */
2261   double  F6;		        /* CYK filter thr                           */
2262   double  F1b;		        /* bias-corrected MSV filter threshold      */
2263   double  F2b;		        /* bias-corrected Viterbi filter threshold  */
2264   double  F3b;		        /* bias-corrected Forward filter threshold  */
2265   double  F4b;		        /* bias-corrected gloc Forward filter threshold */
2266   double  F5b;		        /* bias-corrected env def filter threshold  */
2267   /* on/off parameters for each stage */
2268   int     do_msv;		/* TRUE to filter with MSV, FALSE not to    */
2269   int     do_vit;		/* TRUE to filter with Vit, FALSE not to    */
2270   int     do_fwd;		/* TRUE to filter with Fwd, FALSE not to    */
2271   int     do_gfwd;		/* TRUE to filter w/glocal Fwd, FALSE not to*/
2272   int     do_edef;		/* TRUE to find envelopes in windows prior to CM stages */
2273   int     do_fcyk;	        /* TRUE to filter with CYK, FALSE not to    */
2274   int     do_msvbias;	        /* TRUE to use biased comp HMM filter w/MSV */
2275   int     do_vitbias;      	/* TRUE to use biased comp HMM filter w/Vit */
2276   int     do_fwdbias;     	/* TRUE to use biased comp HMM filter w/Fwd */
2277   int     do_gfwdbias;     	/* TRUE to use biased comp HMM filter w/gFwd*/
2278   int     do_edefbias;     	/* TRUE to use biased comp HMM filter w/edef*/
2279 
2280   /* truncated sequence detection parameters */
2281   int     do_trunc_ends;                /* TRUE to use truncated CM algs at sequence ends */
2282   int     do_trunc_any;                 /* TRUE to use truncated CM algs for entire sequences */
2283   int     do_trunc_only;                /* TRUE to use truncated CM algs for entire sequences */
2284   int     do_trunc_5p_ends;             /* TRUE to use truncated CM algs only at 5' ends (added for RNAVORE, post 1.1.1) */
2285   int     do_trunc_3p_ends;             /* TRUE to use truncated CM algs only at 3' ends (added for RNAVORE, post 1.1.1) */
2286 
2287   /* Parameters controlling p7 domain/envelope defintion */
2288   float  rt1;   	/* controls when regions are called. mocc[i] post prob >= dt1 : triggers a region around i */
2289   float  rt2;		/* controls extent of regions. regions extended until mocc[i]-{b,e}occ[i] < dt2            */
2290   float  rt3;		/* controls when regions are flagged for split: if expected # of E preceding B is >= dt3   */
2291   int    ns;            /* number of traceback samples for domain/envelope def */
2292 
2293   /* CM search options for CYK filter and final stage */
2294   int     do_fcykenv;	          /* TRUE to redefine envelopes after CYK     */
2295   double  F6env;	          /* CYK envelope P-value threshold           */
2296   int     do_null2;		  /* TRUE to use null2 score corrections      */
2297   int     do_null3;		  /* TRUE to use null3 score corrections      */
2298   int     do_glocal_cm_always;    /* TRUE to use glocal mode for CM stages, for all models */
2299   int     do_glocal_cm_cur;       /* TRUE to use glocal mode for CM stages, for current model */
2300   int     do_glocal_cm_sometimes; /* TRUE to use glocal mode for CM stages, for some models */
2301   int     fcyk_cm_search_opts;    /* CYK filter stage search opts             */
2302   int     final_cm_search_opts;   /* final stage search opts                  */
2303   int     fcyk_cm_exp_mode;       /* CYK filter exp mode                      */
2304   int     final_cm_exp_mode;      /* final stage exp mode                     */
2305   double  fcyk_beta;              /* QDB beta for CYK filter stage            */
2306   double  final_beta;             /* QDB beta for final stage                 */
2307   double  fcyk_tau;               /* HMM bands tau for CYK filter stage       */
2308   double  final_tau;              /* HMM bands tau for final stage            */
2309 
2310   /* Threshold settings for HMM-only pipeline                               */
2311   int     do_hmmonly_cur;	/* TRUE to only use filter HMM for current model */
2312   int     do_hmmonly_always;	/* TRUE to only use filter HMM for all models */
2313   int     do_hmmonly_never;	/* TRUE to never only use filter HMM for any model */
2314   int     do_max_hmmonly;       /* TRUE to skip all filters in HMM only mode  */
2315   /* filter thresholds, HMM only mode */
2316   double  F1_hmmonly;	        /* MSV filter threshold, HMM only mode      */
2317   double  F2_hmmonly;	        /* Viterbi filter threshold, HMM only mode  */
2318   double  F3_hmmonly;	        /* Forward filter threshold, HMM only mode  */
2319   /* on/off parameters, HMM only mode */
2320   int     do_bias_hmmonly;      /* TRUE to use bias filter, HMM only mode   */
2321   int     do_null2_hmmonly;     /* TRUE to use null2, HMM only mode         */
2322 
2323   /* configure/alignment options for all CMs we'll use in the pipeline */
2324   int     cm_config_opts;
2325   int     cm_align_opts;
2326 
2327 } CM_PIPELINE;
2328 
2329 /* constants used in cm_pipeline for tallying up number of residues surviving each stage */
2330 #define p7_SURV_F1  0
2331 #define p7_SURV_F1b 1
2332 #define p7_SURV_F2  2
2333 #define p7_SURV_F2b 3
2334 #define p7_SURV_F3  4
2335 #define p7_SURV_F3b 5
2336 #define Np7_SURV    6
2337 
2338 
2339 /***********************************************************************************
2340  * 39. CM_ALIDISPLAY: an alignment formatted for printing (replaces FancyAli_t)
2341  ***********************************************************************************/
2342 
2343 /* Structure: CM_ALIDISPLAY
2344  *
2345  * Alignment of a sequence to a CM, formatted for printing.
2346  * Based on HMMER's P7_ALIDISPLAY.
2347  *
2348  * For an alignment of L residues and names C chars long, requires
2349  * 9L + 2C + 50 bytes (or so); for typical case of L=100,C=10, that's
2350  * about 1 Kb.
2351  */
2352 typedef struct cm_alidisplay_s {
2353   char *rfline;                 /* reference coord info; or NULL        */
2354   char *ncline;                 /* negative scoring noncanonical bps    */
2355   char *csline;                 /* consensus structure info             */
2356   char *model;                  /* aligned query consensus sequence     */
2357   char *mline;                  /* "identities", conservation +'s, etc. */
2358   char *aseq;                   /* aligned target sequence              */
2359   char *ppline;			/* posterior prob annotation; or NULL   */
2360   int   N;			/* length of strings                    */
2361 
2362   char *aseq_el;                /* aligned sequence, including EL emissions */
2363   char *rfline_el;              /* reference annotation for aligned sequence w/EL */
2364   char *ppline_el;              /* posterior probs, including EL emissions */
2365   int   N_el;                   /* length of aseq_el, ppline_el         */
2366 
2367   char *cmname;	    	        /* name of CM                           */
2368   char *cmacc;			/* accession of CM; or [0]='\0'         */
2369   char *cmdesc;		        /* description of CM; or [0]='\0'       */
2370   int   cfrom_emit;             /* min consensus pos, start posn in CM  */
2371   int   cto_emit;		/* max consensus pos, end posn in CM    */
2372   int   cfrom_span;             /* min cons pos in predicted non-truncated hit, == cfrom_emit unless hit is 5' truncated */
2373   int   cto_span;               /* max cons pos in predicted non-truncated hit, == cto_emit   unless hit is 3' truncated */
2374   int   clen;			/* consensus length of model            */
2375 
2376   char *sqname;			/* name of target sequence              */
2377   char *sqacc;			/* accession of target seq; or [0]='\0' */
2378   char *sqdesc;			/* description of targ seq; or [0]='\0' */
2379   long  sqfrom;			/* min bound in scoord, start posn in seq (1..L) */
2380   long  sqto;		        /* max bound in scoord, end posn in seq   (1..L) */
2381 
2382   float  sc;		        /* alignment score */
2383   float  avgpp;		        /* average PP of all aligned residues, 0.0 if no PPs available */
2384   float  gc;                    /* GC content of all aligned residues [0..1] */
2385   double tau;                   /* tau used to calc HMM bands, -1.0 if HMM bands not used */
2386   float  matrix_Mb;             /* size of DP matrix used in Mb, either HMM banded CYK/OA or D&C CYK */
2387   double elapsed_secs;          /* number of seconds required for alignment */
2388 
2389   int    hmmonly;               /* TRUE if this CM_ALIDISPLAY was
2390 				 * converted from a P7_ALIDISPLAY
2391 				 * during an HMM only pipeline run.
2392 				 */
2393 
2394   int   memsize;                /* size of allocated block of char memory */
2395   char *mem;		        /* memory used for the char data above  */
2396 } CM_ALIDISPLAY;
2397 
2398 
2399 /***********************************************************************************
2400  * 40. CM_HIT: a hit between a CM and a sequence
2401  ***********************************************************************************/
2402 
2403 #define CM_HIT_FLAGS_DEFAULT 0
2404 #define CM_HIT_IS_INCLUDED            (1<<0)
2405 #define CM_HIT_IS_REPORTED            (1<<1)
2406 #define CM_HIT_IS_REMOVED_DUPLICATE   (1<<2)
2407 #define CM_HIT_IS_MARKED_OVERLAP      (1<<3)
2408 
2409 /* Structure: CM_HIT
2410  *
2411  * Info about a high-scoring database hit, kept so we can output a
2412  * sorted list of high hits at the end.
2413  *
2414  * <start> and <stop> are the coordinates that will be shown
2415  * in the results, not coords in arrays... therefore, reverse
2416  * complements have <start> > <stop>. To handle the rare
2417  * case that we have a hit that spans a single residue,
2418  * <in_rc> is TRUE if hit is on reverse complement, FALSE
2419  * if not.
2420  */
2421 typedef struct cm_hit_s {
2422   char          *name;		/* name of the target               (mandatory)           */
2423   char          *acc;		/* accession of the target          (optional; else NULL) */
2424   char          *desc;		/* description of the target        (optional; else NULL) */
2425   int64_t        cm_idx;        /* model    index in the cmfile,  unique id for the model */
2426   int            clan_idx;      /* clan     index in the cmfile,  -1 if none (usually -1) */
2427   int64_t        seq_idx;       /* sequence index in the seqfile, unique id for the sequence */
2428   int            pass_idx;      /* index of pipeline pass hit was found on */
2429   int64_t        hit_idx;       /* index of this hit in the hit list */
2430   int64_t        srcL;          /* full length of source sequence the hit is from */
2431   int64_t        start, stop;   /* start/end points of hit */
2432   int            in_rc;         /* TRUE if hit is in reverse complement of a target, FALSE if not */
2433   int            root;          /* internal state entry point, != 0 if hit involves a local begin */
2434   int            mode;          /* joint or marginal hit mode: CM_MODE_J | CM_MODE_R | CM_MODE_L | CM_MODE_T */
2435   float          score;		/* bit score of the hit (with corrections) */
2436   float          bias;          /* null{2,3} (2 if hmmonly, 3 if not) correction, in bits (already subtracted from score) */
2437   double         pvalue;	/* P-value of the hit (with corrections) */
2438   double         evalue;	/* E-value of the hit (with corrections) */
2439   int            has_evalue;	/* TRUE if E-value has been set for this hit */
2440   int            hmmonly;       /* TRUE if hit was found during HMM only pipeline run, FALSE if not */
2441   int            glocal;        /* TRUE if hit was found by model in global configuration, FALSE if not */
2442   CM_ALIDISPLAY *ad;            /* alignment display */
2443   uint32_t       flags;         /* CM_HIT_IS_REPORTED | CM_HIT_IS_INCLUDED | CM_HIT_IS_REMOVED_DUPLICATE | CM_HIT_IS_MARKED_OVERLAP */
2444   /* overlap information */
2445   int64_t        any_oidx;      /* hit index of best scoring (E-value, then bit score) hit that has
2446                                  * better score than this hit and overlaps with it */
2447   int64_t        win_oidx;      /* hit index of best scoring (E-value, then bit score) hit that has
2448                                  * better score than this AND has no CM_HIT_IS_MARKED_OVERLAP flag raised */
2449   double         any_bitE;      /* score of hit <any_oidx>, E-value if 'has_evalue' is TRUE, else bit score */
2450   double         win_bitE;      /* score of hit <win_oidx>, E-value if 'has_evalue' is TRUE, else bit score */
2451 
2452 } CM_HIT;
2453 
2454 
2455 /***********************************************************************************
2456  * 41. CM_TOPHITS: ranked list of top-scoring hits
2457  ***********************************************************************************/
2458 
2459 /* Structure: CM_TOPHITS: Collection of hits that can be sorted by
2460  * position or by score. This allows many hits to be merged when we
2461  * prepare to output results. "hit" list is NULL and unavailable until
2462  * after we do a sort.
2463  */
2464 typedef struct cm_tophits_s {
2465   CM_HIT **hit;                           /* sorted pointer array                     */
2466   CM_HIT  *unsrt;	                  /* unsorted data storage                    */
2467   uint64_t Nalloc;	                  /* current allocation size                  */
2468   uint64_t N;	  	                  /* number of hits in list now               */
2469   uint64_t nreported;                     /* number of hits that are reportable       */
2470   uint64_t nincluded;	                  /* number of hits that are includable       */
2471   int      is_sorted_by_evalue;           /* TRUE when hits are sorted by E-value, score, length, th->hit valid for all N hits */
2472   int      is_sorted_for_overlap_removal; /* TRUE when hits are sorted by cm_idx, seq_idx, strand, score, th->hit valid for all N hits */
2473   int      is_sorted_for_overlap_markup;  /* TRUE when hits are sorted by seq_idx, strand, score, th->hit valid for all N hits */
2474   int      is_sorted_by_position;         /* TRUE when hits are sorted by cm_idx, seq_idx, strand, first residue
2475 					   * (start if ! in_rc, stop if in_rc), th->hit valid for all N hits
2476 					   */
2477 } CM_TOPHITS;
2478 
2479 
2480 /***********************************************************************************
2481  * 42. CM_P7_OM_BLOCK: block of P7_OPROFILEs and related info, for cmscan.
2482  ***********************************************************************************/
2483 
2484 typedef struct {
2485   int            count;       /* number of <P7_OPROFILE> objects in the block (and cm_offsetA, cm_clenA, cm_WA, gfmuA, gflambdaA) */
2486   int64_t        idx0;        /* index of first profile in file >= 0          */
2487   int            listSize;    /* maximum number elements in the list          */
2488   P7_OPROFILE  **list;        /* array of <P7_OPROFILE> objects               */
2489   P7_SCOREDATA **msvdataA;    /* array of <P7_SCOREDATA> objects              */
2490   off_t         *cm_offsetA;  /* file offsets for CMs */
2491   int           *cm_clenA;    /* consensus length of CMs */
2492   int           *cm_WA;       /* window length of CMs */
2493   int           *cm_nbpA;     /* number of basepairs in CMs */
2494   float         *gfmuA;       /* glocal forward mu parameter for HMM */
2495   float         *gflambdaA;   /* glocal forward lambda parameter for HMM */
2496   int           *clan_idxA;   /* clan index for profile, or -1 for none */
2497 } CM_P7_OM_BLOCK;
2498 
2499 
2500 /***********************************************************************************
2501  * 43. CM_ALNDATA: information for alignment of a sequence to a CM.
2502  ***********************************************************************************/
2503 
2504 /* Structure CM_ALNDATA: Per-sequence information relevant to the
2505  * collection and output of an alignment of one sequence to a CM.
2506  * Used primarily in cmalign, but also used in cmbuild if the input
2507  * alignment refinement is used (--refine).
2508  */
2509 typedef struct {
2510   ESL_SQ           *sq;         /* sequence, often just a reference - not to be free'd */
2511   int64_t           idx;        /* index, for ordering sequences properly in output aln */
2512   float             sc;         /* alignment score for this sequence (CYK or Inside) */
2513   float             pp;         /* average posterior probability for this sequence */
2514   Parsetree_t      *tr;         /* Parsetree for this sequence */
2515   char             *ppstr;      /* posterior probability string for this sequence */
2516   int               spos;       /* first consensus pos of the CM that emits in tr */
2517   int               epos;       /* final consensus pos of the CM that emits in tr */
2518   float             secs_bands; /* seconds elapsed during band calculation */
2519   float             secs_aln;   /* seconds elapsed during alignment calculation */
2520   float             secs_tot;   /* seconds elapsed for entire processing of this sequence */
2521   float             mb_tot;     /* total Mb required for all DP matrices for alignment */
2522   double            tau;        /* tau used for HMM band calculation, -1 if no hmm bands */
2523   /* thresh1 and thresh2 are only relevant if alignment is truncated */
2524   float             thresh1;    /* cp9b->thresh1 used for HMM band calculation */
2525   float             thresh2;    /* cp9b->thresh2 used for HMM band calculation */
2526 } CM_ALNDATA;
2527 
2528 /*****************************************************************
2529  * 45. Routines in Infernal's exposed API.
2530  *****************************************************************/
2531 
2532 /* from cm.c */
2533 extern CM_t *CreateCM(int nnodes, int nstates, int clen, const ESL_ALPHABET *abc);
2534 extern CM_t *CreateCMShell(void);
2535 extern void  CreateCMBody(CM_t *cm, int nnodes, int nstates, int clen, const ESL_ALPHABET *abc);
2536 extern void  CMZero(CM_t *cm);
2537 extern void  CMRenormalize(CM_t *cm);
2538 extern void  FreeCM(CM_t *cm);
2539 extern void  CMSimpleProbify(CM_t *cm);
2540 extern int   rsearch_CMProbifyEmissions(CM_t *cm, fullmat_t *fullmat);
2541 extern int   CMLogoddsify(CM_t *cm);
2542 extern int   CMCountStatetype(CM_t *cm, char type);
2543 extern int   CMCountNodetype(CM_t *cm, char type);
2544 extern int   CMSegmentCountStatetype(CM_t *cm, int r, int z, char type);
2545 extern int   CMSubtreeCountStatetype(CM_t *cm, int v, char type);
2546 extern int   CMSubtreeCountNodetype(CM_t *cm, int v, char type);
2547 extern int   CMSubtreeFindEnd(CM_t *cm, int v);
2548 extern int   CalculateStateIndex(CM_t *cm, int node, char utype);
2549 extern int   TotalStatesInNode(int ndtype);
2550 extern int   SplitStatesInNode(int ndtype);
2551 extern int   InsertStatesInNode(int ndtype);
2552 extern int   StateDelta(int sttype);
2553 extern int   StateLeftDelta(int sttype);
2554 extern int   StateRightDelta(int sttype);
2555 extern int   Emitmode(int sttype);
2556 extern int   NumReachableInserts(int stid);
2557 extern void  PrintCM(FILE *fp, CM_t *cm);
2558 extern void  SummarizeCM(FILE *fp, CM_t *cm);
2559 extern char *Statetype(int type);
2560 extern int   StateCode(char *s);
2561 extern char *Nodetype(int type);
2562 extern int   NodeCode(char *s);
2563 extern char *UniqueStatetype(int type);
2564 extern int   UniqueStateCode(char *s);
2565 extern int   DeriveUniqueStateCode(int ndtype, int sttype);
2566 extern int   StateMapsLeft(char st);
2567 extern int   StateMapsRight(char st);
2568 extern int   StateMapsMatch(char st);
2569 extern int   StateMapsInsert(char st);
2570 extern int   StateMapsDelete(char st);
2571 extern int   NodeMapsLeft(char ndtype);
2572 extern int   NodeMapsRight(char ndtype);
2573 extern int   StateIsDetached(CM_t *cm, int v);
2574 extern int   CMRebalance(CM_t *cm, char *errbuf, CM_t **ret_new_cm);
2575 extern int **IMX2Alloc(int rows, int cols);
2576 extern void  IMX2Free(int **mx);
2577 extern float rsearch_calculate_gap_penalty (char from_state, char to_state, int from_node, int to_node, float input_alpha, float input_beta, float input_alphap, float input_betap);
2578 extern int   cm_Exponentiate(CM_t *cm, double z);
2579 extern int   cm_p7_Exponentiate(P7_HMM *hmm, double z);
2580 extern void  cm_banner(FILE *fp, char *progname, char *banner);
2581 extern void  cm_CalcExpSc(CM_t *cm, float **ret_expsc, float **ret_expsc_noss);
2582 extern int   cm_Validate(CM_t *cm, float tol, char *errbuf);
2583 extern char *CMStatetype(char st);
2584 extern char *CMNodetype(char nd);
2585 extern char *CMStateid(char st);
2586 extern char *MarginalMode(char mode);
2587 extern int   ModeEmitsLeft(char mode);
2588 extern int   ModeEmitsRight(char mode);
2589 extern int   cm_SetName(CM_t *cm, char *name);
2590 extern int   cm_SetAccession(CM_t *cm, char *acc);
2591 extern int   cm_SetDescription(CM_t *cm, char *desc);
2592 extern int   cm_SetConsensus(CM_t *cm, CMConsensus_t *cons, ESL_SQ *sq);
2593 extern int   cm_AppendComlog(CM_t *cm, int argc, char **argv, int add_seed, uint32_t seed);
2594 extern int   cm_SetCtime(CM_t *cm);
2595 extern int   DefaultNullModel(const ESL_ALPHABET *abc, float **ret_null);
2596 extern int   CMAllocNullModel(CM_t *cm);
2597 extern void  CMSetNullModel(CM_t *cm, float *null);
2598 extern int   CMReadNullModel(const ESL_ALPHABET *abc, char *nullfile, float **ret_null);
2599 extern int   IntMaxDigits();
2600 extern int   IntDigits(int i);
2601 extern int        cm_GetAvgHitLen(CM_t *cm, char *errbuf, float *ret_avgL_loc, float *ret_avgL_glb);
2602 extern int        CompareCMGuideTrees(CM_t *cm1, CM_t *cm2);
2603 extern void       DumpCMFlags(FILE *fp, CM_t *cm);
2604 extern ESL_GETOPTS *cm_CreateDefaultApp(ESL_OPTIONS *options, int nargs, int argc, char **argv, char *banner, char *usage);
2605 extern CM_P7_OM_BLOCK *cm_p7_oprofile_CreateBlock(int size);
2606 extern void            cm_p7_oprofile_DestroyBlock(CM_P7_OM_BLOCK *block);
2607 extern float **FCalcOptimizedEmitScores      (CM_t *cm);
2608 extern int   **ICalcOptimizedEmitScores      (CM_t *cm);
2609 extern int   **ICopyOptimizedEmitScoresFromFloats(CM_t *cm, float **oesc);
2610 extern int     CloneOptimizedEmitScores      (const CM_t *src, CM_t *dest, char *errbuf);
2611 extern void    DumpOptimizedEmitScores       (CM_t *cm, FILE *fp);
2612 extern void    FreeOptimizedEmitScores       (float **fesc_vAA, int **iesc_vAA, int M);
2613 extern float **FCalcInitDPScores             (CM_t *cm);
2614 extern int   **ICalcInitDPScores             (CM_t *cm);
2615 extern int     cm_nonconfigured_Verify(CM_t *cm, char *errbuf);
2616 extern int     cm_Clone(CM_t *cm, char *errbuf, CM_t **ret_cm);
2617 extern float   cm_Sizeof(CM_t *cm);
2618 extern int     Prob2Score(float p, float null);
2619 extern float   Score2Prob(int sc, float null);
2620 extern float   Scorify(int sc);
2621 extern double *cm_ExpectedStateOccupancy(CM_t *cm);
2622 extern int     cm_ExpectedPositionOccupancy(CM_t *cm, float **ret_mexpocc, float **ret_iexpocc, double **opt_psi, int **opt_m2v_1, int **opt_m2v_2, int **opt_i2v);
2623 extern char ***cm_CreateTransitionMap();
2624 extern void    cm_FreeTransitionMap(char ***tmap);
2625 extern void    InsertsGivenNodeIndex(CM_t *cm, int nd, int *ret_i1, int *ret_2);
2626 extern int     cm_Guidetree(CM_t *cm, char *errbuf, ESL_MSA *msa, Parsetree_t **ret_gtr);
2627 
2628 /* cm_alidisplay.c */
2629 extern int            cm_alidisplay_Create(CM_t *cm, char *errbuf, CM_ALNDATA *adata, const ESL_SQ *sq, int64_t seqoffset,
2630 					   double tau, double elapsed_secs, CM_ALIDISPLAY **ret_ad);
2631 extern int            cm_alidisplay_CreateFromP7(CM_t *cm, char *errbuf, const ESL_SQ *sq, int64_t seqoffset, float p7sc, float p7pp, P7_ALIDISPLAY *p7ad, CM_ALIDISPLAY **ret_ad);
2632 extern CM_ALIDISPLAY *cm_alidisplay_Clone(const CM_ALIDISPLAY *ad);
2633 extern size_t         cm_alidisplay_Sizeof(const CM_ALIDISPLAY *ad);
2634 extern void           cm_alidisplay_Destroy(CM_ALIDISPLAY *ad);
2635 extern char           cm_alidisplay_EncodePostProb(float p);
2636 extern float          cm_alidisplay_DecodePostProb(char pc);
2637 extern int            cm_alidisplay_Print(FILE *fp, CM_ALIDISPLAY *ad, int min_aliwidth, int linewidth, int show_accessions);
2638 extern int            cm_alidisplay_Is5PTrunc     (const CM_ALIDISPLAY *ad);
2639 extern int            cm_alidisplay_Is3PTrunc     (const CM_ALIDISPLAY *ad);
2640 extern int            cm_alidisplay_Is5PAnd3PTrunc(const CM_ALIDISPLAY *ad);
2641 extern int            cm_alidisplay_Is5PTruncOnly (const CM_ALIDISPLAY *ad);
2642 extern int            cm_alidisplay_Is3PTruncOnly (const CM_ALIDISPLAY *ad);
2643 extern char          *cm_alidisplay_TruncString   (const CM_ALIDISPLAY *ad);
2644 extern int            cm_alidisplay_Backconvert(CM_t *cm, const CM_ALIDISPLAY *ad, char *errbuf, ESL_SQ **ret_sq, Parsetree_t **ret_tr, char **ret_pp);
2645 extern int            cm_alidisplay_Dump(FILE *fp, const CM_ALIDISPLAY *ad);
2646 extern int            cm_alidisplay_Compare(const CM_ALIDISPLAY *ad1, const CM_ALIDISPLAY *ad2);
2647 
2648 /* from cm_alndata.c */
2649 CM_ALNDATA * cm_alndata_Create(void);
2650 void         cm_alndata_Destroy(CM_ALNDATA *data, int free_sq);
2651 int          DispatchSqBlockAlignment(CM_t *cm, char *errbuf, ESL_SQ_BLOCK *sq_block, float mxsize, ESL_STOPWATCH *w, ESL_STOPWATCH *w_tot, ESL_RANDOMNESS *r, CM_ALNDATA ***ret_dataA);
2652 int          DispatchSqAlignment     (CM_t *cm, char *errbuf, ESL_SQ *sq, int64_t idx, float mxsize, char mode, int pass_idx,
2653 				      int cp9b_valid, ESL_STOPWATCH *w, ESL_STOPWATCH *w_tot, ESL_RANDOMNESS *r, CM_ALNDATA **ret_data);
2654 
2655 /* from cm_dpalign.c */
2656 extern int   cm_AlignSizeNeeded   (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb);
2657 extern int   cm_AlignSizeNeededHB (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb);
2658 extern int   cm_Align             (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, int do_sample, CM_MX *mx,    CM_SHADOW_MX    *shmx, CM_MX    *post_mx, CM_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, float *ret_avgpp, float *ret_sc);
2659 extern int   cm_AlignHB           (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, int do_sample, CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, CM_HB_MX *post_mx, CM_HB_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, float *ret_avgpp, float *ret_sc);
2660 extern int   cm_CYKInsideAlign    (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit,               CM_MX    *mx, CM_SHADOW_MX    *shmx, int *ret_b, float *ret_sc);
2661 extern int   cm_CYKInsideAlignHB  (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit,               CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, int *ret_b, float *ret_sc);
2662 extern int   cm_InsideAlign       (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit,               CM_MX    *mx, float *ret_sc);
2663 extern int   cm_InsideAlignHB     (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit,               CM_HB_MX *mx, float *ret_sc);
2664 extern int   cm_OptAccAlign       (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit,               CM_MX    *mx, CM_SHADOW_MX    *shmx, CM_EMIT_MX *emit_mx,    int *ret_b, float *ret_pp);
2665 extern int   cm_OptAccAlignHB     (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit,               CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, CM_HB_EMIT_MX *emit_mx, int *ret_b, float *ret_pp);
2666 extern int   cm_CYKOutsideAlign   (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_MX    *mx, CM_MX *inscyk_mx, float *ret_sc);
2667 extern int   cm_CYKOutsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_HB_MX *mx, CM_HB_MX *ins_mx, float *ret_sc);
2668 extern int   cm_OutsideAlign      (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_MX    *mx, CM_MX    *ins_mx, float *ret_sc);
2669 extern int   cm_OutsideAlignHB    (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_HB_MX *mx, CM_HB_MX *ins_mx, float *ret_sc);
2670 extern int   cm_Posterior         (CM_t *cm, char *errbuf,               int L, float size_limit,               CM_MX    *ins_mx, CM_MX    *out_mx, CM_MX    *post_mx);
2671 extern int   cm_PosteriorHB       (CM_t *cm, char *errbuf,               int L, float size_limit,               CM_HB_MX *ins_mx, CM_HB_MX *out_mx, CM_HB_MX *post_mx);
2672 extern int   cm_EmitterPosterior  (CM_t *cm, char *errbuf,               int L, float size_limit, CM_MX    *post, CM_EMIT_MX    *emit_mx, int do_check);
2673 extern int   cm_EmitterPosteriorHB(CM_t *cm, char *errbuf,               int L, float size_limit, CM_HB_MX *post, CM_HB_EMIT_MX *emit_mx, int do_check);
2674 extern int   cm_PostCode  (CM_t *cm, char *errbuf, int L, CM_EMIT_MX    *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp);
2675 extern int   cm_PostCodeHB(CM_t *cm, char *errbuf, int L, CM_HB_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp);
2676 extern int   cm_InitializeOptAccShadowDZero  (CM_t *cm, char *errbuf, char ***yshadow, int L);
2677 extern int   cm_InitializeOptAccShadowDZeroHB(CM_t *cm, CP9Bands_t *cp9b, char *errbuf, char ***yshadow, int L);
2678 extern float FScore2Prob(float sc, float null);
2679 extern char  Fscore2postcode(float sc);
2680 
2681 /* from cm_dpalign_trunc.c */
2682 extern int  cm_TrAlignSizeNeeded    (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb);
2683 extern int  cm_TrAlignSizeNeededHB  (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb);
2684 
2685 extern int  cm_TrAlign              (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_optacc, int do_sample, CM_TR_MX    *mx, CM_TR_SHADOW_MX    *shmx, CM_TR_MX    *post_mx, CM_TR_EMIT_MX    *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, char *ret_mode, float *ret_avgpp, float *ret_sc);
2686 extern int  cm_TrAlignHB            (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_optacc, int do_sample, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_MX *post_mx, CM_TR_HB_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, char *ret_mode, float *ret_avgpp, float *ret_sc);
2687 extern int  cm_TrCYKInsideAlign     (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_MX    *mx, CM_TR_SHADOW_MX    *shmx, int *ret_b, char *ret_mode, float *ret_sc);
2688 extern int  cm_TrCYKInsideAlignHB   (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, int *ret_b, char *ret_mode, float *ret_sc);
2689 extern int  cm_TrInsideAlign        (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_MX    *mx, char *ret_mode, float *ret_sc);
2690 extern int  cm_TrInsideAlignHB      (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, char *ret_mode, float *ret_sc);
2691 extern int  cm_TrOptAccAlign        (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_MX    *mx, CM_TR_SHADOW_MX    *shmx, CM_TR_EMIT_MX    *emit_mx, int *ret_b, float *ret_pp);
2692 extern int  cm_TrOptAccAlignHB      (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_EMIT_MX *emit_mx, int *ret_b, float *ret_pp);
2693 extern int  cm_TrCYKOutsideAlign    (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_MX    *mx, CM_TR_MX    *inscyk_mx);
2694 extern int  cm_TrCYKOutsideAlignHB  (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_HB_MX *mx, CM_TR_HB_MX *inscyk_mx);
2695 extern int  cm_TrOutsideAlign       (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_MX    *mx, CM_TR_MX    *ins_mx);
2696 extern int  cm_TrOutsideAlignHB     (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_HB_MX *mx, CM_TR_HB_MX *ins_mx);
2697 
2698 extern int  cm_TrPosterior          (CM_t *cm, char *errbuf,               int L, float size_limit, char preset_mode, CM_TR_MX    *ins_mx, CM_TR_MX    *out_mx, CM_TR_MX    *post_mx);
2699 extern int  cm_TrPosteriorHB        (CM_t *cm, char *errbuf,               int L, float size_limit, char preset_mode, CM_TR_HB_MX *ins_mx, CM_TR_HB_MX *out_mx, CM_TR_HB_MX *post_mx);
2700 extern int  cm_TrEmitterPosterior   (CM_t *cm, char *errbuf,               int L, float size_limit, char preset_mode, int do_check, CM_TR_MX    *post, CM_TR_EMIT_MX    *emit_mx);
2701 extern int  cm_TrEmitterPosteriorHB (CM_t *cm, char *errbuf,               int L, float size_limit, char preset_mode, int do_check, CM_TR_HB_MX *post, CM_TR_HB_EMIT_MX *emit_mx);
2702 extern int  cm_TrPostCode           (CM_t *cm, char *errbuf,               int L, CM_TR_EMIT_MX    *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp);
2703 extern int  cm_TrPostCodeHB         (CM_t *cm, char *errbuf,               int L, CM_TR_HB_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp);
2704 extern int  cm_TrFillFromMode       (char mode, int *ret_fill_L, int *ret_fill_R, int *ret_fill_T);
2705 
2706 /* from cm_dpsearch.c */
2707 extern int  FastCYKScan      (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc);
2708 extern int  RefCYKScan       (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc);
2709 extern int  FastIInsideScan  (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc);
2710 extern int  RefIInsideScan   (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc);
2711 extern int  FastFInsideScan  (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc);
2712 extern int  RefFInsideScan   (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc);
2713 extern int  FastCYKScanHB    (CM_t *cm, char *errbuf, CM_HB_MX   *mx,  float size_limit, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float *ret_sc);
2714 extern int  FastFInsideScanHB(CM_t *cm, char *errbuf, CM_HB_MX   *mx,  float size_limit, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float *ret_sc);
2715 extern int  cm_CountSearchDPCalcs(CM_t *cm, char *errbuf, int L, int *dmin, int *dmax, int W, int correct_for_first_W, float **ret_vcalcs, float *ret_calcs);
2716 extern int  DetermineSeqChunksize(int nproc, int L, int W);
2717 
2718 /* from cm_dpsearch_trunc.c */
2719 extern int  RefTrCYKScan    (CM_t *cm, char *errbuf, CM_TR_SCAN_MX *trsmx, int qdbidx, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist,
2720 			     int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, char *ret_mode, float *ret_sc);
2721 extern int  RefITrInsideScan(CM_t *cm, char *errbuf, CM_TR_SCAN_MX *trsmx, int qdbidx, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist,
2722 			     int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, char *ret_mode, float *ret_sc);
2723 extern int  TrCYKScanHB(CM_t *cm, char *errbuf, CM_TR_HB_MX *mx, float size_limit, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist,
2724 			int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, char *ret_mode, float *ret_sc);
2725 extern int  FTrInsideScanHB(CM_t *cm, char *errbuf, CM_TR_HB_MX *mx, float size_limit, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist,
2726 			    int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, char *ret_mode, float *ret_sc);
2727 extern int  cm_TrFillFromPassIdx(int pass_idx, int *ret_fill_L, int *ret_fill_R, int *ret_fill_T);
2728 
2729 /* from cm_dpsmall.c */
2730 extern float CYKDivideAndConquer(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, Parsetree_t **ret_tr, int *dmin, int *dmax);
2731 extern float CYKInside(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, Parsetree_t **ret_tr, int *dmin, int *dmax);
2732 extern float CYKInsideScore(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, int *dmin, int *dmax);
2733 extern float CYKDemands(CM_t *cm, int L, int *dmin, int *dmax, int be_quiet);
2734 extern float CYKNonQDBSmallMbNeeded(CM_t *cm, int L);
2735 extern void  debug_print_bands(FILE *fp, CM_t *cm, int *dmin, int *dmax);
2736 /* cm_dpsmall.c: size calculators - not normally part of external API, but truncyk.c currently uses them */
2737 extern float insideT_size(CM_t *cm, int L, int r, int z, int i0, int j0);
2738 extern float vinsideT_size(CM_t *cm, int r, int z, int i0, int i1, int j1, int j0);
2739 /* cm_dpsmall.c: the memory management routines. */
2740 extern struct  deckpool_s *deckpool_create(void);
2741 extern void    deckpool_push(struct deckpool_s *dpool, float **deck);
2742 extern int     deckpool_pop(struct deckpool_s *d, float ***ret_deck);
2743 extern void    deckpool_free(struct deckpool_s *d);
2744 extern float **alloc_vjd_deck(int L, int i, int j);
2745 extern float   size_vjd_deck(int L, int i, int j);
2746 extern void    free_vjd_deck(float **a, int i, int j);
2747 extern void    free_vjd_matrix(float ***a, int M, int i, int j);
2748 extern char  **alloc_vjd_yshadow_deck(int L, int i, int j);
2749 extern float   size_vjd_yshadow_deck(int L, int i, int j);
2750 extern void    free_vjd_yshadow_deck(char **a, int i, int j);
2751 extern int   **alloc_vjd_kshadow_deck(int L, int i, int j);
2752 extern float   size_vjd_kshadow_deck(int L, int i, int j);
2753 extern void    free_vjd_kshadow_deck(int **a, int i, int j);
2754 extern void    free_vjd_shadow_matrix(void ***shadow, CM_t *cm, int i, int j);
2755 extern float **alloc_vji_deck(int i0, int i1, int j1, int j0);
2756 extern float   size_vji_deck(int i0, int i1, int j1, int j0);
2757 extern void    free_vji_deck(float **a, int j1, int j0);
2758 extern void    free_vji_matrix(float ***a, int M, int j1, int j0);
2759 extern char  **alloc_vji_shadow_deck(int i0, int i1, int j1, int j0);
2760 extern float   size_vji_shadow_deck(int i0, int i1, int j1, int j0);
2761 extern void    free_vji_shadow_deck(char **a, int j1, int j0);
2762 extern void    free_vji_shadow_matrix(char ***a, int M, int j1, int j0);
2763 
2764 extern float **alloc_banded_vjd_deck(int L, int i, int j, int min, int max);
2765 extern char  **alloc_banded_vjd_yshadow_deck(int L, int i, int j, int min, int max);
2766 extern int   **alloc_banded_vjd_kshadow_deck(int L, int i, int j, int min, int max);
2767 
2768 extern void debug_print_alpha(float ***alpha, CM_t *cm, int L);
2769 extern void debug_print_alpha_banded(float ***alpha, CM_t *cm, int L, int *dmin, int *dmax);
2770 extern void debug_print_alpha_deck(int v, float **deck, CM_t *cm, int L);
2771 extern void debug_print_shadow(void ***shadow, CM_t *cm, int L);
2772 extern void debug_print_shadow_banded(void ***shadow, CM_t *cm, int L, int *dmin, int *dmax);
2773 extern void debug_print_shadow_banded_deck(int v, void ***shadow, CM_t *cm, int L, int *dmin, int *dmax);
2774 
2775 /* from cm_file.c */
2776 extern int     cm_file_Open(char *filename, char *env, int allow_1p0, CM_FILE **ret_cmfp, char *errbuf);
2777 extern int     cm_file_OpenNoDB(char *filename, char *env, int allow_1p0, CM_FILE **ret_cmfp, char *errbuf);
2778 extern int     cm_file_OpenBuffer(char *buffer, int size, int allow_1p0, CM_FILE **ret_cmfp);
2779 extern void    cm_file_Close(CM_FILE *cmfp);
2780 extern int     cm_file_CreateLock(CM_FILE *cmfp);
2781 extern int     cm_file_WriteASCII(FILE *fp, int format, CM_t *cm);
2782 extern int     cm_file_WriteBinary(FILE *fp, int format, CM_t *cm, off_t *opt_fp7_offset);
2783 extern int     cm_file_Read(CM_FILE *cmfp, int read_fp7, ESL_ALPHABET **ret_abc, CM_t **opt_cm);
2784 extern int     cm_file_PositionByKey(CM_FILE *cmfp, const char *key);
2785 extern int     cm_file_Position(CM_FILE *cmfp, const off_t offset);
2786 extern int     cm_p7_hmmfile_Read(CM_FILE *cmfp, ESL_ALPHABET *abc, off_t offset, P7_HMM **ret_hmm);
2787 extern int     cm_p7_oprofile_Write(FILE *ffp, FILE *pfp, off_t cm_offset, int cm_len, int cm_W, int cm_nbp, float gfmu, float gflambda, P7_OPROFILE *om);
2788 extern int     cm_p7_oprofile_ReadMSV(CM_FILE *cmfp, int read_scores, ESL_ALPHABET **byp_abc, off_t *ret_cm_offset, int *ret_cm_clen, int *ret_cm_W, int *ret_cm_nbp, float *ret_gfmu, float *ret_gflambda, P7_OPROFILE **ret_om);
2789 extern int     cm_p7_oprofile_ReadBlockMSV(CM_FILE *cmfp, int64_t cm_idx, ESL_ALPHABET **byp_abc, CM_P7_OM_BLOCK *hmmBlock);
2790 extern int     cm_p7_oprofile_Position(CM_FILE *cmfp, off_t offset);
2791 extern int     cm_file_Write1p0ASCII(FILE *fp, CM_t *cm);
2792 
2793 /* from cm_modelconfig.c */
2794 extern int   cm_Configure   (CM_t *cm, char *errbuf, int W_from_cmdline);
2795 extern int   cm_ConfigureSub(CM_t *cm, char *errbuf, int W_from_cmdline, CM_t *mother_cm, CMSubMap_t *mother_map);
2796 extern int   cm_CalculateLocalBeginProbs(CM_t *cm, float p_internal_start, float **t, float *begin);
2797 
2798 /* from cm_modelmaker.c */
2799 extern int  HandModelmaker(ESL_MSA *msa, char *errbuf, int use_rf, int use_el, int use_wts, float gapthresh, CM_t **ret_cm, Parsetree_t **ret_mtr);
2800 extern int  ConsensusModelmaker(const ESL_ALPHABET *abc, char *errbuf, char *ss_cons, int clen, int building_sub_model, CM_t **ret_cm, Parsetree_t **ret_gtr);
2801 extern int  Transmogrify(CM_t *cm, char *errbuf, Parsetree_t *gtr, ESL_DSQ *ax, int *used_el, int alen, Parsetree_t **ret_tr);
2802 extern int  cm_from_guide(CM_t *cm, char *errbuf, Parsetree_t *gtr, int will_never_localize);
2803 extern int  cm_find_and_detach_dual_inserts(CM_t *cm, int do_check, int do_detach);
2804 extern int  cm_check_before_detaching(CM_t *cm, int insert1, int insert2);
2805 extern int  cm_detach_state(CM_t *cm, int insert1, int insert2);
2806 extern int  cm_zero_flanking_insert_counts(CM_t *cm, char *errbuf);
2807 extern int  clean_cs(char *cs, int alen, int be_quiet);
2808 
2809 /* from cm_mx.c */
2810 extern CM_MX           *cm_mx_Create                  (int M);
2811 extern int              cm_mx_GrowTo                  (CM_t *cm, CM_MX *mx, char *errbuf, int L, float size_limit);
2812 extern int              cm_mx_Dump                    (FILE *ofp, CM_MX *mx, int print_mx);
2813 extern void             cm_mx_Destroy                 (CM_MX *mx);
2814 extern int              cm_mx_SizeNeeded              (CM_t *cm, char *errbuf, int L, int64_t *ret_ncells, float *ret_Mb);
2815 
2816 extern CM_TR_MX        *cm_tr_mx_Create               (CM_t *cm);
2817 extern int              cm_tr_mx_GrowTo               (CM_t *cm, CM_TR_MX *mx, char *errbuf, int L, float size_limit);
2818 extern int              cm_tr_mx_Dump                 (FILE *ofp, CM_TR_MX *mx, char mode, int print_mx);
2819 extern void             cm_tr_mx_Destroy              (CM_TR_MX *mx);
2820 extern int              cm_tr_mx_SizeNeeded           (CM_t *cm, char *errbuf, int L, int64_t *ret_Jncells, int64_t *ret_Lncells, int64_t *ret_Rncells, int64_t *ret_Tncells, float *ret_Mb);
2821 
2822 extern CM_HB_MX        *cm_hb_mx_Create               (int M);
2823 extern int              cm_hb_mx_GrowTo               (CM_t *cm, CM_HB_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit);
2824 extern int              cm_hb_mx_Dump                 (FILE *ofp, CM_HB_MX *mx, int print_mx);
2825 extern void             cm_hb_mx_Destroy              (CM_HB_MX *mx);
2826 extern int              cm_hb_mx_SizeNeeded           (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_ncells, float *ret_Mb);
2827 
2828 extern CM_TR_HB_MX     *cm_tr_hb_mx_Create            (CM_t *cm);
2829 extern int              cm_tr_hb_mx_GrowTo            (CM_t *cm, CM_TR_HB_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit);
2830 extern int              cm_tr_hb_mx_Dump              (FILE *ofp, CM_TR_HB_MX *mx, char mode, int print_mx);
2831 extern void             cm_tr_hb_mx_Destroy           (CM_TR_HB_MX *mx);
2832 extern int              cm_tr_hb_mx_SizeNeeded        (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_Jncells, int64_t *ret_Lncells,
2833 						       int64_t *ret_Rncells, int64_t *ret_Tncells, float *ret_Mb);
2834 
2835 extern CM_SHADOW_MX    *cm_shadow_mx_Create           (CM_t *cm);
2836 extern int              cm_shadow_mx_GrowTo           (CM_t *cm, CM_SHADOW_MX *mx, char *errbuf, int L, float size_limit);
2837 extern int              cm_shadow_mx_Dump             (FILE *ofp, CM_t *cm, CM_SHADOW_MX *mx, int print_mx);
2838 extern void             cm_shadow_mx_Destroy          (CM_SHADOW_MX *mx);
2839 extern int              cm_shadow_mx_SizeNeeded       (CM_t *cm, char *errbuf, int L, int64_t *ret_ny_cells, int64_t *ret_nk_cells, float *ret_Mb);
2840 
2841 extern CM_TR_SHADOW_MX *cm_tr_shadow_mx_Create        (CM_t *cm);
2842 extern int              cm_tr_shadow_mx_GrowTo        (CM_t *cm, CM_TR_SHADOW_MX *mx, char *errbuf, int L, float size_limit);
2843 extern int              cm_tr_shadow_mx_Dump          (FILE *ofp, CM_t *cm, CM_TR_SHADOW_MX *mx, char mode, int print_mx);
2844 extern void             cm_tr_shadow_mx_Destroy       (CM_TR_SHADOW_MX *mx);
2845 extern int              cm_tr_shadow_mx_SizeNeeded    (CM_t *cm, char *errbuf, int L, int64_t *ret_Jny_cells, int64_t *ret_Lny_cells, int64_t *ret_Rny_cells,
2846 						       int64_t *ret_Jnk_cells, int64_t *ret_Lnk_cells, int64_t *ret_Rnk_cells, int64_t *ret_Tnk_cells, float *ret_Mb);
2847 
2848 extern CM_HB_SHADOW_MX *cm_hb_shadow_mx_Create        (CM_t *cm);
2849 extern int              cm_hb_shadow_mx_GrowTo        (CM_t *cm, CM_HB_SHADOW_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit);
2850 extern int              cm_hb_shadow_mx_Dump          (FILE *ofp, CM_t *cm, CM_HB_SHADOW_MX *mx, int print_mx);
2851 extern void             cm_hb_shadow_mx_Destroy       (CM_HB_SHADOW_MX *mx);
2852 extern int              cm_hb_shadow_mx_SizeNeeded    (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int64_t *ret_ny_cells, int64_t *ret_nk_cells, float *ret_Mb);
2853 
2854 extern CM_TR_HB_SHADOW_MX *cm_tr_hb_shadow_mx_Create  (CM_t *cm);
2855 extern int              cm_tr_hb_shadow_mx_GrowTo     (CM_t *cm, CM_TR_HB_SHADOW_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit);
2856 extern int              cm_tr_hb_shadow_mx_Dump       (FILE *ofp, CM_t *cm, CM_TR_HB_SHADOW_MX *mx, char mode, int print_mx);
2857 extern void             cm_tr_hb_shadow_mx_Destroy    (CM_TR_HB_SHADOW_MX *mx);
2858 extern int              cm_tr_hb_shadow_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int64_t *ret_Jny_cells, int64_t *ret_Lny_cells, int64_t *ret_Rny_cells,
2859 						       int64_t *ret_Jnk_cells, int64_t *ret_Lnk_cells, int64_t *ret_Rnk_cells, int64_t *ret_Tnk_cells, float *ret_Mb);
2860 
2861 extern CM_EMIT_MX      *cm_emit_mx_Create     (CM_t *cm);
2862 extern int              cm_emit_mx_GrowTo     (CM_t *cm, CM_EMIT_MX *mx, char *errbuf, int L, float size_limit);
2863 extern int              cm_emit_mx_Dump       (FILE *ofp, CM_t *cm, CM_EMIT_MX *mx, int print_mx);
2864 extern void             cm_emit_mx_Destroy    (CM_EMIT_MX *mx);
2865 extern int              cm_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb);
2866 
2867 extern CM_TR_EMIT_MX   *cm_tr_emit_mx_Create     (CM_t *cm);
2868 extern int              cm_tr_emit_mx_GrowTo     (CM_t *cm, CM_TR_EMIT_MX *mx, char *errbuf, int L, float size_limit);
2869 extern int              cm_tr_emit_mx_Dump       (FILE *ofp, CM_t *cm, CM_TR_EMIT_MX *mx, char mode, int print_mx);
2870 extern void             cm_tr_emit_mx_Destroy    (CM_TR_EMIT_MX *mx);
2871 extern int              cm_tr_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb);
2872 
2873 extern CM_HB_EMIT_MX   *cm_hb_emit_mx_Create     (CM_t *cm);
2874 extern int              cm_hb_emit_mx_GrowTo     (CM_t *cm, CM_HB_EMIT_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit);
2875 extern int              cm_hb_emit_mx_Dump       (FILE *ofp, CM_t *cm, CM_HB_EMIT_MX *mx, int print_mx);
2876 extern void             cm_hb_emit_mx_Destroy    (CM_HB_EMIT_MX *mx);
2877 extern int              cm_hb_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb);
2878 
2879 extern CM_TR_HB_EMIT_MX *cm_tr_hb_emit_mx_Create     (CM_t *cm);
2880 extern int               cm_tr_hb_emit_mx_GrowTo     (CM_t *cm, CM_TR_HB_EMIT_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit);
2881 extern int               cm_tr_hb_emit_mx_Dump       (FILE *ofp, CM_t *cm, CM_TR_HB_EMIT_MX *mx, char mode, int print_mx);
2882 extern void              cm_tr_hb_emit_mx_Destroy    (CM_TR_HB_EMIT_MX *mx);
2883 extern int               cm_tr_hb_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb);
2884 
2885 extern int   cm_scan_mx_Create            (CM_t *cm, char *errbuf, int do_float, int do_int, CM_SCAN_MX **ret_smx);
2886 extern int   cm_scan_mx_InitializeFloats  (CM_t *cm, CM_SCAN_MX *smx, char *errbuf);
2887 extern int   cm_scan_mx_InitializeIntegers(CM_t *cm, CM_SCAN_MX *smx, char *errbuf);
2888 extern float cm_scan_mx_SizeNeeded        (CM_t *cm, int do_float, int do_int);
2889 extern void  cm_scan_mx_Destroy           (CM_t *cm, CM_SCAN_MX *smx);
2890 extern void  cm_scan_mx_Dump              (FILE *ofp, CM_t *cm, int j, int i0, int qdbidx, int doing_float);
2891 
2892 extern int   cm_tr_scan_mx_Create            (CM_t *cm, char *errbuf, int do_float, int do_int, CM_TR_SCAN_MX **ret_smx);
2893 extern int   cm_tr_scan_mx_InitializeFloats  (CM_t *cm, CM_TR_SCAN_MX *trsmx, char *errbuf);
2894 extern int   cm_tr_scan_mx_InitializeIntegers(CM_t *cm, CM_TR_SCAN_MX *trsmx, char *errbuf);
2895 extern float cm_tr_scan_mx_SizeNeeded        (CM_t *cm, int do_float, int do_int);
2896 extern void  cm_tr_scan_mx_Destroy           (CM_t *cm, CM_TR_SCAN_MX *smx);
2897 extern void  cm_tr_scan_mx_Dump              (FILE *ofp, CM_t *cm, int j, int i0, int qdbidx, int doing_float);
2898 
2899 extern GammaHitMx_t    *CreateGammaHitMx              (int L, int64_t i0, float cutoff);
2900 extern void             FreeGammaHitMx                (GammaHitMx_t *gamma);
2901 extern int              UpdateGammaHitMx              (CM_t *cm, char *errbuf, int pass_idx, GammaHitMx_t *gamma, int j, int dmin, int dmax, float *bestsc, int *bestr, char *bestmode, int W, double **act);
2902 extern int              ReportHitsGreedily            (CM_t *cm, char *errbuf, int pass_idx, int j, int dmin, int dmax, float *bestsc, int *bestr, char *bestmode, int W, double **act, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist);
2903 extern void             TBackGammaHitMx               (GammaHitMx_t *gamma, CM_TOPHITS *hitlist, int64_t i0, int64_t j0);
2904 
2905 
2906 
2907 /* from cm_parsetree.c */
2908 extern Parsetree_t *CreateParsetree(int size);
2909 extern void         GrowParsetree(Parsetree_t *tr);
2910 extern void         FreeParsetree(Parsetree_t *tr);
2911 extern float        SizeofParsetree(Parsetree_t *tr);
2912 extern int          InsertTraceNode(Parsetree_t *tr, int y, int whichway, int emitl, int emitr, int state);
2913 extern int          InsertTraceNodewithMode(Parsetree_t *tr, int y, int whichway, int emitl, int emitr, int state, char mode);
2914 extern void         PrintParsetree(FILE *fp, Parsetree_t *tr);
2915 extern void         ParsetreeCount(CM_t *cm, Parsetree_t *tr, ESL_DSQ *dsq, float wgt);
2916 extern int          ParsetreeScore(CM_t *cm, CMEmitMap_t *emap, char *errbuf, Parsetree_t *tr, ESL_DSQ *dsq, int do_null2, float *ret_sc, float *ret_struct_sc, float *ret_primary_sc, int *ret_spos, int *ret_epos);
2917 extern void         ParsetreeDump(FILE *fp, Parsetree_t *tr, CM_t *cm, ESL_DSQ *dsq);
2918 extern int          ParsetreeCompare(Parsetree_t *t1, Parsetree_t *t2);
2919 extern void         SummarizeMasterTrace(FILE *fp, Parsetree_t *tr);
2920 extern void         MasterTraceDisplay(FILE *fp, Parsetree_t *mtr, CM_t *cm);
2921 extern int          Parsetrees2Alignment(CM_t *cm, char *errbuf, const ESL_ALPHABET *abc, ESL_SQ **sq, double *wgt, Parsetree_t **tr, char **postcode, int nseq, FILE *insertfp, FILE *elfp, int do_full, int do_matchonly, ESL_MSA **ret_msa);
2922 extern int          Alignment2Parsetrees(ESL_MSA *msa, CM_t *cm, Parsetree_t *mtr, char *errbuf, ESL_SQ ***ret_sq, Parsetree_t ***ret_tr);
2923 extern float        ParsetreeScore_Global2Local(CM_t *cm, Parsetree_t *tr, ESL_DSQ *dsq, int print_flag);
2924 extern int          Parsetree2CP9trace(CM_t *cm, Parsetree_t *tr, CP9trace_t **ret_cp9_tr);
2925 extern void         rightjustify(const ESL_ALPHABET *abc, char *s, int n);
2926 extern void         leftjustify(const ESL_ALPHABET *abc, char *s, int n);
2927 extern int          EmitParsetree(CM_t *cm, char *errbuf, ESL_RANDOMNESS *r, char *name, int do_digital, Parsetree_t **ret_tr, ESL_SQ **ret_sq, int *ret_N);
2928 extern int          ParsetreeScoreCorrectionNull2(CM_t *cm, char *errbuf, Parsetree_t *tr, ESL_DSQ *dsq, int start, float omega, float *ret_sc);
2929 extern int          ParsetreeScoreCorrectionNull3(CM_t *cm, char *errbuf, Parsetree_t *tr, ESL_DSQ *dsq, int start, float omega, float *ret_sc);
2930 extern int          ParsetreeCountMPEmissions(CM_t *cm, Parsetree_t *tr);
2931 extern void         ScoreCorrectionNull3(const ESL_ALPHABET *abc, float *null0, float *comp, int len, float omega, float *ret_sc);
2932 extern void         ScoreCorrectionNull3CompUnknown(const ESL_ALPHABET *abc, float *null0, ESL_DSQ *dsq, int start, int stop, float omega, float *ret_sc);
2933 extern char         ParsetreeMode(Parsetree_t *tr);
2934 extern int          ParsetreeToCMBounds(CM_t *cm, Parsetree_t *tr, int have_i0, int have_j0, char *errbuf, int *ret_cfrom_span, int *ret_cto_span, int *ret_cfrom_emit, int *ret_cto_emit, int *ret_first_emit, int *ret_final_emit);
2935 extern int          cm_StochasticParsetree    (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, CM_MX    *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, float *ret_sc);
2936 extern int          cm_StochasticParsetreeHB  (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, CM_HB_MX *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, float *ret_sc);
2937 extern int          cm_TrStochasticParsetree  (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, char preset_mode, int pass_idx, CM_TR_MX    *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc);
2938 extern int          cm_TrStochasticParsetreeHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc);
2939 extern int          cm_parsetree_Doctor(CM_t *cm, char *errbuf, Parsetree_t *tr, int *opt_ndi, int *opt_nid);
2940 
2941 
2942 /* from cm_pipeline.c */
2943 extern CM_PIPELINE *cm_pipeline_Create (ESL_GETOPTS *go, ESL_ALPHABET *abc, int clen_hint, int L_hint, int64_t Z, enum cm_zsetby_e Z_setby, enum cm_pipemodes_e mode);
2944 extern int          cm_pipeline_Reuse  (CM_PIPELINE *pli);
2945 extern void         cm_pipeline_Destroy(CM_PIPELINE *pli, CM_t *cm);
2946 extern int          cm_pipeline_Merge  (CM_PIPELINE *p1, CM_PIPELINE *p2);
2947 
2948 extern int   cm_pli_TargetReportable  (CM_PIPELINE *pli, float score,     double Eval);
2949 extern int   cm_pli_TargetIncludable  (CM_PIPELINE *pli, float score,     double Eval);
2950 extern int   cm_pli_NewModel          (CM_PIPELINE *pli, int modmode, CM_t *cm, int cm_clen, int cm_W, int cm_nbp, P7_OPROFILE *om, P7_BG *bg, float *p7_evparam, int p7_max_length, int64_t cur_cm_idx, int cur_clan_idx, ESL_KEYHASH *glocal_kh);
2951 extern int   cm_pli_NewModelThresholds(CM_PIPELINE *pli, CM_t *cm);
2952 extern int   cm_pli_NewSeq            (CM_PIPELINE *pli, const ESL_SQ *sq, int64_t cur_seq_idx);
2953 extern int   cm_Pipeline              (CM_PIPELINE *pli, off_t cm_offset, P7_OPROFILE *om, P7_BG *bg, float *p7_evparam, P7_SCOREDATA *msvdata, ESL_SQ *sq, CM_TOPHITS *hitlist, int in_rc, P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_PROFILE **opt_Rgm, P7_PROFILE **opt_Lgm, P7_PROFILE **opt_Tgm, CM_t **opt_cm);
2954 extern int   cm_pli_Statistics    (FILE *ofp, CM_PIPELINE *pli, ESL_STOPWATCH *w);
2955 extern int   cm_pli_ZeroAccounting(CM_PLI_ACCT *pli_acct);
2956 extern int   cm_pli_PassEnforcesFirstRes(int pass_idx);
2957 extern int   cm_pli_PassEnforcesFinalRes(int pass_idx);
2958 extern int   cm_pli_PassAllowsTruncation(int pass_idx);
2959 extern void  cm_pli_AdjustNresForOverlaps(CM_PIPELINE *pli, int64_t noverlap, int in_rc);
2960 
2961 /* from cm_qdband.c */
2962 extern void     BandExperiment(CM_t *cm);
2963 extern int      CalculateQueryDependentBands(CM_t *cm, char *errbuf, CM_QDBINFO *qdbinfo, double beta_W, int *ret_W, double **ret_gamma0_loc, double **ret_gamma0_glb, int *ret_Z);
2964 extern int      BandCalculationEngine(CM_t *cm, int Z, CM_QDBINFO *qdbinfo, double beta_W, int *ret_W, double ***ret_gamma, double **ret_gamma0_loc, double **ret_gamma0_glb);
2965 extern int      BandTruncationNegligible(double *density, int b, int Z, double *ret_beta);
2966 extern int      BandMonteCarlo(CM_t *cm, int nsample, int Z, double ***ret_gamma);
2967 extern void     FreeBandDensities(CM_t *cm, double **gamma);
2968 extern void     BandBounds(double **gamma, int M, int Z, double p,
2969 			   int **ret_min, int **ret_max);
2970 extern void     PrintBandGraph(FILE *fp, double **gamma, int *min, int *max, int v, int Z);
2971 extern void     PrintDPCellsSaved(CM_t *cm, int *min, int *max, int W);
2972 extern void     ExpandBands(CM_t *cm, int qlen, int *dmin, int *dmax);
2973 extern void     qdb_trace_info_dump(CM_t *cm, Parsetree_t *tr, int *dmin, int *dmax, int bdump_level);
2974 extern CM_QDBINFO  *CreateCMQDBInfo(int M, int clen);
2975 extern float        SizeofCMQDBInfo(CM_QDBINFO *qdbinfo);
2976 extern void         FreeCMQDBInfo(CM_QDBINFO *qdbinfo);
2977 extern int          CopyCMQDBInfo(const CM_QDBINFO *src, CM_QDBINFO *dst, char *errbuf);
2978 extern void         DumpCMQDBInfo(FILE *fp, CM_t *cm, CM_QDBINFO *qdbinfo);
2979 extern int          CheckCMQDBInfo(CM_QDBINFO *qdbinfo, double beta1, int do_check1, double beta2, int do_check2);
2980 
2981 /* from cm_submodel.c */
2982 extern int  build_sub_cm(CM_t *orig_cm, char *errbuf, CM_t **ret_cm, int sstruct, int estruct, CMSubMap_t **ret_submap, int print_flag);
2983 extern void CP9NodeForPosn(CP9_t *hmm, int i0, int j0, int x, CP9_MX *post, int *ret_node, int *ret_type, float pmass, int is_start, int print_flag);
2984 extern void StripWUSSGivenCC(ESL_MSA *msa, float gapthresh, int first_match, int last_match);
2985 extern int  check_orig_psi_vs_sub_psi(CM_t *orig_cm, CM_t *sub_cm, char *errbuf, CMSubMap_t *submap, double threshold, int print_flag);
2986 extern int  check_sub_cm(CM_t *orig_cm, CM_t *sub_cm, char *errbuf, CMSubMap_t *submap, CMSubInfo_t *subinfo, float pthresh, int print_flag);
2987 extern int  check_sub_cm_by_sampling(CM_t *orig_cm, CM_t *sub_cm, char *errbuf, ESL_RANDOMNESS *r, CMSubMap_t *submap, CMSubInfo_t *subinfo,
2988 				     float chi_thresh, int nsamples, int print_flag);
2989 extern int  sub_cm2cm_parsetree(CM_t *orig_cm, CM_t *sub_cm, Parsetree_t **ret_orig_tr, Parsetree_t *sub_tr,
2990 				CMSubMap_t *submap, int print_flag);
2991 extern CMSubMap_t  *AllocSubMap(CM_t *sub_cm, CM_t *orig_cm, int sstruct, int estruct);
2992 extern void         FreeSubMap(CMSubMap_t *submap);
2993 extern CMSubInfo_t *AllocSubInfo(int clen);
2994 extern void         FreeSubInfo(CMSubInfo_t *subinfo);
2995 extern void  debug_print_cm_params(FILE *fp, CM_t *cm);
2996 extern int   SubCMLogoddsify(CM_t *cm, char *errbuf, CM_t *mother_cm, CMSubMap_t *mother_map);
2997 extern float ** SubFCalcAndCopyOptimizedEmitScoresFromMother(CM_t *cm, CM_t *mother_cm, CMSubMap_t *mother_map);
2998 extern void  CP9_reconfig2sub(CP9_t *hmm, int spos, int epos, int spos_nd, int epos_nd, double **orig_phi);
2999 
3000 
3001 /* from cm_tophits.c */
3002 extern CM_TOPHITS *cm_tophits_Create(void);
3003 extern int         cm_tophits_Grow(CM_TOPHITS *h);
3004 extern int         cm_tophits_CreateNextHit(CM_TOPHITS *h, CM_HIT **ret_hit);
3005 extern int         cm_tophits_SortByEvalue(CM_TOPHITS *h);
3006 extern int         cm_tophits_SortForOverlapRemoval(CM_TOPHITS *h);
3007 extern int         cm_tophits_SortForOverlapMarkup(CM_TOPHITS *h, int do_clans_only);
3008 extern int         cm_tophits_SortByPosition(CM_TOPHITS *h);
3009 extern int         cm_tophits_Merge(CM_TOPHITS *h1, CM_TOPHITS *h2);
3010 extern int         cm_tophits_GetMaxPositionLength(CM_TOPHITS *h);
3011 extern int         cm_tophits_GetMaxTargetLength(CM_TOPHITS *h);
3012 extern int         cm_tophits_GetMaxNameLength(CM_TOPHITS *h);
3013 extern int         cm_tophits_GetMaxDescLength(CM_TOPHITS *h);
3014 extern int         cm_tophits_GetMaxAccessionLength(CM_TOPHITS *h);
3015 extern int         cm_tophits_GetMaxShownLength(CM_TOPHITS *h);
3016 extern int         cm_tophits_GetMaxClanLength(CM_TOPHITS *h, ESL_KEYHASH *clan_name_kh);
3017 extern int         cm_tophits_Reuse(CM_TOPHITS *h);
3018 extern void        cm_tophits_Destroy(CM_TOPHITS *h);
3019 extern int         cm_tophits_CloneHitMostly(CM_TOPHITS *src_th, int h, CM_TOPHITS *dest_th);
3020 extern int         cm_tophits_ComputeEvalues(CM_TOPHITS *th, double eZ, int istart);
3021 extern int         cm_tophits_RemoveOrMarkOverlaps(CM_TOPHITS *th, int do_clans_only, char *errbuf);
3022 extern int         cm_tophits_UpdateHitPositions(CM_TOPHITS *th, int hit_start, int64_t seq_start, int in_revcomp);
3023 extern int         cm_tophits_SetSourceLengths(CM_TOPHITS *th, int64_t *srcL, uint64_t nseqs);
3024 extern int64_t     cm_tophits_OverlapNres(int64_t from1, int64_t to1, int64_t from2, int64_t to2, int64_t *ret_nes, char *errbuf);
3025 
3026 extern int cm_tophits_Threshold(CM_TOPHITS *th, CM_PIPELINE *pli);
3027 extern int cm_tophits_Targets(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli, int textw);
3028 extern int cm_tophits_F3Targets(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli);
3029 extern int cm_tophits_HitAlignments(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli, int textw);
3030 extern int cm_tophits_HitAlignmentStatistics(FILE *ofp, CM_TOPHITS *th, int used_cyk, int used_hb, double default_tau);
3031 extern int cm_tophits_Alignment(CM_t *cm, const CM_TOPHITS *th, char *errbuf, ESL_MSA **ret_msa);
3032 extern int cm_tophits_TabularTargets1(FILE *ofp, char *qname, char *qacc, CM_TOPHITS *th, CM_PIPELINE *pli, int show_header);
3033 extern int cm_tophits_TabularTargets2(FILE *ofp, char *qname, char *qacc, CM_TOPHITS *th, CM_PIPELINE *pli, int show_header, ESL_KEYHASH *clan_name_kh, int skip_overlaps, char *errbuf);
3034 extern int cm_tophits_F3TabularTargets1(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli, int show_header);
3035 extern int cm_tophits_TabularTail(FILE *ofp, const char *progname, enum cm_pipemodes_e pipemode, const char *qfile, const char *tfile, const ESL_GETOPTS *go);
3036 extern int cm_tophits_Dump(FILE *fp, const CM_TOPHITS *th);
3037 
3038 extern int    cm_hit_AllowTruncation(CM_t *cm, int pass_idx, int64_t start, int64_t stop, int64_t i0, int64_t j0, char mode, int b);
3039 extern int    cm_hit_Dump(FILE *fp, const CM_HIT *h);
3040 
3041 /* from cm_trunc.c */
3042 extern CM_TR_PENALTIES *cm_tr_penalties_Create(CM_t *cm, int ignore_inserts, char *errbuf);
3043 extern int              cm_tr_penalties_Validate(CM_TR_PENALTIES *trp, CM_t *cm, double tol, char *errbuf);
3044 extern void             cm_tr_penalties_Dump(FILE *fp, const CM_t *cm, const CM_TR_PENALTIES *trp);
3045 extern float            cm_tr_penalties_Sizeof(CM_TR_PENALTIES *trp);
3046 extern void             cm_tr_penalties_Destroy(CM_TR_PENALTIES *trp);
3047 extern int              cm_tr_penalties_IdxForPass(int pass_idx);
3048 
3049 /* from cm_p7_band.c */
3050 extern int          p7_gmx_Match2DMatrix(P7_GMX *gx, int do_diff, ESL_DMATRIX **ret_D, double *ret_min, double *ret_max);
3051 extern int          my_dmx_Visualize(FILE *fp, ESL_DMATRIX *D, double min, double max, double min2fill);
3052 extern int          my_p7_GTraceMSV(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, const P7_GMX *gx, P7_TRACE *tr, int **ret_i2k, int **ret_k2i, float **ret_sc, int **ret_iconflict);
3053 extern int          Parsetree2i_to_k(CM_t *cm, CMEmitMap_t *emap, int L, char *errbuf, Parsetree_t *tr, int **ret_i2k);
3054 extern int          prune_i2k(int *i2k, int *iconflict, float *isc, int L, double **phi, float min_sc, int min_len, int min_end, float min_mprob, float min_mcprob, float max_iprob, float max_ilprob);
3055 extern int          p7_pins2bands(int *i2k, char *errbuf, int L, int M, int pad, int **ret_imin, int **ret_imax, int *ret_ncells);
3056 extern int          DumpP7Bands(FILE *fp, int *i2k, int *kmin, int *kmax, int L);
3057 extern int          cp9_ForwardP7B(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, float *ret_sc);
3058 extern int          cp9_ForwardP7B_OLD_WITH_EL(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, float *ret_sc);
3059 extern int          cp9_BackwardP7B(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, float *ret_sc);
3060 extern int          cp9_CheckFBP7B(CP9_MX *fmx, CP9_MX *bmx, CP9_t *hmm, char *errbuf, float sc, int i0, int j0, ESL_DSQ *dsq, int *kmin, int *kmax);
3061 extern int          cp9_Seq2BandsP7B     (CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int L, CP9Bands_t *cp9b, int *kmin, int *kmax, int debug_level);
3062 extern int          cp9_Seq2PosteriorsP7B(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, int debug_level);
3063 extern int          cp9_PosteriorP7B(ESL_DSQ *dsq, char *errbuf, int L, CP9_t *hmm, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, int *kmin, int *kmax);
3064 extern int          cp9_FB2HMMBandsP7B(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b, int L, int M, double p_thresh, int do_old_hmm2ij, int *kmin, int *kmax, int debug_level);
3065 extern int          p7_Seq2Bands(CM_t *cm, char *errbuf, P7_PROFILE *gm, P7_GMX *gx, P7_BG *bg, P7_TRACE *p7_tr, ESL_DSQ *dsq, int L,
3066 				 double **phi, float sc7, int len7, int end7, float mprob7, float mcprob7, float iprob7, float ilprob7, int pad7,
3067 				 int **ret_i2k, int **ret_kmin, int **ret_kmax, int *ret_ncells);
3068 
3069 extern int          CP9NodeForPosnP7B(CP9_t *hmm, char *errbuf, int x, CP9_MX *post, int kn, int kx, int *ret_node, int *ret_type, int print_flag);
3070 extern int          P7BandsAdjustForSubCM(int *kmin, int *kmax, int L, int spos, int epos);
3071 
3072 /* from cm_p7_domaindef.c */
3073 extern int p7_domaindef_GlocalByPosteriorHeuristics(const ESL_SQ *sq, P7_PROFILE *gm, P7_GMX *gxf, P7_GMX *gxb,
3074 						    P7_GMX *fwd, P7_GMX *bck, P7_DOMAINDEF *ddef, int do_null2);
3075 
3076 /* from cm_p7_modelconfig_trunc.c */
3077 extern int p7_ProfileConfig5PrimeTrunc(P7_PROFILE *gm, int L);
3078 extern int p7_ProfileConfig3PrimeTrunc(const P7_HMM *hmm, P7_PROFILE *gm, int L);
3079 extern int p7_ProfileConfig5PrimeAnd3PrimeTrunc(P7_PROFILE *gm, int L);
3080 extern int p7_ReconfigLength5PrimeTrunc(P7_PROFILE *gm, int L);
3081 extern int p7_ReconfigLength3PrimeTrunc(P7_PROFILE *gm, int L);
3082 
3083 /* from cm_p7_modelmaker.c */
3084 extern int          BuildP7HMM_MatchEmitsOnly(CM_t *cm, CP9_t *cp9, P7_HMM **ret_p7);
3085 extern int          cm_cp9_to_p7(CM_t *cm, CP9_t *cp9, char *errbuf);
3086 extern int          cm_p7_Calibrate(P7_HMM *hmm, char *errbuf, int ElmL, int ElvL, int ElfL, int EgfL, int ElmN, int ElvN, int ElfN, int EgfN, double ElfT, double EgfT, double *ret_gfmu, double *ret_gflambda);
3087 extern int          cm_p7_Tau(ESL_RANDOMNESS *r, char *errbuf, P7_OPROFILE *om, P7_PROFILE *gm, P7_BG *bg, int L, int N, double lambda, double tailp, double *ret_tau);
3088 extern int          cm_SetFilterHMM(CM_t *cm, P7_HMM *hmm, double gfmu, double gflambda);
3089 extern int          dump_p7(P7_HMM *hmm, FILE *fp);
3090 extern float        cm_p7_hmm_Sizeof(P7_HMM *hmm);
3091 extern int          cm_p7_hmm_SetConsensus(P7_HMM *hmm);
3092 
3093 /* from cp9.c */
3094 extern CP9_t *AllocCPlan9(int M, const ESL_ALPHABET *abc);
3095 extern CP9_t *AllocCPlan9Shell(void);
3096 extern void   AllocCPlan9Body(CP9_t *hmm, int M, const ESL_ALPHABET *abc);
3097 extern void   FreeCPlan9(CP9_t *hmm);
3098 extern void   ZeroCPlan9(CP9_t *hmm);
3099 extern void   CPlan9SetNullModel(CP9_t *hmm, float *null, float p1);
3100 extern int    cp9_GetNCalcsPerResidue(CP9_t *cp9, char *errbuf, float *ret_cp9_ncalcs_per_res);
3101 extern CP9_t *cp9_Clone(CP9_t *cp9);
3102 extern int    cp9_Copy(const CP9_t *src, CP9_t *dst);
3103 extern float  cp9_Sizeof(CP9_t *cp9);
3104 extern void   CP9Logoddsify(CP9_t *hmm);
3105 extern void   CPlan9Renormalize(CP9_t *hmm);
3106 
3107 /* from cp9_dp.c */
3108 extern int cp9_Viterbi(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align,
3109 		       int be_efficient, int **ret_psc, int *ret_maxres, CP9trace_t **ret_tr, float *ret_sc);
3110 extern int cp9_ViterbiBackward(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align,
3111 			       int be_efficient, int **ret_psc, int *ret_maxres, CP9trace_t **ret_tr, float *ret_sc);
3112 extern int cp9_Forward(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align,
3113 		       int be_efficient, int **ret_psc, int *ret_maxres, float *ret_sc);
3114 extern int cp9_Backward(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align,
3115 			int be_efficient, int **ret_psc, int *ret_maxres, float *ret_sc);
3116 extern int cp9_CheckFB(CP9_MX *fmx, CP9_MX *bmx, CP9_t *hmm, char *errbuf, float sc, int i0, int j0, ESL_DSQ *dsq);
3117 
3118 /* from cp9_modelmaker.c */
3119 extern CP9Map_t *AllocCP9Map(CM_t *cm);
3120 extern float SizeofCP9Map(CP9Map_t *cp9map);
3121 extern void  FreeCP9Map(CP9Map_t *cp9map);
3122 extern int   build_cp9_hmm(CM_t *cm, char *errbuf, int do_psi_test, float thresh, int debug_level, CP9_t **ret_hmm, CP9Map_t **ret_cp9map);
3123 extern void  CP9_map_cm2hmm(CM_t *cm, CP9Map_t *cp9map, int debug_level);
3124 extern void  fill_phi_cp9(CP9_t *hmm, double ***ret_phi, int spos, int entered_only);
3125 extern void  map_helper(CM_t *cm, CP9Map_t *cp9map, int k, int ks, int v);
3126 extern int   CP9_check_by_sampling(CM_t *cm, CP9_t *hmm, char *errbuf, ESL_RANDOMNESS *r, CMSubInfo_t *subinfo, int spos, int epos, float chi_thresh, int nsamples, int print_flag);
3127 extern void  debug_print_cp9_params(FILE *fp, CP9_t *hmm, int print_scores);
3128 extern void  debug_print_phi_cp9(CP9_t *hmm, double **phi);
3129 extern int   MakeDealignedString(const ESL_ALPHABET *abc, char *aseq, int alen, char *ss, char **ret_s);
3130 extern int   sub_build_cp9_hmm_from_mother(CM_t *cm, char *errbuf, CM_t *mother_cm, CMSubMap_t *mother_map, CP9_t **ret_hmm, CP9Map_t **ret_cp9map, int do_psi_test,
3131 					  float psi_vs_phi_threshold, int debug_level);
3132 extern void  CPlan9InitEL(CP9_t *cp9, CM_t *cm);
3133 
3134 /* from cp9_mx.c */
3135 extern CP9_MX *CreateCP9Matrix(int N, int M);
3136 extern void    FreeCP9Matrix  (CP9_MX *mx);
3137 extern int     GrowCP9Matrix  (CP9_MX *mx, char *errbuf, int N, int M, int *kmin, int *kmax, int ***mmx, int ***imx, int ***dmx, int ***elmx, int **erow);
3138 extern void    InitializeCP9Matrix(CP9_MX *mx);
3139 
3140 /* from cp9_trace.c */
3141 extern void  CP9AllocTrace(int tlen, CP9trace_t **ret_tr);
3142 extern void  CP9ReallocTrace(CP9trace_t *tr, int tlen);
3143 extern void  CP9FreeTrace(CP9trace_t *tr);
3144 extern void  CP9_fake_tracebacks(ESL_MSA *msa, int *matassign, CP9trace_t ***ret_tr);
3145 extern void  CP9TraceCount(CP9_t *hmm, ESL_DSQ *dsq, float wt, CP9trace_t *tr);
3146 extern float CP9TraceScore(CP9_t *hmm, ESL_DSQ *dsq, CP9trace_t *tr);
3147 extern char *CP9Statetype(char st);
3148 extern void  CP9PrintTrace(FILE *fp, CP9trace_t *tr, CP9_t *hmm, ESL_DSQ *dsq);
3149 extern int   CP9TransitionScoreLookup(CP9_t *hmm, char st1, int k1,
3150 				    char st2, int k2);
3151 extern void  CP9ViterbiTrace(CP9_t *hmm, ESL_DSQ *dsq, int i0, int j0,
3152 			     CP9_MX *mx, CP9trace_t **ret_tr);
3153 extern void  CP9ReverseTrace(CP9trace_t *tr);
3154 extern int   CP9Traces2Alignment(CM_t *cm, CP9_t *cp9, const ESL_ALPHABET *abc, ESL_SQ **sq, float *wgt,
3155 				 int nseq, CP9trace_t **tr, int do_full, int do_matchonly, ESL_MSA **ret_msa);
3156 extern int   CP9TraceScoreCorrectionNull2(CP9_t *hmm, char *errbuf, CP9trace_t *tr, ESL_DSQ *dsq, int start, float omega, float *ret_sc);
3157 
3158 /* from alphabet.c */
3159 extern void   PairCount(const ESL_ALPHABET *abc, float *counters, ESL_DSQ syml, ESL_DSQ symr, float wt);
3160 extern float  DegeneratePairScore(const ESL_ALPHABET *abc, float *esc, ESL_DSQ syml, ESL_DSQ symr);
3161 extern int    iDegeneratePairScore(const ESL_ALPHABET *abc, int *esc, ESL_DSQ syml, ESL_DSQ symr);
3162 extern char   resolve_degenerate (ESL_RANDOMNESS *r, char c);
3163 extern int    revcomp(const ESL_ALPHABET *abc, ESL_SQ *comp, ESL_SQ *sq);
3164 extern float  FastPairScoreBothDegenerate(int K, float *esc, float *left, float *right);
3165 extern int  iFastPairScoreBothDegenerate(int K, int *esc, float *left, float *right);
3166 extern float FastPairScoreLeftOnlyDegenerate(int K, float *esc, float *left, ESL_DSQ symr);
3167 extern int  iFastPairScoreLeftOnlyDegenerate(int K, int *iesc, float *left, ESL_DSQ symr);
3168 extern float FastPairScoreRightOnlyDegenerate(int K, float *esc, float *right, ESL_DSQ syml);
3169 extern float iFastPairScoreRightOnlyDegenerate(int K, int *iesc, float *right, ESL_DSQ syml);
3170 extern float  FastPairScoreBothDegenerate(int K, float *esc, float *left, float *right);
3171 extern int  iFastPairScoreBothDegenerate(int K, int *esc, float *left, float *right);
3172 extern float FastPairScoreLeftOnlyDegenerate(int K, float *esc, float *left, ESL_DSQ symr);
3173 extern int  iFastPairScoreLeftOnlyDegenerate(int K, int *iesc, float *left, ESL_DSQ symr);
3174 extern float FastPairScoreRightOnlyDegenerate(int K, float *esc, float *right, ESL_DSQ syml);
3175 extern float iFastPairScoreRightOnlyDegenerate(int K, int *iesc, float *right, ESL_DSQ syml);
3176 
3177 
3178 /* from display.c */
3179 extern Fancyali_t    *CreateFancyAli(const ESL_ALPHABET *abc, Parsetree_t *tr, CM_t *cm, CMConsensus_t *cons, ESL_DSQ *dsq, int do_noncanonical, char *pcode);
3180 extern void           PrintFancyAli(FILE *fp, Fancyali_t *ali, int64_t offset, int in_revcomp, int do_top, int linewidth);
3181 extern void           FreeFancyAli(Fancyali_t *ali);
3182 extern CMConsensus_t *CreateCMConsensus(CM_t *cm, const ESL_ALPHABET *abc);
3183 extern void           FreeCMConsensus(CMConsensus_t *con);
3184 extern int            IsCompensatory(const ESL_ALPHABET *abc, float *pij, int symi, int symj);
3185 extern CMEmitMap_t   *CreateEmitMap(CM_t *cm);
3186 extern float          SizeofEmitMap(CM_t *cm, CMEmitMap_t *emap);
3187 extern void           DumpEmitMap(FILE *fp, CMEmitMap_t *map, CM_t *cm);
3188 extern void           FreeEmitMap(CMEmitMap_t *map);
3189 extern void           FormatTimeString(char *buf, double sec, int do_frac);
3190 extern int            GetDate(char *errbuf, char **ret_date);
3191 
3192 /* from errors.c */
3193 extern void cm_Die (char *format, ...);
3194 extern void cm_Fail(char *format, ...);
3195 
3196 /* from eweight.c */
3197 extern int    cm_EntropyWeight(CM_t *cm, const Prior_t *pri, double etarget, double min_Neff, double max_Neff, int pretend_cm_is_hmm, double *ret_hmm_re, double *ret_Neff);
3198 extern void   cm_Rescale(CM_t *hmm, float scale);
3199 extern void   cp9_Rescale(CP9_t *hmm, float scale);
3200 extern double cm_MeanMatchInfo(const CM_t *cm);
3201 extern double cm_MeanMatchEntropy(const CM_t *cm);
3202 extern double cm_MeanMatchRelativeEntropy(const CM_t *cm);
3203 extern double cm_MeanMatchInfoHMM(const CM_t *cm);
3204 extern double cm_MeanMatchEntropyHMM(const CM_t *cm);
3205 extern double cm_MeanMatchRelativeEntropyHMM(const CM_t *cm);
3206 extern double cp9_MeanMatchInfo(const CP9_t *cp9);
3207 extern double cp9_MeanMatchEntropy(const CP9_t *cp9);
3208 extern double cp9_MeanMatchRelativeEntropy(const CP9_t *cp9);
3209 
3210 /* from hmmband.c */
3211 extern CP9Bands_t  *AllocCP9Bands(int cm_M, int hmm_M);
3212 extern float        SizeofCP9Bands(CP9Bands_t *cp9b);
3213 extern void         FreeCP9Bands(CP9Bands_t *cp9bands);
3214 extern int          cp9_HMM2ijBands(CM_t *cm, char *errbuf, CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int do_trunc, int debug_level);
3215 extern int          cp9_HMM2ijBands_OLD(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int debug_level);
3216 extern int          cp9_Seq2Bands     (CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, CP9Bands_t *cp9b, int doing_search, int pass_idx, int debug_level);
3217 extern int          cp9_IterateSeq2Bands(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int64_t i0, int64_t j0, int pass_idx, float size_limit, int doing_search, int do_sample, int do_post, int do_iterate, double maxtau, float *ret_Mb);
3218 extern int          cp9_Seq2Posteriors(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, int debug_level);
3219 extern void         cp9_DebugPrintHMMBands(FILE *ofp, int L, CP9Bands_t *cp9b, double hmm_bandp, int debug_level);
3220 extern int          cp9_GrowHDBands(CP9Bands_t *cp9b, char *errbuf);
3221 extern int          cp9_ValidateBands(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int i0, int j0, int do_trunc);
3222 extern void         cp9_ShiftCMBands(CM_t *cm, int i, int j, int do_trunc);
3223 extern CP9Bands_t  *cp9_CloneBands(CP9Bands_t *src_cp9b, char *errbuf);
3224 extern void         cp9_PredictStartAndEndPositions(CP9_MX *pmx, CP9Bands_t *cp9b, int i0, int j0);
3225 extern int          cp9_MarginalCandidatesFromStartEndPositions(CM_t *cm, CP9Bands_t *cp9b, int pass_idx, char *errbuf);
3226 extern void         ij2d_bands(CM_t *cm, int L, int *imin, int *imax, int *jmin, int *jmax,
3227 			       int **hdmin, int **hdmax, int do_trunc, int debug_level);
3228 extern void         PrintDPCellsSaved_jd(CM_t *cm, int *jmin, int *jmax, int **hdmin, int **hdmax, int W);
3229 extern void         debug_print_ij_bands(CM_t *cm);
3230 
3231 /* from logsum.c */
3232 extern void  init_ilogsum(void);
3233 extern int   ILogsum(int s1, int s2);
3234 extern int   ILogsumNI(int s1, int s2);
3235 extern int   ILogsumNI_diff(int s1a, int s1b, int s2a, int s2b, int db);
3236 extern void  FLogsumInit(void);
3237 extern float LogSum2(float p1, float p2);
3238 extern float FLogsum(float p1, float p2);
3239 
3240 /* from mpisupport.c */
3241 #if HAVE_MPI
3242 extern int cm_master_MPIBcast(CM_t *cm, char *errbuf, int tag, MPI_Comm comm, char **buf, int *nalloc);
3243 extern int cm_worker_MPIBcast(char *errbuf, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_ALPHABET **abc, CM_t **ret_cm);
3244 extern int cm_nonconfigured_MPIUnpack(ESL_ALPHABET **abc, char *errbuf, char *buf, int n, int *pos, MPI_Comm comm, CM_t **ret_cm);
3245 extern int cm_nonconfigured_MPIPack(CM_t *cm, char *errbuf, char *buf, int n, int *pos, MPI_Comm comm);
3246 extern int cm_nonconfigured_MPIPackSize(CM_t *cm, MPI_Comm comm, int *ret_n);
3247 extern int cm_dsq_MPISend(ESL_DSQ *dsq, int64_t L, int64_t idx, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc);
3248 extern int cm_dsq_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_DSQ **ret_dsq, int64_t *ret_L, int64_t *ret_idx);
3249 extern int cm_parsetree_MPIPackSize(Parsetree_t *tr, MPI_Comm comm, int *ret_n);
3250 extern int cm_parsetree_MPIPack(Parsetree_t *tr, char *buf, int n, int *position, MPI_Comm comm);
3251 extern int cm_parsetree_MPIUnpack(char *buf, int n, int *pos, MPI_Comm comm, Parsetree_t **ret_tr);
3252 extern int cm_pipeline_MPISend(CM_PIPELINE *pli, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc);
3253 extern int cm_pipeline_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_GETOPTS *go, CM_PIPELINE **ret_pli);
3254 extern int cm_tophits_MPISend(CM_TOPHITS *th, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc);
3255 extern int cm_tophits_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, CM_TOPHITS **ret_th);
3256 extern int cm_alndata_MPISend(CM_ALNDATA *data, int include_sq, char *errbuf, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc);
3257 extern int cm_alndata_MPIUnpack(char *buf, int n, int *pos, MPI_Comm comm, ESL_ALPHABET *abc, CM_ALNDATA **ret_data);
3258 #endif
3259 
3260 /* from prior.c */
3261 extern Prior_t *Prior_Create(void);
3262 extern void     Prior_Destroy(Prior_t *pri);
3263 extern Prior_t *Prior_Read(FILE *fp);
3264 extern void     PriorifyCM(CM_t *cm, const Prior_t *pri);
3265 extern Prior_t *Prior_Default(int mimic_h3);
3266 extern Prior_t *Prior_Default_v0p56_through_v1p02(void);
3267 
3268 /* from rnamat.c */
3269 extern int numbered_nucleotide (char c);
3270 extern int numbered_basepair (char c, char d);
3271 extern FILE *MatFileOpen (char *matfile);
3272 extern fullmat_t *ReadMatrix(const ESL_ALPHABET *abc, FILE *matfp);
3273 extern int ribosum_MSA_resolve_degeneracies(fullmat_t *fullmat, ESL_MSA *msa);
3274 extern int ribosum_calc_targets(fullmat_t *fullmat);
3275 extern void FreeMat(fullmat_t *fullmat);
3276 
3277 /* from stats.c */
3278 extern int        debug_print_expinfo_array(CM_t *cm, char *errbuf, ExpInfo_t **expA);
3279 extern int        debug_print_expinfo(ExpInfo_t *exp);
3280 extern int        get_gc_comp(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int start, int stop);
3281 extern int        get_alphabet_comp(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int start, int stop, float **ret_freq);
3282 extern int        GetDBSize (ESL_SQFILE *sqfp, char *errbuf, long *ret_N, int *ret_nseq, int *ret_namewidth);
3283 extern int        GetDBInfo(const ESL_ALPHABET *abc, ESL_SQFILE *sqfp, char *errbuf, long *ret_N, int *ret_nseq, double **ret_gc_ct);
3284 extern int        E2ScoreGivenExpInfo(ExpInfo_t *exp, char *errbuf, double E, float *ret_sc);
3285 extern int        P2ScoreGivenExpInfo(ExpInfo_t *exp, char *errbuf, double P, float *ret_sc);
3286 extern double     Score2E(float x, double mu, double lambda, double eff_dbsize);
3287 extern float      cm_p7_E2Score(double E, double Z, int hitlen, float mu, float lambda);
3288 extern float      cm_p7_P2Score(double P, float mu, float lambda);
3289 extern int        ExpModeIsLocal(int exp_mode);
3290 extern int        ExpModeIsInside(int exp_mode);
3291 extern ExpInfo_t *CreateExpInfo();
3292 extern void       SetExpInfo(ExpInfo_t *exp, double lambda, double mu_orig, double dbsize, int nrandhits, double tailp);
3293 extern ExpInfo_t *DuplicateExpInfo(ExpInfo_t *src);
3294 extern char      *DescribeExpMode(int exp_mode);
3295 extern int        UpdateExpsForDBSize(CM_t *cm, char *errbuf, double dbsize);
3296 extern int        CreateGenomicHMM(const ESL_ALPHABET *abc, char *errbuf, double **ret_sA, double ***ret_tAA, double ***ret_eAA, int *ret_nstates);
3297 extern int        SampleGenomicSequenceFromHMM(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, char *errbuf, double *sA, double **tAA, double **eAA, int nstates, int L, ESL_DSQ **ret_dsq);
3298 extern int        CopyExpInfo(ExpInfo_t *src, ExpInfo_t *dest);
3299 
3300 /* from truncyk.c */
3301 void  SetMarginalScores_reproduce_i27(CM_t *cm);
3302 float LeftMarginalScore_reproduce_i27(const ESL_ALPHABET *abc, float *esc, ESL_DSQ dres);
3303 float RightMarginalScore_reproduce_i27(const ESL_ALPHABET *abc, float *esc, ESL_DSQ dres);
3304 
3305 float TrCYK_DnC(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, int pass_idx, int do_1p0, Parsetree_t **ret_tr);
3306 float TrCYK_Inside(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, int pass_idx, int do_1p0, int lenCORREX, Parsetree_t **ret_tr);
3307 /* legacy, avoid use: */
3308 float trinside (CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0, int do_full,
3309                 void ****ret_shadow, void ****ret_L_shadow, void ****ret_R_shadow,
3310                 void ****ret_T_shadow, void ****ret_Lmode_shadow, void ****ret_Rmode_shadow,
3311                 int *ret_mode, int *ret_v, int *ret_i, int *ret_j);
3312 
3313 
3314 #endif /*INFERNALH_INCLUDED*/
3315 
3316 
3317