1 /* The all-encompassing include file for INFERNAL. 2 * All-encompassing because there's a lot of crossdependency. 3 * Previously (versions 1.0.2 and earlier) this 4 * was broken down into structs.h and funcs.h 5 * 6 * 1. Default values for various parameters, and other constants 7 * 2. Parsetree_t: binary tree structure for storing a traceback of an alignment 8 * 3. CMConsensus_t: consensus information on a CM 9 * 4. Fancyali_t: alignment of CM to a target sequence (largely replaced by CM_ALIDISPLAY) 10 * 5. CMEmitMap_t: map of model nodes to consensus positions. 11 * 6. deckpool_s: divide and conquer DP matrix state deck 12 * 7. GammaHitMx_t: semi-HMM used for optimal hit resolution of a CM scan 13 * 8. Prior_t: Dirichlet priors on all CM parameters 14 * 9. ExpInfo_t: exponential tail information for E-values 15 * 10. CP9_t: a CM plan 9 HMM 16 * 11. CP9_MX: a dynamic programming matrix for a CM Plan 9 HMM 17 * 12. CP9Bands_t: sequence and CM specific HMM bands 18 * 13. CP9trace_t: traceback structure for CP9 HMMs 19 * 14. CP9Map_t: map from a CM to a CP9 HMM and vice versa 20 * 15. CMSubMap_t: map of a template CM to a sub CM and vice versa 21 * 16. CMSubInfo_t: sub CM information, used for validating the sub CM construction procedure 22 * 17. RSEARCH constants 23 * 18. CM_MX: CM dynamic programming matrix; non-banded, non-truncated 24 * 19. CM_TR_MX: CM dynamic programming matrix; non-banded, truncated 25 * 20. CM_HB_MX: CM dynamic programming matrix; HMM banded, non-truncated 26 * 21. CM_TR_HB_MX: CM dynamic programming matrix; HMM banded, truncated 27 * 22. CM_SHADOW_MX: CM shadow matrix, for DP tracebacks; non-banded, non-truncated 28 * 23. CM_TR_SHADOW_MX: CM shadow matrix, for DP tracebacks; non-banded, truncated 29 * 24. CM_HB_SHADOW_MX: CM shadow matrix, for DP tracebacks; HMM banded, non-truncated 30 * 25. CM_TR_HB_SHADOW_MX: CM shadow matrix, for DP tracebacks; HMM banded, truncated 31 * 26. CM_EMIT_MX: CM emit matrix, info on PP of emitted residues; non-banded, non-truncated 32 * 27. CM_TR_EMIT_MX: CM emit matrix, info on PP of emitted residues; non-banded, truncated 33 * 28. CM_HB_EMIT_MX: CM emit matrix, info on PP of emitted residues; HMM banded, non-truncated 34 * 29. CM_TR_HB_EMIT_MX: CM emit matrix, info on PP of emitted residues; HMM banded, truncated 35 * 30. CM_QDBINFO: model specific QDB information, including 2 sets of bands 36 * 31. CM_SCAN_MX: matrices used for scanning CM DP algorithms; non-truncated 37 * 32. CM_TR_SCAN_MX: matrices used for scanning CM DP algorithms; truncated 38 * 33. CM_TR_PENALTIES: pass, state and locality-specific truncated alignment penalties 39 * 34. CM_t: a covariance model 40 * 35. CM_FILE: a CM save file or database, open for reading 41 * 36. CM_PLI_ACCT: pass specific statistics for a search/scan pipeline 42 * 37. CM_PIPELINE: the accelerated seq/profile comparison pipeline 43 * 38. CM_ALIDISPLAY: an alignment formatted for printing (replaces FancyAli_t) 44 * 39. CM_HIT: a hit between a CM and a sequence 45 * 40. CM_TOPHITS: ranked list of top-scoring hits 46 * 41. CM_P7_OM_BLOCK: block of P7_OPROFILEs and related info, for cmscan 47 * 42. CM_ALNDATA: information for alignment of a sequence to a CM 48 * 43. Routines in Infernal's exposed API 49 * 50 * Also, see impl_{sse,vmx}/impl_{sse,vmx}.h for additional API 51 * specific to the acceleration layer. 52 */ 53 #ifndef INFERNALH_INCLUDED 54 #define INFERNALH_INCLUDED 55 56 #include "esl_config.h" 57 #include "p7_config.h" 58 #include "config.h" 59 60 #include "easel.h" 61 #include "esl_alphabet.h" 62 #include "esl_mixdchlet.h" 63 #include "esl_random.h" 64 #include "esl_msa.h" 65 #include "esl_sq.h" 66 #include "esl_sqio.h" 67 68 #include "hmmer.h" 69 70 #ifdef HAVE_MPI 71 #include "mpi.h" 72 #endif 73 74 /*********************************************************************************** 75 * 1. Default values for various parameters, and other constant definitions. 76 ***********************************************************************************/ 77 78 #define DEFAULT_BETA_W 1E-7 79 #define DEFAULT_BETA_QDB1 1E-7 80 #define DEFAULT_BETA_QDB2 1E-15 81 #define DEFAULT_TAU 1E-7 82 #define DEFAULT_PBEGIN 0.05 /* EPN 06.29.07 (formerly 0.5) */ 83 #define DEFAULT_PEND 0.05 /* EPN 06.29.07 (formerly 0.5) */ 84 #define DEFAULT_ETARGET 0.59 /* EPN 07.10.07 (formerly (v0.7->v0.8)= 2.-0.54 = 1.46 */ 85 #define DEFAULT_ETARGET_HMMFILTER 0.38 /* EPN 04.16.12 */ 86 #define DEFAULT_NULL2_OMEGA 0.000015258791 /* 1/(2^16), the hard-coded prior probability of the null2 model */ 87 #define DEFAULT_NULL3_OMEGA 0.000015258791 /* 1/(2^16), the hard-coded prior probability of the null3 model */ 88 #define V1P0_NULL2_OMEGA 0.03125 /* 1/(2^5), the prior probability of the null2 model for v0.56->v1.0.2 */ 89 #define V1P0_NULL3_OMEGA 0.03125 /* 1/(2^5), the prior probability of the null3 model for v0.56->v1.0.2 */ 90 #define DEFAULT_EL_SELFPROB 0.94 91 #define DEFAULT_MAXTAU 0.1 /* default cm->maxtau, max allowed tau value during HMM band tightening */ 92 #define DEFAULT_CP9BANDS_THRESH1 0.01 /* default for CP9Bands_t thresh1, if occ[k] > thresh1 HMM posn k 'maybe used' */ 93 #define DEFAULT_CP9BANDS_THRESH2 0.98 /* default for CP9Bands_t thresh2, if occ[k] > thresh2 HMM posn k 'likely used' */ 94 #define DEFAULT_HB_MXSIZE_MAX_MB 512. /* maximum for auto-determined maximum HMM banded matrix size */ 95 #define DEFAULT_HB_MXSIZE_MAX_W 3000. /* a CM window size (cm->W) of this or higher results in max matrix size (DEFAULT_HB_MXSIZE_MAX_MB) */ 96 #define DEFAULT_HB_MXSIZE_MIN_MB 128. /* minimum for auto-determined maximum HMM banded matrix size */ 97 #define DEFAULT_HB_MXSIZE_MIN_W 1000. /* a CM window size (cm->W) of this or lower results in min matrix size (DEFAULT_HB_MXSIZE_MIN_MB) */ 98 99 /* Hard-coded values (not changeable by command-line options). 100 * All of these are related to HMM band tightening to reduce 101 * the required size of DP matrices for alignments to below 102 * a maximum limit. 103 */ 104 #define TAU_MULTIPLIER 2.0 /* value to multiply tau by during HMM band tightening */ 105 #define MAX_CP9BANDS_THRESH1 0.50 /* maximum value allowed for cp9b->thresh1 during HMM band tightening */ 106 #define MIN_CP9BANDS_THRESH2 0.75 /* minimum value allowed for cp9b->thresh2 during HMM band tightening */ 107 #define DELTA_CP9BANDS_THRESH1 0.02 /* value to increment cp9b->thresh1 by during HMM band tightening */ 108 #define DELTA_CP9BANDS_THRESH2 0.01 /* value to decrement cp9b->thresh2 by during HMM band tightening */ 109 110 /* number of possible integer GC contents, example 40 = 0.40 GC */ 111 #define GC_SEGMENTS 101 112 113 /* length of a sequence 'chunk' that gets processed in cmsearch/cmscan 114 * sequences larger than this get broken up into overlapping chunks of this size. 115 */ 116 #define CM_MAX_RESIDUE_COUNT 100000 /* differs from HMMER's default which is MAX_RESIDUE_COUNT from esl_sqio_(ascii|ncbi).c */ 117 118 /* P7 HMM E-value parameters, borrowed from HMMER, but with 2 new additions: CM_p7_GMU and CM_p7_GLAMBDA */ 119 #define CM_p7_NEVPARAM 8 /* number of statistical parameters stored in cm->p7_evparam */ 120 enum cm_p7_evparams_e {CM_p7_LMMU = 0, CM_p7_LMLAMBDA = 1, CM_p7_LVMU = 2, CM_p7_LVLAMBDA = 3, CM_p7_LFTAU = 4, CM_p7_LFLAMBDA = 5, CM_p7_GFMU = 6, CM_p7_GFLAMBDA = 7 }; 121 #define CM_p7_EVPARAM_UNSET -99999.0f /* if p7_evparam[0] is unset, then all unset */ 122 123 /* BE_EFFICIENT and BE_PARANOID are alternative (exclusive) settings 124 * for the do_full? argument to the alignment engines. 125 */ 126 #define BE_EFFICIENT 0 /* setting for do_full: small memory mode */ 127 #define BE_PARANOID 1 /* setting for do_full: keep whole matrix, perhaps for debugging */ 128 129 /* We're moderately paranoid about underflow and overflow errors, so 130 * we do some checking on the magnitude of the scores. 131 * 132 * IMPOSSIBLE, the "-infinity" value in a DP matrix must be > -FLT_MAX/3, so that 133 * we can add three of them together (alpha + tsc + esc) and not get an 134 * underflow error. ANSI guarantees us FLT_MAX >= 1e37. 135 * 136 * MAXSCOREVAL is the maximum value we allow in any alpha cell, tsc, or esc. 137 * 138 * IMPROBABLE must be > IMPOSSIBLE + 2* MAXSCOREVAL; such that adding 139 * any two valid score values (say, an alpha and a tsc) to IMPOSSIBLE 140 * gives us a number < IMPROBABLE, and we can reset it to IMPOSSIBLE. 141 * 142 * NOT_IMPOSSIBLE() exists because we can't compare floating point by 143 * equality. 144 * 145 * sreLOG2() exists because we want to work in bits, and we will need 146 * to take log(0). 147 */ 148 #define IMPOSSIBLE -1e36 149 #define MAXSCOREVAL 1e35 150 #define IMPROBABLE -5e35 151 #define NOT_IMPOSSIBLE(x) ((x) > -9.999e35) 152 #define NOT_IMPROBABLE(x) ((x) > -4.999e35) 153 #define sreLOG2(x) ((x) > 0 ? log(x) * 1.44269504 : IMPOSSIBLE) 154 #define sreEXP2(x) (exp((x) * 0.69314718 )) 155 #define epnEXP10(x) (exp((x) * 2.30258509 )) 156 #define NOTZERO(x) (fabs(x - 0.) > -1e6) 157 #define INFTY 987654321 /* infinity for purposes of integer DP cells */ 158 159 /* EPN, Fri Sep 7 16:49:43 2007 160 * From HMMER3's p7_config.h: 161 * 162 * Sean's notes (verbatim): 163 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 164 * In Forward algorithm implementations, we use a table lookup in 165 * p7_FLogsum() to calculate summed probabilities in log 166 * space. p7_INTSCALE defines the precision of the calculation; the 167 * default of 1000.0 means rounding differences to the nearest 0.001 168 * nat. p7_LOGSUM_TBL defines the size of the lookup table; the 169 * default of 16000 means entries are calculated for differences of 0 170 * to 16.000 nats (when p7_INTSCALE is 1000.0). e^{-p7_LOGSUM_TBL / 171 * p7_INTSCALE} should be on the order of the machine FLT_EPSILON, 172 * typically 1.2e-7. 173 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 174 * EPN: Infernal uses bits, not nats. 1.2e-7 =~ 2^-23 =~ e^-16. 175 * And I've removed the p7_ prefixes. 176 */ 177 #define INTSCALE 1000.0f 178 #define LOGSUM_TBL 23000 179 180 enum emitmode_e { 181 EMITLEFT = 0, 182 EMITRIGHT = 1, 183 EMITPAIR = 2, 184 EMITNONE = 3 185 }; 186 #define nEMITMODES 4 187 188 189 /* CM State types. (cm->sttype[]) 190 */ 191 #define MAXCONNECT 6 /* maximum number of states per node */ 192 193 #define D_st 0 /* delete */ 194 #define MP_st 1 /* match-pair */ 195 #define ML_st 2 /* match-left */ 196 #define MR_st 3 /* match-right */ 197 #define IL_st 4 /* insert-left */ 198 #define IR_st 5 /* insert-right */ 199 #define S_st 6 /* start */ 200 #define E_st 7 /* end */ 201 #define B_st 8 /* bifurcation */ 202 #define EL_st 9 /* local end */ 203 204 /* CM Node types (8) (cm->ndtype[]) 205 */ 206 #define NODETYPES 8 207 208 #define DUMMY_nd -1 209 #define BIF_nd 0 210 #define MATP_nd 1 211 #define MATL_nd 2 212 #define MATR_nd 3 213 #define BEGL_nd 4 214 #define BEGR_nd 5 215 #define ROOT_nd 6 216 #define END_nd 7 217 218 /* CM Unique state identifiers (cm->stid[]) 219 */ 220 #define UNIQUESTATES 21 221 222 #define DUMMY -1 223 #define ROOT_S 0 224 #define ROOT_IL 1 225 #define ROOT_IR 2 226 #define BEGL_S 3 227 #define BEGR_S 4 228 #define BEGR_IL 5 229 #define MATP_MP 6 230 #define MATP_ML 7 231 #define MATP_MR 8 232 #define MATP_D 9 233 #define MATP_IL 10 234 #define MATP_IR 11 235 #define MATL_ML 12 236 #define MATL_D 13 237 #define MATL_IL 14 238 #define MATR_MR 15 239 #define MATR_D 16 240 #define MATR_IR 17 241 #define END_E 18 242 #define BIF_B 19 243 #define END_EL 20 244 245 /* Flags used in InsertTraceNode() 246 */ 247 #define TRACE_LEFT_CHILD 1 248 #define TRACE_RIGHT_CHILD 2 249 250 /* Flags used to define PDA moves, 251 * in display.c and emit.c (if not elsewhere) 252 * 253 */ 254 #define PDA_RESIDUE 0 255 #define PDA_STATE 1 256 #define PDA_MARKER 2 257 258 /************************************************************************************* 259 * 2. Parsetree_t: binary tree structure for storing a traceback of an alignment. 260 *************************************************************************************/ 261 262 /* Structure: Parsetree_t 263 * Incept: SRE 29 Feb 2000 [Seattle] 264 * 265 * Binary tree structure for storing a traceback of an alignment. 266 * 267 * Also used for tracebacks of model constructions. Then, 268 * "state" is misused for a node (not state) index. 269 * 270 * Example of a traceback (from ParsetreeDump(), from a tRNA 271 * model: 272 * 273 * > DF6280 274 * idx emitl emitr state nxtl nxtr prv tsc esc 275 * ----- ------ ------ ------- ----- ----- ----- ----- ----- 276 * 0 1 74 0S 1 -1 -1 -0.58 0.00 277 * 1 1 74A 3MR 2 -1 0 -0.74 0.41 278 * 2 1G 73C 6MP 3 -1 1 -0.87 1.58 279 * ...<snip>... 280 * 11 10 66 54B 12 43 10 0.00 0.00 281 * 12 10 44 124S 13 -1 11 0.00 0.00 282 * 13 10 44 125B 14 28 12 0.00 0.00 283 * ...<snip>... 284 * 60 61U 61 120ML 61 -1 59 -0.22 0.87 285 * 61 -1 -1 123E -1 -1 60 0.00 0.00 286 * ----- ------ ------ ------- ----- ----- ----- ----- ----- 287 * 288 * That is, emitl and emitr are always valid and always represent 289 * the bounds of the subsequence accounted for by the parse 290 * subtree rooted at this state. (Except for end states, which 291 * are -1,-1). nxtl is always a valid state (again except for E 292 * states, which are -1. nxtr is only != -1 for bifurcation states. 293 * 294 * For reasons of malloc() efficiency, the binary tree is organized 295 * in a set of arrays. 296 */ 297 typedef struct parsetree_s { 298 int *emitl; /* i position in seq or ali (1..L or alen) */ 299 int *emitr; /* j position in seq or ali (1..L or alen) */ 300 int *state; /* y of state (0..M-1) */ 301 char *mode; /* mode of state, used in marginal alignment * 302 * (TRMODE_J, TRMODE_L, TRMODE_R or TRMODE_T) */ 303 304 int *nxtl; /* index in trace of left child */ 305 int *nxtr; /* index in trace of right child */ 306 int *prv; /* index in trace of parent */ 307 308 int n; /* number of elements in use so far */ 309 int nalloc; /* number of elements allocated for */ 310 int memblock; /* size of malloc() chunk, # of elems */ 311 int is_std; /* TRUE if parsetree was determined using standard CYK/optacc, 312 * FALSE if parsetree was determined using truncated CYK/optacc */ 313 int pass_idx; /* pipeline pass the hit the parsetree is for was found in */ 314 float trpenalty; /* truncated alignment score penalty, 0.0 if is_std is TRUE. */ 315 } Parsetree_t; 316 317 /* Special flags for use in shadow (traceback) matrices, instead of 318 * offsets to connected states. When yshad[0][][] is USED_LOCAL_BEGIN, 319 * the b value returned by inside() is the best connected state (a 0->b 320 * local entry). When yshad[v][][] is USED_EL, there is a v->EL transition 321 * and the remaining subsequence is aligned to the EL state. 322 */ 323 #define USED_LOCAL_BEGIN 101 324 #define USED_EL 102 325 #define USED_TRUNC_BEGIN 103 326 #define USED_TRUNC_END 104 327 328 /* Constants for alignment truncation modes, used during alignment 329 * traceback. We use TRMODE{J,L,R}_OFFSET as a way of determining the 330 * marginal alignment mode solely from the yoffset stored in the 331 * {J,L,R}shadow matrices, by adding the appropriate offset to yoffset 332 * depending on the truncation mode. A crucial fact is that yoffset 333 * ranges from 0..MAXCONNECT-1 for normal states, but it can also be 334 * USED_LOCAL_BEGIN, USED_EL, USED_TRUNC_BEGIN and USED_TRUNC_END, so 335 * we have to make sure that adding 0..MAXCONNECT-1 to any of the 336 * TRMODE_{J,L,R}_OFFSET values does not add up to USED_LOCAL_BEGIN, 337 * USED_EL, USED_TRUNC_BEGIN, USED_TRUNC_END (defined above). And 338 * remember that these values have to be able to be stored in a char 339 * (must be 0..255). 340 */ 341 #define TRMODE_UNKNOWN 4 342 #define TRMODE_J 3 343 #define TRMODE_L 2 344 #define TRMODE_R 1 345 #define TRMODE_T 0 346 #define TRMODE_J_OFFSET 0 347 #define TRMODE_L_OFFSET 10 348 #define TRMODE_R_OFFSET 20 349 350 351 /************************************************************************************* 352 * 3. CMConsensus_t: consensus information on a CM 353 *************************************************************************************/ 354 355 /* Structure: CMConsensus_t 356 * Incept: SRE, Thu May 23 16:55:04 2002 [St. Louis] 357 * 358 * Created by display.c:CreateCMConsensus(). 359 * Preprocesses a CM into consensus information that is needed by 360 * display.c:CreateFancyAli(). 361 * 362 * ct[x]: Zuker-style ct map. 363 * indicates the pairing partner for consensus position ct[x]. 364 * x can be 0..clen-1 365 * ct[x] is -1 (no partner) or 0..clen-1 (coord of partner) 366 * 367 * (lpos, rpos may be redundant w/ CMEmitMap_t now.) 368 * (off-by-one w.r.t. CMEmitMap_t; 1..clen is better) 369 */ 370 typedef struct consensus_s { 371 char *cseq; /* consensus sequence display string; 0..clen-1 */ 372 char *cstr; /* consensus structure display string; 0..clen-1 */ 373 int *ct; /* Zuker-style ct pairing map; [0..clen-1] */ 374 int *lpos; /* maps node->consensus position; 0..nodes-1 */ 375 int *rpos; /* maps node->consensus position; 0..nodes-1 */ 376 int clen; /* length of cseq, cstr */ 377 } CMConsensus_t; 378 379 /************************************************************************************* 380 * 4. Fancyali_t: alignment of CM to a target sequence (largely replaced by CM_ALIDISPLAY) 381 *************************************************************************************/ 382 383 /* Structure: Fancyali_t 384 * Incept: SRE, May 2002 [St. Louis] 385 * 386 * See display.c:CreateFancyAli(). 387 * An alignment of a CM to a target sequence, formatted for display. 388 */ 389 typedef struct fancyali_s { 390 char *rf; /* reference annotation line (NULL if unavail) */ 391 char *cstr; /* CM consensus structure line */ 392 char *cseq; /* CM consensus sequence line */ 393 char *mid; /* alignment identity middle line */ 394 char *top; /* optional, non-compensatory 'x' top line */ 395 char *aseq; /* aligned target sequence */ 396 char *pcode; /* aligned posteriors 'ones' place (9 in 93) */ 397 int *scoord; /* coords 1..L for aligned dsq chars */ 398 int *ccoord; /* coords 1..clen for aligned consensus chars */ 399 int len; /* len of the strings above */ 400 int cfrom, cto; /* max bounds in ccoord */ 401 int sqfrom, sqto; /* max bounds in scoord */ 402 403 char *hmmname; /* name of HMM */ 404 char *hmmacc; /* accession of HMM; or [0]='\0' */ 405 char *hmmdesc; /* description of HMM; or [0]='\0' */ 406 407 } Fancyali_t; 408 409 410 /************************************************************************************* 411 * 5. CMEmitMap_t: map of model nodes to consensus positions. 412 *************************************************************************************/ 413 414 /* Structure: CMEmitMap_t 415 * Incept: SRE, Thu Aug 8 12:47:49 2002 [St. Louis] 416 * 417 * Maps model nodes to consensus positions. 418 * Consensus positions are indexed 1..clen. 419 * Each array (lpos, rpos, epos) is 0..nodes-1. 420 * Residues from an MP go into lpos and rpos in the consensus. 421 * Residues from an IL follow lpos. 422 * Residues from an IR precede rpos. 423 * Residues from an EL follow epos[nd] for the nd that went to EL. 424 * For nonemitters, rpos and lpos are a non-inclusive bound: for 425 * example, rpos[0], lpos[0] are 0,clen+1. 426 * There are no dummy values; all rpos, lpos, epos are valid coords 427 * 0..clen+1 in the consensus. 428 * 429 * See emitmap.c for implementation and more documentation. 430 */ 431 typedef struct emitmap_s { 432 int *lpos; /* left bound of consensus for subtree under nd */ 433 int *rpos; /* right bound of consensus for subtree under nd */ 434 int *epos; /* EL inserts come after this consensus pos */ 435 int clen; /* consensus length */ 436 } CMEmitMap_t; 437 438 /************************************************************************************* 439 * 6. deckpool_s: divide and conquer DP matrix state deck. 440 *************************************************************************************/ 441 442 struct deckpool_s { 443 float ***pool; 444 int n; 445 int nalloc; 446 int block; 447 }; 448 449 /************************************************************************************* 450 * 7. GammaHitMx_t: semi-HMM used for optimal hit resolution of a CM scan. 451 *************************************************************************************/ 452 453 typedef struct gammahitmx_s { 454 int L; /* length of sequence, arrays are size L+1 */ 455 float *mx; /* [0..L] SHMM DP matrix for optimum nonoverlap resolution */ 456 int *gback; /* [0..L] traceback pointers for SHMM */ 457 float *savesc; /* [0..L] saves score of hit added to best parse at j */ 458 int *saver; /* [0..L] saves initial non-ROOT state of best parse ended at j */ 459 int *savemode; /* [0..L] saves mode best parse ended at j (TRMODE_J | TRMODE_L | TRMODE_R | TRMODE_T) */ 460 float cutoff; /* minimum score to report */ 461 int i0; /* position of first residue in sequence, in actual sequence 462 * coords (gamma->mx[0] corresponds to this residue) 463 */ 464 } GammaHitMx_t; 465 466 467 /************************************************************************************* 468 * 8. Prior_t: Dirichlet priors on all CM parameters. 469 *************************************************************************************/ 470 471 typedef struct { 472 /* transition priors */ 473 int tsetnum; /* number of transition sets to read in */ 474 int tsetmap[UNIQUESTATES][NODETYPES]; /* tsetmap[a][b] is for transition set from ustate a to node b */ 475 ESL_MIXDCHLET **t; /* array of transition priors, 0..tsetnum-1 */ 476 477 /* emission priors */ 478 ESL_MIXDCHLET *mbp; /* consensus base pair emission prior */ 479 ESL_MIXDCHLET *mnt; /* consensus singlet emission prior */ 480 ESL_MIXDCHLET *i; /* nonconsensus singlet emission prior */ 481 482 /* bookkeeping */ 483 int maxnq; /* maximum # of components in any prior */ 484 int maxnalpha; /* maximum # of parameters in any prior */ 485 } Prior_t; 486 487 /************************************************************************************* 488 * 10. ExpInfo_t: exponential tail information for E-values 489 *************************************************************************************/ 490 491 /* Info on an exponential tail that describes score distribution in 492 * random sequence of a given algorithm, model configuration (can be 1 493 * of 4 modes, local/glocal of cyk or inside). Fit in cmcalibrate and 494 * stored in the cm file. All values with the sole exception of 495 * <cur_eff_dbsize> are never changed once read, and some of the 496 * params are actually unnecessary downstream of cmcalibrate, but are 497 * potentially informative to the user. 498 */ 499 typedef struct expinfo_s { 500 double cur_eff_dbsize;/* the total number of possible hits we expect for current database search, 501 * cur_eff_dbsize = (current dbsize) / <dbsize> * <nrandhits>*/ 502 double lambda; /* scale param exponential tail */ 503 double mu_extrap; /* offset/location param for exponential tail extrapolated to include all <nrandhits> from cmcalibrate, 504 * mu_corrected = mu_orig - log(1./tailp) / lambda */ 505 double mu_orig; /* offset/location param for exponential tail's original fit to tailp of rand seq score histogram in cmcalibrate */ 506 double dbsize; /* db size in residues that was used in cmcalibrate */ 507 int nrandhits; /* total number of hits in random sequence database in cmcalibrate */ 508 double tailp; /* fractional tail mass threshold for hit histogram in random sequences in cmcalibrate */ 509 int is_valid; /* TRUE if this expinfo_s object is valid (it's parameters have been set), FALSE if not */ 510 } ExpInfo_t; 511 512 /* Exponential tail statistics modes, a different exp tail fit exists for each mode 513 * 0..EXP_NMODES-1 are first dimension of cm->expA 514 * order is important, to cmcalibrate at least 515 */ 516 #define EXP_CM_GC 0 517 #define EXP_CM_GI 1 518 #define EXP_CM_LC 2 519 #define EXP_CM_LI 3 520 #define EXP_NMODES 4 521 522 /*********************************************************************************** 523 * 11. CP9_t: a CM Plan 9 HMM 524 ***********************************************************************************/ 525 526 /* Declaration of a CM Plan 9 profile-HMM structure. Modified from a 527 * HMMER2 plan 7 (with (hopefully) minimal change) to mirror a CM as 528 * closely as possible. 529 * 530 * The model has two forms: 531 * 1. The "core" model has 0..M nodes, node 0 is special, its "match" state 532 * is really state B (which is forced silent by having hmm->mat[0] = NULL and 533 * hmm->msc[0] = NULL), its "insert" state is really state N (with emission 534 * probs hmm->ins[0]), and it has NO DELETE STATE. 535 * 536 * hmm->t[0][CTMM]: 0. (B->M_1 transition is hmm->begin[1]) 537 * hmm->t[0][CTMI]: transition from B to N (I_0); 538 * hmm->t[0][CTMD]: transition from B to D_1; 539 * hmm->t[0][CTME]: null (transition from B to an EL state is impossible) 540 * hmm->t[0][CTIM]: transition from N to M_1; 541 * hmm->t[0][CTII]: N self transition; 542 * hmm->t[0][CTID]: N -> D_1 543 * hmm->t[0][CTDM]: null 544 * hmm->t[0][CTDI]: null 545 * hmm->t[0][CTDD]: null 546 * 547 * t[0..M] are the state transition probs. t[k][CTME] is an 548 * end-local probability, the EL states can only be reached by a 549 * subset of match states, this probability is -INFTY for states 550 * that can't reach the EL. 551 * 552 * t[M] are special, because this node transits to the end (E 553 * state). The E state is (sort-of) treated as match state M+1, as 554 * t[M][CTIM] is the transition from I_M to E, t[M][CTDM] is the 555 * transition from D_M to E. However, t[M][CTMM] is always 0.0, 556 * the transition from M_M to E is end[hmm->M]; t[M][CTMD], 557 * t[M][CTDD], t[M][CTDI] are set as 0.0. 558 * 559 * mat[1..M] are match emission probs. 560 * ins[0..M] are insert emission probs. (ins[0] is state N emission probs) 561 * 562 * The CM_PLAN9_HASPROB flag is up when these all correspond to a fully normalized 563 * profile HMM. 564 * 565 * 2. The "logoddsified" model is the configured model, converted to 566 * integer score form and ready for alignment algorithms. 567 * bsc, esc scores correspond to begin, and end probabilities. 568 * 569 * The CPLAN9_HASBITS flag is up when both of these are ready for 570 * alignment. 571 * 572 */ 573 typedef struct cplan9_s { 574 /* The main model in probability form: data-dependent probabilities. 575 * Transition probabilities are usually accessed as a 576 * two-D array: hmm->t[k][CTMM], for instance. They are allocated 577 * such that they can also be stepped through in 1D by pointer 578 * manipulations, for efficiency in DP algorithms. 579 * CPLAN9_HASPROB flag is raised when these probs are all valid. 580 */ 581 const ESL_ALPHABET *abc; /* pointer to the alphabet, usually points to cm->abc */ 582 int M; /* length of the model (# nodes) +*/ 583 float **t; /* transition prob's. t[0..M][0..9] +*/ 584 float **mat; /* match emissions. mat[1..M][0..3] +*/ 585 float **ins; /* insert emissions. ins[0..M][0..3] +*/ 586 587 /* The unique states of CM Plan 9 in probability form. 588 * These are the algorithm-dependent, data-independent probabilities. 589 * Some parts of the code may briefly use a trick of copying tbd1 590 * into begin[0]; this makes it easy to call FChoose() or FNorm() 591 * on the resulting vector. However, in general begin[0] is not 592 * a valid number. 593 */ 594 float *begin; /* 1..M B->M state transitions +*/ 595 float *end; /* 1..M M->E state transitions (!= a dist!) +*/ 596 597 /* The model's log-odds score form. 598 * These are created from the probabilities by CP9Logoddsify(). 599 * By definition, null[] emission scores are all zero. 600 * Note that emission distributions are over possible alphabet symbols, 601 * not just the unambiguous DNA alphabet: we 602 * precalculate the scores for all IUPAC degenerate symbols we 603 * may see. 604 * 605 * Note the reversed indexing on msc, isc, tsc -- for efficiency reasons. 606 * They're not probability vectors any more so we can reorder them 607 * without wildly complicating our life. 608 * 609 * The _mem ptrs are where the real memory is alloc'ed and free'd, 610 * as opposed to where it is accessed. 611 * This came in with Erik Lindahl's altivec port; it allows alignment on 612 * 16-byte boundaries. In the non-altivec code, this is just a little 613 * redundancy; tsc and tsc_mem point to the same thing, for example. 614 * 615 * The otsc are reordered transition scores [0..k..M][0..cp9O_NTRANS-1], 616 * for efficiency in DP calculations. This reordering is based on HMMER3. 617 * 618 * CPLAN9_HASBITS flag is up when these scores are valid. 619 */ 620 int **tsc; /* transition scores [0.9][0.M] +*/ 621 int **msc; /* match emission scores [0.Kp-1][1.M] +*/ 622 int **isc; /* ins emission scores [0.Kp-1][0.M] +*/ 623 int *bsc; /* begin transitions [1.M] +*/ 624 int *esc; /* end transitions [1.M] +*/ 625 int *tsc_mem, *msc_mem, *isc_mem, *bsc_mem, *esc_mem; 626 int *otsc; /* transition scores [0..M][0..9], special ordering */ 627 628 /* The null model probabilities. 629 */ 630 float *null; /* "random sequence" emission prob's +*/ 631 float null2_omega; /* prior probability of null2 model, copied from CM */ 632 float null3_omega; /* prior probability of null3 model, copied from CM */ 633 float p1; /* null model loop probability +*/ 634 /* local end, EL state parameters */ 635 float el_self; /* EL transition self loop probability */ 636 int el_selfsc; /* EL transition self loop score */ 637 int *has_el; /* has_el[k] is TRUE if node k has an EL state */ 638 int *el_from_ct; /* [0..M+1] el_from_ct[k] is the number of HMM nodes kp 639 * where a transition from kp's EL state to k's 640 * match state is valid. */ 641 int **el_from_idx; /* [0..M+1][] el_from_idx[k] is an array of 642 * size el_from_ct[k] each element is a node 643 * kp where a transition from kp's EL state 644 * to k's match state is allowed */ 645 int **el_from_cmnd; /* [0..M+1][] el_from_cmnd[k] is an array of 646 * size el_from_ct[k] element i is the CM 647 * node that the EL transition to k to 648 * el_from_idx[k][i] corresponds with, used 649 * only for building alignments from traces. */ 650 int flags; /* bit flags indicating state of HMM, valid data +*/ 651 } CP9_t; 652 653 /* Flag codes for cplan9->flags. 654 */ 655 #define CPLAN9_HASBITS (1<<0) /* raised if model has log-odds scores */ 656 #define CPLAN9_HASPROB (1<<1) /* raised if model has probabilities */ 657 #define CPLAN9_LOCAL_BEGIN (1<<2) /* raised if model has local begins turned on */ 658 #define CPLAN9_LOCAL_END (1<<3) /* raised if model has S/W local ends turned on */ 659 #define CPLAN9_EL (1<<4) /* raised if model has EL local ends turned on */ 660 661 /* Indices for CM Plan9 main model state transitions. 662 * Used for indexing hmm->t[k][] 663 * mnemonic: Cm plan 9 Transition from Match to Match = CTMM 664 */ 665 #define CTMM 0 666 #define CTMI 1 667 #define CTMD 2 668 #define CTMEL 3 669 #define CTIM 4 670 #define CTII 5 671 #define CTID 6 672 #define CTDM 7 673 #define CTDI 8 674 #define CTDD 9 675 #define cp9_NTRANS 10 676 677 #define cp9_TRANS_MATCH_OFFSET 0 /* hmm->t[k][0] is first transition out of match */ 678 #define cp9_TRANS_INSERT_OFFSET 4 /* hmm->t[k][4] is first transition out of insert */ 679 #define cp9_TRANS_DELETE_OFFSET 7 /* hmm->t[k][7] is first transition out of delete */ 680 #define cp9_TRANS_NMATCH 4 /* there are 4 transitions out of match */ 681 #define cp9_TRANS_NINSERT 3 /* there are 3 transitions out of insert */ 682 #define cp9_TRANS_NDELETE 3 /* there are 3 transitions out of delete */ 683 684 /* Indices for transition scores tsc[k][] */ 685 /* order is optimized for dynamic programming */ 686 enum cp9o_tsc_e { 687 cp9O_MM = 0, 688 cp9O_IM = 1, 689 cp9O_DM = 2, 690 cp9O_BM = 3, 691 cp9O_MI = 4, 692 cp9O_II = 5, 693 cp9O_DI = 6, 694 cp9O_MD = 7, 695 cp9O_ID = 8, 696 cp9O_DD = 9, 697 cp9O_ME =10, 698 cp9O_MEL=11 699 }; 700 #define cp9O_NTRANS 12 701 702 /* used by CM Plan 9 HMM structures */ 703 #define HMMMATCH 0 704 #define HMMINSERT 1 705 #define HMMDELETE 2 706 #define NHMMSTATETYPES 3 707 708 /*********************************************************************************** 709 * 12. CP9_MX: a dynamic programming matrix for a CM Plan 9 HMM 710 ***********************************************************************************/ 711 712 /* Declaration of CM Plan9 dynamic programming matrix structure. 713 */ 714 typedef struct cp9_mx_s { 715 int **mmx; /* match scores [0.1..N][0..M] */ 716 int **imx; /* insert scores [0.1..N][0..M] */ 717 int **dmx; /* delete scores [0.1..N][0..M] */ 718 int **elmx; /* end local scores [0.1..N][0..M] */ 719 int *erow; /* score for E state [0.1..N] */ 720 /* Hidden ptrs where the real memory is kept */ 721 int *mmx_mem, *imx_mem, *dmx_mem, *elmx_mem; 722 723 int M; /* number of nodes in HMM this mx corresponds to, never changes */ 724 int rows; /* generally L or 2, # of DP rows in seq dimension, where L is length of seq, 725 * == 2 if we're scanning in mem efficient mode, 726 * never shrinks, but can increase to 'grow' the matrix 727 */ 728 float size_Mb; /* current size of matrix in Megabytes */ 729 730 /* variables added for HMMER3 p7 HMM banding of CP9 HMM dp algorithms */ 731 int *kmin; /* OPTIONAL (can be null) [0.1..i..rows] = k, minimum node for residue i is k */ 732 int *kmax; /* OPTIONAL (can be null) [0.1..i..rows] = k, maximum node for residue i is k */ 733 int ncells_allocated; /* number of cells allocated in matrix */ 734 int ncells_valid; /* number of cells currently valid in the matrix */ 735 736 } CP9_MX; 737 738 739 /************************************************************************************* 740 * 13. CP9Bands_t: sequence and CM specific HMM bands. 741 *************************************************************************************/ 742 743 /* Structure: CP9Bands_t 744 * Incept: EPN, 10.27.06 745 * 746 * CP9 HMM bands and the resulting CM bands for HMM banded 747 * CYK (or Inside or Outside) algorithms. 748 * 749 */ 750 typedef struct cp9bands_s { 751 int hmm_M; /* Number of nodes in CP9 HMM */ 752 int cm_M; /* Number of nodes in CM, the CP9 HMM was built from */ 753 754 /* data structures for hmm bands (bands on the hmm states) */ 755 int *pn_min_m; /* HMM band: minimum position node k match state will emit */ 756 int *pn_max_m; /* HMM band: maximum position node k match state will emit */ 757 int *pn_min_i; /* HMM band: minimum position node k insert state will emit */ 758 int *pn_max_i; /* HMM band: maximum position node k insert state will emit */ 759 int *pn_min_d; /* HMM band: minimum position that was emitted prior to entering 760 * node k delete state */ 761 int *pn_max_d; /* HMM band: maximum position that was emitted prior to entering 762 * node k delete state */ 763 int *isum_pn_m; /* [0..k..hmm_M] sum over i of log post probs from post->mmx[i][k]*/ 764 int *isum_pn_i; /* [0..k..hmm_M] sum over i of log post probs from post->imx[i][k]*/ 765 int *isum_pn_d; /* [0..k..hmm_M] sum over i of log post probs from post->dmx[i][k]*/ 766 767 /* Predicted first and final consensus positions that might be 768 * (sp1/ep1) and are likely to be (sp2/ep2) involved in the parse of 769 * the sequence based on the HMM posterior probabilities. These are 770 * used to determine what types of marginal alignments should be 771 * allowed from each state (the {J,L,R,T}valid arrays) 772 */ 773 int sp1; /* minimum cpos for which occupancy probability exceeds thresh1, first 'maybe used' */ 774 int ep1; /* maximum cpos for which occupancy probability exceeds thresh1, final 'maybe used' */ 775 int sp2; /* minimum cpos for which occupancy probability exceeds thresh2, first 'likely used' */ 776 int ep2; /* maximum cpos for which occupancy probability exceeds thresh2, final 'likely used' */ 777 778 float thresh1; /* probability threshold for sp1, ep1 (typically 0.01), 'maybe used' */ 779 float thresh2; /* probability threshold for sp2, ep2 (typically 0.98), 'likely used' */ 780 781 int Rmarg_imin; /* for Right marginal alignments, minimum target sequence position that can align to CM as i */ 782 int Rmarg_imax; /* for Right marginal alignments, maximum target sequence position that can align to CM as i */ 783 int Lmarg_jmin; /* for Left marginal alignments, minimum target sequence position that can align to CM as j */ 784 int Lmarg_jmax; /* for Left marginal alignments, maximum target sequence position that can align to CM as j */ 785 786 /* {J,L,R,T}valid [0..cm_M] for trCYK/trInside/trOutside: are {J,L,R,T} do DP matrix cells exist for state v? */ 787 int *Jvalid; /* [0..v..cm_M] TRUE to calculate J DP matrix cells for state v, FALSE not to */ 788 int *Lvalid; /* [0..v..cm_M] TRUE to calculate L DP matrix cells for state v, FALSE not to */ 789 int *Rvalid; /* [0..v..cm_M] TRUE to calculate R DP matrix cells for state v, FALSE not to */ 790 int *Tvalid; /* [0..v..cm_M] TRUE to calculate T DP matrix cells for state v, FALSE not to */ 791 792 /* arrays for CM state bands, derived from HMM bands */ 793 int *imin; /* [0..cm_M-1] imin[v] = first position in band on i for state v*/ 794 int *imax; /* [0..cm_M-1] imax[v] = last position in band on i for state v*/ 795 int *jmin; /* [0..cm_M-1] jmin[v] = first position in band on j for state v*/ 796 int *jmax; /* [0..cm_M-1] jmax[v] = last position in band on j for state v*/ 797 int **hdmin; /* [0..cm_M-1][0..(jmax[v]-jmin[v])] 798 * hdmin[v][j0] = first position in band on d for state v, and position 799 * j = jmin[v] + j0. */ 800 int **hdmax; /* [0..cm_M-1][0..(jmax[v]-jmin[v])] 801 * hdmin[v][j0] = last position in band on d for state v, and position 802 * j = jmin[v] + j0.*/ 803 int *hdmin_mem; /* actual memory for hdmin */ 804 int *hdmax_mem; /* actual memory for hdmax */ 805 int *safe_hdmin; /* [0..cm_M-1] safe_hdmin[v] = min_d (hdmin[v][j0]) (over all valid j0) */ 806 int *safe_hdmax; /* [0..cm_M-1] safe_hdmax[v] = max_d (hdmax[v][j0]) (over all valid j0) */ 807 808 /* info on size of bands */ 809 int hd_needed; /* Sum_v cp9b->jmax[v] - cp9b->jmin[v] + 1, number of hd arrays needed */ 810 int hd_alloced; /* number of hd arrays currently alloc'ed */ 811 812 double tau; /* tau used to calculate current bands */ 813 814 } CP9Bands_t; 815 816 /************************************************************************************* 817 * 14. CP9trace_t: traceback structure for CP9 HMMs. 818 *************************************************************************************/ 819 820 /* CM Plan 9 model state types 821 * used in traceback structure. 822 */ 823 #define CSTBOGUS 0 824 #define CSTM 1 825 #define CSTD 2 826 #define CSTI 3 827 #define CSTB 4 /* M_0 the B state */ 828 #define CSTE 5 /* the end state, M_(k+1) */ 829 #define CSTEL 6 /* an EL (end local) state */ 830 /* Structure: cp9trace_s 831 * 832 * Traceback structure for alignments of model to sequence. 833 * Each array in a trace_s is 0..tlen-1. 834 * Element 0 is always to M_0 (match state of node 0) 835 * Element tlen-1 is always to the E_st 836 */ 837 typedef struct cp9trace_s { 838 int tlen; /* length of traceback */ 839 char *statetype; /* state type used for alignment */ 840 int *nodeidx; /* idx of aligned node, 0..M if M or I 1..M if D */ 841 int *pos; /* position in dsq, 1..L, or 0 if none */ 842 } CP9trace_t; 843 844 /************************************************************************************* 845 * 15. CP9Map_t: map from a CM to a CP9 HMM and vice versa. 846 *************************************************************************************/ 847 848 /* Structure: CP9Map_t 849 * Incept: EPN, 10.23.06 850 * 851 * Maps a CM to a CM plan 9 HMM and vice versa. 852 * Consensus positions are indexed 1..hmm_M. 853 * 854 * The lpos and rpos arrays are somewhat redundant with 855 * CMEmitMap_t, but they're not identical. I didn't realize emitmap existed 856 * prior to implementing CP9 HMMs - if I had I would have used emitmaps, but 857 * it's difficult to go back and use emitmaps now. 858 * 859 * See emitmap.c for implementation and more documentation. 860 */ 861 typedef struct cp9map_s { 862 int *nd2lpos; /* [0..cm_nodes] left bound of consensus for subtree under this nd, 863 * -1 for non-{MATL|MATR|MATP} nodes */ 864 int *nd2rpos; /* [0..cm_nodes] right bound of consensus for subtree under this nd 865 * -1 for non-{MATL|MATR|MATP} nodes */ 866 int *pos2nd; /* [1..clen], the MATL, MATR or MATP CM node that maps to this 867 * consensus column */ 868 int **cs2hn; /* [0..cm_M][0..1], 1 or 2 HMM nodes that maps to this CM state 869 * [x][1] is -1 if state x maps to only 1 HMM node */ 870 int **cs2hs; /* [0..cm_M][0..1], 1 or 2 HMM states (0=MATCH, 1=INSERT, 2=DELETE) 871 * that maps to this CM state 872 * [x][0] corresponds to the HMM node in cs2hn[x][0], 873 * [x][1] corresponds to the HMM node in cs2hn[x][1] (or -1) */ 874 int ***hns2cs; /* [0..clen][0..2][0..1] 875 * hns2cs[x][y][0] 1st CM state that maps to HMM node x state type y (0,1,2) 876 * hns2cs[x][y][1] 2nd CM state that maps to HMM node x state type y (0,1,2) 877 * -1 if only 1 CM state maps to HMM node x state y */ 878 int hmm_M; /* consensus length, the number of HMM nodes */ 879 int cm_M; /* number of states in the CM this HMM maps to */ 880 int cm_nodes; /* number of nodes in the CM this HMM maps to */ 881 } CP9Map_t; 882 883 884 /************************************************************************************* 885 * 16. CMSubMap_t: map of a template CM to a sub CM and vice versa. 886 *************************************************************************************/ 887 888 /* Structure: CMSubMap_t 889 * Incept: EPN, 10.23.06 890 * 891 * Maps a template CM to a sub CM and vice versa. 892 * Consensus positions are indexed 1..clen. 893 * 894 * The *node_cc_left and *node_cc_right arrays are redundant with CMEmitMap_t. 895 * See emitmap.c for implementation and more documentation. 896 */ 897 typedef struct submap_s { 898 int spos; /* first consensus column this sub_cm models */ 899 int epos; /* final consensus column this sub_cm models */ 900 int sstruct; /* first consensus column this sub_cm models structure of */ 901 int estruct; /* final consensus column this sub_cm models structure of */ 902 903 int **s2o_smap; /* v = [0..sub_M-1] [0..1], orig_cm state(s) that maps to v */ 904 int **o2s_smap; /* v = [0..orig_M-1][0..1], sub_cm state(s) that maps to v */ 905 int *s2o_id; /* v = [0..sub_M-1] TRUE if sub_cm state v maps identically * 906 * to a orig_cm state (this will be s2o_smap[v][0]) */ 907 908 int sub_clen; /* consensus length orig_cm */ 909 int orig_clen; /* consensus length sub_cm */ 910 911 int sub_M; /* number of states in the sub CM */ 912 int orig_M; /* number of states in the original CM */ 913 } CMSubMap_t; 914 915 916 /************************************************************************************* 917 * 17. CMSubInfo_t: sub CM information, used for validating the sub CM construction procedure. 918 *************************************************************************************/ 919 920 /* Structure: CMSubInfo_t 921 * Incept: EPN, 10.23.06 922 * 923 * Information on a sub CM, used for checking the sub CM 924 * construction procedure works. 925 * Consensus positions are indexed 1..clen. 926 * 927 */ 928 typedef struct subinfo_s { 929 int *imp_cc; /* [0..(epos-spos+1)] ret_imp_cc[k] != 0 if it is 930 * impossible for CP9 node (consensus column) k to be 931 * calculated in the sub_cm to have distros to match the 932 * corresponding CP9 node in the original CM - due to 933 * topological differences in the architecture of the 934 * sub_cm and orig_cm. 935 */ 936 int *apredict_ct; /* For an analytical test, the number of times we 937 * predict we'll fail the test for an HMM node for each 938 * of 6 cases - 6 different reasons we predict we'll fail. 939 */ 940 int *spredict_ct; /* For as sampling test, the number of times we 941 * predict we'll fail the test for an HMM node for each 942 * of 6 cases - 6 different reasons we predict we'll fail. 943 */ 944 int *awrong_ct; /* Subset of cases in apredict_ct that were incorrectly 945 * predicted. */ 946 int *swrong_ct; /* Subset of cases in spredict_ct that were incorrectly 947 * predicted. */ 948 } CMSubInfo_t; 949 950 951 /************************************************************************************* 952 * 18. RSEARCH constants. 953 *************************************************************************************/ 954 955 /* RSEARCH defaults defined here */ 956 #define DEFAULT_RMATRIX "RIBOSUM85-60" 957 #define DEFAULT_RALPHA 10. 958 #define DEFAULT_RBETA 5. 959 #define DEFAULT_RALPHAP 0. 960 #define DEFAULT_RBETAP 15. 961 /* the RSEARCH default is below, it was changed b/c 962 * with no local begin penalty, a glocal hit is ALWAYS 963 * going to be decomposed into it's subtrees. 964 *#define DEFAULT_RBEGINSC 0. */ 965 #define DEFAULT_RBEGINSC -0.01 966 #define DEFAULT_RENDSC -15. 967 968 /* The six classes of states in RSEARCH */ 969 #define M_cl 0 970 #define IL_cl 1 971 #define DL_cl 2 972 #define IR_cl 3 973 #define DR_cl 4 974 #define DB_cl 5 975 976 /* coordinate -- macro that checks if it's reverse complement and if so 977 returns coordinate in original strand 978 a = true if revcomp, false if not 979 b = the position in current seq 980 c = length of the seq 981 */ 982 #define COORDINATE(a,b,c) ( a ? -1*b+c+1 : b) 983 #define RNAPAIR_ALPHABET "AAAACCCCGGGGUUUU" 984 #define RNAPAIR_ALPHABET2 "ACGUACGUACGUACGU" 985 /* Returns true if pos. C of seq B of msa A is a gap */ 986 #define is_rna_gap(A, B, C) (esl_abc_CIsGap(A->abc, A->aseq[B][C])) 987 /* Returns true if position C of digitized sequence B of msa A is a canonical */ 988 #define is_defined_rna_nucleotide(A, B, C) (esl_abc_CIsCanonical(A->abc, A->aseq[B][C])) 989 #define unpairedmat_size (matrix_index(3,3) + 1) 990 #define pairedmat_size (matrix_index (15,15) + 1) 991 /* Maps to index of matrix, using binary representation of 992 * nucleotides (unsorted). 993 * See lab book 7, p. 3-4 for details of mapping function (RJK) */ 994 #define matrix_index(X,Y) ((X>Y) ? X*(X+1)/2+Y: Y*(Y+1)/2+X) 995 /* Matrix type 996 * Contains array in one dimension (to be indexed later), matrix size, 997 * H, and E. 998 */ 999 typedef struct _matrix_t { 1000 double *matrix; 1001 int edge_size; /* Size of one edge, e.g. 4 for 4x4 matrix */ 1002 int full_size; /* Num of elements, e.g. 10 for 4x4 matirx */ 1003 double H; 1004 double E; 1005 } matrix_t; 1006 1007 /* Full matrix definition, includes the g background freq vector (g added by EPN). */ 1008 typedef struct _fullmat_t { 1009 const ESL_ALPHABET *abc;/* alphabet, we enforce it's eslRNA */ 1010 matrix_t *unpaired; 1011 matrix_t *paired; 1012 char *name; 1013 float *g; /* EPN: the background distro, g vector in RSEARCH paper 1014 * this now appears in the RIBOSUM matrix files */ 1015 int scores_flag; /* TRUE if matrix values are log odds scores, FALSE if 1016 * they're target probs, or unfilled */ 1017 int probs_flag; /* TRUE if matrix values are target probs, FALSE if 1018 * they're log odds scores, or unfilled */ 1019 } fullmat_t; 1020 1021 /*********************************************************************************** 1022 * 19. CM_MX: CM dynamic programming matrix; non-banded, non-truncated. 1023 ***********************************************************************************/ 1024 1025 /* Declaration of CM dynamic programming matrices for alignment. 1026 * There are eight matrices here, four DP matrices for DP calculations 1027 * and four shadow matrices for alignment tracebacks. There is one DP 1028 * matrix and one shadow matrix for each combination of standard vs 1029 * truncated and non-banded vs HMM-banded alignment. All matrices are 1030 * setup in 'vjd' (state idx, aln posn, subseq len) coordinates. HMM 1031 * banded variants of all matrices are banded in j and d dimensions 1032 * with only cells within bands allocated. HMM banded DP variants are 1033 * also used for DP search functions. 1034 * 1035 * CM_MX: non-banded standard CYK/Inside/Outside DP matrix 1036 * CM_TR_MX: non-banded truncated CYK/Inside/Outside DP matrix 1037 * CM_HB_MX: HMM banded standard CYK/Inside/Outside DP matrix 1038 * CM_TR_HB_MX: HMM banded truncated CYK/Inside/Outside DP matrix 1039 * 1040 * CM_SHADOW_MX: non-banded standard CYK/optacc shadow matrix 1041 * CM_SHADOW_TR_MX: non-banded truncated CYK/optacc shadow matrix 1042 * CM_SHADOW_HB_MX: HMM banded standard CYK/optacc shadow matrix 1043 * CM_SHADOW_TR_HB_MX: HMM banded truncated CYK/optacc shadow matrix 1044 */ 1045 typedef struct cm_mx_s { 1046 int M; /* number of states (1st dim ptrs) in current mx */ 1047 int L; /* length of sequence the matrix currently corresponds to */ 1048 1049 int64_t ncells_alloc; /* current cell allocation limit for dp */ 1050 int64_t ncells_valid; /* current number of valid cells for dp */ 1051 float size_Mb; /* current size of matrix in Megabytes */ 1052 1053 float ***dp; /* matrix, [0..v..M][0..j..L][0..j] */ 1054 float *dp_mem; /* the actual mem, points to dp[0][0][0] */ 1055 } CM_MX; 1056 1057 /*********************************************************************************** 1058 * 20. CM_TR_MX: CM dynamic programming matrix; non-banded, truncated. 1059 ***********************************************************************************/ 1060 1061 typedef struct cm_tr_mx_s { 1062 int M; /* number of states (1st dim ptrs) in current mx */ 1063 int B; /* number of B states in current mx */ 1064 int L; /* length of sequence the matrix currently corresponds to */ 1065 1066 int64_t Jncells_alloc; /* current cell allocation limit for Jdp */ 1067 int64_t Jncells_valid; /* current number of valid cells for Jdp */ 1068 int64_t Lncells_alloc; /* current cell allocation limit for Ldp */ 1069 int64_t Lncells_valid; /* current number of valid cells for Ldp */ 1070 int64_t Rncells_alloc; /* current cell allocation limit for Rdp, same as for Ldp */ 1071 int64_t Rncells_valid; /* current number of valid cells for Rdp, same as for Ldp */ 1072 int64_t Tncells_alloc; /* current cell allocation limit for Tdp */ 1073 int64_t Tncells_valid; /* current number of valid cells for Tdp */ 1074 float size_Mb; /* current size of matrix in Megabytes */ 1075 1076 float ***Jdp; /* J matrix, [0..v..M][0..j..L][0..j] */ 1077 float *Jdp_mem; /* the actual mem, points to Jdp[0][0][0] */ 1078 float ***Ldp; /* L matrix, [0..v..M][0..j..L][0..j] */ 1079 float *Ldp_mem; /* the actual mem, points to Ldp[0][0][0] */ 1080 float ***Rdp; /* R matrix, [0..v..M][0..j..L][0..j] */ 1081 float *Rdp_mem; /* the actual mem, points to Rdp[0][0][0] */ 1082 float ***Tdp; /* T matrix, [0..v..M][0..j..L][0..j] */ 1083 float *Tdp_mem; /* the actual mem, points to Tdp[0][0][0] */ 1084 } CM_TR_MX; 1085 1086 /*********************************************************************************** 1087 * 21. CM_HB_MX: CM dynamic programming matrix; HMM banded, non-truncated. 1088 ***********************************************************************************/ 1089 1090 typedef struct cm_hb_mx_s { 1091 int M; /* number of states (1st dim ptrs) in current mx */ 1092 int L; /* length of sequence the matrix currently corresponds to */ 1093 1094 int64_t ncells_alloc; /* current cell allocation limit */ 1095 int64_t ncells_valid; /* current number of valid cells */ 1096 float size_Mb; /* current size of matrix in Megabytes */ 1097 1098 int *nrowsA; /* [0..v..M] current number allocated rows for deck v */ 1099 1100 float ***dp; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1101 float *dp_mem; /* the actual mem, points to dp[0][0][0] */ 1102 1103 CP9Bands_t *cp9b; /* the CP9Bands_t object associated with this 1104 * matrix, which defines j, d, bands for each 1105 * state, only a reference, so don't free 1106 * it when mx is freed. */ 1107 } CM_HB_MX; 1108 1109 /*********************************************************************************** 1110 * 22. CM_TR_HB_MX: CM dynamic programming matrix; HMM banded, truncated. 1111 ***********************************************************************************/ 1112 1113 typedef struct cm_tr_hb_mx_s { 1114 int M; /* number of states (1st dim ptrs) in current mx */ 1115 int B; /* number of BIF_B states (1st dim ptrs) in current mx (valid, non-null Tdp states) */ 1116 int L; /* length of sequence the matrix currently corresponds to */ 1117 1118 int64_t Jncells_alloc; /* current cell allocation limit for Jdp */ 1119 int64_t Lncells_alloc; /* current cell allocation limit for Ldp */ 1120 int64_t Rncells_alloc; /* current cell allocation limit for Rdp */ 1121 int64_t Tncells_alloc; /* current cell allocation limit for Tdp */ 1122 int64_t Jncells_valid; /* current number of valid cells in Jdp */ 1123 int64_t Lncells_valid; /* current number of valid cells in Ldp */ 1124 int64_t Rncells_valid; /* current number of valid cells in Rdp */ 1125 int64_t Tncells_valid; /* current number of valid cells in Tdp */ 1126 float size_Mb; /* current size of full matrix (J,L,R&T) in Megabytes */ 1127 1128 int *JnrowsA; /* [0..v..M] current number allocated rows for deck v in J matrix */ 1129 int *LnrowsA; /* [0..v..M] current number allocated rows for deck v in L matrix */ 1130 int *RnrowsA; /* [0..v..M] current number allocated rows for deck v in R matrix */ 1131 int *TnrowsA; /* [0..v..M] current number allocated rows for deck v in T matrix */ 1132 1133 float ***Jdp; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1134 float *Jdp_mem; /* the actual mem, points to Jdp[0][0][0] */ 1135 float ***Ldp; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1136 float *Ldp_mem; /* the actual mem, points to Ldp[0][0][0] */ 1137 float ***Rdp; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1138 float *Rdp_mem; /* the actual mem, points to Rdp[0][0][0] */ 1139 float ***Tdp; /* B states only: [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1140 float *Tdp_mem; /* the actual mem, points to Tdp[0][0][0] */ 1141 1142 CP9Bands_t *cp9b; /* the CP9Bands_t object associated with this 1143 * matrix, which defines j, d, bands for each 1144 * state, only a reference, so don't free 1145 * it when mx is freed. */ 1146 } CM_TR_HB_MX; 1147 1148 1149 /*********************************************************************************** 1150 * 23. CM_SHADOW_MX: CM shadow matrix, for DP tracebacks; non-banded, non-truncated. 1151 ***********************************************************************************/ 1152 1153 typedef struct cm_shadow_mx_s { 1154 int M; /* number of states (1st dim ptrs) in current mx */ 1155 int L; /* length of sequence the matrix currently corresponds to */ 1156 int B; /* number of BIF_B states (1st dim ptrs) in current mx */ 1157 1158 int64_t y_ncells_alloc; /* current cell allocation limit in yshadow*/ 1159 int64_t k_ncells_alloc; /* current cell allocation limit in kshadow*/ 1160 int64_t y_ncells_valid; /* current number of valid cells in yshadow */ 1161 int64_t k_ncells_valid; /* current number of valid cells in kshadow */ 1162 1163 float size_Mb; /* current size of matrix in Megabytes */ 1164 1165 /* yshadow holds the shadow matrix for all non-BIF_B states, yshadow[v] == NULL if cm->sttype[v] == B_st */ 1166 char ***yshadow; /* [0..v..M][0..j..L][0..d..j] */ 1167 char *yshadow_mem; /* the actual mem, points to yshadow[0][0][0] */ 1168 1169 /* kshadow holds the shadow matrix for all BIF_B states, kshadow[v] == NULL if cm->sttype[v] != B_st */ 1170 int ***kshadow; /* [0..v..M][0..j..L][0..d..j] */ 1171 int *kshadow_mem; /* the actual mem, points to Jkshadow[0][0][0] */ 1172 } CM_SHADOW_MX; 1173 1174 1175 /*********************************************************************************** 1176 * 24. CM_TR_SHADOW_MX: CM shadow matrix, for DP tracebacks; non-banded, truncated. 1177 ***********************************************************************************/ 1178 1179 typedef struct cm_tr_shadow_mx_s { 1180 int M; /* number of states (1st dim ptrs) in current mx */ 1181 int L; /* length of sequence the matrix currently corresponds to */ 1182 int B; /* number of BIF_B states (1st dim ptrs) in current mx */ 1183 1184 int64_t Jy_ncells_alloc; /* current cell allocation limit in Jyshadow*/ 1185 int64_t Ly_ncells_alloc; /* current cell allocation limit in Lyshadow*/ 1186 int64_t Ry_ncells_alloc; /* current cell allocation limit in Ryshadow*/ 1187 1188 int64_t Jk_ncells_alloc; /* current cell allocation limit in Jkshadow*/ 1189 int64_t Lk_ncells_alloc; /* current cell allocation limit in Lkshadow/Lkmode*/ 1190 int64_t Rk_ncells_alloc; /* current cell allocation limit in Rkshadow/Rkmode*/ 1191 int64_t Tk_ncells_alloc; /* current cell allocation limit in Tkshadow*/ 1192 1193 int64_t Jy_ncells_valid; /* current number of valid cells in Jyshadow */ 1194 int64_t Ly_ncells_valid; /* current number of valid cells in Lyshadow */ 1195 int64_t Ry_ncells_valid; /* current number of valid cells in Ryshadow */ 1196 1197 int64_t Jk_ncells_valid; /* current number of valid cells in Jkshadow */ 1198 int64_t Lk_ncells_valid; /* current number of valid cells in Lkshadow/Lkmode */ 1199 int64_t Rk_ncells_valid; /* current number of valid cells in Rkshadow/Rkmode */ 1200 int64_t Tk_ncells_valid; /* current number of valid cells in Tkshadow */ 1201 1202 float size_Mb; /* current size of matrix in Megabytes */ 1203 1204 /* {J,L,R}yshadow holds the shadow matrix for all non-BIF_B states, {J,L,R}yshadow[v] == NULL if cm->sttype[v] == B_st */ 1205 char ***Jyshadow; /* [0..v..M][0..j..L][0..d..j] */ 1206 char *Jyshadow_mem; /* the actual mem, points to Jyshadow[0][0][0] */ 1207 char ***Lyshadow; /* [0..v..M][0..j..L][0..d..j] */ 1208 char *Lyshadow_mem; /* the actual mem, points to Lyshadow[0][0][0] */ 1209 char ***Ryshadow; /* [0..v..M][0..j..L][0..d..j] */ 1210 char *Ryshadow_mem; /* the actual mem, points to Ryshadow[0][0][0] */ 1211 1212 /* {J,L,R,T}kshadow holds the shadow matrix for all BIF_B states, {J,L,R,T}kshadow[v] == NULL if cm->sttype[v] != B_st */ 1213 int ***Jkshadow; /* [0..v..M][0..j..L][0..d..j] */ 1214 int *Jkshadow_mem; /* the actual mem, points to Jkshadow[0][0][0] */ 1215 int ***Lkshadow; /* [0..v..M][0..j..L][0..d..j] */ 1216 int *Lkshadow_mem; /* the actual mem, points to Lkshadow[0][0][0] */ 1217 int ***Rkshadow; /* [0..v..M][0..j..L][0..d..j] */ 1218 int *Rkshadow_mem; /* the actual mem, points to Rkshadow[0][0][0] */ 1219 int ***Tkshadow; /* [0..v..M][0..j..L][0..d..j] */ 1220 int *Tkshadow_mem; /* the actual mem, points to Tkshadow[0][0][0] */ 1221 1222 /* {L,R}kmode holds the alignment mode for all BIF_B states {L,R}kmode == NULL for non-B states */ 1223 char ***Lkmode; /* [0..v..M][0..j..L][0..d..j] */ 1224 char *Lkmode_mem; /* the actual mem, points to Lkmode[0][0][0] */ 1225 char ***Rkmode; /* [0..v..M][0..j..L][0..d..j] */ 1226 char *Rkmode_mem; /* the actual mem, points to Rkmode[0][0][0] */ 1227 } CM_TR_SHADOW_MX; 1228 1229 /*********************************************************************************** 1230 * 25. CM_HB_SHADOW_MX: CM shadow matrix, for DP tracebacks; HMM banded, non-truncated. 1231 ***********************************************************************************/ 1232 1233 typedef struct cm_hb_shadow_mx_s { 1234 int M; /* number of states (1st dim ptrs) in current mx */ 1235 int L; /* length of sequence the matrix currently corresponds to */ 1236 int B; /* number of B_st's in the cm this mx was created for */ 1237 1238 int64_t y_ncells_alloc; /* current cell allocation limit in yshadow*/ 1239 int64_t y_ncells_valid; /* current number of valid cells in yshadow */ 1240 int64_t k_ncells_alloc; /* current cell allocation limit in kshadow*/ 1241 int64_t k_ncells_valid; /* current number of valid cells in kshadow */ 1242 float size_Mb; /* current size of matrix in Megabytes */ 1243 1244 int *nrowsA; /* [0..v..M] current number allocated rows for deck v */ 1245 1246 /* yshadow holds the shadow matrix for all non-BIF_B states, yshadow[v] == NULL if cm->sttype[v] == B_st */ 1247 char ***yshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1248 char *yshadow_mem; /* the actual mem, points to yshadow[0][0][0] */ 1249 1250 /* kshadow holds the shadow matrix for all BIF_B states, kshadow[v] == NULL if cm->sttype[v] != B_st */ 1251 int ***kshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1252 int *kshadow_mem; /* the actual mem, points to kshadow[0][0][0] */ 1253 1254 CP9Bands_t *cp9b; /* the CP9Bands_t object associated with this 1255 * matrix, which defines j, d, bands for each 1256 * state, only a reference, so don't free 1257 * it when mx is freed. */ 1258 } CM_HB_SHADOW_MX; 1259 1260 1261 /*********************************************************************************** 1262 * 26. CM_TR_HB_SHADOW_MX: CM shadow matrix, for DP tracebacks; HMM banded, truncated. 1263 ***********************************************************************************/ 1264 1265 typedef struct cm_tr_hb_shadow_mx_s { 1266 int M; /* number of states (1st dim ptrs) in current mx */ 1267 int L; /* length of sequence the matrix currently corresponds to */ 1268 int B; /* number of BIF_B states (1st dim ptrs) in current mx */ 1269 1270 int64_t Jy_ncells_alloc; /* current cell allocation limit in Jyshadow*/ 1271 int64_t Ly_ncells_alloc; /* current cell allocation limit in Lyshadow*/ 1272 int64_t Ry_ncells_alloc; /* current cell allocation limit in Ryshadow*/ 1273 1274 int64_t Jk_ncells_alloc; /* current cell allocation limit in Jkshadow*/ 1275 int64_t Lk_ncells_alloc; /* current cell allocation limit in Lkshadow/Lkmode*/ 1276 int64_t Rk_ncells_alloc; /* current cell allocation limit in Rkshadow/Rkmode*/ 1277 int64_t Tk_ncells_alloc; /* current cell allocation limit in Tkshadow*/ 1278 1279 int64_t Jy_ncells_valid; /* current number of valid cells in Jyshadow */ 1280 int64_t Ly_ncells_valid; /* current number of valid cells in Lyshadow */ 1281 int64_t Ry_ncells_valid; /* current number of valid cells in Ryshadow */ 1282 1283 int64_t Jk_ncells_valid; /* current number of valid cells in Jkshadow */ 1284 int64_t Lk_ncells_valid; /* current number of valid cells in Lkshadow/Lkmode */ 1285 int64_t Rk_ncells_valid; /* current number of valid cells in Rkshadow/Rkmode */ 1286 int64_t Tk_ncells_valid; /* current number of valid cells in Tkshadow */ 1287 1288 float size_Mb; /* current size of matrix in Megabytes */ 1289 1290 int *JnrowsA; /* [0..v..M] current number allocated rows for deck v */ 1291 int *LnrowsA; /* [0..v..M] current number allocated rows for deck v */ 1292 int *RnrowsA; /* [0..v..M] current number allocated rows for deck v */ 1293 int *TnrowsA; /* [0..v..M] current number allocated rows for deck v */ 1294 1295 /* {J,L,R}yshadow holds the shadow matrix for all non-BIF_B states, {J,L,R}yshadow[v] == NULL if cm->sttype[v] == B_st */ 1296 char ***Jyshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1297 char *Jyshadow_mem; /* the actual mem, points to Jyshadow[0][0][0] */ 1298 char ***Lyshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1299 char *Lyshadow_mem; /* the actual mem, points to Lyshadow[0][0][0] */ 1300 char ***Ryshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1301 char *Ryshadow_mem; /* the actual mem, points to Ryshadow[0][0][0] */ 1302 1303 /* {J,L,R,T}kshadow holds the shadow matrix for all BIF_B states, {J,L,R,T}kshadow[v] == NULL if cm->sttype[v] != B_st */ 1304 int ***Jkshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1305 int *Jkshadow_mem; /* the actual mem, points to Jkshadow[0][0][0] */ 1306 int ***Lkshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1307 int *Lkshadow_mem; /* the actual mem, points to Lkshadow[0][0][0] */ 1308 int ***Rkshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1309 int *Rkshadow_mem; /* the actual mem, points to Rkshadow[0][0][0] */ 1310 int ***Tkshadow; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1311 int *Tkshadow_mem; /* the actual mem, points to Tkshadow[0][0][0] */ 1312 1313 /* {L,R}kmode holds the alignment mode for all BIF_B states {L,R}kmode == NULL for non-B states */ 1314 char ***Lkmode; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1315 char *Lkmode_mem; /* the actual mem, points to Lkmode[0][0][0] */ 1316 char ***Rkmode; /* [0..v..M][0..j..(cp9b->jmax[v]-cp9b->jmin[v])[0..d..cp9b->hdmax[v][j-jmin[v]]-cp9b->hdmin[v][j-jmin[v]]] */ 1317 char *Rkmode_mem; /* the actual mem, points to Rkmode[0][0][0] */ 1318 1319 CP9Bands_t *cp9b; /* the CP9Bands_t object associated with this 1320 * matrix, which defines j, d, bands for each 1321 * state, only a reference, so don't free 1322 * it when mx is freed. */ 1323 } CM_TR_HB_SHADOW_MX; 1324 1325 /*********************************************************************************** 1326 * 27. CM_EMIT_MX: CM emit matrix, info on PP of emitted residues; non-banded, non-truncated. 1327 ***********************************************************************************/ 1328 1329 /* CM_EMIT_MX: Two 2-dimensional matrices <l_pp> and <r_pp> 1330 * where: 1331 * 1332 * l_pp[v][i]: log of the posterior probability that state v emitted 1333 * residue i leftwise either at (if a match state) or 1334 * *after* (if an insert state) the left consensus 1335 * position modeled by state v's node. 1336 * 1337 * r_pp[v][i]: log of the posterior probability that state v emitted 1338 * residue i rightwise either at (if a match state) or 1339 * *before* (if an insert state) the right consensus 1340 * position modeled by state v's node. 1341 * 1342 * l_pp[v] is NULL for states that do not emit leftwise (B,S,D,E,IR,MR) 1343 * r_pp[v] is NULL for states that do not emit rightwise (B,S,D,E,IL,ML) 1344 * 1345 * Importantly the definition above is not: "l_pp[v][i] is the posterior 1346 * probability that residue i was emitted from state v", although that 1347 * is true for MATL_ML and all IL states. It is not true however for 1348 * MATP_MP states and MATP_MP states because we want to combine the 1349 * posterior probability that either the MATP_MP or the MATP_ML states 1350 * from the same node emitted each residue i, it is the sum that is stored 1351 * in l_pp[v][i]. The same is true for the analogous case in r_pp with 1352 * MATP_MP and MATP_MR states. 1353 */ 1354 typedef struct cm_emit_mx_s { 1355 int M; /* number of states (1st dim ptrs) in current mx */ 1356 int L; /* length of sequence the matrix currently corresponds to */ 1357 int64_t l_ncells_alloc; /* current cell allocation limit for l_pp */ 1358 int64_t l_ncells_valid; /* current number of valid cells for l_pp */ 1359 int64_t r_ncells_alloc; /* current cell allocation limit for r_pp */ 1360 int64_t r_ncells_valid; /* current number of valid cells for r_pp */ 1361 float size_Mb; /* current size of matrix in Megabytes */ 1362 1363 float **l_pp; /* matrix: [0..v..M][0..1..i..L], l_pp[v][0] is 1364 * always IMPOSSIBLE l_pp[v] == NULL if v is 1365 * not a left emitter. 1366 */ 1367 float **r_pp; /* matrix: [0..v..M][0..1..i..L], r_pp[v][0] is 1368 * always IMPOSSIBLE r_pp[v] == NULL if v is 1369 * not a right emitter. 1370 */ 1371 float *l_pp_mem; /* the actual mem for l_pp, points to 1372 * l_pp[v][0], where v is min v for which 1373 * l_pp != NULL */ 1374 float *r_pp_mem; /* the actual mem for r_pp, points to 1375 * r_pp[v][0], where v is min v for which 1376 * r_pp != NULL */ 1377 float *sum; /* [0..1..i..L] log of the summed posterior 1378 * probability that residue i was emitted 1379 * either leftwise or rightwise by any state. 1380 * Used for normalizing l_pp and r_pp. 1381 */ 1382 } CM_EMIT_MX; 1383 1384 1385 /*********************************************************************************** 1386 * 28. CM_TR_EMIT_MX: CM emit matrix, info on PP of emitted residues; non-banded, truncated. 1387 ***********************************************************************************/ 1388 1389 /* CM_TR_EMIT_MX: Same as CM_EMIT_MX except extended for truncated 1390 * alignment. Two l_pp and r_pp matrices exist: 1391 * 1392 * Jl_pp, Ll_pp, and Jr_pp, Rr_pp. 1393 * 1394 * Each as defined above for CM_EMIT_MX with the caveat that 1395 * residue i for a [v][i] cell in a 'J', 'L', or 'R' matrix 1396 * indicates that i was emitted in Joint (J), Left (L), or 1397 * Right (R) marginal alignment mode. 1398 * 1399 * Note that we don't need a Lr_pp or Rl_pp matrix because 1400 * in Left marginal mode we can't emit rightwise, and 1401 * in Right marginal mode we can't emit leftwise. Also, 1402 * no Terminal mode matrices are necessary because only 1403 * B states can be in Terminal mode, and they don't emit. 1404 * 1405 * When we're filling a CM_TR_EMIT_MX matrix we'll know the optimal 1406 * alignment mode <mode>, which dictates which of the *pp matrices 1407 * need be filled: 1408 * 1409 * <mode> == TRMODE_J, fill Jl_pp, Jr_pp 1410 * <mode> == TRMODE_L, fill Jl_pp, Jr_pp and Ll_pp 1411 * <mode> == TRMODE_R, fill Jl_pp, Jr_pp and Rr_pp 1412 * <mode> == TRMODE_T, fill Jl_pp, Jr_pp, Ll_pp, and Rr_pp. 1413 * 1414 * However, we don't store <mode> in the CM_TR_EMIT_MX because, as 1415 * it's implemented, it will not affect how the matrix is allocated. 1416 * We always allocate for all four *pp matrices because space is 1417 * not a big concern because we're using 2D matrices, and this 1418 * way prevents future reallocations when the marginal mode 1419 * switches in subsequent sequence alignment. 1420 */ 1421 typedef struct cm_tr_emit_mx_s { 1422 int M; /* number of states (1st dim ptrs) in current mx */ 1423 int L; /* length of sequence the matrix currently corresponds to */ 1424 int64_t l_ncells_alloc; /* current cell allocation limit for Jl_pp, Ll_pp */ 1425 int64_t l_ncells_valid; /* current number of valid cells for Jl_pp, Ll_pp */ 1426 int64_t r_ncells_alloc; /* current cell allocation limit for Jr_pp, Rr_pp */ 1427 int64_t r_ncells_valid; /* current number of valid cells for Jr_pp, Rr_pp */ 1428 float size_Mb; /* current size of matrix in Megabytes */ 1429 1430 /* for all *_pp matrices, *_pp[v][0] is always IMPOSSIBLE, 1431 * *l_pp[v] == NULL for non-left emitters, 1432 * *r_pp[v] == NULL for non-right emitters. 1433 */ 1434 float **Jl_pp; /* matrix: [0..v..M][0..1..i..L], Joint mode */ 1435 float **Ll_pp; /* matrix: [0..v..M][0..1..i..L], Left mode */ 1436 float **Jr_pp; /* matrix: [0..v..M][0..1..i..L], Joint mode */ 1437 float **Rr_pp; /* matrix: [0..v..M][0..1..i..L], Right mode */ 1438 float *Jl_pp_mem; /* the actual mem for Jl_pp */ 1439 float *Ll_pp_mem; /* the actual mem for Ll_pp */ 1440 float *Jr_pp_mem; /* the actual mem for Jr_pp */ 1441 float *Rr_pp_mem; /* the actual mem for Rr_pp */ 1442 float *sum; /* [0..1..i..L] log of the summed posterior 1443 * probability that residue i was emitted 1444 * either leftwise or rightwise by any state. 1445 * Used for normalizing *l_pp and *r_pp. 1446 */ 1447 } CM_TR_EMIT_MX; 1448 1449 1450 /*********************************************************************************** 1451 * 29. CM_HB_EMIT_MX: CM emit matrix, info on PP of emitted residues; HMM banded, non-truncated. 1452 ***********************************************************************************/ 1453 1454 /* CM_HB_EMIT_MX: HMM-banded version of CM_EMIT_MX (see the 1455 * description of that structure above). The only difference is that 1456 * now bands are enforced by only allocating l_pp and r_pp for 1457 * residues within the bands. A pointer to the CP9Bands_t object is in 1458 * <cp9b>. The bandwidth of the l_pp rows are defined by cp9b->imin 1459 * and cp9b->imax and for r_pp rows by cp9b->jmin and cp9b->jmax. 1460 */ 1461 typedef struct cm_hb_emit_mx_s { 1462 int M; /* number of states (1st dim ptrs) in current mx */ 1463 int L; /* length of sequence the matrix currently corresponds to */ 1464 int64_t l_ncells_alloc; /* current cell allocation limit for dp */ 1465 int64_t l_ncells_valid; /* current number of valid cells for dp */ 1466 int64_t r_ncells_alloc; /* current cell allocation limit for dp */ 1467 int64_t r_ncells_valid; /* current number of valid cells for dp */ 1468 float size_Mb; /* current size of matrix in Megabytes */ 1469 1470 float **l_pp; /* matrix: [0..v..M][0..i..(lmax[v]-lmin[v])], 1471 * l_pp[v][i] corresponds to residue i+lmin[v]. 1472 * l_pp[v] == NULL if v is not a left emitter. 1473 */ 1474 float **r_pp; /* matrix: [0..v..M][0..i..(rmax[v]-rmin[v])], 1475 * r_pp[v][i] corresponds to residue i+rmin[v]. 1476 * r_pp[v] == NULL if v is not a right emitter. 1477 */ 1478 float *l_pp_mem; /* the actual mem for l_pp, points to 1479 * l_pp[v][0], where v is min v for which 1480 * l_pp != NULL */ 1481 float *r_pp_mem; /* the actual mem for r_pp, points to 1482 * r_pp[v][0], where v is min v for which 1483 * r_pp != NULL */ 1484 float *sum; /* [0..1..i..L] log of the summed posterior 1485 * probability that residue i was emitted 1486 * either leftwise or rightwise by any state. 1487 * Used for normalizing l_pp and r_pp. 1488 */ 1489 CP9Bands_t *cp9b; /* the CP9Bands_t object associated with 1490 * this matrix. We use the imin and imax 1491 * arrays as bands on l_pp and jmin and jmax 1492 * arrays as bands on r_pp. Only a 1493 * reference, so don't free it when mx is 1494 * freed. */ 1495 } CM_HB_EMIT_MX; 1496 1497 1498 /*********************************************************************************** 1499 * 30. CM_TR_HB_EMIT_MX: CM emit matrix, info on PP of emitted residues; HMM banded, truncated. 1500 ***********************************************************************************/ 1501 1502 /* CM_TR_HB_EMIT_MX: HMM-banded version of CM_TR_EMIT_MX (see the 1503 * description of that structure above). The only difference is that 1504 * now bands are enforced by only allocating Jl_pp, Ll_pp, Jr_pp, and 1505 * Rr_pp for residues within the bands. A pointer to the CP9Bands_t 1506 * object is in <cp9b>. The bandwidth of the {J,L}l_pp rows are 1507 * defined by cp9b->imin and cp9b->imax and for {J,R}r_pp rows by 1508 * cp9b->jmin and cp9b->jmax. 1509 */ 1510 typedef struct cm_tr_hb_emit_mx_s { 1511 int M; /* number of states (1st dim ptrs) in current mx */ 1512 int L; /* length of sequence the matrix currently corresponds to */ 1513 int64_t l_ncells_alloc; /* current cell allocation limit for Jl_pp, Ll_pp */ 1514 int64_t l_ncells_valid; /* current number of valid cells for Jl_pp, Ll_pp */ 1515 int64_t r_ncells_alloc; /* current cell allocation limit for Jr_pp, Rr_pp */ 1516 int64_t r_ncells_valid; /* current number of valid cells for Jr_pp, Rr_pp */ 1517 float size_Mb; /* current size of matrix in Megabytes */ 1518 1519 /* for all *_pp matrices, *_pp[v][0] is always IMPOSSIBLE, 1520 * *l_pp[v] == NULL for non-left emitters, 1521 * *r_pp[v] == NULL for non-right emitters. 1522 */ 1523 float **Jl_pp; /* matrix: [0..v..M][0..1..i..L], Joint mode */ 1524 float **Ll_pp; /* matrix: [0..v..M][0..1..i..L], Left mode */ 1525 float **Jr_pp; /* matrix: [0..v..M][0..1..i..L], Joint mode */ 1526 float **Rr_pp; /* matrix: [0..v..M][0..1..i..L], Right mode */ 1527 float *Jl_pp_mem; /* the actual mem for Jl_pp */ 1528 float *Ll_pp_mem; /* the actual mem for Ll_pp */ 1529 float *Jr_pp_mem; /* the actual mem for Jr_pp */ 1530 float *Rr_pp_mem; /* the actual mem for Rr_pp */ 1531 float *sum; /* [0..1..i..L] log of the summed posterior 1532 * probability that residue i was emitted 1533 * either leftwise or rightwise by any state. 1534 * Used for normalizing *l_pp and *r_pp. 1535 */ 1536 CP9Bands_t *cp9b; /* the CP9Bands_t object associated with 1537 * this matrix. We use the imin and imax 1538 * arrays as bands on l_pp and jmin and jmax 1539 * arrays as bands on r_pp. Only a 1540 * reference, so don't free it when mx is 1541 * freed. */ 1542 } CM_TR_HB_EMIT_MX; 1543 1544 1545 /*********************************************************************************** 1546 * 31. CM_QDBINFO: model specific QDB information, including 2 sets of bands. 1547 ***********************************************************************************/ 1548 1549 enum cm_qdbinfo_setby_e { CM_QDBINFO_SETBY_INIT = 0, CM_QDBINFO_SETBY_CMFILE = 1, CM_QDBINFO_SETBY_BANDCALC = 2, CM_QDBINFO_SETBY_SUBINIT }; 1550 enum cm_w_setby_e { CM_W_SETBY_INIT = 0, CM_W_SETBY_CMFILE = 1, CM_W_SETBY_BANDCALC = 2, CM_W_SETBY_CMDLINE = 3, CM_W_SETBY_SUBCOPY }; 1551 1552 /* Structure CM_QDBINFO: Per-model information on QDBs including 1553 * two sets of dmin/dmax values and the beta values used to 1554 * calculate them. 1555 */ 1556 typedef struct cm_qdbinfo_s { 1557 int M; /* number of states in CM these QDBs are for, size of dmin/dmax arrays */ 1558 double beta1; /* tail loss probability for calculating QDBs dmin1/dmax1 */ 1559 int *dmin1; /* [0..cm_M-1] minimum subsequence length d allowed for state v */ 1560 int *dmax1; /* [0..cm_M-1] maximum subsequence length d allowed for state v */ 1561 double beta2; /* tail loss probability for calculating QDBs dmin2/dmax2 */ 1562 int *dmin2; /* [0..cm_M-1] minimum subsequence length d allowed for state v */ 1563 int *dmax2; /* [0..cm_M-1] maximum subsequence length d allowed for state v */ 1564 enum cm_qdbinfo_setby_e setby; /* how current dmin/dmax values were set: 1565 * CM_QDBINFO_SETBY_INIT | CM_QDBINFO_SETBY_CMFILE | CM_QDBINFO_SETBY_BANDCALC */ 1566 1567 } CM_QDBINFO; 1568 1569 /*********************************************************************************** 1570 * 32. CM_SCAN_MX: matrices used for scanning CM DP algorithms; non-truncated. 1571 ***********************************************************************************/ 1572 1573 /* Structure CM_SCAN_MX: 1574 * 1575 * Information used by all CYK/Inside scanning 1576 * functions, compiled together into one data structure for 1577 * convenience. The matrix is allocated to allow either non-banded or 1578 * query-dependent banded (QDB) scans. 1579 * 1580 * QDB information, including two sets of dmin/dmax values, W, and 1581 * the three beta values used to calculate those numbers are in 1582 * <qdbinfo> (see that structure definition for more information). 1583 * 1584 * The CM_SCAN_MX contains three sets of dn and dx values for each 1585 * state and each possible endpoint in the sequence. The first set 1586 * dnAAA[0]/dxAAA[0] does not use bands. The second set 1587 * dnAAA[1]/dxAAA[1] uses the tighter set of bands in <qdbinfo> (which 1588 * is qdbinfo->dmin1/dmax1) and the third set dnAAA[2]/dxAAA[2] uses 1589 * the looser set of bands in <qdbinfo> (which is 1590 * qdbinfo->dmin2/dmax2). Each one of these sets is itself a 1591 * two-dimensional array indexed [0..j..W] (first dim) and [0..v..M-1] 1592 * (second dim), indicating the minimum and maximum d value to 1593 * be considered by a DP scanner for the sequence position j and 1594 * CM state v (if the position is > W, then the value for W is used). 1595 * 1596 * This set of three bands is necessary so we can call any DP scanner 1597 * function and specify the use of any of these three sets of bands, 1598 * and all the function has to do is point to the appropriate 1599 * dnAAA/dxAAA two-dimensional array. 1600 * 1601 * Additionally, each matrix can have valid float matrices, int 1602 * matrices or both. The status of each is stored in the <floats_valid> 1603 * and <ints_valid> parameters. 1604 */ 1605 1606 /* indexes for first-dimension of dnAAA/dxAAA in a CM_SCAN_MX or CM_TR_SCAN_MX */ 1607 #define SMX_NOQDB 0 1608 #define SMX_QDB1_TIGHT 1 1609 #define SMX_QDB2_LOOSE 2 1610 #define NSMX_QDB_IDX 3 1611 1612 typedef struct cm_scan_mx_s { 1613 /* general info about the model/search */ 1614 CM_QDBINFO *qdbinfo; /* a pointer to the qdbinfo related to the matrix */ 1615 int M; /* copy of cm->M from cm this CM_SCAN_MX is associated with */ 1616 int W; /* copy of cm->W from cm this CM_SCAN_MX is associated with */ 1617 int ***dnAAA; /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] min d value allowed for posn j, state v using QDB set n */ 1618 int ***dxAAA; /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] max d value allowed for posn j, state v using QDB set n */ 1619 int *bestr; /* auxil info: best root state v at alpha[0][cur][d] (0->v local begin used if v != 0)*/ 1620 float *bestsc; /* auxil info: best score for parsetree at alpha[0][cur][d] in mode bestmode[d] */ 1621 int floats_valid; /* TRUE if float alpha matrices are valid, FALSE if not */ 1622 int ints_valid; /* TRUE if int alpha matrices are valid, FALSE if not */ 1623 float size_Mb; /* size of matrix in Megabytes */ 1624 1625 /* falpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for float implementations of CYK/Inside */ 1626 float ***falpha; /* non-BEGL_S states for float versions of CYK/Inside */ 1627 float ***falpha_begl; /* BEGL_S states for float versions of CYK/Inside */ 1628 float *falpha_mem; /* ptr to the actual memory for falpha */ 1629 float *falpha_begl_mem; /* ptr to the actual memory for falpha_begl */ 1630 1631 /* ialpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for integer implementations of CYK/Inside */ 1632 int ***ialpha; /* non-BEGL_S states for int versions of CYK/Inside */ 1633 int ***ialpha_begl; /* BEGL_S states for int versions of CYK/Inside */ 1634 int *ialpha_mem; /* ptr to the actual memory for ialpha */ 1635 int *ialpha_begl_mem; /* ptr to the actual memory for ialpha_begl */ 1636 1637 int64_t ncells_alpha; /* number of alloc'ed, valid cells for falpha and ialpha matrices, alloc'ed as contiguous block */ 1638 int64_t ncells_alpha_begl; /* number of alloc'ed, valid cells for falpha_begl and ialpha_begl matrices, alloc'ed as contiguous block */ 1639 } CM_SCAN_MX; 1640 1641 1642 /*********************************************************************************** 1643 * 33. CM_TR_SCAN_MX: matrices used for scanning CM DP algorithms; truncated. 1644 ***********************************************************************************/ 1645 1646 /* Structure CM_TR_SCAN_MX: 1647 * 1648 * Similar to CM_SCAN_MX except that additional matrices are allocated 1649 * for L and R marginal type (truncated) alignments. Also, dmin values cannot be 1650 * enforced for any state because in truncated alignments we assume a 1651 * truncation is possible anywhere so enforcing minimum subsequence lengths 1652 * is illogical. Enforcing maximum lengths (dmax) still makes sense 1653 * though and these are enforced. See the explanation of CM_SCAN_MX 1654 * above for more information. 1655 */ 1656 typedef struct cm_tr_scan_mx_s { 1657 /* general info about the model/search */ 1658 CM_QDBINFO *qdbinfo; /* a pointer to the qdbinfo related to the matrix */ 1659 int M; /* copy of cm->M from cm this CM_TR_SCAN_MX is associated with */ 1660 int W; /* copy of cm->W from cm this CM_TR_SCAN_MX is associated with */ 1661 int ***dnAAA; /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] min d value allowed for posn j, state v using QDB set n */ 1662 int ***dxAAA; /* [0..n..NSMX_QDB_IDX-1][1..j..W][0..v..M-1] max d value allowed for posn j, state v using QDB set n */ 1663 int *bestr; /* auxil info: best root state v at alpha[0][cur][d] (0->v truncated begin used if v != 0)*/ 1664 float *bestsc; /* auxil info: best score for parsetree at alpha[0][cur][d] in mode bestmode[d] */ 1665 char *bestmode; /* auxil info: best mode for parsetree at alpha[0][cur][d], gives score in bestsc[d] */ 1666 int floats_valid; /* TRUE if float alpha matrices are valid, FALSE if not */ 1667 int ints_valid; /* TRUE if int alpha matrices are valid, FALSE if not */ 1668 float size_Mb; /* size of matrix in Megabytes */ 1669 1670 /* f{J,L,R,T}alpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for float implementations of CYK/Inside */ 1671 float ***fJalpha; /* non-BEGL_S states for float versions of CYK/Inside */ 1672 float ***fJalpha_begl; /* BEGL_S states for float versions of CYK/Inside */ 1673 float *fJalpha_mem; /* ptr to the actual memory for fJalpha */ 1674 float *fJalpha_begl_mem; /* ptr to the actual memory for fJalpha_begl */ 1675 1676 float ***fLalpha; /* non-BEGL_S states for float versions of CYK/Inside */ 1677 float ***fLalpha_begl; /* BEGL_S states for float versions of CYK/Inside */ 1678 float *fLalpha_mem; /* ptr to the actual memory for fLalpha */ 1679 float *fLalpha_begl_mem; /* ptr to the actual memory for fLalpha_begl */ 1680 1681 float ***fRalpha; /* non-BEGL_S states for float versions of CYK/Inside */ 1682 float ***fRalpha_begl; /* BEGL_S states for float versions of CYK/Inside */ 1683 float *fRalpha_mem; /* ptr to the actual memory for fRalpha */ 1684 float *fRalpha_begl_mem; /* ptr to the actual memory for fRalpha_begl */ 1685 1686 float ***fTalpha; /* BIF states for float versions of CYK/Inside */ 1687 float *fTalpha_mem; /* ptr to the actual memory for fTalpha */ 1688 1689 /* i{J,L,R,T}alpha dp matrices [0..j..1][0..v..cm->M-1][0..d..W] for integer implementations of CYK/Inside */ 1690 int ***iJalpha; /* non-BEGL_S states for int versions of CYK/Inside */ 1691 int ***iJalpha_begl; /* BEGL_S states for int versions of CYK/Inside */ 1692 int *iJalpha_mem; /* ptr to the actual memory for iJalpha */ 1693 int *iJalpha_begl_mem; /* ptr to the actual memory for iJalpha_begl */ 1694 1695 int ***iLalpha; /* non-BEGL_S states for int versions of CYK/Inside */ 1696 int ***iLalpha_begl; /* BEGL_S states for int versions of CYK/Inside */ 1697 int *iLalpha_mem; /* ptr to the actual memory for iLalpha */ 1698 int *iLalpha_begl_mem; /* ptr to the actual memory for iLalpha_begl */ 1699 1700 int ***iRalpha; /* non-BEGL_S states for int versions of CYK/Inside */ 1701 int ***iRalpha_begl; /* BEGL_S states for int versions of CYK/Inside */ 1702 int *iRalpha_mem; /* ptr to the actual memory for iRalpha */ 1703 int *iRalpha_begl_mem; /* ptr to the actual memory for iRalpha_begl */ 1704 1705 int ***iTalpha; /* BIF states for int versions of CYK/Inside */ 1706 int *iTalpha_mem; /* ptr to the actual memory for iTalpha */ 1707 1708 int64_t ncells_alpha; /* number of alloc'ed, valid cells for f{J,L,R}alpha and i{J,L,R}alpha matrices, alloc'ed as contiguous block */ 1709 int64_t ncells_alpha_begl; /* number of alloc'ed, valid cells for f{J,L,R}alpha_begl and i{J,L,R}alpha_begl matrices, alloc'ed as contiguous block */ 1710 int64_t ncells_Talpha; /* number of alloc'ed, valid cells for fTalpha and iTalpha matrices, alloc'ed as contiguous block */ 1711 } CM_TR_SCAN_MX; 1712 1713 1714 1715 /************************************************************************************* 1716 * 34. CM_TR_PENALTIES: pass, state and locality-specific truncated alignment penalties. 1717 *************************************************************************************/ 1718 1719 /* Truncation penalty parameters, we either allow 5' truncation, 3' truncation or both. 1720 * These allow truncated DP aligners and scanners (cm_dpalign_trunc.c and cm_dpsearch_trunc.c) 1721 * to know which truncation penalty to apply. 1722 */ 1723 #define TRPENALTY_5P_AND_3P 0 1724 #define TRPENALTY_5P_ONLY 1 1725 #define TRPENALTY_3P_ONLY 2 1726 #define NTRPENALTY 3 1727 #define TRPENALTY_NONE -1 1728 1729 typedef struct cm_tr_penalties_s { 1730 int M; /* number of states in the CM this object is associated with */ 1731 int ignored_inserts; /* TRUE if inserts were ignored, FALSE if not (normally this is FALSE) */ 1732 float **g_ptyAA; /* g_ptyAA[TRPENALTY_5P_AND_3][0..v..M-1] sc penalty for global aln if we allowed 5' and 3' truncation 1733 * g_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1] sc penalty for global aln if we allowed 5' truncation only 1734 * g_ptyAA[TRPENALTY_3P_ONLY][0..v..M-1] sc penalty for global aln if we allowed 3' truncation only */ 1735 float **l_ptyAA; /* l_ptyAA[TRPENALTY_5P_AND_3][0..v..M-1] sc penalty for local aln if we allowed 5' and 3' truncation 1736 * l_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1] sc penalty for local aln if we allowed 5' truncation only 1737 * l_ptyAA[TRPENALTY_5P_ONLY][0..v..M-1] sc penalty for local aln if we allowed 3' truncation only */ 1738 int **ig_ptyAA; /* same as g_ptyAA but scaled int scores */ 1739 int **il_ptyAA; /* same as l_ptyAA but scaled int scores */ 1740 } CM_TR_PENALTIES; 1741 1742 1743 /***************************************************************** 1744 * 35. CM_t: a covariance model 1745 *****************************************************************/ 1746 1747 /* Structure: CM_t 1748 * Incept: SRE, 9 Mar 2000 [San Carlos CA] 1749 * 1750 * A covariance model. M states, arranged logically as a directed graph 1751 * (on a binary tree backbone); arranged physically as a set of arrays 0..M-1. 1752 * 1753 * State 0 is always the root state. State M-1 is always an end state. 1754 * 1755 * EPN 12.19.06: added arrays to hold integer log-odds scores for faster 1756 * inside/outside 1757 * 1758 */ 1759 typedef struct cm_s { 1760 /* General information about the model: */ 1761 char *name; /* name of the model (mandatory) */ /* String, \0-terminated */ 1762 char *acc; /* accession number of model (Rfam) (CMH_ACC) */ /* String, \0-terminated */ 1763 char *desc; /* brief (1-line) description of model (CMH_DESC)) */ /* String, \0-terminated */ 1764 char *rf; /* reference line from alignment 1..clen (CMH_RF) */ /* String; 0=' ', clen+1='\0' */ 1765 char *consensus; /* consensus residue line 1..clen (CMH_CONS) */ /* String; 0=' ', clen+1='\0' */ 1766 uint32_t checksum; /* checksum of training sequences (CMH_CHKSUM) */ 1767 int *map; /* map of alignment cols onto model 1..clen (CMH_MAP) */ /* Array; map[0]=0 */ 1768 1769 /* new as of v1.0 */ 1770 char *comlog; /* command line(s) that built, modified model (optional: NULL) */ /* String, \0-terminated */ 1771 char *ctime; /* creation date (optional: NULL) */ 1772 int nseq; /* number of training sequences (mandatory) */ 1773 float eff_nseq; /* effective number of seqs (<= nseq) (mandatory) */ 1774 float ga; /* per-seq gathering thresholds (bits) (CMH_GA) */ 1775 float tc; /* per-seq trusted cutoff (bits) (CMH_TC) */ 1776 float nc; /* per-seq noise cutoff (bits) (CMH_NC) */ 1777 1778 /* Information about the null model: */ 1779 float *null; /* residue probabilities [0..3] */ 1780 1781 /* Information about the state type: */ 1782 int M; /* number of states in the model */ 1783 int clen; /* consensus length (2*MATP+MATL+MATR) */ 1784 char *sttype; /* type of state this is; e.g. MP_st */ 1785 int *ndidx; /* index of node this state belongs to */ 1786 char *stid; /* unique state identifier; e.g. MATP_MP */ 1787 1788 /* Information about its connectivity in CM: */ 1789 int *cfirst; /* index of left child state */ 1790 int *cnum; /* overloaded: for non-BIF: # connections; */ 1791 /* for BIF: right child S_st */ 1792 int *plast; /* index to first parent state */ 1793 int *pnum; /* number of parent connections */ 1794 1795 /* Information mapping nodes->states */ 1796 int nodes; /* number of nodes in the model */ 1797 int *nodemap; /* nodemap[5] = idx first state, node 5 */ 1798 char *ndtype; /* type of node, e.g. MATP_nd */ 1799 1800 /* Parameters of the probabilistic model: */ 1801 float **t; /* Transition prob's [0..M-1][0..MAXCONNECT-1] */ 1802 float **e; /* Emission probabilities. [0..M-1][0..15] */ 1803 float *begin; /* Local alignment start probabilities [0..M-1] */ 1804 float *end; /* Local alignment ending probabilities [0..M-1] */ 1805 1806 /* Parameters of the log odds model: */ 1807 float **tsc; /* Transition score vector, log odds */ 1808 float **esc; /* Emission score vector, log odds */ 1809 float **oesc; /* Optimized emission score log odds float vec */ 1810 float **lmesc; /* Left marginal emission scores (log odds) */ 1811 float **rmesc; /* Right marginal emission scores (log odds) */ 1812 float *beginsc; /* Score for ROOT_S -> state v (local alignment) */ 1813 float *endsc; /* Score for state_v -> EL (local alignment) */ 1814 1815 /* Scaled int parameters of the log odds model: */ 1816 int **itsc; /* Transition score vector, scaled log odds int */ 1817 int **iesc; /* Emission score vector, scaled log odds int */ 1818 int **ioesc; /* Optimized emission score log odds int vector */ 1819 int **ilmesc; /* Left marginal emission scores (log odds int) */ 1820 int **irmesc; /* Right marginal emission scores (log odds int) */ 1821 int *ibeginsc; /* Score for ROOT_S -> state v (local alignment) */ 1822 int *iendsc; /* Score for state_v -> EL (local alignment) */ 1823 1824 float pbegin; /* local begin prob to spread across internal nodes for local mode */ 1825 float pend; /* local end prob to spread across internal nodes for local mode */ 1826 1827 float null2_omega; /* prior probability of the null2 model (if it is used) */ 1828 float null3_omega; /* prior probability of the null3 model (if it is used) */ 1829 1830 int flags; /* status flags */ 1831 1832 /* W and query dependent bands (QDBs) on subsequence lengths at each state */ 1833 int W; /* max d: max size of a hit (EPN 08.18.05) */ 1834 double beta_W; /* tail loss probability for QDB calculation used to set W */ 1835 enum cm_w_setby_e W_setby; /* how current W value was set: 1836 * CM_W_SETBY_INIT | CM_W_SETBY_CMFILE | CM_W_SETBY_BANDCALC | CM_W_SETBY_CMDLINE | CM_W_SETBY_SUBCOPY */ 1837 /* regarding W: if W_setby is CM_W_SETBY_INIT or CM_W_SETBY_CMDLINE, then beta_W does not correspond 1838 * to a band calculation beta value used to compute W (set as dmax[0]). Otherwise, it does. */ 1839 1840 CM_QDBINFO *qdbinfo; /* two sets of QDBs and the beta values used to calc them */ 1841 1842 double tau; /* tail loss probability for HMM target dependent banding */ 1843 double maxtau; /* maximum allowed tau value for HMM band tightening */ 1844 1845 int config_opts;/* model configuration options */ 1846 int align_opts; /* alignment options */ 1847 int search_opts;/* search options */ 1848 float *root_trans; /* transition probs from state 0, saved IFF zeroed in ConfigLocal() */ 1849 1850 float el_selfsc; /* score of a self transition in the EL state 1851 * the EL state emits only on self transition (EPN 11.15.05)*/ 1852 int iel_selfsc; /* scaled int version of el_selfsc */ 1853 1854 /* CP9 HMMs and associated data structures. These are built and 1855 * configured in cm_modelconfig.c:ConfigCM(). <cp9> is always built 1856 * and is configured (local begins/ ends and ELs) to match the CM. 1857 * <Lcp9>, <Rcp9> and <Tcp9> are for truncated alignment, one each 1858 * for L, R and T modes. These are built only if the 1859 * CM_CONFIG_TRUNC flag is raised. They are configured to match 1860 * their alignment mode. For example, Lcp9 has a global-like begin 1861 * (prob of ~1.0 into match state 1) but local-like ends 1862 * (equiprobable end points) to match the possibility of a 3' 1863 * truncation (L mode alignment). If the CM_CONFIG_TRUNC_NOFORCE 1864 * flag is raised then Lcp9, Rcp9, Tcp9 are all configured 1865 * identically with equiprobable begin/ends (this is wasteful, 1866 * we only need one in this case, but its easier to deal with 1867 * this way and its non-default). 1868 */ 1869 CP9_t *cp9; /* a CM Plan 9 HMM, built from and configured to match CM */ 1870 CP9_t *Lcp9; /* a CM Plan 9 HMM for L mode truncated alignment (global begins, equiprobable local ends) */ 1871 CP9_t *Rcp9; /* a CM Plan 9 HMM for R mode truncated alignment (equiprobable local begins, global ends) */ 1872 CP9_t *Tcp9; /* a CM Plan 9 HMM for T mode truncated alignment (equiprobable local begin/ends) */ 1873 CP9Map_t *cp9map; /* the map from the Plan 9 HMM to the CM and vice versa */ 1874 CP9Bands_t *cp9b; /* the CP9 bands */ 1875 1876 /* DP matrices and some auxiliary info for DP algorithms */ 1877 /* for standard HMM banded CM alignment/search */ 1878 CM_HB_MX *hb_mx; /* growable HMM banded float matrix */ 1879 CM_HB_MX *hb_omx; /* another, growable HMM banded float matrix for Outside/Posterior calcs */ 1880 CM_HB_EMIT_MX *hb_emx; /* growable HMM banded emit matrix for residue posterior probability calcs */ 1881 CM_HB_SHADOW_MX *hb_shmx; /* growable HMM banded shadow matrix, for alignment tracebacks */ 1882 /* for truncated HMM banded CM alignment/search */ 1883 CM_TR_HB_MX *trhb_mx; /* growable truncated HMM banded float matrix */ 1884 CM_TR_HB_MX *trhb_omx; /* another, growable truncated HMM banded float matrix for Outside/Posterior calcs */ 1885 CM_TR_HB_EMIT_MX *trhb_emx; /* growable truncated HMM banded emit matrix for residue posterior probability calcs */ 1886 CM_TR_HB_SHADOW_MX *trhb_shmx; /* growable truncated HMM banded shadow matrix, for alignment tracebacks */ 1887 1888 /* for standard non-banded CM alignment/search (these will usually stay unallocated, as NULL) */ 1889 CM_MX *nb_mx; /* growable non-banded float matrix */ 1890 CM_MX *nb_omx; /* another, growable non-banded float matrix for Outside/Posterior calcs */ 1891 CM_EMIT_MX *nb_emx; /* growable non-banded emit matrix for residue posterior probability calcs */ 1892 CM_SHADOW_MX *nb_shmx; /* growable non-banded shadow matrix, for alignment tracebacks */ 1893 /* for truncated HMM banded CM alignment/search (these will usually stay unallocated, as NULL) */ 1894 CM_TR_MX *trnb_mx; /* growable truncated non-banded float matrix */ 1895 CM_TR_MX *trnb_omx; /* another, growable truncated non-banded float matrix for Outside/Posterior calcs */ 1896 CM_TR_EMIT_MX *trnb_emx; /* growable truncated non-banded emit matrix for residue posterior probability calcs */ 1897 CM_TR_SHADOW_MX *trnb_shmx; /* growable truncated non-banded shadow matrix, for alignment tracebacks */ 1898 1899 /* for standard non-HMM banded CM search */ 1900 CM_SCAN_MX *smx; /* matrices, info for CYK/Inside scans with this CM */ 1901 /* for truncated non-HMM banded CM search */ 1902 CM_TR_SCAN_MX *trsmx; /* matrices, info for CYK/Inside scans with this CM */ 1903 /* for CP9 HMM search/alignment */ 1904 CP9_MX *cp9_mx; /* growable CP9 DP matrix */ 1905 CP9_MX *cp9_bmx; /* another growable CP9 DP matrix, 'b' is for backward, 1906 * only alloc'ed to any significant size if we do Forward,Backward->Posteriors */ 1907 1908 /* statistics */ 1909 ExpInfo_t **expA; /* Exponential tail stats, [0..EXP_NMODES-1] */ 1910 1911 /* p7 hmms, added 08.05.08 */ 1912 P7_HMM *mlp7; /* the maximum likelihood p7 HMM, built from the CM */ 1913 P7_HMM *fp7; /* the filter p7 HMM, read from CM file */ 1914 float fp7_evparam[CM_p7_NEVPARAM]; /* E-value params (CMH_FP7_STATS) */ 1915 1916 const ESL_ALPHABET *abc; /* ptr to alphabet info (cm->abc->K is alphabet size)*/ 1917 off_t offset; /* CM record offset on disk */ 1918 1919 /* additional data added for convenience, post-v1.0.2, pre-v1.1) */ 1920 CMEmitMap_t *emap; /* maps model nodes to consensus positions */ 1921 CMConsensus_t *cmcons; /* consensus information for CM_ALIDISPLAY */ 1922 CM_TR_PENALTIES *trp; /* stores truncation bit score penalties */ 1923 1924 } CM_t; 1925 1926 /* status flags, cm->flags */ 1927 #define CMH_BITS (1<<0) /* CM has valid log odds scores */ 1928 #define CMH_ACC (1<<1) /* accession number is available */ 1929 #define CMH_DESC (1<<2) /* description exists */ 1930 #define CMH_RF (1<<3) /* reference exists */ 1931 #define CMH_GA (1<<4) /* gathering threshold exists */ 1932 #define CMH_TC (1<<5) /* trusted cutoff exists */ 1933 #define CMH_NC (1<<6) /* noise cutoff exists */ 1934 #define CMH_CHKSUM (1<<7) /* checksum exists */ 1935 #define CMH_MAP (1<<8) /* alignment map exists */ 1936 #define CMH_CONS (1<<9) /* consensus sequence exists */ 1937 #define CMH_LOCAL_BEGIN (1<<10) /* Begin distribution is active (local ali) */ 1938 #define CMH_LOCAL_END (1<<11) /* End distribution is active (local ali) */ 1939 #define CMH_EXPTAIL_STATS (1<<12) /* exponential tail stats set */ 1940 #define CMH_CP9 (1<<13) /* CP9 HMM is valid in cm->cp9 */ 1941 #define CMH_CP9_TRUNC (1<<14) /* cm->Lcp9, cm->Rcp9, cm->Tcp9 are valid */ 1942 #define CMH_MLP7 (1<<15) /* 'maximum likelihood' cm->mlp7 is valid */ 1943 #define CMH_FP7 (1<<16) /* filter p7 is valid in cm->fp7 */ 1944 #define CM_IS_SUB (1<<17) /* the CM is a sub CM */ 1945 #define CM_IS_RSEARCH (1<<18) /* the CM was parameterized a la RSEARCH */ 1946 #define CM_RSEARCHTRANS (1<<19) /* CM has/will have RSEARCH transitions */ 1947 #define CM_RSEARCHEMIT (1<<20) /* CM has/will have RSEARCH emissions */ 1948 #define CM_EMIT_NO_LOCAL_BEGINS (1<<21) /* emitted parsetrees will never have local begins */ 1949 #define CM_EMIT_NO_LOCAL_ENDS (1<<22) /* emitted parsetrees will never have local ends */ 1950 #define CM_IS_CONFIGURED (1<<23) /* TRUE if CM has been configured in some way */ 1951 1952 /* model configuration options, cm->config_opts */ 1953 #define CM_CONFIG_LOCAL (1<<0) /* configure the model for local alignment */ 1954 #define CM_CONFIG_HMMLOCAL (1<<1) /* configure the CP9 for local alignment */ 1955 #define CM_CONFIG_HMMEL (1<<2) /* configure the CP9 for EL local aln */ 1956 #define CM_CONFIG_QDB (1<<3) /* recalculate query dependent bands */ 1957 #define CM_CONFIG_W_BETA (1<<4) /* recalculate W using band calculation */ 1958 #define CM_CONFIG_TRUNC (1<<5) /* set up for truncated alignment (cm->{L,R,T}cp9 will be built */ 1959 #define CM_CONFIG_SCANMX (1<<6) /* create a CM_SCAN_MX in cm->smx */ 1960 #define CM_CONFIG_TRSCANMX (1<<7) /* create a CM_TR_SCAN_MX in cm->trsmx */ 1961 #define CM_CONFIG_SUB (1<<8) /* set up for submodel alignment (cm->cp9 gets equiprobable begin/ends) */ 1962 #define CM_CONFIG_NONBANDEDMX (1<<9) /* set up for non-banded alignment (cm->*nb*mx will be created) */ 1963 1964 /* alignment options, cm->align_opts */ 1965 #define CM_ALIGN_HBANDED (1<<0) /* use CP9 HMM bands */ 1966 #define CM_ALIGN_P7BANDED (1<<1) /* use p7 HMM bands to band the CP9 HMM (CAUTION: only partially implemented) */ 1967 #define CM_ALIGN_NONBANDED (1<<2) /* do not use HMM bands */ 1968 #define CM_ALIGN_CYK (1<<3) /* aln wwith CYK algorithm */ 1969 #define CM_ALIGN_OPTACC (1<<4) /* aln w/Holmes/Durbin opt accuracy */ 1970 #define CM_ALIGN_SAMPLE (1<<5) /* sample parsetrees from the inside matrix */ 1971 #define CM_ALIGN_POST (1<<6) /* do inside/outside and append posteriors */ 1972 #define CM_ALIGN_SMALL (1<<7) /* use small CYK D&C */ 1973 #define CM_ALIGN_SUMS (1<<8) /* if using HMM bands, use posterior sums */ 1974 #define CM_ALIGN_SUB (1<<9) /* build a sub CM for each seq to align */ 1975 #define CM_ALIGN_HMMVITERBI (1<<10) /* use a CP9 HMM only to align, w/viterbi */ 1976 #define CM_ALIGN_CHECKINOUT (1<<11) /* check inside/outside calculations */ 1977 #define CM_ALIGN_CHECKPARSESC (1<<12) /* check parsetree score against aln alg sc */ 1978 #define CM_ALIGN_PRINTTREES (1<<13) /* print parsetrees to stdout */ 1979 #define CM_ALIGN_HMMSAFE (1<<14) /* realign seqs w/HMM banded CYK bit sc < 0 */ 1980 #define CM_ALIGN_SCOREONLY (1<<15) /* do full CYK/inside to get score only */ 1981 #define CM_ALIGN_FLUSHINSERTS (1<<16) /* flush inserts L/R like pre 1.0 infernal */ 1982 #define CM_ALIGN_CHECKFB (1<<17) /* check forward/backward CP9 HMM calcs */ 1983 #define CM_ALIGN_HMM2IJOLD (1<<18) /* use old hmm2ij band calculation alg */ 1984 #define CM_ALIGN_QDB (1<<19) /* align with QDBs */ 1985 #define CM_ALIGN_INSIDE (1<<20) /* use Inside algorithm */ 1986 #define CM_ALIGN_TRUNC (1<<21) /* use truncated alignment algorithms */ 1987 #define CM_ALIGN_XTAU (1<<22) /* multiply tau until banded mx size < limit*/ 1988 1989 /* search options, cm->search_opts */ 1990 #define CM_SEARCH_HBANDED (1<<0) /* use HMM bands to search (default) */ 1991 #define CM_SEARCH_QDB (1<<1) /* use QDBs to search */ 1992 #define CM_SEARCH_NONBANDED (1<<2) /* do not use QDBs or HMM bands for search */ 1993 #define CM_SEARCH_HMMALNBANDS (1<<3) /* force full aln when deriving HMM bands */ 1994 #define CM_SEARCH_SUMS (1<<4) /* if using HMM bands, use posterior sums */ 1995 #define CM_SEARCH_INSIDE (1<<5) /* scan with Inside, not CYK */ 1996 #define CM_SEARCH_NOALIGN (1<<6) /* don't align hits, just report locations */ 1997 #define CM_SEARCH_RSEARCH (1<<7) /* use RSEARCH parameterized CM */ 1998 #define CM_SEARCH_CMNOTGREEDY (1<<8) /* don't use greedy alg to resolve CM overlaps */ 1999 #define CM_SEARCH_HMM2IJOLD (1<<9) /* use old hmm2ij band calculation alg */ 2000 #define CM_SEARCH_NULL2 (1<<10) /* use NULL2 score correction */ 2001 #define CM_SEARCH_NULL3 (1<<11) /* use NULL3 score correction */ 2002 2003 /***************************************************************** 2004 * 36. CM_FILE: a CM save file or database, open for reading. 2005 *****************************************************************/ 2006 /* These tags need to be in temporal order, so we can do tests 2007 * like "if (format >= CM_FILE_1a) ..." 2008 */ 2009 enum cm_file_formats_e { 2010 CM_FILE_1 = 0, /* Infernal v1.0->v1.0.2 */ 2011 CM_FILE_1a = 1, 2012 }; 2013 2014 typedef struct cm_file_s { 2015 FILE *f; /* pointer to stream for reading models */ 2016 char *fname; /* (fully qualified) name of the CM file; [STDIN] if - */ 2017 ESL_SSI *ssi; /* open SSI index for model file <f>; NULL if none. */ 2018 2019 int do_gzip; /* TRUE if f is "gzip -dc |" (will pclose(f)) */ 2020 int do_stdin; /* TRUE if f is stdin (won't close f) */ 2021 int newly_opened; /* TRUE if we just opened the stream (and parsed magic) */ 2022 int is_binary; /* TRUE if a binary file (output with WriteBinary) */ 2023 int is_pressed; /* TRUE if a pressed CM database file (Rfam or equiv) */ 2024 2025 int format; /* CM file format code */ 2026 int (*parser)(struct cm_file_s *, int, ESL_ALPHABET **, CM_t **); 2027 ESL_FILEPARSER *efp; 2028 2029 P7_HMMFILE *hfp; /* for reading p7 HMMs within the CM file */ 2030 2031 /* If <is_pressed>, we can read HMM filters directly, via: */ 2032 FILE *ffp; /* MSV part of the optimized profile HMM */ 2033 FILE *pfp; /* rest of the optimized profile HMM */ 2034 2035 #ifdef HMMER_THREADS 2036 int syncRead; 2037 pthread_mutex_t readMutex; 2038 #endif 2039 2040 char errbuf[eslERRBUFSIZE]; 2041 } CM_FILE; 2042 2043 /* note on <fname>, above: 2044 * this is the actual name of the CM file being read. 2045 * 2046 * The way cm_file_Open() works, it will preferentially look for 2047 * cmpress'ed binary files. If you open "foo", it will first try to 2048 * open "foo.i1m" and <fname> will be "foo.i1m". "foo" does not even 2049 * have to exist. If a parsing error occurs, you want <fname> to 2050 * be "foo.i1m", so error messages report blame correctly. 2051 * In the special case of reading from stdin, <fname> is "[STDIN]". 2052 */ 2053 2054 2055 /*********************************************************************************** 2056 * 37. CM_PLI_ACCT: pass specific statistics for a search/scan pipeline. 2057 ***********************************************************************************/ 2058 2059 /* Pipeline pass indices. The pipeline potentially does multiple 2060 * passes over each sequence. Don't muck with the order here, it'll 2061 * screw up things in strange ways. For example, PLI_PASS_STD_ANY must 2062 * come before the 5P and 3P truncated stages, cm_Pipeline() depends 2063 * on it. 2064 * 2065 * Not all passes are performed in a pipeline. If pli->do_trunc_ends, 2066 * passes 1,2,3,4 are performed. If pli->do_trunc_5p_ends, passes 2067 * 1 and 2 are performed. If pli->do_trunc_3p_ends, passes 1 2068 * and 3 are performed. If pli->do_trunc_any, passes 1 and 5 are 2069 * performed. If pli->do_trunc_only, only pass 5 is performed. 2070 * If pli->do_only_trunc_5p_and_3p_ends, only pass 4 is 2071 * performed. If pli->do_hmmonly_cur, only pass 6 is performed. If 2072 * none of these flags is TRUE only pass 1 is performed. 2073 * 2074 * These values have two interrelated but different roles: 2075 * 2076 * 1. [1..6] are flags indicating which type of truncated alignment 2077 * is/was allowed in/by a CM DP scanner/alignment function. Each pass of 2078 * the pipeline allows a different combination of 5' and/or 3' 2079 * truncated alignment and differs in whether it enforces the 2080 * first and/or final residue be included (_FORCE suffixed) or 2081 * not (_ANY suffixed). For the PLI_PASS_HMM_ONLY_ANY no CM 2082 * algorithms will be called but _ANY is used as a suffix because 2083 * HMM local alignment algorithms allow 5' and 3' truncation. 2084 * 2085 * A wrinkle is that these indices are used for DP truncated alignment 2086 * functions called for 'cmalign' (either PLI_PASS_5P_AND_3P_FORCE or 2087 * PLI_PASS_STD_ANY) even though those functions are not called as 2088 * part of a search/scan pipeline. In this case, the pass index is 2089 * still relevant in informing the alignment function which truncation 2090 * penalty score to apply to any resulting alignment score. 2091 * 2092 * 2. [0..6] are indices in cm->pli->acct[], accounting states for each 2093 * pass of the pipeline. 2094 */ 2095 #define PLI_PASS_CM_SUMMED 0 2096 #define PLI_PASS_STD_ANY 1 /* only standard alns allowed, no truncated ones, any subseq */ 2097 #define PLI_PASS_5P_ONLY_FORCE 2 /* only 5' truncated alns allowed, first (i0) residue must be included */ 2098 #define PLI_PASS_3P_ONLY_FORCE 3 /* only 3' truncated alns allowed, final (j0) residue must be included */ 2099 #define PLI_PASS_5P_AND_3P_FORCE 4 /* 5' and 3' truncated alns allowed, first & final (i0 & j0) residue must be included */ 2100 #define PLI_PASS_5P_AND_3P_ANY 5 /* 5' and 3' truncated alns allowed, any subseq can comprise hit */ 2101 #define PLI_PASS_HMM_ONLY_ANY 6 /* HMM only pass, all types of truncated hits are allowed in local HMM algs */ 2102 #define NPLI_PASSES 7 2103 2104 typedef struct cm_pipeline_accounting_s { 2105 /* CM_PIPELINE accounting. (reduceable in threaded/MPI parallel version) 2106 * Each pipeline pass keeps track of its own accounting, so we know how 2107 * many residues were searched in each pass. 2108 * 2109 * <npli_top> and <npli_bot> keep track of number of times pipeline 2110 * was run for each pass, this is not the same as the number of 2111 * sequences searched in *any* pass (that's <pli->nseq>), but is 2112 * equal to the number of sequences searched in the 2113 * PLI_PASS_5P_AND_3P_FORCE pass, since that pass only takes place 2114 * for sequences shorter or equal to <pli->maxW>. 2115 */ 2116 uint64_t npli_top; /* # of times pipeline called on top strand */ 2117 uint64_t npli_bot; /* # of times pipeline called on bot strand */ 2118 uint64_t nres_top; /* # of residues searched on top strand */ 2119 uint64_t nres_bot; /* # of residues searched on bottom strand */ 2120 uint64_t n_past_msv; /* # windows that pass MSVFilter() */ 2121 uint64_t n_past_vit; /* # windows that pass ViterbiFilter() */ 2122 uint64_t n_past_fwd; /* # windows that pass ForwardFilter() */ 2123 uint64_t n_past_gfwd; /* # windows that pass glocal GForward() */ 2124 uint64_t n_past_edef; /* # envelopes that pass envelope definition */ 2125 uint64_t n_past_cyk; /* # windows that pass CYK filter */ 2126 uint64_t n_past_ins; /* # windows that pass Inside */ 2127 uint64_t n_output; /* # alignments that make it to the final output */ 2128 uint64_t n_past_msvbias; /* # windows that pass MSV bias filter */ 2129 uint64_t n_past_vitbias; /* # windows that pass Vit bias filter */ 2130 uint64_t n_past_fwdbias; /* # windows that pass Fwd bias filter */ 2131 uint64_t n_past_gfwdbias; /* # windows that pass gFwd bias filter */ 2132 uint64_t n_past_edefbias; /* # envelopes that pass env bias filter */ 2133 uint64_t pos_past_msv; /* # positions that pass MSVFilter() */ 2134 uint64_t pos_past_vit; /* # positions that pass ViterbiFilter() */ 2135 uint64_t pos_past_fwd; /* # positions that pass ForwardFilter() */ 2136 uint64_t pos_past_gfwd; /* # positions that pass glocal GForward() */ 2137 uint64_t pos_past_edef; /* # positions that pass env definition */ 2138 uint64_t pos_past_cyk; /* # positions that pass CYK filter */ 2139 uint64_t pos_past_ins; /* # positions that pass Inside */ 2140 uint64_t pos_output; /* # positions that make it to the final output */ 2141 uint64_t pos_past_msvbias; /* # positions that pass MSV bias filter */ 2142 uint64_t pos_past_vitbias; /* # positions that pass Vit bias filter */ 2143 uint64_t pos_past_fwdbias; /* # positions that pass Fwd bias filter */ 2144 uint64_t pos_past_gfwdbias; /* # positions that pass gFwd bias filter*/ 2145 uint64_t pos_past_edefbias; /* # positions that pass dom def bias filter */ 2146 uint64_t n_overflow_fcyk; /* # hits that couldn't use an HMM banded mx in CYK filter stage */ 2147 uint64_t n_overflow_final; /* # hits that couldn't use an HMM banded mx in final stage */ 2148 uint64_t n_aln_hb; /* # HMM banded alignments computed */ 2149 uint64_t n_aln_dccyk; /* # nonbanded divide and conquer CYK alignments computed */ 2150 } CM_PLI_ACCT; 2151 2152 2153 /*********************************************************************************** 2154 * 38. CM_PIPELINE: the accelerated seq/profile comparison pipeline 2155 ***********************************************************************************/ 2156 2157 enum cm_pipemodes_e { CM_SEARCH_SEQS = 0, CM_SCAN_MODELS = 1 }; 2158 enum cm_newmodelmodes_e { CM_NEWMODEL_MSV = 0, CM_NEWMODEL_CM = 1 }; 2159 enum cm_zsetby_e { CM_ZSETBY_SSIINFO = 0, CM_ZSETBY_SSI_AND_QLENGTH = 1, CM_ZSETBY_FILEREAD = 2, CM_ZSETBY_OPTION = 3, CM_ZSETBY_FILEINFO = 4}; 2160 2161 typedef struct cm_pipeline_s { 2162 /* Dynamic programming matrices */ 2163 P7_OMX *oxf; /* one-row Forward matrix, accel pipe */ 2164 P7_OMX *oxb; /* one-row Backward matrix, accel pipe */ 2165 P7_OMX *fwd; /* full Fwd matrix for envelopes */ 2166 P7_OMX *bck; /* full Bck matrix for envelopes */ 2167 P7_GMX *gxf; /* generic Forward matrix */ 2168 P7_GMX *gxb; /* generic Backward matrix */ 2169 P7_GMX *gfwd; /* generic full Fwd matrix for envelopes */ 2170 P7_GMX *gbck; /* generic full Bck matrix for envelopes */ 2171 2172 enum cm_pipemodes_e mode; /* CM_SCAN_MODELS | CM_SEARCH_SEQS */ 2173 ESL_ALPHABET *abc; /* ptr to alphabet info */ 2174 CM_FILE *cmfp; /* COPY of open CM database (if scan mode, else NULl) */ 2175 char errbuf[eslERRBUFSIZE]; 2176 2177 /* Model-dependent parameters */ 2178 int maxW; /* # residues to overlap in adjacent windows*/ 2179 int cmW; /* CM's window length */ 2180 int clen; /* CM's consensus length of model */ 2181 int64_t cur_cm_idx; /* model index currently being used */ 2182 int cur_clan_idx; /* clan index currently being used, -1 if none */ 2183 2184 int64_t cur_seq_idx; /* sequence index currently being searched */ 2185 int64_t cur_pass_idx; /* pipeline pass index currently underway */ 2186 2187 /* Accounting. (reduceable in threaded/MPI parallel version) */ 2188 uint64_t nseqs; /* # of sequences searched */ 2189 uint64_t nmodels; /* # of models searched, CM mode */ 2190 uint64_t nnodes; /* # of model nodes searched, CM mode */ 2191 uint64_t nmodels_hmmonly; /* # of models searched, HMM only mode */ 2192 uint64_t nnodes_hmmonly; /* # of model nodes, HMM only mode */ 2193 CM_PLI_ACCT acct[NPLI_PASSES]; 2194 2195 /* Domain/envelope postprocessing */ 2196 ESL_RANDOMNESS *r; /* random number generator */ 2197 int do_reseeding; /* TRUE: reseed for reproducible results */ 2198 P7_DOMAINDEF *ddef; /* domain definition workflow */ 2199 2200 /* miscellaneous parameters */ 2201 float mxsize_limit; /* maximum size in Mb allowed for HB alignment */ 2202 int mxsize_set; /* TRUE if mxsize_limit was set by user (default: FALSE) */ 2203 int be_verbose; /* TRUE for verbose reporting mode */ 2204 int do_top; /* TRUE to do top strand (usually TRUE) */ 2205 int do_bot; /* TRUE to do bottom strand (usually TRUE) */ 2206 int show_accessions; /* TRUE to output accessions not names */ 2207 int show_alignments; /* TRUE to compute and output alignments (default)*/ 2208 double maxtau; /* max tau when tightening bands */ 2209 int do_wcx; /* TRUE to set cm->W as cm->clen * wcx */ 2210 float wcx; /* set W as cm->clen * wcx, ignoring W from CM file */ 2211 int do_one_cmpass; /* TRUE to only use CM for best scoring HMM pass if envelope encompasses full sequence */ 2212 int do_one_cmpass_olap; /* TRUE to only use CM for best scoring HMM pass if all envelopes overlap > 50% */ 2213 int do_not_iterate; /* TRUE to *not* iteratively tighten bands to get alignment that will fit in the matrix */ 2214 /* these are all currently hard-coded, in cm_pipeline_Create() */ 2215 float smult; /* 2.0; W multiplier for window splitting */ 2216 float wmult; /* 1.0; maxW will be max of wmult * cm->W and cmult * cm->clen */ 2217 float cmult; /* 1.25; maxW will be max of wmult * cm->W and cmult * cm->clen */ 2218 float mlmult; /* 0.10; om->max_length multiplier for MSV window defn */ 2219 /* flags for timing experiments */ 2220 int do_time_F1; /* TRUE to abort after Stage 1 MSV, for timing expts */ 2221 int do_time_F2; /* TRUE to abort after Stage 2 Vit, for timing expts */ 2222 int do_time_F3; /* TRUE to abort after Stage 3 Fwd, for timing expts */ 2223 int do_time_F4; /* TRUE to abort after Stage 4 glocal Fwd, for timing expts */ 2224 int do_time_F5; /* TRUE to abort after Stage 5 env def, for timing expts */ 2225 int do_time_F6; /* TRUE to abort after Stage 6 CYK, for timing expts */ 2226 /* flag for terminating after a stage and outputting surviving windows (currently only F3 is possible) */ 2227 int do_trm_F3; /* TRUE to abort after Stage 3 Fwd and output surviving windows */ 2228 2229 /* Reporting threshold settings */ 2230 int by_E; /* TRUE to cut per-target report off by E */ 2231 double E; /* per-target E-value threshold */ 2232 double T; /* per-target bit score threshold */ 2233 int use_bit_cutoffs; /* (FALSE | CMH_GA | CMH_TC | CMH_NC) */ 2234 2235 /* Inclusion threshold settings */ 2236 int inc_by_E; /* TRUE to threshold inclusion by E-values */ 2237 double incE; /* per-target inclusion E-value threshold */ 2238 double incT; /* per-target inclusion score threshold */ 2239 2240 /* Tracking search space sizes for E value calculations * 2241 * NOTE: Definition of Z here differs markedly from HMMER, where Z is * 2242 * number of target sequences (SEARCH) or models (SCAN). */ 2243 double Z; /* database size, defn differs between SEARCH/SCAN mode * 2244 * SEARCH: # of residues in target sequence database * 2245 * SCAN: # of models in target database multiplied by the * 2246 * # of residues in current query seq */ 2247 enum cm_zsetby_e Z_setby; /* how Z was set */ 2248 2249 /* Threshold settings for pipeline */ 2250 /* non-default filter strategies */ 2251 int do_max; /* TRUE to skip all filters */ 2252 int do_nohmm; /* TRUE to skip all HMM filters */ 2253 int do_mid; /* TRUE to skip MSV and Viterbi filters */ 2254 int do_rfam; /* TRUE to run in fast, rfam mode */ 2255 /* filter thresholds */ 2256 double F1; /* MSV filter threshold */ 2257 double F2; /* Viterbi filter threshold */ 2258 double F3; /* uncorrected Forward filter threshold */ 2259 double F4; /* glocal Forward filter thr */ 2260 double F5; /* glocal env def filter thr */ 2261 double F6; /* CYK filter thr */ 2262 double F1b; /* bias-corrected MSV filter threshold */ 2263 double F2b; /* bias-corrected Viterbi filter threshold */ 2264 double F3b; /* bias-corrected Forward filter threshold */ 2265 double F4b; /* bias-corrected gloc Forward filter threshold */ 2266 double F5b; /* bias-corrected env def filter threshold */ 2267 /* on/off parameters for each stage */ 2268 int do_msv; /* TRUE to filter with MSV, FALSE not to */ 2269 int do_vit; /* TRUE to filter with Vit, FALSE not to */ 2270 int do_fwd; /* TRUE to filter with Fwd, FALSE not to */ 2271 int do_gfwd; /* TRUE to filter w/glocal Fwd, FALSE not to*/ 2272 int do_edef; /* TRUE to find envelopes in windows prior to CM stages */ 2273 int do_fcyk; /* TRUE to filter with CYK, FALSE not to */ 2274 int do_msvbias; /* TRUE to use biased comp HMM filter w/MSV */ 2275 int do_vitbias; /* TRUE to use biased comp HMM filter w/Vit */ 2276 int do_fwdbias; /* TRUE to use biased comp HMM filter w/Fwd */ 2277 int do_gfwdbias; /* TRUE to use biased comp HMM filter w/gFwd*/ 2278 int do_edefbias; /* TRUE to use biased comp HMM filter w/edef*/ 2279 2280 /* truncated sequence detection parameters */ 2281 int do_trunc_ends; /* TRUE to use truncated CM algs at sequence ends */ 2282 int do_trunc_any; /* TRUE to use truncated CM algs for entire sequences */ 2283 int do_trunc_only; /* TRUE to use truncated CM algs for entire sequences */ 2284 int do_trunc_5p_ends; /* TRUE to use truncated CM algs only at 5' ends (added for RNAVORE, post 1.1.1) */ 2285 int do_trunc_3p_ends; /* TRUE to use truncated CM algs only at 3' ends (added for RNAVORE, post 1.1.1) */ 2286 2287 /* Parameters controlling p7 domain/envelope defintion */ 2288 float rt1; /* controls when regions are called. mocc[i] post prob >= dt1 : triggers a region around i */ 2289 float rt2; /* controls extent of regions. regions extended until mocc[i]-{b,e}occ[i] < dt2 */ 2290 float rt3; /* controls when regions are flagged for split: if expected # of E preceding B is >= dt3 */ 2291 int ns; /* number of traceback samples for domain/envelope def */ 2292 2293 /* CM search options for CYK filter and final stage */ 2294 int do_fcykenv; /* TRUE to redefine envelopes after CYK */ 2295 double F6env; /* CYK envelope P-value threshold */ 2296 int do_null2; /* TRUE to use null2 score corrections */ 2297 int do_null3; /* TRUE to use null3 score corrections */ 2298 int do_glocal_cm_always; /* TRUE to use glocal mode for CM stages, for all models */ 2299 int do_glocal_cm_cur; /* TRUE to use glocal mode for CM stages, for current model */ 2300 int do_glocal_cm_sometimes; /* TRUE to use glocal mode for CM stages, for some models */ 2301 int fcyk_cm_search_opts; /* CYK filter stage search opts */ 2302 int final_cm_search_opts; /* final stage search opts */ 2303 int fcyk_cm_exp_mode; /* CYK filter exp mode */ 2304 int final_cm_exp_mode; /* final stage exp mode */ 2305 double fcyk_beta; /* QDB beta for CYK filter stage */ 2306 double final_beta; /* QDB beta for final stage */ 2307 double fcyk_tau; /* HMM bands tau for CYK filter stage */ 2308 double final_tau; /* HMM bands tau for final stage */ 2309 2310 /* Threshold settings for HMM-only pipeline */ 2311 int do_hmmonly_cur; /* TRUE to only use filter HMM for current model */ 2312 int do_hmmonly_always; /* TRUE to only use filter HMM for all models */ 2313 int do_hmmonly_never; /* TRUE to never only use filter HMM for any model */ 2314 int do_max_hmmonly; /* TRUE to skip all filters in HMM only mode */ 2315 /* filter thresholds, HMM only mode */ 2316 double F1_hmmonly; /* MSV filter threshold, HMM only mode */ 2317 double F2_hmmonly; /* Viterbi filter threshold, HMM only mode */ 2318 double F3_hmmonly; /* Forward filter threshold, HMM only mode */ 2319 /* on/off parameters, HMM only mode */ 2320 int do_bias_hmmonly; /* TRUE to use bias filter, HMM only mode */ 2321 int do_null2_hmmonly; /* TRUE to use null2, HMM only mode */ 2322 2323 /* configure/alignment options for all CMs we'll use in the pipeline */ 2324 int cm_config_opts; 2325 int cm_align_opts; 2326 2327 } CM_PIPELINE; 2328 2329 /* constants used in cm_pipeline for tallying up number of residues surviving each stage */ 2330 #define p7_SURV_F1 0 2331 #define p7_SURV_F1b 1 2332 #define p7_SURV_F2 2 2333 #define p7_SURV_F2b 3 2334 #define p7_SURV_F3 4 2335 #define p7_SURV_F3b 5 2336 #define Np7_SURV 6 2337 2338 2339 /*********************************************************************************** 2340 * 39. CM_ALIDISPLAY: an alignment formatted for printing (replaces FancyAli_t) 2341 ***********************************************************************************/ 2342 2343 /* Structure: CM_ALIDISPLAY 2344 * 2345 * Alignment of a sequence to a CM, formatted for printing. 2346 * Based on HMMER's P7_ALIDISPLAY. 2347 * 2348 * For an alignment of L residues and names C chars long, requires 2349 * 9L + 2C + 50 bytes (or so); for typical case of L=100,C=10, that's 2350 * about 1 Kb. 2351 */ 2352 typedef struct cm_alidisplay_s { 2353 char *rfline; /* reference coord info; or NULL */ 2354 char *ncline; /* negative scoring noncanonical bps */ 2355 char *csline; /* consensus structure info */ 2356 char *model; /* aligned query consensus sequence */ 2357 char *mline; /* "identities", conservation +'s, etc. */ 2358 char *aseq; /* aligned target sequence */ 2359 char *ppline; /* posterior prob annotation; or NULL */ 2360 int N; /* length of strings */ 2361 2362 char *aseq_el; /* aligned sequence, including EL emissions */ 2363 char *rfline_el; /* reference annotation for aligned sequence w/EL */ 2364 char *ppline_el; /* posterior probs, including EL emissions */ 2365 int N_el; /* length of aseq_el, ppline_el */ 2366 2367 char *cmname; /* name of CM */ 2368 char *cmacc; /* accession of CM; or [0]='\0' */ 2369 char *cmdesc; /* description of CM; or [0]='\0' */ 2370 int cfrom_emit; /* min consensus pos, start posn in CM */ 2371 int cto_emit; /* max consensus pos, end posn in CM */ 2372 int cfrom_span; /* min cons pos in predicted non-truncated hit, == cfrom_emit unless hit is 5' truncated */ 2373 int cto_span; /* max cons pos in predicted non-truncated hit, == cto_emit unless hit is 3' truncated */ 2374 int clen; /* consensus length of model */ 2375 2376 char *sqname; /* name of target sequence */ 2377 char *sqacc; /* accession of target seq; or [0]='\0' */ 2378 char *sqdesc; /* description of targ seq; or [0]='\0' */ 2379 long sqfrom; /* min bound in scoord, start posn in seq (1..L) */ 2380 long sqto; /* max bound in scoord, end posn in seq (1..L) */ 2381 2382 float sc; /* alignment score */ 2383 float avgpp; /* average PP of all aligned residues, 0.0 if no PPs available */ 2384 float gc; /* GC content of all aligned residues [0..1] */ 2385 double tau; /* tau used to calc HMM bands, -1.0 if HMM bands not used */ 2386 float matrix_Mb; /* size of DP matrix used in Mb, either HMM banded CYK/OA or D&C CYK */ 2387 double elapsed_secs; /* number of seconds required for alignment */ 2388 2389 int hmmonly; /* TRUE if this CM_ALIDISPLAY was 2390 * converted from a P7_ALIDISPLAY 2391 * during an HMM only pipeline run. 2392 */ 2393 2394 int memsize; /* size of allocated block of char memory */ 2395 char *mem; /* memory used for the char data above */ 2396 } CM_ALIDISPLAY; 2397 2398 2399 /*********************************************************************************** 2400 * 40. CM_HIT: a hit between a CM and a sequence 2401 ***********************************************************************************/ 2402 2403 #define CM_HIT_FLAGS_DEFAULT 0 2404 #define CM_HIT_IS_INCLUDED (1<<0) 2405 #define CM_HIT_IS_REPORTED (1<<1) 2406 #define CM_HIT_IS_REMOVED_DUPLICATE (1<<2) 2407 #define CM_HIT_IS_MARKED_OVERLAP (1<<3) 2408 2409 /* Structure: CM_HIT 2410 * 2411 * Info about a high-scoring database hit, kept so we can output a 2412 * sorted list of high hits at the end. 2413 * 2414 * <start> and <stop> are the coordinates that will be shown 2415 * in the results, not coords in arrays... therefore, reverse 2416 * complements have <start> > <stop>. To handle the rare 2417 * case that we have a hit that spans a single residue, 2418 * <in_rc> is TRUE if hit is on reverse complement, FALSE 2419 * if not. 2420 */ 2421 typedef struct cm_hit_s { 2422 char *name; /* name of the target (mandatory) */ 2423 char *acc; /* accession of the target (optional; else NULL) */ 2424 char *desc; /* description of the target (optional; else NULL) */ 2425 int64_t cm_idx; /* model index in the cmfile, unique id for the model */ 2426 int clan_idx; /* clan index in the cmfile, -1 if none (usually -1) */ 2427 int64_t seq_idx; /* sequence index in the seqfile, unique id for the sequence */ 2428 int pass_idx; /* index of pipeline pass hit was found on */ 2429 int64_t hit_idx; /* index of this hit in the hit list */ 2430 int64_t srcL; /* full length of source sequence the hit is from */ 2431 int64_t start, stop; /* start/end points of hit */ 2432 int in_rc; /* TRUE if hit is in reverse complement of a target, FALSE if not */ 2433 int root; /* internal state entry point, != 0 if hit involves a local begin */ 2434 int mode; /* joint or marginal hit mode: CM_MODE_J | CM_MODE_R | CM_MODE_L | CM_MODE_T */ 2435 float score; /* bit score of the hit (with corrections) */ 2436 float bias; /* null{2,3} (2 if hmmonly, 3 if not) correction, in bits (already subtracted from score) */ 2437 double pvalue; /* P-value of the hit (with corrections) */ 2438 double evalue; /* E-value of the hit (with corrections) */ 2439 int has_evalue; /* TRUE if E-value has been set for this hit */ 2440 int hmmonly; /* TRUE if hit was found during HMM only pipeline run, FALSE if not */ 2441 int glocal; /* TRUE if hit was found by model in global configuration, FALSE if not */ 2442 CM_ALIDISPLAY *ad; /* alignment display */ 2443 uint32_t flags; /* CM_HIT_IS_REPORTED | CM_HIT_IS_INCLUDED | CM_HIT_IS_REMOVED_DUPLICATE | CM_HIT_IS_MARKED_OVERLAP */ 2444 /* overlap information */ 2445 int64_t any_oidx; /* hit index of best scoring (E-value, then bit score) hit that has 2446 * better score than this hit and overlaps with it */ 2447 int64_t win_oidx; /* hit index of best scoring (E-value, then bit score) hit that has 2448 * better score than this AND has no CM_HIT_IS_MARKED_OVERLAP flag raised */ 2449 double any_bitE; /* score of hit <any_oidx>, E-value if 'has_evalue' is TRUE, else bit score */ 2450 double win_bitE; /* score of hit <win_oidx>, E-value if 'has_evalue' is TRUE, else bit score */ 2451 2452 } CM_HIT; 2453 2454 2455 /*********************************************************************************** 2456 * 41. CM_TOPHITS: ranked list of top-scoring hits 2457 ***********************************************************************************/ 2458 2459 /* Structure: CM_TOPHITS: Collection of hits that can be sorted by 2460 * position or by score. This allows many hits to be merged when we 2461 * prepare to output results. "hit" list is NULL and unavailable until 2462 * after we do a sort. 2463 */ 2464 typedef struct cm_tophits_s { 2465 CM_HIT **hit; /* sorted pointer array */ 2466 CM_HIT *unsrt; /* unsorted data storage */ 2467 uint64_t Nalloc; /* current allocation size */ 2468 uint64_t N; /* number of hits in list now */ 2469 uint64_t nreported; /* number of hits that are reportable */ 2470 uint64_t nincluded; /* number of hits that are includable */ 2471 int is_sorted_by_evalue; /* TRUE when hits are sorted by E-value, score, length, th->hit valid for all N hits */ 2472 int is_sorted_for_overlap_removal; /* TRUE when hits are sorted by cm_idx, seq_idx, strand, score, th->hit valid for all N hits */ 2473 int is_sorted_for_overlap_markup; /* TRUE when hits are sorted by seq_idx, strand, score, th->hit valid for all N hits */ 2474 int is_sorted_by_position; /* TRUE when hits are sorted by cm_idx, seq_idx, strand, first residue 2475 * (start if ! in_rc, stop if in_rc), th->hit valid for all N hits 2476 */ 2477 } CM_TOPHITS; 2478 2479 2480 /*********************************************************************************** 2481 * 42. CM_P7_OM_BLOCK: block of P7_OPROFILEs and related info, for cmscan. 2482 ***********************************************************************************/ 2483 2484 typedef struct { 2485 int count; /* number of <P7_OPROFILE> objects in the block (and cm_offsetA, cm_clenA, cm_WA, gfmuA, gflambdaA) */ 2486 int64_t idx0; /* index of first profile in file >= 0 */ 2487 int listSize; /* maximum number elements in the list */ 2488 P7_OPROFILE **list; /* array of <P7_OPROFILE> objects */ 2489 P7_SCOREDATA **msvdataA; /* array of <P7_SCOREDATA> objects */ 2490 off_t *cm_offsetA; /* file offsets for CMs */ 2491 int *cm_clenA; /* consensus length of CMs */ 2492 int *cm_WA; /* window length of CMs */ 2493 int *cm_nbpA; /* number of basepairs in CMs */ 2494 float *gfmuA; /* glocal forward mu parameter for HMM */ 2495 float *gflambdaA; /* glocal forward lambda parameter for HMM */ 2496 int *clan_idxA; /* clan index for profile, or -1 for none */ 2497 } CM_P7_OM_BLOCK; 2498 2499 2500 /*********************************************************************************** 2501 * 43. CM_ALNDATA: information for alignment of a sequence to a CM. 2502 ***********************************************************************************/ 2503 2504 /* Structure CM_ALNDATA: Per-sequence information relevant to the 2505 * collection and output of an alignment of one sequence to a CM. 2506 * Used primarily in cmalign, but also used in cmbuild if the input 2507 * alignment refinement is used (--refine). 2508 */ 2509 typedef struct { 2510 ESL_SQ *sq; /* sequence, often just a reference - not to be free'd */ 2511 int64_t idx; /* index, for ordering sequences properly in output aln */ 2512 float sc; /* alignment score for this sequence (CYK or Inside) */ 2513 float pp; /* average posterior probability for this sequence */ 2514 Parsetree_t *tr; /* Parsetree for this sequence */ 2515 char *ppstr; /* posterior probability string for this sequence */ 2516 int spos; /* first consensus pos of the CM that emits in tr */ 2517 int epos; /* final consensus pos of the CM that emits in tr */ 2518 float secs_bands; /* seconds elapsed during band calculation */ 2519 float secs_aln; /* seconds elapsed during alignment calculation */ 2520 float secs_tot; /* seconds elapsed for entire processing of this sequence */ 2521 float mb_tot; /* total Mb required for all DP matrices for alignment */ 2522 double tau; /* tau used for HMM band calculation, -1 if no hmm bands */ 2523 /* thresh1 and thresh2 are only relevant if alignment is truncated */ 2524 float thresh1; /* cp9b->thresh1 used for HMM band calculation */ 2525 float thresh2; /* cp9b->thresh2 used for HMM band calculation */ 2526 } CM_ALNDATA; 2527 2528 /***************************************************************** 2529 * 45. Routines in Infernal's exposed API. 2530 *****************************************************************/ 2531 2532 /* from cm.c */ 2533 extern CM_t *CreateCM(int nnodes, int nstates, int clen, const ESL_ALPHABET *abc); 2534 extern CM_t *CreateCMShell(void); 2535 extern void CreateCMBody(CM_t *cm, int nnodes, int nstates, int clen, const ESL_ALPHABET *abc); 2536 extern void CMZero(CM_t *cm); 2537 extern void CMRenormalize(CM_t *cm); 2538 extern void FreeCM(CM_t *cm); 2539 extern void CMSimpleProbify(CM_t *cm); 2540 extern int rsearch_CMProbifyEmissions(CM_t *cm, fullmat_t *fullmat); 2541 extern int CMLogoddsify(CM_t *cm); 2542 extern int CMCountStatetype(CM_t *cm, char type); 2543 extern int CMCountNodetype(CM_t *cm, char type); 2544 extern int CMSegmentCountStatetype(CM_t *cm, int r, int z, char type); 2545 extern int CMSubtreeCountStatetype(CM_t *cm, int v, char type); 2546 extern int CMSubtreeCountNodetype(CM_t *cm, int v, char type); 2547 extern int CMSubtreeFindEnd(CM_t *cm, int v); 2548 extern int CalculateStateIndex(CM_t *cm, int node, char utype); 2549 extern int TotalStatesInNode(int ndtype); 2550 extern int SplitStatesInNode(int ndtype); 2551 extern int InsertStatesInNode(int ndtype); 2552 extern int StateDelta(int sttype); 2553 extern int StateLeftDelta(int sttype); 2554 extern int StateRightDelta(int sttype); 2555 extern int Emitmode(int sttype); 2556 extern int NumReachableInserts(int stid); 2557 extern void PrintCM(FILE *fp, CM_t *cm); 2558 extern void SummarizeCM(FILE *fp, CM_t *cm); 2559 extern char *Statetype(int type); 2560 extern int StateCode(char *s); 2561 extern char *Nodetype(int type); 2562 extern int NodeCode(char *s); 2563 extern char *UniqueStatetype(int type); 2564 extern int UniqueStateCode(char *s); 2565 extern int DeriveUniqueStateCode(int ndtype, int sttype); 2566 extern int StateMapsLeft(char st); 2567 extern int StateMapsRight(char st); 2568 extern int StateMapsMatch(char st); 2569 extern int StateMapsInsert(char st); 2570 extern int StateMapsDelete(char st); 2571 extern int NodeMapsLeft(char ndtype); 2572 extern int NodeMapsRight(char ndtype); 2573 extern int StateIsDetached(CM_t *cm, int v); 2574 extern int CMRebalance(CM_t *cm, char *errbuf, CM_t **ret_new_cm); 2575 extern int **IMX2Alloc(int rows, int cols); 2576 extern void IMX2Free(int **mx); 2577 extern float rsearch_calculate_gap_penalty (char from_state, char to_state, int from_node, int to_node, float input_alpha, float input_beta, float input_alphap, float input_betap); 2578 extern int cm_Exponentiate(CM_t *cm, double z); 2579 extern int cm_p7_Exponentiate(P7_HMM *hmm, double z); 2580 extern void cm_banner(FILE *fp, char *progname, char *banner); 2581 extern void cm_CalcExpSc(CM_t *cm, float **ret_expsc, float **ret_expsc_noss); 2582 extern int cm_Validate(CM_t *cm, float tol, char *errbuf); 2583 extern char *CMStatetype(char st); 2584 extern char *CMNodetype(char nd); 2585 extern char *CMStateid(char st); 2586 extern char *MarginalMode(char mode); 2587 extern int ModeEmitsLeft(char mode); 2588 extern int ModeEmitsRight(char mode); 2589 extern int cm_SetName(CM_t *cm, char *name); 2590 extern int cm_SetAccession(CM_t *cm, char *acc); 2591 extern int cm_SetDescription(CM_t *cm, char *desc); 2592 extern int cm_SetConsensus(CM_t *cm, CMConsensus_t *cons, ESL_SQ *sq); 2593 extern int cm_AppendComlog(CM_t *cm, int argc, char **argv, int add_seed, uint32_t seed); 2594 extern int cm_SetCtime(CM_t *cm); 2595 extern int DefaultNullModel(const ESL_ALPHABET *abc, float **ret_null); 2596 extern int CMAllocNullModel(CM_t *cm); 2597 extern void CMSetNullModel(CM_t *cm, float *null); 2598 extern int CMReadNullModel(const ESL_ALPHABET *abc, char *nullfile, float **ret_null); 2599 extern int IntMaxDigits(); 2600 extern int IntDigits(int i); 2601 extern int cm_GetAvgHitLen(CM_t *cm, char *errbuf, float *ret_avgL_loc, float *ret_avgL_glb); 2602 extern int CompareCMGuideTrees(CM_t *cm1, CM_t *cm2); 2603 extern void DumpCMFlags(FILE *fp, CM_t *cm); 2604 extern ESL_GETOPTS *cm_CreateDefaultApp(ESL_OPTIONS *options, int nargs, int argc, char **argv, char *banner, char *usage); 2605 extern CM_P7_OM_BLOCK *cm_p7_oprofile_CreateBlock(int size); 2606 extern void cm_p7_oprofile_DestroyBlock(CM_P7_OM_BLOCK *block); 2607 extern float **FCalcOptimizedEmitScores (CM_t *cm); 2608 extern int **ICalcOptimizedEmitScores (CM_t *cm); 2609 extern int **ICopyOptimizedEmitScoresFromFloats(CM_t *cm, float **oesc); 2610 extern int CloneOptimizedEmitScores (const CM_t *src, CM_t *dest, char *errbuf); 2611 extern void DumpOptimizedEmitScores (CM_t *cm, FILE *fp); 2612 extern void FreeOptimizedEmitScores (float **fesc_vAA, int **iesc_vAA, int M); 2613 extern float **FCalcInitDPScores (CM_t *cm); 2614 extern int **ICalcInitDPScores (CM_t *cm); 2615 extern int cm_nonconfigured_Verify(CM_t *cm, char *errbuf); 2616 extern int cm_Clone(CM_t *cm, char *errbuf, CM_t **ret_cm); 2617 extern float cm_Sizeof(CM_t *cm); 2618 extern int Prob2Score(float p, float null); 2619 extern float Score2Prob(int sc, float null); 2620 extern float Scorify(int sc); 2621 extern double *cm_ExpectedStateOccupancy(CM_t *cm); 2622 extern int cm_ExpectedPositionOccupancy(CM_t *cm, float **ret_mexpocc, float **ret_iexpocc, double **opt_psi, int **opt_m2v_1, int **opt_m2v_2, int **opt_i2v); 2623 extern char ***cm_CreateTransitionMap(); 2624 extern void cm_FreeTransitionMap(char ***tmap); 2625 extern void InsertsGivenNodeIndex(CM_t *cm, int nd, int *ret_i1, int *ret_2); 2626 extern int cm_Guidetree(CM_t *cm, char *errbuf, ESL_MSA *msa, Parsetree_t **ret_gtr); 2627 2628 /* cm_alidisplay.c */ 2629 extern int cm_alidisplay_Create(CM_t *cm, char *errbuf, CM_ALNDATA *adata, const ESL_SQ *sq, int64_t seqoffset, 2630 double tau, double elapsed_secs, CM_ALIDISPLAY **ret_ad); 2631 extern int cm_alidisplay_CreateFromP7(CM_t *cm, char *errbuf, const ESL_SQ *sq, int64_t seqoffset, float p7sc, float p7pp, P7_ALIDISPLAY *p7ad, CM_ALIDISPLAY **ret_ad); 2632 extern CM_ALIDISPLAY *cm_alidisplay_Clone(const CM_ALIDISPLAY *ad); 2633 extern size_t cm_alidisplay_Sizeof(const CM_ALIDISPLAY *ad); 2634 extern void cm_alidisplay_Destroy(CM_ALIDISPLAY *ad); 2635 extern char cm_alidisplay_EncodePostProb(float p); 2636 extern float cm_alidisplay_DecodePostProb(char pc); 2637 extern int cm_alidisplay_Print(FILE *fp, CM_ALIDISPLAY *ad, int min_aliwidth, int linewidth, int show_accessions); 2638 extern int cm_alidisplay_Is5PTrunc (const CM_ALIDISPLAY *ad); 2639 extern int cm_alidisplay_Is3PTrunc (const CM_ALIDISPLAY *ad); 2640 extern int cm_alidisplay_Is5PAnd3PTrunc(const CM_ALIDISPLAY *ad); 2641 extern int cm_alidisplay_Is5PTruncOnly (const CM_ALIDISPLAY *ad); 2642 extern int cm_alidisplay_Is3PTruncOnly (const CM_ALIDISPLAY *ad); 2643 extern char *cm_alidisplay_TruncString (const CM_ALIDISPLAY *ad); 2644 extern int cm_alidisplay_Backconvert(CM_t *cm, const CM_ALIDISPLAY *ad, char *errbuf, ESL_SQ **ret_sq, Parsetree_t **ret_tr, char **ret_pp); 2645 extern int cm_alidisplay_Dump(FILE *fp, const CM_ALIDISPLAY *ad); 2646 extern int cm_alidisplay_Compare(const CM_ALIDISPLAY *ad1, const CM_ALIDISPLAY *ad2); 2647 2648 /* from cm_alndata.c */ 2649 CM_ALNDATA * cm_alndata_Create(void); 2650 void cm_alndata_Destroy(CM_ALNDATA *data, int free_sq); 2651 int DispatchSqBlockAlignment(CM_t *cm, char *errbuf, ESL_SQ_BLOCK *sq_block, float mxsize, ESL_STOPWATCH *w, ESL_STOPWATCH *w_tot, ESL_RANDOMNESS *r, CM_ALNDATA ***ret_dataA); 2652 int DispatchSqAlignment (CM_t *cm, char *errbuf, ESL_SQ *sq, int64_t idx, float mxsize, char mode, int pass_idx, 2653 int cp9b_valid, ESL_STOPWATCH *w, ESL_STOPWATCH *w_tot, ESL_RANDOMNESS *r, CM_ALNDATA **ret_data); 2654 2655 /* from cm_dpalign.c */ 2656 extern int cm_AlignSizeNeeded (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb); 2657 extern int cm_AlignSizeNeededHB (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb); 2658 extern int cm_Align (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, int do_sample, CM_MX *mx, CM_SHADOW_MX *shmx, CM_MX *post_mx, CM_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, float *ret_avgpp, float *ret_sc); 2659 extern int cm_AlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_optacc, int do_sample, CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, CM_HB_MX *post_mx, CM_HB_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, float *ret_avgpp, float *ret_sc); 2660 extern int cm_CYKInsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_MX *mx, CM_SHADOW_MX *shmx, int *ret_b, float *ret_sc); 2661 extern int cm_CYKInsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, int *ret_b, float *ret_sc); 2662 extern int cm_InsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_MX *mx, float *ret_sc); 2663 extern int cm_InsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_HB_MX *mx, float *ret_sc); 2664 extern int cm_OptAccAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_MX *mx, CM_SHADOW_MX *shmx, CM_EMIT_MX *emit_mx, int *ret_b, float *ret_pp); 2665 extern int cm_OptAccAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, CM_HB_MX *mx, CM_HB_SHADOW_MX *shmx, CM_HB_EMIT_MX *emit_mx, int *ret_b, float *ret_pp); 2666 extern int cm_CYKOutsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_MX *mx, CM_MX *inscyk_mx, float *ret_sc); 2667 extern int cm_CYKOutsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_HB_MX *mx, CM_HB_MX *ins_mx, float *ret_sc); 2668 extern int cm_OutsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_MX *mx, CM_MX *ins_mx, float *ret_sc); 2669 extern int cm_OutsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, int do_check, CM_HB_MX *mx, CM_HB_MX *ins_mx, float *ret_sc); 2670 extern int cm_Posterior (CM_t *cm, char *errbuf, int L, float size_limit, CM_MX *ins_mx, CM_MX *out_mx, CM_MX *post_mx); 2671 extern int cm_PosteriorHB (CM_t *cm, char *errbuf, int L, float size_limit, CM_HB_MX *ins_mx, CM_HB_MX *out_mx, CM_HB_MX *post_mx); 2672 extern int cm_EmitterPosterior (CM_t *cm, char *errbuf, int L, float size_limit, CM_MX *post, CM_EMIT_MX *emit_mx, int do_check); 2673 extern int cm_EmitterPosteriorHB(CM_t *cm, char *errbuf, int L, float size_limit, CM_HB_MX *post, CM_HB_EMIT_MX *emit_mx, int do_check); 2674 extern int cm_PostCode (CM_t *cm, char *errbuf, int L, CM_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp); 2675 extern int cm_PostCodeHB(CM_t *cm, char *errbuf, int L, CM_HB_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp); 2676 extern int cm_InitializeOptAccShadowDZero (CM_t *cm, char *errbuf, char ***yshadow, int L); 2677 extern int cm_InitializeOptAccShadowDZeroHB(CM_t *cm, CP9Bands_t *cp9b, char *errbuf, char ***yshadow, int L); 2678 extern float FScore2Prob(float sc, float null); 2679 extern char Fscore2postcode(float sc); 2680 2681 /* from cm_dpalign_trunc.c */ 2682 extern int cm_TrAlignSizeNeeded (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb); 2683 extern int cm_TrAlignSizeNeededHB (CM_t *cm, char *errbuf, int L, float size_limit, int do_sample, int do_post, float *ret_mxmb, float *ret_emxmb, float *ret_shmxmb, float *ret_totmb); 2684 2685 extern int cm_TrAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_optacc, int do_sample, CM_TR_MX *mx, CM_TR_SHADOW_MX *shmx, CM_TR_MX *post_mx, CM_TR_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, char *ret_mode, float *ret_avgpp, float *ret_sc); 2686 extern int cm_TrAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_optacc, int do_sample, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_MX *post_mx, CM_TR_HB_EMIT_MX *emit_mx, ESL_RANDOMNESS *r, char **ret_ppstr, Parsetree_t **ret_tr, char *ret_mode, float *ret_avgpp, float *ret_sc); 2687 extern int cm_TrCYKInsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_MX *mx, CM_TR_SHADOW_MX *shmx, int *ret_b, char *ret_mode, float *ret_sc); 2688 extern int cm_TrCYKInsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, int *ret_b, char *ret_mode, float *ret_sc); 2689 extern int cm_TrInsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_MX *mx, char *ret_mode, float *ret_sc); 2690 extern int cm_TrInsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, char *ret_mode, float *ret_sc); 2691 extern int cm_TrOptAccAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_MX *mx, CM_TR_SHADOW_MX *shmx, CM_TR_EMIT_MX *emit_mx, int *ret_b, float *ret_pp); 2692 extern int cm_TrOptAccAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, CM_TR_HB_SHADOW_MX *shmx, CM_TR_HB_EMIT_MX *emit_mx, int *ret_b, float *ret_pp); 2693 extern int cm_TrCYKOutsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_MX *mx, CM_TR_MX *inscyk_mx); 2694 extern int cm_TrCYKOutsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_HB_MX *mx, CM_TR_HB_MX *inscyk_mx); 2695 extern int cm_TrOutsideAlign (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_MX *mx, CM_TR_MX *ins_mx); 2696 extern int cm_TrOutsideAlignHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, float size_limit, char preset_mode, int pass_idx, int do_check, CM_TR_HB_MX *mx, CM_TR_HB_MX *ins_mx); 2697 2698 extern int cm_TrPosterior (CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode, CM_TR_MX *ins_mx, CM_TR_MX *out_mx, CM_TR_MX *post_mx); 2699 extern int cm_TrPosteriorHB (CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode, CM_TR_HB_MX *ins_mx, CM_TR_HB_MX *out_mx, CM_TR_HB_MX *post_mx); 2700 extern int cm_TrEmitterPosterior (CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode, int do_check, CM_TR_MX *post, CM_TR_EMIT_MX *emit_mx); 2701 extern int cm_TrEmitterPosteriorHB (CM_t *cm, char *errbuf, int L, float size_limit, char preset_mode, int do_check, CM_TR_HB_MX *post, CM_TR_HB_EMIT_MX *emit_mx); 2702 extern int cm_TrPostCode (CM_t *cm, char *errbuf, int L, CM_TR_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp); 2703 extern int cm_TrPostCodeHB (CM_t *cm, char *errbuf, int L, CM_TR_HB_EMIT_MX *emit_mx, Parsetree_t *tr, char **ret_ppstr, float *ret_avgp); 2704 extern int cm_TrFillFromMode (char mode, int *ret_fill_L, int *ret_fill_R, int *ret_fill_T); 2705 2706 /* from cm_dpsearch.c */ 2707 extern int FastCYKScan (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc); 2708 extern int RefCYKScan (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc); 2709 extern int FastIInsideScan (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc); 2710 extern int RefIInsideScan (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc); 2711 extern int FastFInsideScan (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc); 2712 extern int RefFInsideScan (CM_t *cm, char *errbuf, CM_SCAN_MX *smx, int qdbidx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, float *ret_sc); 2713 extern int FastCYKScanHB (CM_t *cm, char *errbuf, CM_HB_MX *mx, float size_limit, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float *ret_sc); 2714 extern int FastFInsideScanHB(CM_t *cm, char *errbuf, CM_HB_MX *mx, float size_limit, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float *ret_sc); 2715 extern int cm_CountSearchDPCalcs(CM_t *cm, char *errbuf, int L, int *dmin, int *dmax, int W, int correct_for_first_W, float **ret_vcalcs, float *ret_calcs); 2716 extern int DetermineSeqChunksize(int nproc, int L, int W); 2717 2718 /* from cm_dpsearch_trunc.c */ 2719 extern int RefTrCYKScan (CM_t *cm, char *errbuf, CM_TR_SCAN_MX *trsmx, int qdbidx, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, 2720 int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, char *ret_mode, float *ret_sc); 2721 extern int RefITrInsideScan(CM_t *cm, char *errbuf, CM_TR_SCAN_MX *trsmx, int qdbidx, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, 2722 int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, float **ret_vsc, char *ret_mode, float *ret_sc); 2723 extern int TrCYKScanHB(CM_t *cm, char *errbuf, CM_TR_HB_MX *mx, float size_limit, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, 2724 int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, char *ret_mode, float *ret_sc); 2725 extern int FTrInsideScanHB(CM_t *cm, char *errbuf, CM_TR_HB_MX *mx, float size_limit, int pass_idx, ESL_DSQ *dsq, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist, 2726 int do_null3, float env_cutoff, int64_t *ret_envi, int64_t *ret_envj, char *ret_mode, float *ret_sc); 2727 extern int cm_TrFillFromPassIdx(int pass_idx, int *ret_fill_L, int *ret_fill_R, int *ret_fill_T); 2728 2729 /* from cm_dpsmall.c */ 2730 extern float CYKDivideAndConquer(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, Parsetree_t **ret_tr, int *dmin, int *dmax); 2731 extern float CYKInside(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, Parsetree_t **ret_tr, int *dmin, int *dmax); 2732 extern float CYKInsideScore(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, int *dmin, int *dmax); 2733 extern float CYKDemands(CM_t *cm, int L, int *dmin, int *dmax, int be_quiet); 2734 extern float CYKNonQDBSmallMbNeeded(CM_t *cm, int L); 2735 extern void debug_print_bands(FILE *fp, CM_t *cm, int *dmin, int *dmax); 2736 /* cm_dpsmall.c: size calculators - not normally part of external API, but truncyk.c currently uses them */ 2737 extern float insideT_size(CM_t *cm, int L, int r, int z, int i0, int j0); 2738 extern float vinsideT_size(CM_t *cm, int r, int z, int i0, int i1, int j1, int j0); 2739 /* cm_dpsmall.c: the memory management routines. */ 2740 extern struct deckpool_s *deckpool_create(void); 2741 extern void deckpool_push(struct deckpool_s *dpool, float **deck); 2742 extern int deckpool_pop(struct deckpool_s *d, float ***ret_deck); 2743 extern void deckpool_free(struct deckpool_s *d); 2744 extern float **alloc_vjd_deck(int L, int i, int j); 2745 extern float size_vjd_deck(int L, int i, int j); 2746 extern void free_vjd_deck(float **a, int i, int j); 2747 extern void free_vjd_matrix(float ***a, int M, int i, int j); 2748 extern char **alloc_vjd_yshadow_deck(int L, int i, int j); 2749 extern float size_vjd_yshadow_deck(int L, int i, int j); 2750 extern void free_vjd_yshadow_deck(char **a, int i, int j); 2751 extern int **alloc_vjd_kshadow_deck(int L, int i, int j); 2752 extern float size_vjd_kshadow_deck(int L, int i, int j); 2753 extern void free_vjd_kshadow_deck(int **a, int i, int j); 2754 extern void free_vjd_shadow_matrix(void ***shadow, CM_t *cm, int i, int j); 2755 extern float **alloc_vji_deck(int i0, int i1, int j1, int j0); 2756 extern float size_vji_deck(int i0, int i1, int j1, int j0); 2757 extern void free_vji_deck(float **a, int j1, int j0); 2758 extern void free_vji_matrix(float ***a, int M, int j1, int j0); 2759 extern char **alloc_vji_shadow_deck(int i0, int i1, int j1, int j0); 2760 extern float size_vji_shadow_deck(int i0, int i1, int j1, int j0); 2761 extern void free_vji_shadow_deck(char **a, int j1, int j0); 2762 extern void free_vji_shadow_matrix(char ***a, int M, int j1, int j0); 2763 2764 extern float **alloc_banded_vjd_deck(int L, int i, int j, int min, int max); 2765 extern char **alloc_banded_vjd_yshadow_deck(int L, int i, int j, int min, int max); 2766 extern int **alloc_banded_vjd_kshadow_deck(int L, int i, int j, int min, int max); 2767 2768 extern void debug_print_alpha(float ***alpha, CM_t *cm, int L); 2769 extern void debug_print_alpha_banded(float ***alpha, CM_t *cm, int L, int *dmin, int *dmax); 2770 extern void debug_print_alpha_deck(int v, float **deck, CM_t *cm, int L); 2771 extern void debug_print_shadow(void ***shadow, CM_t *cm, int L); 2772 extern void debug_print_shadow_banded(void ***shadow, CM_t *cm, int L, int *dmin, int *dmax); 2773 extern void debug_print_shadow_banded_deck(int v, void ***shadow, CM_t *cm, int L, int *dmin, int *dmax); 2774 2775 /* from cm_file.c */ 2776 extern int cm_file_Open(char *filename, char *env, int allow_1p0, CM_FILE **ret_cmfp, char *errbuf); 2777 extern int cm_file_OpenNoDB(char *filename, char *env, int allow_1p0, CM_FILE **ret_cmfp, char *errbuf); 2778 extern int cm_file_OpenBuffer(char *buffer, int size, int allow_1p0, CM_FILE **ret_cmfp); 2779 extern void cm_file_Close(CM_FILE *cmfp); 2780 extern int cm_file_CreateLock(CM_FILE *cmfp); 2781 extern int cm_file_WriteASCII(FILE *fp, int format, CM_t *cm); 2782 extern int cm_file_WriteBinary(FILE *fp, int format, CM_t *cm, off_t *opt_fp7_offset); 2783 extern int cm_file_Read(CM_FILE *cmfp, int read_fp7, ESL_ALPHABET **ret_abc, CM_t **opt_cm); 2784 extern int cm_file_PositionByKey(CM_FILE *cmfp, const char *key); 2785 extern int cm_file_Position(CM_FILE *cmfp, const off_t offset); 2786 extern int cm_p7_hmmfile_Read(CM_FILE *cmfp, ESL_ALPHABET *abc, off_t offset, P7_HMM **ret_hmm); 2787 extern int cm_p7_oprofile_Write(FILE *ffp, FILE *pfp, off_t cm_offset, int cm_len, int cm_W, int cm_nbp, float gfmu, float gflambda, P7_OPROFILE *om); 2788 extern int cm_p7_oprofile_ReadMSV(CM_FILE *cmfp, int read_scores, ESL_ALPHABET **byp_abc, off_t *ret_cm_offset, int *ret_cm_clen, int *ret_cm_W, int *ret_cm_nbp, float *ret_gfmu, float *ret_gflambda, P7_OPROFILE **ret_om); 2789 extern int cm_p7_oprofile_ReadBlockMSV(CM_FILE *cmfp, int64_t cm_idx, ESL_ALPHABET **byp_abc, CM_P7_OM_BLOCK *hmmBlock); 2790 extern int cm_p7_oprofile_Position(CM_FILE *cmfp, off_t offset); 2791 extern int cm_file_Write1p0ASCII(FILE *fp, CM_t *cm); 2792 2793 /* from cm_modelconfig.c */ 2794 extern int cm_Configure (CM_t *cm, char *errbuf, int W_from_cmdline); 2795 extern int cm_ConfigureSub(CM_t *cm, char *errbuf, int W_from_cmdline, CM_t *mother_cm, CMSubMap_t *mother_map); 2796 extern int cm_CalculateLocalBeginProbs(CM_t *cm, float p_internal_start, float **t, float *begin); 2797 2798 /* from cm_modelmaker.c */ 2799 extern int HandModelmaker(ESL_MSA *msa, char *errbuf, int use_rf, int use_el, int use_wts, float gapthresh, CM_t **ret_cm, Parsetree_t **ret_mtr); 2800 extern int ConsensusModelmaker(const ESL_ALPHABET *abc, char *errbuf, char *ss_cons, int clen, int building_sub_model, CM_t **ret_cm, Parsetree_t **ret_gtr); 2801 extern int Transmogrify(CM_t *cm, char *errbuf, Parsetree_t *gtr, ESL_DSQ *ax, int *used_el, int alen, Parsetree_t **ret_tr); 2802 extern int cm_from_guide(CM_t *cm, char *errbuf, Parsetree_t *gtr, int will_never_localize); 2803 extern int cm_find_and_detach_dual_inserts(CM_t *cm, int do_check, int do_detach); 2804 extern int cm_check_before_detaching(CM_t *cm, int insert1, int insert2); 2805 extern int cm_detach_state(CM_t *cm, int insert1, int insert2); 2806 extern int cm_zero_flanking_insert_counts(CM_t *cm, char *errbuf); 2807 extern int clean_cs(char *cs, int alen, int be_quiet); 2808 2809 /* from cm_mx.c */ 2810 extern CM_MX *cm_mx_Create (int M); 2811 extern int cm_mx_GrowTo (CM_t *cm, CM_MX *mx, char *errbuf, int L, float size_limit); 2812 extern int cm_mx_Dump (FILE *ofp, CM_MX *mx, int print_mx); 2813 extern void cm_mx_Destroy (CM_MX *mx); 2814 extern int cm_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_ncells, float *ret_Mb); 2815 2816 extern CM_TR_MX *cm_tr_mx_Create (CM_t *cm); 2817 extern int cm_tr_mx_GrowTo (CM_t *cm, CM_TR_MX *mx, char *errbuf, int L, float size_limit); 2818 extern int cm_tr_mx_Dump (FILE *ofp, CM_TR_MX *mx, char mode, int print_mx); 2819 extern void cm_tr_mx_Destroy (CM_TR_MX *mx); 2820 extern int cm_tr_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_Jncells, int64_t *ret_Lncells, int64_t *ret_Rncells, int64_t *ret_Tncells, float *ret_Mb); 2821 2822 extern CM_HB_MX *cm_hb_mx_Create (int M); 2823 extern int cm_hb_mx_GrowTo (CM_t *cm, CM_HB_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit); 2824 extern int cm_hb_mx_Dump (FILE *ofp, CM_HB_MX *mx, int print_mx); 2825 extern void cm_hb_mx_Destroy (CM_HB_MX *mx); 2826 extern int cm_hb_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_ncells, float *ret_Mb); 2827 2828 extern CM_TR_HB_MX *cm_tr_hb_mx_Create (CM_t *cm); 2829 extern int cm_tr_hb_mx_GrowTo (CM_t *cm, CM_TR_HB_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit); 2830 extern int cm_tr_hb_mx_Dump (FILE *ofp, CM_TR_HB_MX *mx, char mode, int print_mx); 2831 extern void cm_tr_hb_mx_Destroy (CM_TR_HB_MX *mx); 2832 extern int cm_tr_hb_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_Jncells, int64_t *ret_Lncells, 2833 int64_t *ret_Rncells, int64_t *ret_Tncells, float *ret_Mb); 2834 2835 extern CM_SHADOW_MX *cm_shadow_mx_Create (CM_t *cm); 2836 extern int cm_shadow_mx_GrowTo (CM_t *cm, CM_SHADOW_MX *mx, char *errbuf, int L, float size_limit); 2837 extern int cm_shadow_mx_Dump (FILE *ofp, CM_t *cm, CM_SHADOW_MX *mx, int print_mx); 2838 extern void cm_shadow_mx_Destroy (CM_SHADOW_MX *mx); 2839 extern int cm_shadow_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_ny_cells, int64_t *ret_nk_cells, float *ret_Mb); 2840 2841 extern CM_TR_SHADOW_MX *cm_tr_shadow_mx_Create (CM_t *cm); 2842 extern int cm_tr_shadow_mx_GrowTo (CM_t *cm, CM_TR_SHADOW_MX *mx, char *errbuf, int L, float size_limit); 2843 extern int cm_tr_shadow_mx_Dump (FILE *ofp, CM_t *cm, CM_TR_SHADOW_MX *mx, char mode, int print_mx); 2844 extern void cm_tr_shadow_mx_Destroy (CM_TR_SHADOW_MX *mx); 2845 extern int cm_tr_shadow_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_Jny_cells, int64_t *ret_Lny_cells, int64_t *ret_Rny_cells, 2846 int64_t *ret_Jnk_cells, int64_t *ret_Lnk_cells, int64_t *ret_Rnk_cells, int64_t *ret_Tnk_cells, float *ret_Mb); 2847 2848 extern CM_HB_SHADOW_MX *cm_hb_shadow_mx_Create (CM_t *cm); 2849 extern int cm_hb_shadow_mx_GrowTo (CM_t *cm, CM_HB_SHADOW_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit); 2850 extern int cm_hb_shadow_mx_Dump (FILE *ofp, CM_t *cm, CM_HB_SHADOW_MX *mx, int print_mx); 2851 extern void cm_hb_shadow_mx_Destroy (CM_HB_SHADOW_MX *mx); 2852 extern int cm_hb_shadow_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int64_t *ret_ny_cells, int64_t *ret_nk_cells, float *ret_Mb); 2853 2854 extern CM_TR_HB_SHADOW_MX *cm_tr_hb_shadow_mx_Create (CM_t *cm); 2855 extern int cm_tr_hb_shadow_mx_GrowTo (CM_t *cm, CM_TR_HB_SHADOW_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit); 2856 extern int cm_tr_hb_shadow_mx_Dump (FILE *ofp, CM_t *cm, CM_TR_HB_SHADOW_MX *mx, char mode, int print_mx); 2857 extern void cm_tr_hb_shadow_mx_Destroy (CM_TR_HB_SHADOW_MX *mx); 2858 extern int cm_tr_hb_shadow_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int64_t *ret_Jny_cells, int64_t *ret_Lny_cells, int64_t *ret_Rny_cells, 2859 int64_t *ret_Jnk_cells, int64_t *ret_Lnk_cells, int64_t *ret_Rnk_cells, int64_t *ret_Tnk_cells, float *ret_Mb); 2860 2861 extern CM_EMIT_MX *cm_emit_mx_Create (CM_t *cm); 2862 extern int cm_emit_mx_GrowTo (CM_t *cm, CM_EMIT_MX *mx, char *errbuf, int L, float size_limit); 2863 extern int cm_emit_mx_Dump (FILE *ofp, CM_t *cm, CM_EMIT_MX *mx, int print_mx); 2864 extern void cm_emit_mx_Destroy (CM_EMIT_MX *mx); 2865 extern int cm_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb); 2866 2867 extern CM_TR_EMIT_MX *cm_tr_emit_mx_Create (CM_t *cm); 2868 extern int cm_tr_emit_mx_GrowTo (CM_t *cm, CM_TR_EMIT_MX *mx, char *errbuf, int L, float size_limit); 2869 extern int cm_tr_emit_mx_Dump (FILE *ofp, CM_t *cm, CM_TR_EMIT_MX *mx, char mode, int print_mx); 2870 extern void cm_tr_emit_mx_Destroy (CM_TR_EMIT_MX *mx); 2871 extern int cm_tr_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb); 2872 2873 extern CM_HB_EMIT_MX *cm_hb_emit_mx_Create (CM_t *cm); 2874 extern int cm_hb_emit_mx_GrowTo (CM_t *cm, CM_HB_EMIT_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit); 2875 extern int cm_hb_emit_mx_Dump (FILE *ofp, CM_t *cm, CM_HB_EMIT_MX *mx, int print_mx); 2876 extern void cm_hb_emit_mx_Destroy (CM_HB_EMIT_MX *mx); 2877 extern int cm_hb_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb); 2878 2879 extern CM_TR_HB_EMIT_MX *cm_tr_hb_emit_mx_Create (CM_t *cm); 2880 extern int cm_tr_hb_emit_mx_GrowTo (CM_t *cm, CM_TR_HB_EMIT_MX *mx, char *errbuf, CP9Bands_t *cp9b, int L, float size_limit); 2881 extern int cm_tr_hb_emit_mx_Dump (FILE *ofp, CM_t *cm, CM_TR_HB_EMIT_MX *mx, char mode, int print_mx); 2882 extern void cm_tr_hb_emit_mx_Destroy (CM_TR_HB_EMIT_MX *mx); 2883 extern int cm_tr_hb_emit_mx_SizeNeeded (CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int L, int64_t *ret_l_ncells, int64_t *ret_r_ncells, float *ret_Mb); 2884 2885 extern int cm_scan_mx_Create (CM_t *cm, char *errbuf, int do_float, int do_int, CM_SCAN_MX **ret_smx); 2886 extern int cm_scan_mx_InitializeFloats (CM_t *cm, CM_SCAN_MX *smx, char *errbuf); 2887 extern int cm_scan_mx_InitializeIntegers(CM_t *cm, CM_SCAN_MX *smx, char *errbuf); 2888 extern float cm_scan_mx_SizeNeeded (CM_t *cm, int do_float, int do_int); 2889 extern void cm_scan_mx_Destroy (CM_t *cm, CM_SCAN_MX *smx); 2890 extern void cm_scan_mx_Dump (FILE *ofp, CM_t *cm, int j, int i0, int qdbidx, int doing_float); 2891 2892 extern int cm_tr_scan_mx_Create (CM_t *cm, char *errbuf, int do_float, int do_int, CM_TR_SCAN_MX **ret_smx); 2893 extern int cm_tr_scan_mx_InitializeFloats (CM_t *cm, CM_TR_SCAN_MX *trsmx, char *errbuf); 2894 extern int cm_tr_scan_mx_InitializeIntegers(CM_t *cm, CM_TR_SCAN_MX *trsmx, char *errbuf); 2895 extern float cm_tr_scan_mx_SizeNeeded (CM_t *cm, int do_float, int do_int); 2896 extern void cm_tr_scan_mx_Destroy (CM_t *cm, CM_TR_SCAN_MX *smx); 2897 extern void cm_tr_scan_mx_Dump (FILE *ofp, CM_t *cm, int j, int i0, int qdbidx, int doing_float); 2898 2899 extern GammaHitMx_t *CreateGammaHitMx (int L, int64_t i0, float cutoff); 2900 extern void FreeGammaHitMx (GammaHitMx_t *gamma); 2901 extern int UpdateGammaHitMx (CM_t *cm, char *errbuf, int pass_idx, GammaHitMx_t *gamma, int j, int dmin, int dmax, float *bestsc, int *bestr, char *bestmode, int W, double **act); 2902 extern int ReportHitsGreedily (CM_t *cm, char *errbuf, int pass_idx, int j, int dmin, int dmax, float *bestsc, int *bestr, char *bestmode, int W, double **act, int64_t i0, int64_t j0, float cutoff, CM_TOPHITS *hitlist); 2903 extern void TBackGammaHitMx (GammaHitMx_t *gamma, CM_TOPHITS *hitlist, int64_t i0, int64_t j0); 2904 2905 2906 2907 /* from cm_parsetree.c */ 2908 extern Parsetree_t *CreateParsetree(int size); 2909 extern void GrowParsetree(Parsetree_t *tr); 2910 extern void FreeParsetree(Parsetree_t *tr); 2911 extern float SizeofParsetree(Parsetree_t *tr); 2912 extern int InsertTraceNode(Parsetree_t *tr, int y, int whichway, int emitl, int emitr, int state); 2913 extern int InsertTraceNodewithMode(Parsetree_t *tr, int y, int whichway, int emitl, int emitr, int state, char mode); 2914 extern void PrintParsetree(FILE *fp, Parsetree_t *tr); 2915 extern void ParsetreeCount(CM_t *cm, Parsetree_t *tr, ESL_DSQ *dsq, float wgt); 2916 extern int ParsetreeScore(CM_t *cm, CMEmitMap_t *emap, char *errbuf, Parsetree_t *tr, ESL_DSQ *dsq, int do_null2, float *ret_sc, float *ret_struct_sc, float *ret_primary_sc, int *ret_spos, int *ret_epos); 2917 extern void ParsetreeDump(FILE *fp, Parsetree_t *tr, CM_t *cm, ESL_DSQ *dsq); 2918 extern int ParsetreeCompare(Parsetree_t *t1, Parsetree_t *t2); 2919 extern void SummarizeMasterTrace(FILE *fp, Parsetree_t *tr); 2920 extern void MasterTraceDisplay(FILE *fp, Parsetree_t *mtr, CM_t *cm); 2921 extern int Parsetrees2Alignment(CM_t *cm, char *errbuf, const ESL_ALPHABET *abc, ESL_SQ **sq, double *wgt, Parsetree_t **tr, char **postcode, int nseq, FILE *insertfp, FILE *elfp, int do_full, int do_matchonly, ESL_MSA **ret_msa); 2922 extern int Alignment2Parsetrees(ESL_MSA *msa, CM_t *cm, Parsetree_t *mtr, char *errbuf, ESL_SQ ***ret_sq, Parsetree_t ***ret_tr); 2923 extern float ParsetreeScore_Global2Local(CM_t *cm, Parsetree_t *tr, ESL_DSQ *dsq, int print_flag); 2924 extern int Parsetree2CP9trace(CM_t *cm, Parsetree_t *tr, CP9trace_t **ret_cp9_tr); 2925 extern void rightjustify(const ESL_ALPHABET *abc, char *s, int n); 2926 extern void leftjustify(const ESL_ALPHABET *abc, char *s, int n); 2927 extern int EmitParsetree(CM_t *cm, char *errbuf, ESL_RANDOMNESS *r, char *name, int do_digital, Parsetree_t **ret_tr, ESL_SQ **ret_sq, int *ret_N); 2928 extern int ParsetreeScoreCorrectionNull2(CM_t *cm, char *errbuf, Parsetree_t *tr, ESL_DSQ *dsq, int start, float omega, float *ret_sc); 2929 extern int ParsetreeScoreCorrectionNull3(CM_t *cm, char *errbuf, Parsetree_t *tr, ESL_DSQ *dsq, int start, float omega, float *ret_sc); 2930 extern int ParsetreeCountMPEmissions(CM_t *cm, Parsetree_t *tr); 2931 extern void ScoreCorrectionNull3(const ESL_ALPHABET *abc, float *null0, float *comp, int len, float omega, float *ret_sc); 2932 extern void ScoreCorrectionNull3CompUnknown(const ESL_ALPHABET *abc, float *null0, ESL_DSQ *dsq, int start, int stop, float omega, float *ret_sc); 2933 extern char ParsetreeMode(Parsetree_t *tr); 2934 extern int ParsetreeToCMBounds(CM_t *cm, Parsetree_t *tr, int have_i0, int have_j0, char *errbuf, int *ret_cfrom_span, int *ret_cto_span, int *ret_cfrom_emit, int *ret_cto_emit, int *ret_first_emit, int *ret_final_emit); 2935 extern int cm_StochasticParsetree (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, CM_MX *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, float *ret_sc); 2936 extern int cm_StochasticParsetreeHB (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, CM_HB_MX *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, float *ret_sc); 2937 extern int cm_TrStochasticParsetree (CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, char preset_mode, int pass_idx, CM_TR_MX *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc); 2938 extern int cm_TrStochasticParsetreeHB(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int L, char preset_mode, int pass_idx, CM_TR_HB_MX *mx, ESL_RANDOMNESS *r, Parsetree_t **ret_tr, char *ret_mode, float *ret_sc); 2939 extern int cm_parsetree_Doctor(CM_t *cm, char *errbuf, Parsetree_t *tr, int *opt_ndi, int *opt_nid); 2940 2941 2942 /* from cm_pipeline.c */ 2943 extern CM_PIPELINE *cm_pipeline_Create (ESL_GETOPTS *go, ESL_ALPHABET *abc, int clen_hint, int L_hint, int64_t Z, enum cm_zsetby_e Z_setby, enum cm_pipemodes_e mode); 2944 extern int cm_pipeline_Reuse (CM_PIPELINE *pli); 2945 extern void cm_pipeline_Destroy(CM_PIPELINE *pli, CM_t *cm); 2946 extern int cm_pipeline_Merge (CM_PIPELINE *p1, CM_PIPELINE *p2); 2947 2948 extern int cm_pli_TargetReportable (CM_PIPELINE *pli, float score, double Eval); 2949 extern int cm_pli_TargetIncludable (CM_PIPELINE *pli, float score, double Eval); 2950 extern int cm_pli_NewModel (CM_PIPELINE *pli, int modmode, CM_t *cm, int cm_clen, int cm_W, int cm_nbp, P7_OPROFILE *om, P7_BG *bg, float *p7_evparam, int p7_max_length, int64_t cur_cm_idx, int cur_clan_idx, ESL_KEYHASH *glocal_kh); 2951 extern int cm_pli_NewModelThresholds(CM_PIPELINE *pli, CM_t *cm); 2952 extern int cm_pli_NewSeq (CM_PIPELINE *pli, const ESL_SQ *sq, int64_t cur_seq_idx); 2953 extern int cm_Pipeline (CM_PIPELINE *pli, off_t cm_offset, P7_OPROFILE *om, P7_BG *bg, float *p7_evparam, P7_SCOREDATA *msvdata, ESL_SQ *sq, CM_TOPHITS *hitlist, int in_rc, P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_PROFILE **opt_Rgm, P7_PROFILE **opt_Lgm, P7_PROFILE **opt_Tgm, CM_t **opt_cm); 2954 extern int cm_pli_Statistics (FILE *ofp, CM_PIPELINE *pli, ESL_STOPWATCH *w); 2955 extern int cm_pli_ZeroAccounting(CM_PLI_ACCT *pli_acct); 2956 extern int cm_pli_PassEnforcesFirstRes(int pass_idx); 2957 extern int cm_pli_PassEnforcesFinalRes(int pass_idx); 2958 extern int cm_pli_PassAllowsTruncation(int pass_idx); 2959 extern void cm_pli_AdjustNresForOverlaps(CM_PIPELINE *pli, int64_t noverlap, int in_rc); 2960 2961 /* from cm_qdband.c */ 2962 extern void BandExperiment(CM_t *cm); 2963 extern int CalculateQueryDependentBands(CM_t *cm, char *errbuf, CM_QDBINFO *qdbinfo, double beta_W, int *ret_W, double **ret_gamma0_loc, double **ret_gamma0_glb, int *ret_Z); 2964 extern int BandCalculationEngine(CM_t *cm, int Z, CM_QDBINFO *qdbinfo, double beta_W, int *ret_W, double ***ret_gamma, double **ret_gamma0_loc, double **ret_gamma0_glb); 2965 extern int BandTruncationNegligible(double *density, int b, int Z, double *ret_beta); 2966 extern int BandMonteCarlo(CM_t *cm, int nsample, int Z, double ***ret_gamma); 2967 extern void FreeBandDensities(CM_t *cm, double **gamma); 2968 extern void BandBounds(double **gamma, int M, int Z, double p, 2969 int **ret_min, int **ret_max); 2970 extern void PrintBandGraph(FILE *fp, double **gamma, int *min, int *max, int v, int Z); 2971 extern void PrintDPCellsSaved(CM_t *cm, int *min, int *max, int W); 2972 extern void ExpandBands(CM_t *cm, int qlen, int *dmin, int *dmax); 2973 extern void qdb_trace_info_dump(CM_t *cm, Parsetree_t *tr, int *dmin, int *dmax, int bdump_level); 2974 extern CM_QDBINFO *CreateCMQDBInfo(int M, int clen); 2975 extern float SizeofCMQDBInfo(CM_QDBINFO *qdbinfo); 2976 extern void FreeCMQDBInfo(CM_QDBINFO *qdbinfo); 2977 extern int CopyCMQDBInfo(const CM_QDBINFO *src, CM_QDBINFO *dst, char *errbuf); 2978 extern void DumpCMQDBInfo(FILE *fp, CM_t *cm, CM_QDBINFO *qdbinfo); 2979 extern int CheckCMQDBInfo(CM_QDBINFO *qdbinfo, double beta1, int do_check1, double beta2, int do_check2); 2980 2981 /* from cm_submodel.c */ 2982 extern int build_sub_cm(CM_t *orig_cm, char *errbuf, CM_t **ret_cm, int sstruct, int estruct, CMSubMap_t **ret_submap, int print_flag); 2983 extern void CP9NodeForPosn(CP9_t *hmm, int i0, int j0, int x, CP9_MX *post, int *ret_node, int *ret_type, float pmass, int is_start, int print_flag); 2984 extern void StripWUSSGivenCC(ESL_MSA *msa, float gapthresh, int first_match, int last_match); 2985 extern int check_orig_psi_vs_sub_psi(CM_t *orig_cm, CM_t *sub_cm, char *errbuf, CMSubMap_t *submap, double threshold, int print_flag); 2986 extern int check_sub_cm(CM_t *orig_cm, CM_t *sub_cm, char *errbuf, CMSubMap_t *submap, CMSubInfo_t *subinfo, float pthresh, int print_flag); 2987 extern int check_sub_cm_by_sampling(CM_t *orig_cm, CM_t *sub_cm, char *errbuf, ESL_RANDOMNESS *r, CMSubMap_t *submap, CMSubInfo_t *subinfo, 2988 float chi_thresh, int nsamples, int print_flag); 2989 extern int sub_cm2cm_parsetree(CM_t *orig_cm, CM_t *sub_cm, Parsetree_t **ret_orig_tr, Parsetree_t *sub_tr, 2990 CMSubMap_t *submap, int print_flag); 2991 extern CMSubMap_t *AllocSubMap(CM_t *sub_cm, CM_t *orig_cm, int sstruct, int estruct); 2992 extern void FreeSubMap(CMSubMap_t *submap); 2993 extern CMSubInfo_t *AllocSubInfo(int clen); 2994 extern void FreeSubInfo(CMSubInfo_t *subinfo); 2995 extern void debug_print_cm_params(FILE *fp, CM_t *cm); 2996 extern int SubCMLogoddsify(CM_t *cm, char *errbuf, CM_t *mother_cm, CMSubMap_t *mother_map); 2997 extern float ** SubFCalcAndCopyOptimizedEmitScoresFromMother(CM_t *cm, CM_t *mother_cm, CMSubMap_t *mother_map); 2998 extern void CP9_reconfig2sub(CP9_t *hmm, int spos, int epos, int spos_nd, int epos_nd, double **orig_phi); 2999 3000 3001 /* from cm_tophits.c */ 3002 extern CM_TOPHITS *cm_tophits_Create(void); 3003 extern int cm_tophits_Grow(CM_TOPHITS *h); 3004 extern int cm_tophits_CreateNextHit(CM_TOPHITS *h, CM_HIT **ret_hit); 3005 extern int cm_tophits_SortByEvalue(CM_TOPHITS *h); 3006 extern int cm_tophits_SortForOverlapRemoval(CM_TOPHITS *h); 3007 extern int cm_tophits_SortForOverlapMarkup(CM_TOPHITS *h, int do_clans_only); 3008 extern int cm_tophits_SortByPosition(CM_TOPHITS *h); 3009 extern int cm_tophits_Merge(CM_TOPHITS *h1, CM_TOPHITS *h2); 3010 extern int cm_tophits_GetMaxPositionLength(CM_TOPHITS *h); 3011 extern int cm_tophits_GetMaxTargetLength(CM_TOPHITS *h); 3012 extern int cm_tophits_GetMaxNameLength(CM_TOPHITS *h); 3013 extern int cm_tophits_GetMaxDescLength(CM_TOPHITS *h); 3014 extern int cm_tophits_GetMaxAccessionLength(CM_TOPHITS *h); 3015 extern int cm_tophits_GetMaxShownLength(CM_TOPHITS *h); 3016 extern int cm_tophits_GetMaxClanLength(CM_TOPHITS *h, ESL_KEYHASH *clan_name_kh); 3017 extern int cm_tophits_Reuse(CM_TOPHITS *h); 3018 extern void cm_tophits_Destroy(CM_TOPHITS *h); 3019 extern int cm_tophits_CloneHitMostly(CM_TOPHITS *src_th, int h, CM_TOPHITS *dest_th); 3020 extern int cm_tophits_ComputeEvalues(CM_TOPHITS *th, double eZ, int istart); 3021 extern int cm_tophits_RemoveOrMarkOverlaps(CM_TOPHITS *th, int do_clans_only, char *errbuf); 3022 extern int cm_tophits_UpdateHitPositions(CM_TOPHITS *th, int hit_start, int64_t seq_start, int in_revcomp); 3023 extern int cm_tophits_SetSourceLengths(CM_TOPHITS *th, int64_t *srcL, uint64_t nseqs); 3024 extern int64_t cm_tophits_OverlapNres(int64_t from1, int64_t to1, int64_t from2, int64_t to2, int64_t *ret_nes, char *errbuf); 3025 3026 extern int cm_tophits_Threshold(CM_TOPHITS *th, CM_PIPELINE *pli); 3027 extern int cm_tophits_Targets(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli, int textw); 3028 extern int cm_tophits_F3Targets(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli); 3029 extern int cm_tophits_HitAlignments(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli, int textw); 3030 extern int cm_tophits_HitAlignmentStatistics(FILE *ofp, CM_TOPHITS *th, int used_cyk, int used_hb, double default_tau); 3031 extern int cm_tophits_Alignment(CM_t *cm, const CM_TOPHITS *th, char *errbuf, ESL_MSA **ret_msa); 3032 extern int cm_tophits_TabularTargets1(FILE *ofp, char *qname, char *qacc, CM_TOPHITS *th, CM_PIPELINE *pli, int show_header); 3033 extern int cm_tophits_TabularTargets2(FILE *ofp, char *qname, char *qacc, CM_TOPHITS *th, CM_PIPELINE *pli, int show_header, ESL_KEYHASH *clan_name_kh, int skip_overlaps, char *errbuf); 3034 extern int cm_tophits_F3TabularTargets1(FILE *ofp, CM_TOPHITS *th, CM_PIPELINE *pli, int show_header); 3035 extern int cm_tophits_TabularTail(FILE *ofp, const char *progname, enum cm_pipemodes_e pipemode, const char *qfile, const char *tfile, const ESL_GETOPTS *go); 3036 extern int cm_tophits_Dump(FILE *fp, const CM_TOPHITS *th); 3037 3038 extern int cm_hit_AllowTruncation(CM_t *cm, int pass_idx, int64_t start, int64_t stop, int64_t i0, int64_t j0, char mode, int b); 3039 extern int cm_hit_Dump(FILE *fp, const CM_HIT *h); 3040 3041 /* from cm_trunc.c */ 3042 extern CM_TR_PENALTIES *cm_tr_penalties_Create(CM_t *cm, int ignore_inserts, char *errbuf); 3043 extern int cm_tr_penalties_Validate(CM_TR_PENALTIES *trp, CM_t *cm, double tol, char *errbuf); 3044 extern void cm_tr_penalties_Dump(FILE *fp, const CM_t *cm, const CM_TR_PENALTIES *trp); 3045 extern float cm_tr_penalties_Sizeof(CM_TR_PENALTIES *trp); 3046 extern void cm_tr_penalties_Destroy(CM_TR_PENALTIES *trp); 3047 extern int cm_tr_penalties_IdxForPass(int pass_idx); 3048 3049 /* from cm_p7_band.c */ 3050 extern int p7_gmx_Match2DMatrix(P7_GMX *gx, int do_diff, ESL_DMATRIX **ret_D, double *ret_min, double *ret_max); 3051 extern int my_dmx_Visualize(FILE *fp, ESL_DMATRIX *D, double min, double max, double min2fill); 3052 extern int my_p7_GTraceMSV(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, const P7_GMX *gx, P7_TRACE *tr, int **ret_i2k, int **ret_k2i, float **ret_sc, int **ret_iconflict); 3053 extern int Parsetree2i_to_k(CM_t *cm, CMEmitMap_t *emap, int L, char *errbuf, Parsetree_t *tr, int **ret_i2k); 3054 extern int prune_i2k(int *i2k, int *iconflict, float *isc, int L, double **phi, float min_sc, int min_len, int min_end, float min_mprob, float min_mcprob, float max_iprob, float max_ilprob); 3055 extern int p7_pins2bands(int *i2k, char *errbuf, int L, int M, int pad, int **ret_imin, int **ret_imax, int *ret_ncells); 3056 extern int DumpP7Bands(FILE *fp, int *i2k, int *kmin, int *kmax, int L); 3057 extern int cp9_ForwardP7B(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, float *ret_sc); 3058 extern int cp9_ForwardP7B_OLD_WITH_EL(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, float *ret_sc); 3059 extern int cp9_BackwardP7B(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, float *ret_sc); 3060 extern int cp9_CheckFBP7B(CP9_MX *fmx, CP9_MX *bmx, CP9_t *hmm, char *errbuf, float sc, int i0, int j0, ESL_DSQ *dsq, int *kmin, int *kmax); 3061 extern int cp9_Seq2BandsP7B (CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int L, CP9Bands_t *cp9b, int *kmin, int *kmax, int debug_level); 3062 extern int cp9_Seq2PosteriorsP7B(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int L, int *kmin, int *kmax, int debug_level); 3063 extern int cp9_PosteriorP7B(ESL_DSQ *dsq, char *errbuf, int L, CP9_t *hmm, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, int *kmin, int *kmax); 3064 extern int cp9_FB2HMMBandsP7B(CP9_t *hmm, char *errbuf, ESL_DSQ *dsq, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, CP9Bands_t *cp9b, int L, int M, double p_thresh, int do_old_hmm2ij, int *kmin, int *kmax, int debug_level); 3065 extern int p7_Seq2Bands(CM_t *cm, char *errbuf, P7_PROFILE *gm, P7_GMX *gx, P7_BG *bg, P7_TRACE *p7_tr, ESL_DSQ *dsq, int L, 3066 double **phi, float sc7, int len7, int end7, float mprob7, float mcprob7, float iprob7, float ilprob7, int pad7, 3067 int **ret_i2k, int **ret_kmin, int **ret_kmax, int *ret_ncells); 3068 3069 extern int CP9NodeForPosnP7B(CP9_t *hmm, char *errbuf, int x, CP9_MX *post, int kn, int kx, int *ret_node, int *ret_type, int print_flag); 3070 extern int P7BandsAdjustForSubCM(int *kmin, int *kmax, int L, int spos, int epos); 3071 3072 /* from cm_p7_domaindef.c */ 3073 extern int p7_domaindef_GlocalByPosteriorHeuristics(const ESL_SQ *sq, P7_PROFILE *gm, P7_GMX *gxf, P7_GMX *gxb, 3074 P7_GMX *fwd, P7_GMX *bck, P7_DOMAINDEF *ddef, int do_null2); 3075 3076 /* from cm_p7_modelconfig_trunc.c */ 3077 extern int p7_ProfileConfig5PrimeTrunc(P7_PROFILE *gm, int L); 3078 extern int p7_ProfileConfig3PrimeTrunc(const P7_HMM *hmm, P7_PROFILE *gm, int L); 3079 extern int p7_ProfileConfig5PrimeAnd3PrimeTrunc(P7_PROFILE *gm, int L); 3080 extern int p7_ReconfigLength5PrimeTrunc(P7_PROFILE *gm, int L); 3081 extern int p7_ReconfigLength3PrimeTrunc(P7_PROFILE *gm, int L); 3082 3083 /* from cm_p7_modelmaker.c */ 3084 extern int BuildP7HMM_MatchEmitsOnly(CM_t *cm, CP9_t *cp9, P7_HMM **ret_p7); 3085 extern int cm_cp9_to_p7(CM_t *cm, CP9_t *cp9, char *errbuf); 3086 extern int cm_p7_Calibrate(P7_HMM *hmm, char *errbuf, int ElmL, int ElvL, int ElfL, int EgfL, int ElmN, int ElvN, int ElfN, int EgfN, double ElfT, double EgfT, double *ret_gfmu, double *ret_gflambda); 3087 extern int cm_p7_Tau(ESL_RANDOMNESS *r, char *errbuf, P7_OPROFILE *om, P7_PROFILE *gm, P7_BG *bg, int L, int N, double lambda, double tailp, double *ret_tau); 3088 extern int cm_SetFilterHMM(CM_t *cm, P7_HMM *hmm, double gfmu, double gflambda); 3089 extern int dump_p7(P7_HMM *hmm, FILE *fp); 3090 extern float cm_p7_hmm_Sizeof(P7_HMM *hmm); 3091 extern int cm_p7_hmm_SetConsensus(P7_HMM *hmm); 3092 3093 /* from cp9.c */ 3094 extern CP9_t *AllocCPlan9(int M, const ESL_ALPHABET *abc); 3095 extern CP9_t *AllocCPlan9Shell(void); 3096 extern void AllocCPlan9Body(CP9_t *hmm, int M, const ESL_ALPHABET *abc); 3097 extern void FreeCPlan9(CP9_t *hmm); 3098 extern void ZeroCPlan9(CP9_t *hmm); 3099 extern void CPlan9SetNullModel(CP9_t *hmm, float *null, float p1); 3100 extern int cp9_GetNCalcsPerResidue(CP9_t *cp9, char *errbuf, float *ret_cp9_ncalcs_per_res); 3101 extern CP9_t *cp9_Clone(CP9_t *cp9); 3102 extern int cp9_Copy(const CP9_t *src, CP9_t *dst); 3103 extern float cp9_Sizeof(CP9_t *cp9); 3104 extern void CP9Logoddsify(CP9_t *hmm); 3105 extern void CPlan9Renormalize(CP9_t *hmm); 3106 3107 /* from cp9_dp.c */ 3108 extern int cp9_Viterbi(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align, 3109 int be_efficient, int **ret_psc, int *ret_maxres, CP9trace_t **ret_tr, float *ret_sc); 3110 extern int cp9_ViterbiBackward(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align, 3111 int be_efficient, int **ret_psc, int *ret_maxres, CP9trace_t **ret_tr, float *ret_sc); 3112 extern int cp9_Forward(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align, 3113 int be_efficient, int **ret_psc, int *ret_maxres, float *ret_sc); 3114 extern int cp9_Backward(CP9_t *cp9, char *errbuf, CP9_MX *mx, ESL_DSQ *dsq, int i0, int j0, int do_scan, int doing_align, 3115 int be_efficient, int **ret_psc, int *ret_maxres, float *ret_sc); 3116 extern int cp9_CheckFB(CP9_MX *fmx, CP9_MX *bmx, CP9_t *hmm, char *errbuf, float sc, int i0, int j0, ESL_DSQ *dsq); 3117 3118 /* from cp9_modelmaker.c */ 3119 extern CP9Map_t *AllocCP9Map(CM_t *cm); 3120 extern float SizeofCP9Map(CP9Map_t *cp9map); 3121 extern void FreeCP9Map(CP9Map_t *cp9map); 3122 extern int build_cp9_hmm(CM_t *cm, char *errbuf, int do_psi_test, float thresh, int debug_level, CP9_t **ret_hmm, CP9Map_t **ret_cp9map); 3123 extern void CP9_map_cm2hmm(CM_t *cm, CP9Map_t *cp9map, int debug_level); 3124 extern void fill_phi_cp9(CP9_t *hmm, double ***ret_phi, int spos, int entered_only); 3125 extern void map_helper(CM_t *cm, CP9Map_t *cp9map, int k, int ks, int v); 3126 extern int CP9_check_by_sampling(CM_t *cm, CP9_t *hmm, char *errbuf, ESL_RANDOMNESS *r, CMSubInfo_t *subinfo, int spos, int epos, float chi_thresh, int nsamples, int print_flag); 3127 extern void debug_print_cp9_params(FILE *fp, CP9_t *hmm, int print_scores); 3128 extern void debug_print_phi_cp9(CP9_t *hmm, double **phi); 3129 extern int MakeDealignedString(const ESL_ALPHABET *abc, char *aseq, int alen, char *ss, char **ret_s); 3130 extern int sub_build_cp9_hmm_from_mother(CM_t *cm, char *errbuf, CM_t *mother_cm, CMSubMap_t *mother_map, CP9_t **ret_hmm, CP9Map_t **ret_cp9map, int do_psi_test, 3131 float psi_vs_phi_threshold, int debug_level); 3132 extern void CPlan9InitEL(CP9_t *cp9, CM_t *cm); 3133 3134 /* from cp9_mx.c */ 3135 extern CP9_MX *CreateCP9Matrix(int N, int M); 3136 extern void FreeCP9Matrix (CP9_MX *mx); 3137 extern int GrowCP9Matrix (CP9_MX *mx, char *errbuf, int N, int M, int *kmin, int *kmax, int ***mmx, int ***imx, int ***dmx, int ***elmx, int **erow); 3138 extern void InitializeCP9Matrix(CP9_MX *mx); 3139 3140 /* from cp9_trace.c */ 3141 extern void CP9AllocTrace(int tlen, CP9trace_t **ret_tr); 3142 extern void CP9ReallocTrace(CP9trace_t *tr, int tlen); 3143 extern void CP9FreeTrace(CP9trace_t *tr); 3144 extern void CP9_fake_tracebacks(ESL_MSA *msa, int *matassign, CP9trace_t ***ret_tr); 3145 extern void CP9TraceCount(CP9_t *hmm, ESL_DSQ *dsq, float wt, CP9trace_t *tr); 3146 extern float CP9TraceScore(CP9_t *hmm, ESL_DSQ *dsq, CP9trace_t *tr); 3147 extern char *CP9Statetype(char st); 3148 extern void CP9PrintTrace(FILE *fp, CP9trace_t *tr, CP9_t *hmm, ESL_DSQ *dsq); 3149 extern int CP9TransitionScoreLookup(CP9_t *hmm, char st1, int k1, 3150 char st2, int k2); 3151 extern void CP9ViterbiTrace(CP9_t *hmm, ESL_DSQ *dsq, int i0, int j0, 3152 CP9_MX *mx, CP9trace_t **ret_tr); 3153 extern void CP9ReverseTrace(CP9trace_t *tr); 3154 extern int CP9Traces2Alignment(CM_t *cm, CP9_t *cp9, const ESL_ALPHABET *abc, ESL_SQ **sq, float *wgt, 3155 int nseq, CP9trace_t **tr, int do_full, int do_matchonly, ESL_MSA **ret_msa); 3156 extern int CP9TraceScoreCorrectionNull2(CP9_t *hmm, char *errbuf, CP9trace_t *tr, ESL_DSQ *dsq, int start, float omega, float *ret_sc); 3157 3158 /* from alphabet.c */ 3159 extern void PairCount(const ESL_ALPHABET *abc, float *counters, ESL_DSQ syml, ESL_DSQ symr, float wt); 3160 extern float DegeneratePairScore(const ESL_ALPHABET *abc, float *esc, ESL_DSQ syml, ESL_DSQ symr); 3161 extern int iDegeneratePairScore(const ESL_ALPHABET *abc, int *esc, ESL_DSQ syml, ESL_DSQ symr); 3162 extern char resolve_degenerate (ESL_RANDOMNESS *r, char c); 3163 extern int revcomp(const ESL_ALPHABET *abc, ESL_SQ *comp, ESL_SQ *sq); 3164 extern float FastPairScoreBothDegenerate(int K, float *esc, float *left, float *right); 3165 extern int iFastPairScoreBothDegenerate(int K, int *esc, float *left, float *right); 3166 extern float FastPairScoreLeftOnlyDegenerate(int K, float *esc, float *left, ESL_DSQ symr); 3167 extern int iFastPairScoreLeftOnlyDegenerate(int K, int *iesc, float *left, ESL_DSQ symr); 3168 extern float FastPairScoreRightOnlyDegenerate(int K, float *esc, float *right, ESL_DSQ syml); 3169 extern float iFastPairScoreRightOnlyDegenerate(int K, int *iesc, float *right, ESL_DSQ syml); 3170 extern float FastPairScoreBothDegenerate(int K, float *esc, float *left, float *right); 3171 extern int iFastPairScoreBothDegenerate(int K, int *esc, float *left, float *right); 3172 extern float FastPairScoreLeftOnlyDegenerate(int K, float *esc, float *left, ESL_DSQ symr); 3173 extern int iFastPairScoreLeftOnlyDegenerate(int K, int *iesc, float *left, ESL_DSQ symr); 3174 extern float FastPairScoreRightOnlyDegenerate(int K, float *esc, float *right, ESL_DSQ syml); 3175 extern float iFastPairScoreRightOnlyDegenerate(int K, int *iesc, float *right, ESL_DSQ syml); 3176 3177 3178 /* from display.c */ 3179 extern Fancyali_t *CreateFancyAli(const ESL_ALPHABET *abc, Parsetree_t *tr, CM_t *cm, CMConsensus_t *cons, ESL_DSQ *dsq, int do_noncanonical, char *pcode); 3180 extern void PrintFancyAli(FILE *fp, Fancyali_t *ali, int64_t offset, int in_revcomp, int do_top, int linewidth); 3181 extern void FreeFancyAli(Fancyali_t *ali); 3182 extern CMConsensus_t *CreateCMConsensus(CM_t *cm, const ESL_ALPHABET *abc); 3183 extern void FreeCMConsensus(CMConsensus_t *con); 3184 extern int IsCompensatory(const ESL_ALPHABET *abc, float *pij, int symi, int symj); 3185 extern CMEmitMap_t *CreateEmitMap(CM_t *cm); 3186 extern float SizeofEmitMap(CM_t *cm, CMEmitMap_t *emap); 3187 extern void DumpEmitMap(FILE *fp, CMEmitMap_t *map, CM_t *cm); 3188 extern void FreeEmitMap(CMEmitMap_t *map); 3189 extern void FormatTimeString(char *buf, double sec, int do_frac); 3190 extern int GetDate(char *errbuf, char **ret_date); 3191 3192 /* from errors.c */ 3193 extern void cm_Die (char *format, ...); 3194 extern void cm_Fail(char *format, ...); 3195 3196 /* from eweight.c */ 3197 extern int cm_EntropyWeight(CM_t *cm, const Prior_t *pri, double etarget, double min_Neff, double max_Neff, int pretend_cm_is_hmm, double *ret_hmm_re, double *ret_Neff); 3198 extern void cm_Rescale(CM_t *hmm, float scale); 3199 extern void cp9_Rescale(CP9_t *hmm, float scale); 3200 extern double cm_MeanMatchInfo(const CM_t *cm); 3201 extern double cm_MeanMatchEntropy(const CM_t *cm); 3202 extern double cm_MeanMatchRelativeEntropy(const CM_t *cm); 3203 extern double cm_MeanMatchInfoHMM(const CM_t *cm); 3204 extern double cm_MeanMatchEntropyHMM(const CM_t *cm); 3205 extern double cm_MeanMatchRelativeEntropyHMM(const CM_t *cm); 3206 extern double cp9_MeanMatchInfo(const CP9_t *cp9); 3207 extern double cp9_MeanMatchEntropy(const CP9_t *cp9); 3208 extern double cp9_MeanMatchRelativeEntropy(const CP9_t *cp9); 3209 3210 /* from hmmband.c */ 3211 extern CP9Bands_t *AllocCP9Bands(int cm_M, int hmm_M); 3212 extern float SizeofCP9Bands(CP9Bands_t *cp9b); 3213 extern void FreeCP9Bands(CP9Bands_t *cp9bands); 3214 extern int cp9_HMM2ijBands(CM_t *cm, char *errbuf, CP9_t *cp9, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int do_trunc, int debug_level); 3215 extern int cp9_HMM2ijBands_OLD(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, CP9Map_t *cp9map, int i0, int j0, int doing_search, int debug_level); 3216 extern int cp9_Seq2Bands (CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, CP9Bands_t *cp9b, int doing_search, int pass_idx, int debug_level); 3217 extern int cp9_IterateSeq2Bands(CM_t *cm, char *errbuf, ESL_DSQ *dsq, int64_t i0, int64_t j0, int pass_idx, float size_limit, int doing_search, int do_sample, int do_post, int do_iterate, double maxtau, float *ret_Mb); 3218 extern int cp9_Seq2Posteriors(CM_t *cm, char *errbuf, CP9_MX *fmx, CP9_MX *bmx, CP9_MX *pmx, ESL_DSQ *dsq, int i0, int j0, int debug_level); 3219 extern void cp9_DebugPrintHMMBands(FILE *ofp, int L, CP9Bands_t *cp9b, double hmm_bandp, int debug_level); 3220 extern int cp9_GrowHDBands(CP9Bands_t *cp9b, char *errbuf); 3221 extern int cp9_ValidateBands(CM_t *cm, char *errbuf, CP9Bands_t *cp9b, int i0, int j0, int do_trunc); 3222 extern void cp9_ShiftCMBands(CM_t *cm, int i, int j, int do_trunc); 3223 extern CP9Bands_t *cp9_CloneBands(CP9Bands_t *src_cp9b, char *errbuf); 3224 extern void cp9_PredictStartAndEndPositions(CP9_MX *pmx, CP9Bands_t *cp9b, int i0, int j0); 3225 extern int cp9_MarginalCandidatesFromStartEndPositions(CM_t *cm, CP9Bands_t *cp9b, int pass_idx, char *errbuf); 3226 extern void ij2d_bands(CM_t *cm, int L, int *imin, int *imax, int *jmin, int *jmax, 3227 int **hdmin, int **hdmax, int do_trunc, int debug_level); 3228 extern void PrintDPCellsSaved_jd(CM_t *cm, int *jmin, int *jmax, int **hdmin, int **hdmax, int W); 3229 extern void debug_print_ij_bands(CM_t *cm); 3230 3231 /* from logsum.c */ 3232 extern void init_ilogsum(void); 3233 extern int ILogsum(int s1, int s2); 3234 extern int ILogsumNI(int s1, int s2); 3235 extern int ILogsumNI_diff(int s1a, int s1b, int s2a, int s2b, int db); 3236 extern void FLogsumInit(void); 3237 extern float LogSum2(float p1, float p2); 3238 extern float FLogsum(float p1, float p2); 3239 3240 /* from mpisupport.c */ 3241 #if HAVE_MPI 3242 extern int cm_master_MPIBcast(CM_t *cm, char *errbuf, int tag, MPI_Comm comm, char **buf, int *nalloc); 3243 extern int cm_worker_MPIBcast(char *errbuf, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_ALPHABET **abc, CM_t **ret_cm); 3244 extern int cm_nonconfigured_MPIUnpack(ESL_ALPHABET **abc, char *errbuf, char *buf, int n, int *pos, MPI_Comm comm, CM_t **ret_cm); 3245 extern int cm_nonconfigured_MPIPack(CM_t *cm, char *errbuf, char *buf, int n, int *pos, MPI_Comm comm); 3246 extern int cm_nonconfigured_MPIPackSize(CM_t *cm, MPI_Comm comm, int *ret_n); 3247 extern int cm_dsq_MPISend(ESL_DSQ *dsq, int64_t L, int64_t idx, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc); 3248 extern int cm_dsq_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_DSQ **ret_dsq, int64_t *ret_L, int64_t *ret_idx); 3249 extern int cm_parsetree_MPIPackSize(Parsetree_t *tr, MPI_Comm comm, int *ret_n); 3250 extern int cm_parsetree_MPIPack(Parsetree_t *tr, char *buf, int n, int *position, MPI_Comm comm); 3251 extern int cm_parsetree_MPIUnpack(char *buf, int n, int *pos, MPI_Comm comm, Parsetree_t **ret_tr); 3252 extern int cm_pipeline_MPISend(CM_PIPELINE *pli, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc); 3253 extern int cm_pipeline_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, ESL_GETOPTS *go, CM_PIPELINE **ret_pli); 3254 extern int cm_tophits_MPISend(CM_TOPHITS *th, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc); 3255 extern int cm_tophits_MPIRecv(int source, int tag, MPI_Comm comm, char **buf, int *nalloc, CM_TOPHITS **ret_th); 3256 extern int cm_alndata_MPISend(CM_ALNDATA *data, int include_sq, char *errbuf, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc); 3257 extern int cm_alndata_MPIUnpack(char *buf, int n, int *pos, MPI_Comm comm, ESL_ALPHABET *abc, CM_ALNDATA **ret_data); 3258 #endif 3259 3260 /* from prior.c */ 3261 extern Prior_t *Prior_Create(void); 3262 extern void Prior_Destroy(Prior_t *pri); 3263 extern Prior_t *Prior_Read(FILE *fp); 3264 extern void PriorifyCM(CM_t *cm, const Prior_t *pri); 3265 extern Prior_t *Prior_Default(int mimic_h3); 3266 extern Prior_t *Prior_Default_v0p56_through_v1p02(void); 3267 3268 /* from rnamat.c */ 3269 extern int numbered_nucleotide (char c); 3270 extern int numbered_basepair (char c, char d); 3271 extern FILE *MatFileOpen (char *matfile); 3272 extern fullmat_t *ReadMatrix(const ESL_ALPHABET *abc, FILE *matfp); 3273 extern int ribosum_MSA_resolve_degeneracies(fullmat_t *fullmat, ESL_MSA *msa); 3274 extern int ribosum_calc_targets(fullmat_t *fullmat); 3275 extern void FreeMat(fullmat_t *fullmat); 3276 3277 /* from stats.c */ 3278 extern int debug_print_expinfo_array(CM_t *cm, char *errbuf, ExpInfo_t **expA); 3279 extern int debug_print_expinfo(ExpInfo_t *exp); 3280 extern int get_gc_comp(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int start, int stop); 3281 extern int get_alphabet_comp(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int start, int stop, float **ret_freq); 3282 extern int GetDBSize (ESL_SQFILE *sqfp, char *errbuf, long *ret_N, int *ret_nseq, int *ret_namewidth); 3283 extern int GetDBInfo(const ESL_ALPHABET *abc, ESL_SQFILE *sqfp, char *errbuf, long *ret_N, int *ret_nseq, double **ret_gc_ct); 3284 extern int E2ScoreGivenExpInfo(ExpInfo_t *exp, char *errbuf, double E, float *ret_sc); 3285 extern int P2ScoreGivenExpInfo(ExpInfo_t *exp, char *errbuf, double P, float *ret_sc); 3286 extern double Score2E(float x, double mu, double lambda, double eff_dbsize); 3287 extern float cm_p7_E2Score(double E, double Z, int hitlen, float mu, float lambda); 3288 extern float cm_p7_P2Score(double P, float mu, float lambda); 3289 extern int ExpModeIsLocal(int exp_mode); 3290 extern int ExpModeIsInside(int exp_mode); 3291 extern ExpInfo_t *CreateExpInfo(); 3292 extern void SetExpInfo(ExpInfo_t *exp, double lambda, double mu_orig, double dbsize, int nrandhits, double tailp); 3293 extern ExpInfo_t *DuplicateExpInfo(ExpInfo_t *src); 3294 extern char *DescribeExpMode(int exp_mode); 3295 extern int UpdateExpsForDBSize(CM_t *cm, char *errbuf, double dbsize); 3296 extern int CreateGenomicHMM(const ESL_ALPHABET *abc, char *errbuf, double **ret_sA, double ***ret_tAA, double ***ret_eAA, int *ret_nstates); 3297 extern int SampleGenomicSequenceFromHMM(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, char *errbuf, double *sA, double **tAA, double **eAA, int nstates, int L, ESL_DSQ **ret_dsq); 3298 extern int CopyExpInfo(ExpInfo_t *src, ExpInfo_t *dest); 3299 3300 /* from truncyk.c */ 3301 void SetMarginalScores_reproduce_i27(CM_t *cm); 3302 float LeftMarginalScore_reproduce_i27(const ESL_ALPHABET *abc, float *esc, ESL_DSQ dres); 3303 float RightMarginalScore_reproduce_i27(const ESL_ALPHABET *abc, float *esc, ESL_DSQ dres); 3304 3305 float TrCYK_DnC(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, int pass_idx, int do_1p0, Parsetree_t **ret_tr); 3306 float TrCYK_Inside(CM_t *cm, ESL_DSQ *dsq, int L, int r, int i0, int j0, int pass_idx, int do_1p0, int lenCORREX, Parsetree_t **ret_tr); 3307 /* legacy, avoid use: */ 3308 float trinside (CM_t *cm, ESL_DSQ *dsq, int L, int vroot, int vend, int i0, int j0, int do_full, 3309 void ****ret_shadow, void ****ret_L_shadow, void ****ret_R_shadow, 3310 void ****ret_T_shadow, void ****ret_Lmode_shadow, void ****ret_Rmode_shadow, 3311 int *ret_mode, int *ret_v, int *ret_i, int *ret_j); 3312 3313 3314 #endif /*INFERNALH_INCLUDED*/ 3315 3316 3317