1 #ifndef STRUCTSH_INCLUDED 2 #define STRUCTSH_INCLUDED 3 /* structs.h - declarations of data structures 4 * SRE, Tue Aug 31 15:17:12 1993 5 */ 6 #include "squid.h" 7 8 /* Alphabet information. 9 * The package is designed to be configurable for protein analysis 10 * just by changing these define's. Dunno if it would be *useful* 11 * to apply it to protein work -- but the possibility's there. 12 */ 13 #define ALPHATYPE kRNA 14 #define ALPHASIZE 4 15 extern char *ALPHABET; /* defined at top of misc.c */ 16 17 /* 18 * Node types. 19 * These are used for clarity in the code, not to make it easy 20 * to change them: the program makes assumptions about the order 21 * they come in, so DON'T CHANGE THESE. 22 * 23 * Specifically, the following assumptions are made: 24 * - that they come in *exactly* this order (static state transition array in prior.h) 25 * - that BIFURC, MATP, MATL, MATR are all less than 4 26 * (second index, prior.h; also some loops in maxmodelmaker.c) 27 * - ROOT is last (indexing of scores of mmx model construction matrix, maxmodelmaker.c) 28 */ 29 #define BIFURC_NODE 0 30 #define MATP_NODE 1 31 #define MATL_NODE 2 32 #define MATR_NODE 3 33 #define BEGINL_NODE 4 34 #define BEGINR_NODE 5 35 #define ROOT_NODE 6 36 #define NODETYPES 7 /* number of different node types */ 37 38 #define END_NODE BIFURC_NODE 39 40 /* 41 * State types. 42 * These are used for clarity in the code, not to make it easy 43 * to change them: the program makes assumptions about the order 44 * they come in, so DON'T CHANGE THESE. 45 */ 46 #define DEL_ST 0 47 #define MATP_ST 1 48 #define MATL_ST 2 49 #define MATR_ST 3 50 #define INSL_ST 4 51 #define INSR_ST 5 52 #define STATETYPES 6 /* MATP nodes contain 6 states */ 53 54 #define BEGIN_ST DEL_ST 55 #define BIFURC_ST DEL_ST 56 #define END_ST DEL_ST 57 58 /* Unique identifiers for state types, used as flags not indexes 59 * in the alignment algorithms. 60 */ 61 #define uDEL_ST (1<<0) 62 #define uMATP_ST (1<<1) 63 #define uMATL_ST (1<<2) 64 #define uMATR_ST (1<<3) 65 #define uINSL_ST (1<<4) 66 #define uINSR_ST (1<<5) 67 #define uBEGIN_ST (1<<6) 68 #define uEND_ST (1<<7) 69 #define uBIFURC_ST (1<<8) 70 71 72 /* Structure: node_s 73 * 74 * Purpose: Contains all the information necessary to describe a node. 75 */ 76 struct node_s { 77 int type; 78 79 double tmx[STATETYPES][STATETYPES]; /* up to 49 transition probs */ 80 double mp_emit[ALPHASIZE][ALPHASIZE]; /* 4x4 MATP emission probs */ 81 double il_emit[ALPHASIZE]; /* 4 INSL emission probs */ 82 double ir_emit[ALPHASIZE]; /* 4 INSR emission probs */ 83 double ml_emit[ALPHASIZE]; /* 4 MATL emission probs */ 84 double mr_emit[ALPHASIZE]; /* 4 MATR emission probs */ 85 86 int nxt; /* connection to left child */ 87 int nxt2; /* connection to right child */ 88 }; 89 90 91 92 /* Structure: cm_s 93 * 94 * Purpose: A covariance model. 95 */ 96 struct cm_s { 97 int nodes; /* number of nodes */ 98 struct node_s *nd; /* array of nodes 0..nodes-1 */ 99 }; 100 101 102 /* Structure: istate_s 103 * 104 * In the alignment algorithms, a CM is converted to an array of states, 105 * each represented by one of these structures. Each state contains 106 * probability info as integers instead of floating point. 107 * 108 * The order of the state transition vector is different than in 109 * the CM. INSL and INSR are first: INSL, INSR, DEL, MATP, MATL, MATR. 110 */ 111 struct istate_s { 112 int nodeidx; /* index of node this state belongs to */ 113 int statetype; /* unique id for type of this state (uMATP_ST, etc.) */ 114 int offset; /* offset in state array to first INS state */ 115 int connectnum; /* number of elements in tmx */ 116 int tmx[STATETYPES]; /* rearranged transition vector, int log-odds */ 117 int emit[ALPHASIZE*ALPHASIZE]; /* int lod emission vector (4 or 16) or NULL */ 118 }; 119 120 struct pstate_s { 121 int nodeidx; /* index of node this state belongs to */ 122 int statetype; /* unique id for type of this state (uMATP_ST, etc.) */ 123 int offset; /* offset in state array to first INS state */ 124 int connectnum; /* number of elements in tmx */ 125 int bifr; /* (uBIF_ST only) index of right connection */ 126 double tmx[STATETYPES]; /* rearranged transition vector */ 127 double emit[ALPHASIZE*ALPHASIZE]; /* emission vector (4 or 16) or NULL */ 128 }; 129 130 131 132 /* Structure: prior_s 133 * 134 * Purpose: Contains the prior probability distributions for 135 * state transitions and symbol emissions, as well 136 * as the alpha "confidence" values applied during 137 * regularization, and alphabet information. 138 */ 139 struct prior_s { 140 double tprior[7][4][STATETYPES][STATETYPES]; /* state transitions */ 141 double matp_prior[ALPHASIZE][ALPHASIZE]; /* MATP_ST emissions */ 142 double matl_prior[ALPHASIZE]; /* MATL_ST emissions */ 143 double matr_prior[ALPHASIZE]; /* MATR_ST emissions */ 144 double insl_prior[ALPHASIZE]; /* INSL_ST emissions */ 145 double insr_prior[ALPHASIZE]; /* INSR_ST emissions */ 146 147 double talpha[STATETYPES]; /* alpha's for state transitions */ 148 double emalpha[STATETYPES]; /* alpha's for symbol emissions */ 149 150 double rfreq[ALPHASIZE]; /* background symbol freqs for random model */ 151 }; 152 153 154 155 /* Structure: trace_s 156 * 157 * Binary tree structure for storing a traceback of an alignment; 158 * also used for tracebacks of model constructions. 159 */ 160 struct trace_s { 161 int emitl; /* i position (1..N) or 0 if nothing */ 162 int emitr; /* j position (1..N) or 0 if nothing */ 163 164 int nodeidx; /* index of node responsible for this alignment */ 165 int type; /* type of substate (uMATP_ST, etc.) used (unique) */ 166 167 struct trace_s *nxtl; /* ptr to left (or only) branch, or NULL for end */ 168 struct trace_s *nxtr; /* ptr to right branch, BIFURC only, else NULL */ 169 struct trace_s *prv; /* ptr to parent */ 170 }; 171 172 173 /* Structure: trmem_s 174 * 175 * It's expensive in malloc()'s to build trace trees. This structure 176 * allows trace.c to cut down malloc overhead, by keeping a pool 177 * of trace_s structures. 178 */ 179 struct trmem_s { 180 int next; /* index of next trace_s to use in pool */ 181 int num; /* how many trace_s total in pool */ 182 struct trace_s *pool; /* alloced array of trace_s structs */ 183 struct tracestack_s *used; /* old (fully used) pools, waiting to be freed */ 184 }; 185 #define TMEM_BLOCK 256 /* how many trace_s to alloc per malloc() call */ 186 187 188 /* Structure: tracestack_s 189 * 190 * Formerly a pushdown stack used for traversing a binary tree of trace_s structures. 191 * Reimplemented as an array for malloc efficiency. 192 */ 193 struct tracestack_s { 194 int next; /* index of next trace_s pointer to use */ 195 int num; /* number of trace_s pointers alloc'ed */ 196 struct trace_s **list; /* array of trace_s pointers */ 197 }; 198 #define TSTACK_BLOCK 64 199 200 /* A struct align_s implements a linked list describing the alignment 201 * of a model to a sequence. Note that this is the inverse of what 202 * trace_s trees are for; align_s is a linear representation of 203 * the alignment (from the sequence's point of view, if you will) 204 */ 205 struct align_s { 206 int pos; /* pos in seq emitted (0..N-1; -1 if none) */ 207 char sym; /* symbol emitted (ACGU, . if none) */ 208 char ss; /* secondary structure character, <>. */ 209 int nodeidx; /* index of model state aligned to this position */ 210 int type; /* type of substate reponsible for this emission (unique) */ 211 struct align_s *nxt; 212 }; 213 214 /* A struct m2ali_s implements a pushdown stack used for traversing 215 * a model and producing an align_s alignment list. 216 */ 217 struct m2ali_s { 218 int nodeidx; /* index of position in model (0..M) */ 219 int type; /* subtype of position in model */ 220 struct align_s *after; /* position in align_s list */ 221 struct m2ali_s *nxt; 222 }; 223 224 225 /* A struct t2ali_s implements a pushdown stack for traversing a 226 * traceback tree and producing an align_s alignment list. 227 */ 228 struct t2ali_s { 229 struct trace_s *tracenode; 230 struct align_s *after; 231 struct t2ali_s *nxt; 232 }; 233 234 /* some stuff used when we store sums of log scores as integers, 235 * for speed and precision 236 */ 237 #define INTPRECISION 1000.0 /* pick up three decimal places in our ints */ 238 #define NEGINFINITY -999999 /* -999.999 is small enough for -Inf */ 239 #define POSINFINITY 999999 /* +999.999 is large enough for +Inf */ 240 #define ILOG2(a) (((a) > 0.0) ? (log(a) / 0.69314718 * INTPRECISION) : NEGINFINITY) 241 242 #endif /* STRUCTSH_INCLUDED */ 243