1 #ifndef STRUCTSH_INCLUDED
2 #define STRUCTSH_INCLUDED
3 /* structs.h - declarations of data structures
4  * SRE, Tue Aug 31 15:17:12 1993
5  */
6 #include "squid.h"
7 
8 /* Alphabet information.
9  * The package is designed to be configurable for protein analysis
10  * just by changing these define's. Dunno if it would be *useful*
11  * to apply it to protein work -- but the possibility's there.
12  */
13 #define ALPHATYPE   kRNA
14 #define ALPHASIZE   4
15 extern char *ALPHABET;		/* defined at top of misc.c */
16 
17 /*
18  * Node types.
19  * These are used for clarity in the code, not to make it easy
20  * to change them: the program makes assumptions about the order
21  * they come in, so DON'T CHANGE THESE.
22  *
23  * Specifically, the following assumptions are made:
24  *  - that they come in *exactly* this order (static state transition array in prior.h)
25  *  - that BIFURC, MATP, MATL, MATR are all less than 4
26  *       (second index, prior.h; also some loops in maxmodelmaker.c)
27  *  - ROOT is last (indexing of scores of mmx model construction matrix, maxmodelmaker.c)
28  */
29 #define BIFURC_NODE 0
30 #define MATP_NODE   1
31 #define MATL_NODE   2
32 #define MATR_NODE   3
33 #define BEGINL_NODE 4
34 #define BEGINR_NODE 5
35 #define ROOT_NODE   6
36 #define NODETYPES   7		/* number of different node types  */
37 
38 #define END_NODE    BIFURC_NODE
39 
40 /*
41  * State types.
42  * These are used for clarity in the code, not to make it easy
43  * to change them: the program makes assumptions about the order
44  * they come in, so DON'T CHANGE THESE.
45  */
46 #define DEL_ST         0
47 #define MATP_ST        1
48 #define MATL_ST        2
49 #define MATR_ST        3
50 #define INSL_ST        4
51 #define INSR_ST        5
52 #define STATETYPES     6	/* MATP nodes contain 6 states */
53 
54 #define BEGIN_ST       DEL_ST
55 #define BIFURC_ST      DEL_ST
56 #define END_ST         DEL_ST
57 
58 /* Unique identifiers for state types, used as flags not indexes
59  * in the alignment algorithms.
60  */
61 #define uDEL_ST     (1<<0)
62 #define uMATP_ST    (1<<1)
63 #define uMATL_ST    (1<<2)
64 #define uMATR_ST    (1<<3)
65 #define uINSL_ST    (1<<4)
66 #define uINSR_ST    (1<<5)
67 #define uBEGIN_ST   (1<<6)
68 #define uEND_ST     (1<<7)
69 #define uBIFURC_ST  (1<<8)
70 
71 
72 /* Structure: node_s
73  *
74  * Purpose:   Contains all the information necessary to describe a node.
75  */
76 struct node_s {
77   int  type;
78 
79   double tmx[STATETYPES][STATETYPES];	 /* up to 49 transition probs */
80   double mp_emit[ALPHASIZE][ALPHASIZE];  /* 4x4 MATP emission probs   */
81   double il_emit[ALPHASIZE];             /* 4 INSL emission probs     */
82   double ir_emit[ALPHASIZE];             /* 4 INSR emission probs     */
83   double ml_emit[ALPHASIZE];             /* 4 MATL emission probs     */
84   double mr_emit[ALPHASIZE];             /* 4 MATR emission probs     */
85 
86   int nxt;			/* connection to left child */
87   int nxt2;                     /* connection to right child */
88 };
89 
90 
91 
92 /* Structure: cm_s
93  *
94  * Purpose:   A covariance model.
95  */
96 struct cm_s {
97   int nodes;			/* number of nodes  */
98   struct node_s *nd;            /* array of nodes 0..nodes-1 */
99 };
100 
101 
102 /* Structure: istate_s
103  *
104  * In the alignment algorithms, a CM is converted to an array of states,
105  * each represented by one of these structures. Each state contains
106  * probability info as integers instead of floating point.
107  *
108  * The order of the state transition vector is different than in
109  * the CM. INSL and INSR are first: INSL, INSR, DEL, MATP, MATL, MATR.
110  */
111 struct istate_s {
112   int   nodeidx;		/* index of node this state belongs to        */
113   int   statetype;		/* unique id for type of this state (uMATP_ST, etc.) */
114   int   offset;			/* offset in state array to first INS state   */
115   int   connectnum;		/* number of elements in tmx                  */
116   int   tmx[STATETYPES];	/* rearranged transition vector, int log-odds */
117   int   emit[ALPHASIZE*ALPHASIZE]; /* int lod emission vector (4 or 16) or NULL  */
118 };
119 
120 struct pstate_s {
121   int   nodeidx;		/* index of node this state belongs to        */
122   int   statetype;		/* unique id for type of this state (uMATP_ST, etc.) */
123   int   offset;			/* offset in state array to first INS state   */
124   int   connectnum;		/* number of elements in tmx                  */
125   int   bifr;			/* (uBIF_ST only) index of right connection   */
126   double tmx[STATETYPES];	/* rearranged transition vector               */
127   double emit[ALPHASIZE*ALPHASIZE]; /* emission vector (4 or 16) or NULL       */
128 };
129 
130 
131 
132 /* Structure: prior_s
133  *
134  * Purpose:   Contains the prior probability distributions for
135  *            state transitions and symbol emissions, as well
136  *            as the alpha "confidence" values applied during
137  *            regularization, and alphabet information.
138  */
139 struct prior_s {
140   double tprior[7][4][STATETYPES][STATETYPES];	/* state transitions  */
141   double matp_prior[ALPHASIZE][ALPHASIZE];      /* MATP_ST emissions  */
142   double matl_prior[ALPHASIZE];                 /* MATL_ST emissions  */
143   double matr_prior[ALPHASIZE];                 /* MATR_ST emissions  */
144   double insl_prior[ALPHASIZE];                 /* INSL_ST emissions  */
145   double insr_prior[ALPHASIZE];                 /* INSR_ST emissions  */
146 
147   double talpha[STATETYPES];	/* alpha's for state transitions      */
148   double emalpha[STATETYPES];   /* alpha's for symbol emissions       */
149 
150   double rfreq[ALPHASIZE];	/* background symbol freqs for random model */
151 };
152 
153 
154 
155 /* Structure: trace_s
156  *
157  * Binary tree structure for storing a traceback of an alignment;
158  * also used for tracebacks of model constructions.
159  */
160 struct trace_s {
161   int emitl;			/* i position (1..N) or 0 if nothing */
162   int emitr;			/* j position (1..N) or 0 if nothing */
163 
164   int nodeidx;			/* index of node responsible for this alignment */
165   int type;			/* type of substate (uMATP_ST, etc.) used (unique) */
166 
167   struct trace_s *nxtl;		/* ptr to left (or only) branch, or NULL for end */
168   struct trace_s *nxtr;		/* ptr to right branch, BIFURC only, else NULL */
169   struct trace_s *prv;          /* ptr to parent                               */
170 };
171 
172 
173 /* Structure: trmem_s
174  *
175  * It's expensive in malloc()'s to build trace trees. This structure
176  * allows trace.c to cut down malloc overhead, by keeping a pool
177  * of trace_s structures.
178  */
179 struct trmem_s {
180   int next;			/* index of next trace_s to use in pool        */
181   int num;			/* how many trace_s total in pool              */
182   struct trace_s *pool;         /* alloced array of trace_s structs            */
183   struct tracestack_s *used;    /* old (fully used) pools, waiting to be freed */
184 };
185 #define TMEM_BLOCK 256		/* how many trace_s to alloc per malloc() call */
186 
187 
188 /* Structure: tracestack_s
189  *
190  * Formerly a pushdown stack used for traversing a binary tree of trace_s structures.
191  * Reimplemented as an array for malloc efficiency.
192  */
193 struct tracestack_s {
194   int next;			/* index of next trace_s pointer to use */
195   int num;			/* number of trace_s pointers alloc'ed  */
196   struct trace_s **list;        /* array of trace_s pointers            */
197 };
198 #define TSTACK_BLOCK 64
199 
200 /* A struct align_s implements a linked list describing the alignment
201  * of a model to a sequence. Note that this is the inverse of what
202  * trace_s trees are for; align_s is a linear representation of
203  * the alignment (from the sequence's point of view, if you will)
204  */
205 struct align_s {
206   int              pos;		/* pos in seq emitted (0..N-1; -1 if none)       */
207   char             sym;		/* symbol emitted (ACGU, . if none)              */
208   char             ss;          /* secondary structure character, <>.            */
209   int              nodeidx;     /* index of model state aligned to this position */
210   int              type;	/* type of substate reponsible for this emission (unique) */
211   struct align_s  *nxt;
212 };
213 
214 /* A struct m2ali_s implements a pushdown stack used for traversing
215  * a model and producing an align_s alignment list.
216  */
217 struct m2ali_s {
218   int              nodeidx;	/* index of position in model (0..M) */
219   int              type;	/* subtype of position in model      */
220   struct align_s  *after;       /* position in align_s list          */
221   struct m2ali_s  *nxt;
222 };
223 
224 
225 /* A struct t2ali_s implements a pushdown stack for traversing a
226  * traceback tree and producing an align_s alignment list.
227  */
228 struct t2ali_s {
229   struct trace_s *tracenode;
230   struct align_s *after;
231   struct t2ali_s *nxt;
232 };
233 
234 /* some stuff used when we store sums of log scores as integers,
235  * for speed and precision
236  */
237 #define INTPRECISION 1000.0	/* pick up three decimal places in our ints */
238 #define NEGINFINITY -999999     /* -999.999 is small enough for -Inf        */
239 #define POSINFINITY  999999     /* +999.999 is large enough for +Inf        */
240 #define ILOG2(a) (((a) > 0.0) ? (log(a) / 0.69314718 * INTPRECISION) : NEGINFINITY)
241 
242 #endif /* STRUCTSH_INCLUDED */
243