structs.h - OpenGrok cross reference for /dports/biology/tRNAscan-SE/tRNAscan-SE-2.0/src/structs.h

#ifndef STRUCTSH_INCLUDED
#define STRUCTSH_INCLUDED
/* structs.h - declarations of data structures
 * SRE, Tue Aug 31 15:17:12 1993
 */
#include "squid.h"

/* Alphabet information.
 * The package is designed to be configurable for protein analysis
 * just by changing these define's. Dunno if it would be *useful*
 * to apply it to protein work -- but the possibility's there.
 */
#define ALPHATYPE   kRNA
#define ALPHASIZE   4
extern char *ALPHABET;		/* defined at top of misc.c */

/*
 * Node types.
 * These are used for clarity in the code, not to make it easy
 * to change them: the program makes assumptions about the order
 * they come in, so DON'T CHANGE THESE.
 *
 * Specifically, the following assumptions are made:
 *  - that they come in *exactly* this order (static state transition array in prior.h)
 *  - that BIFURC, MATP, MATL, MATR are all less than 4
 *       (second index, prior.h; also some loops in maxmodelmaker.c)
 *  - ROOT is last (indexing of scores of mmx model construction matrix, maxmodelmaker.c)
 */
#define BIFURC_NODE 0
#define MATP_NODE   1
#define MATL_NODE   2
#define MATR_NODE   3
#define BEGINL_NODE 4
#define BEGINR_NODE 5
#define ROOT_NODE   6
#define NODETYPES   7		/* number of different node types  */

#define END_NODE    BIFURC_NODE

/*
 * State types.
 * These are used for clarity in the code, not to make it easy
 * to change them: the program makes assumptions about the order
 * they come in, so DON'T CHANGE THESE.
 */
#define DEL_ST         0
#define MATP_ST        1
#define MATL_ST        2
#define MATR_ST        3
#define INSL_ST        4
#define INSR_ST        5
#define STATETYPES     6	/* MATP nodes contain 6 states */

#define BEGIN_ST       DEL_ST
#define BIFURC_ST      DEL_ST
#define END_ST         DEL_ST

/* Unique identifiers for state types, used as flags not indexes
 * in the alignment algorithms.
 */
#define uDEL_ST     (1<<0)
#define uMATP_ST    (1<<1)
#define uMATL_ST    (1<<2)
#define uMATR_ST    (1<<3)
#define uINSL_ST    (1<<4)
#define uINSR_ST    (1<<5)
#define uBEGIN_ST   (1<<6)
#define uEND_ST     (1<<7)
#define uBIFURC_ST  (1<<8)


/* Structure: node_s
 *
 * Purpose:   Contains all the information necessary to describe a node.
 */
struct node_s {
  int  type;

  double tmx[STATETYPES][STATETYPES];	 /* up to 49 transition probs */
  double mp_emit[ALPHASIZE][ALPHASIZE];  /* 4x4 MATP emission probs   */
  double il_emit[ALPHASIZE];             /* 4 INSL emission probs     */
  double ir_emit[ALPHASIZE];             /* 4 INSR emission probs     */
  double ml_emit[ALPHASIZE];             /* 4 MATL emission probs     */
  double mr_emit[ALPHASIZE];             /* 4 MATR emission probs     */

  int nxt;			/* connection to left child */
  int nxt2;                     /* connection to right child */
};


/* Structure: cm_s
 *
 * Purpose:   A covariance model.
 */
struct cm_s {
  int nodes;			/* number of nodes  */
  struct node_s *nd;            /* array of nodes 0..nodes-1 */
};


/* Structure: istate_s
 *
 * In the alignment algorithms, a CM is converted to an array of states,
 * each represented by one of these structures. Each state contains
 * probability info as integers instead of floating point.
 *
 * The order of the state transition vector is different than in
 * the CM. INSL and INSR are first: INSL, INSR, DEL, MATP, MATL, MATR.
 */
struct istate_s {
  int   nodeidx;		/* index of node this state belongs to        */
  int   statetype;		/* unique id for type of this state (uMATP_ST, etc.) */
  int   offset;			/* offset in state array to first INS state   */
  int   connectnum;		/* number of elements in tmx                  */
  int   tmx[STATETYPES];	/* rearranged transition vector, int log-odds */
  int   emit[ALPHASIZE*ALPHASIZE]; /* int lod emission vector (4 or 16) or NULL  */
};

struct pstate_s {
  int   nodeidx;		/* index of node this state belongs to        */
  int   statetype;		/* unique id for type of this state (uMATP_ST, etc.) */
  int   offset;			/* offset in state array to first INS state   */
  int   connectnum;		/* number of elements in tmx                  */
  int   bifr;			/* (uBIF_ST only) index of right connection   */
  double tmx[STATETYPES];	/* rearranged transition vector               */
  double emit[ALPHASIZE*ALPHASIZE]; /* emission vector (4 or 16) or NULL       */
};


/* Structure: prior_s
 *
 * Purpose:   Contains the prior probability distributions for
 *            state transitions and symbol emissions, as well
 *            as the alpha "confidence" values applied during
 *            regularization, and alphabet information.
 */
struct prior_s {
  double tprior[7][4][STATETYPES][STATETYPES];	/* state transitions  */
  double matp_prior[ALPHASIZE][ALPHASIZE];      /* MATP_ST emissions  */
  double matl_prior[ALPHASIZE];                 /* MATL_ST emissions  */
  double matr_prior[ALPHASIZE];                 /* MATR_ST emissions  */
  double insl_prior[ALPHASIZE];                 /* INSL_ST emissions  */
  double insr_prior[ALPHASIZE];                 /* INSR_ST emissions  */

  double talpha[STATETYPES];	/* alpha's for state transitions      */
  double emalpha[STATETYPES];   /* alpha's for symbol emissions       */

  double rfreq[ALPHASIZE];	/* background symbol freqs for random model */
};


/* Structure: trace_s
 *
 * Binary tree structure for storing a traceback of an alignment;
 * also used for tracebacks of model constructions.
 */
struct trace_s {
  int emitl;			/* i position (1..N) or 0 if nothing */
  int emitr;			/* j position (1..N) or 0 if nothing */

  int nodeidx;			/* index of node responsible for this alignment */
  int type;			/* type of substate (uMATP_ST, etc.) used (unique) */

  struct trace_s *nxtl;		/* ptr to left (or only) branch, or NULL for end */
  struct trace_s *nxtr;		/* ptr to right branch, BIFURC only, else NULL */
  struct trace_s *prv;          /* ptr to parent                               */
};


/* Structure: trmem_s
 *
 * It's expensive in malloc()'s to build trace trees. This structure
 * allows trace.c to cut down malloc overhead, by keeping a pool
 * of trace_s structures.
 */
struct trmem_s {
  int next;			/* index of next trace_s to use in pool        */
  int num;			/* how many trace_s total in pool              */
  struct trace_s *pool;         /* alloced array of trace_s structs            */
  struct tracestack_s *used;    /* old (fully used) pools, waiting to be freed */
};
#define TMEM_BLOCK 256		/* how many trace_s to alloc per malloc() call */


/* Structure: tracestack_s
 *
 * Formerly a pushdown stack used for traversing a binary tree of trace_s structures.
 * Reimplemented as an array for malloc efficiency.
 */
struct tracestack_s {
  int next;			/* index of next trace_s pointer to use */
  int num;			/* number of trace_s pointers alloc'ed  */
  struct trace_s **list;        /* array of trace_s pointers            */
};
#define TSTACK_BLOCK 64

/* A struct align_s implements a linked list describing the alignment
 * of a model to a sequence. Note that this is the inverse of what
 * trace_s trees are for; align_s is a linear representation of
 * the alignment (from the sequence's point of view, if you will)
 */
struct align_s {
  int              pos;		/* pos in seq emitted (0..N-1; -1 if none)       */
  char             sym;		/* symbol emitted (ACGU, . if none)              */
  char             ss;          /* secondary structure character, <>.            */
  int              nodeidx;     /* index of model state aligned to this position */
  int              type;	/* type of substate reponsible for this emission (unique) */
  struct align_s  *nxt;
};

/* A struct m2ali_s implements a pushdown stack used for traversing
 * a model and producing an align_s alignment list.
 */
struct m2ali_s {
  int              nodeidx;	/* index of position in model (0..M) */
  int              type;	/* subtype of position in model      */
  struct align_s  *after;       /* position in align_s list          */
  struct m2ali_s  *nxt;
};


/* A struct t2ali_s implements a pushdown stack for traversing a
 * traceback tree and producing an align_s alignment list.
 */
struct t2ali_s {
  struct trace_s *tracenode;
  struct align_s *after;
  struct t2ali_s *nxt;
};

/* some stuff used when we store sums of log scores as integers,
 * for speed and precision
 */
#define INTPRECISION 1000.0	/* pick up three decimal places in our ints */
#define NEGINFINITY -999999     /* -999.999 is small enough for -Inf        */
#define POSINFINITY  999999     /* +999.999 is large enough for +Inf        */
#define ILOG2(a) (((a) > 0.0) ? (log(a) / 0.69314718 * INTPRECISION) : NEGINFINITY)

#endif /* STRUCTSH_INCLUDED */