1 /*
2  * rnamat.h
3  *
4  * Header file for API for RNA matrix routines.  Used in parsing alignment
5  * into matrix and later reading in matrix.
6  *
7  * Robert J. Klein
8  * February 25, 2002
9  */
10 
11 #ifndef _RNAMAT_H
12 #define _RNAMAT_H
13 
14 #include "esl_config.h"
15 #include "config.h"
16 
17 #include "easel.h"
18 #include "esl_msa.h"
19 
20 #include "structs.h"
21 
/*
 * The 16 ordered RNA residue pairs, written as two parallel strings:
 * character i of RNAPAIR_ALPHABET and character i of RNAPAIR_ALPHABET2
 * together spell pair i, so reading the strings column by column gives
 * AA, AC, AG, AU, CA, ..., UG, UU.
 */
#define RNAPAIR_ALPHABET  "AAAACCCCGGGGUUUU"
#define RNAPAIR_ALPHABET2 "ACGUACGUACGUACGU"
24 
/*
 * Matrix type
 *
 * Contains the matrix as an array in one dimension (to be indexed later,
 * see matrix_index), the matrix size, and the values H and E.
 */
typedef struct _matrix_t {
  double *matrix;        /* One-dimensional array holding the matrix values */
  int edge_size;         /* Size of one edge, e.g. 4 for 4x4 matrix */
  int full_size;         /* Num of elements, e.g. 10 for 4x4 matrix */
  double H;              /* NOTE(review): presumably the matrix entropy --
                          * confirm against the code that fills it in */
  double E;              /* NOTE(review): presumably the expected score --
                          * confirm against the code that fills it in */
} matrix_t;
38 
39 /*
40  * Full matrix definition, includes the g background freq vector (added by EPN).
41  */
42 typedef struct _fullmat_t {
43   const ESL_ALPHABET *abc;/* alphabet, we enforce it's eslRNA */
44   matrix_t *unpaired;
45   matrix_t *paired;
46   char     *name;
47   float    *g;           /* EPN: the background distro, g vector in RSEARCH paper
48 			  * this now appears in the RIBOSUM matrix files */
49   int       scores_flag; /* TRUE if matrix values are log odds scores, FALSE if
50 			  * they're target probs, or unfilled */
51   int       probs_flag;  /* TRUE if matrix values are target probs, FALSE if
52 			  * they're log odds scores, or unfilled */
53 } fullmat_t;
54 
/*
 * Returns true if pos. C of seq B of msa A is a gap.
 *
 * The residue is read from A->aseq[B][C] and tested against A->abc with
 * esl_abc_CIsGap().  Every argument is parenthesized so that expressions
 * (e.g. &msa, i+1) can be passed safely.  A appears twice in the expansion,
 * so do not pass an expression with side effects.
 */
#define is_rna_gap(A, B, C) (esl_abc_CIsGap((A)->abc, (A)->aseq[(B)][(C)]))
57 
/*
 * Returns true if position C of sequence B of msa A is a canonical residue.
 *
 * The residue is read from A->aseq[B][C] (the msa's `aseq` array, not a
 * digitized sequence) and tested against A->abc with
 * esl_abc_CIsCanonical().  Every argument is parenthesized so that
 * expressions (e.g. &msa, i+1) can be passed safely.  A appears twice in
 * the expansion, so do not pass an expression with side effects.
 */
#define is_defined_rna_nucleotide(A, B, C) (esl_abc_CIsCanonical((A)->abc, (A)->aseq[(B)][(C)]))
60 
/*
 * Maps nucleotide character c to a number as follows:
 *
 * A -> 0
 * C -> 1
 * G -> 2
 * T -> 3
 * U -> 3
 * anything else -> 4
 */
int numbered_nucleotide (char c);
72 
/*
 * Maps base pair c,d to its number among the 16 ordered pairs:
 *
 * AA -> 0
 * AC -> 1
 * ....
 * TG -> 14
 * TT -> 15 (T==U)
 * Anything else maps to -1
 *
 * NOTE(review): the last two values follow from AA -> 0 with 16 pairs in
 * total (the order of RNAPAIR_ALPHABET/RNAPAIR_ALPHABET2); confirm against
 * the definition.
 */
int numbered_basepair (char c, char d);
84 
/*
 * Maps a pair of numbers (X,Y) to its index in the one-dimensional matrix
 * array.  The arguments need not be sorted: (X,Y) and (Y,X) give the same
 * index, namely big*(big+1)/2 + small.
 *
 * See lab book 7, p. 3-4 for details of mapping function
 *
 * Each argument is parenthesized so that expressions such as i+1 expand
 * with the intended precedence.  Arguments are evaluated more than once,
 * so do not pass expressions with side effects.
 */
#define matrix_index(X,Y) (((X) > (Y)) ? (X)*((X)+1)/2 + (Y) : (Y)*((Y)+1)/2 + (X))
92 
/*
 * Number of elements in each one-dimensional matrix array: the index of
 * the last cell plus one.  This is 10 for the unpaired matrix (numbers
 * 0..3) and 136 for the paired matrix (numbers 0..15).
 */
#define unpairedmat_size (matrix_index(3,3) + 1)
#define pairedmat_size   (matrix_index(15,15) + 1)
95 
96 /*
97  * Setup the matrix by allocating matrix in two dimensions as triangle.
98  * Initialize to 0.0
99  */
100 matrix_t *setup_matrix (int size);
101 
102 /*
103  * Actually count the basepairs and gaps into the fullmat simply by summing
104  * to existing values there.  Also counts nt counts to background_nt
105  */
106 void count_matrix (ESL_MSA *msa, fullmat_t *fullmat, double *background_nt,
107 		   int cutoff_perc, int product_weights);
108 
109 /*
110  * Prints the matrix
111  */
112 void print_matrix (FILE *fp, fullmat_t *fullmat);
113 
114 /*
115  * Read the matrix from a file
116  */
117 fullmat_t *ReadMatrix(const ESL_ALPHABET *abc, FILE *matfp);
118 
/*
 * Opens the matrix file named matfile and returns its stream.
 *
 * NOTE(review): the failure behavior (e.g. a NULL return) is not visible
 * in this header; confirm against the definition.
 */
FILE *MatFileOpen (char *matfile);
123 
124 /*
125  * Reports minium allowed sum of alpha + beta for matrix
126  */
127 float get_min_alpha_beta_sum (fullmat_t *fullmat);
128 
129 /* Free a fullmat_t object */
130 void FreeMat(fullmat_t *fullmat);
131 
132 /* convert a matrix with log odds scores to target freqs */
133 int ribosum_calc_targets(fullmat_t *fullmat);
134 
135 /* resolve degeneracies in a single seq MSA by replacing
136  * with most likely target residue within degenerate alphabet */
137 int ribosum_MSA_resolve_degeneracies(fullmat_t *fullmat, ESL_MSA *msa);
138 
/*
 * Maps residue number i back to its residue, as follows:
 *
 * 0 -> A
 * 1 -> C
 * 2 -> G
 * 3 -> U
 * anything else -> -1
 */
int unpaired_res (int i);
148 
149 #endif
150 
151