1 /* 2 * rnamat.h 3 * 4 * Header file for API for RNA matrix routines. Used in parsing alignment 5 * into matrix and later reading in matrix. 6 * 7 * Robert J. Klein 8 * February 25, 2002 9 */ 10 11 #ifndef _RNAMAT_H 12 #define _RNAMAT_H 13 14 #include "esl_config.h" 15 #include "config.h" 16 17 #include "easel.h" 18 #include "esl_msa.h" 19 20 #include "structs.h" 21 22 #define RNAPAIR_ALPHABET "AAAACCCCGGGGUUUU" 23 #define RNAPAIR_ALPHABET2 "ACGUACGUACGUACGU" 24 25 /* 26 * Matrix type 27 * 28 * Contains array in one dimension (to be indexed later), matrix size, 29 * H, and E. 30 */ 31 typedef struct _matrix_t { 32 double *matrix; 33 int edge_size; /* Size of one edge, e.g. 4 for 4x4 matrix */ 34 int full_size; /* Num of elements, e.g. 10 for 4x4 matirx */ 35 double H; 36 double E; 37 } matrix_t; 38 39 /* 40 * Full matrix definition, includes the g background freq vector (added by EPN). 41 */ 42 typedef struct _fullmat_t { 43 const ESL_ALPHABET *abc;/* alphabet, we enforce it's eslRNA */ 44 matrix_t *unpaired; 45 matrix_t *paired; 46 char *name; 47 float *g; /* EPN: the background distro, g vector in RSEARCH paper 48 * this now appears in the RIBOSUM matrix files */ 49 int scores_flag; /* TRUE if matrix values are log odds scores, FALSE if 50 * they're target probs, or unfilled */ 51 int probs_flag; /* TRUE if matrix values are target probs, FALSE if 52 * they're log odds scores, or unfilled */ 53 } fullmat_t; 54 55 /* Returns true if pos. C of seq B of msa A is a gap */ 56 #define is_rna_gap(A, B, C) (esl_abc_CIsGap(A->abc, A->aseq[B][C])) 57 58 /* Returns true if position C of digitized sequence B of msa A is a canonical */ 59 #define is_defined_rna_nucleotide(A, B, C) (esl_abc_CIsCanonical(A->abc, A->aseq[B][C])) 60 61 /* 62 * Maps c as follows 63 * 64 * A->0 65 * C->1 66 * G->2 67 * T->3 68 * U->3 69 * else -> 4 70 */ 71 int numbered_nucleotide (char c); 72 73 /* 74 * Maps base pair c,d as follows: 75 * 76 * AA -> 0 77 * AC -> 1 78 * .... 79 * TG -> 15 80 * TT -> 16 (T==U) 81 * Anything else maps to -1 82 */ 83 int numbered_basepair (char c, char d); 84 85 /* 86 * Maps to index of matrix, using binary representation of 87 * nucleotides (unsorted). 88 * 89 * See lab book 7, p. 3-4 for details of mapping function 90 */ 91 #define matrix_index(X,Y) ((X>Y) ? X*(X+1)/2+Y: Y*(Y+1)/2+X) 92 93 #define unpairedmat_size (matrix_index(3,3) + 1) 94 #define pairedmat_size (matrix_index (15,15) + 1) 95 96 /* 97 * Setup the matrix by allocating matrix in two dimensions as triangle. 98 * Initialize to 0.0 99 */ 100 matrix_t *setup_matrix (int size); 101 102 /* 103 * Actually count the basepairs and gaps into the fullmat simply by summing 104 * to existing values there. Also counts nt counts to background_nt 105 */ 106 void count_matrix (ESL_MSA *msa, fullmat_t *fullmat, double *background_nt, 107 int cutoff_perc, int product_weights); 108 109 /* 110 * Prints the matrix 111 */ 112 void print_matrix (FILE *fp, fullmat_t *fullmat); 113 114 /* 115 * Read the matrix from a file 116 */ 117 fullmat_t *ReadMatrix(const ESL_ALPHABET *abc, FILE *matfp); 118 119 /* 120 * Opens matrix file 121 */ 122 FILE *MatFileOpen (char *matfile); 123 124 /* 125 * Reports minium allowed sum of alpha + beta for matrix 126 */ 127 float get_min_alpha_beta_sum (fullmat_t *fullmat); 128 129 /* Free a fullmat_t object */ 130 void FreeMat(fullmat_t *fullmat); 131 132 /* convert a matrix with log odds scores to target freqs */ 133 int ribosum_calc_targets(fullmat_t *fullmat); 134 135 /* resolve degeneracies in a single seq MSA by replacing 136 * with most likely target residue within degenerate alphabet */ 137 int ribosum_MSA_resolve_degeneracies(fullmat_t *fullmat, ESL_MSA *msa); 138 139 /* 140 * Maps i as follows: 141 * 0->A 142 * 1->C 143 * 2->G 144 * 3->U 145 * else->-1 146 */ 147 int unpaired_res (int i); 148 149 #endif 150 151