1 /* rnamat.c
2  *
3  * Routines for dealing with RNA subsitution/transition matrix.
4  *
5  * Robert J. Klein
6  * February 25, 2002
7  */
8 #include "esl_config.h"
9 #include "p7_config.h"
10 #include "config.h"
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <ctype.h>
15 #include <math.h>
16 
17 #include "easel.h"
18 #include "esl_msa.h"
19 #include "esl_stack.h"
20 #include "esl_vectorops.h"
21 #include "esl_wuss.h"
22 
23 #include "hmmer.h"
24 
25 #include "infernal.h"
26 
27 static matrix_t *setup_matrix (int size);
28 static float simple_identity(const ESL_ALPHABET *abc, char *s1, char *s2);
29 /*
30   static void print_matrix (FILE *fp, fullmat_t *fullmat);
31   static float get_min_alpha_beta_sum (fullmat_t *fullmat);
32   static void count_matrix (ESL_MSA *msa, fullmat_t *fullmat, double *background_nt, int cutoff_perc, int product_weights);
33   static int unpaired_res (int i);
34 */
35 
36 /*
37  * Maps c as follows:
38  * A->0
39  * C->1
40  * G->2
41  * T->3
42  * U->3
43  * else->-1
44  */
numbered_nucleotide(char c)45 int numbered_nucleotide (char c) {
46   switch (c) {
47   case 'A':
48   case 'a':
49     return (0);
50   case 'C':
51   case 'c':
52     return (1);
53   case 'G':
54   case 'g':
55     return (2);
56   case 'T':
57   case 't':
58   case 'U':
59   case 'u':
60     return (3);
61   }
62   return (-1);
63 }
64 
65 /*
66  * Maps base pair c,d as follows:
67  *
68  * AA -> 0
69  * AC -> 1
70  * ....
71  * TG -> 14
72  * TT -> 15 (T==U)
73  * Anything else maps to -1
74  */
numbered_basepair(char c,char d)75 int numbered_basepair (char c, char d) {
76   int c_num, d_num;
77   c_num = numbered_nucleotide (c);
78   d_num = numbered_nucleotide (d);
79   if (c_num < 0 || d_num < 0) {
80     return (-1);
81   } else {
82     return ((c_num << 2) | d_num);
83   }
84 }
85 
86 /* Function: rjk_KHS2ct()
87  * Incept:   SRE 29 Feb 2000 [Seattle]; from COVE 1.0 code
88  * Modified: RJK 27 Feb 2002 [St. Louis]; from Infernal code (rna_ops.c)
89  * Purpose:  Convert a secondary structure string (0..len-1) to an array of
90  *           integers representing what position each position is base-paired
91  *           to (0..len-1) or -1 if none.  This is a change from what Sean
92  *           did in the Infernal code back towards the original way it was
93  *           done in the Squid code (compstruct_main.c).  In this case, the
94  *           numbering scheme does not match Zuker's .ct files, but does
95  *           match the way the MSA is stored using the SQUID library
96  *           functions.
97  *
98  *           This version does not allow pseudoknots.  Thus ">" and "<" are
99  *           used for base pairs, and all other characters, including white
100  *           space, are taken to mean unpaired nucleotides.
101  *
102  * Return:   ret_ct is allocated here and must be free'd by caller.
103  *           Returns pointer to ret_ct, or NULL if ss is somehow inconsistent.
104  */
rjk_KHS2ct(char * ss,int len)105 int *rjk_KHS2ct(char *ss, int len) {
106   ESL_STACK *pda;
107   int      *ct;
108   int       pos, pair;
109   int       status;
110 
111  /* Initialization: always initialize the main pda (0),
112    */
113   pda = esl_stack_ICreate();
114 
115   ESL_ALLOC(ct, len * sizeof(int));
116   for (pos = 0; pos < len; pos++)
117     ct[pos] = -1;
118 
119   for (pos = 0; pos < len; pos++) {
120       if (!isprint(ss[pos])) {   /* armor against garbage strings */
121 	free (ct);
122 	esl_stack_Destroy(pda);
123 	return (NULL);
124       } else if (ss[pos] == '>') {  /* left side of a pair: push onto stack */
125         if((status = esl_stack_IPush(pda, pos)) != eslOK) goto ERROR;
126       } else if (ss[pos] == '<') { /* right side of a pair; resolve pair */
127 	if (esl_stack_IPop(pda, &pair) == eslEOD) {
128 	  free (ct);
129 	  esl_stack_Destroy(pda);
130 	  return (NULL);
131 	} else {
132 	  if(status != eslOK) goto ERROR;
133 	  ct[pos]  = pair;
134 	  ct[pair] = pos;
135 	}
136       }
137   }
138                                 /* nothing should be left on stacks */
139   if (esl_stack_ObjectCount(pda) != 0) {
140     free (ct);
141     esl_stack_Destroy(pda);
142     return (NULL);
143   }
144   esl_stack_Destroy(pda);
145   return (ct);
146 
147  ERROR:
148   cm_Fail("Memory allocation error.");
149   return NULL; /* never reached */
150 }
151 
152 /*
153  * Setup the matrix by allocating matrix in two dimensions as triangle.
154  * Initialize to 0.0
155  */
setup_matrix(int size)156 matrix_t *setup_matrix (int size) {
157   int c;
158   matrix_t *mat;
159   int status;
160 
161   ESL_ALLOC(mat, sizeof(matrix_t));
162   mat->edge_size = size;
163   mat->full_size = matrix_index((size-1),(size-1)) + 1;
164   mat->E = 0.0;
165   mat->H = 0.0;
166   ESL_ALLOC(mat->matrix, sizeof(double) * mat->full_size);
167   for (c=0; c<mat->full_size; c++) {
168     mat->matrix[c] = 0.0;
169   }
170   return(mat);
171 
172  ERROR:
173   cm_Fail("Memory allocation error.");
174   return NULL; /* never reached */
175 }
176 
177 /*
178  * middle_of_stem
179  *
180  * Boolean function, returns TRUE if the gap is in the middle of a stem,
181  * false if otherwise.
182  *
183  * Inputs:
184  * msa -- the msa
185  * i, j -- indices of the two seqs
186  * pos -- the position of the gap in question
187  * ct -- array of ct arrays
188  */
middle_of_stem(ESL_MSA * msa,int i,int j,int pos,int ** ct)189 int middle_of_stem (ESL_MSA *msa, int i, int j, int pos, int **ct) {
190   int gap_start_pos, gap_stop_pos;
191 
192   if (esl_abc_CIsGap(msa->abc, msa->aseq[i][pos]) &&
193       esl_abc_CIsGap(msa->abc, msa->aseq[j][pos]))
194     /* Double gap -- exit */
195     return (0);
196 
197   if (esl_abc_CIsGap(msa->abc, msa->aseq[j][pos])) {
198     /* Swap so gap is in i */
199     gap_start_pos = i;
200     i = j;
201     j = gap_start_pos;
202   }
203 
204   /* Now, find start and end positions of the gap */
205   for (gap_start_pos = pos; gap_start_pos >= 0; gap_start_pos--) {
206     if (!esl_abc_CIsGap(msa->abc, msa->aseq[i][gap_start_pos]))
207       break;
208   }
209   for (gap_stop_pos = pos; gap_stop_pos < msa->alen; gap_stop_pos++) {
210     if (!esl_abc_CIsGap(msa->abc, msa->aseq[i][gap_stop_pos]))
211       break;
212   }
213   if (gap_start_pos < 0 || gap_start_pos >= msa->alen)
214     /* Gap at end of alignment; can't be internal to stem */
215     return (0);
216 
217   if (ct[i][gap_start_pos] == 0 || ct[j][gap_start_pos] == 0 ||
218       ct[i][gap_stop_pos] == 0 || ct[j][gap_stop_pos] == 0)
219     /* Either of the ends not paired */
220     return (0);
221 
222   if (ct[i][gap_start_pos] != ct[j][gap_start_pos] ||
223       ct[i][gap_stop_pos] != ct[j][gap_stop_pos])
224     /* The ends not paired homologously */
225     return (0);
226 
227   return (1);
228 }
229 
230 /* TAKEN FROM SQUID's weight.c's simple_distance, but rewritten to
231  *  be simple_identity
232  * Function: simple_identity()
233  *
234  * Purpose:  For two identical-length null-terminated strings, return
235  *           the fractional identity between them. (0..1)
236  *           (Gaps don't count toward anything.)
237  */
238 float
simple_identity(const ESL_ALPHABET * abc,char * s1,char * s2)239 simple_identity(const ESL_ALPHABET *abc, char *s1, char *s2)
240 {
241   int diff  = 0;
242   int valid = 0;
243 
244   for (; *s1 != '\0'; s1++, s2++)
245     {
246       if (esl_abc_CIsGap(abc, *s1) || esl_abc_CIsGap(abc, *s2)) continue;
247       if (*s1 == *s2) diff++;
248       valid++;
249     }
250   return (valid > 0 ? ((float) diff / (float) valid) : 0.0);
251 }
252 
253 /*
254  * count_matrix
255  *
256  * Given an MSA and two matrices, counts pairs of column(s) from two sequences
257  * at a time into the matrices using the BLOSUM algorithm.
258  *
259  * Fills in paired matrix (for basepairs), unpaired, background nt count in
260  * aligned regions
261  *
262  * If product weights is false, weight of a pair is average of their weights.
263  * If it is true, weight of a pair is product of their weights
264  *
265  * Each nucleotide at each position can be:
266  *    one of gap, unknown character (not ACGTU), known character
267  *    one of left bp, right bp, not base paired
268  * If both characters are gaps:
269  *   Skip
270  * If both characters are known:
271  *   If both are left bps and match to same right bps
272  *     continue
273  *   If both are right bps and match to same left bps
274  *     add to pairedmat
275  *   Otherwise
276  *     Add to unpairedmat
277  *
278  * Note: Never used in current implementation. Here for reference,
279  *       in case new RIBOSUM matrices are trained someday.
280  *       EPN, Tue Aug  7 15:10:33 2007
281  */
count_matrix(ESL_MSA * msa,fullmat_t * fullmat,double * background_nt,int cutoff_perc,int product_weights)282 void count_matrix (ESL_MSA *msa, fullmat_t *fullmat, double *background_nt,
283 		   int cutoff_perc, int product_weights) {
284 
285   /* contract check */
286   if(msa->abc->type != eslRNA) cm_Fail("In count_matrix, MSA's alphabet is not RNA.");
287   if(msa->flags & eslMSA_DIGITAL) cm_Fail("In count_matrix, MSA must be text, not digitized.");
288 
289   int status;
290   int i, j;            /* Seqs we're checking */
291   int pos;             /* Column we're counting */
292   int prev_pos;        /* Last column we counted */
293   float cur_wgt;
294   int **ct;            /* ct matrix for all the seqs */
295 #ifdef REPORT_COUNTED
296   int totpair = 0;
297   int totsing = 0;
298 #endif
299 
300   /*****************************************
301    * 1.  Allocate and fill in ct array
302    *****************************************/
303   ESL_ALLOC(ct, sizeof(int *)*msa->nseq);
304   if (msa->ss_cons != NULL) {
305     ct[0] = rjk_KHS2ct (msa->ss_cons, msa->alen);
306   } else {
307     ct[0] = rjk_KHS2ct (msa->ss[0], msa->alen);
308   }
309   for (i=1; i<msa->nseq; i++) {
310     if (msa->ss_cons != NULL) {
311       ct[i] = ct[0];
312     } else {
313       ct[i] = rjk_KHS2ct (msa->ss[i], msa->alen);
314     }
315   }
316   for (i=0; i<msa->nseq; i++) {
317     if (ct[i] == NULL) {
318     cm_Fail("CT string %d is NULL\n", i);
319     }
320   }
321 
322   /**********************************
323    * 2.  Count
324    **********************************/
325   for (i=0; i<msa->nseq; i++) {
326     for (j=0; j<i; j++) {
327       /* First, make sure it's above the cutoff */
328       if (simple_identity(msa->abc, msa->aseq[i], msa->aseq[j]) <
329 	  0.01*(float)cutoff_perc)
330 	continue;       /* Not above cutoff */
331 
332       if (product_weights == 1) {
333 	cur_wgt = msa->wgt[i] * msa->wgt[j];
334       } else {
335 	cur_wgt = (msa->wgt[i] + msa->wgt[j])/2.;
336       }
337 
338       /* First, skip starting double gaps */
339       for (prev_pos = 0; prev_pos <msa->alen &&
340 	     is_rna_gap (msa, i, prev_pos) && is_rna_gap (msa, j, prev_pos);
341 	   prev_pos++);
342 
343       for (pos=prev_pos; pos<msa->alen; pos++) {
344 	if (is_defined_rna_nucleotide(msa, i, pos) &&
345 	    is_defined_rna_nucleotide (msa, j, pos)) {
346 	  /* If both positions are defined nucleotides */
347 	  /* If both are left bps and match to same right bps, continue
348              If both are right bps and match to same left bps, add to \
349 	     pairedmat.  Otherwise, add to unpairedmat */
350 	  if (ct[i][pos] >= 0 && ct[j][pos] >= 0) {        /* Base pairs */
351 	    if (ct[i][pos] == ct[j][pos]) {              /* Both equal */
352 	      if (is_defined_rna_nucleotide(msa, i, ct[i][pos]) && \
353 		  is_defined_rna_nucleotide (msa, j, ct[j][pos])) {
354 		/* Both are RNA nucleotides */
355 		if (pos < ct[i][pos] && pos <ct[j][pos]) { /* Both left bps */
356 		  continue;
357 		} else {                                   /* Both right bps */
358 		  fullmat->paired->matrix\
359 		    [matrix_index(numbered_basepair(msa->aseq[i][ct[i][pos]],
360 						    msa->aseq[i][pos]),
361 				  numbered_basepair(msa->aseq[j][ct[j][pos]],
362 						    msa->aseq[j][pos]))] +=
363 		    cur_wgt;
364 		  background_nt[numbered_nucleotide
365 			       (msa->aseq[i][ct[i][pos]])] += cur_wgt;
366 		  background_nt[numbered_nucleotide
367 			       (msa->aseq[i][pos])] += cur_wgt;
368 		  background_nt[numbered_nucleotide
369 			       (msa->aseq[j][ct[j][pos]])] += cur_wgt;
370 		  background_nt[numbered_nucleotide
371 			       (msa->aseq[j][pos])] += cur_wgt;
372 #ifdef REPORT_COUNTED
373 		  totpair++;
374 #endif
375 		  continue;
376 		}
377 	      }
378 	    }
379 	  }
380 
381 	  fullmat->unpaired->matrix
382 	    [matrix_index(numbered_nucleotide(msa->aseq[i][pos]),
383 			  numbered_nucleotide(msa->aseq[j][pos]))] += cur_wgt;
384 	  background_nt[numbered_nucleotide(msa->aseq[i][pos])] += cur_wgt;
385 	  background_nt[numbered_nucleotide(msa->aseq[j][pos])] += cur_wgt;
386 #ifdef REPORT_COUNTED
387 	  totsing++;
388 #endif
389 	}
390       }
391 #ifdef REPORT_COUNTED
392       printf ("%d pairs counted\n%d singles counted\n", totpair, totsing);
393       totpair = 0;
394       totsing = 0;
395 #endif
396     }
397   }
398 
399   /* Free ct arrays */
400   if (ct[0] == ct[1]) {
401     free (ct[0]);
402   } else {
403     for (i=0; i<msa->nseq; i++) {
404       free (ct[i]);
405     }
406   }
407   free (ct);
408   return;
409 
410  ERROR:
411   cm_Fail("Memory allocation error.");
412 }
413 
414 /*
415  * print_matrix
416  *
417  * Dumps the paired and unpaired matrices and gap penalties
418  */
print_matrix(FILE * fp,fullmat_t * fullmat)419 void print_matrix (FILE *fp, fullmat_t *fullmat)
420 {
421   int i, j;
422 
423   fprintf (fp, "%s\n\n", fullmat->name);
424 
425   /* EPN: print background NT frequencies */
426   fprintf (fp, "    ");
427   for (i=0; i < fullmat->abc->K; i++) {
428     fprintf (fp, "%c           ", fullmat->abc->sym[i]);
429   }
430   fprintf (fp, "\n");
431 
432   fprintf (fp, "    ");
433   for (i=0; i < fullmat->abc->K; i++) {
434     fprintf (fp, "%-11f ", fullmat->g[i]);
435   }
436   fprintf (fp, "\n\n");
437 
438   fprintf (fp, "    ");
439   for (i=0; i < fullmat->abc->K; i++) {
440     fprintf (fp, "%c           ", fullmat->abc->sym[i]);
441   }
442   fprintf (fp, "\n");
443 
444   for (i=0; i < fullmat->abc->K; i++) {
445     fprintf (fp, "%c   ", fullmat->abc->sym[i]);
446     for (j=0; j<=i; j++) {
447       fprintf (fp, "%-11f ", fullmat->unpaired->matrix[matrix_index(fullmat->abc->inmap[(int) fullmat->abc->sym[i]], fullmat->abc->inmap[(int)fullmat->abc->sym[j]])]);
448     }
449     fprintf (fp, "\n");
450   }
451   if (strstr (fullmat->name, "RIBOPROB") == NULL)    /* Not probability mat */
452     fprintf (fp, "H: %.4f\nE: %.4f\n", fullmat->unpaired->H, fullmat->unpaired->E);
453 
454   fprintf (fp, "\n    ");
455   for (i=0; i < (fullmat->abc->K * fullmat->abc->K); i++) {
456     fprintf (fp, "%c%c          ", RNAPAIR_ALPHABET[i], RNAPAIR_ALPHABET2[i]);
457   }
458   fprintf (fp, "\n");
459   for (i=0; i < (fullmat->abc->K * fullmat->abc->K); i++) {
460     fprintf (fp, "%c%c  ", RNAPAIR_ALPHABET[i], RNAPAIR_ALPHABET2[i]);
461     for (j=0; j<=i; j++) {
462       /* ORIGINAL: fprintf (fp, "%-9.2f ", fullmat->paired->matrix[matrix_index(numbered_basepair(RNAPAIR_ALPHABET[i], RNAPAIR_ALPHABET2[i]), numbered_basepair (RNAPAIR_ALPHABET[j], RNAPAIR_ALPHABET2[j]))]);*/
463       /*EPN: */
464       fprintf (fp, "%-11f ", fullmat->paired->matrix[matrix_index(numbered_basepair(RNAPAIR_ALPHABET[i], RNAPAIR_ALPHABET2[i]), numbered_basepair (RNAPAIR_ALPHABET[j], RNAPAIR_ALPHABET2[j]))]);
465     }
466     fprintf (fp, "\n");
467   }
468 
469   if (strstr (fullmat->name, "RIBOPROB") == NULL)    /* Not probability mat */
470     fprintf (fp, "H: %.4f\nE: %.4f\n", fullmat->paired->H, fullmat->paired->E);
471   fprintf (fp, "\n");
472 }
473 
474 /*
475  * Read the matrix from a file.
476  * New EPN version, expects background freqs in file.
477  */
ReadMatrix(const ESL_ALPHABET * abc,FILE * matfp)478 fullmat_t *ReadMatrix(const ESL_ALPHABET *abc, FILE *matfp) {
479 
480   /* Contract check */
481   if(abc->type != eslRNA)
482     cm_Fail("Trying to read RIBOSUM matrix from RSEARCH, but alphabet is not eslRNA.");
483 
484   int status;
485   char linebuf[256];
486   char fullbuf[16384];
487   int fullbuf_used = 0;
488   fullmat_t *fullmat;
489   int i;
490   char *cp, *end_mat_pos;
491 
492   ESL_ALLOC(fullmat, (sizeof(fullmat_t)));
493   fullmat->abc      = abc; /* just a pointer */
494   fullmat->unpaired = setup_matrix (fullmat->abc->K);
495   fullmat->paired = setup_matrix (fullmat->abc->K * fullmat->abc->K);
496   ESL_ALLOC(fullmat->g, sizeof(float) * fullmat->abc->K);
497 
498   while (fgets (linebuf, 255, matfp)) {
499     strncpy (fullbuf+fullbuf_used, linebuf, 16384-fullbuf_used-1);
500     fullbuf_used += strlen(linebuf);
501     if (fullbuf_used >= 16384) {
502       cm_Fail ("ERROR: Matrix file bigger than 16kb\n");
503     }
504   }
505 
506   /* First, find RIBO, and copy matrix name to fullmat->name */
507   cp = strstr (fullbuf, "RIBO");
508   for (i = 0; cp[i] && !isspace(cp[i]); i++);   /* Find space after RIBO */
509   ESL_ALLOC(fullmat->name, sizeof(char)*(i+1));
510   strncpy (fullmat->name, cp, i);
511   fullmat->name[i] = '\0';
512   cp = cp + i;
513   if(strstr (fullmat->name, "SUM")) { fullmat->scores_flag = TRUE; fullmat->probs_flag = FALSE; }
514   else if(strstr (fullmat->name, "PROB")) { fullmat->scores_flag = FALSE; fullmat->probs_flag = TRUE; }
515   else cm_Fail("ERROR reading matrix, name does not include SUM or PROB.\n");
516 
517   /* Now, find the first A */
518   cp = strchr (cp, 'A');
519   fullmat->unpaired->edge_size = 0;
520   /* And count how edge size of the matrix */
521   while (*cp != '\n' && cp-fullbuf < fullbuf_used) {
522     if (!isspace (cp[0]) && isspace (cp[1])) {
523       fullmat->unpaired->edge_size++;
524     }
525     cp++;
526   }
527   /* EPN added to read background freqs to store in g vector */
528 
529   /* Read background freqs until we hit the next A */
530   end_mat_pos = strchr (cp, 'A');
531   for (i=0; cp - fullbuf < end_mat_pos-fullbuf; i++) {
532     while (!isdigit(*cp) && *cp != '-' && *cp != '.' && \
533 	   cp-fullbuf < fullbuf_used && cp != end_mat_pos) {
534 	cp++;
535     }
536     if (cp == end_mat_pos)
537       break;
538     if (cp-fullbuf < fullbuf_used) {
539       fullmat->g[i] = atof(cp);
540       while ((isdigit (*cp) || *cp == '-' || *cp == '.') &&\
541 	     (cp-fullbuf <fullbuf_used)) {
542 	cp++;
543       }
544     }
545   }
546   /* we've read the background, normalize it */
547   esl_vec_FNorm(fullmat->g, fullmat->unpaired->edge_size);
548 
549   /* We've already found the next A */
550   /* end EPN block */
551 
552   /* Take numbers until we hit the H: */
553   end_mat_pos = strstr (cp, "H:");
554   for (i=0; cp - fullbuf < end_mat_pos-fullbuf; i++) {
555     while (!isdigit(*cp) && *cp != '-' && *cp != '.' && \
556 	   cp-fullbuf < fullbuf_used && cp != end_mat_pos) {
557 	cp++;
558     }
559     if (cp == end_mat_pos)
560       break;
561     if (cp-fullbuf < fullbuf_used) {
562       fullmat->unpaired->matrix[i] = atof(cp);
563       while ((isdigit (*cp) || *cp == '-' || *cp == '.') &&\
564 	     (cp-fullbuf <fullbuf_used)) {
565 	cp++;
566       }
567     }
568   }
569   fullmat->unpaired->full_size = i;
570 
571   /* Skip the H: */
572   cp += 2;
573   fullmat->unpaired->H = atof(cp);
574 
575   /* Now, go past the E: */
576   cp = strstr (cp, "E:") + 2;
577   fullmat->unpaired->E = atof(cp);
578 
579   /********* PAIRED MATRIX ************/
580   /* Now, find the first A */
581   cp = strchr (cp, 'A');
582   fullmat->paired->edge_size = 0;
583   /* And count how edge size of the matrix */
584   while (*cp != '\n') {
585     if (!isspace (cp[0]) && isspace (cp[1])) {
586       fullmat->paired->edge_size++;
587     }
588     cp++;
589   }
590 
591   /* Find next A */
592   while (*cp != 'A' && (cp-fullbuf) < fullbuf_used) cp++;
593 
594   /* Take numbers until we hit the H: */
595   end_mat_pos = strstr (cp, "H:");
596   for (i=0; cp - fullbuf < end_mat_pos-fullbuf; i++) {
597     while (!isdigit(*cp) && *cp != '-' && *cp != '.' && \
598 	   cp-fullbuf < fullbuf_used && cp != end_mat_pos) {
599 	cp++;
600     }
601     if (cp == end_mat_pos)
602       break;
603     if (cp-fullbuf < fullbuf_used) {
604       fullmat->paired->matrix[i] = atof(cp);
605       while ((isdigit (*cp) || *cp == '-' || *cp == '.') &&\
606 	     (cp-fullbuf <fullbuf_used)) {
607 	cp++;
608       }
609     }
610   }
611   fullmat->paired->full_size = i;
612 
613   /* Skip the H: */
614   cp += 2;
615   fullmat->paired->H = atof(cp);
616 
617   /* Now, go past the E: */
618   cp = strstr (cp, "E:") + 2;
619   fullmat->paired->E = atof(cp);
620 
621   /*print_matrix(stdout, fullmat);*/
622   return (fullmat);
623 
624  ERROR:
625   cm_Fail("Memory allocation error.");
626   return NULL; /* never reached */
627 }
628 
629 /*
630  * MatFileOpen
631  *
632  * Given name of matrix file, open it
633  *
634  */
MatFileOpen(char * matfile)635 FILE *MatFileOpen (char *matfile)
636 {
637      FILE *fp;
638 
639      if (matfile == NULL)
640        return NULL;
641 
642      fp = fopen (matfile, "r");
643      if (fp != NULL) return (fp);
644 
645      return (NULL);
646 }
647 
648 /*
649  * Function: get_min_alpha_beta_sum()
650  * Date:     RJK, Mon Apr 29, 2002 [St. Louis]
651  * Purpose:  Given a full matrix, reports minimum sum allowed
652  *           for alpha and beta (or alpha' and beta')
653  *           The maximum for alpha+beta is found by
654  *           min Sp(i,j)(k,l) - max{Su(i,k), Su(j,l)}
655  *            for all i,j,k.l
656  */
get_min_alpha_beta_sum(fullmat_t * fullmat)657 float get_min_alpha_beta_sum (fullmat_t *fullmat) {
658   float max_sum = 9999999.9;              /* max allowed value of alpha+beta */
659   float cur_dif;
660   int i,j,k,l;
661   int pair_ij, pair_kl;
662 
663   for (i=0; i<fullmat->abc->K; i++)
664     for (j=0; j<fullmat->abc->K; j++)
665       for (k=0; k<fullmat->abc->K; k++)
666 	for (l=0; l<fullmat->abc->K; l++) {
667 	  pair_ij = numbered_basepair(fullmat->abc->sym[i], fullmat->abc->sym[j]);
668 	  pair_kl = numbered_basepair(fullmat->abc->sym[k], fullmat->abc->sym[l]);
669 	  /* First check for i,k paired */
670 	  cur_dif = fullmat->paired->matrix[matrix_index(pair_ij, pair_kl)] -
671 	    fullmat->unpaired->matrix[matrix_index(i,k)];
672 	  if (cur_dif < max_sum)
673 	    max_sum = cur_dif;
674 	  /* And repeat for j,l */
675 	  cur_dif = fullmat->paired->matrix[matrix_index(pair_ij, pair_kl)] -
676 	    fullmat->unpaired->matrix[matrix_index(j,l)];
677 	  if (cur_dif < max_sum)
678 	    max_sum = cur_dif;
679 	}
680   return (-1. * max_sum);
681 }
682 
683 /* EPN, Tue Feb  6 15:34:00 2007 */
FreeMat(fullmat_t * fullmat)684 void FreeMat(fullmat_t *fullmat)
685 {
686   if(fullmat->unpaired != NULL)
687     {
688       free(fullmat->unpaired->matrix);
689       free(fullmat->unpaired);
690     }
691   if(fullmat->paired != NULL)
692     {
693       free(fullmat->paired->matrix);
694       free(fullmat->paired);
695     }
696   if(fullmat->name != NULL)
697     free(fullmat->name);
698   if(fullmat->g != NULL)
699     free(fullmat->g);
700   free(fullmat);
701 }
702 
703 
704 /* Function: ribosum_calc_targets()
705  * Incept:   EPN, Wed Mar 14 06:01:11 2007
706  *
707  * Purpose:  Given a RIBOSUM score matrix data structure (fullmat_t) with
708  *           log odds scores (as read in from a RIBOSUM file) and a background
709  *           model (fullmat->g), overwrite the log-odds scores with target
710  *           probabilities f_ij that satisfy:
711  *
712  *           sum_ij f_ij = 1.0
713  *
714  *           NOTE: these target probs are not the same target probs
715  *                 dumped by makernamat -p in RSEARCH, those *off-diagonals*
716  *                 are double what I'm calculating here.
717  *                 (so that sum_ii f'_ii + sum_j<i f'_ij = 1.0)
718  *
719  *
720  * Returns:   <eslOK> on success.
721  *
722  */
ribosum_calc_targets(fullmat_t * fullmat)723 int ribosum_calc_targets(fullmat_t *fullmat)
724 {
725   int       idx;
726   int       a,b,i,j,k,l;
727 
728   /* Check the contract. */
729   if(!(fullmat->scores_flag)) ESL_EXCEPTION(eslEINVAL, "in ribosum_calc_targets(), matrix is not in log odds mode");
730   if(fullmat->probs_flag) ESL_EXCEPTION(eslEINVAL, "in ribosum_calc_targets(), matrix is already in probs mode");
731 
732   /*printf("\nbeginning of ribosum_calc_targets, printing mx:\n");
733     print_matrix(stdout, fullmat);*/
734 
735   /* convert log odds score s_ij, to target (f_ij)
736    * using background freqs (g), by:
737    * f_ij = g_i * g_j * 2^{s_ij} */
738 
739   /* first convert the unpaired (singlet) matrix,
740   *  remember matrix is set up as a vector */
741   idx = 0;
742   for(i = 0; i < fullmat->abc->K; i++)
743     for(j = 0; j <= i; j++)
744       {
745 	fullmat->unpaired->matrix[idx] =
746 	  fullmat->g[i] * fullmat->g[j] * sreEXP2(fullmat->unpaired->matrix[idx]);
747 	idx++;
748       }
749   /* and the paired matrix, careful about for loops here, we
750    * use 4 nested ones just to keep track of which backgrounds to multiply
751    * (the g[i] * g[j] * g[k] * g[l] part) */
752   idx = 0;
753   for(a = 0; a < sizeof(RNAPAIR_ALPHABET)-1; a++)
754     for(b = 0; b <= a; b++)
755       {
756 	i = a / fullmat->abc->K;
757 	j = a % fullmat->abc->K;
758 	k = b / fullmat->abc->K;
759 	l = b % fullmat->abc->K;
760 
761 	fullmat->paired->matrix[idx] =
762 	  fullmat->g[i] * fullmat->g[j] * fullmat->g[k] * fullmat->g[l] *
763 	  sreEXP2(fullmat->paired->matrix[idx]);
764 	idx++;
765       }
766 
767   /* We have to be careful with normalizing the matrices b/c they are
768    * symmetric and fullmat_t only stores f_ij i<=j. So we double
769    * the f_ij if i!=j, normalize it (it should then sum to 1.)
770    * and then halve the f_ij i != j's. */
771   /* normalize the unpaired matrix */
772   idx = 0;
773   for(i = 0; i < fullmat->abc->K; i++)
774     for(j = 0; j <= i; j++)
775       {
776 	if(i != j) fullmat->unpaired->matrix[idx] *= 2.;
777 	idx++;
778       }
779   esl_vec_DNorm(fullmat->unpaired->matrix, fullmat->unpaired->full_size);
780   idx = 0;
781   for(i = 0; i < fullmat->abc->K; i++)
782     for(j = 0; j <= i; j++)
783       {
784 	if(i != j) fullmat->unpaired->matrix[idx] *= 0.5;
785 	idx++;
786       }
787 
788   /* normalize the paired matrix */
789   idx = 0;
790   for(a = 0; a < sizeof(RNAPAIR_ALPHABET)-1; a++)
791     for(b = 0; b <= a; b++)
792       {
793 	if(a != b) fullmat->paired->matrix[idx] *= 2.;
794 	idx++;
795       }
796   esl_vec_DNorm(fullmat->paired->matrix,   fullmat->paired->full_size);
797   idx = 0;
798   for(a = 0; a < sizeof(RNAPAIR_ALPHABET)-1; a++)
799     for(b = 0; b <= a; b++)
800       {
801 	if(a != b) fullmat->paired->matrix[idx] *= 0.5;
802 	idx++;
803       }
804 
805   /* Lower the scores_flag, raise probs_flag */
806   fullmat->scores_flag = FALSE;
807   fullmat->probs_flag = TRUE;
808 
809   /*printf("\nend of ribosum_calc_targets, printing mx:\n");
810     print_matrix(stdout, fullmat);*/
811   return eslOK;
812 }
813 
814 /* Function: ribosum_MSA_resolve_degeneracies
815  *
816  * Incept:   EPN, Thu Mar 15 05:37:22 2007
817  *
818  * Purpose:  Given a RIBOSUM score matrix data structure (fullmat_t) with
819  *           target probabilites and a MSA with SS markup, remove all
820  *           ambiguous bases. Do this by selecting the most likely
821  *           singlet or base pair that matches each ambiguity given the
822  *           RIBOSUM matrix.
823  *           (ex: (G|A) for 'R', or (GG|GC|AG|AC) bp for 'RS' base pair)
824  *
825  *
826  * Returns:   <eslOK> on success.
827  *
828  * The degenerate code used here is:
829  * (taken from http://www.neb.com/neb/products/REs/RE_code.html
830  *
831  *                         X = A or C or G or T
832  *                         R = G or A
833  *                         Y = C or T
834  *                         M = A or C
835  *                         K = G or T
836  *                         S = G or C
837  *                         W = A or T
838  *                         H = not G (A or C or T)
839  *                         B = not A (C or G or T)
840  *                         V = not T (A or C or G)
841  *                         D = not C (A or G or T)
842  *                         N = A or C or G or T
843  */
ribosum_MSA_resolve_degeneracies(fullmat_t * fullmat,ESL_MSA * msa)844 int ribosum_MSA_resolve_degeneracies(fullmat_t *fullmat, ESL_MSA *msa)
845 {
846   int       idx;
847   int       i,j;
848   int      *ct;		  /* 0..alen-1 base pair partners array         */
849   int       apos;
850   char       c;           /* tmp char for current degeneracy, uppercase */
851   char      *cp;          /* tmp char pointer for finding c in degen_string */
852   char       c_m;         /* tmp char for current bp mate's degeneracy, uppercase */
853   char      *cp_m;        /* tmp char pointer for finding c_m in degen_string */
854   char degen_string[13] = "XRYMKSWHBVDN\0";
855   char rna_string[5] =    "ACGU\0";
856   int  **degen_mx;
857   float      *unpaired_marginals;
858   float      *paired_marginals;
859   float  *cur_unpaired_marginals;
860   float  *cur_paired_marginals;
861   char      *aseq;
862   int        dpos;       /* position of c within degen_string */
863   int        dpos_m;     /* position of c_m within degen_string */
864   int        argmax;
865   int        mate;       /* used as ct[apos-1] */
866   int        status;
867   ESL_ALPHABET *msa_abc = msa->abc; /* when we textize this, msa->abc will be set to NULL! */
868 
869   /* Check the contract. */
870   if(!(fullmat->probs_flag))                               ESL_EXCEPTION(eslEINVAL, "in ribosum_MSA_resolve_degeneracies(), matrix is not in probs mode");
871   if(fullmat->scores_flag)                                 ESL_EXCEPTION(eslEINVAL, "in ribosum_MSA_resolve_degeneracies(), matrix is in scores mode");
872   if(msa->nseq != 1)                                       ESL_EXCEPTION(eslEINVAL, "MSA does not have exactly 1 seq");
873   if(fullmat->abc->type != eslRNA)                         ESL_EXCEPTION(eslEINVAL, " matrix alphabet not RNA");
874   if(! (msa->flags & eslMSA_DIGITAL))                       ESL_EXCEPTION(eslEINVAL, " MSA is not digitized");
875   if(msa->abc->type != eslRNA && msa->abc->type != eslDNA) ESL_EXCEPTION(eslEINVAL, " MSA alphabet not DNA or RNA");
876 
877   /*printf("in ribosum_MSA_resolve_degeneracies()\n");*/
878 
879   ESL_ALLOC(unpaired_marginals, sizeof(float) * fullmat->abc->K);
880   ESL_ALLOC(paired_marginals, sizeof(float) * (fullmat->abc->K * fullmat->abc->K));
881   ESL_ALLOC(cur_unpaired_marginals, sizeof(float) * fullmat->abc->K);
882   ESL_ALLOC(cur_paired_marginals, sizeof(float) * (fullmat->abc->K * fullmat->abc->K));
883 
884   /* Laboriously fill in degen_mx, NOTE: this will fall over if alphabet is not RNA! */
885   /* This is somewhat unnec, now that we use esl_alphabet.c, but I didnt' want to redo it, so I left it */
886   ESL_ALLOC(degen_mx, sizeof(int *) * 12);
887   for(i = 0; i < 12; i++)
888     {
889       ESL_ALLOC(degen_mx[i], sizeof(int) * fullmat->abc->K);
890       esl_vec_ISet(degen_mx[i], fullmat->abc->K, 0.);
891     }
892   /* 'X' = A|C|G|U */
893   degen_mx[0][0] = degen_mx[0][1] = degen_mx[0][2] = degen_mx[0][3] = 1;
894   /* 'R' = A|G */
895   degen_mx[1][0] = degen_mx[1][2] = 1;
896   /* 'Y' = C|U */
897   degen_mx[2][1] = degen_mx[2][3] = 1;
898   /* 'M' = A|C */
899   degen_mx[3][0] = degen_mx[3][1] = 1;
900   /* 'K' = G|U */
901   degen_mx[4][2] = degen_mx[4][3] = 1;
902   /* 'S' = C|G */
903   degen_mx[5][1] = degen_mx[5][2] = 1;
904   /* 'W' = A|U */
905   degen_mx[6][0] = degen_mx[6][3] = 1;
906   /* 'H' = A|C|U */
907   degen_mx[7][0] = degen_mx[7][1] = degen_mx[7][3] = 1;
908   /* 'B' = C|G|U */
909   degen_mx[8][1] = degen_mx[8][2] = degen_mx[8][3] = 1;
910   /* 'V' = A|C|G */
911   degen_mx[9][0] = degen_mx[9][1] = degen_mx[9][2] = 1;
912   /* 'D' = A|G|U */
913   degen_mx[10][0] = degen_mx[10][2] = degen_mx[10][3] = 1;
914   /* 'N' = A|C|G|U */
915   degen_mx[11][0] = degen_mx[11][1] = degen_mx[11][2] = degen_mx[11][3] = 1;
916 
917   /* calculate paired_marginals and unpaired_marginals as:
918    * marginal[x] = sum_y P(x,y)
919    */
920   esl_vec_FSet(unpaired_marginals, fullmat->abc->K, 0.);
921   esl_vec_FSet(paired_marginals, fullmat->abc->K * fullmat->abc->K, 0.);
922 
923   for(i = 0; i < fullmat->abc->K; i++)
924     for(j = 0; j < fullmat->abc->K; j++)
925       unpaired_marginals[i] += fullmat->unpaired->matrix[matrix_index(i,j)];
926   idx = 0;
927   for(i = 0; i < (fullmat->abc->K*fullmat->abc->K); i++)
928     for(j = 0; j < (fullmat->abc->K*fullmat->abc->K); j++)
929       paired_marginals[i] += fullmat->paired->matrix[matrix_index(i,j)];
930 
931   /*for(i = 0; i < (fullmat->abc->K); i++)
932     printf("unpaired_marginals[i:%d]: %f\n", i, unpaired_marginals[i]);
933     for(i = 0; i < (fullmat->abc->K*fullmat->abc->K); i++)
934     printf("paired_marginals[i:%d]: %f\n", i, paired_marginals[i]);*/
935 
936   esl_vec_FNorm(unpaired_marginals, fullmat->abc->K);
937   esl_vec_FNorm(paired_marginals, fullmat->abc->K*fullmat->abc->K);
938 
939   /* get ct array, indexed 1..alen while apos is 0..alen-1 */
940   ESL_ALLOC(ct, (msa->alen+1) * sizeof(int));
941   esl_wuss2ct(msa->ss_cons, msa->alen, ct);
942 
943   ESL_ALLOC(aseq, sizeof(char) * (msa->alen+1));
944   status = esl_msa_Textize(msa);
945   if(status == eslECORRUPT)      cm_Fail("esl_msa_Textize() returned status: %d, the msa must contain invalid digitized chars.", status);
946   else if(status != eslOK) goto ERROR;
947 
948   /* remember we only have 1 seq in the MSA */
949   for(apos = 0; apos < msa->alen; apos++)
950     {
951       if (esl_abc_CIsGap(fullmat->abc, msa->aseq[0][apos])) continue; /* we can still have gaps in 1 seq MSA, they'll
952 								       * be dealt with (ignored) in
953 								       * modelmaker.c:HandModelmaker() */
954       mate = ct[(apos+1)]; /* apos is 0..alen-1, ct is 1..alen, so mate will be 1..alen now */
955       if(mate != 0 && esl_abc_CIsGap(msa_abc, msa->aseq[0][(mate-1)])) mate = 0;
956       /* apos is a base paired res, but mate is a gap, pretend apos is SS for our purposes here */
957       else if(mate != 0 && ((mate-1) < apos)) continue;
958       /* apos is a base paired res, but we've already changed him to an unambiguous res when we changed his mate (which was not a gap) */
959 
960       c = toupper(msa->aseq[0][apos]);
961       if(c == 'T') c = 'U';
962       cp = strchr(rna_string, c);
963       if(cp == NULL)
964 	{
965 	  /* a degeneracy */
966 	  if((cp = strchr(degen_string, c)) == NULL) ESL_XEXCEPTION(eslEINVAL, "character is not ACGTU or a recognized ambiguity code");
967 	  dpos = cp-degen_string;
968 	  if(mate == 0) /* single stranded */
969 	    {
970 	      /*printf("\nCASE 1 SS AMBIG\n");
971 		printf("apos: %d c: %c\n", apos, c);*/
972 	      /* of possible residues, find the one with the highest marginal
973 	       * in RIBOSUM: argmax_x sum_Y P(x,y)  */
974 	      for(i = 0; i < fullmat->abc->K; i++)
975 		cur_unpaired_marginals[i] = degen_mx[dpos][i] * unpaired_marginals[i];
976 	      argmax = esl_vec_FArgMax(cur_unpaired_marginals, fullmat->abc->K);
977 	      msa->aseq[0][apos] = rna_string[argmax];
978 	      /*printf("c: %c at posn %d argmax: %d msa[apos:%d]: %c\n", c, (int) (cp-degen_string), argmax, apos, msa->aseq[0][apos]);
979 		printf("new ss: %c\n", rna_string[argmax]);*/
980 	    }
981 	  else /* paired */
982 	    {
983 	      /* is mate ambiguous? */
984 	      c_m = toupper(msa->aseq[0][(mate-1)]);
985 	      if(c_m == 'T') c_m = 'U';
986 	      cp_m = strchr(rna_string, c_m);
987 	      if(cp_m == NULL)
988 		{
989 		  /* mate is ambiguous */
990 		  /*printf("\nCASE 4 PAIR, BOTH AMBIG\n");
991 		    printf("left (apos) %d c: %c mate %d c_m: %c\n", apos, c, (mate-1), c_m);
992 		  */
993 		  if((cp_m = strchr(degen_string, c_m)) == NULL) ESL_XEXCEPTION(eslEINVAL, "character is not ACGTU or a recognized ambiguity code");
994 		  dpos_m = cp_m-degen_string;
995 		  /* we know that mate != 0 and (mate-1) >= apos, we continued above if that was false */
996 		  idx = 0;
997 		  for(i = 0; i < (fullmat->abc->K); i++)
998 		    for(j = 0; j < (fullmat->abc->K); j++)
999 		      {
1000 			cur_paired_marginals[idx] = degen_mx[dpos][i] * degen_mx[dpos_m][j] *
1001 			  paired_marginals[idx];
1002 			/*printf("degen_mx[dpos:  %d][i:%d]: %d\n", dpos, i, degen_mx[dpos][i]);
1003 			  printf("degen_mx[dpos_m:%d][j:%d]: %d\n", dpos_m, j, degen_mx[dpos_m][j]);
1004 			  printf("cur_paired_marginals[idx:%d]: %f\n", idx, cur_paired_marginals[idx]);*/
1005 			idx++;
1006 		      }
1007 		  argmax = esl_vec_FArgMax(cur_paired_marginals, (fullmat->abc->K*fullmat->abc->K));
1008 		  msa->aseq[0][apos]     = RNAPAIR_ALPHABET[argmax];
1009 		  msa->aseq[0][(mate-1)] = RNAPAIR_ALPHABET2[argmax];
1010 		  /*printf("new bp: left: %c right: %c\n", RNAPAIR_ALPHABET[argmax], RNAPAIR_ALPHABET2[argmax]);*/
1011 
1012 		}
1013 	      else /* mate is unambiguous */
1014 		{
1015 		  /*printf("\nCASE 2 PAIR, LEFT AMBIG, RIGHT NOT\n");
1016 		    printf("left (apos) %d c: %c mate %d c_m: %c\n", apos, c, (mate-1), c_m);*/
1017 		  cp_m = strchr(rna_string, c_m);
1018 		  dpos_m = cp_m - rna_string;
1019 		  /* cp_m is 0 for A, 1 for C, 2 for G, 3 for U in mate-1 */
1020 		  idx = 0;
1021 		  for(i = 0; i < (fullmat->abc->K); i++)
1022 		    for(j = 0; j < (fullmat->abc->K); j++)
1023 		      {
1024 			cur_paired_marginals[idx] = degen_mx[dpos][i] * (j == dpos_m) *
1025 			  paired_marginals[idx];
1026 			/*printf("cur_paired_marginals[idx:%d]: %f\n", idx, cur_paired_marginals[idx]);*/
1027 			idx++;
1028 		      }
1029 		  argmax = esl_vec_FArgMax(cur_paired_marginals, (fullmat->abc->K*fullmat->abc->K));
1030 		  msa->aseq[0][apos]     = RNAPAIR_ALPHABET[argmax];
1031 		  msa->aseq[0][(mate-1)] = RNAPAIR_ALPHABET2[argmax];
1032 		  /*printf("new bp: left: %c right: %c\n", RNAPAIR_ALPHABET[argmax], RNAPAIR_ALPHABET2[argmax]);*/
1033 		}
1034 	    }
1035 	}
1036       /* we could still have unambiguous apos, but an ambiguous mate, which we deal
1037        * with here: */
1038       if(mate != 0)
1039 	{
1040 	  c_m = toupper(msa->aseq[0][(mate-1)]);
1041 	  if(c_m == 'T') c = 'U';
1042 	  cp_m = strchr(rna_string, c_m);
1043 	  if(cp_m == NULL)
1044 	    {
1045 	      /* mate is ambiguous */
1046 	      if((cp_m = strchr(degen_string, c_m)) == NULL) ESL_XEXCEPTION(eslEINVAL, "character is not ACGTU or a recognized ambiguity code");
1047 	      dpos_m = cp_m - degen_string;
1048 	      /*printf("\nCASE 3 PAIR, LEFT NOT, RIGHT AMBIG\n");
1049 		printf("left (apos) %d c: %c mate %d c_m: %c dpos_m\n", apos, c, (mate-1), c_m, dpos);*/
1050 	      cp = strchr(rna_string, c);
1051 	      if(cp == NULL) ESL_XEXCEPTION(eslEINVAL, "character is not ACGTU or a recognized ambiguity code");
1052 	      dpos = cp - rna_string;
1053 	      idx = 0;
1054 	      for(i = 0; i < (fullmat->abc->K); i++)
1055 		for(j = 0; j < (fullmat->abc->K); j++)
1056 		  {
1057 		    cur_paired_marginals[idx] = (i == dpos) * degen_mx[dpos_m][j] *
1058 		      paired_marginals[idx];
1059 		    /*printf("cur_paired_marginals[idx:%d]: %f\n", idx, cur_paired_marginals[idx]);*/
1060 		    idx++;
1061 		  }
1062 	      argmax = esl_vec_FArgMax(cur_paired_marginals, (fullmat->abc->K*fullmat->abc->K));
1063 	      msa->aseq[0][apos]     = RNAPAIR_ALPHABET[argmax];
1064 	      msa->aseq[0][(mate-1)] = RNAPAIR_ALPHABET2[argmax];
1065 	      /*printf("new bp: left: %c right: %c\n", RNAPAIR_ALPHABET[argmax], RNAPAIR_ALPHABET2[argmax]);*/
1066 	    }
1067 	}
1068     }
1069   /* go through the sequence again, there should be no ambiguities now */
1070   for(apos = 0; apos < msa->alen; apos++) {
1071     if(esl_abc_CIsGap(msa_abc, msa->aseq[0][apos])) continue;
1072     c = toupper(msa->aseq[0][apos]);
1073     if(c == 'T') c = 'U';
1074     cp = strchr(rna_string, c);
1075     if(cp == NULL) ESL_XEXCEPTION(eslEINVAL, "ribosum_MSA_resolve_degeneracies(), second pass check character %d (%c) is still ambiguous!\n", apos, c);
1076   }
1077 
1078   if((status = esl_msa_Digitize(msa_abc, msa, NULL)) != eslOK) goto ERROR;
1079   free(unpaired_marginals);
1080   free(paired_marginals);
1081   free(cur_unpaired_marginals);
1082   free(cur_paired_marginals);
1083   for(i = 0; i < 12; i++) free(degen_mx[i]);
1084   free(degen_mx);
1085   free(ct);
1086   free(aseq);
1087 
1088   return eslOK;
1089 
1090  ERROR:
1091   return eslEINVAL;
1092 }
1093 
1094 /*
1095  * Maps i as follows:
1096  * 0->A
1097  * 1->C
1098  * 2->G
1099  * 3->U
1100  * else->-1
1101  */
unpaired_res(int i)1102 int unpaired_res (int i)
1103 {
1104   switch (i) {
1105   case 0:
1106     return ('A');
1107   case 1:
1108     return ('C');
1109   case 2:
1110     return ('G');
1111   case 3:
1112     return ('U');
1113   }
1114   return (-1);
1115 }
1116 
1117 
1118