1 /*
2  *                centroid structure prediction
3  *
4  *                Ivo L Hofacker + Ronny Lorenz
5  *                Vienna RNA package
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11 
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <math.h>
16 
17 #include "ViennaRNA/utils/basic.h"
18 #include "ViennaRNA/fold_vars.h"
19 #include "ViennaRNA/gquad.h"
20 #include "ViennaRNA/centroid.h"
21 
22 /*
23  #################################
24  # GLOBAL VARIABLES              #
25  #################################
26  */
27 
28 /*
29  #################################
30  # PRIVATE VARIABLES             #
31  #################################
32  */
33 
34 /*
35  #################################
36  # PRIVATE FUNCTION DECLARATIONS #
37  #################################
38  */
39 
40 /*
41  #################################
42  # BEGIN OF FUNCTION DEFINITIONS #
43  #################################
44  */
45 
46 /* compute the centroid structure of the ensemble, i.e. the strutcure
47  * with the minimal average distance to all other structures
48  * <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
49  * Thus, the centroid is simply the structure containing all pairs with
50  * p_ij>0.5
51  */
52 PUBLIC char *
vrna_centroid_from_plist(int length,double * dist,vrna_ep_t * pl)53 vrna_centroid_from_plist(int        length,
54                          double     *dist,
55                          vrna_ep_t  *pl)
56 {
57   int   i;
58   char  *centroid;
59 
60   if (pl == NULL) {
61     vrna_message_warning("vrna_centroid_from_plist: "
62                          "pl == NULL!");
63     return NULL;
64   }
65 
66   *dist     = 0.;
67   centroid  = (char *)vrna_alloc((length + 1) * sizeof(char));
68   for (i = 0; i < length; i++)
69     centroid[i] = '.';
70   for (i = 0; pl[i].i > 0; i++) {
71     if ((pl[i].p) > 0.5) {
72       centroid[pl[i].i - 1] = '(';
73       centroid[pl[i].j - 1] = ')';
74       *dist                 += (1 - pl[i].p);
75     } else {
76       *dist += pl[i].p;
77     }
78   }
79   centroid[length] = '\0';
80   return centroid;
81 }
82 
83 
84 /* compute the centroid structure of the ensemble, i.e. the strutcure
85  * with the minimal average distance to all other structures
86  * <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
87  * Thus, the centroid is simply the structure containing all pairs with
88  * p_ij>0.5
89  */
90 PUBLIC char *
vrna_centroid_from_probs(int length,double * dist,FLT_OR_DBL * probs)91 vrna_centroid_from_probs(int        length,
92                          double     *dist,
93                          FLT_OR_DBL *probs)
94 {
95   int         i, j;
96   FLT_OR_DBL  p;
97   char        *centroid;
98   int         *index = vrna_idx_row_wise(length);
99 
100   if (probs == NULL) {
101     vrna_message_warning("vrna_centroid_from_probs: "
102                          "probs == NULL!");
103     return NULL;
104   }
105 
106   *dist     = 0.;
107   centroid  = (char *)vrna_alloc((length + 1) * sizeof(char));
108   for (i = 0; i < length; i++)
109     centroid[i] = '.';
110   for (i = 1; i <= length; i++)
111     for (j = i + TURN + 1; j <= length; j++) {
112       if ((p = probs[index[i] - j]) > 0.5) {
113         centroid[i - 1] = '(';
114         centroid[j - 1] = ')';
115         *dist           += (1 - p);
116       } else {
117         *dist += p;
118       }
119     }
120   free(index);
121   centroid[length] = '\0';
122   return centroid;
123 }
124 
125 
126 /* compute the centroid structure of the ensemble, i.e. the strutcure
127  * with the minimal average distance to all other structures
128  * <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
129  * Thus, the centroid is simply the structure containing all pairs with
130  * p_ij>0.5
131  */
132 PUBLIC char *
vrna_centroid(vrna_fold_compound_t * vc,double * dist)133 vrna_centroid(vrna_fold_compound_t  *vc,
134               double                *dist)
135 {
136   int               i, j, k, length, turn;
137   FLT_OR_DBL        p;
138   char              *centroid;
139   short             *S;
140   vrna_mx_pf_t      *matrices;
141   FLT_OR_DBL        *probs;
142   int               *my_iindx;
143   vrna_exp_param_t  *pf_params;
144 
145 
146   if (!vc) {
147     vrna_message_warning("vrna_centroid: "
148                          "run vrna_pf_fold first!");
149     return NULL;
150   } else if (!vc->exp_matrices->probs) {
151     vrna_message_warning("vrna_centroid: "
152                          "probs == NULL!");
153     return NULL;
154   }
155 
156   length    = vc->length;
157   pf_params = vc->exp_params;
158   S         = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : vc->S_cons;
159   my_iindx  = vc->iindx;
160 
161   matrices  = vc->exp_matrices;
162   probs     = matrices->probs;
163   turn      = pf_params->model_details.min_loop_size;
164 
165   *dist     = 0.;
166   centroid  = (char *)vrna_alloc((length + 1) * sizeof(char));
167   for (i = 0; i < length; i++)
168     centroid[i] = '.';
169   for (i = 1; i <= length; i++)
170     for (j = i + turn + 1; j <= length; j++) {
171       if ((p = probs[my_iindx[i] - j]) > 0.5) {
172         if (pf_params->model_details.gquad) {
173           /* check for presence of gquadruplex */
174           if ((S[i] == 3) && (S[j] == 3)) {
175             int L, l[3];
176             get_gquad_pattern_pf(S, i, j, pf_params, &L, l);
177             for (k = 0; k < L; k++) {
178               centroid[i + k - 1] \
179                             = centroid[i + k + L + l[0] - 1] \
180                             = centroid[i + k + 2 * L + l[0] + l[1] - 1] \
181                             = centroid[i + k + 3 * L + l[0] + l[1] + l[2] - 1] \
182                             = '+';
183             }
184             /* skip everything within the gquad */
185             i     = j;
186             j     = j + turn + 1;
187             *dist += (1 - p); /* right? */
188             break;
189           }
190         }
191 
192         /* regular base pair */
193         centroid[i - 1] = '(';
194         centroid[j - 1] = ')';
195         *dist           += (1 - p);
196       } else {
197         *dist += p;
198       }
199     }
200 
201   centroid[length] = '\0';
202   return centroid;
203 }
204 
205 
206 /*###########################################*/
207 /*# deprecated functions below              #*/
208 /*###########################################*/
209 
210 
211 /* this function is a threadsafe replacement for centroid() */
212 PUBLIC char *
get_centroid_struct_pl(int length,double * dist,vrna_ep_t * pl)213 get_centroid_struct_pl(int        length,
214                        double     *dist,
215                        vrna_ep_t  *pl)
216 {
217   return vrna_centroid_from_plist(length, dist, pl);
218 }
219 
220 
221 /* this function is a threadsafe replacement for centroid() */
222 PUBLIC char *
get_centroid_struct_pr(int length,double * dist,FLT_OR_DBL * probs)223 get_centroid_struct_pr(int        length,
224                        double     *dist,
225                        FLT_OR_DBL *probs)
226 {
227   return vrna_centroid_from_probs(length, dist, probs);
228 }
229