1 /*
2 * centroid structure prediction
3 *
4 * Ivo L Hofacker + Ronny Lorenz
5 * Vienna RNA package
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <math.h>
16
17 #include "ViennaRNA/utils/basic.h"
18 #include "ViennaRNA/fold_vars.h"
19 #include "ViennaRNA/gquad.h"
20 #include "ViennaRNA/centroid.h"
21
22 /*
23 #################################
24 # GLOBAL VARIABLES #
25 #################################
26 */
27
28 /*
29 #################################
30 # PRIVATE VARIABLES #
31 #################################
32 */
33
34 /*
35 #################################
36 # PRIVATE FUNCTION DECLARATIONS #
37 #################################
38 */
39
40 /*
41 #################################
42 # BEGIN OF FUNCTION DEFINITIONS #
43 #################################
44 */
45
46 /* compute the centroid structure of the ensemble, i.e. the strutcure
47 * with the minimal average distance to all other structures
48 * <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
49 * Thus, the centroid is simply the structure containing all pairs with
50 * p_ij>0.5
51 */
52 PUBLIC char *
vrna_centroid_from_plist(int length,double * dist,vrna_ep_t * pl)53 vrna_centroid_from_plist(int length,
54 double *dist,
55 vrna_ep_t *pl)
56 {
57 int i;
58 char *centroid;
59
60 if (pl == NULL) {
61 vrna_message_warning("vrna_centroid_from_plist: "
62 "pl == NULL!");
63 return NULL;
64 }
65
66 *dist = 0.;
67 centroid = (char *)vrna_alloc((length + 1) * sizeof(char));
68 for (i = 0; i < length; i++)
69 centroid[i] = '.';
70 for (i = 0; pl[i].i > 0; i++) {
71 if ((pl[i].p) > 0.5) {
72 centroid[pl[i].i - 1] = '(';
73 centroid[pl[i].j - 1] = ')';
74 *dist += (1 - pl[i].p);
75 } else {
76 *dist += pl[i].p;
77 }
78 }
79 centroid[length] = '\0';
80 return centroid;
81 }
82
83
84 /* compute the centroid structure of the ensemble, i.e. the strutcure
85 * with the minimal average distance to all other structures
86 * <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
87 * Thus, the centroid is simply the structure containing all pairs with
88 * p_ij>0.5
89 */
90 PUBLIC char *
vrna_centroid_from_probs(int length,double * dist,FLT_OR_DBL * probs)91 vrna_centroid_from_probs(int length,
92 double *dist,
93 FLT_OR_DBL *probs)
94 {
95 int i, j;
96 FLT_OR_DBL p;
97 char *centroid;
98 int *index = vrna_idx_row_wise(length);
99
100 if (probs == NULL) {
101 vrna_message_warning("vrna_centroid_from_probs: "
102 "probs == NULL!");
103 return NULL;
104 }
105
106 *dist = 0.;
107 centroid = (char *)vrna_alloc((length + 1) * sizeof(char));
108 for (i = 0; i < length; i++)
109 centroid[i] = '.';
110 for (i = 1; i <= length; i++)
111 for (j = i + TURN + 1; j <= length; j++) {
112 if ((p = probs[index[i] - j]) > 0.5) {
113 centroid[i - 1] = '(';
114 centroid[j - 1] = ')';
115 *dist += (1 - p);
116 } else {
117 *dist += p;
118 }
119 }
120 free(index);
121 centroid[length] = '\0';
122 return centroid;
123 }
124
125
126 /* compute the centroid structure of the ensemble, i.e. the strutcure
127 * with the minimal average distance to all other structures
128 * <d(S)> = \sum_{(i,j) \in S} (1-p_{ij}) + \sum_{(i,j) \notin S} p_{ij}
129 * Thus, the centroid is simply the structure containing all pairs with
130 * p_ij>0.5
131 */
132 PUBLIC char *
vrna_centroid(vrna_fold_compound_t * vc,double * dist)133 vrna_centroid(vrna_fold_compound_t *vc,
134 double *dist)
135 {
136 int i, j, k, length, turn;
137 FLT_OR_DBL p;
138 char *centroid;
139 short *S;
140 vrna_mx_pf_t *matrices;
141 FLT_OR_DBL *probs;
142 int *my_iindx;
143 vrna_exp_param_t *pf_params;
144
145
146 if (!vc) {
147 vrna_message_warning("vrna_centroid: "
148 "run vrna_pf_fold first!");
149 return NULL;
150 } else if (!vc->exp_matrices->probs) {
151 vrna_message_warning("vrna_centroid: "
152 "probs == NULL!");
153 return NULL;
154 }
155
156 length = vc->length;
157 pf_params = vc->exp_params;
158 S = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : vc->S_cons;
159 my_iindx = vc->iindx;
160
161 matrices = vc->exp_matrices;
162 probs = matrices->probs;
163 turn = pf_params->model_details.min_loop_size;
164
165 *dist = 0.;
166 centroid = (char *)vrna_alloc((length + 1) * sizeof(char));
167 for (i = 0; i < length; i++)
168 centroid[i] = '.';
169 for (i = 1; i <= length; i++)
170 for (j = i + turn + 1; j <= length; j++) {
171 if ((p = probs[my_iindx[i] - j]) > 0.5) {
172 if (pf_params->model_details.gquad) {
173 /* check for presence of gquadruplex */
174 if ((S[i] == 3) && (S[j] == 3)) {
175 int L, l[3];
176 get_gquad_pattern_pf(S, i, j, pf_params, &L, l);
177 for (k = 0; k < L; k++) {
178 centroid[i + k - 1] \
179 = centroid[i + k + L + l[0] - 1] \
180 = centroid[i + k + 2 * L + l[0] + l[1] - 1] \
181 = centroid[i + k + 3 * L + l[0] + l[1] + l[2] - 1] \
182 = '+';
183 }
184 /* skip everything within the gquad */
185 i = j;
186 j = j + turn + 1;
187 *dist += (1 - p); /* right? */
188 break;
189 }
190 }
191
192 /* regular base pair */
193 centroid[i - 1] = '(';
194 centroid[j - 1] = ')';
195 *dist += (1 - p);
196 } else {
197 *dist += p;
198 }
199 }
200
201 centroid[length] = '\0';
202 return centroid;
203 }
204
205
206 /*###########################################*/
207 /*# deprecated functions below #*/
208 /*###########################################*/
209
210
211 /* this function is a threadsafe replacement for centroid() */
212 PUBLIC char *
get_centroid_struct_pl(int length,double * dist,vrna_ep_t * pl)213 get_centroid_struct_pl(int length,
214 double *dist,
215 vrna_ep_t *pl)
216 {
217 return vrna_centroid_from_plist(length, dist, pl);
218 }
219
220
221 /* this function is a threadsafe replacement for centroid() */
222 PUBLIC char *
get_centroid_struct_pr(int length,double * dist,FLT_OR_DBL * probs)223 get_centroid_struct_pr(int length,
224 double *dist,
225 FLT_OR_DBL *probs)
226 {
227 return vrna_centroid_from_probs(length, dist, probs);
228 }
229