1 /*
2 * ViennaRNA/utils/structure_tree.c
3 *
4 * Various functions for tree representation of secondary strucures
5 *
6 * c Ivo L Hofacker, Walter Fontana, Ronny Lorenz
7 * ViennaRNA package
8 */
9
10 #ifdef HAVE_CONFIG_H
11 #include "config.h"
12 #endif
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <ctype.h>
18 #include <limits.h>
19
20 #include "ViennaRNA/utils/basic.h"
21 #include "ViennaRNA/datastructures/char_stream.h"
22 #include "ViennaRNA/utils/strings.h"
23 #include "ViennaRNA/utils/structures.h"
24
25 /*
26 #################################
27 # PRIVATE FUNCTION DECLARATIONS #
28 #################################
29 */
30
31 PRIVATE char *
32 annotate_enclosing_pairs(const char *structure);
33
34
35 PRIVATE char *
36 db2HIT(const char *structure);
37
38
39 PRIVATE char *
40 db2Shapiro(const char *structure,
41 int with_stems,
42 int with_weights,
43 int with_external);
44
45
46 PRIVATE char *
47 db2ExpandedTree(const char *structure);
48
49
50 /*
51 #################################
52 # BEGIN OF FUNCTION DEFINITIONS #
53 #################################
54 */
55 PUBLIC char *
vrna_db_to_tree_string(const char * structure,unsigned int type)56 vrna_db_to_tree_string(const char *structure,
57 unsigned int type)
58 {
59 char *tree = NULL;
60
61 if (structure) {
62 switch (type) {
63 case VRNA_STRUCTURE_TREE_HIT:
64 tree = db2HIT(structure);
65 break;
66
67 case VRNA_STRUCTURE_TREE_SHAPIRO_SHORT:
68 tree = db2Shapiro(structure, 0, 0, 0);
69 break;
70
71 case VRNA_STRUCTURE_TREE_SHAPIRO:
72 tree = db2Shapiro(structure, 1, 0, 0);
73 break;
74
75 case VRNA_STRUCTURE_TREE_SHAPIRO_EXT:
76 tree = db2Shapiro(structure, 1, 0, 1);
77 break;
78
79 case VRNA_STRUCTURE_TREE_SHAPIRO_WEIGHT:
80 tree = db2Shapiro(structure, 1, 1, 1);
81 break;
82
83 case VRNA_STRUCTURE_TREE_EXPANDED:
84 tree = db2ExpandedTree(structure);
85 break;
86
87 default:
88 break;
89 }
90 }
91
92 return tree;
93 }
94
95
96 PUBLIC char *
vrna_tree_string_unweight(const char * structure)97 vrna_tree_string_unweight(const char *structure)
98 {
99 unsigned int i, l;
100 char *tree;
101
102 tree = NULL;
103
104 if (structure) {
105 tree = (char *)vrna_alloc(strlen(structure) + 1);
106
107 i = l = 0;
108 while (structure[i]) {
109 if (!isdigit((int)structure[i]))
110 tree[l++] = structure[i];
111
112 i++;
113 }
114 tree[l] = '\0';
115 /* resize to actual length */
116 tree = (char *)vrna_realloc(tree, sizeof(char) * (l + 1));
117 }
118
119 return tree;
120 }
121
122
123 PUBLIC char *
vrna_tree_string_to_db(const char * structure)124 vrna_tree_string_to_db(const char *structure)
125 {
126 unsigned int i, j, k, l, n, w, *match_paren;
127 char id[10], *db;
128 const char *temp;
129 int o;
130 vrna_cstr_t tmp_struct;
131
132 db = NULL;
133
134 if (structure) {
135 n = strlen(structure);
136 tmp_struct = vrna_cstr(4 * n, NULL);
137 match_paren = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n / 2 + 1));
138
139 i = n - 1;
140 o = 0;
141 k = 9;
142 id[k] = '\0';
143
144 while (1) {
145 switch (structure[i]) {
146 case '(':
147 if (o < 0) {
148 vrna_message_warning("vrna_tree_string_to_db(): "
149 "Unbalanced parenthesis detected in tree string!"
150 "Can't convert back to dot-bracket notation");
151 goto tree_string_to_db_exit;
152 }
153
154 for (j = 0; j < match_paren[o]; j++)
155 vrna_cstr_printf(tmp_struct, "(");
156
157 match_paren[o--] = 0;
158 break;
159
160 case 'U':
161 w = 1;
162 sscanf(id + k, "%9u", &w);
163 for (j = 0; j < w; j++)
164 vrna_cstr_printf(tmp_struct, ".");
165
166 k = 9;
167 break;
168
169 case 'P':
170 w = 1;
171 sscanf(id + k, "%9u", &w);
172 for (j = 0; j < w; j++)
173 vrna_cstr_printf(tmp_struct, ")");
174
175 match_paren[o] = w;
176 k = 9;
177 break;
178
179 case 'R':
180 break;
181
182 case ')':
183 o++;
184 break;
185
186 default:
187 if (!isdigit((int)structure[i])) {
188 vrna_message_warning("vrna_tree_string_to_db(): "
189 "Unsupported character \"%c\" detected in tree string! "
190 "Can't convert back to dot-bracket notation",
191 structure[i]);
192 goto tree_string_to_db_exit;
193 }
194
195 if (k == 0) {
196 vrna_message_warning("vrna_tree_string_unexpand(): "
197 "Node weight too large! "
198 "Can't convert back to dot-bracket notation");
199 goto tree_string_to_db_exit;
200 }
201
202 id[--k] = structure[i];
203 break;
204 }
205
206 if (i == 0)
207 break;
208
209 i--;
210 }
211
212 temp = vrna_cstr_string(tmp_struct);
213 l = strlen(temp);
214 db = (char *)vrna_alloc(sizeof(char) * (l + 1));
215
216 for (i = 0; i < l; i++)
217 db[i] = temp[l - i - 1];
218
219 db[l] = '\0';
220
221 tree_string_to_db_exit:
222
223 vrna_cstr_free(tmp_struct);
224 free(match_paren);
225 }
226
227 return db;
228 }
229
230
231 /*
232 #####################################
233 # BEGIN OF STATIC HELPER FUNCTIONS #
234 #####################################
235 */
236 PRIVATE char *
annotate_enclosing_pairs(const char * structure)237 annotate_enclosing_pairs(const char *structure)
238 {
239 int *match;
240 int i, n, o, p;
241 char *annot;
242
243 annot = NULL;
244
245 if (structure) {
246 n = (int)strlen(structure);
247 annot = strdup(structure);
248 match = (int *)vrna_alloc(sizeof(int) * (n / 2 + 1));
249
250 for (i = o = 0; i < n; i++)
251 switch (annot[i]) {
252 case '.':
253 break;
254
255 case '(':
256 match[++o] = i;
257 break;
258
259 case ')':
260 p = i;
261 /* seek for outer-most pair in current helix */
262 while ((annot[p + 1] == ')') && (match[o - 1] == match[o] - 1)) {
263 p++;
264 o--;
265 }
266 /* annotate enclosing pair */
267 annot[p] = ']';
268 annot[match[o]] = '[';
269 i = p;
270 o--;
271 break;
272
273 default:
274 vrna_message_warning("annotate_enclosing_pairs: "
275 "Dot-braket string contains junk character \"%c\"",
276 annot[i]);
277 free(annot);
278 free(match);
279 return NULL;
280 }
281
282 free(match);
283 }
284
285 return annot;
286 }
287
288
289 PRIVATE char *
db2HIT(const char * structure)290 db2HIT(const char *structure)
291 {
292 unsigned int i, p, u, n;
293 char *HIT, *annot_struct;
294 vrna_cstr_t tmp_struct;
295
296 HIT = NULL;
297 annot_struct = annotate_enclosing_pairs(structure);
298 if (annot_struct) {
299 n = strlen(structure);
300 tmp_struct = vrna_cstr(4 * n, NULL);
301
302 /* start of tree */
303 vrna_cstr_printf(tmp_struct, "(");
304
305 for (i = p = u = 0; i < n; i++) {
306 switch (annot_struct[i]) {
307 case '.':
308 u++;
309 break;
310
311 case '[':
312 if (u > 0) {
313 vrna_cstr_printf(tmp_struct, "(U%d)", u);
314 u = 0;
315 }
316
317 vrna_cstr_printf(tmp_struct, "(");
318 break;
319
320 case ')':
321 if (u > 0) {
322 vrna_cstr_printf(tmp_struct, "(U%d)", u);
323 u = 0;
324 }
325
326 p++;
327 break;
328
329 case ']':
330 if (u > 0) {
331 vrna_cstr_printf(tmp_struct, "(U%d)", u);
332 u = 0;
333 }
334
335 vrna_cstr_printf(tmp_struct, "P%d)", p + 1);
336 p = 0;
337 break;
338 }
339 }
340
341 if (u > 0)
342 vrna_cstr_printf(tmp_struct, "(U%d)", u);
343
344 /* add root and end of tree */
345 vrna_cstr_printf(tmp_struct, "R)");
346
347 HIT = strdup(vrna_cstr_string(tmp_struct));
348
349 vrna_cstr_free(tmp_struct);
350 free(annot_struct);
351 }
352
353 return HIT;
354 }
355
356
357 PRIVATE char *
db2Shapiro(const char * structure,int with_stems,int with_weights,int with_external)358 db2Shapiro(const char *structure,
359 int with_stems,
360 int with_weights,
361 int with_external)
362 {
363 unsigned int i, n, p, lp, loops, pairs, unpaired, *loop_size, *loop, *bulge,
364 *loop_degree, *helix_size;
365 char *tree, *annot_struct;
366 vrna_cstr_t tmp_struct;
367
368 tree = NULL;
369 annot_struct = annotate_enclosing_pairs(structure);
370
371 if (annot_struct) {
372 n = strlen(structure);
373 tmp_struct = vrna_cstr(4 * n, NULL);
374
375 loop_size = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n / 2 + 1));
376 helix_size = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n / 2 + 1));
377 loop = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n / 2 + 1));
378 bulge = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n / 2 + 1));
379 loop_degree = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n / 2 + 1));
380
381 p = lp = loops = pairs = unpaired = 0;
382
383 for (i = 0; i < n; i++) {
384 switch (annot_struct[i]) {
385 case '.':
386 unpaired++;
387 loop_size[loop[lp]]++;
388 break;
389
390 case '[':
391 vrna_cstr_printf(tmp_struct, "(");
392
393 if (with_stems)
394 vrna_cstr_printf(tmp_struct, "(");
395
396 if ((i > 0) && (annot_struct[i - 1] == '(' || annot_struct[i - 1] == '['))
397 bulge[lp] = 1;
398
399 lp++;
400 loop_degree[++loops] = 1;
401 loop[lp] = loops;
402 bulge[lp] = 0;
403 break;
404
405 case ')':
406 if (annot_struct[i - 1] == ']')
407 bulge[lp] = 1;
408
409 p++;
410 break;
411
412 case ']':
413 if (annot_struct[i - 1] == ']')
414 bulge[lp] = 1;
415
416 switch (loop_degree[loop[lp]]) {
417 case 1:
418 vrna_cstr_printf(tmp_struct, "H"); /* hairpin */
419 break;
420
421 case 2:
422 if (bulge[lp] == 1)
423 vrna_cstr_printf(tmp_struct, "B"); /* bulge */
424 else
425 vrna_cstr_printf(tmp_struct, "I"); /* internal loop */
426
427 break;
428
429 default:
430 vrna_cstr_printf(tmp_struct, "M"); /* multiloop */
431 }
432
433 helix_size[loop[lp]] = p + 1;
434
435 if (with_weights)
436 vrna_cstr_printf(tmp_struct,
437 "%d",
438 loop_size[loop[lp]]);
439
440 vrna_cstr_printf(tmp_struct, ")");
441
442 if (with_stems) {
443 vrna_cstr_printf(tmp_struct, "S");
444 if (with_weights)
445 vrna_cstr_printf(tmp_struct,
446 "%d",
447 helix_size[loop[lp]]);
448
449 vrna_cstr_printf(tmp_struct, ")");
450 }
451
452 pairs += p + 1;
453 p = 0;
454 loop_degree[loop[--lp]]++;
455 break;
456 }
457 }
458
459 if ((with_external) && (loop_size[0])) {
460 if (with_weights)
461 tree = vrna_strdup_printf("((%sE%d)R)",
462 vrna_cstr_string(tmp_struct),
463 loop_size[0]);
464 else
465 tree = vrna_strdup_printf("((%sE)R)",
466 vrna_cstr_string(tmp_struct));
467 } else {
468 /* add root and end of tree */
469 tree = vrna_strdup_printf("(%sR)",
470 vrna_cstr_string(tmp_struct));
471 }
472
473 vrna_cstr_free(tmp_struct);
474 free(loop_degree);
475 free(loop_size);
476 free(helix_size);
477 free(loop);
478 free(bulge);
479 free(annot_struct);
480 }
481
482 return tree;
483 }
484
485
486 PRIVATE char *
db2ExpandedTree(const char * structure)487 db2ExpandedTree(const char *structure)
488 {
489 unsigned int i, n;
490 char *tree;
491 vrna_cstr_t tmp_struct;
492
493 n = strlen(structure);
494 tmp_struct = vrna_cstr(4 * n, NULL);
495
496 for (i = 0; i < n; i++) {
497 if (structure[i] == '(')
498 vrna_cstr_printf(tmp_struct, "(");
499 else if (structure[i] == ')')
500 vrna_cstr_printf(tmp_struct, "P)");
501 else
502 vrna_cstr_printf(tmp_struct, "(U)");
503 }
504
505 tree = vrna_strdup_printf("(%sR)",
506 vrna_cstr_string(tmp_struct));
507
508 vrna_cstr_free(tmp_struct);
509
510 return tree;
511 }
512