1 /*
2 * ViennaRNA/utils/structures.c
3 *
4 * Various functions to convert, parse, encode secondary structures
5 *
6 * c Ivo L Hofacker, Walter Fontana, Ronny Lorenz
7 * Vienna RNA package
8 */
9
10 #ifdef HAVE_CONFIG_H
11 #include "config.h"
12 #endif
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <math.h>
18 #include <limits.h>
19
20 #include "ViennaRNA/fold_vars.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/params/basic.h"
23 #include "ViennaRNA/gquad.h"
24 #include "ViennaRNA/utils/structures.h"
25 #include "ViennaRNA/MEA.h"
26
27 #ifdef __GNUC__
28 # define INLINE inline
29 #else
30 # define INLINE
31 #endif
32
33
34 #define SHAPES_OPEN '['
35 #define SHAPES_CLOSE ']'
36 #define SHAPES_UP '_'
37
38 struct shrep {
39 struct shrep *pred;
40 struct shrep *succ;
41 char character;
42 };
43
44
45 /*
46 #################################
47 # PRIVATE FUNCTION DECLARATIONS #
48 #################################
49 */
50 PRIVATE vrna_ep_t *
51 wrap_get_plist(vrna_mx_pf_t *matrices,
52 int length,
53 int *index,
54 short *S,
55 vrna_exp_param_t *pf_params,
56 double cut_off);
57
58
59 PRIVATE vrna_ep_t *
60 wrap_plist(vrna_fold_compound_t *vc,
61 double cut_off);
62
63
64 PRIVATE void
65 assign_elements_pair(short *pt,
66 int i,
67 int j,
68 char *elements);
69
70
71 PRIVATE INLINE void
72 flatten_brackets(char *string,
73 const char pair[3],
74 const char target[3]);
75
76
77 PRIVATE INLINE int
78 extract_pairs(short *pt,
79 const char *structure,
80 const char *pair);
81
82
83 PRIVATE INLINE struct shrep *
84 shrep_insert_after(struct shrep *target,
85 char character);
86
87
88 PRIVATE INLINE struct shrep *
89 shrep_insert_before(struct shrep *target,
90 char character);
91
92
93 PRIVATE INLINE void
94 shrep_concat(struct shrep **target,
95 struct shrep *suffix);
96
97
98 PRIVATE INLINE char *
99 db2shapes(const char *structure,
100 unsigned int level);
101
102
103 PRIVATE INLINE char *
104 db2shapes_pt(const short *pt,
105 unsigned int n,
106 unsigned int level);
107
108
109 PRIVATE INLINE struct shrep *
110 get_shrep(const short *pt,
111 unsigned int start,
112 unsigned int end,
113 unsigned int level);
114
115
116 /*
117 #################################
118 # BEGIN OF FUNCTION DEFINITIONS #
119 #################################
120 */
121 PUBLIC char *
vrna_db_pack(const char * struc)122 vrna_db_pack(const char *struc)
123 {
124 /* 5:1 compression using base 3 encoding */
125 int i, j, l, pi;
126 unsigned char *packed;
127
128 l = (int)strlen(struc);
129 packed = (unsigned char *)vrna_alloc(((l + 4) / 5 + 1) * sizeof(unsigned char));
130
131 j = i = pi = 0;
132 while (i < l) {
133 register int p;
134 for (p = pi = 0; pi < 5; pi++) {
135 p *= 3;
136 switch (struc[i]) {
137 case '(':
138 case '\0':
139 break;
140 case ')':
141 p++;
142 break;
143 case '.':
144 p += 2;
145 break;
146 default:
147 vrna_message_warning("vrna_db_pack: "
148 "illegal character %c at position %d in structure\n%s",
149 struc[i],
150 i + 1,
151 struc);
152 return NULL;
153 }
154 if (i < l)
155 i++;
156 }
157 packed[j++] = (unsigned char)(p + 1); /* never use 0, so we can use
158 * strcmp() etc. */
159 }
160 packed[j] = '\0'; /* for str*() functions */
161 return (char *)packed;
162 }
163
164
165 PUBLIC char *
vrna_db_unpack(const char * packed)166 vrna_db_unpack(const char *packed)
167 {
168 /* 5:1 compression using base 3 encoding */
169 int i, j, l;
170 char *struc;
171 unsigned const char *pp;
172 char code[3] = {
173 '(', ')', '.'
174 };
175
176 l = (int)strlen(packed);
177 pp = (const unsigned char *)packed;
178 struc = (char *)vrna_alloc((l * 5 + 1) * sizeof(char)); /* up to 4 byte extra */
179
180 for (i = j = 0; i < l; i++) {
181 register int p, c, k;
182
183 p = (int)pp[i] - 1;
184 for (k = 4; k >= 0; k--) {
185 c = p % 3;
186 p /= 3;
187 struc[j + k] = code[c];
188 }
189 j += 5;
190 }
191 struc[j--] = '\0';
192 /* strip trailing ( */
193 while ((j >= 0) &&
194 (struc[j] == '('))
195 struc[j--] = '\0';
196
197 return struc;
198 }
199
200
201 PUBLIC short *
vrna_ptable(const char * structure)202 vrna_ptable(const char *structure)
203 {
204 return vrna_ptable_from_string(structure, VRNA_BRACKETS_RND);
205 }
206
207
208 PUBLIC short *
vrna_pt_pk_get(const char * structure)209 vrna_pt_pk_get(const char *structure)
210 {
211 return vrna_ptable_from_string(structure, VRNA_BRACKETS_RND | VRNA_BRACKETS_SQR);
212 }
213
214
215 PUBLIC short *
vrna_pt_snoop_get(const char * structure)216 vrna_pt_snoop_get(const char *structure)
217 {
218 return vrna_ptable_from_string(structure, VRNA_BRACKETS_ANG);
219 }
220
221
222 PUBLIC short *
vrna_pt_ali_get(const char * structure)223 vrna_pt_ali_get(const char *structure)
224 {
225 return vrna_ptable_from_string(structure,
226 VRNA_BRACKETS_RND | VRNA_BRACKETS_ANG | VRNA_BRACKETS_SQR);
227 }
228
229
230 PUBLIC short *
vrna_ptable_copy(const short * pt)231 vrna_ptable_copy(const short *pt)
232 {
233 short *table = (short *)vrna_alloc(sizeof(short) * (pt[0] + 2));
234
235 memcpy(table, pt, sizeof(short) * (pt[0] + 2));
236 return table;
237 }
238
239
240 PUBLIC int *
vrna_loopidx_from_ptable(const short * pt)241 vrna_loopidx_from_ptable(const short *pt)
242 {
243 /* number each position by which loop it belongs to (positions start
244 * at 1) */
245 int i, hx, l, nl;
246 int length;
247 int *stack = NULL;
248 int *loop = NULL;
249
250 length = pt[0];
251 stack = (int *)vrna_alloc(sizeof(int) * (length + 1));
252 loop = (int *)vrna_alloc(sizeof(int) * (length + 2));
253 hx = l = nl = 0;
254
255 for (i = 1; i <= length; i++) {
256 if ((pt[i] != 0) && (i < pt[i])) {
257 /* ( */
258 nl++;
259 l = nl;
260 stack[hx++] = i;
261 }
262
263 loop[i] = l;
264
265 if ((pt[i] != 0) && (i > pt[i])) {
266 /* ) */
267 --hx;
268 if (hx > 0)
269 l = loop[stack[hx - 1]]; /* index of enclosing loop */
270 else
271 l = 0; /* external loop has index 0 */
272
273 if (hx < 0) {
274 vrna_message_warning("vrna_loopidx_from_ptable: "
275 "unbalanced brackets in make_pair_table");
276 free(stack);
277 return NULL;
278 }
279 }
280 }
281 loop[0] = nl;
282 free(stack);
283 return loop;
284 }
285
286
287 PUBLIC short *
vrna_pt_pk_remove(const short * ptable,unsigned int options)288 vrna_pt_pk_remove(const short *ptable,
289 unsigned int options)
290 {
291 short *pt = NULL;
292
293 if (ptable) {
294 char *mea_structure;
295 unsigned int i, j, n;
296 vrna_ep_t *pairs;
297
298 n = (unsigned int)ptable[0];
299 mea_structure = (char *)vrna_alloc(sizeof(char) * (n + 1));
300 pairs = (vrna_ep_t *)vrna_alloc(sizeof(vrna_ep_t) * (n + 1));
301
302 /* compose list of pairs to be used in MEA() function */
303 for (j = 0, i = 1; i <= n; i++)
304 if (ptable[i] > i) {
305 pairs[j].i = i;
306 pairs[j].j = ptable[i];
307 pairs[j].p = 1.;
308 pairs[j].type = VRNA_PLIST_TYPE_BASEPAIR;
309 j++;
310 }
311
312 pairs[j].i = 0;
313 pairs[j].j = 0;
314 pairs[j].p = 0;
315 pairs[j].type = VRNA_PLIST_TYPE_BASEPAIR;
316
317 /* use MEA() implementation to remove crossing base pairs */
318 memset(mea_structure, '.', n);
319
320 (void)MEA(pairs, mea_structure, 2.0);
321
322 /* convert dot-bracket structure to pair table */
323 pt = vrna_ptable(mea_structure);
324
325 free(mea_structure);
326 free(pairs);
327 }
328
329 return pt;
330 }
331
332
333 PUBLIC char *
vrna_db_pk_remove(const char * structure,unsigned int options)334 vrna_db_pk_remove(const char *structure,
335 unsigned int options)
336 {
337 char *s;
338 short *pt_pk, *pt;
339
340 s = NULL;
341
342 if (structure) {
343 pt_pk = vrna_ptable_from_string(structure, options & VRNA_BRACKETS_ANY);
344 pt = vrna_pt_pk_remove(pt_pk, options);
345 s = vrna_db_from_ptable(pt);
346
347 free(pt_pk);
348 free(pt);
349 }
350
351 return s;
352 }
353
354
355 PUBLIC char *
vrna_db_from_ptable(short * pt)356 vrna_db_from_ptable(short *pt)
357 {
358 unsigned int n;
359 int i;
360 char *dotbracket = NULL;
361
362 if (pt) {
363 n = (unsigned int)pt[0];
364 if (n > 0) {
365 dotbracket = (char *)vrna_alloc((n + 1) * sizeof(char));
366 memset(dotbracket, '.', n);
367
368 for (i = 1; i <= n; i++) {
369 if (pt[i] > i) {
370 dotbracket[i - 1] = '(';
371 dotbracket[pt[i] - 1] = ')';
372 }
373 }
374 dotbracket[i - 1] = '\0';
375 }
376 }
377
378 return dotbracket;
379 }
380
381
382 PUBLIC void
vrna_db_flatten(char * string,unsigned int options)383 vrna_db_flatten(char *string,
384 unsigned int options)
385 {
386 vrna_db_flatten_to(string, "()", options);
387 }
388
389
390 PUBLIC void
vrna_db_flatten_to(char * string,const char target[3],unsigned int options)391 vrna_db_flatten_to(char *string,
392 const char target[3],
393 unsigned int options)
394 {
395 if (string) {
396 if (options & VRNA_BRACKETS_RND)
397 flatten_brackets(string, "()", target);
398
399 if (options & VRNA_BRACKETS_ANG)
400 flatten_brackets(string, "<>", target);
401
402 if (options & VRNA_BRACKETS_CLY)
403 flatten_brackets(string, "{}", target);
404
405 if (options & VRNA_BRACKETS_SQR)
406 flatten_brackets(string, "<>", target);
407
408 if (options & VRNA_BRACKETS_ALPHA) {
409 char pairs[3];
410
411 for (int i = 65; i < 91; i++) {
412 pairs[0] = (char)i;
413 pairs[1] = (char)(i + 32);
414 pairs[2] = '\0';
415 flatten_brackets(string, pairs, target);
416 }
417 }
418 }
419 }
420
421
422 PUBLIC short *
vrna_ptable_from_string(const char * string,unsigned int options)423 vrna_ptable_from_string(const char *string,
424 unsigned int options)
425 {
426 char pairs[3];
427 short *pt;
428 unsigned int i, n;
429
430 n = strlen(string);
431
432 if (n > SHRT_MAX) {
433 vrna_message_warning("vrna_ptable_from_string: "
434 "Structure too long to be converted to pair table (n=%d, max=%d)",
435 n,
436 SHRT_MAX);
437 return NULL;
438 }
439
440 pt = (short *)vrna_alloc(sizeof(short) * (n + 2));
441 pt[0] = (short)n;
442
443
444 if ((options & VRNA_BRACKETS_RND) &&
445 (!extract_pairs(pt, string, "()"))) {
446 free(pt);
447 return NULL;
448 }
449
450 if ((options & VRNA_BRACKETS_ANG) &&
451 (!extract_pairs(pt, string, "<>"))) {
452 free(pt);
453 return NULL;
454 }
455
456 if ((options & VRNA_BRACKETS_CLY) &&
457 (!extract_pairs(pt, string, "{}"))) {
458 free(pt);
459 return NULL;
460 }
461
462 if ((options & VRNA_BRACKETS_SQR) &&
463 (!extract_pairs(pt, string, "[]"))) {
464 free(pt);
465 return NULL;
466 }
467
468 if (options & VRNA_BRACKETS_ALPHA) {
469 for (i = 65; i < 91; i++) {
470 pairs[0] = (char)i;
471 pairs[1] = (char)(i + 32);
472 pairs[2] = '\0';
473 if (!extract_pairs(pt, string, pairs)) {
474 free(pt);
475 return NULL;
476 }
477 }
478 }
479
480 return pt;
481 }
482
483
484 PUBLIC char *
vrna_db_from_WUSS(const char * wuss)485 vrna_db_from_WUSS(const char *wuss)
486 {
487 char *db, *tmp;
488 short *pt;
489 int pos, L, l[3], i, p, q;
490 unsigned int n;
491
492 db = NULL;
493
494 if (wuss) {
495 n = strlen(wuss);
496
497 /*
498 * Note, in WUSS notation, matching pairs of (), <>, {}, [] are allowed but must not
499 * cross! Thus, we can simply flatten all brackets to ().
500 */
501 tmp = (char *)vrna_alloc(sizeof(char) * (n + 1));
502 tmp = (char *)memcpy(tmp, wuss, n + 1);
503
504 vrna_db_flatten(tmp, VRNA_BRACKETS_DEFAULT);
505
506 /* now convert flattened structure string to pair-table (removes pseudo-knots) */
507 pt = vrna_ptable_from_string(tmp, VRNA_BRACKETS_RND);
508
509 /* convert back to dot-bracket (replaces all special characters for unpaired positions) */
510 db = vrna_db_from_ptable(pt);
511
512 /* check for G-Quadruplexes, annotated as G quartets */
513 q = 1;
514 while ((pos = parse_gquad(wuss + q - 1, &L, l)) > 0) {
515 q += pos - 1;
516 p = q - 4 * L - l[0] - l[1] - l[2] + 1;
517
518 if (q > n)
519 break;
520
521 /* re-insert G-Quadruplex */
522 for (i = 0; i < L; i++) {
523 db[p + i - 1] = '+';
524 db[p + L + l[0] + i - 1] = '+';
525 db[p + (2 * L) + l[0] + l[1] + i - 1] = '+';
526 db[p + (3 * L) + l[0] + l[1] + l[2] + i - 1] = '+';
527 }
528 q++;
529 }
530
531 free(pt);
532 free(tmp);
533 }
534
535 return db;
536 }
537
538
539 /*---------------------------------------------------------------------------*/
540
541 PUBLIC int
vrna_bp_distance(const char * str1,const char * str2)542 vrna_bp_distance(const char *str1,
543 const char *str2)
544 {
545 /* dist = {number of base pairs in one structure but not in the other} */
546 /* same as edit distance with pair_open pair_close as move set */
547 int dist;
548 short i, l;
549 short *t1, *t2;
550
551 dist = 0;
552 t1 = vrna_ptable(str1);
553 t2 = vrna_ptable(str2);
554
555 if (t1 && t2) {
556 l = (t1[0] < t2[0]) ? t1[0] : t2[0]; /* minimum of the two lengths */
557
558 for (i = 1; i <= l; i++)
559 if (t1[i] != t2[i]) {
560 if (t1[i] > i)
561 dist++;
562
563 if (t2[i] > i)
564 dist++;
565 }
566 }
567
568 free(t1);
569 free(t2);
570
571 return dist;
572 }
573
574
575 PUBLIC double
vrna_dist_mountain(const char * str1,const char * str2,unsigned int p)576 vrna_dist_mountain(const char *str1,
577 const char *str2,
578 unsigned int p)
579 {
580 short *pt1, *pt2;
581 unsigned int i, n;
582 double distance, w, *f1, *f2;
583
584 distance = -1.;
585 f1 = NULL;
586 f2 = NULL;
587
588 if ((str1) && (str2)) {
589 n = strlen(str1);
590
591 if (n != strlen(str2)) {
592 vrna_message_warning("vrna_dist_mountain: input structures have unequal lengths!");
593 return distance;
594 }
595
596 pt1 = vrna_ptable(str1);
597 pt2 = vrna_ptable(str2);
598 f1 = (double *)vrna_alloc(sizeof(double) * (n + 1));
599 f2 = (double *)vrna_alloc(sizeof(double) * (n + 1));
600
601 /* count (mountain)heights for positions 1 <= i <= n */
602 for (w = 0., i = 1; i <= n; i++) {
603 if (pt1[i] == 0)
604 continue;
605
606 if (pt1[i] > i)
607 w += 1. / (double)(pt1[i] - i);
608 else
609 w -= 1. / (double)(i - pt1[i]);
610
611 f1[i] = w;
612 }
613
614 for (w = 0., i = 1; i <= n; i++) {
615 if (pt2[i] == 0)
616 continue;
617
618 if (pt2[i] > i)
619 w += 1. / (double)(pt2[i] - i);
620 else
621 w -= 1. / (double)(i - pt2[i]);
622
623 f2[i] = w;
624 }
625
626
627 /* finally, compute L_p-norm */
628 for (distance = 0., i = 1; i <= n; i++)
629 distance += pow(fabs(f1[i] - f2[i]), (double)p);
630
631 distance = pow(distance, 1. / (double)p);
632
633 free(pt1);
634 free(pt2);
635 free(f1);
636 free(f2);
637 }
638
639 return distance;
640 }
641
642
643 /* get a matrix containing the number of basepairs of a reference structure for each interval [i,j] with i<j
644 * access it via iindx!!!
645 */
646 PUBLIC unsigned int *
vrna_refBPcnt_matrix(const short * reference_pt,unsigned int turn)647 vrna_refBPcnt_matrix(const short *reference_pt,
648 unsigned int turn)
649 {
650 unsigned int i, j, k, ij, length;
651 int *iindx;
652 unsigned int *array;
653 unsigned int size;
654
655 length = (unsigned int)reference_pt[0];
656 size = ((length + 1) * (length + 2)) / 2;
657 iindx = vrna_idx_row_wise(length);
658 array = (unsigned int *)vrna_alloc(sizeof(unsigned int) * size);
659 /* matrix containing number of basepairs of reference structure1 in interval [i,j] */;
660 for (k = 0; k <= turn; k++)
661 for (i = 1; i <= length - k; i++) {
662 j = i + k;
663 ij = iindx[i] - j;
664 array[ij] = 0;
665 }
666
667 for (i = length - turn - 1; i >= 1; i--)
668 for (j = i + turn + 1; j <= length; j++) {
669 int bps;
670 ij = iindx[i] - j;
671 bps = array[ij + 1];
672 if ((i <= (unsigned int)reference_pt[j]) && ((unsigned int)reference_pt[j] < j))
673 bps++;
674
675 array[ij] = bps;
676 }
677 free(iindx);
678 return array;
679 }
680
681
682 PUBLIC unsigned int *
vrna_refBPdist_matrix(const short * pt1,const short * pt2,unsigned int turn)683 vrna_refBPdist_matrix(const short *pt1,
684 const short *pt2,
685 unsigned int turn)
686 {
687 unsigned int *array;
688 unsigned int n, size, i, j, ij, d;
689
690 n = (unsigned int)pt1[0];
691 size = ((n + 1) * (n + 2)) / 2;
692 array = (unsigned int *)vrna_alloc(sizeof(unsigned int) * size);
693 int *iindx = vrna_idx_row_wise(n);
694 for (i = n - turn - 1; i >= 1; i--) {
695 d = 0;
696 for (j = i + turn + 1; j <= n; j++) {
697 ij = iindx[i] - j;
698 d = array[ij + 1];
699 if (pt1[j] != pt2[j]) {
700 if (i <= (unsigned int)pt1[j] && (unsigned int)pt1[j] < j)
701 /* we got an additional base pair in reference structure 1 */
702 d++;
703
704 if (i <= (unsigned int)pt2[j] && (unsigned int)pt2[j] < j)
705 /* we got another base pair in reference structure 2 */
706 d++;
707 }
708
709 array[ij] = d;
710 }
711 }
712 free(iindx);
713 return array;
714 }
715
716
717 PUBLIC char
vrna_bpp_symbol(const float * x)718 vrna_bpp_symbol(const float *x)
719 {
720 /* if( ((x[1]-x[2])*(x[1]-x[2]))<0.1&&x[0]<=0.677) return '|'; */
721 if (x[0] > 0.667)
722 return '.';
723
724 if (x[1] > 0.667)
725 return '(';
726
727 if (x[2] > 0.667)
728 return ')';
729
730 if ((x[1] + x[2]) > x[0]) {
731 if ((x[1] / (x[1] + x[2])) > 0.667)
732 return '{';
733
734 if ((x[2] / (x[1] + x[2])) > 0.667)
735 return '}';
736 else
737 return '|';
738 }
739
740 if (x[0] > (x[1] + x[2]))
741 return ',';
742
743 return ':';
744 }
745
746
747 PUBLIC char *
vrna_db_from_probs(const FLT_OR_DBL * p,unsigned int length)748 vrna_db_from_probs(const FLT_OR_DBL *p,
749 unsigned int length)
750 {
751 int i, j, *index;
752 float P[3]; /* P[][0] unpaired, P[][1] upstream p, P[][2] downstream p */
753 char *s;
754
755 index = vrna_idx_row_wise(length);
756 s = (char *)vrna_alloc(sizeof(char) * (length + 1));
757
758 for (j = 1; j <= length; j++) {
759 P[0] = 1.0;
760 P[1] = P[2] = 0.0;
761 for (i = 1; i < j; i++) {
762 P[2] += (float)p[index[i] - j]; /* j is paired downstream */
763 P[0] -= (float)p[index[i] - j]; /* j is unpaired */
764 }
765 for (i = j + 1; i <= length; i++) {
766 P[1] += (float)p[index[j] - i]; /* j is paired upstream */
767 P[0] -= (float)p[index[j] - i]; /* j is unpaired */
768 }
769 s[j - 1] = vrna_bpp_symbol(P);
770 }
771 s[length] = '\0';
772 free(index);
773
774 return s;
775 }
776
777
778 PUBLIC void
vrna_letter_structure(char * structure,vrna_bp_stack_t * bp,unsigned int length)779 vrna_letter_structure(char *structure,
780 vrna_bp_stack_t *bp,
781 unsigned int length)
782 {
783 int n, k, x, y;
784 char alpha[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
785
786 if (length > 0) {
787 memset(structure, '.', length);
788 structure[length] = '\0';
789
790 for (n = 0, k = 1; k <= bp[0].i; k++) {
791 y = bp[k].j;
792 x = bp[k].i;
793 if ((x - 1 > 0) && (y + 1 <= length)) {
794 if (structure[x - 2] != ' ' && structure[y] == structure[x - 2]) {
795 structure[x - 1] = structure[x - 2];
796 structure[y - 1] = structure[x - 1];
797 continue;
798 }
799 }
800
801 if ((structure[x] != ' ') && (structure[y - 2] == structure[x])) {
802 structure[x - 1] = structure[x];
803 structure[y - 1] = structure[x - 1];
804 continue;
805 }
806
807 n++;
808 structure[x - 1] = alpha[n - 1];
809 structure[y - 1] = alpha[n - 1];
810 }
811 }
812 }
813
814
815 PUBLIC char *
vrna_db_from_bp_stack(vrna_bp_stack_t * bp,unsigned int length)816 vrna_db_from_bp_stack(vrna_bp_stack_t *bp,
817 unsigned int length)
818 {
819 int k, i, j, temp;
820 char *structure;
821
822 structure = vrna_alloc(sizeof(char) * (length + 1));
823
824 if (length > 0)
825 memset(structure, '.', length);
826
827 structure[length] = '\0';
828
829 for (k = 1; k <= bp[0].i; k++) {
830 i = bp[k].i;
831 j = bp[k].j;
832 if (i > length)
833 i -= length;
834
835 if (j > length)
836 j -= length;
837
838 if (i > j) {
839 temp = i;
840 i = j;
841 j = temp;
842 }
843
844 if (i == j) {
845 /* Gquad bonds are marked as bp[i].i == bp[i].j */
846 structure[i - 1] = '+';
847 } else {
848 /* the following ones are regular base pairs */
849 structure[i - 1] = '(';
850 structure[j - 1] = ')';
851 }
852 }
853 return structure;
854 }
855
856
857 PUBLIC vrna_ep_t *
vrna_plist(const char * struc,float pr)858 vrna_plist(const char *struc,
859 float pr)
860 {
861 /* convert bracket string to plist */
862 short *pt;
863 int i, k = 0, size, n;
864 vrna_ep_t *gpl, *ptr, *pl;
865
866 size = strlen(struc);
867 n = 2;
868
869 pt = vrna_ptable(struc);
870 pl = (vrna_ep_t *)vrna_alloc(n * size * sizeof(vrna_ep_t));
871 for (i = 1; i < size; i++) {
872 if (pt[i] > i) {
873 (pl)[k].i = i;
874 (pl)[k].j = pt[i];
875 (pl)[k].p = pr;
876 (pl)[k++].type = VRNA_PLIST_TYPE_BASEPAIR;
877 }
878 }
879
880 gpl = get_plist_gquad_from_db(struc, pr);
881 for (ptr = gpl; ptr->i != 0; ptr++) {
882 if (k == n * size - 1) {
883 n *= 2;
884 pl = (vrna_ep_t *)vrna_realloc(pl, n * size * sizeof(vrna_ep_t));
885 }
886
887 (pl)[k].i = ptr->i;
888 (pl)[k].j = ptr->j;
889 (pl)[k].p = ptr->p;
890 (pl)[k++].type = ptr->type;
891 }
892 free(gpl);
893
894 (pl)[k].i = 0;
895 (pl)[k].j = 0;
896 (pl)[k].p = 0.;
897 (pl)[k++].type = 0.;
898 free(pt);
899 pl = (vrna_ep_t *)vrna_realloc(pl, k * sizeof(vrna_ep_t));
900
901 return pl;
902 }
903
904
905 PUBLIC vrna_ep_t *
vrna_plist_from_probs(vrna_fold_compound_t * vc,double cut_off)906 vrna_plist_from_probs(vrna_fold_compound_t *vc,
907 double cut_off)
908 {
909 if (!vc)
910 vrna_message_warning("vrna_pl_get_from_pr: run vrna_pf_fold first!");
911 else if (!vc->exp_matrices->probs)
912 vrna_message_warning("vrna_pl_get_from_pr: probs==NULL!");
913 else
914 return wrap_plist(vc, cut_off);
915
916 return NULL;
917 }
918
919
920 PUBLIC char *
vrna_db_from_plist(vrna_ep_t * pairs,unsigned int n)921 vrna_db_from_plist(vrna_ep_t *pairs,
922 unsigned int n)
923 {
924 vrna_ep_t *ptr;
925 char *structure = NULL;
926
927 if (n > 0) {
928 structure = (char *)vrna_alloc(sizeof(char) * (n + 1));
929 memset(structure, '.', n);
930 structure[n] = '\0';
931
932 for (ptr = pairs; (*ptr).i; ptr++) {
933 if (((*ptr).i < n) && ((*ptr).j <= n)) {
934 structure[(*ptr).i - 1] = '(';
935 structure[(*ptr).j - 1] = ')';
936 }
937 }
938 }
939
940 return structure;
941 }
942
943
944 PUBLIC int
vrna_plist_append(vrna_ep_t ** target,const vrna_ep_t * list)945 vrna_plist_append(vrna_ep_t **target,
946 const vrna_ep_t *list)
947 {
948 int size1, size2;
949 const vrna_ep_t *ptr;
950
951 if ((target) && (list)) {
952 size1 = size2 = 0;
953
954 if (*target)
955 for (ptr = *target; ptr->i; size1++, ptr++);
956
957 for (ptr = list; ptr->i; size2++, ptr++);
958
959 *target = (vrna_ep_t *)vrna_realloc(*target, sizeof(vrna_ep_t) * (size1 + size2 + 1));
960
961 if (*target) {
962 memcpy(*target + size1, list, sizeof(vrna_ep_t) * size2);
963 (*target)[size1 + size2].i = (*target)[size1 + size2].j = 0;
964 return 1;
965 }
966 }
967
968 return 0;
969 }
970
971
972 PUBLIC vrna_hx_t *
vrna_hx_from_ptable(short * pt)973 vrna_hx_from_ptable(short *pt)
974 {
975 int i, k, n, l, s, *stack;
976 vrna_hx_t *list;
977
978 n = pt[0];
979 l = 0;
980 s = 1;
981 list = (vrna_hx_t *)vrna_alloc(sizeof(vrna_hx_t) * n / 2);
982 stack = (int *)vrna_alloc(sizeof(int) * n / 2);
983
984 stack[s] = 1;
985
986 do {
987 for (i = stack[s--]; i <= n; i++) {
988 if (pt[i] > (short)i) {
989 /* found a base pair */
990 k = i;
991 /* go through stack */
992 for (; pt[k + 1] == pt[k] - 1; k++);
993 list[l].start = i;
994 list[l].end = pt[i];
995 list[l].length = k - i + 1;
996 list[l].up5 = list[l].up3 = 0;
997 l++;
998 stack[++s] = pt[i] + 1;
999 stack[++s] = k + 1;
1000 break;
1001 } else if (pt[i]) {
1002 /* end of region */
1003 break;
1004 }
1005 }
1006 } while (s > 0);
1007
1008 list = vrna_realloc(list, (l + 1) * sizeof(vrna_hx_t));
1009 list[l].start = list[l].end = list[l].length = list[l].up5 = list[l].up3 = 0;
1010
1011 free(stack);
1012 return list;
1013 }
1014
1015
1016 PUBLIC vrna_hx_t *
vrna_hx_merge(const vrna_hx_t * list,int maxdist)1017 vrna_hx_merge(const vrna_hx_t *list,
1018 int maxdist)
1019 {
1020 int merged, i, j, s, neighbors, n;
1021 vrna_hx_t *merged_list;
1022
1023 for (n = 0; list[n].length > 0; n++); /* check size of list */
1024
1025 merged_list = (vrna_hx_t *)vrna_alloc(sizeof(vrna_hx_t) * (n + 1));
1026 memcpy(merged_list, list, sizeof(vrna_hx_t) * (n + 1));
1027
1028 s = n + 1;
1029
1030 do {
1031 merged = 0;
1032 for (i = 1; merged_list[i].length > 0; i++) {
1033 /*
1034 * GOAL: merge two consecutive helices i and i-1, if i-1
1035 * subsumes i, and not more than i
1036 */
1037
1038 /* 1st, check for neighbors */
1039 neighbors = 0;
1040 for (j = i + 1; merged_list[j].length > 0; j++) {
1041 if (merged_list[j].start > merged_list[i - 1].end)
1042 break;
1043
1044 if (merged_list[j].start < merged_list[i].end)
1045 continue;
1046
1047 neighbors = 1;
1048 }
1049 if (neighbors)
1050 continue;
1051
1052 /* check if we may merge i with i-1 */
1053 if (merged_list[i].end < merged_list[i - 1].end) {
1054 merged_list[i - 1].up5 += merged_list[i].start
1055 - merged_list[i - 1].start
1056 - merged_list[i - 1].length
1057 - merged_list[i - 1].up5
1058 + merged_list[i].up5;
1059 merged_list[i - 1].up3 += merged_list[i - 1].end
1060 - merged_list[i - 1].length
1061 - merged_list[i - 1].up3
1062 - merged_list[i].end
1063 + merged_list[i].up3;
1064 merged_list[i - 1].length += merged_list[i].length;
1065 /* splice out helix i */
1066 memmove(merged_list + i, merged_list + i + 1, sizeof(vrna_hx_t) * (n - i));
1067 s--;
1068 merged = 1;
1069 break;
1070 }
1071 }
1072 } while (merged);
1073
1074 merged_list = vrna_realloc(merged_list, sizeof(vrna_hx_t) * s);
1075
1076 return merged_list;
1077 }
1078
1079
1080 PUBLIC char *
vrna_db_to_element_string(const char * structure)1081 vrna_db_to_element_string(const char *structure)
1082 {
1083 char *elements;
1084 int n, i;
1085 short *pt;
1086
1087 elements = NULL;
1088
1089 if (structure) {
1090 n = (int)strlen(structure);
1091 pt = vrna_ptable(structure);
1092 elements = (char *)vrna_alloc(sizeof(char) * (n + 1));
1093
1094 for (i = 1; i <= n; i++) {
1095 if (!pt[i]) {
1096 /* mark nucleotides in exterior loop */
1097 elements[i - 1] = 'e';
1098 } else {
1099 assign_elements_pair(pt, i, pt[i], elements);
1100 i = pt[i];
1101 }
1102 }
1103
1104 elements[n] = '\0';
1105 free(pt);
1106 }
1107
1108 return elements;
1109 }
1110
1111
1112 PUBLIC char *
vrna_abstract_shapes(const char * structure,unsigned int level)1113 vrna_abstract_shapes(const char *structure,
1114 unsigned int level)
1115 {
1116 if (structure) {
1117 if (level > 5)
1118 level = 5;
1119
1120 return db2shapes(structure, level);
1121 }
1122
1123 return NULL;
1124 }
1125
1126
1127 PUBLIC char *
vrna_abstract_shapes_pt(const short * pt,unsigned int level)1128 vrna_abstract_shapes_pt(const short *pt,
1129 unsigned int level)
1130 {
1131 if (pt) {
1132 if (level > 5)
1133 level = 5;
1134
1135 return db2shapes_pt(pt, (unsigned int)pt[0], level);
1136 }
1137
1138 return NULL;
1139 }
1140
1141
1142 /*
1143 #####################################
1144 # BEGIN OF STATIC HELPER FUNCTIONS #
1145 #####################################
1146 */
1147 PRIVATE INLINE struct shrep *
shrep_insert_after(struct shrep * target,char character)1148 shrep_insert_after(struct shrep *target,
1149 char character)
1150 {
1151 struct shrep *entry, *ptr;
1152
1153 entry = (struct shrep *)vrna_alloc(sizeof(struct shrep));
1154 entry->character = character;
1155 entry->pred = NULL;
1156 entry->succ = NULL;
1157
1158 if (target) {
1159 for (ptr = target; ptr->succ; ptr = ptr->succ);
1160 entry->pred = ptr;
1161 ptr->succ = entry;
1162 }
1163
1164 return entry;
1165 }
1166
1167
1168 PRIVATE INLINE struct shrep *
shrep_insert_before(struct shrep * target,char character)1169 shrep_insert_before(struct shrep *target,
1170 char character)
1171 {
1172 struct shrep *entry, *ptr;
1173
1174 entry = (struct shrep *)vrna_alloc(sizeof(struct shrep));
1175 entry->character = character;
1176 entry->pred = NULL;
1177 entry->succ = NULL;
1178
1179 if (target) {
1180 for (ptr = target; ptr->pred; ptr = ptr->pred);
1181 entry->succ = ptr;
1182 ptr->pred = entry;
1183 }
1184
1185 return entry;
1186 }
1187
1188
1189 PRIVATE INLINE void
shrep_concat(struct shrep ** target,struct shrep * suffix)1190 shrep_concat(struct shrep **target,
1191 struct shrep *suffix)
1192 {
1193 struct shrep *ptr_s, *ptr_p;
1194
1195 ptr_p = *target;
1196 ptr_s = suffix;
1197
1198 if (ptr_p)
1199 for (; ptr_p->succ; ptr_p = ptr_p->succ);
1200
1201 if (ptr_s)
1202 for (; ptr_s->pred; ptr_s = ptr_s->pred);
1203
1204 if (!ptr_p)
1205 *target = ptr_s;
1206 else if (ptr_s) {
1207 ptr_p->succ = ptr_s;
1208 ptr_s->pred = ptr_p;
1209 }
1210 }
1211
1212
1213 PRIVATE INLINE char *
db2shapes(const char * structure,unsigned int level)1214 db2shapes(const char *structure,
1215 unsigned int level)
1216 {
1217 unsigned int n;
1218 char *SHAPE;
1219 short *pt;
1220
1221 n = strlen(structure);
1222 pt = vrna_ptable(structure);
1223
1224 SHAPE = db2shapes_pt(pt, n, level);
1225
1226 free(pt);
1227
1228 return SHAPE;
1229 }
1230
1231
1232 PRIVATE INLINE char *
db2shapes_pt(const short * pt,unsigned int n,unsigned int level)1233 db2shapes_pt(const short *pt,
1234 unsigned int n,
1235 unsigned int level)
1236 {
1237 unsigned int start;
1238 struct shrep *ptr, *ptr2;
1239 char *SHAPE;
1240
1241 SHAPE = NULL;
1242 start = 1;
1243
1244 ptr = get_shrep(pt, start, n, level);
1245
1246 if (ptr) {
1247 SHAPE = (char *)vrna_alloc(sizeof(char) * (n + 1));
1248
1249 for (; ptr->pred; ptr = ptr->pred);
1250
1251 for (n = 0; ptr; n++) {
1252 SHAPE[n] = ptr->character;
1253 ptr2 = ptr;
1254 ptr = ptr->succ;
1255 free(ptr2);
1256 }
1257
1258 free(ptr);
1259
1260 SHAPE = vrna_realloc(SHAPE, sizeof(char) * (n + 1));
1261 SHAPE[n] = '\0';
1262 }
1263
1264 return SHAPE;
1265 }
1266
1267
1268 PRIVATE INLINE struct shrep *
get_shrep(const short * pt,unsigned int start,unsigned int end,unsigned int level)1269 get_shrep(const short *pt,
1270 unsigned int start,
1271 unsigned int end,
1272 unsigned int level)
1273 {
1274 struct shrep *shape_string, *substring, *sub5, *sub3;
1275 unsigned int components, i, k, l, ext, u5, u3, cnt;
1276 unsigned char no_stack, bulge;
1277
1278 shape_string = NULL;
1279
1280 /* find out if there is more than one component */
1281 for (i = start, components = 0; i <= end; i++) {
1282 if (i < pt[i]) {
1283 components++;
1284 i = pt[i];
1285 if (components > 1)
1286 break;
1287 }
1288 }
1289
1290 /* find out if we process the exterior loop */
1291 for (i = start - 1, ext = 1; i > 0; i--) {
1292 if (pt[i] > end) {
1293 ext = 0;
1294 break;
1295 }
1296 }
1297
1298 for (; start <= end; start++) {
1299 if (start < pt[start]) { /* we have base pair (start, pt[start]) */
1300 substring = sub5 = sub3 = NULL;
1301 k = start + 1;
1302 l = pt[start] - 1;
1303
1304 while (1) {
1305 u5 = 0;
1306 u3 = 0;
1307
1308 for(; pt[k] == 0; k++, u5++); /* skip unpaired 5' side */
1309 for(; pt[l] == 0; l--, u3++); /* skip unpaired 3' side */
1310
1311 if (k >= l) { /* hairpin loop */
1312 if (level == 0)
1313 for (cnt = 0; cnt < u5; cnt++)
1314 substring = shrep_insert_after(substring, SHAPES_UP);
1315
1316 break;
1317 } else {
1318 if(pt[k] != l){ /* multi branch loop or external loop */
1319 substring = get_shrep(pt, k, l, level);
1320 if (level == 1) {
1321 if (u5)
1322 sub5 = shrep_insert_after(sub5, SHAPES_UP);
1323
1324 if (u3)
1325 sub3 = shrep_insert_before(sub3, SHAPES_UP);
1326 } else if (level == 0) {
1327 for (cnt = 0; cnt < u5; cnt++)
1328 sub5 = shrep_insert_after(sub5, SHAPES_UP);
1329
1330 for (cnt = 0; cnt < u3; cnt++)
1331 sub3 = shrep_insert_before(sub3, SHAPES_UP);
1332 }
1333
1334 break;
1335 } else {
1336 /* interior loop with enclosed pair (k,l) */
1337 no_stack = (u5 + u3 > 0) ? 1 : 0;
1338 bulge = ((u5 > 0) && (u3 > 0)) ? 1 : 0;
1339
1340 switch (level) {
1341 case 4:
1342 if (bulge) {
1343 sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1344 sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1345 }
1346 break;
1347
1348 case 3:
1349 if (no_stack) {
1350 sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1351 sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1352 }
1353 break;
1354
1355 case 2: /* fall through */
1356 case 1:
1357 if (no_stack) {
1358 if (u5)
1359 sub5 = shrep_insert_after(sub5, SHAPES_UP);
1360
1361 if (u3)
1362 sub3 = shrep_insert_before(sub3, SHAPES_UP);
1363
1364 sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1365 sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1366 }
1367 break;
1368
1369 case 0:
1370 for (unsigned int cnt = 0; cnt < u5; cnt++)
1371 sub5 = shrep_insert_after(sub5, SHAPES_UP);
1372
1373 for (unsigned int cnt = 0; cnt < u3; cnt++)
1374 sub3 = shrep_insert_before(sub3, SHAPES_UP);
1375
1376 sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1377 sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1378 break;
1379
1380 default:
1381 break;
1382 }
1383
1384 k++;
1385 l--;
1386 }
1387 }
1388 }
1389
1390 sub5 = shrep_insert_before(sub5, SHAPES_OPEN);
1391 shrep_concat(&sub5, substring);
1392 shrep_concat(&sub5, sub3);
1393 sub5 = shrep_insert_after(sub5, SHAPES_CLOSE);
1394 shrep_concat(&shape_string, sub5);
1395
1396 start = pt[start];
1397 } else if ((pt[start] == 0) && (level < 3)) {
1398 if (level == 0) {
1399 shape_string = shrep_insert_after(shape_string, SHAPES_UP);
1400 } else if ((level == 1) ||
1401 ((components < 2) && (ext == 0))) {
1402 shape_string = shrep_insert_after(shape_string, SHAPES_UP);
1403 for(; (start <= end) && (pt[start] == 0); start++);
1404 start--;
1405 }
1406 }
1407 }
1408
1409 return shape_string;
1410 }
1411
1412
1413 PRIVATE INLINE void
flatten_brackets(char * string,const char pair[3],const char target[3])1414 flatten_brackets(char *string,
1415 const char pair[3],
1416 const char target[3])
1417 {
1418 unsigned int i;
1419
1420 for (i = 0; string[i] != '\0'; i++) {
1421 if (string[i] == pair[0])
1422 string[i] = target[0];
1423 else if (string[i] == pair[1])
1424 string[i] = target[1];
1425 }
1426 }
1427
1428
1429 /* requires that pt[0] already contains the length of the string! */
1430 PRIVATE INLINE int
extract_pairs(short * pt,const char * structure,const char * pair)1431 extract_pairs(short *pt,
1432 const char *structure,
1433 const char *pair)
1434 {
1435 const char *ptr;
1436 char open, close;
1437 short *stack;
1438 unsigned int i, j, n;
1439 int hx;
1440
1441 n = (unsigned int)pt[0];
1442 stack = (short *)vrna_alloc(sizeof(short) * (n + 1));
1443
1444 open = pair[0];
1445 close = pair[1];
1446
1447 for (hx = 0, i = 1, ptr = structure; (i <= n) && (*ptr != '\0'); ptr++, i++) {
1448 if (*ptr == open) {
1449 stack[hx++] = i;
1450 } else if (*ptr == close) {
1451 j = stack[--hx];
1452
1453 if (hx < 0) {
1454 vrna_message_warning("%s\nunbalanced brackets '%2s' found while extracting base pairs",
1455 structure,
1456 pair);
1457 free(stack);
1458 return 0;
1459 }
1460
1461 pt[i] = j;
1462 pt[j] = i;
1463 }
1464 }
1465
1466 free(stack);
1467
1468 if (hx != 0) {
1469 vrna_message_warning("%s\nunbalanced brackets '%2s' found while extracting base pairs",
1470 structure,
1471 pair);
1472 return 0;
1473 }
1474
1475 return 1; /* success */
1476 }
1477
1478
1479 PRIVATE vrna_ep_t *
wrap_get_plist(vrna_mx_pf_t * matrices,int length,int * index,short * S,vrna_exp_param_t * pf_params,double cut_off)1480 wrap_get_plist(vrna_mx_pf_t *matrices,
1481 int length,
1482 int *index,
1483 short *S,
1484 vrna_exp_param_t *pf_params,
1485 double cut_off)
1486 {
1487 int i, j, k, n, count, gquad;
1488 FLT_OR_DBL *probs, *G, *scale;
1489 vrna_ep_t *pl;
1490
1491 probs = matrices->probs;
1492 G = matrices->G;
1493 scale = matrices->scale;
1494 gquad = pf_params->model_details.gquad;
1495
1496 count = 0;
1497 n = 2;
1498
1499 /* first guess of the size needed for pl */
1500 pl = (vrna_ep_t *)vrna_alloc(n * length * sizeof(vrna_ep_t));
1501
1502 for (i = 1; i < length; i++) {
1503 for (j = i + 1; j <= length; j++) {
1504 /* skip all entries below the cutoff */
1505 if (probs[index[i] - j] < (FLT_OR_DBL)cut_off)
1506 continue;
1507
1508 /* do we need to allocate more memory? */
1509 if (count == n * length - 1) {
1510 n *= 2;
1511 pl = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1512 }
1513
1514 /* check for presence of gquadruplex */
1515 if (gquad && (S[i] == 3) && (S[j] == 3)) {
1516 /* add probability of a gquadruplex at position (i,j)
1517 * for dot_plot
1518 */
1519 (pl)[count].i = i;
1520 (pl)[count].j = j;
1521 (pl)[count].p = (float)probs[index[i] - j];
1522 (pl)[count++].type = VRNA_PLIST_TYPE_GQUAD;
1523 /* now add the probabilies of it's actual pairing patterns */
1524 vrna_ep_t *inner, *ptr;
1525 inner = get_plist_gquad_from_pr(S, i, j, G, probs, scale, pf_params);
1526 for (ptr = inner; ptr->i != 0; ptr++) {
1527 if (count == n * length - 1) {
1528 n *= 2;
1529 pl = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1530 }
1531
1532 /* check if we've already seen this pair */
1533 for (k = 0; k < count; k++)
1534 if (((pl)[k].i == ptr->i) && ((pl)[k].j == ptr->j))
1535 break;
1536
1537 (pl)[k].i = ptr->i;
1538 (pl)[k].j = ptr->j;
1539 (pl)[k].type = VRNA_PLIST_TYPE_GQUAD;
1540 if (k == count) {
1541 (pl)[k].p = ptr->p;
1542 count++;
1543 } else {
1544 (pl)[k].p += ptr->p;
1545 }
1546 }
1547 } else {
1548 (pl)[count].i = i;
1549 (pl)[count].j = j;
1550 (pl)[count].p = (float)probs[index[i] - j];
1551 (pl)[count++].type = VRNA_PLIST_TYPE_BASEPAIR;
1552 }
1553 }
1554 }
1555 /* mark the end of pl */
1556 (pl)[count].i = 0;
1557 (pl)[count].j = 0;
1558 (pl)[count].type = 0;
1559 (pl)[count++].p = 0.;
1560 /* shrink memory to actual size needed */
1561 pl = (vrna_ep_t *)vrna_realloc(pl, count * sizeof(vrna_ep_t));
1562
1563 return pl;
1564 }
1565
1566
1567 PRIVATE vrna_ep_t *
wrap_plist(vrna_fold_compound_t * vc,double cut_off)1568 wrap_plist(vrna_fold_compound_t *vc,
1569 double cut_off)
1570 {
1571 short *S;
1572 int i, j, k, n, m, count, gquad, length, *index;
1573 FLT_OR_DBL *probs;
1574 vrna_ep_t *pl;
1575 vrna_mx_pf_t *matrices;
1576 vrna_exp_param_t *pf_params;
1577
1578 S = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : vc->S_cons;
1579 index = vc->iindx;
1580 length = vc->length;
1581 pf_params = vc->exp_params;
1582 matrices = vc->exp_matrices;
1583 probs = matrices->probs;
1584 gquad = pf_params->model_details.gquad;
1585
1586 count = 0;
1587 n = 2;
1588
1589 /* first guess of the size needed for pl */
1590 pl = (vrna_ep_t *)vrna_alloc(n * length * sizeof(vrna_ep_t));
1591
1592 for (i = 1; i < length; i++) {
1593 for (j = i + 1; j <= length; j++) {
1594 /* skip all entries below the cutoff */
1595 if (probs[index[i] - j] < (FLT_OR_DBL)cut_off)
1596 continue;
1597
1598 /* do we need to allocate more memory? */
1599 if (count == n * length - 1) {
1600 n *= 2;
1601 pl = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1602 }
1603
1604 /* check for presence of gquadruplex */
1605 if (gquad && (S[i] == 3) && (S[j] == 3)) {
1606 /* add probability of a gquadruplex at position (i,j)
1607 * for dot_plot
1608 */
1609 (pl)[count].i = i;
1610 (pl)[count].j = j;
1611 (pl)[count].p = (float)probs[index[i] - j];
1612 (pl)[count++].type = VRNA_PLIST_TYPE_GQUAD;
1613
1614 /* now add the probabilies of it's actual pairing patterns */
1615 vrna_ep_t *inner, *ptr;
1616 inner = vrna_get_plist_gquad_from_pr(vc, i, j);
1617 for (ptr = inner; ptr->i != 0; ptr++) {
1618 if (count == n * length - 1) {
1619 n *= 2;
1620 pl = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1621 }
1622
1623 /* check if we've already seen this pair */
1624 for (k = 0; k < count; k++)
1625 if (((pl)[k].i == ptr->i) && ((pl)[k].j == ptr->j) &&
1626 ((pl)[k].type == VRNA_PLIST_TYPE_BASEPAIR))
1627 break;
1628
1629 (pl)[k].i = ptr->i;
1630 (pl)[k].j = ptr->j;
1631 (pl)[k].type = VRNA_PLIST_TYPE_BASEPAIR;
1632 if (k == count) {
1633 (pl)[k].p = ptr->p;
1634 count++;
1635 } else {
1636 (pl)[k].p += ptr->p;
1637 }
1638 }
1639 free(inner);
1640 } else {
1641 (pl)[count].i = i;
1642 (pl)[count].j = j;
1643 (pl)[count].p = (float)probs[index[i] - j];
1644 (pl)[count++].type = VRNA_PLIST_TYPE_BASEPAIR;
1645 }
1646 }
1647 }
1648
1649 /* check unstructured domains */
1650 if (vc->domains_up) {
1651 vrna_ud_t *domains_up;
1652 domains_up = vc->domains_up;
1653
1654 if (domains_up->probs_get) {
1655 for (i = 1; i <= length; i++)
1656 for (m = 0; m < domains_up->motif_count; m++) {
1657 FLT_OR_DBL pp;
1658 j = i + domains_up->motif_size[m] - 1;
1659 pp = 0.;
1660 pp += domains_up->probs_get(vc,
1661 i,
1662 j,
1663 VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP,
1664 m,
1665 domains_up->data);
1666 pp += domains_up->probs_get(vc,
1667 i,
1668 j,
1669 VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP,
1670 m,
1671 domains_up->data);
1672 pp += domains_up->probs_get(vc,
1673 i,
1674 j,
1675 VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP,
1676 m,
1677 domains_up->data);
1678 pp += domains_up->probs_get(vc,
1679 i,
1680 j,
1681 VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP,
1682 m,
1683 domains_up->data);
1684 if (pp >= (FLT_OR_DBL)cut_off) {
1685 /* do we need to allocate more memory? */
1686 if (count == n * length - 1) {
1687 n *= 2;
1688 pl = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1689 }
1690
1691 (pl)[count].i = i;
1692 (pl)[count].j = j;
1693 (pl)[count].p = (float)pp;
1694 (pl)[count++].type = VRNA_PLIST_TYPE_UD_MOTIF;
1695 }
1696 }
1697 }
1698 }
1699
1700 /* mark the end of pl */
1701 (pl)[count].i = 0;
1702 (pl)[count].j = 0;
1703 (pl)[count].type = 0;
1704 (pl)[count++].p = 0.;
1705 /* shrink memory to actual size needed */
1706 pl = (vrna_ep_t *)vrna_realloc(pl, count * sizeof(vrna_ep_t));
1707
1708 return pl;
1709 }
1710
1711
1712 PRIVATE void
assign_elements_pair(short * pt,int i,int j,char * elements)1713 assign_elements_pair(short *pt,
1714 int i,
1715 int j,
1716 char *elements)
1717 {
1718 int p, k, num_pairs;
1719
1720 num_pairs = 0;
1721 /* first, determine the number of pairs (i,j) is enclosing */
1722 for (k = i + 1; k < j; k++) {
1723 if (k < pt[k]) {
1724 num_pairs++;
1725 k = pt[k];
1726 }
1727 }
1728
1729 switch (num_pairs) {
1730 case 0: /* hairpin loop */
1731 elements[i - 1] = elements[j - 1] = 'H';
1732 for (k = i + 1; k < j; k++)
1733 elements[k - 1] = 'h';
1734 break;
1735
1736 case 1: /* interior loop */
1737 elements[i - 1] = elements[j - 1] = 'I';
1738 p = 0;
1739 for (k = i + 1; k < j; k++) {
1740 if (!pt[k]) {
1741 elements[k - 1] = 'i';
1742 } else {
1743 p = k;
1744 k = pt[k];
1745 }
1746 }
1747
1748 if (p)
1749 assign_elements_pair(pt, p, pt[p], elements);
1750
1751 break;
1752
1753 default: /* multibranch loop */
1754 elements[i - 1] = elements[j - 1] = 'M';
1755 for (k = i + 1; k < j; k++) {
1756 if (!pt[k]) {
1757 elements[k - 1] = 'm';
1758 } else {
1759 assign_elements_pair(pt, k, pt[k], elements);
1760 k = pt[k];
1761 }
1762 }
1763 break;
1764 }
1765 }
1766
1767
1768 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
1769
1770 /*###########################################*/
1771 /*# deprecated functions below #*/
1772 /*###########################################*/
1773
1774
1775 PUBLIC char *
pack_structure(const char * struc)1776 pack_structure(const char *struc)
1777 {
1778 return vrna_db_pack(struc);
1779 }
1780
1781
1782 PUBLIC char *
unpack_structure(const char * packed)1783 unpack_structure(const char *packed)
1784 {
1785 return vrna_db_unpack(packed);
1786 }
1787
1788
1789 PUBLIC void
parenthesis_structure(char * structure,vrna_bp_stack_t * bp,int length)1790 parenthesis_structure(char *structure,
1791 vrna_bp_stack_t *bp,
1792 int length)
1793 {
1794 char *s = vrna_db_from_bp_stack(bp, length);
1795
1796 strncpy(structure, s, length + 1);
1797 free(s);
1798 }
1799
1800
1801 PUBLIC void
letter_structure(char * structure,vrna_bp_stack_t * bp,int length)1802 letter_structure(char *structure,
1803 vrna_bp_stack_t *bp,
1804 int length)
1805 {
1806 vrna_letter_structure(structure, bp, length);
1807 }
1808
1809
1810 PUBLIC void
parenthesis_zuker(char * structure,vrna_bp_stack_t * bp,int length)1811 parenthesis_zuker(char *structure,
1812 vrna_bp_stack_t *bp,
1813 int length)
1814 {
1815 char *s = vrna_db_from_bp_stack(bp, length);
1816
1817 strncpy(structure, s, length + 1);
1818 free(s);
1819 }
1820
1821
1822 PUBLIC void
assign_plist_from_pr(vrna_ep_t ** pl,FLT_OR_DBL * probs,int length,double cut_off)1823 assign_plist_from_pr(vrna_ep_t **pl,
1824 FLT_OR_DBL *probs,
1825 int length,
1826 double cut_off)
1827 {
1828 int *index;
1829 vrna_mx_pf_t *matrices;
1830 vrna_md_t md;
1831 vrna_exp_param_t *pf_params;
1832
1833 index = vrna_idx_row_wise(length);
1834 matrices = (vrna_mx_pf_t *)vrna_alloc(sizeof(vrna_mx_pf_t));
1835
1836 set_model_details(&md);
1837 md.gquad = 0;
1838 pf_params = vrna_exp_params(&md);
1839 matrices->probs = probs;
1840
1841 *pl = wrap_get_plist(matrices,
1842 length,
1843 index,
1844 NULL,
1845 pf_params,
1846 cut_off);
1847
1848 free(index);
1849 free(pf_params);
1850 free(matrices);
1851 }
1852
1853
1854 PUBLIC void
assign_plist_from_db(vrna_ep_t ** pl,const char * struc,float pr)1855 assign_plist_from_db(vrna_ep_t **pl,
1856 const char *struc,
1857 float pr)
1858 {
1859 *pl = vrna_plist(struc, pr);
1860 }
1861
1862
1863 PUBLIC short *
make_pair_table(const char * structure)1864 make_pair_table(const char *structure)
1865 {
1866 return vrna_ptable(structure);
1867 }
1868
1869
1870 PUBLIC short *
copy_pair_table(const short * pt)1871 copy_pair_table(const short *pt)
1872 {
1873 return vrna_ptable_copy(pt);
1874 }
1875
1876
1877 PUBLIC short *
make_pair_table_pk(const char * structure)1878 make_pair_table_pk(const char *structure)
1879 {
1880 return vrna_pt_pk_get(structure);
1881 }
1882
1883
1884 PUBLIC short *
make_pair_table_snoop(const char * structure)1885 make_pair_table_snoop(const char *structure)
1886 {
1887 return vrna_pt_snoop_get(structure);
1888 }
1889
1890
1891 PUBLIC short *
alimake_pair_table(const char * structure)1892 alimake_pair_table(const char *structure)
1893 {
1894 return vrna_pt_ali_get(structure);
1895 }
1896
1897
1898 PUBLIC int *
make_loop_index_pt(short * pt)1899 make_loop_index_pt(short *pt)
1900 {
1901 return vrna_loopidx_from_ptable((const short *)pt);
1902 }
1903
1904
1905 PUBLIC int
bp_distance(const char * str1,const char * str2)1906 bp_distance(const char *str1,
1907 const char *str2)
1908 {
1909 return vrna_bp_distance(str1, str2);
1910 }
1911
1912
1913 PUBLIC unsigned int *
make_referenceBP_array(short * reference_pt,unsigned int turn)1914 make_referenceBP_array(short *reference_pt,
1915 unsigned int turn)
1916 {
1917 return vrna_refBPcnt_matrix((const short *)reference_pt, turn);
1918 }
1919
1920
1921 PUBLIC unsigned int *
compute_BPdifferences(short * pt1,short * pt2,unsigned int turn)1922 compute_BPdifferences(short *pt1,
1923 short *pt2,
1924 unsigned int turn)
1925 {
1926 return vrna_refBPdist_matrix((const short *)pt1, (const short *)pt2, turn);
1927 }
1928
1929
1930 PUBLIC char
bppm_symbol(const float * x)1931 bppm_symbol(const float *x)
1932 {
1933 return vrna_bpp_symbol(x);
1934 }
1935
1936
1937 PUBLIC void
bppm_to_structure(char * structure,FLT_OR_DBL * p,unsigned int length)1938 bppm_to_structure(char *structure,
1939 FLT_OR_DBL *p,
1940 unsigned int length)
1941 {
1942 char *s = vrna_db_from_probs((const FLT_OR_DBL *)p, length);
1943
1944 memcpy(structure, s, length);
1945 structure[length] = '\0';
1946 free(s);
1947 }
1948
1949
1950 #endif
1951