1 /*
2  *  ViennaRNA/utils/structures.c
3  *
4  *  Various functions to convert, parse, encode secondary structures
5  *
6  *  c  Ivo L Hofacker, Walter Fontana, Ronny Lorenz
7  *              Vienna RNA package
8  */
9 
10 #ifdef HAVE_CONFIG_H
11 #include "config.h"
12 #endif
13 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <math.h>
18 #include <limits.h>
19 
20 #include "ViennaRNA/fold_vars.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/params/basic.h"
23 #include "ViennaRNA/gquad.h"
24 #include "ViennaRNA/utils/structures.h"
25 #include "ViennaRNA/MEA.h"
26 
27 #ifdef __GNUC__
28 # define INLINE inline
29 #else
30 # define INLINE
31 #endif
32 
33 
34 #define SHAPES_OPEN   '['
35 #define SHAPES_CLOSE  ']'
36 #define SHAPES_UP     '_'
37 
38 struct shrep {
39   struct shrep  *pred;
40   struct shrep  *succ;
41   char          character;
42 };
43 
44 
45 /*
46  #################################
47  # PRIVATE FUNCTION DECLARATIONS #
48  #################################
49  */
50 PRIVATE vrna_ep_t *
51 wrap_get_plist(vrna_mx_pf_t     *matrices,
52                int              length,
53                int              *index,
54                short            *S,
55                vrna_exp_param_t *pf_params,
56                double           cut_off);
57 
58 
59 PRIVATE vrna_ep_t *
60 wrap_plist(vrna_fold_compound_t *vc,
61            double               cut_off);
62 
63 
64 PRIVATE void
65 assign_elements_pair(short  *pt,
66                      int    i,
67                      int    j,
68                      char   *elements);
69 
70 
71 PRIVATE INLINE void
72 flatten_brackets(char       *string,
73                  const char pair[3],
74                  const char target[3]);
75 
76 
77 PRIVATE INLINE int
78 extract_pairs(short       *pt,
79               const char  *structure,
80               const char  *pair);
81 
82 
83 PRIVATE INLINE struct shrep *
84 shrep_insert_after(struct shrep *target,
85                    char         character);
86 
87 
88 PRIVATE INLINE struct shrep *
89 shrep_insert_before(struct shrep  *target,
90                     char          character);
91 
92 
93 PRIVATE INLINE void
94 shrep_concat(struct shrep **target,
95              struct shrep *suffix);
96 
97 
98 PRIVATE INLINE char *
99 db2shapes(const char    *structure,
100           unsigned int  level);
101 
102 
103 PRIVATE INLINE char *
104 db2shapes_pt(const short  *pt,
105              unsigned int n,
106              unsigned int level);
107 
108 
109 PRIVATE INLINE struct shrep *
110 get_shrep(const short   *pt,
111           unsigned int  start,
112           unsigned int  end,
113           unsigned int  level);
114 
115 
116 /*
117  #################################
118  # BEGIN OF FUNCTION DEFINITIONS #
119  #################################
120  */
121 PUBLIC char *
vrna_db_pack(const char * struc)122 vrna_db_pack(const char *struc)
123 {
124   /* 5:1 compression using base 3 encoding */
125   int           i, j, l, pi;
126   unsigned char *packed;
127 
128   l       = (int)strlen(struc);
129   packed  = (unsigned char *)vrna_alloc(((l + 4) / 5 + 1) * sizeof(unsigned char));
130 
131   j = i = pi = 0;
132   while (i < l) {
133     register int p;
134     for (p = pi = 0; pi < 5; pi++) {
135       p *= 3;
136       switch (struc[i]) {
137         case '(':
138         case '\0':
139           break;
140         case ')':
141           p++;
142           break;
143         case '.':
144           p += 2;
145           break;
146         default:
147           vrna_message_warning("vrna_db_pack: "
148                                "illegal character %c at position %d in structure\n%s",
149                                struc[i],
150                                i + 1,
151                                struc);
152           return NULL;
153       }
154       if (i < l)
155         i++;
156     }
157     packed[j++] = (unsigned char)(p + 1); /* never use 0, so we can use
158                                            * strcmp()  etc. */
159   }
160   packed[j] = '\0';                       /* for str*() functions */
161   return (char *)packed;
162 }
163 
164 
165 PUBLIC char *
vrna_db_unpack(const char * packed)166 vrna_db_unpack(const char *packed)
167 {
168   /* 5:1 compression using base 3 encoding */
169   int                 i, j, l;
170   char                *struc;
171   unsigned const char *pp;
172   char                code[3] = {
173     '(', ')', '.'
174   };
175 
176   l     = (int)strlen(packed);
177   pp    = (const unsigned char *)packed;
178   struc = (char *)vrna_alloc((l * 5 + 1) * sizeof(char));   /* up to 4 byte extra */
179 
180   for (i = j = 0; i < l; i++) {
181     register int p, c, k;
182 
183     p = (int)pp[i] - 1;
184     for (k = 4; k >= 0; k--) {
185       c             = p % 3;
186       p             /= 3;
187       struc[j + k]  = code[c];
188     }
189     j += 5;
190   }
191   struc[j--] = '\0';
192   /* strip trailing ( */
193   while ((j >= 0) &&
194          (struc[j] == '('))
195     struc[j--] = '\0';
196 
197   return struc;
198 }
199 
200 
201 PUBLIC short *
vrna_ptable(const char * structure)202 vrna_ptable(const char *structure)
203 {
204   return vrna_ptable_from_string(structure, VRNA_BRACKETS_RND);
205 }
206 
207 
208 PUBLIC short *
vrna_pt_pk_get(const char * structure)209 vrna_pt_pk_get(const char *structure)
210 {
211   return vrna_ptable_from_string(structure, VRNA_BRACKETS_RND | VRNA_BRACKETS_SQR);
212 }
213 
214 
215 PUBLIC short *
vrna_pt_snoop_get(const char * structure)216 vrna_pt_snoop_get(const char *structure)
217 {
218   return vrna_ptable_from_string(structure, VRNA_BRACKETS_ANG);
219 }
220 
221 
222 PUBLIC short *
vrna_pt_ali_get(const char * structure)223 vrna_pt_ali_get(const char *structure)
224 {
225   return vrna_ptable_from_string(structure,
226                                  VRNA_BRACKETS_RND | VRNA_BRACKETS_ANG | VRNA_BRACKETS_SQR);
227 }
228 
229 
230 PUBLIC short *
vrna_ptable_copy(const short * pt)231 vrna_ptable_copy(const short *pt)
232 {
233   short *table = (short *)vrna_alloc(sizeof(short) * (pt[0] + 2));
234 
235   memcpy(table, pt, sizeof(short) * (pt[0] + 2));
236   return table;
237 }
238 
239 
240 PUBLIC int *
vrna_loopidx_from_ptable(const short * pt)241 vrna_loopidx_from_ptable(const short *pt)
242 {
243   /* number each position by which loop it belongs to (positions start
244    * at 1) */
245   int i, hx, l, nl;
246   int length;
247   int *stack  = NULL;
248   int *loop   = NULL;
249 
250   length  = pt[0];
251   stack   = (int *)vrna_alloc(sizeof(int) * (length + 1));
252   loop    = (int *)vrna_alloc(sizeof(int) * (length + 2));
253   hx      = l = nl = 0;
254 
255   for (i = 1; i <= length; i++) {
256     if ((pt[i] != 0) && (i < pt[i])) {
257       /* ( */
258       nl++;
259       l           = nl;
260       stack[hx++] = i;
261     }
262 
263     loop[i] = l;
264 
265     if ((pt[i] != 0) && (i > pt[i])) {
266       /* ) */
267       --hx;
268       if (hx > 0)
269         l = loop[stack[hx - 1]];  /* index of enclosing loop   */
270       else
271         l = 0;                    /* external loop has index 0 */
272 
273       if (hx < 0) {
274         vrna_message_warning("vrna_loopidx_from_ptable: "
275                              "unbalanced brackets in make_pair_table");
276         free(stack);
277         return NULL;
278       }
279     }
280   }
281   loop[0] = nl;
282   free(stack);
283   return loop;
284 }
285 
286 
287 PUBLIC short *
vrna_pt_pk_remove(const short * ptable,unsigned int options)288 vrna_pt_pk_remove(const short   *ptable,
289                   unsigned int  options)
290 {
291   short *pt = NULL;
292 
293   if (ptable) {
294     char          *mea_structure;
295     unsigned int  i, j, n;
296     vrna_ep_t     *pairs;
297 
298     n             = (unsigned int)ptable[0];
299     mea_structure = (char *)vrna_alloc(sizeof(char) * (n + 1));
300     pairs         = (vrna_ep_t *)vrna_alloc(sizeof(vrna_ep_t) * (n + 1));
301 
302     /* compose list of pairs to be used in MEA() function */
303     for (j = 0, i = 1; i <= n; i++)
304       if (ptable[i] > i) {
305         pairs[j].i    = i;
306         pairs[j].j    = ptable[i];
307         pairs[j].p    = 1.;
308         pairs[j].type = VRNA_PLIST_TYPE_BASEPAIR;
309         j++;
310       }
311 
312     pairs[j].i    = 0;
313     pairs[j].j    = 0;
314     pairs[j].p    = 0;
315     pairs[j].type = VRNA_PLIST_TYPE_BASEPAIR;
316 
317     /* use MEA() implementation to remove crossing base pairs */
318     memset(mea_structure, '.', n);
319 
320     (void)MEA(pairs, mea_structure, 2.0);
321 
322     /* convert dot-bracket structure to pair table */
323     pt = vrna_ptable(mea_structure);
324 
325     free(mea_structure);
326     free(pairs);
327   }
328 
329   return pt;
330 }
331 
332 
333 PUBLIC char *
vrna_db_pk_remove(const char * structure,unsigned int options)334 vrna_db_pk_remove(const char    *structure,
335                   unsigned int  options)
336 {
337   char  *s;
338   short *pt_pk, *pt;
339 
340   s = NULL;
341 
342   if (structure) {
343     pt_pk = vrna_ptable_from_string(structure, options & VRNA_BRACKETS_ANY);
344     pt    = vrna_pt_pk_remove(pt_pk, options);
345     s     = vrna_db_from_ptable(pt);
346 
347     free(pt_pk);
348     free(pt);
349   }
350 
351   return s;
352 }
353 
354 
355 PUBLIC char *
vrna_db_from_ptable(short * pt)356 vrna_db_from_ptable(short *pt)
357 {
358   unsigned int  n;
359   int           i;
360   char          *dotbracket = NULL;
361 
362   if (pt) {
363     n = (unsigned int)pt[0];
364     if (n > 0) {
365       dotbracket = (char *)vrna_alloc((n + 1) * sizeof(char));
366       memset(dotbracket, '.', n);
367 
368       for (i = 1; i <= n; i++) {
369         if (pt[i] > i) {
370           dotbracket[i - 1]     = '(';
371           dotbracket[pt[i] - 1] = ')';
372         }
373       }
374       dotbracket[i - 1] = '\0';
375     }
376   }
377 
378   return dotbracket;
379 }
380 
381 
382 PUBLIC void
vrna_db_flatten(char * string,unsigned int options)383 vrna_db_flatten(char          *string,
384                 unsigned int  options)
385 {
386   vrna_db_flatten_to(string, "()", options);
387 }
388 
389 
390 PUBLIC void
vrna_db_flatten_to(char * string,const char target[3],unsigned int options)391 vrna_db_flatten_to(char         *string,
392                    const char   target[3],
393                    unsigned int options)
394 {
395   if (string) {
396     if (options & VRNA_BRACKETS_RND)
397       flatten_brackets(string, "()", target);
398 
399     if (options & VRNA_BRACKETS_ANG)
400       flatten_brackets(string, "<>", target);
401 
402     if (options & VRNA_BRACKETS_CLY)
403       flatten_brackets(string, "{}", target);
404 
405     if (options & VRNA_BRACKETS_SQR)
406       flatten_brackets(string, "<>", target);
407 
408     if (options & VRNA_BRACKETS_ALPHA) {
409       char pairs[3];
410 
411       for (int i = 65; i < 91; i++) {
412         pairs[0]  = (char)i;
413         pairs[1]  = (char)(i + 32);
414         pairs[2]  = '\0';
415         flatten_brackets(string, pairs, target);
416       }
417     }
418   }
419 }
420 
421 
422 PUBLIC short *
vrna_ptable_from_string(const char * string,unsigned int options)423 vrna_ptable_from_string(const char    *string,
424                         unsigned int  options)
425 {
426   char          pairs[3];
427   short         *pt;
428   unsigned int  i, n;
429 
430   n = strlen(string);
431 
432   if (n > SHRT_MAX) {
433     vrna_message_warning("vrna_ptable_from_string: "
434                          "Structure too long to be converted to pair table (n=%d, max=%d)",
435                          n,
436                          SHRT_MAX);
437     return NULL;
438   }
439 
440   pt    = (short *)vrna_alloc(sizeof(short) * (n + 2));
441   pt[0] = (short)n;
442 
443 
444   if ((options & VRNA_BRACKETS_RND) &&
445       (!extract_pairs(pt, string, "()"))) {
446     free(pt);
447     return NULL;
448   }
449 
450   if ((options & VRNA_BRACKETS_ANG) &&
451       (!extract_pairs(pt, string, "<>"))) {
452     free(pt);
453     return NULL;
454   }
455 
456   if ((options & VRNA_BRACKETS_CLY) &&
457       (!extract_pairs(pt, string, "{}"))) {
458     free(pt);
459     return NULL;
460   }
461 
462   if ((options & VRNA_BRACKETS_SQR) &&
463       (!extract_pairs(pt, string, "[]"))) {
464     free(pt);
465     return NULL;
466   }
467 
468   if (options & VRNA_BRACKETS_ALPHA) {
469     for (i = 65; i < 91; i++) {
470       pairs[0]  = (char)i;
471       pairs[1]  = (char)(i + 32);
472       pairs[2]  = '\0';
473       if (!extract_pairs(pt, string, pairs)) {
474         free(pt);
475         return NULL;
476       }
477     }
478   }
479 
480   return pt;
481 }
482 
483 
484 PUBLIC char *
vrna_db_from_WUSS(const char * wuss)485 vrna_db_from_WUSS(const char *wuss)
486 {
487   char          *db, *tmp;
488   short         *pt;
489   int           pos, L, l[3], i, p, q;
490   unsigned int  n;
491 
492   db = NULL;
493 
494   if (wuss) {
495     n = strlen(wuss);
496 
497     /*
498      *  Note, in WUSS notation, matching pairs of (), <>, {}, [] are allowed but must not
499      *  cross! Thus, we can simply flatten all brackets to ().
500      */
501     tmp = (char *)vrna_alloc(sizeof(char) * (n + 1));
502     tmp = (char *)memcpy(tmp, wuss, n + 1);
503 
504     vrna_db_flatten(tmp, VRNA_BRACKETS_DEFAULT);
505 
506     /* now convert flattened structure string to pair-table (removes pseudo-knots) */
507     pt = vrna_ptable_from_string(tmp, VRNA_BRACKETS_RND);
508 
509     /* convert back to dot-bracket (replaces all special characters for unpaired positions) */
510     db = vrna_db_from_ptable(pt);
511 
512     /* check for G-Quadruplexes, annotated as G quartets */
513     q = 1;
514     while ((pos = parse_gquad(wuss + q - 1, &L, l)) > 0) {
515       q += pos - 1;
516       p = q - 4 * L - l[0] - l[1] - l[2] + 1;
517 
518       if (q > n)
519         break;
520 
521       /* re-insert G-Quadruplex */
522       for (i = 0; i < L; i++) {
523         db[p + i - 1]                                 = '+';
524         db[p + L + l[0] + i - 1]                      = '+';
525         db[p + (2 * L) + l[0] + l[1] + i - 1]         = '+';
526         db[p + (3 * L) + l[0] + l[1] + l[2] + i - 1]  = '+';
527       }
528       q++;
529     }
530 
531     free(pt);
532     free(tmp);
533   }
534 
535   return db;
536 }
537 
538 
539 /*---------------------------------------------------------------------------*/
540 
541 PUBLIC int
vrna_bp_distance(const char * str1,const char * str2)542 vrna_bp_distance(const char *str1,
543                  const char *str2)
544 {
545   /* dist = {number of base pairs in one structure but not in the other} */
546   /* same as edit distance with pair_open pair_close as move set */
547   int   dist;
548   short i, l;
549   short *t1, *t2;
550 
551   dist  = 0;
552   t1    = vrna_ptable(str1);
553   t2    = vrna_ptable(str2);
554 
555   if (t1 && t2) {
556     l = (t1[0] < t2[0]) ? t1[0] : t2[0]; /* minimum of the two lengths */
557 
558     for (i = 1; i <= l; i++)
559       if (t1[i] != t2[i]) {
560         if (t1[i] > i)
561           dist++;
562 
563         if (t2[i] > i)
564           dist++;
565       }
566   }
567 
568   free(t1);
569   free(t2);
570 
571   return dist;
572 }
573 
574 
575 PUBLIC double
vrna_dist_mountain(const char * str1,const char * str2,unsigned int p)576 vrna_dist_mountain(const char   *str1,
577                    const char   *str2,
578                    unsigned int p)
579 {
580   short         *pt1, *pt2;
581   unsigned int  i, n;
582   double        distance, w, *f1, *f2;
583 
584   distance  = -1.;
585   f1        = NULL;
586   f2        = NULL;
587 
588   if ((str1) && (str2)) {
589     n = strlen(str1);
590 
591     if (n != strlen(str2)) {
592       vrna_message_warning("vrna_dist_mountain: input structures have unequal lengths!");
593       return distance;
594     }
595 
596     pt1 = vrna_ptable(str1);
597     pt2 = vrna_ptable(str2);
598     f1  = (double *)vrna_alloc(sizeof(double) * (n + 1));
599     f2  = (double *)vrna_alloc(sizeof(double) * (n + 1));
600 
601     /* count (mountain)heights for positions 1 <= i <= n */
602     for (w = 0., i = 1; i <= n; i++) {
603       if (pt1[i] == 0)
604         continue;
605 
606       if (pt1[i] > i)
607         w += 1. / (double)(pt1[i] - i);
608       else
609         w -= 1. / (double)(i - pt1[i]);
610 
611       f1[i] = w;
612     }
613 
614     for (w = 0., i = 1; i <= n; i++) {
615       if (pt2[i] == 0)
616         continue;
617 
618       if (pt2[i] > i)
619         w += 1. / (double)(pt2[i] - i);
620       else
621         w -= 1. / (double)(i - pt2[i]);
622 
623       f2[i] = w;
624     }
625 
626 
627     /* finally, compute L_p-norm */
628     for (distance = 0., i = 1; i <= n; i++)
629       distance += pow(fabs(f1[i] - f2[i]), (double)p);
630 
631     distance = pow(distance, 1. / (double)p);
632 
633     free(pt1);
634     free(pt2);
635     free(f1);
636     free(f2);
637   }
638 
639   return distance;
640 }
641 
642 
643 /* get a matrix containing the number of basepairs of a reference structure for each interval [i,j] with i<j
644  *  access it via iindx!!!
645  */
646 PUBLIC unsigned int *
vrna_refBPcnt_matrix(const short * reference_pt,unsigned int turn)647 vrna_refBPcnt_matrix(const short  *reference_pt,
648                      unsigned int turn)
649 {
650   unsigned int  i, j, k, ij, length;
651   int           *iindx;
652   unsigned int  *array;
653   unsigned int  size;
654 
655   length  = (unsigned int)reference_pt[0];
656   size    = ((length + 1) * (length + 2)) / 2;
657   iindx   = vrna_idx_row_wise(length);
658   array   = (unsigned int *)vrna_alloc(sizeof(unsigned int) * size);
659   /* matrix containing number of basepairs of reference structure1 in interval [i,j] */;
660   for (k = 0; k <= turn; k++)
661     for (i = 1; i <= length - k; i++) {
662       j         = i + k;
663       ij        = iindx[i] - j;
664       array[ij] = 0;
665     }
666 
667   for (i = length - turn - 1; i >= 1; i--)
668     for (j = i + turn + 1; j <= length; j++) {
669       int bps;
670       ij  = iindx[i] - j;
671       bps = array[ij + 1];
672       if ((i <= (unsigned int)reference_pt[j]) && ((unsigned int)reference_pt[j] < j))
673         bps++;
674 
675       array[ij] = bps;
676     }
677   free(iindx);
678   return array;
679 }
680 
681 
682 PUBLIC unsigned int *
vrna_refBPdist_matrix(const short * pt1,const short * pt2,unsigned int turn)683 vrna_refBPdist_matrix(const short   *pt1,
684                       const short   *pt2,
685                       unsigned int  turn)
686 {
687   unsigned int  *array;
688   unsigned int  n, size, i, j, ij, d;
689 
690   n     = (unsigned int)pt1[0];
691   size  = ((n + 1) * (n + 2)) / 2;
692   array = (unsigned int *)vrna_alloc(sizeof(unsigned int) * size);
693   int           *iindx = vrna_idx_row_wise(n);
694   for (i = n - turn - 1; i >= 1; i--) {
695     d = 0;
696     for (j = i + turn + 1; j <= n; j++) {
697       ij  = iindx[i] - j;
698       d   = array[ij + 1];
699       if (pt1[j] != pt2[j]) {
700         if (i <= (unsigned int)pt1[j] && (unsigned int)pt1[j] < j)
701           /* we got an additional base pair in reference structure 1 */
702           d++;
703 
704         if (i <= (unsigned int)pt2[j] && (unsigned int)pt2[j] < j)
705           /* we got another base pair in reference structure 2 */
706           d++;
707       }
708 
709       array[ij] = d;
710     }
711   }
712   free(iindx);
713   return array;
714 }
715 
716 
717 PUBLIC char
vrna_bpp_symbol(const float * x)718 vrna_bpp_symbol(const float *x)
719 {
720   /*  if( ((x[1]-x[2])*(x[1]-x[2]))<0.1&&x[0]<=0.677) return '|'; */
721   if (x[0] > 0.667)
722     return '.';
723 
724   if (x[1] > 0.667)
725     return '(';
726 
727   if (x[2] > 0.667)
728     return ')';
729 
730   if ((x[1] + x[2]) > x[0]) {
731     if ((x[1] / (x[1] + x[2])) > 0.667)
732       return '{';
733 
734     if ((x[2] / (x[1] + x[2])) > 0.667)
735       return '}';
736     else
737       return '|';
738   }
739 
740   if (x[0] > (x[1] + x[2]))
741     return ',';
742 
743   return ':';
744 }
745 
746 
747 PUBLIC char *
vrna_db_from_probs(const FLT_OR_DBL * p,unsigned int length)748 vrna_db_from_probs(const FLT_OR_DBL *p,
749                    unsigned int     length)
750 {
751   int   i, j, *index;
752   float P[3];    /* P[][0] unpaired, P[][1] upstream p, P[][2] downstream p */
753   char  *s;
754 
755   index = vrna_idx_row_wise(length);
756   s     = (char *)vrna_alloc(sizeof(char) * (length + 1));
757 
758   for (j = 1; j <= length; j++) {
759     P[0]  = 1.0;
760     P[1]  = P[2] = 0.0;
761     for (i = 1; i < j; i++) {
762       P[2]  += (float)p[index[i] - j];  /* j is paired downstream */
763       P[0]  -= (float)p[index[i] - j];  /* j is unpaired */
764     }
765     for (i = j + 1; i <= length; i++) {
766       P[1]  += (float)p[index[j] - i];  /* j is paired upstream */
767       P[0]  -= (float)p[index[j] - i];  /* j is unpaired */
768     }
769     s[j - 1] = vrna_bpp_symbol(P);
770   }
771   s[length] = '\0';
772   free(index);
773 
774   return s;
775 }
776 
777 
778 PUBLIC void
vrna_letter_structure(char * structure,vrna_bp_stack_t * bp,unsigned int length)779 vrna_letter_structure(char            *structure,
780                       vrna_bp_stack_t *bp,
781                       unsigned int    length)
782 {
783   int   n, k, x, y;
784   char  alpha[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
785 
786   if (length > 0) {
787     memset(structure, '.', length);
788     structure[length] = '\0';
789 
790     for (n = 0, k = 1; k <= bp[0].i; k++) {
791       y = bp[k].j;
792       x = bp[k].i;
793       if ((x - 1 > 0) && (y + 1 <= length)) {
794         if (structure[x - 2] != ' ' && structure[y] == structure[x - 2]) {
795           structure[x - 1]  = structure[x - 2];
796           structure[y - 1]  = structure[x - 1];
797           continue;
798         }
799       }
800 
801       if ((structure[x] != ' ') && (structure[y - 2] == structure[x])) {
802         structure[x - 1]  = structure[x];
803         structure[y - 1]  = structure[x - 1];
804         continue;
805       }
806 
807       n++;
808       structure[x - 1]  = alpha[n - 1];
809       structure[y - 1]  = alpha[n - 1];
810     }
811   }
812 }
813 
814 
815 PUBLIC char *
vrna_db_from_bp_stack(vrna_bp_stack_t * bp,unsigned int length)816 vrna_db_from_bp_stack(vrna_bp_stack_t *bp,
817                       unsigned int    length)
818 {
819   int   k, i, j, temp;
820   char  *structure;
821 
822   structure = vrna_alloc(sizeof(char) * (length + 1));
823 
824   if (length > 0)
825     memset(structure, '.', length);
826 
827   structure[length] = '\0';
828 
829   for (k = 1; k <= bp[0].i; k++) {
830     i = bp[k].i;
831     j = bp[k].j;
832     if (i > length)
833       i -= length;
834 
835     if (j > length)
836       j -= length;
837 
838     if (i > j) {
839       temp  = i;
840       i     = j;
841       j     = temp;
842     }
843 
844     if (i == j) {
845       /* Gquad bonds are marked as bp[i].i == bp[i].j */
846       structure[i - 1] = '+';
847     } else {
848       /* the following ones are regular base pairs */
849       structure[i - 1]  = '(';
850       structure[j - 1]  = ')';
851     }
852   }
853   return structure;
854 }
855 
856 
857 PUBLIC vrna_ep_t *
vrna_plist(const char * struc,float pr)858 vrna_plist(const char *struc,
859            float      pr)
860 {
861   /* convert bracket string to plist */
862   short     *pt;
863   int       i, k = 0, size, n;
864   vrna_ep_t *gpl, *ptr, *pl;
865 
866   size  = strlen(struc);
867   n     = 2;
868 
869   pt  = vrna_ptable(struc);
870   pl  = (vrna_ep_t *)vrna_alloc(n * size * sizeof(vrna_ep_t));
871   for (i = 1; i < size; i++) {
872     if (pt[i] > i) {
873       (pl)[k].i       = i;
874       (pl)[k].j       = pt[i];
875       (pl)[k].p       = pr;
876       (pl)[k++].type  = VRNA_PLIST_TYPE_BASEPAIR;
877     }
878   }
879 
880   gpl = get_plist_gquad_from_db(struc, pr);
881   for (ptr = gpl; ptr->i != 0; ptr++) {
882     if (k == n * size - 1) {
883       n   *= 2;
884       pl  = (vrna_ep_t *)vrna_realloc(pl, n * size * sizeof(vrna_ep_t));
885     }
886 
887     (pl)[k].i       = ptr->i;
888     (pl)[k].j       = ptr->j;
889     (pl)[k].p       = ptr->p;
890     (pl)[k++].type  = ptr->type;
891   }
892   free(gpl);
893 
894   (pl)[k].i       = 0;
895   (pl)[k].j       = 0;
896   (pl)[k].p       = 0.;
897   (pl)[k++].type  = 0.;
898   free(pt);
899   pl = (vrna_ep_t *)vrna_realloc(pl, k * sizeof(vrna_ep_t));
900 
901   return pl;
902 }
903 
904 
905 PUBLIC vrna_ep_t *
vrna_plist_from_probs(vrna_fold_compound_t * vc,double cut_off)906 vrna_plist_from_probs(vrna_fold_compound_t  *vc,
907                       double                cut_off)
908 {
909   if (!vc)
910     vrna_message_warning("vrna_pl_get_from_pr: run vrna_pf_fold first!");
911   else if (!vc->exp_matrices->probs)
912     vrna_message_warning("vrna_pl_get_from_pr: probs==NULL!");
913   else
914     return wrap_plist(vc, cut_off);
915 
916   return NULL;
917 }
918 
919 
920 PUBLIC char *
vrna_db_from_plist(vrna_ep_t * pairs,unsigned int n)921 vrna_db_from_plist(vrna_ep_t    *pairs,
922                    unsigned int n)
923 {
924   vrna_ep_t *ptr;
925   char      *structure = NULL;
926 
927   if (n > 0) {
928     structure = (char *)vrna_alloc(sizeof(char) * (n + 1));
929     memset(structure, '.', n);
930     structure[n] = '\0';
931 
932     for (ptr = pairs; (*ptr).i; ptr++) {
933       if (((*ptr).i < n) && ((*ptr).j <= n)) {
934         structure[(*ptr).i - 1] = '(';
935         structure[(*ptr).j - 1] = ')';
936       }
937     }
938   }
939 
940   return structure;
941 }
942 
943 
944 PUBLIC int
vrna_plist_append(vrna_ep_t ** target,const vrna_ep_t * list)945 vrna_plist_append(vrna_ep_t       **target,
946                   const vrna_ep_t *list)
947 {
948   int             size1, size2;
949   const vrna_ep_t *ptr;
950 
951   if ((target) && (list)) {
952     size1 = size2 = 0;
953 
954     if (*target)
955       for (ptr = *target; ptr->i; size1++, ptr++);
956 
957     for (ptr = list; ptr->i; size2++, ptr++);
958 
959     *target = (vrna_ep_t *)vrna_realloc(*target, sizeof(vrna_ep_t) * (size1 + size2 + 1));
960 
961     if (*target) {
962       memcpy(*target + size1, list, sizeof(vrna_ep_t) * size2);
963       (*target)[size1 + size2].i = (*target)[size1 + size2].j = 0;
964       return 1;
965     }
966   }
967 
968   return 0;
969 }
970 
971 
972 PUBLIC vrna_hx_t *
vrna_hx_from_ptable(short * pt)973 vrna_hx_from_ptable(short *pt)
974 {
975   int       i, k, n, l, s, *stack;
976   vrna_hx_t *list;
977 
978   n     = pt[0];
979   l     = 0;
980   s     = 1;
981   list  = (vrna_hx_t *)vrna_alloc(sizeof(vrna_hx_t) * n / 2);
982   stack = (int *)vrna_alloc(sizeof(int) * n / 2);
983 
984   stack[s] = 1;
985 
986   do {
987     for (i = stack[s--]; i <= n; i++) {
988       if (pt[i] > (short)i) {
989         /* found a base pair */
990         k = i;
991         /* go through stack */
992         for (; pt[k + 1] == pt[k] - 1; k++);
993         list[l].start   = i;
994         list[l].end     = pt[i];
995         list[l].length  = k - i + 1;
996         list[l].up5     = list[l].up3 = 0;
997         l++;
998         stack[++s]  = pt[i] + 1;
999         stack[++s]  = k + 1;
1000         break;
1001       } else if (pt[i]) {
1002         /* end of region */
1003         break;
1004       }
1005     }
1006   } while (s > 0);
1007 
1008   list          = vrna_realloc(list, (l + 1) * sizeof(vrna_hx_t));
1009   list[l].start = list[l].end = list[l].length = list[l].up5 = list[l].up3 = 0;
1010 
1011   free(stack);
1012   return list;
1013 }
1014 
1015 
1016 PUBLIC vrna_hx_t *
vrna_hx_merge(const vrna_hx_t * list,int maxdist)1017 vrna_hx_merge(const vrna_hx_t *list,
1018               int             maxdist)
1019 {
1020   int       merged, i, j, s, neighbors, n;
1021   vrna_hx_t *merged_list;
1022 
1023   for (n = 0; list[n].length > 0; n++); /* check size of list */
1024 
1025   merged_list = (vrna_hx_t *)vrna_alloc(sizeof(vrna_hx_t) * (n + 1));
1026   memcpy(merged_list, list, sizeof(vrna_hx_t) * (n + 1));
1027 
1028   s = n + 1;
1029 
1030   do {
1031     merged = 0;
1032     for (i = 1; merged_list[i].length > 0; i++) {
1033       /*
1034        * GOAL: merge two consecutive helices i and i-1, if i-1
1035        * subsumes i, and not more than i
1036        */
1037 
1038       /* 1st, check for neighbors */
1039       neighbors = 0;
1040       for (j = i + 1; merged_list[j].length > 0; j++) {
1041         if (merged_list[j].start > merged_list[i - 1].end)
1042           break;
1043 
1044         if (merged_list[j].start < merged_list[i].end)
1045           continue;
1046 
1047         neighbors = 1;
1048       }
1049       if (neighbors)
1050         continue;
1051 
1052       /* check if we may merge i with i-1 */
1053       if (merged_list[i].end < merged_list[i - 1].end) {
1054         merged_list[i - 1].up5 += merged_list[i].start
1055                                   - merged_list[i - 1].start
1056                                   - merged_list[i - 1].length
1057                                   - merged_list[i - 1].up5
1058                                   + merged_list[i].up5;
1059         merged_list[i - 1].up3 += merged_list[i - 1].end
1060                                   - merged_list[i - 1].length
1061                                   - merged_list[i - 1].up3
1062                                   - merged_list[i].end
1063                                   + merged_list[i].up3;
1064         merged_list[i - 1].length += merged_list[i].length;
1065         /* splice out helix i */
1066         memmove(merged_list + i, merged_list + i + 1, sizeof(vrna_hx_t) * (n - i));
1067         s--;
1068         merged = 1;
1069         break;
1070       }
1071     }
1072   } while (merged);
1073 
1074   merged_list = vrna_realloc(merged_list, sizeof(vrna_hx_t) * s);
1075 
1076   return merged_list;
1077 }
1078 
1079 
1080 PUBLIC char *
vrna_db_to_element_string(const char * structure)1081 vrna_db_to_element_string(const char *structure)
1082 {
1083   char  *elements;
1084   int   n, i;
1085   short *pt;
1086 
1087   elements = NULL;
1088 
1089   if (structure) {
1090     n         = (int)strlen(structure);
1091     pt        = vrna_ptable(structure);
1092     elements  = (char *)vrna_alloc(sizeof(char) * (n + 1));
1093 
1094     for (i = 1; i <= n; i++) {
1095       if (!pt[i]) {
1096         /* mark nucleotides in exterior loop */
1097         elements[i - 1] = 'e';
1098       } else {
1099         assign_elements_pair(pt, i, pt[i], elements);
1100         i = pt[i];
1101       }
1102     }
1103 
1104     elements[n] = '\0';
1105     free(pt);
1106   }
1107 
1108   return elements;
1109 }
1110 
1111 
1112 PUBLIC char *
vrna_abstract_shapes(const char * structure,unsigned int level)1113 vrna_abstract_shapes(const char   *structure,
1114                      unsigned int level)
1115 {
1116   if (structure) {
1117     if (level > 5)
1118       level = 5;
1119 
1120     return db2shapes(structure, level);
1121   }
1122 
1123   return NULL;
1124 }
1125 
1126 
1127 PUBLIC char *
vrna_abstract_shapes_pt(const short * pt,unsigned int level)1128 vrna_abstract_shapes_pt(const short  *pt,
1129                         unsigned int level)
1130 {
1131   if (pt) {
1132     if (level > 5)
1133       level = 5;
1134 
1135     return db2shapes_pt(pt, (unsigned int)pt[0], level);
1136   }
1137 
1138   return NULL;
1139 }
1140 
1141 
1142 /*
1143  #####################################
1144  # BEGIN OF STATIC HELPER FUNCTIONS  #
1145  #####################################
1146  */
1147 PRIVATE INLINE struct shrep *
shrep_insert_after(struct shrep * target,char character)1148 shrep_insert_after(struct shrep *target,
1149                    char         character)
1150 {
1151   struct shrep *entry, *ptr;
1152 
1153   entry             = (struct shrep *)vrna_alloc(sizeof(struct shrep));
1154   entry->character  = character;
1155   entry->pred       = NULL;
1156   entry->succ       = NULL;
1157 
1158   if (target) {
1159     for (ptr = target; ptr->succ; ptr = ptr->succ);
1160     entry->pred = ptr;
1161     ptr->succ   = entry;
1162   }
1163 
1164   return entry;
1165 }
1166 
1167 
1168 PRIVATE INLINE struct shrep *
shrep_insert_before(struct shrep * target,char character)1169 shrep_insert_before(struct shrep  *target,
1170                     char          character)
1171 {
1172   struct shrep *entry, *ptr;
1173 
1174   entry             = (struct shrep *)vrna_alloc(sizeof(struct shrep));
1175   entry->character  = character;
1176   entry->pred       = NULL;
1177   entry->succ       = NULL;
1178 
1179   if (target) {
1180     for (ptr = target; ptr->pred; ptr = ptr->pred);
1181     entry->succ = ptr;
1182     ptr->pred   = entry;
1183   }
1184 
1185   return entry;
1186 }
1187 
1188 
1189 PRIVATE INLINE void
shrep_concat(struct shrep ** target,struct shrep * suffix)1190 shrep_concat(struct shrep **target,
1191              struct shrep *suffix)
1192 {
1193   struct shrep *ptr_s, *ptr_p;
1194 
1195   ptr_p = *target;
1196   ptr_s = suffix;
1197 
1198   if (ptr_p)
1199     for (; ptr_p->succ; ptr_p = ptr_p->succ);
1200 
1201   if (ptr_s)
1202     for (; ptr_s->pred; ptr_s = ptr_s->pred);
1203 
1204   if (!ptr_p)
1205     *target = ptr_s;
1206   else if (ptr_s) {
1207     ptr_p->succ = ptr_s;
1208     ptr_s->pred = ptr_p;
1209   }
1210 }
1211 
1212 
1213 PRIVATE INLINE char *
db2shapes(const char * structure,unsigned int level)1214 db2shapes(const char    *structure,
1215           unsigned int  level)
1216 {
1217   unsigned int  n;
1218   char          *SHAPE;
1219   short         *pt;
1220 
1221   n     = strlen(structure);
1222   pt    = vrna_ptable(structure);
1223 
1224   SHAPE = db2shapes_pt(pt, n, level);
1225 
1226   free(pt);
1227 
1228   return SHAPE;
1229 }
1230 
1231 
1232 PRIVATE INLINE char *
db2shapes_pt(const short * pt,unsigned int n,unsigned int level)1233 db2shapes_pt(const short  *pt,
1234              unsigned int n,
1235              unsigned int level)
1236 {
1237   unsigned int  start;
1238   struct shrep  *ptr, *ptr2;
1239   char          *SHAPE;
1240 
1241   SHAPE = NULL;
1242   start = 1;
1243 
1244   ptr = get_shrep(pt, start, n, level);
1245 
1246   if (ptr) {
1247     SHAPE = (char *)vrna_alloc(sizeof(char) * (n + 1));
1248 
1249     for (; ptr->pred; ptr = ptr->pred);
1250 
1251     for (n = 0; ptr; n++) {
1252       SHAPE[n] = ptr->character;
1253       ptr2 = ptr;
1254       ptr = ptr->succ;
1255       free(ptr2);
1256     }
1257 
1258     free(ptr);
1259 
1260     SHAPE = vrna_realloc(SHAPE, sizeof(char) * (n + 1));
1261     SHAPE[n] = '\0';
1262   }
1263 
1264   return SHAPE;
1265 }
1266 
1267 
1268 PRIVATE INLINE struct shrep *
get_shrep(const short * pt,unsigned int start,unsigned int end,unsigned int level)1269 get_shrep(const short   *pt,
1270           unsigned int  start,
1271           unsigned int  end,
1272           unsigned int  level)
1273 {
1274   struct shrep  *shape_string, *substring, *sub5, *sub3;
1275   unsigned int  components, i, k, l, ext, u5, u3, cnt;
1276   unsigned char no_stack, bulge;
1277 
1278   shape_string  = NULL;
1279 
1280   /* find out if there is more than one component */
1281   for (i = start, components = 0; i <= end; i++) {
1282     if (i < pt[i]) {
1283       components++;
1284       i = pt[i];
1285       if (components > 1)
1286         break;
1287     }
1288   }
1289 
1290   /* find out if we process the exterior loop */
1291   for (i = start - 1, ext = 1; i > 0; i--) {
1292     if (pt[i] > end) {
1293       ext = 0;
1294       break;
1295     }
1296   }
1297 
1298   for (; start <= end; start++) {
1299     if (start < pt[start]) { /* we have base pair (start, pt[start]) */
1300       substring = sub5 = sub3 = NULL;
1301       k         = start + 1;
1302       l         = pt[start] - 1;
1303 
1304       while (1) {
1305         u5 = 0;
1306         u3 = 0;
1307 
1308         for(; pt[k] == 0; k++, u5++); /* skip unpaired 5' side */
1309         for(; pt[l] == 0; l--, u3++); /* skip unpaired 3' side */
1310 
1311         if (k >= l) { /* hairpin loop */
1312           if (level == 0)
1313             for (cnt = 0; cnt < u5; cnt++)
1314               substring = shrep_insert_after(substring, SHAPES_UP);
1315 
1316           break;
1317         } else {
1318           if(pt[k] != l){ /* multi branch loop or external loop */
1319             substring = get_shrep(pt, k, l, level);
1320             if (level == 1) {
1321               if (u5)
1322                 sub5 = shrep_insert_after(sub5, SHAPES_UP);
1323 
1324               if (u3)
1325                 sub3 = shrep_insert_before(sub3, SHAPES_UP);
1326             } else if (level == 0) {
1327               for (cnt = 0; cnt < u5; cnt++)
1328                 sub5 = shrep_insert_after(sub5, SHAPES_UP);
1329 
1330               for (cnt = 0; cnt < u3; cnt++)
1331                 sub3 = shrep_insert_before(sub3, SHAPES_UP);
1332             }
1333 
1334             break;
1335           } else {
1336             /* interior loop with enclosed pair (k,l) */
1337             no_stack  = (u5 + u3 > 0) ? 1 : 0;
1338             bulge     = ((u5 > 0) && (u3 > 0)) ? 1 : 0;
1339 
1340             switch (level) {
1341               case 4:
1342                 if (bulge) {
1343                   sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1344                   sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1345                 }
1346                 break;
1347 
1348               case 3:
1349                 if (no_stack) {
1350                   sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1351                   sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1352                 }
1353                 break;
1354 
1355               case 2: /* fall through */
1356               case 1:
1357                 if (no_stack) {
1358                   if (u5)
1359                     sub5 = shrep_insert_after(sub5, SHAPES_UP);
1360 
1361                   if (u3)
1362                     sub3 = shrep_insert_before(sub3, SHAPES_UP);
1363 
1364                   sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1365                   sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1366                 }
1367                 break;
1368 
1369               case 0:
1370                 for (unsigned int cnt = 0; cnt < u5; cnt++)
1371                   sub5 = shrep_insert_after(sub5, SHAPES_UP);
1372 
1373                 for (unsigned int cnt = 0; cnt < u3; cnt++)
1374                   sub3 = shrep_insert_before(sub3, SHAPES_UP);
1375 
1376                 sub5 = shrep_insert_after(sub5, SHAPES_OPEN);
1377                 sub3 = shrep_insert_before(sub3, SHAPES_CLOSE);
1378                 break;
1379 
1380               default:
1381                 break;
1382             }
1383 
1384             k++;
1385             l--;
1386           }
1387         }
1388       }
1389 
1390       sub5 = shrep_insert_before(sub5, SHAPES_OPEN);
1391       shrep_concat(&sub5, substring);
1392       shrep_concat(&sub5, sub3);
1393       sub5 = shrep_insert_after(sub5, SHAPES_CLOSE);
1394       shrep_concat(&shape_string, sub5);
1395 
1396       start = pt[start];
1397     } else if ((pt[start] == 0) && (level < 3)) {
1398       if (level == 0) {
1399         shape_string = shrep_insert_after(shape_string, SHAPES_UP);
1400       } else if ((level == 1) ||
1401                  ((components < 2) && (ext == 0))) {
1402         shape_string = shrep_insert_after(shape_string, SHAPES_UP);
1403         for(; (start <= end) && (pt[start] == 0); start++);
1404         start--;
1405       }
1406     }
1407   }
1408 
1409   return shape_string;
1410 }
1411 
1412 
1413 PRIVATE INLINE void
flatten_brackets(char * string,const char pair[3],const char target[3])1414 flatten_brackets(char       *string,
1415                  const char pair[3],
1416                  const char target[3])
1417 {
1418   unsigned int i;
1419 
1420   for (i = 0; string[i] != '\0'; i++) {
1421     if (string[i] == pair[0])
1422       string[i] = target[0];
1423     else if (string[i] == pair[1])
1424       string[i] = target[1];
1425   }
1426 }
1427 
1428 
1429 /* requires that pt[0] already contains the length of the string! */
1430 PRIVATE INLINE int
extract_pairs(short * pt,const char * structure,const char * pair)1431 extract_pairs(short       *pt,
1432               const char  *structure,
1433               const char  *pair)
1434 {
1435   const char    *ptr;
1436   char          open, close;
1437   short         *stack;
1438   unsigned int  i, j, n;
1439   int           hx;
1440 
1441   n     = (unsigned int)pt[0];
1442   stack = (short *)vrna_alloc(sizeof(short) * (n + 1));
1443 
1444   open  = pair[0];
1445   close = pair[1];
1446 
1447   for (hx = 0, i = 1, ptr = structure; (i <= n) && (*ptr != '\0'); ptr++, i++) {
1448     if (*ptr == open) {
1449       stack[hx++] = i;
1450     } else if (*ptr == close) {
1451       j = stack[--hx];
1452 
1453       if (hx < 0) {
1454         vrna_message_warning("%s\nunbalanced brackets '%2s' found while extracting base pairs",
1455                              structure,
1456                              pair);
1457         free(stack);
1458         return 0;
1459       }
1460 
1461       pt[i] = j;
1462       pt[j] = i;
1463     }
1464   }
1465 
1466   free(stack);
1467 
1468   if (hx != 0) {
1469     vrna_message_warning("%s\nunbalanced brackets '%2s' found while extracting base pairs",
1470                          structure,
1471                          pair);
1472     return 0;
1473   }
1474 
1475   return 1; /* success */
1476 }
1477 
1478 
1479 PRIVATE vrna_ep_t *
wrap_get_plist(vrna_mx_pf_t * matrices,int length,int * index,short * S,vrna_exp_param_t * pf_params,double cut_off)1480 wrap_get_plist(vrna_mx_pf_t     *matrices,
1481                int              length,
1482                int              *index,
1483                short            *S,
1484                vrna_exp_param_t *pf_params,
1485                double           cut_off)
1486 {
1487   int         i, j, k, n, count, gquad;
1488   FLT_OR_DBL  *probs, *G, *scale;
1489   vrna_ep_t   *pl;
1490 
1491   probs = matrices->probs;
1492   G     = matrices->G;
1493   scale = matrices->scale;
1494   gquad = pf_params->model_details.gquad;
1495 
1496   count = 0;
1497   n     = 2;
1498 
1499   /* first guess of the size needed for pl */
1500   pl = (vrna_ep_t *)vrna_alloc(n * length * sizeof(vrna_ep_t));
1501 
1502   for (i = 1; i < length; i++) {
1503     for (j = i + 1; j <= length; j++) {
1504       /* skip all entries below the cutoff */
1505       if (probs[index[i] - j] < (FLT_OR_DBL)cut_off)
1506         continue;
1507 
1508       /* do we need to allocate more memory? */
1509       if (count == n * length - 1) {
1510         n   *= 2;
1511         pl  = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1512       }
1513 
1514       /* check for presence of gquadruplex */
1515       if (gquad && (S[i] == 3) && (S[j] == 3)) {
1516         /* add probability of a gquadruplex at position (i,j)
1517          * for dot_plot
1518          */
1519         (pl)[count].i       = i;
1520         (pl)[count].j       = j;
1521         (pl)[count].p       = (float)probs[index[i] - j];
1522         (pl)[count++].type  = VRNA_PLIST_TYPE_GQUAD;
1523         /* now add the probabilies of it's actual pairing patterns */
1524         vrna_ep_t *inner, *ptr;
1525         inner = get_plist_gquad_from_pr(S, i, j, G, probs, scale, pf_params);
1526         for (ptr = inner; ptr->i != 0; ptr++) {
1527           if (count == n * length - 1) {
1528             n   *= 2;
1529             pl  = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1530           }
1531 
1532           /* check if we've already seen this pair */
1533           for (k = 0; k < count; k++)
1534             if (((pl)[k].i == ptr->i) && ((pl)[k].j == ptr->j))
1535               break;
1536 
1537           (pl)[k].i     = ptr->i;
1538           (pl)[k].j     = ptr->j;
1539           (pl)[k].type  = VRNA_PLIST_TYPE_GQUAD;
1540           if (k == count) {
1541             (pl)[k].p = ptr->p;
1542             count++;
1543           } else {
1544             (pl)[k].p += ptr->p;
1545           }
1546         }
1547       } else {
1548         (pl)[count].i       = i;
1549         (pl)[count].j       = j;
1550         (pl)[count].p       = (float)probs[index[i] - j];
1551         (pl)[count++].type  = VRNA_PLIST_TYPE_BASEPAIR;
1552       }
1553     }
1554   }
1555   /* mark the end of pl */
1556   (pl)[count].i     = 0;
1557   (pl)[count].j     = 0;
1558   (pl)[count].type  = 0;
1559   (pl)[count++].p   = 0.;
1560   /* shrink memory to actual size needed */
1561   pl = (vrna_ep_t *)vrna_realloc(pl, count * sizeof(vrna_ep_t));
1562 
1563   return pl;
1564 }
1565 
1566 
1567 PRIVATE vrna_ep_t *
wrap_plist(vrna_fold_compound_t * vc,double cut_off)1568 wrap_plist(vrna_fold_compound_t *vc,
1569            double               cut_off)
1570 {
1571   short             *S;
1572   int               i, j, k, n, m, count, gquad, length, *index;
1573   FLT_OR_DBL        *probs;
1574   vrna_ep_t         *pl;
1575   vrna_mx_pf_t      *matrices;
1576   vrna_exp_param_t  *pf_params;
1577 
1578   S         = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : vc->S_cons;
1579   index     = vc->iindx;
1580   length    = vc->length;
1581   pf_params = vc->exp_params;
1582   matrices  = vc->exp_matrices;
1583   probs     = matrices->probs;
1584   gquad     = pf_params->model_details.gquad;
1585 
1586   count = 0;
1587   n     = 2;
1588 
1589   /* first guess of the size needed for pl */
1590   pl = (vrna_ep_t *)vrna_alloc(n * length * sizeof(vrna_ep_t));
1591 
1592   for (i = 1; i < length; i++) {
1593     for (j = i + 1; j <= length; j++) {
1594       /* skip all entries below the cutoff */
1595       if (probs[index[i] - j] < (FLT_OR_DBL)cut_off)
1596         continue;
1597 
1598       /* do we need to allocate more memory? */
1599       if (count == n * length - 1) {
1600         n   *= 2;
1601         pl  = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1602       }
1603 
1604       /* check for presence of gquadruplex */
1605       if (gquad && (S[i] == 3) && (S[j] == 3)) {
1606         /* add probability of a gquadruplex at position (i,j)
1607          * for dot_plot
1608          */
1609         (pl)[count].i       = i;
1610         (pl)[count].j       = j;
1611         (pl)[count].p       = (float)probs[index[i] - j];
1612         (pl)[count++].type  = VRNA_PLIST_TYPE_GQUAD;
1613 
1614         /* now add the probabilies of it's actual pairing patterns */
1615         vrna_ep_t *inner, *ptr;
1616         inner = vrna_get_plist_gquad_from_pr(vc, i, j);
1617         for (ptr = inner; ptr->i != 0; ptr++) {
1618           if (count == n * length - 1) {
1619             n   *= 2;
1620             pl  = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1621           }
1622 
1623           /* check if we've already seen this pair */
1624           for (k = 0; k < count; k++)
1625             if (((pl)[k].i == ptr->i) && ((pl)[k].j == ptr->j) &&
1626                 ((pl)[k].type == VRNA_PLIST_TYPE_BASEPAIR))
1627               break;
1628 
1629           (pl)[k].i     = ptr->i;
1630           (pl)[k].j     = ptr->j;
1631           (pl)[k].type  = VRNA_PLIST_TYPE_BASEPAIR;
1632           if (k == count) {
1633             (pl)[k].p = ptr->p;
1634             count++;
1635           } else {
1636             (pl)[k].p += ptr->p;
1637           }
1638         }
1639         free(inner);
1640       } else {
1641         (pl)[count].i       = i;
1642         (pl)[count].j       = j;
1643         (pl)[count].p       = (float)probs[index[i] - j];
1644         (pl)[count++].type  = VRNA_PLIST_TYPE_BASEPAIR;
1645       }
1646     }
1647   }
1648 
1649   /* check unstructured domains */
1650   if (vc->domains_up) {
1651     vrna_ud_t *domains_up;
1652     domains_up = vc->domains_up;
1653 
1654     if (domains_up->probs_get) {
1655       for (i = 1; i <= length; i++)
1656         for (m = 0; m < domains_up->motif_count; m++) {
1657           FLT_OR_DBL pp;
1658           j   = i + domains_up->motif_size[m] - 1;
1659           pp  = 0.;
1660           pp  += domains_up->probs_get(vc,
1661                                        i,
1662                                        j,
1663                                        VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP,
1664                                        m,
1665                                        domains_up->data);
1666           pp += domains_up->probs_get(vc,
1667                                       i,
1668                                       j,
1669                                       VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP,
1670                                       m,
1671                                       domains_up->data);
1672           pp += domains_up->probs_get(vc,
1673                                       i,
1674                                       j,
1675                                       VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP,
1676                                       m,
1677                                       domains_up->data);
1678           pp += domains_up->probs_get(vc,
1679                                       i,
1680                                       j,
1681                                       VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP,
1682                                       m,
1683                                       domains_up->data);
1684           if (pp >= (FLT_OR_DBL)cut_off) {
1685             /* do we need to allocate more memory? */
1686             if (count == n * length - 1) {
1687               n   *= 2;
1688               pl  = (vrna_ep_t *)vrna_realloc(pl, n * length * sizeof(vrna_ep_t));
1689             }
1690 
1691             (pl)[count].i       = i;
1692             (pl)[count].j       = j;
1693             (pl)[count].p       = (float)pp;
1694             (pl)[count++].type  = VRNA_PLIST_TYPE_UD_MOTIF;
1695           }
1696         }
1697     }
1698   }
1699 
1700   /* mark the end of pl */
1701   (pl)[count].i     = 0;
1702   (pl)[count].j     = 0;
1703   (pl)[count].type  = 0;
1704   (pl)[count++].p   = 0.;
1705   /* shrink memory to actual size needed */
1706   pl = (vrna_ep_t *)vrna_realloc(pl, count * sizeof(vrna_ep_t));
1707 
1708   return pl;
1709 }
1710 
1711 
1712 PRIVATE void
assign_elements_pair(short * pt,int i,int j,char * elements)1713 assign_elements_pair(short  *pt,
1714                      int    i,
1715                      int    j,
1716                      char   *elements)
1717 {
1718   int p, k, num_pairs;
1719 
1720   num_pairs = 0;
1721   /* first, determine the number of pairs (i,j) is enclosing */
1722   for (k = i + 1; k < j; k++) {
1723     if (k < pt[k]) {
1724       num_pairs++;
1725       k = pt[k];
1726     }
1727   }
1728 
1729   switch (num_pairs) {
1730     case 0:   /* hairpin loop */
1731       elements[i - 1] = elements[j - 1] = 'H';
1732       for (k = i + 1; k < j; k++)
1733         elements[k - 1] = 'h';
1734       break;
1735 
1736     case 1:   /* interior loop */
1737       elements[i - 1] = elements[j - 1] = 'I';
1738       p               = 0;
1739       for (k = i + 1; k < j; k++) {
1740         if (!pt[k]) {
1741           elements[k - 1] = 'i';
1742         } else {
1743           p = k;
1744           k = pt[k];
1745         }
1746       }
1747 
1748       if (p)
1749         assign_elements_pair(pt, p, pt[p], elements);
1750 
1751       break;
1752 
1753     default:  /* multibranch loop */
1754       elements[i - 1] = elements[j - 1] = 'M';
1755       for (k = i + 1; k < j; k++) {
1756         if (!pt[k]) {
1757           elements[k - 1] = 'm';
1758         } else {
1759           assign_elements_pair(pt, k, pt[k], elements);
1760           k = pt[k];
1761         }
1762       }
1763       break;
1764   }
1765 }
1766 
1767 
1768 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
1769 
1770 /*###########################################*/
1771 /*# deprecated functions below              #*/
1772 /*###########################################*/
1773 
1774 
1775 PUBLIC char *
pack_structure(const char * struc)1776 pack_structure(const char *struc)
1777 {
1778   return vrna_db_pack(struc);
1779 }
1780 
1781 
1782 PUBLIC char *
unpack_structure(const char * packed)1783 unpack_structure(const char *packed)
1784 {
1785   return vrna_db_unpack(packed);
1786 }
1787 
1788 
1789 PUBLIC void
parenthesis_structure(char * structure,vrna_bp_stack_t * bp,int length)1790 parenthesis_structure(char            *structure,
1791                       vrna_bp_stack_t *bp,
1792                       int             length)
1793 {
1794   char *s = vrna_db_from_bp_stack(bp, length);
1795 
1796   strncpy(structure, s, length + 1);
1797   free(s);
1798 }
1799 
1800 
1801 PUBLIC void
letter_structure(char * structure,vrna_bp_stack_t * bp,int length)1802 letter_structure(char             *structure,
1803                  vrna_bp_stack_t  *bp,
1804                  int              length)
1805 {
1806   vrna_letter_structure(structure, bp, length);
1807 }
1808 
1809 
1810 PUBLIC void
parenthesis_zuker(char * structure,vrna_bp_stack_t * bp,int length)1811 parenthesis_zuker(char            *structure,
1812                   vrna_bp_stack_t *bp,
1813                   int             length)
1814 {
1815   char *s = vrna_db_from_bp_stack(bp, length);
1816 
1817   strncpy(structure, s, length + 1);
1818   free(s);
1819 }
1820 
1821 
1822 PUBLIC void
assign_plist_from_pr(vrna_ep_t ** pl,FLT_OR_DBL * probs,int length,double cut_off)1823 assign_plist_from_pr(vrna_ep_t  **pl,
1824                      FLT_OR_DBL *probs,
1825                      int        length,
1826                      double     cut_off)
1827 {
1828   int               *index;
1829   vrna_mx_pf_t      *matrices;
1830   vrna_md_t         md;
1831   vrna_exp_param_t  *pf_params;
1832 
1833   index     = vrna_idx_row_wise(length);
1834   matrices  = (vrna_mx_pf_t *)vrna_alloc(sizeof(vrna_mx_pf_t));
1835 
1836   set_model_details(&md);
1837   md.gquad        = 0;
1838   pf_params       = vrna_exp_params(&md);
1839   matrices->probs = probs;
1840 
1841   *pl = wrap_get_plist(matrices,
1842                        length,
1843                        index,
1844                        NULL,
1845                        pf_params,
1846                        cut_off);
1847 
1848   free(index);
1849   free(pf_params);
1850   free(matrices);
1851 }
1852 
1853 
1854 PUBLIC void
assign_plist_from_db(vrna_ep_t ** pl,const char * struc,float pr)1855 assign_plist_from_db(vrna_ep_t  **pl,
1856                      const char *struc,
1857                      float      pr)
1858 {
1859   *pl = vrna_plist(struc, pr);
1860 }
1861 
1862 
1863 PUBLIC short *
make_pair_table(const char * structure)1864 make_pair_table(const char *structure)
1865 {
1866   return vrna_ptable(structure);
1867 }
1868 
1869 
1870 PUBLIC short *
copy_pair_table(const short * pt)1871 copy_pair_table(const short *pt)
1872 {
1873   return vrna_ptable_copy(pt);
1874 }
1875 
1876 
1877 PUBLIC short *
make_pair_table_pk(const char * structure)1878 make_pair_table_pk(const char *structure)
1879 {
1880   return vrna_pt_pk_get(structure);
1881 }
1882 
1883 
1884 PUBLIC short *
make_pair_table_snoop(const char * structure)1885 make_pair_table_snoop(const char *structure)
1886 {
1887   return vrna_pt_snoop_get(structure);
1888 }
1889 
1890 
1891 PUBLIC short *
alimake_pair_table(const char * structure)1892 alimake_pair_table(const char *structure)
1893 {
1894   return vrna_pt_ali_get(structure);
1895 }
1896 
1897 
1898 PUBLIC int *
make_loop_index_pt(short * pt)1899 make_loop_index_pt(short *pt)
1900 {
1901   return vrna_loopidx_from_ptable((const short *)pt);
1902 }
1903 
1904 
1905 PUBLIC int
bp_distance(const char * str1,const char * str2)1906 bp_distance(const char  *str1,
1907             const char  *str2)
1908 {
1909   return vrna_bp_distance(str1, str2);
1910 }
1911 
1912 
1913 PUBLIC unsigned int *
make_referenceBP_array(short * reference_pt,unsigned int turn)1914 make_referenceBP_array(short        *reference_pt,
1915                        unsigned int turn)
1916 {
1917   return vrna_refBPcnt_matrix((const short *)reference_pt, turn);
1918 }
1919 
1920 
1921 PUBLIC unsigned int *
compute_BPdifferences(short * pt1,short * pt2,unsigned int turn)1922 compute_BPdifferences(short         *pt1,
1923                       short         *pt2,
1924                       unsigned int  turn)
1925 {
1926   return vrna_refBPdist_matrix((const short *)pt1, (const short *)pt2, turn);
1927 }
1928 
1929 
1930 PUBLIC char
bppm_symbol(const float * x)1931 bppm_symbol(const float *x)
1932 {
1933   return vrna_bpp_symbol(x);
1934 }
1935 
1936 
1937 PUBLIC void
bppm_to_structure(char * structure,FLT_OR_DBL * p,unsigned int length)1938 bppm_to_structure(char          *structure,
1939                   FLT_OR_DBL    *p,
1940                   unsigned int  length)
1941 {
1942   char *s = vrna_db_from_probs((const FLT_OR_DBL *)p, length);
1943 
1944   memcpy(structure, s, length);
1945   structure[length] = '\0';
1946   free(s);
1947 }
1948 
1949 
1950 #endif
1951