1 /*
2  *                minimum free energy
3  *                RNA secondary structure prediction
4  *
5  *                c Ivo Hofacker, Chrisoph Flamm
6  *                original implementation by
7  *                Walter Fontana
8  *
9  *                Vienna RNA package
10  */
11 
12 #ifdef HAVE_CONFIG_H
13 #include "config.h"
14 #endif
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <math.h>
19 #include <ctype.h>
20 #include <string.h>
21 #include <limits.h>
22 
23 #include "ViennaRNA/utils/basic.h"
24 #include "ViennaRNA/utils/strings.h"
25 #include "ViennaRNA/utils/structures.h"
26 #include "ViennaRNA/params/default.h"
27 #include "ViennaRNA/fold_vars.h"
28 #include "ViennaRNA/params/basic.h"
29 #include "ViennaRNA/subopt.h"
30 #include "ViennaRNA/fold.h"
31 #include "ViennaRNA/loops/all.h"
32 #include "ViennaRNA/gquad.h"
33 #include "ViennaRNA/alphabet.h"
34 #include "ViennaRNA/cofold.h"
35 
36 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
37 
38 #ifdef _OPENMP
39 #include <omp.h>
40 #endif
41 
42 #endif
43 
44 #define MAXSECTORS        500     /* dimension for a backtrack array */
45 
46 /*
47  #################################
48  # GLOBAL VARIABLES              #
49  #################################
50  */
51 
52 
53 /*
54  #################################
55  # PRIVATE VARIABLES             #
56  #################################
57  */
58 
59 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
60 
61 /* some backward compatibility stuff */
62 PRIVATE int                   backward_compat           = 0;
63 PRIVATE vrna_fold_compound_t  *backward_compat_compound = NULL;
64 
65 PRIVATE float                 mfe1, mfe2; /* minimum free energies of the monomers */
66 
67 #ifdef _OPENMP
68 
69 #pragma omp threadprivate(mfe1, mfe2, backward_compat_compound, backward_compat)
70 
71 #endif
72 
73 #endif
74 
75 /*
76  #################################
77  # PRIVATE FUNCTION DECLARATIONS #
78  #################################
79  */
80 
81 PRIVATE int
82 backtrack(sect                  bt_stack[],
83           vrna_bp_stack_t       *bp_list,
84           vrna_fold_compound_t  *vc);
85 
86 
87 PRIVATE int
88 fill_arrays(vrna_fold_compound_t  *vc,
89             int                   zuker);
90 
91 
92 PRIVATE void
93 free_end(int                  *array,
94          int                  i,
95          int                  start,
96          vrna_fold_compound_t *vc);
97 
98 
99 PRIVATE void
100 doubleseq(vrna_fold_compound_t *vc);                /* do magic */
101 
102 
103 PRIVATE void
104 halfseq(vrna_fold_compound_t *vc);                  /* undo magic */
105 
106 
107 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
108 
109 /* wrappers for old API compatibility */
110 PRIVATE void
111 wrap_array_export(int   **f5_p,
112                   int   **c_p,
113                   int   **fML_p,
114                   int   **fM1_p,
115                   int   **fc_p,
116                   int   **indx_p,
117                   char  **ptype_p);
118 
119 
120 PRIVATE float
121 wrap_cofold(const char    *string,
122             char          *structure,
123             vrna_param_t  *parameters,
124             int           is_constrained);
125 
126 
127 PRIVATE SOLUTION *
128 wrap_zukersubopt(const char   *string,
129                  vrna_param_t *parameters);
130 
131 
132 #endif
133 
134 /*
135  #################################
136  # BEGIN OF FUNCTION DEFINITIONS #
137  #################################
138  */
139 PUBLIC float
vrna_mfe_dimer(vrna_fold_compound_t * vc,char * structure)140 vrna_mfe_dimer(vrna_fold_compound_t *vc,
141                char                 *structure)
142 {
143   int             length, energy;
144   char            *s;
145   sect            bt_stack[MAXSECTORS]; /* stack of partial structures for backtracking */
146   vrna_bp_stack_t *bp;
147 
148   length = (int)vc->length;
149 
150   vc->sequence_encoding[0] = vc->sequence_encoding2[0]; /* store length at pos. 0 in S1 too */
151 
152   if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_HYBRID)) {
153     vrna_message_warning("vrna_mfe_dimer@cofold.c: Failed to prepare vrna_fold_compound");
154     return (float)(INF / 100.);
155   }
156 
157   /* call user-defined recursion status callback function */
158   if (vc->stat_cb)
159     vc->stat_cb(VRNA_STATUS_MFE_PRE, vc->auxdata);
160 
161   energy = fill_arrays(vc, 0);
162 
163   /* call user-defined recursion status callback function */
164   if (vc->stat_cb)
165     vc->stat_cb(VRNA_STATUS_MFE_POST, vc->auxdata);
166 
167   if (structure && vc->params->model_details.backtrack) {
168     bp = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2))); /* add a guess of how many G's may be involved in a G quadruplex */
169 
170     backtrack(bt_stack, bp, vc);
171 
172     s = vrna_db_from_bp_stack(bp, length);
173     strncpy(structure, s, length + 1);
174     free(s);
175     free(bp);
176   }
177 
178   if (vc->params->model_details.backtrack_type == 'C')
179     return (float)vc->matrices->c[vc->jindx[length] + 1] / 100.;
180   else if (vc->params->model_details.backtrack_type == 'M')
181     return (float)vc->matrices->fML[vc->jindx[length] + 1] / 100.;
182   else
183     return (float)energy / 100.;
184 }
185 
186 
187 PRIVATE int
fill_arrays(vrna_fold_compound_t * vc,int zuker)188 fill_arrays(vrna_fold_compound_t  *vc,
189             int                   zuker)
190 {
191   /* fill "c", "fML" and "f5" arrays and return  optimal energy */
192 
193   unsigned int  strands, *sn, *ss, *se, *so;
194   int           i, j, length, energy;
195   int           uniq_ML;
196   int           no_close, type, maxj, *indx;
197   int           *my_f5, *my_c, *my_fML, *my_fM1, *my_fc;
198   int           *cc, *cc1;  /* auxilary arrays for canonical structures     */
199   int           *Fmi;       /* holds row i of fML (avoids jumps in memory)  */
200   int           *DMLi;      /* DMLi[j] holds  MIN(fML[i,k]+fML[k+1,j])      */
201   int           *DMLi1;     /*                MIN(fML[i+1,k]+fML[k+1,j])    */
202   int           *DMLi2;     /*                MIN(fML[i+2,k]+fML[k+1,j])    */
203 
204   int           dangle_model, noGUclosure, noLP, hc_decompose, turn;
205   char          *ptype;
206   unsigned char *hard_constraints;
207   vrna_param_t  *P;
208   vrna_mx_mfe_t *matrices;
209   vrna_hc_t     *hc;
210 
211   length            = (int)vc->length;
212   ptype             = vc->ptype;
213   indx              = vc->jindx;
214   P                 = vc->params;
215   dangle_model      = P->model_details.dangles;
216   noGUclosure       = P->model_details.noGUclosure;
217   noLP              = P->model_details.noLP;
218   uniq_ML           = P->model_details.uniq_ML;
219   strands           = vc->strands;
220   sn                = vc->strand_number;
221   ss                = vc->strand_start;
222   se                = vc->strand_end;
223   so                = vc->strand_order;
224   hc                = vc->hc;
225   hard_constraints  = hc->mx;
226   matrices          = vc->matrices;
227   my_f5             = matrices->f5;
228   my_c              = matrices->c;
229   my_fML            = matrices->fML;
230   my_fM1            = matrices->fM1;
231   my_fc             = matrices->fc;
232   turn              = P->model_details.min_loop_size;
233 
234   /* allocate memory for all helper arrays */
235   cc    = (int *)vrna_alloc(sizeof(int) * (length + 2));
236   cc1   = (int *)vrna_alloc(sizeof(int) * (length + 2));
237   Fmi   = (int *)vrna_alloc(sizeof(int) * (length + 1));
238   DMLi  = (int *)vrna_alloc(sizeof(int) * (length + 1));
239   DMLi1 = (int *)vrna_alloc(sizeof(int) * (length + 1));
240   DMLi2 = (int *)vrna_alloc(sizeof(int) * (length + 1));
241 
242 
243   /* hard code min_loop_size to 0, since we can not be sure yet that this is already the case */
244   turn = 0;
245 
246   for (j = 1; j <= length; j++) {
247     Fmi[j]    = DMLi[j] = DMLi1[j] = DMLi2[j] = INF;
248     my_fc[j]  = 0;
249   }
250 
251   for (j = 1; j <= length; j++)
252     for (i = 1; i <= j; i++) {
253       my_c[indx[j] + i] = my_fML[indx[j] + i] = INF;
254       if (uniq_ML)
255         my_fM1[indx[j] + i] = INF;
256     }
257 
258   for (i = length - turn - 1; i >= 1; i--) {
259     /* i,j in [1..length] */
260 
261     maxj = (zuker) ? (MIN2(i + se[so[0]], length)) : length;
262     for (j = i + turn + 1; j <= maxj; j++) {
263       int ij;
264       ij            = indx[j] + i;
265       type          = vrna_get_ptype(ij, ptype);
266       hc_decompose  = hard_constraints[length * i + j];
267       energy        = INF;
268 
269       no_close = (((type == 3) || (type == 4)) && noGUclosure);
270 
271       if (hc_decompose) {
272         /* we have a pair */
273         int new_c = INF;
274 
275         if (!no_close) {
276           /* check for hairpin loop */
277           energy  = vrna_E_hp_loop(vc, i, j);
278           new_c   = MIN2(new_c, energy);
279 
280           /* check for multibranch loops */
281           energy  = vrna_E_mb_loop_fast(vc, i, j, DMLi1, DMLi2);
282           new_c   = MIN2(new_c, energy);
283         }
284 
285         if (dangle_model == 3) {
286           /* coaxial stacking */
287           energy  = vrna_E_mb_loop_stack(vc, i, j);
288           new_c   = MIN2(new_c, energy);
289         }
290 
291         /* check for interior loops */
292         energy  = vrna_E_int_loop(vc, i, j);
293         new_c   = MIN2(new_c, energy);
294 
295         /* remember stack energy for --noLP option */
296         if (noLP) {
297           if ((sn[i] == sn[i + 1]) && (sn[j - 1] == sn[j])) {
298             int stackEnergy = vrna_E_stack(vc, i, j);
299             new_c     = MIN2(new_c, cc1[j - 1] + stackEnergy);
300             my_c[ij]  = cc1[j - 1] + stackEnergy;
301           } else {
302             /* currently we don't allow stacking over the cut point */
303             my_c[ij] = FORBIDDEN;
304           }
305 
306           cc[j] = new_c;
307         } else {
308           my_c[ij] = new_c;
309         }
310       } /* end >> if (pair) << */
311       else {
312         my_c[ij] = INF;
313       }
314 
315       /*
316        * done with c[i,j], now compute fML[i,j]
317        * free ends ? -----------------------------------------
318        */
319 
320       my_fML[ij] = vrna_E_ml_stems_fast(vc, i, j, Fmi, DMLi);
321 
322       if (uniq_ML)   /* compute fM1 for unique decomposition */
323         my_fM1[ij] = E_ml_rightmost_stem(i, j, vc);
324     }
325 
326     if (i == se[so[0]] + 1)
327       for (j = i; j <= maxj; j++)
328         free_end(my_fc, j, ss[so[1]], vc);
329 
330     if (i <= se[so[0]])
331       free_end(my_fc, i, se[so[0]], vc);
332 
333     {
334       int *FF; /* rotate the auxilliary arrays */
335       FF    = DMLi2;
336       DMLi2 = DMLi1;
337       DMLi1 = DMLi;
338       DMLi  = FF;
339       FF    = cc1;
340       cc1   = cc;
341       cc    = FF;
342       for (j = 1; j <= maxj; j++)
343         cc[j] = Fmi[j] = DMLi[j] = INF;
344     }
345   }
346 
347   /* calculate energies of 5' and 3' fragments */
348 
349   for (i = 1; i <= length; i++)
350     free_end(my_f5, i, 1, vc);
351 
352   if (strands > 1) {
353     mfe1  = my_f5[se[so[0]]];
354     mfe2  = my_fc[length];
355     /* add DuplexInit, check whether duplex*/
356     for (i = ss[so[1]]; i <= length; i++) {
357       if (my_f5[i] != INF)
358         my_f5[i] += P->DuplexInit;
359 
360       if ((my_fc[i] != INF) &&
361           (my_fc[1] != INF)) {
362         energy  = my_fc[i] +
363                   my_fc[1];
364 
365         if (energy < my_f5[i])
366           my_f5[i] = energy;
367       }
368     }
369   }
370 
371   energy = my_f5[length];
372   if (strands == 1)
373     mfe1 = mfe2 = energy;
374 
375   /* clean up memory */
376   free(cc);
377   free(cc1);
378   free(Fmi);
379   free(DMLi);
380   free(DMLi1);
381   free(DMLi2);
382 
383   return energy;
384 }
385 
386 
387 PRIVATE int
backtrack_co(sect bt_stack[],vrna_bp_stack_t * bp_list,int s,int b,vrna_fold_compound_t * vc)388 backtrack_co(sect                 bt_stack[],
389              vrna_bp_stack_t      *bp_list,
390              int                  s,
391              int                  b, /* b=0: start new structure, b \ne 0: add to existing structure */
392              vrna_fold_compound_t *vc)
393 {
394   /*------------------------------------------------------------------
395    *  trace back through the "c", "fc", "f5" and "fML" arrays to get the
396    *  base pairing list. No search for equivalent structures is done.
397    *  This is fast, since only few structure elements are recalculated.
398    *  ------------------------------------------------------------------*/
399 
400   unsigned int  *se, *so;
401   int           i, j, ij, k, length, no_close, type, ret;
402   char          *string = vc->sequence;
403   vrna_param_t  *P      = vc->params;
404   int           *indx   = vc->jindx;
405   char          *ptype  = vc->ptype;
406 
407   int           noLP            = P->model_details.noLP;
408   int           noGUclosure     = P->model_details.noGUclosure;
409   char          backtrack_type  = P->model_details.backtrack_type;
410 
411   /* the folding matrices */
412   int           *my_c;
413 
414   ret     = 1;
415   length  = vc->length;
416   my_c    = vc->matrices->c;
417   se      = vc->strand_end;
418   so      = vc->strand_order;
419 
420   /* int   b=0;*/
421 
422   length = strlen(string);
423   if (s == 0) {
424     bt_stack[++s].i = 1;
425     bt_stack[s].j   = length;
426     bt_stack[s].ml  = (backtrack_type == 'M') ? 1 : ((backtrack_type == 'C') ? 2 : 0);
427   }
428 
429   while (s > 0) {
430     int ml, cij;
431     int canonical = 1;     /* (i,j) closes a canonical structure */
432 
433     /* pop one element from stack */
434     i   = bt_stack[s].i;
435     j   = bt_stack[s].j;
436     ml  = bt_stack[s--].ml;
437 
438     switch (ml) {
439       /* backtrack in f5 */
440       case 0:
441       {
442         int p, q;
443         if (vrna_BT_ext_loop_f5(vc, &j, &p, &q, bp_list, &b)) {
444           if (j > 0) {
445             bt_stack[++s].i = 1;
446             bt_stack[s].j   = j;
447             bt_stack[s].ml  = 0;
448           }
449 
450           if (p > 0) {
451             i = p;
452             j = q;
453             goto repeat1;
454           }
455 
456           continue;
457         } else {
458           vrna_message_warning("backtracking failed in f5, segment [%d,%d]\n", i, j);
459           ret = 0;
460           goto backtrack_exit;
461         }
462       }
463       break;
464 
465       /* true multi-loop backtrack in fML */
466       case 1:
467       {
468         int p, q, comp1, comp2;
469         if (vrna_BT_mb_loop_split(vc, &i, &j, &p, &q, &comp1, &comp2, bp_list, &b)) {
470           if (i > 0) {
471             bt_stack[++s].i = i;
472             bt_stack[s].j   = j;
473             bt_stack[s].ml  = comp1;
474           }
475 
476           if (p > 0) {
477             bt_stack[++s].i = p;
478             bt_stack[s].j   = q;
479             bt_stack[s].ml  = comp2;
480           }
481 
482           continue;
483         } else {
484           vrna_message_warning("backtrack failed in fML\n%s", string);
485           ret = 0;
486           goto backtrack_exit;
487         }
488       }
489       break;
490 
491       case 2:
492         bp_list[++b].i  = i;
493         bp_list[b].j    = j;
494         goto repeat1;
495 
496       /* backtrack fake-multi loop parts */
497       case 3:
498       case 4:
499       {
500         int lower, k, p, q;
501         p     = i;
502         q     = j;
503         lower = (i <= se[so[0]]) ? 1 : 0;
504 
505         if (vrna_BT_mb_loop_fake(vc, &k, &i, &j, bp_list, &b)) {
506           if (k > 0) {
507             bt_stack[++s].i = (lower) ? k : p;
508             bt_stack[s].j   = (lower) ? q : k;
509             bt_stack[s].ml  = ml;
510           }
511 
512           if (i > 0)
513             goto repeat1;
514 
515           continue;
516         } else {
517           vrna_message_warning("backtrack failed in fc\n%s", string);
518           ret = 0;
519           goto backtrack_exit;
520         }
521       }
522       break;
523     } /* end of switch(ml) */
524 
525 repeat1:
526 
527     /*----- begin of "repeat:" -----*/
528     ij = indx[j] + i;
529 
530     if (canonical)
531       cij = my_c[ij];
532 
533     type = vrna_get_ptype(ij, ptype);
534 
535     if (noLP) {
536       if (vrna_BT_stack(vc, &i, &j, &cij, bp_list, &b)) {
537         canonical = 0;
538         goto repeat1;
539       }
540     }
541 
542     canonical = 1;
543 
544     no_close = (((type == 3) || (type == 4)) && noGUclosure);
545     if (no_close) {
546       if (cij == FORBIDDEN)
547         continue;
548     } else {
549       if (vrna_BT_hp_loop(vc, i, j, cij, bp_list, &b))
550         continue;
551     }
552 
553     if (vrna_BT_int_loop(vc, &i, &j, cij, bp_list, &b)) {
554       if (i < 0)
555         continue;
556       else
557         goto repeat1;
558     }
559 
560     /* (i.j) must close a fake or true multi-loop */
561     int comp1, comp2;
562 
563     if (vrna_BT_mb_loop(vc, &i, &j, &k, cij, &comp1, &comp2)) {
564       bt_stack[++s].i = i;
565       bt_stack[s].j   = k;
566       bt_stack[s].ml  = comp1;
567       bt_stack[++s].i = k + 1;
568       bt_stack[s].j   = j;
569       bt_stack[s].ml  = comp2;
570     } else {
571       vrna_message_warning("backtracking failed in repeat, segment [%d,%d]\n", i, j);
572       ret = 0;
573       goto backtrack_exit;
574     }
575 
576     /* end of repeat: --------------------------------------------------*/
577   } /* end >> while (s>0) << */
578 
579 backtrack_exit:
580 
581   bp_list[0].i = b;    /* save the total number of base pairs */
582   return ret;
583 }
584 
585 
586 PRIVATE void
free_end(int * array,int i,int start,vrna_fold_compound_t * vc)587 free_end(int                  *array,
588          int                  i,
589          int                  start,
590          vrna_fold_compound_t *vc)
591 {
592   unsigned int  *sn;
593   int           inc, type, energy, en, length, j, left, right, dangle_model, with_gquad, *indx,
594                 *c, *ggg, turn;
595   vrna_param_t  *P;
596   short         *S1;
597   char          *ptype;
598   unsigned char *hard_constraints;
599   vrna_mx_mfe_t *matrices;
600   vrna_hc_t     *hc;
601   vrna_sc_t     *sc;
602 
603   P                 = vc->params;
604   dangle_model      = P->model_details.dangles;
605   with_gquad        = P->model_details.gquad;
606   turn              = P->model_details.min_loop_size;
607   inc               = (i > start) ? 1 : -1;
608   length            = (int)vc->length;
609   S1                = vc->sequence_encoding;
610   ptype             = vc->ptype;
611   indx              = vc->jindx;
612   sn                = vc->strand_number;
613   matrices          = vc->matrices;
614   c                 = matrices->c;
615   ggg               = matrices->ggg;
616   hc                = vc->hc;
617   sc                = vc->sc;
618   hard_constraints  = hc->mx;
619 
620   if (hc->up_ext[i]) {
621     if (i == start)
622       array[i] = 0;
623     else
624       array[i] = array[i - inc];
625 
626     if (sc) {
627       if (sc->energy_up)
628         array[i] += sc->energy_up[i][1];
629 
630       if (sc->f)
631         array[i] += sc->f(start, i, start, i - 1, VRNA_DECOMP_EXT_EXT, sc->data);
632     }
633   } else {
634     array[i] = INF;
635   }
636 
637   if (inc > 0) {
638     left  = start;
639     right = i;
640   } else {
641     left  = i;
642     right = start;
643   }
644 
645   /* hard code min_loop_size to 0, since we can not be sure yet that this is already the case */
646   turn = 0;
647 
648   for (j = start; inc * (i - j) > turn; j += inc) {
649     int   ii, jj;
650     short si, sj;
651     if (i > j) {
652       ii  = j;
653       jj  = i;
654     }                           /* inc>0 */
655     else {
656       ii  = i;
657       jj  = j;
658     }                           /* inc<0 */
659 
660     if (hard_constraints[length * ii + jj] & VRNA_CONSTRAINT_CONTEXT_EXT_LOOP) {
661       type    = vrna_get_ptype(indx[jj] + ii, ptype);
662       si      = ((ii > 1) && (sn[ii - 1] == sn[ii])) ? S1[ii - 1] : -1;
663       sj      = ((jj < length) && (sn[jj] == sn[jj + 1])) ? S1[jj + 1] : -1;
664       energy  = c[indx[jj] + ii];
665 
666       if ((sc) && (sc->f))
667         energy += sc->f(start, jj, ii - 1, ii, VRNA_DECOMP_EXT_EXT_STEM, sc->data);
668 
669       if (energy != INF) {
670         switch (dangle_model) {
671           case 0:
672             if (array[j - inc] != INF) {
673               en        = array[j - inc] + energy + vrna_E_ext_stem(type, -1, -1, P);
674               array[i]  = MIN2(array[i], en);
675             }
676 
677             break;
678           case 2:
679             if (array[j - inc] != INF) {
680               en        = array[j - inc] + energy + vrna_E_ext_stem(type, si, sj, P);
681               array[i]  = MIN2(array[i], en);
682             }
683 
684             break;
685           default:
686             if (array[j - inc] != INF) {
687               en        = array[j - inc] + energy + vrna_E_ext_stem(type, -1, -1, P);
688               array[i]  = MIN2(array[i], en);
689             }
690 
691             if (inc > 0) {
692               if (j > left) {
693                 if (hc->up_ext[ii - 1]) {
694                   if (array[j - 2] != INF) {
695                     en = array[j - 2] + energy + vrna_E_ext_stem(type, si, -1, P);
696                     if (sc)
697                       if (sc->energy_up)
698                         en += sc->energy_up[ii - 1][1];
699 
700                     array[i] = MIN2(array[i], en);
701                   }
702                 }
703               }
704             } else if (j < right) {
705               if (hc->up_ext[jj + 1]) {
706                 if (array[j + 2] != INF) {
707                   en = array[j + 2] + energy + vrna_E_ext_stem(type, -1, sj, P);
708                   if (sc)
709                     if (sc->energy_up)
710                       en += sc->energy_up[jj + 1][1];
711 
712                   array[i] = MIN2(array[i], en);
713                 }
714               }
715             }
716 
717             break;
718         }
719       }
720     }
721 
722     if (with_gquad) {
723       if (sn[ii] == sn[jj])
724         if (array[j - inc] != INF)
725           array[i] = MIN2(array[i], array[j - inc] + ggg[indx[jj] + ii]);
726     }
727 
728     if (dangle_model % 2 == 1) {
729       /* interval ends in a dangle (i.e. i-inc is paired) */
730       if (i > j) {
731         ii  = j;
732         jj  = i - 1;
733       }                             /* inc>0 */
734       else {
735         ii  = i + 1;
736         jj  = j;
737       }                             /* inc<0 */
738 
739       if (!(hard_constraints[length * ii + jj] & VRNA_CONSTRAINT_CONTEXT_EXT_LOOP))
740         continue;
741 
742       type    = vrna_get_ptype(indx[jj] + ii, ptype);
743       si      = (ii > left) && (sn[ii - 1] == sn[ii]) ? S1[ii - 1] : -1;
744       sj      = (jj < right) && (sn[jj] == sn[jj + 1]) ? S1[jj + 1] : -1;
745       energy  = c[indx[jj] + ii];
746       if (energy != INF) {
747         if (inc > 0) {
748           if (hc->up_ext[jj - 1]) {
749             if (array[j - inc] != INF) {
750               en = array[j - inc] + energy + vrna_E_ext_stem(type, -1, sj, P);
751               if (sc)
752                 if (sc->energy_up)
753                   en += sc->energy_up[jj + 1][1];
754 
755               array[i] = MIN2(array[i], en);
756             }
757           }
758         } else {
759           if (hc->up_ext[ii - 1]) {
760             if (array[j - inc] != INF) {
761               en = array[j - inc] + energy + vrna_E_ext_stem(type, si, -1, P);
762               if (sc)
763                 if (sc->energy_up)
764                   en += sc->energy_up[ii - 1][1];
765 
766               array[i] = MIN2(array[i], en);
767             }
768           }
769         }
770 
771         if (j != start) {
772           /* dangle_model on both sides */
773           if (hc->up_ext[jj - 1] && hc->up_ext[ii - 1]) {
774             if (array[j - 2 * inc] != INF) {
775               en = array[j - 2 * inc] + energy + vrna_E_ext_stem(type, si, sj, P);
776               if (sc)
777                 if (sc->energy_up)
778                   en += sc->energy_up[ii - 1][1] + sc->energy_up[jj + 1][1];
779 
780               array[i] = MIN2(array[i], en);
781             }
782           }
783         }
784       }
785     }
786   }
787 }
788 
789 
790 PRIVATE int
backtrack(sect bt_stack[],vrna_bp_stack_t * bp_list,vrna_fold_compound_t * vc)791 backtrack(sect                  bt_stack[],
792           vrna_bp_stack_t       *bp_list,
793           vrna_fold_compound_t  *vc)
794 {
795   /*routine to call backtrack_co from 1 to n, backtrack type??*/
796   return backtrack_co(bt_stack, bp_list, 0, 0, vc);
797 }
798 
799 
800 PRIVATE void
doubleseq(vrna_fold_compound_t * vc)801 doubleseq(vrna_fold_compound_t *vc)
802 {
803   unsigned int length, i, s;
804 
805   length = vc->length;
806 
807   /* do some magic to re-use cofold code */
808   vc->sequence = vrna_realloc(vc->sequence, sizeof(char) * (2 * length + 2));
809   memcpy(vc->sequence + length, vc->sequence, sizeof(char) * length);
810   vc->sequence[2 * length]  = '\0';
811   vc->length                = (unsigned int)strlen(vc->sequence);
812   vc->cutpoint              = length + 1;
813 
814   vc->strands = 2;
815 
816   free(vc->strand_number);
817   vc->strand_number = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->length + 1));
818   for (s = i = 0; i <= vc->length; i++) {
819     if (i == length + 1)
820       s++;
821 
822     vc->strand_number[i] = s;
823   }
824 
825   free(vc->strand_order);
826   free(vc->strand_start);
827   free(vc->strand_end);
828   vc->strand_order    = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->strands + 1));
829   vc->strand_start    = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->strands + 1));
830   vc->strand_end      = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->strands + 1));
831   vc->strand_order[0] = 0;
832   vc->strand_order[1] = 1;
833   vc->strand_start[0] = 1;
834   vc->strand_end[0]   = vc->strand_start[0] + length - 1;
835   vc->strand_start[1] = vc->strand_end[0] + 1;
836   vc->strand_end[1]   = vc->strand_start[1] + length - 1;
837 
838 
839   vc->sequence_encoding = vrna_realloc(vc->sequence_encoding, sizeof(short) * (vc->length + 2));
840   memcpy(vc->sequence_encoding + length + 1, vc->sequence_encoding + 1, sizeof(short) * length);
841   vc->sequence_encoding[0]              = vc->sequence_encoding[vc->length];
842   vc->sequence_encoding[vc->length + 1] = vc->sequence_encoding[1];
843 
844   vc->sequence_encoding2 = vrna_realloc(vc->sequence_encoding2, sizeof(short) * (vc->length + 2));
845   memcpy(vc->sequence_encoding2 + length + 1, vc->sequence_encoding2 + 1, sizeof(short) * length);
846   vc->sequence_encoding2[0]               = vc->length;
847   vc->sequence_encoding2[vc->length + 1]  = 0;
848 
849   free(vc->ptype);
850   vc->ptype = vrna_ptypes(vc->sequence_encoding2, &(vc->params->model_details));
851   free(vc->iindx);
852   vc->iindx = vrna_idx_row_wise(vc->length);
853   free(vc->jindx);
854   vc->jindx = vrna_idx_col_wise(vc->length);
855 
856   vrna_hc_init(vc);
857 
858   /* add DP matrices */
859   vrna_mx_mfe_add(vc, VRNA_MX_DEFAULT, 0);
860 }
861 
862 
863 PRIVATE void
halfseq(vrna_fold_compound_t * vc)864 halfseq(vrna_fold_compound_t *vc)
865 {
866   unsigned int halflength;
867 
868   halflength = vc->length / 2;
869 
870   vc->sequence              = vrna_realloc(vc->sequence, sizeof(char) * (halflength + 1));
871   vc->sequence[halflength]  = '\0';
872   vc->length                = (unsigned int)strlen(vc->sequence);
873   vc->cutpoint              = -1;
874   vc->strands               = 1;
875   vc->strand_number         = (unsigned int *)vrna_realloc(vc->strand_number,
876                                                            sizeof(unsigned int) * (vc->length + 1));
877   vc->strand_order = (unsigned int *)vrna_realloc(vc->strand_order,
878                                                   sizeof(unsigned int) * (vc->strands + 1));
879   vc->strand_start = (unsigned int *)vrna_realloc(vc->strand_start,
880                                                   sizeof(unsigned int) * (vc->strands + 1));
881   vc->strand_end = (unsigned int *)vrna_realloc(vc->strand_end,
882                                                 sizeof(unsigned int) * (vc->strands + 1));
883 
884   vc->sequence_encoding =
885     vrna_realloc(vc->sequence_encoding, sizeof(short) * (vc->length + 2));
886   vc->sequence_encoding[0]              = vc->sequence_encoding[vc->length];
887   vc->sequence_encoding[vc->length + 1] = vc->sequence_encoding[1];
888 
889   vc->sequence_encoding2 =
890     vrna_realloc(vc->sequence_encoding2, sizeof(short) * (vc->length + 2));
891   vc->sequence_encoding2[0]               = vc->length;
892   vc->sequence_encoding2[vc->length + 1]  = 0;
893 
894   free(vc->ptype);
895   vc->ptype = vrna_ptypes(vc->sequence_encoding2, &(vc->params->model_details));
896   free(vc->iindx);
897   vc->iindx = vrna_idx_row_wise(vc->length);
898   free(vc->jindx);
899   vc->jindx = vrna_idx_col_wise(vc->length);
900 
901   vrna_hc_init(vc);
902 
903   /* add DP matrices */
904   vrna_mx_mfe_add(vc, VRNA_MX_DEFAULT, 0);
905 }
906 
907 
908 typedef struct {
909   int i;
910   int j;
911   int e;
912   int idxj;
913 } zuker_pair;
914 
915 PRIVATE int
comp_pair(const void * A,const void * B)916 comp_pair(const void  *A,
917           const void  *B)
918 {
919   zuker_pair  *x, *y;
920   int         ex, ey;
921 
922   x   = (zuker_pair *)A;
923   y   = (zuker_pair *)B;
924   ex  = x->e;
925   ey  = y->e;
926   if (ex > ey)
927     return 1;
928 
929   if (ex < ey)
930     return -1;
931 
932   return x->idxj + x->i - y->idxj + y->i;
933 }
934 
935 
936 PUBLIC SOLUTION *
vrna_subopt_zuker(vrna_fold_compound_t * vc)937 vrna_subopt_zuker(vrna_fold_compound_t *vc)
938 {
939   /* Compute zuker suboptimal. Here, we're abusing the cofold() code
940    * "double" sequence, compute dimerarray entries, track back every base pair.
941    * This is slightly wasteful compared to the normal solution */
942 
943   char            *structure, *mfestructure, **todo, *ptype;
944   int             i, j, counter, num_pairs, psize, p, *indx, *c, turn;
945   unsigned int    length, doublelength;
946   float           energy;
947   SOLUTION        *zukresults;
948   vrna_bp_stack_t *bp_list;
949   zuker_pair      *pairlist;
950   sect            bt_stack[MAXSECTORS]; /* stack of partial structures for backtracking */
951   vrna_mx_mfe_t   *matrices;
952   vrna_md_t       *md;
953 
954   md    = &(vc->params->model_details);
955   turn  = md->min_loop_size;
956 
957   /* do some magic to re-use cofold code although vc is single sequence */
958   md->min_loop_size = 0;
959   doubleseq(vc);
960 
961   if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_HYBRID)) {
962     vrna_message_warning("vrna_subopt_zuker@cofold.c: Failed to prepare vrna_fold_compound");
963     return NULL;
964   }
965 
966   doublelength    = vc->length;
967   length          = doublelength / 2;
968   indx            = vc->jindx;
969   ptype           = vc->ptype;
970   matrices        = vc->matrices;
971   c               = matrices->c;
972   num_pairs       = counter = 0;
973   mfestructure    = (char *)vrna_alloc((unsigned)doublelength + 1);
974   structure       = (char *)vrna_alloc((unsigned)doublelength + 1);
975   zukresults      = (SOLUTION *)vrna_alloc(((length * (length - 1)) / 2) * sizeof(SOLUTION));
976   mfestructure[0] = '\0';
977 
978   /* store length at pos. 0 */
979   vc->sequence_encoding[0] = vc->sequence_encoding2[0];
980 
981   /* get mfe and do forward recursion */
982   (void)fill_arrays(vc, 1);
983 
984   psize     = length;
985   pairlist  = (zuker_pair *)vrna_alloc(sizeof(zuker_pair) * (psize + 1));
986   bp_list   = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (1 + length / 2));
987   todo      = (char **)vrna_alloc(sizeof(char *) * (length + 1));
988   for (i = 1; i < length; i++)
989     todo[i] = (char *)vrna_alloc(sizeof(char) * (length + 1));
990 
991   /* Make a list of all base pairs */
992   for (i = 1; i < length; i++) {
993     for (j = i + turn + 1 /*??*/; j <= length; j++) {
994       if (ptype[indx[j] + i] == 0)
995         continue;
996 
997       if (num_pairs >= psize) {
998         psize     = 1.2 * psize + 32;
999         pairlist  = vrna_realloc(pairlist, sizeof(zuker_pair) * (psize + 1));
1000       }
1001 
1002       pairlist[num_pairs].i       = i;
1003       pairlist[num_pairs].j       = j;
1004       pairlist[num_pairs].e       = c[indx[j] + i] + c[indx[i + length] + j];
1005       pairlist[num_pairs++].idxj  = indx[j];
1006 
1007       todo[i][j] = 1;
1008     }
1009   }
1010 
1011   qsort(pairlist, num_pairs, sizeof(zuker_pair), comp_pair);
1012 
1013   for (p = 0; p < num_pairs; p++) {
1014     i = pairlist[p].i;
1015     j = pairlist[p].j;
1016     if (todo[i][j]) {
1017       int   k;
1018       char  *sz;
1019       bt_stack[1].i   = i;
1020       bt_stack[1].j   = j;
1021       bt_stack[1].ml  = 2;
1022       backtrack_co(bt_stack, bp_list, 1, 0, vc);
1023       bt_stack[1].i   = j;
1024       bt_stack[1].j   = i + length;
1025       bt_stack[1].ml  = 2;
1026       backtrack_co(bt_stack, bp_list, 1, bp_list[0].i, vc);
1027       energy                          = pairlist[p].e;
1028       sz                              = vrna_db_from_bp_stack(bp_list, length);
1029       zukresults[counter].energy      = energy / 100.;
1030       zukresults[counter++].structure = sz;
1031       for (k = 1; k <= bp_list[0].i; k++) {
1032         /* mark all pairs in structure as done */
1033         int x, y;
1034         x = bp_list[k].i;
1035         y = bp_list[k].j;
1036         if (x > length)
1037           x -= length;
1038 
1039         if (y > length)
1040           y -= length;
1041 
1042         if (x > y) {
1043           int temp;
1044           temp  = x;
1045           x     = y;
1046           y     = temp;
1047         }
1048 
1049         todo[x][y] = 0;
1050       }
1051     }
1052   }
1053 
1054   /* clean up */
1055   free(pairlist);
1056   for (i = 1; i < length; i++)
1057     free(todo[i]);
1058   free(todo);
1059   free(structure);
1060   free(mfestructure);
1061   free(bp_list);
1062 
1063   /* undo magic */
1064   halfseq(vc);
1065   md->min_loop_size = turn;
1066 
1067   return zukresults;
1068 }
1069 
1070 
1071 /*
1072  *###########################################
1073  *# deprecated functions below              #
1074  *###########################################
1075  */
1076 
1077 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
1078 
1079 PRIVATE void
wrap_array_export(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** fc_p,int ** indx_p,char ** ptype_p)1080 wrap_array_export(int   **f5_p,
1081                   int   **c_p,
1082                   int   **fML_p,
1083                   int   **fM1_p,
1084                   int   **fc_p,
1085                   int   **indx_p,
1086                   char  **ptype_p)
1087 {
1088   /* make the DP arrays available to routines such as subopt() */
1089   if (backward_compat_compound) {
1090     *f5_p     = backward_compat_compound->matrices->f5;
1091     *c_p      = backward_compat_compound->matrices->c;
1092     *fML_p    = backward_compat_compound->matrices->fML;
1093     *fM1_p    = backward_compat_compound->matrices->fM1;
1094     *fc_p     = backward_compat_compound->matrices->fc;
1095     *indx_p   = backward_compat_compound->jindx;
1096     *ptype_p  = backward_compat_compound->ptype;
1097   }
1098 }
1099 
1100 
1101 /*--------------------------------------------------------------------------*/
1102 
1103 PRIVATE float
wrap_cofold(const char * string,char * structure,vrna_param_t * parameters,int is_constrained)1104 wrap_cofold(const char    *string,
1105             char          *structure,
1106             vrna_param_t  *parameters,
1107             int           is_constrained)
1108 {
1109   unsigned int          length;
1110   char                  *seq;
1111   vrna_fold_compound_t  *vc;
1112   vrna_param_t          *P;
1113   float                 mfe;
1114 
1115   vc      = NULL;
1116   length  = strlen(string);
1117 
1118 #ifdef _OPENMP
1119   /* Explicitly turn off dynamic threads */
1120   omp_set_dynamic(0);
1121 #endif
1122 
1123   /* we need the parameter structure for hard constraints */
1124   if (parameters) {
1125     P = vrna_params_copy(parameters);
1126   } else {
1127     vrna_md_t md;
1128     set_model_details(&md);
1129     md.temperature  = temperature;
1130     P               = vrna_params(&md);
1131   }
1132 
1133   P->model_details.min_loop_size = 0;  /* set min loop length to 0 */
1134 
1135   /* dirty hack to reinsert the '&' according to the global variable 'cut_point' */
1136   seq = vrna_cut_point_insert(string, cut_point);
1137 
1138   /* get compound structure */
1139   vc = vrna_fold_compound(seq, &(P->model_details), 0);
1140 
1141   if (parameters) {
1142     /* replace params if necessary */
1143     free(vc->params);
1144     vc->params = P;
1145   } else {
1146     free(P);
1147   }
1148 
1149   /* handle hard constraints in pseudo dot-bracket format if passed via simple interface */
1150   if (is_constrained && structure) {
1151     unsigned int constraint_options = 0;
1152     constraint_options |= VRNA_CONSTRAINT_DB
1153                           | VRNA_CONSTRAINT_DB_PIPE
1154                           | VRNA_CONSTRAINT_DB_DOT
1155                           | VRNA_CONSTRAINT_DB_X
1156                           | VRNA_CONSTRAINT_DB_ANG_BRACK
1157                           | VRNA_CONSTRAINT_DB_RND_BRACK
1158                           | VRNA_CONSTRAINT_DB_INTRAMOL
1159                           | VRNA_CONSTRAINT_DB_INTERMOL;
1160 
1161     vrna_constraints_add(vc, (const char *)structure, constraint_options);
1162   }
1163 
1164   if (backward_compat_compound)
1165     vrna_fold_compound_free(backward_compat_compound);
1166 
1167   backward_compat_compound  = vc;
1168   backward_compat           = 1;
1169 
1170   /* cleanup */
1171   free(seq);
1172 
1173   /* call mfe_dimer without backtracing */
1174   mfe = vrna_mfe_dimer(vc, NULL);
1175 
1176   /* now we backtrace in a backward compatible way */
1177   if (structure && vc->params->model_details.backtrack) {
1178     char            *s;
1179     sect            bt_stack[MAXSECTORS];
1180     vrna_bp_stack_t *bp;
1181 
1182     bp = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2))); /* add a guess of how many G's may be involved in a G quadruplex */
1183 
1184     backtrack(bt_stack, bp, vc);
1185 
1186     s = vrna_db_from_bp_stack(bp, length);
1187     strncpy(structure, s, length + 1);
1188     free(s);
1189 
1190     if (base_pair)
1191       free(base_pair);
1192 
1193     base_pair = bp;
1194   }
1195 
1196   return mfe;
1197 }
1198 
1199 
1200 PRIVATE SOLUTION *
wrap_zukersubopt(const char * string,vrna_param_t * parameters)1201 wrap_zukersubopt(const char   *string,
1202                  vrna_param_t *parameters)
1203 {
1204   vrna_fold_compound_t  *vc;
1205   vrna_param_t          *P;
1206 
1207   vc = NULL;
1208 
1209 #ifdef _OPENMP
1210   /* Explicitly turn off dynamic threads */
1211   omp_set_dynamic(0);
1212 #endif
1213 
1214   /* we need the parameter structure for hard constraints */
1215   if (parameters) {
1216     P = vrna_params_copy(parameters);
1217   } else {
1218     vrna_md_t md;
1219     set_model_details(&md);
1220     md.temperature  = temperature;
1221     P               = vrna_params(&md);
1222   }
1223 
1224   /* get compound structure */
1225   vc = vrna_fold_compound(string, &(P->model_details), VRNA_OPTION_DEFAULT);
1226 
1227   if (parameters) {
1228     /* replace params if necessary */
1229     free(vc->params);
1230     vc->params = P;
1231   } else {
1232     free(P);
1233   }
1234 
1235   if (backward_compat_compound)
1236     vrna_fold_compound_free(backward_compat_compound);
1237 
1238   backward_compat_compound  = vc;
1239   backward_compat           = 1;
1240 
1241   return vrna_subopt_zuker(vc);
1242 }
1243 
1244 
1245 PUBLIC void
initialize_cofold(int length)1246 initialize_cofold(int length)
1247 {
1248   /* DO NOTHING */ }
1249 
1250 
1251 PUBLIC void
free_co_arrays(void)1252 free_co_arrays(void)
1253 {
1254   if (backward_compat_compound && backward_compat) {
1255     vrna_fold_compound_free(backward_compat_compound);
1256     backward_compat_compound  = NULL;
1257     backward_compat           = 0;
1258   }
1259 }
1260 
1261 
1262 /*--------------------------------------------------------------------------*/
1263 
1264 PUBLIC void
export_cofold_arrays_gq(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** fc_p,int ** ggg_p,int ** indx_p,char ** ptype_p)1265 export_cofold_arrays_gq(int   **f5_p,
1266                         int   **c_p,
1267                         int   **fML_p,
1268                         int   **fM1_p,
1269                         int   **fc_p,
1270                         int   **ggg_p,
1271                         int   **indx_p,
1272                         char  **ptype_p)
1273 {
1274   /* make the DP arrays available to routines such as subopt() */
1275   wrap_array_export(f5_p, c_p, fML_p, fM1_p, fc_p, indx_p, ptype_p);
1276   if (backward_compat_compound)
1277     *ggg_p = backward_compat_compound->matrices->ggg;
1278 }
1279 
1280 
1281 PUBLIC void
export_cofold_arrays(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** fc_p,int ** indx_p,char ** ptype_p)1282 export_cofold_arrays(int  **f5_p,
1283                      int  **c_p,
1284                      int  **fML_p,
1285                      int  **fM1_p,
1286                      int  **fc_p,
1287                      int  **indx_p,
1288                      char **ptype_p)
1289 {
1290   wrap_array_export(f5_p, c_p, fML_p, fM1_p, fc_p, indx_p, ptype_p);
1291 }
1292 
1293 
1294 PUBLIC float
cofold(const char * string,char * structure)1295 cofold(const char *string,
1296        char       *structure)
1297 {
1298   return wrap_cofold(string, structure, NULL, fold_constrained);
1299 }
1300 
1301 
1302 PUBLIC float
cofold_par(const char * string,char * structure,vrna_param_t * parameters,int is_constrained)1303 cofold_par(const char   *string,
1304            char         *structure,
1305            vrna_param_t *parameters,
1306            int          is_constrained)
1307 {
1308   return wrap_cofold(string, structure, parameters, is_constrained);
1309 }
1310 
1311 
1312 PUBLIC SOLUTION *
zukersubopt(const char * string)1313 zukersubopt(const char *string)
1314 {
1315   return wrap_zukersubopt(string, NULL);
1316 }
1317 
1318 
1319 PUBLIC SOLUTION *
zukersubopt_par(const char * string,vrna_param_t * parameters)1320 zukersubopt_par(const char    *string,
1321                 vrna_param_t  *parameters)
1322 {
1323   return wrap_zukersubopt(string, parameters);
1324 }
1325 
1326 
1327 PUBLIC void
update_cofold_params(void)1328 update_cofold_params(void)
1329 {
1330   vrna_fold_compound_t *v;
1331 
1332   if (backward_compat_compound && backward_compat) {
1333     vrna_md_t md;
1334     v = backward_compat_compound;
1335 
1336     if (v->params)
1337       free(v->params);
1338 
1339     set_model_details(&md);
1340     v->params = vrna_params(&md);
1341   }
1342 }
1343 
1344 
1345 PUBLIC void
update_cofold_params_par(vrna_param_t * parameters)1346 update_cofold_params_par(vrna_param_t *parameters)
1347 {
1348   vrna_fold_compound_t *v;
1349 
1350   if (backward_compat_compound && backward_compat) {
1351     v = backward_compat_compound;
1352 
1353     if (v->params)
1354       free(v->params);
1355 
1356     if (parameters) {
1357       v->params = vrna_params_copy(parameters);
1358     } else {
1359       vrna_md_t md;
1360       set_model_details(&md);
1361       md.temperature  = temperature;
1362       v->params       = vrna_params(&md);
1363     }
1364   }
1365 }
1366 
1367 
1368 PUBLIC void
get_monomere_mfes(float * e1,float * e2)1369 get_monomere_mfes(float *e1,
1370                   float *e2)
1371 {
1372   /*exports monomere free energies*/
1373   *e1 = mfe1;
1374   *e2 = mfe2;
1375 }
1376 
1377 
1378 #endif
1379