1 /*
2  *                minimum free energy
3  *                RNA secondary structure prediction
4  *
5  *                c Ivo Hofacker, Chrisoph Flamm
6  *                original implementation by
7  *                Walter Fontana
8  *                g-quadruplex support and threadsafety
9  *                by Ronny Lorenz
10  *
11  *                Vienna RNA package
12  */
13 
14 #ifdef HAVE_CONFIG_H
15 #include "config.h"
16 #endif
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <math.h>
21 #include <ctype.h>
22 #include <string.h>
23 #include <limits.h>
24 
25 #include "ViennaRNA/utils/basic.h"
26 #include "ViennaRNA/params/default.h"
27 #include "ViennaRNA/datastructures/basic.h"
28 #include "ViennaRNA/fold_vars.h"
29 #include "ViennaRNA/params/basic.h"
30 #include "ViennaRNA/constraints/hard.h"
31 #include "ViennaRNA/constraints/soft.h"
32 #include "ViennaRNA/gquad.h"
33 #include "ViennaRNA/loops/all.h"
34 #include "ViennaRNA/fold.h"
35 
36 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
37 
38 #ifdef _OPENMP
39 #include <omp.h>
40 #endif
41 
42 #endif
43 
44 #define MAXSECTORS        500     /* dimension for a backtrack array */
45 
46 /*
47  #################################
48  # GLOBAL VARIABLES              #
49  #################################
50  */
51 
52 /*
53  #################################
54  # PRIVATE VARIABLES             #
55  #################################
56  */
57 
58 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
59 
60 /* some backward compatibility stuff */
61 PRIVATE int                   backward_compat           = 0;
62 PRIVATE vrna_fold_compound_t  *backward_compat_compound = NULL;
63 
64 #ifdef _OPENMP
65 
66 #pragma omp threadprivate(backward_compat_compound, backward_compat)
67 
68 #endif
69 
70 #endif
71 
72 /*
73  #################################
74  # PRIVATE FUNCTION DECLARATIONS #
75  #################################
76  */
77 
78 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
79 
80 /* wrappers for old API compatibility */
81 PRIVATE float
82 wrap_fold(const char    *string,
83           char          *structure,
84           vrna_param_t  *parameters,
85           int           is_constrained,
86           int           is_circular);
87 
88 
89 PRIVATE void
90 wrap_array_export(int   **f5_p,
91                   int   **c_p,
92                   int   **fML_p,
93                   int   **fM1_p,
94                   int   **indx_p,
95                   char  **ptype_p);
96 
97 
98 PRIVATE void
99 wrap_array_export_circ(int  *Fc_p,
100                        int  *FcH_p,
101                        int  *FcI_p,
102                        int  *FcM_p,
103                        int  **fM2_p);
104 
105 
106 #endif
107 
108 /*
109  #################################
110  # BEGIN OF FUNCTION DEFINITIONS #
111  #################################
112  */
113 
114 /*###########################################*/
115 /*# deprecated functions below              #*/
116 /*###########################################*/
117 
118 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
119 
120 PRIVATE void
wrap_array_export(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p)121 wrap_array_export(int   **f5_p,
122                   int   **c_p,
123                   int   **fML_p,
124                   int   **fM1_p,
125                   int   **indx_p,
126                   char  **ptype_p)
127 {
128   /* make the DP arrays available to routines such as subopt() */
129   if (backward_compat_compound) {
130     *f5_p     = backward_compat_compound->matrices->f5;
131     *c_p      = backward_compat_compound->matrices->c;
132     *fML_p    = backward_compat_compound->matrices->fML;
133     *fM1_p    = backward_compat_compound->matrices->fM1;
134     *indx_p   = backward_compat_compound->jindx;
135     *ptype_p  = backward_compat_compound->ptype;
136   }
137 }
138 
139 
140 PRIVATE void
wrap_array_export_circ(int * Fc_p,int * FcH_p,int * FcI_p,int * FcM_p,int ** fM2_p)141 wrap_array_export_circ(int  *Fc_p,
142                        int  *FcH_p,
143                        int  *FcI_p,
144                        int  *FcM_p,
145                        int  **fM2_p)
146 {
147   /* make the DP arrays available to routines such as subopt() */
148   if (backward_compat_compound) {
149     *Fc_p   = backward_compat_compound->matrices->Fc;
150     *FcH_p  = backward_compat_compound->matrices->FcH;
151     *FcI_p  = backward_compat_compound->matrices->FcI;
152     *FcM_p  = backward_compat_compound->matrices->FcM;
153     *fM2_p  = backward_compat_compound->matrices->fM2;
154   }
155 }
156 
157 
158 PRIVATE float
wrap_fold(const char * string,char * structure,vrna_param_t * parameters,int is_constrained,int is_circular)159 wrap_fold(const char    *string,
160           char          *structure,
161           vrna_param_t  *parameters,
162           int           is_constrained,
163           int           is_circular)
164 {
165   vrna_fold_compound_t  *vc;
166   vrna_param_t          *P;
167   float                 mfe;
168 
169 #ifdef _OPENMP
170   /* Explicitly turn off dynamic threads */
171   omp_set_dynamic(0);
172 #endif
173 
174   /* we need the parameter structure for hard constraints */
175   if (parameters) {
176     P = vrna_params_copy(parameters);
177   } else {
178     vrna_md_t md;
179     set_model_details(&md);
180     md.temperature  = temperature;
181     P               = vrna_params(&md);
182   }
183 
184   P->model_details.circ = is_circular;
185 
186   vc = vrna_fold_compound(string, &(P->model_details), VRNA_OPTION_DEFAULT);
187 
188   if (parameters) {
189     /* replace params if necessary */
190     free(vc->params);
191     vc->params = P;
192   } else {
193     free(P);
194   }
195 
196   /* handle hard constraints in pseudo dot-bracket format if passed via simple interface */
197   if (is_constrained && structure) {
198     unsigned int constraint_options = 0;
199     constraint_options |= VRNA_CONSTRAINT_DB
200                           | VRNA_CONSTRAINT_DB_PIPE
201                           | VRNA_CONSTRAINT_DB_DOT
202                           | VRNA_CONSTRAINT_DB_X
203                           | VRNA_CONSTRAINT_DB_ANG_BRACK
204                           | VRNA_CONSTRAINT_DB_RND_BRACK;
205 
206     vrna_constraints_add(vc, (const char *)structure, constraint_options);
207   }
208 
209   if (backward_compat_compound && backward_compat)
210     vrna_fold_compound_free(backward_compat_compound);
211 
212   backward_compat_compound  = vc;
213   backward_compat           = 1;
214 
215   /* call mfe() function without backtracking */
216   mfe = vrna_mfe(vc, NULL);
217 
218   /* backtrack structure */
219   if (structure && vc->params->model_details.backtrack) {
220     char            *ss;
221     int             length;
222     sect            bt_stack[MAXSECTORS];
223     vrna_bp_stack_t *bp;
224 
225     length = vc->length;
226     /* add a guess of how many G's may be involved in a G quadruplex */
227     bp = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2)));
228 
229     vrna_backtrack_from_intervals(vc, bp, bt_stack, 0);
230 
231     ss = vrna_db_from_bp_stack(bp, length);
232     strncpy(structure, ss, length + 1);
233     free(ss);
234 
235     if (base_pair)
236       free(base_pair);
237 
238     base_pair = bp;
239   }
240 
241   return mfe;
242 }
243 
244 
245 PUBLIC void
free_arrays(void)246 free_arrays(void)
247 {
248   if (backward_compat_compound && backward_compat) {
249     vrna_fold_compound_free(backward_compat_compound);
250     backward_compat_compound  = NULL;
251     backward_compat           = 0;
252   }
253 }
254 
255 
256 PUBLIC float
fold_par(const char * string,char * structure,vrna_param_t * parameters,int is_constrained,int is_circular)257 fold_par(const char   *string,
258          char         *structure,
259          vrna_param_t *parameters,
260          int          is_constrained,
261          int          is_circular)
262 {
263   return wrap_fold(string, structure, parameters, is_constrained, is_circular);
264 }
265 
266 
267 PUBLIC float
fold(const char * string,char * structure)268 fold(const char *string,
269      char       *structure)
270 {
271   return wrap_fold(string, structure, NULL, fold_constrained, 0);
272 }
273 
274 
275 PUBLIC float
circfold(const char * string,char * structure)276 circfold(const char *string,
277          char       *structure)
278 {
279   return wrap_fold(string, structure, NULL, fold_constrained, 1);
280 }
281 
282 
283 PUBLIC void
initialize_fold(int length)284 initialize_fold(int length)
285 {
286   /* DO NOTHING */
287 }
288 
289 
290 PUBLIC void
update_fold_params(void)291 update_fold_params(void)
292 {
293   vrna_md_t md;
294 
295   if (backward_compat_compound && backward_compat) {
296     set_model_details(&md);
297     vrna_params_reset(backward_compat_compound, &md);
298   }
299 }
300 
301 
302 PUBLIC void
update_fold_params_par(vrna_param_t * parameters)303 update_fold_params_par(vrna_param_t *parameters)
304 {
305   vrna_md_t md;
306 
307   if (backward_compat_compound && backward_compat) {
308     if (parameters) {
309       vrna_params_subst(backward_compat_compound, parameters);
310     } else {
311       set_model_details(&md);
312       vrna_params_reset(backward_compat_compound, &md);
313     }
314   }
315 }
316 
317 
318 PUBLIC void
export_fold_arrays(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p)319 export_fold_arrays(int  **f5_p,
320                    int  **c_p,
321                    int  **fML_p,
322                    int  **fM1_p,
323                    int  **indx_p,
324                    char **ptype_p)
325 {
326   wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
327 }
328 
329 
330 PUBLIC void
export_fold_arrays_par(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p,vrna_param_t ** P_p)331 export_fold_arrays_par(int          **f5_p,
332                        int          **c_p,
333                        int          **fML_p,
334                        int          **fM1_p,
335                        int          **indx_p,
336                        char         **ptype_p,
337                        vrna_param_t **P_p)
338 {
339   wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
340   if (backward_compat_compound)
341     *P_p = backward_compat_compound->params;
342 }
343 
344 
345 PUBLIC void
export_circfold_arrays(int * Fc_p,int * FcH_p,int * FcI_p,int * FcM_p,int ** fM2_p,int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p)346 export_circfold_arrays(int  *Fc_p,
347                        int  *FcH_p,
348                        int  *FcI_p,
349                        int  *FcM_p,
350                        int  **fM2_p,
351                        int  **f5_p,
352                        int  **c_p,
353                        int  **fML_p,
354                        int  **fM1_p,
355                        int  **indx_p,
356                        char **ptype_p)
357 {
358   wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
359   wrap_array_export_circ(Fc_p, FcH_p, FcI_p, FcM_p, fM2_p);
360 }
361 
362 
363 PUBLIC void
export_circfold_arrays_par(int * Fc_p,int * FcH_p,int * FcI_p,int * FcM_p,int ** fM2_p,int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p,vrna_param_t ** P_p)364 export_circfold_arrays_par(int          *Fc_p,
365                            int          *FcH_p,
366                            int          *FcI_p,
367                            int          *FcM_p,
368                            int          **fM2_p,
369                            int          **f5_p,
370                            int          **c_p,
371                            int          **fML_p,
372                            int          **fM1_p,
373                            int          **indx_p,
374                            char         **ptype_p,
375                            vrna_param_t **P_p)
376 {
377   wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
378   wrap_array_export_circ(Fc_p, FcH_p, FcI_p, FcM_p, fM2_p);
379   if (backward_compat_compound)
380     *P_p = backward_compat_compound->params;
381 }
382 
383 
384 PUBLIC char *
backtrack_fold_from_pair(char * sequence,int i,int j)385 backtrack_fold_from_pair(char *sequence,
386                          int  i,
387                          int  j)
388 {
389   char            *structure  = NULL;
390   unsigned int    length      = 0;
391   vrna_bp_stack_t *bp         = NULL;
392   sect            bt_stack[MAXSECTORS]; /* stack of partial structures for backtracking */
393 
394   if (sequence) {
395     length  = strlen(sequence);
396     bp      = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (1 + length / 2));
397   } else {
398     vrna_message_warning("backtrack_fold_from_pair: "
399                          "no sequence given");
400     return NULL;
401   }
402 
403   bt_stack[1].i   = i;
404   bt_stack[1].j   = j;
405   bt_stack[1].ml  = 2;
406 
407   bp[0].i = 0; /* ??? this is set by backtrack anyway... */
408 
409   vrna_backtrack_from_intervals(backward_compat_compound, bp, bt_stack, 1);
410   structure = vrna_db_from_bp_stack(bp, length);
411 
412   /* backward compatibitlity stuff */
413   if (base_pair)
414     free(base_pair);
415 
416   base_pair = bp;
417 
418   return structure;
419 }
420 
421 
422 #define STACK_BULGE1      1       /* stacking energies for bulges of size 1 */
423 #define NEW_NINIO         1       /* new asymetry penalty */
424 
425 PUBLIC int
HairpinE(int size,int type,int si1,int sj1,const char * string)426 HairpinE(int        size,
427          int        type,
428          int        si1,
429          int        sj1,
430          const char *string)
431 {
432   vrna_param_t  *P = backward_compat_compound->params;
433   int           energy;
434 
435   energy = (size <= 30) ? P->hairpin[size] :
436            P->hairpin[30] + (int)(P->lxc * log((size) / 30.));
437 
438   if (tetra_loop) {
439     if (size == 4) {
440       /* check for tetraloop bonus */
441       char tl[7] = {
442         0
443       }, *ts;
444       strncpy(tl, string, 6);
445       if ((ts = strstr(P->Tetraloops, tl)))
446         return P->Tetraloop_E[(ts - P->Tetraloops) / 7];
447     }
448 
449     if (size == 6) {
450       char tl[9] = {
451         0
452       }, *ts;
453       strncpy(tl, string, 8);
454       if ((ts = strstr(P->Hexaloops, tl)))
455         return energy = P->Hexaloop_E[(ts - P->Hexaloops) / 9];
456     }
457 
458     if (size == 3) {
459       char tl[6] = {
460         0, 0, 0, 0, 0, 0
461       }, *ts;
462       strncpy(tl, string, 5);
463       if ((ts = strstr(P->Triloops, tl)))
464         return P->Triloop_E[(ts - P->Triloops) / 6];
465 
466       if (type > 2)               /* neither CG nor GC */
467         energy += P->TerminalAU;  /* penalty for closing AU GU pair IVOO??
468                                    * sind dass jetzt beaunuesse oder mahlnuesse (vorzeichen?)*/
469 
470       return energy;
471     }
472   }
473 
474   energy += P->mismatchH[type][si1][sj1];
475 
476   return energy;
477 }
478 
479 
480 /*---------------------------------------------------------------------------*/
481 
482 PUBLIC int
oldLoopEnergy(int i,int j,int p,int q,int type,int type_2)483 oldLoopEnergy(int i,
484               int j,
485               int p,
486               int q,
487               int type,
488               int type_2)
489 {
490   vrna_param_t  *P  = backward_compat_compound->params;
491   short         *S1 = backward_compat_compound->sequence_encoding;
492 
493   /* compute energy of degree 2 loop (stack bulge or interior) */
494   int           n1, n2, m, energy;
495 
496   n1  = p - i - 1;
497   n2  = j - q - 1;
498 
499   if (n1 > n2) {
500     m   = n1;
501     n1  = n2;
502     n2  = m;
503   }                                 /* so that n2>=n1 */
504 
505   if (n2 == 0) {
506     energy = P->stack[type][type_2];   /* stack */
507   } else if (n1 == 0) {
508     /* bulge */
509     energy = (n2 <= MAXLOOP) ? P->bulge[n2] :
510              (P->bulge[30] + (int)(P->lxc * log(n2 / 30.)));
511 
512 #if STACK_BULGE1
513     if (n2 == 1)
514       energy += P->stack[type][type_2];
515 
516 #endif
517   } else {
518     /* interior loop */
519 
520     if ((n1 + n2 == 2) && (james_rule)) {
521       /* special case for loop size 2 */
522       energy = P->int11[type][type_2][S1[i + 1]][S1[j - 1]];
523     } else {
524       energy = (n1 + n2 <= MAXLOOP) ? (P->internal_loop[n1 + n2]) :
525                (P->internal_loop[30] + (int)(P->lxc * log((n1 + n2) / 30.)));
526 
527 #if NEW_NINIO
528       energy += MIN2(MAX_NINIO, (n2 - n1) * P->ninio[2]);
529 #else
530       m       = MIN2(4, n1);
531       energy  += MIN2(MAX_NINIO, ((n2 - n1) * P->ninio[m]));
532 #endif
533       energy += P->mismatchI[type][S1[i + 1]][S1[j - 1]] +
534                 P->mismatchI[type_2][S1[q + 1]][S1[p - 1]];
535     }
536   }
537 
538   return energy;
539 }
540 
541 
542 /*--------------------------------------------------------------------------*/
543 
544 PUBLIC int
LoopEnergy(int n1,int n2,int type,int type_2,int si1,int sj1,int sp1,int sq1)545 LoopEnergy(int  n1,
546            int  n2,
547            int  type,
548            int  type_2,
549            int  si1,
550            int  sj1,
551            int  sp1,
552            int  sq1)
553 {
554   vrna_param_t  *P = backward_compat_compound->params;
555   /* compute energy of degree 2 loop (stack bulge or interior) */
556   int           nl, ns, energy;
557 
558   if (n1 > n2) {
559     nl  = n1;
560     ns  = n2;
561   } else {
562     nl  = n2;
563     ns  = n1;
564   }
565 
566   if (nl == 0)
567     return P->stack[type][type_2];    /* stack */
568 
569   if (ns == 0) {
570     /* bulge */
571     energy = (nl <= MAXLOOP) ? P->bulge[nl] :
572              (P->bulge[30] + (int)(P->lxc * log(nl / 30.)));
573     if (nl == 1) {
574       energy += P->stack[type][type_2];
575     } else {
576       if (type > 2)
577         energy += P->TerminalAU;
578 
579       if (type_2 > 2)
580         energy += P->TerminalAU;
581     }
582 
583     return energy;
584   } else {
585     /* interior loop */
586     if (ns == 1) {
587       if (nl == 1)                     /* 1x1 loop */
588         return P->int11[type][type_2][si1][sj1];
589 
590       if (nl == 2) {
591         /* 2x1 loop */
592         if (n1 == 1)
593           energy = P->int21[type][type_2][si1][sq1][sj1];
594         else
595           energy = P->int21[type_2][type][sq1][si1][sp1];
596 
597         return energy;
598       } else {
599         /* 1xn loop */
600         energy = (nl + 1 <= MAXLOOP) ? (P->internal_loop[nl + 1]) :
601                  (P->internal_loop[30] + (int)(P->lxc * log((nl + 1) / 30.)));
602         energy  += MIN2(MAX_NINIO, (nl - ns) * P->ninio[2]);
603         energy  += P->mismatch1nI[type][si1][sj1] +
604                    P->mismatch1nI[type_2][sq1][sp1];
605         return energy;
606       }
607     } else if (ns == 2) {
608       if (nl == 2) {
609         /* 2x2 loop */
610         return P->int22[type][type_2][si1][sp1][sq1][sj1];
611       } else if (nl == 3) {
612         /* 2x3 loop */
613         energy  = P->internal_loop[5] + P->ninio[2];
614         energy  += P->mismatch23I[type][si1][sj1] +
615                    P->mismatch23I[type_2][sq1][sp1];
616         return energy;
617       }
618     }
619 
620     {
621       /* generic interior loop (no else here!)*/
622       energy = (n1 + n2 <= MAXLOOP) ? (P->internal_loop[n1 + n2]) :
623                (P->internal_loop[30] + (int)(P->lxc * log((n1 + n2) / 30.)));
624 
625       energy += MIN2(MAX_NINIO, (nl - ns) * P->ninio[2]);
626 
627       energy += P->mismatchI[type][si1][sj1] +
628                 P->mismatchI[type_2][sq1][sp1];
629     }
630   }
631 
632   return energy;
633 }
634 
635 
636 #endif
637