1 /*
2 * minimum free energy
3 * RNA secondary structure prediction
4 *
 * c Ivo Hofacker, Christoph Flamm
6 * original implementation by
7 * Walter Fontana
8 * g-quadruplex support and threadsafety
9 * by Ronny Lorenz
10 *
11 * Vienna RNA package
12 */
13
14 #ifdef HAVE_CONFIG_H
15 #include "config.h"
16 #endif
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <math.h>
21 #include <ctype.h>
22 #include <string.h>
23 #include <limits.h>
24
25 #include "ViennaRNA/utils/basic.h"
26 #include "ViennaRNA/params/default.h"
27 #include "ViennaRNA/datastructures/basic.h"
28 #include "ViennaRNA/fold_vars.h"
29 #include "ViennaRNA/params/basic.h"
30 #include "ViennaRNA/constraints/hard.h"
31 #include "ViennaRNA/constraints/soft.h"
32 #include "ViennaRNA/gquad.h"
33 #include "ViennaRNA/loops/all.h"
34 #include "ViennaRNA/fold.h"
35
36 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
37
38 #ifdef _OPENMP
39 #include <omp.h>
40 #endif
41
42 #endif
43
44 #define MAXSECTORS 500 /* dimension for a backtrack array */
45
46 /*
47 #################################
48 # GLOBAL VARIABLES #
49 #################################
50 */
51
52 /*
53 #################################
54 # PRIVATE VARIABLES #
55 #################################
56 */
57
58 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
59
60 /* some backward compatibility stuff */
61 PRIVATE int backward_compat = 0;
62 PRIVATE vrna_fold_compound_t *backward_compat_compound = NULL;
63
64 #ifdef _OPENMP
65
66 #pragma omp threadprivate(backward_compat_compound, backward_compat)
67
68 #endif
69
70 #endif
71
72 /*
73 #################################
74 # PRIVATE FUNCTION DECLARATIONS #
75 #################################
76 */
77
78 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
79
80 /* wrappers for old API compatibility */
81 PRIVATE float
82 wrap_fold(const char *string,
83 char *structure,
84 vrna_param_t *parameters,
85 int is_constrained,
86 int is_circular);
87
88
89 PRIVATE void
90 wrap_array_export(int **f5_p,
91 int **c_p,
92 int **fML_p,
93 int **fM1_p,
94 int **indx_p,
95 char **ptype_p);
96
97
98 PRIVATE void
99 wrap_array_export_circ(int *Fc_p,
100 int *FcH_p,
101 int *FcI_p,
102 int *FcM_p,
103 int **fM2_p);
104
105
106 #endif
107
108 /*
109 #################################
110 # BEGIN OF FUNCTION DEFINITIONS #
111 #################################
112 */
113
114 /*###########################################*/
115 /*# deprecated functions below #*/
116 /*###########################################*/
117
118 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
119
120 PRIVATE void
wrap_array_export(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p)121 wrap_array_export(int **f5_p,
122 int **c_p,
123 int **fML_p,
124 int **fM1_p,
125 int **indx_p,
126 char **ptype_p)
127 {
128 /* make the DP arrays available to routines such as subopt() */
129 if (backward_compat_compound) {
130 *f5_p = backward_compat_compound->matrices->f5;
131 *c_p = backward_compat_compound->matrices->c;
132 *fML_p = backward_compat_compound->matrices->fML;
133 *fM1_p = backward_compat_compound->matrices->fM1;
134 *indx_p = backward_compat_compound->jindx;
135 *ptype_p = backward_compat_compound->ptype;
136 }
137 }
138
139
140 PRIVATE void
wrap_array_export_circ(int * Fc_p,int * FcH_p,int * FcI_p,int * FcM_p,int ** fM2_p)141 wrap_array_export_circ(int *Fc_p,
142 int *FcH_p,
143 int *FcI_p,
144 int *FcM_p,
145 int **fM2_p)
146 {
147 /* make the DP arrays available to routines such as subopt() */
148 if (backward_compat_compound) {
149 *Fc_p = backward_compat_compound->matrices->Fc;
150 *FcH_p = backward_compat_compound->matrices->FcH;
151 *FcI_p = backward_compat_compound->matrices->FcI;
152 *FcM_p = backward_compat_compound->matrices->FcM;
153 *fM2_p = backward_compat_compound->matrices->fM2;
154 }
155 }
156
157
/*
 * Core worker behind the deprecated fold()/circfold()/fold_par() API.
 *
 * Builds a vrna_fold_compound_t for 'string', optionally applies user
 * supplied energy parameters and dot-bracket hard constraints, computes
 * the MFE, and (if backtracking is enabled) writes the MFE structure
 * into 'structure'. The compound is cached in the file-scope
 * backward_compat_compound so that subsequent calls to the old
 * array-export/backtrack functions can reach the DP matrices.
 *
 * string         nucleotide sequence (not modified)
 * structure      in: optional constraint string, out: MFE structure
 *                (must hold strlen(string)+1 chars); may be NULL
 * parameters     precomputed energy parameters, or NULL to derive them
 *                from the global model settings
 * is_constrained non-zero to interpret 'structure' as constraints
 * is_circular    non-zero to fold the sequence as a circular RNA
 * returns        minimum free energy in kcal/mol
 */
PRIVATE float
wrap_fold(const char   *string,
          char         *structure,
          vrna_param_t *parameters,
          int          is_constrained,
          int          is_circular)
{
  vrna_fold_compound_t *vc;
  vrna_param_t *P;
  float mfe;

#ifdef _OPENMP
  /* Explicitly turn off dynamic threads */
  omp_set_dynamic(0);
#endif

  /* we need the parameter structure for hard constraints */
  if (parameters) {
    P = vrna_params_copy(parameters);
  } else {
    vrna_md_t md;
    set_model_details(&md);
    md.temperature = temperature;  /* honor the global temperature setting */
    P = vrna_params(&md);
  }

  P->model_details.circ = is_circular;

  vc = vrna_fold_compound(string, &(P->model_details), VRNA_OPTION_DEFAULT);

  if (parameters) {
    /* replace params if necessary */
    free(vc->params);
    vc->params = P;   /* vc takes ownership of the copied parameters */
  } else {
    /* vc made its own copy from P->model_details; our P is no longer needed */
    free(P);
  }

  /* handle hard constraints in pseudo dot-bracket format if passed via simple interface */
  if (is_constrained && structure) {
    unsigned int constraint_options = 0;
    constraint_options |= VRNA_CONSTRAINT_DB
                          | VRNA_CONSTRAINT_DB_PIPE
                          | VRNA_CONSTRAINT_DB_DOT
                          | VRNA_CONSTRAINT_DB_X
                          | VRNA_CONSTRAINT_DB_ANG_BRACK
                          | VRNA_CONSTRAINT_DB_RND_BRACK;

    vrna_constraints_add(vc, (const char *)structure, constraint_options);
  }

  /* retire any compound cached by a previous call before storing the new one */
  if (backward_compat_compound && backward_compat)
    vrna_fold_compound_free(backward_compat_compound);

  backward_compat_compound = vc;
  backward_compat = 1;

  /* call mfe() function without backtracking */
  mfe = vrna_mfe(vc, NULL);

  /* backtrack structure */
  if (structure && vc->params->model_details.backtrack) {
    char *ss;
    int length;
    sect bt_stack[MAXSECTORS];
    vrna_bp_stack_t *bp;

    length = vc->length;
    /* add a guess of how many G's may be involved in a G quadruplex */
    bp = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2)));

    vrna_backtrack_from_intervals(vc, bp, bt_stack, 0);

    ss = vrna_db_from_bp_stack(bp, length);
    strncpy(structure, ss, length + 1);  /* ss holds exactly length chars + NUL */
    free(ss);

    /* keep the pair table in the deprecated global base_pair array */
    if (base_pair)
      free(base_pair);

    base_pair = bp;
  }

  return mfe;
}
243
244
245 PUBLIC void
free_arrays(void)246 free_arrays(void)
247 {
248 if (backward_compat_compound && backward_compat) {
249 vrna_fold_compound_free(backward_compat_compound);
250 backward_compat_compound = NULL;
251 backward_compat = 0;
252 }
253 }
254
255
256 PUBLIC float
fold_par(const char * string,char * structure,vrna_param_t * parameters,int is_constrained,int is_circular)257 fold_par(const char *string,
258 char *structure,
259 vrna_param_t *parameters,
260 int is_constrained,
261 int is_circular)
262 {
263 return wrap_fold(string, structure, parameters, is_constrained, is_circular);
264 }
265
266
267 PUBLIC float
fold(const char * string,char * structure)268 fold(const char *string,
269 char *structure)
270 {
271 return wrap_fold(string, structure, NULL, fold_constrained, 0);
272 }
273
274
275 PUBLIC float
circfold(const char * string,char * structure)276 circfold(const char *string,
277 char *structure)
278 {
279 return wrap_fold(string, structure, NULL, fold_constrained, 1);
280 }
281
282
283 PUBLIC void
initialize_fold(int length)284 initialize_fold(int length)
285 {
286 /* DO NOTHING */
287 }
288
289
290 PUBLIC void
update_fold_params(void)291 update_fold_params(void)
292 {
293 vrna_md_t md;
294
295 if (backward_compat_compound && backward_compat) {
296 set_model_details(&md);
297 vrna_params_reset(backward_compat_compound, &md);
298 }
299 }
300
301
302 PUBLIC void
update_fold_params_par(vrna_param_t * parameters)303 update_fold_params_par(vrna_param_t *parameters)
304 {
305 vrna_md_t md;
306
307 if (backward_compat_compound && backward_compat) {
308 if (parameters) {
309 vrna_params_subst(backward_compat_compound, parameters);
310 } else {
311 set_model_details(&md);
312 vrna_params_reset(backward_compat_compound, &md);
313 }
314 }
315 }
316
317
318 PUBLIC void
export_fold_arrays(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p)319 export_fold_arrays(int **f5_p,
320 int **c_p,
321 int **fML_p,
322 int **fM1_p,
323 int **indx_p,
324 char **ptype_p)
325 {
326 wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
327 }
328
329
330 PUBLIC void
export_fold_arrays_par(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p,vrna_param_t ** P_p)331 export_fold_arrays_par(int **f5_p,
332 int **c_p,
333 int **fML_p,
334 int **fM1_p,
335 int **indx_p,
336 char **ptype_p,
337 vrna_param_t **P_p)
338 {
339 wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
340 if (backward_compat_compound)
341 *P_p = backward_compat_compound->params;
342 }
343
344
345 PUBLIC void
export_circfold_arrays(int * Fc_p,int * FcH_p,int * FcI_p,int * FcM_p,int ** fM2_p,int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p)346 export_circfold_arrays(int *Fc_p,
347 int *FcH_p,
348 int *FcI_p,
349 int *FcM_p,
350 int **fM2_p,
351 int **f5_p,
352 int **c_p,
353 int **fML_p,
354 int **fM1_p,
355 int **indx_p,
356 char **ptype_p)
357 {
358 wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
359 wrap_array_export_circ(Fc_p, FcH_p, FcI_p, FcM_p, fM2_p);
360 }
361
362
363 PUBLIC void
export_circfold_arrays_par(int * Fc_p,int * FcH_p,int * FcI_p,int * FcM_p,int ** fM2_p,int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** indx_p,char ** ptype_p,vrna_param_t ** P_p)364 export_circfold_arrays_par(int *Fc_p,
365 int *FcH_p,
366 int *FcI_p,
367 int *FcM_p,
368 int **fM2_p,
369 int **f5_p,
370 int **c_p,
371 int **fML_p,
372 int **fM1_p,
373 int **indx_p,
374 char **ptype_p,
375 vrna_param_t **P_p)
376 {
377 wrap_array_export(f5_p, c_p, fML_p, fM1_p, indx_p, ptype_p);
378 wrap_array_export_circ(Fc_p, FcH_p, FcI_p, FcM_p, fM2_p);
379 if (backward_compat_compound)
380 *P_p = backward_compat_compound->params;
381 }
382
383
384 PUBLIC char *
backtrack_fold_from_pair(char * sequence,int i,int j)385 backtrack_fold_from_pair(char *sequence,
386 int i,
387 int j)
388 {
389 char *structure = NULL;
390 unsigned int length = 0;
391 vrna_bp_stack_t *bp = NULL;
392 sect bt_stack[MAXSECTORS]; /* stack of partial structures for backtracking */
393
394 if (sequence) {
395 length = strlen(sequence);
396 bp = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (1 + length / 2));
397 } else {
398 vrna_message_warning("backtrack_fold_from_pair: "
399 "no sequence given");
400 return NULL;
401 }
402
403 bt_stack[1].i = i;
404 bt_stack[1].j = j;
405 bt_stack[1].ml = 2;
406
407 bp[0].i = 0; /* ??? this is set by backtrack anyway... */
408
409 vrna_backtrack_from_intervals(backward_compat_compound, bp, bt_stack, 1);
410 structure = vrna_db_from_bp_stack(bp, length);
411
412 /* backward compatibitlity stuff */
413 if (base_pair)
414 free(base_pair);
415
416 base_pair = bp;
417
418 return structure;
419 }
420
421
#define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
#define NEW_NINIO 1 /* new asymmetry penalty */
424
/*
 * Deprecated: energy (dekacal/mol) of a hairpin loop, evaluated with the
 * parameters of the fold compound cached by the last fold() call.
 *
 * size    number of unpaired bases in the loop
 * type    pair type of the closing pair (types > 2 are AU/GU-like)
 * si1/sj1 encoded bases 5' and 3' adjacent to the closing pair
 * string  sequence fragment starting at the closing pair (used for
 *         tri-/tetra-/hexaloop bonus lookup)
 */
PUBLIC int
HairpinE(int size,
         int type,
         int si1,
         int sj1,
         const char *string)
{
  vrna_param_t *P = backward_compat_compound->params;
  int energy;

  /* tabulated size term up to 30, logarithmic extrapolation beyond */
  energy = (size <= 30) ? P->hairpin[size] :
           P->hairpin[30] + (int)(P->lxc * log((size) / 30.));

  if (tetra_loop) {
    /* special-loop bonuses are looked up by matching the loop sequence
     * (closing pair included) against concatenated tables; each table
     * entry occupies motif length + 1 characters */
    if (size == 4) {
      /* check for tetraloop bonus */
      char tl[7] = {
        0
      }, *ts;
      strncpy(tl, string, 6);
      if ((ts = strstr(P->Tetraloops, tl)))
        return P->Tetraloop_E[(ts - P->Tetraloops) / 7];
    }

    if (size == 6) {
      char tl[9] = {
        0
      }, *ts;
      strncpy(tl, string, 8);
      if ((ts = strstr(P->Hexaloops, tl)))
        return energy = P->Hexaloop_E[(ts - P->Hexaloops) / 9];
    }

    if (size == 3) {
      char tl[6] = {
        0, 0, 0, 0, 0, 0
      }, *ts;
      strncpy(tl, string, 5);
      if ((ts = strstr(P->Triloops, tl)))
        return P->Triloop_E[(ts - P->Triloops) / 6];

      if (type > 2) /* neither CG nor GC */
        energy += P->TerminalAU; /* penalty for closing AU/GU pair; the original
                                  * author was unsure about the sign convention */

      /* NOTE: triloops deliberately return here, skipping the mismatch
       * term below — but only when tetra_loop is enabled */
      return energy;
    }
  }

  /* terminal mismatch contribution of the closing pair */
  energy += P->mismatchH[type][si1][sj1];

  return energy;
}
478
479
480 /*---------------------------------------------------------------------------*/
481
482 PUBLIC int
oldLoopEnergy(int i,int j,int p,int q,int type,int type_2)483 oldLoopEnergy(int i,
484 int j,
485 int p,
486 int q,
487 int type,
488 int type_2)
489 {
490 vrna_param_t *P = backward_compat_compound->params;
491 short *S1 = backward_compat_compound->sequence_encoding;
492
493 /* compute energy of degree 2 loop (stack bulge or interior) */
494 int n1, n2, m, energy;
495
496 n1 = p - i - 1;
497 n2 = j - q - 1;
498
499 if (n1 > n2) {
500 m = n1;
501 n1 = n2;
502 n2 = m;
503 } /* so that n2>=n1 */
504
505 if (n2 == 0) {
506 energy = P->stack[type][type_2]; /* stack */
507 } else if (n1 == 0) {
508 /* bulge */
509 energy = (n2 <= MAXLOOP) ? P->bulge[n2] :
510 (P->bulge[30] + (int)(P->lxc * log(n2 / 30.)));
511
512 #if STACK_BULGE1
513 if (n2 == 1)
514 energy += P->stack[type][type_2];
515
516 #endif
517 } else {
518 /* interior loop */
519
520 if ((n1 + n2 == 2) && (james_rule)) {
521 /* special case for loop size 2 */
522 energy = P->int11[type][type_2][S1[i + 1]][S1[j - 1]];
523 } else {
524 energy = (n1 + n2 <= MAXLOOP) ? (P->internal_loop[n1 + n2]) :
525 (P->internal_loop[30] + (int)(P->lxc * log((n1 + n2) / 30.)));
526
527 #if NEW_NINIO
528 energy += MIN2(MAX_NINIO, (n2 - n1) * P->ninio[2]);
529 #else
530 m = MIN2(4, n1);
531 energy += MIN2(MAX_NINIO, ((n2 - n1) * P->ninio[m]));
532 #endif
533 energy += P->mismatchI[type][S1[i + 1]][S1[j - 1]] +
534 P->mismatchI[type_2][S1[q + 1]][S1[p - 1]];
535 }
536 }
537
538 return energy;
539 }
540
541
542 /*--------------------------------------------------------------------------*/
543
544 PUBLIC int
LoopEnergy(int n1,int n2,int type,int type_2,int si1,int sj1,int sp1,int sq1)545 LoopEnergy(int n1,
546 int n2,
547 int type,
548 int type_2,
549 int si1,
550 int sj1,
551 int sp1,
552 int sq1)
553 {
554 vrna_param_t *P = backward_compat_compound->params;
555 /* compute energy of degree 2 loop (stack bulge or interior) */
556 int nl, ns, energy;
557
558 if (n1 > n2) {
559 nl = n1;
560 ns = n2;
561 } else {
562 nl = n2;
563 ns = n1;
564 }
565
566 if (nl == 0)
567 return P->stack[type][type_2]; /* stack */
568
569 if (ns == 0) {
570 /* bulge */
571 energy = (nl <= MAXLOOP) ? P->bulge[nl] :
572 (P->bulge[30] + (int)(P->lxc * log(nl / 30.)));
573 if (nl == 1) {
574 energy += P->stack[type][type_2];
575 } else {
576 if (type > 2)
577 energy += P->TerminalAU;
578
579 if (type_2 > 2)
580 energy += P->TerminalAU;
581 }
582
583 return energy;
584 } else {
585 /* interior loop */
586 if (ns == 1) {
587 if (nl == 1) /* 1x1 loop */
588 return P->int11[type][type_2][si1][sj1];
589
590 if (nl == 2) {
591 /* 2x1 loop */
592 if (n1 == 1)
593 energy = P->int21[type][type_2][si1][sq1][sj1];
594 else
595 energy = P->int21[type_2][type][sq1][si1][sp1];
596
597 return energy;
598 } else {
599 /* 1xn loop */
600 energy = (nl + 1 <= MAXLOOP) ? (P->internal_loop[nl + 1]) :
601 (P->internal_loop[30] + (int)(P->lxc * log((nl + 1) / 30.)));
602 energy += MIN2(MAX_NINIO, (nl - ns) * P->ninio[2]);
603 energy += P->mismatch1nI[type][si1][sj1] +
604 P->mismatch1nI[type_2][sq1][sp1];
605 return energy;
606 }
607 } else if (ns == 2) {
608 if (nl == 2) {
609 /* 2x2 loop */
610 return P->int22[type][type_2][si1][sp1][sq1][sj1];
611 } else if (nl == 3) {
612 /* 2x3 loop */
613 energy = P->internal_loop[5] + P->ninio[2];
614 energy += P->mismatch23I[type][si1][sj1] +
615 P->mismatch23I[type_2][sq1][sp1];
616 return energy;
617 }
618 }
619
620 {
621 /* generic interior loop (no else here!)*/
622 energy = (n1 + n2 <= MAXLOOP) ? (P->internal_loop[n1 + n2]) :
623 (P->internal_loop[30] + (int)(P->lxc * log((n1 + n2) / 30.)));
624
625 energy += MIN2(MAX_NINIO, (nl - ns) * P->ninio[2]);
626
627 energy += P->mismatchI[type][si1][sj1] +
628 P->mismatchI[type_2][sq1][sp1];
629 }
630 }
631
632 return energy;
633 }
634
635
636 #endif
637