1 /*
2 * minimum free energy
3 * RNA secondary structure prediction
4 *
 * (c) Ivo Hofacker, Christoph Flamm
6 * original implementation by
7 * Walter Fontana
8 *
9 * Vienna RNA package
10 */
11
12 #ifdef HAVE_CONFIG_H
13 #include "config.h"
14 #endif
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <math.h>
19 #include <ctype.h>
20 #include <string.h>
21 #include <limits.h>
22
23 #include "ViennaRNA/utils/basic.h"
24 #include "ViennaRNA/utils/strings.h"
25 #include "ViennaRNA/utils/structures.h"
26 #include "ViennaRNA/params/default.h"
27 #include "ViennaRNA/fold_vars.h"
28 #include "ViennaRNA/params/basic.h"
29 #include "ViennaRNA/subopt.h"
30 #include "ViennaRNA/fold.h"
31 #include "ViennaRNA/loops/all.h"
32 #include "ViennaRNA/gquad.h"
33 #include "ViennaRNA/alphabet.h"
34 #include "ViennaRNA/cofold.h"
35
36 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
37
38 #ifdef _OPENMP
39 #include <omp.h>
40 #endif
41
42 #endif
43
44 #define MAXSECTORS 500 /* dimension for a backtrack array */
45
46 /*
47 #################################
48 # GLOBAL VARIABLES #
49 #################################
50 */
51
52
53 /*
54 #################################
55 # PRIVATE VARIABLES #
56 #################################
57 */
58
59 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
60
61 /* some backward compatibility stuff */
62 PRIVATE int backward_compat = 0;
63 PRIVATE vrna_fold_compound_t *backward_compat_compound = NULL;
64
65 PRIVATE float mfe1, mfe2; /* minimum free energies of the monomers */
66
67 #ifdef _OPENMP
68
69 #pragma omp threadprivate(mfe1, mfe2, backward_compat_compound, backward_compat)
70
71 #endif
72
73 #endif
74
75 /*
76 #################################
77 # PRIVATE FUNCTION DECLARATIONS #
78 #################################
79 */
80
81 PRIVATE int
82 backtrack(sect bt_stack[],
83 vrna_bp_stack_t *bp_list,
84 vrna_fold_compound_t *vc);
85
86
87 PRIVATE int
88 fill_arrays(vrna_fold_compound_t *vc,
89 int zuker);
90
91
92 PRIVATE void
93 free_end(int *array,
94 int i,
95 int start,
96 vrna_fold_compound_t *vc);
97
98
99 PRIVATE void
100 doubleseq(vrna_fold_compound_t *vc); /* do magic */
101
102
103 PRIVATE void
104 halfseq(vrna_fold_compound_t *vc); /* undo magic */
105
106
107 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
108
109 /* wrappers for old API compatibility */
110 PRIVATE void
111 wrap_array_export(int **f5_p,
112 int **c_p,
113 int **fML_p,
114 int **fM1_p,
115 int **fc_p,
116 int **indx_p,
117 char **ptype_p);
118
119
120 PRIVATE float
121 wrap_cofold(const char *string,
122 char *structure,
123 vrna_param_t *parameters,
124 int is_constrained);
125
126
127 PRIVATE SOLUTION *
128 wrap_zukersubopt(const char *string,
129 vrna_param_t *parameters);
130
131
132 #endif
133
134 /*
135 #################################
136 # BEGIN OF FUNCTION DEFINITIONS #
137 #################################
138 */
139 PUBLIC float
vrna_mfe_dimer(vrna_fold_compound_t * vc,char * structure)140 vrna_mfe_dimer(vrna_fold_compound_t *vc,
141 char *structure)
142 {
143 int length, energy;
144 char *s;
145 sect bt_stack[MAXSECTORS]; /* stack of partial structures for backtracking */
146 vrna_bp_stack_t *bp;
147
148 length = (int)vc->length;
149
150 vc->sequence_encoding[0] = vc->sequence_encoding2[0]; /* store length at pos. 0 in S1 too */
151
152 if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_HYBRID)) {
153 vrna_message_warning("vrna_mfe_dimer@cofold.c: Failed to prepare vrna_fold_compound");
154 return (float)(INF / 100.);
155 }
156
157 /* call user-defined recursion status callback function */
158 if (vc->stat_cb)
159 vc->stat_cb(VRNA_STATUS_MFE_PRE, vc->auxdata);
160
161 energy = fill_arrays(vc, 0);
162
163 /* call user-defined recursion status callback function */
164 if (vc->stat_cb)
165 vc->stat_cb(VRNA_STATUS_MFE_POST, vc->auxdata);
166
167 if (structure && vc->params->model_details.backtrack) {
168 bp = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2))); /* add a guess of how many G's may be involved in a G quadruplex */
169
170 backtrack(bt_stack, bp, vc);
171
172 s = vrna_db_from_bp_stack(bp, length);
173 strncpy(structure, s, length + 1);
174 free(s);
175 free(bp);
176 }
177
178 if (vc->params->model_details.backtrack_type == 'C')
179 return (float)vc->matrices->c[vc->jindx[length] + 1] / 100.;
180 else if (vc->params->model_details.backtrack_type == 'M')
181 return (float)vc->matrices->fML[vc->jindx[length] + 1] / 100.;
182 else
183 return (float)energy / 100.;
184 }
185
186
187 PRIVATE int
fill_arrays(vrna_fold_compound_t * vc,int zuker)188 fill_arrays(vrna_fold_compound_t *vc,
189 int zuker)
190 {
191 /* fill "c", "fML" and "f5" arrays and return optimal energy */
192
193 unsigned int strands, *sn, *ss, *se, *so;
194 int i, j, length, energy;
195 int uniq_ML;
196 int no_close, type, maxj, *indx;
197 int *my_f5, *my_c, *my_fML, *my_fM1, *my_fc;
198 int *cc, *cc1; /* auxilary arrays for canonical structures */
199 int *Fmi; /* holds row i of fML (avoids jumps in memory) */
200 int *DMLi; /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
201 int *DMLi1; /* MIN(fML[i+1,k]+fML[k+1,j]) */
202 int *DMLi2; /* MIN(fML[i+2,k]+fML[k+1,j]) */
203
204 int dangle_model, noGUclosure, noLP, hc_decompose, turn;
205 char *ptype;
206 unsigned char *hard_constraints;
207 vrna_param_t *P;
208 vrna_mx_mfe_t *matrices;
209 vrna_hc_t *hc;
210
211 length = (int)vc->length;
212 ptype = vc->ptype;
213 indx = vc->jindx;
214 P = vc->params;
215 dangle_model = P->model_details.dangles;
216 noGUclosure = P->model_details.noGUclosure;
217 noLP = P->model_details.noLP;
218 uniq_ML = P->model_details.uniq_ML;
219 strands = vc->strands;
220 sn = vc->strand_number;
221 ss = vc->strand_start;
222 se = vc->strand_end;
223 so = vc->strand_order;
224 hc = vc->hc;
225 hard_constraints = hc->mx;
226 matrices = vc->matrices;
227 my_f5 = matrices->f5;
228 my_c = matrices->c;
229 my_fML = matrices->fML;
230 my_fM1 = matrices->fM1;
231 my_fc = matrices->fc;
232 turn = P->model_details.min_loop_size;
233
234 /* allocate memory for all helper arrays */
235 cc = (int *)vrna_alloc(sizeof(int) * (length + 2));
236 cc1 = (int *)vrna_alloc(sizeof(int) * (length + 2));
237 Fmi = (int *)vrna_alloc(sizeof(int) * (length + 1));
238 DMLi = (int *)vrna_alloc(sizeof(int) * (length + 1));
239 DMLi1 = (int *)vrna_alloc(sizeof(int) * (length + 1));
240 DMLi2 = (int *)vrna_alloc(sizeof(int) * (length + 1));
241
242
243 /* hard code min_loop_size to 0, since we can not be sure yet that this is already the case */
244 turn = 0;
245
246 for (j = 1; j <= length; j++) {
247 Fmi[j] = DMLi[j] = DMLi1[j] = DMLi2[j] = INF;
248 my_fc[j] = 0;
249 }
250
251 for (j = 1; j <= length; j++)
252 for (i = 1; i <= j; i++) {
253 my_c[indx[j] + i] = my_fML[indx[j] + i] = INF;
254 if (uniq_ML)
255 my_fM1[indx[j] + i] = INF;
256 }
257
258 for (i = length - turn - 1; i >= 1; i--) {
259 /* i,j in [1..length] */
260
261 maxj = (zuker) ? (MIN2(i + se[so[0]], length)) : length;
262 for (j = i + turn + 1; j <= maxj; j++) {
263 int ij;
264 ij = indx[j] + i;
265 type = vrna_get_ptype(ij, ptype);
266 hc_decompose = hard_constraints[length * i + j];
267 energy = INF;
268
269 no_close = (((type == 3) || (type == 4)) && noGUclosure);
270
271 if (hc_decompose) {
272 /* we have a pair */
273 int new_c = INF;
274
275 if (!no_close) {
276 /* check for hairpin loop */
277 energy = vrna_E_hp_loop(vc, i, j);
278 new_c = MIN2(new_c, energy);
279
280 /* check for multibranch loops */
281 energy = vrna_E_mb_loop_fast(vc, i, j, DMLi1, DMLi2);
282 new_c = MIN2(new_c, energy);
283 }
284
285 if (dangle_model == 3) {
286 /* coaxial stacking */
287 energy = vrna_E_mb_loop_stack(vc, i, j);
288 new_c = MIN2(new_c, energy);
289 }
290
291 /* check for interior loops */
292 energy = vrna_E_int_loop(vc, i, j);
293 new_c = MIN2(new_c, energy);
294
295 /* remember stack energy for --noLP option */
296 if (noLP) {
297 if ((sn[i] == sn[i + 1]) && (sn[j - 1] == sn[j])) {
298 int stackEnergy = vrna_E_stack(vc, i, j);
299 new_c = MIN2(new_c, cc1[j - 1] + stackEnergy);
300 my_c[ij] = cc1[j - 1] + stackEnergy;
301 } else {
302 /* currently we don't allow stacking over the cut point */
303 my_c[ij] = FORBIDDEN;
304 }
305
306 cc[j] = new_c;
307 } else {
308 my_c[ij] = new_c;
309 }
310 } /* end >> if (pair) << */
311 else {
312 my_c[ij] = INF;
313 }
314
315 /*
316 * done with c[i,j], now compute fML[i,j]
317 * free ends ? -----------------------------------------
318 */
319
320 my_fML[ij] = vrna_E_ml_stems_fast(vc, i, j, Fmi, DMLi);
321
322 if (uniq_ML) /* compute fM1 for unique decomposition */
323 my_fM1[ij] = E_ml_rightmost_stem(i, j, vc);
324 }
325
326 if (i == se[so[0]] + 1)
327 for (j = i; j <= maxj; j++)
328 free_end(my_fc, j, ss[so[1]], vc);
329
330 if (i <= se[so[0]])
331 free_end(my_fc, i, se[so[0]], vc);
332
333 {
334 int *FF; /* rotate the auxilliary arrays */
335 FF = DMLi2;
336 DMLi2 = DMLi1;
337 DMLi1 = DMLi;
338 DMLi = FF;
339 FF = cc1;
340 cc1 = cc;
341 cc = FF;
342 for (j = 1; j <= maxj; j++)
343 cc[j] = Fmi[j] = DMLi[j] = INF;
344 }
345 }
346
347 /* calculate energies of 5' and 3' fragments */
348
349 for (i = 1; i <= length; i++)
350 free_end(my_f5, i, 1, vc);
351
352 if (strands > 1) {
353 mfe1 = my_f5[se[so[0]]];
354 mfe2 = my_fc[length];
355 /* add DuplexInit, check whether duplex*/
356 for (i = ss[so[1]]; i <= length; i++) {
357 if (my_f5[i] != INF)
358 my_f5[i] += P->DuplexInit;
359
360 if ((my_fc[i] != INF) &&
361 (my_fc[1] != INF)) {
362 energy = my_fc[i] +
363 my_fc[1];
364
365 if (energy < my_f5[i])
366 my_f5[i] = energy;
367 }
368 }
369 }
370
371 energy = my_f5[length];
372 if (strands == 1)
373 mfe1 = mfe2 = energy;
374
375 /* clean up memory */
376 free(cc);
377 free(cc1);
378 free(Fmi);
379 free(DMLi);
380 free(DMLi1);
381 free(DMLi2);
382
383 return energy;
384 }
385
386
387 PRIVATE int
backtrack_co(sect bt_stack[],vrna_bp_stack_t * bp_list,int s,int b,vrna_fold_compound_t * vc)388 backtrack_co(sect bt_stack[],
389 vrna_bp_stack_t *bp_list,
390 int s,
391 int b, /* b=0: start new structure, b \ne 0: add to existing structure */
392 vrna_fold_compound_t *vc)
393 {
394 /*------------------------------------------------------------------
395 * trace back through the "c", "fc", "f5" and "fML" arrays to get the
396 * base pairing list. No search for equivalent structures is done.
397 * This is fast, since only few structure elements are recalculated.
398 * ------------------------------------------------------------------*/
399
400 unsigned int *se, *so;
401 int i, j, ij, k, length, no_close, type, ret;
402 char *string = vc->sequence;
403 vrna_param_t *P = vc->params;
404 int *indx = vc->jindx;
405 char *ptype = vc->ptype;
406
407 int noLP = P->model_details.noLP;
408 int noGUclosure = P->model_details.noGUclosure;
409 char backtrack_type = P->model_details.backtrack_type;
410
411 /* the folding matrices */
412 int *my_c;
413
414 ret = 1;
415 length = vc->length;
416 my_c = vc->matrices->c;
417 se = vc->strand_end;
418 so = vc->strand_order;
419
420 /* int b=0;*/
421
422 length = strlen(string);
423 if (s == 0) {
424 bt_stack[++s].i = 1;
425 bt_stack[s].j = length;
426 bt_stack[s].ml = (backtrack_type == 'M') ? 1 : ((backtrack_type == 'C') ? 2 : 0);
427 }
428
429 while (s > 0) {
430 int ml, cij;
431 int canonical = 1; /* (i,j) closes a canonical structure */
432
433 /* pop one element from stack */
434 i = bt_stack[s].i;
435 j = bt_stack[s].j;
436 ml = bt_stack[s--].ml;
437
438 switch (ml) {
439 /* backtrack in f5 */
440 case 0:
441 {
442 int p, q;
443 if (vrna_BT_ext_loop_f5(vc, &j, &p, &q, bp_list, &b)) {
444 if (j > 0) {
445 bt_stack[++s].i = 1;
446 bt_stack[s].j = j;
447 bt_stack[s].ml = 0;
448 }
449
450 if (p > 0) {
451 i = p;
452 j = q;
453 goto repeat1;
454 }
455
456 continue;
457 } else {
458 vrna_message_warning("backtracking failed in f5, segment [%d,%d]\n", i, j);
459 ret = 0;
460 goto backtrack_exit;
461 }
462 }
463 break;
464
465 /* true multi-loop backtrack in fML */
466 case 1:
467 {
468 int p, q, comp1, comp2;
469 if (vrna_BT_mb_loop_split(vc, &i, &j, &p, &q, &comp1, &comp2, bp_list, &b)) {
470 if (i > 0) {
471 bt_stack[++s].i = i;
472 bt_stack[s].j = j;
473 bt_stack[s].ml = comp1;
474 }
475
476 if (p > 0) {
477 bt_stack[++s].i = p;
478 bt_stack[s].j = q;
479 bt_stack[s].ml = comp2;
480 }
481
482 continue;
483 } else {
484 vrna_message_warning("backtrack failed in fML\n%s", string);
485 ret = 0;
486 goto backtrack_exit;
487 }
488 }
489 break;
490
491 case 2:
492 bp_list[++b].i = i;
493 bp_list[b].j = j;
494 goto repeat1;
495
496 /* backtrack fake-multi loop parts */
497 case 3:
498 case 4:
499 {
500 int lower, k, p, q;
501 p = i;
502 q = j;
503 lower = (i <= se[so[0]]) ? 1 : 0;
504
505 if (vrna_BT_mb_loop_fake(vc, &k, &i, &j, bp_list, &b)) {
506 if (k > 0) {
507 bt_stack[++s].i = (lower) ? k : p;
508 bt_stack[s].j = (lower) ? q : k;
509 bt_stack[s].ml = ml;
510 }
511
512 if (i > 0)
513 goto repeat1;
514
515 continue;
516 } else {
517 vrna_message_warning("backtrack failed in fc\n%s", string);
518 ret = 0;
519 goto backtrack_exit;
520 }
521 }
522 break;
523 } /* end of switch(ml) */
524
525 repeat1:
526
527 /*----- begin of "repeat:" -----*/
528 ij = indx[j] + i;
529
530 if (canonical)
531 cij = my_c[ij];
532
533 type = vrna_get_ptype(ij, ptype);
534
535 if (noLP) {
536 if (vrna_BT_stack(vc, &i, &j, &cij, bp_list, &b)) {
537 canonical = 0;
538 goto repeat1;
539 }
540 }
541
542 canonical = 1;
543
544 no_close = (((type == 3) || (type == 4)) && noGUclosure);
545 if (no_close) {
546 if (cij == FORBIDDEN)
547 continue;
548 } else {
549 if (vrna_BT_hp_loop(vc, i, j, cij, bp_list, &b))
550 continue;
551 }
552
553 if (vrna_BT_int_loop(vc, &i, &j, cij, bp_list, &b)) {
554 if (i < 0)
555 continue;
556 else
557 goto repeat1;
558 }
559
560 /* (i.j) must close a fake or true multi-loop */
561 int comp1, comp2;
562
563 if (vrna_BT_mb_loop(vc, &i, &j, &k, cij, &comp1, &comp2)) {
564 bt_stack[++s].i = i;
565 bt_stack[s].j = k;
566 bt_stack[s].ml = comp1;
567 bt_stack[++s].i = k + 1;
568 bt_stack[s].j = j;
569 bt_stack[s].ml = comp2;
570 } else {
571 vrna_message_warning("backtracking failed in repeat, segment [%d,%d]\n", i, j);
572 ret = 0;
573 goto backtrack_exit;
574 }
575
576 /* end of repeat: --------------------------------------------------*/
577 } /* end >> while (s>0) << */
578
579 backtrack_exit:
580
581 bp_list[0].i = b; /* save the total number of base pairs */
582 return ret;
583 }
584
585
586 PRIVATE void
free_end(int * array,int i,int start,vrna_fold_compound_t * vc)587 free_end(int *array,
588 int i,
589 int start,
590 vrna_fold_compound_t *vc)
591 {
592 unsigned int *sn;
593 int inc, type, energy, en, length, j, left, right, dangle_model, with_gquad, *indx,
594 *c, *ggg, turn;
595 vrna_param_t *P;
596 short *S1;
597 char *ptype;
598 unsigned char *hard_constraints;
599 vrna_mx_mfe_t *matrices;
600 vrna_hc_t *hc;
601 vrna_sc_t *sc;
602
603 P = vc->params;
604 dangle_model = P->model_details.dangles;
605 with_gquad = P->model_details.gquad;
606 turn = P->model_details.min_loop_size;
607 inc = (i > start) ? 1 : -1;
608 length = (int)vc->length;
609 S1 = vc->sequence_encoding;
610 ptype = vc->ptype;
611 indx = vc->jindx;
612 sn = vc->strand_number;
613 matrices = vc->matrices;
614 c = matrices->c;
615 ggg = matrices->ggg;
616 hc = vc->hc;
617 sc = vc->sc;
618 hard_constraints = hc->mx;
619
620 if (hc->up_ext[i]) {
621 if (i == start)
622 array[i] = 0;
623 else
624 array[i] = array[i - inc];
625
626 if (sc) {
627 if (sc->energy_up)
628 array[i] += sc->energy_up[i][1];
629
630 if (sc->f)
631 array[i] += sc->f(start, i, start, i - 1, VRNA_DECOMP_EXT_EXT, sc->data);
632 }
633 } else {
634 array[i] = INF;
635 }
636
637 if (inc > 0) {
638 left = start;
639 right = i;
640 } else {
641 left = i;
642 right = start;
643 }
644
645 /* hard code min_loop_size to 0, since we can not be sure yet that this is already the case */
646 turn = 0;
647
648 for (j = start; inc * (i - j) > turn; j += inc) {
649 int ii, jj;
650 short si, sj;
651 if (i > j) {
652 ii = j;
653 jj = i;
654 } /* inc>0 */
655 else {
656 ii = i;
657 jj = j;
658 } /* inc<0 */
659
660 if (hard_constraints[length * ii + jj] & VRNA_CONSTRAINT_CONTEXT_EXT_LOOP) {
661 type = vrna_get_ptype(indx[jj] + ii, ptype);
662 si = ((ii > 1) && (sn[ii - 1] == sn[ii])) ? S1[ii - 1] : -1;
663 sj = ((jj < length) && (sn[jj] == sn[jj + 1])) ? S1[jj + 1] : -1;
664 energy = c[indx[jj] + ii];
665
666 if ((sc) && (sc->f))
667 energy += sc->f(start, jj, ii - 1, ii, VRNA_DECOMP_EXT_EXT_STEM, sc->data);
668
669 if (energy != INF) {
670 switch (dangle_model) {
671 case 0:
672 if (array[j - inc] != INF) {
673 en = array[j - inc] + energy + vrna_E_ext_stem(type, -1, -1, P);
674 array[i] = MIN2(array[i], en);
675 }
676
677 break;
678 case 2:
679 if (array[j - inc] != INF) {
680 en = array[j - inc] + energy + vrna_E_ext_stem(type, si, sj, P);
681 array[i] = MIN2(array[i], en);
682 }
683
684 break;
685 default:
686 if (array[j - inc] != INF) {
687 en = array[j - inc] + energy + vrna_E_ext_stem(type, -1, -1, P);
688 array[i] = MIN2(array[i], en);
689 }
690
691 if (inc > 0) {
692 if (j > left) {
693 if (hc->up_ext[ii - 1]) {
694 if (array[j - 2] != INF) {
695 en = array[j - 2] + energy + vrna_E_ext_stem(type, si, -1, P);
696 if (sc)
697 if (sc->energy_up)
698 en += sc->energy_up[ii - 1][1];
699
700 array[i] = MIN2(array[i], en);
701 }
702 }
703 }
704 } else if (j < right) {
705 if (hc->up_ext[jj + 1]) {
706 if (array[j + 2] != INF) {
707 en = array[j + 2] + energy + vrna_E_ext_stem(type, -1, sj, P);
708 if (sc)
709 if (sc->energy_up)
710 en += sc->energy_up[jj + 1][1];
711
712 array[i] = MIN2(array[i], en);
713 }
714 }
715 }
716
717 break;
718 }
719 }
720 }
721
722 if (with_gquad) {
723 if (sn[ii] == sn[jj])
724 if (array[j - inc] != INF)
725 array[i] = MIN2(array[i], array[j - inc] + ggg[indx[jj] + ii]);
726 }
727
728 if (dangle_model % 2 == 1) {
729 /* interval ends in a dangle (i.e. i-inc is paired) */
730 if (i > j) {
731 ii = j;
732 jj = i - 1;
733 } /* inc>0 */
734 else {
735 ii = i + 1;
736 jj = j;
737 } /* inc<0 */
738
739 if (!(hard_constraints[length * ii + jj] & VRNA_CONSTRAINT_CONTEXT_EXT_LOOP))
740 continue;
741
742 type = vrna_get_ptype(indx[jj] + ii, ptype);
743 si = (ii > left) && (sn[ii - 1] == sn[ii]) ? S1[ii - 1] : -1;
744 sj = (jj < right) && (sn[jj] == sn[jj + 1]) ? S1[jj + 1] : -1;
745 energy = c[indx[jj] + ii];
746 if (energy != INF) {
747 if (inc > 0) {
748 if (hc->up_ext[jj - 1]) {
749 if (array[j - inc] != INF) {
750 en = array[j - inc] + energy + vrna_E_ext_stem(type, -1, sj, P);
751 if (sc)
752 if (sc->energy_up)
753 en += sc->energy_up[jj + 1][1];
754
755 array[i] = MIN2(array[i], en);
756 }
757 }
758 } else {
759 if (hc->up_ext[ii - 1]) {
760 if (array[j - inc] != INF) {
761 en = array[j - inc] + energy + vrna_E_ext_stem(type, si, -1, P);
762 if (sc)
763 if (sc->energy_up)
764 en += sc->energy_up[ii - 1][1];
765
766 array[i] = MIN2(array[i], en);
767 }
768 }
769 }
770
771 if (j != start) {
772 /* dangle_model on both sides */
773 if (hc->up_ext[jj - 1] && hc->up_ext[ii - 1]) {
774 if (array[j - 2 * inc] != INF) {
775 en = array[j - 2 * inc] + energy + vrna_E_ext_stem(type, si, sj, P);
776 if (sc)
777 if (sc->energy_up)
778 en += sc->energy_up[ii - 1][1] + sc->energy_up[jj + 1][1];
779
780 array[i] = MIN2(array[i], en);
781 }
782 }
783 }
784 }
785 }
786 }
787 }
788
789
790 PRIVATE int
backtrack(sect bt_stack[],vrna_bp_stack_t * bp_list,vrna_fold_compound_t * vc)791 backtrack(sect bt_stack[],
792 vrna_bp_stack_t *bp_list,
793 vrna_fold_compound_t *vc)
794 {
795 /*routine to call backtrack_co from 1 to n, backtrack type??*/
796 return backtrack_co(bt_stack, bp_list, 0, 0, vc);
797 }
798
799
800 PRIVATE void
doubleseq(vrna_fold_compound_t * vc)801 doubleseq(vrna_fold_compound_t *vc)
802 {
803 unsigned int length, i, s;
804
805 length = vc->length;
806
807 /* do some magic to re-use cofold code */
808 vc->sequence = vrna_realloc(vc->sequence, sizeof(char) * (2 * length + 2));
809 memcpy(vc->sequence + length, vc->sequence, sizeof(char) * length);
810 vc->sequence[2 * length] = '\0';
811 vc->length = (unsigned int)strlen(vc->sequence);
812 vc->cutpoint = length + 1;
813
814 vc->strands = 2;
815
816 free(vc->strand_number);
817 vc->strand_number = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->length + 1));
818 for (s = i = 0; i <= vc->length; i++) {
819 if (i == length + 1)
820 s++;
821
822 vc->strand_number[i] = s;
823 }
824
825 free(vc->strand_order);
826 free(vc->strand_start);
827 free(vc->strand_end);
828 vc->strand_order = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->strands + 1));
829 vc->strand_start = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->strands + 1));
830 vc->strand_end = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (vc->strands + 1));
831 vc->strand_order[0] = 0;
832 vc->strand_order[1] = 1;
833 vc->strand_start[0] = 1;
834 vc->strand_end[0] = vc->strand_start[0] + length - 1;
835 vc->strand_start[1] = vc->strand_end[0] + 1;
836 vc->strand_end[1] = vc->strand_start[1] + length - 1;
837
838
839 vc->sequence_encoding = vrna_realloc(vc->sequence_encoding, sizeof(short) * (vc->length + 2));
840 memcpy(vc->sequence_encoding + length + 1, vc->sequence_encoding + 1, sizeof(short) * length);
841 vc->sequence_encoding[0] = vc->sequence_encoding[vc->length];
842 vc->sequence_encoding[vc->length + 1] = vc->sequence_encoding[1];
843
844 vc->sequence_encoding2 = vrna_realloc(vc->sequence_encoding2, sizeof(short) * (vc->length + 2));
845 memcpy(vc->sequence_encoding2 + length + 1, vc->sequence_encoding2 + 1, sizeof(short) * length);
846 vc->sequence_encoding2[0] = vc->length;
847 vc->sequence_encoding2[vc->length + 1] = 0;
848
849 free(vc->ptype);
850 vc->ptype = vrna_ptypes(vc->sequence_encoding2, &(vc->params->model_details));
851 free(vc->iindx);
852 vc->iindx = vrna_idx_row_wise(vc->length);
853 free(vc->jindx);
854 vc->jindx = vrna_idx_col_wise(vc->length);
855
856 vrna_hc_init(vc);
857
858 /* add DP matrices */
859 vrna_mx_mfe_add(vc, VRNA_MX_DEFAULT, 0);
860 }
861
862
863 PRIVATE void
halfseq(vrna_fold_compound_t * vc)864 halfseq(vrna_fold_compound_t *vc)
865 {
866 unsigned int halflength;
867
868 halflength = vc->length / 2;
869
870 vc->sequence = vrna_realloc(vc->sequence, sizeof(char) * (halflength + 1));
871 vc->sequence[halflength] = '\0';
872 vc->length = (unsigned int)strlen(vc->sequence);
873 vc->cutpoint = -1;
874 vc->strands = 1;
875 vc->strand_number = (unsigned int *)vrna_realloc(vc->strand_number,
876 sizeof(unsigned int) * (vc->length + 1));
877 vc->strand_order = (unsigned int *)vrna_realloc(vc->strand_order,
878 sizeof(unsigned int) * (vc->strands + 1));
879 vc->strand_start = (unsigned int *)vrna_realloc(vc->strand_start,
880 sizeof(unsigned int) * (vc->strands + 1));
881 vc->strand_end = (unsigned int *)vrna_realloc(vc->strand_end,
882 sizeof(unsigned int) * (vc->strands + 1));
883
884 vc->sequence_encoding =
885 vrna_realloc(vc->sequence_encoding, sizeof(short) * (vc->length + 2));
886 vc->sequence_encoding[0] = vc->sequence_encoding[vc->length];
887 vc->sequence_encoding[vc->length + 1] = vc->sequence_encoding[1];
888
889 vc->sequence_encoding2 =
890 vrna_realloc(vc->sequence_encoding2, sizeof(short) * (vc->length + 2));
891 vc->sequence_encoding2[0] = vc->length;
892 vc->sequence_encoding2[vc->length + 1] = 0;
893
894 free(vc->ptype);
895 vc->ptype = vrna_ptypes(vc->sequence_encoding2, &(vc->params->model_details));
896 free(vc->iindx);
897 vc->iindx = vrna_idx_row_wise(vc->length);
898 free(vc->jindx);
899 vc->jindx = vrna_idx_col_wise(vc->length);
900
901 vrna_hc_init(vc);
902
903 /* add DP matrices */
904 vrna_mx_mfe_add(vc, VRNA_MX_DEFAULT, 0);
905 }
906
907
908 typedef struct {
909 int i;
910 int j;
911 int e;
912 int idxj;
913 } zuker_pair;
914
915 PRIVATE int
comp_pair(const void * A,const void * B)916 comp_pair(const void *A,
917 const void *B)
918 {
919 zuker_pair *x, *y;
920 int ex, ey;
921
922 x = (zuker_pair *)A;
923 y = (zuker_pair *)B;
924 ex = x->e;
925 ey = y->e;
926 if (ex > ey)
927 return 1;
928
929 if (ex < ey)
930 return -1;
931
932 return x->idxj + x->i - y->idxj + y->i;
933 }
934
935
936 PUBLIC SOLUTION *
vrna_subopt_zuker(vrna_fold_compound_t * vc)937 vrna_subopt_zuker(vrna_fold_compound_t *vc)
938 {
939 /* Compute zuker suboptimal. Here, we're abusing the cofold() code
940 * "double" sequence, compute dimerarray entries, track back every base pair.
941 * This is slightly wasteful compared to the normal solution */
942
943 char *structure, *mfestructure, **todo, *ptype;
944 int i, j, counter, num_pairs, psize, p, *indx, *c, turn;
945 unsigned int length, doublelength;
946 float energy;
947 SOLUTION *zukresults;
948 vrna_bp_stack_t *bp_list;
949 zuker_pair *pairlist;
950 sect bt_stack[MAXSECTORS]; /* stack of partial structures for backtracking */
951 vrna_mx_mfe_t *matrices;
952 vrna_md_t *md;
953
954 md = &(vc->params->model_details);
955 turn = md->min_loop_size;
956
957 /* do some magic to re-use cofold code although vc is single sequence */
958 md->min_loop_size = 0;
959 doubleseq(vc);
960
961 if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_HYBRID)) {
962 vrna_message_warning("vrna_subopt_zuker@cofold.c: Failed to prepare vrna_fold_compound");
963 return NULL;
964 }
965
966 doublelength = vc->length;
967 length = doublelength / 2;
968 indx = vc->jindx;
969 ptype = vc->ptype;
970 matrices = vc->matrices;
971 c = matrices->c;
972 num_pairs = counter = 0;
973 mfestructure = (char *)vrna_alloc((unsigned)doublelength + 1);
974 structure = (char *)vrna_alloc((unsigned)doublelength + 1);
975 zukresults = (SOLUTION *)vrna_alloc(((length * (length - 1)) / 2) * sizeof(SOLUTION));
976 mfestructure[0] = '\0';
977
978 /* store length at pos. 0 */
979 vc->sequence_encoding[0] = vc->sequence_encoding2[0];
980
981 /* get mfe and do forward recursion */
982 (void)fill_arrays(vc, 1);
983
984 psize = length;
985 pairlist = (zuker_pair *)vrna_alloc(sizeof(zuker_pair) * (psize + 1));
986 bp_list = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (1 + length / 2));
987 todo = (char **)vrna_alloc(sizeof(char *) * (length + 1));
988 for (i = 1; i < length; i++)
989 todo[i] = (char *)vrna_alloc(sizeof(char) * (length + 1));
990
991 /* Make a list of all base pairs */
992 for (i = 1; i < length; i++) {
993 for (j = i + turn + 1 /*??*/; j <= length; j++) {
994 if (ptype[indx[j] + i] == 0)
995 continue;
996
997 if (num_pairs >= psize) {
998 psize = 1.2 * psize + 32;
999 pairlist = vrna_realloc(pairlist, sizeof(zuker_pair) * (psize + 1));
1000 }
1001
1002 pairlist[num_pairs].i = i;
1003 pairlist[num_pairs].j = j;
1004 pairlist[num_pairs].e = c[indx[j] + i] + c[indx[i + length] + j];
1005 pairlist[num_pairs++].idxj = indx[j];
1006
1007 todo[i][j] = 1;
1008 }
1009 }
1010
1011 qsort(pairlist, num_pairs, sizeof(zuker_pair), comp_pair);
1012
1013 for (p = 0; p < num_pairs; p++) {
1014 i = pairlist[p].i;
1015 j = pairlist[p].j;
1016 if (todo[i][j]) {
1017 int k;
1018 char *sz;
1019 bt_stack[1].i = i;
1020 bt_stack[1].j = j;
1021 bt_stack[1].ml = 2;
1022 backtrack_co(bt_stack, bp_list, 1, 0, vc);
1023 bt_stack[1].i = j;
1024 bt_stack[1].j = i + length;
1025 bt_stack[1].ml = 2;
1026 backtrack_co(bt_stack, bp_list, 1, bp_list[0].i, vc);
1027 energy = pairlist[p].e;
1028 sz = vrna_db_from_bp_stack(bp_list, length);
1029 zukresults[counter].energy = energy / 100.;
1030 zukresults[counter++].structure = sz;
1031 for (k = 1; k <= bp_list[0].i; k++) {
1032 /* mark all pairs in structure as done */
1033 int x, y;
1034 x = bp_list[k].i;
1035 y = bp_list[k].j;
1036 if (x > length)
1037 x -= length;
1038
1039 if (y > length)
1040 y -= length;
1041
1042 if (x > y) {
1043 int temp;
1044 temp = x;
1045 x = y;
1046 y = temp;
1047 }
1048
1049 todo[x][y] = 0;
1050 }
1051 }
1052 }
1053
1054 /* clean up */
1055 free(pairlist);
1056 for (i = 1; i < length; i++)
1057 free(todo[i]);
1058 free(todo);
1059 free(structure);
1060 free(mfestructure);
1061 free(bp_list);
1062
1063 /* undo magic */
1064 halfseq(vc);
1065 md->min_loop_size = turn;
1066
1067 return zukresults;
1068 }
1069
1070
1071 /*
1072 *###########################################
1073 *# deprecated functions below #
1074 *###########################################
1075 */
1076
1077 #ifndef VRNA_DISABLE_BACKWARD_COMPATIBILITY
1078
1079 PRIVATE void
wrap_array_export(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** fc_p,int ** indx_p,char ** ptype_p)1080 wrap_array_export(int **f5_p,
1081 int **c_p,
1082 int **fML_p,
1083 int **fM1_p,
1084 int **fc_p,
1085 int **indx_p,
1086 char **ptype_p)
1087 {
1088 /* make the DP arrays available to routines such as subopt() */
1089 if (backward_compat_compound) {
1090 *f5_p = backward_compat_compound->matrices->f5;
1091 *c_p = backward_compat_compound->matrices->c;
1092 *fML_p = backward_compat_compound->matrices->fML;
1093 *fM1_p = backward_compat_compound->matrices->fM1;
1094 *fc_p = backward_compat_compound->matrices->fc;
1095 *indx_p = backward_compat_compound->jindx;
1096 *ptype_p = backward_compat_compound->ptype;
1097 }
1098 }
1099
1100
1101 /*--------------------------------------------------------------------------*/
1102
1103 PRIVATE float
wrap_cofold(const char * string,char * structure,vrna_param_t * parameters,int is_constrained)1104 wrap_cofold(const char *string,
1105 char *structure,
1106 vrna_param_t *parameters,
1107 int is_constrained)
1108 {
1109 unsigned int length;
1110 char *seq;
1111 vrna_fold_compound_t *vc;
1112 vrna_param_t *P;
1113 float mfe;
1114
1115 vc = NULL;
1116 length = strlen(string);
1117
1118 #ifdef _OPENMP
1119 /* Explicitly turn off dynamic threads */
1120 omp_set_dynamic(0);
1121 #endif
1122
1123 /* we need the parameter structure for hard constraints */
1124 if (parameters) {
1125 P = vrna_params_copy(parameters);
1126 } else {
1127 vrna_md_t md;
1128 set_model_details(&md);
1129 md.temperature = temperature;
1130 P = vrna_params(&md);
1131 }
1132
1133 P->model_details.min_loop_size = 0; /* set min loop length to 0 */
1134
1135 /* dirty hack to reinsert the '&' according to the global variable 'cut_point' */
1136 seq = vrna_cut_point_insert(string, cut_point);
1137
1138 /* get compound structure */
1139 vc = vrna_fold_compound(seq, &(P->model_details), 0);
1140
1141 if (parameters) {
1142 /* replace params if necessary */
1143 free(vc->params);
1144 vc->params = P;
1145 } else {
1146 free(P);
1147 }
1148
1149 /* handle hard constraints in pseudo dot-bracket format if passed via simple interface */
1150 if (is_constrained && structure) {
1151 unsigned int constraint_options = 0;
1152 constraint_options |= VRNA_CONSTRAINT_DB
1153 | VRNA_CONSTRAINT_DB_PIPE
1154 | VRNA_CONSTRAINT_DB_DOT
1155 | VRNA_CONSTRAINT_DB_X
1156 | VRNA_CONSTRAINT_DB_ANG_BRACK
1157 | VRNA_CONSTRAINT_DB_RND_BRACK
1158 | VRNA_CONSTRAINT_DB_INTRAMOL
1159 | VRNA_CONSTRAINT_DB_INTERMOL;
1160
1161 vrna_constraints_add(vc, (const char *)structure, constraint_options);
1162 }
1163
1164 if (backward_compat_compound)
1165 vrna_fold_compound_free(backward_compat_compound);
1166
1167 backward_compat_compound = vc;
1168 backward_compat = 1;
1169
1170 /* cleanup */
1171 free(seq);
1172
1173 /* call mfe_dimer without backtracing */
1174 mfe = vrna_mfe_dimer(vc, NULL);
1175
1176 /* now we backtrace in a backward compatible way */
1177 if (structure && vc->params->model_details.backtrack) {
1178 char *s;
1179 sect bt_stack[MAXSECTORS];
1180 vrna_bp_stack_t *bp;
1181
1182 bp = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2))); /* add a guess of how many G's may be involved in a G quadruplex */
1183
1184 backtrack(bt_stack, bp, vc);
1185
1186 s = vrna_db_from_bp_stack(bp, length);
1187 strncpy(structure, s, length + 1);
1188 free(s);
1189
1190 if (base_pair)
1191 free(base_pair);
1192
1193 base_pair = bp;
1194 }
1195
1196 return mfe;
1197 }
1198
1199
1200 PRIVATE SOLUTION *
wrap_zukersubopt(const char * string,vrna_param_t * parameters)1201 wrap_zukersubopt(const char *string,
1202 vrna_param_t *parameters)
1203 {
1204 vrna_fold_compound_t *vc;
1205 vrna_param_t *P;
1206
1207 vc = NULL;
1208
1209 #ifdef _OPENMP
1210 /* Explicitly turn off dynamic threads */
1211 omp_set_dynamic(0);
1212 #endif
1213
1214 /* we need the parameter structure for hard constraints */
1215 if (parameters) {
1216 P = vrna_params_copy(parameters);
1217 } else {
1218 vrna_md_t md;
1219 set_model_details(&md);
1220 md.temperature = temperature;
1221 P = vrna_params(&md);
1222 }
1223
1224 /* get compound structure */
1225 vc = vrna_fold_compound(string, &(P->model_details), VRNA_OPTION_DEFAULT);
1226
1227 if (parameters) {
1228 /* replace params if necessary */
1229 free(vc->params);
1230 vc->params = P;
1231 } else {
1232 free(P);
1233 }
1234
1235 if (backward_compat_compound)
1236 vrna_fold_compound_free(backward_compat_compound);
1237
1238 backward_compat_compound = vc;
1239 backward_compat = 1;
1240
1241 return vrna_subopt_zuker(vc);
1242 }
1243
1244
1245 PUBLIC void
initialize_cofold(int length)1246 initialize_cofold(int length)
1247 {
1248 /* DO NOTHING */ }
1249
1250
1251 PUBLIC void
free_co_arrays(void)1252 free_co_arrays(void)
1253 {
1254 if (backward_compat_compound && backward_compat) {
1255 vrna_fold_compound_free(backward_compat_compound);
1256 backward_compat_compound = NULL;
1257 backward_compat = 0;
1258 }
1259 }
1260
1261
1262 /*--------------------------------------------------------------------------*/
1263
1264 PUBLIC void
export_cofold_arrays_gq(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** fc_p,int ** ggg_p,int ** indx_p,char ** ptype_p)1265 export_cofold_arrays_gq(int **f5_p,
1266 int **c_p,
1267 int **fML_p,
1268 int **fM1_p,
1269 int **fc_p,
1270 int **ggg_p,
1271 int **indx_p,
1272 char **ptype_p)
1273 {
1274 /* make the DP arrays available to routines such as subopt() */
1275 wrap_array_export(f5_p, c_p, fML_p, fM1_p, fc_p, indx_p, ptype_p);
1276 if (backward_compat_compound)
1277 *ggg_p = backward_compat_compound->matrices->ggg;
1278 }
1279
1280
1281 PUBLIC void
export_cofold_arrays(int ** f5_p,int ** c_p,int ** fML_p,int ** fM1_p,int ** fc_p,int ** indx_p,char ** ptype_p)1282 export_cofold_arrays(int **f5_p,
1283 int **c_p,
1284 int **fML_p,
1285 int **fM1_p,
1286 int **fc_p,
1287 int **indx_p,
1288 char **ptype_p)
1289 {
1290 wrap_array_export(f5_p, c_p, fML_p, fM1_p, fc_p, indx_p, ptype_p);
1291 }
1292
1293
1294 PUBLIC float
cofold(const char * string,char * structure)1295 cofold(const char *string,
1296 char *structure)
1297 {
1298 return wrap_cofold(string, structure, NULL, fold_constrained);
1299 }
1300
1301
1302 PUBLIC float
cofold_par(const char * string,char * structure,vrna_param_t * parameters,int is_constrained)1303 cofold_par(const char *string,
1304 char *structure,
1305 vrna_param_t *parameters,
1306 int is_constrained)
1307 {
1308 return wrap_cofold(string, structure, parameters, is_constrained);
1309 }
1310
1311
1312 PUBLIC SOLUTION *
zukersubopt(const char * string)1313 zukersubopt(const char *string)
1314 {
1315 return wrap_zukersubopt(string, NULL);
1316 }
1317
1318
1319 PUBLIC SOLUTION *
zukersubopt_par(const char * string,vrna_param_t * parameters)1320 zukersubopt_par(const char *string,
1321 vrna_param_t *parameters)
1322 {
1323 return wrap_zukersubopt(string, parameters);
1324 }
1325
1326
1327 PUBLIC void
update_cofold_params(void)1328 update_cofold_params(void)
1329 {
1330 vrna_fold_compound_t *v;
1331
1332 if (backward_compat_compound && backward_compat) {
1333 vrna_md_t md;
1334 v = backward_compat_compound;
1335
1336 if (v->params)
1337 free(v->params);
1338
1339 set_model_details(&md);
1340 v->params = vrna_params(&md);
1341 }
1342 }
1343
1344
1345 PUBLIC void
update_cofold_params_par(vrna_param_t * parameters)1346 update_cofold_params_par(vrna_param_t *parameters)
1347 {
1348 vrna_fold_compound_t *v;
1349
1350 if (backward_compat_compound && backward_compat) {
1351 v = backward_compat_compound;
1352
1353 if (v->params)
1354 free(v->params);
1355
1356 if (parameters) {
1357 v->params = vrna_params_copy(parameters);
1358 } else {
1359 vrna_md_t md;
1360 set_model_details(&md);
1361 md.temperature = temperature;
1362 v->params = vrna_params(&md);
1363 }
1364 }
1365 }
1366
1367
1368 PUBLIC void
get_monomere_mfes(float * e1,float * e2)1369 get_monomere_mfes(float *e1,
1370 float *e2)
1371 {
1372 /*exports monomere free energies*/
1373 *e1 = mfe1;
1374 *e2 = mfe2;
1375 }
1376
1377
1378 #endif
1379