1 /*
2 * minimum free energy
3 * RNA secondary structure prediction
4 * with sliding window approach
5 *
 * (c) Ivo Hofacker, Peter Stadler, Ronny Lorenz
7 *
8 * ViennaRNA package
9 */
10
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <math.h>
18 #include <ctype.h>
19 #include <string.h>
20 #include <limits.h>
21
22 #include "ViennaRNA/utils/basic.h"
23 #include "ViennaRNA/params/default.h"
24 #include "ViennaRNA/params/constants.h" /* defines MINPSCORE */
25 #include "ViennaRNA/fold_vars.h"
26 #include "ViennaRNA/params/basic.h"
27 #include "ViennaRNA/loops/all.h"
28 #include "ViennaRNA/gquad.h"
29 #include "ViennaRNA/ribo.h"
30 #include "ViennaRNA/utils/alignments.h"
31 #include "ViennaRNA/alphabet.h"
32 #include "ViennaRNA/constraints/hard.h"
33 #include "ViennaRNA/eval.h"
34 #include "ViennaRNA/io/utils.h"
35 #include "ViennaRNA/utils/units.h"
36 #include "ViennaRNA/mfe_window.h"
37
38 #ifdef VRNA_WITH_SVM
39 #include "ViennaRNA/zscore_dat.inc"
40 #endif
41
42 #ifdef __GNUC__
43 # define INLINE inline
44 #else
45 # define INLINE
46 #endif
47
48 #define MAXSECTORS 500 /* dimension for a backtrack array */
49 #define INT_CLOSE_TO_UNDERFLOW(i) ((i) <= (INT_MIN / 16))
50 #define UNDERFLOW_CORRECTION (((INT_MIN / 32) / 100) * 100)
51
52 #define NONE -10000 /* score for forbidden pairs */
53
54
/* bundle of output options threaded through the default reporting callbacks */
typedef struct {
  FILE *output;       /* stream each hit is printed to (set to stdout when no file given) */
  int dangle_model;   /* dangling-end model; value 2 shifts the reported start position */
  int csv;            /* non-zero: comma-separated output (comparative callback only) */
} hit_data;
60
61
/* per-row scratch space reused (and rotated) for every row i of the DP recursion */
struct aux_arrays {
  int *cc;    /* auxiliary arrays for canonical structures */
  int *cc1;   /* auxiliary arrays for canonical structures (previous row) */
  int *Fmi;   /* holds row i of fML (avoids jumps in memory) */
  int *DMLi;  /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
  int *DMLi1; /* MIN(fML[i+1,k]+fML[k+1,j]) */
  int *DMLi2; /* MIN(fML[i+2,k]+fML[k+1,j]) */
};
70
71 /*
72 #################################
73 # GLOBAL VARIABLES #
74 #################################
75 */
76
77 /*
78 #################################
79 # PRIVATE VARIABLES #
80 #################################
81 */
82
83 /*
84 #################################
85 # PRIVATE FUNCTION DECLARATIONS #
86 #################################
87 */
88 PRIVATE void
89 make_ptypes(vrna_fold_compound_t *vc,
90 int i);
91
92
93 PRIVATE char *
94 backtrack(vrna_fold_compound_t *vc,
95 int start,
96 int maxdist);
97
98
99 PRIVATE int
100 fill_arrays(vrna_fold_compound_t *vc,
101 int *underflow,
102 vrna_mfe_window_callback *cb,
103 #ifdef VRNA_WITH_SVM
104 vrna_mfe_window_zscore_callback *cb_z,
105 #endif
106 void *data);
107
108
109 PRIVATE void
110 default_callback(int start,
111 int end,
112 const char *structure,
113 float en,
114 void *data);
115
116
117 PRIVATE double
118 cov_score(vrna_fold_compound_t *fc,
119 int i,
120 int j,
121 float **dm);
122
123
124 PRIVATE void
125 make_pscores(vrna_fold_compound_t *fc,
126 int start,
127 float **dm);
128
129
130 PRIVATE void
131 default_callback_comparative(int start,
132 int end,
133 const char *structure,
134 float en,
135 void *data);
136
137
138 PRIVATE INLINE void
139 allocate_dp_matrices(vrna_fold_compound_t *fc);
140
141
142 PRIVATE INLINE void
143 free_dp_matrices(vrna_fold_compound_t *fc);
144
145
146 PRIVATE INLINE void
147 rotate_dp_matrices(vrna_fold_compound_t *fc,
148 int i);
149
150
151 PRIVATE INLINE void
152 init_constraints(vrna_fold_compound_t *fc,
153 float **dm);
154
155
156 PRIVATE INLINE void
157 rotate_constraints(vrna_fold_compound_t *fc,
158 float **dm,
159 int i);
160
161
162 PRIVATE INLINE struct aux_arrays *
163 get_aux_arrays(unsigned int maxdist);
164
165
166 PRIVATE INLINE void
167 rotate_aux_arrays(struct aux_arrays *aux,
168 unsigned int maxdist);
169
170
171 PRIVATE INLINE void
172 free_aux_arrays(struct aux_arrays *aux);
173
174
175 PRIVATE INLINE int
176 decompose_pair(vrna_fold_compound_t *fc,
177 int i,
178 int j,
179 struct aux_arrays *aux);
180
181
182 #ifdef VRNA_WITH_SVM
183
184 PRIVATE void
185 default_callback_z(int start,
186 int end,
187 const char *structure,
188 float en,
189 float zscore,
190 void *data);
191
192
193 PRIVATE INLINE int
194 want_backtrack(vrna_fold_compound_t *fc,
195 int i,
196 int j,
197 double *z);
198
199
200 #endif
201
202
203 /*
204 #################################
205 # BEGIN OF FUNCTION DEFINITIONS #
206 #################################
207 */
208 PUBLIC float
vrna_mfe_window(vrna_fold_compound_t * vc,FILE * file)209 vrna_mfe_window(vrna_fold_compound_t *vc,
210 FILE *file)
211 {
212 hit_data data;
213
214 data.output = (file) ? file : stdout;
215 data.dangle_model = vc->params->model_details.dangles;
216 data.csv = 0; /* csv output is for backward-compatibility only */
217
218 if (vc->type == VRNA_FC_TYPE_COMPARATIVE)
219 return vrna_mfe_window_cb(vc, &default_callback_comparative, (void *)&data);
220 else
221 return vrna_mfe_window_cb(vc, &default_callback, (void *)&data);
222 }
223
224
225 PUBLIC float
vrna_mfe_window_cb(vrna_fold_compound_t * vc,vrna_mfe_window_callback * cb,void * data)226 vrna_mfe_window_cb(vrna_fold_compound_t *vc,
227 vrna_mfe_window_callback *cb,
228 void *data)
229 {
230 int energy, underflow, n_seq;
231 float mfe_local, e_factor;
232
233 /* keep track of how many times we were close to an integer underflow */
234 underflow = 0;
235
236 if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW)) {
237 vrna_message_warning("vrna_mfe_window@Lfold.c: Failed to prepare vrna_fold_compound");
238 return (float)(INF / 100.);
239 }
240
241 n_seq = (vc->type == VRNA_FC_TYPE_COMPARATIVE) ? vc->n_seq : 1;
242 e_factor = 100. * n_seq;
243
244 #ifdef VRNA_WITH_SVM
245 energy = fill_arrays(vc, &underflow, cb, NULL, data);
246 #else
247 energy = fill_arrays(vc, &underflow, cb, data);
248 #endif
249 mfe_local = (underflow > 0) ? ((float)underflow * (float)(UNDERFLOW_CORRECTION)) / e_factor : 0.;
250 mfe_local += (float)energy / e_factor;
251
252 return mfe_local;
253 }
254
255
256 #ifdef VRNA_WITH_SVM
257
258 PUBLIC float
vrna_mfe_window_zscore(vrna_fold_compound_t * vc,double min_z,FILE * file)259 vrna_mfe_window_zscore(vrna_fold_compound_t *vc,
260 double min_z,
261 FILE *file)
262 {
263 hit_data data;
264
265 data.output = (file) ? file : stdout;
266 data.dangle_model = vc->params->model_details.dangles;
267
268 return vrna_mfe_window_zscore_cb(vc, min_z, &default_callback_z, (void *)&data);
269 }
270
271
272 PUBLIC float
vrna_mfe_window_zscore_cb(vrna_fold_compound_t * vc,double min_z,vrna_mfe_window_zscore_callback * cb_z,void * data)273 vrna_mfe_window_zscore_cb(vrna_fold_compound_t *vc,
274 double min_z,
275 vrna_mfe_window_zscore_callback *cb_z,
276 void *data)
277 {
278 int energy, underflow;
279 float mfe_local;
280
281 if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
282 vrna_message_warning(
283 "vrna_mfe_window_zscore@mfe_window.c: Comparative prediction not implemented");
284 return (float)(INF / 100.);
285 }
286
287 if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW)) {
288 vrna_message_warning("vrna_mfe_window@Lfold.c: Failed to prepare vrna_fold_compound");
289 return (float)(INF / 100.);
290 }
291
292 vrna_zsc_filter_update(vc, min_z, VRNA_ZSCORE_OPTIONS_NONE);
293
294 /* keep track of how many times we were close to an integer underflow */
295 underflow = 0;
296
297 energy = fill_arrays(vc, &underflow, NULL, cb_z, data);
298
299 mfe_local = (underflow > 0) ? ((float)underflow * (float)(UNDERFLOW_CORRECTION)) / 100. : 0.;
300 mfe_local += (float)energy / 100.;
301
302 return mfe_local;
303 }
304
305
306 #endif
307
308
309 /*
310 #####################################
311 # BEGIN OF STATIC HELPER FUNCTIONS #
312 #####################################
313 */
/*
 * Allocate the j-dimension rows of all window-local DP matrices
 * (c, fML, hard constraints, and ptype/pscore depending on the fold
 * compound type) for the initial window at the 3' end of the sequence.
 * Rows are (maxdist + 5) wide; indexing is window-local via [i][j - i].
 */
PRIVATE INLINE void
allocate_dp_matrices(vrna_fold_compound_t *fc)
{
  int i, j, length, maxdist, **c, **fML;
  vrna_hc_t *hc;
  vrna_sc_t *sc;

  length = fc->length;
  maxdist = MIN2(fc->window_size, length);
  hc = fc->hc;
  c = fc->matrices->c_local;
  fML = fc->matrices->fML_local;

  /* reserve additional memory for j-dimension */
  for (i = length; (i > length - maxdist - 5) && (i >= 0); i--) {
    c[i] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
    fML[i] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
    hc->matrix_local[i] = (unsigned char *)vrna_alloc(sizeof(unsigned char) * (maxdist + 5));
    if (fc->type == VRNA_FC_TYPE_SINGLE)
      fc->ptype_local[i] = vrna_alloc(sizeof(char) * (maxdist + 5));
    else if (fc->type == VRNA_FC_TYPE_COMPARATIVE)
      fc->pscore_local[i] = vrna_alloc(sizeof(int) * (maxdist + 5));
  }

  /*
   * allocate one more entry for comparative predictions to allow for
   * access to i - 1 when processing [i ... maxdist]. This is required
   * for (default) hard constraints with noLP option
   */
  if (fc->type == VRNA_FC_TYPE_COMPARATIVE)
    if (length > maxdist + 5)
      fc->pscore_local[length - maxdist - 5] = vrna_alloc(sizeof(int) * (maxdist + 5));

  switch (fc->type) {
    case VRNA_FC_TYPE_SINGLE:
      /* soft constraint rows mirror the DP matrix window */
      sc = fc->sc;
      if (sc) {
        if (sc->energy_bp_local)
          for (i = length; (i > length - maxdist - 5) && (i >= 0); i--)
            sc->energy_bp_local[i] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));

        if (sc->energy_up)
          for (i = length; (i > length - maxdist - 5) && (i >= 0); i--)
            sc->energy_up[i] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));

        for (i = length; (i > length - maxdist - 5) && (i >= 0); i--)
          vrna_sc_update(fc, i, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW);
      }

      break;

    case VRNA_FC_TYPE_COMPARATIVE:
      break;
  }

  /* pre-fill the freshly allocated rows with INF (no structure yet) */
  if (fc->type == VRNA_FC_TYPE_SINGLE)
    for (j = length; j > length - maxdist - 4; j--)
      for (i = (length - maxdist - 4 > 0) ? length - maxdist - 4 : 1; i < j; i++)
        c[i][j - i] = fML[i][j - i] = INF;
  else if (fc->type == VRNA_FC_TYPE_COMPARATIVE)
    for (j = length; j > length - maxdist - 3; j--)
      for (i = (length - maxdist - 2 > 0) ? length - maxdist - 2 : 1; i < j; i++)
        c[i][j - i] = fML[i][j - i] = INF;
}
378
379
/*
 * Release the remaining window-local DP matrix rows after the sliding
 * window has reached the 5' end (rows 0 .. maxdist+4 are the only ones
 * still allocated at that point; earlier rows were rotated/transferred).
 */
PRIVATE INLINE void
free_dp_matrices(vrna_fold_compound_t *fc)
{
  int i, length, maxdist, **c, **fML, **ggg, with_gquad;
  vrna_hc_t *hc;
  vrna_sc_t *sc;

  length = fc->length;
  maxdist = MIN2(fc->window_size, length);
  hc = fc->hc;
  c = fc->matrices->c_local;
  fML = fc->matrices->fML_local;
  ggg = fc->matrices->ggg_local;
  with_gquad = fc->params->model_details.gquad;


  /* free additional memory for j-dimension */
  for (i = 0; (i < maxdist + 5) && (i <= length); i++) {
    if (fc->type == VRNA_FC_TYPE_SINGLE) {
      free(fc->ptype_local[i]);
      fc->ptype_local[i] = NULL;
    } else if (fc->type == VRNA_FC_TYPE_COMPARATIVE) {
      free(fc->pscore_local[i]);
      fc->pscore_local[i] = NULL;
    }

    free(c[i]);
    c[i] = NULL;
    free(fML[i]);
    fML[i] = NULL;
    free(hc->matrix_local[i]);
    hc->matrix_local[i] = NULL;
  }

  switch (fc->type) {
    case VRNA_FC_TYPE_SINGLE:
      /* soft constraint rows were allocated alongside the DP rows */
      sc = fc->sc;
      if (sc) {
        if (sc->energy_up) {
          for (i = 0; (i < maxdist + 5) && (i <= length); i++) {
            free(sc->energy_up[i]);
            sc->energy_up[i] = NULL;
          }
        }

        if (sc->energy_bp_local) {
          for (i = 0; (i < maxdist + 5) && (i <= length); i++) {
            free(sc->energy_bp_local[i]);
            sc->energy_bp_local[i] = NULL;
          }
        }
      }

      break;

    case VRNA_FC_TYPE_COMPARATIVE:
      break;
  }

  if (with_gquad) {
    /*
     * NOTE(review): this loop bound is i <= maxdist + 5 while every other
     * cleanup loop above uses i < maxdist + 5 — presumably the G-quadruplex
     * matrix owns one extra row; confirm against vrna_gquad_mx_local_update()
     */
    for (i = 0; (i <= maxdist + 5) && (i <= length); i++)
      free(ggg[i]);
    free(ggg);
    fc->matrices->ggg_local = NULL;
  }
}
446
447
/*
 * Slide the window one position towards the 5' end after row i has been
 * processed: the row buffer at i + maxdist + 4 (now out of the window)
 * is recycled as the new row i - 1, and re-initialized with INF.
 */
PRIVATE INLINE void
rotate_dp_matrices(vrna_fold_compound_t *fc,
                   int i)
{
  int ii, maxdist, length, **c, **fML;
  vrna_hc_t *hc;
  vrna_sc_t *sc;

  length = fc->length;
  maxdist = fc->window_size;
  c = fc->matrices->c_local;
  fML = fc->matrices->fML_local;
  hc = fc->hc;

  if (i + maxdist + 4 <= length) {
    /* move the expired row buffers to the new front of the window */
    c[i - 1] = c[i + maxdist + 4];
    c[i + maxdist + 4] = NULL;
    fML[i - 1] = fML[i + maxdist + 4];
    fML[i + maxdist + 4] = NULL;
    hc->matrix_local[i - 1] = hc->matrix_local[i + maxdist + 4];
    hc->matrix_local[i + maxdist + 4] = NULL;

    /* rotate base pair soft constraints */
    switch (fc->type) {
      case VRNA_FC_TYPE_SINGLE:
        sc = fc->sc;
        if (sc) {
          if (sc->energy_bp_local) {
            sc->energy_bp_local[i - 1] = sc->energy_bp_local[i + maxdist + 4];
            sc->energy_bp_local[i + maxdist + 4] = NULL;
          }

          if (sc->energy_up) {
            sc->energy_up[i - 1] = sc->energy_up[i + maxdist + 4];
            sc->energy_up[i + maxdist + 4] = NULL;
          }
        }

        break;

      case VRNA_FC_TYPE_COMPARATIVE:
        break;
    }

    /* G-quadruplex contributions for the incoming row */
    if ((fc->params->model_details.gquad) && (i > 1))
      vrna_gquad_mx_local_update(fc, i - 1);

    /* recycled row must start out with no (finite-energy) structure */
    for (ii = 0; ii < maxdist + 5; ii++) {
      c[i - 1][ii] = INF;
      fML[i - 1][ii] = INF;
    }
  }
}
501
502
/*
 * Initialize pair types / covariance scores and hard/soft constraints
 * for all rows of the initial window at the 3' end of the sequence.
 * 'dm' is the pairtype substitution score matrix (comparative mode only).
 */
PRIVATE INLINE void
init_constraints(vrna_fold_compound_t *fc,
                 float **dm)
{
  int i, length, maxdist;

  length = fc->length;
  maxdist = fc->window_size;

  switch (fc->type) {
    case VRNA_FC_TYPE_SINGLE:
      for (i = length; (i >= length - maxdist - 4) && (i > 0); i--) {
        make_ptypes(fc, i);
        vrna_hc_update(fc, i, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
        vrna_sc_update(fc, i, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW);
      }
      break;

    case VRNA_FC_TYPE_COMPARATIVE:
      for (i = length; (i >= length - maxdist - 4) && (i > 0); i--) {
        make_pscores(fc, i, dm);
        vrna_hc_update(fc, i, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
      }

      /* for noLP option: one extra row so that [i - 1] can be consulted */
      if (length > maxdist + 5)
        make_pscores(fc, length - maxdist - 5, dm);

      break;
  }
}
534
535
/*
 * Rotate the constraint data structures (ptype or pscore rows, hard and
 * soft constraints) alongside the DP matrices when the window moves from
 * row i to row i - 1. Comparative mode keeps one extra look-behind row
 * (i - 2), hence the offset difference between the two branches.
 */
PRIVATE INLINE void
rotate_constraints(vrna_fold_compound_t *fc,
                   float **dm,
                   int i)
{
  int length, maxdist;

  length = fc->length;
  maxdist = fc->window_size;

  switch (fc->type) {
    case VRNA_FC_TYPE_SINGLE:
      if (i + maxdist + 4 <= length) {
        /* recycle the expired ptype row for the incoming position */
        fc->ptype_local[i - 1] = fc->ptype_local[i + maxdist + 4];
        fc->ptype_local[i + maxdist + 4] = NULL;
        if (i > 1) {
          make_ptypes(fc, i - 1);
          vrna_hc_update(fc, i - 1, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
          vrna_sc_update(fc, i - 1, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW);
        }
      }

      break;

    case VRNA_FC_TYPE_COMPARATIVE:
      if (i + maxdist + 4 <= length) {
        if (i > 1) {
          /* pscore keeps a one-row look-behind (needed for noLP checks) */
          fc->pscore_local[i - 2] = fc->pscore_local[i + maxdist + 4];
          fc->pscore_local[i + maxdist + 4] = NULL;
          if (i > 2)
            make_pscores(fc, i - 2, dm);

          vrna_hc_update(fc, i - 1, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
        } else if (i == 1) {
          /* last step: no look-behind row needed anymore, just free/recycle */
          free(fc->pscore_local[i - 1]);
          fc->pscore_local[i - 1] = fc->pscore_local[i + maxdist + 4];
          fc->pscore_local[i + maxdist + 4] = NULL;
        }
      }

      break;
  }
}
579
580
/*
 * Core sliding-window DP recursion.
 *
 * Fills the window-local "c" (pairs), "fML" (multiloop components) and
 * "f3" (3'-fragment) arrays from the 3' end towards the 5' end, reporting
 * locally optimal structures through 'cb' (or, with SVM support and an
 * active z-score filter, through 'cb_z') as soon as they are completed.
 * Returns f3[1], the optimal energy of the whole sequence (in dcal/mol,
 * scaled by n_seq for comparative predictions and subject to the
 * underflow correction counted in *underflow).
 */
PRIVATE int
fill_arrays(vrna_fold_compound_t *vc,
            int *underflow,
            vrna_mfe_window_callback *cb,
#ifdef VRNA_WITH_SVM
            vrna_mfe_window_zscore_callback *cb_z,
#endif
            void *data)
{
  /* fill "c", "fML" and "f3" arrays and return optimal energy */

  char *prev;
  int i, j, length, maxdist, **c, **fML, *f3,
      with_gquad, dangle_model, turn, n_seq,
      prev_i, prev_j, prev_end, prev_en;
  float **dm;
  double e_fact;

#ifdef VRNA_WITH_SVM
  double prevz;
  unsigned char with_zscore;
  unsigned char report_subsumed;
  vrna_zsc_dat_t zsc_data;
#endif

  vrna_md_t *md;
  struct aux_arrays *helper_arrays;

  /* fallback covariance scoring matrix used when no ribosum is requested */
  int olddm[7][7] = { { 0, 0, 0, 0, 0, 0, 0 },/* hamming distance between pairs */
                      { 0, 0, 2, 2, 1, 2, 2 } /* CG */,
                      { 0, 2, 0, 1, 2, 2, 2 } /* GC */,
                      { 0, 2, 1, 0, 2, 1, 2 } /* GU */,
                      { 0, 1, 2, 2, 0, 2, 1 } /* UG */,
                      { 0, 2, 2, 1, 2, 0, 2 } /* AU */,
                      { 0, 2, 2, 2, 1, 2, 0 } /* UA */ };

  n_seq = (vc->type == VRNA_FC_TYPE_COMPARATIVE) ? vc->n_seq : 1;
  length = vc->length;
  maxdist = vc->window_size;
  md = &(vc->params->model_details);
  dangle_model = md->dangles;
  with_gquad = md->gquad;
  turn = md->min_loop_size;
  do_backtrack = 0; /* global flag from fold_vars.h */
  prev_i = 0;
  prev_j = 0;
  prev_end = 0;
  prev = NULL; /* last locally optimal structure, pending report */
  prev_en = 0;
  e_fact = 100 * n_seq; /* dcal/mol -> kcal/mol conversion (per sequence) */
  dm = NULL;
#ifdef VRNA_WITH_SVM
  prevz = 0.;
  zsc_data = vc->zscore_data;
  with_zscore = (zsc_data) ? zsc_data->filter_on : 0;
  report_subsumed = (zsc_data) ? zsc_data->report_subsumed : 0;
#endif

  if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
#ifdef VRNA_WITH_SVM
    /* no z-scoring for comparative structure prediction */
    if (zsc_data) {
      vrna_zsc_filter_free(vc);
      zsc_data = NULL;
      with_zscore = 0;
      report_subsumed = 0;
    }
#endif

    /* choose the covariance scoring matrix for make_pscores()/cov_score() */
    if (md->ribo) {
      if (RibosumFile != NULL)
        dm = readribosum(RibosumFile);
      else
        dm = get_ribosum((const char **)vc->sequences, n_seq, length);
    } else {
      /*use usual matrix*/
      dm = (float **)vrna_alloc(7 * sizeof(float *));
      for (i = 0; i < 7; i++) {
        dm[i] = (float *)vrna_alloc(7 * sizeof(float));
        for (j = 0; j < 7; j++)
          dm[i][j] = (float)olddm[i][j];
      }
    }
  }

  c = vc->matrices->c_local;
  fML = vc->matrices->fML_local;
  f3 = vc->matrices->f3_local;

  /* allocate memory for all helper arrays */
  helper_arrays = get_aux_arrays(maxdist);

  /* reserve additional memory for j-dimension */
  allocate_dp_matrices(vc);

  init_constraints(vc, dm);

  if (with_gquad)
    vrna_gquad_mx_local_update(vc, length - maxdist - 4);

  /* main recursion: process rows from the 3' end towards the 5' end */
  for (i = length - turn - 1; i >= 1; i--) {
    /* i,j in [1..length] */
    for (j = i + 1; j <= MIN2(i + turn, length); j++)
      c[i][j - i] = fML[i][j - i] = INF;

    for (j = i + turn + 1; j <= length && j <= i + maxdist; j++) {
      /* decompose subsegment [i, j] with pair (i, j) */
      c[i][j - i] = decompose_pair(vc, i, j, helper_arrays);

      /* decompose subsegment [i, j] that is multibranch loop part with at least one branch */
      fML[i][j - i] = vrna_E_ml_stems_fast(vc, i, j, helper_arrays->Fmi, helper_arrays->DMLi);

      if ((vc->aux_grammar) && (vc->aux_grammar->cb_aux))
        vc->aux_grammar->cb_aux(vc, i, j, vc->aux_grammar->data);
    } /* for (j...) */

    /* calculate energies of 5' and 3' fragments */
    f3[i] = vrna_E_ext_loop_3(vc, i);

    {
      char *ss = NULL;

      if (f3[i] < f3[i + 1]) {
        /*
         * instead of backtracing in the next iteration, we backtrack now
         * already. This is necessary to accomodate for change in free
         * energy due to unpaired nucleotides in the exterior loop, which
         * may happen in the case of using soft constraints
         */
        int ii, jj;
        ii = i;
        jj = vrna_BT_ext_loop_f3_pp(vc, &ii, maxdist);
        if (jj > 0) {
#ifdef VRNA_WITH_SVM
          double thisz = 0;
          if (want_backtrack(vc, ii, jj, &thisz)) {
#endif
          ss = backtrack(vc, ii, jj);
          if (prev) {
            /* report the pending hit unless the new one subsumes it */
            if ((jj < prev_j) ||
#ifdef VRNA_WITH_SVM
                ((report_subsumed) && (prevz < thisz)) || /* yield last structure if it's z-score is higher than the current one */
#endif
                (strncmp(ss + prev_i - ii, prev, prev_j - prev_i + 1))) {
              /* ss does not contain prev */
#ifdef VRNA_WITH_SVM
              if (with_zscore)
                cb_z(prev_i, prev_end, prev, prev_en / e_fact, prevz, data);
              else
#endif
              cb(prev_i, prev_end, prev, prev_en / e_fact, data);
            }

            free(prev);
          }

          /* remember the current hit; it may still be subsumed later */
          prev = ss;
          prev_i = ii;
          prev_j = jj;
          prev_end = MIN2(jj + ((dangle_model) ? 1 : 0), length);
          prev_en = f3[ii] - f3[jj + 1];
#ifdef VRNA_WITH_SVM
          prevz = thisz;
        }

#endif
        } else if (jj == -1) {
          /* some error occured during backtracking */
          vrna_message_error("backtrack failed in short backtrack 1");
        }
      }

      if (i == 1) {
        /* reached the 5' end: flush any pending hit */
        if (prev) {
#ifdef VRNA_WITH_SVM
          if (with_zscore)
            cb_z(prev_i, prev_end, prev, prev_en / e_fact, prevz, data);
          else
#endif
          cb(prev_i, prev_end, prev, prev_en / e_fact, data);

          free(prev);
          prev = NULL;
#ifdef VRNA_WITH_SVM
        } else if ((f3[i] < 0) && (!with_zscore)) {
          /* why !with_zscore? */
#else
        } else if (f3[i] < 0) {
#endif
          /* no hit was ever recorded but the total energy is stabilizing */
          int ii, jj;
          ii = i;
          jj = vrna_BT_ext_loop_f3_pp(vc, &ii, maxdist);
          if (jj > 0) {
#ifdef VRNA_WITH_SVM
            double thisz = 0;
            if (want_backtrack(vc, ii, jj, &thisz)) {
#endif
            ss = backtrack(vc, ii, jj);
#ifdef VRNA_WITH_SVM
            if (with_zscore)
              cb_z(ii, MIN2(jj + ((dangle_model) ? 1 : 0),
                            length), ss, (f3[1] - f3[jj + 1]) / e_fact, thisz, data);
            else
#endif
            cb(ii, MIN2(jj + ((dangle_model) ? 1 : 0),
                        length), ss, (f3[1] - f3[jj + 1]) / e_fact, data);

            free(ss);
#ifdef VRNA_WITH_SVM
          }

#endif
          } else if (jj == -1) {
            /* some error occured during backtracking */
            vrna_message_error("backtrack failed in short backtrack 2");
          }
        }
      }
    }

    /* check for values close to integer underflow */
    if (INT_CLOSE_TO_UNDERFLOW(f3[i])) {
      /* correct f3 free energies and increase underflow counter */
      int cnt;
      for (cnt = i; cnt <= MIN2(i + maxdist + 2, length); cnt++)
        f3[cnt] -= UNDERFLOW_CORRECTION;
      (*underflow)++;
    }

    /* slide the window one position towards the 5' end */
    rotate_aux_arrays(helper_arrays, maxdist);
    rotate_dp_matrices(vc, i);
    rotate_constraints(vc, dm, i);
  }

  /* clean up memory */
  free_aux_arrays(helper_arrays);
  free_dp_matrices(vc);

  if (dm) {
    for (i = 0; i < 7; i++)
      free(dm[i]);
    free(dm);
  }

  return f3[1];
}
827
828
829 #ifdef VRNA_WITH_SVM
/*
 * Decide whether the hit [i, j] should be backtracked when the z-score
 * filter is active, computing or retrieving its z-score into *z.
 * Returns 1 (backtrack) or 0 (skip). Without an active filter, *z is
 * left at INF and backtracking is always requested.
 */
PRIVATE INLINE int
want_backtrack(vrna_fold_compound_t *fc,
               int i,
               int j,
               double *z)
{
  int bt, *f3;

  bt = 1; /* we want to backtrack by default */
  *z = (double)INF;

  if ((fc->zscore_data) &&
      (fc->zscore_data->filter_on)) {
    if ((fc->zscore_data->pre_filter) &&
        (fc->zscore_data->current_i == i)) {
      /*
       * z-score for this i was already computed by the pre-filter.
       * NOTE(review): bt stays 1 in this branch even if *z > min_z —
       * presumably the pre-filter already discarded such pairs; confirm
       */
      *z = fc->zscore_data->current_z[j];
    } else {
      bt = 0;
      f3 = fc->matrices->f3_local;
      *z = vrna_zsc_compute(fc, i, j, f3[i] - f3[j + 1]);
    }

    if ((*z) <= fc->zscore_data->min_z)
      bt = 1;
  }

  return bt;
}
858
859
860 #endif
861
862
/*
 * Trace back the locally optimal structure for the segment starting at
 * 'start' (window-relative length up to 'end') and return it as a
 * freshly allocated dot-bracket string (caller frees). G-quadruplexes
 * are marked with '+' characters.
 */
PRIVATE char *
backtrack(vrna_fold_compound_t *vc,
          int start,
          int end)
{
  /*------------------------------------------------------------------
   * trace back through the "c", "f3" and "fML" arrays to get the
   * base pairing list. No search for equivalent structures is done.
   * This is fast, since only few structure elements are recalculated.
   * ------------------------------------------------------------------*/
  sect sector[MAXSECTORS]; /* backtracking sectors */
  char *structure, **ptype;
  int i, j, k, length, no_close, type, s, b, bt_type, turn,
      dangle_model, noLP, noGUclosure, **c, dangle3, ml, cij,
      **pscore, canonical, p, q, comp1, comp2, max3;
  vrna_param_t *P;
  vrna_md_t *md;
  vrna_bp_stack_t *bp_stack;

  length = vc->length;
  ptype = vc->ptype_local;
  pscore = vc->pscore_local;
  P = vc->params;
  md = &(P->model_details);
  dangle_model = md->dangles;
  noLP = md->noLP;
  noGUclosure = md->noGUclosure;
  bt_type = md->backtrack_type;
  turn = md->min_loop_size;
  c = vc->matrices->c_local;

  s = 0; /* depth of backtracking stack */
  b = 0; /* number of base pairs */
  bp_stack = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2))); /* add a guess of how many G's may be involved in a G quadruplex */

  /* seed the stack with the full segment; bt_type selects the start matrix */
  sector[++s].i = start;
  sector[s].j = MIN2(length, end);
  sector[s].ml = (bt_type == 'M') ? 1 : ((bt_type == 'C') ? 2 : 0);

  structure = (char *)vrna_alloc((MIN2(length - start, end) + 3) * sizeof(char));

  memset(structure, '.', MIN2(length - start, end) + 1);

  dangle3 = 0; /* set when the hit extends one position 3' of the last pair */

  while (s > 0) {
    canonical = 1; /* (i,j) closes a canonical structure */

    /* pop one element from stack */
    i = sector[s].i;
    j = sector[s].j;
    ml = sector[s--].ml; /* ml is a flag indicating if backtracking is to
                          * occur in the fML- (1) or in the f-array (0) */

    if (j < i + turn + 1)
      continue; /* no more pairs in this interval */

    switch (ml) {
      /* backtrack in f3 */
      case 0:
        if (vrna_BT_ext_loop_f3(vc, &i, j, &p, &q, bp_stack, &b)) {
          if (i > 0) {
            /* remaining exterior segment still to process */
            sector[++s].i = i;
            sector[s].j = j;
            sector[s].ml = 0;
          }

          if (p > 0) {
            if (((i == q + 2) || (dangle_model)) && (q < length))
              dangle3 = 1;

            i = p;
            j = q;
            goto repeat1;
          } else if (md->gquad) {
            /*
             * check whether last element on the bp_stack is involved in G-Quadruplex formation
             * and increase output dot-bracket string length by 1 if necessary
             */
            if ((bp_stack[b].i == bp_stack[b].j) && (bp_stack[b].i < length))
              dangle3 = 1;
          }

          continue;
        } else {
          vrna_message_error("backtracking failed in f3, segment [%d,%d]\n", i, j);
        }

        break;

      /* trace back in fML array */
      case 1:
        if (vrna_BT_mb_loop_split(vc, &i, &j, &p, &q, &comp1, &comp2, bp_stack, &b)) {
          if (i > 0) {
            sector[++s].i = i;
            sector[s].j = j;
            sector[s].ml = comp1;
          }

          if (p > 0) {
            sector[++s].i = p;
            sector[s].j = q;
            sector[s].ml = comp2;
          }

          continue;
        } else {
          vrna_message_error("backtracking failed in fML, segment [%d,%d]\n", i, j);
        }

        break;

      /* backtrack in c */
      case 2:
        bp_stack[++b].i = i;
        bp_stack[b].j = j;
        goto repeat1;
        break;

      default:
        vrna_message_error("Backtracking failed due to unrecognized DP matrix!");
        break;
    }

repeat1:

    /*----- begin of "repeat:" -----*/
    if (canonical)
      cij = c[i][j - i];

    if (noLP) {
      /* peel off helix stacks enforced by the no-lonely-pairs option */
      if (vrna_BT_stack(vc, &i, &j, &cij, bp_stack, &b)) {
        canonical = 0;
        goto repeat1;
      }
    }

    canonical = 1;

    switch (vc->type) {
      case VRNA_FC_TYPE_SINGLE:
        type = vrna_get_ptype_window(i, j, ptype);

        no_close = (((type == 3) || (type == 4)) && noGUclosure);

        if (no_close) {
          if (cij == FORBIDDEN)
            continue;
        } else {
          if (vrna_BT_hp_loop(vc, i, j, cij, bp_stack, &b))
            continue;
        }

        break;

      case VRNA_FC_TYPE_COMPARATIVE:
        /* undo the covariance bonus before testing loop decompositions */
        cij += pscore[i][j - i];
        if (vrna_BT_hp_loop(vc, i, j, cij, bp_stack, &b))
          continue;

        break;
    }

    if (vrna_BT_int_loop(vc, &i, &j, cij, bp_stack, &b)) {
      if (i < 0)
        continue;
      else
        goto repeat1;
    }

    /* (i.j) must close a multi-loop */
    if (vrna_BT_mb_loop(vc, &i, &j, &k, cij, &comp1, &comp2)) {
      sector[++s].i = i;
      sector[s].j = k;
      sector[s].ml = comp1;
      sector[++s].i = k + 1;
      sector[s].j = j;
      sector[s].ml = comp2;
    } else {
      vrna_message_error("backtracking failed in repeat, segment [%d,%d]\n", i, j);
    }

    /* end of repeat: --------------------------------------------------*/
  } /* end of infinite while loop */


  bp_stack[0].i = b;

  /* and now create a dot-bracket string from the base pair stack... */
  max3 = 1;
  for (i = 1; i <= b; i++) {
    if (bp_stack[i].i == bp_stack[i].j) {
      /* Gquad bonds are marked as bp[i].i == bp[i].j */
      structure[bp_stack[i].i - start] = '+';
    } else {
      /* the following ones are regular base pairs */
      structure[bp_stack[i].i - start] = '(';
      structure[bp_stack[i].j - start] = ')';
    }

    /* track the right-most structured position for the final trim */
    if (max3 < bp_stack[i].j - start)
      max3 = bp_stack[i].j - start;
  }

  free(bp_stack);

  /* trim the string to the structured part (plus optional 3' dangle) */
  structure = (char *)vrna_realloc(structure,
                                   sizeof(char) * (max3 + dangle3 + 2));
  structure[max3 + dangle3 + 1] = '\0';

  return structure;
}
1075
1076
/*
 * Fill row i of the window-local pair type matrix: for every j within
 * the window, store the pair type of (i, j), or 0 when the pair is
 * impossible (also when it could only be a lonely pair under noLP).
 */
PRIVATE void
make_ptypes(vrna_fold_compound_t *vc,
            int i)
{
  int j, k, type, n, maxdist, turn, noLP;
  short *S;
  char **ptype;
  vrna_md_t *md;

  n = (int)vc->length;
  S = vc->sequence_encoding2;
  ptype = vc->ptype_local;
  maxdist = vc->window_size;
  md = &(vc->params->model_details);
  turn = md->min_loop_size;
  noLP = md->noLP;

  for (k = turn + 1; k < maxdist; k++) {
    j = i + k;
    if (j > n)
      break;

    type = md->pair[S[i]][S[j]];

    if (noLP && type) {
      /* forbid (i, j) if neither the inner pair (i+1, j-1) nor an
       * enclosing pair (i-1, j+1) can form — it would be a lonely pair */
      if (!ptype[i + 1][j - 1 - i - 1])
        if (j == n || i == 1 || (!md->pair[S[i - 1]][S[j + 1]]))
          type = 0;
    }

    ptype[i][j - i] = type;
  }
}
1110
1111
/*
 * Covariance score for the alignment columns (i, j): rewards consistent
 * and compensatory mutations across sequences (via the substitution
 * matrix 'dm') and penalizes sequences that cannot form the pair.
 * Returns NONE when more than half the sequences cannot pair i with j.
 */
PRIVATE double
cov_score(vrna_fold_compound_t *fc,
          int i,
          int j,
          float **dm)
{
  char **AS;
  short **S;
  int n_seq, k, l, s, type;
  double score;
  vrna_md_t *md;
  /* pfreq[t] counts sequences with pair type t at (i, j); index 7 = gap-gap */
  int pfreq[8] = {
    0, 0, 0, 0, 0, 0, 0, 0
  };

  n_seq = fc->n_seq;
  AS = fc->sequences;
  S = fc->S;
  md = &(fc->params->model_details);

  for (s = 0; s < n_seq; s++) {
    if (S[s][i] == 0 && S[s][j] == 0) {
      type = 7; /* gap-gap */
    } else {
      /* '~' marks unaligned/padded positions — treat like gap-gap */
      if ((AS[s][i] == '~') || (AS[s][j] == '~'))
        type = 7;
      else
        type = md->pair[S[s][i]][S[s][j]];
    }

    pfreq[type]++;
  }

  /* too many sequences where the pair is impossible: forbid it */
  if (pfreq[0] * 2 + pfreq[7] > n_seq) {
    return NONE;
  } else {
    for (k = 1, score = 0.; k <= 6; k++) /* ignore pairtype 7 (gap-gap) */
      for (l = k; l <= 6; l++)
        /*
         * scores for replacements between pairtypes
         * consistent or compensatory mutations score 1 or 2
         */
        score += pfreq[k] * pfreq[l] * dm[k][l];
  }

  /* counter examples score -1, gap-gap scores -0.25 */
  return md->cv_fact * ((UNIT * score) / n_seq - md->nc_fact * UNIT * (pfreq[0] + pfreq[7] * 0.25));
}
1160
1161
/*
 * Fill row i of the window-local covariance score matrix pscore, and —
 * with the noLP option — forbid pairs that could only occur isolated.
 */
PRIVATE void
make_pscores(vrna_fold_compound_t *fc,
             int i,
             float **dm)
{
  /*
   * calculate co-variance bonus for each pair depending on
   * compensatory/consistent mutations and incompatible seqs
   * should be 0 for conserved pairs, >0 for good pairs
   */
  int n, j, **pscore, maxd, turn, noLP;
  vrna_md_t *md;

  n = (int)fc->length;
  maxd = fc->window_size;
  pscore = fc->pscore_local;
  md = &(fc->params->model_details);
  turn = md->min_loop_size;
  noLP = md->noLP;

  /*fill pscore[start], too close*/
  for (j = i + 1; (j < i + turn + 1) && (j <= n); j++)
    pscore[i][j - i] = NONE;
  /* NOTE(review): cov_score() returns double, silently truncated to the
   * int pscore storage here — presumably intentional; confirm */
  for (j = i + turn + 1; ((j <= n) && (j <= i + maxd)); j++)
    pscore[i][j - i] = cov_score(fc, i, j, dm);

  if (noLP) {
    /* remove unwanted lonely pairs */
    int otype = 0, ntype = 0;
    for (j = i + turn; ((j < n) && (j < i + maxd)); j++) {
      if ((i > 1) && (j < n))
        otype = cov_score(fc, i - 1, j + 1, dm); /* score of enclosing pair */

      if (i < n)
        ntype = pscore[i + 1][j - 1 - (i + 1)];  /* score of inner pair */
      else
        ntype = NONE;

      if ((otype < -4 * UNIT) && (ntype < -4 * UNIT)) /* worse than 2 counterex */
        pscore[i][j - i] = NONE;                      /* i.j can only form isolated pairs */
    }
  }

  /* NOTE(review): j here carries its value from whichever loop ran last
   * (one past the window end) — verify this trailing guard is intended */
  if ((j - i + 1) > maxd)
    pscore[i][j - i] = NONE;
}
1208
1209
1210 PRIVATE void
default_callback(int start,int end,const char * structure,float en,void * data)1211 default_callback(int start,
1212 int end,
1213 const char *structure,
1214 float en,
1215 void *data)
1216 {
1217 FILE *output = ((hit_data *)data)->output;
1218 int dangle_model = ((hit_data *)data)->dangle_model;
1219
1220 if ((dangle_model == 2) && (start > 1))
1221 fprintf(output, ".%s (%6.2f) %4d\n", structure, en, start - 1);
1222 else
1223 fprintf(output, "%s (%6.2f) %4d\n ", structure, en, start);
1224 }
1225
1226
1227 #ifdef VRNA_WITH_SVM
1228 PRIVATE void
default_callback_z(int start,int end,const char * structure,float en,float zscore,void * data)1229 default_callback_z(int start,
1230 int end,
1231 const char *structure,
1232 float en,
1233 float zscore,
1234 void *data)
1235 {
1236 FILE *output = ((hit_data *)data)->output;
1237 int dangle_model = ((hit_data *)data)->dangle_model;
1238
1239 if ((dangle_model == 2) && (start > 1))
1240 fprintf(output, ".%s (%6.2f) %4d z= %.3f\n", structure, en, start - 1, zscore);
1241 else
1242 fprintf(output, "%s (%6.2f) %4d z= %.3f\n ", structure, en, start, zscore);
1243 }
1244
1245
1246 #endif
1247
1248
1249 PRIVATE void
default_callback_comparative(int start,int end,const char * structure,float en,void * data)1250 default_callback_comparative(int start,
1251 int end,
1252 const char *structure,
1253 float en,
1254 void *data)
1255 {
1256 FILE *output = ((hit_data *)data)->output;
1257 int dangle_model = ((hit_data *)data)->dangle_model;
1258 int csv = ((hit_data *)data)->csv;
1259
1260 if (csv == 1) {
1261 if ((dangle_model == 2) && (start > 1))
1262 fprintf(output, ".%s ,%6.2f, %4d, %4d\n", structure, en, start - 1, end);
1263 else
1264 fprintf(output, "%s ,%6.2f, %4d, %4d\n", structure, en, start, end);
1265 } else {
1266 if ((dangle_model == 2) && (start > 1))
1267 fprintf(output, ".%s (%6.2f) %4d - %4d\n", structure, en, start - 1, end);
1268 else
1269 fprintf(output, "%s (%6.2f) %4d - %4d\n", structure, en, start, end);
1270 }
1271 }
1272
1273
/*
 * One connected substructure ("block") parsed back from an Lfold output
 * file.  Blocks form a singly linked list ordered by start position and
 * are consumed/split while backtracking the global MFE structure.
 */
struct block {
  vrna_fold_compound_t *fc;       /* fold compound for this block's subsequence */
  short *pt;                      /* pair table of the block's structure (local coordinates) */
  unsigned long int start;        /* first sequence position covered (global, 1-based) */
  unsigned long int end;          /* last sequence position covered (global, 1-based) */
  unsigned long int shift;        /* offset between global and block-local coordinates */
  int energy;                     /* block energy (dcal/mol, i.e. 0.01 kcal/mol units) */
  int energy_no3d; /* energy without 3'dangle */
  struct block *next_entry;       /* next block in the list, or NULL */
};
1284
1285
/*
 * Remove position i (global coordinates) from the 5' end of block b and
 * update the block's pair table, boundaries and energy accordingly.
 * If removing the closing pair at i exposes several independent stems,
 * the block is split: new blocks are allocated and inserted into the
 * linked list behind b.
 *
 * Returns 0 if the block became empty (caller should discard it),
 * 1 otherwise.
 */
PRIVATE int
update_block(unsigned int i,
             unsigned int max_n,
             struct block *b)
{
  short *S1, *S2, d5, d3;
  unsigned int type;
  int n, i_local, j_local, ediff, dangles;
  vrna_fold_compound_t *fc;
  vrna_param_t *params;
  vrna_md_t *md;

  fc = b->fc;
  n = fc->length;
  S1 = fc->sequence_encoding;
  S2 = fc->sequence_encoding2;
  params = fc->params;
  md = &(params->model_details);
  dangles = md->dangles;

  /* re-evaluate energy for removal of base pair (i, k) */
  i_local = (int)(i - (b->start - b->shift) + 1);

  if (b->pt[i_local]) {
    j_local = (int)(b->pt[i_local]);
    /* compute energy differences due to removal of base pair (i_local, j_local) */
    ediff = vrna_eval_move_pt(fc, b->pt, -i_local, -j_local);

    /* update energy */
    b->energy += ediff;
    /* remove base pair from pair table */
    b->pt[i_local] = b->pt[j_local] = 0;

    /* since we've removed the outermost base pair, the block size
     * decreases on the 3' end as well. At this point, we also
     * remove any further trailing bases, if any
     */
    int end = j_local;

    do {
      b->end--;
      if (b->start == b->end)
        break;
    } while (b->pt[--end] == 0);

    if (b->end <= b->start)
      return 0;   /* block is empty now */

    /* check whether we need to split the block into multiple ones */
    size_t stems = 0;
    size_t mem_stems = 10;
    size_t *start_stem = (size_t *)vrna_alloc(sizeof(size_t) * mem_stems);
    size_t *end_stem = (size_t *)vrna_alloc(sizeof(size_t) * mem_stems);

    /* collect local start/end positions of the remaining stems */
    for (size_t pos = i_local + 1; pos <= end; pos++)
      if (b->pt[pos] > pos) {
        start_stem[stems] = pos;
        end_stem[stems] = b->pt[pos];
        stems++;
        if (stems == mem_stems) {
          /* grow storage by ~40% */
          mem_stems *= 1.4;
          start_stem = vrna_realloc(start_stem, sizeof(size_t) * mem_stems);
          end_stem = vrna_realloc(end_stem, sizeof(size_t) * mem_stems);
        }

        /* jump over the enclosed part of this stem */
        pos = b->pt[pos];
      }

    if (stems > 1) {
      /* split off every stem but the first into a block of its own,
       * processed 3' to 5' so list insertion order stays sorted */
      for (size_t k = stems - 1; k > 0; k--) {
        /* create a new block */
        struct block *new_block = (struct block *)vrna_alloc(sizeof(struct block));

        /* construct global coordinates for new block */
        new_block->start = i + start_stem[k] - 1 - b->shift;
        new_block->end = i + end_stem[k] - 1 - b->shift;
        new_block->shift = (dangles == 2) ? 1 : 0;

        /* create structure pair table for new block */
        size_t l = end_stem[k] - start_stem[k] + 1;
        if (dangles == 2) {
          l++; /* for 5' dangles */
          if (new_block->end < max_n)
            l++; /* for 3' dangles */
        }

        new_block->pt = (short *)vrna_alloc(sizeof(short) * (l + 1));
        new_block->pt[0] = l;
        /* go through original structure and extract base pair positions relative to new block */
        for (size_t p = start_stem[k]; p <= end_stem[k]; p++) {
          if (b->pt[p] > p) {
            short i, j;

            i = p - start_stem[k] + 1 + new_block->shift;
            j = b->pt[p] - start_stem[k] + 1 + new_block->shift;

            new_block->pt[i] = j;
            new_block->pt[j] = i;

            /* remove base pair from original block */
            b->pt[b->pt[p]] = 0;
            b->pt[p] = 0;
          }
        }

        /* create fold_compound for new block */
        char *seq_block = (char *)vrna_alloc(sizeof(char) * (l + 1));
        memcpy(seq_block, b->fc->sequence + start_stem[k] - 1 - new_block->shift,
               sizeof(char) * (l));
        new_block->fc = vrna_fold_compound(seq_block,
                                           &(b->fc->params->model_details),
                                           VRNA_OPTION_DEFAULT | VRNA_OPTION_EVAL_ONLY);
        free(seq_block);

        /* compute energy of new block */
        new_block->energy = vrna_eval_structure_pt(new_block->fc, new_block->pt);

        /* search for position where we can link the new block to */
        struct block *ptr, *ptr_prev;
        ptr_prev = ptr = b;
        do {
          ptr_prev = ptr;
          ptr = ptr->next_entry;
        } while ((ptr) && (ptr->start < new_block->start));

        /* insert new block */
        ptr_prev->next_entry = new_block;
        new_block->next_entry = ptr;
      }

      /* Finally, we need to update the original block */

      /* update global end position */
      b->end = i + end_stem[0] - 1 - b->shift;

      /* update energy */
      b->energy = vrna_eval_structure_pt(b->fc, b->pt);
    }

    /* clean up memory */
    free(start_stem);
    free(end_stem);

    /* update for odd dangle models below */
    /* NOTE(review): 'dangles % 1' is always 0 (x % 1 == 0 for any x), so
     * this branch is dead code; the comment suggests 'dangles % 2' was
     * intended — TODO confirm against upstream (body is empty anyway) */
    if (dangles % 1) {
    }
  } else if (dangles % 1) {
    /* NOTE(review): same 'dangles % 1' issue as above — this whole
     * 5'-dangle update is currently unreachable; verify the intended
     * condition (the switch below handles dangles == 2) before changing */
    /* position i is unpaired, so we only need to update energies if
     * position i + 1 forms a base pair due to 5' dangles
     */
    if (b->pt[i + 1]) {
      /* NOTE(review): unlike the computation at the top of this function,
       * this mixes global i with b->start without subtracting — looks
       * inconsistent with local coordinates; confirm before relying on it */
      i_local = (int)i + b->start - 1 + 1;
      j_local = b->pt[i_local + 1];
      switch (dangles) {
        case 2:
          d5 = S1[i_local - 1];
          /* NOTE(review): 'S1[j_local + 1 - 1]' == S1[j_local]; a 3' dangle
           * would normally read S1[j_local + 1] — TODO confirm */
          d3 = (j_local + 1 <= i_local + n - 1) ? S1[j_local + 1 - 1] : -1;
          type = vrna_get_ptype_md(S2[i_local],
                                   S2[j_local],
                                   md);
          /* energy difference: stem without 5' dangle vs. with it */
          ediff = vrna_E_ext_stem(type, -1, d3, params) -
                  vrna_E_ext_stem(type, d5, d3, params);
          break;

        case 0:
          ediff = 0;
          break;

        default:
          ediff = 0;
          break;
      }
      /* update the energy */
      b->energy += ediff;
    }
  }

  /* increment start position and shift */
  b->start++;
  b->shift++;

  return 1;
}
1469
1470
1471 PRIVATE struct block *
remove_block(struct block ** before,struct block * block)1472 remove_block(struct block **before,
1473 struct block *block)
1474 {
1475 struct block *ptr, **ptr2;
1476
1477 if (*before == block)
1478 ptr2 = before;
1479 else
1480 ptr2 = &((*before)->next_entry);
1481
1482 /* remember next entry */
1483 ptr = block->next_entry;
1484
1485 /* cleanup memory for block we want to remove */
1486 vrna_fold_compound_free(block->fc);
1487 free(block->pt);
1488 /* remove block */
1489 free(block);
1490
1491 /* link next entry */
1492 *ptr2 = ptr;
1493
1494 return ptr;
1495 }
1496
1497
1498 PRIVATE void
truncate_blocks(unsigned long int i,unsigned long int n,struct block ** block_list)1499 truncate_blocks(unsigned long int i,
1500 unsigned long int n,
1501 struct block **block_list)
1502 {
1503 struct block *ptr_prev, *ptr;
1504
1505 ptr_prev = NULL;
1506 ptr = *block_list;
1507
1508 while (ptr) {
1509 /* 1. remove block if it was superseded by index i */
1510 if (ptr->end <= i) {
1511 ptr = remove_block((ptr_prev) ? &ptr_prev : block_list, ptr);
1512 continue;
1513 }
1514
1515 /* 2. Update energy */
1516 if (ptr->start == i) {
1517 /* remove nucleotide at position i and update energies accordingly */
1518 /* this also splits a substructure component into consecutive
1519 * tokens, if necessary
1520 */
1521 if (!update_block(i, n, ptr)) {
1522 /* this block doesn't contain any more pairs so we remove it entirely */
1523 ptr = remove_block((ptr_prev) ? &ptr_prev : block_list, ptr);
1524 continue;
1525 }
1526 } else if (ptr->start > i) {
1527 break;
1528 }
1529
1530 /* go to next block */
1531 ptr_prev = ptr;
1532 ptr = ptr->next_entry;
1533 }
1534 }
1535
1536
1537 PRIVATE struct block *
extract_Lfold_entry(FILE * f,long line_start,const char * sequence,const vrna_md_t * md)1538 extract_Lfold_entry(FILE *f,
1539 long line_start,
1540 const char *sequence,
1541 const vrna_md_t *md)
1542 {
1543 char *l, *seq_local, *structure;
1544 unsigned long int i, j;
1545 float en;
1546 struct block *storage;
1547
1548 storage = NULL;
1549
1550 if (fseek(f, line_start, SEEK_SET) != -1) {
1551 l = vrna_read_line(f);
1552 en = INF / 100.;
1553 structure = (char *)vrna_alloc(sizeof(char) * (strlen(l) + 1));
1554
1555 /* each line should consist of 3 parts,
1556 * 1. A dot-bracket structure
1557 * 2. An energy value in kcal/mol
1558 * 3. A starting position
1559 */
1560 if (sscanf(l, "%[.()] %*c %f %*c %lu", structure, &en, &i) == 3) {
1561 storage = (struct block *)vrna_alloc(sizeof(struct block));
1562
1563 j = i + strlen(structure) - 1;
1564
1565 seq_local = (char *)vrna_alloc(sizeof(char) * (j - i + 2));
1566 memcpy(seq_local, sequence + i - 1, (sizeof(char) * (j - i + 1)));
1567
1568 storage->fc = vrna_fold_compound(seq_local,
1569 md,
1570 VRNA_OPTION_DEFAULT | VRNA_OPTION_EVAL_ONLY);
1571 storage->pt = vrna_ptable(structure);
1572 storage->start = i;
1573 storage->end = j;
1574 storage->shift = 0;
1575 storage->energy = vrna_convert_kcal_to_dcal(en);
1576 storage->energy_no3d = 0; /* energy without 3'dangle */
1577 storage->next_entry = NULL;
1578
1579 free(seq_local);
1580
1581 /* with dangles==0 or dangles==2, we don't need the trailing and leading unpaired base(s) */
1582 if ((md->dangles % 1) == 0) {
1583 if (storage->pt[1] == 0) {
1584 storage->start++;
1585 storage->shift++;
1586 }
1587
1588 if (storage->pt[storage->fc->length] == 0)
1589 storage->end--;
1590 }
1591 }
1592
1593 /* we only store the pair table, and drop the dot-bracket string */
1594 free(structure);
1595 free(l);
1596 }
1597
1598 return storage;
1599 }
1600
1601
1602 PRIVATE unsigned long int
insert_block(char * structure,struct block * selected,int * energy)1603 insert_block(char *structure,
1604 struct block *selected,
1605 int *energy)
1606 {
1607 short *pt;
1608 unsigned int long i, i_local, start, end, shift;
1609 int e;
1610
1611 pt = selected->pt;
1612 start = selected->start;
1613 end = selected->end;
1614 shift = selected->shift;
1615 e = selected->energy;
1616
1617 /* The last block is always part of the full length MFE */
1618 for (i = start; i <= end; i++) {
1619 i_local = i - start + shift + 1;
1620 if (pt[i_local] > i_local) {
1621 structure[i - 1] = '(';
1622 structure[start - shift + pt[i_local] - 1 - 1] = ')';
1623 }
1624 }
1625
1626 *energy = *energy - e;
1627
1628 return end;
1629 }
1630
1631
1632 PRIVATE void
print_block_list(struct block * block_list)1633 print_block_list(struct block *block_list)
1634 {
1635 struct block *ptr;
1636 size_t cnt = 0;
1637
1638 for (ptr = block_list; ptr; ptr = ptr->next_entry, cnt++)
1639 printf("block %lu: en=%d, start: %lu, end: %lu, shift: %lu\n",
1640 cnt,
1641 ptr->energy,
1642 ptr->start,
1643 ptr->end,
1644 ptr->shift);
1645 printf("%lu blocks remaining\n", cnt);
1646 fflush(stdout);
1647 }
1648
1649
1650 PRIVATE void
append_blocks(struct block ** last_block,FILE * f,long * lines,size_t * lines_left,vrna_fold_compound_t * fc,unsigned long int max_start)1651 append_blocks(struct block **last_block,
1652 FILE *f,
1653 long *lines,
1654 size_t *lines_left,
1655 vrna_fold_compound_t *fc,
1656 unsigned long int max_start)
1657 {
1658 vrna_md_t *md;
1659
1660 md = &(fc->params->model_details);
1661
1662 while (((*lines_left) > 0) &&
1663 ((*last_block)->start < max_start)) {
1664 (*lines_left)--;
1665
1666 struct block *ptr = extract_Lfold_entry(f, lines[(*lines_left)], fc->sequence, md);
1667
1668 if (ptr != NULL) {
1669 (*last_block)->next_entry = ptr;
1670 *last_block = ptr;
1671 }
1672 }
1673 ;
1674 }
1675
1676
/*
 * Reconstruct the global MFE structure of the full sequence from the
 * local structure hits previously written to an Lfold output file.
 *
 * fc              fold compound that holds the forward recursion (f3) matrix
 * Lfold_filename  file with one "structure (energy) start" hit per line
 * file_pos        file offset where parsing starts (values < 0 mean 0)
 * structure       receives a newly allocated dot-bracket string of length
 *                 fc->length (caller frees), or NULL on failure
 * mfe             global minimum free energy (kcal/mol) from the forward pass
 *
 * Returns 1 on success, 0 otherwise.
 */
PUBLIC int
vrna_backtrack_window(vrna_fold_compound_t *fc,
                      const char *Lfold_filename,
                      long file_pos,
                      char **structure,
                      double mfe)
{
  unsigned int n, look_ahead;
  int e, maxdist, underflows, *f3;
  int ret;
  double mfe_corr;
  FILE *f;

  ret = 0;
  /* NOTE(review): 'structure' is dereferenced here before the NULL check
   * below — a NULL 'structure' argument would crash; the guard should
   * come first */
  *structure = NULL;

  if ((fc) &&
      (Lfold_filename) &&
      (structure)) {
    vrna_md_t *md;

    n = fc->length;
    md = &(fc->params->model_details);
    maxdist = md->window_size;
    /* how far past the current position we keep reading new blocks */
    look_ahead = 3 * maxdist;
    underflows = 0;
    mfe_corr = mfe;

    f3 = fc->matrices->f3_local;

    /* odd dangle models are not supported by this backtracking scheme */
    if (md->dangles % 2) {
      vrna_message_warning(
        "Global mfE structure backtracking not available for odd dangle models yet!");
      return ret;
    }

    /* check whether we need to adjust energies due to integer underflows in the forward recursion */
    while (vrna_convert_kcal_to_dcal(mfe_corr) < f3[1]) {
      mfe_corr -= (double)(UNDERFLOW_CORRECTION) / 100.;
      underflows++;
    }

    /* NOTE(review): this value of e is immediately overwritten by
     * 'e = f3[1]' below, so this assignment is effectively dead code */
    e = vrna_convert_kcal_to_dcal(mfe_corr);

#if 0
    if (underflows)
      printf("detected %d underflows %d vs. %d vs %6.2f\n", underflows, e, f3[1], mfe);

#endif

    /* start backtracking from the forward-recursion entry at position 1 */
    e = f3[1];

    /* default to start at beginning of file */
    if (file_pos < 0)
      file_pos = 0;

    /* open Lfold output file and start parsing line positions */
    if ((f = fopen(Lfold_filename, "r"))) {
      if (fseek(f, file_pos, SEEK_SET) != -1) {
        size_t num_lines, mem_lines;
        long *lines; /* array of file positions indicating start of lines */

        num_lines = 0;
        mem_lines = 1024;

        lines = (long *)vrna_alloc(sizeof(long) * mem_lines);
        lines[num_lines++] = ftell(f);

        /* 1. Fill array of file positions that coincide with start of lines */
        do {
          /* increase memory if necessary */
          if (num_lines == mem_lines) {
            mem_lines *= 1.4;
            lines = (long *)vrna_realloc(lines, sizeof(long) * mem_lines);
          }

          /* seek to next newline char */
          while ((!feof(f)) && (fgetc(f) != '\n'));

          /* stop at end of file */
          if (feof(f))
            break;

          lines[num_lines++] = ftell(f);
        } while(1);

        /* do the actual parsing of the data */
        if (num_lines > 0) {
          /* drop the position after the last newline (no complete line there) */
          num_lines--;

          size_t block_num = 0;
          unsigned long int i = num_lines;
          size_t lines_left = num_lines;

          /* we will use *ptr as current block pointer and save the list start in block_list */
          struct block *block_list, *ptr, *block_list_last;

          /* handle first block separately: scan backwards from the last
           * line until one parses into a valid block */
          do {
            lines_left--;

            block_list_last = block_list = ptr = extract_Lfold_entry(f,
                                                                     lines[lines_left],
                                                                     fc->sequence,
                                                                     md);
          } while ((block_list == NULL) && (lines_left > 0));

          /* start the actual backtracing procedure if we've read at least one block */
          if (block_list) {
            block_num++;
            /* initialize the result as an all-unpaired structure */
            *structure = (char *)vrna_alloc(sizeof(char) * (fc->length + 1));
            memset(*structure, (int)'.', fc->length);

            /* go through remaining file in reverse order
             * to extract the relevant data
             */
            append_blocks(&block_list_last,
                          f,
                          lines,
                          &lines_left,
                          fc,
                          ptr->start + look_ahead);

            ptr = block_list;

            /* the first (3'-most parsed) block is always part of the MFE */
            i = insert_block(*structure, ptr, &e);

            for (unsigned long int ii = ptr->start; ii <= i; ii++) {
              /* truncate remaining blocks */
              truncate_blocks(ii, n, &block_list);
              append_blocks(&block_list_last,
                            f,
                            lines,
                            &lines_left,
                            fc,
                            ii + look_ahead);
            }

            i++;

            /* proceed with remaining blocks */
            for (; i <= fc->length; i++) {
              unsigned long int prev_i = i;

              /* i pairs with some k, find block representing the substructure enclosed by (i,k) */
              if (e != f3[i + 1]) {
                /* account for a possible underflow correction boundary */
                if ((underflows) &&
                    (e == (f3[i + 1] - UNDERFLOW_CORRECTION))) {
                  underflows--;
                  e += UNDERFLOW_CORRECTION;
                } else {
                  /* go through list of blocks that start at i to check which one we can insert */
                  char found = 0;
                  for (ptr = block_list; ptr; ptr = ptr->next_entry) {
                    if (ptr->start > i)
                      break;

                    if ((ptr->start == i) &&
                        (ptr->end > i)) {
                      /* a block fits if its energy plus the remainder of
                       * the forward recursion reproduces e exactly */
                      if (e == (ptr->energy + f3[ptr->end + 1])) {
                        /* found the block, let's insert it */
                        found = 1;

                        i = insert_block(*structure, ptr, &e);

                        break;
                      } else if ((underflows) &&
                                 (e == (ptr->energy + f3[ptr->end + 1] - UNDERFLOW_CORRECTION))) {
                        /* same, but across an underflow correction boundary */
                        underflows--;
                        found = 1;
                        e += UNDERFLOW_CORRECTION;
                        i = insert_block(*structure, ptr, &e);

                        break;
                      }
                    }
                  }
                  if (!found)
                    printf("didn't find block for position %lu\n", i);
                }
              }

              /*
               * if i is unpaired, so we do nothing except for
               * truncating the remaining blocks
               */
              for (unsigned long int ii = prev_i; ii <= i; ii++) {
                truncate_blocks(ii, n, &block_list);
                append_blocks(&block_list_last,
                              f,
                              lines,
                              &lines_left,
                              fc,
                              ii + look_ahead);
              }
            }

            ret = 1;
          }
        }
      }

      fclose(f);
    }
  }

  return ret;
}
1885
1886
1887 PRIVATE INLINE int
decompose_pair(vrna_fold_compound_t * fc,int i,int j,struct aux_arrays * aux)1888 decompose_pair(vrna_fold_compound_t *fc,
1889 int i,
1890 int j,
1891 struct aux_arrays *aux)
1892 {
1893 unsigned char hc_decompose;
1894 int e, new_c, energy, stackEnergy, dangle_model, noLP,
1895 *DMLi1, *DMLi2, *cc, *cc1;
1896
1897 dangle_model = fc->params->model_details.dangles;
1898 noLP = fc->params->model_details.noLP;
1899 hc_decompose = fc->hc->matrix_local[i][j - i];
1900 DMLi1 = aux->DMLi1;
1901 DMLi2 = aux->DMLi2;
1902 cc = aux->cc;
1903 cc1 = aux->cc1;
1904 e = INF;
1905
1906 /* do we evaluate this pair? */
1907 if (hc_decompose) {
1908 new_c = INF;
1909
1910 /* check for hairpin loop */
1911 energy = vrna_E_hp_loop(fc, i, j);
1912 new_c = MIN2(new_c, energy);
1913
1914 /* check for multibranch loops */
1915 energy = vrna_E_mb_loop_fast(fc, i, j, DMLi1, DMLi2);
1916 new_c = MIN2(new_c, energy);
1917
1918 if (dangle_model == 3) {
1919 /* coaxial stacking */
1920 energy = vrna_E_mb_loop_stack(fc, i, j);
1921 new_c = MIN2(new_c, energy);
1922 }
1923
1924 /* check for interior loops */
1925 energy = vrna_E_int_loop(fc, i, j);
1926 new_c = MIN2(new_c, energy);
1927
1928 /* remember stack energy for --noLP option */
1929 if (noLP) {
1930 stackEnergy = vrna_E_stack(fc, i, j);
1931 new_c = MIN2(new_c, cc1[j - 1 - (i + 1)] + stackEnergy);
1932 cc[j - i] = new_c;
1933 if ((fc->type == VRNA_FC_TYPE_COMPARATIVE) &&
1934 (cc[j - i] != INF))
1935 cc[j - i] -= fc->pscore_local[i][j - i];
1936
1937 e = cc1[j - 1 - (i + 1)] + stackEnergy;
1938 } else {
1939 e = new_c;
1940 }
1941
1942 /* finally, check for auxiliary grammar rule(s) */
1943 if ((fc->aux_grammar) && (fc->aux_grammar->cb_aux_c)) {
1944 energy = fc->aux_grammar->cb_aux_c(fc, i, j, fc->aux_grammar->data);
1945 e = MIN2(e, energy);
1946 }
1947
1948 if ((fc->type == VRNA_FC_TYPE_COMPARATIVE) &&
1949 (e != INF))
1950 e -= fc->pscore_local[i][j - i];
1951 } /* end >> if (pair) << */
1952
1953 return e;
1954 }
1955
1956
1957 PRIVATE INLINE struct aux_arrays *
get_aux_arrays(unsigned int maxdist)1958 get_aux_arrays(unsigned int maxdist)
1959 {
1960 unsigned int j;
1961 struct aux_arrays *aux = (struct aux_arrays *)vrna_alloc(sizeof(struct aux_arrays));
1962
1963 aux->cc = (int *)vrna_alloc(sizeof(int) * (maxdist + 5)); /* auxilary arrays for canonical structures */
1964 aux->cc1 = (int *)vrna_alloc(sizeof(int) * (maxdist + 5)); /* auxilary arrays for canonical structures */
1965 aux->Fmi = (int *)vrna_alloc(sizeof(int) * (maxdist + 5)); /* holds row i of fML (avoids jumps in memory) */
1966 aux->DMLi = (int *)vrna_alloc(sizeof(int) * (maxdist + 5)); /* DMLi[j] holds MIN(fML[i,k]+fML[k+1,j]) */
1967 aux->DMLi1 = (int *)vrna_alloc(sizeof(int) * (maxdist + 5)); /* MIN(fML[i+1,k]+fML[k+1,j]) */
1968 aux->DMLi2 = (int *)vrna_alloc(sizeof(int) * (maxdist + 5)); /* MIN(fML[i+2,k]+fML[k+1,j]) */
1969
1970 /* prefill helper arrays */
1971 for (j = 0; j < maxdist + 5; j++)
1972 aux->Fmi[j] = aux->DMLi[j] = aux->DMLi1[j] = aux->DMLi2[j] = INF;
1973
1974 return aux;
1975 }
1976
1977
1978 PRIVATE INLINE void
rotate_aux_arrays(struct aux_arrays * aux,unsigned int maxdist)1979 rotate_aux_arrays(struct aux_arrays *aux,
1980 unsigned int maxdist)
1981 {
1982 unsigned int j;
1983 int *FF;
1984
1985 FF = aux->DMLi2;
1986 aux->DMLi2 = aux->DMLi1;
1987 aux->DMLi1 = aux->DMLi;
1988 aux->DMLi = FF;
1989 FF = aux->cc1;
1990 aux->cc1 = aux->cc;
1991 aux->cc = FF;
1992 for (j = 1; j < maxdist + 5; j++)
1993 aux->cc[j] = aux->Fmi[j] = aux->DMLi[j] = INF;
1994 }
1995
1996
1997 PRIVATE INLINE void
free_aux_arrays(struct aux_arrays * aux)1998 free_aux_arrays(struct aux_arrays *aux)
1999 {
2000 free(aux->cc);
2001 free(aux->cc1);
2002 free(aux->Fmi);
2003 free(aux->DMLi);
2004 free(aux->DMLi1);
2005 free(aux->DMLi2);
2006 free(aux);
2007 }
2008