1 /*
2  *                minimum free energy
3  *                RNA secondary structure prediction
4  *                with sliding window approach
5  *
6  *                c Ivo Hofacker, Peter Stadler, Ronny Lorenz
7  *
8  *                ViennaRNA package
9  */
10 
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <math.h>
18 #include <ctype.h>
19 #include <string.h>
20 #include <limits.h>
21 
22 #include "ViennaRNA/utils/basic.h"
23 #include "ViennaRNA/params/default.h"
24 #include "ViennaRNA/params/constants.h" /* defines MINPSCORE */
25 #include "ViennaRNA/fold_vars.h"
26 #include "ViennaRNA/params/basic.h"
27 #include "ViennaRNA/loops/all.h"
28 #include "ViennaRNA/gquad.h"
29 #include "ViennaRNA/ribo.h"
30 #include "ViennaRNA/utils/alignments.h"
31 #include "ViennaRNA/alphabet.h"
32 #include "ViennaRNA/constraints/hard.h"
33 #include "ViennaRNA/eval.h"
34 #include "ViennaRNA/io/utils.h"
35 #include "ViennaRNA/utils/units.h"
36 #include "ViennaRNA/mfe_window.h"
37 
38 #ifdef VRNA_WITH_SVM
39 #include "ViennaRNA/zscore_dat.inc"
40 #endif
41 
42 #ifdef __GNUC__
43 # define INLINE inline
44 #else
45 # define INLINE
46 #endif
47 
48 #define MAXSECTORS                  500   /* dimension for a backtrack array */
49 #define INT_CLOSE_TO_UNDERFLOW(i)   ((i) <= (INT_MIN / 16))
50 #define UNDERFLOW_CORRECTION        (((INT_MIN / 32) / 100) * 100)
51 
52 #define NONE -10000 /* score for forbidden pairs */
53 
54 
55 typedef struct {
56   FILE  *output;
57   int   dangle_model;
58   int   csv;
59 } hit_data;
60 
61 
62 struct aux_arrays {
63   int *cc;    /* auxilary arrays for canonical structures     */
64   int *cc1;   /* auxilary arrays for canonical structures     */
65   int *Fmi;   /* holds row i of fML (avoids jumps in memory)  */
66   int *DMLi;  /* DMLi[j] holds  MIN(fML[i,k]+fML[k+1,j])      */
67   int *DMLi1; /*                MIN(fML[i+1,k]+fML[k+1,j])    */
68   int *DMLi2; /*                MIN(fML[i+2,k]+fML[k+1,j])    */
69 };
70 
71 /*
72  #################################
73  # GLOBAL VARIABLES              #
74  #################################
75  */
76 
77 /*
78  #################################
79  # PRIVATE VARIABLES             #
80  #################################
81  */
82 
83 /*
84  #################################
85  # PRIVATE FUNCTION DECLARATIONS #
86  #################################
87  */
88 PRIVATE void
89 make_ptypes(vrna_fold_compound_t  *vc,
90             int                   i);
91 
92 
93 PRIVATE char *
94 backtrack(vrna_fold_compound_t  *vc,
95           int                   start,
96           int                   maxdist);
97 
98 
99 PRIVATE int
100 fill_arrays(vrna_fold_compound_t            *vc,
101             int                             *underflow,
102             vrna_mfe_window_callback        *cb,
103 #ifdef VRNA_WITH_SVM
104             vrna_mfe_window_zscore_callback *cb_z,
105 #endif
106             void                            *data);
107 
108 
109 PRIVATE void
110 default_callback(int        start,
111                  int        end,
112                  const char *structure,
113                  float      en,
114                  void       *data);
115 
116 
117 PRIVATE double
118 cov_score(vrna_fold_compound_t  *fc,
119           int                   i,
120           int                   j,
121           float                 **dm);
122 
123 
124 PRIVATE void
125 make_pscores(vrna_fold_compound_t *fc,
126              int                  start,
127              float                **dm);
128 
129 
130 PRIVATE void
131 default_callback_comparative(int        start,
132                              int        end,
133                              const char *structure,
134                              float      en,
135                              void       *data);
136 
137 
138 PRIVATE INLINE void
139 allocate_dp_matrices(vrna_fold_compound_t *fc);
140 
141 
142 PRIVATE INLINE void
143 free_dp_matrices(vrna_fold_compound_t *fc);
144 
145 
146 PRIVATE INLINE void
147 rotate_dp_matrices(vrna_fold_compound_t *fc,
148                    int                  i);
149 
150 
151 PRIVATE INLINE void
152 init_constraints(vrna_fold_compound_t *fc,
153                  float                **dm);
154 
155 
156 PRIVATE INLINE void
157 rotate_constraints(vrna_fold_compound_t *fc,
158                    float                **dm,
159                    int                  i);
160 
161 
162 PRIVATE INLINE struct aux_arrays *
163 get_aux_arrays(unsigned int maxdist);
164 
165 
166 PRIVATE INLINE void
167 rotate_aux_arrays(struct aux_arrays *aux,
168                   unsigned int      maxdist);
169 
170 
171 PRIVATE INLINE void
172 free_aux_arrays(struct aux_arrays *aux);
173 
174 
175 PRIVATE INLINE int
176 decompose_pair(vrna_fold_compound_t *fc,
177                int                  i,
178                int                  j,
179                struct aux_arrays    *aux);
180 
181 
182 #ifdef VRNA_WITH_SVM
183 
184 PRIVATE void
185 default_callback_z(int        start,
186                    int        end,
187                    const char *structure,
188                    float      en,
189                    float      zscore,
190                    void       *data);
191 
192 
193 PRIVATE INLINE int
194 want_backtrack(vrna_fold_compound_t *fc,
195                int                  i,
196                int                  j,
197                double               *z);
198 
199 
200 #endif
201 
202 
203 /*
204  #################################
205  # BEGIN OF FUNCTION DEFINITIONS #
206  #################################
207  */
208 PUBLIC float
vrna_mfe_window(vrna_fold_compound_t * vc,FILE * file)209 vrna_mfe_window(vrna_fold_compound_t  *vc,
210                 FILE                  *file)
211 {
212   hit_data data;
213 
214   data.output       = (file) ? file : stdout;
215   data.dangle_model = vc->params->model_details.dangles;
216   data.csv          = 0; /* csv output is for backward-compatibility only */
217 
218   if (vc->type == VRNA_FC_TYPE_COMPARATIVE)
219     return vrna_mfe_window_cb(vc, &default_callback_comparative, (void *)&data);
220   else
221     return vrna_mfe_window_cb(vc, &default_callback, (void *)&data);
222 }
223 
224 
225 PUBLIC float
vrna_mfe_window_cb(vrna_fold_compound_t * vc,vrna_mfe_window_callback * cb,void * data)226 vrna_mfe_window_cb(vrna_fold_compound_t     *vc,
227                    vrna_mfe_window_callback *cb,
228                    void                     *data)
229 {
230   int   energy, underflow, n_seq;
231   float mfe_local, e_factor;
232 
233   /* keep track of how many times we were close to an integer underflow */
234   underflow = 0;
235 
236   if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW)) {
237     vrna_message_warning("vrna_mfe_window@Lfold.c: Failed to prepare vrna_fold_compound");
238     return (float)(INF / 100.);
239   }
240 
241   n_seq     = (vc->type == VRNA_FC_TYPE_COMPARATIVE) ? vc->n_seq : 1;
242   e_factor  = 100. * n_seq;
243 
244 #ifdef VRNA_WITH_SVM
245   energy = fill_arrays(vc, &underflow, cb, NULL, data);
246 #else
247   energy = fill_arrays(vc, &underflow, cb, data);
248 #endif
249   mfe_local = (underflow > 0) ? ((float)underflow * (float)(UNDERFLOW_CORRECTION)) / e_factor : 0.;
250   mfe_local += (float)energy / e_factor;
251 
252   return mfe_local;
253 }
254 
255 
256 #ifdef VRNA_WITH_SVM
257 
258 PUBLIC float
vrna_mfe_window_zscore(vrna_fold_compound_t * vc,double min_z,FILE * file)259 vrna_mfe_window_zscore(vrna_fold_compound_t *vc,
260                        double               min_z,
261                        FILE                 *file)
262 {
263   hit_data data;
264 
265   data.output       = (file) ? file : stdout;
266   data.dangle_model = vc->params->model_details.dangles;
267 
268   return vrna_mfe_window_zscore_cb(vc, min_z, &default_callback_z, (void *)&data);
269 }
270 
271 
272 PUBLIC float
vrna_mfe_window_zscore_cb(vrna_fold_compound_t * vc,double min_z,vrna_mfe_window_zscore_callback * cb_z,void * data)273 vrna_mfe_window_zscore_cb(vrna_fold_compound_t            *vc,
274                           double                          min_z,
275                           vrna_mfe_window_zscore_callback *cb_z,
276                           void                            *data)
277 {
278   int   energy, underflow;
279   float mfe_local;
280 
281   if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
282     vrna_message_warning(
283       "vrna_mfe_window_zscore@mfe_window.c: Comparative prediction not implemented");
284     return (float)(INF / 100.);
285   }
286 
287   if (!vrna_fold_compound_prepare(vc, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW)) {
288     vrna_message_warning("vrna_mfe_window@Lfold.c: Failed to prepare vrna_fold_compound");
289     return (float)(INF / 100.);
290   }
291 
292   vrna_zsc_filter_update(vc, min_z, VRNA_ZSCORE_OPTIONS_NONE);
293 
294   /* keep track of how many times we were close to an integer underflow */
295   underflow = 0;
296 
297   energy = fill_arrays(vc, &underflow, NULL, cb_z, data);
298 
299   mfe_local = (underflow > 0) ? ((float)underflow * (float)(UNDERFLOW_CORRECTION)) / 100. : 0.;
300   mfe_local += (float)energy / 100.;
301 
302   return mfe_local;
303 }
304 
305 
306 #endif
307 
308 
309 /*
310  #####################################
311  # BEGIN OF STATIC HELPER FUNCTIONS  #
312  #####################################
313  */
314 PRIVATE INLINE void
allocate_dp_matrices(vrna_fold_compound_t * fc)315 allocate_dp_matrices(vrna_fold_compound_t *fc)
316 {
317   int       i, j, length, maxdist, **c, **fML;
318   vrna_hc_t *hc;
319   vrna_sc_t *sc;
320 
321   length  = fc->length;
322   maxdist = MIN2(fc->window_size, length);
323   hc      = fc->hc;
324   c       = fc->matrices->c_local;
325   fML     = fc->matrices->fML_local;
326 
327   /* reserve additional memory for j-dimension */
328   for (i = length; (i > length - maxdist - 5) && (i >= 0); i--) {
329     c[i]                = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
330     fML[i]              = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
331     hc->matrix_local[i] = (unsigned char *)vrna_alloc(sizeof(unsigned char) * (maxdist + 5));
332     if (fc->type == VRNA_FC_TYPE_SINGLE)
333       fc->ptype_local[i] = vrna_alloc(sizeof(char) * (maxdist + 5));
334     else if (fc->type == VRNA_FC_TYPE_COMPARATIVE)
335       fc->pscore_local[i] = vrna_alloc(sizeof(int) * (maxdist + 5));
336   }
337 
338   /*
339    *  allocate one more entry for comparative predictions to allow for
340    *  access to i - 1 when processing [i ... maxdist]. This is required
341    *  for (default) hard constraints with noLP option
342    */
343   if (fc->type == VRNA_FC_TYPE_COMPARATIVE)
344     if (length > maxdist + 5)
345       fc->pscore_local[length - maxdist - 5] = vrna_alloc(sizeof(int) * (maxdist + 5));
346 
347   switch (fc->type) {
348     case VRNA_FC_TYPE_SINGLE:
349       sc = fc->sc;
350       if (sc) {
351         if (sc->energy_bp_local)
352           for (i = length; (i > length - maxdist - 5) && (i >= 0); i--)
353             sc->energy_bp_local[i] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
354 
355         if (sc->energy_up)
356           for (i = length; (i > length - maxdist - 5) && (i >= 0); i--)
357             sc->energy_up[i] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
358 
359         for (i = length; (i > length - maxdist - 5) && (i >= 0); i--)
360           vrna_sc_update(fc, i, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW);
361       }
362 
363       break;
364 
365     case VRNA_FC_TYPE_COMPARATIVE:
366       break;
367   }
368 
369   if (fc->type == VRNA_FC_TYPE_SINGLE)
370     for (j = length; j > length - maxdist - 4; j--)
371       for (i = (length - maxdist - 4 > 0) ? length - maxdist - 4 : 1; i < j; i++)
372         c[i][j - i] = fML[i][j - i] = INF;
373   else if (fc->type == VRNA_FC_TYPE_COMPARATIVE)
374     for (j = length; j > length - maxdist - 3; j--)
375       for (i = (length - maxdist - 2 > 0) ? length - maxdist - 2 : 1; i < j; i++)
376         c[i][j - i] = fML[i][j - i] = INF;
377 }
378 
379 
380 PRIVATE INLINE void
free_dp_matrices(vrna_fold_compound_t * fc)381 free_dp_matrices(vrna_fold_compound_t *fc)
382 {
383   int       i, length, maxdist, **c, **fML, **ggg, with_gquad;
384   vrna_hc_t *hc;
385   vrna_sc_t *sc;
386 
387   length      = fc->length;
388   maxdist     = MIN2(fc->window_size, length);
389   hc          = fc->hc;
390   c           = fc->matrices->c_local;
391   fML         = fc->matrices->fML_local;
392   ggg         = fc->matrices->ggg_local;
393   with_gquad  = fc->params->model_details.gquad;
394 
395 
396   /* free additional memory for j-dimension */
397   for (i = 0; (i < maxdist + 5) && (i <= length); i++) {
398     if (fc->type == VRNA_FC_TYPE_SINGLE) {
399       free(fc->ptype_local[i]);
400       fc->ptype_local[i] = NULL;
401     } else if (fc->type == VRNA_FC_TYPE_COMPARATIVE) {
402       free(fc->pscore_local[i]);
403       fc->pscore_local[i] = NULL;
404     }
405 
406     free(c[i]);
407     c[i] = NULL;
408     free(fML[i]);
409     fML[i] = NULL;
410     free(hc->matrix_local[i]);
411     hc->matrix_local[i] = NULL;
412   }
413 
414   switch (fc->type) {
415     case VRNA_FC_TYPE_SINGLE:
416       sc = fc->sc;
417       if (sc) {
418         if (sc->energy_up) {
419           for (i = 0; (i < maxdist + 5) && (i <= length); i++) {
420             free(sc->energy_up[i]);
421             sc->energy_up[i] = NULL;
422           }
423         }
424 
425         if (sc->energy_bp_local) {
426           for (i = 0; (i < maxdist + 5) && (i <= length); i++) {
427             free(sc->energy_bp_local[i]);
428             sc->energy_bp_local[i] = NULL;
429           }
430         }
431       }
432 
433       break;
434 
435     case VRNA_FC_TYPE_COMPARATIVE:
436       break;
437   }
438 
439   if (with_gquad) {
440     for (i = 0; (i <= maxdist + 5) && (i <= length); i++)
441       free(ggg[i]);
442     free(ggg);
443     fc->matrices->ggg_local = NULL;
444   }
445 }
446 
447 
448 PRIVATE INLINE void
rotate_dp_matrices(vrna_fold_compound_t * fc,int i)449 rotate_dp_matrices(vrna_fold_compound_t *fc,
450                    int                  i)
451 {
452   int       ii, maxdist, length, **c, **fML;
453   vrna_hc_t *hc;
454   vrna_sc_t *sc;
455 
456   length  = fc->length;
457   maxdist = fc->window_size;
458   c       = fc->matrices->c_local;
459   fML     = fc->matrices->fML_local;
460   hc      = fc->hc;
461 
462   if (i + maxdist + 4 <= length) {
463     c[i - 1]                          = c[i + maxdist + 4];
464     c[i + maxdist + 4]                = NULL;
465     fML[i - 1]                        = fML[i + maxdist + 4];
466     fML[i + maxdist + 4]              = NULL;
467     hc->matrix_local[i - 1]           = hc->matrix_local[i + maxdist + 4];
468     hc->matrix_local[i + maxdist + 4] = NULL;
469 
470     /* rotate base pair soft constraints */
471     switch (fc->type) {
472       case VRNA_FC_TYPE_SINGLE:
473         sc = fc->sc;
474         if (sc) {
475           if (sc->energy_bp_local) {
476             sc->energy_bp_local[i - 1]            = sc->energy_bp_local[i + maxdist + 4];
477             sc->energy_bp_local[i + maxdist + 4]  = NULL;
478           }
479 
480           if (sc->energy_up) {
481             sc->energy_up[i - 1]            = sc->energy_up[i + maxdist + 4];
482             sc->energy_up[i + maxdist + 4]  = NULL;
483           }
484         }
485 
486         break;
487 
488       case VRNA_FC_TYPE_COMPARATIVE:
489         break;
490     }
491 
492     if ((fc->params->model_details.gquad) && (i > 1))
493       vrna_gquad_mx_local_update(fc, i - 1);
494 
495     for (ii = 0; ii < maxdist + 5; ii++) {
496       c[i - 1][ii]    = INF;
497       fML[i - 1][ii]  = INF;
498     }
499   }
500 }
501 
502 
503 PRIVATE INLINE void
init_constraints(vrna_fold_compound_t * fc,float ** dm)504 init_constraints(vrna_fold_compound_t *fc,
505                  float                **dm)
506 {
507   int i, length, maxdist;
508 
509   length  = fc->length;
510   maxdist = fc->window_size;
511 
512   switch (fc->type) {
513     case VRNA_FC_TYPE_SINGLE:
514       for (i = length; (i >= length - maxdist - 4) && (i > 0); i--) {
515         make_ptypes(fc, i);
516         vrna_hc_update(fc, i, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
517         vrna_sc_update(fc, i, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW);
518       }
519       break;
520 
521     case VRNA_FC_TYPE_COMPARATIVE:
522       for (i = length; (i >= length - maxdist - 4) && (i > 0); i--) {
523         make_pscores(fc, i, dm);
524         vrna_hc_update(fc, i, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
525       }
526 
527       /* for noLP option */
528       if (length > maxdist + 5)
529         make_pscores(fc, length - maxdist - 5, dm);
530 
531       break;
532   }
533 }
534 
535 
536 PRIVATE INLINE void
rotate_constraints(vrna_fold_compound_t * fc,float ** dm,int i)537 rotate_constraints(vrna_fold_compound_t *fc,
538                    float                **dm,
539                    int                  i)
540 {
541   int length, maxdist;
542 
543   length  = fc->length;
544   maxdist = fc->window_size;
545 
546   switch (fc->type) {
547     case VRNA_FC_TYPE_SINGLE:
548       if (i + maxdist + 4 <= length) {
549         fc->ptype_local[i - 1]            = fc->ptype_local[i + maxdist + 4];
550         fc->ptype_local[i + maxdist + 4]  = NULL;
551         if (i > 1) {
552           make_ptypes(fc, i - 1);
553           vrna_hc_update(fc, i - 1, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
554           vrna_sc_update(fc, i - 1, VRNA_OPTION_MFE | VRNA_OPTION_WINDOW);
555         }
556       }
557 
558       break;
559 
560     case VRNA_FC_TYPE_COMPARATIVE:
561       if (i + maxdist + 4 <= length) {
562         if (i > 1) {
563           fc->pscore_local[i - 2]           = fc->pscore_local[i + maxdist + 4];
564           fc->pscore_local[i + maxdist + 4] = NULL;
565           if (i > 2)
566             make_pscores(fc, i - 2, dm);
567 
568           vrna_hc_update(fc, i - 1, VRNA_CONSTRAINT_WINDOW_UPDATE_3);
569         } else if (i == 1) {
570           free(fc->pscore_local[i - 1]);
571           fc->pscore_local[i - 1]           = fc->pscore_local[i + maxdist + 4];
572           fc->pscore_local[i + maxdist + 4] = NULL;
573         }
574       }
575 
576       break;
577   }
578 }
579 
580 
581 PRIVATE int
fill_arrays(vrna_fold_compound_t * vc,int * underflow,vrna_mfe_window_callback * cb,vrna_mfe_window_zscore_callback * cb_z,void * data)582 fill_arrays(vrna_fold_compound_t            *vc,
583             int                             *underflow,
584             vrna_mfe_window_callback        *cb,
585 #ifdef VRNA_WITH_SVM
586             vrna_mfe_window_zscore_callback *cb_z,
587 #endif
588             void                            *data)
589 {
590   /* fill "c", "fML" and "f3" arrays and return  optimal energy */
591 
592   char              *prev;
593   int               i, j, length, maxdist, **c, **fML, *f3,
594                     with_gquad, dangle_model, turn, n_seq,
595                     prev_i, prev_j, prev_end, prev_en;
596   float             **dm;
597   double            e_fact;
598 
599 #ifdef VRNA_WITH_SVM
600   double            prevz;
601   unsigned char     with_zscore;
602   unsigned char     report_subsumed;
603   vrna_zsc_dat_t    zsc_data;
604 #endif
605 
606   vrna_md_t         *md;
607   struct aux_arrays *helper_arrays;
608 
609   int               olddm[7][7] = { { 0, 0, 0, 0, 0, 0, 0 },/* hamming distance between pairs */
610                                     { 0, 0, 2, 2, 1, 2, 2 } /* CG */,
611                                     { 0, 2, 0, 1, 2, 2, 2 } /* GC */,
612                                     { 0, 2, 1, 0, 2, 1, 2 } /* GU */,
613                                     { 0, 1, 2, 2, 0, 2, 1 } /* UG */,
614                                     { 0, 2, 2, 1, 2, 0, 2 } /* AU */,
615                                     { 0, 2, 2, 2, 1, 2, 0 } /* UA */ };
616 
617   n_seq         = (vc->type == VRNA_FC_TYPE_COMPARATIVE) ? vc->n_seq : 1;
618   length        = vc->length;
619   maxdist       = vc->window_size;
620   md            = &(vc->params->model_details);
621   dangle_model  = md->dangles;
622   with_gquad    = md->gquad;
623   turn          = md->min_loop_size;
624   do_backtrack  = 0;
625   prev_i        = 0;
626   prev_j        = 0;
627   prev_end      = 0;
628   prev          = NULL;
629   prev_en       = 0;
630   e_fact        = 100 * n_seq;
631   dm            = NULL;
632 #ifdef VRNA_WITH_SVM
633   prevz           = 0.;
634   zsc_data        = vc->zscore_data;
635   with_zscore     = (zsc_data) ? zsc_data->filter_on : 0;
636   report_subsumed = (zsc_data) ? zsc_data->report_subsumed : 0;
637 #endif
638 
639   if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
640 #ifdef VRNA_WITH_SVM
641     /* no z-scoring for comparative structure prediction */
642     if (zsc_data) {
643       vrna_zsc_filter_free(vc);
644       zsc_data        = NULL;
645       with_zscore     = 0;
646       report_subsumed = 0;
647     }
648 #endif
649 
650     if (md->ribo) {
651       if (RibosumFile != NULL)
652         dm = readribosum(RibosumFile);
653       else
654         dm = get_ribosum((const char **)vc->sequences, n_seq, length);
655     } else {
656       /*use usual matrix*/
657       dm = (float **)vrna_alloc(7 * sizeof(float *));
658       for (i = 0; i < 7; i++) {
659         dm[i] = (float *)vrna_alloc(7 * sizeof(float));
660         for (j = 0; j < 7; j++)
661           dm[i][j] = (float)olddm[i][j];
662       }
663     }
664   }
665 
666   c   = vc->matrices->c_local;
667   fML = vc->matrices->fML_local;
668   f3  = vc->matrices->f3_local;
669 
670   /* allocate memory for all helper arrays */
671   helper_arrays = get_aux_arrays(maxdist);
672 
673   /* reserve additional memory for j-dimension */
674   allocate_dp_matrices(vc);
675 
676   init_constraints(vc, dm);
677 
678   if (with_gquad)
679     vrna_gquad_mx_local_update(vc, length - maxdist - 4);
680 
681   for (i = length - turn - 1; i >= 1; i--) {
682     /* i,j in [1..length] */
683     for (j = i + 1; j <= MIN2(i + turn, length); j++)
684       c[i][j - i] = fML[i][j - i] = INF;
685 
686     for (j = i + turn + 1; j <= length && j <= i + maxdist; j++) {
687       /* decompose subsegment [i, j] with pair (i, j) */
688       c[i][j - i] = decompose_pair(vc, i, j, helper_arrays);
689 
690       /* decompose subsegment [i, j] that is multibranch loop part with at least one branch */
691       fML[i][j - i] = vrna_E_ml_stems_fast(vc, i, j, helper_arrays->Fmi, helper_arrays->DMLi);
692 
693       if ((vc->aux_grammar) && (vc->aux_grammar->cb_aux))
694         vc->aux_grammar->cb_aux(vc, i, j, vc->aux_grammar->data);
695     } /* for (j...) */
696 
697     /* calculate energies of 5' and 3' fragments */
698     f3[i] = vrna_E_ext_loop_3(vc, i);
699 
700     {
701       char *ss = NULL;
702 
703       if (f3[i] < f3[i + 1]) {
704         /*
705          * instead of backtracing in the next iteration, we backtrack now
706          * already. This is necessary to accomodate for change in free
707          * energy due to unpaired nucleotides in the exterior loop, which
708          * may happen in the case of using soft constraints
709          */
710         int ii, jj;
711         ii  = i;
712         jj  = vrna_BT_ext_loop_f3_pp(vc, &ii, maxdist);
713         if (jj > 0) {
714 #ifdef VRNA_WITH_SVM
715           double thisz = 0;
716           if (want_backtrack(vc, ii, jj, &thisz)) {
717 #endif
718           ss = backtrack(vc, ii, jj);
719           if (prev) {
720             if ((jj < prev_j) ||
721 #ifdef VRNA_WITH_SVM
722                 ((report_subsumed) && (prevz < thisz)) || /* yield last structure if it's z-score is higher than the current one */
723 #endif
724                 (strncmp(ss + prev_i - ii, prev, prev_j - prev_i + 1))) {
725               /* ss does not contain prev */
726 #ifdef VRNA_WITH_SVM
727               if (with_zscore)
728                 cb_z(prev_i, prev_end, prev, prev_en / e_fact, prevz, data);
729               else
730 #endif
731               cb(prev_i, prev_end, prev, prev_en / e_fact, data);
732             }
733 
734             free(prev);
735           }
736 
737           prev      = ss;
738           prev_i    = ii;
739           prev_j    = jj;
740           prev_end  = MIN2(jj + ((dangle_model) ? 1 : 0), length);
741           prev_en   = f3[ii] - f3[jj + 1];
742 #ifdef VRNA_WITH_SVM
743           prevz = thisz;
744         }
745 
746 #endif
747         } else if (jj == -1) {
748           /* some error occured during backtracking */
749           vrna_message_error("backtrack failed in short backtrack 1");
750         }
751       }
752 
753       if (i == 1) {
754         if (prev) {
755 #ifdef VRNA_WITH_SVM
756           if (with_zscore)
757             cb_z(prev_i, prev_end, prev, prev_en / e_fact, prevz, data);
758           else
759 #endif
760           cb(prev_i, prev_end, prev, prev_en / e_fact, data);
761 
762           free(prev);
763           prev = NULL;
764 #ifdef VRNA_WITH_SVM
765         } else if ((f3[i] < 0) && (!with_zscore)) {
766           /* why !with_zscore? */
767 #else
768         } else if (f3[i] < 0) {
769 #endif
770           int ii, jj;
771           ii  = i;
772           jj  = vrna_BT_ext_loop_f3_pp(vc, &ii, maxdist);
773           if (jj > 0) {
774 #ifdef VRNA_WITH_SVM
775             double thisz = 0;
776             if (want_backtrack(vc, ii, jj, &thisz)) {
777 #endif
778             ss = backtrack(vc, ii, jj);
779 #ifdef VRNA_WITH_SVM
780             if (with_zscore)
781               cb_z(ii, MIN2(jj + ((dangle_model) ? 1 : 0),
782                             length), ss, (f3[1] - f3[jj + 1]) / e_fact, thisz, data);
783             else
784 #endif
785             cb(ii, MIN2(jj + ((dangle_model) ? 1 : 0),
786                         length), ss, (f3[1] - f3[jj + 1]) / e_fact, data);
787 
788             free(ss);
789 #ifdef VRNA_WITH_SVM
790           }
791 
792 #endif
793           } else if (jj == -1) {
794             /* some error occured during backtracking */
795             vrna_message_error("backtrack failed in short backtrack 2");
796           }
797         }
798       }
799     }
800 
801     /* check for values close to integer underflow */
802     if (INT_CLOSE_TO_UNDERFLOW(f3[i])) {
803       /* correct f3 free energies and increase underflow counter */
804       int cnt;
805       for (cnt = i; cnt <= MIN2(i + maxdist + 2, length); cnt++)
806         f3[cnt] -= UNDERFLOW_CORRECTION;
807       (*underflow)++;
808     }
809 
810     rotate_aux_arrays(helper_arrays, maxdist);
811     rotate_dp_matrices(vc, i);
812     rotate_constraints(vc, dm, i);
813   }
814 
815   /* clean up memory */
816   free_aux_arrays(helper_arrays);
817   free_dp_matrices(vc);
818 
819   if (dm) {
820     for (i = 0; i < 7; i++)
821       free(dm[i]);
822     free(dm);
823   }
824 
825   return f3[1];
826 }
827 
828 
829 #ifdef VRNA_WITH_SVM
830 PRIVATE INLINE int
want_backtrack(vrna_fold_compound_t * fc,int i,int j,double * z)831 want_backtrack(vrna_fold_compound_t *fc,
832                int                  i,
833                int                  j,
834                double               *z)
835 {
836   int bt, *f3;
837 
838   bt  = 1; /* we want to backtrack by default */
839   *z  = (double)INF;
840 
841   if ((fc->zscore_data) &&
842       (fc->zscore_data->filter_on)) {
843     if ((fc->zscore_data->pre_filter) &&
844         (fc->zscore_data->current_i == i)) {
845       *z = fc->zscore_data->current_z[j];
846     } else {
847       bt  = 0;
848       f3  = fc->matrices->f3_local;
849       *z  = vrna_zsc_compute(fc, i, j, f3[i] - f3[j + 1]);
850     }
851 
852     if ((*z) <= fc->zscore_data->min_z)
853       bt = 1;
854   }
855 
856   return bt;
857 }
858 
859 
860 #endif
861 
862 
863 PRIVATE char *
backtrack(vrna_fold_compound_t * vc,int start,int end)864 backtrack(vrna_fold_compound_t  *vc,
865           int                   start,
866           int                   end)
867 {
868   /*------------------------------------------------------------------
869    *  trace back through the "c", "f3" and "fML" arrays to get the
870    *  base pairing list. No search for equivalent structures is done.
871    *  This is fast, since only few structure elements are recalculated.
872    *  ------------------------------------------------------------------*/
873   sect sector[MAXSECTORS];            /* backtracking sectors */
874   char *structure, **ptype;
875   int i, j, k, length, no_close, type, s, b, bt_type, turn,
876       dangle_model, noLP, noGUclosure, **c, dangle3, ml, cij,
877       **pscore, canonical, p, q, comp1, comp2, max3;
878   vrna_param_t *P;
879   vrna_md_t *md;
880   vrna_bp_stack_t *bp_stack;
881 
882   length        = vc->length;
883   ptype         = vc->ptype_local;
884   pscore        = vc->pscore_local;
885   P             = vc->params;
886   md            = &(P->model_details);
887   dangle_model  = md->dangles;
888   noLP          = md->noLP;
889   noGUclosure   = md->noGUclosure;
890   bt_type       = md->backtrack_type;
891   turn          = md->min_loop_size;
892   c             = vc->matrices->c_local;
893 
894   s         = 0;                                                                                /* depth of backtracking stack */
895   b         = 0;                                                                                /* number of base pairs */
896   bp_stack  = (vrna_bp_stack_t *)vrna_alloc(sizeof(vrna_bp_stack_t) * (4 * (1 + length / 2)));  /* add a guess of how many G's may be involved in a G quadruplex */
897 
898   sector[++s].i = start;
899   sector[s].j   = MIN2(length, end);
900   sector[s].ml  = (bt_type == 'M') ? 1 : ((bt_type == 'C') ? 2 : 0);
901 
902   structure = (char *)vrna_alloc((MIN2(length - start, end) + 3) * sizeof(char));
903 
904   memset(structure, '.', MIN2(length - start, end) + 1);
905 
906   dangle3 = 0;
907 
908   while (s > 0) {
909     canonical = 1;     /* (i,j) closes a canonical structure */
910 
911     /* pop one element from stack */
912     i   = sector[s].i;
913     j   = sector[s].j;
914     ml  = sector[s--].ml;  /* ml is a flag indicating if backtracking is to
915                            * occur in the fML- (1) or in the f-array (0) */
916 
917     if (j < i + turn + 1)
918       continue;                     /* no more pairs in this interval */
919 
920     switch (ml) {
921       /* backtrack in f3 */
922       case 0:
923         if (vrna_BT_ext_loop_f3(vc, &i, j, &p, &q, bp_stack, &b)) {
924           if (i > 0) {
925             sector[++s].i = i;
926             sector[s].j   = j;
927             sector[s].ml  = 0;
928           }
929 
930           if (p > 0) {
931             if (((i == q + 2) || (dangle_model)) && (q < length))
932               dangle3 = 1;
933 
934             i = p;
935             j = q;
936             goto repeat1;
937           } else if (md->gquad) {
938             /*
939              * check whether last element on the bp_stack is involved in G-Quadruplex formation
940              * and increase output dot-bracket string length by 1 if necessary
941              */
942             if ((bp_stack[b].i == bp_stack[b].j) && (bp_stack[b].i < length))
943               dangle3 = 1;
944           }
945 
946           continue;
947         } else {
948           vrna_message_error("backtracking failed in f3, segment [%d,%d]\n", i, j);
949         }
950 
951         break;
952 
953       /* trace back in fML array */
954       case 1:
955         if (vrna_BT_mb_loop_split(vc, &i, &j, &p, &q, &comp1, &comp2, bp_stack, &b)) {
956           if (i > 0) {
957             sector[++s].i = i;
958             sector[s].j   = j;
959             sector[s].ml  = comp1;
960           }
961 
962           if (p > 0) {
963             sector[++s].i = p;
964             sector[s].j   = q;
965             sector[s].ml  = comp2;
966           }
967 
968           continue;
969         } else {
970           vrna_message_error("backtracking failed in fML, segment [%d,%d]\n", i, j);
971         }
972 
973         break;
974 
975       /* backtrack in c */
976       case 2:
977         bp_stack[++b].i = i;
978         bp_stack[b].j   = j;
979         goto repeat1;
980         break;
981 
982       default:
983         vrna_message_error("Backtracking failed due to unrecognized DP matrix!");
984         break;
985     }
986 
987 repeat1:
988 
989     /*----- begin of "repeat:" -----*/
990     if (canonical)
991       cij = c[i][j - i];
992 
993     if (noLP) {
994       if (vrna_BT_stack(vc, &i, &j, &cij, bp_stack, &b)) {
995         canonical = 0;
996         goto repeat1;
997       }
998     }
999 
1000     canonical = 1;
1001 
1002     switch (vc->type) {
1003       case VRNA_FC_TYPE_SINGLE:
1004         type = vrna_get_ptype_window(i, j, ptype);
1005 
1006         no_close = (((type == 3) || (type == 4)) && noGUclosure);
1007 
1008         if (no_close) {
1009           if (cij == FORBIDDEN)
1010             continue;
1011         } else {
1012           if (vrna_BT_hp_loop(vc, i, j, cij, bp_stack, &b))
1013             continue;
1014         }
1015 
1016         break;
1017 
1018       case VRNA_FC_TYPE_COMPARATIVE:
1019         cij += pscore[i][j - i];
1020         if (vrna_BT_hp_loop(vc, i, j, cij, bp_stack, &b))
1021           continue;
1022 
1023         break;
1024     }
1025 
1026     if (vrna_BT_int_loop(vc, &i, &j, cij, bp_stack, &b)) {
1027       if (i < 0)
1028         continue;
1029       else
1030         goto repeat1;
1031     }
1032 
1033     /* (i.j) must close a multi-loop */
1034     if (vrna_BT_mb_loop(vc, &i, &j, &k, cij, &comp1, &comp2)) {
1035       sector[++s].i = i;
1036       sector[s].j   = k;
1037       sector[s].ml  = comp1;
1038       sector[++s].i = k + 1;
1039       sector[s].j   = j;
1040       sector[s].ml  = comp2;
1041     } else {
1042       vrna_message_error("backtracking failed in repeat, segment [%d,%d]\n", i, j);
1043     }
1044 
1045     /* end of repeat: --------------------------------------------------*/
1046   } /* end of infinite while loop */
1047 
1048 
1049   bp_stack[0].i = b;
1050 
1051   /* and now create a dot-brakcet string from the base pair stack... */
1052   max3 = 1;
1053   for (i = 1; i <= b; i++) {
1054     if (bp_stack[i].i == bp_stack[i].j) {
1055       /* Gquad bonds are marked as bp[i].i == bp[i].j */
1056       structure[bp_stack[i].i - start] = '+';
1057     } else {
1058       /* the following ones are regular base pairs */
1059       structure[bp_stack[i].i - start]  = '(';
1060       structure[bp_stack[i].j - start]  = ')';
1061     }
1062 
1063     if (max3 < bp_stack[i].j - start)
1064       max3 = bp_stack[i].j - start;
1065   }
1066 
1067   free(bp_stack);
1068 
1069   structure = (char *)vrna_realloc(structure,
1070                                    sizeof(char) * (max3 + dangle3 + 2));
1071   structure[max3 + dangle3 + 1] = '\0';
1072 
1073   return structure;
1074 }
1075 
1076 
1077 PRIVATE void
make_ptypes(vrna_fold_compound_t * vc,int i)1078 make_ptypes(vrna_fold_compound_t  *vc,
1079             int                   i)
1080 {
1081   int j, k, type, n, maxdist, turn, noLP;
1082   short *S;
1083   char **ptype;
1084   vrna_md_t *md;
1085 
1086   n       = (int)vc->length;
1087   S       = vc->sequence_encoding2;
1088   ptype   = vc->ptype_local;
1089   maxdist = vc->window_size;
1090   md      = &(vc->params->model_details);
1091   turn    = md->min_loop_size;
1092   noLP    = md->noLP;
1093 
1094   for (k = turn + 1; k < maxdist; k++) {
1095     j = i + k;
1096     if (j > n)
1097       break;
1098 
1099     type = md->pair[S[i]][S[j]];
1100 
1101     if (noLP && type) {
1102       if (!ptype[i + 1][j - 1 - i - 1])
1103         if (j == n || i == 1 || (!md->pair[S[i - 1]][S[j + 1]]))
1104           type = 0;
1105     }
1106 
1107     ptype[i][j - i] = type;
1108   }
1109 }
1110 
1111 
1112 PRIVATE double
cov_score(vrna_fold_compound_t * fc,int i,int j,float ** dm)1113 cov_score(vrna_fold_compound_t  *fc,
1114           int                   i,
1115           int                   j,
1116           float                 **dm)
1117 {
1118   char **AS;
1119   short **S;
1120   int n_seq, k, l, s, type;
1121   double score;
1122   vrna_md_t *md;
1123   int pfreq[8] = {
1124     0, 0, 0, 0, 0, 0, 0, 0
1125   };
1126 
1127   n_seq = fc->n_seq;
1128   AS    = fc->sequences;
1129   S     = fc->S;
1130   md    = &(fc->params->model_details);
1131 
1132   for (s = 0; s < n_seq; s++) {
1133     if (S[s][i] == 0 && S[s][j] == 0) {
1134       type = 7;                             /* gap-gap  */
1135     } else {
1136       if ((AS[s][i] == '~') || (AS[s][j] == '~'))
1137         type = 7;
1138       else
1139         type = md->pair[S[s][i]][S[s][j]];
1140     }
1141 
1142     pfreq[type]++;
1143   }
1144 
1145   if (pfreq[0] * 2 + pfreq[7] > n_seq) {
1146     return NONE;
1147   } else {
1148     for (k = 1, score = 0.; k <= 6; k++) /* ignore pairtype 7 (gap-gap) */
1149       for (l = k; l <= 6; l++)
1150         /*
1151          * scores for replacements between pairtypes
1152          * consistent or compensatory mutations score 1 or 2
1153          */
1154         score += pfreq[k] * pfreq[l] * dm[k][l];
1155   }
1156 
1157   /* counter examples score -1, gap-gap scores -0.25   */
1158   return md->cv_fact * ((UNIT * score) / n_seq - md->nc_fact * UNIT * (pfreq[0] + pfreq[7] * 0.25));
1159 }
1160 
1161 
1162 PRIVATE void
make_pscores(vrna_fold_compound_t * fc,int i,float ** dm)1163 make_pscores(vrna_fold_compound_t *fc,
1164              int                  i,
1165              float                **dm)
1166 {
1167   /*
1168    * calculate co-variance bonus for each pair depending on
1169    * compensatory/consistent mutations and incompatible seqs
1170    * should be 0 for conserved pairs, >0 for good pairs
1171    */
1172   int n, j, **pscore, maxd, turn, noLP;
1173   vrna_md_t *md;
1174 
1175   n       = (int)fc->length;
1176   maxd    = fc->window_size;
1177   pscore  = fc->pscore_local;
1178   md      = &(fc->params->model_details);
1179   turn    = md->min_loop_size;
1180   noLP    = md->noLP;
1181 
1182   /*fill pscore[start], too close*/
1183   for (j = i + 1; (j < i + turn + 1) && (j <= n); j++)
1184     pscore[i][j - i] = NONE;
1185   for (j = i + turn + 1; ((j <= n) && (j <= i + maxd)); j++)
1186     pscore[i][j - i] = cov_score(fc, i, j, dm);
1187 
1188   if (noLP) {
1189     /* remove unwanted lonely pairs */
1190     int otype = 0, ntype = 0;
1191     for (j = i + turn; ((j < n) && (j < i + maxd)); j++) {
1192       if ((i > 1) && (j < n))
1193         otype = cov_score(fc, i - 1, j + 1, dm);
1194 
1195       if (i < n)
1196         ntype = pscore[i + 1][j - 1 - (i + 1)];
1197       else
1198         ntype = NONE;
1199 
1200       if ((otype < -4 * UNIT) && (ntype < -4 * UNIT)) /* worse than 2 counterex */
1201         pscore[i][j - i] = NONE;                      /* i.j can only form isolated pairs */
1202     }
1203   }
1204 
1205   if ((j - i + 1) > maxd)
1206     pscore[i][j - i] = NONE;
1207 }
1208 
1209 
1210 PRIVATE void
default_callback(int start,int end,const char * structure,float en,void * data)1211 default_callback(int        start,
1212                  int        end,
1213                  const char *structure,
1214                  float      en,
1215                  void       *data)
1216 {
1217   FILE *output      = ((hit_data *)data)->output;
1218   int dangle_model  = ((hit_data *)data)->dangle_model;
1219 
1220   if ((dangle_model == 2) && (start > 1))
1221     fprintf(output, ".%s (%6.2f) %4d\n", structure, en, start - 1);
1222   else
1223     fprintf(output, "%s (%6.2f) %4d\n ", structure, en, start);
1224 }
1225 
1226 
1227 #ifdef VRNA_WITH_SVM
1228 PRIVATE void
default_callback_z(int start,int end,const char * structure,float en,float zscore,void * data)1229 default_callback_z(int        start,
1230                    int        end,
1231                    const char *structure,
1232                    float      en,
1233                    float      zscore,
1234                    void       *data)
1235 {
1236   FILE *output      = ((hit_data *)data)->output;
1237   int dangle_model  = ((hit_data *)data)->dangle_model;
1238 
1239   if ((dangle_model == 2) && (start > 1))
1240     fprintf(output, ".%s (%6.2f) %4d z= %.3f\n", structure, en, start - 1, zscore);
1241   else
1242     fprintf(output, "%s (%6.2f) %4d z= %.3f\n ", structure, en, start, zscore);
1243 }
1244 
1245 
1246 #endif
1247 
1248 
1249 PRIVATE void
default_callback_comparative(int start,int end,const char * structure,float en,void * data)1250 default_callback_comparative(int        start,
1251                              int        end,
1252                              const char *structure,
1253                              float      en,
1254                              void       *data)
1255 {
1256   FILE *output      = ((hit_data *)data)->output;
1257   int dangle_model  = ((hit_data *)data)->dangle_model;
1258   int csv           = ((hit_data *)data)->csv;
1259 
1260   if (csv == 1) {
1261     if ((dangle_model == 2) && (start > 1))
1262       fprintf(output, ".%s ,%6.2f, %4d, %4d\n", structure, en, start - 1, end);
1263     else
1264       fprintf(output, "%s ,%6.2f, %4d, %4d\n", structure, en, start, end);
1265   } else {
1266     if ((dangle_model == 2) && (start > 1))
1267       fprintf(output, ".%s (%6.2f) %4d - %4d\n", structure, en, start - 1, end);
1268     else
1269       fprintf(output, "%s (%6.2f) %4d - %4d\n", structure, en, start, end);
1270   }
1271 }
1272 
1273 
1274 struct block {
1275   vrna_fold_compound_t *fc;
1276   short *pt;
1277   unsigned long int start;
1278   unsigned long int end;
1279   unsigned long int shift;
1280   int energy;
1281   int energy_no3d;                   /* energy without 3'dangle */
1282   struct block *next_entry;
1283 };
1284 
1285 
1286 PRIVATE int
update_block(unsigned int i,unsigned int max_n,struct block * b)1287 update_block(unsigned int i,
1288              unsigned int max_n,
1289              struct block *b)
1290 {
1291   short *S1, *S2, d5, d3;
1292   unsigned int type;
1293   int n, i_local, j_local, ediff, dangles;
1294   vrna_fold_compound_t *fc;
1295   vrna_param_t *params;
1296   vrna_md_t *md;
1297 
1298   fc      = b->fc;
1299   n       = fc->length;
1300   S1      = fc->sequence_encoding;
1301   S2      = fc->sequence_encoding2;
1302   params  = fc->params;
1303   md      = &(params->model_details);
1304   dangles = md->dangles;
1305 
1306   /* re-evaluate energy for removal of base pair (i, k) */
1307   i_local = (int)(i - (b->start - b->shift) + 1);
1308 
1309   if (b->pt[i_local]) {
1310     j_local = (int)(b->pt[i_local]);
1311     /* compute energy differences due to removal of base pair (i_local, j_local) */
1312     ediff = vrna_eval_move_pt(fc, b->pt, -i_local, -j_local);
1313 
1314     /* update energy */
1315     b->energy += ediff;
1316     /* remove base pair from pair table */
1317     b->pt[i_local] = b->pt[j_local] = 0;
1318 
1319     /* since we've removed the outermost base pair, the block size
1320      *  decreases on the 3' end as well. At this point, we also
1321      *  remove any further trailing bases, if any
1322      */
1323     int end = j_local;
1324 
1325     do {
1326       b->end--;
1327       if (b->start == b->end)
1328         break;
1329     } while (b->pt[--end] == 0);
1330 
1331     if (b->end <= b->start)
1332       return 0;
1333 
1334     /* check whether we need to split the block into multiple ones */
1335     size_t stems        = 0;
1336     size_t mem_stems    = 10;
1337     size_t *start_stem  = (size_t *)vrna_alloc(sizeof(size_t) * mem_stems);
1338     size_t *end_stem    = (size_t *)vrna_alloc(sizeof(size_t) * mem_stems);
1339 
1340     for (size_t pos = i_local + 1; pos <= end; pos++)
1341       if (b->pt[pos] > pos) {
1342         start_stem[stems] = pos;
1343         end_stem[stems]   = b->pt[pos];
1344         stems++;
1345         if (stems == mem_stems) {
1346           mem_stems   *= 1.4;
1347           start_stem  = vrna_realloc(start_stem, sizeof(size_t) * mem_stems);
1348           end_stem    = vrna_realloc(end_stem, sizeof(size_t) * mem_stems);
1349         }
1350 
1351         pos = b->pt[pos];
1352       }
1353 
1354     if (stems > 1) {
1355       for (size_t k = stems - 1; k > 0; k--) {
1356         /* create a new block */
1357         struct block *new_block = (struct block *)vrna_alloc(sizeof(struct block));
1358 
1359         /* construct global coordinates for new block */
1360         new_block->start  = i + start_stem[k] - 1 - b->shift;
1361         new_block->end    = i + end_stem[k] - 1 - b->shift;
1362         new_block->shift  = (dangles == 2) ? 1 : 0;
1363 
1364         /* create structure pair table for new block */
1365         size_t l = end_stem[k] - start_stem[k] + 1;
1366         if (dangles == 2) {
1367           l++;    /* for 5' dangles */
1368           if (new_block->end < max_n)
1369             l++;  /* for 3' dangles */
1370         }
1371 
1372         new_block->pt     = (short *)vrna_alloc(sizeof(short) * (l + 1));
1373         new_block->pt[0]  = l;
1374         /* go through original structure and extract base pair positions relative to new block */
1375         for (size_t p = start_stem[k]; p <= end_stem[k]; p++) {
1376           if (b->pt[p] > p) {
1377             short i, j;
1378 
1379             i = p - start_stem[k] + 1 + new_block->shift;
1380             j = b->pt[p] - start_stem[k] + 1 + new_block->shift;
1381 
1382             new_block->pt[i]  = j;
1383             new_block->pt[j]  = i;
1384 
1385             /* remove base pair from original block */
1386             b->pt[b->pt[p]] = 0;
1387             b->pt[p]        = 0;
1388           }
1389         }
1390 
1391         /* create fold_compound for new block */
1392         char *seq_block = (char *)vrna_alloc(sizeof(char) * (l + 1));
1393         memcpy(seq_block, b->fc->sequence + start_stem[k] - 1 - new_block->shift,
1394                sizeof(char) * (l));
1395         new_block->fc = vrna_fold_compound(seq_block,
1396                                            &(b->fc->params->model_details),
1397                                            VRNA_OPTION_DEFAULT | VRNA_OPTION_EVAL_ONLY);
1398         free(seq_block);
1399 
1400         /* compute energy of new block */
1401         new_block->energy = vrna_eval_structure_pt(new_block->fc, new_block->pt);
1402 
1403         /* search for position where we can link the new block to */
1404         struct block *ptr, *ptr_prev;
1405         ptr_prev = ptr = b;
1406         do {
1407           ptr_prev  = ptr;
1408           ptr       = ptr->next_entry;
1409         } while ((ptr) && (ptr->start < new_block->start));
1410 
1411         /* insert new block */
1412         ptr_prev->next_entry  = new_block;
1413         new_block->next_entry = ptr;
1414       }
1415 
1416       /* Finally, we need to update the original block */
1417 
1418       /* update global end position */
1419       b->end = i + end_stem[0] - 1 - b->shift;
1420 
1421       /* update energy */
1422       b->energy = vrna_eval_structure_pt(b->fc, b->pt);
1423     }
1424 
1425     /* clean up memory */
1426     free(start_stem);
1427     free(end_stem);
1428 
1429     /* update for odd dangle models below */
1430     if (dangles % 1) {
1431     }
1432   } else if (dangles % 1) {
1433     /* position i is unpaired, so we only need to update energies if
1434      * position i + 1 forms a base pair due to 5' dangles
1435      */
1436     if (b->pt[i + 1]) {
1437       i_local = (int)i + b->start - 1 + 1;
1438       j_local = b->pt[i_local + 1];
1439       switch (dangles) {
1440         case 2:
1441           d5    = S1[i_local - 1];
1442           d3    = (j_local + 1 <= i_local + n - 1) ? S1[j_local + 1 - 1] : -1;
1443           type  = vrna_get_ptype_md(S2[i_local],
1444                                     S2[j_local],
1445                                     md);
1446           ediff = vrna_E_ext_stem(type, -1, d3, params) -
1447                   vrna_E_ext_stem(type, d5, d3, params);
1448           break;
1449 
1450         case 0:
1451           ediff = 0;
1452           break;
1453 
1454         default:
1455           ediff = 0;
1456           break;
1457       }
1458       /* update the energy */
1459       b->energy += ediff;
1460     }
1461   }
1462 
1463   /* increment start position and shift */
1464   b->start++;
1465   b->shift++;
1466 
1467   return 1;
1468 }
1469 
1470 
1471 PRIVATE struct block *
remove_block(struct block ** before,struct block * block)1472 remove_block(struct block **before,
1473              struct block *block)
1474 {
1475   struct block *ptr, **ptr2;
1476 
1477   if (*before == block)
1478     ptr2 = before;
1479   else
1480     ptr2 = &((*before)->next_entry);
1481 
1482   /* remember next entry */
1483   ptr = block->next_entry;
1484 
1485   /* cleanup memory for block we want to remove */
1486   vrna_fold_compound_free(block->fc);
1487   free(block->pt);
1488   /* remove block */
1489   free(block);
1490 
1491   /* link next entry */
1492   *ptr2 = ptr;
1493 
1494   return ptr;
1495 }
1496 
1497 
1498 PRIVATE void
truncate_blocks(unsigned long int i,unsigned long int n,struct block ** block_list)1499 truncate_blocks(unsigned long int i,
1500                 unsigned long int n,
1501                 struct block      **block_list)
1502 {
1503   struct block *ptr_prev, *ptr;
1504 
1505   ptr_prev  = NULL;
1506   ptr       = *block_list;
1507 
1508   while (ptr) {
1509     /* 1. remove block if it was superseded by index i */
1510     if (ptr->end <= i) {
1511       ptr = remove_block((ptr_prev) ? &ptr_prev : block_list, ptr);
1512       continue;
1513     }
1514 
1515     /* 2. Update energy */
1516     if (ptr->start == i) {
1517       /* remove nucleotide at position i and update energies accordingly */
1518       /* this also splits a substructure component into consecutive
1519        * tokens, if necessary
1520        */
1521       if (!update_block(i, n, ptr)) {
1522         /* this block doesn't contain any more pairs so we remove it entirely */
1523         ptr = remove_block((ptr_prev) ? &ptr_prev : block_list, ptr);
1524         continue;
1525       }
1526     } else if (ptr->start > i) {
1527       break;
1528     }
1529 
1530     /* go to next block */
1531     ptr_prev  = ptr;
1532     ptr       = ptr->next_entry;
1533   }
1534 }
1535 
1536 
1537 PRIVATE struct block *
extract_Lfold_entry(FILE * f,long line_start,const char * sequence,const vrna_md_t * md)1538 extract_Lfold_entry(FILE            *f,
1539                     long            line_start,
1540                     const char      *sequence,
1541                     const vrna_md_t *md)
1542 {
1543   char *l, *seq_local, *structure;
1544   unsigned long int i, j;
1545   float en;
1546   struct block *storage;
1547 
1548   storage = NULL;
1549 
1550   if (fseek(f, line_start, SEEK_SET) != -1) {
1551     l         = vrna_read_line(f);
1552     en        = INF / 100.;
1553     structure = (char *)vrna_alloc(sizeof(char) * (strlen(l) + 1));
1554 
1555     /*  each line should consist of 3 parts,
1556      *  1. A dot-bracket structure
1557      *  2. An energy value in kcal/mol
1558      *  3. A starting position
1559      */
1560     if (sscanf(l, "%[.()] %*c %f %*c %lu", structure, &en, &i) == 3) {
1561       storage = (struct block *)vrna_alloc(sizeof(struct block));
1562 
1563       j = i + strlen(structure) - 1;
1564 
1565       seq_local = (char *)vrna_alloc(sizeof(char) * (j - i + 2));
1566       memcpy(seq_local, sequence + i - 1, (sizeof(char) * (j - i + 1)));
1567 
1568       storage->fc = vrna_fold_compound(seq_local,
1569                                        md,
1570                                        VRNA_OPTION_DEFAULT | VRNA_OPTION_EVAL_ONLY);
1571       storage->pt           = vrna_ptable(structure);
1572       storage->start        = i;
1573       storage->end          = j;
1574       storage->shift        = 0;
1575       storage->energy       = vrna_convert_kcal_to_dcal(en);
1576       storage->energy_no3d  = 0; /* energy without 3'dangle */
1577       storage->next_entry   = NULL;
1578 
1579       free(seq_local);
1580 
1581       /* with dangles==0 or dangles==2, we don't need the trailing and leading unpaired base(s) */
1582       if ((md->dangles % 1) == 0) {
1583         if (storage->pt[1] == 0) {
1584           storage->start++;
1585           storage->shift++;
1586         }
1587 
1588         if (storage->pt[storage->fc->length] == 0)
1589           storage->end--;
1590       }
1591     }
1592 
1593     /* we only store the pair table, and drop the dot-bracket string */
1594     free(structure);
1595     free(l);
1596   }
1597 
1598   return storage;
1599 }
1600 
1601 
1602 PRIVATE unsigned long int
insert_block(char * structure,struct block * selected,int * energy)1603 insert_block(char         *structure,
1604              struct block *selected,
1605              int          *energy)
1606 {
1607   short *pt;
1608   unsigned int long i, i_local, start, end, shift;
1609   int e;
1610 
1611   pt    = selected->pt;
1612   start = selected->start;
1613   end   = selected->end;
1614   shift = selected->shift;
1615   e     = selected->energy;
1616 
1617   /* The last block is always part of the full length MFE */
1618   for (i = start; i <= end; i++) {
1619     i_local = i - start + shift + 1;
1620     if (pt[i_local] > i_local) {
1621       structure[i - 1]                                = '(';
1622       structure[start - shift + pt[i_local] - 1 - 1]  = ')';
1623     }
1624   }
1625 
1626   *energy = *energy - e;
1627 
1628   return end;
1629 }
1630 
1631 
1632 PRIVATE void
print_block_list(struct block * block_list)1633 print_block_list(struct block *block_list)
1634 {
1635   struct block *ptr;
1636   size_t cnt = 0;
1637 
1638   for (ptr = block_list; ptr; ptr = ptr->next_entry, cnt++)
1639     printf("block %lu: en=%d, start: %lu, end: %lu, shift: %lu\n",
1640            cnt,
1641            ptr->energy,
1642            ptr->start,
1643            ptr->end,
1644            ptr->shift);
1645   printf("%lu blocks remaining\n", cnt);
1646   fflush(stdout);
1647 }
1648 
1649 
1650 PRIVATE void
append_blocks(struct block ** last_block,FILE * f,long * lines,size_t * lines_left,vrna_fold_compound_t * fc,unsigned long int max_start)1651 append_blocks(struct block          **last_block,
1652               FILE                  *f,
1653               long                  *lines,
1654               size_t                *lines_left,
1655               vrna_fold_compound_t  *fc,
1656               unsigned long int     max_start)
1657 {
1658   vrna_md_t *md;
1659 
1660   md = &(fc->params->model_details);
1661 
1662   while (((*lines_left) > 0) &&
1663          ((*last_block)->start < max_start)) {
1664     (*lines_left)--;
1665 
1666     struct block *ptr = extract_Lfold_entry(f, lines[(*lines_left)], fc->sequence, md);
1667 
1668     if (ptr != NULL) {
1669       (*last_block)->next_entry = ptr;
1670       *last_block               = ptr;
1671     }
1672   }
1673   ;
1674 }
1675 
1676 
1677 PUBLIC int
vrna_backtrack_window(vrna_fold_compound_t * fc,const char * Lfold_filename,long file_pos,char ** structure,double mfe)1678 vrna_backtrack_window(vrna_fold_compound_t  *fc,
1679                       const char            *Lfold_filename,
1680                       long                  file_pos,
1681                       char                  **structure,
1682                       double                mfe)
1683 {
1684   unsigned int n, look_ahead;
1685   int e, maxdist, underflows, *f3;
1686   int ret;
1687   double mfe_corr;
1688   FILE *f;
1689 
1690   ret         = 0;
1691   *structure  = NULL;
1692 
1693   if ((fc) &&
1694       (Lfold_filename) &&
1695       (structure)) {
1696     vrna_md_t *md;
1697 
1698     n           = fc->length;
1699     md          = &(fc->params->model_details);
1700     maxdist     = md->window_size;
1701     look_ahead  = 3 * maxdist;
1702     underflows  = 0;
1703     mfe_corr    = mfe;
1704 
1705     f3 = fc->matrices->f3_local;
1706 
1707     if (md->dangles % 2) {
1708       vrna_message_warning(
1709         "Global mfE structure backtracking not available for odd dangle models yet!");
1710       return ret;
1711     }
1712 
1713     /* check whether we need to adjust energies due to integer underflows in the forward recursion */
1714     while (vrna_convert_kcal_to_dcal(mfe_corr) < f3[1]) {
1715       mfe_corr -= (double)(UNDERFLOW_CORRECTION) / 100.;
1716       underflows++;
1717     }
1718 
1719     e = vrna_convert_kcal_to_dcal(mfe_corr);
1720 
1721 #if 0
1722     if (underflows)
1723       printf("detected %d underflows %d vs. %d vs %6.2f\n", underflows, e, f3[1], mfe);
1724 
1725 #endif
1726 
1727     e = f3[1];
1728 
1729     /* default to start at beginning of file */
1730     if (file_pos < 0)
1731       file_pos = 0;
1732 
1733     /* open Lfold output file and start parsing line positions */
1734     if ((f = fopen(Lfold_filename, "r"))) {
1735       if (fseek(f, file_pos, SEEK_SET) != -1) {
1736         size_t num_lines, mem_lines;
1737         long *lines;    /* array of file positions indicating start of lines */
1738 
1739         num_lines = 0;
1740         mem_lines = 1024;
1741 
1742         lines               = (long *)vrna_alloc(sizeof(long) * mem_lines);
1743         lines[num_lines++]  = ftell(f);
1744 
1745         /* 1. Fill array of file positions that coincide with start of lines */
1746         do {
1747           /* increase memory if necessary */
1748           if (num_lines == mem_lines) {
1749             mem_lines *= 1.4;
1750             lines     = (long *)vrna_realloc(lines, sizeof(long) * mem_lines);
1751           }
1752 
1753           /* seek to next newline char */
1754           while ((!feof(f)) && (fgetc(f) != '\n'));
1755 
1756           /* stop at end of file */
1757           if (feof(f))
1758             break;
1759 
1760           lines[num_lines++] = ftell(f);
1761         } while(1);
1762 
1763         /* do the actual parsing of the data */
1764         if (num_lines > 0) {
1765           num_lines--;
1766 
1767           size_t block_num    = 0;
1768           unsigned long int i = num_lines;
1769           size_t lines_left   = num_lines;
1770 
1771           /* we will use *ptr as current block pointer and save the list start in 8block_list */
1772           struct block *block_list, *ptr, *block_list_last;
1773 
1774           /* handle first block separately */
1775           do {
1776             lines_left--;
1777 
1778             block_list_last = block_list = ptr = extract_Lfold_entry(f,
1779                                                                      lines[lines_left],
1780                                                                      fc->sequence,
1781                                                                      md);
1782           } while ((block_list == NULL) && (lines_left > 0));
1783 
1784           /* start the actual backtracing procedure if we've read at least one block */
1785           if (block_list) {
1786             block_num++;
1787             *structure = (char *)vrna_alloc(sizeof(char) * (fc->length + 1));
1788             memset(*structure, (int)'.', fc->length);
1789 
1790             /*  go through remaining file in reverse order
1791              *  to extract the relevant data
1792              */
1793             append_blocks(&block_list_last,
1794                           f,
1795                           lines,
1796                           &lines_left,
1797                           fc,
1798                           ptr->start + look_ahead);
1799 
1800             ptr = block_list;
1801 
1802             i = insert_block(*structure, ptr, &e);
1803 
1804             for (unsigned long int ii = ptr->start; ii <= i; ii++) {
1805               /* truncate remaining blocks */
1806               truncate_blocks(ii, n, &block_list);
1807               append_blocks(&block_list_last,
1808                             f,
1809                             lines,
1810                             &lines_left,
1811                             fc,
1812                             ii + look_ahead);
1813             }
1814 
1815             i++;
1816 
1817             /* proceed with remaining blocks */
1818             for (; i <= fc->length; i++) {
1819               unsigned long int prev_i = i;
1820 
1821               /* i pairs with some k, find block representing the substructure enclodes by (i,k) */
1822               if (e != f3[i + 1]) {
1823                 if ((underflows) &&
1824                     (e == (f3[i + 1] - UNDERFLOW_CORRECTION))) {
1825                   underflows--;
1826                   e += UNDERFLOW_CORRECTION;
1827                 } else {
1828                   /* go through list of blocks that start at i to check which one we can insert */
1829                   char found = 0;
1830                   for (ptr = block_list; ptr; ptr = ptr->next_entry) {
1831                     if (ptr->start > i)
1832                       break;
1833 
1834                     if ((ptr->start == i) &&
1835                         (ptr->end > i)) {
1836                       if (e == (ptr->energy + f3[ptr->end + 1])) {
1837                         /* found the block, let's insert it */
1838                         found = 1;
1839 
1840                         i = insert_block(*structure, ptr, &e);
1841 
1842                         break;
1843                       } else if ((underflows) &&
1844                                  (e == (ptr->energy + f3[ptr->end + 1] - UNDERFLOW_CORRECTION))) {
1845                         underflows--;
1846                         found = 1;
1847                         e     += UNDERFLOW_CORRECTION;
1848                         i     = insert_block(*structure, ptr, &e);
1849 
1850                         break;
1851                       }
1852                     }
1853                   }
1854                   if (!found)
1855                     printf("didn't find block for position %lu\n", i);
1856                 }
1857               }
1858 
1859               /*
1860                *  if i is unpaired, so we do nothing except for
1861                *  truncating the remaining blocks
1862                */
1863               for (unsigned long int ii = prev_i; ii <= i; ii++) {
1864                 truncate_blocks(ii, n, &block_list);
1865                 append_blocks(&block_list_last,
1866                               f,
1867                               lines,
1868                               &lines_left,
1869                               fc,
1870                               ii + look_ahead);
1871               }
1872             }
1873 
1874             ret = 1;
1875           }
1876         }
1877       }
1878 
1879       fclose(f);
1880     }
1881   }
1882 
1883   return ret;
1884 }
1885 
1886 
1887 PRIVATE INLINE int
decompose_pair(vrna_fold_compound_t * fc,int i,int j,struct aux_arrays * aux)1888 decompose_pair(vrna_fold_compound_t *fc,
1889                int                  i,
1890                int                  j,
1891                struct aux_arrays    *aux)
1892 {
1893   unsigned char hc_decompose;
1894   int e, new_c, energy, stackEnergy, dangle_model, noLP,
1895       *DMLi1, *DMLi2, *cc, *cc1;
1896 
1897   dangle_model  = fc->params->model_details.dangles;
1898   noLP          = fc->params->model_details.noLP;
1899   hc_decompose  = fc->hc->matrix_local[i][j - i];
1900   DMLi1         = aux->DMLi1;
1901   DMLi2         = aux->DMLi2;
1902   cc            = aux->cc;
1903   cc1           = aux->cc1;
1904   e             = INF;
1905 
1906   /* do we evaluate this pair? */
1907   if (hc_decompose) {
1908     new_c = INF;
1909 
1910     /* check for hairpin loop */
1911     energy  = vrna_E_hp_loop(fc, i, j);
1912     new_c   = MIN2(new_c, energy);
1913 
1914     /* check for multibranch loops */
1915     energy  = vrna_E_mb_loop_fast(fc, i, j, DMLi1, DMLi2);
1916     new_c   = MIN2(new_c, energy);
1917 
1918     if (dangle_model == 3) {
1919       /* coaxial stacking */
1920       energy  = vrna_E_mb_loop_stack(fc, i, j);
1921       new_c   = MIN2(new_c, energy);
1922     }
1923 
1924     /* check for interior loops */
1925     energy  = vrna_E_int_loop(fc, i, j);
1926     new_c   = MIN2(new_c, energy);
1927 
1928     /* remember stack energy for --noLP option */
1929     if (noLP) {
1930       stackEnergy = vrna_E_stack(fc, i, j);
1931       new_c       = MIN2(new_c, cc1[j - 1 - (i + 1)] + stackEnergy);
1932       cc[j - i]   = new_c;
1933       if ((fc->type == VRNA_FC_TYPE_COMPARATIVE) &&
1934           (cc[j - i] != INF))
1935         cc[j - i] -= fc->pscore_local[i][j - i];
1936 
1937       e = cc1[j - 1 - (i + 1)] + stackEnergy;
1938     } else {
1939       e = new_c;
1940     }
1941 
1942     /* finally, check for auxiliary grammar rule(s) */
1943     if ((fc->aux_grammar) && (fc->aux_grammar->cb_aux_c)) {
1944       energy  = fc->aux_grammar->cb_aux_c(fc, i, j, fc->aux_grammar->data);
1945       e   = MIN2(e, energy);
1946     }
1947 
1948     if ((fc->type == VRNA_FC_TYPE_COMPARATIVE) &&
1949         (e != INF))
1950       e -= fc->pscore_local[i][j - i];
1951   } /* end >> if (pair) << */
1952 
1953   return e;
1954 }
1955 
1956 
1957 PRIVATE INLINE struct aux_arrays *
get_aux_arrays(unsigned int maxdist)1958 get_aux_arrays(unsigned int maxdist)
1959 {
1960   unsigned int j;
1961   struct aux_arrays *aux = (struct aux_arrays *)vrna_alloc(sizeof(struct aux_arrays));
1962 
1963   aux->cc     = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));   /* auxilary arrays for canonical structures     */
1964   aux->cc1    = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));   /* auxilary arrays for canonical structures     */
1965   aux->Fmi    = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));   /* holds row i of fML (avoids jumps in memory)  */
1966   aux->DMLi   = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));   /* DMLi[j] holds  MIN(fML[i,k]+fML[k+1,j])      */
1967   aux->DMLi1  = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));   /*                MIN(fML[i+1,k]+fML[k+1,j])    */
1968   aux->DMLi2  = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));   /*                MIN(fML[i+2,k]+fML[k+1,j])    */
1969 
1970   /* prefill helper arrays */
1971   for (j = 0; j < maxdist + 5; j++)
1972     aux->Fmi[j] = aux->DMLi[j] = aux->DMLi1[j] = aux->DMLi2[j] = INF;
1973 
1974   return aux;
1975 }
1976 
1977 
1978 PRIVATE INLINE void
rotate_aux_arrays(struct aux_arrays * aux,unsigned int maxdist)1979 rotate_aux_arrays(struct aux_arrays *aux,
1980                   unsigned int      maxdist)
1981 {
1982   unsigned int j;
1983   int *FF;
1984 
1985   FF          = aux->DMLi2;
1986   aux->DMLi2  = aux->DMLi1;
1987   aux->DMLi1  = aux->DMLi;
1988   aux->DMLi   = FF;
1989   FF          = aux->cc1;
1990   aux->cc1    = aux->cc;
1991   aux->cc     = FF;
1992   for (j = 1; j < maxdist + 5; j++)
1993     aux->cc[j] = aux->Fmi[j] = aux->DMLi[j] = INF;
1994 }
1995 
1996 
1997 PRIVATE INLINE void
free_aux_arrays(struct aux_arrays * aux)1998 free_aux_arrays(struct aux_arrays *aux)
1999 {
2000   free(aux->cc);
2001   free(aux->cc1);
2002   free(aux->Fmi);
2003   free(aux->DMLi);
2004   free(aux->DMLi1);
2005   free(aux->DMLi2);
2006   free(aux);
2007 }
2008