1 /** \file eval.c */
2 
3 
4 /*
5  *                Free energy evaluation
6  *
7  *                c Ivo Hofacker, Chrisoph Flamm
8  *                original implementation by
9  *                Walter Fontana
10  *
11  *                ViennaRNA Package >= v2.0 by Ronny Lorenz
12  *
13  *                Vienna RNA package
14  */
15 
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <math.h>
23 #include <ctype.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <limits.h>
27 
28 #include "ViennaRNA/utils/basic.h"
29 #include "ViennaRNA/utils/structures.h"
30 #include "ViennaRNA/params/default.h"
31 #include "ViennaRNA/model.h"
32 #include "ViennaRNA/fold_vars.h"
33 #include "ViennaRNA/params/basic.h"
34 #include "ViennaRNA/constraints/hard.h"
35 #include "ViennaRNA/constraints/soft.h"
36 #include "ViennaRNA/loops/all.h"
37 #include "ViennaRNA/gquad.h"
38 #include "ViennaRNA/cofold.h"
39 #include "ViennaRNA/alphabet.h"
40 #include "ViennaRNA/datastructures/char_stream.h"
41 #include "ViennaRNA/eval.h"
42 
43 #include "ViennaRNA/color_output.inc"
44 
45 /*
46  #################################
47  # GLOBAL VARIABLES              #
48  #################################
49  */
50 
51 /*
52  #################################
53  # PRIVATE VARIABLES             #
54  #################################
55  */
56 
57 /*
58  #################################
59  # PRIVATE FUNCTION DECLARATIONS #
60  #################################
61  */
62 PRIVATE int
63 stack_energy(vrna_fold_compound_t *vc,
64              int                  i,
65              const short          *pt,
66              vrna_cstr_t          output_stream,
67              int                  verbostiy_level);
68 
69 
70 PRIVATE int
71 energy_of_extLoop_pt(vrna_fold_compound_t *vc,
72                      int                  i,
73                      const short          *pt);
74 
75 
76 PRIVATE int
77 energy_of_ml_pt(vrna_fold_compound_t  *vc,
78                 int                   i,
79                 const short           *pt);
80 
81 
82 PRIVATE int
83 cut_in_loop(int           i,
84             const short   *pt,
85             unsigned int  *sn);
86 
87 
88 PRIVATE int
89 eval_pt(vrna_fold_compound_t  *vc,
90         const short           *pt,
91         vrna_cstr_t           output_stream,
92         int                   verbosity_level);
93 
94 
95 PRIVATE int
96 eval_circ_pt(vrna_fold_compound_t *vc,
97              const short          *pt,
98              vrna_cstr_t          output_stream,
99              int                  verbosity_level);
100 
101 
102 PRIVATE int
103 en_corr_of_loop_gquad(vrna_fold_compound_t  *vc,
104                       int                   i,
105                       int                   j,
106                       const char            *structure,
107                       const short           *pt,
108                       vrna_cstr_t           output_stream,
109                       int                   verbosity_level);
110 
111 
112 PRIVATE float
113 wrap_eval_structure(vrna_fold_compound_t  *vc,
114                     const char            *structure,
115                     const short           *pt,
116                     vrna_cstr_t           output_stream,
117                     int                   verbosity);
118 
119 
120 /* consensus structure variants below */
121 PRIVATE int
122 covar_energy_of_struct_pt(vrna_fold_compound_t  *vc,
123                           const short           *pt);
124 
125 
126 PRIVATE int
127 stack_energy_covar_pt(vrna_fold_compound_t  *vc,
128                       int                   i,
129                       const short           *ptable);
130 
131 
132 PRIVATE int
133 en_corr_of_loop_gquad_ali(vrna_fold_compound_t  *vc,
134                           int                   i,
135                           int                   j,
136                           const char            *structure,
137                           const short           *pt,
138                           const int             *loop_idx,
139                           vrna_cstr_t           output_stream,
140                           int                   verbosity_level);
141 
142 
143 PRIVATE int
144 covar_en_corr_of_loop_gquad(vrna_fold_compound_t  *vc,
145                             int                   i,
146                             int                   j,
147                             const char            *structure,
148                             const short           *pt,
149                             const int             *loop_idx);
150 
151 
152 /*
153  #################################
154  # BEGIN OF FUNCTION DEFINITIONS #
155  #################################
156  */
157 PUBLIC float
vrna_eval_structure_v(vrna_fold_compound_t * vc,const char * structure,int verbosity_level,FILE * file)158 vrna_eval_structure_v(vrna_fold_compound_t  *vc,
159                       const char            *structure,
160                       int                   verbosity_level,
161                       FILE                  *file)
162 {
163   if (strlen(structure) != vc->length) {
164     vrna_message_warning("vrna_eval_structure_*: "
165                          "string and structure have unequal length (%d vs. %d)",
166                          vc->length,
167                          strlen(structure));
168     return (float)INF / 100.;
169   }
170 
171   vrna_cstr_t output_stream = vrna_cstr(vc->length, (file) ? file : stdout);
172   short       *pt           = vrna_ptable(structure);
173   float       en            = wrap_eval_structure(vc,
174                                                   structure,
175                                                   pt,
176                                                   output_stream,
177                                                   verbosity_level);
178 
179   vrna_cstr_fflush(output_stream);
180   vrna_cstr_free(output_stream);
181 
182   free(pt);
183   return en;
184 }
185 
186 
187 PUBLIC float
vrna_eval_structure_cstr(vrna_fold_compound_t * vc,const char * structure,int verbosity_level,vrna_cstr_t output_stream)188 vrna_eval_structure_cstr(vrna_fold_compound_t *vc,
189                          const char           *structure,
190                          int                  verbosity_level,
191                          vrna_cstr_t          output_stream)
192 {
193   if (strlen(structure) != vc->length) {
194     vrna_message_warning("vrna_eval_structure_*: "
195                          "string and structure have unequal length (%d vs. %d)",
196                          vc->length,
197                          strlen(structure));
198     return (float)INF / 100.;
199   }
200 
201   short *pt = vrna_ptable(structure);
202   float en  = wrap_eval_structure(vc, structure, pt, output_stream, verbosity_level);
203 
204   free(pt);
205   return en;
206 }
207 
208 
209 PUBLIC float
vrna_eval_covar_structure(vrna_fold_compound_t * vc,const char * structure)210 vrna_eval_covar_structure(vrna_fold_compound_t  *vc,
211                           const char            *structure)
212 {
213   int   res, gq, *loop_idx;
214   short *pt;
215 
216   pt                              = vrna_ptable(structure);
217   res                             = 0;
218   gq                              = vc->params->model_details.gquad;
219   vc->params->model_details.gquad = 0;
220 
221   if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
222     res = covar_energy_of_struct_pt(vc, pt);
223 
224     vc->params->model_details.gquad = gq;
225     if (gq) {
226       loop_idx  = vrna_loopidx_from_ptable(pt);
227       res       -= covar_en_corr_of_loop_gquad(vc,
228                                                1,
229                                                vc->length,
230                                                structure,
231                                                pt,
232                                                (const int *)loop_idx);
233       free(loop_idx);
234     }
235   }
236 
237   free(pt);
238   return (float)res / (100. * (float)vc->n_seq);
239 }
240 
241 
242 PUBLIC int
vrna_eval_structure_pt_v(vrna_fold_compound_t * vc,const short * pt,int verbosity_level,FILE * file)243 vrna_eval_structure_pt_v(vrna_fold_compound_t *vc,
244                          const short          *pt,
245                          int                  verbosity_level,
246                          FILE                 *file)
247 {
248   int e = INF;
249 
250   if (pt && vc) {
251     if (pt[0] != (short)vc->length) {
252       vrna_message_warning("vrna_eval_structure_*: "
253                            "string and structure have unequal length (%d vs. %d)",
254                            vc->length,
255                            pt[0]);
256       return INF;
257     }
258 
259     vrna_cstr_t output_stream = vrna_cstr(vc->length, (file) ? file : stdout);
260     e = eval_pt(vc, pt, output_stream, verbosity_level);
261     vrna_cstr_fflush(output_stream);
262     vrna_cstr_free(output_stream);
263   }
264 
265   return e;
266 }
267 
268 
269 PUBLIC int
vrna_eval_loop_pt_v(vrna_fold_compound_t * vc,int i,const short * pt,int verbosity_level)270 vrna_eval_loop_pt_v(vrna_fold_compound_t  *vc,
271                     int                   i,
272                     const short           *pt,
273                     int                   verbosity_level)
274 {
275   /* compute energy of a single loop closed by base pair (i,j) */
276   unsigned int  *sn, *so, *ss;
277   int           j, type, p, q, energy;
278   short         *s;
279   vrna_param_t  *P;
280 
281   energy = INF;
282 
283   if (pt && vc) {
284     P   = vc->params;
285     sn  = vc->strand_number;
286     so  = vc->strand_order;
287     ss  = vc->strand_start;
288     s   = vc->sequence_encoding2;
289 
290     vrna_sc_prepare(vc, VRNA_OPTION_MFE);
291 
292     if (i == 0) {
293       /* evaluate exterior loop */
294       energy = energy_of_extLoop_pt(vc, 0, pt);
295       return energy;
296     }
297 
298     j = pt[i];
299     if (j < i) {
300       vrna_message_warning("vrna_eval_loop_pt*: "
301                            "i = %d is unpaired in loop_energy()",
302                            i);
303       return INF;
304     }
305 
306     type = P->model_details.pair[s[i]][s[j]];
307     if (type == 0) {
308       type = 7;
309       if (verbosity_level > VRNA_VERBOSITY_QUIET) {
310         vrna_message_warning("bases %d and %d (%c%c) can't pair!",
311                              i, j,
312                              vrna_nucleotide_decode(s[i], &(P->model_details)),
313                              vrna_nucleotide_decode(s[j], &(P->model_details)));
314       }
315     }
316 
317     p = i;
318     q = j;
319 
320 
321     while (pt[++p] == 0);
322     while (pt[--q] == 0);
323     if (p > q) {
324       /* Hairpin */
325       energy = vrna_eval_hp_loop(vc, i, j);
326     } else if (pt[q] != (short)p) {
327       /* multi-loop */
328       int ii;
329       ii      = cut_in_loop(i, (const short *)pt, sn);
330       energy  =
331         (ii == 0) ? energy_of_ml_pt(vc, i, (const short *)pt) : energy_of_extLoop_pt(vc,
332                                                                                      ii,
333                                                                                      (const short *)pt);
334     } else {
335       /* found interior loop */
336       int type_2;
337       type_2 = P->model_details.pair[s[q]][s[p]];
338       if (type_2 == 0) {
339         type_2 = 7;
340         if (verbosity_level > VRNA_VERBOSITY_QUIET) {
341           vrna_message_warning("bases %d and %d (%c%c) can't pair!",
342                                p, q,
343                                vrna_nucleotide_decode(s[p], &(P->model_details)),
344                                vrna_nucleotide_decode(s[q], &(P->model_details)));
345         }
346       }
347 
348       energy = vrna_eval_int_loop(vc, i, j, p, q);
349     }
350   }
351 
352   return energy;
353 }
354 
355 
356 PUBLIC int
vrna_eval_move_pt(vrna_fold_compound_t * vc,short * pt,int m1,int m2)357 vrna_eval_move_pt(vrna_fold_compound_t  *vc,
358                   short                 *pt,
359                   int                   m1,
360                   int                   m2)
361 {
362   /*compute change in energy given by move (m1,m2)*/
363   unsigned int  *sn, *so, *ss;
364   int           en_post, en_pre, i, j, k, l, len;
365   vrna_param_t  *P;
366 
367   len = vc->length;
368   sn  = vc->strand_number;
369   so  = vc->strand_order;
370   ss  = vc->strand_start;
371   P   = vc->params;
372 
373   k = (m1 > 0) ? m1 : -m1;
374   l = (m2 > 0) ? m2 : -m2;
375   /* first find the enclosing pair i<k<l<j */
376   for (j = l + 1; j <= len; j++) {
377     if (pt[j] <= 0)
378       continue;             /* unpaired */
379 
380     if (pt[j] < k)
381       break;              /* found it */
382 
383     if (pt[j] > j) {
384       j = pt[j];          /* skip substructure */
385     } else {
386       vrna_message_warning("vrna_eval_move_pt: "
387                            "illegal move or broken pair table in vrna_eval_move_pt()\n"
388                            "%d %d %d %d ", m1, m2, j, pt[j]);
389       return INF;
390     }
391   }
392   i       = (j <= len) ? pt[j] : 0;
393   en_pre  = vrna_eval_loop_pt(vc, i, (const short *)pt);
394   en_post = 0;
395   if (m1 < 0) {
396     /*it's a delete move */
397     en_pre  += vrna_eval_loop_pt(vc, k, (const short *)pt);
398     pt[k]   = 0;
399     pt[l]   = 0;
400   } else {
401     /* insert move */
402     pt[k]   = l;
403     pt[l]   = k;
404     en_post += vrna_eval_loop_pt(vc, k, (const short *)pt);
405   }
406 
407   en_post += vrna_eval_loop_pt(vc, i, (const short *)pt);
408   /*  restore pair table */
409   if (m1 < 0) {
410     pt[k] = l;
411     pt[l] = k;
412   } else {
413     pt[k] = 0;
414     pt[l] = 0;
415   }
416 
417   /* Cofolding -- Check if move changes COFOLD-Penalty */
418   if (sn[k] != sn[l]) {
419     int p, c;
420     p = c = 0;
421     for (p = 1; p < ss[so[1]];) {
422       /* Count basepairs between two strands */
423       if (pt[p] != 0) {
424         if (sn[p] == sn[pt[p]]) /* Skip stuff */
425           p = pt[p];
426         else if (++c > 1)
427           break;                 /* Count a basepair, break if we have more than one */
428       }
429 
430       p++;
431     }
432     if (m1 < 0 && c == 1) /* First and only inserted basepair */
433       return en_post - en_pre - P->DuplexInit;
434     else
435     if (c == 0) /* Must have been a delete move */
436       return en_post - en_pre + P->DuplexInit;
437   }
438 
439   return en_post - en_pre;
440 }
441 
442 
443 /*
444  #################################
445  # STATIC helper functions below #
446  #################################
447  */
448 PRIVATE INLINE int
eval_ext_int_loop(vrna_fold_compound_t * vc,int i,int j,int p,int q)449 eval_ext_int_loop(vrna_fold_compound_t  *vc,
450                   int                   i,
451                   int                   j,
452                   int                   p,
453                   int                   q)
454 {
455   unsigned char type, type_2;
456   short         **SS, **S5, **S3, *S, si, sj, sp, sq;
457   unsigned int  s, n_seq, **a2s;
458   int           e, length;
459   vrna_param_t  *P;
460   vrna_md_t     *md;
461   vrna_sc_t     *sc, **scs;
462 
463   length  = vc->length;
464   P       = vc->params;
465   md      = &(P->model_details);
466   S       = vc->sequence_encoding;
467   e       = INF;
468 
469   switch (vc->type) {
470     case VRNA_FC_TYPE_SINGLE:
471       si      = S[j + 1];
472       sj      = S[i - 1];
473       sp      = S[p - 1];
474       sq      = S[q + 1];
475       type    = vrna_get_ptype_md(S[j], S[i], md);
476       type_2  = vrna_get_ptype_md(S[q], S[p], md);
477       sc      = vc->sc;
478 
479       e = ubf_eval_ext_int_loop(i, j, p, q,
480                                 i - 1, j + 1, p - 1, q + 1,
481                                 si, sj, sp, sq,
482                                 type, type_2,
483                                 length,
484                                 P, sc);
485       break;
486 
487     case VRNA_FC_TYPE_COMPARATIVE:
488       n_seq = vc->n_seq;
489       SS    = vc->S;
490       S5    = vc->S5;
491       S3    = vc->S3;
492       a2s   = vc->a2s;
493       n_seq = vc->n_seq;
494       scs   = vc->scs;
495 
496       for (e = 0, s = 0; s < n_seq; s++) {
497         type    = vrna_get_ptype_md(SS[s][j], SS[s][i], md);
498         type_2  = vrna_get_ptype_md(SS[s][q], SS[s][p], md);
499 
500         sc = (scs && scs[s]) ? scs[s] : NULL;
501 
502         e += ubf_eval_ext_int_loop(a2s[s][i], a2s[s][j], a2s[s][p], a2s[s][q],
503                                    a2s[s][i - 1], a2s[s][j + 1], a2s[s][p - 1], a2s[s][q + 1],
504                                    S3[s][j], S5[s][i], S5[s][p], S3[s][q],
505                                    type, type_2,
506                                    a2s[s][length],
507                                    P, sc);
508       }
509 
510       break;
511   }
512 
513   return e;
514 }
515 
516 
517 PRIVATE float
wrap_eval_structure(vrna_fold_compound_t * vc,const char * structure,const short * pt,vrna_cstr_t output_stream,int verbosity)518 wrap_eval_structure(vrna_fold_compound_t  *vc,
519                     const char            *structure,
520                     const short           *pt,
521                     vrna_cstr_t           output_stream,
522                     int                   verbosity)
523 {
524   int   res, gq, L, l[3];
525   float energy;
526 
527   energy                          = (float)INF / 100.;
528   gq                              = vc->params->model_details.gquad;
529   vc->params->model_details.gquad = 0;
530 
531   switch (vc->type) {
532     case VRNA_FC_TYPE_SINGLE:
533       if (vc->params->model_details.circ)
534         res = eval_circ_pt(vc, pt, output_stream, verbosity);
535       else
536         res = eval_pt(vc, pt, output_stream, verbosity);
537 
538       vc->params->model_details.gquad = gq;
539 
540       if (gq && (parse_gquad(structure, &L, l) > 0)) {
541         if (verbosity > 0)
542           vrna_cstr_print_eval_sd_corr(output_stream);
543 
544         res += en_corr_of_loop_gquad(vc, 1, vc->length, structure, pt, output_stream, verbosity);
545       }
546 
547       energy = (float)res / 100.;
548       break;
549 
550     case VRNA_FC_TYPE_COMPARATIVE:
551       if (vc->params->model_details.circ)
552         res = eval_circ_pt(vc, pt, output_stream, verbosity);
553       else
554         res = eval_pt(vc, pt, output_stream, verbosity);
555 
556       vc->params->model_details.gquad = gq;
557 
558       if (gq && (parse_gquad(structure, &L, l) > 0)) {
559         if (verbosity > 0)
560           vrna_cstr_print_eval_sd_corr(output_stream);
561 
562         int *loop_idx = vrna_loopidx_from_ptable(pt);
563         res += en_corr_of_loop_gquad_ali(vc,
564                                          1,
565                                          vc->length,
566                                          structure,
567                                          pt,
568                                          (const int *)loop_idx,
569                                          output_stream,
570                                          verbosity);
571         free(loop_idx);
572       }
573 
574       energy = (float)res / (100. * (float)vc->n_seq);
575       break;
576 
577     default:                      /* do nothing */
578       break;
579   }
580 
581   return energy;
582 }
583 
584 
585 PRIVATE int
eval_pt(vrna_fold_compound_t * vc,const short * pt,vrna_cstr_t output_stream,int verbosity_level)586 eval_pt(vrna_fold_compound_t  *vc,
587         const short           *pt,
588         vrna_cstr_t           output_stream,
589         int                   verbosity_level)
590 {
591   unsigned int  *sn;
592   int           i, length, energy;
593 
594   length  = vc->length;
595   sn      = vc->strand_number;
596 
597   if (vc->params->model_details.gquad)
598     vrna_message_warning("vrna_eval_*_pt: No gquadruplex support!\n"
599                          "Ignoring potential gquads in structure!\n"
600                          "Use e.g. vrna_eval_structure() instead!");
601 
602   vrna_sc_prepare(vc, VRNA_OPTION_MFE);
603 
604   energy = vc->params->model_details.backtrack_type == 'M' ?
605            energy_of_ml_pt(vc, 0, pt) :
606            energy_of_extLoop_pt(vc, 0, pt);
607 
608   if (verbosity_level > 0) {
609     vrna_cstr_print_eval_ext_loop(output_stream,
610                                   (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
611                                   (int)energy / (int)vc->n_seq :
612                                   energy);
613   }
614 
615   for (i = 1; i <= length; i++) {
616     if (pt[i] == 0)
617       continue;
618 
619     energy  += stack_energy(vc, i, pt, output_stream, verbosity_level);
620     i       = pt[i];
621   }
622   for (i = 1; sn[i] != sn[length]; i++) {
623     if (sn[i] != sn[pt[i]]) {
624       energy += vc->params->DuplexInit;
625       break;
626     }
627   }
628 
629   return energy;
630 }
631 
632 
633 PRIVATE int
eval_circ_pt(vrna_fold_compound_t * vc,const short * pt,vrna_cstr_t output_stream,int verbosity_level)634 eval_circ_pt(vrna_fold_compound_t *vc,
635              const short          *pt,
636              vrna_cstr_t          output_stream,
637              int                  verbosity_level)
638 {
639   unsigned int  s, n_seq;
640   int           i, j, length, energy, en0, degree;
641   unsigned int  **a2s;
642   vrna_param_t  *P;
643   vrna_sc_t     *sc, **scs;
644 
645   energy  = 0;
646   en0     = 0;
647   degree  = 0;
648   length  = vc->length;
649   P       = vc->params;
650   sc      = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sc : NULL;
651   scs     = (vc->type == VRNA_FC_TYPE_COMPARATIVE) ? vc->scs : NULL;
652 
653   if (P->model_details.gquad)
654     vrna_message_warning("vrna_eval_*_pt: No gquadruplex support!\n"
655                          "Ignoring potential gquads in structure!\n"
656                          "Use e.g. vrna_eval_structure() instead!");
657 
658   vrna_sc_prepare(vc, VRNA_OPTION_MFE);
659 
660   /* evaluate all stems in exterior loop */
661   for (i = 1; i <= length; i++) {
662     if (pt[i] == 0)
663       continue;
664 
665     degree++;
666     energy  += stack_energy(vc, i, (const short *)pt, output_stream, verbosity_level);
667     i       = pt[i];
668   }
669 
670   /* find first stem */
671   for (i = 1; (i <= length) && (!pt[i]); i++);
672   j = pt[i];
673 
674   /* evaluate exterior loop itself */
675   switch (degree) {
676     case 0:   /* unstructured */
677       switch (vc->type) {
678         case VRNA_FC_TYPE_SINGLE:
679           if (sc)
680             if (sc->energy_up)
681               en0 += sc->energy_up[1][length];
682 
683           break;
684 
685         case VRNA_FC_TYPE_COMPARATIVE:
686           n_seq = vc->n_seq;
687           a2s   = vc->a2s;
688           if (scs) {
689             for (s = 0; s < n_seq; s++)
690               if (scs[s] && scs[s]->energy_up)
691                 en0 += scs[s]->energy_up[1][a2s[s][length]];
692           }
693 
694           break;
695       }
696       break;
697     case 1:   /* hairpin loop */
698       en0 = vrna_eval_ext_hp_loop(vc, i, j);
699       break;
700 
701     case 2:   /* interior loop */
702     {
703       int p, q;
704       /* seek to next pair */
705       for (p = j + 1; pt[p] == 0; p++);
706       q = pt[p];
707 
708       en0 = eval_ext_int_loop(vc, i, j, p, q);
709     }
710     break;
711 
712     default:  /* multibranch loop */
713       en0 = energy_of_ml_pt(vc, 0, (const short *)pt);
714 
715       if (vc->type == VRNA_FC_TYPE_SINGLE)
716         en0 -= E_MLstem(0, -1, -1, P);         /* remove virtual closing pair */
717 
718       break;
719   }
720 
721   if (verbosity_level > 0) {
722     vrna_cstr_print_eval_ext_loop(output_stream,
723                                   (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
724                                   (int)en0 / (int)vc->n_seq :
725                                   en0);
726   }
727 
728   energy += en0;
729 
730   return energy;
731 }
732 
733 
734 /*---------------------------------------------------------------------------*/
735 /*  returns a correction term that may be added to the energy retrieved
736  *  from energy_of_struct_par() to correct misinterpreted loops. This
737  *  correction is necessary since energy_of_struct_par() will forget
738  *  about the existance of gquadruplexes and just treat them as unpaired
739  *  regions.
740  *
741  *  recursive variant
742  */
743 PRIVATE int
en_corr_of_loop_gquad(vrna_fold_compound_t * vc,int i,int j,const char * structure,const short * pt,vrna_cstr_t output_stream,int verbosity_level)744 en_corr_of_loop_gquad(vrna_fold_compound_t  *vc,
745                       int                   i,
746                       int                   j,
747                       const char            *structure,
748                       const short           *pt,
749                       vrna_cstr_t           output_stream,
750                       int                   verbosity_level)
751 {
752   char          *sequence;
753   int           pos, tmp_e, energy, p, q, r, s, u, type, type2, L, l[3], *rtype, *loop_idx;
754   int           num_elem, num_g, elem_i, elem_j, up_mis;
755   short         *s1, *s2;
756   vrna_param_t  *P;
757   vrna_md_t     *md;
758 
759   sequence  = vc->sequence;
760   loop_idx  = vrna_loopidx_from_ptable(pt);
761   s1        = vc->sequence_encoding;
762   s2        = vc->sequence_encoding2;
763   P         = vc->params;
764   md        = &(P->model_details);
765   rtype     = &(md->rtype[0]);
766 
767   energy  = 0;
768   q       = i;
769 
770   while ((pos = parse_gquad(structure + q - 1, &L, l)) > 0) {
771     q += pos - 1;
772     p = q - 4 * L - l[0] - l[1] - l[2] + 1;
773     if (q > j)
774       break;
775 
776     /* we've found the first g-quadruplex at position [p,q] */
777     tmp_e   = E_gquad(L, l, P);
778     energy  += tmp_e;
779     if (verbosity_level > 0)
780       vrna_cstr_print_eval_gquad(output_stream, p, L, l, tmp_e);
781 
782     /* check if it's enclosed in a base pair */
783     if (loop_idx[p] == 0) {
784       q++;
785       continue;                         /* g-quad in exterior loop */
786     } else {
787       /*  find its enclosing pair */
788       num_elem  = 0;
789       num_g     = 1;
790       r         = p - 1;
791       up_mis    = q - p + 1;
792 
793       /* seek for first pairing base located 5' of the g-quad */
794       for (r = p - 1; !pt[r] && (r >= i); r--);
795 
796       if (r < pt[r]) {
797         /* found the enclosing pair */
798         s = pt[r];
799       } else {
800         num_elem++;
801         elem_i  = pt[r];
802         elem_j  = r;
803         r       = pt[r] - 1;
804         /* seek for next pairing base 5' of r */
805         for (; !pt[r] && (r >= i); r--);
806 
807         if (r < pt[r]) {
808           /* found the enclosing pair */
809           s = pt[r];
810         } else {
811           /* hop over stems and unpaired nucleotides */
812           while ((r > pt[r]) && (r >= i)) {
813             if (pt[r]) {
814               r = pt[r];
815               num_elem++;
816             }
817 
818             r--;
819           }
820 
821           s = pt[r]; /* found the enclosing pair */
822         }
823       }
824 
825       /* now we have the enclosing pair (r,s) */
826 
827       u = q + 1;
828       /* we know everything about the 5' part of this loop so check the 3' part */
829       while (u < s) {
830         if (structure[u - 1] == '.') {
831           u++;
832         } else if (structure[u - 1] == '+') {
833           /* found another gquad */
834           pos = parse_gquad(structure + u - 1, &L, l);
835           if (pos > 0) {
836             tmp_e = E_gquad(L, l, P);
837 
838             if (verbosity_level > 0)
839               vrna_cstr_print_eval_gquad(output_stream, pos, L, l, tmp_e);
840 
841             energy  += tmp_e;
842             up_mis  += pos;
843             u       += pos;
844             num_g++;
845           }
846         } else {
847           /* we must have found a stem */
848           num_elem++;
849           elem_i  = u;
850           elem_j  = pt[u];
851           energy  += en_corr_of_loop_gquad(vc,
852                                            u,
853                                            pt[u],
854                                            structure,
855                                            pt,
856                                            output_stream,
857                                            verbosity_level);
858           u = pt[u] + 1;
859         }
860       }
861 
862       /* here, u == s */
863       int e_minus, e_plus;
864 
865       e_plus = e_minus = 0;
866 
867       /* we are done since we've found no other 3' structure element */
868       switch (num_elem) {
869         /* g-quad was misinterpreted as hairpin closed by (r,s) */
870         case 0:
871           e_minus = vrna_eval_hp_loop(vc, r, s);
872           if (verbosity_level > 0) {
873             vrna_cstr_print_eval_hp_loop_revert(output_stream,
874                                                 r,
875                                                 s,
876                                                 sequence[r - 1],
877                                                 sequence[s - 1],
878                                                 e_minus);
879           }
880 
881           type = md->pair[s2[r]][s2[s]];
882 
883           /* if we consider the G-Quadruplex, we have */
884           if (num_g == 1) {
885             /* a) an interior loop like structure */
886             if (dangles == 2)
887               e_plus += P->mismatchI[type][s1[r + 1]][s1[s - 1]];
888 
889             if (type > 2)
890               e_plus += P->TerminalAU;
891 
892             e_plus += P->internal_loop[s - r - 1 - up_mis];
893             if (verbosity_level > 0) {
894               vrna_cstr_print_eval_int_loop(output_stream,
895                                             r,
896                                             s,
897                                             sequence[r - 1],
898                                             sequence[s - 1],
899                                             p,
900                                             q,
901                                             sequence[p - 1],
902                                             sequence[q - 1],
903                                             e_plus);
904             }
905           } else {
906             /* or b) a multibranch loop like structure */
907             e_plus = P->MLclosing
908                      + E_MLstem(rtype[type], s1[s - 1], s1[r + 1], P)
909                      + num_g * E_MLstem(0, -1, -1, P)
910                      + (s - r - 1 - up_mis) * P->MLbase;
911 
912             if (verbosity_level > 0) {
913               vrna_cstr_print_eval_mb_loop(output_stream,
914                                            r,
915                                            s,
916                                            sequence[r - 1],
917                                            sequence[s - 1],
918                                            e_plus);
919             }
920           }
921 
922           energy += e_plus - e_minus;
923           break;
924 
925         /* g-quad was misinterpreted as interior loop closed by (r,s) with enclosed pair (elem_i, elem_j) */
926         case 1:
927           type    = md->pair[s2[r]][s2[s]];
928           type2   = md->pair[s2[elem_i]][s2[elem_j]];
929           e_plus  = P->MLclosing
930                     + E_MLstem(rtype[type], s1[s - 1], s1[r + 1], P)
931                     + (elem_i - r - 1 + s - elem_j - 1 - up_mis) * P->MLbase
932                     + E_MLstem(type2, s1[elem_i - 1], s1[elem_j + 1], P);
933 
934           e_plus += num_g * E_MLstem(0, -1, -1, P);
935 
936           e_minus = vrna_eval_int_loop(vc, r, s, elem_i, elem_j);
937 
938           energy += e_plus - e_minus;
939 
940           if (verbosity_level > 0) {
941             vrna_cstr_print_eval_int_loop_revert(output_stream,
942                                                  r,
943                                                  s,
944                                                  sequence[r - 1],
945                                                  sequence[j - 1],
946                                                  elem_i,
947                                                  elem_j,
948                                                  sequence[elem_i - 1],
949                                                  sequence[elem_j - 1],
950                                                  e_minus);
951             vrna_cstr_print_eval_mb_loop(output_stream,
952                                          r,
953                                          s,
954                                          sequence[r - 1],
955                                          sequence[s - 1],
956                                          e_plus);
957           }
958 
959           break;
960 
961         /* gquad was misinterpreted as unpaired nucleotides in a multiloop */
962         default:
963           e_minus = (up_mis) * P->MLbase;
964           e_plus  = num_g * E_MLstem(0, -1, -1, P);
965           energy  += e_plus - e_minus;
966 
967           if (verbosity_level > 0) {
968             vrna_cstr_print_eval_mb_loop_revert(output_stream,
969                                                 r,
970                                                 s,
971                                                 sequence[r - 1],
972                                                 sequence[s - 1],
973                                                 e_minus);
974             vrna_cstr_print_eval_mb_loop(output_stream,
975                                          r,
976                                          s,
977                                          sequence[r - 1],
978                                          sequence[s - 1],
979                                          e_plus);
980           }
981 
982           break;
983       }
984 
985       q = s + 1;
986     }
987   }
988 
989   free(loop_idx);
990   return energy;
991 }
992 
993 
994 PRIVATE int
stack_energy(vrna_fold_compound_t * vc,int i,const short * pt,vrna_cstr_t output_stream,int verbosity_level)995 stack_energy(vrna_fold_compound_t *vc,
996              int                  i,
997              const short          *pt,
998              vrna_cstr_t          output_stream,
999              int                  verbosity_level)
1000 {
1001   /* recursively calculate energy of substructure enclosed by (i,j) */
1002   unsigned int  *sn, *so, *ss;
1003   int           ee, energy, j, p, q;
1004   char          *string;
1005   short         *s;
1006   vrna_param_t  *P;
1007   vrna_md_t     *md;
1008 
1009   sn      = vc->strand_number;
1010   so      = vc->strand_order;
1011   ss      = vc->strand_start;
1012   s       = vc->sequence_encoding2;
1013   P       = vc->params;
1014   md      = &(P->model_details);
1015   energy  = 0;
1016 
1017   j = pt[i];
1018 
1019   if (vc->type == VRNA_FC_TYPE_SINGLE) {
1020     string = vc->sequence;
1021     if (md->pair[s[i]][s[j]] == 0) {
1022       if (verbosity_level > VRNA_VERBOSITY_QUIET) {
1023         vrna_message_warning("bases %d and %d (%c%c) can't pair!",
1024                              i, j,
1025                              string[i - 1],
1026                              string[j - 1]);
1027       }
1028     }
1029   } else if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
1030     string = vc->cons_seq;
1031   } else {
1032     return INF;
1033   }
1034 
1035   p = i;
1036   q = j;
1037 
1038   while (p < q) {
1039     /* process all stacks and interior loops */
1040     while (pt[++p] == 0);
1041     while (pt[--q] == 0);
1042     if ((pt[q] != (short)p) || (p > q))
1043       break;
1044 
1045     ee = 0;
1046     if (vc->type == VRNA_FC_TYPE_SINGLE) {
1047       if (md->pair[s[q]][s[p]] == 0) {
1048         if (verbosity_level > VRNA_VERBOSITY_QUIET) {
1049           vrna_message_warning("bases %d and %d (%c%c) can't pair!",
1050                                p, q,
1051                                string[p - 1],
1052                                string[q - 1]);
1053         }
1054       }
1055     }
1056 
1057     ee = vrna_eval_int_loop(vc, i, j, p, q);
1058 
1059     if (verbosity_level > 0) {
1060       vrna_cstr_print_eval_int_loop(output_stream,
1061                                     i, j,
1062                                     string[i - 1], string[j - 1],
1063                                     p, q,
1064                                     string[p - 1], string[q - 1],
1065                                     (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
1066                                     (int)ee / (int)vc->n_seq :
1067                                     ee);
1068     }
1069 
1070     energy  += ee;
1071     i       = p;
1072     j       = q;
1073   } /* end while */
1074 
1075   /* p,q don't pair must have found hairpin or multiloop */
1076 
1077   if (p > q) {
1078     /* hairpin */
1079     ee      = vrna_eval_hp_loop(vc, i, j);
1080     energy  += ee;
1081 
1082     if (verbosity_level > 0) {
1083       vrna_cstr_print_eval_hp_loop(output_stream,
1084                                    i, j,
1085                                    string[i - 1], string[j - 1],
1086                                    (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
1087                                    (int)ee / (int)vc->n_seq :
1088                                    ee);
1089     }
1090 
1091     return energy;
1092   }
1093 
1094   /* (i,j) is exterior pair of multiloop */
1095   while (p < j) {
1096     /* add up the contributions of the substructures of the ML */
1097     energy  += stack_energy(vc, p, pt, output_stream, verbosity_level);
1098     p       = pt[p];
1099     /* search for next base pair in multiloop */
1100     while (pt[++p] == 0);
1101   }
1102 
1103   ee = 0;
1104 
1105   switch (vc->type) {
1106     case VRNA_FC_TYPE_SINGLE:
1107     {
1108       int ii = cut_in_loop(i, pt, sn);
1109       ee = (ii == 0) ? energy_of_ml_pt(vc, i, pt) : energy_of_extLoop_pt(vc, ii, pt);
1110     }
1111     break;
1112 
1113     case VRNA_FC_TYPE_COMPARATIVE:
1114       ee = energy_of_ml_pt(vc, i, pt);
1115       break;
1116   }
1117 
1118   energy += ee;
1119   if (verbosity_level > 0) {
1120     vrna_cstr_print_eval_mb_loop(output_stream,
1121                                  i, j,
1122                                  string[i - 1], string[j - 1],
1123                                  (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
1124                                  (int)ee / (int)vc->n_seq :
1125                                  ee);
1126   }
1127 
1128   return energy;
1129 }
1130 
1131 
1132 /*---------------------------------------------------------------------------*/
1133 
1134 
1135 /**
1136 *** Calculate the energy contribution of
1137 *** stabilizing dangling-ends/mismatches
1138 *** for all stems branching off the exterior
1139 *** loop
1140 **/
1141 PRIVATE int
energy_of_extLoop_pt(vrna_fold_compound_t * vc,int i,const short * pt)1142 energy_of_extLoop_pt(vrna_fold_compound_t *vc,
1143                      int                  i,
1144                      const short          *pt)
1145 {
1146   unsigned int  *sn;
1147   int           energy, mm5, mm3, bonus, p, q, q_prev, length, dangle_model, n_seq, ss, u,
1148                 start;
1149   short         *s, *s1, **S, **S5, **S3;
1150   unsigned int  **a2s;
1151   vrna_param_t  *P;
1152   vrna_md_t     *md;
1153   vrna_sc_t     *sc, **scs;
1154 
1155 
1156   /* helper variables for dangles == 1 case */
1157   int           E3_available;   /* energy of 5' part where 5' mismatch is available for current stem */
1158   int           E3_occupied;    /* energy of 5' part where 5' mismatch is unavailable for current stem */
1159 
1160 
1161   /* initialize vars */
1162   length        = vc->length;
1163   sn            = vc->strand_number;
1164   P             = vc->params;
1165   md            = &(P->model_details);
1166   dangle_model  = md->dangles;
1167   s             = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : NULL;
1168   s1            = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding : NULL;
1169   sc            = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sc : NULL;
1170   S             = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S;
1171   S5            = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S5;
1172   S3            = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S3;
1173   a2s           = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->a2s;
1174   n_seq         = (vc->type == VRNA_FC_TYPE_SINGLE) ? 1 : vc->n_seq;
1175   scs           = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->scs;
1176 
1177   energy  = 0;
1178   bonus   = 0;
1179   p       = start = (i == 0) ? 1 : i;
1180   q_prev  = -1;
1181 
1182   if (dangle_model % 2 == 1) {
1183     E3_available  = INF;
1184     E3_occupied   = 0;
1185   }
1186 
1187   /* seek to opening base of first stem */
1188   while (p <= length && !pt[p])
1189     p++;
1190 
1191   switch (vc->type) {
1192     case VRNA_FC_TYPE_SINGLE:
1193 
1194       /* add soft constraints for first unpaired nucleotides */
1195       if (sc) {
1196         if (sc->energy_up)
1197           bonus += sc->energy_up[start][p - start];
1198 
1199         /* how do we handle generalized soft constraints here ? */
1200       }
1201 
1202       break;
1203 
1204     case VRNA_FC_TYPE_COMPARATIVE:
1205 
1206       /* add soft constraints for first unpaired nucleotides */
1207       if (scs) {
1208         for (ss = 0; ss < n_seq; ss++) {
1209           if (scs[ss]) {
1210             if (scs[ss]->energy_up) {
1211               u     = a2s[ss][p] - a2s[ss][start];
1212               bonus += scs[ss]->energy_up[a2s[ss][start]][u];
1213             }
1214 
1215             /* how do we handle generalized soft constraints here ? */
1216           }
1217         }
1218       }
1219 
1220       break;
1221 
1222     default:
1223       return INF;
1224       break;
1225   }
1226 
1227   while (p < length) {
1228     int tt;
1229     /* p must have a pairing partner */
1230     q = (int)pt[p];
1231 
1232     switch (vc->type) {
1233       case VRNA_FC_TYPE_SINGLE:     /* get type of base pair (p,q) */
1234         tt = vrna_get_ptype_md(s[p], s[q], md);
1235 
1236         switch (dangle_model) {
1237           /* no dangles */
1238           case 0:
1239             energy += vrna_E_ext_stem(tt, -1, -1, P);
1240             break;
1241 
1242           /* the beloved double dangles */
1243           case 2:
1244             mm5     = ((sn[p - 1] == sn[p]) && (p > 1))       ? s1[p - 1] : -1;
1245             mm3     = ((sn[q] == sn[q + 1]) && (q < length))  ? s1[q + 1] : -1;
1246             energy  += vrna_E_ext_stem(tt, mm5, mm3, P);
1247             break;
1248 
1249           default:
1250           {
1251             int tmp;
1252             if (q_prev + 2 < p) {
1253               E3_available  = MIN2(E3_available, E3_occupied);
1254               E3_occupied   = E3_available;
1255             }
1256 
1257             mm5 = ((sn[p - 1] == sn[p]) && (p > 1) && !pt[p - 1])       ? s1[p - 1] : -1;
1258             mm3 = ((sn[q] == sn[q + 1]) && (q < length) && !pt[q + 1])  ? s1[q + 1] : -1;
1259             tmp = MIN2(
1260               E3_occupied + vrna_E_ext_stem(tt, -1, mm3, P),
1261               E3_available + vrna_E_ext_stem(tt, mm5, mm3, P)
1262               );
1263             E3_available = MIN2(
1264               E3_occupied + vrna_E_ext_stem(tt, -1, -1, P),
1265               E3_available + vrna_E_ext_stem(tt, mm5, -1, P)
1266               );
1267             E3_occupied = tmp;
1268           }
1269           break;
1270         }                             /* end switch dangle_model */
1271         break;
1272 
1273       case VRNA_FC_TYPE_COMPARATIVE:
1274         for (ss = 0; ss < n_seq; ss++) {
1275           /* get type of base pair (p,q) */
1276           tt = vrna_get_ptype_md(S[ss][p], S[ss][q], md);
1277 
1278           switch (dangle_model) {
1279             case 0:
1280               energy += vrna_E_ext_stem(tt, -1, -1, P);
1281               break;
1282 
1283             case 2:
1284               mm5     = (a2s[ss][p] > 1) ? S5[ss][p] : -1;
1285               mm3     = (a2s[ss][q] < a2s[ss][length]) ? S3[ss][q] : -1;
1286               energy  += vrna_E_ext_stem(tt, mm5, mm3, P);
1287               break;
1288 
1289             default:
1290               break;                                     /* odd dangles not implemented yet */
1291           }
1292         }
1293         break;
1294 
1295       default:
1296         break;                             /* this should never happen */
1297     }
1298 
1299     /* seek to the next stem */
1300     p       = q + 1;
1301     q_prev  = q;
1302     while (p <= length && !pt[p])
1303       p++;
1304 
1305     switch (vc->type) {
1306       case VRNA_FC_TYPE_SINGLE:     /* add soft constraints for unpaired region */
1307         if (sc && (q_prev + 1 <= length)) {
1308           if (sc->energy_up)
1309             bonus += sc->energy_up[q_prev + 1][p - q_prev - 1];
1310 
1311           /* how do we handle generalized soft constraints here ? */
1312         }
1313 
1314         break;
1315 
1316       case VRNA_FC_TYPE_COMPARATIVE:
1317         if (scs) {
1318           for (ss = 0; ss < n_seq; ss++) {
1319             if (scs[ss]) {
1320               if (scs[ss]->energy_up) {
1321                 u     = a2s[ss][p] - a2s[ss][q_prev + 1];
1322                 bonus += scs[ss]->energy_up[a2s[ss][q_prev + 1]][u];
1323               }
1324             }
1325           }
1326         }
1327 
1328         break;
1329 
1330       default:
1331         break;                             /* this should never happen */
1332     }
1333 
1334     if (p == i)
1335       break; /* cut was in loop */
1336   }
1337 
1338   if (dangle_model % 2 == 1)
1339     energy = MIN2(E3_occupied, E3_available);
1340 
1341   return energy + bonus;
1342 }
1343 
1344 
1345 /**
1346  *** i is the 5'-base of the closing pair
1347  ***
1348  *** since each helix can coaxially stack with at most one of its
1349  *** neighbors we need an auxiliarry variable  cx_energy
1350  *** which contains the best energy given that the last two pairs stack.
1351  *** energy  holds the best energy given the previous two pairs do not
1352  *** stack (i.e. the two current helices may stack)
1353  *** We don't allow the last helix to stack with the first, thus we have to
1354  *** walk around the Loop twice with two starting points and take the minimum
1355  ***/
1356 PRIVATE int
energy_of_ml_pt(vrna_fold_compound_t * vc,int i,const short * pt)1357 energy_of_ml_pt(vrna_fold_compound_t  *vc,
1358                 int                   i,
1359                 const short           *pt)
1360 {
1361   unsigned int  *sn;
1362   int           energy, cx_energy, tmp, tmp2, best_energy = INF, bonus, *idx, dangle_model,
1363                 logML, circular, *rtype, ss, n, n_seq;
1364   int           i1, j, p, q, q_prev, q_prev2, u, uu, x, type, count, mm5, mm3, tt, ld5, new_cx,
1365                 dang5, dang3, dang;
1366   int           e_stem, e_stem5, e_stem3, e_stem53;
1367   int           mlintern[NBPAIRS + 1];
1368   short         *s, *s1, **S, **S5, **S3;
1369   unsigned int  **a2s;
1370   vrna_param_t  *P;
1371   vrna_md_t     *md;
1372   vrna_sc_t     *sc, **scs;
1373 
1374   /* helper variables for dangles == 1|5 case */
1375   int           E_mm5_available;    /* energy of 5' part where 5' mismatch of current stem is available */
1376   int           E_mm5_occupied;     /* energy of 5' part where 5' mismatch of current stem is unavailable */
1377   int           E2_mm5_available;   /* energy of 5' part where 5' mismatch of current stem is available with possible 3' dangle for enclosing pair (i,j) */
1378   int           E2_mm5_occupied;    /* energy of 5' part where 5' mismatch of current stem is unavailable with possible 3' dangle for enclosing pair (i,j) */
1379 
1380   n   = vc->length;
1381   sn  = vc->strand_number;
1382   P   = vc->params;
1383   md  = &(P->model_details);
1384   idx = vc->jindx;
1385 
1386   circular      = md->circ;
1387   dangle_model  = md->dangles;
1388   logML         = md->logML;
1389   rtype         = &(md->rtype[0]);
1390   s             = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : NULL;
1391   s1            = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding : NULL;
1392   sc            = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sc : NULL;
1393   S             = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S;
1394   S5            = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S5;
1395   S3            = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S3;
1396   a2s           = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->a2s;
1397   n_seq         = (vc->type == VRNA_FC_TYPE_SINGLE) ? 1 : vc->n_seq;
1398   scs           = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->scs;
1399 
1400   bonus = 0;
1401 
1402   if (i >= pt[i]) {
1403     vrna_message_warning("energy_of_ml_pt: i is not 5' base of a closing pair!");
1404     return INF;
1405   }
1406 
1407   j = (i == 0) ? n + 1 : (int)pt[i];
1408 
1409   switch (vc->type) {
1410     case VRNA_FC_TYPE_SINGLE:
1411 
1412       if (i != 0) {
1413         /* (i,j) is closing pair of multibranch loop, add soft constraints */
1414         if (sc)
1415           if (sc->energy_bp)
1416             bonus += sc->energy_bp[idx[j] + i];
1417       }
1418 
1419       break;
1420 
1421     case VRNA_FC_TYPE_COMPARATIVE:
1422 
1423       if ((dangle_model % 2) || (dangle_model > 2) || (dangle_model < 0)) {
1424         vrna_message_warning(
1425           "consensus structure evaluation for odd dangle models not implemented (yet)!");
1426         return INF;
1427       }
1428 
1429       if (i != 0) {
1430         /* (i,j) is closing pair of multibranch loop, add soft constraints */
1431         if (scs) {
1432           for (ss = 0; ss < n_seq; ss++)
1433             if (scs[ss] && scs[ss]->energy_bp)
1434               bonus += scs[ss]->energy_bp[idx[j] + i];
1435         }
1436       }
1437 
1438       break;
1439 
1440     default:
1441       return INF;
1442       break;
1443   }
1444 
1445   /* init the variables */
1446   energy  = 0;
1447   u       = 0;     /* the total number of unpaired nucleotides */
1448   p       = i + 1;
1449   q_prev  = i - 1;
1450   q_prev2 = i;
1451 
1452 
1453   for (x = 0; x <= NBPAIRS; x++)
1454     mlintern[x] = P->MLintern[x];
1455 
1456   /* seek to opening base of first stem */
1457   while (p <= j && !pt[p])
1458     p++;
1459 
1460   /* add bonus energies for first stretch of unpaired nucleotides */
1461   switch (vc->type) {
1462     case VRNA_FC_TYPE_SINGLE:
1463       u += p - i - 1;
1464       if (sc)
1465         if (sc->energy_up)
1466           bonus += sc->energy_up[i + 1][u];
1467 
1468       break;
1469 
1470     case VRNA_FC_TYPE_COMPARATIVE:
1471       if (scs) {
1472         for (ss = 0; ss < n_seq; ss++) {
1473           uu = a2s[ss][p] - a2s[ss][i + 1];
1474           if (scs[ss] && scs[ss]->energy_up)
1475             bonus += scs[ss]->energy_up[a2s[ss][i + 1]][uu];
1476 
1477           u += uu;
1478         }
1479       } else {
1480         for (ss = 0; ss < n_seq; ss++)
1481           u += a2s[ss][p] - a2s[ss][i + 1];
1482       }
1483 
1484       break;
1485 
1486     default:
1487       break;                             /* this should never happen */
1488   }
1489 
1490   switch (dangle_model) {
1491     case 0:
1492       switch (vc->type) {
1493         case VRNA_FC_TYPE_SINGLE:
1494           while (p < j) {
1495             /* p must have a pairing partner */
1496             q = (int)pt[p];
1497             /* get type of base pair (p,q) */
1498             tt = vrna_get_ptype_md(s[p], s[q], md);
1499 
1500             energy += E_MLstem(tt, -1, -1, P);
1501 
1502             /* seek to the next stem */
1503             p       = q + 1;
1504             q_prev  = q_prev2 = q;
1505             while (p < j && !pt[p])
1506               p++;
1507             u += p - q - 1;                                     /* add unpaired nucleotides */
1508 
1509             if (sc)
1510               if (sc->energy_up)
1511                 bonus += sc->energy_up[q + 1][p - q - 1];
1512           }
1513 
1514           /* now lets get the energy of the enclosing stem */
1515           if (i > 0) {
1516             /* actual closing pair */
1517             tt = vrna_get_ptype_md(s[j], s[i], md);
1518 
1519             energy += E_MLstem(tt, -1, -1, P);
1520           } else {
1521             /* virtual closing pair */
1522             energy += E_MLstem(0, -1, -1, P);
1523           }
1524 
1525           break;
1526 
1527         case VRNA_FC_TYPE_COMPARATIVE:
1528           while (p < j) {
1529             /* p must have a pairing partner */
1530             q = (int)pt[p];
1531             for (ss = 0; ss < n_seq; ss++) {
1532               /* get type of base pair (p,q) */
1533               tt = vrna_get_ptype_md(S[ss][p], S[ss][q], md);
1534 
1535               energy += E_MLstem(tt, -1, -1, P);
1536             }
1537 
1538             /* seek to the next stem */
1539             p       = q + 1;
1540             q_prev  = q_prev2 = q;
1541             while (p < j && !pt[p])
1542               p++;
1543 
1544             /* add unpaired nucleotides and possible soft constraints */
1545             if (scs) {
1546               for (ss = 0; ss < n_seq; ss++) {
1547                 uu = a2s[ss][p] - a2s[ss][q + 1];
1548                 if (scs[ss] && scs[ss]->energy_up)
1549                   bonus += sc->energy_up[a2s[ss][q + 1]][uu];
1550 
1551                 u += uu;
1552               }
1553             } else {
1554               for (ss = 0; ss < n_seq; ss++)
1555                 u += a2s[ss][p] - a2s[ss][q + 1];
1556             }
1557           }
1558 
1559           /* now lets get the energy of the enclosing stem */
1560           if (i > 0) {
1561             /* actual closing pair */
1562             for (ss = 0; ss < n_seq; ss++) {
1563               tt = vrna_get_ptype_md(S[ss][j], S[ss][i], md);
1564 
1565               energy += E_MLstem(tt, -1, -1, P);
1566             }
1567           }
1568 
1569           break;
1570 
1571         default:
1572           break;                                     /* this should never happen */
1573       }
1574       break;
1575 
1576     case 2:
1577       switch (vc->type) {
1578         case VRNA_FC_TYPE_SINGLE:
1579           while (p < j) {
1580             /* p must have a pairing partner */
1581             q = (int)pt[p];
1582             /* get type of base pair (p,q) */
1583             tt = vrna_get_ptype_md(s[p], s[q], md);
1584 
1585             mm5     = sn[p - 1] == sn[p] ? s1[p - 1] : -1;
1586             mm3     = sn[q] == sn[q + 1] ? s1[q + 1] : -1;
1587             energy  += E_MLstem(tt, mm5, mm3, P);
1588 
1589             /* seek to the next stem */
1590             p       = q + 1;
1591             q_prev  = q_prev2 = q;
1592             while (p < j && !pt[p])
1593               p++;
1594             u += p - q - 1;                                     /* add unpaired nucleotides */
1595 
1596             if (sc)
1597               if (sc->energy_up)
1598                 bonus += sc->energy_up[q + 1][p - q - 1];
1599           }
1600           if (i > 0) {
1601             /* actual closing pair */
1602             tt = vrna_get_ptype_md(s[j], s[i], md);
1603 
1604             mm5     = sn[j - 1] == sn[j] ? s1[j - 1] : -1;
1605             mm3     = sn[i] == sn[i + 1] ? s1[i + 1] : -1;
1606             energy  += E_MLstem(tt, mm5, mm3, P);
1607           } else {
1608             /* virtual closing pair */
1609             energy += E_MLstem(0, -1, -1, P);
1610           }
1611 
1612           break;
1613 
1614         case VRNA_FC_TYPE_COMPARATIVE:
1615           while (p < j) {
1616             /* p must have a pairing partner */
1617             q = (int)pt[p];
1618 
1619             for (ss = 0; ss < n_seq; ss++) {
1620               /* get type of base pair (p,q) */
1621               tt = vrna_get_ptype_md(S[ss][p], S[ss][q], md);
1622 
1623               mm5     = ((a2s[ss][p] > 1) || circular) ? S5[ss][p] : -1;
1624               mm3     = ((a2s[ss][q] < a2s[ss][n]) || circular) ? S3[ss][q] : -1;
1625               energy  += E_MLstem(tt, mm5, mm3, P);
1626             }
1627 
1628             /* seek to the next stem */
1629             p       = q + 1;
1630             q_prev  = q_prev2 = q;
1631             while (p < j && !pt[p])
1632               p++;
1633 
1634             /* add unpaired nucleotides and possible soft constraints */
1635             if (scs) {
1636               for (ss = 0; ss < n_seq; ss++) {
1637                 uu = a2s[ss][p] - a2s[ss][q + 1];
1638                 if (scs[ss] && scs[ss]->energy_up)
1639                   bonus += sc->energy_up[a2s[ss][q + 1]][uu];
1640 
1641                 u += uu;
1642               }
1643             } else {
1644               for (ss = 0; ss < n_seq; ss++)
1645                 u += a2s[ss][p] - a2s[ss][q + 1];
1646             }
1647           }
1648 
1649           if (i > 0) {
1650             /* actual closing pair */
1651             for (ss = 0; ss < n_seq; ss++) {
1652               tt = vrna_get_ptype_md(S[ss][j], S[ss][i], md);
1653 
1654               mm5     = S5[ss][j];
1655               mm3     = S3[ss][i];
1656               energy  += E_MLstem(tt, mm5, mm3, P);
1657             }
1658           }
1659 
1660           break;
1661 
1662         default:
1663           break;                                     /* this should never happen */
1664       }
1665       break;
1666 
1667     case 3:   /* we treat helix stacking different */
1668       for (count = 0; count < 2; count++) {
1669         /* do it twice */
1670         ld5 = 0;         /* 5' dangle energy on prev pair (type) */
1671         if (i == 0) {
1672           j     = (unsigned int)pt[0] + 1;
1673           type  = 0;         /* no pair */
1674         } else {
1675           j     = (unsigned int)pt[i];
1676           type  = vrna_get_ptype_md(s[j], s[i], md);
1677 
1678           /* prime the ld5 variable */
1679           if (sn[j - 1] == sn[j]) {
1680             ld5 = P->dangle5[type][s1[j - 1]];
1681             if ((p = (unsigned int)pt[j - 2]) && (sn[j - 2] == sn[j - 1]))
1682               if (P->dangle3[md->pair[s[p]][s[j - 2]]][s1[j - 1]] < ld5)
1683                 ld5 = 0;
1684           }
1685         }
1686 
1687         i1        = i;
1688         p         = i + 1;
1689         u         = 0;
1690         energy    = 0;
1691         cx_energy = INF;
1692         do {
1693           /* walk around the multi-loop */
1694           new_cx = INF;
1695 
1696           /* hop over unpaired positions */
1697           while (p <= (unsigned int)pt[0] && pt[p] == 0)
1698             p++;
1699 
1700           /* memorize number of unpaired positions */
1701           u += p - i1 - 1;
1702 
1703           if (sc)
1704             if (sc->energy_up)
1705               bonus += sc->energy_up[i1 + 1][p - i1 - 1];
1706 
1707           /* get position of pairing partner */
1708           if (p == (unsigned int)pt[0] + 1) {
1709             q   = 0;
1710             tt  = 0;              /* virtual root pair */
1711           } else {
1712             q = (unsigned int)pt[p];
1713             /* get type of base pair P->q */
1714             tt = vrna_get_ptype_md(s[p], s[q], md);
1715           }
1716 
1717           energy    += mlintern[tt];
1718           cx_energy += mlintern[tt];
1719 
1720           dang5 = dang3 = 0;
1721           if ((sn[p - 1] == sn[p]) && (p > 1))
1722             dang5 = P->dangle5[tt][s1[p - 1]];          /* 5'dangle of pq pair */
1723 
1724           if ((sn[i1] == sn[i1 + 1]) && (i1 < (unsigned int)s[0]))
1725             dang3 = P->dangle3[type][s1[i1 + 1]];       /* 3'dangle of previous pair */
1726 
1727           switch (p - i1 - 1) {
1728             case 0:           /* adjacent helices */
1729               if (i1 != 0) {
1730                 if (sn[i1] == sn[p]) {
1731                   new_cx = energy + P->stack[rtype[type]][rtype[tt]];
1732                   /* subtract 5'dangle and TerminalAU penalty */
1733                   new_cx += -ld5 - mlintern[tt] - mlintern[type] + 2 * mlintern[1];
1734                 }
1735 
1736                 ld5     = 0;
1737                 energy  = MIN2(energy, cx_energy);
1738               }
1739 
1740               break;
1741             case 1:           /* 1 unpaired base between helices */
1742               dang    = MIN2(dang3, dang5);
1743               energy  = energy + dang;
1744               ld5     = dang - dang3;
1745               /* may be problem here: Suppose
1746                * cx_energy>energy, cx_energy+dang5<energy
1747                * and the following helices are also stacked (i.e.
1748                * we'll subtract the dang5 again */
1749               if (cx_energy + dang5 < energy) {
1750                 energy  = cx_energy + dang5;
1751                 ld5     = dang5;
1752               }
1753 
1754               new_cx = INF;   /* no coax stacking with mismatch for now */
1755               break;
1756             default:          /* many unpaired base between helices */
1757               energy  += dang5 + dang3;
1758               energy  = MIN2(energy, cx_energy + dang5);
1759               new_cx  = INF;                 /* no coax stacking possible */
1760               ld5     = dang5;
1761               break;
1762           }
1763           type      = tt;
1764           cx_energy = new_cx;
1765           i1        = q;
1766           p         = q + 1;
1767         } while (q != i);
1768         best_energy = MIN2(energy, best_energy);         /* don't use cx_energy here */
1769         /* skip a helix and start again */
1770         while (pt[p] == 0)
1771           p++;
1772         if (i == (unsigned int)pt[p])
1773           break;
1774 
1775         i = (unsigned int)pt[p];
1776       }         /* end doing it twice */
1777       energy = best_energy;
1778       break;
1779 
1780     default:
1781       E_mm5_available = E2_mm5_available = INF;
1782       E_mm5_occupied  = E2_mm5_occupied = 0;
1783       while (p < j) {
1784         /* p must have a pairing partner */
1785         q = (int)pt[p];
1786         /* get type of base pair (p,q) */
1787         tt = vrna_get_ptype_md(s[p], s[q], md);
1788 
1789         if (q_prev + 2 < p) {
1790           E_mm5_available = MIN2(E_mm5_available, E_mm5_occupied);
1791           E_mm5_occupied  = E_mm5_available;
1792         }
1793 
1794         if (q_prev2 + 2 < p) {
1795           E2_mm5_available  = MIN2(E2_mm5_available, E2_mm5_occupied);
1796           E2_mm5_occupied   = E2_mm5_available;
1797         }
1798 
1799         mm5       = ((sn[p - 1] == sn[p]) && !pt[p - 1])  ? s1[p - 1] : -1;
1800         mm3       = ((sn[q] == sn[q + 1]) && !pt[q + 1])  ? s1[q + 1] : -1;
1801         e_stem    = E_MLstem(tt, -1, -1, P);
1802         e_stem5   = E_MLstem(tt, mm5, -1, P);
1803         e_stem3   = E_MLstem(tt, -1, mm3, P);
1804         e_stem53  = E_MLstem(tt, mm5, mm3, P);
1805 
1806         tmp   = E_mm5_occupied + e_stem3;
1807         tmp   = MIN2(tmp, E_mm5_available + e_stem53);
1808         tmp   = MIN2(tmp, E_mm5_available + e_stem3);
1809         tmp2  = E_mm5_occupied + e_stem;
1810         tmp2  = MIN2(tmp2, E_mm5_available + e_stem5);
1811         tmp2  = MIN2(tmp2, E_mm5_available + e_stem);
1812 
1813         E_mm5_occupied  = tmp;
1814         E_mm5_available = tmp2;
1815 
1816         tmp   = E2_mm5_occupied + e_stem3;
1817         tmp   = MIN2(tmp, E2_mm5_available + e_stem53);
1818         tmp   = MIN2(tmp, E2_mm5_available + e_stem3);
1819         tmp2  = E2_mm5_occupied + e_stem;
1820         tmp2  = MIN2(tmp2, E2_mm5_available + e_stem5);
1821         tmp2  = MIN2(tmp2, E2_mm5_available + e_stem);
1822 
1823         E2_mm5_occupied   = tmp;
1824         E2_mm5_available  = tmp2;
1825 
1826         /* seek to the next stem */
1827         p       = q + 1;
1828         q_prev  = q_prev2 = q;
1829         while (p < j && !pt[p])
1830           p++;
1831         u += p - q - 1;         /* add unpaired nucleotides */
1832 
1833         if (sc)
1834           if (sc->energy_up)
1835             bonus += sc->energy_up[q + 1][p - q - 1];
1836       }
1837       if (i > 0) {
1838         /* actual closing pair */
1839         type = vrna_get_ptype_md(s[j], s[i], md);
1840 
1841         mm5 = ((sn[j - 1] == sn[j]) && !pt[j - 1])  ? s1[j - 1] : -1;
1842         mm3 = ((sn[i] == sn[i + 1]) && !pt[i + 1])  ? s1[i + 1] : -1;
1843         if (q_prev + 2 < p) {
1844           E_mm5_available = MIN2(E_mm5_available, E_mm5_occupied);
1845           E_mm5_occupied  = E_mm5_available;
1846         }
1847 
1848         if (q_prev2 + 2 < p) {
1849           E2_mm5_available  = MIN2(E2_mm5_available, E2_mm5_occupied);
1850           E2_mm5_occupied   = E2_mm5_available;
1851         }
1852 
1853         e_stem    = E_MLstem(type, -1, -1, P);
1854         e_stem5   = E_MLstem(type, mm5, -1, P);
1855         e_stem3   = E_MLstem(type, -1, mm3, P);
1856         e_stem53  = E_MLstem(type, mm5, mm3, P);
1857       } else {
1858         /* virtual closing pair */
1859         e_stem = e_stem5 = e_stem3 = e_stem53 = E_MLstem(0, -1, -1, P);
1860       }
1861 
1862       /* now lets see how we get the minimum including the enclosing stem */
1863       energy  = E_mm5_occupied + e_stem;
1864       energy  = MIN2(energy, E_mm5_available + e_stem5);
1865       energy  = MIN2(energy, E_mm5_available + e_stem);
1866       energy  = MIN2(energy, E2_mm5_occupied + e_stem3);
1867       energy  = MIN2(energy, E2_mm5_occupied + e_stem);
1868       energy  = MIN2(energy, E2_mm5_available + e_stem53);
1869       energy  = MIN2(energy, E2_mm5_available + e_stem3);
1870       energy  = MIN2(energy, E2_mm5_available + e_stem5);
1871       energy  = MIN2(energy, E2_mm5_available + e_stem);
1872       break;
1873   }/* end switch dangle_model */
1874 
1875   switch (vc->type) {
1876     case VRNA_FC_TYPE_SINGLE:
1877       energy += P->MLclosing;
1878       break;
1879 
1880     case VRNA_FC_TYPE_COMPARATIVE:
1881       energy += P->MLclosing * n_seq;
1882       break;
1883 
1884     default:
1885       break;
1886   }
1887 
1888   /*
1889    * logarithmic ML loop energy if logML
1890    * does this work for comparative predictions as well?
1891    */
1892   if (logML && (u > 6))
1893     energy += 6 * P->MLbase + (int)(P->lxc * log((double)u / 6.));
1894   else
1895     energy += (u * P->MLbase);
1896 
1897   return energy + bonus;
1898 }
1899 
1900 
1901 PRIVATE int
cut_in_loop(int i,const short * pt,unsigned int * sn)1902 cut_in_loop(int           i,
1903             const short   *pt,
1904             unsigned int  *sn)
1905 {
1906   /* walk around the loop;  return 5' pos of first pair after cut if
1907    * cut_point in loop else 0 */
1908   int p, j;
1909 
1910   p = j = pt[i];
1911   do {
1912     i = pt[p];
1913     p = i + 1;
1914     while (pt[p] == 0)
1915       p++;
1916   } while ((p != j) && (sn[i] == sn[p]));
1917   return sn[i] == sn[p] ? 0 : p;
1918 }
1919 
1920 
1921 /* below are the consensus structure evaluation functions */
1922 
1923 PRIVATE int
covar_energy_of_struct_pt(vrna_fold_compound_t * vc,const short * pt)1924 covar_energy_of_struct_pt(vrna_fold_compound_t  *vc,
1925                           const short           *pt)
1926 {
1927   int e       = 0;
1928   int length  = vc->length;
1929   int i;
1930 
1931   for (i = 1; i <= length; i++) {
1932     if (pt[i] == 0)
1933       continue;
1934 
1935     e += stack_energy_covar_pt(vc, i, pt);
1936     i = pt[i];
1937   }
1938 
1939   return e;
1940 }
1941 
1942 
1943 PRIVATE int
en_corr_of_loop_gquad_ali(vrna_fold_compound_t * vc,int i,int j,const char * structure,const short * pt,const int * loop_idx,vrna_cstr_t output_stream,int verbosity_level)1944 en_corr_of_loop_gquad_ali(vrna_fold_compound_t  *vc,
1945                           int                   i,
1946                           int                   j,
1947                           const char            *structure,
1948                           const short           *pt,
1949                           const int             *loop_idx,
1950                           vrna_cstr_t           output_stream,
1951                           int                   verbosity_level)
1952 {
1953   int           pos, cnt, tmp_e, energy, p, q, r, s, u, type, gq_en[2];
1954   int           num_elem, num_g, elem_i, elem_j, up_mis;
1955   int           L, l[3];
1956 
1957   char          *sequence     = vc->cons_seq;
1958   short         **S           = vc->S;
1959   short         **S5          = vc->S5;
1960   short         **S3          = vc->S3;
1961   vrna_param_t  *P            = vc->params;
1962   vrna_md_t     *md           = &(P->model_details);
1963   int           n_seq         = vc->n_seq;
1964   int           dangle_model  = md->dangles;
1965 
1966   energy  = 0;
1967   q       = i;
1968   while ((pos = parse_gquad(structure + q - 1, &L, l)) > 0) {
1969     q += pos - 1;
1970     p = q - 4 * L - l[0] - l[1] - l[2] + 1;
1971     if (q > j)
1972       break;
1973 
1974     /* we've found the first g-quadruplex at position [p,q] */
1975     E_gquad_ali_en(p, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
1976     tmp_e   = gq_en[0];
1977     energy  += tmp_e;
1978 
1979     if (verbosity_level > 0) {
1980       vrna_cstr_print_eval_gquad(output_stream,
1981                                  p,
1982                                  L,
1983                                  l,
1984                                  (int)tmp_e / (int)n_seq);
1985     }
1986 
1987     /* check if it's enclosed in a base pair */
1988     if (loop_idx[p] == 0) {
1989       q++;
1990       continue;                          /* g-quad in exterior loop */
1991     } else {
1992       /*  find its enclosing pair */
1993       num_elem  = 0;
1994       num_g     = 1;
1995       r         = p - 1;
1996       up_mis    = q - p + 1;
1997 
1998       /* seek for first pairing base located 5' of the g-quad */
1999       for (r = p - 1; !pt[r] && (r >= i); r--);
2000 
2001       if (r < pt[r]) {
2002         /* found the enclosing pair */
2003         s = pt[r];
2004       } else {
2005         num_elem++;
2006         elem_i  = pt[r];
2007         elem_j  = r;
2008         r       = pt[r] - 1;
2009         /* seek for next pairing base 5' of r */
2010         for (; !pt[r] && (r >= i); r--);
2011 
2012         if (r < pt[r]) {
2013           /* found the enclosing pair */
2014           s = pt[r];
2015         } else {
2016           /* hop over stems and unpaired nucleotides */
2017           while ((r > pt[r]) && (r >= i)) {
2018             if (pt[r]) {
2019               r = pt[r];
2020               num_elem++;
2021             }
2022 
2023             r--;
2024           }
2025 
2026           s = pt[r]; /* found the enclosing pair */
2027         }
2028       }
2029 
2030       /* now we have the enclosing pair (r,s) */
2031 
2032       u = q + 1;
2033       /* we know everything about the 5' part of this loop so check the 3' part */
2034       while (u < s) {
2035         if (structure[u - 1] == '.') {
2036           u++;
2037         } else if (structure[u - 1] == '+') {
2038           /* found another gquad */
2039           pos = parse_gquad(structure + u - 1, &L, l);
2040           if (pos > 0) {
2041             E_gquad_ali_en(u, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
2042 
2043             if (verbosity_level > 0) {
2044               vrna_cstr_print_eval_gquad(output_stream,
2045                                          pos,
2046                                          L,
2047                                          l,
2048                                          (int)tmp_e / (int)n_seq);
2049             }
2050 
2051             tmp_e   = gq_en[0];
2052             energy  += tmp_e;
2053             up_mis  += pos;
2054             u       += pos;
2055             num_g++;
2056           }
2057         } else {
2058           /* we must have found a stem */
2059           num_elem++;
2060           elem_i  = u;
2061           elem_j  = pt[u];
2062           energy  += en_corr_of_loop_gquad_ali(vc,
2063                                                u,
2064                                                pt[u],
2065                                                structure,
2066                                                pt,
2067                                                loop_idx,
2068                                                output_stream,
2069                                                verbosity_level);
2070           u = pt[u] + 1;
2071         }
2072       }
2073 
2074       /* here, u == s */
2075       int e_minus, e_plus, e_temp;
2076 
2077       e_plus = e_minus = 0;
2078 
2079       /* we are done since we've found no other 3' structure element */
2080       switch (num_elem) {
2081         /* g-quad was misinterpreted as hairpin closed by (r,s) */
2082         case 0:
2083           e_minus = vrna_eval_hp_loop(vc, r, s);
2084 
2085           if (verbosity_level > 0) {
2086             vrna_cstr_print_eval_hp_loop_revert(output_stream,
2087                                                 r,
2088                                                 s,
2089                                                 sequence[r - 1],
2090                                                 sequence[s - 1],
2091                                                 (int)e_minus / (int)n_seq);
2092           }
2093 
2094           /* if we consider the G-Quadruplex, we have */
2095           if (num_g == 1) {
2096             /* a) an interior loop like structure */
2097             for (cnt = 0; cnt < n_seq; cnt++) {
2098               type = vrna_get_ptype_md(S[cnt][r], S[cnt][s], md);
2099 
2100               if (dangle_model == 2)
2101                 e_plus += P->mismatchI[type][S3[cnt][r]][S5[cnt][s]];
2102 
2103               if (type > 2)
2104                 e_plus += P->TerminalAU;
2105             }
2106 
2107             e_plus += n_seq * P->internal_loop[s - r - 1 - up_mis];
2108 
2109             if (verbosity_level > 0) {
2110               vrna_cstr_print_eval_int_loop(output_stream,
2111                                             r,
2112                                             s,
2113                                             sequence[r - 1],
2114                                             sequence[s - 1],
2115                                             p,
2116                                             q,
2117                                             sequence[p - 1],
2118                                             sequence[q - 1],
2119                                             (int)e_plus / (int)n_seq);
2120             }
2121           } else {
2122             /* or b) a multibranch loop like structure */
2123             for (cnt = 0; cnt < n_seq; cnt++) {
2124               type = vrna_get_ptype_md(S[cnt][s], S[cnt][r], md);
2125 
2126               e_plus += E_MLstem(type, S5[cnt][s], S3[cnt][r], P);
2127             }
2128 
2129             e_temp = num_g * E_MLstem(0, -1, -1, P) +
2130                      P->MLclosing +
2131                      (elem_i - r - 1 + s - elem_j - 1 - up_mis) * P->MLbase;
2132 
2133             e_plus += n_seq * e_temp;
2134 
2135             if (verbosity_level > 0) {
2136               vrna_cstr_print_eval_mb_loop(output_stream,
2137                                            r,
2138                                            s,
2139                                            sequence[r - 1],
2140                                            sequence[s - 1],
2141                                            (int)e_plus / (int)n_seq);
2142             }
2143           }
2144 
2145           energy += e_plus - e_minus;
2146           break;
2147 
2148         /* g-quad was misinterpreted as interior loop closed by (r,s) with enclosed pair (elem_i, elem_j) */
2149         case 1:
2150           e_minus = vrna_eval_int_loop(vc, r, s, elem_i, elem_j);
2151 
2152           for (cnt = 0; cnt < n_seq; cnt++) {
2153             type = vrna_get_ptype_md(S[cnt][s], S[cnt][r], md);
2154 
2155             e_plus += E_MLstem(type, S5[cnt][s], S3[cnt][r], P);
2156 
2157             type = vrna_get_ptype_md(S[cnt][elem_i], S[cnt][elem_j], md);
2158 
2159             e_plus += E_MLstem(type, S5[cnt][elem_i], S3[cnt][elem_j], P);
2160           }
2161 
2162           e_temp = num_g * E_MLstem(0, -1, -1, P) +
2163                    P->MLclosing +
2164                    (elem_i - r - 1 + s - elem_j - 1 - up_mis) * P->MLbase;
2165 
2166           e_plus += n_seq * e_temp;
2167 
2168           energy += e_plus - e_minus;
2169 
2170           if (verbosity_level > 0) {
2171             vrna_cstr_print_eval_int_loop_revert(output_stream,
2172                                                  r,
2173                                                  s,
2174                                                  sequence[r - 1],
2175                                                  sequence[j - 1],
2176                                                  elem_i,
2177                                                  elem_j,
2178                                                  sequence[elem_i - 1],
2179                                                  sequence[elem_j - 1],
2180                                                  (int)e_minus / (int)n_seq);
2181 
2182             vrna_cstr_print_eval_mb_loop(output_stream,
2183                                          r,
2184                                          s,
2185                                          sequence[r - 1],
2186                                          sequence[s - 1],
2187                                          (int)e_plus / (int)n_seq);
2188           }
2189 
2190           break;
2191         /* gquad was misinterpreted as unpaired nucleotides in a multiloop */
2192         default:
2193           e_minus = (up_mis) * P->MLbase * n_seq;
2194           e_plus  = num_g * E_MLstem(0, -1, -1, P) * n_seq;
2195           energy  += e_plus - e_minus;
2196 
2197           if (verbosity_level > 0) {
2198             vrna_cstr_print_eval_mb_loop_revert(output_stream,
2199                                                 r,
2200                                                 s,
2201                                                 sequence[r - 1],
2202                                                 sequence[s - 1],
2203                                                 (int)e_minus / (int)n_seq);
2204 
2205             vrna_cstr_print_eval_mb_loop(output_stream,
2206                                          r,
2207                                          s,
2208                                          sequence[r - 1],
2209                                          sequence[s - 1],
2210                                          (int)e_plus / (int)n_seq);
2211           }
2212 
2213           break;
2214       }
2215 
2216       q = s + 1;
2217     }
2218   }
2219 
2220   return energy;
2221 }
2222 
2223 
2224 PRIVATE int
covar_en_corr_of_loop_gquad(vrna_fold_compound_t * vc,int i,int j,const char * structure,const short * pt,const int * loop_idx)2225 covar_en_corr_of_loop_gquad(vrna_fold_compound_t  *vc,
2226                             int                   i,
2227                             int                   j,
2228                             const char            *structure,
2229                             const short           *pt,
2230                             const int             *loop_idx)
2231 {
2232   int           pos, en_covar, p, q, r, s, u, gq_en[2];
2233   int           num_elem, num_g, up_mis;
2234   int           L, l[3];
2235 
2236   short         **S   = vc->S;
2237   vrna_param_t  *P    = vc->params;
2238   int           n_seq = vc->n_seq;
2239 
2240   en_covar  = 0;
2241   q         = i;
2242   while ((pos = parse_gquad(structure + q - 1, &L, l)) > 0) {
2243     q += pos - 1;
2244     p = q - 4 * L - l[0] - l[1] - l[2] + 1;
2245     if (q > j)
2246       break;
2247 
2248     /* we've found the first g-quadruplex at position [p,q] */
2249     E_gquad_ali_en(p, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
2250     en_covar += gq_en[1];
2251     /* check if it's enclosed in a base pair */
2252     if (loop_idx[p] == 0) {
2253       q++;
2254       continue;                          /* g-quad in exterior loop */
2255     } else {
2256       /*  find its enclosing pair */
2257       num_elem  = 0;
2258       num_g     = 1;
2259       r         = p - 1;
2260       up_mis    = q - p + 1;
2261 
2262       /* seek for first pairing base located 5' of the g-quad */
2263       for (r = p - 1; !pt[r] && (r >= i); r--);
2264 
2265       if (r < pt[r]) {
2266         /* found the enclosing pair */
2267         s = pt[r];
2268       } else {
2269         num_elem++;
2270         r = pt[r] - 1;
2271         /* seek for next pairing base 5' of r */
2272         for (; !pt[r] && (r >= i); r--);
2273 
2274         if (r < pt[r]) {
2275           /* found the enclosing pair */
2276           s = pt[r];
2277         } else {
2278           /* hop over stems and unpaired nucleotides */
2279           while ((r > pt[r]) && (r >= i)) {
2280             if (pt[r]) {
2281               r = pt[r];
2282               num_elem++;
2283             }
2284 
2285             r--;
2286           }
2287 
2288           s = pt[r]; /* found the enclosing pair */
2289         }
2290       }
2291 
2292       /* now we have the enclosing pair (r,s) */
2293 
2294       u = q + 1;
2295       /* we know everything about the 5' part of this loop so check the 3' part */
2296       while (u < s) {
2297         if (structure[u - 1] == '.') {
2298           u++;
2299         } else if (structure[u - 1] == '+') {
2300           /* found another gquad */
2301           pos = parse_gquad(structure + u - 1, &L, l);
2302           if (pos > 0) {
2303             E_gquad_ali_en(u, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
2304             en_covar  += gq_en[1];
2305             up_mis    += pos;
2306             u         += pos;
2307             num_g++;
2308           }
2309         } else {
2310           /* we must have found a stem */
2311           num_elem++;
2312           en_covar += covar_en_corr_of_loop_gquad(vc,
2313                                                   u,
2314                                                   pt[u],
2315                                                   structure,
2316                                                   pt,
2317                                                   loop_idx);
2318           u = pt[u] + 1;
2319         }
2320       }
2321       /* we are done since we've found no other 3' structure element */
2322 
2323       q = s + 1;
2324     }
2325   }
2326   return en_covar;
2327 }
2328 
2329 
2330 PRIVATE int
stack_energy_covar_pt(vrna_fold_compound_t * vc,int i,const short * pt)2331 stack_energy_covar_pt(vrna_fold_compound_t  *vc,
2332                       int                   i,
2333                       const short           *pt)
2334 {
2335   /* calculate energy of substructure enclosed by (i,j) */
2336   int *indx   = vc->jindx;                      /* index for moving in the triangle matrices c[] and fMl[]*/
2337   int *pscore = vc->pscore;                     /* precomputed array of pair types */
2338 
2339   int energy = 0;
2340   int j, p, q;
2341 
2342   j = pt[i];
2343   p = i;
2344   q = j;
2345   while (p < q) {
2346     /* process all stacks and interior loops */
2347     while (pt[++p] == 0);
2348     while (pt[--q] == 0);
2349     if ((pt[q] != (short)p) || (p > q))
2350       break;
2351 
2352     energy  += pscore[indx[j] + i];
2353     i       = p;
2354     j       = q;
2355   }  /* end while */
2356 
2357   /* p,q don't pair must have found hairpin or multiloop */
2358 
2359   if (p > q) {
2360     /* hairpin case */
2361     energy += pscore[indx[j] + i];
2362     return energy;
2363   }
2364 
2365   /* (i,j) is exterior pair of multiloop */
2366   energy += pscore[indx[j] + i];
2367   while (p < j) {
2368     /* add up the contributions of the substructures of the ML */
2369     energy  += stack_energy_covar_pt(vc, p, pt);
2370     p       = pt[p];
2371     /* search for next base pair in multiloop */
2372     while (pt[++p] == 0);
2373   }
2374 
2375   return energy;
2376 }
2377