1 /** \file eval.c */
2
3
4 /*
5 * Free energy evaluation
6 *
7 * c Ivo Hofacker, Chrisoph Flamm
8 * original implementation by
9 * Walter Fontana
10 *
11 * ViennaRNA Package >= v2.0 by Ronny Lorenz
12 *
13 * Vienna RNA package
14 */
15
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <math.h>
23 #include <ctype.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <limits.h>
27
28 #include "ViennaRNA/utils/basic.h"
29 #include "ViennaRNA/utils/structures.h"
30 #include "ViennaRNA/params/default.h"
31 #include "ViennaRNA/model.h"
32 #include "ViennaRNA/fold_vars.h"
33 #include "ViennaRNA/params/basic.h"
34 #include "ViennaRNA/constraints/hard.h"
35 #include "ViennaRNA/constraints/soft.h"
36 #include "ViennaRNA/loops/all.h"
37 #include "ViennaRNA/gquad.h"
38 #include "ViennaRNA/cofold.h"
39 #include "ViennaRNA/alphabet.h"
40 #include "ViennaRNA/datastructures/char_stream.h"
41 #include "ViennaRNA/eval.h"
42
43 #include "ViennaRNA/color_output.inc"
44
45 /*
46 #################################
47 # GLOBAL VARIABLES #
48 #################################
49 */
50
51 /*
52 #################################
53 # PRIVATE VARIABLES #
54 #################################
55 */
56
57 /*
58 #################################
59 # PRIVATE FUNCTION DECLARATIONS #
60 #################################
61 */
62 PRIVATE int
63 stack_energy(vrna_fold_compound_t *vc,
64 int i,
65 const short *pt,
66 vrna_cstr_t output_stream,
67 int verbostiy_level);
68
69
70 PRIVATE int
71 energy_of_extLoop_pt(vrna_fold_compound_t *vc,
72 int i,
73 const short *pt);
74
75
76 PRIVATE int
77 energy_of_ml_pt(vrna_fold_compound_t *vc,
78 int i,
79 const short *pt);
80
81
82 PRIVATE int
83 cut_in_loop(int i,
84 const short *pt,
85 unsigned int *sn);
86
87
88 PRIVATE int
89 eval_pt(vrna_fold_compound_t *vc,
90 const short *pt,
91 vrna_cstr_t output_stream,
92 int verbosity_level);
93
94
95 PRIVATE int
96 eval_circ_pt(vrna_fold_compound_t *vc,
97 const short *pt,
98 vrna_cstr_t output_stream,
99 int verbosity_level);
100
101
102 PRIVATE int
103 en_corr_of_loop_gquad(vrna_fold_compound_t *vc,
104 int i,
105 int j,
106 const char *structure,
107 const short *pt,
108 vrna_cstr_t output_stream,
109 int verbosity_level);
110
111
112 PRIVATE float
113 wrap_eval_structure(vrna_fold_compound_t *vc,
114 const char *structure,
115 const short *pt,
116 vrna_cstr_t output_stream,
117 int verbosity);
118
119
120 /* consensus structure variants below */
121 PRIVATE int
122 covar_energy_of_struct_pt(vrna_fold_compound_t *vc,
123 const short *pt);
124
125
126 PRIVATE int
127 stack_energy_covar_pt(vrna_fold_compound_t *vc,
128 int i,
129 const short *ptable);
130
131
132 PRIVATE int
133 en_corr_of_loop_gquad_ali(vrna_fold_compound_t *vc,
134 int i,
135 int j,
136 const char *structure,
137 const short *pt,
138 const int *loop_idx,
139 vrna_cstr_t output_stream,
140 int verbosity_level);
141
142
143 PRIVATE int
144 covar_en_corr_of_loop_gquad(vrna_fold_compound_t *vc,
145 int i,
146 int j,
147 const char *structure,
148 const short *pt,
149 const int *loop_idx);
150
151
152 /*
153 #################################
154 # BEGIN OF FUNCTION DEFINITIONS #
155 #################################
156 */
157 PUBLIC float
vrna_eval_structure_v(vrna_fold_compound_t * vc,const char * structure,int verbosity_level,FILE * file)158 vrna_eval_structure_v(vrna_fold_compound_t *vc,
159 const char *structure,
160 int verbosity_level,
161 FILE *file)
162 {
163 if (strlen(structure) != vc->length) {
164 vrna_message_warning("vrna_eval_structure_*: "
165 "string and structure have unequal length (%d vs. %d)",
166 vc->length,
167 strlen(structure));
168 return (float)INF / 100.;
169 }
170
171 vrna_cstr_t output_stream = vrna_cstr(vc->length, (file) ? file : stdout);
172 short *pt = vrna_ptable(structure);
173 float en = wrap_eval_structure(vc,
174 structure,
175 pt,
176 output_stream,
177 verbosity_level);
178
179 vrna_cstr_fflush(output_stream);
180 vrna_cstr_free(output_stream);
181
182 free(pt);
183 return en;
184 }
185
186
187 PUBLIC float
vrna_eval_structure_cstr(vrna_fold_compound_t * vc,const char * structure,int verbosity_level,vrna_cstr_t output_stream)188 vrna_eval_structure_cstr(vrna_fold_compound_t *vc,
189 const char *structure,
190 int verbosity_level,
191 vrna_cstr_t output_stream)
192 {
193 if (strlen(structure) != vc->length) {
194 vrna_message_warning("vrna_eval_structure_*: "
195 "string and structure have unequal length (%d vs. %d)",
196 vc->length,
197 strlen(structure));
198 return (float)INF / 100.;
199 }
200
201 short *pt = vrna_ptable(structure);
202 float en = wrap_eval_structure(vc, structure, pt, output_stream, verbosity_level);
203
204 free(pt);
205 return en;
206 }
207
208
209 PUBLIC float
vrna_eval_covar_structure(vrna_fold_compound_t * vc,const char * structure)210 vrna_eval_covar_structure(vrna_fold_compound_t *vc,
211 const char *structure)
212 {
213 int res, gq, *loop_idx;
214 short *pt;
215
216 pt = vrna_ptable(structure);
217 res = 0;
218 gq = vc->params->model_details.gquad;
219 vc->params->model_details.gquad = 0;
220
221 if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
222 res = covar_energy_of_struct_pt(vc, pt);
223
224 vc->params->model_details.gquad = gq;
225 if (gq) {
226 loop_idx = vrna_loopidx_from_ptable(pt);
227 res -= covar_en_corr_of_loop_gquad(vc,
228 1,
229 vc->length,
230 structure,
231 pt,
232 (const int *)loop_idx);
233 free(loop_idx);
234 }
235 }
236
237 free(pt);
238 return (float)res / (100. * (float)vc->n_seq);
239 }
240
241
242 PUBLIC int
vrna_eval_structure_pt_v(vrna_fold_compound_t * vc,const short * pt,int verbosity_level,FILE * file)243 vrna_eval_structure_pt_v(vrna_fold_compound_t *vc,
244 const short *pt,
245 int verbosity_level,
246 FILE *file)
247 {
248 int e = INF;
249
250 if (pt && vc) {
251 if (pt[0] != (short)vc->length) {
252 vrna_message_warning("vrna_eval_structure_*: "
253 "string and structure have unequal length (%d vs. %d)",
254 vc->length,
255 pt[0]);
256 return INF;
257 }
258
259 vrna_cstr_t output_stream = vrna_cstr(vc->length, (file) ? file : stdout);
260 e = eval_pt(vc, pt, output_stream, verbosity_level);
261 vrna_cstr_fflush(output_stream);
262 vrna_cstr_free(output_stream);
263 }
264
265 return e;
266 }
267
268
269 PUBLIC int
vrna_eval_loop_pt_v(vrna_fold_compound_t * vc,int i,const short * pt,int verbosity_level)270 vrna_eval_loop_pt_v(vrna_fold_compound_t *vc,
271 int i,
272 const short *pt,
273 int verbosity_level)
274 {
275 /* compute energy of a single loop closed by base pair (i,j) */
276 unsigned int *sn, *so, *ss;
277 int j, type, p, q, energy;
278 short *s;
279 vrna_param_t *P;
280
281 energy = INF;
282
283 if (pt && vc) {
284 P = vc->params;
285 sn = vc->strand_number;
286 so = vc->strand_order;
287 ss = vc->strand_start;
288 s = vc->sequence_encoding2;
289
290 vrna_sc_prepare(vc, VRNA_OPTION_MFE);
291
292 if (i == 0) {
293 /* evaluate exterior loop */
294 energy = energy_of_extLoop_pt(vc, 0, pt);
295 return energy;
296 }
297
298 j = pt[i];
299 if (j < i) {
300 vrna_message_warning("vrna_eval_loop_pt*: "
301 "i = %d is unpaired in loop_energy()",
302 i);
303 return INF;
304 }
305
306 type = P->model_details.pair[s[i]][s[j]];
307 if (type == 0) {
308 type = 7;
309 if (verbosity_level > VRNA_VERBOSITY_QUIET) {
310 vrna_message_warning("bases %d and %d (%c%c) can't pair!",
311 i, j,
312 vrna_nucleotide_decode(s[i], &(P->model_details)),
313 vrna_nucleotide_decode(s[j], &(P->model_details)));
314 }
315 }
316
317 p = i;
318 q = j;
319
320
321 while (pt[++p] == 0);
322 while (pt[--q] == 0);
323 if (p > q) {
324 /* Hairpin */
325 energy = vrna_eval_hp_loop(vc, i, j);
326 } else if (pt[q] != (short)p) {
327 /* multi-loop */
328 int ii;
329 ii = cut_in_loop(i, (const short *)pt, sn);
330 energy =
331 (ii == 0) ? energy_of_ml_pt(vc, i, (const short *)pt) : energy_of_extLoop_pt(vc,
332 ii,
333 (const short *)pt);
334 } else {
335 /* found interior loop */
336 int type_2;
337 type_2 = P->model_details.pair[s[q]][s[p]];
338 if (type_2 == 0) {
339 type_2 = 7;
340 if (verbosity_level > VRNA_VERBOSITY_QUIET) {
341 vrna_message_warning("bases %d and %d (%c%c) can't pair!",
342 p, q,
343 vrna_nucleotide_decode(s[p], &(P->model_details)),
344 vrna_nucleotide_decode(s[q], &(P->model_details)));
345 }
346 }
347
348 energy = vrna_eval_int_loop(vc, i, j, p, q);
349 }
350 }
351
352 return energy;
353 }
354
355
356 PUBLIC int
vrna_eval_move_pt(vrna_fold_compound_t * vc,short * pt,int m1,int m2)357 vrna_eval_move_pt(vrna_fold_compound_t *vc,
358 short *pt,
359 int m1,
360 int m2)
361 {
362 /*compute change in energy given by move (m1,m2)*/
363 unsigned int *sn, *so, *ss;
364 int en_post, en_pre, i, j, k, l, len;
365 vrna_param_t *P;
366
367 len = vc->length;
368 sn = vc->strand_number;
369 so = vc->strand_order;
370 ss = vc->strand_start;
371 P = vc->params;
372
373 k = (m1 > 0) ? m1 : -m1;
374 l = (m2 > 0) ? m2 : -m2;
375 /* first find the enclosing pair i<k<l<j */
376 for (j = l + 1; j <= len; j++) {
377 if (pt[j] <= 0)
378 continue; /* unpaired */
379
380 if (pt[j] < k)
381 break; /* found it */
382
383 if (pt[j] > j) {
384 j = pt[j]; /* skip substructure */
385 } else {
386 vrna_message_warning("vrna_eval_move_pt: "
387 "illegal move or broken pair table in vrna_eval_move_pt()\n"
388 "%d %d %d %d ", m1, m2, j, pt[j]);
389 return INF;
390 }
391 }
392 i = (j <= len) ? pt[j] : 0;
393 en_pre = vrna_eval_loop_pt(vc, i, (const short *)pt);
394 en_post = 0;
395 if (m1 < 0) {
396 /*it's a delete move */
397 en_pre += vrna_eval_loop_pt(vc, k, (const short *)pt);
398 pt[k] = 0;
399 pt[l] = 0;
400 } else {
401 /* insert move */
402 pt[k] = l;
403 pt[l] = k;
404 en_post += vrna_eval_loop_pt(vc, k, (const short *)pt);
405 }
406
407 en_post += vrna_eval_loop_pt(vc, i, (const short *)pt);
408 /* restore pair table */
409 if (m1 < 0) {
410 pt[k] = l;
411 pt[l] = k;
412 } else {
413 pt[k] = 0;
414 pt[l] = 0;
415 }
416
417 /* Cofolding -- Check if move changes COFOLD-Penalty */
418 if (sn[k] != sn[l]) {
419 int p, c;
420 p = c = 0;
421 for (p = 1; p < ss[so[1]];) {
422 /* Count basepairs between two strands */
423 if (pt[p] != 0) {
424 if (sn[p] == sn[pt[p]]) /* Skip stuff */
425 p = pt[p];
426 else if (++c > 1)
427 break; /* Count a basepair, break if we have more than one */
428 }
429
430 p++;
431 }
432 if (m1 < 0 && c == 1) /* First and only inserted basepair */
433 return en_post - en_pre - P->DuplexInit;
434 else
435 if (c == 0) /* Must have been a delete move */
436 return en_post - en_pre + P->DuplexInit;
437 }
438
439 return en_post - en_pre;
440 }
441
442
443 /*
444 #################################
445 # STATIC helper functions below #
446 #################################
447 */
448 PRIVATE INLINE int
eval_ext_int_loop(vrna_fold_compound_t * vc,int i,int j,int p,int q)449 eval_ext_int_loop(vrna_fold_compound_t *vc,
450 int i,
451 int j,
452 int p,
453 int q)
454 {
455 unsigned char type, type_2;
456 short **SS, **S5, **S3, *S, si, sj, sp, sq;
457 unsigned int s, n_seq, **a2s;
458 int e, length;
459 vrna_param_t *P;
460 vrna_md_t *md;
461 vrna_sc_t *sc, **scs;
462
463 length = vc->length;
464 P = vc->params;
465 md = &(P->model_details);
466 S = vc->sequence_encoding;
467 e = INF;
468
469 switch (vc->type) {
470 case VRNA_FC_TYPE_SINGLE:
471 si = S[j + 1];
472 sj = S[i - 1];
473 sp = S[p - 1];
474 sq = S[q + 1];
475 type = vrna_get_ptype_md(S[j], S[i], md);
476 type_2 = vrna_get_ptype_md(S[q], S[p], md);
477 sc = vc->sc;
478
479 e = ubf_eval_ext_int_loop(i, j, p, q,
480 i - 1, j + 1, p - 1, q + 1,
481 si, sj, sp, sq,
482 type, type_2,
483 length,
484 P, sc);
485 break;
486
487 case VRNA_FC_TYPE_COMPARATIVE:
488 n_seq = vc->n_seq;
489 SS = vc->S;
490 S5 = vc->S5;
491 S3 = vc->S3;
492 a2s = vc->a2s;
493 n_seq = vc->n_seq;
494 scs = vc->scs;
495
496 for (e = 0, s = 0; s < n_seq; s++) {
497 type = vrna_get_ptype_md(SS[s][j], SS[s][i], md);
498 type_2 = vrna_get_ptype_md(SS[s][q], SS[s][p], md);
499
500 sc = (scs && scs[s]) ? scs[s] : NULL;
501
502 e += ubf_eval_ext_int_loop(a2s[s][i], a2s[s][j], a2s[s][p], a2s[s][q],
503 a2s[s][i - 1], a2s[s][j + 1], a2s[s][p - 1], a2s[s][q + 1],
504 S3[s][j], S5[s][i], S5[s][p], S3[s][q],
505 type, type_2,
506 a2s[s][length],
507 P, sc);
508 }
509
510 break;
511 }
512
513 return e;
514 }
515
516
517 PRIVATE float
wrap_eval_structure(vrna_fold_compound_t * vc,const char * structure,const short * pt,vrna_cstr_t output_stream,int verbosity)518 wrap_eval_structure(vrna_fold_compound_t *vc,
519 const char *structure,
520 const short *pt,
521 vrna_cstr_t output_stream,
522 int verbosity)
523 {
524 int res, gq, L, l[3];
525 float energy;
526
527 energy = (float)INF / 100.;
528 gq = vc->params->model_details.gquad;
529 vc->params->model_details.gquad = 0;
530
531 switch (vc->type) {
532 case VRNA_FC_TYPE_SINGLE:
533 if (vc->params->model_details.circ)
534 res = eval_circ_pt(vc, pt, output_stream, verbosity);
535 else
536 res = eval_pt(vc, pt, output_stream, verbosity);
537
538 vc->params->model_details.gquad = gq;
539
540 if (gq && (parse_gquad(structure, &L, l) > 0)) {
541 if (verbosity > 0)
542 vrna_cstr_print_eval_sd_corr(output_stream);
543
544 res += en_corr_of_loop_gquad(vc, 1, vc->length, structure, pt, output_stream, verbosity);
545 }
546
547 energy = (float)res / 100.;
548 break;
549
550 case VRNA_FC_TYPE_COMPARATIVE:
551 if (vc->params->model_details.circ)
552 res = eval_circ_pt(vc, pt, output_stream, verbosity);
553 else
554 res = eval_pt(vc, pt, output_stream, verbosity);
555
556 vc->params->model_details.gquad = gq;
557
558 if (gq && (parse_gquad(structure, &L, l) > 0)) {
559 if (verbosity > 0)
560 vrna_cstr_print_eval_sd_corr(output_stream);
561
562 int *loop_idx = vrna_loopidx_from_ptable(pt);
563 res += en_corr_of_loop_gquad_ali(vc,
564 1,
565 vc->length,
566 structure,
567 pt,
568 (const int *)loop_idx,
569 output_stream,
570 verbosity);
571 free(loop_idx);
572 }
573
574 energy = (float)res / (100. * (float)vc->n_seq);
575 break;
576
577 default: /* do nothing */
578 break;
579 }
580
581 return energy;
582 }
583
584
585 PRIVATE int
eval_pt(vrna_fold_compound_t * vc,const short * pt,vrna_cstr_t output_stream,int verbosity_level)586 eval_pt(vrna_fold_compound_t *vc,
587 const short *pt,
588 vrna_cstr_t output_stream,
589 int verbosity_level)
590 {
591 unsigned int *sn;
592 int i, length, energy;
593
594 length = vc->length;
595 sn = vc->strand_number;
596
597 if (vc->params->model_details.gquad)
598 vrna_message_warning("vrna_eval_*_pt: No gquadruplex support!\n"
599 "Ignoring potential gquads in structure!\n"
600 "Use e.g. vrna_eval_structure() instead!");
601
602 vrna_sc_prepare(vc, VRNA_OPTION_MFE);
603
604 energy = vc->params->model_details.backtrack_type == 'M' ?
605 energy_of_ml_pt(vc, 0, pt) :
606 energy_of_extLoop_pt(vc, 0, pt);
607
608 if (verbosity_level > 0) {
609 vrna_cstr_print_eval_ext_loop(output_stream,
610 (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
611 (int)energy / (int)vc->n_seq :
612 energy);
613 }
614
615 for (i = 1; i <= length; i++) {
616 if (pt[i] == 0)
617 continue;
618
619 energy += stack_energy(vc, i, pt, output_stream, verbosity_level);
620 i = pt[i];
621 }
622 for (i = 1; sn[i] != sn[length]; i++) {
623 if (sn[i] != sn[pt[i]]) {
624 energy += vc->params->DuplexInit;
625 break;
626 }
627 }
628
629 return energy;
630 }
631
632
633 PRIVATE int
eval_circ_pt(vrna_fold_compound_t * vc,const short * pt,vrna_cstr_t output_stream,int verbosity_level)634 eval_circ_pt(vrna_fold_compound_t *vc,
635 const short *pt,
636 vrna_cstr_t output_stream,
637 int verbosity_level)
638 {
639 unsigned int s, n_seq;
640 int i, j, length, energy, en0, degree;
641 unsigned int **a2s;
642 vrna_param_t *P;
643 vrna_sc_t *sc, **scs;
644
645 energy = 0;
646 en0 = 0;
647 degree = 0;
648 length = vc->length;
649 P = vc->params;
650 sc = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sc : NULL;
651 scs = (vc->type == VRNA_FC_TYPE_COMPARATIVE) ? vc->scs : NULL;
652
653 if (P->model_details.gquad)
654 vrna_message_warning("vrna_eval_*_pt: No gquadruplex support!\n"
655 "Ignoring potential gquads in structure!\n"
656 "Use e.g. vrna_eval_structure() instead!");
657
658 vrna_sc_prepare(vc, VRNA_OPTION_MFE);
659
660 /* evaluate all stems in exterior loop */
661 for (i = 1; i <= length; i++) {
662 if (pt[i] == 0)
663 continue;
664
665 degree++;
666 energy += stack_energy(vc, i, (const short *)pt, output_stream, verbosity_level);
667 i = pt[i];
668 }
669
670 /* find first stem */
671 for (i = 1; (i <= length) && (!pt[i]); i++);
672 j = pt[i];
673
674 /* evaluate exterior loop itself */
675 switch (degree) {
676 case 0: /* unstructured */
677 switch (vc->type) {
678 case VRNA_FC_TYPE_SINGLE:
679 if (sc)
680 if (sc->energy_up)
681 en0 += sc->energy_up[1][length];
682
683 break;
684
685 case VRNA_FC_TYPE_COMPARATIVE:
686 n_seq = vc->n_seq;
687 a2s = vc->a2s;
688 if (scs) {
689 for (s = 0; s < n_seq; s++)
690 if (scs[s] && scs[s]->energy_up)
691 en0 += scs[s]->energy_up[1][a2s[s][length]];
692 }
693
694 break;
695 }
696 break;
697 case 1: /* hairpin loop */
698 en0 = vrna_eval_ext_hp_loop(vc, i, j);
699 break;
700
701 case 2: /* interior loop */
702 {
703 int p, q;
704 /* seek to next pair */
705 for (p = j + 1; pt[p] == 0; p++);
706 q = pt[p];
707
708 en0 = eval_ext_int_loop(vc, i, j, p, q);
709 }
710 break;
711
712 default: /* multibranch loop */
713 en0 = energy_of_ml_pt(vc, 0, (const short *)pt);
714
715 if (vc->type == VRNA_FC_TYPE_SINGLE)
716 en0 -= E_MLstem(0, -1, -1, P); /* remove virtual closing pair */
717
718 break;
719 }
720
721 if (verbosity_level > 0) {
722 vrna_cstr_print_eval_ext_loop(output_stream,
723 (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
724 (int)en0 / (int)vc->n_seq :
725 en0);
726 }
727
728 energy += en0;
729
730 return energy;
731 }
732
733
734 /*---------------------------------------------------------------------------*/
735 /* returns a correction term that may be added to the energy retrieved
736 * from energy_of_struct_par() to correct misinterpreted loops. This
737 * correction is necessary since energy_of_struct_par() will forget
738 * about the existance of gquadruplexes and just treat them as unpaired
739 * regions.
740 *
741 * recursive variant
742 */
743 PRIVATE int
en_corr_of_loop_gquad(vrna_fold_compound_t * vc,int i,int j,const char * structure,const short * pt,vrna_cstr_t output_stream,int verbosity_level)744 en_corr_of_loop_gquad(vrna_fold_compound_t *vc,
745 int i,
746 int j,
747 const char *structure,
748 const short *pt,
749 vrna_cstr_t output_stream,
750 int verbosity_level)
751 {
752 char *sequence;
753 int pos, tmp_e, energy, p, q, r, s, u, type, type2, L, l[3], *rtype, *loop_idx;
754 int num_elem, num_g, elem_i, elem_j, up_mis;
755 short *s1, *s2;
756 vrna_param_t *P;
757 vrna_md_t *md;
758
759 sequence = vc->sequence;
760 loop_idx = vrna_loopidx_from_ptable(pt);
761 s1 = vc->sequence_encoding;
762 s2 = vc->sequence_encoding2;
763 P = vc->params;
764 md = &(P->model_details);
765 rtype = &(md->rtype[0]);
766
767 energy = 0;
768 q = i;
769
770 while ((pos = parse_gquad(structure + q - 1, &L, l)) > 0) {
771 q += pos - 1;
772 p = q - 4 * L - l[0] - l[1] - l[2] + 1;
773 if (q > j)
774 break;
775
776 /* we've found the first g-quadruplex at position [p,q] */
777 tmp_e = E_gquad(L, l, P);
778 energy += tmp_e;
779 if (verbosity_level > 0)
780 vrna_cstr_print_eval_gquad(output_stream, p, L, l, tmp_e);
781
782 /* check if it's enclosed in a base pair */
783 if (loop_idx[p] == 0) {
784 q++;
785 continue; /* g-quad in exterior loop */
786 } else {
787 /* find its enclosing pair */
788 num_elem = 0;
789 num_g = 1;
790 r = p - 1;
791 up_mis = q - p + 1;
792
793 /* seek for first pairing base located 5' of the g-quad */
794 for (r = p - 1; !pt[r] && (r >= i); r--);
795
796 if (r < pt[r]) {
797 /* found the enclosing pair */
798 s = pt[r];
799 } else {
800 num_elem++;
801 elem_i = pt[r];
802 elem_j = r;
803 r = pt[r] - 1;
804 /* seek for next pairing base 5' of r */
805 for (; !pt[r] && (r >= i); r--);
806
807 if (r < pt[r]) {
808 /* found the enclosing pair */
809 s = pt[r];
810 } else {
811 /* hop over stems and unpaired nucleotides */
812 while ((r > pt[r]) && (r >= i)) {
813 if (pt[r]) {
814 r = pt[r];
815 num_elem++;
816 }
817
818 r--;
819 }
820
821 s = pt[r]; /* found the enclosing pair */
822 }
823 }
824
825 /* now we have the enclosing pair (r,s) */
826
827 u = q + 1;
828 /* we know everything about the 5' part of this loop so check the 3' part */
829 while (u < s) {
830 if (structure[u - 1] == '.') {
831 u++;
832 } else if (structure[u - 1] == '+') {
833 /* found another gquad */
834 pos = parse_gquad(structure + u - 1, &L, l);
835 if (pos > 0) {
836 tmp_e = E_gquad(L, l, P);
837
838 if (verbosity_level > 0)
839 vrna_cstr_print_eval_gquad(output_stream, pos, L, l, tmp_e);
840
841 energy += tmp_e;
842 up_mis += pos;
843 u += pos;
844 num_g++;
845 }
846 } else {
847 /* we must have found a stem */
848 num_elem++;
849 elem_i = u;
850 elem_j = pt[u];
851 energy += en_corr_of_loop_gquad(vc,
852 u,
853 pt[u],
854 structure,
855 pt,
856 output_stream,
857 verbosity_level);
858 u = pt[u] + 1;
859 }
860 }
861
862 /* here, u == s */
863 int e_minus, e_plus;
864
865 e_plus = e_minus = 0;
866
867 /* we are done since we've found no other 3' structure element */
868 switch (num_elem) {
869 /* g-quad was misinterpreted as hairpin closed by (r,s) */
870 case 0:
871 e_minus = vrna_eval_hp_loop(vc, r, s);
872 if (verbosity_level > 0) {
873 vrna_cstr_print_eval_hp_loop_revert(output_stream,
874 r,
875 s,
876 sequence[r - 1],
877 sequence[s - 1],
878 e_minus);
879 }
880
881 type = md->pair[s2[r]][s2[s]];
882
883 /* if we consider the G-Quadruplex, we have */
884 if (num_g == 1) {
885 /* a) an interior loop like structure */
886 if (dangles == 2)
887 e_plus += P->mismatchI[type][s1[r + 1]][s1[s - 1]];
888
889 if (type > 2)
890 e_plus += P->TerminalAU;
891
892 e_plus += P->internal_loop[s - r - 1 - up_mis];
893 if (verbosity_level > 0) {
894 vrna_cstr_print_eval_int_loop(output_stream,
895 r,
896 s,
897 sequence[r - 1],
898 sequence[s - 1],
899 p,
900 q,
901 sequence[p - 1],
902 sequence[q - 1],
903 e_plus);
904 }
905 } else {
906 /* or b) a multibranch loop like structure */
907 e_plus = P->MLclosing
908 + E_MLstem(rtype[type], s1[s - 1], s1[r + 1], P)
909 + num_g * E_MLstem(0, -1, -1, P)
910 + (s - r - 1 - up_mis) * P->MLbase;
911
912 if (verbosity_level > 0) {
913 vrna_cstr_print_eval_mb_loop(output_stream,
914 r,
915 s,
916 sequence[r - 1],
917 sequence[s - 1],
918 e_plus);
919 }
920 }
921
922 energy += e_plus - e_minus;
923 break;
924
925 /* g-quad was misinterpreted as interior loop closed by (r,s) with enclosed pair (elem_i, elem_j) */
926 case 1:
927 type = md->pair[s2[r]][s2[s]];
928 type2 = md->pair[s2[elem_i]][s2[elem_j]];
929 e_plus = P->MLclosing
930 + E_MLstem(rtype[type], s1[s - 1], s1[r + 1], P)
931 + (elem_i - r - 1 + s - elem_j - 1 - up_mis) * P->MLbase
932 + E_MLstem(type2, s1[elem_i - 1], s1[elem_j + 1], P);
933
934 e_plus += num_g * E_MLstem(0, -1, -1, P);
935
936 e_minus = vrna_eval_int_loop(vc, r, s, elem_i, elem_j);
937
938 energy += e_plus - e_minus;
939
940 if (verbosity_level > 0) {
941 vrna_cstr_print_eval_int_loop_revert(output_stream,
942 r,
943 s,
944 sequence[r - 1],
945 sequence[j - 1],
946 elem_i,
947 elem_j,
948 sequence[elem_i - 1],
949 sequence[elem_j - 1],
950 e_minus);
951 vrna_cstr_print_eval_mb_loop(output_stream,
952 r,
953 s,
954 sequence[r - 1],
955 sequence[s - 1],
956 e_plus);
957 }
958
959 break;
960
961 /* gquad was misinterpreted as unpaired nucleotides in a multiloop */
962 default:
963 e_minus = (up_mis) * P->MLbase;
964 e_plus = num_g * E_MLstem(0, -1, -1, P);
965 energy += e_plus - e_minus;
966
967 if (verbosity_level > 0) {
968 vrna_cstr_print_eval_mb_loop_revert(output_stream,
969 r,
970 s,
971 sequence[r - 1],
972 sequence[s - 1],
973 e_minus);
974 vrna_cstr_print_eval_mb_loop(output_stream,
975 r,
976 s,
977 sequence[r - 1],
978 sequence[s - 1],
979 e_plus);
980 }
981
982 break;
983 }
984
985 q = s + 1;
986 }
987 }
988
989 free(loop_idx);
990 return energy;
991 }
992
993
994 PRIVATE int
stack_energy(vrna_fold_compound_t * vc,int i,const short * pt,vrna_cstr_t output_stream,int verbosity_level)995 stack_energy(vrna_fold_compound_t *vc,
996 int i,
997 const short *pt,
998 vrna_cstr_t output_stream,
999 int verbosity_level)
1000 {
1001 /* recursively calculate energy of substructure enclosed by (i,j) */
1002 unsigned int *sn, *so, *ss;
1003 int ee, energy, j, p, q;
1004 char *string;
1005 short *s;
1006 vrna_param_t *P;
1007 vrna_md_t *md;
1008
1009 sn = vc->strand_number;
1010 so = vc->strand_order;
1011 ss = vc->strand_start;
1012 s = vc->sequence_encoding2;
1013 P = vc->params;
1014 md = &(P->model_details);
1015 energy = 0;
1016
1017 j = pt[i];
1018
1019 if (vc->type == VRNA_FC_TYPE_SINGLE) {
1020 string = vc->sequence;
1021 if (md->pair[s[i]][s[j]] == 0) {
1022 if (verbosity_level > VRNA_VERBOSITY_QUIET) {
1023 vrna_message_warning("bases %d and %d (%c%c) can't pair!",
1024 i, j,
1025 string[i - 1],
1026 string[j - 1]);
1027 }
1028 }
1029 } else if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
1030 string = vc->cons_seq;
1031 } else {
1032 return INF;
1033 }
1034
1035 p = i;
1036 q = j;
1037
1038 while (p < q) {
1039 /* process all stacks and interior loops */
1040 while (pt[++p] == 0);
1041 while (pt[--q] == 0);
1042 if ((pt[q] != (short)p) || (p > q))
1043 break;
1044
1045 ee = 0;
1046 if (vc->type == VRNA_FC_TYPE_SINGLE) {
1047 if (md->pair[s[q]][s[p]] == 0) {
1048 if (verbosity_level > VRNA_VERBOSITY_QUIET) {
1049 vrna_message_warning("bases %d and %d (%c%c) can't pair!",
1050 p, q,
1051 string[p - 1],
1052 string[q - 1]);
1053 }
1054 }
1055 }
1056
1057 ee = vrna_eval_int_loop(vc, i, j, p, q);
1058
1059 if (verbosity_level > 0) {
1060 vrna_cstr_print_eval_int_loop(output_stream,
1061 i, j,
1062 string[i - 1], string[j - 1],
1063 p, q,
1064 string[p - 1], string[q - 1],
1065 (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
1066 (int)ee / (int)vc->n_seq :
1067 ee);
1068 }
1069
1070 energy += ee;
1071 i = p;
1072 j = q;
1073 } /* end while */
1074
1075 /* p,q don't pair must have found hairpin or multiloop */
1076
1077 if (p > q) {
1078 /* hairpin */
1079 ee = vrna_eval_hp_loop(vc, i, j);
1080 energy += ee;
1081
1082 if (verbosity_level > 0) {
1083 vrna_cstr_print_eval_hp_loop(output_stream,
1084 i, j,
1085 string[i - 1], string[j - 1],
1086 (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
1087 (int)ee / (int)vc->n_seq :
1088 ee);
1089 }
1090
1091 return energy;
1092 }
1093
1094 /* (i,j) is exterior pair of multiloop */
1095 while (p < j) {
1096 /* add up the contributions of the substructures of the ML */
1097 energy += stack_energy(vc, p, pt, output_stream, verbosity_level);
1098 p = pt[p];
1099 /* search for next base pair in multiloop */
1100 while (pt[++p] == 0);
1101 }
1102
1103 ee = 0;
1104
1105 switch (vc->type) {
1106 case VRNA_FC_TYPE_SINGLE:
1107 {
1108 int ii = cut_in_loop(i, pt, sn);
1109 ee = (ii == 0) ? energy_of_ml_pt(vc, i, pt) : energy_of_extLoop_pt(vc, ii, pt);
1110 }
1111 break;
1112
1113 case VRNA_FC_TYPE_COMPARATIVE:
1114 ee = energy_of_ml_pt(vc, i, pt);
1115 break;
1116 }
1117
1118 energy += ee;
1119 if (verbosity_level > 0) {
1120 vrna_cstr_print_eval_mb_loop(output_stream,
1121 i, j,
1122 string[i - 1], string[j - 1],
1123 (vc->type == VRNA_FC_TYPE_COMPARATIVE) ?
1124 (int)ee / (int)vc->n_seq :
1125 ee);
1126 }
1127
1128 return energy;
1129 }
1130
1131
1132 /*---------------------------------------------------------------------------*/
1133
1134
1135 /**
1136 *** Calculate the energy contribution of
1137 *** stabilizing dangling-ends/mismatches
1138 *** for all stems branching off the exterior
1139 *** loop
1140 **/
1141 PRIVATE int
energy_of_extLoop_pt(vrna_fold_compound_t * vc,int i,const short * pt)1142 energy_of_extLoop_pt(vrna_fold_compound_t *vc,
1143 int i,
1144 const short *pt)
1145 {
1146 unsigned int *sn;
1147 int energy, mm5, mm3, bonus, p, q, q_prev, length, dangle_model, n_seq, ss, u,
1148 start;
1149 short *s, *s1, **S, **S5, **S3;
1150 unsigned int **a2s;
1151 vrna_param_t *P;
1152 vrna_md_t *md;
1153 vrna_sc_t *sc, **scs;
1154
1155
1156 /* helper variables for dangles == 1 case */
1157 int E3_available; /* energy of 5' part where 5' mismatch is available for current stem */
1158 int E3_occupied; /* energy of 5' part where 5' mismatch is unavailable for current stem */
1159
1160
1161 /* initialize vars */
1162 length = vc->length;
1163 sn = vc->strand_number;
1164 P = vc->params;
1165 md = &(P->model_details);
1166 dangle_model = md->dangles;
1167 s = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : NULL;
1168 s1 = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding : NULL;
1169 sc = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sc : NULL;
1170 S = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S;
1171 S5 = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S5;
1172 S3 = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S3;
1173 a2s = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->a2s;
1174 n_seq = (vc->type == VRNA_FC_TYPE_SINGLE) ? 1 : vc->n_seq;
1175 scs = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->scs;
1176
1177 energy = 0;
1178 bonus = 0;
1179 p = start = (i == 0) ? 1 : i;
1180 q_prev = -1;
1181
1182 if (dangle_model % 2 == 1) {
1183 E3_available = INF;
1184 E3_occupied = 0;
1185 }
1186
1187 /* seek to opening base of first stem */
1188 while (p <= length && !pt[p])
1189 p++;
1190
1191 switch (vc->type) {
1192 case VRNA_FC_TYPE_SINGLE:
1193
1194 /* add soft constraints for first unpaired nucleotides */
1195 if (sc) {
1196 if (sc->energy_up)
1197 bonus += sc->energy_up[start][p - start];
1198
1199 /* how do we handle generalized soft constraints here ? */
1200 }
1201
1202 break;
1203
1204 case VRNA_FC_TYPE_COMPARATIVE:
1205
1206 /* add soft constraints for first unpaired nucleotides */
1207 if (scs) {
1208 for (ss = 0; ss < n_seq; ss++) {
1209 if (scs[ss]) {
1210 if (scs[ss]->energy_up) {
1211 u = a2s[ss][p] - a2s[ss][start];
1212 bonus += scs[ss]->energy_up[a2s[ss][start]][u];
1213 }
1214
1215 /* how do we handle generalized soft constraints here ? */
1216 }
1217 }
1218 }
1219
1220 break;
1221
1222 default:
1223 return INF;
1224 break;
1225 }
1226
1227 while (p < length) {
1228 int tt;
1229 /* p must have a pairing partner */
1230 q = (int)pt[p];
1231
1232 switch (vc->type) {
1233 case VRNA_FC_TYPE_SINGLE: /* get type of base pair (p,q) */
1234 tt = vrna_get_ptype_md(s[p], s[q], md);
1235
1236 switch (dangle_model) {
1237 /* no dangles */
1238 case 0:
1239 energy += vrna_E_ext_stem(tt, -1, -1, P);
1240 break;
1241
1242 /* the beloved double dangles */
1243 case 2:
1244 mm5 = ((sn[p - 1] == sn[p]) && (p > 1)) ? s1[p - 1] : -1;
1245 mm3 = ((sn[q] == sn[q + 1]) && (q < length)) ? s1[q + 1] : -1;
1246 energy += vrna_E_ext_stem(tt, mm5, mm3, P);
1247 break;
1248
1249 default:
1250 {
1251 int tmp;
1252 if (q_prev + 2 < p) {
1253 E3_available = MIN2(E3_available, E3_occupied);
1254 E3_occupied = E3_available;
1255 }
1256
1257 mm5 = ((sn[p - 1] == sn[p]) && (p > 1) && !pt[p - 1]) ? s1[p - 1] : -1;
1258 mm3 = ((sn[q] == sn[q + 1]) && (q < length) && !pt[q + 1]) ? s1[q + 1] : -1;
1259 tmp = MIN2(
1260 E3_occupied + vrna_E_ext_stem(tt, -1, mm3, P),
1261 E3_available + vrna_E_ext_stem(tt, mm5, mm3, P)
1262 );
1263 E3_available = MIN2(
1264 E3_occupied + vrna_E_ext_stem(tt, -1, -1, P),
1265 E3_available + vrna_E_ext_stem(tt, mm5, -1, P)
1266 );
1267 E3_occupied = tmp;
1268 }
1269 break;
1270 } /* end switch dangle_model */
1271 break;
1272
1273 case VRNA_FC_TYPE_COMPARATIVE:
1274 for (ss = 0; ss < n_seq; ss++) {
1275 /* get type of base pair (p,q) */
1276 tt = vrna_get_ptype_md(S[ss][p], S[ss][q], md);
1277
1278 switch (dangle_model) {
1279 case 0:
1280 energy += vrna_E_ext_stem(tt, -1, -1, P);
1281 break;
1282
1283 case 2:
1284 mm5 = (a2s[ss][p] > 1) ? S5[ss][p] : -1;
1285 mm3 = (a2s[ss][q] < a2s[ss][length]) ? S3[ss][q] : -1;
1286 energy += vrna_E_ext_stem(tt, mm5, mm3, P);
1287 break;
1288
1289 default:
1290 break; /* odd dangles not implemented yet */
1291 }
1292 }
1293 break;
1294
1295 default:
1296 break; /* this should never happen */
1297 }
1298
1299 /* seek to the next stem */
1300 p = q + 1;
1301 q_prev = q;
1302 while (p <= length && !pt[p])
1303 p++;
1304
1305 switch (vc->type) {
1306 case VRNA_FC_TYPE_SINGLE: /* add soft constraints for unpaired region */
1307 if (sc && (q_prev + 1 <= length)) {
1308 if (sc->energy_up)
1309 bonus += sc->energy_up[q_prev + 1][p - q_prev - 1];
1310
1311 /* how do we handle generalized soft constraints here ? */
1312 }
1313
1314 break;
1315
1316 case VRNA_FC_TYPE_COMPARATIVE:
1317 if (scs) {
1318 for (ss = 0; ss < n_seq; ss++) {
1319 if (scs[ss]) {
1320 if (scs[ss]->energy_up) {
1321 u = a2s[ss][p] - a2s[ss][q_prev + 1];
1322 bonus += scs[ss]->energy_up[a2s[ss][q_prev + 1]][u];
1323 }
1324 }
1325 }
1326 }
1327
1328 break;
1329
1330 default:
1331 break; /* this should never happen */
1332 }
1333
1334 if (p == i)
1335 break; /* cut was in loop */
1336 }
1337
1338 if (dangle_model % 2 == 1)
1339 energy = MIN2(E3_occupied, E3_available);
1340
1341 return energy + bonus;
1342 }
1343
1344
1345 /**
1346 *** i is the 5'-base of the closing pair
1347 ***
1348 *** since each helix can coaxially stack with at most one of its
1349 *** neighbors we need an auxiliarry variable cx_energy
1350 *** which contains the best energy given that the last two pairs stack.
1351 *** energy holds the best energy given the previous two pairs do not
1352 *** stack (i.e. the two current helices may stack)
1353 *** We don't allow the last helix to stack with the first, thus we have to
1354 *** walk around the Loop twice with two starting points and take the minimum
1355 ***/
1356 PRIVATE int
energy_of_ml_pt(vrna_fold_compound_t * vc,int i,const short * pt)1357 energy_of_ml_pt(vrna_fold_compound_t *vc,
1358 int i,
1359 const short *pt)
1360 {
1361 unsigned int *sn;
1362 int energy, cx_energy, tmp, tmp2, best_energy = INF, bonus, *idx, dangle_model,
1363 logML, circular, *rtype, ss, n, n_seq;
1364 int i1, j, p, q, q_prev, q_prev2, u, uu, x, type, count, mm5, mm3, tt, ld5, new_cx,
1365 dang5, dang3, dang;
1366 int e_stem, e_stem5, e_stem3, e_stem53;
1367 int mlintern[NBPAIRS + 1];
1368 short *s, *s1, **S, **S5, **S3;
1369 unsigned int **a2s;
1370 vrna_param_t *P;
1371 vrna_md_t *md;
1372 vrna_sc_t *sc, **scs;
1373
1374 /* helper variables for dangles == 1|5 case */
1375 int E_mm5_available; /* energy of 5' part where 5' mismatch of current stem is available */
1376 int E_mm5_occupied; /* energy of 5' part where 5' mismatch of current stem is unavailable */
1377 int E2_mm5_available; /* energy of 5' part where 5' mismatch of current stem is available with possible 3' dangle for enclosing pair (i,j) */
1378 int E2_mm5_occupied; /* energy of 5' part where 5' mismatch of current stem is unavailable with possible 3' dangle for enclosing pair (i,j) */
1379
1380 n = vc->length;
1381 sn = vc->strand_number;
1382 P = vc->params;
1383 md = &(P->model_details);
1384 idx = vc->jindx;
1385
1386 circular = md->circ;
1387 dangle_model = md->dangles;
1388 logML = md->logML;
1389 rtype = &(md->rtype[0]);
1390 s = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding2 : NULL;
1391 s1 = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sequence_encoding : NULL;
1392 sc = (vc->type == VRNA_FC_TYPE_SINGLE) ? vc->sc : NULL;
1393 S = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S;
1394 S5 = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S5;
1395 S3 = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->S3;
1396 a2s = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->a2s;
1397 n_seq = (vc->type == VRNA_FC_TYPE_SINGLE) ? 1 : vc->n_seq;
1398 scs = (vc->type == VRNA_FC_TYPE_SINGLE) ? NULL : vc->scs;
1399
1400 bonus = 0;
1401
1402 if (i >= pt[i]) {
1403 vrna_message_warning("energy_of_ml_pt: i is not 5' base of a closing pair!");
1404 return INF;
1405 }
1406
1407 j = (i == 0) ? n + 1 : (int)pt[i];
1408
1409 switch (vc->type) {
1410 case VRNA_FC_TYPE_SINGLE:
1411
1412 if (i != 0) {
1413 /* (i,j) is closing pair of multibranch loop, add soft constraints */
1414 if (sc)
1415 if (sc->energy_bp)
1416 bonus += sc->energy_bp[idx[j] + i];
1417 }
1418
1419 break;
1420
1421 case VRNA_FC_TYPE_COMPARATIVE:
1422
1423 if ((dangle_model % 2) || (dangle_model > 2) || (dangle_model < 0)) {
1424 vrna_message_warning(
1425 "consensus structure evaluation for odd dangle models not implemented (yet)!");
1426 return INF;
1427 }
1428
1429 if (i != 0) {
1430 /* (i,j) is closing pair of multibranch loop, add soft constraints */
1431 if (scs) {
1432 for (ss = 0; ss < n_seq; ss++)
1433 if (scs[ss] && scs[ss]->energy_bp)
1434 bonus += scs[ss]->energy_bp[idx[j] + i];
1435 }
1436 }
1437
1438 break;
1439
1440 default:
1441 return INF;
1442 break;
1443 }
1444
1445 /* init the variables */
1446 energy = 0;
1447 u = 0; /* the total number of unpaired nucleotides */
1448 p = i + 1;
1449 q_prev = i - 1;
1450 q_prev2 = i;
1451
1452
1453 for (x = 0; x <= NBPAIRS; x++)
1454 mlintern[x] = P->MLintern[x];
1455
1456 /* seek to opening base of first stem */
1457 while (p <= j && !pt[p])
1458 p++;
1459
1460 /* add bonus energies for first stretch of unpaired nucleotides */
1461 switch (vc->type) {
1462 case VRNA_FC_TYPE_SINGLE:
1463 u += p - i - 1;
1464 if (sc)
1465 if (sc->energy_up)
1466 bonus += sc->energy_up[i + 1][u];
1467
1468 break;
1469
1470 case VRNA_FC_TYPE_COMPARATIVE:
1471 if (scs) {
1472 for (ss = 0; ss < n_seq; ss++) {
1473 uu = a2s[ss][p] - a2s[ss][i + 1];
1474 if (scs[ss] && scs[ss]->energy_up)
1475 bonus += scs[ss]->energy_up[a2s[ss][i + 1]][uu];
1476
1477 u += uu;
1478 }
1479 } else {
1480 for (ss = 0; ss < n_seq; ss++)
1481 u += a2s[ss][p] - a2s[ss][i + 1];
1482 }
1483
1484 break;
1485
1486 default:
1487 break; /* this should never happen */
1488 }
1489
1490 switch (dangle_model) {
1491 case 0:
1492 switch (vc->type) {
1493 case VRNA_FC_TYPE_SINGLE:
1494 while (p < j) {
1495 /* p must have a pairing partner */
1496 q = (int)pt[p];
1497 /* get type of base pair (p,q) */
1498 tt = vrna_get_ptype_md(s[p], s[q], md);
1499
1500 energy += E_MLstem(tt, -1, -1, P);
1501
1502 /* seek to the next stem */
1503 p = q + 1;
1504 q_prev = q_prev2 = q;
1505 while (p < j && !pt[p])
1506 p++;
1507 u += p - q - 1; /* add unpaired nucleotides */
1508
1509 if (sc)
1510 if (sc->energy_up)
1511 bonus += sc->energy_up[q + 1][p - q - 1];
1512 }
1513
1514 /* now lets get the energy of the enclosing stem */
1515 if (i > 0) {
1516 /* actual closing pair */
1517 tt = vrna_get_ptype_md(s[j], s[i], md);
1518
1519 energy += E_MLstem(tt, -1, -1, P);
1520 } else {
1521 /* virtual closing pair */
1522 energy += E_MLstem(0, -1, -1, P);
1523 }
1524
1525 break;
1526
1527 case VRNA_FC_TYPE_COMPARATIVE:
1528 while (p < j) {
1529 /* p must have a pairing partner */
1530 q = (int)pt[p];
1531 for (ss = 0; ss < n_seq; ss++) {
1532 /* get type of base pair (p,q) */
1533 tt = vrna_get_ptype_md(S[ss][p], S[ss][q], md);
1534
1535 energy += E_MLstem(tt, -1, -1, P);
1536 }
1537
1538 /* seek to the next stem */
1539 p = q + 1;
1540 q_prev = q_prev2 = q;
1541 while (p < j && !pt[p])
1542 p++;
1543
1544 /* add unpaired nucleotides and possible soft constraints */
1545 if (scs) {
1546 for (ss = 0; ss < n_seq; ss++) {
1547 uu = a2s[ss][p] - a2s[ss][q + 1];
1548 if (scs[ss] && scs[ss]->energy_up)
1549 bonus += sc->energy_up[a2s[ss][q + 1]][uu];
1550
1551 u += uu;
1552 }
1553 } else {
1554 for (ss = 0; ss < n_seq; ss++)
1555 u += a2s[ss][p] - a2s[ss][q + 1];
1556 }
1557 }
1558
1559 /* now lets get the energy of the enclosing stem */
1560 if (i > 0) {
1561 /* actual closing pair */
1562 for (ss = 0; ss < n_seq; ss++) {
1563 tt = vrna_get_ptype_md(S[ss][j], S[ss][i], md);
1564
1565 energy += E_MLstem(tt, -1, -1, P);
1566 }
1567 }
1568
1569 break;
1570
1571 default:
1572 break; /* this should never happen */
1573 }
1574 break;
1575
1576 case 2:
1577 switch (vc->type) {
1578 case VRNA_FC_TYPE_SINGLE:
1579 while (p < j) {
1580 /* p must have a pairing partner */
1581 q = (int)pt[p];
1582 /* get type of base pair (p,q) */
1583 tt = vrna_get_ptype_md(s[p], s[q], md);
1584
1585 mm5 = sn[p - 1] == sn[p] ? s1[p - 1] : -1;
1586 mm3 = sn[q] == sn[q + 1] ? s1[q + 1] : -1;
1587 energy += E_MLstem(tt, mm5, mm3, P);
1588
1589 /* seek to the next stem */
1590 p = q + 1;
1591 q_prev = q_prev2 = q;
1592 while (p < j && !pt[p])
1593 p++;
1594 u += p - q - 1; /* add unpaired nucleotides */
1595
1596 if (sc)
1597 if (sc->energy_up)
1598 bonus += sc->energy_up[q + 1][p - q - 1];
1599 }
1600 if (i > 0) {
1601 /* actual closing pair */
1602 tt = vrna_get_ptype_md(s[j], s[i], md);
1603
1604 mm5 = sn[j - 1] == sn[j] ? s1[j - 1] : -1;
1605 mm3 = sn[i] == sn[i + 1] ? s1[i + 1] : -1;
1606 energy += E_MLstem(tt, mm5, mm3, P);
1607 } else {
1608 /* virtual closing pair */
1609 energy += E_MLstem(0, -1, -1, P);
1610 }
1611
1612 break;
1613
1614 case VRNA_FC_TYPE_COMPARATIVE:
1615 while (p < j) {
1616 /* p must have a pairing partner */
1617 q = (int)pt[p];
1618
1619 for (ss = 0; ss < n_seq; ss++) {
1620 /* get type of base pair (p,q) */
1621 tt = vrna_get_ptype_md(S[ss][p], S[ss][q], md);
1622
1623 mm5 = ((a2s[ss][p] > 1) || circular) ? S5[ss][p] : -1;
1624 mm3 = ((a2s[ss][q] < a2s[ss][n]) || circular) ? S3[ss][q] : -1;
1625 energy += E_MLstem(tt, mm5, mm3, P);
1626 }
1627
1628 /* seek to the next stem */
1629 p = q + 1;
1630 q_prev = q_prev2 = q;
1631 while (p < j && !pt[p])
1632 p++;
1633
1634 /* add unpaired nucleotides and possible soft constraints */
1635 if (scs) {
1636 for (ss = 0; ss < n_seq; ss++) {
1637 uu = a2s[ss][p] - a2s[ss][q + 1];
1638 if (scs[ss] && scs[ss]->energy_up)
1639 bonus += sc->energy_up[a2s[ss][q + 1]][uu];
1640
1641 u += uu;
1642 }
1643 } else {
1644 for (ss = 0; ss < n_seq; ss++)
1645 u += a2s[ss][p] - a2s[ss][q + 1];
1646 }
1647 }
1648
1649 if (i > 0) {
1650 /* actual closing pair */
1651 for (ss = 0; ss < n_seq; ss++) {
1652 tt = vrna_get_ptype_md(S[ss][j], S[ss][i], md);
1653
1654 mm5 = S5[ss][j];
1655 mm3 = S3[ss][i];
1656 energy += E_MLstem(tt, mm5, mm3, P);
1657 }
1658 }
1659
1660 break;
1661
1662 default:
1663 break; /* this should never happen */
1664 }
1665 break;
1666
1667 case 3: /* we treat helix stacking different */
1668 for (count = 0; count < 2; count++) {
1669 /* do it twice */
1670 ld5 = 0; /* 5' dangle energy on prev pair (type) */
1671 if (i == 0) {
1672 j = (unsigned int)pt[0] + 1;
1673 type = 0; /* no pair */
1674 } else {
1675 j = (unsigned int)pt[i];
1676 type = vrna_get_ptype_md(s[j], s[i], md);
1677
1678 /* prime the ld5 variable */
1679 if (sn[j - 1] == sn[j]) {
1680 ld5 = P->dangle5[type][s1[j - 1]];
1681 if ((p = (unsigned int)pt[j - 2]) && (sn[j - 2] == sn[j - 1]))
1682 if (P->dangle3[md->pair[s[p]][s[j - 2]]][s1[j - 1]] < ld5)
1683 ld5 = 0;
1684 }
1685 }
1686
1687 i1 = i;
1688 p = i + 1;
1689 u = 0;
1690 energy = 0;
1691 cx_energy = INF;
1692 do {
1693 /* walk around the multi-loop */
1694 new_cx = INF;
1695
1696 /* hop over unpaired positions */
1697 while (p <= (unsigned int)pt[0] && pt[p] == 0)
1698 p++;
1699
1700 /* memorize number of unpaired positions */
1701 u += p - i1 - 1;
1702
1703 if (sc)
1704 if (sc->energy_up)
1705 bonus += sc->energy_up[i1 + 1][p - i1 - 1];
1706
1707 /* get position of pairing partner */
1708 if (p == (unsigned int)pt[0] + 1) {
1709 q = 0;
1710 tt = 0; /* virtual root pair */
1711 } else {
1712 q = (unsigned int)pt[p];
1713 /* get type of base pair P->q */
1714 tt = vrna_get_ptype_md(s[p], s[q], md);
1715 }
1716
1717 energy += mlintern[tt];
1718 cx_energy += mlintern[tt];
1719
1720 dang5 = dang3 = 0;
1721 if ((sn[p - 1] == sn[p]) && (p > 1))
1722 dang5 = P->dangle5[tt][s1[p - 1]]; /* 5'dangle of pq pair */
1723
1724 if ((sn[i1] == sn[i1 + 1]) && (i1 < (unsigned int)s[0]))
1725 dang3 = P->dangle3[type][s1[i1 + 1]]; /* 3'dangle of previous pair */
1726
1727 switch (p - i1 - 1) {
1728 case 0: /* adjacent helices */
1729 if (i1 != 0) {
1730 if (sn[i1] == sn[p]) {
1731 new_cx = energy + P->stack[rtype[type]][rtype[tt]];
1732 /* subtract 5'dangle and TerminalAU penalty */
1733 new_cx += -ld5 - mlintern[tt] - mlintern[type] + 2 * mlintern[1];
1734 }
1735
1736 ld5 = 0;
1737 energy = MIN2(energy, cx_energy);
1738 }
1739
1740 break;
1741 case 1: /* 1 unpaired base between helices */
1742 dang = MIN2(dang3, dang5);
1743 energy = energy + dang;
1744 ld5 = dang - dang3;
1745 /* may be problem here: Suppose
1746 * cx_energy>energy, cx_energy+dang5<energy
1747 * and the following helices are also stacked (i.e.
1748 * we'll subtract the dang5 again */
1749 if (cx_energy + dang5 < energy) {
1750 energy = cx_energy + dang5;
1751 ld5 = dang5;
1752 }
1753
1754 new_cx = INF; /* no coax stacking with mismatch for now */
1755 break;
1756 default: /* many unpaired base between helices */
1757 energy += dang5 + dang3;
1758 energy = MIN2(energy, cx_energy + dang5);
1759 new_cx = INF; /* no coax stacking possible */
1760 ld5 = dang5;
1761 break;
1762 }
1763 type = tt;
1764 cx_energy = new_cx;
1765 i1 = q;
1766 p = q + 1;
1767 } while (q != i);
1768 best_energy = MIN2(energy, best_energy); /* don't use cx_energy here */
1769 /* skip a helix and start again */
1770 while (pt[p] == 0)
1771 p++;
1772 if (i == (unsigned int)pt[p])
1773 break;
1774
1775 i = (unsigned int)pt[p];
1776 } /* end doing it twice */
1777 energy = best_energy;
1778 break;
1779
1780 default:
1781 E_mm5_available = E2_mm5_available = INF;
1782 E_mm5_occupied = E2_mm5_occupied = 0;
1783 while (p < j) {
1784 /* p must have a pairing partner */
1785 q = (int)pt[p];
1786 /* get type of base pair (p,q) */
1787 tt = vrna_get_ptype_md(s[p], s[q], md);
1788
1789 if (q_prev + 2 < p) {
1790 E_mm5_available = MIN2(E_mm5_available, E_mm5_occupied);
1791 E_mm5_occupied = E_mm5_available;
1792 }
1793
1794 if (q_prev2 + 2 < p) {
1795 E2_mm5_available = MIN2(E2_mm5_available, E2_mm5_occupied);
1796 E2_mm5_occupied = E2_mm5_available;
1797 }
1798
1799 mm5 = ((sn[p - 1] == sn[p]) && !pt[p - 1]) ? s1[p - 1] : -1;
1800 mm3 = ((sn[q] == sn[q + 1]) && !pt[q + 1]) ? s1[q + 1] : -1;
1801 e_stem = E_MLstem(tt, -1, -1, P);
1802 e_stem5 = E_MLstem(tt, mm5, -1, P);
1803 e_stem3 = E_MLstem(tt, -1, mm3, P);
1804 e_stem53 = E_MLstem(tt, mm5, mm3, P);
1805
1806 tmp = E_mm5_occupied + e_stem3;
1807 tmp = MIN2(tmp, E_mm5_available + e_stem53);
1808 tmp = MIN2(tmp, E_mm5_available + e_stem3);
1809 tmp2 = E_mm5_occupied + e_stem;
1810 tmp2 = MIN2(tmp2, E_mm5_available + e_stem5);
1811 tmp2 = MIN2(tmp2, E_mm5_available + e_stem);
1812
1813 E_mm5_occupied = tmp;
1814 E_mm5_available = tmp2;
1815
1816 tmp = E2_mm5_occupied + e_stem3;
1817 tmp = MIN2(tmp, E2_mm5_available + e_stem53);
1818 tmp = MIN2(tmp, E2_mm5_available + e_stem3);
1819 tmp2 = E2_mm5_occupied + e_stem;
1820 tmp2 = MIN2(tmp2, E2_mm5_available + e_stem5);
1821 tmp2 = MIN2(tmp2, E2_mm5_available + e_stem);
1822
1823 E2_mm5_occupied = tmp;
1824 E2_mm5_available = tmp2;
1825
1826 /* seek to the next stem */
1827 p = q + 1;
1828 q_prev = q_prev2 = q;
1829 while (p < j && !pt[p])
1830 p++;
1831 u += p - q - 1; /* add unpaired nucleotides */
1832
1833 if (sc)
1834 if (sc->energy_up)
1835 bonus += sc->energy_up[q + 1][p - q - 1];
1836 }
1837 if (i > 0) {
1838 /* actual closing pair */
1839 type = vrna_get_ptype_md(s[j], s[i], md);
1840
1841 mm5 = ((sn[j - 1] == sn[j]) && !pt[j - 1]) ? s1[j - 1] : -1;
1842 mm3 = ((sn[i] == sn[i + 1]) && !pt[i + 1]) ? s1[i + 1] : -1;
1843 if (q_prev + 2 < p) {
1844 E_mm5_available = MIN2(E_mm5_available, E_mm5_occupied);
1845 E_mm5_occupied = E_mm5_available;
1846 }
1847
1848 if (q_prev2 + 2 < p) {
1849 E2_mm5_available = MIN2(E2_mm5_available, E2_mm5_occupied);
1850 E2_mm5_occupied = E2_mm5_available;
1851 }
1852
1853 e_stem = E_MLstem(type, -1, -1, P);
1854 e_stem5 = E_MLstem(type, mm5, -1, P);
1855 e_stem3 = E_MLstem(type, -1, mm3, P);
1856 e_stem53 = E_MLstem(type, mm5, mm3, P);
1857 } else {
1858 /* virtual closing pair */
1859 e_stem = e_stem5 = e_stem3 = e_stem53 = E_MLstem(0, -1, -1, P);
1860 }
1861
1862 /* now lets see how we get the minimum including the enclosing stem */
1863 energy = E_mm5_occupied + e_stem;
1864 energy = MIN2(energy, E_mm5_available + e_stem5);
1865 energy = MIN2(energy, E_mm5_available + e_stem);
1866 energy = MIN2(energy, E2_mm5_occupied + e_stem3);
1867 energy = MIN2(energy, E2_mm5_occupied + e_stem);
1868 energy = MIN2(energy, E2_mm5_available + e_stem53);
1869 energy = MIN2(energy, E2_mm5_available + e_stem3);
1870 energy = MIN2(energy, E2_mm5_available + e_stem5);
1871 energy = MIN2(energy, E2_mm5_available + e_stem);
1872 break;
1873 }/* end switch dangle_model */
1874
1875 switch (vc->type) {
1876 case VRNA_FC_TYPE_SINGLE:
1877 energy += P->MLclosing;
1878 break;
1879
1880 case VRNA_FC_TYPE_COMPARATIVE:
1881 energy += P->MLclosing * n_seq;
1882 break;
1883
1884 default:
1885 break;
1886 }
1887
1888 /*
1889 * logarithmic ML loop energy if logML
1890 * does this work for comparative predictions as well?
1891 */
1892 if (logML && (u > 6))
1893 energy += 6 * P->MLbase + (int)(P->lxc * log((double)u / 6.));
1894 else
1895 energy += (u * P->MLbase);
1896
1897 return energy + bonus;
1898 }
1899
1900
1901 PRIVATE int
cut_in_loop(int i,const short * pt,unsigned int * sn)1902 cut_in_loop(int i,
1903 const short *pt,
1904 unsigned int *sn)
1905 {
1906 /* walk around the loop; return 5' pos of first pair after cut if
1907 * cut_point in loop else 0 */
1908 int p, j;
1909
1910 p = j = pt[i];
1911 do {
1912 i = pt[p];
1913 p = i + 1;
1914 while (pt[p] == 0)
1915 p++;
1916 } while ((p != j) && (sn[i] == sn[p]));
1917 return sn[i] == sn[p] ? 0 : p;
1918 }
1919
1920
1921 /* below are the consensus structure evaluation functions */
1922
1923 PRIVATE int
covar_energy_of_struct_pt(vrna_fold_compound_t * vc,const short * pt)1924 covar_energy_of_struct_pt(vrna_fold_compound_t *vc,
1925 const short *pt)
1926 {
1927 int e = 0;
1928 int length = vc->length;
1929 int i;
1930
1931 for (i = 1; i <= length; i++) {
1932 if (pt[i] == 0)
1933 continue;
1934
1935 e += stack_energy_covar_pt(vc, i, pt);
1936 i = pt[i];
1937 }
1938
1939 return e;
1940 }
1941
1942
1943 PRIVATE int
en_corr_of_loop_gquad_ali(vrna_fold_compound_t * vc,int i,int j,const char * structure,const short * pt,const int * loop_idx,vrna_cstr_t output_stream,int verbosity_level)1944 en_corr_of_loop_gquad_ali(vrna_fold_compound_t *vc,
1945 int i,
1946 int j,
1947 const char *structure,
1948 const short *pt,
1949 const int *loop_idx,
1950 vrna_cstr_t output_stream,
1951 int verbosity_level)
1952 {
1953 int pos, cnt, tmp_e, energy, p, q, r, s, u, type, gq_en[2];
1954 int num_elem, num_g, elem_i, elem_j, up_mis;
1955 int L, l[3];
1956
1957 char *sequence = vc->cons_seq;
1958 short **S = vc->S;
1959 short **S5 = vc->S5;
1960 short **S3 = vc->S3;
1961 vrna_param_t *P = vc->params;
1962 vrna_md_t *md = &(P->model_details);
1963 int n_seq = vc->n_seq;
1964 int dangle_model = md->dangles;
1965
1966 energy = 0;
1967 q = i;
1968 while ((pos = parse_gquad(structure + q - 1, &L, l)) > 0) {
1969 q += pos - 1;
1970 p = q - 4 * L - l[0] - l[1] - l[2] + 1;
1971 if (q > j)
1972 break;
1973
1974 /* we've found the first g-quadruplex at position [p,q] */
1975 E_gquad_ali_en(p, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
1976 tmp_e = gq_en[0];
1977 energy += tmp_e;
1978
1979 if (verbosity_level > 0) {
1980 vrna_cstr_print_eval_gquad(output_stream,
1981 p,
1982 L,
1983 l,
1984 (int)tmp_e / (int)n_seq);
1985 }
1986
1987 /* check if it's enclosed in a base pair */
1988 if (loop_idx[p] == 0) {
1989 q++;
1990 continue; /* g-quad in exterior loop */
1991 } else {
1992 /* find its enclosing pair */
1993 num_elem = 0;
1994 num_g = 1;
1995 r = p - 1;
1996 up_mis = q - p + 1;
1997
1998 /* seek for first pairing base located 5' of the g-quad */
1999 for (r = p - 1; !pt[r] && (r >= i); r--);
2000
2001 if (r < pt[r]) {
2002 /* found the enclosing pair */
2003 s = pt[r];
2004 } else {
2005 num_elem++;
2006 elem_i = pt[r];
2007 elem_j = r;
2008 r = pt[r] - 1;
2009 /* seek for next pairing base 5' of r */
2010 for (; !pt[r] && (r >= i); r--);
2011
2012 if (r < pt[r]) {
2013 /* found the enclosing pair */
2014 s = pt[r];
2015 } else {
2016 /* hop over stems and unpaired nucleotides */
2017 while ((r > pt[r]) && (r >= i)) {
2018 if (pt[r]) {
2019 r = pt[r];
2020 num_elem++;
2021 }
2022
2023 r--;
2024 }
2025
2026 s = pt[r]; /* found the enclosing pair */
2027 }
2028 }
2029
2030 /* now we have the enclosing pair (r,s) */
2031
2032 u = q + 1;
2033 /* we know everything about the 5' part of this loop so check the 3' part */
2034 while (u < s) {
2035 if (structure[u - 1] == '.') {
2036 u++;
2037 } else if (structure[u - 1] == '+') {
2038 /* found another gquad */
2039 pos = parse_gquad(structure + u - 1, &L, l);
2040 if (pos > 0) {
2041 E_gquad_ali_en(u, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
2042
2043 if (verbosity_level > 0) {
2044 vrna_cstr_print_eval_gquad(output_stream,
2045 pos,
2046 L,
2047 l,
2048 (int)tmp_e / (int)n_seq);
2049 }
2050
2051 tmp_e = gq_en[0];
2052 energy += tmp_e;
2053 up_mis += pos;
2054 u += pos;
2055 num_g++;
2056 }
2057 } else {
2058 /* we must have found a stem */
2059 num_elem++;
2060 elem_i = u;
2061 elem_j = pt[u];
2062 energy += en_corr_of_loop_gquad_ali(vc,
2063 u,
2064 pt[u],
2065 structure,
2066 pt,
2067 loop_idx,
2068 output_stream,
2069 verbosity_level);
2070 u = pt[u] + 1;
2071 }
2072 }
2073
2074 /* here, u == s */
2075 int e_minus, e_plus, e_temp;
2076
2077 e_plus = e_minus = 0;
2078
2079 /* we are done since we've found no other 3' structure element */
2080 switch (num_elem) {
2081 /* g-quad was misinterpreted as hairpin closed by (r,s) */
2082 case 0:
2083 e_minus = vrna_eval_hp_loop(vc, r, s);
2084
2085 if (verbosity_level > 0) {
2086 vrna_cstr_print_eval_hp_loop_revert(output_stream,
2087 r,
2088 s,
2089 sequence[r - 1],
2090 sequence[s - 1],
2091 (int)e_minus / (int)n_seq);
2092 }
2093
2094 /* if we consider the G-Quadruplex, we have */
2095 if (num_g == 1) {
2096 /* a) an interior loop like structure */
2097 for (cnt = 0; cnt < n_seq; cnt++) {
2098 type = vrna_get_ptype_md(S[cnt][r], S[cnt][s], md);
2099
2100 if (dangle_model == 2)
2101 e_plus += P->mismatchI[type][S3[cnt][r]][S5[cnt][s]];
2102
2103 if (type > 2)
2104 e_plus += P->TerminalAU;
2105 }
2106
2107 e_plus += n_seq * P->internal_loop[s - r - 1 - up_mis];
2108
2109 if (verbosity_level > 0) {
2110 vrna_cstr_print_eval_int_loop(output_stream,
2111 r,
2112 s,
2113 sequence[r - 1],
2114 sequence[s - 1],
2115 p,
2116 q,
2117 sequence[p - 1],
2118 sequence[q - 1],
2119 (int)e_plus / (int)n_seq);
2120 }
2121 } else {
2122 /* or b) a multibranch loop like structure */
2123 for (cnt = 0; cnt < n_seq; cnt++) {
2124 type = vrna_get_ptype_md(S[cnt][s], S[cnt][r], md);
2125
2126 e_plus += E_MLstem(type, S5[cnt][s], S3[cnt][r], P);
2127 }
2128
2129 e_temp = num_g * E_MLstem(0, -1, -1, P) +
2130 P->MLclosing +
2131 (elem_i - r - 1 + s - elem_j - 1 - up_mis) * P->MLbase;
2132
2133 e_plus += n_seq * e_temp;
2134
2135 if (verbosity_level > 0) {
2136 vrna_cstr_print_eval_mb_loop(output_stream,
2137 r,
2138 s,
2139 sequence[r - 1],
2140 sequence[s - 1],
2141 (int)e_plus / (int)n_seq);
2142 }
2143 }
2144
2145 energy += e_plus - e_minus;
2146 break;
2147
2148 /* g-quad was misinterpreted as interior loop closed by (r,s) with enclosed pair (elem_i, elem_j) */
2149 case 1:
2150 e_minus = vrna_eval_int_loop(vc, r, s, elem_i, elem_j);
2151
2152 for (cnt = 0; cnt < n_seq; cnt++) {
2153 type = vrna_get_ptype_md(S[cnt][s], S[cnt][r], md);
2154
2155 e_plus += E_MLstem(type, S5[cnt][s], S3[cnt][r], P);
2156
2157 type = vrna_get_ptype_md(S[cnt][elem_i], S[cnt][elem_j], md);
2158
2159 e_plus += E_MLstem(type, S5[cnt][elem_i], S3[cnt][elem_j], P);
2160 }
2161
2162 e_temp = num_g * E_MLstem(0, -1, -1, P) +
2163 P->MLclosing +
2164 (elem_i - r - 1 + s - elem_j - 1 - up_mis) * P->MLbase;
2165
2166 e_plus += n_seq * e_temp;
2167
2168 energy += e_plus - e_minus;
2169
2170 if (verbosity_level > 0) {
2171 vrna_cstr_print_eval_int_loop_revert(output_stream,
2172 r,
2173 s,
2174 sequence[r - 1],
2175 sequence[j - 1],
2176 elem_i,
2177 elem_j,
2178 sequence[elem_i - 1],
2179 sequence[elem_j - 1],
2180 (int)e_minus / (int)n_seq);
2181
2182 vrna_cstr_print_eval_mb_loop(output_stream,
2183 r,
2184 s,
2185 sequence[r - 1],
2186 sequence[s - 1],
2187 (int)e_plus / (int)n_seq);
2188 }
2189
2190 break;
2191 /* gquad was misinterpreted as unpaired nucleotides in a multiloop */
2192 default:
2193 e_minus = (up_mis) * P->MLbase * n_seq;
2194 e_plus = num_g * E_MLstem(0, -1, -1, P) * n_seq;
2195 energy += e_plus - e_minus;
2196
2197 if (verbosity_level > 0) {
2198 vrna_cstr_print_eval_mb_loop_revert(output_stream,
2199 r,
2200 s,
2201 sequence[r - 1],
2202 sequence[s - 1],
2203 (int)e_minus / (int)n_seq);
2204
2205 vrna_cstr_print_eval_mb_loop(output_stream,
2206 r,
2207 s,
2208 sequence[r - 1],
2209 sequence[s - 1],
2210 (int)e_plus / (int)n_seq);
2211 }
2212
2213 break;
2214 }
2215
2216 q = s + 1;
2217 }
2218 }
2219
2220 return energy;
2221 }
2222
2223
2224 PRIVATE int
covar_en_corr_of_loop_gquad(vrna_fold_compound_t * vc,int i,int j,const char * structure,const short * pt,const int * loop_idx)2225 covar_en_corr_of_loop_gquad(vrna_fold_compound_t *vc,
2226 int i,
2227 int j,
2228 const char *structure,
2229 const short *pt,
2230 const int *loop_idx)
2231 {
2232 int pos, en_covar, p, q, r, s, u, gq_en[2];
2233 int num_elem, num_g, up_mis;
2234 int L, l[3];
2235
2236 short **S = vc->S;
2237 vrna_param_t *P = vc->params;
2238 int n_seq = vc->n_seq;
2239
2240 en_covar = 0;
2241 q = i;
2242 while ((pos = parse_gquad(structure + q - 1, &L, l)) > 0) {
2243 q += pos - 1;
2244 p = q - 4 * L - l[0] - l[1] - l[2] + 1;
2245 if (q > j)
2246 break;
2247
2248 /* we've found the first g-quadruplex at position [p,q] */
2249 E_gquad_ali_en(p, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
2250 en_covar += gq_en[1];
2251 /* check if it's enclosed in a base pair */
2252 if (loop_idx[p] == 0) {
2253 q++;
2254 continue; /* g-quad in exterior loop */
2255 } else {
2256 /* find its enclosing pair */
2257 num_elem = 0;
2258 num_g = 1;
2259 r = p - 1;
2260 up_mis = q - p + 1;
2261
2262 /* seek for first pairing base located 5' of the g-quad */
2263 for (r = p - 1; !pt[r] && (r >= i); r--);
2264
2265 if (r < pt[r]) {
2266 /* found the enclosing pair */
2267 s = pt[r];
2268 } else {
2269 num_elem++;
2270 r = pt[r] - 1;
2271 /* seek for next pairing base 5' of r */
2272 for (; !pt[r] && (r >= i); r--);
2273
2274 if (r < pt[r]) {
2275 /* found the enclosing pair */
2276 s = pt[r];
2277 } else {
2278 /* hop over stems and unpaired nucleotides */
2279 while ((r > pt[r]) && (r >= i)) {
2280 if (pt[r]) {
2281 r = pt[r];
2282 num_elem++;
2283 }
2284
2285 r--;
2286 }
2287
2288 s = pt[r]; /* found the enclosing pair */
2289 }
2290 }
2291
2292 /* now we have the enclosing pair (r,s) */
2293
2294 u = q + 1;
2295 /* we know everything about the 5' part of this loop so check the 3' part */
2296 while (u < s) {
2297 if (structure[u - 1] == '.') {
2298 u++;
2299 } else if (structure[u - 1] == '+') {
2300 /* found another gquad */
2301 pos = parse_gquad(structure + u - 1, &L, l);
2302 if (pos > 0) {
2303 E_gquad_ali_en(u, L, l, (const short **)S, vc->a2s, n_seq, P, gq_en);
2304 en_covar += gq_en[1];
2305 up_mis += pos;
2306 u += pos;
2307 num_g++;
2308 }
2309 } else {
2310 /* we must have found a stem */
2311 num_elem++;
2312 en_covar += covar_en_corr_of_loop_gquad(vc,
2313 u,
2314 pt[u],
2315 structure,
2316 pt,
2317 loop_idx);
2318 u = pt[u] + 1;
2319 }
2320 }
2321 /* we are done since we've found no other 3' structure element */
2322
2323 q = s + 1;
2324 }
2325 }
2326 return en_covar;
2327 }
2328
2329
2330 PRIVATE int
stack_energy_covar_pt(vrna_fold_compound_t * vc,int i,const short * pt)2331 stack_energy_covar_pt(vrna_fold_compound_t *vc,
2332 int i,
2333 const short *pt)
2334 {
2335 /* calculate energy of substructure enclosed by (i,j) */
2336 int *indx = vc->jindx; /* index for moving in the triangle matrices c[] and fMl[]*/
2337 int *pscore = vc->pscore; /* precomputed array of pair types */
2338
2339 int energy = 0;
2340 int j, p, q;
2341
2342 j = pt[i];
2343 p = i;
2344 q = j;
2345 while (p < q) {
2346 /* process all stacks and interior loops */
2347 while (pt[++p] == 0);
2348 while (pt[--q] == 0);
2349 if ((pt[q] != (short)p) || (p > q))
2350 break;
2351
2352 energy += pscore[indx[j] + i];
2353 i = p;
2354 j = q;
2355 } /* end while */
2356
2357 /* p,q don't pair must have found hairpin or multiloop */
2358
2359 if (p > q) {
2360 /* hairpin case */
2361 energy += pscore[indx[j] + i];
2362 return energy;
2363 }
2364
2365 /* (i,j) is exterior pair of multiloop */
2366 energy += pscore[indx[j] + i];
2367 while (p < j) {
2368 /* add up the contributions of the substructures of the ML */
2369 energy += stack_energy_covar_pt(vc, p, pt);
2370 p = pt[p];
2371 /* search for next base pair in multiloop */
2372 while (pt[++p] == 0);
2373 }
2374
2375 return energy;
2376 }
2377