1 /** \file unstructured_domains.c **/
2
3 /*
4 * Unstructured domains
5 *
6 * This file contains everything necessary to
7 * deal with the default implementation for unstructured
8 * domains in secondary structures. This feature enables,
9 * for instance, ligand binding to unpaired stretches of
10 * an RNA secondary structure.
11 *
12 * c 2016 Ronny Lorenz
13 *
14 * ViennaRNA package
15 */
16
17 #include <config.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ctype.h>
21 #include <float.h>
22 #include <math.h>
23
24 #include "ViennaRNA/utils/basic.h"
25 #include "ViennaRNA/alphabet.h"
26 #include "ViennaRNA/eval.h"
27 #include "ViennaRNA/unstructured_domains.h"
28
29 /*
30 #################################
31 # PRIVATE MACROS #
32 #################################
33 */
34
35 /*
36 #################################
37 # GLOBAL VARIABLES #
38 #################################
39 */
40
41 /*
42 #################################
43 # PRIVATE VARIABLES/STRUCTS #
44 #################################
45 */
46
47 struct binding_segment {
48 unsigned int start;
49 unsigned int end;
50 unsigned int type;
51 };
52
53
54 struct ud_bt_stack {
55 unsigned int from;
56 vrna_ud_motif_t *motifs;
57 unsigned int motif_cnt;
58 unsigned int motif_size;
59 };
60
61
62 struct default_outside {
63 int motif_num;
64 FLT_OR_DBL exp_energy;
65 };
66
67 /*
68 * Default data structure for ligand binding to unpaired stretches
69 */
70 struct ligands_up_data_default {
71 /*
72 **********************************
73 * pre-computed position-wise
74 * motif list
75 **********************************
76 */
77 int n;
78 int **motif_list_ext;
79 int **motif_list_hp;
80 int **motif_list_int;
81 int **motif_list_mb;
82
83 int *dG;
84 FLT_OR_DBL *exp_dG;
85 int *len;
86
87 /*
88 **********************************
89 * below are DP matrices to store
90 * the production rule results
91 **********************************
92 */
93 int *energies_ext;
94 int *energies_hp;
95 int *energies_int;
96 int *energies_mb;
97 FLT_OR_DBL *exp_energies_ext;
98 FLT_OR_DBL *exp_energies_hp;
99 FLT_OR_DBL *exp_energies_int;
100 FLT_OR_DBL *exp_energies_mb;
101
102 /*
103 **********************************
104 * below are lists to store the
105 * outside partition function for
106 * each motif starting at each
107 * position
108 **********************************
109 */
110 unsigned int *outside_ext_count;
111 struct default_outside **outside_ext;
112 unsigned int *outside_hp_count;
113 struct default_outside **outside_hp;
114 unsigned int *outside_int_count;
115 struct default_outside **outside_int;
116 unsigned int *outside_mb_count;
117 struct default_outside **outside_mb;
118
119 FLT_OR_DBL(*default_cb[32])(int, int, struct ligands_up_data_default *);
120 FLT_OR_DBL *exp_e_mx[32];
121 };
122
123 /*
124 #################################
125 # PRIVATE FUNCTION DECLARATIONS #
126 #################################
127 */
128
129 PRIVATE struct binding_segment *
130 extract_binding_segments(const char *structure,
131 unsigned int *segments_num);
132
133
134 PRIVATE void
135 fill_MFE_matrix(vrna_fold_compound_t *fc,
136 int *mx,
137 unsigned int from,
138 unsigned int to,
139 unsigned int type);
140
141
142 PRIVATE vrna_ud_motif_t *
143 backtrack_MFE_matrix(vrna_fold_compound_t *fc,
144 int *mx,
145 unsigned int from,
146 unsigned int to,
147 unsigned int type);
148
149
150 PRIVATE vrna_ud_motif_t **
151 backtrack_MFE_matrix_exhaustive(vrna_fold_compound_t *fc,
152 int *mx,
153 unsigned int from,
154 unsigned int to,
155 unsigned int type);
156
157
158 PRIVATE void
159 fill_MEA_matrix(vrna_fold_compound_t *fc,
160 float *mx,
161 unsigned int from,
162 unsigned int to,
163 float *pu,
164 unsigned int t);
165
166
167 PRIVATE vrna_ud_motif_t *
168 backtrack_MEA_matrix(vrna_fold_compound_t *fc,
169 float *mx,
170 unsigned int from,
171 unsigned int to,
172 float *pu,
173 unsigned int t);
174
175
176 PRIVATE void
177 remove_ligands_up(vrna_fold_compound_t *vc);
178
179
180 PRIVATE void
181 init_ligands_up(vrna_fold_compound_t *vc);
182
183
184 PRIVATE void
185 add_ligand_motif(vrna_fold_compound_t *vc,
186 const char *motif,
187 double motif_en,
188 const char *motif_name,
189 unsigned int loop_type);
190
191
192 PRIVATE void
193 remove_default_data(void *d);
194
195
196 /* default implementations for unstructured domains feature */
197 PRIVATE void
198 default_prod_rule(vrna_fold_compound_t *vc,
199 void *d);
200
201
202 PRIVATE void
203 default_exp_prod_rule(vrna_fold_compound_t *vc,
204 void *d);
205
206
207 PRIVATE int
208 default_energy(vrna_fold_compound_t *vc,
209 int i,
210 int j,
211 unsigned int loop_type,
212 void *d);
213
214
215 PRIVATE FLT_OR_DBL
216 default_exp_energy(vrna_fold_compound_t *vc,
217 int i,
218 int j,
219 unsigned int loop_type,
220 void *d);
221
222
223 PRIVATE void
224 default_probs_add(vrna_fold_compound_t *vc,
225 int i,
226 int j,
227 unsigned int loop_type,
228 FLT_OR_DBL exp_energy,
229 void *data);
230
231
232 PRIVATE FLT_OR_DBL
233 default_probs_get(vrna_fold_compound_t *vc,
234 int i,
235 int j,
236 unsigned int loop_type,
237 int motif,
238 void *data);
239
240
241 /* helper functions for default implementatations of unstructured domains feature */
242 PRIVATE int
243 default_energy_ext_motif(int i,
244 int j,
245 struct ligands_up_data_default *data);
246
247
248 PRIVATE int
249 default_energy_hp_motif(int i,
250 int j,
251 struct ligands_up_data_default *data);
252
253
254 PRIVATE int
255 default_energy_int_motif(int i,
256 int j,
257 struct ligands_up_data_default *data);
258
259
260 PRIVATE int
261 default_energy_mb_motif(int i,
262 int j,
263 struct ligands_up_data_default *data);
264
265
266 PRIVATE FLT_OR_DBL
267 default_exp_energy_ext_motif(int i,
268 int j,
269 struct ligands_up_data_default *data);
270
271
272 PRIVATE FLT_OR_DBL
273 default_exp_energy_hp_motif(int i,
274 int j,
275 struct ligands_up_data_default *data);
276
277
278 PRIVATE FLT_OR_DBL
279 default_exp_energy_int_motif(int i,
280 int j,
281 struct ligands_up_data_default *data);
282
283
284 PRIVATE FLT_OR_DBL
285 default_exp_energy_mb_motif(int i,
286 int j,
287 struct ligands_up_data_default *data);
288
289
290 PRIVATE void
291 free_default_data_matrices(struct ligands_up_data_default *data);
292
293
294 PRIVATE void
295 free_default_data_exp_matrices(struct ligands_up_data_default *data);
296
297
298 PRIVATE void
299 prepare_matrices(vrna_fold_compound_t *vc,
300 struct ligands_up_data_default *data);
301
302
303 PRIVATE void
304 prepare_exp_matrices(vrna_fold_compound_t *vc,
305 struct ligands_up_data_default *data);
306
307
308 PRIVATE struct ligands_up_data_default *
309 get_default_data(void);
310
311
312 PRIVATE void
313 prepare_default_data(vrna_fold_compound_t *vc,
314 struct ligands_up_data_default *data);
315
316
317 PRIVATE void
318 free_default_data(struct ligands_up_data_default *data);
319
320
321 PRIVATE int *
322 get_motifs(vrna_fold_compound_t *vc,
323 int i,
324 unsigned int loop_type);
325
326
327 PRIVATE void
328 annotate_ud(vrna_fold_compound_t *vc,
329 int start,
330 int end,
331 char l,
332 vrna_ud_motif_t **list,
333 int *list_size,
334 int *list_pos);
335
336
337 /*
338 #################################
339 # BEGIN OF FUNCTION DEFINITIONS #
340 #################################
341 */
342 PUBLIC vrna_ud_motif_t *
vrna_ud_motifs_centroid(vrna_fold_compound_t * fc,const char * structure)343 vrna_ud_motifs_centroid(vrna_fold_compound_t *fc,
344 const char *structure)
345 {
346 unsigned int i, j, m, s, motifs_size, motifs_count, t, num_segments;
347 vrna_ud_motif_t *motifs;
348 vrna_ud_t *domains_up;
349
350 motifs = NULL;
351
352 if ((fc) && (fc->domains_up) && (fc->domains_up->probs_get) && (structure)) {
353 domains_up = fc->domains_up;
354 struct binding_segment *segments = extract_binding_segments(structure, &num_segments);
355
356 motifs_size = 10;
357 motifs_count = 0;
358 motifs = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * (motifs_size + 1));
359
360 for (s = 0; s < num_segments; s++) {
361 t = segments[s].type;
362
363 for (i = segments[s].start; i <= segments[s].end; i++) {
364 for (m = 0; m < domains_up->motif_count; m++) {
365 j = i + domains_up->motif_size[m] - 1;
366 if (j <= segments[s].end) {
367 /* actually, the condition below should check whether the motif probability makes up more than 50% of the unpaired probability */
368 if (domains_up->probs_get(fc, i, j, t, m, domains_up->data) > 0.5) {
369 motifs[motifs_count].start = i;
370 motifs[motifs_count].number = m;
371 motifs_count++;
372
373 if (motifs_count == motifs_size) {
374 motifs_size *= 1.4;
375 motifs = (vrna_ud_motif_t *)vrna_realloc(motifs,
376 sizeof(vrna_ud_motif_t) *
377 (motifs_size + 1));
378 }
379 }
380 }
381 }
382 }
383 }
384
385 free(segments);
386
387 if (motifs_count > 0) {
388 /* add end of list marker */
389 motifs[motifs_count].start = 0;
390 motifs[motifs_count].number = -1;
391 motifs = (vrna_ud_motif_t *)vrna_realloc(motifs,
392 sizeof(vrna_ud_motif_t) *
393 (motifs_count + 1));
394 } else {
395 free(motifs);
396 motifs = NULL;
397 }
398 }
399
400 return motifs;
401 }
402
403
404 PUBLIC vrna_ud_motif_t *
vrna_ud_motifs_MEA(vrna_fold_compound_t * fc,const char * structure,vrna_ep_t * probability_list)405 vrna_ud_motifs_MEA(vrna_fold_compound_t *fc,
406 const char *structure,
407 vrna_ep_t *probability_list)
408 {
409 unsigned int from, to, i, n, s, motifs_size, motifs_count, t, num_segments;
410 float *pu, *mx;
411 vrna_ep_t *ptr;
412 vrna_ud_motif_t *motifs;
413 struct binding_segment *segments;
414
415 motifs = NULL;
416
417 if ((fc) && (fc->domains_up) && (fc->domains_up->probs_get) && (structure) &&
418 (probability_list)) {
419 n = fc->length;
420 segments = extract_binding_segments(structure, &num_segments);
421 pu = (float *)vrna_alloc(sizeof(float) * (n + 1));
422 mx = (float *)vrna_alloc(sizeof(float) * (n + 1));
423
424 /* determine probabilities to be unpaired */
425 for (i = 1; i <= n; i++)
426 pu[i] = 1.;
427
428 for (ptr = probability_list; ptr->i > 0; ptr++) {
429 if (ptr->type == VRNA_PLIST_TYPE_BASEPAIR) {
430 pu[ptr->i] -= ptr->p;
431 pu[ptr->j] -= ptr->p;
432 } else if (ptr->type == VRNA_PLIST_TYPE_UD_MOTIF) {
433 for (i = ptr->i; i <= ptr->j; i++)
434 pu[i] -= ptr->p;
435 }
436 }
437
438 motifs_count = 0;
439 motifs_size = 10;
440 motifs = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * (motifs_size + 1));
441
442 for (s = 0; s < num_segments; s++) {
443 vrna_ud_motif_t *m;
444
445 from = segments[s].start;
446 to = segments[s].end;
447 t = segments[s].type;
448
449 fill_MEA_matrix(fc, mx, from, to, pu, t);
450 m = backtrack_MEA_matrix(fc, mx, from, to, pu, t);
451
452 if (m) {
453 /* determine number of new motifs */
454 for (i = 0; m[i].start != 0; i++);
455
456 /* resize target memory if necessary */
457 if (motifs_count + i >= motifs_size) {
458 motifs_size += motifs_size / 2 + 1 + i;
459 motifs = (vrna_ud_motif_t *)vrna_realloc(motifs,
460 sizeof(vrna_ud_motif_t) *
461 (motifs_size + 1));
462 }
463
464 /* append data */
465 memcpy(motifs + motifs_count, m, sizeof(vrna_ud_motif_t) * i);
466
467 /* increase motif counter */
468 motifs_count += i;
469
470 /* release backtracked motif list */
471 free(m);
472 }
473 }
474 free(mx);
475 free(pu);
476 free(segments);
477
478 if (motifs_count > 0) {
479 /* add end of list marker */
480 motifs[motifs_count].start = 0;
481 motifs[motifs_count].number = -1;
482 motifs = (vrna_ud_motif_t *)vrna_realloc(motifs,
483 sizeof(vrna_ud_motif_t) *
484 (motifs_count + 1));
485 } else {
486 free(motifs);
487 motifs = NULL;
488 }
489 }
490
491 return motifs;
492 }
493
494
495 PUBLIC vrna_ud_motif_t *
vrna_ud_motifs_MFE(vrna_fold_compound_t * fc,const char * structure)496 vrna_ud_motifs_MFE(vrna_fold_compound_t *fc,
497 const char *structure)
498 {
499 unsigned int from, to, i, n, s, motifs_size, motifs_count, t, num_segments;
500 int *mx;
501 vrna_ud_motif_t *motifs;
502 struct binding_segment *segments;
503
504 motifs = NULL;
505
506 if ((fc) && (fc->domains_up) && (fc->domains_up->probs_get) && (structure)) {
507 n = fc->length;
508 segments = extract_binding_segments(structure, &num_segments);
509 mx = (int *)vrna_alloc(sizeof(int) * (n + 1));
510
511 motifs_count = 0;
512 motifs_size = 10;
513 motifs = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * (motifs_size + 1));
514
515 for (s = 0; s < num_segments; s++) {
516 vrna_ud_motif_t *m;
517
518 from = segments[s].start;
519 to = segments[s].end;
520 t = segments[s].type;
521
522 fill_MFE_matrix(fc, mx, from, to, t);
523 m = backtrack_MFE_matrix(fc, mx, from, to, t);
524
525 if (m) {
526 /* determine number of new motifs */
527 for (i = 0; m[i].start != 0; i++);
528
529 /* resize target memory if necessary */
530 if (motifs_count + i >= motifs_size) {
531 motifs_size += motifs_size / 2 + 1 + i;
532 motifs = (vrna_ud_motif_t *)vrna_realloc(motifs,
533 sizeof(vrna_ud_motif_t) *
534 (motifs_size + 1));
535 }
536
537 /* append data */
538 memcpy(motifs + motifs_count, m, sizeof(vrna_ud_motif_t) * i);
539
540 /* increase motif counter */
541 motifs_count += i;
542
543 /* release backtracked motif list */
544 free(m);
545 }
546 }
547 free(mx);
548 free(segments);
549
550 if (motifs_count > 0) {
551 /* add end of list marker */
552 motifs[motifs_count].start = 0;
553 motifs[motifs_count].number = -1;
554 motifs = (vrna_ud_motif_t *)vrna_realloc(motifs,
555 sizeof(vrna_ud_motif_t) *
556 (motifs_count + 1));
557 } else {
558 free(motifs);
559 motifs = NULL;
560 }
561 }
562
563 return motifs;
564 }
565
566
567 PUBLIC void
vrna_ud_remove(vrna_fold_compound_t * vc)568 vrna_ud_remove(vrna_fold_compound_t *vc)
569 {
570 if (vc && vc->domains_up)
571 remove_ligands_up(vc);
572 }
573
574
575 PUBLIC void
vrna_ud_set_data(vrna_fold_compound_t * vc,void * data,vrna_callback_free_auxdata * free_cb)576 vrna_ud_set_data(vrna_fold_compound_t *vc,
577 void *data,
578 vrna_callback_free_auxdata *free_cb)
579 {
580 if (vc) {
581 /* init if not already present */
582 if (!vc->domains_up)
583 init_ligands_up(vc);
584
585 /* free previous data if 'free_data' function present */
586 if (vc->domains_up->free_data)
587 vc->domains_up->free_data(vc->domains_up->data);
588
589 /* set new data and free callback */
590 vc->domains_up->free_data = free_cb;
591 vc->domains_up->data = data;
592 }
593 }
594
595
596 PUBLIC void
vrna_ud_set_prod_rule_cb(vrna_fold_compound_t * vc,vrna_callback_ud_production * pre_cb,vrna_callback_ud_energy * e_cb)597 vrna_ud_set_prod_rule_cb(vrna_fold_compound_t *vc,
598 vrna_callback_ud_production *pre_cb,
599 vrna_callback_ud_energy *e_cb)
600 {
601 if (vc) {
602 /* init if not already present */
603 if (!vc->domains_up)
604 init_ligands_up(vc);
605
606 /* set new callback */
607 vc->domains_up->prod_cb = pre_cb;
608 vc->domains_up->energy_cb = e_cb;
609 }
610 }
611
612
613 PUBLIC void
vrna_ud_set_exp_prod_rule_cb(vrna_fold_compound_t * vc,vrna_callback_ud_exp_production * pre_cb,vrna_callback_ud_exp_energy * exp_e_cb)614 vrna_ud_set_exp_prod_rule_cb(vrna_fold_compound_t *vc,
615 vrna_callback_ud_exp_production *pre_cb,
616 vrna_callback_ud_exp_energy *exp_e_cb)
617 {
618 if (vc) {
619 /* init if not already present */
620 if (!vc->domains_up)
621 init_ligands_up(vc);
622
623 /* set new callback */
624 vc->domains_up->exp_prod_cb = pre_cb;
625 vc->domains_up->exp_energy_cb = exp_e_cb;
626 }
627 }
628
629
630 PUBLIC void
vrna_ud_set_prob_cb(vrna_fold_compound_t * vc,vrna_callback_ud_probs_add * setter,vrna_callback_ud_probs_get * getter)631 vrna_ud_set_prob_cb(vrna_fold_compound_t *vc,
632 vrna_callback_ud_probs_add *setter,
633 vrna_callback_ud_probs_get *getter)
634 {
635 if (vc) {
636 /* init if not already present */
637 if (!vc->domains_up)
638 init_ligands_up(vc);
639
640 /* set new callback */
641 vc->domains_up->probs_add = setter;
642 vc->domains_up->probs_get = getter;
643 }
644 }
645
646
647 PUBLIC void
vrna_ud_add_motif(vrna_fold_compound_t * vc,const char * motif,double motif_en,const char * motif_name,unsigned int loop_type)648 vrna_ud_add_motif(vrna_fold_compound_t *vc,
649 const char *motif,
650 double motif_en,
651 const char *motif_name,
652 unsigned int loop_type)
653 {
654 if (vc) {
655 if (!vc->domains_up) {
656 /* set all default callbacks */
657 vrna_ud_set_prod_rule_cb(vc, &default_prod_rule, &default_energy);
658 vrna_ud_set_exp_prod_rule_cb(vc, &default_exp_prod_rule, &default_exp_energy);
659 vrna_ud_set_data(vc, get_default_data(), &remove_default_data);
660 vrna_ud_set_prob_cb(vc, &default_probs_add, &default_probs_get);
661 }
662
663 add_ligand_motif(vc, motif, motif_en, motif_name, loop_type);
664 }
665 }
666
667
668 PUBLIC int *
vrna_ud_get_motif_size_at(vrna_fold_compound_t * vc,int i,unsigned int loop_type)669 vrna_ud_get_motif_size_at(vrna_fold_compound_t *vc,
670 int i,
671 unsigned int loop_type)
672 {
673 if (vc && vc->domains_up) {
674 int k, l, cnt, *ret, *ptr;
675
676 ret = NULL;
677 if ((i > 0) && (i <= vc->length)) {
678 ptr = get_motifs(vc, i, loop_type);
679 if (ptr) {
680 for (k = 0; ptr[k] != -1; k++) /* replace motif number with its size */
681 ptr[k] = vc->domains_up->motif_size[ptr[k]];
682 /* make the list unique */
683 ret = (int *)vrna_alloc(sizeof(int) * (k + 1));
684 ret[0] = -1;
685 cnt = 0;
686 for (k = 0; ptr[k] != -1; k++) {
687 for (l = 0; l < cnt; l++)
688 if (ptr[k] == ret[l])
689 break;
690
691 /* we've already seen this size */
692
693 if (l == cnt) {
694 /* we've not seen this size before */
695 ret[cnt] = ptr[k];
696 ret[cnt + 1] = -1;
697 cnt++;
698 }
699 }
700 /* resize ret array */
701 ret = (int *)vrna_realloc(ret, sizeof(int) * (cnt + 1));
702 }
703
704 free(ptr);
705 }
706
707 return ret;
708 }
709
710 return NULL;
711 }
712
713
714 PUBLIC int *
vrna_ud_get_motifs_at(vrna_fold_compound_t * vc,int i,unsigned int loop_type)715 vrna_ud_get_motifs_at(vrna_fold_compound_t *vc,
716 int i,
717 unsigned int loop_type)
718 {
719 if (vc && vc->domains_up)
720 if ((i > 0) && (i <= vc->length))
721 return get_motifs(vc, i, loop_type);
722
723 return NULL;
724 }
725
726
727 vrna_ud_motif_t *
vrna_ud_detect_motifs(vrna_fold_compound_t * vc,const char * structure)728 vrna_ud_detect_motifs(vrna_fold_compound_t *vc,
729 const char *structure)
730 {
731 int list_size, list_pos;
732 vrna_ud_motif_t *motif_list;
733
734 motif_list = NULL;
735
736 if (structure && vc->domains_up) {
737 int l, start, end;
738 char last, *loops;
739
740 l = 0;
741 list_pos = 0;
742 list_size = 15;
743 motif_list = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * list_size);
744 loops = vrna_db_to_element_string(structure);
745
746 while (l < vc->length) {
747 /* skip uppercase encodings */
748 while (l < vc->length) {
749 if (islower(loops[l]))
750 break;
751
752 l++;
753 }
754
755 if (l < vc->length) {
756 start = 1 + l;
757 last = loops[l];
758 while (loops[l++] == last)
759 if (l == vc->length)
760 break;
761
762 end = l - 1;
763 annotate_ud(vc, start, end, last, &motif_list, &list_size, &list_pos);
764 }
765 }
766
767 motif_list = (vrna_ud_motif_t *)vrna_realloc(motif_list,
768 sizeof(vrna_ud_motif_t) *
769 (list_pos + 1));
770 motif_list[list_pos].start = 0;
771 motif_list[list_pos].number = -1;
772 free(loops);
773 }
774
775 return motif_list;
776 }
777
778
779 PRIVATE vrna_ud_motif_t **
ud_get_motifs_MFE(vrna_fold_compound_t * fc,struct binding_segment * segments,unsigned int segments_num)780 ud_get_motifs_MFE(vrna_fold_compound_t *fc,
781 struct binding_segment *segments,
782 unsigned int segments_num)
783 {
784 vrna_ud_motif_t **motif_lists;
785
786 motif_lists = NULL;
787
788 if (segments) {
789 unsigned int n, alt_cnt, shift, l, d, m, *merger_cnt, num_combinations, start, end, t;
790 int *mx;
791 vrna_ud_motif_t **ptr, *ptr2, ***alternatives;
792
793 alternatives = (vrna_ud_motif_t ***)vrna_alloc(sizeof(vrna_ud_motif_t **) * segments_num);
794 alt_cnt = 0;
795
796 /* collect optimal configurations for each segment */
797 for (n = 0; n < segments_num; n++) {
798 start = segments[n].start;
799 end = segments[n].end;
800 t = segments[n].type;
801
802 mx = (int *)vrna_alloc(sizeof(int) * (end - start + 2));
803 mx -= start;
804
805 fill_MFE_matrix(fc, mx, start, end, t);
806 if ((ptr = backtrack_MFE_matrix_exhaustive(fc, mx, start, end, t)))
807 alternatives[alt_cnt++] = ptr;
808
809 mx += start;
810 free(mx);
811 }
812
813 /* prepare for merging process */
814
815 num_combinations = 1;
816 merger_cnt = (unsigned int *)vrna_alloc(sizeof(unsigned int) * alt_cnt);
817 vrna_ud_motif_t **merger = (vrna_ud_motif_t **)vrna_alloc(sizeof(vrna_ud_motif_t *) * alt_cnt);
818
819 for (n = 0; n < alt_cnt; n++) {
820 /* count number of alternatives for current segment */
821 for (d = 0; alternatives[n][d]; d++);
822
823 if (d)
824 num_combinations *= d;
825
826 /* set merger pointers */
827 merger[n] = alternatives[n][0];
828
829 /* count number of elements at top of list */
830 for (ptr2 = merger[n]; ptr2->start != 0; ++ptr2);
831
832 /* store motif counter for current configuration */
833 merger_cnt[n] = ptr2 - merger[n];
834 }
835
836 /* finally merge the segments */
837 motif_lists = (vrna_ud_motif_t **)vrna_alloc(sizeof(vrna_ud_motif_t *) *
838 (num_combinations + 1));
839
840 for (n = 0; n < num_combinations; n++) {
841 /* determine length of list */
842 for (l = m = 0; m < alt_cnt; m++)
843 l += merger_cnt[m];
844
845 motif_lists[n] = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * (l + 1));
846 for (shift = m = 0; m < alt_cnt; m++) {
847 memcpy(motif_lists[n] + shift, merger[m], sizeof(vrna_ud_motif_t) * merger_cnt[m]);
848 shift += merger_cnt[m];
849 }
850
851 motif_lists[n][l].start = 0;
852 motif_lists[n][l].number = -1;
853
854 /* update (increase) merger pointers */
855 for (m = alt_cnt; m > 0; m--) {
856 ++(merger[m - 1]);
857 if (merger[m - 1]) {
858 for (ptr2 = merger[m - 1]; ptr2->start != 0; ++ptr2);
859
860 merger_cnt[m - 1] = ptr2 - merger[m - 1];
861 break;
862 } else if (m == 1) {
863 break;
864 } else {
865 merger[m - 1] = alternatives[m - 1][0];
866 for (ptr2 = merger[m - 1]; ptr2->start != 0; ++ptr2);
867
868 merger_cnt[m - 1] = ptr2 - merger[m - 1];
869 }
870 }
871 }
872
873 free(merger);
874 free(merger_cnt);
875
876 for (n = 0; n < alt_cnt; n++) {
877 for (m = 0; alternatives[n][m]; m++)
878 free(alternatives[n][m]);
879 free(alternatives[n]);
880 }
881 free(alternatives);
882
883 motif_lists[num_combinations] = NULL;
884 }
885
886 return motif_lists;
887 }
888
889
890 PRIVATE vrna_ud_motif_t **
ud_get_motifs_energy(vrna_fold_compound_t * fc,struct binding_segment * segments,unsigned int segments_num,int e)891 ud_get_motifs_energy(vrna_fold_compound_t *fc,
892 struct binding_segment *segments,
893 unsigned int segments_num,
894 int e)
895 {
896 vrna_ud_motif_t **motif_lists;
897
898 motif_lists = NULL;
899
900 if (segments) {
901 }
902
903 return motif_lists;
904 }
905
906
907 PUBLIC vrna_ud_motif_t **
vrna_ud_extract_motifs(vrna_fold_compound_t * fc,const char * structure,float * energy)908 vrna_ud_extract_motifs(vrna_fold_compound_t *fc,
909 const char *structure,
910 float *energy)
911 {
912 vrna_ud_motif_t **motif_lists;
913
914 motif_lists = NULL;
915
916 if ((fc) && (fc->domains_up) && (structure)) {
917 unsigned int segments_num;
918 struct binding_segment *segments;
919
920 segments = extract_binding_segments(structure, &segments_num);
921
922 if (!energy) {
923 /* get MFE arrangement(s) */
924 motif_lists = ud_get_motifs_MFE(fc, segments, segments_num);
925 } else {
926 /* get arrangement(s) that result in provided free energy */
927 float e = vrna_eval_structure(fc, structure);
928 int de = (int)roundf(*energy - e) * 100; /* binding free energy in deka cal/mol */
929 motif_lists = ud_get_motifs_energy(fc, segments, segments_num, de);
930 }
931
932 free(segments);
933 }
934
935 return motif_lists;
936 }
937
938
939 /*
940 #####################################
941 # BEGIN OF STATIC HELPER FUNCTIONS #
942 #####################################
943 */
944 PRIVATE struct ligands_up_data_default *
get_default_data(void)945 get_default_data(void)
946 {
947 struct ligands_up_data_default *data = vrna_alloc(sizeof(struct ligands_up_data_default));
948
949 data->n = 0;
950 data->motif_list_ext = NULL;
951 data->motif_list_hp = NULL;
952 data->motif_list_int = NULL;
953 data->motif_list_mb = NULL;
954 data->dG = NULL;
955 data->exp_dG = NULL;
956 data->energies_ext = NULL;
957 data->energies_hp = NULL;
958 data->energies_int = NULL;
959 data->energies_mb = NULL;
960 data->exp_energies_ext = NULL;
961 data->exp_energies_hp = NULL;
962 data->exp_energies_int = NULL;
963 data->exp_energies_mb = NULL;
964 data->outside_ext = NULL;
965 data->outside_hp = NULL;
966 data->outside_int = NULL;
967 data->outside_mb = NULL;
968 data->outside_ext_count = NULL;
969 data->outside_hp_count = NULL;
970 data->outside_int_count = NULL;
971 data->outside_mb_count = NULL;
972 return data;
973 }
974
975
976 PRIVATE void
remove_ligands_up(vrna_fold_compound_t * vc)977 remove_ligands_up(vrna_fold_compound_t *vc)
978 {
979 int i;
980
981 /* free auxiliary data */
982 if (vc->domains_up->free_data)
983 vc->domains_up->free_data(vc->domains_up->data);
984
985 /* free motif sequences */
986 for (i = 0; i < vc->domains_up->motif_count; i++)
987 free(vc->domains_up->motif[i]);
988
989 /* free motif names */
990 for (i = 0; i < vc->domains_up->motif_count; i++)
991 free(vc->domains_up->motif_name[i]);
992
993 free(vc->domains_up->motif);
994 free(vc->domains_up->motif_name);
995 free(vc->domains_up->motif_size);
996 free(vc->domains_up->motif_en);
997 free(vc->domains_up->motif_type);
998
999 free(vc->domains_up->uniq_motif_size);
1000
1001 free(vc->domains_up);
1002
1003 vc->domains_up = NULL;
1004 }
1005
1006
1007 PRIVATE void
init_ligands_up(vrna_fold_compound_t * vc)1008 init_ligands_up(vrna_fold_compound_t *vc)
1009 {
1010 vc->domains_up = (vrna_ud_t *)vrna_alloc(sizeof(vrna_ud_t));
1011
1012 vc->domains_up->uniq_motif_count = 0;
1013 vc->domains_up->uniq_motif_size = NULL;
1014 vc->domains_up->motif_count = 0;
1015 vc->domains_up->motif = NULL;
1016 vc->domains_up->motif_name = NULL;
1017 vc->domains_up->motif_size = NULL;
1018 vc->domains_up->motif_en = NULL;
1019 vc->domains_up->motif_type = NULL;
1020 vc->domains_up->prod_cb = NULL;
1021 vc->domains_up->exp_prod_cb = NULL;
1022 vc->domains_up->energy_cb = NULL;
1023 vc->domains_up->exp_energy_cb = NULL;
1024 vc->domains_up->data = NULL;
1025 vc->domains_up->free_data = NULL;
1026 vc->domains_up->probs_add = NULL;
1027 vc->domains_up->probs_get = NULL;
1028 }
1029
1030
1031 /*
1032 * Given a secondary structure in dot-bracket notation,
1033 * extract all consecutive unpaired nucleotides as individual
1034 * segments.
1035 * After successful execution, this function returns a list of
1036 * segments and the number of list elements is stored in the
1037 * variable passed as segments_num
1038 */
1039 PRIVATE struct binding_segment *
extract_binding_segments(const char * structure,unsigned int * segments_num)1040 extract_binding_segments(const char *structure,
1041 unsigned int *segments_num)
1042 {
1043 struct binding_segment *segments;
1044 char *loops;
1045 unsigned int n, pos, start, segments_length;
1046
1047 segments = NULL;
1048 n = strlen(structure);
1049 loops = vrna_db_to_element_string(structure);
1050
1051 *segments_num = 0;
1052 segments_length = 15;
1053 segments = (struct binding_segment *)vrna_alloc(sizeof(struct binding_segment) *
1054 segments_length);
1055
1056 pos = 1;
1057
1058 /* extract segments that possibly harbor bound ligands */
1059 while (pos <= n) {
1060 /* skip uppercase encodings, i.e. paired nucleotides */
1061 for (; isupper(loops[pos - 1]) && (pos <= n); pos++);
1062
1063 /* no more unpaired segments */
1064 if (pos > n)
1065 break;
1066
1067 start = pos;
1068
1069 /* find next uppercase encoding, i.e. paired nucleotides */
1070 for (; islower(loops[pos - 1]) && (pos <= n); pos++);
1071
1072 segments[(*segments_num)].start = start;
1073 segments[(*segments_num)].end = pos - 1;
1074 segments[(*segments_num)].type = 0;
1075
1076 if (loops[start - 1] == 'e')
1077 segments[(*segments_num)].type = VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP;
1078 else if (loops[start - 1] == 'h')
1079 segments[(*segments_num)].type = VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP;
1080 else if (loops[start - 1] == 'i')
1081 segments[(*segments_num)].type = VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP;
1082 else if (loops[start - 1] == 'm')
1083 segments[(*segments_num)].type = VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP;
1084
1085 (*segments_num)++;
1086
1087 /* resize memory if necessary */
1088 if ((*segments_num) == segments_length) {
1089 segments_length *= 1.4;
1090 segments = (struct binding_segment *)vrna_realloc(segments,
1091 sizeof(struct binding_segment) *
1092 segments_length);
1093 }
1094 }
1095
1096 segments = (struct binding_segment *)vrna_realloc(segments,
1097 sizeof(struct binding_segment) *
1098 (*segments_num));
1099
1100 free(loops);
1101
1102 return segments;
1103 }
1104
1105
1106 PRIVATE void
fill_MFE_matrix(vrna_fold_compound_t * fc,int * mx,unsigned int from,unsigned int to,unsigned int type)1107 fill_MFE_matrix(vrna_fold_compound_t *fc,
1108 int *mx,
1109 unsigned int from,
1110 unsigned int to,
1111 unsigned int type)
1112 {
1113 unsigned int i, d, m;
1114 int u, e, ee;
1115 vrna_ud_t *domains_up;
1116
1117 domains_up = fc->domains_up;
1118
1119 e = 0;
1120 for (m = 0; m < domains_up->uniq_motif_count; m++) {
1121 if (domains_up->uniq_motif_size[m] == 1) {
1122 ee = domains_up->energy_cb(fc,
1123 to,
1124 to,
1125 type | VRNA_UNSTRUCTURED_DOMAIN_MOTIF,
1126 domains_up->data);
1127 e = MIN2(e, ee);
1128 }
1129 }
1130
1131 mx[to] = e;
1132
1133 for (d = 2, i = to - 1; i >= from; i--, d++) {
1134 e = mx[i + 1];
1135
1136 for (m = 0; m < domains_up->uniq_motif_count; m++) {
1137 u = domains_up->uniq_motif_size[m];
1138 if (u <= d) {
1139 ee = domains_up->energy_cb(fc,
1140 i,
1141 i + u - 1,
1142 type | VRNA_UNSTRUCTURED_DOMAIN_MOTIF,
1143 domains_up->data);
1144
1145 if (u < d)
1146 ee += mx[i + u];
1147
1148 e = MIN2(e, ee);
1149 }
1150 }
1151 mx[i] = e;
1152 }
1153 }
1154
1155
1156 PRIVATE vrna_ud_motif_t *
backtrack_MFE_matrix(vrna_fold_compound_t * fc,int * mx,unsigned int from,unsigned int to,unsigned int type)1157 backtrack_MFE_matrix(vrna_fold_compound_t *fc,
1158 int *mx,
1159 unsigned int from,
1160 unsigned int to,
1161 unsigned int type)
1162 {
1163 unsigned int d, i, k, m, u, motif_cnt, motif_size;
1164 int e, ee, eee;
1165 vrna_ud_t *domains_up;
1166 vrna_ud_motif_t *motif_list;
1167
1168 domains_up = fc->domains_up;
1169 motif_cnt = 0;
1170 motif_size = 10;
1171 motif_list = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * (motif_size + 1));
1172
1173 for (d = to - from + 1, i = from; i < to; ) {
1174 e = mx[i];
1175 ee = mx[i + 1];
1176
1177 if (e == ee) {
1178 i++;
1179 d--;
1180 continue;
1181 }
1182
1183 for (m = 0; m < domains_up->uniq_motif_count; m++) {
1184 u = domains_up->uniq_motif_size[m];
1185
1186 if (u <= d) {
1187 eee = ee = domains_up->energy_cb(fc,
1188 i,
1189 i + u - 1,
1190 type | VRNA_UNSTRUCTURED_DOMAIN_MOTIF,
1191 domains_up->data);
1192
1193 if (ee != INF) {
1194 if (u < d)
1195 ee += mx[i + u];
1196
1197 if (e == ee) {
1198 /* determine actual motif number and add this motif to list */
1199 for (k = 0; k < domains_up->motif_count; k++)
1200 if ((domains_up->motif_type[k] & type) && (domains_up->motif_size[k] == u))
1201 if (eee == (int)roundf(domains_up->motif_en[k] * 100.))
1202 break;
1203
1204 motif_list[motif_cnt].start = i;
1205 motif_list[motif_cnt].number = k;
1206 motif_cnt++;
1207
1208 if (motif_cnt == motif_size) {
1209 motif_size *= 1.4;
1210 motif_list = (vrna_ud_motif_t *)vrna_realloc(motif_list,
1211 sizeof(vrna_ud_motif_t) *
1212 (motif_size + 1));
1213 }
1214
1215 i += u;
1216 d -= u;
1217 break;
1218 }
1219 }
1220 }
1221 }
1222 }
1223
1224 if (i == to) {
1225 e = mx[i];
1226
1227 if (e != 0) {
1228 for (m = 0; m < domains_up->uniq_motif_count; m++) {
1229 if (domains_up->uniq_motif_size[m] == 1) {
1230 ee = domains_up->energy_cb(fc,
1231 i,
1232 i,
1233 type | VRNA_UNSTRUCTURED_DOMAIN_MOTIF,
1234 domains_up->data);
1235
1236 if (e == ee) {
1237 /* determine actual motif number and add this motif to list */
1238 for (k = 0; k < domains_up->motif_count; k++)
1239 if ((domains_up->motif_type[k] & type) && (domains_up->motif_size[k] == 1))
1240 if (ee == (int)roundf(domains_up->motif_en[k] * 100.))
1241 break;
1242
1243 motif_list[motif_cnt].start = i;
1244 motif_list[motif_cnt].number = k;
1245 motif_cnt++;
1246
1247 if (motif_cnt == motif_size) {
1248 motif_size *= 1.4;
1249 motif_list = (vrna_ud_motif_t *)vrna_realloc(motif_list,
1250 sizeof(vrna_ud_motif_t) *
1251 (motif_size + 1));
1252 }
1253
1254 break;
1255 }
1256 }
1257 }
1258 }
1259 }
1260
1261 if (motif_cnt == 0) {
1262 free(motif_list);
1263 motif_list = NULL;
1264 } else {
1265 motif_list = (vrna_ud_motif_t *)vrna_realloc(motif_list,
1266 sizeof(vrna_ud_motif_t) *
1267 (motif_cnt + 1));
1268 motif_list[motif_cnt].start = 0;
1269 motif_list[motif_cnt].number = -1;
1270 }
1271
1272 return motif_list;
1273 }
1274
1275
1276 PRIVATE vrna_ud_motif_t **
backtrack_MFE_matrix_exhaustive(vrna_fold_compound_t * fc,int * mx,unsigned int from,unsigned int to,unsigned int type)1277 backtrack_MFE_matrix_exhaustive(vrna_fold_compound_t *fc,
1278 int *mx,
1279 unsigned int from,
1280 unsigned int to,
1281 unsigned int type)
1282 {
1283 vrna_ud_motif_t **motif_lists, *local_list;
1284 vrna_ud_t *domains_up;
1285
1286 domains_up = fc->domains_up;
1287
1288 unsigned int i, k, m, u, motif_list_size, motif_list_num, bt_stack_size, bt_stack_pos,
1289 local_cnt, local_size;
1290 int e, ee;
1291 struct ud_bt_stack *bt_stack;
1292
1293 /* prepare motif list */
1294 motif_list_size = 10;
1295 motif_list_num = 0;
1296 motif_lists = (vrna_ud_motif_t **)vrna_alloc(sizeof(vrna_ud_motif_t *) *
1297 (motif_list_size + 1));
1298
1299 /* prepare backtrack stack */
1300 bt_stack_pos = 0;
1301 bt_stack_size = 10;
1302 bt_stack = (struct ud_bt_stack *)vrna_alloc(sizeof(struct ud_bt_stack) * bt_stack_size);
1303
1304 /* push start condition to backtrack stack */
1305 bt_stack[bt_stack_pos].from = from;
1306 bt_stack[bt_stack_pos].motif_size = 10;
1307 bt_stack[bt_stack_pos].motifs = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * 10);
1308 bt_stack[bt_stack_pos].motif_cnt = 0;
1309 bt_stack_pos++;
1310
1311 /* process backtrack stack */
1312 while (bt_stack_pos > 0) {
1313 /* pop condition from stack */
1314 bt_stack_pos--;
1315 i = bt_stack[bt_stack_pos].from;
1316 local_list = bt_stack[bt_stack_pos].motifs;
1317 local_cnt = bt_stack[bt_stack_pos].motif_cnt;
1318 local_size = bt_stack[bt_stack_pos].motif_size;
1319
1320 if (i > to) {
1321 /* store result */
1322 if (local_list) {
1323 local_list = (vrna_ud_motif_t *)vrna_realloc(local_list,
1324 sizeof(vrna_ud_motif_t *) *
1325 (local_cnt + 1));
1326 local_list[local_cnt].start = 0;
1327 local_list[local_cnt].number = -1;
1328
1329 motif_lists[motif_list_num++] = local_list;
1330
1331 if (motif_list_num == motif_list_size) {
1332 motif_list_size *= 1.4;
1333 motif_lists = (vrna_ud_motif_t **)vrna_realloc(motif_lists,
1334 sizeof(vrna_ud_motif_t *) *
1335 (motif_list_size + 1));
1336 }
1337 }
1338
1339 continue;
1340 }
1341
1342 /* backtrack motifs */
1343 e = mx[i];
1344
1345 /* nibble off unpaired nucleotides at 5' end */
1346 while ((i + 1 <= to) && (mx[i + 1] == e))
1347 i++;
1348
1349 /* detect motif */
1350 for (k = 0; k < domains_up->uniq_motif_count; k++) {
1351 u = domains_up->uniq_motif_size[k];
1352 if (i + u - 1 <= to) {
1353 ee = domains_up->energy_cb(fc,
1354 i,
1355 i + u - 1,
1356 type | VRNA_UNSTRUCTURED_DOMAIN_MOTIF,
1357 domains_up->data);
1358 if (e == ee) {
1359 /* clone current list */
1360 vrna_ud_motif_t *ptr = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) *
1361 (local_cnt + 2));
1362 memcpy(ptr, local_list, sizeof(vrna_ud_motif_t) * local_cnt);
1363
1364 /* determine actual motif number and add this motif to list */
1365 for (m = 0; m < domains_up->uniq_motif_count; m++)
1366 if ((domains_up->motif_type[m] & type) && (domains_up->motif_size[m] == u)) {
1367 if (ee == (int)roundf(domains_up->motif_en[m] * 100.)) {
1368 k = m;
1369 break;
1370 }
1371 }
1372
1373 ptr[local_cnt].start = i;
1374 ptr[local_cnt].number = m;
1375
1376 /* push back to stack */
1377 bt_stack[bt_stack_pos].from = to + 1; /* mark end of backtracking */
1378 bt_stack[bt_stack_pos].motifs = ptr;
1379 bt_stack[bt_stack_pos].motif_cnt = local_cnt + 1;
1380 bt_stack[bt_stack_pos].motif_size = local_cnt + 2;
1381 bt_stack_pos++;
1382 }
1383
1384 if (i + u - 1 < to) {
1385 if (e == ee + mx[i + u]) {
1386 /* clone current list */
1387 vrna_ud_motif_t *ptr = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) *
1388 (local_cnt + local_size));
1389 memcpy(ptr, local_list, sizeof(vrna_ud_motif_t) * local_cnt);
1390
1391 /* determine actual motif number and add this motif to list */
1392 for (m = 0; m < domains_up->uniq_motif_count; m++)
1393 if ((domains_up->motif_type[m] & type) && (domains_up->motif_size[m] == u)) {
1394 if (ee == (int)roundf(domains_up->motif_en[m] * 100.)) {
1395 k = m;
1396 break;
1397 }
1398 }
1399
1400 ptr[local_cnt].start = i;
1401 ptr[local_cnt].number = m;
1402
1403 /* push back to stack */
1404 bt_stack[bt_stack_pos].from = i + u;
1405 bt_stack[bt_stack_pos].motifs = ptr;
1406 bt_stack[bt_stack_pos].motif_cnt = local_cnt + 1;
1407 bt_stack[bt_stack_pos].motif_size = local_cnt + local_size;
1408 bt_stack_pos++;
1409 }
1410 }
1411 }
1412 }
1413
1414 free(local_list);
1415 }
1416
1417 if (motif_list_num == 0) {
1418 free(motif_lists);
1419 motif_lists = NULL;
1420 } else {
1421 motif_lists = (vrna_ud_motif_t **)vrna_realloc(motif_lists,
1422 sizeof(vrna_ud_motif_t *) *
1423 (motif_list_num + 1));
1424 motif_lists[motif_list_num] = NULL;
1425 }
1426
1427 free(bt_stack);
1428
1429 return motif_lists;
1430 }
1431
1432
1433 PRIVATE void
fill_MEA_matrix(vrna_fold_compound_t * fc,float * mx,unsigned int from,unsigned int to,float * pu,unsigned int type)1434 fill_MEA_matrix(vrna_fold_compound_t *fc,
1435 float *mx,
1436 unsigned int from,
1437 unsigned int to,
1438 float *pu,
1439 unsigned int type)
1440 {
1441 unsigned int i, d, m, u;
1442 float ea, p;
1443 vrna_ud_t *domains_up;
1444
1445 domains_up = fc->domains_up;
1446
1447 ea = pu[to];
1448
1449 for (m = 0; m < domains_up->motif_count; m++) {
1450 if (!(type & domains_up->motif_type[m]))
1451 continue;
1452
1453 if (domains_up->motif_size[m] == 1) {
1454 p = domains_up->probs_get(fc, to, to, type, m, domains_up->data);
1455 ea = MAX2(ea, p);
1456 }
1457 }
1458
1459 mx[to] = ea;
1460
1461 for (d = 2, i = to - 1; i >= from; i--, d++) {
1462 ea = mx[i + 1] + pu[i];
1463
1464 for (m = 0; m < domains_up->motif_count; m++) {
1465 if (!(type & domains_up->motif_type[m]))
1466 continue;
1467
1468 u = domains_up->motif_size[m];
1469 if (u <= d) {
1470 p = domains_up->probs_get(fc, i, i + u - 1, type, m, domains_up->data);
1471 if (p > 0) {
1472 p *= u;
1473
1474 if (u < d)
1475 p += mx[i + u];
1476
1477 ea = MAX2(ea, p);
1478 }
1479 }
1480 }
1481 mx[i] = ea;
1482 }
1483 }
1484
1485
1486 PRIVATE vrna_ud_motif_t *
backtrack_MEA_matrix(vrna_fold_compound_t * fc,float * mx,unsigned int from,unsigned int to,float * pu,unsigned int type)1487 backtrack_MEA_matrix(vrna_fold_compound_t *fc,
1488 float *mx,
1489 unsigned int from,
1490 unsigned int to,
1491 float *pu,
1492 unsigned int type)
1493 {
1494 unsigned int i, u, m, d, motif_cnt, motif_size, found;
1495 float mea, p, prec;
1496 vrna_ud_t *domains_up;
1497 vrna_ud_motif_t *motif_list;
1498
1499 domains_up = fc->domains_up;
1500 motif_cnt = 0;
1501 motif_size = 10;
1502 motif_list = (vrna_ud_motif_t *)vrna_alloc(sizeof(vrna_ud_motif_t) * (motif_size + 1));
1503
1504 for (d = to - from + 1, i = from; i <= to; ) {
1505 prec = FLT_EPSILON * mx[i];
1506 mea = mx[i];
1507 p = pu[i];
1508 found = 0;
1509
1510 if (i < to)
1511 p += mx[i + 1];
1512
1513 if (mea <= p + prec) {
1514 /* nibble-off unpaired nucleotides */
1515 i++;
1516 d--;
1517 continue;
1518 }
1519
1520 for (m = 0; m < domains_up->motif_count; m++) {
1521 if (!(type & domains_up->motif_type[m]))
1522 continue;
1523
1524 u = domains_up->motif_size[m];
1525 if (u <= d) {
1526 p = domains_up->probs_get(fc, i, i + u - 1, type, m, domains_up->data);
1527 if (p > 0.) {
1528 p *= u;
1529
1530 if (u < d)
1531 p += mx[i + u];
1532
1533 if (mea <= p + prec) {
1534 motif_list[motif_cnt].start = i;
1535 motif_list[motif_cnt].number = m;
1536 motif_cnt++;
1537
1538 if (motif_cnt == motif_size) {
1539 motif_size *= 1.4;
1540 motif_list = (vrna_ud_motif_t *)vrna_realloc(motif_list,
1541 sizeof(vrna_ud_motif_t) *
1542 (motif_size + 1));
1543 }
1544
1545 i += u;
1546 d -= u;
1547
1548 found = 1;
1549 break;
1550 }
1551 }
1552 }
1553 }
1554
1555 if (!found) {
1556 vrna_message_warning("Backtracking failed in unstructured domains MEA\n");
1557 motif_cnt = 0;
1558 break;
1559 }
1560 }
1561
1562 if (motif_cnt == 0) {
1563 free(motif_list);
1564 motif_list = NULL;
1565 } else {
1566 motif_list = (vrna_ud_motif_t *)vrna_realloc(motif_list,
1567 sizeof(vrna_ud_motif_t) *
1568 (motif_cnt + 1));
1569 motif_list[motif_cnt].start = 0;
1570 motif_list[motif_cnt].number = -1;
1571 }
1572
1573 return motif_list;
1574 }
1575
1576
1577 /*
1578 **********************************
1579 * Default implementation for
1580 * ligand binding to unpaired
1581 * stretches follows below
1582 **********************************
1583 */
1584 PRIVATE void
add_ligand_motif(vrna_fold_compound_t * vc,const char * motif,double motif_en,const char * motif_name,unsigned int loop_type)1585 add_ligand_motif(vrna_fold_compound_t *vc,
1586 const char *motif,
1587 double motif_en,
1588 const char *motif_name,
1589 unsigned int loop_type)
1590 {
1591 unsigned int i, n, same_size;
1592 vrna_ud_t *ud;
1593
1594 n = (unsigned int)strlen(motif);
1595 ud = vc->domains_up;
1596
1597 /* First, we update the list of unique motif lengths */
1598 for (same_size = i = 0; i < ud->uniq_motif_count; i++) {
1599 if (ud->uniq_motif_size[i] == n) {
1600 same_size = 1;
1601 break;
1602 }
1603 }
1604
1605 if (!same_size) {
1606 ud->uniq_motif_size = (unsigned int *)vrna_realloc(ud->uniq_motif_size,
1607 sizeof(unsigned int *) *
1608 (ud->uniq_motif_count + 1));
1609 ud->uniq_motif_size[ud->uniq_motif_count] = n;
1610 ud->uniq_motif_count++;
1611 }
1612
1613 /* And finally, we store the motif */
1614 ud->motif = (char **)vrna_realloc(ud->motif,
1615 sizeof(char *) *
1616 (ud->motif_count + 1));
1617 ud->motif[ud->motif_count] = strdup(motif);
1618
1619 ud->motif_name = (char **)vrna_realloc(ud->motif_name,
1620 sizeof(char *) *
1621 (ud->motif_count + 1));
1622 ud->motif_name[ud->motif_count] = (motif_name) ? strdup(motif) : NULL;
1623
1624 ud->motif_size = (unsigned int *)vrna_realloc(ud->motif_size,
1625 sizeof(unsigned int *) *
1626 (ud->motif_count + 1));
1627 ud->motif_size[ud->motif_count] = n;
1628
1629 ud->motif_en = (double *)vrna_realloc(ud->motif_en,
1630 sizeof(double) *
1631 (ud->motif_count + 1));
1632 ud->motif_en[ud->motif_count] = motif_en;
1633
1634 ud->motif_type = (unsigned int *)vrna_realloc(ud->motif_type,
1635 sizeof(double) *
1636 (ud->motif_count + 1));
1637 ud->motif_type[ud->motif_count] = loop_type;
1638
1639 ud->motif_count++;
1640 }
1641
1642
1643 PRIVATE void
remove_default_data(void * d)1644 remove_default_data(void *d)
1645 {
1646 struct ligands_up_data_default *data;
1647
1648 data = (struct ligands_up_data_default *)d;
1649
1650 free_default_data_matrices(data);
1651 free_default_data_exp_matrices(data);
1652 free_default_data(data);
1653
1654 free(data);
1655 }
1656
1657
1658 PRIVATE void
free_default_data(struct ligands_up_data_default * data)1659 free_default_data(struct ligands_up_data_default *data)
1660 {
1661 int i;
1662
1663 if (data->motif_list_ext) {
1664 for (i = 0; i <= data->n; i++)
1665 free(data->motif_list_ext[i]);
1666 free(data->motif_list_ext);
1667 }
1668
1669 if (data->motif_list_hp) {
1670 for (i = 0; i <= data->n; i++)
1671 free(data->motif_list_hp[i]);
1672 free(data->motif_list_hp);
1673 }
1674
1675 if (data->motif_list_int) {
1676 for (i = 0; i <= data->n; i++)
1677 free(data->motif_list_int[i]);
1678 free(data->motif_list_int);
1679 }
1680
1681 if (data->motif_list_mb) {
1682 for (i = 0; i <= data->n; i++)
1683 free(data->motif_list_mb[i]);
1684 free(data->motif_list_mb);
1685 }
1686
1687 free(data->len);
1688 free(data->dG);
1689 free(data->exp_dG);
1690 }
1691
1692
1693 PRIVATE void
free_default_data_matrices(struct ligands_up_data_default * data)1694 free_default_data_matrices(struct ligands_up_data_default *data)
1695 {
1696 /* the following four pointers may point to the same memory */
1697 if (data->energies_ext) {
1698 /* check whether one of the other b* points to the same memory location */
1699 if (data->energies_ext == data->energies_hp)
1700 data->energies_hp = NULL;
1701
1702 if (data->energies_ext == data->energies_int)
1703 data->energies_int = NULL;
1704
1705 if (data->energies_ext == data->energies_mb)
1706 data->energies_mb = NULL;
1707
1708 free(data->energies_ext);
1709 data->energies_ext = NULL;
1710 }
1711
1712 if (data->energies_hp) {
1713 /* check whether one of the other b* points to the same memory location */
1714 if (data->energies_hp == data->energies_int)
1715 data->energies_int = NULL;
1716
1717 if (data->energies_hp == data->energies_mb)
1718 data->energies_mb = NULL;
1719
1720 free(data->energies_hp);
1721 data->energies_hp = NULL;
1722 }
1723
1724 if (data->energies_int) {
1725 /* check whether one of the other b* points to the same memory location */
1726 if (data->energies_int == data->energies_mb)
1727 data->energies_mb = NULL;
1728
1729 free(data->energies_int);
1730 data->energies_int = NULL;
1731 }
1732
1733 free(data->energies_mb);
1734 data->energies_mb = NULL;
1735 }
1736
1737
1738 PRIVATE void
free_default_data_exp_matrices(struct ligands_up_data_default * data)1739 free_default_data_exp_matrices(struct ligands_up_data_default *data)
1740 {
1741 int i;
1742
1743 /* the following four pointers may point to the same memory */
1744 if (data->exp_energies_ext) {
1745 /* check whether one of the other b* points to the same memory location */
1746 if (data->exp_energies_ext == data->exp_energies_hp)
1747 data->exp_energies_hp = NULL;
1748
1749 if (data->exp_energies_ext == data->exp_energies_int)
1750 data->exp_energies_int = NULL;
1751
1752 if (data->exp_energies_ext == data->exp_energies_mb)
1753 data->exp_energies_mb = NULL;
1754
1755 free(data->exp_energies_ext);
1756 data->exp_energies_ext = NULL;
1757 }
1758
1759 if (data->exp_energies_hp) {
1760 /* check whether one of the other b* points to the same memory location */
1761 if (data->exp_energies_hp == data->exp_energies_int)
1762 data->exp_energies_int = NULL;
1763
1764 if (data->exp_energies_hp == data->exp_energies_mb)
1765 data->exp_energies_mb = NULL;
1766
1767 free(data->exp_energies_hp);
1768 data->exp_energies_hp = NULL;
1769 }
1770
1771 if (data->exp_energies_int) {
1772 /* check whether one of the other b* points to the same memory location */
1773 if (data->exp_energies_int == data->exp_energies_mb)
1774 data->exp_energies_mb = NULL;
1775
1776 free(data->exp_energies_int);
1777 data->exp_energies_int = NULL;
1778 }
1779
1780 free(data->exp_energies_mb);
1781 data->exp_energies_mb = NULL;
1782
1783 if (data->outside_ext)
1784 for (i = 0; i <= data->n; i++)
1785 if (data->outside_ext[i])
1786 free(data->outside_ext[i]);
1787
1788 free(data->outside_ext);
1789 free(data->outside_ext_count);
1790
1791 if (data->outside_hp)
1792 for (i = 0; i <= data->n; i++)
1793 if (data->outside_hp[i])
1794 free(data->outside_hp[i]);
1795
1796 free(data->outside_hp);
1797 free(data->outside_hp_count);
1798
1799 if (data->outside_int)
1800 for (i = 0; i <= data->n; i++)
1801 if (data->outside_int[i])
1802 free(data->outside_int[i]);
1803
1804 free(data->outside_int);
1805 free(data->outside_int_count);
1806
1807 if (data->outside_mb)
1808 for (i = 0; i <= data->n; i++)
1809 if (data->outside_mb[i])
1810 free(data->outside_mb[i]);
1811
1812 free(data->outside_mb);
1813 free(data->outside_mb_count);
1814 }
1815
1816
1817 PRIVATE int *
get_motifs(vrna_fold_compound_t * vc,int i,unsigned int loop_type)1818 get_motifs(vrna_fold_compound_t *vc,
1819 int i,
1820 unsigned int loop_type)
1821 {
1822 int k, j, u, n, *motif_list, cnt, guess;
1823 char *sequence;
1824 vrna_ud_t *domains_up;
1825
1826 sequence = vc->sequence;
1827 n = (int)vc->length;
1828 domains_up = vc->domains_up;
1829
1830 cnt = 0;
1831 guess = domains_up->motif_count;
1832 motif_list = (int *)vrna_alloc(sizeof(int) * (guess + 1));
1833
1834 /* collect list of motif numbers we find that start at position i */
1835 for (k = 0; k < domains_up->motif_count; k++) {
1836 if (!(domains_up->motif_type[k] & loop_type))
1837 continue;
1838
1839 j = i + domains_up->motif_size[k] - 1;
1840 if (j <= n) {
1841 /* only consider motif that does not exceed sequence length (does not work for circular RNAs!) */
1842 for (u = i; u <= j; u++)
1843 if (!vrna_nucleotide_IUPAC_identity(sequence[u - 1], domains_up->motif[k][u - i]))
1844 break;
1845
1846 if (u > j) /* got a complete motif match */
1847 motif_list[cnt++] = k;
1848 }
1849 }
1850
1851 if (cnt == 0) {
1852 free(motif_list);
1853 return NULL;
1854 }
1855
1856 motif_list = (int *)vrna_realloc(motif_list, sizeof(int) * (cnt + 1));
1857 motif_list[cnt] = -1; /* end of list marker */
1858
1859 return motif_list;
1860 }
1861
1862
1863 static void
annotate_ud(vrna_fold_compound_t * vc,int start,int end,char l,vrna_ud_motif_t ** list,int * list_size,int * list_pos)1864 annotate_ud(vrna_fold_compound_t *vc,
1865 int start,
1866 int end,
1867 char l,
1868 vrna_ud_motif_t **list,
1869 int *list_size,
1870 int *list_pos)
1871 {
1872 int i, j;
1873
1874 /* get motifs in segment [start,end] */
1875 for (i = start; i <= end; i++) {
1876 unsigned int type = 0;
1877 switch (l) {
1878 case 'e':
1879 type = VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP;
1880 break;
1881 case 'h':
1882 type = VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP;
1883 break;
1884 case 'i':
1885 type = VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP;
1886 break;
1887 case 'm':
1888 type = VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP;
1889 break;
1890 }
1891
1892 int *m = vrna_ud_get_motifs_at(vc, i, type);
1893 if (m) {
1894 for (j = 0; m[j] != -1; j++) {
1895 int size = vc->domains_up->motif_size[m[j]];
1896
1897 if (i + size - 1 <= end) {
1898 if (*list_pos == *list_size) {
1899 *list_size *= 1.2;
1900 *list = (vrna_ud_motif_t *)vrna_realloc(*list,
1901 sizeof(vrna_ud_motif_t) *
1902 (*list_size));
1903 }
1904
1905 (*list)[*list_pos].start = i;
1906 (*list)[*list_pos].number = m[j];
1907 (*list_pos)++;
1908 }
1909 }
1910 }
1911
1912 free(m);
1913 }
1914 }
1915
1916
1917 PRIVATE void
prepare_matrices(vrna_fold_compound_t * vc,struct ligands_up_data_default * data)1918 prepare_matrices(vrna_fold_compound_t *vc,
1919 struct ligands_up_data_default *data)
1920 {
1921 int i, j, k, n, size;
1922 vrna_ud_t *domains_up;
1923
1924 n = (int)vc->length;
1925 size = ((n + 1) * (n + 2)) / 2 + 1;
1926 domains_up = vc->domains_up;
1927
1928 free_default_data_matrices(data);
1929
1930 /* here we save memory by re-using DP matrices */
1931 unsigned int lt[4] = {
1932 VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP,
1933 VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP,
1934 VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP,
1935 VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP
1936 };
1937 int **m[4], *mx;
1938 m[0] = &data->energies_ext;
1939 m[1] = &data->energies_hp;
1940 m[2] = &data->energies_int;
1941 m[3] = &data->energies_mb;
1942
1943 for (i = 0; i < 4; i++) {
1944 unsigned int *col, *col2;
1945 if (*(m[i]))
1946 continue;
1947
1948 mx = (int *)vrna_alloc(sizeof(int) * size);
1949 col = (unsigned int *)vrna_alloc(sizeof(unsigned int) * domains_up->motif_count);
1950 col2 = (unsigned int *)vrna_alloc(sizeof(unsigned int) * domains_up->motif_count);
1951 *(m[i]) = mx;
1952
1953 for (k = 0; k < domains_up->motif_count; k++)
1954 col[k] = domains_up->motif_type[k] & lt[i];
1955
1956 /* check if any of the remaining DP matrices can point to the same location */
1957 for (j = i + 1; j < 4; j++) {
1958 for (k = 0; k < domains_up->motif_count; k++) {
1959 col2[k] = domains_up->motif_type[k] & lt[j];
1960 if (col[k] != col2[k])
1961 break;
1962 }
1963 if (k == domains_up->motif_count)
1964 *(m[j]) = mx;
1965 }
1966
1967 free(col);
1968 free(col2);
1969 }
1970 }
1971
1972
1973 PRIVATE void
prepare_exp_matrices(vrna_fold_compound_t * vc,struct ligands_up_data_default * data)1974 prepare_exp_matrices(vrna_fold_compound_t *vc,
1975 struct ligands_up_data_default *data)
1976 {
1977 int i, j, k, n, size;
1978 vrna_ud_t *domains_up;
1979
1980 n = (int)vc->length;
1981 size = ((n + 1) * (n + 2)) / 2 + 1;
1982 domains_up = vc->domains_up;
1983
1984 free_default_data_exp_matrices(data);
1985
1986 /* here we save memory by re-using DP matrices */
1987 unsigned int lt[4] = {
1988 VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP,
1989 VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP,
1990 VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP,
1991 VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP
1992 };
1993 FLT_OR_DBL **m[4], *mx;
1994 m[0] = &data->exp_energies_ext;
1995 m[1] = &data->exp_energies_hp;
1996 m[2] = &data->exp_energies_int;
1997 m[3] = &data->exp_energies_mb;
1998
1999 for (i = 0; i < 4; i++) {
2000 unsigned int *col, *col2;
2001 if (*(m[i]))
2002 continue;
2003
2004 mx = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
2005 col = (unsigned int *)vrna_alloc(sizeof(unsigned int) * domains_up->motif_count);
2006 col2 = (unsigned int *)vrna_alloc(sizeof(unsigned int) * domains_up->motif_count);
2007 *(m[i]) = mx;
2008
2009 for (k = 0; k < domains_up->motif_count; k++)
2010 col[k] = domains_up->motif_type[k] & lt[i];
2011
2012 /* check if any of the remaining DP matrices can point to the same location */
2013 for (j = i + 1; j < 4; j++) {
2014 for (k = 0; k < domains_up->motif_count; k++) {
2015 col2[k] = domains_up->motif_type[k] & lt[j];
2016 if (col[k] != col2[k])
2017 break;
2018 }
2019 if (k == domains_up->motif_count)
2020 *(m[j]) = mx;
2021 }
2022
2023 free(col);
2024 free(col2);
2025 }
2026
2027 /* now prepate memory for outside partition function */
2028 data->outside_ext = (struct default_outside **)vrna_alloc(
2029 sizeof(struct default_outside *) * (n + 2));
2030 data->outside_hp = (struct default_outside **)vrna_alloc(
2031 sizeof(struct default_outside *) * (n + 2));
2032 data->outside_int = (struct default_outside **)vrna_alloc(
2033 sizeof(struct default_outside *) * (n + 2));
2034 data->outside_mb = (struct default_outside **)vrna_alloc(
2035 sizeof(struct default_outside *) * (n + 2));
2036 data->outside_ext_count = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 2));
2037 data->outside_hp_count = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 2));
2038 data->outside_int_count = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 2));
2039 data->outside_mb_count = (unsigned int *)vrna_alloc(sizeof(unsigned int) * (n + 2));
2040 }
2041
2042
2043 PRIVATE void
prepare_default_data(vrna_fold_compound_t * vc,struct ligands_up_data_default * data)2044 prepare_default_data(vrna_fold_compound_t *vc,
2045 struct ligands_up_data_default *data)
2046 {
2047 int i, n;
2048 vrna_ud_t *domains_up;
2049
2050 n = (int)vc->length;
2051 domains_up = vc->domains_up;
2052
2053 data->n = n;
2054 free_default_data(data);
2055
2056 /*
2057 * create motif_list for associating a nucleotide position with all
2058 * motifs that start there
2059 */
2060 data->motif_list_ext = (int **)vrna_alloc(sizeof(int *) * (n + 1));
2061 data->motif_list_hp = (int **)vrna_alloc(sizeof(int *) * (n + 1));
2062 data->motif_list_int = (int **)vrna_alloc(sizeof(int *) * (n + 1));
2063 data->motif_list_mb = (int **)vrna_alloc(sizeof(int *) * (n + 1));
2064 data->motif_list_ext[0] = NULL;
2065 data->motif_list_hp[0] = NULL;
2066 data->motif_list_int[0] = NULL;
2067 data->motif_list_mb[0] = NULL;
2068 for (i = 1; i <= n; i++) {
2069 data->motif_list_ext[i] = get_motifs(vc, i, VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP);
2070 data->motif_list_hp[i] = get_motifs(vc, i, VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP);
2071 data->motif_list_int[i] = get_motifs(vc, i, VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP);
2072 data->motif_list_mb[i] = get_motifs(vc, i, VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP);
2073 }
2074
2075 data->default_cb[VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP] = default_exp_energy_ext_motif;
2076 data->default_cb[VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP] = default_exp_energy_hp_motif;
2077 data->default_cb[VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP] = default_exp_energy_int_motif;
2078 data->default_cb[VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP] = default_exp_energy_mb_motif;
2079
2080 /* store length of motifs in 'data' */
2081 data->len = (int *)vrna_alloc(sizeof(int) * domains_up->motif_count);
2082 for (i = 0; i < domains_up->motif_count; i++)
2083 data->len[i] = domains_up->motif_size[i];
2084
2085 /* precompute energy contributions of the motifs */
2086 data->dG = (int *)vrna_alloc(sizeof(int) * domains_up->motif_count);
2087 for (i = 0; i < domains_up->motif_count; i++)
2088 data->dG[i] = (int)roundf(domains_up->motif_en[i] * 100.);
2089 }
2090
2091
2092 PRIVATE void
default_prod_rule(vrna_fold_compound_t * vc,void * d)2093 default_prod_rule(vrna_fold_compound_t *vc,
2094 void *d)
2095 {
2096 int i, j, k, l, u, n, e_ext, e_hp, e_int, e_mb, en, en2, *idx;
2097 struct ligands_up_data_default *data;
2098
2099 int *energies_ext;
2100 int *energies_hp;
2101 int *energies_int;
2102 int *energies_mb;
2103
2104 n = (int)vc->length;
2105 idx = vc->jindx;
2106 data = (struct ligands_up_data_default *)d;
2107
2108 prepare_default_data(vc, data);
2109 prepare_matrices(vc, data);
2110
2111 energies_ext = data->energies_ext;
2112 energies_hp = data->energies_hp;
2113 energies_int = data->energies_int;
2114 energies_mb = data->energies_mb;
2115
2116 /* now we can start to fill the DP matrices */
2117 for (i = n; i > 0; i--) {
2118 int *list_ext = data->motif_list_ext[i];
2119 int *list_hp = data->motif_list_hp[i];
2120 int *list_int = data->motif_list_int[i];
2121 int *list_mb = data->motif_list_mb[i];
2122 for (j = i; j <= n; j++) {
2123 if (i < j) {
2124 e_ext = energies_ext[idx[j] + i + 1];
2125 e_hp = energies_hp[idx[j] + i + 1];
2126 e_int = energies_int[idx[j] + i + 1];
2127 e_mb = energies_mb[idx[j] + i + 1];
2128 } else {
2129 e_ext = INF;
2130 e_hp = INF;
2131 e_int = INF;
2132 e_mb = INF;
2133 }
2134
2135 if (list_ext) {
2136 for (k = 0; -1 != (l = list_ext[k]); k++) {
2137 u = i + data->len[l] - 1;
2138 en = data->dG[l];
2139 if (u <= j) {
2140 e_ext = MIN2(e_ext, en);
2141 if (u < j) {
2142 en2 = en + energies_ext[idx[j] + u + 1];
2143 e_ext = MIN2(e_ext, en2);
2144 }
2145 }
2146 }
2147 }
2148
2149 if (list_hp) {
2150 for (k = 0; -1 != (l = list_hp[k]); k++) {
2151 u = i + data->len[l] - 1;
2152 en = data->dG[l];
2153 if (u <= j) {
2154 e_hp = MIN2(e_hp, en);
2155 if (u < j) {
2156 en2 = en + energies_hp[idx[j] + u + 1];
2157 e_hp = MIN2(e_hp, en2);
2158 }
2159 }
2160 }
2161 }
2162
2163 if (list_int) {
2164 for (k = 0; -1 != (l = list_int[k]); k++) {
2165 u = i + data->len[l] - 1;
2166 en = data->dG[l];
2167 if (u <= j) {
2168 e_int = MIN2(e_int, en);
2169 if (u < j) {
2170 en2 = en + energies_int[idx[j] + u + 1];
2171 e_int = MIN2(e_int, en2);
2172 }
2173 }
2174 }
2175 }
2176
2177 if (list_mb) {
2178 for (k = 0; -1 != (l = list_mb[k]); k++) {
2179 u = i + data->len[l] - 1;
2180 en = data->dG[l];
2181 if (u <= j) {
2182 e_mb = MIN2(e_mb, en);
2183 if (u < j) {
2184 en2 = en + energies_mb[idx[j] + u + 1];
2185 e_mb = MIN2(e_mb, en2);
2186 }
2187 }
2188 }
2189 }
2190
2191 energies_ext[idx[j] + i] = e_ext;
2192 energies_hp[idx[j] + i] = e_hp;
2193 energies_int[idx[j] + i] = e_int;
2194 energies_mb[idx[j] + i] = e_mb;
2195 }
2196 }
2197 }
2198
2199
2200 PRIVATE void
default_exp_prod_rule(vrna_fold_compound_t * vc,void * d)2201 default_exp_prod_rule(vrna_fold_compound_t *vc,
2202 void *d)
2203 {
2204 int i, j, k, l, u, n, *idx;
2205 FLT_OR_DBL q_ext, q_hp, q_int, q_mb, q;
2206 vrna_ud_t *domains_up;
2207 struct ligands_up_data_default *data;
2208
2209 FLT_OR_DBL *exp_energies_ext;
2210 FLT_OR_DBL *exp_energies_hp;
2211 FLT_OR_DBL *exp_energies_int;
2212 FLT_OR_DBL *exp_energies_mb;
2213 double kT;
2214
2215 n = (int)vc->length;
2216 idx = vc->iindx;
2217 domains_up = vc->domains_up;
2218 data = (struct ligands_up_data_default *)d;
2219 kT = vc->exp_params->kT;
2220
2221 prepare_default_data(vc, data);
2222 prepare_exp_matrices(vc, data);
2223
2224 exp_energies_ext = data->exp_energies_ext;
2225 exp_energies_hp = data->exp_energies_hp;
2226 exp_energies_int = data->exp_energies_int;
2227 exp_energies_mb = data->exp_energies_mb;
2228
2229 data->exp_e_mx[VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP] = data->exp_energies_ext;
2230 data->exp_e_mx[VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP] = data->exp_energies_hp;
2231 data->exp_e_mx[VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP] = data->exp_energies_int;
2232 data->exp_e_mx[VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP] = data->exp_energies_mb;
2233
2234 /* precompute energy contributions of the motifs */
2235 data->exp_dG = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * domains_up->motif_count);
2236 for (i = 0; i < domains_up->motif_count; i++) {
2237 double GT = domains_up->motif_en[i] * 1000.; /* in cal/mol */
2238 data->exp_dG[i] = (FLT_OR_DBL)exp(-GT / kT);
2239 }
2240
2241 /* now we can start to fill the DP matrices */
2242 for (i = n; i > 0; i--) {
2243 int *list_ext = data->motif_list_ext[i];
2244 int *list_hp = data->motif_list_hp[i];
2245 int *list_int = data->motif_list_int[i];
2246 int *list_mb = data->motif_list_mb[i];
2247 for (j = i; j <= n; j++) {
2248 if (i < j) {
2249 q_ext = exp_energies_ext[idx[i + 1] - j];
2250 q_hp = exp_energies_hp[idx[i + 1] - j];
2251 q_int = exp_energies_int[idx[i + 1] - j];
2252 q_mb = exp_energies_mb[idx[i + 1] - j];
2253 } else {
2254 q_ext = 0;
2255 q_hp = 0;
2256 q_int = 0;
2257 q_mb = 0;
2258 }
2259
2260 if (list_ext) {
2261 for (k = 0; -1 != (l = list_ext[k]); k++) {
2262 u = i + data->len[l] - 1;
2263 q = data->exp_dG[l];
2264 if (u <= j) {
2265 q_ext += q;
2266 if (u < j)
2267 q_ext += q * exp_energies_ext[idx[u + 1] - j];
2268 }
2269 }
2270 }
2271
2272 if (list_hp) {
2273 for (k = 0; -1 != (l = list_hp[k]); k++) {
2274 u = i + data->len[l] - 1;
2275 q = data->exp_dG[l];
2276 if (u <= j) {
2277 q_hp += q;
2278 if (u < j)
2279 q_hp += q * exp_energies_hp[idx[u + 1] - j];
2280 }
2281 }
2282 }
2283
2284 if (list_int) {
2285 for (k = 0; -1 != (l = list_int[k]); k++) {
2286 u = i + data->len[l] - 1;
2287 q = data->exp_dG[l];
2288 if (u <= j) {
2289 q_int += q;
2290 if (u < j)
2291 q_int += q * exp_energies_int[idx[u + 1] - j];
2292 }
2293 }
2294 }
2295
2296 if (list_mb) {
2297 for (k = 0; -1 != (l = list_mb[k]); k++) {
2298 u = i + data->len[l] - 1;
2299 q = data->exp_dG[l];
2300 if (u <= j) {
2301 q_mb += q;
2302 if (u < j)
2303 q_mb += q * exp_energies_mb[idx[u + 1] - j];
2304 }
2305 }
2306 }
2307
2308 exp_energies_ext[idx[i] - j] = q_ext;
2309 exp_energies_hp[idx[i] - j] = q_hp;
2310 exp_energies_int[idx[i] - j] = q_int;
2311 exp_energies_mb[idx[i] - j] = q_mb;
2312 }
2313 }
2314 }
2315
2316
2317 PRIVATE int
default_energy(vrna_fold_compound_t * vc,int i,int j,unsigned int loop_type,void * d)2318 default_energy(vrna_fold_compound_t *vc,
2319 int i,
2320 int j,
2321 unsigned int loop_type,
2322 void *d)
2323 {
2324 int en, ij, *idx = vc->jindx;
2325 struct ligands_up_data_default *data = (struct ligands_up_data_default *)d;
2326
2327 en = INF;
2328 ij = idx[j] + i;
2329
2330 if (j < i)
2331 return INF;
2332
2333 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MOTIF) {
2334 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP)
2335 en = default_energy_ext_motif(i, j, data);
2336 else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP)
2337 en = default_energy_hp_motif(i, j, data);
2338 else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP)
2339 en = default_energy_int_motif(i, j, data);
2340 else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP)
2341 en = default_energy_mb_motif(i, j, data);
2342 } else {
2343 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP) {
2344 if (data->energies_ext)
2345 en = data->energies_ext[ij];
2346 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP) {
2347 if (data->energies_hp)
2348 en = data->energies_hp[ij];
2349 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP) {
2350 if (data->energies_int)
2351 en = data->energies_int[ij];
2352 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP) {
2353 if (data->energies_mb)
2354 en = data->energies_mb[ij];
2355 }
2356 }
2357
2358 return en;
2359 }
2360
2361
2362 PRIVATE FLT_OR_DBL
default_exp_energy(vrna_fold_compound_t * vc,int i,int j,unsigned int loop_type,void * d)2363 default_exp_energy(vrna_fold_compound_t *vc,
2364 int i,
2365 int j,
2366 unsigned int loop_type,
2367 void *d)
2368 {
2369 FLT_OR_DBL q;
2370 int ij, *idx;
2371 struct ligands_up_data_default *data;
2372
2373 q = 0;
2374 data = (struct ligands_up_data_default *)d;
2375
2376 if (j < i)
2377 return 0.;
2378
2379 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MOTIF) {
2380 q = data->default_cb[loop_type & ~(VRNA_UNSTRUCTURED_DOMAIN_MOTIF)](i, j, data);
2381 } else {
2382 idx = vc->iindx;
2383 ij = idx[i] - j;
2384 q = data->exp_e_mx[loop_type][ij];
2385 }
2386
2387 return q;
2388 }
2389
2390
2391 PRIVATE int
default_energy_ext_motif(int i,int j,struct ligands_up_data_default * data)2392 default_energy_ext_motif(int i,
2393 int j,
2394 struct ligands_up_data_default *data)
2395 {
2396 int k, m;
2397 int e = INF;
2398
2399 if (data->motif_list_ext[i]) {
2400 k = 0;
2401 while (-1 != (m = data->motif_list_ext[i][k])) {
2402 if ((i + data->len[m] - 1) == j)
2403 e = MIN2(e, data->dG[m]);
2404
2405 k++;
2406 }
2407 }
2408
2409 return e;
2410 }
2411
2412
2413 PRIVATE int
default_energy_hp_motif(int i,int j,struct ligands_up_data_default * data)2414 default_energy_hp_motif(int i,
2415 int j,
2416 struct ligands_up_data_default *data)
2417 {
2418 int k, m;
2419 int e = INF;
2420
2421 if (data->motif_list_hp[i]) {
2422 k = 0;
2423 while (-1 != (m = data->motif_list_hp[i][k])) {
2424 if ((i + data->len[m] - 1) == j)
2425 e = MIN2(e, data->dG[m]);
2426
2427 k++;
2428 }
2429 }
2430
2431 return e;
2432 }
2433
2434
2435 PRIVATE int
default_energy_int_motif(int i,int j,struct ligands_up_data_default * data)2436 default_energy_int_motif(int i,
2437 int j,
2438 struct ligands_up_data_default *data)
2439 {
2440 int k, m;
2441 int e = INF;
2442
2443 if (data->motif_list_int[i]) {
2444 k = 0;
2445 while (-1 != (m = data->motif_list_int[i][k])) {
2446 if ((i + data->len[m] - 1) == j)
2447 e = MIN2(e, data->dG[m]);
2448
2449 k++;
2450 }
2451 }
2452
2453 return e;
2454 }
2455
2456
2457 PRIVATE int
default_energy_mb_motif(int i,int j,struct ligands_up_data_default * data)2458 default_energy_mb_motif(int i,
2459 int j,
2460 struct ligands_up_data_default *data)
2461 {
2462 int k, m;
2463 int e = INF;
2464
2465 if (data->motif_list_mb[i]) {
2466 k = 0;
2467 while (-1 != (m = data->motif_list_mb[i][k])) {
2468 if ((i + data->len[m] - 1) == j)
2469 e = MIN2(2, data->dG[m]);
2470
2471 k++;
2472 }
2473 }
2474
2475 return e;
2476 }
2477
2478
2479 PRIVATE FLT_OR_DBL
default_exp_energy_ext_motif(int i,int j,struct ligands_up_data_default * data)2480 default_exp_energy_ext_motif(int i,
2481 int j,
2482 struct ligands_up_data_default *data)
2483 {
2484 int k, m;
2485 FLT_OR_DBL q = 0;
2486
2487 if (data->motif_list_ext[i]) {
2488 k = 0;
2489 while (-1 != (m = data->motif_list_ext[i][k])) {
2490 if ((i + data->len[m] - 1) == j)
2491 q += data->exp_dG[m];
2492
2493 k++;
2494 }
2495 }
2496
2497 return q;
2498 }
2499
2500
2501 PRIVATE FLT_OR_DBL
default_exp_energy_hp_motif(int i,int j,struct ligands_up_data_default * data)2502 default_exp_energy_hp_motif(int i,
2503 int j,
2504 struct ligands_up_data_default *data)
2505 {
2506 int k, m;
2507 FLT_OR_DBL q = 0;
2508
2509 if (data->motif_list_hp[i]) {
2510 k = 0;
2511 while (-1 != (m = data->motif_list_hp[i][k])) {
2512 if ((i + data->len[m] - 1) == j)
2513 q += data->exp_dG[m];
2514
2515 k++;
2516 }
2517 }
2518
2519 return q;
2520 }
2521
2522
2523 PRIVATE FLT_OR_DBL
default_exp_energy_int_motif(int i,int j,struct ligands_up_data_default * data)2524 default_exp_energy_int_motif(int i,
2525 int j,
2526 struct ligands_up_data_default *data)
2527 {
2528 int k, m;
2529 FLT_OR_DBL q = 0;
2530
2531 if (data->motif_list_int[i]) {
2532 k = 0;
2533 while (-1 != (m = data->motif_list_int[i][k])) {
2534 if ((i + data->len[m] - 1) == j)
2535 q += data->exp_dG[m];
2536
2537 k++;
2538 }
2539 }
2540
2541 return q;
2542 }
2543
2544
2545 PRIVATE FLT_OR_DBL
default_exp_energy_mb_motif(int i,int j,struct ligands_up_data_default * data)2546 default_exp_energy_mb_motif(int i,
2547 int j,
2548 struct ligands_up_data_default *data)
2549 {
2550 int k, m;
2551 FLT_OR_DBL q = 0;
2552
2553 if (data->motif_list_mb[i]) {
2554 k = 0;
2555 while (-1 != (m = data->motif_list_mb[i][k])) {
2556 if ((i + data->len[m] - 1) == j)
2557 q += data->exp_dG[m];
2558
2559 k++;
2560 }
2561 }
2562
2563 return q;
2564 }
2565
2566
2567 PRIVATE void
default_probs_add(vrna_fold_compound_t * vc,int i,int j,unsigned int loop_type,FLT_OR_DBL exp_energy,void * data)2568 default_probs_add(vrna_fold_compound_t *vc,
2569 int i,
2570 int j,
2571 unsigned int loop_type,
2572 FLT_OR_DBL exp_energy,
2573 void *data)
2574 {
2575 int **motif_list, k, l, m;
2576 unsigned int *size, *cnt, o;
2577 struct ligands_up_data_default *d;
2578 struct default_outside **storage, **st;
2579
2580 d = (struct ligands_up_data_default *)data;
2581
2582 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MOTIF) {
2583 if (j < i)
2584 return;
2585
2586 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP) {
2587 motif_list = d->motif_list_ext;
2588 storage = &(d->outside_ext[i]);
2589 size = &(d->outside_ext_count[i]);
2590 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP) {
2591 motif_list = d->motif_list_hp;
2592 storage = &(d->outside_hp[i]);
2593 size = &(d->outside_hp_count[i]);
2594 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP) {
2595 motif_list = d->motif_list_int;
2596 storage = &(d->outside_int[i]);
2597 size = &(d->outside_int_count[i]);
2598 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP) {
2599 motif_list = d->motif_list_mb;
2600 storage = &(d->outside_mb[i]);
2601 size = &(d->outside_mb_count[i]);
2602 } else {
2603 vrna_message_warning("Unknown unstructured domain loop type");
2604 return;
2605 }
2606
2607 k = 0;
2608 while (-1 != (m = motif_list[i][k])) {
2609 if ((i + d->len[m] - 1) == j) {
2610 /* check for addition first */
2611 for (o = 0; o < *size; o++)
2612 if ((*storage)[o].motif_num == m) {
2613 /* found previously added motif constribution */
2614 (*storage)[o].exp_energy += exp_energy;
2615 break;
2616 }
2617
2618 /* if we haven't added yet, create new list entry */
2619 if (o == *size) {
2620 *storage =
2621 (struct default_outside *)vrna_realloc(*storage,
2622 sizeof(struct default_outside) * (*size + 1));
2623 (*storage)[*size].motif_num = m;
2624 (*storage)[*size].exp_energy = exp_energy;
2625 (*size)++;
2626 }
2627 }
2628
2629 k++;
2630 }
2631 } else {
2632 if (j < i)
2633 return;
2634
2635 FLT_OR_DBL pf, exp_e;
2636 pf = default_exp_energy(vc, i, j, loop_type, data);
2637
2638 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP) {
2639 motif_list = d->motif_list_ext;
2640 storage = d->outside_ext;
2641 size = d->outside_ext_count;
2642 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP) {
2643 motif_list = d->motif_list_hp;
2644 storage = d->outside_hp;
2645 size = d->outside_hp_count;
2646 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP) {
2647 motif_list = d->motif_list_int;
2648 storage = d->outside_int;
2649 size = d->outside_int_count;
2650 } else if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP) {
2651 motif_list = d->motif_list_mb;
2652 storage = d->outside_mb;
2653 size = d->outside_mb_count;
2654 } else {
2655 vrna_message_warning("Unknown unstructured domain loop type");
2656 return;
2657 }
2658
2659 /* check for each motif starting at any k with i <= k <= j */
2660 for (k = i; k <= j; k++) {
2661 if (motif_list[k]) {
2662 st = &(storage[k]);
2663 cnt = &(size[k]);
2664 for (l = 0; motif_list[k][l] != -1; l++) {
2665 m = motif_list[k][l];
2666
2667 if (j < k + d->len[m] - 1) /* motifs must be sorted be length */
2668 continue;
2669
2670 exp_e = d->exp_dG[m];
2671 FLT_OR_DBL p = exp_e / pf;
2672
2673 /* add/insert contribution */
2674
2675 /* check for addition first */
2676 for (o = 0; o < *cnt; o++)
2677 if ((*st)[o].motif_num == m) {
2678 /* found previously added motif constribution */
2679 (*st)[o].exp_energy += p * exp_energy;
2680 break;
2681 }
2682
2683 /* if we haven't added yet, create new list entry */
2684 if (o == *cnt) {
2685 *st =
2686 (struct default_outside *)vrna_realloc(*st,
2687 sizeof(struct default_outside) * (*cnt + 1));
2688 (*st)[*cnt].motif_num = m;
2689 (*st)[*cnt].exp_energy = p * exp_energy;
2690 (*cnt)++;
2691 }
2692 }
2693 }
2694 }
2695 }
2696 }
2697
2698
2699 PRIVATE FLT_OR_DBL
default_probs_get(vrna_fold_compound_t * vc,int i,int j,unsigned int loop_type,int motif,void * data)2700 default_probs_get(vrna_fold_compound_t *vc,
2701 int i,
2702 int j,
2703 unsigned int loop_type,
2704 int motif,
2705 void *data)
2706 {
2707 FLT_OR_DBL outside = 0.;
2708 unsigned int *size, k;
2709 struct ligands_up_data_default *d;
2710 struct default_outside **storage;
2711
2712 d = (struct ligands_up_data_default *)data;
2713
2714 if (j < i)
2715 return 0.;
2716
2717 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_EXT_LOOP) {
2718 if (d->outside_ext) {
2719 storage = &(d->outside_ext[i]);
2720 size = &(d->outside_ext_count[i]);
2721 if ((storage) && (*storage)) {
2722 for (k = 0; k < *size; k++) {
2723 /* check for motif number match */
2724 if ((*storage)[k].motif_num == motif) {
2725 /* check for length match */
2726 if (i + d->len[motif] - 1 == j)
2727 outside += (*storage)[k].exp_energy;
2728 }
2729 }
2730 }
2731 }
2732 }
2733
2734 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_HP_LOOP) {
2735 if (d->outside_hp) {
2736 storage = &(d->outside_hp[i]);
2737 size = &(d->outside_hp_count[i]);
2738 if ((storage) && (*storage)) {
2739 for (k = 0; k < *size; k++) {
2740 /* check for motif number match */
2741 if ((*storage)[k].motif_num == motif) {
2742 /* check for length match */
2743 if (i + d->len[motif] - 1 == j)
2744 outside += (*storage)[k].exp_energy;
2745 }
2746 }
2747 }
2748 }
2749 }
2750
2751 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_INT_LOOP) {
2752 if (d->outside_int) {
2753 storage = &(d->outside_int[i]);
2754 size = &(d->outside_int_count[i]);
2755 if ((storage) && (*storage)) {
2756 for (k = 0; k < *size; k++) {
2757 /* check for motif number match */
2758 if ((*storage)[k].motif_num == motif) {
2759 /* check for length match */
2760 if (i + d->len[motif] - 1 == j)
2761 outside += (*storage)[k].exp_energy;
2762 }
2763 }
2764 }
2765 }
2766 }
2767
2768 if (loop_type & VRNA_UNSTRUCTURED_DOMAIN_MB_LOOP) {
2769 if (d->outside_mb) {
2770 storage = &(d->outside_mb[i]);
2771 size = &(d->outside_mb_count[i]);
2772 if ((storage) && (*storage)) {
2773 for (k = 0; k < *size; k++) {
2774 /* check for motif number match */
2775 if ((*storage)[k].motif_num == motif) {
2776 /* check for length match */
2777 if (i + d->len[motif] - 1 == j)
2778 outside += (*storage)[k].exp_energy;
2779 }
2780 }
2781 }
2782 }
2783 }
2784
2785 return outside;
2786 }
2787