1 /*
2 * gquad.c
3 *
4 * Ronny Lorenz 2012
5 *
6 * ViennaRNA Package
7 */
8
9 #ifdef HAVE_CONFIG_H
10 #include "config.h"
11 #endif
12
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <math.h>
16 #include <string.h>
17
18 #include "ViennaRNA/fold_vars.h"
19 #include "ViennaRNA/datastructures/basic.h"
20 #include "ViennaRNA/params/constants.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/utils/alignments.h"
23 #include "ViennaRNA/gquad.h"
24
25 #ifndef INLINE
26 #ifdef __GNUC__
27 # define INLINE inline
28 #else
29 # define INLINE
30 #endif
31 #endif
32
33 /**
34 * Use this macro to loop over each G-quadruplex
35 * delimited by a and b within the subsequence [c,d]
36 */
37 #define FOR_EACH_GQUAD(a, b, c, d) \
38 for ((a) = (d) - VRNA_GQUAD_MIN_BOX_SIZE + 1; (a) >= (c); (a)--) \
39 for ((b) = (a) + VRNA_GQUAD_MIN_BOX_SIZE - 1; \
40 (b) <= MIN2((d), (a) + VRNA_GQUAD_MAX_BOX_SIZE - 1); \
41 (b)++)
42
43 /**
44 * This macro does almost the same as FOR_EACH_GQUAD() but keeps
45 * the 5' delimiter fixed: 'b' iterates over the 3' delimiters of
46 * all gquads within the subsequence [a,c] whose 5' delimiter is 'a'
47 */
48 #define FOR_EACH_GQUAD_AT(a, b, c) \
49 for ((b) = (a) + VRNA_GQUAD_MIN_BOX_SIZE - 1; \
50 (b) <= MIN2((c), (a) + VRNA_GQUAD_MAX_BOX_SIZE - 1); \
51 (b)++)
52
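/*
 * A minimal usage sketch for the two iteration macros above,
 * mirroring how they are applied further below (e.g. in
 * get_gquad_matrix() and create_L_matrix()); 'i' and 'j' receive
 * the 5' and 3' delimiters of each candidate G-quadruplex box:
 *
 *   FOR_EACH_GQUAD(i, j, 1, n){
 *     // the interval [i,j] now delimits one candidate box
 *   }
 *
 *   FOR_EACH_GQUAD_AT(start, j, start + maxdist + 4){
 *     // all candidate boxes with fixed 5' delimiter 'start'
 *   }
 */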
53
54 struct gquad_ali_helper {
55 short **S;
56 unsigned int **a2s;
57 int n_seq;
58 vrna_param_t *P;
59 vrna_exp_param_t *pf;
60 int L;
61 int *l;
62 };
63
64 /*
65 #################################
66 # PRIVATE FUNCTION DECLARATIONS #
67 #################################
68 */
69
70 PRIVATE INLINE
71 int *
72 get_g_islands(short *S);
73
74
75 PRIVATE INLINE
76 int *
77 get_g_islands_sub(short *S,
78 int i,
79 int j);
80
81
82 /**
83 * IMPORTANT:
84 * If you don't know how to use this function, DON'T USE IT!
85 *
86 * The function pointer passed as argument is invoked once for
87 * each g-quadruplex found within the interval
88 * delimited by [i,j].
89 * The function it points to always receives as its first three
90 * arguments the position i, the stack size L and an array l[3]
91 * containing the individual linker sizes.
92 * The remaining four (void *) pointers of the callback receive
93 * the parameters 'data', 'P', 'aux1' and 'aux2' and may thus be
94 * used to pass along whatever data you like.
95 * As the parameter names suggest, the convention is that 'data'
96 * points to the storage the results are written into, e.g. the
97 * MFE or the partition function, and that 'P' should actually be
98 * of type 'vrna_param_t *' or 'vrna_exp_param_t *'.
99 * However, what you actually pass obviously depends on the
100 * function the pointer points to.
101 *
102 * Although all of this may look like overkill, it turns out
103 * to be almost as fast as implementing the g-quadruplex enumeration
104 * separately for each individual scenario, i.e. duplicating the code.
105 * Using this function, however, ensures that all g-quadruplex
106 * enumerations are absolutely identical.
107 */
108 PRIVATE
109 void
110 process_gquad_enumeration(int *gg,
111 int i,
112 int j,
113 void (*f)(int, int, int *,
114 void *, void *, void *, void *),
115 void *data,
116 void *P,
117 void *aux1,
118 void *aux2);
119
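/*
 * A minimal sketch of a callback that is compatible with
 * process_gquad_enumeration(), following the convention described
 * above; the name 'my_gquad_cb' is purely illustrative, the actual
 * callbacks used in this file (gquad_mfe() etc.) are defined below:
 *
 *   static void
 *   my_gquad_cb(int i, int L, int *l,
 *               void *data, void *P, void *aux1, void *aux2)
 *   {
 *     int e = ((vrna_param_t *)P)->gquad[L][l[0] + l[1] + l[2]];
 *     if (e < *((int *)data))
 *       *((int *)data) = e;
 *   }
 */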
120
121 /**
122 * MFE callback for process_gquad_enumeration()
123 */
124 PRIVATE
125 void
126 gquad_mfe(int i,
127 int L,
128 int *l,
129 void *data,
130 void *P,
131 void *NA,
132 void *NA2);
133
134
135 PRIVATE
136 void
137 gquad_mfe_pos(int i,
138 int L,
139 int *l,
140 void *data,
141 void *P,
142 void *Lmfe,
143 void *lmfe);
144
145
146 PRIVATE void gquad_mfe_ali_pos(int i,
147 int L,
148 int *l,
149 void *data,
150 void *helper,
151 void *Lmfe,
152 void *lmfe);
153
154
155 PRIVATE
156 void
157 gquad_pos_exhaustive(int i,
158 int L,
159 int *l,
160 void *data,
161 void *P,
162 void *Lex,
163 void *lex);
164
165
166 /**
167 * Partition function callback for process_gquad_enumeration()
168 */
169 PRIVATE
170 void
171 gquad_pf(int i,
172 int L,
173 int *l,
174 void *data,
175 void *P,
176 void *NA,
177 void *NA2);
178
179
180 PRIVATE void
181 gquad_pf_ali(int i,
182 int L,
183 int *l,
184 void *data,
185 void *helper,
186 void *NA,
187 void *NA2);
188
189
190 /**
191 * Partition function callback for process_gquad_enumeration().
192 * In contrast to gquad_pf(), it also stores the stack size L and
193 * the linker lengths l[3] of the g-quadruplex that dominates
194 * the interval [i,j].
195 * (FLT_OR_DBL *)data must be 0. on entry.
196 */
197 PRIVATE
198 void
199 gquad_pf_pos(int i,
200 int L,
201 int *l,
202 void *data,
203 void *pf,
204 void *Lmax,
205 void *lmax);
206
207
208 PRIVATE void
209 gquad_pf_pos_ali(int i,
210 int L,
211 int *l,
212 void *data,
213 void *helper,
214 void *NA1,
215 void *NA2);
216
217
218 /**
219 * MFE (alifold) callback for process_gquad_enumeration()
220 */
221 PRIVATE
222 void
223 gquad_mfe_ali(int i,
224 int L,
225 int *l,
226 void *data,
227 void *helper,
228 void *NA,
229 void *NA2);
230
231
232 /**
233 * MFE (alifold) callback for process_gquad_enumeration()
234 * with separation of the free energy and penalty contributions
235 */
236 PRIVATE
237 void
238 gquad_mfe_ali_en(int i,
239 int L,
240 int *l,
241 void *data,
242 void *helper,
243 void *NA,
244 void *NA2);
245
246
247 PRIVATE
248 void
249 gquad_interact(int i,
250 int L,
251 int *l,
252 void *data,
253 void *pf,
254 void *index,
255 void *NA2);
256
257
258 PRIVATE
259 void
260 gquad_interact_ali(int i,
261 int L,
262 int *l,
263 void *data,
264 void *index,
265 void *helper,
266 void *NA);
267
268
269 PRIVATE
270 void
271 gquad_count(int i,
272 int L,
273 int *l,
274 void *data,
275 void *NA,
276 void *NA2,
277 void *NA3);
278
279
280 PRIVATE
281 void
282 gquad_count_layers(int i,
283 int L,
284 int *l,
285 void *data,
286 void *NA,
287 void *NA2,
288 void *NA3);
289
290
291 /* other useful static functions */
292
293 PRIVATE
294 int
295 E_gquad_ali_penalty(int i,
296 int L,
297 int l[3],
298 const short **S,
299 unsigned int n_seq,
300 vrna_param_t *P);
301
302
303 PRIVATE
304 FLT_OR_DBL
305 exp_E_gquad_ali_penalty(int i,
306 int L,
307 int l[3],
308 const short **S,
309 unsigned int n_seq,
310 vrna_exp_param_t *P);
311
312
313 PRIVATE void
314 count_gquad_layer_mismatches(int i,
315 int L,
316 int l[3],
317 const short **S,
318 unsigned int n_seq,
319 unsigned int mm[2]);
320
321
322 PRIVATE int **
323 create_L_matrix(short *S,
324 int start,
325 int maxdist,
326 int n,
327 int **g,
328 vrna_param_t *P);
329
330
331 PRIVATE int **
332 create_aliL_matrix(int start,
333 int maxdist,
334 int n,
335 int **g,
336 short *S_cons,
337 short **S,
338 unsigned int **a2s,
339 int n_seq,
340 vrna_param_t *P);
341
342
351
352 /*
353 #########################################
354 # BEGIN OF PUBLIC FUNCTION DEFINITIONS #
355 # (all available in RNAlib) #
356 #########################################
357 */
358
359 /********************************
360 * Here are the G-quadruplex energy
361 * contribution functions
362 *********************************/
363 PUBLIC int
364 E_gquad(int L,
365 int l[3],
366 vrna_param_t *P)
367 {
368 int i, c = INF;
369
370 for (i = 0; i < 3; i++) {
371 if (l[i] > VRNA_GQUAD_MAX_LINKER_LENGTH)
372 return c;
373
374 if (l[i] < VRNA_GQUAD_MIN_LINKER_LENGTH)
375 return c;
376 }
377 if (L > VRNA_GQUAD_MAX_STACK_SIZE)
378 return c;
379
380 if (L < VRNA_GQUAD_MIN_STACK_SIZE)
381 return c;
382
383 gquad_mfe(0, L, l,
384 (void *)(&c),
385 (void *)P,
386 NULL,
387 NULL);
388 return c;
389 }
390
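/*
 * Illustrative usage sketch for E_gquad(): the energy of a
 * G-quadruplex with stack size L and linker lengths l[0..2] is
 * simply looked up via the precomputed parameters, e.g.
 *
 *   int l[3] = { 1, 2, 1 };
 *   int e    = E_gquad(2, l, P);  // INF if L or any l[i] is out of range
 *
 * where 'P' is assumed to be a 'vrna_param_t *' obtained elsewhere.
 */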
391
392 PUBLIC FLT_OR_DBL
393 exp_E_gquad(int L,
394 int l[3],
395 vrna_exp_param_t *pf)
396 {
397 int i;
398 FLT_OR_DBL q = 0.;
399
400 for (i = 0; i < 3; i++) {
401 if (l[i] > VRNA_GQUAD_MAX_LINKER_LENGTH)
402 return q;
403
404 if (l[i] < VRNA_GQUAD_MIN_LINKER_LENGTH)
405 return q;
406 }
407 if (L > VRNA_GQUAD_MAX_STACK_SIZE)
408 return q;
409
410 if (L < VRNA_GQUAD_MIN_STACK_SIZE)
411 return q;
412
413 gquad_pf(0, L, l,
414 (void *)(&q),
415 (void *)pf,
416 NULL,
417 NULL);
418 return q;
419 }
420
421
422 PUBLIC FLT_OR_DBL
423 exp_E_gquad_ali(int i,
424 int L,
425 int l[3],
426 short **S,
427 unsigned int **a2s,
428 int n_seq,
429 vrna_exp_param_t *pf)
430 {
431 int s;
432 FLT_OR_DBL q = 0.;
433
434 for (s = 0; s < 3; s++) {
435 if (l[s] > VRNA_GQUAD_MAX_LINKER_LENGTH)
436 return q;
437
438 if (l[s] < VRNA_GQUAD_MIN_LINKER_LENGTH)
439 return q;
440 }
441 if (L > VRNA_GQUAD_MAX_STACK_SIZE)
442 return q;
443
444 if (L < VRNA_GQUAD_MIN_STACK_SIZE)
445 return q;
446
447 struct gquad_ali_helper gq_help;
448
449 gq_help.S = S;
450 gq_help.a2s = a2s;
451 gq_help.n_seq = n_seq;
452 gq_help.pf = pf;
453
454 gquad_pf_ali(i, L, l,
455 (void *)(&q),
456 (void *)&gq_help,
457 NULL,
458 NULL);
459 return q;
460 }
461
462
463 PUBLIC void
464 E_gquad_ali_en(int i,
465 int L,
466 int l[3],
467 const short **S,
468 unsigned int **a2s,
469 unsigned int n_seq,
470 vrna_param_t *P,
471 int en[2])
472 {
473 unsigned int s;
474 int ee, ee2, u1, u2, u3;
475
476 en[0] = en[1] = INF;
477
478 /* check if the quadruplex obeys the canonical form */
479 for (s = 0; s < 3; s++) {
480 if (l[s] > VRNA_GQUAD_MAX_LINKER_LENGTH)
481 return;
482
483 if (l[s] < VRNA_GQUAD_MIN_LINKER_LENGTH)
484 return;
485 }
486 if (L > VRNA_GQUAD_MAX_STACK_SIZE)
487 return;
488
489 if (L < VRNA_GQUAD_MIN_STACK_SIZE)
490 return;
491
492 /* compute actual quadruplex contribution for subalignment */
493 ee = 0;
494
495 for (s = 0; s < n_seq; s++) {
496 u1 = a2s[s][i + L + l[0] - 1] - a2s[s][i + L - 1];
497 u2 = a2s[s][i + 2 * L + l[0] + l[1] - 1] - a2s[s][i + 2 * L + l[0] - 1];
498 u3 = a2s[s][i + 3 * L + l[0] + l[1] + l[2] - 1] - a2s[s][i + 3 * L + l[0] + l[1] - 1];
499 ee += P->gquad[L][u1 + u2 + u3];
500 }
501
502 /* get penalty from incompatible sequences in alignment */
503 ee2 = E_gquad_ali_penalty(i, L, l, S, n_seq, P);
504
505 /* assign return values */
506 if (ee2 != INF) {
507 en[0] = ee;
508 en[1] = ee2;
509 }
510 }
511
512
513 /********************************
514 * Now, the triangular matrix
515 * generators for the G-quadruplex
516 * contributions are following
517 *********************************/
518 PUBLIC int *
519 get_gquad_matrix(short *S,
520 vrna_param_t *P)
521 {
522 int n, size, i, j, *gg, *my_index, *data;
523
524 n = S[0];
525 my_index = vrna_idx_col_wise(n);
526 gg = get_g_islands(S);
527 size = (n * (n + 1)) / 2 + 2;
528 data = (int *)vrna_alloc(sizeof(int) * size);
529
530 /* prefill the upper triangular matrix with INF */
531 for (i = 0; i < size; i++)
532 data[i] = INF;
533
534 FOR_EACH_GQUAD(i, j, 1, n){
535 process_gquad_enumeration(gg, i, j,
536 &gquad_mfe,
537 (void *)(&(data[my_index[j] + i])),
538 (void *)P,
539 NULL,
540 NULL);
541 }
542
543 free(my_index);
544 free(gg);
545 return data;
546 }
547
548
549 PUBLIC FLT_OR_DBL *
550 get_gquad_pf_matrix(short *S,
551 FLT_OR_DBL *scale,
552 vrna_exp_param_t *pf)
553 {
554 int n, size, *gg, i, j, *my_index;
555 FLT_OR_DBL *data;
556
557
558 n = S[0];
559 size = (n * (n + 1)) / 2 + 2;
560 data = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
561 gg = get_g_islands(S);
562 my_index = vrna_idx_row_wise(n);
563
564 FOR_EACH_GQUAD(i, j, 1, n){
565 process_gquad_enumeration(gg, i, j,
566 &gquad_pf,
567 (void *)(&(data[my_index[i] - j])),
568 (void *)pf,
569 NULL,
570 NULL);
571 data[my_index[i] - j] *= scale[j - i + 1];
572 }
573
574 free(my_index);
575 free(gg);
576 return data;
577 }
578
579
580 PUBLIC FLT_OR_DBL *
581 get_gquad_pf_matrix_comparative(unsigned int n,
582 short *S_cons,
583 short **S,
584 unsigned int **a2s,
585 FLT_OR_DBL *scale,
586 unsigned int n_seq,
587 vrna_exp_param_t *pf)
588 {
589 int size, *gg, i, j, *my_index;
590 FLT_OR_DBL *data;
591 struct gquad_ali_helper gq_help;
592
593
594 size = (n * (n + 1)) / 2 + 2;
595 data = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
596 gg = get_g_islands(S_cons);
597 my_index = vrna_idx_row_wise(n);
598 gq_help.S = S;
599 gq_help.a2s = a2s;
600 gq_help.n_seq = n_seq;
601 gq_help.pf = pf;
602
603 FOR_EACH_GQUAD(i, j, 1, n){
604 process_gquad_enumeration(gg, i, j,
605 &gquad_pf_ali,
606 (void *)(&(data[my_index[i] - j])),
607 (void *)&gq_help,
608 NULL,
609 NULL);
610 data[my_index[i] - j] *= scale[j - i + 1];
611 }
612
613 free(my_index);
614 free(gg);
615 return data;
616 }
617
618
619 PUBLIC int *
620 get_gquad_ali_matrix(unsigned int n,
621 short *S_cons,
622 short **S,
623 unsigned int **a2s,
624 int n_seq,
625 vrna_param_t *P)
626 {
627 int size, *data, *gg;
628 int i, j, *my_index;
629 struct gquad_ali_helper gq_help;
630
631 size = (n * (n + 1)) / 2 + 2;
632 data = (int *)vrna_alloc(sizeof(int) * size);
633 gg = get_g_islands(S_cons);
634 my_index = vrna_idx_col_wise(n);
635
636 gq_help.S = S;
637 gq_help.a2s = a2s;
638 gq_help.n_seq = n_seq;
639 gq_help.P = P;
640
641 /* prefill the upper triangular matrix with INF */
642 for (i = 0; i < size; i++)
643 data[i] = INF;
644
645 FOR_EACH_GQUAD(i, j, 1, n){
646 process_gquad_enumeration(gg, i, j,
647 &gquad_mfe_ali,
648 (void *)(&(data[my_index[j] + i])),
649 (void *)&gq_help,
650 NULL,
651 NULL);
652 }
653
654 free(my_index);
655 free(gg);
656 return data;
657 }
658
659
660 PUBLIC int **
661 get_gquad_L_matrix(short *S,
662 int start,
663 int maxdist,
664 int n,
665 int **g,
666 vrna_param_t *P)
667 {
668 return create_L_matrix(S, start, maxdist, n, g, P);
669 }
670
671
672 PUBLIC void
673 vrna_gquad_mx_local_update(vrna_fold_compound_t *vc,
674 int start)
675 {
676 if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
677 vc->matrices->ggg_local = create_aliL_matrix(
678 start,
679 vc->window_size,
680 vc->length,
681 vc->matrices->ggg_local,
682 vc->S_cons,
683 vc->S,
684 vc->a2s,
685 vc->n_seq,
686 vc->params);
687 } else {
688 vc->matrices->ggg_local = create_L_matrix(
689 vc->sequence_encoding,
690 start,
691 vc->window_size,
692 vc->length,
693 vc->matrices->ggg_local,
694 vc->params);
695 }
696 }
697
698
699 PRIVATE int **
700 create_L_matrix(short *S,
701 int start,
702 int maxdist,
703 int n,
704 int **g,
705 vrna_param_t *P)
706 {
707 int **data;
708 int i, j, k, *gg, p, q;
709
710 p = MAX2(1, start);
711 q = MIN2(n, start + maxdist + 4);
712 gg = get_g_islands_sub(S, p, q);
713
714 if (g) {
715 /* we just update the gquadruplex contribution for the current
716 * start and rotate the rest */
717 data = g;
718 /* we re-use the memory allocated previously */
719 data[start] = data[start + maxdist + 5];
720 data[start + maxdist + 5] = NULL;
721
722 /* prefill with INF */
723 for (i = 0; i < maxdist + 5; i++)
724 data[start][i] = INF;
725
726 /* now we compute contributions for all gquads with 5' delimiter at
727 * position 'start'
728 */
729 FOR_EACH_GQUAD_AT(start, j, start + maxdist + 4){
730 process_gquad_enumeration(gg, start, j,
731 &gquad_mfe,
732 (void *)(&(data[start][j - start])),
733 (void *)P,
734 NULL,
735 NULL);
736 }
737 } else {
738 /* create a new matrix from scratch since this is the first
739 * call to this function */
740
741 /* allocate memory and prefill with INF */
742 data = (int **)vrna_alloc(sizeof(int *) * (n + 1));
743 for (k = n; (k > n - maxdist - 5) && (k >= 0); k--) {
744 data[k] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
745 for (i = 0; i < maxdist + 5; i++)
746 data[k][i] = INF;
747 }
748
749 /* compute all contributions for the gquads in this interval */
750 FOR_EACH_GQUAD(i, j, MAX2(1, n - maxdist - 4), n){
751 process_gquad_enumeration(gg, i, j,
752 &gquad_mfe,
753 (void *)(&(data[i][j - i])),
754 (void *)P,
755 NULL,
756 NULL);
757 }
758 }
759
760 gg += p - 1;
761 free(gg);
762 return data;
763 }
764
765
766 PRIVATE int **
767 create_aliL_matrix(int start,
768 int maxdist,
769 int n,
770 int **g,
771 short *S_cons,
772 short **S,
773 unsigned int **a2s,
774 int n_seq,
775 vrna_param_t *P)
776 {
777 int **data;
778 int i, j, k, *gg, p, q;
779
780 p = MAX2(1, start);
781 q = MIN2(n, start + maxdist + 4);
782 gg = get_g_islands_sub(S_cons, p, q);
783
784 struct gquad_ali_helper gq_help;
785
786 gq_help.S = S;
787 gq_help.a2s = a2s;
788 gq_help.n_seq = n_seq;
789 gq_help.P = P;
790
791 if (g) {
792 /* we just update the gquadruplex contribution for the current
793 * start and rotate the rest */
794 data = g;
795 /* we re-use the memory allocated previously */
796 data[start] = data[start + maxdist + 5];
797 data[start + maxdist + 5] = NULL;
798
799 /* prefill with INF */
800 for (i = 0; i < maxdist + 5; i++)
801 data[start][i] = INF;
802
803 /* now we compute contributions for all gquads with 5' delimiter at
804 * position 'start'
805 */
806 FOR_EACH_GQUAD_AT(start, j, start + maxdist + 4){
807 process_gquad_enumeration(gg, start, j,
808 &gquad_mfe_ali,
809 (void *)(&(data[start][j - start])),
810 (void *)&gq_help,
811 NULL,
812 NULL);
813 }
814 } else {
815 /* create a new matrix from scratch since this is the first
816 * call to this function */
817
818 /* allocate memory and prefill with INF */
819 data = (int **)vrna_alloc(sizeof(int *) * (n + 1));
820 for (k = n; (k > n - maxdist - 5) && (k >= 0); k--) {
821 data[k] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
822 for (i = 0; i < maxdist + 5; i++)
823 data[k][i] = INF;
824 }
825
826 /* compute all contributions for the gquads in this interval */
827 FOR_EACH_GQUAD(i, j, MAX2(1, n - maxdist - 4), n){
828 process_gquad_enumeration(gg, i, j,
829 &gquad_mfe_ali,
830 (void *)(&(data[i][j - i])),
831 (void *)&gq_help,
832 NULL,
833 NULL);
834 }
835 }
836
837 gg += p - 1;
838 free(gg);
839 return data;
840 }
841
842
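/*
 * Note on the expected input (cf. parse_gquad() below): a G-quadruplex
 * is encoded in dot-bracket notation as four runs of '+' of identical
 * length L, separated by '.' linkers. For example (illustrative), the
 * substructure "++.++.++.++" describes a quadruplex with L = 2 and
 * l = {1, 1, 1}; for each of its layers, this function emits pseudo
 * base pairs connecting the four G runs with probability 'pr'.
 */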
843 PUBLIC plist *
844 get_plist_gquad_from_db(const char *structure,
845 float pr)
846 {
847 int x, size, actual_size, L, n, ge, ee, gb, l[3];
848 plist *pl;
849
850 actual_size = 0;
851 ge = 0;
852 n = 2;
853 size = strlen(structure);
854 pl = (plist *)vrna_alloc(n * size * sizeof(plist));
855
856 while ((ee = parse_gquad(structure + ge, &L, l)) > 0) {
857 ge += ee;
858 gb = ge - L * 4 - l[0] - l[1] - l[2] + 1;
859 /* add pseudo base pairs enclosing the gquad */
860 for (x = 0; x < L; x++) {
861 if (actual_size >= n * size - 5) {
862 n *= 2;
863 pl = (plist *)vrna_realloc(pl, n * size * sizeof(plist));
864 }
865
866 pl[actual_size].i = gb + x;
867 pl[actual_size].j = ge + x - L + 1;
868 pl[actual_size].p = pr;
869 pl[actual_size++].type = 0;
870
871 pl[actual_size].i = gb + x;
872 pl[actual_size].j = gb + x + l[0] + L;
873 pl[actual_size].p = pr;
874 pl[actual_size++].type = 0;
875
876 pl[actual_size].i = gb + x + l[0] + L;
877 pl[actual_size].j = ge + x - 2 * L - l[2] + 1;
878 pl[actual_size].p = pr;
879 pl[actual_size++].type = 0;
880
881 pl[actual_size].i = ge + x - 2 * L - l[2] + 1;
882 pl[actual_size].j = ge + x - L + 1;
883 pl[actual_size].p = pr;
884 pl[actual_size++].type = 0;
885 }
886 }
887
888 pl[actual_size].i = pl[actual_size].j = 0;
889 pl[actual_size++].p = 0;
890 pl = (plist *)vrna_realloc(pl, actual_size * sizeof(plist));
891 return pl;
892 }
893
894
895 PUBLIC void
896 get_gquad_pattern_mfe(short *S,
897 int i,
898 int j,
899 vrna_param_t *P,
900 int *L,
901 int l[3])
902 {
903 int *gg = get_g_islands_sub(S, i, j);
904 int c = INF;
905
906 process_gquad_enumeration(gg, i, j,
907 &gquad_mfe_pos,
908 (void *)(&c),
909 (void *)P,
910 (void *)L,
911 (void *)l);
912
913 gg += i - 1;
914 free(gg);
915 }
916
917
918 PUBLIC void
919 get_gquad_pattern_mfe_ali(short **S,
920 unsigned int **a2s,
921 short *S_cons,
922 int n_seq,
923 int i,
924 int j,
925 vrna_param_t *P,
926 int *L,
927 int l[3])
928 {
929 int mfe, *gg;
930 struct gquad_ali_helper gq_help;
931
932 gg = get_g_islands_sub(S_cons, i, j);
933 mfe = INF;
934
935 gq_help.S = S;
936 gq_help.a2s = a2s;
937 gq_help.n_seq = n_seq;
938 gq_help.P = P;
939
940 process_gquad_enumeration(gg, i, j,
941 &gquad_mfe_ali_pos,
942 (void *)(&mfe),
943 (void *)&gq_help,
944 (void *)L,
945 (void *)l);
946
947 gg += i - 1;
948 free(gg);
949 }
950
951
952 PUBLIC void
953 get_gquad_pattern_exhaustive(short *S,
954 int i,
955 int j,
956 vrna_param_t *P,
957 int *L,
958 int *l,
959 int threshold)
960 {
961 int *gg = get_g_islands_sub(S, i, j);
962
963 process_gquad_enumeration(gg, i, j,
964 &gquad_pos_exhaustive,
965 (void *)(&threshold),
966 (void *)P,
967 (void *)L,
968 (void *)l);
969
970 gg += i - 1;
971 free(gg);
972 }
973
974
975 PUBLIC void
976 get_gquad_pattern_pf(short *S,
977 int i,
978 int j,
979 vrna_exp_param_t *pf,
980 int *L,
981 int l[3])
982 {
983 int *gg = get_g_islands_sub(S, i, j);
984 FLT_OR_DBL q = 0.;
985
986 process_gquad_enumeration(gg, i, j,
987 &gquad_pf_pos,
988 (void *)(&q),
989 (void *)pf,
990 (void *)L,
991 (void *)l);
992
993 gg += i - 1;
994 free(gg);
995 }
996
997
998 PUBLIC void
999 vrna_get_gquad_pattern_pf(vrna_fold_compound_t *fc,
1000 int i,
1001 int j,
1002 int *L,
1003 int l[3])
1004 {
1005 short *S = fc->type == VRNA_FC_TYPE_SINGLE ? fc->sequence_encoding2 : fc->S_cons;
1006 int *gg = get_g_islands_sub(S, i, j);
1007 FLT_OR_DBL q = 0.;
1008 vrna_exp_param_t *pf = fc->exp_params;
1009
1010 if (fc->type == VRNA_FC_TYPE_SINGLE) {
1011 process_gquad_enumeration(gg, i, j,
1012 &gquad_pf_pos,
1013 (void *)(&q),
1014 (void *)pf,
1015 (void *)L,
1016 (void *)l);
1017 } else {
1018 struct gquad_ali_helper gq_help;
1019 gq_help.S = fc->S;
1020 gq_help.a2s = fc->a2s;
1021 gq_help.n_seq = fc->n_seq;
1022 gq_help.pf = pf;
1023 gq_help.L = (int)(*L);
1024 gq_help.l = (int *)l;
1025 process_gquad_enumeration(gg, i, j,
1026 &gquad_pf_pos_ali,
1027 (void *)(&q),
1028 (void *)&gq_help,
1029 NULL,
1030 NULL);
1031 *L = gq_help.L;
1032 }
1033
1034 gg += i - 1;
1035 free(gg);
1036 }
1037
1038
1039 PUBLIC plist *
1040 get_plist_gquad_from_pr(short *S,
1041 int gi,
1042 int gj,
1043 FLT_OR_DBL *G,
1044 FLT_OR_DBL *probs,
1045 FLT_OR_DBL *scale,
1046 vrna_exp_param_t *pf)
1047 {
1048 int L, l[3];
1049
1050 return get_plist_gquad_from_pr_max(S, gi, gj, G, probs, scale, &L, l, pf);
1051 }
1052
1053
1054 PUBLIC plist *
1055 vrna_get_plist_gquad_from_pr(vrna_fold_compound_t *fc,
1056 int gi,
1057 int gj)
1058 {
1059 int L, l[3];
1060
1061 return vrna_get_plist_gquad_from_pr_max(fc, gi, gj, &L, l);
1062 }
1063
1064
1065 PUBLIC plist *
1066 get_plist_gquad_from_pr_max(short *S,
1067 int gi,
1068 int gj,
1069 FLT_OR_DBL *G,
1070 FLT_OR_DBL *probs,
1071 FLT_OR_DBL *scale,
1072 int *Lmax,
1073 int lmax[3],
1074 vrna_exp_param_t *pf)
1075 {
1076 int n, size, *gg, counter, i, j, *my_index;
1077 FLT_OR_DBL pp, *tempprobs;
1078 plist *pl;
1079
1080 n = S[0];
1081 size = (n * (n + 1)) / 2 + 2;
1082 tempprobs = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
1083 pl = (plist *)vrna_alloc((S[0] * S[0]) * sizeof(plist));
1084 gg = get_g_islands_sub(S, gi, gj);
1085 counter = 0;
1086 my_index = vrna_idx_row_wise(n);
1087
1088 process_gquad_enumeration(gg, gi, gj,
1089 &gquad_interact,
1090 (void *)tempprobs,
1091 (void *)pf,
1092 (void *)my_index,
1093 NULL);
1094
1095 pp = 0.;
1096 process_gquad_enumeration(gg, gi, gj,
1097 &gquad_pf_pos,
1098 (void *)(&pp),
1099 (void *)pf,
1100 (void *)Lmax,
1101 (void *)lmax);
1102
1103 pp = probs[my_index[gi] - gj] * scale[gj - gi + 1] / G[my_index[gi] - gj];
1104 for (i = gi; i < gj; i++) {
1105 for (j = i; j <= gj; j++) {
1106 if (tempprobs[my_index[i] - j] > 0.) {
1107 pl[counter].i = i;
1108 pl[counter].j = j;
1109 pl[counter++].p = pp * tempprobs[my_index[i] - j];
1110 }
1111 }
1112 }
1113 pl[counter].i = pl[counter].j = 0;
1114 pl[counter++].p = 0.;
1115 /* shrink memory to actual size needed */
1116 pl = (plist *)vrna_realloc(pl, counter * sizeof(plist));
1117
1118 gg += gi - 1;
1119 free(gg);
1120 free(my_index);
1121 free(tempprobs);
1122 return pl;
1123 }
1124
1125
1126 PUBLIC plist *
1127 vrna_get_plist_gquad_from_pr_max(vrna_fold_compound_t *fc,
1128 int gi,
1129 int gj,
1130 int *Lmax,
1131 int lmax[3])
1132 {
1133 short *S;
1134 int n, size, *gg, counter, i, j, *my_index;
1135 FLT_OR_DBL pp, *tempprobs, *G, *probs, *scale;
1136 plist *pl;
1137 vrna_exp_param_t *pf;
1138
1139 n = (int)fc->length;
1140 pf = fc->exp_params;
1141 G = fc->exp_matrices->G;
1142 probs = fc->exp_matrices->probs;
1143 scale = fc->exp_matrices->scale;
1144 S = (fc->type == VRNA_FC_TYPE_SINGLE) ? fc->sequence_encoding2 : fc->S_cons;
1145 size = (n * (n + 1)) / 2 + 2;
1146 tempprobs = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
1147 pl = (plist *)vrna_alloc((n * n) * sizeof(plist));
1148 gg = get_g_islands_sub(S, gi, gj);
1149 counter = 0;
1150 my_index = vrna_idx_row_wise(n);
1151
1152 pp = 0.;
1153
1154 if (fc->type == VRNA_FC_TYPE_SINGLE) {
1155 process_gquad_enumeration(gg, gi, gj,
1156 &gquad_interact,
1157 (void *)tempprobs,
1158 (void *)pf,
1159 (void *)my_index,
1160 NULL);
1161
1162 process_gquad_enumeration(gg, gi, gj,
1163 &gquad_pf_pos,
1164 (void *)(&pp),
1165 (void *)pf,
1166 (void *)Lmax,
1167 (void *)lmax);
1168 } else {
1169 struct gquad_ali_helper gq_help;
1170 gq_help.S = fc->S;
1171 gq_help.a2s = fc->a2s;
1172 gq_help.n_seq = fc->n_seq;
1173 gq_help.pf = pf;
1174 gq_help.L = *Lmax;
1175 gq_help.l = &(lmax[0]);
1176
1177 process_gquad_enumeration(gg, gi, gj,
1178 &gquad_interact_ali,
1179 (void *)tempprobs,
1180 (void *)my_index,
1181 (void *)&(gq_help),
1182 NULL);
1183
1184 process_gquad_enumeration(gg, gi, gj,
1185 &gquad_pf_pos_ali,
1186 (void *)(&pp),
1187 (void *)&gq_help,
1188 NULL,
1189 NULL);
1190 *Lmax = gq_help.L;
1191 }
1192
1193 pp = probs[my_index[gi] - gj] * scale[gj - gi + 1] / G[my_index[gi] - gj];
1194 for (i = gi; i < gj; i++) {
1195 for (j = i; j <= gj; j++) {
1196 if (tempprobs[my_index[i] - j] > 0.) {
1197 pl[counter].i = i;
1198 pl[counter].j = j;
1199 pl[counter++].p = pp * tempprobs[my_index[i] - j];
1200 }
1201 }
1202 }
1203 pl[counter].i = pl[counter].j = 0;
1204 pl[counter++].p = 0.;
1205 /* shrink memory to actual size needed */
1206 pl = (plist *)vrna_realloc(pl, counter * sizeof(plist));
1207
1208 gg += gi - 1;
1209 free(gg);
1210 free(my_index);
1211 free(tempprobs);
1212 return pl;
1213 }
1214
1215
1216 PUBLIC int
1217 get_gquad_count(short *S,
1218 int i,
1219 int j)
1220 {
1221 int *gg = get_g_islands_sub(S, i, j);
1222 int p, q, counter = 0;
1223
1224 FOR_EACH_GQUAD(p, q, i, j)
1225 process_gquad_enumeration(gg, p, q,
1226 &gquad_count,
1227 (void *)(&counter),
1228 NULL,
1229 NULL,
1230 NULL);
1231
1232 gg += i - 1;
1233 free(gg);
1234 return counter;
1235 }
1236
1237
1238 PUBLIC int
1239 get_gquad_layer_count(short *S,
1240 int i,
1241 int j)
1242 {
1243 int *gg = get_g_islands_sub(S, i, j);
1244 int p, q, counter = 0;
1245
1246 FOR_EACH_GQUAD(p, q, i, j)
1247 process_gquad_enumeration(gg, p, q,
1248 &gquad_count_layers,
1249 (void *)(&counter),
1250 NULL,
1251 NULL,
1252 NULL);
1253
1254 gg += i - 1;
1255 free(gg);
1256 return counter;
1257 }
1258
1259
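/*
 * Locate and parse the first G-quadruplex in 'struc'. For example
 * (illustrative), with struc = "..++.++.++.++.." the function sets
 * *L = 2 and l = {1, 1, 1}, and returns the (1-based) position of the
 * last '+' of the quadruplex (here 13); it returns 0 if no '+' is found.
 */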
1260 PUBLIC int
1261 parse_gquad(const char *struc,
1262 int *L,
1263 int l[3])
1264 {
1265 int i, il, start, end, len;
1266
1267 for (i = 0; struc[i] && struc[i] != '+'; i++);
1268 if (struc[i] == '+') {
1269 /* start of gquad */
1270 for (il = 0; il <= 3; il++) {
1271 start = i; /* pos of first '+' */
1272 while (struc[++i] == '+')
1273 if ((il) && (i - start == *L))
1274 break;
1275
1276 end = i;
1277 len = end - start;
1278 if (il == 0)
1279 *L = len;
1280 else if (len != *L)
1281 vrna_message_error("unequal stack lengths in gquad");
1282
1283 if (il == 3)
1284 break;
1285
1286 while (struc[++i] == '.'); /* linker */
1287 l[il] = i - end;
1288 if (struc[i] != '+')
1289 vrna_message_error("illegal character in gquad linker region");
1290 }
1291 } else {
1292 return 0;
1293 }
1294
1295 /* printf("gquad at %d %d %d %d %d\n", end, *L, l[0], l[1], l[2]); */
1296 return end;
1297 }
1298
1299
1300 /*
1301 #########################################
1302 # BEGIN OF PRIVATE FUNCTION DEFINITIONS #
1303 # (internal use only) #
1304 #########################################
1305 */
1306 PRIVATE int
1307 E_gquad_ali_penalty(int i,
1308 int L,
1309 int l[3],
1310 const short **S,
1311 unsigned int n_seq,
1312 vrna_param_t *P)
1313 {
1314 unsigned int mm[2];
1315
1316 count_gquad_layer_mismatches(i, L, l, S, n_seq, mm);
1317
1318 if (mm[1] > P->gquadLayerMismatchMax)
1319 return INF;
1320 else
1321 return P->gquadLayerMismatch * mm[0];
1322 }
1323
1324
1325 PRIVATE FLT_OR_DBL
1326 exp_E_gquad_ali_penalty(int i,
1327 int L,
1328 int l[3],
1329 const short **S,
1330 unsigned int n_seq,
1331 vrna_exp_param_t *pf)
1332 {
1333 unsigned int mm[2];
1334
1335 count_gquad_layer_mismatches(i, L, l, S, n_seq, mm);
1336
1337 if (mm[1] > pf->gquadLayerMismatchMax)
1338 return (FLT_OR_DBL)0.;
1339 else
1340 return (FLT_OR_DBL)pow(pf->expgquadLayerMismatch, (double)mm[0]);
1341 }
1342
1343
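/*
 * Count broken G-quadruplex layers for each sequence of the alignment:
 * a layer is considered broken as soon as one of its four positions is
 * not a G (encoded as 3). Roughly speaking, the two outermost layers
 * contribute a weight of 1 each and inner layers a weight of 2 each.
 * The summed weights over all sequences are returned in mm[0], while
 * mm[1] counts sequences whose score reaches the maximum of 2*(L-1),
 * i.e. sequences treated as unable to form the quadruplex at all
 * (cf. gquadLayerMismatchMax in the callers above).
 */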
1344 PRIVATE void
1345 count_gquad_layer_mismatches(int i,
1346 int L,
1347 int l[3],
1348 const short **S,
1349 unsigned int n_seq,
1350 unsigned int mm[2])
1351 {
1352 unsigned int s;
1353 int cnt;
1354
1355 mm[0] = mm[1] = 0;
1356
1357 /* check for compatibility in the alignment */
1358 for (s = 0; s < n_seq; s++) {
1359 unsigned int ld = 0; /* !=0 if layer destruction was detected */
1360 unsigned int mismatch = 0;
1361
1362 /* check bottom layer */
1363 if (S[s][i] != 3)
1364 ld |= 1U;
1365
1366 if (S[s][i + L + l[0]] != 3)
1367 ld |= 2U;
1368
1369 if (S[s][i + 2 * L + l[0] + l[1]] != 3)
1370 ld |= 4U;
1371
1372 if (S[s][i + 3 * L + l[0] + l[1] + l[2]] != 3)
1373 ld |= 8U;
1374
1375 /* add 1x penalty for missing bottom layer */
1376 if (ld)
1377 mismatch++;
1378
1379 /* check top layer */
1380 ld = 0;
1381 if (S[s][i + L - 1] != 3)
1382 ld |= 1U;
1383
1384 if (S[s][i + 2 * L + l[0] - 1] != 3)
1385 ld |= 2U;
1386
1387 if (S[s][i + 3 * L + l[0] + l[1] - 1] != 3)
1388 ld |= 4U;
1389
1390 if (S[s][i + 4 * L + l[0] + l[1] + l[2] - 1] != 3)
1391 ld |= 8U;
1392
1393 /* add 1x penalty for missing top layer */
1394 if (ld)
1395 mismatch++;
1396
1397 /* check inner layers */
1398 ld = 0;
1399 for (cnt = 1; cnt < L - 1; cnt++) {
1400 if (S[s][i + cnt] != 3)
1401 ld |= 1U;
1402
1403 if (S[s][i + L + l[0] + cnt] != 3)
1404 ld |= 2U;
1405
1406 if (S[s][i + 2 * L + l[0] + l[1] + cnt] != 3)
1407 ld |= 4U;
1408
1409 if (S[s][i + 3 * L + l[0] + l[1] + l[2] + cnt] != 3)
1410 ld |= 8U;
1411
1412 /* add 2x penalty for missing inner layer */
1413 if (ld)
1414 mismatch += 2;
1415 }
1416
1417 mm[0] += mismatch;
1418
1419 if (mismatch >= 2 * (L - 1))
1420 mm[1]++;
1421 }
1422 }
1423
1424
1425 PRIVATE void
1426 gquad_mfe(int i,
1427 int L,
1428 int *l,
1429 void *data,
1430 void *P,
1431 void *NA,
1432 void *NA2)
1433 {
1434 int cc = ((vrna_param_t *)P)->gquad[L][l[0] + l[1] + l[2]];
1435
1436 if (cc < *((int *)data))
1437 *((int *)data) = cc;
1438 }
1439
1440
1441 PRIVATE void
1442 gquad_mfe_pos(int i,
1443 int L,
1444 int *l,
1445 void *data,
1446 void *P,
1447 void *Lmfe,
1448 void *lmfe)
1449 {
1450 int cc = ((vrna_param_t *)P)->gquad[L][l[0] + l[1] + l[2]];
1451
1452 if (cc < *((int *)data)) {
1453 *((int *)data) = cc;
1454 *((int *)Lmfe) = L;
1455 *((int *)lmfe) = l[0];
1456 *(((int *)lmfe) + 1) = l[1];
1457 *(((int *)lmfe) + 2) = l[2];
1458 }
1459 }
1460
1461
1462 PRIVATE void
1463 gquad_mfe_ali_pos(int i,
1464 int L,
1465 int *l,
1466 void *data,
1467 void *helper,
1468 void *Lmfe,
1469 void *lmfe)
1470 {
1471 int cc = INF;
1472
1473 gquad_mfe_ali(i, L, l, (void *)&cc, helper, NULL, NULL);
1474
1475 if (cc < *((int *)data)) {
1476 *((int *)data) = cc;
1477 *((int *)Lmfe) = L;
1478 *((int *)lmfe) = l[0];
1479 *(((int *)lmfe) + 1) = l[1];
1480 *(((int *)lmfe) + 2) = l[2];
1481 }
1482 }
1483
1484
1485 PRIVATE
1486 void
1487 gquad_pos_exhaustive(int i,
1488 int L,
1489 int *l,
1490 void *data,
1491 void *P,
1492 void *Lex,
1493 void *lex)
1494 {
1495 int cnt;
1496 int cc = ((vrna_param_t *)P)->gquad[L][l[0] + l[1] + l[2]];
1497
1498 if (cc <= *((int *)data)) {
1499 /* since Lex is an array of L values and lex an
1500 * array of l triples, we need to find out where
1501 * the current gquad position is to be stored.
1502 * The implementation below might be slow, but we
1503 * still use it for now
1504 */
1505 for (cnt = 0; ((int *)Lex)[cnt] != -1; cnt++);
1506
1507 *((int *)Lex + cnt) = L;
1508 *((int *)Lex + cnt + 1) = -1;
1509 *(((int *)lex) + (3 * cnt) + 0) = l[0];
1510 *(((int *)lex) + (3 * cnt) + 1) = l[1];
1511 *(((int *)lex) + (3 * cnt) + 2) = l[2];
1512 }
1513 }
1514
1515
1516 PRIVATE
1517 void
1518 gquad_count(int i,
1519 int L,
1520 int *l,
1521 void *data,
1522 void *NA,
1523 void *NA2,
1524 void *NA3)
1525 {
1526 *((int *)data) += 1;
1527 }
1528
1529
1530 PRIVATE
1531 void
1532 gquad_count_layers(int i,
1533 int L,
1534 int *l,
1535 void *data,
1536 void *NA,
1537 void *NA2,
1538 void *NA3)
1539 {
1540 *((int *)data) += L;
1541 }
1542
1543
1544 PRIVATE void
1545 gquad_pf(int i,
1546 int L,
1547 int *l,
1548 void *data,
1549 void *pf,
1550 void *NA,
1551 void *NA2)
1552 {
1553 *((FLT_OR_DBL *)data) += ((vrna_exp_param_t *)pf)->expgquad[L][l[0] + l[1] + l[2]];
1554 }
1555
1556
1557 PRIVATE void
1558 gquad_pf_ali(int i,
1559 int L,
1560 int *l,
1561 void *data,
1562 void *helper,
1563 void *NA,
1564 void *NA2)
1565 {
1566 short **S;
1567 unsigned int **a2s;
1568 int u1, u2, u3, s, n_seq;
1569 FLT_OR_DBL penalty;
1570 vrna_exp_param_t *pf;
1571 struct gquad_ali_helper *gq_help;
1572
1573 gq_help = (struct gquad_ali_helper *)helper;
1574 S = gq_help->S;
1575 a2s = gq_help->a2s;
1576 n_seq = gq_help->n_seq;
1577 pf = gq_help->pf;
1578 penalty = exp_E_gquad_ali_penalty(i, L, l, (const short **)S, (unsigned int)n_seq, pf);
1579
1580 if (penalty != 0.) {
1581 double q = 1.;
1582 for (s = 0; s < n_seq; s++) {
1583 u1 = a2s[s][i + L + l[0] - 1] - a2s[s][i + L - 1];
1584 u2 = a2s[s][i + 2 * L + l[0] + l[1] - 1] - a2s[s][i + 2 * L + l[0] - 1];
1585 u3 = a2s[s][i + 3 * L + l[0] + l[1] + l[2] - 1] - a2s[s][i + 3 * L + l[0] + l[1] - 1];
1586 q *= pf->expgquad[L][u1 + u2 + u3];
1587 }
1588 *((FLT_OR_DBL *)data) += q * penalty;
1589 }
1590 }
1591
1592
1593 PRIVATE void
1594 gquad_pf_pos(int i,
1595 int L,
1596 int *l,
1597 void *data,
1598 void *pf,
1599 void *Lmax,
1600 void *lmax)
1601 {
1602 FLT_OR_DBL gq = 0.;
1603
1604 gquad_pf(i, L, l, (void *)&gq, pf, NULL, NULL);
1605
1606 if (gq > *((FLT_OR_DBL *)data)) {
1607 *((FLT_OR_DBL *)data) = gq;
1608 *((int *)Lmax) = L;
1609 *((int *)lmax) = l[0];
1610 *(((int *)lmax) + 1) = l[1];
1611 *(((int *)lmax) + 2) = l[2];
1612 }
1613 }
1614
1615
1616 PRIVATE void
1617 gquad_pf_pos_ali(int i,
1618 int L,
1619 int *l,
1620 void *data,
1621 void *helper,
1622 void *NA,
1623 void *NA2)
1624 {
1625 FLT_OR_DBL gq = 0.;
1626 struct gquad_ali_helper *gq_help = (struct gquad_ali_helper *)helper;
1627
1628 gquad_pf_ali(i, L, l, (void *)&gq, helper, NULL, NULL);
1629
1630 if (gq > *((FLT_OR_DBL *)data)) {
1631 *((FLT_OR_DBL *)data) = gq;
1632 gq_help->L = L;
1633 gq_help->l[0] = l[0];
1634 gq_help->l[1] = l[1];
1635 gq_help->l[2] = l[2];
1636 }
1637 }
1638
1639
1640 PRIVATE void
1641 gquad_mfe_ali(int i,
1642 int L,
1643 int *l,
1644 void *data,
1645 void *helper,
1646 void *NA,
1647 void *NA2)
1648 {
1649 int j, en[2], cc;
1650
1651 en[0] = en[1] = INF;
1652
1653 for (j = 0; j < 3; j++) {
1654 if (l[j] > VRNA_GQUAD_MAX_LINKER_LENGTH)
1655 return;
1656
1657 if (l[j] < VRNA_GQUAD_MIN_LINKER_LENGTH)
1658 return;
1659 }
1660 if (L > VRNA_GQUAD_MAX_STACK_SIZE)
1661 return;
1662
1663 if (L < VRNA_GQUAD_MIN_STACK_SIZE)
1664 return;
1665
1666 gquad_mfe_ali_en(i, L, l, (void *)(&(en[0])), helper, NULL, NULL);
1667 if (en[1] != INF) {
1668 cc = en[0] + en[1];
1669 if (cc < *((int *)data))
1670 *((int *)data) = cc;
1671 }
1672 }
1673
1674
1675 PRIVATE void
1676 gquad_mfe_ali_en(int i,
1677 int L,
1678 int *l,
1679 void *data,
1680 void *helper,
1681 void *NA,
1682 void *NA2)
1683 {
1684 short **S;
1685 unsigned int **a2s;
1686 int en[2], cc, dd, s, n_seq, u1, u2, u3;
1687 vrna_param_t *P;
1688 struct gquad_ali_helper *gq_help;
1689
1690 gq_help = (struct gquad_ali_helper *)helper;
1691 S = gq_help->S;
1692 a2s = gq_help->a2s;
1693 n_seq = gq_help->n_seq;
1694 P = gq_help->P;
1695
1696 for (en[0] = s = 0; s < n_seq; s++) {
1697 u1 = a2s[s][i + L + l[0] - 1] - a2s[s][i + L - 1];
1698 u2 = a2s[s][i + 2 * L + l[0] + l[1] - 1] - a2s[s][i + 2 * L + l[0] - 1];
1699 u3 = a2s[s][i + 3 * L + l[0] + l[1] + l[2] - 1] - a2s[s][i + 3 * L + l[0] + l[1] - 1];
1700 en[0] += P->gquad[L][u1 + u2 + u3];
1701 }
1702 en[1] = E_gquad_ali_penalty(i, L, l, (const short **)S, (unsigned int)n_seq, P);
1703
1704 if (en[1] != INF) {
1705 cc = en[0] + en[1];
1706 dd = ((int *)data)[0] + ((int *)data)[1];
1707 if (cc < dd) {
1708 ((int *)data)[0] = en[0];
1709 ((int *)data)[1] = en[1];
1710 }
1711 }
1712 }
1713
1714
1715 PRIVATE void
1716 gquad_interact(int i,
1717 int L,
1718 int *l,
1719 void *data,
1720 void *pf,
1721 void *index,
1722 void *NA2)
1723 {
1724 int x, *idx;
1725 FLT_OR_DBL gq, *pp;
1726
1727 idx = (int *)index;
1728 pp = (FLT_OR_DBL *)data;
1729 gq = exp_E_gquad(L, l, (vrna_exp_param_t *)pf);
1730
1731 for (x = 0; x < L; x++) {
1732 pp[idx[i + x] - (i + x + 3 * L + l[0] + l[1] + l[2])] += gq;
1733 pp[idx[i + x] - (i + x + L + l[0])] += gq;
1734 pp[idx[i + x + L + l[0]] - (i + x + 2 * L + l[0] + l[1])] += gq;
1735 pp[idx[i + x + 2 * L + l[0] + l[1]] - (i + x + 3 * L + l[0] + l[1] + l[2])] += gq;
1736 }
1737 }
1738
1739
1740 PRIVATE void
1741 gquad_interact_ali(int i,
1742 int L,
1743 int *l,
1744 void *data,
1745 void *index,
1746 void *helper,
1747 void *NA)
1748 {
1749 int x, *idx, bad;
1750 FLT_OR_DBL gq, *pp;
1751
1752 idx = (int *)index;
1753 pp = (FLT_OR_DBL *)data;
1754 bad = 0;
1755
1756 for (x = 0; x < 3; x++) {
1757 if (l[x] > VRNA_GQUAD_MAX_LINKER_LENGTH) {
1758 bad = 1;
1759 break;
1760 }
1761
1762 if (l[x] < VRNA_GQUAD_MIN_LINKER_LENGTH) {
1763 bad = 1;
1764 break;
1765 }
1766 }
1767 if (L > VRNA_GQUAD_MAX_STACK_SIZE)
1768 bad = 1;
1769
1770 if (L < VRNA_GQUAD_MIN_STACK_SIZE)
1771 bad = 1;
1772
1773 gq = 0.;
1774
1775 if (!bad) {
1776 gquad_pf_ali(i, L, l,
1777 (void *)(&gq),
1778 helper,
1779 NULL,
1780 NULL);
1781 }
1782
1783 for (x = 0; x < L; x++) {
1784 pp[idx[i + x] - (i + x + 3 * L + l[0] + l[1] + l[2])] += gq;
1785 pp[idx[i + x] - (i + x + L + l[0])] += gq;
1786 pp[idx[i + x + L + l[0]] - (i + x + 2 * L + l[0] + l[1])] += gq;
1787 pp[idx[i + x + 2 * L + l[0] + l[1]] - (i + x + 3 * L + l[0] + l[1] + l[2])] += gq;
1788 }
1789 }
1790
1791
1792 PRIVATE INLINE int *
1793 get_g_islands(short *S)
1794 {
1795 return get_g_islands_sub(S, 1, S[0]);
1796 }
1797
1798
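/*
 * Compute the G-island array for the subsequence [i,j]: gg[x] holds
 * the length of the run of consecutive G's (encoded as 3) starting at
 * position x, and 0 otherwise. For example (illustrative), an encoded
 * subsequence GGGAGG yields gg = {3, 2, 1, 0, 2, 1}. The returned
 * pointer is offset so that it can be indexed directly with sequence
 * positions i..j, hence the 'gg += i - 1' before free() at call sites.
 */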
1799 PRIVATE INLINE int *
1800 get_g_islands_sub(short *S,
1801 int i,
1802 int j)
1803 {
1804 int x, *gg;
1805
1806 gg = (int *)vrna_alloc(sizeof(int) * (j - i + 2));
1807 gg -= i - 1;
1808
1809 if (S[j] == 3)
1810 gg[j] = 1;
1811
1812 for (x = j - 1; x >= i; x--)
1813 if (S[x] == 3)
1814 gg[x] = gg[x + 1] + 1;
1815
1816 return gg;
1817 }
1818
1819
1820 /**
1821 * We could've also created a macro that loops over all G-quadruplexes
1822 * delimited by i and j. However, for the fun of it we use this function
1823 * that receives a pointer to a callback function which in turn does the
1824 * actual computation for each quadruplex found.
1825 */
1826 PRIVATE
1827 void
1828 process_gquad_enumeration(int *gg,
1829 int i,
1830 int j,
1831 void (*f)(int, int, int *,
1832 void *, void *, void *, void *),
1833 void *data,
1834 void *P,
1835 void *aux1,
1836 void *aux2)
1837 {
1838 int L, l[3], n, max_linker, maxl0, maxl1;
1839
1840 n = j - i + 1;
1841
1842 if ((n >= VRNA_GQUAD_MIN_BOX_SIZE) && (n <= VRNA_GQUAD_MAX_BOX_SIZE)) {
1843 for (L = MIN2(gg[i], VRNA_GQUAD_MAX_STACK_SIZE);
1844 L >= VRNA_GQUAD_MIN_STACK_SIZE;
1845 L--)
1846 if (gg[j - L + 1] >= L) {
1847 max_linker = n - 4 * L;
1848 if ((max_linker >= 3 * VRNA_GQUAD_MIN_LINKER_LENGTH)
1849 && (max_linker <= 3 * VRNA_GQUAD_MAX_LINKER_LENGTH)) {
1850 maxl0 = MIN2(VRNA_GQUAD_MAX_LINKER_LENGTH,
1851 max_linker - 2 * VRNA_GQUAD_MIN_LINKER_LENGTH
1852 );
1853 for (l[0] = VRNA_GQUAD_MIN_LINKER_LENGTH;
1854 l[0] <= maxl0;
1855 l[0]++)
1856 if (gg[i + L + l[0]] >= L) {
1857 maxl1 = MIN2(VRNA_GQUAD_MAX_LINKER_LENGTH,
1858 max_linker - l[0] - VRNA_GQUAD_MIN_LINKER_LENGTH
1859 );
1860 for (l[1] = VRNA_GQUAD_MIN_LINKER_LENGTH;
1861 l[1] <= maxl1;
1862 l[1]++)
1863 if (gg[i + 2 * L + l[0] + l[1]] >= L) {
1864 l[2] = max_linker - l[0] - l[1];
1865 f(i, L, &(l[0]), data, P, aux1, aux2);
1866 }
1867 }
1868 }
1869 }
1870 }
1871 }
1872