1 /*
2  * gquad.c
3  *
4  * Ronny Lorenz 2012
5  *
6  * ViennaRNA Package
7  */
8 
9 #ifdef HAVE_CONFIG_H
10 #include "config.h"
11 #endif
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <math.h>
16 #include <string.h>
17 
18 #include "ViennaRNA/fold_vars.h"
19 #include "ViennaRNA/datastructures/basic.h"
20 #include "ViennaRNA/params/constants.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/utils/alignments.h"
23 #include "ViennaRNA/gquad.h"
24 
25 #ifndef INLINE
26 #ifdef __GNUC__
27 # define INLINE inline
28 #else
29 # define INLINE
30 #endif
31 #endif
32 
33 /**
34  *  Use this macro to loop over each G-quadruplex
35  *  delimited by a and b within the subsequence [c,d]
36  */
37 #define FOR_EACH_GQUAD(a, b, c, d)  \
38   for ((a) = (d) - VRNA_GQUAD_MIN_BOX_SIZE + 1; (a) >= (c); (a)--) \
39     for ((b) = (a) + VRNA_GQUAD_MIN_BOX_SIZE - 1; \
40          (b) <= MIN2((d), (a) + VRNA_GQUAD_MAX_BOX_SIZE - 1); \
41          (b)++)
42 
43 /**
44  *  This macro does almost the same as FOR_EACH_GQUAD() but keeps
45  *  the 5' delimiter fixed. 'b' is the 3' delimiter of the gquad,
46  *  for gquads within subsequence [a,c] that have 5' delimiter 'a'
47  */
48 #define FOR_EACH_GQUAD_AT(a, b, c)  \
49   for ((b) = (a) + VRNA_GQUAD_MIN_BOX_SIZE - 1; \
50        (b) <= MIN2((c), (a) + VRNA_GQUAD_MAX_BOX_SIZE - 1); \
51        (b)++)
52 
53 
54 struct gquad_ali_helper {
55   short             **S;
56   unsigned int      **a2s;
57   int               n_seq;
58   vrna_param_t      *P;
59   vrna_exp_param_t  *pf;
60   int               L;
61   int               *l;
62 };
63 
64 /*
65  #################################
66  # PRIVATE FUNCTION DECLARATIONS #
67  #################################
68  */
69 
70 PRIVATE INLINE
71 int *
72 get_g_islands(short *S);
73 
74 
75 PRIVATE INLINE
76 int *
77 get_g_islands_sub(short *S,
78                   int   i,
79                   int   j);
80 
81 
82 /**
83  *  IMPORTANT:
84  *  If you don't know how to use this function, DONT'T USE IT!
85  *
86  *  The function pointer this function takes as argument is
87  *  used for individual calculations with each g-quadruplex
88  *  delimited by [i,j].
89  *  The function it points to always receives as first 3 arguments
90  *  position i, the stack size L and an array l[3] containing the
91  *  individual linker sizes.
92  *  The remaining 4 (void *) pointers of the callback function receive
93  *  the parameters 'data', 'P', 'aux1' and 'aux2' and thus may be
94  *  used to pass whatever data you like to.
95  *  As the names of those parameters suggest the convention is that
96  *  'data' should be used as a pointer where data is stored into,
97  *  e.g the MFE or PF and the 'P' parameter should actually be a
98  *  'vrna_param_t *' or 'vrna_exp_param_t *' type.
99  *  However, what you actually pass obviously depends on the
100  *  function the pointer is pointing to.
101  *
102  *  Although all of this may look like an overkill, it is found
103  *  to be almost as fast as implementing g-quadruplex enumeration
104  *  in each individual scenario, i.e. code duplication.
105  *  Using this function, however, ensures that all g-quadruplex
106  *  enumerations are absolutely identical.
107  */
108 PRIVATE
109 void
110 process_gquad_enumeration(int *gg,
111                           int i,
112                           int j,
113                           void (*f)(int, int, int *,
114                                     void *, void *, void *, void *),
115                           void *data,
116                           void *P,
117                           void *aux1,
118                           void *aux2);
119 
120 
121 /**
122  *  MFE callback for process_gquad_enumeration()
123  */
124 PRIVATE
125 void
126 gquad_mfe(int   i,
127           int   L,
128           int   *l,
129           void  *data,
130           void  *P,
131           void  *NA,
132           void  *NA2);
133 
134 
135 PRIVATE
136 void
137 gquad_mfe_pos(int   i,
138               int   L,
139               int   *l,
140               void  *data,
141               void  *P,
142               void  *Lmfe,
143               void  *lmfe);
144 
145 
146 PRIVATE void gquad_mfe_ali_pos(int  i,
147                                int  L,
148                                int  *l,
149                                void *data,
150                                void *helper,
151                                void *Lmfe,
152                                void *lmfe);
153 
154 
155 PRIVATE
156 void
157 gquad_pos_exhaustive(int  i,
158                      int  L,
159                      int  *l,
160                      void *data,
161                      void *P,
162                      void *Lex,
163                      void *lex);
164 
165 
166 /**
167  * Partition function callback for process_gquad_enumeration()
168  */
169 PRIVATE
170 void
171 gquad_pf(int  i,
172          int  L,
173          int  *l,
174          void *data,
175          void *P,
176          void *NA,
177          void *NA2);
178 
179 
180 PRIVATE void
181 gquad_pf_ali(int  i,
182              int  L,
183              int  *l,
184              void *data,
185              void *helper,
186              void *NA,
187              void *NA2);
188 
189 
190 /**
191  * Partition function callback for process_gquad_enumeration()
192  * in contrast to gquad_pf() it stores the stack size L and
193  * the linker lengths l[3] of the g-quadruplex that dominates
194  * the interval [i,j]
195  * (FLT_OR_DBL *)data must be 0. on entry
196  */
197 PRIVATE
198 void
199 gquad_pf_pos(int  i,
200              int  L,
201              int  *l,
202              void *data,
203              void *pf,
204              void *Lmax,
205              void *lmax);
206 
207 
208 PRIVATE void
209 gquad_pf_pos_ali(int  i,
210                  int  L,
211                  int  *l,
212                  void *data,
213                  void *helper,
214                  void *NA1,
215                  void *NA2);
216 
217 
218 /**
219  * MFE (alifold) callback for process_gquad_enumeration()
220  */
221 PRIVATE
222 void
223 gquad_mfe_ali(int   i,
224               int   L,
225               int   *l,
226               void  *data,
227               void  *helper,
228               void  *NA,
229               void  *NA2);
230 
231 
232 /**
233  * MFE (alifold) callback for process_gquad_enumeration()
234  * with seperation of free energy and penalty contribution
235  */
236 PRIVATE
237 void
238 gquad_mfe_ali_en(int  i,
239                  int  L,
240                  int  *l,
241                  void *data,
242                  void *helper,
243                  void *NA,
244                  void *NA2);
245 
246 
247 PRIVATE
248 void
249 gquad_interact(int  i,
250                int  L,
251                int  *l,
252                void *data,
253                void *pf,
254                void *index,
255                void *NA2);
256 
257 
258 PRIVATE
259 void
260 gquad_interact_ali(int  i,
261                    int  L,
262                    int  *l,
263                    void *data,
264                    void *index,
265                    void *helper,
266                    void *NA);
267 
268 
269 PRIVATE
270 void
271 gquad_count(int   i,
272             int   L,
273             int   *l,
274             void  *data,
275             void  *NA,
276             void  *NA2,
277             void  *NA3);
278 
279 
280 PRIVATE
281 void
282 gquad_count_layers(int  i,
283                    int  L,
284                    int  *l,
285                    void *data,
286                    void *NA,
287                    void *NA2,
288                    void *NA3);
289 
290 
291 /* other useful static functions */
292 
293 PRIVATE
294 int
295 E_gquad_ali_penalty(int           i,
296                     int           L,
297                     int           l[3],
298                     const short   **S,
299                     unsigned int  n_seq,
300                     vrna_param_t  *P);
301 
302 
303 PRIVATE
304 FLT_OR_DBL
305 exp_E_gquad_ali_penalty(int               i,
306                         int               L,
307                         int               l[3],
308                         const short       **S,
309                         unsigned int      n_seq,
310                         vrna_exp_param_t  *P);
311 
312 
313 PRIVATE void
314 count_gquad_layer_mismatches(int          i,
315                              int          L,
316                              int          l[3],
317                              const short  **S,
318                              unsigned int n_seq,
319                              unsigned int mm[2]);
320 
321 
322 PRIVATE int **
323 create_L_matrix(short         *S,
324                 int           start,
325                 int           maxdist,
326                 int           n,
327                 int           **g,
328                 vrna_param_t  *P);
329 
330 
331 PRIVATE int **
332 create_aliL_matrix(int          start,
333                    int          maxdist,
334                    int          n,
335                    int          **g,
336                    short        *S_cons,
337                    short        **S,
338                    unsigned int **a2s,
339                    int          n_seq,
340                    vrna_param_t *P);
341 
342 
343 PRIVATE void gquad_mfe_ali_pos(int  i,
344                                int  L,
345                                int  *l,
346                                void *data,
347                                void *P,
348                                void *Lmfe,
349                                void *lmfe);
350 
351 
352 /*
353  #########################################
354  # BEGIN OF PUBLIC FUNCTION DEFINITIONS  #
355  #      (all available in RNAlib)        #
356  #########################################
357  */
358 
359 /********************************
360  * Here are the G-quadruplex energy
361  * contribution functions
362  *********************************/
363 PUBLIC int
E_gquad(int L,int l[3],vrna_param_t * P)364 E_gquad(int           L,
365         int           l[3],
366         vrna_param_t  *P)
367 {
368   int i, c = INF;
369 
370   for (i = 0; i < 3; i++) {
371     if (l[i] > VRNA_GQUAD_MAX_LINKER_LENGTH)
372       return c;
373 
374     if (l[i] < VRNA_GQUAD_MIN_LINKER_LENGTH)
375       return c;
376   }
377   if (L > VRNA_GQUAD_MAX_STACK_SIZE)
378     return c;
379 
380   if (L < VRNA_GQUAD_MIN_STACK_SIZE)
381     return c;
382 
383   gquad_mfe(0, L, l,
384             (void *)(&c),
385             (void *)P,
386             NULL,
387             NULL);
388   return c;
389 }
390 
391 
392 PUBLIC FLT_OR_DBL
exp_E_gquad(int L,int l[3],vrna_exp_param_t * pf)393 exp_E_gquad(int               L,
394             int               l[3],
395             vrna_exp_param_t  *pf)
396 {
397   int         i;
398   FLT_OR_DBL  q = 0.;
399 
400   for (i = 0; i < 3; i++) {
401     if (l[i] > VRNA_GQUAD_MAX_LINKER_LENGTH)
402       return q;
403 
404     if (l[i] < VRNA_GQUAD_MIN_LINKER_LENGTH)
405       return q;
406   }
407   if (L > VRNA_GQUAD_MAX_STACK_SIZE)
408     return q;
409 
410   if (L < VRNA_GQUAD_MIN_STACK_SIZE)
411     return q;
412 
413   gquad_pf(0, L, l,
414            (void *)(&q),
415            (void *)pf,
416            NULL,
417            NULL);
418   return q;
419 }
420 
421 
422 PUBLIC FLT_OR_DBL
exp_E_gquad_ali(int i,int L,int l[3],short ** S,unsigned int ** a2s,int n_seq,vrna_exp_param_t * pf)423 exp_E_gquad_ali(int               i,
424                 int               L,
425                 int               l[3],
426                 short             **S,
427                 unsigned int      **a2s,
428                 int               n_seq,
429                 vrna_exp_param_t  *pf)
430 {
431   int         s;
432   FLT_OR_DBL  q = 0.;
433 
434   for (s = 0; s < 3; s++) {
435     if (l[s] > VRNA_GQUAD_MAX_LINKER_LENGTH)
436       return q;
437 
438     if (l[s] < VRNA_GQUAD_MIN_LINKER_LENGTH)
439       return q;
440   }
441   if (L > VRNA_GQUAD_MAX_STACK_SIZE)
442     return q;
443 
444   if (L < VRNA_GQUAD_MIN_STACK_SIZE)
445     return q;
446 
447   struct gquad_ali_helper gq_help;
448 
449   gq_help.S     = S;
450   gq_help.a2s   = a2s;
451   gq_help.n_seq = n_seq;
452   gq_help.pf    = pf;
453 
454   gquad_pf_ali(i, L, l,
455                (void *)(&q),
456                (void *)&gq_help,
457                NULL,
458                NULL);
459   return q;
460 }
461 
462 
463 PUBLIC void
E_gquad_ali_en(int i,int L,int l[3],const short ** S,unsigned int ** a2s,unsigned int n_seq,vrna_param_t * P,int en[2])464 E_gquad_ali_en(int          i,
465                int          L,
466                int          l[3],
467                const short  **S,
468                unsigned int **a2s,
469                unsigned int n_seq,
470                vrna_param_t *P,
471                int          en[2])
472 {
473   unsigned int  s;
474   int           ee, ee2, u1, u2, u3;
475 
476   en[0] = en[1] = INF;
477 
478   /* check if the quadruplex obeys the canonical form */
479   for (s = 0; s < 3; s++) {
480     if (l[s] > VRNA_GQUAD_MAX_LINKER_LENGTH)
481       return;
482 
483     if (l[s] < VRNA_GQUAD_MIN_LINKER_LENGTH)
484       return;
485   }
486   if (L > VRNA_GQUAD_MAX_STACK_SIZE)
487     return;
488 
489   if (L < VRNA_GQUAD_MIN_STACK_SIZE)
490     return;
491 
492   /* compute actual quadruplex contribution for subalignment */
493   ee = 0;
494 
495   for (s = 0; s < n_seq; s++) {
496     u1  = a2s[s][i + L + l[0] - 1] - a2s[s][i + L - 1];
497     u2  = a2s[s][i + 2 * L + l[0] + l[1] - 1] - a2s[s][i + 2 * L + l[0] - 1];
498     u3  = a2s[s][i + 3 * L + l[0] + l[1] + l[2] - 1] - a2s[s][i + 3 * L + l[0] + l[1] - 1];
499     ee  += P->gquad[L][u1 + u2 + u3];
500   }
501 
502   /* get penalty from incompatible sequences in alignment */
503   ee2 = E_gquad_ali_penalty(i, L, l, S, n_seq, P);
504 
505   /* assign return values */
506   if (ee2 != INF) {
507     en[0] = ee;
508     en[1] = ee2;
509   }
510 }
511 
512 
513 /********************************
514  * Now, the triangular matrix
515  * generators for the G-quadruplex
516  * contributions are following
517  *********************************/
518 PUBLIC int *
get_gquad_matrix(short * S,vrna_param_t * P)519 get_gquad_matrix(short        *S,
520                  vrna_param_t *P)
521 {
522   int n, size, i, j, *gg, *my_index, *data;
523 
524   n         = S[0];
525   my_index  = vrna_idx_col_wise(n);
526   gg        = get_g_islands(S);
527   size      = (n * (n + 1)) / 2 + 2;
528   data      = (int *)vrna_alloc(sizeof(int) * size);
529 
530   /* prefill the upper triangular matrix with INF */
531   for (i = 0; i < size; i++)
532     data[i] = INF;
533 
534   FOR_EACH_GQUAD(i, j, 1, n){
535     process_gquad_enumeration(gg, i, j,
536                               &gquad_mfe,
537                               (void *)(&(data[my_index[j] + i])),
538                               (void *)P,
539                               NULL,
540                               NULL);
541   }
542 
543   free(my_index);
544   free(gg);
545   return data;
546 }
547 
548 
549 PUBLIC FLT_OR_DBL *
get_gquad_pf_matrix(short * S,FLT_OR_DBL * scale,vrna_exp_param_t * pf)550 get_gquad_pf_matrix(short             *S,
551                     FLT_OR_DBL        *scale,
552                     vrna_exp_param_t  *pf)
553 {
554   int         n, size, *gg, i, j, *my_index;
555   FLT_OR_DBL  *data;
556 
557 
558   n         = S[0];
559   size      = (n * (n + 1)) / 2 + 2;
560   data      = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
561   gg        = get_g_islands(S);
562   my_index  = vrna_idx_row_wise(n);
563 
564   FOR_EACH_GQUAD(i, j, 1, n){
565     process_gquad_enumeration(gg, i, j,
566                               &gquad_pf,
567                               (void *)(&(data[my_index[i] - j])),
568                               (void *)pf,
569                               NULL,
570                               NULL);
571     data[my_index[i] - j] *= scale[j - i + 1];
572   }
573 
574   free(my_index);
575   free(gg);
576   return data;
577 }
578 
579 
580 PUBLIC FLT_OR_DBL *
get_gquad_pf_matrix_comparative(unsigned int n,short * S_cons,short ** S,unsigned int ** a2s,FLT_OR_DBL * scale,unsigned int n_seq,vrna_exp_param_t * pf)581 get_gquad_pf_matrix_comparative(unsigned int      n,
582                                 short             *S_cons,
583                                 short             **S,
584                                 unsigned int      **a2s,
585                                 FLT_OR_DBL        *scale,
586                                 unsigned int      n_seq,
587                                 vrna_exp_param_t  *pf)
588 {
589   int                     size, *gg, i, j, *my_index;
590   FLT_OR_DBL              *data;
591   struct gquad_ali_helper gq_help;
592 
593 
594   size          = (n * (n + 1)) / 2 + 2;
595   data          = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
596   gg            = get_g_islands(S_cons);
597   my_index      = vrna_idx_row_wise(n);
598   gq_help.S     = S;
599   gq_help.a2s   = a2s;
600   gq_help.n_seq = n_seq;
601   gq_help.pf    = pf;
602 
603   FOR_EACH_GQUAD(i, j, 1, n){
604     process_gquad_enumeration(gg, i, j,
605                               &gquad_pf_ali,
606                               (void *)(&(data[my_index[i] - j])),
607                               (void *)&gq_help,
608                               NULL,
609                               NULL);
610     data[my_index[i] - j] *= scale[j - i + 1];
611   }
612 
613   free(my_index);
614   free(gg);
615   return data;
616 }
617 
618 
619 PUBLIC int *
get_gquad_ali_matrix(unsigned int n,short * S_cons,short ** S,unsigned int ** a2s,int n_seq,vrna_param_t * P)620 get_gquad_ali_matrix(unsigned int  n,
621                      short        *S_cons,
622                      short        **S,
623                      unsigned int **a2s,
624                      int          n_seq,
625                      vrna_param_t *P)
626 {
627   int                     size, *data, *gg;
628   int                     i, j, *my_index;
629   struct gquad_ali_helper gq_help;
630 
631   size      = (n * (n + 1)) / 2 + 2;
632   data      = (int *)vrna_alloc(sizeof(int) * size);
633   gg        = get_g_islands(S_cons);
634   my_index  = vrna_idx_col_wise(n);
635 
636   gq_help.S     = S;
637   gq_help.a2s   = a2s;
638   gq_help.n_seq = n_seq;
639   gq_help.P     = P;
640 
641   /* prefill the upper triangular matrix with INF */
642   for (i = 0; i < size; i++)
643     data[i] = INF;
644 
645   FOR_EACH_GQUAD(i, j, 1, n){
646     process_gquad_enumeration(gg, i, j,
647                               &gquad_mfe_ali,
648                               (void *)(&(data[my_index[j] + i])),
649                               (void *)&gq_help,
650                               NULL,
651                               NULL);
652   }
653 
654   free(my_index);
655   free(gg);
656   return data;
657 }
658 
659 
660 PUBLIC int **
get_gquad_L_matrix(short * S,int start,int maxdist,int n,int ** g,vrna_param_t * P)661 get_gquad_L_matrix(short        *S,
662                    int          start,
663                    int          maxdist,
664                    int          n,
665                    int          **g,
666                    vrna_param_t *P)
667 {
668   return create_L_matrix(S, start, maxdist, n, g, P);
669 }
670 
671 
672 PUBLIC void
vrna_gquad_mx_local_update(vrna_fold_compound_t * vc,int start)673 vrna_gquad_mx_local_update(vrna_fold_compound_t *vc,
674                            int                  start)
675 {
676   if (vc->type == VRNA_FC_TYPE_COMPARATIVE) {
677     vc->matrices->ggg_local = create_aliL_matrix(
678       start,
679       vc->window_size,
680       vc->length,
681       vc->matrices->ggg_local,
682       vc->S_cons,
683       vc->S,
684       vc->a2s,
685       vc->n_seq,
686       vc->params);
687   } else {
688     vc->matrices->ggg_local = create_L_matrix(
689       vc->sequence_encoding,
690       start,
691       vc->window_size,
692       vc->length,
693       vc->matrices->ggg_local,
694       vc->params);
695   }
696 }
697 
698 
699 PRIVATE int **
create_L_matrix(short * S,int start,int maxdist,int n,int ** g,vrna_param_t * P)700 create_L_matrix(short         *S,
701                 int           start,
702                 int           maxdist,
703                 int           n,
704                 int           **g,
705                 vrna_param_t  *P)
706 {
707   int **data;
708   int i, j, k, *gg, p, q;
709 
710   p   = MAX2(1, start);
711   q   = MIN2(n, start + maxdist + 4);
712   gg  = get_g_islands_sub(S, p, q);
713 
714   if (g) {
715     /* we just update the gquadruplex contribution for the current
716      * start and rotate the rest */
717     data = g;
718     /* we re-use the memory allocated previously */
719     data[start]               = data[start + maxdist + 5];
720     data[start + maxdist + 5] = NULL;
721 
722     /* prefill with INF */
723     for (i = 0; i < maxdist + 5; i++)
724       data[start][i] = INF;
725 
726     /*  now we compute contributions for all gquads with 5' delimiter at
727      *  position 'start'
728      */
729     FOR_EACH_GQUAD_AT(start, j, start + maxdist + 4){
730       process_gquad_enumeration(gg, start, j,
731                                 &gquad_mfe,
732                                 (void *)(&(data[start][j - start])),
733                                 (void *)P,
734                                 NULL,
735                                 NULL);
736     }
737   } else {
738     /* create a new matrix from scratch since this is the first
739      * call to this function */
740 
741     /* allocate memory and prefill with INF */
742     data = (int **)vrna_alloc(sizeof(int *) * (n + 1));
743     for (k = n; (k > n - maxdist - 5) && (k >= 0); k--) {
744       data[k] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
745       for (i = 0; i < maxdist + 5; i++)
746         data[k][i] = INF;
747     }
748 
749     /* compute all contributions for the gquads in this interval */
750     FOR_EACH_GQUAD(i, j, MAX2(1, n - maxdist - 4), n){
751       process_gquad_enumeration(gg, i, j,
752                                 &gquad_mfe,
753                                 (void *)(&(data[i][j - i])),
754                                 (void *)P,
755                                 NULL,
756                                 NULL);
757     }
758   }
759 
760   gg += p - 1;
761   free(gg);
762   return data;
763 }
764 
765 
766 PRIVATE int **
create_aliL_matrix(int start,int maxdist,int n,int ** g,short * S_cons,short ** S,unsigned int ** a2s,int n_seq,vrna_param_t * P)767 create_aliL_matrix(int          start,
768                    int          maxdist,
769                    int          n,
770                    int          **g,
771                    short        *S_cons,
772                    short        **S,
773                    unsigned int **a2s,
774                    int          n_seq,
775                    vrna_param_t *P)
776 {
777   int **data;
778   int i, j, k, *gg, p, q;
779 
780   p   = MAX2(1, start);
781   q   = MIN2(n, start + maxdist + 4);
782   gg  = get_g_islands_sub(S_cons, p, q);
783 
784   struct gquad_ali_helper gq_help;
785 
786   gq_help.S     = S;
787   gq_help.a2s   = a2s;
788   gq_help.n_seq = n_seq;
789   gq_help.P     = P;
790 
791   if (g) {
792     /* we just update the gquadruplex contribution for the current
793      * start and rotate the rest */
794     data = g;
795     /* we re-use the memory allocated previously */
796     data[start]               = data[start + maxdist + 5];
797     data[start + maxdist + 5] = NULL;
798 
799     /* prefill with INF */
800     for (i = 0; i < maxdist + 5; i++)
801       data[start][i] = INF;
802 
803     /*  now we compute contributions for all gquads with 5' delimiter at
804      *  position 'start'
805      */
806     FOR_EACH_GQUAD_AT(start, j, start + maxdist + 4){
807       process_gquad_enumeration(gg, start, j,
808                                 &gquad_mfe_ali,
809                                 (void *)(&(data[start][j - start])),
810                                 (void *)&gq_help,
811                                 NULL,
812                                 NULL);
813     }
814   } else {
815     /* create a new matrix from scratch since this is the first
816      * call to this function */
817 
818     /* allocate memory and prefill with INF */
819     data = (int **)vrna_alloc(sizeof(int *) * (n + 1));
820     for (k = n; (k > n - maxdist - 5) && (k >= 0); k--) {
821       data[k] = (int *)vrna_alloc(sizeof(int) * (maxdist + 5));
822       for (i = 0; i < maxdist + 5; i++)
823         data[k][i] = INF;
824     }
825 
826     /* compute all contributions for the gquads in this interval */
827     FOR_EACH_GQUAD(i, j, MAX2(1, n - maxdist - 4), n){
828       process_gquad_enumeration(gg, i, j,
829                                 &gquad_mfe_ali,
830                                 (void *)(&(data[i][j - i])),
831                                 (void *)&gq_help,
832                                 NULL,
833                                 NULL);
834     }
835   }
836 
837   gg += p - 1;
838   free(gg);
839   return data;
840 }
841 
842 
843 PUBLIC plist *
get_plist_gquad_from_db(const char * structure,float pr)844 get_plist_gquad_from_db(const char  *structure,
845                         float       pr)
846 {
847   int   x, size, actual_size, L, n, ge, ee, gb, l[3];
848   plist *pl;
849 
850   actual_size = 0;
851   ge          = 0;
852   n           = 2;
853   size        = strlen(structure);
854   pl          = (plist *)vrna_alloc(n * size * sizeof(plist));
855 
856   while ((ee = parse_gquad(structure + ge, &L, l)) > 0) {
857     ge  += ee;
858     gb  = ge - L * 4 - l[0] - l[1] - l[2] + 1;
859     /* add pseudo-base pair encloding gquad */
860     for (x = 0; x < L; x++) {
861       if (actual_size >= n * size - 5) {
862         n   *= 2;
863         pl  = (plist *)vrna_realloc(pl, n * size * sizeof(plist));
864       }
865 
866       pl[actual_size].i       = gb + x;
867       pl[actual_size].j       = ge + x - L + 1;
868       pl[actual_size].p       = pr;
869       pl[actual_size++].type  = 0;
870 
871       pl[actual_size].i       = gb + x;
872       pl[actual_size].j       = gb + x + l[0] + L;
873       pl[actual_size].p       = pr;
874       pl[actual_size++].type  = 0;
875 
876       pl[actual_size].i       = gb + x + l[0] + L;
877       pl[actual_size].j       = ge + x - 2 * L - l[2] + 1;
878       pl[actual_size].p       = pr;
879       pl[actual_size++].type  = 0;
880 
881       pl[actual_size].i       = ge + x - 2 * L - l[2] + 1;
882       pl[actual_size].j       = ge + x - L + 1;
883       pl[actual_size].p       = pr;
884       pl[actual_size++].type  = 0;
885     }
886   }
887 
888   pl[actual_size].i   = pl[actual_size].j = 0;
889   pl[actual_size++].p = 0;
890   pl                  = (plist *)vrna_realloc(pl, actual_size * sizeof(plist));
891   return pl;
892 }
893 
894 
895 PUBLIC void
get_gquad_pattern_mfe(short * S,int i,int j,vrna_param_t * P,int * L,int l[3])896 get_gquad_pattern_mfe(short         *S,
897                       int           i,
898                       int           j,
899                       vrna_param_t  *P,
900                       int           *L,
901                       int           l[3])
902 {
903   int *gg = get_g_islands_sub(S, i, j);
904   int c   = INF;
905 
906   process_gquad_enumeration(gg, i, j,
907                             &gquad_mfe_pos,
908                             (void *)(&c),
909                             (void *)P,
910                             (void *)L,
911                             (void *)l);
912 
913   gg += i - 1;
914   free(gg);
915 }
916 
917 
918 PUBLIC void
get_gquad_pattern_mfe_ali(short ** S,unsigned int ** a2s,short * S_cons,int n_seq,int i,int j,vrna_param_t * P,int * L,int l[3])919 get_gquad_pattern_mfe_ali(short         **S,
920                           unsigned int  **a2s,
921                           short         *S_cons,
922                           int           n_seq,
923                           int           i,
924                           int           j,
925                           vrna_param_t  *P,
926                           int           *L,
927                           int           l[3])
928 {
929   int                     mfe, *gg;
930   struct gquad_ali_helper gq_help;
931 
932   gg  = get_g_islands_sub(S_cons, i, j);
933   mfe = INF;
934 
935   gq_help.S     = S;
936   gq_help.a2s   = a2s;
937   gq_help.n_seq = n_seq;
938   gq_help.P     = P;
939 
940   process_gquad_enumeration(gg, i, j,
941                             &gquad_mfe_ali_pos,
942                             (void *)(&mfe),
943                             (void *)&gq_help,
944                             (void *)L,
945                             (void *)l);
946 
947   gg += i - 1;
948   free(gg);
949 }
950 
951 
952 PUBLIC void
get_gquad_pattern_exhaustive(short * S,int i,int j,vrna_param_t * P,int * L,int * l,int threshold)953 get_gquad_pattern_exhaustive(short        *S,
954                              int          i,
955                              int          j,
956                              vrna_param_t *P,
957                              int          *L,
958                              int          *l,
959                              int          threshold)
960 {
961   int *gg = get_g_islands_sub(S, i, j);
962 
963   process_gquad_enumeration(gg, i, j,
964                             &gquad_pos_exhaustive,
965                             (void *)(&threshold),
966                             (void *)P,
967                             (void *)L,
968                             (void *)l);
969 
970   gg += i - 1;
971   free(gg);
972 }
973 
974 
975 PUBLIC void
get_gquad_pattern_pf(short * S,int i,int j,vrna_exp_param_t * pf,int * L,int l[3])976 get_gquad_pattern_pf(short            *S,
977                      int              i,
978                      int              j,
979                      vrna_exp_param_t *pf,
980                      int              *L,
981                      int              l[3])
982 {
983   int         *gg = get_g_islands_sub(S, i, j);
984   FLT_OR_DBL  q   = 0.;
985 
986   process_gquad_enumeration(gg, i, j,
987                             &gquad_pf_pos,
988                             (void *)(&q),
989                             (void *)pf,
990                             (void *)L,
991                             (void *)l);
992 
993   gg += i - 1;
994   free(gg);
995 }
996 
997 
998 PUBLIC void
vrna_get_gquad_pattern_pf(vrna_fold_compound_t * fc,int i,int j,int * L,int l[3])999 vrna_get_gquad_pattern_pf(vrna_fold_compound_t  *fc,
1000                           int                   i,
1001                           int                   j,
1002                           int                   *L,
1003                           int                   l[3])
1004 {
1005   short             *S  = fc->type == VRNA_FC_TYPE_SINGLE ? fc->sequence_encoding2 : fc->S_cons;
1006   int               *gg = get_g_islands_sub(S, i, j);
1007   FLT_OR_DBL        q   = 0.;
1008   vrna_exp_param_t  *pf = fc->exp_params;
1009 
1010   if (fc->type == VRNA_FC_TYPE_SINGLE) {
1011     process_gquad_enumeration(gg, i, j,
1012                               &gquad_pf_pos,
1013                               (void *)(&q),
1014                               (void *)pf,
1015                               (void *)L,
1016                               (void *)l);
1017   } else {
1018     struct gquad_ali_helper gq_help;
1019     gq_help.S     = fc->S;
1020     gq_help.a2s   = fc->a2s;
1021     gq_help.n_seq = fc->n_seq;
1022     gq_help.pf    = pf;
1023     gq_help.L     = (int)(*L);
1024     gq_help.l     = (int *)l;
1025     process_gquad_enumeration(gg, i, j,
1026                               &gquad_pf_pos_ali,
1027                               (void *)(&q),
1028                               (void *)&gq_help,
1029                               NULL,
1030                               NULL);
1031     *L = gq_help.L;
1032   }
1033 
1034   gg += i - 1;
1035   free(gg);
1036 }
1037 
1038 
1039 PUBLIC plist *
get_plist_gquad_from_pr(short * S,int gi,int gj,FLT_OR_DBL * G,FLT_OR_DBL * probs,FLT_OR_DBL * scale,vrna_exp_param_t * pf)1040 get_plist_gquad_from_pr(short             *S,
1041                         int               gi,
1042                         int               gj,
1043                         FLT_OR_DBL        *G,
1044                         FLT_OR_DBL        *probs,
1045                         FLT_OR_DBL        *scale,
1046                         vrna_exp_param_t  *pf)
1047 {
1048   int L, l[3];
1049 
1050   return get_plist_gquad_from_pr_max(S, gi, gj, G, probs, scale, &L, l, pf);
1051 }
1052 
1053 
1054 PUBLIC plist *
vrna_get_plist_gquad_from_pr(vrna_fold_compound_t * fc,int gi,int gj)1055 vrna_get_plist_gquad_from_pr(vrna_fold_compound_t *fc,
1056                              int                  gi,
1057                              int                  gj)
1058 {
1059   int L, l[3];
1060 
1061   return vrna_get_plist_gquad_from_pr_max(fc, gi, gj, &L, l);
1062 }
1063 
1064 
1065 PUBLIC plist *
get_plist_gquad_from_pr_max(short * S,int gi,int gj,FLT_OR_DBL * G,FLT_OR_DBL * probs,FLT_OR_DBL * scale,int * Lmax,int lmax[3],vrna_exp_param_t * pf)1066 get_plist_gquad_from_pr_max(short             *S,
1067                             int               gi,
1068                             int               gj,
1069                             FLT_OR_DBL        *G,
1070                             FLT_OR_DBL        *probs,
1071                             FLT_OR_DBL        *scale,
1072                             int               *Lmax,
1073                             int               lmax[3],
1074                             vrna_exp_param_t  *pf)
1075 {
1076   int         n, size, *gg, counter, i, j, *my_index;
1077   FLT_OR_DBL  pp, *tempprobs;
1078   plist       *pl;
1079 
1080   n         = S[0];
1081   size      = (n * (n + 1)) / 2 + 2;
1082   tempprobs = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
1083   pl        = (plist *)vrna_alloc((S[0] * S[0]) * sizeof(plist));
1084   gg        = get_g_islands_sub(S, gi, gj);
1085   counter   = 0;
1086   my_index  = vrna_idx_row_wise(n);
1087 
1088   process_gquad_enumeration(gg, gi, gj,
1089                             &gquad_interact,
1090                             (void *)tempprobs,
1091                             (void *)pf,
1092                             (void *)my_index,
1093                             NULL);
1094 
1095   pp = 0.;
1096   process_gquad_enumeration(gg, gi, gj,
1097                             &gquad_pf_pos,
1098                             (void *)(&pp),
1099                             (void *)pf,
1100                             (void *)Lmax,
1101                             (void *)lmax);
1102 
1103   pp = probs[my_index[gi] - gj] * scale[gj - gi + 1] / G[my_index[gi] - gj];
1104   for (i = gi; i < gj; i++) {
1105     for (j = i; j <= gj; j++) {
1106       if (tempprobs[my_index[i] - j] > 0.) {
1107         pl[counter].i   = i;
1108         pl[counter].j   = j;
1109         pl[counter++].p = pp * tempprobs[my_index[i] - j];
1110       }
1111     }
1112   }
1113   pl[counter].i   = pl[counter].j = 0;
1114   pl[counter++].p = 0.;
1115   /* shrink memory to actual size needed */
1116   pl = (plist *)vrna_realloc(pl, counter * sizeof(plist));
1117 
1118   gg += gi - 1;
1119   free(gg);
1120   free(my_index);
1121   free(tempprobs);
1122   return pl;
1123 }
1124 
1125 
1126 PUBLIC plist *
vrna_get_plist_gquad_from_pr_max(vrna_fold_compound_t * fc,int gi,int gj,int * Lmax,int lmax[3])1127 vrna_get_plist_gquad_from_pr_max(vrna_fold_compound_t *fc,
1128                                  int                  gi,
1129                                  int                  gj,
1130                                  int                  *Lmax,
1131                                  int                  lmax[3])
1132 {
1133   short             *S;
1134   int               n, size, *gg, counter, i, j, *my_index;
1135   FLT_OR_DBL        pp, *tempprobs, *G, *probs, *scale;
1136   plist             *pl;
1137   vrna_exp_param_t  *pf;
1138 
1139   n         = (int)fc->length;
1140   pf        = fc->exp_params;
1141   G         = fc->exp_matrices->G;
1142   probs     = fc->exp_matrices->probs;
1143   scale     = fc->exp_matrices->scale;
1144   S         = (fc->type == VRNA_FC_TYPE_SINGLE) ? fc->sequence_encoding2 : fc->S_cons;
1145   size      = (n * (n + 1)) / 2 + 2;
1146   tempprobs = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * size);
1147   pl        = (plist *)vrna_alloc((n * n) * sizeof(plist));
1148   gg        = get_g_islands_sub(S, gi, gj);
1149   counter   = 0;
1150   my_index  = vrna_idx_row_wise(n);
1151 
1152   pp = 0.;
1153 
1154   if (fc->type == VRNA_FC_TYPE_SINGLE) {
1155     process_gquad_enumeration(gg, gi, gj,
1156                               &gquad_interact,
1157                               (void *)tempprobs,
1158                               (void *)pf,
1159                               (void *)my_index,
1160                               NULL);
1161 
1162     process_gquad_enumeration(gg, gi, gj,
1163                               &gquad_pf_pos,
1164                               (void *)(&pp),
1165                               (void *)pf,
1166                               (void *)Lmax,
1167                               (void *)lmax);
1168   } else {
1169     struct gquad_ali_helper gq_help;
1170     gq_help.S     = fc->S;
1171     gq_help.a2s   = fc->a2s;
1172     gq_help.n_seq = fc->n_seq;
1173     gq_help.pf    = pf;
1174     gq_help.L     = *Lmax;
1175     gq_help.l     = &(lmax[0]);
1176 
1177     process_gquad_enumeration(gg, gi, gj,
1178                               &gquad_interact_ali,
1179                               (void *)tempprobs,
1180                               (void *)my_index,
1181                               (void *)&(gq_help),
1182                               NULL);
1183 
1184     process_gquad_enumeration(gg, gi, gj,
1185                               &gquad_pf_pos_ali,
1186                               (void *)(&pp),
1187                               (void *)&gq_help,
1188                               NULL,
1189                               NULL);
1190     *Lmax = gq_help.L;
1191   }
1192 
1193   pp = probs[my_index[gi] - gj] * scale[gj - gi + 1] / G[my_index[gi] - gj];
1194   for (i = gi; i < gj; i++) {
1195     for (j = i; j <= gj; j++) {
1196       if (tempprobs[my_index[i] - j] > 0.) {
1197         pl[counter].i   = i;
1198         pl[counter].j   = j;
1199         pl[counter++].p = pp * tempprobs[my_index[i] - j];
1200       }
1201     }
1202   }
1203   pl[counter].i   = pl[counter].j = 0;
1204   pl[counter++].p = 0.;
1205   /* shrink memory to actual size needed */
1206   pl = (plist *)vrna_realloc(pl, counter * sizeof(plist));
1207 
1208   gg += gi - 1;
1209   free(gg);
1210   free(my_index);
1211   free(tempprobs);
1212   return pl;
1213 }
1214 
1215 
1216 PUBLIC int
get_gquad_count(short * S,int i,int j)1217 get_gquad_count(short *S,
1218                 int   i,
1219                 int   j)
1220 {
1221   int *gg = get_g_islands_sub(S, i, j);
1222   int p, q, counter = 0;
1223 
1224   FOR_EACH_GQUAD(p, q, i, j)
1225   process_gquad_enumeration(gg, p, q,
1226                             &gquad_count,
1227                             (void *)(&counter),
1228                             NULL,
1229                             NULL,
1230                             NULL);
1231 
1232   gg += i - 1;
1233   free(gg);
1234   return counter;
1235 }
1236 
1237 
1238 PUBLIC int
get_gquad_layer_count(short * S,int i,int j)1239 get_gquad_layer_count(short *S,
1240                       int   i,
1241                       int   j)
1242 {
1243   int *gg = get_g_islands_sub(S, i, j);
1244   int p, q, counter = 0;
1245 
1246   FOR_EACH_GQUAD(p, q, i, j)
1247   process_gquad_enumeration(gg, p, q,
1248                             &gquad_count_layers,
1249                             (void *)(&counter),
1250                             NULL,
1251                             NULL,
1252                             NULL);
1253 
1254   gg += i - 1;
1255   free(gg);
1256   return counter;
1257 }
1258 
1259 
1260 PUBLIC int
parse_gquad(const char * struc,int * L,int l[3])1261 parse_gquad(const char  *struc,
1262             int         *L,
1263             int         l[3])
1264 {
1265   int i, il, start, end, len;
1266 
1267   for (i = 0; struc[i] && struc[i] != '+'; i++);
1268   if (struc[i] == '+') {
1269     /* start of gquad */
1270     for (il = 0; il <= 3; il++) {
1271       start = i; /* pos of first '+' */
1272       while (struc[++i] == '+')
1273         if ((il) && (i - start == *L))
1274           break;
1275 
1276       end = i;
1277       len = end - start;
1278       if (il == 0)
1279         *L = len;
1280       else if (len != *L)
1281         vrna_message_error("unequal stack lengths in gquad");
1282 
1283       if (il == 3)
1284         break;
1285 
1286       while (struc[++i] == '.'); /* linker */
1287       l[il] = i - end;
1288       if (struc[i] != '+')
1289         vrna_message_error("illegal character in gquad linker region");
1290     }
1291   } else {
1292     return 0;
1293   }
1294 
1295   /* printf("gquad at %d %d %d %d %d\n", end, *L, l[0], l[1], l[2]); */
1296   return end;
1297 }
1298 
1299 
1300 /*
1301  #########################################
1302  # BEGIN OF PRIVATE FUNCTION DEFINITIONS #
1303  #          (internal use only)          #
1304  #########################################
1305  */
1306 PRIVATE int
E_gquad_ali_penalty(int i,int L,int l[3],const short ** S,unsigned int n_seq,vrna_param_t * P)1307 E_gquad_ali_penalty(int           i,
1308                     int           L,
1309                     int           l[3],
1310                     const short   **S,
1311                     unsigned int  n_seq,
1312                     vrna_param_t  *P)
1313 {
1314   unsigned int mm[2];
1315 
1316   count_gquad_layer_mismatches(i, L, l, S, n_seq, mm);
1317 
1318   if (mm[1] > P->gquadLayerMismatchMax)
1319     return INF;
1320   else
1321     return P->gquadLayerMismatch * mm[0];
1322 }
1323 
1324 
1325 PRIVATE FLT_OR_DBL
exp_E_gquad_ali_penalty(int i,int L,int l[3],const short ** S,unsigned int n_seq,vrna_exp_param_t * pf)1326 exp_E_gquad_ali_penalty(int               i,
1327                         int               L,
1328                         int               l[3],
1329                         const short       **S,
1330                         unsigned int      n_seq,
1331                         vrna_exp_param_t  *pf)
1332 {
1333   unsigned int mm[2];
1334 
1335   count_gquad_layer_mismatches(i, L, l, S, n_seq, mm);
1336 
1337   if (mm[1] > pf->gquadLayerMismatchMax)
1338     return (FLT_OR_DBL)0.;
1339   else
1340     return (FLT_OR_DBL)pow(pf->expgquadLayerMismatch, (double)mm[0]);
1341 }
1342 
1343 
1344 PRIVATE void
count_gquad_layer_mismatches(int i,int L,int l[3],const short ** S,unsigned int n_seq,unsigned int mm[2])1345 count_gquad_layer_mismatches(int          i,
1346                              int          L,
1347                              int          l[3],
1348                              const short  **S,
1349                              unsigned int n_seq,
1350                              unsigned int mm[2])
1351 {
1352   unsigned int  s;
1353   int           cnt;
1354 
1355   mm[0] = mm[1] = 0;
1356 
1357   /* check for compatibility in the alignment */
1358   for (s = 0; s < n_seq; s++) {
1359     unsigned int  ld        = 0; /* !=0 if layer destruction was detected */
1360     unsigned int  mismatch  = 0;
1361 
1362     /* check bottom layer */
1363     if (S[s][i] != 3)
1364       ld |= 1U;
1365 
1366     if (S[s][i + L + l[0]] != 3)
1367       ld |= 2U;
1368 
1369     if (S[s][i + 2 * L + l[0] + l[1]] != 3)
1370       ld |= 4U;
1371 
1372     if (S[s][i + 3 * L + l[0] + l[1] + l[2]] != 3)
1373       ld |= 8U;
1374 
1375     /* add 1x penalty for missing bottom layer */
1376     if (ld)
1377       mismatch++;
1378 
1379     /* check top layer */
1380     ld = 0;
1381     if (S[s][i + L - 1] != 3)
1382       ld |= 1U;
1383 
1384     if (S[s][i + 2 * L + l[0] - 1] != 3)
1385       ld |= 2U;
1386 
1387     if (S[s][i + 3 * L + l[0] + l[1] - 1] != 3)
1388       ld |= 4U;
1389 
1390     if (S[s][i + 4 * L + l[0] + l[1] + l[2] - 1] != 3)
1391       ld |= 8U;
1392 
1393     /* add 1x penalty for missing top layer */
1394     if (ld)
1395       mismatch++;
1396 
1397     /* check inner layers */
1398     ld = 0;
1399     for (cnt = 1; cnt < L - 1; cnt++) {
1400       if (S[s][i + cnt] != 3)
1401         ld |= 1U;
1402 
1403       if (S[s][i + L + l[0] + cnt] != 3)
1404         ld |= 2U;
1405 
1406       if (S[s][i + 2 * L + l[0] + l[1] + cnt] != 3)
1407         ld |= 4U;
1408 
1409       if (S[s][i + 3 * L + l[0] + l[1] + l[2] + cnt] != 3)
1410         ld |= 8U;
1411 
1412       /* add 2x penalty for missing inner layer */
1413       if (ld)
1414         mismatch += 2;
1415     }
1416 
1417     mm[0] += mismatch;
1418 
1419     if (mismatch >= 2 * (L - 1))
1420       mm[1]++;
1421   }
1422 }
1423 
1424 
1425 PRIVATE void
gquad_mfe(int i,int L,int * l,void * data,void * P,void * NA,void * NA2)1426 gquad_mfe(int   i,
1427           int   L,
1428           int   *l,
1429           void  *data,
1430           void  *P,
1431           void  *NA,
1432           void  *NA2)
1433 {
1434   int cc = ((vrna_param_t *)P)->gquad[L][l[0] + l[1] + l[2]];
1435 
1436   if (cc < *((int *)data))
1437     *((int *)data) = cc;
1438 }
1439 
1440 
1441 PRIVATE void
gquad_mfe_pos(int i,int L,int * l,void * data,void * P,void * Lmfe,void * lmfe)1442 gquad_mfe_pos(int   i,
1443               int   L,
1444               int   *l,
1445               void  *data,
1446               void  *P,
1447               void  *Lmfe,
1448               void  *lmfe)
1449 {
1450   int cc = ((vrna_param_t *)P)->gquad[L][l[0] + l[1] + l[2]];
1451 
1452   if (cc < *((int *)data)) {
1453     *((int *)data)        = cc;
1454     *((int *)Lmfe)        = L;
1455     *((int *)lmfe)        = l[0];
1456     *(((int *)lmfe) + 1)  = l[1];
1457     *(((int *)lmfe) + 2)  = l[2];
1458   }
1459 }
1460 
1461 
1462 PRIVATE void
gquad_mfe_ali_pos(int i,int L,int * l,void * data,void * helper,void * Lmfe,void * lmfe)1463 gquad_mfe_ali_pos(int   i,
1464                   int   L,
1465                   int   *l,
1466                   void  *data,
1467                   void  *helper,
1468                   void  *Lmfe,
1469                   void  *lmfe)
1470 {
1471   int cc = INF;
1472 
1473   gquad_mfe_ali(i, L, l, (void *)&cc, helper, NULL, NULL);
1474 
1475   if (cc < *((int *)data)) {
1476     *((int *)data)        = cc;
1477     *((int *)Lmfe)        = L;
1478     *((int *)lmfe)        = l[0];
1479     *(((int *)lmfe) + 1)  = l[1];
1480     *(((int *)lmfe) + 2)  = l[2];
1481   }
1482 }
1483 
1484 
1485 PRIVATE
1486 void
gquad_pos_exhaustive(int i,int L,int * l,void * data,void * P,void * Lex,void * lex)1487 gquad_pos_exhaustive(int  i,
1488                      int  L,
1489                      int  *l,
1490                      void *data,
1491                      void *P,
1492                      void *Lex,
1493                      void *lex)
1494 {
1495   int cnt;
1496   int cc = ((vrna_param_t *)P)->gquad[L][l[0] + l[1] + l[2]];
1497 
1498   if (cc <= *((int *)data)) {
1499     /*  since Lex is an array of L values and lex an
1500      *  array of l triples we need to find out where
1501      *  the current gquad position is to be stored...
1502      * the below implementation might be slow but we
1503      * still use it for now
1504      */
1505     for (cnt = 0; ((int *)Lex)[cnt] != -1; cnt++);
1506 
1507     *((int *)Lex + cnt)             = L;
1508     *((int *)Lex + cnt + 1)         = -1;
1509     *(((int *)lex) + (3 * cnt) + 0) = l[0];
1510     *(((int *)lex) + (3 * cnt) + 1) = l[1];
1511     *(((int *)lex) + (3 * cnt) + 2) = l[2];
1512   }
1513 }
1514 
1515 
1516 PRIVATE
1517 void
gquad_count(int i,int L,int * l,void * data,void * NA,void * NA2,void * NA3)1518 gquad_count(int   i,
1519             int   L,
1520             int   *l,
1521             void  *data,
1522             void  *NA,
1523             void  *NA2,
1524             void  *NA3)
1525 {
1526   *((int *)data) += 1;
1527 }
1528 
1529 
1530 PRIVATE
1531 void
gquad_count_layers(int i,int L,int * l,void * data,void * NA,void * NA2,void * NA3)1532 gquad_count_layers(int  i,
1533                    int  L,
1534                    int  *l,
1535                    void *data,
1536                    void *NA,
1537                    void *NA2,
1538                    void *NA3)
1539 {
1540   *((int *)data) += L;
1541 }
1542 
1543 
1544 PRIVATE void
gquad_pf(int i,int L,int * l,void * data,void * pf,void * NA,void * NA2)1545 gquad_pf(int  i,
1546          int  L,
1547          int  *l,
1548          void *data,
1549          void *pf,
1550          void *NA,
1551          void *NA2)
1552 {
1553   *((FLT_OR_DBL *)data) += ((vrna_exp_param_t *)pf)->expgquad[L][l[0] + l[1] + l[2]];
1554 }
1555 
1556 
1557 PRIVATE void
gquad_pf_ali(int i,int L,int * l,void * data,void * helper,void * NA,void * NA2)1558 gquad_pf_ali(int  i,
1559              int  L,
1560              int  *l,
1561              void *data,
1562              void *helper,
1563              void *NA,
1564              void *NA2)
1565 {
1566   short                   **S;
1567   unsigned int            **a2s;
1568   int                     u1, u2, u3, s, n_seq;
1569   FLT_OR_DBL              penalty;
1570   vrna_exp_param_t        *pf;
1571   struct gquad_ali_helper *gq_help;
1572 
1573   gq_help = (struct gquad_ali_helper *)helper;
1574   S       = gq_help->S;
1575   a2s     = gq_help->a2s;
1576   n_seq   = gq_help->n_seq;
1577   pf      = gq_help->pf;
1578   penalty = exp_E_gquad_ali_penalty(i, L, l, (const short **)S, (unsigned int)n_seq, pf);
1579 
1580   if (penalty != 0.) {
1581     double q = 1.;
1582     for (s = 0; s < n_seq; s++) {
1583       u1  = a2s[s][i + L + l[0] - 1] - a2s[s][i + L - 1];
1584       u2  = a2s[s][i + 2 * L + l[0] + l[1] - 1] - a2s[s][i + 2 * L + l[0] - 1];
1585       u3  = a2s[s][i + 3 * L + l[0] + l[1] + l[2] - 1] - a2s[s][i + 3 * L + l[0] + l[1] - 1];
1586       q   *= pf->expgquad[L][u1 + u2 + u3];
1587     }
1588     *((FLT_OR_DBL *)data) += q * penalty;
1589   }
1590 }
1591 
1592 
1593 PRIVATE void
gquad_pf_pos(int i,int L,int * l,void * data,void * pf,void * Lmax,void * lmax)1594 gquad_pf_pos(int  i,
1595              int  L,
1596              int  *l,
1597              void *data,
1598              void *pf,
1599              void *Lmax,
1600              void *lmax)
1601 {
1602   FLT_OR_DBL gq = 0.;
1603 
1604   gquad_pf(i, L, l, (void *)&gq, pf, NULL, NULL);
1605 
1606   if (gq > *((FLT_OR_DBL *)data)) {
1607     *((FLT_OR_DBL *)data) = gq;
1608     *((int *)Lmax)        = L;
1609     *((int *)lmax)        = l[0];
1610     *(((int *)lmax) + 1)  = l[1];
1611     *(((int *)lmax) + 2)  = l[2];
1612   }
1613 }
1614 
1615 
1616 PRIVATE void
gquad_pf_pos_ali(int i,int L,int * l,void * data,void * helper,void * NA,void * NA2)1617 gquad_pf_pos_ali(int  i,
1618                  int  L,
1619                  int  *l,
1620                  void *data,
1621                  void *helper,
1622                  void *NA,
1623                  void *NA2)
1624 {
1625   FLT_OR_DBL              gq        = 0.;
1626   struct gquad_ali_helper *gq_help  = (struct gquad_ali_helper *)helper;
1627 
1628   gquad_pf_ali(i, L, l, (void *)&gq, helper, NULL, NULL);
1629 
1630   if (gq > *((FLT_OR_DBL *)data)) {
1631     *((FLT_OR_DBL *)data) = gq;
1632     gq_help->L            = L;
1633     gq_help->l[0]         = l[0];
1634     gq_help->l[1]         = l[1];
1635     gq_help->l[2]         = l[2];
1636   }
1637 }
1638 
1639 
1640 PRIVATE void
gquad_mfe_ali(int i,int L,int * l,void * data,void * helper,void * NA,void * NA2)1641 gquad_mfe_ali(int   i,
1642               int   L,
1643               int   *l,
1644               void  *data,
1645               void  *helper,
1646               void  *NA,
1647               void  *NA2)
1648 {
1649   int j, en[2], cc;
1650 
1651   en[0] = en[1] = INF;
1652 
1653   for (j = 0; j < 3; j++) {
1654     if (l[j] > VRNA_GQUAD_MAX_LINKER_LENGTH)
1655       return;
1656 
1657     if (l[j] < VRNA_GQUAD_MIN_LINKER_LENGTH)
1658       return;
1659   }
1660   if (L > VRNA_GQUAD_MAX_STACK_SIZE)
1661     return;
1662 
1663   if (L < VRNA_GQUAD_MIN_STACK_SIZE)
1664     return;
1665 
1666   gquad_mfe_ali_en(i, L, l, (void *)(&(en[0])), helper, NULL, NULL);
1667   if (en[1] != INF) {
1668     cc = en[0] + en[1];
1669     if (cc < *((int *)data))
1670       *((int *)data) = cc;
1671   }
1672 }
1673 
1674 
1675 PRIVATE void
gquad_mfe_ali_en(int i,int L,int * l,void * data,void * helper,void * NA,void * NA2)1676 gquad_mfe_ali_en(int  i,
1677                  int  L,
1678                  int  *l,
1679                  void *data,
1680                  void *helper,
1681                  void *NA,
1682                  void *NA2)
1683 {
1684   short                   **S;
1685   unsigned int            **a2s;
1686   int                     en[2], cc, dd, s, n_seq, u1, u2, u3;
1687   vrna_param_t            *P;
1688   struct gquad_ali_helper *gq_help;
1689 
1690   gq_help = (struct gquad_ali_helper *)helper;
1691   S       = gq_help->S;
1692   a2s     = gq_help->a2s;
1693   n_seq   = gq_help->n_seq;
1694   P       = gq_help->P;
1695 
1696   for (en[0] = s = 0; s < n_seq; s++) {
1697     u1    = a2s[s][i + L + l[0] - 1] - a2s[s][i + L - 1];
1698     u2    = a2s[s][i + 2 * L + l[0] + l[1] - 1] - a2s[s][i + 2 * L + l[0] - 1];
1699     u3    = a2s[s][i + 3 * L + l[0] + l[1] + l[2] - 1] - a2s[s][i + 3 * L + l[0] + l[1] - 1];
1700     en[0] += P->gquad[L][u1 + u2 + u3];
1701   }
1702   en[1] = E_gquad_ali_penalty(i, L, l, (const short **)S, (unsigned int)n_seq, P);
1703 
1704   if (en[1] != INF) {
1705     cc  = en[0] + en[1];
1706     dd  = ((int *)data)[0] + ((int *)data)[1];
1707     if (cc < dd) {
1708       ((int *)data)[0]  = en[0];
1709       ((int *)data)[1]  = en[1];
1710     }
1711   }
1712 }
1713 
1714 
1715 PRIVATE void
gquad_interact(int i,int L,int * l,void * data,void * pf,void * index,void * NA2)1716 gquad_interact(int  i,
1717                int  L,
1718                int  *l,
1719                void *data,
1720                void *pf,
1721                void *index,
1722                void *NA2)
1723 {
1724   int         x, *idx;
1725   FLT_OR_DBL  gq, *pp;
1726 
1727   idx = (int *)index;
1728   pp  = (FLT_OR_DBL *)data;
1729   gq  = exp_E_gquad(L, l, (vrna_exp_param_t *)pf);
1730 
1731   for (x = 0; x < L; x++) {
1732     pp[idx[i + x] - (i + x + 3 * L + l[0] + l[1] + l[2])]                       += gq;
1733     pp[idx[i + x] - (i + x + L + l[0])]                                         += gq;
1734     pp[idx[i + x + L + l[0]] - (i + x + 2 * L + l[0] + l[1])]                   += gq;
1735     pp[idx[i + x + 2 * L + l[0] + l[1]] - (i + x + 3 * L + l[0] + l[1] + l[2])] += gq;
1736   }
1737 }
1738 
1739 
1740 PRIVATE void
gquad_interact_ali(int i,int L,int * l,void * data,void * index,void * helper,void * NA)1741 gquad_interact_ali(int  i,
1742                    int  L,
1743                    int  *l,
1744                    void *data,
1745                    void *index,
1746                    void *helper,
1747                    void *NA)
1748 {
1749   int         x, *idx, bad;
1750   FLT_OR_DBL  gq, *pp;
1751 
1752   idx = (int *)index;
1753   pp  = (FLT_OR_DBL *)data;
1754   bad = 0;
1755 
1756   for (x = 0; x < 3; x++) {
1757     if (l[x] > VRNA_GQUAD_MAX_LINKER_LENGTH) {
1758       bad = 1;
1759       break;
1760     }
1761 
1762     if (l[x] < VRNA_GQUAD_MIN_LINKER_LENGTH) {
1763       bad = 1;
1764       break;
1765     }
1766   }
1767   if (L > VRNA_GQUAD_MAX_STACK_SIZE)
1768     bad = 1;
1769 
1770   if (L < VRNA_GQUAD_MIN_STACK_SIZE)
1771     bad = 1;
1772 
1773   gq = 0.;
1774 
1775   if (!bad) {
1776     gquad_pf_ali(i, L, l,
1777                  (void *)(&gq),
1778                  helper,
1779                  NULL,
1780                  NULL);
1781   }
1782 
1783   for (x = 0; x < L; x++) {
1784     pp[idx[i + x] - (i + x + 3 * L + l[0] + l[1] + l[2])]                       += gq;
1785     pp[idx[i + x] - (i + x + L + l[0])]                                         += gq;
1786     pp[idx[i + x + L + l[0]] - (i + x + 2 * L + l[0] + l[1])]                   += gq;
1787     pp[idx[i + x + 2 * L + l[0] + l[1]] - (i + x + 3 * L + l[0] + l[1] + l[2])] += gq;
1788   }
1789 }
1790 
1791 
1792 PRIVATE INLINE int *
get_g_islands(short * S)1793 get_g_islands(short *S)
1794 {
1795   return get_g_islands_sub(S, 1, S[0]);
1796 }
1797 
1798 
1799 PRIVATE INLINE int *
get_g_islands_sub(short * S,int i,int j)1800 get_g_islands_sub(short *S,
1801                   int   i,
1802                   int   j)
1803 {
1804   int x, *gg;
1805 
1806   gg  = (int *)vrna_alloc(sizeof(int) * (j - i + 2));
1807   gg  -= i - 1;
1808 
1809   if (S[j] == 3)
1810     gg[j] = 1;
1811 
1812   for (x = j - 1; x >= i; x--)
1813     if (S[x] == 3)
1814       gg[x] = gg[x + 1] + 1;
1815 
1816   return gg;
1817 }
1818 
1819 
1820 /**
1821  *  We could've also created a macro that loops over all G-quadruplexes
1822  *  delimited by i and j. However, for the fun of it we use this function
1823  *  that receives a pointer to a callback function which in turn does the
1824  *  actual computation for each quadruplex found.
1825  */
1826 PRIVATE
1827 void
process_gquad_enumeration(int * gg,int i,int j,void (* f)(int,int,int *,void *,void *,void *,void *),void * data,void * P,void * aux1,void * aux2)1828 process_gquad_enumeration(int *gg,
1829                           int i,
1830                           int j,
1831                           void (*f)(int, int, int *,
1832                                     void *, void *, void *, void *),
1833                           void *data,
1834                           void *P,
1835                           void *aux1,
1836                           void *aux2)
1837 {
1838   int L, l[3], n, max_linker, maxl0, maxl1;
1839 
1840   n = j - i + 1;
1841 
1842   if ((n >= VRNA_GQUAD_MIN_BOX_SIZE) && (n <= VRNA_GQUAD_MAX_BOX_SIZE)) {
1843     for (L = MIN2(gg[i], VRNA_GQUAD_MAX_STACK_SIZE);
1844          L >= VRNA_GQUAD_MIN_STACK_SIZE;
1845          L--)
1846       if (gg[j - L + 1] >= L) {
1847         max_linker = n - 4 * L;
1848         if ((max_linker >= 3 * VRNA_GQUAD_MIN_LINKER_LENGTH)
1849             && (max_linker <= 3 * VRNA_GQUAD_MAX_LINKER_LENGTH)) {
1850           maxl0 = MIN2(VRNA_GQUAD_MAX_LINKER_LENGTH,
1851                        max_linker - 2 * VRNA_GQUAD_MIN_LINKER_LENGTH
1852                        );
1853           for (l[0] = VRNA_GQUAD_MIN_LINKER_LENGTH;
1854                l[0] <= maxl0;
1855                l[0]++)
1856             if (gg[i + L + l[0]] >= L) {
1857               maxl1 = MIN2(VRNA_GQUAD_MAX_LINKER_LENGTH,
1858                            max_linker - l[0] - VRNA_GQUAD_MIN_LINKER_LENGTH
1859                            );
1860               for (l[1] = VRNA_GQUAD_MIN_LINKER_LENGTH;
1861                    l[1] <= maxl1;
1862                    l[1]++)
1863                 if (gg[i + 2 * L + l[0] + l[1]] >= L) {
1864                   l[2] = max_linker - l[0] - l[1];
1865                   f(i, L, &(l[0]), data, P, aux1, aux2);
1866                 }
1867             }
1868         }
1869       }
1870   }
1871 }
1872