1 /*
2  *                partiton function for RNA secondary structures
3  *
4  *                Ivo L Hofacker
5  *
6  *                Vienna RNA package
7  */
8 /*
9  * $Log: part_func_up.c,v $
10  * Revision 1.4  2008/07/04 14:27:36  ivo
11  * Modify output (again)
12  *
13  * Revision 1.3  2008/05/08 14:11:55  ivo
14  * minor output changes
15  *
16  * Revision 1.2  2007/12/13 10:19:54  ivo
17  * major RNAup update from Ulli
18  *
19  * Revision 1.1  2007/04/30 15:13:13  ivo
20  * merge RNAup into package
21  *
22  * Revision 1.11  2006/07/17 11:11:43  ulim
23  * removed all globals from fold_vars.h,c, cleaned code
24  *
25  * Revision 1.10  2006/07/12 09:19:29  ulim
26  * global variables w, incr3 and incr5 are now local
27  *
28  * Revision 1.9  2006/07/11 12:45:02  ulim
29  * remove redundancy in function pf_interact(...)
30  *
31  * Revision 1.8  2006/03/08 15:26:37  ulim
32  * modified -o[1|2], added meaningful default
33  *
34  * Revision 1.5  2006/01/23 11:27:04  ulim
35  * include file into new package version. cleaned it
36  *
37  * Revision 1.2  2005/07/29 15:13:37  ulim
38  * put the function, calculating the probability of an unpaired region in
39  * an RNA and the function calculating the prob. of interaction between 2 RNAs
40  * in a seperate file (pf_two.c)
41  *
42  * Revision 1.1  2005/07/26 13:27:12  ulim
43  * Initial revision
44  *
45  * Revision 1.2  2005/07/01 13:14:57  ulim
46  * fixed error in scaling, included new commandline options -incr5, -incr3 to
47  * allow a variable number of unpaired positions 5' and 3' of the site of
48  * interaction between the two RNAs
49  *
50  * Revision 1.1  2005/04/19 08:16:38  ulim
51  * Initial revision
52  */
53 
54 #ifdef HAVE_CONFIG_H
55 #include "config.h"
56 #endif
57 
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <math.h>
62 #include <float.h>    /* #defines FLT_MAX ... */
63 #include <unistd.h>
64 #include "ViennaRNA/fold.h"
65 #include "ViennaRNA/utils/basic.h"
66 #include "ViennaRNA/params/default.h"
67 #include "ViennaRNA/fold_vars.h"
68 #include "ViennaRNA/pair_mat.h"
69 #include "ViennaRNA/params/basic.h"
70 #include "ViennaRNA/part_func.h"
71 #include "ViennaRNA/loops/all.h"
72 #include "ViennaRNA/part_func_up.h"
73 #include "ViennaRNA/duplex.h"
74 
75 
76 #define CO_TURN 0
77 #define ZERO(A) (fabs(A) < DBL_EPSILON)
78 #define EQUAL(A, B) (fabs((A)-(B)) < 1000 * DBL_EPSILON)
79 #define ISOLATED  256.0
80 /* #define NUMERIC 1 */
81 
82 /*
83  #################################
84  # GLOBAL VARIABLES              #
85  #################################
86  */
87 
88 /*
89  #################################
90  # PRIVATE VARIABLES             #
91  #################################
92  */
93 PRIVATE short             *S = NULL, *S1 = NULL, *SS = NULL, *SS2 = NULL;
94 PRIVATE vrna_exp_param_t  *Pf = NULL;                           /* use this structure for all the exp-arrays*/
95 PRIVATE FLT_OR_DBL        *qb = NULL, *qm = NULL, *prpr = NULL; /* add arrays for pf_unpaired()*/
96 PRIVATE FLT_OR_DBL        *probs = NULL;
97 PRIVATE FLT_OR_DBL        *q1k = NULL, *qln = NULL;
98 PRIVATE double            *qqm2 = NULL, *qq_1m2 = NULL, *qqm = NULL, *qqm1 = NULL;
99 PRIVATE FLT_OR_DBL        *scale = NULL, *expMLbase = NULL;
100 PRIVATE char              *ptype = NULL;  /* precomputed array of pair types */
101 PRIVATE int               init_length;    /* length in last call to init_pf_fold()*/
102 PRIVATE double            init_temp;      /* temperature in last call to scale_pf_params */
103 PRIVATE int               *my_iindx = NULL;
104 /* make iptypes array for intermolecular constrains (ipidx for indexing)*/
105 
106 
107 /*
108  #################################
109  # PRIVATE FUNCTION DECLARATIONS #
110  #################################
111  */
112 PRIVATE pu_out *
113 get_u_vals(pu_contrib *p_c,
114            int        **unpaired_values,
115            char       *select_contrib);
116 
117 
118 PRIVATE int
119 plot_free_pu_out(pu_out   *res,
120                  interact *pint,
121                  char     *ofile,
122                  char     *head);
123 
124 
125 PRIVATE void
126 scale_stru_pf_params(unsigned int length);
127 
128 
129 PRIVATE void
130 init_pf_two(int length);
131 
132 
133 PRIVATE void
134 scale_int(const char  *s,
135           const char  *sl,
136           double      *sc_int);
137 
138 
139 PRIVATE void
140 encode_seq(const char *s1,
141            const char *s2);
142 
143 
144 PRIVATE constrain *
145 get_ptypes_up(char        *S,
146               const char  *structure);
147 
148 
149 PRIVATE void
150 get_up_arrays(unsigned int length);
151 
152 
153 PRIVATE void
154 free_up_arrays(void);
155 
156 
157 PRIVATE void
158 set_encoded_seq(const char  *sequence,
159                 short       **S,
160                 short       **S1);
161 
162 
163 PRIVATE void
164 get_interact_arrays(unsigned int  n1,
165                     unsigned int  n2,
166                     pu_contrib    *p_c,
167                     pu_contrib    *p_c2,
168                     int           w,
169                     int           incr5,
170                     int           incr3,
171                     double        ***p_c_S,
172                     double        ***p_c2_S);
173 
174 
175 /*
176  #################################
177  # BEGIN OF FUNCTION DEFINITIONS #
178  #################################
179  */
180 PUBLIC pu_contrib *
get_pu_contrib_struct(unsigned int n,unsigned int w)181 get_pu_contrib_struct(unsigned int  n,
182                       unsigned int  w)
183 {
184   unsigned int  i;
185   pu_contrib    *pu = (pu_contrib *)vrna_alloc(sizeof(pu_contrib));
186 
187   pu->length  = n;
188   pu->w       = w;
189   /* contributions to probability of being unpaired witihin a(n)
190    * H hairpin,
191    * I interior loop,
192    * M muliloop,
193    * E exterior loop*/
194   /* pu_test->X[i][j] where i <= j and i [1...n], j = [1...w[ */
195   pu->H = (double **)vrna_alloc(sizeof(double *) * (n + 1));
196   pu->I = (double **)vrna_alloc(sizeof(double *) * (n + 1));
197   pu->M = (double **)vrna_alloc(sizeof(double *) * (n + 1));
198   pu->E = (double **)vrna_alloc(sizeof(double *) * (n + 1));
199   for (i = 0; i <= n; i++) {
200     pu->H[i]  = (double *)vrna_alloc(sizeof(double) * (w + 1));
201     pu->I[i]  = (double *)vrna_alloc(sizeof(double) * (w + 1));
202     pu->M[i]  = (double *)vrna_alloc(sizeof(double) * (w + 1));
203     pu->E[i]  = (double *)vrna_alloc(sizeof(double) * (w + 1));
204   }
205   return pu;
206 }
207 
208 
209 PUBLIC void
free_pu_contrib(pu_contrib * pu)210 free_pu_contrib(pu_contrib *pu)
211 {
212   free_pu_contrib_struct(pu);
213 }
214 
215 
216 PUBLIC void
free_pu_contrib_struct(pu_contrib * pu)217 free_pu_contrib_struct(pu_contrib *pu)
218 {
219   unsigned int i;
220 
221   if (pu != NULL) {
222     for (i = 0; i <= pu->length; i++) {
223       free(pu->H[i]);
224       free(pu->I[i]);
225       free(pu->M[i]);
226       free(pu->E[i]);
227     }
228     free(pu->H);
229     free(pu->I);
230     free(pu->M);
231     free(pu->E);
232     free(pu);
233   }
234 }
235 
236 
237 /* you have to call pf_fold(sequence, structure); befor pf_unstru */
238 PUBLIC pu_contrib *
pf_unstru(char * sequence,int w)239 pf_unstru(char  *sequence,
240           int   w)
241 {
242   int           n, i, j, v, k, l, o, p, ij, kl, po, u, u1, d, type, type_2, tt;
243   unsigned int  size;
244   double        temp, tqm2;
245   double        qbt1, *tmp, sum_l, *sum_M;
246   double        *store_H, *store_Io, **store_I2o; /* hairp., interior contribs */
247   double        *store_M_qm_o, *store_M_mlbase;   /* multiloop contributions */
248   pu_contrib    *pu_test;
249 
250   sum_l   = 0.0;
251   temp    = 0;
252   n       = (int)strlen(sequence);
253   sum_M   = (double *)vrna_alloc((n + 1) * sizeof(double));
254   pu_test = get_pu_contrib_struct((unsigned)n, (unsigned)w);
255   size    = ((n + 1) * (n + 2)) >> 1;
256 
257   get_up_arrays((unsigned)n);
258   init_pf_two(n);
259 
260   /* init everything */
261   for (d = 0; d <= TURN; d++)
262     for (i = 1; i <= n - d; i++) {
263       j   = i + d;
264       ij  = my_iindx[i] - j;
265       if (d < w)
266         pu_test->H[i][d] = pu_test->I[i][d] = pu_test->M[i][d] = pu_test->E[i][d] = 0.;
267     }
268 
269 
270   for (i = 0; i < size; i++)
271     prpr[i] = probs[i];
272 
273   sum_M[0] = 0.;
274   for (i = 1; i <= n; i++) {
275     /* set auxillary arrays to 0, reuse qqm and qqm1, reuse qqm2 and qq_1m2*/
276     sum_M[i] = qqm[i] = qqm1[i] = qqm2[i] = qq_1m2[i] = 0;
277     for (j = i + TURN + 1; j <= n; j++) {
278       ij = my_iindx[i] - j;
279       /* i need the part_func of all structures outside bp[ij] */
280       if (qb[ij] > 0.0)
281         prpr[ij] = (probs[ij] / qb[ij]);
282     }
283   }
284 
285   /* alloc even more memory */
286   store_I2o = (double **)vrna_alloc(sizeof(double *) * (n + 1)); /* for p,k */
287   for (i = 0; i <= n; i++)
288     store_I2o[i] = (double *)vrna_alloc(sizeof(double) * (MAXLOOP + 2));
289 
290   /* expMLbase[i-p]*dangles_po */
291   store_M_mlbase = (double *)vrna_alloc(sizeof(double) * (size + 1));
292 
293   /* 2. exterior bp (p,o) encloses unpaired region [i,i+w[*/
294   for (o = TURN + 2; o <= n; o++) {
295     double sum_h;
296     /*allocate space for arrays to store different contributions to H, I & M */
297     store_H = (double *)vrna_alloc(sizeof(double) * (o + 2));
298     /* unpaired between ]l,o[ */
299     store_Io = (double *)vrna_alloc(sizeof(double) * (o + 2));
300     /* qm[p+1,i-1]*dangles_po */
301     store_M_qm_o = (double *)vrna_alloc(sizeof(double) * (n + 1));
302 
303     for (p = o - TURN - 1; p >= 1; p--) {
304       /* construction of partition function of segment [p,o], given that
305        * an unpaired region [i,i+w[ exists within [p,o] */
306       u     = o - p - 1;
307       po    = my_iindx[p] - o;
308       type  = ptype[po];
309       if (type) {
310         /*hairpin contribution*/
311         if (((type == 3) || (type == 4)) && no_closingGU)
312           temp = 0.;
313         else
314           temp = prpr[po] *
315                  exp_E_Hairpin(u, type, S1[p + 1], S1[o - 1], sequence + p - 1, Pf) * scale[u + 2];
316 
317         /* all H contribs are collect for the longest unpaired region */
318         store_H[p + 1] = temp;
319 
320         /* interior loops with interior pair k,l and an unpaired region of
321          * length w between p and k || l and o*/
322         for (k = p + 1; k <= MIN2(p + MAXLOOP + 1, o - TURN - 2); k++) {
323           u1    = k - p - 1;
324           sum_l = 0.;
325           for (l = MAX2(k + TURN + 1, o - 1 - MAXLOOP + u1); l < o; l++) {
326             kl      = my_iindx[k] - l;
327             type_2  = ptype[kl];
328             if ((l + 1) < o)
329               store_Io[l + 1] += sum_l;
330 
331             temp = 0.;
332             if (type_2) {
333               type_2  = rtype[type_2];
334               temp    = prpr[po] * qb[kl] * exp_E_IntLoop(u1,
335                                                           o - l - 1,
336                                                           type,
337                                                           type_2,
338                                                           S1[p + 1],
339                                                           S1[o - 1],
340                                                           S1[k - 1],
341                                                           S1[l + 1],
342                                                           Pf) * scale[u1 + o - l + 1];
343               if ((l + 1) < o)
344                 store_Io[l + 1] += temp;           /* unpaired region between ]l,o[ */
345 
346               sum_l += temp;
347             } /* end of if pair(k,l) */
348           }   /* end of l */
349               /* unpaired in region ]p,k[  */
350           for (i = p + 1; i <= k - 1; i++)
351             store_I2o[i][MIN2(w - 1, k - i - 1)] += sum_l;
352         } /* end of k */
353       }   /*end of if(type) test for bp (p,o) */
354 
355       /* multiple stem loop contribution
356        * calculate qm2[my_iindx[i]-j] in the course of the calculation
357        * of the multiple stem loop contribution:
358        * advantage: you save memory:
359        * instead of a (n+1)*n array for qqm2 you only need 2*n arrays
360        * disadvantage: you have to use two times the op-loop for the full
361        * multiloop contribution
362        * first op-loop: index o goes from 1...n and
363        *                index p from o-TURN-1 ... 1
364        * second op-loop: index o goes from n...1 and
365        *                 index p from o+TURN+1 ... n !!
366        * HERE index o goes from 1...n and index p o-TURN-1 ... 1 ,
367        * we calculate the contributions to multiple stem loop
368        * where exp(i+w-1-p)*(qqm2 values between i+w and o-1)
369        * AND qm[iindex[p+1]-(i-1)]*exp(beta*w)*qm[iindex[i+w]-(o-1)]
370        * you have to recalculate of qqm matrix containing final stem
371        * contributions to multiple loop partition function
372        * from segment p,o */
373 
374       /* recalculate qqm[]
375        * qqm[p] := (contribution with exact one loop in region (p,o)*/
376       qqm[p] = qqm1[p] * expMLbase[1];
377       if (type) {
378         qbt1 = qb[po] *
379                exp_E_MLstem(type, (p > 1) ? S1[p - 1] : -1, (o < n) ? S1[o + 1] : -1, Pf);
380         qqm[p] += qbt1;
381         /* reverse dangles for prpr[po]*... */
382         temp  = 0.;
383         tt    = rtype[type];
384         temp  = prpr[po] * exp_E_MLstem(tt, S1[o - 1], S1[p + 1], Pf) * scale[2] * Pf->expMLclosing;
385         for (i = p + 1; i < o; i++) {
386           int p1i = (p + 1) < (i - 1)  ? my_iindx[p + 1] - (i - 1)  : 0;
387           /*unpaired region expMLbase[i-p] left of structured
388            * region qq_1m2[i+1]*/
389           /* @expMLbase:  note distance of i-p == i-(p+1)+1 */
390           store_M_mlbase[my_iindx[p + 1] - i] += expMLbase[i - p] * temp * qq_1m2[i + 1];
391           /* structured region qm[p1i] left of unpaired region */
392           /* contribition for unpaired region is added after the p-loop */
393           store_M_qm_o[i] += qm[p1i] * temp;
394         } /*end of for i ... */
395       }
396 
397       for (tqm2 = 0., i = p + 1; i < o; i++)
398         tqm2 += qm[my_iindx[p] - i] * qqm[i + 1];
399 
400       /* qqm2[p] contrib with at least 2 loops in region (p,o) */
401       qqm2[p] = tqm2;
402     } /* end for (p=..) */
403 
404     for (sum_h = 0., i = 1; i < o; i++) {
405       int max_v, vo;
406       sum_h += store_H[i];
407       max_v = MIN2(w - 1, o - i - 1);
408       for (v = max_v; v >= 0; v--) {
409         /* Hairpins */
410         pu_test->H[i][v] += sum_h;/* store_H[i][v] + store_H[i][max_v]; */
411         /* Interior loops: unpaired region between  ]l,o[ calculated here !*/
412         /* unpaired region between ]p,k[ collected after after o-loop */
413         if (v <= MIN2(max_v, MAXLOOP))
414           pu_test->I[i][v] += store_Io[i]; /* ]l,o[ */
415 
416         /* Multiloops:*/
417         /* unpaired region [i,v] between structured regions ]p,i[ and ]v,o[. */
418         /* store_M_qm_o[i] = part. funct over all structured regions ]p,i[ */
419         vo                = (i + v + 1) <= (o - 1) ? my_iindx[i + v + 1] - (o - 1) : 0;
420         pu_test->M[i][v]  += store_M_qm_o[i] * expMLbase[v + 1] * qm[vo];
421       }
422     }
423     tmp     = qqm1;
424     qqm1    = qqm;
425     qqm     = tmp;
426     tmp     = qqm2;
427     qqm2    = qq_1m2;
428     qq_1m2  = tmp;
429 
430     free(store_Io);
431     free(store_H);
432     free(store_M_qm_o);
433   }/* end for (o=..) */
434 
435   for (i = 1; i < n; i++) {
436     int     max_v;
437     double  sum_iv;
438     sum_iv  = 0.;
439     max_v   = MIN2(w - 1, n - i);
440     for (v = n; v >= 0; v--) {
441       if (v <= MIN2(max_v, MAXLOOP)) {
442         /* all unpaired regions [i,v] between p and k in interior loops */
443         /* notice v runs from max_v -> 0, sum_iv sums all int. l. contribs */
444         /* for each x, v < x =< max_v, since they contribute to [i,v] */
445         sum_iv            += store_I2o[i][v];
446         pu_test->I[i][v]  += sum_iv;
447       }
448 
449       /* all unpaired region [i,v] for a fixed v, given that */
450       /* region ]v,o[ contains at least 2 structures qq_1m2[v+1]; */
451       if (v >= i) {
452         sum_M[v] += store_M_mlbase[my_iindx[i] - v];
453         if (v - i <= max_v)
454           pu_test->M[i][v - i] += sum_M[v];
455       }
456     }
457   }
458 
459   for (i = 0; i <= n; i++)
460     free(store_I2o[i]);
461   free(store_I2o);
462 
463   for (i = 1; i <= n; i++)
464     /* set auxillary arrays to 0 */
465     qqm[i] = qqm1[i] = qqm2[i] = qq_1m2[i] = 0;
466 
467   /* 2. exterior bp (p,o) encloses unpaired region [i,j]
468    * HERE index o goes from n...1 and index p from o+TURN+1 ... n,
469    * that is, we add the one multiloop contribution that we
470    * could not calculate before  */
471 
472   /* is free'ing plus allocating faster than looping over all entries an setting them to 0? */
473 #if 0
474   free(store_M_mlbase);
475   store_M_mlbase = (double *)vrna_alloc(sizeof(double) * (size + 1));
476 #else
477   /* this should be the fastest way to set everything to 0 */
478   memset(store_M_mlbase, 0, sizeof(double) * (size + 1));
479 #endif
480 
481   for (o = n - TURN - 1; o >= 1; o--) {
482     for (p = o + TURN + 1; p <= n; p++) {
483       po    = my_iindx[o] - p;
484       type  = ptype[po];
485       /* recalculate of qqm matrix containing final stem
486        * contributions to multiple loop partition function
487        * from segment [o,p] */
488       qqm[p] = qqm1[p] * expMLbase[1];
489       if (type) {
490         qbt1    = qb[po];
491         qbt1    *= exp_E_MLstem(type, (o > 1) ? S1[o - 1] : -1, (p < n) ? S1[p + 1] : -1, Pf);
492         qqm[p]  += qbt1;
493         /* revers dangles for prpr[po]...  */
494         temp  = 0.;
495         tt    = rtype[type];
496         temp  = prpr[po] * exp_E_MLstem(tt, S1[p - 1], S1[o + 1], Pf) * Pf->expMLclosing * scale[2];
497       }
498 
499       tqm2 = 0.;
500       for (i = o + 1; i < p; i++) {
501         tqm2 += qqm[i] * qm[my_iindx[i + 1] - p];
502 
503         if (type != 0) {
504           /* structured region qq_1m2[i-1] left of unpaired r. expMLbase[p-i]*/
505           /* @expMLbase:  note distance of p-i == p+1-i+1 */
506           store_M_mlbase[my_iindx[i] - p + 1] += qq_1m2[i - 1] * expMLbase[p - i] * temp;
507         }
508       } /*end of for i ....*/
509       qqm2[p] = tqm2;
510     }   /* end for (p=..) */
511     tmp     = qqm1;
512     qqm1    = qqm;
513     qqm     = tmp;
514     tmp     = qqm2;
515     qqm2    = qq_1m2;
516     qq_1m2  = tmp;
517   }/* end for (o=..) */
518    /* now collect the missing multiloop contributions */
519   for (i = 0; i <= n; i++)
520     sum_M[i] = 0.;
521   for (i = 1; i <= n; i++) {
522     int v_max = MIN2(w - 1, n - i);
523     for (v = n; v >= i; v--) {
524       sum_M[i] += store_M_mlbase[my_iindx[i] - v];
525       if ((v - i <= v_max))
526         pu_test->M[i][v - i] += sum_M[i];
527     }
528   }
529 
530   /* 1. region [i,j] exterior to all loops */
531   for (i = 1; i <= n; i++) {
532     for (j = i; j < MIN2(i + w, n + 1); j++) {
533       ij                    = my_iindx[i] - j;
534       temp                  = q1k[i - 1] * 1 * scale[j - i + 1] * qln[j + 1] / q1k[n];
535       pu_test->E[i][j - i]  += temp;
536     }
537   }
538 
539   free(sum_M);
540   free(store_M_mlbase);
541   free_up_arrays();
542   return pu_test;
543 }
544 
545 
546 PRIVATE void
get_interact_arrays(unsigned int n1,unsigned int n2,pu_contrib * p_c,pu_contrib * p_c2,int w,int incr5,int incr3,double *** p_c_S,double *** p_c2_S)547 get_interact_arrays(unsigned int  n1,
548                     unsigned int  n2,
549                     pu_contrib    *p_c,
550                     pu_contrib    *p_c2,
551                     int           w,
552                     int           incr5,
553                     int           incr3,
554                     double        ***p_c_S,
555                     double        ***p_c2_S)
556 {
557   unsigned int  i;
558   int           pc_size, j;
559 
560   *p_c_S = (double **)vrna_alloc(sizeof(double *) * (n1 + 1));
561 
562   for (i = 1; i <= n1; i++) {
563     pc_size     = MIN2((w + incr5 + incr3), (int)n1);
564     (*p_c_S)[i] = (double *)vrna_alloc(sizeof(double) * (pc_size + 1));
565     for (j = 0; j < pc_size; j++)
566       (*p_c_S)[i][j] = p_c->H[i][j] + p_c->I[i][j] + p_c->M[i][j] + p_c->E[i][j];
567   }
568 
569   if (p_c2 != NULL) {
570     (*p_c2_S) = (double **)vrna_alloc(sizeof(double *) * (n2 + 1));
571     for (i = 1; i <= n2; i++) {
572       pc_size       = MIN2(w, (int)n2);
573       (*p_c2_S)[i]  = (double *)vrna_alloc(sizeof(double) * (pc_size + 2));
574       for (j = 0; j < pc_size; j++)
575         (*p_c2_S)[i][j] = p_c2->H[i][j] + p_c2->I[i][j] + p_c2->M[i][j] + p_c2->E[i][j];
576     }
577   }
578 }
579 
580 
581 /*------------------------------------------------------------------------*/
582 /* s1 is the longer seq */
583 PUBLIC interact *
pf_interact(const char * s1,const char * s2,pu_contrib * p_c,pu_contrib * p_c2,int w,char * cstruc,int incr3,int incr5)584 pf_interact(const char  *s1,
585             const char  *s2,
586             pu_contrib  *p_c,
587             pu_contrib  *p_c2,
588             int         w,
589             char        *cstruc,
590             int         incr3,
591             int         incr5)
592 {
593   int         i, j, k, l, n1, n2, add_i5, add_i3, pc_size;
594   double      temp, Z, rev_d, E, Z2, **p_c_S, **p_c2_S, int_scale;
595   FLT_OR_DBL  ****qint_4, **qint_ik;
596   /* PRIVATE double **pint; array for pf_up() output */
597   interact    *Int;
598   double      G_min, G_is, Gi_min;
599   int         gi, gj, gk, gl, ci, cj, ck, cl, prev_k, prev_l;
600   FLT_OR_DBL  **int_ik;
601   double      Z_int, temp_int, temppfs;
602   double      const_scale, const_T;
603   constrain   *cc = NULL;                           /* constrains for cofolding */
604   char        *Seq, *i_long, *i_short, *pos = NULL; /* short seq appended to long one */
605 
606   /* int ***pu_jl; */ /* positions of interaction in the short RNA */
607 
608   G_min = G_is = Gi_min = 100.0;
609   gi    = gj = gk = gl = ci = cj = ck = cl = 0;
610 
611   n1      = (int)strlen(s1);
612   n2      = (int)strlen(s2);
613   prev_k  = 1;
614   prev_l  = n2;
615 
616   i_long  = (char *)vrna_alloc(sizeof(char) * (n1 + 1));
617   i_short = (char *)vrna_alloc(sizeof(char) * (n2 + 1));
618   Seq     = (char *)vrna_alloc(sizeof(char) * (n1 + n2 + 2));
619 
620   strcpy(Seq, s1);
621   strcat(Seq, s2);
622 
623   set_encoded_seq(s1, &S, &S1);
624   set_encoded_seq(s2, &SS, &SS2);
625 
626   cc = get_ptypes_up(Seq, cstruc);
627 
628   get_interact_arrays(n1, n2, p_c, p_c2, w, incr5, incr3, &p_c_S, &p_c2_S);
629 
630   /*array for pf_up() output */
631   Int     = (interact *)vrna_alloc(sizeof(interact) * 1);
632   Int->Pi = (double *)vrna_alloc(sizeof(double) * (n1 + 2));
633   Int->Gi = (double *)vrna_alloc(sizeof(double) * (n1 + 2));
634 
635   /* use a different scaling for pf_interact*/
636   scale_int(s2, s1, &int_scale);
637 
638   /* set the global scale array and the global variable pf_scale to the
639    * values used to scale the interaction, keep their former values !! */
640   temppfs   = pf_scale;
641   pf_scale  = int_scale;
642 
643   /* in order to scale expLoopEnergy correctly call*/
644   /* we also pass twice the seq-length to avoid bogus access to scale[] array */
645   scale_stru_pf_params((unsigned)2 * n1);
646 
647   qint_ik = (FLT_OR_DBL **)vrna_alloc(sizeof(FLT_OR_DBL *) * (n1 + 1));
648   for (i = 1; i <= n1; i++)
649     qint_ik[i] = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * (n1 + 1));
650   /* int_ik */
651   int_ik = (FLT_OR_DBL **)vrna_alloc(sizeof(FLT_OR_DBL *) * (n1 + 1));
652   for (i = 1; i <= n1; i++)
653     int_ik[i] = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * (n1 + 1));
654   Z_int = 0.;
655   /*  Gint = ( -log(int_ik[gk][gi])-( ((int) w/2)*log(pf_scale)) )*((Pf->temperature+K0)*GASCONST/1000.0); */
656   const_scale = ((int)w / 2) * log(pf_scale);
657   const_T     = (Pf->kT / 1000.0);
658   encode_seq(s1, s2);
659   /* static  short *S~S1, *S1~SS1, *SS~S2, *SS2; */
660   for (i = 0; i <= n1; i++)
661     Int->Pi[i] = Int->Gi[i] = 0.;
662   E = 0.;
663   Z = 0.;
664 
665   if (fold_constrained && cstruc != NULL) {
666     pos = strchr(cstruc, '|');
667     if (pos) {
668       ci = ck = cl = cj = 0;
669       /* long seq              & short seq
670        * .........||..|||||....&....||||...  w = maximal interaction length
671        *         ck       ci       cj  cl    */
672       strncpy(i_long, cstruc, n1);
673       i_long[n1] = '\0';
674       strncpy(i_short, &cstruc[n1], n2);
675       i_short[n2] = '\0';
676       pos         = strchr(i_long, '|');
677       if (pos)
678         ck = (int)(pos - i_long) + 1;    /* k */
679 
680       pos = strrchr(i_long, '|');
681       if (pos)
682         ci = (int)(pos - i_long) + 1;    /* i */
683 
684       pos = strrchr(i_short, '|');
685       if (pos)
686         cl = (int)(pos - i_short) + 1;    /* l */
687 
688       pos = strchr(i_short, '|');
689       if (pos)
690         cj = (int)(pos - i_short) + 1;    /* j */
691 
692       if (ck > 0 && ci > 0 && ci - ck + 1 > w) {
693         vrna_message_warning("distance between constrains in longer seq, %d, larger than -w = %d",
694                              ci - ck + 1,
695                              w);
696         vrna_message_error("pf_interact: could not satisfy all constraints");
697       }
698 
699       if (cj > 0 && cl > 0 && cl - cj + 1 > w) {
700         vrna_message_warning("distance between constrains in shorter seq, %d, larger than -w = %d",
701                              cl - cj + 1,
702                              w);
703         vrna_message_error("pf_interact: could not satisfy all constraints");
704       }
705     }
706   } else if (fold_constrained && cstruc == NULL) {
707     vrna_message_error("option -C selected, but no constrained structure given\n");
708   }
709 
710   if (fold_constrained)
711     pos = strchr(cstruc, '|');
712 
713   /*  qint_4[i][j][k][l] contribution that region (k-i) in seq1 (l=n1)
714    *  is paired to region (l-j) in seq 2(l=n2) that is
715    *  a region closed by bp k-l  and bp i-j */
716   qint_4 = (FLT_OR_DBL ****)vrna_alloc(sizeof(FLT_OR_DBL ***) * (n1 + 1));
717 
718   /* qint_4[i][j][k][l] */
719   for (i = 1; i <= n1; i++) {
720     int end_k;
721     end_k = i - w;
722     if (fold_constrained && pos && ci)
723       end_k = MAX2(i - w, ci - w);
724 
725     /* '|' constrains for long sequence: index i from 1 to n1 (5' to 3')*/
726     /* interaction has to include 3' most '|' constrain, ci */
727     if (fold_constrained && pos && ci && i == 1 && i < ci)
728       i = ci - w + 1 > 1 ? ci - w + 1 : 1;
729 
730     /* interaction has to include 5' most '|' constrain, ck*/
731     if (fold_constrained && pos && ck && i > ck + w - 1)
732       break;
733 
734     /* note: qint_4[i] will be freed before we allocate qint_4[i+1] */
735     qint_4[i] = (FLT_OR_DBL ***)vrna_alloc(sizeof(FLT_OR_DBL **) * (n2 + 1));
736     for (j = n2; j > 0; j--) {
737       qint_4[i][j] = (FLT_OR_DBL **)vrna_alloc(sizeof(FLT_OR_DBL *) * (w + 1));
738       for (k = 0; k <= w; k++)
739         qint_4[i][j][k] = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * (w + 1));
740     }
741 
742     prev_k = 1;
743     for (j = n2; j > 0; j--) {
744       int type, type2, end_l;
745       end_l = j + w;
746       if (fold_constrained && pos && ci)
747         end_l = MIN2(cj + w, j + w);
748 
749       /* '|' constrains for short sequence: index j from n2 to 1 (3' to 5')*/
750       /* interaction has to include 5' most '|' constrain, cj */
751       if (fold_constrained && pos && cj && j == n2 && j > cj)
752         j = cj + w - 1 > n2 ? n2 : cj + w - 1;
753 
754       /* interaction has to include 3' most '|' constrain, cl*/
755       if (fold_constrained && pos && cl && j < cl - w + 1)
756         break;
757 
758       type                = cc->ptype[cc->indx[i] - (n1 + j)];
759       qint_4[i][j][0][0]  = type ? Pf->expDuplexInit : 0;
760 
761       if (!type)
762         continue;
763 
764       qint_4[i][j][0][0] *= vrna_exp_E_ext_stem(type,
765                                           (i > 1) ? S1[i - 1] : -1,
766                                           (j < n2) ? SS2[j + 1] : -1,
767                                           Pf);
768 
769       rev_d = vrna_exp_E_ext_stem(rtype[type], (j > 1) ? SS2[j - 1] : -1, (i < n1) ? S1[i + 1] : -1, Pf);
770 
771       /* add inc5 and incr3 */
772       if ((i - incr5) > 0)
773         add_i5 = i - incr5;
774       else
775         add_i5 = 1;
776 
777       add_i3  = incr3;
778       pc_size = MIN2((w + incr3 + incr5), n1);
779       if (incr3 < pc_size)
780         add_i3 = incr3;
781       else
782         add_i3 = pc_size - 1;
783 
784       /* only one bp (no interior loop) */
785       if (p_c2 == NULL) {
786         /* consider only structure of longer seq. */
787         qint_ik[i][i] += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * scale[((int)w / 2)];
788         Z             += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * scale[((int)w / 2)];
789       } else {
790         /* consider structures of both seqs. */
791         qint_ik[i][i] += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * p_c2_S[j][0] *
792                          scale[((int)w / 2)];
793         Z += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * p_c2_S[j][0] *
794              scale[((int)w / 2)];
795       }
796 
797       /* int_ik */
798       /* check deltaG_ges = deltaG_int + deltaG_unstr; */
799       int_ik[i][i]  += qint_4[i][j][0][0] * rev_d * scale[((int)w / 2)];
800       Z_int         += qint_4[i][j][0][0] * rev_d * scale[((int)w / 2)];
801       temp_int      = 0.;
802 
803       temp    = 0.;
804       prev_l  = n2;
805       for (k = i - 1; k > end_k && k > 0; k--) {
806         if (fold_constrained && pos && cstruc[k - 1] == '|' && k > prev_k)
807           prev_k = k;
808 
809         for (l = j + 1; l < end_l && l <= n2; l++) {
810           int     a, b, ia, ib, isw;
811           double  scalew, tt, intt;
812 
813           type2 = cc->ptype[cc->indx[k] - (n1 + l)];
814           /* '|' : l HAS TO be paired: not pair (k,x) where x>l allowed */
815           if (fold_constrained && pos && cstruc[n1 + l - 1] == '|' && l < prev_l)
816             prev_l = l; /*break*/
817 
818           if (fold_constrained && pos && (k <= ck || i >= ci) && !type2)
819             continue;
820 
821           if (fold_constrained && pos && ((cstruc[k - 1] == '|') || (cstruc[n1 + l - 1] == '|')) &&
822               !type2)
823             break;
824 
825           if (!type2)
826             continue;
827 
828           /* to save memory keep only qint_4[i-w...i][][][] in memory
829            * use indices qint_4[i][j][a={0,1,...,w-1}][b={0,1,...,w-1}] */
830           a = i - k;  /* k -> a from 1...w-1*/
831           b = l - j;  /* l -> b from 1...w-1 */
832 
833           /* scale everything to w/2 */
834           isw = ((int)w / 2);
835           if ((a + b) < isw)
836             scalew = (scale[isw - (a + b)]);
837           else if ((a + b) > isw)
838             scalew = 1 / (scale[(a + b) - isw]);
839           else
840             scalew = 1;
841 
842           if (i - k + l - j - 2 <= MAXLOOP) {
843             if (k >= prev_k && l <= prev_l) {
844               /* don't violate constrains */
845               E = exp_E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type],
846                                 S1[k + 1], SS2[l - 1], S1[i - 1], SS2[j + 1], Pf) *
847                   scale[i - k + l - j]; /* add *scale[u1+u2+2] */
848 
849               qint_4[i][j][a][b] += (qint_4[k][l][0][0] * E);
850 
851               /* use ia and ib to go from a....w-1 and from b....w-1  */
852               ia = ib = 1;
853               while ((a + ia) < w && i - (a + ia) >= 1 && (b + ib) < w && (j + b + ib) <= n2) {
854                 int iaa, ibb;
855 
856                 qint_4[i][j][a + ia][b + ib] += qint_4[k][l][ia][ib] * E;
857 
858                 iaa = ia + 1;
859                 while (a + iaa < w && i - (a + iaa) >= 1) {
860                   qint_4[i][j][a + iaa][b + ib] += qint_4[k][l][iaa][ib] * E;
861                   ++iaa;
862                 }
863 
864                 ibb = ib + 1;
865                 while ((b + ibb) < w && (j + b + ibb) <= n2) {
866                   qint_4[i][j][a + ia][b + ibb] += qint_4[k][l][ia][ibb] * E;
867                   ++ibb;
868                 }
869                 ++ia;
870                 ++ib;
871               }
872             }
873           }
874 
875           /* '|' constrain in long sequence */
876           /* collect interactions starting before 5' most '|' constrain */
877           if (fold_constrained && pos && ci && i < ci)
878             continue;
879 
880           /* collect interactions ending after 3' most '|' constrain*/
881           if (fold_constrained && pos && ck && k > ck)
882             continue;
883 
884           /* '|' constrain in short sequence */
885           /* collect interactions starting before 5' most '|' constrain */
886           if (fold_constrained && pos && cj && j > cj)
887             continue;
888 
889           /* collect interactions ending after 3' most '|' constrain*/
890           if (fold_constrained && pos && cl && l < cl)
891             continue;
892 
893           /* scale everything to w/2*/
894           /* qint_ik[k][i] all interactions where k and i both are paired */
895           /* substract incr5 from k */
896           if (k - incr5 > 0)
897             add_i5 = k - incr5;
898           else
899             add_i5 = 1;
900 
901           /* add incr3 to i */
902           pc_size = MIN2((w + incr3 + incr5), n1);
903           if (i - k + incr3 < pc_size)
904             add_i3 = i - k + incr3;
905           else
906             add_i3 = pc_size - 1;
907 
908           if (p_c2 == NULL) /* consider only structure of longer seq. */
909             tt = qint_4[i][j][a][b] * p_c_S[add_i5][add_i3] * scalew * rev_d;
910           else              /* consider structures of both seqs. */
911             tt = qint_4[i][j][a][b] * p_c_S[add_i5][add_i3] * p_c2_S[j][b] * scalew * rev_d;
912 
913           temp          += tt;
914           qint_ik[k][i] += tt;
915           /* int_ik */
916           /* check deltaG_ges = deltaG_int + deltaG_unstr; */
917           intt          = qint_4[i][j][a][b] * scalew * rev_d;
918           temp_int      += intt;
919           int_ik[k][i]  += intt;
920           G_is          = (-log(tt) - const_scale) * (const_T);
921           if (G_is < G_min || EQUAL(G_is, G_min)) {
922             G_min   = G_is;
923             Gi_min  = (-log(intt) - const_scale) * (const_T);
924             gi      = i;
925             gj      = j;
926             gk      = k;
927             gl      = l;
928           }
929         }
930       }
931       Z += temp;
932       /* int_ik */
933       Z_int += temp_int;
934     }
935 
936     /* free qint_4 values not needed any more */
937     if (i > w) {
938       int bla;
939       bla = i - w;
940       if (fold_constrained && pos && ci && i - w < ci - w + 1)
941         continue;
942 
943       if (fold_constrained && pos && ci)
944         bla = MAX2(ci - w + 1, i - w);
945 
946       for (j = n2; j > 0; j--) {
947         for (k = 0; k <= w; k++)
948           free(qint_4[bla][j][k]);
949         free(qint_4[bla][j]);
950       }
951       free(qint_4[bla]);
952       qint_4[bla] = NULL;
953     }
954   }
955 
956 
957   Z2 = 0.0;
958   for (i = 1; i <= n1; i++) {
959     for (k = i; k <= n1 && k < i + w; k++) {
960       Z2 += qint_ik[i][k];
961       for (l = i; l <= k; l++) {
962         /* Int->Pi[l]: prob that position l is within a paired region */
963         /* qint_ik[i][k] as well as Z are scaled to scale[((int) w/2) */
964         Int->Pi[l] += qint_ik[i][k] / Z;
965         /* Int->Gi[l]: minimal delta G at position [l] */
966         Int->Gi[l] = MIN2(Int->Gi[l],
967                           (-log(qint_ik[i][k]) - (((int)w / 2) * log(pf_scale))) *
968                           (Pf->kT / 1000.0));
969       }
970     }
971   }
972   if (n1 > w) {
973     int start_i, end_i;
974     start_i = n1 - w + 1;
975     end_i   = n1;
976     if (fold_constrained && pos && ci) {
977       /* a break in the k loop might result in unfreed values */
978       start_i = ci - w + 1 < n1 - w + 1 ? ci - w + 1 : n1 - w + 1;
979       start_i = start_i > 0 ? start_i : 1;
980       /* start_i = ck; */
981       end_i = ck + w - 1 > n1 ? n1 : ck + w - 1;
982     }
983 
984     for (i = start_i; i <= end_i; i++) {
985       if (qint_4[i] == NULL)
986         continue;
987 
988       for (j = n2; j > 0; j--) {
989         for (k = 0; k <= w; k++)
990           free(qint_4[i][j][k]);
991         free(qint_4[i][j]);
992       }
993       free(qint_4[i]);
994     }
995     free(qint_4);
996   } else {
997     int start_i, end_i;
998     start_i = 1;
999     end_i   = n1;
1000     if (fold_constrained && pos) {
1001       start_i = ci - w + 1 > 0 ? ci - w + 1 : 1;
1002       end_i   = ck + w - 1 > n1 ? n1 : ck + w - 1;
1003     }
1004 
1005     for (i = start_i; i <= end_i; i++) {
1006       for (j = n2; j > 0; j--) {
1007         for (k = 0; k <= w; k++)
1008           free(qint_4[i][j][k]);
1009         free(qint_4[i][j]);
1010       }
1011       free(qint_4[i]);
1012     }
1013     free(qint_4);
1014   }
1015 
1016   if (fold_constrained && (gi == 0 || gk == 0 || gl == 0 || gj == 0))
1017     vrna_message_error("pf_interact: could not satisfy all constraints");
1018 
1019   /* fill structure interact */
1020   Int->length   = n1;
1021   Int->i        = gi;
1022   Int->j        = gj;
1023   Int->k        = gk;
1024   Int->l        = gl;
1025   Int->Gikjl    = G_min;
1026   Int->Gikjl_wo = Gi_min;
1027 
1028   free(i_long);
1029   free(i_short);
1030 
1031   for (i = 1; i <= n1; i++)
1032     free(int_ik[i]);
1033   free(int_ik);
1034   for (i = 1; i <= n1; i++)
1035     free(qint_ik[i]);
1036   free(qint_ik);
1037 
1038   /* reset the global variables pf_scale and scale to their original values */
1039   pf_scale = temppfs;                 /* reset pf_scale */
1040   scale_stru_pf_params((unsigned)n1); /* reset the scale array */
1041   free_pf_arrays();                   /* for arrays for pf_fold(...) */
1042 
1043   if (expMLbase != NULL) {
1044     free(expMLbase);
1045     expMLbase = NULL;
1046   }
1047 
1048   if (scale != NULL) {
1049     free(scale);
1050     scale = NULL;
1051   }
1052 
1053   for (i = 1; i <= n1; i++)
1054     free(p_c_S[i]);
1055   free(p_c_S);
1056   if (p_c2 != NULL) {
1057     for (i = 1; i <= n2; i++)
1058       free(p_c2_S[i]);
1059     free(p_c2_S);
1060   }
1061 
1062   free(Seq);
1063   free(cc->indx);
1064   free(cc->ptype);
1065   free(cc);
1066   return Int;
1067 }
1068 
1069 
1070 /*------------------------------------------------------------------------*/
1071 /* use an extra scale for pf_interact, here sl is the longer sequence */
1072 PRIVATE void
scale_int(const char * s,const char * sl,double * sc_int)1073 scale_int(const char  *s,
1074           const char  *sl,
1075           double      *sc_int)
1076 {
1077   int     n, nl;
1078   duplexT mfe;
1079   double  kT;
1080 
1081   n   = strlen(s);
1082   nl  = strlen(sl);
1083 
1084   free(expMLbase);
1085   free(scale);
1086 
1087   expMLbase = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * ((nl + 1) * 2));
1088   scale     = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * ((nl + 1) * 2));
1089 
1090   /* use RNA duplex to get a realistic estimate for the best possible
1091    * interaction energy between the short RNA s and its target sl */
1092   mfe = duplexfold(s, sl);
1093 
1094   kT = Pf->kT / 1000.0; /* in Kcal */
1095 
1096   /* sc_int is similar to pf_scale: i.e. one time the scale */
1097   *sc_int = exp(-(mfe.energy) / kT / n);
1098 
1099   /* free the structure returned by duplexfold */
1100   free(mfe.structure);
1101 }
1102 
1103 
1104 /*----------------------------------------------------------------------*/
1105 /* init_pf_two(n) :gets the arrays, that you need, from part_func.c */
1106 /* get_pf_arrays(&S, &S1, &ptype, &qb, &qm, &q1k, &qln);*/
1107 /* init_pf_fold(), update_pf_params, encode_char(), make_ptypes() are called by pf_fold() */
1108 PRIVATE void
init_pf_two(int length)1109 init_pf_two(int length)
1110 {
1111 #ifdef SUN4
1112   nonstandard_arithmetic();
1113 #else
1114 #ifdef HP9
1115   fpsetfastmode(1);
1116 #endif
1117 #endif
1118   make_pair_matrix();
1119 
1120   /* gets the arrays, that we need, from part_func.c */
1121   if (!get_pf_arrays(&S, &S1, &ptype, &qb, &qm, &q1k, &qln))
1122     vrna_message_error("init_pf_two: pf_fold() has to be called before calling pf_unstru()\n");
1123 
1124   /* get a pointer to the base pair probs */
1125   probs = export_bppm();
1126 
1127   scale_stru_pf_params((unsigned)length);
1128 
1129   init_length = length;
1130   if (init_temp != Pf->temperature)
1131     vrna_message_error("init_pf_two: inconsistency with temperature");
1132 }
1133 
1134 
1135 PRIVATE void
get_up_arrays(unsigned int length)1136 get_up_arrays(unsigned int length)
1137 {
1138   unsigned int  l1  = length + 1;
1139   unsigned int  l2  = length + 2;
1140 
1141   prpr      = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * ((l1 * l2) >> 1));
1142   expMLbase = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * l2);
1143   scale     = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * l2);
1144   qqm2      = (double *)vrna_alloc(sizeof(double) * l2);
1145   qq_1m2    = (double *)vrna_alloc(sizeof(double) * l2);
1146   qqm       = (double *)vrna_alloc(sizeof(double) * l2);
1147   qqm1      = (double *)vrna_alloc(sizeof(double) * l2);
1148   my_iindx  = vrna_idx_row_wise(length);
1149 }
1150 
1151 
1152 PRIVATE void
free_up_arrays(void)1153 free_up_arrays(void)
1154 {
1155   if (prpr != NULL) {
1156     free(prpr);
1157     prpr = NULL;
1158   }
1159 
1160   if (expMLbase != NULL) {
1161     free(expMLbase);
1162     expMLbase = NULL;
1163   }
1164 
1165   if (scale != NULL) {
1166     free(scale);
1167     scale = NULL;
1168   }
1169 
1170   if (qqm != NULL) {
1171     free(qqm);
1172     qqm = NULL;
1173   }
1174 
1175   if (qqm1 != NULL) {
1176     free(qqm1);
1177     qqm1 = NULL;
1178   }
1179 
1180   if (qqm2 != NULL) {
1181     free(qqm2);
1182     qqm2 = NULL;
1183   }
1184 
1185   if (qq_1m2 != NULL) {
1186     free(qq_1m2);
1187     qq_1m2 = NULL;
1188   }
1189 
1190   if (my_iindx != NULL) {
1191     free(my_iindx);
1192     my_iindx = NULL;
1193   }
1194 }
1195 
1196 
1197 PUBLIC void
free_interact(interact * pin)1198 free_interact(interact *pin)
1199 {
1200   if (S != NULL && pin != NULL) {
1201     free(S);
1202     S = NULL;
1203   }
1204 
1205   if (S1 != NULL && pin != NULL) {
1206     free(S1);
1207     S1 = NULL;
1208   }
1209 
1210   if (pin != NULL) {
1211     free(pin->Pi);
1212     free(pin->Gi);
1213     free(pin);
1214     pin = NULL;
1215   }
1216 }
1217 
1218 
1219 /*---------------------------------------------------------------------------*/
1220 
1221 PRIVATE void
encode_seq(const char * s1,const char * s2)1222 encode_seq(const char *s1,
1223            const char *s2)
1224 {
1225   unsigned int i, l;
1226 
1227   l = strlen(s1);
1228   /* S and S1 are freed by free_pf_arrays(); ! */
1229   S   = (short *)vrna_alloc(sizeof(short) * (l + 1));
1230   S1  = (short *)vrna_alloc(sizeof(short) * (l + 1));
1231   /* S1 exists only for the special X K and I bases and energy_set!=0 */
1232   S[0] = l;
1233   for (i = 1; i <= l; i++) {
1234     /* make numerical encoding of sequence */
1235     S[i]  = (short)encode_char(toupper(s1[i - 1]));
1236     S1[i] = alias[S[i]]; /* for mismatches of nostandard bases */
1237   }
1238   if (s2 != NULL) {
1239     l = strlen(s2);
1240     /* SS2 exists only for the special X K and I bases and energy_set!=0 */
1241     SS[0] = l;
1242     for (i = 1; i <= l; i++) {
1243       /* make numerical encoding of sequence */
1244       SS[i]   = (short)encode_char(toupper(s2[i - 1]));
1245       SS2[i]  = alias[SS[i]]; /* for mismatches of nostandard bases */
1246     }
1247   }
1248 }
1249 
1250 
1251 /*-------------------------------------------------------------------------*/
1252 /* scale energy parameters and pre-calculate Boltzmann weights:
1253  * most of this is done in structure Pf see params.c,h (function:
1254  * get_scaled_pf_parameters(), only arrays scale and expMLbase are handled here*/
1255 PRIVATE void
scale_stru_pf_params(unsigned int length)1256 scale_stru_pf_params(unsigned int length)
1257 {
1258   unsigned int  i;
1259   double        kT;
1260 
1261 
1262   /* Do this only at the first call for get_scaled_pf_parameters()
1263    * and/or if temperature has changed*/
1264   if (init_temp != temperature) {
1265     if (Pf)
1266       free(Pf);
1267 
1268     vrna_md_t md;
1269     set_model_details(&md);
1270     Pf = vrna_exp_params(&md);
1271   }
1272 
1273   init_temp = Pf->temperature;
1274 
1275   kT = Pf->kT; /* kT in cal/mol  */
1276 
1277   /* scaling factors (to avoid overflows) */
1278   if (pf_scale == -1) {
1279     /* mean energy for random sequences: 184.3*length cal */
1280     pf_scale = exp(-(-185 + (Pf->temperature - 37.) * 7.27) / kT);
1281     if (pf_scale < 1)
1282       pf_scale = 1;
1283   }
1284 
1285   Pf->pf_scale  = pf_scale;
1286   scale[0]      = 1.;
1287   scale[1]      = 1. / pf_scale;
1288   expMLbase[0]  = 1;
1289   expMLbase[1]  = Pf->expMLbase / pf_scale;
1290   for (i = 2; i <= length + 1; i++) {
1291     scale[i]      = scale[i / 2] * scale[i - (i / 2)];
1292     expMLbase[i]  = pow(Pf->expMLbase, (double)i) * scale[i];
1293   }
1294 }
1295 
1296 
1297 /*-------------------------------------------------------------------------*/
1298 /* make a results structure containing all u-values & the header */
1299 PUBLIC pu_out *
get_u_vals(pu_contrib * p_c,int ** unpaired_values,char * select_contrib)1300 get_u_vals(pu_contrib *p_c,
1301            int        **unpaired_values,
1302            char       *select_contrib)
1303 {
1304   int     i, j, k, l, num_u_vals, count, contribs, size, w, len;
1305   int     S, E, H, I, M;
1306   int     off_S, off_E, off_H, off_I, off_M;
1307   /* double **p_cont,**p_cont_sh, dG_u; p_u AND its contributions */
1308   pu_out  *u_results;
1309 
1310   len = p_c->length;
1311 
1312   /* number of different -u values */
1313   for (num_u_vals = 0, i = 1; i <= unpaired_values[0][0]; i++) {
1314     j = unpaired_values[i][0];
1315     do
1316       num_u_vals++;
1317     while (++j <= unpaired_values[i][1]);
1318   }
1319   /* check which contributions ([-c "SHIME"] ) are desired by the user,
1320    * set the offset for each contribution */
1321   contribs  = 0;
1322   S         = E = H = I = M = 0;
1323   off_S     = off_E = off_H = off_I = off_M = 0;
1324   if (strchr(select_contrib, 'S')) {
1325     S     = 1;
1326     off_S = contribs;
1327     ++contribs;
1328   }
1329 
1330   if (strchr(select_contrib, 'E')) {
1331     E     = 1;
1332     off_E = contribs;
1333     ++contribs;
1334   }
1335 
1336   if (strchr(select_contrib, 'H')) {
1337     H     = 1;
1338     off_H = contribs;
1339     ++contribs;
1340   }
1341 
1342   if (strchr(select_contrib, 'I')) {
1343     I     = 1;
1344     off_I = contribs;
1345     ++contribs;
1346   }
1347 
1348   if (strchr(select_contrib, 'M')) {
1349     M     = 1;
1350     off_M = contribs;
1351     ++contribs;
1352   }
1353 
1354   if (contribs > 5)
1355     vrna_message_error("get_u_vals: error with contribs!");
1356 
1357   /* allocate the results structure */
1358   u_results       = (pu_out *)vrna_alloc(1 * sizeof(pu_out));
1359   u_results->len  = len; /* sequence length */
1360   /*num_u_vals differnet -u values, contribs [-c "SHIME"] */
1361   u_results->u_vals   = num_u_vals;
1362   u_results->contribs = contribs;
1363   /* add 1 column for position within the sequence and
1364    * add 1 column for the free energy of interaction values */
1365   /* header e.g. u3I (contribution for u3 interior loops */
1366   size              = 1 + (num_u_vals * contribs) + 1;
1367   u_results->header = (char **)vrna_alloc((size + 1) * sizeof(char *));
1368   for (i = 0; i < (size + 1); i++)
1369     u_results->header[i] = (char *)vrna_alloc(10 * sizeof(char));
1370   /* different free energies for all  -u and -c combinations */
1371   u_results->u_values = (double **)vrna_alloc((size + 1) * sizeof(double *));
1372   for (i = 0; i < (size + 1); i++)
1373     /* position within the sequence  */
1374     u_results->u_values[i] = (double *)vrna_alloc((len + 3) * sizeof(double));
1375   /* write the position within the sequence in the u_results array
1376    * at column zerro */
1377   sprintf(u_results->header[0], "pos");
1378   for (i = 0; i <= len; i++)
1379     /* add the position*/
1380     u_results->u_values[0][i] = i;
1381   /* go over the different -u values, u_vals[] listy of different -u values*/
1382   for (count = k = 1; k <= unpaired_values[0][0]; k++) {
1383     l = unpaired_values[k][0];
1384     do {
1385       int offset;                             /* offset for the respective -u value (depents on the number
1386                                                * of the -u value and on the numbers of contribs */
1387 
1388       offset = ((count - 1) * contribs) + 1;  /* first colum is the position */
1389       /* set the current value of -u : here we call it w */
1390       w = l;                                  /* set w to the actual -u value */
1391       if (w > len)
1392         break;                                /* corr caro */
1393 
1394       /* make the header - look which contribitions are wanted */
1395       if (S)
1396         sprintf(u_results->header[offset + off_S], "u%dS", w);
1397 
1398       if (E)
1399         sprintf(u_results->header[offset + off_E], "u%dE", w);
1400 
1401       if (H)
1402         sprintf(u_results->header[offset + off_H], "u%dH", w);
1403 
1404       if (I)
1405         sprintf(u_results->header[offset + off_I], "u%dI", w);
1406 
1407       if (M)
1408         sprintf(u_results->header[offset + off_M], "u%dM", w);
1409 
1410       if (p_c != NULL) {
1411         for (i = 1; i <= len; i++) {
1412           /* for each position */
1413           /* w goes form j to i (intervall end at i) */
1414           for (j = i; j < MIN2((i + w), len + 1); j++) {
1415             /* for each -u value < w
1416              * this is not necessay ->
1417              * calculate j from i and w
1418              * : (j-i+1) == w */
1419             double blubb;
1420             /* if (j-i+1) == w we have the -u = w value wanted */
1421             if ((j - i + 1) == w && i + w - 1 <= len) {
1422               blubb = p_c->H[i][j - i] + p_c->I[i][j - i] + p_c->M[i][j - i] + p_c->E[i][j - i];
1423 
1424               /* printf("len %d  blubb %.3f \n",len, blubb); */
1425               if (S)
1426                 u_results->u_values[offset + off_S][i + w - 1] += blubb;
1427 
1428               if (E)
1429                 u_results->u_values[offset + off_E][i + w - 1] += p_c->E[i][j - i];
1430 
1431               if (H)
1432                 u_results->u_values[offset + off_H][i + w - 1] += p_c->H[i][j - i];
1433 
1434               if (I)
1435                 u_results->u_values[offset + off_I][i + w - 1] += p_c->I[i][j - i];
1436 
1437               if (M)
1438                 u_results->u_values[offset + off_M][i + w - 1] += p_c->M[i][j - i];
1439             }
1440 
1441             if (i < w && (j - i + 1) != w && i + w - 1 > len && i + w - 1 < len + 3) {
1442               if (S)
1443                 u_results->u_values[offset + off_S][i + w - 1] = -1;
1444 
1445               if (E)
1446                 u_results->u_values[offset + off_E][i + w - 1] = -1;
1447 
1448               if (H)
1449                 u_results->u_values[offset + off_H][i + w - 1] = -1;
1450 
1451               if (I)
1452                 u_results->u_values[offset + off_I][i + w - 1] = -1;
1453 
1454               if (M)
1455                 u_results->u_values[offset + off_M][i + w - 1] = -1;
1456             }
1457           }
1458         }
1459       } else {
1460         return NULL;       /* error */
1461       }
1462 
1463       count++;
1464     } while (++l <= unpaired_values[k][1]);
1465   }
1466   return u_results;  /*success*/
1467 }
1468 
1469 
1470 /* plot the results structure */
1471 /* when plotting the results for the target seq we add a header */
1472 /* when plotting the results for the interaction partner u want no header,
1473  * set s1 to NULL to avoid plotting the header */
1474 /* currently we plot the free energies to a file: the probability of
1475  * being unpaired for region [i,j], p_u[i,j], is related to the free
1476  * energy to open region [i,j], dG_u[i,j] by:
1477  * dG_u[i,j] = -log(p_u[i,j])*(temperature+K0)*GASCONST/1000.0; */
1478 PUBLIC int
plot_free_pu_out(pu_out * res,interact * pint,char * ofile,char * head)1479 plot_free_pu_out(pu_out   *res,
1480                  interact *pint,
1481                  char     *ofile,
1482                  char     *head)
1483 {
1484   int     size, s, i, len;
1485   double  dG_u;
1486   char    nan[4], *time, dg[11];
1487   FILE    *wastl;
1488   double  kT = Pf->kT;
1489 
1490   wastl = fopen(ofile, "a");
1491   if (wastl == NULL) {
1492     vrna_message_warning("p_cont: can't open %s for Up_plot", ofile);
1493     return 0;
1494   }
1495 
1496   sprintf(dg, "dG");
1497 
1498   /* printf("T=%.16f \n(temperature+K0)*GASCONST/1000.0 = %.16f\n",temperature,(temperature+K0)*GASCONST/1000.0); */
1499 
1500   /* write the header of the output file:  */
1501   /*  # timestamp commandlineaufruf   */
1502   /*  # length and name of first sequence (target) */
1503   /*  # first seq */
1504   /*  # length and name of second sequence (interaction partner) */
1505   /*  # second seq */
1506   /* the next line is the output for the target: colums
1507    * position in target | dG_unpaired values for target | interaction energy */
1508   /*  # pos   u1S   u1H  dg */
1509   /*  values for target */
1510   /* if -b was choosen: the next lines are the dG_unpaired values for
1511    * the interaction partner */
1512   /*  # pos   u1S   u1H  */
1513   /*  values for the interaction partner */
1514 
1515   /* print header, if nh is zerro */
1516   if (head) {
1517     time = vrna_time_stamp();
1518     fprintf(wastl, "# %s\n", time);
1519     fprintf(wastl, "%s\n", head);
1520   }
1521 
1522   fprintf(wastl, "# ");
1523   /* }  else { fprintf(wastl," "); } close if before  */
1524   len   = res->len;
1525   size  = res->u_vals * res->contribs;
1526 
1527   sprintf(nan, "NA");
1528   nan[2] = '\0';
1529 
1530   for (i = 0; i <= len; i++) {
1531     for (s = 0; s <= size + 1; s++) {
1532       /* that is for different contribution */
1533       if (i == 0 && s > size && pint != NULL)
1534         fprintf(wastl, "%8s  ", dg);
1535 
1536       if (i != 0) {
1537         if (s > 0 && s <= size) {
1538           if (res->u_values[s][i] > 0.0) {
1539             dG_u = -log(res->u_values[s][i]) * kT / 1000.0;
1540             fprintf(wastl, "%8.3f  ", dG_u);
1541           } else {
1542             /* no p_u value was defined print nan*/
1543             fprintf(wastl, "%8s  ", nan);
1544           }
1545         } else if (s > size && pint != NULL) {
1546           fprintf(wastl, "%8.3f  ", pint->Gi[i]);
1547         } else if (s == 0) {
1548           fprintf(wastl, "%8.0f  ", res->u_values[s][i]);
1549         }
1550       } else {
1551         if (s > 1)
1552           fprintf(wastl, "%8s  ", res->header[s]);
1553         else
1554           fprintf(wastl, "%7s  ", res->header[s]);
1555       }
1556     }
1557     fprintf(wastl, "\n");
1558   }
1559   fclose(wastl);
1560   /*free pu_out* res */
1561   if (res != NULL) {
1562     for (i = 0; i <= (size + 2); i++) {
1563       free(res->u_values[i]);
1564       free(res->header[i]);
1565     }
1566     free(res->u_values);
1567     free(res->header);
1568     free(res);
1569     res = NULL;
1570   }
1571 
1572   return 1;  /* success */
1573 }
1574 
1575 
1576 PUBLIC int
Up_plot(pu_contrib * p_c,pu_contrib * p_c_sh,interact * pint,char * ofile,int ** unpaired_values,char * select_contrib,char * head,unsigned int mode)1577 Up_plot(pu_contrib    *p_c,
1578         pu_contrib    *p_c_sh,
1579         interact      *pint,
1580         char          *ofile,
1581         int           **unpaired_values,
1582         char          *select_contrib,
1583         char          *head,
1584         unsigned int  mode)
1585 {
1586   pu_out  *dada;
1587   int     ret;
1588 
1589   /* check what case we have */
1590 
1591   /* upmode = 1 only one seq */
1592   /* if(p_c != NULL && pint == NULL) { */
1593   if (mode & RNA_UP_MODE_1) {
1594     dada  = get_u_vals(p_c, unpaired_values, select_contrib);
1595     ret   = plot_free_pu_out(dada, NULL, ofile, head);
1596 
1597     /* upmode > 1 cofolding */
1598     /* } else if (p_c != NULL && pint != NULL) { */
1599   } else if (mode & RNA_UP_MODE_2) {
1600     dada  = get_u_vals(p_c, unpaired_values, select_contrib);
1601     ret   = plot_free_pu_out(dada, pint, ofile, head);
1602 
1603     /* upmode = 3  cofolding*/
1604     /* } else if (p_c == NULL && p_c_sh != NULL) { */
1605   }
1606 
1607   if (mode & RNA_UP_MODE_3) {
1608     dada  = get_u_vals(p_c, unpaired_values, select_contrib);
1609     ret   = plot_free_pu_out(dada, pint, ofile, head);
1610 
1611     dada  = get_u_vals(p_c_sh, unpaired_values, select_contrib);
1612     ret   = plot_free_pu_out(dada, NULL, ofile, NULL);
1613   }
1614 
1615   return ret;
1616 }
1617 
1618 
1619 /*-------------------------------------------------------------------------*/
1620 /* copy from part_func_co.c */
1621 PRIVATE constrain *
get_ptypes_up(char * Seq,const char * structure)1622 get_ptypes_up(char        *Seq,
1623               const char  *structure)
1624 {
1625   int       n, i, j, k, l, length;
1626   constrain *con;
1627   short     *s, *s1;
1628 
1629   length = strlen(Seq);
1630   make_pair_matrix();
1631   con       = (constrain *)vrna_alloc(sizeof(constrain));
1632   con->indx = (int *)vrna_alloc(sizeof(int) * (length + 1));
1633   for (i = 1; i <= length; i++)
1634     con->indx[i] = ((length + 1 - i) * (length - i)) / 2 + length + 1;
1635   con->ptype = (char *)vrna_alloc(sizeof(char) * ((length + 1) * (length + 2) / 2));
1636 
1637   set_encoded_seq((const char *)Seq, &s, &s1);
1638 
1639   n = s[0];
1640   for (k = 1; k <= n - CO_TURN - 1; k++)
1641     for (l = 1; l <= 2; l++) {
1642       int type, ntype = 0, otype = 0;
1643       i = k;
1644       j = i + CO_TURN + l;
1645       if (j > n)
1646         continue;
1647 
1648       type = pair[s[i]][s[j]];
1649       while ((i >= 1) && (j <= n)) {
1650         if ((i > 1) && (j < n))
1651           ntype = pair[s[i - 1]][s[j + 1]];
1652 
1653         if (noLonelyPairs && (!otype) && (!ntype))
1654           type = 0; /* i.j can only form isolated pairs */
1655 
1656         con->ptype[con->indx[i] - j]  = (char)type;
1657         otype                         = type;
1658         type                          = ntype;
1659         i--;
1660         j++;
1661       }
1662     }
1663 
1664   if (fold_constrained && (structure != NULL)) {
1665     int   hx, *stack;
1666     char  type;
1667     stack = (int *)vrna_alloc(sizeof(int) * (n + 1));
1668     for (hx = 0, j = 1; j <= n; j++) {
1669       switch (structure[j - 1]) {
1670         case 'x': /* can't pair */
1671           for (l = 1; l < j - CO_TURN; l++)
1672             con->ptype[con->indx[l] - j] = 0;
1673           for (l = j + CO_TURN + 1; l <= n; l++)
1674             con->ptype[con->indx[j] - l] = 0;
1675           break;
1676         case '(':
1677           stack[hx++] = j;
1678         /* fallthrough */
1679         case '<': /* pairs upstream */
1680           break;
1681         case ')':
1682           if (hx <= 0)
1683             vrna_message_error("1. unbalanced brackets in constraints\n%s", structure);
1684 
1685           i     = stack[--hx];
1686           type  = con->ptype[con->indx[i] - j];
1687           /* don't allow pairs i<k<j<l */
1688           for (k = i; k <= j; k++)
1689             for (l = j; l <= n; l++)
1690               con->ptype[con->indx[k] - l] = 0;
1691           /* don't allow pairs k<i<l<j */
1692           for (k = 1; k <= i; k++)
1693             for (l = i; l <= j; l++)
1694               con->ptype[con->indx[k] - l] = 0;
1695           con->ptype[con->indx[i] - j] = (type == 0) ? 7 : type;
1696         case '>': /* pairs downstream */
1697           break;
1698       }
1699     }
1700     if (hx != 0)
1701       vrna_message_error("2. unbalanced brackets in constraint string\n%s", structure);
1702 
1703     free(stack);
1704   }
1705 
1706   free(s);
1707   free(s1);
1708   return con;
1709 }
1710 
1711 
1712 PRIVATE void
set_encoded_seq(const char * sequence,short ** S,short ** S1)1713 set_encoded_seq(const char  *sequence,
1714                 short       **S,
1715                 short       **S1)
1716 {
1717   unsigned int i, l;
1718 
1719   l = strlen(sequence);
1720   if (S != NULL) {
1721     *S = (short *)vrna_alloc(sizeof(short) * (l + 2));
1722     for (i = 1; i <= l; i++) /* make numerical encoding of sequence */
1723       (*S)[i] = (short)encode_char(toupper(sequence[i - 1]));
1724     (*S)[l + 1] = (*S)[1];
1725     (*S)[0]     = (short)l;
1726   }
1727 
1728   /* S1 exists only for the special X K and I bases and energy_set!=0 */
1729   if (S1 != NULL) {
1730     *S1 = (short *)vrna_alloc(sizeof(short) * (l + 2));
1731     for (i = 1; i <= l; i++)                                            /* make numerical encoding of sequence */
1732       (*S1)[i] = alias[(short)encode_char(toupper(sequence[i - 1]))];   /* for mismatches of nostandard bases */
1733     /* for circular folding add first base at position n+1 and last base at position 0 in S1 */
1734     (*S1)[l + 1]  = (*S1)[1];
1735     (*S1)[0]      = (*S1)[l];
1736   }
1737 }
1738