1 /*
2 * partiton function for RNA secondary structures
3 *
4 * Ivo L Hofacker
5 *
6 * Vienna RNA package
7 */
8 /*
9 * $Log: part_func_up.c,v $
10 * Revision 1.4 2008/07/04 14:27:36 ivo
11 * Modify output (again)
12 *
13 * Revision 1.3 2008/05/08 14:11:55 ivo
14 * minor output changes
15 *
16 * Revision 1.2 2007/12/13 10:19:54 ivo
17 * major RNAup update from Ulli
18 *
19 * Revision 1.1 2007/04/30 15:13:13 ivo
20 * merge RNAup into package
21 *
22 * Revision 1.11 2006/07/17 11:11:43 ulim
23 * removed all globals from fold_vars.h,c, cleaned code
24 *
25 * Revision 1.10 2006/07/12 09:19:29 ulim
26 * global variables w, incr3 and incr5 are now local
27 *
28 * Revision 1.9 2006/07/11 12:45:02 ulim
29 * remove redundancy in function pf_interact(...)
30 *
31 * Revision 1.8 2006/03/08 15:26:37 ulim
32 * modified -o[1|2], added meaningful default
33 *
34 * Revision 1.5 2006/01/23 11:27:04 ulim
35 * include file into new package version. cleaned it
36 *
37 * Revision 1.2 2005/07/29 15:13:37 ulim
38 * put the function, calculating the probability of an unpaired region in
39 * an RNA and the function calculating the prob. of interaction between 2 RNAs
40 * in a seperate file (pf_two.c)
41 *
42 * Revision 1.1 2005/07/26 13:27:12 ulim
43 * Initial revision
44 *
45 * Revision 1.2 2005/07/01 13:14:57 ulim
46 * fixed error in scaling, included new commandline options -incr5, -incr3 to
47 * allow a variable number of unpaired positions 5' and 3' of the site of
48 * interaction between the two RNAs
49 *
50 * Revision 1.1 2005/04/19 08:16:38 ulim
51 * Initial revision
52 */
53
54 #ifdef HAVE_CONFIG_H
55 #include "config.h"
56 #endif
57
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <math.h>
62 #include <float.h> /* #defines FLT_MAX ... */
63 #include <unistd.h>
64 #include "ViennaRNA/fold.h"
65 #include "ViennaRNA/utils/basic.h"
66 #include "ViennaRNA/params/default.h"
67 #include "ViennaRNA/fold_vars.h"
68 #include "ViennaRNA/pair_mat.h"
69 #include "ViennaRNA/params/basic.h"
70 #include "ViennaRNA/part_func.h"
71 #include "ViennaRNA/loops/all.h"
72 #include "ViennaRNA/part_func_up.h"
73 #include "ViennaRNA/duplex.h"
74
75
76 #define CO_TURN 0
77 #define ZERO(A) (fabs(A) < DBL_EPSILON)
78 #define EQUAL(A, B) (fabs((A)-(B)) < 1000 * DBL_EPSILON)
79 #define ISOLATED 256.0
80 /* #define NUMERIC 1 */
81
82 /*
83 #################################
84 # GLOBAL VARIABLES #
85 #################################
86 */
87
88 /*
89 #################################
90 # PRIVATE VARIABLES #
91 #################################
92 */
93 PRIVATE short *S = NULL, *S1 = NULL, *SS = NULL, *SS2 = NULL;
94 PRIVATE vrna_exp_param_t *Pf = NULL; /* use this structure for all the exp-arrays*/
95 PRIVATE FLT_OR_DBL *qb = NULL, *qm = NULL, *prpr = NULL; /* add arrays for pf_unpaired()*/
96 PRIVATE FLT_OR_DBL *probs = NULL;
97 PRIVATE FLT_OR_DBL *q1k = NULL, *qln = NULL;
98 PRIVATE double *qqm2 = NULL, *qq_1m2 = NULL, *qqm = NULL, *qqm1 = NULL;
99 PRIVATE FLT_OR_DBL *scale = NULL, *expMLbase = NULL;
100 PRIVATE char *ptype = NULL; /* precomputed array of pair types */
101 PRIVATE int init_length; /* length in last call to init_pf_fold()*/
102 PRIVATE double init_temp; /* temperature in last call to scale_pf_params */
103 PRIVATE int *my_iindx = NULL;
104 /* make iptypes array for intermolecular constrains (ipidx for indexing)*/
105
106
107 /*
108 #################################
109 # PRIVATE FUNCTION DECLARATIONS #
110 #################################
111 */
112 PRIVATE pu_out *
113 get_u_vals(pu_contrib *p_c,
114 int **unpaired_values,
115 char *select_contrib);
116
117
118 PRIVATE int
119 plot_free_pu_out(pu_out *res,
120 interact *pint,
121 char *ofile,
122 char *head);
123
124
125 PRIVATE void
126 scale_stru_pf_params(unsigned int length);
127
128
129 PRIVATE void
130 init_pf_two(int length);
131
132
133 PRIVATE void
134 scale_int(const char *s,
135 const char *sl,
136 double *sc_int);
137
138
139 PRIVATE void
140 encode_seq(const char *s1,
141 const char *s2);
142
143
144 PRIVATE constrain *
145 get_ptypes_up(char *S,
146 const char *structure);
147
148
149 PRIVATE void
150 get_up_arrays(unsigned int length);
151
152
153 PRIVATE void
154 free_up_arrays(void);
155
156
157 PRIVATE void
158 set_encoded_seq(const char *sequence,
159 short **S,
160 short **S1);
161
162
163 PRIVATE void
164 get_interact_arrays(unsigned int n1,
165 unsigned int n2,
166 pu_contrib *p_c,
167 pu_contrib *p_c2,
168 int w,
169 int incr5,
170 int incr3,
171 double ***p_c_S,
172 double ***p_c2_S);
173
174
175 /*
176 #################################
177 # BEGIN OF FUNCTION DEFINITIONS #
178 #################################
179 */
180 PUBLIC pu_contrib *
get_pu_contrib_struct(unsigned int n,unsigned int w)181 get_pu_contrib_struct(unsigned int n,
182 unsigned int w)
183 {
184 unsigned int i;
185 pu_contrib *pu = (pu_contrib *)vrna_alloc(sizeof(pu_contrib));
186
187 pu->length = n;
188 pu->w = w;
189 /* contributions to probability of being unpaired witihin a(n)
190 * H hairpin,
191 * I interior loop,
192 * M muliloop,
193 * E exterior loop*/
194 /* pu_test->X[i][j] where i <= j and i [1...n], j = [1...w[ */
195 pu->H = (double **)vrna_alloc(sizeof(double *) * (n + 1));
196 pu->I = (double **)vrna_alloc(sizeof(double *) * (n + 1));
197 pu->M = (double **)vrna_alloc(sizeof(double *) * (n + 1));
198 pu->E = (double **)vrna_alloc(sizeof(double *) * (n + 1));
199 for (i = 0; i <= n; i++) {
200 pu->H[i] = (double *)vrna_alloc(sizeof(double) * (w + 1));
201 pu->I[i] = (double *)vrna_alloc(sizeof(double) * (w + 1));
202 pu->M[i] = (double *)vrna_alloc(sizeof(double) * (w + 1));
203 pu->E[i] = (double *)vrna_alloc(sizeof(double) * (w + 1));
204 }
205 return pu;
206 }
207
208
209 PUBLIC void
free_pu_contrib(pu_contrib * pu)210 free_pu_contrib(pu_contrib *pu)
211 {
212 free_pu_contrib_struct(pu);
213 }
214
215
216 PUBLIC void
free_pu_contrib_struct(pu_contrib * pu)217 free_pu_contrib_struct(pu_contrib *pu)
218 {
219 unsigned int i;
220
221 if (pu != NULL) {
222 for (i = 0; i <= pu->length; i++) {
223 free(pu->H[i]);
224 free(pu->I[i]);
225 free(pu->M[i]);
226 free(pu->E[i]);
227 }
228 free(pu->H);
229 free(pu->I);
230 free(pu->M);
231 free(pu->E);
232 free(pu);
233 }
234 }
235
236
237 /* you have to call pf_fold(sequence, structure); befor pf_unstru */
238 PUBLIC pu_contrib *
pf_unstru(char * sequence,int w)239 pf_unstru(char *sequence,
240 int w)
241 {
242 int n, i, j, v, k, l, o, p, ij, kl, po, u, u1, d, type, type_2, tt;
243 unsigned int size;
244 double temp, tqm2;
245 double qbt1, *tmp, sum_l, *sum_M;
246 double *store_H, *store_Io, **store_I2o; /* hairp., interior contribs */
247 double *store_M_qm_o, *store_M_mlbase; /* multiloop contributions */
248 pu_contrib *pu_test;
249
250 sum_l = 0.0;
251 temp = 0;
252 n = (int)strlen(sequence);
253 sum_M = (double *)vrna_alloc((n + 1) * sizeof(double));
254 pu_test = get_pu_contrib_struct((unsigned)n, (unsigned)w);
255 size = ((n + 1) * (n + 2)) >> 1;
256
257 get_up_arrays((unsigned)n);
258 init_pf_two(n);
259
260 /* init everything */
261 for (d = 0; d <= TURN; d++)
262 for (i = 1; i <= n - d; i++) {
263 j = i + d;
264 ij = my_iindx[i] - j;
265 if (d < w)
266 pu_test->H[i][d] = pu_test->I[i][d] = pu_test->M[i][d] = pu_test->E[i][d] = 0.;
267 }
268
269
270 for (i = 0; i < size; i++)
271 prpr[i] = probs[i];
272
273 sum_M[0] = 0.;
274 for (i = 1; i <= n; i++) {
275 /* set auxillary arrays to 0, reuse qqm and qqm1, reuse qqm2 and qq_1m2*/
276 sum_M[i] = qqm[i] = qqm1[i] = qqm2[i] = qq_1m2[i] = 0;
277 for (j = i + TURN + 1; j <= n; j++) {
278 ij = my_iindx[i] - j;
279 /* i need the part_func of all structures outside bp[ij] */
280 if (qb[ij] > 0.0)
281 prpr[ij] = (probs[ij] / qb[ij]);
282 }
283 }
284
285 /* alloc even more memory */
286 store_I2o = (double **)vrna_alloc(sizeof(double *) * (n + 1)); /* for p,k */
287 for (i = 0; i <= n; i++)
288 store_I2o[i] = (double *)vrna_alloc(sizeof(double) * (MAXLOOP + 2));
289
290 /* expMLbase[i-p]*dangles_po */
291 store_M_mlbase = (double *)vrna_alloc(sizeof(double) * (size + 1));
292
293 /* 2. exterior bp (p,o) encloses unpaired region [i,i+w[*/
294 for (o = TURN + 2; o <= n; o++) {
295 double sum_h;
296 /*allocate space for arrays to store different contributions to H, I & M */
297 store_H = (double *)vrna_alloc(sizeof(double) * (o + 2));
298 /* unpaired between ]l,o[ */
299 store_Io = (double *)vrna_alloc(sizeof(double) * (o + 2));
300 /* qm[p+1,i-1]*dangles_po */
301 store_M_qm_o = (double *)vrna_alloc(sizeof(double) * (n + 1));
302
303 for (p = o - TURN - 1; p >= 1; p--) {
304 /* construction of partition function of segment [p,o], given that
305 * an unpaired region [i,i+w[ exists within [p,o] */
306 u = o - p - 1;
307 po = my_iindx[p] - o;
308 type = ptype[po];
309 if (type) {
310 /*hairpin contribution*/
311 if (((type == 3) || (type == 4)) && no_closingGU)
312 temp = 0.;
313 else
314 temp = prpr[po] *
315 exp_E_Hairpin(u, type, S1[p + 1], S1[o - 1], sequence + p - 1, Pf) * scale[u + 2];
316
317 /* all H contribs are collect for the longest unpaired region */
318 store_H[p + 1] = temp;
319
320 /* interior loops with interior pair k,l and an unpaired region of
321 * length w between p and k || l and o*/
322 for (k = p + 1; k <= MIN2(p + MAXLOOP + 1, o - TURN - 2); k++) {
323 u1 = k - p - 1;
324 sum_l = 0.;
325 for (l = MAX2(k + TURN + 1, o - 1 - MAXLOOP + u1); l < o; l++) {
326 kl = my_iindx[k] - l;
327 type_2 = ptype[kl];
328 if ((l + 1) < o)
329 store_Io[l + 1] += sum_l;
330
331 temp = 0.;
332 if (type_2) {
333 type_2 = rtype[type_2];
334 temp = prpr[po] * qb[kl] * exp_E_IntLoop(u1,
335 o - l - 1,
336 type,
337 type_2,
338 S1[p + 1],
339 S1[o - 1],
340 S1[k - 1],
341 S1[l + 1],
342 Pf) * scale[u1 + o - l + 1];
343 if ((l + 1) < o)
344 store_Io[l + 1] += temp; /* unpaired region between ]l,o[ */
345
346 sum_l += temp;
347 } /* end of if pair(k,l) */
348 } /* end of l */
349 /* unpaired in region ]p,k[ */
350 for (i = p + 1; i <= k - 1; i++)
351 store_I2o[i][MIN2(w - 1, k - i - 1)] += sum_l;
352 } /* end of k */
353 } /*end of if(type) test for bp (p,o) */
354
355 /* multiple stem loop contribution
356 * calculate qm2[my_iindx[i]-j] in the course of the calculation
357 * of the multiple stem loop contribution:
358 * advantage: you save memory:
359 * instead of a (n+1)*n array for qqm2 you only need 2*n arrays
360 * disadvantage: you have to use two times the op-loop for the full
361 * multiloop contribution
362 * first op-loop: index o goes from 1...n and
363 * index p from o-TURN-1 ... 1
364 * second op-loop: index o goes from n...1 and
365 * index p from o+TURN+1 ... n !!
366 * HERE index o goes from 1...n and index p o-TURN-1 ... 1 ,
367 * we calculate the contributions to multiple stem loop
368 * where exp(i+w-1-p)*(qqm2 values between i+w and o-1)
369 * AND qm[iindex[p+1]-(i-1)]*exp(beta*w)*qm[iindex[i+w]-(o-1)]
370 * you have to recalculate of qqm matrix containing final stem
371 * contributions to multiple loop partition function
372 * from segment p,o */
373
374 /* recalculate qqm[]
375 * qqm[p] := (contribution with exact one loop in region (p,o)*/
376 qqm[p] = qqm1[p] * expMLbase[1];
377 if (type) {
378 qbt1 = qb[po] *
379 exp_E_MLstem(type, (p > 1) ? S1[p - 1] : -1, (o < n) ? S1[o + 1] : -1, Pf);
380 qqm[p] += qbt1;
381 /* reverse dangles for prpr[po]*... */
382 temp = 0.;
383 tt = rtype[type];
384 temp = prpr[po] * exp_E_MLstem(tt, S1[o - 1], S1[p + 1], Pf) * scale[2] * Pf->expMLclosing;
385 for (i = p + 1; i < o; i++) {
386 int p1i = (p + 1) < (i - 1) ? my_iindx[p + 1] - (i - 1) : 0;
387 /*unpaired region expMLbase[i-p] left of structured
388 * region qq_1m2[i+1]*/
389 /* @expMLbase: note distance of i-p == i-(p+1)+1 */
390 store_M_mlbase[my_iindx[p + 1] - i] += expMLbase[i - p] * temp * qq_1m2[i + 1];
391 /* structured region qm[p1i] left of unpaired region */
392 /* contribition for unpaired region is added after the p-loop */
393 store_M_qm_o[i] += qm[p1i] * temp;
394 } /*end of for i ... */
395 }
396
397 for (tqm2 = 0., i = p + 1; i < o; i++)
398 tqm2 += qm[my_iindx[p] - i] * qqm[i + 1];
399
400 /* qqm2[p] contrib with at least 2 loops in region (p,o) */
401 qqm2[p] = tqm2;
402 } /* end for (p=..) */
403
404 for (sum_h = 0., i = 1; i < o; i++) {
405 int max_v, vo;
406 sum_h += store_H[i];
407 max_v = MIN2(w - 1, o - i - 1);
408 for (v = max_v; v >= 0; v--) {
409 /* Hairpins */
410 pu_test->H[i][v] += sum_h;/* store_H[i][v] + store_H[i][max_v]; */
411 /* Interior loops: unpaired region between ]l,o[ calculated here !*/
412 /* unpaired region between ]p,k[ collected after after o-loop */
413 if (v <= MIN2(max_v, MAXLOOP))
414 pu_test->I[i][v] += store_Io[i]; /* ]l,o[ */
415
416 /* Multiloops:*/
417 /* unpaired region [i,v] between structured regions ]p,i[ and ]v,o[. */
418 /* store_M_qm_o[i] = part. funct over all structured regions ]p,i[ */
419 vo = (i + v + 1) <= (o - 1) ? my_iindx[i + v + 1] - (o - 1) : 0;
420 pu_test->M[i][v] += store_M_qm_o[i] * expMLbase[v + 1] * qm[vo];
421 }
422 }
423 tmp = qqm1;
424 qqm1 = qqm;
425 qqm = tmp;
426 tmp = qqm2;
427 qqm2 = qq_1m2;
428 qq_1m2 = tmp;
429
430 free(store_Io);
431 free(store_H);
432 free(store_M_qm_o);
433 }/* end for (o=..) */
434
435 for (i = 1; i < n; i++) {
436 int max_v;
437 double sum_iv;
438 sum_iv = 0.;
439 max_v = MIN2(w - 1, n - i);
440 for (v = n; v >= 0; v--) {
441 if (v <= MIN2(max_v, MAXLOOP)) {
442 /* all unpaired regions [i,v] between p and k in interior loops */
443 /* notice v runs from max_v -> 0, sum_iv sums all int. l. contribs */
444 /* for each x, v < x =< max_v, since they contribute to [i,v] */
445 sum_iv += store_I2o[i][v];
446 pu_test->I[i][v] += sum_iv;
447 }
448
449 /* all unpaired region [i,v] for a fixed v, given that */
450 /* region ]v,o[ contains at least 2 structures qq_1m2[v+1]; */
451 if (v >= i) {
452 sum_M[v] += store_M_mlbase[my_iindx[i] - v];
453 if (v - i <= max_v)
454 pu_test->M[i][v - i] += sum_M[v];
455 }
456 }
457 }
458
459 for (i = 0; i <= n; i++)
460 free(store_I2o[i]);
461 free(store_I2o);
462
463 for (i = 1; i <= n; i++)
464 /* set auxillary arrays to 0 */
465 qqm[i] = qqm1[i] = qqm2[i] = qq_1m2[i] = 0;
466
467 /* 2. exterior bp (p,o) encloses unpaired region [i,j]
468 * HERE index o goes from n...1 and index p from o+TURN+1 ... n,
469 * that is, we add the one multiloop contribution that we
470 * could not calculate before */
471
472 /* is free'ing plus allocating faster than looping over all entries an setting them to 0? */
473 #if 0
474 free(store_M_mlbase);
475 store_M_mlbase = (double *)vrna_alloc(sizeof(double) * (size + 1));
476 #else
477 /* this should be the fastest way to set everything to 0 */
478 memset(store_M_mlbase, 0, sizeof(double) * (size + 1));
479 #endif
480
481 for (o = n - TURN - 1; o >= 1; o--) {
482 for (p = o + TURN + 1; p <= n; p++) {
483 po = my_iindx[o] - p;
484 type = ptype[po];
485 /* recalculate of qqm matrix containing final stem
486 * contributions to multiple loop partition function
487 * from segment [o,p] */
488 qqm[p] = qqm1[p] * expMLbase[1];
489 if (type) {
490 qbt1 = qb[po];
491 qbt1 *= exp_E_MLstem(type, (o > 1) ? S1[o - 1] : -1, (p < n) ? S1[p + 1] : -1, Pf);
492 qqm[p] += qbt1;
493 /* revers dangles for prpr[po]... */
494 temp = 0.;
495 tt = rtype[type];
496 temp = prpr[po] * exp_E_MLstem(tt, S1[p - 1], S1[o + 1], Pf) * Pf->expMLclosing * scale[2];
497 }
498
499 tqm2 = 0.;
500 for (i = o + 1; i < p; i++) {
501 tqm2 += qqm[i] * qm[my_iindx[i + 1] - p];
502
503 if (type != 0) {
504 /* structured region qq_1m2[i-1] left of unpaired r. expMLbase[p-i]*/
505 /* @expMLbase: note distance of p-i == p+1-i+1 */
506 store_M_mlbase[my_iindx[i] - p + 1] += qq_1m2[i - 1] * expMLbase[p - i] * temp;
507 }
508 } /*end of for i ....*/
509 qqm2[p] = tqm2;
510 } /* end for (p=..) */
511 tmp = qqm1;
512 qqm1 = qqm;
513 qqm = tmp;
514 tmp = qqm2;
515 qqm2 = qq_1m2;
516 qq_1m2 = tmp;
517 }/* end for (o=..) */
518 /* now collect the missing multiloop contributions */
519 for (i = 0; i <= n; i++)
520 sum_M[i] = 0.;
521 for (i = 1; i <= n; i++) {
522 int v_max = MIN2(w - 1, n - i);
523 for (v = n; v >= i; v--) {
524 sum_M[i] += store_M_mlbase[my_iindx[i] - v];
525 if ((v - i <= v_max))
526 pu_test->M[i][v - i] += sum_M[i];
527 }
528 }
529
530 /* 1. region [i,j] exterior to all loops */
531 for (i = 1; i <= n; i++) {
532 for (j = i; j < MIN2(i + w, n + 1); j++) {
533 ij = my_iindx[i] - j;
534 temp = q1k[i - 1] * 1 * scale[j - i + 1] * qln[j + 1] / q1k[n];
535 pu_test->E[i][j - i] += temp;
536 }
537 }
538
539 free(sum_M);
540 free(store_M_mlbase);
541 free_up_arrays();
542 return pu_test;
543 }
544
545
546 PRIVATE void
get_interact_arrays(unsigned int n1,unsigned int n2,pu_contrib * p_c,pu_contrib * p_c2,int w,int incr5,int incr3,double *** p_c_S,double *** p_c2_S)547 get_interact_arrays(unsigned int n1,
548 unsigned int n2,
549 pu_contrib *p_c,
550 pu_contrib *p_c2,
551 int w,
552 int incr5,
553 int incr3,
554 double ***p_c_S,
555 double ***p_c2_S)
556 {
557 unsigned int i;
558 int pc_size, j;
559
560 *p_c_S = (double **)vrna_alloc(sizeof(double *) * (n1 + 1));
561
562 for (i = 1; i <= n1; i++) {
563 pc_size = MIN2((w + incr5 + incr3), (int)n1);
564 (*p_c_S)[i] = (double *)vrna_alloc(sizeof(double) * (pc_size + 1));
565 for (j = 0; j < pc_size; j++)
566 (*p_c_S)[i][j] = p_c->H[i][j] + p_c->I[i][j] + p_c->M[i][j] + p_c->E[i][j];
567 }
568
569 if (p_c2 != NULL) {
570 (*p_c2_S) = (double **)vrna_alloc(sizeof(double *) * (n2 + 1));
571 for (i = 1; i <= n2; i++) {
572 pc_size = MIN2(w, (int)n2);
573 (*p_c2_S)[i] = (double *)vrna_alloc(sizeof(double) * (pc_size + 2));
574 for (j = 0; j < pc_size; j++)
575 (*p_c2_S)[i][j] = p_c2->H[i][j] + p_c2->I[i][j] + p_c2->M[i][j] + p_c2->E[i][j];
576 }
577 }
578 }
579
580
581 /*------------------------------------------------------------------------*/
582 /* s1 is the longer seq */
583 PUBLIC interact *
pf_interact(const char * s1,const char * s2,pu_contrib * p_c,pu_contrib * p_c2,int w,char * cstruc,int incr3,int incr5)584 pf_interact(const char *s1,
585 const char *s2,
586 pu_contrib *p_c,
587 pu_contrib *p_c2,
588 int w,
589 char *cstruc,
590 int incr3,
591 int incr5)
592 {
593 int i, j, k, l, n1, n2, add_i5, add_i3, pc_size;
594 double temp, Z, rev_d, E, Z2, **p_c_S, **p_c2_S, int_scale;
595 FLT_OR_DBL ****qint_4, **qint_ik;
596 /* PRIVATE double **pint; array for pf_up() output */
597 interact *Int;
598 double G_min, G_is, Gi_min;
599 int gi, gj, gk, gl, ci, cj, ck, cl, prev_k, prev_l;
600 FLT_OR_DBL **int_ik;
601 double Z_int, temp_int, temppfs;
602 double const_scale, const_T;
603 constrain *cc = NULL; /* constrains for cofolding */
604 char *Seq, *i_long, *i_short, *pos = NULL; /* short seq appended to long one */
605
606 /* int ***pu_jl; */ /* positions of interaction in the short RNA */
607
608 G_min = G_is = Gi_min = 100.0;
609 gi = gj = gk = gl = ci = cj = ck = cl = 0;
610
611 n1 = (int)strlen(s1);
612 n2 = (int)strlen(s2);
613 prev_k = 1;
614 prev_l = n2;
615
616 i_long = (char *)vrna_alloc(sizeof(char) * (n1 + 1));
617 i_short = (char *)vrna_alloc(sizeof(char) * (n2 + 1));
618 Seq = (char *)vrna_alloc(sizeof(char) * (n1 + n2 + 2));
619
620 strcpy(Seq, s1);
621 strcat(Seq, s2);
622
623 set_encoded_seq(s1, &S, &S1);
624 set_encoded_seq(s2, &SS, &SS2);
625
626 cc = get_ptypes_up(Seq, cstruc);
627
628 get_interact_arrays(n1, n2, p_c, p_c2, w, incr5, incr3, &p_c_S, &p_c2_S);
629
630 /*array for pf_up() output */
631 Int = (interact *)vrna_alloc(sizeof(interact) * 1);
632 Int->Pi = (double *)vrna_alloc(sizeof(double) * (n1 + 2));
633 Int->Gi = (double *)vrna_alloc(sizeof(double) * (n1 + 2));
634
635 /* use a different scaling for pf_interact*/
636 scale_int(s2, s1, &int_scale);
637
638 /* set the global scale array and the global variable pf_scale to the
639 * values used to scale the interaction, keep their former values !! */
640 temppfs = pf_scale;
641 pf_scale = int_scale;
642
643 /* in order to scale expLoopEnergy correctly call*/
644 /* we also pass twice the seq-length to avoid bogus access to scale[] array */
645 scale_stru_pf_params((unsigned)2 * n1);
646
647 qint_ik = (FLT_OR_DBL **)vrna_alloc(sizeof(FLT_OR_DBL *) * (n1 + 1));
648 for (i = 1; i <= n1; i++)
649 qint_ik[i] = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * (n1 + 1));
650 /* int_ik */
651 int_ik = (FLT_OR_DBL **)vrna_alloc(sizeof(FLT_OR_DBL *) * (n1 + 1));
652 for (i = 1; i <= n1; i++)
653 int_ik[i] = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * (n1 + 1));
654 Z_int = 0.;
655 /* Gint = ( -log(int_ik[gk][gi])-( ((int) w/2)*log(pf_scale)) )*((Pf->temperature+K0)*GASCONST/1000.0); */
656 const_scale = ((int)w / 2) * log(pf_scale);
657 const_T = (Pf->kT / 1000.0);
658 encode_seq(s1, s2);
659 /* static short *S~S1, *S1~SS1, *SS~S2, *SS2; */
660 for (i = 0; i <= n1; i++)
661 Int->Pi[i] = Int->Gi[i] = 0.;
662 E = 0.;
663 Z = 0.;
664
665 if (fold_constrained && cstruc != NULL) {
666 pos = strchr(cstruc, '|');
667 if (pos) {
668 ci = ck = cl = cj = 0;
669 /* long seq & short seq
670 * .........||..|||||....&....||||... w = maximal interaction length
671 * ck ci cj cl */
672 strncpy(i_long, cstruc, n1);
673 i_long[n1] = '\0';
674 strncpy(i_short, &cstruc[n1], n2);
675 i_short[n2] = '\0';
676 pos = strchr(i_long, '|');
677 if (pos)
678 ck = (int)(pos - i_long) + 1; /* k */
679
680 pos = strrchr(i_long, '|');
681 if (pos)
682 ci = (int)(pos - i_long) + 1; /* i */
683
684 pos = strrchr(i_short, '|');
685 if (pos)
686 cl = (int)(pos - i_short) + 1; /* l */
687
688 pos = strchr(i_short, '|');
689 if (pos)
690 cj = (int)(pos - i_short) + 1; /* j */
691
692 if (ck > 0 && ci > 0 && ci - ck + 1 > w) {
693 vrna_message_warning("distance between constrains in longer seq, %d, larger than -w = %d",
694 ci - ck + 1,
695 w);
696 vrna_message_error("pf_interact: could not satisfy all constraints");
697 }
698
699 if (cj > 0 && cl > 0 && cl - cj + 1 > w) {
700 vrna_message_warning("distance between constrains in shorter seq, %d, larger than -w = %d",
701 cl - cj + 1,
702 w);
703 vrna_message_error("pf_interact: could not satisfy all constraints");
704 }
705 }
706 } else if (fold_constrained && cstruc == NULL) {
707 vrna_message_error("option -C selected, but no constrained structure given\n");
708 }
709
710 if (fold_constrained)
711 pos = strchr(cstruc, '|');
712
713 /* qint_4[i][j][k][l] contribution that region (k-i) in seq1 (l=n1)
714 * is paired to region (l-j) in seq 2(l=n2) that is
715 * a region closed by bp k-l and bp i-j */
716 qint_4 = (FLT_OR_DBL ****)vrna_alloc(sizeof(FLT_OR_DBL ***) * (n1 + 1));
717
718 /* qint_4[i][j][k][l] */
719 for (i = 1; i <= n1; i++) {
720 int end_k;
721 end_k = i - w;
722 if (fold_constrained && pos && ci)
723 end_k = MAX2(i - w, ci - w);
724
725 /* '|' constrains for long sequence: index i from 1 to n1 (5' to 3')*/
726 /* interaction has to include 3' most '|' constrain, ci */
727 if (fold_constrained && pos && ci && i == 1 && i < ci)
728 i = ci - w + 1 > 1 ? ci - w + 1 : 1;
729
730 /* interaction has to include 5' most '|' constrain, ck*/
731 if (fold_constrained && pos && ck && i > ck + w - 1)
732 break;
733
734 /* note: qint_4[i] will be freed before we allocate qint_4[i+1] */
735 qint_4[i] = (FLT_OR_DBL ***)vrna_alloc(sizeof(FLT_OR_DBL **) * (n2 + 1));
736 for (j = n2; j > 0; j--) {
737 qint_4[i][j] = (FLT_OR_DBL **)vrna_alloc(sizeof(FLT_OR_DBL *) * (w + 1));
738 for (k = 0; k <= w; k++)
739 qint_4[i][j][k] = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * (w + 1));
740 }
741
742 prev_k = 1;
743 for (j = n2; j > 0; j--) {
744 int type, type2, end_l;
745 end_l = j + w;
746 if (fold_constrained && pos && ci)
747 end_l = MIN2(cj + w, j + w);
748
749 /* '|' constrains for short sequence: index j from n2 to 1 (3' to 5')*/
750 /* interaction has to include 5' most '|' constrain, cj */
751 if (fold_constrained && pos && cj && j == n2 && j > cj)
752 j = cj + w - 1 > n2 ? n2 : cj + w - 1;
753
754 /* interaction has to include 3' most '|' constrain, cl*/
755 if (fold_constrained && pos && cl && j < cl - w + 1)
756 break;
757
758 type = cc->ptype[cc->indx[i] - (n1 + j)];
759 qint_4[i][j][0][0] = type ? Pf->expDuplexInit : 0;
760
761 if (!type)
762 continue;
763
764 qint_4[i][j][0][0] *= vrna_exp_E_ext_stem(type,
765 (i > 1) ? S1[i - 1] : -1,
766 (j < n2) ? SS2[j + 1] : -1,
767 Pf);
768
769 rev_d = vrna_exp_E_ext_stem(rtype[type], (j > 1) ? SS2[j - 1] : -1, (i < n1) ? S1[i + 1] : -1, Pf);
770
771 /* add inc5 and incr3 */
772 if ((i - incr5) > 0)
773 add_i5 = i - incr5;
774 else
775 add_i5 = 1;
776
777 add_i3 = incr3;
778 pc_size = MIN2((w + incr3 + incr5), n1);
779 if (incr3 < pc_size)
780 add_i3 = incr3;
781 else
782 add_i3 = pc_size - 1;
783
784 /* only one bp (no interior loop) */
785 if (p_c2 == NULL) {
786 /* consider only structure of longer seq. */
787 qint_ik[i][i] += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * scale[((int)w / 2)];
788 Z += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * scale[((int)w / 2)];
789 } else {
790 /* consider structures of both seqs. */
791 qint_ik[i][i] += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * p_c2_S[j][0] *
792 scale[((int)w / 2)];
793 Z += qint_4[i][j][0][0] * rev_d * p_c_S[add_i5][add_i3] * p_c2_S[j][0] *
794 scale[((int)w / 2)];
795 }
796
797 /* int_ik */
798 /* check deltaG_ges = deltaG_int + deltaG_unstr; */
799 int_ik[i][i] += qint_4[i][j][0][0] * rev_d * scale[((int)w / 2)];
800 Z_int += qint_4[i][j][0][0] * rev_d * scale[((int)w / 2)];
801 temp_int = 0.;
802
803 temp = 0.;
804 prev_l = n2;
805 for (k = i - 1; k > end_k && k > 0; k--) {
806 if (fold_constrained && pos && cstruc[k - 1] == '|' && k > prev_k)
807 prev_k = k;
808
809 for (l = j + 1; l < end_l && l <= n2; l++) {
810 int a, b, ia, ib, isw;
811 double scalew, tt, intt;
812
813 type2 = cc->ptype[cc->indx[k] - (n1 + l)];
814 /* '|' : l HAS TO be paired: not pair (k,x) where x>l allowed */
815 if (fold_constrained && pos && cstruc[n1 + l - 1] == '|' && l < prev_l)
816 prev_l = l; /*break*/
817
818 if (fold_constrained && pos && (k <= ck || i >= ci) && !type2)
819 continue;
820
821 if (fold_constrained && pos && ((cstruc[k - 1] == '|') || (cstruc[n1 + l - 1] == '|')) &&
822 !type2)
823 break;
824
825 if (!type2)
826 continue;
827
828 /* to save memory keep only qint_4[i-w...i][][][] in memory
829 * use indices qint_4[i][j][a={0,1,...,w-1}][b={0,1,...,w-1}] */
830 a = i - k; /* k -> a from 1...w-1*/
831 b = l - j; /* l -> b from 1...w-1 */
832
833 /* scale everything to w/2 */
834 isw = ((int)w / 2);
835 if ((a + b) < isw)
836 scalew = (scale[isw - (a + b)]);
837 else if ((a + b) > isw)
838 scalew = 1 / (scale[(a + b) - isw]);
839 else
840 scalew = 1;
841
842 if (i - k + l - j - 2 <= MAXLOOP) {
843 if (k >= prev_k && l <= prev_l) {
844 /* don't violate constrains */
845 E = exp_E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type],
846 S1[k + 1], SS2[l - 1], S1[i - 1], SS2[j + 1], Pf) *
847 scale[i - k + l - j]; /* add *scale[u1+u2+2] */
848
849 qint_4[i][j][a][b] += (qint_4[k][l][0][0] * E);
850
851 /* use ia and ib to go from a....w-1 and from b....w-1 */
852 ia = ib = 1;
853 while ((a + ia) < w && i - (a + ia) >= 1 && (b + ib) < w && (j + b + ib) <= n2) {
854 int iaa, ibb;
855
856 qint_4[i][j][a + ia][b + ib] += qint_4[k][l][ia][ib] * E;
857
858 iaa = ia + 1;
859 while (a + iaa < w && i - (a + iaa) >= 1) {
860 qint_4[i][j][a + iaa][b + ib] += qint_4[k][l][iaa][ib] * E;
861 ++iaa;
862 }
863
864 ibb = ib + 1;
865 while ((b + ibb) < w && (j + b + ibb) <= n2) {
866 qint_4[i][j][a + ia][b + ibb] += qint_4[k][l][ia][ibb] * E;
867 ++ibb;
868 }
869 ++ia;
870 ++ib;
871 }
872 }
873 }
874
875 /* '|' constrain in long sequence */
876 /* collect interactions starting before 5' most '|' constrain */
877 if (fold_constrained && pos && ci && i < ci)
878 continue;
879
880 /* collect interactions ending after 3' most '|' constrain*/
881 if (fold_constrained && pos && ck && k > ck)
882 continue;
883
884 /* '|' constrain in short sequence */
885 /* collect interactions starting before 5' most '|' constrain */
886 if (fold_constrained && pos && cj && j > cj)
887 continue;
888
889 /* collect interactions ending after 3' most '|' constrain*/
890 if (fold_constrained && pos && cl && l < cl)
891 continue;
892
893 /* scale everything to w/2*/
894 /* qint_ik[k][i] all interactions where k and i both are paired */
895 /* substract incr5 from k */
896 if (k - incr5 > 0)
897 add_i5 = k - incr5;
898 else
899 add_i5 = 1;
900
901 /* add incr3 to i */
902 pc_size = MIN2((w + incr3 + incr5), n1);
903 if (i - k + incr3 < pc_size)
904 add_i3 = i - k + incr3;
905 else
906 add_i3 = pc_size - 1;
907
908 if (p_c2 == NULL) /* consider only structure of longer seq. */
909 tt = qint_4[i][j][a][b] * p_c_S[add_i5][add_i3] * scalew * rev_d;
910 else /* consider structures of both seqs. */
911 tt = qint_4[i][j][a][b] * p_c_S[add_i5][add_i3] * p_c2_S[j][b] * scalew * rev_d;
912
913 temp += tt;
914 qint_ik[k][i] += tt;
915 /* int_ik */
916 /* check deltaG_ges = deltaG_int + deltaG_unstr; */
917 intt = qint_4[i][j][a][b] * scalew * rev_d;
918 temp_int += intt;
919 int_ik[k][i] += intt;
920 G_is = (-log(tt) - const_scale) * (const_T);
921 if (G_is < G_min || EQUAL(G_is, G_min)) {
922 G_min = G_is;
923 Gi_min = (-log(intt) - const_scale) * (const_T);
924 gi = i;
925 gj = j;
926 gk = k;
927 gl = l;
928 }
929 }
930 }
931 Z += temp;
932 /* int_ik */
933 Z_int += temp_int;
934 }
935
936 /* free qint_4 values not needed any more */
937 if (i > w) {
938 int bla;
939 bla = i - w;
940 if (fold_constrained && pos && ci && i - w < ci - w + 1)
941 continue;
942
943 if (fold_constrained && pos && ci)
944 bla = MAX2(ci - w + 1, i - w);
945
946 for (j = n2; j > 0; j--) {
947 for (k = 0; k <= w; k++)
948 free(qint_4[bla][j][k]);
949 free(qint_4[bla][j]);
950 }
951 free(qint_4[bla]);
952 qint_4[bla] = NULL;
953 }
954 }
955
956
957 Z2 = 0.0;
958 for (i = 1; i <= n1; i++) {
959 for (k = i; k <= n1 && k < i + w; k++) {
960 Z2 += qint_ik[i][k];
961 for (l = i; l <= k; l++) {
962 /* Int->Pi[l]: prob that position l is within a paired region */
963 /* qint_ik[i][k] as well as Z are scaled to scale[((int) w/2) */
964 Int->Pi[l] += qint_ik[i][k] / Z;
965 /* Int->Gi[l]: minimal delta G at position [l] */
966 Int->Gi[l] = MIN2(Int->Gi[l],
967 (-log(qint_ik[i][k]) - (((int)w / 2) * log(pf_scale))) *
968 (Pf->kT / 1000.0));
969 }
970 }
971 }
972 if (n1 > w) {
973 int start_i, end_i;
974 start_i = n1 - w + 1;
975 end_i = n1;
976 if (fold_constrained && pos && ci) {
977 /* a break in the k loop might result in unfreed values */
978 start_i = ci - w + 1 < n1 - w + 1 ? ci - w + 1 : n1 - w + 1;
979 start_i = start_i > 0 ? start_i : 1;
980 /* start_i = ck; */
981 end_i = ck + w - 1 > n1 ? n1 : ck + w - 1;
982 }
983
984 for (i = start_i; i <= end_i; i++) {
985 if (qint_4[i] == NULL)
986 continue;
987
988 for (j = n2; j > 0; j--) {
989 for (k = 0; k <= w; k++)
990 free(qint_4[i][j][k]);
991 free(qint_4[i][j]);
992 }
993 free(qint_4[i]);
994 }
995 free(qint_4);
996 } else {
997 int start_i, end_i;
998 start_i = 1;
999 end_i = n1;
1000 if (fold_constrained && pos) {
1001 start_i = ci - w + 1 > 0 ? ci - w + 1 : 1;
1002 end_i = ck + w - 1 > n1 ? n1 : ck + w - 1;
1003 }
1004
1005 for (i = start_i; i <= end_i; i++) {
1006 for (j = n2; j > 0; j--) {
1007 for (k = 0; k <= w; k++)
1008 free(qint_4[i][j][k]);
1009 free(qint_4[i][j]);
1010 }
1011 free(qint_4[i]);
1012 }
1013 free(qint_4);
1014 }
1015
1016 if (fold_constrained && (gi == 0 || gk == 0 || gl == 0 || gj == 0))
1017 vrna_message_error("pf_interact: could not satisfy all constraints");
1018
1019 /* fill structure interact */
1020 Int->length = n1;
1021 Int->i = gi;
1022 Int->j = gj;
1023 Int->k = gk;
1024 Int->l = gl;
1025 Int->Gikjl = G_min;
1026 Int->Gikjl_wo = Gi_min;
1027
1028 free(i_long);
1029 free(i_short);
1030
1031 for (i = 1; i <= n1; i++)
1032 free(int_ik[i]);
1033 free(int_ik);
1034 for (i = 1; i <= n1; i++)
1035 free(qint_ik[i]);
1036 free(qint_ik);
1037
1038 /* reset the global variables pf_scale and scale to their original values */
1039 pf_scale = temppfs; /* reset pf_scale */
1040 scale_stru_pf_params((unsigned)n1); /* reset the scale array */
1041 free_pf_arrays(); /* for arrays for pf_fold(...) */
1042
1043 if (expMLbase != NULL) {
1044 free(expMLbase);
1045 expMLbase = NULL;
1046 }
1047
1048 if (scale != NULL) {
1049 free(scale);
1050 scale = NULL;
1051 }
1052
1053 for (i = 1; i <= n1; i++)
1054 free(p_c_S[i]);
1055 free(p_c_S);
1056 if (p_c2 != NULL) {
1057 for (i = 1; i <= n2; i++)
1058 free(p_c2_S[i]);
1059 free(p_c2_S);
1060 }
1061
1062 free(Seq);
1063 free(cc->indx);
1064 free(cc->ptype);
1065 free(cc);
1066 return Int;
1067 }
1068
1069
1070 /*------------------------------------------------------------------------*/
1071 /* use an extra scale for pf_interact, here sl is the longer sequence */
1072 PRIVATE void
scale_int(const char * s,const char * sl,double * sc_int)1073 scale_int(const char *s,
1074 const char *sl,
1075 double *sc_int)
1076 {
1077 int n, nl;
1078 duplexT mfe;
1079 double kT;
1080
1081 n = strlen(s);
1082 nl = strlen(sl);
1083
1084 free(expMLbase);
1085 free(scale);
1086
1087 expMLbase = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * ((nl + 1) * 2));
1088 scale = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * ((nl + 1) * 2));
1089
1090 /* use RNA duplex to get a realistic estimate for the best possible
1091 * interaction energy between the short RNA s and its target sl */
1092 mfe = duplexfold(s, sl);
1093
1094 kT = Pf->kT / 1000.0; /* in Kcal */
1095
1096 /* sc_int is similar to pf_scale: i.e. one time the scale */
1097 *sc_int = exp(-(mfe.energy) / kT / n);
1098
1099 /* free the structure returned by duplexfold */
1100 free(mfe.structure);
1101 }
1102
1103
1104 /*----------------------------------------------------------------------*/
1105 /* init_pf_two(n) :gets the arrays, that you need, from part_func.c */
1106 /* get_pf_arrays(&S, &S1, &ptype, &qb, &qm, &q1k, &qln);*/
1107 /* init_pf_fold(), update_pf_params, encode_char(), make_ptypes() are called by pf_fold() */
1108 PRIVATE void
init_pf_two(int length)1109 init_pf_two(int length)
1110 {
1111 #ifdef SUN4
1112 nonstandard_arithmetic();
1113 #else
1114 #ifdef HP9
1115 fpsetfastmode(1);
1116 #endif
1117 #endif
1118 make_pair_matrix();
1119
1120 /* gets the arrays, that we need, from part_func.c */
1121 if (!get_pf_arrays(&S, &S1, &ptype, &qb, &qm, &q1k, &qln))
1122 vrna_message_error("init_pf_two: pf_fold() has to be called before calling pf_unstru()\n");
1123
1124 /* get a pointer to the base pair probs */
1125 probs = export_bppm();
1126
1127 scale_stru_pf_params((unsigned)length);
1128
1129 init_length = length;
1130 if (init_temp != Pf->temperature)
1131 vrna_message_error("init_pf_two: inconsistency with temperature");
1132 }
1133
1134
1135 PRIVATE void
get_up_arrays(unsigned int length)1136 get_up_arrays(unsigned int length)
1137 {
1138 unsigned int l1 = length + 1;
1139 unsigned int l2 = length + 2;
1140
1141 prpr = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * ((l1 * l2) >> 1));
1142 expMLbase = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * l2);
1143 scale = (FLT_OR_DBL *)vrna_alloc(sizeof(FLT_OR_DBL) * l2);
1144 qqm2 = (double *)vrna_alloc(sizeof(double) * l2);
1145 qq_1m2 = (double *)vrna_alloc(sizeof(double) * l2);
1146 qqm = (double *)vrna_alloc(sizeof(double) * l2);
1147 qqm1 = (double *)vrna_alloc(sizeof(double) * l2);
1148 my_iindx = vrna_idx_row_wise(length);
1149 }
1150
1151
1152 PRIVATE void
free_up_arrays(void)1153 free_up_arrays(void)
1154 {
1155 if (prpr != NULL) {
1156 free(prpr);
1157 prpr = NULL;
1158 }
1159
1160 if (expMLbase != NULL) {
1161 free(expMLbase);
1162 expMLbase = NULL;
1163 }
1164
1165 if (scale != NULL) {
1166 free(scale);
1167 scale = NULL;
1168 }
1169
1170 if (qqm != NULL) {
1171 free(qqm);
1172 qqm = NULL;
1173 }
1174
1175 if (qqm1 != NULL) {
1176 free(qqm1);
1177 qqm1 = NULL;
1178 }
1179
1180 if (qqm2 != NULL) {
1181 free(qqm2);
1182 qqm2 = NULL;
1183 }
1184
1185 if (qq_1m2 != NULL) {
1186 free(qq_1m2);
1187 qq_1m2 = NULL;
1188 }
1189
1190 if (my_iindx != NULL) {
1191 free(my_iindx);
1192 my_iindx = NULL;
1193 }
1194 }
1195
1196
1197 PUBLIC void
free_interact(interact * pin)1198 free_interact(interact *pin)
1199 {
1200 if (S != NULL && pin != NULL) {
1201 free(S);
1202 S = NULL;
1203 }
1204
1205 if (S1 != NULL && pin != NULL) {
1206 free(S1);
1207 S1 = NULL;
1208 }
1209
1210 if (pin != NULL) {
1211 free(pin->Pi);
1212 free(pin->Gi);
1213 free(pin);
1214 pin = NULL;
1215 }
1216 }
1217
1218
1219 /*---------------------------------------------------------------------------*/
1220
1221 PRIVATE void
encode_seq(const char * s1,const char * s2)1222 encode_seq(const char *s1,
1223 const char *s2)
1224 {
1225 unsigned int i, l;
1226
1227 l = strlen(s1);
1228 /* S and S1 are freed by free_pf_arrays(); ! */
1229 S = (short *)vrna_alloc(sizeof(short) * (l + 1));
1230 S1 = (short *)vrna_alloc(sizeof(short) * (l + 1));
1231 /* S1 exists only for the special X K and I bases and energy_set!=0 */
1232 S[0] = l;
1233 for (i = 1; i <= l; i++) {
1234 /* make numerical encoding of sequence */
1235 S[i] = (short)encode_char(toupper(s1[i - 1]));
1236 S1[i] = alias[S[i]]; /* for mismatches of nostandard bases */
1237 }
1238 if (s2 != NULL) {
1239 l = strlen(s2);
1240 /* SS2 exists only for the special X K and I bases and energy_set!=0 */
1241 SS[0] = l;
1242 for (i = 1; i <= l; i++) {
1243 /* make numerical encoding of sequence */
1244 SS[i] = (short)encode_char(toupper(s2[i - 1]));
1245 SS2[i] = alias[SS[i]]; /* for mismatches of nostandard bases */
1246 }
1247 }
1248 }
1249
1250
1251 /*-------------------------------------------------------------------------*/
1252 /* scale energy parameters and pre-calculate Boltzmann weights:
1253 * most of this is done in structure Pf see params.c,h (function:
1254 * get_scaled_pf_parameters(), only arrays scale and expMLbase are handled here*/
1255 PRIVATE void
scale_stru_pf_params(unsigned int length)1256 scale_stru_pf_params(unsigned int length)
1257 {
1258 unsigned int i;
1259 double kT;
1260
1261
1262 /* Do this only at the first call for get_scaled_pf_parameters()
1263 * and/or if temperature has changed*/
1264 if (init_temp != temperature) {
1265 if (Pf)
1266 free(Pf);
1267
1268 vrna_md_t md;
1269 set_model_details(&md);
1270 Pf = vrna_exp_params(&md);
1271 }
1272
1273 init_temp = Pf->temperature;
1274
1275 kT = Pf->kT; /* kT in cal/mol */
1276
1277 /* scaling factors (to avoid overflows) */
1278 if (pf_scale == -1) {
1279 /* mean energy for random sequences: 184.3*length cal */
1280 pf_scale = exp(-(-185 + (Pf->temperature - 37.) * 7.27) / kT);
1281 if (pf_scale < 1)
1282 pf_scale = 1;
1283 }
1284
1285 Pf->pf_scale = pf_scale;
1286 scale[0] = 1.;
1287 scale[1] = 1. / pf_scale;
1288 expMLbase[0] = 1;
1289 expMLbase[1] = Pf->expMLbase / pf_scale;
1290 for (i = 2; i <= length + 1; i++) {
1291 scale[i] = scale[i / 2] * scale[i - (i / 2)];
1292 expMLbase[i] = pow(Pf->expMLbase, (double)i) * scale[i];
1293 }
1294 }
1295
1296
1297 /*-------------------------------------------------------------------------*/
1298 /* make a results structure containing all u-values & the header */
1299 PUBLIC pu_out *
get_u_vals(pu_contrib * p_c,int ** unpaired_values,char * select_contrib)1300 get_u_vals(pu_contrib *p_c,
1301 int **unpaired_values,
1302 char *select_contrib)
1303 {
1304 int i, j, k, l, num_u_vals, count, contribs, size, w, len;
1305 int S, E, H, I, M;
1306 int off_S, off_E, off_H, off_I, off_M;
1307 /* double **p_cont,**p_cont_sh, dG_u; p_u AND its contributions */
1308 pu_out *u_results;
1309
1310 len = p_c->length;
1311
1312 /* number of different -u values */
1313 for (num_u_vals = 0, i = 1; i <= unpaired_values[0][0]; i++) {
1314 j = unpaired_values[i][0];
1315 do
1316 num_u_vals++;
1317 while (++j <= unpaired_values[i][1]);
1318 }
1319 /* check which contributions ([-c "SHIME"] ) are desired by the user,
1320 * set the offset for each contribution */
1321 contribs = 0;
1322 S = E = H = I = M = 0;
1323 off_S = off_E = off_H = off_I = off_M = 0;
1324 if (strchr(select_contrib, 'S')) {
1325 S = 1;
1326 off_S = contribs;
1327 ++contribs;
1328 }
1329
1330 if (strchr(select_contrib, 'E')) {
1331 E = 1;
1332 off_E = contribs;
1333 ++contribs;
1334 }
1335
1336 if (strchr(select_contrib, 'H')) {
1337 H = 1;
1338 off_H = contribs;
1339 ++contribs;
1340 }
1341
1342 if (strchr(select_contrib, 'I')) {
1343 I = 1;
1344 off_I = contribs;
1345 ++contribs;
1346 }
1347
1348 if (strchr(select_contrib, 'M')) {
1349 M = 1;
1350 off_M = contribs;
1351 ++contribs;
1352 }
1353
1354 if (contribs > 5)
1355 vrna_message_error("get_u_vals: error with contribs!");
1356
1357 /* allocate the results structure */
1358 u_results = (pu_out *)vrna_alloc(1 * sizeof(pu_out));
1359 u_results->len = len; /* sequence length */
1360 /*num_u_vals differnet -u values, contribs [-c "SHIME"] */
1361 u_results->u_vals = num_u_vals;
1362 u_results->contribs = contribs;
1363 /* add 1 column for position within the sequence and
1364 * add 1 column for the free energy of interaction values */
1365 /* header e.g. u3I (contribution for u3 interior loops */
1366 size = 1 + (num_u_vals * contribs) + 1;
1367 u_results->header = (char **)vrna_alloc((size + 1) * sizeof(char *));
1368 for (i = 0; i < (size + 1); i++)
1369 u_results->header[i] = (char *)vrna_alloc(10 * sizeof(char));
1370 /* different free energies for all -u and -c combinations */
1371 u_results->u_values = (double **)vrna_alloc((size + 1) * sizeof(double *));
1372 for (i = 0; i < (size + 1); i++)
1373 /* position within the sequence */
1374 u_results->u_values[i] = (double *)vrna_alloc((len + 3) * sizeof(double));
1375 /* write the position within the sequence in the u_results array
1376 * at column zerro */
1377 sprintf(u_results->header[0], "pos");
1378 for (i = 0; i <= len; i++)
1379 /* add the position*/
1380 u_results->u_values[0][i] = i;
1381 /* go over the different -u values, u_vals[] listy of different -u values*/
1382 for (count = k = 1; k <= unpaired_values[0][0]; k++) {
1383 l = unpaired_values[k][0];
1384 do {
1385 int offset; /* offset for the respective -u value (depents on the number
1386 * of the -u value and on the numbers of contribs */
1387
1388 offset = ((count - 1) * contribs) + 1; /* first colum is the position */
1389 /* set the current value of -u : here we call it w */
1390 w = l; /* set w to the actual -u value */
1391 if (w > len)
1392 break; /* corr caro */
1393
1394 /* make the header - look which contribitions are wanted */
1395 if (S)
1396 sprintf(u_results->header[offset + off_S], "u%dS", w);
1397
1398 if (E)
1399 sprintf(u_results->header[offset + off_E], "u%dE", w);
1400
1401 if (H)
1402 sprintf(u_results->header[offset + off_H], "u%dH", w);
1403
1404 if (I)
1405 sprintf(u_results->header[offset + off_I], "u%dI", w);
1406
1407 if (M)
1408 sprintf(u_results->header[offset + off_M], "u%dM", w);
1409
1410 if (p_c != NULL) {
1411 for (i = 1; i <= len; i++) {
1412 /* for each position */
1413 /* w goes form j to i (intervall end at i) */
1414 for (j = i; j < MIN2((i + w), len + 1); j++) {
1415 /* for each -u value < w
1416 * this is not necessay ->
1417 * calculate j from i and w
1418 * : (j-i+1) == w */
1419 double blubb;
1420 /* if (j-i+1) == w we have the -u = w value wanted */
1421 if ((j - i + 1) == w && i + w - 1 <= len) {
1422 blubb = p_c->H[i][j - i] + p_c->I[i][j - i] + p_c->M[i][j - i] + p_c->E[i][j - i];
1423
1424 /* printf("len %d blubb %.3f \n",len, blubb); */
1425 if (S)
1426 u_results->u_values[offset + off_S][i + w - 1] += blubb;
1427
1428 if (E)
1429 u_results->u_values[offset + off_E][i + w - 1] += p_c->E[i][j - i];
1430
1431 if (H)
1432 u_results->u_values[offset + off_H][i + w - 1] += p_c->H[i][j - i];
1433
1434 if (I)
1435 u_results->u_values[offset + off_I][i + w - 1] += p_c->I[i][j - i];
1436
1437 if (M)
1438 u_results->u_values[offset + off_M][i + w - 1] += p_c->M[i][j - i];
1439 }
1440
1441 if (i < w && (j - i + 1) != w && i + w - 1 > len && i + w - 1 < len + 3) {
1442 if (S)
1443 u_results->u_values[offset + off_S][i + w - 1] = -1;
1444
1445 if (E)
1446 u_results->u_values[offset + off_E][i + w - 1] = -1;
1447
1448 if (H)
1449 u_results->u_values[offset + off_H][i + w - 1] = -1;
1450
1451 if (I)
1452 u_results->u_values[offset + off_I][i + w - 1] = -1;
1453
1454 if (M)
1455 u_results->u_values[offset + off_M][i + w - 1] = -1;
1456 }
1457 }
1458 }
1459 } else {
1460 return NULL; /* error */
1461 }
1462
1463 count++;
1464 } while (++l <= unpaired_values[k][1]);
1465 }
1466 return u_results; /*success*/
1467 }
1468
1469
1470 /* plot the results structure */
1471 /* when plotting the results for the target seq we add a header */
1472 /* when plotting the results for the interaction partner u want no header,
1473 * set s1 to NULL to avoid plotting the header */
1474 /* currently we plot the free energies to a file: the probability of
1475 * being unpaired for region [i,j], p_u[i,j], is related to the free
1476 * energy to open region [i,j], dG_u[i,j] by:
1477 * dG_u[i,j] = -log(p_u[i,j])*(temperature+K0)*GASCONST/1000.0; */
1478 PUBLIC int
plot_free_pu_out(pu_out * res,interact * pint,char * ofile,char * head)1479 plot_free_pu_out(pu_out *res,
1480 interact *pint,
1481 char *ofile,
1482 char *head)
1483 {
1484 int size, s, i, len;
1485 double dG_u;
1486 char nan[4], *time, dg[11];
1487 FILE *wastl;
1488 double kT = Pf->kT;
1489
1490 wastl = fopen(ofile, "a");
1491 if (wastl == NULL) {
1492 vrna_message_warning("p_cont: can't open %s for Up_plot", ofile);
1493 return 0;
1494 }
1495
1496 sprintf(dg, "dG");
1497
1498 /* printf("T=%.16f \n(temperature+K0)*GASCONST/1000.0 = %.16f\n",temperature,(temperature+K0)*GASCONST/1000.0); */
1499
1500 /* write the header of the output file: */
1501 /* # timestamp commandlineaufruf */
1502 /* # length and name of first sequence (target) */
1503 /* # first seq */
1504 /* # length and name of second sequence (interaction partner) */
1505 /* # second seq */
1506 /* the next line is the output for the target: colums
1507 * position in target | dG_unpaired values for target | interaction energy */
1508 /* # pos u1S u1H dg */
1509 /* values for target */
1510 /* if -b was choosen: the next lines are the dG_unpaired values for
1511 * the interaction partner */
1512 /* # pos u1S u1H */
1513 /* values for the interaction partner */
1514
1515 /* print header, if nh is zerro */
1516 if (head) {
1517 time = vrna_time_stamp();
1518 fprintf(wastl, "# %s\n", time);
1519 fprintf(wastl, "%s\n", head);
1520 }
1521
1522 fprintf(wastl, "# ");
1523 /* } else { fprintf(wastl," "); } close if before */
1524 len = res->len;
1525 size = res->u_vals * res->contribs;
1526
1527 sprintf(nan, "NA");
1528 nan[2] = '\0';
1529
1530 for (i = 0; i <= len; i++) {
1531 for (s = 0; s <= size + 1; s++) {
1532 /* that is for different contribution */
1533 if (i == 0 && s > size && pint != NULL)
1534 fprintf(wastl, "%8s ", dg);
1535
1536 if (i != 0) {
1537 if (s > 0 && s <= size) {
1538 if (res->u_values[s][i] > 0.0) {
1539 dG_u = -log(res->u_values[s][i]) * kT / 1000.0;
1540 fprintf(wastl, "%8.3f ", dG_u);
1541 } else {
1542 /* no p_u value was defined print nan*/
1543 fprintf(wastl, "%8s ", nan);
1544 }
1545 } else if (s > size && pint != NULL) {
1546 fprintf(wastl, "%8.3f ", pint->Gi[i]);
1547 } else if (s == 0) {
1548 fprintf(wastl, "%8.0f ", res->u_values[s][i]);
1549 }
1550 } else {
1551 if (s > 1)
1552 fprintf(wastl, "%8s ", res->header[s]);
1553 else
1554 fprintf(wastl, "%7s ", res->header[s]);
1555 }
1556 }
1557 fprintf(wastl, "\n");
1558 }
1559 fclose(wastl);
1560 /*free pu_out* res */
1561 if (res != NULL) {
1562 for (i = 0; i <= (size + 2); i++) {
1563 free(res->u_values[i]);
1564 free(res->header[i]);
1565 }
1566 free(res->u_values);
1567 free(res->header);
1568 free(res);
1569 res = NULL;
1570 }
1571
1572 return 1; /* success */
1573 }
1574
1575
1576 PUBLIC int
Up_plot(pu_contrib * p_c,pu_contrib * p_c_sh,interact * pint,char * ofile,int ** unpaired_values,char * select_contrib,char * head,unsigned int mode)1577 Up_plot(pu_contrib *p_c,
1578 pu_contrib *p_c_sh,
1579 interact *pint,
1580 char *ofile,
1581 int **unpaired_values,
1582 char *select_contrib,
1583 char *head,
1584 unsigned int mode)
1585 {
1586 pu_out *dada;
1587 int ret;
1588
1589 /* check what case we have */
1590
1591 /* upmode = 1 only one seq */
1592 /* if(p_c != NULL && pint == NULL) { */
1593 if (mode & RNA_UP_MODE_1) {
1594 dada = get_u_vals(p_c, unpaired_values, select_contrib);
1595 ret = plot_free_pu_out(dada, NULL, ofile, head);
1596
1597 /* upmode > 1 cofolding */
1598 /* } else if (p_c != NULL && pint != NULL) { */
1599 } else if (mode & RNA_UP_MODE_2) {
1600 dada = get_u_vals(p_c, unpaired_values, select_contrib);
1601 ret = plot_free_pu_out(dada, pint, ofile, head);
1602
1603 /* upmode = 3 cofolding*/
1604 /* } else if (p_c == NULL && p_c_sh != NULL) { */
1605 }
1606
1607 if (mode & RNA_UP_MODE_3) {
1608 dada = get_u_vals(p_c, unpaired_values, select_contrib);
1609 ret = plot_free_pu_out(dada, pint, ofile, head);
1610
1611 dada = get_u_vals(p_c_sh, unpaired_values, select_contrib);
1612 ret = plot_free_pu_out(dada, NULL, ofile, NULL);
1613 }
1614
1615 return ret;
1616 }
1617
1618
1619 /*-------------------------------------------------------------------------*/
1620 /* copy from part_func_co.c */
1621 PRIVATE constrain *
get_ptypes_up(char * Seq,const char * structure)1622 get_ptypes_up(char *Seq,
1623 const char *structure)
1624 {
1625 int n, i, j, k, l, length;
1626 constrain *con;
1627 short *s, *s1;
1628
1629 length = strlen(Seq);
1630 make_pair_matrix();
1631 con = (constrain *)vrna_alloc(sizeof(constrain));
1632 con->indx = (int *)vrna_alloc(sizeof(int) * (length + 1));
1633 for (i = 1; i <= length; i++)
1634 con->indx[i] = ((length + 1 - i) * (length - i)) / 2 + length + 1;
1635 con->ptype = (char *)vrna_alloc(sizeof(char) * ((length + 1) * (length + 2) / 2));
1636
1637 set_encoded_seq((const char *)Seq, &s, &s1);
1638
1639 n = s[0];
1640 for (k = 1; k <= n - CO_TURN - 1; k++)
1641 for (l = 1; l <= 2; l++) {
1642 int type, ntype = 0, otype = 0;
1643 i = k;
1644 j = i + CO_TURN + l;
1645 if (j > n)
1646 continue;
1647
1648 type = pair[s[i]][s[j]];
1649 while ((i >= 1) && (j <= n)) {
1650 if ((i > 1) && (j < n))
1651 ntype = pair[s[i - 1]][s[j + 1]];
1652
1653 if (noLonelyPairs && (!otype) && (!ntype))
1654 type = 0; /* i.j can only form isolated pairs */
1655
1656 con->ptype[con->indx[i] - j] = (char)type;
1657 otype = type;
1658 type = ntype;
1659 i--;
1660 j++;
1661 }
1662 }
1663
1664 if (fold_constrained && (structure != NULL)) {
1665 int hx, *stack;
1666 char type;
1667 stack = (int *)vrna_alloc(sizeof(int) * (n + 1));
1668 for (hx = 0, j = 1; j <= n; j++) {
1669 switch (structure[j - 1]) {
1670 case 'x': /* can't pair */
1671 for (l = 1; l < j - CO_TURN; l++)
1672 con->ptype[con->indx[l] - j] = 0;
1673 for (l = j + CO_TURN + 1; l <= n; l++)
1674 con->ptype[con->indx[j] - l] = 0;
1675 break;
1676 case '(':
1677 stack[hx++] = j;
1678 /* fallthrough */
1679 case '<': /* pairs upstream */
1680 break;
1681 case ')':
1682 if (hx <= 0)
1683 vrna_message_error("1. unbalanced brackets in constraints\n%s", structure);
1684
1685 i = stack[--hx];
1686 type = con->ptype[con->indx[i] - j];
1687 /* don't allow pairs i<k<j<l */
1688 for (k = i; k <= j; k++)
1689 for (l = j; l <= n; l++)
1690 con->ptype[con->indx[k] - l] = 0;
1691 /* don't allow pairs k<i<l<j */
1692 for (k = 1; k <= i; k++)
1693 for (l = i; l <= j; l++)
1694 con->ptype[con->indx[k] - l] = 0;
1695 con->ptype[con->indx[i] - j] = (type == 0) ? 7 : type;
1696 case '>': /* pairs downstream */
1697 break;
1698 }
1699 }
1700 if (hx != 0)
1701 vrna_message_error("2. unbalanced brackets in constraint string\n%s", structure);
1702
1703 free(stack);
1704 }
1705
1706 free(s);
1707 free(s1);
1708 return con;
1709 }
1710
1711
1712 PRIVATE void
set_encoded_seq(const char * sequence,short ** S,short ** S1)1713 set_encoded_seq(const char *sequence,
1714 short **S,
1715 short **S1)
1716 {
1717 unsigned int i, l;
1718
1719 l = strlen(sequence);
1720 if (S != NULL) {
1721 *S = (short *)vrna_alloc(sizeof(short) * (l + 2));
1722 for (i = 1; i <= l; i++) /* make numerical encoding of sequence */
1723 (*S)[i] = (short)encode_char(toupper(sequence[i - 1]));
1724 (*S)[l + 1] = (*S)[1];
1725 (*S)[0] = (short)l;
1726 }
1727
1728 /* S1 exists only for the special X K and I bases and energy_set!=0 */
1729 if (S1 != NULL) {
1730 *S1 = (short *)vrna_alloc(sizeof(short) * (l + 2));
1731 for (i = 1; i <= l; i++) /* make numerical encoding of sequence */
1732 (*S1)[i] = alias[(short)encode_char(toupper(sequence[i - 1]))]; /* for mismatches of nostandard bases */
1733 /* for circular folding add first base at position n+1 and last base at position 0 in S1 */
1734 (*S1)[l + 1] = (*S1)[1];
1735 (*S1)[0] = (*S1)[l];
1736 }
1737 }
1738