1 /*
2 * compute the duplex structure of two RNA strands,
3 * allowing only inter-strand base pairs.
4 * see cofold() for computing hybrid structures without
5 * restriction.
6 * Ivo Hofacker
7 * Vienna RNA package
8 *
9 */
10
11
12 /*
13 * library containing the function used in rnaplex
14 * the program rnaplex uses the following function
15 * Lduplexfold: finds high scoring segments
16 * it stores the end-position of these segments in an array
17 * and call then for each of these positions the duplexfold function
18 * which allows one to make backtracking for each of the high scoring position
19 * It allows one to find suboptimal partially overlapping (depends on a a parameter)
20 * duplexes between a long RNA and a shorter one.
21 * Contrarly to RNAduplex, the energy model is not in E~log(N),
22 * where N is the length of an interial loop but used an affine model,
23 * where the extension and begin parameter are fitted to the energy
24 * parameter used by RNAduplex. This allows one to check for duplex between a short RNA(20nt)
25 * and a long one at the speed of 1Mnt/s. At this speed the whole genome (3Gnt) can be analyzed for one siRNA
26 * in about 50 minutes.
27 * The algorithm is based on an idea by Durbin and Eddy:when the alginment reach a value larger than a
28 * given threshold this value is stored in an array. When the alignment score goes
29 * then under this threshold, the alignemnent begin from this value, in that way the backtracking allow us
30 * to find all non-overlapping high-scoring segments.
31 * For more information check "durbin, biological sequence analysis"
32 */
33
34 #ifdef HAVE_CONFIG_H
35 #include "config.h"
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <math.h>
41 #include <ctype.h>
42 #include <string.h>
43 #include "ViennaRNA/utils/basic.h"
44 #include "ViennaRNA/params/default.h"
45 #include "ViennaRNA/fold_vars.h"
46 #include "ViennaRNA/fold.h"
47 #include "ViennaRNA/pair_mat.h"
48 #include "ViennaRNA/params/basic.h"
49 #include "ViennaRNA/plex.h"
50 #include "ViennaRNA/ali_plex.h"
51 #include "ViennaRNA/loops/all.h"
52
53 /* int subopt_sorted=0; */
54
55 #define PUBLIC
56 #define PRIVATE static
57
58 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
59 #define NEW_NINIO 1 /* new asymetry penalty */
60 #define ARRAY 32 /*array size*/
61 #define UNIT 100
62 #define MINPSCORE -2 * UNIT
63 PRIVATE void
64 encode_seqs(const char *s1,
65 const char *s2);
66
67
68 PRIVATE short *
69 encode_seq(const char *seq);
70
71
72 /* PRIVATE void my_encode_seq(const char *s1, const char *s2); */
73 PRIVATE void
74 update_dfold_params(void);
75
76
77 /* PRIVATE int compare(const void *sub1, const void *sub2); */
78 /* PRIVATE int compare_XS(const void *sub1, const void *sub2); */
79 /* PRIVATE duplexT* backtrack(int threshold, const int extension_cost); */
80 /* static void print_struct(duplexT const *dup); */
81
82 /* PRIVATE int print_struct(duplexT const *dup); */
83 /* PRIVATE int get_rescaled_energy(duplexT const *dup); */
84
85 PRIVATE char *
86 backtrack_C(int i,
87 int j,
88 const int extension_cost,
89 const char *structure,
90 int *E);
91
92
93 PRIVATE void
94 find_max_C(const int *position,
95 const int *position_j,
96 const int delta,
97 const int threshold,
98 const int constthreshold,
99 const int length,
100 const char *s1,
101 const char *s2,
102 const int extension_cost,
103 const int fast,
104 const char *structure);
105
106
107 PRIVATE void
108 plot_max_C(const int max,
109 const int max_pos,
110 const int max_pos_j,
111 const int alignment_length,
112 const char *s1,
113 const char *s2,
114 const int extension_cost,
115 const int fast,
116 const char *structure);
117
118
119 PRIVATE char *
120 backtrack_CXS(int i,
121 int j,
122 const int **access_s1,
123 const int **access_s2,
124 const char *structure,
125 int *E);
126
127
128 PRIVATE void
129 find_max_CXS(const int *position,
130 const int *position_j,
131 const int delta,
132 const int threshold,
133 const int constthreshold,
134 const int alignment_length,
135 const char *s1,
136 const char *s2,
137 const int **access_s1,
138 const int **access_s2,
139 const int fast,
140 const char *structure);
141
142
143 PRIVATE void
144 plot_max_CXS(const int max,
145 const int max_pos,
146 const int max_pos_j,
147 const int alignment_length,
148 const char *s1,
149 const char *s2,
150 const int **access_s1,
151 const int **access_s2,
152 const int fast,
153 const char *structure);
154
155
156 PRIVATE duplexT
157 duplexfold_C(const char *s1,
158 const char *s2,
159 const int extension_cost,
160 const char *structure);
161
162
163 PRIVATE duplexT
164 duplexfold_CXS(const char *s1,
165 const char *s2,
166 const int **access_s1,
167 const int **access_s2,
168 const int i_pos,
169 const int j_pos,
170 const int threshold,
171 const char *structure);
172
173
174 /*@unused@*/
175
176 #define MAXSECTORS 500 /* dimension for a backtrack array */
177 #define LOCALITY 0. /* locality parameter for base-pairs */
178
179 #define MIN2(A, B) ((A) < (B) ? (A) : (B))
180 #define MAX2(A, B) ((A) > (B) ? (A) : (B))
181
182 PRIVATE vrna_param_t *P = NULL;
183 PRIVATE int **c = NULL;/*, **in, **bx, **by;*/ /* energy array used in duplexfold */
184 /* PRIVATE int ****c_XS; */
185 PRIVATE int **lc = NULL, **lin = NULL, **lbx = NULL, **lby = NULL, **linx = NULL,
186 **liny = NULL; /* energy array used in Lduplexfold
187 * this arrays contains only 3 columns
188 * In this way I reduce my memory use and
189 * I can make most of my computation and
190 * accession in the computer cash
191 * which is the main performance boost*/
192
193
194 /*PRIVATE int last_cell; this variable is the last_cell containing
195 * the information about the alignment
196 * useful only if there is an alignment
197 * which extends till the last nucleotide of
198 * the long sequence*/
199
200 PRIVATE short *S1 = NULL, *SS1 = NULL, *S2 = NULL, *SS2 = NULL; /*contains the sequences*/
201 PRIVATE int n1, n2; /* sequence lengths */
202 PRIVATE int n3, n4; /*sequence length for the duplex*/;
203 PRIVATE int delay_free = 0;
204
205
206 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
207
208 PRIVATE duplexT
duplexfold_CXS(const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int i_pos,const int j_pos,const int threshold,const char * structure)209 duplexfold_CXS(const char *s1,
210 const char *s2,
211 const int **access_s1,
212 const int **access_s2,
213 const int i_pos,
214 const int j_pos,
215 const int threshold,
216 const char *structure)
217 {
218 int i, j, p, q, Emin = INF, l_min = 0, k_min = 0;
219 char *struc;
220
221 struc = NULL;
222 duplexT mfe;
223 vrna_md_t md;
224 int bonus = -10000;
225 n3 = (int)strlen(s1);
226 n4 = (int)strlen(s2);
227
228 int *previous_const;
229 previous_const = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
230 j = 0;
231 previous_const[j] = 1;
232 int prev_temp = 1;
233 while (j++ < n4) {
234 if (structure[j - 1] == '|') {
235 previous_const[j] = prev_temp;
236 prev_temp = j;
237 } else {
238 previous_const[j] = prev_temp;
239 }
240 }
241
242 set_model_details(&md);
243
244 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
245 update_fold_params();
246 if (P)
247 free(P);
248
249 P = vrna_params(&md);
250 make_pair_matrix();
251 }
252
253 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
254 for (i = 0; i <= n3; i++)
255 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
256 for (i = 0; i <= n3; i++)
257 for (j = 0; j <= n4; j++)
258 c[i][j] = INF;
259 encode_seqs(s1, s2);
260 int type, type2, type3, E, k, l;
261 i = n3 - 1;
262 j = 2;
263 type = pair[S1[i]][S2[j]];
264 if (!type) {
265 printf("Error during initialization of the duplex in duplexfold_XS\n");
266 mfe.structure = NULL;
267 mfe.energy = INF;
268 return mfe;
269 }
270
271 c[i][j] = P->DuplexInit + (structure[j - 1] == '|' ? bonus : 0); /* check if first pair is constrained */
272 if (!(structure[j - 2] == '|'))
273 c[i][j] += P->mismatchExt[rtype[type]][SS2[j - 1]][SS1[i + 1]];
274 else
275 c[i][j] += P->dangle3[rtype[type]][SS1[i + 1]];
276
277 if (type > 2)
278 c[i][j] += P->TerminalAU;
279
280 for (k = i - 1; k > 0; k--) {
281 c[k + 1][0] = INF;
282 for (l = j + 1; l <= n4; l++) {
283 c[k][l] = INF;
284 int bonus_2 = (structure[l - 1] == '|' ? bonus : 0); /* check if position is constrained and prepare bonus accordingly */
285 type2 = pair[S1[k]][S2[l]];
286 if (!type2)
287 continue;
288
289 for (p = k + 1; p < n3 && p < k + MAXLOOP - 1; p++) {
290 for (q = l - 1; q >= previous_const[l] && q > 1; q--) {
291 if (p - k + l - q - 2 > MAXLOOP)
292 break;
293
294 type3 = pair[S1[p]][S2[q]];
295 if (!type3)
296 continue;
297
298 E = E_IntLoop(p - k - 1,
299 l - q - 1,
300 type2,
301 rtype[type3],
302 SS1[k + 1],
303 SS2[l - 1],
304 SS1[p - 1],
305 SS2[q + 1],
306 P) + bonus_2;
307 c[k][l] = MIN2(c[k][l], c[p][q] + E);
308 }
309 }
310 E = c[k][l];
311 if (type2 > 2)
312 E += P->TerminalAU;
313
314 E += access_s1[i - k + 1][i_pos] + access_s2[l - 1][j_pos + (l - 1) - 1];
315 if (k > 1 && l < n4 && !(structure[l] == '|'))
316 E += P->mismatchExt[type2][SS1[k - 1]][SS2[l + 1]];
317 else if (k > 1)
318 E += P->dangle5[type2][SS1[k - 1]];
319 else if (l < n4 && !(structure[l] == '|'))
320 E += P->dangle3[type2][SS2[l + 1]];
321
322 if (E < Emin) {
323 Emin = E;
324 k_min = k;
325 l_min = l;
326 }
327 }
328 }
329 free(previous_const);
330 if (Emin > threshold) {
331 mfe.energy = INF;
332 mfe.ddG = INF;
333 mfe.structure = NULL;
334 for (i = 0; i <= n3; i++)
335 free(c[i]);
336 free(c);
337 free(S1);
338 free(S2);
339 free(SS1);
340 free(SS2);
341 return mfe;
342 } else {
343 struc = backtrack_CXS(k_min, l_min, access_s1, access_s2, structure, &Emin);
344 }
345
346 /* lets take care of the dangles */
347 /* find best combination */
348 int dx_5, dx_3, dy_5, dy_3, dGx, dGy, bonus_x;
349 dx_5 = 0;
350 dx_3 = 0;
351 dy_5 = 0;
352 dy_3 = 0;
353 dGx = 0;
354 dGy = 0;
355 bonus_x = 0;
356 dGx = access_s1[i - k_min + 1][i_pos];
357 dx_3 = 0;
358 dx_5 = 0;
359 bonus_x = 0;
360 dGy = access_s2[l_min - j + 1][j_pos + (l_min - 1) - 1];
361 mfe.tb = i_pos - 9 - i + k_min - 1 - dx_5;
362 mfe.te = i_pos - 9 - 1 + dx_3;
363 mfe.qb = j_pos - 9 - 1 - dy_5;
364 mfe.qe = j_pos + l_min - 3 - 9 + dy_3;
365 mfe.ddG = (double)Emin * 0.01;
366 mfe.dG1 = (double)dGx * 0.01;
367 mfe.dG2 = (double)dGy * 0.01;
368 /* mfe.energy += bonus_y + bonus_x; */
369 mfe.energy = mfe.ddG - mfe.dG1 - mfe.dG2;
370
371 mfe.structure = struc;
372 for (i = 0; i <= n3; i++)
373 free(c[i]);
374 free(c);
375 free(S1);
376 free(S2);
377 free(SS1);
378 free(SS2);
379 return mfe;
380 }
381
382
383 PRIVATE char *
backtrack_CXS(int i,int j,const int ** access_s1,const int ** access_s2,const char * structure,int * Emin)384 backtrack_CXS(int i,
385 int j,
386 const int **access_s1,
387 const int **access_s2,
388 const char *structure,
389 int *Emin)
390 {
391 /* backtrack structure going backwards from i, and forwards from j
392 * return structure in bracket notation with & as separator */
393 int k, l, type, type2, E, traced, i0, j0;
394 char *st1, *st2, *struc;
395 int *previous_const;
396 int bonus = -10000;
397
398 previous_const = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
399 int j_temp = 0;
400 previous_const[j_temp] = 1;
401 int prev_temp = 1;
402 while (j_temp++ < n4) {
403 if (structure[j_temp - 1] == '|') {
404 previous_const[j_temp] = prev_temp;
405 prev_temp = j_temp;
406 } else {
407 previous_const[j_temp] = prev_temp;
408 }
409 }
410 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
411 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
412 i0 = i; /*MAX2(i-1,1);*/ j0 = j;/*MIN2(j+1,n4);*/
413 while (i <= n3 - 1 && j >= 2) {
414 int bonus_2 = (structure[j - 1] == '|' ? bonus : 0);
415 E = c[i][j];
416 traced = 0;
417 st1[i - 1] = '(';
418 st2[j - 1] = ')';
419 type = pair[S1[i]][S2[j]];
420 if (!type)
421 vrna_message_error("backtrack failed in fold duplex bli");
422
423 for (k = i + 1; k <= n3 && k > i - MAXLOOP - 2; k++) {
424 for (l = j - 1; l >= previous_const[j] && l >= 1; l--) {
425 int LE;
426 if (i - k + l - j - 2 > MAXLOOP)
427 break;
428
429 type2 = pair[S1[k]][S2[l]];
430 if (!type2)
431 continue;
432
433 LE = E_IntLoop(k - i - 1,
434 j - l - 1,
435 type,
436 rtype[type2],
437 SS1[i + 1],
438 SS2[j - 1],
439 SS1[k - 1],
440 SS2[l + 1],
441 P) + bonus_2;
442 if (E == c[k][l] + LE) {
443 *Emin -= bonus_2;
444 traced = 1;
445 i = k;
446 j = l;
447 break;
448 }
449 }
450 if (traced)
451 break;
452 }
453 if (!traced) {
454 if (i < n3 && j > 1 && !(structure[j - 2] == '|'))
455 E -= P->mismatchExt[rtype[type]][SS2[j - 1]][SS1[i + 1]];
456 else if (i < n3)
457 E -= P->dangle3[rtype[type]][SS1[i + 1]]; /* +access_s1[1][i+1]; */
458 else if (j > 1)
459 E -= (!(structure[j - 2] == '|') ? P->dangle5[rtype[type]][SS2[j - 1]] : 0); /* +access_s2[1][j+1]; */
460
461 if (type > 2)
462 E -= P->TerminalAU;
463
464 /* break; */
465 if (E != P->DuplexInit + bonus_2) {
466 vrna_message_error("backtrack failed in fold duplex bal");
467 } else {
468 *Emin -= bonus_2;
469 break;
470 }
471 }
472 }
473 /* if (i<n3) i++; */
474 /* if (j>1) j--; */
475 struc = (char *)vrna_alloc(i - i0 + 1 + j0 - j + 1 + 2);
476 for (k = MAX2(i0, 1); k <= i; k++)
477 if (!st1[k - 1])
478 st1[k - 1] = '.';
479
480 for (k = j; k <= j0; k++)
481 if (!st2[k - 1])
482 st2[k - 1] = '.';
483
484 strcpy(struc, st1 + MAX2(i0 - 1, 0));
485 strcat(struc, "&");
486 strcat(struc, st2 + j - 1);
487 free(st1);
488 free(st2);
489 free(previous_const);
490 return struc;
491 }
492
493
494 duplexT **
Lduplexfold_CXS(const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int threshold,const int alignment_length,const int delta,const int fast,const char * structure,const int il_a,const int il_b,const int b_a,const int b_b)495 Lduplexfold_CXS(const char *s1,
496 const char *s2,
497 const int **access_s1,
498 const int **access_s2,
499 const int threshold,
500 const int alignment_length,
501 const int delta,
502 const int fast,
503 const char *structure,
504 const int il_a,
505 const int il_b,
506 const int b_a,
507 const int b_b) /* , const int target_dead, const int query_dead) */
508 {
509 int i, j;
510 int bopen = b_b;
511 int bext = b_a;
512 int iopen = il_b;
513 int iext_s = 2 * il_a; /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
514 int iext_ass = 50 + il_a; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
515 int min_colonne = INF; /* enthaelt das maximum einer kolonne */
516 int i_length;
517 int max_pos; /* get position of the best hit */
518 int max_pos_j;
519 /* int temp; */
520 int min_j_colonne;
521 int max = INF;
522 int bonus = -10000;
523 int constthreshold = 0; /* minimal threshold corresponding to a structure complying to all constraints */
524 int maxPenalty[4];
525 vrna_md_t md;
526
527 i = 0;
528 while (structure[i] != '\0') {
529 if (structure[i] == '|')
530 constthreshold += bonus;
531
532 i++;
533 }
534 int *position; /* contains the position of the hits with energy > E */
535 int *position_j;
536 n1 = (int)strlen(s1);
537 n2 = (int)strlen(s2);
538 position = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
539 position_j = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
540
541 set_model_details(&md);
542
543 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
544 update_dfold_params();
545 if (P)
546 free(P);
547
548 P = vrna_params(&md);
549 make_pair_matrix();
550 }
551
552 encode_seqs(s1, s2);
553
554 maxPenalty[0] = (int)-1 * P->stack[2][2] / 2;
555 maxPenalty[1] = (int)-1 * P->stack[2][2];
556 maxPenalty[2] = (int)-3 * P->stack[2][2] / 2;
557 maxPenalty[3] = (int)-2 * P->stack[2][2];
558
559 lc = (int **)vrna_alloc(sizeof(int *) * 5);
560 lin = (int **)vrna_alloc(sizeof(int *) * 5);
561 lbx = (int **)vrna_alloc(sizeof(int *) * 5);
562 lby = (int **)vrna_alloc(sizeof(int *) * 5);
563 linx = (int **)vrna_alloc(sizeof(int *) * 5);
564 liny = (int **)vrna_alloc(sizeof(int *) * 5);
565
566 for (i = 0; i <= 4; i++) {
567 lc[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
568 lin[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
569 lbx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
570 lby[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
571 linx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
572 liny[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
573 }
574 for (j = n2; j >= 0; j--) {
575 lbx[0][j] = lbx[1][j] = lbx[2][j] = lbx[3][j] = lbx[4][j] = INF;
576 lin[0][j] = lin[1][j] = lin[2][j] = lin[3][j] = lin[4][j] = INF;
577 lc[0][j] = lc[1][j] = lc[2][j] = lc[3][j] = lc[4][j] = INF;
578 lby[0][j] = lby[1][j] = lby[2][j] = lby[3][j] = lby[4][j] = INF;
579 liny[0][j] = liny[1][j] = liny[2][j] = liny[3][j] = liny[4][j] = INF;
580 linx[0][j] = linx[1][j] = linx[2][j] = linx[3][j] = linx[4][j] = INF;
581 }
582
583 i = 10 /*target_dead*/; /* start from 2 ( i=4) because no structure allowed to begin with a single base pair */
584 i_length = n1 - 9 /*- target_dead*/;
585 while (i < i_length) {
586 int idx = i % 5;
587 int idx_1 = (i - 1) % 5;
588 int idx_2 = (i - 2) % 5;
589 int idx_3 = (i - 3) % 5;
590 int idx_4 = (i - 4) % 5;
591 int di1, di2, di3, di4;
592 di1 = access_s1[5][i] - access_s1[4][i - 1];
593 di2 = access_s1[5][i - 1] - access_s1[4][i - 2] + di1;
594 di3 = access_s1[5][i - 2] - access_s1[4][i - 3] + di2;
595 di4 = access_s1[5][i - 3] - access_s1[4][i - 4] + di3;
596 di1 = MIN2(di1, maxPenalty[0]);
597 di2 = MIN2(di2, maxPenalty[1]);
598 di3 = MIN2(di3, maxPenalty[2]);
599 di4 = MIN2(di4, maxPenalty[3]);
600 j = n2 - 9 /*- query_dead*/; /* start from n2-1 because no structure allow to begin with a single base pair */
601 while (--j > 9 /*query_dead - 1*/) {
602 /* ----------------------------------------------------------update lin lbx lby matrix */
603 int bonus_2 = (structure[j - 1] == '|' ? bonus : 0);
604 int dj1, dj2, dj3, dj4;
605 dj1 = access_s2[5][j + 4] - access_s2[4][j + 4];
606 dj2 = access_s2[5][j + 5] - access_s2[4][j + 5] + dj1;
607 dj3 = access_s2[5][j + 6] - access_s2[4][j + 6] + dj2;
608 dj4 = access_s2[5][j + 7] - access_s2[4][j + 7] + dj3;
609 dj1 = MIN2(dj1, maxPenalty[0]);
610 dj2 = MIN2(dj2, maxPenalty[1]);
611 dj3 = MIN2(dj3, maxPenalty[2]);
612 dj4 = MIN2(dj4, maxPenalty[3]);
613 int type2, type, temp;
614 type = pair[S1[i]][S2[j]];
615 lc[idx][j] = type ? P->DuplexInit + bonus_2 : INF;
616 if (!bonus_2) {
617 type2 = pair[S2[j + 1]][S1[i - 1]];
618 lin[idx][j] = MIN2(
619 lc[idx_1][j + 1] + P->mismatchI[type2][SS2[j]][SS1[i]] + di1 + dj1 + iopen + iext_s,
620 lin[idx_1][j] + iext_ass + di1);
621 lin[idx][j] = MIN2(lin[idx][j], lin[idx][j + 1] + iext_ass + dj1);
622 lin[idx][j] = MIN2(lin[idx][j], lin[idx_1][j + 1] + iext_s + di1 + dj1);
623 linx[idx][j] = MIN2(
624 lc[idx_1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + di1 + dj1 + iopen + iext_s,
625 linx[idx_1][j] + iext_ass + di1);
626 liny[idx][j] = MIN2(
627 lc[idx_1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + di1 + dj1 + iopen + iext_s,
628 liny[idx][j + 1] + iext_ass + dj1);
629 type2 = pair[S2[j + 1]][S1[i]];
630 lby[idx][j] = MIN2(lby[idx][j + 1] + bext + dj1,
631 lc[idx][j + 1] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + dj1);
632 } else {
633 lin[idx][j] = lby[idx][j] = linx[idx][j] = liny[idx][j] = INF; /* all loop containing "|" are rejected */
634 }
635
636 type2 = pair[S2[j]][S1[i - 1]];
637 lbx[idx][j] =
638 MIN2(lbx[idx_1][j] + bext + di1,
639 lc[idx_1][j] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + di1);
640 /* --------------------------------------------------------------- end update recursion */
641 if (!type)
642 continue;
643
644 if (!(structure[j] == '|'))
645 lc[idx][j] += P->mismatchExt[type][SS1[i - 1]][SS2[j + 1]];
646 else
647 lc[idx][j] += P->dangle5[type][SS1[i - 1]];
648
649 lc[idx][j] += (type > 2 ? P->TerminalAU : 0);
650 /* type > 2 -> no GC or CG pair */
651 /* ------------------------------------------------------------------update c matrix */
652 /* Be careful, no lc may come from a region where a "|" is in a loop, avoided in lin = lby = INF ... jedoch fuer klein loops muss man aufpassen .. */
653 if ((type2 = pair[S1[i - 1]][S2[j + 1]])) {
654 lc[idx][j] =
655 MIN2(lc[idx_1][j + 1] +
656 E_IntLoop(0, 0, type2, rtype[type], SS1[i], SS2[j], SS1[i - 1], SS2[j + 1],
657 P) + di1 + dj1,
658 lc[idx][j]); /* 0x0+1x1 */
659 }
660
661 if ((type2 = pair[S1[i - 2]][S2[j + 1]])) {
662 lc[idx][j] =
663 MIN2(lc[idx_2][j + 1] +
664 E_IntLoop(1, 0, type2, rtype[type], SS1[i - 1], SS2[j], SS1[i - 1], SS2[j + 1],
665 P) + di2 + dj1,
666 lc[idx][j]); /* 0x1 +1x1 */
667 }
668
669 /* kleine loops checks wird in den folgenden if test gemacht. */
670 if (!(structure[j] == '|')) {
671 if ((type2 = pair[S1[i - 1]][S2[j + 2]])) {
672 lc[idx][j] =
673 MIN2(lc[idx_1][j + 2] +
674 E_IntLoop(0, 1, type2, rtype[type], SS1[i], SS2[j + 1], SS1[i - 1], SS2[j + 1],
675 P) + di1 + dj2,
676 lc[idx][j]); /* 1x0 + 1x1 */
677 }
678
679 if ((type2 = pair[S1[i - 2]][S2[j + 2]])) {
680 lc[idx][j] =
681 MIN2(lc[idx_2][j + 2] +
682 E_IntLoop(1, 1, type2, rtype[type], SS1[i - 1], SS2[j + 1], SS1[i - 1], SS2[j + 1],
683 P) + di2 + dj2,
684 lc[idx][j]); /* 1x1 +1x1 */
685 }
686
687 if ((type2 = pair[S1[i - 3]][S2[j + 2]])) {
688 lc[idx][j] =
689 MIN2(lc[idx_3][j + 2] +
690 E_IntLoop(2, 1, type2, rtype[type], SS1[i - 2], SS2[j + 1], SS1[i - 1], SS2[j + 1],
691 P) + di3 + dj2,
692 lc[idx][j]); /* 2x1 +1x1 */
693 }
694
695 if (!(structure[j + 1] == '|')) {
696 if ((type2 = pair[S1[i - 3]][S2[j + 3]])) {
697 lc[idx][j] =
698 MIN2(lc[idx_3][j + 3] +
699 E_IntLoop(2, 2, type2, rtype[type], SS1[i - 2], SS2[j + 2], SS1[i - 1],
700 SS2[j + 1],
701 P) + di3 + dj3,
702 lc[idx][j]); /* 2x2 + 1x1 */
703 }
704
705 if ((type2 = pair[S1[i - 2]][S2[j + 3]])) {
706 lc[idx][j] =
707 MIN2(lc[idx_2][j + 3] +
708 E_IntLoop(1, 2, type2, rtype[type], SS1[i - 1], SS2[j + 2], SS1[i - 1],
709 SS2[j + 1],
710 P) + di2 + dj3,
711 lc[idx][j]); /* 1x2 +1x1 */
712 }
713
714 if ((type2 = pair[S1[i - 4]][S2[j + 3]])) {
715 lc[idx][j] =
716 MIN2(lc[idx_4][j + 3] +
717 E_IntLoop(3, 2, type2, rtype[type], SS1[i - 3], SS2[j + 2], SS1[i - 1],
718 SS2[j + 1],
719 P) + di4 + dj3,
720 lc[idx][j]);
721 }
722
723 if (!(structure[j + 2] == '|')) {
724 if ((type2 = pair[S1[i - 3]][S2[j + 4]])) {
725 lc[idx][j] =
726 MIN2(lc[idx_3][j + 4] +
727 E_IntLoop(2, 3, type2, rtype[type], SS1[i - 2], SS2[j + 3], SS1[i - 1],
728 SS2[j + 1],
729 P) + di3 + dj4,
730 lc[idx][j]);
731 }
732 }
733 }
734 }
735
736 /* internal->stack */
737 lc[idx][j] = MIN2(
738 lin[idx_3][j + 3] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + di3 + dj3 + 2 * iext_s,
739 lc[idx][j]);
740 lc[idx][j] = MIN2(
741 lin[idx_4][j + 2] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + di4 + dj2,
742 lc[idx][j]);
743 lc[idx][j] = MIN2(
744 lin[idx_2][j + 4] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + di2 + dj4,
745 lc[idx][j]);
746 lc[idx][j] = MIN2(
747 linx[idx_3][j + 1] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + di3 + dj1,
748 lc[idx][j]);
749 lc[idx][j] = MIN2(
750 liny[idx_1][j + 3] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + dj3 + di1,
751 lc[idx][j]);
752 /* bulge -> stack */
753 int bAU;
754 bAU = (type > 2 ? P->TerminalAU : 0);
755 lc[idx][j] = MIN2(lbx[idx_2][j + 1] + di2 + dj1 + bext + bAU, lc[idx][j]);
756 /* min2=by[i][j+1]; */
757 lc[idx][j] = MIN2(lby[idx_1][j + 2] + di1 + dj2 + bext + bAU, lc[idx][j]);
758 lc[idx][j] += bonus_2;
759 /* if(j<=const5end){ */
760 temp = min_colonne;
761 min_colonne = MIN2(lc[idx][j] + (type > 2 ? P->TerminalAU : 0) +
762 (!(structure[j - 2] == '|') ?
763 P->mismatchExt[rtype[type]][SS2[j - 1]][SS1[i +
764 1]] : P->dangle3[rtype[type]][
765 SS1[i + 1]]),
766 min_colonne);
767 if (temp > min_colonne)
768 min_j_colonne = j;
769
770 /* } */
771 /* ---------------------------------------------------------------------end update */
772 }
773 if (max >= min_colonne) {
774 max = min_colonne;
775 max_pos = i;
776 max_pos_j = min_j_colonne;
777 }
778
779 position[i + delta] = min_colonne;
780 min_colonne = INF;
781 position_j[i + delta] = min_j_colonne;
782 i++;
783 }
784 /* printf("MAX :%d ", max); */
785 free(S1);
786 free(S2);
787 free(SS1);
788 free(SS2);
789 if (max < threshold + constthreshold) {
790 find_max_CXS(position,
791 position_j,
792 delta,
793 threshold + constthreshold,
794 constthreshold,
795 alignment_length,
796 s1,
797 s2,
798 access_s1,
799 access_s2,
800 fast,
801 structure);
802 }
803
804 if (max < constthreshold) {
805 plot_max_CXS(max,
806 max_pos,
807 max_pos_j,
808 alignment_length,
809 s1,
810 s2,
811 access_s1,
812 access_s2,
813 fast,
814 structure);
815 }
816
817 for (i = 0; i <= 4; i++) {
818 free(lc[i]);
819 free(lin[i]);
820 free(lbx[i]);
821 free(lby[i]);
822 free(linx[i]);
823 free(liny[i]);
824 }
825 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]); */
826 free(lc);
827 free(lin);
828 free(lbx);
829 free(lby);
830 free(linx);
831 free(liny);
832 free(position);
833 free(position_j);
834 return NULL;
835 }
836
837
838 PRIVATE void
find_max_CXS(const int * position,const int * position_j,const int delta,const int threshold,const int constthreshold,const int alignment_length,const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int fast,const char * structure)839 find_max_CXS(const int *position,
840 const int *position_j,
841 const int delta,
842 const int threshold,
843 const int constthreshold,
844 const int alignment_length,
845 const char *s1,
846 const char *s2,
847 const int **access_s1,
848 const int **access_s2,
849 const int fast,
850 const char *structure)
851 {
852 int pos = n1 - 9;
853
854 if (fast == 1) {
855 while (10 < pos--) {
856 int temp_min = 0;
857 if (position[pos + delta] < (threshold)) {
858 int search_range;
859 search_range = delta + 1;
860 while (--search_range)
861 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
862 temp_min = search_range;
863
864 pos -= temp_min;
865 int max_pos_j;
866 max_pos_j = position_j[pos + delta];
867 int max;
868 max = position[pos + delta];
869 printf("target upper bound %d: query lower bound %d (%5.2f) \n",
870 pos - 10,
871 max_pos_j - 10,
872 ((double)max) / 100);
873 pos = MAX2(10, pos + temp_min - delta);
874 }
875 }
876 } else {
877 pos = n1 - 9;
878 while (pos-- > 10) {
879 int temp_min = 0;
880 if (position[pos + delta] < (threshold)) {
881 int search_range;
882 search_range = delta + 1;
883 while (--search_range)
884 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
885 temp_min = search_range;
886
887 pos -= temp_min; /* position on i */
888 int max_pos_j;
889 max_pos_j = position_j[pos + delta]; /* position on j */
890 /* int begin_t=MAX2(9, pos-alignment_length); */
891 /* int end_t =MIN2(n1-10, pos); */
892 /* int begin_q=MAX2(9, max_pos_j-2); */
893 /* int end_q =MIN2(n2-10, max_pos_j+alignment_length-2); */
894 int begin_t = MAX2(9, pos - alignment_length);
895 int end_t = pos;
896 int begin_q = max_pos_j - 2;
897 int end_q = MIN2(n2 - 9, max_pos_j + alignment_length - 2);
898 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
899 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
900 char *local_structure = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
901 strncpy(s3, (s1 + begin_t), end_t - begin_t + 1);
902 strncpy(s4, (s2 + begin_q), end_q - begin_q + 1);
903 strncpy(local_structure, (structure + begin_q), end_q - begin_q + 1);
904 s3[end_t - begin_t + 1] = '\0';
905 s4[end_q - begin_q + 1] = '\0';
906 local_structure[end_q - begin_q + 1] = '\0';
907 duplexT test;
908 test = duplexfold_CXS(s3,
909 s4,
910 access_s1,
911 access_s2,
912 pos,
913 max_pos_j,
914 threshold,
915 local_structure);
916 if (test.energy * 100 < (threshold - constthreshold)) {
917 int l1 = strchr(test.structure, '&') - test.structure;
918 int dL = strrchr(structure, '|') - strchr(structure, '|');
919 dL += 1;
920 if (dL <= strlen(test.structure) - l1 - 1) {
921 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
922 test.tb, test.te, test.qb, test.qe, test.ddG, test.energy, test.dG1, test.dG2);
923 pos = MAX2(10, pos + temp_min - delta);
924 }
925 }
926
927 free(s3);
928 free(s4);
929 free(test.structure);
930 free(local_structure);
931 }
932 }
933 }
934 }
935
936
937 PRIVATE void
plot_max_CXS(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int fast,const char * structure)938 plot_max_CXS(const int max,
939 const int max_pos,
940 const int max_pos_j,
941 const int alignment_length,
942 const char *s1,
943 const char *s2,
944 const int **access_s1,
945 const int **access_s2,
946 const int fast,
947 const char *structure)
948 {
949 if (fast == 1) {
950 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos - 3, max_pos_j,
951 ((double)max) / 100);
952 } else {
953 int begin_t = MAX2(9, max_pos - alignment_length);
954 int end_t = max_pos;
955 int begin_q = max_pos_j - 2;
956 int end_q = MIN2(n2 - 9, max_pos_j + alignment_length - 2);
957 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
958 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
959 char *local_structure = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
960 strncpy(s3, (s1 + begin_t), end_t - begin_t + 1);
961 strncpy(s4, (s2 + begin_q), end_q - begin_q + 1);
962 strncpy(local_structure, (structure + begin_q), end_q - begin_q + 1);
963 s3[end_t - begin_t + 1] = '\0';
964 s4[end_q - begin_q + 1] = '\0';
965 local_structure[end_q - begin_q + 1] = '\0';
966 duplexT test;
967 test = duplexfold_CXS(s3, s4, access_s1, access_s2, max_pos, max_pos_j, INF, local_structure);
968 int l1 = strchr(test.structure, '&') - test.structure;
969 int dL = strrchr(structure, '|') - strchr(structure, '|');
970 dL += 1;
971 if (dL <= strlen(test.structure) - l1 - 1)
972 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", test.structure,
973 test.tb, test.te, test.qb, test.qe, test.ddG, test.energy, test.dG1, test.dG2);
974
975 free(s3);
976 free(s4);
977 free(test.structure);
978 free(local_structure);
979 }
980 }
981
982
983 /*---------------------------------------------------------duplexfold----------------------------------------------------------------------------------*/
984
985
986 PRIVATE duplexT
duplexfold_C(const char * s1,const char * s2,const int extension_cost,const char * structure)987 duplexfold_C(const char *s1,
988 const char *s2,
989 const int extension_cost,
990 const char *structure)
991 {
992 int i, j, l1, Emin = INF, i_min = 0, j_min = 0;
993 char *struc;
994 duplexT mfe;
995 vrna_md_t md;
996 int bonus = -10000;
997 int *previous_const; /* for each "|" constraint returns the position of the next "|" constraint */
998
999 n3 = (int)strlen(s1);
1000 n4 = (int)strlen(s2);
1001
1002 set_model_details(&md);
1003 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
1004 update_fold_params();
1005 if (P)
1006 free(P);
1007
1008 P = vrna_params(&md);
1009 make_pair_matrix();
1010 }
1011
1012 previous_const = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
1013 j = n4 + 1;
1014 previous_const[j - 1] = n4;
1015 int prev_temp = n4;
1016 while (--j) {
1017 if (structure[j - 1] == '|') {
1018 previous_const[j - 1] = prev_temp;
1019 prev_temp = j;
1020 } else {
1021 previous_const[j - 1] = prev_temp;
1022 }
1023 }
1024 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
1025 for (i = 0; i <= n3; i++)
1026 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
1027 encode_seqs(s1, s2);
1028 for (i = 1; i <= n3; i++) {
1029 for (j = n4; j > 0; j--) {
1030 int type, type2, E, k, l;
1031 int bonus_2 = (structure[j - 1] == '|' ? bonus : 0);
1032 type = pair[S1[i]][S2[j]];
1033 c[i][j] = type ? P->DuplexInit + 2 * extension_cost + bonus_2 : INF;
1034 if (!type)
1035 continue;
1036
1037 if (j < n4 && i > 1 && !(structure[j] == '|'))
1038 c[i][j] += P->mismatchExt[type][SS1[i - 1]][SS2[j + 1]] + 2 * extension_cost;
1039 else if (i > 1)
1040 c[i][j] += P->dangle5[type][SS1[i - 1]] + extension_cost;
1041 else if (j < n4 && !(structure[j] == '|'))
1042 c[i][j] += P->dangle3[type][SS2[j + 1]] + extension_cost;
1043
1044 if (type > 2)
1045 c[i][j] += P->TerminalAU;
1046
1047 for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
1048 for (l = j + 1; l <= previous_const[j]; l++) {
1049 if (i - k + l - j - 2 > MAXLOOP)
1050 break;
1051
1052 type2 = pair[S1[k]][S2[l]];
1053 if (!type2)
1054 continue;
1055
1056 E = E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type],
1057 SS1[k + 1], SS2[l - 1], SS1[i - 1], SS2[j + 1],
1058 P) + (i - k + l - j) * extension_cost + bonus_2;
1059 c[i][j] = MIN2(c[i][j], c[k][l] + E);
1060 }
1061 }
1062 E = c[i][j];
1063 if (i < n3 && j > 1 && !(structure[j - 2] == '|'))
1064 E += P->mismatchExt[rtype[type]][SS2[j - 1]][SS1[i + 1]] + 2 * extension_cost;
1065 else if (i < n3)
1066 E += P->dangle3[rtype[type]][SS1[i + 1]] + extension_cost;
1067 else if (j > 1 && !(structure[j - 2] == '|'))
1068 E += P->dangle5[rtype[type]][SS2[j - 1]] + extension_cost;
1069
1070 if (type > 2)
1071 E += P->TerminalAU;
1072
1073 if (E < Emin) {
1074 Emin = E;
1075 i_min = i;
1076 j_min = j;
1077 }
1078 }
1079 }
1080 struc = backtrack_C(i_min, j_min, extension_cost, structure, &Emin);
1081 if (i_min < n3)
1082 i_min++;
1083
1084 if (j_min > 1)
1085 j_min--;
1086
1087 l1 = strchr(struc, '&') - struc;
1088 int size;
1089 size = strlen(struc) - 1;
1090 Emin -= size * (extension_cost);
1091 mfe.i = i_min;
1092 mfe.j = j_min;
1093 mfe.energy = (double)Emin / 100.;
1094 mfe.structure = struc;
1095 free(previous_const);
1096 if (!delay_free) {
1097 for (i = 0; i <= n3; i++)
1098 free(c[i]);
1099
1100 free(c);
1101 free(S1);
1102 free(S2);
1103 free(SS1);
1104 free(SS2);
1105 }
1106
1107 return mfe;
1108 }
1109
1110
1111 PRIVATE char *
backtrack_C(int i,int j,const int extension_cost,const char * structure,int * Emin)1112 backtrack_C(int i,
1113 int j,
1114 const int extension_cost,
1115 const char *structure,
1116 int *Emin)
1117 {
1118 /* backtrack structure going backwards from i, and forwards from j
1119 * return structure in bracket notation with & as separator */
1120 int k, l, type, type2, E, traced, i0, j0, *previous_const;
1121 char *st1, *st2, *struc;
1122 int bonus = -10000;
1123
1124 previous_const = (int *)vrna_alloc(sizeof(int) * (n4 + 1)); /* encodes the position of the constraints */
1125 int j_temp = n4 + 1;
1126 previous_const[j_temp - 1] = n4;
1127 int prev_temp = n4;
1128 while (--j_temp) {
1129 if (structure[j_temp - 1] == '|') {
1130 previous_const[j_temp - 1] = prev_temp;
1131 prev_temp = j_temp;
1132 } else {
1133 previous_const[j_temp - 1] = prev_temp;
1134 }
1135 }
1136 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
1137 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
1138 i0 = MIN2(i + 1, n3);
1139 j0 = MAX2(j - 1, 1);
1140 while (i > 0 && j <= n4) {
1141 int bonus_2 = (structure[j - 1] == '|' ? bonus : 0);
1142 E = c[i][j];
1143 traced = 0;
1144 st1[i - 1] = '(';
1145 st2[j - 1] = ')';
1146 type = pair[S1[i]][S2[j]];
1147 if (!type)
1148 vrna_message_error("backtrack failed in fold duplex a");
1149
1150 for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
1151 for (l = j + 1; l <= previous_const[j]; l++) {
1152 int LE;
1153 if (i - k + l - j - 2 > MAXLOOP)
1154 break;
1155
1156 type2 = pair[S1[k]][S2[l]];
1157 if (!type2)
1158 continue;
1159
1160 LE = E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type],
1161 SS1[k + 1], SS2[l - 1], SS1[i - 1], SS2[j + 1],
1162 P) + (i - k + l - j) * extension_cost + bonus_2;
1163 if (E == c[k][l] + LE) {
1164 *Emin -= bonus_2;
1165 traced = 1;
1166 i = k;
1167 j = l;
1168 break;
1169 }
1170 }
1171 if (traced)
1172 break;
1173 }
1174 if (!traced) {
1175 if (i > 1 && j < n4 && !(structure[j] == '|'))
1176 E -= P->mismatchExt[type][SS1[i - 1]][SS2[j + 1]] + 2 * extension_cost;
1177 else if (i > 1)
1178 E -= P->dangle5[type][SS1[i - 1]] + extension_cost;
1179 else if (j < n4 && !(structure[j] == '|'))
1180 E -= P->dangle3[type][SS2[j + 1]] + extension_cost;
1181
1182 /* if (j<n4) E -= P->dangle3[type][SS2[j+1]]+extension_cost; */
1183 if (type > 2)
1184 E -= P->TerminalAU;
1185
1186 if (E != P->DuplexInit + 2 * extension_cost + bonus_2) {
1187 vrna_message_error("backtrack failed in fold duplex b");
1188 } else {
1189 *Emin -= bonus_2;
1190 break;
1191 }
1192 }
1193 }
1194 if (i > 1)
1195 i--;
1196
1197 if (j < n4)
1198 j++;
1199
1200 struc = (char *)vrna_alloc(i0 - i + 1 + j - j0 + 1 + 2);
1201 for (k = MAX2(i, 1); k <= i0; k++)
1202 if (!st1[k - 1])
1203 st1[k - 1] = '.';
1204
1205 for (k = j0; k <= j; k++)
1206 if (!st2[k - 1])
1207 st2[k - 1] = '.';
1208
1209 strcpy(struc, st1 + MAX2(i - 1, 0));
1210 strcat(struc, "&");
1211 strcat(struc, st2 + j0 - 1);
1212
1213 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
1214 free(st1);
1215 free(st2);
1216 free(previous_const);
1217 return struc;
1218 }
1219
1220
1221 duplexT **
Lduplexfold_C(const char * s1,const char * s2,const int threshold,const int extension_cost,const int alignment_length,const int delta,const int fast,const char * structure,const int il_a,const int il_b,const int b_a,const int b_b)1222 Lduplexfold_C(const char *s1,
1223 const char *s2,
1224 const int threshold,
1225 const int extension_cost,
1226 const int alignment_length,
1227 const int delta,
1228 const int fast,
1229 const char *structure,
1230 const int il_a,
1231 const int il_b,
1232 const int b_a,
1233 const int b_b)
1234 {
1235 /* duplexT test = duplexfold_C(s1, s2, extension_cost,structure); */
1236
1237 int i, j;
1238 int bopen = b_b;
1239 int bext = b_a + extension_cost;
1240 int iopen = il_b;
1241 int iext_s = 2 * (il_a + extension_cost); /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
1242 int iext_ass = 50 + il_a + extension_cost; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
1243 int min_colonne = INF; /* enthaelt das maximum einer kolonne */
1244 int i_length;
1245 int max_pos; /* get position of the best hit */
1246 int max_pos_j = 10;
1247 int temp;
1248 int min_j_colonne = 11;
1249 int max = INF;
1250 int bonus = -10000;
1251 int constthreshold = 0; /* minimal threshold corresponding to a structure complying to all constraints */
1252
1253 i = 0;
1254 while (structure[i] != '\0') {
1255 if (structure[i] == '|')
1256 constthreshold += bonus;
1257
1258 i++;
1259 }
1260 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
1261 /* int nsubopt=10; */ /* total number of subopt */
1262 int *position; /* contains the position of the hits with energy > E */
1263 int *position_j;
1264 /* int const5end; */ /* position of the 5'most constraint. Only interaction reaching this position are taken into account. */
1265 /* const5end = strchr(structure,'|') - structure; */
1266 /* const5end++; */
1267 n1 = (int)strlen(s1);
1268 n2 = (int)strlen(s2);
1269 /* delta_check is the minimal distance allowed for two hits to be accepted */
1270 /* if both hits are closer, reject the smaller ( in term of position) hits */
1271 position = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
1272 position_j = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
1273 /* i want to implement a function that, given a position in a long sequence and a small sequence, */
1274 /* duplexfold them at this position and report the result at the command line */
1275 /* for this i first need to rewrite backtrack in order to remove the printf functio */
1276 /* END OF DEFINITION FOR NEEDED SUBOPT DATA */
1277
1278 if ((!P) || (fabs(P->temperature - temperature) > 1e-6))
1279 update_dfold_params();
1280
1281 lc = (int **)vrna_alloc(sizeof(int *) * 5);
1282 lin = (int **)vrna_alloc(sizeof(int *) * 5);
1283 lbx = (int **)vrna_alloc(sizeof(int *) * 5);
1284 lby = (int **)vrna_alloc(sizeof(int *) * 5);
1285 linx = (int **)vrna_alloc(sizeof(int *) * 5);
1286 liny = (int **)vrna_alloc(sizeof(int *) * 5);
1287
1288 for (i = 0; i <= 4; i++) {
1289 lc[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1290 lin[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1291 lbx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1292 lby[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1293 linx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1294 liny[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1295 }
1296 for (j = n2; j >= 0; j--) {
1297 lbx[0][j] = lbx[1][j] = lbx[2][j] = lbx[3][j] = lbx[4][j] = INF;
1298 lin[0][j] = lin[1][j] = lin[2][j] = lin[3][j] = lin[4][j] = INF;
1299 lc[0][j] = lc[1][j] = lc[2][j] = lc[3][j] = lc[4][j] = INF;
1300 lby[0][j] = lby[1][j] = lby[2][j] = lby[3][j] = lby[4][j] = INF;
1301 liny[0][j] = liny[1][j] = liny[2][j] = liny[3][j] = liny[4][j] = INF;
1302 linx[0][j] = linx[1][j] = linx[2][j] = linx[3][j] = linx[4][j] = INF;
1303 }
1304 encode_seqs(s1, s2);
1305 i = 10;
1306 i_length = n1 - 9;
1307 while (i < i_length) {
1308 int idx = i % 5;
1309 int idx_1 = (i - 1) % 5;
1310 int idx_2 = (i - 2) % 5;
1311 int idx_3 = (i - 3) % 5;
1312 int idx_4 = (i - 4) % 5;
1313 j = n2 - 9;
1314 while (9 < --j) {
1315 int bonus_2 = (structure[j - 1] == '|' ? bonus : 0);
1316 int type, type2;
1317 type = pair[S1[i]][S2[j]];
1318 lc[idx][j] = type ? P->DuplexInit + 2 * extension_cost + bonus_2 : INF; /* to avoid that previous value influence result should actually not be erforderlich */
1319 if (!bonus_2) {
1320 type2 = pair[S2[j + 1]][S1[i - 1]];
1321 lin[idx][j] = MIN2(
1322 lc[idx_1][j + 1] + P->mismatchI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
1323 lin[idx_1][j] + iext_ass);
1324 lin[idx][j] = MIN2(lin[idx][j], lin[idx][j + 1] + iext_ass);
1325 lin[idx][j] = MIN2(lin[idx][j], lin[idx_1][j + 1] + iext_s);
1326 linx[idx][j] = MIN2(
1327 lc[idx_1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
1328 linx[idx_1][j] + iext_ass);
1329 liny[idx][j] = MIN2(
1330 lc[idx_1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
1331 liny[idx][j + 1] + iext_ass);
1332 type2 = pair[S2[j + 1]][S1[i]];
1333 lby[idx][j] =
1334 MIN2(lby[idx][j + 1] + bext,
1335 lc[idx][j + 1] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0));
1336 } else {
1337 lin[idx][j] = lby[idx][j] = linx[idx][j] = liny[idx][j] = INF;
1338 }
1339
1340 type2 = pair[S2[j]][S1[i - 1]];
1341 lbx[idx][j] =
1342 MIN2(lbx[idx_1][j] + bext, lc[idx_1][j] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0));
1343 /* --------------------------------------------------------------- end update recursion */
1344 if (!type)
1345 continue;
1346
1347 if (!(structure[j] == '|'))
1348 lc[idx][j] += P->mismatchExt[type][SS1[i - 1]][SS2[j + 1]] + 2 * extension_cost;
1349 else
1350 lc[idx][j] += P->dangle5[type][SS1[i - 1]] + extension_cost;
1351
1352 lc[idx][j] += (type > 2 ? P->TerminalAU : 0);
1353 /* type > 2 -> no GC or CG pair */
1354 /* ------------------------------------------------------------------update c matrix */
1355 /* Be careful, no lc may come from a region where a "|" is in a loop, avoided in lin = lby = INF ... jedoch fuer klein loops muss man aufpassen .. */
1356 type2 = pair[S1[i - 1]][S2[j + 1]];
1357 lc[idx][j] =
1358 MIN2(lc[idx_1][j + 1] +
1359 E_IntLoop(0, 0, type2, rtype[type], SS1[i], SS2[j], SS1[i - 1], SS2[j + 1],
1360 P) + 2 * extension_cost,
1361 lc[idx][j]);
1362 type2 = pair[S1[i - 2]][S2[j + 1]];
1363 lc[idx][j] =
1364 MIN2(lc[idx_2][j + 1] +
1365 E_IntLoop(1, 0, type2, rtype[type], SS1[i - 1], SS2[j], SS1[i - 1], SS2[j + 1],
1366 P) + 3 * extension_cost,
1367 lc[idx][j]);
1368 /* kleine loops checks wird in den folgenden if test gemacht. */
1369 if (!(structure[j] == '|')) {
1370 type2 = pair[S1[i - 1]][S2[j + 2]];
1371 lc[idx][j] =
1372 MIN2(lc[idx_1][j + 2] +
1373 E_IntLoop(0, 1, type2, rtype[type], SS1[i], SS2[j + 1], SS1[i - 1], SS2[j + 1],
1374 P) + 3 * extension_cost,
1375 lc[idx][j]);
1376 type2 = pair[S1[i - 2]][S2[j + 2]];
1377 lc[idx][j] =
1378 MIN2(lc[idx_2][j + 2] +
1379 E_IntLoop(1, 1, type2, rtype[type], SS1[i - 1], SS2[j + 1], SS1[i - 1], SS2[j + 1],
1380 P) + 4 * extension_cost,
1381 lc[idx][j]);
1382 type2 = pair[S1[i - 3]][S2[j + 2]];
1383 lc[idx][j] =
1384 MIN2(lc[idx_3][j + 2] +
1385 E_IntLoop(2, 1, type2, rtype[type], SS1[i - 2], SS2[j + 1], SS1[i - 1], SS2[j + 1],
1386 P) + 5 * extension_cost,
1387 lc[idx][j]);
1388 if (!(structure[j + 1] == '|')) {
1389 type2 = pair[S1[i - 3]][S2[j + 3]];
1390 lc[idx][j] =
1391 MIN2(lc[idx_3][j + 3] +
1392 E_IntLoop(2, 2, type2, rtype[type], SS1[i - 2], SS2[j + 2], SS1[i - 1], SS2[j + 1],
1393 P) + 6 * extension_cost,
1394 lc[idx][j]);
1395 type2 = pair[S1[i - 2]][S2[j + 3]];
1396 lc[idx][j] =
1397 MIN2(lc[idx_2][j + 3] +
1398 E_IntLoop(1, 2, type2, rtype[type], SS1[i - 1], SS2[j + 2], SS1[i - 1], SS2[j + 1],
1399 P) + 5 * extension_cost,
1400 lc[idx][j]);
1401 type2 = pair[S1[i - 4]][S2[j + 3]];
1402 lc[idx][j] =
1403 MIN2(lc[idx_4][j + 3] +
1404 E_IntLoop(3, 2, type2, rtype[type], SS1[i - 3], SS2[j + 2], SS1[i - 1], SS2[j + 1],
1405 P) + 7 * extension_cost,
1406 lc[idx][j]);
1407 if (!(structure[j + 2] == '|')) {
1408 type2 = pair[S1[i - 3]][S2[j + 4]];
1409 lc[idx][j] =
1410 MIN2(lc[idx_3][j + 4] +
1411 E_IntLoop(2, 3, type2, rtype[type], SS1[i - 2], SS2[j + 3], SS1[i - 1],
1412 SS2[j + 1],
1413 P) + 7 * extension_cost,
1414 lc[idx][j]);
1415 }
1416 }
1417 }
1418
1419 /* internal->stack */
1420 lc[idx][j] = MIN2(
1421 lin[idx_3][j + 3] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + 2 * extension_cost + 2 * iext_s,
1422 lc[idx][j]);
1423 lc[idx][j] = MIN2(
1424 lin[idx_4][j + 2] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + 2 * extension_cost,
1425 lc[idx][j]);
1426 lc[idx][j] = MIN2(
1427 lin[idx_2][j + 4] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + 2 * extension_cost,
1428 lc[idx][j]);
1429 lc[idx][j] = MIN2(
1430 linx[idx_3][j + 1] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + 2 * extension_cost,
1431 lc[idx][j]);
1432 lc[idx][j] = MIN2(
1433 liny[idx_1][j + 3] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + 2 * extension_cost,
1434 lc[idx][j]);
1435 /* bulge -> stack */
1436 int bAU;
1437 bAU = (type > 2 ? P->TerminalAU : 0);
1438 lc[idx][j] = MIN2(lbx[idx_2][j + 1] + 2 * extension_cost + bext + bAU, lc[idx][j]);
1439 /* min2=by[i][j+1]; */
1440 lc[idx][j] = MIN2(lby[idx_1][j + 2] + 2 * extension_cost + bext + bAU, lc[idx][j]);
1441 lc[idx][j] += bonus_2;
1442 /* if(j<=const5end){ */
1443 temp = min_colonne;
1444 min_colonne = MIN2(lc[idx][j] + (type > 2 ? P->TerminalAU : 0) +
1445 (!(structure[j - 2] == '|') ?
1446 P->mismatchExt[rtype[type]][SS2[j - 1]][SS1[i + 1]] + 2 * extension_cost :
1447 P->dangle3[rtype[type]][SS1[i + 1]] + extension_cost),
1448 min_colonne);
1449 if (temp > min_colonne)
1450 min_j_colonne = j;
1451
1452 /* } */
1453
1454 /* ---------------------------------------------------------------------end update */
1455 }
1456 if (max >= min_colonne) {
1457 max = min_colonne;
1458 max_pos = i;
1459 max_pos_j = min_j_colonne;
1460 }
1461
1462 position[i + delta] = min_colonne;
1463 min_colonne = INF;
1464 position_j[i + delta] = min_j_colonne;
1465 i++;
1466 }
1467 free(S1);
1468 free(S2);
1469 free(SS1);
1470 free(SS2);
1471 /* printf("MAX: %d",max); */
1472 if (max < threshold + constthreshold) {
1473 find_max_C(position,
1474 position_j,
1475 delta,
1476 threshold + constthreshold,
1477 constthreshold,
1478 alignment_length,
1479 s1,
1480 s2,
1481 extension_cost,
1482 fast,
1483 structure);
1484 }
1485
1486 if (max < constthreshold)
1487 plot_max_C(max, max_pos, max_pos_j, alignment_length, s1, s2, extension_cost, fast, structure);
1488
1489 for (i = 0; i <= 4; i++) {
1490 free(lc[i]);
1491 free(lin[i]);
1492 free(lbx[i]);
1493 free(lby[i]);
1494 free(linx[i]);
1495 free(liny[i]);
1496 }
1497 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]); */
1498 free(lc);
1499 free(lin);
1500 free(lbx);
1501 free(lby);
1502 free(linx);
1503 free(liny);
1504 free(position);
1505 free(position_j);
1506 return NULL;
1507 }
1508
1509
1510 PRIVATE void
find_max_C(const int * position,const int * position_j,const int delta,const int threshold,const int constthreshold,const int alignment_length,const char * s1,const char * s2,const int extension_cost,const int fast,const char * structure)1511 find_max_C(const int *position,
1512 const int *position_j,
1513 const int delta,
1514 const int threshold,
1515 const int constthreshold,
1516 const int alignment_length,
1517 const char *s1,
1518 const char *s2,
1519 const int extension_cost,
1520 const int fast,
1521 const char *structure)
1522 {
1523 int pos = n1 - 9;
1524
1525 if (fast == 1) {
1526 while (10 < pos--) {
1527 int temp_min = 0;
1528 if (position[pos + delta] < (threshold)) {
1529 int search_range;
1530 search_range = delta + 1;
1531 while (--search_range)
1532 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
1533 temp_min = search_range;
1534
1535 pos -= temp_min;
1536 int max_pos_j;
1537 max_pos_j = position_j[pos + delta];
1538 int max;
1539 max = position[pos + delta];
1540 printf("target upper bound %d: query lower bound %d (%5.2f) \n",
1541 pos - 10,
1542 max_pos_j - 10,
1543 ((double)max) / 100);
1544 pos = MAX2(10, pos - delta);
1545 }
1546 }
1547 } else {
1548 pos = n1 - 9;
1549 while (10 < pos--) {
1550 int temp_min = 0;
1551 if (position[pos + delta] < (threshold)) {
1552 int search_range;
1553 search_range = delta + 1;
1554 while (--search_range)
1555 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
1556 temp_min = search_range;
1557
1558 pos -= temp_min;
1559 int max_pos_j;
1560 max_pos_j = position_j[pos + delta];
1561 /* max_pos_j und pos entsprechen die realen position
1562 * in der erweiterten sequenz.
1563 * pos=1 -> position 1 in the sequence (and not 0 like in C)
1564 * max_pos_j -> position 1 in the sequence ( not 0 like in C)
1565 */
1566 int begin_t = MAX2(11, pos - alignment_length + 1);
1567 int end_t = MIN2(n1 - 10, pos + 1);
1568 int begin_q = MAX2(11, max_pos_j - 1);
1569 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 2);
1570 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
1571 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1572 char *local_structure = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1573 strncpy(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
1574 strncpy(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
1575 strncpy(local_structure, (structure + begin_q - 1), end_q - begin_q + 1);
1576 s3[end_t - begin_t + 1] = '\0';
1577 s4[end_q - begin_q + 1] = '\0';
1578 local_structure[end_q - begin_q + 1] = '\0';
1579 duplexT test;
1580 test = duplexfold_C(s3, s4, extension_cost, local_structure);
1581 if (test.energy * 100 < (threshold - constthreshold)) {
1582 int l1 = strchr(test.structure, '&') - test.structure;
1583 int dL = strrchr(structure, '|') - strchr(structure, '|');
1584 dL += 1;
1585 if (dL <= strlen(test.structure) - l1 - 1) {
1586 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
1587 begin_t - 10 + test.i - l1,
1588 begin_t - 10 + test.i - 1,
1589 begin_q - 10 + test.j - 1,
1590 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2,
1591 test.energy);
1592 pos = MAX2(10, pos - delta);
1593 }
1594 }
1595
1596 free(s3);
1597 free(s4);
1598 free(test.structure);
1599 free(local_structure);
1600 }
1601 }
1602 }
1603 }
1604
1605
1606 PRIVATE void
plot_max_C(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1,const char * s2,const int extension_cost,const int fast,const char * structure)1607 plot_max_C(const int max,
1608 const int max_pos,
1609 const int max_pos_j,
1610 const int alignment_length,
1611 const char *s1,
1612 const char *s2,
1613 const int extension_cost,
1614 const int fast,
1615 const char *structure)
1616 {
1617 if (fast == 1) {
1618 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos - 10, max_pos_j - 10,
1619 ((double)max) / 100);
1620 } else {
1621 duplexT test;
1622 int begin_t = MAX2(11, max_pos - alignment_length + 1);
1623 int end_t = MIN2(n1 - 10, max_pos + 1);
1624 int begin_q = MAX2(11, max_pos_j - 1);
1625 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 2);
1626 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
1627 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1628 char *local_structure = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1629 strncpy(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
1630 strncpy(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
1631 strncpy(local_structure, (structure + begin_q - 1), end_q - begin_q + 1);
1632 s3[end_t - begin_t + 1] = '\0';
1633 s4[end_q - begin_q + 1] = '\0';
1634 local_structure[end_q - begin_q + 1] = '\0';
1635 test = duplexfold_C(s3, s4, extension_cost, local_structure);
1636 int l1 = strchr(test.structure, '&') - test.structure;
1637 int dL = strrchr(structure, '|') - strchr(structure, '|');
1638 dL += 1;
1639 if (dL <= strlen(test.structure) - l1 - 1) {
1640 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
1641 begin_t - 10 + test.i - l1, begin_t - 10 + test.i - 1, begin_q - 10 + test.j - 1,
1642 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2, test.energy);
1643 free(s3);
1644 free(s4);
1645 free(test.structure);
1646 }
1647
1648 free(local_structure);
1649 }
1650 }
1651
1652
1653 PRIVATE void
update_dfold_params(void)1654 update_dfold_params(void)
1655 {
1656 vrna_md_t md;
1657
1658 if (P)
1659 free(P);
1660
1661 set_model_details(&md);
1662 P = vrna_params(&md);
1663 make_pair_matrix();
1664 }
1665
1666
1667 /*---------------------------------------------------------------------------*/
1668
1669
1670 PRIVATE void
encode_seqs(const char * s1,const char * s2)1671 encode_seqs(const char *s1,
1672 const char *s2)
1673 {
1674 unsigned int i, l;
1675
1676 l = strlen(s1);
1677 S1 = encode_seq(s1);
1678 SS1 = (short *)vrna_alloc(sizeof(short) * (l + 1));
1679 /* SS1 exists only for the special X K and I bases and energy_set!=0 */
1680
1681 for (i = 1; i <= l; i++) /* make numerical encoding of sequence */
1682 SS1[i] = alias[S1[i]]; /* for mismatches of nostandard bases */
1683
1684 l = strlen(s2);
1685 S2 = encode_seq(s2);
1686 SS2 = (short *)vrna_alloc(sizeof(short) * (l + 1));
1687 /* SS2 exists only for the special X K and I bases and energy_set!=0 */
1688
1689 for (i = 1; i <= l; i++) /* make numerical encoding of sequence */
1690 SS2[i] = alias[S2[i]]; /* for mismatches of nostandard bases */
1691 }
1692
1693
1694 PRIVATE short *
encode_seq(const char * sequence)1695 encode_seq(const char *sequence)
1696 {
1697 unsigned int i, l;
1698 short *S;
1699
1700 l = strlen(sequence);
1701 S = (short *)vrna_alloc(sizeof(short) * (l + 2));
1702 S[0] = (short)l;
1703
1704 /* make numerical encoding of sequence */
1705 for (i = 1; i <= l; i++)
1706 S[i] = (short)encode_char(toupper(sequence[i - 1]));
1707
1708 /* for circular folding add first base at position n+1 */
1709 S[l + 1] = S[1];
1710
1711 return S;
1712 }
1713