1 /*
2 * compute the duplex structure of two RNA strands,
3 * allowing only inter-strand base pairs.
4 * see cofold() for computing hybrid structures without
5 * restriction.
6 * Ivo Hofacker
7 * Vienna RNA package
8 *
9 */
10
11
12 /*
13 * library containing the function used in rnaplex
14 * the program rnaplex uses the following function
15 * Lduplexfold: finds high scoring segments
16 * it stores the end-position of these segments in an array
17 * and call then for each of these positions the duplexfold function
18 * which allows one to make backtracking for each of the high scoring position
19 * It allows one to find suboptimal partially overlapping (depends on a a parameter)
20 * duplexes between a long RNA and a shorter one.
21 * Contrarly to RNAduplex, the energy model is not in E~log(N),
22 * where N is the length of an interial loop but used an affine model,
23 * where the extension and begin parameter are fitted to the energy
24 * parameter used by RNAduplex. This allows one to check for duplex between a short RNA(20nt)
25 * and a long one at the speed of 1Mnt/s. At this speed the whole genome (3Gnt) can be analyzed for one siRNA
26 * in about 50 minutes.
27 * The algorithm is based on an idea by Durbin and Eddy:when the alginment reach a value larger than a
28 * given threshold this value is stored in an array. When the alignment score goes
29 * then under this threshold, the alignemnent begin from this value, in that way the backtracking allow us
30 * to find all non-overlapping high-scoring segments.
31 * For more information check "durbin, biological sequence analysis"
32 */
33
34 #ifdef HAVE_CONFIG_H
35 #include "config.h"
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <math.h>
41 #include <ctype.h>
42 #include <string.h>
43 #include "ViennaRNA/utils/basic.h"
44 #include "ViennaRNA/params/default.h"
45 #include "ViennaRNA/fold_vars.h"
46 #include "ViennaRNA/fold.h"
47 #include "ViennaRNA/pair_mat.h"
48 #include "ViennaRNA/params/basic.h"
49 #include "ViennaRNA/plex.h"
50 #include "ViennaRNA/ali_plex.h"
51 #include "ViennaRNA/loops/all.h"
52 /* #################SIMD############### */
53
54 /* int subopt_sorted=0; */
55
56 #define PUBLIC
57 #define PRIVATE static
58
59 #define STACK_BULGE1 1 /* stacking energies for bulges of size 1 */
60 #define NEW_NINIO 1 /* new asymetry penalty */
61 #define ARRAY 32 /*array size*/
62 #define UNIT 100
63 #define MINPSCORE -2 * UNIT
64
65 /**
66 *** Macro that define indices for the Single Array approach defined in FLduplexfold_XS->gain of 20% in runtime
67 *** so that everything is done in a 1D array.
68 *** input is idx for i, j for j and the length of the query RNA
69 *** 1D is divided in 6 subarrays, one for each number of allowed state
70 *** The length of each subarray is 5*L. 5 the maximal stored distance on the target sequence,
71 *** L is the length of the query sequence
72 **/
73 #define LCI(i, j, l) ((i) * l + j)
74 #define LINI(i, j, l) ((i + 5) * l + j)
75 #define LBXI(i, j, l) ((i + 10) * l + j)
76 #define LBYI(i, j, l) ((i + 15) * l + j)
77 #define LINIX(i, j, l) ((i + 20) * l + j)
78 #define LINIY(i, j, l) ((i + 25) * l + j)
79
80 PRIVATE void
81 encode_seqs(const char *s1,
82 const char *s2);
83
84
85 PRIVATE short *
86 encode_seq(const char *seq);
87
88
89 PRIVATE void
90 update_dfold_params(void);
91
92
93 /**
94 *** duplexfold(_XS)/backtrack(_XS) computes duplex interaction with standard energy and considers extension_cost
95 *** find_max(_XS)/plot_max(_XS) find suboptimals and MFE
96 *** fduplexfold(_XS) computes duplex in a plex way
97 **/
98 PRIVATE duplexT
99 duplexfold(const char *s1,
100 const char *s2,
101 const int extension_cost);
102
103
104 PRIVATE char *
105 backtrack(int i,
106 int j,
107 const int extension_cost);
108
109
110 PRIVATE void
111 find_max(const int *position,
112 const int *position_j,
113 const int delta,
114 const int threshold,
115 const int length,
116 const char *s1,
117 const char *s2,
118 const int extension_cost,
119 const int fast,
120 const int il_a,
121 const int il_b,
122 const int b_a,
123 const int b_b);
124
125
126 PRIVATE void
127 plot_max(const int max,
128 const int max_pos,
129 const int max_pos_j,
130 const int alignment_length,
131 const char *s1,
132 const char *s2,
133 const int extension_cost,
134 const int fast,
135 const int il_a,
136 const int il_b,
137 const int b_a,
138 const int b_b);
139
140
141 /* PRIVATE duplexT duplexfold_XS(const char *s1, const char *s2,const int **access_s1, const int **access_s2, const int i_pos, const int j_pos, const int threshold); */
142 PRIVATE duplexT
143 duplexfold_XS(const char *s1,
144 const char *s2,
145 const int **access_s1,
146 const int **access_s2,
147 const int i_pos,
148 const int j_pos,
149 const int threshold,
150 const int i_flag,
151 const int j_flag);
152
153
154 /* PRIVATE char * backtrack_XS(int i, int j, const int** access_s1, const int** access_s2); */
155 PRIVATE char *
156 backtrack_XS(int i,
157 int j,
158 const int **access_s1,
159 const int **access_s2,
160 const int i_flag,
161 const int j_flag);
162
163
164 PRIVATE void
165 find_max_XS(const int *position,
166 const int *position_j,
167 const int delta,
168 const int threshold,
169 const int alignment_length,
170 const char *s1,
171 const char *s2,
172 const int **access_s1,
173 const int **access_s2,
174 const int fast,
175 const int il_a,
176 const int il_b,
177 const int b_a,
178 const int b_b);
179
180
181 PRIVATE void
182 plot_max_XS(const int max,
183 const int max_pos,
184 const int max_pos_j,
185 const int alignment_length,
186 const char *s1,
187 const char *s2,
188 const int **access_s1,
189 const int **access_s2,
190 const int fast,
191 const int il_a,
192 const int il_b,
193 const int b_a,
194 const int b_b);
195
196
197 PRIVATE duplexT
198 fduplexfold(const char *s1,
199 const char *s2,
200 const int extension_cost,
201 const int il_a,
202 const int il_b,
203 const int b_a,
204 const int b_b);
205
206
207 PRIVATE char *
208 fbacktrack(int i,
209 int j,
210 const int extension_cost,
211 const int il_a,
212 const int il_b,
213 const int b_a,
214 const int b_b,
215 int *dG);
216
217
218 PRIVATE duplexT
219 fduplexfold_XS(const char *s1,
220 const char *s2,
221 const int **access_s1,
222 const int **access_s2,
223 const int i_pos,
224 const int j_pos,
225 const int threshold,
226 const int il_a,
227 const int il_b,
228 const int b_a,
229 const int b_b);
230
231
232 PRIVATE char *
233 fbacktrack_XS(int i,
234 int j,
235 const int **access_s1,
236 const int **access_s2,
237 const int i_pos,
238 const int j_pos,
239 const int il_a,
240 const int il_b,
241 const int b_a,
242 const int b_b,
243 int *dGe,
244 int *dGeplex,
245 int *dGx,
246 int *dGy);
247
248
249 /*@unused@*/
250
251 #define MAXSECTORS 500 /* dimension for a backtrack array */
252 #define LOCALITY 0. /* locality parameter for base-pairs */
253
254 PRIVATE vrna_param_t *P = NULL;
255
256 /**
257 *** energy array used in fduplexfold and fduplexfold_XS
258 *** We do not use the 1D array here as it is not time critical
259 *** It also makes the code more readable
260 *** c -> stack;in -> interior loop;bx/by->bulge;inx/iny->1xn loops
261 **/
262
263 PRIVATE int **c = NULL, **in = NULL, **bx = NULL, **by = NULL, **inx = NULL, **iny = NULL;
264
265 /**
266 *** S1, SS1, ... contains the encoded sequence for target and query
267 *** n1, n2, n3, n4 contains target and query length
268 **/
269
270 PRIVATE short *S1 = NULL, *SS1 = NULL, *S2 = NULL, *SS2 = NULL; /*contains the sequences*/
271 PRIVATE int n1, n2; /* sequence lengths */
272 PRIVATE int n3, n4; /*sequence length for the duplex*/;
273
274
275 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
276
277 /**
278 *** duplexfold_XS is the pendant to the duplex function as defined in duplex.c
279 *** but takes the accessibility into account. It is similar to the MFE version of RNAup
280 *** The only approximation made is that target 3' end - query 5' end base pair is known
281 *** s1,s2 are the query and target sequence; access_s1, access_s2 are the accessibility
282 *** profiles, i_pos, j_pos are the coordinates of the closing pair.
283 **/
284 PRIVATE duplexT
duplexfold_XS(const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int i_pos,const int j_pos,const int threshold,const int i_flag,const int j_flag)285 duplexfold_XS(const char *s1,
286 const char *s2,
287 const int **access_s1,
288 const int **access_s2,
289 const int i_pos,
290 const int j_pos,
291 const int threshold,
292 const int i_flag,
293 const int j_flag)
294 {
295 int i, j, p, q, Emin = INF, l_min = 0, k_min = 0;
296 char *struc;
297 vrna_md_t md;
298
299 struc = NULL;
300 duplexT mfe;
301 n3 = (int)strlen(s1);
302 n4 = (int)strlen(s2);
303
304 set_model_details(&md);
305
306 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
307 update_fold_params();
308 if (P)
309 free(P);
310
311 P = vrna_params(&md);
312 make_pair_matrix();
313 }
314
315 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
316 for (i = 0; i <= n3; i++)
317 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
318 for (i = 0; i <= n3; i++)
319 for (j = 0; j <= n4; j++)
320 c[i][j] = INF;
321 encode_seqs(s1, s2);
322 int type, type2, type3, E, k, l;
323 i = n3 - i_flag;
324 j = 1 + j_flag;
325 type = pair[S1[i]][S2[j]];
326 if (!type) {
327 printf("Error during initialization of the duplex in duplexfold_XS\n");
328 mfe.structure = NULL;
329 mfe.energy = INF;
330 return mfe;
331 }
332
333 c[i][j] = P->DuplexInit;
334 /** if (type>2) c[i][j] += P->TerminalAU;
335 *** c[i][j]+=P->dangle3[rtype[type]][SS1[i+1]];
336 *** c[i][j]+=P->dangle5[rtype[type]][SS2[j-1]];
337 *** The three above lines are replaced by the line below
338 **/
339
340
341 c[i][j] += vrna_E_ext_stem(rtype[type], (j_flag ? SS2[j - 1] : -1), (i_flag ? SS1[i + 1] : -1), P);
342
343 /* if(j_flag ==0 && i_flag==0){ */
344 /* c[i][j] += vrna_E_ext_stem(rtype[type], -1 , -1 , P); */
345 /* }else if(j_flag ==0 && i_flag==1){ */
346 /* c[i][j] += vrna_E_ext_stem(rtype[type], -1 , SS1[i+1], P); */
347 /* }else if(j_flag ==1 && i_flag==0){ */
348 /* c[i][j] += vrna_E_ext_stem(rtype[type], SS2[j-1] , -1, P); */
349 /* }else { */
350 /* c[i][j] += vrna_E_ext_stem(rtype[type], SS2[j-1] , SS1[i+1], P); */
351 /* } */
352 /* Just in case we have only one bp, we initialize ... */
353 /* k_min, l_min and Emin */
354 k_min = i;
355 l_min = j;
356 Emin = c[i][j];
357 for (k = i; k > 1; k--) {
358 if (k < i)
359 c[k + 1][0] = INF;
360
361 for (l = j; l <= n4 - 1; l++) {
362 if (!(k == i && l == j))
363 c[k][l] = INF;
364
365 type2 = pair[S1[k]][S2[l]];
366 if (!type2)
367 continue;
368
369 for (p = k + 1; p <= n3 - i_flag && p < k + MAXLOOP - 1; p++) {
370 for (q = l - 1; q >= 1 + j_flag; q--) {
371 if (p - k + l - q - 2 > MAXLOOP)
372 break;
373
374 type3 = pair[S1[p]][S2[q]];
375 if (!type3)
376 continue;
377
378 E = E_IntLoop(p - k - 1,
379 l - q - 1,
380 type2,
381 rtype[type3],
382 SS1[k + 1],
383 SS2[l - 1],
384 SS1[p - 1],
385 SS2[q + 1],
386 P);
387 c[k][l] = MIN2(c[k][l], c[p][q] + E);
388 }
389 }
390 E = c[k][l];
391 E += access_s1[i - k + 1][i_pos] + access_s2[l - 1][j_pos + (l - 1) - 1];
392 /**if (type2>2) E += P->TerminalAU;
393 ***if (k>1) E += P->dangle5[type2][SS1[k-1]];
394 ***if (l<n4) E += P->dangle3[type2][SS2[l+1]];
395 *** Replaced by the line below
396 **/
397 E += vrna_E_ext_stem(type2, (k > 1) ? SS1[k - 1] : -1, (l < n4) ? SS2[l + 1] : -1, P);
398
399 if (E < Emin) {
400 Emin = E;
401 k_min = k;
402 l_min = l;
403 }
404 }
405 }
406
407 if (Emin > threshold) {
408 mfe.energy = INF;
409 mfe.ddG = INF;
410 mfe.structure = NULL;
411 for (i = 0; i <= n3; i++)
412 free(c[i]);
413 free(c);
414 free(S1);
415 free(S2);
416 free(SS1);
417 free(SS2);
418 return mfe;
419 } else {
420 struc = backtrack_XS(k_min, l_min, access_s1, access_s2, i_flag, j_flag);
421 }
422
423 /**
424 *** find best dangles combination
425 **/
426 int dx_5, dx_3, dy_5, dy_3, dGx, dGy, bonus_x;
427 dx_5 = 0;
428 dx_3 = 0;
429 dy_5 = 0;
430 dy_3 = 0;
431 dGx = 0;
432 dGy = 0;
433 bonus_x = 0;
434 /* x--------x */
435 /* |||||||| */
436 /* x--------x */
437 dGx = access_s1[i - k_min + 1][i_pos];
438 dx_3 = 0;
439 dx_5 = 0;
440 bonus_x = 0;
441 dGy = access_s2[l_min - j + 1][j_pos + (l_min - 1)];
442
443 mfe.tb = i_pos - 9 - i + k_min - 1 - dx_5;
444 mfe.te = i_pos - 9 - 1 + dx_3;
445 mfe.qb = j_pos - 9 - 1 - dy_5;
446 mfe.qe = j_pos + l_min - 3 - 9 + dy_3;
447 mfe.ddG = (double)Emin * 0.01;
448 mfe.dG1 = (double)dGx * 0.01;
449 mfe.dG2 = (double)dGy * 0.01;
450
451 mfe.energy = mfe.ddG - mfe.dG1 - mfe.dG2;
452
453 mfe.structure = struc;
454 for (i = 0; i <= n3; i++)
455 free(c[i]);
456 free(c);
457 free(S1);
458 free(S2);
459 free(SS1);
460 free(SS2);
461 return mfe;
462 }
463
464
465 PRIVATE char *
backtrack_XS(int i,int j,const int ** access_s1,const int ** access_s2,const int i_flag,const int j_flag)466 backtrack_XS(int i,
467 int j,
468 const int **access_s1,
469 const int **access_s2,
470 const int i_flag,
471 const int j_flag)
472 {
473 /* backtrack structure going backwards from i, and forwards from j
474 * return structure in bracket notation with & as separator */
475 int k, l, type, type2, E, traced, i0, j0;
476 char *st1, *st2, *struc;
477
478 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
479 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
480 i0 = i; /*MAX2(i-1,1);*/ j0 = j;/*MIN2(j+1,n4);*/
481 while (i <= n3 - i_flag && j >= 1 + j_flag) {
482 E = c[i][j];
483 traced = 0;
484 st1[i - 1] = '(';
485 st2[j - 1] = ')';
486 type = pair[S1[i]][S2[j]];
487 if (!type)
488 vrna_message_error("backtrack failed in fold duplex bli");
489
490 for (k = i + 1; k <= n3 && k > i - MAXLOOP - 2; k++) {
491 for (l = j - 1; l >= 1; l--) {
492 int LE;
493 if (i - k + l - j - 2 > MAXLOOP)
494 break;
495
496 type2 = pair[S1[k]][S2[l]];
497 if (!type2)
498 continue;
499
500 LE = E_IntLoop(k - i - 1,
501 j - l - 1,
502 type,
503 rtype[type2],
504 SS1[i + 1],
505 SS2[j - 1],
506 SS1[k - 1],
507 SS2[l + 1],
508 P);
509 if (E == c[k][l] + LE) {
510 traced = 1;
511 i = k;
512 j = l;
513 break;
514 }
515 }
516 if (traced)
517 break;
518 }
519 if (!traced) {
520 #if 0
521 if (i < n3)
522 E -= P->dangle3[rtype[type]][SS1[i + 1]]; /* +access_s1[1][i+1]; */
523
524 if (j > 1)
525 E -= P->dangle5[rtype[type]][SS2[j - 1]]; /* +access_s2[1][j+1]; */
526
527 if (type > 2)
528 E -= P->TerminalAU;
529
530 #endif
531 E -= vrna_E_ext_stem(rtype[type], SS2[j - 1], SS1[i + 1], P);
532 break;
533 if (E != P->DuplexInit)
534 vrna_message_error("backtrack failed in fold duplex bal");
535 else
536 break;
537 }
538 }
539 /* if (i<n3) i++; */
540 /* if (j>1) j--; */
541 struc = (char *)vrna_alloc(i - i0 + 1 + j0 - j + 1 + 2);
542 for (k = MAX2(i0, 1); k <= i; k++)
543 if (!st1[k - 1])
544 st1[k - 1] = '.';
545
546 for (k = j; k <= j0; k++)
547 if (!st2[k - 1])
548 st2[k - 1] = '.';
549
550 strcpy(struc, st1 + MAX2(i0 - 1, 0));
551 strcat(struc, "&");
552 strcat(struc, st2 + j - 1);
553 free(st1);
554 free(st2);
555 return struc;
556 }
557
558
559 /**
560 *** fduplexfold(_XS) computes the interaction based on the plex energy model.
561 *** Faster than duplex approach, but not standard model compliant
562 *** We use the standard matrix (c, in, etc..., because we backtrack)
563 **/
564 PRIVATE duplexT
fduplexfold_XS(const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int i_pos,const int j_pos,const int threshold,const int il_a,const int il_b,const int b_a,const int b_b)565 fduplexfold_XS(const char *s1,
566 const char *s2,
567 const int **access_s1,
568 const int **access_s2,
569 const int i_pos,
570 const int j_pos,
571 const int threshold,
572 const int il_a,
573 const int il_b,
574 const int b_a,
575 const int b_b)
576 {
577 /**
578 *** i,j recursion index
579 *** Emin, i_min, j_min MFE position and energy
580 *** mfe struc duplex structure
581 **/
582 int i, j, Emin, i_min, j_min, l1;
583 duplexT mfe;
584 char *struc;
585 /**
586 *** bext=b_a bulge extension parameter for linear model
587 *** iopen=il_b interior opening for linear model
588 *** iext_s=2*il_a asymmetric extension for interior loop
589 *** iext_ass=60+il_a symmetric extension for interior loop
590 *** min_colonne=INF; max score of a row
591 *** i_length;
592 *** max_pos; position of best hit during recursion on target
593 *** max_pos_j; position of best hit during recursion on query
594 *** temp; temp variable for min_colonne
595 *** min_j_colonne; position of the minimum on query in row j
596 *** max=INF; absolute MFE
597 *** n3,n4 length of target and query
598 *** DJ contains the accessibility penalty for the query sequence
599 *** maxPenalty contains the maximum penalty
600 **/
601 int bopen = b_b;
602 int bext = b_a;
603 int iopen = il_b;
604 int iext_s = 2 * il_a; /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
605 int iext_ass = 50 + il_a; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
606 int min_colonne = INF; /* enthaelt das maximum einer kolonne */
607 int i_length;
608 int max_pos; /* get position of the best hit */
609 int max_pos_j;
610 int temp = INF;
611 int min_j_colonne;
612 int max = INF;
613 int **DJ;
614 int maxPenalty[4];
615 vrna_md_t md;
616
617 /**
618 *** variable initialization
619 **/
620 n3 = (int)strlen(s1);
621 n4 = (int)strlen(s2);
622
623 set_model_details(&md);
624 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
625 update_fold_params();
626 if (P)
627 free(P);
628
629 P = vrna_params(&md);
630 make_pair_matrix();
631 }
632
633 /**
634 *** array initialization
635 **/
636 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
637 in = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
638 bx = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
639 by = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
640 inx = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
641 iny = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
642 /* #pragma omp parallel for */
643 for (i = 0; i <= n3; i++) {
644 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
645 in[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
646 bx[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
647 by[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
648 inx[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
649 iny[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
650 }
651 for (i = 0; i < n3; i++) {
652 for (j = 0; j < n4; j++) {
653 in[i][j] = INF; /* no in before 1 */
654 c[i][j] = INF; /* no bulge and no in before n2 */
655 bx[i][j] = INF; /* no bulge before 1 */
656 by[i][j] = INF;
657 inx[i][j] = INF; /* no bulge before 1 */
658 iny[i][j] = INF;
659 }
660 }
661 /**
662 *** sequence encoding
663 **/
664 encode_seqs(s1, s2);
665 /**
666 *** Compute max accessibility penalty for the query only once
667 **/
668 maxPenalty[0] = (int)-1 * P->stack[2][2] / 2;
669 maxPenalty[1] = (int)-1 * P->stack[2][2];
670 maxPenalty[2] = (int)-3 * P->stack[2][2] / 2;
671 maxPenalty[3] = (int)-2 * P->stack[2][2];
672
673
674 DJ = (int **)vrna_alloc(4 * sizeof(int *));
675 DJ[0] = (int *)vrna_alloc((1 + n4) * sizeof(int));
676 DJ[1] = (int *)vrna_alloc((1 + n4) * sizeof(int));
677 DJ[2] = (int *)vrna_alloc((1 + n4) * sizeof(int));
678 DJ[3] = (int *)vrna_alloc((1 + n4) * sizeof(int));
679
680 j = n4 - 9;
681 while (--j > 9) {
682 int jdiff = j_pos + j - 11;
683 /**
684 *** Depending in which direction (i:1->n vs j:m->1) the accessibility is computed we get slightly different results.
685 *** We reduce the discrepancies by taking the average of d^i_k and d^j_l
686 **/
687 DJ[0][j] = 0.5 *
688 (access_s2[5][jdiff + 4] - access_s2[4][jdiff + 4] + access_s2[5][jdiff] -
689 access_s2[4][jdiff - 1]);
690 DJ[1][j] = 0.5 *
691 (access_s2[5][jdiff + 5] - access_s2[4][jdiff + 5] + access_s2[5][jdiff + 1] -
692 access_s2[4][jdiff]) + DJ[0][j];
693 DJ[2][j] = 0.5 *
694 (access_s2[5][jdiff + 6] - access_s2[4][jdiff + 6] + access_s2[5][jdiff + 2] -
695 access_s2[4][jdiff + 1]) + DJ[1][j];
696 DJ[3][j] = 0.5 *
697 (access_s2[5][jdiff + 7] - access_s2[4][jdiff + 7] + access_s2[5][jdiff + 3] -
698 access_s2[4][jdiff + 2]) + DJ[2][j];
699
700
701 /*
702 * DJ[0][j] = access_s2[5][jdiff+4] - access_s2[4][jdiff+4] ;
703 * DJ[1][j] = access_s2[5][jdiff+5] - access_s2[4][jdiff+5] + DJ[0][j];
704 * DJ[2][j] = access_s2[5][jdiff+6] - access_s2[4][jdiff+6] + DJ[1][j];
705 * DJ[3][j] = access_s2[5][jdiff+7] - access_s2[4][jdiff+7] + DJ[2][j];
706 * DJ[0][j] = MIN2(DJ[0][j],maxPenalty[0]);
707 * DJ[1][j] = MIN2(DJ[1][j],maxPenalty[1]);
708 * DJ[2][j] = MIN2(DJ[2][j],maxPenalty[2]);
709 * DJ[3][j] = MIN2(DJ[3][j],maxPenalty[3]);
710 */
711 }
712
713 /**
714 *** Start of the recursion
715 *** first and last 10 nucleotides on target and query are dummy nucleotides
716 *** allow to reduce number of if test
717 **/
718 i = 11;
719 i_length = n3 - 9;
720 while (i < i_length) {
721 int di1, di2, di3, di4;
722 int idiff = i_pos - (n3 - 10 - i);
723 di1 = 0.5 *
724 (access_s1[5][idiff + 4] - access_s1[4][idiff + 4] + access_s1[5][idiff] -
725 access_s1[4][idiff - 1]);
726 di2 = 0.5 *
727 (access_s1[5][idiff + 3] - access_s1[4][idiff + 3] + access_s1[5][idiff - 1] -
728 access_s1[4][idiff - 2]) + di1;
729 di3 = 0.5 *
730 (access_s1[5][idiff + 2] - access_s1[4][idiff + 2] + access_s1[5][idiff - 2] -
731 access_s1[4][idiff - 3]) + di2;
732 di4 = 0.5 *
733 (access_s1[5][idiff + 1] - access_s1[4][idiff + 1] + access_s1[5][idiff - 3] -
734 access_s1[4][idiff - 4]) + di3;
735 /*
736 * di1 = access_s1[5][idiff] - access_s1[4][idiff-1];
737 * di2 = access_s1[5][idiff-1] - access_s1[4][idiff-2] + di1;
738 * di3 = access_s1[5][idiff-2] - access_s1[4][idiff-3] + di2;
739 * di4 = access_s1[5][idiff-3] - access_s1[4][idiff-4] + di3;
740 * di1=MIN2(di1,maxPenalty[0]);
741 * di2=MIN2(di2,maxPenalty[1]);
742 * di3=MIN2(di3,maxPenalty[2]);
743 * di4=MIN2(di4,maxPenalty[3]);
744 */
745 j = n4 - 9;
746 min_colonne = INF;
747 while (10 < --j) {
748 int dj1, dj2, dj3, dj4;
749 int jdiff = j_pos + j - 11;
750 dj1 = DJ[0][j];
751 dj2 = DJ[1][j];
752 dj3 = DJ[2][j];
753 dj4 = DJ[3][j];
754 int type, type2;
755 type = pair[S1[i]][S2[j]];
756 /**
757 *** Start duplex
758 **/
759 /*
760 * c[i][j]=type ? P->DuplexInit + access_s1[1][idiff]+access_s2[1][jdiff] : INF;
761 */
762 c[i][j] = type ? P->DuplexInit : INF;
763 /**
764 *** update lin bx by linx liny matrix
765 **/
766 type2 = pair[S2[j + 1]][S1[i - 1]];
767 /**
768 *** start/extend interior loop
769 **/
770 in[i][j] = MIN2(
771 c[i - 1][j + 1] + P->mismatchI[type2][SS2[j]][SS1[i]] + iopen + iext_s + di1 + dj1,
772 in[i - 1][j] + iext_ass + di1);
773
774 /**
775 *** start/extend nx1 target
776 *** use same type2 as for in
777 **/
778 inx[i][j] = MIN2(
779 c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s + di1 + dj1,
780 inx[i - 1][j] + iext_ass + di1);
781 /**
782 *** start/extend 1xn target
783 *** use same type2 as for in
784 **/
785 iny[i][j] = MIN2(
786 c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s + di1 + dj1,
787 iny[i][j + 1] + iext_ass + dj1);
788 /**
789 *** extend interior loop
790 **/
791 in[i][j] = MIN2(in[i][j], in[i][j + 1] + iext_ass + dj1);
792 in[i][j] = MIN2(in[i][j], in[i - 1][j + 1] + iext_s + di1 + dj1);
793 /**
794 *** start/extend bulge target
795 **/
796 type2 = pair[S2[j]][S1[i - 1]];
797 bx[i][j] =
798 MIN2(bx[i - 1][j] + bext + di1,
799 c[i - 1][j] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + di1);
800 /**
801 *** start/extend bulge query
802 **/
803 type2 = pair[S2[j + 1]][S1[i]];
804 by[i][j] =
805 MIN2(by[i][j + 1] + bext + dj1,
806 c[i][j + 1] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + dj1);
807 /**
808 ***end update recursion
809 ***######################## Start stack extension##############################
810 **/
811 if (!type)
812 continue;
813
814 c[i][j] += vrna_E_ext_stem(type, SS1[i - 1], SS2[j + 1], P);
815 /**
816 *** stack extension
817 **/
818 if ((type2 = pair[S1[i - 1]][S2[j + 1]]))
819 c[i][j] = MIN2(c[i - 1][j + 1] + P->stack[rtype[type]][type2] + di1 + dj1, c[i][j]);
820
821 /**
822 *** 1x0 / 0x1 stack extension
823 **/
824 if ((type2 = pair[S1[i - 1]][S2[j + 2]]))
825 c[i][j] = MIN2(c[i - 1][j + 2] + P->bulge[1] + P->stack[rtype[type]][type2] + di1 + dj2,
826 c[i][j]);
827
828 if ((type2 = pair[S1[i - 2]][S2[j + 1]]))
829 c[i][j] = MIN2(c[i - 2][j + 1] + P->bulge[1] + P->stack[type2][rtype[type]] + di2 + dj1,
830 c[i][j]);
831
832 /**
833 *** 1x1 / 2x2 stack extension
834 **/
835 if ((type2 = pair[S1[i - 2]][S2[j + 2]]))
836 c[i][j] = MIN2(
837 c[i - 2][j + 2] + P->int11[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]] + di2 + dj2,
838 c[i][j]);
839
840 if ((type2 = pair[S1[i - 3]][S2[j + 3]])) {
841 c[i][j] =
842 MIN2(c[i - 3][j + 3] +
843 P->int22[type2][rtype[type]][SS1[i - 2]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + di3 + dj3,
844 c[i][j]);
845 }
846
847 /**
848 *** 1x2 / 2x1 stack extension
849 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
850 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
851 **/
852 if ((type2 = pair[S1[i - 3]][S2[j + 2]])) {
853 c[i][j] =
854 MIN2(
855 c[i - 3][j + 2] + P->int21[rtype[type]][type2][SS2[j + 1]][SS1[i - 2]][SS1[i - 1]] + di3 + dj2,
856 c[i][j]);
857 }
858
859 if ((type2 = pair[S1[i - 2]][S2[j + 3]])) {
860 c[i][j] =
861 MIN2(
862 c[i - 2][j + 3] + P->int21[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + di2 + dj3,
863 c[i][j]);
864 }
865
866 /**
867 *** 2x3 / 3x2 stack extension
868 **/
869 if ((type2 = pair[S1[i - 4]][S2[j + 3]]))
870 c[i][j] = MIN2(c[i - 4][j + 3] + P->internal_loop[5] + P->ninio[2] +
871 P->mismatch23I[type2][SS1[i - 3]][SS2[j + 2]] +
872 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + di4 + dj3, c[i][j]);
873
874 if ((type2 = pair[S1[i - 3]][S2[j + 4]]))
875 c[i][j] = MIN2(c[i - 3][j + 4] + P->internal_loop[5] + P->ninio[2] +
876 P->mismatch23I[type2][SS1[i - 2]][SS2[j + 3]] +
877 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + di3 + dj4, c[i][j]);
878
879 /**
880 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
881 **/
882 /**
883 *** 3x3 or more
884 **/
885 c[i][j] = MIN2(
886 in[i - 3][j + 3] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + 2 * iext_s + di3 + dj3,
887 c[i][j]);
888 /**
889 *** 2xn or more
890 **/
891 c[i][j] = MIN2(
892 in[i - 4][j + 2] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + di4 + dj2,
893 c[i][j]);
894 /**
895 *** nx2 or more
896 **/
897 c[i][j] = MIN2(
898 in[i - 2][j + 4] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + di2 + dj4,
899 c[i][j]);
900 /**
901 *** nx1 n>2
902 **/
903 c[i][j] = MIN2(
904 inx[i - 3][j + 1] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + di3 + dj1,
905 c[i][j]);
906 /**
907 *** 1xn n>2
908 **/
909 c[i][j] = MIN2(
910 iny[i - 1][j + 3] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + dj3 + di1,
911 c[i][j]);
912 /**
913 *** nx0 n>1
914 **/
915 int bAU;
916 bAU = (type > 2 ? P->TerminalAU : 0);
917 c[i][j] = MIN2(bx[i - 2][j + 1] + di2 + dj1 + bext + bAU, c[i][j]);
918 /**
919 *** 0xn n>1
920 **/
921 c[i][j] = MIN2(by[i - 1][j + 2] + di1 + dj2 + bext + bAU, c[i][j]);
922 /*
923 * remove this line printf("%d\t",c[i][j]);
924 */
925 temp = min_colonne;
926 min_colonne = MIN2(c[i][j] + vrna_E_ext_stem(rtype[type], SS2[j - 1], SS1[i + 1], P), min_colonne);
927 if (temp > min_colonne)
928 min_j_colonne = j;
929
930 /* ---------------------------------------------------------------------end update */
931 }
932 if (max >= min_colonne) {
933 max = min_colonne;
934 max_pos = i;
935 max_pos_j = min_j_colonne;
936 }
937
938 i++;
939 /*
940 * remove this line printf("\n");
941 */
942 }
943 Emin = max;
944 if (Emin > threshold) {
945 free(S1);
946 free(S2);
947 free(SS1);
948 free(SS2);
949 for (i = 0; i <= n3; i++) {
950 free(c[i]);
951 free(in[i]);
952 free(bx[i]);
953 free(by[i]);
954 free(inx[i]);
955 free(iny[i]);
956 }
957 for (i = 0; i <= 3; i++)
958 free(DJ[i]);
959 free(c);
960 free(in);
961 free(bx);
962 free(by);
963 free(inx);
964 free(iny);
965 free(DJ);
966 mfe.energy = 0;
967 mfe.structure = NULL;
968 return mfe;
969 }
970
971 i_min = max_pos;
972 j_min = max_pos_j;
973 int dGe, dGeplex, dGx, dGy;
974 dGe = dGeplex = dGx = dGy = 0;
975 /* printf("MAX fduplexfold_XS %d\n",Emin); */
976 struc = fbacktrack_XS(i_min,
977 j_min,
978 access_s1,
979 access_s2,
980 i_pos,
981 j_pos,
982 il_a,
983 il_b,
984 b_a,
985 b_b,
986 &dGe,
987 &dGeplex,
988 &dGx,
989 &dGy);
990
991 l1 = strchr(struc, '&') - struc;
992 int size;
993 size = strlen(struc) - 1;
994 int lengthx;
995 int endx;
996 int lengthy;
997 int endy;
998 lengthx = l1;
999 lengthx -= (struc[0] == '.' ? 1 : 0);
1000 lengthx -= (struc[l1 - 1] == '.' ? 1 : 0);
1001 endx = (i_pos - (n3 - i_min));
1002 lengthy = size - l1;
1003 lengthy -= (struc[size] == '.' ? 1 : 0);
1004 lengthy -= (struc[l1 + 1] == '.' ? 1 : 0);
1005 endy = j_pos + j_min + lengthy - 22;
1006 if (i_min < n3 - 10)
1007 i_min++;
1008
1009 if (j_min > 11)
1010 j_min--;
1011
1012 mfe.i = i_min;
1013 mfe.j = j_min;
1014 mfe.ddG = (double)Emin * 0.01;
1015 mfe.structure = struc;
1016 mfe.energy_backtrack = (double)dGe * 0.01;
1017 mfe.energy = (double)dGeplex * 0.01;
1018 mfe.opening_backtrack_x = (double)dGx * 0.01;
1019 mfe.opening_backtrack_y = (double)dGy * 0.01;
1020 mfe.dG1 = 0; /* !remove access to complete access array (double) access_s1[lengthx][endx+10] * 0.01; */
1021 mfe.dG2 = 0; /* !remove access to complete access array (double) access_s2[lengthy][endy+10] * 0.01; */
1022 free(S1);
1023 free(S2);
1024 free(SS1);
1025 free(SS2);
1026 for (i = 0; i <= n3; i++) {
1027 free(c[i]);
1028 free(in[i]);
1029 free(bx[i]);
1030 free(by[i]);
1031 free(inx[i]);
1032 free(iny[i]);
1033 }
1034 for (i = 0; i <= 3; i++)
1035 free(DJ[i]);
1036 free(DJ);
1037 free(c);
1038 free(in);
1039 free(bx);
1040 free(by);
1041 free(iny);
1042 free(inx);
1043 return mfe;
1044 }
1045
1046
1047 PRIVATE char *
fbacktrack_XS(int i,int j,const int ** access_s1,const int ** access_s2,const int i_pos,const int j_pos,const int il_a,const int il_b,const int b_a,const int b_b,int * dG,int * dGplex,int * dGx,int * dGy)1048 fbacktrack_XS(int i,
1049 int j,
1050 const int **access_s1,
1051 const int **access_s2,
1052 const int i_pos,
1053 const int j_pos,
1054 const int il_a,
1055 const int il_b,
1056 const int b_a,
1057 const int b_b,
1058 int *dG,
1059 int *dGplex,
1060 int *dGx,
1061 int *dGy)
1062 {
1063 /* backtrack structure going backwards from i, and forwards from j
1064 * return structure in bracket notation with & as separator */
1065 int k, l, type, type2, E, traced, i0, j0;
1066 char *st1, *st2, *struc;
1067 int bopen = b_b;
1068 int bext = b_a;
1069 int iopen = il_b;
1070 int iext_s = 2 * il_a; /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
1071 int iext_ass = 50 + il_a; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
1072
1073 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
1074 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
1075 i0 = MIN2(i + 1, n3 - 10);
1076 j0 = MAX2(j - 1, 11);
1077 int state;
1078 state = 1; /* we start backtracking from a a pair , i.e. c-matrix */
1079 /* state 1 -> base pair, c
1080 * state 2 -> interior loop, in
1081 * state 3 -> bx loop, bx
1082 * state 4 -> by loop, by
1083 */
1084 traced = 1;
1085 k = i;
1086 l = j; /* stores the i,j information for subsequence usage see * */
1087 int idiff, jdiff;
1088 /**
1089 *** (type>2?P->TerminalAU:0)+P->dangle3[rtype[type]][SS1[i+1]]+P->dangle5[rtype[type]][SS2[j-1]];
1090 **/
1091
1092 int maxPenalty[4];
1093 vrna_md_t md;
1094
1095 set_model_details(&md);
1096
1097 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
1098 update_dfold_params();
1099 if (P)
1100 free(P);
1101
1102 P = vrna_params(&md);
1103 make_pair_matrix();
1104 }
1105
1106 maxPenalty[0] = (int)-1 * P->stack[2][2] / 2;
1107 maxPenalty[1] = (int)-1 * P->stack[2][2];
1108 maxPenalty[2] = (int)-3 * P->stack[2][2] / 2;
1109 maxPenalty[3] = (int)-2 * P->stack[2][2];
1110
1111 type = pair[S1[i]][S2[j]];
1112 *dG += vrna_E_ext_stem(rtype[type], SS2[j - 1], SS1[i + 1], P);
1113 *dGplex = *dG;
1114
1115 while (i > 10 && j <= n4 - 9 && traced) {
1116 int di1, di2, di3, di4;
1117 idiff = i_pos - (n3 - 10 - i);
1118 di1 = 0.5 *
1119 (access_s1[5][idiff + 4] - access_s1[4][idiff + 4] + access_s1[5][idiff] -
1120 access_s1[4][idiff - 1]);
1121 di2 = 0.5 *
1122 (access_s1[5][idiff + 3] - access_s1[4][idiff + 3] + access_s1[5][idiff - 1] -
1123 access_s1[4][idiff - 2]) + di1;
1124 di3 = 0.5 *
1125 (access_s1[5][idiff + 2] - access_s1[4][idiff + 2] + access_s1[5][idiff - 2] -
1126 access_s1[4][idiff - 3]) + di2;
1127 di4 = 0.5 *
1128 (access_s1[5][idiff + 1] - access_s1[4][idiff + 1] + access_s1[5][idiff - 3] -
1129 access_s1[4][idiff - 4]) + di3;
1130 /*
1131 * di1 = access_s1[5][idiff] - access_s1[4][idiff-1];
1132 * di2 = access_s1[5][idiff-1] - access_s1[4][idiff-2] + di1;
1133 * di3 = access_s1[5][idiff-2] - access_s1[4][idiff-3] + di2;
1134 * di4 = access_s1[5][idiff-3] - access_s1[4][idiff-4] + di3;
1135 * di1=MIN2(di1,maxPenalty[0]);
1136 * di2=MIN2(di2,maxPenalty[1]);
1137 * di3=MIN2(di3,maxPenalty[2]);
1138 * di4=MIN2(di4,maxPenalty[3]);
1139 */
1140 int dj1, dj2, dj3, dj4;
1141 jdiff = j_pos + j - 11;
1142 dj1 = 0.5 *
1143 (access_s2[5][jdiff + 4] - access_s2[4][jdiff + 4] + access_s2[5][jdiff] -
1144 access_s2[4][jdiff - 1]);
1145 dj2 = 0.5 *
1146 (access_s2[5][jdiff + 5] - access_s2[4][jdiff + 5] + access_s2[5][jdiff + 1] -
1147 access_s2[4][jdiff]) + dj1;
1148 dj3 = 0.5 *
1149 (access_s2[5][jdiff + 6] - access_s2[4][jdiff + 6] + access_s2[5][jdiff + 2] -
1150 access_s2[4][jdiff + 1]) + dj2;
1151 dj4 = 0.5 *
1152 (access_s2[5][jdiff + 7] - access_s2[4][jdiff + 7] + access_s2[5][jdiff + 3] -
1153 access_s2[4][jdiff + 2]) + dj3;
1154
1155
1156 /*
1157 * dj1 = access_s2[5][jdiff+4] - access_s2[4][jdiff+4];
1158 * dj2 = access_s2[5][jdiff+5] - access_s2[4][jdiff+5] + dj1;
1159 * dj3 = access_s2[5][jdiff+6] - access_s2[4][jdiff+6] + dj2;
1160 * dj4 = access_s2[5][jdiff+7] - access_s2[4][jdiff+7] + dj3;
1161 * dj1=MIN2(dj1,maxPenalty[0]);
1162 * dj2=MIN2(dj2,maxPenalty[1]);
1163 * dj3=MIN2(dj3,maxPenalty[2]);
1164 * dj4=MIN2(dj4,maxPenalty[3]);
1165 */
1166 traced = 0;
1167 switch (state) {
1168 case 1:
1169 type = pair[S1[i]][S2[j]];
1170 int bAU;
1171 bAU = (type > 2 ? P->TerminalAU : 0);
1172 if (!type)
1173 vrna_message_error("backtrack failed in fold duplex");
1174
1175 type2 = pair[S1[i - 1]][S2[j + 1]];
1176 if (type2 && c[i][j] == (c[i - 1][j + 1] + P->stack[rtype[type]][type2] + di1 + dj1)) {
1177 k = i - 1;
1178 l = j + 1;
1179 (*dG) += E_IntLoop(i - k - 1,
1180 l - j - 1,
1181 type2,
1182 rtype[type],
1183 SS1[k + 1],
1184 SS2[l - 1],
1185 SS1[i - 1],
1186 SS2[j + 1],
1187 P);
1188 *dGplex += E_IntLoop(i - k - 1,
1189 l - j - 1,
1190 type2,
1191 rtype[type],
1192 SS1[k + 1],
1193 SS2[l - 1],
1194 SS1[i - 1],
1195 SS2[j + 1],
1196 P);
1197 *dGx += di1;
1198 *dGy += dj1;
1199 st1[i - 1] = '(';
1200 st2[j - 1] = ')';
1201 i = k;
1202 j = l;
1203 state = 1;
1204 traced = 1;
1205 break;
1206 }
1207
1208 type2 = pair[S1[i - 1]][S2[j + 2]];
1209 if (type2 &&
1210 c[i][j] == (c[i - 1][j + 2] + P->bulge[1] + P->stack[rtype[type]][type2] + di1 + dj2)) {
1211 k = i - 1;
1212 l = j + 2;
1213 *dG += E_IntLoop(i - k - 1,
1214 l - j - 1,
1215 type2,
1216 rtype[type],
1217 SS1[k + 1],
1218 SS2[l - 1],
1219 SS1[i - 1],
1220 SS2[j + 1],
1221 P);
1222 *dGplex += E_IntLoop(i - k - 1,
1223 l - j - 1,
1224 type2,
1225 rtype[type],
1226 SS1[k + 1],
1227 SS2[l - 1],
1228 SS1[i - 1],
1229 SS2[j + 1],
1230 P);
1231 *dGx += di1;
1232 *dGy += dj2;
1233 st1[i - 1] = '(';
1234 st2[j - 1] = ')';
1235 i = k;
1236 j = l;
1237 state = 1;
1238 traced = 1;
1239 break;
1240 }
1241
1242 type2 = pair[S1[i - 2]][S2[j + 1]];
1243 if (type2 &&
1244 c[i][j] == (c[i - 2][j + 1] + P->bulge[1] + P->stack[type2][rtype[type]] + di2 + dj1)) {
1245 k = i - 2;
1246 l = j + 1;
1247 *dG += E_IntLoop(i - k - 1,
1248 l - j - 1,
1249 type2,
1250 rtype[type],
1251 SS1[k + 1],
1252 SS2[l - 1],
1253 SS1[i - 1],
1254 SS2[j + 1],
1255 P);
1256 *dGplex += E_IntLoop(i - k - 1,
1257 l - j - 1,
1258 type2,
1259 rtype[type],
1260 SS1[k + 1],
1261 SS2[l - 1],
1262 SS1[i - 1],
1263 SS2[j + 1],
1264 P);
1265 *dGx += di2;
1266 *dGy += dj1;
1267 st1[i - 1] = '(';
1268 st2[j - 1] = ')';
1269 i = k;
1270 j = l;
1271 state = 1;
1272 traced = 1;
1273 break;
1274 }
1275
1276 type2 = pair[S1[i - 2]][S2[j + 2]];
1277 if (type2 &&
1278 c[i][j] ==
1279 (c[i - 2][j + 2] + P->int11[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]] + di2 + dj2)) {
1280 k = i - 2;
1281 l = j + 2;
1282 *dG += E_IntLoop(i - k - 1,
1283 l - j - 1,
1284 type2,
1285 rtype[type],
1286 SS1[k + 1],
1287 SS2[l - 1],
1288 SS1[i - 1],
1289 SS2[j + 1],
1290 P);
1291 *dGplex += E_IntLoop(i - k - 1,
1292 l - j - 1,
1293 type2,
1294 rtype[type],
1295 SS1[k + 1],
1296 SS2[l - 1],
1297 SS1[i - 1],
1298 SS2[j + 1],
1299 P);
1300 *dGx += di2;
1301 *dGy += dj2;
1302 st1[i - 1] = '(';
1303 st2[j - 1] = ')';
1304 i = k;
1305 j = l;
1306 state = 1;
1307 traced = 1;
1308 break;
1309 }
1310
1311 type2 = pair[S1[i - 3]][S2[j + 3]];
1312 if (type2 &&
1313 c[i][j] ==
1314 (c[i - 3][j + 3] +
1315 P->int22[type2][rtype[type]][SS1[i - 2]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + di3 +
1316 dj3)) {
1317 k = i - 3;
1318 l = j + 3;
1319 *dG += E_IntLoop(i - k - 1,
1320 l - j - 1,
1321 type2,
1322 rtype[type],
1323 SS1[k + 1],
1324 SS2[l - 1],
1325 SS1[i - 1],
1326 SS2[j + 1],
1327 P);
1328 *dGplex += E_IntLoop(i - k - 1,
1329 l - j - 1,
1330 type2,
1331 rtype[type],
1332 SS1[k + 1],
1333 SS2[l - 1],
1334 SS1[i - 1],
1335 SS2[j + 1],
1336 P);
1337 *dGx += di3;
1338 *dGy += dj3;
1339 st1[i - 1] = '(';
1340 st2[j - 1] = ')';
1341 i = k;
1342 j = l;
1343 state = 1;
1344 traced = 1;
1345 break;
1346 }
1347
1348 type2 = pair[S1[i - 3]][S2[j + 2]];
1349 if (type2 &&
1350 c[i][j] ==
1351 (c[i - 3][j + 2] + P->int21[rtype[type]][type2][SS2[j + 1]][SS1[i - 2]][SS1[i - 1]] +
1352 di3 +
1353 dj2)) {
1354 k = i - 3;
1355 l = j + 2;
1356 *dG += E_IntLoop(i - k - 1,
1357 l - j - 1,
1358 type2,
1359 rtype[type],
1360 SS1[k + 1],
1361 SS2[l - 1],
1362 SS1[i - 1],
1363 SS2[j + 1],
1364 P);
1365 *dGplex += E_IntLoop(i - k - 1,
1366 l - j - 1,
1367 type2,
1368 rtype[type],
1369 SS1[k + 1],
1370 SS2[l - 1],
1371 SS1[i - 1],
1372 SS2[j + 1],
1373 P);
1374 *dGx += di3;
1375 *dGy += dj2;
1376 st1[i - 1] = '(';
1377 st2[j - 1] = ')';
1378 i = k;
1379 j = l;
1380 state = 1;
1381 traced = 1;
1382 break;
1383 }
1384
1385 type2 = pair[S1[i - 2]][S2[j + 3]];
1386 if (type2 &&
1387 c[i][j] ==
1388 (c[i - 2][j + 3] + P->int21[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] +
1389 di2 +
1390 dj3)) {
1391 k = i - 2;
1392 l = j + 3;
1393 *dG += E_IntLoop(i - k - 1,
1394 l - j - 1,
1395 type2,
1396 rtype[type],
1397 SS1[k + 1],
1398 SS2[l - 1],
1399 SS1[i - 1],
1400 SS2[j + 1],
1401 P);
1402 *dGplex += E_IntLoop(i - k - 1,
1403 l - j - 1,
1404 type2,
1405 rtype[type],
1406 SS1[k + 1],
1407 SS2[l - 1],
1408 SS1[i - 1],
1409 SS2[j + 1],
1410 P);
1411 *dGx += di2;
1412 *dGy += dj3;
1413 st1[i - 1] = '(';
1414 st2[j - 1] = ')';
1415 i = k;
1416 j = l;
1417 state = 1;
1418 traced = 1;
1419 break;
1420 }
1421
1422 type2 = pair[S1[i - 4]][S2[j + 3]];
1423 if (type2 && c[i][j] == (c[i - 4][j + 3] + P->internal_loop[5] + P->ninio[2] +
1424 P->mismatch23I[type2][SS1[i - 3]][SS2[j + 2]] +
1425 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + di4 + dj3)) {
1426 k = i - 4;
1427 l = j + 3;
1428 *dG += E_IntLoop(i - k - 1,
1429 l - j - 1,
1430 type2,
1431 rtype[type],
1432 SS1[k + 1],
1433 SS2[l - 1],
1434 SS1[i - 1],
1435 SS2[j + 1],
1436 P);
1437 *dGplex += E_IntLoop(i - k - 1,
1438 l - j - 1,
1439 type2,
1440 rtype[type],
1441 SS1[k + 1],
1442 SS2[l - 1],
1443 SS1[i - 1],
1444 SS2[j + 1],
1445 P);
1446 *dGx += di2;
1447 *dGy += dj3;
1448 st1[i - 1] = '(';
1449 st2[j - 1] = ')';
1450 i = k;
1451 j = l;
1452 state = 1;
1453 traced = 1;
1454 break;
1455 }
1456
1457 type2 = pair[S1[i - 3]][S2[j + 4]];
1458 if (type2 && c[i][j] == (c[i - 3][j + 4] + P->internal_loop[5] + P->ninio[2] +
1459 P->mismatch23I[type2][SS1[i - 2]][SS2[j + 3]] +
1460 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + di3 + dj4)) {
1461 k = i - 3;
1462 l = j + 4;
1463 *dG += E_IntLoop(i - k - 1,
1464 l - j - 1,
1465 type2,
1466 rtype[type],
1467 SS1[k + 1],
1468 SS2[l - 1],
1469 SS1[i - 1],
1470 SS2[j + 1],
1471 P);
1472 *dGplex += E_IntLoop(i - k - 1,
1473 l - j - 1,
1474 type2,
1475 rtype[type],
1476 SS1[k + 1],
1477 SS2[l - 1],
1478 SS1[i - 1],
1479 SS2[j + 1],
1480 P);
1481 *dGx += di2;
1482 *dGy += dj3;
1483 st1[i - 1] = '(';
1484 st2[j - 1] = ')';
1485 i = k;
1486 j = l;
1487 state = 1;
1488 traced = 1;
1489 break;
1490 }
1491
1492 if (c[i][j] ==
1493 (in[i - 3][j + 3] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + di3 + dj3 + 2 *
1494 iext_s)) {
1495 k = i;
1496 l = j;
1497 *dGplex += P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + 2 * iext_s;
1498 *dGx += di3;
1499 *dGy += dj3;
1500 st1[i - 1] = '(';
1501 st2[j - 1] = ')';
1502 i = i - 3;
1503 j = j + 3;
1504 state = 2;
1505 traced = 1;
1506 break;
1507 }
1508
1509 if (c[i][j] ==
1510 (in[i - 4][j + 2] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + di4 + dj2 +
1511 iext_s +
1512 2 * iext_ass)) {
1513 k = i;
1514 l = j;
1515 *dGplex += P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass;
1516 *dGx += di4;
1517 *dGy += dj2;
1518 st1[i - 1] = '(';
1519 st2[j - 1] = ')';
1520 i = i - 4;
1521 j = j + 2;
1522 state = 2;
1523 traced = 1;
1524 break;
1525 }
1526
1527 if (c[i][j] ==
1528 (in[i - 2][j + 4] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + di2 + dj4 +
1529 iext_s +
1530 2 * iext_ass)) {
1531 k = i;
1532 l = j;
1533 *dGplex += P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass;
1534 *dGx += di2;
1535 *dGy += dj4;
1536 st1[i - 1] = '(';
1537 st2[j - 1] = ')';
1538 i = i - 2;
1539 j = j + 4;
1540 state = 2;
1541 traced = 1;
1542 break;
1543 }
1544
1545 if (c[i][j] ==
1546 (inx[i - 3][j + 1] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass +
1547 iext_ass + di3 + dj1)) {
1548 k = i;
1549 l = j;
1550 *dGplex += P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass +
1551 di3 + dj1;
1552 *dGx += di3;
1553 *dGy += dj1;
1554 st1[i - 1] = '(';
1555 st2[j - 1] = ')';
1556 i = i - 3;
1557 j = j + 1;
1558 state = 5;
1559 traced = 1;
1560 break;
1561 }
1562
1563 if (c[i][j] ==
1564 (iny[i - 1][j + 3] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass +
1565 iext_ass + di1 + dj3)) {
1566 k = i;
1567 l = j;
1568 *dGplex += P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass +
1569 di1 + dj3;
1570 *dGx += di1;
1571 *dGy += dj3;
1572 st1[i - 1] = '(';
1573 st2[j - 1] = ')';
1574 i = i - 1;
1575 j = j + 3;
1576 state = 6;
1577 traced = 1;
1578 break;
1579 }
1580
1581 if (c[i][j] == (bx[i - 2][j + 1] + di2 + dj1 + bext + bAU)) {
1582 k = i;
1583 l = j;
1584 st1[i - 1] = '(';
1585 st2[j - 1] = ')';
1586 *dGplex += bext + bAU;
1587 *dGx += di2;
1588 *dGy += dj1;
1589 i = i - 2;
1590 j = j + 1;
1591 state = 3;
1592 traced = 1;
1593 break;
1594 }
1595
1596 if (c[i][j] == (by[i - 1][j + 2] + di1 + dj2 + bext + bAU)) {
1597 k = i;
1598 l = j;
1599 *dGplex += bext + bAU;
1600 *dGx += di1;
1601 *dGy += dj2;
1602 st1[i - 1] = '(';
1603 st2[j - 1] = ')';
1604 i = i - 1;
1605 j = j + 2;
1606 state = 4;
1607 traced = 1;
1608 break;
1609 }
1610
1611 break;
1612 case 2:
1613 if (in[i][j] == (in[i - 1][j + 1] + iext_s + di1 + dj1)) {
1614 i--;
1615 j++;
1616 *dGplex += iext_s;
1617 *dGx += di1;
1618 *dGy += dj1;
1619 state = 2;
1620 traced = 1;
1621 break;
1622 }
1623
1624 if (in[i][j] == (in[i - 1][j] + iext_ass + di1)) {
1625 i = i - 1;
1626 *dGplex += iext_ass;
1627 *dGx += di1;
1628 state = 2;
1629 traced = 1;
1630 break;
1631 }
1632
1633 if (in[i][j] == (in[i][j + 1] + iext_ass + dj1)) {
1634 j++;
1635 state = 2;
1636 *dGy += dj1;
1637 *dGplex += iext_ass;
1638 traced = 1;
1639 break;
1640 }
1641
1642 type2 = pair[SS2[j + 1]][SS1[i - 1]];
1643 if (type2 &&
1644 in[i][j] ==
1645 (c[i - 1][j + 1] + P->mismatchI[type2][SS2[j]][SS1[i]] + iopen + iext_s + di1 + dj1)) {
1646 *dGplex += P->mismatchI[type2][SS2[j]][SS1[i]] + iopen + iext_s;
1647 int temp;
1648 temp = k;
1649 k = i - 1;
1650 i = temp;
1651 temp = l;
1652 l = j + 1;
1653 j = temp;
1654 type = pair[S1[i]][S2[j]];
1655 *dG += E_IntLoop(i - k - 1,
1656 l - j - 1,
1657 type2,
1658 rtype[type],
1659 SS1[k + 1],
1660 SS2[l - 1],
1661 SS1[i - 1],
1662 SS2[j + 1],
1663 P);
1664 *dGx += di1;
1665 *dGy += dj1;
1666 i = k;
1667 j = l;
1668 state = 1;
1669 traced = 1;
1670 break;
1671 }
1672
1673 case 3:
1674 if (bx[i][j] == (bx[i - 1][j] + bext + di1)) {
1675 i--;
1676 *dGplex += bext;
1677 *dGx += di1;
1678 state = 3;
1679 traced = 1;
1680 break;
1681 }
1682
1683 type2 = pair[S2[j]][S1[i - 1]];
1684 if (type2 &&
1685 bx[i][j] == (c[i - 1][j] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + di1)) {
1686 int temp;
1687 temp = k;
1688 k = i - 1;
1689 i = temp;
1690 temp = l;
1691 l = j;
1692 j = temp;
1693 type = pair[S1[i]][S2[j]];
1694 *dG += E_IntLoop(i - k - 1,
1695 l - j - 1,
1696 type2,
1697 rtype[type],
1698 SS1[k + 1],
1699 SS2[l - 1],
1700 SS1[i - 1],
1701 SS2[j + 1],
1702 P);
1703 *dGplex += bopen + bext + (type2 > 2 ? P->TerminalAU : 0);
1704 *dGx += di1;
1705 i = k;
1706 j = l;
1707 state = 1;
1708 traced = 1;
1709 break;
1710 }
1711
1712 case 4:
1713 if (by[i][j] == (by[i][j + 1] + bext + dj1)) {
1714 j++;
1715 *dGplex += bext;
1716 state = 4;
1717 traced = 1;
1718 break;
1719 }
1720
1721 type2 = pair[S2[j + 1]][S1[i]];
1722 if (type2 &&
1723 by[i][j] == (c[i][j + 1] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + dj1)) {
1724 int temp;
1725 temp = k;
1726 k = i;
1727 i = temp;
1728 temp = l;
1729 l = j + 1;
1730 j = temp;
1731 type = pair[S1[i]][S2[j]];
1732 *dG += E_IntLoop(i - k - 1,
1733 l - j - 1,
1734 type2,
1735 rtype[type],
1736 SS1[k + 1],
1737 SS2[l - 1],
1738 SS1[i - 1],
1739 SS2[j + 1],
1740 P);
1741 *dGplex += bopen + bext + (type2 > 2 ? P->TerminalAU : 0);
1742 *dGy += dj1;
1743 i = k;
1744 j = l;
1745 state = 1;
1746 traced = 1;
1747 break;
1748 }
1749
1750 case 5:
1751 if (inx[i][j] == (inx[i - 1][j] + iext_ass + di1)) {
1752 i--;
1753 *dGplex += iext_ass;
1754 *dGx += di1;
1755 state = 5;
1756 traced = 1;
1757 break;
1758 }
1759
1760 type2 = pair[S2[j + 1]][S1[i - 1]];
1761 if (type2 &&
1762 inx[i][j] ==
1763 (c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s + di1 +
1764 dj1)) {
1765 *dGplex += P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s;
1766 int temp;
1767 temp = k;
1768 k = i - 1;
1769 i = temp;
1770 temp = l;
1771 l = j + 1;
1772 j = temp;
1773 type = pair[S1[i]][S2[j]];
1774 *dG += E_IntLoop(i - k - 1,
1775 l - j - 1,
1776 type2,
1777 rtype[type],
1778 SS1[k + 1],
1779 SS2[l - 1],
1780 SS1[i - 1],
1781 SS2[j + 1],
1782 P);
1783 *dGx += di1;
1784 *dGy += dj1;
1785 i = k;
1786 j = l;
1787 state = 1;
1788 traced = 1;
1789 break;
1790 }
1791
1792 case 6:
1793 if (iny[i][j] == (iny[i][j + 1] + iext_ass + dj1)) {
1794 j++;
1795 *dGplex += iext_ass;
1796 *dGx += dj1;
1797 state = 6;
1798 traced = 1;
1799 break;
1800 }
1801
1802 type2 = pair[S2[j + 1]][S1[i - 1]];
1803 if (type2 &&
1804 iny[i][j] ==
1805 (c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s + di1 +
1806 dj1)) {
1807 *dGplex += P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s;
1808 int temp;
1809 temp = k;
1810 k = i - 1;
1811 i = temp;
1812 temp = l;
1813 l = j + 1;
1814 j = temp;
1815 type = pair[S1[i]][S2[j]];
1816 *dG += E_IntLoop(i - k - 1,
1817 l - j - 1,
1818 type2,
1819 rtype[type],
1820 SS1[k + 1],
1821 SS2[l - 1],
1822 SS1[i - 1],
1823 SS2[j + 1],
1824 P);
1825 *dGx += di1;
1826 *dGy += dj1;
1827 i = k;
1828 j = l;
1829 state = 1;
1830 traced = 1;
1831 break;
1832 }
1833 }
1834 }
1835 if (!traced) {
1836 idiff = i_pos - (n3 - 10 - i);
1837 jdiff = j_pos + j - 11;
1838 E = c[i][j];
1839 /**
1840 *** if (i>1) {E -= P->dangle5[type][SS1[i-1]]; *dG+=P->dangle5[type][SS1[i-1]];*dGplex+=P->dangle5[type][SS1[i-1]];}
1841 *** if (j<n4){E -= P->dangle3[type][SS2[j+1]]; *dG+=P->dangle3[type][SS2[j+1]];*dGplex+=P->dangle3[type][SS2[j+1]];}
1842 *** if (type>2) {E -= P->TerminalAU; *dG+=P->TerminalAU;*dGplex+=P->TerminalAU;}
1843 **/
1844 int correction;
1845 correction = vrna_E_ext_stem(type, (i > 1) ? SS1[i - 1] : -1, (j < n4) ? SS2[j + 1] : -1, P);
1846 *dG += correction;
1847 *dGplex += correction;
1848 E -= correction;
1849
1850 /*
1851 * if (E != P->DuplexInit+access_s1[1][idiff]+access_s2[1][jdiff]) {
1852 * vrna_message_error("backtrack failed in second fold duplex");
1853 * }
1854 */
1855 if (E != P->DuplexInit) {
1856 vrna_message_error("backtrack failed in second fold duplex");
1857 } else {
1858 *dG += P->DuplexInit;
1859 *dGplex += P->DuplexInit;
1860 *dGx += 0; /* access_s1[1][idiff]; */
1861 *dGy += 0; /* access_s2[1][jdiff]; */
1862 st1[i - 1] = '(';
1863 st2[j - 1] = ')';
1864 }
1865 }
1866
1867 if (i > 11)
1868 i--;
1869
1870 if (j < n4 - 10)
1871 j++;
1872
1873 struc = (char *)vrna_alloc(i0 - i + 1 + j - j0 + 1 + 2);
1874 for (k = MAX2(i, 1); k <= i0; k++)
1875 if (!st1[k - 1])
1876 st1[k - 1] = '.';
1877
1878 for (k = j0; k <= j; k++)
1879 if (!st2[k - 1])
1880 st2[k - 1] = '.';
1881
1882 strcpy(struc, st1 + MAX2(i - 1, 0));
1883 strcat(struc, "&");
1884 strcat(struc, st2 + j0 - 1);
1885 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
1886 free(st1);
1887 free(st2);
1888 return struc;
1889 }
1890
1891
1892 duplexT **
Lduplexfold_XS(const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int threshold,const int alignment_length,const int delta,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)1893 Lduplexfold_XS(const char *s1,
1894 const char *s2,
1895 const int **access_s1,
1896 const int **access_s2,
1897 const int threshold,
1898 const int alignment_length,
1899 const int delta,
1900 const int fast,
1901 const int il_a,
1902 const int il_b,
1903 const int b_a,
1904 const int b_b)
1905 {
1906 /**
1907 *** See variable definition in fduplexfold_XS
1908 **/
1909 int i, j;
1910 int bopen = b_b;
1911 int bext = b_a;
1912 int iopen = il_b;
1913 int iext_s = 2 * il_a;
1914 int iext_ass = 50 + il_a;
1915 int min_colonne = INF;
1916 int i_length;
1917 int max_pos;
1918 int max_pos_j;
1919 int min_j_colonne;
1920 int max = INF;
1921 int *position;
1922 int *position_j;
1923 int maxPenalty[4];
1924 int **DJ;
1925 /**
1926 *** 1D array corresponding to the standard 2d recursion matrix
1927 *** Makes the computation 20% faster
1928 **/
1929 int *SA;
1930 vrna_md_t md;
1931
1932 /**
1933 *** variable initialization
1934 **/
1935 n1 = (int)strlen(s1);
1936 n2 = (int)strlen(s2);
1937 /**
1938 *** Sequence encoding
1939 **/
1940
1941 set_model_details(&md);
1942
1943 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
1944 update_dfold_params();
1945 if (P)
1946 free(P);
1947
1948 P = vrna_params(&md);
1949 make_pair_matrix();
1950 }
1951
1952 encode_seqs(s1, s2);
1953 /**
1954 *** Position of the high score on the target and query sequence
1955 **/
1956 position = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
1957 position_j = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
1958 /**
1959 *** extension penalty, computed only once, further reduce the computation time
1960 **/
1961 maxPenalty[0] = (int)-1 * P->stack[2][2] / 2;
1962 maxPenalty[1] = (int)-1 * P->stack[2][2];
1963 maxPenalty[2] = (int)-3 * P->stack[2][2] / 2;
1964 maxPenalty[3] = (int)-2 * P->stack[2][2];
1965
1966 DJ = (int **)vrna_alloc(4 * sizeof(int *));
1967 DJ[0] = (int *)vrna_alloc(n2 * sizeof(int));
1968 DJ[1] = (int *)vrna_alloc(n2 * sizeof(int));
1969 DJ[2] = (int *)vrna_alloc(n2 * sizeof(int));
1970 DJ[3] = (int *)vrna_alloc(n2 * sizeof(int));
1971 j = n2 - 9;
1972 while (--j > 10) {
1973 DJ[0][j] = 0.5 *
1974 (access_s2[5][j + 4] - access_s2[4][j + 4] + access_s2[5][j] - access_s2[4][j - 1]);
1975 DJ[1][j] = 0.5 *
1976 (access_s2[5][j + 5] - access_s2[4][j + 5] + access_s2[5][j + 1] - access_s2[4][j]) +
1977 DJ[0][j];
1978 DJ[2][j] = 0.5 *
1979 (access_s2[5][j + 6] - access_s2[4][j + 6] + access_s2[5][j + 2] -
1980 access_s2[4][j + 1]) +
1981 DJ[1][j];
1982 DJ[3][j] = 0.5 *
1983 (access_s2[5][j + 7] - access_s2[4][j + 7] + access_s2[5][j + 3] -
1984 access_s2[4][j + 2]) +
1985 DJ[2][j];
1986 /*
1987 * DJ[0][j] = access_s2[5][j+4] - access_s2[4][j+4] ;
1988 * DJ[1][j] = access_s2[5][j+5] - access_s2[4][j+5] + DJ[0][j];
1989 * DJ[2][j] = access_s2[5][j+6] - access_s2[4][j+6] + DJ[1][j];
1990 * DJ[3][j] = access_s2[5][j+7] - access_s2[4][j+7] + DJ[2][j];
1991 * DJ[0][j] = MIN2(DJ[0][j],maxPenalty[0]);
1992 * DJ[1][j] = MIN2(DJ[1][j],maxPenalty[1]);
1993 * DJ[2][j] = MIN2(DJ[2][j],maxPenalty[2]);
1994 * DJ[3][j] = MIN2(DJ[3][j],maxPenalty[3]);
1995 */
1996 }
1997 /**
1998 *** instead of having 4 2-dim arrays we use a unique 1-dim array
1999 *** The mapping 2d -> 1D is done based ont the macro
2000 *** LCI(i,j,l) ((i )*l + j)
2001 *** LINI(i,j,l) ((i + 5)*l + j)
2002 *** LBXI(i,j,l) ((i + 10)*l + j)
2003 *** LBYI(i,j,l) ((i + 15)*l + j)
2004 *** LINIX(i,j,l) ((i + 20)*l + j)
2005 *** LINIY(i,j,l) ((i + 25)*l + j)
2006 ***
2007 *** SA has a length of 5 (number of columns we look back) *
2008 *** * 6 (number of structures we look at) *
2009 *** * length of the sequence
2010 **/
2011
2012 SA = (int *)vrna_alloc(sizeof(int) * 5 * 6 * (n2 + 5));
2013 for (j = n2 + 4; j >= 0; j--) {
2014 SA[(j *
2015 30)] =
2016 SA[(j * 30) + 1] = SA[(j * 30) + 2] = SA[(j * 30) + 3] = SA[(j * 30) + 4] = INF;
2017 SA[(j * 30) +
2018 5] =
2019 SA[(j * 30) + 1 +
2020 5] =
2021 SA[(j * 30) + 2 + 5] = SA[(j * 30) + 3 + 5] = SA[(j * 30) + 4 + 5] = INF;
2022 SA[(j * 30) +
2023 10] =
2024 SA[(j * 30) + 1 +
2025 10] =
2026 SA[(j * 30) + 2 + 10] = SA[(j * 30) + 3 + 10] = SA[(j * 30) + 4 + 10] = INF;
2027 SA[(j * 30) +
2028 15] =
2029 SA[(j * 30) + 1 +
2030 15] =
2031 SA[(j * 30) + 2 + 15] = SA[(j * 30) + 3 + 15] = SA[(j * 30) + 4 + 15] = INF;
2032 SA[(j * 30) +
2033 20] =
2034 SA[(j * 30) + 1 +
2035 20] =
2036 SA[(j * 30) + 2 + 20] = SA[(j * 30) + 3 + 20] = SA[(j * 30) + 4 + 20] = INF;
2037 SA[(j * 30) +
2038 25] =
2039 SA[(j * 30) + 1 +
2040 25] =
2041 SA[(j * 30) + 2 + 25] = SA[(j * 30) + 3 + 25] = SA[(j * 30) + 4 + 25] = INF;
2042 }
2043
2044 i = 10;
2045 i_length = n1 - 9;
2046 while (i < i_length) {
2047 int di1, di2, di3, di4;
2048 int idx = i % 5;
2049 int idx_1 = (i - 1) % 5;
2050 int idx_2 = (i - 2) % 5;
2051 int idx_3 = (i - 3) % 5;
2052 int idx_4 = (i - 4) % 5;
2053 di1 = 0.5 * (access_s1[5][i + 4] - access_s1[4][i + 4] + access_s1[5][i] - access_s1[4][i - 1]);
2054 di2 = 0.5 *
2055 (access_s1[5][i + 3] - access_s1[4][i + 3] + access_s1[5][i - 1] - access_s1[4][i - 2]) +
2056 di1;
2057 di3 = 0.5 *
2058 (access_s1[5][i + 2] - access_s1[4][i + 2] + access_s1[5][i - 2] - access_s1[4][i - 3]) +
2059 di2;
2060 di4 = 0.5 *
2061 (access_s1[5][i + 1] - access_s1[4][i + 1] + access_s1[5][i - 3] - access_s1[4][i - 4]) +
2062 di3;
2063 /*
2064 * di1 = access_s1[5][i] - access_s1[4][i-1];
2065 * di2 = access_s1[5][i-1] - access_s1[4][i-2] + di1;
2066 * di3 = access_s1[5][i-2] - access_s1[4][i-3] + di2;
2067 * di4 = access_s1[5][i-3] - access_s1[4][i-4] + di3;
2068 * di1=MIN2(di1,maxPenalty[0]);
2069 * di2=MIN2(di2,maxPenalty[1]);
2070 * di3=MIN2(di3,maxPenalty[2]);
2071 * di4=MIN2(di4,maxPenalty[3]);
2072 */
2073 j = n2 - 9;
2074 while (--j > 9) {
2075 int dj1, dj2, dj3, dj4;
2076 dj1 = DJ[0][j];
2077 dj2 = DJ[1][j];
2078 dj3 = DJ[2][j];
2079 dj4 = DJ[3][j];
2080 int type2, type, temp;
2081 type = pair[S1[i]][S2[j]];
2082 /**
2083 *** Start duplex
2084 **/
2085 /* SA[LCI(idx,j,n2)] = type ? P->DuplexInit + access_s1[1][i] + access_s2[1][j] : INF; */
2086 SA[LCI(idx, j, n2)] = type ? P->DuplexInit : INF;
2087 /**
2088 *** update lin bx by linx liny matrix
2089 **/
2090 type2 = pair[S2[j + 1]][S1[i - 1]];
2091 /**
2092 *** start/extend interior loop
2093 **/
2094 SA[LINI(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
2095 n2)] + P->mismatchI[type2][SS2[j]][SS1[i]] + di1 + dj1 + iopen + iext_s,
2096 SA[LINI(idx_1, j, n2)] + iext_ass + di1);
2097
2098 /**
2099 *** start/extend nx1 target
2100 *** use same type2 as for in
2101 **/
2102 SA[LINIX(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
2103 n2)] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + di1 + dj1 + iopen + iext_s,
2104 SA[LINIX(idx_1, j, n2)] + iext_ass + di1);
2105 /**
2106 *** start/extend 1xn target
2107 *** use same type2 as for in
2108 **/
2109 SA[LINIY(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
2110 n2)] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + di1 + dj1 + iopen + iext_s,
2111 SA[LINIY(idx, j + 1, n2)] + iext_ass + dj1);
2112 /**
2113 *** extend interior loop
2114 **/
2115 SA[LINI(idx, j, n2)] = MIN2(SA[LINI(idx, j, n2)], SA[LINI(idx, j + 1, n2)] + iext_ass + dj1);
2116 SA[LINI(idx, j, n2)] = MIN2(SA[LINI(idx, j, n2)],
2117 SA[LINI(idx_1, j + 1, n2)] + iext_s + di1 + dj1);
2118 /**
2119 *** start/extend bulge target
2120 **/
2121 type2 = pair[S2[j]][S1[i - 1]];
2122 SA[LBXI(idx, j, n2)] = MIN2(SA[LBXI(idx_1, j, n2)] + bext + di1,
2123 SA[LCI(idx_1, j,
2124 n2)] + bopen + bext +
2125 (type2 > 2 ? P->TerminalAU : 0) + di1);
2126 /**
2127 *** start/extend bulge query
2128 **/
2129 type2 = pair[S2[j + 1]][S1[i]];
2130 SA[LBYI(idx, j, n2)] = MIN2(SA[LBYI(idx, j + 1, n2)] + bext + dj1,
2131 SA[LCI(idx, j + 1,
2132 n2)] + bopen + bext +
2133 (type2 > 2 ? P->TerminalAU : 0) + dj1);
2134 /**
2135 ***end update recursion
2136 **/
2137 if (!type)
2138 continue; /**
2139 *** stack extension
2140 **/
2141
2142 SA[LCI(idx, j, n2)] += vrna_E_ext_stem(type, SS1[i - 1], SS2[j + 1], P);
2143 /**
2144 *** stack extension
2145 **/
2146 if ((type2 = pair[S1[i - 1]][S2[j + 1]]))
2147 SA[LCI(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
2148 n2)] + P->stack[rtype[type]][type2] + di1 + dj1,
2149 SA[LCI(idx, j, n2)]);
2150
2151 /**
2152 *** 1x0 / 0x1 stack extension
2153 **/
2154 if ((type2 = pair[S1[i - 1]][S2[j + 2]])) {
2155 SA[LCI(idx, j,
2156 n2)] = MIN2(SA[LCI(idx_1, j + 2,
2157 n2)] + P->bulge[1] + P->stack[rtype[type]][type2] + di1 + dj2,
2158 SA[LCI(idx, j, n2)]);
2159 }
2160
2161 if ((type2 = pair[S1[i - 2]][S2[j + 1]])) {
2162 SA[LCI(idx, j,
2163 n2)] = MIN2(SA[LCI(idx_2, j + 1,
2164 n2)] + P->bulge[1] + P->stack[type2][rtype[type]] + di2 + dj1,
2165 SA[LCI(idx, j, n2)]);
2166 }
2167
2168 /**
2169 *** 1x1 / 2x2 stack extension
2170 **/
2171 if ((type2 = pair[S1[i - 2]][S2[j + 2]])) {
2172 SA[LCI(idx, j,
2173 n2)] = MIN2(SA[LCI(idx_2, j + 2,
2174 n2)] + P->int11[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]] + di2 + dj2,
2175 SA[LCI(idx, j, n2)]);
2176 }
2177
2178 if ((type2 = pair[S1[i - 3]][S2[j + 3]])) {
2179 SA[LCI(idx, j,
2180 n2)] = MIN2(SA[LCI(idx_3, j + 3,
2181 n2)] +
2182 P->int22[type2][rtype[type]][SS1[i - 2]][SS1[i - 1]][SS2[j + 1]][SS2[j +
2183 2]] + di3 + dj3,
2184 SA[LCI(idx, j, n2)]);
2185 }
2186
2187 /**
2188 *** 1x2 / 2x1 stack extension
2189 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
2190 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
2191 **/
2192 if ((type2 = pair[S1[i - 3]][S2[j + 2]])) {
2193 SA[LCI(idx, j,
2194 n2)] = MIN2(SA[LCI(idx_3, j + 2,
2195 n2)] +
2196 P->int21[rtype[type]][type2][SS2[j + 1]][SS1[i - 2]][SS1[i - 1]] + di3 + dj2,
2197 SA[LCI(idx, j, n2)]);
2198 }
2199
2200 if ((type2 = pair[S1[i - 2]][S2[j + 3]])) {
2201 SA[LCI(idx, j,
2202 n2)] = MIN2(SA[LCI(idx_2, j + 3,
2203 n2)] +
2204 P->int21[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + di2 + dj3,
2205 SA[LCI(idx, j, n2)]);
2206 }
2207
2208 /**
2209 *** 2x3 / 3x2 stack extension
2210 **/
2211 if ((type2 = pair[S1[i - 4]][S2[j + 3]])) {
2212 SA[LCI(idx, j, n2)] = MIN2(SA[LCI(idx_4, j + 3, n2)] + P->internal_loop[5] + P->ninio[2] +
2213 P->mismatch23I[type2][SS1[i - 3]][SS2[j + 2]] +
2214 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + di4 + dj3,
2215 SA[LCI(idx, j, n2)]);
2216 }
2217
2218 if ((type2 = pair[S1[i - 3]][S2[j + 4]])) {
2219 SA[LCI(idx, j, n2)] = MIN2(SA[LCI(idx_3, j + 4, n2)] + P->internal_loop[5] + P->ninio[2] +
2220 P->mismatch23I[type2][SS1[i - 2]][SS2[j + 3]] +
2221 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + di3 + dj4,
2222 SA[LCI(idx, j, n2)]);
2223 }
2224
2225 /**
2226 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
2227 **/
2228 /**
2229 *** 3x3 or more
2230 **/
2231 SA[LCI(idx, j,
2232 n2)] = MIN2(SA[LINI(idx_3, j + 3,
2233 n2)] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + 2 * iext_s + di3 + dj3,
2234 SA[LCI(idx, j, n2)]);
2235 /**
2236 *** 2xn or more
2237 **/
2238 SA[LCI(idx, j,
2239 n2)] = MIN2(SA[LINI(idx_4, j + 2,
2240 n2)] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + di4 + dj2,
2241 SA[LCI(idx, j, n2)]);
2242 /**
2243 *** nx2 or more
2244 **/
2245 SA[LCI(idx, j,
2246 n2)] = MIN2(SA[LINI(idx_2, j + 4,
2247 n2)] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + di2 + dj4,
2248 SA[LCI(idx, j, n2)]);
2249 /**
2250 *** nx1 n>2
2251 **/
2252 SA[LCI(idx, j,
2253 n2)] = MIN2(SA[LINIX(idx_3, j + 1,
2254 n2)] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + di3 + dj1,
2255 SA[LCI(idx, j, n2)]);
2256 /**
2257 *** 1xn n>2
2258 **/
2259 SA[LCI(idx, j,
2260 n2)] = MIN2(SA[LINIY(idx_1, j + 3,
2261 n2)] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + dj3 + di1,
2262 SA[LCI(idx, j, n2)]);
2263 /**
2264 *** nx0 n>1
2265 **/
2266 int bAU;
2267 bAU = (type > 2 ? P->TerminalAU : 0);
2268 SA[LCI(idx, j,
2269 n2)] = MIN2(SA[LBXI(idx_2, j + 1, n2)] + di2 + dj1 + bext + bAU, SA[LCI(idx, j, n2)]);
2270 /**
2271 *** 0xn n>1
2272 **/
2273 SA[LCI(idx, j,
2274 n2)] = MIN2(SA[LBYI(idx_1, j + 2, n2)] + di1 + dj2 + bext + bAU, SA[LCI(idx, j, n2)]);
2275 temp = min_colonne;
2276 /**
2277 *** (type>2?P->TerminalAU:0)+
2278 *** P->dangle3[rtype[type]][SS1[i+1]]+
2279 *** P->dangle5[rtype[type]][SS2[j-1]],
2280 **/
2281 /* remove this line printf("LCI %d:%d %d\t",i,j,SA[LCI(idx,j,n2)]); */
2282 /* remove this line printf("LI %d:%d %d\t",i,j, SA[LINI(idx,j,n2)]); */
2283 min_colonne = MIN2(SA[LCI(idx, j, n2)] + vrna_E_ext_stem(rtype[type], SS2[j - 1], SS1[i + 1], P),
2284 min_colonne);
2285
2286 if (temp > min_colonne)
2287 min_j_colonne = j;
2288
2289 /* ---------------------------------------------------------------------end update */
2290 }
2291 if (max >= min_colonne) {
2292 max = min_colonne;
2293 max_pos = i;
2294 max_pos_j = min_j_colonne;
2295 }
2296
2297 position[i + delta] = min_colonne;
2298 min_colonne = INF;
2299 position_j[i + delta] = min_j_colonne;
2300 /* remove this line printf("\n"); */
2301 i++;
2302 }
2303 /* printf("MAX: %d",max); */
2304 free(S1);
2305 free(S2);
2306 free(SS1);
2307 free(SS2);
2308 free(SA);
2309 if (max < threshold) {
2310 find_max_XS(position,
2311 position_j,
2312 delta,
2313 threshold,
2314 alignment_length,
2315 s1,
2316 s2,
2317 access_s1,
2318 access_s2,
2319 fast,
2320 il_a,
2321 il_b,
2322 b_a,
2323 b_b);
2324 }
2325
2326 if (max < INF) {
2327 plot_max_XS(max,
2328 max_pos,
2329 max_pos_j,
2330 alignment_length,
2331 s1,
2332 s2,
2333 access_s1,
2334 access_s2,
2335 fast,
2336 il_a,
2337 il_b,
2338 b_a,
2339 b_b);
2340 }
2341
2342 for (i = 0; i <= 3; i++)
2343 free(DJ[i]);
2344 free(DJ);
2345 free(position);
2346 free(position_j);
2347 return NULL;
2348 }
2349
2350
2351 PRIVATE void
find_max_XS(const int * position,const int * position_j,const int delta,const int threshold,const int alignment_length,const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)2352 find_max_XS(const int *position,
2353 const int *position_j,
2354 const int delta,
2355 const int threshold,
2356 const int alignment_length,
2357 const char *s1,
2358 const char *s2,
2359 const int **access_s1,
2360 const int **access_s2,
2361 const int fast,
2362 const int il_a,
2363 const int il_b,
2364 const int b_a,
2365 const int b_b)
2366 {
2367 int pos = n1 - 9;
2368
2369 if (fast == 1) {
2370 while (10 < pos--) {
2371 int temp_min = 0;
2372 if (position[pos + delta] < (threshold)) {
2373 int search_range;
2374 search_range = delta + 1;
2375 while (--search_range)
2376 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
2377 temp_min = search_range;
2378
2379 pos -= temp_min;
2380 int max_pos_j;
2381 max_pos_j = position_j[pos + delta];
2382 int max;
2383 max = position[pos + delta];
2384 printf("target upper bound %d: query lower bound %d (%5.2f) \n",
2385 pos - 10,
2386 max_pos_j - 10,
2387 ((double)max) / 100);
2388 pos = MAX2(10, pos + temp_min - delta);
2389 }
2390 }
2391 } else if (fast == 2) {
2392 pos = n1 - 9;
2393 while (10 < pos--) {
2394 int temp_min = 0;
2395 if (position[pos + delta] < (threshold)) {
2396 int search_range;
2397 search_range = delta + 1;
2398 while (--search_range)
2399 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
2400 temp_min = search_range;
2401
2402 pos -= temp_min;
2403 int max_pos_j;
2404 max_pos_j = position_j[pos + delta];
2405 /* max_pos_j und pos entsprechen die realen position
2406 * in der erweiterten sequenz.
2407 * pos=1 -> position 1 in the sequence (and not 0 like in C)
2408 * max_pos_j -> position 1 in the sequence ( not 0 like in C)
2409 */
2410 int alignment_length2;
2411 alignment_length2 = MIN2(n1, n2);
2412 int begin_t = MAX2(11, pos - alignment_length2 + 1); /* 10 */
2413 int end_t = MIN2(n1 - 10, pos + 1);
2414 int begin_q = MAX2(11, max_pos_j - 1); /* 10 */
2415 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length2 - 1);
2416 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2 + 20));
2417 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2 + 20));
2418 strcpy(s3, "NNNNNNNNNN");
2419 strcpy(s4, "NNNNNNNNNN");
2420 strncat(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
2421 strncat(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
2422 strcat(s3, "NNNNNNNNNN");
2423 strcat(s4, "NNNNNNNNNN");
2424 s3[end_t - begin_t + 1 + 20] = '\0';
2425 s4[end_q - begin_q + 1 + 20] = '\0';
2426 duplexT test;
2427 test = fduplexfold_XS(s3,
2428 s4,
2429 access_s1,
2430 access_s2,
2431 end_t,
2432 begin_q,
2433 threshold,
2434 il_a,
2435 il_b,
2436 b_a,
2437 b_b);
2438 if (test.energy * 100 < threshold) {
2439 int l1 = strchr(test.structure, '&') - test.structure;
2440 printf(
2441 " %s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f) [%5.2f] i:%d,j:%d <%5.2f>\n",
2442 test.structure,
2443 begin_t - 10 + test.i - l1 - 10,
2444 begin_t - 10 + test.i - 1 - 10,
2445 begin_q - 10 + test.j - 1 - 10,
2446 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2 - 10,
2447 test.ddG,
2448 test.energy,
2449 test.opening_backtrack_x,
2450 test.opening_backtrack_y,
2451 test.energy_backtrack,
2452 pos - 10,
2453 max_pos_j - 10,
2454 ((double)position[pos + delta]) / 100);
2455 pos = MAX2(10, pos + temp_min - delta);
2456 free(test.structure);
2457 }
2458
2459 free(s3);
2460 free(s4);
2461 }
2462 }
2463 } else {
2464 pos = n1 - 9;
2465 while (pos-- > 10) {
2466 int temp_min = 0;
2467 if (position[pos + delta] < (threshold)) {
2468 int search_range;
2469 search_range = delta + 1;
2470 while (--search_range)
2471 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
2472 temp_min = search_range;
2473
2474 pos -= temp_min; /* position on i */
2475 int max_pos_j;
2476 max_pos_j = position_j[pos + delta]; /* position on j */
2477 int begin_t = MAX2(11, pos - alignment_length);
2478 int end_t = MIN2(n1 - 10, pos + 1);
2479 int begin_q = MAX2(11, max_pos_j - 1);
2480 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
2481 int i_flag;
2482 int j_flag;
2483 i_flag = (end_t == pos + 1 ? 1 : 0);
2484 j_flag = (begin_q == max_pos_j - 1 ? 1 : 0);
2485 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
2486 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
2487 strncpy(s3, (s1 + begin_t), end_t - begin_t + 1);
2488 strncpy(s4, (s2 + begin_q), end_q - begin_q + 1);
2489 s3[end_t - begin_t + 1] = '\0';
2490 s4[end_q - begin_q + 1] = '\0';
2491 duplexT test;
2492 test =
2493 duplexfold_XS(s3, s4, access_s1, access_s2, pos, max_pos_j, threshold, i_flag, j_flag);
2494 if (test.energy * 100 < threshold) {
2495 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f) i:%d,j:%d <%5.2f>\n",
2496 test.structure,
2497 test.tb,
2498 test.te,
2499 test.qb,
2500 test.qe,
2501 test.ddG,
2502 test.energy,
2503 test.dG1,
2504 test.dG2,
2505 pos - 10,
2506 max_pos_j - 10,
2507 ((double)position[pos + delta]) / 100);
2508 pos = MAX2(10, pos + temp_min - delta);
2509 }
2510
2511 free(s3);
2512 free(s4);
2513 free(test.structure);
2514 }
2515 }
2516 }
2517 }
2518
2519
2520 #if 0
2521 PRIVATE int
2522 compare(const void *sub1,
2523 const void *sub2)
2524 {
2525 int d;
2526
2527 if (((duplexT *)sub1)->ddG > ((duplexT *)sub2)->ddG)
2528 return 1;
2529
2530 if (((duplexT *)sub1)->ddG < ((duplexT *)sub2)->ddG)
2531 return -1;
2532
2533 d = ((duplexT *)sub1)->i - ((duplexT *)sub2)->i;
2534 if (d != 0)
2535 return d;
2536
2537 return ((duplexT *)sub1)->j - ((duplexT *)sub2)->j;
2538 }
2539
2540
2541 #endif
2542
2543 PRIVATE void
plot_max_XS(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1,const char * s2,const int ** access_s1,const int ** access_s2,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)2544 plot_max_XS(const int max,
2545 const int max_pos,
2546 const int max_pos_j,
2547 const int alignment_length,
2548 const char *s1,
2549 const char *s2,
2550 const int **access_s1,
2551 const int **access_s2,
2552 const int fast,
2553 const int il_a,
2554 const int il_b,
2555 const int b_a,
2556 const int b_b)
2557 {
2558 if (fast == 1) {
2559 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos - 3, max_pos_j,
2560 ((double)max) / 100);
2561 } else if (fast == 2) {
2562 int alignment_length2;
2563 alignment_length2 = MIN2(n1, n2);
2564 int begin_t = MAX2(11, max_pos - alignment_length2 + 1); /* 10 */
2565 int end_t = MIN2(n1 - 10, max_pos + 1);
2566 int begin_q = MAX2(11, max_pos_j - 1); /* 10 */
2567 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length2 - 1);
2568 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2 + 20));
2569 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2 + 20));
2570 strcpy(s3, "NNNNNNNNNN");
2571 strcpy(s4, "NNNNNNNNNN");
2572 strncat(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
2573 strncat(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
2574 strcat(s3, "NNNNNNNNNN");
2575 strcat(s4, "NNNNNNNNNN");
2576 s3[end_t - begin_t + 1 + 20] = '\0';
2577 s4[end_q - begin_q + 1 + 20] = '\0';
2578 duplexT test;
2579 test = fduplexfold_XS(s3, s4, access_s1, access_s2, end_t, begin_q, INF, il_a, il_b, b_a, b_b);
2580 int l1 = strchr(test.structure, '&') - test.structure;
2581 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f) [%5.2f] i:%d,j:%d <%5.2f>\n",
2582 test.structure,
2583 begin_t - 10 + test.i - l1 - 10,
2584 begin_t - 10 + test.i - 1 - 10,
2585 begin_q - 10 + test.j - 1 - 10,
2586 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2 - 10,
2587 test.ddG,
2588 test.energy,
2589 test.opening_backtrack_x,
2590 test.opening_backtrack_y,
2591 test.energy_backtrack,
2592 max_pos - 10,
2593 max_pos_j - 10,
2594 (double)max / 100);
2595
2596 free(s3);
2597 free(s4);
2598 free(test.structure);
2599 } else {
2600 int begin_t = MAX2(11, max_pos - alignment_length);
2601 int end_t = MIN2(n1 - 10, max_pos + 1);
2602 int begin_q = MAX2(11, max_pos_j - 1);
2603 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
2604 int i_flag;
2605 int j_flag;
2606 i_flag = (end_t == max_pos + 1 ? 1 : 0);
2607 j_flag = (begin_q == max_pos_j - 1 ? 1 : 0);
2608 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2)); /* +1 for \0 +1 for distance */
2609 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
2610
2611 strncpy(s3, (s1 + begin_t - 1), end_t - begin_t + 1); /* -1 to go from */
2612 strncpy(s4, (s2 + begin_q - 1), end_q - begin_q + 1); /* -1 to go from */
2613 s3[end_t - begin_t + 1] = '\0'; /* */
2614 s4[end_q - begin_q + 1] = '\0';
2615 duplexT test;
2616 test = duplexfold_XS(s3, s4, access_s1, access_s2, max_pos, max_pos_j, INF, i_flag, j_flag);
2617 printf("%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f) i:%d,j:%d <%5.2f>\n",
2618 test.structure,
2619 test.tb,
2620 test.te,
2621 test.qb,
2622 test.qe,
2623 test.ddG,
2624 test.energy,
2625 test.dG1,
2626 test.dG2,
2627 max_pos - 10,
2628 max_pos_j - 10,
2629 (double)max / 100);
2630 free(s3);
2631 free(s4);
2632 free(test.structure);
2633 }
2634 }
2635
2636
2637 /*---------------------------------------------------------duplexfold----------------------------------------------------------------------------------*/
2638
2639
2640 PRIVATE duplexT
duplexfold(const char * s1,const char * s2,const int extension_cost)2641 duplexfold(const char *s1,
2642 const char *s2,
2643 const int extension_cost)
2644 {
2645 int i, j, l1, Emin = INF, i_min = 0, j_min = 0;
2646 char *struc;
2647 duplexT mfe;
2648 vrna_md_t md;
2649
2650 n3 = (int)strlen(s1);
2651 n4 = (int)strlen(s2);
2652
2653 set_model_details(&md);
2654 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
2655 update_fold_params();
2656 if (P)
2657 free(P);
2658
2659 P = vrna_params(&md);
2660 make_pair_matrix();
2661 }
2662
2663 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
2664 for (i = 0; i <= n3; i++)
2665 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
2666 encode_seqs(s1, s2);
2667 for (i = 1; i <= n3; i++) {
2668 for (j = n4; j > 0; j--) {
2669 int type, type2, E, k, l;
2670 type = pair[S1[i]][S2[j]];
2671 c[i][j] = type ? P->DuplexInit + 2 * extension_cost : INF;
2672 if (!type)
2673 continue;
2674
2675 /**
2676 *** if (i>1) c[i][j] += P->dangle5[type][SS1[i-1]]+ extension_cost;
2677 *** if (j<n4) c[i][j] += P->dangle3[type][SS2[j+1]]+ extension_cost;
2678 *** if (type>2) c[i][j] += P->TerminalAU;
2679 **/
2680 c[i][j] += vrna_E_ext_stem(type, (i > 1) ? SS1[i - 1] : -1, (j < n4) ? SS2[j + 1] : -1, P);
2681 for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
2682 for (l = j + 1; l <= n4; l++) {
2683 if (i - k + l - j - 2 > MAXLOOP)
2684 break;
2685
2686 type2 = pair[S1[k]][S2[l]];
2687 if (!type2)
2688 continue;
2689
2690 E = E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type],
2691 SS1[k + 1], SS2[l - 1], SS1[i - 1], SS2[j + 1],
2692 P) + (i - k + l - j) * extension_cost;
2693 c[i][j] = MIN2(c[i][j], c[k][l] + E);
2694 }
2695 }
2696 E = c[i][j];
2697 /**
2698 *** if (i<n3) E += P->dangle3[rtype[type]][SS1[i+1]]+extension_cost;
2699 *** if (j>1) E += P->dangle5[rtype[type]][SS2[j-1]]+extension_cost;
2700 *** if (type>2) E += P->TerminalAU;
2701 ***
2702 **/
2703 E += vrna_E_ext_stem(rtype[type], (j > 1) ? SS2[j - 1] : -1, (i < n3) ? SS1[i + 1] : -1, P);
2704 if (E < Emin) {
2705 Emin = E;
2706 i_min = i;
2707 j_min = j;
2708 }
2709 }
2710 }
2711 struc = backtrack(i_min, j_min, extension_cost);
2712 if (i_min < n3)
2713 i_min++;
2714
2715 if (j_min > 1)
2716 j_min--;
2717
2718 l1 = strchr(struc, '&') - struc;
2719 int size;
2720 size = strlen(struc) - 1;
2721 Emin -= size * (extension_cost);
2722 mfe.i = i_min;
2723 mfe.j = j_min;
2724 mfe.energy = (double)Emin / 100.;
2725 mfe.structure = struc;
2726 for (i = 0; i <= n3; i++)
2727 free(c[i]);
2728 free(c);
2729 free(S1);
2730 free(S2);
2731 free(SS1);
2732 free(SS2);
2733 return mfe;
2734 }
2735
2736
2737 PRIVATE char *
backtrack(int i,int j,const int extension_cost)2738 backtrack(int i,
2739 int j,
2740 const int extension_cost)
2741 {
2742 /* backtrack structure going backwards from i, and forwards from j
2743 * return structure in bracket notation with & as separator */
2744 int k, l, type, type2, E, traced, i0, j0;
2745 char *st1, *st2, *struc;
2746
2747 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
2748 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
2749
2750 i0 = MIN2(i + 1, n3);
2751 j0 = MAX2(j - 1, 1);
2752
2753 while (i > 0 && j <= n4) {
2754 E = c[i][j];
2755 traced = 0;
2756 st1[i - 1] = '(';
2757 st2[j - 1] = ')';
2758 type = pair[S1[i]][S2[j]];
2759 if (!type)
2760 vrna_message_error("backtrack failed in fold duplex");
2761
2762 for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
2763 for (l = j + 1; l <= n4; l++) {
2764 int LE;
2765 if (i - k + l - j - 2 > MAXLOOP)
2766 break;
2767
2768 type2 = pair[S1[k]][S2[l]];
2769 if (!type2)
2770 continue;
2771
2772 LE = E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type],
2773 SS1[k + 1], SS2[l - 1], SS1[i - 1], SS2[j + 1],
2774 P) + (i - k + l - j) * extension_cost;
2775 if (E == c[k][l] + LE) {
2776 traced = 1;
2777 i = k;
2778 j = l;
2779 break;
2780 }
2781 }
2782 if (traced)
2783 break;
2784 }
2785 if (!traced) {
2786 E -= vrna_E_ext_stem(type, (i > 1) ? SS1[i - 1] : -1, (j < n4) ? SS2[j + 1] : -1, P);
2787 /**
2788 *** if (i>1) E -= P->dangle5[type][SS1[i-1]]+extension_cost;
2789 *** if (j<n4) E -= P->dangle3[type][SS2[j+1]]+extension_cost;
2790 *** if (type>2) E -= P->TerminalAU;
2791 **/
2792 if (E != P->DuplexInit + 2 * extension_cost)
2793 vrna_message_error("backtrack failed in fold duplex");
2794 else
2795 break;
2796 }
2797 }
2798 if (i > 1)
2799 i--;
2800
2801 if (j < n4)
2802 j++;
2803
2804 struc = (char *)vrna_alloc(i0 - i + 1 + j - j0 + 1 + 2);
2805 for (k = MAX2(i, 1); k <= i0; k++)
2806 if (!st1[k - 1])
2807 st1[k - 1] = '.';
2808
2809 for (k = j0; k <= j; k++)
2810 if (!st2[k - 1])
2811 st2[k - 1] = '.';
2812
2813 strcpy(struc, st1 + MAX2(i - 1, 0));
2814 strcat(struc, "&");
2815 strcat(struc, st2 + j0 - 1);
2816 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
2817 free(st1);
2818 free(st2);
2819 return struc;
2820 }
2821
2822
2823 PRIVATE duplexT
fduplexfold(const char * s1,const char * s2,const int extension_cost,const int il_a,const int il_b,const int b_a,const int b_b)2824 fduplexfold(const char *s1,
2825 const char *s2,
2826 const int extension_cost,
2827 const int il_a,
2828 const int il_b,
2829 const int b_a,
2830 const int b_b)
2831 {
2832 int i, j, Emin, i_min, j_min, l1;
2833 duplexT mfe;
2834 char *struc;
2835 int bopen = b_b;
2836 int bext = b_a + extension_cost;
2837 int iopen = il_b;
2838 int iext_s = 2 * (il_a + extension_cost); /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
2839 int iext_ass = 50 + il_a + extension_cost; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
2840 int min_colonne = INF; /* enthaelt das maximum einer kolonne */
2841 int i_length;
2842 int max_pos; /* get position of the best hit */
2843 int max_pos_j;
2844 int temp = INF;
2845 int min_j_colonne;
2846 int max = INF;
2847 vrna_md_t md;
2848
2849 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
2850
2851 n3 = (int)strlen(s1);
2852 n4 = (int)strlen(s2);
2853 /* delta_check is the minimal distance allowed for two hits to be accepted */
2854 /* if both hits are closer, reject the smaller ( in term of position) hits */
2855 /* i want to implement a function that, given a position in a long sequence and a small sequence, */
2856 /* duplexfold them at this position and report the result at the command line */
2857 /* for this i first need to rewrite backtrack in order to remove the printf functio */
2858 /* END OF DEFINITION FOR NEEDED SUBOPT DATA */
2859 set_model_details(&md);
2860 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
2861 update_fold_params();
2862 if (P)
2863 free(P);
2864
2865 P = vrna_params(&md);
2866 make_pair_matrix();
2867 }
2868
2869 /*local c array initialization---------------------------------------------*/
2870 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
2871 in = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
2872 bx = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
2873 by = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
2874 inx = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
2875 iny = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
2876 for (i = 0; i <= n3; i++) {
2877 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
2878 in[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
2879 bx[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
2880 by[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
2881 inx[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
2882 iny[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
2883 }
2884 /*-------------------------------------------------------------------------*/
2885 /*end of array initialisation----------------------------------*/
2886 /*maybe int *** would be better*/
2887 encode_seqs(s1, s2);
2888 /* ------------------------------------------matrix initialisierung */
2889 for (i = 0; i < n3; i++) {
2890 for (j = 0; j < n4; j++) {
2891 in[i][j] = INF; /* no in before 1 */
2892 c[i][j] = INF; /* no bulge and no in before n2 */
2893 bx[i][j] = INF; /* no bulge before 1 */
2894 by[i][j] = INF;
2895 inx[i][j] = INF; /* no bulge before 1 */
2896 iny[i][j] = INF;
2897 }
2898 }
2899
2900 /*--------------------------------------------------------local array*/
2901
2902
2903 /* -------------------------------------------------------------matrix initialisierung */
2904 i = 11;
2905 i_length = n3 - 9;
2906 while (i < i_length) {
2907 j = n4 - 9;
2908 min_colonne = INF;
2909 while (10 < --j) {
2910 int type, type2;
2911 type = pair[S1[i]][S2[j]];
2912 /**
2913 *** Start duplex
2914 **/
2915 c[i][j] = type ? P->DuplexInit + 2 * extension_cost : INF;
2916 /**
2917 *** update lin bx by linx liny matrix
2918 **/
2919 type2 = pair[S2[j + 1]][S1[i - 1]];
2920 /**
2921 *** start/extend interior loop
2922 **/
2923 in[i][j] =
2924 MIN2(c[i - 1][j + 1] + P->mismatchI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
2925 in[i - 1][j] + iext_ass);
2926 /**
2927 *** start/extend nx1 target
2928 *** use same type2 as for in
2929 **/
2930 inx[i][j] = MIN2(c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
2931 inx[i - 1][j] + iext_ass);
2932 /**
2933 *** start/extend 1xn target
2934 *** use same type2 as for in
2935 **/
2936 iny[i][j] = MIN2(c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
2937 iny[i][j + 1] + iext_ass);
2938 /**
2939 *** extend interior loop
2940 **/
2941 in[i][j] = MIN2(in[i][j], in[i][j + 1] + iext_ass);
2942 in[i][j] = MIN2(in[i][j], in[i - 1][j + 1] + iext_s);
2943 /**
2944 *** start/extend bulge target
2945 **/
2946 type2 = pair[S2[j]][S1[i - 1]];
2947 bx[i][j] =
2948 MIN2(bx[i - 1][j] + bext, c[i - 1][j] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0));
2949 /**
2950 *** start/extend bulge query
2951 **/
2952 type2 = pair[S2[j + 1]][S1[i]];
2953 by[i][j] =
2954 MIN2(by[i][j + 1] + bext, c[i][j + 1] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0));
2955 /**
2956 ***end update recursion
2957 ***######################## Start stack extension##############################
2958 **/
2959 if (!type)
2960 continue;
2961
2962 c[i][j] += vrna_E_ext_stem(type, SS1[i - 1], SS2[j + 1], P) + 2 * extension_cost;
2963 /**
2964 *** stack extension
2965 **/
2966 if ((type2 = pair[S1[i - 1]][S2[j + 1]]))
2967 c[i][j] =
2968 MIN2(c[i - 1][j + 1] + P->stack[rtype[type]][type2] + 2 * extension_cost, c[i][j]);
2969
2970 /**
2971 *** 1x0 / 0x1 stack extension
2972 **/
2973 type2 = pair[S1[i - 1]][S2[j + 2]];
2974 c[i][j] = MIN2(
2975 c[i - 1][j + 2] + P->bulge[1] + P->stack[rtype[type]][type2] + 3 * extension_cost,
2976 c[i][j]);
2977 type2 = pair[S1[i - 2]][S2[j + 1]];
2978 c[i][j] = MIN2(
2979 c[i - 2][j + 1] + P->bulge[1] + P->stack[type2][rtype[type]] + 3 * extension_cost,
2980 c[i][j]);
2981 /**
2982 *** 1x1 / 2x2 stack extension
2983 **/
2984 type2 = pair[S1[i - 2]][S2[j + 2]];
2985 c[i][j] = MIN2(
2986 c[i - 2][j + 2] + P->int11[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]] + 4 * extension_cost,
2987 c[i][j]);
2988 type2 = pair[S1[i - 3]][S2[j + 3]];
2989 c[i][j] =
2990 MIN2(c[i - 3][j + 3] +
2991 P->int22[type2][rtype[type]][SS1[i - 2]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + 6 * extension_cost,
2992 c[i][j]);
2993 /**
2994 *** 1x2 / 2x1 stack extension
2995 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
2996 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
2997 **/
2998 type2 = pair[S1[i - 3]][S2[j + 2]];
2999 c[i][j] =
3000 MIN2(
3001 c[i - 3][j + 2] + P->int21[rtype[type]][type2][SS2[j + 1]][SS1[i - 2]][SS1[i - 1]] + 5 * extension_cost,
3002 c[i][j]);
3003 type2 = pair[S1[i - 2]][S2[j + 3]];
3004 c[i][j] =
3005 MIN2(
3006 c[i - 2][j + 3] + P->int21[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + 5 * extension_cost,
3007 c[i][j]);
3008
3009 /**
3010 *** 2x3 / 3x2 stack extension
3011 **/
3012 if ((type2 = pair[S1[i - 4]][S2[j + 3]])) {
3013 c[i][j] = MIN2(c[i - 4][j + 3] + P->internal_loop[5] + P->ninio[2] +
3014 P->mismatch23I[type2][SS1[i - 3]][SS2[j + 2]] +
3015 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + 7 * extension_cost,
3016 c[i][j]);
3017 }
3018
3019 if ((type2 = pair[S1[i - 3]][S2[j + 4]])) {
3020 c[i][j] = MIN2(c[i - 3][j + 4] + P->internal_loop[5] + P->ninio[2] +
3021 P->mismatch23I[type2][SS1[i - 2]][SS2[j + 3]] +
3022 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + 7 * extension_cost,
3023 c[i][j]);
3024 }
3025
3026 /**
3027 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
3028 **/
3029 /**
3030 *** 3x3 or more
3031 **/
3032 c[i][j] = MIN2(
3033 in[i - 3][j + 3] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + 2 * iext_s + 2 * extension_cost,
3034 c[i][j]);
3035 /**
3036 *** 2xn or more
3037 **/
3038 c[i][j] = MIN2(
3039 in[i - 4][j + 2] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + 2 * extension_cost,
3040 c[i][j]);
3041 /**
3042 *** nx2 or more
3043 **/
3044 c[i][j] = MIN2(
3045 in[i - 2][j + 4] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + 2 * extension_cost,
3046 c[i][j]);
3047 /**
3048 *** nx1 n>2
3049 **/
3050 c[i][j] = MIN2(
3051 inx[i - 3][j + 1] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + 2 * extension_cost,
3052 c[i][j]);
3053 /**
3054 *** 1xn n>2
3055 **/
3056 c[i][j] = MIN2(
3057 iny[i - 1][j + 3] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + 2 * extension_cost,
3058 c[i][j]);
3059 /**
3060 *** nx0 n>1
3061 **/
3062 int bAU;
3063 bAU = (type > 2 ? P->TerminalAU : 0);
3064 c[i][j] = MIN2(bx[i - 2][j + 1] + 2 * extension_cost + bext + bAU, c[i][j]);
3065 /**
3066 *** 0xn n>1
3067 **/
3068 c[i][j] = MIN2(by[i - 1][j + 2] + 2 * extension_cost + bext + bAU, c[i][j]);
3069 temp = min_colonne;
3070 min_colonne = MIN2(c[i][j] + vrna_E_ext_stem(rtype[type], SS2[j - 1], SS1[i + 1],
3071 P) + 2 * extension_cost,
3072 min_colonne);
3073 if (temp > min_colonne)
3074 min_j_colonne = j;
3075
3076 /* ---------------------------------------------------------------------end update */
3077 }
3078 if (max >= min_colonne) {
3079 max = min_colonne;
3080 max_pos = i;
3081 max_pos_j = min_j_colonne;
3082 }
3083
3084 i++;
3085 }
3086 Emin = max;
3087 i_min = max_pos;
3088 j_min = max_pos_j;
3089 int dGe;
3090 dGe = 0;
3091 struc = fbacktrack(i_min, j_min, extension_cost, il_a, il_b, b_a, b_b, &dGe);
3092 if (i_min < n3 - 10)
3093 i_min++;
3094
3095 if (j_min > 11)
3096 j_min--;
3097
3098 l1 = strchr(struc, '&') - struc;
3099 int size;
3100 size = strlen(struc) - 1;
3101 Emin -= size * (extension_cost);
3102 mfe.i = i_min;
3103 mfe.j = j_min;
3104 mfe.energy = (double)Emin / 100.;
3105 mfe.energy_backtrack = (double)dGe / 100.;
3106 mfe.structure = struc;
3107 free(S1);
3108 free(S2);
3109 free(SS1);
3110 free(SS2);
3111 for (i = 0; i <= n3; i++) {
3112 free(c[i]);
3113 free(in[i]);
3114 free(bx[i]);
3115 free(by[i]);
3116 free(inx[i]);
3117 free(iny[i]);
3118 }
3119 free(c);
3120 free(in);
3121 free(bx);
3122 free(by);
3123 free(inx);
3124 free(iny);
3125 return mfe;
3126 }
3127
3128
3129 PRIVATE char *
fbacktrack(int i,int j,const int extension_cost,const int il_a,const int il_b,const int b_a,const int b_b,int * dG)3130 fbacktrack(int i,
3131 int j,
3132 const int extension_cost,
3133 const int il_a,
3134 const int il_b,
3135 const int b_a,
3136 const int b_b,
3137 int *dG)
3138 {
3139 /* backtrack structure going backwards from i, and forwards from j
3140 * return structure in bracket notation with & as separator */
3141 int k, l, type, type2, E, traced, i0, j0;
3142 char *st1, *st2, *struc;
3143 int bopen = b_b;
3144 int bext = b_a + extension_cost;
3145 int iopen = il_b;
3146 int iext_s = 2 * (il_a + extension_cost); /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
3147 int iext_ass = 50 + il_a + extension_cost; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
3148
3149 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
3150 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
3151 i0 = MIN2(i + 1, n3 - 10);
3152 j0 = MAX2(j - 1, 11);
3153 int state;
3154 state = 1; /* we start backtracking from a a pair , i.e. c-matrix */
3155 /* state 1 -> base pair, c
3156 * state 2 -> interior loop, in
3157 * state 3 -> bx loop, bx
3158 * state 4 -> by loop, by
3159 */
3160 traced = 1;
3161 k = i;
3162 l = j;
3163 type = pair[S1[i]][S2[j]];
3164 *dG += vrna_E_ext_stem(rtype[type], SS2[j - 1], SS1[i + 1], P);
3165 /* (type>2?P->TerminalAU:0)+P->dangle3[rtype[type]][SS1[i+1]]+P->dangle5[rtype[type]][SS2[j-1]]; */
3166 while (i > 10 && j <= n4 - 9 && traced) {
3167 traced = 0;
3168 switch (state) {
3169 case 1:
3170 type = pair[S1[i]][S2[j]];
3171 int bAU;
3172 bAU = (type > 2 ? P->TerminalAU : 0);
3173 if (!type)
3174 vrna_message_error("backtrack failed in fold duplex");
3175
3176 type2 = pair[S1[i - 1]][S2[j + 1]];
3177 if (type2 &&
3178 c[i][j] == (c[i - 1][j + 1] + P->stack[rtype[type]][type2] + 2 * extension_cost)) {
3179 k = i - 1;
3180 l = j + 1;
3181 (*dG) += E_IntLoop(i - k - 1,
3182 l - j - 1,
3183 type2,
3184 rtype[type],
3185 SS1[k + 1],
3186 SS2[l - 1],
3187 SS1[i - 1],
3188 SS2[j + 1],
3189 P);
3190 st1[i - 1] = '(';
3191 st2[j - 1] = ')';
3192 i = k;
3193 j = l;
3194 state = 1;
3195 traced = 1;
3196 break;
3197 }
3198
3199 type2 = pair[S1[i - 1]][S2[j + 2]];
3200 if (type2 &&
3201 c[i][j] ==
3202 (c[i - 1][j + 2] + P->bulge[1] + P->stack[rtype[type]][type2] + 3 * extension_cost)) {
3203 k = i - 1;
3204 l = j + 2;
3205 *dG += E_IntLoop(i - k - 1,
3206 l - j - 1,
3207 type2,
3208 rtype[type],
3209 SS1[k + 1],
3210 SS2[l - 1],
3211 SS1[i - 1],
3212 SS2[j + 1],
3213 P);
3214 st1[i - 1] = '(';
3215 st2[j - 1] = ')';
3216 i = k;
3217 j = l;
3218 state = 1;
3219 traced = 1;
3220 break;
3221 }
3222
3223 type2 = pair[S1[i - 2]][S2[j + 1]];
3224 if (type2 &&
3225 c[i][j] ==
3226 (c[i - 2][j + 1] + P->bulge[1] + P->stack[type2][rtype[type]] + 3 * extension_cost)) {
3227 k = i - 2;
3228 l = j + 1;
3229 *dG += E_IntLoop(i - k - 1,
3230 l - j - 1,
3231 type2,
3232 rtype[type],
3233 SS1[k + 1],
3234 SS2[l - 1],
3235 SS1[i - 1],
3236 SS2[j + 1],
3237 P);
3238 st1[i - 1] = '(';
3239 st2[j - 1] = ')';
3240 i = k;
3241 j = l;
3242 state = 1;
3243 traced = 1;
3244 break;
3245 }
3246
3247 type2 = pair[S1[i - 2]][S2[j + 2]];
3248 if (type2 &&
3249 c[i][j] ==
3250 (c[i - 2][j + 2] + P->int11[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]] + 4 *
3251 extension_cost)) {
3252 k = i - 2;
3253 l = j + 2;
3254 *dG += E_IntLoop(i - k - 1,
3255 l - j - 1,
3256 type2,
3257 rtype[type],
3258 SS1[k + 1],
3259 SS2[l - 1],
3260 SS1[i - 1],
3261 SS2[j + 1],
3262 P);
3263 st1[i - 1] = '(';
3264 st2[j - 1] = ')';
3265 i = k;
3266 j = l;
3267 state = 1;
3268 traced = 1;
3269 break;
3270 }
3271
3272 type2 = pair[S1[i - 3]][S2[j + 3]];
3273 if (type2 &&
3274 c[i][j] ==
3275 (c[i - 3][j + 3] +
3276 P->int22[type2][rtype[type]][SS1[i - 2]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + 6 *
3277 extension_cost)) {
3278 k = i - 3;
3279 l = j + 3;
3280 *dG += E_IntLoop(i - k - 1,
3281 l - j - 1,
3282 type2,
3283 rtype[type],
3284 SS1[k + 1],
3285 SS2[l - 1],
3286 SS1[i - 1],
3287 SS2[j + 1],
3288 P);
3289 st1[i - 1] = '(';
3290 st2[j - 1] = ')';
3291 i = k;
3292 j = l;
3293 state = 1;
3294 traced = 1;
3295 break;
3296 }
3297
3298 type2 = pair[S1[i - 3]][S2[j + 2]];
3299 if (type2 &&
3300 c[i][j] ==
3301 (c[i - 3][j + 2] + P->int21[rtype[type]][type2][SS2[j + 1]][SS1[i - 2]][SS1[i - 1]] +
3302 5 *
3303 extension_cost)) {
3304 k = i - 3;
3305 l = j + 2;
3306 *dG += E_IntLoop(i - k - 1,
3307 l - j - 1,
3308 type2,
3309 rtype[type],
3310 SS1[k + 1],
3311 SS2[l - 1],
3312 SS1[i - 1],
3313 SS2[j + 1],
3314 P);
3315 st1[i - 1] = '(';
3316 st2[j - 1] = ')';
3317 i = k;
3318 j = l;
3319 state = 1;
3320 traced = 1;
3321 break;
3322 }
3323
3324 type2 = pair[S1[i - 2]][S2[j + 3]];
3325 if (type2 &&
3326 c[i][j] ==
3327 (c[i - 2][j + 3] + P->int21[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] +
3328 5 *
3329 extension_cost)) {
3330 k = i - 2;
3331 l = j + 3;
3332 *dG += E_IntLoop(i - k - 1,
3333 l - j - 1,
3334 type2,
3335 rtype[type],
3336 SS1[k + 1],
3337 SS2[l - 1],
3338 SS1[i - 1],
3339 SS2[j + 1],
3340 P);
3341 st1[i - 1] = '(';
3342 st2[j - 1] = ')';
3343 i = k;
3344 j = l;
3345 state = 1;
3346 traced = 1;
3347 break;
3348 }
3349
3350 type2 = pair[S1[i - 4]][S2[j + 3]];
3351 if (type2 && c[i][j] == (c[i - 4][j + 3] + P->internal_loop[5] + P->ninio[2] +
3352 P->mismatch23I[type2][SS1[i - 3]][SS2[j + 2]] +
3353 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + 7 *
3354 extension_cost)) {
3355 k = i - 4;
3356 l = j + 3;
3357 *dG += E_IntLoop(i - k - 1,
3358 l - j - 1,
3359 type2,
3360 rtype[type],
3361 SS1[k + 1],
3362 SS2[l - 1],
3363 SS1[i - 1],
3364 SS2[j + 1],
3365 P);
3366 st1[i - 1] = '(';
3367 st2[j - 1] = ')';
3368 i = k;
3369 j = l;
3370 state = 1;
3371 traced = 1;
3372 break;
3373 }
3374
3375 type2 = pair[S1[i - 3]][S2[j + 4]];
3376 if (type2 && c[i][j] == (c[i - 3][j + 4] + P->internal_loop[5] + P->ninio[2] +
3377 P->mismatch23I[type2][SS1[i - 2]][SS2[j + 3]] +
3378 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + 7 *
3379 extension_cost)) {
3380 k = i - 3;
3381 l = j + 4;
3382 *dG += E_IntLoop(i - k - 1,
3383 l - j - 1,
3384 type2,
3385 rtype[type],
3386 SS1[k + 1],
3387 SS2[l - 1],
3388 SS1[i - 1],
3389 SS2[j + 1],
3390 P);
3391 st1[i - 1] = '(';
3392 st2[j - 1] = ')';
3393 i = k;
3394 j = l;
3395 state = 1;
3396 traced = 1;
3397 break;
3398 }
3399
3400 if (c[i][j] ==
3401 (in[i - 3][j + 3] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + 2 *
3402 extension_cost +
3403 2 * iext_s)) {
3404 k = i;
3405 l = j;
3406 st1[i - 1] = '(';
3407 st2[j - 1] = ')';
3408 i = i - 3;
3409 j = j + 3;
3410 state = 2;
3411 traced = 1;
3412 break;
3413 }
3414
3415 if (c[i][j] ==
3416 (in[i - 4][j + 2] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 *
3417 iext_ass + 2 * extension_cost)) {
3418 k = i;
3419 l = j;
3420 st1[i - 1] = '(';
3421 st2[j - 1] = ')';
3422 i = i - 4;
3423 j = j + 2;
3424 state = 2;
3425 traced = 1;
3426 break;
3427 }
3428
3429 if (c[i][j] ==
3430 (in[i - 2][j + 4] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 *
3431 iext_ass + 2 * extension_cost)) {
3432 k = i;
3433 l = j;
3434 st1[i - 1] = '(';
3435 st2[j - 1] = ')';
3436 i = i - 2;
3437 j = j + 4;
3438 state = 2;
3439 traced = 1;
3440 break;
3441 }
3442
3443 if (c[i][j] ==
3444 (inx[i - 3][j + 1] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass +
3445 iext_ass + 2 * extension_cost)) {
3446 k = i;
3447 l = j;
3448 st1[i - 1] = '(';
3449 st2[j - 1] = ')';
3450 i = i - 3;
3451 j = j + 1;
3452 state = 5;
3453 traced = 1;
3454 break;
3455 }
3456
3457 if (c[i][j] ==
3458 (iny[i - 1][j + 3] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass +
3459 iext_ass + 2 * extension_cost)) {
3460 k = i;
3461 l = j;
3462 st1[i - 1] = '(';
3463 st2[j - 1] = ')';
3464 i = i - 1;
3465 j = j + 3;
3466 state = 6;
3467 traced = 1;
3468 break;
3469 }
3470
3471 if (c[i][j] == (bx[i - 2][j + 1] + 2 * extension_cost + bext + bAU)) {
3472 k = i;
3473 l = j;
3474 st1[i - 1] = '(';
3475 st2[j - 1] = ')';
3476 i = i - 2;
3477 j = j + 1;
3478 state = 3;
3479 traced = 1;
3480 break;
3481 }
3482
3483 if (c[i][j] == (by[i - 1][j + 2] + 2 * extension_cost + bext + bAU)) {
3484 k = i;
3485 l = j;
3486 st1[i - 1] = '(';
3487 st2[j - 1] = ')';
3488 i = i - 1;
3489 j = j + 2;
3490 state = 4;
3491 traced = 1;
3492 break;
3493 }
3494
3495 break;
3496 case 2:
3497 if (in[i][j] == (in[i - 1][j + 1] + iext_s)) {
3498 i--;
3499 j++;
3500 state = 2;
3501 traced = 1;
3502 break;
3503 }
3504
3505 if (in[i][j] == (in[i - 1][j] + iext_ass)) {
3506 i = i - 1;
3507 state = 2;
3508 traced = 1;
3509 break;
3510 }
3511
3512 if (in[i][j] == (in[i][j + 1] + iext_ass)) {
3513 j++;
3514 state = 2;
3515 traced = 1;
3516 break;
3517 }
3518
3519 type2 = pair[S2[j + 1]][S1[i - 1]];
3520 if (type2 &&
3521 in[i][j] == (c[i - 1][j + 1] + P->mismatchI[type2][SS2[j]][SS1[i]] + iopen + iext_s)) {
3522 int temp;
3523 temp = k;
3524 k = i - 1;
3525 i = temp;
3526 temp = l;
3527 l = j + 1;
3528 j = temp;
3529 type = pair[S1[i]][S2[j]];
3530 *dG += E_IntLoop(i - k - 1,
3531 l - j - 1,
3532 type2,
3533 rtype[type],
3534 SS1[k + 1],
3535 SS2[l - 1],
3536 SS1[i - 1],
3537 SS2[j + 1],
3538 P);
3539 i = k;
3540 j = l;
3541 state = 1;
3542 traced = 1;
3543 break;
3544 }
3545
3546 case 3:
3547 if (bx[i][j] == (bx[i - 1][j] + bext)) {
3548 i--;
3549 state = 3;
3550 traced = 1;
3551 break;
3552 }
3553
3554 type2 = pair[S2[j]][S1[i - 1]];
3555 if (type2 && bx[i][j] == (c[i - 1][j] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0))) {
3556 int temp;
3557 temp = k;
3558 k = i - 1;
3559 i = temp;
3560 temp = l;
3561 l = j;
3562 j = temp;
3563 type = pair[S1[i]][S2[j]];
3564 *dG += E_IntLoop(i - k - 1,
3565 l - j - 1,
3566 type2,
3567 rtype[type],
3568 SS1[k + 1],
3569 SS2[l - 1],
3570 SS1[i - 1],
3571 SS2[j + 1],
3572 P);
3573 i = k;
3574 j = l;
3575 state = 1;
3576 traced = 1;
3577 break;
3578 }
3579
3580 case 4:
3581 if (by[i][j] == (by[i][j + 1] + bext)) {
3582 j++;
3583
3584 state = 4;
3585 traced = 1;
3586 break;
3587 }
3588
3589 type2 = pair[S2[j + 1]][S1[i]];
3590 if (type2 && by[i][j] == (c[i][j + 1] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0))) {
3591 int temp;
3592 temp = k;
3593 k = i;
3594 i = temp;
3595 temp = l;
3596 l = j + 1;
3597 j = temp;
3598 type = pair[S1[i]][S2[j]];
3599 *dG += E_IntLoop(i - k - 1,
3600 l - j - 1,
3601 type2,
3602 rtype[type],
3603 SS1[k + 1],
3604 SS2[l - 1],
3605 SS1[i - 1],
3606 SS2[j + 1],
3607 P);
3608 i = k;
3609 j = l;
3610 state = 1;
3611 traced = 1;
3612 break;
3613 }
3614
3615 case 5:
3616 if (inx[i][j] == (inx[i - 1][j] + iext_ass)) {
3617 i--;
3618 state = 5;
3619 traced = 1;
3620 break;
3621 }
3622
3623 type2 = pair[S2[j + 1]][S1[i - 1]];
3624 if (type2 &&
3625 inx[i][j] ==
3626 (c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s)) {
3627 int temp;
3628 temp = k;
3629 k = i - 1;
3630 i = temp;
3631 temp = l;
3632 l = j + 1;
3633 j = temp;
3634 type = pair[S1[i]][S2[j]];
3635 *dG += E_IntLoop(i - k - 1,
3636 l - j - 1,
3637 type2,
3638 rtype[type],
3639 SS1[k + 1],
3640 SS2[l - 1],
3641 SS1[i - 1],
3642 SS2[j + 1],
3643 P);
3644 i = k;
3645 j = l;
3646 state = 1;
3647 traced = 1;
3648 break;
3649 }
3650
3651 case 6:
3652 if (iny[i][j] == (iny[i][j + 1] + iext_ass)) {
3653 j++;
3654 state = 6;
3655 traced = 1;
3656 break;
3657 }
3658
3659 type2 = pair[S2[j + 1]][S1[i - 1]];
3660 if (type2 &&
3661 iny[i][j] ==
3662 (c[i - 1][j + 1] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s)) {
3663 int temp;
3664 temp = k;
3665 k = i - 1;
3666 i = temp;
3667 temp = l;
3668 l = j + 1;
3669 j = temp;
3670 type = pair[S1[i]][S2[j]];
3671 *dG += E_IntLoop(i - k - 1,
3672 l - j - 1,
3673 type2,
3674 rtype[type],
3675 SS1[k + 1],
3676 SS2[l - 1],
3677 SS1[i - 1],
3678 SS2[j + 1],
3679 P);
3680 i = k;
3681 j = l;
3682 state = 1;
3683 traced = 1;
3684 break;
3685 }
3686 }
3687 }
3688 if (!traced) {
3689 E = c[i][j];
3690 /**
3691 *** if (i>1) {E -= P->dangle5[type][SS1[i-1]]+extension_cost; *dG+=P->dangle5[type][SS1[i-1]];}
3692 *** if (j<n4){E -= P->dangle3[type][SS2[j+1]]+extension_cost; *dG+=P->dangle3[type][SS2[j+1]];}
3693 *** if (type>2) {E -= P->TerminalAU; *dG+=P->TerminalAU;}
3694 **/
3695 int correction;
3696 correction = vrna_E_ext_stem(type, (i > 1) ? SS1[i - 1] : -1, (j < n4) ? SS2[j + 1] : -1, P);
3697 *dG += correction;
3698 E -= correction + 2 * extension_cost;
3699 if (E != P->DuplexInit + 2 * extension_cost) {
3700 vrna_message_error("backtrack failed in second fold duplex");
3701 } else {
3702 *dG += P->DuplexInit;
3703 st1[i - 1] = '(';
3704 st2[j - 1] = ')';
3705 }
3706 }
3707
3708 if (i > 11)
3709 i--;
3710
3711 if (j < n4 - 10)
3712 j++;
3713
3714 struc = (char *)vrna_alloc(i0 - i + 1 + j - j0 + 1 + 2);
3715 for (k = MAX2(i, 1); k <= i0; k++)
3716 if (!st1[k - 1])
3717 st1[k - 1] = '.';
3718
3719 for (k = j0; k <= j; k++)
3720 if (!st2[k - 1])
3721 st2[k - 1] = '.';
3722
3723 strcpy(struc, st1 + MAX2(i - 1, 0));
3724 strcat(struc, "&");
3725 strcat(struc, st2 + j0 - 1);
3726 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
3727 free(st1);
3728 free(st2);
3729 return struc;
3730 }
3731
3732
3733 duplexT **
Lduplexfold(const char * s1,const char * s2,const int threshold,const int extension_cost,const int alignment_length,const int delta,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)3734 Lduplexfold(const char *s1,
3735 const char *s2,
3736 const int threshold,
3737 const int extension_cost,
3738 const int alignment_length,
3739 const int delta,
3740 const int fast,
3741 const int il_a,
3742 const int il_b,
3743 const int b_a,
3744 const int b_b)
3745 {
3746 /**
3747 *** See variable definition in fduplexfold_XS
3748 **/
3749 int i, j;
3750 int bopen = b_b;
3751 int bext = b_a + extension_cost;
3752 int iopen = il_b;
3753 int iext_s = 2 * (il_a + extension_cost); /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
3754 int iext_ass = 50 + il_a + extension_cost; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
3755 int min_colonne = INF; /* enthaelt das maximum einer kolonne */
3756 int i_length;
3757 int max_pos; /* get position of the best hit */
3758 int max_pos_j;
3759 int temp = INF;
3760 int min_j_colonne;
3761 int max = INF;
3762 int *position; /* contains the position of the hits with energy > E */
3763 int *position_j;
3764 /**
3765 *** 1D array corresponding to the standard 2d recursion matrix
3766 *** Makes the computation 20% faster
3767 **/
3768 int *SA;
3769 vrna_md_t md;
3770
3771 /**
3772 *** variable initialization
3773 **/
3774 n1 = (int)strlen(s1);
3775 n2 = (int)strlen(s2);
3776 /**
3777 *** Sequence encoding
3778 **/
3779 set_model_details(&md);
3780 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
3781 update_fold_params();
3782 if (P)
3783 free(P);
3784
3785 P = vrna_params(&md);
3786 make_pair_matrix();
3787 }
3788
3789 encode_seqs(s1, s2);
3790 /**
3791 *** Position of the high score on the target and query sequence
3792 **/
3793 position = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
3794 position_j = (int *)vrna_alloc((delta + n1 + 3 + delta) * sizeof(int));
3795 /**
3796 *** instead of having 4 2-dim arrays we use a unique 1-dim array
3797 *** The mapping 2d -> 1D is done based ont the macro
3798 *** LCI(i,j,l) ((i )*l + j)
3799 *** LINI(i,j,l) ((i + 5)*l + j)
3800 *** LBXI(i,j,l) ((i + 10)*l + j)
3801 *** LBYI(i,j,l) ((i + 15)*l + j)
3802 *** LINIX(i,j,l) ((i + 20)*l + j)
3803 *** LINIY(i,j,l) ((i + 25)*l + j)
3804 ***
3805 *** SA has a length of 5 (number of columns we look back) *
3806 *** * 6 (number of structures we look at) *
3807 *** * length of the sequence
3808 **/
3809 SA = (int *)vrna_alloc(sizeof(int) * 5 * 6 * (n2 + 5));
3810 for (j = n2 + 4; j >= 0; j--) {
3811 SA[(j *
3812 30)] =
3813 SA[(j * 30) + 1] = SA[(j * 30) + 2] = SA[(j * 30) + 3] = SA[(j * 30) + 4] = INF;
3814 SA[(j * 30) +
3815 5] =
3816 SA[(j * 30) + 1 +
3817 5] =
3818 SA[(j * 30) + 2 + 5] = SA[(j * 30) + 3 + 5] = SA[(j * 30) + 4 + 5] = INF;
3819 SA[(j * 30) +
3820 10] =
3821 SA[(j * 30) + 1 +
3822 10] =
3823 SA[(j * 30) + 2 + 10] = SA[(j * 30) + 3 + 10] = SA[(j * 30) + 4 + 10] = INF;
3824 SA[(j * 30) +
3825 15] =
3826 SA[(j * 30) + 1 +
3827 15] =
3828 SA[(j * 30) + 2 + 15] = SA[(j * 30) + 3 + 15] = SA[(j * 30) + 4 + 15] = INF;
3829 SA[(j * 30) +
3830 20] =
3831 SA[(j * 30) + 1 +
3832 20] =
3833 SA[(j * 30) + 2 + 20] = SA[(j * 30) + 3 + 20] = SA[(j * 30) + 4 + 20] = INF;
3834 SA[(j * 30) +
3835 25] =
3836 SA[(j * 30) + 1 +
3837 25] =
3838 SA[(j * 30) + 2 + 25] = SA[(j * 30) + 3 + 25] = SA[(j * 30) + 4 + 25] = INF;
3839 }
3840 i = 10;
3841 i_length = n1 - 9;
3842 while (i < i_length) {
3843 int idx = i % 5;
3844 int idx_1 = (i - 1) % 5;
3845 int idx_2 = (i - 2) % 5;
3846 int idx_3 = (i - 3) % 5;
3847 int idx_4 = (i - 4) % 5;
3848 j = n2 - 9;
3849 while (9 < --j) {
3850 int type, type2;
3851 type = pair[S1[i]][S2[j]];
3852 /**
3853 *** Start duplex
3854 **/
3855 SA[LCI(idx, j, n2)] = type ? P->DuplexInit + 2 * extension_cost : INF;
3856 /**
3857 *** update lin bx by linx liny matrix
3858 **/
3859 type2 = pair[S2[j + 1]][S1[i - 1]];
3860 /**
3861 *** start/extend interior loop
3862 **/
3863 SA[LINI(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
3864 n2)] + P->mismatchI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
3865 SA[LINI(idx_1, j, n2)] + iext_ass);
3866 /**
3867 *** start/extend nx1 target
3868 *** use same type2 as for in
3869 **/
3870 SA[LINIX(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
3871 n2)] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
3872 SA[LINIX(idx_1, j, n2)] + iext_ass);
3873 /**
3874 *** start/extend 1xn target
3875 *** use same type2 as for in
3876 **/
3877 SA[LINIY(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
3878 n2)] + P->mismatch1nI[type2][SS2[j]][SS1[i]] + iopen + iext_s,
3879 SA[LINIY(idx, j + 1, n2)] + iext_ass);
3880 /**
3881 *** extend interior loop
3882 **/
3883 SA[LINI(idx, j, n2)] = MIN2(SA[LINI(idx, j, n2)], SA[LINI(idx, j + 1, n2)] + iext_ass);
3884 SA[LINI(idx, j, n2)] = MIN2(SA[LINI(idx, j, n2)], SA[LINI(idx_1, j + 1, n2)] + iext_s);
3885 /**
3886 *** start/extend bulge target
3887 **/
3888 type2 = pair[S2[j]][S1[i - 1]];
3889 SA[LBXI(idx, j, n2)] = MIN2(SA[LBXI(idx_1, j, n2)] + bext,
3890 SA[LCI(idx_1, j,
3891 n2)] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0));
3892 /**
3893 *** start/extend bulge query
3894 **/
3895 type2 = pair[S2[j + 1]][S1[i]];
3896 SA[LBYI(idx, j, n2)] = MIN2(SA[LBYI(idx, j + 1, n2)] + bext,
3897 SA[LCI(idx, j + 1,
3898 n2)] + bopen + bext + (type2 > 2 ? P->TerminalAU : 0));
3899 /**
3900 ***end update recursion
3901 ***##################### Start stack extension ######################
3902 **/
3903 if (!type)
3904 continue; /**
3905 *** stack extension
3906 **/
3907
3908 SA[LCI(idx, j, n2)] += vrna_E_ext_stem(type, SS1[i - 1], SS2[j + 1], P) + 2 * extension_cost;
3909 /**
3910 *** stack extension
3911 **/
3912 if ((type2 = pair[S1[i - 1]][S2[j + 1]]))
3913 SA[LCI(idx, j, n2)] = MIN2(SA[LCI(idx_1, j + 1,
3914 n2)] + P->stack[rtype[type]][type2] + 2 * extension_cost,
3915 SA[LCI(idx, j, n2)]);
3916
3917 /**
3918 *** 1x0 / 0x1 stack extension
3919 **/
3920 if ((type2 = pair[S1[i - 1]][S2[j + 2]])) {
3921 SA[LCI(idx, j,
3922 n2)] = MIN2(SA[LCI(idx_1, j + 2,
3923 n2)] + P->bulge[1] + P->stack[rtype[type]][type2] + 3 * extension_cost,
3924 SA[LCI(idx, j, n2)]);
3925 }
3926
3927 if ((type2 = pair[S1[i - 2]][S2[j + 1]])) {
3928 SA[LCI(idx, j,
3929 n2)] = MIN2(SA[LCI(idx_2, j + 1,
3930 n2)] + P->bulge[1] + P->stack[type2][rtype[type]] + 3 * extension_cost,
3931 SA[LCI(idx, j, n2)]);
3932 }
3933
3934 /**
3935 *** 1x1 / 2x2 stack extension
3936 **/
3937 if ((type2 = pair[S1[i - 2]][S2[j + 2]])) {
3938 SA[LCI(idx, j,
3939 n2)] = MIN2(SA[LCI(idx_2, j + 2,
3940 n2)] + P->int11[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]] + 4 * extension_cost,
3941 SA[LCI(idx, j, n2)]);
3942 }
3943
3944 if ((type2 = pair[S1[i - 3]][S2[j + 3]])) {
3945 SA[LCI(idx, j,
3946 n2)] = MIN2(SA[LCI(idx_3, j + 3,
3947 n2)] +
3948 P->int22[type2][rtype[type]][SS1[i - 2]][SS1[i - 1]][SS2[j + 1]][SS2[j +
3949 2]] + 6 * extension_cost,
3950 SA[LCI(idx, j, n2)]);
3951 }
3952
3953 /**
3954 *** 1x2 / 2x1 stack extension
3955 *** E_IntLoop(1,2,type2, rtype[type],SS1[i-1], SS2[j+2], SS1[i-1], SS2[j+1], P) corresponds to
3956 *** P->int21[rtype[type]][type2][SS2[j+2]][SS1[i-1]][SS1[i-1]]
3957 **/
3958 if ((type2 = pair[S1[i - 3]][S2[j + 2]])) {
3959 SA[LCI(idx, j,
3960 n2)] = MIN2(SA[LCI(idx_3, j + 2,
3961 n2)] +
3962 P->int21[rtype[type]][type2][SS2[j + 1]][SS1[i - 2]][SS1[i - 1]] + 5 * extension_cost,
3963 SA[LCI(idx, j, n2)]);
3964 }
3965
3966 if ((type2 = pair[S1[i - 2]][S2[j + 3]])) {
3967 SA[LCI(idx, j,
3968 n2)] = MIN2(SA[LCI(idx_2, j + 3,
3969 n2)] +
3970 P->int21[type2][rtype[type]][SS1[i - 1]][SS2[j + 1]][SS2[j + 2]] + 5 * extension_cost,
3971 SA[LCI(idx, j, n2)]);
3972 }
3973
3974 /**
3975 *** 2x3 / 3x2 stack extension
3976 **/
3977 if ((type2 = pair[S1[i - 4]][S2[j + 3]])) {
3978 SA[LCI(idx, j, n2)] = MIN2(SA[LCI(idx_4, j + 3,
3979 n2)] + P->internal_loop[5] + P->ninio[2] +
3980 P->mismatch23I[type2][SS1[i - 3]][SS2[j + 2]] +
3981 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + 7 * extension_cost,
3982 SA[LCI(idx, j, n2)]);
3983 }
3984
3985 if ((type2 = pair[S1[i - 3]][S2[j + 4]])) {
3986 SA[LCI(idx, j, n2)] = MIN2(SA[LCI(idx_3, j + 4,
3987 n2)] + P->internal_loop[5] + P->ninio[2] +
3988 P->mismatch23I[type2][SS1[i - 2]][SS2[j + 3]] +
3989 P->mismatch23I[rtype[type]][SS2[j + 1]][SS1[i - 1]] + 7 * extension_cost,
3990 SA[LCI(idx, j, n2)]);
3991 }
3992
3993 /**
3994 *** So now we have to handle 1x3, 3x1, 3x3, and mxn m,n > 3
3995 **/
3996 /**
3997 *** 3x3 or more
3998 **/
3999 SA[LCI(idx, j,
4000 n2)] = MIN2(SA[LINI(idx_3, j + 3,
4001 n2)] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + 2 * iext_s + 2 * extension_cost,
4002 SA[LCI(idx, j, n2)]);
4003 /**
4004 *** 2xn or more
4005 **/
4006 SA[LCI(idx, j,
4007 n2)] = MIN2(SA[LINI(idx_4, j + 2,
4008 n2)] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + 2 * extension_cost,
4009 SA[LCI(idx, j, n2)]);
4010 /**
4011 *** nx2 or more
4012 **/
4013 SA[LCI(idx, j,
4014 n2)] = MIN2(SA[LINI(idx_2, j + 4,
4015 n2)] + P->mismatchI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_s + 2 * iext_ass + 2 * extension_cost,
4016 SA[LCI(idx, j, n2)]);
4017 /**
4018 *** nx1 n>2
4019 **/
4020 SA[LCI(idx, j,
4021 n2)] = MIN2(SA[LINIX(idx_3, j + 1,
4022 n2)] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + 2 * extension_cost,
4023 SA[LCI(idx, j, n2)]);
4024 /**
4025 *** 1xn n>2
4026 **/
4027 SA[LCI(idx, j,
4028 n2)] = MIN2(SA[LINIY(idx_1, j + 3,
4029 n2)] + P->mismatch1nI[rtype[type]][SS1[i - 1]][SS2[j + 1]] + iext_ass + iext_ass + 2 * extension_cost,
4030 SA[LCI(idx, j, n2)]);
4031 /**
4032 *** nx0 n>1
4033 **/
4034 int bAU;
4035 bAU = (type > 2 ? P->TerminalAU : 0);
4036 SA[LCI(idx, j,
4037 n2)] =
4038 MIN2(SA[LBXI(idx_2, j + 1, n2)] + 2 * extension_cost + bext + bAU, SA[LCI(idx, j, n2)]);
4039 /**
4040 *** 0xn n>1
4041 **/
4042 SA[LCI(idx, j,
4043 n2)] =
4044 MIN2(SA[LBYI(idx_1, j + 2, n2)] + 2 * extension_cost + bext + bAU, SA[LCI(idx, j, n2)]);
4045 temp = min_colonne;
4046
4047 min_colonne = MIN2(SA[LCI(idx, j, n2)] + vrna_E_ext_stem(rtype[type], SS2[j - 1], SS1[i + 1],
4048 P) + 2 * extension_cost,
4049 min_colonne);
4050 if (temp > min_colonne)
4051 min_j_colonne = j;
4052 }
4053 if (max >= min_colonne) {
4054 max = min_colonne;
4055 max_pos = i;
4056 max_pos_j = min_j_colonne;
4057 }
4058
4059 position[i + delta] = min_colonne;
4060 min_colonne = INF;
4061 position_j[i + delta] = min_j_colonne;
4062 i++;
4063 }
4064 /* printf("MAX: %d",max); */
4065 free(S1);
4066 free(S2);
4067 free(SS1);
4068 free(SS2);
4069 if (max < threshold) {
4070 find_max(position,
4071 position_j,
4072 delta,
4073 threshold,
4074 alignment_length,
4075 s1,
4076 s2,
4077 extension_cost,
4078 fast,
4079 il_a,
4080 il_b,
4081 b_a,
4082 b_b);
4083 }
4084
4085 if (max < INF) {
4086 plot_max(max,
4087 max_pos,
4088 max_pos_j,
4089 alignment_length,
4090 s1,
4091 s2,
4092 extension_cost,
4093 fast,
4094 il_a,
4095 il_b,
4096 b_a,
4097 b_b);
4098 }
4099
4100 free(SA);
4101 free(position);
4102 free(position_j);
4103 return NULL;
4104 }
4105
4106
4107 PRIVATE void
find_max(const int * position,const int * position_j,const int delta,const int threshold,const int alignment_length,const char * s1,const char * s2,const int extension_cost,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)4108 find_max(const int *position,
4109 const int *position_j,
4110 const int delta,
4111 const int threshold,
4112 const int alignment_length,
4113 const char *s1,
4114 const char *s2,
4115 const int extension_cost,
4116 const int fast,
4117 const int il_a,
4118 const int il_b,
4119 const int b_a,
4120 const int b_b)
4121 {
4122 int pos = n1 - 9;
4123
4124 if (fast == 1) {
4125 while (10 < pos--) {
4126 int temp_min = 0;
4127 if (position[pos + delta] < (threshold)) {
4128 int search_range;
4129 search_range = delta + 1;
4130 while (--search_range)
4131 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
4132 temp_min = search_range;
4133
4134 pos -= temp_min;
4135 int max_pos_j;
4136 max_pos_j = position_j[pos + delta];
4137 int max;
4138 max = position[pos + delta];
4139 printf("target upper bound %d: query lower bound %d (%5.2f) \n",
4140 pos - 10,
4141 max_pos_j - 10,
4142 ((double)max) / 100);
4143 pos = MAX2(10, pos + temp_min - delta);
4144 }
4145 }
4146 } else if (fast == 2) {
4147 pos = n1 - 9;
4148 while (10 < pos--) {
4149 int temp_min = 0;
4150 if (position[pos + delta] < (threshold)) {
4151 int search_range;
4152 search_range = delta + 1;
4153 while (--search_range)
4154 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
4155 temp_min = search_range;
4156
4157 pos -= temp_min;
4158 int max_pos_j;
4159 max_pos_j = position_j[pos + delta];
4160 /* max_pos_j und pos entsprechen die realen position
4161 * in der erweiterten sequenz.
4162 * pos=1 -> position 1 in the sequence (and not 0 like in C)
4163 * max_pos_j -> position 1 in the sequence ( not 0 like in C)
4164 */
4165 int alignment_length2;
4166 alignment_length2 = MIN2(n1, n2);
4167 int begin_t = MAX2(11, pos - alignment_length2 + 1); /* 10 */
4168 int end_t = MIN2(n1 - 10, pos + 1);
4169 int begin_q = MAX2(11, max_pos_j - 1); /* 10 */
4170 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length2 - 1);
4171 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2 + 20));
4172 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2 + 20));
4173 strcpy(s3, "NNNNNNNNNN");
4174 strcpy(s4, "NNNNNNNNNN");
4175 strncat(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
4176 strncat(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
4177 strcat(s3, "NNNNNNNNNN");
4178 strcat(s4, "NNNNNNNNNN");
4179 s3[end_t - begin_t + 1 + 20] = '\0';
4180 s4[end_q - begin_q + 1 + 20] = '\0';
4181 duplexT test;
4182 test = fduplexfold(s3, s4, extension_cost, il_a, il_b, b_a, b_b);
4183 if (test.energy * 100 < threshold) {
4184 int l1 = strchr(test.structure, '&') - test.structure;
4185 printf("%s %3d,%-3d : %3d,%-3d (%5.2f) [%5.2f] i:%d,j:%d <%5.2f>\n", test.structure,
4186 begin_t - 10 + test.i - l1 - 10,
4187 begin_t - 10 + test.i - 1 - 10,
4188 begin_q - 10 + test.j - 1 - 10,
4189 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2 - 10,
4190 test.energy, test.energy_backtrack, pos - 10, max_pos_j - 10,
4191 ((double)position[pos + delta]) / 100);
4192 pos = MAX2(10, pos + temp_min - delta);
4193 }
4194
4195 free(s3);
4196 free(s4);
4197 free(test.structure);
4198 }
4199 }
4200 }
4201
4202 #if 0
4203 else if (fast == 3) {
4204 pos = n1 - 9;
4205 while (10 < pos--) {
4206 int temp_min = 0;
4207 if (position[pos + delta] < (threshold)) {
4208 int search_range;
4209 search_range = delta + 1;
4210 while (--search_range)
4211 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
4212 temp_min = search_range;
4213
4214 pos -= temp_min;
4215 int max_pos_j;
4216 max_pos_j = position_j[pos + delta];
4217 /* max_pos_j und pos entsprechen die realen position
4218 * in der erweiterten sequenz.
4219 * pos=1 -> position 1 in the sequence (and not 0 like in C)
4220 * max_pos_j -> position 1 in the sequence ( not 0 like in C)
4221 */
4222 //Here we can start the reverse recursion for the
4223 //Starting from the reported pos / max_pos_j we start the recursion
4224 //We have to be careful with the fact that all energies are inverted.
4225
4226 int alignment_length2;
4227 //Select the smallest interaction length in order to define the new interaction length
4228 alignment_length2 = MIN2(n1 - pos + 1, max_pos_j - 1 + 1);
4229 //
4230 int begin_t = MAX2(11, pos - alignment_length2 + 1); /* 10 */
4231 int end_t = MIN2(n1 - 10, pos + 1);
4232 int begin_q = MAX2(11, max_pos_j - 1); /* 10 */
4233 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length2 - 1);
4234 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2 + 20));
4235 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2 + 20));
4236 strcpy(s3, "NNNNNNNNNN");
4237 strcpy(s4, "NNNNNNNNNN");
4238 strncat(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
4239 strncat(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
4240 strcat(s3, "NNNNNNNNNN");
4241 strcat(s4, "NNNNNNNNNN");
4242 s3[end_t - begin_t + 1 + 20] = '\0';
4243 s4[end_q - begin_q + 1 + 20] = '\0';
4244 duplexT test;
4245 test = fduplexfold(s4, s3, extension_cost, il_a, il_b, b_a, b_b);
4246 if (test.energy * 100 < threshold) {
4247 int structureLength = strlen(test.structure);
4248 int l1 = strchr(test.structure, '&') - test.structure;
4249 int start_t, end_t, start_q, end_q;
4250
4251
4252 /*reverse structure string*/
4253 char *reverseStructure = (char *)vrna_alloc(sizeof(char) * (structureLength + 1));
4254 int posStructure;
4255 for (posStructure = l1 + 1; posStructure < structureLength; posStructure++) {
4256 if (test.structure[posStructure] == ')')
4257 reverseStructure[posStructure - l1 - 1] = '(';
4258 else
4259 reverseStructure[posStructure - l1 - 1] = test.structure[posStructure];
4260 }
4261 reverseStructure[structureLength - 1 - l1] = '&';
4262 for (posStructure = 0; posStructure < l1; posStructure++) {
4263 if (test.structure[posStructure] == '(')
4264 reverseStructure[structureLength + posStructure - l1] = ')';
4265 else
4266 reverseStructure[structureLength + posStructure - l1] = test.structure[posStructure];
4267 }
4268 reverseStructure[structureLength] = '\0';
4269 // l1=strchr(reverse.structure, '&')-test.structure;
4270
4271
4272 printf("%s %3d,%-3d : %3d,%-3d (%5.2f) [%5.2f] i:%d,j:%d <%5.2f>\n",
4273 reverseStructure,
4274 begin_t - 10 + test.j - 1 - 10,
4275 (begin_t - 11) + test.j + strlen(test.structure) - l1 - 2 - 10,
4276 begin_q - 10 + test.i - l1 - 10,
4277 begin_q - 10 + test.i - 1 - 10,
4278 test.energy,
4279 test.energy_backtrack,
4280 pos,
4281 max_pos_j,
4282 ((double)position[pos + delta]) / 100);
4283 pos = MAX2(10, pos + temp_min - delta);
4284 }
4285
4286 free(s3);
4287 free(s4);
4288 free(test.structure);
4289 }
4290 }
4291 }
4292 #endif
4293 else {
4294 pos = n1 - 9;
4295 while (10 < pos--) {
4296 int temp_min = 0;
4297 if (position[pos + delta] < (threshold)) {
4298 int search_range;
4299 search_range = delta + 1;
4300 while (--search_range)
4301 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
4302 temp_min = search_range;
4303
4304 pos -= temp_min;
4305 int max_pos_j;
4306 max_pos_j = position_j[pos + delta];
4307 /* max_pos_j und pos entsprechen die realen position
4308 * in der erweiterten sequenz.
4309 * pos=1 -> position 1 in the sequence (and not 0 like in C)
4310 * max_pos_j -> position 1 in the sequence ( not 0 like in C)
4311 */
4312 int alignment_length2;
4313 alignment_length2 = MIN2(n1, n2);
4314 int begin_t = MAX2(11, pos - alignment_length2 + 1); /* 10 */
4315 int end_t = MIN2(n1 - 10, pos + 1);
4316 int begin_q = MAX2(11, max_pos_j - 1); /* 10 */
4317 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length2 - 1);
4318 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
4319 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
4320 strncpy(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
4321 strncpy(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
4322 s3[end_t - begin_t + 1] = '\0';
4323 s4[end_q - begin_q + 1] = '\0';
4324 duplexT test;
4325 test = duplexfold(s3, s4, extension_cost);
4326 if (test.energy * 100 < threshold) {
4327 int l1 = strchr(test.structure, '&') - test.structure;
4328 printf("%s %3d,%-3d : %3d,%-3d (%5.2f) i:%d,j:%d <%5.2f>\n", test.structure,
4329 begin_t - 10 + test.i - l1,
4330 begin_t - 10 + test.i - 1,
4331 begin_q - 10 + test.j - 1,
4332 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2,
4333 test.energy, pos - 10, max_pos_j - 10, ((double)position[pos + delta]) / 100);
4334 pos = MAX2(10, pos + temp_min - delta);
4335 }
4336
4337 free(s3);
4338 free(s4);
4339 free(test.structure);
4340 }
4341 }
4342 }
4343 }
4344
4345
4346 PRIVATE void
plot_max(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1,const char * s2,const int extension_cost,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)4347 plot_max(const int max,
4348 const int max_pos,
4349 const int max_pos_j,
4350 const int alignment_length,
4351 const char *s1,
4352 const char *s2,
4353 const int extension_cost,
4354 const int fast,
4355 const int il_a,
4356 const int il_b,
4357 const int b_a,
4358 const int b_b)
4359 {
4360 if (fast == 1) {
4361 printf("target upper bound %d: query lower bound %d (%5.2f)\n", max_pos - 10, max_pos_j - 10,
4362 ((double)max) / 100);
4363 } else if (fast == 2) {
4364 int alignment_length2;
4365 alignment_length2 = MIN2(n1, n2);
4366 int begin_t = MAX2(11, max_pos - alignment_length2 + 1); /* 10 */
4367 int end_t = MIN2(n1 - 10, max_pos + 1);
4368 int begin_q = MAX2(11, max_pos_j - 1); /* 10 */
4369 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length2 - 1);
4370 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2 + 20));
4371 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2 + 20));
4372 strcpy(s3, "NNNNNNNNNN");
4373 strcpy(s4, "NNNNNNNNNN");
4374 strncat(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
4375 strncat(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
4376 strcat(s3, "NNNNNNNNNN");
4377 strcat(s4, "NNNNNNNNNN");
4378 s3[end_t - begin_t + 1 + 20] = '\0';
4379 s4[end_q - begin_q + 1 + 20] = '\0';
4380 duplexT test;
4381 test = fduplexfold(s3, s4, extension_cost, il_a, il_b, b_a, b_b);
4382 int l1 = strchr(test.structure, '&') - test.structure;
4383 printf("%s %3d,%-3d : %3d,%-3d (%5.2f) [%5.2f] i:%d,j:%d <%5.2f>\n", test.structure,
4384 begin_t - 10 + test.i - l1 - 10,
4385 begin_t - 10 + test.i - 1 - 10,
4386 begin_q - 10 + test.j - 1 - 10,
4387 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2 - 10,
4388 test.energy, test.energy_backtrack, max_pos - 10, max_pos_j - 10, ((double)max) / 100);
4389 free(s3);
4390 free(s4);
4391 free(test.structure);
4392 } else {
4393 duplexT test;
4394 int alignment_length2;
4395 alignment_length2 = MIN2(n1, n2);
4396 int begin_t = MAX2(11, max_pos - alignment_length2 + 1);
4397 int end_t = MIN2(n1 - 10, max_pos + 1);
4398 int begin_q = MAX2(11, max_pos_j - 1);
4399 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length2 - 1);
4400 char *s3 = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
4401 char *s4 = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
4402 strncpy(s3, (s1 + begin_t - 1), end_t - begin_t + 1);
4403 strncpy(s4, (s2 + begin_q - 1), end_q - begin_q + 1);
4404 s3[end_t - begin_t + 1] = '\0';
4405 s4[end_q - begin_q + 1] = '\0';
4406 test = duplexfold(s3, s4, extension_cost);
4407 int l1 = strchr(test.structure, '&') - test.structure;
4408 printf("%s %3d,%-3d : %3d,%-3d (%5.2f) i:%d,j:%d <%5.2f>\n", test.structure,
4409 begin_t - 10 + test.i - l1,
4410 begin_t - 10 + test.i - 1,
4411 begin_q - 10 + test.j - 1,
4412 (begin_q - 11) + test.j + (int)strlen(test.structure) - l1 - 2,
4413 test.energy, max_pos - 10, max_pos_j - 10, ((double)max) / 100);
4414 free(s3);
4415 free(s4);
4416 free(test.structure);
4417 }
4418 }
4419
4420
4421 PRIVATE void
update_dfold_params(void)4422 update_dfold_params(void)
4423 {
4424 vrna_md_t md;
4425
4426 if (P)
4427 free(P);
4428
4429 set_model_details(&md);
4430 P = vrna_params(&md);
4431 make_pair_matrix();
4432 }
4433
4434
4435 PRIVATE void
encode_seqs(const char * s1,const char * s2)4436 encode_seqs(const char *s1,
4437 const char *s2)
4438 {
4439 unsigned int i, l;
4440
4441 l = strlen(s1);
4442 S1 = encode_seq(s1);
4443 SS1 = (short *)vrna_alloc(sizeof(short) * (l + 1));
4444 /* SS1 exists only for the special X K and I bases and energy_set!=0 */
4445
4446 for (i = 1; i <= l; i++) /* make numerical encoding of sequence */
4447 SS1[i] = alias[S1[i]]; /* for mismatches of nostandard bases */
4448
4449 l = strlen(s2);
4450 S2 = encode_seq(s2);
4451 SS2 = (short *)vrna_alloc(sizeof(short) * (l + 1));
4452 /* SS2 exists only for the special X K and I bases and energy_set!=0 */
4453
4454 for (i = 1; i <= l; i++) /* make numerical encoding of sequence */
4455 SS2[i] = alias[S2[i]]; /* for mismatches of nostandard bases */
4456 }
4457
4458
4459 PRIVATE short *
encode_seq(const char * sequence)4460 encode_seq(const char *sequence)
4461 {
4462 unsigned int i, l;
4463 short *S;
4464
4465 l = strlen(sequence);
4466 S = (short *)vrna_alloc(sizeof(short) * (l + 2));
4467 S[0] = (short)l;
4468
4469 /* make numerical encoding of sequence */
4470 for (i = 1; i <= l; i++)
4471 S[i] = (short)encode_char(toupper(sequence[i - 1]));
4472
4473 /* for circular folding add first base at position n+1 */
4474 S[l + 1] = S[1];
4475
4476 return S;
4477 }
4478
4479
4480 int
arraySize(duplexT ** array)4481 arraySize(duplexT **array)
4482 {
4483 int site_count = 0;
4484
4485 while (array[site_count] != NULL)
4486 site_count++;
4487 return site_count;
4488 }
4489
4490
4491 void
freeDuplexT(duplexT ** array)4492 freeDuplexT(duplexT **array)
4493 {
4494 int size = arraySize(array);
4495
4496 while (--size) {
4497 free(array[size]->structure);
4498 free(array[size]);
4499 }
4500 free(array[0]->structure);
4501 free(array);
4502 }
4503