1 /*
2 * compute the duplex structure of two RNA strands,
3 * allowing only inter-strand base pairs.
4 * see cofold() for computing hybrid structures without
5 * restriction.
6 * Ivo Hofacker
7 * Vienna RNA package
8 *
9 */
10
11
12 /*
13 * library containing the function used in rnaplex
14 * the program rnaplex uses the following function
15 * Lduplexfold: finds high scoring segments
16 * it stores the end-position of these segments in an array
17 * and call then for each of these positions the duplexfold function
18 * which allows one to make backtracking for each of the high scoring position
19 * It allows one to find suboptimal partially overlapping (depends on a a parameter)
20 * duplexes between a long RNA and a shorter one.
21 * Contrarly to RNAduplex, the energy model is not in E~log(N),
22 * where N is the length of an interial loop but used an affine model,
23 * where the extension and begin parameter are fitted to the energy
24 * parameter used by RNAduplex. This allows one to check for duplex between a short RNA(20nt)
25 * and a long one at the speed of 1Mnt/s. At this speed the whole genome (3Gnt) can be analyzed for one siRNA
26 * in about 50 minutes.
27 * The algorithm is based on an idea by Durbin and Eddy:when the alginment reach a value larger than a
28 * given threshold this value is stored in an array. When the alignment score goes
29 * then under this threshold, the alignemnent begin from this value, in that way the backtracking allow us
30 * to find all non-overlapping high-scoring segments.
31 * For more information check "durbin, biological sequence analysis"
32 */
33
34 #ifdef HAVE_CONFIG_H
35 #include "config.h"
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <math.h>
41 #include <ctype.h>
42 #include <string.h>
43 #include "ViennaRNA/utils/basic.h"
44 #include "ViennaRNA/params/constants.h"
45 #include "ViennaRNA/params/default.h"
46 #include "ViennaRNA/fold_vars.h"
47 #include "ViennaRNA/fold.h"
48 #include "ViennaRNA/pair_mat.h"
49 #include "ViennaRNA/params/basic.h"
50 #include "ViennaRNA/loops/all.h"
51 #include "ViennaRNA/plex.h"
52 #include "ViennaRNA/ali_plex.h"
53
54 /**
55 *** Due to the great similarity between functions,
56 *** more annotation can be found in plex.c
57 **/
58
59 PRIVATE short *
60 encode_seq(const char *seq);
61
62
63 PRIVATE void
64 update_dfold_params(void);
65
66
67 /**
68 *** aliduplexfold(_XS)/alibacktrack(_XS) computes duplex interaction with standard energy and considers extension_cost
69 *** alifind_max(_XS)/aliplot_max(_XS) find suboptimals and MFE
70 **/
71 PRIVATE duplexT
72 aliduplexfold(const char *s1[],
73 const char *s2[],
74 const int extension_cost);
75
76
77 PRIVATE char *
78 alibacktrack(int n3,
79 int n4,
80 int i,
81 int j,
82 const short *s1[],
83 const short *s2[],
84 const int extension_cost);
85
86
87 PRIVATE void
88 alifind_max(const int *position,
89 const int *position_j,
90 const int delta,
91 const int threshold,
92 const int alignment_length,
93 const char *s1[],
94 const char *s2[],
95 const int extension_cost,
96 const int fast);
97
98
99 PRIVATE void
100 aliplot_max(const int max,
101 const int max_pos,
102 const int max_pos_j,
103 const int alignment_length,
104 const char *s1[],
105 const char *s2[],
106 const int extension_cost,
107 const int fast);
108
109
110 PRIVATE duplexT
111 aliduplexfold_XS(const char *s1[],
112 const char *s2[],
113 const int **access_s1,
114 const int **access_s2,
115 const int i_pos,
116 const int j_pos,
117 const int threshold,
118 const int i_flag,
119 const int j_flag);
120
121
122 PRIVATE char *
123 alibacktrack_XS(int n3,
124 int n4,
125 int i,
126 int j,
127 const short *s1[],
128 const short *s2[],
129 const int **access_s1,
130 const int **access_s2,
131 const int i_flag,
132 const int j_flag);
133
134
135 PRIVATE void
136 alifind_max_XS(const int *position,
137 const int *position_j,
138 const int delta,
139 const int threshold,
140 const int alignment_length,
141 const char *s1[],
142 const char *s2[],
143 const int **access_s1,
144 const int **access_s2,
145 const int fast);
146
147
148 PRIVATE void
149 aliplot_max_XS(const int max,
150 const int max_pos,
151 const int max_pos_j,
152 const int alignment_length,
153 const char *s1[],
154 const char *s2[],
155 const int **access_s1,
156 const int **access_s2,
157 const int fast);
158
159
160 /**
161 *** computes covariance score
162 **/
163
164 PRIVATE int
165 covscore(const int *types,
166 int n_seq);
167
168
169 extern double cv_fact; /* from alifold.c, default 1 */
170 extern double nc_fact;
171
172
173 /*@unused@*/
174
175 #define MAXSECTORS 500 /* dimension for a backtrack array */
176 #define LOCALITY 0. /* locality parameter for base-pairs */
177
178 PRIVATE vrna_param_t *P = NULL;
179 PRIVATE int **c = NULL;
180 PRIVATE int **lc = NULL, **lin = NULL, **lbx = NULL, **lby = NULL, **linx = NULL,
181 **liny = NULL;
182
183
184 PRIVATE int n1, n2;
185 PRIVATE int n3, n4;
186 PRIVATE int delay_free = 0;
187
188
189 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
190
191
192 /*----------------------------------------------ALIDUPLEXFOLD-----------------------------------------------------------------------------------------------------------*/
193 PRIVATE duplexT
aliduplexfold(const char * s1[],const char * s2[],const int extension_cost)194 aliduplexfold(const char *s1[],
195 const char *s2[],
196 const int extension_cost)
197 {
198 int i, j, s, n_seq, Emin = INF, i_min = 0, j_min = 0;
199 char *struc;
200 duplexT mfe;
201 vrna_md_t md;
202 short **S1, **S2;
203 int *type;
204
205 n3 = (int)strlen(s1[0]);
206 n4 = (int)strlen(s2[0]);
207 for (s = 0; s1[s] != NULL; s++);
208 n_seq = s;
209 for (s = 0; s2[s] != NULL; s++);
210 if (n_seq != s)
211 vrna_message_error("unequal number of sequences in aliduplexfold()\n");
212
213 set_model_details(&md);
214 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
215 update_fold_params();
216 if (P)
217 free(P);
218
219 P = vrna_params(&md);
220 make_pair_matrix();
221 }
222
223 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
224 for (i = 1; i <= n3; i++)
225 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
226
227 S1 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
228 S2 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
229 for (s = 0; s < n_seq; s++) {
230 if (strlen(s1[s]) != n3)
231 vrna_message_error("uneqal seqence lengths");
232
233 if (strlen(s2[s]) != n4)
234 vrna_message_error("uneqal seqence lengths");
235
236 S1[s] = encode_seq(s1[s]);
237 S2[s] = encode_seq(s2[s]);
238 }
239 type = (int *)vrna_alloc(n_seq * sizeof(int));
240
241 for (i = 1; i <= n3; i++) {
242 for (j = n4; j > 0; j--) {
243 int k, l, E, psc;
244 for (s = 0; s < n_seq; s++)
245 type[s] = pair[S1[s][i]][S2[s][j]];
246 psc = covscore(type, n_seq);
247 for (s = 0; s < n_seq; s++)
248 if (type[s] == 0)
249 type[s] = 7;
250
251 c[i][j] = (psc >= MINPSCORE) ? (n_seq * (P->DuplexInit + 2 * extension_cost)) : INF;
252 if (psc < MINPSCORE)
253 continue;
254
255 for (s = 0; s < n_seq; s++)
256 c[i][j] +=
257 vrna_E_ext_stem(type[s], (i > 1) ? S1[s][i - 1] : -1, (j < n4) ? S2[s][j + 1] : -1,
258 P) + 2 * extension_cost;
259 for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
260 for (l = j + 1; l <= n4; l++) {
261 int type2;
262 if (i - k + l - j - 2 > MAXLOOP)
263 break;
264
265 if (c[k][l] > INF / 2)
266 continue;
267
268 for (E = s = 0; s < n_seq; s++) {
269 type2 = pair[S1[s][k]][S2[s][l]];
270 if (type2 == 0)
271 type2 = 7;
272
273 E += E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type[s]],
274 S1[s][k + 1], S2[s][l - 1], S1[s][i - 1], S2[s][j + 1],
275 P) + (i - k + l - j) * extension_cost;
276 }
277 c[i][j] = MIN2(c[i][j], c[k][l] + E);
278 }
279 }
280 c[i][j] -= psc;
281 E = c[i][j];
282 for (s = 0; s < n_seq; s++)
283 E +=
284 vrna_E_ext_stem(rtype[type[s]], (j > 1) ? S2[s][j - 1] : -1, (i < n3) ? S1[s][i + 1] : -1,
285 P) + 2 * extension_cost;
286 if (E < Emin) {
287 Emin = E;
288 i_min = i;
289 j_min = j;
290 }
291 }
292 }
293 struc =
294 alibacktrack(n3, n4, i_min, j_min, (const short int **)S1, (const short int **)S2, extension_cost);
295 if (i_min < n3)
296 i_min++;
297
298 if (j_min > 1)
299 j_min--;
300
301 int size;
302 size = strlen(struc) - 1;
303 Emin -= size * n_seq * extension_cost;
304 mfe.i = i_min;
305 mfe.j = j_min;
306 mfe.energy = (float)(Emin / (100. * n_seq));
307 mfe.structure = struc;
308 if (!delay_free) {
309 for (i = 1; i <= n3; i++)
310 free(c[i]);
311 free(c);
312 }
313
314 for (s = 0; s < n_seq; s++) {
315 free(S1[s]);
316 free(S2[s]);
317 }
318 free(S1);
319 free(S2);
320 free(type);
321 return mfe;
322 }
323
324
325 PRIVATE char *
alibacktrack(int n3,int n4,int i,int j,const short * S1[],const short * S2[],const int extension_cost)326 alibacktrack(int n3,
327 int n4,
328 int i,
329 int j,
330 const short *S1[],
331 const short *S2[],
332 const int extension_cost)
333 {
334 /* backtrack structure going backwards from i, and forwards from j
335 * return structure in bracket notation with & as separator */
336 int k, l, *type, type2, E, traced, i0, j0, s, n_seq;
337 char *st1, *st2, *struc;
338
339 for (s = 0; S1[s] != NULL; s++);
340 n_seq = s;
341 for (s = 0; S2[s] != NULL; s++);
342 if (n_seq != s)
343 vrna_message_error("unequal number of sequences in alibacktrack()\n");
344
345 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
346 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
347 type = (int *)vrna_alloc(n_seq * sizeof(int));
348
349 i0 = MIN2(i + 1, n3);
350 j0 = MAX2(j - 1, 1);
351
352 while (i > 0 && j <= n4) {
353 int psc;
354 E = c[i][j];
355 traced = 0;
356 st1[i - 1] = '(';
357 st2[j - 1] = ')';
358 for (s = 0; s < n_seq; s++)
359 type[s] = pair[S1[s][i]][S2[s][j]];
360 psc = covscore(type, n_seq);
361 for (s = 0; s < n_seq; s++)
362 if (type[s] == 0)
363 type[s] = 7;
364
365 E += psc;
366 for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
367 for (l = j + 1; l <= n4; l++) {
368 int LE;
369 if (i - k + l - j - 2 > MAXLOOP)
370 break;
371
372 if (c[k][l] > INF / 2)
373 continue;
374
375 for (s = LE = 0; s < n_seq; s++) {
376 type2 = pair[S1[s][k]][S2[s][l]];
377 if (type2 == 0)
378 type2 = 7;
379
380 LE += E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type[s]],
381 S1[s][k + 1], S2[s][l - 1], S1[s][i - 1], S2[s][j + 1],
382 P) + (i - k + l - j) * extension_cost;
383 }
384 if (E == c[k][l] + LE) {
385 traced = 1;
386 i = k;
387 j = l;
388 break;
389 }
390 }
391 if (traced)
392 break;
393 }
394 if (!traced) {
395 for (s = 0; s < n_seq; s++)
396 E -=
397 vrna_E_ext_stem(type[s], (i > 1) ? S1[s][i - 1] : -1, (j < n4) ? S2[s][j + 1] : -1,
398 P) + 2 * extension_cost;
399 if (E != n_seq * P->DuplexInit + n_seq * 2 * extension_cost)
400 vrna_message_error("backtrack failed in aliduplex");
401 else
402 break;
403 }
404 }
405 if (i > 1)
406 i--;
407
408 if (j < n4)
409 j++;
410
411 struc = (char *)vrna_alloc(i0 - i + 1 + j - j0 + 1 + 2);
412 for (k = MAX2(i, 1); k <= i0; k++)
413 if (!st1[k - 1])
414 st1[k - 1] = '.';
415
416 for (k = j0; k <= j; k++)
417 if (!st2[k - 1])
418 st2[k - 1] = '.';
419
420 strcpy(struc, st1 + MAX2(i - 1, 0));
421 strcat(struc, "&");
422 strcat(struc, st2 + j0 - 1);
423
424 /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j); */
425 free(st1);
426 free(st2);
427 free(type);
428
429 return struc;
430 }
431
432
433 duplexT **
aliLduplexfold(const char * s1[],const char * s2[],const int threshold,const int extension_cost,const int alignment_length,const int delta,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)434 aliLduplexfold(const char *s1[],
435 const char *s2[],
436 const int threshold,
437 const int extension_cost,
438 const int alignment_length,
439 const int delta,
440 const int fast,
441 const int il_a,
442 const int il_b,
443 const int b_a,
444 const int b_b)
445 {
446 short **S1, **S2;
447 int *type, type2;
448 int i, j, s, n_seq;
449
450 s = 0;
451 int bopen = b_b;
452 int bext = b_a + extension_cost;
453 int iopen = il_b;
454 int iext_s = 2 * (il_a + extension_cost); /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
455 int iext_ass = 50 + il_a + extension_cost; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
456 int min_colonne = INF; /* enthaelt das maximum einer kolonne */
457 int i_length;
458 int max_pos; /* get position of the best hit */
459 int max_pos_j;
460 int temp;
461 int min_j_colonne;
462 int max = INF;
463 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
464 int *position; /* contains the position of the hits with energy > E */
465 int *position_j;
466
467
468 n1 = (int)strlen(s1[0]);
469 n2 = (int)strlen(s2[0]);
470 for (s = 0; s1[s]; s++);
471 n_seq = s;
472 for (s = 0; s2[s]; s++);
473 if (n_seq != s)
474 vrna_message_error("unequal number of sequences in aliduplexfold()\n");
475
476 position = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
477 position_j = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
478
479 if ((!P) || (fabs(P->temperature - temperature) > 1e-6))
480 update_dfold_params();
481
482 lc = (int **)vrna_alloc(sizeof(int *) * 5);
483 lin = (int **)vrna_alloc(sizeof(int *) * 5);
484 lbx = (int **)vrna_alloc(sizeof(int *) * 5);
485 lby = (int **)vrna_alloc(sizeof(int *) * 5);
486 linx = (int **)vrna_alloc(sizeof(int *) * 5);
487 liny = (int **)vrna_alloc(sizeof(int *) * 5);
488
489 for (i = 0; i <= 4; i++) {
490 lc[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
491 lin[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
492 lbx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
493 lby[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
494 linx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
495 liny[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
496 }
497
498
499 S1 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
500 S2 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
501 for (s = 0; s < n_seq; s++) {
502 if (strlen(s1[s]) != n1)
503 vrna_message_error("uneqal seqence lengths");
504
505 if (strlen(s2[s]) != n2)
506 vrna_message_error("uneqal seqence lengths");
507
508 S1[s] = encode_seq(s1[s]);
509 S2[s] = encode_seq(s2[s]);
510 }
511 type = (int *)vrna_alloc(n_seq * sizeof(int));
512 /**
513 *** array initialization
514 **/
515 for (j = n2; j >= 0; j--) {
516 lbx[0][j] = lbx[1][j] = lbx[2][j] = lbx[3][j] = lbx[4][j] = INF;
517 lin[0][j] = lin[1][j] = lin[2][j] = lin[3][j] = lin[4][j] = INF;
518 lc[0][j] = lc[1][j] = lc[2][j] = lc[3][j] = lc[4][j] = INF;
519 lby[0][j] = lby[1][j] = lby[2][j] = lby[3][j] = lby[4][j] = INF;
520 liny[0][j] = liny[1][j] = liny[2][j] = liny[3][j] = liny[4][j] = INF;
521 linx[0][j] = linx[1][j] = linx[2][j] = linx[3][j] = linx[4][j] = INF;
522 }
523 i = 10;
524 i_length = n1 - 9;
525 while (i < i_length) {
526 int idx = i % 5;
527 int idx_1 = (i - 1) % 5;
528 int idx_2 = (i - 2) % 5;
529 int idx_3 = (i - 3) % 5;
530 int idx_4 = (i - 4) % 5;
531 j = n2 - 9;
532 while (9 < --j) {
533 int psc;
534 for (s = 0; s < n_seq; s++)
535 type[s] = pair[S1[s][i]][S2[s][j]];
536 psc = covscore(type, n_seq);
537 for (s = 0; s < n_seq; s++)
538 if (type[s] == 0)
539 type[s] = 7;
540
541 lc[idx][j] = (psc >= MINPSCORE) ? (n_seq * P->DuplexInit + 2 * n_seq * extension_cost) : INF;
542 /**
543 *** Update matrix. It is the average over all sequence of a given structure element
544 *** c_stack -> stacking of c
545 *** c_10, c01 -> stack from bulge
546 *** c_nm -> arrives in stack from nxm loop
547 *** c_in -> arrives in stack from interior loop
548 *** c_bx -> arrives in stack from large bulge on target
549 *** c_by -> arrives in stack from large bulge on query
550 ***
551 **/
552 int c_stack, c_10, c_01, c_11, c_22, c_21, c_12, c_23, c_32, c_in, c_in2x, c_in2y, c_bx, c_by,
553 c_inx, c_iny; /* matrix c */
554 int in, in_x, in_y, in_xy; /* in begin, in_x assymetric, in_y assymetric, in_xy symetric; */
555 int inx, inx_x;
556 int iny, iny_y;
557 int bx, bx_x;
558 int by, by_y;
559 in = lc[idx_1][j + 1];
560 in_x = lin[idx_1][j];
561 in_y = lin[idx][j + 1];
562 in_xy = lin[idx_1][j + 1];
563 inx = lc[idx_1][j + 1];
564 inx_x = linx[idx_1][j];
565 iny = lc[idx_1][j + 1];
566 iny_y = liny[idx][j + 1];
567 bx = lc[idx_1][j];
568 bx_x = lbx[idx_1][j];
569 by = lc[idx][j + 1];
570 by_y = lby[idx][j + 1];
571 c_stack = lc[idx_1][j + 1];
572 c_01 = lc[idx_1][j + 2];
573 c_10 = lc[idx_2][j + 1];
574 c_12 = lc[idx_2][j + 3];
575 c_21 = lc[idx_3][j + 2];
576 c_11 = lc[idx_2][j + 2];
577 c_22 = lc[idx_3][j + 3];
578 c_32 = lc[idx_4][j + 3];
579 c_23 = lc[idx_3][j + 4];
580 c_in = lin[idx_3][j + 3];
581 c_in2x = lin[idx_4][j + 2];
582 c_in2y = lin[idx_2][j + 4];
583 c_inx = linx[idx_3][j + 1];
584 c_iny = liny[idx_1][j + 3];
585 c_bx = lbx[idx_2][j + 1];
586 c_by = lby[idx_1][j + 2];
587 for (s = 0; s < n_seq; s++) {
588 type2 = pair[S2[s][j + 1]][S1[s][i - 1]];
589 in += P->mismatchI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s;
590 in_x += iext_ass;
591 in_y += iext_ass;
592 in_xy += iext_s;
593 inx += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s;
594 inx_x += iext_ass;
595 iny += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s;
596 iny_y += iext_ass;
597 type2 = pair[S2[s][j]][S1[s][i - 1]];
598 bx += bopen + bext + (type2 > 2 ? P->TerminalAU : 0);
599 bx_x += bext;
600 type2 = pair[S2[s][j + 1]][S1[s][i]];
601 by += bopen + bext + (type2 > 2 ? P->TerminalAU : 0);
602 by_y += bext;
603 }
604 lin [idx][j] = MIN2(in, MIN2(in_x, MIN2(in_y, in_xy)));
605 linx[idx][j] = MIN2(inx_x, inx);
606 liny[idx][j] = MIN2(iny_y, iny);
607 lby[idx][j] = MIN2(by, by_y);
608 lbx[idx][j] = MIN2(bx, bx_x);
609
610 if (psc < MINPSCORE)
611 continue;
612
613 for (s = 0; s < n_seq; s++)
614 lc[idx][j] += vrna_E_ext_stem(type[s], S1[s][i - 1], S2[s][j + 1], P) + 2 * extension_cost;
615 for (s = 0; s < n_seq; s++) {
616 type2 = pair[S1[s][i - 1]][S2[s][j + 1]];
617 if (type2 == 0)
618 type2 = 7;
619
620 c_stack +=
621 E_IntLoop(0, 0, type2, rtype[type[s]], S1[s][i], S2[s][j], S1[s][i - 1], S2[s][j + 1],
622 P) + 2 * extension_cost;
623 type2 = pair[S1[s][i - 1]][S2[s][j + 2]];
624 if (type2 == 0)
625 type2 = 7;
626
627 c_01 +=
628 E_IntLoop(0, 1, type2, rtype[type[s]], S1[s][i], S2[s][j + 1], S1[s][i - 1], S2[s][j + 1],
629 P) + 3 * extension_cost;
630 type2 = pair[S1[s][i - 2]][S2[s][j + 1]];
631 if (type2 == 0)
632 type2 = 7;
633
634 c_10 +=
635 E_IntLoop(1, 0, type2, rtype[type[s]], S1[s][i - 1], S2[s][j], S1[s][i - 1], S2[s][j + 1],
636 P) + 3 * extension_cost;
637 type2 = pair[S1[s][i - 2]][S2[s][j + 2]];
638 if (type2 == 0)
639 type2 = 7;
640
641 c_11 += E_IntLoop(1,
642 1,
643 type2,
644 rtype[type[s]],
645 S1[s][i - 1],
646 S2[s][j + 1],
647 S1[s][i - 1],
648 S2[s][j + 1],
649 P) + 4 * extension_cost;
650 type2 = pair[S1[s][i - 3]][S2[s][j + 3]];
651 if (type2 == 0)
652 type2 = 7;
653
654 c_22 += E_IntLoop(2,
655 2,
656 type2,
657 rtype[type[s]],
658 S1[s][i - 2],
659 S2[s][j + 2],
660 S1[s][i - 1],
661 S2[s][j + 1],
662 P) + 6 * extension_cost;
663 type2 = pair[S1[s][i - 3]][S2[s][j + 2]];
664 if (type2 == 0)
665 type2 = 7;
666
667 c_21 += E_IntLoop(2,
668 1,
669 type2,
670 rtype[type[s]],
671 S1[s][i - 2],
672 S2[s][j + 1],
673 S1[s][i - 1],
674 S2[s][j + 1],
675 P) + 5 * extension_cost;
676 type2 = pair[S1[s][i - 2]][S2[s][j + 3]];
677 if (type2 == 0)
678 type2 = 7;
679
680 c_12 += E_IntLoop(1,
681 2,
682 type2,
683 rtype[type[s]],
684 S1[s][i - 1],
685 S2[s][j + 2],
686 S1[s][i - 1],
687 S2[s][j + 1],
688 P) + 5 * extension_cost;
689 type2 = pair[S1[s][i - 4]][S2[s][j + 3]];
690 if (type2 == 0)
691 type2 = 7;
692
693 c_32 += E_IntLoop(3,
694 2,
695 type2,
696 rtype[type[s]],
697 S1[s][i - 3],
698 S2[s][j + 2],
699 S1[s][i - 1],
700 S2[s][j + 1],
701 P) + 7 * extension_cost;
702 type2 = pair[S1[s][i - 3]][S2[s][j + 4]];
703 if (type2 == 0)
704 type2 = 7;
705
706 c_23 += E_IntLoop(2,
707 3,
708 type2,
709 rtype[type[s]],
710 S1[s][i - 2],
711 S2[s][j + 3],
712 S1[s][i - 1],
713 S2[s][j + 1],
714 P) + 7 * extension_cost;
715 c_in += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + 2 * extension_cost +
716 2 * iext_s;
717 c_in2x += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
718 iext_ass + 2 * extension_cost;
719 c_in2y += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
720 iext_ass + 2 * extension_cost;
721 c_inx += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
722 iext_ass + 2 * extension_cost;
723 c_iny += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
724 iext_ass + 2 * extension_cost;
725 int bAU;
726 bAU = (type[s] > 2 ? P->TerminalAU : 0);
727 c_bx += 2 * extension_cost + bext + bAU;
728 c_by += 2 * extension_cost + bext + bAU;
729 }
730 lc[idx][j] = MIN2(lc[idx][j],
731 MIN2(c_stack,
732 MIN2(c_10,
733 MIN2(c_01,
734 MIN2(c_11,
735 MIN2(c_21,
736 MIN2(c_12,
737 MIN2(c_22,
738 MIN2(c_23,
739 MIN2(c_32,
740 MIN2(c_bx,
741 MIN2(c_by,
742 MIN2(c_in,
743 MIN2(c_in2x,
744 MIN2(c_in2y,
745 MIN2(
746 c_inx,
747 c_iny)
748 )
749 )
750 )
751 )
752 )
753 )
754 )
755 )
756 )
757 )
758 )
759 )
760 )
761 )
762 );
763 lc[idx][j] -= psc;
764 temp = lc[idx][j];
765 for (s = 0; s < n_seq; s++)
766 temp += vrna_E_ext_stem(rtype[type[s]], S2[s][j - 1], S1[s][i + 1], P) + 2 * extension_cost;
767 if (min_colonne > temp) {
768 min_colonne = temp;
769 min_j_colonne = j;
770 }
771 }
772 if (max >= min_colonne) {
773 max = min_colonne;
774 max_pos = i;
775 max_pos_j = min_j_colonne;
776 }
777
778 position[i + delta] = min_colonne;
779 min_colonne = INF;
780 position_j[i + delta] = min_j_colonne;
781 i++;
782 }
783 /* printf("MAX:%d ",max); */
784 for (s = 0; s < n_seq; s++) {
785 free(S1[s]);
786 free(S2[s]);
787 }
788 free(S1);
789 free(S2);
790 if (max < threshold) {
791 alifind_max(position,
792 position_j,
793 delta,
794 threshold,
795 alignment_length,
796 s1,
797 s2,
798 extension_cost,
799 fast);
800 }
801
802 aliplot_max(max, max_pos, max_pos_j, alignment_length, s1, s2, extension_cost, fast);
803 for (i = 0; i <= 4; i++) {
804 free(lc[i]);
805 free(lin[i]);
806 free(lbx[i]);
807 free(lby[i]);
808 free(linx[i]);
809 free(liny[i]);
810 }
811 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]);free(linx[0]);free(liny[0]); */
812 free(lc);
813 free(lin);
814 free(lbx);
815 free(lby);
816 free(linx);
817 free(liny);
818 free(position);
819 free(position_j);
820 free(type);
821 return NULL;
822 }
823
824
825 PRIVATE void
alifind_max(const int * position,const int * position_j,const int delta,const int threshold,const int alignment_length,const char * s1[],const char * s2[],const int extension_cost,const int fast)826 alifind_max(const int *position,
827 const int *position_j,
828 const int delta,
829 const int threshold,
830 const int alignment_length,
831 const char *s1[],
832 const char *s2[],
833 const int extension_cost,
834 const int fast)
835 {
836 int n_seq = 0;
837
838 for (n_seq = 0; s1[n_seq] != NULL; n_seq++);
839 int pos = n1 - 9;
840 if (fast == 1) {
841 while (10 < pos--) {
842 int temp_min = 0;
843 if (position[pos + delta] < (threshold)) {
844 int search_range;
845 search_range = delta + 1;
846 while (--search_range)
847 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
848 temp_min = search_range;
849
850 pos -= temp_min;
851 int max_pos_j;
852 max_pos_j = position_j[pos + delta];
853 int max;
854 max = position[pos + delta];
855 printf("target upper bound %d: query lower bound %d (%5.2f) \n",
856 pos - 10,
857 max_pos_j - 10,
858 ((double)max) / (n_seq * 100));
859 pos = MAX2(10, pos + temp_min - delta);
860 }
861 }
862 } else {
863 pos = n1 - 9;
864 while (pos-- > 10) {
865 /* printf("delta %d position:%d value:%d\n", delta, pos, position[pos]); */
866 int temp_min = 0;
867 if (position[pos + delta] < (threshold)) {
868 int search_range;
869 search_range = delta + 1;
870 while (--search_range)
871 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
872 temp_min = search_range;
873
874 pos -= temp_min;
875 int max_pos_j;
876 max_pos_j = position_j[pos + delta];
877 /* printf("%d %d %d\n", pos, max_pos_j,position[pos+delta]); */
878 int begin_t = MAX2(11, pos - alignment_length + 1);
879 int end_t = MIN2(n1 - 10, pos + 1);
880 int begin_q = MAX2(11, max_pos_j - 1);
881 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
882 char **s3, **s4;
883 s3 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
884 s4 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
885 int i;
886 for (i = 0; i < n_seq; i++) {
887 s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
888 s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
889 strncpy(s3[i], (s1[i] + begin_t - 1), end_t - begin_t + 1);
890 strncpy(s4[i], (s2[i] + begin_q - 1), end_q - begin_q + 1);
891 s3[i][end_t - begin_t + 1] = '\0';
892 s4[i][end_q - begin_q + 1] = '\0';
893 }
894 duplexT test;
895 test = aliduplexfold((const char **)s3, (const char **)s4, extension_cost);
896 /* printf("test %d threshold %d",test.energy*100,(threshold/n_seq)); */
897 if (test.energy * 100 < (int)(threshold / n_seq)) {
898 int l1 = strchr(test.structure, '&') - test.structure;
899 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
900 begin_t - 10 + test.i - l1,
901 begin_t - 10 + test.i - 1,
902 begin_q - 10 + test.j - 1,
903 begin_q - 11 + test.j + (int)strlen(test.structure) - l1 - 2, test.energy);
904 pos = MAX2(10, pos + temp_min - delta);
905 }
906
907 for (i = 0; i < n_seq; i++) {
908 free(s3[i]);
909 free(s4[i]);
910 }
911 free(s3);
912 free(s4);
913 free(test.structure);
914 }
915 }
916 }
917 }
918
919
920 PRIVATE void
aliplot_max(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1[],const char * s2[],const int extension_cost,const int fast)921 aliplot_max(const int max,
922 const int max_pos,
923 const int max_pos_j,
924 const int alignment_length,
925 const char *s1[],
926 const char *s2[],
927 const int extension_cost,
928 const int fast)
929 {
930 int n_seq;
931
932 for (n_seq = 0; !(s1[n_seq] == NULL); n_seq++);
933 n1 = strlen(s1[0]); /* get length of alignment */
934 n2 = strlen(s2[0]); /* get length of alignment */
935 if (fast == 1) {
936 printf("target upper bound %d: query lower bound %d (%5.2f)\n",
937 max_pos - 10, max_pos_j - 10, (double)((double)max) / (100 * n_seq));
938 } else {
939 int begin_t = MAX2(11, max_pos - alignment_length + 1);
940 int end_t = MIN2(n1 - 10, max_pos + 1);
941 int begin_q = MAX2(11, max_pos_j - 1);
942 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
943 char **s3, **s4;
944 s3 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
945 s4 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
946 int i;
947 for (i = 0; i < n_seq; i++) {
948 s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
949 s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
950 strncpy(s3[i], (s1[i] + begin_t - 1), end_t - begin_t + 1);
951 strncpy(s4[i], (s2[i] + begin_q - 1), end_q - begin_q + 1);
952 s3[i][end_t - begin_t + 1] = '\0';
953 s4[i][end_q - begin_q + 1] = '\0';
954 }
955 duplexT test;
956 s3[n_seq] = s4[n_seq] = NULL;
957 test = aliduplexfold((const char **)s3, (const char **)s4, extension_cost);
958 int l1 = strchr(test.structure, '&') - test.structure;
959 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n",
960 test.structure,
961 begin_t - 10 + test.i - l1,
962 begin_t - 10 + test.i - 1,
963 begin_q - 10 + test.j - 1,
964 begin_q - 11 + test.j + (int)strlen(test.structure) - l1 - 2,
965 test.energy);
966 for (i = 0; i < n_seq; i++) {
967 free(s3[i]);
968 free(s4[i]);
969 }
970 free(s3);
971 free(s4);
972 free(test.structure);
973 }
974 }
975
976
977 PRIVATE duplexT
aliduplexfold_XS(const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int i_pos,const int j_pos,const int threshold,const int i_flag,const int j_flag)978 aliduplexfold_XS(const char *s1[],
979 const char *s2[],
980 const int **access_s1,
981 const int **access_s2,
982 const int i_pos,
983 const int j_pos,
984 const int threshold,
985 const int i_flag,
986 const int j_flag)
987 {
988 int i, j, s, p, q, Emin = INF, l_min = 0, k_min = 0;
989 char *struc;
990 short **S1, **S2;
991 int *type, *type2;
992
993 struc = NULL;
994 duplexT mfe;
995 vrna_md_t md;
996 int n_seq;
997 n3 = (int)strlen(s1[0]);
998 n4 = (int)strlen(s2[0]);
999 for (s = 0; s1[s] != NULL; s++);
1000 n_seq = s;
1001 for (s = 0; s2[s] != NULL; s++);
1002 /* printf("%d \n",i_pos); */
1003
1004 set_model_details(&md);
1005 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
1006 update_fold_params();
1007 if (P)
1008 free(P);
1009
1010 P = vrna_params(&md);
1011 make_pair_matrix();
1012 }
1013
1014 c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
1015 for (i = 0; i <= n3; i++)
1016 c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
1017 for (i = 0; i <= n3; i++)
1018 for (j = 0; j <= n4; j++)
1019 c[i][j] = INF;
1020 S1 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1021 S2 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1022 for (s = 0; s < n_seq; s++) {
1023 if (strlen(s1[s]) != n3)
1024 vrna_message_error("uneqal seqence lengths");
1025
1026 if (strlen(s2[s]) != n4)
1027 vrna_message_error("uneqal seqence lengths");
1028
1029 S1[s] = encode_seq(s1[s]);
1030 S2[s] = encode_seq(s2[s]);
1031 }
1032 type = (int *)vrna_alloc(n_seq * sizeof(int));
1033 type2 = (int *)vrna_alloc(n_seq * sizeof(int));
1034 int type3, E, k, l;
1035 i = n3 - i_flag;
1036 j = 1 + j_flag;
1037 for (s = 0; s < n_seq; s++)
1038 type[s] = pair[S1[s][i]][S2[s][j]];
1039 c[i][j] = n_seq * P->DuplexInit - covscore(type, n_seq);
1040 for (s = 0; s < n_seq; s++)
1041 if (type[s] == 0)
1042 type[s] = 7;
1043
1044 for (s = 0; s < n_seq; s++)
1045 c[i][j] += vrna_E_ext_stem(rtype[type[s]],
1046 (j_flag ? S2[s][j - 1] : -1),
1047 (i_flag ? S1[s][i + 1] : -1),
1048 P);
1049 k_min = i;
1050 l_min = j;
1051 Emin = c[i][j];
1052 for (k = i; k > 1; k--) {
1053 if (k < i)
1054 c[k + 1][0] = INF;
1055
1056 for (l = j; l <= n4 - 1; l++) {
1057 if (!(k == i && l == j))
1058 c[k][l] = INF;
1059
1060 int psc2;
1061 for (s = 0; s < n_seq; s++)
1062 type2[s] = pair[S1[s][k]][S2[s][l]];
1063 psc2 = covscore(type2, n_seq);
1064 if (psc2 < MINPSCORE)
1065 continue;
1066
1067 for (s = 0; s < n_seq; s++)
1068 if (type2[s] == 0)
1069 type2[s] = 7;
1070
1071 for (p = k + 1; p <= n3 - i_flag && p < k + MAXLOOP - 1; p++) {
1072 for (q = l - 1; q >= 1 + j_flag; q--) {
1073 if (p - k + l - q - 2 > MAXLOOP)
1074 break;
1075
1076 for (E = s = 0; s < n_seq; s++) {
1077 type3 = pair[S1[s][p]][S2[s][q]];
1078 if (type3 == 0)
1079 type3 = 7;
1080
1081 E += E_IntLoop(p - k - 1, l - q - 1, type2[s], rtype[type3],
1082 S1[s][k + 1], S2[s][l - 1], S1[s][p - 1], S2[s][q + 1], P);
1083 }
1084 c[k][l] = MIN2(c[k][l], c[p][q] + E);
1085 }
1086 }
1087 c[k][l] -= psc2;
1088 E = c[k][l];
1089 E += n_seq * (access_s1[i - k + 1][i_pos] + access_s2[l - 1][j_pos + (l - 1) - 1]);
1090 for (s = 0; s < n_seq; s++)
1091 E +=
1092 vrna_E_ext_stem(type2[s], (k > 1) ? S1[s][k - 1] : -1, (l < n4) ? S2[s][l + 1] : -1, P);
1093 if (E < Emin) {
1094 Emin = E;
1095 k_min = k;
1096 l_min = l;
1097 }
1098 }
1099 }
1100 if (Emin > threshold - 1) {
1101 mfe.structure = NULL;
1102 mfe.energy = INF;
1103 for (i = 0; i <= n3; i++)
1104 free(c[i]);
1105 free(c);
1106 for (i = 0; i <= n_seq; i++) {
1107 free(S1[i]);
1108 free(S2[i]);
1109 }
1110 free(S1);
1111 free(S2); /* free(SS1); free(SS2); */
1112 free(type);
1113 free(type2);
1114 return mfe;
1115 } else {
1116 struc = alibacktrack_XS(n3,
1117 n4,
1118 k_min,
1119 l_min,
1120 (const short int **)S1,
1121 (const short int **)S2,
1122 access_s1,
1123 access_s2,
1124 i_flag,
1125 j_flag);
1126 }
1127
1128 int dx_5, dx_3, dy_5, dy_3, dGx, dGy;
1129 dx_5 = 0;
1130 dx_3 = 0;
1131 dy_5 = 0;
1132 dy_3 = 0;
1133 dGx = 0;
1134 dGy = 0;
1135 dGx = n_seq * (access_s1[i - k_min + 1][i_pos]);
1136 dx_3 = 0;
1137 dx_5 = 0;
1138 dGy = n_seq * access_s2[l_min - j + 1][j_pos + (l_min - 1) - 1];
1139 mfe.tb = i_pos - 9 - i + k_min - 1 - dx_5;
1140 mfe.te = i_pos - 9 - 1 + dx_3;
1141 mfe.qb = j_pos - 9 - 1 - dy_5;
1142 mfe.qe = j_pos + l_min - 3 - 9 + dy_3;
1143 mfe.ddG = (double)(Emin * 0.01);
1144 mfe.dG1 = (double)(dGx * (0.01));
1145 mfe.dG2 = (double)(dGy * (0.01));
1146 mfe.energy = mfe.ddG - mfe.dG1 - mfe.dG2;
1147 mfe.structure = struc;
1148 for (i = 0; i <= n3; i++)
1149 free(c[i]);
1150 free(c);
1151 for (i = 0; i <= n_seq; i++) {
1152 free(S1[i]);
1153 free(S2[i]);
1154 }
1155 free(S1);
1156 free(S2);
1157 free(type);
1158 free(type2);
1159 return mfe;
1160 }
1161
1162
1163 PRIVATE char *
alibacktrack_XS(int n3,int n4,int i,int j,const short * S1[],const short * S2[],const int ** access_s1,const int ** access_s2,const int i_flag,const int j_flag)1164 alibacktrack_XS(int n3,
1165 int n4,
1166 int i,
1167 int j,
1168 const short *S1[],
1169 const short *S2[],
1170 const int **access_s1,
1171 const int **access_s2,
1172 const int i_flag,
1173 const int j_flag)
1174 {
1175 int k, l, *type, type2, E, traced, i0, j0, s, n_seq, psc;
1176 char *st1 = NULL, *st2 = NULL, *struc = NULL;
1177
1178 for (s = 0; S1[s] != NULL; s++);
1179 n_seq = s;
1180 for (s = 0; S2[s] != NULL; s++);
1181 if (n_seq != s)
1182 vrna_message_error("unequal number of sequences in alibacktrack()\n");
1183
1184 st1 = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
1185 st2 = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
1186 type = (int *)vrna_alloc(n_seq * sizeof(int));
1187
1188 i0 = i; /*MAX2(i-1,1);*/ j0 = j;/*MIN2(j+1,n4);*/
1189 while (i <= n3 - i_flag && j >= 1 + j_flag) {
1190 E = c[i][j];
1191 traced = 0;
1192 st1[i - 1] = '(';
1193 st2[j - 1] = ')';
1194 for (s = 0; s < n_seq; s++)
1195 type[s] = pair[S1[s][i]][S2[s][j]];
1196 psc = covscore(type, n_seq);
1197 for (s = 0; s < n_seq; s++)
1198 if (type[s] == 0)
1199 type[s] = 7;
1200
1201 E += psc;
1202 for (k = i + 1; k <= n3 && k > i - MAXLOOP - 2; k++) {
1203 for (l = j - 1; l >= 1; l--) {
1204 int LE;
1205 if (i - k + l - j - 2 > MAXLOOP)
1206 break;
1207
1208 for (s = LE = 0; s < n_seq; s++) {
1209 type2 = pair[S1[s][k]][S2[s][l]];
1210 if (type2 == 0)
1211 type2 = 7;
1212
1213 LE += E_IntLoop(k - i - 1, j - l - 1, type[s], rtype[type2],
1214 S1[s][i + 1], S2[s][j - 1], S1[s][k - 1], S2[s][l + 1], P);
1215 }
1216 if (E == c[k][l] + LE) {
1217 traced = 1;
1218 i = k;
1219 j = l;
1220 break;
1221 }
1222 }
1223 if (traced)
1224 break;
1225 }
1226 if (!traced) {
1227 for (s = 0; s < n_seq; s++)
1228 if (type[s] > 2)
1229 E -= P->TerminalAU;
1230
1231 break;
1232 if (E != n_seq * P->DuplexInit)
1233 vrna_message_error("backtrack failed in fold duplex bal");
1234 else
1235 break;
1236 }
1237 }
1238 struc = (char *)vrna_alloc(i - i0 + 1 + j0 - j + 1 + 2);
1239 for (k = MAX2(i0, 1); k <= i; k++)
1240 if (!st1[k - 1])
1241 st1[k - 1] = '.';
1242
1243 for (k = j; k <= j0; k++)
1244 if (!st2[k - 1])
1245 st2[k - 1] = '.';
1246
1247 strcpy(struc, st1 + MAX2(i0 - 1, 0));
1248 strcat(struc, "&");
1249 strcat(struc, st2 + j - 1);
1250 free(st1);
1251 free(st2);
1252 free(type);
1253 return struc;
1254 }
1255
1256
1257 duplexT **
aliLduplexfold_XS(const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int threshold,const int alignment_length,const int delta,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)1258 aliLduplexfold_XS(const char *s1[],
1259 const char *s2[],
1260 const int **access_s1,
1261 const int **access_s2,
1262 const int threshold,
1263 const int alignment_length,
1264 const int delta,
1265 const int fast,
1266 const int il_a,
1267 const int il_b,
1268 const int b_a,
1269 const int b_b)
1270 {
1271 short **S1, **S2;
1272 int *type, type2;
1273 int i, j, s, n_seq;
1274
1275 s = 0;
1276 int bopen = b_b;
1277 int bext = b_a;
1278 int iopen = il_b;
1279 int iext_s = 2 * (il_a); /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
1280 int iext_ass = 50 + il_a; /* iext_ass assymetric extension of interior loop, either on i or on j side. */
1281 int min_colonne = INF; /* enthaelt das maximum einer kolonne */
1282 int i_length;
1283 int max_pos; /* get position of the best hit */
1284 int max_pos_j;
1285 int temp;
1286 int min_j_colonne;
1287 int max = INF;
1288 /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
1289 int *position; /* contains the position of the hits with energy > E */
1290 int *position_j;
1291 int maxPenalty[4];
1292
1293 n1 = (int)strlen(s1[0]);
1294 n2 = (int)strlen(s2[0]);
1295 for (s = 0; s1[s]; s++);
1296 n_seq = s;
1297 for (s = 0; s2[s]; s++);
1298 if (n_seq != s)
1299 vrna_message_error("unequal number of sequences in aliduplexfold()\n");
1300
1301 position = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
1302 position_j = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
1303
1304 /**
1305 *** extension penalty, computed only once, further reduce the computation time
1306 **/
1307 if ((!P) || (fabs(P->temperature - temperature) > 1e-6))
1308 update_dfold_params();
1309
1310 maxPenalty[0] = (int)-1 * P->stack[2][2] / 2;
1311 maxPenalty[1] = (int)-1 * P->stack[2][2];
1312 maxPenalty[2] = (int)-3 * P->stack[2][2] / 2;
1313 maxPenalty[3] = (int)-2 * P->stack[2][2];
1314
1315
1316 lc = (int **)vrna_alloc(sizeof(int *) * 5);
1317 lin = (int **)vrna_alloc(sizeof(int *) * 5);
1318 lbx = (int **)vrna_alloc(sizeof(int *) * 5);
1319 lby = (int **)vrna_alloc(sizeof(int *) * 5);
1320 linx = (int **)vrna_alloc(sizeof(int *) * 5);
1321 liny = (int **)vrna_alloc(sizeof(int *) * 5);
1322
1323 for (i = 0; i <= 4; i++) {
1324 lc[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1325 lin[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1326 lbx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1327 lby[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1328 linx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1329 liny[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1330 }
1331
1332
1333 S1 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1334 S2 = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1335 for (s = 0; s < n_seq; s++) {
1336 if (strlen(s1[s]) != n1)
1337 vrna_message_error("uneqal seqence lengths");
1338
1339 if (strlen(s2[s]) != n2)
1340 vrna_message_error("uneqal seqence lengths");
1341
1342 S1[s] = encode_seq(s1[s]);
1343 S2[s] = encode_seq(s2[s]);
1344 }
1345 type = (int *)vrna_alloc(n_seq * sizeof(int));
1346 /**
1347 *** array initialization
1348 **/
1349
1350 for (j = n2 + 4; j >= 0; j--) {
1351 lbx[0][j] = lbx[1][j] = lbx[2][j] = lbx[3][j] = lbx[4][j] = INF;
1352 lin[0][j] = lin[1][j] = lin[2][j] = lin[3][j] = lin[4][j] = INF;
1353 lc[0][j] = lc[1][j] = lc[2][j] = lc[3][j] = lc[4][j] = INF;
1354 lby[0][j] = lby[1][j] = lby[2][j] = lby[3][j] = lby[4][j] = INF;
1355 liny[0][j] = liny[1][j] = liny[2][j] = liny[3][j] = liny[4][j] = INF;
1356 linx[0][j] = linx[1][j] = linx[2][j] = linx[3][j] = linx[4][j] = INF;
1357 }
1358 i = 10;
1359 i_length = n1 - 9;
1360 int di1, di2, di3, di4; /* contains accessibility penalty */
1361 while (i < i_length) {
1362 int idx = i % 5;
1363 int idx_1 = (i - 1) % 5;
1364 int idx_2 = (i - 2) % 5;
1365 int idx_3 = (i - 3) % 5;
1366 int idx_4 = (i - 4) % 5;
1367
1368 di1 = access_s1[5][i] - access_s1[4][i - 1];
1369 di2 = access_s1[5][i - 1] - access_s1[4][i - 2] + di1;
1370 di3 = access_s1[5][i - 2] - access_s1[4][i - 3] + di2;
1371 di4 = access_s1[5][i - 3] - access_s1[4][i - 4] + di3;
1372 di1 = MIN2(di1, maxPenalty[0]);
1373 di2 = MIN2(di2, maxPenalty[1]);
1374 di3 = MIN2(di3, maxPenalty[2]);
1375 di4 = MIN2(di3, maxPenalty[3]);
1376 j = n2 - 9;
1377 while (9 < --j) {
1378 int dj1, dj2, dj3, dj4;
1379 dj1 = access_s2[5][j + 4] - access_s2[4][j + 4];
1380 dj2 = access_s2[5][j + 5] - access_s2[4][j + 5] + dj1;
1381 dj3 = access_s2[5][j + 6] - access_s2[4][j + 6] + dj2;
1382 dj4 = access_s2[5][j + 7] - access_s2[4][j + 7] + dj3;
1383 dj1 = MIN2(dj1, maxPenalty[0]);
1384 dj2 = MIN2(dj2, maxPenalty[1]);
1385 dj3 = MIN2(dj3, maxPenalty[2]);
1386 dj4 = MIN2(dj4, maxPenalty[3]);
1387 int psc;
1388 for (s = 0; s < n_seq; s++) /* initialize type1 */
1389 type[s] = pair[S1[s][i]][S2[s][j]];
1390 psc = covscore(type, n_seq);
1391 for (s = 0; s < n_seq; s++)
1392 if (type[s] == 0)
1393 type[s] = 7;
1394
1395 lc[idx][j] = ((psc >= MINPSCORE) ? n_seq * (P->DuplexInit) : INF);
1396 int c_stack, c_10, c_01, c_11, c_22, c_21, c_12, c_23, c_32, c_in, c_in2x, c_in2y, c_bx, c_by,
1397 c_inx, c_iny; /* matrix c */
1398 int in, in_x, in_y, in_xy; /* in begin, in_x assymetric, in_y assymetric, in_xy symetric; */
1399 int inx, inx_x;
1400 int iny, iny_y;
1401 int bx, bx_x;
1402 int by, by_y;
1403 in = lc[idx_1][j + 1];
1404 in_x = lin[idx_1][j];
1405 in_y = lin[idx][j + 1];
1406 in_xy = lin[idx_1][j + 1];
1407 inx = lc[idx_1][j + 1];
1408 inx_x = linx[idx_1][j];
1409 iny = lc[idx_1][j + 1];
1410 iny_y = liny[idx][j + 1];
1411 bx = lc[idx_1][j];
1412 bx_x = lbx[idx_1][j];
1413 by = lc[idx][j + 1];
1414 by_y = lby[idx][j + 1];
1415 c_stack = lc[idx_1][j + 1];
1416 c_01 = lc[idx_1][j + 2];
1417 c_10 = lc[idx_2][j + 1];
1418 c_12 = lc[idx_2][j + 3];
1419 c_21 = lc[idx_3][j + 2];
1420 c_11 = lc[idx_2][j + 2];
1421 c_22 = lc[idx_3][j + 3];
1422 c_32 = lc[idx_4][j + 3];
1423 c_23 = lc[idx_3][j + 4];
1424 c_in = lin[idx_3][j + 3];
1425 c_in2x = lin[idx_4][j + 2];
1426 c_in2y = lin[idx_2][j + 4];
1427 c_inx = linx[idx_3][j + 1];
1428 c_iny = liny[idx_1][j + 3];
1429 c_bx = lbx[idx_2][j + 1];
1430 c_by = lby[idx_1][j + 2];
1431 for (s = 0; s < n_seq; s++) {
1432 type2 = pair[S2[s][j + 1]][S1[s][i - 1]];
1433 in += P->mismatchI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s + di1 + dj1;
1434 in_x += iext_ass + di1;
1435 in_y += iext_ass + dj1;
1436 in_xy += iext_s + di1 + dj1;
1437 inx += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s + di1 + dj1;
1438 inx_x += iext_ass + di1;
1439 iny += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s + di1 + dj1;
1440 iny_y += iext_ass + dj1;
1441 type2 = pair[S2[s][j]][S1[s][i - 1]];
1442 bx += bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + di1;
1443 bx_x += bext + di1;
1444 type2 = pair[S2[s][j + 1]][S1[s][i]];
1445 by += bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + dj1;
1446 by_y += bext + dj1;
1447 }
1448 lin[idx][j] = MIN2(in, MIN2(in_x, MIN2(in_y, in_xy)));
1449 linx[idx][j] = MIN2(inx_x, inx);
1450 liny[idx][j] = MIN2(iny_y, iny);
1451 lby[idx][j] = MIN2(by, by_y);
1452 lbx[idx][j] = MIN2(bx, bx_x);
1453 if (psc < MINPSCORE)
1454 continue;
1455
1456 for (s = 0; s < n_seq; s++)
1457 lc[idx][j] += vrna_E_ext_stem(type[s], S1[s][i - 1], S2[s][j + 1], P);
1458 for (s = 0; s < n_seq; s++) {
1459 type2 = pair[S1[s][i - 1]][S2[s][j + 1]];
1460 if (type2 == 0)
1461 type2 = 7;
1462
1463 c_stack +=
1464 E_IntLoop(0, 0, type2, rtype[type[s]], S1[s][i], S2[s][j], S1[s][i - 1], S2[s][j + 1],
1465 P) + di1 + dj1;
1466 type2 = pair[S1[s][i - 1]][S2[s][j + 2]];
1467 if (type2 == 0)
1468 type2 = 7;
1469
1470 c_01 +=
1471 E_IntLoop(0, 1, type2, rtype[type[s]], S1[s][i], S2[s][j + 1], S1[s][i - 1], S2[s][j + 1],
1472 P) + di1 + dj2;
1473 type2 = pair[S1[s][i - 2]][S2[s][j + 1]];
1474 if (type2 == 0)
1475 type2 = 7;
1476
1477 c_10 +=
1478 E_IntLoop(1, 0, type2, rtype[type[s]], S1[s][i - 1], S2[s][j], S1[s][i - 1], S2[s][j + 1],
1479 P) + di2 + dj1;
1480 type2 = pair[S1[s][i - 2]][S2[s][j + 2]];
1481 if (type2 == 0)
1482 type2 = 7;
1483
1484 c_11 += E_IntLoop(1,
1485 1,
1486 type2,
1487 rtype[type[s]],
1488 S1[s][i - 1],
1489 S2[s][j + 1],
1490 S1[s][i - 1],
1491 S2[s][j + 1],
1492 P) + di2 + dj2;
1493 type2 = pair[S1[s][i - 3]][S2[s][j + 3]];
1494 if (type2 == 0)
1495 type2 = 7;
1496
1497 c_22 += E_IntLoop(2,
1498 2,
1499 type2,
1500 rtype[type[s]],
1501 S1[s][i - 2],
1502 S2[s][j + 2],
1503 S1[s][i - 1],
1504 S2[s][j + 1],
1505 P) + di3 + dj3;
1506 type2 = pair[S1[s][i - 3]][S2[s][j + 2]];
1507 if (type2 == 0)
1508 type2 = 7;
1509
1510 c_21 += E_IntLoop(2,
1511 1,
1512 type2,
1513 rtype[type[s]],
1514 S1[s][i - 2],
1515 S2[s][j + 1],
1516 S1[s][i - 1],
1517 S2[s][j + 1],
1518 P) + di3 + dj2;
1519 type2 = pair[S1[s][i - 2]][S2[s][j + 3]];
1520 if (type2 == 0)
1521 type2 = 7;
1522
1523 c_12 += E_IntLoop(1,
1524 2,
1525 type2,
1526 rtype[type[s]],
1527 S1[s][i - 1],
1528 S2[s][j + 2],
1529 S1[s][i - 1],
1530 S2[s][j + 1],
1531 P) + di2 + dj3;
1532 type2 = pair[S1[s][i - 4]][S2[s][j + 3]];
1533 if (type2 == 0)
1534 type2 = 7;
1535
1536 c_32 += E_IntLoop(3,
1537 2,
1538 type2,
1539 rtype[type[s]],
1540 S1[s][i - 3],
1541 S2[s][j + 2],
1542 S1[s][i - 1],
1543 S2[s][j + 1],
1544 P) + di4 + dj3;
1545 type2 = pair[S1[s][i - 3]][S2[s][j + 4]];
1546 if (type2 == 0)
1547 type2 = 7;
1548
1549 c_23 += E_IntLoop(2,
1550 3,
1551 type2,
1552 rtype[type[s]],
1553 S1[s][i - 2],
1554 S2[s][j + 3],
1555 S1[s][i - 1],
1556 S2[s][j + 1],
1557 P) + dj4 + di3;
1558 c_in += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + di3 + dj3 + 2 *
1559 iext_s;
1560 c_in2x += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
1561 iext_ass + di4 + dj2;
1562 c_in2y += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
1563 iext_ass + di2 + dj4;
1564 c_inx += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
1565 iext_ass + di3 + dj1;
1566 c_iny += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
1567 iext_ass + di1 + dj3;
1568 int bAU;
1569 bAU = (type[s] > 2 ? P->TerminalAU : 0);
1570 c_bx += di2 + dj1 + bext + bAU;
1571 c_by += di1 + dj2 + bext + bAU;
1572 }
1573 lc[idx][j] = MIN2(lc[idx][j],
1574 MIN2(c_stack,
1575 MIN2(c_10,
1576 MIN2(c_01,
1577 MIN2(c_11,
1578 MIN2(c_21,
1579 MIN2(c_12,
1580 MIN2(c_22,
1581 MIN2(c_23,
1582 MIN2(c_32,
1583 MIN2(c_bx,
1584 MIN2(c_by,
1585 MIN2(c_in,
1586 MIN2(c_in2x,
1587 MIN2(c_in2y,
1588 MIN2(
1589 c_inx,
1590 c_iny)
1591 )
1592 )
1593 )
1594 )
1595 )
1596 )
1597 )
1598 )
1599 )
1600 )
1601 )
1602 )
1603 )
1604 )
1605 );
1606 lc[idx][j] -= psc;
1607 temp = lc[idx][j];
1608
1609 for (s = 0; s < n_seq; s++)
1610 temp += vrna_E_ext_stem(rtype[type[s]], S2[s][j - 1], S1[s][i + 1], P);
1611 if (min_colonne > temp) {
1612 min_colonne = temp;
1613 min_j_colonne = j;
1614 }
1615 }
1616 if (max >= min_colonne) {
1617 max = min_colonne;
1618 max_pos = i;
1619 max_pos_j = min_j_colonne;
1620 }
1621
1622 position[i + delta] = min_colonne;
1623 min_colonne = INF;
1624 position_j[i + delta] = min_j_colonne;
1625 i++;
1626 }
1627 /* printf("MAX: %d",max); */
1628 for (s = 0; s < n_seq; s++) {
1629 free(S1[s]);
1630 free(S2[s]);
1631 }
1632 free(S1);
1633 free(S2);
1634 if (max < threshold) {
1635 alifind_max_XS(position,
1636 position_j,
1637 delta,
1638 threshold,
1639 alignment_length,
1640 s1,
1641 s2,
1642 access_s1,
1643 access_s2,
1644 fast);
1645 }
1646
1647 aliplot_max_XS(max, max_pos, max_pos_j, alignment_length, s1, s2, access_s1, access_s2, fast);
1648 for (i = 0; i <= 4; i++) {
1649 free(lc[i]);
1650 free(lin[i]);
1651 free(lbx[i]);
1652 free(lby[i]);
1653 free(linx[i]);
1654 free(liny[i]);
1655 }
1656 /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]);free(linx[0]);free(liny[0]); */
1657 free(lc);
1658 free(lin);
1659 free(lbx);
1660 free(lby);
1661 free(linx);
1662 free(liny);
1663 free(position);
1664 free(position_j);
1665 free(type);
1666 return NULL;
1667 }
1668
1669
1670 PRIVATE void
alifind_max_XS(const int * position,const int * position_j,const int delta,const int threshold,const int alignment_length,const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int fast)1671 alifind_max_XS(const int *position,
1672 const int *position_j,
1673 const int delta,
1674 const int threshold,
1675 const int alignment_length,
1676 const char *s1[],
1677 const char *s2[],
1678 const int **access_s1,
1679 const int **access_s2,
1680 const int fast)
1681 {
1682 int n_seq = 0;
1683
1684 for (n_seq = 0; s1[n_seq] != NULL; n_seq++);
1685 int pos = n1 - 9;
1686 if (fast == 1) {
1687 while (10 < pos--) {
1688 int temp_min = 0;
1689 if (position[pos + delta] < (threshold)) {
1690 int search_range;
1691 search_range = delta + 1;
1692 while (--search_range)
1693 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
1694 temp_min = search_range;
1695
1696 pos -= temp_min;
1697 /*
1698 * int max_pos_j;
1699 * max_pos_j=position_j[pos+delta];
1700 * int max;
1701 * max=position[pos+delta];
1702 * printf("target upper bound %d: query lower bound %d (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/100);
1703 */
1704 pos = MAX2(10, pos + temp_min - delta);
1705 }
1706 }
1707 } else {
1708 pos = n1 - 9;
1709 while (pos-- > 10) {
1710 /* printf("delta %d position:%d value:%d\n", delta, pos, position[pos]); */
1711 int temp_min = 0;
1712 if (position[pos + delta] < (threshold)) {
1713 int search_range;
1714 search_range = delta + 1;
1715 while (--search_range)
1716 if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
1717 temp_min = search_range;
1718
1719 pos -= temp_min;
1720 int max_pos_j;
1721 max_pos_j = position_j[pos + delta]; /* position on j */
1722 int begin_t = MAX2(11, pos - alignment_length);
1723 int end_t = MIN2(n1 - 10, pos + 1);
1724 int begin_q = MAX2(11, max_pos_j - 1);
1725 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
1726 int i_flag;
1727 int j_flag;
1728 i_flag = (end_t == pos + 1 ? 1 : 0);
1729 j_flag = (begin_q == max_pos_j - 1 ? 1 : 0);
1730 char **s3, **s4;
1731 s3 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1732 s4 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1733 int i;
1734 for (i = 0; i < n_seq; i++) {
1735 s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
1736 s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1737 strncpy(s3[i], (s1[i] + begin_t), end_t - begin_t + 1);
1738 strncpy(s4[i], (s2[i] + begin_q), end_q - begin_q + 1);
1739 s3[i][end_t - begin_t + 1] = '\0';
1740 s4[i][end_q - begin_q + 1] = '\0';
1741 }
1742 duplexT test;
1743 test = aliduplexfold_XS((const char **)s3,
1744 (const char **)s4,
1745 access_s1,
1746 access_s2,
1747 pos,
1748 max_pos_j,
1749 threshold,
1750 i_flag,
1751 j_flag);
1752 /* printf("position %d approximation %d test %d threshold %d\n", pos, position[pos+delta], (int)test.energy,(int)(threshold/n_seq)); */
1753 if (test.energy * 100 < (int)(threshold / n_seq)) {
1754 printf("%s %3d,%-3d: %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n",
1755 test.structure,
1756 test.tb,
1757 test.te,
1758 test.qb,
1759 test.qe,
1760 test.ddG / n_seq,
1761 test.energy / n_seq,
1762 test.dG1 / n_seq,
1763 test.dG2 / n_seq);
1764 free(test.structure);
1765 pos = MAX2(10, pos + temp_min - delta);
1766 }
1767
1768 for (i = 0; i < n_seq; i++) {
1769 free(s3[i]);
1770 free(s4[i]);
1771 }
1772 free(s3);
1773 free(s4);
1774 /* free(test.structure); */
1775 }
1776 }
1777 }
1778 }
1779
1780
1781 PRIVATE void
aliplot_max_XS(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int fast)1782 aliplot_max_XS(const int max,
1783 const int max_pos,
1784 const int max_pos_j,
1785 const int alignment_length,
1786 const char *s1[],
1787 const char *s2[],
1788 const int **access_s1,
1789 const int **access_s2,
1790 const int fast)
1791 {
1792 int n_seq;
1793
1794 for (n_seq = 0; !(s1[n_seq] == NULL); n_seq++);
1795 n1 = strlen(s1[0]); /* get length of alignment */
1796 n2 = strlen(s2[0]); /* get length of alignme */
1797
1798 if (fast) {
1799 printf("target upper bound %d: query lower bound %d (%5.2f)\n",
1800 max_pos - 10, max_pos_j - 10, (double)((double)max) / (100 * n_seq));
1801 } else {
1802 int begin_t = MAX2(11, max_pos - alignment_length); /* only get the position that binds.. */
1803 int end_t = MIN2(n1 - 10, max_pos + 1); /* ..no dangles */
1804 int begin_q = MAX2(11, max_pos_j - 1);
1805 int end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
1806 int i_flag;
1807 int j_flag;
1808 i_flag = (end_t == max_pos + 1 ? 1 : 0);
1809 j_flag = (begin_q == max_pos_j - 1 ? 1 : 0);
1810 char **s3, **s4;
1811 s3 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1812 s4 = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1813 int i;
1814 for (i = 0; i < n_seq; i++) {
1815 s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
1816 s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1817 strncpy(s3[i], (s1[i] + begin_t), end_t - begin_t + 1);
1818 strncpy(s4[i], (s2[i] + begin_q), end_q - begin_q + 1);
1819 s3[i][end_t - begin_t + 1] = '\0';
1820 s4[i][end_q - begin_q + 1] = '\0';
1821 }
1822 duplexT test;
1823 test = aliduplexfold_XS((const char **)s3,
1824 (const char **)s4,
1825 access_s1,
1826 access_s2,
1827 max_pos,
1828 max_pos_j,
1829 INF,
1830 i_flag,
1831 j_flag);
1832 printf("%s %3d,%-3d: %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n",
1833 test.structure,
1834 test.tb,
1835 test.te,
1836 test.qb,
1837 test.qe,
1838 test.ddG / n_seq,
1839 test.energy / n_seq,
1840 test.dG1 / n_seq,
1841 test.dG2 / n_seq);
1842 free(test.structure);
1843 for (i = 0; i < n_seq; i++) {
1844 free(s3[i]);
1845 free(s4[i]);
1846 }
1847 free(s3);
1848 free(s4);
1849 }
1850 }
1851
1852
1853 PRIVATE int
covscore(const int * types,int n_seq)1854 covscore(const int *types,
1855 int n_seq)
1856 {
1857 /*
1858 * calculate co-variance bonus for a pair depending on
1859 * compensatory/consistent mutations and incompatible seqs
1860 * should be 0 for conserved pairs, >0 for good pairs
1861 */
1862 #define NONE -10000 /* score for forbidden pairs */
1863 int k, l, s, score, pscore;
1864 int dm[7][7] = { { 0, 0, 0, 0, 0, 0, 0 }, /* hamming distance between pairs */
1865 { 0, 0, 2, 2, 1, 2, 2 } /* CG */,
1866 { 0, 2, 0, 1, 2, 2, 2 } /* GC */,
1867 { 0, 2, 1, 0, 2, 1, 2 } /* GU */,
1868 { 0, 1, 2, 2, 0, 2, 1 } /* UG */,
1869 { 0, 2, 2, 1, 2, 0, 2 } /* AU */,
1870 { 0, 2, 2, 2, 1, 2, 0 } /* UA */ };
1871
1872 int pfreq[8] = {
1873 0, 0, 0, 0, 0, 0, 0, 0
1874 };
1875 for (s = 0; s < n_seq; s++)
1876 pfreq[types[s]]++;
1877
1878 if (pfreq[0] * 2 > n_seq)
1879 return NONE;
1880
1881 for (k = 1, score = 0; k <= 6; k++) /* ignore pairtype 7 (gap-gap) */
1882 for (l = k + 1; l <= 6; l++)
1883 /*
1884 * scores for replacements between pairtypes
1885 * consistent or compensatory mutations score 1 or 2
1886 */
1887 score += pfreq[k] * pfreq[l] * dm[k][l];
1888
1889 /* counter examples score -1, gap-gap scores -0.25 */
1890 pscore = cv_fact *
1891 ((UNIT * score) / n_seq - nc_fact * UNIT * (pfreq[0] + pfreq[7] * 0.25));
1892 return pscore;
1893 }
1894
1895
1896 PRIVATE void
update_dfold_params(void)1897 update_dfold_params(void)
1898 {
1899 vrna_md_t md;
1900
1901 if (P)
1902 free(P);
1903
1904 set_model_details(&md);
1905 P = vrna_params(&md);
1906 make_pair_matrix();
1907 }
1908
1909
1910 PRIVATE short *
encode_seq(const char * sequence)1911 encode_seq(const char *sequence)
1912 {
1913 unsigned int i, l;
1914 short *S;
1915
1916 l = strlen(sequence);
1917 S = (short *)vrna_alloc(sizeof(short) * (l + 2));
1918 S[0] = (short)l;
1919
1920 /* make numerical encoding of sequence */
1921 for (i = 1; i <= l; i++)
1922 S[i] = (short)encode_char(toupper(sequence[i - 1]));
1923
1924 /* for circular folding add first base at position n+1 */
1925 S[l + 1] = S[1];
1926
1927 return S;
1928 }
1929