1 /*
2  *         compute the duplex structure of two RNA strands,
3  *              allowing only inter-strand base pairs.
4  *       see cofold() for computing hybrid structures without
5  *                           restriction.
6  *                           Ivo Hofacker
7  *                        Vienna RNA package
8  *
9  */
10 
11 
12 /*
13  * library containing the function used in rnaplex
14  * the program rnaplex uses the following function
15  * Lduplexfold: finds high scoring segments
16  * it stores the end-position of these segments in an array
17  * and call then for each of these positions the duplexfold function
18  * which allows one to make backtracking for each of the high scoring position
19  * It allows one to find suboptimal partially overlapping (depends on a a parameter)
20  * duplexes between a long RNA and a shorter one.
21  * Contrarly to RNAduplex, the energy model is not in E~log(N),
22  * where N is the length of an interial loop but used an affine model,
23  * where the extension and begin parameter are fitted to the energy
24  * parameter used by RNAduplex. This allows one to check for duplex between a short RNA(20nt)
25  * and a long one at the speed of 1Mnt/s. At this speed the whole genome (3Gnt) can be analyzed for one siRNA
26  * in about 50 minutes.
27  * The algorithm is based on an idea by Durbin and Eddy:when the alginment reach a value larger than a
28  * given threshold this value is stored in an array. When the alignment score goes
29  * then under this threshold, the alignemnent begin from this value, in that way the backtracking allow us
30  * to find all non-overlapping high-scoring segments.
31  * For more information check "durbin, biological sequence analysis"
32  */
33 
34 #ifdef HAVE_CONFIG_H
35 #include "config.h"
36 #endif
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <math.h>
41 #include <ctype.h>
42 #include <string.h>
43 #include "ViennaRNA/utils/basic.h"
44 #include "ViennaRNA/params/constants.h"
45 #include "ViennaRNA/params/default.h"
46 #include "ViennaRNA/fold_vars.h"
47 #include "ViennaRNA/fold.h"
48 #include "ViennaRNA/pair_mat.h"
49 #include "ViennaRNA/params/basic.h"
50 #include "ViennaRNA/loops/all.h"
51 #include "ViennaRNA/plex.h"
52 #include "ViennaRNA/ali_plex.h"
53 
54 /**
55 *** Due to the great similarity between functions,
56 *** more annotation can be found in plex.c
57 **/
58 
59 PRIVATE short *
60 encode_seq(const char *seq);
61 
62 
63 PRIVATE void
64 update_dfold_params(void);
65 
66 
67 /**
68 *** aliduplexfold(_XS)/alibacktrack(_XS) computes duplex interaction with standard energy and considers extension_cost
69 *** alifind_max(_XS)/aliplot_max(_XS) find suboptimals and MFE
70 **/
71 PRIVATE duplexT
72 aliduplexfold(const char  *s1[],
73               const char  *s2[],
74               const int   extension_cost);
75 
76 
77 PRIVATE char *
78 alibacktrack(int          n3,
79              int          n4,
80              int          i,
81              int          j,
82              const short  *s1[],
83              const short  *s2[],
84              const int    extension_cost);
85 
86 
87 PRIVATE void
88 alifind_max(const int   *position,
89             const int   *position_j,
90             const int   delta,
91             const int   threshold,
92             const int   alignment_length,
93             const char  *s1[],
94             const char  *s2[],
95             const int   extension_cost,
96             const int   fast);
97 
98 
99 PRIVATE void
100 aliplot_max(const int   max,
101             const int   max_pos,
102             const int   max_pos_j,
103             const int   alignment_length,
104             const char  *s1[],
105             const char  *s2[],
106             const int   extension_cost,
107             const int   fast);
108 
109 
110 PRIVATE duplexT
111 aliduplexfold_XS(const char *s1[],
112                  const char *s2[],
113                  const int  **access_s1,
114                  const int  **access_s2,
115                  const int  i_pos,
116                  const int  j_pos,
117                  const int  threshold,
118                  const int  i_flag,
119                  const int  j_flag);
120 
121 
122 PRIVATE char *
123 alibacktrack_XS(int         n3,
124                 int         n4,
125                 int         i,
126                 int         j,
127                 const short *s1[],
128                 const short *s2[],
129                 const int   **access_s1,
130                 const int   **access_s2,
131                 const int   i_flag,
132                 const int   j_flag);
133 
134 
135 PRIVATE void
136 alifind_max_XS(const int  *position,
137                const int  *position_j,
138                const int  delta,
139                const int  threshold,
140                const int  alignment_length,
141                const char *s1[],
142                const char *s2[],
143                const int  **access_s1,
144                const int  **access_s2,
145                const int  fast);
146 
147 
148 PRIVATE void
149 aliplot_max_XS(const int  max,
150                const int  max_pos,
151                const int  max_pos_j,
152                const int  alignment_length,
153                const char *s1[],
154                const char *s2[],
155                const int  **access_s1,
156                const int  **access_s2,
157                const int  fast);
158 
159 
160 /**
161 *** computes covariance score
162 **/
163 
164 PRIVATE int
165 covscore(const int  *types,
166          int        n_seq);
167 
168 
169 extern double cv_fact; /* from alifold.c, default 1 */
170 extern double nc_fact;
171 
172 
173 /*@unused@*/
174 
175 #define MAXSECTORS      500     /* dimension for a backtrack array */
176 #define LOCALITY        0.      /* locality parameter for base-pairs */
177 
178 PRIVATE vrna_param_t  *P = NULL;
179 PRIVATE int           **c = NULL;
180 PRIVATE int           **lc = NULL, **lin = NULL, **lbx = NULL, **lby = NULL, **linx = NULL,
181                       **liny = NULL;
182 
183 
184 PRIVATE int           n1, n2;
185 PRIVATE int           n3, n4;
186 PRIVATE int           delay_free = 0;
187 
188 
189 /*-----------------------------------------------------------------------duplexfold_XS---------------------------------------------------------------------------*/
190 
191 
192 /*----------------------------------------------ALIDUPLEXFOLD-----------------------------------------------------------------------------------------------------------*/
193 PRIVATE duplexT
aliduplexfold(const char * s1[],const char * s2[],const int extension_cost)194 aliduplexfold(const char  *s1[],
195               const char  *s2[],
196               const int   extension_cost)
197 {
198   int       i, j, s, n_seq, Emin = INF, i_min = 0, j_min = 0;
199   char      *struc;
200   duplexT   mfe;
201   vrna_md_t md;
202   short     **S1, **S2;
203   int       *type;
204 
205   n3  = (int)strlen(s1[0]);
206   n4  = (int)strlen(s2[0]);
207   for (s = 0; s1[s] != NULL; s++);
208   n_seq = s;
209   for (s = 0; s2[s] != NULL; s++);
210   if (n_seq != s)
211     vrna_message_error("unequal number of sequences in aliduplexfold()\n");
212 
213   set_model_details(&md);
214   if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
215     update_fold_params();
216     if (P)
217       free(P);
218 
219     P = vrna_params(&md);
220     make_pair_matrix();
221   }
222 
223   c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
224   for (i = 1; i <= n3; i++)
225     c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
226 
227   S1  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
228   S2  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
229   for (s = 0; s < n_seq; s++) {
230     if (strlen(s1[s]) != n3)
231       vrna_message_error("uneqal seqence lengths");
232 
233     if (strlen(s2[s]) != n4)
234       vrna_message_error("uneqal seqence lengths");
235 
236     S1[s] = encode_seq(s1[s]);
237     S2[s] = encode_seq(s2[s]);
238   }
239   type = (int *)vrna_alloc(n_seq * sizeof(int));
240 
241   for (i = 1; i <= n3; i++) {
242     for (j = n4; j > 0; j--) {
243       int k, l, E, psc;
244       for (s = 0; s < n_seq; s++)
245         type[s] = pair[S1[s][i]][S2[s][j]];
246       psc = covscore(type, n_seq);
247       for (s = 0; s < n_seq; s++)
248         if (type[s] == 0)
249           type[s] = 7;
250 
251       c[i][j] = (psc >= MINPSCORE) ? (n_seq * (P->DuplexInit + 2 * extension_cost)) : INF;
252       if (psc < MINPSCORE)
253         continue;
254 
255       for (s = 0; s < n_seq; s++)
256         c[i][j] +=
257           vrna_E_ext_stem(type[s], (i > 1) ? S1[s][i - 1] : -1, (j < n4) ? S2[s][j + 1] : -1,
258                           P) + 2 * extension_cost;
259       for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
260         for (l = j + 1; l <= n4; l++) {
261           int type2;
262           if (i - k + l - j - 2 > MAXLOOP)
263             break;
264 
265           if (c[k][l] > INF / 2)
266             continue;
267 
268           for (E = s = 0; s < n_seq; s++) {
269             type2 = pair[S1[s][k]][S2[s][l]];
270             if (type2 == 0)
271               type2 = 7;
272 
273             E += E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type[s]],
274                            S1[s][k + 1], S2[s][l - 1], S1[s][i - 1], S2[s][j + 1],
275                            P) + (i - k + l - j) * extension_cost;
276           }
277           c[i][j] = MIN2(c[i][j], c[k][l] + E);
278         }
279       }
280       c[i][j] -= psc;
281       E       = c[i][j];
282       for (s = 0; s < n_seq; s++)
283         E +=
284           vrna_E_ext_stem(rtype[type[s]], (j > 1) ? S2[s][j - 1] : -1, (i < n3) ? S1[s][i + 1] : -1,
285                           P) + 2 * extension_cost;
286       if (E < Emin) {
287         Emin  = E;
288         i_min = i;
289         j_min = j;
290       }
291     }
292   }
293   struc =
294     alibacktrack(n3, n4, i_min, j_min, (const short int **)S1, (const short int **)S2, extension_cost);
295   if (i_min < n3)
296     i_min++;
297 
298   if (j_min > 1)
299     j_min--;
300 
301   int size;
302   size          = strlen(struc) - 1;
303   Emin          -= size * n_seq * extension_cost;
304   mfe.i         = i_min;
305   mfe.j         = j_min;
306   mfe.energy    = (float)(Emin / (100. * n_seq));
307   mfe.structure = struc;
308   if (!delay_free) {
309     for (i = 1; i <= n3; i++)
310       free(c[i]);
311     free(c);
312   }
313 
314   for (s = 0; s < n_seq; s++) {
315     free(S1[s]);
316     free(S2[s]);
317   }
318   free(S1);
319   free(S2);
320   free(type);
321   return mfe;
322 }
323 
324 
325 PRIVATE char *
alibacktrack(int n3,int n4,int i,int j,const short * S1[],const short * S2[],const int extension_cost)326 alibacktrack(int          n3,
327              int          n4,
328              int          i,
329              int          j,
330              const short  *S1[],
331              const short  *S2[],
332              const int    extension_cost)
333 {
334   /* backtrack structure going backwards from i, and forwards from j
335    * return structure in bracket notation with & as separator */
336   int   k, l, *type, type2, E, traced, i0, j0, s, n_seq;
337   char  *st1, *st2, *struc;
338 
339   for (s = 0; S1[s] != NULL; s++);
340   n_seq = s;
341   for (s = 0; S2[s] != NULL; s++);
342   if (n_seq != s)
343     vrna_message_error("unequal number of sequences in alibacktrack()\n");
344 
345   st1   = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
346   st2   = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
347   type  = (int *)vrna_alloc(n_seq * sizeof(int));
348 
349   i0  = MIN2(i + 1, n3);
350   j0  = MAX2(j - 1, 1);
351 
352   while (i > 0 && j <= n4) {
353     int psc;
354     E           = c[i][j];
355     traced      = 0;
356     st1[i - 1]  = '(';
357     st2[j - 1]  = ')';
358     for (s = 0; s < n_seq; s++)
359       type[s] = pair[S1[s][i]][S2[s][j]];
360     psc = covscore(type, n_seq);
361     for (s = 0; s < n_seq; s++)
362       if (type[s] == 0)
363         type[s] = 7;
364 
365     E += psc;
366     for (k = i - 1; k > 0 && k > i - MAXLOOP - 2; k--) {
367       for (l = j + 1; l <= n4; l++) {
368         int LE;
369         if (i - k + l - j - 2 > MAXLOOP)
370           break;
371 
372         if (c[k][l] > INF / 2)
373           continue;
374 
375         for (s = LE = 0; s < n_seq; s++) {
376           type2 = pair[S1[s][k]][S2[s][l]];
377           if (type2 == 0)
378             type2 = 7;
379 
380           LE += E_IntLoop(i - k - 1, l - j - 1, type2, rtype[type[s]],
381                           S1[s][k + 1], S2[s][l - 1], S1[s][i - 1], S2[s][j + 1],
382                           P) + (i - k + l - j) * extension_cost;
383         }
384         if (E == c[k][l] + LE) {
385           traced  = 1;
386           i       = k;
387           j       = l;
388           break;
389         }
390       }
391       if (traced)
392         break;
393     }
394     if (!traced) {
395       for (s = 0; s < n_seq; s++)
396         E -=
397           vrna_E_ext_stem(type[s], (i > 1) ? S1[s][i - 1] : -1, (j < n4) ? S2[s][j + 1] : -1,
398                           P) + 2 * extension_cost;
399       if (E != n_seq * P->DuplexInit + n_seq * 2 * extension_cost)
400         vrna_message_error("backtrack failed in aliduplex");
401       else
402         break;
403     }
404   }
405   if (i > 1)
406     i--;
407 
408   if (j < n4)
409     j++;
410 
411   struc = (char *)vrna_alloc(i0 - i + 1 + j - j0 + 1 + 2);
412   for (k = MAX2(i, 1); k <= i0; k++)
413     if (!st1[k - 1])
414       st1[k - 1] = '.';
415 
416   for (k = j0; k <= j; k++)
417     if (!st2[k - 1])
418       st2[k - 1] = '.';
419 
420   strcpy(struc, st1 + MAX2(i - 1, 0));
421   strcat(struc, "&");
422   strcat(struc, st2 + j0 - 1);
423 
424   /* printf("%s %3d,%-3d : %3d,%-3d\n", struc, i,i0,j0,j);  */
425   free(st1);
426   free(st2);
427   free(type);
428 
429   return struc;
430 }
431 
432 
433 duplexT **
aliLduplexfold(const char * s1[],const char * s2[],const int threshold,const int extension_cost,const int alignment_length,const int delta,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)434 aliLduplexfold(const char *s1[],
435                const char *s2[],
436                const int  threshold,
437                const int  extension_cost,
438                const int  alignment_length,
439                const int  delta,
440                const int  fast,
441                const int  il_a,
442                const int  il_b,
443                const int  b_a,
444                const int  b_b)
445 {
446   short **S1, **S2;
447   int   *type, type2;
448   int   i, j, s, n_seq;
449 
450   s = 0;
451   int   bopen       = b_b;
452   int   bext        = b_a + extension_cost;
453   int   iopen       = il_b;
454   int   iext_s      = 2 * (il_a + extension_cost);  /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
455   int   iext_ass    = 50 + il_a + extension_cost;   /* iext_ass assymetric extension of interior loop, either on i or on j side. */
456   int   min_colonne = INF;                          /* enthaelt das maximum einer kolonne */
457   int   i_length;
458   int   max_pos;                                    /* get position of the best hit */
459   int   max_pos_j;
460   int   temp;
461   int   min_j_colonne;
462   int   max = INF;
463   /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
464   int   *position; /* contains the position of the hits with energy > E */
465   int   *position_j;
466 
467 
468   n1  = (int)strlen(s1[0]);
469   n2  = (int)strlen(s2[0]);
470   for (s = 0; s1[s]; s++);
471   n_seq = s;
472   for (s = 0; s2[s]; s++);
473   if (n_seq != s)
474     vrna_message_error("unequal number of sequences in aliduplexfold()\n");
475 
476   position    = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
477   position_j  = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
478 
479   if ((!P) || (fabs(P->temperature - temperature) > 1e-6))
480     update_dfold_params();
481 
482   lc    = (int **)vrna_alloc(sizeof(int *) * 5);
483   lin   = (int **)vrna_alloc(sizeof(int *) * 5);
484   lbx   = (int **)vrna_alloc(sizeof(int *) * 5);
485   lby   = (int **)vrna_alloc(sizeof(int *) * 5);
486   linx  = (int **)vrna_alloc(sizeof(int *) * 5);
487   liny  = (int **)vrna_alloc(sizeof(int *) * 5);
488 
489   for (i = 0; i <= 4; i++) {
490     lc[i]   = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
491     lin[i]  = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
492     lbx[i]  = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
493     lby[i]  = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
494     linx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
495     liny[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
496   }
497 
498 
499   S1  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
500   S2  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
501   for (s = 0; s < n_seq; s++) {
502     if (strlen(s1[s]) != n1)
503       vrna_message_error("uneqal seqence lengths");
504 
505     if (strlen(s2[s]) != n2)
506       vrna_message_error("uneqal seqence lengths");
507 
508     S1[s] = encode_seq(s1[s]);
509     S2[s] = encode_seq(s2[s]);
510   }
511   type = (int *)vrna_alloc(n_seq * sizeof(int));
512   /**
513   *** array initialization
514   **/
515   for (j = n2; j >= 0; j--) {
516     lbx[0][j]   = lbx[1][j] = lbx[2][j] = lbx[3][j] = lbx[4][j] = INF;
517     lin[0][j]   = lin[1][j] = lin[2][j] = lin[3][j] = lin[4][j] = INF;
518     lc[0][j]    = lc[1][j] = lc[2][j] = lc[3][j] = lc[4][j] = INF;
519     lby[0][j]   = lby[1][j] = lby[2][j] = lby[3][j] = lby[4][j] = INF;
520     liny[0][j]  = liny[1][j] = liny[2][j] = liny[3][j] = liny[4][j] = INF;
521     linx[0][j]  = linx[1][j] = linx[2][j] = linx[3][j] = linx[4][j] = INF;
522   }
523   i         = 10;
524   i_length  = n1 - 9;
525   while (i < i_length) {
526     int idx   = i % 5;
527     int idx_1 = (i - 1) % 5;
528     int idx_2 = (i - 2) % 5;
529     int idx_3 = (i - 3) % 5;
530     int idx_4 = (i - 4) % 5;
531     j = n2 - 9;
532     while (9 < --j) {
533       int psc;
534       for (s = 0; s < n_seq; s++)
535         type[s] = pair[S1[s][i]][S2[s][j]];
536       psc = covscore(type, n_seq);
537       for (s = 0; s < n_seq; s++)
538         if (type[s] == 0)
539           type[s] = 7;
540 
541       lc[idx][j] = (psc >= MINPSCORE) ? (n_seq * P->DuplexInit + 2 * n_seq * extension_cost) : INF;
542       /**
543       *** Update matrix. It is the average over all sequence of a given structure element
544       *** c_stack -> stacking of c
545       *** c_10, c01 -> stack from bulge
546       *** c_nm -> arrives in stack from nxm loop
547       *** c_in -> arrives in stack from interior loop
548       *** c_bx -> arrives in stack from large bulge on target
549       *** c_by -> arrives in stack from large bulge on query
550       ***
551       **/
552       int c_stack, c_10, c_01, c_11, c_22, c_21, c_12, c_23, c_32, c_in, c_in2x, c_in2y, c_bx, c_by,
553           c_inx, c_iny;           /* matrix c */
554       int in, in_x, in_y, in_xy;  /*  in begin, in_x assymetric, in_y assymetric, in_xy symetric; */
555       int inx, inx_x;
556       int iny, iny_y;
557       int bx, bx_x;
558       int by, by_y;
559       in      = lc[idx_1][j + 1];
560       in_x    = lin[idx_1][j];
561       in_y    = lin[idx][j + 1];
562       in_xy   = lin[idx_1][j + 1];
563       inx     = lc[idx_1][j + 1];
564       inx_x   = linx[idx_1][j];
565       iny     = lc[idx_1][j + 1];
566       iny_y   = liny[idx][j + 1];
567       bx      = lc[idx_1][j];
568       bx_x    = lbx[idx_1][j];
569       by      = lc[idx][j + 1];
570       by_y    = lby[idx][j + 1];
571       c_stack = lc[idx_1][j + 1];
572       c_01    = lc[idx_1][j + 2];
573       c_10    = lc[idx_2][j + 1];
574       c_12    = lc[idx_2][j + 3];
575       c_21    = lc[idx_3][j + 2];
576       c_11    = lc[idx_2][j + 2];
577       c_22    = lc[idx_3][j + 3];
578       c_32    = lc[idx_4][j + 3];
579       c_23    = lc[idx_3][j + 4];
580       c_in    = lin[idx_3][j + 3];
581       c_in2x  = lin[idx_4][j + 2];
582       c_in2y  = lin[idx_2][j + 4];
583       c_inx   = linx[idx_3][j + 1];
584       c_iny   = liny[idx_1][j + 3];
585       c_bx    = lbx[idx_2][j + 1];
586       c_by    = lby[idx_1][j + 2];
587       for (s = 0; s < n_seq; s++) {
588         type2 = pair[S2[s][j + 1]][S1[s][i - 1]];
589         in    += P->mismatchI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s;
590         in_x  += iext_ass;
591         in_y  += iext_ass;
592         in_xy += iext_s;
593         inx   += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s;
594         inx_x += iext_ass;
595         iny   += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s;
596         iny_y += iext_ass;
597         type2 = pair[S2[s][j]][S1[s][i - 1]];
598         bx    += bopen + bext + (type2 > 2 ? P->TerminalAU : 0);
599         bx_x  += bext;
600         type2 = pair[S2[s][j + 1]][S1[s][i]];
601         by    += bopen + bext + (type2 > 2 ? P->TerminalAU : 0);
602         by_y  += bext;
603       }
604       lin [idx][j]  = MIN2(in, MIN2(in_x, MIN2(in_y, in_xy)));
605       linx[idx][j]  = MIN2(inx_x, inx);
606       liny[idx][j]  = MIN2(iny_y, iny);
607       lby[idx][j]   = MIN2(by, by_y);
608       lbx[idx][j]   = MIN2(bx, bx_x);
609 
610       if (psc < MINPSCORE)
611         continue;
612 
613       for (s = 0; s < n_seq; s++)
614         lc[idx][j] += vrna_E_ext_stem(type[s], S1[s][i - 1], S2[s][j + 1], P) + 2 * extension_cost;
615       for (s = 0; s < n_seq; s++) {
616         type2 = pair[S1[s][i - 1]][S2[s][j + 1]];
617         if (type2 == 0)
618           type2 = 7;
619 
620         c_stack +=
621           E_IntLoop(0, 0, type2, rtype[type[s]], S1[s][i], S2[s][j], S1[s][i - 1], S2[s][j + 1],
622                     P) + 2 * extension_cost;
623         type2 = pair[S1[s][i - 1]][S2[s][j + 2]];
624         if (type2 == 0)
625           type2 = 7;
626 
627         c_01 +=
628           E_IntLoop(0, 1, type2, rtype[type[s]], S1[s][i], S2[s][j + 1], S1[s][i - 1], S2[s][j + 1],
629                     P) + 3 * extension_cost;
630         type2 = pair[S1[s][i - 2]][S2[s][j + 1]];
631         if (type2 == 0)
632           type2 = 7;
633 
634         c_10 +=
635           E_IntLoop(1, 0, type2, rtype[type[s]], S1[s][i - 1], S2[s][j], S1[s][i - 1], S2[s][j + 1],
636                     P) + 3 * extension_cost;
637         type2 = pair[S1[s][i - 2]][S2[s][j + 2]];
638         if (type2 == 0)
639           type2 = 7;
640 
641         c_11 += E_IntLoop(1,
642                           1,
643                           type2,
644                           rtype[type[s]],
645                           S1[s][i - 1],
646                           S2[s][j + 1],
647                           S1[s][i - 1],
648                           S2[s][j + 1],
649                           P) + 4 * extension_cost;
650         type2 = pair[S1[s][i - 3]][S2[s][j + 3]];
651         if (type2 == 0)
652           type2 = 7;
653 
654         c_22 += E_IntLoop(2,
655                           2,
656                           type2,
657                           rtype[type[s]],
658                           S1[s][i - 2],
659                           S2[s][j + 2],
660                           S1[s][i - 1],
661                           S2[s][j + 1],
662                           P) + 6 * extension_cost;
663         type2 = pair[S1[s][i - 3]][S2[s][j + 2]];
664         if (type2 == 0)
665           type2 = 7;
666 
667         c_21 += E_IntLoop(2,
668                           1,
669                           type2,
670                           rtype[type[s]],
671                           S1[s][i - 2],
672                           S2[s][j + 1],
673                           S1[s][i - 1],
674                           S2[s][j + 1],
675                           P) + 5 * extension_cost;
676         type2 = pair[S1[s][i - 2]][S2[s][j + 3]];
677         if (type2 == 0)
678           type2 = 7;
679 
680         c_12 += E_IntLoop(1,
681                           2,
682                           type2,
683                           rtype[type[s]],
684                           S1[s][i - 1],
685                           S2[s][j + 2],
686                           S1[s][i - 1],
687                           S2[s][j + 1],
688                           P) + 5 * extension_cost;
689         type2 = pair[S1[s][i - 4]][S2[s][j + 3]];
690         if (type2 == 0)
691           type2 = 7;
692 
693         c_32 += E_IntLoop(3,
694                           2,
695                           type2,
696                           rtype[type[s]],
697                           S1[s][i - 3],
698                           S2[s][j + 2],
699                           S1[s][i - 1],
700                           S2[s][j + 1],
701                           P) + 7 * extension_cost;
702         type2 = pair[S1[s][i - 3]][S2[s][j + 4]];
703         if (type2 == 0)
704           type2 = 7;
705 
706         c_23 += E_IntLoop(2,
707                           3,
708                           type2,
709                           rtype[type[s]],
710                           S1[s][i - 2],
711                           S2[s][j + 3],
712                           S1[s][i - 1],
713                           S2[s][j + 1],
714                           P) + 7 * extension_cost;
715         c_in += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + 2 * extension_cost +
716                 2 * iext_s;
717         c_in2x += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
718                   iext_ass + 2 * extension_cost;
719         c_in2y += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
720                   iext_ass + 2 * extension_cost;
721         c_inx += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
722                  iext_ass + 2 * extension_cost;
723         c_iny += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
724                  iext_ass + 2 * extension_cost;
725         int bAU;
726         bAU   = (type[s] > 2 ? P->TerminalAU : 0);
727         c_bx  += 2 * extension_cost + bext + bAU;
728         c_by  += 2 * extension_cost + bext + bAU;
729       }
730       lc[idx][j] = MIN2(lc[idx][j],
731                         MIN2(c_stack,
732                              MIN2(c_10,
733                                   MIN2(c_01,
734                                        MIN2(c_11,
735                                             MIN2(c_21,
736                                                  MIN2(c_12,
737                                                       MIN2(c_22,
738                                                            MIN2(c_23,
739                                                                 MIN2(c_32,
740                                                                      MIN2(c_bx,
741                                                                           MIN2(c_by,
742                                                                                MIN2(c_in,
743                                                                                     MIN2(c_in2x,
744                                                                                          MIN2(c_in2y,
745                                                                                               MIN2(
746                                                                                                 c_inx,
747                                                                                                 c_iny)
748                                                                                               )
749                                                                                          )
750                                                                                     )
751                                                                                )
752                                                                           )
753                                                                      )
754                                                                 )
755                                                            )
756                                                       )
757                                                  )
758                                             )
759                                        )
760                                   )
761                              )
762                         );
763       lc[idx][j]  -= psc;
764       temp        = lc[idx][j];
765       for (s = 0; s < n_seq; s++)
766         temp += vrna_E_ext_stem(rtype[type[s]], S2[s][j - 1], S1[s][i + 1], P) + 2 * extension_cost;
767       if (min_colonne > temp) {
768         min_colonne   = temp;
769         min_j_colonne = j;
770       }
771     }
772     if (max >= min_colonne) {
773       max       = min_colonne;
774       max_pos   = i;
775       max_pos_j = min_j_colonne;
776     }
777 
778     position[i + delta]   = min_colonne;
779     min_colonne           = INF;
780     position_j[i + delta] = min_j_colonne;
781     i++;
782   }
783   /* printf("MAX:%d ",max); */
784   for (s = 0; s < n_seq; s++) {
785     free(S1[s]);
786     free(S2[s]);
787   }
788   free(S1);
789   free(S2);
790   if (max < threshold) {
791     alifind_max(position,
792                 position_j,
793                 delta,
794                 threshold,
795                 alignment_length,
796                 s1,
797                 s2,
798                 extension_cost,
799                 fast);
800   }
801 
802   aliplot_max(max, max_pos, max_pos_j, alignment_length, s1, s2, extension_cost, fast);
803   for (i = 0; i <= 4; i++) {
804     free(lc[i]);
805     free(lin[i]);
806     free(lbx[i]);
807     free(lby[i]);
808     free(linx[i]);
809     free(liny[i]);
810   }
811   /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]);free(linx[0]);free(liny[0]); */
812   free(lc);
813   free(lin);
814   free(lbx);
815   free(lby);
816   free(linx);
817   free(liny);
818   free(position);
819   free(position_j);
820   free(type);
821   return NULL;
822 }
823 
824 
825 PRIVATE void
alifind_max(const int * position,const int * position_j,const int delta,const int threshold,const int alignment_length,const char * s1[],const char * s2[],const int extension_cost,const int fast)826 alifind_max(const int   *position,
827             const int   *position_j,
828             const int   delta,
829             const int   threshold,
830             const int   alignment_length,
831             const char  *s1[],
832             const char  *s2[],
833             const int   extension_cost,
834             const int   fast)
835 {
836   int n_seq = 0;
837 
838   for (n_seq = 0; s1[n_seq] != NULL; n_seq++);
839   int pos = n1 - 9;
840   if (fast == 1) {
841     while (10 < pos--) {
842       int temp_min = 0;
843       if (position[pos + delta] < (threshold)) {
844         int search_range;
845         search_range = delta + 1;
846         while (--search_range)
847           if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
848             temp_min = search_range;
849 
850         pos -= temp_min;
851         int max_pos_j;
852         max_pos_j = position_j[pos + delta];
853         int max;
854         max = position[pos + delta];
855         printf("target upper bound %d: query lower bound %d  (%5.2f) \n",
856                pos - 10,
857                max_pos_j - 10,
858                ((double)max) / (n_seq * 100));
859         pos = MAX2(10, pos + temp_min - delta);
860       }
861     }
862   } else {
863     pos = n1 - 9;
864     while (pos-- > 10) {
865       /* printf("delta %d position:%d value:%d\n", delta, pos, position[pos]); */
866       int temp_min = 0;
867       if (position[pos + delta] < (threshold)) {
868         int search_range;
869         search_range = delta + 1;
870         while (--search_range)
871           if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
872             temp_min = search_range;
873 
874         pos -= temp_min;
875         int   max_pos_j;
876         max_pos_j = position_j[pos + delta];
877         /* printf("%d %d %d\n", pos, max_pos_j,position[pos+delta]); */
878         int   begin_t = MAX2(11, pos - alignment_length + 1);
879         int   end_t = MIN2(n1 - 10, pos + 1);
880         int   begin_q = MAX2(11, max_pos_j - 1);
881         int   end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
882         char  **s3, **s4;
883         s3  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
884         s4  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
885         int   i;
886         for (i = 0; i < n_seq; i++) {
887           s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
888           s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
889           strncpy(s3[i], (s1[i] + begin_t - 1), end_t - begin_t + 1);
890           strncpy(s4[i], (s2[i] + begin_q - 1), end_q - begin_q + 1);
891           s3[i][end_t - begin_t + 1]  = '\0';
892           s4[i][end_q - begin_q + 1]  = '\0';
893         }
894         duplexT test;
895         test = aliduplexfold((const char **)s3, (const char **)s4, extension_cost);
896         /* printf("test %d threshold %d",test.energy*100,(threshold/n_seq)); */
897         if (test.energy * 100 < (int)(threshold / n_seq)) {
898           int l1 = strchr(test.structure, '&') - test.structure;
899           printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", test.structure,
900                  begin_t - 10 + test.i - l1,
901                  begin_t - 10 + test.i - 1,
902                  begin_q - 10 + test.j - 1,
903                  begin_q - 11 + test.j + (int)strlen(test.structure) - l1 - 2, test.energy);
904           pos = MAX2(10, pos + temp_min - delta);
905         }
906 
907         for (i = 0; i < n_seq; i++) {
908           free(s3[i]);
909           free(s4[i]);
910         }
911         free(s3);
912         free(s4);
913         free(test.structure);
914       }
915     }
916   }
917 }
918 
919 
920 PRIVATE void
aliplot_max(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1[],const char * s2[],const int extension_cost,const int fast)921 aliplot_max(const int   max,
922             const int   max_pos,
923             const int   max_pos_j,
924             const int   alignment_length,
925             const char  *s1[],
926             const char  *s2[],
927             const int   extension_cost,
928             const int   fast)
929 {
930   int n_seq;
931 
932   for (n_seq = 0; !(s1[n_seq] == NULL); n_seq++);
933   n1  = strlen(s1[0]);  /* get length of alignment */
934   n2  = strlen(s2[0]);  /* get length of alignment */
935   if (fast == 1) {
936     printf("target upper bound %d: query lower bound %d (%5.2f)\n",
937            max_pos - 10, max_pos_j - 10, (double)((double)max) / (100 * n_seq));
938   } else {
939     int   begin_t = MAX2(11, max_pos - alignment_length + 1);
940     int   end_t = MIN2(n1 - 10, max_pos + 1);
941     int   begin_q = MAX2(11, max_pos_j - 1);
942     int   end_q = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
943     char  **s3, **s4;
944     s3  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
945     s4  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
946     int   i;
947     for (i = 0; i < n_seq; i++) {
948       s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
949       s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
950       strncpy(s3[i], (s1[i] + begin_t - 1), end_t - begin_t + 1);
951       strncpy(s4[i], (s2[i] + begin_q - 1), end_q - begin_q + 1);
952       s3[i][end_t - begin_t + 1]  = '\0';
953       s4[i][end_q - begin_q + 1]  = '\0';
954     }
955     duplexT test;
956     s3[n_seq] = s4[n_seq] = NULL;
957     test      = aliduplexfold((const char **)s3, (const char **)s4, extension_cost);
958     int     l1 = strchr(test.structure, '&') - test.structure;
959     printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n",
960            test.structure,
961            begin_t - 10 + test.i - l1,
962            begin_t - 10 + test.i - 1,
963            begin_q - 10 + test.j - 1,
964            begin_q - 11 + test.j + (int)strlen(test.structure) - l1 - 2,
965            test.energy);
966     for (i = 0; i < n_seq; i++) {
967       free(s3[i]);
968       free(s4[i]);
969     }
970     free(s3);
971     free(s4);
972     free(test.structure);
973   }
974 }
975 
976 
977 PRIVATE duplexT
aliduplexfold_XS(const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int i_pos,const int j_pos,const int threshold,const int i_flag,const int j_flag)978 aliduplexfold_XS(const char *s1[],
979                  const char *s2[],
980                  const int  **access_s1,
981                  const int  **access_s2,
982                  const int  i_pos,
983                  const int  j_pos,
984                  const int  threshold,
985                  const int  i_flag,
986                  const int  j_flag)
987 {
988   int       i, j, s, p, q, Emin = INF, l_min = 0, k_min = 0;
989   char      *struc;
990   short     **S1, **S2;
991   int       *type, *type2;
992 
993   struc = NULL;
994   duplexT   mfe;
995   vrna_md_t md;
996   int       n_seq;
997   n3  = (int)strlen(s1[0]);
998   n4  = (int)strlen(s2[0]);
999   for (s = 0; s1[s] != NULL; s++);
1000   n_seq = s;
1001   for (s = 0; s2[s] != NULL; s++);
1002   /* printf("%d \n",i_pos); */
1003 
1004   set_model_details(&md);
1005   if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
1006     update_fold_params();
1007     if (P)
1008       free(P);
1009 
1010     P = vrna_params(&md);
1011     make_pair_matrix();
1012   }
1013 
1014   c = (int **)vrna_alloc(sizeof(int *) * (n3 + 1));
1015   for (i = 0; i <= n3; i++)
1016     c[i] = (int *)vrna_alloc(sizeof(int) * (n4 + 1));
1017   for (i = 0; i <= n3; i++)
1018     for (j = 0; j <= n4; j++)
1019       c[i][j] = INF;
1020   S1  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1021   S2  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1022   for (s = 0; s < n_seq; s++) {
1023     if (strlen(s1[s]) != n3)
1024       vrna_message_error("uneqal seqence lengths");
1025 
1026     if (strlen(s2[s]) != n4)
1027       vrna_message_error("uneqal seqence lengths");
1028 
1029     S1[s] = encode_seq(s1[s]);
1030     S2[s] = encode_seq(s2[s]);
1031   }
1032   type  = (int *)vrna_alloc(n_seq * sizeof(int));
1033   type2 = (int *)vrna_alloc(n_seq * sizeof(int));
1034   int type3, E, k, l;
1035   i = n3 - i_flag;
1036   j = 1 + j_flag;
1037   for (s = 0; s < n_seq; s++)
1038     type[s] = pair[S1[s][i]][S2[s][j]];
1039   c[i][j] = n_seq * P->DuplexInit - covscore(type, n_seq);
1040   for (s = 0; s < n_seq; s++)
1041     if (type[s] == 0)
1042       type[s] = 7;
1043 
1044   for (s = 0; s < n_seq; s++)
1045     c[i][j] += vrna_E_ext_stem(rtype[type[s]],
1046                                (j_flag ? S2[s][j - 1] : -1),
1047                                (i_flag ? S1[s][i + 1] : -1),
1048                                P);
1049   k_min = i;
1050   l_min = j;
1051   Emin  = c[i][j];
1052   for (k = i; k > 1; k--) {
1053     if (k < i)
1054       c[k + 1][0] = INF;
1055 
1056     for (l = j; l <= n4 - 1; l++) {
1057       if (!(k == i && l == j))
1058         c[k][l] = INF;
1059 
1060       int psc2;
1061       for (s = 0; s < n_seq; s++)
1062         type2[s] = pair[S1[s][k]][S2[s][l]];
1063       psc2 = covscore(type2, n_seq);
1064       if (psc2 < MINPSCORE)
1065         continue;
1066 
1067       for (s = 0; s < n_seq; s++)
1068         if (type2[s] == 0)
1069           type2[s] = 7;
1070 
1071       for (p = k + 1; p <= n3 - i_flag && p < k + MAXLOOP - 1; p++) {
1072         for (q = l - 1; q >= 1 + j_flag; q--) {
1073           if (p - k + l - q - 2 > MAXLOOP)
1074             break;
1075 
1076           for (E = s = 0; s < n_seq; s++) {
1077             type3 = pair[S1[s][p]][S2[s][q]];
1078             if (type3 == 0)
1079               type3 = 7;
1080 
1081             E += E_IntLoop(p - k - 1, l - q - 1, type2[s], rtype[type3],
1082                            S1[s][k + 1], S2[s][l - 1], S1[s][p - 1], S2[s][q + 1], P);
1083           }
1084           c[k][l] = MIN2(c[k][l], c[p][q] + E);
1085         }
1086       }
1087       c[k][l] -= psc2;
1088       E       = c[k][l];
1089       E       += n_seq * (access_s1[i - k + 1][i_pos] + access_s2[l - 1][j_pos + (l - 1) - 1]);
1090       for (s = 0; s < n_seq; s++)
1091         E +=
1092           vrna_E_ext_stem(type2[s], (k > 1) ? S1[s][k - 1] : -1, (l < n4) ? S2[s][l + 1] : -1, P);
1093       if (E < Emin) {
1094         Emin  = E;
1095         k_min = k;
1096         l_min = l;
1097       }
1098     }
1099   }
1100   if (Emin > threshold - 1) {
1101     mfe.structure = NULL;
1102     mfe.energy    = INF;
1103     for (i = 0; i <= n3; i++)
1104       free(c[i]);
1105     free(c);
1106     for (i = 0; i <= n_seq; i++) {
1107       free(S1[i]);
1108       free(S2[i]);
1109     }
1110     free(S1);
1111     free(S2);           /* free(SS1); free(SS2); */
1112     free(type);
1113     free(type2);
1114     return mfe;
1115   } else {
1116     struc = alibacktrack_XS(n3,
1117                             n4,
1118                             k_min,
1119                             l_min,
1120                             (const short int **)S1,
1121                             (const short int **)S2,
1122                             access_s1,
1123                             access_s2,
1124                             i_flag,
1125                             j_flag);
1126   }
1127 
1128   int dx_5, dx_3, dy_5, dy_3, dGx, dGy;
1129   dx_5          = 0;
1130   dx_3          = 0;
1131   dy_5          = 0;
1132   dy_3          = 0;
1133   dGx           = 0;
1134   dGy           = 0;
1135   dGx           = n_seq * (access_s1[i - k_min + 1][i_pos]);
1136   dx_3          = 0;
1137   dx_5          = 0;
1138   dGy           = n_seq * access_s2[l_min - j + 1][j_pos + (l_min - 1) - 1];
1139   mfe.tb        = i_pos - 9 - i + k_min - 1 - dx_5;
1140   mfe.te        = i_pos - 9 - 1 + dx_3;
1141   mfe.qb        = j_pos - 9 - 1 - dy_5;
1142   mfe.qe        = j_pos + l_min - 3 - 9 + dy_3;
1143   mfe.ddG       = (double)(Emin * 0.01);
1144   mfe.dG1       = (double)(dGx * (0.01));
1145   mfe.dG2       = (double)(dGy * (0.01));
1146   mfe.energy    = mfe.ddG - mfe.dG1 - mfe.dG2;
1147   mfe.structure = struc;
1148   for (i = 0; i <= n3; i++)
1149     free(c[i]);
1150   free(c);
1151   for (i = 0; i <= n_seq; i++) {
1152     free(S1[i]);
1153     free(S2[i]);
1154   }
1155   free(S1);
1156   free(S2);
1157   free(type);
1158   free(type2);
1159   return mfe;
1160 }
1161 
1162 
1163 PRIVATE char *
alibacktrack_XS(int n3,int n4,int i,int j,const short * S1[],const short * S2[],const int ** access_s1,const int ** access_s2,const int i_flag,const int j_flag)1164 alibacktrack_XS(int         n3,
1165                 int         n4,
1166                 int         i,
1167                 int         j,
1168                 const short *S1[],
1169                 const short *S2[],
1170                 const int   **access_s1,
1171                 const int   **access_s2,
1172                 const int   i_flag,
1173                 const int   j_flag)
1174 {
1175   int   k, l, *type, type2, E, traced, i0, j0, s, n_seq, psc;
1176   char  *st1 = NULL, *st2 = NULL, *struc = NULL;
1177 
1178   for (s = 0; S1[s] != NULL; s++);
1179   n_seq = s;
1180   for (s = 0; S2[s] != NULL; s++);
1181   if (n_seq != s)
1182     vrna_message_error("unequal number of sequences in alibacktrack()\n");
1183 
1184   st1   = (char *)vrna_alloc(sizeof(char) * (n3 + 1));
1185   st2   = (char *)vrna_alloc(sizeof(char) * (n4 + 1));
1186   type  = (int *)vrna_alloc(n_seq * sizeof(int));
1187 
1188   i0 = i; /*MAX2(i-1,1);*/ j0 = j;/*MIN2(j+1,n4);*/
1189   while (i <= n3 - i_flag && j >= 1 + j_flag) {
1190     E           = c[i][j];
1191     traced      = 0;
1192     st1[i - 1]  = '(';
1193     st2[j - 1]  = ')';
1194     for (s = 0; s < n_seq; s++)
1195       type[s] = pair[S1[s][i]][S2[s][j]];
1196     psc = covscore(type, n_seq);
1197     for (s = 0; s < n_seq; s++)
1198       if (type[s] == 0)
1199         type[s] = 7;
1200 
1201     E += psc;
1202     for (k = i + 1; k <= n3 && k > i - MAXLOOP - 2; k++) {
1203       for (l = j - 1; l >= 1; l--) {
1204         int LE;
1205         if (i - k + l - j - 2 > MAXLOOP)
1206           break;
1207 
1208         for (s = LE = 0; s < n_seq; s++) {
1209           type2 = pair[S1[s][k]][S2[s][l]];
1210           if (type2 == 0)
1211             type2 = 7;
1212 
1213           LE += E_IntLoop(k - i - 1, j - l - 1, type[s], rtype[type2],
1214                           S1[s][i + 1], S2[s][j - 1], S1[s][k - 1], S2[s][l + 1], P);
1215         }
1216         if (E == c[k][l] + LE) {
1217           traced  = 1;
1218           i       = k;
1219           j       = l;
1220           break;
1221         }
1222       }
1223       if (traced)
1224         break;
1225     }
1226     if (!traced) {
1227       for (s = 0; s < n_seq; s++)
1228         if (type[s] > 2)
1229           E -= P->TerminalAU;
1230 
1231       break;
1232       if (E != n_seq * P->DuplexInit)
1233         vrna_message_error("backtrack failed in fold duplex bal");
1234       else
1235         break;
1236     }
1237   }
1238   struc = (char *)vrna_alloc(i - i0 + 1 + j0 - j + 1 + 2);
1239   for (k = MAX2(i0, 1); k <= i; k++)
1240     if (!st1[k - 1])
1241       st1[k - 1] = '.';
1242 
1243   for (k = j; k <= j0; k++)
1244     if (!st2[k - 1])
1245       st2[k - 1] = '.';
1246 
1247   strcpy(struc, st1 + MAX2(i0 - 1, 0));
1248   strcat(struc, "&");
1249   strcat(struc, st2 + j - 1);
1250   free(st1);
1251   free(st2);
1252   free(type);
1253   return struc;
1254 }
1255 
1256 
1257 duplexT **
aliLduplexfold_XS(const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int threshold,const int alignment_length,const int delta,const int fast,const int il_a,const int il_b,const int b_a,const int b_b)1258 aliLduplexfold_XS(const char  *s1[],
1259                   const char  *s2[],
1260                   const int   **access_s1,
1261                   const int   **access_s2,
1262                   const int   threshold,
1263                   const int   alignment_length,
1264                   const int   delta,
1265                   const int   fast,
1266                   const int   il_a,
1267                   const int   il_b,
1268                   const int   b_a,
1269                   const int   b_b)
1270 {
1271   short **S1, **S2;
1272   int   *type, type2;
1273   int   i, j, s, n_seq;
1274 
1275   s = 0;
1276   int   bopen       = b_b;
1277   int   bext        = b_a;
1278   int   iopen       = il_b;
1279   int   iext_s      = 2 * (il_a); /* iext_s 2 nt nucleotide extension of interior loop, on i and j side */
1280   int   iext_ass    = 50 + il_a;  /* iext_ass assymetric extension of interior loop, either on i or on j side. */
1281   int   min_colonne = INF;        /* enthaelt das maximum einer kolonne */
1282   int   i_length;
1283   int   max_pos;                  /* get position of the best hit */
1284   int   max_pos_j;
1285   int   temp;
1286   int   min_j_colonne;
1287   int   max = INF;
1288   /* FOLLOWING NEXT 4 LINE DEFINES AN ARRAY CONTAINING POSITION OF THE SUBOPT IN S1 */
1289   int   *position; /* contains the position of the hits with energy > E */
1290   int   *position_j;
1291   int   maxPenalty[4];
1292 
1293   n1  = (int)strlen(s1[0]);
1294   n2  = (int)strlen(s2[0]);
1295   for (s = 0; s1[s]; s++);
1296   n_seq = s;
1297   for (s = 0; s2[s]; s++);
1298   if (n_seq != s)
1299     vrna_message_error("unequal number of sequences in aliduplexfold()\n");
1300 
1301   position    = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
1302   position_j  = (int *)vrna_alloc((delta + (n1) + 4 + delta) * sizeof(int));
1303 
1304   /**
1305   *** extension penalty, computed only once, further reduce the computation time
1306   **/
1307   if ((!P) || (fabs(P->temperature - temperature) > 1e-6))
1308     update_dfold_params();
1309 
1310   maxPenalty[0] = (int)-1 * P->stack[2][2] / 2;
1311   maxPenalty[1] = (int)-1 * P->stack[2][2];
1312   maxPenalty[2] = (int)-3 * P->stack[2][2] / 2;
1313   maxPenalty[3] = (int)-2 * P->stack[2][2];
1314 
1315 
1316   lc    = (int **)vrna_alloc(sizeof(int *) * 5);
1317   lin   = (int **)vrna_alloc(sizeof(int *) * 5);
1318   lbx   = (int **)vrna_alloc(sizeof(int *) * 5);
1319   lby   = (int **)vrna_alloc(sizeof(int *) * 5);
1320   linx  = (int **)vrna_alloc(sizeof(int *) * 5);
1321   liny  = (int **)vrna_alloc(sizeof(int *) * 5);
1322 
1323   for (i = 0; i <= 4; i++) {
1324     lc[i]   = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1325     lin[i]  = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1326     lbx[i]  = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1327     lby[i]  = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1328     linx[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1329     liny[i] = (int *)vrna_alloc(sizeof(int) * (n2 + 5));
1330   }
1331 
1332 
1333   S1  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1334   S2  = (short **)vrna_alloc((n_seq + 1) * sizeof(short *));
1335   for (s = 0; s < n_seq; s++) {
1336     if (strlen(s1[s]) != n1)
1337       vrna_message_error("uneqal seqence lengths");
1338 
1339     if (strlen(s2[s]) != n2)
1340       vrna_message_error("uneqal seqence lengths");
1341 
1342     S1[s] = encode_seq(s1[s]);
1343     S2[s] = encode_seq(s2[s]);
1344   }
1345   type = (int *)vrna_alloc(n_seq * sizeof(int));
1346   /**
1347   *** array initialization
1348   **/
1349 
1350   for (j = n2 + 4; j >= 0; j--) {
1351     lbx[0][j]   = lbx[1][j] = lbx[2][j] = lbx[3][j] = lbx[4][j] = INF;
1352     lin[0][j]   = lin[1][j] = lin[2][j] = lin[3][j] = lin[4][j] = INF;
1353     lc[0][j]    = lc[1][j] = lc[2][j] = lc[3][j] = lc[4][j] = INF;
1354     lby[0][j]   = lby[1][j] = lby[2][j] = lby[3][j] = lby[4][j] = INF;
1355     liny[0][j]  = liny[1][j] = liny[2][j] = liny[3][j] = liny[4][j] = INF;
1356     linx[0][j]  = linx[1][j] = linx[2][j] = linx[3][j] = linx[4][j] = INF;
1357   }
1358   i         = 10;
1359   i_length  = n1 - 9;
1360   int di1, di2, di3, di4; /* contains accessibility penalty */
1361   while (i < i_length) {
1362     int idx   = i % 5;
1363     int idx_1 = (i - 1) % 5;
1364     int idx_2 = (i - 2) % 5;
1365     int idx_3 = (i - 3) % 5;
1366     int idx_4 = (i - 4) % 5;
1367 
1368     di1 = access_s1[5][i] - access_s1[4][i - 1];
1369     di2 = access_s1[5][i - 1] - access_s1[4][i - 2] + di1;
1370     di3 = access_s1[5][i - 2] - access_s1[4][i - 3] + di2;
1371     di4 = access_s1[5][i - 3] - access_s1[4][i - 4] + di3;
1372     di1 = MIN2(di1, maxPenalty[0]);
1373     di2 = MIN2(di2, maxPenalty[1]);
1374     di3 = MIN2(di3, maxPenalty[2]);
1375     di4 = MIN2(di3, maxPenalty[3]);
1376     j   = n2 - 9;
1377     while (9 < --j) {
1378       int dj1, dj2, dj3, dj4;
1379       dj1 = access_s2[5][j + 4] - access_s2[4][j + 4];
1380       dj2 = access_s2[5][j + 5] - access_s2[4][j + 5] + dj1;
1381       dj3 = access_s2[5][j + 6] - access_s2[4][j + 6] + dj2;
1382       dj4 = access_s2[5][j + 7] - access_s2[4][j + 7] + dj3;
1383       dj1 = MIN2(dj1, maxPenalty[0]);
1384       dj2 = MIN2(dj2, maxPenalty[1]);
1385       dj3 = MIN2(dj3, maxPenalty[2]);
1386       dj4 = MIN2(dj4, maxPenalty[3]);
1387       int psc;
1388       for (s = 0; s < n_seq; s++) /* initialize type1 */
1389         type[s] = pair[S1[s][i]][S2[s][j]];
1390       psc = covscore(type, n_seq);
1391       for (s = 0; s < n_seq; s++)
1392         if (type[s] == 0)
1393           type[s] = 7;
1394 
1395       lc[idx][j] = ((psc >= MINPSCORE) ? n_seq * (P->DuplexInit) : INF);
1396       int c_stack, c_10, c_01, c_11, c_22, c_21, c_12, c_23, c_32, c_in, c_in2x, c_in2y, c_bx, c_by,
1397           c_inx, c_iny;           /* matrix c */
1398       int in, in_x, in_y, in_xy;  /*  in begin, in_x assymetric, in_y assymetric, in_xy symetric; */
1399       int inx, inx_x;
1400       int iny, iny_y;
1401       int bx, bx_x;
1402       int by, by_y;
1403       in      = lc[idx_1][j + 1];
1404       in_x    = lin[idx_1][j];
1405       in_y    = lin[idx][j + 1];
1406       in_xy   = lin[idx_1][j + 1];
1407       inx     = lc[idx_1][j + 1];
1408       inx_x   = linx[idx_1][j];
1409       iny     = lc[idx_1][j + 1];
1410       iny_y   = liny[idx][j + 1];
1411       bx      = lc[idx_1][j];
1412       bx_x    = lbx[idx_1][j];
1413       by      = lc[idx][j + 1];
1414       by_y    = lby[idx][j + 1];
1415       c_stack = lc[idx_1][j + 1];
1416       c_01    = lc[idx_1][j + 2];
1417       c_10    = lc[idx_2][j + 1];
1418       c_12    = lc[idx_2][j + 3];
1419       c_21    = lc[idx_3][j + 2];
1420       c_11    = lc[idx_2][j + 2];
1421       c_22    = lc[idx_3][j + 3];
1422       c_32    = lc[idx_4][j + 3];
1423       c_23    = lc[idx_3][j + 4];
1424       c_in    = lin[idx_3][j + 3];
1425       c_in2x  = lin[idx_4][j + 2];
1426       c_in2y  = lin[idx_2][j + 4];
1427       c_inx   = linx[idx_3][j + 1];
1428       c_iny   = liny[idx_1][j + 3];
1429       c_bx    = lbx[idx_2][j + 1];
1430       c_by    = lby[idx_1][j + 2];
1431       for (s = 0; s < n_seq; s++) {
1432         type2 = pair[S2[s][j + 1]][S1[s][i - 1]];
1433         in    += P->mismatchI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s + di1 + dj1;
1434         in_x  += iext_ass + di1;
1435         in_y  += iext_ass + dj1;
1436         in_xy += iext_s + di1 + dj1;
1437         inx   += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s + di1 + dj1;
1438         inx_x += iext_ass + di1;
1439         iny   += P->mismatch1nI[type2][S2[s][j]][S1[s][i]] + iopen + iext_s + di1 + dj1;
1440         iny_y += iext_ass + dj1;
1441         type2 = pair[S2[s][j]][S1[s][i - 1]];
1442         bx    += bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + di1;
1443         bx_x  += bext + di1;
1444         type2 = pair[S2[s][j + 1]][S1[s][i]];
1445         by    += bopen + bext + (type2 > 2 ? P->TerminalAU : 0) + dj1;
1446         by_y  += bext + dj1;
1447       }
1448       lin[idx][j]   = MIN2(in, MIN2(in_x, MIN2(in_y, in_xy)));
1449       linx[idx][j]  = MIN2(inx_x, inx);
1450       liny[idx][j]  = MIN2(iny_y, iny);
1451       lby[idx][j]   = MIN2(by, by_y);
1452       lbx[idx][j]   = MIN2(bx, bx_x);
1453       if (psc < MINPSCORE)
1454         continue;
1455 
1456       for (s = 0; s < n_seq; s++)
1457         lc[idx][j] += vrna_E_ext_stem(type[s], S1[s][i - 1], S2[s][j + 1], P);
1458       for (s = 0; s < n_seq; s++) {
1459         type2 = pair[S1[s][i - 1]][S2[s][j + 1]];
1460         if (type2 == 0)
1461           type2 = 7;
1462 
1463         c_stack +=
1464           E_IntLoop(0, 0, type2, rtype[type[s]], S1[s][i], S2[s][j], S1[s][i - 1], S2[s][j + 1],
1465                     P) + di1 + dj1;
1466         type2 = pair[S1[s][i - 1]][S2[s][j + 2]];
1467         if (type2 == 0)
1468           type2 = 7;
1469 
1470         c_01 +=
1471           E_IntLoop(0, 1, type2, rtype[type[s]], S1[s][i], S2[s][j + 1], S1[s][i - 1], S2[s][j + 1],
1472                     P) + di1 + dj2;
1473         type2 = pair[S1[s][i - 2]][S2[s][j + 1]];
1474         if (type2 == 0)
1475           type2 = 7;
1476 
1477         c_10 +=
1478           E_IntLoop(1, 0, type2, rtype[type[s]], S1[s][i - 1], S2[s][j], S1[s][i - 1], S2[s][j + 1],
1479                     P) + di2 + dj1;
1480         type2 = pair[S1[s][i - 2]][S2[s][j + 2]];
1481         if (type2 == 0)
1482           type2 = 7;
1483 
1484         c_11 += E_IntLoop(1,
1485                           1,
1486                           type2,
1487                           rtype[type[s]],
1488                           S1[s][i - 1],
1489                           S2[s][j + 1],
1490                           S1[s][i - 1],
1491                           S2[s][j + 1],
1492                           P) + di2 + dj2;
1493         type2 = pair[S1[s][i - 3]][S2[s][j + 3]];
1494         if (type2 == 0)
1495           type2 = 7;
1496 
1497         c_22 += E_IntLoop(2,
1498                           2,
1499                           type2,
1500                           rtype[type[s]],
1501                           S1[s][i - 2],
1502                           S2[s][j + 2],
1503                           S1[s][i - 1],
1504                           S2[s][j + 1],
1505                           P) + di3 + dj3;
1506         type2 = pair[S1[s][i - 3]][S2[s][j + 2]];
1507         if (type2 == 0)
1508           type2 = 7;
1509 
1510         c_21 += E_IntLoop(2,
1511                           1,
1512                           type2,
1513                           rtype[type[s]],
1514                           S1[s][i - 2],
1515                           S2[s][j + 1],
1516                           S1[s][i - 1],
1517                           S2[s][j + 1],
1518                           P) + di3 + dj2;
1519         type2 = pair[S1[s][i - 2]][S2[s][j + 3]];
1520         if (type2 == 0)
1521           type2 = 7;
1522 
1523         c_12 += E_IntLoop(1,
1524                           2,
1525                           type2,
1526                           rtype[type[s]],
1527                           S1[s][i - 1],
1528                           S2[s][j + 2],
1529                           S1[s][i - 1],
1530                           S2[s][j + 1],
1531                           P) + di2 + dj3;
1532         type2 = pair[S1[s][i - 4]][S2[s][j + 3]];
1533         if (type2 == 0)
1534           type2 = 7;
1535 
1536         c_32 += E_IntLoop(3,
1537                           2,
1538                           type2,
1539                           rtype[type[s]],
1540                           S1[s][i - 3],
1541                           S2[s][j + 2],
1542                           S1[s][i - 1],
1543                           S2[s][j + 1],
1544                           P) + di4 + dj3;
1545         type2 = pair[S1[s][i - 3]][S2[s][j + 4]];
1546         if (type2 == 0)
1547           type2 = 7;
1548 
1549         c_23 += E_IntLoop(2,
1550                           3,
1551                           type2,
1552                           rtype[type[s]],
1553                           S1[s][i - 2],
1554                           S2[s][j + 3],
1555                           S1[s][i - 1],
1556                           S2[s][j + 1],
1557                           P) + dj4 + di3;
1558         c_in += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + di3 + dj3 + 2 *
1559                 iext_s;
1560         c_in2x += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
1561                   iext_ass + di4 + dj2;
1562         c_in2y += P->mismatchI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_s + 2 *
1563                   iext_ass + di2 + dj4;
1564         c_inx += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
1565                  iext_ass + di3 + dj1;
1566         c_iny += P->mismatch1nI[rtype[type[s]]][S1[s][i - 1]][S2[s][j + 1]] + iext_ass +
1567                  iext_ass + di1 + dj3;
1568         int bAU;
1569         bAU   = (type[s] > 2 ? P->TerminalAU : 0);
1570         c_bx  += di2 + dj1 + bext + bAU;
1571         c_by  += di1 + dj2 + bext + bAU;
1572       }
1573       lc[idx][j] = MIN2(lc[idx][j],
1574                         MIN2(c_stack,
1575                              MIN2(c_10,
1576                                   MIN2(c_01,
1577                                        MIN2(c_11,
1578                                             MIN2(c_21,
1579                                                  MIN2(c_12,
1580                                                       MIN2(c_22,
1581                                                            MIN2(c_23,
1582                                                                 MIN2(c_32,
1583                                                                      MIN2(c_bx,
1584                                                                           MIN2(c_by,
1585                                                                                MIN2(c_in,
1586                                                                                     MIN2(c_in2x,
1587                                                                                          MIN2(c_in2y,
1588                                                                                               MIN2(
1589                                                                                                 c_inx,
1590                                                                                                 c_iny)
1591                                                                                               )
1592                                                                                          )
1593                                                                                     )
1594                                                                                )
1595                                                                           )
1596                                                                      )
1597                                                                 )
1598                                                            )
1599                                                       )
1600                                                  )
1601                                             )
1602                                        )
1603                                   )
1604                              )
1605                         );
1606       lc[idx][j]  -= psc;
1607       temp        = lc[idx][j];
1608 
1609       for (s = 0; s < n_seq; s++)
1610         temp += vrna_E_ext_stem(rtype[type[s]], S2[s][j - 1], S1[s][i + 1], P);
1611       if (min_colonne > temp) {
1612         min_colonne   = temp;
1613         min_j_colonne = j;
1614       }
1615     }
1616     if (max >= min_colonne) {
1617       max       = min_colonne;
1618       max_pos   = i;
1619       max_pos_j = min_j_colonne;
1620     }
1621 
1622     position[i + delta]   = min_colonne;
1623     min_colonne           = INF;
1624     position_j[i + delta] = min_j_colonne;
1625     i++;
1626   }
1627   /* printf("MAX: %d",max); */
1628   for (s = 0; s < n_seq; s++) {
1629     free(S1[s]);
1630     free(S2[s]);
1631   }
1632   free(S1);
1633   free(S2);
1634   if (max < threshold) {
1635     alifind_max_XS(position,
1636                    position_j,
1637                    delta,
1638                    threshold,
1639                    alignment_length,
1640                    s1,
1641                    s2,
1642                    access_s1,
1643                    access_s2,
1644                    fast);
1645   }
1646 
1647   aliplot_max_XS(max, max_pos, max_pos_j, alignment_length, s1, s2, access_s1, access_s2, fast);
1648   for (i = 0; i <= 4; i++) {
1649     free(lc[i]);
1650     free(lin[i]);
1651     free(lbx[i]);
1652     free(lby[i]);
1653     free(linx[i]);
1654     free(liny[i]);
1655   }
1656   /* free(lc[0]);free(lin[0]);free(lbx[0]);free(lby[0]);free(linx[0]);free(liny[0]); */
1657   free(lc);
1658   free(lin);
1659   free(lbx);
1660   free(lby);
1661   free(linx);
1662   free(liny);
1663   free(position);
1664   free(position_j);
1665   free(type);
1666   return NULL;
1667 }
1668 
1669 
1670 PRIVATE void
alifind_max_XS(const int * position,const int * position_j,const int delta,const int threshold,const int alignment_length,const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int fast)1671 alifind_max_XS(const int  *position,
1672                const int  *position_j,
1673                const int  delta,
1674                const int  threshold,
1675                const int  alignment_length,
1676                const char *s1[],
1677                const char *s2[],
1678                const int  **access_s1,
1679                const int  **access_s2,
1680                const int  fast)
1681 {
1682   int n_seq = 0;
1683 
1684   for (n_seq = 0; s1[n_seq] != NULL; n_seq++);
1685   int pos = n1 - 9;
1686   if (fast == 1) {
1687     while (10 < pos--) {
1688       int temp_min = 0;
1689       if (position[pos + delta] < (threshold)) {
1690         int search_range;
1691         search_range = delta + 1;
1692         while (--search_range)
1693           if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
1694             temp_min = search_range;
1695 
1696         pos -= temp_min;
1697         /*
1698          * int max_pos_j;
1699          * max_pos_j=position_j[pos+delta];
1700          * int max;
1701          * max=position[pos+delta];
1702          * printf("target upper bound %d: query lower bound %d  (%5.2f) \n", pos-10, max_pos_j-10, ((double)max)/100);
1703          */
1704         pos = MAX2(10, pos + temp_min - delta);
1705       }
1706     }
1707   } else {
1708     pos = n1 - 9;
1709     while (pos-- > 10) {
1710       /* printf("delta %d position:%d value:%d\n", delta, pos, position[pos]); */
1711       int temp_min = 0;
1712       if (position[pos + delta] < (threshold)) {
1713         int search_range;
1714         search_range = delta + 1;
1715         while (--search_range)
1716           if (position[pos + delta - search_range] <= position[pos + delta - temp_min])
1717             temp_min = search_range;
1718 
1719         pos -= temp_min;
1720         int   max_pos_j;
1721         max_pos_j = position_j[pos + delta]; /* position on j */
1722         int   begin_t = MAX2(11, pos - alignment_length);
1723         int   end_t   = MIN2(n1 - 10, pos + 1);
1724         int   begin_q = MAX2(11, max_pos_j - 1);
1725         int   end_q   = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
1726         int   i_flag;
1727         int   j_flag;
1728         i_flag  = (end_t == pos + 1 ? 1 : 0);
1729         j_flag  = (begin_q == max_pos_j - 1 ? 1 : 0);
1730         char  **s3, **s4;
1731         s3  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1732         s4  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1733         int   i;
1734         for (i = 0; i < n_seq; i++) {
1735           s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
1736           s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1737           strncpy(s3[i], (s1[i] + begin_t), end_t - begin_t + 1);
1738           strncpy(s4[i], (s2[i] + begin_q), end_q - begin_q + 1);
1739           s3[i][end_t - begin_t + 1]  = '\0';
1740           s4[i][end_q - begin_q + 1]  = '\0';
1741         }
1742         duplexT test;
1743         test = aliduplexfold_XS((const char **)s3,
1744                                 (const char **)s4,
1745                                 access_s1,
1746                                 access_s2,
1747                                 pos,
1748                                 max_pos_j,
1749                                 threshold,
1750                                 i_flag,
1751                                 j_flag);
1752         /*         printf("position %d approximation %d test %d threshold %d\n", pos, position[pos+delta], (int)test.energy,(int)(threshold/n_seq)); */
1753         if (test.energy * 100 < (int)(threshold / n_seq)) {
1754           printf("%s %3d,%-3d: %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n",
1755                  test.structure,
1756                  test.tb,
1757                  test.te,
1758                  test.qb,
1759                  test.qe,
1760                  test.ddG / n_seq,
1761                  test.energy / n_seq,
1762                  test.dG1 / n_seq,
1763                  test.dG2 / n_seq);
1764           free(test.structure);
1765           pos = MAX2(10, pos + temp_min - delta);
1766         }
1767 
1768         for (i = 0; i < n_seq; i++) {
1769           free(s3[i]);
1770           free(s4[i]);
1771         }
1772         free(s3);
1773         free(s4);
1774         /* free(test.structure); */
1775       }
1776     }
1777   }
1778 }
1779 
1780 
1781 PRIVATE void
aliplot_max_XS(const int max,const int max_pos,const int max_pos_j,const int alignment_length,const char * s1[],const char * s2[],const int ** access_s1,const int ** access_s2,const int fast)1782 aliplot_max_XS(const int  max,
1783                const int  max_pos,
1784                const int  max_pos_j,
1785                const int  alignment_length,
1786                const char *s1[],
1787                const char *s2[],
1788                const int  **access_s1,
1789                const int  **access_s2,
1790                const int  fast)
1791 {
1792   int n_seq;
1793 
1794   for (n_seq = 0; !(s1[n_seq] == NULL); n_seq++);
1795   n1  = strlen(s1[0]);  /* get length of alignment */
1796   n2  = strlen(s2[0]);  /* get length of alignme */
1797 
1798   if (fast) {
1799     printf("target upper bound %d: query lower bound %d (%5.2f)\n",
1800            max_pos - 10, max_pos_j - 10, (double)((double)max) / (100 * n_seq));
1801   } else {
1802     int   begin_t = MAX2(11, max_pos - alignment_length); /* only get the position that binds.. */
1803     int   end_t   = MIN2(n1 - 10, max_pos + 1);           /* ..no dangles */
1804     int   begin_q = MAX2(11, max_pos_j - 1);
1805     int   end_q   = MIN2(n2 - 10, max_pos_j + alignment_length - 1);
1806     int   i_flag;
1807     int   j_flag;
1808     i_flag  = (end_t == max_pos + 1 ? 1 : 0);
1809     j_flag  = (begin_q == max_pos_j - 1 ? 1 : 0);
1810     char  **s3, **s4;
1811     s3  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1812     s4  = (char **)vrna_alloc(sizeof(char *) * (n_seq + 1));
1813     int   i;
1814     for (i = 0; i < n_seq; i++) {
1815       s3[i] = (char *)vrna_alloc(sizeof(char) * (end_t - begin_t + 2));
1816       s4[i] = (char *)vrna_alloc(sizeof(char) * (end_q - begin_q + 2));
1817       strncpy(s3[i], (s1[i] + begin_t), end_t - begin_t + 1);
1818       strncpy(s4[i], (s2[i] + begin_q), end_q - begin_q + 1);
1819       s3[i][end_t - begin_t + 1]  = '\0';
1820       s4[i][end_q - begin_q + 1]  = '\0';
1821     }
1822     duplexT test;
1823     test = aliduplexfold_XS((const char **)s3,
1824                             (const char **)s4,
1825                             access_s1,
1826                             access_s2,
1827                             max_pos,
1828                             max_pos_j,
1829                             INF,
1830                             i_flag,
1831                             j_flag);
1832     printf("%s %3d,%-3d: %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n",
1833            test.structure,
1834            test.tb,
1835            test.te,
1836            test.qb,
1837            test.qe,
1838            test.ddG / n_seq,
1839            test.energy / n_seq,
1840            test.dG1 / n_seq,
1841            test.dG2 / n_seq);
1842     free(test.structure);
1843     for (i = 0; i < n_seq; i++) {
1844       free(s3[i]);
1845       free(s4[i]);
1846     }
1847     free(s3);
1848     free(s4);
1849   }
1850 }
1851 
1852 
1853 PRIVATE int
covscore(const int * types,int n_seq)1854 covscore(const int  *types,
1855          int        n_seq)
1856 {
1857   /*
1858    * calculate co-variance bonus for a pair depending on
1859    * compensatory/consistent mutations and incompatible seqs
1860    * should be 0 for conserved pairs, >0 for good pairs
1861    */
1862 #define NONE -10000                         /* score for forbidden pairs */
1863   int k, l, s, score, pscore;
1864   int dm[7][7] = { { 0, 0, 0, 0, 0, 0, 0 }, /* hamming distance between pairs */
1865                    { 0, 0, 2, 2, 1, 2, 2 } /* CG */,
1866                    { 0, 2, 0, 1, 2, 2, 2 } /* GC */,
1867                    { 0, 2, 1, 0, 2, 1, 2 } /* GU */,
1868                    { 0, 1, 2, 2, 0, 2, 1 } /* UG */,
1869                    { 0, 2, 2, 1, 2, 0, 2 } /* AU */,
1870                    { 0, 2, 2, 2, 1, 2, 0 } /* UA */ };
1871 
1872   int pfreq[8] = {
1873     0, 0, 0, 0, 0, 0, 0, 0
1874   };
1875   for (s = 0; s < n_seq; s++)
1876     pfreq[types[s]]++;
1877 
1878   if (pfreq[0] * 2 > n_seq)
1879     return NONE;
1880 
1881   for (k = 1, score = 0; k <= 6; k++) /* ignore pairtype 7 (gap-gap) */
1882     for (l = k + 1; l <= 6; l++)
1883       /*
1884        * scores for replacements between pairtypes
1885        * consistent or compensatory mutations score 1 or 2
1886        */
1887       score += pfreq[k] * pfreq[l] * dm[k][l];
1888 
1889   /* counter examples score -1, gap-gap scores -0.25   */
1890   pscore = cv_fact *
1891            ((UNIT * score) / n_seq - nc_fact * UNIT * (pfreq[0] + pfreq[7] * 0.25));
1892   return pscore;
1893 }
1894 
1895 
1896 PRIVATE void
update_dfold_params(void)1897 update_dfold_params(void)
1898 {
1899   vrna_md_t md;
1900 
1901   if (P)
1902     free(P);
1903 
1904   set_model_details(&md);
1905   P = vrna_params(&md);
1906   make_pair_matrix();
1907 }
1908 
1909 
1910 PRIVATE short *
encode_seq(const char * sequence)1911 encode_seq(const char *sequence)
1912 {
1913   unsigned int  i, l;
1914   short         *S;
1915 
1916   l     = strlen(sequence);
1917   S     = (short *)vrna_alloc(sizeof(short) * (l + 2));
1918   S[0]  = (short)l;
1919 
1920   /* make numerical encoding of sequence */
1921   for (i = 1; i <= l; i++)
1922     S[i] = (short)encode_char(toupper(sequence[i - 1]));
1923 
1924   /* for circular folding add first base at position n+1 */
1925   S[l + 1] = S[1];
1926 
1927   return S;
1928 }
1929