1 /*
2  *           Compute duplex structure of two RNA strands
3  *
4  *                         c Ivo L Hofacker
5  *                        Vienna RNA package
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11 
12 #include <ctype.h>
13 #include <dirent.h>
14 #include <math.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <time.h>
19 #include <sys/types.h>
20 #include <unistd.h>
21 #include "ViennaRNA/params/default.h"
22 #include "ViennaRNA/fold_vars.h"
23 #include "ViennaRNA/params/basic.h"
24 #include "ViennaRNA/utils/basic.h"
25 #include "ViennaRNA/utils/strings.h"
26 #include "ViennaRNA/ali_plex.h"
27 #include "ViennaRNA/alifold.h"
28 #include "ViennaRNA/utils/alignments.h"
29 #include "ViennaRNA/fold.h"
30 #include "ViennaRNA/pair_mat.h"
31 #include "ViennaRNA/plex.h"
32 #include "ViennaRNA/plotting/probabilities.h"
33 #include "ViennaRNA/plotting/alignments.h"
34 #include "ViennaRNA/params/io.h"
35 #include "ViennaRNA/io/utils.h"
36 #include "RNAplex_cmdl.h"
37 
38 
39 clock_t
BeginTimer()40 BeginTimer()
41 {
42   /* timer declaration */
43   clock_t Begin;    /* initialize Begin */
44 
45   Begin = clock();  /* start the timer */
46   return Begin;
47 }
48 
49 
50 clock_t
EndTimer(clock_t begin)51 EndTimer(clock_t begin)
52 {
53   clock_t End;
54 
55   End = clock();    /* stop the timer */
56   return End;
57 }
58 
59 
60 /* --------------------end include timer */
61 extern int subopt_sorted;
62 /* static int print_struc(duplexT const *dup); */
63 static int **average_accessibility_target(char      **names,
64                                           char      **ALN,
65                                           int       number,
66                                           char      *access,
67                                           double    verhaeltnis,
68                                           const int alignment_length,
69                                           int       binaries,
70                                           int       fast);
71 
72 
73 /* static int ** average_accessibility_query(char **names, char **ALN, int number, char *access, double verhaeltnis); */
74 static int get_max_u(const char *s,
75                      char       delim);
76 
77 
78 static int **read_plfold_i(char       *fname,
79                            const int  beg,
80                            const int  end,
81                            double     verhaeltnis,
82                            const int  length,
83                            int        fast);
84 
85 
86 /**
87  * Compute the conditional per nucleotide directly
88  */
89 /* static int ** read_plfold_j(char *fname, const int beg, const int end,double verhaeltnis); */
90 static int **read_plfold_i_bin(char       *fname,
91                                const int  beg,
92                                const int  end,
93                                double     verhaeltnis,
94                                const int  length,
95                                int        fast);
96 
97 
98 /* Compute and pass opening energies in case of f=2*/
99 static int get_sequence_length_from_alignment(char *sequence);
100 
101 
102 /* take as argument a list of hit from an alignment interaction */
103 /* Accessibility can currently not be provided */
104 static void aliprint_struct(FILE      *Result,  /* result file */
105                             FILE      *Target,  /* target alignment */
106                             FILE      *Query,
107                             const int WindowsLength);
108 
109 
110 static void linear_fit(int  *a,
111                        int  *b,
112                        int  *c,
113                        int  *d);                        /* get linear fits for Iopn, Iextension, Bopn, Bextension, I^1open and I^1extension */
114 
115 
116 /**
117  * Compute Tm based on silvana's parameters (1999)
118  */
119 double probcompute_silvana_98(char    *s1,
120                               double  k_concentration,
121                               double  tris_concentration,
122                               double  mg_concentration,
123                               double  na_concentration,
124                               double  probe_concentration);
125 
126 
127 double probcompute_silvana_04(char    *s1,
128                               double  k_concentration,
129                               double  tris_concentration,
130                               double  mg_concentration,
131                               double  na_concentration,
132                               double  probe_concentration);
133 
134 
135 double     probcompute_xia_98(char    *s1,
136                               double  na_concentration,
137                               double  probe_concentration);
138 
139 
140 double     probcompute_sug_95(char    *s1,
141                               double  na_concentration,
142                               double  probe_concentration);
143 
144 
145 double probcompute_newparameters(char   *s1,
146                                  double k_concentration,
147                                  double tris_concentration,
148                                  double mg_concentration,
149                                  double na_concentration,
150                                  double probe_concentration);
151 
152 
153 /*@unused@*/
154 static int convert_plfold_i(char *fname);/* convert test accessibility into bin accessibility. */
155 
156 
157 static char scale[] = "....,....1....,....2....,....3....,....4"
158                       "....,....5....,....6....,....7....,....8";
159 
160 /*--------------------------------------------------------------------------*/
161 
162 int
main(int argc,char * argv[])163 main(int  argc,
164      char *argv[])
165 {
166   struct        RNAplex_args_info args_info;
167 
168 #define MAX_NUM_NAMES    500
169   char                            *temp1[MAX_NUM_NAMES], *temp2[MAX_NUM_NAMES], *AS1[MAX_NUM_NAMES], *AS2[MAX_NUM_NAMES], *names1[MAX_NUM_NAMES], *names2[MAX_NUM_NAMES];
170   char                            *s1     = NULL, *s2 = NULL, *line, *cstruc = NULL, *structure = NULL;
171   char                            *line_q = NULL, *line_t = NULL;
172   char                            *tname  = NULL;
173   char                            *qname  = NULL;
174   char                            *access = NULL;
175   char                            fname[FILENAME_MAX_LENGTH];
176   char                            *ParamFile  = NULL;
177   char                            *ns_bases   = NULL, *c;
178 
179   FILE                            *Result = NULL; /* file containing the results */
180   FILE                            *mRNA   = NULL, *sRNA = NULL;
181 
182   char                            *Resultfile       = NULL;
183   int                             fold_constrained  = 0;
184   int                             i, l, sym;
185 
186   /* double kT, sfact=1.07; */
187   int                             istty, delta = -INF;
188   int                             noconv            = 0;
189   int                             extension_cost    = 0;
190   int                             deltaz            = 0;
191   int                             alignment_length  = 40;
192   int                             fast              = 0;
193   int                             redraw            = 0;
194   int                             binaries          = 0;
195   int                             convert           = 0;
196   /**
197    * Defines how many nucleotides has to be added at the begining and end of the target and query sequence in order to generate the structure figure
198    */
199   int                             WindowsLength   = 0;
200   int                             alignment_mode  = 0;
201   /**
202    * Define scaling factor for the accessibility. Default 1
203    */
204   double                          verhaeltnis = 1;
205   /**
206    * Probe Tm computation
207    */
208   double                          probe_concentration = 0.05;
209   double                          na_concentration    = 1;
210   double                          mg_concentration    = 0;
211   double                          k_concentration     = 0;
212   double                          tris_concentration  = 0;
213   int                             probe_mode          = 0;
214   /*
215    #############################################
216    # check the command line parameters
217    #############################################
218    */
219   if (RNAplex_cmdline_parser(argc, argv, &args_info) != 0)
220     exit(1);
221 
222   /*temperature*/
223   if (args_info.temp_given)
224     temperature = args_info.temp_arg;
225 
226   /*query file*/
227   if (args_info.query_given)
228     qname = strdup(args_info.query_arg);
229 
230   /*target file*/
231   if (args_info.target_given)
232     tname = strdup(args_info.target_arg);
233 
234   /*interaction_length*/
235   alignment_length = args_info.interaction_length_arg;
236   /*extension_cost*/
237   extension_cost = args_info.extension_cost_arg;
238   /*duplex_distance*/
239   deltaz = args_info.duplex_distance_arg;
240   /*energy_threshold*/
241   delta = (int)(100 * args_info.energy_threshold_arg);
242   /*fast_folding*/
243   fast = args_info.fast_folding_arg;
244   /*accessibility*/
245   if (args_info.accessibility_dir_given)
246     access = strdup(args_info.accessibility_dir_arg);
247 
248   /*produce ps arg*/
249   if (args_info.produce_ps_given) {
250     Resultfile  = strdup(args_info.produce_ps_arg);
251     redraw      = 1;
252   }
253 
254   /*WindowLength*/
255   WindowsLength = args_info.WindowLength_arg;
256   /*scale_accessibility_arg*/
257   verhaeltnis = args_info.scale_accessibility_arg;
258   /*constraint_flag*/
259   if (args_info.constraint_given)
260     fold_constrained = 1;
261 
262   /*paramFile*/
263   if (args_info.paramFile_given)
264     ParamFile = strdup(args_info.paramFile_arg);
265 
266   /*binary*/
267   if (args_info.binary_given)
268     binaries = 1;
269 
270   /*convert_to_bin*/
271   if (args_info.convert_to_bin_given)
272     convert = 1;
273 
274   /*alignment_mode*/
275   if (args_info.alignment_mode_given)
276     alignment_mode = 1;
277 
278   /*Probe mode*/
279   if (args_info.probe_mode_given)
280     probe_mode = 1;
281 
282   /*sodium concentration*/
283   na_concentration = args_info.na_concentration_arg;
284   /*magnesium concentration*/
285   mg_concentration = args_info.mg_concentration_arg;
286   /*potassium concentration*/
287   k_concentration = args_info.k_concentration_arg;
288   /*tris concentration*/
289   tris_concentration = args_info.tris_concentration_arg;
290   /*probe concentration*/
291   probe_concentration = args_info.probe_concentration_arg;
292 
293   /*Probe mode Salt concentration*/
294   if (ParamFile != NULL) {
295     if (!strcmp(ParamFile, "DNA"))
296         vrna_params_load_DNA_Mathews2004();
297     else
298       vrna_params_load(ParamFile, VRNA_PARAMETER_FORMAT_DEFAULT);
299   }
300 
301   if (ns_bases != NULL) {
302     nonstandards  = vrna_alloc(33);
303     c             = ns_bases;
304     i             = sym = 0;
305     if (*c == '-') {
306       sym = 1;
307       c++;
308     }
309 
310     while (*c != '\0') {
311       if (*c != ',') {
312         nonstandards[i++] = *c++;
313         nonstandards[i++] = *c;
314         if ((sym) && (*c != *(c - 1))) {
315           nonstandards[i++] = *c;
316           nonstandards[i++] = *(c - 1);
317         }
318       }
319 
320       c++;
321     }
322   }
323 
324   int il_a, il_b, b_a, b_b;
325   linear_fit(&il_a, &il_b, &b_a, &b_b);
326   /**
327    * check if we have two input files
328    */
329 
330 
331   /**
332    * Here we test if the user wants to convert a bunch of text opening energy files
333    * into binary
334    */
335   if (probe_mode) {
336     if (qname || tname) {
337       vrna_message_error("No query/target file allowed in Tm probe mode\nPlease pipe your input into RNAplex\n");
338       /* get sequence */
339     } else {
340       /* fix temperature to 37C */
341       temperature = 37;
342       printf("Probe mode\n");
343       char          *id_s1  = NULL;
344       char          *s1     = NULL;
345       vrna_param_t  *P      = NULL;
346       vrna_md_t     md;
347       set_model_details(&md);
348 
349       if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
350         update_fold_params();
351         P = vrna_params(&md);
352         make_pair_matrix();
353       }
354 
355       /*Initialize parameter */
356       printf("Concentration K:%3.3f TNP:%3.3f Mg:%3.3f Na:%3.3f probe:%3.3f\n\n", k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
357       printf("%100s %7s %7s %7s %7s %7s\n", "sequence", "DDSL98", "DDSL04", "DRSU95", "RRXI98", "CURRENT");
358       do {
359         istty = isatty(fileno(stdout)) && isatty(fileno(stdin));
360         if ((line = vrna_read_line(stdin)) == NULL)
361           break;
362 
363         /* skip empty lines, comment lines, name lines */
364         while ((*line == '*') || (*line == '\0') || (*line == '>')) {
365           printf("%s\n", line);
366           if (*line == '>') {
367             id_s1 = (char *)vrna_alloc(strlen(line) + 2);
368             (void)sscanf(line, "%s", id_s1);
369             memmove(id_s1, id_s1 + 1, strlen(id_s1));
370           }
371 
372           free(line);
373           if ((line = vrna_read_line(stdin)) == NULL) {
374             free(id_s1);
375             break;
376           }
377         }
378         if ((line == NULL) || (strcmp(line, "@") == 0))
379           break;
380 
381         s1 = (char *)vrna_alloc(strlen(line) + 1);
382         strcpy(s1, line);
383         /*compute duplex/entropy energy for the reverse complement*/;
384         double Tm;
385         Tm = probcompute_silvana_98(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
386         printf("%100s  %*.2f ", s1, 6, Tm);
387         Tm = probcompute_silvana_04(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
388         printf("%*.2f  ", 6, Tm);
389         Tm = probcompute_sug_95(line, na_concentration, probe_concentration);
390         printf("%*.2f  ", 6, Tm);
391         Tm = probcompute_xia_98(line, na_concentration, probe_concentration);
392         printf("%*.2f  ", 6, Tm);
393         Tm = probcompute_newparameters(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
394         printf("%*.2f\n", 6, Tm);
395       } while (1);
396     }
397 
398     RNAplex_cmdline_parser_free(&args_info);
399     return 0;
400   }
401 
402   if (convert && access) {
403     char          pattern[8];
404     strcpy(pattern, "_openen");
405     DIR           *dfd;
406     struct dirent *dir;
407     dfd = opendir(access);
408     /**
409      * Check if the directory where the opening energy files are located exists
410      */
411     if (dfd) {
412       while ((dir = readdir(dfd)) != NULL) {
413         int   strPos      = 0;
414         int   PatternPos  = 0;
415         char  name[128];
416         strcpy(name, access);
417         strcat(name, "/");
418         strcat(name, dir->d_name);
419         while (name[strPos] != '\0') {
420           if (name[strPos] == pattern[0]) {
421             /**
422              * Check if the files we are looking ends in openen and not openen_bin,
423              * in order to avoid that RNAplex converts bin-file to bin-file
424              */
425             while (name[strPos + PatternPos] == pattern[PatternPos] && pattern[PatternPos] != '\0' && name[strPos + PatternPos] != '\0')
426               PatternPos++;
427             if (name[strPos + PatternPos] == '\0' && pattern[PatternPos] == '\0')
428               /**
429                * convert_plfold_i is the function that makes the hardwork
430                */
431               convert_plfold_i(name);
432             else
433               PatternPos = 0;
434           }
435 
436           strPos++;
437         }
438       }
439     }
440 
441     closedir(dfd);
442     RNAplex_cmdline_parser_free(&args_info);
443     return 0;
444   }
445 
446   /**
447    * Here we check if the user wants to produce PS formatted structure files
448    * from existing RNAplex dot-parenthesis-formated results. Depending on
449    * the kind of input, Alignments or single sequence we will produce
450    * either a color annotated alignment or RNAfold-like structure,
451    * respectively.
452    */
453   if (Resultfile) {
454     /**
455      * Check single sequence case.
456      */
457     if (!(alignment_mode) && (tname && qname)) {
458       mRNA = fopen(tname, "r");
459       if (mRNA == NULL) {
460         printf("%s: Wrong target file name\n", tname);
461         RNAplex_cmdline_parser_free(&args_info);
462         return 0;
463       }
464 
465       sRNA = fopen(qname, "r");
466       if (sRNA == NULL) {
467         printf("%s: Wrong query file name\n", qname);
468         RNAplex_cmdline_parser_free(&args_info);
469         return 0;
470       }
471 
472       vrna_message_error("Sorry not implemented yet");
473     }/**
474       * We have no single sequence case. Check if we have alignments.
475       */
476     else if ((alignment_mode) && (tname && qname)) {
477       mRNA = fopen(tname, "r");
478       if (mRNA == NULL) {
479         printf("%s: Wrong target file name\n", tname);
480         RNAplex_cmdline_parser_free(&args_info);
481         return 0;
482       }
483 
484       sRNA = fopen(qname, "r");
485       if (sRNA == NULL) {
486         printf("%s: Wrong query file name\n", qname);
487         RNAplex_cmdline_parser_free(&args_info);
488         return 0;
489       }
490 
491       Result = fopen(Resultfile, "r");
492       if (sRNA == NULL) {
493         printf("%s: Wrong query file name\n", qname);
494         RNAplex_cmdline_parser_free(&args_info);
495         return 0;
496       }
497 
498       aliprint_struct(Result, mRNA, sRNA, (const int)WindowsLength);
499       RNAplex_cmdline_parser_free(&args_info);
500       return 0;
501     } else {
502       /**
503        * User was not able to input either two alignments or two single sequence files
504        */
505       printf("Please enter either two alignments or single sequence files\n");
506       RNAplex_cmdline_parser_print_help();
507     }
508   }
509 
510 #if 0
511   update_fold_params();
512   if (ParamFile != NULL)
513     read_parameter_file(ParamFile);
514 
515   if (ns_bases != NULL) {
516     nonstandards  = vrna_alloc(33);
517     c             = ns_bases;
518     i             = sym = 0;
519     if (*c == '-') {
520       sym = 1;
521       c++;
522     }
523 
524     while (*c != '\0') {
525       if (*c != ',') {
526         nonstandards[i++] = *c++;
527         nonstandards[i++] = *c;
528         if ((sym) && (*c != *(c - 1))) {
529           nonstandards[i++] = *c;
530           nonstandards[i++] = *(c - 1);
531         }
532       }
533 
534       c++;
535     }
536   }
537 
538   int il_a, il_b, b_a, b_b;
539   linear_fit(&il_a, &il_b, &b_a, &b_b);
540 #endif
541   /**
542    * check if we have two input files
543    */
544   if ((qname == NULL && tname) || (qname && tname == NULL)) {
545     RNAplex_cmdline_parser_print_help();
546   } else if (qname && tname && !(alignment_mode)) {
547     /*free allocated memory of commandline parser*/
548     RNAplex_cmdline_parser_free(&args_info);
549 
550     if (!fold_constrained) {
551       if (access) {
552         char *id_s1 = NULL;
553         mRNA = fopen(tname, "r");
554         if (mRNA == NULL) {
555           printf("%s: Wrong target file name\n", tname);
556           RNAplex_cmdline_parser_free(&args_info);
557           return 0;
558         }
559 
560         sRNA = fopen(qname, "r");
561         if (sRNA == NULL) {
562           printf("%s: Wrong quert file name\n", qname);
563           RNAplex_cmdline_parser_free(&args_info);
564           return 0;
565         }
566 
567         do {
568           /* main loop: continue until end of file */
569           if ((line_t = vrna_read_line(mRNA)) == NULL)
570             break;
571 
572           /*parse line, get id for further accessibility fetching*/
573           while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
574             if (*line_t == '>') {
575               if (id_s1) {
576                 free(id_s1);
577                 id_s1 = NULL;
578               }
579 
580               id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
581               (void)sscanf(line_t, "%s", id_s1);
582               memmove(id_s1, id_s1 + 1, strlen(id_s1));
583               free(line_t);
584               line_t = NULL;
585             }
586 
587             if (line_t)
588               free(line_t);
589 
590             if ((line_t = vrna_read_line(mRNA)) == NULL)
591               break;
592           }
593           /*append N's to the sequence in order to avoid boundary checking*/
594           if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
595             if (id_s1) {
596               free(id_s1);
597               id_s1 = NULL;
598             }
599 
600             break;
601           }
602 
603           s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
604           strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
605           strcat(s1, line_t);
606           free(line_t);
607           strcat(s1, "NNNNNNNNNN\0");
608           int s1_len;
609           s1_len = strlen(s1);
610           for (l = 0; l < s1_len; l++) {
611             s1[l] = toupper(s1[l]);
612             if (!noconv && s1[l] == 'T')
613               s1[l] = 'U';
614           }
615 
616           /*read accessibility*/
617           int   **access_s1;
618           char  *file_s1;
619           file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s1) + strlen(access) + 20));
620           strcpy(file_s1, access);
621           strcat(file_s1, "/");
622           strcat(file_s1, id_s1);
623           strcat(file_s1, "_openen");
624           if (!binaries) {
625             access_s1 = read_plfold_i(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
626           } else {
627             strcat(file_s1, "_bin");
628             access_s1 = read_plfold_i_bin(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
629           }
630 
631           if (access_s1 == NULL) {
632             printf("Accessibility file %s not found or corrupt, look at next target RNA\n", file_s1);
633             free(file_s1);
634             free(s1);
635             free(id_s1);
636             id_s1 = NULL;
637             continue;
638           }
639 
640           do {
641             char *id_s2 = NULL;
642             if ((line_q = vrna_read_line(sRNA)) == NULL)
643               break;
644 
645             while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
646               if (*line_q == '>') {
647                 if (id_s2) {
648                   free(id_s2);
649                   id_s2 = NULL;
650                 }
651 
652                 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
653                 (void)sscanf(line_q, "%s", id_s2);
654                 memmove(id_s2, id_s2 + 1, strlen(id_s2));
655                 free(line_q);
656                 line_q = NULL;
657               }
658 
659               if (line_q)
660                 free(line_q);
661 
662               if ((line_q = vrna_read_line(sRNA)) == NULL)
663                 break;
664             }
665             if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
666               if (id_s2) {
667                 free(id_s2);
668                 id_s2 = NULL;
669               }
670 
671               break;
672             }
673 
674             if (!id_s2) {
675               free(line_q);
676               continue;
677             }
678 
679             s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
680             strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
681             strcat(s2, line_q);
682             free(line_q);
683             strcat(s2, "NNNNNNNNNN\0");
684             int s2_len;
685             s2_len = strlen(s2);
686             for (l = 0; l < s2_len; l++) {
687               s2[l] = toupper(s2[l]);
688               if (!noconv && s2[l] == 'T')
689                 s2[l] = 'U';
690             }
691             int   **access_s2;
692             char  *file_s2;
693             file_s2 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s2) + strlen(access) + 20));
694             strcpy(file_s2, access);
695             strcat(file_s2, "/");
696             strcat(file_s2, id_s2);
697             strcat(file_s2, "_openen");
698             if (!binaries) {
699               access_s2 = read_plfold_i(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
700             } else {
701               strcat(file_s2, "_bin");
702               access_s2 = read_plfold_i_bin(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
703             }
704 
705             if (access_s2 == NULL) {
706               printf("Accessibility file %s not found, look at next target RNA\n", file_s2);
707               free(file_s2);
708               free(s2);
709               free(id_s2);
710               id_s2 = NULL;
711               continue;
712             }
713 
714             printf(">%s\n>%s\n", id_s1, id_s2);
715             double  begin = BeginTimer();
716             Lduplexfold_XS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
717             float   elapTicks;
718             float   elapMilli;
719             elapTicks = (EndTimer(begin) - begin);
720             elapMilli = elapTicks / 1000;
721             /*             printf("Millisecond %.2f\n",elapMilli); */
722             free(id_s2);
723             free(file_s2);
724             free(s2);
725             id_s2 = NULL;
726             i     = access_s2[0][0];
727             while (--i > -1)
728               free(access_s2[i]);
729             free(access_s2);
730           } while (1);
731           free(id_s1);
732           id_s1 = NULL;
733           free(file_s1);
734           free(s1);
735           rewind(sRNA);
736           i = access_s1[0][0];
737           while (--i > -1)
738             free(access_s1[i]);
739           free(access_s1);
740         } while (1);
741         fclose(mRNA);
742         fclose(sRNA);
743       } else if (access == NULL) {
744         /* t and q are defined, but no accessibility is provided */
745         mRNA = fopen(tname, "r");
746         if (mRNA == NULL) {
747           printf("%s: Wrong target file name\n", tname);
748           RNAplex_cmdline_parser_free(&args_info);
749           return 0;
750         }
751 
752         sRNA = fopen(qname, "r");
753         if (sRNA == NULL) {
754           printf("%s: Wrong query file name\n", qname);
755           RNAplex_cmdline_parser_free(&args_info);
756           return 0;
757         }
758 
759         do {
760           /* main loop: continue until end of file */
761           char *id_s1 = NULL; /* header of the target file  */
762           if ((line_t = vrna_read_line(mRNA)) == NULL)
763             break;
764 
765           /*parse line, get id for further accessibility fetching*/
766           while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
767             if (*line_t == '>') {
768               if (id_s1) {
769                 /* in case we have two header the one after the other */
770                 free(id_s1);     /* free the old header, a put the new one instead */
771                 id_s1 = NULL;
772               }
773 
774               id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
775               (void)sscanf(line_t, "%s", id_s1);
776               memmove(id_s1, id_s1 + 1, strlen(id_s1));
777               free(line_t);
778               line_t = NULL;
779             }
780 
781             if (line_t)
782               free(line_t);
783 
784             if ((line_t = vrna_read_line(mRNA)) == NULL)
785               break;
786           }
787           /*append N's to the sequence in order to avoid boundary checking*/
788           if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
789             if (id_s1) {
790               free(id_s1);
791               id_s1 = NULL;
792             }
793 
794             break;
795           }
796 
797           s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
798           strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
799           strcat(s1, line_t);
800           free(line_t);
801           strcat(s1, "NNNNNNNNNN\0");
802           int s1_len;
803           s1_len = strlen(s1);
804           for (l = 0; l < s1_len; l++) {
805             s1[l] = toupper(s1[l]);
806             if (!noconv && s1[l] == 'T')
807               s1[l] = 'U';
808           }
809           do {
810             /*read sRNA files*/
811             char *id_s2 = NULL;
812             if ((line_q = vrna_read_line(sRNA)) == NULL)
813               break;
814 
815             while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
816               if (*line_q == '>') {
817                 if (id_s2) {
818                   free(id_s2);
819                   id_s2 = NULL;
820                 }
821 
822                 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
823                 (void)sscanf(line_q, "%s", id_s2);
824                 memmove(id_s2, id_s2 + 1, strlen(id_s2));
825                 free(line_q);
826                 line_q = NULL;
827               }
828 
829               if (line_q)
830                 free(line_q);
831 
832               if ((line_q = vrna_read_line(sRNA)) == NULL)
833                 break;
834             }
835             if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
836               if (id_s2)
837                 free(id_s2);
838 
839               break;
840             }
841 
842             if (!id_s2) {
843               free(line_q);
844               continue;
845             }
846 
847             s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
848             strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
849             strcat(s2, line_q);
850             free(line_q);
851             strcat(s2, "NNNNNNNNNN\0");
852             int s2_len;
853             s2_len = strlen(s2);
854             for (l = 0; l < s2_len; l++) {
855               s2[l] = toupper(s2[l]);
856               if (!noconv && s2[l] == 'T')
857                 s2[l] = 'U';
858             }
859             printf(">%s\n>%s\n", id_s1, id_s2);
860             /*             double begin = BeginTimer(); */
861             Lduplexfold(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
862             /* float elapTicks; */
863             /* float elapMilli; */
864             /* elapTicks = (EndTimer(begin) - begin); */
865             /* elapMilli = elapTicks/1000; */
866             /* printf("Millisecond %.2f\n",elapMilli); */
867             /* printf("\n"); */
868             free(id_s2);
869             id_s2 = NULL;
870             free(s2);
871           } while (1);
872           free(id_s1);
873           id_s1 = NULL;
874           free(s1);
875           rewind(sRNA);
876         } while (1);
877         fclose(mRNA);
878         fclose(sRNA);
879       }
880     } else {
881       if (access) {
882         char *id_s1 = NULL;
883         mRNA = fopen(tname, "r");
884         if (mRNA == NULL) {
885           printf("%s: Wrong target file name\n", tname);
886           RNAplex_cmdline_parser_free(&args_info);
887           return 0;
888         }
889 
890         sRNA = fopen(qname, "r");
891         if (sRNA == NULL) {
892           printf("%s: Wrong query file name\n", qname);
893           RNAplex_cmdline_parser_free(&args_info);
894           return 0;
895         }
896 
897         do {
898           /* main loop: continue until end of file */
899           if ((line_t = vrna_read_line(mRNA)) == NULL)
900             break;
901 
902           /*parse line, get id for further accessibility fetching*/
903           while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
904             if (*line_t == '>') {
905               if (id_s1) {
906                 /* in case we have two header the one after the other */
907                 free(id_s1);     /* free the old header, a put the new one instead */
908                 id_s1 = NULL;
909               }
910 
911               id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
912               (void)sscanf(line_t, "%s", id_s1);
913               memmove(id_s1, id_s1 + 1, strlen(id_s1));
914               free(line_t);
915               line_t = NULL;
916             }
917 
918             if (line_t)
919               free(line_t);
920 
921             if ((line_t = vrna_read_line(mRNA)) == NULL)
922               break;
923           }
924           /*append N's to the sequence in order to avoid boundary checking*/
925           if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
926             if (id_s1) {
927               free(id_s1);
928               id_s1 = NULL;
929             }
930 
931             break;
932           }
933 
934           s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
935           strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
936           strcat(s1, line_t);
937           free(line_t);
938           strcat(s1, "NNNNNNNNNN\0");
939           int s1_len;
940           s1_len = strlen(s1);
941           for (l = 0; l < s1_len; l++) {
942             s1[l] = toupper(s1[l]);
943             if (!noconv && s1[l] == 'T')
944               s1[l] = 'U';
945           }
946 
947           /*read accessibility*/
948           int   **access_s1;
949           char  *file_s1;
950           file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s1) + strlen(access) + 20));
951           strcpy(file_s1, access);
952           strcat(file_s1, "/");
953           strcat(file_s1, id_s1);
954           strcat(file_s1, "_openen");
955           if (!binaries) {
956             access_s1 = read_plfold_i(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
957           } else {
958             strcat(file_s1, "_bin");
959             access_s1 = read_plfold_i_bin(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
960           }
961 
962           if (access_s1 == NULL) {
963             printf("Accessibility file %s not found, look at next target RNA\n", file_s1);
964             free(file_s1);
965             free(s1);
966             free(id_s1);
967             id_s1 = NULL;
968             continue;
969           }
970 
971           do {
972             char *id_s2 = NULL;
973             /*read sRNA files*/
974             if ((line_q = vrna_read_line(sRNA)) == NULL)
975               break;
976 
977             while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
978               if (*line_q == '>') {
979                 if (id_s2) {
980                   free(id_s2);
981                   id_s2 = NULL;
982                 }
983 
984                 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
985                 (void)sscanf(line_q, "%s", id_s2);
986                 memmove(id_s2, id_s2 + 1, strlen(id_s2));
987                 free(line_q);
988                 line_q = NULL;
989               }
990 
991               if (line_q)
992                 free(line_q);
993 
994               if ((line_q = vrna_read_line(sRNA)) == NULL)
995                 break;
996             }
997             if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
998               if (id_s2)
999                 free(id_s2);
1000 
1001               break;
1002             }
1003 
1004             /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
1005             s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
1006             strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1007             strcat(s2, line_q);
1008             free(line_q);
1009             strcat(s2, "NNNNNNNNNN\0");
1010             int s2_len;
1011             s2_len = strlen(s2);
1012             for (l = 0; l < s2_len; l++) {
1013               s2[l] = toupper(s2[l]);
1014               if (!noconv && s2[l] == 'T')
1015                 s2[l] = 'U';
1016             }
1017             structure = (char *)vrna_alloc((unsigned)s2_len + 1);
1018             cstruc    = vrna_read_line(sRNA);
1019             if (cstruc != NULL) {
1020               int dn3 = strlen(cstruc) - (s2_len - 20);
1021               strcpy(structure, "..........");
1022               strncat(structure, cstruc, s2_len - 20);
1023               if (dn3 >= 0) {
1024                 strcat(structure, "..........\0");
1025               } else {
1026                 while (dn3++)
1027                   strcat(structure, ".");
1028                 strcat(structure, "\0");
1029               }
1030 
1031               free(cstruc);
1032             } else {
1033               vrna_message_warning("constraints missing");
1034             }
1035 
1036             int   a = strchr(structure, '|') - structure;
1037             int   b = strrchr(structure, '|') - structure;
1038             if (alignment_length < b - a + 1)
1039               vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1040 
1041             int   **access_s2;
1042             char  *file_s2;
1043             file_s2 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s2) + strlen(access) + 20));
1044             strcpy(file_s2, access);
1045             strcat(file_s2, "/");
1046             strcat(file_s2, id_s2);
1047             strcat(file_s2, "_openen");
1048             if (!binaries) {
1049               access_s2 = read_plfold_i(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1050             } else {
1051               strcat(file_s2, "_bin");
1052               access_s2 = read_plfold_i_bin(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1053             }
1054 
1055             if (access_s2 == NULL) {
1056               printf("Accessibility file %s not found, look at next target RNA\n", file_s2);
1057               free(file_s2);
1058               free(s2);
1059               free(id_s2);
1060               continue;
1061             }
1062 
1063             printf(">%s\n>%s\n", id_s1, id_s2);
1064             Lduplexfold_CXS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);/* , target_dead, query_dead); */
1065             free(id_s2);
1066             free(file_s2);
1067             free(s2);
1068             i = access_s2[0][0];
1069             while (--i > -1)
1070               free(access_s2[i]);
1071             free(access_s2);
1072             free(structure);
1073           } while (1);
1074           free(id_s1);
1075           id_s1 = NULL;
1076           free(file_s1);
1077           free(s1);
1078           rewind(sRNA);
1079           i = access_s1[0][0];
1080           while (--i > -1)
1081             free(access_s1[i]);
1082           free(access_s1);
1083         } while (1);
1084         fclose(mRNA);
1085         fclose(sRNA);
1086       } else if (access == NULL) {
1087         /* t and q are defined, but no accessibility is provided */
1088         char *id_s1 = NULL;
1089         mRNA = fopen(tname, "r");
1090         if (mRNA == NULL) {
1091           printf("%s: Wrong target file name\n", tname);
1092           RNAplex_cmdline_parser_free(&args_info);
1093           return 0;
1094         }
1095 
1096         sRNA = fopen(qname, "r");
1097         if (sRNA == NULL) {
1098           printf("%s: Wrong query file name\n", qname);
1099           RNAplex_cmdline_parser_free(&args_info);
1100           return 0;
1101         }
1102 
1103         do {
1104           /* main loop: continue until end of file */
1105           if ((line_t = vrna_read_line(mRNA)) == NULL)
1106             break;
1107 
1108           /*parse line, get id for further accessibility fetching*/
1109           while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
1110             if (*line_t == '>') {
1111               if (id_s1) {
1112                 /* in case we have two header the one after the other */
1113                 free(id_s1);     /* free the old header, a put the new one instead */
1114                 id_s1 = NULL;
1115               }
1116 
1117               id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
1118               (void)sscanf(line_t, "%s", id_s1);
1119               memmove(id_s1, id_s1 + 1, strlen(id_s1));
1120               free(line_t);
1121               line_t = NULL;
1122             }
1123 
1124             if (line_t)
1125               free(line_t);
1126 
1127             if ((line_t = vrna_read_line(mRNA)) == NULL)
1128               break;
1129           }
1130           /*append N's to the sequence in order to avoid boundary checking*/
1131           /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
1132           if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
1133             if (id_s1) {
1134               free(id_s1);
1135               id_s1 = NULL;
1136             }
1137 
1138             break;
1139           }
1140 
1141           s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
1142           strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1143           strcat(s1, line_t);
1144           free(line_t);
1145           strcat(s1, "NNNNNNNNNN\0");
1146           int s1_len;
1147           s1_len = strlen(s1);
1148           for (l = 0; l < s1_len; l++) {
1149             s1[l] = toupper(s1[l]);
1150             if (!noconv && s1[l] == 'T')
1151               s1[l] = 'U';
1152           }
1153           do {
1154             char *id_s2 = NULL;
1155             /*read sRNA files*/
1156             if ((line_q = vrna_read_line(sRNA)) == NULL)
1157               break;
1158 
1159             while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
1160               if (*line_q == '>') {
1161                 if (id_s2) {
1162                   free(id_s2);
1163                   id_s2 = NULL;
1164                 }
1165 
1166                 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
1167                 (void)sscanf(line_q, "%s", id_s2);
1168                 memmove(id_s2, id_s2 + 1, strlen(id_s2));
1169                 free(line_q);
1170                 line_q = NULL;
1171               }
1172 
1173               if (line_q)
1174                 free(line_q);
1175 
1176               if ((line_q = vrna_read_line(sRNA)) == NULL)
1177                 break;
1178             }
1179             /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
1180             if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
1181               if (id_s2)
1182                 free(id_s2);
1183 
1184               break;
1185             }
1186 
1187             s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
1188             strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1189             strcat(s2, line_q);
1190             free(line_q);
1191             strcat(s2, "NNNNNNNNNN\0");
1192             int s2_len;
1193             s2_len = strlen(s2);
1194             for (l = 0; l < s2_len; l++) {
1195               s2[l] = toupper(s2[l]);
1196               if (!noconv && s2[l] == 'T')
1197                 s2[l] = 'U';
1198             }
1199             structure = (char *)vrna_alloc((unsigned)s2_len + 1);
1200             cstruc    = vrna_read_line(sRNA);
1201             if (cstruc != NULL) {
1202               int dn3 = strlen(cstruc) - (s2_len - 20);
1203               strcpy(structure, "..........");
1204               strncat(structure, cstruc, s2_len - 20);
1205               if (dn3 >= 0) {
1206                 strcat(structure, "..........\0");
1207               } else {
1208                 while (dn3++)
1209                   strcat(structure, ".");
1210                 strcat(structure, "\0");
1211               }
1212 
1213               free(cstruc);
1214             } else {
1215               vrna_message_warning("constraints missing");
1216             }
1217 
1218             int     a = strchr(structure, '|') - structure;
1219             int     b = strrchr(structure, '|') - structure;
1220             if (alignment_length < b - a + 1)
1221               vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1222 
1223             printf(">%s\n>%s\n", id_s1, id_s2);
1224             double  begin = BeginTimer();
1225             Lduplexfold_C(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);
1226             float   elapTicks;
1227             float   elapMilli;
1228             elapTicks = EndTimer(begin);
1229             elapMilli = elapTicks / 1000;
1230             /*             printf("Millisecond %.2f\n",elapMilli); */
1231             free(id_s2);
1232             free(s2);
1233             free(structure);
1234           } while (1);
1235           free(id_s1);
1236           id_s1 = NULL;
1237           free(s1);
1238           rewind(sRNA);
1239         } while (1);
1240         fclose(mRNA);
1241         fclose(sRNA);
1242       }
1243     }
1244   } else if (!qname && !tname && !(alignment_mode)) {
1245     istty = isatty(fileno(stdout)) && isatty(fileno(stdin));
1246 
1247     if ((fold_constrained) && (istty)) {
1248       printf("Input constraints using the following notation:\n");
1249       printf("| : paired with another base\n");
1250       printf(". : no constraint at all\n");
1251       printf("x : base must not pair\n");
1252     }
1253 
1254     do {
1255       /* main loop: continue until end of file */
1256       /* duplexT mfe, *subopt         */
1257       char *id_s1 = NULL, *id_s2 = NULL;
1258       if (istty) {
1259         printf("\nInput two sequences (one line each); @ to quit\n");
1260         printf("%s\n", scale);
1261       }
1262 
1263       fname[0] = '\0';
1264 
1265       if ((line = vrna_read_line(stdin)) == NULL)
1266         break;
1267 
1268       /* skip empty lines, comment lines, name lines */
1269       while ((*line == '*') || (*line == '\0') || (*line == '>')) {
1270         printf("%s\n", line);
1271         if (*line == '>') {
1272           id_s1 = (char *)vrna_alloc(strlen(line) + 2);
1273           (void)sscanf(line, "%s", id_s1);
1274           memmove(id_s1, id_s1 + 1, strlen(id_s1));
1275         }
1276 
1277         free(line);
1278         if ((line = vrna_read_line(stdin)) == NULL) {
1279           free(id_s1);
1280           break;
1281         }
1282       }
1283       if ((line == NULL) || (strcmp(line, "@") == 0))
1284         break;
1285 
1286       s1 = (char *)vrna_alloc(strlen(line) + 1 + 20);
1287       strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1288       strcat(s1, line);
1289       free(line);
1290       strcat(s1, "NNNNNNNNNN\0");
1291       if ((line = vrna_read_line(stdin)) == NULL)
1292         break;
1293 
1294       /* skip comment lines and get filenames */
1295 
1296       while ((*line == '*') || (*line == '\0') || (*line == '>')) {
1297         printf("%s\n", line);
1298         if (*line == '>') {
1299           id_s2 = (char *)vrna_alloc(strlen(line) + 2);
1300           (void)sscanf(line, "%s", id_s2);
1301           memmove(id_s2, id_s2 + 1, strlen(id_s2));
1302         }
1303 
1304         free(line);
1305         if ((line = vrna_read_line(stdin)) == NULL) {
1306           free(id_s2);
1307           break;
1308         }
1309       }
1310       if ((line == NULL) || (strcmp(line, "@") == 0))
1311         break;
1312 
1313       s2 = (char *)vrna_alloc(strlen(line) + 1 + 20);
1314       strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1315       strcat(s2, line);
1316       free(line);
1317       strcat(s2, "NNNNNNNNNN\0");
1318 
1319       int n1  = strlen(s1);
1320       int n2  = strlen(s2);
1321 
1322 
1323       structure = (char *)vrna_alloc((unsigned)n2 + 1);
1324       if (fold_constrained) {
1325         cstruc = vrna_read_line(stdin);
1326         if (cstruc != NULL && (cstruc[0] == '>')) {
1327           int dn3 = strlen(cstruc) - (n2 - 20);
1328           strcpy(structure, "..........");
1329           strncat(structure, cstruc, n2 - 20);
1330           if (dn3 >= 0) {
1331             strcat(structure, "..........\0");
1332           } else {
1333             while (dn3++)
1334               strcat(structure, ".");
1335             strcat(structure, "\0");
1336           }
1337 
1338           free(cstruc);
1339         } else {
1340           vrna_message_warning("constraints missing");
1341         }
1342       }
1343 
1344       for (l = 0; l < n1; l++) {
1345         s1[l] = toupper(s1[l]);
1346         if (!noconv && s1[l] == 'T')
1347           s1[l] = 'U';
1348       }
1349       for (l = 0; l < n2; l++) {
1350         s2[l] = toupper(s2[l]);
1351         if (!noconv && s2[l] == 'T')
1352           s2[l] = 'U';
1353       }
1354       if (istty)
1355         printf("lengths = %d,%d\n", (int)strlen(s1), (int)strlen(s2));
1356 
1357       if (alignment_length == 0)
1358         alignment_length = 40;
1359 
1360       if (access == NULL) {
1361         if (!fold_constrained) {
1362           Lduplexfold(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
1363         } else {
1364           int a = strchr(structure, '|') - structure;
1365           int b = strrchr(structure, '|') - structure;
1366           if (alignment_length < b - a + 1)
1367             vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1368 
1369           Lduplexfold_C(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);
1370         }
1371       } else {
1372         int   **access_s1, **access_s2;
1373         char  *file_s1, *file_s2;
1374         int   s1_len, s2_len;
1375         s1_len  = strlen(s1);
1376         s2_len  = strlen(s2);
1377         if (!(id_s1 && id_s2))
1378           vrna_message_error("The fasta files has no header information..., cant fetch accessibility file\n");
1379 
1380         file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s1) + strlen(access) + 20));
1381         file_s2 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s2) + strlen(access) + 20));
1382         strcpy(file_s1, access);
1383         strcpy(file_s2, access);
1384         strcat(file_s1, "/");
1385         strcat(file_s2, "/");
1386         strcat(file_s1, id_s1);
1387         strcat(file_s2, id_s2);
1388         strcat(file_s1, "_openen");
1389         strcat(file_s2, "_openen");
1390         if (!binaries) {
1391           access_s1 = read_plfold_i(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
1392         } else {
1393           strcat(file_s1, "_bin");
1394           access_s1 = read_plfold_i_bin(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
1395         }
1396 
1397         if (access_s1 == NULL) {
1398           free(file_s1);
1399           free(s1);
1400           free(s2);
1401           free(file_s2);
1402           free(id_s1);
1403           free(id_s2);
1404           continue;
1405         }
1406 
1407         if (!binaries) {
1408           access_s2 = read_plfold_i(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1409         } else {
1410           strcat(file_s2, "_bin");
1411           access_s2 = read_plfold_i_bin(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1412         }
1413 
1414         if (access_s2 == NULL) {
1415           free(access_s1);
1416           free(file_s1);
1417           free(s1);
1418           free(s2);
1419           free(file_s2);
1420           free(id_s1);
1421           free(id_s2);
1422           continue;
1423         }
1424 
1425         if (!fold_constrained) {
1426           Lduplexfold_XS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);/* , target_dead, query_dead); */
1427         } else {
1428           int a = strchr(structure, '|') - structure;
1429           int b = strrchr(structure, '|') - structure;
1430           if (alignment_length < b - a + 1)
1431             vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1432 
1433           Lduplexfold_CXS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);/* , target_dead, query_dead); */
1434         }
1435 
1436         i = access_s1[0][0];
1437         while (--i > -1)
1438           free(access_s1[i]);
1439         i = access_s2[0][0];
1440         while (--i > -1)
1441           free(access_s2[i]);
1442         free(access_s1);
1443         free(access_s2);
1444         free(file_s1);
1445         free(file_s2);
1446         free(id_s1);
1447         id_s1 = NULL;
1448         free(id_s2);
1449         id_s2 = NULL;
1450       }
1451 
1452       free(s1);
1453       free(s2);
1454       s1  = NULL;
1455       s2  = NULL;
1456       free(structure);
1457       printf("\n");
1458       if (id_s1)
1459         free(id_s1);
1460 
1461       if (id_s2)
1462         free(id_s2);
1463     } while (1);
1464     if (s1)
1465       free(s1);
1466 
1467     if (s2)
1468       free(s2);
1469 
1470     /* if(id_s1){free(id_s1);}if(id_s2){free(id_s2);} */
1471   } else if (qname && tname && alignment_mode) {
1472     int n_seq, n_seq2;
1473     mRNA = fopen(tname, "r");
1474     if (mRNA == NULL) {
1475       printf("%s: Wrong target file name\n", tname);
1476       RNAplex_cmdline_parser_free(&args_info);
1477       return 0;
1478     }
1479 
1480     sRNA = fopen(qname, "r");
1481     if (sRNA == NULL) {
1482       printf("%s: Wrong query file name\n", qname);
1483       RNAplex_cmdline_parser_free(&args_info);
1484       return 0;
1485     }
1486 
1487     n_seq   = read_clustal(mRNA, temp1, names1);
1488     n_seq2  = read_clustal(sRNA, temp2, names2);
1489     fclose(mRNA);
1490     fclose(sRNA);
1491     if (n_seq != n_seq2) {
1492       for (i = 0; temp1[i]; i++) {
1493         free(temp1[i]);
1494         free(temp2[i]);
1495       }
1496       vrna_message_error("unequal number of seqs in alignments");
1497     }
1498 
1499     for (i = 0; temp1[i]; i++) {
1500       AS1[i]  = (char *)vrna_alloc((strlen(temp1[i]) + 21) * sizeof(char));
1501       AS2[i]  = (char *)vrna_alloc((strlen(temp2[i]) + 21) * sizeof(char));
1502       strcpy(AS1[i], "NNNNNNNNNN");
1503       strcpy(AS2[i], "NNNNNNNNNN");
1504       strcat(AS1[i], temp1[i]);
1505       strcat(AS2[i], temp2[i]);
1506       strcat(AS1[i], "NNNNNNNNNN\0");
1507       strcat(AS2[i], "NNNNNNNNNN\0");
1508     }
1509     for (i = 0; temp1[i]; i++) {
1510       free(temp1[i]);
1511       free(temp2[i]);
1512     }
1513     AS1[n_seq]  = NULL;
1514     AS2[n_seq]  = NULL;
1515 
1516 
1517     if (access == NULL) {
1518       aliLduplexfold((const char **)AS1, (const char **)AS2, n_seq * delta, extension_cost, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
1519     } else {
1520       int **target_access = NULL, **query_access = NULL;
1521       target_access = average_accessibility_target(names1, AS1, n_seq, access, verhaeltnis, alignment_length, binaries, fast); /* get averaged accessibility for alignments */
1522       query_access  = average_accessibility_target(names2, AS2, n_seq, access, verhaeltnis, alignment_length, binaries, fast);
1523       if (!(target_access && query_access)) {
1524         for (i = 0; AS1[i]; i++) {
1525           free(AS1[i]);
1526           free(names1[i]);
1527           free(AS2[i]);
1528           free(names2[i]);
1529         }
1530         RNAplex_cmdline_parser_free(&args_info);
1531         return 0;
1532       }
1533 
1534       aliLduplexfold_XS((const char **)AS1, (const char **)AS2, (const int **)target_access, (const int **)query_access, n_seq * delta, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
1535       if (!(target_access == NULL)) {
1536         i = target_access[0][0];
1537         while (--i > -1)
1538           free(target_access[i]);
1539         free(target_access);
1540       }
1541 
1542       if (!(query_access == NULL)) {
1543         i = query_access[0][0];
1544         while (--i > -1)
1545           free(query_access[i]);
1546         free(query_access);
1547       }
1548     }
1549 
1550     for (i = 0; AS1[i]; i++) {
1551       free(AS1[i]);
1552       free(names1[i]);
1553       free(AS2[i]);
1554       free(names2[i]);
1555     }
1556   }
1557 
1558   if (access) {
1559     free(access);
1560     access = NULL;
1561   }
1562 
1563   if (qname) {
1564     free(tname);
1565     access = NULL;
1566   }
1567 
1568   if (tname) {
1569     free(qname);
1570     access = NULL;
1571   }
1572 
1573   RNAplex_cmdline_parser_free(&args_info);
1574   return 0;
1575 }
1576 
1577 
1578 #if 0
1579 static int
1580 print_struc(duplexT const *dup)
1581 {
1582   /*this function print out the structure of a hybridization site
1583    * and return the value from which one can begin to look for the next
1584    * non-overlappig hybrid*/
1585 
1586   int   l1;
1587   float energy  = dup->energy * 0.01;
1588   int   init    = dup->end;
1589 
1590   l1 = strchr(dup->structure, '&') - dup->structure;
1591   printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", dup->structure, init - l1 - 1,
1592          init - 2, dup->j + l1 + 2 - strlen(dup->structure), dup->j, energy);
1593   return init - l1 - 1;
1594 }
1595 
1596 
1597 #endif
1598 
1599 static int **
read_plfold_i(char * fname,const int beg,const int end,double verhaeltnis,const int length,int fast)1600 read_plfold_i(char      *fname,
1601               const int beg,
1602               const int end,
1603               double    verhaeltnis,
1604               const int length,
1605               int       fast)
1606 {
1607   double  begin = BeginTimer();
1608   FILE    *in   = fopen(fname, "r");
1609 
1610   if (in == NULL) {
1611     vrna_message_warning("File ' %s ' open error", fname);
1612     return NULL;
1613   }
1614 
1615   int   i, j;
1616   int   **access;
1617   int   offset, temp;
1618   temp    = 0;
1619   offset  = 0;
1620   int   seq_pos;
1621   int   beg_r, end_r;
1622   beg_r = beg;
1623   end_r = end;
1624   char  tmp[2048] = {
1625     0x0
1626   };
1627   /* char *ptr; */
1628   if (fgets(tmp, sizeof(tmp), in) == 0)
1629     perror("Empty File");
1630 
1631   if (strchr(tmp, '>')) {
1632     vrna_message_warning("file %s is not in RNAplfold format", fname);
1633     return NULL;
1634   }
1635 
1636   if (fgets(tmp, sizeof(tmp), in) == 0) {
1637     vrna_message_warning("No accessibility data");
1638     return NULL;
1639   }
1640 
1641   int dim_x;
1642   dim_x = get_max_u(tmp, '\t');
1643   if (length > dim_x && fast == 0) {
1644     printf("Interaction length %d is larger than the length of the largest region %d \nfor which the opening energy was computed (-u parameter of RNAplfold)\n", length, dim_x);
1645     printf("Please recompute your profiles with a larger -u or set -l to a smaller interaction length\n");
1646     return NULL;
1647   }
1648 
1649   access = (int **)vrna_alloc(sizeof(int *) * (dim_x + 2));
1650   for (i = 0; i < dim_x + 2; i++)
1651     access[i] = (int *)vrna_alloc(sizeof(int) * (end_r - beg_r + 1)); /* normally +1 */
1652   for (i = 0; i < end_r - beg_r + 1; i++)
1653     for (j = 0; j < dim_x + 2; j++)
1654       access[j][i] = INF;
1655   access[0][0] = dim_x + 2;
1656   while (fgets(tmp, sizeof(tmp), in) != 0 && --end_r > 10) {
1657     /* read a record, before we have --end_r > 10*/
1658     float n;
1659     /* int i; */
1660     /* int u; */
1661     beg_r--;
1662     if (beg_r < 1) {
1663       if (sscanf(tmp, "%d\t%n", &seq_pos, &temp) == 1) {
1664         offset += temp;
1665         int count;
1666         count = 1;
1667         while (sscanf(tmp + offset, "%f%n", &n, &temp) == 1) {
1668           offset += temp;
1669           /* seq_pos - beg allows one to get the accessibility right */
1670           access[count][seq_pos - beg + 11] = (int)rint(100 * n); /* round the number */
1671           access[count][seq_pos - beg + 11] *= verhaeltnis;       /* 10 stands here for the number of nucleotides */
1672           count++;
1673         }
1674         offset = 0;
1675       }
1676     }
1677   }
1678   if (end_r > 20) {
1679     printf("Accessibility files contains %d less entries than expected based on the sequence length\n", end_r - 20);
1680     printf("Please recompute your profiles so that profile length and sequence length match\n");
1681     return NULL;
1682   }
1683 
1684   fclose(in);
1685   float elapTicks;
1686   float elapMilli;
1687   elapTicks = (EndTimer(begin) - begin);
1688   elapMilli = elapTicks / 1000;
1689   return access;
1690 }
1691 
1692 
1693 static int
convert_plfold_i(char * fname)1694 convert_plfold_i(char *fname)
1695 {
1696   int   i;
1697   FILE  *in = fopen(fname, "r");
1698 
1699   if (in == NULL) {
1700     vrna_message_warning("File ' %s ' open error", fname);
1701     return -1;
1702   }
1703 
1704   char tmp[2048] = {
1705     0x0
1706   };
1707   if (fgets(tmp, sizeof(tmp), in) == 0)
1708     perror("Empty File");
1709 
1710   if (strchr(tmp, '>')) {
1711     vrna_message_warning("file %s is not in RNAplfold format", fname);
1712     return -1;
1713   }
1714 
1715   if (fgets(tmp, sizeof(tmp), in) == 0) {
1716     vrna_message_warning("No accessibility data");
1717     return -1;
1718   }
1719 
1720   int   u_length;
1721   u_length = get_max_u(tmp, '\t'); /* get the x dimension */
1722   char  c;
1723   int   length = 0;
1724   while ((c = fgetc(in)) != EOF)
1725     if (c == '\n')
1726       length++;
1727 
1728   fclose(in);
1729   int   **access = read_plfold_i(fname, 1, length + 20, 1, u_length, 2);
1730   char  *outname;
1731   outname = (char *)vrna_alloc((strlen(fname) + 5) * sizeof(char));
1732   strcpy(outname, fname);
1733   strcat(outname, "_bin");
1734   FILE  *fp = fopen(outname, "wb");
1735   int   p[1];
1736   p[0] = 1000000;
1737   for (i = 0; i < u_length + 2; i++) {
1738     fwrite(access[i], sizeof(int), length + 20, fp);
1739     free(access[i]);
1740   }
1741   fseek(fp, 0, SEEK_SET);
1742   fwrite(&u_length, sizeof(int), 1, fp);
1743   fwrite(&length, sizeof(int), 1, fp);
1744   free(outname);
1745   fclose(fp);
1746   free(access);
1747   return 1;
1748 }
1749 
1750 
1751 static int **
read_plfold_i_bin(char * fname,const int beg,const int end,double verhaeltnis,const int length,int fast)1752 read_plfold_i_bin(char      *fname,
1753                   const int beg,
1754                   const int end,
1755                   double    verhaeltnis,
1756                   const int length,
1757                   int       fast)
1758 {
1759   double  begin = BeginTimer();
1760   FILE    *fp   = fopen(fname, "rb");
1761   int     seqlength;
1762 
1763   if (fp == NULL) {
1764     vrna_message_warning("File ' %s ' open error", fname);
1765     return NULL;
1766   }
1767 
1768   int *first_line;
1769   first_line = (int *)vrna_alloc(sizeof(int) * (end - beg + 1));              /* check length of the line LOOK at read_plfold_i */
1770   if (!fread(first_line, sizeof(int), (end - beg) + 1, fp)) {
1771     vrna_message_warning("Problem reading size of profile from '%s'", fname); /* get the value of the u option */
1772     return NULL;
1773   }
1774 
1775   int lim_x;
1776   lim_x     = first_line[0];
1777   seqlength = first_line[1];                                  /* length of the sequence RNAplfold was ran on. */
1778   if (length > lim_x && fast == 0) {
1779     printf("Interaction length %d is larger than the length of the largest region %d \nfor which the opening energy was computed (-u parameter of RNAplfold)\n", length, lim_x);
1780     printf("Please recompute your profiles with a larger -u or set -l to a smaller interaction length\n");
1781     return NULL;
1782   }
1783 
1784   fseek(fp, 0, SEEK_SET);                                   /* set pointer to the begining of the file */
1785   int       **access;                                       /* here we store the access  values */
1786   access = (int **)vrna_alloc(sizeof(int *) * (lim_x + 1)); /* !!!! lim_x+1 -> lim_x+2 */
1787   int       count;
1788   long int  position;
1789   for (count = 0; count < lim_x + 1; count++) {
1790     /* read file */
1791     access[count] = (int *)vrna_alloc(sizeof(int) * (end - beg + 1)); /* declare array length */
1792     /* now we should be sure to read the right position */
1793     /* we first need a seek to the right position */
1794     /* 0->9 line begin, + begin  */
1795     /* here we need information about the total line length..., we can get this if this is saved by RNAplfold... */
1796     position = ftell(fp);
1797     fseek(fp, (beg - 1) * sizeof(int), SEEK_CUR);                 /* go to the desired position, note the 10 offset */
1798     position = ftell(fp);
1799     if (!fread(access[count], sizeof(int), (end - beg) + 1, fp))  /* read the needed number of accessibility values */
1800       printf("File '%s' is corrupted \n", fname);
1801 
1802     position = ftell(fp);
1803     fseek(fp, (seqlength - end + 20) * sizeof(int), SEEK_CUR); /* place to the begining of the next file */
1804     position = ftell(fp);
1805   }
1806   fseek(fp, 0, SEEK_SET);
1807   access[0][0]  = lim_x;
1808   access[0][0]  += 1; /* !!!!!!!!!!!!!!!!!!!put 2 in case of problem */
1809   fclose(fp);         /* close file */
1810   float elapTicks;
1811   float elapMilli;
1812   free(first_line);
1813   elapTicks = (EndTimer(begin) - begin);
1814   elapMilli = elapTicks / 1000;
1815   /* printf("read_plfold_i_bin Millisecond %.2f\n",elapMilli); */ /* return time */
1816   return access; /* return access */
1817 }
1818 
1819 
1820 static int
get_max_u(const char * s,char delim)1821 get_max_u(const char  *s,
1822           char        delim)
1823 {
1824   char  *pch;
1825   int   total;
1826 
1827   total = 0;
1828   pch   = strchr(s, delim);
1829   total++;
1830   while (pch != NULL) {
1831     pch = strchr(pch + 1, delim);
1832     total++;
1833   }
1834   return total - 2;
1835 }
1836 
1837 
1838 static int **
average_accessibility_target(char ** names,char ** ALN,int number,char * access,double verhaeltnis,const int alignment_length,int binaries,int fast)1839 average_accessibility_target(char       **names,
1840                              char       **ALN,
1841                              int        number,
1842                              char       *access,
1843                              double     verhaeltnis,
1844                              const int  alignment_length,
1845                              int        binaries,
1846                              int        fast)
1847 {
1848   int           i;
1849   int           ***master_access  = NULL;           /* contains the accessibility arrays for different */
1850   int           **average_access  = NULL;           /* contains the averaged accessibility */
1851   int           aln_size          = strlen(ALN[0]); /* aln size -> define size of the averaged accessibility array */
1852   int           *index;                             /* contains the index used for navigating inside the alignments */
1853   long long int begin, end;                         /* contains the begin and end region to read the accessibility */
1854 
1855   index = (int *)vrna_alloc(sizeof(int) * number);
1856   for (i = 0; i < number; i++)
1857     index[i] = 1;
1858   master_access = (int ***)vrna_alloc(sizeof(int **) * number);
1859   int   dim_x; /* contains the minimal of the maximum u length for all sequences of the alignment */
1860   dim_x = INF;
1861   int   u;
1862   int   location_flag; /* location flag checks if all line contains a location flag or not */
1863   /* if some line contains location information and other not, return a warning but keep going; */
1864   /* The location flag will be set to TRUE if the following pattern is found */
1865   /*  sequence-name/[0-9]+-[0-9]+  */
1866   /*  |----NAME----||BEG|  |END| */
1867   char  bla[255];
1868   if (sscanf(names[0], "%255[^/]/%lld-%lld", bla, &begin, &end) == 3) /* initialize location_flag; */
1869     location_flag = 1;
1870   else
1871     location_flag = 0;
1872 
1873   char *file_s1 = NULL;
1874   for (i = 0; i < number; i++) {
1875     /*  be careful!!!! Name should contain all characters from begin till the "/" character */
1876     /* char *s1; */
1877     file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(names[i]) + strlen(access) + 20));
1878     begin   = 1;
1879     int sequence_length = get_sequence_length_from_alignment(ALN[i]);
1880     end = sequence_length;
1881     if (sscanf(names[i], "%255[^/]/%lld-%lld", bla, &begin, &end) == 3) {
1882       /* if subsequence and range do not have the same length, stop RNAplex. */
1883       if (end - begin + 1 != sequence_length - 20) {
1884         printf("Your range %lld %lld in sequence %s does not correspond to the sequence length %d\n", begin, end, names[i], sequence_length - 20);
1885         printf("Please check your input alignments and rerun RNAplex");
1886         int a = 0;
1887         if (master_access != NULL) {
1888           for (a = 0; a < i; a++) {
1889             int j;
1890             j = master_access[a][0][0];
1891             while (--j > -1)
1892               free(master_access[a][j]);
1893             free(master_access[a]);
1894           }
1895         }
1896 
1897         free(master_access);
1898         free(index);
1899         free(file_s1);
1900         return average_access;
1901       }
1902 
1903       end += 20; /* add 20 to the end, in order to take the N's into account */
1904       if (location_flag == 0) {
1905         vrna_message_warning("\n!! Line %d in your target alignment contains location information\n"
1906                              "while line %d did not. PLEASE CHECK your alignments!!\n"
1907                              "RNAplex will continue the target search.",
1908                              i + 1, i);
1909       }
1910 
1911       location_flag = 1;
1912       strcpy(file_s1, access);
1913       strcat(file_s1, "/");
1914       strcat(file_s1, bla);
1915     } else {
1916       if (location_flag == 1) {
1917         vrna_message_warning("\n!! Line %d in your target alignment does not contain location information\n"
1918                              "while line %d in your target alignment did. PLEASE CHECK your alignments!!\n"
1919                              "RNAplex will continue the target search.",
1920                              i + 1, i);
1921       }
1922 
1923       location_flag = 0;
1924       strcpy(file_s1, access);
1925       strcat(file_s1, "/");
1926       strcat(file_s1, names[i]);
1927     }
1928 
1929     strcat(file_s1, "_openen");
1930     if (!binaries) {
1931       master_access[i] = read_plfold_i(file_s1, begin, end, verhaeltnis, alignment_length, fast); /* read */
1932     } else {
1933       strcat(file_s1, "_bin");
1934       master_access[i] = read_plfold_i_bin(file_s1, begin, end, verhaeltnis, alignment_length, fast); /* read */
1935     }
1936 
1937     free(file_s1);
1938     file_s1 = NULL;
1939     dim_x   = MIN2(dim_x, master_access[i][0][0]);
1940   }
1941   average_access = (int **)vrna_alloc(sizeof(int *) * (dim_x));
1942   for (i = 0; i < dim_x; i++)
1943     average_access[i] = (int *)vrna_alloc(sizeof(int) * (aln_size + 9));  /* average_access is of the length of the alignment */
1944   /* while master_access is of the length of the sequences. */
1945   average_access[0][0] = dim_x;
1946   for (i = 1; i < aln_size; i++) {
1947     /* go through all aln position */
1948     int j;
1949     int count_j = number;
1950     for (j = 0; j < number; j++) {
1951       /* go through all aln members */
1952       if (!(ALN[j][i - 1] == '-')) {
1953         /* if we have a gap, skip this position */
1954         for (u = 1; u < dim_x; u++) {
1955           /* go through all u */
1956           if (index[j] < u + 10)
1957             continue;
1958 
1959           average_access[u][i] += master_access[j][u][index[j]];  /* index[j] position in sequence j */
1960         }
1961         index[j]++;                                               /* increase position in sequence */
1962       } else {
1963         count_j--;
1964       }
1965     }
1966     if (!(count_j > 0)) {
1967       printf("Alignment contains only gap at column %d\n exiting program\n", i);
1968       return NULL;
1969     }
1970 
1971     for (u = 1; u < dim_x; u++)
1972       average_access[u][i] = (int)rint(average_access[u][i] / (count_j));
1973   }
1974   /* free(index); */
1975   for (i = 0; i < number; i++) {
1976     int j;
1977     j = master_access[i][0][0];
1978     while (--j > -1)
1979       free(master_access[i][j]);
1980     free(master_access[i]);
1981   }
1982   free(master_access);
1983   free(index);
1984   return average_access;
1985 }
1986 
1987 
1988 /**
1989  * aliprint_struct generate two postscript files.
1990  * The first one is a structure annotated alignment a la RNAalifold.
1991  * The second file is a structural representation of the interaction
1992  * with colour annotation of the base-pair conservation.
1993  */
1994 static void
aliprint_struct(FILE * Result,FILE * mrna,FILE * query,const int WindowsLength)1995 aliprint_struct(FILE      *Result,        /* result file */
1996                 FILE      *mrna,          /* target alignment */
1997                 FILE      *query,         /* query alignment */
1998                 const int WindowsLength   /* Number of nucleotide around the target sequence */
1999                 )
2000 {
2001   char  *result = NULL;
2002   /**
2003    * Defines arrays for names and sequences of the query and target alignment
2004    */
2005   char  *AS1[MAX_NUM_NAMES], *AS2[MAX_NUM_NAMES], *names1[MAX_NUM_NAMES], *names2[MAX_NUM_NAMES];
2006   int   n_seq, n_seq2, i, s;
2007 
2008   /**
2009    * Check if target and sequence alignment contains the same number of sequences
2010    * The user should ensure that the sequence are in the same species order in both the target and query alignment files.
2011    */
2012   n_seq   = read_clustal(mrna, AS1, names1);
2013   n_seq2  = read_clustal(query, AS2, names2);
2014   if (n_seq != n_seq2) {
2015     for (i = 0; AS1[i]; i++) {
2016       free(AS1[i]);
2017       free(AS2[i]);
2018     }
2019     vrna_message_error("unequal number of seqs in alignments");
2020   }
2021 
2022   /**
2023    * Here we get the length of the target and query alignments. Important for initiliazing the necessary arrays
2024    */
2025   int target_alignment_length, query_alignment_length;
2026   target_alignment_length = strlen(AS1[0]);
2027   query_alignment_length  = strlen(AS2[0]);
2028 
2029 
2030   /**
2031    * Initialize files containing sequences
2032    */
2033   AS1[n_seq]  = NULL;
2034   AS2[n_seq]  = NULL;
2035   do {
2036     /**
2037      * Quit if the result file does not exist
2038      */
2039     if ((result = vrna_read_line(Result)) == NULL) {
2040       free(result);
2041       break;
2042     }
2043 
2044     /**
2045      * If we have a line in the result file that looks like a result line, then parse it
2046      */
2047     if (strchr(result, '(') && strchr(result, '&') && strchr(result, '(') && strchr(result, ',') && strchr(result, ':') && strchr(result, '-')) {
2048       char  *structure, *pos;
2049       /**
2050        * tbegin,tend,qbegin,qend store the duplex boundaries
2051        * on both the target and query sequences, respectively
2052        */
2053       int   tbegin, tend, qbegin, qend;
2054       int   length, posi;
2055       /**
2056        * sbegin, send are the coordinates of the target alignment slide
2057        */
2058       int   sbegin, send;
2059       /**
2060        * Check in the line where is the first space.
2061        * This gives us the total length of the duplex
2062        */
2063       pos     = strchr(result, ' ');
2064       posi    = (int)(pos - result);
2065       length  = posi;
2066       /**
2067        * Copy the structure in the line into variable structure
2068        */
2069       structure = (char *)vrna_alloc((length + 1) * sizeof(char));
2070       sscanf(result, "%s", structure);
2071       /**
2072        * Save the coordinates on the target
2073        */
2074       sscanf(pos, "%10d,%10d", &tbegin, &tend);
2075       pos = strchr(pos, ':');
2076       pos = strchr(pos, ' ');
2077       /**
2078        * Save the coordinates on the query
2079        */
2080       sscanf(pos, "%10d,%10d", &qbegin, &qend);
2081 
2082       /**
2083        * To produce the ps files we use the full query alignment
2084        * and a user defined section of the target coordinates.
2085        * The size of the target slide is determined by the WindowsLength variable
2086        */
2087       sbegin  = MAX2(tbegin - WindowsLength, 0);
2088       send    = MIN2(tend + WindowsLength, target_alignment_length);
2089       /**
2090        * We retrieved the coordinates of the duplex
2091        * Now we can slide the alignment
2092        * Target and query will hold the alignments for the target and query sequences
2093        */
2094       char  **target;
2095       char  **query;
2096       char  **join;
2097       target  = (char **)vrna_alloc((n_seq + 1) * sizeof(char *));
2098       query   = (char **)vrna_alloc((n_seq + 1) * sizeof(char *));
2099       join    = (char **)vrna_alloc((n_seq + 1) * sizeof(char *));
2100       for (s = 0; s < n_seq; s++) {
2101         target[s] = (char *)vrna_alloc((send - sbegin + 2) * sizeof(char));
2102         strncpy(target[s], (AS1[s] + sbegin - 1), (send - sbegin + 1));
2103         target[s][send - sbegin + 1]  = '\0';
2104         query[s]                      = (char *)vrna_alloc((query_alignment_length + 1) * sizeof(char));
2105         strcpy(query[s], AS2[s]);
2106         query[s][query_alignment_length]  = '\0';
2107         join[s]                           = (char *)vrna_alloc((query_alignment_length + (send - sbegin) + 3) * sizeof(char));
2108         strncpy(join[s], (AS1[s] + sbegin - 1), (send - sbegin + 1));
2109         strcat(join[s], "&");
2110         strcat(join[s], AS2[s]);
2111       }
2112       target[n_seq] = NULL;
2113       query[n_seq]  = NULL;
2114       /**
2115        * Get the consensus structure for the query and target sequence
2116        * This consensus structure is constrained such that the interaction sites are unbound.
2117        * We define and set the query and target constraint
2118        */
2119       char  *target_constraint;
2120       char  *query_constraint;
2121       char  *joint_structure;
2122       target_constraint = (char *)vrna_alloc((unsigned)(send - sbegin + 2));
2123       query_constraint  = (char *)vrna_alloc((unsigned)(query_alignment_length + 1));
2124       joint_structure   = (char *)vrna_alloc((unsigned)(send - sbegin + 3 + query_alignment_length));
2125       for (i = 0; i < strlen(target[0]); i++) {
2126         if (i > tbegin - sbegin - 1 && i < tend - sbegin + 1)
2127           target_constraint[i] = 'x';
2128         else
2129           target_constraint[i] = '.';
2130       }
2131       for (i = 0; i < strlen(AS2[0]); i++) {
2132         if (i > qbegin - 2 && i < qend)
2133           query_constraint[i] = 'x';
2134         else
2135           query_constraint[i] = '.';
2136       }
2137       /**
2138        * Now produce structure
2139        */
2140       fold_constrained = 1;
2141       alifold((const char **)target, target_constraint);
2142       for (i = 0; !(target_constraint[i] == '\0'); i++) {
2143         if (target_constraint[i] == '(')
2144           target_constraint[i] = '[';
2145         else if (target_constraint[i] == ')')
2146           target_constraint[i] = ']';
2147       }
2148       alifold((const char **)query, query_constraint);
2149       for (i = 0; !(query_constraint[i] == '\0'); i++) {
2150         if (query_constraint[i] == '(')
2151           query_constraint[i] = '<';
2152         else if (query_constraint[i] == ')')
2153           query_constraint[i] = '>';
2154       }
2155       /**
2156        * Add duplex structure, and produce joint structure
2157        */
2158       int count_duplex_structure = 0;
2159       for (i = tbegin - sbegin; i < tend - sbegin + 1; i++) {
2160         target_constraint[i] = structure[count_duplex_structure];
2161         count_duplex_structure++;
2162       }
2163       count_duplex_structure++;
2164       for (i = qbegin - 1; i < qend; i++) {
2165         query_constraint[i] = structure[count_duplex_structure];
2166         count_duplex_structure++;
2167       }
2168       strncpy(joint_structure, target_constraint, (send - sbegin + 1));
2169       strcat(joint_structure, "&");
2170       strcat(joint_structure, query_constraint);
2171       /**
2172        * Produce consensus sequence
2173        */
2174       char  *temp_target;
2175       char  *temp_query;
2176       char  *string;
2177       temp_target = consensus((const char **)target);
2178       temp_query  = consensus((const char **)query);
2179       string      = (char *)vrna_alloc((strlen(temp_target) + strlen(temp_query) + 1) * sizeof(char));
2180       strcpy(string, temp_target);
2181       free(temp_target);
2182       strcat(string, temp_query);
2183       free(temp_query);
2184       /**
2185        * now produce output name, based on the first two names of the alignment
2186        */
2187       char *psoutput = vrna_strdup_printf("annaln_%s_%s_%d_%d_%d_%d.ps",
2188                                           names1[0],
2189                                           names2[0],
2190                                           tbegin,
2191                                           tend,
2192                                           qbegin,
2193                                           qend);
2194       s = 0;
2195       while (psoutput[s] != '\0') {
2196         if (psoutput[s] == '\\' || psoutput[s] == '/')
2197           psoutput[s] = '_';
2198 
2199         s++;
2200       }
2201       /**
2202        * Produce a structure annotated, colorated alignment
2203        */
2204       aliPS_color_aln(joint_structure, psoutput, (const char **)join, (const char **)names1);
2205       free(psoutput);
2206 #if 0
2207       /**
2208        * We also need the consensus structure annotated with conservation information
2209        *
2210        */
2211 
2212       /**
2213        * First we change the output file name from annaln -> annstr
2214        */
2215       psoutput[3] = 's';
2216       psoutput[4] = 't';
2217       psoutput[5] = 'r';
2218 
2219 
2220       /**
2221        * Now we change the different parenthesis into one
2222        */
2223 
2224       char *joint_structure_parenthesis;
2225       joint_structure_parenthesis = (char *)vrna_alloc((strlen(joint_structure) + 1) * sizeof(char));
2226       strcpy(joint_structure_parenthesis, joint_structure);
2227       for (i = 0; i < strlen(joint_structure); i++) {
2228         if ((joint_structure[i] == '[') || (joint_structure[i] == '<'))
2229           joint_structure[i] = '(';
2230         else if (joint_structure[i] == ']' || joint_structure[i] == '>')
2231           joint_structure[i] = ')';
2232       }
2233       cut_point = send - sbegin + 1;
2234       /**
2235        * Remove & from the alignment and the consensus structure
2236        */
2237       int counter = 0;
2238       for (i = 0; i < strlen(joint_structure); i++) {
2239         if (joint_structure[i] == '&')
2240           continue;
2241 
2242         joint_structure[counter]              = joint_structure[i];
2243         joint_structure_parenthesis[counter]  = joint_structure_parenthesis[i];
2244         for (s = 0; s < n_seq; s++)
2245           join[s][counter] = join[s][i];
2246         counter++;
2247       }
2248       free(string);
2249       string = consensus((const char **)join);
2250       printf("%s\n%s\n%s\n", join[0], string, joint_structure);
2251       char **A;
2252       //A=annote(joint_structure_parenthesis,(const char**) join);
2253       xrna_plex_plot(string, joint_structure_parenthesis, psoutput);
2254       /**
2255        * Free stuff...
2256        */
2257       free(structure);
2258       free(result);
2259       free(psoutput);
2260       for (i = 0; i < n_seq + 1; i++)
2261         free(target[i]);
2262       free(target);
2263 #endif
2264     }
2265   } while (1);
2266   for (i = 0; AS1[i]; i++) {
2267     free(AS1[i]);
2268     free(AS2[i]);
2269     free(names1[i]);
2270     free(names2[i]);
2271   }
2272   fclose(mrna);
2273   fclose(query);
2274 }
2275 
2276 
2277 /*
2278  * All characters not being included into ATCGUN are considered as gap characters.
2279  */
2280 static int
get_sequence_length_from_alignment(char * sequence)2281 get_sequence_length_from_alignment(char *sequence)
2282 {
2283   int counter = 0;
2284 
2285   while (!(*sequence == '\0')) {
2286     if (*sequence == 'A' || *sequence == 'T' || *sequence == 'C' || *sequence == 'G' || *sequence == 'U' || *sequence == 'N')
2287       counter++;
2288 
2289     sequence++;
2290   }
2291   return counter;
2292 }
2293 
2294 
2295 static void
linear_fit(int * il_a,int * il_b,int * b_a,int * b_b)2296 linear_fit(int  *il_a,
2297            int  *il_b,
2298            int  *b_a,
2299            int  *b_b)
2300 {
2301   /*get fit parameters*/
2302   vrna_md_t     md;
2303   vrna_param_t  *P = NULL;
2304 
2305   set_model_details(&md);
2306 
2307   if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
2308     update_fold_params();
2309     P = vrna_params(&md);
2310     make_pair_matrix();
2311   }
2312 
2313   int     internal_loop_x[25] = {
2314     6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
2315   };
2316   int     internal_loop_y[25];
2317   int     i;
2318   for (i = 0; i < 25; i++) /* initialize the y-value for internal loops */
2319     internal_loop_y[i] = P->internal_loop[internal_loop_x[i]];
2320   int     sumx, sumy, sumxx, sumxy;
2321   sumx = sumy = sumxx = sumxy = 0;
2322   double  til_a, til_b;
2323   int     n = 25; /* only take data points till int_loop size <=20;  */
2324                   /* no measurement exists for larger loop &&  */
2325                   /* such large loop are not expected in RNA-RNA interactions. */
2326   for (i = 0; i < n; i++) {
2327     sumx  += internal_loop_x[i];
2328     sumy  += internal_loop_y[i];
2329     sumxx += internal_loop_x[i] * internal_loop_x[i];
2330     sumxy += internal_loop_x[i] * internal_loop_y[i];
2331   }
2332   if ((sumxx - (sumx * sumx) / n) < 1e-6) {
2333     free(P);
2334     printf("divisor for internal loop is too small %d\n", (sumxx - (sumx * sumx) / n));
2335     vrna_message_error("Problem in fitting");
2336   }
2337 
2338   til_a = (sumxy - (sumx * sumy) / n) / (sumxx - (sumx * sumx) / n);
2339   til_b = sumy / n - (til_a * sumx / n);
2340   *il_a = (int)til_a;
2341   *il_b = (int)til_b;
2342 
2343   /* ###bulge#### */
2344 
2345   n = 5;
2346   int     bulge_loop_x[5] = {
2347     2, 3, 4, 5, 6
2348   };
2349   int     bulge_loop_y[5];
2350   for (i = 0; i < n; i++)
2351     bulge_loop_y[i] = P->bulge[bulge_loop_x[i]];
2352   sumx = sumy = sumxx = sumxy = 0;
2353   double  tb_a, tb_b;
2354   for (i = 0; i < n; i++) {
2355     sumx  += bulge_loop_x[i];
2356     sumy  += bulge_loop_y[i];
2357     sumxx += bulge_loop_x[i] * bulge_loop_x[i];
2358     sumxy += bulge_loop_x[i] * bulge_loop_y[i];
2359   }
2360   tb_a  = (sumxy - (sumx * sumy) / n) / (sumxx - (sumx * sumx) / n);
2361   tb_b  = sumy / n - (tb_a * sumx / n);
2362   *b_a  = (int)tb_a;
2363   *b_b  = (int)tb_b;
2364   free(P);
2365   if ((sumxx - (sumx * sumx) / n) < 1e-6) {
2366     printf("divisor for bulge loop is too small %d\n", (sumxx - (sumx * sumx) / n));
2367     vrna_message_error("Problem in fitting");
2368   }
2369 }
2370 
2371 
2372 double
probcompute_silvana_98(char * s1,double k_concentration,double tris_concentration,double mg_concentration,double na_concentration,double probe_concentration)2373 probcompute_silvana_98(char   *s1,
2374                        double k_concentration,
2375                        double tris_concentration,
2376                        double mg_concentration,
2377                        double na_concentration,
2378                        double probe_concentration)
2379 {
2380   double  d_a               = 3.92 / 100000.0;  /*Parameters from the article of Owczarzy*/
2381   double  d_b               = 9.11 / 1000000;   /*Parameters from the article of Owczarzy*/
2382   double  d_c               = 6.26 / 100000;    /*Parameters from the article of Owczarzy*/
2383   double  d_d               = 1.42 / 100000;    /*Parameters from the article of Owczarzy*/
2384   double  d_e               = 4.82 / 10000;     /*Parameters from the article of Owczarzy*/
2385   double  d_f               = 5.25 / 10000;     /*Parameters from the article of Owczarzy*/
2386   double  d_g               = 8.31 / 100000;    /*Parameters from the article of Owczarzy*/
2387   double  d_magn_corr_value = 0;
2388   double  d_fgc             = 0;
2389   double  dH, dS;
2390 
2391   dS  = 0;
2392   dH  = 0;
2393   double  salt_correction;
2394   double  enthalpy[4][4] =
2395   { { -7.9, -8.4,  -7.8,  -7.2 },
2396     { -8.5, -8.0,  -10.6, -7.8 },
2397     { -8.2, -10.6, -8.0,  -8.4 },
2398     { -7.2, -8.2,  -8.5,  -7.9 } };
2399   double  entropy[4][4] = { {
2400                               -22.2, -22.4, -21.0, -20.4
2401                             },
2402                             { -22.7, -19.9, -27.2, -21.0 },
2403                             { -22.2, -27.2, -19.9, -22.4 },
2404                             { -21.3, -22.2, -22.7, -22.2 } };
2405   int     posst;
2406   posst = 0;
2407   int     converted = encode_char(toupper(s1[posst])) - 1;
2408   int     seqlen    = strlen(s1);
2409   double  Tm;
2410 
2411   /* terminal correction */
2412   if (s1[0] == 'G' || s1[0] == 'C') {
2413     dH  += 0.1;
2414     dS  += -2.8;
2415     d_fgc++;
2416   }
2417 
2418   if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2419     dH  += 2.3;
2420     dS  += 4.8;
2421   }
2422 
2423   if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2424     dH  += 0.1;
2425     dS  += -2.8;
2426   }
2427 
2428   if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2429     dH  += 2.3;
2430     dS  += 4.8;
2431   }
2432 
2433   /* compute dH and dH based on sequence */
2434   for (posst = 1; posst < seqlen; posst++) {
2435     if (toupper(s1[posst]) == 'G' || toupper(s1[posst]) == 'C')
2436       d_fgc++;
2437 
2438     int nextconverted = encode_char(toupper(s1[posst])) - 1;
2439     dH        += enthalpy[converted][nextconverted];
2440     dS        += entropy[converted][nextconverted];
2441     converted = nextconverted;
2442   }
2443   d_fgc = d_fgc / ((double)(seqlen));
2444   if (mg_concentration == 0) {
2445     dS  += 0.368 * (seqlen - 1) * log(na_concentration);
2446     Tm  = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2447     return Tm;
2448   } else {
2449     double  single_charged  = k_concentration + tris_concentration / 2 + na_concentration;
2450     double  d_ratio_ions    = sqrt(mg_concentration / single_charged);
2451     if (single_charged == 0) {
2452       d_magn_corr_value =
2453         d_a -
2454         d_b * log(mg_concentration) +
2455         d_fgc * (d_c + d_d * log(mg_concentration)) +
2456         1 / (2 * ((double)seqlen - 1)) *
2457         (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2458     } else {
2459       if (d_ratio_ions < 0.22) {
2460         d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1 / 100000 * log(single_charged) + 9.40 * 1 / 1000000 * pow(log(single_charged), 2);
2461       } else {
2462         if (d_ratio_ions < 6) {
2463           d_a = 3.92 / 100000 * (0.843 - 0.352 * sqrt(single_charged) * log(single_charged));
2464           d_d = 1.42 / 100000 * (1.279 - 4.03 / 1000 * log(single_charged) - 8.03 / 1000 * pow(log(single_charged), 2));
2465           d_g = 8.31 / 100000 * (0.486 - 0.258 * log(single_charged) + 5.25 / 1000 * pow(log(single_charged), 3));
2466 
2467           d_magn_corr_value = d_a - d_b * log
2468                                 (mg_concentration) + d_fgc *
2469                               (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2470         } else {
2471           d_magn_corr_value = d_a - d_b * log(mg_concentration) + d_fgc * (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2472         }
2473       }
2474     }
2475 
2476     double temp_Tm = dH * 1000 / (dS + 1.987 * log(probe_concentration / 4));
2477     Tm = 1 / (1 / temp_Tm + d_magn_corr_value) - 273.15;
2478     return Tm;
2479   }
2480 }
2481 
2482 
2483 double
probcompute_silvana_04(char * s1,double k_concentration,double tris_concentration,double mg_concentration,double na_concentration,double probe_concentration)2484 probcompute_silvana_04(char   *s1,
2485                        double k_concentration,
2486                        double tris_concentration,
2487                        double mg_concentration,
2488                        double na_concentration,
2489                        double probe_concentration)
2490 {
2491   double  d_a               = 3.92 / 100000.0;  /*Parameters from the article of Owczarzy*/
2492   double  d_b               = 9.11 / 1000000;   /*Parameters from the article of Owczarzy*/
2493   double  d_c               = 6.26 / 100000;    /*Parameters from the article of Owczarzy*/
2494   double  d_d               = 1.42 / 100000;    /*Parameters from the article of Owczarzy*/
2495   double  d_e               = 4.82 / 10000;     /*Parameters from the article of Owczarzy*/
2496   double  d_f               = 5.25 / 10000;     /*Parameters from the article of Owczarzy*/
2497   double  d_g               = 8.31 / 100000;    /*Parameters from the article of Owczarzy*/
2498   double  d_magn_corr_value = 0;
2499   double  d_fgc             = 0;
2500   double  dH, dS;
2501 
2502   dS  = 0;
2503   dH  = 0;
2504   double  salt_correction;
2505   double  enthalpy[4][4] =
2506   { { -7.6, -8.4, -7.8,  -7.2 },
2507     { -8.5, -8.0, -10.6, -7.8 },
2508     { -8.2, -9.8, -8.0,  -8.4 },
2509     { -7.2, -8.2, -8.5,  -7.6 } };
2510   double  entropy[4][4] = { {
2511                               -21.3, -22.4, -21.0, -20.4
2512                             },
2513                             { -22.7, -19.9, -27.2, -21.0 },
2514                             { -22.2, -24.4, -19.9, -22.4 },
2515                             { -21.3, -22.2, -22.7, -21.3 } };
2516   int     posst;
2517   posst = 0;
2518   int     converted = encode_char(toupper(s1[posst])) - 1;
2519   int     seqlen    = strlen(s1);
2520   double  Tm;
2521 
2522   /* terminal correction */
2523   if (s1[0] == 'G' || s1[0] == 'C') {
2524     dH  += 0.1;
2525     dS  += -2.85;
2526     d_fgc++;
2527   }
2528 
2529   if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2530     dH  += 2.4;
2531     dS  += 4.1;
2532   }
2533 
2534   if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2535     dH  += 0.1;
2536     dS  += -2.85;
2537   }
2538 
2539   if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2540     dH  += 2.4;
2541     dS  += 4.1;
2542   }
2543 
2544   /* compute dH and dH based on sequence */
2545   for (posst = 1; posst < seqlen; posst++) {
2546     if (toupper(s1[posst]) == 'G' || toupper(s1[posst]) == 'C')
2547       d_fgc++;
2548 
2549     int nextconverted = encode_char(toupper(s1[posst])) - 1;
2550     dH        += enthalpy[converted][nextconverted];
2551     dS        += entropy[converted][nextconverted];
2552     converted = nextconverted;
2553   }
2554   d_fgc = d_fgc / ((double)(seqlen));
2555   if (mg_concentration == 0) {
2556     dS  += 0.368 * (seqlen - 1) * log(na_concentration);
2557     Tm  = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2558     return Tm;
2559   } else {
2560     double  single_charged  = k_concentration + tris_concentration / 2 + na_concentration;
2561     double  d_ratio_ions    = sqrt(mg_concentration / single_charged);
2562     if (single_charged == 0) {
2563       d_magn_corr_value =
2564         d_a -
2565         d_b * log(mg_concentration) +
2566         d_fgc * (d_c + d_d * log(mg_concentration)) +
2567         1 / (2 * ((double)seqlen - 1)) *
2568         (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2569     } else {
2570       if (d_ratio_ions < 0.22) {
2571         d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1 / 100000 * log(single_charged) + 9.40 * 1 / 1000000 * pow(log(single_charged), 2);
2572       } else {
2573         if (d_ratio_ions < 6) {
2574           d_a = 3.92 / 100000 * (0.843 - 0.352 * sqrt(single_charged) * log(single_charged));
2575           d_d = 1.42 / 100000 * (1.279 - 4.03 / 1000 * log(single_charged) - 8.03 / 1000 * pow(log(single_charged), 2));
2576           d_g = 8.31 / 100000 * (0.486 - 0.258 * log(single_charged) + 5.25 / 1000 * pow(log(single_charged), 3));
2577 
2578           d_magn_corr_value = d_a - d_b * log
2579                                 (mg_concentration) + d_fgc *
2580                               (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2581         } else {
2582           d_magn_corr_value = d_a - d_b * log(mg_concentration) + d_fgc * (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2583         }
2584       }
2585     }
2586 
2587     double temp_Tm = dH * 1000 / (dS + 1.987 * log(probe_concentration / 4));
2588     Tm = 1 / (1 / temp_Tm + d_magn_corr_value) - 273.15;
2589     return Tm;
2590   }
2591 }
2592 
2593 
2594 double
probcompute_xia_98(char * s1,double na_concentration,double probe_concentration)2595 probcompute_xia_98(char   *s1,
2596                    double na_concentration,
2597                    double probe_concentration)
2598 {
2599   double  dH, dS;
2600 
2601   dS  = 0;
2602   dH  = 0;
2603   double  salt_correction;
2604   double  enthalpy[4][4] =
2605   { { -6.820,  -11.400, -10.480, -9.380  },
2606     { -10.440, -13.390, -10.640, -10.480 },
2607     { -12.440, -14.880, -13.390, -11.400 },
2608     { -7.690,  -12.440, -10.440, -6.820  } };
2609   double  entropy[4][4] =
2610   { { -19.0, -29.5, -27.1, -26.7 },
2611     { -26.9, -32.7, -26.7, -27.1 },
2612     { -32.5, -36.9, -32.7, -29.5 },
2613     { -20.5, -32.5, -26.9, -19.0 }, };
2614   int     posst;
2615   posst = 0;
2616   int     converted = encode_char(toupper(s1[posst])) - 1;
2617   int     seqlen    = strlen(s1);
2618   double  Tm;
2619 
2620   /* terminal correction */
2621   if (s1[0] == 'G' || s1[0] == 'C') {
2622     dH  += 3.61;
2623     dS  += -1.5;
2624   }
2625 
2626   if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2627     dH  += 7.73;
2628     dS  += 9.0;
2629   }
2630 
2631   if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2632     dH  += 3.61;
2633     dS  += -1.5;
2634   }
2635 
2636   if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2637     dH  += 7.73;
2638     dS  += 9.0;
2639   }
2640 
2641   /* compute dH and dH based on sequence */
2642   for (posst = 1; posst < seqlen; posst++) {
2643     int nextconverted = encode_char(toupper(s1[posst])) - 1;
2644     dH        += enthalpy[converted][nextconverted];
2645     dS        += entropy[converted][nextconverted];
2646     converted = nextconverted;
2647   }
2648   dS  += 0.368 * (seqlen - 1) * log(na_concentration);
2649   Tm  = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2650   return Tm;
2651 }
2652 
2653 
2654 double
probcompute_sug_95(char * s1,double na_concentration,double probe_concentration)2655 probcompute_sug_95(char   *s1,
2656                    double na_concentration,
2657                    double probe_concentration)
2658 {
2659   double  dH, dS;
2660 
2661   dS  = 0;
2662   dH  = 0;
2663   double  salt_correction;
2664   double  enthalpy[4][4] =
2665   { { -11.500, -7.800,  -7.000,  -8.300 },
2666     { -10.400, -12.800, -16.300, -9.100 },
2667     { -8.600,  -8.000,  -9.300,  -5.900 },
2668     { -7.800,  -5.500,  -9.000,  -7.800 } };
2669   double  entropy[4][4] =
2670   { { -36.4, -21.6, -19.7, -23.9 },
2671     { -28.4, -31.9, -47.1, -23.5 },
2672     { -22.9, -17.1, -23.2, -12.3 },
2673     { -23.2, -13.5, -26.1, -21.9 } };
2674   int     posst;
2675   posst = 0;
2676   int     converted = encode_char(toupper(s1[posst])) - 1;
2677   int     seqlen    = strlen(s1);
2678   double  Tm;
2679 
2680   /*  salt entropy correction von Ahsen 1999 */
2681   /* terminal correction */
2682   if (s1[0] == 'G' || s1[0] == 'C') {
2683     dH  += 0.95;
2684     dS  += -1.95;
2685   }
2686 
2687   if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2688     dH  += 0.95;
2689     dS  += -1.95;
2690   }
2691 
2692   if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2693     dH  += 0.95;
2694     dS  += -1.95;
2695   }
2696 
2697   if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2698     dH  += 0.95;
2699     dS  += -1.95;
2700   }
2701 
2702   /* compute dH and dH based on sequence */
2703   for (posst = 1; posst < seqlen; posst++) {
2704     int nextconverted = encode_char(toupper(s1[posst])) - 1;
2705     dH        += enthalpy[converted][nextconverted];
2706     dS        += entropy[converted][nextconverted];
2707     converted = nextconverted;
2708   }
2709   /*  salt entropy correction von Ahsen 1999 */
2710   dS  += 0.368 * (seqlen - 1) * log(na_concentration);
2711   Tm  = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2712   return Tm;
2713 }
2714 
2715 
2716 double
probcompute_newparameters(char * s1,double k_concentration,double tris_concentration,double mg_concentration,double na_concentration,double probe_concentration)2717 probcompute_newparameters(char    *s1,
2718                           double  k_concentration,
2719                           double  tris_concentration,
2720                           double  mg_concentration,
2721                           double  na_concentration,
2722                           double  probe_concentration)
2723 {
2724   /* ////////////////////////////////////////// */
2725   /* Folding Init */
2726   /* //////////////////////////////////////////   */
2727   vrna_md_t     md;
2728   vrna_param_t  *P = NULL;
2729 
2730   set_model_details(&md);
2731 
2732   if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
2733     update_fold_params();
2734     P = vrna_params(&md);
2735     make_pair_matrix();
2736   }
2737 
2738   /* ///////////////////////////////////////// */
2739   /* Salt Variable Declaration */
2740   /* ///////////////////////////////////////// */
2741 
2742   double  d_temp;             /* melting temperature */
2743   double  d_temp_na;          /*melting temperature in 1M na+ */
2744   double  d_salt_corr_value = 0.0;
2745   double  d_magn_corr_value = 0.0;
2746 
2747   double  d_fgc;              /* gc content */
2748   double  d_conc_monovalents  = k_concentration + na_concentration + tris_concentration / 2;
2749   double  d_ratio_ions        = sqrt(mg_concentration) / d_conc_monovalents;
2750   double  d_a                 = 3.92 / 100000.0;  /*Parameters from the article of Owczarzy*/
2751   double  d_b                 = 9.11 / 1000000;   /*Parameters from the article of Owczarzy*/
2752   double  d_c                 = 6.26 / 100000;    /*Parameters from the article of Owczarzy*/
2753   double  d_d                 = 1.42 / 100000;    /*Parameters from the article of Owczarzy*/
2754   double  d_e                 = 4.82 / 10000;     /*Parameters from the article of Owczarzy*/
2755   double  d_f                 = 5.25 / 10000;     /*Parameters from the article of Owczarzy*/
2756   double  d_g                 = 8.31 / 100000;    /*Parameters from the article of Owczarzy*/
2757 
2758   /* ////////////////////////////////////// */
2759   /* Sequence Variable Declaration */
2760   /* ////////////////////////////////////// */
2761 
2762   int seqlen = strlen(s1);
2763   int posst;
2764   posst = 0;
2765   int converted     = encode_char(toupper(s1[posst]));
2766   int revconverted  = abs(5 - converted);
2767 
2768   /* ////////////////////////////////////// */
2769   /* Energy Variable Declaration */
2770   /* ////////////////////////////////////// */
2771 
2772   double dT, dG, dS, dH;
2773   dS  = 0;
2774   dT  = 0;
2775   dH  = 0;
2776 
2777   /* ////////////////////////////////////// */
2778   /* Computation */
2779   /* ////////////////////////////////////// */
2780 
2781 
2782   /* GC-Content */
2783   d_fgc = 0;
2784   for (posst = 0; posst < seqlen; posst++)
2785     if (s1[posst] == 'G' || s1[posst] == 'C' || s1[posst] == 'g' || s1[posst] == 'c')
2786       d_fgc++;
2787 
2788   d_fgc = d_fgc / seqlen;
2789 
2790   /* dH dG dS */
2791   int type = pair[converted][revconverted];
2792   /* Add twice the duplex penalty */
2793   dG  = 2 * P->DuplexInit;
2794   dH  = 2 * DuplexInitdH;
2795   dS  = (dH - dG) / (37 + K0) * 10;
2796   if (type > 2) {
2797     dG  += P->TerminalAU;
2798     dH  += TerminalAUdH;
2799     dS  += (TerminalAUdH - P->TerminalAU) / (37 + K0) * 10;
2800   }
2801 
2802   for (posst = 1; posst < seqlen; posst++) {
2803     int nextconverted     = encode_char(toupper(s1[posst]));
2804     int nextrevconverted  = abs(5 - nextconverted);
2805     int nexttype          = pair[nextconverted][nextrevconverted];
2806     dG            += stack37[rtype[type]][nexttype];
2807     dH            += stackdH[rtype[type]][nexttype];
2808     dS            += (stackdH[rtype[type]][nexttype] - stack37[rtype[type]][nexttype]) / (37 + K0) * 10;
2809     converted     = nextconverted;
2810     revconverted  = nextrevconverted;
2811     type          = nexttype;
2812   }
2813   if (type > 2) {
2814     dG  += P->TerminalAU;
2815     dH  += TerminalAUdH;
2816     dS  += (TerminalAUdH - P->TerminalAU) / (37 + K0) * 10;
2817   }
2818 
2819   /* add initiation again because of symmetry. */
2820 
2821 
2822   if (mg_concentration == 0) {
2823     dS += 0.368 * (seqlen - 1) * log(na_concentration);
2824     double Tm;
2825     Tm = (10 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2826     return Tm;
2827   } else {
2828     double  single_charged  = k_concentration + tris_concentration / 2 + na_concentration;
2829     double  d_ratio_ions    = sqrt(mg_concentration / single_charged);
2830     if (single_charged == 0) {
2831       d_magn_corr_value =
2832         d_a -
2833         d_b * log(mg_concentration) +
2834         d_fgc * (d_c + d_d * log(mg_concentration)) +
2835         1 / (2 * ((double)seqlen - 1)) *
2836         (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2837     } else {
2838       if (d_ratio_ions < 0.22) {
2839         d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1 / 100000 * log(single_charged) + 9.40 * 1 / 1000000 * pow(log(single_charged), 2);
2840       } else {
2841         if (d_ratio_ions < 6) {
2842           d_a = 3.92 / 100000 * (0.843 - 0.352 * sqrt(single_charged) * log(single_charged));
2843           d_d = 1.42 / 100000 * (1.279 - 4.03 / 1000 * log(single_charged) - 8.03 / 1000 * pow(log(single_charged), 2));
2844           d_g = 8.31 / 100000 * (0.486 - 0.258 * log(single_charged) + 5.25 / 1000 * pow(log(single_charged), 3));
2845 
2846           d_magn_corr_value = d_a - d_b * log
2847                                 (mg_concentration) + d_fgc *
2848                               (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2849         } else {
2850           d_magn_corr_value = d_a - d_b * log(mg_concentration) + d_fgc * (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2851         }
2852       }
2853     }
2854 
2855     double  temp_Tm = dH * 10 / (dS + 1.987 * log(probe_concentration / 4));
2856     int     Tm      = 1 / (1 / temp_Tm + d_magn_corr_value) - 273.15;
2857     return Tm;
2858   }
2859 }
2860