1 /*
2 * Compute duplex structure of two RNA strands
3 *
4 * c Ivo L Hofacker
5 * Vienna RNA package
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11
12 #include <ctype.h>
13 #include <dirent.h>
14 #include <math.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <time.h>
19 #include <sys/types.h>
20 #include <unistd.h>
21 #include "ViennaRNA/params/default.h"
22 #include "ViennaRNA/fold_vars.h"
23 #include "ViennaRNA/params/basic.h"
24 #include "ViennaRNA/utils/basic.h"
25 #include "ViennaRNA/utils/strings.h"
26 #include "ViennaRNA/ali_plex.h"
27 #include "ViennaRNA/alifold.h"
28 #include "ViennaRNA/utils/alignments.h"
29 #include "ViennaRNA/fold.h"
30 #include "ViennaRNA/pair_mat.h"
31 #include "ViennaRNA/plex.h"
32 #include "ViennaRNA/plotting/probabilities.h"
33 #include "ViennaRNA/plotting/alignments.h"
34 #include "ViennaRNA/params/io.h"
35 #include "ViennaRNA/io/utils.h"
36 #include "RNAplex_cmdl.h"
37
38
39 clock_t
BeginTimer()40 BeginTimer()
41 {
42 /* timer declaration */
43 clock_t Begin; /* initialize Begin */
44
45 Begin = clock(); /* start the timer */
46 return Begin;
47 }
48
49
50 clock_t
EndTimer(clock_t begin)51 EndTimer(clock_t begin)
52 {
53 clock_t End;
54
55 End = clock(); /* stop the timer */
56 return End;
57 }
58
59
60 /* --------------------end include timer */
61 extern int subopt_sorted;
62 /* static int print_struc(duplexT const *dup); */
63 static int **average_accessibility_target(char **names,
64 char **ALN,
65 int number,
66 char *access,
67 double verhaeltnis,
68 const int alignment_length,
69 int binaries,
70 int fast);
71
72
73 /* static int ** average_accessibility_query(char **names, char **ALN, int number, char *access, double verhaeltnis); */
74 static int get_max_u(const char *s,
75 char delim);
76
77
78 static int **read_plfold_i(char *fname,
79 const int beg,
80 const int end,
81 double verhaeltnis,
82 const int length,
83 int fast);
84
85
86 /**
87 * Compute the conditional per nucleotide directly
88 */
89 /* static int ** read_plfold_j(char *fname, const int beg, const int end,double verhaeltnis); */
90 static int **read_plfold_i_bin(char *fname,
91 const int beg,
92 const int end,
93 double verhaeltnis,
94 const int length,
95 int fast);
96
97
98 /* Compute and pass opening energies in case of f=2*/
99 static int get_sequence_length_from_alignment(char *sequence);
100
101
102 /* take as argument a list of hit from an alignment interaction */
103 /* Accessibility can currently not be provided */
104 static void aliprint_struct(FILE *Result, /* result file */
105 FILE *Target, /* target alignment */
106 FILE *Query,
107 const int WindowsLength);
108
109
110 static void linear_fit(int *a,
111 int *b,
112 int *c,
113 int *d); /* get linear fits for Iopn, Iextension, Bopn, Bextension, I^1open and I^1extension */
114
115
116 /**
117 * Compute Tm based on silvana's parameters (1999)
118 */
119 double probcompute_silvana_98(char *s1,
120 double k_concentration,
121 double tris_concentration,
122 double mg_concentration,
123 double na_concentration,
124 double probe_concentration);
125
126
127 double probcompute_silvana_04(char *s1,
128 double k_concentration,
129 double tris_concentration,
130 double mg_concentration,
131 double na_concentration,
132 double probe_concentration);
133
134
135 double probcompute_xia_98(char *s1,
136 double na_concentration,
137 double probe_concentration);
138
139
140 double probcompute_sug_95(char *s1,
141 double na_concentration,
142 double probe_concentration);
143
144
145 double probcompute_newparameters(char *s1,
146 double k_concentration,
147 double tris_concentration,
148 double mg_concentration,
149 double na_concentration,
150 double probe_concentration);
151
152
153 /*@unused@*/
154 static int convert_plfold_i(char *fname);/* convert test accessibility into bin accessibility. */
155
156
157 static char scale[] = "....,....1....,....2....,....3....,....4"
158 "....,....5....,....6....,....7....,....8";
159
160 /*--------------------------------------------------------------------------*/
161
162 int
main(int argc,char * argv[])163 main(int argc,
164 char *argv[])
165 {
166 struct RNAplex_args_info args_info;
167
168 #define MAX_NUM_NAMES 500
169 char *temp1[MAX_NUM_NAMES], *temp2[MAX_NUM_NAMES], *AS1[MAX_NUM_NAMES], *AS2[MAX_NUM_NAMES], *names1[MAX_NUM_NAMES], *names2[MAX_NUM_NAMES];
170 char *s1 = NULL, *s2 = NULL, *line, *cstruc = NULL, *structure = NULL;
171 char *line_q = NULL, *line_t = NULL;
172 char *tname = NULL;
173 char *qname = NULL;
174 char *access = NULL;
175 char fname[FILENAME_MAX_LENGTH];
176 char *ParamFile = NULL;
177 char *ns_bases = NULL, *c;
178
179 FILE *Result = NULL; /* file containing the results */
180 FILE *mRNA = NULL, *sRNA = NULL;
181
182 char *Resultfile = NULL;
183 int fold_constrained = 0;
184 int i, l, sym;
185
186 /* double kT, sfact=1.07; */
187 int istty, delta = -INF;
188 int noconv = 0;
189 int extension_cost = 0;
190 int deltaz = 0;
191 int alignment_length = 40;
192 int fast = 0;
193 int redraw = 0;
194 int binaries = 0;
195 int convert = 0;
196 /**
197 * Defines how many nucleotides has to be added at the begining and end of the target and query sequence in order to generate the structure figure
198 */
199 int WindowsLength = 0;
200 int alignment_mode = 0;
201 /**
202 * Define scaling factor for the accessibility. Default 1
203 */
204 double verhaeltnis = 1;
205 /**
206 * Probe Tm computation
207 */
208 double probe_concentration = 0.05;
209 double na_concentration = 1;
210 double mg_concentration = 0;
211 double k_concentration = 0;
212 double tris_concentration = 0;
213 int probe_mode = 0;
214 /*
215 #############################################
216 # check the command line parameters
217 #############################################
218 */
219 if (RNAplex_cmdline_parser(argc, argv, &args_info) != 0)
220 exit(1);
221
222 /*temperature*/
223 if (args_info.temp_given)
224 temperature = args_info.temp_arg;
225
226 /*query file*/
227 if (args_info.query_given)
228 qname = strdup(args_info.query_arg);
229
230 /*target file*/
231 if (args_info.target_given)
232 tname = strdup(args_info.target_arg);
233
234 /*interaction_length*/
235 alignment_length = args_info.interaction_length_arg;
236 /*extension_cost*/
237 extension_cost = args_info.extension_cost_arg;
238 /*duplex_distance*/
239 deltaz = args_info.duplex_distance_arg;
240 /*energy_threshold*/
241 delta = (int)(100 * args_info.energy_threshold_arg);
242 /*fast_folding*/
243 fast = args_info.fast_folding_arg;
244 /*accessibility*/
245 if (args_info.accessibility_dir_given)
246 access = strdup(args_info.accessibility_dir_arg);
247
248 /*produce ps arg*/
249 if (args_info.produce_ps_given) {
250 Resultfile = strdup(args_info.produce_ps_arg);
251 redraw = 1;
252 }
253
254 /*WindowLength*/
255 WindowsLength = args_info.WindowLength_arg;
256 /*scale_accessibility_arg*/
257 verhaeltnis = args_info.scale_accessibility_arg;
258 /*constraint_flag*/
259 if (args_info.constraint_given)
260 fold_constrained = 1;
261
262 /*paramFile*/
263 if (args_info.paramFile_given)
264 ParamFile = strdup(args_info.paramFile_arg);
265
266 /*binary*/
267 if (args_info.binary_given)
268 binaries = 1;
269
270 /*convert_to_bin*/
271 if (args_info.convert_to_bin_given)
272 convert = 1;
273
274 /*alignment_mode*/
275 if (args_info.alignment_mode_given)
276 alignment_mode = 1;
277
278 /*Probe mode*/
279 if (args_info.probe_mode_given)
280 probe_mode = 1;
281
282 /*sodium concentration*/
283 na_concentration = args_info.na_concentration_arg;
284 /*magnesium concentration*/
285 mg_concentration = args_info.mg_concentration_arg;
286 /*potassium concentration*/
287 k_concentration = args_info.k_concentration_arg;
288 /*tris concentration*/
289 tris_concentration = args_info.tris_concentration_arg;
290 /*probe concentration*/
291 probe_concentration = args_info.probe_concentration_arg;
292
293 /*Probe mode Salt concentration*/
294 if (ParamFile != NULL) {
295 if (!strcmp(ParamFile, "DNA"))
296 vrna_params_load_DNA_Mathews2004();
297 else
298 vrna_params_load(ParamFile, VRNA_PARAMETER_FORMAT_DEFAULT);
299 }
300
301 if (ns_bases != NULL) {
302 nonstandards = vrna_alloc(33);
303 c = ns_bases;
304 i = sym = 0;
305 if (*c == '-') {
306 sym = 1;
307 c++;
308 }
309
310 while (*c != '\0') {
311 if (*c != ',') {
312 nonstandards[i++] = *c++;
313 nonstandards[i++] = *c;
314 if ((sym) && (*c != *(c - 1))) {
315 nonstandards[i++] = *c;
316 nonstandards[i++] = *(c - 1);
317 }
318 }
319
320 c++;
321 }
322 }
323
324 int il_a, il_b, b_a, b_b;
325 linear_fit(&il_a, &il_b, &b_a, &b_b);
326 /**
327 * check if we have two input files
328 */
329
330
331 /**
332 * Here we test if the user wants to convert a bunch of text opening energy files
333 * into binary
334 */
335 if (probe_mode) {
336 if (qname || tname) {
337 vrna_message_error("No query/target file allowed in Tm probe mode\nPlease pipe your input into RNAplex\n");
338 /* get sequence */
339 } else {
340 /* fix temperature to 37C */
341 temperature = 37;
342 printf("Probe mode\n");
343 char *id_s1 = NULL;
344 char *s1 = NULL;
345 vrna_param_t *P = NULL;
346 vrna_md_t md;
347 set_model_details(&md);
348
349 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
350 update_fold_params();
351 P = vrna_params(&md);
352 make_pair_matrix();
353 }
354
355 /*Initialize parameter */
356 printf("Concentration K:%3.3f TNP:%3.3f Mg:%3.3f Na:%3.3f probe:%3.3f\n\n", k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
357 printf("%100s %7s %7s %7s %7s %7s\n", "sequence", "DDSL98", "DDSL04", "DRSU95", "RRXI98", "CURRENT");
358 do {
359 istty = isatty(fileno(stdout)) && isatty(fileno(stdin));
360 if ((line = vrna_read_line(stdin)) == NULL)
361 break;
362
363 /* skip empty lines, comment lines, name lines */
364 while ((*line == '*') || (*line == '\0') || (*line == '>')) {
365 printf("%s\n", line);
366 if (*line == '>') {
367 id_s1 = (char *)vrna_alloc(strlen(line) + 2);
368 (void)sscanf(line, "%s", id_s1);
369 memmove(id_s1, id_s1 + 1, strlen(id_s1));
370 }
371
372 free(line);
373 if ((line = vrna_read_line(stdin)) == NULL) {
374 free(id_s1);
375 break;
376 }
377 }
378 if ((line == NULL) || (strcmp(line, "@") == 0))
379 break;
380
381 s1 = (char *)vrna_alloc(strlen(line) + 1);
382 strcpy(s1, line);
383 /*compute duplex/entropy energy for the reverse complement*/;
384 double Tm;
385 Tm = probcompute_silvana_98(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
386 printf("%100s %*.2f ", s1, 6, Tm);
387 Tm = probcompute_silvana_04(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
388 printf("%*.2f ", 6, Tm);
389 Tm = probcompute_sug_95(line, na_concentration, probe_concentration);
390 printf("%*.2f ", 6, Tm);
391 Tm = probcompute_xia_98(line, na_concentration, probe_concentration);
392 printf("%*.2f ", 6, Tm);
393 Tm = probcompute_newparameters(line, k_concentration, tris_concentration, mg_concentration, na_concentration, probe_concentration);
394 printf("%*.2f\n", 6, Tm);
395 } while (1);
396 }
397
398 RNAplex_cmdline_parser_free(&args_info);
399 return 0;
400 }
401
402 if (convert && access) {
403 char pattern[8];
404 strcpy(pattern, "_openen");
405 DIR *dfd;
406 struct dirent *dir;
407 dfd = opendir(access);
408 /**
409 * Check if the directory where the opening energy files are located exists
410 */
411 if (dfd) {
412 while ((dir = readdir(dfd)) != NULL) {
413 int strPos = 0;
414 int PatternPos = 0;
415 char name[128];
416 strcpy(name, access);
417 strcat(name, "/");
418 strcat(name, dir->d_name);
419 while (name[strPos] != '\0') {
420 if (name[strPos] == pattern[0]) {
421 /**
422 * Check if the files we are looking ends in openen and not openen_bin,
423 * in order to avoid that RNAplex converts bin-file to bin-file
424 */
425 while (name[strPos + PatternPos] == pattern[PatternPos] && pattern[PatternPos] != '\0' && name[strPos + PatternPos] != '\0')
426 PatternPos++;
427 if (name[strPos + PatternPos] == '\0' && pattern[PatternPos] == '\0')
428 /**
429 * convert_plfold_i is the function that makes the hardwork
430 */
431 convert_plfold_i(name);
432 else
433 PatternPos = 0;
434 }
435
436 strPos++;
437 }
438 }
439 }
440
441 closedir(dfd);
442 RNAplex_cmdline_parser_free(&args_info);
443 return 0;
444 }
445
446 /**
447 * Here we check if the user wants to produce PS formatted structure files
448 * from existing RNAplex dot-parenthesis-formated results. Depending on
449 * the kind of input, Alignments or single sequence we will produce
450 * either a color annotated alignment or RNAfold-like structure,
451 * respectively.
452 */
453 if (Resultfile) {
454 /**
455 * Check single sequence case.
456 */
457 if (!(alignment_mode) && (tname && qname)) {
458 mRNA = fopen(tname, "r");
459 if (mRNA == NULL) {
460 printf("%s: Wrong target file name\n", tname);
461 RNAplex_cmdline_parser_free(&args_info);
462 return 0;
463 }
464
465 sRNA = fopen(qname, "r");
466 if (sRNA == NULL) {
467 printf("%s: Wrong query file name\n", qname);
468 RNAplex_cmdline_parser_free(&args_info);
469 return 0;
470 }
471
472 vrna_message_error("Sorry not implemented yet");
473 }/**
474 * We have no single sequence case. Check if we have alignments.
475 */
476 else if ((alignment_mode) && (tname && qname)) {
477 mRNA = fopen(tname, "r");
478 if (mRNA == NULL) {
479 printf("%s: Wrong target file name\n", tname);
480 RNAplex_cmdline_parser_free(&args_info);
481 return 0;
482 }
483
484 sRNA = fopen(qname, "r");
485 if (sRNA == NULL) {
486 printf("%s: Wrong query file name\n", qname);
487 RNAplex_cmdline_parser_free(&args_info);
488 return 0;
489 }
490
491 Result = fopen(Resultfile, "r");
492 if (sRNA == NULL) {
493 printf("%s: Wrong query file name\n", qname);
494 RNAplex_cmdline_parser_free(&args_info);
495 return 0;
496 }
497
498 aliprint_struct(Result, mRNA, sRNA, (const int)WindowsLength);
499 RNAplex_cmdline_parser_free(&args_info);
500 return 0;
501 } else {
502 /**
503 * User was not able to input either two alignments or two single sequence files
504 */
505 printf("Please enter either two alignments or single sequence files\n");
506 RNAplex_cmdline_parser_print_help();
507 }
508 }
509
510 #if 0
511 update_fold_params();
512 if (ParamFile != NULL)
513 read_parameter_file(ParamFile);
514
515 if (ns_bases != NULL) {
516 nonstandards = vrna_alloc(33);
517 c = ns_bases;
518 i = sym = 0;
519 if (*c == '-') {
520 sym = 1;
521 c++;
522 }
523
524 while (*c != '\0') {
525 if (*c != ',') {
526 nonstandards[i++] = *c++;
527 nonstandards[i++] = *c;
528 if ((sym) && (*c != *(c - 1))) {
529 nonstandards[i++] = *c;
530 nonstandards[i++] = *(c - 1);
531 }
532 }
533
534 c++;
535 }
536 }
537
538 int il_a, il_b, b_a, b_b;
539 linear_fit(&il_a, &il_b, &b_a, &b_b);
540 #endif
541 /**
542 * check if we have two input files
543 */
544 if ((qname == NULL && tname) || (qname && tname == NULL)) {
545 RNAplex_cmdline_parser_print_help();
546 } else if (qname && tname && !(alignment_mode)) {
547 /*free allocated memory of commandline parser*/
548 RNAplex_cmdline_parser_free(&args_info);
549
550 if (!fold_constrained) {
551 if (access) {
552 char *id_s1 = NULL;
553 mRNA = fopen(tname, "r");
554 if (mRNA == NULL) {
555 printf("%s: Wrong target file name\n", tname);
556 RNAplex_cmdline_parser_free(&args_info);
557 return 0;
558 }
559
560 sRNA = fopen(qname, "r");
561 if (sRNA == NULL) {
562 printf("%s: Wrong quert file name\n", qname);
563 RNAplex_cmdline_parser_free(&args_info);
564 return 0;
565 }
566
567 do {
568 /* main loop: continue until end of file */
569 if ((line_t = vrna_read_line(mRNA)) == NULL)
570 break;
571
572 /*parse line, get id for further accessibility fetching*/
573 while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
574 if (*line_t == '>') {
575 if (id_s1) {
576 free(id_s1);
577 id_s1 = NULL;
578 }
579
580 id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
581 (void)sscanf(line_t, "%s", id_s1);
582 memmove(id_s1, id_s1 + 1, strlen(id_s1));
583 free(line_t);
584 line_t = NULL;
585 }
586
587 if (line_t)
588 free(line_t);
589
590 if ((line_t = vrna_read_line(mRNA)) == NULL)
591 break;
592 }
593 /*append N's to the sequence in order to avoid boundary checking*/
594 if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
595 if (id_s1) {
596 free(id_s1);
597 id_s1 = NULL;
598 }
599
600 break;
601 }
602
603 s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
604 strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
605 strcat(s1, line_t);
606 free(line_t);
607 strcat(s1, "NNNNNNNNNN\0");
608 int s1_len;
609 s1_len = strlen(s1);
610 for (l = 0; l < s1_len; l++) {
611 s1[l] = toupper(s1[l]);
612 if (!noconv && s1[l] == 'T')
613 s1[l] = 'U';
614 }
615
616 /*read accessibility*/
617 int **access_s1;
618 char *file_s1;
619 file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s1) + strlen(access) + 20));
620 strcpy(file_s1, access);
621 strcat(file_s1, "/");
622 strcat(file_s1, id_s1);
623 strcat(file_s1, "_openen");
624 if (!binaries) {
625 access_s1 = read_plfold_i(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
626 } else {
627 strcat(file_s1, "_bin");
628 access_s1 = read_plfold_i_bin(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
629 }
630
631 if (access_s1 == NULL) {
632 printf("Accessibility file %s not found or corrupt, look at next target RNA\n", file_s1);
633 free(file_s1);
634 free(s1);
635 free(id_s1);
636 id_s1 = NULL;
637 continue;
638 }
639
640 do {
641 char *id_s2 = NULL;
642 if ((line_q = vrna_read_line(sRNA)) == NULL)
643 break;
644
645 while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
646 if (*line_q == '>') {
647 if (id_s2) {
648 free(id_s2);
649 id_s2 = NULL;
650 }
651
652 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
653 (void)sscanf(line_q, "%s", id_s2);
654 memmove(id_s2, id_s2 + 1, strlen(id_s2));
655 free(line_q);
656 line_q = NULL;
657 }
658
659 if (line_q)
660 free(line_q);
661
662 if ((line_q = vrna_read_line(sRNA)) == NULL)
663 break;
664 }
665 if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
666 if (id_s2) {
667 free(id_s2);
668 id_s2 = NULL;
669 }
670
671 break;
672 }
673
674 if (!id_s2) {
675 free(line_q);
676 continue;
677 }
678
679 s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
680 strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
681 strcat(s2, line_q);
682 free(line_q);
683 strcat(s2, "NNNNNNNNNN\0");
684 int s2_len;
685 s2_len = strlen(s2);
686 for (l = 0; l < s2_len; l++) {
687 s2[l] = toupper(s2[l]);
688 if (!noconv && s2[l] == 'T')
689 s2[l] = 'U';
690 }
691 int **access_s2;
692 char *file_s2;
693 file_s2 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s2) + strlen(access) + 20));
694 strcpy(file_s2, access);
695 strcat(file_s2, "/");
696 strcat(file_s2, id_s2);
697 strcat(file_s2, "_openen");
698 if (!binaries) {
699 access_s2 = read_plfold_i(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
700 } else {
701 strcat(file_s2, "_bin");
702 access_s2 = read_plfold_i_bin(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
703 }
704
705 if (access_s2 == NULL) {
706 printf("Accessibility file %s not found, look at next target RNA\n", file_s2);
707 free(file_s2);
708 free(s2);
709 free(id_s2);
710 id_s2 = NULL;
711 continue;
712 }
713
714 printf(">%s\n>%s\n", id_s1, id_s2);
715 double begin = BeginTimer();
716 Lduplexfold_XS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
717 float elapTicks;
718 float elapMilli;
719 elapTicks = (EndTimer(begin) - begin);
720 elapMilli = elapTicks / 1000;
721 /* printf("Millisecond %.2f\n",elapMilli); */
722 free(id_s2);
723 free(file_s2);
724 free(s2);
725 id_s2 = NULL;
726 i = access_s2[0][0];
727 while (--i > -1)
728 free(access_s2[i]);
729 free(access_s2);
730 } while (1);
731 free(id_s1);
732 id_s1 = NULL;
733 free(file_s1);
734 free(s1);
735 rewind(sRNA);
736 i = access_s1[0][0];
737 while (--i > -1)
738 free(access_s1[i]);
739 free(access_s1);
740 } while (1);
741 fclose(mRNA);
742 fclose(sRNA);
743 } else if (access == NULL) {
744 /* t and q are defined, but no accessibility is provided */
745 mRNA = fopen(tname, "r");
746 if (mRNA == NULL) {
747 printf("%s: Wrong target file name\n", tname);
748 RNAplex_cmdline_parser_free(&args_info);
749 return 0;
750 }
751
752 sRNA = fopen(qname, "r");
753 if (sRNA == NULL) {
754 printf("%s: Wrong query file name\n", qname);
755 RNAplex_cmdline_parser_free(&args_info);
756 return 0;
757 }
758
759 do {
760 /* main loop: continue until end of file */
761 char *id_s1 = NULL; /* header of the target file */
762 if ((line_t = vrna_read_line(mRNA)) == NULL)
763 break;
764
765 /*parse line, get id for further accessibility fetching*/
766 while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
767 if (*line_t == '>') {
768 if (id_s1) {
769 /* in case we have two header the one after the other */
770 free(id_s1); /* free the old header, a put the new one instead */
771 id_s1 = NULL;
772 }
773
774 id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
775 (void)sscanf(line_t, "%s", id_s1);
776 memmove(id_s1, id_s1 + 1, strlen(id_s1));
777 free(line_t);
778 line_t = NULL;
779 }
780
781 if (line_t)
782 free(line_t);
783
784 if ((line_t = vrna_read_line(mRNA)) == NULL)
785 break;
786 }
787 /*append N's to the sequence in order to avoid boundary checking*/
788 if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
789 if (id_s1) {
790 free(id_s1);
791 id_s1 = NULL;
792 }
793
794 break;
795 }
796
797 s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
798 strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
799 strcat(s1, line_t);
800 free(line_t);
801 strcat(s1, "NNNNNNNNNN\0");
802 int s1_len;
803 s1_len = strlen(s1);
804 for (l = 0; l < s1_len; l++) {
805 s1[l] = toupper(s1[l]);
806 if (!noconv && s1[l] == 'T')
807 s1[l] = 'U';
808 }
809 do {
810 /*read sRNA files*/
811 char *id_s2 = NULL;
812 if ((line_q = vrna_read_line(sRNA)) == NULL)
813 break;
814
815 while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
816 if (*line_q == '>') {
817 if (id_s2) {
818 free(id_s2);
819 id_s2 = NULL;
820 }
821
822 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
823 (void)sscanf(line_q, "%s", id_s2);
824 memmove(id_s2, id_s2 + 1, strlen(id_s2));
825 free(line_q);
826 line_q = NULL;
827 }
828
829 if (line_q)
830 free(line_q);
831
832 if ((line_q = vrna_read_line(sRNA)) == NULL)
833 break;
834 }
835 if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
836 if (id_s2)
837 free(id_s2);
838
839 break;
840 }
841
842 if (!id_s2) {
843 free(line_q);
844 continue;
845 }
846
847 s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
848 strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
849 strcat(s2, line_q);
850 free(line_q);
851 strcat(s2, "NNNNNNNNNN\0");
852 int s2_len;
853 s2_len = strlen(s2);
854 for (l = 0; l < s2_len; l++) {
855 s2[l] = toupper(s2[l]);
856 if (!noconv && s2[l] == 'T')
857 s2[l] = 'U';
858 }
859 printf(">%s\n>%s\n", id_s1, id_s2);
860 /* double begin = BeginTimer(); */
861 Lduplexfold(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
862 /* float elapTicks; */
863 /* float elapMilli; */
864 /* elapTicks = (EndTimer(begin) - begin); */
865 /* elapMilli = elapTicks/1000; */
866 /* printf("Millisecond %.2f\n",elapMilli); */
867 /* printf("\n"); */
868 free(id_s2);
869 id_s2 = NULL;
870 free(s2);
871 } while (1);
872 free(id_s1);
873 id_s1 = NULL;
874 free(s1);
875 rewind(sRNA);
876 } while (1);
877 fclose(mRNA);
878 fclose(sRNA);
879 }
880 } else {
881 if (access) {
882 char *id_s1 = NULL;
883 mRNA = fopen(tname, "r");
884 if (mRNA == NULL) {
885 printf("%s: Wrong target file name\n", tname);
886 RNAplex_cmdline_parser_free(&args_info);
887 return 0;
888 }
889
890 sRNA = fopen(qname, "r");
891 if (sRNA == NULL) {
892 printf("%s: Wrong query file name\n", qname);
893 RNAplex_cmdline_parser_free(&args_info);
894 return 0;
895 }
896
897 do {
898 /* main loop: continue until end of file */
899 if ((line_t = vrna_read_line(mRNA)) == NULL)
900 break;
901
902 /*parse line, get id for further accessibility fetching*/
903 while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
904 if (*line_t == '>') {
905 if (id_s1) {
906 /* in case we have two header the one after the other */
907 free(id_s1); /* free the old header, a put the new one instead */
908 id_s1 = NULL;
909 }
910
911 id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
912 (void)sscanf(line_t, "%s", id_s1);
913 memmove(id_s1, id_s1 + 1, strlen(id_s1));
914 free(line_t);
915 line_t = NULL;
916 }
917
918 if (line_t)
919 free(line_t);
920
921 if ((line_t = vrna_read_line(mRNA)) == NULL)
922 break;
923 }
924 /*append N's to the sequence in order to avoid boundary checking*/
925 if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
926 if (id_s1) {
927 free(id_s1);
928 id_s1 = NULL;
929 }
930
931 break;
932 }
933
934 s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
935 strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
936 strcat(s1, line_t);
937 free(line_t);
938 strcat(s1, "NNNNNNNNNN\0");
939 int s1_len;
940 s1_len = strlen(s1);
941 for (l = 0; l < s1_len; l++) {
942 s1[l] = toupper(s1[l]);
943 if (!noconv && s1[l] == 'T')
944 s1[l] = 'U';
945 }
946
947 /*read accessibility*/
948 int **access_s1;
949 char *file_s1;
950 file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s1) + strlen(access) + 20));
951 strcpy(file_s1, access);
952 strcat(file_s1, "/");
953 strcat(file_s1, id_s1);
954 strcat(file_s1, "_openen");
955 if (!binaries) {
956 access_s1 = read_plfold_i(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
957 } else {
958 strcat(file_s1, "_bin");
959 access_s1 = read_plfold_i_bin(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
960 }
961
962 if (access_s1 == NULL) {
963 printf("Accessibility file %s not found, look at next target RNA\n", file_s1);
964 free(file_s1);
965 free(s1);
966 free(id_s1);
967 id_s1 = NULL;
968 continue;
969 }
970
971 do {
972 char *id_s2 = NULL;
973 /*read sRNA files*/
974 if ((line_q = vrna_read_line(sRNA)) == NULL)
975 break;
976
977 while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
978 if (*line_q == '>') {
979 if (id_s2) {
980 free(id_s2);
981 id_s2 = NULL;
982 }
983
984 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
985 (void)sscanf(line_q, "%s", id_s2);
986 memmove(id_s2, id_s2 + 1, strlen(id_s2));
987 free(line_q);
988 line_q = NULL;
989 }
990
991 if (line_q)
992 free(line_q);
993
994 if ((line_q = vrna_read_line(sRNA)) == NULL)
995 break;
996 }
997 if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
998 if (id_s2)
999 free(id_s2);
1000
1001 break;
1002 }
1003
1004 /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
1005 s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
1006 strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1007 strcat(s2, line_q);
1008 free(line_q);
1009 strcat(s2, "NNNNNNNNNN\0");
1010 int s2_len;
1011 s2_len = strlen(s2);
1012 for (l = 0; l < s2_len; l++) {
1013 s2[l] = toupper(s2[l]);
1014 if (!noconv && s2[l] == 'T')
1015 s2[l] = 'U';
1016 }
1017 structure = (char *)vrna_alloc((unsigned)s2_len + 1);
1018 cstruc = vrna_read_line(sRNA);
1019 if (cstruc != NULL) {
1020 int dn3 = strlen(cstruc) - (s2_len - 20);
1021 strcpy(structure, "..........");
1022 strncat(structure, cstruc, s2_len - 20);
1023 if (dn3 >= 0) {
1024 strcat(structure, "..........\0");
1025 } else {
1026 while (dn3++)
1027 strcat(structure, ".");
1028 strcat(structure, "\0");
1029 }
1030
1031 free(cstruc);
1032 } else {
1033 vrna_message_warning("constraints missing");
1034 }
1035
1036 int a = strchr(structure, '|') - structure;
1037 int b = strrchr(structure, '|') - structure;
1038 if (alignment_length < b - a + 1)
1039 vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1040
1041 int **access_s2;
1042 char *file_s2;
1043 file_s2 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s2) + strlen(access) + 20));
1044 strcpy(file_s2, access);
1045 strcat(file_s2, "/");
1046 strcat(file_s2, id_s2);
1047 strcat(file_s2, "_openen");
1048 if (!binaries) {
1049 access_s2 = read_plfold_i(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1050 } else {
1051 strcat(file_s2, "_bin");
1052 access_s2 = read_plfold_i_bin(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1053 }
1054
1055 if (access_s2 == NULL) {
1056 printf("Accessibility file %s not found, look at next target RNA\n", file_s2);
1057 free(file_s2);
1058 free(s2);
1059 free(id_s2);
1060 continue;
1061 }
1062
1063 printf(">%s\n>%s\n", id_s1, id_s2);
1064 Lduplexfold_CXS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);/* , target_dead, query_dead); */
1065 free(id_s2);
1066 free(file_s2);
1067 free(s2);
1068 i = access_s2[0][0];
1069 while (--i > -1)
1070 free(access_s2[i]);
1071 free(access_s2);
1072 free(structure);
1073 } while (1);
1074 free(id_s1);
1075 id_s1 = NULL;
1076 free(file_s1);
1077 free(s1);
1078 rewind(sRNA);
1079 i = access_s1[0][0];
1080 while (--i > -1)
1081 free(access_s1[i]);
1082 free(access_s1);
1083 } while (1);
1084 fclose(mRNA);
1085 fclose(sRNA);
1086 } else if (access == NULL) {
1087 /* t and q are defined, but no accessibility is provided */
1088 char *id_s1 = NULL;
1089 mRNA = fopen(tname, "r");
1090 if (mRNA == NULL) {
1091 printf("%s: Wrong target file name\n", tname);
1092 RNAplex_cmdline_parser_free(&args_info);
1093 return 0;
1094 }
1095
1096 sRNA = fopen(qname, "r");
1097 if (sRNA == NULL) {
1098 printf("%s: Wrong query file name\n", qname);
1099 RNAplex_cmdline_parser_free(&args_info);
1100 return 0;
1101 }
1102
1103 do {
1104 /* main loop: continue until end of file */
1105 if ((line_t = vrna_read_line(mRNA)) == NULL)
1106 break;
1107
1108 /*parse line, get id for further accessibility fetching*/
1109 while ((*line_t == '*') || (*line_t == '\0') || (*line_t == '>')) {
1110 if (*line_t == '>') {
1111 if (id_s1) {
1112 /* in case we have two header the one after the other */
1113 free(id_s1); /* free the old header, a put the new one instead */
1114 id_s1 = NULL;
1115 }
1116
1117 id_s1 = (char *)vrna_alloc(strlen(line_t) + 2);
1118 (void)sscanf(line_t, "%s", id_s1);
1119 memmove(id_s1, id_s1 + 1, strlen(id_s1));
1120 free(line_t);
1121 line_t = NULL;
1122 }
1123
1124 if (line_t)
1125 free(line_t);
1126
1127 if ((line_t = vrna_read_line(mRNA)) == NULL)
1128 break;
1129 }
1130 /*append N's to the sequence in order to avoid boundary checking*/
1131 /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
1132 if ((line_t == NULL) || (strcmp(line_t, "@") == 0)) {
1133 if (id_s1) {
1134 free(id_s1);
1135 id_s1 = NULL;
1136 }
1137
1138 break;
1139 }
1140
1141 s1 = (char *)vrna_alloc(strlen(line_t) + 1 + 20);
1142 strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1143 strcat(s1, line_t);
1144 free(line_t);
1145 strcat(s1, "NNNNNNNNNN\0");
1146 int s1_len;
1147 s1_len = strlen(s1);
1148 for (l = 0; l < s1_len; l++) {
1149 s1[l] = toupper(s1[l]);
1150 if (!noconv && s1[l] == 'T')
1151 s1[l] = 'U';
1152 }
1153 do {
1154 char *id_s2 = NULL;
1155 /*read sRNA files*/
1156 if ((line_q = vrna_read_line(sRNA)) == NULL)
1157 break;
1158
1159 while ((*line_q == '*') || (*line_q == '\0') || (*line_q == '>')) {
1160 if (*line_q == '>') {
1161 if (id_s2) {
1162 free(id_s2);
1163 id_s2 = NULL;
1164 }
1165
1166 id_s2 = (char *)vrna_alloc(strlen(line_q) + 2);
1167 (void)sscanf(line_q, "%s", id_s2);
1168 memmove(id_s2, id_s2 + 1, strlen(id_s2));
1169 free(line_q);
1170 line_q = NULL;
1171 }
1172
1173 if (line_q)
1174 free(line_q);
1175
1176 if ((line_q = vrna_read_line(sRNA)) == NULL)
1177 break;
1178 }
1179 /* if ((line_t ==NULL) || (strcmp(line_t, "@") == 0)) break; */
1180 if ((line_q == NULL) || (strcmp(line_q, "@") == 0)) {
1181 if (id_s2)
1182 free(id_s2);
1183
1184 break;
1185 }
1186
1187 s2 = (char *)vrna_alloc(strlen(line_q) + 1 + 20);
1188 strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1189 strcat(s2, line_q);
1190 free(line_q);
1191 strcat(s2, "NNNNNNNNNN\0");
1192 int s2_len;
1193 s2_len = strlen(s2);
1194 for (l = 0; l < s2_len; l++) {
1195 s2[l] = toupper(s2[l]);
1196 if (!noconv && s2[l] == 'T')
1197 s2[l] = 'U';
1198 }
1199 structure = (char *)vrna_alloc((unsigned)s2_len + 1);
1200 cstruc = vrna_read_line(sRNA);
1201 if (cstruc != NULL) {
1202 int dn3 = strlen(cstruc) - (s2_len - 20);
1203 strcpy(structure, "..........");
1204 strncat(structure, cstruc, s2_len - 20);
1205 if (dn3 >= 0) {
1206 strcat(structure, "..........\0");
1207 } else {
1208 while (dn3++)
1209 strcat(structure, ".");
1210 strcat(structure, "\0");
1211 }
1212
1213 free(cstruc);
1214 } else {
1215 vrna_message_warning("constraints missing");
1216 }
1217
1218 int a = strchr(structure, '|') - structure;
1219 int b = strrchr(structure, '|') - structure;
1220 if (alignment_length < b - a + 1)
1221 vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1222
1223 printf(">%s\n>%s\n", id_s1, id_s2);
1224 double begin = BeginTimer();
1225 Lduplexfold_C(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);
1226 float elapTicks;
1227 float elapMilli;
1228 elapTicks = EndTimer(begin);
1229 elapMilli = elapTicks / 1000;
1230 /* printf("Millisecond %.2f\n",elapMilli); */
1231 free(id_s2);
1232 free(s2);
1233 free(structure);
1234 } while (1);
1235 free(id_s1);
1236 id_s1 = NULL;
1237 free(s1);
1238 rewind(sRNA);
1239 } while (1);
1240 fclose(mRNA);
1241 fclose(sRNA);
1242 }
1243 }
1244 } else if (!qname && !tname && !(alignment_mode)) {
1245 istty = isatty(fileno(stdout)) && isatty(fileno(stdin));
1246
1247 if ((fold_constrained) && (istty)) {
1248 printf("Input constraints using the following notation:\n");
1249 printf("| : paired with another base\n");
1250 printf(". : no constraint at all\n");
1251 printf("x : base must not pair\n");
1252 }
1253
1254 do {
1255 /* main loop: continue until end of file */
1256 /* duplexT mfe, *subopt */
1257 char *id_s1 = NULL, *id_s2 = NULL;
1258 if (istty) {
1259 printf("\nInput two sequences (one line each); @ to quit\n");
1260 printf("%s\n", scale);
1261 }
1262
1263 fname[0] = '\0';
1264
1265 if ((line = vrna_read_line(stdin)) == NULL)
1266 break;
1267
1268 /* skip empty lines, comment lines, name lines */
1269 while ((*line == '*') || (*line == '\0') || (*line == '>')) {
1270 printf("%s\n", line);
1271 if (*line == '>') {
1272 id_s1 = (char *)vrna_alloc(strlen(line) + 2);
1273 (void)sscanf(line, "%s", id_s1);
1274 memmove(id_s1, id_s1 + 1, strlen(id_s1));
1275 }
1276
1277 free(line);
1278 if ((line = vrna_read_line(stdin)) == NULL) {
1279 free(id_s1);
1280 break;
1281 }
1282 }
1283 if ((line == NULL) || (strcmp(line, "@") == 0))
1284 break;
1285
1286 s1 = (char *)vrna_alloc(strlen(line) + 1 + 20);
1287 strcpy(s1, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1288 strcat(s1, line);
1289 free(line);
1290 strcat(s1, "NNNNNNNNNN\0");
1291 if ((line = vrna_read_line(stdin)) == NULL)
1292 break;
1293
1294 /* skip comment lines and get filenames */
1295
1296 while ((*line == '*') || (*line == '\0') || (*line == '>')) {
1297 printf("%s\n", line);
1298 if (*line == '>') {
1299 id_s2 = (char *)vrna_alloc(strlen(line) + 2);
1300 (void)sscanf(line, "%s", id_s2);
1301 memmove(id_s2, id_s2 + 1, strlen(id_s2));
1302 }
1303
1304 free(line);
1305 if ((line = vrna_read_line(stdin)) == NULL) {
1306 free(id_s2);
1307 break;
1308 }
1309 }
1310 if ((line == NULL) || (strcmp(line, "@") == 0))
1311 break;
1312
1313 s2 = (char *)vrna_alloc(strlen(line) + 1 + 20);
1314 strcpy(s2, "NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
1315 strcat(s2, line);
1316 free(line);
1317 strcat(s2, "NNNNNNNNNN\0");
1318
1319 int n1 = strlen(s1);
1320 int n2 = strlen(s2);
1321
1322
1323 structure = (char *)vrna_alloc((unsigned)n2 + 1);
1324 if (fold_constrained) {
1325 cstruc = vrna_read_line(stdin);
1326 if (cstruc != NULL && (cstruc[0] == '>')) {
1327 int dn3 = strlen(cstruc) - (n2 - 20);
1328 strcpy(structure, "..........");
1329 strncat(structure, cstruc, n2 - 20);
1330 if (dn3 >= 0) {
1331 strcat(structure, "..........\0");
1332 } else {
1333 while (dn3++)
1334 strcat(structure, ".");
1335 strcat(structure, "\0");
1336 }
1337
1338 free(cstruc);
1339 } else {
1340 vrna_message_warning("constraints missing");
1341 }
1342 }
1343
1344 for (l = 0; l < n1; l++) {
1345 s1[l] = toupper(s1[l]);
1346 if (!noconv && s1[l] == 'T')
1347 s1[l] = 'U';
1348 }
1349 for (l = 0; l < n2; l++) {
1350 s2[l] = toupper(s2[l]);
1351 if (!noconv && s2[l] == 'T')
1352 s2[l] = 'U';
1353 }
1354 if (istty)
1355 printf("lengths = %d,%d\n", (int)strlen(s1), (int)strlen(s2));
1356
1357 if (alignment_length == 0)
1358 alignment_length = 40;
1359
1360 if (access == NULL) {
1361 if (!fold_constrained) {
1362 Lduplexfold(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
1363 } else {
1364 int a = strchr(structure, '|') - structure;
1365 int b = strrchr(structure, '|') - structure;
1366 if (alignment_length < b - a + 1)
1367 vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1368
1369 Lduplexfold_C(s1, s2, delta, extension_cost, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);
1370 }
1371 } else {
1372 int **access_s1, **access_s2;
1373 char *file_s1, *file_s2;
1374 int s1_len, s2_len;
1375 s1_len = strlen(s1);
1376 s2_len = strlen(s2);
1377 if (!(id_s1 && id_s2))
1378 vrna_message_error("The fasta files has no header information..., cant fetch accessibility file\n");
1379
1380 file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s1) + strlen(access) + 20));
1381 file_s2 = (char *)vrna_alloc(sizeof(char) * (strlen(id_s2) + strlen(access) + 20));
1382 strcpy(file_s1, access);
1383 strcpy(file_s2, access);
1384 strcat(file_s1, "/");
1385 strcat(file_s2, "/");
1386 strcat(file_s1, id_s1);
1387 strcat(file_s2, id_s2);
1388 strcat(file_s1, "_openen");
1389 strcat(file_s2, "_openen");
1390 if (!binaries) {
1391 access_s1 = read_plfold_i(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
1392 } else {
1393 strcat(file_s1, "_bin");
1394 access_s1 = read_plfold_i_bin(file_s1, 1, s1_len, verhaeltnis, alignment_length, fast);
1395 }
1396
1397 if (access_s1 == NULL) {
1398 free(file_s1);
1399 free(s1);
1400 free(s2);
1401 free(file_s2);
1402 free(id_s1);
1403 free(id_s2);
1404 continue;
1405 }
1406
1407 if (!binaries) {
1408 access_s2 = read_plfold_i(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1409 } else {
1410 strcat(file_s2, "_bin");
1411 access_s2 = read_plfold_i_bin(file_s2, 1, s2_len, verhaeltnis, alignment_length, fast);
1412 }
1413
1414 if (access_s2 == NULL) {
1415 free(access_s1);
1416 free(file_s1);
1417 free(s1);
1418 free(s2);
1419 free(file_s2);
1420 free(id_s1);
1421 free(id_s2);
1422 continue;
1423 }
1424
1425 if (!fold_constrained) {
1426 Lduplexfold_XS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);/* , target_dead, query_dead); */
1427 } else {
1428 int a = strchr(structure, '|') - structure;
1429 int b = strrchr(structure, '|') - structure;
1430 if (alignment_length < b - a + 1)
1431 vrna_message_error("Maximal duplex length (-l option) is smaller than constraint on the structures\n. Please adjust the -l option accordingly\n");
1432
1433 Lduplexfold_CXS(s1, s2, (const int **)access_s1, (const int **)access_s2, delta, alignment_length, deltaz, fast, structure, il_a, il_b, b_a, b_b);/* , target_dead, query_dead); */
1434 }
1435
1436 i = access_s1[0][0];
1437 while (--i > -1)
1438 free(access_s1[i]);
1439 i = access_s2[0][0];
1440 while (--i > -1)
1441 free(access_s2[i]);
1442 free(access_s1);
1443 free(access_s2);
1444 free(file_s1);
1445 free(file_s2);
1446 free(id_s1);
1447 id_s1 = NULL;
1448 free(id_s2);
1449 id_s2 = NULL;
1450 }
1451
1452 free(s1);
1453 free(s2);
1454 s1 = NULL;
1455 s2 = NULL;
1456 free(structure);
1457 printf("\n");
1458 if (id_s1)
1459 free(id_s1);
1460
1461 if (id_s2)
1462 free(id_s2);
1463 } while (1);
1464 if (s1)
1465 free(s1);
1466
1467 if (s2)
1468 free(s2);
1469
1470 /* if(id_s1){free(id_s1);}if(id_s2){free(id_s2);} */
1471 } else if (qname && tname && alignment_mode) {
1472 int n_seq, n_seq2;
1473 mRNA = fopen(tname, "r");
1474 if (mRNA == NULL) {
1475 printf("%s: Wrong target file name\n", tname);
1476 RNAplex_cmdline_parser_free(&args_info);
1477 return 0;
1478 }
1479
1480 sRNA = fopen(qname, "r");
1481 if (sRNA == NULL) {
1482 printf("%s: Wrong query file name\n", qname);
1483 RNAplex_cmdline_parser_free(&args_info);
1484 return 0;
1485 }
1486
1487 n_seq = read_clustal(mRNA, temp1, names1);
1488 n_seq2 = read_clustal(sRNA, temp2, names2);
1489 fclose(mRNA);
1490 fclose(sRNA);
1491 if (n_seq != n_seq2) {
1492 for (i = 0; temp1[i]; i++) {
1493 free(temp1[i]);
1494 free(temp2[i]);
1495 }
1496 vrna_message_error("unequal number of seqs in alignments");
1497 }
1498
1499 for (i = 0; temp1[i]; i++) {
1500 AS1[i] = (char *)vrna_alloc((strlen(temp1[i]) + 21) * sizeof(char));
1501 AS2[i] = (char *)vrna_alloc((strlen(temp2[i]) + 21) * sizeof(char));
1502 strcpy(AS1[i], "NNNNNNNNNN");
1503 strcpy(AS2[i], "NNNNNNNNNN");
1504 strcat(AS1[i], temp1[i]);
1505 strcat(AS2[i], temp2[i]);
1506 strcat(AS1[i], "NNNNNNNNNN\0");
1507 strcat(AS2[i], "NNNNNNNNNN\0");
1508 }
1509 for (i = 0; temp1[i]; i++) {
1510 free(temp1[i]);
1511 free(temp2[i]);
1512 }
1513 AS1[n_seq] = NULL;
1514 AS2[n_seq] = NULL;
1515
1516
1517 if (access == NULL) {
1518 aliLduplexfold((const char **)AS1, (const char **)AS2, n_seq * delta, extension_cost, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
1519 } else {
1520 int **target_access = NULL, **query_access = NULL;
1521 target_access = average_accessibility_target(names1, AS1, n_seq, access, verhaeltnis, alignment_length, binaries, fast); /* get averaged accessibility for alignments */
1522 query_access = average_accessibility_target(names2, AS2, n_seq, access, verhaeltnis, alignment_length, binaries, fast);
1523 if (!(target_access && query_access)) {
1524 for (i = 0; AS1[i]; i++) {
1525 free(AS1[i]);
1526 free(names1[i]);
1527 free(AS2[i]);
1528 free(names2[i]);
1529 }
1530 RNAplex_cmdline_parser_free(&args_info);
1531 return 0;
1532 }
1533
1534 aliLduplexfold_XS((const char **)AS1, (const char **)AS2, (const int **)target_access, (const int **)query_access, n_seq * delta, alignment_length, deltaz, fast, il_a, il_b, b_a, b_b);
1535 if (!(target_access == NULL)) {
1536 i = target_access[0][0];
1537 while (--i > -1)
1538 free(target_access[i]);
1539 free(target_access);
1540 }
1541
1542 if (!(query_access == NULL)) {
1543 i = query_access[0][0];
1544 while (--i > -1)
1545 free(query_access[i]);
1546 free(query_access);
1547 }
1548 }
1549
1550 for (i = 0; AS1[i]; i++) {
1551 free(AS1[i]);
1552 free(names1[i]);
1553 free(AS2[i]);
1554 free(names2[i]);
1555 }
1556 }
1557
1558 if (access) {
1559 free(access);
1560 access = NULL;
1561 }
1562
1563 if (qname) {
1564 free(tname);
1565 access = NULL;
1566 }
1567
1568 if (tname) {
1569 free(qname);
1570 access = NULL;
1571 }
1572
1573 RNAplex_cmdline_parser_free(&args_info);
1574 return 0;
1575 }
1576
1577
1578 #if 0
1579 static int
1580 print_struc(duplexT const *dup)
1581 {
1582 /*this function print out the structure of a hybridization site
1583 * and return the value from which one can begin to look for the next
1584 * non-overlappig hybrid*/
1585
1586 int l1;
1587 float energy = dup->energy * 0.01;
1588 int init = dup->end;
1589
1590 l1 = strchr(dup->structure, '&') - dup->structure;
1591 printf("%s %3d,%-3d : %3d,%-3d (%5.2f)\n", dup->structure, init - l1 - 1,
1592 init - 2, dup->j + l1 + 2 - strlen(dup->structure), dup->j, energy);
1593 return init - l1 - 1;
1594 }
1595
1596
1597 #endif
1598
1599 static int **
read_plfold_i(char * fname,const int beg,const int end,double verhaeltnis,const int length,int fast)1600 read_plfold_i(char *fname,
1601 const int beg,
1602 const int end,
1603 double verhaeltnis,
1604 const int length,
1605 int fast)
1606 {
1607 double begin = BeginTimer();
1608 FILE *in = fopen(fname, "r");
1609
1610 if (in == NULL) {
1611 vrna_message_warning("File ' %s ' open error", fname);
1612 return NULL;
1613 }
1614
1615 int i, j;
1616 int **access;
1617 int offset, temp;
1618 temp = 0;
1619 offset = 0;
1620 int seq_pos;
1621 int beg_r, end_r;
1622 beg_r = beg;
1623 end_r = end;
1624 char tmp[2048] = {
1625 0x0
1626 };
1627 /* char *ptr; */
1628 if (fgets(tmp, sizeof(tmp), in) == 0)
1629 perror("Empty File");
1630
1631 if (strchr(tmp, '>')) {
1632 vrna_message_warning("file %s is not in RNAplfold format", fname);
1633 return NULL;
1634 }
1635
1636 if (fgets(tmp, sizeof(tmp), in) == 0) {
1637 vrna_message_warning("No accessibility data");
1638 return NULL;
1639 }
1640
1641 int dim_x;
1642 dim_x = get_max_u(tmp, '\t');
1643 if (length > dim_x && fast == 0) {
1644 printf("Interaction length %d is larger than the length of the largest region %d \nfor which the opening energy was computed (-u parameter of RNAplfold)\n", length, dim_x);
1645 printf("Please recompute your profiles with a larger -u or set -l to a smaller interaction length\n");
1646 return NULL;
1647 }
1648
1649 access = (int **)vrna_alloc(sizeof(int *) * (dim_x + 2));
1650 for (i = 0; i < dim_x + 2; i++)
1651 access[i] = (int *)vrna_alloc(sizeof(int) * (end_r - beg_r + 1)); /* normally +1 */
1652 for (i = 0; i < end_r - beg_r + 1; i++)
1653 for (j = 0; j < dim_x + 2; j++)
1654 access[j][i] = INF;
1655 access[0][0] = dim_x + 2;
1656 while (fgets(tmp, sizeof(tmp), in) != 0 && --end_r > 10) {
1657 /* read a record, before we have --end_r > 10*/
1658 float n;
1659 /* int i; */
1660 /* int u; */
1661 beg_r--;
1662 if (beg_r < 1) {
1663 if (sscanf(tmp, "%d\t%n", &seq_pos, &temp) == 1) {
1664 offset += temp;
1665 int count;
1666 count = 1;
1667 while (sscanf(tmp + offset, "%f%n", &n, &temp) == 1) {
1668 offset += temp;
1669 /* seq_pos - beg allows one to get the accessibility right */
1670 access[count][seq_pos - beg + 11] = (int)rint(100 * n); /* round the number */
1671 access[count][seq_pos - beg + 11] *= verhaeltnis; /* 10 stands here for the number of nucleotides */
1672 count++;
1673 }
1674 offset = 0;
1675 }
1676 }
1677 }
1678 if (end_r > 20) {
1679 printf("Accessibility files contains %d less entries than expected based on the sequence length\n", end_r - 20);
1680 printf("Please recompute your profiles so that profile length and sequence length match\n");
1681 return NULL;
1682 }
1683
1684 fclose(in);
1685 float elapTicks;
1686 float elapMilli;
1687 elapTicks = (EndTimer(begin) - begin);
1688 elapMilli = elapTicks / 1000;
1689 return access;
1690 }
1691
1692
1693 static int
convert_plfold_i(char * fname)1694 convert_plfold_i(char *fname)
1695 {
1696 int i;
1697 FILE *in = fopen(fname, "r");
1698
1699 if (in == NULL) {
1700 vrna_message_warning("File ' %s ' open error", fname);
1701 return -1;
1702 }
1703
1704 char tmp[2048] = {
1705 0x0
1706 };
1707 if (fgets(tmp, sizeof(tmp), in) == 0)
1708 perror("Empty File");
1709
1710 if (strchr(tmp, '>')) {
1711 vrna_message_warning("file %s is not in RNAplfold format", fname);
1712 return -1;
1713 }
1714
1715 if (fgets(tmp, sizeof(tmp), in) == 0) {
1716 vrna_message_warning("No accessibility data");
1717 return -1;
1718 }
1719
1720 int u_length;
1721 u_length = get_max_u(tmp, '\t'); /* get the x dimension */
1722 char c;
1723 int length = 0;
1724 while ((c = fgetc(in)) != EOF)
1725 if (c == '\n')
1726 length++;
1727
1728 fclose(in);
1729 int **access = read_plfold_i(fname, 1, length + 20, 1, u_length, 2);
1730 char *outname;
1731 outname = (char *)vrna_alloc((strlen(fname) + 5) * sizeof(char));
1732 strcpy(outname, fname);
1733 strcat(outname, "_bin");
1734 FILE *fp = fopen(outname, "wb");
1735 int p[1];
1736 p[0] = 1000000;
1737 for (i = 0; i < u_length + 2; i++) {
1738 fwrite(access[i], sizeof(int), length + 20, fp);
1739 free(access[i]);
1740 }
1741 fseek(fp, 0, SEEK_SET);
1742 fwrite(&u_length, sizeof(int), 1, fp);
1743 fwrite(&length, sizeof(int), 1, fp);
1744 free(outname);
1745 fclose(fp);
1746 free(access);
1747 return 1;
1748 }
1749
1750
1751 static int **
read_plfold_i_bin(char * fname,const int beg,const int end,double verhaeltnis,const int length,int fast)1752 read_plfold_i_bin(char *fname,
1753 const int beg,
1754 const int end,
1755 double verhaeltnis,
1756 const int length,
1757 int fast)
1758 {
1759 double begin = BeginTimer();
1760 FILE *fp = fopen(fname, "rb");
1761 int seqlength;
1762
1763 if (fp == NULL) {
1764 vrna_message_warning("File ' %s ' open error", fname);
1765 return NULL;
1766 }
1767
1768 int *first_line;
1769 first_line = (int *)vrna_alloc(sizeof(int) * (end - beg + 1)); /* check length of the line LOOK at read_plfold_i */
1770 if (!fread(first_line, sizeof(int), (end - beg) + 1, fp)) {
1771 vrna_message_warning("Problem reading size of profile from '%s'", fname); /* get the value of the u option */
1772 return NULL;
1773 }
1774
1775 int lim_x;
1776 lim_x = first_line[0];
1777 seqlength = first_line[1]; /* length of the sequence RNAplfold was ran on. */
1778 if (length > lim_x && fast == 0) {
1779 printf("Interaction length %d is larger than the length of the largest region %d \nfor which the opening energy was computed (-u parameter of RNAplfold)\n", length, lim_x);
1780 printf("Please recompute your profiles with a larger -u or set -l to a smaller interaction length\n");
1781 return NULL;
1782 }
1783
1784 fseek(fp, 0, SEEK_SET); /* set pointer to the begining of the file */
1785 int **access; /* here we store the access values */
1786 access = (int **)vrna_alloc(sizeof(int *) * (lim_x + 1)); /* !!!! lim_x+1 -> lim_x+2 */
1787 int count;
1788 long int position;
1789 for (count = 0; count < lim_x + 1; count++) {
1790 /* read file */
1791 access[count] = (int *)vrna_alloc(sizeof(int) * (end - beg + 1)); /* declare array length */
1792 /* now we should be sure to read the right position */
1793 /* we first need a seek to the right position */
1794 /* 0->9 line begin, + begin */
1795 /* here we need information about the total line length..., we can get this if this is saved by RNAplfold... */
1796 position = ftell(fp);
1797 fseek(fp, (beg - 1) * sizeof(int), SEEK_CUR); /* go to the desired position, note the 10 offset */
1798 position = ftell(fp);
1799 if (!fread(access[count], sizeof(int), (end - beg) + 1, fp)) /* read the needed number of accessibility values */
1800 printf("File '%s' is corrupted \n", fname);
1801
1802 position = ftell(fp);
1803 fseek(fp, (seqlength - end + 20) * sizeof(int), SEEK_CUR); /* place to the begining of the next file */
1804 position = ftell(fp);
1805 }
1806 fseek(fp, 0, SEEK_SET);
1807 access[0][0] = lim_x;
1808 access[0][0] += 1; /* !!!!!!!!!!!!!!!!!!!put 2 in case of problem */
1809 fclose(fp); /* close file */
1810 float elapTicks;
1811 float elapMilli;
1812 free(first_line);
1813 elapTicks = (EndTimer(begin) - begin);
1814 elapMilli = elapTicks / 1000;
1815 /* printf("read_plfold_i_bin Millisecond %.2f\n",elapMilli); */ /* return time */
1816 return access; /* return access */
1817 }
1818
1819
1820 static int
get_max_u(const char * s,char delim)1821 get_max_u(const char *s,
1822 char delim)
1823 {
1824 char *pch;
1825 int total;
1826
1827 total = 0;
1828 pch = strchr(s, delim);
1829 total++;
1830 while (pch != NULL) {
1831 pch = strchr(pch + 1, delim);
1832 total++;
1833 }
1834 return total - 2;
1835 }
1836
1837
1838 static int **
average_accessibility_target(char ** names,char ** ALN,int number,char * access,double verhaeltnis,const int alignment_length,int binaries,int fast)1839 average_accessibility_target(char **names,
1840 char **ALN,
1841 int number,
1842 char *access,
1843 double verhaeltnis,
1844 const int alignment_length,
1845 int binaries,
1846 int fast)
1847 {
1848 int i;
1849 int ***master_access = NULL; /* contains the accessibility arrays for different */
1850 int **average_access = NULL; /* contains the averaged accessibility */
1851 int aln_size = strlen(ALN[0]); /* aln size -> define size of the averaged accessibility array */
1852 int *index; /* contains the index used for navigating inside the alignments */
1853 long long int begin, end; /* contains the begin and end region to read the accessibility */
1854
1855 index = (int *)vrna_alloc(sizeof(int) * number);
1856 for (i = 0; i < number; i++)
1857 index[i] = 1;
1858 master_access = (int ***)vrna_alloc(sizeof(int **) * number);
1859 int dim_x; /* contains the minimal of the maximum u length for all sequences of the alignment */
1860 dim_x = INF;
1861 int u;
1862 int location_flag; /* location flag checks if all line contains a location flag or not */
1863 /* if some line contains location information and other not, return a warning but keep going; */
1864 /* The location flag will be set to TRUE if the following pattern is found */
1865 /* sequence-name/[0-9]+-[0-9]+ */
1866 /* |----NAME----||BEG| |END| */
1867 char bla[255];
1868 if (sscanf(names[0], "%255[^/]/%lld-%lld", bla, &begin, &end) == 3) /* initialize location_flag; */
1869 location_flag = 1;
1870 else
1871 location_flag = 0;
1872
1873 char *file_s1 = NULL;
1874 for (i = 0; i < number; i++) {
1875 /* be careful!!!! Name should contain all characters from begin till the "/" character */
1876 /* char *s1; */
1877 file_s1 = (char *)vrna_alloc(sizeof(char) * (strlen(names[i]) + strlen(access) + 20));
1878 begin = 1;
1879 int sequence_length = get_sequence_length_from_alignment(ALN[i]);
1880 end = sequence_length;
1881 if (sscanf(names[i], "%255[^/]/%lld-%lld", bla, &begin, &end) == 3) {
1882 /* if subsequence and range do not have the same length, stop RNAplex. */
1883 if (end - begin + 1 != sequence_length - 20) {
1884 printf("Your range %lld %lld in sequence %s does not correspond to the sequence length %d\n", begin, end, names[i], sequence_length - 20);
1885 printf("Please check your input alignments and rerun RNAplex");
1886 int a = 0;
1887 if (master_access != NULL) {
1888 for (a = 0; a < i; a++) {
1889 int j;
1890 j = master_access[a][0][0];
1891 while (--j > -1)
1892 free(master_access[a][j]);
1893 free(master_access[a]);
1894 }
1895 }
1896
1897 free(master_access);
1898 free(index);
1899 free(file_s1);
1900 return average_access;
1901 }
1902
1903 end += 20; /* add 20 to the end, in order to take the N's into account */
1904 if (location_flag == 0) {
1905 vrna_message_warning("\n!! Line %d in your target alignment contains location information\n"
1906 "while line %d did not. PLEASE CHECK your alignments!!\n"
1907 "RNAplex will continue the target search.",
1908 i + 1, i);
1909 }
1910
1911 location_flag = 1;
1912 strcpy(file_s1, access);
1913 strcat(file_s1, "/");
1914 strcat(file_s1, bla);
1915 } else {
1916 if (location_flag == 1) {
1917 vrna_message_warning("\n!! Line %d in your target alignment does not contain location information\n"
1918 "while line %d in your target alignment did. PLEASE CHECK your alignments!!\n"
1919 "RNAplex will continue the target search.",
1920 i + 1, i);
1921 }
1922
1923 location_flag = 0;
1924 strcpy(file_s1, access);
1925 strcat(file_s1, "/");
1926 strcat(file_s1, names[i]);
1927 }
1928
1929 strcat(file_s1, "_openen");
1930 if (!binaries) {
1931 master_access[i] = read_plfold_i(file_s1, begin, end, verhaeltnis, alignment_length, fast); /* read */
1932 } else {
1933 strcat(file_s1, "_bin");
1934 master_access[i] = read_plfold_i_bin(file_s1, begin, end, verhaeltnis, alignment_length, fast); /* read */
1935 }
1936
1937 free(file_s1);
1938 file_s1 = NULL;
1939 dim_x = MIN2(dim_x, master_access[i][0][0]);
1940 }
1941 average_access = (int **)vrna_alloc(sizeof(int *) * (dim_x));
1942 for (i = 0; i < dim_x; i++)
1943 average_access[i] = (int *)vrna_alloc(sizeof(int) * (aln_size + 9)); /* average_access is of the length of the alignment */
1944 /* while master_access is of the length of the sequences. */
1945 average_access[0][0] = dim_x;
1946 for (i = 1; i < aln_size; i++) {
1947 /* go through all aln position */
1948 int j;
1949 int count_j = number;
1950 for (j = 0; j < number; j++) {
1951 /* go through all aln members */
1952 if (!(ALN[j][i - 1] == '-')) {
1953 /* if we have a gap, skip this position */
1954 for (u = 1; u < dim_x; u++) {
1955 /* go through all u */
1956 if (index[j] < u + 10)
1957 continue;
1958
1959 average_access[u][i] += master_access[j][u][index[j]]; /* index[j] position in sequence j */
1960 }
1961 index[j]++; /* increase position in sequence */
1962 } else {
1963 count_j--;
1964 }
1965 }
1966 if (!(count_j > 0)) {
1967 printf("Alignment contains only gap at column %d\n exiting program\n", i);
1968 return NULL;
1969 }
1970
1971 for (u = 1; u < dim_x; u++)
1972 average_access[u][i] = (int)rint(average_access[u][i] / (count_j));
1973 }
1974 /* free(index); */
1975 for (i = 0; i < number; i++) {
1976 int j;
1977 j = master_access[i][0][0];
1978 while (--j > -1)
1979 free(master_access[i][j]);
1980 free(master_access[i]);
1981 }
1982 free(master_access);
1983 free(index);
1984 return average_access;
1985 }
1986
1987
1988 /**
1989 * aliprint_struct generate two postscript files.
1990 * The first one is a structure annotated alignment a la RNAalifold.
1991 * The second file is a structural representation of the interaction
1992 * with colour annotation of the base-pair conservation.
1993 */
1994 static void
aliprint_struct(FILE * Result,FILE * mrna,FILE * query,const int WindowsLength)1995 aliprint_struct(FILE *Result, /* result file */
1996 FILE *mrna, /* target alignment */
1997 FILE *query, /* query alignment */
1998 const int WindowsLength /* Number of nucleotide around the target sequence */
1999 )
2000 {
2001 char *result = NULL;
2002 /**
2003 * Defines arrays for names and sequences of the query and target alignment
2004 */
2005 char *AS1[MAX_NUM_NAMES], *AS2[MAX_NUM_NAMES], *names1[MAX_NUM_NAMES], *names2[MAX_NUM_NAMES];
2006 int n_seq, n_seq2, i, s;
2007
2008 /**
2009 * Check if target and sequence alignment contains the same number of sequences
2010 * The user should ensure that the sequence are in the same species order in both the target and query alignment files.
2011 */
2012 n_seq = read_clustal(mrna, AS1, names1);
2013 n_seq2 = read_clustal(query, AS2, names2);
2014 if (n_seq != n_seq2) {
2015 for (i = 0; AS1[i]; i++) {
2016 free(AS1[i]);
2017 free(AS2[i]);
2018 }
2019 vrna_message_error("unequal number of seqs in alignments");
2020 }
2021
2022 /**
2023 * Here we get the length of the target and query alignments. Important for initiliazing the necessary arrays
2024 */
2025 int target_alignment_length, query_alignment_length;
2026 target_alignment_length = strlen(AS1[0]);
2027 query_alignment_length = strlen(AS2[0]);
2028
2029
2030 /**
2031 * Initialize files containing sequences
2032 */
2033 AS1[n_seq] = NULL;
2034 AS2[n_seq] = NULL;
2035 do {
2036 /**
2037 * Quit if the result file does not exist
2038 */
2039 if ((result = vrna_read_line(Result)) == NULL) {
2040 free(result);
2041 break;
2042 }
2043
2044 /**
2045 * If we have a line in the result file that looks like a result line, then parse it
2046 */
2047 if (strchr(result, '(') && strchr(result, '&') && strchr(result, '(') && strchr(result, ',') && strchr(result, ':') && strchr(result, '-')) {
2048 char *structure, *pos;
2049 /**
2050 * tbegin,tend,qbegin,qend store the duplex boundaries
2051 * on both the target and query sequences, respectively
2052 */
2053 int tbegin, tend, qbegin, qend;
2054 int length, posi;
2055 /**
2056 * sbegin, send are the coordinates of the target alignment slide
2057 */
2058 int sbegin, send;
2059 /**
2060 * Check in the line where is the first space.
2061 * This gives us the total length of the duplex
2062 */
2063 pos = strchr(result, ' ');
2064 posi = (int)(pos - result);
2065 length = posi;
2066 /**
2067 * Copy the structure in the line into variable structure
2068 */
2069 structure = (char *)vrna_alloc((length + 1) * sizeof(char));
2070 sscanf(result, "%s", structure);
2071 /**
2072 * Save the coordinates on the target
2073 */
2074 sscanf(pos, "%10d,%10d", &tbegin, &tend);
2075 pos = strchr(pos, ':');
2076 pos = strchr(pos, ' ');
2077 /**
2078 * Save the coordinates on the query
2079 */
2080 sscanf(pos, "%10d,%10d", &qbegin, &qend);
2081
2082 /**
2083 * To produce the ps files we use the full query alignment
2084 * and a user defined section of the target coordinates.
2085 * The size of the target slide is determined by the WindowsLength variable
2086 */
2087 sbegin = MAX2(tbegin - WindowsLength, 0);
2088 send = MIN2(tend + WindowsLength, target_alignment_length);
2089 /**
2090 * We retrieved the coordinates of the duplex
2091 * Now we can slide the alignment
2092 * Target and query will hold the alignments for the target and query sequences
2093 */
2094 char **target;
2095 char **query;
2096 char **join;
2097 target = (char **)vrna_alloc((n_seq + 1) * sizeof(char *));
2098 query = (char **)vrna_alloc((n_seq + 1) * sizeof(char *));
2099 join = (char **)vrna_alloc((n_seq + 1) * sizeof(char *));
2100 for (s = 0; s < n_seq; s++) {
2101 target[s] = (char *)vrna_alloc((send - sbegin + 2) * sizeof(char));
2102 strncpy(target[s], (AS1[s] + sbegin - 1), (send - sbegin + 1));
2103 target[s][send - sbegin + 1] = '\0';
2104 query[s] = (char *)vrna_alloc((query_alignment_length + 1) * sizeof(char));
2105 strcpy(query[s], AS2[s]);
2106 query[s][query_alignment_length] = '\0';
2107 join[s] = (char *)vrna_alloc((query_alignment_length + (send - sbegin) + 3) * sizeof(char));
2108 strncpy(join[s], (AS1[s] + sbegin - 1), (send - sbegin + 1));
2109 strcat(join[s], "&");
2110 strcat(join[s], AS2[s]);
2111 }
2112 target[n_seq] = NULL;
2113 query[n_seq] = NULL;
2114 /**
2115 * Get the consensus structure for the query and target sequence
2116 * This consensus structure is constrained such that the interaction sites are unbound.
2117 * We define and set the query and target constraint
2118 */
2119 char *target_constraint;
2120 char *query_constraint;
2121 char *joint_structure;
2122 target_constraint = (char *)vrna_alloc((unsigned)(send - sbegin + 2));
2123 query_constraint = (char *)vrna_alloc((unsigned)(query_alignment_length + 1));
2124 joint_structure = (char *)vrna_alloc((unsigned)(send - sbegin + 3 + query_alignment_length));
2125 for (i = 0; i < strlen(target[0]); i++) {
2126 if (i > tbegin - sbegin - 1 && i < tend - sbegin + 1)
2127 target_constraint[i] = 'x';
2128 else
2129 target_constraint[i] = '.';
2130 }
2131 for (i = 0; i < strlen(AS2[0]); i++) {
2132 if (i > qbegin - 2 && i < qend)
2133 query_constraint[i] = 'x';
2134 else
2135 query_constraint[i] = '.';
2136 }
2137 /**
2138 * Now produce structure
2139 */
2140 fold_constrained = 1;
2141 alifold((const char **)target, target_constraint);
2142 for (i = 0; !(target_constraint[i] == '\0'); i++) {
2143 if (target_constraint[i] == '(')
2144 target_constraint[i] = '[';
2145 else if (target_constraint[i] == ')')
2146 target_constraint[i] = ']';
2147 }
2148 alifold((const char **)query, query_constraint);
2149 for (i = 0; !(query_constraint[i] == '\0'); i++) {
2150 if (query_constraint[i] == '(')
2151 query_constraint[i] = '<';
2152 else if (query_constraint[i] == ')')
2153 query_constraint[i] = '>';
2154 }
2155 /**
2156 * Add duplex structure, and produce joint structure
2157 */
2158 int count_duplex_structure = 0;
2159 for (i = tbegin - sbegin; i < tend - sbegin + 1; i++) {
2160 target_constraint[i] = structure[count_duplex_structure];
2161 count_duplex_structure++;
2162 }
2163 count_duplex_structure++;
2164 for (i = qbegin - 1; i < qend; i++) {
2165 query_constraint[i] = structure[count_duplex_structure];
2166 count_duplex_structure++;
2167 }
2168 strncpy(joint_structure, target_constraint, (send - sbegin + 1));
2169 strcat(joint_structure, "&");
2170 strcat(joint_structure, query_constraint);
2171 /**
2172 * Produce consensus sequence
2173 */
2174 char *temp_target;
2175 char *temp_query;
2176 char *string;
2177 temp_target = consensus((const char **)target);
2178 temp_query = consensus((const char **)query);
2179 string = (char *)vrna_alloc((strlen(temp_target) + strlen(temp_query) + 1) * sizeof(char));
2180 strcpy(string, temp_target);
2181 free(temp_target);
2182 strcat(string, temp_query);
2183 free(temp_query);
2184 /**
2185 * now produce output name, based on the first two names of the alignment
2186 */
2187 char *psoutput = vrna_strdup_printf("annaln_%s_%s_%d_%d_%d_%d.ps",
2188 names1[0],
2189 names2[0],
2190 tbegin,
2191 tend,
2192 qbegin,
2193 qend);
2194 s = 0;
2195 while (psoutput[s] != '\0') {
2196 if (psoutput[s] == '\\' || psoutput[s] == '/')
2197 psoutput[s] = '_';
2198
2199 s++;
2200 }
2201 /**
2202 * Produce a structure annotated, colorated alignment
2203 */
2204 aliPS_color_aln(joint_structure, psoutput, (const char **)join, (const char **)names1);
2205 free(psoutput);
2206 #if 0
2207 /**
2208 * We also need the consensus structure annotated with conservation information
2209 *
2210 */
2211
2212 /**
2213 * First we change the output file name from annaln -> annstr
2214 */
2215 psoutput[3] = 's';
2216 psoutput[4] = 't';
2217 psoutput[5] = 'r';
2218
2219
2220 /**
2221 * Now we change the different parenthesis into one
2222 */
2223
2224 char *joint_structure_parenthesis;
2225 joint_structure_parenthesis = (char *)vrna_alloc((strlen(joint_structure) + 1) * sizeof(char));
2226 strcpy(joint_structure_parenthesis, joint_structure);
2227 for (i = 0; i < strlen(joint_structure); i++) {
2228 if ((joint_structure[i] == '[') || (joint_structure[i] == '<'))
2229 joint_structure[i] = '(';
2230 else if (joint_structure[i] == ']' || joint_structure[i] == '>')
2231 joint_structure[i] = ')';
2232 }
2233 cut_point = send - sbegin + 1;
2234 /**
2235 * Remove & from the alignment and the consensus structure
2236 */
2237 int counter = 0;
2238 for (i = 0; i < strlen(joint_structure); i++) {
2239 if (joint_structure[i] == '&')
2240 continue;
2241
2242 joint_structure[counter] = joint_structure[i];
2243 joint_structure_parenthesis[counter] = joint_structure_parenthesis[i];
2244 for (s = 0; s < n_seq; s++)
2245 join[s][counter] = join[s][i];
2246 counter++;
2247 }
2248 free(string);
2249 string = consensus((const char **)join);
2250 printf("%s\n%s\n%s\n", join[0], string, joint_structure);
2251 char **A;
2252 //A=annote(joint_structure_parenthesis,(const char**) join);
2253 xrna_plex_plot(string, joint_structure_parenthesis, psoutput);
2254 /**
2255 * Free stuff...
2256 */
2257 free(structure);
2258 free(result);
2259 free(psoutput);
2260 for (i = 0; i < n_seq + 1; i++)
2261 free(target[i]);
2262 free(target);
2263 #endif
2264 }
2265 } while (1);
2266 for (i = 0; AS1[i]; i++) {
2267 free(AS1[i]);
2268 free(AS2[i]);
2269 free(names1[i]);
2270 free(names2[i]);
2271 }
2272 fclose(mrna);
2273 fclose(query);
2274 }
2275
2276
2277 /*
2278 * All characters not being included into ATCGUN are considered as gap characters.
2279 */
2280 static int
get_sequence_length_from_alignment(char * sequence)2281 get_sequence_length_from_alignment(char *sequence)
2282 {
2283 int counter = 0;
2284
2285 while (!(*sequence == '\0')) {
2286 if (*sequence == 'A' || *sequence == 'T' || *sequence == 'C' || *sequence == 'G' || *sequence == 'U' || *sequence == 'N')
2287 counter++;
2288
2289 sequence++;
2290 }
2291 return counter;
2292 }
2293
2294
2295 static void
linear_fit(int * il_a,int * il_b,int * b_a,int * b_b)2296 linear_fit(int *il_a,
2297 int *il_b,
2298 int *b_a,
2299 int *b_b)
2300 {
2301 /*get fit parameters*/
2302 vrna_md_t md;
2303 vrna_param_t *P = NULL;
2304
2305 set_model_details(&md);
2306
2307 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
2308 update_fold_params();
2309 P = vrna_params(&md);
2310 make_pair_matrix();
2311 }
2312
2313 int internal_loop_x[25] = {
2314 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
2315 };
2316 int internal_loop_y[25];
2317 int i;
2318 for (i = 0; i < 25; i++) /* initialize the y-value for internal loops */
2319 internal_loop_y[i] = P->internal_loop[internal_loop_x[i]];
2320 int sumx, sumy, sumxx, sumxy;
2321 sumx = sumy = sumxx = sumxy = 0;
2322 double til_a, til_b;
2323 int n = 25; /* only take data points till int_loop size <=20; */
2324 /* no measurement exists for larger loop && */
2325 /* such large loop are not expected in RNA-RNA interactions. */
2326 for (i = 0; i < n; i++) {
2327 sumx += internal_loop_x[i];
2328 sumy += internal_loop_y[i];
2329 sumxx += internal_loop_x[i] * internal_loop_x[i];
2330 sumxy += internal_loop_x[i] * internal_loop_y[i];
2331 }
2332 if ((sumxx - (sumx * sumx) / n) < 1e-6) {
2333 free(P);
2334 printf("divisor for internal loop is too small %d\n", (sumxx - (sumx * sumx) / n));
2335 vrna_message_error("Problem in fitting");
2336 }
2337
2338 til_a = (sumxy - (sumx * sumy) / n) / (sumxx - (sumx * sumx) / n);
2339 til_b = sumy / n - (til_a * sumx / n);
2340 *il_a = (int)til_a;
2341 *il_b = (int)til_b;
2342
2343 /* ###bulge#### */
2344
2345 n = 5;
2346 int bulge_loop_x[5] = {
2347 2, 3, 4, 5, 6
2348 };
2349 int bulge_loop_y[5];
2350 for (i = 0; i < n; i++)
2351 bulge_loop_y[i] = P->bulge[bulge_loop_x[i]];
2352 sumx = sumy = sumxx = sumxy = 0;
2353 double tb_a, tb_b;
2354 for (i = 0; i < n; i++) {
2355 sumx += bulge_loop_x[i];
2356 sumy += bulge_loop_y[i];
2357 sumxx += bulge_loop_x[i] * bulge_loop_x[i];
2358 sumxy += bulge_loop_x[i] * bulge_loop_y[i];
2359 }
2360 tb_a = (sumxy - (sumx * sumy) / n) / (sumxx - (sumx * sumx) / n);
2361 tb_b = sumy / n - (tb_a * sumx / n);
2362 *b_a = (int)tb_a;
2363 *b_b = (int)tb_b;
2364 free(P);
2365 if ((sumxx - (sumx * sumx) / n) < 1e-6) {
2366 printf("divisor for bulge loop is too small %d\n", (sumxx - (sumx * sumx) / n));
2367 vrna_message_error("Problem in fitting");
2368 }
2369 }
2370
2371
2372 double
probcompute_silvana_98(char * s1,double k_concentration,double tris_concentration,double mg_concentration,double na_concentration,double probe_concentration)2373 probcompute_silvana_98(char *s1,
2374 double k_concentration,
2375 double tris_concentration,
2376 double mg_concentration,
2377 double na_concentration,
2378 double probe_concentration)
2379 {
2380 double d_a = 3.92 / 100000.0; /*Parameters from the article of Owczarzy*/
2381 double d_b = 9.11 / 1000000; /*Parameters from the article of Owczarzy*/
2382 double d_c = 6.26 / 100000; /*Parameters from the article of Owczarzy*/
2383 double d_d = 1.42 / 100000; /*Parameters from the article of Owczarzy*/
2384 double d_e = 4.82 / 10000; /*Parameters from the article of Owczarzy*/
2385 double d_f = 5.25 / 10000; /*Parameters from the article of Owczarzy*/
2386 double d_g = 8.31 / 100000; /*Parameters from the article of Owczarzy*/
2387 double d_magn_corr_value = 0;
2388 double d_fgc = 0;
2389 double dH, dS;
2390
2391 dS = 0;
2392 dH = 0;
2393 double salt_correction;
2394 double enthalpy[4][4] =
2395 { { -7.9, -8.4, -7.8, -7.2 },
2396 { -8.5, -8.0, -10.6, -7.8 },
2397 { -8.2, -10.6, -8.0, -8.4 },
2398 { -7.2, -8.2, -8.5, -7.9 } };
2399 double entropy[4][4] = { {
2400 -22.2, -22.4, -21.0, -20.4
2401 },
2402 { -22.7, -19.9, -27.2, -21.0 },
2403 { -22.2, -27.2, -19.9, -22.4 },
2404 { -21.3, -22.2, -22.7, -22.2 } };
2405 int posst;
2406 posst = 0;
2407 int converted = encode_char(toupper(s1[posst])) - 1;
2408 int seqlen = strlen(s1);
2409 double Tm;
2410
2411 /* terminal correction */
2412 if (s1[0] == 'G' || s1[0] == 'C') {
2413 dH += 0.1;
2414 dS += -2.8;
2415 d_fgc++;
2416 }
2417
2418 if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2419 dH += 2.3;
2420 dS += 4.8;
2421 }
2422
2423 if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2424 dH += 0.1;
2425 dS += -2.8;
2426 }
2427
2428 if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2429 dH += 2.3;
2430 dS += 4.8;
2431 }
2432
2433 /* compute dH and dH based on sequence */
2434 for (posst = 1; posst < seqlen; posst++) {
2435 if (toupper(s1[posst]) == 'G' || toupper(s1[posst]) == 'C')
2436 d_fgc++;
2437
2438 int nextconverted = encode_char(toupper(s1[posst])) - 1;
2439 dH += enthalpy[converted][nextconverted];
2440 dS += entropy[converted][nextconverted];
2441 converted = nextconverted;
2442 }
2443 d_fgc = d_fgc / ((double)(seqlen));
2444 if (mg_concentration == 0) {
2445 dS += 0.368 * (seqlen - 1) * log(na_concentration);
2446 Tm = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2447 return Tm;
2448 } else {
2449 double single_charged = k_concentration + tris_concentration / 2 + na_concentration;
2450 double d_ratio_ions = sqrt(mg_concentration / single_charged);
2451 if (single_charged == 0) {
2452 d_magn_corr_value =
2453 d_a -
2454 d_b * log(mg_concentration) +
2455 d_fgc * (d_c + d_d * log(mg_concentration)) +
2456 1 / (2 * ((double)seqlen - 1)) *
2457 (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2458 } else {
2459 if (d_ratio_ions < 0.22) {
2460 d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1 / 100000 * log(single_charged) + 9.40 * 1 / 1000000 * pow(log(single_charged), 2);
2461 } else {
2462 if (d_ratio_ions < 6) {
2463 d_a = 3.92 / 100000 * (0.843 - 0.352 * sqrt(single_charged) * log(single_charged));
2464 d_d = 1.42 / 100000 * (1.279 - 4.03 / 1000 * log(single_charged) - 8.03 / 1000 * pow(log(single_charged), 2));
2465 d_g = 8.31 / 100000 * (0.486 - 0.258 * log(single_charged) + 5.25 / 1000 * pow(log(single_charged), 3));
2466
2467 d_magn_corr_value = d_a - d_b * log
2468 (mg_concentration) + d_fgc *
2469 (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2470 } else {
2471 d_magn_corr_value = d_a - d_b * log(mg_concentration) + d_fgc * (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2472 }
2473 }
2474 }
2475
2476 double temp_Tm = dH * 1000 / (dS + 1.987 * log(probe_concentration / 4));
2477 Tm = 1 / (1 / temp_Tm + d_magn_corr_value) - 273.15;
2478 return Tm;
2479 }
2480 }
2481
2482
2483 double
probcompute_silvana_04(char * s1,double k_concentration,double tris_concentration,double mg_concentration,double na_concentration,double probe_concentration)2484 probcompute_silvana_04(char *s1,
2485 double k_concentration,
2486 double tris_concentration,
2487 double mg_concentration,
2488 double na_concentration,
2489 double probe_concentration)
2490 {
2491 double d_a = 3.92 / 100000.0; /*Parameters from the article of Owczarzy*/
2492 double d_b = 9.11 / 1000000; /*Parameters from the article of Owczarzy*/
2493 double d_c = 6.26 / 100000; /*Parameters from the article of Owczarzy*/
2494 double d_d = 1.42 / 100000; /*Parameters from the article of Owczarzy*/
2495 double d_e = 4.82 / 10000; /*Parameters from the article of Owczarzy*/
2496 double d_f = 5.25 / 10000; /*Parameters from the article of Owczarzy*/
2497 double d_g = 8.31 / 100000; /*Parameters from the article of Owczarzy*/
2498 double d_magn_corr_value = 0;
2499 double d_fgc = 0;
2500 double dH, dS;
2501
2502 dS = 0;
2503 dH = 0;
2504 double salt_correction;
2505 double enthalpy[4][4] =
2506 { { -7.6, -8.4, -7.8, -7.2 },
2507 { -8.5, -8.0, -10.6, -7.8 },
2508 { -8.2, -9.8, -8.0, -8.4 },
2509 { -7.2, -8.2, -8.5, -7.6 } };
2510 double entropy[4][4] = { {
2511 -21.3, -22.4, -21.0, -20.4
2512 },
2513 { -22.7, -19.9, -27.2, -21.0 },
2514 { -22.2, -24.4, -19.9, -22.4 },
2515 { -21.3, -22.2, -22.7, -21.3 } };
2516 int posst;
2517 posst = 0;
2518 int converted = encode_char(toupper(s1[posst])) - 1;
2519 int seqlen = strlen(s1);
2520 double Tm;
2521
2522 /* terminal correction */
2523 if (s1[0] == 'G' || s1[0] == 'C') {
2524 dH += 0.1;
2525 dS += -2.85;
2526 d_fgc++;
2527 }
2528
2529 if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2530 dH += 2.4;
2531 dS += 4.1;
2532 }
2533
2534 if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2535 dH += 0.1;
2536 dS += -2.85;
2537 }
2538
2539 if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2540 dH += 2.4;
2541 dS += 4.1;
2542 }
2543
2544 /* compute dH and dH based on sequence */
2545 for (posst = 1; posst < seqlen; posst++) {
2546 if (toupper(s1[posst]) == 'G' || toupper(s1[posst]) == 'C')
2547 d_fgc++;
2548
2549 int nextconverted = encode_char(toupper(s1[posst])) - 1;
2550 dH += enthalpy[converted][nextconverted];
2551 dS += entropy[converted][nextconverted];
2552 converted = nextconverted;
2553 }
2554 d_fgc = d_fgc / ((double)(seqlen));
2555 if (mg_concentration == 0) {
2556 dS += 0.368 * (seqlen - 1) * log(na_concentration);
2557 Tm = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2558 return Tm;
2559 } else {
2560 double single_charged = k_concentration + tris_concentration / 2 + na_concentration;
2561 double d_ratio_ions = sqrt(mg_concentration / single_charged);
2562 if (single_charged == 0) {
2563 d_magn_corr_value =
2564 d_a -
2565 d_b * log(mg_concentration) +
2566 d_fgc * (d_c + d_d * log(mg_concentration)) +
2567 1 / (2 * ((double)seqlen - 1)) *
2568 (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2569 } else {
2570 if (d_ratio_ions < 0.22) {
2571 d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1 / 100000 * log(single_charged) + 9.40 * 1 / 1000000 * pow(log(single_charged), 2);
2572 } else {
2573 if (d_ratio_ions < 6) {
2574 d_a = 3.92 / 100000 * (0.843 - 0.352 * sqrt(single_charged) * log(single_charged));
2575 d_d = 1.42 / 100000 * (1.279 - 4.03 / 1000 * log(single_charged) - 8.03 / 1000 * pow(log(single_charged), 2));
2576 d_g = 8.31 / 100000 * (0.486 - 0.258 * log(single_charged) + 5.25 / 1000 * pow(log(single_charged), 3));
2577
2578 d_magn_corr_value = d_a - d_b * log
2579 (mg_concentration) + d_fgc *
2580 (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2581 } else {
2582 d_magn_corr_value = d_a - d_b * log(mg_concentration) + d_fgc * (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2583 }
2584 }
2585 }
2586
2587 double temp_Tm = dH * 1000 / (dS + 1.987 * log(probe_concentration / 4));
2588 Tm = 1 / (1 / temp_Tm + d_magn_corr_value) - 273.15;
2589 return Tm;
2590 }
2591 }
2592
2593
2594 double
probcompute_xia_98(char * s1,double na_concentration,double probe_concentration)2595 probcompute_xia_98(char *s1,
2596 double na_concentration,
2597 double probe_concentration)
2598 {
2599 double dH, dS;
2600
2601 dS = 0;
2602 dH = 0;
2603 double salt_correction;
2604 double enthalpy[4][4] =
2605 { { -6.820, -11.400, -10.480, -9.380 },
2606 { -10.440, -13.390, -10.640, -10.480 },
2607 { -12.440, -14.880, -13.390, -11.400 },
2608 { -7.690, -12.440, -10.440, -6.820 } };
2609 double entropy[4][4] =
2610 { { -19.0, -29.5, -27.1, -26.7 },
2611 { -26.9, -32.7, -26.7, -27.1 },
2612 { -32.5, -36.9, -32.7, -29.5 },
2613 { -20.5, -32.5, -26.9, -19.0 }, };
2614 int posst;
2615 posst = 0;
2616 int converted = encode_char(toupper(s1[posst])) - 1;
2617 int seqlen = strlen(s1);
2618 double Tm;
2619
2620 /* terminal correction */
2621 if (s1[0] == 'G' || s1[0] == 'C') {
2622 dH += 3.61;
2623 dS += -1.5;
2624 }
2625
2626 if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2627 dH += 7.73;
2628 dS += 9.0;
2629 }
2630
2631 if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2632 dH += 3.61;
2633 dS += -1.5;
2634 }
2635
2636 if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2637 dH += 7.73;
2638 dS += 9.0;
2639 }
2640
2641 /* compute dH and dH based on sequence */
2642 for (posst = 1; posst < seqlen; posst++) {
2643 int nextconverted = encode_char(toupper(s1[posst])) - 1;
2644 dH += enthalpy[converted][nextconverted];
2645 dS += entropy[converted][nextconverted];
2646 converted = nextconverted;
2647 }
2648 dS += 0.368 * (seqlen - 1) * log(na_concentration);
2649 Tm = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2650 return Tm;
2651 }
2652
2653
2654 double
probcompute_sug_95(char * s1,double na_concentration,double probe_concentration)2655 probcompute_sug_95(char *s1,
2656 double na_concentration,
2657 double probe_concentration)
2658 {
2659 double dH, dS;
2660
2661 dS = 0;
2662 dH = 0;
2663 double salt_correction;
2664 double enthalpy[4][4] =
2665 { { -11.500, -7.800, -7.000, -8.300 },
2666 { -10.400, -12.800, -16.300, -9.100 },
2667 { -8.600, -8.000, -9.300, -5.900 },
2668 { -7.800, -5.500, -9.000, -7.800 } };
2669 double entropy[4][4] =
2670 { { -36.4, -21.6, -19.7, -23.9 },
2671 { -28.4, -31.9, -47.1, -23.5 },
2672 { -22.9, -17.1, -23.2, -12.3 },
2673 { -23.2, -13.5, -26.1, -21.9 } };
2674 int posst;
2675 posst = 0;
2676 int converted = encode_char(toupper(s1[posst])) - 1;
2677 int seqlen = strlen(s1);
2678 double Tm;
2679
2680 /* salt entropy correction von Ahsen 1999 */
2681 /* terminal correction */
2682 if (s1[0] == 'G' || s1[0] == 'C') {
2683 dH += 0.95;
2684 dS += -1.95;
2685 }
2686
2687 if (s1[0] == 'A' || s1[0] == 'T' || s1[0] == 'U') {
2688 dH += 0.95;
2689 dS += -1.95;
2690 }
2691
2692 if (s1[seqlen - 1] == 'G' || s1[seqlen - 1] == 'C') {
2693 dH += 0.95;
2694 dS += -1.95;
2695 }
2696
2697 if (s1[seqlen - 1] == 'A' || s1[seqlen - 1] == 'T' || s1[seqlen - 1] == 'U') {
2698 dH += 0.95;
2699 dS += -1.95;
2700 }
2701
2702 /* compute dH and dH based on sequence */
2703 for (posst = 1; posst < seqlen; posst++) {
2704 int nextconverted = encode_char(toupper(s1[posst])) - 1;
2705 dH += enthalpy[converted][nextconverted];
2706 dS += entropy[converted][nextconverted];
2707 converted = nextconverted;
2708 }
2709 /* salt entropy correction von Ahsen 1999 */
2710 dS += 0.368 * (seqlen - 1) * log(na_concentration);
2711 Tm = (1000 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2712 return Tm;
2713 }
2714
2715
2716 double
probcompute_newparameters(char * s1,double k_concentration,double tris_concentration,double mg_concentration,double na_concentration,double probe_concentration)2717 probcompute_newparameters(char *s1,
2718 double k_concentration,
2719 double tris_concentration,
2720 double mg_concentration,
2721 double na_concentration,
2722 double probe_concentration)
2723 {
2724 /* ////////////////////////////////////////// */
2725 /* Folding Init */
2726 /* ////////////////////////////////////////// */
2727 vrna_md_t md;
2728 vrna_param_t *P = NULL;
2729
2730 set_model_details(&md);
2731
2732 if ((!P) || (fabs(P->temperature - temperature) > 1e-6)) {
2733 update_fold_params();
2734 P = vrna_params(&md);
2735 make_pair_matrix();
2736 }
2737
2738 /* ///////////////////////////////////////// */
2739 /* Salt Variable Declaration */
2740 /* ///////////////////////////////////////// */
2741
2742 double d_temp; /* melting temperature */
2743 double d_temp_na; /*melting temperature in 1M na+ */
2744 double d_salt_corr_value = 0.0;
2745 double d_magn_corr_value = 0.0;
2746
2747 double d_fgc; /* gc content */
2748 double d_conc_monovalents = k_concentration + na_concentration + tris_concentration / 2;
2749 double d_ratio_ions = sqrt(mg_concentration) / d_conc_monovalents;
2750 double d_a = 3.92 / 100000.0; /*Parameters from the article of Owczarzy*/
2751 double d_b = 9.11 / 1000000; /*Parameters from the article of Owczarzy*/
2752 double d_c = 6.26 / 100000; /*Parameters from the article of Owczarzy*/
2753 double d_d = 1.42 / 100000; /*Parameters from the article of Owczarzy*/
2754 double d_e = 4.82 / 10000; /*Parameters from the article of Owczarzy*/
2755 double d_f = 5.25 / 10000; /*Parameters from the article of Owczarzy*/
2756 double d_g = 8.31 / 100000; /*Parameters from the article of Owczarzy*/
2757
2758 /* ////////////////////////////////////// */
2759 /* Sequence Variable Declaration */
2760 /* ////////////////////////////////////// */
2761
2762 int seqlen = strlen(s1);
2763 int posst;
2764 posst = 0;
2765 int converted = encode_char(toupper(s1[posst]));
2766 int revconverted = abs(5 - converted);
2767
2768 /* ////////////////////////////////////// */
2769 /* Energy Variable Declaration */
2770 /* ////////////////////////////////////// */
2771
2772 double dT, dG, dS, dH;
2773 dS = 0;
2774 dT = 0;
2775 dH = 0;
2776
2777 /* ////////////////////////////////////// */
2778 /* Computation */
2779 /* ////////////////////////////////////// */
2780
2781
2782 /* GC-Content */
2783 d_fgc = 0;
2784 for (posst = 0; posst < seqlen; posst++)
2785 if (s1[posst] == 'G' || s1[posst] == 'C' || s1[posst] == 'g' || s1[posst] == 'c')
2786 d_fgc++;
2787
2788 d_fgc = d_fgc / seqlen;
2789
2790 /* dH dG dS */
2791 int type = pair[converted][revconverted];
2792 /* Add twice the duplex penalty */
2793 dG = 2 * P->DuplexInit;
2794 dH = 2 * DuplexInitdH;
2795 dS = (dH - dG) / (37 + K0) * 10;
2796 if (type > 2) {
2797 dG += P->TerminalAU;
2798 dH += TerminalAUdH;
2799 dS += (TerminalAUdH - P->TerminalAU) / (37 + K0) * 10;
2800 }
2801
2802 for (posst = 1; posst < seqlen; posst++) {
2803 int nextconverted = encode_char(toupper(s1[posst]));
2804 int nextrevconverted = abs(5 - nextconverted);
2805 int nexttype = pair[nextconverted][nextrevconverted];
2806 dG += stack37[rtype[type]][nexttype];
2807 dH += stackdH[rtype[type]][nexttype];
2808 dS += (stackdH[rtype[type]][nexttype] - stack37[rtype[type]][nexttype]) / (37 + K0) * 10;
2809 converted = nextconverted;
2810 revconverted = nextrevconverted;
2811 type = nexttype;
2812 }
2813 if (type > 2) {
2814 dG += P->TerminalAU;
2815 dH += TerminalAUdH;
2816 dS += (TerminalAUdH - P->TerminalAU) / (37 + K0) * 10;
2817 }
2818
2819 /* add initiation again because of symmetry. */
2820
2821
2822 if (mg_concentration == 0) {
2823 dS += 0.368 * (seqlen - 1) * log(na_concentration);
2824 double Tm;
2825 Tm = (10 * dH) / (dS + 1.987 * log(probe_concentration / 4)) - 273.15;
2826 return Tm;
2827 } else {
2828 double single_charged = k_concentration + tris_concentration / 2 + na_concentration;
2829 double d_ratio_ions = sqrt(mg_concentration / single_charged);
2830 if (single_charged == 0) {
2831 d_magn_corr_value =
2832 d_a -
2833 d_b * log(mg_concentration) +
2834 d_fgc * (d_c + d_d * log(mg_concentration)) +
2835 1 / (2 * ((double)seqlen - 1)) *
2836 (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2837 } else {
2838 if (d_ratio_ions < 0.22) {
2839 d_magn_corr_value = (4.29 * d_fgc - 3.95) * 1 / 100000 * log(single_charged) + 9.40 * 1 / 1000000 * pow(log(single_charged), 2);
2840 } else {
2841 if (d_ratio_ions < 6) {
2842 d_a = 3.92 / 100000 * (0.843 - 0.352 * sqrt(single_charged) * log(single_charged));
2843 d_d = 1.42 / 100000 * (1.279 - 4.03 / 1000 * log(single_charged) - 8.03 / 1000 * pow(log(single_charged), 2));
2844 d_g = 8.31 / 100000 * (0.486 - 0.258 * log(single_charged) + 5.25 / 1000 * pow(log(single_charged), 3));
2845
2846 d_magn_corr_value = d_a - d_b * log
2847 (mg_concentration) + d_fgc *
2848 (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2849 } else {
2850 d_magn_corr_value = d_a - d_b * log(mg_concentration) + d_fgc * (d_c + d_d * log(mg_concentration)) + 1 / (2 * ((double)seqlen - 1)) * (-d_e + d_f * log(mg_concentration) + d_g * pow(log(mg_concentration), 2));
2851 }
2852 }
2853 }
2854
2855 double temp_Tm = dH * 10 / (dS + 1.987 * log(probe_concentration / 4));
2856 int Tm = 1 / (1 / temp_Tm + d_magn_corr_value) - 273.15;
2857 return Tm;
2858 }
2859 }
2860