1 /*
2 * Ineractive access to suboptimal folding
3 *
4 * c Ivo L Hofacker
5 * Vienna RNA package
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11
12 #include <config.h>
13 #include <assert.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <math.h>
18 #include <ctype.h>
19 #include <string.h>
20 #include "ViennaRNA/part_func.h"
21 #include "ViennaRNA/fold.h"
22 #include "ViennaRNA/cofold.h"
23 #include "ViennaRNA/fold_vars.h"
24 #include "ViennaRNA/utils/basic.h"
25 #include "ViennaRNA/utils/strings.h"
26 #include "ViennaRNA/params/io.h"
27 #include "ViennaRNA/subopt.h"
28 #include "ViennaRNA/params/basic.h"
29 #include "ViennaRNA/constraints/basic.h"
30 #include "ViennaRNA/constraints/SHAPE.h"
31 #include "ViennaRNA/io/file_formats.h"
32 #include "ViennaRNA/io/utils.h"
33 #include "ViennaRNA/commands.h"
34 #include "RNAsubopt_cmdl.h"
35 #include "gengetopt_helper.h"
36 #include "input_id_helpers.h"
37
38 #include "ViennaRNA/color_output.inc"
39
40 PRIVATE void putoutzuker(FILE *output,
41 vrna_subopt_solution_t *zukersolution);
42
43
44 struct nr_en_data {
45 FILE *output;
46 vrna_fold_compound_t *fc;
47 double kT;
48 double ens_en;
49 };
50
51
52 PRIVATE void
53 print_samples(const char *structure,
54 void *data);
55
56
57 PRIVATE void
58 print_samples_en(const char *structure,
59 void *data);
60
61
62 int
main(int argc,char * argv[])63 main(int argc,
64 char *argv[])
65 {
66 FILE *input, *output;
67 struct RNAsubopt_args_info args_info;
68 char *rec_sequence, *rec_id,
69 **rec_rest, *orig_sequence, *constraints_file, *cstruc,
70 *structure, *shape_file, *shape_method, *shape_conversion,
71 *infile, *outfile, *filename_delim;
72 unsigned int rec_type, read_opt;
73 int i, length, cl, istty, delta, n_back, noconv, dos, zuker,
74 with_shapes, verbose, enforceConstraints, st_back_en, batch,
75 tofile, filename_full, canonicalBPonly, nonRedundant;
76 double deltap;
77 vrna_md_t md;
78 dataset_id id_control;
79 vrna_cmd_t commands;
80
81 do_backtrack = 1;
82 delta = 100;
83 deltap = n_back = noconv = dos = zuker = 0;
84 rec_type = read_opt = 0;
85 rec_id = rec_sequence = orig_sequence = NULL;
86 rec_rest = NULL;
87 cstruc = structure = NULL;
88 verbose = 0;
89 st_back_en = 0;
90 infile = NULL;
91 outfile = NULL;
92 output = NULL;
93 tofile = 0;
94 filename_full = 0;
95 canonicalBPonly = 0;
96 commands = NULL;
97 nonRedundant = 0;
98
99 set_model_details(&md);
100
101 /* switch on unique multibranch loop decomposition */
102 md.uniq_ML = 1;
103
104 /*
105 #############################################
106 # check the command line parameters
107 #############################################
108 */
109 if (RNAsubopt_cmdline_parser(argc, argv, &args_info) != 0)
110 exit(1);
111
112 /* parse options for ID manipulation */
113 ggo_get_id_control(args_info, id_control, "Sequence", "sequence", "_", 4, 1);
114
115 /* get basic set of model details */
116 ggo_get_md_eval(args_info, md);
117 ggo_get_md_fold(args_info, md);
118 ggo_get_md_part(args_info, md);
119 ggo_get_circ(args_info, md.circ);
120
121 /* temperature */
122 ggo_get_temperature(args_info, md.temperature);
123
124 /* check dangle model */
125 if ((md.dangles < 0) || (md.dangles > 3)) {
126 vrna_message_warning("required dangle model not implemented, falling back to default dangles=2");
127 md.dangles = dangles = 2;
128 }
129
130 /* SHAPE reactivity data */
131 ggo_get_SHAPE(args_info, with_shapes, shape_file, shape_method, shape_conversion);
132
133 ggo_get_constraints_settings(args_info,
134 fold_constrained,
135 constraints_file,
136 enforceConstraints,
137 batch);
138
139 if (args_info.verbose_given)
140 verbose = 1;
141
142 /* enforce canonical base pairs in any case? */
143 if (args_info.canonicalBPonly_given)
144 canonicalBPonly = 1;
145
146 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
147 if (args_info.noconv_given)
148 noconv = 1;
149
150 /* energy range */
151 if (args_info.deltaEnergy_given)
152 delta = (int)(0.1 + args_info.deltaEnergy_arg * 100);
153
154 /* energy range after post evaluation */
155 if (args_info.deltaEnergyPost_given)
156 deltap = args_info.deltaEnergyPost_arg;
157
158 /* sorted output */
159 if (args_info.sorted_given) {
160 subopt_sorted = VRNA_SORT_BY_ENERGY_LEXICOGRAPHIC_ASC;
161 if (args_info.en_only_given)
162 subopt_sorted = VRNA_SORT_BY_ENERGY_ASC;
163 }
164
165 /* stochastic backtracking */
166 if (args_info.stochBT_given) {
167 n_back = args_info.stochBT_arg;
168 vrna_init_rand();
169 md.compute_bpp = 0;
170 }
171
172 if (args_info.stochBT_en_given) {
173 n_back = args_info.stochBT_en_arg;
174 st_back_en = 1;
175 md.compute_bpp = 0;
176 vrna_init_rand();
177 }
178
179 /* density of states */
180 if (args_info.dos_given) {
181 dos = 1;
182 print_energy = -999999;
183 }
184
185 /* logarithmic multiloop energies */
186 if (args_info.logML_given)
187 md.logML = logML = 1;
188
189 /* zuker subopts */
190 if (args_info.zuker_given)
191 zuker = 1;
192
193 if (zuker) {
194 if (md.circ) {
195 vrna_message_warning("Sorry, zuker subopts not yet implemented for circfold");
196 RNAsubopt_cmdline_parser_print_help();
197 exit(1);
198 } else if (n_back > 0) {
199 vrna_message_warning("Can't do zuker subopts and stochastic subopts at the same time");
200 RNAsubopt_cmdline_parser_print_help();
201 exit(1);
202 } else if (md.gquad) {
203 vrna_message_warning("G-quadruplex support for Zuker subopts not implemented yet");
204 RNAsubopt_cmdline_parser_print_help();
205 exit(1);
206 }
207 }
208
209 if (md.gquad && (n_back > 0)) {
210 vrna_message_warning("G-quadruplex support for stochastic backtracking not implemented yet");
211 RNAsubopt_cmdline_parser_print_help();
212 exit(1);
213 }
214
215 if (args_info.infile_given)
216 infile = strdup(args_info.infile_arg);
217
218 if (args_info.outfile_given) {
219 tofile = 1;
220 if (args_info.outfile_arg)
221 outfile = strdup(args_info.outfile_arg);
222 }
223
224 /* filename sanitize delimiter */
225 if (args_info.filename_delim_given)
226 filename_delim = strdup(args_info.filename_delim_arg);
227 else if (get_id_delim(id_control))
228 filename_delim = strdup(get_id_delim(id_control));
229 else
230 filename_delim = NULL;
231
232 if ((filename_delim) && isspace(*filename_delim)) {
233 free(filename_delim);
234 filename_delim = NULL;
235 }
236
237 /* full filename from FASTA header support */
238 if (args_info.filename_full_given)
239 filename_full = 1;
240
241 /* non-redundant backtracing */
242 if (args_info.nonRedundant_given)
243 nonRedundant = 1;
244
245 if (args_info.commands_given)
246 commands = vrna_file_commands_read(args_info.commands_arg,
247 VRNA_CMD_PARSE_HC | VRNA_CMD_PARSE_SC);
248
249 /* free allocated memory of command line data structure */
250 RNAsubopt_cmdline_parser_free(&args_info);
251
252 /*
253 #############################################
254 # begin initializing
255 #############################################
256 */
257
258 if (infile) {
259 input = fopen((const char *)infile, "r");
260 if (!input)
261 vrna_message_error("Could not read input file");
262 } else {
263 input = stdin;
264 }
265
266 istty = (!infile) && isatty(fileno(stdout)) && isatty(fileno(stdin));
267
268 /* print user help if we get input from tty */
269 if (istty) {
270 if (!zuker)
271 print_comment(stdout, "Use '&' to connect 2 sequences that shall form a complex.");
272
273 if (fold_constrained) {
274 vrna_message_constraint_options(
275 VRNA_CONSTRAINT_DB_DOT | VRNA_CONSTRAINT_DB_X | VRNA_CONSTRAINT_DB_ANG_BRACK |
276 VRNA_CONSTRAINT_DB_RND_BRACK);
277 vrna_message_input_seq("Input sequence (upper or lower case) followed by structure constraint");
278 } else {
279 vrna_message_input_seq_simple();
280 }
281 }
282
283 /* set options we wanna pass to vrna_file_fasta_read_record() */
284 if (istty)
285 read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
286
287 if (!fold_constrained)
288 read_opt |= VRNA_INPUT_NO_REST;
289
290 /*
291 #############################################
292 # main loop: continue until end of file
293 #############################################
294 */
295 while (
296 !((rec_type = vrna_file_fasta_read_record(&rec_id, &rec_sequence, &rec_rest, input, read_opt))
297 & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))) {
298 char *SEQ_ID = NULL;
299 char *v_file_name = NULL;
300 char *tmp_string = NULL;
301 int maybe_multiline = 0;
302
303 /*
304 ########################################################
305 # init everything according to the data we've read
306 ########################################################
307 */
308 if (rec_id) {
309 maybe_multiline = 1;
310 /* remove '>' from FASTA header */
311 rec_id = memmove(rec_id, rec_id + 1, strlen(rec_id));
312 }
313
314 /* construct the sequence ID */
315 set_next_id(&rec_id, id_control);
316 SEQ_ID = fileprefix_from_id(rec_id, id_control, filename_full);
317
318 if (tofile) {
319 /* prepare the file name */
320 if (outfile)
321 v_file_name = vrna_strdup_printf("%s", outfile);
322 else
323 v_file_name = (SEQ_ID) ?
324 vrna_strdup_printf("%s.sub", SEQ_ID) :
325 vrna_strdup_printf("RNAsubopt_output.sub");
326
327 tmp_string = vrna_filename_sanitize(v_file_name, filename_delim);
328 free(v_file_name);
329 v_file_name = tmp_string;
330
331 if (infile && !strcmp(infile, v_file_name))
332 vrna_message_error("Input and output file names are identical");
333
334 output = fopen((const char *)v_file_name, "a");
335 if (!output)
336 vrna_message_error("Failed to open file for writing");
337 } else {
338 output = stdout;
339 }
340
341 /* convert DNA alphabet to RNA if not explicitely switched off */
342 if (!noconv)
343 vrna_seq_toRNA(rec_sequence);
344
345 /* store case-unmodified sequence */
346 orig_sequence = strdup(rec_sequence);
347 /* convert sequence to uppercase letters only */
348 vrna_seq_toupper(rec_sequence);
349
350 vrna_fold_compound_t *vc =
351 vrna_fold_compound(rec_sequence, &md,
352 VRNA_OPTION_MFE | (circ ? 0 : VRNA_OPTION_HYBRID) |
353 ((n_back > 0) ? VRNA_OPTION_PF : 0));
354 length = vc->length;
355
356 structure = (char *)vrna_alloc(sizeof(char) * (length + 1));
357
358 /* parse the rest of the current dataset to obtain a structure constraint */
359 if (fold_constrained) {
360 if (constraints_file) {
361 vrna_constraints_add(vc, constraints_file, VRNA_OPTION_DEFAULT);
362 } else {
363 cstruc = NULL;
364 int cp = -1;
365 unsigned int coptions = (maybe_multiline) ? VRNA_OPTION_MULTILINE : 0;
366 cstruc = vrna_extract_record_rest_structure((const char **)rec_rest, 0, coptions);
367 cstruc = vrna_cut_point_remove(cstruc, &cp);
368 if (vc->cutpoint != cp) {
369 vrna_message_error("Sequence and Structure have different cut points.\n"
370 "sequence: %d, structure: %d",
371 vc->cutpoint, cp);
372 }
373
374 cl = (cstruc) ? (int)strlen(cstruc) : 0;
375
376 if (cl == 0)
377 vrna_message_warning("Structure constraint is missing");
378 else if (cl < length)
379 vrna_message_warning("Structure constraint is shorter than sequence");
380 else if (cl > length)
381 vrna_message_error("Structure constraint is too long");
382
383 if (cstruc) {
384 /* convert pseudo-dot-bracket to actual hard constraints */
385 unsigned int constraint_options = VRNA_CONSTRAINT_DB_DEFAULT;
386
387 if (enforceConstraints)
388 constraint_options |= VRNA_CONSTRAINT_DB_ENFORCE_BP;
389
390 if (canonicalBPonly)
391 constraint_options |= VRNA_CONSTRAINT_DB_CANONICAL_BP;
392
393 vrna_constraints_add(vc, (const char *)cstruc, constraint_options);
394 }
395 }
396 }
397
398 if (with_shapes) {
399 vrna_constraints_add_SHAPE(vc,
400 shape_file,
401 shape_method,
402 shape_conversion,
403 verbose,
404 VRNA_OPTION_MFE | ((n_back > 0) ? VRNA_OPTION_PF : 0));
405 }
406
407 if (commands)
408 vrna_commands_apply(vc,
409 commands,
410 VRNA_CMD_PARSE_HC | VRNA_CMD_PARSE_SC);
411
412 if (istty) {
413 if (cut_point == -1) {
414 vrna_message_info(stdout, "length = %d", length);
415 } else {
416 vrna_message_info(stdout,
417 "length1 = %d\nlength2 = %d",
418 cut_point - 1,
419 length - cut_point + 1);
420 }
421 }
422
423 /*
424 ########################################################
425 # begin actual computations
426 ########################################################
427 */
428
429 if ((logML != 0 || md.dangles == 1 || md.dangles == 3) && dos == 0)
430 if (deltap <= 0)
431 deltap = delta / 100. + 0.001;
432
433 if (deltap > 0)
434 print_energy = deltap;
435
436 /* stochastic backtracking */
437 if (n_back > 0) {
438 double mfe, kT, ens_en;
439 unsigned int options = (nonRedundant) ?
440 VRNA_PBACKTRACK_NON_REDUNDANT :
441 VRNA_PBACKTRACK_DEFAULT;
442
443 if (vc->cutpoint != -1)
444 vrna_message_error("Boltzmann sampling for cofolded structures not implemented (yet)!");
445
446 print_fasta_header(output, rec_id);
447
448 fprintf(output, "%s\n", rec_sequence);
449
450 mfe = vrna_mfe(vc, structure);
451 /* rescale Boltzmann factors according to predicted MFE */
452 vrna_exp_params_rescale(vc, &mfe);
453
454 vrna_mx_mfe_free(vc);
455
456 /* ignore return value, we are not interested in the free energy */
457 ens_en = vrna_pf(vc, structure);
458 kT = vc->exp_params->kT / 1000.;
459
460 if (st_back_en) {
461 struct nr_en_data dat;
462 dat.output = output;
463 dat.fc = vc;
464 dat.kT = kT;
465 dat.ens_en = ens_en;
466
467 vrna_pbacktrack_cb(vc,
468 n_back,
469 &print_samples_en,
470 (void *)&dat,
471 options);
472 } else {
473 vrna_pbacktrack_cb(vc,
474 n_back,
475 &print_samples,
476 (void *)output,
477 options);
478 }
479 }
480 /* normal subopt */
481 else if (!zuker) {
482 /* first lines of output (suitable for sort +1n) */
483 if (rec_id) {
484 char *head = vrna_strdup_printf("%s [%d]", rec_id, delta);
485 print_fasta_header(output, head);
486 free(head);
487 }
488
489 vrna_subopt(vc, delta, subopt_sorted, output);
490
491 if (dos) {
492 int i;
493 for (i = 0; i <= MAXDOS && i <= delta / 10; i++) {
494 char *tline = vrna_strdup_printf("%4d %6d", i, density_of_states[i]);
495 print_table(output, NULL, tline);
496 free(tline);
497 }
498 }
499 }
500 /* Zuker suboptimals */
501 else {
502 vrna_subopt_solution_t *zr;
503
504 if (vc->cutpoint != -1)
505 vrna_message_error("Sorry, zuker subopts not yet implemented for cofold");
506
507 int i;
508 print_fasta_header(output, rec_id);
509
510 fprintf(output, "%s\n", rec_sequence);
511
512 zr = vrna_subopt_zuker(vc);
513
514 putoutzuker(output, zr);
515 (void)fflush(output);
516 for (i = 0; zr[i].structure; i++)
517 free(zr[i].structure);
518 free(zr);
519 }
520
521 (void)fflush(output);
522
523 /* clean up */
524 vrna_fold_compound_free(vc);
525
526 free(cstruc);
527
528 free(rec_id);
529 free(SEQ_ID);
530 free(rec_sequence);
531 free(orig_sequence);
532 free(structure);
533
534 /* free the rest of current dataset */
535 if (rec_rest) {
536 for (i = 0; rec_rest[i]; i++)
537 free(rec_rest[i]);
538 free(rec_rest);
539 }
540
541 rec_id = rec_sequence = orig_sequence = structure = cstruc = NULL;
542 rec_rest = NULL;
543
544 if (tofile && output) {
545 fclose(output);
546 output = NULL;
547 }
548
549 free(v_file_name);
550
551 if (with_shapes || (constraints_file && (!batch)))
552 break;
553
554 /* print user help for the next round if we get input from tty */
555 if (istty) {
556 if (!zuker)
557 print_comment(stdout, "Use '&' to connect 2 sequences that shall form a complex.");
558
559 if (fold_constrained) {
560 vrna_message_constraint_options(
561 VRNA_CONSTRAINT_DB_DOT | VRNA_CONSTRAINT_DB_X | VRNA_CONSTRAINT_DB_ANG_BRACK |
562 VRNA_CONSTRAINT_DB_RND_BRACK);
563 vrna_message_input_seq(
564 "Input sequence (upper or lower case) followed by structure constraint");
565 } else {
566 vrna_message_input_seq_simple();
567 }
568 }
569 }
570
571 if (infile && input)
572 fclose(input);
573
574 free(shape_method);
575 free(shape_conversion);
576 free(filename_delim);
577 vrna_commands_free(commands);
578
579 free_id_data(id_control);
580
581 return EXIT_SUCCESS;
582 }
583
584
585 PRIVATE void
print_samples(const char * structure,void * data)586 print_samples(const char *structure,
587 void *data)
588 {
589 if (structure)
590 print_structure((FILE *)data, structure, NULL);
591 }
592
593
594 PRIVATE void
print_samples_en(const char * structure,void * data)595 print_samples_en(const char *structure,
596 void *data)
597 {
598 if (structure) {
599 struct nr_en_data *d = (struct nr_en_data *)data;
600 FILE *output = d->output;
601 vrna_fold_compound_t *fc = d->fc;
602 double kT = d->kT;
603 double ens_en = d->ens_en;
604
605 double e = vrna_eval_structure(fc, structure);
606 double prob = exp((ens_en - e) / kT);
607 char *e_string = vrna_strdup_printf(" %6.2f %6g", e, prob);
608
609 print_structure(output, structure, e_string);
610
611 free(e_string);
612 }
613 }
614
615
616 PRIVATE void
putoutzuker(FILE * output,vrna_subopt_solution_t * zukersolution)617 putoutzuker(FILE *output,
618 vrna_subopt_solution_t *zukersolution)
619 {
620 int i;
621 char *e_string;
622
623 for (i = 0; zukersolution[i].structure; i++) {
624 e_string = vrna_strdup_printf(" [%6.2f]", zukersolution[i].energy);
625 print_structure(output, zukersolution[i].structure, e_string);
626 free(e_string);
627 }
628 return;
629 }
630