1 /*
2 * Distances of Secondary Structure Ensembles
3 * Peter F Stadler, Ivo L Hofacker, Sebastian Bonhoeffer
4 * Vienna RNA Package
5 */
6
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <math.h>
14 #include <ctype.h>
15 #include <errno.h>
16 #include <string.h>
17 #include <unistd.h>
18 #include "ViennaRNA/part_func.h"
19 #include "ViennaRNA/fold_vars.h"
20 #include "ViennaRNA/dist_vars.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/utils/strings.h"
23 #include "ViennaRNA/plotting/probabilities.h"
24 #include "ViennaRNA/io/utils.h"
25 #include "ViennaRNA/params/io.h"
26 #include "ViennaRNA/profiledist.h"
27 #include "RNApdist_cmdl.h"
28
29
30 #define MAXLENGTH 10000
31 #define MAXSEQ 1000
32
33 PRIVATE void command_line(int argc,
34 char *argv[],
35 vrna_md_t *md);
36
37
38 PRIVATE void print_aligned_lines(FILE *somewhere);
39
40
41 PRIVATE char task;
42 PRIVATE char outfile[FILENAME_MAX_LENGTH];
43 PRIVATE char ruler[] = "....,....1....,....2....,....3....,....4"
44 "....,....5....,....6....,....7....,....8";
45 static int noconv = 0;
46
47 int
main(int argc,char * argv[])48 main(int argc,
49 char *argv[])
50
51 {
52 float *T[MAXSEQ];
53 int i, j, istty, n = 0;
54 int type, taxa_list = 0;
55 float dist;
56 FILE *somewhere = NULL;
57 char *structure;
58 char *line = NULL, fname[FILENAME_MAX_LENGTH], *list_title = NULL;
59 plist *pr_pl, *mfe_pl;
60 vrna_md_t md;
61
62 /* assign globally stored model details */
63 set_model_details(&md);
64
65 pr_pl = mfe_pl = NULL;
66
67 command_line(argc, argv, &md);
68
69 if ((outfile[0] == '\0') && (task == 'm') && edit_backtrack)
70 strcpy(outfile, "backtrack.file");
71
72 if (outfile[0] != '\0')
73 somewhere = fopen(outfile, "w");
74
75 if (somewhere == NULL)
76 somewhere = stdout;
77
78 istty = (isatty(fileno(stdout)) && isatty(fileno(stdin)));
79
80 while (1) {
81 if ((istty) && (n == 0)) {
82 printf("\nInput sequence; @ to quit\n");
83 printf("%s\n", ruler);
84 }
85
86 type = 0; /* what is this type anyway? */
87 do {
88 /* get sequence to fold */
89 *fname = '\0';
90
91 if (line != NULL)
92 free(line);
93
94 /* end do...while() loop if no more input */
95 if ((line = vrna_read_line(stdin)) == NULL) {
96 type = 999;
97 break;
98 }
99
100 /* end do...while() loop if user requested abort */
101 if (line[0] == '@')
102 type = 999;
103
104 if (line[0] == '*') {
105 if (taxa_list == 0) {
106 if (task == 'm')
107 taxa_list = 1;
108
109 printf("%s\n", line);
110 type = 0;
111 } else {
112 list_title = strdup(line);
113 type = 888;
114 }
115 }
116
117 /* we currently read a fasta header */
118 if (line[0] == '>') {
119 if (sscanf(line, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname) != 0)
120 strcat(fname, "_dp.ps");
121
122 if (taxa_list)
123 printf("%d : %s\n", n + 1, line + 1);
124 else
125 printf("%s\n", line);
126
127 type = 0;
128 }
129
130 /* this seems to be a crude way to identify an RNA sequence */
131 if (isalpha(line[0])) {
132 char *cp;
133 cp = strchr(line, ' ');
134 if (cp)
135 *cp = '\0';
136
137 type = 1;
138 }
139 } while (type == 0);
140
141 if ((task == 'm') && (type > 800)) {
142 if (taxa_list)
143 printf("* END of taxa list\n");
144
145 printf("> p %d (pdist)\n", n);
146 for (i = 1; i < n; i++) {
147 for (j = 0; j < i; j++) {
148 printf("%g ", profile_edit_distance(T[i], T[j]));
149 if (edit_backtrack)
150 fprintf(somewhere, "> %d %d\n", i + 1, j + 1);
151
152 print_aligned_lines(somewhere);
153 }
154 printf("\n");
155 }
156 if (type == 888) {
157 /* do another distance matrix */
158 n = 0;
159 printf("%s\n", list_title);
160 free(list_title);
161 }
162 }
163
164 if (type > 800) {
165 for (i = 0; i < n; i++)
166 free_profile(T[i]);
167 if (type == 888)
168 continue;
169
170 if (outfile[0] != '\0')
171 (void)fclose(somewhere);
172
173 if (line != NULL)
174 free(line);
175
176 return 0; /* finito */
177 }
178
179 /* convert DNA alphabet to RNA if not explicitely switched off */
180 if (!noconv)
181 vrna_seq_toRNA(line);
182
183 /* convert sequence to uppercase letters only */
184 vrna_seq_toupper(line);
185
186 if (*fname == '\0')
187 sprintf(fname, "%d_dp.ps", n + 1);
188
189 vrna_fold_compound_t *vc = vrna_fold_compound(line, &md, VRNA_OPTION_MFE | VRNA_OPTION_PF);
190
191 structure = (char *)vrna_alloc((vc->length + 1) * sizeof(char));
192
193 (void)vrna_pf(vc, structure);
194
195 pr_pl = vrna_plist_from_probs(vc, 1e-5);
196 /* fake plist for lower part, since it stays empty */
197 mfe_pl = (plist *)vrna_alloc(sizeof(plist));
198 mfe_pl[0].i = mfe_pl[0].j = 0;
199
200 /* call threadsafe dot plot printing function */
201 PS_dot_plot_list(line, fname, pr_pl, mfe_pl, "");
202
203 T[n] = Make_bp_profile_bppm(vc->exp_matrices->probs, vc->length);
204
205 if ((istty) && (task == 'm'))
206 printf("%s\n", structure);
207
208 free(structure);
209 free(mfe_pl);
210 free(pr_pl);
211 vrna_fold_compound_free(vc);
212
213 n++;
214 switch (task) {
215 case 'p':
216 if (n == 2) {
217 dist = profile_edit_distance(T[0], T[1]);
218 printf("%g\n", dist);
219 print_aligned_lines(somewhere);
220 free_profile(T[0]);
221 free_profile(T[1]);
222 n = 0;
223 }
224
225 break;
226 case 'f':
227 if (n > 1) {
228 dist = profile_edit_distance(T[1], T[0]);
229 printf("%g\n", dist);
230 print_aligned_lines(somewhere);
231 free_profile(T[1]);
232 n = 1;
233 }
234
235 break;
236 case 'c':
237 if (n > 1) {
238 dist = profile_edit_distance(T[1], T[0]);
239 printf("%g\n", dist);
240 print_aligned_lines(somewhere);
241 free_profile(T[0]);
242 T[0] = T[1];
243 n = 1;
244 }
245
246 break;
247
248 case 'm':
249 break;
250
251 default:
252 vrna_message_error("This can't happen.");
253 } /* END switch task */
254 (void)fflush(stdout);
255 } /* END while */
256 if (line != NULL)
257 free(line);
258
259 return 0;
260 }
261
262
263 /* ----------------------------------------------------------------- */
264
265 PRIVATE void
command_line(int argc,char * argv[],vrna_md_t * md)266 command_line(int argc,
267 char *argv[],
268 vrna_md_t *md)
269 {
270 char *ns_bases = NULL;
271 char *ParamFile = NULL;
272 struct RNApdist_args_info args_info;
273
274 task = 'p';
275
276 /*
277 #############################################
278 # check the command line parameters
279 #############################################
280 */
281 if (RNApdist_cmdline_parser(argc, argv, &args_info) != 0)
282 exit(1);
283
284 /* temperature */
285 if (args_info.temp_given)
286 md->temperature = temperature = args_info.temp_arg;
287
288 /* do not take special tetra loop energies into account */
289 if (args_info.noTetra_given)
290 md->special_hp = tetra_loop = 0;
291
292 /* set dangle model */
293 if (args_info.dangles_given) {
294 md->dangles = dangles = args_info.dangles_arg;
295 if (dangles)
296 md->dangles = dangles = 2;
297 }
298
299 /* set energy model */
300 if (args_info.energyModel_given)
301 md->energy_set = energy_set = args_info.energyModel_arg;
302
303 /* do not allow weak pairs */
304 if (args_info.noLP_given)
305 md->noLP = noLonelyPairs = 1;
306
307 /* do not allow wobble pairs (GU) */
308 if (args_info.noGU_given)
309 md->noGU = noGU = 1;
310
311 /* do not allow weak closing pairs (AU,GU) */
312 if (args_info.noClosingGU_given)
313 md->noGUclosure = no_closingGU = 1;
314
315 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
316 if (args_info.noconv_given)
317 noconv = 1;
318
319 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
320 if (args_info.nsp_given)
321 ns_bases = strdup(args_info.nsp_arg);
322
323 /* take another energy parameter set */
324 if (args_info.paramFile_given)
325 ParamFile = strdup(args_info.paramFile_arg);
326
327 if (args_info.compare_given) {
328 switch (args_info.compare_arg[0]) {
329 case 'p':
330 case 'm':
331 case 'f':
332 case 'c':
333 task = args_info.compare_arg[0];
334 break;
335 default:
336 RNApdist_cmdline_parser_print_help();
337 exit(EXIT_FAILURE);
338 }
339 }
340
341 if (args_info.backtrack_given) {
342 if (strcmp(args_info.backtrack_arg, "none"))
343 strncpy(outfile, args_info.backtrack_arg, FILENAME_MAX_LENGTH - 1);
344
345 edit_backtrack = 1;
346 }
347
348 /* free allocated memory of command line data structure */
349 RNApdist_cmdline_parser_free(&args_info);
350
351 /* do some preparations */
352 if (ParamFile != NULL) {
353 if (!strcmp(ParamFile, "DNA"))
354 vrna_params_load_DNA_Mathews2004();
355 else
356 vrna_params_load(ParamFile, VRNA_PARAMETER_FORMAT_DEFAULT);
357 }
358
359 if (ns_bases != NULL)
360 vrna_md_set_nonstandards(md, ns_bases);
361 }
362
363
364 /* ------------------------------------------------------------------------- */
365
366 PRIVATE void
print_aligned_lines(FILE * somewhere)367 print_aligned_lines(FILE *somewhere)
368 {
369 if (edit_backtrack)
370 fprintf(somewhere, "%s\n%s\n", aligned_line[0], aligned_line[1]);
371 }
372
373
374 /*--------------------------------------------------------------------------*/
375