1 /*
2  *              Distances of Secondary Structure Ensembles
3  *        Peter F Stadler, Ivo L Hofacker, Sebastian Bonhoeffer
4  *                      Vienna RNA Package
5  */
6 
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <math.h>
14 #include <ctype.h>
15 #include <errno.h>
16 #include <string.h>
17 #include <unistd.h>
18 #include "ViennaRNA/part_func.h"
19 #include "ViennaRNA/fold_vars.h"
20 #include "ViennaRNA/dist_vars.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/utils/strings.h"
23 #include "ViennaRNA/plotting/probabilities.h"
24 #include "ViennaRNA/io/utils.h"
25 #include "ViennaRNA/params/io.h"
26 #include "ViennaRNA/profiledist.h"
27 #include "RNApdist_cmdl.h"
28 
29 
30 #define MAXLENGTH  10000
31 #define MAXSEQ      1000
32 
33 PRIVATE void command_line(int       argc,
34                           char      *argv[],
35                           vrna_md_t *md);
36 
37 
38 PRIVATE void print_aligned_lines(FILE *somewhere);
39 
40 
41 PRIVATE char  task;
42 PRIVATE char  outfile[FILENAME_MAX_LENGTH];
43 PRIVATE char  ruler[] = "....,....1....,....2....,....3....,....4"
44                         "....,....5....,....6....,....7....,....8";
45 static int    noconv = 0;
46 
47 int
main(int argc,char * argv[])48 main(int  argc,
49      char *argv[])
50 
51 {
52   float     *T[MAXSEQ];
53   int       i, j, istty, n = 0;
54   int       type, taxa_list = 0;
55   float     dist;
56   FILE      *somewhere = NULL;
57   char      *structure;
58   char      *line = NULL, fname[FILENAME_MAX_LENGTH], *list_title = NULL;
59   plist     *pr_pl, *mfe_pl;
60   vrna_md_t md;
61 
62   /* assign globally stored model details */
63   set_model_details(&md);
64 
65   pr_pl = mfe_pl = NULL;
66 
67   command_line(argc, argv, &md);
68 
69   if ((outfile[0] == '\0') && (task == 'm') && edit_backtrack)
70     strcpy(outfile, "backtrack.file");
71 
72   if (outfile[0] != '\0')
73     somewhere = fopen(outfile, "w");
74 
75   if (somewhere == NULL)
76     somewhere = stdout;
77 
78   istty = (isatty(fileno(stdout)) && isatty(fileno(stdin)));
79 
80   while (1) {
81     if ((istty) && (n == 0)) {
82       printf("\nInput sequence;  @ to quit\n");
83       printf("%s\n", ruler);
84     }
85 
86     type = 0; /* what is this type anyway? */
87     do {
88       /* get sequence to fold */
89       *fname = '\0';
90 
91       if (line != NULL)
92         free(line);
93 
94       /* end do...while() loop if no more input */
95       if ((line = vrna_read_line(stdin)) == NULL) {
96         type = 999;
97         break;
98       }
99 
100       /* end do...while() loop if user requested abort */
101       if (line[0] == '@')
102         type = 999;
103 
104       if (line[0] == '*') {
105         if (taxa_list == 0) {
106           if (task == 'm')
107             taxa_list = 1;
108 
109           printf("%s\n", line);
110           type = 0;
111         } else {
112           list_title  = strdup(line);
113           type        = 888;
114         }
115       }
116 
117       /* we currently read a fasta header */
118       if (line[0] == '>') {
119         if (sscanf(line, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname) != 0)
120           strcat(fname, "_dp.ps");
121 
122         if (taxa_list)
123           printf("%d : %s\n", n + 1, line + 1);
124         else
125           printf("%s\n", line);
126 
127         type = 0;
128       }
129 
130       /* this seems to be a crude way to identify an RNA sequence */
131       if (isalpha(line[0])) {
132         char *cp;
133         cp = strchr(line, ' ');
134         if (cp)
135           *cp = '\0';
136 
137         type = 1;
138       }
139     } while (type == 0);
140 
141     if ((task == 'm') && (type > 800)) {
142       if (taxa_list)
143         printf("* END of taxa list\n");
144 
145       printf("> p %d (pdist)\n", n);
146       for (i = 1; i < n; i++) {
147         for (j = 0; j < i; j++) {
148           printf("%g ", profile_edit_distance(T[i], T[j]));
149           if (edit_backtrack)
150             fprintf(somewhere, "> %d %d\n", i + 1, j + 1);
151 
152           print_aligned_lines(somewhere);
153         }
154         printf("\n");
155       }
156       if (type == 888) {
157         /* do another distance matrix */
158         n = 0;
159         printf("%s\n", list_title);
160         free(list_title);
161       }
162     }
163 
164     if (type > 800) {
165       for (i = 0; i < n; i++)
166         free_profile(T[i]);
167       if (type == 888)
168         continue;
169 
170       if (outfile[0] != '\0')
171         (void)fclose(somewhere);
172 
173       if (line != NULL)
174         free(line);
175 
176       return 0; /* finito */
177     }
178 
179     /* convert DNA alphabet to RNA if not explicitely switched off */
180     if (!noconv)
181       vrna_seq_toRNA(line);
182 
183     /* convert sequence to uppercase letters only */
184     vrna_seq_toupper(line);
185 
186     if (*fname == '\0')
187       sprintf(fname, "%d_dp.ps", n + 1);
188 
189     vrna_fold_compound_t *vc = vrna_fold_compound(line, &md, VRNA_OPTION_MFE | VRNA_OPTION_PF);
190 
191     structure = (char *)vrna_alloc((vc->length + 1) * sizeof(char));
192 
193     (void)vrna_pf(vc, structure);
194 
195     pr_pl = vrna_plist_from_probs(vc, 1e-5);
196     /* fake plist for lower part, since it stays empty */
197     mfe_pl      = (plist *)vrna_alloc(sizeof(plist));
198     mfe_pl[0].i = mfe_pl[0].j = 0;
199 
200     /* call threadsafe dot plot printing function */
201     PS_dot_plot_list(line, fname, pr_pl, mfe_pl, "");
202 
203     T[n] = Make_bp_profile_bppm(vc->exp_matrices->probs, vc->length);
204 
205     if ((istty) && (task == 'm'))
206       printf("%s\n", structure);
207 
208     free(structure);
209     free(mfe_pl);
210     free(pr_pl);
211     vrna_fold_compound_free(vc);
212 
213     n++;
214     switch (task) {
215       case 'p':
216         if (n == 2) {
217           dist = profile_edit_distance(T[0], T[1]);
218           printf("%g\n", dist);
219           print_aligned_lines(somewhere);
220           free_profile(T[0]);
221           free_profile(T[1]);
222           n = 0;
223         }
224 
225         break;
226       case 'f':
227         if (n > 1) {
228           dist = profile_edit_distance(T[1], T[0]);
229           printf("%g\n", dist);
230           print_aligned_lines(somewhere);
231           free_profile(T[1]);
232           n = 1;
233         }
234 
235         break;
236       case 'c':
237         if (n > 1) {
238           dist = profile_edit_distance(T[1], T[0]);
239           printf("%g\n", dist);
240           print_aligned_lines(somewhere);
241           free_profile(T[0]);
242           T[0]  = T[1];
243           n     = 1;
244         }
245 
246         break;
247 
248       case 'm':
249         break;
250 
251       default:
252         vrna_message_error("This can't happen.");
253     }   /* END switch task */
254     (void)fflush(stdout);
255   }     /* END while */
256   if (line != NULL)
257     free(line);
258 
259   return 0;
260 }
261 
262 
263 /* ----------------------------------------------------------------- */
264 
265 PRIVATE void
command_line(int argc,char * argv[],vrna_md_t * md)266 command_line(int        argc,
267              char       *argv[],
268              vrna_md_t  *md)
269 {
270   char                        *ns_bases   = NULL;
271   char                        *ParamFile  = NULL;
272   struct  RNApdist_args_info  args_info;
273 
274   task = 'p';
275 
276   /*
277    #############################################
278    # check the command line parameters
279    #############################################
280    */
281   if (RNApdist_cmdline_parser(argc, argv, &args_info) != 0)
282     exit(1);
283 
284   /* temperature */
285   if (args_info.temp_given)
286     md->temperature = temperature = args_info.temp_arg;
287 
288   /* do not take special tetra loop energies into account */
289   if (args_info.noTetra_given)
290     md->special_hp = tetra_loop = 0;
291 
292   /* set dangle model */
293   if (args_info.dangles_given) {
294     md->dangles = dangles = args_info.dangles_arg;
295     if (dangles)
296       md->dangles = dangles = 2;
297   }
298 
299   /* set energy model */
300   if (args_info.energyModel_given)
301     md->energy_set = energy_set = args_info.energyModel_arg;
302 
303   /* do not allow weak pairs */
304   if (args_info.noLP_given)
305     md->noLP = noLonelyPairs = 1;
306 
307   /* do not allow wobble pairs (GU) */
308   if (args_info.noGU_given)
309     md->noGU = noGU = 1;
310 
311   /* do not allow weak closing pairs (AU,GU) */
312   if (args_info.noClosingGU_given)
313     md->noGUclosure = no_closingGU = 1;
314 
315   /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
316   if (args_info.noconv_given)
317     noconv = 1;
318 
319   /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
320   if (args_info.nsp_given)
321     ns_bases = strdup(args_info.nsp_arg);
322 
323   /* take another energy parameter set */
324   if (args_info.paramFile_given)
325     ParamFile = strdup(args_info.paramFile_arg);
326 
327   if (args_info.compare_given) {
328     switch (args_info.compare_arg[0]) {
329       case 'p':
330       case 'm':
331       case 'f':
332       case 'c':
333         task = args_info.compare_arg[0];
334         break;
335       default:
336         RNApdist_cmdline_parser_print_help();
337         exit(EXIT_FAILURE);
338     }
339   }
340 
341   if (args_info.backtrack_given) {
342     if (strcmp(args_info.backtrack_arg, "none"))
343       strncpy(outfile, args_info.backtrack_arg, FILENAME_MAX_LENGTH - 1);
344 
345     edit_backtrack = 1;
346   }
347 
348   /* free allocated memory of command line data structure */
349   RNApdist_cmdline_parser_free(&args_info);
350 
351   /* do some preparations */
352   if (ParamFile != NULL) {
353     if (!strcmp(ParamFile, "DNA"))
354         vrna_params_load_DNA_Mathews2004();
355     else
356       vrna_params_load(ParamFile, VRNA_PARAMETER_FORMAT_DEFAULT);
357   }
358 
359   if (ns_bases != NULL)
360     vrna_md_set_nonstandards(md, ns_bases);
361 }
362 
363 
364 /* ------------------------------------------------------------------------- */
365 
366 PRIVATE void
print_aligned_lines(FILE * somewhere)367 print_aligned_lines(FILE *somewhere)
368 {
369   if (edit_backtrack)
370     fprintf(somewhere, "%s\n%s\n", aligned_line[0], aligned_line[1]);
371 }
372 
373 
374 /*--------------------------------------------------------------------------*/
375