1 /*
2  *                  Distances of Secondary Structures
3  *         Walter Fontana, Ivo L Hofacker, Peter F Stadler
4  *                        Vienna RNA Package
5  */
6 
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <math.h>
14 #include <ctype.h>
15 #include <unistd.h>
16 #include <string.h>
17 #include "ViennaRNA/dist_vars.h"
18 #include "ViennaRNA/RNAstruct.h"
19 #include "ViennaRNA/treedist.h"
20 #include "ViennaRNA/stringdist.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/utils/strings.h"
23 #include "ViennaRNA/utils/structures.h"
24 #include "ViennaRNA/io/utils.h"
25 #include "ViennaRNA/datastructures/basic.h"
26 #include "RNAdistance_cmdl.h"
27 
28 #define MAXNUM      1000    /* max number of structs for distance matrix */
29 
30 #define PUBLIC
31 #define PRIVATE     static
32 
33 PRIVATE void command_line(int   argc,
34                           char  *argv[]);
35 
36 
37 PRIVATE int parse_input(char *line);
38 
39 
40 PRIVATE int check_tree(char *line,
41                        char alpha[]);
42 
43 
44 PRIVATE int check_brackets(char *line);
45 
46 
47 PRIVATE void print_aligned_lines(FILE *somewhere);
48 
49 
50 PRIVATE char  ruler[] = "....,....1....,....2....,....3....,....4"
51                         "....,....5....,....6....,....7....,....8";
52 PRIVATE int   types = 1;
53 PRIVATE int   task;
54 PRIVATE int   taxa_list;
55 PRIVATE char  outfile[FILENAME_MAX_LENGTH], *list_title;
56 
57 PRIVATE char  ttype[10] = "f";
58 PRIVATE int   n         = 0;
59 
60 int
main(int argc,char * argv[])61 main(int  argc,
62      char *argv[])
63 {
64   char      *line = NULL, *xstruc, *cc;
65   Tree      *T[10][MAXNUM];
66   int       tree_types = 0, ttree;
67   swString  *S[10][MAXNUM];
68   char      *P[MAXNUM];  /* structures for base pair distances */
69   int       string_types = 0, tstr;
70   int       i, j, tt, istty, type;
71   int       it, is;
72   FILE      *somewhere = NULL;
73 
74   command_line(argc, argv);
75 
76   if ((outfile[0] == '\0') && (task == 2) && (edit_backtrack))
77     strcpy(outfile, "backtrack.file");
78 
79   if (outfile[0] != '\0')
80     somewhere = fopen(outfile, "w");
81 
82   if (somewhere == NULL)
83     somewhere = stdout;
84 
85   istty = isatty(fileno(stdin)) && isatty(fileno(stdout));
86 
87   do {
88     if ((istty) && (n == 0)) {
89       printf("\nInput structure;  @ to quit\n");
90       printf("%s\n", ruler);
91     }
92 
93     do {
94       if (line != NULL)
95         free(line);
96 
97       line = vrna_read_line(stdin);
98     } while ((type = parse_input(line)) == 0);
99 
100     if (((type == 999) || (type == 888)) && (task == 2)) {
101       /* do matrices */
102       if (taxa_list)
103         printf("* END of taxa list\n");
104 
105       ttree = 0;
106       tstr  = 0;
107       for (tt = 0; tt < types; tt++) {
108         printf("> %c   %d\n", ttype[tt], n);
109         if (islower(ttype[tt])) {
110           for (i = 1; i < n; i++) {
111             for (j = 0; j < i; j++) {
112               printf("%g ", tree_edit_distance(T[ttree][i], T[ttree][j]));
113               if (edit_backtrack) {
114                 fprintf(somewhere, "%d %d", i + 1, j + 1);
115                 if (ttype[tt] == 'f')
116                   unexpand_aligned_F(aligned_line);
117 
118                 print_aligned_lines(somewhere);
119               }
120             }
121             printf("\n");
122           }
123           printf("\n");
124           for (i = 0; i < n; i++)
125             free_tree(T[ttree][i]);
126           ttree++;
127         }
128 
129         if (ttype[tt] == 'P') {
130           for (i = 1; i < n; i++) {
131             for (j = 0; j < i; j++)
132               printf("%g ", (float)vrna_bp_distance(P[i], P[j]));
133             printf("\n");
134           }
135           printf("\n");
136           for (i = 0; i < n; i++)
137             free(P[i]);
138         } else if (isupper(ttype[tt])) {
139           for (i = 1; i < n; i++) {
140             for (j = 0; j < i; j++) {
141               printf("%g ", string_edit_distance(S[tstr][i], S[tstr][j]));
142               if (edit_backtrack) {
143                 fprintf(somewhere, "%d %d", i + 1, j + 1);
144                 if (ttype[tt] == 'F')
145                   unexpand_aligned_F(aligned_line);
146 
147                 print_aligned_lines(somewhere);
148               }
149             }
150             printf("\n");
151           }
152           printf("\n");
153           for (i = 0; i < n; i++)
154             free(S[tstr][i]);
155           tstr++;
156         }
157       }
158       fflush(stdout);
159       if (type == 888) {
160         /* do another distance matrix */
161         n = 0;
162         printf("%s\n", list_title);
163         free(list_title);
164         continue;
165       }
166     }                       /* END do matrices */
167 
168     if (type == 999) {
169       /* finito */
170       if (outfile[0] != '\0')
171         fclose(somewhere);
172 
173       return 0;
174     }
175 
176     if (type < 0) {
177       xstruc = add_root(line);
178       free(line);
179       line  = xstruc;
180       type  = -type;
181     }
182 
183     if (type == 2) {
184       xstruc = unexpand_Full(line);
185       free(line);
186       line  = xstruc;
187       type  = 1;
188     }
189 
190     tree_types    = 0;
191     string_types  = 0;
192     for (tt = 0; tt < types; tt++) {
193       switch (ttype[tt]) {
194         case 'f':
195         case 'F':
196           if (type != 1)
197             vrna_message_error("Can't convert back to full structure");
198 
199           xstruc = expand_Full(line);
200           if (islower(ttype[tt])) /* tree_edit */
201             T[tree_types++][n] = make_tree(xstruc);
202 
203           if (isupper(ttype[tt])) /* string edit */
204             S[string_types++][n] = Make_swString(xstruc);
205 
206           free(xstruc);
207           break;
208         case 'P':
209           if (type != 1)
210             vrna_message_error("Can't convert back to full structure");
211 
212           P[n] = strdup(line);
213           break;
214         case 'h':
215         case 'H':
216           switch (type) {
217             case 1:
218               xstruc = b2HIT(line);
219               if (islower(ttype[tt]))
220                 T[tree_types++][n] = make_tree(xstruc);
221 
222               if (isupper(ttype[tt]))
223                 S[string_types++][n] = Make_swString(xstruc);
224 
225               free(xstruc);
226               break;
227             default:
228               vrna_message_error("Can't convert to HIT structure");
229           }
230           break;
231         case 'c':
232         case 'C':
233           switch (type) {
234             case 1:
235               cc      = b2C(line);
236               xstruc  = expand_Shapiro(cc);
237               free(cc);
238               break;
239             case 4:
240               cc      = expand_Shapiro(line);
241               xstruc  = unweight(cc);
242               free(cc);
243               break;
244             case 3:
245               xstruc = unweight(line);
246               break;
247             default:
248               vrna_message_error("Unknown structure representation");
249               exit(0);
250           }
251           if (islower(ttype[tt]))
252             T[tree_types++][n] = make_tree(xstruc);
253 
254           if (isupper(ttype[tt]))
255             S[string_types++][n] = Make_swString(xstruc);
256 
257           free(xstruc);
258           break;
259         case 'w':
260         case 'W':
261           if (type == 1) {
262             xstruc = b2Shapiro(line);
263             if (islower(ttype[tt]))
264               T[tree_types++][n] = make_tree(xstruc);
265 
266             if (isupper(ttype[tt]))
267               S[string_types++][n] = Make_swString(xstruc);
268 
269             free(xstruc);
270           } else {
271             if (islower(ttype[tt]))
272               T[tree_types++][n] = make_tree(line);
273 
274             if (isupper(ttype[tt]))
275               S[string_types++][n] = Make_swString(line);
276           }
277 
278           break;
279         default:
280           vrna_message_error("Unknown distance type");
281       }
282     }
283     n++;
284     switch (task) {
285       float dist;
286       case 1:
287         if (n == 2) {
288           for (it = 0, is = 0, i = 0; i < types; i++) {
289             if (islower(ttype[i])) {
290               dist = tree_edit_distance(T[it][0], T[it][1]);
291               free_tree(T[it][0]);
292               free_tree(T[it][1]);
293               it++;
294             } else if (ttype[i] == 'P') {
295               dist = (float)vrna_bp_distance(P[0], P[1]);
296               free(P[0]);
297               free(P[1]);
298             } else {
299               /* isupper(ttype[i]) */
300               dist = string_edit_distance(S[is][0], S[is][1]);
301               free(S[is][0]);
302               free(S[is][1]);
303               is++;
304             }
305 
306             printf("%c: %g  ", ttype[i], dist);
307             if ((edit_backtrack) && (ttype[i] != 'P')) {
308               if (ttype[i] == 'f')
309                 unexpand_aligned_F(aligned_line);
310 
311               print_aligned_lines(somewhere);
312             }
313           }
314           printf("\n");
315           n = 0;
316         }
317 
318         break;
319       case 3:
320         if (n > 1) {
321           for (it = 0, is = 0, i = 0; i < types; i++) {
322             if (islower(ttype[i])) {
323               dist = tree_edit_distance(T[it][1], T[it][0]);
324               free_tree(T[it][1]);
325               it++;
326             } else if (ttype[i] == 'P') {
327               dist = (float)vrna_bp_distance(P[0], P[1]);
328               free(P[1]);
329             } else {
330               /* if(isupper(ttype[i])) */
331               dist = string_edit_distance(S[is][0], S[is][1]);
332               free(S[is][1]);
333               is++;
334             }
335 
336             printf("%c: %g  ", ttype[i], dist);
337             if ((edit_backtrack) && (ttype[i] != 'P')) {
338               if (ttype[i] == 'f')
339                 unexpand_aligned_F(aligned_line);
340 
341               print_aligned_lines(somewhere);
342             }
343           }
344           printf("\n");
345           n = 1;
346         }
347 
348         break;
349       case 4:
350         if (n > 1) {
351           for (it = 0, is = 0, i = 0; i < types; i++) {
352             if (islower(ttype[i])) {
353               dist = tree_edit_distance(T[it][1], T[it][0]);
354               free_tree(T[it][0]);
355               T[it][0] = T[it][1];
356               it++;
357             } else if (ttype[i] == 'P') {
358               dist = (float)vrna_bp_distance(P[0], P[1]);
359               free(P[0]);
360               P[0] = P[1];
361             } else {
362               /* if(isupper(ttype[i])) */
363               dist = string_edit_distance(S[is][0], S[is][1]);
364               free(S[is][0]);
365               S[is][0] = S[is][1];
366               is++;
367             }
368 
369             printf("%c: %g  ", ttype[i], dist);
370             if ((edit_backtrack) && (ttype[i] != 'P')) {
371               if (ttype[i] == 'f')
372                 unexpand_aligned_F(aligned_line);
373 
374               print_aligned_lines(somewhere);
375             }
376           }
377           printf("\n");
378           n = 1;
379         }
380 
381         break;
382     }
383     fflush(stdout);
384   } while (type != 999);
385   return 0;
386 }
387 
388 
389 /*--------------------------------------------------------------------------*/
390 
391 PRIVATE int
parse_input(char * line)392 parse_input(char *line)
393 {
394   int   type, rooted = 0, i, xx;
395   char  *cp;
396 
397   if (line == NULL)
398     return 999;
399 
400   if (line[0] == '*') {
401     if (taxa_list == 0) {
402       if (task == 2)
403         taxa_list = 1;
404 
405       printf("%s\n", line);
406       return 0;
407     } else {
408       list_title = strdup(line);
409       return 888;
410     }
411   }
412 
413   if (line[0] == '>') {
414     if (taxa_list)
415       printf("%d :%s\n", n + 1, line + 1);
416     else
417       printf("%s\n", line);
418 
419     return 0;
420   }
421 
422   cp = strchr(line, ' ');
423   if (cp)
424     *cp = '\0';          /* get rid of junk at the end of line */
425 
426   switch (line[0]) {
427     case '.':
428       type = 1;
429       break;
430 
431     case '(':           /* it's a tree */
432       i       = 1;
433       xx      = 0;
434       type    = 4;      /* coarse */
435       rooted  = 0;
436       while (line[i]) {
437         if (line[i] == '.') {
438           type = 1;   /* Full */
439           break;
440         }
441 
442         if ((line[i] == 'U') || (line[i] == 'P')) {
443           type  = 2;  /* FFull */
444           xx    = 1;
445           break;
446         }
447 
448         if (line[i] == 'S') {
449           type  = 3;
450           xx    = 1;
451           break;      /* Shapiro tree */
452         }
453 
454         if ((line[i] != '(') && (line[i] != ')'))
455           xx = 1;
456 
457         i++;
458       }
459       if (!xx)
460         type = 1;
461 
462       rooted = (line[strlen(line) - 2] == 'R');
463       break;
464     case '@':
465       return 999;
466 
467     default:
468       return 0;
469   }
470 
471   switch (type) {
472     case 1:
473       if (check_brackets(line))
474         return 1;
475 
476       break;
477     case 2:
478       if (check_tree(line, "UP")) {
479         if (rooted)
480           return 2;
481         else
482           return -2;
483       }
484 
485       break;
486     case 3:
487       if (check_tree(line, "HBIMSE")) {
488         if (rooted)
489           return -3;
490         else
491           return -3;
492       }
493 
494       break;
495     case 4:
496       if (check_tree(line, "HBIM")) {
497         if (rooted)
498           return 4;
499         else
500           return -4;
501       }
502 
503       break;
504   }
505   return 0;
506 }
507 
508 
509 /*--------------------------------------------------------------------------*/
510 
511 
512 PRIVATE int
check_tree(char * line,char alpha[])513 check_tree(char *line,
514            char alpha[])
515 {
516   int   n, i, o;
517   char  *pos;
518 
519   n = (int)strlen(line);
520 
521   if (line[0] != '(')
522     return 0;
523 
524   i = o = 1;
525   while (line[i]) {
526     while (line[i] == '(') {
527       o++;
528       i++;
529     }
530     pos = strchr(alpha, (int)line[i]);
531     if (pos) {
532       while (isdigit((int)line[++i]));
533       if (line[i] != ')')
534         return 0;
535     }
536 
537     if (line[i] == 'R') {
538       i++;
539       if ((i != n - 1) || (line[i] != ')'))
540         return 0;
541     }
542 
543     if (line[i] == ')') {
544       o--;
545       if (o < 0)
546         return 0;
547 
548       if ((i < n) && (line[i + 1] == ')'))
549         return 0;
550 
551       i++;
552     } else {
553       return 0;
554     }
555   }
556   if (o > 0)
557     return 0;
558 
559   return 1;
560 }
561 
562 
563 /*--------------------------------------------------------------------------*/
564 
565 
566 PRIVATE int
check_brackets(char * line)567 check_brackets(char *line)
568 {
569   int i, o;
570 
571   i = o = 0;
572   while (line[i]) {
573     switch (line[i]) {
574       case '(':
575         o++;
576         i++;
577         break;
578       case '.':
579         i++;
580         break;
581       case ')':
582         i++;
583         o--;
584         if (o < 0)
585           return 0;
586 
587         break;
588       default:
589         return 0;
590     }
591   }
592   if (o > 0)
593     return 0;
594 
595   return 1;
596 }
597 
598 
599 /*--------------------------------------------------------------------------*/
600 
601 
602 PRIVATE void
command_line(int argc,char * argv[])603 command_line(int  argc,
604              char *argv[])
605 {
606   struct        RNAdistance_args_info args_info;
607 
608   /* default values */
609   edit_backtrack  = 0;
610   types           = 1;
611   ttype[0]        = 'f';
612   task            = 1;
613 
614   /*
615    #############################################
616    # check the command line parameters
617    #############################################
618    */
619   if (RNAdistance_cmdline_parser(argc, argv, &args_info) != 0)
620     exit(1);
621 
622   /* use specified distance representations */
623   if (args_info.distance_given) {
624     strncpy(ttype, args_info.distance_arg, 9);
625     types = (int)strlen(ttype);
626   }
627 
628   if (args_info.compare_given) {
629     switch (args_info.compare_arg[0]) {
630       case 'p':
631         task = 1;
632         break;
633       case 'm':
634         task = 2;
635         break;
636       case 'f':
637         task = 3;
638         break;
639       case 'c':
640         task = 4;
641         break;
642       default:
643         RNAdistance_cmdline_parser_print_help();
644         exit(EXIT_FAILURE);
645     }
646   }
647 
648   if (args_info.shapiro_given)
649     cost_matrix = 1;
650 
651   if (args_info.backtrack_given) {
652     if (strcmp(args_info.backtrack_arg, "none"))
653       strncpy(outfile, args_info.backtrack_arg, FILENAME_MAX_LENGTH - 1);
654 
655     edit_backtrack = 1;
656   }
657 
658   /* free allocated memory of command line data structure */
659   RNAdistance_cmdline_parser_free(&args_info);
660 }
661 
662 
663 /*--------------------------------------------------------------------------*/
664 
665 PRIVATE void
print_aligned_lines(FILE * somewhere)666 print_aligned_lines(FILE *somewhere)
667 {
668   if (edit_backtrack) {
669     fprintf(somewhere, "\n%s\n%s\n", aligned_line[0], aligned_line[1]);
670     fflush(somewhere);
671   }
672 }
673