1 /*
2 * Distances of Secondary Structures
3 * Walter Fontana, Ivo L Hofacker, Peter F Stadler
4 * Vienna RNA Package
5 */
6
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <math.h>
14 #include <ctype.h>
15 #include <unistd.h>
16 #include <string.h>
17 #include "ViennaRNA/dist_vars.h"
18 #include "ViennaRNA/RNAstruct.h"
19 #include "ViennaRNA/treedist.h"
20 #include "ViennaRNA/stringdist.h"
21 #include "ViennaRNA/utils/basic.h"
22 #include "ViennaRNA/utils/strings.h"
23 #include "ViennaRNA/utils/structures.h"
24 #include "ViennaRNA/io/utils.h"
25 #include "ViennaRNA/datastructures/basic.h"
26 #include "RNAdistance_cmdl.h"
27
28 #define MAXNUM 1000 /* max number of structs for distance matrix */
29
30 #define PUBLIC
31 #define PRIVATE static
32
33 PRIVATE void command_line(int argc,
34 char *argv[]);
35
36
37 PRIVATE int parse_input(char *line);
38
39
40 PRIVATE int check_tree(char *line,
41 char alpha[]);
42
43
44 PRIVATE int check_brackets(char *line);
45
46
47 PRIVATE void print_aligned_lines(FILE *somewhere);
48
49
50 PRIVATE char ruler[] = "....,....1....,....2....,....3....,....4"
51 "....,....5....,....6....,....7....,....8";
52 PRIVATE int types = 1;
53 PRIVATE int task;
54 PRIVATE int taxa_list;
55 PRIVATE char outfile[FILENAME_MAX_LENGTH], *list_title;
56
57 PRIVATE char ttype[10] = "f";
58 PRIVATE int n = 0;
59
60 int
main(int argc,char * argv[])61 main(int argc,
62 char *argv[])
63 {
64 char *line = NULL, *xstruc, *cc;
65 Tree *T[10][MAXNUM];
66 int tree_types = 0, ttree;
67 swString *S[10][MAXNUM];
68 char *P[MAXNUM]; /* structures for base pair distances */
69 int string_types = 0, tstr;
70 int i, j, tt, istty, type;
71 int it, is;
72 FILE *somewhere = NULL;
73
74 command_line(argc, argv);
75
76 if ((outfile[0] == '\0') && (task == 2) && (edit_backtrack))
77 strcpy(outfile, "backtrack.file");
78
79 if (outfile[0] != '\0')
80 somewhere = fopen(outfile, "w");
81
82 if (somewhere == NULL)
83 somewhere = stdout;
84
85 istty = isatty(fileno(stdin)) && isatty(fileno(stdout));
86
87 do {
88 if ((istty) && (n == 0)) {
89 printf("\nInput structure; @ to quit\n");
90 printf("%s\n", ruler);
91 }
92
93 do {
94 if (line != NULL)
95 free(line);
96
97 line = vrna_read_line(stdin);
98 } while ((type = parse_input(line)) == 0);
99
100 if (((type == 999) || (type == 888)) && (task == 2)) {
101 /* do matrices */
102 if (taxa_list)
103 printf("* END of taxa list\n");
104
105 ttree = 0;
106 tstr = 0;
107 for (tt = 0; tt < types; tt++) {
108 printf("> %c %d\n", ttype[tt], n);
109 if (islower(ttype[tt])) {
110 for (i = 1; i < n; i++) {
111 for (j = 0; j < i; j++) {
112 printf("%g ", tree_edit_distance(T[ttree][i], T[ttree][j]));
113 if (edit_backtrack) {
114 fprintf(somewhere, "%d %d", i + 1, j + 1);
115 if (ttype[tt] == 'f')
116 unexpand_aligned_F(aligned_line);
117
118 print_aligned_lines(somewhere);
119 }
120 }
121 printf("\n");
122 }
123 printf("\n");
124 for (i = 0; i < n; i++)
125 free_tree(T[ttree][i]);
126 ttree++;
127 }
128
129 if (ttype[tt] == 'P') {
130 for (i = 1; i < n; i++) {
131 for (j = 0; j < i; j++)
132 printf("%g ", (float)vrna_bp_distance(P[i], P[j]));
133 printf("\n");
134 }
135 printf("\n");
136 for (i = 0; i < n; i++)
137 free(P[i]);
138 } else if (isupper(ttype[tt])) {
139 for (i = 1; i < n; i++) {
140 for (j = 0; j < i; j++) {
141 printf("%g ", string_edit_distance(S[tstr][i], S[tstr][j]));
142 if (edit_backtrack) {
143 fprintf(somewhere, "%d %d", i + 1, j + 1);
144 if (ttype[tt] == 'F')
145 unexpand_aligned_F(aligned_line);
146
147 print_aligned_lines(somewhere);
148 }
149 }
150 printf("\n");
151 }
152 printf("\n");
153 for (i = 0; i < n; i++)
154 free(S[tstr][i]);
155 tstr++;
156 }
157 }
158 fflush(stdout);
159 if (type == 888) {
160 /* do another distance matrix */
161 n = 0;
162 printf("%s\n", list_title);
163 free(list_title);
164 continue;
165 }
166 } /* END do matrices */
167
168 if (type == 999) {
169 /* finito */
170 if (outfile[0] != '\0')
171 fclose(somewhere);
172
173 return 0;
174 }
175
176 if (type < 0) {
177 xstruc = add_root(line);
178 free(line);
179 line = xstruc;
180 type = -type;
181 }
182
183 if (type == 2) {
184 xstruc = unexpand_Full(line);
185 free(line);
186 line = xstruc;
187 type = 1;
188 }
189
190 tree_types = 0;
191 string_types = 0;
192 for (tt = 0; tt < types; tt++) {
193 switch (ttype[tt]) {
194 case 'f':
195 case 'F':
196 if (type != 1)
197 vrna_message_error("Can't convert back to full structure");
198
199 xstruc = expand_Full(line);
200 if (islower(ttype[tt])) /* tree_edit */
201 T[tree_types++][n] = make_tree(xstruc);
202
203 if (isupper(ttype[tt])) /* string edit */
204 S[string_types++][n] = Make_swString(xstruc);
205
206 free(xstruc);
207 break;
208 case 'P':
209 if (type != 1)
210 vrna_message_error("Can't convert back to full structure");
211
212 P[n] = strdup(line);
213 break;
214 case 'h':
215 case 'H':
216 switch (type) {
217 case 1:
218 xstruc = b2HIT(line);
219 if (islower(ttype[tt]))
220 T[tree_types++][n] = make_tree(xstruc);
221
222 if (isupper(ttype[tt]))
223 S[string_types++][n] = Make_swString(xstruc);
224
225 free(xstruc);
226 break;
227 default:
228 vrna_message_error("Can't convert to HIT structure");
229 }
230 break;
231 case 'c':
232 case 'C':
233 switch (type) {
234 case 1:
235 cc = b2C(line);
236 xstruc = expand_Shapiro(cc);
237 free(cc);
238 break;
239 case 4:
240 cc = expand_Shapiro(line);
241 xstruc = unweight(cc);
242 free(cc);
243 break;
244 case 3:
245 xstruc = unweight(line);
246 break;
247 default:
248 vrna_message_error("Unknown structure representation");
249 exit(0);
250 }
251 if (islower(ttype[tt]))
252 T[tree_types++][n] = make_tree(xstruc);
253
254 if (isupper(ttype[tt]))
255 S[string_types++][n] = Make_swString(xstruc);
256
257 free(xstruc);
258 break;
259 case 'w':
260 case 'W':
261 if (type == 1) {
262 xstruc = b2Shapiro(line);
263 if (islower(ttype[tt]))
264 T[tree_types++][n] = make_tree(xstruc);
265
266 if (isupper(ttype[tt]))
267 S[string_types++][n] = Make_swString(xstruc);
268
269 free(xstruc);
270 } else {
271 if (islower(ttype[tt]))
272 T[tree_types++][n] = make_tree(line);
273
274 if (isupper(ttype[tt]))
275 S[string_types++][n] = Make_swString(line);
276 }
277
278 break;
279 default:
280 vrna_message_error("Unknown distance type");
281 }
282 }
283 n++;
284 switch (task) {
285 float dist;
286 case 1:
287 if (n == 2) {
288 for (it = 0, is = 0, i = 0; i < types; i++) {
289 if (islower(ttype[i])) {
290 dist = tree_edit_distance(T[it][0], T[it][1]);
291 free_tree(T[it][0]);
292 free_tree(T[it][1]);
293 it++;
294 } else if (ttype[i] == 'P') {
295 dist = (float)vrna_bp_distance(P[0], P[1]);
296 free(P[0]);
297 free(P[1]);
298 } else {
299 /* isupper(ttype[i]) */
300 dist = string_edit_distance(S[is][0], S[is][1]);
301 free(S[is][0]);
302 free(S[is][1]);
303 is++;
304 }
305
306 printf("%c: %g ", ttype[i], dist);
307 if ((edit_backtrack) && (ttype[i] != 'P')) {
308 if (ttype[i] == 'f')
309 unexpand_aligned_F(aligned_line);
310
311 print_aligned_lines(somewhere);
312 }
313 }
314 printf("\n");
315 n = 0;
316 }
317
318 break;
319 case 3:
320 if (n > 1) {
321 for (it = 0, is = 0, i = 0; i < types; i++) {
322 if (islower(ttype[i])) {
323 dist = tree_edit_distance(T[it][1], T[it][0]);
324 free_tree(T[it][1]);
325 it++;
326 } else if (ttype[i] == 'P') {
327 dist = (float)vrna_bp_distance(P[0], P[1]);
328 free(P[1]);
329 } else {
330 /* if(isupper(ttype[i])) */
331 dist = string_edit_distance(S[is][0], S[is][1]);
332 free(S[is][1]);
333 is++;
334 }
335
336 printf("%c: %g ", ttype[i], dist);
337 if ((edit_backtrack) && (ttype[i] != 'P')) {
338 if (ttype[i] == 'f')
339 unexpand_aligned_F(aligned_line);
340
341 print_aligned_lines(somewhere);
342 }
343 }
344 printf("\n");
345 n = 1;
346 }
347
348 break;
349 case 4:
350 if (n > 1) {
351 for (it = 0, is = 0, i = 0; i < types; i++) {
352 if (islower(ttype[i])) {
353 dist = tree_edit_distance(T[it][1], T[it][0]);
354 free_tree(T[it][0]);
355 T[it][0] = T[it][1];
356 it++;
357 } else if (ttype[i] == 'P') {
358 dist = (float)vrna_bp_distance(P[0], P[1]);
359 free(P[0]);
360 P[0] = P[1];
361 } else {
362 /* if(isupper(ttype[i])) */
363 dist = string_edit_distance(S[is][0], S[is][1]);
364 free(S[is][0]);
365 S[is][0] = S[is][1];
366 is++;
367 }
368
369 printf("%c: %g ", ttype[i], dist);
370 if ((edit_backtrack) && (ttype[i] != 'P')) {
371 if (ttype[i] == 'f')
372 unexpand_aligned_F(aligned_line);
373
374 print_aligned_lines(somewhere);
375 }
376 }
377 printf("\n");
378 n = 1;
379 }
380
381 break;
382 }
383 fflush(stdout);
384 } while (type != 999);
385 return 0;
386 }
387
388
389 /*--------------------------------------------------------------------------*/
390
391 PRIVATE int
parse_input(char * line)392 parse_input(char *line)
393 {
394 int type, rooted = 0, i, xx;
395 char *cp;
396
397 if (line == NULL)
398 return 999;
399
400 if (line[0] == '*') {
401 if (taxa_list == 0) {
402 if (task == 2)
403 taxa_list = 1;
404
405 printf("%s\n", line);
406 return 0;
407 } else {
408 list_title = strdup(line);
409 return 888;
410 }
411 }
412
413 if (line[0] == '>') {
414 if (taxa_list)
415 printf("%d :%s\n", n + 1, line + 1);
416 else
417 printf("%s\n", line);
418
419 return 0;
420 }
421
422 cp = strchr(line, ' ');
423 if (cp)
424 *cp = '\0'; /* get rid of junk at the end of line */
425
426 switch (line[0]) {
427 case '.':
428 type = 1;
429 break;
430
431 case '(': /* it's a tree */
432 i = 1;
433 xx = 0;
434 type = 4; /* coarse */
435 rooted = 0;
436 while (line[i]) {
437 if (line[i] == '.') {
438 type = 1; /* Full */
439 break;
440 }
441
442 if ((line[i] == 'U') || (line[i] == 'P')) {
443 type = 2; /* FFull */
444 xx = 1;
445 break;
446 }
447
448 if (line[i] == 'S') {
449 type = 3;
450 xx = 1;
451 break; /* Shapiro tree */
452 }
453
454 if ((line[i] != '(') && (line[i] != ')'))
455 xx = 1;
456
457 i++;
458 }
459 if (!xx)
460 type = 1;
461
462 rooted = (line[strlen(line) - 2] == 'R');
463 break;
464 case '@':
465 return 999;
466
467 default:
468 return 0;
469 }
470
471 switch (type) {
472 case 1:
473 if (check_brackets(line))
474 return 1;
475
476 break;
477 case 2:
478 if (check_tree(line, "UP")) {
479 if (rooted)
480 return 2;
481 else
482 return -2;
483 }
484
485 break;
486 case 3:
487 if (check_tree(line, "HBIMSE")) {
488 if (rooted)
489 return -3;
490 else
491 return -3;
492 }
493
494 break;
495 case 4:
496 if (check_tree(line, "HBIM")) {
497 if (rooted)
498 return 4;
499 else
500 return -4;
501 }
502
503 break;
504 }
505 return 0;
506 }
507
508
509 /*--------------------------------------------------------------------------*/
510
511
512 PRIVATE int
check_tree(char * line,char alpha[])513 check_tree(char *line,
514 char alpha[])
515 {
516 int n, i, o;
517 char *pos;
518
519 n = (int)strlen(line);
520
521 if (line[0] != '(')
522 return 0;
523
524 i = o = 1;
525 while (line[i]) {
526 while (line[i] == '(') {
527 o++;
528 i++;
529 }
530 pos = strchr(alpha, (int)line[i]);
531 if (pos) {
532 while (isdigit((int)line[++i]));
533 if (line[i] != ')')
534 return 0;
535 }
536
537 if (line[i] == 'R') {
538 i++;
539 if ((i != n - 1) || (line[i] != ')'))
540 return 0;
541 }
542
543 if (line[i] == ')') {
544 o--;
545 if (o < 0)
546 return 0;
547
548 if ((i < n) && (line[i + 1] == ')'))
549 return 0;
550
551 i++;
552 } else {
553 return 0;
554 }
555 }
556 if (o > 0)
557 return 0;
558
559 return 1;
560 }
561
562
563 /*--------------------------------------------------------------------------*/
564
565
566 PRIVATE int
check_brackets(char * line)567 check_brackets(char *line)
568 {
569 int i, o;
570
571 i = o = 0;
572 while (line[i]) {
573 switch (line[i]) {
574 case '(':
575 o++;
576 i++;
577 break;
578 case '.':
579 i++;
580 break;
581 case ')':
582 i++;
583 o--;
584 if (o < 0)
585 return 0;
586
587 break;
588 default:
589 return 0;
590 }
591 }
592 if (o > 0)
593 return 0;
594
595 return 1;
596 }
597
598
599 /*--------------------------------------------------------------------------*/
600
601
602 PRIVATE void
command_line(int argc,char * argv[])603 command_line(int argc,
604 char *argv[])
605 {
606 struct RNAdistance_args_info args_info;
607
608 /* default values */
609 edit_backtrack = 0;
610 types = 1;
611 ttype[0] = 'f';
612 task = 1;
613
614 /*
615 #############################################
616 # check the command line parameters
617 #############################################
618 */
619 if (RNAdistance_cmdline_parser(argc, argv, &args_info) != 0)
620 exit(1);
621
622 /* use specified distance representations */
623 if (args_info.distance_given) {
624 strncpy(ttype, args_info.distance_arg, 9);
625 types = (int)strlen(ttype);
626 }
627
628 if (args_info.compare_given) {
629 switch (args_info.compare_arg[0]) {
630 case 'p':
631 task = 1;
632 break;
633 case 'm':
634 task = 2;
635 break;
636 case 'f':
637 task = 3;
638 break;
639 case 'c':
640 task = 4;
641 break;
642 default:
643 RNAdistance_cmdline_parser_print_help();
644 exit(EXIT_FAILURE);
645 }
646 }
647
648 if (args_info.shapiro_given)
649 cost_matrix = 1;
650
651 if (args_info.backtrack_given) {
652 if (strcmp(args_info.backtrack_arg, "none"))
653 strncpy(outfile, args_info.backtrack_arg, FILENAME_MAX_LENGTH - 1);
654
655 edit_backtrack = 1;
656 }
657
658 /* free allocated memory of command line data structure */
659 RNAdistance_cmdline_parser_free(&args_info);
660 }
661
662
663 /*--------------------------------------------------------------------------*/
664
665 PRIVATE void
print_aligned_lines(FILE * somewhere)666 print_aligned_lines(FILE *somewhere)
667 {
668 if (edit_backtrack) {
669 fprintf(somewhere, "\n%s\n%s\n", aligned_line[0], aligned_line[1]);
670 fflush(somewhere);
671 }
672 }
673