1 /* SQUID - A C function library for biological sequence analysis
2 * Copyright (C) 1992-1996 Sean R. Eddy
3 *
4 * This source code is distributed under terms of the
5 * GNU General Public License. See the files COPYING
6 * and GNULICENSE for further details.
7 *
8 */
9
10 /* reformat_main.c
11 * Mon Sep 13 13:06:51 1993
12 *
13 * reformat - reformat sequence files.
14 */
15
16
17 #include <stdio.h>
18 #include <string.h>
19 #include "squid.h"
20
21 #define OPTIONS "dhlru"
22
/* Help text: printed after the version banner for -h, and passed to Die()
 * when the command line cannot be parsed.
 */
char usage[] =
  "Usage: reformat [-options] <format> <seqfile>\n"
  "Convert between sequence file formats.\n"
  "Available formats are:\n"
  "embl\n"
  "fasta\n"
  "xfasta\n"
  "genbank\n"
  "gcg\n"
  "gcgdata\n"
  "msf\n"
  "strider\n"
  "zuker\n"
  "ig\n"
  "nbrf\n"
  "pir\n"
  "selex\n"
  "squid\n"
  "raw\n\n"
  "Available options are:\n"
  "-d : force DNA alphabet for nucleic acid sequence\n"
  "-r : force RNA alphabet for nucleic acid sequence\n"
  "-l : force lower case\n"
  "-u : force upper case\n"
  "-h : print short help and usage info\n";
47
48 struct seqfmt_s { char *formatname; int fmt; } seqfmt[] =
49 {
50 { "embl", kEMBL },
51 { "fasta", kPearson },
52 { "xfasta", kXPearson},
53 { "genbank", kGenBank },
54 { "gcg", kGCG },
55 { "gcgdata", kGCGdata },
56 { "msf", kMSF },
57 { "strider", kStrider },
58 { "zuker", kZuker },
59 { "ig", kIG },
60 { "nbrf", kNBRF },
61 { "pir", kPIR },
62 { "selex", kSelex },
63 { "squid", kSquid },
64 { "raw", kRaw },
65 };
66 #define NUMFORMATS (sizeof(seqfmt) / sizeof(struct seqfmt_s))
67
68
69 int
main(int argc,char ** argv)70 main(int argc, char **argv)
71 {
72 char *seqfile; /* name of sequence file */
73 char *format;
74 SQFILE *dbfp; /* open sequence file */
75 int fmt; /* format of seqfile */
76 int outfmt; /* output format */
77 char *seq; /* sequence */
78 SQINFO sqinfo;
79 int i;
80
81 int force_rna; /* TRUE to force RNA alphabet */
82 int force_dna; /* TRUE to force DNA alphabet */
83 int force_lower; /* TRUE to force lower case */
84 int force_upper; /* TRUE to force upper case */
85
86 int optchar; /* option character, command line */
87 extern int optind;
88
89 /***********************************************
90 * Parse command line
91 ***********************************************/
92
93 force_rna = FALSE;
94 force_dna = FALSE;
95 force_upper = FALSE;
96 force_lower = FALSE;
97
98 while ((optchar = getopt(argc, argv, OPTIONS)) != -1)
99 switch (optchar) {
100
101 case 'd': force_dna = TRUE; break;
102 case 'l': force_lower = TRUE; break;
103 case 'r': force_rna = TRUE; break;
104 case 'u': force_upper = TRUE; break;
105
106 case 'h':
107 printf("reformat %s, %s\n%s\n", squid_version, squid_date, usage);
108 exit(EXIT_SUCCESS);
109 default:
110 Die("%s\n", usage);
111 }
112
113 if (argc - optind != 2)
114 Die("%s\n", usage);
115 if (force_lower && force_upper)
116 Die("Can't force both upper case and lower case. Stop trying to confuse me.\n%s",
117 usage);
118 if (force_rna && force_dna)
119 Die("Can't force both RNA and DNA. Stop trying to find bugs, you'll be sorry.\n%s",
120 usage);
121
122 format = argv[optind]; optind++;
123 seqfile = argv[optind]; optind++;
124
125 /***********************************************
126 * Figure out what format we're supposed to write
127 ***********************************************/
128
129 outfmt = kUnknown;
130 for (i = 0; i < NUMFORMATS; i++)
131 if (strcasecmp(format, seqfmt[i].formatname) == 0)
132 outfmt = seqfmt[i].fmt;
133
134 if (outfmt == kUnknown)
135 Die("Unknown output format %s\n%s", format, usage);
136
137 /***********************************************
138 * Reformat the file, printing to stdout.
139 ***********************************************/
140
141 if (! SeqfileFormat(seqfile, &fmt, NULL))
142 Die("Can't determine format of file %s\n", seqfile);
143
144 if ((fmt == kMSF || fmt == kSelex || fmt == kClustal) &&
145 (outfmt == kMSF || outfmt == kSelex))
146 {
147 char **aseqs;
148 int num;
149 AINFO ainfo;
150
151 ReadAlignment(seqfile, fmt, &aseqs, &num, &ainfo);
152
153 for (i = 0; i < num; i++)
154 {
155 if (force_dna) ToDNA(aseqs[i]);
156 if (force_rna) ToRNA(aseqs[i]);
157 if (force_lower) s2lower(aseqs[i]);
158 if (force_upper) s2upper(aseqs[i]);
159 }
160
161 switch (outfmt) {
162 case kMSF: WriteMSF(stdout, aseqs, num, &ainfo); break;
163 case kSelex: WriteSELEX(stdout, aseqs, num, &ainfo, 50); break;
164 }
165 FreeAlignment(aseqs, num, &ainfo);
166 }
167 else if (outfmt == kMSF || outfmt == kSelex)
168 {
169 Die("Sorry, you can't make alignment files from unaligned files");
170 }
171 else
172 {
173 if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
174 Die("Failed to open sequence file %s for reading", seqfile);
175
176 while (ReadSeq(dbfp, fmt, &seq, &sqinfo))
177 {
178 if (force_dna) ToDNA(seq);
179 if (force_rna) ToRNA(seq);
180 if (force_lower) s2lower(seq);
181 if (force_upper) s2upper(seq);
182
183 WriteSeq(stdout, outfmt, seq, &sqinfo);
184 FreeSequence(seq, &sqinfo);
185 }
186 SeqfileClose(dbfp);
187 }
188 return 0;
189 }
190