1 /*
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 2013 Oracle and/or its affiliates.  All rights reserved.
5  *
6  */
7 
8 /*
9  * This is the entry function of the db_sql command.  Db_sql is a
10  * utility program that translates a schema description written in a
11  * SQL Data Definition Language dialect into C code that implements
12  * the schema using Berkeley DB.
13  */
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <assert.h>
18 #include "db_sql_codegen.h"
19 
/* getopt() is declared by hand here instead of through a system header. */
extern int getopt(int, char *const [], const char *);
/* Forward declarations for helpers defined later in this file. */
static int usage(char *);
static char * change_extension(char *path, char *extension);
static int read_and_parse(FILE *fp);

/* Program name used in diagnostics; main() replaces it with argv[0]. */
char *progname = "db_sql";
/* Number of the input line being read; maintained by read_and_parse(). */
int line_number = 0;
/* Set to 1 by the -d command line switch. */
int debug = 0;
/*
 * Set to 1 by the -x command line switch.
 * NOTE(review): presumably selects transactional code generation --
 * confirm against the code generator.
 */
int txnflag = 0;
29 
30 int
main(argc,argv)31 main(argc,argv)
32 	int argc;
33 	char **argv;
34 {
35 	extern char *optarg;
36 	extern int optind;
37 	int opt, free_ofilename, free_hfilename;
38 	FILE *ifile, *hfile, *ofile, *tfile, *vfile;
39 	char *ifilename, *hfilename, *ofilename, *tfilename, *vfilename;
40 
41 	ifilename = hfilename = ofilename = tfilename = vfilename = NULL;
42 	free_ofilename = free_hfilename = 0;
43 
44 	progname = argv[0];
45 
46 	/* parse the command line switches */
47 
48 	while ((opt = getopt(argc, argv, "i:t:o:h:dv:x")) != -1) {
49 		switch (opt) {
50 		case 'i':              /* input file name */
51 			ifilename = optarg;
52 			break;
53 		case 'h':              /* header output file name */
54 			hfilename = optarg;
55 			break;
56 		case 'o':              /* output file name */
57 			ofilename = optarg;
58 			break;
59 		case 't':              /* test code output file name */
60 			tfilename = optarg;
61 			break;
62 		case 'd':
63 			debug = 1;
64 			break;
65 		case 'v':              /* verification code output file name */
66 			vfilename = optarg;
67 			break;
68 		case 'x':
69 			txnflag = 1;
70 			break;
71 		default:
72 			return (usage(0));
73 		}
74 	}
75 
76 	argc -= optind;
77 	argv += optind;
78 
79 	if (argc != 0) {
80 		fprintf(stderr,
81 			"extra argument %s after switch arguments\n", *argv);
82 		return (usage(0));
83 	}
84 
85 	if (ifilename == NULL)
86 		ifile = stdin;
87 	else
88 		if ((ifile = fopen(ifilename, "r")) == NULL)
89 			return (usage(ifilename));
90 
91 	/* if ofilename wasn't given, use ifilename with a .c extension */
92 
93 	if (ofilename == NULL && ifilename != NULL) {
94 		ofilename = change_extension(ifilename, "c");
95 		free_ofilename = 1;
96 	}
97 
98 	if (ofilename == NULL)
99 		ofile = stdout;
100 	else
101 		if ((ofile = fopen(ofilename, "w")) == NULL)
102 			return (usage(ofilename));
103 
104 	/* if hfilename wasn't given, use ofilename with a .h extension */
105 
106 	if (hfilename == NULL && ofilename != NULL) {
107 		hfilename = change_extension(ofilename, "h");
108 		free_hfilename = 1;
109 	}
110 
111 	if (hfilename == NULL)
112 		hfile = stdout;
113 	else
114 		if ((hfile = fopen(hfilename, "w")) == NULL)
115 			return (usage(hfilename));
116 
117 	/*
118 	 * if tfile wasn't given, we won't generate the test code.
119 	 *  tfile == null turns off test code generation
120 	 */
121 	if (tfilename == NULL)
122 		tfile = 0;
123 	else {
124 		if (hfilename == NULL) {
125 			fprintf(stderr,
126 			    "Can't produce test when streaming to stdout\n");
127 			return (usage(0));
128 		}
129 		if ((tfile = fopen(tfilename, "w")) == NULL)
130 			return (usage(tfilename));
131 	}
132 	/*
133 	 * Verification files are generated for internal testing purposes,
134 	 * they are similar to the test output file. This functionality is
135 	 * not targeted at end users, so is not documented.
136 	 */
137 	if (vfilename == NULL)
138 		vfile = 0;
139 	else {
140 		if (hfilename == NULL) {
141 			fprintf(stderr,
142 			    "Can't produce verify when streaming to stdout\n");
143 			return (usage(0));
144 		}
145 		if ((vfile = fopen(vfilename, "w")) == NULL)
146 			return (usage(vfilename));
147 	}
148 
149 	if (read_and_parse(ifile))
150 		exit(1);
151 
152 	generate(hfile, ofile, tfile, vfile, hfilename);
153 
154 	/* clean up the allocated memory */
155 	if (free_ofilename)
156 		free(ofilename);
157 	if (free_hfilename)
158 		free(hfilename);
159 	return 0;
160 }
161 
/*
 * Scan a NUL-terminated buffer and return a pointer to its rightmost
 * semicolon that is not inside a comment, or NULL when there is none
 * (or when p is NULL or empty).
 *
 * Two comment forms are recognized: slash-star ... star-slash block
 * comments, and hyphen-hyphen comments that run to the end of the line.
 *
 * The scanner state is static, so a comment that is still open at the
 * end of one buffer is remembered on the next call; callers must pass
 * consecutive pieces of a single input.  The terminating NUL is fed
 * through the state machine too, which clears a pending lone '/' or
 * '-' at the end of a buffer.
 *
 * Quotes are not recognized: a semicolon or a comment introducer
 * inside a quoted string is treated like any other.  Later, this may
 * need to notice quotes as well.
 */
static char *
scan_for_rightmost_semicolon(char *p)
{
	static enum scanner_state {
		IDLE = 0, GOT_SLASH = 1, IN_SLASHSTAR_COMMENT = 2,
		GOT_STAR = 3, GOT_HYPHEN = 4, IN_HYPHHYPH_COMMENT = 5
	} state = IDLE;

	char *result;

	result = NULL;

	if (p == NULL || *p == '\0')
		return (result);

	do {
		switch (state) {
		case IDLE:
		case GOT_SLASH:
		case GOT_HYPHEN:
			/*
			 * Outside any comment.  A '*' only matters right
			 * after a '/', and a second '-' only right after a
			 * first one.  Any other character is re-examined as
			 * a possible introducer, so "//" followed by '*'
			 * still opens a block comment, and a bare '*' (as
			 * in "a * b") leaves the scanner idle.
			 */
			if (state == GOT_SLASH && *p == '*')
				state = IN_SLASHSTAR_COMMENT;
			else if (state == GOT_HYPHEN && *p == '-')
				state = IN_HYPHHYPH_COMMENT;
			else if (*p == '/')
				state = GOT_SLASH;
			else if (*p == '-')
				state = GOT_HYPHEN;
			else
				state = IDLE;
			break;
		case IN_SLASHSTAR_COMMENT:
			if (*p == '*')
				state = GOT_STAR;
			break;
		case GOT_STAR:
			/*
			 * A run of stars keeps the scanner armed, so that
			 * a comment ending in star-star-slash is closed.
			 */
			if (*p == '/')
				state = IDLE;
			else if (*p != '*')
				state = IN_SLASHSTAR_COMMENT;
			break;
		case IN_HYPHHYPH_COMMENT:
			if (*p == '\n')
				state = IDLE;
			break;
		}

		/*
		 * A ';' directly after a lone '/' or '-' has already put
		 * the scanner back into IDLE above, so it is counted.
		 */
		if (state == IDLE && *p == ';')
			result = p;

	} while (*p++);

	return (result);
}
227 
228 /*
229  * read_and_parse reads lines from the input file (containing SQL DDL),
230  * and sends the to the tokenizer and parser.  Because of the way the
231  * SQLite tokenizer works, the chunks sent to the tokenizer must
232  * contain a multiple of whole SQL statements -- a partial statement
233  * will produce a syntax error.  Therefore, this function splits its
234  * input at semicolons.
235  */
236 static int
read_and_parse(fp)237 read_and_parse(fp)
238 	FILE *fp;
239 {
240 	size_t line_len, copy_len, collector_len;
241 	char *q, *collector, buf[256], *err_msg;
242 
243 	collector = 0;
244 	collector_len = 0;
245 	err_msg = 0;
246 
247 	/* line_number is global */
248 
249 	for (line_number = 1; fgets(buf, sizeof(buf), fp) != 0; line_number++) {
250 
251 		line_len = strlen(buf);
252 
253 		if (1 + strlen(buf)  == sizeof(buf)) {
254 			fprintf(stderr, "%s: line %d is too long", progname,
255 				line_number);
256 			return 1;
257 		}
258 
259 		/*
260 		 * Does this line contain a semicolon?  If so, copy
261 		 * the line, up to and including its last semicolon,
262 		 * into collector and parse it.  Then reinitialize
263 		 * collector with the remainer of the line
264 		 */
265 		if ((q = scan_for_rightmost_semicolon(buf)) != NULL)
266 			copy_len = 1 + q - buf;
267 		else
268 			copy_len = line_len;
269 
270 		collector_len += 1 + copy_len;
271 		if (collector == NULL)
272 			collector = calloc(1, collector_len);
273 		else
274 			collector = realloc(collector, collector_len);
275 
276 		strnconcat(collector, collector_len, buf, copy_len);
277 
278 		if (q != 0) {
279 			if (do_parse(collector, &err_msg) != 0) {
280 				fprintf(stderr,
281 					"parsing error at line %d : %s\n",
282 					line_number, err_msg);
283 				return 1;
284 			}
285 
286 			collector_len = 1 + line_len - copy_len;
287 			collector = realloc(collector, collector_len);
288 			memcpy(collector, buf + copy_len, collector_len);
289 			assert(collector[collector_len-1] == 0);
290 		}
291 	}
292 
293 	/*
294 	 * if there's anything after the final semicolon, send it on
295 	 * to the tokenizer -- it might be a hint comment
296 	 */
297 	if (collector != 0) {
298 		if (strlen(collector) > 0 &&
299 		    do_parse(collector, &err_msg) != 0) {
300 			fprintf(stderr, "parsing error at end of file: %s\n",
301 				err_msg);
302 			return 1;
303 		}
304 
305 		free (collector);
306 	}
307 
308 	return 0;
309 }
310 
/*
 * Portable stand-in for basename(), which isn't available everywhere.
 * Returns a pointer into path just past its last '/'.  Only when path
 * holds no '/' at all is the last '\\' used instead, which covers
 * Windows-style paths.  A path with neither separator is returned
 * unchanged.  Nothing is allocated or modified.
 */
static char *
final_component_of(char *path)
{
	char *separator;

	separator = strrchr(path, '/');
	if (separator == NULL)
		separator = strrchr(path, '\\');

	return (separator == NULL ? path : separator + 1);
}
328 
/*
 * Return a new pathname in which any existing "extension" (the part
 * after the last ".") has been replaced by the given extension.  If
 * the pathname has no extension, the new extension is simply appended.
 * The extension argument is given without its leading dot.
 *
 * Returns allocated memory that the caller must free().  The result
 * is never NULL: if the allocation fails, a message is printed on
 * stderr and the program exits with status 1.
 */
static char *
change_extension(char *path, char *extension)
{
	size_t stem_len, ext_len;
	char *base, *last_dot, *copy;

	/*
	 * Isolate the final component of the pathname, so that we can
	 * examine it for the presence of a '.' without finding a '.'
	 * in a directory name component of the pathname.
	 */
	base = final_component_of(path);
	if (*base != '\0')
		base++;  /* skip initial char in basename, it could be a dot */

	/*
	 * Is there a dot in the basename? If so, then the path has
	 * an extension that we'll elide before adding the new one.
	 * base points into path, so the last dot of the basename is
	 * also the last dot of the whole path.
	 */
	last_dot = strrchr(base, '.');
	if (last_dot != NULL)
		stem_len = (size_t)(last_dot - path);
	else
		stem_len = strlen(path);

	ext_len = strlen(extension);

	/* stem + '.' + extension + terminating NUL */
	copy = malloc(stem_len + 1 + ext_len + 1);
	if (copy == NULL) {
		fprintf(stderr,
		    "out of memory while building an output file name\n");
		exit(1);
	}

	memcpy(copy, path, stem_len);
	copy[stem_len] = '.';
	/* the "+ 1" copies the extension's terminating NUL as well */
	memcpy(copy + stem_len + 1, extension, ext_len + 1);

	return (copy);
}
371 
372 static int
usage(char * error_tag)373 usage(char *error_tag) {
374 	if (error_tag != 0)
375 		perror(error_tag);
376 	fprintf(stderr, "\
377 Usage:  %s [-i inputFile] [-h outputHeaderFile] [-o outputFile] \
378 [-t testOutputFile] [-d] [-v verificationOutputFile] [-x]\n",
379 		progname);
380 	return (1);
381 }
382