xref: /openbsd/usr.bin/lex/filter.c (revision 73471bf0)
1 /* $OpenBSD: filter.c,v 1.9 2017/08/30 02:54:07 lteo Exp $ */
2 
3 /* filter - postprocessing of flex output through filters */
4 
5 /*  This file is part of flex. */
6 
7 /*  Redistribution and use in source and binary forms, with or without */
8 /*  modification, are permitted provided that the following conditions */
9 /*  are met: */
10 
11 /*  1. Redistributions of source code must retain the above copyright */
12 /*     notice, this list of conditions and the following disclaimer. */
13 /*  2. Redistributions in binary form must reproduce the above copyright */
14 /*     notice, this list of conditions and the following disclaimer in the */
15 /*     documentation and/or other materials provided with the distribution. */
16 
17 /*  Neither the name of the University nor the names of its contributors */
18 /*  may be used to endorse or promote products derived from this software */
19 /*  without specific prior written permission. */
20 
21 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
22 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
23 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
24 /*  PURPOSE. */
25 
26 #include "flexdef.h"
/*
 * One line of m4 input that filter_tee_header() writes at the top of each
 * output stream.  Its text asks m4 to print the "Flex requires GNU M4"
 * message and exit with status 2 when the `__gnu__' macro is not defined.
 */
static const char *check_4_gnu_m4 =
    "m4_dnl ifdef(`__gnu__', ,`errprint("
    "Flex requires GNU M4. "
    "Set the PATH or set the M4 environment variable to its path name."
    ")"
    " m4exit(2)')\n";
31 
32 
/**
 * Global filter chain.  It starts out empty (NULL) and nothing in this file
 * assigns to it; presumably callers build it with filter_create_ext() /
 * filter_create_int() and run it with filter_apply_chain() -- confirm
 * against the callers.
 */
struct filter *output_chain = NULL;
35 
36 /* Allocate and initialize an external filter.
37  * @param chain the current chain or NULL for new chain
38  * @param cmd the command to execute.
39  * @param ... a NULL terminated list of (const char*) arguments to command,
40  *            not including argv[0].
41  * @return newest filter in chain
42  */
43 struct filter *
44 filter_create_ext(struct filter * chain, const char *cmd,
45     ...)
46 {
47 	struct filter *f;
48 	int max_args;
49 	const char *s;
50 	va_list ap;
51 
52 	/* allocate and initialize new filter */
53 	f = calloc(sizeof(struct filter), 1);
54 	if (!f)
55 		flexerror(_("calloc failed (f) in filter_create_ext"));
56 	f->filter_func = NULL;
57 	f->extra = NULL;
58 	f->next = NULL;
59 	f->argc = 0;
60 
61 	if (chain != NULL) {
62 		/* append f to end of chain */
63 		while (chain->next)
64 			chain = chain->next;
65 		chain->next = f;
66 	}
67 	/* allocate argv, and populate it with the argument list. */
68 	max_args = 8;
69 	f->argv = malloc(sizeof(char *) * (max_args + 1));
70 	if (!f->argv)
71 		flexerror(_("malloc failed (f->argv) in filter_create_ext"));
72 	f->argv[f->argc++] = cmd;
73 
74 	va_start(ap, cmd);
75 	while ((s = va_arg(ap, const char *)) != NULL) {
76 		if (f->argc >= max_args) {
77 			max_args += 8;
78 			f->argv = realloc(f->argv,
79 			    sizeof(char *) * (max_args + 1));
80 		}
81 		f->argv[f->argc++] = s;
82 	}
83 	f->argv[f->argc] = NULL;
84 
85 	va_end(ap);
86 	return f;
87 }
88 
89 /* Allocate and initialize an internal filter.
90  * @param chain the current chain or NULL for new chain
91  * @param filter_func The function that will perform the filtering.
92  *        filter_func should return 0 if successful, and -1
93  *        if an error occurs -- or it can simply exit().
94  * @param extra optional user-defined data to pass to the filter.
95  * @return newest filter in chain
96  */
97 struct filter *
98 filter_create_int(struct filter * chain,
99     int (*filter_func) (struct filter *),
100     void *extra)
101 {
102 	struct filter *f;
103 
104 	/* allocate and initialize new filter */
105 	f = calloc(sizeof(struct filter), 1);
106 	if (!f)
107 		flexerror(_("calloc failed in filter_create_int"));
108 	f->next = NULL;
109 	f->argc = 0;
110 	f->argv = NULL;
111 
112 	f->filter_func = filter_func;
113 	f->extra = extra;
114 
115 	if (chain != NULL) {
116 		/* append f to end of chain */
117 		while (chain->next)
118 			chain = chain->next;
119 		chain->next = f;
120 	}
121 	return f;
122 }
123 
124 /** Fork and exec entire filter chain.
125  *  @param chain The head of the chain.
126  *  @return true on success.
127  */
128 bool
129 filter_apply_chain(struct filter * chain)
130 {
131 	int pid, pipes[2];
132 
133 	/*
134 	 * Tricky recursion, since we want to begin the chain at the END.
135 	 * Why? Because we need all the forked processes to be children of
136 	 * the main flex process.
137 	 */
138 	if (chain)
139 		filter_apply_chain(chain->next);
140 	else
141 		return true;
142 
143 	/*
144 	 * Now we are the right-most unprocessed link in the chain.
145 	 */
146 
147 	fflush(stdout);
148 	fflush(stderr);
149 
150 
151 	if (pipe(pipes) == -1)
152 		flexerror(_("pipe failed"));
153 
154 	if ((pid = fork()) == -1)
155 		flexerror(_("fork failed"));
156 
157 	if (pid == 0) {
158 		/* child */
159 
160 		/*
161 		 * We need stdin (the FILE* stdin) to connect to this new
162 		 * pipe. There is no portable way to set stdin to a new file
163 		 * descriptor, as stdin is not an lvalue on some systems
164 		 * (BSD). So we dup the new pipe onto the stdin descriptor
165 		 * and use a no-op fseek to sync the stream. This is a Hail
166 		 * Mary situation. It seems to work.
167 		 */
168 		close(pipes[1]);
169 		clearerr(stdin);
170 		if (dup2(pipes[0], fileno(stdin)) == -1)
171 			flexfatal(_("dup2(pipes[0],0)"));
172 		close(pipes[0]);
173 		fseek(stdin, 0, SEEK_CUR);
174 
175 		/* run as a filter, either internally or by exec */
176 		if (chain->filter_func) {
177 			if (chain->filter_func(chain) == -1)
178 				flexfatal(_("filter_func failed"));
179 			exit(0);
180 		} else {
181 			execvp(chain->argv[0],
182 			    (char **const) (chain->argv));
183 			lerrsf_fatal(_("exec of %s failed"),
184 			    chain->argv[0]);
185 		}
186 
187 		exit(1);
188 	}
189 	/* Parent */
190 	close(pipes[0]);
191 	if (dup2(pipes[1], fileno(stdout)) == -1)
192 		flexfatal(_("dup2(pipes[1],1)"));
193 	close(pipes[1]);
194 	fseek(stdout, 0, SEEK_CUR);
195 
196 	return true;
197 }
198 
199 /** Truncate the chain to max_len number of filters.
200  * @param chain the current chain.
201  * @param max_len the maximum length of the chain.
202  * @return the resulting length of the chain.
203  */
204 int
205 filter_truncate(struct filter * chain, int max_len)
206 {
207 	int len = 1;
208 
209 	if (!chain)
210 		return 0;
211 
212 	while (chain->next && len < max_len) {
213 		chain = chain->next;
214 		++len;
215 	}
216 
217 	chain->next = NULL;
218 	return len;
219 }
220 
221 /** Splits the chain in order to write to a header file.
222  *  Similar in spirit to the 'tee' program.
223  *  The header file name is in extra.
224  *  @return 0 (zero) on success, and -1 on failure.
225  */
226 int
227 filter_tee_header(struct filter * chain)
228 {
229 	/*
230 	 * This function reads from stdin and writes to both the C file and
231 	 * the header file at the same time.
232 	 */
233 
234 	const int readsz = 512;
235 	char *buf;
236 	int to_cfd = -1;
237 	FILE *to_c = NULL, *to_h = NULL;
238 	bool write_header;
239 
240 	write_header = (chain->extra != NULL);
241 
242 	/*
243 	 * Store a copy of the stdout pipe, which is already piped to C file
244 	 * through the running chain. Then create a new pipe to the H file as
245 	 * stdout, and fork the rest of the chain again.
246 	 */
247 
248 	if ((to_cfd = dup(1)) == -1)
249 		flexfatal(_("dup(1) failed"));
250 	to_c = fdopen(to_cfd, "w");
251 
252 	if (write_header) {
253 		if (freopen((char *) chain->extra, "w", stdout) == NULL)
254 			flexfatal(_("freopen(headerfilename) failed"));
255 
256 		filter_apply_chain(chain->next);
257 		to_h = stdout;
258 	}
259 	/*
260 	 * Now to_c is a pipe to the C branch, and to_h is a pipe to the H
261 	 * branch.
262 	 */
263 
264 	if (write_header) {
265 		fputs(check_4_gnu_m4, to_h);
266 		fputs("m4_changecom`'m4_dnl\n", to_h);
267 		fputs("m4_changequote`'m4_dnl\n", to_h);
268 		fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
269 		fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
270 		fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
271 		    to_h);
272 		fprintf(to_h, "#ifndef %sHEADER_H\n", prefix);
273 		fprintf(to_h, "#define %sHEADER_H 1\n", prefix);
274 		fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix);
275 		fprintf(to_h,
276 		    "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
277 		    headerfilename ? headerfilename : "<stdout>");
278 
279 	}
280 	fputs(check_4_gnu_m4, to_c);
281 	fputs("m4_changecom`'m4_dnl\n", to_c);
282 	fputs("m4_changequote`'m4_dnl\n", to_c);
283 	fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
284 	fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
285 	fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
286 	    outfilename ? outfilename : "<stdout>");
287 
288 	buf = malloc(readsz);
289 	if (!buf)
290 		flexerror(_("malloc failed in filter_tee_header"));
291 	while (fgets(buf, readsz, stdin)) {
292 		fputs(buf, to_c);
293 		if (write_header)
294 			fputs(buf, to_h);
295 	}
296 
297 	if (write_header) {
298 		fprintf(to_h, "\n");
299 
300 		/*
301 		 * write a fake line number. It will get fixed by the linedir
302 		 * filter.
303 		 */
304 		fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
305 
306 		fprintf(to_h, "#undef %sIN_HEADER\n", prefix);
307 		fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix);
308 		fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
309 
310 		fflush(to_h);
311 		if (ferror(to_h))
312 			lerrsf(_("error writing output file %s"),
313 			    (char *) chain->extra);
314 
315 		else if (fclose(to_h))
316 			lerrsf(_("error closing output file %s"),
317 			    (char *) chain->extra);
318 	}
319 	fflush(to_c);
320 	if (ferror(to_c))
321 		lerrsf(_("error writing output file %s"),
322 		    outfilename ? outfilename : "<stdout>");
323 
324 	else if (fclose(to_c))
325 		lerrsf(_("error closing output file %s"),
326 		    outfilename ? outfilename : "<stdout>");
327 
328 	while (wait(0) > 0);
329 
330 	exit(0);
331 	return 0;
332 }
333 
334 /** Adjust the line numbers in the #line directives of the generated scanner.
335  * After the m4 expansion, the line numbers are incorrect since the m4 macros
336  * can add or remove lines.  This only adjusts line numbers for generated code,
337  * not user code. This also happens to be a good place to squeeze multiple
338  * blank lines into a single blank line.
339  */
340 int
341 filter_fix_linedirs(struct filter * chain)
342 {
343 	char *buf;
344 	const int readsz = 512;
345 	int lineno = 1;
346 	bool in_gen = true;	/* in generated code */
347 	bool last_was_blank = false;
348 
349 	if (!chain)
350 		return 0;
351 
352 	buf = malloc(readsz);
353 	if (!buf)
354 		flexerror(_("malloc failed in filter_fix_linedirs"));
355 
356 	while (fgets(buf, readsz, stdin)) {
357 
358 		regmatch_t m[10];
359 
360 		/* Check for #line directive. */
361 		if (buf[0] == '#'
362 		    && regexec(&regex_linedir, buf, 3, m, 0) == 0) {
363 
364 			int num;
365 			char *fname;
366 
367 			/* extract the line number and filename */
368 			num = regmatch_strtol(&m[1], buf, NULL, 0);
369 			fname = regmatch_dup(&m[2], buf);
370 
371 			if (strcmp(fname,
372 				outfilename ? outfilename : "<stdout>") == 0 ||
373 			    strcmp(fname, headerfilename ? headerfilename :
374 				"<stdout>") == 0) {
375 
376 				char *s1, *s2;
377 				char filename[MAXLINE];
378 
379 				s1 = fname;
380 				s2 = filename;
381 
382 				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
383 					/* Escape the backslash */
384 					if (*s1 == '\\')
385 						*s2++ = '\\';
386 					/* Escape the double quote */
387 					if (*s1 == '\"')
388 						*s2++ = '\\';
389 					/* Copy the character as usual */
390 					*s2++ = *s1++;
391 				}
392 
393 				*s2 = '\0';
394 
395 				/* Adjust the line directives. */
396 				in_gen = true;
397 				snprintf(buf, readsz, "#line %d \"%s\"\n",
398 				    lineno + 1, filename);
399 			} else {
400 				/*
401 				 * it's a #line directive for code we didn't
402 				 * write
403 				 */
404 				in_gen = false;
405 			}
406 
407 			free(fname);
408 			last_was_blank = false;
409 		}
410 		/* squeeze blank lines from generated code */
411 		else if (in_gen &&
412 		    regexec(&regex_blank_line, buf, 0, NULL, 0) == 0) {
413 			if (last_was_blank)
414 				continue;
415 			else
416 				last_was_blank = true;
417 		} else {
418 			/* it's a line of normal, non-empty code. */
419 			last_was_blank = false;
420 		}
421 
422 		fputs(buf, stdout);
423 		lineno++;
424 	}
425 	fflush(stdout);
426 	if (ferror(stdout))
427 		lerrsf(_("error writing output file %s"),
428 		    outfilename ? outfilename : "<stdout>");
429 
430 	else if (fclose(stdout))
431 		lerrsf(_("error closing output file %s"),
432 		    outfilename ? outfilename : "<stdout>");
433 
434 	return 0;
435 }
436