xref: /minix/external/bsd/flex/dist/filter.c (revision 0a6a1f1d)
1 /*	$NetBSD: filter.c,v 1.5 2014/10/30 18:44:05 christos Exp $	*/
2 
3 /* filter - postprocessing of flex output through filters */
4 
5 /*  This file is part of flex. */
6 
7 /*  Redistribution and use in source and binary forms, with or without */
8 /*  modification, are permitted provided that the following conditions */
9 /*  are met: */
10 
11 /*  1. Redistributions of source code must retain the above copyright */
12 /*     notice, this list of conditions and the following disclaimer. */
13 /*  2. Redistributions in binary form must reproduce the above copyright */
14 /*     notice, this list of conditions and the following disclaimer in the */
15 /*     documentation and/or other materials provided with the distribution. */
16 
17 /*  Neither the name of the University nor the names of its contributors */
18 /*  may be used to endorse or promote products derived from this software */
19 /*  without specific prior written permission. */
20 
21 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
22 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
23 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
24 /*  PURPOSE. */
25 #include "flexdef.h"
26 __RCSID("$NetBSD: filter.c,v 1.5 2014/10/30 18:44:05 christos Exp $");
27 
/* m4 preamble line that filter_tee_header writes first to each of its
 * output streams.  The embedded ifdef() prints the error text and calls
 * m4exit(2) when `__gnu__' is not defined.
 * NOTE(review): the leading m4_dnl presumably hides this line from an m4
 * that recognises prefixed builtins -- confirm against how m4 is invoked.
 */
static const char * check_4_gnu_m4 =
    "m4_dnl ifdef(`__gnu__', ,"
    "`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)"
    " m4exit(2)')\n";


/** global chain.  Initially NULL. */
struct filter *output_chain = NULL;
36 
37 /* Allocate and initialize an external filter.
38  * @param chain the current chain or NULL for new chain
39  * @param cmd the command to execute.
40  * @param ... a NULL terminated list of (const char*) arguments to command,
41  *            not including argv[0].
42  * @return newest filter in chain
43  */
filter_create_ext(struct filter * chain,const char * cmd,...)44 struct filter *filter_create_ext (struct filter *chain, const char *cmd,
45 				  ...)
46 {
47 	struct filter *f;
48 	int     max_args;
49 	const char *s;
50 	va_list ap;
51 
52 	/* allocate and initialize new filter */
53 	f = (struct filter *) flex_alloc (sizeof (struct filter));
54 	if (!f)
55 		flexerror (_("flex_alloc failed (f) in filter_create_ext"));
56 	memset (f, 0, sizeof (*f));
57 	f->filter_func = NULL;
58 	f->extra = NULL;
59 	f->next = NULL;
60 	f->argc = 0;
61 
62 	if (chain != NULL) {
63 		/* append f to end of chain */
64 		while (chain->next)
65 			chain = chain->next;
66 		chain->next = f;
67 	}
68 
69 
70 	/* allocate argv, and populate it with the argument list. */
71 	max_args = 8;
72 	f->argv =
73 		(const char **) flex_alloc (sizeof (char *) *
74 					    (max_args + 1));
75 	if (!f->argv)
76 		flexerror (_("flex_alloc failed (f->argv) in filter_create_ext"));
77 	f->argv[f->argc++] = cmd;
78 
79 	va_start (ap, cmd);
80 	while ((s = va_arg (ap, const char *)) != NULL) {
81 		if (f->argc >= max_args) {
82 			max_args += 8;
83 			f->argv =
84 				(const char **) flex_realloc (f->argv,
85 							      sizeof (char
86 								      *) *
87 							      (max_args +
88 							       1));
89 		}
90 		f->argv[f->argc++] = s;
91 	}
92 	f->argv[f->argc] = NULL;
93 
94 	va_end (ap);
95 	return f;
96 }
97 
98 /* Allocate and initialize an internal filter.
99  * @param chain the current chain or NULL for new chain
100  * @param filter_func The function that will perform the filtering.
101  *        filter_func should return 0 if successful, and -1
102  *        if an error occurs -- or it can simply exit().
103  * @param extra optional user-defined data to pass to the filter.
104  * @return newest filter in chain
105  */
filter_create_int(struct filter * chain,int (* filter_func)(struct filter *),void * extra)106 struct filter *filter_create_int (struct filter *chain,
107 				  int (*filter_func) (struct filter *),
108 				  void *extra)
109 {
110 	struct filter *f;
111 
112 	/* allocate and initialize new filter */
113 	f = (struct filter *) flex_alloc (sizeof (struct filter));
114 	if (!f)
115 		flexerror (_("flex_alloc failed in filter_create_int"));
116 	memset (f, 0, sizeof (*f));
117 	f->next = NULL;
118 	f->argc = 0;
119 	f->argv = NULL;
120 
121 	f->filter_func = filter_func;
122 	f->extra = extra;
123 
124 	if (chain != NULL) {
125 		/* append f to end of chain */
126 		while (chain->next)
127 			chain = chain->next;
128 		chain->next = f;
129 	}
130 
131 	return f;
132 }
133 
134 /** Fork and exec entire filter chain.
135  *  @param chain The head of the chain.
136  *  @return true on success.
137  */
filter_apply_chain(struct filter * chain)138 bool filter_apply_chain (struct filter * chain)
139 {
140 	int     pid, pipes[2];
141 
142 
143 	/* Tricky recursion, since we want to begin the chain
144 	 * at the END. Why? Because we need all the forked processes
145 	 * to be children of the main flex process.
146 	 */
147 	if (chain)
148 		filter_apply_chain (chain->next);
149 	else
150 		return true;
151 
152 	/* Now we are the right-most unprocessed link in the chain.
153 	 */
154 
155 	fflush (stdout);
156 	fflush (stderr);
157 
158 
159 	if (pipe (pipes) == -1)
160 		flexerror (_("pipe failed"));
161 
162 	if ((pid = fork ()) == -1)
163 		flexerror (_("fork failed"));
164 
165 	if (pid == 0) {
166 		/* child */
167 
168         /* We need stdin (the FILE* stdin) to connect to this new pipe.
169          * There is no portable way to set stdin to a new file descriptor,
170          * as stdin is not an lvalue on some systems (BSD).
171          * So we dup the new pipe onto the stdin descriptor and use a no-op fseek
172          * to sync the stream. This is a Hail Mary situation. It seems to work.
173          */
174 		close (pipes[1]);
175 clearerr(stdin);
176 		if (dup2 (pipes[0], fileno (stdin)) == -1)
177 			flexfatal (_("dup2(pipes[0],0)"));
178 		close (pipes[0]);
179         fseek (stdin, 0, SEEK_CUR);
180 
181 		/* run as a filter, either internally or by exec */
182 		if (chain->filter_func) {
183 			int     r;
184 
185 			if ((r = chain->filter_func (chain)) == -1)
186 				flexfatal (_("filter_func failed"));
187 			exit (0);
188 		}
189 		else {
190 			execvp (chain->argv[0],
191 				(char **const) (chain->argv));
192             lerrsf_fatal ( _("exec of %s failed"),
193                     chain->argv[0]);
194 		}
195 
196 		exit (1);
197 	}
198 
199 	/* Parent */
200 	close (pipes[0]);
201 	if (dup2 (pipes[1], fileno (stdout)) == -1)
202 		flexfatal (_("dup2(pipes[1],1)"));
203 	close (pipes[1]);
204     fseek (stdout, 0, SEEK_CUR);
205 
206 	return true;
207 }
208 
209 /** Truncate the chain to max_len number of filters.
210  * @param chain the current chain.
211  * @param max_len the maximum length of the chain.
212  * @return the resulting length of the chain.
213  */
filter_truncate(struct filter * chain,int max_len)214 int filter_truncate (struct filter *chain, int max_len)
215 {
216 	int     len = 1;
217 
218 	if (!chain)
219 		return 0;
220 
221 	while (chain->next && len < max_len) {
222 		chain = chain->next;
223 		++len;
224 	}
225 
226 	chain->next = NULL;
227 	return len;
228 }
229 
230 /** Splits the chain in order to write to a header file.
231  *  Similar in spirit to the 'tee' program.
232  *  The header file name is in extra.
233  *  @return 0 (zero) on success, and -1 on failure.
234  */
filter_tee_header(struct filter * chain)235 int filter_tee_header (struct filter *chain)
236 {
237 	/* This function reads from stdin and writes to both the C file and the
238 	 * header file at the same time.
239 	 */
240 
241 	const int readsz = 512;
242 	char   *buf;
243 	int     to_cfd = -1;
244 	FILE   *to_c = NULL, *to_h = NULL;
245 	bool    write_header;
246 
247 	write_header = (chain->extra != NULL);
248 
249 	/* Store a copy of the stdout pipe, which is already piped to C file
250 	 * through the running chain. Then create a new pipe to the H file as
251 	 * stdout, and fork the rest of the chain again.
252 	 */
253 
254 	if ((to_cfd = dup (1)) == -1)
255 		flexfatal (_("dup(1) failed"));
256 	to_c = fdopen (to_cfd, "w");
257 
258 	if (write_header) {
259 		if (freopen ((char *) chain->extra, "w", stdout) == NULL)
260 			flexfatal (_("freopen(headerfilename) failed"));
261 
262 		filter_apply_chain (chain->next);
263 		to_h = stdout;
264 	}
265 
266 	/* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch.
267 	 */
268 
269 	if (write_header) {
270         fputs (check_4_gnu_m4, to_h);
271 		fputs ("m4_changecom`'m4_dnl\n", to_h);
272 		fputs ("m4_changequote`'m4_dnl\n", to_h);
273 		fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
274 	    fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
275 		fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
276 		       to_h);
277 		fprintf (to_h, "#ifndef %sHEADER_H\n", prefix);
278 		fprintf (to_h, "#define %sHEADER_H 1\n", prefix);
279 		fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix);
280 		fprintf (to_h,
281 			 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
282 			 headerfilename ? headerfilename : "<stdout>");
283 
284 	}
285 
286     fputs (check_4_gnu_m4, to_c);
287 	fputs ("m4_changecom`'m4_dnl\n", to_c);
288 	fputs ("m4_changequote`'m4_dnl\n", to_c);
289 	fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
290 	fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
291 	fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
292 		 outfilename ? outfilename : "<stdout>");
293 
294 	buf = (char *) flex_alloc (readsz);
295 	if (!buf)
296 		flexerror (_("flex_alloc failed in filter_tee_header"));
297 	while (fgets (buf, readsz, stdin)) {
298 		fputs (buf, to_c);
299 		if (write_header)
300 			fputs (buf, to_h);
301 	}
302 
303 	if (write_header) {
304 		fprintf (to_h, "\n");
305 
306 		/* write a fake line number. It will get fixed by the linedir filter. */
307 		fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
308 
309 		fprintf (to_h, "#undef %sIN_HEADER\n", prefix);
310 		fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix);
311 		fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
312 
313 		fflush (to_h);
314 		if (ferror (to_h))
315 			lerrsf (_("error writing output file %s"),
316 				(char *) chain->extra);
317 
318 		else if (fclose (to_h))
319 			lerrsf (_("error closing output file %s"),
320 				(char *) chain->extra);
321 	}
322 
323 	fflush (to_c);
324 	if (ferror (to_c))
325 		lerrsf (_("error writing output file %s"),
326 			outfilename ? outfilename : "<stdout>");
327 
328 	else if (fclose (to_c))
329 		lerrsf (_("error closing output file %s"),
330 			outfilename ? outfilename : "<stdout>");
331 
332 	while (wait (0) > 0) ;
333 
334 	exit (0);
335 	return 0;
336 }
337 
338 /** Adjust the line numbers in the #line directives of the generated scanner.
339  * After the m4 expansion, the line numbers are incorrect since the m4 macros
340  * can add or remove lines.  This only adjusts line numbers for generated code,
341  * not user code. This also happens to be a good place to squeeze multiple
342  * blank lines into a single blank line.
343  */
filter_fix_linedirs(struct filter * chain)344 int filter_fix_linedirs (struct filter *chain)
345 {
346 	char   *buf;
347 	const int readsz = 512;
348 	int     lineno = 1;
349 	bool    in_gen = true;	/* in generated code */
350 	bool    last_was_blank = false;
351 
352 	if (!chain)
353 		return 0;
354 
355 	buf = (char *) flex_alloc (readsz);
356 	if (!buf)
357 		flexerror (_("flex_alloc failed in filter_fix_linedirs"));
358 
359 	while (fgets (buf, readsz, stdin)) {
360 
361 		regmatch_t m[10];
362 
363 		/* Check for #line directive. */
364 		if (buf[0] == '#'
365 			&& regexec (&regex_linedir, buf, 3, m, 0) == 0) {
366 
367 			char   *fname;
368 
369 			/* extract the line number and filename */
370 			(void)regmatch_strtol (&m[1], buf, NULL, 0);
371 			fname = regmatch_dup (&m[2], buf);
372 
373 			if (strcmp (fname,
374 				outfilename ? outfilename : "<stdout>")
375 					== 0
376 			 || strcmp (fname,
377 			 	headerfilename ? headerfilename : "<stdout>")
378 					== 0) {
379 
380 				char    *s1, *s2;
381 				char	filename[MAXLINE];
382 
383 				s1 = fname;
384 				s2 = filename;
385 
386 				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
387 					/* Escape the backslash */
388 					if (*s1 == '\\')
389 						*s2++ = '\\';
390 					/* Escape the double quote */
391 					if (*s1 == '\"')
392 						*s2++ = '\\';
393 					/* Copy the character as usual */
394 					*s2++ = *s1++;
395 				}
396 
397 				*s2 = '\0';
398 
399 				/* Adjust the line directives. */
400 				in_gen = true;
401 				snprintf (buf, readsz, "#line %d \"%s\"\n",
402 					  lineno + 1, filename);
403 			}
404 			else {
405 				/* it's a #line directive for code we didn't write */
406 				in_gen = false;
407 			}
408 
409 			free (fname);
410 			last_was_blank = false;
411 		}
412 
413 		/* squeeze blank lines from generated code */
414 		else if (in_gen
415 			 && regexec (&regex_blank_line, buf, 0, NULL,
416 				     0) == 0) {
417 			if (last_was_blank)
418 				continue;
419 			else
420 				last_was_blank = true;
421 		}
422 
423 		else {
424 			/* it's a line of normal, non-empty code. */
425 			last_was_blank = false;
426 		}
427 
428 		fputs (buf, stdout);
429 		lineno++;
430 	}
431 	fflush (stdout);
432 	if (ferror (stdout))
433 		lerrsf (_("error writing output file %s"),
434 			outfilename ? outfilename : "<stdout>");
435 
436 	else if (fclose (stdout))
437 		lerrsf (_("error closing output file %s"),
438 			outfilename ? outfilename : "<stdout>");
439 
440 	return 0;
441 }
442 
443 /* vim:set expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */
444