1 /* $OpenBSD: filter.c,v 1.10 2024/11/09 18:03:44 op Exp $ */
2
3 /* filter - postprocessing of flex output through filters */
4
5 /* This file is part of flex. */
6
7 /* Redistribution and use in source and binary forms, with or without */
8 /* modification, are permitted provided that the following conditions */
9 /* are met: */
10
11 /* 1. Redistributions of source code must retain the above copyright */
12 /* notice, this list of conditions and the following disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above copyright */
14 /* notice, this list of conditions and the following disclaimer in the */
15 /* documentation and/or other materials provided with the distribution. */
16
17 /* Neither the name of the University nor the names of its contributors */
18 /* may be used to endorse or promote products derived from this software */
19 /* without specific prior written permission. */
20
21 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
22 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
23 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
24 /* PURPOSE. */
25
26 #include "flexdef.h"
/*
 * First line written to every m4 input stream by filter_tee_header().
 * NOTE(review): this looks like a guard that makes an m4 without the
 * `__gnu__' macro print an error and exit with status 2 -- confirm
 * against the m4 invocation used by the caller.
 */
static const char *check_4_gnu_m4 =
    "m4_dnl ifdef(`__gnu__', ,`errprint("
    "Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name."
    ") m4exit(2)')\n";

/** Head of the global output filter chain; starts out empty. */
struct filter *output_chain = NULL;
35
36 /* Allocate and initialize an external filter.
37 * @param chain the current chain or NULL for new chain
38 * @param cmd the command to execute.
39 * @param ... a NULL terminated list of (const char*) arguments to command,
40 * not including argv[0].
41 * @return newest filter in chain
42 */
43 struct filter *
filter_create_ext(struct filter * chain,const char * cmd,...)44 filter_create_ext(struct filter * chain, const char *cmd,
45 ...)
46 {
47 struct filter *f;
48 int max_args;
49 const char *s;
50 va_list ap;
51
52 /* allocate and initialize new filter */
53 f = calloc(sizeof(struct filter), 1);
54 if (!f)
55 flexerror(_("calloc failed (f) in filter_create_ext"));
56 f->filter_func = NULL;
57 f->extra = NULL;
58 f->next = NULL;
59 f->argc = 0;
60
61 if (chain != NULL) {
62 /* append f to end of chain */
63 while (chain->next)
64 chain = chain->next;
65 chain->next = f;
66 }
67 /* allocate argv, and populate it with the argument list. */
68 max_args = 8;
69 f->argv = malloc(sizeof(char *) * (max_args + 1));
70 if (!f->argv)
71 flexerror(_("malloc failed (f->argv) in filter_create_ext"));
72 f->argv[f->argc++] = cmd;
73
74 va_start(ap, cmd);
75 while ((s = va_arg(ap, const char *)) != NULL) {
76 if (f->argc >= max_args) {
77 max_args += 8;
78 f->argv = realloc(f->argv,
79 sizeof(char *) * (max_args + 1));
80 }
81 f->argv[f->argc++] = s;
82 }
83 f->argv[f->argc] = NULL;
84
85 va_end(ap);
86 return f;
87 }
88
89 /* Allocate and initialize an internal filter.
90 * @param chain the current chain or NULL for new chain
91 * @param filter_func The function that will perform the filtering.
92 * filter_func should return 0 if successful, and -1
93 * if an error occurs -- or it can simply exit().
94 * @param extra optional user-defined data to pass to the filter.
95 * @return newest filter in chain
96 */
97 struct filter *
filter_create_int(struct filter * chain,int (* filter_func)(struct filter *),void * extra)98 filter_create_int(struct filter * chain,
99 int (*filter_func) (struct filter *),
100 void *extra)
101 {
102 struct filter *f;
103
104 /* allocate and initialize new filter */
105 f = calloc(sizeof(struct filter), 1);
106 if (!f)
107 flexerror(_("calloc failed in filter_create_int"));
108 f->next = NULL;
109 f->argc = 0;
110 f->argv = NULL;
111
112 f->filter_func = filter_func;
113 f->extra = extra;
114
115 if (chain != NULL) {
116 /* append f to end of chain */
117 while (chain->next)
118 chain = chain->next;
119 chain->next = f;
120 }
121 return f;
122 }
123
124 /** Fork and exec entire filter chain.
125 * @param chain The head of the chain.
126 * @return true on success.
127 */
128 bool
filter_apply_chain(struct filter * chain)129 filter_apply_chain(struct filter * chain)
130 {
131 int pid, pipes[2];
132
133 /*
134 * Tricky recursion, since we want to begin the chain at the END.
135 * Why? Because we need all the forked processes to be children of
136 * the main flex process.
137 */
138 if (chain)
139 filter_apply_chain(chain->next);
140 else
141 return true;
142
143 /*
144 * Now we are the right-most unprocessed link in the chain.
145 */
146
147 fflush(stdout);
148 fflush(stderr);
149
150
151 if (pipe(pipes) == -1)
152 flexerror(_("pipe failed"));
153
154 if ((pid = fork()) == -1)
155 flexerror(_("fork failed"));
156
157 if (pid == 0) {
158 /* child */
159
160 /*
161 * We need stdin (the FILE* stdin) to connect to this new
162 * pipe. There is no portable way to set stdin to a new file
163 * descriptor, as stdin is not an lvalue on some systems
164 * (BSD). So we dup the new pipe onto the stdin descriptor
165 * and use a no-op fseek to sync the stream. This is a Hail
166 * Mary situation. It seems to work.
167 */
168 close(pipes[1]);
169 clearerr(stdin);
170 if (dup2(pipes[0], fileno(stdin)) == -1)
171 flexfatal(_("dup2(pipes[0],0)"));
172 close(pipes[0]);
173 fseek(stdin, 0, SEEK_CUR);
174
175 /* run as a filter, either internally or by exec */
176 if (chain->filter_func) {
177 if (chain->filter_func(chain) == -1)
178 flexfatal(_("filter_func failed"));
179 exit(0);
180 } else {
181 execvp(chain->argv[0],
182 (char **const) (chain->argv));
183 lerrsf_fatal(_("exec of %s failed"),
184 chain->argv[0]);
185 }
186
187 exit(1);
188 }
189 /* Parent */
190 close(pipes[0]);
191 if (dup2(pipes[1], fileno(stdout)) == -1)
192 flexfatal(_("dup2(pipes[1],1)"));
193 close(pipes[1]);
194 fseek(stdout, 0, SEEK_CUR);
195
196 return true;
197 }
198
199 /** Truncate the chain to max_len number of filters.
200 * @param chain the current chain.
201 * @param max_len the maximum length of the chain.
202 * @return the resulting length of the chain.
203 */
204 int
filter_truncate(struct filter * chain,int max_len)205 filter_truncate(struct filter * chain, int max_len)
206 {
207 int len = 1;
208
209 if (!chain)
210 return 0;
211
212 while (chain->next && len < max_len) {
213 chain = chain->next;
214 ++len;
215 }
216
217 chain->next = NULL;
218 return len;
219 }
220
221 /** Splits the chain in order to write to a header file.
222 * Similar in spirit to the 'tee' program.
223 * The header file name is in extra.
224 * @return 0 (zero) on success, and -1 on failure.
225 */
226 int
filter_tee_header(struct filter * chain)227 filter_tee_header(struct filter * chain)
228 {
229 /*
230 * This function reads from stdin and writes to both the C file and
231 * the header file at the same time.
232 */
233
234 const int readsz = 512;
235 char *buf;
236 int to_cfd = -1;
237 FILE *to_c = NULL, *to_h = NULL;
238 bool write_header;
239
240 write_header = (chain->extra != NULL);
241
242 /*
243 * Store a copy of the stdout pipe, which is already piped to C file
244 * through the running chain. Then create a new pipe to the H file as
245 * stdout, and fork the rest of the chain again.
246 */
247
248 if ((to_cfd = dup(1)) == -1)
249 flexfatal(_("dup(1) failed"));
250 to_c = fdopen(to_cfd, "w");
251
252 if (write_header) {
253 if (freopen((char *) chain->extra, "w", stdout) == NULL)
254 flexfatal(_("freopen(headerfilename) failed"));
255
256 filter_apply_chain(chain->next);
257 to_h = stdout;
258 }
259 /*
260 * Now to_c is a pipe to the C branch, and to_h is a pipe to the H
261 * branch.
262 */
263
264 if (write_header) {
265 fputs(check_4_gnu_m4, to_h);
266 fputs("m4_changecom`'m4_dnl\n", to_h);
267 fputs("m4_changequote`'m4_dnl\n", to_h);
268 fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
269 fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
270 fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
271 to_h);
272 fprintf(to_h, "#ifndef %sHEADER_H\n", prefix);
273 fprintf(to_h, "#define %sHEADER_H 1\n", prefix);
274 fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix);
275 fprintf(to_h,
276 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
277 headerfilename ? headerfilename : "<stdout>");
278
279 }
280 fputs(check_4_gnu_m4, to_c);
281 fputs("m4_changecom`'m4_dnl\n", to_c);
282 fputs("m4_changequote`'m4_dnl\n", to_c);
283 fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
284 fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
285 fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
286 outfilename ? outfilename : "<stdout>");
287
288 buf = malloc(readsz);
289 if (!buf)
290 flexerror(_("malloc failed in filter_tee_header"));
291 while (fgets(buf, readsz, stdin)) {
292 fputs(buf, to_c);
293 if (write_header)
294 fputs(buf, to_h);
295 }
296
297 if (write_header) {
298 fprintf(to_h, "\n");
299
300 /*
301 * write a fake line number. It will get fixed by the linedir
302 * filter.
303 */
304 fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
305
306 fprintf(to_h, "#undef %sIN_HEADER\n", prefix);
307 fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix);
308 fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
309
310 fflush(to_h);
311 if (ferror(to_h))
312 lerrsf(_("error writing output file %s"),
313 (char *) chain->extra);
314
315 else if (fclose(to_h))
316 lerrsf(_("error closing output file %s"),
317 (char *) chain->extra);
318 }
319 fflush(to_c);
320 if (ferror(to_c))
321 lerrsf(_("error writing output file %s"),
322 outfilename ? outfilename : "<stdout>");
323
324 else if (fclose(to_c))
325 lerrsf(_("error closing output file %s"),
326 outfilename ? outfilename : "<stdout>");
327
328 while (wait(0) > 0);
329
330 exit(0);
331 return 0;
332 }
333
334 /** Adjust the line numbers in the #line directives of the generated scanner.
335 * After the m4 expansion, the line numbers are incorrect since the m4 macros
336 * can add or remove lines. This only adjusts line numbers for generated code,
337 * not user code. This also happens to be a good place to squeeze multiple
338 * blank lines into a single blank line.
339 */
340 int
filter_fix_linedirs(struct filter * chain)341 filter_fix_linedirs(struct filter * chain)
342 {
343 char *buf;
344 const int readsz = 512;
345 int lineno = 1;
346 bool in_gen = true; /* in generated code */
347 bool last_was_blank = false;
348
349 if (!chain)
350 return 0;
351
352 buf = malloc(readsz);
353 if (!buf)
354 flexerror(_("malloc failed in filter_fix_linedirs"));
355
356 while (fgets(buf, readsz, stdin)) {
357
358 regmatch_t m[10];
359
360 /* Check for #line directive. */
361 if (buf[0] == '#'
362 && regexec(®ex_linedir, buf, 3, m, 0) == 0) {
363
364 char *fname;
365
366 /* extract the line number and filename */
367 regmatch_strtol(&m[1], buf, NULL, 0);
368 fname = regmatch_dup(&m[2], buf);
369
370 if (strcmp(fname,
371 outfilename ? outfilename : "<stdout>") == 0 ||
372 strcmp(fname, headerfilename ? headerfilename :
373 "<stdout>") == 0) {
374
375 char *s1, *s2;
376 char filename[MAXLINE];
377
378 s1 = fname;
379 s2 = filename;
380
381 while ((s2 - filename) < (MAXLINE - 1) && *s1) {
382 /* Escape the backslash */
383 if (*s1 == '\\')
384 *s2++ = '\\';
385 /* Escape the double quote */
386 if (*s1 == '\"')
387 *s2++ = '\\';
388 /* Copy the character as usual */
389 *s2++ = *s1++;
390 }
391
392 *s2 = '\0';
393
394 /* Adjust the line directives. */
395 in_gen = true;
396 snprintf(buf, readsz, "#line %d \"%s\"\n",
397 lineno + 1, filename);
398 } else {
399 /*
400 * it's a #line directive for code we didn't
401 * write
402 */
403 in_gen = false;
404 }
405
406 free(fname);
407 last_was_blank = false;
408 }
409 /* squeeze blank lines from generated code */
410 else if (in_gen &&
411 regexec(®ex_blank_line, buf, 0, NULL, 0) == 0) {
412 if (last_was_blank)
413 continue;
414 else
415 last_was_blank = true;
416 } else {
417 /* it's a line of normal, non-empty code. */
418 last_was_blank = false;
419 }
420
421 fputs(buf, stdout);
422 lineno++;
423 }
424 fflush(stdout);
425 if (ferror(stdout))
426 lerrsf(_("error writing output file %s"),
427 outfilename ? outfilename : "<stdout>");
428
429 else if (fclose(stdout))
430 lerrsf(_("error closing output file %s"),
431 outfilename ? outfilename : "<stdout>");
432
433 return 0;
434 }
435