1 /* $OpenBSD: filter.c,v 1.9 2017/08/30 02:54:07 lteo Exp $ */ 2 3 /* filter - postprocessing of flex output through filters */ 4 5 /* This file is part of flex. */ 6 7 /* Redistribution and use in source and binary forms, with or without */ 8 /* modification, are permitted provided that the following conditions */ 9 /* are met: */ 10 11 /* 1. Redistributions of source code must retain the above copyright */ 12 /* notice, this list of conditions and the following disclaimer. */ 13 /* 2. Redistributions in binary form must reproduce the above copyright */ 14 /* notice, this list of conditions and the following disclaimer in the */ 15 /* documentation and/or other materials provided with the distribution. */ 16 17 /* Neither the name of the University nor the names of its contributors */ 18 /* may be used to endorse or promote products derived from this software */ 19 /* without specific prior written permission. */ 20 21 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ 22 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ 23 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ 24 /* PURPOSE. */ 25 26 #include "flexdef.h" 27 static const char *check_4_gnu_m4 = 28 "m4_dnl ifdef(`__gnu__', ," 29 "`errprint(Flex requires GNU M4. Set the PATH or set the M4 environment variable to its path name.)" 30 " m4exit(2)')\n"; 31 32 33 /** global chain. */ 34 struct filter *output_chain = NULL; 35 36 /* Allocate and initialize an external filter. 37 * @param chain the current chain or NULL for new chain 38 * @param cmd the command to execute. 39 * @param ... a NULL terminated list of (const char*) arguments to command, 40 * not including argv[0]. 41 * @return newest filter in chain 42 */ 43 struct filter * 44 filter_create_ext(struct filter * chain, const char *cmd, 45 ...) 46 { 47 struct filter *f; 48 int max_args; 49 const char *s; 50 va_list ap; 51 52 /* allocate and initialize new filter */ 53 f = calloc(sizeof(struct filter), 1); 54 if (!f) 55 flexerror(_("calloc failed (f) in filter_create_ext")); 56 f->filter_func = NULL; 57 f->extra = NULL; 58 f->next = NULL; 59 f->argc = 0; 60 61 if (chain != NULL) { 62 /* append f to end of chain */ 63 while (chain->next) 64 chain = chain->next; 65 chain->next = f; 66 } 67 /* allocate argv, and populate it with the argument list. */ 68 max_args = 8; 69 f->argv = malloc(sizeof(char *) * (max_args + 1)); 70 if (!f->argv) 71 flexerror(_("malloc failed (f->argv) in filter_create_ext")); 72 f->argv[f->argc++] = cmd; 73 74 va_start(ap, cmd); 75 while ((s = va_arg(ap, const char *)) != NULL) { 76 if (f->argc >= max_args) { 77 max_args += 8; 78 f->argv = realloc(f->argv, 79 sizeof(char *) * (max_args + 1)); 80 } 81 f->argv[f->argc++] = s; 82 } 83 f->argv[f->argc] = NULL; 84 85 va_end(ap); 86 return f; 87 } 88 89 /* Allocate and initialize an internal filter. 90 * @param chain the current chain or NULL for new chain 91 * @param filter_func The function that will perform the filtering. 92 * filter_func should return 0 if successful, and -1 93 * if an error occurs -- or it can simply exit(). 94 * @param extra optional user-defined data to pass to the filter. 95 * @return newest filter in chain 96 */ 97 struct filter * 98 filter_create_int(struct filter * chain, 99 int (*filter_func) (struct filter *), 100 void *extra) 101 { 102 struct filter *f; 103 104 /* allocate and initialize new filter */ 105 f = calloc(sizeof(struct filter), 1); 106 if (!f) 107 flexerror(_("calloc failed in filter_create_int")); 108 f->next = NULL; 109 f->argc = 0; 110 f->argv = NULL; 111 112 f->filter_func = filter_func; 113 f->extra = extra; 114 115 if (chain != NULL) { 116 /* append f to end of chain */ 117 while (chain->next) 118 chain = chain->next; 119 chain->next = f; 120 } 121 return f; 122 } 123 124 /** Fork and exec entire filter chain. 125 * @param chain The head of the chain. 126 * @return true on success. 127 */ 128 bool 129 filter_apply_chain(struct filter * chain) 130 { 131 int pid, pipes[2]; 132 133 /* 134 * Tricky recursion, since we want to begin the chain at the END. 135 * Why? Because we need all the forked processes to be children of 136 * the main flex process. 137 */ 138 if (chain) 139 filter_apply_chain(chain->next); 140 else 141 return true; 142 143 /* 144 * Now we are the right-most unprocessed link in the chain. 145 */ 146 147 fflush(stdout); 148 fflush(stderr); 149 150 151 if (pipe(pipes) == -1) 152 flexerror(_("pipe failed")); 153 154 if ((pid = fork()) == -1) 155 flexerror(_("fork failed")); 156 157 if (pid == 0) { 158 /* child */ 159 160 /* 161 * We need stdin (the FILE* stdin) to connect to this new 162 * pipe. There is no portable way to set stdin to a new file 163 * descriptor, as stdin is not an lvalue on some systems 164 * (BSD). So we dup the new pipe onto the stdin descriptor 165 * and use a no-op fseek to sync the stream. This is a Hail 166 * Mary situation. It seems to work. 167 */ 168 close(pipes[1]); 169 clearerr(stdin); 170 if (dup2(pipes[0], fileno(stdin)) == -1) 171 flexfatal(_("dup2(pipes[0],0)")); 172 close(pipes[0]); 173 fseek(stdin, 0, SEEK_CUR); 174 175 /* run as a filter, either internally or by exec */ 176 if (chain->filter_func) { 177 if (chain->filter_func(chain) == -1) 178 flexfatal(_("filter_func failed")); 179 exit(0); 180 } else { 181 execvp(chain->argv[0], 182 (char **const) (chain->argv)); 183 lerrsf_fatal(_("exec of %s failed"), 184 chain->argv[0]); 185 } 186 187 exit(1); 188 } 189 /* Parent */ 190 close(pipes[0]); 191 if (dup2(pipes[1], fileno(stdout)) == -1) 192 flexfatal(_("dup2(pipes[1],1)")); 193 close(pipes[1]); 194 fseek(stdout, 0, SEEK_CUR); 195 196 return true; 197 } 198 199 /** Truncate the chain to max_len number of filters. 200 * @param chain the current chain. 201 * @param max_len the maximum length of the chain. 202 * @return the resulting length of the chain. 203 */ 204 int 205 filter_truncate(struct filter * chain, int max_len) 206 { 207 int len = 1; 208 209 if (!chain) 210 return 0; 211 212 while (chain->next && len < max_len) { 213 chain = chain->next; 214 ++len; 215 } 216 217 chain->next = NULL; 218 return len; 219 } 220 221 /** Splits the chain in order to write to a header file. 222 * Similar in spirit to the 'tee' program. 223 * The header file name is in extra. 224 * @return 0 (zero) on success, and -1 on failure. 225 */ 226 int 227 filter_tee_header(struct filter * chain) 228 { 229 /* 230 * This function reads from stdin and writes to both the C file and 231 * the header file at the same time. 232 */ 233 234 const int readsz = 512; 235 char *buf; 236 int to_cfd = -1; 237 FILE *to_c = NULL, *to_h = NULL; 238 bool write_header; 239 240 write_header = (chain->extra != NULL); 241 242 /* 243 * Store a copy of the stdout pipe, which is already piped to C file 244 * through the running chain. Then create a new pipe to the H file as 245 * stdout, and fork the rest of the chain again. 246 */ 247 248 if ((to_cfd = dup(1)) == -1) 249 flexfatal(_("dup(1) failed")); 250 to_c = fdopen(to_cfd, "w"); 251 252 if (write_header) { 253 if (freopen((char *) chain->extra, "w", stdout) == NULL) 254 flexfatal(_("freopen(headerfilename) failed")); 255 256 filter_apply_chain(chain->next); 257 to_h = stdout; 258 } 259 /* 260 * Now to_c is a pipe to the C branch, and to_h is a pipe to the H 261 * branch. 262 */ 263 264 if (write_header) { 265 fputs(check_4_gnu_m4, to_h); 266 fputs("m4_changecom`'m4_dnl\n", to_h); 267 fputs("m4_changequote`'m4_dnl\n", to_h); 268 fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h); 269 fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h); 270 fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n", 271 to_h); 272 fprintf(to_h, "#ifndef %sHEADER_H\n", prefix); 273 fprintf(to_h, "#define %sHEADER_H 1\n", prefix); 274 fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix); 275 fprintf(to_h, 276 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 277 headerfilename ? headerfilename : "<stdout>"); 278 279 } 280 fputs(check_4_gnu_m4, to_c); 281 fputs("m4_changecom`'m4_dnl\n", to_c); 282 fputs("m4_changequote`'m4_dnl\n", to_c); 283 fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c); 284 fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c); 285 fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", 286 outfilename ? outfilename : "<stdout>"); 287 288 buf = malloc(readsz); 289 if (!buf) 290 flexerror(_("malloc failed in filter_tee_header")); 291 while (fgets(buf, readsz, stdin)) { 292 fputs(buf, to_c); 293 if (write_header) 294 fputs(buf, to_h); 295 } 296 297 if (write_header) { 298 fprintf(to_h, "\n"); 299 300 /* 301 * write a fake line number. It will get fixed by the linedir 302 * filter. 303 */ 304 fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n"); 305 306 fprintf(to_h, "#undef %sIN_HEADER\n", prefix); 307 fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix); 308 fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h); 309 310 fflush(to_h); 311 if (ferror(to_h)) 312 lerrsf(_("error writing output file %s"), 313 (char *) chain->extra); 314 315 else if (fclose(to_h)) 316 lerrsf(_("error closing output file %s"), 317 (char *) chain->extra); 318 } 319 fflush(to_c); 320 if (ferror(to_c)) 321 lerrsf(_("error writing output file %s"), 322 outfilename ? outfilename : "<stdout>"); 323 324 else if (fclose(to_c)) 325 lerrsf(_("error closing output file %s"), 326 outfilename ? outfilename : "<stdout>"); 327 328 while (wait(0) > 0); 329 330 exit(0); 331 return 0; 332 } 333 334 /** Adjust the line numbers in the #line directives of the generated scanner. 335 * After the m4 expansion, the line numbers are incorrect since the m4 macros 336 * can add or remove lines. This only adjusts line numbers for generated code, 337 * not user code. This also happens to be a good place to squeeze multiple 338 * blank lines into a single blank line. 339 */ 340 int 341 filter_fix_linedirs(struct filter * chain) 342 { 343 char *buf; 344 const int readsz = 512; 345 int lineno = 1; 346 bool in_gen = true; /* in generated code */ 347 bool last_was_blank = false; 348 349 if (!chain) 350 return 0; 351 352 buf = malloc(readsz); 353 if (!buf) 354 flexerror(_("malloc failed in filter_fix_linedirs")); 355 356 while (fgets(buf, readsz, stdin)) { 357 358 regmatch_t m[10]; 359 360 /* Check for #line directive. */ 361 if (buf[0] == '#' 362 && regexec(®ex_linedir, buf, 3, m, 0) == 0) { 363 364 int num; 365 char *fname; 366 367 /* extract the line number and filename */ 368 num = regmatch_strtol(&m[1], buf, NULL, 0); 369 fname = regmatch_dup(&m[2], buf); 370 371 if (strcmp(fname, 372 outfilename ? outfilename : "<stdout>") == 0 || 373 strcmp(fname, headerfilename ? headerfilename : 374 "<stdout>") == 0) { 375 376 char *s1, *s2; 377 char filename[MAXLINE]; 378 379 s1 = fname; 380 s2 = filename; 381 382 while ((s2 - filename) < (MAXLINE - 1) && *s1) { 383 /* Escape the backslash */ 384 if (*s1 == '\\') 385 *s2++ = '\\'; 386 /* Escape the double quote */ 387 if (*s1 == '\"') 388 *s2++ = '\\'; 389 /* Copy the character as usual */ 390 *s2++ = *s1++; 391 } 392 393 *s2 = '\0'; 394 395 /* Adjust the line directives. */ 396 in_gen = true; 397 snprintf(buf, readsz, "#line %d \"%s\"\n", 398 lineno + 1, filename); 399 } else { 400 /* 401 * it's a #line directive for code we didn't 402 * write 403 */ 404 in_gen = false; 405 } 406 407 free(fname); 408 last_was_blank = false; 409 } 410 /* squeeze blank lines from generated code */ 411 else if (in_gen && 412 regexec(®ex_blank_line, buf, 0, NULL, 0) == 0) { 413 if (last_was_blank) 414 continue; 415 else 416 last_was_blank = true; 417 } else { 418 /* it's a line of normal, non-empty code. */ 419 last_was_blank = false; 420 } 421 422 fputs(buf, stdout); 423 lineno++; 424 } 425 fflush(stdout); 426 if (ferror(stdout)) 427 lerrsf(_("error writing output file %s"), 428 outfilename ? outfilename : "<stdout>"); 429 430 else if (fclose(stdout)) 431 lerrsf(_("error closing output file %s"), 432 outfilename ? outfilename : "<stdout>"); 433 434 return 0; 435 } 436