1 /* 2 * Copyright (c) 2002 - 2014 Tony Finch <dot@dotat.at> 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 /* 27 * unifdef - remove ifdef'ed lines 28 * 29 * This code was derived from software contributed to Berkeley by Dave Yost. 30 * It was rewritten to support ANSI C by Tony Finch. The original version 31 * of unifdef carried the 4-clause BSD copyright licence. None of its code 32 * remains in this version (though some of the names remain) so it now 33 * carries a more liberal licence. 34 * 35 * Wishlist: 36 * provide an option which will append the name of the 37 * appropriate symbol after #else's and #endif's 38 * provide an option which will check symbols after 39 * #else's and #endif's to see that they match their 40 * corresponding #ifdef or #ifndef 41 * 42 * These require better buffer handling, which would also make 43 * it possible to handle all "dodgy" directives correctly. 44 */ 45 46 #include <sys/stat.h> 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <stdarg.h> 51 #include <stdbool.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <unistd.h> 56 57 static const char copyright[] = 58 #include "version.h" 59 "@(#) $Author: deraadt $\n" 60 "@(#) $URL: http://dotat.at/prog/unifdef $\n" 61 ; 62 63 /* types of input lines: */ 64 typedef enum { 65 LT_TRUEI, /* a true #if with ignore flag */ 66 LT_FALSEI, /* a false #if with ignore flag */ 67 LT_IF, /* an unknown #if */ 68 LT_TRUE, /* a true #if */ 69 LT_FALSE, /* a false #if */ 70 LT_ELIF, /* an unknown #elif */ 71 LT_ELTRUE, /* a true #elif */ 72 LT_ELFALSE, /* a false #elif */ 73 LT_ELSE, /* #else */ 74 LT_ENDIF, /* #endif */ 75 LT_DODGY, /* flag: directive is not on one line */ 76 LT_DODGY_LAST = LT_DODGY + LT_ENDIF, 77 LT_PLAIN, /* ordinary line */ 78 LT_EOF, /* end of file */ 79 LT_ERROR, /* unevaluable #if */ 80 LT_COUNT 81 } Linetype; 82 83 static char const * const linetype_name[] = { 84 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE", 85 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", 86 "DODGY TRUEI", "DODGY FALSEI", 87 "DODGY IF", "DODGY TRUE", "DODGY FALSE", 88 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE", 89 "DODGY ELSE", "DODGY ENDIF", 90 "PLAIN", "EOF", "ERROR" 91 }; 92 93 #define linetype_if2elif(lt) ((Linetype)(lt - LT_IF + LT_ELIF)) 94 #define linetype_2dodgy(lt) ((Linetype)(lt + LT_DODGY)) 95 96 /* state of #if processing */ 97 typedef enum { 98 IS_OUTSIDE, 99 IS_FALSE_PREFIX, /* false #if followed by false #elifs */ 100 IS_TRUE_PREFIX, /* first non-false #(el)if is true */ 101 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */ 102 IS_FALSE_MIDDLE, /* a false #elif after a pass state */ 103 IS_TRUE_MIDDLE, /* a true #elif after a pass state */ 104 IS_PASS_ELSE, /* an else after a pass state */ 105 IS_FALSE_ELSE, /* an else after a true state */ 106 IS_TRUE_ELSE, /* an else after only false states */ 107 IS_FALSE_TRAILER, /* #elifs after a true are false */ 108 IS_COUNT 109 } Ifstate; 110 111 static char const * const ifstate_name[] = { 112 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX", 113 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE", 114 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE", 115 "FALSE_TRAILER" 116 }; 117 118 /* state of comment parser */ 119 typedef enum { 120 NO_COMMENT = false, /* outside a comment */ 121 C_COMMENT, /* in a comment like this one */ 122 CXX_COMMENT, /* between // and end of line */ 123 STARTING_COMMENT, /* just after slash-backslash-newline */ 124 FINISHING_COMMENT, /* star-backslash-newline in a C comment */ 125 CHAR_LITERAL, /* inside '' */ 126 STRING_LITERAL /* inside "" */ 127 } Comment_state; 128 129 static char const * const comment_name[] = { 130 "NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING" 131 }; 132 133 /* state of preprocessor line parser */ 134 typedef enum { 135 LS_START, /* only space and comments on this line */ 136 LS_HASH, /* only space, comments, and a hash */ 137 LS_DIRTY /* this line can't be a preprocessor line */ 138 } Line_state; 139 140 static char const * const linestate_name[] = { 141 "START", "HASH", "DIRTY" 142 }; 143 144 /* 145 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 146 */ 147 #define MAXDEPTH 64 /* maximum #if nesting */ 148 #define MAXLINE 4096 /* maximum length of line */ 149 #define MAXSYMS 16384 /* maximum number of symbols */ 150 151 /* 152 * Sometimes when editing a keyword the replacement text is longer, so 153 * we leave some space at the end of the tline buffer to accommodate this. 154 */ 155 #define EDITSLOP 10 156 157 /* 158 * Globals. 159 */ 160 161 static bool compblank; /* -B: compress blank lines */ 162 static bool lnblank; /* -b: blank deleted lines */ 163 static bool complement; /* -c: do the complement */ 164 static bool debugging; /* -d: debugging reports */ 165 static bool inplace; /* -m: modify in place */ 166 static bool iocccok; /* -e: fewer IOCCC errors */ 167 static bool strictlogic; /* -K: keep ambiguous #ifs */ 168 static bool killconsts; /* -k: eval constant #ifs */ 169 static bool lnnum; /* -n: add #line directives */ 170 static bool symlist; /* -s: output symbol list */ 171 static bool symdepth; /* -S: output symbol depth */ 172 static bool text; /* -t: this is a text file */ 173 174 static const char *symname[MAXSYMS]; /* symbol name */ 175 static const char *value[MAXSYMS]; /* -Dsym=value */ 176 static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ 177 static int nsyms; /* number of symbols */ 178 179 static FILE *input; /* input file pointer */ 180 static const char *filename; /* input file name */ 181 static int linenum; /* current line number */ 182 static const char *linefile; /* file name for #line */ 183 static FILE *output; /* output file pointer */ 184 static const char *ofilename; /* output file name */ 185 static const char *backext; /* backup extension */ 186 static char *tempname; /* avoid splatting input */ 187 188 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ 189 static char *keyword; /* used for editing #elif's */ 190 191 /* 192 * When processing a file, the output's newline style will match the 193 * input's, and unifdef correctly handles CRLF or LF endings whatever 194 * the platform's native style. The stdio streams are opened in binary 195 * mode to accommodate platforms whose native newline style is CRLF. 196 * When the output isn't a processed input file (when it is error / 197 * debug / diagnostic messages) then unifdef uses native line endings. 198 */ 199 200 static const char *newline; /* input file format */ 201 static const char newline_unix[] = "\n"; 202 static const char newline_crlf[] = "\r\n"; 203 204 static Comment_state incomment; /* comment parser state */ 205 static Line_state linestate; /* #if line parser state */ 206 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ 207 static bool ignoring[MAXDEPTH]; /* ignore comments state */ 208 static int stifline[MAXDEPTH]; /* start of current #if */ 209 static int depth; /* current #if nesting */ 210 static int delcount; /* count of deleted lines */ 211 static unsigned blankcount; /* count of blank lines */ 212 static unsigned blankmax; /* maximum recent blankcount */ 213 static bool constexpr; /* constant #if expression */ 214 static bool zerosyms; /* to format symdepth output */ 215 static bool firstsym; /* ditto */ 216 217 static int exitmode; /* exit status mode */ 218 static int exitstat; /* program exit status */ 219 220 static void addsym1(bool, bool, char *); 221 static void addsym2(bool, const char *, const char *); 222 static char *astrcat(const char *, const char *); 223 static void cleantemp(void); 224 static void closeio(void); 225 static void debug(const char *, ...); 226 static void debugsym(const char *, int); 227 static bool defundef(void); 228 static void defundefile(const char *); 229 static void done(void); 230 static void error(const char *); 231 static int findsym(const char **); 232 static void flushline(bool); 233 static void hashline(void); 234 static void help(void); 235 static Linetype ifeval(const char **); 236 static void ignoreoff(void); 237 static void ignoreon(void); 238 static void indirectsym(void); 239 static void keywordedit(const char *); 240 static const char *matchsym(const char *, const char *); 241 static void nest(void); 242 static Linetype parseline(void); 243 static void process(void); 244 static void processinout(const char *, const char *); 245 static const char *skipargs(const char *); 246 static const char *skipcomment(const char *); 247 static const char *skiphash(void); 248 static const char *skipline(const char *); 249 static const char *skipsym(const char *); 250 static void state(Ifstate); 251 static void unnest(void); 252 static void usage(void); 253 static void version(void); 254 static const char *xstrdup(const char *, const char *); 255 static FILE * mktempmode(char *tmp, int mode); 256 257 #define endsym(c) (!isalnum((unsigned char)c) && c != '_') 258 259 /* 260 * The main program. 261 */ 262 int 263 main(int argc, char *argv[]) 264 { 265 const char *errstr; 266 int opt; 267 268 if (pledge("stdio rpath wpath cpath fattr", NULL) == -1) 269 err(1, "pledge"); 270 271 while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1) 272 switch (opt) { 273 case 'i': /* treat stuff controlled by these symbols as text */ 274 /* 275 * For strict backwards-compatibility the U or D 276 * should be immediately after the -i but it doesn't 277 * matter much if we relax that requirement. 278 */ 279 opt = *optarg++; 280 if (opt == 'D') 281 addsym1(true, true, optarg); 282 else if (opt == 'U') 283 addsym1(true, false, optarg); 284 else 285 usage(); 286 break; 287 case 'D': /* define a symbol */ 288 addsym1(false, true, optarg); 289 break; 290 case 'U': /* undef a symbol */ 291 addsym1(false, false, optarg); 292 break; 293 case 'I': /* no-op for compatibility with cpp */ 294 break; 295 case 'b': /* blank deleted lines instead of omitting them */ 296 case 'l': /* backwards compatibility */ 297 lnblank = true; 298 break; 299 case 'B': /* compress blank lines around removed section */ 300 compblank = true; 301 break; 302 case 'c': /* treat -D as -U and vice versa */ 303 complement = true; 304 break; 305 case 'd': 306 debugging = true; 307 break; 308 case 'e': /* fewer errors from dodgy lines */ 309 iocccok = true; 310 break; 311 case 'f': /* definitions file */ 312 defundefile(optarg); 313 break; 314 case 'h': 315 help(); 316 break; 317 case 'K': /* keep ambiguous #ifs */ 318 strictlogic = true; 319 break; 320 case 'k': /* process constant #ifs */ 321 killconsts = true; 322 break; 323 case 'm': /* modify in place */ 324 inplace = true; 325 break; 326 case 'M': /* modify in place and keep backup */ 327 inplace = true; 328 backext = optarg; 329 break; 330 case 'n': /* add #line directive after deleted lines */ 331 lnnum = true; 332 break; 333 case 'o': /* output to a file */ 334 ofilename = optarg; 335 break; 336 case 's': /* only output list of symbols that control #ifs */ 337 symlist = true; 338 break; 339 case 'S': /* list symbols with their nesting depth */ 340 symlist = symdepth = true; 341 break; 342 case 't': /* don't parse C comments */ 343 text = true; 344 break; 345 case 'V': 346 version(); 347 break; 348 case 'x': 349 exitmode = strtonum(optarg, 0, 2, &errstr); 350 if (errstr) 351 errx(1, "-x %s: %s", optarg, errstr); 352 break; 353 default: 354 usage(); 355 } 356 argc -= optind; 357 argv += optind; 358 if (compblank && lnblank) 359 errx(2, "-B and -b are mutually exclusive"); 360 if (symlist && (ofilename != NULL || inplace || argc > 1)) 361 errx(2, "-s only works with one input file"); 362 if (argc > 1 && ofilename != NULL) 363 errx(2, "-o cannot be used with multiple input files"); 364 if (argc > 1 && !inplace) 365 errx(2, "multiple input files require -m or -M"); 366 if (argc == 0 && inplace) 367 errx(2, "can't edit stdin in place"); 368 if (argc == 0) 369 argc = 1; 370 if (argc == 1 && !inplace && ofilename == NULL) 371 ofilename = "-"; 372 indirectsym(); 373 374 atexit(cleantemp); 375 if (ofilename != NULL) 376 processinout(*argv, ofilename); 377 else while (argc-- > 0) { 378 processinout(*argv, *argv); 379 argv++; 380 } 381 switch(exitmode) { 382 case(0): exit(exitstat); 383 case(1): exit(!exitstat); 384 case(2): exit(0); 385 default: abort(); /* bug */ 386 } 387 } 388 389 /* 390 * File logistics. 391 */ 392 static void 393 processinout(const char *ifn, const char *ofn) 394 { 395 struct stat st; 396 397 if (ifn == NULL || strcmp(ifn, "-") == 0) { 398 filename = "[stdin]"; 399 linefile = NULL; 400 input = stdin; 401 } else { 402 filename = ifn; 403 linefile = ifn; 404 input = fopen(ifn, "rb"); 405 if (input == NULL) 406 err(2, "can't open %s", ifn); 407 } 408 if (strcmp(ofn, "-") == 0) { 409 output = stdout; 410 process(); 411 return; 412 } 413 if (stat(ofn, &st) == -1) { 414 output = fopen(ofn, "wb"); 415 if (output == NULL) 416 err(2, "can't create %s", ofn); 417 process(); 418 return; 419 } 420 421 tempname = astrcat(ofn, ".XXXXXX"); 422 output = mktempmode(tempname, st.st_mode); 423 if (output == NULL) 424 err(2, "can't create %s", tempname); 425 426 process(); 427 428 if (backext != NULL && *backext != '\0') { 429 char *backname = astrcat(ofn, backext); 430 if (rename(ofn, backname) == -1) 431 err(2, "can't rename \"%s\" to \"%s\"", ofn, backname); 432 free(backname); 433 } 434 if (rename(tempname, ofn) == -1) 435 err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn); 436 free(tempname); 437 tempname = NULL; 438 } 439 440 /* 441 * For cleaning up if there is an error. 442 */ 443 static void 444 cleantemp(void) 445 { 446 if (tempname != NULL) 447 remove(tempname); 448 } 449 450 /* 451 * Self-identification functions. 452 */ 453 454 static void 455 version(void) 456 { 457 const char *c = copyright; 458 for (;;) { 459 while (*++c != '$') 460 if (*c == '\0') 461 exit(0); 462 while (*++c != '$') 463 putc(*c, stderr); 464 putc('\n', stderr); 465 } 466 } 467 468 static void 469 synopsis(FILE *fp) 470 { 471 fprintf(fp, 472 "usage: unifdef [-BbcdehKkmnSstV] [-[i]Dsym[=val]] [-[i]Usym] [-f defile]\n" 473 " [-M backext] [-o outfile] [-x 0 | 1 | 2] file ...\n"); 474 } 475 476 static void 477 usage(void) 478 { 479 synopsis(stderr); 480 exit(2); 481 } 482 483 static void 484 help(void) 485 { 486 synopsis(stdout); 487 printf( 488 " -Dsym=val define preprocessor symbol with given value\n" 489 " -Dsym define preprocessor symbol with value 1\n" 490 " -Usym preprocessor symbol is undefined\n" 491 " -iDsym=val \\ ignore C strings and comments\n" 492 " -iDsym ) in sections controlled by these\n" 493 " -iUsym / preprocessor symbols\n" 494 " -fpath file containing #define and #undef directives\n" 495 " -b blank lines instead of deleting them\n" 496 " -B compress blank lines around deleted section\n" 497 " -c complement (invert) keep vs. delete\n" 498 " -d debugging mode\n" 499 " -e ignore multiline preprocessor directives\n" 500 " -h print help\n" 501 " -Ipath extra include file path (ignored)\n" 502 " -K disable && and || short-circuiting\n" 503 " -k process constant #if expressions\n" 504 " -Mext modify in place and keep backups\n" 505 " -m modify input files in place\n" 506 " -n add #line directives to output\n" 507 " -opath output file name\n" 508 " -S list #if control symbols with nesting\n" 509 " -s list #if control symbols\n" 510 " -t ignore C strings and comments\n" 511 " -V print version\n" 512 " -x{012} exit status mode\n" 513 ); 514 exit(0); 515 } 516 517 /* 518 * A state transition function alters the global #if processing state 519 * in a particular way. The table below is indexed by the current 520 * processing state and the type of the current line. 521 * 522 * Nesting is handled by keeping a stack of states; some transition 523 * functions increase or decrease the depth. They also maintain the 524 * ignore state on a stack. In some complicated cases they have to 525 * alter the preprocessor directive, as follows. 526 * 527 * When we have processed a group that starts off with a known-false 528 * #if/#elif sequence (which has therefore been deleted) followed by a 529 * #elif that we don't understand and therefore must keep, we edit the 530 * latter into a #if to keep the nesting correct. We use memcpy() to 531 * overwrite the 4 byte token "elif" with "if " without a '\0' byte. 532 * 533 * When we find a true #elif in a group, the following block will 534 * always be kept and the rest of the sequence after the next #elif or 535 * #else will be discarded. We edit the #elif into a #else and the 536 * following directive to #endif since this has the desired behaviour. 537 * 538 * "Dodgy" directives are split across multiple lines, the most common 539 * example being a multi-line comment hanging off the right of the 540 * directive. We can handle them correctly only if there is no change 541 * from printing to dropping (or vice versa) caused by that directive. 542 * If the directive is the first of a group we have a choice between 543 * failing with an error, or passing it through unchanged instead of 544 * evaluating it. The latter is not the default to avoid questions from 545 * users about unifdef unexpectedly leaving behind preprocessor directives. 546 */ 547 typedef void state_fn(void); 548 549 /* report an error */ 550 static void Eelif (void) { error("Inappropriate #elif"); } 551 static void Eelse (void) { error("Inappropriate #else"); } 552 static void Eendif(void) { error("Inappropriate #endif"); } 553 static void Eeof (void) { error("Premature EOF"); } 554 static void Eioccc(void) { error("Obfuscated preprocessor control line"); } 555 /* plain line handling */ 556 static void print (void) { flushline(true); } 557 static void drop (void) { flushline(false); } 558 /* output lacks group's start line */ 559 static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); } 560 static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); } 561 static void Selse (void) { drop(); state(IS_TRUE_ELSE); } 562 /* print/pass this block */ 563 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); } 564 static void Pelse (void) { print(); state(IS_PASS_ELSE); } 565 static void Pendif(void) { print(); unnest(); } 566 /* discard this block */ 567 static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); } 568 static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); } 569 static void Delse (void) { drop(); state(IS_FALSE_ELSE); } 570 static void Dendif(void) { drop(); unnest(); } 571 /* first line of group */ 572 static void Fdrop (void) { nest(); Dfalse(); } 573 static void Fpass (void) { nest(); Pelif(); } 574 static void Ftrue (void) { nest(); Strue(); } 575 static void Ffalse(void) { nest(); Sfalse(); } 576 /* variable pedantry for obfuscated lines */ 577 static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); } 578 static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); } 579 static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); } 580 /* ignore comments in this block */ 581 static void Idrop (void) { Fdrop(); ignoreon(); } 582 static void Itrue (void) { Ftrue(); ignoreon(); } 583 static void Ifalse(void) { Ffalse(); ignoreon(); } 584 /* modify this line */ 585 static void Mpass (void) { memcpy(keyword, "if ", 4); Pelif(); } 586 static void Mtrue (void) { keywordedit("else"); state(IS_TRUE_MIDDLE); } 587 static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); } 588 static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); } 589 590 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 591 /* IS_OUTSIDE */ 592 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 593 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 594 print, done, abort }, 595 /* IS_FALSE_PREFIX */ 596 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 597 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 598 drop, Eeof, abort }, 599 /* IS_TRUE_PREFIX */ 600 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 601 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 602 print, Eeof, abort }, 603 /* IS_PASS_MIDDLE */ 604 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 605 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 606 print, Eeof, abort }, 607 /* IS_FALSE_MIDDLE */ 608 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, 609 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 610 drop, Eeof, abort }, 611 /* IS_TRUE_MIDDLE */ 612 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 613 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 614 print, Eeof, abort }, 615 /* IS_PASS_ELSE */ 616 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 617 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 618 print, Eeof, abort }, 619 /* IS_FALSE_ELSE */ 620 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 621 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 622 drop, Eeof, abort }, 623 /* IS_TRUE_ELSE */ 624 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 625 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 626 print, Eeof, abort }, 627 /* IS_FALSE_TRAILER */ 628 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 629 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 630 drop, Eeof, abort } 631 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 632 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 633 PLAIN EOF ERROR */ 634 }; 635 636 /* 637 * State machine utility functions 638 */ 639 static void 640 ignoreoff(void) 641 { 642 if (depth == 0) 643 abort(); /* bug */ 644 ignoring[depth] = ignoring[depth-1]; 645 } 646 static void 647 ignoreon(void) 648 { 649 ignoring[depth] = true; 650 } 651 static void 652 keywordedit(const char *replacement) 653 { 654 snprintf(keyword, tline + sizeof(tline) - keyword, 655 "%s%s", replacement, newline); 656 print(); 657 } 658 static void 659 nest(void) 660 { 661 if (depth > MAXDEPTH-1) 662 abort(); /* bug */ 663 if (depth == MAXDEPTH-1) 664 error("Too many levels of nesting"); 665 depth += 1; 666 stifline[depth] = linenum; 667 } 668 static void 669 unnest(void) 670 { 671 if (depth == 0) 672 abort(); /* bug */ 673 depth -= 1; 674 } 675 static void 676 state(Ifstate is) 677 { 678 ifstate[depth] = is; 679 } 680 681 /* 682 * The last state transition function. When this is called, 683 * lineval == LT_EOF, so the process() loop will terminate. 684 */ 685 static void 686 done(void) 687 { 688 if (incomment) 689 error("EOF in comment"); 690 closeio(); 691 } 692 693 /* 694 * Write a line to the output or not, according to command line options. 695 * If writing fails, closeio() will print the error and exit. 696 */ 697 static void 698 flushline(bool keep) 699 { 700 if (symlist) 701 return; 702 if (keep ^ complement) { 703 bool blankline = tline[strspn(tline, " \t\r\n")] == '\0'; 704 if (blankline && compblank && blankcount != blankmax) { 705 delcount += 1; 706 blankcount += 1; 707 } else { 708 if (lnnum && delcount > 0) 709 hashline(); 710 if (fputs(tline, output) == EOF) 711 closeio(); 712 delcount = 0; 713 blankmax = blankcount = blankline ? blankcount + 1 : 0; 714 } 715 } else { 716 if (lnblank && fputs(newline, output) == EOF) 717 closeio(); 718 exitstat = 1; 719 delcount += 1; 720 blankcount = 0; 721 } 722 if (debugging && fflush(output) == EOF) 723 closeio(); 724 } 725 726 /* 727 * Format of #line directives depends on whether we know the input filename. 728 */ 729 static void 730 hashline(void) 731 { 732 int e; 733 734 if (linefile == NULL) 735 e = fprintf(output, "#line %d%s", linenum, newline); 736 else 737 e = fprintf(output, "#line %d \"%s\"%s", 738 linenum, linefile, newline); 739 if (e < 0) 740 closeio(); 741 } 742 743 /* 744 * Flush the output and handle errors. 745 */ 746 static void 747 closeio(void) 748 { 749 /* Tidy up after findsym(). */ 750 if (symdepth && !zerosyms) 751 printf("\n"); 752 if (output != NULL && (ferror(output) || fclose(output) == EOF)) 753 err(2, "%s: can't write to output", filename); 754 fclose(input); 755 } 756 757 /* 758 * The driver for the state machine. 759 */ 760 static void 761 process(void) 762 { 763 Linetype lineval = LT_PLAIN; 764 /* When compressing blank lines, act as if the file 765 is preceded by a large number of blank lines. */ 766 blankmax = blankcount = 1000; 767 zerosyms = true; 768 newline = NULL; 769 linenum = 0; 770 while (lineval != LT_EOF) { 771 lineval = parseline(); 772 trans_table[ifstate[depth]][lineval](); 773 debug("process line %d %s -> %s depth %d", 774 linenum, linetype_name[lineval], 775 ifstate_name[ifstate[depth]], depth); 776 } 777 } 778 779 /* 780 * Parse a line and determine its type. We keep the preprocessor line 781 * parser state between calls in the global variable linestate, with 782 * help from skipcomment(). 783 */ 784 static Linetype 785 parseline(void) 786 { 787 const char *cp; 788 int cursym; 789 Linetype retval; 790 Comment_state wascomment; 791 792 wascomment = incomment; 793 cp = skiphash(); 794 if (cp == NULL) 795 return (LT_EOF); 796 if (newline == NULL) { 797 if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1) 798 newline = newline_crlf; 799 else 800 newline = newline_unix; 801 } 802 if (*cp == '\0') { 803 retval = LT_PLAIN; 804 goto done; 805 } 806 keyword = tline + (cp - tline); 807 if ((cp = matchsym("ifdef", keyword)) != NULL || 808 (cp = matchsym("ifndef", keyword)) != NULL) { 809 cp = skipcomment(cp); 810 if ((cursym = findsym(&cp)) < 0) 811 retval = LT_IF; 812 else { 813 retval = (keyword[2] == 'n') 814 ? LT_FALSE : LT_TRUE; 815 if (value[cursym] == NULL) 816 retval = (retval == LT_TRUE) 817 ? LT_FALSE : LT_TRUE; 818 if (ignore[cursym]) 819 retval = (retval == LT_TRUE) 820 ? LT_TRUEI : LT_FALSEI; 821 } 822 } else if ((cp = matchsym("if", keyword)) != NULL) 823 retval = ifeval(&cp); 824 else if ((cp = matchsym("elif", keyword)) != NULL) 825 retval = linetype_if2elif(ifeval(&cp)); 826 else if ((cp = matchsym("else", keyword)) != NULL) 827 retval = LT_ELSE; 828 else if ((cp = matchsym("endif", keyword)) != NULL) 829 retval = LT_ENDIF; 830 else { 831 cp = skipsym(keyword); 832 /* no way can we deal with a continuation inside a keyword */ 833 if (strncmp(cp, "\\\r\n", 3) == 0 || 834 strncmp(cp, "\\\n", 2) == 0) 835 Eioccc(); 836 cp = skipline(cp); 837 retval = LT_PLAIN; 838 goto done; 839 } 840 cp = skipcomment(cp); 841 if (*cp != '\0') { 842 cp = skipline(cp); 843 if (retval == LT_TRUE || retval == LT_FALSE || 844 retval == LT_TRUEI || retval == LT_FALSEI) 845 retval = LT_IF; 846 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 847 retval = LT_ELIF; 848 } 849 /* the following can happen if the last line of the file lacks a 850 newline or if there is too much whitespace in a directive */ 851 if (linestate == LS_HASH) { 852 long len = cp - tline; 853 if (fgets(tline + len, MAXLINE - len, input) == NULL) { 854 if (ferror(input)) 855 err(2, "can't read %s", filename); 856 /* append the missing newline at eof */ 857 strlcpy(tline + len, newline, sizeof(tline) - len); 858 cp += strlen(newline); 859 linestate = LS_START; 860 } else { 861 linestate = LS_DIRTY; 862 } 863 } 864 if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) { 865 retval = linetype_2dodgy(retval); 866 linestate = LS_DIRTY; 867 } 868 done: 869 debug("parser line %d state %s comment %s line", linenum, 870 comment_name[incomment], linestate_name[linestate]); 871 return (retval); 872 } 873 874 /* 875 * These are the binary operators that are supported by the expression 876 * evaluator. 877 */ 878 static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) { 879 if(at == LT_IF || bt == LT_IF) return (LT_IF); 880 return (*p = v, v ? LT_TRUE : LT_FALSE); 881 } 882 static Linetype op_lt(long *p, Linetype at, long a, Linetype bt, long b) { 883 return op_strict(p, a < b, at, bt); 884 } 885 static Linetype op_gt(long *p, Linetype at, long a, Linetype bt, long b) { 886 return op_strict(p, a > b, at, bt); 887 } 888 static Linetype op_le(long *p, Linetype at, long a, Linetype bt, long b) { 889 return op_strict(p, a <= b, at, bt); 890 } 891 static Linetype op_ge(long *p, Linetype at, long a, Linetype bt, long b) { 892 return op_strict(p, a >= b, at, bt); 893 } 894 static Linetype op_eq(long *p, Linetype at, long a, Linetype bt, long b) { 895 return op_strict(p, a == b, at, bt); 896 } 897 static Linetype op_ne(long *p, Linetype at, long a, Linetype bt, long b) { 898 return op_strict(p, a != b, at, bt); 899 } 900 static Linetype op_or(long *p, Linetype at, long a, Linetype bt, long b) { 901 if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE)) 902 return (*p = 1, LT_TRUE); 903 return op_strict(p, a || b, at, bt); 904 } 905 static Linetype op_and(long *p, Linetype at, long a, Linetype bt, long b) { 906 if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE)) 907 return (*p = 0, LT_FALSE); 908 return op_strict(p, a && b, at, bt); 909 } 910 911 /* 912 * An evaluation function takes three arguments, as follows: (1) a pointer to 913 * an element of the precedence table which lists the operators at the current 914 * level of precedence; (2) a pointer to an integer which will receive the 915 * value of the expression; and (3) a pointer to a char* that points to the 916 * expression to be evaluated and that is updated to the end of the expression 917 * when evaluation is complete. The function returns LT_FALSE if the value of 918 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression 919 * depends on an unknown symbol, or LT_ERROR if there is a parse failure. 920 */ 921 struct ops; 922 923 typedef Linetype eval_fn(const struct ops *, long *, const char **); 924 925 static eval_fn eval_table, eval_unary; 926 927 /* 928 * The precedence table. Expressions involving binary operators are evaluated 929 * in a table-driven way by eval_table. When it evaluates a subexpression it 930 * calls the inner function with its first argument pointing to the next 931 * element of the table. Innermost expressions have special non-table-driven 932 * handling. 933 */ 934 struct op { 935 const char *str; 936 Linetype (*fn)(long *, Linetype, long, Linetype, long); 937 }; 938 struct ops { 939 eval_fn *inner; 940 struct op op[5]; 941 }; 942 static const struct ops eval_ops[] = { 943 { eval_table, { { "||", op_or } } }, 944 { eval_table, { { "&&", op_and } } }, 945 { eval_table, { { "==", op_eq }, 946 { "!=", op_ne } } }, 947 { eval_unary, { { "<=", op_le }, 948 { ">=", op_ge }, 949 { "<", op_lt }, 950 { ">", op_gt } } } 951 }; 952 953 /* Current operator precedence level */ 954 static long prec(const struct ops *ops) 955 { 956 return (ops - eval_ops); 957 } 958 959 /* 960 * Function for evaluating the innermost parts of expressions, 961 * viz. !expr (expr) number defined(symbol) symbol 962 * We reset the constexpr flag in the last two cases. 963 */ 964 static Linetype 965 eval_unary(const struct ops *ops, long *valp, const char **cpp) 966 { 967 const char *cp; 968 char *ep; 969 int sym; 970 bool defparen; 971 Linetype lt; 972 973 cp = skipcomment(*cpp); 974 if (*cp == '!') { 975 debug("eval%d !", prec(ops)); 976 cp++; 977 lt = eval_unary(ops, valp, &cp); 978 if (lt == LT_ERROR) 979 return (LT_ERROR); 980 if (lt != LT_IF) { 981 *valp = !*valp; 982 lt = *valp ? LT_TRUE : LT_FALSE; 983 } 984 } else if (*cp == '(') { 985 cp++; 986 debug("eval%d (", prec(ops)); 987 lt = eval_table(eval_ops, valp, &cp); 988 if (lt == LT_ERROR) 989 return (LT_ERROR); 990 cp = skipcomment(cp); 991 if (*cp++ != ')') 992 return (LT_ERROR); 993 } else if (isdigit((unsigned char)*cp)) { 994 debug("eval%d number", prec(ops)); 995 *valp = strtol(cp, &ep, 0); 996 if (ep == cp) 997 return (LT_ERROR); 998 lt = *valp ? LT_TRUE : LT_FALSE; 999 cp = ep; 1000 } else if (matchsym("defined", cp) != NULL) { 1001 cp = skipcomment(cp+7); 1002 if (*cp == '(') { 1003 cp = skipcomment(cp+1); 1004 defparen = true; 1005 } else { 1006 defparen = false; 1007 } 1008 sym = findsym(&cp); 1009 cp = skipcomment(cp); 1010 if (defparen && *cp++ != ')') { 1011 debug("eval%d defined missing ')'", prec(ops)); 1012 return (LT_ERROR); 1013 } 1014 if (sym < 0) { 1015 debug("eval%d defined unknown", prec(ops)); 1016 lt = LT_IF; 1017 } else { 1018 debug("eval%d defined %s", prec(ops), symname[sym]); 1019 *valp = (value[sym] != NULL); 1020 lt = *valp ? LT_TRUE : LT_FALSE; 1021 } 1022 constexpr = false; 1023 } else if (!endsym(*cp)) { 1024 debug("eval%d symbol", prec(ops)); 1025 sym = findsym(&cp); 1026 if (sym < 0) { 1027 lt = LT_IF; 1028 cp = skipargs(cp); 1029 } else if (value[sym] == NULL) { 1030 *valp = 0; 1031 lt = LT_FALSE; 1032 } else { 1033 *valp = strtol(value[sym], &ep, 0); 1034 if (*ep != '\0' || ep == value[sym]) 1035 return (LT_ERROR); 1036 lt = *valp ? LT_TRUE : LT_FALSE; 1037 cp = skipargs(cp); 1038 } 1039 constexpr = false; 1040 } else { 1041 debug("eval%d bad expr", prec(ops)); 1042 return (LT_ERROR); 1043 } 1044 1045 *cpp = cp; 1046 debug("eval%d = %d", prec(ops), *valp); 1047 return (lt); 1048 } 1049 1050 /* 1051 * Table-driven evaluation of binary operators. 1052 */ 1053 static Linetype 1054 eval_table(const struct ops *ops, long *valp, const char **cpp) 1055 { 1056 const struct op *op; 1057 const char *cp; 1058 long val; 1059 Linetype lt, rt; 1060 1061 debug("eval%d", prec(ops)); 1062 cp = *cpp; 1063 lt = ops->inner(ops+1, valp, &cp); 1064 if (lt == LT_ERROR) 1065 return (LT_ERROR); 1066 for (;;) { 1067 cp = skipcomment(cp); 1068 for (op = ops->op; op->str != NULL; op++) 1069 if (strncmp(cp, op->str, strlen(op->str)) == 0) 1070 break; 1071 if (op->str == NULL) 1072 break; 1073 cp += strlen(op->str); 1074 debug("eval%d %s", prec(ops), op->str); 1075 rt = ops->inner(ops+1, &val, &cp); 1076 if (rt == LT_ERROR) 1077 return (LT_ERROR); 1078 lt = op->fn(valp, lt, *valp, rt, val); 1079 } 1080 1081 *cpp = cp; 1082 debug("eval%d = %d", prec(ops), *valp); 1083 debug("eval%d lt = %s", prec(ops), linetype_name[lt]); 1084 return (lt); 1085 } 1086 1087 /* 1088 * Evaluate the expression on a #if or #elif line. If we can work out 1089 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 1090 * return just a generic LT_IF. 1091 */ 1092 static Linetype 1093 ifeval(const char **cpp) 1094 { 1095 Linetype ret; 1096 long val = 0; 1097 1098 debug("eval %s", *cpp); 1099 constexpr = killconsts ? false : true; 1100 ret = eval_table(eval_ops, &val, cpp); 1101 debug("eval = %d", val); 1102 return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret); 1103 } 1104 1105 /* 1106 * Read a line and examine its initial part to determine if it is a 1107 * preprocessor directive. Returns NULL on EOF, or a pointer to a 1108 * preprocessor directive name, or a pointer to the zero byte at the 1109 * end of the line. 1110 */ 1111 static const char * 1112 skiphash(void) 1113 { 1114 const char *cp; 1115 1116 linenum++; 1117 if (fgets(tline, MAXLINE, input) == NULL) { 1118 if (ferror(input)) 1119 err(2, "can't read %s", filename); 1120 else 1121 return (NULL); 1122 } 1123 cp = skipcomment(tline); 1124 if (linestate == LS_START && *cp == '#') { 1125 linestate = LS_HASH; 1126 return (skipcomment(cp + 1)); 1127 } else if (*cp == '\0') { 1128 return (cp); 1129 } else { 1130 return (skipline(cp)); 1131 } 1132 } 1133 1134 /* 1135 * Mark a line dirty and consume the rest of it, keeping track of the 1136 * lexical state. 1137 */ 1138 static const char * 1139 skipline(const char *cp) 1140 { 1141 const char *pcp; 1142 if (*cp != '\0') 1143 linestate = LS_DIRTY; 1144 while (*cp != '\0') { 1145 cp = skipcomment(pcp = cp); 1146 if (pcp == cp) 1147 cp++; 1148 } 1149 return (cp); 1150 } 1151 1152 /* 1153 * Skip over comments, strings, and character literals and stop at the 1154 * next character position that is not whitespace. Between calls we keep 1155 * the comment state in the global variable incomment, and we also adjust 1156 * the global variable linestate when we see a newline. 1157 * XXX: doesn't cope with the buffer splitting inside a state transition. 1158 */ 1159 static const char * 1160 skipcomment(const char *cp) 1161 { 1162 if (text || ignoring[depth]) { 1163 for (; isspace((unsigned char)*cp); cp++) 1164 if (*cp == '\n') 1165 linestate = LS_START; 1166 return (cp); 1167 } 1168 while (*cp != '\0') 1169 /* don't reset to LS_START after a line continuation */ 1170 if (strncmp(cp, "\\\r\n", 3) == 0) 1171 cp += 3; 1172 else if (strncmp(cp, "\\\n", 2) == 0) 1173 cp += 2; 1174 else switch (incomment) { 1175 case NO_COMMENT: 1176 if (strncmp(cp, "/\\\r\n", 4) == 0) { 1177 incomment = STARTING_COMMENT; 1178 cp += 4; 1179 } else if (strncmp(cp, "/\\\n", 3) == 0) { 1180 incomment = STARTING_COMMENT; 1181 cp += 3; 1182 } else if (strncmp(cp, "/*", 2) == 0) { 1183 incomment = C_COMMENT; 1184 cp += 2; 1185 } else if (strncmp(cp, "//", 2) == 0) { 1186 incomment = CXX_COMMENT; 1187 cp += 2; 1188 } else if (strncmp(cp, "\'", 1) == 0) { 1189 incomment = CHAR_LITERAL; 1190 linestate = LS_DIRTY; 1191 cp += 1; 1192 } else if (strncmp(cp, "\"", 1) == 0) { 1193 incomment = STRING_LITERAL; 1194 linestate = LS_DIRTY; 1195 cp += 1; 1196 } else if (strncmp(cp, "\n", 1) == 0) { 1197 linestate = LS_START; 1198 cp += 1; 1199 } else if (strchr(" \r\t", *cp) != NULL) { 1200 cp += 1; 1201 } else 1202 return (cp); 1203 continue; 1204 case CXX_COMMENT: 1205 if (strncmp(cp, "\n", 1) == 0) { 1206 incomment = NO_COMMENT; 1207 linestate = LS_START; 1208 } 1209 cp += 1; 1210 continue; 1211 case CHAR_LITERAL: 1212 case STRING_LITERAL: 1213 if ((incomment == CHAR_LITERAL && cp[0] == '\'') || 1214 (incomment == STRING_LITERAL && cp[0] == '\"')) { 1215 incomment = NO_COMMENT; 1216 cp += 1; 1217 } else if (cp[0] == '\\') { 1218 if (cp[1] == '\0') 1219 cp += 1; 1220 else 1221 cp += 2; 1222 } else if (strncmp(cp, "\n", 1) == 0) { 1223 if (incomment == CHAR_LITERAL) 1224 error("Unterminated char literal"); 1225 else 1226 error("Unterminated string literal"); 1227 } else 1228 cp += 1; 1229 continue; 1230 case C_COMMENT: 1231 if (strncmp(cp, "*\\\r\n", 4) == 0) { 1232 incomment = FINISHING_COMMENT; 1233 cp += 4; 1234 } else if (strncmp(cp, "*\\\n", 3) == 0) { 1235 incomment = FINISHING_COMMENT; 1236 cp += 3; 1237 } else if (strncmp(cp, "*/", 2) == 0) { 1238 incomment = NO_COMMENT; 1239 cp += 2; 1240 } else 1241 cp += 1; 1242 continue; 1243 case STARTING_COMMENT: 1244 if (*cp == '*') { 1245 incomment = C_COMMENT; 1246 cp += 1; 1247 } else if (*cp == '/') { 1248 incomment = CXX_COMMENT; 1249 cp += 1; 1250 } else { 1251 incomment = NO_COMMENT; 1252 linestate = LS_DIRTY; 1253 } 1254 continue; 1255 case FINISHING_COMMENT: 1256 if (*cp == '/') { 1257 incomment = NO_COMMENT; 1258 cp += 1; 1259 } else 1260 incomment = C_COMMENT; 1261 continue; 1262 default: 1263 abort(); /* bug */ 1264 } 1265 return (cp); 1266 } 1267 1268 /* 1269 * Skip macro arguments. 1270 */ 1271 static const char * 1272 skipargs(const char *cp) 1273 { 1274 const char *ocp = cp; 1275 int level = 0; 1276 cp = skipcomment(cp); 1277 if (*cp != '(') 1278 return (cp); 1279 do { 1280 if (*cp == '(') 1281 level++; 1282 if (*cp == ')') 1283 level--; 1284 cp = skipcomment(cp+1); 1285 } while (level != 0 && *cp != '\0'); 1286 if (level == 0) 1287 return (cp); 1288 else 1289 /* Rewind and re-detect the syntax error later. */ 1290 return (ocp); 1291 } 1292 1293 /* 1294 * Skip over an identifier. 1295 */ 1296 static const char * 1297 skipsym(const char *cp) 1298 { 1299 while (!endsym(*cp)) 1300 ++cp; 1301 return (cp); 1302 } 1303 1304 /* 1305 * Skip whitespace and take a copy of any following identifier. 1306 */ 1307 static const char * 1308 getsym(const char **cpp) 1309 { 1310 const char *cp = *cpp, *sym; 1311 1312 cp = skipcomment(cp); 1313 cp = skipsym(sym = cp); 1314 if (cp == sym) 1315 return NULL; 1316 *cpp = cp; 1317 return (xstrdup(sym, cp)); 1318 } 1319 1320 /* 1321 * Check that s (a symbol) matches the start of t, and that the 1322 * following character in t is not a symbol character. Returns a 1323 * pointer to the following character in t if there is a match, 1324 * otherwise NULL. 1325 */ 1326 static const char * 1327 matchsym(const char *s, const char *t) 1328 { 1329 while (*s != '\0' && *t != '\0') 1330 if (*s != *t) 1331 return (NULL); 1332 else 1333 ++s, ++t; 1334 if (*s == '\0' && endsym(*t)) 1335 return(t); 1336 else 1337 return(NULL); 1338 } 1339 1340 /* 1341 * Look for the symbol in the symbol table. If it is found, we return 1342 * the symbol table index, else we return -1. 1343 */ 1344 static int 1345 findsym(const char **strp) 1346 { 1347 const char *str; 1348 int symind; 1349 1350 str = *strp; 1351 *strp = skipsym(str); 1352 if (symlist) { 1353 if (*strp == str) 1354 return (-1); 1355 if (symdepth && firstsym) 1356 printf("%s%3d", zerosyms ? "" : "\n", depth); 1357 firstsym = zerosyms = false; 1358 printf("%s%.*s%s", 1359 symdepth ? " " : "", 1360 (int)(*strp-str), str, 1361 symdepth ? "" : "\n"); 1362 /* we don't care about the value of the symbol */ 1363 return (0); 1364 } 1365 for (symind = 0; symind < nsyms; ++symind) { 1366 if (matchsym(symname[symind], str) != NULL) { 1367 debugsym("findsym", symind); 1368 return (symind); 1369 } 1370 } 1371 return (-1); 1372 } 1373 1374 /* 1375 * Resolve indirect symbol values to their final definitions. 1376 */ 1377 static void 1378 indirectsym(void) 1379 { 1380 const char *cp; 1381 int changed, sym, ind; 1382 1383 do { 1384 changed = 0; 1385 for (sym = 0; sym < nsyms; ++sym) { 1386 if (value[sym] == NULL) 1387 continue; 1388 cp = value[sym]; 1389 ind = findsym(&cp); 1390 if (ind == -1 || ind == sym || 1391 *cp != '\0' || 1392 value[ind] == NULL || 1393 value[ind] == value[sym]) 1394 continue; 1395 debugsym("indir...", sym); 1396 value[sym] = value[ind]; 1397 debugsym("...ectsym", sym); 1398 changed++; 1399 } 1400 } while (changed); 1401 } 1402 1403 /* 1404 * Add a symbol to the symbol table, specified with the format sym=val 1405 */ 1406 static void 1407 addsym1(bool ignorethis, bool definethis, char *symval) 1408 { 1409 const char *sym, *val; 1410 1411 sym = symval; 1412 val = skipsym(sym); 1413 if (definethis && *val == '=') { 1414 symval[val - sym] = '\0'; 1415 val = val + 1; 1416 } else if (*val == '\0') { 1417 val = definethis ? "1" : NULL; 1418 } else { 1419 usage(); 1420 } 1421 addsym2(ignorethis, sym, val); 1422 } 1423 1424 /* 1425 * Add a symbol to the symbol table. 1426 */ 1427 static void 1428 addsym2(bool ignorethis, const char *sym, const char *val) 1429 { 1430 const char *cp = sym; 1431 int symind; 1432 1433 symind = findsym(&cp); 1434 if (symind < 0) { 1435 if (nsyms >= MAXSYMS) 1436 errx(2, "too many symbols"); 1437 symind = nsyms++; 1438 } 1439 ignore[symind] = ignorethis; 1440 symname[symind] = sym; 1441 value[symind] = val; 1442 debugsym("addsym", symind); 1443 } 1444 1445 static void 1446 debugsym(const char *why, int symind) 1447 { 1448 debug("%s %s%c%s", why, symname[symind], 1449 value[symind] ? '=' : ' ', 1450 value[symind] ? value[symind] : "undef"); 1451 } 1452 1453 /* 1454 * Add symbols to the symbol table from a file containing 1455 * #define and #undef preprocessor directives. 1456 */ 1457 static void 1458 defundefile(const char *fn) 1459 { 1460 filename = fn; 1461 input = fopen(fn, "rb"); 1462 if (input == NULL) 1463 err(2, "can't open %s", fn); 1464 linenum = 0; 1465 while (defundef()) 1466 ; 1467 if (ferror(input)) 1468 err(2, "can't read %s", filename); 1469 else 1470 fclose(input); 1471 if (incomment) 1472 error("EOF in comment"); 1473 } 1474 1475 /* 1476 * Read and process one #define or #undef directive 1477 */ 1478 static bool 1479 defundef(void) 1480 { 1481 const char *cp, *kw, *sym, *val, *end; 1482 1483 cp = skiphash(); 1484 if (cp == NULL) 1485 return (false); 1486 if (*cp == '\0') 1487 goto done; 1488 /* strip trailing whitespace, and do a fairly rough check to 1489 avoid unsupported multi-line preprocessor directives */ 1490 end = cp + strlen(cp); 1491 while (end > tline && strchr(" \t\n\r", end[-1]) != NULL) 1492 --end; 1493 if (end > tline && end[-1] == '\\') 1494 Eioccc(); 1495 1496 kw = cp; 1497 if ((cp = matchsym("define", kw)) != NULL) { 1498 sym = getsym(&cp); 1499 if (sym == NULL) 1500 error("Missing macro name in #define"); 1501 if (*cp == '(') { 1502 val = "1"; 1503 } else { 1504 cp = skipcomment(cp); 1505 val = (cp < end) ? xstrdup(cp, end) : ""; 1506 } 1507 debug("#define"); 1508 addsym2(false, sym, val); 1509 } else if ((cp = matchsym("undef", kw)) != NULL) { 1510 sym = getsym(&cp); 1511 if (sym == NULL) 1512 error("Missing macro name in #undef"); 1513 cp = skipcomment(cp); 1514 debug("#undef"); 1515 addsym2(false, sym, NULL); 1516 } else { 1517 error("Unrecognized preprocessor directive"); 1518 } 1519 skipline(cp); 1520 done: 1521 debug("parser line %d state %s comment %s line", linenum, 1522 comment_name[incomment], linestate_name[linestate]); 1523 return (true); 1524 } 1525 1526 /* 1527 * Concatenate two strings into new memory, checking for failure. 1528 */ 1529 static char * 1530 astrcat(const char *s1, const char *s2) 1531 { 1532 char *s; 1533 1534 if (asprintf(&s, "%s%s", s1, s2) == -1) 1535 err(2, "asprintf"); 1536 return (s); 1537 } 1538 1539 /* 1540 * Duplicate a segment of a string, checking for failure. 1541 */ 1542 static const char * 1543 xstrdup(const char *start, const char *end) 1544 { 1545 size_t n; 1546 char *s; 1547 1548 if (end < start) abort(); /* bug */ 1549 n = (size_t)(end - start) + 1; 1550 s = (char *)malloc(n); 1551 if (s == NULL) 1552 err(2, "malloc"); 1553 snprintf(s, n, "%s", start); 1554 return (s); 1555 } 1556 1557 /* 1558 * Diagnostics. 1559 */ 1560 static void 1561 debug(const char *msg, ...) 1562 { 1563 va_list ap; 1564 1565 if (debugging) { 1566 va_start(ap, msg); 1567 vwarnx(msg, ap); 1568 va_end(ap); 1569 } 1570 } 1571 1572 static void 1573 error(const char *msg) 1574 { 1575 if (depth == 0) 1576 warnx("%s: %d: %s", filename, linenum, msg); 1577 else 1578 warnx("%s: %d: %s (#if line %d depth %d)", 1579 filename, linenum, msg, stifline[depth], depth); 1580 closeio(); 1581 errx(2, "Output may be truncated"); 1582 } 1583 1584 static FILE * 1585 mktempmode(char *tmp, int mode) 1586 { 1587 int fd = mkstemp(tmp); 1588 if (fd == -1) 1589 return (NULL); 1590 fchmod(fd, mode & (S_IRWXU|S_IRWXG|S_IRWXO)); 1591 return (fdopen(fd, "wb")); 1592 } 1593