/* $OpenBSD: unifdef.c,v 1.15 2012/03/04 04:05:15 fgsch Exp $ */
/*
 * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at>
 * Copyright (c) 1985, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Dave Yost. Support for #if and #elif was added by Tony Finch.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
33 */ 34 35 /* 36 * unifdef - remove ifdef'ed lines 37 * 38 * Wishlist: 39 * provide an option which will append the name of the 40 * appropriate symbol after #else's and #endif's 41 * provide an option which will check symbols after 42 * #else's and #endif's to see that they match their 43 * corresponding #ifdef or #ifndef 44 * generate #line directives in place of deleted code 45 * 46 * The first two items above require better buffer handling, which would 47 * also make it possible to handle all "dodgy" directives correctly. 48 */ 49 50 #include <ctype.h> 51 #include <err.h> 52 #include <stdarg.h> 53 #include <stdbool.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include <unistd.h> 58 59 /* types of input lines: */ 60 typedef enum { 61 LT_TRUEI, /* a true #if with ignore flag */ 62 LT_FALSEI, /* a false #if with ignore flag */ 63 LT_IF, /* an unknown #if */ 64 LT_TRUE, /* a true #if */ 65 LT_FALSE, /* a false #if */ 66 LT_ELIF, /* an unknown #elif */ 67 LT_ELTRUE, /* a true #elif */ 68 LT_ELFALSE, /* a false #elif */ 69 LT_ELSE, /* #else */ 70 LT_ENDIF, /* #endif */ 71 LT_DODGY, /* flag: directive is not on one line */ 72 LT_DODGY_LAST = LT_DODGY + LT_ENDIF, 73 LT_PLAIN, /* ordinary line */ 74 LT_EOF, /* end of file */ 75 LT_COUNT 76 } Linetype; 77 78 static char const * const linetype_name[] = { 79 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE", 80 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", 81 "DODGY TRUEI", "DODGY FALSEI", 82 "DODGY IF", "DODGY TRUE", "DODGY FALSE", 83 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE", 84 "DODGY ELSE", "DODGY ENDIF", 85 "PLAIN", "EOF" 86 }; 87 88 /* state of #if processing */ 89 typedef enum { 90 IS_OUTSIDE, 91 IS_FALSE_PREFIX, /* false #if followed by false #elifs */ 92 IS_TRUE_PREFIX, /* first non-false #(el)if is true */ 93 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */ 94 IS_FALSE_MIDDLE, /* a false #elif after a pass state */ 95 IS_TRUE_MIDDLE, /* a true #elif after a pass state */ 96 
IS_PASS_ELSE, /* an else after a pass state */ 97 IS_FALSE_ELSE, /* an else after a true state */ 98 IS_TRUE_ELSE, /* an else after only false states */ 99 IS_FALSE_TRAILER, /* #elifs after a true are false */ 100 IS_COUNT 101 } Ifstate; 102 103 static char const * const ifstate_name[] = { 104 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX", 105 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE", 106 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE", 107 "FALSE_TRAILER" 108 }; 109 110 /* state of comment parser */ 111 typedef enum { 112 NO_COMMENT = false, /* outside a comment */ 113 C_COMMENT, /* in a comment like this one */ 114 CXX_COMMENT, /* between // and end of line */ 115 STARTING_COMMENT, /* just after slash-backslash-newline */ 116 FINISHING_COMMENT /* star-backslash-newline in a C comment */ 117 } Comment_state; 118 119 static char const * const comment_name[] = { 120 "NO", "C", "CXX", "STARTING", "FINISHING" 121 }; 122 123 /* state of preprocessor line parser */ 124 typedef enum { 125 LS_START, /* only space and comments on this line */ 126 LS_HASH, /* only space, comments, and a hash */ 127 LS_DIRTY /* this line can't be a preprocessor line */ 128 } Line_state; 129 130 static char const * const linestate_name[] = { 131 "START", "HASH", "DIRTY" 132 }; 133 134 /* 135 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 136 */ 137 #define MAXDEPTH 64 /* maximum #if nesting */ 138 #define MAXLINE 4096 /* maximum length of line */ 139 #define MAXSYMS 4096 /* maximum number of symbols */ 140 141 /* 142 * Sometimes when editing a keyword the replacement text is longer, so 143 * we leave some space at the end of the tline buffer to accommodate this. 144 */ 145 #define EDITSLOP 10 146 147 /* 148 * Globals. 
149 */ 150 151 static bool complement; /* -c: do the complement */ 152 static bool debugging; /* -d: debugging reports */ 153 static bool iocccok; /* -e: fewer IOCCC errors */ 154 static bool killconsts; /* -k: eval constant #ifs */ 155 static bool lnblank; /* -l: blank deleted lines */ 156 static bool symlist; /* -s: output symbol list */ 157 static bool text; /* -t: this is a text file */ 158 159 static const char *symname[MAXSYMS]; /* symbol name */ 160 static const char *value[MAXSYMS]; /* -Dsym=value */ 161 static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ 162 static int nsyms; /* number of symbols */ 163 164 static FILE *input; /* input file pointer */ 165 static const char *filename; /* input file name */ 166 static int linenum; /* current line number */ 167 168 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ 169 static char *keyword; /* used for editing #elif's */ 170 171 static Comment_state incomment; /* comment parser state */ 172 static Line_state linestate; /* #if line parser state */ 173 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ 174 static bool ignoring[MAXDEPTH]; /* ignore comments state */ 175 static int stifline[MAXDEPTH]; /* start of current #if */ 176 static int depth; /* current #if nesting */ 177 static bool keepthis; /* don't delete constant #if */ 178 179 static int exitstat; /* program exit status */ 180 181 static void addsym(bool, bool, char *); 182 static void debug(const char *, ...); 183 static void error(const char *); 184 static int findsym(const char *); 185 static void flushline(bool); 186 static Linetype get_line(void); 187 static Linetype ifeval(const char **); 188 static void ignoreoff(void); 189 static void ignoreon(void); 190 static void keywordedit(const char *); 191 static void nest(void); 192 static void process(void); 193 static const char *skipcomment(const char *); 194 static const char *skipsym(const char *); 195 static void state(Ifstate); 196 static int strlcmp(const char *, const 
char *, size_t); 197 static void usage(void); 198 199 #define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') 200 201 /* 202 * The main program. 203 */ 204 int 205 main(int argc, char *argv[]) 206 { 207 int opt; 208 209 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1) 210 switch (opt) { 211 case 'i': /* treat stuff controlled by these symbols as text */ 212 /* 213 * For strict backwards-compatibility the U or D 214 * should be immediately after the -i but it doesn't 215 * matter much if we relax that requirement. 216 */ 217 opt = *optarg++; 218 if (opt == 'D') 219 addsym(true, true, optarg); 220 else if (opt == 'U') 221 addsym(true, false, optarg); 222 else 223 usage(); 224 break; 225 case 'D': /* define a symbol */ 226 addsym(false, true, optarg); 227 break; 228 case 'U': /* undef a symbol */ 229 addsym(false, false, optarg); 230 break; 231 case 'I': 232 /* no-op for compatibility with cpp */ 233 break; 234 case 'c': /* treat -D as -U and vice versa */ 235 complement = true; 236 break; 237 case 'd': 238 debugging = true; 239 break; 240 case 'e': /* fewer errors from dodgy lines */ 241 iocccok = true; 242 break; 243 case 'k': /* process constant #ifs */ 244 killconsts = true; 245 break; 246 case 'l': /* blank deleted lines instead of omitting them */ 247 lnblank = true; 248 break; 249 case 's': /* only output list of symbols that control #ifs */ 250 symlist = true; 251 break; 252 case 't': /* don't parse C comments */ 253 text = true; 254 break; 255 default: 256 usage(); 257 } 258 argc -= optind; 259 argv += optind; 260 if (nsyms == 0 && !symlist) { 261 warnx("must -D or -U at least one symbol"); 262 usage(); 263 } 264 if (argc > 1) { 265 errx(2, "can only do one file"); 266 } else if (argc == 1 && strcmp(*argv, "-") != 0) { 267 filename = *argv; 268 if ((input = fopen(filename, "r")) != NULL) { 269 process(); 270 (void) fclose(input); 271 } else 272 err(2, "can't open %s", *argv); 273 } else { 274 filename = "[stdin]"; 
275 input = stdin; 276 process(); 277 } 278 279 exit(exitstat); 280 } 281 282 static void 283 usage(void) 284 { 285 fprintf(stderr, 286 "usage: unifdef [-ceklst] [-Dsym[=val]] [-Ipath] [-iDsym[=val]] " 287 "[-iUsym] [-Usym]\n" 288 "\t[file]\n"); 289 exit(2); 290 } 291 292 /* 293 * A state transition function alters the global #if processing state 294 * in a particular way. The table below is indexed by the current 295 * processing state and the type of the current line. A NULL entry 296 * indicates that processing is complete. 297 * 298 * Nesting is handled by keeping a stack of states; some transition 299 * functions increase or decrease the depth. They also maintain the 300 * ignore state on a stack. In some complicated cases they have to 301 * alter the preprocessor directive, as follows. 302 * 303 * When we have processed a group that starts off with a known-false 304 * #if/#elif sequence (which has therefore been deleted) followed by a 305 * #elif that we don't understand and therefore must keep, we edit the 306 * latter into a #if to keep the nesting correct. 307 * 308 * When we find a true #elif in a group, the following block will 309 * always be kept and the rest of the sequence after the next #elif or 310 * #else will be discarded. We edit the #elif into a #else and the 311 * following directive to #endif since this has the desired behaviour. 312 * 313 * "Dodgy" directives are split across multiple lines, the most common 314 * example being a multi-line comment hanging off the right of the 315 * directive. We can handle them correctly only if there is no change 316 * from printing to dropping (or vice versa) caused by that directive. 317 * If the directive is the first of a group we have a choice between 318 * failing with an error, or passing it through unchanged instead of 319 * evaluating it. The latter is not the default to avoid questions from 320 * users about unifdef unexpectedly leaving behind preprocessor directives. 
321 */ 322 typedef void state_fn(void); 323 324 /* report an error */ 325 static void 326 Eelif(void) 327 { 328 error("Inappropriate #elif"); 329 } 330 331 static void 332 Eelse(void) 333 { 334 error("Inappropriate #else"); 335 } 336 337 static void 338 Eendif(void) 339 { 340 error("Inappropriate #endif"); 341 } 342 343 static void 344 Eeof(void) 345 { 346 error("Premature EOF"); 347 } 348 349 static void 350 Eioccc(void) 351 { 352 error("Obfuscated preprocessor control line"); 353 } 354 355 /* plain line handling */ 356 static void 357 print(void) 358 { 359 flushline(true); 360 } 361 362 static void 363 drop(void) 364 { 365 flushline(false); 366 } 367 368 /* output lacks group's start line */ 369 static void 370 Strue(void) 371 { 372 drop(); 373 ignoreoff(); 374 state(IS_TRUE_PREFIX); 375 } 376 377 static void 378 Sfalse(void) 379 { 380 drop(); 381 ignoreoff(); 382 state(IS_FALSE_PREFIX); 383 } 384 385 static void 386 Selse(void) 387 { 388 drop(); 389 state(IS_TRUE_ELSE); 390 } 391 392 /* print/pass this block */ 393 static void 394 Pelif(void) 395 { 396 print(); 397 ignoreoff(); 398 state(IS_PASS_MIDDLE); 399 } 400 401 static void 402 Pelse(void) 403 { 404 print(); 405 state(IS_PASS_ELSE); 406 } 407 408 static void 409 Pendif(void) 410 { 411 print(); 412 --depth; 413 } 414 415 /* discard this block */ 416 static void 417 Dfalse(void) 418 { 419 drop(); 420 ignoreoff(); 421 state(IS_FALSE_TRAILER); 422 } 423 424 static void 425 Delif(void) 426 { 427 drop(); 428 ignoreoff(); 429 state(IS_FALSE_MIDDLE); 430 } 431 432 static void 433 Delse(void) 434 { 435 drop(); 436 state(IS_FALSE_ELSE); 437 } 438 439 static void 440 Dendif(void) 441 { 442 drop(); 443 --depth; 444 } 445 446 /* first line of group */ 447 static void 448 Fdrop(void) 449 { 450 nest(); 451 Dfalse(); 452 } 453 454 static void 455 Fpass(void) 456 { 457 nest(); 458 Pelif(); 459 } 460 461 static void 462 Ftrue(void) 463 { 464 nest(); 465 Strue(); 466 } 467 468 static void 469 Ffalse(void) 470 { 471 nest(); 
472 Sfalse(); 473 } 474 475 /* variable pedantry for obfuscated lines */ 476 static void 477 Oiffy(void) 478 { 479 if (iocccok) 480 Fpass(); 481 else 482 Eioccc(); 483 ignoreon(); 484 } 485 486 static void 487 Oif(void) 488 { 489 if (iocccok) 490 Fpass(); 491 else 492 Eioccc(); 493 } 494 495 static void 496 Oelif(void) 497 { 498 if (iocccok) 499 Pelif(); 500 else 501 Eioccc(); 502 } 503 504 /* ignore comments in this block */ 505 static void 506 Idrop(void) 507 { 508 Fdrop(); 509 ignoreon(); 510 } 511 512 static void 513 Itrue(void) 514 { 515 Ftrue(); 516 ignoreon(); 517 } 518 519 static void 520 Ifalse(void) 521 { 522 Ffalse(); 523 ignoreon(); 524 } 525 526 /* edit this line */ 527 static void 528 Mpass (void) 529 { 530 strncpy(keyword, "if ", 4); 531 Pelif(); 532 } 533 534 static void 535 Mtrue (void) 536 { 537 keywordedit("else\n"); 538 state(IS_TRUE_MIDDLE); 539 } 540 541 static void 542 Melif (void) 543 { 544 keywordedit("endif\n"); 545 state(IS_FALSE_TRAILER); 546 } 547 548 static void 549 Melse (void) 550 { 551 keywordedit("endif\n"); 552 state(IS_FALSE_ELSE); 553 } 554 555 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 556 /* IS_OUTSIDE */ 557 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 558 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 559 print, NULL }, 560 /* IS_FALSE_PREFIX */ 561 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 562 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 563 drop, Eeof }, 564 /* IS_TRUE_PREFIX */ 565 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 566 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 567 print, Eeof }, 568 /* IS_PASS_MIDDLE */ 569 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 570 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 571 print, Eeof }, 572 /* IS_FALSE_MIDDLE */ 573 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, 
Mtrue, Delif, Pelse, Pendif, 574 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 575 drop, Eeof }, 576 /* IS_TRUE_MIDDLE */ 577 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 578 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 579 print, Eeof }, 580 /* IS_PASS_ELSE */ 581 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 582 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 583 print, Eeof }, 584 /* IS_FALSE_ELSE */ 585 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 586 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 587 drop, Eeof }, 588 /* IS_TRUE_ELSE */ 589 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 590 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 591 print, Eeof }, 592 /* IS_FALSE_TRAILER */ 593 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 594 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 595 drop, Eeof } 596 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 597 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 598 PLAIN EOF */ 599 }; 600 601 /* 602 * State machine utility functions 603 */ 604 static void 605 ignoreoff(void) 606 { 607 ignoring[depth] = ignoring[depth-1]; 608 } 609 610 static void 611 ignoreon(void) 612 { 613 ignoring[depth] = true; 614 } 615 616 static void 617 keywordedit(const char *replacement) 618 { 619 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword); 620 print(); 621 } 622 623 static void 624 nest(void) 625 { 626 depth += 1; 627 if (depth >= MAXDEPTH) 628 error("Too many levels of nesting"); 629 stifline[depth] = linenum; 630 } 631 632 static void 633 state(Ifstate is) 634 { 635 ifstate[depth] = is; 636 } 637 638 /* 639 * Write a line to the output or not, according to command line options. 
640 */ 641 static void 642 flushline(bool keep) 643 { 644 if (symlist) 645 return; 646 if (keep ^ complement) 647 fputs(tline, stdout); 648 else { 649 if (lnblank) 650 putc('\n', stdout); 651 exitstat = 1; 652 } 653 } 654 655 /* 656 * The driver for the state machine. 657 */ 658 static void 659 process(void) 660 { 661 Linetype lineval; 662 state_fn *trans; 663 664 for (;;) { 665 linenum++; 666 lineval = get_line(); 667 trans = trans_table[ifstate[depth]][lineval]; 668 if (trans == NULL) 669 break; 670 trans(); 671 debug("process %s -> %s depth %d", 672 linetype_name[lineval], 673 ifstate_name[ifstate[depth]], depth); 674 } 675 if (incomment) 676 error("EOF in comment"); 677 } 678 679 /* 680 * Parse a line and determine its type. We keep the preprocessor line 681 * parser state between calls in a global variable. 682 */ 683 static Linetype 684 get_line(void) 685 { 686 const char *cp; 687 int cursym; 688 int kwlen; 689 Linetype retval; 690 Comment_state wascomment; 691 692 if (fgets(tline, MAXLINE, input) == NULL) 693 return (LT_EOF); 694 retval = LT_PLAIN; 695 wascomment = incomment; 696 cp = skipcomment(tline); 697 if (linestate == LS_START) { 698 if (*cp == '#') { 699 linestate = LS_HASH; 700 cp = skipcomment(cp + 1); 701 } else if (*cp != '\0') 702 linestate = LS_DIRTY; 703 } 704 if (!incomment && linestate == LS_HASH) { 705 keyword = tline + (cp - tline); 706 cp = skipsym(cp); 707 kwlen = cp - keyword; 708 /* no way can we deal with a continuation inside a keyword */ 709 if (strncmp(cp, "\\\n", 2) == 0) 710 Eioccc(); 711 if (strlcmp("ifdef", keyword, kwlen) == 0 || 712 strlcmp("ifndef", keyword, kwlen) == 0) { 713 cp = skipcomment(cp); 714 if ((cursym = findsym(cp)) < 0) 715 retval = LT_IF; 716 else { 717 retval = (keyword[2] == 'n') 718 ? LT_FALSE : LT_TRUE; 719 if (value[cursym] == NULL) 720 retval = (retval == LT_TRUE) 721 ? LT_FALSE : LT_TRUE; 722 if (ignore[cursym]) 723 retval = (retval == LT_TRUE) 724 ? 
LT_TRUEI : LT_FALSEI; 725 } 726 cp = skipsym(cp); 727 } else if (strlcmp("if", keyword, kwlen) == 0) 728 retval = ifeval(&cp); 729 else if (strlcmp("elif", keyword, kwlen) == 0) 730 retval = ifeval(&cp) - LT_IF + LT_ELIF; 731 else if (strlcmp("else", keyword, kwlen) == 0) 732 retval = LT_ELSE; 733 else if (strlcmp("endif", keyword, kwlen) == 0) 734 retval = LT_ENDIF; 735 else { 736 linestate = LS_DIRTY; 737 retval = LT_PLAIN; 738 } 739 cp = skipcomment(cp); 740 if (*cp != '\0') { 741 linestate = LS_DIRTY; 742 if (retval == LT_TRUE || retval == LT_FALSE || 743 retval == LT_TRUEI || retval == LT_FALSEI) 744 retval = LT_IF; 745 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 746 retval = LT_ELIF; 747 } 748 if (retval != LT_PLAIN && (wascomment || incomment)) { 749 retval += LT_DODGY; 750 if (incomment) 751 linestate = LS_DIRTY; 752 } 753 /* skipcomment should have changed the state */ 754 if (linestate == LS_HASH) 755 abort(); /* bug */ 756 } 757 if (linestate == LS_DIRTY) { 758 while (*cp != '\0') 759 cp = skipcomment(cp + 1); 760 } 761 debug("parser %s comment %s line", 762 comment_name[incomment], linestate_name[linestate]); 763 return (retval); 764 } 765 766 /* 767 * These are the operators that are supported by the expression 768 * evaluator. Note that if support for division is added then we also 769 * need short-circuiting booleans because of divide-by-zero. 
770 */ 771 static int 772 op_lt(int a, int b) 773 { 774 return (a < b); 775 } 776 777 static int 778 op_gt(int a, int b) 779 { 780 return (a > b); 781 } 782 783 static int 784 op_le(int a, int b) 785 { 786 return (a <= b); 787 } 788 789 static int 790 op_ge(int a, int b) 791 { 792 return (a >= b); 793 } 794 795 static int 796 op_eq(int a, int b) 797 { 798 return (a == b); 799 } 800 801 static int 802 op_ne(int a, int b) 803 { 804 return (a != b); 805 } 806 807 static int 808 op_or(int a, int b) 809 { 810 return (a || b); 811 } 812 813 static int 814 op_and(int a, int b) 815 { 816 return (a && b); 817 } 818 819 /* 820 * An evaluation function takes three arguments, as follows: (1) a pointer to 821 * an element of the precedence table which lists the operators at the current 822 * level of precedence; (2) a pointer to an integer which will receive the 823 * value of the expression; and (3) a pointer to a char* that points to the 824 * expression to be evaluated and that is updated to the end of the expression 825 * when evaluation is complete. The function returns LT_FALSE if the value of 826 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the 827 * expression could not be evaluated. 828 */ 829 struct ops; 830 831 typedef Linetype eval_fn(const struct ops *, int *, const char **); 832 833 static eval_fn eval_table, eval_unary; 834 835 /* 836 * The precedence table. Expressions involving binary operators are evaluated 837 * in a table-driven way by eval_table. When it evaluates a subexpression it 838 * calls the inner function with its first argument pointing to the next 839 * element of the table. Innermost expressions have special non-table-driven 840 * handling. 
841 */ 842 static const struct ops { 843 eval_fn *inner; 844 struct op { 845 const char *str; 846 int (*fn)(int, int); 847 } op[5]; 848 } eval_ops[] = { 849 { eval_table, { { "||", op_or } } }, 850 { eval_table, { { "&&", op_and } } }, 851 { eval_table, { { "==", op_eq }, 852 { "!=", op_ne } } }, 853 { eval_unary, { { "<=", op_le }, 854 { ">=", op_ge }, 855 { "<", op_lt }, 856 { ">", op_gt } } } 857 }; 858 859 /* 860 * Function for evaluating the innermost parts of expressions, 861 * viz. !expr (expr) defined(symbol) symbol number 862 * We reset the keepthis flag when we find a non-constant subexpression. 863 */ 864 static Linetype 865 eval_unary(const struct ops *ops, int *valp, const char **cpp) 866 { 867 const char *cp; 868 char *ep; 869 int sym; 870 871 cp = skipcomment(*cpp); 872 if (*cp == '!') { 873 debug("eval%d !", ops - eval_ops); 874 cp++; 875 if (eval_unary(ops, valp, &cp) == LT_IF) 876 return (LT_IF); 877 *valp = !*valp; 878 } else if (*cp == '(') { 879 cp++; 880 debug("eval%d (", ops - eval_ops); 881 if (eval_table(eval_ops, valp, &cp) == LT_IF) 882 return (LT_IF); 883 cp = skipcomment(cp); 884 if (*cp++ != ')') 885 return (LT_IF); 886 } else if (isdigit((unsigned char)*cp)) { 887 debug("eval%d number", ops - eval_ops); 888 *valp = strtol(cp, &ep, 0); 889 cp = skipsym(cp); 890 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { 891 cp = skipcomment(cp+7); 892 debug("eval%d defined", ops - eval_ops); 893 if (*cp++ != '(') 894 return (LT_IF); 895 cp = skipcomment(cp); 896 sym = findsym(cp); 897 if (sym < 0 && !symlist) 898 return (LT_IF); 899 *valp = (value[sym] != NULL); 900 cp = skipsym(cp); 901 cp = skipcomment(cp); 902 if (*cp++ != ')') 903 return (LT_IF); 904 keepthis = false; 905 } else if (!endsym(*cp)) { 906 debug("eval%d symbol", ops - eval_ops); 907 sym = findsym(cp); 908 if (sym < 0 && !symlist) 909 return (LT_IF); 910 if (value[sym] == NULL) 911 *valp = 0; 912 else { 913 *valp = strtol(value[sym], &ep, 0); 914 if (*ep != '\0' || ep 
== value[sym]) 915 return (LT_IF); 916 } 917 cp = skipsym(cp); 918 keepthis = false; 919 } else 920 return (LT_IF); 921 922 *cpp = cp; 923 debug("eval%d = %d", ops - eval_ops, *valp); 924 return (*valp ? LT_TRUE : LT_FALSE); 925 } 926 927 /* 928 * Table-driven evaluation of binary operators. 929 */ 930 static Linetype 931 eval_table(const struct ops *ops, int *valp, const char **cpp) 932 { 933 const struct op *op; 934 const char *cp; 935 int val; 936 937 debug("eval%d", ops - eval_ops); 938 cp = *cpp; 939 if (ops->inner(ops+1, valp, &cp) == LT_IF) 940 return (LT_IF); 941 for (;;) { 942 cp = skipcomment(cp); 943 for (op = ops->op; op->str != NULL; op++) 944 if (strncmp(cp, op->str, strlen(op->str)) == 0) 945 break; 946 if (op->str == NULL) 947 break; 948 cp += strlen(op->str); 949 debug("eval%d %s", ops - eval_ops, op->str); 950 if (ops->inner(ops+1, &val, &cp) == LT_IF) 951 return (LT_IF); 952 *valp = op->fn(*valp, val); 953 } 954 955 *cpp = cp; 956 debug("eval%d = %d", ops - eval_ops, *valp); 957 return (*valp ? LT_TRUE : LT_FALSE); 958 } 959 960 /* 961 * Evaluate the expression on a #if or #elif line. If we can work out 962 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 963 * return just a generic LT_IF. 964 */ 965 static Linetype 966 ifeval(const char **cpp) 967 { 968 int ret; 969 int val; 970 971 debug("eval %s", *cpp); 972 keepthis = killconsts ? false : true; 973 ret = eval_table(eval_ops, &val, cpp); 974 return (keepthis ? LT_IF : ret); 975 } 976 977 /* 978 * Skip over comments and stop at the next character position that is 979 * not whitespace. Between calls we keep the comment state in the 980 * global variable incomment, and we also adjust the global variable 981 * linestate when we see a newline. 982 * XXX: doesn't cope with the buffer splitting inside a state transition. 
983 */ 984 static const char * 985 skipcomment(const char *cp) 986 { 987 if (text || ignoring[depth]) { 988 for (; isspace((unsigned char)*cp); cp++) 989 if (*cp == '\n') 990 linestate = LS_START; 991 return (cp); 992 } 993 while (*cp != '\0') 994 if (strncmp(cp, "\\\n", 2) == 0) 995 cp += 2; 996 else switch (incomment) { 997 case NO_COMMENT: 998 if (strncmp(cp, "/\\\n", 3) == 0) { 999 incomment = STARTING_COMMENT; 1000 cp += 3; 1001 } else if (strncmp(cp, "/*", 2) == 0) { 1002 incomment = C_COMMENT; 1003 cp += 2; 1004 } else if (strncmp(cp, "//", 2) == 0) { 1005 incomment = CXX_COMMENT; 1006 cp += 2; 1007 } else if (strncmp(cp, "\n", 1) == 0) { 1008 linestate = LS_START; 1009 cp += 1; 1010 } else if (strchr(" \t", *cp) != NULL) { 1011 cp += 1; 1012 } else 1013 return (cp); 1014 continue; 1015 case CXX_COMMENT: 1016 if (strncmp(cp, "\n", 1) == 0) { 1017 incomment = NO_COMMENT; 1018 linestate = LS_START; 1019 } 1020 cp += 1; 1021 continue; 1022 case C_COMMENT: 1023 if (strncmp(cp, "*\\\n", 3) == 0) { 1024 incomment = FINISHING_COMMENT; 1025 cp += 3; 1026 } else if (strncmp(cp, "*/", 2) == 0) { 1027 incomment = NO_COMMENT; 1028 cp += 2; 1029 } else 1030 cp += 1; 1031 continue; 1032 case STARTING_COMMENT: 1033 if (*cp == '*') { 1034 incomment = C_COMMENT; 1035 cp += 1; 1036 } else if (*cp == '/') { 1037 incomment = CXX_COMMENT; 1038 cp += 1; 1039 } else { 1040 incomment = NO_COMMENT; 1041 linestate = LS_DIRTY; 1042 } 1043 continue; 1044 case FINISHING_COMMENT: 1045 if (*cp == '/') { 1046 incomment = NO_COMMENT; 1047 cp += 1; 1048 } else 1049 incomment = C_COMMENT; 1050 continue; 1051 default: 1052 /* bug */ 1053 abort(); 1054 } 1055 return (cp); 1056 } 1057 1058 /* 1059 * Skip over an identifier. 1060 */ 1061 static const char * 1062 skipsym(const char *cp) 1063 { 1064 while (!endsym(*cp)) 1065 ++cp; 1066 return (cp); 1067 } 1068 1069 /* 1070 * Look for the symbol in the symbol table. If it is found, we return 1071 * the symbol table index, else we return -1. 
1072 */ 1073 static int 1074 findsym(const char *str) 1075 { 1076 const char *cp; 1077 int symind; 1078 1079 cp = skipsym(str); 1080 if (cp == str) 1081 return (-1); 1082 if (symlist) 1083 printf("%.*s\n", (int)(cp-str), str); 1084 for (symind = 0; symind < nsyms; ++symind) { 1085 if (strlcmp(symname[symind], str, cp-str) == 0) { 1086 debug("findsym %s %s", symname[symind], 1087 value[symind] ? value[symind] : ""); 1088 return (symind); 1089 } 1090 } 1091 return (-1); 1092 } 1093 1094 /* 1095 * Add a symbol to the symbol table. 1096 */ 1097 static void 1098 addsym(bool ignorethis, bool definethis, char *sym) 1099 { 1100 int symind; 1101 char *val; 1102 1103 symind = findsym(sym); 1104 if (symind < 0) { 1105 if (nsyms >= MAXSYMS) 1106 errx(2, "too many symbols"); 1107 symind = nsyms++; 1108 } 1109 symname[symind] = sym; 1110 ignore[symind] = ignorethis; 1111 val = sym + (skipsym(sym) - sym); 1112 if (definethis) { 1113 if (*val == '=') { 1114 value[symind] = val+1; 1115 *val = '\0'; 1116 } else if (*val == '\0') 1117 value[symind] = ""; 1118 else 1119 usage(); 1120 } else { 1121 if (*val != '\0') 1122 usage(); 1123 value[symind] = NULL; 1124 } 1125 } 1126 1127 /* 1128 * Compare s with n characters of t. 1129 * The same as strncmp() except that it checks that s[n] == '\0'. 1130 */ 1131 static int 1132 strlcmp(const char *s, const char *t, size_t n) 1133 { 1134 while (n-- && *t != '\0') 1135 if (*s != *t) 1136 return ((unsigned char)*s - (unsigned char)*t); 1137 else 1138 ++s, ++t; 1139 return ((unsigned char)*s); 1140 } 1141 1142 /* 1143 * Diagnostics. 1144 */ 1145 static void 1146 debug(const char *msg, ...) 
1147 { 1148 va_list ap; 1149 1150 if (debugging) { 1151 va_start(ap, msg); 1152 vwarnx(msg, ap); 1153 va_end(ap); 1154 } 1155 } 1156 1157 static void 1158 error(const char *msg) 1159 { 1160 if (depth == 0) 1161 warnx("%s: %d: %s", filename, linenum, msg); 1162 else 1163 warnx("%s: %d: %s (#if line %d depth %d)", 1164 filename, linenum, msg, stifline[depth], depth); 1165 errx(2, "output may be truncated"); 1166 } 1167