1 /* $OpenBSD: unifdef.c,v 1.13 2007/06/25 15:57:28 jmc Exp $ */ 2 /* 3 * Copyright (c) 2002, 2003 Tony Finch <dot@dotat.at> 4 * Copyright (c) 1985, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Dave Yost. Support for #if and #elif was added by Tony Finch. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #ifndef lint 36 static const char copyright[] = 37 "@(#) Copyright (c) 1985, 1993\n\ 38 The Regents of the University of California. All rights reserved.\n"; 39 40 #if 0 41 static char sccsid[] = "@(#)unifdef.c 8.1 (Berkeley) 6/6/93"; 42 #endif 43 static const char rcsid[] = "$OpenBSD: unifdef.c,v 1.13 2007/06/25 15:57:28 jmc Exp $"; 44 #endif 45 46 /* 47 * unifdef - remove ifdef'ed lines 48 * 49 * Wishlist: 50 * provide an option which will append the name of the 51 * appropriate symbol after #else's and #endif's 52 * provide an option which will check symbols after 53 * #else's and #endif's to see that they match their 54 * corresponding #ifdef or #ifndef 55 * generate #line directives in place of deleted code 56 * 57 * The first two items above require better buffer handling, which would 58 * also make it possible to handle all "dodgy" directives correctly. 59 */ 60 61 #include <ctype.h> 62 #include <err.h> 63 #include <stdarg.h> 64 #include <stdbool.h> 65 #include <stdio.h> 66 #include <stdlib.h> 67 #include <string.h> 68 #include <unistd.h> 69 70 /* types of input lines: */ 71 typedef enum { 72 LT_TRUEI, /* a true #if with ignore flag */ 73 LT_FALSEI, /* a false #if with ignore flag */ 74 LT_IF, /* an unknown #if */ 75 LT_TRUE, /* a true #if */ 76 LT_FALSE, /* a false #if */ 77 LT_ELIF, /* an unknown #elif */ 78 LT_ELTRUE, /* a true #elif */ 79 LT_ELFALSE, /* a false #elif */ 80 LT_ELSE, /* #else */ 81 LT_ENDIF, /* #endif */ 82 LT_DODGY, /* flag: directive is not on one line */ 83 LT_DODGY_LAST = LT_DODGY + LT_ENDIF, 84 LT_PLAIN, /* ordinary line */ 85 LT_EOF, /* end of file */ 86 LT_COUNT 87 } Linetype; 88 89 static char const * const linetype_name[] = { 90 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE", 91 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF", 92 "DODGY TRUEI", "DODGY FALSEI", 93 "DODGY IF", "DODGY TRUE", "DODGY FALSE", 94 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE", 95 "DODGY ELSE", "DODGY ENDIF", 96 "PLAIN", "EOF" 97 }; 98 99 /* state of #if processing */ 100 typedef enum { 101 IS_OUTSIDE, 102 IS_FALSE_PREFIX, /* false #if followed by false #elifs */ 103 IS_TRUE_PREFIX, /* first non-false #(el)if is true */ 104 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */ 105 IS_FALSE_MIDDLE, /* a false #elif after a pass state */ 106 IS_TRUE_MIDDLE, /* a true #elif after a pass state */ 107 IS_PASS_ELSE, /* an else after a pass state */ 108 IS_FALSE_ELSE, /* an else after a true state */ 109 IS_TRUE_ELSE, /* an else after only false states */ 110 IS_FALSE_TRAILER, /* #elifs after a true are false */ 111 IS_COUNT 112 } Ifstate; 113 114 static char const * const ifstate_name[] = { 115 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX", 116 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE", 117 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE", 118 "FALSE_TRAILER" 119 }; 120 121 /* state of comment parser */ 122 typedef enum { 123 NO_COMMENT = false, /* outside a comment */ 124 C_COMMENT, /* in a comment like this one */ 125 CXX_COMMENT, /* between // and end of line */ 126 STARTING_COMMENT, /* just after slash-backslash-newline */ 127 FINISHING_COMMENT /* star-backslash-newline in a C comment */ 128 } Comment_state; 129 130 static char const * const comment_name[] = { 131 "NO", "C", "CXX", "STARTING", "FINISHING" 132 }; 133 134 /* state of preprocessor line parser */ 135 typedef enum { 136 LS_START, /* only space and comments on this line */ 137 LS_HASH, /* only space, comments, and a hash */ 138 LS_DIRTY /* this line can't be a preprocessor line */ 139 } Line_state; 140 141 static char const * const linestate_name[] = { 142 "START", "HASH", "DIRTY" 143 }; 144 145 /* 146 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1 147 */ 148 #define MAXDEPTH 64 /* maximum #if nesting */ 149 #define MAXLINE 4096 /* maximum length of line */ 150 #define MAXSYMS 4096 /* maximum number of symbols */ 151 152 /* 153 * Sometimes when editing a keyword the replacement text is longer, so 154 * we leave some space at the end of the tline buffer to accommodate this. 155 */ 156 #define EDITSLOP 10 157 158 /* 159 * Globals. 160 */ 161 162 static bool complement; /* -c: do the complement */ 163 static bool debugging; /* -d: debugging reports */ 164 static bool iocccok; /* -e: fewer IOCCC errors */ 165 static bool killconsts; /* -k: eval constant #ifs */ 166 static bool lnblank; /* -l: blank deleted lines */ 167 static bool symlist; /* -s: output symbol list */ 168 static bool text; /* -t: this is a text file */ 169 170 static const char *symname[MAXSYMS]; /* symbol name */ 171 static const char *value[MAXSYMS]; /* -Dsym=value */ 172 static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ 173 static int nsyms; /* number of symbols */ 174 175 static FILE *input; /* input file pointer */ 176 static const char *filename; /* input file name */ 177 static int linenum; /* current line number */ 178 179 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */ 180 static char *keyword; /* used for editing #elif's */ 181 182 static Comment_state incomment; /* comment parser state */ 183 static Line_state linestate; /* #if line parser state */ 184 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */ 185 static bool ignoring[MAXDEPTH]; /* ignore comments state */ 186 static int stifline[MAXDEPTH]; /* start of current #if */ 187 static int depth; /* current #if nesting */ 188 static bool keepthis; /* don't delete constant #if */ 189 190 static int exitstat; /* program exit status */ 191 192 static void addsym(bool, bool, char *); 193 static void debug(const char *, ...); 194 static void error(const char *); 195 static int findsym(const char *); 196 static void flushline(bool); 197 static Linetype getline(void); 198 static Linetype ifeval(const char **); 199 static void ignoreoff(void); 200 static void ignoreon(void); 201 static void keywordedit(const char *); 202 static void nest(void); 203 static void process(void); 204 static const char *skipcomment(const char *); 205 static const char *skipsym(const char *); 206 static void state(Ifstate); 207 static int strlcmp(const char *, const char *, size_t); 208 static void usage(void); 209 210 #define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') 211 212 /* 213 * The main program. 214 */ 215 int 216 main(int argc, char *argv[]) 217 { 218 int opt; 219 220 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1) 221 switch (opt) { 222 case 'i': /* treat stuff controlled by these symbols as text */ 223 /* 224 * For strict backwards-compatibility the U or D 225 * should be immediately after the -i but it doesn't 226 * matter much if we relax that requirement. 227 */ 228 opt = *optarg++; 229 if (opt == 'D') 230 addsym(true, true, optarg); 231 else if (opt == 'U') 232 addsym(true, false, optarg); 233 else 234 usage(); 235 break; 236 case 'D': /* define a symbol */ 237 addsym(false, true, optarg); 238 break; 239 case 'U': /* undef a symbol */ 240 addsym(false, false, optarg); 241 break; 242 case 'I': 243 /* no-op for compatibility with cpp */ 244 break; 245 case 'c': /* treat -D as -U and vice versa */ 246 complement = true; 247 break; 248 case 'd': 249 debugging = true; 250 break; 251 case 'e': /* fewer errors from dodgy lines */ 252 iocccok = true; 253 break; 254 case 'k': /* process constant #ifs */ 255 killconsts = true; 256 break; 257 case 'l': /* blank deleted lines instead of omitting them */ 258 lnblank = true; 259 break; 260 case 's': /* only output list of symbols that control #ifs */ 261 symlist = true; 262 break; 263 case 't': /* don't parse C comments */ 264 text = true; 265 break; 266 default: 267 usage(); 268 } 269 argc -= optind; 270 argv += optind; 271 if (nsyms == 0 && !symlist) { 272 warnx("must -D or -U at least one symbol"); 273 usage(); 274 } 275 if (argc > 1) { 276 errx(2, "can only do one file"); 277 } else if (argc == 1 && strcmp(*argv, "-") != 0) { 278 filename = *argv; 279 if ((input = fopen(filename, "r")) != NULL) { 280 process(); 281 (void) fclose(input); 282 } else 283 err(2, "can't open %s", *argv); 284 } else { 285 filename = "[stdin]"; 286 input = stdin; 287 process(); 288 } 289 290 exit(exitstat); 291 } 292 293 static void 294 usage(void) 295 { 296 fprintf(stderr, 297 "usage: unifdef [-ceklst] [-Dsym[=val]] [-Ipath] [-iDsym[=val]] " 298 "[-iUsym] [-Usym]\n" 299 "\t[file]\n"); 300 exit(2); 301 } 302 303 /* 304 * A state transition function alters the global #if processing state 305 * in a particular way. The table below is indexed by the current 306 * processing state and the type of the current line. A NULL entry 307 * indicates that processing is complete. 308 * 309 * Nesting is handled by keeping a stack of states; some transition 310 * functions increase or decrease the depth. They also maintain the 311 * ignore state on a stack. In some complicated cases they have to 312 * alter the preprocessor directive, as follows. 313 * 314 * When we have processed a group that starts off with a known-false 315 * #if/#elif sequence (which has therefore been deleted) followed by a 316 * #elif that we don't understand and therefore must keep, we edit the 317 * latter into a #if to keep the nesting correct. 318 * 319 * When we find a true #elif in a group, the following block will 320 * always be kept and the rest of the sequence after the next #elif or 321 * #else will be discarded. We edit the #elif into a #else and the 322 * following directive to #endif since this has the desired behaviour. 323 * 324 * "Dodgy" directives are split across multiple lines, the most common 325 * example being a multi-line comment hanging off the right of the 326 * directive. We can handle them correctly only if there is no change 327 * from printing to dropping (or vice versa) caused by that directive. 328 * If the directive is the first of a group we have a choice between 329 * failing with an error, or passing it through unchanged instead of 330 * evaluating it. The latter is not the default to avoid questions from 331 * users about unifdef unexpectedly leaving behind preprocessor directives. 332 */ 333 typedef void state_fn(void); 334 335 /* report an error */ 336 static void 337 Eelif(void) 338 { 339 error("Inappropriate #elif"); 340 } 341 342 static void 343 Eelse(void) 344 { 345 error("Inappropriate #else"); 346 } 347 348 static void 349 Eendif(void) 350 { 351 error("Inappropriate #endif"); 352 } 353 354 static void 355 Eeof(void) 356 { 357 error("Premature EOF"); 358 } 359 360 static void 361 Eioccc(void) 362 { 363 error("Obfuscated preprocessor control line"); 364 } 365 366 /* plain line handling */ 367 static void 368 print(void) 369 { 370 flushline(true); 371 } 372 373 static void 374 drop(void) 375 { 376 flushline(false); 377 } 378 379 /* output lacks group's start line */ 380 static void 381 Strue(void) 382 { 383 drop(); 384 ignoreoff(); 385 state(IS_TRUE_PREFIX); 386 } 387 388 static void 389 Sfalse(void) 390 { 391 drop(); 392 ignoreoff(); 393 state(IS_FALSE_PREFIX); 394 } 395 396 static void 397 Selse(void) 398 { 399 drop(); 400 state(IS_TRUE_ELSE); 401 } 402 403 /* print/pass this block */ 404 static void 405 Pelif(void) 406 { 407 print(); 408 ignoreoff(); 409 state(IS_PASS_MIDDLE); 410 } 411 412 static void 413 Pelse(void) 414 { 415 print(); 416 state(IS_PASS_ELSE); 417 } 418 419 static void 420 Pendif(void) 421 { 422 print(); 423 --depth; 424 } 425 426 /* discard this block */ 427 static void 428 Dfalse(void) 429 { 430 drop(); 431 ignoreoff(); 432 state(IS_FALSE_TRAILER); 433 } 434 435 static void 436 Delif(void) 437 { 438 drop(); 439 ignoreoff(); 440 state(IS_FALSE_MIDDLE); 441 } 442 443 static void 444 Delse(void) 445 { 446 drop(); 447 state(IS_FALSE_ELSE); 448 } 449 450 static void 451 Dendif(void) 452 { 453 drop(); 454 --depth; 455 } 456 457 /* first line of group */ 458 static void 459 Fdrop(void) 460 { 461 nest(); 462 Dfalse(); 463 } 464 465 static void 466 Fpass(void) 467 { 468 nest(); 469 Pelif(); 470 } 471 472 static void 473 Ftrue(void) 474 { 475 nest(); 476 Strue(); 477 } 478 479 static void 480 Ffalse(void) 481 { 482 nest(); 483 Sfalse(); 484 } 485 486 /* variable pedantry for obfuscated lines */ 487 static void 488 Oiffy(void) 489 { 490 if (iocccok) 491 Fpass(); 492 else 493 Eioccc(); 494 ignoreon(); 495 } 496 497 static void 498 Oif(void) 499 { 500 if (iocccok) 501 Fpass(); 502 else 503 Eioccc(); 504 } 505 506 static void 507 Oelif(void) 508 { 509 if (iocccok) 510 Pelif(); 511 else 512 Eioccc(); 513 } 514 515 /* ignore comments in this block */ 516 static void 517 Idrop(void) 518 { 519 Fdrop(); 520 ignoreon(); 521 } 522 523 static void 524 Itrue(void) 525 { 526 Ftrue(); 527 ignoreon(); 528 } 529 530 static void 531 Ifalse(void) 532 { 533 Ffalse(); 534 ignoreon(); 535 } 536 537 /* edit this line */ 538 static void 539 Mpass (void) 540 { 541 strncpy(keyword, "if ", 4); 542 Pelif(); 543 } 544 545 static void 546 Mtrue (void) 547 { 548 keywordedit("else\n"); 549 state(IS_TRUE_MIDDLE); 550 } 551 552 static void 553 Melif (void) 554 { 555 keywordedit("endif\n"); 556 state(IS_FALSE_TRAILER); 557 } 558 559 static void 560 Melse (void) 561 { 562 keywordedit("endif\n"); 563 state(IS_FALSE_ELSE); 564 } 565 566 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = { 567 /* IS_OUTSIDE */ 568 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif, 569 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif, 570 print, NULL }, 571 /* IS_FALSE_PREFIX */ 572 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif, 573 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, 574 drop, Eeof }, 575 /* IS_TRUE_PREFIX */ 576 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif, 577 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 578 print, Eeof }, 579 /* IS_PASS_MIDDLE */ 580 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif, 581 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif, 582 print, Eeof }, 583 /* IS_FALSE_MIDDLE */ 584 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif, 585 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, 586 drop, Eeof }, 587 /* IS_TRUE_MIDDLE */ 588 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif, 589 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif, 590 print, Eeof }, 591 /* IS_PASS_ELSE */ 592 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif, 593 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif, 594 print, Eeof }, 595 /* IS_FALSE_ELSE */ 596 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif, 597 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, 598 drop, Eeof }, 599 /* IS_TRUE_ELSE */ 600 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif, 601 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc, 602 print, Eeof }, 603 /* IS_FALSE_TRAILER */ 604 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif, 605 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, 606 drop, Eeof } 607 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF 608 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY) 609 PLAIN EOF */ 610 }; 611 612 /* 613 * State machine utility functions 614 */ 615 static void 616 ignoreoff(void) 617 { 618 ignoring[depth] = ignoring[depth-1]; 619 } 620 621 static void 622 ignoreon(void) 623 { 624 ignoring[depth] = true; 625 } 626 627 static void 628 keywordedit(const char *replacement) 629 { 630 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword); 631 print(); 632 } 633 634 static void 635 nest(void) 636 { 637 depth += 1; 638 if (depth >= MAXDEPTH) 639 error("Too many levels of nesting"); 640 stifline[depth] = linenum; 641 } 642 643 static void 644 state(Ifstate is) 645 { 646 ifstate[depth] = is; 647 } 648 649 /* 650 * Write a line to the output or not, according to command line options. 651 */ 652 static void 653 flushline(bool keep) 654 { 655 if (symlist) 656 return; 657 if (keep ^ complement) 658 fputs(tline, stdout); 659 else { 660 if (lnblank) 661 putc('\n', stdout); 662 exitstat = 1; 663 } 664 } 665 666 /* 667 * The driver for the state machine. 668 */ 669 static void 670 process(void) 671 { 672 Linetype lineval; 673 state_fn *trans; 674 675 for (;;) { 676 linenum++; 677 lineval = getline(); 678 trans = trans_table[ifstate[depth]][lineval]; 679 if (trans == NULL) 680 break; 681 trans(); 682 debug("process %s -> %s depth %d", 683 linetype_name[lineval], 684 ifstate_name[ifstate[depth]], depth); 685 } 686 if (incomment) 687 error("EOF in comment"); 688 } 689 690 /* 691 * Parse a line and determine its type. We keep the preprocessor line 692 * parser state between calls in a global variable. 693 */ 694 static Linetype 695 getline(void) 696 { 697 const char *cp; 698 int cursym; 699 int kwlen; 700 Linetype retval; 701 Comment_state wascomment; 702 703 if (fgets(tline, MAXLINE, input) == NULL) 704 return (LT_EOF); 705 retval = LT_PLAIN; 706 wascomment = incomment; 707 cp = skipcomment(tline); 708 if (linestate == LS_START) { 709 if (*cp == '#') { 710 linestate = LS_HASH; 711 cp = skipcomment(cp + 1); 712 } else if (*cp != '\0') 713 linestate = LS_DIRTY; 714 } 715 if (!incomment && linestate == LS_HASH) { 716 keyword = tline + (cp - tline); 717 cp = skipsym(cp); 718 kwlen = cp - keyword; 719 /* no way can we deal with a continuation inside a keyword */ 720 if (strncmp(cp, "\\\n", 2) == 0) 721 Eioccc(); 722 if (strlcmp("ifdef", keyword, kwlen) == 0 || 723 strlcmp("ifndef", keyword, kwlen) == 0) { 724 cp = skipcomment(cp); 725 if ((cursym = findsym(cp)) < 0) 726 retval = LT_IF; 727 else { 728 retval = (keyword[2] == 'n') 729 ? LT_FALSE : LT_TRUE; 730 if (value[cursym] == NULL) 731 retval = (retval == LT_TRUE) 732 ? LT_FALSE : LT_TRUE; 733 if (ignore[cursym]) 734 retval = (retval == LT_TRUE) 735 ? LT_TRUEI : LT_FALSEI; 736 } 737 cp = skipsym(cp); 738 } else if (strlcmp("if", keyword, kwlen) == 0) 739 retval = ifeval(&cp); 740 else if (strlcmp("elif", keyword, kwlen) == 0) 741 retval = ifeval(&cp) - LT_IF + LT_ELIF; 742 else if (strlcmp("else", keyword, kwlen) == 0) 743 retval = LT_ELSE; 744 else if (strlcmp("endif", keyword, kwlen) == 0) 745 retval = LT_ENDIF; 746 else { 747 linestate = LS_DIRTY; 748 retval = LT_PLAIN; 749 } 750 cp = skipcomment(cp); 751 if (*cp != '\0') { 752 linestate = LS_DIRTY; 753 if (retval == LT_TRUE || retval == LT_FALSE || 754 retval == LT_TRUEI || retval == LT_FALSEI) 755 retval = LT_IF; 756 if (retval == LT_ELTRUE || retval == LT_ELFALSE) 757 retval = LT_ELIF; 758 } 759 if (retval != LT_PLAIN && (wascomment || incomment)) { 760 retval += LT_DODGY; 761 if (incomment) 762 linestate = LS_DIRTY; 763 } 764 /* skipcomment should have changed the state */ 765 if (linestate == LS_HASH) 766 abort(); /* bug */ 767 } 768 if (linestate == LS_DIRTY) { 769 while (*cp != '\0') 770 cp = skipcomment(cp + 1); 771 } 772 debug("parser %s comment %s line", 773 comment_name[incomment], linestate_name[linestate]); 774 return (retval); 775 } 776 777 /* 778 * These are the operators that are supported by the expression 779 * evaluator. Note that if support for division is added then we also 780 * need short-circuiting booleans because of divide-by-zero. 781 */ 782 static int 783 op_lt(int a, int b) 784 { 785 return (a < b); 786 } 787 788 static int 789 op_gt(int a, int b) 790 { 791 return (a > b); 792 } 793 794 static int 795 op_le(int a, int b) 796 { 797 return (a <= b); 798 } 799 800 static int 801 op_ge(int a, int b) 802 { 803 return (a >= b); 804 } 805 806 static int 807 op_eq(int a, int b) 808 { 809 return (a == b); 810 } 811 812 static int 813 op_ne(int a, int b) 814 { 815 return (a != b); 816 } 817 818 static int 819 op_or(int a, int b) 820 { 821 return (a || b); 822 } 823 824 static int 825 op_and(int a, int b) 826 { 827 return (a && b); 828 } 829 830 /* 831 * An evaluation function takes three arguments, as follows: (1) a pointer to 832 * an element of the precedence table which lists the operators at the current 833 * level of precedence; (2) a pointer to an integer which will receive the 834 * value of the expression; and (3) a pointer to a char* that points to the 835 * expression to be evaluated and that is updated to the end of the expression 836 * when evaluation is complete. The function returns LT_FALSE if the value of 837 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the 838 * expression could not be evaluated. 839 */ 840 struct ops; 841 842 typedef Linetype eval_fn(const struct ops *, int *, const char **); 843 844 static eval_fn eval_table, eval_unary; 845 846 /* 847 * The precedence table. Expressions involving binary operators are evaluated 848 * in a table-driven way by eval_table. When it evaluates a subexpression it 849 * calls the inner function with its first argument pointing to the next 850 * element of the table. Innermost expressions have special non-table-driven 851 * handling. 852 */ 853 static const struct ops { 854 eval_fn *inner; 855 struct op { 856 const char *str; 857 int (*fn)(int, int); 858 } op[5]; 859 } eval_ops[] = { 860 { eval_table, { { "||", op_or } } }, 861 { eval_table, { { "&&", op_and } } }, 862 { eval_table, { { "==", op_eq }, 863 { "!=", op_ne } } }, 864 { eval_unary, { { "<=", op_le }, 865 { ">=", op_ge }, 866 { "<", op_lt }, 867 { ">", op_gt } } } 868 }; 869 870 /* 871 * Function for evaluating the innermost parts of expressions, 872 * viz. !expr (expr) defined(symbol) symbol number 873 * We reset the keepthis flag when we find a non-constant subexpression. 874 */ 875 static Linetype 876 eval_unary(const struct ops *ops, int *valp, const char **cpp) 877 { 878 const char *cp; 879 char *ep; 880 int sym; 881 882 cp = skipcomment(*cpp); 883 if (*cp == '!') { 884 debug("eval%d !", ops - eval_ops); 885 cp++; 886 if (eval_unary(ops, valp, &cp) == LT_IF) 887 return (LT_IF); 888 *valp = !*valp; 889 } else if (*cp == '(') { 890 cp++; 891 debug("eval%d (", ops - eval_ops); 892 if (eval_table(eval_ops, valp, &cp) == LT_IF) 893 return (LT_IF); 894 cp = skipcomment(cp); 895 if (*cp++ != ')') 896 return (LT_IF); 897 } else if (isdigit((unsigned char)*cp)) { 898 debug("eval%d number", ops - eval_ops); 899 *valp = strtol(cp, &ep, 0); 900 cp = skipsym(cp); 901 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { 902 cp = skipcomment(cp+7); 903 debug("eval%d defined", ops - eval_ops); 904 if (*cp++ != '(') 905 return (LT_IF); 906 cp = skipcomment(cp); 907 sym = findsym(cp); 908 if (sym < 0 && !symlist) 909 return (LT_IF); 910 *valp = (value[sym] != NULL); 911 cp = skipsym(cp); 912 cp = skipcomment(cp); 913 if (*cp++ != ')') 914 return (LT_IF); 915 keepthis = false; 916 } else if (!endsym(*cp)) { 917 debug("eval%d symbol", ops - eval_ops); 918 sym = findsym(cp); 919 if (sym < 0 && !symlist) 920 return (LT_IF); 921 if (value[sym] == NULL) 922 *valp = 0; 923 else { 924 *valp = strtol(value[sym], &ep, 0); 925 if (*ep != '\0' || ep == value[sym]) 926 return (LT_IF); 927 } 928 cp = skipsym(cp); 929 keepthis = false; 930 } else 931 return (LT_IF); 932 933 *cpp = cp; 934 debug("eval%d = %d", ops - eval_ops, *valp); 935 return (*valp ? LT_TRUE : LT_FALSE); 936 } 937 938 /* 939 * Table-driven evaluation of binary operators. 940 */ 941 static Linetype 942 eval_table(const struct ops *ops, int *valp, const char **cpp) 943 { 944 const struct op *op; 945 const char *cp; 946 int val; 947 948 debug("eval%d", ops - eval_ops); 949 cp = *cpp; 950 if (ops->inner(ops+1, valp, &cp) == LT_IF) 951 return (LT_IF); 952 for (;;) { 953 cp = skipcomment(cp); 954 for (op = ops->op; op->str != NULL; op++) 955 if (strncmp(cp, op->str, strlen(op->str)) == 0) 956 break; 957 if (op->str == NULL) 958 break; 959 cp += strlen(op->str); 960 debug("eval%d %s", ops - eval_ops, op->str); 961 if (ops->inner(ops+1, &val, &cp) == LT_IF) 962 return (LT_IF); 963 *valp = op->fn(*valp, val); 964 } 965 966 *cpp = cp; 967 debug("eval%d = %d", ops - eval_ops, *valp); 968 return (*valp ? LT_TRUE : LT_FALSE); 969 } 970 971 /* 972 * Evaluate the expression on a #if or #elif line. If we can work out 973 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we 974 * return just a generic LT_IF. 975 */ 976 static Linetype 977 ifeval(const char **cpp) 978 { 979 int ret; 980 int val; 981 982 debug("eval %s", *cpp); 983 keepthis = killconsts ? false : true; 984 ret = eval_table(eval_ops, &val, cpp); 985 return (keepthis ? LT_IF : ret); 986 } 987 988 /* 989 * Skip over comments and stop at the next character position that is 990 * not whitespace. Between calls we keep the comment state in the 991 * global variable incomment, and we also adjust the global variable 992 * linestate when we see a newline. 993 * XXX: doesn't cope with the buffer splitting inside a state transition. 994 */ 995 static const char * 996 skipcomment(const char *cp) 997 { 998 if (text || ignoring[depth]) { 999 for (; isspace((unsigned char)*cp); cp++) 1000 if (*cp == '\n') 1001 linestate = LS_START; 1002 return (cp); 1003 } 1004 while (*cp != '\0') 1005 if (strncmp(cp, "\\\n", 2) == 0) 1006 cp += 2; 1007 else switch (incomment) { 1008 case NO_COMMENT: 1009 if (strncmp(cp, "/\\\n", 3) == 0) { 1010 incomment = STARTING_COMMENT; 1011 cp += 3; 1012 } else if (strncmp(cp, "/*", 2) == 0) { 1013 incomment = C_COMMENT; 1014 cp += 2; 1015 } else if (strncmp(cp, "//", 2) == 0) { 1016 incomment = CXX_COMMENT; 1017 cp += 2; 1018 } else if (strncmp(cp, "\n", 1) == 0) { 1019 linestate = LS_START; 1020 cp += 1; 1021 } else if (strchr(" \t", *cp) != NULL) { 1022 cp += 1; 1023 } else 1024 return (cp); 1025 continue; 1026 case CXX_COMMENT: 1027 if (strncmp(cp, "\n", 1) == 0) { 1028 incomment = NO_COMMENT; 1029 linestate = LS_START; 1030 } 1031 cp += 1; 1032 continue; 1033 case C_COMMENT: 1034 if (strncmp(cp, "*\\\n", 3) == 0) { 1035 incomment = FINISHING_COMMENT; 1036 cp += 3; 1037 } else if (strncmp(cp, "*/", 2) == 0) { 1038 incomment = NO_COMMENT; 1039 cp += 2; 1040 } else 1041 cp += 1; 1042 continue; 1043 case STARTING_COMMENT: 1044 if (*cp == '*') { 1045 incomment = C_COMMENT; 1046 cp += 1; 1047 } else if (*cp == '/') { 1048 incomment = CXX_COMMENT; 1049 cp += 1; 1050 } else { 1051 incomment = NO_COMMENT; 1052 linestate = LS_DIRTY; 1053 } 1054 continue; 1055 case FINISHING_COMMENT: 1056 if (*cp == '/') { 1057 incomment = NO_COMMENT; 1058 cp += 1; 1059 } else 1060 incomment = C_COMMENT; 1061 continue; 1062 default: 1063 /* bug */ 1064 abort(); 1065 } 1066 return (cp); 1067 } 1068 1069 /* 1070 * Skip over an identifier. 1071 */ 1072 static const char * 1073 skipsym(const char *cp) 1074 { 1075 while (!endsym(*cp)) 1076 ++cp; 1077 return (cp); 1078 } 1079 1080 /* 1081 * Look for the symbol in the symbol table. If it is found, we return 1082 * the symbol table index, else we return -1. 1083 */ 1084 static int 1085 findsym(const char *str) 1086 { 1087 const char *cp; 1088 int symind; 1089 1090 cp = skipsym(str); 1091 if (cp == str) 1092 return (-1); 1093 if (symlist) 1094 printf("%.*s\n", (int)(cp-str), str); 1095 for (symind = 0; symind < nsyms; ++symind) { 1096 if (strlcmp(symname[symind], str, cp-str) == 0) { 1097 debug("findsym %s %s", symname[symind], 1098 value[symind] ? value[symind] : ""); 1099 return (symind); 1100 } 1101 } 1102 return (-1); 1103 } 1104 1105 /* 1106 * Add a symbol to the symbol table. 1107 */ 1108 static void 1109 addsym(bool ignorethis, bool definethis, char *sym) 1110 { 1111 int symind; 1112 char *val; 1113 1114 symind = findsym(sym); 1115 if (symind < 0) { 1116 if (nsyms >= MAXSYMS) 1117 errx(2, "too many symbols"); 1118 symind = nsyms++; 1119 } 1120 symname[symind] = sym; 1121 ignore[symind] = ignorethis; 1122 val = sym + (skipsym(sym) - sym); 1123 if (definethis) { 1124 if (*val == '=') { 1125 value[symind] = val+1; 1126 *val = '\0'; 1127 } else if (*val == '\0') 1128 value[symind] = ""; 1129 else 1130 usage(); 1131 } else { 1132 if (*val != '\0') 1133 usage(); 1134 value[symind] = NULL; 1135 } 1136 } 1137 1138 /* 1139 * Compare s with n characters of t. 1140 * The same as strncmp() except that it checks that s[n] == '\0'. 1141 */ 1142 static int 1143 strlcmp(const char *s, const char *t, size_t n) 1144 { 1145 while (n-- && *t != '\0') 1146 if (*s != *t) 1147 return ((unsigned char)*s - (unsigned char)*t); 1148 else 1149 ++s, ++t; 1150 return ((unsigned char)*s); 1151 } 1152 1153 /* 1154 * Diagnostics. 1155 */ 1156 static void 1157 debug(const char *msg, ...) 1158 { 1159 va_list ap; 1160 1161 if (debugging) { 1162 va_start(ap, msg); 1163 vwarnx(msg, ap); 1164 va_end(ap); 1165 } 1166 } 1167 1168 static void 1169 error(const char *msg) 1170 { 1171 if (depth == 0) 1172 warnx("%s: %d: %s", filename, linenum, msg); 1173 else 1174 warnx("%s: %d: %s (#if line %d depth %d)", 1175 filename, linenum, msg, stifline[depth], depth); 1176 errx(2, "output may be truncated"); 1177 } 1178