1 /* CPP Library - traditional lexical analysis and macro expansion. 2 Copyright (C) 2002-2018 Free Software Foundation, Inc. 3 Contributed by Neil Booth, May 2002 4 5 This program is free software; you can redistribute it and/or modify it 6 under the terms of the GNU General Public License as published by the 7 Free Software Foundation; either version 3, or (at your option) any 8 later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; see the file COPYING3. If not see 17 <http://www.gnu.org/licenses/>. */ 18 19 #include "config.h" 20 #include "system.h" 21 #include "cpplib.h" 22 #include "internal.h" 23 24 /* The replacement text of a function-like macro is stored as a 25 contiguous sequence of aligned blocks, each representing the text 26 between subsequent parameters. 27 28 Each block comprises the text between its surrounding parameters, 29 the length of that text, and the one-based index of the following 30 parameter. The final block in the replacement text is easily 31 recognizable as it has an argument index of zero. */ 32 33 struct block 34 { 35 unsigned int text_len; 36 unsigned short arg_index; 37 uchar text[1]; 38 }; 39 40 #define BLOCK_HEADER_LEN offsetof (struct block, text) 41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN)) 42 43 /* Structure holding information about a function-like macro 44 invocation. */ 45 struct fun_macro 46 { 47 /* Memory buffer holding the trad_arg array. */ 48 _cpp_buff *buff; 49 50 /* An array of size the number of macro parameters + 1, containing 51 the offsets of the start of each macro argument in the output 52 buffer. The argument continues until the character before the 53 start of the next one. 
*/ 54 size_t *args; 55 56 /* The hashnode of the macro. */ 57 cpp_hashnode *node; 58 59 /* The offset of the macro name in the output buffer. */ 60 size_t offset; 61 62 /* The line the macro name appeared on. */ 63 source_location line; 64 65 /* Number of parameters. */ 66 unsigned int paramc; 67 68 /* Zero-based index of argument being currently lexed. */ 69 unsigned int argc; 70 }; 71 72 /* Lexing state. It is mostly used to prevent macro expansion. */ 73 enum ls {ls_none = 0, /* Normal state. */ 74 ls_fun_open, /* When looking for '('. */ 75 ls_fun_close, /* When looking for ')'. */ 76 ls_defined, /* After defined. */ 77 ls_defined_close, /* Looking for ')' of defined(). */ 78 ls_hash, /* After # in preprocessor conditional. */ 79 ls_predicate, /* After the predicate, maybe paren? */ 80 ls_answer, /* In answer to predicate. */ 81 ls_has_include, /* After __has_include__. */ 82 ls_has_include_close}; /* Looking for ')' of __has_include__. */ 83 84 /* Lexing TODO: Maybe handle space in escaped newlines. Stop lex.c 85 from recognizing comments and directives during its lexing pass. 
*/ 86 87 static const uchar *skip_whitespace (cpp_reader *, const uchar *, int); 88 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *); 89 static const uchar *copy_comment (cpp_reader *, const uchar *, int); 90 static void check_output_buffer (cpp_reader *, size_t); 91 static void push_replacement_text (cpp_reader *, cpp_hashnode *); 92 static bool scan_parameters (cpp_reader *, cpp_macro *); 93 static bool recursive_macro (cpp_reader *, cpp_hashnode *); 94 static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int); 95 static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *, 96 struct fun_macro *); 97 static void save_argument (struct fun_macro *, size_t); 98 static void replace_args_and_push (cpp_reader *, struct fun_macro *); 99 static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *); 100 101 /* Ensures we have N bytes' space in the output buffer, and 102 reallocates it if not. */ 103 static void 104 check_output_buffer (cpp_reader *pfile, size_t n) 105 { 106 /* We might need two bytes to terminate an unterminated comment, and 107 one more to terminate the line with a NUL. */ 108 n += 2 + 1; 109 110 if (n > (size_t) (pfile->out.limit - pfile->out.cur)) 111 { 112 size_t size = pfile->out.cur - pfile->out.base; 113 size_t new_size = (size + n) * 3 / 2; 114 115 pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size); 116 pfile->out.limit = pfile->out.base + new_size; 117 pfile->out.cur = pfile->out.base + size; 118 } 119 } 120 121 /* Skip a C-style block comment in a macro as a result of -CC. 122 PFILE->buffer->cur points to the initial asterisk of the comment, 123 change it to point to after the '*' and '/' characters that terminate it. 124 Return true if the macro has not been termined, in that case set 125 PFILE->buffer->cur to the end of the buffer. 
*/ 126 static bool 127 skip_macro_block_comment (cpp_reader *pfile) 128 { 129 const uchar *cur = pfile->buffer->cur; 130 131 cur++; 132 if (*cur == '/') 133 cur++; 134 135 /* People like decorating comments with '*', so check for '/' 136 instead for efficiency. */ 137 while (! (*cur++ == '/' && cur[-2] == '*')) 138 if (cur[-1] == '\n') 139 { 140 pfile->buffer->cur = cur - 1; 141 return true; 142 } 143 144 pfile->buffer->cur = cur; 145 return false; 146 } 147 148 /* CUR points to the asterisk introducing a comment in the current 149 context. IN_DEFINE is true if we are in the replacement text of a 150 macro. 151 152 The asterisk and following comment is copied to the buffer pointed 153 to by pfile->out.cur, which must be of sufficient size. 154 Unterminated comments are diagnosed, and correctly terminated in 155 the output. pfile->out.cur is updated depending upon IN_DEFINE, 156 -C, -CC and pfile->state.in_directive. 157 158 Returns a pointer to the first character after the comment in the 159 input buffer. */ 160 static const uchar * 161 copy_comment (cpp_reader *pfile, const uchar *cur, int in_define) 162 { 163 bool unterminated, copy = false; 164 source_location src_loc = pfile->line_table->highest_line; 165 cpp_buffer *buffer = pfile->buffer; 166 167 buffer->cur = cur; 168 if (pfile->context->prev) 169 unterminated = skip_macro_block_comment (pfile); 170 else 171 unterminated = _cpp_skip_block_comment (pfile); 172 173 if (unterminated) 174 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0, 175 "unterminated comment"); 176 177 /* Comments in directives become spaces so that tokens are properly 178 separated when the ISO preprocessor re-lexes the line. The 179 exception is #define. 
*/ 180 if (pfile->state.in_directive) 181 { 182 if (in_define) 183 { 184 if (CPP_OPTION (pfile, discard_comments_in_macro_exp)) 185 pfile->out.cur--; 186 else 187 copy = true; 188 } 189 else 190 pfile->out.cur[-1] = ' '; 191 } 192 else if (CPP_OPTION (pfile, discard_comments)) 193 pfile->out.cur--; 194 else 195 copy = true; 196 197 if (copy) 198 { 199 size_t len = (size_t) (buffer->cur - cur); 200 memcpy (pfile->out.cur, cur, len); 201 pfile->out.cur += len; 202 if (unterminated) 203 { 204 *pfile->out.cur++ = '*'; 205 *pfile->out.cur++ = '/'; 206 } 207 } 208 209 return buffer->cur; 210 } 211 212 /* CUR points to any character in the input buffer. Skips over all 213 contiguous horizontal white space and NULs, including comments if 214 SKIP_COMMENTS, until reaching the first non-horizontal-whitespace 215 character or the end of the current context. Escaped newlines are 216 removed. 217 218 The whitespace is copied verbatim to the output buffer, except that 219 comments are handled as described in copy_comment(). 220 pfile->out.cur is updated. 221 222 Returns a pointer to the first character after the whitespace in 223 the input buffer. */ 224 static const uchar * 225 skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments) 226 { 227 uchar *out = pfile->out.cur; 228 229 for (;;) 230 { 231 unsigned int c = *cur++; 232 *out++ = c; 233 234 if (is_nvspace (c)) 235 continue; 236 237 if (c == '/' && *cur == '*' && skip_comments) 238 { 239 pfile->out.cur = out; 240 cur = copy_comment (pfile, cur, false /* in_define */); 241 out = pfile->out.cur; 242 continue; 243 } 244 245 out--; 246 break; 247 } 248 249 pfile->out.cur = out; 250 return cur - 1; 251 } 252 253 /* Lexes and outputs an identifier starting at CUR, which is assumed 254 to point to a valid first character of an identifier. Returns 255 the hashnode, and updates out.cur. 
*/ 256 static cpp_hashnode * 257 lex_identifier (cpp_reader *pfile, const uchar *cur) 258 { 259 size_t len; 260 uchar *out = pfile->out.cur; 261 cpp_hashnode *result; 262 263 do 264 *out++ = *cur++; 265 while (is_numchar (*cur)); 266 267 CUR (pfile->context) = cur; 268 len = out - pfile->out.cur; 269 result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur, 270 len, HT_ALLOC)); 271 pfile->out.cur = out; 272 return result; 273 } 274 275 /* Overlays the true file buffer temporarily with text of length LEN 276 starting at START. The true buffer is restored upon calling 277 restore_buff(). */ 278 void 279 _cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len) 280 { 281 cpp_buffer *buffer = pfile->buffer; 282 283 pfile->overlaid_buffer = buffer; 284 pfile->saved_cur = buffer->cur; 285 pfile->saved_rlimit = buffer->rlimit; 286 pfile->saved_line_base = buffer->next_line; 287 buffer->need_line = false; 288 289 buffer->cur = start; 290 buffer->line_base = start; 291 buffer->rlimit = start + len; 292 } 293 294 /* Restores a buffer overlaid by _cpp_overlay_buffer(). */ 295 void 296 _cpp_remove_overlay (cpp_reader *pfile) 297 { 298 cpp_buffer *buffer = pfile->overlaid_buffer; 299 300 buffer->cur = pfile->saved_cur; 301 buffer->rlimit = pfile->saved_rlimit; 302 buffer->line_base = pfile->saved_line_base; 303 buffer->need_line = true; 304 305 pfile->overlaid_buffer = NULL; 306 } 307 308 /* Reads a logical line into the output buffer. Returns TRUE if there 309 is more text left in the buffer. */ 310 bool 311 _cpp_read_logical_line_trad (cpp_reader *pfile) 312 { 313 do 314 { 315 if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile)) 316 return false; 317 } 318 while (!_cpp_scan_out_logical_line (pfile, NULL, false) 319 || pfile->state.skipping); 320 321 return pfile->buffer != NULL; 322 } 323 324 /* Return true if NODE is a fun_like macro. 
*/ 325 static inline bool 326 fun_like_macro (cpp_hashnode *node) 327 { 328 if (node->flags & NODE_BUILTIN) 329 return node->value.builtin == BT_HAS_ATTRIBUTE; 330 else 331 return node->value.macro->fun_like; 332 } 333 334 /* Set up state for finding the opening '(' of a function-like 335 macro. */ 336 static void 337 maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start, 338 struct fun_macro *macro) 339 { 340 unsigned int n; 341 if (node->flags & NODE_BUILTIN) 342 n = 1; 343 else 344 n = node->value.macro->paramc; 345 346 if (macro->buff) 347 _cpp_release_buff (pfile, macro->buff); 348 macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t)); 349 macro->args = (size_t *) BUFF_FRONT (macro->buff); 350 macro->node = node; 351 macro->offset = start - pfile->out.base; 352 macro->paramc = n; 353 macro->argc = 0; 354 } 355 356 /* Save the OFFSET of the start of the next argument to MACRO. */ 357 static void 358 save_argument (struct fun_macro *macro, size_t offset) 359 { 360 macro->argc++; 361 if (macro->argc <= macro->paramc) 362 macro->args[macro->argc] = offset; 363 } 364 365 /* Copies the next logical line in the current buffer (starting at 366 buffer->cur) to the output buffer. The output is guaranteed to 367 terminate with a NUL character. buffer->cur is updated. 368 369 If MACRO is non-NULL, then we are scanning the replacement list of 370 MACRO, and we call save_replacement_text() every time we meet an 371 argument. 372 373 If BUILTIN_MACRO_ARG is true, this is called to macro expand 374 arguments of builtin function-like macros. 
*/ 375 bool 376 _cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro, 377 bool builtin_macro_arg) 378 { 379 bool result = true; 380 cpp_context *context; 381 const uchar *cur; 382 uchar *out; 383 struct fun_macro fmacro; 384 unsigned int c, paren_depth = 0, quote; 385 enum ls lex_state = ls_none; 386 bool header_ok; 387 const uchar *start_of_input_line; 388 389 fmacro.buff = NULL; 390 fmacro.args = NULL; 391 fmacro.node = NULL; 392 fmacro.offset = 0; 393 fmacro.line = 0; 394 fmacro.paramc = 0; 395 fmacro.argc = 0; 396 397 quote = 0; 398 header_ok = pfile->state.angled_headers; 399 CUR (pfile->context) = pfile->buffer->cur; 400 RLIMIT (pfile->context) = pfile->buffer->rlimit; 401 if (!builtin_macro_arg) 402 { 403 pfile->out.cur = pfile->out.base; 404 pfile->out.first_line = pfile->line_table->highest_line; 405 } 406 /* start_of_input_line is needed to make sure that directives really, 407 really start at the first character of the line. */ 408 start_of_input_line = pfile->buffer->cur; 409 new_context: 410 context = pfile->context; 411 cur = CUR (context); 412 check_output_buffer (pfile, RLIMIT (context) - cur); 413 out = pfile->out.cur; 414 415 for (;;) 416 { 417 if (!context->prev 418 && !builtin_macro_arg 419 && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos) 420 { 421 pfile->buffer->cur = cur; 422 _cpp_process_line_notes (pfile, false); 423 } 424 c = *cur++; 425 *out++ = c; 426 427 /* Whitespace should "continue" out of the switch, 428 non-whitespace should "break" out of it. */ 429 switch (c) 430 { 431 case ' ': 432 case '\t': 433 case '\f': 434 case '\v': 435 case '\0': 436 continue; 437 438 case '\n': 439 /* If this is a macro's expansion, pop it. */ 440 if (context->prev) 441 { 442 pfile->out.cur = out - 1; 443 _cpp_pop_context (pfile); 444 goto new_context; 445 } 446 447 /* Omit the newline from the output buffer. 
*/ 448 pfile->out.cur = out - 1; 449 pfile->buffer->cur = cur; 450 if (builtin_macro_arg) 451 goto done; 452 pfile->buffer->need_line = true; 453 CPP_INCREMENT_LINE (pfile, 0); 454 455 if ((lex_state == ls_fun_open || lex_state == ls_fun_close) 456 && !pfile->state.in_directive 457 && _cpp_get_fresh_line (pfile)) 458 { 459 /* Newlines in arguments become a space, but we don't 460 clear any in-progress quote. */ 461 if (lex_state == ls_fun_close) 462 out[-1] = ' '; 463 cur = pfile->buffer->cur; 464 continue; 465 } 466 goto done; 467 468 case '<': 469 if (header_ok) 470 quote = '>'; 471 break; 472 case '>': 473 if (c == quote) 474 quote = 0; 475 break; 476 477 case '"': 478 case '\'': 479 if (c == quote) 480 quote = 0; 481 else if (!quote) 482 quote = c; 483 break; 484 485 case '\\': 486 /* Skip escaped quotes here, it's easier than above. */ 487 if (*cur == '\\' || *cur == '"' || *cur == '\'') 488 *out++ = *cur++; 489 break; 490 491 case '/': 492 /* Traditional CPP does not recognize comments within 493 literals. 
*/ 494 if (!quote && *cur == '*') 495 { 496 pfile->out.cur = out; 497 cur = copy_comment (pfile, cur, macro != 0); 498 out = pfile->out.cur; 499 continue; 500 } 501 break; 502 503 case '_': 504 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 505 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 506 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 507 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 508 case 'y': case 'z': 509 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 510 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 511 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 512 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 513 case 'Y': case 'Z': 514 if (!pfile->state.skipping && (quote == 0 || macro)) 515 { 516 cpp_hashnode *node; 517 uchar *out_start = out - 1; 518 519 pfile->out.cur = out_start; 520 node = lex_identifier (pfile, cur - 1); 521 out = pfile->out.cur; 522 cur = CUR (context); 523 524 if (node->type == NT_MACRO 525 /* Should we expand for ls_answer? */ 526 && (lex_state == ls_none || lex_state == ls_fun_open) 527 && !pfile->state.prevent_expansion) 528 { 529 /* Macros invalidate MI optimization. */ 530 pfile->mi_valid = false; 531 if (fun_like_macro (node)) 532 { 533 maybe_start_funlike (pfile, node, out_start, &fmacro); 534 lex_state = ls_fun_open; 535 fmacro.line = pfile->line_table->highest_line; 536 continue; 537 } 538 else if (!recursive_macro (pfile, node)) 539 { 540 /* Remove the object-like macro's name from the 541 output, and push its replacement text. */ 542 pfile->out.cur = out_start; 543 push_replacement_text (pfile, node); 544 lex_state = ls_none; 545 goto new_context; 546 } 547 } 548 else if (macro && (node->flags & NODE_MACRO_ARG) != 0) 549 { 550 /* Found a parameter in the replacement text of a 551 #define. Remove its name from the output. 
*/ 552 pfile->out.cur = out_start; 553 save_replacement_text (pfile, macro, node->value.arg_index); 554 out = pfile->out.base; 555 } 556 else if (lex_state == ls_hash) 557 { 558 lex_state = ls_predicate; 559 continue; 560 } 561 else if (pfile->state.in_expression 562 && node == pfile->spec_nodes.n_defined) 563 { 564 lex_state = ls_defined; 565 continue; 566 } 567 else if (pfile->state.in_expression 568 && (node == pfile->spec_nodes.n__has_include__ 569 || node == pfile->spec_nodes.n__has_include_next__)) 570 { 571 lex_state = ls_has_include; 572 continue; 573 } 574 } 575 break; 576 577 case '(': 578 if (quote == 0) 579 { 580 paren_depth++; 581 if (lex_state == ls_fun_open) 582 { 583 if (recursive_macro (pfile, fmacro.node)) 584 lex_state = ls_none; 585 else 586 { 587 lex_state = ls_fun_close; 588 paren_depth = 1; 589 out = pfile->out.base + fmacro.offset; 590 fmacro.args[0] = fmacro.offset; 591 } 592 } 593 else if (lex_state == ls_predicate) 594 lex_state = ls_answer; 595 else if (lex_state == ls_defined) 596 lex_state = ls_defined_close; 597 else if (lex_state == ls_has_include) 598 lex_state = ls_has_include_close; 599 } 600 break; 601 602 case ',': 603 if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1) 604 save_argument (&fmacro, out - pfile->out.base); 605 break; 606 607 case ')': 608 if (quote == 0) 609 { 610 paren_depth--; 611 if (lex_state == ls_fun_close && paren_depth == 0) 612 { 613 if (fmacro.node->flags & NODE_BUILTIN) 614 { 615 /* Handle builtin function-like macros like 616 __has_attribute. The already parsed arguments 617 are put into a buffer, which is then preprocessed 618 and the result is fed to _cpp_push_text_context 619 with disabled expansion, where the ISO preprocessor 620 parses it. While in traditional preprocessing 621 macro arguments aren't immediately expanded, they in 622 the end are because the macro with replaced arguments 623 is preprocessed again. 
For the builtin function-like 624 macros we need the argument immediately though, 625 if we don't preprocess them, they would behave 626 very differently from ISO preprocessor handling 627 of those builtin macros. So, this handling is 628 more similar to traditional preprocessing of 629 #if directives, where we also keep preprocessing 630 until everything is expanded, and then feed the 631 result with disabled expansion to ISO preprocessor 632 for handling the directives. */ 633 lex_state = ls_none; 634 save_argument (&fmacro, out - pfile->out.base); 635 cpp_macro m; 636 memset (&m, '\0', sizeof (m)); 637 m.paramc = fmacro.paramc; 638 if (_cpp_arguments_ok (pfile, &m, fmacro.node, 639 fmacro.argc)) 640 { 641 size_t len = fmacro.args[1] - fmacro.args[0]; 642 uchar *buf; 643 644 /* Remove the macro's invocation from the 645 output, and push its replacement text. */ 646 pfile->out.cur = pfile->out.base + fmacro.offset; 647 CUR (context) = cur; 648 buf = _cpp_unaligned_alloc (pfile, len + 2); 649 buf[0] = '('; 650 memcpy (buf + 1, pfile->out.base + fmacro.args[0], 651 len); 652 buf[len + 1] = '\n'; 653 654 const unsigned char *ctx_rlimit = RLIMIT (context); 655 const unsigned char *saved_cur = pfile->buffer->cur; 656 const unsigned char *saved_rlimit 657 = pfile->buffer->rlimit; 658 const unsigned char *saved_line_base 659 = pfile->buffer->line_base; 660 bool saved_need_line = pfile->buffer->need_line; 661 cpp_buffer *saved_overlaid_buffer 662 = pfile->overlaid_buffer; 663 pfile->buffer->cur = buf; 664 pfile->buffer->line_base = buf; 665 pfile->buffer->rlimit = buf + len + 1; 666 pfile->buffer->need_line = false; 667 pfile->overlaid_buffer = pfile->buffer; 668 bool saved_in_directive = pfile->state.in_directive; 669 pfile->state.in_directive = true; 670 cpp_context *saved_prev_context = context->prev; 671 context->prev = NULL; 672 673 _cpp_scan_out_logical_line (pfile, NULL, true); 674 675 pfile->state.in_directive = saved_in_directive; 676 check_output_buffer (pfile, 
1); 677 *pfile->out.cur = '\n'; 678 pfile->buffer->cur = pfile->out.base + fmacro.offset; 679 pfile->buffer->line_base = pfile->buffer->cur; 680 pfile->buffer->rlimit = pfile->out.cur; 681 CUR (context) = pfile->buffer->cur; 682 RLIMIT (context) = pfile->buffer->rlimit; 683 684 pfile->state.prevent_expansion++; 685 const uchar *text 686 = _cpp_builtin_macro_text (pfile, fmacro.node); 687 pfile->state.prevent_expansion--; 688 689 context->prev = saved_prev_context; 690 pfile->buffer->cur = saved_cur; 691 pfile->buffer->rlimit = saved_rlimit; 692 pfile->buffer->line_base = saved_line_base; 693 pfile->buffer->need_line = saved_need_line; 694 pfile->overlaid_buffer = saved_overlaid_buffer; 695 pfile->out.cur = pfile->out.base + fmacro.offset; 696 CUR (context) = cur; 697 RLIMIT (context) = ctx_rlimit; 698 len = ustrlen (text); 699 buf = _cpp_unaligned_alloc (pfile, len + 1); 700 memcpy (buf, text, len); 701 buf[len] = '\n'; 702 text = buf; 703 _cpp_push_text_context (pfile, fmacro.node, 704 text, len); 705 goto new_context; 706 } 707 break; 708 } 709 710 cpp_macro *m = fmacro.node->value.macro; 711 712 m->used = 1; 713 lex_state = ls_none; 714 save_argument (&fmacro, out - pfile->out.base); 715 716 /* A single zero-length argument is no argument. */ 717 if (fmacro.argc == 1 718 && m->paramc == 0 719 && out == pfile->out.base + fmacro.offset + 1) 720 fmacro.argc = 0; 721 722 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc)) 723 { 724 /* Remove the macro's invocation from the 725 output, and push its replacement text. */ 726 pfile->out.cur = pfile->out.base + fmacro.offset; 727 CUR (context) = cur; 728 replace_args_and_push (pfile, &fmacro); 729 goto new_context; 730 } 731 } 732 else if (lex_state == ls_answer || lex_state == ls_defined_close 733 || lex_state == ls_has_include_close) 734 lex_state = ls_none; 735 } 736 break; 737 738 case '#': 739 if (cur - 1 == start_of_input_line 740 /* A '#' from a macro doesn't start a directive. 
*/ 741 && !pfile->context->prev 742 && !pfile->state.in_directive) 743 { 744 /* A directive. With the way _cpp_handle_directive 745 currently works, we only want to call it if either we 746 know the directive is OK, or we want it to fail and 747 be removed from the output. If we want it to be 748 passed through (the assembler case) then we must not 749 call _cpp_handle_directive. */ 750 pfile->out.cur = out; 751 cur = skip_whitespace (pfile, cur, true /* skip_comments */); 752 out = pfile->out.cur; 753 754 if (*cur == '\n') 755 { 756 /* Null directive. Ignore it and don't invalidate 757 the MI optimization. */ 758 pfile->buffer->need_line = true; 759 CPP_INCREMENT_LINE (pfile, 0); 760 result = false; 761 goto done; 762 } 763 else 764 { 765 bool do_it = false; 766 767 if (is_numstart (*cur) 768 && CPP_OPTION (pfile, lang) != CLK_ASM) 769 do_it = true; 770 else if (is_idstart (*cur)) 771 /* Check whether we know this directive, but don't 772 advance. */ 773 do_it = lex_identifier (pfile, cur)->is_directive; 774 775 if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM) 776 { 777 /* This is a kludge. We want to have the ISO 778 preprocessor lex the next token. */ 779 pfile->buffer->cur = cur; 780 _cpp_handle_directive (pfile, false /* indented */); 781 result = false; 782 goto done; 783 } 784 } 785 } 786 787 if (pfile->state.in_expression) 788 { 789 lex_state = ls_hash; 790 continue; 791 } 792 break; 793 794 default: 795 break; 796 } 797 798 /* Non-whitespace disables MI optimization and stops treating 799 '<' as a quote in #include. */ 800 header_ok = false; 801 if (!pfile->state.in_directive) 802 pfile->mi_valid = false; 803 804 if (lex_state == ls_none) 805 continue; 806 807 /* Some of these transitions of state are syntax errors. The 808 ISO preprocessor will issue errors later. */ 809 if (lex_state == ls_fun_open) 810 /* Missing '('. 
*/ 811 lex_state = ls_none; 812 else if (lex_state == ls_hash 813 || lex_state == ls_predicate 814 || lex_state == ls_defined 815 || lex_state == ls_has_include) 816 lex_state = ls_none; 817 818 /* ls_answer and ls_defined_close keep going until ')'. */ 819 } 820 821 done: 822 if (fmacro.buff) 823 _cpp_release_buff (pfile, fmacro.buff); 824 825 if (lex_state == ls_fun_close) 826 cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0, 827 "unterminated argument list invoking macro \"%s\"", 828 NODE_NAME (fmacro.node)); 829 return result; 830 } 831 832 /* Push a context holding the replacement text of the macro NODE on 833 the context stack. NODE is either object-like, or a function-like 834 macro with no arguments. */ 835 static void 836 push_replacement_text (cpp_reader *pfile, cpp_hashnode *node) 837 { 838 size_t len; 839 const uchar *text; 840 uchar *buf; 841 842 if (node->flags & NODE_BUILTIN) 843 { 844 text = _cpp_builtin_macro_text (pfile, node); 845 len = ustrlen (text); 846 buf = _cpp_unaligned_alloc (pfile, len + 1); 847 memcpy (buf, text, len); 848 buf[len] = '\n'; 849 text = buf; 850 } 851 else 852 { 853 cpp_macro *macro = node->value.macro; 854 macro->used = 1; 855 text = macro->exp.text; 856 macro->traditional = 1; 857 len = macro->count; 858 } 859 860 _cpp_push_text_context (pfile, node, text, len); 861 } 862 863 /* Returns TRUE if traditional macro recursion is detected. */ 864 static bool 865 recursive_macro (cpp_reader *pfile, cpp_hashnode *node) 866 { 867 bool recursing = !!(node->flags & NODE_DISABLED); 868 869 /* Object-like macros that are already expanding are necessarily 870 recursive. 871 872 However, it is possible to have traditional function-like macros 873 that are not infinitely recursive but recurse to any given depth. 874 Further, it is easy to construct examples that get ever longer 875 until the point they stop recursing. 
So there is no easy way to 876 detect true recursion; instead we assume any expansion more than 877 20 deep since the first invocation of this macro must be 878 recursing. */ 879 if (recursing && fun_like_macro (node)) 880 { 881 size_t depth = 0; 882 cpp_context *context = pfile->context; 883 884 do 885 { 886 depth++; 887 if (context->c.macro == node && depth > 20) 888 break; 889 context = context->prev; 890 } 891 while (context); 892 recursing = context != NULL; 893 } 894 895 if (recursing) 896 cpp_error (pfile, CPP_DL_ERROR, 897 "detected recursion whilst expanding macro \"%s\"", 898 NODE_NAME (node)); 899 900 return recursing; 901 } 902 903 /* Return the length of the replacement text of a function-like or 904 object-like non-builtin macro. */ 905 size_t 906 _cpp_replacement_text_len (const cpp_macro *macro) 907 { 908 size_t len; 909 910 if (macro->fun_like && (macro->paramc != 0)) 911 { 912 const uchar *exp; 913 914 len = 0; 915 for (exp = macro->exp.text;;) 916 { 917 struct block *b = (struct block *) exp; 918 919 len += b->text_len; 920 if (b->arg_index == 0) 921 break; 922 len += NODE_LEN (macro->params[b->arg_index - 1]); 923 exp += BLOCK_LEN (b->text_len); 924 } 925 } 926 else 927 len = macro->count; 928 929 return len; 930 } 931 932 /* Copy the replacement text of MACRO to DEST, which must be of 933 sufficient size. It is not NUL-terminated. The next character is 934 returned. 
*/ 935 uchar * 936 _cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest) 937 { 938 if (macro->fun_like && (macro->paramc != 0)) 939 { 940 const uchar *exp; 941 942 for (exp = macro->exp.text;;) 943 { 944 struct block *b = (struct block *) exp; 945 cpp_hashnode *param; 946 947 memcpy (dest, b->text, b->text_len); 948 dest += b->text_len; 949 if (b->arg_index == 0) 950 break; 951 param = macro->params[b->arg_index - 1]; 952 memcpy (dest, NODE_NAME (param), NODE_LEN (param)); 953 dest += NODE_LEN (param); 954 exp += BLOCK_LEN (b->text_len); 955 } 956 } 957 else 958 { 959 memcpy (dest, macro->exp.text, macro->count); 960 dest += macro->count; 961 } 962 963 return dest; 964 } 965 966 /* Push a context holding the replacement text of the macro NODE on 967 the context stack. NODE is either object-like, or a function-like 968 macro with no arguments. */ 969 static void 970 replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro) 971 { 972 cpp_macro *macro = fmacro->node->value.macro; 973 974 if (macro->paramc == 0) 975 push_replacement_text (pfile, fmacro->node); 976 else 977 { 978 const uchar *exp; 979 uchar *p; 980 _cpp_buff *buff; 981 size_t len = 0; 982 int cxtquote = 0; 983 984 /* Get an estimate of the length of the argument-replaced text. 985 This is a worst case estimate, assuming that every replacement 986 text character needs quoting. */ 987 for (exp = macro->exp.text;;) 988 { 989 struct block *b = (struct block *) exp; 990 991 len += b->text_len; 992 if (b->arg_index == 0) 993 break; 994 len += 2 * (fmacro->args[b->arg_index] 995 - fmacro->args[b->arg_index - 1] - 1); 996 exp += BLOCK_LEN (b->text_len); 997 } 998 999 /* Allocate room for the expansion plus \n. */ 1000 buff = _cpp_get_buff (pfile, len + 1); 1001 1002 /* Copy the expansion and replace arguments. 
*/ 1003 /* Accumulate actual length, including quoting as necessary */ 1004 p = BUFF_FRONT (buff); 1005 len = 0; 1006 for (exp = macro->exp.text;;) 1007 { 1008 struct block *b = (struct block *) exp; 1009 size_t arglen; 1010 int argquote; 1011 uchar *base; 1012 uchar *in; 1013 1014 len += b->text_len; 1015 /* Copy the non-argument text literally, keeping 1016 track of whether matching quotes have been seen. */ 1017 for (arglen = b->text_len, in = b->text; arglen > 0; arglen--) 1018 { 1019 if (*in == '"') 1020 cxtquote = ! cxtquote; 1021 *p++ = *in++; 1022 } 1023 /* Done if no more arguments */ 1024 if (b->arg_index == 0) 1025 break; 1026 arglen = (fmacro->args[b->arg_index] 1027 - fmacro->args[b->arg_index - 1] - 1); 1028 base = pfile->out.base + fmacro->args[b->arg_index - 1]; 1029 in = base; 1030 #if 0 1031 /* Skip leading whitespace in the text for the argument to 1032 be substituted. To be compatible with gcc 2.95, we would 1033 also need to trim trailing whitespace. Gcc 2.95 trims 1034 leading and trailing whitespace, which may be a bug. The 1035 current gcc testsuite explicitly checks that this leading 1036 and trailing whitespace in actual arguments is 1037 preserved. */ 1038 while (arglen > 0 && is_space (*in)) 1039 { 1040 in++; 1041 arglen--; 1042 } 1043 #endif 1044 for (argquote = 0; arglen > 0; arglen--) 1045 { 1046 if (cxtquote && *in == '"') 1047 { 1048 if (in > base && *(in-1) != '\\') 1049 argquote = ! argquote; 1050 /* Always add backslash before double quote if argument 1051 is expanded in a quoted context */ 1052 *p++ = '\\'; 1053 len++; 1054 } 1055 else if (cxtquote && argquote && *in == '\\') 1056 { 1057 /* Always add backslash before a backslash in an argument 1058 that is expanded in a quoted context and also in the 1059 range of a quoted context in the argument itself. */ 1060 *p++ = '\\'; 1061 len++; 1062 } 1063 *p++ = *in++; 1064 len++; 1065 } 1066 exp += BLOCK_LEN (b->text_len); 1067 } 1068 1069 /* \n-terminate. 
*/ 1070 *p = '\n'; 1071 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len); 1072 1073 /* So we free buffer allocation when macro is left. */ 1074 pfile->context->buff = buff; 1075 } 1076 } 1077 1078 /* Read and record the parameters, if any, of a function-like macro 1079 definition. Destroys pfile->out.cur. 1080 1081 Returns true on success, false on failure (syntax error or a 1082 duplicate parameter). On success, CUR (pfile->context) is just 1083 past the closing parenthesis. */ 1084 static bool 1085 scan_parameters (cpp_reader *pfile, cpp_macro *macro) 1086 { 1087 const uchar *cur = CUR (pfile->context) + 1; 1088 bool ok; 1089 1090 for (;;) 1091 { 1092 cur = skip_whitespace (pfile, cur, true /* skip_comments */); 1093 1094 if (is_idstart (*cur)) 1095 { 1096 struct cpp_hashnode *id = lex_identifier (pfile, cur); 1097 ok = false; 1098 if (_cpp_save_parameter (pfile, macro, id, id)) 1099 break; 1100 cur = skip_whitespace (pfile, CUR (pfile->context), 1101 true /* skip_comments */); 1102 if (*cur == ',') 1103 { 1104 cur++; 1105 continue; 1106 } 1107 ok = (*cur == ')'); 1108 break; 1109 } 1110 1111 ok = (*cur == ')' && macro->paramc == 0); 1112 break; 1113 } 1114 1115 if (!ok) 1116 cpp_error (pfile, CPP_DL_ERROR, "syntax error in macro parameter list"); 1117 1118 CUR (pfile->context) = cur + (*cur == ')'); 1119 1120 return ok; 1121 } 1122 1123 /* Save the text from pfile->out.base to pfile->out.cur as 1124 the replacement text for the current macro, followed by argument 1125 ARG_INDEX, with zero indicating the end of the replacement 1126 text. */ 1127 static void 1128 save_replacement_text (cpp_reader *pfile, cpp_macro *macro, 1129 unsigned int arg_index) 1130 { 1131 size_t len = pfile->out.cur - pfile->out.base; 1132 uchar *exp; 1133 1134 if (macro->paramc == 0) 1135 { 1136 /* Object-like and function-like macros without parameters 1137 simply store their \n-terminated replacement text. 
*/ 1138 exp = _cpp_unaligned_alloc (pfile, len + 1); 1139 memcpy (exp, pfile->out.base, len); 1140 exp[len] = '\n'; 1141 macro->exp.text = exp; 1142 macro->traditional = 1; 1143 macro->count = len; 1144 } 1145 else 1146 { 1147 /* Store the text's length (unsigned int), the argument index 1148 (unsigned short, base 1) and then the text. */ 1149 size_t blen = BLOCK_LEN (len); 1150 struct block *block; 1151 1152 if (macro->count + blen > BUFF_ROOM (pfile->a_buff)) 1153 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen); 1154 1155 exp = BUFF_FRONT (pfile->a_buff); 1156 block = (struct block *) (exp + macro->count); 1157 macro->exp.text = exp; 1158 macro->traditional = 1; 1159 1160 /* Write out the block information. */ 1161 block->text_len = len; 1162 block->arg_index = arg_index; 1163 memcpy (block->text, pfile->out.base, len); 1164 1165 /* Lex the rest into the start of the output buffer. */ 1166 pfile->out.cur = pfile->out.base; 1167 1168 macro->count += blen; 1169 1170 /* If we've finished, commit the memory. */ 1171 if (arg_index == 0) 1172 BUFF_FRONT (pfile->a_buff) += macro->count; 1173 } 1174 } 1175 1176 /* Analyze and save the replacement text of a macro. Returns true on 1177 success. */ 1178 bool 1179 _cpp_create_trad_definition (cpp_reader *pfile, cpp_macro *macro) 1180 { 1181 const uchar *cur; 1182 uchar *limit; 1183 cpp_context *context = pfile->context; 1184 1185 /* The context has not been set up for command line defines, and CUR 1186 has not been updated for the macro name for in-file defines. */ 1187 pfile->out.cur = pfile->out.base; 1188 CUR (context) = pfile->buffer->cur; 1189 RLIMIT (context) = pfile->buffer->rlimit; 1190 check_output_buffer (pfile, RLIMIT (context) - CUR (context)); 1191 1192 /* Is this a function-like macro? */ 1193 if (* CUR (context) == '(') 1194 { 1195 bool ok = scan_parameters (pfile, macro); 1196 1197 /* Remember the params so we can clear NODE_MACRO_ARG flags. 
*/ 1198 macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff); 1199 1200 /* Setting macro to NULL indicates an error occurred, and 1201 prevents unnecessary work in _cpp_scan_out_logical_line. */ 1202 if (!ok) 1203 macro = NULL; 1204 else 1205 { 1206 BUFF_FRONT (pfile->a_buff) = (uchar *) ¯o->params[macro->paramc]; 1207 macro->fun_like = 1; 1208 } 1209 } 1210 1211 /* Skip leading whitespace in the replacement text. */ 1212 pfile->buffer->cur 1213 = skip_whitespace (pfile, CUR (context), 1214 CPP_OPTION (pfile, discard_comments_in_macro_exp)); 1215 1216 pfile->state.prevent_expansion++; 1217 _cpp_scan_out_logical_line (pfile, macro, false); 1218 pfile->state.prevent_expansion--; 1219 1220 if (!macro) 1221 return false; 1222 1223 /* Skip trailing white space. */ 1224 cur = pfile->out.base; 1225 limit = pfile->out.cur; 1226 while (limit > cur && is_space (limit[-1])) 1227 limit--; 1228 pfile->out.cur = limit; 1229 save_replacement_text (pfile, macro, 0); 1230 1231 return true; 1232 } 1233 1234 /* Copy SRC of length LEN to DEST, but convert all contiguous 1235 whitespace to a single space, provided it is not in quotes. The 1236 quote currently in effect is pointed to by PQUOTE, and is updated 1237 by the function. Returns the number of bytes copied. */ 1238 static size_t 1239 canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote) 1240 { 1241 uchar *orig_dest = dest; 1242 uchar quote = *pquote; 1243 1244 while (len) 1245 { 1246 if (is_space (*src) && !quote) 1247 { 1248 do 1249 src++, len--; 1250 while (len && is_space (*src)); 1251 *dest++ = ' '; 1252 } 1253 else 1254 { 1255 if (*src == '\'' || *src == '"') 1256 { 1257 if (!quote) 1258 quote = *src; 1259 else if (quote == *src) 1260 quote = 0; 1261 } 1262 *dest++ = *src++, len--; 1263 } 1264 } 1265 1266 *pquote = quote; 1267 return dest - orig_dest; 1268 } 1269 1270 /* Returns true if MACRO1 and MACRO2 have expansions different other 1271 than in the form of their whitespace. 
*/
/* Both expansions are canonicalized with canonicalize_text into the
   two halves of one scratch allocation and compared bytewise.  Only
   MACRO1's parameter count selects the storage format to walk;
   presumably the caller has already checked that both macros take
   the same number of parameters -- confirm at the call site.  */
bool
_cpp_expansions_different_trad (const cpp_macro *macro1,
                                const cpp_macro *macro2)
{
  /* One allocation, split in two: P1 receives MACRO1's canonical
     text and P2 MACRO2's.  canonicalize_text never writes more
     bytes than it reads, so COUNT bytes per macro are enough.  */
  uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
  uchar *p2 = p1 + macro1->count;
  uchar quote1 = 0, quote2 = 0;
  bool mismatch;
  size_t len1, len2;

  if (macro1->paramc > 0)
    {
      const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;

      /* Walk the two block sequences in lock-step, assuming a
         mismatch until the final blocks have compared equal.  */
      mismatch = true;
      for (;;)
        {
          struct block *b1 = (struct block *) exp1;
          struct block *b2 = (struct block *) exp2;

          /* The same parameter must follow both pieces of text.  */
          if (b1->arg_index != b2->arg_index)
            break;

          /* The quote state is carried from block to block, so a
             quote opened in one block is still in effect in the
             next.  */
          len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
          len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
          if (len1 != len2 || memcmp (p1, p2, len1))
            break;

          /* An argument index of zero marks the last block.  */
          if (b1->arg_index == 0)
            {
              mismatch = false;
              break;
            }
          exp1 += BLOCK_LEN (b1->text_len);
          exp2 += BLOCK_LEN (b2->text_len);
        }
    }
  else
    {
      /* No parameters: the expansion is one run of COUNT bytes.  */
      len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
      len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
      mismatch = (len1 != len2 || memcmp (p1, p2, len1));
    }

  free (p1);
  return mismatch;
}