/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "cpplib.h"
#include "internal.h"

/* Spelling category of a token type.  cpp_token_len in this file
   reads val.str for SPELL_LITERAL tokens and val.node for SPELL_IDENT
   tokens.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One entry of the token spelling table: the spelling category of a
   token type together with a name string for it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};

/* Spellings of the digraphs.  NOTE(review): presumably indexed by the
   code that spells tokens carrying the DIGRAPH flag; the user is not
   visible here -- confirm.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };

/* The table is produced by expanding TTYPE_TABLE with the two macros
   below: OP entries get category SPELL_OPERATOR and the string S,
   TK entries get category SPELL_<S> and the stringified enumerator
   name E.  */
#define OP(e, s) { SPELL_OPERATOR, U s  },
#define TK(e, s) { SPELL_ ## s,    U #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Accessors into token_spellings, keyed by the token's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)

static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
                            unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static tokenrun *next_tokenrun (tokenrun *);

static _cpp_buff *new_buff (size_t);


/* Utility routine:

   Compares the token TOKEN to the NUL-terminated string STRING.
   Returns 1 if TOKEN is a CPP_NAME whose spelling equals STRING,
   and 0 otherwise (including when TOKEN is not a CPP_NAME).  */
int
cpp_ideq (const cpp_token *token, const char *string)
{
  if (token->type != CPP_NAME)
    return 0;

  return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
}

/* Record a note TYPE at byte POS into the current cleaned logical
   line.  
*/ 82 static void 83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) 84 { 85 if (buffer->notes_used == buffer->notes_cap) 86 { 87 buffer->notes_cap = buffer->notes_cap * 2 + 200; 88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes, 89 buffer->notes_cap); 90 } 91 92 buffer->notes[buffer->notes_used].pos = pos; 93 buffer->notes[buffer->notes_used].type = type; 94 buffer->notes_used++; 95 } 96 97 /* Returns with a logical line that contains no escaped newlines or 98 trigraphs. This is a time-critical inner loop. */ 99 void 100 _cpp_clean_line (cpp_reader *pfile) 101 { 102 cpp_buffer *buffer; 103 const uchar *s; 104 uchar c, *d, *p; 105 106 buffer = pfile->buffer; 107 buffer->cur_note = buffer->notes_used = 0; 108 buffer->cur = buffer->line_base = buffer->next_line; 109 buffer->need_line = false; 110 s = buffer->next_line - 1; 111 112 if (!buffer->from_stage3) 113 { 114 /* Short circuit for the common case of an un-escaped line with 115 no trigraphs. The primary win here is by not writing any 116 data back to memory until we have to. */ 117 for (;;) 118 { 119 c = *++s; 120 if (c == '\n' || c == '\r') 121 { 122 d = (uchar *) s; 123 124 if (s == buffer->rlimit) 125 goto done; 126 127 /* DOS line ending? */ 128 if (c == '\r' && s[1] == '\n') 129 s++; 130 131 if (s == buffer->rlimit) 132 goto done; 133 134 /* check for escaped newline */ 135 p = d; 136 while (p != buffer->next_line && is_nvspace (p[-1])) 137 p--; 138 if (p == buffer->next_line || p[-1] != '\\') 139 goto done; 140 141 /* Have an escaped newline; process it and proceed to 142 the slow path. */ 143 add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); 144 d = p - 2; 145 buffer->next_line = p - 1; 146 break; 147 } 148 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 149 { 150 /* Have a trigraph. We may or may not have to convert 151 it. Add a line note regardless, for -Wtrigraphs. 
*/ 152 add_line_note (buffer, s, s[2]); 153 if (CPP_OPTION (pfile, trigraphs)) 154 { 155 /* We do, and that means we have to switch to the 156 slow path. */ 157 d = (uchar *) s; 158 *d = _cpp_trigraph_map[s[2]]; 159 s += 2; 160 break; 161 } 162 } 163 } 164 165 166 for (;;) 167 { 168 c = *++s; 169 *++d = c; 170 171 if (c == '\n' || c == '\r') 172 { 173 /* Handle DOS line endings. */ 174 if (c == '\r' && s != buffer->rlimit && s[1] == '\n') 175 s++; 176 if (s == buffer->rlimit) 177 break; 178 179 /* Escaped? */ 180 p = d; 181 while (p != buffer->next_line && is_nvspace (p[-1])) 182 p--; 183 if (p == buffer->next_line || p[-1] != '\\') 184 break; 185 186 add_line_note (buffer, p - 1, p != d ? ' ': '\\'); 187 d = p - 2; 188 buffer->next_line = p - 1; 189 } 190 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) 191 { 192 /* Add a note regardless, for the benefit of -Wtrigraphs. */ 193 add_line_note (buffer, d, s[2]); 194 if (CPP_OPTION (pfile, trigraphs)) 195 { 196 *d = _cpp_trigraph_map[s[2]]; 197 s += 2; 198 } 199 } 200 } 201 } 202 else 203 { 204 do 205 s++; 206 while (*s != '\n' && *s != '\r'); 207 d = (uchar *) s; 208 209 /* Handle DOS line endings. */ 210 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') 211 s++; 212 } 213 214 done: 215 *d = '\n'; 216 /* A sentinel note that should never be processed. */ 217 add_line_note (buffer, d + 1, '\n'); 218 buffer->next_line = s + 1; 219 } 220 221 /* Return true if the trigraph indicated by NOTE should be warned 222 about in a comment. */ 223 static bool 224 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note) 225 { 226 const uchar *p; 227 228 /* Within comments we don't warn about trigraphs, unless the 229 trigraph forms an escaped newline, as that may change 230 behavior. */ 231 if (note->type != '/') 232 return false; 233 234 /* If -trigraphs, then this was an escaped newline iff the next note 235 is coincident. 
*/ 236 if (CPP_OPTION (pfile, trigraphs)) 237 return note[1].pos == note->pos; 238 239 /* Otherwise, see if this forms an escaped newline. */ 240 p = note->pos + 3; 241 while (is_nvspace (*p)) 242 p++; 243 244 /* There might have been escaped newlines between the trigraph and the 245 newline we found. Hence the position test. */ 246 return (*p == '\n' && p < note[1].pos); 247 } 248 249 /* Process the notes created by add_line_note as far as the current 250 location. */ 251 void 252 _cpp_process_line_notes (cpp_reader *pfile, int in_comment) 253 { 254 cpp_buffer *buffer = pfile->buffer; 255 256 for (;;) 257 { 258 _cpp_line_note *note = &buffer->notes[buffer->cur_note]; 259 unsigned int col; 260 261 if (note->pos > buffer->cur) 262 break; 263 264 buffer->cur_note++; 265 col = CPP_BUF_COLUMN (buffer, note->pos + 1); 266 267 if (note->type == '\\' || note->type == ' ') 268 { 269 if (note->type == ' ' && !in_comment) 270 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 271 "backslash and newline separated by space"); 272 273 if (buffer->next_line > buffer->rlimit) 274 { 275 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, 276 "backslash-newline at end of file"); 277 /* Prevent "no newline at end of file" warning. 
*/ 278 buffer->next_line = buffer->rlimit; 279 } 280 281 buffer->line_base = note->pos; 282 CPP_INCREMENT_LINE (pfile, 0); 283 } 284 else if (_cpp_trigraph_map[note->type]) 285 { 286 if (CPP_OPTION (pfile, warn_trigraphs) 287 && (!in_comment || warn_in_comment (pfile, note))) 288 { 289 if (CPP_OPTION (pfile, trigraphs)) 290 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 291 "trigraph ??%c converted to %c", 292 note->type, 293 (int) _cpp_trigraph_map[note->type]); 294 else 295 { 296 cpp_error_with_line 297 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, 298 "trigraph ??%c ignored, use -trigraphs to enable", 299 note->type); 300 } 301 } 302 } 303 else 304 abort (); 305 } 306 } 307 308 /* Skip a C-style block comment. We find the end of the comment by 309 seeing if an asterisk is before every '/' we encounter. Returns 310 nonzero if comment terminated by EOF, zero otherwise. 311 312 Buffer->cur points to the initial asterisk of the comment. */ 313 bool 314 _cpp_skip_block_comment (cpp_reader *pfile) 315 { 316 cpp_buffer *buffer = pfile->buffer; 317 const uchar *cur = buffer->cur; 318 uchar c; 319 320 cur++; 321 if (*cur == '/') 322 cur++; 323 324 for (;;) 325 { 326 /* People like decorating comments with '*', so check for '/' 327 instead for efficiency. */ 328 c = *cur++; 329 330 if (c == '/') 331 { 332 if (cur[-2] == '*') 333 break; 334 335 /* Warn about potential nested comments, but not if the '/' 336 comes immediately before the true comment delimiter. 337 Don't bother to get it right across escaped newlines. 
*/ 338 if (CPP_OPTION (pfile, warn_comments) 339 && cur[0] == '*' && cur[1] != '/') 340 { 341 buffer->cur = cur; 342 cpp_error_with_line (pfile, CPP_DL_WARNING, 343 pfile->line_table->highest_line, CPP_BUF_COL (buffer), 344 "\"/*\" within comment"); 345 } 346 } 347 else if (c == '\n') 348 { 349 unsigned int cols; 350 buffer->cur = cur - 1; 351 _cpp_process_line_notes (pfile, true); 352 if (buffer->next_line >= buffer->rlimit) 353 return true; 354 _cpp_clean_line (pfile); 355 356 cols = buffer->next_line - buffer->line_base; 357 CPP_INCREMENT_LINE (pfile, cols); 358 359 cur = buffer->cur; 360 } 361 } 362 363 buffer->cur = cur; 364 _cpp_process_line_notes (pfile, true); 365 return false; 366 } 367 368 /* Skip a C++ line comment, leaving buffer->cur pointing to the 369 terminating newline. Handles escaped newlines. Returns nonzero 370 if a multiline comment. */ 371 static int 372 skip_line_comment (cpp_reader *pfile) 373 { 374 cpp_buffer *buffer = pfile->buffer; 375 unsigned int orig_line = pfile->line_table->highest_line; 376 377 while (*buffer->cur != '\n') 378 buffer->cur++; 379 380 _cpp_process_line_notes (pfile, true); 381 return orig_line != pfile->line_table->highest_line; 382 } 383 384 /* Skips whitespace, saving the next non-whitespace character. */ 385 static void 386 skip_whitespace (cpp_reader *pfile, cppchar_t c) 387 { 388 cpp_buffer *buffer = pfile->buffer; 389 bool saw_NUL = false; 390 391 do 392 { 393 /* Horizontal space always OK. */ 394 if (c == ' ' || c == '\t') 395 ; 396 /* Just \f \v or \0 left. */ 397 else if (c == '\0') 398 saw_NUL = true; 399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) 400 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, 401 CPP_BUF_COL (buffer), 402 "%s in preprocessing directive", 403 c == '\f' ? "form feed" : "vertical tab"); 404 405 c = *buffer->cur++; 406 } 407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. 
*/ 408 while (is_nvspace (c)); 409 410 if (saw_NUL) 411 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); 412 413 buffer->cur--; 414 } 415 416 /* See if the characters of a number token are valid in a name (no 417 '.', '+' or '-'). */ 418 static int 419 name_p (cpp_reader *pfile, const cpp_string *string) 420 { 421 unsigned int i; 422 423 for (i = 0; i < string->len; i++) 424 if (!is_idchar (string->text[i])) 425 return 0; 426 427 return 1; 428 } 429 430 /* After parsing an identifier or other sequence, produce a warning about 431 sequences not in NFC/NFKC. */ 432 static void 433 warn_about_normalization (cpp_reader *pfile, 434 const cpp_token *token, 435 const struct normalize_state *s) 436 { 437 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s) 438 && !pfile->state.skipping) 439 { 440 /* Make sure that the token is printed using UCNs, even 441 if we'd otherwise happily print UTF-8. */ 442 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token)); 443 size_t sz; 444 445 sz = cpp_spell_token (pfile, token, buf, false) - buf; 446 if (NORMALIZE_STATE_RESULT (s) == normalized_C) 447 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 448 "`%.*s' is not in NFKC", (int) sz, buf); 449 else 450 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0, 451 "`%.*s' is not in NFC", (int) sz, buf); 452 } 453 } 454 455 /* Returns TRUE if the sequence starting at buffer->cur is invalid in 456 an identifier. FIRST is TRUE if this starts an identifier. 
*/
static bool
forms_identifier_p (cpp_reader *pfile, int first,
                    struct normalize_state *state)
{
  cpp_buffer *buffer = pfile->buffer;

  if (*buffer->cur == '$')
    {
      if (!CPP_OPTION (pfile, dollars_in_ident))
        return false;

      buffer->cur++;
      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
        {
          /* Clear the option so that the warning is issued only once.  */
          CPP_OPTION (pfile, warn_dollars) = 0;
          cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
        }

      return true;
    }

  /* Is this a syntactically valid UCN?  */
  if (CPP_OPTION (pfile, extended_identifiers)
      && *buffer->cur == '\\'
      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
    {
      buffer->cur += 2;
      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
                          state))
        return true;
      /* Not a valid UCN; step back to the backslash.  */
      buffer->cur -= 2;
    }

  return false;
}

/* Lex an identifier starting at BUFFER->CUR - 1.  BASE points to the
   first character of the identifier; STARTS_UCN is true when the
   identifier begins with a UCN or '$' rather than a plain identifier
   character.  Returns the hash node of the identifier.  */
static cpp_hashnode *
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
                struct normalize_state *nst)
{
  cpp_hashnode *result;
  const uchar *cur;
  unsigned int len;
  unsigned int hash = HT_HASHSTEP (0, *base);

  /* Fast path: hash the plain identifier characters as we scan.  */
  cur = pfile->buffer->cur;
  if (! starts_ucn)
    while (ISIDNUM (*cur))
      {
        hash = HT_HASHSTEP (hash, *cur);
        cur++;
      }
  pfile->buffer->cur = cur;
  if (starts_ucn || forms_identifier_p (pfile, false, nst))
    {
      /* Slower version for identifiers containing UCNs (or $).  */
      do {
        while (ISIDNUM (*pfile->buffer->cur))
          {
            pfile->buffer->cur++;
            NORMALIZE_STATE_UPDATE_IDNUM (nst);
          }
      } while (forms_identifier_p (pfile, false, nst));
      result = _cpp_interpret_identifier (pfile, base,
                                          pfile->buffer->cur - base);
    }
  else
    {
      len = cur - base;
      hash = HT_HASHFINISH (hash, len);

      result = (cpp_hashnode *)
        ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
    }

  /* Rarely, identifiers require diagnostics when lexed.  */
  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
                        && !pfile->state.skipping, 0))
    {
      /* It is allowed to poison the same identifier twice.  */
      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
        cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
                   NODE_NAME (result));

      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
         replacement list of a variadic macro.  */
      if (result == pfile->spec_nodes.n__VA_ARGS__
          && !pfile->state.va_args_ok)
        cpp_error (pfile, CPP_DL_PEDWARN,
                   "__VA_ARGS__ can only appear in the expansion"
                   " of a C99 variadic macro");
    }

  return result;
}

/* Lex a number to NUMBER starting at BUFFER->CUR - 1.  The spelling
   is copied into freshly allocated, NUL-terminated storage.  */
static void
lex_number (cpp_reader *pfile, cpp_string *number,
            struct normalize_state *nst)
{
  const uchar *cur;
  const uchar *base;
  uchar *dest;

  base = pfile->buffer->cur - 1;
  do
    {
      cur = pfile->buffer->cur;

      /* N.B. ISIDNUM does not include $.  */
      while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
        {
          cur++;
          NORMALIZE_STATE_UPDATE_IDNUM (nst);
        }

      pfile->buffer->cur = cur;
    }
  while (forms_identifier_p (pfile, false, nst));

  number->len = cur - base;
  dest = _cpp_unaligned_alloc (pfile, number->len + 1);
  memcpy (dest, base, number->len);
  dest[number->len] = '\0';
  number->text = dest;
}

/* Create a token of type TYPE with a literal spelling.  The LEN bytes
   at BASE are copied and NUL-terminated.  */
static void
create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
                unsigned int len, enum cpp_ttype type)
{
  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);

  memcpy (dest, base, len);
  dest[len] = '\0';
  token->type = type;
  token->val.str.len = len;
  token->val.str.text = dest;
}

/* Lexes a string, character constant, or angle-bracketed header file
   name.  The stored string contains the spelling, including opening
   quote and any leading 'L'.  On return TOKEN's type is the type of
   the literal, or CPP_OTHER if it was not properly terminated.

   The spelling is NUL-terminated, but it is not guaranteed that this
   is the first NUL since embedded NULs are preserved.  */
static void
lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
{
  bool saw_NUL = false;
  const uchar *cur;
  cppchar_t terminator;
  enum cpp_ttype type;

  /* Work out the terminator and token type from the opening
     character(s).  */
  cur = base;
  terminator = *cur++;
  if (terminator == 'L')
    terminator = *cur++;
  if (terminator == '\"')
    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
  else if (terminator == '\'')
    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
  else
    terminator = '>', type = CPP_HEADER_NAME;

  for (;;)
    {
      cppchar_t c = *cur++;

      /* In #include-style directives, terminators are not escapable.  */
      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
        cur++;
      else if (c == terminator)
        break;
      else if (c == '\n')
        {
          /* Unterminated: leave the newline to be lexed separately.  */
          cur--;
          type = CPP_OTHER;
          break;
        }
      else if (c == '\0')
        saw_NUL = true;
    }

  if (saw_NUL && !pfile->state.skipping)
    cpp_error (pfile, CPP_DL_WARNING,
               "null character(s) preserved in literal");

  if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
    cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
               (int) terminator);

  pfile->buffer->cur = cur;
  create_literal (pfile, token, base, cur - base, type);
}

/* The stored comment includes the comment start and any terminator.
   FROM points just after the initial '/' of the comment, and TYPE is
   the character found there ('*' or '/').  */
static void
save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
              cppchar_t type)
{
  unsigned char *buffer;
  unsigned int len, clen;

  len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */

  /* C++ comments probably (not definitely) have moved past a new
     line, which we don't want to save in the comment.  */
  if (is_vspace (pfile->buffer->cur[-1]))
    len--;

  /* If we are currently in a directive, then we need to store all
     C++ comments as C comments internally, and so we need to
     allocate a little extra space in that case.

     Note that the only time we encounter a directive here is
     when we are saving comments in a "#define".  */
  clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;

  buffer = _cpp_unaligned_alloc (pfile, clen);

  token->type = CPP_COMMENT;
  token->val.str.len = clen;
  token->val.str.text = buffer;

  buffer[0] = '/';
  memcpy (buffer + 1, from, len - 1);

  /* Finish conversion to a C comment, if necessary.  */
  if (pfile->state.in_directive && type == '/')
    {
      buffer[1] = '*';
      buffer[clen - 2] = '*';
      buffer[clen - 1] = '/';
    }
}

/* Allocate COUNT tokens for RUN.  */
void
_cpp_init_tokenrun (tokenrun *run, unsigned int count)
{
  run->base = XNEWVEC (cpp_token, count);
  run->limit = run->base + count;
  run->next = NULL;
}

/* Returns the next tokenrun, or creates one if there is none.  A
   newly created run holds 250 tokens and is linked after RUN.  */
static tokenrun *
next_tokenrun (tokenrun *run)
{
  if (run->next == NULL)
    {
      run->next = XNEW (tokenrun);
      run->next->prev = run;
      _cpp_init_tokenrun (run->next, 250);
    }

  return run->next;
}

/* Allocate a single token that is invalidated at the same time as the
   rest of the tokens on the line.  Has its line and col set to the
   same as the last lexed token, so that diagnostics appear in the
   right place.  */
cpp_token *
_cpp_temp_token (cpp_reader *pfile)
{
  cpp_token *old, *result;

  /* Remember the last lexed token before possibly switching runs.  */
  old = pfile->cur_token - 1;
  if (pfile->cur_token == pfile->cur_run->limit)
    {
      pfile->cur_run = next_tokenrun (pfile->cur_run);
      pfile->cur_token = pfile->cur_run->base;
    }

  result = pfile->cur_token++;
  result->src_loc = old->src_loc;
  return result;
}

/* Lex a token into RESULT (external interface).  Takes care of issues
   like directive handling, token lookahead, multiple include
   optimization and skipping.  */
const cpp_token *
_cpp_lex_token (cpp_reader *pfile)
{
  cpp_token *result;

  for (;;)
    {
      if (pfile->cur_token == pfile->cur_run->limit)
        {
          pfile->cur_run = next_tokenrun (pfile->cur_run);
          pfile->cur_token = pfile->cur_run->base;
        }

      /* Re-use a token that was lexed ahead, if there is one.  */
      if (pfile->lookaheads)
        {
          pfile->lookaheads--;
          result = pfile->cur_token++;
        }
      else
        result = _cpp_lex_direct (pfile);

      if (result->flags & BOL)
        {
          /* Is this a directive.  If _cpp_handle_directive returns
             false, it is an assembler #.  */
          if (result->type == CPP_HASH
              /* 6.10.3 p 11: Directives in a list of macro arguments
                 gives undefined behavior.  This implementation
                 handles the directive as normal.  */
              && pfile->state.parsing_args != 1)
            {
              if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
                {
                  if (pfile->directive_result.type == CPP_PADDING)
                    continue;
                  result = &pfile->directive_result;
                }
            }
          else if (pfile->state.in_deferred_pragma)
            result = &pfile->directive_result;

          if (pfile->cb.line_change && !pfile->state.skipping)
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
        }

      /* We don't skip tokens in directives.  */
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
        break;

      /* Outside a directive, invalidate controlling macros.  At file
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
         get here and MI optimization works.  */
      pfile->mi_valid = false;

      if (!pfile->state.skipping || result->type == CPP_EOF)
        break;
    }

  return result;
}

/* Returns true if a fresh line has been loaded.  */
bool
_cpp_get_fresh_line (cpp_reader *pfile)
{
  int return_at_eof;

  /* We can't get a new line until we leave the current directive.  */
  if (pfile->state.in_directive)
    return false;

  for (;;)
    {
      cpp_buffer *buffer = pfile->buffer;

      if (!buffer->need_line)
        return true;

      if (buffer->next_line < buffer->rlimit)
        {
          _cpp_clean_line (pfile);
          return true;
        }

      /* First, get out of parsing arguments state.  */
      if (pfile->state.parsing_args)
        return false;

      /* End of buffer.  Non-empty files should end in a newline.  */
      if (buffer->buf != buffer->rlimit
          && buffer->next_line > buffer->rlimit
          && !buffer->from_stage3)
        {
          /* Only warn once.  */
          buffer->next_line = buffer->rlimit;
          cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
                               CPP_BUF_COLUMN (buffer, buffer->cur),
                               "no newline at end of file");
        }

      /* Read the flag before the buffer is popped.  */
      return_at_eof = buffer->return_at_eof;
      _cpp_pop_buffer (pfile);
      if (pfile->buffer == NULL || return_at_eof)
        return false;
    }
}

/* Set result->type to THEN_TYPE and consume the next character if it
   is CHAR; otherwise set result->type to ELSE_TYPE.  Relies on the
   local variables `result' and `buffer' of the function using it.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)

/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      if (pfile->state.in_deferred_pragma)
        {
          /* The end of the line ends a deferred pragma.  */
          result->type = CPP_PRAGMA_EOL;
          pfile->state.in_deferred_pragma = false;
          if (!pfile->state.pragma_allow_expansion)
            pfile->state.prevent_expansion--;
          return result;
        }
      if (!_cpp_get_fresh_line (pfile))
        {
          result->type = CPP_EOF;
          if (!pfile->state.in_directive)
            {
              /* Tell the compiler the line number of the EOF token.  */
              result->src_loc = pfile->line_table->highest_line;
              result->flags = BOL;
            }
          return result;
        }
      if (!pfile->keep_tokens)
        {
          /* Restart at the beginning of the base token run.  */
          pfile->cur_run = &pfile->base_run;
          result = pfile->base_run.base;
          pfile->cur_token = result + 1;
        }
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
        result->flags |= PREV_WHITE;
    }
  /* _cpp_get_fresh_line may have changed pfile->buffer; reload it.  */
  buffer = pfile->buffer;
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

 skipped_white:
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
                               CPP_BUF_COLUMN (buffer, buffer->cur));

  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      if (buffer->cur < buffer->rlimit)
        CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
        result->type = CPP_NUMBER;
        lex_number (pfile, &result->val.str, &nst);
        warn_about_normalization (pfile, result, &nst);
        break;
      }

    case 'L':
      /* 'L' may introduce wide characters or strings.  */
      if (*buffer->cur == '\'' || *buffer->cur == '"')
        {
          lex_string (pfile, result, buffer->cur - 1);
          break;
        }
      /* Fall through.  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
        result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
                                           &nst);
        warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  */
      if (result->val.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = (enum cpp_ttype) result->val.node->directive_index;
        }
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
        {
          if (_cpp_skip_block_comment (pfile))
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
        }
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
                            || cpp_in_system_header (pfile)))
        {
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "C++ style comments are not allowed in ISO C90");
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
        }
      else if (c == '=')
        {
          buffer->cur++;
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          result->type = CPP_DIV;
          break;
        }

      /* Only the two comment branches above reach this point.  A
         comment that is not saved counts as whitespace.  */
      if (!pfile->state.save_comments)
        {
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      if (pfile->state.angled_headers)
        {
          lex_string (pfile, result, buffer->cur - 1);
          break;
        }

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
        }
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_SQUARE;
            }
          else if (*buffer->cur == '%')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_BRACE;
            }
        }
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
        }
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_HASH;
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
                buffer->cur += 2, result->type = CPP_PASTE;
            }
          else if (*buffer->cur == '>')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_CLOSE_BRACE;
            }
        }
      break;

    case '.':
      result->type = CPP_DOT;
      if (ISDIGIT (*buffer->cur))
        {
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;
          result->type = CPP_NUMBER;
          lex_number (pfile, &result->val.str, &nst);
          warn_about_normalization (pfile, result, &nst);
        }
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
        buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
        {
          buffer->cur++;
          result->type = CPP_DEREF;
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
            buffer->cur++, result->type = CPP_DEREF_STAR;
        }
      else if (*buffer->cur == '-')
        buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
        buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
        buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
        {
          buffer->cur++;
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
        /* Back up so that forms_identifier_p sees the '$' or '\\'.  */
        const uchar *base = --buffer->cur;
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;

        if (forms_identifier_p (pfile, true, &nst))
          {
            result->type = CPP_NAME;
            result->val.node = lex_identifier (pfile, base, true, &nst);
            warn_about_normalization (pfile, result, &nst);
            break;
          }
        /* Not an identifier: consume the character again and fall
           through to treat it as a stray character.  */
        buffer->cur++;
      }

    default:
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}

/* An upper bound on the number of bytes needed to spell TOKEN.
   Does not include preceding whitespace.  
*/
unsigned int
cpp_token_len (const cpp_token *token)
{
  switch (TOKEN_SPELL (token))
    {
    case SPELL_LITERAL:
      /* Literals are spelt verbatim from their stored text.  */
      return token->val.str.len;

    case SPELL_IDENT:
      /* Spelling can turn a non-ASCII byte sequence into a 10-byte
         \UXXXXXXXX escape, so allow ten output bytes per name byte.  */
      return NODE_LEN (token->val.node) * 10;

    case SPELL_OPERATOR:
      /* cpp_spell_token spells a named operator (NAMED_OP set, DIGRAPH
         clear) from its identifier node rather than from the operator
         table.  Such names can exceed four bytes (e.g. "and_eq"), so
         bound them like identifiers; otherwise cpp_token_as_text
         would allocate too little room for the spelling.  */
      if (!(token->flags & DIGRAPH) && (token->flags & NAMED_OP))
        return NODE_LEN (token->val.node) * 10;
      return 4;

    default:
      /* Remaining categories keep the historical 4-byte bound.  */
      return 4;
    }
}

/* Parse one UTF-8 sequence starting at NAME and store the matching
   \U escape (backslash, 'U', eight lower-case hex digits) in BUFFER.
   Exactly 10 bytes are written to BUFFER; no terminator is added.
   Returns the number of bytes consumed from NAME.  Calls abort ()
   if a continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *p = name;
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int k;
  int shift;

  /* The count of leading one bits in the first byte gives the
     length of the sequence.  */
  for (lead = *p; lead & 0x80; lead <<= 1)
    seq_len++;

  /* Payload bits of the lead byte, then six bits from each
     continuation byte.  */
  code_point = *p & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      p++;
      code_point = (code_point << 6) | (*p & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*p & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}


/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
   already contain enough space to hold the token's spelling.
   Returns a pointer to the character after the last character written.
   FORSTRING is true if this is to be the spelling after translation
   phase 1 (this is different for UCNs).
   FIXME: Would be nice if we didn't need the PFILE argument.
*/ 1278 unsigned char * 1279 cpp_spell_token (cpp_reader *pfile, const cpp_token *token, 1280 unsigned char *buffer, bool forstring) 1281 { 1282 switch (TOKEN_SPELL (token)) 1283 { 1284 case SPELL_OPERATOR: 1285 { 1286 const unsigned char *spelling; 1287 unsigned char c; 1288 1289 if (token->flags & DIGRAPH) 1290 spelling 1291 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1292 else if (token->flags & NAMED_OP) 1293 goto spell_ident; 1294 else 1295 spelling = TOKEN_NAME (token); 1296 1297 while ((c = *spelling++) != '\0') 1298 *buffer++ = c; 1299 } 1300 break; 1301 1302 spell_ident: 1303 case SPELL_IDENT: 1304 if (forstring) 1305 { 1306 memcpy (buffer, NODE_NAME (token->val.node), 1307 NODE_LEN (token->val.node)); 1308 buffer += NODE_LEN (token->val.node); 1309 } 1310 else 1311 { 1312 size_t i; 1313 const unsigned char * name = NODE_NAME (token->val.node); 1314 1315 for (i = 0; i < NODE_LEN (token->val.node); i++) 1316 if (name[i] & ~0x7F) 1317 { 1318 i += utf8_to_ucn (buffer, name + i) - 1; 1319 buffer += 10; 1320 } 1321 else 1322 *buffer++ = NODE_NAME (token->val.node)[i]; 1323 } 1324 break; 1325 1326 case SPELL_LITERAL: 1327 memcpy (buffer, token->val.str.text, token->val.str.len); 1328 buffer += token->val.str.len; 1329 break; 1330 1331 case SPELL_NONE: 1332 cpp_error (pfile, CPP_DL_ICE, 1333 "unspellable token %s", TOKEN_NAME (token)); 1334 break; 1335 } 1336 1337 return buffer; 1338 } 1339 1340 /* Returns TOKEN spelt as a null-terminated string. The string is 1341 freed when the reader is destroyed. Useful for diagnostics. */ 1342 unsigned char * 1343 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) 1344 { 1345 unsigned int len = cpp_token_len (token) + 1; 1346 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; 1347 1348 end = cpp_spell_token (pfile, token, start, false); 1349 end[0] = '\0'; 1350 1351 return start; 1352 } 1353 1354 /* Used by C front ends, which really should move to using 1355 cpp_token_as_text. 
*/ 1356 const char * 1357 cpp_type2name (enum cpp_ttype type) 1358 { 1359 return (const char *) token_spellings[type].name; 1360 } 1361 1362 /* Writes the spelling of token to FP, without any preceding space. 1363 Separated from cpp_spell_token for efficiency - to avoid stdio 1364 double-buffering. */ 1365 void 1366 cpp_output_token (const cpp_token *token, FILE *fp) 1367 { 1368 switch (TOKEN_SPELL (token)) 1369 { 1370 case SPELL_OPERATOR: 1371 { 1372 const unsigned char *spelling; 1373 int c; 1374 1375 if (token->flags & DIGRAPH) 1376 spelling 1377 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1378 else if (token->flags & NAMED_OP) 1379 goto spell_ident; 1380 else 1381 spelling = TOKEN_NAME (token); 1382 1383 c = *spelling; 1384 do 1385 putc (c, fp); 1386 while ((c = *++spelling) != '\0'); 1387 } 1388 break; 1389 1390 spell_ident: 1391 case SPELL_IDENT: 1392 { 1393 size_t i; 1394 const unsigned char * name = NODE_NAME (token->val.node); 1395 1396 for (i = 0; i < NODE_LEN (token->val.node); i++) 1397 if (name[i] & ~0x7F) 1398 { 1399 unsigned char buffer[10]; 1400 i += utf8_to_ucn (buffer, name + i) - 1; 1401 fwrite (buffer, 1, 10, fp); 1402 } 1403 else 1404 fputc (NODE_NAME (token->val.node)[i], fp); 1405 } 1406 break; 1407 1408 case SPELL_LITERAL: 1409 fwrite (token->val.str.text, 1, token->val.str.len, fp); 1410 break; 1411 1412 case SPELL_NONE: 1413 /* An error, most probably. */ 1414 break; 1415 } 1416 } 1417 1418 /* Compare two tokens. */ 1419 int 1420 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) 1421 { 1422 if (a->type == b->type && a->flags == b->flags) 1423 switch (TOKEN_SPELL (a)) 1424 { 1425 default: /* Keep compiler happy. 
*/ 1426 case SPELL_OPERATOR: 1427 return 1; 1428 case SPELL_NONE: 1429 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); 1430 case SPELL_IDENT: 1431 return a->val.node == b->val.node; 1432 case SPELL_LITERAL: 1433 return (a->val.str.len == b->val.str.len 1434 && !memcmp (a->val.str.text, b->val.str.text, 1435 a->val.str.len)); 1436 } 1437 1438 return 0; 1439 } 1440 1441 /* Returns nonzero if a space should be inserted to avoid an 1442 accidental token paste for output. For simplicity, it is 1443 conservative, and occasionally advises a space where one is not 1444 needed, e.g. "." and ".2". */ 1445 int 1446 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, 1447 const cpp_token *token2) 1448 { 1449 enum cpp_ttype a = token1->type, b = token2->type; 1450 cppchar_t c; 1451 1452 if (token1->flags & NAMED_OP) 1453 a = CPP_NAME; 1454 if (token2->flags & NAMED_OP) 1455 b = CPP_NAME; 1456 1457 c = EOF; 1458 if (token2->flags & DIGRAPH) 1459 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; 1460 else if (token_spellings[b].category == SPELL_OPERATOR) 1461 c = token_spellings[b].name[0]; 1462 1463 /* Quickly get everything that can paste with an '='. */ 1464 if ((int) a <= (int) CPP_LAST_EQ && c == '=') 1465 return 1; 1466 1467 switch (a) 1468 { 1469 case CPP_GREATER: return c == '>'; 1470 case CPP_LESS: return c == '<' || c == '%' || c == ':'; 1471 case CPP_PLUS: return c == '+'; 1472 case CPP_MINUS: return c == '-' || c == '>'; 1473 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ 1474 case CPP_MOD: return c == ':' || c == '>'; 1475 case CPP_AND: return c == '&'; 1476 case CPP_OR: return c == '|'; 1477 case CPP_COLON: return c == ':' || c == '>'; 1478 case CPP_DEREF: return c == '*'; 1479 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; 1480 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. 
*/ 1481 case CPP_NAME: return ((b == CPP_NUMBER 1482 && name_p (pfile, &token2->val.str)) 1483 || b == CPP_NAME 1484 || b == CPP_CHAR || b == CPP_STRING); /* L */ 1485 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME 1486 || c == '.' || c == '+' || c == '-'); 1487 /* UCNs */ 1488 case CPP_OTHER: return ((token1->val.str.text[0] == '\\' 1489 && b == CPP_NAME) 1490 || (CPP_OPTION (pfile, objc) 1491 && token1->val.str.text[0] == '@' 1492 && (b == CPP_NAME || b == CPP_STRING))); 1493 default: break; 1494 } 1495 1496 return 0; 1497 } 1498 1499 /* Output all the remaining tokens on the current line, and a newline 1500 character, to FP. Leading whitespace is removed. If there are 1501 macros, special token padding is not performed. */ 1502 void 1503 cpp_output_line (cpp_reader *pfile, FILE *fp) 1504 { 1505 const cpp_token *token; 1506 1507 token = cpp_get_token (pfile); 1508 while (token->type != CPP_EOF) 1509 { 1510 cpp_output_token (token, fp); 1511 token = cpp_get_token (pfile); 1512 if (token->flags & PREV_WHITE) 1513 putc (' ', fp); 1514 } 1515 1516 putc ('\n', fp); 1517 } 1518 1519 /* Memory buffers. Changing these three constants can have a dramatic 1520 effect on performance. The values here are reasonable defaults, 1521 but might be tuned. If you adjust them, be sure to test across a 1522 range of uses of cpplib, including heavy nested function-like macro 1523 expansion. Also check the change in peak memory usage (NJAMD is a 1524 good tool for this). */ 1525 #define MIN_BUFF_SIZE 8000 1526 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) 1527 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ 1528 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) 1529 1530 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) 1531 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! 1532 #endif 1533 1534 /* Create a new allocation buffer. 
Place the control block at the end 1535 of the buffer, so that buffer overflows will cause immediate chaos. */ 1536 static _cpp_buff * 1537 new_buff (size_t len) 1538 { 1539 _cpp_buff *result; 1540 unsigned char *base; 1541 1542 if (len < MIN_BUFF_SIZE) 1543 len = MIN_BUFF_SIZE; 1544 len = CPP_ALIGN (len); 1545 1546 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff)); 1547 result = (_cpp_buff *) (base + len); 1548 result->base = base; 1549 result->cur = base; 1550 result->limit = base + len; 1551 result->next = NULL; 1552 return result; 1553 } 1554 1555 /* Place a chain of unwanted allocation buffers on the free list. */ 1556 void 1557 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) 1558 { 1559 _cpp_buff *end = buff; 1560 1561 while (end->next) 1562 end = end->next; 1563 end->next = pfile->free_buffs; 1564 pfile->free_buffs = buff; 1565 } 1566 1567 /* Return a free buffer of size at least MIN_SIZE. */ 1568 _cpp_buff * 1569 _cpp_get_buff (cpp_reader *pfile, size_t min_size) 1570 { 1571 _cpp_buff *result, **p; 1572 1573 for (p = &pfile->free_buffs;; p = &(*p)->next) 1574 { 1575 size_t size; 1576 1577 if (*p == NULL) 1578 return new_buff (min_size); 1579 result = *p; 1580 size = result->limit - result->base; 1581 /* Return a buffer that's big enough, but don't waste one that's 1582 way too big. */ 1583 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) 1584 break; 1585 } 1586 1587 *p = result->next; 1588 result->next = NULL; 1589 result->cur = result->base; 1590 return result; 1591 } 1592 1593 /* Creates a new buffer with enough space to hold the uncommitted 1594 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies 1595 the excess bytes to the new buffer. Chains the new buffer after 1596 BUFF, and returns the new buffer. 
*/ 1597 _cpp_buff * 1598 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) 1599 { 1600 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); 1601 _cpp_buff *new_buff = _cpp_get_buff (pfile, size); 1602 1603 buff->next = new_buff; 1604 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); 1605 return new_buff; 1606 } 1607 1608 /* Creates a new buffer with enough space to hold the uncommitted 1609 remaining bytes of the buffer pointed to by BUFF, and at least 1610 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. 1611 Chains the new buffer before the buffer pointed to by BUFF, and 1612 updates the pointer to point to the new buffer. */ 1613 void 1614 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) 1615 { 1616 _cpp_buff *new_buff, *old_buff = *pbuff; 1617 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); 1618 1619 new_buff = _cpp_get_buff (pfile, size); 1620 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); 1621 new_buff->next = old_buff; 1622 *pbuff = new_buff; 1623 } 1624 1625 /* Free a chain of buffers starting at BUFF. */ 1626 void 1627 _cpp_free_buff (_cpp_buff *buff) 1628 { 1629 _cpp_buff *next; 1630 1631 for (; buff; buff = next) 1632 { 1633 next = buff->next; 1634 free (buff->base); 1635 } 1636 } 1637 1638 /* Allocate permanent, unaligned storage of length LEN. */ 1639 unsigned char * 1640 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len) 1641 { 1642 _cpp_buff *buff = pfile->u_buff; 1643 unsigned char *result = buff->cur; 1644 1645 if (len > (size_t) (buff->limit - result)) 1646 { 1647 buff = _cpp_get_buff (pfile, len); 1648 buff->next = pfile->u_buff; 1649 pfile->u_buff = buff; 1650 result = buff->cur; 1651 } 1652 1653 buff->cur = result + len; 1654 return result; 1655 } 1656 1657 /* Allocate permanent, unaligned storage of length LEN from a_buff. 
1658 That buffer is used for growing allocations when saving macro 1659 replacement lists in a #define, and when parsing an answer to an 1660 assertion in #assert, #unassert or #if (and therefore possibly 1661 whilst expanding macros). It therefore must not be used by any 1662 code that they might call: specifically the lexer and the guts of 1663 the macro expander. 1664 1665 All existing other uses clearly fit this restriction: storing 1666 registered pragmas during initialization. */ 1667 unsigned char * 1668 _cpp_aligned_alloc (cpp_reader *pfile, size_t len) 1669 { 1670 _cpp_buff *buff = pfile->a_buff; 1671 unsigned char *result = buff->cur; 1672 1673 if (len > (size_t) (buff->limit - result)) 1674 { 1675 buff = _cpp_get_buff (pfile, len); 1676 buff->next = pfile->a_buff; 1677 pfile->a_buff = buff; 1678 result = buff->cur; 1679 } 1680 1681 buff->cur = result + len; 1682 return result; 1683 } 1684 1685 /* Say which field of TOK is in use. */ 1686 1687 enum cpp_token_fld_kind 1688 cpp_token_val_index (cpp_token *tok) 1689 { 1690 switch (TOKEN_SPELL (tok)) 1691 { 1692 case SPELL_IDENT: 1693 return CPP_TOKEN_FLD_NODE; 1694 case SPELL_LITERAL: 1695 return CPP_TOKEN_FLD_STR; 1696 case SPELL_NONE: 1697 if (tok->type == CPP_MACRO_ARG) 1698 return CPP_TOKEN_FLD_ARG_NO; 1699 else if (tok->type == CPP_PADDING) 1700 return CPP_TOKEN_FLD_SOURCE; 1701 else if (tok->type == CPP_PRAGMA) 1702 return CPP_TOKEN_FLD_PRAGMA; 1703 /* else fall through */ 1704 default: 1705 return CPP_TOKEN_FLD_NONE; 1706 } 1707 } 1708