1 /* CPP Library - lexical analysis. 2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. 3 Contributed by Per Bothner, 1994-95. 4 Based on CCCP program by Paul Rubin, June 1986 5 Adapted to ANSI C, Richard Stallman, Jan 1987 6 Broken out to separate file, Zack Weinberg, Mar 2000 7 Single-pass line tokenization by Neil Booth, April 2000 8 9 This program is free software; you can redistribute it and/or modify it 10 under the terms of the GNU General Public License as published by the 11 Free Software Foundation; either version 2, or (at your option) any 12 later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, write to the Free Software 21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22 23 #include "config.h" 24 #include "system.h" 25 #include "cpplib.h" 26 #include "cpphash.h" 27 28 #ifdef MULTIBYTE_CHARS 29 #include "mbchar.h" 30 #include <locale.h> 31 #endif 32 33 /* Tokens with SPELL_STRING store their spelling in the token list, 34 and it's length in the token->val.name.len. */ 35 enum spell_type 36 { 37 SPELL_OPERATOR = 0, 38 SPELL_CHAR, 39 SPELL_IDENT, 40 SPELL_NUMBER, 41 SPELL_STRING, 42 SPELL_NONE 43 }; 44 45 struct token_spelling 46 { 47 enum spell_type category; 48 const unsigned char *name; 49 }; 50 51 static const unsigned char *const digraph_spellings[] = 52 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; 53 54 #define OP(e, s) { SPELL_OPERATOR, U s }, 55 #define TK(e, s) { s, U STRINGX (e) }, 56 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; 57 #undef OP 58 #undef TK 59 60 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) 61 #define TOKEN_NAME(token) (token_spellings[(token)->type].name) 62 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0) 63 64 static void handle_newline PARAMS ((cpp_reader *)); 65 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *)); 66 static cppchar_t get_effective_char PARAMS ((cpp_reader *)); 67 68 static int skip_block_comment PARAMS ((cpp_reader *)); 69 static int skip_line_comment PARAMS ((cpp_reader *)); 70 static void adjust_column PARAMS ((cpp_reader *)); 71 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); 72 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *)); 73 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int, 74 unsigned int *)); 75 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int)); 76 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *)); 77 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t)); 78 static bool trigraph_p PARAMS ((cpp_reader *)); 79 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *, 80 cppchar_t)); 81 static bool continue_after_nul PARAMS ((cpp_reader *)); 82 static int name_p PARAMS ((cpp_reader *, const cpp_string *)); 83 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **, 84 const unsigned char *, cppchar_t *)); 85 static tokenrun *next_tokenrun PARAMS ((tokenrun *)); 86 87 static unsigned int hex_digit_value PARAMS ((unsigned int)); 88 static _cpp_buff *new_buff PARAMS ((size_t)); 89 90 /* Utility routine: 91 92 Compares, the token TOKEN to the NUL-terminated string STRING. 93 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ 94 int 95 cpp_ideq (token, string) 96 const cpp_token *token; 97 const char *string; 98 { 99 if (token->type != CPP_NAME) 100 return 0; 101 102 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string); 103 } 104 105 /* Call when meeting a newline, assumed to be in buffer->cur[-1]. 106 Returns with buffer->cur pointing to the character immediately 107 following the newline (combination). */ 108 static void 109 handle_newline (pfile) 110 cpp_reader *pfile; 111 { 112 cpp_buffer *buffer = pfile->buffer; 113 114 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java) 115 only accept CR-LF; maybe we should fall back to that behavior? */ 116 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n') 117 buffer->cur++; 118 119 buffer->line_base = buffer->cur; 120 buffer->col_adjust = 0; 121 pfile->line++; 122 } 123 124 /* Subroutine of skip_escaped_newlines; called when a 3-character 125 sequence beginning with "??" is encountered. buffer->cur points to 126 the second '?'. 127 128 Warn if necessary, and returns true if the sequence forms a 129 trigraph and the trigraph should be honored. */ 130 static bool 131 trigraph_p (pfile) 132 cpp_reader *pfile; 133 { 134 cpp_buffer *buffer = pfile->buffer; 135 cppchar_t from_char = buffer->cur[1]; 136 bool accept; 137 138 if (!_cpp_trigraph_map[from_char]) 139 return false; 140 141 accept = CPP_OPTION (pfile, trigraphs); 142 143 /* Don't warn about trigraphs in comments. */ 144 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment) 145 { 146 if (accept) 147 cpp_error_with_line (pfile, DL_WARNING, 148 pfile->line, CPP_BUF_COL (buffer) - 1, 149 "trigraph ??%c converted to %c", 150 (int) from_char, 151 (int) _cpp_trigraph_map[from_char]); 152 else if (buffer->cur != buffer->last_Wtrigraphs) 153 { 154 buffer->last_Wtrigraphs = buffer->cur; 155 cpp_error_with_line (pfile, DL_WARNING, 156 pfile->line, CPP_BUF_COL (buffer) - 1, 157 "trigraph ??%c ignored", (int) from_char); 158 } 159 } 160 161 return accept; 162 } 163 164 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to 165 lie in buffer->cur[-1]. Returns the next byte, which will be in 166 buffer->cur[-1]. This routine performs preprocessing stages 1 and 167 2 of the ISO C standard. */ 168 static cppchar_t 169 skip_escaped_newlines (pfile) 170 cpp_reader *pfile; 171 { 172 cpp_buffer *buffer = pfile->buffer; 173 cppchar_t next = buffer->cur[-1]; 174 175 /* Only do this if we apply stages 1 and 2. */ 176 if (!buffer->from_stage3) 177 { 178 const unsigned char *saved_cur; 179 cppchar_t next1; 180 181 do 182 { 183 if (next == '?') 184 { 185 if (buffer->cur[0] != '?' || !trigraph_p (pfile)) 186 break; 187 188 /* Translate the trigraph. */ 189 next = _cpp_trigraph_map[buffer->cur[1]]; 190 buffer->cur += 2; 191 if (next != '\\') 192 break; 193 } 194 195 if (buffer->cur == buffer->rlimit) 196 break; 197 198 /* We have a backslash, and room for at least one more 199 character. Skip horizontal whitespace. */ 200 saved_cur = buffer->cur; 201 do 202 next1 = *buffer->cur++; 203 while (is_nvspace (next1) && buffer->cur < buffer->rlimit); 204 205 if (!is_vspace (next1)) 206 { 207 buffer->cur = saved_cur; 208 break; 209 } 210 211 if (saved_cur != buffer->cur - 1 212 && !pfile->state.lexing_comment) 213 cpp_error (pfile, DL_WARNING, 214 "backslash and newline separated by space"); 215 216 handle_newline (pfile); 217 buffer->backup_to = buffer->cur; 218 if (buffer->cur == buffer->rlimit) 219 { 220 cpp_error (pfile, DL_PEDWARN, 221 "backslash-newline at end of file"); 222 next = EOF; 223 } 224 else 225 next = *buffer->cur++; 226 } 227 while (next == '\\' || next == '?'); 228 } 229 230 return next; 231 } 232 233 /* Obtain the next character, after trigraph conversion and skipping 234 an arbitrarily long string of escaped newlines. The common case of 235 no trigraphs or escaped newlines falls through quickly. On return, 236 buffer->backup_to points to where to return to if the character is 237 not to be processed. */ 238 static cppchar_t 239 get_effective_char (pfile) 240 cpp_reader *pfile; 241 { 242 cppchar_t next; 243 cpp_buffer *buffer = pfile->buffer; 244 245 buffer->backup_to = buffer->cur; 246 next = *buffer->cur++; 247 if (__builtin_expect (next == '?' || next == '\\', 0)) 248 next = skip_escaped_newlines (pfile); 249 250 return next; 251 } 252 253 /* Skip a C-style block comment. We find the end of the comment by 254 seeing if an asterisk is before every '/' we encounter. Returns 255 nonzero if comment terminated by EOF, zero otherwise. */ 256 static int 257 skip_block_comment (pfile) 258 cpp_reader *pfile; 259 { 260 cpp_buffer *buffer = pfile->buffer; 261 cppchar_t c = EOF, prevc = EOF; 262 263 pfile->state.lexing_comment = 1; 264 while (buffer->cur != buffer->rlimit) 265 { 266 prevc = c, c = *buffer->cur++; 267 268 /* FIXME: For speed, create a new character class of characters 269 of interest inside block comments. */ 270 if (c == '?' || c == '\\') 271 c = skip_escaped_newlines (pfile); 272 273 /* People like decorating comments with '*', so check for '/' 274 instead for efficiency. */ 275 if (c == '/') 276 { 277 if (prevc == '*') 278 break; 279 280 /* Warn about potential nested comments, but not if the '/' 281 comes immediately before the true comment delimiter. 282 Don't bother to get it right across escaped newlines. */ 283 if (CPP_OPTION (pfile, warn_comments) 284 && buffer->cur[0] == '*' && buffer->cur[1] != '/') 285 cpp_error_with_line (pfile, DL_WARNING, 286 pfile->line, CPP_BUF_COL (buffer), 287 "\"/*\" within comment"); 288 } 289 else if (is_vspace (c)) 290 handle_newline (pfile); 291 else if (c == '\t') 292 adjust_column (pfile); 293 } 294 295 pfile->state.lexing_comment = 0; 296 return c != '/' || prevc != '*'; 297 } 298 299 /* Skip a C++ line comment, leaving buffer->cur pointing to the 300 terminating newline. Handles escaped newlines. Returns nonzero 301 if a multiline comment. */ 302 static int 303 skip_line_comment (pfile) 304 cpp_reader *pfile; 305 { 306 cpp_buffer *buffer = pfile->buffer; 307 unsigned int orig_line = pfile->line; 308 cppchar_t c; 309 #ifdef MULTIBYTE_CHARS 310 wchar_t wc; 311 int char_len; 312 #endif 313 314 pfile->state.lexing_comment = 1; 315 #ifdef MULTIBYTE_CHARS 316 /* Reset multibyte conversion state. */ 317 (void) local_mbtowc (NULL, NULL, 0); 318 #endif 319 do 320 { 321 if (buffer->cur == buffer->rlimit) 322 goto at_eof; 323 324 #ifdef MULTIBYTE_CHARS 325 char_len = local_mbtowc (&wc, (const char *) buffer->cur, 326 buffer->rlimit - buffer->cur); 327 if (char_len == -1) 328 { 329 cpp_error (pfile, DL_WARNING, 330 "ignoring invalid multibyte character"); 331 char_len = 1; 332 c = *buffer->cur++; 333 } 334 else 335 { 336 buffer->cur += char_len; 337 c = wc; 338 } 339 #else 340 c = *buffer->cur++; 341 #endif 342 if (c == '?' || c == '\\') 343 c = skip_escaped_newlines (pfile); 344 } 345 while (!is_vspace (c)); 346 347 /* Step back over the newline, except at EOF. */ 348 buffer->cur--; 349 at_eof: 350 351 pfile->state.lexing_comment = 0; 352 return orig_line != pfile->line; 353 } 354 355 /* pfile->buffer->cur is one beyond the \t character. Update 356 col_adjust so we track the column correctly. */ 357 static void 358 adjust_column (pfile) 359 cpp_reader *pfile; 360 { 361 cpp_buffer *buffer = pfile->buffer; 362 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */ 363 364 /* Round it up to multiple of the tabstop, but subtract 1 since the 365 tab itself occupies a character position. */ 366 buffer->col_adjust += (CPP_OPTION (pfile, tabstop) 367 - col % CPP_OPTION (pfile, tabstop)) - 1; 368 } 369 370 /* Skips whitespace, saving the next non-whitespace character. 371 Adjusts pfile->col_adjust to account for tabs. Without this, 372 tokens might be assigned an incorrect column. */ 373 static int 374 skip_whitespace (pfile, c) 375 cpp_reader *pfile; 376 cppchar_t c; 377 { 378 cpp_buffer *buffer = pfile->buffer; 379 unsigned int warned = 0; 380 381 do 382 { 383 /* Horizontal space always OK. */ 384 if (c == ' ') 385 ; 386 else if (c == '\t') 387 adjust_column (pfile); 388 /* Just \f \v or \0 left. */ 389 else if (c == '\0') 390 { 391 if (buffer->cur - 1 == buffer->rlimit) 392 return 0; 393 if (!warned) 394 { 395 cpp_error (pfile, DL_WARNING, "null character(s) ignored"); 396 warned = 1; 397 } 398 } 399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) 400 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, 401 CPP_BUF_COL (buffer), 402 "%s in preprocessing directive", 403 c == '\f' ? "form feed" : "vertical tab"); 404 405 c = *buffer->cur++; 406 } 407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ 408 while (is_nvspace (c)); 409 410 buffer->cur--; 411 return 1; 412 } 413 414 /* See if the characters of a number token are valid in a name (no 415 '.', '+' or '-'). */ 416 static int 417 name_p (pfile, string) 418 cpp_reader *pfile; 419 const cpp_string *string; 420 { 421 unsigned int i; 422 423 for (i = 0; i < string->len; i++) 424 if (!is_idchar (string->text[i])) 425 return 0; 426 427 return 1; 428 } 429 430 /* Parse an identifier, skipping embedded backslash-newlines. This is 431 a critical inner loop. The common case is an identifier which has 432 not been split by backslash-newline, does not contain a dollar 433 sign, and has already been scanned (roughly 10:1 ratio of 434 seen:unseen identifiers in normal code; the distribution is 435 Poisson-like). Second most common case is a new identifier, not 436 split and no dollar sign. The other possibilities are rare and 437 have been relegated to parse_slow. */ 438 static cpp_hashnode * 439 parse_identifier (pfile) 440 cpp_reader *pfile; 441 { 442 cpp_hashnode *result; 443 const uchar *cur, *base; 444 445 /* Fast-path loop. Skim over a normal identifier. 446 N.B. ISIDNUM does not include $. */ 447 cur = pfile->buffer->cur; 448 while (ISIDNUM (*cur)) 449 cur++; 450 451 /* Check for slow-path cases. */ 452 if (*cur == '?' || *cur == '\\' || *cur == '$') 453 { 454 unsigned int len; 455 456 base = parse_slow (pfile, cur, 0, &len); 457 result = (cpp_hashnode *) 458 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED); 459 } 460 else 461 { 462 base = pfile->buffer->cur - 1; 463 pfile->buffer->cur = cur; 464 result = (cpp_hashnode *) 465 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); 466 } 467 468 /* Rarely, identifiers require diagnostics when lexed. 469 XXX Has to be forced out of the fast path. */ 470 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) 471 && !pfile->state.skipping, 0)) 472 { 473 /* It is allowed to poison the same identifier twice. */ 474 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) 475 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"", 476 NODE_NAME (result)); 477 478 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the 479 replacement list of a variadic macro. */ 480 if (result == pfile->spec_nodes.n__VA_ARGS__ 481 && !pfile->state.va_args_ok) 482 cpp_error (pfile, DL_PEDWARN, 483 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"); 484 } 485 486 return result; 487 } 488 489 /* Slow path. This handles numbers and identifiers which have been 490 split, or contain dollar signs. The part of the token from 491 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is 492 1 if it's a number, and 2 if it has a leading period. Returns a 493 pointer to the token's NUL-terminated spelling in permanent 494 storage, and sets PLEN to its length. */ 495 static uchar * 496 parse_slow (pfile, cur, number_p, plen) 497 cpp_reader *pfile; 498 const uchar *cur; 499 int number_p; 500 unsigned int *plen; 501 { 502 cpp_buffer *buffer = pfile->buffer; 503 const uchar *base = buffer->cur - 1; 504 struct obstack *stack = &pfile->hash_table->stack; 505 unsigned int c, prevc, saw_dollar = 0; 506 507 /* Place any leading period. */ 508 if (number_p == 2) 509 obstack_1grow (stack, '.'); 510 511 /* Copy the part of the token which is known to be okay. */ 512 obstack_grow (stack, base, cur - base); 513 514 /* Now process the part which isn't. We are looking at one of 515 '$', '\\', or '?' on entry to this loop. */ 516 prevc = cur[-1]; 517 c = *cur++; 518 buffer->cur = cur; 519 for (;;) 520 { 521 /* Potential escaped newline? */ 522 buffer->backup_to = buffer->cur - 1; 523 if (c == '?' || c == '\\') 524 c = skip_escaped_newlines (pfile); 525 526 if (!is_idchar (c)) 527 { 528 if (!number_p) 529 break; 530 if (c != '.' && !VALID_SIGN (c, prevc)) 531 break; 532 } 533 534 /* Handle normal identifier characters in this loop. */ 535 do 536 { 537 prevc = c; 538 obstack_1grow (stack, c); 539 540 if (c == '$') 541 saw_dollar++; 542 543 c = *buffer->cur++; 544 } 545 while (is_idchar (c)); 546 } 547 548 /* Step back over the unwanted char. */ 549 BACKUP (); 550 551 /* $ is not an identifier character in the standard, but is commonly 552 accepted as an extension. Don't warn about it in skipped 553 conditional blocks. */ 554 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping) 555 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number"); 556 557 /* Identifiers and numbers are null-terminated. */ 558 *plen = obstack_object_size (stack); 559 obstack_1grow (stack, '\0'); 560 return obstack_finish (stack); 561 } 562 563 /* Parse a number, beginning with character C, skipping embedded 564 backslash-newlines. LEADING_PERIOD is nonzero if there was a "." 565 before C. Place the result in NUMBER. */ 566 static void 567 parse_number (pfile, number, leading_period) 568 cpp_reader *pfile; 569 cpp_string *number; 570 int leading_period; 571 { 572 const uchar *cur; 573 574 /* Fast-path loop. Skim over a normal number. 575 N.B. ISIDNUM does not include $. */ 576 cur = pfile->buffer->cur; 577 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) 578 cur++; 579 580 /* Check for slow-path cases. */ 581 if (*cur == '?' || *cur == '\\' || *cur == '$') 582 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len); 583 else 584 { 585 const uchar *base = pfile->buffer->cur - 1; 586 uchar *dest; 587 588 number->len = cur - base + leading_period; 589 dest = _cpp_unaligned_alloc (pfile, number->len + 1); 590 dest[number->len] = '\0'; 591 number->text = dest; 592 593 if (leading_period) 594 *dest++ = '.'; 595 memcpy (dest, base, cur - base); 596 pfile->buffer->cur = cur; 597 } 598 } 599 600 /* Subroutine of parse_string. */ 601 static int 602 unescaped_terminator_p (pfile, dest) 603 cpp_reader *pfile; 604 const unsigned char *dest; 605 { 606 const unsigned char *start, *temp; 607 608 /* In #include-style directives, terminators are not escapeable. */ 609 if (pfile->state.angled_headers) 610 return 1; 611 612 start = BUFF_FRONT (pfile->u_buff); 613 614 /* An odd number of consecutive backslashes represents an escaped 615 terminator. */ 616 for (temp = dest; temp > start && temp[-1] == '\\'; temp--) 617 ; 618 619 return ((dest - temp) & 1) == 0; 620 } 621 622 /* Parses a string, character constant, or angle-bracketed header file 623 name. Handles embedded trigraphs and escaped newlines. The stored 624 string is guaranteed NUL-terminated, but it is not guaranteed that 625 this is the first NUL since embedded NULs are preserved. 626 627 When this function returns, buffer->cur points to the next 628 character to be processed. */ 629 static void 630 parse_string (pfile, token, terminator) 631 cpp_reader *pfile; 632 cpp_token *token; 633 cppchar_t terminator; 634 { 635 cpp_buffer *buffer = pfile->buffer; 636 unsigned char *dest, *limit; 637 cppchar_t c; 638 bool warned_nulls = false; 639 #ifdef MULTIBYTE_CHARS 640 wchar_t wc; 641 int char_len; 642 #endif 643 644 dest = BUFF_FRONT (pfile->u_buff); 645 limit = BUFF_LIMIT (pfile->u_buff); 646 647 #ifdef MULTIBYTE_CHARS 648 /* Reset multibyte conversion state. */ 649 (void) local_mbtowc (NULL, NULL, 0); 650 #endif 651 for (;;) 652 { 653 /* We need room for another char, possibly the terminating NUL. */ 654 if ((size_t) (limit - dest) < 1) 655 { 656 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff); 657 _cpp_extend_buff (pfile, &pfile->u_buff, 2); 658 dest = BUFF_FRONT (pfile->u_buff) + len_so_far; 659 limit = BUFF_LIMIT (pfile->u_buff); 660 } 661 662 #ifdef MULTIBYTE_CHARS 663 char_len = local_mbtowc (&wc, (const char *) buffer->cur, 664 buffer->rlimit - buffer->cur); 665 if (char_len == -1) 666 { 667 cpp_error (pfile, DL_WARNING, 668 "ignoring invalid multibyte character"); 669 char_len = 1; 670 c = *buffer->cur++; 671 } 672 else 673 { 674 buffer->cur += char_len; 675 c = wc; 676 } 677 #else 678 c = *buffer->cur++; 679 #endif 680 681 /* Handle trigraphs, escaped newlines etc. */ 682 if (c == '?' || c == '\\') 683 c = skip_escaped_newlines (pfile); 684 685 if (c == terminator) 686 { 687 if (unescaped_terminator_p (pfile, dest)) 688 break; 689 } 690 else if (is_vspace (c)) 691 { 692 /* No string literal may extend over multiple lines. In 693 assembly language, suppress the error except for <> 694 includes. This is a kludge around not knowing where 695 comments are. */ 696 unterminated: 697 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>') 698 cpp_error (pfile, DL_ERROR, "missing terminating %c character", 699 (int) terminator); 700 buffer->cur--; 701 break; 702 } 703 else if (c == '\0') 704 { 705 if (buffer->cur - 1 == buffer->rlimit) 706 goto unterminated; 707 if (!warned_nulls) 708 { 709 warned_nulls = true; 710 cpp_error (pfile, DL_WARNING, 711 "null character(s) preserved in literal"); 712 } 713 } 714 #ifdef MULTIBYTE_CHARS 715 if (char_len > 1) 716 { 717 for ( ; char_len > 0; --char_len) 718 *dest++ = (*buffer->cur - char_len); 719 } 720 else 721 #endif 722 *dest++ = c; 723 } 724 725 *dest = '\0'; 726 727 token->val.str.text = BUFF_FRONT (pfile->u_buff); 728 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff); 729 BUFF_FRONT (pfile->u_buff) = dest + 1; 730 } 731 732 /* The stored comment includes the comment start and any terminator. */ 733 static void 734 save_comment (pfile, token, from, type) 735 cpp_reader *pfile; 736 cpp_token *token; 737 const unsigned char *from; 738 cppchar_t type; 739 { 740 unsigned char *buffer; 741 unsigned int len, clen; 742 743 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ 744 745 /* C++ comments probably (not definitely) have moved past a new 746 line, which we don't want to save in the comment. */ 747 if (is_vspace (pfile->buffer->cur[-1])) 748 len--; 749 750 /* If we are currently in a directive, then we need to store all 751 C++ comments as C comments internally, and so we need to 752 allocate a little extra space in that case. 753 754 Note that the only time we encounter a directive here is 755 when we are saving comments in a "#define". */ 756 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; 757 758 buffer = _cpp_unaligned_alloc (pfile, clen); 759 760 token->type = CPP_COMMENT; 761 token->val.str.len = clen; 762 token->val.str.text = buffer; 763 764 buffer[0] = '/'; 765 memcpy (buffer + 1, from, len - 1); 766 767 /* Finish conversion to a C comment, if necessary. */ 768 if (pfile->state.in_directive && type == '/') 769 { 770 buffer[1] = '*'; 771 buffer[clen - 2] = '*'; 772 buffer[clen - 1] = '/'; 773 } 774 } 775 776 /* Allocate COUNT tokens for RUN. */ 777 void 778 _cpp_init_tokenrun (run, count) 779 tokenrun *run; 780 unsigned int count; 781 { 782 run->base = xnewvec (cpp_token, count); 783 run->limit = run->base + count; 784 run->next = NULL; 785 } 786 787 /* Returns the next tokenrun, or creates one if there is none. */ 788 static tokenrun * 789 next_tokenrun (run) 790 tokenrun *run; 791 { 792 if (run->next == NULL) 793 { 794 run->next = xnew (tokenrun); 795 run->next->prev = run; 796 _cpp_init_tokenrun (run->next, 250); 797 } 798 799 return run->next; 800 } 801 802 /* Allocate a single token that is invalidated at the same time as the 803 rest of the tokens on the line. Has its line and col set to the 804 same as the last lexed token, so that diagnostics appear in the 805 right place. */ 806 cpp_token * 807 _cpp_temp_token (pfile) 808 cpp_reader *pfile; 809 { 810 cpp_token *old, *result; 811 812 old = pfile->cur_token - 1; 813 if (pfile->cur_token == pfile->cur_run->limit) 814 { 815 pfile->cur_run = next_tokenrun (pfile->cur_run); 816 pfile->cur_token = pfile->cur_run->base; 817 } 818 819 result = pfile->cur_token++; 820 result->line = old->line; 821 result->col = old->col; 822 return result; 823 } 824 825 /* Lex a token into RESULT (external interface). Takes care of issues 826 like directive handling, token lookahead, multiple include 827 optimization and skipping. */ 828 const cpp_token * 829 _cpp_lex_token (pfile) 830 cpp_reader *pfile; 831 { 832 cpp_token *result; 833 834 for (;;) 835 { 836 if (pfile->cur_token == pfile->cur_run->limit) 837 { 838 pfile->cur_run = next_tokenrun (pfile->cur_run); 839 pfile->cur_token = pfile->cur_run->base; 840 } 841 842 if (pfile->lookaheads) 843 { 844 pfile->lookaheads--; 845 result = pfile->cur_token++; 846 } 847 else 848 result = _cpp_lex_direct (pfile); 849 850 if (result->flags & BOL) 851 { 852 /* Is this a directive. If _cpp_handle_directive returns 853 false, it is an assembler #. */ 854 if (result->type == CPP_HASH 855 /* 6.10.3 p 11: Directives in a list of macro arguments 856 gives undefined behavior. This implementation 857 handles the directive as normal. */ 858 && pfile->state.parsing_args != 1 859 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE)) 860 continue; 861 if (pfile->cb.line_change && !pfile->state.skipping) 862 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args); 863 } 864 865 /* We don't skip tokens in directives. */ 866 if (pfile->state.in_directive) 867 break; 868 869 /* Outside a directive, invalidate controlling macros. At file 870 EOF, _cpp_lex_direct takes care of popping the buffer, so we never 871 get here and MI optimisation works. */ 872 pfile->mi_valid = false; 873 874 if (!pfile->state.skipping || result->type == CPP_EOF) 875 break; 876 } 877 878 return result; 879 } 880 881 /* A NUL terminates the current buffer. For ISO preprocessing this is 882 EOF, but for traditional preprocessing it indicates we need a line 883 refill. Returns TRUE to continue preprocessing a new buffer, FALSE 884 to return a CPP_EOF to the caller. */ 885 static bool 886 continue_after_nul (pfile) 887 cpp_reader *pfile; 888 { 889 cpp_buffer *buffer = pfile->buffer; 890 bool more = false; 891 892 buffer->saved_flags = BOL; 893 if (CPP_OPTION (pfile, traditional)) 894 { 895 if (pfile->state.in_directive) 896 return false; 897 898 _cpp_remove_overlay (pfile); 899 more = _cpp_read_logical_line_trad (pfile); 900 _cpp_overlay_buffer (pfile, pfile->out.base, 901 pfile->out.cur - pfile->out.base); 902 pfile->line = pfile->out.first_line; 903 } 904 else 905 { 906 /* Stop parsing arguments with a CPP_EOF. When we finally come 907 back here, do the work of popping the buffer. */ 908 if (!pfile->state.parsing_args) 909 { 910 if (buffer->cur != buffer->line_base) 911 { 912 /* Non-empty files should end in a newline. Don't warn 913 for command line and _Pragma buffers. */ 914 handle_newline (pfile); 915 } 916 917 /* Similarly, finish an in-progress directive with CPP_EOF 918 before popping the buffer. */ 919 if (!pfile->state.in_directive && buffer->prev) 920 { 921 more = !buffer->return_at_eof; 922 _cpp_pop_buffer (pfile); 923 } 924 } 925 } 926 927 return more; 928 } 929 930 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ 931 do { \ 932 if (get_effective_char (pfile) == CHAR) \ 933 result->type = THEN_TYPE; \ 934 else \ 935 { \ 936 BACKUP (); \ 937 result->type = ELSE_TYPE; \ 938 } \ 939 } while (0) 940 941 /* Lex a token into pfile->cur_token, which is also incremented, to 942 get diagnostics pointing to the correct location. 943 944 Does not handle issues such as token lookahead, multiple-include 945 optimisation, directives, skipping etc. This function is only 946 suitable for use by _cpp_lex_token, and in special cases like 947 lex_expansion_token which doesn't care for any of these issues. 948 949 When meeting a newline, returns CPP_EOF if parsing a directive, 950 otherwise returns to the start of the token buffer if permissible. 951 Returns the location of the lexed token. */ 952 cpp_token * 953 _cpp_lex_direct (pfile) 954 cpp_reader *pfile; 955 { 956 cppchar_t c; 957 cpp_buffer *buffer; 958 const unsigned char *comment_start; 959 cpp_token *result = pfile->cur_token++; 960 961 fresh_line: 962 buffer = pfile->buffer; 963 result->flags = buffer->saved_flags; 964 buffer->saved_flags = 0; 965 update_tokens_line: 966 result->line = pfile->line; 967 968 skipped_white: 969 c = *buffer->cur++; 970 result->col = CPP_BUF_COLUMN (buffer, buffer->cur); 971 972 trigraph: 973 switch (c) 974 { 975 case ' ': case '\t': case '\f': case '\v': case '\0': 976 result->flags |= PREV_WHITE; 977 if (skip_whitespace (pfile, c)) 978 goto skipped_white; 979 980 /* End of buffer. */ 981 buffer->cur--; 982 if (continue_after_nul (pfile)) 983 goto fresh_line; 984 result->type = CPP_EOF; 985 break; 986 987 case '\n': case '\r': 988 handle_newline (pfile); 989 buffer->saved_flags = BOL; 990 if (! pfile->state.in_directive) 991 { 992 if (pfile->state.parsing_args == 2) 993 buffer->saved_flags |= PREV_WHITE; 994 if (!pfile->keep_tokens) 995 { 996 pfile->cur_run = &pfile->base_run; 997 result = pfile->base_run.base; 998 pfile->cur_token = result + 1; 999 } 1000 goto fresh_line; 1001 } 1002 result->type = CPP_EOF; 1003 break; 1004 1005 case '?': 1006 case '\\': 1007 /* These could start an escaped newline, or '?' a trigraph. Let 1008 skip_escaped_newlines do all the work. */ 1009 { 1010 unsigned int line = pfile->line; 1011 1012 c = skip_escaped_newlines (pfile); 1013 if (line != pfile->line) 1014 { 1015 buffer->cur--; 1016 /* We had at least one escaped newline of some sort. 1017 Update the token's line and column. */ 1018 goto update_tokens_line; 1019 } 1020 } 1021 1022 /* We are either the original '?' or '\\', or a trigraph. */ 1023 if (c == '?') 1024 result->type = CPP_QUERY; 1025 else if (c == '\\') 1026 goto random_char; 1027 else 1028 goto trigraph; 1029 break; 1030 1031 case '0': case '1': case '2': case '3': case '4': 1032 case '5': case '6': case '7': case '8': case '9': 1033 result->type = CPP_NUMBER; 1034 parse_number (pfile, &result->val.str, 0); 1035 break; 1036 1037 case 'L': 1038 /* 'L' may introduce wide characters or strings. */ 1039 { 1040 const unsigned char *pos = buffer->cur; 1041 1042 c = get_effective_char (pfile); 1043 if (c == '\'' || c == '"') 1044 { 1045 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR); 1046 parse_string (pfile, result, c); 1047 break; 1048 } 1049 buffer->cur = pos; 1050 } 1051 /* Fall through. */ 1052 1053 start_ident: 1054 case '_': 1055 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 1056 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 1057 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 1058 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 1059 case 'y': case 'z': 1060 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 1061 case 'G': case 'H': case 'I': case 'J': case 'K': 1062 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 1063 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 1064 case 'Y': case 'Z': 1065 result->type = CPP_NAME; 1066 result->val.node = parse_identifier (pfile); 1067 1068 /* Convert named operators to their proper types. */ 1069 if (result->val.node->flags & NODE_OPERATOR) 1070 { 1071 result->flags |= NAMED_OP; 1072 result->type = result->val.node->value.operator; 1073 } 1074 break; 1075 1076 case '\'': 1077 case '"': 1078 result->type = c == '"' ? CPP_STRING: CPP_CHAR; 1079 parse_string (pfile, result, c); 1080 break; 1081 1082 case '/': 1083 /* A potential block or line comment. */ 1084 comment_start = buffer->cur; 1085 c = get_effective_char (pfile); 1086 1087 if (c == '*') 1088 { 1089 if (skip_block_comment (pfile)) 1090 cpp_error (pfile, DL_ERROR, "unterminated comment"); 1091 } 1092 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) 1093 || CPP_IN_SYSTEM_HEADER (pfile))) 1094 { 1095 /* Warn about comments only if pedantically GNUC89, and not 1096 in system headers. */ 1097 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) 1098 && ! buffer->warned_cplusplus_comments) 1099 { 1100 cpp_error (pfile, DL_PEDWARN, 1101 "C++ style comments are not allowed in ISO C90"); 1102 cpp_error (pfile, DL_PEDWARN, 1103 "(this will be reported only once per input file)"); 1104 buffer->warned_cplusplus_comments = 1; 1105 } 1106 1107 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) 1108 cpp_error (pfile, DL_WARNING, "multi-line comment"); 1109 } 1110 else if (c == '=') 1111 { 1112 result->type = CPP_DIV_EQ; 1113 break; 1114 } 1115 else 1116 { 1117 BACKUP (); 1118 result->type = CPP_DIV; 1119 break; 1120 } 1121 1122 if (!pfile->state.save_comments) 1123 { 1124 result->flags |= PREV_WHITE; 1125 goto update_tokens_line; 1126 } 1127 1128 /* Save the comment as a token in its own right. */ 1129 save_comment (pfile, result, comment_start, c); 1130 break; 1131 1132 case '<': 1133 if (pfile->state.angled_headers) 1134 { 1135 result->type = CPP_HEADER_NAME; 1136 parse_string (pfile, result, '>'); 1137 break; 1138 } 1139 1140 c = get_effective_char (pfile); 1141 if (c == '=') 1142 result->type = CPP_LESS_EQ; 1143 else if (c == '<') 1144 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); 1145 else if (c == '?' && CPP_OPTION (pfile, cplusplus)) 1146 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN); 1147 else if (c == ':' && CPP_OPTION (pfile, digraphs)) 1148 { 1149 result->type = CPP_OPEN_SQUARE; 1150 result->flags |= DIGRAPH; 1151 } 1152 else if (c == '%' && CPP_OPTION (pfile, digraphs)) 1153 { 1154 result->type = CPP_OPEN_BRACE; 1155 result->flags |= DIGRAPH; 1156 } 1157 else 1158 { 1159 BACKUP (); 1160 result->type = CPP_LESS; 1161 } 1162 break; 1163 1164 case '>': 1165 c = get_effective_char (pfile); 1166 if (c == '=') 1167 result->type = CPP_GREATER_EQ; 1168 else if (c == '>') 1169 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); 1170 else if (c == '?' && CPP_OPTION (pfile, cplusplus)) 1171 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX); 1172 else 1173 { 1174 BACKUP (); 1175 result->type = CPP_GREATER; 1176 } 1177 break; 1178 1179 case '%': 1180 c = get_effective_char (pfile); 1181 if (c == '=') 1182 result->type = CPP_MOD_EQ; 1183 else if (CPP_OPTION (pfile, digraphs) && c == ':') 1184 { 1185 result->flags |= DIGRAPH; 1186 result->type = CPP_HASH; 1187 if (get_effective_char (pfile) == '%') 1188 { 1189 const unsigned char *pos = buffer->cur; 1190 1191 if (get_effective_char (pfile) == ':') 1192 result->type = CPP_PASTE; 1193 else 1194 buffer->cur = pos - 1; 1195 } 1196 else 1197 BACKUP (); 1198 } 1199 else if (CPP_OPTION (pfile, digraphs) && c == '>') 1200 { 1201 result->flags |= DIGRAPH; 1202 result->type = CPP_CLOSE_BRACE; 1203 } 1204 else 1205 { 1206 BACKUP (); 1207 result->type = CPP_MOD; 1208 } 1209 break; 1210 1211 case '.': 1212 result->type = CPP_DOT; 1213 c = get_effective_char (pfile); 1214 if (c == '.') 1215 { 1216 const unsigned char *pos = buffer->cur; 1217 1218 if (get_effective_char (pfile) == '.') 1219 result->type = CPP_ELLIPSIS; 1220 else 1221 buffer->cur = pos - 1; 1222 } 1223 /* All known character sets have 0...9 contiguous. */ 1224 else if (ISDIGIT (c)) 1225 { 1226 result->type = CPP_NUMBER; 1227 parse_number (pfile, &result->val.str, 1); 1228 } 1229 else if (c == '*' && CPP_OPTION (pfile, cplusplus)) 1230 result->type = CPP_DOT_STAR; 1231 else 1232 BACKUP (); 1233 break; 1234 1235 case '+': 1236 c = get_effective_char (pfile); 1237 if (c == '+') 1238 result->type = CPP_PLUS_PLUS; 1239 else if (c == '=') 1240 result->type = CPP_PLUS_EQ; 1241 else 1242 { 1243 BACKUP (); 1244 result->type = CPP_PLUS; 1245 } 1246 break; 1247 1248 case '-': 1249 c = get_effective_char (pfile); 1250 if (c == '>') 1251 { 1252 result->type = CPP_DEREF; 1253 if (CPP_OPTION (pfile, cplusplus)) 1254 { 1255 if (get_effective_char (pfile) == '*') 1256 result->type = CPP_DEREF_STAR; 1257 else 1258 BACKUP (); 1259 } 1260 } 1261 else if (c == '-') 1262 result->type = CPP_MINUS_MINUS; 1263 else if (c == '=') 1264 result->type = CPP_MINUS_EQ; 1265 else 1266 { 1267 BACKUP (); 1268 result->type = CPP_MINUS; 1269 } 1270 break; 1271 1272 case '&': 1273 c = get_effective_char (pfile); 1274 if (c == '&') 1275 result->type = CPP_AND_AND; 1276 else if (c == '=') 1277 result->type = CPP_AND_EQ; 1278 else 1279 { 1280 BACKUP (); 1281 result->type = CPP_AND; 1282 } 1283 break; 1284 1285 case '|': 1286 c = get_effective_char (pfile); 1287 if (c == '|') 1288 result->type = CPP_OR_OR; 1289 else if (c == '=') 1290 result->type = CPP_OR_EQ; 1291 else 1292 { 1293 BACKUP (); 1294 result->type = CPP_OR; 1295 } 1296 break; 1297 1298 case ':': 1299 c = get_effective_char (pfile); 1300 if (c == ':' && CPP_OPTION (pfile, cplusplus)) 1301 result->type = CPP_SCOPE; 1302 else if (c == '>' && CPP_OPTION (pfile, digraphs)) 1303 { 1304 result->flags |= DIGRAPH; 1305 result->type = CPP_CLOSE_SQUARE; 1306 } 1307 else 1308 { 1309 BACKUP (); 1310 result->type = CPP_COLON; 1311 } 1312 break; 1313 1314 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; 1315 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; 1316 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; 1317 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; 1318 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; 1319 1320 case '~': result->type = CPP_COMPL; break; 1321 case ',': result->type = CPP_COMMA; break; 1322 case '(': result->type = CPP_OPEN_PAREN; break; 1323 case ')': result->type = CPP_CLOSE_PAREN; break; 1324 case '[': result->type = CPP_OPEN_SQUARE; break; 1325 case ']': result->type = CPP_CLOSE_SQUARE; break; 1326 case '{': result->type = CPP_OPEN_BRACE; break; 1327 case '}': result->type = CPP_CLOSE_BRACE; break; 1328 case ';': result->type = CPP_SEMICOLON; break; 1329 1330 /* @ is a punctuator in Objective-C. */ 1331 case '@': result->type = CPP_ATSIGN; break; 1332 1333 case '$': 1334 if (CPP_OPTION (pfile, dollars_in_ident)) 1335 goto start_ident; 1336 /* Fall through... */ 1337 1338 random_char: 1339 default: 1340 result->type = CPP_OTHER; 1341 result->val.c = c; 1342 break; 1343 } 1344 1345 return result; 1346 } 1347 1348 /* An upper bound on the number of bytes needed to spell TOKEN, 1349 including preceding whitespace. */ 1350 unsigned int 1351 cpp_token_len (token) 1352 const cpp_token *token; 1353 { 1354 unsigned int len; 1355 1356 switch (TOKEN_SPELL (token)) 1357 { 1358 default: len = 0; break; 1359 case SPELL_NUMBER: 1360 case SPELL_STRING: len = token->val.str.len; break; 1361 case SPELL_IDENT: len = NODE_LEN (token->val.node); break; 1362 } 1363 /* 1 for whitespace, 4 for comment delimiters. */ 1364 return len + 5; 1365 } 1366 1367 /* Write the spelling of a token TOKEN to BUFFER. The buffer must 1368 already contain the enough space to hold the token's spelling. 1369 Returns a pointer to the character after the last character 1370 written. */ 1371 unsigned char * 1372 cpp_spell_token (pfile, token, buffer) 1373 cpp_reader *pfile; /* Would be nice to be rid of this... */ 1374 const cpp_token *token; 1375 unsigned char *buffer; 1376 { 1377 switch (TOKEN_SPELL (token)) 1378 { 1379 case SPELL_OPERATOR: 1380 { 1381 const unsigned char *spelling; 1382 unsigned char c; 1383 1384 if (token->flags & DIGRAPH) 1385 spelling 1386 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1387 else if (token->flags & NAMED_OP) 1388 goto spell_ident; 1389 else 1390 spelling = TOKEN_NAME (token); 1391 1392 while ((c = *spelling++) != '\0') 1393 *buffer++ = c; 1394 } 1395 break; 1396 1397 case SPELL_CHAR: 1398 *buffer++ = token->val.c; 1399 break; 1400 1401 spell_ident: 1402 case SPELL_IDENT: 1403 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); 1404 buffer += NODE_LEN (token->val.node); 1405 break; 1406 1407 case SPELL_NUMBER: 1408 memcpy (buffer, token->val.str.text, token->val.str.len); 1409 buffer += token->val.str.len; 1410 break; 1411 1412 case SPELL_STRING: 1413 { 1414 int left, right, tag; 1415 switch (token->type) 1416 { 1417 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; 1418 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; 1419 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; 1420 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; 1421 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; 1422 default: 1423 cpp_error (pfile, DL_ICE, "unknown string token %s\n", 1424 TOKEN_NAME (token)); 1425 return buffer; 1426 } 1427 if (tag) *buffer++ = tag; 1428 *buffer++ = left; 1429 memcpy (buffer, token->val.str.text, token->val.str.len); 1430 buffer += token->val.str.len; 1431 *buffer++ = right; 1432 } 1433 break; 1434 1435 case SPELL_NONE: 1436 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token)); 1437 break; 1438 } 1439 1440 return buffer; 1441 } 1442 1443 /* Returns TOKEN spelt as a null-terminated string. The string is 1444 freed when the reader is destroyed. Useful for diagnostics. */ 1445 unsigned char * 1446 cpp_token_as_text (pfile, token) 1447 cpp_reader *pfile; 1448 const cpp_token *token; 1449 { 1450 unsigned int len = cpp_token_len (token); 1451 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; 1452 1453 end = cpp_spell_token (pfile, token, start); 1454 end[0] = '\0'; 1455 1456 return start; 1457 } 1458 1459 /* Used by C front ends, which really should move to using 1460 cpp_token_as_text. */ 1461 const char * 1462 cpp_type2name (type) 1463 enum cpp_ttype type; 1464 { 1465 return (const char *) token_spellings[type].name; 1466 } 1467 1468 /* Writes the spelling of token to FP, without any preceding space. 1469 Separated from cpp_spell_token for efficiency - to avoid stdio 1470 double-buffering. */ 1471 void 1472 cpp_output_token (token, fp) 1473 const cpp_token *token; 1474 FILE *fp; 1475 { 1476 switch (TOKEN_SPELL (token)) 1477 { 1478 case SPELL_OPERATOR: 1479 { 1480 const unsigned char *spelling; 1481 int c; 1482 1483 if (token->flags & DIGRAPH) 1484 spelling 1485 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; 1486 else if (token->flags & NAMED_OP) 1487 goto spell_ident; 1488 else 1489 spelling = TOKEN_NAME (token); 1490 1491 c = *spelling; 1492 do 1493 putc (c, fp); 1494 while ((c = *++spelling) != '\0'); 1495 } 1496 break; 1497 1498 case SPELL_CHAR: 1499 putc (token->val.c, fp); 1500 break; 1501 1502 spell_ident: 1503 case SPELL_IDENT: 1504 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp); 1505 break; 1506 1507 case SPELL_NUMBER: 1508 fwrite (token->val.str.text, 1, token->val.str.len, fp); 1509 break; 1510 1511 case SPELL_STRING: 1512 { 1513 int left, right, tag; 1514 switch (token->type) 1515 { 1516 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; 1517 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; 1518 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; 1519 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; 1520 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; 1521 default: 1522 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token)); 1523 return; 1524 } 1525 if (tag) putc (tag, fp); 1526 putc (left, fp); 1527 fwrite (token->val.str.text, 1, token->val.str.len, fp); 1528 putc (right, fp); 1529 } 1530 break; 1531 1532 case SPELL_NONE: 1533 /* An error, most probably. */ 1534 break; 1535 } 1536 } 1537 1538 /* Compare two tokens. */ 1539 int 1540 _cpp_equiv_tokens (a, b) 1541 const cpp_token *a, *b; 1542 { 1543 if (a->type == b->type && a->flags == b->flags) 1544 switch (TOKEN_SPELL (a)) 1545 { 1546 default: /* Keep compiler happy. */ 1547 case SPELL_OPERATOR: 1548 return 1; 1549 case SPELL_CHAR: 1550 return a->val.c == b->val.c; /* Character. */ 1551 case SPELL_NONE: 1552 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); 1553 case SPELL_IDENT: 1554 return a->val.node == b->val.node; 1555 case SPELL_NUMBER: 1556 case SPELL_STRING: 1557 return (a->val.str.len == b->val.str.len 1558 && !memcmp (a->val.str.text, b->val.str.text, 1559 a->val.str.len)); 1560 } 1561 1562 return 0; 1563 } 1564 1565 /* Returns nonzero if a space should be inserted to avoid an 1566 accidental token paste for output. For simplicity, it is 1567 conservative, and occasionally advises a space where one is not 1568 needed, e.g. "." and ".2". */ 1569 int 1570 cpp_avoid_paste (pfile, token1, token2) 1571 cpp_reader *pfile; 1572 const cpp_token *token1, *token2; 1573 { 1574 enum cpp_ttype a = token1->type, b = token2->type; 1575 cppchar_t c; 1576 1577 if (token1->flags & NAMED_OP) 1578 a = CPP_NAME; 1579 if (token2->flags & NAMED_OP) 1580 b = CPP_NAME; 1581 1582 c = EOF; 1583 if (token2->flags & DIGRAPH) 1584 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; 1585 else if (token_spellings[b].category == SPELL_OPERATOR) 1586 c = token_spellings[b].name[0]; 1587 1588 /* Quickly get everything that can paste with an '='. */ 1589 if ((int) a <= (int) CPP_LAST_EQ && c == '=') 1590 return 1; 1591 1592 switch (a) 1593 { 1594 case CPP_GREATER: return c == '>' || c == '?'; 1595 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':'; 1596 case CPP_PLUS: return c == '+'; 1597 case CPP_MINUS: return c == '-' || c == '>'; 1598 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */ 1599 case CPP_MOD: return c == ':' || c == '>'; 1600 case CPP_AND: return c == '&'; 1601 case CPP_OR: return c == '|'; 1602 case CPP_COLON: return c == ':' || c == '>'; 1603 case CPP_DEREF: return c == '*'; 1604 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER; 1605 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */ 1606 case CPP_NAME: return ((b == CPP_NUMBER 1607 && name_p (pfile, &token2->val.str)) 1608 || b == CPP_NAME 1609 || b == CPP_CHAR || b == CPP_STRING); /* L */ 1610 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME 1611 || c == '.' || c == '+' || c == '-'); 1612 case CPP_OTHER: return (CPP_OPTION (pfile, objc) 1613 && token1->val.c == '@' 1614 && (b == CPP_NAME || b == CPP_STRING)); 1615 default: break; 1616 } 1617 1618 return 0; 1619 } 1620 1621 /* Output all the remaining tokens on the current line, and a newline 1622 character, to FP. Leading whitespace is removed. If there are 1623 macros, special token padding is not performed. */ 1624 void 1625 cpp_output_line (pfile, fp) 1626 cpp_reader *pfile; 1627 FILE *fp; 1628 { 1629 const cpp_token *token; 1630 1631 token = cpp_get_token (pfile); 1632 while (token->type != CPP_EOF) 1633 { 1634 cpp_output_token (token, fp); 1635 token = cpp_get_token (pfile); 1636 if (token->flags & PREV_WHITE) 1637 putc (' ', fp); 1638 } 1639 1640 putc ('\n', fp); 1641 } 1642 1643 /* Returns the value of a hexadecimal digit. */ 1644 static unsigned int 1645 hex_digit_value (c) 1646 unsigned int c; 1647 { 1648 if (hex_p (c)) 1649 return hex_value (c); 1650 else 1651 abort (); 1652 } 1653 1654 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate 1655 failure if cpplib is not parsing C++ or C99. Such failure is 1656 silent, and no variables are updated. Otherwise returns 0, and 1657 warns if -Wtraditional. 1658 1659 [lex.charset]: The character designated by the universal character 1660 name \UNNNNNNNN is that character whose character short name in 1661 ISO/IEC 10646 is NNNNNNNN; the character designated by the 1662 universal character name \uNNNN is that character whose character 1663 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value 1664 for a universal character name is less than 0x20 or in the range 1665 0x7F-0x9F (inclusive), or if the universal character name 1666 designates a character in the basic source character set, then the 1667 program is ill-formed. 1668 1669 We assume that wchar_t is Unicode, so we don't need to do any 1670 mapping. Is this ever wrong? 1671 1672 PC points to the 'u' or 'U', PSTR is points to the byte after PC, 1673 LIMIT is the end of the string or charconst. PSTR is updated to 1674 point after the UCS on return, and the UCS is written into PC. */ 1675 1676 static int 1677 maybe_read_ucs (pfile, pstr, limit, pc) 1678 cpp_reader *pfile; 1679 const unsigned char **pstr; 1680 const unsigned char *limit; 1681 cppchar_t *pc; 1682 { 1683 const unsigned char *p = *pstr; 1684 unsigned int code = 0; 1685 unsigned int c = *pc, length; 1686 1687 /* Only attempt to interpret a UCS for C++ and C99. */ 1688 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))) 1689 return 1; 1690 1691 if (CPP_WTRADITIONAL (pfile)) 1692 cpp_error (pfile, DL_WARNING, 1693 "the meaning of '\\%c' is different in traditional C", c); 1694 1695 length = (c == 'u' ? 4: 8); 1696 1697 if ((size_t) (limit - p) < length) 1698 { 1699 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name"); 1700 /* Skip to the end to avoid more diagnostics. */ 1701 p = limit; 1702 } 1703 else 1704 { 1705 for (; length; length--, p++) 1706 { 1707 c = *p; 1708 if (ISXDIGIT (c)) 1709 code = (code << 4) + hex_digit_value (c); 1710 else 1711 { 1712 cpp_error (pfile, DL_ERROR, 1713 "non-hex digit '%c' in universal-character-name", c); 1714 /* We shouldn't skip in case there are multibyte chars. */ 1715 break; 1716 } 1717 } 1718 } 1719 1720 #ifdef TARGET_EBCDIC 1721 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target"); 1722 code = 0x3f; /* EBCDIC invalid character */ 1723 #else 1724 /* True extended characters are OK. */ 1725 if (code >= 0xa0 1726 && !(code & 0x80000000) 1727 && !(code >= 0xD800 && code <= 0xDFFF)) 1728 ; 1729 /* The standard permits $, @ and ` to be specified as UCNs. We use 1730 hex escapes so that this also works with EBCDIC hosts. */ 1731 else if (code == 0x24 || code == 0x40 || code == 0x60) 1732 ; 1733 /* Don't give another error if one occurred above. */ 1734 else if (length == 0) 1735 cpp_error (pfile, DL_ERROR, "universal-character-name out of range"); 1736 #endif 1737 1738 *pstr = p; 1739 *pc = code; 1740 return 0; 1741 } 1742 1743 /* Returns the value of an escape sequence, truncated to the correct 1744 target precision. PSTR points to the input pointer, which is just 1745 after the backslash. LIMIT is how much text we have. WIDE is true 1746 if the escape sequence is part of a wide character constant or 1747 string literal. Handles all relevant diagnostics. */ 1748 cppchar_t 1749 cpp_parse_escape (pfile, pstr, limit, wide) 1750 cpp_reader *pfile; 1751 const unsigned char **pstr; 1752 const unsigned char *limit; 1753 int wide; 1754 { 1755 int unknown = 0; 1756 const unsigned char *str = *pstr; 1757 cppchar_t c, mask; 1758 unsigned int width; 1759 1760 if (wide) 1761 width = CPP_OPTION (pfile, wchar_precision); 1762 else 1763 width = CPP_OPTION (pfile, char_precision); 1764 if (width < BITS_PER_CPPCHAR_T) 1765 mask = ((cppchar_t) 1 << width) - 1; 1766 else 1767 mask = ~0; 1768 1769 c = *str++; 1770 switch (c) 1771 { 1772 case '\\': case '\'': case '"': case '?': break; 1773 case 'b': c = TARGET_BS; break; 1774 case 'f': c = TARGET_FF; break; 1775 case 'n': c = TARGET_NEWLINE; break; 1776 case 'r': c = TARGET_CR; break; 1777 case 't': c = TARGET_TAB; break; 1778 case 'v': c = TARGET_VT; break; 1779 1780 case '(': case '{': case '[': case '%': 1781 /* '\(', etc, are used at beginning of line to avoid confusing Emacs. 1782 '\%' is used to prevent SCCS from getting confused. */ 1783 unknown = CPP_PEDANTIC (pfile); 1784 break; 1785 1786 case 'a': 1787 if (CPP_WTRADITIONAL (pfile)) 1788 cpp_error (pfile, DL_WARNING, 1789 "the meaning of '\\a' is different in traditional C"); 1790 c = TARGET_BELL; 1791 break; 1792 1793 case 'e': case 'E': 1794 if (CPP_PEDANTIC (pfile)) 1795 cpp_error (pfile, DL_PEDWARN, 1796 "non-ISO-standard escape sequence, '\\%c'", (int) c); 1797 c = TARGET_ESC; 1798 break; 1799 1800 case 'u': case 'U': 1801 unknown = maybe_read_ucs (pfile, &str, limit, &c); 1802 break; 1803 1804 case 'x': 1805 if (CPP_WTRADITIONAL (pfile)) 1806 cpp_error (pfile, DL_WARNING, 1807 "the meaning of '\\x' is different in traditional C"); 1808 1809 { 1810 cppchar_t i = 0, overflow = 0; 1811 int digits_found = 0; 1812 1813 while (str < limit) 1814 { 1815 c = *str; 1816 if (! ISXDIGIT (c)) 1817 break; 1818 str++; 1819 overflow |= i ^ (i << 4 >> 4); 1820 i = (i << 4) + hex_digit_value (c); 1821 digits_found = 1; 1822 } 1823 1824 if (!digits_found) 1825 cpp_error (pfile, DL_ERROR, 1826 "\\x used with no following hex digits"); 1827 1828 if (overflow | (i != (i & mask))) 1829 { 1830 cpp_error (pfile, DL_PEDWARN, 1831 "hex escape sequence out of range"); 1832 i &= mask; 1833 } 1834 c = i; 1835 } 1836 break; 1837 1838 case '0': case '1': case '2': case '3': 1839 case '4': case '5': case '6': case '7': 1840 { 1841 size_t count = 0; 1842 cppchar_t i = c - '0'; 1843 1844 while (str < limit && ++count < 3) 1845 { 1846 c = *str; 1847 if (c < '0' || c > '7') 1848 break; 1849 str++; 1850 i = (i << 3) + c - '0'; 1851 } 1852 1853 if (i != (i & mask)) 1854 { 1855 cpp_error (pfile, DL_PEDWARN, 1856 "octal escape sequence out of range"); 1857 i &= mask; 1858 } 1859 c = i; 1860 } 1861 break; 1862 1863 default: 1864 unknown = 1; 1865 break; 1866 } 1867 1868 if (unknown) 1869 { 1870 if (ISGRAPH (c)) 1871 cpp_error (pfile, DL_PEDWARN, 1872 "unknown escape sequence '\\%c'", (int) c); 1873 else 1874 cpp_error (pfile, DL_PEDWARN, 1875 "unknown escape sequence: '\\%03o'", (int) c); 1876 } 1877 1878 if (c > mask) 1879 { 1880 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type"); 1881 c &= mask; 1882 } 1883 1884 *pstr = str; 1885 return c; 1886 } 1887 1888 /* Interpret a (possibly wide) character constant in TOKEN. 1889 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN 1890 points to a variable that is filled in with the number of 1891 characters seen, and UNSIGNEDP to a variable that indicates whether 1892 the result has signed type. */ 1893 cppchar_t 1894 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp) 1895 cpp_reader *pfile; 1896 const cpp_token *token; 1897 unsigned int *pchars_seen; 1898 int *unsignedp; 1899 { 1900 const unsigned char *str = token->val.str.text; 1901 const unsigned char *limit = str + token->val.str.len; 1902 unsigned int chars_seen = 0; 1903 size_t width, max_chars; 1904 cppchar_t c, mask, result = 0; 1905 bool unsigned_p; 1906 1907 #ifdef MULTIBYTE_CHARS 1908 (void) local_mbtowc (NULL, NULL, 0); 1909 #endif 1910 1911 /* Width in bits. */ 1912 if (token->type == CPP_CHAR) 1913 { 1914 width = CPP_OPTION (pfile, char_precision); 1915 max_chars = CPP_OPTION (pfile, int_precision) / width; 1916 unsigned_p = CPP_OPTION (pfile, unsigned_char); 1917 } 1918 else 1919 { 1920 width = CPP_OPTION (pfile, wchar_precision); 1921 max_chars = 1; 1922 unsigned_p = CPP_OPTION (pfile, unsigned_wchar); 1923 } 1924 1925 if (width < BITS_PER_CPPCHAR_T) 1926 mask = ((cppchar_t) 1 << width) - 1; 1927 else 1928 mask = ~0; 1929 1930 while (str < limit) 1931 { 1932 #ifdef MULTIBYTE_CHARS 1933 wchar_t wc; 1934 int char_len; 1935 1936 char_len = local_mbtowc (&wc, str, limit - str); 1937 if (char_len == -1) 1938 { 1939 cpp_error (pfile, DL_WARNING, 1940 "ignoring invalid multibyte character"); 1941 c = *str++; 1942 } 1943 else 1944 { 1945 str += char_len; 1946 c = wc; 1947 } 1948 #else 1949 c = *str++; 1950 #endif 1951 1952 if (c == '\\') 1953 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR); 1954 1955 #ifdef MAP_CHARACTER 1956 if (ISPRINT (c)) 1957 c = MAP_CHARACTER (c); 1958 #endif 1959 1960 chars_seen++; 1961 1962 /* Truncate the character, scale the result and merge the two. */ 1963 c &= mask; 1964 if (width < BITS_PER_CPPCHAR_T) 1965 result = (result << width) | c; 1966 else 1967 result = c; 1968 } 1969 1970 if (chars_seen == 0) 1971 cpp_error (pfile, DL_ERROR, "empty character constant"); 1972 else if (chars_seen > 1) 1973 { 1974 /* Multichar charconsts are of type int and therefore signed. */ 1975 unsigned_p = 0; 1976 1977 if (chars_seen > max_chars) 1978 { 1979 chars_seen = max_chars; 1980 cpp_error (pfile, DL_WARNING, 1981 "character constant too long for its type"); 1982 } 1983 else if (CPP_OPTION (pfile, warn_multichar)) 1984 cpp_error (pfile, DL_WARNING, "multi-character character constant"); 1985 } 1986 1987 /* Sign-extend or truncate the constant to cppchar_t. The value is 1988 in WIDTH bits, but for multi-char charconsts it's value is the 1989 full target type's width. */ 1990 if (chars_seen > 1) 1991 width *= max_chars; 1992 if (width < BITS_PER_CPPCHAR_T) 1993 { 1994 mask = ((cppchar_t) 1 << width) - 1; 1995 if (unsigned_p || !(result & (1 << (width - 1)))) 1996 result &= mask; 1997 else 1998 result |= ~mask; 1999 } 2000 2001 *pchars_seen = chars_seen; 2002 *unsignedp = unsigned_p; 2003 return result; 2004 } 2005 2006 /* Memory buffers. Changing these three constants can have a dramatic 2007 effect on performance. The values here are reasonable defaults, 2008 but might be tuned. If you adjust them, be sure to test across a 2009 range of uses of cpplib, including heavy nested function-like macro 2010 expansion. Also check the change in peak memory usage (NJAMD is a 2011 good tool for this). */ 2012 #define MIN_BUFF_SIZE 8000 2013 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) 2014 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ 2015 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) 2016 2017 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) 2018 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! 2019 #endif 2020 2021 /* Create a new allocation buffer. Place the control block at the end 2022 of the buffer, so that buffer overflows will cause immediate chaos. */ 2023 static _cpp_buff * 2024 new_buff (len) 2025 size_t len; 2026 { 2027 _cpp_buff *result; 2028 unsigned char *base; 2029 2030 if (len < MIN_BUFF_SIZE) 2031 len = MIN_BUFF_SIZE; 2032 len = CPP_ALIGN (len); 2033 2034 base = xmalloc (len + sizeof (_cpp_buff)); 2035 result = (_cpp_buff *) (base + len); 2036 result->base = base; 2037 result->cur = base; 2038 result->limit = base + len; 2039 result->next = NULL; 2040 return result; 2041 } 2042 2043 /* Place a chain of unwanted allocation buffers on the free list. */ 2044 void 2045 _cpp_release_buff (pfile, buff) 2046 cpp_reader *pfile; 2047 _cpp_buff *buff; 2048 { 2049 _cpp_buff *end = buff; 2050 2051 while (end->next) 2052 end = end->next; 2053 end->next = pfile->free_buffs; 2054 pfile->free_buffs = buff; 2055 } 2056 2057 /* Return a free buffer of size at least MIN_SIZE. */ 2058 _cpp_buff * 2059 _cpp_get_buff (pfile, min_size) 2060 cpp_reader *pfile; 2061 size_t min_size; 2062 { 2063 _cpp_buff *result, **p; 2064 2065 for (p = &pfile->free_buffs;; p = &(*p)->next) 2066 { 2067 size_t size; 2068 2069 if (*p == NULL) 2070 return new_buff (min_size); 2071 result = *p; 2072 size = result->limit - result->base; 2073 /* Return a buffer that's big enough, but don't waste one that's 2074 way too big. */ 2075 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) 2076 break; 2077 } 2078 2079 *p = result->next; 2080 result->next = NULL; 2081 result->cur = result->base; 2082 return result; 2083 } 2084 2085 /* Creates a new buffer with enough space to hold the uncommitted 2086 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies 2087 the excess bytes to the new buffer. Chains the new buffer after 2088 BUFF, and returns the new buffer. */ 2089 _cpp_buff * 2090 _cpp_append_extend_buff (pfile, buff, min_extra) 2091 cpp_reader *pfile; 2092 _cpp_buff *buff; 2093 size_t min_extra; 2094 { 2095 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); 2096 _cpp_buff *new_buff = _cpp_get_buff (pfile, size); 2097 2098 buff->next = new_buff; 2099 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); 2100 return new_buff; 2101 } 2102 2103 /* Creates a new buffer with enough space to hold the uncommitted 2104 remaining bytes of the buffer pointed to by BUFF, and at least 2105 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. 2106 Chains the new buffer before the buffer pointed to by BUFF, and 2107 updates the pointer to point to the new buffer. */ 2108 void 2109 _cpp_extend_buff (pfile, pbuff, min_extra) 2110 cpp_reader *pfile; 2111 _cpp_buff **pbuff; 2112 size_t min_extra; 2113 { 2114 _cpp_buff *new_buff, *old_buff = *pbuff; 2115 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); 2116 2117 new_buff = _cpp_get_buff (pfile, size); 2118 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); 2119 new_buff->next = old_buff; 2120 *pbuff = new_buff; 2121 } 2122 2123 /* Free a chain of buffers starting at BUFF. */ 2124 void 2125 _cpp_free_buff (buff) 2126 _cpp_buff *buff; 2127 { 2128 _cpp_buff *next; 2129 2130 for (; buff; buff = next) 2131 { 2132 next = buff->next; 2133 free (buff->base); 2134 } 2135 } 2136 2137 /* Allocate permanent, unaligned storage of length LEN. */ 2138 unsigned char * 2139 _cpp_unaligned_alloc (pfile, len) 2140 cpp_reader *pfile; 2141 size_t len; 2142 { 2143 _cpp_buff *buff = pfile->u_buff; 2144 unsigned char *result = buff->cur; 2145 2146 if (len > (size_t) (buff->limit - result)) 2147 { 2148 buff = _cpp_get_buff (pfile, len); 2149 buff->next = pfile->u_buff; 2150 pfile->u_buff = buff; 2151 result = buff->cur; 2152 } 2153 2154 buff->cur = result + len; 2155 return result; 2156 } 2157 2158 /* Allocate permanent, unaligned storage of length LEN from a_buff. 2159 That buffer is used for growing allocations when saving macro 2160 replacement lists in a #define, and when parsing an answer to an 2161 assertion in #assert, #unassert or #if (and therefore possibly 2162 whilst expanding macros). It therefore must not be used by any 2163 code that they might call: specifically the lexer and the guts of 2164 the macro expander. 2165 2166 All existing other uses clearly fit this restriction: storing 2167 registered pragmas during initialization. */ 2168 unsigned char * 2169 _cpp_aligned_alloc (pfile, len) 2170 cpp_reader *pfile; 2171 size_t len; 2172 { 2173 _cpp_buff *buff = pfile->a_buff; 2174 unsigned char *result = buff->cur; 2175 2176 if (len > (size_t) (buff->limit - result)) 2177 { 2178 buff = _cpp_get_buff (pfile, len); 2179 buff->next = pfile->a_buff; 2180 pfile->a_buff = buff; 2181 result = buff->cur; 2182 } 2183 2184 buff->cur = result + len; 2185 return result; 2186 } 2187