1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
8
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "cpphash.h"
27
28 #ifdef MULTIBYTE_CHARS
29 #include "mbchar.h"
30 #include <locale.h>
31 #endif
32
/* The ways in which a token type can be spelt; one of these is
   recorded for every token type in the token_spellings table below.

   Tokens with SPELL_STRING store their spelling in the token list,
   and its length in token->val.str.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Category of every OP () row of TTYPE_TABLE.  */
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
44
/* One row of the token_spellings table.  */
struct token_spelling
{
  /* How tokens of this type are spelt.  */
  enum spell_type category;
  /* For OP () rows the operator's spelling; for TK () rows the
     stringified name of the token type's enumerator.  */
  const unsigned char *name;
};
50
/* Spellings of the digraphs.  NOTE(review): the order presumably has
   to match whatever index the digraph-spelling code (not visible
   here) computes -- confirm before reordering.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
53
/* Build the spelling table by expanding TTYPE_TABLE with temporary
   definitions of OP and TK.  An OP (e, s) row becomes an operator
   entry whose name is the string S; a TK (e, s) row becomes an entry
   of category S whose name is the stringified enumerator E.  */
#define OP(e, s) { SPELL_OPERATOR, U s },
#define TK(e, s) { s, U STRINGX (e) },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK
59
/* Look up the spelling category and the spelling name of TOKEN in
   token_spellings, indexed by the token's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read position to the most recently recorded backup
   point.  Expects a variable named "buffer" to be in scope at the
   point of use.  */
#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
63
/* Forward declarations of the file-local helpers.  PARAMS keeps the
   prototypes usable with compilers that lack them.  */

/* Newline handling, trigraphs and escaped newlines.  */
static void handle_newline PARAMS ((cpp_reader *));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
static cppchar_t get_effective_char PARAMS ((cpp_reader *));

/* Comments and whitespace.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
/* Identifiers, numbers, strings and saved comments.  */
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
				  unsigned int *));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static bool trigraph_p PARAMS ((cpp_reader *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
				  cppchar_t));
static bool continue_after_nul PARAMS ((cpp_reader *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
				   const unsigned char *, cppchar_t *));
/* Token-run storage.  */
static tokenrun *next_tokenrun PARAMS ((tokenrun *));

static unsigned int hex_digit_value PARAMS ((unsigned int));
static _cpp_buff *new_buff PARAMS ((size_t));
89
90 /* Utility routine:
91
92 Compares, the token TOKEN to the NUL-terminated string STRING.
93 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
94 int
cpp_ideq(token,string)95 cpp_ideq (token, string)
96 const cpp_token *token;
97 const char *string;
98 {
99 if (token->type != CPP_NAME)
100 return 0;
101
102 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
103 }
104
105 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
106 Returns with buffer->cur pointing to the character immediately
107 following the newline (combination). */
108 static void
handle_newline(pfile)109 handle_newline (pfile)
110 cpp_reader *pfile;
111 {
112 cpp_buffer *buffer = pfile->buffer;
113
114 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
115 only accept CR-LF; maybe we should fall back to that behavior? */
116 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
117 buffer->cur++;
118
119 buffer->line_base = buffer->cur;
120 buffer->col_adjust = 0;
121 pfile->line++;
122 }
123
124 /* Subroutine of skip_escaped_newlines; called when a 3-character
125 sequence beginning with "??" is encountered. buffer->cur points to
126 the second '?'.
127
128 Warn if necessary, and returns true if the sequence forms a
129 trigraph and the trigraph should be honored. */
130 static bool
trigraph_p(pfile)131 trigraph_p (pfile)
132 cpp_reader *pfile;
133 {
134 cpp_buffer *buffer = pfile->buffer;
135 cppchar_t from_char = buffer->cur[1];
136 bool accept;
137
138 if (!_cpp_trigraph_map[from_char])
139 return false;
140
141 accept = CPP_OPTION (pfile, trigraphs);
142
143 /* Don't warn about trigraphs in comments. */
144 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
145 {
146 if (accept)
147 cpp_error_with_line (pfile, DL_WARNING,
148 pfile->line, CPP_BUF_COL (buffer) - 1,
149 "trigraph ??%c converted to %c",
150 (int) from_char,
151 (int) _cpp_trigraph_map[from_char]);
152 else if (buffer->cur != buffer->last_Wtrigraphs)
153 {
154 buffer->last_Wtrigraphs = buffer->cur;
155 cpp_error_with_line (pfile, DL_WARNING,
156 pfile->line, CPP_BUF_COL (buffer) - 1,
157 "trigraph ??%c ignored", (int) from_char);
158 }
159 }
160
161 return accept;
162 }
163
/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
   lie in buffer->cur[-1].  Returns the next byte, which will be in
   buffer->cur[-1].  This routine performs preprocessing stages 1 and
   2 of the ISO C standard.

   Two returned values are not literally the byte at buffer->cur[-1]:
   a converted trigraph yields its replacement character, and a
   backslash-newline that ends the buffer yields EOF.  Nothing is
   done at all for buffers marked from_stage3.  Each time an escaped
   newline is removed, buffer->backup_to is moved to the start of the
   continuation line.  */
static cppchar_t
skip_escaped_newlines (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  cppchar_t next = buffer->cur[-1];

  /* Only do this if we apply stages 1 and 2.  */
  if (!buffer->from_stage3)
    {
      const unsigned char *saved_cur;
      cppchar_t next1;

      do
        {
          if (next == '?')
            {
              /* A lone '?', or a "??x" that is not an accepted
                 trigraph, is returned unchanged.  */
              if (buffer->cur[0] != '?' || !trigraph_p (pfile))
                break;

              /* Translate the trigraph.  */
              next = _cpp_trigraph_map[buffer->cur[1]];
              buffer->cur += 2;
              /* Only the trigraph for backslash can go on to start
                 an escaped newline.  */
              if (next != '\\')
                break;
            }

          if (buffer->cur == buffer->rlimit)
            break;

          /* We have a backslash, and room for at least one more
             character.  Skip horizontal whitespace.  */
          saved_cur = buffer->cur;
          do
            next1 = *buffer->cur++;
          while (is_nvspace (next1) && buffer->cur < buffer->rlimit);

          /* Not followed by a newline: this is an ordinary
             backslash, so undo the whitespace scan.  */
          if (!is_vspace (next1))
            {
              buffer->cur = saved_cur;
              break;
            }

          /* More than one character was consumed, so there was
             whitespace between the backslash and the newline.  */
          if (saved_cur != buffer->cur - 1
              && !pfile->state.lexing_comment)
            cpp_error (pfile, DL_WARNING,
                       "backslash and newline separated by space");

          handle_newline (pfile);
          buffer->backup_to = buffer->cur;
          if (buffer->cur == buffer->rlimit)
            {
              cpp_error (pfile, DL_PEDWARN,
                         "backslash-newline at end of file");
              next = EOF;
            }
          else
            next = *buffer->cur++;
        }
      /* The character after an escaped newline may itself begin
         another one.  */
      while (next == '\\' || next == '?');
    }

  return next;
}
232
233 /* Obtain the next character, after trigraph conversion and skipping
234 an arbitrarily long string of escaped newlines. The common case of
235 no trigraphs or escaped newlines falls through quickly. On return,
236 buffer->backup_to points to where to return to if the character is
237 not to be processed. */
238 static cppchar_t
get_effective_char(pfile)239 get_effective_char (pfile)
240 cpp_reader *pfile;
241 {
242 cppchar_t next;
243 cpp_buffer *buffer = pfile->buffer;
244
245 buffer->backup_to = buffer->cur;
246 next = *buffer->cur++;
247 if (__builtin_expect (next == '?' || next == '\\', 0))
248 next = skip_escaped_newlines (pfile);
249
250 return next;
251 }
252
253 /* Skip a C-style block comment. We find the end of the comment by
254 seeing if an asterisk is before every '/' we encounter. Returns
255 nonzero if comment terminated by EOF, zero otherwise. */
256 static int
skip_block_comment(pfile)257 skip_block_comment (pfile)
258 cpp_reader *pfile;
259 {
260 cpp_buffer *buffer = pfile->buffer;
261 cppchar_t c = EOF, prevc = EOF;
262
263 pfile->state.lexing_comment = 1;
264 while (buffer->cur != buffer->rlimit)
265 {
266 prevc = c, c = *buffer->cur++;
267
268 /* FIXME: For speed, create a new character class of characters
269 of interest inside block comments. */
270 if (c == '?' || c == '\\')
271 c = skip_escaped_newlines (pfile);
272
273 /* People like decorating comments with '*', so check for '/'
274 instead for efficiency. */
275 if (c == '/')
276 {
277 if (prevc == '*')
278 break;
279
280 /* Warn about potential nested comments, but not if the '/'
281 comes immediately before the true comment delimiter.
282 Don't bother to get it right across escaped newlines. */
283 if (CPP_OPTION (pfile, warn_comments)
284 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
285 cpp_error_with_line (pfile, DL_WARNING,
286 pfile->line, CPP_BUF_COL (buffer),
287 "\"/*\" within comment");
288 }
289 else if (is_vspace (c))
290 handle_newline (pfile);
291 else if (c == '\t')
292 adjust_column (pfile);
293 }
294
295 pfile->state.lexing_comment = 0;
296 return c != '/' || prevc != '*';
297 }
298
299 /* Skip a C++ line comment, leaving buffer->cur pointing to the
300 terminating newline. Handles escaped newlines. Returns nonzero
301 if a multiline comment. */
302 static int
skip_line_comment(pfile)303 skip_line_comment (pfile)
304 cpp_reader *pfile;
305 {
306 cpp_buffer *buffer = pfile->buffer;
307 unsigned int orig_line = pfile->line;
308 cppchar_t c;
309 #ifdef MULTIBYTE_CHARS
310 wchar_t wc;
311 int char_len;
312 #endif
313
314 pfile->state.lexing_comment = 1;
315 #ifdef MULTIBYTE_CHARS
316 /* Reset multibyte conversion state. */
317 (void) local_mbtowc (NULL, NULL, 0);
318 #endif
319 do
320 {
321 if (buffer->cur == buffer->rlimit)
322 goto at_eof;
323
324 #ifdef MULTIBYTE_CHARS
325 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
326 buffer->rlimit - buffer->cur);
327 if (char_len == -1)
328 {
329 cpp_error (pfile, DL_WARNING,
330 "ignoring invalid multibyte character");
331 char_len = 1;
332 c = *buffer->cur++;
333 }
334 else
335 {
336 buffer->cur += char_len;
337 c = wc;
338 }
339 #else
340 c = *buffer->cur++;
341 #endif
342 if (c == '?' || c == '\\')
343 c = skip_escaped_newlines (pfile);
344 }
345 while (!is_vspace (c));
346
347 /* Step back over the newline, except at EOF. */
348 buffer->cur--;
349 at_eof:
350
351 pfile->state.lexing_comment = 0;
352 return orig_line != pfile->line;
353 }
354
355 /* pfile->buffer->cur is one beyond the \t character. Update
356 col_adjust so we track the column correctly. */
357 static void
adjust_column(pfile)358 adjust_column (pfile)
359 cpp_reader *pfile;
360 {
361 cpp_buffer *buffer = pfile->buffer;
362 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
363
364 /* Round it up to multiple of the tabstop, but subtract 1 since the
365 tab itself occupies a character position. */
366 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
367 - col % CPP_OPTION (pfile, tabstop)) - 1;
368 }
369
370 /* Skips whitespace, saving the next non-whitespace character.
371 Adjusts pfile->col_adjust to account for tabs. Without this,
372 tokens might be assigned an incorrect column. */
373 static int
skip_whitespace(pfile,c)374 skip_whitespace (pfile, c)
375 cpp_reader *pfile;
376 cppchar_t c;
377 {
378 cpp_buffer *buffer = pfile->buffer;
379 unsigned int warned = 0;
380
381 do
382 {
383 /* Horizontal space always OK. */
384 if (c == ' ')
385 ;
386 else if (c == '\t')
387 adjust_column (pfile);
388 /* Just \f \v or \0 left. */
389 else if (c == '\0')
390 {
391 if (buffer->cur - 1 == buffer->rlimit)
392 return 0;
393 if (!warned)
394 {
395 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
396 warned = 1;
397 }
398 }
399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
401 CPP_BUF_COL (buffer),
402 "%s in preprocessing directive",
403 c == '\f' ? "form feed" : "vertical tab");
404
405 c = *buffer->cur++;
406 }
407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
408 while (is_nvspace (c));
409
410 buffer->cur--;
411 return 1;
412 }
413
414 /* See if the characters of a number token are valid in a name (no
415 '.', '+' or '-'). */
416 static int
name_p(pfile,string)417 name_p (pfile, string)
418 cpp_reader *pfile;
419 const cpp_string *string;
420 {
421 unsigned int i;
422
423 for (i = 0; i < string->len; i++)
424 if (!is_idchar (string->text[i]))
425 return 0;
426
427 return 1;
428 }
429
430 /* Parse an identifier, skipping embedded backslash-newlines. This is
431 a critical inner loop. The common case is an identifier which has
432 not been split by backslash-newline, does not contain a dollar
433 sign, and has already been scanned (roughly 10:1 ratio of
434 seen:unseen identifiers in normal code; the distribution is
435 Poisson-like). Second most common case is a new identifier, not
436 split and no dollar sign. The other possibilities are rare and
437 have been relegated to parse_slow. */
438 static cpp_hashnode *
parse_identifier(pfile)439 parse_identifier (pfile)
440 cpp_reader *pfile;
441 {
442 cpp_hashnode *result;
443 const uchar *cur, *base;
444
445 /* Fast-path loop. Skim over a normal identifier.
446 N.B. ISIDNUM does not include $. */
447 cur = pfile->buffer->cur;
448 while (ISIDNUM (*cur))
449 cur++;
450
451 /* Check for slow-path cases. */
452 if (*cur == '?' || *cur == '\\' || *cur == '$')
453 {
454 unsigned int len;
455
456 base = parse_slow (pfile, cur, 0, &len);
457 result = (cpp_hashnode *)
458 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
459 }
460 else
461 {
462 base = pfile->buffer->cur - 1;
463 pfile->buffer->cur = cur;
464 result = (cpp_hashnode *)
465 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
466 }
467
468 /* Rarely, identifiers require diagnostics when lexed.
469 XXX Has to be forced out of the fast path. */
470 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
471 && !pfile->state.skipping, 0))
472 {
473 /* It is allowed to poison the same identifier twice. */
474 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
475 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
476 NODE_NAME (result));
477
478 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
479 replacement list of a variadic macro. */
480 if (result == pfile->spec_nodes.n__VA_ARGS__
481 && !pfile->state.va_args_ok)
482 cpp_error (pfile, DL_PEDWARN,
483 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
484 }
485
486 return result;
487 }
488
/* Slow path.  This handles numbers and identifiers which have been
   split, or contain dollar signs.  The part of the token from
   PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
   1 if it's a number, and 2 if it has a leading period.  Returns a
   pointer to the token's NUL-terminated spelling in permanent
   storage, and sets PLEN to its length.

   The spelling is accumulated on the hash table's obstack.  PLEN
   does not count the terminating NUL.  On return buffer->cur has
   been backed up to the character that ended the token.  */
static uchar *
parse_slow (pfile, cur, number_p, plen)
     cpp_reader *pfile;
     const uchar *cur;
     int number_p;
     unsigned int *plen;
{
  cpp_buffer *buffer = pfile->buffer;
  const uchar *base = buffer->cur - 1;
  struct obstack *stack = &pfile->hash_table->stack;
  unsigned int c, prevc, saw_dollar = 0;

  /* Place any leading period.  */
  if (number_p == 2)
    obstack_1grow (stack, '.');

  /* Copy the part of the token which is known to be okay.  */
  obstack_grow (stack, base, cur - base);

  /* Now process the part which isn't.  We are looking at one of
     '$', '\\', or '?' on entry to this loop.  */
  prevc = cur[-1];
  c = *cur++;
  buffer->cur = cur;
  for (;;)
    {
      /* Potential escaped newline?  Record where to back up to in
         case C turns out not to belong to the token.  */
      buffer->backup_to = buffer->cur - 1;
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (pfile);

      /* A non-identifier character ends an identifier.  In a number
         it is still accepted if it is a period, or a sign that is
         valid after the previous character.  */
      if (!is_idchar (c))
        {
          if (!number_p)
            break;
          if (c != '.' && !VALID_SIGN (c, prevc))
            break;
        }

      /* Handle normal identifier characters in this loop.  */
      do
        {
          prevc = c;
          obstack_1grow (stack, c);

          if (c == '$')
            saw_dollar++;

          c = *buffer->cur++;
        }
      while (is_idchar (c));
    }

  /* Step back over the unwanted char.  */
  BACKUP ();

  /* $ is not an identifier character in the standard, but is commonly
     accepted as an extension.  Don't warn about it in skipped
     conditional blocks.  */
  if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
    cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");

  /* Identifiers and numbers are null-terminated.  The length is
     taken before the NUL is appended, so it excludes it.  */
  *plen = obstack_object_size (stack);
  obstack_1grow (stack, '\0');
  return obstack_finish (stack);
}
562
563 /* Parse a number, beginning with character C, skipping embedded
564 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
565 before C. Place the result in NUMBER. */
566 static void
parse_number(pfile,number,leading_period)567 parse_number (pfile, number, leading_period)
568 cpp_reader *pfile;
569 cpp_string *number;
570 int leading_period;
571 {
572 const uchar *cur;
573
574 /* Fast-path loop. Skim over a normal number.
575 N.B. ISIDNUM does not include $. */
576 cur = pfile->buffer->cur;
577 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
578 cur++;
579
580 /* Check for slow-path cases. */
581 if (*cur == '?' || *cur == '\\' || *cur == '$')
582 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
583 else
584 {
585 const uchar *base = pfile->buffer->cur - 1;
586 uchar *dest;
587
588 number->len = cur - base + leading_period;
589 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
590 dest[number->len] = '\0';
591 number->text = dest;
592
593 if (leading_period)
594 *dest++ = '.';
595 memcpy (dest, base, cur - base);
596 pfile->buffer->cur = cur;
597 }
598 }
599
600 /* Subroutine of parse_string. */
601 static int
unescaped_terminator_p(pfile,dest)602 unescaped_terminator_p (pfile, dest)
603 cpp_reader *pfile;
604 const unsigned char *dest;
605 {
606 const unsigned char *start, *temp;
607
608 /* In #include-style directives, terminators are not escapeable. */
609 if (pfile->state.angled_headers)
610 return 1;
611
612 start = BUFF_FRONT (pfile->u_buff);
613
614 /* An odd number of consecutive backslashes represents an escaped
615 terminator. */
616 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
617 ;
618
619 return ((dest - temp) & 1) == 0;
620 }
621
622 /* Parses a string, character constant, or angle-bracketed header file
623 name. Handles embedded trigraphs and escaped newlines. The stored
624 string is guaranteed NUL-terminated, but it is not guaranteed that
625 this is the first NUL since embedded NULs are preserved.
626
627 When this function returns, buffer->cur points to the next
628 character to be processed. */
629 static void
parse_string(pfile,token,terminator)630 parse_string (pfile, token, terminator)
631 cpp_reader *pfile;
632 cpp_token *token;
633 cppchar_t terminator;
634 {
635 cpp_buffer *buffer = pfile->buffer;
636 unsigned char *dest, *limit;
637 cppchar_t c;
638 bool warned_nulls = false;
639 #ifdef MULTIBYTE_CHARS
640 wchar_t wc;
641 int char_len;
642 #endif
643
644 dest = BUFF_FRONT (pfile->u_buff);
645 limit = BUFF_LIMIT (pfile->u_buff);
646
647 #ifdef MULTIBYTE_CHARS
648 /* Reset multibyte conversion state. */
649 (void) local_mbtowc (NULL, NULL, 0);
650 #endif
651 for (;;)
652 {
653 /* We need room for another char, possibly the terminating NUL. */
654 if ((size_t) (limit - dest) < 1)
655 {
656 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
657 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
658 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
659 limit = BUFF_LIMIT (pfile->u_buff);
660 }
661
662 #ifdef MULTIBYTE_CHARS
663 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
664 buffer->rlimit - buffer->cur);
665 if (char_len == -1)
666 {
667 cpp_error (pfile, DL_WARNING,
668 "ignoring invalid multibyte character");
669 char_len = 1;
670 c = *buffer->cur++;
671 }
672 else
673 {
674 buffer->cur += char_len;
675 c = wc;
676 }
677 #else
678 c = *buffer->cur++;
679 #endif
680
681 /* Handle trigraphs, escaped newlines etc. */
682 if (c == '?' || c == '\\')
683 c = skip_escaped_newlines (pfile);
684
685 if (c == terminator)
686 {
687 if (unescaped_terminator_p (pfile, dest))
688 break;
689 }
690 else if (is_vspace (c))
691 {
692 /* No string literal may extend over multiple lines. In
693 assembly language, suppress the error except for <>
694 includes. This is a kludge around not knowing where
695 comments are. */
696 unterminated:
697 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
698 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
699 (int) terminator);
700 buffer->cur--;
701 break;
702 }
703 else if (c == '\0')
704 {
705 if (buffer->cur - 1 == buffer->rlimit)
706 goto unterminated;
707 if (!warned_nulls)
708 {
709 warned_nulls = true;
710 cpp_error (pfile, DL_WARNING,
711 "null character(s) preserved in literal");
712 }
713 }
714 #ifdef MULTIBYTE_CHARS
715 if (char_len > 1)
716 {
717 for ( ; char_len > 0; --char_len)
718 *dest++ = (*buffer->cur - char_len);
719 }
720 else
721 #endif
722 *dest++ = c;
723 }
724
725 *dest = '\0';
726
727 token->val.str.text = BUFF_FRONT (pfile->u_buff);
728 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
729 BUFF_FRONT (pfile->u_buff) = dest + 1;
730 }
731
732 /* The stored comment includes the comment start and any terminator. */
733 static void
save_comment(pfile,token,from,type)734 save_comment (pfile, token, from, type)
735 cpp_reader *pfile;
736 cpp_token *token;
737 const unsigned char *from;
738 cppchar_t type;
739 {
740 unsigned char *buffer;
741 unsigned int len, clen;
742
743 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
744
745 /* C++ comments probably (not definitely) have moved past a new
746 line, which we don't want to save in the comment. */
747 if (is_vspace (pfile->buffer->cur[-1]))
748 len--;
749
750 /* If we are currently in a directive, then we need to store all
751 C++ comments as C comments internally, and so we need to
752 allocate a little extra space in that case.
753
754 Note that the only time we encounter a directive here is
755 when we are saving comments in a "#define". */
756 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
757
758 buffer = _cpp_unaligned_alloc (pfile, clen);
759
760 token->type = CPP_COMMENT;
761 token->val.str.len = clen;
762 token->val.str.text = buffer;
763
764 buffer[0] = '/';
765 memcpy (buffer + 1, from, len - 1);
766
767 /* Finish conversion to a C comment, if necessary. */
768 if (pfile->state.in_directive && type == '/')
769 {
770 buffer[1] = '*';
771 buffer[clen - 2] = '*';
772 buffer[clen - 1] = '/';
773 }
774 }
775
776 /* Allocate COUNT tokens for RUN. */
777 void
_cpp_init_tokenrun(run,count)778 _cpp_init_tokenrun (run, count)
779 tokenrun *run;
780 unsigned int count;
781 {
782 run->base = xnewvec (cpp_token, count);
783 run->limit = run->base + count;
784 run->next = NULL;
785 }
786
787 /* Returns the next tokenrun, or creates one if there is none. */
788 static tokenrun *
next_tokenrun(run)789 next_tokenrun (run)
790 tokenrun *run;
791 {
792 if (run->next == NULL)
793 {
794 run->next = xnew (tokenrun);
795 run->next->prev = run;
796 _cpp_init_tokenrun (run->next, 250);
797 }
798
799 return run->next;
800 }
801
802 /* Allocate a single token that is invalidated at the same time as the
803 rest of the tokens on the line. Has its line and col set to the
804 same as the last lexed token, so that diagnostics appear in the
805 right place. */
806 cpp_token *
_cpp_temp_token(pfile)807 _cpp_temp_token (pfile)
808 cpp_reader *pfile;
809 {
810 cpp_token *old, *result;
811
812 old = pfile->cur_token - 1;
813 if (pfile->cur_token == pfile->cur_run->limit)
814 {
815 pfile->cur_run = next_tokenrun (pfile->cur_run);
816 pfile->cur_token = pfile->cur_run->base;
817 }
818
819 result = pfile->cur_token++;
820 result->line = old->line;
821 result->col = old->col;
822 return result;
823 }
824
825 /* Lex a token into RESULT (external interface). Takes care of issues
826 like directive handling, token lookahead, multiple include
827 optimization and skipping. */
828 const cpp_token *
_cpp_lex_token(pfile)829 _cpp_lex_token (pfile)
830 cpp_reader *pfile;
831 {
832 cpp_token *result;
833
834 for (;;)
835 {
836 if (pfile->cur_token == pfile->cur_run->limit)
837 {
838 pfile->cur_run = next_tokenrun (pfile->cur_run);
839 pfile->cur_token = pfile->cur_run->base;
840 }
841
842 if (pfile->lookaheads)
843 {
844 pfile->lookaheads--;
845 result = pfile->cur_token++;
846 }
847 else
848 result = _cpp_lex_direct (pfile);
849
850 if (result->flags & BOL)
851 {
852 /* Is this a directive. If _cpp_handle_directive returns
853 false, it is an assembler #. */
854 if (result->type == CPP_HASH
855 /* 6.10.3 p 11: Directives in a list of macro arguments
856 gives undefined behavior. This implementation
857 handles the directive as normal. */
858 && pfile->state.parsing_args != 1
859 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
860 continue;
861 if (pfile->cb.line_change && !pfile->state.skipping)
862 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
863 }
864
865 /* We don't skip tokens in directives. */
866 if (pfile->state.in_directive)
867 break;
868
869 /* Outside a directive, invalidate controlling macros. At file
870 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
871 get here and MI optimisation works. */
872 pfile->mi_valid = false;
873
874 if (!pfile->state.skipping || result->type == CPP_EOF)
875 break;
876 }
877
878 return result;
879 }
880
881 /* A NUL terminates the current buffer. For ISO preprocessing this is
882 EOF, but for traditional preprocessing it indicates we need a line
883 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
884 to return a CPP_EOF to the caller. */
885 static bool
continue_after_nul(pfile)886 continue_after_nul (pfile)
887 cpp_reader *pfile;
888 {
889 cpp_buffer *buffer = pfile->buffer;
890 bool more = false;
891
892 buffer->saved_flags = BOL;
893 if (CPP_OPTION (pfile, traditional))
894 {
895 if (pfile->state.in_directive)
896 return false;
897
898 _cpp_remove_overlay (pfile);
899 more = _cpp_read_logical_line_trad (pfile);
900 _cpp_overlay_buffer (pfile, pfile->out.base,
901 pfile->out.cur - pfile->out.base);
902 pfile->line = pfile->out.first_line;
903 }
904 else
905 {
906 /* Stop parsing arguments with a CPP_EOF. When we finally come
907 back here, do the work of popping the buffer. */
908 if (!pfile->state.parsing_args)
909 {
910 if (buffer->cur != buffer->line_base)
911 {
912 /* Non-empty files should end in a newline. Don't warn
913 for command line and _Pragma buffers. */
914 handle_newline (pfile);
915 }
916
917 /* Similarly, finish an in-progress directive with CPP_EOF
918 before popping the buffer. */
919 if (!pfile->state.in_directive && buffer->prev)
920 {
921 more = !buffer->return_at_eof;
922 _cpp_pop_buffer (pfile);
923 }
924 }
925 }
926
927 return more;
928 }
929
/* Helper for lexing two-character operators.  If the next effective
   character is CHAR, consume it and set result->type to THEN_TYPE;
   otherwise put the character back and set result->type to
   ELSE_TYPE.  Expects variables named "pfile", "buffer" and "result"
   to be in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) == CHAR)	\
      result->type = THEN_TYPE;			\
    else					\
      {						\
        BACKUP ();				\
        result->type = ELSE_TYPE;		\
      }						\
  } while (0)
940
941 /* Lex a token into pfile->cur_token, which is also incremented, to
942 get diagnostics pointing to the correct location.
943
944 Does not handle issues such as token lookahead, multiple-include
945 optimisation, directives, skipping etc. This function is only
946 suitable for use by _cpp_lex_token, and in special cases like
947 lex_expansion_token which doesn't care for any of these issues.
948
949 When meeting a newline, returns CPP_EOF if parsing a directive,
950 otherwise returns to the start of the token buffer if permissible.
951 Returns the location of the lexed token. */
952 cpp_token *
_cpp_lex_direct(pfile)953 _cpp_lex_direct (pfile)
954 cpp_reader *pfile;
955 {
956 cppchar_t c;
957 cpp_buffer *buffer;
958 const unsigned char *comment_start;
959 cpp_token *result = pfile->cur_token++;
960
961 fresh_line:
962 buffer = pfile->buffer;
963 result->flags = buffer->saved_flags;
964 buffer->saved_flags = 0;
965 update_tokens_line:
966 result->line = pfile->line;
967
968 skipped_white:
969 c = *buffer->cur++;
970 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
971
972 trigraph:
973 switch (c)
974 {
975 case ' ': case '\t': case '\f': case '\v': case '\0':
976 result->flags |= PREV_WHITE;
977 if (skip_whitespace (pfile, c))
978 goto skipped_white;
979
980 /* End of buffer. */
981 buffer->cur--;
982 if (continue_after_nul (pfile))
983 goto fresh_line;
984 result->type = CPP_EOF;
985 break;
986
987 case '\n': case '\r':
988 handle_newline (pfile);
989 buffer->saved_flags = BOL;
990 if (! pfile->state.in_directive)
991 {
992 if (pfile->state.parsing_args == 2)
993 buffer->saved_flags |= PREV_WHITE;
994 if (!pfile->keep_tokens)
995 {
996 pfile->cur_run = &pfile->base_run;
997 result = pfile->base_run.base;
998 pfile->cur_token = result + 1;
999 }
1000 goto fresh_line;
1001 }
1002 result->type = CPP_EOF;
1003 break;
1004
1005 case '?':
1006 case '\\':
1007 /* These could start an escaped newline, or '?' a trigraph. Let
1008 skip_escaped_newlines do all the work. */
1009 {
1010 unsigned int line = pfile->line;
1011
1012 c = skip_escaped_newlines (pfile);
1013 if (line != pfile->line)
1014 {
1015 buffer->cur--;
1016 /* We had at least one escaped newline of some sort.
1017 Update the token's line and column. */
1018 goto update_tokens_line;
1019 }
1020 }
1021
1022 /* We are either the original '?' or '\\', or a trigraph. */
1023 if (c == '?')
1024 result->type = CPP_QUERY;
1025 else if (c == '\\')
1026 goto random_char;
1027 else
1028 goto trigraph;
1029 break;
1030
1031 case '0': case '1': case '2': case '3': case '4':
1032 case '5': case '6': case '7': case '8': case '9':
1033 result->type = CPP_NUMBER;
1034 parse_number (pfile, &result->val.str, 0);
1035 break;
1036
1037 case 'L':
1038 /* 'L' may introduce wide characters or strings. */
1039 {
1040 const unsigned char *pos = buffer->cur;
1041
1042 c = get_effective_char (pfile);
1043 if (c == '\'' || c == '"')
1044 {
1045 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1046 parse_string (pfile, result, c);
1047 break;
1048 }
1049 buffer->cur = pos;
1050 }
1051 /* Fall through. */
1052
1053 start_ident:
1054 case '_':
1055 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1056 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1057 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1058 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1059 case 'y': case 'z':
1060 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1061 case 'G': case 'H': case 'I': case 'J': case 'K':
1062 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1063 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1064 case 'Y': case 'Z':
1065 result->type = CPP_NAME;
1066 result->val.node = parse_identifier (pfile);
1067
1068 /* Convert named operators to their proper types. */
1069 if (result->val.node->flags & NODE_OPERATOR)
1070 {
1071 result->flags |= NAMED_OP;
1072 result->type = result->val.node->value.operator;
1073 }
1074 break;
1075
1076 case '\'':
1077 case '"':
1078 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1079 parse_string (pfile, result, c);
1080 break;
1081
1082 case '/':
1083 /* A potential block or line comment. */
1084 comment_start = buffer->cur;
1085 c = get_effective_char (pfile);
1086
1087 if (c == '*')
1088 {
1089 if (skip_block_comment (pfile))
1090 cpp_error (pfile, DL_ERROR, "unterminated comment");
1091 }
1092 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1093 || CPP_IN_SYSTEM_HEADER (pfile)))
1094 {
1095 /* Warn about comments only if pedantically GNUC89, and not
1096 in system headers. */
1097 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1098 && ! buffer->warned_cplusplus_comments)
1099 {
1100 cpp_error (pfile, DL_PEDWARN,
1101 "C++ style comments are not allowed in ISO C90");
1102 cpp_error (pfile, DL_PEDWARN,
1103 "(this will be reported only once per input file)");
1104 buffer->warned_cplusplus_comments = 1;
1105 }
1106
1107 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1108 cpp_error (pfile, DL_WARNING, "multi-line comment");
1109 }
1110 else if (c == '=')
1111 {
1112 result->type = CPP_DIV_EQ;
1113 break;
1114 }
1115 else
1116 {
1117 BACKUP ();
1118 result->type = CPP_DIV;
1119 break;
1120 }
1121
1122 if (!pfile->state.save_comments)
1123 {
1124 result->flags |= PREV_WHITE;
1125 goto update_tokens_line;
1126 }
1127
1128 /* Save the comment as a token in its own right. */
1129 save_comment (pfile, result, comment_start, c);
1130 break;
1131
1132 case '<':
1133 if (pfile->state.angled_headers)
1134 {
1135 result->type = CPP_HEADER_NAME;
1136 parse_string (pfile, result, '>');
1137 break;
1138 }
1139
1140 c = get_effective_char (pfile);
1141 if (c == '=')
1142 result->type = CPP_LESS_EQ;
1143 else if (c == '<')
1144 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1145 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1146 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1147 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1148 {
1149 result->type = CPP_OPEN_SQUARE;
1150 result->flags |= DIGRAPH;
1151 }
1152 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1153 {
1154 result->type = CPP_OPEN_BRACE;
1155 result->flags |= DIGRAPH;
1156 }
1157 else
1158 {
1159 BACKUP ();
1160 result->type = CPP_LESS;
1161 }
1162 break;
1163
1164 case '>':
1165 c = get_effective_char (pfile);
1166 if (c == '=')
1167 result->type = CPP_GREATER_EQ;
1168 else if (c == '>')
1169 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1170 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1171 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1172 else
1173 {
1174 BACKUP ();
1175 result->type = CPP_GREATER;
1176 }
1177 break;
1178
1179 case '%':
1180 c = get_effective_char (pfile);
1181 if (c == '=')
1182 result->type = CPP_MOD_EQ;
1183 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1184 {
1185 result->flags |= DIGRAPH;
1186 result->type = CPP_HASH;
1187 if (get_effective_char (pfile) == '%')
1188 {
1189 const unsigned char *pos = buffer->cur;
1190
1191 if (get_effective_char (pfile) == ':')
1192 result->type = CPP_PASTE;
1193 else
1194 buffer->cur = pos - 1;
1195 }
1196 else
1197 BACKUP ();
1198 }
1199 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1200 {
1201 result->flags |= DIGRAPH;
1202 result->type = CPP_CLOSE_BRACE;
1203 }
1204 else
1205 {
1206 BACKUP ();
1207 result->type = CPP_MOD;
1208 }
1209 break;
1210
1211 case '.':
1212 result->type = CPP_DOT;
1213 c = get_effective_char (pfile);
1214 if (c == '.')
1215 {
1216 const unsigned char *pos = buffer->cur;
1217
1218 if (get_effective_char (pfile) == '.')
1219 result->type = CPP_ELLIPSIS;
1220 else
1221 buffer->cur = pos - 1;
1222 }
1223 /* All known character sets have 0...9 contiguous. */
1224 else if (ISDIGIT (c))
1225 {
1226 result->type = CPP_NUMBER;
1227 parse_number (pfile, &result->val.str, 1);
1228 }
1229 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1230 result->type = CPP_DOT_STAR;
1231 else
1232 BACKUP ();
1233 break;
1234
1235 case '+':
1236 c = get_effective_char (pfile);
1237 if (c == '+')
1238 result->type = CPP_PLUS_PLUS;
1239 else if (c == '=')
1240 result->type = CPP_PLUS_EQ;
1241 else
1242 {
1243 BACKUP ();
1244 result->type = CPP_PLUS;
1245 }
1246 break;
1247
1248 case '-':
1249 c = get_effective_char (pfile);
1250 if (c == '>')
1251 {
1252 result->type = CPP_DEREF;
1253 if (CPP_OPTION (pfile, cplusplus))
1254 {
1255 if (get_effective_char (pfile) == '*')
1256 result->type = CPP_DEREF_STAR;
1257 else
1258 BACKUP ();
1259 }
1260 }
1261 else if (c == '-')
1262 result->type = CPP_MINUS_MINUS;
1263 else if (c == '=')
1264 result->type = CPP_MINUS_EQ;
1265 else
1266 {
1267 BACKUP ();
1268 result->type = CPP_MINUS;
1269 }
1270 break;
1271
1272 case '&':
1273 c = get_effective_char (pfile);
1274 if (c == '&')
1275 result->type = CPP_AND_AND;
1276 else if (c == '=')
1277 result->type = CPP_AND_EQ;
1278 else
1279 {
1280 BACKUP ();
1281 result->type = CPP_AND;
1282 }
1283 break;
1284
1285 case '|':
1286 c = get_effective_char (pfile);
1287 if (c == '|')
1288 result->type = CPP_OR_OR;
1289 else if (c == '=')
1290 result->type = CPP_OR_EQ;
1291 else
1292 {
1293 BACKUP ();
1294 result->type = CPP_OR;
1295 }
1296 break;
1297
1298 case ':':
1299 c = get_effective_char (pfile);
1300 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1301 result->type = CPP_SCOPE;
1302 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1303 {
1304 result->flags |= DIGRAPH;
1305 result->type = CPP_CLOSE_SQUARE;
1306 }
1307 else
1308 {
1309 BACKUP ();
1310 result->type = CPP_COLON;
1311 }
1312 break;
1313
1314 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1315 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1316 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1317 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1318 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1319
1320 case '~': result->type = CPP_COMPL; break;
1321 case ',': result->type = CPP_COMMA; break;
1322 case '(': result->type = CPP_OPEN_PAREN; break;
1323 case ')': result->type = CPP_CLOSE_PAREN; break;
1324 case '[': result->type = CPP_OPEN_SQUARE; break;
1325 case ']': result->type = CPP_CLOSE_SQUARE; break;
1326 case '{': result->type = CPP_OPEN_BRACE; break;
1327 case '}': result->type = CPP_CLOSE_BRACE; break;
1328 case ';': result->type = CPP_SEMICOLON; break;
1329
1330 /* @ is a punctuator in Objective-C. */
1331 case '@': result->type = CPP_ATSIGN; break;
1332
1333 case '$':
1334 if (CPP_OPTION (pfile, dollars_in_ident))
1335 goto start_ident;
1336 /* Fall through... */
1337
1338 random_char:
1339 default:
1340 result->type = CPP_OTHER;
1341 result->val.c = c;
1342 break;
1343 }
1344
1345 return result;
1346 }
1347
1348 /* An upper bound on the number of bytes needed to spell TOKEN,
1349 including preceding whitespace. */
1350 unsigned int
cpp_token_len(token)1351 cpp_token_len (token)
1352 const cpp_token *token;
1353 {
1354 unsigned int len;
1355
1356 switch (TOKEN_SPELL (token))
1357 {
1358 default: len = 0; break;
1359 case SPELL_NUMBER:
1360 case SPELL_STRING: len = token->val.str.len; break;
1361 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1362 }
1363 /* 1 for whitespace, 4 for comment delimiters. */
1364 return len + 5;
1365 }
1366
1367 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1368 already contain the enough space to hold the token's spelling.
1369 Returns a pointer to the character after the last character
1370 written. */
1371 unsigned char *
cpp_spell_token(pfile,token,buffer)1372 cpp_spell_token (pfile, token, buffer)
1373 cpp_reader *pfile; /* Would be nice to be rid of this... */
1374 const cpp_token *token;
1375 unsigned char *buffer;
1376 {
1377 switch (TOKEN_SPELL (token))
1378 {
1379 case SPELL_OPERATOR:
1380 {
1381 const unsigned char *spelling;
1382 unsigned char c;
1383
1384 if (token->flags & DIGRAPH)
1385 spelling
1386 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1387 else if (token->flags & NAMED_OP)
1388 goto spell_ident;
1389 else
1390 spelling = TOKEN_NAME (token);
1391
1392 while ((c = *spelling++) != '\0')
1393 *buffer++ = c;
1394 }
1395 break;
1396
1397 case SPELL_CHAR:
1398 *buffer++ = token->val.c;
1399 break;
1400
1401 spell_ident:
1402 case SPELL_IDENT:
1403 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1404 buffer += NODE_LEN (token->val.node);
1405 break;
1406
1407 case SPELL_NUMBER:
1408 memcpy (buffer, token->val.str.text, token->val.str.len);
1409 buffer += token->val.str.len;
1410 break;
1411
1412 case SPELL_STRING:
1413 {
1414 int left, right, tag;
1415 switch (token->type)
1416 {
1417 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1418 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1419 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1420 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1421 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1422 default:
1423 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1424 TOKEN_NAME (token));
1425 return buffer;
1426 }
1427 if (tag) *buffer++ = tag;
1428 *buffer++ = left;
1429 memcpy (buffer, token->val.str.text, token->val.str.len);
1430 buffer += token->val.str.len;
1431 *buffer++ = right;
1432 }
1433 break;
1434
1435 case SPELL_NONE:
1436 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1437 break;
1438 }
1439
1440 return buffer;
1441 }
1442
1443 /* Returns TOKEN spelt as a null-terminated string. The string is
1444 freed when the reader is destroyed. Useful for diagnostics. */
1445 unsigned char *
cpp_token_as_text(pfile,token)1446 cpp_token_as_text (pfile, token)
1447 cpp_reader *pfile;
1448 const cpp_token *token;
1449 {
1450 unsigned int len = cpp_token_len (token);
1451 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1452
1453 end = cpp_spell_token (pfile, token, start);
1454 end[0] = '\0';
1455
1456 return start;
1457 }
1458
1459 /* Used by C front ends, which really should move to using
1460 cpp_token_as_text. */
1461 const char *
cpp_type2name(type)1462 cpp_type2name (type)
1463 enum cpp_ttype type;
1464 {
1465 return (const char *) token_spellings[type].name;
1466 }
1467
1468 /* Writes the spelling of token to FP, without any preceding space.
1469 Separated from cpp_spell_token for efficiency - to avoid stdio
1470 double-buffering. */
1471 void
cpp_output_token(token,fp)1472 cpp_output_token (token, fp)
1473 const cpp_token *token;
1474 FILE *fp;
1475 {
1476 switch (TOKEN_SPELL (token))
1477 {
1478 case SPELL_OPERATOR:
1479 {
1480 const unsigned char *spelling;
1481 int c;
1482
1483 if (token->flags & DIGRAPH)
1484 spelling
1485 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1486 else if (token->flags & NAMED_OP)
1487 goto spell_ident;
1488 else
1489 spelling = TOKEN_NAME (token);
1490
1491 c = *spelling;
1492 do
1493 putc (c, fp);
1494 while ((c = *++spelling) != '\0');
1495 }
1496 break;
1497
1498 case SPELL_CHAR:
1499 putc (token->val.c, fp);
1500 break;
1501
1502 spell_ident:
1503 case SPELL_IDENT:
1504 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1505 break;
1506
1507 case SPELL_NUMBER:
1508 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1509 break;
1510
1511 case SPELL_STRING:
1512 {
1513 int left, right, tag;
1514 switch (token->type)
1515 {
1516 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1517 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1518 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1519 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1520 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1521 default:
1522 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1523 return;
1524 }
1525 if (tag) putc (tag, fp);
1526 putc (left, fp);
1527 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1528 putc (right, fp);
1529 }
1530 break;
1531
1532 case SPELL_NONE:
1533 /* An error, most probably. */
1534 break;
1535 }
1536 }
1537
1538 /* Compare two tokens. */
1539 int
_cpp_equiv_tokens(a,b)1540 _cpp_equiv_tokens (a, b)
1541 const cpp_token *a, *b;
1542 {
1543 if (a->type == b->type && a->flags == b->flags)
1544 switch (TOKEN_SPELL (a))
1545 {
1546 default: /* Keep compiler happy. */
1547 case SPELL_OPERATOR:
1548 return 1;
1549 case SPELL_CHAR:
1550 return a->val.c == b->val.c; /* Character. */
1551 case SPELL_NONE:
1552 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1553 case SPELL_IDENT:
1554 return a->val.node == b->val.node;
1555 case SPELL_NUMBER:
1556 case SPELL_STRING:
1557 return (a->val.str.len == b->val.str.len
1558 && !memcmp (a->val.str.text, b->val.str.text,
1559 a->val.str.len));
1560 }
1561
1562 return 0;
1563 }
1564
1565 /* Returns nonzero if a space should be inserted to avoid an
1566 accidental token paste for output. For simplicity, it is
1567 conservative, and occasionally advises a space where one is not
1568 needed, e.g. "." and ".2". */
1569 int
cpp_avoid_paste(pfile,token1,token2)1570 cpp_avoid_paste (pfile, token1, token2)
1571 cpp_reader *pfile;
1572 const cpp_token *token1, *token2;
1573 {
1574 enum cpp_ttype a = token1->type, b = token2->type;
1575 cppchar_t c;
1576
1577 if (token1->flags & NAMED_OP)
1578 a = CPP_NAME;
1579 if (token2->flags & NAMED_OP)
1580 b = CPP_NAME;
1581
1582 c = EOF;
1583 if (token2->flags & DIGRAPH)
1584 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1585 else if (token_spellings[b].category == SPELL_OPERATOR)
1586 c = token_spellings[b].name[0];
1587
1588 /* Quickly get everything that can paste with an '='. */
1589 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1590 return 1;
1591
1592 switch (a)
1593 {
1594 case CPP_GREATER: return c == '>' || c == '?';
1595 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1596 case CPP_PLUS: return c == '+';
1597 case CPP_MINUS: return c == '-' || c == '>';
1598 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1599 case CPP_MOD: return c == ':' || c == '>';
1600 case CPP_AND: return c == '&';
1601 case CPP_OR: return c == '|';
1602 case CPP_COLON: return c == ':' || c == '>';
1603 case CPP_DEREF: return c == '*';
1604 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1605 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1606 case CPP_NAME: return ((b == CPP_NUMBER
1607 && name_p (pfile, &token2->val.str))
1608 || b == CPP_NAME
1609 || b == CPP_CHAR || b == CPP_STRING); /* L */
1610 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1611 || c == '.' || c == '+' || c == '-');
1612 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1613 && token1->val.c == '@'
1614 && (b == CPP_NAME || b == CPP_STRING));
1615 default: break;
1616 }
1617
1618 return 0;
1619 }
1620
1621 /* Output all the remaining tokens on the current line, and a newline
1622 character, to FP. Leading whitespace is removed. If there are
1623 macros, special token padding is not performed. */
1624 void
cpp_output_line(pfile,fp)1625 cpp_output_line (pfile, fp)
1626 cpp_reader *pfile;
1627 FILE *fp;
1628 {
1629 const cpp_token *token;
1630
1631 token = cpp_get_token (pfile);
1632 while (token->type != CPP_EOF)
1633 {
1634 cpp_output_token (token, fp);
1635 token = cpp_get_token (pfile);
1636 if (token->flags & PREV_WHITE)
1637 putc (' ', fp);
1638 }
1639
1640 putc ('\n', fp);
1641 }
1642
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1653
1654 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1655 failure if cpplib is not parsing C++ or C99. Such failure is
1656 silent, and no variables are updated. Otherwise returns 0, and
1657 warns if -Wtraditional.
1658
1659 [lex.charset]: The character designated by the universal character
1660 name \UNNNNNNNN is that character whose character short name in
1661 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1662 universal character name \uNNNN is that character whose character
1663 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1664 for a universal character name is less than 0x20 or in the range
1665 0x7F-0x9F (inclusive), or if the universal character name
1666 designates a character in the basic source character set, then the
1667 program is ill-formed.
1668
1669 We assume that wchar_t is Unicode, so we don't need to do any
1670 mapping. Is this ever wrong?
1671
1672 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1673 LIMIT is the end of the string or charconst. PSTR is updated to
1674 point after the UCS on return, and the UCS is written into PC. */
1675
1676 static int
maybe_read_ucs(pfile,pstr,limit,pc)1677 maybe_read_ucs (pfile, pstr, limit, pc)
1678 cpp_reader *pfile;
1679 const unsigned char **pstr;
1680 const unsigned char *limit;
1681 cppchar_t *pc;
1682 {
1683 const unsigned char *p = *pstr;
1684 unsigned int code = 0;
1685 unsigned int c = *pc, length;
1686
1687 /* Only attempt to interpret a UCS for C++ and C99. */
1688 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1689 return 1;
1690
1691 if (CPP_WTRADITIONAL (pfile))
1692 cpp_error (pfile, DL_WARNING,
1693 "the meaning of '\\%c' is different in traditional C", c);
1694
1695 length = (c == 'u' ? 4: 8);
1696
1697 if ((size_t) (limit - p) < length)
1698 {
1699 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1700 /* Skip to the end to avoid more diagnostics. */
1701 p = limit;
1702 }
1703 else
1704 {
1705 for (; length; length--, p++)
1706 {
1707 c = *p;
1708 if (ISXDIGIT (c))
1709 code = (code << 4) + hex_digit_value (c);
1710 else
1711 {
1712 cpp_error (pfile, DL_ERROR,
1713 "non-hex digit '%c' in universal-character-name", c);
1714 /* We shouldn't skip in case there are multibyte chars. */
1715 break;
1716 }
1717 }
1718 }
1719
1720 #ifdef TARGET_EBCDIC
1721 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1722 code = 0x3f; /* EBCDIC invalid character */
1723 #else
1724 /* True extended characters are OK. */
1725 if (code >= 0xa0
1726 && !(code & 0x80000000)
1727 && !(code >= 0xD800 && code <= 0xDFFF))
1728 ;
1729 /* The standard permits $, @ and ` to be specified as UCNs. We use
1730 hex escapes so that this also works with EBCDIC hosts. */
1731 else if (code == 0x24 || code == 0x40 || code == 0x60)
1732 ;
1733 /* Don't give another error if one occurred above. */
1734 else if (length == 0)
1735 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1736 #endif
1737
1738 *pstr = p;
1739 *pc = code;
1740 return 0;
1741 }
1742
1743 /* Returns the value of an escape sequence, truncated to the correct
1744 target precision. PSTR points to the input pointer, which is just
1745 after the backslash. LIMIT is how much text we have. WIDE is true
1746 if the escape sequence is part of a wide character constant or
1747 string literal. Handles all relevant diagnostics. */
1748 cppchar_t
cpp_parse_escape(pfile,pstr,limit,wide)1749 cpp_parse_escape (pfile, pstr, limit, wide)
1750 cpp_reader *pfile;
1751 const unsigned char **pstr;
1752 const unsigned char *limit;
1753 int wide;
1754 {
1755 int unknown = 0;
1756 const unsigned char *str = *pstr;
1757 cppchar_t c, mask;
1758 unsigned int width;
1759
1760 if (wide)
1761 width = CPP_OPTION (pfile, wchar_precision);
1762 else
1763 width = CPP_OPTION (pfile, char_precision);
1764 if (width < BITS_PER_CPPCHAR_T)
1765 mask = ((cppchar_t) 1 << width) - 1;
1766 else
1767 mask = ~0;
1768
1769 c = *str++;
1770 switch (c)
1771 {
1772 case '\\': case '\'': case '"': case '?': break;
1773 case 'b': c = TARGET_BS; break;
1774 case 'f': c = TARGET_FF; break;
1775 case 'n': c = TARGET_NEWLINE; break;
1776 case 'r': c = TARGET_CR; break;
1777 case 't': c = TARGET_TAB; break;
1778 case 'v': c = TARGET_VT; break;
1779
1780 case '(': case '{': case '[': case '%':
1781 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1782 '\%' is used to prevent SCCS from getting confused. */
1783 unknown = CPP_PEDANTIC (pfile);
1784 break;
1785
1786 case 'a':
1787 if (CPP_WTRADITIONAL (pfile))
1788 cpp_error (pfile, DL_WARNING,
1789 "the meaning of '\\a' is different in traditional C");
1790 c = TARGET_BELL;
1791 break;
1792
1793 case 'e': case 'E':
1794 if (CPP_PEDANTIC (pfile))
1795 cpp_error (pfile, DL_PEDWARN,
1796 "non-ISO-standard escape sequence, '\\%c'", (int) c);
1797 c = TARGET_ESC;
1798 break;
1799
1800 case 'u': case 'U':
1801 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1802 break;
1803
1804 case 'x':
1805 if (CPP_WTRADITIONAL (pfile))
1806 cpp_error (pfile, DL_WARNING,
1807 "the meaning of '\\x' is different in traditional C");
1808
1809 {
1810 cppchar_t i = 0, overflow = 0;
1811 int digits_found = 0;
1812
1813 while (str < limit)
1814 {
1815 c = *str;
1816 if (! ISXDIGIT (c))
1817 break;
1818 str++;
1819 overflow |= i ^ (i << 4 >> 4);
1820 i = (i << 4) + hex_digit_value (c);
1821 digits_found = 1;
1822 }
1823
1824 if (!digits_found)
1825 cpp_error (pfile, DL_ERROR,
1826 "\\x used with no following hex digits");
1827
1828 if (overflow | (i != (i & mask)))
1829 {
1830 cpp_error (pfile, DL_PEDWARN,
1831 "hex escape sequence out of range");
1832 i &= mask;
1833 }
1834 c = i;
1835 }
1836 break;
1837
1838 case '0': case '1': case '2': case '3':
1839 case '4': case '5': case '6': case '7':
1840 {
1841 size_t count = 0;
1842 cppchar_t i = c - '0';
1843
1844 while (str < limit && ++count < 3)
1845 {
1846 c = *str;
1847 if (c < '0' || c > '7')
1848 break;
1849 str++;
1850 i = (i << 3) + c - '0';
1851 }
1852
1853 if (i != (i & mask))
1854 {
1855 cpp_error (pfile, DL_PEDWARN,
1856 "octal escape sequence out of range");
1857 i &= mask;
1858 }
1859 c = i;
1860 }
1861 break;
1862
1863 default:
1864 unknown = 1;
1865 break;
1866 }
1867
1868 if (unknown)
1869 {
1870 if (ISGRAPH (c))
1871 cpp_error (pfile, DL_PEDWARN,
1872 "unknown escape sequence '\\%c'", (int) c);
1873 else
1874 cpp_error (pfile, DL_PEDWARN,
1875 "unknown escape sequence: '\\%03o'", (int) c);
1876 }
1877
1878 if (c > mask)
1879 {
1880 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1881 c &= mask;
1882 }
1883
1884 *pstr = str;
1885 return c;
1886 }
1887
1888 /* Interpret a (possibly wide) character constant in TOKEN.
1889 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1890 points to a variable that is filled in with the number of
1891 characters seen, and UNSIGNEDP to a variable that indicates whether
1892 the result has signed type. */
1893 cppchar_t
cpp_interpret_charconst(pfile,token,pchars_seen,unsignedp)1894 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1895 cpp_reader *pfile;
1896 const cpp_token *token;
1897 unsigned int *pchars_seen;
1898 int *unsignedp;
1899 {
1900 const unsigned char *str = token->val.str.text;
1901 const unsigned char *limit = str + token->val.str.len;
1902 unsigned int chars_seen = 0;
1903 size_t width, max_chars;
1904 cppchar_t c, mask, result = 0;
1905 bool unsigned_p;
1906
1907 #ifdef MULTIBYTE_CHARS
1908 (void) local_mbtowc (NULL, NULL, 0);
1909 #endif
1910
1911 /* Width in bits. */
1912 if (token->type == CPP_CHAR)
1913 {
1914 width = CPP_OPTION (pfile, char_precision);
1915 max_chars = CPP_OPTION (pfile, int_precision) / width;
1916 unsigned_p = CPP_OPTION (pfile, unsigned_char);
1917 }
1918 else
1919 {
1920 width = CPP_OPTION (pfile, wchar_precision);
1921 max_chars = 1;
1922 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1923 }
1924
1925 if (width < BITS_PER_CPPCHAR_T)
1926 mask = ((cppchar_t) 1 << width) - 1;
1927 else
1928 mask = ~0;
1929
1930 while (str < limit)
1931 {
1932 #ifdef MULTIBYTE_CHARS
1933 wchar_t wc;
1934 int char_len;
1935
1936 char_len = local_mbtowc (&wc, str, limit - str);
1937 if (char_len == -1)
1938 {
1939 cpp_error (pfile, DL_WARNING,
1940 "ignoring invalid multibyte character");
1941 c = *str++;
1942 }
1943 else
1944 {
1945 str += char_len;
1946 c = wc;
1947 }
1948 #else
1949 c = *str++;
1950 #endif
1951
1952 if (c == '\\')
1953 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1954
1955 #ifdef MAP_CHARACTER
1956 if (ISPRINT (c))
1957 c = MAP_CHARACTER (c);
1958 #endif
1959
1960 chars_seen++;
1961
1962 /* Truncate the character, scale the result and merge the two. */
1963 c &= mask;
1964 if (width < BITS_PER_CPPCHAR_T)
1965 result = (result << width) | c;
1966 else
1967 result = c;
1968 }
1969
1970 if (chars_seen == 0)
1971 cpp_error (pfile, DL_ERROR, "empty character constant");
1972 else if (chars_seen > 1)
1973 {
1974 /* Multichar charconsts are of type int and therefore signed. */
1975 unsigned_p = 0;
1976
1977 if (chars_seen > max_chars)
1978 {
1979 chars_seen = max_chars;
1980 cpp_error (pfile, DL_WARNING,
1981 "character constant too long for its type");
1982 }
1983 else if (CPP_OPTION (pfile, warn_multichar))
1984 cpp_error (pfile, DL_WARNING, "multi-character character constant");
1985 }
1986
1987 /* Sign-extend or truncate the constant to cppchar_t. The value is
1988 in WIDTH bits, but for multi-char charconsts it's value is the
1989 full target type's width. */
1990 if (chars_seen > 1)
1991 width *= max_chars;
1992 if (width < BITS_PER_CPPCHAR_T)
1993 {
1994 mask = ((cppchar_t) 1 << width) - 1;
1995 if (unsigned_p || !(result & (1 << (width - 1))))
1996 result &= mask;
1997 else
1998 result |= ~mask;
1999 }
2000
2001 *pchars_seen = chars_seen;
2002 *unsignedp = unsigned_p;
2003 return result;
2004 }
2005
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that arbitrary expressions may
   be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2020
2021 /* Create a new allocation buffer. Place the control block at the end
2022 of the buffer, so that buffer overflows will cause immediate chaos. */
2023 static _cpp_buff *
new_buff(len)2024 new_buff (len)
2025 size_t len;
2026 {
2027 _cpp_buff *result;
2028 unsigned char *base;
2029
2030 if (len < MIN_BUFF_SIZE)
2031 len = MIN_BUFF_SIZE;
2032 len = CPP_ALIGN (len);
2033
2034 base = xmalloc (len + sizeof (_cpp_buff));
2035 result = (_cpp_buff *) (base + len);
2036 result->base = base;
2037 result->cur = base;
2038 result->limit = base + len;
2039 result->next = NULL;
2040 return result;
2041 }
2042
2043 /* Place a chain of unwanted allocation buffers on the free list. */
2044 void
_cpp_release_buff(pfile,buff)2045 _cpp_release_buff (pfile, buff)
2046 cpp_reader *pfile;
2047 _cpp_buff *buff;
2048 {
2049 _cpp_buff *end = buff;
2050
2051 while (end->next)
2052 end = end->next;
2053 end->next = pfile->free_buffs;
2054 pfile->free_buffs = buff;
2055 }
2056
2057 /* Return a free buffer of size at least MIN_SIZE. */
2058 _cpp_buff *
_cpp_get_buff(pfile,min_size)2059 _cpp_get_buff (pfile, min_size)
2060 cpp_reader *pfile;
2061 size_t min_size;
2062 {
2063 _cpp_buff *result, **p;
2064
2065 for (p = &pfile->free_buffs;; p = &(*p)->next)
2066 {
2067 size_t size;
2068
2069 if (*p == NULL)
2070 return new_buff (min_size);
2071 result = *p;
2072 size = result->limit - result->base;
2073 /* Return a buffer that's big enough, but don't waste one that's
2074 way too big. */
2075 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2076 break;
2077 }
2078
2079 *p = result->next;
2080 result->next = NULL;
2081 result->cur = result->base;
2082 return result;
2083 }
2084
2085 /* Creates a new buffer with enough space to hold the uncommitted
2086 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2087 the excess bytes to the new buffer. Chains the new buffer after
2088 BUFF, and returns the new buffer. */
2089 _cpp_buff *
_cpp_append_extend_buff(pfile,buff,min_extra)2090 _cpp_append_extend_buff (pfile, buff, min_extra)
2091 cpp_reader *pfile;
2092 _cpp_buff *buff;
2093 size_t min_extra;
2094 {
2095 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2096 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2097
2098 buff->next = new_buff;
2099 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2100 return new_buff;
2101 }
2102
2103 /* Creates a new buffer with enough space to hold the uncommitted
2104 remaining bytes of the buffer pointed to by BUFF, and at least
2105 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2106 Chains the new buffer before the buffer pointed to by BUFF, and
2107 updates the pointer to point to the new buffer. */
2108 void
_cpp_extend_buff(pfile,pbuff,min_extra)2109 _cpp_extend_buff (pfile, pbuff, min_extra)
2110 cpp_reader *pfile;
2111 _cpp_buff **pbuff;
2112 size_t min_extra;
2113 {
2114 _cpp_buff *new_buff, *old_buff = *pbuff;
2115 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2116
2117 new_buff = _cpp_get_buff (pfile, size);
2118 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2119 new_buff->next = old_buff;
2120 *pbuff = new_buff;
2121 }
2122
2123 /* Free a chain of buffers starting at BUFF. */
2124 void
_cpp_free_buff(buff)2125 _cpp_free_buff (buff)
2126 _cpp_buff *buff;
2127 {
2128 _cpp_buff *next;
2129
2130 for (; buff; buff = next)
2131 {
2132 next = buff->next;
2133 free (buff->base);
2134 }
2135 }
2136
2137 /* Allocate permanent, unaligned storage of length LEN. */
2138 unsigned char *
_cpp_unaligned_alloc(pfile,len)2139 _cpp_unaligned_alloc (pfile, len)
2140 cpp_reader *pfile;
2141 size_t len;
2142 {
2143 _cpp_buff *buff = pfile->u_buff;
2144 unsigned char *result = buff->cur;
2145
2146 if (len > (size_t) (buff->limit - result))
2147 {
2148 buff = _cpp_get_buff (pfile, len);
2149 buff->next = pfile->u_buff;
2150 pfile->u_buff = buff;
2151 result = buff->cur;
2152 }
2153
2154 buff->cur = result + len;
2155 return result;
2156 }
2157
2158 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2159 That buffer is used for growing allocations when saving macro
2160 replacement lists in a #define, and when parsing an answer to an
2161 assertion in #assert, #unassert or #if (and therefore possibly
2162 whilst expanding macros). It therefore must not be used by any
2163 code that they might call: specifically the lexer and the guts of
2164 the macro expander.
2165
2166 All existing other uses clearly fit this restriction: storing
2167 registered pragmas during initialization. */
2168 unsigned char *
_cpp_aligned_alloc(pfile,len)2169 _cpp_aligned_alloc (pfile, len)
2170 cpp_reader *pfile;
2171 size_t len;
2172 {
2173 _cpp_buff *buff = pfile->a_buff;
2174 unsigned char *result = buff->cur;
2175
2176 if (len > (size_t) (buff->limit - result))
2177 {
2178 buff = _cpp_get_buff (pfile, len);
2179 buff->next = pfile->a_buff;
2180 pfile->a_buff = buff;
2181 result = buff->cur;
2182 }
2183
2184 buff->cur = result + len;
2185 return result;
2186 }
2187