xref: /openbsd/gnu/usr.bin/gcc/gcc/cpplex.c (revision 404b540a)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7    Single-pass line tokenization by Neil Booth, April 2000
8 
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "cpphash.h"
27 
28 #ifdef MULTIBYTE_CHARS
29 #include "mbchar.h"
30 #include <locale.h>
31 #endif
32 
/* How the spelling of a token type is obtained.  Every entry of
   TTYPE_TABLE is tagged with one of these categories: OP () entries
   are always SPELL_OPERATOR, TK () entries name theirs explicitly.

   NOTE(review): this comment used to say that SPELL_STRING tokens
   store their spelling in the token list and its length in
   token->val.name.len.  The string lexer in this file fills in
   token->val.str.text and token->val.str.len, so the older field name
   looks stale -- confirm against cpplib.h.  */
enum spell_type
{
  SPELL_OPERATOR,       /* First enumerator, so its value is 0.  */
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
44 
45 struct token_spelling
46 {
47   enum spell_type category;
48   const unsigned char *name;
49 };
50 
51 static const unsigned char *const digraph_spellings[] =
52 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
53 
54 #define OP(e, s) { SPELL_OPERATOR, U s           },
55 #define TK(e, s) { s,              U STRINGX (e) },
56 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
57 #undef OP
58 #undef TK
59 
/* Spelling category, and recorded name, for the type of token TOK.  */
#define TOKEN_SPELL(tok) (token_spellings[(tok)->type].category)
#define TOKEN_NAME(tok) (token_spellings[(tok)->type].name)

/* Rewind the read pointer to the saved backup position.  Relies on a
   local variable called `buffer' being in scope at the point of use.  */
#define BACKUP() \
  do { buffer->cur = buffer->backup_to; } while (0)
63 
64 static void handle_newline PARAMS ((cpp_reader *));
65 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
66 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
67 
68 static int skip_block_comment PARAMS ((cpp_reader *));
69 static int skip_line_comment PARAMS ((cpp_reader *));
70 static void adjust_column PARAMS ((cpp_reader *));
71 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
72 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
73 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
74 				  unsigned int *));
75 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
76 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
77 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
78 static bool trigraph_p PARAMS ((cpp_reader *));
79 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
80 				  cppchar_t));
81 static bool continue_after_nul PARAMS ((cpp_reader *));
82 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
83 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
84 				   const unsigned char *, cppchar_t *));
85 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
86 
87 static unsigned int hex_digit_value PARAMS ((unsigned int));
88 static _cpp_buff *new_buff PARAMS ((size_t));
89 
90 /* Utility routine:
91 
92    Compares, the token TOKEN to the NUL-terminated string STRING.
93    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
94 int
95 cpp_ideq (token, string)
96      const cpp_token *token;
97      const char *string;
98 {
99   if (token->type != CPP_NAME)
100     return 0;
101 
102   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
103 }
104 
105 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
106    Returns with buffer->cur pointing to the character immediately
107    following the newline (combination).  */
108 static void
109 handle_newline (pfile)
110      cpp_reader *pfile;
111 {
112   cpp_buffer *buffer = pfile->buffer;
113 
114   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
115      only accept CR-LF; maybe we should fall back to that behavior?  */
116   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
117     buffer->cur++;
118 
119   buffer->line_base = buffer->cur;
120   buffer->col_adjust = 0;
121   pfile->line++;
122 }
123 
124 /* Subroutine of skip_escaped_newlines; called when a 3-character
125    sequence beginning with "??" is encountered.  buffer->cur points to
126    the second '?'.
127 
128    Warn if necessary, and returns true if the sequence forms a
129    trigraph and the trigraph should be honored.  */
130 static bool
131 trigraph_p (pfile)
132      cpp_reader *pfile;
133 {
134   cpp_buffer *buffer = pfile->buffer;
135   cppchar_t from_char = buffer->cur[1];
136   bool accept;
137 
138   if (!_cpp_trigraph_map[from_char])
139     return false;
140 
141   accept = CPP_OPTION (pfile, trigraphs);
142 
143   /* Don't warn about trigraphs in comments.  */
144   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
145     {
146       if (accept)
147 	cpp_error_with_line (pfile, DL_WARNING,
148 			     pfile->line, CPP_BUF_COL (buffer) - 1,
149 			     "trigraph ??%c converted to %c",
150 			     (int) from_char,
151 			     (int) _cpp_trigraph_map[from_char]);
152       else if (buffer->cur != buffer->last_Wtrigraphs)
153 	{
154 	  buffer->last_Wtrigraphs = buffer->cur;
155 	  cpp_error_with_line (pfile, DL_WARNING,
156 			       pfile->line, CPP_BUF_COL (buffer) - 1,
157 			       "trigraph ??%c ignored", (int) from_char);
158 	}
159     }
160 
161   return accept;
162 }
163 
164 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
165    lie in buffer->cur[-1].  Returns the next byte, which will be in
166    buffer->cur[-1].  This routine performs preprocessing stages 1 and
167    2 of the ISO C standard.  */
168 static cppchar_t
169 skip_escaped_newlines (pfile)
170      cpp_reader *pfile;
171 {
172   cpp_buffer *buffer = pfile->buffer;
173   cppchar_t next = buffer->cur[-1];
174 
175   /* Only do this if we apply stages 1 and 2.  */
176   if (!buffer->from_stage3)
177     {
178       const unsigned char *saved_cur;
179       cppchar_t next1;
180 
181       do
182 	{
183 	  if (next == '?')
184 	    {
185 	      if (buffer->cur[0] != '?' || !trigraph_p (pfile))
186 		break;
187 
188 	      /* Translate the trigraph.  */
189 	      next = _cpp_trigraph_map[buffer->cur[1]];
190 	      buffer->cur += 2;
191 	      if (next != '\\')
192 		break;
193 	    }
194 
195 	  if (buffer->cur == buffer->rlimit)
196 	    break;
197 
198 	  /* We have a backslash, and room for at least one more
199 	     character.  Skip horizontal whitespace.  */
200 	  saved_cur = buffer->cur;
201 	  do
202 	    next1 = *buffer->cur++;
203 	  while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
204 
205 	  if (!is_vspace (next1))
206 	    {
207 	      buffer->cur = saved_cur;
208 	      break;
209 	    }
210 
211 	  if (saved_cur != buffer->cur - 1
212 	      && !pfile->state.lexing_comment)
213 	    cpp_error (pfile, DL_WARNING,
214 		       "backslash and newline separated by space");
215 
216 	  handle_newline (pfile);
217 	  buffer->backup_to = buffer->cur;
218 	  if (buffer->cur == buffer->rlimit)
219 	    {
220 	      cpp_error (pfile, DL_PEDWARN,
221 			 "backslash-newline at end of file");
222 	      next = EOF;
223 	    }
224 	  else
225 	    next = *buffer->cur++;
226 	}
227       while (next == '\\' || next == '?');
228     }
229 
230   return next;
231 }
232 
233 /* Obtain the next character, after trigraph conversion and skipping
234    an arbitrarily long string of escaped newlines.  The common case of
235    no trigraphs or escaped newlines falls through quickly.  On return,
236    buffer->backup_to points to where to return to if the character is
237    not to be processed.  */
238 static cppchar_t
239 get_effective_char (pfile)
240      cpp_reader *pfile;
241 {
242   cppchar_t next;
243   cpp_buffer *buffer = pfile->buffer;
244 
245   buffer->backup_to = buffer->cur;
246   next = *buffer->cur++;
247   if (__builtin_expect (next == '?' || next == '\\', 0))
248     next = skip_escaped_newlines (pfile);
249 
250   return next;
251 }
252 
253 /* Skip a C-style block comment.  We find the end of the comment by
254    seeing if an asterisk is before every '/' we encounter.  Returns
255    nonzero if comment terminated by EOF, zero otherwise.  */
256 static int
257 skip_block_comment (pfile)
258      cpp_reader *pfile;
259 {
260   cpp_buffer *buffer = pfile->buffer;
261   cppchar_t c = EOF, prevc = EOF;
262 
263   pfile->state.lexing_comment = 1;
264   while (buffer->cur != buffer->rlimit)
265     {
266       prevc = c, c = *buffer->cur++;
267 
268       /* FIXME: For speed, create a new character class of characters
269 	 of interest inside block comments.  */
270       if (c == '?' || c == '\\')
271 	c = skip_escaped_newlines (pfile);
272 
273       /* People like decorating comments with '*', so check for '/'
274 	 instead for efficiency.  */
275       if (c == '/')
276 	{
277 	  if (prevc == '*')
278 	    break;
279 
280 	  /* Warn about potential nested comments, but not if the '/'
281 	     comes immediately before the true comment delimiter.
282 	     Don't bother to get it right across escaped newlines.  */
283 	  if (CPP_OPTION (pfile, warn_comments)
284 	      && buffer->cur[0] == '*' && buffer->cur[1] != '/')
285 	    cpp_error_with_line (pfile, DL_WARNING,
286 				 pfile->line, CPP_BUF_COL (buffer),
287 				 "\"/*\" within comment");
288 	}
289       else if (is_vspace (c))
290 	handle_newline (pfile);
291       else if (c == '\t')
292 	adjust_column (pfile);
293     }
294 
295   pfile->state.lexing_comment = 0;
296   return c != '/' || prevc != '*';
297 }
298 
299 /* Skip a C++ line comment, leaving buffer->cur pointing to the
300    terminating newline.  Handles escaped newlines.  Returns nonzero
301    if a multiline comment.  */
302 static int
303 skip_line_comment (pfile)
304      cpp_reader *pfile;
305 {
306   cpp_buffer *buffer = pfile->buffer;
307   unsigned int orig_line = pfile->line;
308   cppchar_t c;
309 #ifdef MULTIBYTE_CHARS
310   wchar_t wc;
311   int char_len;
312 #endif
313 
314   pfile->state.lexing_comment = 1;
315 #ifdef MULTIBYTE_CHARS
316   /* Reset multibyte conversion state.  */
317   (void) local_mbtowc (NULL, NULL, 0);
318 #endif
319   do
320     {
321       if (buffer->cur == buffer->rlimit)
322 	goto at_eof;
323 
324 #ifdef MULTIBYTE_CHARS
325       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
326 			       buffer->rlimit - buffer->cur);
327       if (char_len == -1)
328 	{
329 	  cpp_error (pfile, DL_WARNING,
330 		     "ignoring invalid multibyte character");
331 	  char_len = 1;
332 	  c = *buffer->cur++;
333 	}
334       else
335 	{
336 	  buffer->cur += char_len;
337 	  c = wc;
338 	}
339 #else
340       c = *buffer->cur++;
341 #endif
342       if (c == '?' || c == '\\')
343 	c = skip_escaped_newlines (pfile);
344     }
345   while (!is_vspace (c));
346 
347   /* Step back over the newline, except at EOF.  */
348   buffer->cur--;
349  at_eof:
350 
351   pfile->state.lexing_comment = 0;
352   return orig_line != pfile->line;
353 }
354 
355 /* pfile->buffer->cur is one beyond the \t character.  Update
356    col_adjust so we track the column correctly.  */
357 static void
358 adjust_column (pfile)
359      cpp_reader *pfile;
360 {
361   cpp_buffer *buffer = pfile->buffer;
362   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
363 
364   /* Round it up to multiple of the tabstop, but subtract 1 since the
365      tab itself occupies a character position.  */
366   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
367 			 - col % CPP_OPTION (pfile, tabstop)) - 1;
368 }
369 
370 /* Skips whitespace, saving the next non-whitespace character.
371    Adjusts pfile->col_adjust to account for tabs.  Without this,
372    tokens might be assigned an incorrect column.  */
373 static int
374 skip_whitespace (pfile, c)
375      cpp_reader *pfile;
376      cppchar_t c;
377 {
378   cpp_buffer *buffer = pfile->buffer;
379   unsigned int warned = 0;
380 
381   do
382     {
383       /* Horizontal space always OK.  */
384       if (c == ' ')
385 	;
386       else if (c == '\t')
387 	adjust_column (pfile);
388       /* Just \f \v or \0 left.  */
389       else if (c == '\0')
390 	{
391 	  if (buffer->cur - 1 == buffer->rlimit)
392 	    return 0;
393 	  if (!warned)
394 	    {
395 	      cpp_error (pfile, DL_WARNING, "null character(s) ignored");
396 	      warned = 1;
397 	    }
398 	}
399       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400 	cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
401 			     CPP_BUF_COL (buffer),
402 			     "%s in preprocessing directive",
403 			     c == '\f' ? "form feed" : "vertical tab");
404 
405       c = *buffer->cur++;
406     }
407   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
408   while (is_nvspace (c));
409 
410   buffer->cur--;
411   return 1;
412 }
413 
414 /* See if the characters of a number token are valid in a name (no
415    '.', '+' or '-').  */
416 static int
417 name_p (pfile, string)
418      cpp_reader *pfile;
419      const cpp_string *string;
420 {
421   unsigned int i;
422 
423   for (i = 0; i < string->len; i++)
424     if (!is_idchar (string->text[i]))
425       return 0;
426 
427   return 1;
428 }
429 
430 /* Parse an identifier, skipping embedded backslash-newlines.  This is
431    a critical inner loop.  The common case is an identifier which has
432    not been split by backslash-newline, does not contain a dollar
433    sign, and has already been scanned (roughly 10:1 ratio of
434    seen:unseen identifiers in normal code; the distribution is
435    Poisson-like).  Second most common case is a new identifier, not
436    split and no dollar sign.  The other possibilities are rare and
437    have been relegated to parse_slow.  */
438 static cpp_hashnode *
439 parse_identifier (pfile)
440      cpp_reader *pfile;
441 {
442   cpp_hashnode *result;
443   const uchar *cur, *base;
444 
445   /* Fast-path loop.  Skim over a normal identifier.
446      N.B. ISIDNUM does not include $.  */
447   cur = pfile->buffer->cur;
448   while (ISIDNUM (*cur))
449     cur++;
450 
451   /* Check for slow-path cases.  */
452   if (*cur == '?' || *cur == '\\' || *cur == '$')
453     {
454       unsigned int len;
455 
456       base = parse_slow (pfile, cur, 0, &len);
457       result = (cpp_hashnode *)
458 	ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
459     }
460   else
461     {
462       base = pfile->buffer->cur - 1;
463       pfile->buffer->cur = cur;
464       result = (cpp_hashnode *)
465 	ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
466     }
467 
468   /* Rarely, identifiers require diagnostics when lexed.
469      XXX Has to be forced out of the fast path.  */
470   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
471 			&& !pfile->state.skipping, 0))
472     {
473       /* It is allowed to poison the same identifier twice.  */
474       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
475 	cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
476 		   NODE_NAME (result));
477 
478       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
479 	 replacement list of a variadic macro.  */
480       if (result == pfile->spec_nodes.n__VA_ARGS__
481 	  && !pfile->state.va_args_ok)
482 	cpp_error (pfile, DL_PEDWARN,
483 	"__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
484     }
485 
486   return result;
487 }
488 
489 /* Slow path.  This handles numbers and identifiers which have been
490    split, or contain dollar signs.  The part of the token from
491    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
492    1 if it's a number, and 2 if it has a leading period.  Returns a
493    pointer to the token's NUL-terminated spelling in permanent
494    storage, and sets PLEN to its length.  */
495 static uchar *
496 parse_slow (pfile, cur, number_p, plen)
497      cpp_reader *pfile;
498      const uchar *cur;
499      int number_p;
500      unsigned int *plen;
501 {
502   cpp_buffer *buffer = pfile->buffer;
503   const uchar *base = buffer->cur - 1;
504   struct obstack *stack = &pfile->hash_table->stack;
505   unsigned int c, prevc, saw_dollar = 0;
506 
507   /* Place any leading period.  */
508   if (number_p == 2)
509     obstack_1grow (stack, '.');
510 
511   /* Copy the part of the token which is known to be okay.  */
512   obstack_grow (stack, base, cur - base);
513 
514   /* Now process the part which isn't.  We are looking at one of
515      '$', '\\', or '?' on entry to this loop.  */
516   prevc = cur[-1];
517   c = *cur++;
518   buffer->cur = cur;
519   for (;;)
520     {
521       /* Potential escaped newline?  */
522       buffer->backup_to = buffer->cur - 1;
523       if (c == '?' || c == '\\')
524 	c = skip_escaped_newlines (pfile);
525 
526       if (!is_idchar (c))
527 	{
528 	  if (!number_p)
529 	    break;
530 	  if (c != '.' && !VALID_SIGN (c, prevc))
531 	    break;
532 	}
533 
534       /* Handle normal identifier characters in this loop.  */
535       do
536 	{
537 	  prevc = c;
538 	  obstack_1grow (stack, c);
539 
540 	  if (c == '$')
541 	    saw_dollar++;
542 
543 	  c = *buffer->cur++;
544 	}
545       while (is_idchar (c));
546     }
547 
548   /* Step back over the unwanted char.  */
549   BACKUP ();
550 
551   /* $ is not an identifier character in the standard, but is commonly
552      accepted as an extension.  Don't warn about it in skipped
553      conditional blocks.  */
554   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
555     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
556 
557   /* Identifiers and numbers are null-terminated.  */
558   *plen = obstack_object_size (stack);
559   obstack_1grow (stack, '\0');
560   return obstack_finish (stack);
561 }
562 
563 /* Parse a number, beginning with character C, skipping embedded
564    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
565    before C.  Place the result in NUMBER.  */
566 static void
567 parse_number (pfile, number, leading_period)
568      cpp_reader *pfile;
569      cpp_string *number;
570      int leading_period;
571 {
572   const uchar *cur;
573 
574   /* Fast-path loop.  Skim over a normal number.
575      N.B. ISIDNUM does not include $.  */
576   cur = pfile->buffer->cur;
577   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
578     cur++;
579 
580   /* Check for slow-path cases.  */
581   if (*cur == '?' || *cur == '\\' || *cur == '$')
582     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
583   else
584     {
585       const uchar *base = pfile->buffer->cur - 1;
586       uchar *dest;
587 
588       number->len = cur - base + leading_period;
589       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
590       dest[number->len] = '\0';
591       number->text = dest;
592 
593       if (leading_period)
594 	*dest++ = '.';
595       memcpy (dest, base, cur - base);
596       pfile->buffer->cur = cur;
597     }
598 }
599 
600 /* Subroutine of parse_string.  */
601 static int
602 unescaped_terminator_p (pfile, dest)
603      cpp_reader *pfile;
604      const unsigned char *dest;
605 {
606   const unsigned char *start, *temp;
607 
608   /* In #include-style directives, terminators are not escapeable.  */
609   if (pfile->state.angled_headers)
610     return 1;
611 
612   start = BUFF_FRONT (pfile->u_buff);
613 
614   /* An odd number of consecutive backslashes represents an escaped
615      terminator.  */
616   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
617     ;
618 
619   return ((dest - temp) & 1) == 0;
620 }
621 
622 /* Parses a string, character constant, or angle-bracketed header file
623    name.  Handles embedded trigraphs and escaped newlines.  The stored
624    string is guaranteed NUL-terminated, but it is not guaranteed that
625    this is the first NUL since embedded NULs are preserved.
626 
627    When this function returns, buffer->cur points to the next
628    character to be processed.  */
629 static void
630 parse_string (pfile, token, terminator)
631      cpp_reader *pfile;
632      cpp_token *token;
633      cppchar_t terminator;
634 {
635   cpp_buffer *buffer = pfile->buffer;
636   unsigned char *dest, *limit;
637   cppchar_t c;
638   bool warned_nulls = false;
639 #ifdef MULTIBYTE_CHARS
640   wchar_t wc;
641   int char_len;
642 #endif
643 
644   dest = BUFF_FRONT (pfile->u_buff);
645   limit = BUFF_LIMIT (pfile->u_buff);
646 
647 #ifdef MULTIBYTE_CHARS
648   /* Reset multibyte conversion state.  */
649   (void) local_mbtowc (NULL, NULL, 0);
650 #endif
651   for (;;)
652     {
653       /* We need room for another char, possibly the terminating NUL.  */
654       if ((size_t) (limit - dest) < 1)
655 	{
656 	  size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
657 	  _cpp_extend_buff (pfile, &pfile->u_buff, 2);
658 	  dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
659 	  limit = BUFF_LIMIT (pfile->u_buff);
660 	}
661 
662 #ifdef MULTIBYTE_CHARS
663       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
664 			       buffer->rlimit - buffer->cur);
665       if (char_len == -1)
666 	{
667 	  cpp_error (pfile, DL_WARNING,
668 		     "ignoring invalid multibyte character");
669 	  char_len = 1;
670 	  c = *buffer->cur++;
671 	}
672       else
673 	{
674 	  buffer->cur += char_len;
675 	  c = wc;
676 	}
677 #else
678       c = *buffer->cur++;
679 #endif
680 
681       /* Handle trigraphs, escaped newlines etc.  */
682       if (c == '?' || c == '\\')
683 	c = skip_escaped_newlines (pfile);
684 
685       if (c == terminator)
686 	{
687 	  if (unescaped_terminator_p (pfile, dest))
688 	    break;
689 	}
690       else if (is_vspace (c))
691 	{
692 	  /* No string literal may extend over multiple lines.  In
693 	     assembly language, suppress the error except for <>
694 	     includes.  This is a kludge around not knowing where
695 	     comments are.  */
696 	unterminated:
697 	  if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
698 	    cpp_error (pfile, DL_ERROR, "missing terminating %c character",
699 		       (int) terminator);
700 	  buffer->cur--;
701 	  break;
702 	}
703       else if (c == '\0')
704 	{
705 	  if (buffer->cur - 1 == buffer->rlimit)
706 	    goto unterminated;
707 	  if (!warned_nulls)
708 	    {
709 	      warned_nulls = true;
710 	      cpp_error (pfile, DL_WARNING,
711 			 "null character(s) preserved in literal");
712 	    }
713 	}
714 #ifdef MULTIBYTE_CHARS
715       if (char_len > 1)
716 	{
717 	  for ( ; char_len > 0; --char_len)
718 	    *dest++ = (*buffer->cur - char_len);
719 	}
720       else
721 #endif
722 	*dest++ = c;
723     }
724 
725   *dest = '\0';
726 
727   token->val.str.text = BUFF_FRONT (pfile->u_buff);
728   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
729   BUFF_FRONT (pfile->u_buff) = dest + 1;
730 }
731 
732 /* The stored comment includes the comment start and any terminator.  */
733 static void
734 save_comment (pfile, token, from, type)
735      cpp_reader *pfile;
736      cpp_token *token;
737      const unsigned char *from;
738      cppchar_t type;
739 {
740   unsigned char *buffer;
741   unsigned int len, clen;
742 
743   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
744 
745   /* C++ comments probably (not definitely) have moved past a new
746      line, which we don't want to save in the comment.  */
747   if (is_vspace (pfile->buffer->cur[-1]))
748     len--;
749 
750   /* If we are currently in a directive, then we need to store all
751      C++ comments as C comments internally, and so we need to
752      allocate a little extra space in that case.
753 
754      Note that the only time we encounter a directive here is
755      when we are saving comments in a "#define".  */
756   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
757 
758   buffer = _cpp_unaligned_alloc (pfile, clen);
759 
760   token->type = CPP_COMMENT;
761   token->val.str.len = clen;
762   token->val.str.text = buffer;
763 
764   buffer[0] = '/';
765   memcpy (buffer + 1, from, len - 1);
766 
767   /* Finish conversion to a C comment, if necessary.  */
768   if (pfile->state.in_directive && type == '/')
769     {
770       buffer[1] = '*';
771       buffer[clen - 2] = '*';
772       buffer[clen - 1] = '/';
773     }
774 }
775 
776 /* Allocate COUNT tokens for RUN.  */
777 void
778 _cpp_init_tokenrun (run, count)
779      tokenrun *run;
780      unsigned int count;
781 {
782   run->base = xnewvec (cpp_token, count);
783   run->limit = run->base + count;
784   run->next = NULL;
785 }
786 
787 /* Returns the next tokenrun, or creates one if there is none.  */
788 static tokenrun *
789 next_tokenrun (run)
790      tokenrun *run;
791 {
792   if (run->next == NULL)
793     {
794       run->next = xnew (tokenrun);
795       run->next->prev = run;
796       _cpp_init_tokenrun (run->next, 250);
797     }
798 
799   return run->next;
800 }
801 
802 /* Allocate a single token that is invalidated at the same time as the
803    rest of the tokens on the line.  Has its line and col set to the
804    same as the last lexed token, so that diagnostics appear in the
805    right place.  */
806 cpp_token *
807 _cpp_temp_token (pfile)
808      cpp_reader *pfile;
809 {
810   cpp_token *old, *result;
811 
812   old = pfile->cur_token - 1;
813   if (pfile->cur_token == pfile->cur_run->limit)
814     {
815       pfile->cur_run = next_tokenrun (pfile->cur_run);
816       pfile->cur_token = pfile->cur_run->base;
817     }
818 
819   result = pfile->cur_token++;
820   result->line = old->line;
821   result->col = old->col;
822   return result;
823 }
824 
825 /* Lex a token into RESULT (external interface).  Takes care of issues
826    like directive handling, token lookahead, multiple include
827    optimization and skipping.  */
828 const cpp_token *
829 _cpp_lex_token (pfile)
830      cpp_reader *pfile;
831 {
832   cpp_token *result;
833 
834   for (;;)
835     {
836       if (pfile->cur_token == pfile->cur_run->limit)
837 	{
838 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
839 	  pfile->cur_token = pfile->cur_run->base;
840 	}
841 
842       if (pfile->lookaheads)
843 	{
844 	  pfile->lookaheads--;
845 	  result = pfile->cur_token++;
846 	}
847       else
848 	result = _cpp_lex_direct (pfile);
849 
850       if (result->flags & BOL)
851 	{
852 	  /* Is this a directive.  If _cpp_handle_directive returns
853 	     false, it is an assembler #.  */
854 	  if (result->type == CPP_HASH
855 	      /* 6.10.3 p 11: Directives in a list of macro arguments
856 		 gives undefined behavior.  This implementation
857 		 handles the directive as normal.  */
858 	      && pfile->state.parsing_args != 1
859 	      && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
860 	    continue;
861 	  if (pfile->cb.line_change && !pfile->state.skipping)
862 	    (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
863 	}
864 
865       /* We don't skip tokens in directives.  */
866       if (pfile->state.in_directive)
867 	break;
868 
869       /* Outside a directive, invalidate controlling macros.  At file
870 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
871 	 get here and MI optimisation works.  */
872       pfile->mi_valid = false;
873 
874       if (!pfile->state.skipping || result->type == CPP_EOF)
875 	break;
876     }
877 
878   return result;
879 }
880 
881 /* A NUL terminates the current buffer.  For ISO preprocessing this is
882    EOF, but for traditional preprocessing it indicates we need a line
883    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
884    to return a CPP_EOF to the caller.  */
885 static bool
886 continue_after_nul (pfile)
887      cpp_reader *pfile;
888 {
889   cpp_buffer *buffer = pfile->buffer;
890   bool more = false;
891 
892   buffer->saved_flags = BOL;
893   if (CPP_OPTION (pfile, traditional))
894     {
895       if (pfile->state.in_directive)
896 	return false;
897 
898       _cpp_remove_overlay (pfile);
899       more = _cpp_read_logical_line_trad (pfile);
900       _cpp_overlay_buffer (pfile, pfile->out.base,
901 			   pfile->out.cur - pfile->out.base);
902       pfile->line = pfile->out.first_line;
903     }
904   else
905     {
906       /* Stop parsing arguments with a CPP_EOF.  When we finally come
907 	 back here, do the work of popping the buffer.  */
908       if (!pfile->state.parsing_args)
909 	{
910 	  if (buffer->cur != buffer->line_base)
911 	    {
912 	      /* Non-empty files should end in a newline.  Don't warn
913 		 for command line and _Pragma buffers.  */
914 	      if (!buffer->from_stage3)
915 		cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
916 	      handle_newline (pfile);
917 	    }
918 
919 	  /* Similarly, finish an in-progress directive with CPP_EOF
920 	     before popping the buffer.  */
921 	  if (!pfile->state.in_directive && buffer->prev)
922 	    {
923 	      more = !buffer->return_at_eof;
924 	      _cpp_pop_buffer (pfile);
925 	    }
926 	}
927     }
928 
929   return more;
930 }
931 
/* Set result->type to THEN_TYPE if the next effective character is
   CHAR, consuming it; otherwise rewind with BACKUP () and set
   result->type to ELSE_TYPE.  Expects `pfile', `buffer' and `result'
   to be in scope at the point of use.  The arguments are
   parenthesized in the expansion so that any expression can be passed
   without precedence surprises.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) == (CHAR))	\
      result->type = (THEN_TYPE);		\
    else					\
      {						\
        BACKUP ();				\
        result->type = (ELSE_TYPE);		\
      }						\
  } while (0)
942 
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimisation, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   The body is one switch on the first character of the token.  Three
   labels ahead of the switch act as re-entry points: fresh_line
   (re-read pfile->buffer and pick up the saved flags),
   update_tokens_line (refresh the token's line number) and
   skipped_white (fetch the next character and refresh the column).
   The trigraph label re-dispatches on a character that has already
   been fetched.  */
cpp_token *
_cpp_lex_direct (pfile)
     cpp_reader *pfile;
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  /* Claim the current token slot and advance cur_token past it.  */
  cpp_token *result = pfile->cur_token++;

  /* Reached for the first token, after a newline outside a directive,
     and after continue_after_nul succeeds.  The flags saved at the
     previous newline (see the '\n' case) are moved into the token.  */
 fresh_line:
  buffer = pfile->buffer;
  result->flags = buffer->saved_flags;
  buffer->saved_flags = 0;
 update_tokens_line:
  result->line = pfile->line;

 skipped_white:
  c = *buffer->cur++;
  result->col = CPP_BUF_COLUMN (buffer, buffer->cur);

 trigraph:
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      /* A nonzero return means lexing can continue with the next
	 character; zero is treated as end of buffer below.  */
      if (skip_whitespace (pfile, c))
	goto skipped_white;

      /* End of buffer.  */
      buffer->cur--;
      if (continue_after_nul (pfile))
	goto fresh_line;
      result->type = CPP_EOF;
      break;

    case '\n': case '\r':
      handle_newline (pfile);
      /* The first token of the next line will inherit BOL.  */
      buffer->saved_flags = BOL;
      if (! pfile->state.in_directive)
	{
	  /* With parsing_args == 2 the newline also marks the next
	     token as preceded by whitespace.  */
	  if (pfile->state.parsing_args == 2)
	    buffer->saved_flags |= PREV_WHITE;
	  /* Unless tokens are being kept, restart at the base of the
	     token run so the slots of the finished line are reused.  */
	  if (!pfile->keep_tokens)
	    {
	      pfile->cur_run = &pfile->base_run;
	      result = pfile->base_run.base;
	      pfile->cur_token = result + 1;
	    }
	  goto fresh_line;
	}
      /* Inside a directive the newline terminates it.  */
      result->type = CPP_EOF;
      break;

    case '?':
    case '\\':
      /* These could start an escaped newline, or '?' a trigraph.  Let
	 skip_escaped_newlines do all the work.  */
      {
	unsigned int line = pfile->line;

	c = skip_escaped_newlines (pfile);
	if (line != pfile->line)
	  {
	    /* Un-read C so that it is fetched again at skipped_white,
	       this time with the updated line and column.  */
	    buffer->cur--;
	    /* We had at least one escaped newline of some sort.
	       Update the token's line and column.  */
	    goto update_tokens_line;
	  }
      }

      /* We are either the original '?' or '\\', or a trigraph.  */
      if (c == '?')
	result->type = CPP_QUERY;
      else if (c == '\\')
	goto random_char;
      else
	/* Any other value is dispatched through the switch again.  */
	goto trigraph;
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      result->type = CPP_NUMBER;
      parse_number (pfile, &result->val.str, 0);
      break;

    case 'L':
      /* 'L' may introduce wide characters or strings.  */
      {
	/* Remember where we are so the lookahead can be undone if the
	   'L' turns out to start an ordinary identifier.  */
	const unsigned char *pos = buffer->cur;

	c = get_effective_char (pfile);
	if (c == '\'' || c == '"')
	  {
	    result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
	    parse_string (pfile, result, c);
	    break;
	  }
	buffer->cur = pos;
      }
      /* Fall through.  */

    start_ident:
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      /* 'L' is absent from the list above because it has its own
	 case, which falls through to here.  */
      result->type = CPP_NAME;
      result->val.node = parse_identifier (pfile);

      /* Convert named operators to their proper types.  */
      if (result->val.node->flags & NODE_OPERATOR)
	{
	  result->flags |= NAMED_OP;
	  result->type = result->val.node->value.operator;
	}
      break;

    case '\'':
    case '"':
      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
      parse_string (pfile, result, c);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = get_effective_char (pfile);

      if (c == '*')
	{
	  if (skip_block_comment (pfile))
	    cpp_error (pfile, DL_ERROR, "unterminated comment");
	}
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
			    || CPP_IN_SYSTEM_HEADER (pfile)))
	{
	  /* Warn about comments only if pedantically GNUC89, and not
	     in system headers.  */
	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
	      && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, DL_PEDWARN,
			 "C++ style comments are not allowed in ISO C90");
	      cpp_error (pfile, DL_PEDWARN,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }

	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
	    cpp_error (pfile, DL_WARNING, "multi-line comment");
	}
      else if (c == '=')
	{
	  result->type = CPP_DIV_EQ;
	  break;
	}
      else
	{
	  BACKUP ();
	  result->type = CPP_DIV;
	  break;
	}

      /* Only the two comment branches reach this point.  When
	 comments are not being saved, the comment counts as
	 whitespace and lexing resumes after it.  */
      if (!pfile->state.save_comments)
	{
	  result->flags |= PREV_WHITE;
	  goto update_tokens_line;
	}

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* With angled_headers set, '<' opens a header name that runs to
	 the matching '>'.  */
      if (pfile->state.angled_headers)
	{
	  result->type = CPP_HEADER_NAME;
	  parse_string (pfile, result, '>');
	  break;
	}

      c = get_effective_char (pfile);
      if (c == '=')
	result->type = CPP_LESS_EQ;
      else if (c == '<')
	IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
	IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
      else if (c == ':' && CPP_OPTION (pfile, digraphs))
	{
	  /* Digraph "<:" for '['.  */
	  result->type = CPP_OPEN_SQUARE;
	  result->flags |= DIGRAPH;
	}
      else if (c == '%' && CPP_OPTION (pfile, digraphs))
	{
	  /* Digraph "<%" for '{'.  */
	  result->type = CPP_OPEN_BRACE;
	  result->flags |= DIGRAPH;
	}
      else
	{
	  BACKUP ();
	  result->type = CPP_LESS;
	}
      break;

    case '>':
      c = get_effective_char (pfile);
      if (c == '=')
	result->type = CPP_GREATER_EQ;
      else if (c == '>')
	IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
	IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
      else
	{
	  BACKUP ();
	  result->type = CPP_GREATER;
	}
      break;

    case '%':
      c = get_effective_char (pfile);
      if (c == '=')
	result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs) && c == ':')
	{
	  /* Digraph "%:" for '#'; "%:%:" is the digraph for "##".  */
	  result->flags |= DIGRAPH;
	  result->type = CPP_HASH;
	  if (get_effective_char (pfile) == '%')
	    {
	      const unsigned char *pos = buffer->cur;

	      if (get_effective_char (pfile) == ':')
		result->type = CPP_PASTE;
	      else
		/* Not "%:%:".  POS was taken just after the second
		   '%', so this rewinds to before that '%'.  */
		buffer->cur = pos - 1;
	    }
	  else
	    BACKUP ();
	}
      else if (CPP_OPTION (pfile, digraphs) && c == '>')
	{
	  /* Digraph "%>" for '}'.  */
	  result->flags |= DIGRAPH;
	  result->type = CPP_CLOSE_BRACE;
	}
      else
	{
	  BACKUP ();
	  result->type = CPP_MOD;
	}
      break;

    case '.':
      result->type = CPP_DOT;
      c = get_effective_char (pfile);
      if (c == '.')
	{
	  const unsigned char *pos = buffer->cur;

	  if (get_effective_char (pfile) == '.')
	    result->type = CPP_ELLIPSIS;
	  else
	    /* Only two dots: rewind to before the second one so that
	       it is lexed as a token of its own.  */
	    buffer->cur = pos - 1;
	}
      /* All known character sets have 0...9 contiguous.  */
      else if (ISDIGIT (c))
	{
	  /* A number that starts with a decimal point; the final
	     argument differs from the digit case above (1 versus 0).  */
	  result->type = CPP_NUMBER;
	  parse_number (pfile, &result->val.str, 1);
	}
      else if (c == '*' && CPP_OPTION (pfile, cplusplus))
	result->type = CPP_DOT_STAR;
      else
	BACKUP ();
      break;

    case '+':
      c = get_effective_char (pfile);
      if (c == '+')
	result->type = CPP_PLUS_PLUS;
      else if (c == '=')
	result->type = CPP_PLUS_EQ;
      else
	{
	  BACKUP ();
	  result->type = CPP_PLUS;
	}
      break;

    case '-':
      c = get_effective_char (pfile);
      if (c == '>')
	{
	  result->type = CPP_DEREF;
	  /* "->*" is only recognised when lexing C++.  */
	  if (CPP_OPTION (pfile, cplusplus))
	    {
	      if (get_effective_char (pfile) == '*')
		result->type = CPP_DEREF_STAR;
	      else
		BACKUP ();
	    }
	}
      else if (c == '-')
	result->type = CPP_MINUS_MINUS;
      else if (c == '=')
	result->type = CPP_MINUS_EQ;
      else
	{
	  BACKUP ();
	  result->type = CPP_MINUS;
	}
      break;

    case '&':
      c = get_effective_char (pfile);
      if (c == '&')
	result->type = CPP_AND_AND;
      else if (c == '=')
	result->type = CPP_AND_EQ;
      else
	{
	  BACKUP ();
	  result->type = CPP_AND;
	}
      break;

    case '|':
      c = get_effective_char (pfile);
      if (c == '|')
	result->type = CPP_OR_OR;
      else if (c == '=')
	result->type = CPP_OR_EQ;
      else
	{
	  BACKUP ();
	  result->type = CPP_OR;
	}
      break;

    case ':':
      c = get_effective_char (pfile);
      if (c == ':' && CPP_OPTION (pfile, cplusplus))
	result->type = CPP_SCOPE;
      else if (c == '>' && CPP_OPTION (pfile, digraphs))
	{
	  /* Digraph ":>" for ']'.  */
	  result->flags |= DIGRAPH;
	  result->type = CPP_CLOSE_SQUARE;
	}
      else
	{
	  BACKUP ();
	  result->type = CPP_COLON;
	}
      break;

    /* Operators whose only two-character form is made with one fixed
       following character.  */
    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;

    /* Single-character punctuators.  */
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
      if (CPP_OPTION (pfile, dollars_in_ident))
	goto start_ident;
      /* Fall through...  */

    /* Anything not recognised above becomes a CPP_OTHER token that
       carries the character itself.  */
    random_char:
    default:
      result->type = CPP_OTHER;
      result->val.c = c;
      break;
    }

  return result;
}
1349 
1350 /* An upper bound on the number of bytes needed to spell TOKEN,
1351    including preceding whitespace.  */
1352 unsigned int
1353 cpp_token_len (token)
1354      const cpp_token *token;
1355 {
1356   unsigned int len;
1357 
1358   switch (TOKEN_SPELL (token))
1359     {
1360     default:		len = 0;				break;
1361     case SPELL_NUMBER:
1362     case SPELL_STRING:	len = token->val.str.len;		break;
1363     case SPELL_IDENT:	len = NODE_LEN (token->val.node);	break;
1364     }
1365   /* 1 for whitespace, 4 for comment delimiters.  */
1366   return len + 5;
1367 }
1368 
1369 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1370    already contain the enough space to hold the token's spelling.
1371    Returns a pointer to the character after the last character
1372    written.  */
1373 unsigned char *
1374 cpp_spell_token (pfile, token, buffer)
1375      cpp_reader *pfile;		/* Would be nice to be rid of this...  */
1376      const cpp_token *token;
1377      unsigned char *buffer;
1378 {
1379   switch (TOKEN_SPELL (token))
1380     {
1381     case SPELL_OPERATOR:
1382       {
1383 	const unsigned char *spelling;
1384 	unsigned char c;
1385 
1386 	if (token->flags & DIGRAPH)
1387 	  spelling
1388 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1389 	else if (token->flags & NAMED_OP)
1390 	  goto spell_ident;
1391 	else
1392 	  spelling = TOKEN_NAME (token);
1393 
1394 	while ((c = *spelling++) != '\0')
1395 	  *buffer++ = c;
1396       }
1397       break;
1398 
1399     case SPELL_CHAR:
1400       *buffer++ = token->val.c;
1401       break;
1402 
1403     spell_ident:
1404     case SPELL_IDENT:
1405       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1406       buffer += NODE_LEN (token->val.node);
1407       break;
1408 
1409     case SPELL_NUMBER:
1410       memcpy (buffer, token->val.str.text, token->val.str.len);
1411       buffer += token->val.str.len;
1412       break;
1413 
1414     case SPELL_STRING:
1415       {
1416 	int left, right, tag;
1417 	switch (token->type)
1418 	  {
1419 	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
1420 	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
1421 	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
1422     	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
1423 	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
1424 	  default:
1425 	    cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1426 		       TOKEN_NAME (token));
1427 	    return buffer;
1428 	  }
1429 	if (tag) *buffer++ = tag;
1430 	*buffer++ = left;
1431 	memcpy (buffer, token->val.str.text, token->val.str.len);
1432 	buffer += token->val.str.len;
1433 	*buffer++ = right;
1434       }
1435       break;
1436 
1437     case SPELL_NONE:
1438       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1439       break;
1440     }
1441 
1442   return buffer;
1443 }
1444 
1445 /* Returns TOKEN spelt as a null-terminated string.  The string is
1446    freed when the reader is destroyed.  Useful for diagnostics.  */
1447 unsigned char *
1448 cpp_token_as_text (pfile, token)
1449      cpp_reader *pfile;
1450      const cpp_token *token;
1451 {
1452   unsigned int len = cpp_token_len (token);
1453   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1454 
1455   end = cpp_spell_token (pfile, token, start);
1456   end[0] = '\0';
1457 
1458   return start;
1459 }
1460 
1461 /* Used by C front ends, which really should move to using
1462    cpp_token_as_text.  */
1463 const char *
1464 cpp_type2name (type)
1465      enum cpp_ttype type;
1466 {
1467   return (const char *) token_spellings[type].name;
1468 }
1469 
1470 /* Writes the spelling of token to FP, without any preceding space.
1471    Separated from cpp_spell_token for efficiency - to avoid stdio
1472    double-buffering.  */
1473 void
1474 cpp_output_token (token, fp)
1475      const cpp_token *token;
1476      FILE *fp;
1477 {
1478   switch (TOKEN_SPELL (token))
1479     {
1480     case SPELL_OPERATOR:
1481       {
1482 	const unsigned char *spelling;
1483 	int c;
1484 
1485 	if (token->flags & DIGRAPH)
1486 	  spelling
1487 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1488 	else if (token->flags & NAMED_OP)
1489 	  goto spell_ident;
1490 	else
1491 	  spelling = TOKEN_NAME (token);
1492 
1493 	c = *spelling;
1494 	do
1495 	  putc (c, fp);
1496 	while ((c = *++spelling) != '\0');
1497       }
1498       break;
1499 
1500     case SPELL_CHAR:
1501       putc (token->val.c, fp);
1502       break;
1503 
1504     spell_ident:
1505     case SPELL_IDENT:
1506       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1507     break;
1508 
1509     case SPELL_NUMBER:
1510       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1511       break;
1512 
1513     case SPELL_STRING:
1514       {
1515 	int left, right, tag;
1516 	switch (token->type)
1517 	  {
1518 	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
1519 	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
1520 	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
1521     	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
1522 	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
1523 	  default:
1524 	    fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1525 	    return;
1526 	  }
1527 	if (tag) putc (tag, fp);
1528 	putc (left, fp);
1529 	fwrite (token->val.str.text, 1, token->val.str.len, fp);
1530 	putc (right, fp);
1531       }
1532       break;
1533 
1534     case SPELL_NONE:
1535       /* An error, most probably.  */
1536       break;
1537     }
1538 }
1539 
1540 /* Compare two tokens.  */
1541 int
1542 _cpp_equiv_tokens (a, b)
1543      const cpp_token *a, *b;
1544 {
1545   if (a->type == b->type && a->flags == b->flags)
1546     switch (TOKEN_SPELL (a))
1547       {
1548       default:			/* Keep compiler happy.  */
1549       case SPELL_OPERATOR:
1550 	return 1;
1551       case SPELL_CHAR:
1552 	return a->val.c == b->val.c; /* Character.  */
1553       case SPELL_NONE:
1554 	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1555       case SPELL_IDENT:
1556 	return a->val.node == b->val.node;
1557       case SPELL_NUMBER:
1558       case SPELL_STRING:
1559 	return (a->val.str.len == b->val.str.len
1560 		&& !memcmp (a->val.str.text, b->val.str.text,
1561 			    a->val.str.len));
1562       }
1563 
1564   return 0;
1565 }
1566 
1567 /* Returns nonzero if a space should be inserted to avoid an
1568    accidental token paste for output.  For simplicity, it is
1569    conservative, and occasionally advises a space where one is not
1570    needed, e.g. "." and ".2".  */
1571 int
1572 cpp_avoid_paste (pfile, token1, token2)
1573      cpp_reader *pfile;
1574      const cpp_token *token1, *token2;
1575 {
1576   enum cpp_ttype a = token1->type, b = token2->type;
1577   cppchar_t c;
1578 
1579   if (token1->flags & NAMED_OP)
1580     a = CPP_NAME;
1581   if (token2->flags & NAMED_OP)
1582     b = CPP_NAME;
1583 
1584   c = EOF;
1585   if (token2->flags & DIGRAPH)
1586     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1587   else if (token_spellings[b].category == SPELL_OPERATOR)
1588     c = token_spellings[b].name[0];
1589 
1590   /* Quickly get everything that can paste with an '='.  */
1591   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1592     return 1;
1593 
1594   switch (a)
1595     {
1596     case CPP_GREATER:	return c == '>' || c == '?';
1597     case CPP_LESS:	return c == '<' || c == '?' || c == '%' || c == ':';
1598     case CPP_PLUS:	return c == '+';
1599     case CPP_MINUS:	return c == '-' || c == '>';
1600     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1601     case CPP_MOD:	return c == ':' || c == '>';
1602     case CPP_AND:	return c == '&';
1603     case CPP_OR:	return c == '|';
1604     case CPP_COLON:	return c == ':' || c == '>';
1605     case CPP_DEREF:	return c == '*';
1606     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1607     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1608     case CPP_NAME:	return ((b == CPP_NUMBER
1609 				 && name_p (pfile, &token2->val.str))
1610 				|| b == CPP_NAME
1611 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1612     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1613 				|| c == '.' || c == '+' || c == '-');
1614     case CPP_OTHER:	return (CPP_OPTION (pfile, objc)
1615 				&& token1->val.c == '@'
1616 				&& (b == CPP_NAME || b == CPP_STRING));
1617     default:		break;
1618     }
1619 
1620   return 0;
1621 }
1622 
1623 /* Output all the remaining tokens on the current line, and a newline
1624    character, to FP.  Leading whitespace is removed.  If there are
1625    macros, special token padding is not performed.  */
1626 void
1627 cpp_output_line (pfile, fp)
1628      cpp_reader *pfile;
1629      FILE *fp;
1630 {
1631   const cpp_token *token;
1632 
1633   token = cpp_get_token (pfile);
1634   while (token->type != CPP_EOF)
1635     {
1636       cpp_output_token (token, fp);
1637       token = cpp_get_token (pfile);
1638       if (token->flags & PREV_WHITE)
1639 	putc (' ', fp);
1640     }
1641 
1642   putc ('\n', fp);
1643 }
1644 
/* Return the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (! hex_p (c))
    abort ();

  return hex_value (c);
}
1655 
1656 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1657    failure if cpplib is not parsing C++ or C99.  Such failure is
1658    silent, and no variables are updated.  Otherwise returns 0, and
1659    warns if -Wtraditional.
1660 
1661    [lex.charset]: The character designated by the universal character
1662    name \UNNNNNNNN is that character whose character short name in
1663    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1664    universal character name \uNNNN is that character whose character
1665    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1666    for a universal character name is less than 0x20 or in the range
1667    0x7F-0x9F (inclusive), or if the universal character name
1668    designates a character in the basic source character set, then the
1669    program is ill-formed.
1670 
1671    We assume that wchar_t is Unicode, so we don't need to do any
1672    mapping.  Is this ever wrong?
1673 
1674    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1675    LIMIT is the end of the string or charconst.  PSTR is updated to
1676    point after the UCS on return, and the UCS is written into PC.  */
1677 
1678 static int
1679 maybe_read_ucs (pfile, pstr, limit, pc)
1680      cpp_reader *pfile;
1681      const unsigned char **pstr;
1682      const unsigned char *limit;
1683      cppchar_t *pc;
1684 {
1685   const unsigned char *p = *pstr;
1686   unsigned int code = 0;
1687   unsigned int c = *pc, length;
1688 
1689   /* Only attempt to interpret a UCS for C++ and C99.  */
1690   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1691     return 1;
1692 
1693   if (CPP_WTRADITIONAL (pfile))
1694     cpp_error (pfile, DL_WARNING,
1695 	       "the meaning of '\\%c' is different in traditional C", c);
1696 
1697   length = (c == 'u' ? 4: 8);
1698 
1699   if ((size_t) (limit - p) < length)
1700     {
1701       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1702       /* Skip to the end to avoid more diagnostics.  */
1703       p = limit;
1704     }
1705   else
1706     {
1707       for (; length; length--, p++)
1708 	{
1709 	  c = *p;
1710 	  if (ISXDIGIT (c))
1711 	    code = (code << 4) + hex_digit_value (c);
1712 	  else
1713 	    {
1714 	      cpp_error (pfile, DL_ERROR,
1715 			 "non-hex digit '%c' in universal-character-name", c);
1716 	      /* We shouldn't skip in case there are multibyte chars.  */
1717 	      break;
1718 	    }
1719 	}
1720     }
1721 
1722 #ifdef TARGET_EBCDIC
1723   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1724   code = 0x3f;  /* EBCDIC invalid character */
1725 #else
1726  /* True extended characters are OK.  */
1727   if (code >= 0xa0
1728       && !(code & 0x80000000)
1729       && !(code >= 0xD800 && code <= 0xDFFF))
1730     ;
1731   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1732      hex escapes so that this also works with EBCDIC hosts.  */
1733   else if (code == 0x24 || code == 0x40 || code == 0x60)
1734     ;
1735   /* Don't give another error if one occurred above.  */
1736   else if (length == 0)
1737     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1738 #endif
1739 
1740   *pstr = p;
1741   *pc = code;
1742   return 0;
1743 }
1744 
1745 /* Returns the value of an escape sequence, truncated to the correct
1746    target precision.  PSTR points to the input pointer, which is just
1747    after the backslash.  LIMIT is how much text we have.  WIDE is true
1748    if the escape sequence is part of a wide character constant or
1749    string literal.  Handles all relevant diagnostics.  */
1750 cppchar_t
1751 cpp_parse_escape (pfile, pstr, limit, wide)
1752      cpp_reader *pfile;
1753      const unsigned char **pstr;
1754      const unsigned char *limit;
1755      int wide;
1756 {
1757   int unknown = 0;
1758   const unsigned char *str = *pstr;
1759   cppchar_t c, mask;
1760   unsigned int width;
1761 
1762   if (wide)
1763     width = CPP_OPTION (pfile, wchar_precision);
1764   else
1765     width = CPP_OPTION (pfile, char_precision);
1766   if (width < BITS_PER_CPPCHAR_T)
1767     mask = ((cppchar_t) 1 << width) - 1;
1768   else
1769     mask = ~0;
1770 
1771   c = *str++;
1772   switch (c)
1773     {
1774     case '\\': case '\'': case '"': case '?': break;
1775     case 'b': c = TARGET_BS;	  break;
1776     case 'f': c = TARGET_FF;	  break;
1777     case 'n': c = TARGET_NEWLINE; break;
1778     case 'r': c = TARGET_CR;	  break;
1779     case 't': c = TARGET_TAB;	  break;
1780     case 'v': c = TARGET_VT;	  break;
1781 
1782     case '(': case '{': case '[': case '%':
1783       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1784 	 '\%' is used to prevent SCCS from getting confused.  */
1785       unknown = CPP_PEDANTIC (pfile);
1786       break;
1787 
1788     case 'a':
1789       if (CPP_WTRADITIONAL (pfile))
1790 	cpp_error (pfile, DL_WARNING,
1791 		   "the meaning of '\\a' is different in traditional C");
1792       c = TARGET_BELL;
1793       break;
1794 
1795     case 'e': case 'E':
1796       if (CPP_PEDANTIC (pfile))
1797 	cpp_error (pfile, DL_PEDWARN,
1798 		   "non-ISO-standard escape sequence, '\\%c'", (int) c);
1799       c = TARGET_ESC;
1800       break;
1801 
1802     case 'u': case 'U':
1803       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1804       break;
1805 
1806     case 'x':
1807       if (CPP_WTRADITIONAL (pfile))
1808 	cpp_error (pfile, DL_WARNING,
1809 		   "the meaning of '\\x' is different in traditional C");
1810 
1811       {
1812 	cppchar_t i = 0, overflow = 0;
1813 	int digits_found = 0;
1814 
1815 	while (str < limit)
1816 	  {
1817 	    c = *str;
1818 	    if (! ISXDIGIT (c))
1819 	      break;
1820 	    str++;
1821 	    overflow |= i ^ (i << 4 >> 4);
1822 	    i = (i << 4) + hex_digit_value (c);
1823 	    digits_found = 1;
1824 	  }
1825 
1826 	if (!digits_found)
1827 	  cpp_error (pfile, DL_ERROR,
1828 		       "\\x used with no following hex digits");
1829 
1830 	if (overflow | (i != (i & mask)))
1831 	  {
1832 	    cpp_error (pfile, DL_PEDWARN,
1833 		       "hex escape sequence out of range");
1834 	    i &= mask;
1835 	  }
1836 	c = i;
1837       }
1838       break;
1839 
1840     case '0':  case '1':  case '2':  case '3':
1841     case '4':  case '5':  case '6':  case '7':
1842       {
1843 	size_t count = 0;
1844 	cppchar_t i = c - '0';
1845 
1846 	while (str < limit && ++count < 3)
1847 	  {
1848 	    c = *str;
1849 	    if (c < '0' || c > '7')
1850 	      break;
1851 	    str++;
1852 	    i = (i << 3) + c - '0';
1853 	  }
1854 
1855 	if (i != (i & mask))
1856 	  {
1857 	    cpp_error (pfile, DL_PEDWARN,
1858 		       "octal escape sequence out of range");
1859 	    i &= mask;
1860 	  }
1861 	c = i;
1862       }
1863       break;
1864 
1865     default:
1866       unknown = 1;
1867       break;
1868     }
1869 
1870   if (unknown)
1871     {
1872       if (ISGRAPH (c))
1873 	cpp_error (pfile, DL_PEDWARN,
1874 		   "unknown escape sequence '\\%c'", (int) c);
1875       else
1876 	cpp_error (pfile, DL_PEDWARN,
1877 		   "unknown escape sequence: '\\%03o'", (int) c);
1878     }
1879 
1880   if (c > mask)
1881     {
1882       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1883       c &= mask;
1884     }
1885 
1886   *pstr = str;
1887   return c;
1888 }
1889 
1890 /* Interpret a (possibly wide) character constant in TOKEN.
1891    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1892    points to a variable that is filled in with the number of
1893    characters seen, and UNSIGNEDP to a variable that indicates whether
1894    the result has signed type.  */
1895 cppchar_t
1896 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1897      cpp_reader *pfile;
1898      const cpp_token *token;
1899      unsigned int *pchars_seen;
1900      int *unsignedp;
1901 {
1902   const unsigned char *str = token->val.str.text;
1903   const unsigned char *limit = str + token->val.str.len;
1904   unsigned int chars_seen = 0;
1905   size_t width, max_chars;
1906   cppchar_t c, mask, result = 0;
1907   bool unsigned_p;
1908 
1909 #ifdef MULTIBYTE_CHARS
1910   (void) local_mbtowc (NULL, NULL, 0);
1911 #endif
1912 
1913   /* Width in bits.  */
1914   if (token->type == CPP_CHAR)
1915     {
1916       width = CPP_OPTION (pfile, char_precision);
1917       max_chars = CPP_OPTION (pfile, int_precision) / width;
1918       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1919     }
1920   else
1921     {
1922       width = CPP_OPTION (pfile, wchar_precision);
1923       max_chars = 1;
1924       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1925     }
1926 
1927   if (width < BITS_PER_CPPCHAR_T)
1928     mask = ((cppchar_t) 1 << width) - 1;
1929   else
1930     mask = ~0;
1931 
1932   while (str < limit)
1933     {
1934 #ifdef MULTIBYTE_CHARS
1935       wchar_t wc;
1936       int char_len;
1937 
1938       char_len = local_mbtowc (&wc, str, limit - str);
1939       if (char_len == -1)
1940 	{
1941 	  cpp_error (pfile, DL_WARNING,
1942 		     "ignoring invalid multibyte character");
1943 	  c = *str++;
1944 	}
1945       else
1946 	{
1947 	  str += char_len;
1948 	  c = wc;
1949 	}
1950 #else
1951       c = *str++;
1952 #endif
1953 
1954       if (c == '\\')
1955 	c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1956 
1957 #ifdef MAP_CHARACTER
1958       if (ISPRINT (c))
1959 	c = MAP_CHARACTER (c);
1960 #endif
1961 
1962       chars_seen++;
1963 
1964       /* Truncate the character, scale the result and merge the two.  */
1965       c &= mask;
1966       if (width < BITS_PER_CPPCHAR_T)
1967 	result = (result << width) | c;
1968       else
1969 	result = c;
1970     }
1971 
1972   if (chars_seen == 0)
1973     cpp_error (pfile, DL_ERROR, "empty character constant");
1974   else if (chars_seen > 1)
1975     {
1976       /* Multichar charconsts are of type int and therefore signed.  */
1977       unsigned_p = 0;
1978 
1979       if (chars_seen > max_chars)
1980 	{
1981 	  chars_seen = max_chars;
1982 	  cpp_error (pfile, DL_WARNING,
1983 		     "character constant too long for its type");
1984 	}
1985       else if (CPP_OPTION (pfile, warn_multichar))
1986 	cpp_error (pfile, DL_WARNING, "multi-character character constant");
1987     }
1988 
1989   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1990      in WIDTH bits, but for multi-char charconsts it's value is the
1991      full target type's width.  */
1992   if (chars_seen > 1)
1993     width *= max_chars;
1994   if (width < BITS_PER_CPPCHAR_T)
1995     {
1996       mask = ((cppchar_t) 1 << width) - 1;
1997       if (unsigned_p || !(result & (1 << (width - 1))))
1998 	result &= mask;
1999       else
2000 	result |= ~mask;
2001     }
2002 
2003   *pchars_seen = chars_seen;
2004   *unsignedp = unsigned_p;
2005   return result;
2006 }
2007 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that any expression can be
   passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2022 
2023 /* Create a new allocation buffer.  Place the control block at the end
2024    of the buffer, so that buffer overflows will cause immediate chaos.  */
2025 static _cpp_buff *
2026 new_buff (len)
2027      size_t len;
2028 {
2029   _cpp_buff *result;
2030   unsigned char *base;
2031 
2032   if (len < MIN_BUFF_SIZE)
2033     len = MIN_BUFF_SIZE;
2034   len = CPP_ALIGN (len);
2035 
2036   base = xmalloc (len + sizeof (_cpp_buff));
2037   result = (_cpp_buff *) (base + len);
2038   result->base = base;
2039   result->cur = base;
2040   result->limit = base + len;
2041   result->next = NULL;
2042   return result;
2043 }
2044 
2045 /* Place a chain of unwanted allocation buffers on the free list.  */
2046 void
2047 _cpp_release_buff (pfile, buff)
2048      cpp_reader *pfile;
2049      _cpp_buff *buff;
2050 {
2051   _cpp_buff *end = buff;
2052 
2053   while (end->next)
2054     end = end->next;
2055   end->next = pfile->free_buffs;
2056   pfile->free_buffs = buff;
2057 }
2058 
2059 /* Return a free buffer of size at least MIN_SIZE.  */
2060 _cpp_buff *
2061 _cpp_get_buff (pfile, min_size)
2062      cpp_reader *pfile;
2063      size_t min_size;
2064 {
2065   _cpp_buff *result, **p;
2066 
2067   for (p = &pfile->free_buffs;; p = &(*p)->next)
2068     {
2069       size_t size;
2070 
2071       if (*p == NULL)
2072 	return new_buff (min_size);
2073       result = *p;
2074       size = result->limit - result->base;
2075       /* Return a buffer that's big enough, but don't waste one that's
2076          way too big.  */
2077       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2078 	break;
2079     }
2080 
2081   *p = result->next;
2082   result->next = NULL;
2083   result->cur = result->base;
2084   return result;
2085 }
2086 
2087 /* Creates a new buffer with enough space to hold the uncommitted
2088    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2089    the excess bytes to the new buffer.  Chains the new buffer after
2090    BUFF, and returns the new buffer.  */
2091 _cpp_buff *
2092 _cpp_append_extend_buff (pfile, buff, min_extra)
2093      cpp_reader *pfile;
2094      _cpp_buff *buff;
2095      size_t min_extra;
2096 {
2097   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2098   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2099 
2100   buff->next = new_buff;
2101   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2102   return new_buff;
2103 }
2104 
2105 /* Creates a new buffer with enough space to hold the uncommitted
2106    remaining bytes of the buffer pointed to by BUFF, and at least
2107    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2108    Chains the new buffer before the buffer pointed to by BUFF, and
2109    updates the pointer to point to the new buffer.  */
2110 void
2111 _cpp_extend_buff (pfile, pbuff, min_extra)
2112      cpp_reader *pfile;
2113      _cpp_buff **pbuff;
2114      size_t min_extra;
2115 {
2116   _cpp_buff *new_buff, *old_buff = *pbuff;
2117   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2118 
2119   new_buff = _cpp_get_buff (pfile, size);
2120   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2121   new_buff->next = old_buff;
2122   *pbuff = new_buff;
2123 }
2124 
2125 /* Free a chain of buffers starting at BUFF.  */
2126 void
2127 _cpp_free_buff (buff)
2128      _cpp_buff *buff;
2129 {
2130   _cpp_buff *next;
2131 
2132   for (; buff; buff = next)
2133     {
2134       next = buff->next;
2135       free (buff->base);
2136     }
2137 }
2138 
2139 /* Allocate permanent, unaligned storage of length LEN.  */
2140 unsigned char *
2141 _cpp_unaligned_alloc (pfile, len)
2142      cpp_reader *pfile;
2143      size_t len;
2144 {
2145   _cpp_buff *buff = pfile->u_buff;
2146   unsigned char *result = buff->cur;
2147 
2148   if (len > (size_t) (buff->limit - result))
2149     {
2150       buff = _cpp_get_buff (pfile, len);
2151       buff->next = pfile->u_buff;
2152       pfile->u_buff = buff;
2153       result = buff->cur;
2154     }
2155 
2156   buff->cur = result + len;
2157   return result;
2158 }
2159 
2160 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2161    That buffer is used for growing allocations when saving macro
2162    replacement lists in a #define, and when parsing an answer to an
2163    assertion in #assert, #unassert or #if (and therefore possibly
2164    whilst expanding macros).  It therefore must not be used by any
2165    code that they might call: specifically the lexer and the guts of
2166    the macro expander.
2167 
2168    All existing other uses clearly fit this restriction: storing
2169    registered pragmas during initialization.  */
2170 unsigned char *
2171 _cpp_aligned_alloc (pfile, len)
2172      cpp_reader *pfile;
2173      size_t len;
2174 {
2175   _cpp_buff *buff = pfile->a_buff;
2176   unsigned char *result = buff->cur;
2177 
2178   if (len > (size_t) (buff->limit - result))
2179     {
2180       buff = _cpp_get_buff (pfile, len);
2181       buff->next = pfile->a_buff;
2182       pfile->a_buff = buff;
2183       result = buff->cur;
2184     }
2185 
2186   buff->cur = result + len;
2187   return result;
2188 }
2189