xref: /openbsd/gnu/usr.bin/gcc/gcc/cpplex.c (revision dd6081ec)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7    Single-pass line tokenization by Neil Booth, April 2000
8 
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "cpphash.h"
27 
28 #ifdef MULTIBYTE_CHARS
29 #include "mbchar.h"
30 #include <locale.h>
31 #endif
32 
/* Classifies how the spelling of a token type is obtained.  The
   original note for SPELL_STRING says such tokens store their
   spelling in the token list, and its length in token->val.name.len.
   NOTE(review): the lexer code in this file fills in val.str.len for
   literals -- confirm which field name is current.  */
enum spell_type
{
  SPELL_OPERATOR,		/* First enumerator, value 0.  */
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
44 
45 struct token_spelling
46 {
47   enum spell_type category;
48   const unsigned char *name;
49 };
50 
51 static const unsigned char *const digraph_spellings[] =
52 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
53 
54 #define OP(e, s) { SPELL_OPERATOR, U s           },
55 #define TK(e, s) { s,              U STRINGX (e) },
56 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
57 #undef OP
58 #undef TK
59 
/* Spelling category recorded in token_spellings for TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)

/* Name recorded in token_spellings for TOKEN's type.  */
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)

/* Rewind the read pointer to the position saved in backup_to.
   Expects a local variable named `buffer' to be in scope.  */
#define BACKUP() do { buffer->cur = buffer->backup_to; } while (0)
63 
64 static void handle_newline PARAMS ((cpp_reader *));
65 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
66 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
67 
68 static int skip_block_comment PARAMS ((cpp_reader *));
69 static int skip_line_comment PARAMS ((cpp_reader *));
70 static void adjust_column PARAMS ((cpp_reader *));
71 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
72 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
73 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
74 				  unsigned int *));
75 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
76 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
77 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
78 static bool trigraph_p PARAMS ((cpp_reader *));
79 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
80 				  cppchar_t));
81 static bool continue_after_nul PARAMS ((cpp_reader *));
82 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
83 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
84 				   const unsigned char *, cppchar_t *));
85 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
86 
87 static unsigned int hex_digit_value PARAMS ((unsigned int));
88 static _cpp_buff *new_buff PARAMS ((size_t));
89 
90 /* Utility routine:
91 
92    Compares, the token TOKEN to the NUL-terminated string STRING.
93    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
94 int
cpp_ideq(token,string)95 cpp_ideq (token, string)
96      const cpp_token *token;
97      const char *string;
98 {
99   if (token->type != CPP_NAME)
100     return 0;
101 
102   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
103 }
104 
105 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
106    Returns with buffer->cur pointing to the character immediately
107    following the newline (combination).  */
108 static void
handle_newline(pfile)109 handle_newline (pfile)
110      cpp_reader *pfile;
111 {
112   cpp_buffer *buffer = pfile->buffer;
113 
114   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
115      only accept CR-LF; maybe we should fall back to that behavior?  */
116   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
117     buffer->cur++;
118 
119   buffer->line_base = buffer->cur;
120   buffer->col_adjust = 0;
121   pfile->line++;
122 }
123 
124 /* Subroutine of skip_escaped_newlines; called when a 3-character
125    sequence beginning with "??" is encountered.  buffer->cur points to
126    the second '?'.
127 
128    Warn if necessary, and returns true if the sequence forms a
129    trigraph and the trigraph should be honored.  */
130 static bool
trigraph_p(pfile)131 trigraph_p (pfile)
132      cpp_reader *pfile;
133 {
134   cpp_buffer *buffer = pfile->buffer;
135   cppchar_t from_char = buffer->cur[1];
136   bool accept;
137 
138   if (!_cpp_trigraph_map[from_char])
139     return false;
140 
141   accept = CPP_OPTION (pfile, trigraphs);
142 
143   /* Don't warn about trigraphs in comments.  */
144   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
145     {
146       if (accept)
147 	cpp_error_with_line (pfile, DL_WARNING,
148 			     pfile->line, CPP_BUF_COL (buffer) - 1,
149 			     "trigraph ??%c converted to %c",
150 			     (int) from_char,
151 			     (int) _cpp_trigraph_map[from_char]);
152       else if (buffer->cur != buffer->last_Wtrigraphs)
153 	{
154 	  buffer->last_Wtrigraphs = buffer->cur;
155 	  cpp_error_with_line (pfile, DL_WARNING,
156 			       pfile->line, CPP_BUF_COL (buffer) - 1,
157 			       "trigraph ??%c ignored", (int) from_char);
158 	}
159     }
160 
161   return accept;
162 }
163 
164 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
165    lie in buffer->cur[-1].  Returns the next byte, which will be in
166    buffer->cur[-1].  This routine performs preprocessing stages 1 and
167    2 of the ISO C standard.  */
168 static cppchar_t
skip_escaped_newlines(pfile)169 skip_escaped_newlines (pfile)
170      cpp_reader *pfile;
171 {
172   cpp_buffer *buffer = pfile->buffer;
173   cppchar_t next = buffer->cur[-1];
174 
175   /* Only do this if we apply stages 1 and 2.  */
176   if (!buffer->from_stage3)
177     {
178       const unsigned char *saved_cur;
179       cppchar_t next1;
180 
181       do
182 	{
183 	  if (next == '?')
184 	    {
185 	      if (buffer->cur[0] != '?' || !trigraph_p (pfile))
186 		break;
187 
188 	      /* Translate the trigraph.  */
189 	      next = _cpp_trigraph_map[buffer->cur[1]];
190 	      buffer->cur += 2;
191 	      if (next != '\\')
192 		break;
193 	    }
194 
195 	  if (buffer->cur == buffer->rlimit)
196 	    break;
197 
198 	  /* We have a backslash, and room for at least one more
199 	     character.  Skip horizontal whitespace.  */
200 	  saved_cur = buffer->cur;
201 	  do
202 	    next1 = *buffer->cur++;
203 	  while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
204 
205 	  if (!is_vspace (next1))
206 	    {
207 	      buffer->cur = saved_cur;
208 	      break;
209 	    }
210 
211 	  if (saved_cur != buffer->cur - 1
212 	      && !pfile->state.lexing_comment)
213 	    cpp_error (pfile, DL_WARNING,
214 		       "backslash and newline separated by space");
215 
216 	  handle_newline (pfile);
217 	  buffer->backup_to = buffer->cur;
218 	  if (buffer->cur == buffer->rlimit)
219 	    {
220 	      cpp_error (pfile, DL_PEDWARN,
221 			 "backslash-newline at end of file");
222 	      next = EOF;
223 	    }
224 	  else
225 	    next = *buffer->cur++;
226 	}
227       while (next == '\\' || next == '?');
228     }
229 
230   return next;
231 }
232 
233 /* Obtain the next character, after trigraph conversion and skipping
234    an arbitrarily long string of escaped newlines.  The common case of
235    no trigraphs or escaped newlines falls through quickly.  On return,
236    buffer->backup_to points to where to return to if the character is
237    not to be processed.  */
238 static cppchar_t
get_effective_char(pfile)239 get_effective_char (pfile)
240      cpp_reader *pfile;
241 {
242   cppchar_t next;
243   cpp_buffer *buffer = pfile->buffer;
244 
245   buffer->backup_to = buffer->cur;
246   next = *buffer->cur++;
247   if (__builtin_expect (next == '?' || next == '\\', 0))
248     next = skip_escaped_newlines (pfile);
249 
250   return next;
251 }
252 
253 /* Skip a C-style block comment.  We find the end of the comment by
254    seeing if an asterisk is before every '/' we encounter.  Returns
255    nonzero if comment terminated by EOF, zero otherwise.  */
256 static int
skip_block_comment(pfile)257 skip_block_comment (pfile)
258      cpp_reader *pfile;
259 {
260   cpp_buffer *buffer = pfile->buffer;
261   cppchar_t c = EOF, prevc = EOF;
262 
263   pfile->state.lexing_comment = 1;
264   while (buffer->cur != buffer->rlimit)
265     {
266       prevc = c, c = *buffer->cur++;
267 
268       /* FIXME: For speed, create a new character class of characters
269 	 of interest inside block comments.  */
270       if (c == '?' || c == '\\')
271 	c = skip_escaped_newlines (pfile);
272 
273       /* People like decorating comments with '*', so check for '/'
274 	 instead for efficiency.  */
275       if (c == '/')
276 	{
277 	  if (prevc == '*')
278 	    break;
279 
280 	  /* Warn about potential nested comments, but not if the '/'
281 	     comes immediately before the true comment delimiter.
282 	     Don't bother to get it right across escaped newlines.  */
283 	  if (CPP_OPTION (pfile, warn_comments)
284 	      && buffer->cur[0] == '*' && buffer->cur[1] != '/')
285 	    cpp_error_with_line (pfile, DL_WARNING,
286 				 pfile->line, CPP_BUF_COL (buffer),
287 				 "\"/*\" within comment");
288 	}
289       else if (is_vspace (c))
290 	handle_newline (pfile);
291       else if (c == '\t')
292 	adjust_column (pfile);
293     }
294 
295   pfile->state.lexing_comment = 0;
296   return c != '/' || prevc != '*';
297 }
298 
299 /* Skip a C++ line comment, leaving buffer->cur pointing to the
300    terminating newline.  Handles escaped newlines.  Returns nonzero
301    if a multiline comment.  */
302 static int
skip_line_comment(pfile)303 skip_line_comment (pfile)
304      cpp_reader *pfile;
305 {
306   cpp_buffer *buffer = pfile->buffer;
307   unsigned int orig_line = pfile->line;
308   cppchar_t c;
309 #ifdef MULTIBYTE_CHARS
310   wchar_t wc;
311   int char_len;
312 #endif
313 
314   pfile->state.lexing_comment = 1;
315 #ifdef MULTIBYTE_CHARS
316   /* Reset multibyte conversion state.  */
317   (void) local_mbtowc (NULL, NULL, 0);
318 #endif
319   do
320     {
321       if (buffer->cur == buffer->rlimit)
322 	goto at_eof;
323 
324 #ifdef MULTIBYTE_CHARS
325       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
326 			       buffer->rlimit - buffer->cur);
327       if (char_len == -1)
328 	{
329 	  cpp_error (pfile, DL_WARNING,
330 		     "ignoring invalid multibyte character");
331 	  char_len = 1;
332 	  c = *buffer->cur++;
333 	}
334       else
335 	{
336 	  buffer->cur += char_len;
337 	  c = wc;
338 	}
339 #else
340       c = *buffer->cur++;
341 #endif
342       if (c == '?' || c == '\\')
343 	c = skip_escaped_newlines (pfile);
344     }
345   while (!is_vspace (c));
346 
347   /* Step back over the newline, except at EOF.  */
348   buffer->cur--;
349  at_eof:
350 
351   pfile->state.lexing_comment = 0;
352   return orig_line != pfile->line;
353 }
354 
355 /* pfile->buffer->cur is one beyond the \t character.  Update
356    col_adjust so we track the column correctly.  */
357 static void
adjust_column(pfile)358 adjust_column (pfile)
359      cpp_reader *pfile;
360 {
361   cpp_buffer *buffer = pfile->buffer;
362   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
363 
364   /* Round it up to multiple of the tabstop, but subtract 1 since the
365      tab itself occupies a character position.  */
366   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
367 			 - col % CPP_OPTION (pfile, tabstop)) - 1;
368 }
369 
370 /* Skips whitespace, saving the next non-whitespace character.
371    Adjusts pfile->col_adjust to account for tabs.  Without this,
372    tokens might be assigned an incorrect column.  */
373 static int
skip_whitespace(pfile,c)374 skip_whitespace (pfile, c)
375      cpp_reader *pfile;
376      cppchar_t c;
377 {
378   cpp_buffer *buffer = pfile->buffer;
379   unsigned int warned = 0;
380 
381   do
382     {
383       /* Horizontal space always OK.  */
384       if (c == ' ')
385 	;
386       else if (c == '\t')
387 	adjust_column (pfile);
388       /* Just \f \v or \0 left.  */
389       else if (c == '\0')
390 	{
391 	  if (buffer->cur - 1 == buffer->rlimit)
392 	    return 0;
393 	  if (!warned)
394 	    {
395 	      cpp_error (pfile, DL_WARNING, "null character(s) ignored");
396 	      warned = 1;
397 	    }
398 	}
399       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400 	cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
401 			     CPP_BUF_COL (buffer),
402 			     "%s in preprocessing directive",
403 			     c == '\f' ? "form feed" : "vertical tab");
404 
405       c = *buffer->cur++;
406     }
407   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
408   while (is_nvspace (c));
409 
410   buffer->cur--;
411   return 1;
412 }
413 
414 /* See if the characters of a number token are valid in a name (no
415    '.', '+' or '-').  */
416 static int
name_p(pfile,string)417 name_p (pfile, string)
418      cpp_reader *pfile;
419      const cpp_string *string;
420 {
421   unsigned int i;
422 
423   for (i = 0; i < string->len; i++)
424     if (!is_idchar (string->text[i]))
425       return 0;
426 
427   return 1;
428 }
429 
430 /* Parse an identifier, skipping embedded backslash-newlines.  This is
431    a critical inner loop.  The common case is an identifier which has
432    not been split by backslash-newline, does not contain a dollar
433    sign, and has already been scanned (roughly 10:1 ratio of
434    seen:unseen identifiers in normal code; the distribution is
435    Poisson-like).  Second most common case is a new identifier, not
436    split and no dollar sign.  The other possibilities are rare and
437    have been relegated to parse_slow.  */
438 static cpp_hashnode *
parse_identifier(pfile)439 parse_identifier (pfile)
440      cpp_reader *pfile;
441 {
442   cpp_hashnode *result;
443   const uchar *cur, *base;
444 
445   /* Fast-path loop.  Skim over a normal identifier.
446      N.B. ISIDNUM does not include $.  */
447   cur = pfile->buffer->cur;
448   while (ISIDNUM (*cur))
449     cur++;
450 
451   /* Check for slow-path cases.  */
452   if (*cur == '?' || *cur == '\\' || *cur == '$')
453     {
454       unsigned int len;
455 
456       base = parse_slow (pfile, cur, 0, &len);
457       result = (cpp_hashnode *)
458 	ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
459     }
460   else
461     {
462       base = pfile->buffer->cur - 1;
463       pfile->buffer->cur = cur;
464       result = (cpp_hashnode *)
465 	ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
466     }
467 
468   /* Rarely, identifiers require diagnostics when lexed.
469      XXX Has to be forced out of the fast path.  */
470   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
471 			&& !pfile->state.skipping, 0))
472     {
473       /* It is allowed to poison the same identifier twice.  */
474       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
475 	cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
476 		   NODE_NAME (result));
477 
478       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
479 	 replacement list of a variadic macro.  */
480       if (result == pfile->spec_nodes.n__VA_ARGS__
481 	  && !pfile->state.va_args_ok)
482 	cpp_error (pfile, DL_PEDWARN,
483 	"__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
484     }
485 
486   return result;
487 }
488 
489 /* Slow path.  This handles numbers and identifiers which have been
490    split, or contain dollar signs.  The part of the token from
491    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
492    1 if it's a number, and 2 if it has a leading period.  Returns a
493    pointer to the token's NUL-terminated spelling in permanent
494    storage, and sets PLEN to its length.  */
495 static uchar *
parse_slow(pfile,cur,number_p,plen)496 parse_slow (pfile, cur, number_p, plen)
497      cpp_reader *pfile;
498      const uchar *cur;
499      int number_p;
500      unsigned int *plen;
501 {
502   cpp_buffer *buffer = pfile->buffer;
503   const uchar *base = buffer->cur - 1;
504   struct obstack *stack = &pfile->hash_table->stack;
505   unsigned int c, prevc, saw_dollar = 0;
506 
507   /* Place any leading period.  */
508   if (number_p == 2)
509     obstack_1grow (stack, '.');
510 
511   /* Copy the part of the token which is known to be okay.  */
512   obstack_grow (stack, base, cur - base);
513 
514   /* Now process the part which isn't.  We are looking at one of
515      '$', '\\', or '?' on entry to this loop.  */
516   prevc = cur[-1];
517   c = *cur++;
518   buffer->cur = cur;
519   for (;;)
520     {
521       /* Potential escaped newline?  */
522       buffer->backup_to = buffer->cur - 1;
523       if (c == '?' || c == '\\')
524 	c = skip_escaped_newlines (pfile);
525 
526       if (!is_idchar (c))
527 	{
528 	  if (!number_p)
529 	    break;
530 	  if (c != '.' && !VALID_SIGN (c, prevc))
531 	    break;
532 	}
533 
534       /* Handle normal identifier characters in this loop.  */
535       do
536 	{
537 	  prevc = c;
538 	  obstack_1grow (stack, c);
539 
540 	  if (c == '$')
541 	    saw_dollar++;
542 
543 	  c = *buffer->cur++;
544 	}
545       while (is_idchar (c));
546     }
547 
548   /* Step back over the unwanted char.  */
549   BACKUP ();
550 
551   /* $ is not an identifier character in the standard, but is commonly
552      accepted as an extension.  Don't warn about it in skipped
553      conditional blocks.  */
554   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
555     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
556 
557   /* Identifiers and numbers are null-terminated.  */
558   *plen = obstack_object_size (stack);
559   obstack_1grow (stack, '\0');
560   return obstack_finish (stack);
561 }
562 
563 /* Parse a number, beginning with character C, skipping embedded
564    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
565    before C.  Place the result in NUMBER.  */
566 static void
parse_number(pfile,number,leading_period)567 parse_number (pfile, number, leading_period)
568      cpp_reader *pfile;
569      cpp_string *number;
570      int leading_period;
571 {
572   const uchar *cur;
573 
574   /* Fast-path loop.  Skim over a normal number.
575      N.B. ISIDNUM does not include $.  */
576   cur = pfile->buffer->cur;
577   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
578     cur++;
579 
580   /* Check for slow-path cases.  */
581   if (*cur == '?' || *cur == '\\' || *cur == '$')
582     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
583   else
584     {
585       const uchar *base = pfile->buffer->cur - 1;
586       uchar *dest;
587 
588       number->len = cur - base + leading_period;
589       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
590       dest[number->len] = '\0';
591       number->text = dest;
592 
593       if (leading_period)
594 	*dest++ = '.';
595       memcpy (dest, base, cur - base);
596       pfile->buffer->cur = cur;
597     }
598 }
599 
600 /* Subroutine of parse_string.  */
601 static int
unescaped_terminator_p(pfile,dest)602 unescaped_terminator_p (pfile, dest)
603      cpp_reader *pfile;
604      const unsigned char *dest;
605 {
606   const unsigned char *start, *temp;
607 
608   /* In #include-style directives, terminators are not escapeable.  */
609   if (pfile->state.angled_headers)
610     return 1;
611 
612   start = BUFF_FRONT (pfile->u_buff);
613 
614   /* An odd number of consecutive backslashes represents an escaped
615      terminator.  */
616   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
617     ;
618 
619   return ((dest - temp) & 1) == 0;
620 }
621 
622 /* Parses a string, character constant, or angle-bracketed header file
623    name.  Handles embedded trigraphs and escaped newlines.  The stored
624    string is guaranteed NUL-terminated, but it is not guaranteed that
625    this is the first NUL since embedded NULs are preserved.
626 
627    When this function returns, buffer->cur points to the next
628    character to be processed.  */
629 static void
parse_string(pfile,token,terminator)630 parse_string (pfile, token, terminator)
631      cpp_reader *pfile;
632      cpp_token *token;
633      cppchar_t terminator;
634 {
635   cpp_buffer *buffer = pfile->buffer;
636   unsigned char *dest, *limit;
637   cppchar_t c;
638   bool warned_nulls = false;
639 #ifdef MULTIBYTE_CHARS
640   wchar_t wc;
641   int char_len;
642 #endif
643 
644   dest = BUFF_FRONT (pfile->u_buff);
645   limit = BUFF_LIMIT (pfile->u_buff);
646 
647 #ifdef MULTIBYTE_CHARS
648   /* Reset multibyte conversion state.  */
649   (void) local_mbtowc (NULL, NULL, 0);
650 #endif
651   for (;;)
652     {
653       /* We need room for another char, possibly the terminating NUL.  */
654       if ((size_t) (limit - dest) < 1)
655 	{
656 	  size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
657 	  _cpp_extend_buff (pfile, &pfile->u_buff, 2);
658 	  dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
659 	  limit = BUFF_LIMIT (pfile->u_buff);
660 	}
661 
662 #ifdef MULTIBYTE_CHARS
663       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
664 			       buffer->rlimit - buffer->cur);
665       if (char_len == -1)
666 	{
667 	  cpp_error (pfile, DL_WARNING,
668 		     "ignoring invalid multibyte character");
669 	  char_len = 1;
670 	  c = *buffer->cur++;
671 	}
672       else
673 	{
674 	  buffer->cur += char_len;
675 	  c = wc;
676 	}
677 #else
678       c = *buffer->cur++;
679 #endif
680 
681       /* Handle trigraphs, escaped newlines etc.  */
682       if (c == '?' || c == '\\')
683 	c = skip_escaped_newlines (pfile);
684 
685       if (c == terminator)
686 	{
687 	  if (unescaped_terminator_p (pfile, dest))
688 	    break;
689 	}
690       else if (is_vspace (c))
691 	{
692 	  /* No string literal may extend over multiple lines.  In
693 	     assembly language, suppress the error except for <>
694 	     includes.  This is a kludge around not knowing where
695 	     comments are.  */
696 	unterminated:
697 	  if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
698 	    cpp_error (pfile, DL_ERROR, "missing terminating %c character",
699 		       (int) terminator);
700 	  buffer->cur--;
701 	  break;
702 	}
703       else if (c == '\0')
704 	{
705 	  if (buffer->cur - 1 == buffer->rlimit)
706 	    goto unterminated;
707 	  if (!warned_nulls)
708 	    {
709 	      warned_nulls = true;
710 	      cpp_error (pfile, DL_WARNING,
711 			 "null character(s) preserved in literal");
712 	    }
713 	}
714 #ifdef MULTIBYTE_CHARS
715       if (char_len > 1)
716 	{
717 	  for ( ; char_len > 0; --char_len)
718 	    *dest++ = (*buffer->cur - char_len);
719 	}
720       else
721 #endif
722 	*dest++ = c;
723     }
724 
725   *dest = '\0';
726 
727   token->val.str.text = BUFF_FRONT (pfile->u_buff);
728   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
729   BUFF_FRONT (pfile->u_buff) = dest + 1;
730 }
731 
732 /* The stored comment includes the comment start and any terminator.  */
733 static void
save_comment(pfile,token,from,type)734 save_comment (pfile, token, from, type)
735      cpp_reader *pfile;
736      cpp_token *token;
737      const unsigned char *from;
738      cppchar_t type;
739 {
740   unsigned char *buffer;
741   unsigned int len, clen;
742 
743   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
744 
745   /* C++ comments probably (not definitely) have moved past a new
746      line, which we don't want to save in the comment.  */
747   if (is_vspace (pfile->buffer->cur[-1]))
748     len--;
749 
750   /* If we are currently in a directive, then we need to store all
751      C++ comments as C comments internally, and so we need to
752      allocate a little extra space in that case.
753 
754      Note that the only time we encounter a directive here is
755      when we are saving comments in a "#define".  */
756   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
757 
758   buffer = _cpp_unaligned_alloc (pfile, clen);
759 
760   token->type = CPP_COMMENT;
761   token->val.str.len = clen;
762   token->val.str.text = buffer;
763 
764   buffer[0] = '/';
765   memcpy (buffer + 1, from, len - 1);
766 
767   /* Finish conversion to a C comment, if necessary.  */
768   if (pfile->state.in_directive && type == '/')
769     {
770       buffer[1] = '*';
771       buffer[clen - 2] = '*';
772       buffer[clen - 1] = '/';
773     }
774 }
775 
776 /* Allocate COUNT tokens for RUN.  */
777 void
_cpp_init_tokenrun(run,count)778 _cpp_init_tokenrun (run, count)
779      tokenrun *run;
780      unsigned int count;
781 {
782   run->base = xnewvec (cpp_token, count);
783   run->limit = run->base + count;
784   run->next = NULL;
785 }
786 
787 /* Returns the next tokenrun, or creates one if there is none.  */
788 static tokenrun *
next_tokenrun(run)789 next_tokenrun (run)
790      tokenrun *run;
791 {
792   if (run->next == NULL)
793     {
794       run->next = xnew (tokenrun);
795       run->next->prev = run;
796       _cpp_init_tokenrun (run->next, 250);
797     }
798 
799   return run->next;
800 }
801 
802 /* Allocate a single token that is invalidated at the same time as the
803    rest of the tokens on the line.  Has its line and col set to the
804    same as the last lexed token, so that diagnostics appear in the
805    right place.  */
806 cpp_token *
_cpp_temp_token(pfile)807 _cpp_temp_token (pfile)
808      cpp_reader *pfile;
809 {
810   cpp_token *old, *result;
811 
812   old = pfile->cur_token - 1;
813   if (pfile->cur_token == pfile->cur_run->limit)
814     {
815       pfile->cur_run = next_tokenrun (pfile->cur_run);
816       pfile->cur_token = pfile->cur_run->base;
817     }
818 
819   result = pfile->cur_token++;
820   result->line = old->line;
821   result->col = old->col;
822   return result;
823 }
824 
825 /* Lex a token into RESULT (external interface).  Takes care of issues
826    like directive handling, token lookahead, multiple include
827    optimization and skipping.  */
828 const cpp_token *
_cpp_lex_token(pfile)829 _cpp_lex_token (pfile)
830      cpp_reader *pfile;
831 {
832   cpp_token *result;
833 
834   for (;;)
835     {
836       if (pfile->cur_token == pfile->cur_run->limit)
837 	{
838 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
839 	  pfile->cur_token = pfile->cur_run->base;
840 	}
841 
842       if (pfile->lookaheads)
843 	{
844 	  pfile->lookaheads--;
845 	  result = pfile->cur_token++;
846 	}
847       else
848 	result = _cpp_lex_direct (pfile);
849 
850       if (result->flags & BOL)
851 	{
852 	  /* Is this a directive.  If _cpp_handle_directive returns
853 	     false, it is an assembler #.  */
854 	  if (result->type == CPP_HASH
855 	      /* 6.10.3 p 11: Directives in a list of macro arguments
856 		 gives undefined behavior.  This implementation
857 		 handles the directive as normal.  */
858 	      && pfile->state.parsing_args != 1
859 	      && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
860 	    continue;
861 	  if (pfile->cb.line_change && !pfile->state.skipping)
862 	    (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
863 	}
864 
865       /* We don't skip tokens in directives.  */
866       if (pfile->state.in_directive)
867 	break;
868 
869       /* Outside a directive, invalidate controlling macros.  At file
870 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
871 	 get here and MI optimisation works.  */
872       pfile->mi_valid = false;
873 
874       if (!pfile->state.skipping || result->type == CPP_EOF)
875 	break;
876     }
877 
878   return result;
879 }
880 
881 /* A NUL terminates the current buffer.  For ISO preprocessing this is
882    EOF, but for traditional preprocessing it indicates we need a line
883    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
884    to return a CPP_EOF to the caller.  */
885 static bool
continue_after_nul(pfile)886 continue_after_nul (pfile)
887      cpp_reader *pfile;
888 {
889   cpp_buffer *buffer = pfile->buffer;
890   bool more = false;
891 
892   buffer->saved_flags = BOL;
893   if (CPP_OPTION (pfile, traditional))
894     {
895       if (pfile->state.in_directive)
896 	return false;
897 
898       _cpp_remove_overlay (pfile);
899       more = _cpp_read_logical_line_trad (pfile);
900       _cpp_overlay_buffer (pfile, pfile->out.base,
901 			   pfile->out.cur - pfile->out.base);
902       pfile->line = pfile->out.first_line;
903     }
904   else
905     {
906       /* Stop parsing arguments with a CPP_EOF.  When we finally come
907 	 back here, do the work of popping the buffer.  */
908       if (!pfile->state.parsing_args)
909 	{
910 	  if (buffer->cur != buffer->line_base)
911 	    {
912 	      /* Non-empty files should end in a newline.  Don't warn
913 		 for command line and _Pragma buffers.  */
914 	      handle_newline (pfile);
915 	    }
916 
917 	  /* Similarly, finish an in-progress directive with CPP_EOF
918 	     before popping the buffer.  */
919 	  if (!pfile->state.in_directive && buffer->prev)
920 	    {
921 	      more = !buffer->return_at_eof;
922 	      _cpp_pop_buffer (pfile);
923 	    }
924 	}
925     }
926 
927   return more;
928 }
929 
/* Set result->type to THEN_TYPE if the next effective character is
   CHAR, consuming it.  Otherwise set it to ELSE_TYPE and give the
   character back.  Expects `pfile', `buffer' and `result' to be in
   scope where the macro is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) != (CHAR))	\
      {						\
        BACKUP ();				\
        result->type = ELSE_TYPE;		\
      }						\
    else					\
      result->type = THEN_TYPE;			\
  } while (0)
940 
941 /* Lex a token into pfile->cur_token, which is also incremented, to
942    get diagnostics pointing to the correct location.
943 
944    Does not handle issues such as token lookahead, multiple-include
945    optimisation, directives, skipping etc.  This function is only
946    suitable for use by _cpp_lex_token, and in special cases like
947    lex_expansion_token which doesn't care for any of these issues.
948 
949    When meeting a newline, returns CPP_EOF if parsing a directive,
950    otherwise returns to the start of the token buffer if permissible.
951    Returns the location of the lexed token.  */
952 cpp_token *
_cpp_lex_direct(pfile)953 _cpp_lex_direct (pfile)
954      cpp_reader *pfile;
955 {
956   cppchar_t c;
957   cpp_buffer *buffer;
958   const unsigned char *comment_start;
/* RESULT is the slot being filled; pfile->cur_token is advanced past
   it immediately.  */
959   cpp_token *result = pfile->cur_token++;
960 
/* Entered for the first attempt, and again after a newline outside a
   directive or when continue_after_nul reports more input: reload
   pfile->buffer and move its saved_flags into the token.  */
961  fresh_line:
962   buffer = pfile->buffer;
963   result->flags = buffer->saved_flags;
964   buffer->saved_flags = 0;
/* Re-entered after escaped newlines and after skipped comments so the
   token's line is taken from pfile->line at that point.  */
965  update_tokens_line:
966   result->line = pfile->line;
967 
/* Fetch the next character; the column is computed from buffer->cur
   after it has been advanced.  */
968  skipped_white:
969   c = *buffer->cur++;
970   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
971 
/* Re-entered from the '?' / '\\' case when skip_escaped_newlines
   returned some other character, which is then dispatched here.  */
972  trigraph:
973   switch (c)
974     {
975     case ' ': case '\t': case '\f': case '\v': case '\0':
976       result->flags |= PREV_WHITE;
977       if (skip_whitespace (pfile, c))
978 	goto skipped_white;
979 
980       /* End of buffer.  */
981       buffer->cur--;
982       if (continue_after_nul (pfile))
983 	goto fresh_line;
984       result->type = CPP_EOF;
985       break;
986 
987     case '\n': case '\r':
988       handle_newline (pfile);
989       buffer->saved_flags = BOL;
990       if (! pfile->state.in_directive)
991 	{
992 	  if (pfile->state.parsing_args == 2)
993 	    buffer->saved_flags |= PREV_WHITE;
/* When tokens need not be kept, restart at the base of the first
   token run and reuse its first slot for this token.  */
994 	  if (!pfile->keep_tokens)
995 	    {
996 	      pfile->cur_run = &pfile->base_run;
997 	      result = pfile->base_run.base;
998 	      pfile->cur_token = result + 1;
999 	    }
1000 	  goto fresh_line;
1001 	}
/* Inside a directive a newline ends the token stream with CPP_EOF.  */
1002       result->type = CPP_EOF;
1003       break;
1004 
1005     case '?':
1006     case '\\':
1007       /* These could start an escaped newline, or '?' a trigraph.  Let
1008 	 skip_escaped_newlines do all the work.  */
1009       {
1010 	unsigned int line = pfile->line;
1011 
1012 	c = skip_escaped_newlines (pfile);
1013 	if (line != pfile->line)
1014 	  {
/* Step back one byte so the fetch at skipped_white reads a character
   again (presumably the one just returned -- confirm against
   skip_escaped_newlines).  */
1015 	    buffer->cur--;
1016 	    /* We had at least one escaped newline of some sort.
1017 	       Update the token's line and column.  */
1018 	    goto update_tokens_line;
1019 	  }
1020       }
1021 
1022       /* We are either the original '?' or '\\', or a trigraph.  */
1023       if (c == '?')
1024 	result->type = CPP_QUERY;
1025       else if (c == '\\')
1026 	goto random_char;
1027       else
1028 	goto trigraph;
1029       break;
1030 
1031     case '0': case '1': case '2': case '3': case '4':
1032     case '5': case '6': case '7': case '8': case '9':
1033       result->type = CPP_NUMBER;
1034       parse_number (pfile, &result->val.str, 0);
1035       break;
1036 
1037     case 'L':
1038       /* 'L' may introduce wide characters or strings.  */
1039       {
/* POS remembers the position after the 'L' so it can be restored when
   the next effective character is not a quote.  */
1040 	const unsigned char *pos = buffer->cur;
1041 
1042 	c = get_effective_char (pfile);
1043 	if (c == '\'' || c == '"')
1044 	  {
1045 	    result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1046 	    parse_string (pfile, result, c);
1047 	    break;
1048 	  }
1049 	buffer->cur = pos;
1050       }
1051       /* Fall through.  */
1052 
1053     start_ident:
1054     case '_':
1055     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1056     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1057     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1058     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1059     case 'y': case 'z':
1060     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1061     case 'G': case 'H': case 'I': case 'J': case 'K':
1062     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1063     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1064     case 'Y': case 'Z':
1065       result->type = CPP_NAME;
1066       result->val.node = parse_identifier (pfile);
1067 
1068       /* Convert named operators to their proper types.  */
/* val.node stays set for such tokens; the spelling routines in this
   file use it when NAMED_OP is flagged.  */
1069       if (result->val.node->flags & NODE_OPERATOR)
1070 	{
1071 	  result->flags |= NAMED_OP;
1072 	  result->type = result->val.node->value.operator;
1073 	}
1074       break;
1075 
1076     case '\'':
1077     case '"':
1078       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1079       parse_string (pfile, result, c);
1080       break;
1081 
1082     case '/':
1083       /* A potential block or line comment.  */
1084       comment_start = buffer->cur;
1085       c = get_effective_char (pfile);
1086 
1087       if (c == '*')
1088 	{
1089 	  if (skip_block_comment (pfile))
1090 	    cpp_error (pfile, DL_ERROR, "unterminated comment");
1091 	}
1092       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1093 			    || CPP_IN_SYSTEM_HEADER (pfile)))
1094 	{
1095 	  /* Warn about comments only if pedantically GNUC89, and not
1096 	     in system headers.  */
1097 	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1098 	      && ! buffer->warned_cplusplus_comments)
1099 	    {
1100 	      cpp_error (pfile, DL_PEDWARN,
1101 			 "C++ style comments are not allowed in ISO C90");
1102 	      cpp_error (pfile, DL_PEDWARN,
1103 			 "(this will be reported only once per input file)");
1104 	      buffer->warned_cplusplus_comments = 1;
1105 	    }
1106 
1107 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1108 	    cpp_error (pfile, DL_WARNING, "multi-line comment");
1109 	}
1110       else if (c == '=')
1111 	{
1112 	  result->type = CPP_DIV_EQ;
1113 	  break;
1114 	}
1115       else
1116 	{
1117 	  BACKUP ();
1118 	  result->type = CPP_DIV;
1119 	  break;
1120 	}
1121 
/* Only the two comment branches reach this point; the "/=" and plain
   "/" branches break out above.  A comment that is not being saved is
   treated as whitespace before the next token.  */
1122       if (!pfile->state.save_comments)
1123 	{
1124 	  result->flags |= PREV_WHITE;
1125 	  goto update_tokens_line;
1126 	}
1127 
1128       /* Save the comment as a token in its own right.  */
1129       save_comment (pfile, result, comment_start, c);
1130       break;
1131 
1132     case '<':
/* With state.angled_headers set, '<' opens a header name that is read
   up to the matching '>'.  */
1133       if (pfile->state.angled_headers)
1134 	{
1135 	  result->type = CPP_HEADER_NAME;
1136 	  parse_string (pfile, result, '>');
1137 	  break;
1138 	}
1139 
1140       c = get_effective_char (pfile);
1141       if (c == '=')
1142 	result->type = CPP_LESS_EQ;
1143       else if (c == '<')
1144 	IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1145       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1146 	IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1147       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1148 	{
1149 	  result->type = CPP_OPEN_SQUARE;
1150 	  result->flags |= DIGRAPH;
1151 	}
1152       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1153 	{
1154 	  result->type = CPP_OPEN_BRACE;
1155 	  result->flags |= DIGRAPH;
1156 	}
1157       else
1158 	{
1159 	  BACKUP ();
1160 	  result->type = CPP_LESS;
1161 	}
1162       break;
1163 
1164     case '>':
1165       c = get_effective_char (pfile);
1166       if (c == '=')
1167 	result->type = CPP_GREATER_EQ;
1168       else if (c == '>')
1169 	IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1170       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1171 	IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1172       else
1173 	{
1174 	  BACKUP ();
1175 	  result->type = CPP_GREATER;
1176 	}
1177       break;
1178 
1179     case '%':
1180       c = get_effective_char (pfile);
1181       if (c == '=')
1182 	result->type = CPP_MOD_EQ;
1183       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1184 	{
1185 	  result->flags |= DIGRAPH;
1186 	  result->type = CPP_HASH;
1187 	  if (get_effective_char (pfile) == '%')
1188 	    {
1189 	      const unsigned char *pos = buffer->cur;
1190 
1191 	      if (get_effective_char (pfile) == ':')
1192 		result->type = CPP_PASTE;
1193 	      else
/* Not "%:%:": rewind to one byte before POS so the second '%' is
   lexed again as the start of the next token.  */
1194 		buffer->cur = pos - 1;
1195 	    }
1196 	  else
1197 	    BACKUP ();
1198 	}
1199       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1200 	{
1201 	  result->flags |= DIGRAPH;
1202 	  result->type = CPP_CLOSE_BRACE;
1203 	}
1204       else
1205 	{
1206 	  BACKUP ();
1207 	  result->type = CPP_MOD;
1208 	}
1209       break;
1210 
1211     case '.':
1212       result->type = CPP_DOT;
1213       c = get_effective_char (pfile);
1214       if (c == '.')
1215 	{
1216 	  const unsigned char *pos = buffer->cur;
1217 
1218 	  if (get_effective_char (pfile) == '.')
1219 	    result->type = CPP_ELLIPSIS;
1220 	  else
/* Only two dots: rewind to one byte before POS so the second '.' is
   lexed again; the token stays CPP_DOT.  */
1221 	    buffer->cur = pos - 1;
1222 	}
1223       /* All known character sets have 0...9 contiguous.  */
1224       else if (ISDIGIT (c))
1225 	{
1226 	  result->type = CPP_NUMBER;
1227 	  parse_number (pfile, &result->val.str, 1);
1228 	}
1229       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1230 	result->type = CPP_DOT_STAR;
1231       else
1232 	BACKUP ();
1233       break;
1234 
1235     case '+':
1236       c = get_effective_char (pfile);
1237       if (c == '+')
1238 	result->type = CPP_PLUS_PLUS;
1239       else if (c == '=')
1240 	result->type = CPP_PLUS_EQ;
1241       else
1242 	{
1243 	  BACKUP ();
1244 	  result->type = CPP_PLUS;
1245 	}
1246       break;
1247 
1248     case '-':
1249       c = get_effective_char (pfile);
1250       if (c == '>')
1251 	{
1252 	  result->type = CPP_DEREF;
1253 	  if (CPP_OPTION (pfile, cplusplus))
1254 	    {
1255 	      if (get_effective_char (pfile) == '*')
1256 		result->type = CPP_DEREF_STAR;
1257 	      else
1258 		BACKUP ();
1259 	    }
1260 	}
1261       else if (c == '-')
1262 	result->type = CPP_MINUS_MINUS;
1263       else if (c == '=')
1264 	result->type = CPP_MINUS_EQ;
1265       else
1266 	{
1267 	  BACKUP ();
1268 	  result->type = CPP_MINUS;
1269 	}
1270       break;
1271 
1272     case '&':
1273       c = get_effective_char (pfile);
1274       if (c == '&')
1275 	result->type = CPP_AND_AND;
1276       else if (c == '=')
1277 	result->type = CPP_AND_EQ;
1278       else
1279 	{
1280 	  BACKUP ();
1281 	  result->type = CPP_AND;
1282 	}
1283       break;
1284 
1285     case '|':
1286       c = get_effective_char (pfile);
1287       if (c == '|')
1288 	result->type = CPP_OR_OR;
1289       else if (c == '=')
1290 	result->type = CPP_OR_EQ;
1291       else
1292 	{
1293 	  BACKUP ();
1294 	  result->type = CPP_OR;
1295 	}
1296       break;
1297 
1298     case ':':
1299       c = get_effective_char (pfile);
1300       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1301 	result->type = CPP_SCOPE;
1302       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1303 	{
1304 	  result->flags |= DIGRAPH;
1305 	  result->type = CPP_CLOSE_SQUARE;
1306 	}
1307       else
1308 	{
1309 	  BACKUP ();
1310 	  result->type = CPP_COLON;
1311 	}
1312       break;
1313 
1314     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1315     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1316     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1317     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1318     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1319 
1320     case '~': result->type = CPP_COMPL; break;
1321     case ',': result->type = CPP_COMMA; break;
1322     case '(': result->type = CPP_OPEN_PAREN; break;
1323     case ')': result->type = CPP_CLOSE_PAREN; break;
1324     case '[': result->type = CPP_OPEN_SQUARE; break;
1325     case ']': result->type = CPP_CLOSE_SQUARE; break;
1326     case '{': result->type = CPP_OPEN_BRACE; break;
1327     case '}': result->type = CPP_CLOSE_BRACE; break;
1328     case ';': result->type = CPP_SEMICOLON; break;
1329 
1330       /* @ is a punctuator in Objective-C.  */
1331     case '@': result->type = CPP_ATSIGN; break;
1332 
1333     case '$':
1334       if (CPP_OPTION (pfile, dollars_in_ident))
1335 	goto start_ident;
1336       /* Fall through...  */
1337 
/* Anything not recognised above, plus a lone backslash, becomes a
   CPP_OTHER token carrying the character itself.  */
1338     random_char:
1339     default:
1340       result->type = CPP_OTHER;
1341       result->val.c = c;
1342       break;
1343     }
1344 
1345   return result;
1346 }
1347 
1348 /* An upper bound on the number of bytes needed to spell TOKEN,
1349    including preceding whitespace.  */
1350 unsigned int
cpp_token_len(token)1351 cpp_token_len (token)
1352      const cpp_token *token;
1353 {
1354   unsigned int len;
1355 
1356   switch (TOKEN_SPELL (token))
1357     {
1358     default:		len = 0;				break;
1359     case SPELL_NUMBER:
1360     case SPELL_STRING:	len = token->val.str.len;		break;
1361     case SPELL_IDENT:	len = NODE_LEN (token->val.node);	break;
1362     }
1363   /* 1 for whitespace, 4 for comment delimiters.  */
1364   return len + 5;
1365 }
1366 
1367 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1368    already contain the enough space to hold the token's spelling.
1369    Returns a pointer to the character after the last character
1370    written.  */
1371 unsigned char *
cpp_spell_token(pfile,token,buffer)1372 cpp_spell_token (pfile, token, buffer)
1373      cpp_reader *pfile;		/* Would be nice to be rid of this...  */
1374      const cpp_token *token;
1375      unsigned char *buffer;
1376 {
1377   switch (TOKEN_SPELL (token))
1378     {
1379     case SPELL_OPERATOR:
1380       {
1381 	const unsigned char *spelling;
1382 	unsigned char c;
1383 
1384 	if (token->flags & DIGRAPH)
1385 	  spelling
1386 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1387 	else if (token->flags & NAMED_OP)
1388 	  goto spell_ident;
1389 	else
1390 	  spelling = TOKEN_NAME (token);
1391 
1392 	while ((c = *spelling++) != '\0')
1393 	  *buffer++ = c;
1394       }
1395       break;
1396 
1397     case SPELL_CHAR:
1398       *buffer++ = token->val.c;
1399       break;
1400 
1401     spell_ident:
1402     case SPELL_IDENT:
1403       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1404       buffer += NODE_LEN (token->val.node);
1405       break;
1406 
1407     case SPELL_NUMBER:
1408       memcpy (buffer, token->val.str.text, token->val.str.len);
1409       buffer += token->val.str.len;
1410       break;
1411 
1412     case SPELL_STRING:
1413       {
1414 	int left, right, tag;
1415 	switch (token->type)
1416 	  {
1417 	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
1418 	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
1419 	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
1420     	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
1421 	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
1422 	  default:
1423 	    cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1424 		       TOKEN_NAME (token));
1425 	    return buffer;
1426 	  }
1427 	if (tag) *buffer++ = tag;
1428 	*buffer++ = left;
1429 	memcpy (buffer, token->val.str.text, token->val.str.len);
1430 	buffer += token->val.str.len;
1431 	*buffer++ = right;
1432       }
1433       break;
1434 
1435     case SPELL_NONE:
1436       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1437       break;
1438     }
1439 
1440   return buffer;
1441 }
1442 
1443 /* Returns TOKEN spelt as a null-terminated string.  The string is
1444    freed when the reader is destroyed.  Useful for diagnostics.  */
1445 unsigned char *
cpp_token_as_text(pfile,token)1446 cpp_token_as_text (pfile, token)
1447      cpp_reader *pfile;
1448      const cpp_token *token;
1449 {
1450   unsigned int len = cpp_token_len (token);
1451   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1452 
1453   end = cpp_spell_token (pfile, token, start);
1454   end[0] = '\0';
1455 
1456   return start;
1457 }
1458 
1459 /* Used by C front ends, which really should move to using
1460    cpp_token_as_text.  */
1461 const char *
cpp_type2name(type)1462 cpp_type2name (type)
1463      enum cpp_ttype type;
1464 {
1465   return (const char *) token_spellings[type].name;
1466 }
1467 
1468 /* Writes the spelling of token to FP, without any preceding space.
1469    Separated from cpp_spell_token for efficiency - to avoid stdio
1470    double-buffering.  */
1471 void
cpp_output_token(token,fp)1472 cpp_output_token (token, fp)
1473      const cpp_token *token;
1474      FILE *fp;
1475 {
1476   switch (TOKEN_SPELL (token))
1477     {
1478     case SPELL_OPERATOR:
1479       {
1480 	const unsigned char *spelling;
1481 	int c;
1482 
1483 	if (token->flags & DIGRAPH)
1484 	  spelling
1485 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1486 	else if (token->flags & NAMED_OP)
1487 	  goto spell_ident;
1488 	else
1489 	  spelling = TOKEN_NAME (token);
1490 
1491 	c = *spelling;
1492 	do
1493 	  putc (c, fp);
1494 	while ((c = *++spelling) != '\0');
1495       }
1496       break;
1497 
1498     case SPELL_CHAR:
1499       putc (token->val.c, fp);
1500       break;
1501 
1502     spell_ident:
1503     case SPELL_IDENT:
1504       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1505     break;
1506 
1507     case SPELL_NUMBER:
1508       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1509       break;
1510 
1511     case SPELL_STRING:
1512       {
1513 	int left, right, tag;
1514 	switch (token->type)
1515 	  {
1516 	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
1517 	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
1518 	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
1519     	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
1520 	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
1521 	  default:
1522 	    fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1523 	    return;
1524 	  }
1525 	if (tag) putc (tag, fp);
1526 	putc (left, fp);
1527 	fwrite (token->val.str.text, 1, token->val.str.len, fp);
1528 	putc (right, fp);
1529       }
1530       break;
1531 
1532     case SPELL_NONE:
1533       /* An error, most probably.  */
1534       break;
1535     }
1536 }
1537 
1538 /* Compare two tokens.  */
1539 int
_cpp_equiv_tokens(a,b)1540 _cpp_equiv_tokens (a, b)
1541      const cpp_token *a, *b;
1542 {
1543   if (a->type == b->type && a->flags == b->flags)
1544     switch (TOKEN_SPELL (a))
1545       {
1546       default:			/* Keep compiler happy.  */
1547       case SPELL_OPERATOR:
1548 	return 1;
1549       case SPELL_CHAR:
1550 	return a->val.c == b->val.c; /* Character.  */
1551       case SPELL_NONE:
1552 	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1553       case SPELL_IDENT:
1554 	return a->val.node == b->val.node;
1555       case SPELL_NUMBER:
1556       case SPELL_STRING:
1557 	return (a->val.str.len == b->val.str.len
1558 		&& !memcmp (a->val.str.text, b->val.str.text,
1559 			    a->val.str.len));
1560       }
1561 
1562   return 0;
1563 }
1564 
1565 /* Returns nonzero if a space should be inserted to avoid an
1566    accidental token paste for output.  For simplicity, it is
1567    conservative, and occasionally advises a space where one is not
1568    needed, e.g. "." and ".2".  */
1569 int
cpp_avoid_paste(pfile,token1,token2)1570 cpp_avoid_paste (pfile, token1, token2)
1571      cpp_reader *pfile;
1572      const cpp_token *token1, *token2;
1573 {
1574   enum cpp_ttype a = token1->type, b = token2->type;
1575   cppchar_t c;
1576 
1577   if (token1->flags & NAMED_OP)
1578     a = CPP_NAME;
1579   if (token2->flags & NAMED_OP)
1580     b = CPP_NAME;
1581 
1582   c = EOF;
1583   if (token2->flags & DIGRAPH)
1584     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1585   else if (token_spellings[b].category == SPELL_OPERATOR)
1586     c = token_spellings[b].name[0];
1587 
1588   /* Quickly get everything that can paste with an '='.  */
1589   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1590     return 1;
1591 
1592   switch (a)
1593     {
1594     case CPP_GREATER:	return c == '>' || c == '?';
1595     case CPP_LESS:	return c == '<' || c == '?' || c == '%' || c == ':';
1596     case CPP_PLUS:	return c == '+';
1597     case CPP_MINUS:	return c == '-' || c == '>';
1598     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1599     case CPP_MOD:	return c == ':' || c == '>';
1600     case CPP_AND:	return c == '&';
1601     case CPP_OR:	return c == '|';
1602     case CPP_COLON:	return c == ':' || c == '>';
1603     case CPP_DEREF:	return c == '*';
1604     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1605     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1606     case CPP_NAME:	return ((b == CPP_NUMBER
1607 				 && name_p (pfile, &token2->val.str))
1608 				|| b == CPP_NAME
1609 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1610     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1611 				|| c == '.' || c == '+' || c == '-');
1612     case CPP_OTHER:	return (CPP_OPTION (pfile, objc)
1613 				&& token1->val.c == '@'
1614 				&& (b == CPP_NAME || b == CPP_STRING));
1615     default:		break;
1616     }
1617 
1618   return 0;
1619 }
1620 
1621 /* Output all the remaining tokens on the current line, and a newline
1622    character, to FP.  Leading whitespace is removed.  If there are
1623    macros, special token padding is not performed.  */
1624 void
cpp_output_line(pfile,fp)1625 cpp_output_line (pfile, fp)
1626      cpp_reader *pfile;
1627      FILE *fp;
1628 {
1629   const cpp_token *token;
1630 
1631   token = cpp_get_token (pfile);
1632   while (token->type != CPP_EOF)
1633     {
1634       cpp_output_token (token, fp);
1635       token = cpp_get_token (pfile);
1636       if (token->flags & PREV_WHITE)
1637 	putc (' ', fp);
1638     }
1639 
1640   putc ('\n', fp);
1641 }
1642 
/* Return the numeric value of hexadecimal digit C.  Aborts if C is
   not a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1653 
1654 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1655    failure if cpplib is not parsing C++ or C99.  Such failure is
1656    silent, and no variables are updated.  Otherwise returns 0, and
1657    warns if -Wtraditional.
1658 
1659    [lex.charset]: The character designated by the universal character
1660    name \UNNNNNNNN is that character whose character short name in
1661    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1662    universal character name \uNNNN is that character whose character
1663    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1664    for a universal character name is less than 0x20 or in the range
1665    0x7F-0x9F (inclusive), or if the universal character name
1666    designates a character in the basic source character set, then the
1667    program is ill-formed.
1668 
1669    We assume that wchar_t is Unicode, so we don't need to do any
1670    mapping.  Is this ever wrong?
1671 
1672    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1673    LIMIT is the end of the string or charconst.  PSTR is updated to
1674    point after the UCS on return, and the UCS is written into PC.  */
1675 
1676 static int
maybe_read_ucs(pfile,pstr,limit,pc)1677 maybe_read_ucs (pfile, pstr, limit, pc)
1678      cpp_reader *pfile;
1679      const unsigned char **pstr;
1680      const unsigned char *limit;
1681      cppchar_t *pc;
1682 {
1683   const unsigned char *p = *pstr;
1684   unsigned int code = 0;
1685   unsigned int c = *pc, length;
1686 
1687   /* Only attempt to interpret a UCS for C++ and C99.  */
1688   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1689     return 1;
1690 
1691   if (CPP_WTRADITIONAL (pfile))
1692     cpp_error (pfile, DL_WARNING,
1693 	       "the meaning of '\\%c' is different in traditional C", c);
1694 
1695   length = (c == 'u' ? 4: 8);
1696 
1697   if ((size_t) (limit - p) < length)
1698     {
1699       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1700       /* Skip to the end to avoid more diagnostics.  */
1701       p = limit;
1702     }
1703   else
1704     {
1705       for (; length; length--, p++)
1706 	{
1707 	  c = *p;
1708 	  if (ISXDIGIT (c))
1709 	    code = (code << 4) + hex_digit_value (c);
1710 	  else
1711 	    {
1712 	      cpp_error (pfile, DL_ERROR,
1713 			 "non-hex digit '%c' in universal-character-name", c);
1714 	      /* We shouldn't skip in case there are multibyte chars.  */
1715 	      break;
1716 	    }
1717 	}
1718     }
1719 
1720 #ifdef TARGET_EBCDIC
1721   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1722   code = 0x3f;  /* EBCDIC invalid character */
1723 #else
1724  /* True extended characters are OK.  */
1725   if (code >= 0xa0
1726       && !(code & 0x80000000)
1727       && !(code >= 0xD800 && code <= 0xDFFF))
1728     ;
1729   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1730      hex escapes so that this also works with EBCDIC hosts.  */
1731   else if (code == 0x24 || code == 0x40 || code == 0x60)
1732     ;
1733   /* Don't give another error if one occurred above.  */
1734   else if (length == 0)
1735     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1736 #endif
1737 
1738   *pstr = p;
1739   *pc = code;
1740   return 0;
1741 }
1742 
1743 /* Returns the value of an escape sequence, truncated to the correct
1744    target precision.  PSTR points to the input pointer, which is just
1745    after the backslash.  LIMIT is how much text we have.  WIDE is true
1746    if the escape sequence is part of a wide character constant or
1747    string literal.  Handles all relevant diagnostics.  */
1748 cppchar_t
cpp_parse_escape(pfile,pstr,limit,wide)1749 cpp_parse_escape (pfile, pstr, limit, wide)
1750      cpp_reader *pfile;
1751      const unsigned char **pstr;
1752      const unsigned char *limit;
1753      int wide;
1754 {
1755   int unknown = 0;
1756   const unsigned char *str = *pstr;
1757   cppchar_t c, mask;
1758   unsigned int width;
1759 
1760   if (wide)
1761     width = CPP_OPTION (pfile, wchar_precision);
1762   else
1763     width = CPP_OPTION (pfile, char_precision);
1764   if (width < BITS_PER_CPPCHAR_T)
1765     mask = ((cppchar_t) 1 << width) - 1;
1766   else
1767     mask = ~0;
1768 
1769   c = *str++;
1770   switch (c)
1771     {
1772     case '\\': case '\'': case '"': case '?': break;
1773     case 'b': c = TARGET_BS;	  break;
1774     case 'f': c = TARGET_FF;	  break;
1775     case 'n': c = TARGET_NEWLINE; break;
1776     case 'r': c = TARGET_CR;	  break;
1777     case 't': c = TARGET_TAB;	  break;
1778     case 'v': c = TARGET_VT;	  break;
1779 
1780     case '(': case '{': case '[': case '%':
1781       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1782 	 '\%' is used to prevent SCCS from getting confused.  */
1783       unknown = CPP_PEDANTIC (pfile);
1784       break;
1785 
1786     case 'a':
1787       if (CPP_WTRADITIONAL (pfile))
1788 	cpp_error (pfile, DL_WARNING,
1789 		   "the meaning of '\\a' is different in traditional C");
1790       c = TARGET_BELL;
1791       break;
1792 
1793     case 'e': case 'E':
1794       if (CPP_PEDANTIC (pfile))
1795 	cpp_error (pfile, DL_PEDWARN,
1796 		   "non-ISO-standard escape sequence, '\\%c'", (int) c);
1797       c = TARGET_ESC;
1798       break;
1799 
1800     case 'u': case 'U':
1801       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1802       break;
1803 
1804     case 'x':
1805       if (CPP_WTRADITIONAL (pfile))
1806 	cpp_error (pfile, DL_WARNING,
1807 		   "the meaning of '\\x' is different in traditional C");
1808 
1809       {
1810 	cppchar_t i = 0, overflow = 0;
1811 	int digits_found = 0;
1812 
1813 	while (str < limit)
1814 	  {
1815 	    c = *str;
1816 	    if (! ISXDIGIT (c))
1817 	      break;
1818 	    str++;
1819 	    overflow |= i ^ (i << 4 >> 4);
1820 	    i = (i << 4) + hex_digit_value (c);
1821 	    digits_found = 1;
1822 	  }
1823 
1824 	if (!digits_found)
1825 	  cpp_error (pfile, DL_ERROR,
1826 		       "\\x used with no following hex digits");
1827 
1828 	if (overflow | (i != (i & mask)))
1829 	  {
1830 	    cpp_error (pfile, DL_PEDWARN,
1831 		       "hex escape sequence out of range");
1832 	    i &= mask;
1833 	  }
1834 	c = i;
1835       }
1836       break;
1837 
1838     case '0':  case '1':  case '2':  case '3':
1839     case '4':  case '5':  case '6':  case '7':
1840       {
1841 	size_t count = 0;
1842 	cppchar_t i = c - '0';
1843 
1844 	while (str < limit && ++count < 3)
1845 	  {
1846 	    c = *str;
1847 	    if (c < '0' || c > '7')
1848 	      break;
1849 	    str++;
1850 	    i = (i << 3) + c - '0';
1851 	  }
1852 
1853 	if (i != (i & mask))
1854 	  {
1855 	    cpp_error (pfile, DL_PEDWARN,
1856 		       "octal escape sequence out of range");
1857 	    i &= mask;
1858 	  }
1859 	c = i;
1860       }
1861       break;
1862 
1863     default:
1864       unknown = 1;
1865       break;
1866     }
1867 
1868   if (unknown)
1869     {
1870       if (ISGRAPH (c))
1871 	cpp_error (pfile, DL_PEDWARN,
1872 		   "unknown escape sequence '\\%c'", (int) c);
1873       else
1874 	cpp_error (pfile, DL_PEDWARN,
1875 		   "unknown escape sequence: '\\%03o'", (int) c);
1876     }
1877 
1878   if (c > mask)
1879     {
1880       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1881       c &= mask;
1882     }
1883 
1884   *pstr = str;
1885   return c;
1886 }
1887 
1888 /* Interpret a (possibly wide) character constant in TOKEN.
1889    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1890    points to a variable that is filled in with the number of
1891    characters seen, and UNSIGNEDP to a variable that indicates whether
1892    the result has signed type.  */
1893 cppchar_t
cpp_interpret_charconst(pfile,token,pchars_seen,unsignedp)1894 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1895      cpp_reader *pfile;
1896      const cpp_token *token;
1897      unsigned int *pchars_seen;
1898      int *unsignedp;
1899 {
1900   const unsigned char *str = token->val.str.text;
1901   const unsigned char *limit = str + token->val.str.len;
1902   unsigned int chars_seen = 0;
1903   size_t width, max_chars;
1904   cppchar_t c, mask, result = 0;
1905   bool unsigned_p;
1906 
1907 #ifdef MULTIBYTE_CHARS
1908   (void) local_mbtowc (NULL, NULL, 0);
1909 #endif
1910 
1911   /* Width in bits.  */
1912   if (token->type == CPP_CHAR)
1913     {
1914       width = CPP_OPTION (pfile, char_precision);
1915       max_chars = CPP_OPTION (pfile, int_precision) / width;
1916       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1917     }
1918   else
1919     {
1920       width = CPP_OPTION (pfile, wchar_precision);
1921       max_chars = 1;
1922       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1923     }
1924 
1925   if (width < BITS_PER_CPPCHAR_T)
1926     mask = ((cppchar_t) 1 << width) - 1;
1927   else
1928     mask = ~0;
1929 
1930   while (str < limit)
1931     {
1932 #ifdef MULTIBYTE_CHARS
1933       wchar_t wc;
1934       int char_len;
1935 
1936       char_len = local_mbtowc (&wc, str, limit - str);
1937       if (char_len == -1)
1938 	{
1939 	  cpp_error (pfile, DL_WARNING,
1940 		     "ignoring invalid multibyte character");
1941 	  c = *str++;
1942 	}
1943       else
1944 	{
1945 	  str += char_len;
1946 	  c = wc;
1947 	}
1948 #else
1949       c = *str++;
1950 #endif
1951 
1952       if (c == '\\')
1953 	c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1954 
1955 #ifdef MAP_CHARACTER
1956       if (ISPRINT (c))
1957 	c = MAP_CHARACTER (c);
1958 #endif
1959 
1960       chars_seen++;
1961 
1962       /* Truncate the character, scale the result and merge the two.  */
1963       c &= mask;
1964       if (width < BITS_PER_CPPCHAR_T)
1965 	result = (result << width) | c;
1966       else
1967 	result = c;
1968     }
1969 
1970   if (chars_seen == 0)
1971     cpp_error (pfile, DL_ERROR, "empty character constant");
1972   else if (chars_seen > 1)
1973     {
1974       /* Multichar charconsts are of type int and therefore signed.  */
1975       unsigned_p = 0;
1976 
1977       if (chars_seen > max_chars)
1978 	{
1979 	  chars_seen = max_chars;
1980 	  cpp_error (pfile, DL_WARNING,
1981 		     "character constant too long for its type");
1982 	}
1983       else if (CPP_OPTION (pfile, warn_multichar))
1984 	cpp_error (pfile, DL_WARNING, "multi-character character constant");
1985     }
1986 
1987   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1988      in WIDTH bits, but for multi-char charconsts it's value is the
1989      full target type's width.  */
1990   if (chars_seen > 1)
1991     width *= max_chars;
1992   if (width < BITS_PER_CPPCHAR_T)
1993     {
1994       mask = ((cppchar_t) 1 << width) - 1;
1995       if (unsigned_p || !(result & (1 << (width - 1))))
1996 	result &= mask;
1997       else
1998 	result |= ~mask;
1999     }
2000 
2001   *pchars_seen = chars_seen;
2002   *unsignedp = unsigned_p;
2003   return result;
2004 }
2005 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is MIN_BUFF_SIZE plus one and a
   half times MIN_SIZE.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is
   MIN_EXTRA plus twice the distance from BUFF->cur to BUFF->limit.
   Macro arguments are parenthesized so that expression arguments
   keep their own grouping.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2020 
2021 /* Create a new allocation buffer.  Place the control block at the end
2022    of the buffer, so that buffer overflows will cause immediate chaos.  */
2023 static _cpp_buff *
new_buff(len)2024 new_buff (len)
2025      size_t len;
2026 {
2027   _cpp_buff *result;
2028   unsigned char *base;
2029 
2030   if (len < MIN_BUFF_SIZE)
2031     len = MIN_BUFF_SIZE;
2032   len = CPP_ALIGN (len);
2033 
2034   base = xmalloc (len + sizeof (_cpp_buff));
2035   result = (_cpp_buff *) (base + len);
2036   result->base = base;
2037   result->cur = base;
2038   result->limit = base + len;
2039   result->next = NULL;
2040   return result;
2041 }
2042 
2043 /* Place a chain of unwanted allocation buffers on the free list.  */
2044 void
_cpp_release_buff(pfile,buff)2045 _cpp_release_buff (pfile, buff)
2046      cpp_reader *pfile;
2047      _cpp_buff *buff;
2048 {
2049   _cpp_buff *end = buff;
2050 
2051   while (end->next)
2052     end = end->next;
2053   end->next = pfile->free_buffs;
2054   pfile->free_buffs = buff;
2055 }
2056 
2057 /* Return a free buffer of size at least MIN_SIZE.  */
2058 _cpp_buff *
_cpp_get_buff(pfile,min_size)2059 _cpp_get_buff (pfile, min_size)
2060      cpp_reader *pfile;
2061      size_t min_size;
2062 {
2063   _cpp_buff *result, **p;
2064 
2065   for (p = &pfile->free_buffs;; p = &(*p)->next)
2066     {
2067       size_t size;
2068 
2069       if (*p == NULL)
2070 	return new_buff (min_size);
2071       result = *p;
2072       size = result->limit - result->base;
2073       /* Return a buffer that's big enough, but don't waste one that's
2074          way too big.  */
2075       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2076 	break;
2077     }
2078 
2079   *p = result->next;
2080   result->next = NULL;
2081   result->cur = result->base;
2082   return result;
2083 }
2084 
2085 /* Creates a new buffer with enough space to hold the uncommitted
2086    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2087    the excess bytes to the new buffer.  Chains the new buffer after
2088    BUFF, and returns the new buffer.  */
2089 _cpp_buff *
_cpp_append_extend_buff(pfile,buff,min_extra)2090 _cpp_append_extend_buff (pfile, buff, min_extra)
2091      cpp_reader *pfile;
2092      _cpp_buff *buff;
2093      size_t min_extra;
2094 {
2095   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2096   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2097 
2098   buff->next = new_buff;
2099   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2100   return new_buff;
2101 }
2102 
2103 /* Creates a new buffer with enough space to hold the uncommitted
2104    remaining bytes of the buffer pointed to by BUFF, and at least
2105    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2106    Chains the new buffer before the buffer pointed to by BUFF, and
2107    updates the pointer to point to the new buffer.  */
2108 void
_cpp_extend_buff(pfile,pbuff,min_extra)2109 _cpp_extend_buff (pfile, pbuff, min_extra)
2110      cpp_reader *pfile;
2111      _cpp_buff **pbuff;
2112      size_t min_extra;
2113 {
2114   _cpp_buff *new_buff, *old_buff = *pbuff;
2115   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2116 
2117   new_buff = _cpp_get_buff (pfile, size);
2118   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2119   new_buff->next = old_buff;
2120   *pbuff = new_buff;
2121 }
2122 
2123 /* Free a chain of buffers starting at BUFF.  */
2124 void
_cpp_free_buff(buff)2125 _cpp_free_buff (buff)
2126      _cpp_buff *buff;
2127 {
2128   _cpp_buff *next;
2129 
2130   for (; buff; buff = next)
2131     {
2132       next = buff->next;
2133       free (buff->base);
2134     }
2135 }
2136 
2137 /* Allocate permanent, unaligned storage of length LEN.  */
2138 unsigned char *
_cpp_unaligned_alloc(pfile,len)2139 _cpp_unaligned_alloc (pfile, len)
2140      cpp_reader *pfile;
2141      size_t len;
2142 {
2143   _cpp_buff *buff = pfile->u_buff;
2144   unsigned char *result = buff->cur;
2145 
2146   if (len > (size_t) (buff->limit - result))
2147     {
2148       buff = _cpp_get_buff (pfile, len);
2149       buff->next = pfile->u_buff;
2150       pfile->u_buff = buff;
2151       result = buff->cur;
2152     }
2153 
2154   buff->cur = result + len;
2155   return result;
2156 }
2157 
2158 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2159    That buffer is used for growing allocations when saving macro
2160    replacement lists in a #define, and when parsing an answer to an
2161    assertion in #assert, #unassert or #if (and therefore possibly
2162    whilst expanding macros).  It therefore must not be used by any
2163    code that they might call: specifically the lexer and the guts of
2164    the macro expander.
2165 
2166    All existing other uses clearly fit this restriction: storing
2167    registered pragmas during initialization.  */
2168 unsigned char *
_cpp_aligned_alloc(pfile,len)2169 _cpp_aligned_alloc (pfile, len)
2170      cpp_reader *pfile;
2171      size_t len;
2172 {
2173   _cpp_buff *buff = pfile->a_buff;
2174   unsigned char *result = buff->cur;
2175 
2176   if (len > (size_t) (buff->limit - result))
2177     {
2178       buff = _cpp_get_buff (pfile, len);
2179       buff->next = pfile->a_buff;
2180       pfile->a_buff = buff;
2181       result = buff->cur;
2182     }
2183 
2184   buff->cur = result + len;
2185   return result;
2186 }
2187