xref: /openbsd/gnu/gcc/libcpp/lex.c (revision 898184e3)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7 
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26 
/* Classification of how a token type's spelling is recorded in the
   token_spellings table.  The TK table macro forms these names by
   pasting its second argument onto "SPELL_", so the enumerators must
   keep exactly these names.  */
enum spell_type
{
  SPELL_OPERATOR,	/* Value 0; the category of every OP entry.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One token_spellings entry: the spelling category of a token type
   together with the string the table macros record for it.  The
   member order matters because the table uses positional
   initializers.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
40 
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43 
/* Build the token_spellings table, indexed by token type and sized
   N_TTYPES.  TTYPE_TABLE is defined elsewhere and is expected to
   expand to a sequence of OP and TK invocations (TODO confirm against
   its definition).  OP (e, s) yields a SPELL_OPERATOR entry whose name
   is the string S; TK (e, s) yields category SPELL_<s> and the
   stringified enumerator name E.  Both helper macros are undefined
   again as soon as the table has been built.  */
44 #define OP(e, s) { SPELL_OPERATOR, U s  },
45 #define TK(e, s) { SPELL_ ## s,    U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49 
/* Accessors: the spelling category and the recorded name for the type
   of the token that TOKEN points to.  */
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52 
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 			    unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
63 
64 static _cpp_buff *new_buff (size_t);
65 
66 
67 /* Utility routine:
68 
69    Compares, the token TOKEN to the NUL-terminated string STRING.
70    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
71 int
72 cpp_ideq (const cpp_token *token, const char *string)
73 {
74   if (token->type != CPP_NAME)
75     return 0;
76 
77   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78 }
79 
80 /* Record a note TYPE at byte POS into the current cleaned logical
81    line.  */
82 static void
83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84 {
85   if (buffer->notes_used == buffer->notes_cap)
86     {
87       buffer->notes_cap = buffer->notes_cap * 2 + 200;
88       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89                                   buffer->notes_cap);
90     }
91 
92   buffer->notes[buffer->notes_used].pos = pos;
93   buffer->notes[buffer->notes_used].type = type;
94   buffer->notes_used++;
95 }
96 
/* Prepare the next logical line of PFILE's current buffer for lexing.

   On entry buffer->next_line marks the start of the unread text.  The
   line is cleaned in place: escaped newlines are spliced out and,
   when the trigraphs option is on, trigraphs are replaced.  A line
   note is recorded for each such event so that diagnostics can be
   issued later.  On exit buffer->cur and buffer->line_base point at
   the start of the cleaned line, the cleaned line ends in '\n',
   buffer->next_line points just past the consumed raw text and a
   sentinel note follows the real notes.  Buffers flagged from_stage3
   are not cleaned; only their line end is located.  */
97 /* Returns with a logical line that contains no escaped newlines or
98    trigraphs.  This is a time-critical inner loop.  */
99 void
100 _cpp_clean_line (cpp_reader *pfile)
101 {
102   cpp_buffer *buffer;
103   const uchar *s;
104   uchar c, *d, *p;
  /* S reads the raw text, D is the position last written in the
     cleaned text, and P scans backwards from a newline over trailing
     non-vertical whitespace looking for a backslash.  */
105 
106   buffer = pfile->buffer;
  /* Forget the notes of the previous line.  */
107   buffer->cur_note = buffer->notes_used = 0;
108   buffer->cur = buffer->line_base = buffer->next_line;
109   buffer->need_line = false;
  /* Start one byte early because both loops pre-increment S.  */
110   s = buffer->next_line - 1;
111 
112   if (!buffer->from_stage3)
113     {
114       /* Short circuit for the common case of an un-escaped line with
115 	 no trigraphs.  The primary win here is by not writing any
116 	 data back to memory until we have to.  */
117       for (;;)
118 	{
119 	  c = *++s;
120 	  if (c == '\n' || c == '\r')
121 	    {
	      /* D marks where the terminating '\n' will be stored, so
		 a "\r\n" pair collapses onto the '\r'.  */
122 	      d = (uchar *) s;
123 
	      /* A line end located at rlimit needs no further checks.  */
124 	      if (s == buffer->rlimit)
125 		goto done;
126 
127 	      /* DOS line ending? */
128 	      if (c == '\r' && s[1] == '\n')
129 		s++;
130 
131 	      if (s == buffer->rlimit)
132 		goto done;
133 
134 	      /* check for escaped newline */
135 	      p = d;
136 	      while (p != buffer->next_line && is_nvspace (p[-1]))
137 		p--;
138 	      if (p == buffer->next_line || p[-1] != '\\')
139 		goto done;
140 
141 	      /* Have an escaped newline; process it and proceed to
142 		 the slow path.  */
	      /* The note type is ' ' when whitespace separated the
		 backslash from the newline, and '\\' otherwise.  */
143 	      add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
	      /* Position D so that the slow loop's first store lands
		 on the backslash; next_line now serves as the lower
		 bound for later backward scans.  */
144 	      d = p - 2;
145 	      buffer->next_line = p - 1;
146 	      break;
147 	    }
148 	  if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149 	    {
150 	      /* Have a trigraph.  We may or may not have to convert
151 		 it.  Add a line note regardless, for -Wtrigraphs.  */
152 	      add_line_note (buffer, s, s[2]);
153 	      if (CPP_OPTION (pfile, trigraphs))
154 		{
155 		  /* We do, and that means we have to switch to the
156 		     slow path.  */
		  /* Store the replacement over the first '?' and skip
		     the other two characters of the trigraph.  */
157 		  d = (uchar *) s;
158 		  *d = _cpp_trigraph_map[s[2]];
159 		  s += 2;
160 		  break;
161 		}
162 	    }
163 	}
164 
165 
      /* Slow path: from here on every character read through S is
	 copied down to D, closing the gaps left by spliced newlines
	 and converted trigraphs.  */
166       for (;;)
167 	{
168 	  c = *++s;
169 	  *++d = c;
170 
171 	  if (c == '\n' || c == '\r')
172 	    {
173 		  /* Handle DOS line endings.  */
174 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175 		s++;
176 	      if (s == buffer->rlimit)
177 		break;
178 
179 	      /* Escaped?  */
180 	      p = d;
181 	      while (p != buffer->next_line && is_nvspace (p[-1]))
182 		p--;
183 	      if (p == buffer->next_line || p[-1] != '\\')
184 		break;
185 
	      /* Another escaped newline: record it and rewind D so
		 that copying resumes over the backslash.  */
186 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
187 	      d = p - 2;
188 	      buffer->next_line = p - 1;
189 	    }
190 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
191 	    {
192 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
193 	      add_line_note (buffer, d, s[2]);
194 	      if (CPP_OPTION (pfile, trigraphs))
195 		{
		  /* Overwrite the '?' just copied to D and skip the
		     rest of the trigraph in the raw text.  */
196 		  *d = _cpp_trigraph_map[s[2]];
197 		  s += 2;
198 		}
199 	    }
200 	}
201     }
202   else
203     {
      /* No cleaning for from_stage3 buffers: just find the line end.  */
204       do
205 	s++;
206       while (*s != '\n' && *s != '\r');
207       d = (uchar *) s;
208 
209       /* Handle DOS line endings.  */
210       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211 	s++;
212     }
213 
214  done:
  /* Terminate the cleaned line with a plain newline, whatever the raw
     line ending was.  */
215   *d = '\n';
216   /* A sentinel note that should never be processed.  */
217   add_line_note (buffer, d + 1, '\n');
  /* The next call resumes just past the raw text consumed here.  */
218   buffer->next_line = s + 1;
219 }
220 
221 /* Return true if the trigraph indicated by NOTE should be warned
222    about in a comment.  */
223 static bool
224 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
225 {
226   const uchar *p;
227 
228   /* Within comments we don't warn about trigraphs, unless the
229      trigraph forms an escaped newline, as that may change
230      behavior.  */
231   if (note->type != '/')
232     return false;
233 
234   /* If -trigraphs, then this was an escaped newline iff the next note
235      is coincident.  */
236   if (CPP_OPTION (pfile, trigraphs))
237     return note[1].pos == note->pos;
238 
239   /* Otherwise, see if this forms an escaped newline.  */
240   p = note->pos + 3;
241   while (is_nvspace (*p))
242     p++;
243 
244   /* There might have been escaped newlines between the trigraph and the
245      newline we found.  Hence the position test.  */
246   return (*p == '\n' && p < note[1].pos);
247 }
248 
249 /* Process the notes created by add_line_note as far as the current
250    location.  */
251 void
252 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
253 {
254   cpp_buffer *buffer = pfile->buffer;
255 
256   for (;;)
257     {
258       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259       unsigned int col;
260 
261       if (note->pos > buffer->cur)
262 	break;
263 
264       buffer->cur_note++;
265       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
266 
267       if (note->type == '\\' || note->type == ' ')
268 	{
269 	  if (note->type == ' ' && !in_comment)
270 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
271 				 "backslash and newline separated by space");
272 
273 	  if (buffer->next_line > buffer->rlimit)
274 	    {
275 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
276 				   "backslash-newline at end of file");
277 	      /* Prevent "no newline at end of file" warning.  */
278 	      buffer->next_line = buffer->rlimit;
279 	    }
280 
281 	  buffer->line_base = note->pos;
282 	  CPP_INCREMENT_LINE (pfile, 0);
283 	}
284       else if (_cpp_trigraph_map[note->type])
285 	{
286 	  if (CPP_OPTION (pfile, warn_trigraphs)
287 	      && (!in_comment || warn_in_comment (pfile, note)))
288 	    {
289 	      if (CPP_OPTION (pfile, trigraphs))
290 		cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
291 				     "trigraph ??%c converted to %c",
292 				     note->type,
293 				     (int) _cpp_trigraph_map[note->type]);
294 	      else
295 		{
296 		  cpp_error_with_line
297 		    (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
298 		     "trigraph ??%c ignored, use -trigraphs to enable",
299 		     note->type);
300 		}
301 	    }
302 	}
303       else
304 	abort ();
305     }
306 }
307 
308 /* Skip a C-style block comment.  We find the end of the comment by
309    seeing if an asterisk is before every '/' we encounter.  Returns
310    nonzero if comment terminated by EOF, zero otherwise.
311 
312    Buffer->cur points to the initial asterisk of the comment.  */
313 bool
314 _cpp_skip_block_comment (cpp_reader *pfile)
315 {
316   cpp_buffer *buffer = pfile->buffer;
317   const uchar *cur = buffer->cur;
318   uchar c;
319 
320   cur++;
321   if (*cur == '/')
322     cur++;
323 
324   for (;;)
325     {
326       /* People like decorating comments with '*', so check for '/'
327 	 instead for efficiency.  */
328       c = *cur++;
329 
330       if (c == '/')
331 	{
332 	  if (cur[-2] == '*')
333 	    break;
334 
335 	  /* Warn about potential nested comments, but not if the '/'
336 	     comes immediately before the true comment delimiter.
337 	     Don't bother to get it right across escaped newlines.  */
338 	  if (CPP_OPTION (pfile, warn_comments)
339 	      && cur[0] == '*' && cur[1] != '/')
340 	    {
341 	      buffer->cur = cur;
342 	      cpp_error_with_line (pfile, CPP_DL_WARNING,
343 				   pfile->line_table->highest_line, CPP_BUF_COL (buffer),
344 				   "\"/*\" within comment");
345 	    }
346 	}
347       else if (c == '\n')
348 	{
349 	  unsigned int cols;
350 	  buffer->cur = cur - 1;
351 	  _cpp_process_line_notes (pfile, true);
352 	  if (buffer->next_line >= buffer->rlimit)
353 	    return true;
354 	  _cpp_clean_line (pfile);
355 
356 	  cols = buffer->next_line - buffer->line_base;
357 	  CPP_INCREMENT_LINE (pfile, cols);
358 
359 	  cur = buffer->cur;
360 	}
361     }
362 
363   buffer->cur = cur;
364   _cpp_process_line_notes (pfile, true);
365   return false;
366 }
367 
368 /* Skip a C++ line comment, leaving buffer->cur pointing to the
369    terminating newline.  Handles escaped newlines.  Returns nonzero
370    if a multiline comment.  */
371 static int
372 skip_line_comment (cpp_reader *pfile)
373 {
374   cpp_buffer *buffer = pfile->buffer;
375   unsigned int orig_line = pfile->line_table->highest_line;
376 
377   while (*buffer->cur != '\n')
378     buffer->cur++;
379 
380   _cpp_process_line_notes (pfile, true);
381   return orig_line != pfile->line_table->highest_line;
382 }
383 
384 /* Skips whitespace, saving the next non-whitespace character.  */
385 static void
386 skip_whitespace (cpp_reader *pfile, cppchar_t c)
387 {
388   cpp_buffer *buffer = pfile->buffer;
389   bool saw_NUL = false;
390 
391   do
392     {
393       /* Horizontal space always OK.  */
394       if (c == ' ' || c == '\t')
395 	;
396       /* Just \f \v or \0 left.  */
397       else if (c == '\0')
398 	saw_NUL = true;
399       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
401 			     CPP_BUF_COL (buffer),
402 			     "%s in preprocessing directive",
403 			     c == '\f' ? "form feed" : "vertical tab");
404 
405       c = *buffer->cur++;
406     }
407   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
408   while (is_nvspace (c));
409 
410   if (saw_NUL)
411     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
412 
413   buffer->cur--;
414 }
415 
416 /* See if the characters of a number token are valid in a name (no
417    '.', '+' or '-').  */
418 static int
419 name_p (cpp_reader *pfile, const cpp_string *string)
420 {
421   unsigned int i;
422 
423   for (i = 0; i < string->len; i++)
424     if (!is_idchar (string->text[i]))
425       return 0;
426 
427   return 1;
428 }
429 
430 /* After parsing an identifier or other sequence, produce a warning about
431    sequences not in NFC/NFKC.  */
432 static void
433 warn_about_normalization (cpp_reader *pfile,
434 			  const cpp_token *token,
435 			  const struct normalize_state *s)
436 {
437   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438       && !pfile->state.skipping)
439     {
440       /* Make sure that the token is printed using UCNs, even
441 	 if we'd otherwise happily print UTF-8.  */
442       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
443       size_t sz;
444 
445       sz = cpp_spell_token (pfile, token, buf, false) - buf;
446       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447 	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
448 			     "`%.*s' is not in NFKC", (int) sz, buf);
449       else
450 	cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
451 			     "`%.*s' is not in NFC", (int) sz, buf);
452     }
453 }
454 
455 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
456    an identifier.  FIRST is TRUE if this starts an identifier.  */
457 static bool
458 forms_identifier_p (cpp_reader *pfile, int first,
459 		    struct normalize_state *state)
460 {
461   cpp_buffer *buffer = pfile->buffer;
462 
463   if (*buffer->cur == '$')
464     {
465       if (!CPP_OPTION (pfile, dollars_in_ident))
466 	return false;
467 
468       buffer->cur++;
469       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
470 	{
471 	  CPP_OPTION (pfile, warn_dollars) = 0;
472 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
473 	}
474 
475       return true;
476     }
477 
478   /* Is this a syntactically valid UCN?  */
479   if (CPP_OPTION (pfile, extended_identifiers)
480       && *buffer->cur == '\\'
481       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
482     {
483       buffer->cur += 2;
484       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485 			  state))
486 	return true;
487       buffer->cur -= 2;
488     }
489 
490   return false;
491 }
492 
493 /* Lex an identifier starting at BUFFER->CUR - 1.  */
494 static cpp_hashnode *
495 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496 		struct normalize_state *nst)
497 {
498   cpp_hashnode *result;
499   const uchar *cur;
500   unsigned int len;
501   unsigned int hash = HT_HASHSTEP (0, *base);
502 
503   cur = pfile->buffer->cur;
504   if (! starts_ucn)
505     while (ISIDNUM (*cur))
506       {
507 	hash = HT_HASHSTEP (hash, *cur);
508 	cur++;
509       }
510   pfile->buffer->cur = cur;
511   if (starts_ucn || forms_identifier_p (pfile, false, nst))
512     {
513       /* Slower version for identifiers containing UCNs (or $).  */
514       do {
515 	while (ISIDNUM (*pfile->buffer->cur))
516 	  {
517 	    pfile->buffer->cur++;
518 	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
519 	  }
520       } while (forms_identifier_p (pfile, false, nst));
521       result = _cpp_interpret_identifier (pfile, base,
522 					  pfile->buffer->cur - base);
523     }
524   else
525     {
526       len = cur - base;
527       hash = HT_HASHFINISH (hash, len);
528 
529       result = (cpp_hashnode *)
530 	ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
531     }
532 
533   /* Rarely, identifiers require diagnostics when lexed.  */
534   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535 			&& !pfile->state.skipping, 0))
536     {
537       /* It is allowed to poison the same identifier twice.  */
538       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
539 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
540 		   NODE_NAME (result));
541 
542       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543 	 replacement list of a variadic macro.  */
544       if (result == pfile->spec_nodes.n__VA_ARGS__
545 	  && !pfile->state.va_args_ok)
546 	cpp_error (pfile, CPP_DL_PEDWARN,
547 		   "__VA_ARGS__ can only appear in the expansion"
548 		   " of a C99 variadic macro");
549     }
550 
551   return result;
552 }
553 
554 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
555 static void
556 lex_number (cpp_reader *pfile, cpp_string *number,
557 	    struct normalize_state *nst)
558 {
559   const uchar *cur;
560   const uchar *base;
561   uchar *dest;
562 
563   base = pfile->buffer->cur - 1;
564   do
565     {
566       cur = pfile->buffer->cur;
567 
568       /* N.B. ISIDNUM does not include $.  */
569       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
570 	{
571 	  cur++;
572 	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
573 	}
574 
575       pfile->buffer->cur = cur;
576     }
577   while (forms_identifier_p (pfile, false, nst));
578 
579   number->len = cur - base;
580   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581   memcpy (dest, base, number->len);
582   dest[number->len] = '\0';
583   number->text = dest;
584 }
585 
586 /* Create a token of type TYPE with a literal spelling.  */
587 static void
588 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589 		unsigned int len, enum cpp_ttype type)
590 {
591   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
592 
593   memcpy (dest, base, len);
594   dest[len] = '\0';
595   token->type = type;
596   token->val.str.len = len;
597   token->val.str.text = dest;
598 }
599 
600 /* Lexes a string, character constant, or angle-bracketed header file
601    name.  The stored string contains the spelling, including opening
602    quote and leading any leading 'L'.  It returns the type of the
603    literal, or CPP_OTHER if it was not properly terminated.
604 
605    The spelling is NUL-terminated, but it is not guaranteed that this
606    is the first NUL since embedded NULs are preserved.  */
607 static void
608 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
609 {
610   bool saw_NUL = false;
611   const uchar *cur;
612   cppchar_t terminator;
613   enum cpp_ttype type;
614 
615   cur = base;
616   terminator = *cur++;
617   if (terminator == 'L')
618     terminator = *cur++;
619   if (terminator == '\"')
620     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621   else if (terminator == '\'')
622     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623   else
624     terminator = '>', type = CPP_HEADER_NAME;
625 
626   for (;;)
627     {
628       cppchar_t c = *cur++;
629 
630       /* In #include-style directives, terminators are not escapable.  */
631       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632 	cur++;
633       else if (c == terminator)
634 	break;
635       else if (c == '\n')
636 	{
637 	  cur--;
638 	  type = CPP_OTHER;
639 	  break;
640 	}
641       else if (c == '\0')
642 	saw_NUL = true;
643     }
644 
645   if (saw_NUL && !pfile->state.skipping)
646     cpp_error (pfile, CPP_DL_WARNING,
647 	       "null character(s) preserved in literal");
648 
649   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
650     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
651 	       (int) terminator);
652 
653   pfile->buffer->cur = cur;
654   create_literal (pfile, token, base, cur - base, type);
655 }
656 
657 /* The stored comment includes the comment start and any terminator.  */
658 static void
659 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
660 	      cppchar_t type)
661 {
662   unsigned char *buffer;
663   unsigned int len, clen;
664 
665   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
666 
667   /* C++ comments probably (not definitely) have moved past a new
668      line, which we don't want to save in the comment.  */
669   if (is_vspace (pfile->buffer->cur[-1]))
670     len--;
671 
672   /* If we are currently in a directive, then we need to store all
673      C++ comments as C comments internally, and so we need to
674      allocate a little extra space in that case.
675 
676      Note that the only time we encounter a directive here is
677      when we are saving comments in a "#define".  */
678   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
679 
680   buffer = _cpp_unaligned_alloc (pfile, clen);
681 
682   token->type = CPP_COMMENT;
683   token->val.str.len = clen;
684   token->val.str.text = buffer;
685 
686   buffer[0] = '/';
687   memcpy (buffer + 1, from, len - 1);
688 
689   /* Finish conversion to a C comment, if necessary.  */
690   if (pfile->state.in_directive && type == '/')
691     {
692       buffer[1] = '*';
693       buffer[clen - 2] = '*';
694       buffer[clen - 1] = '/';
695     }
696 }
697 
698 /* Allocate COUNT tokens for RUN.  */
699 void
700 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
701 {
702   run->base = XNEWVEC (cpp_token, count);
703   run->limit = run->base + count;
704   run->next = NULL;
705 }
706 
707 /* Returns the next tokenrun, or creates one if there is none.  */
708 static tokenrun *
709 next_tokenrun (tokenrun *run)
710 {
711   if (run->next == NULL)
712     {
713       run->next = XNEW (tokenrun);
714       run->next->prev = run;
715       _cpp_init_tokenrun (run->next, 250);
716     }
717 
718   return run->next;
719 }
720 
721 /* Allocate a single token that is invalidated at the same time as the
722    rest of the tokens on the line.  Has its line and col set to the
723    same as the last lexed token, so that diagnostics appear in the
724    right place.  */
725 cpp_token *
726 _cpp_temp_token (cpp_reader *pfile)
727 {
728   cpp_token *old, *result;
729 
730   old = pfile->cur_token - 1;
731   if (pfile->cur_token == pfile->cur_run->limit)
732     {
733       pfile->cur_run = next_tokenrun (pfile->cur_run);
734       pfile->cur_token = pfile->cur_run->base;
735     }
736 
737   result = pfile->cur_token++;
738   result->src_loc = old->src_loc;
739   return result;
740 }
741 
742 /* Lex a token into RESULT (external interface).  Takes care of issues
743    like directive handling, token lookahead, multiple include
744    optimization and skipping.  */
745 const cpp_token *
746 _cpp_lex_token (cpp_reader *pfile)
747 {
748   cpp_token *result;
749 
750   for (;;)
751     {
752       if (pfile->cur_token == pfile->cur_run->limit)
753 	{
754 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
755 	  pfile->cur_token = pfile->cur_run->base;
756 	}
757 
758       if (pfile->lookaheads)
759 	{
760 	  pfile->lookaheads--;
761 	  result = pfile->cur_token++;
762 	}
763       else
764 	result = _cpp_lex_direct (pfile);
765 
766       if (result->flags & BOL)
767 	{
768 	  /* Is this a directive.  If _cpp_handle_directive returns
769 	     false, it is an assembler #.  */
770 	  if (result->type == CPP_HASH
771 	      /* 6.10.3 p 11: Directives in a list of macro arguments
772 		 gives undefined behavior.  This implementation
773 		 handles the directive as normal.  */
774 	      && pfile->state.parsing_args != 1)
775 	    {
776 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
777 		{
778 		  if (pfile->directive_result.type == CPP_PADDING)
779 		    continue;
780 		  result = &pfile->directive_result;
781 		}
782 	    }
783 	  else if (pfile->state.in_deferred_pragma)
784 	    result = &pfile->directive_result;
785 
786 	  if (pfile->cb.line_change && !pfile->state.skipping)
787 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
788 	}
789 
790       /* We don't skip tokens in directives.  */
791       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
792 	break;
793 
794       /* Outside a directive, invalidate controlling macros.  At file
795 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
796 	 get here and MI optimization works.  */
797       pfile->mi_valid = false;
798 
799       if (!pfile->state.skipping || result->type == CPP_EOF)
800 	break;
801     }
802 
803   return result;
804 }
805 
806 /* Returns true if a fresh line has been loaded.  */
807 bool
808 _cpp_get_fresh_line (cpp_reader *pfile)
809 {
810   int return_at_eof;
811 
812   /* We can't get a new line until we leave the current directive.  */
813   if (pfile->state.in_directive)
814     return false;
815 
816   for (;;)
817     {
818       cpp_buffer *buffer = pfile->buffer;
819 
820       if (!buffer->need_line)
821 	return true;
822 
823       if (buffer->next_line < buffer->rlimit)
824 	{
825 	  _cpp_clean_line (pfile);
826 	  return true;
827 	}
828 
829       /* First, get out of parsing arguments state.  */
830       if (pfile->state.parsing_args)
831 	return false;
832 
833       /* End of buffer.  Non-empty files should end in a newline.  */
834       if (buffer->buf != buffer->rlimit
835 	  && buffer->next_line > buffer->rlimit
836 	  && !buffer->from_stage3)
837 	{
838 	  /* Only warn once.  */
839 	  buffer->next_line = buffer->rlimit;
840 	  cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
841 			       CPP_BUF_COLUMN (buffer, buffer->cur),
842 			       "no newline at end of file");
843 	}
844 
845       return_at_eof = buffer->return_at_eof;
846       _cpp_pop_buffer (pfile);
847       if (pfile->buffer == NULL || return_at_eof)
848 	return false;
849     }
850 }
851 
/* Two-way token choice on the next input character.  If the character
   at buffer->cur is CHAR it is consumed and result->type becomes
   THEN_TYPE; otherwise nothing is consumed and result->type becomes
   ELSE_TYPE.  Relies on variables named `buffer' and `result' being
   in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == CHAR)				\
	{						\
	  buffer->cur++;				\
	  result->type = THEN_TYPE;			\
	}						\
      else						\
	result->type = ELSE_TYPE;			\
    }							\
  while (0)
860 
861 /* Lex a token into pfile->cur_token, which is also incremented, to
862    get diagnostics pointing to the correct location.
863 
864    Does not handle issues such as token lookahead, multiple-include
865    optimization, directives, skipping etc.  This function is only
866    suitable for use by _cpp_lex_token, and in special cases like
867    lex_expansion_token which doesn't care for any of these issues.
868 
869    When meeting a newline, returns CPP_EOF if parsing a directive,
870    otherwise returns to the start of the token buffer if permissible.
871    Returns the location of the lexed token.  */
872 cpp_token *
873 _cpp_lex_direct (cpp_reader *pfile)
874 {
875   cppchar_t c;
876   cpp_buffer *buffer;
877   const unsigned char *comment_start;
878   cpp_token *result = pfile->cur_token++;
879 
880  fresh_line:
881   result->flags = 0;
882   buffer = pfile->buffer;
883   if (buffer->need_line)
884     {
885       if (pfile->state.in_deferred_pragma)
886 	{
887 	  result->type = CPP_PRAGMA_EOL;
888 	  pfile->state.in_deferred_pragma = false;
889 	  if (!pfile->state.pragma_allow_expansion)
890 	    pfile->state.prevent_expansion--;
891 	  return result;
892 	}
893       if (!_cpp_get_fresh_line (pfile))
894 	{
895 	  result->type = CPP_EOF;
896 	  if (!pfile->state.in_directive)
897 	    {
898 	      /* Tell the compiler the line number of the EOF token.  */
899 	      result->src_loc = pfile->line_table->highest_line;
900 	      result->flags = BOL;
901 	    }
902 	  return result;
903 	}
904       if (!pfile->keep_tokens)
905 	{
906 	  pfile->cur_run = &pfile->base_run;
907 	  result = pfile->base_run.base;
908 	  pfile->cur_token = result + 1;
909 	}
910       result->flags = BOL;
911       if (pfile->state.parsing_args == 2)
912 	result->flags |= PREV_WHITE;
913     }
914   buffer = pfile->buffer;
915  update_tokens_line:
916   result->src_loc = pfile->line_table->highest_line;
917 
918  skipped_white:
919   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
920       && !pfile->overlaid_buffer)
921     {
922       _cpp_process_line_notes (pfile, false);
923       result->src_loc = pfile->line_table->highest_line;
924     }
925   c = *buffer->cur++;
926 
927   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
928 			       CPP_BUF_COLUMN (buffer, buffer->cur));
929 
930   switch (c)
931     {
932     case ' ': case '\t': case '\f': case '\v': case '\0':
933       result->flags |= PREV_WHITE;
934       skip_whitespace (pfile, c);
935       goto skipped_white;
936 
937     case '\n':
938       if (buffer->cur < buffer->rlimit)
939 	CPP_INCREMENT_LINE (pfile, 0);
940       buffer->need_line = true;
941       goto fresh_line;
942 
943     case '0': case '1': case '2': case '3': case '4':
944     case '5': case '6': case '7': case '8': case '9':
945       {
946 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
947 	result->type = CPP_NUMBER;
948 	lex_number (pfile, &result->val.str, &nst);
949 	warn_about_normalization (pfile, result, &nst);
950 	break;
951       }
952 
953     case 'L':
954       /* 'L' may introduce wide characters or strings.  */
955       if (*buffer->cur == '\'' || *buffer->cur == '"')
956 	{
957 	  lex_string (pfile, result, buffer->cur - 1);
958 	  break;
959 	}
960       /* Fall through.  */
961 
962     case '_':
963     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
964     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
965     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
966     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
967     case 'y': case 'z':
968     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
969     case 'G': case 'H': case 'I': case 'J': case 'K':
970     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
971     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
972     case 'Y': case 'Z':
973       result->type = CPP_NAME;
974       {
975 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
976 	result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
977 					   &nst);
978 	warn_about_normalization (pfile, result, &nst);
979       }
980 
981       /* Convert named operators to their proper types.  */
982       if (result->val.node->flags & NODE_OPERATOR)
983 	{
984 	  result->flags |= NAMED_OP;
985 	  result->type = (enum cpp_ttype) result->val.node->directive_index;
986 	}
987       break;
988 
989     case '\'':
990     case '"':
991       lex_string (pfile, result, buffer->cur - 1);
992       break;
993 
994     case '/':
995       /* A potential block or line comment.  */
996       comment_start = buffer->cur;
997       c = *buffer->cur;
998 
999       if (c == '*')
1000 	{
1001 	  if (_cpp_skip_block_comment (pfile))
1002 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1003 	}
1004       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1005 			    || cpp_in_system_header (pfile)))
1006 	{
1007 	  /* Warn about comments only if pedantically GNUC89, and not
1008 	     in system headers.  */
1009 	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1010 	      && ! buffer->warned_cplusplus_comments)
1011 	    {
1012 	      cpp_error (pfile, CPP_DL_PEDWARN,
1013 			 "C++ style comments are not allowed in ISO C90");
1014 	      cpp_error (pfile, CPP_DL_PEDWARN,
1015 			 "(this will be reported only once per input file)");
1016 	      buffer->warned_cplusplus_comments = 1;
1017 	    }
1018 
1019 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1020 	    cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1021 	}
1022       else if (c == '=')
1023 	{
1024 	  buffer->cur++;
1025 	  result->type = CPP_DIV_EQ;
1026 	  break;
1027 	}
1028       else
1029 	{
1030 	  result->type = CPP_DIV;
1031 	  break;
1032 	}
1033 
1034       if (!pfile->state.save_comments)
1035 	{
1036 	  result->flags |= PREV_WHITE;
1037 	  goto update_tokens_line;
1038 	}
1039 
1040       /* Save the comment as a token in its own right.  */
1041       save_comment (pfile, result, comment_start, c);
1042       break;
1043 
1044     case '<':
1045       if (pfile->state.angled_headers)
1046 	{
1047 	  lex_string (pfile, result, buffer->cur - 1);
1048 	  break;
1049 	}
1050 
1051       result->type = CPP_LESS;
1052       if (*buffer->cur == '=')
1053 	buffer->cur++, result->type = CPP_LESS_EQ;
1054       else if (*buffer->cur == '<')
1055 	{
1056 	  buffer->cur++;
1057 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1058 	}
1059       else if (CPP_OPTION (pfile, digraphs))
1060 	{
1061 	  if (*buffer->cur == ':')
1062 	    {
1063 	      buffer->cur++;
1064 	      result->flags |= DIGRAPH;
1065 	      result->type = CPP_OPEN_SQUARE;
1066 	    }
1067 	  else if (*buffer->cur == '%')
1068 	    {
1069 	      buffer->cur++;
1070 	      result->flags |= DIGRAPH;
1071 	      result->type = CPP_OPEN_BRACE;
1072 	    }
1073 	}
1074       break;
1075 
1076     case '>':
1077       result->type = CPP_GREATER;
1078       if (*buffer->cur == '=')
1079 	buffer->cur++, result->type = CPP_GREATER_EQ;
1080       else if (*buffer->cur == '>')
1081 	{
1082 	  buffer->cur++;
1083 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1084 	}
1085       break;
1086 
1087     case '%':
1088       result->type = CPP_MOD;
1089       if (*buffer->cur == '=')
1090 	buffer->cur++, result->type = CPP_MOD_EQ;
1091       else if (CPP_OPTION (pfile, digraphs))
1092 	{
1093 	  if (*buffer->cur == ':')
1094 	    {
1095 	      buffer->cur++;
1096 	      result->flags |= DIGRAPH;
1097 	      result->type = CPP_HASH;
1098 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1099 		buffer->cur += 2, result->type = CPP_PASTE;
1100 	    }
1101 	  else if (*buffer->cur == '>')
1102 	    {
1103 	      buffer->cur++;
1104 	      result->flags |= DIGRAPH;
1105 	      result->type = CPP_CLOSE_BRACE;
1106 	    }
1107 	}
1108       break;
1109 
1110     case '.':
1111       result->type = CPP_DOT;
1112       if (ISDIGIT (*buffer->cur))
1113 	{
1114 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1115 	  result->type = CPP_NUMBER;
1116 	  lex_number (pfile, &result->val.str, &nst);
1117 	  warn_about_normalization (pfile, result, &nst);
1118 	}
1119       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1120 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1121       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1122 	buffer->cur++, result->type = CPP_DOT_STAR;
1123       break;
1124 
1125     case '+':
1126       result->type = CPP_PLUS;
1127       if (*buffer->cur == '+')
1128 	buffer->cur++, result->type = CPP_PLUS_PLUS;
1129       else if (*buffer->cur == '=')
1130 	buffer->cur++, result->type = CPP_PLUS_EQ;
1131       break;
1132 
1133     case '-':
1134       result->type = CPP_MINUS;
1135       if (*buffer->cur == '>')
1136 	{
1137 	  buffer->cur++;
1138 	  result->type = CPP_DEREF;
1139 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1140 	    buffer->cur++, result->type = CPP_DEREF_STAR;
1141 	}
1142       else if (*buffer->cur == '-')
1143 	buffer->cur++, result->type = CPP_MINUS_MINUS;
1144       else if (*buffer->cur == '=')
1145 	buffer->cur++, result->type = CPP_MINUS_EQ;
1146       break;
1147 
1148     case '&':
1149       result->type = CPP_AND;
1150       if (*buffer->cur == '&')
1151 	buffer->cur++, result->type = CPP_AND_AND;
1152       else if (*buffer->cur == '=')
1153 	buffer->cur++, result->type = CPP_AND_EQ;
1154       break;
1155 
1156     case '|':
1157       result->type = CPP_OR;
1158       if (*buffer->cur == '|')
1159 	buffer->cur++, result->type = CPP_OR_OR;
1160       else if (*buffer->cur == '=')
1161 	buffer->cur++, result->type = CPP_OR_EQ;
1162       break;
1163 
1164     case ':':
1165       result->type = CPP_COLON;
1166       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1167 	buffer->cur++, result->type = CPP_SCOPE;
1168       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1169 	{
1170 	  buffer->cur++;
1171 	  result->flags |= DIGRAPH;
1172 	  result->type = CPP_CLOSE_SQUARE;
1173 	}
1174       break;
1175 
1176     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1177     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1178     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1179     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1180     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1181 
1182     case '?': result->type = CPP_QUERY; break;
1183     case '~': result->type = CPP_COMPL; break;
1184     case ',': result->type = CPP_COMMA; break;
1185     case '(': result->type = CPP_OPEN_PAREN; break;
1186     case ')': result->type = CPP_CLOSE_PAREN; break;
1187     case '[': result->type = CPP_OPEN_SQUARE; break;
1188     case ']': result->type = CPP_CLOSE_SQUARE; break;
1189     case '{': result->type = CPP_OPEN_BRACE; break;
1190     case '}': result->type = CPP_CLOSE_BRACE; break;
1191     case ';': result->type = CPP_SEMICOLON; break;
1192 
1193       /* @ is a punctuator in Objective-C.  */
1194     case '@': result->type = CPP_ATSIGN; break;
1195 
1196     case '$':
1197     case '\\':
1198       {
1199 	const uchar *base = --buffer->cur;
1200 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1201 
1202 	if (forms_identifier_p (pfile, true, &nst))
1203 	  {
1204 	    result->type = CPP_NAME;
1205 	    result->val.node = lex_identifier (pfile, base, true, &nst);
1206 	    warn_about_normalization (pfile, result, &nst);
1207 	    break;
1208 	  }
1209 	buffer->cur++;
1210       }
1211 
1212     default:
1213       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1214       break;
1215     }
1216 
1217   return result;
1218 }
1219 
1220 /* An upper bound on the number of bytes needed to spell TOKEN.
1221    Does not include preceding whitespace.  */
1222 unsigned int
1223 cpp_token_len (const cpp_token *token)
1224 {
1225   unsigned int len;
1226 
1227   switch (TOKEN_SPELL (token))
1228     {
1229     default:		len = 4;				break;
1230     case SPELL_LITERAL:	len = token->val.str.len;		break;
1231     case SPELL_IDENT:	len = NODE_LEN (token->val.node) * 10;	break;
1232     }
1233 
1234   return len;
1235 }
1236 
/* Decode the UTF-8 sequence that starts at NAME and store the
   equivalent \UXXXXXXXX escape (lower-case hex digits) in BUFFER.
   Exactly 10 bytes are written to BUFFER and no NUL is appended.
   The return value is the number of bytes consumed from NAME, which
   is the count of leading one bits in NAME[0].  Calls abort () if a
   trailing byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned int lead = name[0];
  int seq_len = 0;
  int k;

  /* The number of leading one bits in the first byte gives the
     length of the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* Payload bits of the first byte, then six more from each
     trailing byte.  */
  code_point = name[0] & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      unsigned char trail = name[k];

      /* Ill-formed UTF-8.  */
      if ((trail & 0xC0) != 0x80)
	abort ();

      code_point = (code_point << 6) | (trail & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  /* Eight hex digits, most significant nibble first.  */
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
1270 
1271 
1272 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1273    already contain the enough space to hold the token's spelling.
1274    Returns a pointer to the character after the last character written.
1275    FORSTRING is true if this is to be the spelling after translation
1276    phase 1 (this is different for UCNs).
1277    FIXME: Would be nice if we didn't need the PFILE argument.  */
1278 unsigned char *
1279 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1280 		 unsigned char *buffer, bool forstring)
1281 {
1282   switch (TOKEN_SPELL (token))
1283     {
1284     case SPELL_OPERATOR:
1285       {
1286 	const unsigned char *spelling;
1287 	unsigned char c;
1288 
1289 	if (token->flags & DIGRAPH)
1290 	  spelling
1291 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1292 	else if (token->flags & NAMED_OP)
1293 	  goto spell_ident;
1294 	else
1295 	  spelling = TOKEN_NAME (token);
1296 
1297 	while ((c = *spelling++) != '\0')
1298 	  *buffer++ = c;
1299       }
1300       break;
1301 
1302     spell_ident:
1303     case SPELL_IDENT:
1304       if (forstring)
1305 	{
1306 	  memcpy (buffer, NODE_NAME (token->val.node),
1307 		  NODE_LEN (token->val.node));
1308 	  buffer += NODE_LEN (token->val.node);
1309 	}
1310       else
1311 	{
1312 	  size_t i;
1313 	  const unsigned char * name = NODE_NAME (token->val.node);
1314 
1315 	  for (i = 0; i < NODE_LEN (token->val.node); i++)
1316 	    if (name[i] & ~0x7F)
1317 	      {
1318 		i += utf8_to_ucn (buffer, name + i) - 1;
1319 		buffer += 10;
1320 	      }
1321 	    else
1322 	      *buffer++ = NODE_NAME (token->val.node)[i];
1323 	}
1324       break;
1325 
1326     case SPELL_LITERAL:
1327       memcpy (buffer, token->val.str.text, token->val.str.len);
1328       buffer += token->val.str.len;
1329       break;
1330 
1331     case SPELL_NONE:
1332       cpp_error (pfile, CPP_DL_ICE,
1333 		 "unspellable token %s", TOKEN_NAME (token));
1334       break;
1335     }
1336 
1337   return buffer;
1338 }
1339 
1340 /* Returns TOKEN spelt as a null-terminated string.  The string is
1341    freed when the reader is destroyed.  Useful for diagnostics.  */
1342 unsigned char *
1343 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1344 {
1345   unsigned int len = cpp_token_len (token) + 1;
1346   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1347 
1348   end = cpp_spell_token (pfile, token, start, false);
1349   end[0] = '\0';
1350 
1351   return start;
1352 }
1353 
1354 /* Used by C front ends, which really should move to using
1355    cpp_token_as_text.  */
1356 const char *
1357 cpp_type2name (enum cpp_ttype type)
1358 {
1359   return (const char *) token_spellings[type].name;
1360 }
1361 
1362 /* Writes the spelling of token to FP, without any preceding space.
1363    Separated from cpp_spell_token for efficiency - to avoid stdio
1364    double-buffering.  */
1365 void
1366 cpp_output_token (const cpp_token *token, FILE *fp)
1367 {
1368   switch (TOKEN_SPELL (token))
1369     {
1370     case SPELL_OPERATOR:
1371       {
1372 	const unsigned char *spelling;
1373 	int c;
1374 
1375 	if (token->flags & DIGRAPH)
1376 	  spelling
1377 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1378 	else if (token->flags & NAMED_OP)
1379 	  goto spell_ident;
1380 	else
1381 	  spelling = TOKEN_NAME (token);
1382 
1383 	c = *spelling;
1384 	do
1385 	  putc (c, fp);
1386 	while ((c = *++spelling) != '\0');
1387       }
1388       break;
1389 
1390     spell_ident:
1391     case SPELL_IDENT:
1392       {
1393 	size_t i;
1394 	const unsigned char * name = NODE_NAME (token->val.node);
1395 
1396 	for (i = 0; i < NODE_LEN (token->val.node); i++)
1397 	  if (name[i] & ~0x7F)
1398 	    {
1399 	      unsigned char buffer[10];
1400 	      i += utf8_to_ucn (buffer, name + i) - 1;
1401 	      fwrite (buffer, 1, 10, fp);
1402 	    }
1403 	  else
1404 	    fputc (NODE_NAME (token->val.node)[i], fp);
1405       }
1406       break;
1407 
1408     case SPELL_LITERAL:
1409       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1410       break;
1411 
1412     case SPELL_NONE:
1413       /* An error, most probably.  */
1414       break;
1415     }
1416 }
1417 
1418 /* Compare two tokens.  */
1419 int
1420 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1421 {
1422   if (a->type == b->type && a->flags == b->flags)
1423     switch (TOKEN_SPELL (a))
1424       {
1425       default:			/* Keep compiler happy.  */
1426       case SPELL_OPERATOR:
1427 	return 1;
1428       case SPELL_NONE:
1429 	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1430       case SPELL_IDENT:
1431 	return a->val.node == b->val.node;
1432       case SPELL_LITERAL:
1433 	return (a->val.str.len == b->val.str.len
1434 		&& !memcmp (a->val.str.text, b->val.str.text,
1435 			    a->val.str.len));
1436       }
1437 
1438   return 0;
1439 }
1440 
1441 /* Returns nonzero if a space should be inserted to avoid an
1442    accidental token paste for output.  For simplicity, it is
1443    conservative, and occasionally advises a space where one is not
1444    needed, e.g. "." and ".2".  */
1445 int
1446 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1447 		 const cpp_token *token2)
1448 {
1449   enum cpp_ttype a = token1->type, b = token2->type;
1450   cppchar_t c;
1451 
1452   if (token1->flags & NAMED_OP)
1453     a = CPP_NAME;
1454   if (token2->flags & NAMED_OP)
1455     b = CPP_NAME;
1456 
1457   c = EOF;
1458   if (token2->flags & DIGRAPH)
1459     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1460   else if (token_spellings[b].category == SPELL_OPERATOR)
1461     c = token_spellings[b].name[0];
1462 
1463   /* Quickly get everything that can paste with an '='.  */
1464   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1465     return 1;
1466 
1467   switch (a)
1468     {
1469     case CPP_GREATER:	return c == '>';
1470     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1471     case CPP_PLUS:	return c == '+';
1472     case CPP_MINUS:	return c == '-' || c == '>';
1473     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1474     case CPP_MOD:	return c == ':' || c == '>';
1475     case CPP_AND:	return c == '&';
1476     case CPP_OR:	return c == '|';
1477     case CPP_COLON:	return c == ':' || c == '>';
1478     case CPP_DEREF:	return c == '*';
1479     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1480     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1481     case CPP_NAME:	return ((b == CPP_NUMBER
1482 				 && name_p (pfile, &token2->val.str))
1483 				|| b == CPP_NAME
1484 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1485     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1486 				|| c == '.' || c == '+' || c == '-');
1487 				      /* UCNs */
1488     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1489 				 && b == CPP_NAME)
1490 				|| (CPP_OPTION (pfile, objc)
1491 				    && token1->val.str.text[0] == '@'
1492 				    && (b == CPP_NAME || b == CPP_STRING)));
1493     default:		break;
1494     }
1495 
1496   return 0;
1497 }
1498 
1499 /* Output all the remaining tokens on the current line, and a newline
1500    character, to FP.  Leading whitespace is removed.  If there are
1501    macros, special token padding is not performed.  */
1502 void
1503 cpp_output_line (cpp_reader *pfile, FILE *fp)
1504 {
1505   const cpp_token *token;
1506 
1507   token = cpp_get_token (pfile);
1508   while (token->type != CPP_EOF)
1509     {
1510       cpp_output_token (token, fp);
1511       token = cpp_get_token (pfile);
1512       if (token->flags & PREV_WHITE)
1513 	putc (' ', fp);
1514     }
1515 
1516   putc ('\n', fp);
1517 }
1518 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a newly created buffer gets.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free-list buffer
   still considered a reasonable fit for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size to request when
   growing BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur and
   limit pointers.  Every macro argument is parenthesized so that an
   argument containing a lower-precedence operator expands as one
   operand.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1533 
1534 /* Create a new allocation buffer.  Place the control block at the end
1535    of the buffer, so that buffer overflows will cause immediate chaos.  */
1536 static _cpp_buff *
1537 new_buff (size_t len)
1538 {
1539   _cpp_buff *result;
1540   unsigned char *base;
1541 
1542   if (len < MIN_BUFF_SIZE)
1543     len = MIN_BUFF_SIZE;
1544   len = CPP_ALIGN (len);
1545 
1546   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1547   result = (_cpp_buff *) (base + len);
1548   result->base = base;
1549   result->cur = base;
1550   result->limit = base + len;
1551   result->next = NULL;
1552   return result;
1553 }
1554 
1555 /* Place a chain of unwanted allocation buffers on the free list.  */
1556 void
1557 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1558 {
1559   _cpp_buff *end = buff;
1560 
1561   while (end->next)
1562     end = end->next;
1563   end->next = pfile->free_buffs;
1564   pfile->free_buffs = buff;
1565 }
1566 
1567 /* Return a free buffer of size at least MIN_SIZE.  */
1568 _cpp_buff *
1569 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1570 {
1571   _cpp_buff *result, **p;
1572 
1573   for (p = &pfile->free_buffs;; p = &(*p)->next)
1574     {
1575       size_t size;
1576 
1577       if (*p == NULL)
1578 	return new_buff (min_size);
1579       result = *p;
1580       size = result->limit - result->base;
1581       /* Return a buffer that's big enough, but don't waste one that's
1582          way too big.  */
1583       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1584 	break;
1585     }
1586 
1587   *p = result->next;
1588   result->next = NULL;
1589   result->cur = result->base;
1590   return result;
1591 }
1592 
1593 /* Creates a new buffer with enough space to hold the uncommitted
1594    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1595    the excess bytes to the new buffer.  Chains the new buffer after
1596    BUFF, and returns the new buffer.  */
1597 _cpp_buff *
1598 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1599 {
1600   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1601   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1602 
1603   buff->next = new_buff;
1604   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1605   return new_buff;
1606 }
1607 
1608 /* Creates a new buffer with enough space to hold the uncommitted
1609    remaining bytes of the buffer pointed to by BUFF, and at least
1610    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1611    Chains the new buffer before the buffer pointed to by BUFF, and
1612    updates the pointer to point to the new buffer.  */
1613 void
1614 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1615 {
1616   _cpp_buff *new_buff, *old_buff = *pbuff;
1617   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1618 
1619   new_buff = _cpp_get_buff (pfile, size);
1620   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1621   new_buff->next = old_buff;
1622   *pbuff = new_buff;
1623 }
1624 
1625 /* Free a chain of buffers starting at BUFF.  */
1626 void
1627 _cpp_free_buff (_cpp_buff *buff)
1628 {
1629   _cpp_buff *next;
1630 
1631   for (; buff; buff = next)
1632     {
1633       next = buff->next;
1634       free (buff->base);
1635     }
1636 }
1637 
1638 /* Allocate permanent, unaligned storage of length LEN.  */
1639 unsigned char *
1640 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1641 {
1642   _cpp_buff *buff = pfile->u_buff;
1643   unsigned char *result = buff->cur;
1644 
1645   if (len > (size_t) (buff->limit - result))
1646     {
1647       buff = _cpp_get_buff (pfile, len);
1648       buff->next = pfile->u_buff;
1649       pfile->u_buff = buff;
1650       result = buff->cur;
1651     }
1652 
1653   buff->cur = result + len;
1654   return result;
1655 }
1656 
1657 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1658    That buffer is used for growing allocations when saving macro
1659    replacement lists in a #define, and when parsing an answer to an
1660    assertion in #assert, #unassert or #if (and therefore possibly
1661    whilst expanding macros).  It therefore must not be used by any
1662    code that they might call: specifically the lexer and the guts of
1663    the macro expander.
1664 
1665    All existing other uses clearly fit this restriction: storing
1666    registered pragmas during initialization.  */
1667 unsigned char *
1668 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1669 {
1670   _cpp_buff *buff = pfile->a_buff;
1671   unsigned char *result = buff->cur;
1672 
1673   if (len > (size_t) (buff->limit - result))
1674     {
1675       buff = _cpp_get_buff (pfile, len);
1676       buff->next = pfile->a_buff;
1677       pfile->a_buff = buff;
1678       result = buff->cur;
1679     }
1680 
1681   buff->cur = result + len;
1682   return result;
1683 }
1684 
1685 /* Say which field of TOK is in use.  */
1686 
1687 enum cpp_token_fld_kind
1688 cpp_token_val_index (cpp_token *tok)
1689 {
1690   switch (TOKEN_SPELL (tok))
1691     {
1692     case SPELL_IDENT:
1693       return CPP_TOKEN_FLD_NODE;
1694     case SPELL_LITERAL:
1695       return CPP_TOKEN_FLD_STR;
1696     case SPELL_NONE:
1697       if (tok->type == CPP_MACRO_ARG)
1698 	return CPP_TOKEN_FLD_ARG_NO;
1699       else if (tok->type == CPP_PADDING)
1700 	return CPP_TOKEN_FLD_SOURCE;
1701       else if (tok->type == CPP_PRAGMA)
1702 	return CPP_TOKEN_FLD_PRAGMA;
1703       /* else fall through */
1704     default:
1705       return CPP_TOKEN_FLD_NONE;
1706     }
1707 }
1708