xref: /dragonfly/contrib/gcc-4.7/libcpp/lex.c (revision 78478697)
1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010,
3    2011 Free Software Foundation, Inc.
4    Contributed by Per Bothner, 1994-95.
5    Based on CCCP program by Paul Rubin, June 1986
6    Adapted to ANSI C, Richard Stallman, Jan 1987
7    Broken out to separate file, Zack Weinberg, Mar 2000
8 
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "internal.h"
27 
/* How the spelling of a token type is obtained.  SPELL_OPERATOR is the
   category given to every OP entry of the token table below; the other
   categories are named explicitly by the TK entries.  */
enum spell_type
{
  SPELL_OPERATOR,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One row of the token spelling table.  The member order matters:
   the table is filled in with positional initializers.  */
struct token_spelling
{
  /* Spelling category of the token type.  */
  enum spell_type category;
  /* Text associated with the token type.  */
  const unsigned char *name;
};
41 
42 static const unsigned char *const digraph_spellings[] =
43 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44 
45 #define OP(e, s) { SPELL_OPERATOR, UC s  },
46 #define TK(e, s) { SPELL_ ## s,    UC #e },
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
48 #undef OP
49 #undef TK
50 
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void store_comment (cpp_reader *, cpp_token *);
60 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61 			    unsigned int, enum cpp_ttype);
62 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63 static int name_p (cpp_reader *, const cpp_string *);
64 static tokenrun *next_tokenrun (tokenrun *);
65 
66 static _cpp_buff *new_buff (size_t);
67 
68 
69 /* Utility routine:
70 
71    Compares, the token TOKEN to the NUL-terminated string STRING.
72    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
73 int
74 cpp_ideq (const cpp_token *token, const char *string)
75 {
76   if (token->type != CPP_NAME)
77     return 0;
78 
79   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
80 }
81 
82 /* Record a note TYPE at byte POS into the current cleaned logical
83    line.  */
84 static void
85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 {
87   if (buffer->notes_used == buffer->notes_cap)
88     {
89       buffer->notes_cap = buffer->notes_cap * 2 + 200;
90       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91                                   buffer->notes_cap);
92     }
93 
94   buffer->notes[buffer->notes_used].pos = pos;
95   buffer->notes[buffer->notes_used].type = type;
96   buffer->notes_used++;
97 }
98 
99 
100 /* Fast path to find line special characters using optimized character
101    scanning algorithms.  Anything complicated falls back to the slow
102    path below.  Since this loop is very hot it's worth doing these kinds
103    of optimizations.
104 
105    One of the paths through the ifdefs should provide
106 
107      const uchar *search_line_fast (const uchar *s, const uchar *end);
108 
109    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
110    the found character.
111 
112    Note that the last character of the buffer is *always* a newline,
113    as forced by _cpp_convert_input.  This fact can be used to avoid
114    explicitly looking for the end of the buffer.  */
115 
/* Configure only gives us an ifdef-style symbol.  Supply a value of 0
   when it is absent so that WORDS_BIGENDIAN can be tested in ordinary
   `if' conditions below.  */
#if !defined (WORDS_BIGENDIAN)
# define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  Nothing in
   <stdint.h> names that type.  On most hosts it is unsigned long, but
   MS chose an LLP64 model; when building with GCC we can ask for the
   "real" word size directly.  */
#if defined (__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below only copes with a word size of 4 or 8.  Any other
   size yields a negative array bound and thus a compile-time error.  */
typedef char check_word_type_size
  [(sizeof (word_type) == 4 || sizeof (word_type) == 8) ? 1 : -1];
135 
136 /* Return X with the first N bytes forced to values that won't match one
137    of the interesting characters.  Note that NUL is not interesting.  */
138 
139 static inline word_type
140 acc_char_mask_misalign (word_type val, unsigned int n)
141 {
142   word_type mask = -1;
143   if (WORDS_BIGENDIAN)
144     mask >>= n * 8;
145   else
146     mask <<= n * 8;
147   return val & mask;
148 }
149 
150 /* Return X replicated to all byte positions within WORD_TYPE.  */
151 
152 static inline word_type
153 acc_char_replicate (uchar x)
154 {
155   word_type ret;
156 
157   ret = (x << 24) | (x << 16) | (x << 8) | x;
158   if (sizeof(word_type) == 8)
159     ret = (ret << 16 << 16) | ret;
160   return ret;
161 }
162 
163 /* Return non-zero if some byte of VAL is (probably) C.  */
164 
165 static inline word_type
166 acc_char_cmp (word_type val, word_type c)
167 {
168 #if defined(__GNUC__) && defined(__alpha__)
169   /* We can get exact results using a compare-bytes instruction.
170      Get (val == c) via (0 >= (val ^ c)).  */
171   return __builtin_alpha_cmpbge (0, val ^ c);
172 #else
173   word_type magic = 0x7efefefeU;
174   if (sizeof(word_type) == 8)
175     magic = (magic << 16 << 16) | 0xfefefefeU;
176   magic |= 1;
177 
178   val ^= c;
179   return ((val + magic) ^ ~val) & ~magic;
180 #endif
181 }
182 
183 /* Given the result of acc_char_cmp is non-zero, return the index of
184    the found character.  If this was a false positive, return -1.  */
185 
186 static inline int
187 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
188 		word_type val ATTRIBUTE_UNUSED)
189 {
190 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
191   /* The cmpbge instruction sets *bits* of the result corresponding to
192      matches in the bytes with no false positives.  */
193   return __builtin_ctzl (cmp);
194 #else
195   unsigned int i;
196 
197   /* ??? It would be nice to force unrolling here,
198      and have all of these constants folded.  */
199   for (i = 0; i < sizeof(word_type); ++i)
200     {
201       uchar c;
202       if (WORDS_BIGENDIAN)
203 	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
204       else
205 	c = (val >> i * 8) & 0xff;
206 
207       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
208 	return i;
209     }
210 
211   return -1;
212 #endif
213 }
214 
215 /* A version of the fast scanner using bit fiddling techniques.
216 
217    For 32-bit words, one would normally perform 16 comparisons and
218    16 branches.  With this algorithm one performs 24 arithmetic
219    operations and one branch.  Whether this is faster with a 32-bit
220    word size is going to be somewhat system dependent.
221 
222    For 64-bit words, we eliminate twice the number of comparisons
223    and branches without increasing the number of arithmetic operations.
224    It's almost certainly going to be a win with 64-bit word size.  */
225 
226 static const uchar * search_line_acc_char (const uchar *, const uchar *)
227   ATTRIBUTE_UNUSED;
228 
229 static const uchar *
230 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
231 {
232   const word_type repl_nl = acc_char_replicate ('\n');
233   const word_type repl_cr = acc_char_replicate ('\r');
234   const word_type repl_bs = acc_char_replicate ('\\');
235   const word_type repl_qm = acc_char_replicate ('?');
236 
237   unsigned int misalign;
238   const word_type *p;
239   word_type val, t;
240 
241   /* Align the buffer.  Mask out any bytes from before the beginning.  */
242   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
243   val = *p;
244   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
245   if (misalign)
246     val = acc_char_mask_misalign (val, misalign);
247 
248   /* Main loop.  */
249   while (1)
250     {
251       t  = acc_char_cmp (val, repl_nl);
252       t |= acc_char_cmp (val, repl_cr);
253       t |= acc_char_cmp (val, repl_bs);
254       t |= acc_char_cmp (val, repl_qm);
255 
256       if (__builtin_expect (t != 0, 0))
257 	{
258 	  int i = acc_char_index (t, val);
259 	  if (i >= 0)
260 	    return (const uchar *)p + i;
261 	}
262 
263       val = *++p;
264     }
265 }
266 
267 /* Disable on Solaris 2/x86 until the following problems can be properly
268    autoconfed:
269 
270    The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
271    The Solaris 9 assembler cannot assemble SSE4.2 insns.
272    Before Solaris 9 Update 6, SSE insns cannot be executed.
273    The Solaris 10+ assembler tags objects with the instruction set
274    extensions used, so SSE4.2 executables cannot run on machines that
275    don't support that extension.  */
276 
277 #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
278 
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.  Row order: \n, \r, \\, ?.  */
#define REPL16(C) { C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
293 
294 /* A version of the fast scanner using MMX vectorized byte compare insns.
295 
296    This uses the PMOVMSKB instruction which was introduced with "MMX2",
297    which was packaged into SSE1; it is also present in the AMD MMX
298    extension.  Mark the function as using "sse" so that we emit a real
299    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
300 
301 static const uchar *
302 #ifndef __SSE__
303 __attribute__((__target__("sse")))
304 #endif
305 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
306 {
307   typedef char v8qi __attribute__ ((__vector_size__ (8)));
308   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
309 
310   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
311   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
312   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
313   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
314 
315   unsigned int misalign, found, mask;
316   const v8qi *p;
317   v8qi data, t, c;
318 
319   /* Align the source pointer.  While MMX doesn't generate unaligned data
320      faults, this allows us to safely scan to the end of the buffer without
321      reading beyond the end of the last page.  */
322   misalign = (uintptr_t)s & 7;
323   p = (const v8qi *)((uintptr_t)s & -8);
324   data = *p;
325 
326   /* Create a mask for the bytes that are valid within the first
327      16-byte block.  The Idea here is that the AND with the mask
328      within the loop is "free", since we need some AND or TEST
329      insn in order to set the flags for the branch anyway.  */
330   mask = -1u << misalign;
331 
332   /* Main loop processing 8 bytes at a time.  */
333   goto start;
334   do
335     {
336       data = *++p;
337       mask = -1;
338 
339     start:
340       t = __builtin_ia32_pcmpeqb(data, repl_nl);
341       c = __builtin_ia32_pcmpeqb(data, repl_cr);
342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343       c = __builtin_ia32_pcmpeqb(data, repl_bs);
344       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
345       c = __builtin_ia32_pcmpeqb(data, repl_qm);
346       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
347       found = __builtin_ia32_pmovmskb (t);
348       found &= mask;
349     }
350   while (!found);
351 
352   __builtin_ia32_emms ();
353 
354   /* FOUND contains 1 in bits for which we matched a relevant
355      character.  Conversion to the byte index is trivial.  */
356   found = __builtin_ctz(found);
357   return (const uchar *)p + found;
358 }
359 
360 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
361 
362 static const uchar *
363 #ifndef __SSE2__
364 __attribute__((__target__("sse2")))
365 #endif
366 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
367 {
368   typedef char v16qi __attribute__ ((__vector_size__ (16)));
369 
370   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
371   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
372   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
373   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
374 
375   unsigned int misalign, found, mask;
376   const v16qi *p;
377   v16qi data, t;
378 
379   /* Align the source pointer.  */
380   misalign = (uintptr_t)s & 15;
381   p = (const v16qi *)((uintptr_t)s & -16);
382   data = *p;
383 
384   /* Create a mask for the bytes that are valid within the first
385      16-byte block.  The Idea here is that the AND with the mask
386      within the loop is "free", since we need some AND or TEST
387      insn in order to set the flags for the branch anyway.  */
388   mask = -1u << misalign;
389 
390   /* Main loop processing 16 bytes at a time.  */
391   goto start;
392   do
393     {
394       data = *++p;
395       mask = -1;
396 
397     start:
398       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
399       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
400       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
401       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
402       found = __builtin_ia32_pmovmskb128 (t);
403       found &= mask;
404     }
405   while (!found);
406 
407   /* FOUND contains 1 in bits for which we matched a relevant
408      character.  Conversion to the byte index is trivial.  */
409   found = __builtin_ctz(found);
410   return (const uchar *)p + found;
411 }
412 
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.
   Unlike the other variants this one looks at END, but only to decide
   whether an unaligned 16-byte read at S is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  /* The four characters searched for; the explicit length of 4 is
     passed to the instruction in EAX below.  */
  static const v16qi needles = { '\n', '\r', '?', '\\' };

  const uintptr_t addr = (uintptr_t)s;
  uintptr_t idx;

  /* Check for unaligned input.  */
  if (addr & 15)
    {
      if (__builtin_expect (end - s < 16, 0)
	  && __builtin_expect ((addr & 0xfff) > 0xff0, 0))
	{
	  /* There are less than 16 bytes left in the buffer, and less
	     than 16 bytes left on the page.  Reading 16 bytes at this
	     point might generate a spurious page fault.  Defer to the
	     SSE2 implementation, which already handles alignment.  */
	  return search_line_sse2 (s, end);
	}

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
	 memory need not be aligned.  */
      __asm ("%vpcmpestri $0, (%1), %2"
	     : "=c"(idx) : "r"(s), "x"(needles), "a"(4), "d"(16));
      if (__builtin_expect (idx < 16, 0))
	return s + idx;

      /* Advance the pointer to an aligned address.  We will re-scan a
	 few bytes, but we no longer need care for reading past the
	 end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((addr + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  By doing the whole loop
     in inline assembly, we can make proper use of the flags set.  */
  __asm (      "sub $16, %1\n"
	"	.balign 16\n"
	"0:	add $16, %1\n"
	"	%vpcmpestri $0, (%1), %2\n"
	"	jnc 0b"
	: "=&c"(idx), "+r"(s)
	: "x"(needles), "a"(4), "d"(16));

  return s + idx;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
472 
473 /* Check the CPU capabilities.  */
474 
475 #include "../gcc/config/i386/cpuid.h"
476 
477 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
478 static search_line_fast_type search_line_fast;
479 
480 #define HAVE_init_vectorized_lexer 1
481 static inline void
482 init_vectorized_lexer (void)
483 {
484   unsigned dummy, ecx = 0, edx = 0;
485   search_line_fast_type impl = search_line_acc_char;
486   int minimum = 0;
487 
488 #if defined(__SSE4_2__)
489   minimum = 3;
490 #elif defined(__SSE2__)
491   minimum = 2;
492 #elif defined(__SSE__)
493   minimum = 1;
494 #endif
495 
496   if (minimum == 3)
497     impl = search_line_sse42;
498   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
499     {
500       if (minimum == 3 || (ecx & bit_SSE4_2))
501         impl = search_line_sse42;
502       else if (minimum == 2 || (edx & bit_SSE2))
503 	impl = search_line_sse2;
504       else if (minimum == 1 || (edx & bit_SSE))
505 	impl = search_line_mmx;
506     }
507   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
508     {
509       if (minimum == 1
510 	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
511 	impl = search_line_mmx;
512     }
513 
514   search_line_fast = impl;
515 }
516 
517 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
518 
519 /* A vection of the fast scanner using AltiVec vectorized byte compares.  */
520 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
521    so we can't compile this function without -maltivec on the command line
522    (or implied by some other switch).  */
523 
524 static const uchar *
525 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
526 {
527   typedef __attribute__((altivec(vector))) unsigned char vc;
528 
529   const vc repl_nl = {
530     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
531     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
532   };
533   const vc repl_cr = {
534     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
535     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
536   };
537   const vc repl_bs = {
538     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
539     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
540   };
541   const vc repl_qm = {
542     '?', '?', '?', '?', '?', '?', '?', '?',
543     '?', '?', '?', '?', '?', '?', '?', '?',
544   };
545   const vc ones = {
546     -1, -1, -1, -1, -1, -1, -1, -1,
547     -1, -1, -1, -1, -1, -1, -1, -1,
548   };
549   const vc zero = { 0 };
550 
551   vc data, mask, t;
552 
553   /* Altivec loads automatically mask addresses with -16.  This lets us
554      issue the first load as early as possible.  */
555   data = __builtin_vec_ld(0, (const vc *)s);
556 
557   /* Discard bytes before the beginning of the buffer.  Do this by
558      beginning with all ones and shifting in zeros according to the
559      mis-alignment.  The LVSR instruction pulls the exact shift we
560      want from the address.  */
561   mask = __builtin_vec_lvsr(0, s);
562   mask = __builtin_vec_perm(zero, ones, mask);
563   data &= mask;
564 
565   /* While altivec loads mask addresses, we still need to align S so
566      that the offset we compute at the end is correct.  */
567   s = (const uchar *)((uintptr_t)s & -16);
568 
569   /* Main loop processing 16 bytes at a time.  */
570   goto start;
571   do
572     {
573       vc m_nl, m_cr, m_bs, m_qm;
574 
575       s += 16;
576       data = __builtin_vec_ld(0, (const vc *)s);
577 
578     start:
579       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
580       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
581       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
582       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
583       t = (m_nl | m_cr) | (m_bs | m_qm);
584 
585       /* T now contains 0xff in bytes for which we matched one of the relevant
586 	 characters.  We want to exit the loop if any byte in T is non-zero.
587 	 Below is the expansion of vec_any_ne(t, zero).  */
588     }
589   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
590 
591   {
592 #define N  (sizeof(vc) / sizeof(long))
593 
594     typedef char check_count[(N == 2 || N == 4) * 2 - 1];
595     union {
596       vc v;
597       unsigned long l[N];
598     } u;
599     unsigned long l, i = 0;
600 
601     u.v = t;
602 
603     /* Find the first word of T that is non-zero.  */
604     switch (N)
605       {
606       case 4:
607 	l = u.l[i++];
608 	if (l != 0)
609 	  break;
610 	s += sizeof(unsigned long);
611 	l = u.l[i++];
612 	if (l != 0)
613 	  break;
614 	s += sizeof(unsigned long);
615       case 2:
616 	l = u.l[i++];
617 	if (l != 0)
618 	  break;
619 	s += sizeof(unsigned long);
620 	l = u.l[i];
621       }
622 
623     /* L now contains 0xff in bytes for which we matched one of the
624        relevant characters.  We can find the byte index by finding
625        its bit index and dividing by 8.  */
626     l = __builtin_clzl(l) >> 3;
627     return s + l;
628 
629 #undef N
630   }
631 }
632 
633 #else
634 
635 /* We only have one accelerated alternative.  Use a direct call so that
636    we encourage inlining.  */
637 
638 #define search_line_fast  search_line_acc_char
639 
640 #endif
641 
/* Initialize the lexer if needed.  This only does something when a
   run-time selected vectorized scanner was compiled in.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
651 
652 /* Returns with a logical line that contains no escaped newlines or
653    trigraphs.  This is a time-critical inner loop.  */
654 void
655 _cpp_clean_line (cpp_reader *pfile)
656 {
657   cpp_buffer *buffer;
658   const uchar *s;
659   uchar c, *d, *p;
660 
661   buffer = pfile->buffer;
662   buffer->cur_note = buffer->notes_used = 0;
663   buffer->cur = buffer->line_base = buffer->next_line;
664   buffer->need_line = false;
665   s = buffer->next_line;
666 
667   if (!buffer->from_stage3)
668     {
669       const uchar *pbackslash = NULL;
670 
671       /* Fast path.  This is the common case of an un-escaped line with
672 	 no trigraphs.  The primary win here is by not writing any
673 	 data back to memory until we have to.  */
674       while (1)
675 	{
676 	  /* Perform an optimized search for \n, \r, \\, ?.  */
677 	  s = search_line_fast (s, buffer->rlimit);
678 
679 	  c = *s;
680 	  if (c == '\\')
681 	    {
682 	      /* Record the location of the backslash and continue.  */
683 	      pbackslash = s++;
684 	    }
685 	  else if (__builtin_expect (c == '?', 0))
686 	    {
687 	      if (__builtin_expect (s[1] == '?', false)
688 		   && _cpp_trigraph_map[s[2]])
689 		{
690 		  /* Have a trigraph.  We may or may not have to convert
691 		     it.  Add a line note regardless, for -Wtrigraphs.  */
692 		  add_line_note (buffer, s, s[2]);
693 		  if (CPP_OPTION (pfile, trigraphs))
694 		    {
695 		      /* We do, and that means we have to switch to the
696 		         slow path.  */
697 		      d = (uchar *) s;
698 		      *d = _cpp_trigraph_map[s[2]];
699 		      s += 2;
700 		      goto slow_path;
701 		    }
702 		}
703 	      /* Not a trigraph.  Continue on fast-path.  */
704 	      s++;
705 	    }
706 	  else
707 	    break;
708 	}
709 
710       /* This must be \r or \n.  We're either done, or we'll be forced
711 	 to write back to the buffer and continue on the slow path.  */
712       d = (uchar *) s;
713 
714       if (__builtin_expect (s == buffer->rlimit, false))
715 	goto done;
716 
717       /* DOS line ending? */
718       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
719 	{
720 	  s++;
721 	  if (s == buffer->rlimit)
722 	    goto done;
723 	}
724 
725       if (__builtin_expect (pbackslash == NULL, true))
726 	goto done;
727 
728       /* Check for escaped newline.  */
729       p = d;
730       while (is_nvspace (p[-1]))
731 	p--;
732       if (p - 1 != pbackslash)
733 	goto done;
734 
735       /* Have an escaped newline; process it and proceed to
736 	 the slow path.  */
737       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
738       d = p - 2;
739       buffer->next_line = p - 1;
740 
741     slow_path:
742       while (1)
743 	{
744 	  c = *++s;
745 	  *++d = c;
746 
747 	  if (c == '\n' || c == '\r')
748 	    {
749 	      /* Handle DOS line endings.  */
750 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
751 		s++;
752 	      if (s == buffer->rlimit)
753 		break;
754 
755 	      /* Escaped?  */
756 	      p = d;
757 	      while (p != buffer->next_line && is_nvspace (p[-1]))
758 		p--;
759 	      if (p == buffer->next_line || p[-1] != '\\')
760 		break;
761 
762 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
763 	      d = p - 2;
764 	      buffer->next_line = p - 1;
765 	    }
766 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
767 	    {
768 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
769 	      add_line_note (buffer, d, s[2]);
770 	      if (CPP_OPTION (pfile, trigraphs))
771 		{
772 		  *d = _cpp_trigraph_map[s[2]];
773 		  s += 2;
774 		}
775 	    }
776 	}
777     }
778   else
779     {
780       while (*s != '\n' && *s != '\r')
781 	s++;
782       d = (uchar *) s;
783 
784       /* Handle DOS line endings.  */
785       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
786 	s++;
787     }
788 
789  done:
790   *d = '\n';
791   /* A sentinel note that should never be processed.  */
792   add_line_note (buffer, d + 1, '\n');
793   buffer->next_line = s + 1;
794 }
795 
796 /* Return true if the trigraph indicated by NOTE should be warned
797    about in a comment.  */
798 static bool
799 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
800 {
801   const uchar *p;
802 
803   /* Within comments we don't warn about trigraphs, unless the
804      trigraph forms an escaped newline, as that may change
805      behavior.  */
806   if (note->type != '/')
807     return false;
808 
809   /* If -trigraphs, then this was an escaped newline iff the next note
810      is coincident.  */
811   if (CPP_OPTION (pfile, trigraphs))
812     return note[1].pos == note->pos;
813 
814   /* Otherwise, see if this forms an escaped newline.  */
815   p = note->pos + 3;
816   while (is_nvspace (*p))
817     p++;
818 
819   /* There might have been escaped newlines between the trigraph and the
820      newline we found.  Hence the position test.  */
821   return (*p == '\n' && p < note[1].pos);
822 }
823 
824 /* Process the notes created by add_line_note as far as the current
825    location.  */
826 void
827 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
828 {
829   cpp_buffer *buffer = pfile->buffer;
830 
831   for (;;)
832     {
833       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
834       unsigned int col;
835 
836       if (note->pos > buffer->cur)
837 	break;
838 
839       buffer->cur_note++;
840       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
841 
842       if (note->type == '\\' || note->type == ' ')
843 	{
844 	  if (note->type == ' ' && !in_comment)
845 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
846 				 "backslash and newline separated by space");
847 
848 	  if (buffer->next_line > buffer->rlimit)
849 	    {
850 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
851 				   "backslash-newline at end of file");
852 	      /* Prevent "no newline at end of file" warning.  */
853 	      buffer->next_line = buffer->rlimit;
854 	    }
855 
856 	  buffer->line_base = note->pos;
857 	  CPP_INCREMENT_LINE (pfile, 0);
858 	}
859       else if (_cpp_trigraph_map[note->type])
860 	{
861 	  if (CPP_OPTION (pfile, warn_trigraphs)
862 	      && (!in_comment || warn_in_comment (pfile, note)))
863 	    {
864 	      if (CPP_OPTION (pfile, trigraphs))
865 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
866                                        pfile->line_table->highest_line, col,
867 				       "trigraph ??%c converted to %c",
868 				       note->type,
869 				       (int) _cpp_trigraph_map[note->type]);
870 	      else
871 		{
872 		  cpp_warning_with_line
873 		    (pfile, CPP_W_TRIGRAPHS,
874                      pfile->line_table->highest_line, col,
875 		     "trigraph ??%c ignored, use -trigraphs to enable",
876 		     note->type);
877 		}
878 	    }
879 	}
880       else if (note->type == 0)
881 	/* Already processed in lex_raw_string.  */;
882       else
883 	abort ();
884     }
885 }
886 
887 /* Skip a C-style block comment.  We find the end of the comment by
888    seeing if an asterisk is before every '/' we encounter.  Returns
889    nonzero if comment terminated by EOF, zero otherwise.
890 
891    Buffer->cur points to the initial asterisk of the comment.  */
892 bool
893 _cpp_skip_block_comment (cpp_reader *pfile)
894 {
895   cpp_buffer *buffer = pfile->buffer;
896   const uchar *cur = buffer->cur;
897   uchar c;
898 
899   cur++;
900   if (*cur == '/')
901     cur++;
902 
903   for (;;)
904     {
905       /* People like decorating comments with '*', so check for '/'
906 	 instead for efficiency.  */
907       c = *cur++;
908 
909       if (c == '/')
910 	{
911 	  if (cur[-2] == '*')
912 	    break;
913 
914 	  /* Warn about potential nested comments, but not if the '/'
915 	     comes immediately before the true comment delimiter.
916 	     Don't bother to get it right across escaped newlines.  */
917 	  if (CPP_OPTION (pfile, warn_comments)
918 	      && cur[0] == '*' && cur[1] != '/')
919 	    {
920 	      buffer->cur = cur;
921 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
922 				     pfile->line_table->highest_line,
923 				     CPP_BUF_COL (buffer),
924 				     "\"/*\" within comment");
925 	    }
926 	}
927       else if (c == '\n')
928 	{
929 	  unsigned int cols;
930 	  buffer->cur = cur - 1;
931 	  _cpp_process_line_notes (pfile, true);
932 	  if (buffer->next_line >= buffer->rlimit)
933 	    return true;
934 	  _cpp_clean_line (pfile);
935 
936 	  cols = buffer->next_line - buffer->line_base;
937 	  CPP_INCREMENT_LINE (pfile, cols);
938 
939 	  cur = buffer->cur;
940 	}
941     }
942 
943   buffer->cur = cur;
944   _cpp_process_line_notes (pfile, true);
945   return false;
946 }
947 
948 /* Skip a C++ line comment, leaving buffer->cur pointing to the
949    terminating newline.  Handles escaped newlines.  Returns nonzero
950    if a multiline comment.  */
951 static int
952 skip_line_comment (cpp_reader *pfile)
953 {
954   cpp_buffer *buffer = pfile->buffer;
955   source_location orig_line = pfile->line_table->highest_line;
956 
957   while (*buffer->cur != '\n')
958     buffer->cur++;
959 
960   _cpp_process_line_notes (pfile, true);
961   return orig_line != pfile->line_table->highest_line;
962 }
963 
964 /* Skips whitespace, saving the next non-whitespace character.  */
965 static void
966 skip_whitespace (cpp_reader *pfile, cppchar_t c)
967 {
968   cpp_buffer *buffer = pfile->buffer;
969   bool saw_NUL = false;
970 
971   do
972     {
973       /* Horizontal space always OK.  */
974       if (c == ' ' || c == '\t')
975 	;
976       /* Just \f \v or \0 left.  */
977       else if (c == '\0')
978 	saw_NUL = true;
979       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
980 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
981 			     CPP_BUF_COL (buffer),
982 			     "%s in preprocessing directive",
983 			     c == '\f' ? "form feed" : "vertical tab");
984 
985       c = *buffer->cur++;
986     }
987   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
988   while (is_nvspace (c));
989 
990   if (saw_NUL)
991     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
992 
993   buffer->cur--;
994 }
995 
996 /* See if the characters of a number token are valid in a name (no
997    '.', '+' or '-').  */
998 static int
999 name_p (cpp_reader *pfile, const cpp_string *string)
1000 {
1001   unsigned int i;
1002 
1003   for (i = 0; i < string->len; i++)
1004     if (!is_idchar (string->text[i]))
1005       return 0;
1006 
1007   return 1;
1008 }
1009 
1010 /* After parsing an identifier or other sequence, produce a warning about
1011    sequences not in NFC/NFKC.  */
1012 static void
1013 warn_about_normalization (cpp_reader *pfile,
1014 			  const cpp_token *token,
1015 			  const struct normalize_state *s)
1016 {
1017   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1018       && !pfile->state.skipping)
1019     {
1020       /* Make sure that the token is printed using UCNs, even
1021 	 if we'd otherwise happily print UTF-8.  */
1022       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1023       size_t sz;
1024 
1025       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1026       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1027 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1028 			       "`%.*s' is not in NFKC", (int) sz, buf);
1029       else
1030 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1031 			       "`%.*s' is not in NFC", (int) sz, buf);
1032     }
1033 }
1034 
1035 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1036    an identifier.  FIRST is TRUE if this starts an identifier.  */
1037 static bool
1038 forms_identifier_p (cpp_reader *pfile, int first,
1039 		    struct normalize_state *state)
1040 {
1041   cpp_buffer *buffer = pfile->buffer;
1042 
1043   if (*buffer->cur == '$')
1044     {
1045       if (!CPP_OPTION (pfile, dollars_in_ident))
1046 	return false;
1047 
1048       buffer->cur++;
1049       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1050 	{
1051 	  CPP_OPTION (pfile, warn_dollars) = 0;
1052 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1053 	}
1054 
1055       return true;
1056     }
1057 
1058   /* Is this a syntactically valid UCN?  */
1059   if (CPP_OPTION (pfile, extended_identifiers)
1060       && *buffer->cur == '\\'
1061       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1062     {
1063       buffer->cur += 2;
1064       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1065 			  state))
1066 	return true;
1067       buffer->cur -= 2;
1068     }
1069 
1070   return false;
1071 }
1072 
1073 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1074 static cpp_hashnode *
1075 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1076 {
1077   cpp_hashnode *result;
1078   const uchar *cur;
1079   unsigned int len;
1080   unsigned int hash = HT_HASHSTEP (0, *base);
1081 
1082   cur = base + 1;
1083   while (ISIDNUM (*cur))
1084     {
1085       hash = HT_HASHSTEP (hash, *cur);
1086       cur++;
1087     }
1088   len = cur - base;
1089   hash = HT_HASHFINISH (hash, len);
1090   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1091 					      base, len, hash, HT_ALLOC));
1092 
1093   /* Rarely, identifiers require diagnostics when lexed.  */
1094   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1095 			&& !pfile->state.skipping, 0))
1096     {
1097       /* It is allowed to poison the same identifier twice.  */
1098       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1099 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1100 		   NODE_NAME (result));
1101 
1102       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1103 	 replacement list of a variadic macro.  */
1104       if (result == pfile->spec_nodes.n__VA_ARGS__
1105 	  && !pfile->state.va_args_ok)
1106 	cpp_error (pfile, CPP_DL_PEDWARN,
1107 		   "__VA_ARGS__ can only appear in the expansion"
1108 		   " of a C99 variadic macro");
1109 
1110       /* For -Wc++-compat, warn about use of C++ named operators.  */
1111       if (result->flags & NODE_WARN_OPERATOR)
1112 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1113 		     "identifier \"%s\" is a special operator name in C++",
1114 		     NODE_NAME (result));
1115     }
1116 
1117   return result;
1118 }
1119 
1120 /* Get the cpp_hashnode of an identifier specified by NAME in
1121    the current cpp_reader object.  If none is found, NULL is returned.  */
1122 cpp_hashnode *
1123 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1124 {
1125   cpp_hashnode *result;
1126   result = lex_identifier_intern (pfile, (uchar *) name);
1127   return result;
1128 }
1129 
1130 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1131 static cpp_hashnode *
1132 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1133 		struct normalize_state *nst)
1134 {
1135   cpp_hashnode *result;
1136   const uchar *cur;
1137   unsigned int len;
1138   unsigned int hash = HT_HASHSTEP (0, *base);
1139 
1140   cur = pfile->buffer->cur;
1141   if (! starts_ucn)
1142     while (ISIDNUM (*cur))
1143       {
1144 	hash = HT_HASHSTEP (hash, *cur);
1145 	cur++;
1146       }
1147   pfile->buffer->cur = cur;
1148   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1149     {
1150       /* Slower version for identifiers containing UCNs (or $).  */
1151       do {
1152 	while (ISIDNUM (*pfile->buffer->cur))
1153 	  {
1154 	    pfile->buffer->cur++;
1155 	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
1156 	  }
1157       } while (forms_identifier_p (pfile, false, nst));
1158       result = _cpp_interpret_identifier (pfile, base,
1159 					  pfile->buffer->cur - base);
1160     }
1161   else
1162     {
1163       len = cur - base;
1164       hash = HT_HASHFINISH (hash, len);
1165 
1166       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1167 						  base, len, hash, HT_ALLOC));
1168     }
1169 
1170   /* Rarely, identifiers require diagnostics when lexed.  */
1171   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1172 			&& !pfile->state.skipping, 0))
1173     {
1174       /* It is allowed to poison the same identifier twice.  */
1175       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1176 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1177 		   NODE_NAME (result));
1178 
1179       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1180 	 replacement list of a variadic macro.  */
1181       if (result == pfile->spec_nodes.n__VA_ARGS__
1182 	  && !pfile->state.va_args_ok)
1183 	cpp_error (pfile, CPP_DL_PEDWARN,
1184 		   "__VA_ARGS__ can only appear in the expansion"
1185 		   " of a C99 variadic macro");
1186 
1187       /* For -Wc++-compat, warn about use of C++ named operators.  */
1188       if (result->flags & NODE_WARN_OPERATOR)
1189 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1190 		     "identifier \"%s\" is a special operator name in C++",
1191 		     NODE_NAME (result));
1192     }
1193 
1194   return result;
1195 }
1196 
1197 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1198 static void
1199 lex_number (cpp_reader *pfile, cpp_string *number,
1200 	    struct normalize_state *nst)
1201 {
1202   const uchar *cur;
1203   const uchar *base;
1204   uchar *dest;
1205 
1206   base = pfile->buffer->cur - 1;
1207   do
1208     {
1209       cur = pfile->buffer->cur;
1210 
1211       /* N.B. ISIDNUM does not include $.  */
1212       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
1213 	{
1214 	  cur++;
1215 	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
1216 	}
1217 
1218       pfile->buffer->cur = cur;
1219     }
1220   while (forms_identifier_p (pfile, false, nst));
1221 
1222   number->len = cur - base;
1223   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1224   memcpy (dest, base, number->len);
1225   dest[number->len] = '\0';
1226   number->text = dest;
1227 }
1228 
1229 /* Create a token of type TYPE with a literal spelling.  */
1230 static void
1231 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1232 		unsigned int len, enum cpp_ttype type)
1233 {
1234   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1235 
1236   memcpy (dest, base, len);
1237   dest[len] = '\0';
1238   token->type = type;
1239   token->val.str.len = len;
1240   token->val.str.text = dest;
1241 }
1242 
1243 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1244    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1245 
1246 static void
1247 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1248 		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1249 {
1250   _cpp_buff *first_buff = *first_buff_p;
1251   _cpp_buff *last_buff = *last_buff_p;
1252 
1253   if (first_buff == NULL)
1254     first_buff = last_buff = _cpp_get_buff (pfile, len);
1255   else if (len > BUFF_ROOM (last_buff))
1256     {
1257       size_t room = BUFF_ROOM (last_buff);
1258       memcpy (BUFF_FRONT (last_buff), base, room);
1259       BUFF_FRONT (last_buff) += room;
1260       base += room;
1261       len -= room;
1262       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1263     }
1264 
1265   memcpy (BUFF_FRONT (last_buff), base, len);
1266   BUFF_FRONT (last_buff) += len;
1267 
1268   *first_buff_p = first_buff;
1269   *last_buff_p = last_buff;
1270 }
1271 
1272 /* Lexes a raw string.  The stored string contains the spelling, including
1273    double quotes, delimiter string, '(' and ')', any leading
1274    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1275    literal, or CPP_OTHER if it was not properly terminated.
1276 
1277    The spelling is NUL-terminated, but it is not guaranteed that this
1278    is the first NUL since embedded NULs are preserved.  */
1279 
1280 static void
1281 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1282 		const uchar *cur)
1283 {
1284   const uchar *raw_prefix;
1285   unsigned int raw_prefix_len = 0;
1286   enum cpp_ttype type;
1287   size_t total_len = 0;
1288   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1289   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1290 
1291   type = (*base == 'L' ? CPP_WSTRING :
1292 	  *base == 'U' ? CPP_STRING32 :
1293 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1294 	  : CPP_STRING);
1295 
1296   raw_prefix = cur + 1;
1297   while (raw_prefix_len < 16)
1298     {
1299       switch (raw_prefix[raw_prefix_len])
1300 	{
1301 	case ' ': case '(': case ')': case '\\': case '\t':
1302 	case '\v': case '\f': case '\n': default:
1303 	  break;
1304 	/* Basic source charset except the above chars.  */
1305 	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1306 	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1307 	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1308 	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1309 	case 'y': case 'z':
1310 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1311 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1312 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1313 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1314 	case 'Y': case 'Z':
1315 	case '0': case '1': case '2': case '3': case '4': case '5':
1316 	case '6': case '7': case '8': case '9':
1317 	case '_': case '{': case '}': case '#': case '[': case ']':
1318 	case '<': case '>': case '%': case ':': case ';': case '.':
1319 	case '?': case '*': case '+': case '-': case '/': case '^':
1320 	case '&': case '|': case '~': case '!': case '=': case ',':
1321 	case '"': case '\'':
1322 	  raw_prefix_len++;
1323 	  continue;
1324 	}
1325       break;
1326     }
1327 
1328   if (raw_prefix[raw_prefix_len] != '(')
1329     {
1330       int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
1331 		+ 1;
1332       if (raw_prefix_len == 16)
1333 	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1334 			     "raw string delimiter longer than 16 characters");
1335       else
1336 	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1337 			     "invalid character '%c' in raw string delimiter",
1338 			     (int) raw_prefix[raw_prefix_len]);
1339       pfile->buffer->cur = raw_prefix - 1;
1340       create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
1341       return;
1342     }
1343 
1344   cur = raw_prefix + raw_prefix_len + 1;
1345   for (;;)
1346     {
1347 #define BUF_APPEND(STR,LEN)					\
1348       do {							\
1349 	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
1350 			&first_buff, &last_buff);		\
1351 	total_len += (LEN);					\
1352       } while (0);
1353 
1354       cppchar_t c;
1355 
1356       /* If we previously performed any trigraph or line splicing
1357 	 transformations, undo them within the body of the raw string.  */
1358       while (note->pos < cur)
1359 	++note;
1360       for (; note->pos == cur; ++note)
1361 	{
1362 	  switch (note->type)
1363 	    {
1364 	    case '\\':
1365 	    case ' ':
1366 	      /* Restore backslash followed by newline.  */
1367 	      BUF_APPEND (base, cur - base);
1368 	      base = cur;
1369 	      BUF_APPEND ("\\", 1);
1370 	    after_backslash:
1371 	      if (note->type == ' ')
1372 		{
1373 		  /* GNU backslash whitespace newline extension.  FIXME
1374 		     could be any sequence of non-vertical space.  When we
1375 		     can properly restore any such sequence, we should mark
1376 		     this note as handled so _cpp_process_line_notes
1377 		     doesn't warn.  */
1378 		  BUF_APPEND (" ", 1);
1379 		}
1380 
1381 	      BUF_APPEND ("\n", 1);
1382 	      break;
1383 
1384 	    case 0:
1385 	      /* Already handled.  */
1386 	      break;
1387 
1388 	    default:
1389 	      if (_cpp_trigraph_map[note->type])
1390 		{
1391 		  /* Don't warn about this trigraph in
1392 		     _cpp_process_line_notes, since trigraphs show up as
1393 		     trigraphs in raw strings.  */
1394 		  uchar type = note->type;
1395 		  note->type = 0;
1396 
1397 		  if (!CPP_OPTION (pfile, trigraphs))
1398 		    /* If we didn't convert the trigraph in the first
1399 		       place, don't do anything now either.  */
1400 		    break;
1401 
1402 		  BUF_APPEND (base, cur - base);
1403 		  base = cur;
1404 		  BUF_APPEND ("??", 2);
1405 
1406 		  /* ??/ followed by newline gets two line notes, one for
1407 		     the trigraph and one for the backslash/newline.  */
1408 		  if (type == '/' && note[1].pos == cur)
1409 		    {
1410 		      if (note[1].type != '\\'
1411 			  && note[1].type != ' ')
1412 			abort ();
1413 		      BUF_APPEND ("/", 1);
1414 		      ++note;
1415 		      goto after_backslash;
1416 		    }
1417 		  /* The ) from ??) could be part of the suffix.  */
1418 		  else if (type == ')'
1419 			   && strncmp ((const char *) cur+1,
1420 				       (const char *) raw_prefix,
1421 				       raw_prefix_len) == 0
1422 			   && cur[raw_prefix_len+1] == '"')
1423 		    {
1424 		      BUF_APPEND (")", 1);
1425 		      base++;
1426 		      cur += raw_prefix_len + 2;
1427 		      goto break_outer_loop;
1428 		    }
1429 		  else
1430 		    {
1431 		      /* Skip the replacement character.  */
1432 		      base = ++cur;
1433 		      BUF_APPEND (&type, 1);
1434 		    }
1435 		}
1436 	      else
1437 		abort ();
1438 	      break;
1439 	    }
1440 	}
1441       c = *cur++;
1442 
1443       if (c == ')'
1444 	  && strncmp ((const char *) cur, (const char *) raw_prefix,
1445 		      raw_prefix_len) == 0
1446 	  && cur[raw_prefix_len] == '"')
1447 	{
1448 	  cur += raw_prefix_len + 1;
1449 	  break;
1450 	}
1451       else if (c == '\n')
1452 	{
1453 	  if (pfile->state.in_directive
1454 	      || pfile->state.parsing_args
1455 	      || pfile->state.in_deferred_pragma)
1456 	    {
1457 	      cur--;
1458 	      type = CPP_OTHER;
1459 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1460 				   "unterminated raw string");
1461 	      break;
1462 	    }
1463 
1464 	  BUF_APPEND (base, cur - base);
1465 
1466 	  if (pfile->buffer->cur < pfile->buffer->rlimit)
1467 	    CPP_INCREMENT_LINE (pfile, 0);
1468 	  pfile->buffer->need_line = true;
1469 
1470 	  pfile->buffer->cur = cur-1;
1471 	  _cpp_process_line_notes (pfile, false);
1472 	  if (!_cpp_get_fresh_line (pfile))
1473 	    {
1474 	      source_location src_loc = token->src_loc;
1475 	      token->type = CPP_EOF;
1476 	      /* Tell the compiler the line number of the EOF token.  */
1477 	      token->src_loc = pfile->line_table->highest_line;
1478 	      token->flags = BOL;
1479 	      if (first_buff != NULL)
1480 		_cpp_release_buff (pfile, first_buff);
1481 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1482 				   "unterminated raw string");
1483 	      return;
1484 	    }
1485 
1486 	  cur = base = pfile->buffer->cur;
1487 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
1488 	}
1489     }
1490  break_outer_loop:
1491 
1492   if (CPP_OPTION (pfile, user_literals))
1493     {
1494       /* Grab user defined literal suffix.  */
1495       if (ISIDST (*cur))
1496 	{
1497 	  type = cpp_userdef_string_add_type (type);
1498 	  ++cur;
1499 	}
1500       while (ISIDNUM (*cur))
1501 	++cur;
1502     }
1503 
1504   pfile->buffer->cur = cur;
1505   if (first_buff == NULL)
1506     create_literal (pfile, token, base, cur - base, type);
1507   else
1508     {
1509       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1510 
1511       token->type = type;
1512       token->val.str.len = total_len + (cur - base);
1513       token->val.str.text = dest;
1514       last_buff = first_buff;
1515       while (last_buff != NULL)
1516 	{
1517 	  memcpy (dest, last_buff->base,
1518 		  BUFF_FRONT (last_buff) - last_buff->base);
1519 	  dest += BUFF_FRONT (last_buff) - last_buff->base;
1520 	  last_buff = last_buff->next;
1521 	}
1522       _cpp_release_buff (pfile, first_buff);
1523       memcpy (dest, base, cur - base);
1524       dest[cur - base] = '\0';
1525     }
1526 }
1527 
1528 /* Lexes a string, character constant, or angle-bracketed header file
1529    name.  The stored string contains the spelling, including opening
1530    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1531    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1532    if it was not properly terminated, or CPP_LESS for an unterminated
1533    header name which must be relexed as normal tokens.
1534 
1535    The spelling is NUL-terminated, but it is not guaranteed that this
1536    is the first NUL since embedded NULs are preserved.  */
1537 static void
1538 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1539 {
1540   bool saw_NUL = false;
1541   const uchar *cur;
1542   cppchar_t terminator;
1543   enum cpp_ttype type;
1544 
1545   cur = base;
1546   terminator = *cur++;
1547   if (terminator == 'L' || terminator == 'U')
1548     terminator = *cur++;
1549   else if (terminator == 'u')
1550     {
1551       terminator = *cur++;
1552       if (terminator == '8')
1553 	terminator = *cur++;
1554     }
1555   if (terminator == 'R')
1556     {
1557       lex_raw_string (pfile, token, base, cur);
1558       return;
1559     }
1560   if (terminator == '"')
1561     type = (*base == 'L' ? CPP_WSTRING :
1562 	    *base == 'U' ? CPP_STRING32 :
1563 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1564 			 : CPP_STRING);
1565   else if (terminator == '\'')
1566     type = (*base == 'L' ? CPP_WCHAR :
1567 	    *base == 'U' ? CPP_CHAR32 :
1568 	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1569   else
1570     terminator = '>', type = CPP_HEADER_NAME;
1571 
1572   for (;;)
1573     {
1574       cppchar_t c = *cur++;
1575 
1576       /* In #include-style directives, terminators are not escapable.  */
1577       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1578 	cur++;
1579       else if (c == terminator)
1580 	break;
1581       else if (c == '\n')
1582 	{
1583 	  cur--;
1584 	  /* Unmatched quotes always yield undefined behavior, but
1585 	     greedy lexing means that what appears to be an unterminated
1586 	     header name may actually be a legitimate sequence of tokens.  */
1587 	  if (terminator == '>')
1588 	    {
1589 	      token->type = CPP_LESS;
1590 	      return;
1591 	    }
1592 	  type = CPP_OTHER;
1593 	  break;
1594 	}
1595       else if (c == '\0')
1596 	saw_NUL = true;
1597     }
1598 
1599   if (saw_NUL && !pfile->state.skipping)
1600     cpp_error (pfile, CPP_DL_WARNING,
1601 	       "null character(s) preserved in literal");
1602 
1603   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1604     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1605 	       (int) terminator);
1606 
1607   if (CPP_OPTION (pfile, user_literals))
1608     {
1609       /* Grab user defined literal suffix.  */
1610       if (ISIDST (*cur))
1611 	{
1612 	  type = cpp_userdef_char_add_type (type);
1613 	  type = cpp_userdef_string_add_type (type);
1614           ++cur;
1615 	}
1616       while (ISIDNUM (*cur))
1617 	++cur;
1618     }
1619 
1620   pfile->buffer->cur = cur;
1621   create_literal (pfile, token, base, cur - base, type);
1622 }
1623 
1624 /* Return the comment table. The client may not make any assumption
1625    about the ordering of the table.  */
1626 cpp_comment_table *
1627 cpp_get_comments (cpp_reader *pfile)
1628 {
1629   return &pfile->comments;
1630 }
1631 
1632 /* Append a comment to the end of the comment table. */
1633 static void
1634 store_comment (cpp_reader *pfile, cpp_token *token)
1635 {
1636   int len;
1637 
1638   if (pfile->comments.allocated == 0)
1639     {
1640       pfile->comments.allocated = 256;
1641       pfile->comments.entries = (cpp_comment *) xmalloc
1642 	(pfile->comments.allocated * sizeof (cpp_comment));
1643     }
1644 
1645   if (pfile->comments.count == pfile->comments.allocated)
1646     {
1647       pfile->comments.allocated *= 2;
1648       pfile->comments.entries = (cpp_comment *) xrealloc
1649 	(pfile->comments.entries,
1650 	 pfile->comments.allocated * sizeof (cpp_comment));
1651     }
1652 
1653   len = token->val.str.len;
1654 
1655   /* Copy comment. Note, token may not be NULL terminated. */
1656   pfile->comments.entries[pfile->comments.count].comment =
1657     (char *) xmalloc (sizeof (char) * (len + 1));
1658   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1659 	  token->val.str.text, len);
1660   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1661 
1662   /* Set source location. */
1663   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1664 
1665   /* Increment the count of entries in the comment table. */
1666   pfile->comments.count++;
1667 }
1668 
1669 /* The stored comment includes the comment start and any terminator.  */
1670 static void
1671 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1672 	      cppchar_t type)
1673 {
1674   unsigned char *buffer;
1675   unsigned int len, clen, i;
1676 
1677   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1678 
1679   /* C++ comments probably (not definitely) have moved past a new
1680      line, which we don't want to save in the comment.  */
1681   if (is_vspace (pfile->buffer->cur[-1]))
1682     len--;
1683 
1684   /* If we are currently in a directive or in argument parsing, then
1685      we need to store all C++ comments as C comments internally, and
1686      so we need to allocate a little extra space in that case.
1687 
1688      Note that the only time we encounter a directive here is
1689      when we are saving comments in a "#define".  */
1690   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1691 	  && type == '/') ? len + 2 : len;
1692 
1693   buffer = _cpp_unaligned_alloc (pfile, clen);
1694 
1695   token->type = CPP_COMMENT;
1696   token->val.str.len = clen;
1697   token->val.str.text = buffer;
1698 
1699   buffer[0] = '/';
1700   memcpy (buffer + 1, from, len - 1);
1701 
1702   /* Finish conversion to a C comment, if necessary.  */
1703   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1704     {
1705       buffer[1] = '*';
1706       buffer[clen - 2] = '*';
1707       buffer[clen - 1] = '/';
1708       /* As there can be in a C++ comments illegal sequences for C comments
1709          we need to filter them out.  */
1710       for (i = 2; i < (clen - 2); i++)
1711         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1712           buffer[i] = '|';
1713     }
1714 
1715   /* Finally store this comment for use by clients of libcpp. */
1716   store_comment (pfile, token);
1717 }
1718 
1719 /* Allocate COUNT tokens for RUN.  */
1720 void
1721 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1722 {
1723   run->base = XNEWVEC (cpp_token, count);
1724   run->limit = run->base + count;
1725   run->next = NULL;
1726 }
1727 
1728 /* Returns the next tokenrun, or creates one if there is none.  */
1729 static tokenrun *
1730 next_tokenrun (tokenrun *run)
1731 {
1732   if (run->next == NULL)
1733     {
1734       run->next = XNEW (tokenrun);
1735       run->next->prev = run;
1736       _cpp_init_tokenrun (run->next, 250);
1737     }
1738 
1739   return run->next;
1740 }
1741 
1742 /* Return the number of not yet processed token in a given
1743    context.  */
1744 int
1745 _cpp_remaining_tokens_num_in_context (cpp_context *context)
1746 {
1747   if (context->tokens_kind == TOKENS_KIND_DIRECT)
1748     return (LAST (context).token - FIRST (context).token);
1749   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1750 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
1751     return (LAST (context).ptoken - FIRST (context).ptoken);
1752   else
1753       abort ();
1754 }
1755 
1756 /* Returns the token present at index INDEX in a given context.  If
1757    INDEX is zero, the next token to be processed is returned.  */
1758 static const cpp_token*
1759 _cpp_token_from_context_at (cpp_context *context, int index)
1760 {
1761   if (context->tokens_kind == TOKENS_KIND_DIRECT)
1762     return &(FIRST (context).token[index]);
1763   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1764 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
1765     return FIRST (context).ptoken[index];
1766  else
1767    abort ();
1768 }
1769 
1770 /* Look ahead in the input stream.  */
1771 const cpp_token *
1772 cpp_peek_token (cpp_reader *pfile, int index)
1773 {
1774   cpp_context *context = pfile->context;
1775   const cpp_token *peektok;
1776   int count;
1777 
1778   /* First, scan through any pending cpp_context objects.  */
1779   while (context->prev)
1780     {
1781       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
1782 
1783       if (index < (int) sz)
1784         return _cpp_token_from_context_at (context, index);
1785       index -= (int) sz;
1786       context = context->prev;
1787     }
1788 
1789   /* We will have to read some new tokens after all (and do so
1790      without invalidating preceding tokens).  */
1791   count = index;
1792   pfile->keep_tokens++;
1793 
1794   do
1795     {
1796       peektok = _cpp_lex_token (pfile);
1797       if (peektok->type == CPP_EOF)
1798 	return peektok;
1799     }
1800   while (index--);
1801 
1802   _cpp_backup_tokens_direct (pfile, count + 1);
1803   pfile->keep_tokens--;
1804 
1805   return peektok;
1806 }
1807 
1808 /* Allocate a single token that is invalidated at the same time as the
1809    rest of the tokens on the line.  Has its line and col set to the
1810    same as the last lexed token, so that diagnostics appear in the
1811    right place.  */
1812 cpp_token *
1813 _cpp_temp_token (cpp_reader *pfile)
1814 {
1815   cpp_token *old, *result;
1816   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1817   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1818 
1819   old = pfile->cur_token - 1;
1820   /* Any pre-existing lookaheads must not be clobbered.  */
1821   if (la)
1822     {
1823       if (sz <= la)
1824         {
1825           tokenrun *next = next_tokenrun (pfile->cur_run);
1826 
1827           if (sz < la)
1828             memmove (next->base + 1, next->base,
1829                      (la - sz) * sizeof (cpp_token));
1830 
1831           next->base[0] = pfile->cur_run->limit[-1];
1832         }
1833 
1834       if (sz > 1)
1835         memmove (pfile->cur_token + 1, pfile->cur_token,
1836                  MIN (la, sz - 1) * sizeof (cpp_token));
1837     }
1838 
1839   if (!sz && pfile->cur_token == pfile->cur_run->limit)
1840     {
1841       pfile->cur_run = next_tokenrun (pfile->cur_run);
1842       pfile->cur_token = pfile->cur_run->base;
1843     }
1844 
1845   result = pfile->cur_token++;
1846   result->src_loc = old->src_loc;
1847   return result;
1848 }
1849 
1850 /* Lex a token into RESULT (external interface).  Takes care of issues
1851    like directive handling, token lookahead, multiple include
1852    optimization and skipping.  */
1853 const cpp_token *
1854 _cpp_lex_token (cpp_reader *pfile)
1855 {
1856   cpp_token *result;
1857 
1858   for (;;)
1859     {
1860       if (pfile->cur_token == pfile->cur_run->limit)
1861 	{
1862 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
1863 	  pfile->cur_token = pfile->cur_run->base;
1864 	}
1865       /* We assume that the current token is somewhere in the current
1866 	 run.  */
1867       if (pfile->cur_token < pfile->cur_run->base
1868 	  || pfile->cur_token >= pfile->cur_run->limit)
1869 	abort ();
1870 
1871       if (pfile->lookaheads)
1872 	{
1873 	  pfile->lookaheads--;
1874 	  result = pfile->cur_token++;
1875 	}
1876       else
1877 	result = _cpp_lex_direct (pfile);
1878 
1879       if (result->flags & BOL)
1880 	{
1881 	  /* Is this a directive.  If _cpp_handle_directive returns
1882 	     false, it is an assembler #.  */
1883 	  if (result->type == CPP_HASH
1884 	      /* 6.10.3 p 11: Directives in a list of macro arguments
1885 		 gives undefined behavior.  This implementation
1886 		 handles the directive as normal.  */
1887 	      && pfile->state.parsing_args != 1)
1888 	    {
1889 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1890 		{
1891 		  if (pfile->directive_result.type == CPP_PADDING)
1892 		    continue;
1893 		  result = &pfile->directive_result;
1894 		}
1895 	    }
1896 	  else if (pfile->state.in_deferred_pragma)
1897 	    result = &pfile->directive_result;
1898 
1899 	  if (pfile->cb.line_change && !pfile->state.skipping)
1900 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1901 	}
1902 
1903       /* We don't skip tokens in directives.  */
1904       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1905 	break;
1906 
1907       /* Outside a directive, invalidate controlling macros.  At file
1908 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1909 	 get here and MI optimization works.  */
1910       pfile->mi_valid = false;
1911 
1912       if (!pfile->state.skipping || result->type == CPP_EOF)
1913 	break;
1914     }
1915 
1916   return result;
1917 }
1918 
1919 /* Returns true if a fresh line has been loaded.  */
1920 bool
1921 _cpp_get_fresh_line (cpp_reader *pfile)
1922 {
1923   int return_at_eof;
1924 
1925   /* We can't get a new line until we leave the current directive.  */
1926   if (pfile->state.in_directive)
1927     return false;
1928 
1929   for (;;)
1930     {
1931       cpp_buffer *buffer = pfile->buffer;
1932 
1933       if (!buffer->need_line)
1934 	return true;
1935 
1936       if (buffer->next_line < buffer->rlimit)
1937 	{
1938 	  _cpp_clean_line (pfile);
1939 	  return true;
1940 	}
1941 
1942       /* First, get out of parsing arguments state.  */
1943       if (pfile->state.parsing_args)
1944 	return false;
1945 
1946       /* End of buffer.  Non-empty files should end in a newline.  */
1947       if (buffer->buf != buffer->rlimit
1948 	  && buffer->next_line > buffer->rlimit
1949 	  && !buffer->from_stage3)
1950 	{
1951 	  /* Clip to buffer size.  */
1952 	  buffer->next_line = buffer->rlimit;
1953 	}
1954 
1955       return_at_eof = buffer->return_at_eof;
1956       _cpp_pop_buffer (pfile);
1957       if (pfile->buffer == NULL || return_at_eof)
1958 	return false;
1959     }
1960 }
1961 
/* Helper for two-character operators.  Expects a token pointer named
   `result' and a buffer pointer named `buffer' to be in scope at the
   point of use.  Sets result->type to ELSE_TYPE; if the next unread
   character (*buffer->cur) equals CHAR, consumes that character and
   sets result->type to THEN_TYPE instead.

   Every argument is parenthesized in the expansion so that compound
   expressions (for example a conditional expression) are compared
   and assigned as a unit.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = (ELSE_TYPE);			\
      if (*buffer->cur == (CHAR))			\
	{						\
	  buffer->cur++;				\
	  result->type = (THEN_TYPE);			\
	}						\
    }							\
  while (0)
1970 
1971 /* Lex a token into pfile->cur_token, which is also incremented, to
1972    get diagnostics pointing to the correct location.
1973 
1974    Does not handle issues such as token lookahead, multiple-include
1975    optimization, directives, skipping etc.  This function is only
1976    suitable for use by _cpp_lex_token, and in special cases like
1977    lex_expansion_token which doesn't care for any of these issues.
1978 
1979    When meeting a newline, returns CPP_EOF if parsing a directive,
1980    otherwise returns to the start of the token buffer if permissible.
1981    Returns the location of the lexed token.  */
1982 cpp_token *
1983 _cpp_lex_direct (cpp_reader *pfile)
1984 {
1985   cppchar_t c;
1986   cpp_buffer *buffer;
1987   const unsigned char *comment_start;
1988   cpp_token *result = pfile->cur_token++;
1989 
1990  fresh_line:
1991   result->flags = 0;
1992   buffer = pfile->buffer;
1993   if (buffer->need_line)
1994     {
1995       if (pfile->state.in_deferred_pragma)
1996 	{
1997 	  result->type = CPP_PRAGMA_EOL;
1998 	  pfile->state.in_deferred_pragma = false;
1999 	  if (!pfile->state.pragma_allow_expansion)
2000 	    pfile->state.prevent_expansion--;
2001 	  return result;
2002 	}
2003       if (!_cpp_get_fresh_line (pfile))
2004 	{
2005 	  result->type = CPP_EOF;
2006 	  if (!pfile->state.in_directive)
2007 	    {
2008 	      /* Tell the compiler the line number of the EOF token.  */
2009 	      result->src_loc = pfile->line_table->highest_line;
2010 	      result->flags = BOL;
2011 	    }
2012 	  return result;
2013 	}
2014       if (!pfile->keep_tokens)
2015 	{
2016 	  pfile->cur_run = &pfile->base_run;
2017 	  result = pfile->base_run.base;
2018 	  pfile->cur_token = result + 1;
2019 	}
2020       result->flags = BOL;
2021       if (pfile->state.parsing_args == 2)
2022 	result->flags |= PREV_WHITE;
2023     }
2024   buffer = pfile->buffer;
2025  update_tokens_line:
2026   result->src_loc = pfile->line_table->highest_line;
2027 
2028  skipped_white:
2029   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2030       && !pfile->overlaid_buffer)
2031     {
2032       _cpp_process_line_notes (pfile, false);
2033       result->src_loc = pfile->line_table->highest_line;
2034     }
2035   c = *buffer->cur++;
2036 
2037   if (pfile->forced_token_location_p)
2038     result->src_loc = *pfile->forced_token_location_p;
2039   else
2040     result->src_loc = linemap_position_for_column (pfile->line_table,
2041 					  CPP_BUF_COLUMN (buffer, buffer->cur));
2042 
2043   switch (c)
2044     {
2045     case ' ': case '\t': case '\f': case '\v': case '\0':
2046       result->flags |= PREV_WHITE;
2047       skip_whitespace (pfile, c);
2048       goto skipped_white;
2049 
2050     case '\n':
2051       if (buffer->cur < buffer->rlimit)
2052 	CPP_INCREMENT_LINE (pfile, 0);
2053       buffer->need_line = true;
2054       goto fresh_line;
2055 
2056     case '0': case '1': case '2': case '3': case '4':
2057     case '5': case '6': case '7': case '8': case '9':
2058       {
2059 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2060 	result->type = CPP_NUMBER;
2061 	lex_number (pfile, &result->val.str, &nst);
2062 	warn_about_normalization (pfile, result, &nst);
2063 	break;
2064       }
2065 
2066     case 'L':
2067     case 'u':
2068     case 'U':
2069     case 'R':
2070       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2071 	 wide strings or raw strings.  */
2072       if (c == 'L' || CPP_OPTION (pfile, rliterals)
2073 	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
2074 	{
2075 	  if ((*buffer->cur == '\'' && c != 'R')
2076 	      || *buffer->cur == '"'
2077 	      || (*buffer->cur == 'R'
2078 		  && c != 'R'
2079 		  && buffer->cur[1] == '"'
2080 		  && CPP_OPTION (pfile, rliterals))
2081 	      || (*buffer->cur == '8'
2082 		  && c == 'u'
2083 		  && (buffer->cur[1] == '"'
2084 		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2085 			  && CPP_OPTION (pfile, rliterals)))))
2086 	    {
2087 	      lex_string (pfile, result, buffer->cur - 1);
2088 	      break;
2089 	    }
2090 	}
2091       /* Fall through.  */
2092 
2093     case '_':
2094     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2095     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2096     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2097     case 's': case 't':           case 'v': case 'w': case 'x':
2098     case 'y': case 'z':
2099     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2100     case 'G': case 'H': case 'I': case 'J': case 'K':
2101     case 'M': case 'N': case 'O': case 'P': case 'Q':
2102     case 'S': case 'T':           case 'V': case 'W': case 'X':
2103     case 'Y': case 'Z':
2104       result->type = CPP_NAME;
2105       {
2106 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2107 	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
2108 						&nst);
2109 	warn_about_normalization (pfile, result, &nst);
2110       }
2111 
2112       /* Convert named operators to their proper types.  */
2113       if (result->val.node.node->flags & NODE_OPERATOR)
2114 	{
2115 	  result->flags |= NAMED_OP;
2116 	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
2117 	}
2118       break;
2119 
2120     case '\'':
2121     case '"':
2122       lex_string (pfile, result, buffer->cur - 1);
2123       break;
2124 
2125     case '/':
2126       /* A potential block or line comment.  */
2127       comment_start = buffer->cur;
2128       c = *buffer->cur;
2129 
2130       if (c == '*')
2131 	{
2132 	  if (_cpp_skip_block_comment (pfile))
2133 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
2134 	}
2135       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
2136 			    || cpp_in_system_header (pfile)))
2137 	{
2138 	  /* Warn about comments only if pedantically GNUC89, and not
2139 	     in system headers.  */
2140 	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
2141 	      && ! buffer->warned_cplusplus_comments)
2142 	    {
2143 	      cpp_error (pfile, CPP_DL_PEDWARN,
2144 			 "C++ style comments are not allowed in ISO C90");
2145 	      cpp_error (pfile, CPP_DL_PEDWARN,
2146 			 "(this will be reported only once per input file)");
2147 	      buffer->warned_cplusplus_comments = 1;
2148 	    }
2149 
2150 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
2151 	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
2152 	}
2153       else if (c == '=')
2154 	{
2155 	  buffer->cur++;
2156 	  result->type = CPP_DIV_EQ;
2157 	  break;
2158 	}
2159       else
2160 	{
2161 	  result->type = CPP_DIV;
2162 	  break;
2163 	}
2164 
2165       if (!pfile->state.save_comments)
2166 	{
2167 	  result->flags |= PREV_WHITE;
2168 	  goto update_tokens_line;
2169 	}
2170 
2171       /* Save the comment as a token in its own right.  */
2172       save_comment (pfile, result, comment_start, c);
2173       break;
2174 
2175     case '<':
2176       if (pfile->state.angled_headers)
2177 	{
2178 	  lex_string (pfile, result, buffer->cur - 1);
2179 	  if (result->type != CPP_LESS)
2180 	    break;
2181 	}
2182 
2183       result->type = CPP_LESS;
2184       if (*buffer->cur == '=')
2185 	buffer->cur++, result->type = CPP_LESS_EQ;
2186       else if (*buffer->cur == '<')
2187 	{
2188 	  buffer->cur++;
2189 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
2190 	}
2191       else if (CPP_OPTION (pfile, digraphs))
2192 	{
2193 	  if (*buffer->cur == ':')
2194 	    {
2195 	      buffer->cur++;
2196 	      result->flags |= DIGRAPH;
2197 	      result->type = CPP_OPEN_SQUARE;
2198 	    }
2199 	  else if (*buffer->cur == '%')
2200 	    {
2201 	      buffer->cur++;
2202 	      result->flags |= DIGRAPH;
2203 	      result->type = CPP_OPEN_BRACE;
2204 	    }
2205 	}
2206       break;
2207 
2208     case '>':
2209       result->type = CPP_GREATER;
2210       if (*buffer->cur == '=')
2211 	buffer->cur++, result->type = CPP_GREATER_EQ;
2212       else if (*buffer->cur == '>')
2213 	{
2214 	  buffer->cur++;
2215 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2216 	}
2217       break;
2218 
2219     case '%':
2220       result->type = CPP_MOD;
2221       if (*buffer->cur == '=')
2222 	buffer->cur++, result->type = CPP_MOD_EQ;
2223       else if (CPP_OPTION (pfile, digraphs))
2224 	{
2225 	  if (*buffer->cur == ':')
2226 	    {
2227 	      buffer->cur++;
2228 	      result->flags |= DIGRAPH;
2229 	      result->type = CPP_HASH;
2230 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
2231 		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
2232 	    }
2233 	  else if (*buffer->cur == '>')
2234 	    {
2235 	      buffer->cur++;
2236 	      result->flags |= DIGRAPH;
2237 	      result->type = CPP_CLOSE_BRACE;
2238 	    }
2239 	}
2240       break;
2241 
2242     case '.':
2243       result->type = CPP_DOT;
2244       if (ISDIGIT (*buffer->cur))
2245 	{
2246 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2247 	  result->type = CPP_NUMBER;
2248 	  lex_number (pfile, &result->val.str, &nst);
2249 	  warn_about_normalization (pfile, result, &nst);
2250 	}
2251       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2252 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
2253       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2254 	buffer->cur++, result->type = CPP_DOT_STAR;
2255       break;
2256 
2257     case '+':
2258       result->type = CPP_PLUS;
2259       if (*buffer->cur == '+')
2260 	buffer->cur++, result->type = CPP_PLUS_PLUS;
2261       else if (*buffer->cur == '=')
2262 	buffer->cur++, result->type = CPP_PLUS_EQ;
2263       break;
2264 
2265     case '-':
2266       result->type = CPP_MINUS;
2267       if (*buffer->cur == '>')
2268 	{
2269 	  buffer->cur++;
2270 	  result->type = CPP_DEREF;
2271 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2272 	    buffer->cur++, result->type = CPP_DEREF_STAR;
2273 	}
2274       else if (*buffer->cur == '-')
2275 	buffer->cur++, result->type = CPP_MINUS_MINUS;
2276       else if (*buffer->cur == '=')
2277 	buffer->cur++, result->type = CPP_MINUS_EQ;
2278       break;
2279 
2280     case '&':
2281       result->type = CPP_AND;
2282       if (*buffer->cur == '&')
2283 	buffer->cur++, result->type = CPP_AND_AND;
2284       else if (*buffer->cur == '=')
2285 	buffer->cur++, result->type = CPP_AND_EQ;
2286       break;
2287 
2288     case '|':
2289       result->type = CPP_OR;
2290       if (*buffer->cur == '|')
2291 	buffer->cur++, result->type = CPP_OR_OR;
2292       else if (*buffer->cur == '=')
2293 	buffer->cur++, result->type = CPP_OR_EQ;
2294       break;
2295 
2296     case ':':
2297       result->type = CPP_COLON;
2298       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2299 	buffer->cur++, result->type = CPP_SCOPE;
2300       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
2301 	{
2302 	  buffer->cur++;
2303 	  result->flags |= DIGRAPH;
2304 	  result->type = CPP_CLOSE_SQUARE;
2305 	}
2306       break;
2307 
2308     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2309     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2310     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2311     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
2312     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
2313 
2314     case '?': result->type = CPP_QUERY; break;
2315     case '~': result->type = CPP_COMPL; break;
2316     case ',': result->type = CPP_COMMA; break;
2317     case '(': result->type = CPP_OPEN_PAREN; break;
2318     case ')': result->type = CPP_CLOSE_PAREN; break;
2319     case '[': result->type = CPP_OPEN_SQUARE; break;
2320     case ']': result->type = CPP_CLOSE_SQUARE; break;
2321     case '{': result->type = CPP_OPEN_BRACE; break;
2322     case '}': result->type = CPP_CLOSE_BRACE; break;
2323     case ';': result->type = CPP_SEMICOLON; break;
2324 
2325       /* @ is a punctuator in Objective-C.  */
2326     case '@': result->type = CPP_ATSIGN; break;
2327 
2328     case '$':
2329     case '\\':
2330       {
2331 	const uchar *base = --buffer->cur;
2332 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2333 
2334 	if (forms_identifier_p (pfile, true, &nst))
2335 	  {
2336 	    result->type = CPP_NAME;
2337 	    result->val.node.node = lex_identifier (pfile, base, true, &nst);
2338 	    warn_about_normalization (pfile, result, &nst);
2339 	    break;
2340 	  }
2341 	buffer->cur++;
2342       }
2343 
2344     default:
2345       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2346       break;
2347     }
2348 
2349   return result;
2350 }
2351 
2352 /* An upper bound on the number of bytes needed to spell TOKEN.
2353    Does not include preceding whitespace.  */
2354 unsigned int
2355 cpp_token_len (const cpp_token *token)
2356 {
2357   unsigned int len;
2358 
2359   switch (TOKEN_SPELL (token))
2360     {
2361     default:		len = 6;				break;
2362     case SPELL_LITERAL:	len = token->val.str.len;		break;
2363     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
2364     }
2365 
2366   return len;
2367 }
2368 
/* Decode the UTF-8 sequence starting at NAME and store it in BUFFER
   as a \U escape followed by eight lowercase hexadecimal digits.
   Exactly 10 bytes are written to BUFFER and no terminating NUL is
   added.  Returns the number of bytes of NAME that made up the
   sequence.  Aborts if a continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int k;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte come first, followed by six
     bits from each continuation byte.  */
  code_point = *name & (0x7F >> seq_len);
  for (k = 1; k < seq_len; k++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* A continuation byte must look like 10xxxxxx.  */
      if ((*name & ~0x3F) != 0x80)
	abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
2402 
2403 /* Given a token TYPE corresponding to a digraph, return a pointer to
2404    the spelling of the digraph.  */
2405 static const unsigned char *
2406 cpp_digraph2name (enum cpp_ttype type)
2407 {
2408   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2409 }
2410 
2411 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2412    already contain the enough space to hold the token's spelling.
2413    Returns a pointer to the character after the last character written.
2414    FORSTRING is true if this is to be the spelling after translation
2415    phase 1 (this is different for UCNs).
2416    FIXME: Would be nice if we didn't need the PFILE argument.  */
2417 unsigned char *
2418 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2419 		 unsigned char *buffer, bool forstring)
2420 {
2421   switch (TOKEN_SPELL (token))
2422     {
2423     case SPELL_OPERATOR:
2424       {
2425 	const unsigned char *spelling;
2426 	unsigned char c;
2427 
2428 	if (token->flags & DIGRAPH)
2429 	  spelling = cpp_digraph2name (token->type);
2430 	else if (token->flags & NAMED_OP)
2431 	  goto spell_ident;
2432 	else
2433 	  spelling = TOKEN_NAME (token);
2434 
2435 	while ((c = *spelling++) != '\0')
2436 	  *buffer++ = c;
2437       }
2438       break;
2439 
2440     spell_ident:
2441     case SPELL_IDENT:
2442       if (forstring)
2443 	{
2444 	  memcpy (buffer, NODE_NAME (token->val.node.node),
2445 		  NODE_LEN (token->val.node.node));
2446 	  buffer += NODE_LEN (token->val.node.node);
2447 	}
2448       else
2449 	{
2450 	  size_t i;
2451 	  const unsigned char * name = NODE_NAME (token->val.node.node);
2452 
2453 	  for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2454 	    if (name[i] & ~0x7F)
2455 	      {
2456 		i += utf8_to_ucn (buffer, name + i) - 1;
2457 		buffer += 10;
2458 	      }
2459 	    else
2460 	      *buffer++ = NODE_NAME (token->val.node.node)[i];
2461 	}
2462       break;
2463 
2464     case SPELL_LITERAL:
2465       memcpy (buffer, token->val.str.text, token->val.str.len);
2466       buffer += token->val.str.len;
2467       break;
2468 
2469     case SPELL_NONE:
2470       cpp_error (pfile, CPP_DL_ICE,
2471 		 "unspellable token %s", TOKEN_NAME (token));
2472       break;
2473     }
2474 
2475   return buffer;
2476 }
2477 
2478 /* Returns TOKEN spelt as a null-terminated string.  The string is
2479    freed when the reader is destroyed.  Useful for diagnostics.  */
2480 unsigned char *
2481 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2482 {
2483   unsigned int len = cpp_token_len (token) + 1;
2484   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2485 
2486   end = cpp_spell_token (pfile, token, start, false);
2487   end[0] = '\0';
2488 
2489   return start;
2490 }
2491 
2492 /* Returns a pointer to a string which spells the token defined by
2493    TYPE and FLAGS.  Used by C front ends, which really should move to
2494    using cpp_token_as_text.  */
2495 const char *
2496 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2497 {
2498   if (flags & DIGRAPH)
2499     return (const char *) cpp_digraph2name (type);
2500   else if (flags & NAMED_OP)
2501     return cpp_named_operator2name (type);
2502 
2503   return (const char *) token_spellings[type].name;
2504 }
2505 
2506 /* Writes the spelling of token to FP, without any preceding space.
2507    Separated from cpp_spell_token for efficiency - to avoid stdio
2508    double-buffering.  */
2509 void
2510 cpp_output_token (const cpp_token *token, FILE *fp)
2511 {
2512   switch (TOKEN_SPELL (token))
2513     {
2514     case SPELL_OPERATOR:
2515       {
2516 	const unsigned char *spelling;
2517 	int c;
2518 
2519 	if (token->flags & DIGRAPH)
2520 	  spelling = cpp_digraph2name (token->type);
2521 	else if (token->flags & NAMED_OP)
2522 	  goto spell_ident;
2523 	else
2524 	  spelling = TOKEN_NAME (token);
2525 
2526 	c = *spelling;
2527 	do
2528 	  putc (c, fp);
2529 	while ((c = *++spelling) != '\0');
2530       }
2531       break;
2532 
2533     spell_ident:
2534     case SPELL_IDENT:
2535       {
2536 	size_t i;
2537 	const unsigned char * name = NODE_NAME (token->val.node.node);
2538 
2539 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2540 	  if (name[i] & ~0x7F)
2541 	    {
2542 	      unsigned char buffer[10];
2543 	      i += utf8_to_ucn (buffer, name + i) - 1;
2544 	      fwrite (buffer, 1, 10, fp);
2545 	    }
2546 	  else
2547 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
2548       }
2549       break;
2550 
2551     case SPELL_LITERAL:
2552       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2553       break;
2554 
2555     case SPELL_NONE:
2556       /* An error, most probably.  */
2557       break;
2558     }
2559 }
2560 
2561 /* Compare two tokens.  */
2562 int
2563 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2564 {
2565   if (a->type == b->type && a->flags == b->flags)
2566     switch (TOKEN_SPELL (a))
2567       {
2568       default:			/* Keep compiler happy.  */
2569       case SPELL_OPERATOR:
2570 	/* token_no is used to track where multiple consecutive ##
2571 	   tokens were originally located.  */
2572 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2573       case SPELL_NONE:
2574 	return (a->type != CPP_MACRO_ARG
2575 		|| a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
2576       case SPELL_IDENT:
2577 	return a->val.node.node == b->val.node.node;
2578       case SPELL_LITERAL:
2579 	return (a->val.str.len == b->val.str.len
2580 		&& !memcmp (a->val.str.text, b->val.str.text,
2581 			    a->val.str.len));
2582       }
2583 
2584   return 0;
2585 }
2586 
2587 /* Returns nonzero if a space should be inserted to avoid an
2588    accidental token paste for output.  For simplicity, it is
2589    conservative, and occasionally advises a space where one is not
2590    needed, e.g. "." and ".2".  */
2591 int
2592 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2593 		 const cpp_token *token2)
2594 {
2595   enum cpp_ttype a = token1->type, b = token2->type;
2596   cppchar_t c;
2597 
2598   if (token1->flags & NAMED_OP)
2599     a = CPP_NAME;
2600   if (token2->flags & NAMED_OP)
2601     b = CPP_NAME;
2602 
2603   c = EOF;
2604   if (token2->flags & DIGRAPH)
2605     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2606   else if (token_spellings[b].category == SPELL_OPERATOR)
2607     c = token_spellings[b].name[0];
2608 
2609   /* Quickly get everything that can paste with an '='.  */
2610   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2611     return 1;
2612 
2613   switch (a)
2614     {
2615     case CPP_GREATER:	return c == '>';
2616     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
2617     case CPP_PLUS:	return c == '+';
2618     case CPP_MINUS:	return c == '-' || c == '>';
2619     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
2620     case CPP_MOD:	return c == ':' || c == '>';
2621     case CPP_AND:	return c == '&';
2622     case CPP_OR:	return c == '|';
2623     case CPP_COLON:	return c == ':' || c == '>';
2624     case CPP_DEREF:	return c == '*';
2625     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
2626     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
2627     case CPP_NAME:	return ((b == CPP_NUMBER
2628 				 && name_p (pfile, &token2->val.str))
2629 				|| b == CPP_NAME
2630 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
2631     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
2632 				|| c == '.' || c == '+' || c == '-');
2633 				      /* UCNs */
2634     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
2635 				 && b == CPP_NAME)
2636 				|| (CPP_OPTION (pfile, objc)
2637 				    && token1->val.str.text[0] == '@'
2638 				    && (b == CPP_NAME || b == CPP_STRING)));
2639     default:		break;
2640     }
2641 
2642   return 0;
2643 }
2644 
2645 /* Output all the remaining tokens on the current line, and a newline
2646    character, to FP.  Leading whitespace is removed.  If there are
2647    macros, special token padding is not performed.  */
2648 void
2649 cpp_output_line (cpp_reader *pfile, FILE *fp)
2650 {
2651   const cpp_token *token;
2652 
2653   token = cpp_get_token (pfile);
2654   while (token->type != CPP_EOF)
2655     {
2656       cpp_output_token (token, fp);
2657       token = cpp_get_token (pfile);
2658       if (token->flags & PREV_WHITE)
2659 	putc (' ', fp);
2660     }
2661 
2662   putc ('\n', fp);
2663 }
2664 
2665 /* Return a string representation of all the remaining tokens on the
2666    current line.  The result is allocated using xmalloc and must be
2667    freed by the caller.  */
2668 unsigned char *
2669 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2670 {
2671   const cpp_token *token;
2672   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2673   unsigned int alloced = 120 + out;
2674   unsigned char *result = (unsigned char *) xmalloc (alloced);
2675 
2676   /* If DIR_NAME is empty, there are no initial contents.  */
2677   if (dir_name)
2678     {
2679       sprintf ((char *) result, "#%s ", dir_name);
2680       out += 2;
2681     }
2682 
2683   token = cpp_get_token (pfile);
2684   while (token->type != CPP_EOF)
2685     {
2686       unsigned char *last;
2687       /* Include room for a possible space and the terminating nul.  */
2688       unsigned int len = cpp_token_len (token) + 2;
2689 
2690       if (out + len > alloced)
2691 	{
2692 	  alloced *= 2;
2693 	  if (out + len > alloced)
2694 	    alloced = out + len;
2695 	  result = (unsigned char *) xrealloc (result, alloced);
2696 	}
2697 
2698       last = cpp_spell_token (pfile, token, &result[out], 0);
2699       out = last - result;
2700 
2701       token = cpp_get_token (pfile);
2702       if (token->flags & PREV_WHITE)
2703 	result[out++] = ' ';
2704     }
2705 
2706   result[out] = '\0';
2707   return result;
2708 }
2709 
/* Memory buffers.  Changing the constant and the two sizing macros
   below can have a dramatic effect on performance.  The values here
   are reasonable defaults, but might be tuned.  If you adjust them,
   be sure to test across a range of uses of cpplib, including heavy
   nested function-like macro expansion.  Also check the change in
   peak memory usage (NJAMD is a good tool for this).

   MIN_BUFF_SIZE is the smallest payload size new_buff creates.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room between BUFF's cur
   and limit pointers.

   Macro arguments are parenthesized in the expansions so that
   compound expressions are evaluated as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2724 
2725 /* Create a new allocation buffer.  Place the control block at the end
2726    of the buffer, so that buffer overflows will cause immediate chaos.  */
2727 static _cpp_buff *
2728 new_buff (size_t len)
2729 {
2730   _cpp_buff *result;
2731   unsigned char *base;
2732 
2733   if (len < MIN_BUFF_SIZE)
2734     len = MIN_BUFF_SIZE;
2735   len = CPP_ALIGN (len);
2736 
2737   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2738   result = (_cpp_buff *) (base + len);
2739   result->base = base;
2740   result->cur = base;
2741   result->limit = base + len;
2742   result->next = NULL;
2743   return result;
2744 }
2745 
2746 /* Place a chain of unwanted allocation buffers on the free list.  */
2747 void
2748 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2749 {
2750   _cpp_buff *end = buff;
2751 
2752   while (end->next)
2753     end = end->next;
2754   end->next = pfile->free_buffs;
2755   pfile->free_buffs = buff;
2756 }
2757 
2758 /* Return a free buffer of size at least MIN_SIZE.  */
2759 _cpp_buff *
2760 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2761 {
2762   _cpp_buff *result, **p;
2763 
2764   for (p = &pfile->free_buffs;; p = &(*p)->next)
2765     {
2766       size_t size;
2767 
2768       if (*p == NULL)
2769 	return new_buff (min_size);
2770       result = *p;
2771       size = result->limit - result->base;
2772       /* Return a buffer that's big enough, but don't waste one that's
2773          way too big.  */
2774       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2775 	break;
2776     }
2777 
2778   *p = result->next;
2779   result->next = NULL;
2780   result->cur = result->base;
2781   return result;
2782 }
2783 
2784 /* Creates a new buffer with enough space to hold the uncommitted
2785    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2786    the excess bytes to the new buffer.  Chains the new buffer after
2787    BUFF, and returns the new buffer.  */
2788 _cpp_buff *
2789 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2790 {
2791   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2792   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2793 
2794   buff->next = new_buff;
2795   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2796   return new_buff;
2797 }
2798 
2799 /* Creates a new buffer with enough space to hold the uncommitted
2800    remaining bytes of the buffer pointed to by BUFF, and at least
2801    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2802    Chains the new buffer before the buffer pointed to by BUFF, and
2803    updates the pointer to point to the new buffer.  */
2804 void
2805 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2806 {
2807   _cpp_buff *new_buff, *old_buff = *pbuff;
2808   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2809 
2810   new_buff = _cpp_get_buff (pfile, size);
2811   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2812   new_buff->next = old_buff;
2813   *pbuff = new_buff;
2814 }
2815 
2816 /* Free a chain of buffers starting at BUFF.  */
2817 void
2818 _cpp_free_buff (_cpp_buff *buff)
2819 {
2820   _cpp_buff *next;
2821 
2822   for (; buff; buff = next)
2823     {
2824       next = buff->next;
2825       free (buff->base);
2826     }
2827 }
2828 
2829 /* Allocate permanent, unaligned storage of length LEN.  */
2830 unsigned char *
2831 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2832 {
2833   _cpp_buff *buff = pfile->u_buff;
2834   unsigned char *result = buff->cur;
2835 
2836   if (len > (size_t) (buff->limit - result))
2837     {
2838       buff = _cpp_get_buff (pfile, len);
2839       buff->next = pfile->u_buff;
2840       pfile->u_buff = buff;
2841       result = buff->cur;
2842     }
2843 
2844   buff->cur = result + len;
2845   return result;
2846 }
2847 
2848 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2849    That buffer is used for growing allocations when saving macro
2850    replacement lists in a #define, and when parsing an answer to an
2851    assertion in #assert, #unassert or #if (and therefore possibly
2852    whilst expanding macros).  It therefore must not be used by any
2853    code that they might call: specifically the lexer and the guts of
2854    the macro expander.
2855 
2856    All existing other uses clearly fit this restriction: storing
2857    registered pragmas during initialization.  */
2858 unsigned char *
2859 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2860 {
2861   _cpp_buff *buff = pfile->a_buff;
2862   unsigned char *result = buff->cur;
2863 
2864   if (len > (size_t) (buff->limit - result))
2865     {
2866       buff = _cpp_get_buff (pfile, len);
2867       buff->next = pfile->a_buff;
2868       pfile->a_buff = buff;
2869       result = buff->cur;
2870     }
2871 
2872   buff->cur = result + len;
2873   return result;
2874 }
2875 
2876 /* Say which field of TOK is in use.  */
2877 
2878 enum cpp_token_fld_kind
2879 cpp_token_val_index (cpp_token *tok)
2880 {
2881   switch (TOKEN_SPELL (tok))
2882     {
2883     case SPELL_IDENT:
2884       return CPP_TOKEN_FLD_NODE;
2885     case SPELL_LITERAL:
2886       return CPP_TOKEN_FLD_STR;
2887     case SPELL_OPERATOR:
2888       if (tok->type == CPP_PASTE)
2889 	return CPP_TOKEN_FLD_TOKEN_NO;
2890       else
2891 	return CPP_TOKEN_FLD_NONE;
2892     case SPELL_NONE:
2893       if (tok->type == CPP_MACRO_ARG)
2894 	return CPP_TOKEN_FLD_ARG_NO;
2895       else if (tok->type == CPP_PADDING)
2896 	return CPP_TOKEN_FLD_SOURCE;
2897       else if (tok->type == CPP_PRAGMA)
2898 	return CPP_TOKEN_FLD_PRAGMA;
2899       /* else fall through */
2900     default:
2901       return CPP_TOKEN_FLD_NONE;
2902     }
2903 }
2904 
2905 /* All tokens lexed in R after calling this function will be forced to have
2906    their source_location the same as the location referenced by P, until
2907    cpp_stop_forcing_token_locations is called for R.  */
2908 
2909 void
2910 cpp_force_token_locations (cpp_reader *r, source_location *p)
2911 {
2912   r->forced_token_location_p = p;
2913 }
2914 
2915 /* Go back to assigning locations naturally for lexed tokens.  */
2916 
2917 void
2918 cpp_stop_forcing_token_locations (cpp_reader *r)
2919 {
2920   r->forced_token_location_p = NULL;
2921 }
2922