1 /* CPP Library - lexical analysis.
2    Copyright (C) 2000-2013 Free Software Foundation, Inc.
3    Contributed by Per Bothner, 1994-95.
4    Based on CCCP program by Paul Rubin, June 1986
5    Adapted to ANSI C, Richard Stallman, Jan 1987
6    Broken out to separate file, Zack Weinberg, Mar 2000
7 
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26 
/* Spelling class of a token type.  The enumerator names are produced
   by token pasting in the TK entries of TTYPE_TABLE ("SPELL_" ## s)
   and SPELL_OPERATOR is named by the OP entries, so none of them may
   be renamed.  */
enum spell_type
{
  SPELL_OPERATOR,       /* Value 0.  Used for every OP table entry.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34 
35 struct token_spelling
36 {
37   enum spell_type category;
38   const unsigned char *name;
39 };
40 
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
43 
44 #define OP(e, s) { SPELL_OPERATOR, UC s  },
45 #define TK(e, s) { SPELL_ ## s,    UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49 
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52 
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void store_comment (cpp_reader *, cpp_token *);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 			    unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
64 
65 static _cpp_buff *new_buff (size_t);
66 
67 
68 /* Utility routine:
69 
70    Compares, the token TOKEN to the NUL-terminated string STRING.
71    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
72 int
cpp_ideq(const cpp_token * token,const char * string)73 cpp_ideq (const cpp_token *token, const char *string)
74 {
75   if (token->type != CPP_NAME)
76     return 0;
77 
78   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
79 }
80 
81 /* Record a note TYPE at byte POS into the current cleaned logical
82    line.  */
83 static void
add_line_note(cpp_buffer * buffer,const uchar * pos,unsigned int type)84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 {
86   if (buffer->notes_used == buffer->notes_cap)
87     {
88       buffer->notes_cap = buffer->notes_cap * 2 + 200;
89       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90                                   buffer->notes_cap);
91     }
92 
93   buffer->notes[buffer->notes_used].pos = pos;
94   buffer->notes[buffer->notes_used].type = type;
95   buffer->notes_used++;
96 }
97 
98 
99 /* Fast path to find line special characters using optimized character
100    scanning algorithms.  Anything complicated falls back to the slow
101    path below.  Since this loop is very hot it's worth doing these kinds
102    of optimizations.
103 
104    One of the paths through the ifdefs should provide
105 
106      const uchar *search_line_fast (const uchar *s, const uchar *end);
107 
108    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
109    the found character.
110 
111    Note that the last character of the buffer is *always* a newline,
112    as forced by _cpp_convert_input.  This fact can be used to avoid
113    explicitly looking for the end of the buffer.  */
114 
/* Configure gives us an ifdef-style test.  Supply a value of 0 when
   the macro is undefined so that it can be used in ordinary C
   conditions below.  */
#if !defined(WORDS_BIGENDIAN)
# define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  Nothing
   in <stdint.h> provides that.  On most hosts it is unsigned long, but
   MS chose an LLP64 model.  When building with GCC we can ask for the
   "real" word size directly.  */
#if defined(__GNUC__)
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below only expects a word_type of 4 or 8 bytes.  Any other
   size yields a negative array length here and so fails to compile.  */
typedef char check_word_type_size
  [(sizeof (word_type) == 8 || sizeof (word_type) == 4) ? 1 : -1];
134 
135 /* Return X with the first N bytes forced to values that won't match one
136    of the interesting characters.  Note that NUL is not interesting.  */
137 
138 static inline word_type
acc_char_mask_misalign(word_type val,unsigned int n)139 acc_char_mask_misalign (word_type val, unsigned int n)
140 {
141   word_type mask = -1;
142   if (WORDS_BIGENDIAN)
143     mask >>= n * 8;
144   else
145     mask <<= n * 8;
146   return val & mask;
147 }
148 
149 /* Return X replicated to all byte positions within WORD_TYPE.  */
150 
151 static inline word_type
acc_char_replicate(uchar x)152 acc_char_replicate (uchar x)
153 {
154   word_type ret;
155 
156   ret = (x << 24) | (x << 16) | (x << 8) | x;
157   if (sizeof(word_type) == 8)
158     ret = (ret << 16 << 16) | ret;
159   return ret;
160 }
161 
162 /* Return non-zero if some byte of VAL is (probably) C.  */
163 
164 static inline word_type
acc_char_cmp(word_type val,word_type c)165 acc_char_cmp (word_type val, word_type c)
166 {
167 #if defined(__GNUC__) && defined(__alpha__)
168   /* We can get exact results using a compare-bytes instruction.
169      Get (val == c) via (0 >= (val ^ c)).  */
170   return __builtin_alpha_cmpbge (0, val ^ c);
171 #else
172   word_type magic = 0x7efefefeU;
173   if (sizeof(word_type) == 8)
174     magic = (magic << 16 << 16) | 0xfefefefeU;
175   magic |= 1;
176 
177   val ^= c;
178   return ((val + magic) ^ ~val) & ~magic;
179 #endif
180 }
181 
182 /* Given the result of acc_char_cmp is non-zero, return the index of
183    the found character.  If this was a false positive, return -1.  */
184 
185 static inline int
acc_char_index(word_type cmp ATTRIBUTE_UNUSED,word_type val ATTRIBUTE_UNUSED)186 acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 		word_type val ATTRIBUTE_UNUSED)
188 {
189 #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190   /* The cmpbge instruction sets *bits* of the result corresponding to
191      matches in the bytes with no false positives.  */
192   return __builtin_ctzl (cmp);
193 #else
194   unsigned int i;
195 
196   /* ??? It would be nice to force unrolling here,
197      and have all of these constants folded.  */
198   for (i = 0; i < sizeof(word_type); ++i)
199     {
200       uchar c;
201       if (WORDS_BIGENDIAN)
202 	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203       else
204 	c = (val >> i * 8) & 0xff;
205 
206       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 	return i;
208     }
209 
210   return -1;
211 #endif
212 }
213 
214 /* A version of the fast scanner using bit fiddling techniques.
215 
216    For 32-bit words, one would normally perform 16 comparisons and
217    16 branches.  With this algorithm one performs 24 arithmetic
218    operations and one branch.  Whether this is faster with a 32-bit
219    word size is going to be somewhat system dependent.
220 
221    For 64-bit words, we eliminate twice the number of comparisons
222    and branches without increasing the number of arithmetic operations.
223    It's almost certainly going to be a win with 64-bit word size.  */
224 
225 static const uchar * search_line_acc_char (const uchar *, const uchar *)
226   ATTRIBUTE_UNUSED;
227 
228 static const uchar *
search_line_acc_char(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)229 search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230 {
231   const word_type repl_nl = acc_char_replicate ('\n');
232   const word_type repl_cr = acc_char_replicate ('\r');
233   const word_type repl_bs = acc_char_replicate ('\\');
234   const word_type repl_qm = acc_char_replicate ('?');
235 
236   unsigned int misalign;
237   const word_type *p;
238   word_type val, t;
239 
240   /* Align the buffer.  Mask out any bytes from before the beginning.  */
241   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242   val = *p;
243   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244   if (misalign)
245     val = acc_char_mask_misalign (val, misalign);
246 
247   /* Main loop.  */
248   while (1)
249     {
250       t  = acc_char_cmp (val, repl_nl);
251       t |= acc_char_cmp (val, repl_cr);
252       t |= acc_char_cmp (val, repl_bs);
253       t |= acc_char_cmp (val, repl_qm);
254 
255       if (__builtin_expect (t != 0, 0))
256 	{
257 	  int i = acc_char_index (t, val);
258 	  if (i >= 0)
259 	    return (const uchar *)p + i;
260 	}
261 
262       val = *++p;
263     }
264 }
265 
266 /* Disable on Solaris 2/x86 until the following problems can be properly
267    autoconfed:
268 
269    The Solaris 9 assembler cannot assemble SSE4.2 insns.
270    Before Solaris 9 Update 6, SSE insns cannot be executed.
271    The Solaris 10+ assembler tags objects with the instruction set
272    extensions used, so SSE4.2 executables cannot run on machines that
273    don't support that extension.  */
274 
275 #if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
276 
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.  Row 0 is newline, row 1 carriage
   return, row 2 backslash and row 3 question mark.  */
#define REPL16(C) { C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C }
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  REPL16 ('\n'),
  REPL16 ('\r'),
  REPL16 ('\\'),
  REPL16 ('?'),
};
#undef REPL16
291 
292 /* A version of the fast scanner using MMX vectorized byte compare insns.
293 
294    This uses the PMOVMSKB instruction which was introduced with "MMX2",
295    which was packaged into SSE1; it is also present in the AMD MMX
296    extension.  Mark the function as using "sse" so that we emit a real
297    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
298 
299 static const uchar *
300 #ifndef __SSE__
301 __attribute__((__target__("sse")))
302 #endif
search_line_mmx(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)303 search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
304 {
305   typedef char v8qi __attribute__ ((__vector_size__ (8)));
306   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
307 
308   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
309   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
310   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
311   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
312 
313   unsigned int misalign, found, mask;
314   const v8qi *p;
315   v8qi data, t, c;
316 
317   /* Align the source pointer.  While MMX doesn't generate unaligned data
318      faults, this allows us to safely scan to the end of the buffer without
319      reading beyond the end of the last page.  */
320   misalign = (uintptr_t)s & 7;
321   p = (const v8qi *)((uintptr_t)s & -8);
322   data = *p;
323 
324   /* Create a mask for the bytes that are valid within the first
325      16-byte block.  The Idea here is that the AND with the mask
326      within the loop is "free", since we need some AND or TEST
327      insn in order to set the flags for the branch anyway.  */
328   mask = -1u << misalign;
329 
330   /* Main loop processing 8 bytes at a time.  */
331   goto start;
332   do
333     {
334       data = *++p;
335       mask = -1;
336 
337     start:
338       t = __builtin_ia32_pcmpeqb(data, repl_nl);
339       c = __builtin_ia32_pcmpeqb(data, repl_cr);
340       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341       c = __builtin_ia32_pcmpeqb(data, repl_bs);
342       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343       c = __builtin_ia32_pcmpeqb(data, repl_qm);
344       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
345       found = __builtin_ia32_pmovmskb (t);
346       found &= mask;
347     }
348   while (!found);
349 
350   __builtin_ia32_emms ();
351 
352   /* FOUND contains 1 in bits for which we matched a relevant
353      character.  Conversion to the byte index is trivial.  */
354   found = __builtin_ctz(found);
355   return (const uchar *)p + found;
356 }
357 
358 /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
359 
360 static const uchar *
361 #ifndef __SSE2__
362 __attribute__((__target__("sse2")))
363 #endif
search_line_sse2(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)364 search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
365 {
366   typedef char v16qi __attribute__ ((__vector_size__ (16)));
367 
368   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
369   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
370   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
371   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
372 
373   unsigned int misalign, found, mask;
374   const v16qi *p;
375   v16qi data, t;
376 
377   /* Align the source pointer.  */
378   misalign = (uintptr_t)s & 15;
379   p = (const v16qi *)((uintptr_t)s & -16);
380   data = *p;
381 
382   /* Create a mask for the bytes that are valid within the first
383      16-byte block.  The Idea here is that the AND with the mask
384      within the loop is "free", since we need some AND or TEST
385      insn in order to set the flags for the branch anyway.  */
386   mask = -1u << misalign;
387 
388   /* Main loop processing 16 bytes at a time.  */
389   goto start;
390   do
391     {
392       data = *++p;
393       mask = -1;
394 
395     start:
396       t  = __builtin_ia32_pcmpeqb128(data, repl_nl);
397       t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
398       t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
399       t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
400       found = __builtin_ia32_pmovmskb128 (t);
401       found &= mask;
402     }
403   while (!found);
404 
405   /* FOUND contains 1 in bits for which we matched a relevant
406      character.  Conversion to the byte index is trivial.  */
407   found = __builtin_ctz(found);
408   return (const uchar *)p + found;
409 }
410 
411 #ifdef HAVE_SSE4
412 /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
413 
414 static const uchar *
415 #ifndef __SSE4_2__
416 __attribute__((__target__("sse4.2")))
417 #endif
search_line_sse42(const uchar * s,const uchar * end)418 search_line_sse42 (const uchar *s, const uchar *end)
419 {
420   typedef char v16qi __attribute__ ((__vector_size__ (16)));
421   static const v16qi search = { '\n', '\r', '?', '\\' };
422 
423   uintptr_t si = (uintptr_t)s;
424   uintptr_t index;
425 
426   /* Check for unaligned input.  */
427   if (si & 15)
428     {
429       v16qi sv;
430 
431       if (__builtin_expect (end - s < 16, 0)
432 	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
433 	{
434 	  /* There are less than 16 bytes left in the buffer, and less
435 	     than 16 bytes left on the page.  Reading 16 bytes at this
436 	     point might generate a spurious page fault.  Defer to the
437 	     SSE2 implementation, which already handles alignment.  */
438 	  return search_line_sse2 (s, end);
439 	}
440 
441       /* ??? The builtin doesn't understand that the PCMPESTRI read from
442 	 memory need not be aligned.  */
443       sv = __builtin_ia32_loaddqu ((const char *) s);
444       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
445 
446       if (__builtin_expect (index < 16, 0))
447 	goto found;
448 
449       /* Advance the pointer to an aligned address.  We will re-scan a
450 	 few bytes, but we no longer need care for reading past the
451 	 end of a page, since we're guaranteed a match.  */
452       s = (const uchar *)((si + 16) & -16);
453     }
454 
455   /* Main loop, processing 16 bytes at a time.  By doing the whole loop
456      in inline assembly, we can make proper use of the flags set.  */
457   __asm (      "sub $16, %1\n"
458 	"	.balign 16\n"
459 	"0:	add $16, %1\n"
460 	"	%vpcmpestri $0, (%1), %2\n"
461 	"	jnc 0b"
462 	: "=&c"(index), "+r"(s)
463 	: "x"(search), "a"(4), "d"(16));
464 
465  found:
466   return s + index;
467 }
468 
469 #else
470 /* Work around out-dated assemblers without sse4 support.  */
471 #define search_line_sse42 search_line_sse2
472 #endif
473 
474 /* Check the CPU capabilities.  */
475 
476 #include "../gcc/config/i386/cpuid.h"
477 
478 typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
479 static search_line_fast_type search_line_fast;
480 
481 #define HAVE_init_vectorized_lexer 1
482 static inline void
init_vectorized_lexer(void)483 init_vectorized_lexer (void)
484 {
485   unsigned dummy, ecx = 0, edx = 0;
486   search_line_fast_type impl = search_line_acc_char;
487   int minimum = 0;
488 
489 #if defined(__SSE4_2__)
490   minimum = 3;
491 #elif defined(__SSE2__)
492   minimum = 2;
493 #elif defined(__SSE__)
494   minimum = 1;
495 #endif
496 
497   if (minimum == 3)
498     impl = search_line_sse42;
499   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
500     {
501       if (minimum == 3 || (ecx & bit_SSE4_2))
502         impl = search_line_sse42;
503       else if (minimum == 2 || (edx & bit_SSE2))
504 	impl = search_line_sse2;
505       else if (minimum == 1 || (edx & bit_SSE))
506 	impl = search_line_mmx;
507     }
508   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
509     {
510       if (minimum == 1
511 	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
512 	impl = search_line_mmx;
513     }
514 
515   search_line_fast = impl;
516 }
517 
518 #elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
519 
520 /* A vection of the fast scanner using AltiVec vectorized byte compares
521    and VSX unaligned loads (when VSX is available).  This is otherwise
522    the same as the pre-GCC 5 version.  */
523 
524 static const uchar *
search_line_fast(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)525 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
526 {
527   typedef __attribute__((altivec(vector))) unsigned char vc;
528 
529   const vc repl_nl = {
530     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
531     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
532   };
533   const vc repl_cr = {
534     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
535     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
536   };
537   const vc repl_bs = {
538     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
539     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
540   };
541   const vc repl_qm = {
542     '?', '?', '?', '?', '?', '?', '?', '?',
543     '?', '?', '?', '?', '?', '?', '?', '?',
544   };
545   const vc zero = { 0 };
546 
547   vc data, t;
548 
549   /* Main loop processing 16 bytes at a time.  */
550   do
551     {
552       vc m_nl, m_cr, m_bs, m_qm;
553 
554       data = *((const vc *)s);
555       s += 16;
556 
557       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
558       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
559       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
560       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
561       t = (m_nl | m_cr) | (m_bs | m_qm);
562 
563       /* T now contains 0xff in bytes for which we matched one of the relevant
564 	 characters.  We want to exit the loop if any byte in T is non-zero.
565 	 Below is the expansion of vec_any_ne(t, zero).  */
566     }
567   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
568 
569   /* Restore s to to point to the 16 bytes we just processed.  */
570   s -= 16;
571 
572   {
573 #define N  (sizeof(vc) / sizeof(long))
574 
575     union {
576       vc v;
577       /* Statically assert that N is 2 or 4.  */
578       unsigned long l[(N == 2 || N == 4) ? N : -1];
579     } u;
580     unsigned long l, i = 0;
581 
582     u.v = t;
583 
584     /* Find the first word of T that is non-zero.  */
585     switch (N)
586       {
587       case 4:
588 	l = u.l[i++];
589 	if (l != 0)
590 	  break;
591 	s += sizeof(unsigned long);
592 	l = u.l[i++];
593 	if (l != 0)
594 	  break;
595 	s += sizeof(unsigned long);
596       case 2:
597 	l = u.l[i++];
598 	if (l != 0)
599 	  break;
600 	s += sizeof(unsigned long);
601 	l = u.l[i];
602       }
603 
604     /* L now contains 0xff in bytes for which we matched one of the
605        relevant characters.  We can find the byte index by finding
606        its bit index and dividing by 8.  */
607 #ifdef __BIG_ENDIAN__
608     l = __builtin_clzl(l) >> 3;
609 #else
610     l = __builtin_ctzl(l) >> 3;
611 #endif
612     return s + l;
613 
614 #undef N
615   }
616 }
617 
618 #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
619 
620 /* A vection of the fast scanner using AltiVec vectorized byte compares.
621    This cannot be used for little endian because vec_lvsl/lvsr are
622    deprecated for little endian and the code won't work properly.  */
623 /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
624    so we can't compile this function without -maltivec on the command line
625    (or implied by some other switch).  */
626 
627 static const uchar *
search_line_fast(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)628 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
629 {
630   typedef __attribute__((altivec(vector))) unsigned char vc;
631 
632   const vc repl_nl = {
633     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
634     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
635   };
636   const vc repl_cr = {
637     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
638     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
639   };
640   const vc repl_bs = {
641     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
642     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
643   };
644   const vc repl_qm = {
645     '?', '?', '?', '?', '?', '?', '?', '?',
646     '?', '?', '?', '?', '?', '?', '?', '?',
647   };
648   const vc ones = {
649     -1, -1, -1, -1, -1, -1, -1, -1,
650     -1, -1, -1, -1, -1, -1, -1, -1,
651   };
652   const vc zero = { 0 };
653 
654   vc data, mask, t;
655 
656   /* Altivec loads automatically mask addresses with -16.  This lets us
657      issue the first load as early as possible.  */
658   data = __builtin_vec_ld(0, (const vc *)s);
659 
660   /* Discard bytes before the beginning of the buffer.  Do this by
661      beginning with all ones and shifting in zeros according to the
662      mis-alignment.  The LVSR instruction pulls the exact shift we
663      want from the address.  */
664   mask = __builtin_vec_lvsr(0, s);
665   mask = __builtin_vec_perm(zero, ones, mask);
666   data &= mask;
667 
668   /* While altivec loads mask addresses, we still need to align S so
669      that the offset we compute at the end is correct.  */
670   s = (const uchar *)((uintptr_t)s & -16);
671 
672   /* Main loop processing 16 bytes at a time.  */
673   goto start;
674   do
675     {
676       vc m_nl, m_cr, m_bs, m_qm;
677 
678       s += 16;
679       data = __builtin_vec_ld(0, (const vc *)s);
680 
681     start:
682       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
683       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
684       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
685       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
686       t = (m_nl | m_cr) | (m_bs | m_qm);
687 
688       /* T now contains 0xff in bytes for which we matched one of the relevant
689 	 characters.  We want to exit the loop if any byte in T is non-zero.
690 	 Below is the expansion of vec_any_ne(t, zero).  */
691     }
692   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
693 
694   {
695 #define N  (sizeof(vc) / sizeof(long))
696 
697     union {
698       vc v;
699       /* Statically assert that N is 2 or 4.  */
700       unsigned long l[(N == 2 || N == 4) ? N : -1];
701     } u;
702     unsigned long l, i = 0;
703 
704     u.v = t;
705 
706     /* Find the first word of T that is non-zero.  */
707     switch (N)
708       {
709       case 4:
710 	l = u.l[i++];
711 	if (l != 0)
712 	  break;
713 	s += sizeof(unsigned long);
714 	l = u.l[i++];
715 	if (l != 0)
716 	  break;
717 	s += sizeof(unsigned long);
718       case 2:
719 	l = u.l[i++];
720 	if (l != 0)
721 	  break;
722 	s += sizeof(unsigned long);
723 	l = u.l[i];
724       }
725 
726     /* L now contains 0xff in bytes for which we matched one of the
727        relevant characters.  We can find the byte index by finding
728        its bit index and dividing by 8.  */
729     l = __builtin_clzl(l) >> 3;
730     return s + l;
731 
732 #undef N
733   }
734 }
735 
736 #elif defined (__ARM_NEON__)
737 #include "arm_neon.h"
738 
739 static const uchar *
search_line_fast(const uchar * s,const uchar * end ATTRIBUTE_UNUSED)740 search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
741 {
742   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
743   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
744   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
745   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
746   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
747 
748   unsigned int misalign, found, mask;
749   const uint8_t *p;
750   uint8x16_t data;
751 
752   /* Align the source pointer.  */
753   misalign = (uintptr_t)s & 15;
754   p = (const uint8_t *)((uintptr_t)s & -16);
755   data = vld1q_u8 (p);
756 
757   /* Create a mask for the bytes that are valid within the first
758      16-byte block.  The Idea here is that the AND with the mask
759      within the loop is "free", since we need some AND or TEST
760      insn in order to set the flags for the branch anyway.  */
761   mask = (-1u << misalign) & 0xffff;
762 
763   /* Main loop, processing 16 bytes at a time.  */
764   goto start;
765 
766   do
767     {
768       uint8x8_t l;
769       uint16x4_t m;
770       uint32x2_t n;
771       uint8x16_t t, u, v, w;
772 
773       p += 16;
774       data = vld1q_u8 (p);
775       mask = 0xffff;
776 
777     start:
778       t = vceqq_u8 (data, repl_nl);
779       u = vceqq_u8 (data, repl_cr);
780       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
781       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
782       t = vandq_u8 (vorrq_u8 (v, w), xmask);
783       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
784       m = vpaddl_u8 (l);
785       n = vpaddl_u16 (m);
786 
787       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
788 	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
789       found &= mask;
790     }
791   while (!found);
792 
793   /* FOUND contains 1 in bits for which we matched a relevant
794      character.  Conversion to the byte index is trivial.  */
795   found = __builtin_ctz (found);
796   return (const uchar *)p + found;
797 }
798 
799 #else
800 
/* We only have one accelerated alternative.  Use a direct call so that
   we encourage inlining.  */
803 
804 #define search_line_fast  search_line_acc_char
805 
806 #endif
807 
/* Initialize the lexer if needed.  Only configurations that select
   their line scanner at run time define HAVE_init_vectorized_lexer;
   for all others this function does nothing.  */

void
_cpp_init_lexer (void)
{
#if defined(HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
817 
818 /* Returns with a logical line that contains no escaped newlines or
819    trigraphs.  This is a time-critical inner loop.  */
820 void
_cpp_clean_line(cpp_reader * pfile)821 _cpp_clean_line (cpp_reader *pfile)
822 {
823   cpp_buffer *buffer;
824   const uchar *s;
825   uchar c, *d, *p;
826 
827   buffer = pfile->buffer;
828   buffer->cur_note = buffer->notes_used = 0;
829   buffer->cur = buffer->line_base = buffer->next_line;
830   buffer->need_line = false;
831   s = buffer->next_line;
832 
833   if (!buffer->from_stage3)
834     {
835       const uchar *pbackslash = NULL;
836 
837       /* Fast path.  This is the common case of an un-escaped line with
838 	 no trigraphs.  The primary win here is by not writing any
839 	 data back to memory until we have to.  */
840       while (1)
841 	{
842 	  /* Perform an optimized search for \n, \r, \\, ?.  */
843 	  s = search_line_fast (s, buffer->rlimit);
844 
845 	  c = *s;
846 	  if (c == '\\')
847 	    {
848 	      /* Record the location of the backslash and continue.  */
849 	      pbackslash = s++;
850 	    }
851 	  else if (__builtin_expect (c == '?', 0))
852 	    {
853 	      if (__builtin_expect (s[1] == '?', false)
854 		   && _cpp_trigraph_map[s[2]])
855 		{
856 		  /* Have a trigraph.  We may or may not have to convert
857 		     it.  Add a line note regardless, for -Wtrigraphs.  */
858 		  add_line_note (buffer, s, s[2]);
859 		  if (CPP_OPTION (pfile, trigraphs))
860 		    {
861 		      /* We do, and that means we have to switch to the
862 		         slow path.  */
863 		      d = (uchar *) s;
864 		      *d = _cpp_trigraph_map[s[2]];
865 		      s += 2;
866 		      goto slow_path;
867 		    }
868 		}
869 	      /* Not a trigraph.  Continue on fast-path.  */
870 	      s++;
871 	    }
872 	  else
873 	    break;
874 	}
875 
876       /* This must be \r or \n.  We're either done, or we'll be forced
877 	 to write back to the buffer and continue on the slow path.  */
878       d = (uchar *) s;
879 
880       if (__builtin_expect (s == buffer->rlimit, false))
881 	goto done;
882 
883       /* DOS line ending? */
884       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
885 	{
886 	  s++;
887 	  if (s == buffer->rlimit)
888 	    goto done;
889 	}
890 
891       if (__builtin_expect (pbackslash == NULL, true))
892 	goto done;
893 
894       /* Check for escaped newline.  */
895       p = d;
896       while (is_nvspace (p[-1]))
897 	p--;
898       if (p - 1 != pbackslash)
899 	goto done;
900 
901       /* Have an escaped newline; process it and proceed to
902 	 the slow path.  */
903       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
904       d = p - 2;
905       buffer->next_line = p - 1;
906 
907     slow_path:
908       while (1)
909 	{
910 	  c = *++s;
911 	  *++d = c;
912 
913 	  if (c == '\n' || c == '\r')
914 	    {
915 	      /* Handle DOS line endings.  */
916 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
917 		s++;
918 	      if (s == buffer->rlimit)
919 		break;
920 
921 	      /* Escaped?  */
922 	      p = d;
923 	      while (p != buffer->next_line && is_nvspace (p[-1]))
924 		p--;
925 	      if (p == buffer->next_line || p[-1] != '\\')
926 		break;
927 
928 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
929 	      d = p - 2;
930 	      buffer->next_line = p - 1;
931 	    }
932 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
933 	    {
934 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
935 	      add_line_note (buffer, d, s[2]);
936 	      if (CPP_OPTION (pfile, trigraphs))
937 		{
938 		  *d = _cpp_trigraph_map[s[2]];
939 		  s += 2;
940 		}
941 	    }
942 	}
943     }
944   else
945     {
946       while (*s != '\n' && *s != '\r')
947 	s++;
948       d = (uchar *) s;
949 
950       /* Handle DOS line endings.  */
951       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
952 	s++;
953     }
954 
955  done:
956   *d = '\n';
957   /* A sentinel note that should never be processed.  */
958   add_line_note (buffer, d + 1, '\n');
959   buffer->next_line = s + 1;
960 }
961 
962 /* Return true if the trigraph indicated by NOTE should be warned
963    about in a comment.  */
964 static bool
warn_in_comment(cpp_reader * pfile,_cpp_line_note * note)965 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
966 {
967   const uchar *p;
968 
969   /* Within comments we don't warn about trigraphs, unless the
970      trigraph forms an escaped newline, as that may change
971      behavior.  */
972   if (note->type != '/')
973     return false;
974 
975   /* If -trigraphs, then this was an escaped newline iff the next note
976      is coincident.  */
977   if (CPP_OPTION (pfile, trigraphs))
978     return note[1].pos == note->pos;
979 
980   /* Otherwise, see if this forms an escaped newline.  */
981   p = note->pos + 3;
982   while (is_nvspace (*p))
983     p++;
984 
985   /* There might have been escaped newlines between the trigraph and the
986      newline we found.  Hence the position test.  */
987   return (*p == '\n' && p < note[1].pos);
988 }
989 
990 /* Process the notes created by add_line_note as far as the current
991    location.  */
992 void
_cpp_process_line_notes(cpp_reader * pfile,int in_comment)993 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
994 {
995   cpp_buffer *buffer = pfile->buffer;
996 
997   for (;;)
998     {
999       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1000       unsigned int col;
1001 
1002       if (note->pos > buffer->cur)
1003 	break;
1004 
1005       buffer->cur_note++;
1006       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1007 
1008       if (note->type == '\\' || note->type == ' ')
1009 	{
1010 	  if (note->type == ' ' && !in_comment)
1011 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1012 				 "backslash and newline separated by space");
1013 
1014 	  if (buffer->next_line > buffer->rlimit)
1015 	    {
1016 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1017 				   "backslash-newline at end of file");
1018 	      /* Prevent "no newline at end of file" warning.  */
1019 	      buffer->next_line = buffer->rlimit;
1020 	    }
1021 
1022 	  buffer->line_base = note->pos;
1023 	  CPP_INCREMENT_LINE (pfile, 0);
1024 	}
1025       else if (_cpp_trigraph_map[note->type])
1026 	{
1027 	  if (CPP_OPTION (pfile, warn_trigraphs)
1028 	      && (!in_comment || warn_in_comment (pfile, note)))
1029 	    {
1030 	      if (CPP_OPTION (pfile, trigraphs))
1031 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1032                                        pfile->line_table->highest_line, col,
1033 				       "trigraph ??%c converted to %c",
1034 				       note->type,
1035 				       (int) _cpp_trigraph_map[note->type]);
1036 	      else
1037 		{
1038 		  cpp_warning_with_line
1039 		    (pfile, CPP_W_TRIGRAPHS,
1040                      pfile->line_table->highest_line, col,
1041 		     "trigraph ??%c ignored, use -trigraphs to enable",
1042 		     note->type);
1043 		}
1044 	    }
1045 	}
1046       else if (note->type == 0)
1047 	/* Already processed in lex_raw_string.  */;
1048       else
1049 	abort ();
1050     }
1051 }
1052 
1053 /* Skip a C-style block comment.  We find the end of the comment by
1054    seeing if an asterisk is before every '/' we encounter.  Returns
1055    nonzero if comment terminated by EOF, zero otherwise.
1056 
1057    Buffer->cur points to the initial asterisk of the comment.  */
1058 bool
_cpp_skip_block_comment(cpp_reader * pfile)1059 _cpp_skip_block_comment (cpp_reader *pfile)
1060 {
1061   cpp_buffer *buffer = pfile->buffer;
1062   const uchar *cur = buffer->cur;
1063   uchar c;
1064 
1065   cur++;
1066   if (*cur == '/')
1067     cur++;
1068 
1069   for (;;)
1070     {
1071       /* People like decorating comments with '*', so check for '/'
1072 	 instead for efficiency.  */
1073       c = *cur++;
1074 
1075       if (c == '/')
1076 	{
1077 	  if (cur[-2] == '*')
1078 	    break;
1079 
1080 	  /* Warn about potential nested comments, but not if the '/'
1081 	     comes immediately before the true comment delimiter.
1082 	     Don't bother to get it right across escaped newlines.  */
1083 	  if (CPP_OPTION (pfile, warn_comments)
1084 	      && cur[0] == '*' && cur[1] != '/')
1085 	    {
1086 	      buffer->cur = cur;
1087 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1088 				     pfile->line_table->highest_line,
1089 				     CPP_BUF_COL (buffer),
1090 				     "\"/*\" within comment");
1091 	    }
1092 	}
1093       else if (c == '\n')
1094 	{
1095 	  unsigned int cols;
1096 	  buffer->cur = cur - 1;
1097 	  _cpp_process_line_notes (pfile, true);
1098 	  if (buffer->next_line >= buffer->rlimit)
1099 	    return true;
1100 	  _cpp_clean_line (pfile);
1101 
1102 	  cols = buffer->next_line - buffer->line_base;
1103 	  CPP_INCREMENT_LINE (pfile, cols);
1104 
1105 	  cur = buffer->cur;
1106 	}
1107     }
1108 
1109   buffer->cur = cur;
1110   _cpp_process_line_notes (pfile, true);
1111   return false;
1112 }
1113 
1114 /* Skip a C++ line comment, leaving buffer->cur pointing to the
1115    terminating newline.  Handles escaped newlines.  Returns nonzero
1116    if a multiline comment.  */
1117 static int
skip_line_comment(cpp_reader * pfile)1118 skip_line_comment (cpp_reader *pfile)
1119 {
1120   cpp_buffer *buffer = pfile->buffer;
1121   source_location orig_line = pfile->line_table->highest_line;
1122 
1123   while (*buffer->cur != '\n')
1124     buffer->cur++;
1125 
1126   _cpp_process_line_notes (pfile, true);
1127   return orig_line != pfile->line_table->highest_line;
1128 }
1129 
1130 /* Skips whitespace, saving the next non-whitespace character.  */
1131 static void
skip_whitespace(cpp_reader * pfile,cppchar_t c)1132 skip_whitespace (cpp_reader *pfile, cppchar_t c)
1133 {
1134   cpp_buffer *buffer = pfile->buffer;
1135   bool saw_NUL = false;
1136 
1137   do
1138     {
1139       /* Horizontal space always OK.  */
1140       if (c == ' ' || c == '\t')
1141 	;
1142       /* Just \f \v or \0 left.  */
1143       else if (c == '\0')
1144 	saw_NUL = true;
1145       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
1146 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
1147 			     CPP_BUF_COL (buffer),
1148 			     "%s in preprocessing directive",
1149 			     c == '\f' ? "form feed" : "vertical tab");
1150 
1151       c = *buffer->cur++;
1152     }
1153   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
1154   while (is_nvspace (c));
1155 
1156   if (saw_NUL)
1157     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
1158 
1159   buffer->cur--;
1160 }
1161 
1162 /* See if the characters of a number token are valid in a name (no
1163    '.', '+' or '-').  */
1164 static int
name_p(cpp_reader * pfile,const cpp_string * string)1165 name_p (cpp_reader *pfile, const cpp_string *string)
1166 {
1167   unsigned int i;
1168 
1169   for (i = 0; i < string->len; i++)
1170     if (!is_idchar (string->text[i]))
1171       return 0;
1172 
1173   return 1;
1174 }
1175 
1176 /* After parsing an identifier or other sequence, produce a warning about
1177    sequences not in NFC/NFKC.  */
1178 static void
warn_about_normalization(cpp_reader * pfile,const cpp_token * token,const struct normalize_state * s)1179 warn_about_normalization (cpp_reader *pfile,
1180 			  const cpp_token *token,
1181 			  const struct normalize_state *s)
1182 {
1183   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1184       && !pfile->state.skipping)
1185     {
1186       /* Make sure that the token is printed using UCNs, even
1187 	 if we'd otherwise happily print UTF-8.  */
1188       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
1189       size_t sz;
1190 
1191       sz = cpp_spell_token (pfile, token, buf, false) - buf;
1192       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1193 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1194 			       "`%.*s' is not in NFKC", (int) sz, buf);
1195       else
1196 	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1197 			       "`%.*s' is not in NFC", (int) sz, buf);
1198       free (buf);
1199     }
1200 }
1201 
1202 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
1203    an identifier.  FIRST is TRUE if this starts an identifier.  */
1204 static bool
forms_identifier_p(cpp_reader * pfile,int first,struct normalize_state * state)1205 forms_identifier_p (cpp_reader *pfile, int first,
1206 		    struct normalize_state *state)
1207 {
1208   cpp_buffer *buffer = pfile->buffer;
1209 
1210   if (*buffer->cur == '$')
1211     {
1212       if (!CPP_OPTION (pfile, dollars_in_ident))
1213 	return false;
1214 
1215       buffer->cur++;
1216       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1217 	{
1218 	  CPP_OPTION (pfile, warn_dollars) = 0;
1219 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1220 	}
1221 
1222       return true;
1223     }
1224 
1225   /* Is this a syntactically valid UCN?  */
1226   if (CPP_OPTION (pfile, extended_identifiers)
1227       && *buffer->cur == '\\'
1228       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1229     {
1230       buffer->cur += 2;
1231       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1232 			  state))
1233 	return true;
1234       buffer->cur -= 2;
1235     }
1236 
1237   return false;
1238 }
1239 
1240 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
1241 static cpp_hashnode *
lex_identifier_intern(cpp_reader * pfile,const uchar * base)1242 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1243 {
1244   cpp_hashnode *result;
1245   const uchar *cur;
1246   unsigned int len;
1247   unsigned int hash = HT_HASHSTEP (0, *base);
1248 
1249   cur = base + 1;
1250   while (ISIDNUM (*cur))
1251     {
1252       hash = HT_HASHSTEP (hash, *cur);
1253       cur++;
1254     }
1255   len = cur - base;
1256   hash = HT_HASHFINISH (hash, len);
1257   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1258 					      base, len, hash, HT_ALLOC));
1259 
1260   /* Rarely, identifiers require diagnostics when lexed.  */
1261   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1262 			&& !pfile->state.skipping, 0))
1263     {
1264       /* It is allowed to poison the same identifier twice.  */
1265       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1266 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1267 		   NODE_NAME (result));
1268 
1269       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1270 	 replacement list of a variadic macro.  */
1271       if (result == pfile->spec_nodes.n__VA_ARGS__
1272 	  && !pfile->state.va_args_ok)
1273 	cpp_error (pfile, CPP_DL_PEDWARN,
1274 		   "__VA_ARGS__ can only appear in the expansion"
1275 		   " of a C99 variadic macro");
1276 
1277       /* For -Wc++-compat, warn about use of C++ named operators.  */
1278       if (result->flags & NODE_WARN_OPERATOR)
1279 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1280 		     "identifier \"%s\" is a special operator name in C++",
1281 		     NODE_NAME (result));
1282     }
1283 
1284   return result;
1285 }
1286 
1287 /* Get the cpp_hashnode of an identifier specified by NAME in
1288    the current cpp_reader object.  If none is found, NULL is returned.  */
1289 cpp_hashnode *
_cpp_lex_identifier(cpp_reader * pfile,const char * name)1290 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1291 {
1292   cpp_hashnode *result;
1293   result = lex_identifier_intern (pfile, (uchar *) name);
1294   return result;
1295 }
1296 
1297 /* Lex an identifier starting at BUFFER->CUR - 1.  */
1298 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * base,bool starts_ucn,struct normalize_state * nst)1299 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1300 		struct normalize_state *nst)
1301 {
1302   cpp_hashnode *result;
1303   const uchar *cur;
1304   unsigned int len;
1305   unsigned int hash = HT_HASHSTEP (0, *base);
1306 
1307   cur = pfile->buffer->cur;
1308   if (! starts_ucn)
1309     while (ISIDNUM (*cur))
1310       {
1311 	hash = HT_HASHSTEP (hash, *cur);
1312 	cur++;
1313       }
1314   pfile->buffer->cur = cur;
1315   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1316     {
1317       /* Slower version for identifiers containing UCNs (or $).  */
1318       do {
1319 	while (ISIDNUM (*pfile->buffer->cur))
1320 	  {
1321 	    pfile->buffer->cur++;
1322 	    NORMALIZE_STATE_UPDATE_IDNUM (nst);
1323 	  }
1324       } while (forms_identifier_p (pfile, false, nst));
1325       result = _cpp_interpret_identifier (pfile, base,
1326 					  pfile->buffer->cur - base);
1327     }
1328   else
1329     {
1330       len = cur - base;
1331       hash = HT_HASHFINISH (hash, len);
1332 
1333       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1334 						  base, len, hash, HT_ALLOC));
1335     }
1336 
1337   /* Rarely, identifiers require diagnostics when lexed.  */
1338   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1339 			&& !pfile->state.skipping, 0))
1340     {
1341       /* It is allowed to poison the same identifier twice.  */
1342       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1343 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1344 		   NODE_NAME (result));
1345 
1346       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1347 	 replacement list of a variadic macro.  */
1348       if (result == pfile->spec_nodes.n__VA_ARGS__
1349 	  && !pfile->state.va_args_ok)
1350 	cpp_error (pfile, CPP_DL_PEDWARN,
1351 		   "__VA_ARGS__ can only appear in the expansion"
1352 		   " of a C99 variadic macro");
1353 
1354       /* For -Wc++-compat, warn about use of C++ named operators.  */
1355       if (result->flags & NODE_WARN_OPERATOR)
1356 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1357 		     "identifier \"%s\" is a special operator name in C++",
1358 		     NODE_NAME (result));
1359     }
1360 
1361   return result;
1362 }
1363 
1364 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
1365 static void
lex_number(cpp_reader * pfile,cpp_string * number,struct normalize_state * nst)1366 lex_number (cpp_reader *pfile, cpp_string *number,
1367 	    struct normalize_state *nst)
1368 {
1369   const uchar *cur;
1370   const uchar *base;
1371   uchar *dest;
1372 
1373   base = pfile->buffer->cur - 1;
1374   do
1375     {
1376       cur = pfile->buffer->cur;
1377 
1378       /* N.B. ISIDNUM does not include $.  */
1379       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
1380 	{
1381 	  cur++;
1382 	  NORMALIZE_STATE_UPDATE_IDNUM (nst);
1383 	}
1384 
1385       pfile->buffer->cur = cur;
1386     }
1387   while (forms_identifier_p (pfile, false, nst));
1388 
1389   number->len = cur - base;
1390   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1391   memcpy (dest, base, number->len);
1392   dest[number->len] = '\0';
1393   number->text = dest;
1394 }
1395 
1396 /* Create a token of type TYPE with a literal spelling.  */
1397 static void
create_literal(cpp_reader * pfile,cpp_token * token,const uchar * base,unsigned int len,enum cpp_ttype type)1398 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1399 		unsigned int len, enum cpp_ttype type)
1400 {
1401   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
1402 
1403   memcpy (dest, base, len);
1404   dest[len] = '\0';
1405   token->type = type;
1406   token->val.str.len = len;
1407   token->val.str.text = dest;
1408 }
1409 
1410 /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1411    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
1412 
1413 static void
bufring_append(cpp_reader * pfile,const uchar * base,size_t len,_cpp_buff ** first_buff_p,_cpp_buff ** last_buff_p)1414 bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1415 		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1416 {
1417   _cpp_buff *first_buff = *first_buff_p;
1418   _cpp_buff *last_buff = *last_buff_p;
1419 
1420   if (first_buff == NULL)
1421     first_buff = last_buff = _cpp_get_buff (pfile, len);
1422   else if (len > BUFF_ROOM (last_buff))
1423     {
1424       size_t room = BUFF_ROOM (last_buff);
1425       memcpy (BUFF_FRONT (last_buff), base, room);
1426       BUFF_FRONT (last_buff) += room;
1427       base += room;
1428       len -= room;
1429       last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1430     }
1431 
1432   memcpy (BUFF_FRONT (last_buff), base, len);
1433   BUFF_FRONT (last_buff) += len;
1434 
1435   *first_buff_p = first_buff;
1436   *last_buff_p = last_buff;
1437 }
1438 
1439 /* Lexes a raw string.  The stored string contains the spelling, including
1440    double quotes, delimiter string, '(' and ')', any leading
1441    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
1442    literal, or CPP_OTHER if it was not properly terminated.
1443 
1444    The spelling is NUL-terminated, but it is not guaranteed that this
1445    is the first NUL since embedded NULs are preserved.  */
1446 
1447 static void
lex_raw_string(cpp_reader * pfile,cpp_token * token,const uchar * base,const uchar * cur)1448 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1449 		const uchar *cur)
1450 {
1451   const uchar *raw_prefix;
1452   unsigned int raw_prefix_len = 0;
1453   enum cpp_ttype type;
1454   size_t total_len = 0;
1455   _cpp_buff *first_buff = NULL, *last_buff = NULL;
1456   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1457 
1458   type = (*base == 'L' ? CPP_WSTRING :
1459 	  *base == 'U' ? CPP_STRING32 :
1460 	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1461 	  : CPP_STRING);
1462 
1463   raw_prefix = cur + 1;
1464   while (raw_prefix_len < 16)
1465     {
1466       switch (raw_prefix[raw_prefix_len])
1467 	{
1468 	case ' ': case '(': case ')': case '\\': case '\t':
1469 	case '\v': case '\f': case '\n': default:
1470 	  break;
1471 	/* Basic source charset except the above chars.  */
1472 	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1473 	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1474 	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1475 	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1476 	case 'y': case 'z':
1477 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1478 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1479 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1480 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1481 	case 'Y': case 'Z':
1482 	case '0': case '1': case '2': case '3': case '4': case '5':
1483 	case '6': case '7': case '8': case '9':
1484 	case '_': case '{': case '}': case '#': case '[': case ']':
1485 	case '<': case '>': case '%': case ':': case ';': case '.':
1486 	case '?': case '*': case '+': case '-': case '/': case '^':
1487 	case '&': case '|': case '~': case '!': case '=': case ',':
1488 	case '"': case '\'':
1489 	  raw_prefix_len++;
1490 	  continue;
1491 	}
1492       break;
1493     }
1494 
1495   if (raw_prefix[raw_prefix_len] != '(')
1496     {
1497       int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
1498 		+ 1;
1499       if (raw_prefix_len == 16)
1500 	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1501 			     "raw string delimiter longer than 16 characters");
1502       else
1503 	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
1504 			     "invalid character '%c' in raw string delimiter",
1505 			     (int) raw_prefix[raw_prefix_len]);
1506       pfile->buffer->cur = raw_prefix - 1;
1507       create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
1508       return;
1509     }
1510 
1511   cur = raw_prefix + raw_prefix_len + 1;
1512   for (;;)
1513     {
1514 #define BUF_APPEND(STR,LEN)					\
1515       do {							\
1516 	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
1517 			&first_buff, &last_buff);		\
1518 	total_len += (LEN);					\
1519       } while (0);
1520 
1521       cppchar_t c;
1522 
1523       /* If we previously performed any trigraph or line splicing
1524 	 transformations, undo them within the body of the raw string.  */
1525       while (note->pos < cur)
1526 	++note;
1527       for (; note->pos == cur; ++note)
1528 	{
1529 	  switch (note->type)
1530 	    {
1531 	    case '\\':
1532 	    case ' ':
1533 	      /* Restore backslash followed by newline.  */
1534 	      BUF_APPEND (base, cur - base);
1535 	      base = cur;
1536 	      BUF_APPEND ("\\", 1);
1537 	    after_backslash:
1538 	      if (note->type == ' ')
1539 		{
1540 		  /* GNU backslash whitespace newline extension.  FIXME
1541 		     could be any sequence of non-vertical space.  When we
1542 		     can properly restore any such sequence, we should mark
1543 		     this note as handled so _cpp_process_line_notes
1544 		     doesn't warn.  */
1545 		  BUF_APPEND (" ", 1);
1546 		}
1547 
1548 	      BUF_APPEND ("\n", 1);
1549 	      break;
1550 
1551 	    case 0:
1552 	      /* Already handled.  */
1553 	      break;
1554 
1555 	    default:
1556 	      if (_cpp_trigraph_map[note->type])
1557 		{
1558 		  /* Don't warn about this trigraph in
1559 		     _cpp_process_line_notes, since trigraphs show up as
1560 		     trigraphs in raw strings.  */
1561 		  uchar type = note->type;
1562 		  note->type = 0;
1563 
1564 		  if (!CPP_OPTION (pfile, trigraphs))
1565 		    /* If we didn't convert the trigraph in the first
1566 		       place, don't do anything now either.  */
1567 		    break;
1568 
1569 		  BUF_APPEND (base, cur - base);
1570 		  base = cur;
1571 		  BUF_APPEND ("??", 2);
1572 
1573 		  /* ??/ followed by newline gets two line notes, one for
1574 		     the trigraph and one for the backslash/newline.  */
1575 		  if (type == '/' && note[1].pos == cur)
1576 		    {
1577 		      if (note[1].type != '\\'
1578 			  && note[1].type != ' ')
1579 			abort ();
1580 		      BUF_APPEND ("/", 1);
1581 		      ++note;
1582 		      goto after_backslash;
1583 		    }
1584 		  /* The ) from ??) could be part of the suffix.  */
1585 		  else if (type == ')'
1586 			   && strncmp ((const char *) cur+1,
1587 				       (const char *) raw_prefix,
1588 				       raw_prefix_len) == 0
1589 			   && cur[raw_prefix_len+1] == '"')
1590 		    {
1591 		      BUF_APPEND (")", 1);
1592 		      base++;
1593 		      cur += raw_prefix_len + 2;
1594 		      goto break_outer_loop;
1595 		    }
1596 		  else
1597 		    {
1598 		      /* Skip the replacement character.  */
1599 		      base = ++cur;
1600 		      BUF_APPEND (&type, 1);
1601 		    }
1602 		}
1603 	      else
1604 		abort ();
1605 	      break;
1606 	    }
1607 	}
1608       c = *cur++;
1609 
1610       if (c == ')'
1611 	  && strncmp ((const char *) cur, (const char *) raw_prefix,
1612 		      raw_prefix_len) == 0
1613 	  && cur[raw_prefix_len] == '"')
1614 	{
1615 	  cur += raw_prefix_len + 1;
1616 	  break;
1617 	}
1618       else if (c == '\n')
1619 	{
1620 	  if (pfile->state.in_directive
1621 	      || pfile->state.parsing_args
1622 	      || pfile->state.in_deferred_pragma)
1623 	    {
1624 	      cur--;
1625 	      type = CPP_OTHER;
1626 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1627 				   "unterminated raw string");
1628 	      break;
1629 	    }
1630 
1631 	  BUF_APPEND (base, cur - base);
1632 
1633 	  if (pfile->buffer->cur < pfile->buffer->rlimit)
1634 	    CPP_INCREMENT_LINE (pfile, 0);
1635 	  pfile->buffer->need_line = true;
1636 
1637 	  pfile->buffer->cur = cur-1;
1638 	  _cpp_process_line_notes (pfile, false);
1639 	  if (!_cpp_get_fresh_line (pfile))
1640 	    {
1641 	      source_location src_loc = token->src_loc;
1642 	      token->type = CPP_EOF;
1643 	      /* Tell the compiler the line number of the EOF token.  */
1644 	      token->src_loc = pfile->line_table->highest_line;
1645 	      token->flags = BOL;
1646 	      if (first_buff != NULL)
1647 		_cpp_release_buff (pfile, first_buff);
1648 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1649 				   "unterminated raw string");
1650 	      return;
1651 	    }
1652 
1653 	  cur = base = pfile->buffer->cur;
1654 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
1655 	}
1656     }
1657  break_outer_loop:
1658 
1659   if (CPP_OPTION (pfile, user_literals))
1660     {
1661       /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
1662 	 underscore is ill-formed.  Since this breaks programs using macros
1663 	 from inttypes.h, we generate a warning and treat the ud-suffix as a
1664 	 separate preprocessing token.  This approach is under discussion by
1665 	 the standards committee, and has been adopted as a conforming
1666 	 extension by other front ends such as clang.
1667          A special exception is made for the suffix 's' which will be
1668 	 standardized as a user-defined literal suffix for strings.  */
1669       if (ISALPHA (*cur) && *cur != 's')
1670 	{
1671 	  /* Raise a warning, but do not consume subsequent tokens.  */
1672 	  if (CPP_OPTION (pfile, warn_literal_suffix))
1673 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1674 				   token->src_loc, 0,
1675 				   "invalid suffix on literal; C++11 requires "
1676 				   "a space between literal and identifier");
1677 	}
1678       /* Grab user defined literal suffix.  */
1679       else if (ISIDST (*cur))
1680 	{
1681 	  type = cpp_userdef_string_add_type (type);
1682 	  ++cur;
1683 
1684 	  while (ISIDNUM (*cur))
1685 	    ++cur;
1686 	}
1687     }
1688 
1689   pfile->buffer->cur = cur;
1690   if (first_buff == NULL)
1691     create_literal (pfile, token, base, cur - base, type);
1692   else
1693     {
1694       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1695 
1696       token->type = type;
1697       token->val.str.len = total_len + (cur - base);
1698       token->val.str.text = dest;
1699       last_buff = first_buff;
1700       while (last_buff != NULL)
1701 	{
1702 	  memcpy (dest, last_buff->base,
1703 		  BUFF_FRONT (last_buff) - last_buff->base);
1704 	  dest += BUFF_FRONT (last_buff) - last_buff->base;
1705 	  last_buff = last_buff->next;
1706 	}
1707       _cpp_release_buff (pfile, first_buff);
1708       memcpy (dest, base, cur - base);
1709       dest[cur - base] = '\0';
1710     }
1711 }
1712 
1713 /* Lexes a string, character constant, or angle-bracketed header file
1714    name.  The stored string contains the spelling, including opening
1715    quote and any leading 'L', 'u', 'U' or 'u8' and optional
1716    'R' modifier.  It returns the type of the literal, or CPP_OTHER
1717    if it was not properly terminated, or CPP_LESS for an unterminated
1718    header name which must be relexed as normal tokens.
1719 
1720    The spelling is NUL-terminated, but it is not guaranteed that this
1721    is the first NUL since embedded NULs are preserved.  */
1722 static void
lex_string(cpp_reader * pfile,cpp_token * token,const uchar * base)1723 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1724 {
1725   bool saw_NUL = false;
1726   const uchar *cur;
1727   cppchar_t terminator;
1728   enum cpp_ttype type;
1729 
1730   cur = base;
1731   terminator = *cur++;
1732   if (terminator == 'L' || terminator == 'U')
1733     terminator = *cur++;
1734   else if (terminator == 'u')
1735     {
1736       terminator = *cur++;
1737       if (terminator == '8')
1738 	terminator = *cur++;
1739     }
1740   if (terminator == 'R')
1741     {
1742       lex_raw_string (pfile, token, base, cur);
1743       return;
1744     }
1745   if (terminator == '"')
1746     type = (*base == 'L' ? CPP_WSTRING :
1747 	    *base == 'U' ? CPP_STRING32 :
1748 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1749 			 : CPP_STRING);
1750   else if (terminator == '\'')
1751     type = (*base == 'L' ? CPP_WCHAR :
1752 	    *base == 'U' ? CPP_CHAR32 :
1753 	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
1754   else
1755     terminator = '>', type = CPP_HEADER_NAME;
1756 
1757   for (;;)
1758     {
1759       cppchar_t c = *cur++;
1760 
1761       /* In #include-style directives, terminators are not escapable.  */
1762       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1763 	cur++;
1764       else if (c == terminator)
1765 	break;
1766       else if (c == '\n')
1767 	{
1768 	  cur--;
1769 	  /* Unmatched quotes always yield undefined behavior, but
1770 	     greedy lexing means that what appears to be an unterminated
1771 	     header name may actually be a legitimate sequence of tokens.  */
1772 	  if (terminator == '>')
1773 	    {
1774 	      token->type = CPP_LESS;
1775 	      return;
1776 	    }
1777 	  type = CPP_OTHER;
1778 	  break;
1779 	}
1780       else if (c == '\0')
1781 	saw_NUL = true;
1782     }
1783 
1784   if (saw_NUL && !pfile->state.skipping)
1785     cpp_error (pfile, CPP_DL_WARNING,
1786 	       "null character(s) preserved in literal");
1787 
1788   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1789     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1790 	       (int) terminator);
1791 
1792   if (CPP_OPTION (pfile, user_literals))
1793     {
1794       /* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
1795 	 underscore is ill-formed.  Since this breaks programs using macros
1796 	 from inttypes.h, we generate a warning and treat the ud-suffix as a
1797 	 separate preprocessing token.  This approach is under discussion by
1798 	 the standards committee, and has been adopted as a conforming
1799 	 extension by other front ends such as clang.
1800          A special exception is made for the suffix 's' which will be
1801 	 standardized as a user-defined literal suffix for strings.  */
1802       if (ISALPHA (*cur) && *cur != 's')
1803 	{
1804 	  /* Raise a warning, but do not consume subsequent tokens.  */
1805 	  if (CPP_OPTION (pfile, warn_literal_suffix))
1806 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1807 				   token->src_loc, 0,
1808 				   "invalid suffix on literal; C++11 requires "
1809 				   "a space between literal and identifier");
1810 	}
1811       /* Grab user defined literal suffix.  */
1812       else if (ISIDST (*cur))
1813 	{
1814 	  type = cpp_userdef_char_add_type (type);
1815 	  type = cpp_userdef_string_add_type (type);
1816           ++cur;
1817 
1818 	  while (ISIDNUM (*cur))
1819 	    ++cur;
1820 	}
1821     }
1822 
1823   pfile->buffer->cur = cur;
1824   create_literal (pfile, token, base, cur - base, type);
1825 }
1826 
1827 /* Return the comment table. The client may not make any assumption
1828    about the ordering of the table.  */
1829 cpp_comment_table *
cpp_get_comments(cpp_reader * pfile)1830 cpp_get_comments (cpp_reader *pfile)
1831 {
1832   return &pfile->comments;
1833 }
1834 
1835 /* Append a comment to the end of the comment table. */
1836 static void
store_comment(cpp_reader * pfile,cpp_token * token)1837 store_comment (cpp_reader *pfile, cpp_token *token)
1838 {
1839   int len;
1840 
1841   if (pfile->comments.allocated == 0)
1842     {
1843       pfile->comments.allocated = 256;
1844       pfile->comments.entries = (cpp_comment *) xmalloc
1845 	(pfile->comments.allocated * sizeof (cpp_comment));
1846     }
1847 
1848   if (pfile->comments.count == pfile->comments.allocated)
1849     {
1850       pfile->comments.allocated *= 2;
1851       pfile->comments.entries = (cpp_comment *) xrealloc
1852 	(pfile->comments.entries,
1853 	 pfile->comments.allocated * sizeof (cpp_comment));
1854     }
1855 
1856   len = token->val.str.len;
1857 
1858   /* Copy comment. Note, token may not be NULL terminated. */
1859   pfile->comments.entries[pfile->comments.count].comment =
1860     (char *) xmalloc (sizeof (char) * (len + 1));
1861   memcpy (pfile->comments.entries[pfile->comments.count].comment,
1862 	  token->val.str.text, len);
1863   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1864 
1865   /* Set source location. */
1866   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1867 
1868   /* Increment the count of entries in the comment table. */
1869   pfile->comments.count++;
1870 }
1871 
1872 /* The stored comment includes the comment start and any terminator.  */
1873 static void
save_comment(cpp_reader * pfile,cpp_token * token,const unsigned char * from,cppchar_t type)1874 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1875 	      cppchar_t type)
1876 {
1877   unsigned char *buffer;
1878   unsigned int len, clen, i;
1879 
1880   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1881 
1882   /* C++ comments probably (not definitely) have moved past a new
1883      line, which we don't want to save in the comment.  */
1884   if (is_vspace (pfile->buffer->cur[-1]))
1885     len--;
1886 
1887   /* If we are currently in a directive or in argument parsing, then
1888      we need to store all C++ comments as C comments internally, and
1889      so we need to allocate a little extra space in that case.
1890 
1891      Note that the only time we encounter a directive here is
1892      when we are saving comments in a "#define".  */
1893   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
1894 	  && type == '/') ? len + 2 : len;
1895 
1896   buffer = _cpp_unaligned_alloc (pfile, clen);
1897 
1898   token->type = CPP_COMMENT;
1899   token->val.str.len = clen;
1900   token->val.str.text = buffer;
1901 
1902   buffer[0] = '/';
1903   memcpy (buffer + 1, from, len - 1);
1904 
1905   /* Finish conversion to a C comment, if necessary.  */
1906   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
1907     {
1908       buffer[1] = '*';
1909       buffer[clen - 2] = '*';
1910       buffer[clen - 1] = '/';
1911       /* As there can be in a C++ comments illegal sequences for C comments
1912          we need to filter them out.  */
1913       for (i = 2; i < (clen - 2); i++)
1914         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1915           buffer[i] = '|';
1916     }
1917 
1918   /* Finally store this comment for use by clients of libcpp. */
1919   store_comment (pfile, token);
1920 }
1921 
1922 /* Allocate COUNT tokens for RUN.  */
1923 void
_cpp_init_tokenrun(tokenrun * run,unsigned int count)1924 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1925 {
1926   run->base = XNEWVEC (cpp_token, count);
1927   run->limit = run->base + count;
1928   run->next = NULL;
1929 }
1930 
1931 /* Returns the next tokenrun, or creates one if there is none.  */
1932 static tokenrun *
next_tokenrun(tokenrun * run)1933 next_tokenrun (tokenrun *run)
1934 {
1935   if (run->next == NULL)
1936     {
1937       run->next = XNEW (tokenrun);
1938       run->next->prev = run;
1939       _cpp_init_tokenrun (run->next, 250);
1940     }
1941 
1942   return run->next;
1943 }
1944 
1945 /* Return the number of not yet processed token in a given
1946    context.  */
1947 int
_cpp_remaining_tokens_num_in_context(cpp_context * context)1948 _cpp_remaining_tokens_num_in_context (cpp_context *context)
1949 {
1950   if (context->tokens_kind == TOKENS_KIND_DIRECT)
1951     return (LAST (context).token - FIRST (context).token);
1952   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1953 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
1954     return (LAST (context).ptoken - FIRST (context).ptoken);
1955   else
1956       abort ();
1957 }
1958 
1959 /* Returns the token present at index INDEX in a given context.  If
1960    INDEX is zero, the next token to be processed is returned.  */
1961 static const cpp_token*
_cpp_token_from_context_at(cpp_context * context,int index)1962 _cpp_token_from_context_at (cpp_context *context, int index)
1963 {
1964   if (context->tokens_kind == TOKENS_KIND_DIRECT)
1965     return &(FIRST (context).token[index]);
1966   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
1967 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
1968     return FIRST (context).ptoken[index];
1969  else
1970    abort ();
1971 }
1972 
1973 /* Look ahead in the input stream.  */
1974 const cpp_token *
cpp_peek_token(cpp_reader * pfile,int index)1975 cpp_peek_token (cpp_reader *pfile, int index)
1976 {
1977   cpp_context *context = pfile->context;
1978   const cpp_token *peektok;
1979   int count;
1980 
1981   /* First, scan through any pending cpp_context objects.  */
1982   while (context->prev)
1983     {
1984       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
1985 
1986       if (index < (int) sz)
1987         return _cpp_token_from_context_at (context, index);
1988       index -= (int) sz;
1989       context = context->prev;
1990     }
1991 
1992   /* We will have to read some new tokens after all (and do so
1993      without invalidating preceding tokens).  */
1994   count = index;
1995   pfile->keep_tokens++;
1996 
1997   do
1998     {
1999       peektok = _cpp_lex_token (pfile);
2000       if (peektok->type == CPP_EOF)
2001 	return peektok;
2002     }
2003   while (index--);
2004 
2005   _cpp_backup_tokens_direct (pfile, count + 1);
2006   pfile->keep_tokens--;
2007 
2008   return peektok;
2009 }
2010 
2011 /* Allocate a single token that is invalidated at the same time as the
2012    rest of the tokens on the line.  Has its line and col set to the
2013    same as the last lexed token, so that diagnostics appear in the
2014    right place.  */
2015 cpp_token *
_cpp_temp_token(cpp_reader * pfile)2016 _cpp_temp_token (cpp_reader *pfile)
2017 {
2018   cpp_token *old, *result;
2019   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2020   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
2021 
2022   old = pfile->cur_token - 1;
2023   /* Any pre-existing lookaheads must not be clobbered.  */
2024   if (la)
2025     {
2026       if (sz <= la)
2027         {
2028           tokenrun *next = next_tokenrun (pfile->cur_run);
2029 
2030           if (sz < la)
2031             memmove (next->base + 1, next->base,
2032                      (la - sz) * sizeof (cpp_token));
2033 
2034           next->base[0] = pfile->cur_run->limit[-1];
2035         }
2036 
2037       if (sz > 1)
2038         memmove (pfile->cur_token + 1, pfile->cur_token,
2039                  MIN (la, sz - 1) * sizeof (cpp_token));
2040     }
2041 
2042   if (!sz && pfile->cur_token == pfile->cur_run->limit)
2043     {
2044       pfile->cur_run = next_tokenrun (pfile->cur_run);
2045       pfile->cur_token = pfile->cur_run->base;
2046     }
2047 
2048   result = pfile->cur_token++;
2049   result->src_loc = old->src_loc;
2050   return result;
2051 }
2052 
2053 /* Lex a token into RESULT (external interface).  Takes care of issues
2054    like directive handling, token lookahead, multiple include
2055    optimization and skipping.  */
2056 const cpp_token *
_cpp_lex_token(cpp_reader * pfile)2057 _cpp_lex_token (cpp_reader *pfile)
2058 {
2059   cpp_token *result;
2060 
2061   for (;;)
2062     {
2063       if (pfile->cur_token == pfile->cur_run->limit)
2064 	{
2065 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
2066 	  pfile->cur_token = pfile->cur_run->base;
2067 	}
2068       /* We assume that the current token is somewhere in the current
2069 	 run.  */
2070       if (pfile->cur_token < pfile->cur_run->base
2071 	  || pfile->cur_token >= pfile->cur_run->limit)
2072 	abort ();
2073 
2074       if (pfile->lookaheads)
2075 	{
2076 	  pfile->lookaheads--;
2077 	  result = pfile->cur_token++;
2078 	}
2079       else
2080 	result = _cpp_lex_direct (pfile);
2081 
2082       if (result->flags & BOL)
2083 	{
2084 	  /* Is this a directive.  If _cpp_handle_directive returns
2085 	     false, it is an assembler #.  */
2086 	  if (result->type == CPP_HASH
2087 	      /* 6.10.3 p 11: Directives in a list of macro arguments
2088 		 gives undefined behavior.  This implementation
2089 		 handles the directive as normal.  */
2090 	      && pfile->state.parsing_args != 1)
2091 	    {
2092 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
2093 		{
2094 		  if (pfile->directive_result.type == CPP_PADDING)
2095 		    continue;
2096 		  result = &pfile->directive_result;
2097 		}
2098 	    }
2099 	  else if (pfile->state.in_deferred_pragma)
2100 	    result = &pfile->directive_result;
2101 
2102 	  if (pfile->cb.line_change && !pfile->state.skipping)
2103 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
2104 	}
2105 
2106       /* We don't skip tokens in directives.  */
2107       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
2108 	break;
2109 
2110       /* Outside a directive, invalidate controlling macros.  At file
2111 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
2112 	 get here and MI optimization works.  */
2113       pfile->mi_valid = false;
2114 
2115       if (!pfile->state.skipping || result->type == CPP_EOF)
2116 	break;
2117     }
2118 
2119   return result;
2120 }
2121 
2122 /* Returns true if a fresh line has been loaded.  */
2123 bool
_cpp_get_fresh_line(cpp_reader * pfile)2124 _cpp_get_fresh_line (cpp_reader *pfile)
2125 {
2126   int return_at_eof;
2127 
2128   /* We can't get a new line until we leave the current directive.  */
2129   if (pfile->state.in_directive)
2130     return false;
2131 
2132   for (;;)
2133     {
2134       cpp_buffer *buffer = pfile->buffer;
2135 
2136       if (!buffer->need_line)
2137 	return true;
2138 
2139       if (buffer->next_line < buffer->rlimit)
2140 	{
2141 	  _cpp_clean_line (pfile);
2142 	  return true;
2143 	}
2144 
2145       /* First, get out of parsing arguments state.  */
2146       if (pfile->state.parsing_args)
2147 	return false;
2148 
2149       /* End of buffer.  Non-empty files should end in a newline.  */
2150       if (buffer->buf != buffer->rlimit
2151 	  && buffer->next_line > buffer->rlimit
2152 	  && !buffer->from_stage3)
2153 	{
2154 	  /* Clip to buffer size.  */
2155 	  buffer->next_line = buffer->rlimit;
2156 	}
2157 
2158       return_at_eof = buffer->return_at_eof;
2159       _cpp_pop_buffer (pfile);
2160       if (pfile->buffer == NULL || return_at_eof)
2161 	return false;
2162     }
2163 }
2164 
/* Helper for _cpp_lex_direct; expects `buffer' and `result' to be in
   scope at the point of use.  If the next input character is CHAR,
   consume it and set the token type to THEN_TYPE; otherwise leave the
   input alone and set the type to ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      if (*buffer->cur == CHAR)				\
	{						\
	  buffer->cur++;				\
	  result->type = THEN_TYPE;			\
	}						\
      else						\
	result->type = ELSE_TYPE;			\
    }							\
  while (0)
2173 
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   Control flow inside the function is driven by three labels:
   fresh_line (entered initially and again after each newline),
   update_tokens_line (re-entered after a skipped comment) and
   skipped_white (re-entered after horizontal whitespace).  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The end of the line closes a deferred pragma.  */
      if (pfile->state.in_deferred_pragma)
	{
	  result->type = CPP_PRAGMA_EOL;
	  pfile->state.in_deferred_pragma = false;
	  if (!pfile->state.pragma_allow_expansion)
	    pfile->state.prevent_expansion--;
	  return result;
	}
      /* No further line could be obtained: report CPP_EOF.  */
      if (!_cpp_get_fresh_line (pfile))
	{
	  result->type = CPP_EOF;
	  if (!pfile->state.in_directive)
	    {
	      /* Tell the compiler the line number of the EOF token.  */
	      result->src_loc = pfile->line_table->highest_line;
	      result->flags = BOL;
	    }
	  return result;
	}
      /* Unless earlier tokens must be kept, restart at the beginning
	 of the base token run and lex into its first slot.  */
      if (!pfile->keep_tokens)
	{
	  pfile->cur_run = &pfile->base_run;
	  result = pfile->base_run.base;
	  pfile->cur_token = result + 1;
	}
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
	result->flags |= PREV_WHITE;
    }
  /* Reload: _cpp_get_fresh_line may have popped to another buffer.  */
  buffer = pfile->buffer;
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

 skipped_white:
  /* Process any line notes that have been reached.  */
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  /* A forced location, when set, overrides the computed column
     position.  */
  if (pfile->forced_token_location_p)
    result->src_loc = *pfile->forced_token_location_p;
  else
    result->src_loc = linemap_position_for_column (pfile->line_table,
					  CPP_BUF_COLUMN (buffer, buffer->cur));

  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      /* The line count is only advanced when the newline is not the
	 last character before rlimit.  */
      if (buffer->cur < buffer->rlimit)
	CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->type = CPP_NUMBER;
	lex_number (pfile, &result->val.str, &nst);
	warn_about_normalization (pfile, result, &nst);
	break;
      }

    case 'L':
    case 'u':
    case 'U':
    case 'R':
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
	 wide strings or raw strings.  */
      if (c == 'L' || CPP_OPTION (pfile, rliterals)
	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
	{
	  if ((*buffer->cur == '\'' && c != 'R')
	      || *buffer->cur == '"'
	      || (*buffer->cur == 'R'
		  && c != 'R'
		  && buffer->cur[1] == '"'
		  && CPP_OPTION (pfile, rliterals))
	      || (*buffer->cur == '8'
		  && c == 'u'
		  && (buffer->cur[1] == '"'
		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
			  && CPP_OPTION (pfile, rliterals)))))
	    {
	      lex_string (pfile, result, buffer->cur - 1);
	      break;
	    }
	}
      /* Fall through.  */

    /* Identifier start characters.  'L', 'u', 'U' and 'R' are absent
       from this list because they are handled by the case above,
       which falls through to here when no literal follows.  */
    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't':           case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q':
    case 'S': case 'T':           case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
						&nst);
	warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  */
      if (result->val.node.node->flags & NODE_OPERATOR)
	{
	  result->flags |= NAMED_OP;
	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
	}
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
	{
	  if (_cpp_skip_block_comment (pfile))
	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
	}
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
			    || cpp_in_system_header (pfile)))
	{
	  /* Warn about comments only if pedantically GNUC89, and not
	     in system headers.  */
	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
	      && ! buffer->warned_cplusplus_comments)
	    {
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "C++ style comments are not allowed in ISO C90");
	      cpp_error (pfile, CPP_DL_PEDWARN,
			 "(this will be reported only once per input file)");
	      buffer->warned_cplusplus_comments = 1;
	    }

	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
	}
      else if (c == '=')
	{
	  buffer->cur++;
	  result->type = CPP_DIV_EQ;
	  break;
	}
      else
	{
	  result->type = CPP_DIV;
	  break;
	}

      /* Only the two comment branches above reach this point.  When
	 comments are not being saved, the comment counts as
	 whitespace and lexing resumes with the next token.  */
      if (!pfile->state.save_comments)
	{
	  result->flags |= PREV_WHITE;
	  goto update_tokens_line;
	}

      /* Save the comment as a token in its own right.  C is still the
	 character that followed the '/', i.e. '*' or '/'.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      if (pfile->state.angled_headers)
	{
	  /* If lex_string produced anything other than CPP_LESS, that
	     token is the result; otherwise handle '<' as an operator
	     below.  */
	  lex_string (pfile, result, buffer->cur - 1);
	  if (result->type != CPP_LESS)
	    break;
	}

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
	}
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
		 three characters are <:: and the subsequent character
		 is neither : nor >, the < is treated as a preprocessor
		 token by itself".  */
	      if (CPP_OPTION (pfile, cplusplus)
		  && (CPP_OPTION (pfile, lang) == CLK_CXX11
		      || CPP_OPTION (pfile, lang) == CLK_GNUCXX11)
		  && buffer->cur[1] == ':'
		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
		break;

	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_SQUARE;
	    }
	  else if (*buffer->cur == '%')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_OPEN_BRACE;
	    }
	}
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
	}
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
	{
	  if (*buffer->cur == ':')
	    {
	      /* "%:" is the digraph for '#', and "%:%:" for "##".  */
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_HASH;
	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
	    }
	  else if (*buffer->cur == '>')
	    {
	      buffer->cur++;
	      result->flags |= DIGRAPH;
	      result->type = CPP_CLOSE_BRACE;
	    }
	}
      break;

    case '.':
      result->type = CPP_DOT;
      /* A '.' followed by a digit starts a number.  */
      if (ISDIGIT (*buffer->cur))
	{
	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
	  result->type = CPP_NUMBER;
	  lex_number (pfile, &result->val.str, &nst);
	  warn_about_normalization (pfile, result, &nst);
	}
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
	buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
	buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
	{
	  buffer->cur++;
	  result->type = CPP_DEREF;
	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
	    buffer->cur++, result->type = CPP_DEREF_STAR;
	}
      else if (*buffer->cur == '-')
	buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
	buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
	buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
	buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
	buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
	{
	  buffer->cur++;
	  result->flags |= DIGRAPH;
	  result->type = CPP_CLOSE_SQUARE;
	}
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
	/* Step back onto the character so forms_identifier_p can
	   examine it.  */
	const uchar *base = --buffer->cur;
	struct normalize_state nst = INITIAL_NORMALIZE_STATE;

	if (forms_identifier_p (pfile, true, &nst))
	  {
	    result->type = CPP_NAME;
	    result->val.node.node = lex_identifier (pfile, base, true, &nst);
	    warn_about_normalization (pfile, result, &nst);
	    break;
	  }
	/* Not an identifier: step forward again and fall through so
	   the character becomes a one-character CPP_OTHER token.  */
	buffer->cur++;
      }

    default:
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}
2565 
2566 /* An upper bound on the number of bytes needed to spell TOKEN.
2567    Does not include preceding whitespace.  */
2568 unsigned int
cpp_token_len(const cpp_token * token)2569 cpp_token_len (const cpp_token *token)
2570 {
2571   unsigned int len;
2572 
2573   switch (TOKEN_SPELL (token))
2574     {
2575     default:		len = 6;				break;
2576     case SPELL_LITERAL:	len = token->val.str.len;		break;
2577     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
2578     }
2579 
2580   return len;
2581 }
2582 
/* Decode the UTF-8 sequence starting at NAME and write the matching
   \U escape (backslash, 'U', eight lowercase hex digits) to BUFFER.
   Exactly 10 bytes are written to BUFFER.  Returns the number of bytes
   consumed from NAME.  Aborts on a malformed continuation byte.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int consumed;
  int shift;
  int out = 0;

  /* The count of leading one bits in the first byte is the length of
     the whole sequence.  */
  for (lead = *name; (lead & 0x80) != 0; lead <<= 1)
    seq_len++;

  /* The payload bits of the first byte sit below the length marker.  */
  code_point = *name & (0x7F >> seq_len);

  for (consumed = 1; consumed < seq_len; consumed++)
    {
      unsigned char cont = *++name;

      /* Continuation bytes must look like 10xxxxxx; anything else is
	 ill-formed UTF-8.  */
      if ((cont & ~0x3F) != 0x80)
	abort ();

      code_point = (code_point << 6) | (cont & 0x3F);
    }

  buffer[out++] = '\\';
  buffer[out++] = 'U';
  /* Eight nibbles, most significant first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    buffer[out++] = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
2616 
2617 /* Given a token TYPE corresponding to a digraph, return a pointer to
2618    the spelling of the digraph.  */
2619 static const unsigned char *
cpp_digraph2name(enum cpp_ttype type)2620 cpp_digraph2name (enum cpp_ttype type)
2621 {
2622   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2623 }
2624 
2625 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2626    already contain the enough space to hold the token's spelling.
2627    Returns a pointer to the character after the last character written.
2628    FORSTRING is true if this is to be the spelling after translation
2629    phase 1 (this is different for UCNs).
2630    FIXME: Would be nice if we didn't need the PFILE argument.  */
2631 unsigned char *
cpp_spell_token(cpp_reader * pfile,const cpp_token * token,unsigned char * buffer,bool forstring)2632 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
2633 		 unsigned char *buffer, bool forstring)
2634 {
2635   switch (TOKEN_SPELL (token))
2636     {
2637     case SPELL_OPERATOR:
2638       {
2639 	const unsigned char *spelling;
2640 	unsigned char c;
2641 
2642 	if (token->flags & DIGRAPH)
2643 	  spelling = cpp_digraph2name (token->type);
2644 	else if (token->flags & NAMED_OP)
2645 	  goto spell_ident;
2646 	else
2647 	  spelling = TOKEN_NAME (token);
2648 
2649 	while ((c = *spelling++) != '\0')
2650 	  *buffer++ = c;
2651       }
2652       break;
2653 
2654     spell_ident:
2655     case SPELL_IDENT:
2656       if (forstring)
2657 	{
2658 	  memcpy (buffer, NODE_NAME (token->val.node.node),
2659 		  NODE_LEN (token->val.node.node));
2660 	  buffer += NODE_LEN (token->val.node.node);
2661 	}
2662       else
2663 	{
2664 	  size_t i;
2665 	  const unsigned char * name = NODE_NAME (token->val.node.node);
2666 
2667 	  for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2668 	    if (name[i] & ~0x7F)
2669 	      {
2670 		i += utf8_to_ucn (buffer, name + i) - 1;
2671 		buffer += 10;
2672 	      }
2673 	    else
2674 	      *buffer++ = NODE_NAME (token->val.node.node)[i];
2675 	}
2676       break;
2677 
2678     case SPELL_LITERAL:
2679       memcpy (buffer, token->val.str.text, token->val.str.len);
2680       buffer += token->val.str.len;
2681       break;
2682 
2683     case SPELL_NONE:
2684       cpp_error (pfile, CPP_DL_ICE,
2685 		 "unspellable token %s", TOKEN_NAME (token));
2686       break;
2687     }
2688 
2689   return buffer;
2690 }
2691 
2692 /* Returns TOKEN spelt as a null-terminated string.  The string is
2693    freed when the reader is destroyed.  Useful for diagnostics.  */
2694 unsigned char *
cpp_token_as_text(cpp_reader * pfile,const cpp_token * token)2695 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
2696 {
2697   unsigned int len = cpp_token_len (token) + 1;
2698   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
2699 
2700   end = cpp_spell_token (pfile, token, start, false);
2701   end[0] = '\0';
2702 
2703   return start;
2704 }
2705 
2706 /* Returns a pointer to a string which spells the token defined by
2707    TYPE and FLAGS.  Used by C front ends, which really should move to
2708    using cpp_token_as_text.  */
2709 const char *
cpp_type2name(enum cpp_ttype type,unsigned char flags)2710 cpp_type2name (enum cpp_ttype type, unsigned char flags)
2711 {
2712   if (flags & DIGRAPH)
2713     return (const char *) cpp_digraph2name (type);
2714   else if (flags & NAMED_OP)
2715     return cpp_named_operator2name (type);
2716 
2717   return (const char *) token_spellings[type].name;
2718 }
2719 
2720 /* Writes the spelling of token to FP, without any preceding space.
2721    Separated from cpp_spell_token for efficiency - to avoid stdio
2722    double-buffering.  */
2723 void
cpp_output_token(const cpp_token * token,FILE * fp)2724 cpp_output_token (const cpp_token *token, FILE *fp)
2725 {
2726   switch (TOKEN_SPELL (token))
2727     {
2728     case SPELL_OPERATOR:
2729       {
2730 	const unsigned char *spelling;
2731 	int c;
2732 
2733 	if (token->flags & DIGRAPH)
2734 	  spelling = cpp_digraph2name (token->type);
2735 	else if (token->flags & NAMED_OP)
2736 	  goto spell_ident;
2737 	else
2738 	  spelling = TOKEN_NAME (token);
2739 
2740 	c = *spelling;
2741 	do
2742 	  putc (c, fp);
2743 	while ((c = *++spelling) != '\0');
2744       }
2745       break;
2746 
2747     spell_ident:
2748     case SPELL_IDENT:
2749       {
2750 	size_t i;
2751 	const unsigned char * name = NODE_NAME (token->val.node.node);
2752 
2753 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
2754 	  if (name[i] & ~0x7F)
2755 	    {
2756 	      unsigned char buffer[10];
2757 	      i += utf8_to_ucn (buffer, name + i) - 1;
2758 	      fwrite (buffer, 1, 10, fp);
2759 	    }
2760 	  else
2761 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
2762       }
2763       break;
2764 
2765     case SPELL_LITERAL:
2766       fwrite (token->val.str.text, 1, token->val.str.len, fp);
2767       break;
2768 
2769     case SPELL_NONE:
2770       /* An error, most probably.  */
2771       break;
2772     }
2773 }
2774 
2775 /* Compare two tokens.  */
2776 int
_cpp_equiv_tokens(const cpp_token * a,const cpp_token * b)2777 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
2778 {
2779   if (a->type == b->type && a->flags == b->flags)
2780     switch (TOKEN_SPELL (a))
2781       {
2782       default:			/* Keep compiler happy.  */
2783       case SPELL_OPERATOR:
2784 	/* token_no is used to track where multiple consecutive ##
2785 	   tokens were originally located.  */
2786 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
2787       case SPELL_NONE:
2788 	return (a->type != CPP_MACRO_ARG
2789 		|| a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
2790       case SPELL_IDENT:
2791 	return a->val.node.node == b->val.node.node;
2792       case SPELL_LITERAL:
2793 	return (a->val.str.len == b->val.str.len
2794 		&& !memcmp (a->val.str.text, b->val.str.text,
2795 			    a->val.str.len));
2796       }
2797 
2798   return 0;
2799 }
2800 
2801 /* Returns nonzero if a space should be inserted to avoid an
2802    accidental token paste for output.  For simplicity, it is
2803    conservative, and occasionally advises a space where one is not
2804    needed, e.g. "." and ".2".  */
2805 int
cpp_avoid_paste(cpp_reader * pfile,const cpp_token * token1,const cpp_token * token2)2806 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2807 		 const cpp_token *token2)
2808 {
2809   enum cpp_ttype a = token1->type, b = token2->type;
2810   cppchar_t c;
2811 
2812   if (token1->flags & NAMED_OP)
2813     a = CPP_NAME;
2814   if (token2->flags & NAMED_OP)
2815     b = CPP_NAME;
2816 
2817   c = EOF;
2818   if (token2->flags & DIGRAPH)
2819     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
2820   else if (token_spellings[b].category == SPELL_OPERATOR)
2821     c = token_spellings[b].name[0];
2822 
2823   /* Quickly get everything that can paste with an '='.  */
2824   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
2825     return 1;
2826 
2827   switch (a)
2828     {
2829     case CPP_GREATER:	return c == '>';
2830     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
2831     case CPP_PLUS:	return c == '+';
2832     case CPP_MINUS:	return c == '-' || c == '>';
2833     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
2834     case CPP_MOD:	return c == ':' || c == '>';
2835     case CPP_AND:	return c == '&';
2836     case CPP_OR:	return c == '|';
2837     case CPP_COLON:	return c == ':' || c == '>';
2838     case CPP_DEREF:	return c == '*';
2839     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
2840     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
2841     case CPP_NAME:	return ((b == CPP_NUMBER
2842 				 && name_p (pfile, &token2->val.str))
2843 				|| b == CPP_NAME
2844 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
2845     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
2846 				|| c == '.' || c == '+' || c == '-');
2847 				      /* UCNs */
2848     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
2849 				 && b == CPP_NAME)
2850 				|| (CPP_OPTION (pfile, objc)
2851 				    && token1->val.str.text[0] == '@'
2852 				    && (b == CPP_NAME || b == CPP_STRING)));
2853     default:		break;
2854     }
2855 
2856   return 0;
2857 }
2858 
2859 /* Output all the remaining tokens on the current line, and a newline
2860    character, to FP.  Leading whitespace is removed.  If there are
2861    macros, special token padding is not performed.  */
2862 void
cpp_output_line(cpp_reader * pfile,FILE * fp)2863 cpp_output_line (cpp_reader *pfile, FILE *fp)
2864 {
2865   const cpp_token *token;
2866 
2867   token = cpp_get_token (pfile);
2868   while (token->type != CPP_EOF)
2869     {
2870       cpp_output_token (token, fp);
2871       token = cpp_get_token (pfile);
2872       if (token->flags & PREV_WHITE)
2873 	putc (' ', fp);
2874     }
2875 
2876   putc ('\n', fp);
2877 }
2878 
2879 /* Return a string representation of all the remaining tokens on the
2880    current line.  The result is allocated using xmalloc and must be
2881    freed by the caller.  */
2882 unsigned char *
cpp_output_line_to_string(cpp_reader * pfile,const unsigned char * dir_name)2883 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
2884 {
2885   const cpp_token *token;
2886   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
2887   unsigned int alloced = 120 + out;
2888   unsigned char *result = (unsigned char *) xmalloc (alloced);
2889 
2890   /* If DIR_NAME is empty, there are no initial contents.  */
2891   if (dir_name)
2892     {
2893       sprintf ((char *) result, "#%s ", dir_name);
2894       out += 2;
2895     }
2896 
2897   token = cpp_get_token (pfile);
2898   while (token->type != CPP_EOF)
2899     {
2900       unsigned char *last;
2901       /* Include room for a possible space and the terminating nul.  */
2902       unsigned int len = cpp_token_len (token) + 2;
2903 
2904       if (out + len > alloced)
2905 	{
2906 	  alloced *= 2;
2907 	  if (out + len > alloced)
2908 	    alloced = out + len;
2909 	  result = (unsigned char *) xrealloc (result, alloced);
2910 	}
2911 
2912       last = cpp_spell_token (pfile, token, &result[out], 0);
2913       out = last - result;
2914 
2915       token = cpp_get_token (pfile);
2916       if (token->flags & PREV_WHITE)
2917 	result[out++] = ' ';
2918     }
2919 
2920   result[out] = '\0';
2921   return result;
2922 }
2923 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest data area new_buff allocates; shorter requests are raised
   to this size.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer that _cpp_get_buff will hand out for a request
   of MIN_SIZE bytes; bigger free buffers are passed over.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF: MIN_EXTRA plus twice the
   number of bytes between BUFF->cur and BUFF->limit.  Both arguments
   are parenthesized so that an expression argument (for instance
   `a ? b : c') keeps its intended grouping after expansion.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2938 
2939 /* Create a new allocation buffer.  Place the control block at the end
2940    of the buffer, so that buffer overflows will cause immediate chaos.  */
2941 static _cpp_buff *
new_buff(size_t len)2942 new_buff (size_t len)
2943 {
2944   _cpp_buff *result;
2945   unsigned char *base;
2946 
2947   if (len < MIN_BUFF_SIZE)
2948     len = MIN_BUFF_SIZE;
2949   len = CPP_ALIGN (len);
2950 
2951 #ifdef ENABLE_VALGRIND_CHECKING
2952   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
2953      struct first.  */
2954   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
2955   base = XNEWVEC (unsigned char, len + slen);
2956   result = (_cpp_buff *) base;
2957   base += slen;
2958 #else
2959   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2960   result = (_cpp_buff *) (base + len);
2961 #endif
2962   result->base = base;
2963   result->cur = base;
2964   result->limit = base + len;
2965   result->next = NULL;
2966   return result;
2967 }
2968 
2969 /* Place a chain of unwanted allocation buffers on the free list.  */
2970 void
_cpp_release_buff(cpp_reader * pfile,_cpp_buff * buff)2971 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2972 {
2973   _cpp_buff *end = buff;
2974 
2975   while (end->next)
2976     end = end->next;
2977   end->next = pfile->free_buffs;
2978   pfile->free_buffs = buff;
2979 }
2980 
2981 /* Return a free buffer of size at least MIN_SIZE.  */
2982 _cpp_buff *
_cpp_get_buff(cpp_reader * pfile,size_t min_size)2983 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2984 {
2985   _cpp_buff *result, **p;
2986 
2987   for (p = &pfile->free_buffs;; p = &(*p)->next)
2988     {
2989       size_t size;
2990 
2991       if (*p == NULL)
2992 	return new_buff (min_size);
2993       result = *p;
2994       size = result->limit - result->base;
2995       /* Return a buffer that's big enough, but don't waste one that's
2996          way too big.  */
2997       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2998 	break;
2999     }
3000 
3001   *p = result->next;
3002   result->next = NULL;
3003   result->cur = result->base;
3004   return result;
3005 }
3006 
3007 /* Creates a new buffer with enough space to hold the uncommitted
3008    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
3009    the excess bytes to the new buffer.  Chains the new buffer after
3010    BUFF, and returns the new buffer.  */
3011 _cpp_buff *
_cpp_append_extend_buff(cpp_reader * pfile,_cpp_buff * buff,size_t min_extra)3012 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
3013 {
3014   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
3015   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
3016 
3017   buff->next = new_buff;
3018   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3019   return new_buff;
3020 }
3021 
3022 /* Creates a new buffer with enough space to hold the uncommitted
3023    remaining bytes of the buffer pointed to by BUFF, and at least
3024    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
3025    Chains the new buffer before the buffer pointed to by BUFF, and
3026    updates the pointer to point to the new buffer.  */
3027 void
_cpp_extend_buff(cpp_reader * pfile,_cpp_buff ** pbuff,size_t min_extra)3028 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
3029 {
3030   _cpp_buff *new_buff, *old_buff = *pbuff;
3031   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3032 
3033   new_buff = _cpp_get_buff (pfile, size);
3034   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3035   new_buff->next = old_buff;
3036   *pbuff = new_buff;
3037 }
3038 
3039 /* Free a chain of buffers starting at BUFF.  */
3040 void
_cpp_free_buff(_cpp_buff * buff)3041 _cpp_free_buff (_cpp_buff *buff)
3042 {
3043   _cpp_buff *next;
3044 
3045   for (; buff; buff = next)
3046     {
3047       next = buff->next;
3048 #ifdef ENABLE_VALGRIND_CHECKING
3049       free (buff);
3050 #else
3051       free (buff->base);
3052 #endif
3053     }
3054 }
3055 
3056 /* Allocate permanent, unaligned storage of length LEN.  */
3057 unsigned char *
_cpp_unaligned_alloc(cpp_reader * pfile,size_t len)3058 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
3059 {
3060   _cpp_buff *buff = pfile->u_buff;
3061   unsigned char *result = buff->cur;
3062 
3063   if (len > (size_t) (buff->limit - result))
3064     {
3065       buff = _cpp_get_buff (pfile, len);
3066       buff->next = pfile->u_buff;
3067       pfile->u_buff = buff;
3068       result = buff->cur;
3069     }
3070 
3071   buff->cur = result + len;
3072   return result;
3073 }
3074 
3075 /* Allocate permanent, unaligned storage of length LEN from a_buff.
3076    That buffer is used for growing allocations when saving macro
3077    replacement lists in a #define, and when parsing an answer to an
3078    assertion in #assert, #unassert or #if (and therefore possibly
3079    whilst expanding macros).  It therefore must not be used by any
3080    code that they might call: specifically the lexer and the guts of
3081    the macro expander.
3082 
3083    All existing other uses clearly fit this restriction: storing
3084    registered pragmas during initialization.  */
3085 unsigned char *
_cpp_aligned_alloc(cpp_reader * pfile,size_t len)3086 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
3087 {
3088   _cpp_buff *buff = pfile->a_buff;
3089   unsigned char *result = buff->cur;
3090 
3091   if (len > (size_t) (buff->limit - result))
3092     {
3093       buff = _cpp_get_buff (pfile, len);
3094       buff->next = pfile->a_buff;
3095       pfile->a_buff = buff;
3096       result = buff->cur;
3097     }
3098 
3099   buff->cur = result + len;
3100   return result;
3101 }
3102 
3103 /* Say which field of TOK is in use.  */
3104 
3105 enum cpp_token_fld_kind
cpp_token_val_index(cpp_token * tok)3106 cpp_token_val_index (cpp_token *tok)
3107 {
3108   switch (TOKEN_SPELL (tok))
3109     {
3110     case SPELL_IDENT:
3111       return CPP_TOKEN_FLD_NODE;
3112     case SPELL_LITERAL:
3113       return CPP_TOKEN_FLD_STR;
3114     case SPELL_OPERATOR:
3115       if (tok->type == CPP_PASTE)
3116 	return CPP_TOKEN_FLD_TOKEN_NO;
3117       else
3118 	return CPP_TOKEN_FLD_NONE;
3119     case SPELL_NONE:
3120       if (tok->type == CPP_MACRO_ARG)
3121 	return CPP_TOKEN_FLD_ARG_NO;
3122       else if (tok->type == CPP_PADDING)
3123 	return CPP_TOKEN_FLD_SOURCE;
3124       else if (tok->type == CPP_PRAGMA)
3125 	return CPP_TOKEN_FLD_PRAGMA;
3126       /* else fall through */
3127     default:
3128       return CPP_TOKEN_FLD_NONE;
3129     }
3130 }
3131 
3132 /* All tokens lexed in R after calling this function will be forced to have
3133    their source_location the same as the location referenced by P, until
3134    cpp_stop_forcing_token_locations is called for R.  */
3135 
3136 void
cpp_force_token_locations(cpp_reader * r,source_location * p)3137 cpp_force_token_locations (cpp_reader *r, source_location *p)
3138 {
3139   r->forced_token_location_p = p;
3140 }
3141 
3142 /* Go back to assigning locations naturally for lexed tokens.  */
3143 
3144 void
cpp_stop_forcing_token_locations(cpp_reader * r)3145 cpp_stop_forcing_token_locations (cpp_reader *r)
3146 {
3147   r->forced_token_location_p = NULL;
3148 }
3149