xref: /openbsd/gnu/usr.bin/gcc/gcc/cpplex.c (revision dd6081ec)
1*c87b03e5Sespie /* CPP Library - lexical analysis.
2*c87b03e5Sespie    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3*c87b03e5Sespie    Contributed by Per Bothner, 1994-95.
4*c87b03e5Sespie    Based on CCCP program by Paul Rubin, June 1986
5*c87b03e5Sespie    Adapted to ANSI C, Richard Stallman, Jan 1987
6*c87b03e5Sespie    Broken out to separate file, Zack Weinberg, Mar 2000
7*c87b03e5Sespie    Single-pass line tokenization by Neil Booth, April 2000
8*c87b03e5Sespie 
9*c87b03e5Sespie This program is free software; you can redistribute it and/or modify it
10*c87b03e5Sespie under the terms of the GNU General Public License as published by the
11*c87b03e5Sespie Free Software Foundation; either version 2, or (at your option) any
12*c87b03e5Sespie later version.
13*c87b03e5Sespie 
14*c87b03e5Sespie This program is distributed in the hope that it will be useful,
15*c87b03e5Sespie but WITHOUT ANY WARRANTY; without even the implied warranty of
16*c87b03e5Sespie MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17*c87b03e5Sespie GNU General Public License for more details.
18*c87b03e5Sespie 
19*c87b03e5Sespie You should have received a copy of the GNU General Public License
20*c87b03e5Sespie along with this program; if not, write to the Free Software
21*c87b03e5Sespie Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22*c87b03e5Sespie 
23*c87b03e5Sespie #include "config.h"
24*c87b03e5Sespie #include "system.h"
25*c87b03e5Sespie #include "cpplib.h"
26*c87b03e5Sespie #include "cpphash.h"
27*c87b03e5Sespie 
28*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
29*c87b03e5Sespie #include "mbchar.h"
30*c87b03e5Sespie #include <locale.h>
31*c87b03e5Sespie #endif
32*c87b03e5Sespie 
/* Spelling categories used by the token_spellings table.  Tokens with
   SPELL_STRING store their spelling in the token list, and its
   length in token->val.name.len.  The enumerator order is
   significant: SPELL_OPERATOR must remain the first (zero) value.  */
enum spell_type
{
  SPELL_OPERATOR,	/* Category of every entry generated by OP.  */
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,		/* Spelling is stored with the token itself.  */
  SPELL_NONE
};
44*c87b03e5Sespie 
45*c87b03e5Sespie struct token_spelling
46*c87b03e5Sespie {
47*c87b03e5Sespie   enum spell_type category;
48*c87b03e5Sespie   const unsigned char *name;
49*c87b03e5Sespie };
50*c87b03e5Sespie 
51*c87b03e5Sespie static const unsigned char *const digraph_spellings[] =
52*c87b03e5Sespie { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
53*c87b03e5Sespie 
54*c87b03e5Sespie #define OP(e, s) { SPELL_OPERATOR, U s           },
55*c87b03e5Sespie #define TK(e, s) { s,              U STRINGX (e) },
56*c87b03e5Sespie static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
57*c87b03e5Sespie #undef OP
58*c87b03e5Sespie #undef TK
59*c87b03e5Sespie 
60*c87b03e5Sespie #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
61*c87b03e5Sespie #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
62*c87b03e5Sespie #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
63*c87b03e5Sespie 
64*c87b03e5Sespie static void handle_newline PARAMS ((cpp_reader *));
65*c87b03e5Sespie static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
66*c87b03e5Sespie static cppchar_t get_effective_char PARAMS ((cpp_reader *));
67*c87b03e5Sespie 
68*c87b03e5Sespie static int skip_block_comment PARAMS ((cpp_reader *));
69*c87b03e5Sespie static int skip_line_comment PARAMS ((cpp_reader *));
70*c87b03e5Sespie static void adjust_column PARAMS ((cpp_reader *));
71*c87b03e5Sespie static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
72*c87b03e5Sespie static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
73*c87b03e5Sespie static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
74*c87b03e5Sespie 				  unsigned int *));
75*c87b03e5Sespie static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
76*c87b03e5Sespie static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
77*c87b03e5Sespie static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
78*c87b03e5Sespie static bool trigraph_p PARAMS ((cpp_reader *));
79*c87b03e5Sespie static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
80*c87b03e5Sespie 				  cppchar_t));
81*c87b03e5Sespie static bool continue_after_nul PARAMS ((cpp_reader *));
82*c87b03e5Sespie static int name_p PARAMS ((cpp_reader *, const cpp_string *));
83*c87b03e5Sespie static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
84*c87b03e5Sespie 				   const unsigned char *, cppchar_t *));
85*c87b03e5Sespie static tokenrun *next_tokenrun PARAMS ((tokenrun *));
86*c87b03e5Sespie 
87*c87b03e5Sespie static unsigned int hex_digit_value PARAMS ((unsigned int));
88*c87b03e5Sespie static _cpp_buff *new_buff PARAMS ((size_t));
89*c87b03e5Sespie 
90*c87b03e5Sespie /* Utility routine:
91*c87b03e5Sespie 
92*c87b03e5Sespie    Compares, the token TOKEN to the NUL-terminated string STRING.
93*c87b03e5Sespie    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
94*c87b03e5Sespie int
cpp_ideq(token,string)95*c87b03e5Sespie cpp_ideq (token, string)
96*c87b03e5Sespie      const cpp_token *token;
97*c87b03e5Sespie      const char *string;
98*c87b03e5Sespie {
99*c87b03e5Sespie   if (token->type != CPP_NAME)
100*c87b03e5Sespie     return 0;
101*c87b03e5Sespie 
102*c87b03e5Sespie   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
103*c87b03e5Sespie }
104*c87b03e5Sespie 
105*c87b03e5Sespie /* Call when meeting a newline, assumed to be in buffer->cur[-1].
106*c87b03e5Sespie    Returns with buffer->cur pointing to the character immediately
107*c87b03e5Sespie    following the newline (combination).  */
108*c87b03e5Sespie static void
handle_newline(pfile)109*c87b03e5Sespie handle_newline (pfile)
110*c87b03e5Sespie      cpp_reader *pfile;
111*c87b03e5Sespie {
112*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
113*c87b03e5Sespie 
114*c87b03e5Sespie   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
115*c87b03e5Sespie      only accept CR-LF; maybe we should fall back to that behavior?  */
116*c87b03e5Sespie   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
117*c87b03e5Sespie     buffer->cur++;
118*c87b03e5Sespie 
119*c87b03e5Sespie   buffer->line_base = buffer->cur;
120*c87b03e5Sespie   buffer->col_adjust = 0;
121*c87b03e5Sespie   pfile->line++;
122*c87b03e5Sespie }
123*c87b03e5Sespie 
124*c87b03e5Sespie /* Subroutine of skip_escaped_newlines; called when a 3-character
125*c87b03e5Sespie    sequence beginning with "??" is encountered.  buffer->cur points to
126*c87b03e5Sespie    the second '?'.
127*c87b03e5Sespie 
128*c87b03e5Sespie    Warn if necessary, and returns true if the sequence forms a
129*c87b03e5Sespie    trigraph and the trigraph should be honored.  */
130*c87b03e5Sespie static bool
trigraph_p(pfile)131*c87b03e5Sespie trigraph_p (pfile)
132*c87b03e5Sespie      cpp_reader *pfile;
133*c87b03e5Sespie {
134*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
135*c87b03e5Sespie   cppchar_t from_char = buffer->cur[1];
136*c87b03e5Sespie   bool accept;
137*c87b03e5Sespie 
138*c87b03e5Sespie   if (!_cpp_trigraph_map[from_char])
139*c87b03e5Sespie     return false;
140*c87b03e5Sespie 
141*c87b03e5Sespie   accept = CPP_OPTION (pfile, trigraphs);
142*c87b03e5Sespie 
143*c87b03e5Sespie   /* Don't warn about trigraphs in comments.  */
144*c87b03e5Sespie   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
145*c87b03e5Sespie     {
146*c87b03e5Sespie       if (accept)
147*c87b03e5Sespie 	cpp_error_with_line (pfile, DL_WARNING,
148*c87b03e5Sespie 			     pfile->line, CPP_BUF_COL (buffer) - 1,
149*c87b03e5Sespie 			     "trigraph ??%c converted to %c",
150*c87b03e5Sespie 			     (int) from_char,
151*c87b03e5Sespie 			     (int) _cpp_trigraph_map[from_char]);
152*c87b03e5Sespie       else if (buffer->cur != buffer->last_Wtrigraphs)
153*c87b03e5Sespie 	{
154*c87b03e5Sespie 	  buffer->last_Wtrigraphs = buffer->cur;
155*c87b03e5Sespie 	  cpp_error_with_line (pfile, DL_WARNING,
156*c87b03e5Sespie 			       pfile->line, CPP_BUF_COL (buffer) - 1,
157*c87b03e5Sespie 			       "trigraph ??%c ignored", (int) from_char);
158*c87b03e5Sespie 	}
159*c87b03e5Sespie     }
160*c87b03e5Sespie 
161*c87b03e5Sespie   return accept;
162*c87b03e5Sespie }
163*c87b03e5Sespie 
164*c87b03e5Sespie /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
165*c87b03e5Sespie    lie in buffer->cur[-1].  Returns the next byte, which will be in
166*c87b03e5Sespie    buffer->cur[-1].  This routine performs preprocessing stages 1 and
167*c87b03e5Sespie    2 of the ISO C standard.  */
168*c87b03e5Sespie static cppchar_t
skip_escaped_newlines(pfile)169*c87b03e5Sespie skip_escaped_newlines (pfile)
170*c87b03e5Sespie      cpp_reader *pfile;
171*c87b03e5Sespie {
172*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
173*c87b03e5Sespie   cppchar_t next = buffer->cur[-1];
174*c87b03e5Sespie 
175*c87b03e5Sespie   /* Only do this if we apply stages 1 and 2.  */
176*c87b03e5Sespie   if (!buffer->from_stage3)
177*c87b03e5Sespie     {
178*c87b03e5Sespie       const unsigned char *saved_cur;
179*c87b03e5Sespie       cppchar_t next1;
180*c87b03e5Sespie 
181*c87b03e5Sespie       do
182*c87b03e5Sespie 	{
183*c87b03e5Sespie 	  if (next == '?')
184*c87b03e5Sespie 	    {
185*c87b03e5Sespie 	      if (buffer->cur[0] != '?' || !trigraph_p (pfile))
186*c87b03e5Sespie 		break;
187*c87b03e5Sespie 
188*c87b03e5Sespie 	      /* Translate the trigraph.  */
189*c87b03e5Sespie 	      next = _cpp_trigraph_map[buffer->cur[1]];
190*c87b03e5Sespie 	      buffer->cur += 2;
191*c87b03e5Sespie 	      if (next != '\\')
192*c87b03e5Sespie 		break;
193*c87b03e5Sespie 	    }
194*c87b03e5Sespie 
195*c87b03e5Sespie 	  if (buffer->cur == buffer->rlimit)
196*c87b03e5Sespie 	    break;
197*c87b03e5Sespie 
198*c87b03e5Sespie 	  /* We have a backslash, and room for at least one more
199*c87b03e5Sespie 	     character.  Skip horizontal whitespace.  */
200*c87b03e5Sespie 	  saved_cur = buffer->cur;
201*c87b03e5Sespie 	  do
202*c87b03e5Sespie 	    next1 = *buffer->cur++;
203*c87b03e5Sespie 	  while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
204*c87b03e5Sespie 
205*c87b03e5Sespie 	  if (!is_vspace (next1))
206*c87b03e5Sespie 	    {
207*c87b03e5Sespie 	      buffer->cur = saved_cur;
208*c87b03e5Sespie 	      break;
209*c87b03e5Sespie 	    }
210*c87b03e5Sespie 
211*c87b03e5Sespie 	  if (saved_cur != buffer->cur - 1
212*c87b03e5Sespie 	      && !pfile->state.lexing_comment)
213*c87b03e5Sespie 	    cpp_error (pfile, DL_WARNING,
214*c87b03e5Sespie 		       "backslash and newline separated by space");
215*c87b03e5Sespie 
216*c87b03e5Sespie 	  handle_newline (pfile);
217*c87b03e5Sespie 	  buffer->backup_to = buffer->cur;
218*c87b03e5Sespie 	  if (buffer->cur == buffer->rlimit)
219*c87b03e5Sespie 	    {
220*c87b03e5Sespie 	      cpp_error (pfile, DL_PEDWARN,
221*c87b03e5Sespie 			 "backslash-newline at end of file");
222*c87b03e5Sespie 	      next = EOF;
223*c87b03e5Sespie 	    }
224*c87b03e5Sespie 	  else
225*c87b03e5Sespie 	    next = *buffer->cur++;
226*c87b03e5Sespie 	}
227*c87b03e5Sespie       while (next == '\\' || next == '?');
228*c87b03e5Sespie     }
229*c87b03e5Sespie 
230*c87b03e5Sespie   return next;
231*c87b03e5Sespie }
232*c87b03e5Sespie 
233*c87b03e5Sespie /* Obtain the next character, after trigraph conversion and skipping
234*c87b03e5Sespie    an arbitrarily long string of escaped newlines.  The common case of
235*c87b03e5Sespie    no trigraphs or escaped newlines falls through quickly.  On return,
236*c87b03e5Sespie    buffer->backup_to points to where to return to if the character is
237*c87b03e5Sespie    not to be processed.  */
238*c87b03e5Sespie static cppchar_t
get_effective_char(pfile)239*c87b03e5Sespie get_effective_char (pfile)
240*c87b03e5Sespie      cpp_reader *pfile;
241*c87b03e5Sespie {
242*c87b03e5Sespie   cppchar_t next;
243*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
244*c87b03e5Sespie 
245*c87b03e5Sespie   buffer->backup_to = buffer->cur;
246*c87b03e5Sespie   next = *buffer->cur++;
247*c87b03e5Sespie   if (__builtin_expect (next == '?' || next == '\\', 0))
248*c87b03e5Sespie     next = skip_escaped_newlines (pfile);
249*c87b03e5Sespie 
250*c87b03e5Sespie   return next;
251*c87b03e5Sespie }
252*c87b03e5Sespie 
253*c87b03e5Sespie /* Skip a C-style block comment.  We find the end of the comment by
254*c87b03e5Sespie    seeing if an asterisk is before every '/' we encounter.  Returns
255*c87b03e5Sespie    nonzero if comment terminated by EOF, zero otherwise.  */
256*c87b03e5Sespie static int
skip_block_comment(pfile)257*c87b03e5Sespie skip_block_comment (pfile)
258*c87b03e5Sespie      cpp_reader *pfile;
259*c87b03e5Sespie {
260*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
261*c87b03e5Sespie   cppchar_t c = EOF, prevc = EOF;
262*c87b03e5Sespie 
263*c87b03e5Sespie   pfile->state.lexing_comment = 1;
264*c87b03e5Sespie   while (buffer->cur != buffer->rlimit)
265*c87b03e5Sespie     {
266*c87b03e5Sespie       prevc = c, c = *buffer->cur++;
267*c87b03e5Sespie 
268*c87b03e5Sespie       /* FIXME: For speed, create a new character class of characters
269*c87b03e5Sespie 	 of interest inside block comments.  */
270*c87b03e5Sespie       if (c == '?' || c == '\\')
271*c87b03e5Sespie 	c = skip_escaped_newlines (pfile);
272*c87b03e5Sespie 
273*c87b03e5Sespie       /* People like decorating comments with '*', so check for '/'
274*c87b03e5Sespie 	 instead for efficiency.  */
275*c87b03e5Sespie       if (c == '/')
276*c87b03e5Sespie 	{
277*c87b03e5Sespie 	  if (prevc == '*')
278*c87b03e5Sespie 	    break;
279*c87b03e5Sespie 
280*c87b03e5Sespie 	  /* Warn about potential nested comments, but not if the '/'
281*c87b03e5Sespie 	     comes immediately before the true comment delimiter.
282*c87b03e5Sespie 	     Don't bother to get it right across escaped newlines.  */
283*c87b03e5Sespie 	  if (CPP_OPTION (pfile, warn_comments)
284*c87b03e5Sespie 	      && buffer->cur[0] == '*' && buffer->cur[1] != '/')
285*c87b03e5Sespie 	    cpp_error_with_line (pfile, DL_WARNING,
286*c87b03e5Sespie 				 pfile->line, CPP_BUF_COL (buffer),
287*c87b03e5Sespie 				 "\"/*\" within comment");
288*c87b03e5Sespie 	}
289*c87b03e5Sespie       else if (is_vspace (c))
290*c87b03e5Sespie 	handle_newline (pfile);
291*c87b03e5Sespie       else if (c == '\t')
292*c87b03e5Sespie 	adjust_column (pfile);
293*c87b03e5Sespie     }
294*c87b03e5Sespie 
295*c87b03e5Sespie   pfile->state.lexing_comment = 0;
296*c87b03e5Sespie   return c != '/' || prevc != '*';
297*c87b03e5Sespie }
298*c87b03e5Sespie 
299*c87b03e5Sespie /* Skip a C++ line comment, leaving buffer->cur pointing to the
300*c87b03e5Sespie    terminating newline.  Handles escaped newlines.  Returns nonzero
301*c87b03e5Sespie    if a multiline comment.  */
302*c87b03e5Sespie static int
skip_line_comment(pfile)303*c87b03e5Sespie skip_line_comment (pfile)
304*c87b03e5Sespie      cpp_reader *pfile;
305*c87b03e5Sespie {
306*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
307*c87b03e5Sespie   unsigned int orig_line = pfile->line;
308*c87b03e5Sespie   cppchar_t c;
309*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
310*c87b03e5Sespie   wchar_t wc;
311*c87b03e5Sespie   int char_len;
312*c87b03e5Sespie #endif
313*c87b03e5Sespie 
314*c87b03e5Sespie   pfile->state.lexing_comment = 1;
315*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
316*c87b03e5Sespie   /* Reset multibyte conversion state.  */
317*c87b03e5Sespie   (void) local_mbtowc (NULL, NULL, 0);
318*c87b03e5Sespie #endif
319*c87b03e5Sespie   do
320*c87b03e5Sespie     {
321*c87b03e5Sespie       if (buffer->cur == buffer->rlimit)
322*c87b03e5Sespie 	goto at_eof;
323*c87b03e5Sespie 
324*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
325*c87b03e5Sespie       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
326*c87b03e5Sespie 			       buffer->rlimit - buffer->cur);
327*c87b03e5Sespie       if (char_len == -1)
328*c87b03e5Sespie 	{
329*c87b03e5Sespie 	  cpp_error (pfile, DL_WARNING,
330*c87b03e5Sespie 		     "ignoring invalid multibyte character");
331*c87b03e5Sespie 	  char_len = 1;
332*c87b03e5Sespie 	  c = *buffer->cur++;
333*c87b03e5Sespie 	}
334*c87b03e5Sespie       else
335*c87b03e5Sespie 	{
336*c87b03e5Sespie 	  buffer->cur += char_len;
337*c87b03e5Sespie 	  c = wc;
338*c87b03e5Sespie 	}
339*c87b03e5Sespie #else
340*c87b03e5Sespie       c = *buffer->cur++;
341*c87b03e5Sespie #endif
342*c87b03e5Sespie       if (c == '?' || c == '\\')
343*c87b03e5Sespie 	c = skip_escaped_newlines (pfile);
344*c87b03e5Sespie     }
345*c87b03e5Sespie   while (!is_vspace (c));
346*c87b03e5Sespie 
347*c87b03e5Sespie   /* Step back over the newline, except at EOF.  */
348*c87b03e5Sespie   buffer->cur--;
349*c87b03e5Sespie  at_eof:
350*c87b03e5Sespie 
351*c87b03e5Sespie   pfile->state.lexing_comment = 0;
352*c87b03e5Sespie   return orig_line != pfile->line;
353*c87b03e5Sespie }
354*c87b03e5Sespie 
355*c87b03e5Sespie /* pfile->buffer->cur is one beyond the \t character.  Update
356*c87b03e5Sespie    col_adjust so we track the column correctly.  */
357*c87b03e5Sespie static void
adjust_column(pfile)358*c87b03e5Sespie adjust_column (pfile)
359*c87b03e5Sespie      cpp_reader *pfile;
360*c87b03e5Sespie {
361*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
362*c87b03e5Sespie   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
363*c87b03e5Sespie 
364*c87b03e5Sespie   /* Round it up to multiple of the tabstop, but subtract 1 since the
365*c87b03e5Sespie      tab itself occupies a character position.  */
366*c87b03e5Sespie   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
367*c87b03e5Sespie 			 - col % CPP_OPTION (pfile, tabstop)) - 1;
368*c87b03e5Sespie }
369*c87b03e5Sespie 
370*c87b03e5Sespie /* Skips whitespace, saving the next non-whitespace character.
371*c87b03e5Sespie    Adjusts pfile->col_adjust to account for tabs.  Without this,
372*c87b03e5Sespie    tokens might be assigned an incorrect column.  */
373*c87b03e5Sespie static int
skip_whitespace(pfile,c)374*c87b03e5Sespie skip_whitespace (pfile, c)
375*c87b03e5Sespie      cpp_reader *pfile;
376*c87b03e5Sespie      cppchar_t c;
377*c87b03e5Sespie {
378*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
379*c87b03e5Sespie   unsigned int warned = 0;
380*c87b03e5Sespie 
381*c87b03e5Sespie   do
382*c87b03e5Sespie     {
383*c87b03e5Sespie       /* Horizontal space always OK.  */
384*c87b03e5Sespie       if (c == ' ')
385*c87b03e5Sespie 	;
386*c87b03e5Sespie       else if (c == '\t')
387*c87b03e5Sespie 	adjust_column (pfile);
388*c87b03e5Sespie       /* Just \f \v or \0 left.  */
389*c87b03e5Sespie       else if (c == '\0')
390*c87b03e5Sespie 	{
391*c87b03e5Sespie 	  if (buffer->cur - 1 == buffer->rlimit)
392*c87b03e5Sespie 	    return 0;
393*c87b03e5Sespie 	  if (!warned)
394*c87b03e5Sespie 	    {
395*c87b03e5Sespie 	      cpp_error (pfile, DL_WARNING, "null character(s) ignored");
396*c87b03e5Sespie 	      warned = 1;
397*c87b03e5Sespie 	    }
398*c87b03e5Sespie 	}
399*c87b03e5Sespie       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400*c87b03e5Sespie 	cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
401*c87b03e5Sespie 			     CPP_BUF_COL (buffer),
402*c87b03e5Sespie 			     "%s in preprocessing directive",
403*c87b03e5Sespie 			     c == '\f' ? "form feed" : "vertical tab");
404*c87b03e5Sespie 
405*c87b03e5Sespie       c = *buffer->cur++;
406*c87b03e5Sespie     }
407*c87b03e5Sespie   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
408*c87b03e5Sespie   while (is_nvspace (c));
409*c87b03e5Sespie 
410*c87b03e5Sespie   buffer->cur--;
411*c87b03e5Sespie   return 1;
412*c87b03e5Sespie }
413*c87b03e5Sespie 
414*c87b03e5Sespie /* See if the characters of a number token are valid in a name (no
415*c87b03e5Sespie    '.', '+' or '-').  */
416*c87b03e5Sespie static int
name_p(pfile,string)417*c87b03e5Sespie name_p (pfile, string)
418*c87b03e5Sespie      cpp_reader *pfile;
419*c87b03e5Sespie      const cpp_string *string;
420*c87b03e5Sespie {
421*c87b03e5Sespie   unsigned int i;
422*c87b03e5Sespie 
423*c87b03e5Sespie   for (i = 0; i < string->len; i++)
424*c87b03e5Sespie     if (!is_idchar (string->text[i]))
425*c87b03e5Sespie       return 0;
426*c87b03e5Sespie 
427*c87b03e5Sespie   return 1;
428*c87b03e5Sespie }
429*c87b03e5Sespie 
430*c87b03e5Sespie /* Parse an identifier, skipping embedded backslash-newlines.  This is
431*c87b03e5Sespie    a critical inner loop.  The common case is an identifier which has
432*c87b03e5Sespie    not been split by backslash-newline, does not contain a dollar
433*c87b03e5Sespie    sign, and has already been scanned (roughly 10:1 ratio of
434*c87b03e5Sespie    seen:unseen identifiers in normal code; the distribution is
435*c87b03e5Sespie    Poisson-like).  Second most common case is a new identifier, not
436*c87b03e5Sespie    split and no dollar sign.  The other possibilities are rare and
437*c87b03e5Sespie    have been relegated to parse_slow.  */
438*c87b03e5Sespie static cpp_hashnode *
parse_identifier(pfile)439*c87b03e5Sespie parse_identifier (pfile)
440*c87b03e5Sespie      cpp_reader *pfile;
441*c87b03e5Sespie {
442*c87b03e5Sespie   cpp_hashnode *result;
443*c87b03e5Sespie   const uchar *cur, *base;
444*c87b03e5Sespie 
445*c87b03e5Sespie   /* Fast-path loop.  Skim over a normal identifier.
446*c87b03e5Sespie      N.B. ISIDNUM does not include $.  */
447*c87b03e5Sespie   cur = pfile->buffer->cur;
448*c87b03e5Sespie   while (ISIDNUM (*cur))
449*c87b03e5Sespie     cur++;
450*c87b03e5Sespie 
451*c87b03e5Sespie   /* Check for slow-path cases.  */
452*c87b03e5Sespie   if (*cur == '?' || *cur == '\\' || *cur == '$')
453*c87b03e5Sespie     {
454*c87b03e5Sespie       unsigned int len;
455*c87b03e5Sespie 
456*c87b03e5Sespie       base = parse_slow (pfile, cur, 0, &len);
457*c87b03e5Sespie       result = (cpp_hashnode *)
458*c87b03e5Sespie 	ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
459*c87b03e5Sespie     }
460*c87b03e5Sespie   else
461*c87b03e5Sespie     {
462*c87b03e5Sespie       base = pfile->buffer->cur - 1;
463*c87b03e5Sespie       pfile->buffer->cur = cur;
464*c87b03e5Sespie       result = (cpp_hashnode *)
465*c87b03e5Sespie 	ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
466*c87b03e5Sespie     }
467*c87b03e5Sespie 
468*c87b03e5Sespie   /* Rarely, identifiers require diagnostics when lexed.
469*c87b03e5Sespie      XXX Has to be forced out of the fast path.  */
470*c87b03e5Sespie   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
471*c87b03e5Sespie 			&& !pfile->state.skipping, 0))
472*c87b03e5Sespie     {
473*c87b03e5Sespie       /* It is allowed to poison the same identifier twice.  */
474*c87b03e5Sespie       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
475*c87b03e5Sespie 	cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
476*c87b03e5Sespie 		   NODE_NAME (result));
477*c87b03e5Sespie 
478*c87b03e5Sespie       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
479*c87b03e5Sespie 	 replacement list of a variadic macro.  */
480*c87b03e5Sespie       if (result == pfile->spec_nodes.n__VA_ARGS__
481*c87b03e5Sespie 	  && !pfile->state.va_args_ok)
482*c87b03e5Sespie 	cpp_error (pfile, DL_PEDWARN,
483*c87b03e5Sespie 	"__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
484*c87b03e5Sespie     }
485*c87b03e5Sespie 
486*c87b03e5Sespie   return result;
487*c87b03e5Sespie }
488*c87b03e5Sespie 
489*c87b03e5Sespie /* Slow path.  This handles numbers and identifiers which have been
490*c87b03e5Sespie    split, or contain dollar signs.  The part of the token from
491*c87b03e5Sespie    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
492*c87b03e5Sespie    1 if it's a number, and 2 if it has a leading period.  Returns a
493*c87b03e5Sespie    pointer to the token's NUL-terminated spelling in permanent
494*c87b03e5Sespie    storage, and sets PLEN to its length.  */
495*c87b03e5Sespie static uchar *
parse_slow(pfile,cur,number_p,plen)496*c87b03e5Sespie parse_slow (pfile, cur, number_p, plen)
497*c87b03e5Sespie      cpp_reader *pfile;
498*c87b03e5Sespie      const uchar *cur;
499*c87b03e5Sespie      int number_p;
500*c87b03e5Sespie      unsigned int *plen;
501*c87b03e5Sespie {
502*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
503*c87b03e5Sespie   const uchar *base = buffer->cur - 1;
504*c87b03e5Sespie   struct obstack *stack = &pfile->hash_table->stack;
505*c87b03e5Sespie   unsigned int c, prevc, saw_dollar = 0;
506*c87b03e5Sespie 
507*c87b03e5Sespie   /* Place any leading period.  */
508*c87b03e5Sespie   if (number_p == 2)
509*c87b03e5Sespie     obstack_1grow (stack, '.');
510*c87b03e5Sespie 
511*c87b03e5Sespie   /* Copy the part of the token which is known to be okay.  */
512*c87b03e5Sespie   obstack_grow (stack, base, cur - base);
513*c87b03e5Sespie 
514*c87b03e5Sespie   /* Now process the part which isn't.  We are looking at one of
515*c87b03e5Sespie      '$', '\\', or '?' on entry to this loop.  */
516*c87b03e5Sespie   prevc = cur[-1];
517*c87b03e5Sespie   c = *cur++;
518*c87b03e5Sespie   buffer->cur = cur;
519*c87b03e5Sespie   for (;;)
520*c87b03e5Sespie     {
521*c87b03e5Sespie       /* Potential escaped newline?  */
522*c87b03e5Sespie       buffer->backup_to = buffer->cur - 1;
523*c87b03e5Sespie       if (c == '?' || c == '\\')
524*c87b03e5Sespie 	c = skip_escaped_newlines (pfile);
525*c87b03e5Sespie 
526*c87b03e5Sespie       if (!is_idchar (c))
527*c87b03e5Sespie 	{
528*c87b03e5Sespie 	  if (!number_p)
529*c87b03e5Sespie 	    break;
530*c87b03e5Sespie 	  if (c != '.' && !VALID_SIGN (c, prevc))
531*c87b03e5Sespie 	    break;
532*c87b03e5Sespie 	}
533*c87b03e5Sespie 
534*c87b03e5Sespie       /* Handle normal identifier characters in this loop.  */
535*c87b03e5Sespie       do
536*c87b03e5Sespie 	{
537*c87b03e5Sespie 	  prevc = c;
538*c87b03e5Sespie 	  obstack_1grow (stack, c);
539*c87b03e5Sespie 
540*c87b03e5Sespie 	  if (c == '$')
541*c87b03e5Sespie 	    saw_dollar++;
542*c87b03e5Sespie 
543*c87b03e5Sespie 	  c = *buffer->cur++;
544*c87b03e5Sespie 	}
545*c87b03e5Sespie       while (is_idchar (c));
546*c87b03e5Sespie     }
547*c87b03e5Sespie 
548*c87b03e5Sespie   /* Step back over the unwanted char.  */
549*c87b03e5Sespie   BACKUP ();
550*c87b03e5Sespie 
551*c87b03e5Sespie   /* $ is not an identifier character in the standard, but is commonly
552*c87b03e5Sespie      accepted as an extension.  Don't warn about it in skipped
553*c87b03e5Sespie      conditional blocks.  */
554*c87b03e5Sespie   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
555*c87b03e5Sespie     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
556*c87b03e5Sespie 
557*c87b03e5Sespie   /* Identifiers and numbers are null-terminated.  */
558*c87b03e5Sespie   *plen = obstack_object_size (stack);
559*c87b03e5Sespie   obstack_1grow (stack, '\0');
560*c87b03e5Sespie   return obstack_finish (stack);
561*c87b03e5Sespie }
562*c87b03e5Sespie 
563*c87b03e5Sespie /* Parse a number, beginning with character C, skipping embedded
564*c87b03e5Sespie    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
565*c87b03e5Sespie    before C.  Place the result in NUMBER.  */
566*c87b03e5Sespie static void
parse_number(pfile,number,leading_period)567*c87b03e5Sespie parse_number (pfile, number, leading_period)
568*c87b03e5Sespie      cpp_reader *pfile;
569*c87b03e5Sespie      cpp_string *number;
570*c87b03e5Sespie      int leading_period;
571*c87b03e5Sespie {
572*c87b03e5Sespie   const uchar *cur;
573*c87b03e5Sespie 
574*c87b03e5Sespie   /* Fast-path loop.  Skim over a normal number.
575*c87b03e5Sespie      N.B. ISIDNUM does not include $.  */
576*c87b03e5Sespie   cur = pfile->buffer->cur;
577*c87b03e5Sespie   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
578*c87b03e5Sespie     cur++;
579*c87b03e5Sespie 
580*c87b03e5Sespie   /* Check for slow-path cases.  */
581*c87b03e5Sespie   if (*cur == '?' || *cur == '\\' || *cur == '$')
582*c87b03e5Sespie     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
583*c87b03e5Sespie   else
584*c87b03e5Sespie     {
585*c87b03e5Sespie       const uchar *base = pfile->buffer->cur - 1;
586*c87b03e5Sespie       uchar *dest;
587*c87b03e5Sespie 
588*c87b03e5Sespie       number->len = cur - base + leading_period;
589*c87b03e5Sespie       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
590*c87b03e5Sespie       dest[number->len] = '\0';
591*c87b03e5Sespie       number->text = dest;
592*c87b03e5Sespie 
593*c87b03e5Sespie       if (leading_period)
594*c87b03e5Sespie 	*dest++ = '.';
595*c87b03e5Sespie       memcpy (dest, base, cur - base);
596*c87b03e5Sespie       pfile->buffer->cur = cur;
597*c87b03e5Sespie     }
598*c87b03e5Sespie }
599*c87b03e5Sespie 
600*c87b03e5Sespie /* Subroutine of parse_string.  */
601*c87b03e5Sespie static int
unescaped_terminator_p(pfile,dest)602*c87b03e5Sespie unescaped_terminator_p (pfile, dest)
603*c87b03e5Sespie      cpp_reader *pfile;
604*c87b03e5Sespie      const unsigned char *dest;
605*c87b03e5Sespie {
606*c87b03e5Sespie   const unsigned char *start, *temp;
607*c87b03e5Sespie 
608*c87b03e5Sespie   /* In #include-style directives, terminators are not escapeable.  */
609*c87b03e5Sespie   if (pfile->state.angled_headers)
610*c87b03e5Sespie     return 1;
611*c87b03e5Sespie 
612*c87b03e5Sespie   start = BUFF_FRONT (pfile->u_buff);
613*c87b03e5Sespie 
614*c87b03e5Sespie   /* An odd number of consecutive backslashes represents an escaped
615*c87b03e5Sespie      terminator.  */
616*c87b03e5Sespie   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
617*c87b03e5Sespie     ;
618*c87b03e5Sespie 
619*c87b03e5Sespie   return ((dest - temp) & 1) == 0;
620*c87b03e5Sespie }
621*c87b03e5Sespie 
622*c87b03e5Sespie /* Parses a string, character constant, or angle-bracketed header file
623*c87b03e5Sespie    name.  Handles embedded trigraphs and escaped newlines.  The stored
624*c87b03e5Sespie    string is guaranteed NUL-terminated, but it is not guaranteed that
625*c87b03e5Sespie    this is the first NUL since embedded NULs are preserved.
626*c87b03e5Sespie 
627*c87b03e5Sespie    When this function returns, buffer->cur points to the next
628*c87b03e5Sespie    character to be processed.  */
629*c87b03e5Sespie static void
parse_string(pfile,token,terminator)630*c87b03e5Sespie parse_string (pfile, token, terminator)
631*c87b03e5Sespie      cpp_reader *pfile;
632*c87b03e5Sespie      cpp_token *token;
633*c87b03e5Sespie      cppchar_t terminator;
634*c87b03e5Sespie {
635*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
636*c87b03e5Sespie   unsigned char *dest, *limit;
637*c87b03e5Sespie   cppchar_t c;
638*c87b03e5Sespie   bool warned_nulls = false;
639*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
640*c87b03e5Sespie   wchar_t wc;
641*c87b03e5Sespie   int char_len;
642*c87b03e5Sespie #endif
643*c87b03e5Sespie 
644*c87b03e5Sespie   dest = BUFF_FRONT (pfile->u_buff);
645*c87b03e5Sespie   limit = BUFF_LIMIT (pfile->u_buff);
646*c87b03e5Sespie 
647*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
648*c87b03e5Sespie   /* Reset multibyte conversion state.  */
649*c87b03e5Sespie   (void) local_mbtowc (NULL, NULL, 0);
650*c87b03e5Sespie #endif
651*c87b03e5Sespie   for (;;)
652*c87b03e5Sespie     {
653*c87b03e5Sespie       /* We need room for another char, possibly the terminating NUL.  */
654*c87b03e5Sespie       if ((size_t) (limit - dest) < 1)
655*c87b03e5Sespie 	{
656*c87b03e5Sespie 	  size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
657*c87b03e5Sespie 	  _cpp_extend_buff (pfile, &pfile->u_buff, 2);
658*c87b03e5Sespie 	  dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
659*c87b03e5Sespie 	  limit = BUFF_LIMIT (pfile->u_buff);
660*c87b03e5Sespie 	}
661*c87b03e5Sespie 
662*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
663*c87b03e5Sespie       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
664*c87b03e5Sespie 			       buffer->rlimit - buffer->cur);
665*c87b03e5Sespie       if (char_len == -1)
666*c87b03e5Sespie 	{
667*c87b03e5Sespie 	  cpp_error (pfile, DL_WARNING,
668*c87b03e5Sespie 		     "ignoring invalid multibyte character");
669*c87b03e5Sespie 	  char_len = 1;
670*c87b03e5Sespie 	  c = *buffer->cur++;
671*c87b03e5Sespie 	}
672*c87b03e5Sespie       else
673*c87b03e5Sespie 	{
674*c87b03e5Sespie 	  buffer->cur += char_len;
675*c87b03e5Sespie 	  c = wc;
676*c87b03e5Sespie 	}
677*c87b03e5Sespie #else
678*c87b03e5Sespie       c = *buffer->cur++;
679*c87b03e5Sespie #endif
680*c87b03e5Sespie 
681*c87b03e5Sespie       /* Handle trigraphs, escaped newlines etc.  */
682*c87b03e5Sespie       if (c == '?' || c == '\\')
683*c87b03e5Sespie 	c = skip_escaped_newlines (pfile);
684*c87b03e5Sespie 
685*c87b03e5Sespie       if (c == terminator)
686*c87b03e5Sespie 	{
687*c87b03e5Sespie 	  if (unescaped_terminator_p (pfile, dest))
688*c87b03e5Sespie 	    break;
689*c87b03e5Sespie 	}
690*c87b03e5Sespie       else if (is_vspace (c))
691*c87b03e5Sespie 	{
692*c87b03e5Sespie 	  /* No string literal may extend over multiple lines.  In
693*c87b03e5Sespie 	     assembly language, suppress the error except for <>
694*c87b03e5Sespie 	     includes.  This is a kludge around not knowing where
695*c87b03e5Sespie 	     comments are.  */
696*c87b03e5Sespie 	unterminated:
697*c87b03e5Sespie 	  if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
698*c87b03e5Sespie 	    cpp_error (pfile, DL_ERROR, "missing terminating %c character",
699*c87b03e5Sespie 		       (int) terminator);
700*c87b03e5Sespie 	  buffer->cur--;
701*c87b03e5Sespie 	  break;
702*c87b03e5Sespie 	}
703*c87b03e5Sespie       else if (c == '\0')
704*c87b03e5Sespie 	{
705*c87b03e5Sespie 	  if (buffer->cur - 1 == buffer->rlimit)
706*c87b03e5Sespie 	    goto unterminated;
707*c87b03e5Sespie 	  if (!warned_nulls)
708*c87b03e5Sespie 	    {
709*c87b03e5Sespie 	      warned_nulls = true;
710*c87b03e5Sespie 	      cpp_error (pfile, DL_WARNING,
711*c87b03e5Sespie 			 "null character(s) preserved in literal");
712*c87b03e5Sespie 	    }
713*c87b03e5Sespie 	}
714*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
715*c87b03e5Sespie       if (char_len > 1)
716*c87b03e5Sespie 	{
717*c87b03e5Sespie 	  for ( ; char_len > 0; --char_len)
718*c87b03e5Sespie 	    *dest++ = (*buffer->cur - char_len);
719*c87b03e5Sespie 	}
720*c87b03e5Sespie       else
721*c87b03e5Sespie #endif
722*c87b03e5Sespie 	*dest++ = c;
723*c87b03e5Sespie     }
724*c87b03e5Sespie 
725*c87b03e5Sespie   *dest = '\0';
726*c87b03e5Sespie 
727*c87b03e5Sespie   token->val.str.text = BUFF_FRONT (pfile->u_buff);
728*c87b03e5Sespie   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
729*c87b03e5Sespie   BUFF_FRONT (pfile->u_buff) = dest + 1;
730*c87b03e5Sespie }
731*c87b03e5Sespie 
732*c87b03e5Sespie /* The stored comment includes the comment start and any terminator.  */
733*c87b03e5Sespie static void
save_comment(pfile,token,from,type)734*c87b03e5Sespie save_comment (pfile, token, from, type)
735*c87b03e5Sespie      cpp_reader *pfile;
736*c87b03e5Sespie      cpp_token *token;
737*c87b03e5Sespie      const unsigned char *from;
738*c87b03e5Sespie      cppchar_t type;
739*c87b03e5Sespie {
740*c87b03e5Sespie   unsigned char *buffer;
741*c87b03e5Sespie   unsigned int len, clen;
742*c87b03e5Sespie 
743*c87b03e5Sespie   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
744*c87b03e5Sespie 
745*c87b03e5Sespie   /* C++ comments probably (not definitely) have moved past a new
746*c87b03e5Sespie      line, which we don't want to save in the comment.  */
747*c87b03e5Sespie   if (is_vspace (pfile->buffer->cur[-1]))
748*c87b03e5Sespie     len--;
749*c87b03e5Sespie 
750*c87b03e5Sespie   /* If we are currently in a directive, then we need to store all
751*c87b03e5Sespie      C++ comments as C comments internally, and so we need to
752*c87b03e5Sespie      allocate a little extra space in that case.
753*c87b03e5Sespie 
754*c87b03e5Sespie      Note that the only time we encounter a directive here is
755*c87b03e5Sespie      when we are saving comments in a "#define".  */
756*c87b03e5Sespie   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
757*c87b03e5Sespie 
758*c87b03e5Sespie   buffer = _cpp_unaligned_alloc (pfile, clen);
759*c87b03e5Sespie 
760*c87b03e5Sespie   token->type = CPP_COMMENT;
761*c87b03e5Sespie   token->val.str.len = clen;
762*c87b03e5Sespie   token->val.str.text = buffer;
763*c87b03e5Sespie 
764*c87b03e5Sespie   buffer[0] = '/';
765*c87b03e5Sespie   memcpy (buffer + 1, from, len - 1);
766*c87b03e5Sespie 
767*c87b03e5Sespie   /* Finish conversion to a C comment, if necessary.  */
768*c87b03e5Sespie   if (pfile->state.in_directive && type == '/')
769*c87b03e5Sespie     {
770*c87b03e5Sespie       buffer[1] = '*';
771*c87b03e5Sespie       buffer[clen - 2] = '*';
772*c87b03e5Sespie       buffer[clen - 1] = '/';
773*c87b03e5Sespie     }
774*c87b03e5Sespie }
775*c87b03e5Sespie 
776*c87b03e5Sespie /* Allocate COUNT tokens for RUN.  */
777*c87b03e5Sespie void
_cpp_init_tokenrun(run,count)778*c87b03e5Sespie _cpp_init_tokenrun (run, count)
779*c87b03e5Sespie      tokenrun *run;
780*c87b03e5Sespie      unsigned int count;
781*c87b03e5Sespie {
782*c87b03e5Sespie   run->base = xnewvec (cpp_token, count);
783*c87b03e5Sespie   run->limit = run->base + count;
784*c87b03e5Sespie   run->next = NULL;
785*c87b03e5Sespie }
786*c87b03e5Sespie 
787*c87b03e5Sespie /* Returns the next tokenrun, or creates one if there is none.  */
788*c87b03e5Sespie static tokenrun *
next_tokenrun(run)789*c87b03e5Sespie next_tokenrun (run)
790*c87b03e5Sespie      tokenrun *run;
791*c87b03e5Sespie {
792*c87b03e5Sespie   if (run->next == NULL)
793*c87b03e5Sespie     {
794*c87b03e5Sespie       run->next = xnew (tokenrun);
795*c87b03e5Sespie       run->next->prev = run;
796*c87b03e5Sespie       _cpp_init_tokenrun (run->next, 250);
797*c87b03e5Sespie     }
798*c87b03e5Sespie 
799*c87b03e5Sespie   return run->next;
800*c87b03e5Sespie }
801*c87b03e5Sespie 
802*c87b03e5Sespie /* Allocate a single token that is invalidated at the same time as the
803*c87b03e5Sespie    rest of the tokens on the line.  Has its line and col set to the
804*c87b03e5Sespie    same as the last lexed token, so that diagnostics appear in the
805*c87b03e5Sespie    right place.  */
806*c87b03e5Sespie cpp_token *
_cpp_temp_token(pfile)807*c87b03e5Sespie _cpp_temp_token (pfile)
808*c87b03e5Sespie      cpp_reader *pfile;
809*c87b03e5Sespie {
810*c87b03e5Sespie   cpp_token *old, *result;
811*c87b03e5Sespie 
812*c87b03e5Sespie   old = pfile->cur_token - 1;
813*c87b03e5Sespie   if (pfile->cur_token == pfile->cur_run->limit)
814*c87b03e5Sespie     {
815*c87b03e5Sespie       pfile->cur_run = next_tokenrun (pfile->cur_run);
816*c87b03e5Sespie       pfile->cur_token = pfile->cur_run->base;
817*c87b03e5Sespie     }
818*c87b03e5Sespie 
819*c87b03e5Sespie   result = pfile->cur_token++;
820*c87b03e5Sespie   result->line = old->line;
821*c87b03e5Sespie   result->col = old->col;
822*c87b03e5Sespie   return result;
823*c87b03e5Sespie }
824*c87b03e5Sespie 
825*c87b03e5Sespie /* Lex a token into RESULT (external interface).  Takes care of issues
826*c87b03e5Sespie    like directive handling, token lookahead, multiple include
827*c87b03e5Sespie    optimization and skipping.  */
828*c87b03e5Sespie const cpp_token *
_cpp_lex_token(pfile)829*c87b03e5Sespie _cpp_lex_token (pfile)
830*c87b03e5Sespie      cpp_reader *pfile;
831*c87b03e5Sespie {
832*c87b03e5Sespie   cpp_token *result;
833*c87b03e5Sespie 
834*c87b03e5Sespie   for (;;)
835*c87b03e5Sespie     {
836*c87b03e5Sespie       if (pfile->cur_token == pfile->cur_run->limit)
837*c87b03e5Sespie 	{
838*c87b03e5Sespie 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
839*c87b03e5Sespie 	  pfile->cur_token = pfile->cur_run->base;
840*c87b03e5Sespie 	}
841*c87b03e5Sespie 
842*c87b03e5Sespie       if (pfile->lookaheads)
843*c87b03e5Sespie 	{
844*c87b03e5Sespie 	  pfile->lookaheads--;
845*c87b03e5Sespie 	  result = pfile->cur_token++;
846*c87b03e5Sespie 	}
847*c87b03e5Sespie       else
848*c87b03e5Sespie 	result = _cpp_lex_direct (pfile);
849*c87b03e5Sespie 
850*c87b03e5Sespie       if (result->flags & BOL)
851*c87b03e5Sespie 	{
852*c87b03e5Sespie 	  /* Is this a directive.  If _cpp_handle_directive returns
853*c87b03e5Sespie 	     false, it is an assembler #.  */
854*c87b03e5Sespie 	  if (result->type == CPP_HASH
855*c87b03e5Sespie 	      /* 6.10.3 p 11: Directives in a list of macro arguments
856*c87b03e5Sespie 		 gives undefined behavior.  This implementation
857*c87b03e5Sespie 		 handles the directive as normal.  */
858*c87b03e5Sespie 	      && pfile->state.parsing_args != 1
859*c87b03e5Sespie 	      && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
860*c87b03e5Sespie 	    continue;
861*c87b03e5Sespie 	  if (pfile->cb.line_change && !pfile->state.skipping)
862*c87b03e5Sespie 	    (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
863*c87b03e5Sespie 	}
864*c87b03e5Sespie 
865*c87b03e5Sespie       /* We don't skip tokens in directives.  */
866*c87b03e5Sespie       if (pfile->state.in_directive)
867*c87b03e5Sespie 	break;
868*c87b03e5Sespie 
869*c87b03e5Sespie       /* Outside a directive, invalidate controlling macros.  At file
870*c87b03e5Sespie 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
871*c87b03e5Sespie 	 get here and MI optimisation works.  */
872*c87b03e5Sespie       pfile->mi_valid = false;
873*c87b03e5Sespie 
874*c87b03e5Sespie       if (!pfile->state.skipping || result->type == CPP_EOF)
875*c87b03e5Sespie 	break;
876*c87b03e5Sespie     }
877*c87b03e5Sespie 
878*c87b03e5Sespie   return result;
879*c87b03e5Sespie }
880*c87b03e5Sespie 
881*c87b03e5Sespie /* A NUL terminates the current buffer.  For ISO preprocessing this is
882*c87b03e5Sespie    EOF, but for traditional preprocessing it indicates we need a line
883*c87b03e5Sespie    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
884*c87b03e5Sespie    to return a CPP_EOF to the caller.  */
885*c87b03e5Sespie static bool
continue_after_nul(pfile)886*c87b03e5Sespie continue_after_nul (pfile)
887*c87b03e5Sespie      cpp_reader *pfile;
888*c87b03e5Sespie {
889*c87b03e5Sespie   cpp_buffer *buffer = pfile->buffer;
890*c87b03e5Sespie   bool more = false;
891*c87b03e5Sespie 
892*c87b03e5Sespie   buffer->saved_flags = BOL;
893*c87b03e5Sespie   if (CPP_OPTION (pfile, traditional))
894*c87b03e5Sespie     {
895*c87b03e5Sespie       if (pfile->state.in_directive)
896*c87b03e5Sespie 	return false;
897*c87b03e5Sespie 
898*c87b03e5Sespie       _cpp_remove_overlay (pfile);
899*c87b03e5Sespie       more = _cpp_read_logical_line_trad (pfile);
900*c87b03e5Sespie       _cpp_overlay_buffer (pfile, pfile->out.base,
901*c87b03e5Sespie 			   pfile->out.cur - pfile->out.base);
902*c87b03e5Sespie       pfile->line = pfile->out.first_line;
903*c87b03e5Sespie     }
904*c87b03e5Sespie   else
905*c87b03e5Sespie     {
906*c87b03e5Sespie       /* Stop parsing arguments with a CPP_EOF.  When we finally come
907*c87b03e5Sespie 	 back here, do the work of popping the buffer.  */
908*c87b03e5Sespie       if (!pfile->state.parsing_args)
909*c87b03e5Sespie 	{
910*c87b03e5Sespie 	  if (buffer->cur != buffer->line_base)
911*c87b03e5Sespie 	    {
912*c87b03e5Sespie 	      /* Non-empty files should end in a newline.  Don't warn
913*c87b03e5Sespie 		 for command line and _Pragma buffers.  */
914*c87b03e5Sespie 	      handle_newline (pfile);
915*c87b03e5Sespie 	    }
916*c87b03e5Sespie 
917*c87b03e5Sespie 	  /* Similarly, finish an in-progress directive with CPP_EOF
918*c87b03e5Sespie 	     before popping the buffer.  */
919*c87b03e5Sespie 	  if (!pfile->state.in_directive && buffer->prev)
920*c87b03e5Sespie 	    {
921*c87b03e5Sespie 	      more = !buffer->return_at_eof;
922*c87b03e5Sespie 	      _cpp_pop_buffer (pfile);
923*c87b03e5Sespie 	    }
924*c87b03e5Sespie 	}
925*c87b03e5Sespie     }
926*c87b03e5Sespie 
927*c87b03e5Sespie   return more;
928*c87b03e5Sespie }
929*c87b03e5Sespie 
/* Set result->type to THEN_TYPE if the next effective character is
   CHAR; otherwise apply BACKUP () and set result->type to ELSE_TYPE.
   Expects `pfile' and `result' to be in scope where it is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do {						\
    if (get_effective_char (pfile) != CHAR)	\
      {						\
        BACKUP ();				\
        result->type = ELSE_TYPE;		\
      }						\
    else					\
      result->type = THEN_TYPE;			\
  } while (0)
940*c87b03e5Sespie 
941*c87b03e5Sespie /* Lex a token into pfile->cur_token, which is also incremented, to
942*c87b03e5Sespie    get diagnostics pointing to the correct location.
943*c87b03e5Sespie 
944*c87b03e5Sespie    Does not handle issues such as token lookahead, multiple-include
945*c87b03e5Sespie    optimisation, directives, skipping etc.  This function is only
946*c87b03e5Sespie    suitable for use by _cpp_lex_token, and in special cases like
947*c87b03e5Sespie    lex_expansion_token which doesn't care for any of these issues.
948*c87b03e5Sespie 
949*c87b03e5Sespie    When meeting a newline, returns CPP_EOF if parsing a directive,
950*c87b03e5Sespie    otherwise returns to the start of the token buffer if permissible.
951*c87b03e5Sespie    Returns the location of the lexed token.

   The labels in the body are re-entry points used by the cases of
   the switch: fresh_line reloads the current buffer and moves its
   saved flags onto the token, update_tokens_line re-stamps the
   token's line number, skipped_white reads the next character and
   records its column, and trigraph dispatches again on a character
   already held in C.  start_ident and random_char are jump targets
   for the identifier and CPP_OTHER cases respectively.  */
952*c87b03e5Sespie cpp_token *
_cpp_lex_direct(pfile)953*c87b03e5Sespie _cpp_lex_direct (pfile)
954*c87b03e5Sespie      cpp_reader *pfile;
955*c87b03e5Sespie {
956*c87b03e5Sespie   cppchar_t c;
957*c87b03e5Sespie   cpp_buffer *buffer;
958*c87b03e5Sespie   const unsigned char *comment_start;
959*c87b03e5Sespie   cpp_token *result = pfile->cur_token++;
960*c87b03e5Sespie 
961*c87b03e5Sespie  fresh_line:
962*c87b03e5Sespie   buffer = pfile->buffer;
963*c87b03e5Sespie   result->flags = buffer->saved_flags;
964*c87b03e5Sespie   buffer->saved_flags = 0;
965*c87b03e5Sespie  update_tokens_line:
966*c87b03e5Sespie   result->line = pfile->line;
967*c87b03e5Sespie 
968*c87b03e5Sespie  skipped_white:
969*c87b03e5Sespie   c = *buffer->cur++;
970*c87b03e5Sespie   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
971*c87b03e5Sespie 
972*c87b03e5Sespie  trigraph:
973*c87b03e5Sespie   switch (c)
974*c87b03e5Sespie     {
975*c87b03e5Sespie     case ' ': case '\t': case '\f': case '\v': case '\0':
976*c87b03e5Sespie       result->flags |= PREV_WHITE;
977*c87b03e5Sespie       if (skip_whitespace (pfile, c))
978*c87b03e5Sespie 	goto skipped_white;
979*c87b03e5Sespie 
980*c87b03e5Sespie       /* End of buffer.  */
981*c87b03e5Sespie       buffer->cur--;
982*c87b03e5Sespie       if (continue_after_nul (pfile))
983*c87b03e5Sespie 	goto fresh_line;
984*c87b03e5Sespie       result->type = CPP_EOF;
985*c87b03e5Sespie       break;
986*c87b03e5Sespie 
987*c87b03e5Sespie     case '\n': case '\r':
988*c87b03e5Sespie       handle_newline (pfile);
989*c87b03e5Sespie       buffer->saved_flags = BOL;
      /* Outside a directive a newline is not returned as a token:
	 lexing restarts on the next line.  Inside a directive it
	 ends the directive and is reported as CPP_EOF.  */
990*c87b03e5Sespie       if (! pfile->state.in_directive)
991*c87b03e5Sespie 	{
992*c87b03e5Sespie 	  if (pfile->state.parsing_args == 2)
993*c87b03e5Sespie 	    buffer->saved_flags |= PREV_WHITE;
	  /* Unless tokens must be kept, rewind to the first slot of
	     the base token run and lex the next token into it.  */
994*c87b03e5Sespie 	  if (!pfile->keep_tokens)
995*c87b03e5Sespie 	    {
996*c87b03e5Sespie 	      pfile->cur_run = &pfile->base_run;
997*c87b03e5Sespie 	      result = pfile->base_run.base;
998*c87b03e5Sespie 	      pfile->cur_token = result + 1;
999*c87b03e5Sespie 	    }
1000*c87b03e5Sespie 	  goto fresh_line;
1001*c87b03e5Sespie 	}
1002*c87b03e5Sespie       result->type = CPP_EOF;
1003*c87b03e5Sespie       break;
1004*c87b03e5Sespie 
1005*c87b03e5Sespie     case '?':
1006*c87b03e5Sespie     case '\\':
1007*c87b03e5Sespie       /* These could start an escaped newline, or '?' a trigraph.  Let
1008*c87b03e5Sespie 	 skip_escaped_newlines do all the work.  */
1009*c87b03e5Sespie       {
1010*c87b03e5Sespie 	unsigned int line = pfile->line;
1011*c87b03e5Sespie 
1012*c87b03e5Sespie 	c = skip_escaped_newlines (pfile);
1013*c87b03e5Sespie 	if (line != pfile->line)
1014*c87b03e5Sespie 	  {
1015*c87b03e5Sespie 	    buffer->cur--;
1016*c87b03e5Sespie 	    /* We had at least one escaped newline of some sort.
1017*c87b03e5Sespie 	       Update the token's line and column.  */
1018*c87b03e5Sespie 	    goto update_tokens_line;
1019*c87b03e5Sespie 	  }
1020*c87b03e5Sespie       }
1021*c87b03e5Sespie 
1022*c87b03e5Sespie       /* We are either the original '?' or '\\', or a trigraph.  */
1023*c87b03e5Sespie       if (c == '?')
1024*c87b03e5Sespie 	result->type = CPP_QUERY;
1025*c87b03e5Sespie       else if (c == '\\')
1026*c87b03e5Sespie 	goto random_char;
1027*c87b03e5Sespie       else
1028*c87b03e5Sespie 	goto trigraph;
1029*c87b03e5Sespie       break;
1030*c87b03e5Sespie 
1031*c87b03e5Sespie     case '0': case '1': case '2': case '3': case '4':
1032*c87b03e5Sespie     case '5': case '6': case '7': case '8': case '9':
1033*c87b03e5Sespie       result->type = CPP_NUMBER;
1034*c87b03e5Sespie       parse_number (pfile, &result->val.str, 0);
1035*c87b03e5Sespie       break;
1036*c87b03e5Sespie 
1037*c87b03e5Sespie     case 'L':
1038*c87b03e5Sespie       /* 'L' may introduce wide characters or strings.  */
1039*c87b03e5Sespie       {
1040*c87b03e5Sespie 	const unsigned char *pos = buffer->cur;
1041*c87b03e5Sespie 
1042*c87b03e5Sespie 	c = get_effective_char (pfile);
1043*c87b03e5Sespie 	if (c == '\'' || c == '"')
1044*c87b03e5Sespie 	  {
1045*c87b03e5Sespie 	    result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1046*c87b03e5Sespie 	    parse_string (pfile, result, c);
1047*c87b03e5Sespie 	    break;
1048*c87b03e5Sespie 	  }
	/* Not a wide literal: restore the read position saved above
	   and lex the 'L' as the start of an identifier.  */
1049*c87b03e5Sespie 	buffer->cur = pos;
1050*c87b03e5Sespie       }
1051*c87b03e5Sespie       /* Fall through.  */
1052*c87b03e5Sespie 
1053*c87b03e5Sespie     start_ident:
1054*c87b03e5Sespie     case '_':
1055*c87b03e5Sespie     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1056*c87b03e5Sespie     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1057*c87b03e5Sespie     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1058*c87b03e5Sespie     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1059*c87b03e5Sespie     case 'y': case 'z':
1060*c87b03e5Sespie     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1061*c87b03e5Sespie     case 'G': case 'H': case 'I': case 'J': case 'K':
1062*c87b03e5Sespie     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1063*c87b03e5Sespie     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1064*c87b03e5Sespie     case 'Y': case 'Z':
1065*c87b03e5Sespie       result->type = CPP_NAME;
1066*c87b03e5Sespie       result->val.node = parse_identifier (pfile);
1067*c87b03e5Sespie 
1068*c87b03e5Sespie       /* Convert named operators to their proper types.  */
1069*c87b03e5Sespie       if (result->val.node->flags & NODE_OPERATOR)
1070*c87b03e5Sespie 	{
1071*c87b03e5Sespie 	  result->flags |= NAMED_OP;
1072*c87b03e5Sespie 	  result->type = result->val.node->value.operator;
1073*c87b03e5Sespie 	}
1074*c87b03e5Sespie       break;
1075*c87b03e5Sespie 
1076*c87b03e5Sespie     case '\'':
1077*c87b03e5Sespie     case '"':
1078*c87b03e5Sespie       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1079*c87b03e5Sespie       parse_string (pfile, result, c);
1080*c87b03e5Sespie       break;
1081*c87b03e5Sespie 
1082*c87b03e5Sespie     case '/':
1083*c87b03e5Sespie       /* A potential block or line comment.  */
1084*c87b03e5Sespie       comment_start = buffer->cur;
1085*c87b03e5Sespie       c = get_effective_char (pfile);
1086*c87b03e5Sespie 
1087*c87b03e5Sespie       if (c == '*')
1088*c87b03e5Sespie 	{
1089*c87b03e5Sespie 	  if (skip_block_comment (pfile))
1090*c87b03e5Sespie 	    cpp_error (pfile, DL_ERROR, "unterminated comment");
1091*c87b03e5Sespie 	}
1092*c87b03e5Sespie       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1093*c87b03e5Sespie 			    || CPP_IN_SYSTEM_HEADER (pfile)))
1094*c87b03e5Sespie 	{
1095*c87b03e5Sespie 	  /* Warn about comments only if pedantically GNUC89, and not
1096*c87b03e5Sespie 	     in system headers.  */
1097*c87b03e5Sespie 	  if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1098*c87b03e5Sespie 	      && ! buffer->warned_cplusplus_comments)
1099*c87b03e5Sespie 	    {
1100*c87b03e5Sespie 	      cpp_error (pfile, DL_PEDWARN,
1101*c87b03e5Sespie 			 "C++ style comments are not allowed in ISO C90");
1102*c87b03e5Sespie 	      cpp_error (pfile, DL_PEDWARN,
1103*c87b03e5Sespie 			 "(this will be reported only once per input file)");
1104*c87b03e5Sespie 	      buffer->warned_cplusplus_comments = 1;
1105*c87b03e5Sespie 	    }
1106*c87b03e5Sespie 
1107*c87b03e5Sespie 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1108*c87b03e5Sespie 	    cpp_error (pfile, DL_WARNING, "multi-line comment");
1109*c87b03e5Sespie 	}
1110*c87b03e5Sespie       else if (c == '=')
1111*c87b03e5Sespie 	{
1112*c87b03e5Sespie 	  result->type = CPP_DIV_EQ;
1113*c87b03e5Sespie 	  break;
1114*c87b03e5Sespie 	}
1115*c87b03e5Sespie       else
1116*c87b03e5Sespie 	{
1117*c87b03e5Sespie 	  BACKUP ();
1118*c87b03e5Sespie 	  result->type = CPP_DIV;
1119*c87b03e5Sespie 	  break;
1120*c87b03e5Sespie 	}
1121*c87b03e5Sespie 
      /* Only the two comment branches above reach this point.  When
	 comments are not being saved, the comment counts as preceding
	 whitespace and lexing continues with whatever follows it.  */
1122*c87b03e5Sespie       if (!pfile->state.save_comments)
1123*c87b03e5Sespie 	{
1124*c87b03e5Sespie 	  result->flags |= PREV_WHITE;
1125*c87b03e5Sespie 	  goto update_tokens_line;
1126*c87b03e5Sespie 	}
1127*c87b03e5Sespie 
1128*c87b03e5Sespie       /* Save the comment as a token in its own right.  */
1129*c87b03e5Sespie       save_comment (pfile, result, comment_start, c);
1130*c87b03e5Sespie       break;
1131*c87b03e5Sespie 
1132*c87b03e5Sespie     case '<':
      /* When angled headers are being accepted, '<' opens a header
	 name that runs to the matching '>'.  */
1133*c87b03e5Sespie       if (pfile->state.angled_headers)
1134*c87b03e5Sespie 	{
1135*c87b03e5Sespie 	  result->type = CPP_HEADER_NAME;
1136*c87b03e5Sespie 	  parse_string (pfile, result, '>');
1137*c87b03e5Sespie 	  break;
1138*c87b03e5Sespie 	}
1139*c87b03e5Sespie 
1140*c87b03e5Sespie       c = get_effective_char (pfile);
1141*c87b03e5Sespie       if (c == '=')
1142*c87b03e5Sespie 	result->type = CPP_LESS_EQ;
1143*c87b03e5Sespie       else if (c == '<')
1144*c87b03e5Sespie 	IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1145*c87b03e5Sespie       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1146*c87b03e5Sespie 	IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1147*c87b03e5Sespie       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1148*c87b03e5Sespie 	{
1149*c87b03e5Sespie 	  result->type = CPP_OPEN_SQUARE;
1150*c87b03e5Sespie 	  result->flags |= DIGRAPH;
1151*c87b03e5Sespie 	}
1152*c87b03e5Sespie       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1153*c87b03e5Sespie 	{
1154*c87b03e5Sespie 	  result->type = CPP_OPEN_BRACE;
1155*c87b03e5Sespie 	  result->flags |= DIGRAPH;
1156*c87b03e5Sespie 	}
1157*c87b03e5Sespie       else
1158*c87b03e5Sespie 	{
1159*c87b03e5Sespie 	  BACKUP ();
1160*c87b03e5Sespie 	  result->type = CPP_LESS;
1161*c87b03e5Sespie 	}
1162*c87b03e5Sespie       break;
1163*c87b03e5Sespie 
1164*c87b03e5Sespie     case '>':
1165*c87b03e5Sespie       c = get_effective_char (pfile);
1166*c87b03e5Sespie       if (c == '=')
1167*c87b03e5Sespie 	result->type = CPP_GREATER_EQ;
1168*c87b03e5Sespie       else if (c == '>')
1169*c87b03e5Sespie 	IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1170*c87b03e5Sespie       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1171*c87b03e5Sespie 	IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1172*c87b03e5Sespie       else
1173*c87b03e5Sespie 	{
1174*c87b03e5Sespie 	  BACKUP ();
1175*c87b03e5Sespie 	  result->type = CPP_GREATER;
1176*c87b03e5Sespie 	}
1177*c87b03e5Sespie       break;
1178*c87b03e5Sespie 
1179*c87b03e5Sespie     case '%':
1180*c87b03e5Sespie       c = get_effective_char (pfile);
1181*c87b03e5Sespie       if (c == '=')
1182*c87b03e5Sespie 	result->type = CPP_MOD_EQ;
1183*c87b03e5Sespie       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1184*c87b03e5Sespie 	{
	  /* "%:" is lexed as CPP_HASH; if it is immediately followed
	     by a second "%:" the whole sequence is CPP_PASTE.  */
1185*c87b03e5Sespie 	  result->flags |= DIGRAPH;
1186*c87b03e5Sespie 	  result->type = CPP_HASH;
1187*c87b03e5Sespie 	  if (get_effective_char (pfile) == '%')
1188*c87b03e5Sespie 	    {
1189*c87b03e5Sespie 	      const unsigned char *pos = buffer->cur;
1190*c87b03e5Sespie 
1191*c87b03e5Sespie 	      if (get_effective_char (pfile) == ':')
1192*c87b03e5Sespie 		result->type = CPP_PASTE;
1193*c87b03e5Sespie 	      else
		/* POS was taken after the second '%' was read, so
		   POS - 1 resumes lexing at that '%'.  */
1194*c87b03e5Sespie 		buffer->cur = pos - 1;
1195*c87b03e5Sespie 	    }
1196*c87b03e5Sespie 	  else
1197*c87b03e5Sespie 	    BACKUP ();
1198*c87b03e5Sespie 	}
1199*c87b03e5Sespie       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1200*c87b03e5Sespie 	{
1201*c87b03e5Sespie 	  result->flags |= DIGRAPH;
1202*c87b03e5Sespie 	  result->type = CPP_CLOSE_BRACE;
1203*c87b03e5Sespie 	}
1204*c87b03e5Sespie       else
1205*c87b03e5Sespie 	{
1206*c87b03e5Sespie 	  BACKUP ();
1207*c87b03e5Sespie 	  result->type = CPP_MOD;
1208*c87b03e5Sespie 	}
1209*c87b03e5Sespie       break;
1210*c87b03e5Sespie 
1211*c87b03e5Sespie     case '.':
1212*c87b03e5Sespie       result->type = CPP_DOT;
1213*c87b03e5Sespie       c = get_effective_char (pfile);
1214*c87b03e5Sespie       if (c == '.')
1215*c87b03e5Sespie 	{
1216*c87b03e5Sespie 	  const unsigned char *pos = buffer->cur;
1217*c87b03e5Sespie 
1218*c87b03e5Sespie 	  if (get_effective_char (pfile) == '.')
1219*c87b03e5Sespie 	    result->type = CPP_ELLIPSIS;
1220*c87b03e5Sespie 	  else
	    /* Only two dots: resume at the second one, so the token
	       returned here is a single CPP_DOT.  */
1221*c87b03e5Sespie 	    buffer->cur = pos - 1;
1222*c87b03e5Sespie 	}
1223*c87b03e5Sespie       /* All known character sets have 0...9 contiguous.  */
1224*c87b03e5Sespie       else if (ISDIGIT (c))
1225*c87b03e5Sespie 	{
1226*c87b03e5Sespie 	  result->type = CPP_NUMBER;
1227*c87b03e5Sespie 	  parse_number (pfile, &result->val.str, 1);
1228*c87b03e5Sespie 	}
1229*c87b03e5Sespie       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1230*c87b03e5Sespie 	result->type = CPP_DOT_STAR;
1231*c87b03e5Sespie       else
1232*c87b03e5Sespie 	BACKUP ();
1233*c87b03e5Sespie       break;
1234*c87b03e5Sespie 
1235*c87b03e5Sespie     case '+':
1236*c87b03e5Sespie       c = get_effective_char (pfile);
1237*c87b03e5Sespie       if (c == '+')
1238*c87b03e5Sespie 	result->type = CPP_PLUS_PLUS;
1239*c87b03e5Sespie       else if (c == '=')
1240*c87b03e5Sespie 	result->type = CPP_PLUS_EQ;
1241*c87b03e5Sespie       else
1242*c87b03e5Sespie 	{
1243*c87b03e5Sespie 	  BACKUP ();
1244*c87b03e5Sespie 	  result->type = CPP_PLUS;
1245*c87b03e5Sespie 	}
1246*c87b03e5Sespie       break;
1247*c87b03e5Sespie 
1248*c87b03e5Sespie     case '-':
1249*c87b03e5Sespie       c = get_effective_char (pfile);
1250*c87b03e5Sespie       if (c == '>')
1251*c87b03e5Sespie 	{
1252*c87b03e5Sespie 	  result->type = CPP_DEREF;
1253*c87b03e5Sespie 	  if (CPP_OPTION (pfile, cplusplus))
1254*c87b03e5Sespie 	    {
1255*c87b03e5Sespie 	      if (get_effective_char (pfile) == '*')
1256*c87b03e5Sespie 		result->type = CPP_DEREF_STAR;
1257*c87b03e5Sespie 	      else
1258*c87b03e5Sespie 		BACKUP ();
1259*c87b03e5Sespie 	    }
1260*c87b03e5Sespie 	}
1261*c87b03e5Sespie       else if (c == '-')
1262*c87b03e5Sespie 	result->type = CPP_MINUS_MINUS;
1263*c87b03e5Sespie       else if (c == '=')
1264*c87b03e5Sespie 	result->type = CPP_MINUS_EQ;
1265*c87b03e5Sespie       else
1266*c87b03e5Sespie 	{
1267*c87b03e5Sespie 	  BACKUP ();
1268*c87b03e5Sespie 	  result->type = CPP_MINUS;
1269*c87b03e5Sespie 	}
1270*c87b03e5Sespie       break;
1271*c87b03e5Sespie 
1272*c87b03e5Sespie     case '&':
1273*c87b03e5Sespie       c = get_effective_char (pfile);
1274*c87b03e5Sespie       if (c == '&')
1275*c87b03e5Sespie 	result->type = CPP_AND_AND;
1276*c87b03e5Sespie       else if (c == '=')
1277*c87b03e5Sespie 	result->type = CPP_AND_EQ;
1278*c87b03e5Sespie       else
1279*c87b03e5Sespie 	{
1280*c87b03e5Sespie 	  BACKUP ();
1281*c87b03e5Sespie 	  result->type = CPP_AND;
1282*c87b03e5Sespie 	}
1283*c87b03e5Sespie       break;
1284*c87b03e5Sespie 
1285*c87b03e5Sespie     case '|':
1286*c87b03e5Sespie       c = get_effective_char (pfile);
1287*c87b03e5Sespie       if (c == '|')
1288*c87b03e5Sespie 	result->type = CPP_OR_OR;
1289*c87b03e5Sespie       else if (c == '=')
1290*c87b03e5Sespie 	result->type = CPP_OR_EQ;
1291*c87b03e5Sespie       else
1292*c87b03e5Sespie 	{
1293*c87b03e5Sespie 	  BACKUP ();
1294*c87b03e5Sespie 	  result->type = CPP_OR;
1295*c87b03e5Sespie 	}
1296*c87b03e5Sespie       break;
1297*c87b03e5Sespie 
1298*c87b03e5Sespie     case ':':
1299*c87b03e5Sespie       c = get_effective_char (pfile);
1300*c87b03e5Sespie       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1301*c87b03e5Sespie 	result->type = CPP_SCOPE;
1302*c87b03e5Sespie       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1303*c87b03e5Sespie 	{
1304*c87b03e5Sespie 	  result->flags |= DIGRAPH;
1305*c87b03e5Sespie 	  result->type = CPP_CLOSE_SQUARE;
1306*c87b03e5Sespie 	}
1307*c87b03e5Sespie       else
1308*c87b03e5Sespie 	{
1309*c87b03e5Sespie 	  BACKUP ();
1310*c87b03e5Sespie 	  result->type = CPP_COLON;
1311*c87b03e5Sespie 	}
1312*c87b03e5Sespie       break;
1313*c87b03e5Sespie 
1314*c87b03e5Sespie     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1315*c87b03e5Sespie     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1316*c87b03e5Sespie     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1317*c87b03e5Sespie     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1318*c87b03e5Sespie     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1319*c87b03e5Sespie 
1320*c87b03e5Sespie     case '~': result->type = CPP_COMPL; break;
1321*c87b03e5Sespie     case ',': result->type = CPP_COMMA; break;
1322*c87b03e5Sespie     case '(': result->type = CPP_OPEN_PAREN; break;
1323*c87b03e5Sespie     case ')': result->type = CPP_CLOSE_PAREN; break;
1324*c87b03e5Sespie     case '[': result->type = CPP_OPEN_SQUARE; break;
1325*c87b03e5Sespie     case ']': result->type = CPP_CLOSE_SQUARE; break;
1326*c87b03e5Sespie     case '{': result->type = CPP_OPEN_BRACE; break;
1327*c87b03e5Sespie     case '}': result->type = CPP_CLOSE_BRACE; break;
1328*c87b03e5Sespie     case ';': result->type = CPP_SEMICOLON; break;
1329*c87b03e5Sespie 
1330*c87b03e5Sespie       /* @ is a punctuator in Objective-C.  */
1331*c87b03e5Sespie     case '@': result->type = CPP_ATSIGN; break;
1332*c87b03e5Sespie 
1333*c87b03e5Sespie     case '$':
1334*c87b03e5Sespie       if (CPP_OPTION (pfile, dollars_in_ident))
1335*c87b03e5Sespie 	goto start_ident;
1336*c87b03e5Sespie       /* Fall through...  */
1337*c87b03e5Sespie 
      /* Anything not recognised above becomes a CPP_OTHER token that
	 carries the character itself.  */
1338*c87b03e5Sespie     random_char:
1339*c87b03e5Sespie     default:
1340*c87b03e5Sespie       result->type = CPP_OTHER;
1341*c87b03e5Sespie       result->val.c = c;
1342*c87b03e5Sespie       break;
1343*c87b03e5Sespie     }
1344*c87b03e5Sespie 
1345*c87b03e5Sespie   return result;
1346*c87b03e5Sespie }
1347*c87b03e5Sespie 
1348*c87b03e5Sespie /* An upper bound on the number of bytes needed to spell TOKEN,
1349*c87b03e5Sespie    including preceding whitespace.  */
1350*c87b03e5Sespie unsigned int
cpp_token_len(token)1351*c87b03e5Sespie cpp_token_len (token)
1352*c87b03e5Sespie      const cpp_token *token;
1353*c87b03e5Sespie {
1354*c87b03e5Sespie   unsigned int len;
1355*c87b03e5Sespie 
1356*c87b03e5Sespie   switch (TOKEN_SPELL (token))
1357*c87b03e5Sespie     {
1358*c87b03e5Sespie     default:		len = 0;				break;
1359*c87b03e5Sespie     case SPELL_NUMBER:
1360*c87b03e5Sespie     case SPELL_STRING:	len = token->val.str.len;		break;
1361*c87b03e5Sespie     case SPELL_IDENT:	len = NODE_LEN (token->val.node);	break;
1362*c87b03e5Sespie     }
1363*c87b03e5Sespie   /* 1 for whitespace, 4 for comment delimiters.  */
1364*c87b03e5Sespie   return len + 5;
1365*c87b03e5Sespie }
1366*c87b03e5Sespie 
1367*c87b03e5Sespie /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1368*c87b03e5Sespie    already contain the enough space to hold the token's spelling.
1369*c87b03e5Sespie    Returns a pointer to the character after the last character
1370*c87b03e5Sespie    written.  */
1371*c87b03e5Sespie unsigned char *
cpp_spell_token(pfile,token,buffer)1372*c87b03e5Sespie cpp_spell_token (pfile, token, buffer)
1373*c87b03e5Sespie      cpp_reader *pfile;		/* Would be nice to be rid of this...  */
1374*c87b03e5Sespie      const cpp_token *token;
1375*c87b03e5Sespie      unsigned char *buffer;
1376*c87b03e5Sespie {
1377*c87b03e5Sespie   switch (TOKEN_SPELL (token))
1378*c87b03e5Sespie     {
1379*c87b03e5Sespie     case SPELL_OPERATOR:
1380*c87b03e5Sespie       {
1381*c87b03e5Sespie 	const unsigned char *spelling;
1382*c87b03e5Sespie 	unsigned char c;
1383*c87b03e5Sespie 
1384*c87b03e5Sespie 	if (token->flags & DIGRAPH)
1385*c87b03e5Sespie 	  spelling
1386*c87b03e5Sespie 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1387*c87b03e5Sespie 	else if (token->flags & NAMED_OP)
1388*c87b03e5Sespie 	  goto spell_ident;
1389*c87b03e5Sespie 	else
1390*c87b03e5Sespie 	  spelling = TOKEN_NAME (token);
1391*c87b03e5Sespie 
1392*c87b03e5Sespie 	while ((c = *spelling++) != '\0')
1393*c87b03e5Sespie 	  *buffer++ = c;
1394*c87b03e5Sespie       }
1395*c87b03e5Sespie       break;
1396*c87b03e5Sespie 
1397*c87b03e5Sespie     case SPELL_CHAR:
1398*c87b03e5Sespie       *buffer++ = token->val.c;
1399*c87b03e5Sespie       break;
1400*c87b03e5Sespie 
1401*c87b03e5Sespie     spell_ident:
1402*c87b03e5Sespie     case SPELL_IDENT:
1403*c87b03e5Sespie       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1404*c87b03e5Sespie       buffer += NODE_LEN (token->val.node);
1405*c87b03e5Sespie       break;
1406*c87b03e5Sespie 
1407*c87b03e5Sespie     case SPELL_NUMBER:
1408*c87b03e5Sespie       memcpy (buffer, token->val.str.text, token->val.str.len);
1409*c87b03e5Sespie       buffer += token->val.str.len;
1410*c87b03e5Sespie       break;
1411*c87b03e5Sespie 
1412*c87b03e5Sespie     case SPELL_STRING:
1413*c87b03e5Sespie       {
1414*c87b03e5Sespie 	int left, right, tag;
1415*c87b03e5Sespie 	switch (token->type)
1416*c87b03e5Sespie 	  {
1417*c87b03e5Sespie 	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
1418*c87b03e5Sespie 	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
1419*c87b03e5Sespie 	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
1420*c87b03e5Sespie     	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
1421*c87b03e5Sespie 	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
1422*c87b03e5Sespie 	  default:
1423*c87b03e5Sespie 	    cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1424*c87b03e5Sespie 		       TOKEN_NAME (token));
1425*c87b03e5Sespie 	    return buffer;
1426*c87b03e5Sespie 	  }
1427*c87b03e5Sespie 	if (tag) *buffer++ = tag;
1428*c87b03e5Sespie 	*buffer++ = left;
1429*c87b03e5Sespie 	memcpy (buffer, token->val.str.text, token->val.str.len);
1430*c87b03e5Sespie 	buffer += token->val.str.len;
1431*c87b03e5Sespie 	*buffer++ = right;
1432*c87b03e5Sespie       }
1433*c87b03e5Sespie       break;
1434*c87b03e5Sespie 
1435*c87b03e5Sespie     case SPELL_NONE:
1436*c87b03e5Sespie       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1437*c87b03e5Sespie       break;
1438*c87b03e5Sespie     }
1439*c87b03e5Sespie 
1440*c87b03e5Sespie   return buffer;
1441*c87b03e5Sespie }
1442*c87b03e5Sespie 
1443*c87b03e5Sespie /* Returns TOKEN spelt as a null-terminated string.  The string is
1444*c87b03e5Sespie    freed when the reader is destroyed.  Useful for diagnostics.  */
1445*c87b03e5Sespie unsigned char *
cpp_token_as_text(pfile,token)1446*c87b03e5Sespie cpp_token_as_text (pfile, token)
1447*c87b03e5Sespie      cpp_reader *pfile;
1448*c87b03e5Sespie      const cpp_token *token;
1449*c87b03e5Sespie {
1450*c87b03e5Sespie   unsigned int len = cpp_token_len (token);
1451*c87b03e5Sespie   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1452*c87b03e5Sespie 
1453*c87b03e5Sespie   end = cpp_spell_token (pfile, token, start);
1454*c87b03e5Sespie   end[0] = '\0';
1455*c87b03e5Sespie 
1456*c87b03e5Sespie   return start;
1457*c87b03e5Sespie }
1458*c87b03e5Sespie 
1459*c87b03e5Sespie /* Used by C front ends, which really should move to using
1460*c87b03e5Sespie    cpp_token_as_text.  */
1461*c87b03e5Sespie const char *
cpp_type2name(type)1462*c87b03e5Sespie cpp_type2name (type)
1463*c87b03e5Sespie      enum cpp_ttype type;
1464*c87b03e5Sespie {
1465*c87b03e5Sespie   return (const char *) token_spellings[type].name;
1466*c87b03e5Sespie }
1467*c87b03e5Sespie 
1468*c87b03e5Sespie /* Writes the spelling of token to FP, without any preceding space.
1469*c87b03e5Sespie    Separated from cpp_spell_token for efficiency - to avoid stdio
1470*c87b03e5Sespie    double-buffering.  */
1471*c87b03e5Sespie void
cpp_output_token(token,fp)1472*c87b03e5Sespie cpp_output_token (token, fp)
1473*c87b03e5Sespie      const cpp_token *token;
1474*c87b03e5Sespie      FILE *fp;
1475*c87b03e5Sespie {
1476*c87b03e5Sespie   switch (TOKEN_SPELL (token))
1477*c87b03e5Sespie     {
1478*c87b03e5Sespie     case SPELL_OPERATOR:
1479*c87b03e5Sespie       {
1480*c87b03e5Sespie 	const unsigned char *spelling;
1481*c87b03e5Sespie 	int c;
1482*c87b03e5Sespie 
1483*c87b03e5Sespie 	if (token->flags & DIGRAPH)
1484*c87b03e5Sespie 	  spelling
1485*c87b03e5Sespie 	    = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1486*c87b03e5Sespie 	else if (token->flags & NAMED_OP)
1487*c87b03e5Sespie 	  goto spell_ident;
1488*c87b03e5Sespie 	else
1489*c87b03e5Sespie 	  spelling = TOKEN_NAME (token);
1490*c87b03e5Sespie 
1491*c87b03e5Sespie 	c = *spelling;
1492*c87b03e5Sespie 	do
1493*c87b03e5Sespie 	  putc (c, fp);
1494*c87b03e5Sespie 	while ((c = *++spelling) != '\0');
1495*c87b03e5Sespie       }
1496*c87b03e5Sespie       break;
1497*c87b03e5Sespie 
1498*c87b03e5Sespie     case SPELL_CHAR:
1499*c87b03e5Sespie       putc (token->val.c, fp);
1500*c87b03e5Sespie       break;
1501*c87b03e5Sespie 
1502*c87b03e5Sespie     spell_ident:
1503*c87b03e5Sespie     case SPELL_IDENT:
1504*c87b03e5Sespie       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1505*c87b03e5Sespie     break;
1506*c87b03e5Sespie 
1507*c87b03e5Sespie     case SPELL_NUMBER:
1508*c87b03e5Sespie       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1509*c87b03e5Sespie       break;
1510*c87b03e5Sespie 
1511*c87b03e5Sespie     case SPELL_STRING:
1512*c87b03e5Sespie       {
1513*c87b03e5Sespie 	int left, right, tag;
1514*c87b03e5Sespie 	switch (token->type)
1515*c87b03e5Sespie 	  {
1516*c87b03e5Sespie 	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
1517*c87b03e5Sespie 	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
1518*c87b03e5Sespie 	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
1519*c87b03e5Sespie     	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
1520*c87b03e5Sespie 	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
1521*c87b03e5Sespie 	  default:
1522*c87b03e5Sespie 	    fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1523*c87b03e5Sespie 	    return;
1524*c87b03e5Sespie 	  }
1525*c87b03e5Sespie 	if (tag) putc (tag, fp);
1526*c87b03e5Sespie 	putc (left, fp);
1527*c87b03e5Sespie 	fwrite (token->val.str.text, 1, token->val.str.len, fp);
1528*c87b03e5Sespie 	putc (right, fp);
1529*c87b03e5Sespie       }
1530*c87b03e5Sespie       break;
1531*c87b03e5Sespie 
1532*c87b03e5Sespie     case SPELL_NONE:
1533*c87b03e5Sespie       /* An error, most probably.  */
1534*c87b03e5Sespie       break;
1535*c87b03e5Sespie     }
1536*c87b03e5Sespie }
1537*c87b03e5Sespie 
1538*c87b03e5Sespie /* Compare two tokens.  */
1539*c87b03e5Sespie int
_cpp_equiv_tokens(a,b)1540*c87b03e5Sespie _cpp_equiv_tokens (a, b)
1541*c87b03e5Sespie      const cpp_token *a, *b;
1542*c87b03e5Sespie {
1543*c87b03e5Sespie   if (a->type == b->type && a->flags == b->flags)
1544*c87b03e5Sespie     switch (TOKEN_SPELL (a))
1545*c87b03e5Sespie       {
1546*c87b03e5Sespie       default:			/* Keep compiler happy.  */
1547*c87b03e5Sespie       case SPELL_OPERATOR:
1548*c87b03e5Sespie 	return 1;
1549*c87b03e5Sespie       case SPELL_CHAR:
1550*c87b03e5Sespie 	return a->val.c == b->val.c; /* Character.  */
1551*c87b03e5Sespie       case SPELL_NONE:
1552*c87b03e5Sespie 	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1553*c87b03e5Sespie       case SPELL_IDENT:
1554*c87b03e5Sespie 	return a->val.node == b->val.node;
1555*c87b03e5Sespie       case SPELL_NUMBER:
1556*c87b03e5Sespie       case SPELL_STRING:
1557*c87b03e5Sespie 	return (a->val.str.len == b->val.str.len
1558*c87b03e5Sespie 		&& !memcmp (a->val.str.text, b->val.str.text,
1559*c87b03e5Sespie 			    a->val.str.len));
1560*c87b03e5Sespie       }
1561*c87b03e5Sespie 
1562*c87b03e5Sespie   return 0;
1563*c87b03e5Sespie }
1564*c87b03e5Sespie 
1565*c87b03e5Sespie /* Returns nonzero if a space should be inserted to avoid an
1566*c87b03e5Sespie    accidental token paste for output.  For simplicity, it is
1567*c87b03e5Sespie    conservative, and occasionally advises a space where one is not
1568*c87b03e5Sespie    needed, e.g. "." and ".2".  */
1569*c87b03e5Sespie int
cpp_avoid_paste(pfile,token1,token2)1570*c87b03e5Sespie cpp_avoid_paste (pfile, token1, token2)
1571*c87b03e5Sespie      cpp_reader *pfile;
1572*c87b03e5Sespie      const cpp_token *token1, *token2;
1573*c87b03e5Sespie {
1574*c87b03e5Sespie   enum cpp_ttype a = token1->type, b = token2->type;
1575*c87b03e5Sespie   cppchar_t c;
1576*c87b03e5Sespie 
1577*c87b03e5Sespie   if (token1->flags & NAMED_OP)
1578*c87b03e5Sespie     a = CPP_NAME;
1579*c87b03e5Sespie   if (token2->flags & NAMED_OP)
1580*c87b03e5Sespie     b = CPP_NAME;
1581*c87b03e5Sespie 
1582*c87b03e5Sespie   c = EOF;
1583*c87b03e5Sespie   if (token2->flags & DIGRAPH)
1584*c87b03e5Sespie     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1585*c87b03e5Sespie   else if (token_spellings[b].category == SPELL_OPERATOR)
1586*c87b03e5Sespie     c = token_spellings[b].name[0];
1587*c87b03e5Sespie 
1588*c87b03e5Sespie   /* Quickly get everything that can paste with an '='.  */
1589*c87b03e5Sespie   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1590*c87b03e5Sespie     return 1;
1591*c87b03e5Sespie 
1592*c87b03e5Sespie   switch (a)
1593*c87b03e5Sespie     {
1594*c87b03e5Sespie     case CPP_GREATER:	return c == '>' || c == '?';
1595*c87b03e5Sespie     case CPP_LESS:	return c == '<' || c == '?' || c == '%' || c == ':';
1596*c87b03e5Sespie     case CPP_PLUS:	return c == '+';
1597*c87b03e5Sespie     case CPP_MINUS:	return c == '-' || c == '>';
1598*c87b03e5Sespie     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1599*c87b03e5Sespie     case CPP_MOD:	return c == ':' || c == '>';
1600*c87b03e5Sespie     case CPP_AND:	return c == '&';
1601*c87b03e5Sespie     case CPP_OR:	return c == '|';
1602*c87b03e5Sespie     case CPP_COLON:	return c == ':' || c == '>';
1603*c87b03e5Sespie     case CPP_DEREF:	return c == '*';
1604*c87b03e5Sespie     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1605*c87b03e5Sespie     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1606*c87b03e5Sespie     case CPP_NAME:	return ((b == CPP_NUMBER
1607*c87b03e5Sespie 				 && name_p (pfile, &token2->val.str))
1608*c87b03e5Sespie 				|| b == CPP_NAME
1609*c87b03e5Sespie 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1610*c87b03e5Sespie     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1611*c87b03e5Sespie 				|| c == '.' || c == '+' || c == '-');
1612*c87b03e5Sespie     case CPP_OTHER:	return (CPP_OPTION (pfile, objc)
1613*c87b03e5Sespie 				&& token1->val.c == '@'
1614*c87b03e5Sespie 				&& (b == CPP_NAME || b == CPP_STRING));
1615*c87b03e5Sespie     default:		break;
1616*c87b03e5Sespie     }
1617*c87b03e5Sespie 
1618*c87b03e5Sespie   return 0;
1619*c87b03e5Sespie }
1620*c87b03e5Sespie 
1621*c87b03e5Sespie /* Output all the remaining tokens on the current line, and a newline
1622*c87b03e5Sespie    character, to FP.  Leading whitespace is removed.  If there are
1623*c87b03e5Sespie    macros, special token padding is not performed.  */
1624*c87b03e5Sespie void
cpp_output_line(pfile,fp)1625*c87b03e5Sespie cpp_output_line (pfile, fp)
1626*c87b03e5Sespie      cpp_reader *pfile;
1627*c87b03e5Sespie      FILE *fp;
1628*c87b03e5Sespie {
1629*c87b03e5Sespie   const cpp_token *token;
1630*c87b03e5Sespie 
1631*c87b03e5Sespie   token = cpp_get_token (pfile);
1632*c87b03e5Sespie   while (token->type != CPP_EOF)
1633*c87b03e5Sespie     {
1634*c87b03e5Sespie       cpp_output_token (token, fp);
1635*c87b03e5Sespie       token = cpp_get_token (pfile);
1636*c87b03e5Sespie       if (token->flags & PREV_WHITE)
1637*c87b03e5Sespie 	putc (' ', fp);
1638*c87b03e5Sespie     }
1639*c87b03e5Sespie 
1640*c87b03e5Sespie   putc ('\n', fp);
1641*c87b03e5Sespie }
1642*c87b03e5Sespie 
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1653*c87b03e5Sespie 
1654*c87b03e5Sespie /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1655*c87b03e5Sespie    failure if cpplib is not parsing C++ or C99.  Such failure is
1656*c87b03e5Sespie    silent, and no variables are updated.  Otherwise returns 0, and
1657*c87b03e5Sespie    warns if -Wtraditional.
1658*c87b03e5Sespie 
1659*c87b03e5Sespie    [lex.charset]: The character designated by the universal character
1660*c87b03e5Sespie    name \UNNNNNNNN is that character whose character short name in
1661*c87b03e5Sespie    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1662*c87b03e5Sespie    universal character name \uNNNN is that character whose character
1663*c87b03e5Sespie    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1664*c87b03e5Sespie    for a universal character name is less than 0x20 or in the range
1665*c87b03e5Sespie    0x7F-0x9F (inclusive), or if the universal character name
1666*c87b03e5Sespie    designates a character in the basic source character set, then the
1667*c87b03e5Sespie    program is ill-formed.
1668*c87b03e5Sespie 
1669*c87b03e5Sespie    We assume that wchar_t is Unicode, so we don't need to do any
1670*c87b03e5Sespie    mapping.  Is this ever wrong?
1671*c87b03e5Sespie 
1672*c87b03e5Sespie    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1673*c87b03e5Sespie    LIMIT is the end of the string or charconst.  PSTR is updated to
1674*c87b03e5Sespie    point after the UCS on return, and the UCS is written into PC.  */
1675*c87b03e5Sespie 
1676*c87b03e5Sespie static int
maybe_read_ucs(pfile,pstr,limit,pc)1677*c87b03e5Sespie maybe_read_ucs (pfile, pstr, limit, pc)
1678*c87b03e5Sespie      cpp_reader *pfile;
1679*c87b03e5Sespie      const unsigned char **pstr;
1680*c87b03e5Sespie      const unsigned char *limit;
1681*c87b03e5Sespie      cppchar_t *pc;
1682*c87b03e5Sespie {
1683*c87b03e5Sespie   const unsigned char *p = *pstr;
1684*c87b03e5Sespie   unsigned int code = 0;
1685*c87b03e5Sespie   unsigned int c = *pc, length;
1686*c87b03e5Sespie 
1687*c87b03e5Sespie   /* Only attempt to interpret a UCS for C++ and C99.  */
1688*c87b03e5Sespie   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1689*c87b03e5Sespie     return 1;
1690*c87b03e5Sespie 
1691*c87b03e5Sespie   if (CPP_WTRADITIONAL (pfile))
1692*c87b03e5Sespie     cpp_error (pfile, DL_WARNING,
1693*c87b03e5Sespie 	       "the meaning of '\\%c' is different in traditional C", c);
1694*c87b03e5Sespie 
1695*c87b03e5Sespie   length = (c == 'u' ? 4: 8);
1696*c87b03e5Sespie 
1697*c87b03e5Sespie   if ((size_t) (limit - p) < length)
1698*c87b03e5Sespie     {
1699*c87b03e5Sespie       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1700*c87b03e5Sespie       /* Skip to the end to avoid more diagnostics.  */
1701*c87b03e5Sespie       p = limit;
1702*c87b03e5Sespie     }
1703*c87b03e5Sespie   else
1704*c87b03e5Sespie     {
1705*c87b03e5Sespie       for (; length; length--, p++)
1706*c87b03e5Sespie 	{
1707*c87b03e5Sespie 	  c = *p;
1708*c87b03e5Sespie 	  if (ISXDIGIT (c))
1709*c87b03e5Sespie 	    code = (code << 4) + hex_digit_value (c);
1710*c87b03e5Sespie 	  else
1711*c87b03e5Sespie 	    {
1712*c87b03e5Sespie 	      cpp_error (pfile, DL_ERROR,
1713*c87b03e5Sespie 			 "non-hex digit '%c' in universal-character-name", c);
1714*c87b03e5Sespie 	      /* We shouldn't skip in case there are multibyte chars.  */
1715*c87b03e5Sespie 	      break;
1716*c87b03e5Sespie 	    }
1717*c87b03e5Sespie 	}
1718*c87b03e5Sespie     }
1719*c87b03e5Sespie 
1720*c87b03e5Sespie #ifdef TARGET_EBCDIC
1721*c87b03e5Sespie   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1722*c87b03e5Sespie   code = 0x3f;  /* EBCDIC invalid character */
1723*c87b03e5Sespie #else
1724*c87b03e5Sespie  /* True extended characters are OK.  */
1725*c87b03e5Sespie   if (code >= 0xa0
1726*c87b03e5Sespie       && !(code & 0x80000000)
1727*c87b03e5Sespie       && !(code >= 0xD800 && code <= 0xDFFF))
1728*c87b03e5Sespie     ;
1729*c87b03e5Sespie   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1730*c87b03e5Sespie      hex escapes so that this also works with EBCDIC hosts.  */
1731*c87b03e5Sespie   else if (code == 0x24 || code == 0x40 || code == 0x60)
1732*c87b03e5Sespie     ;
1733*c87b03e5Sespie   /* Don't give another error if one occurred above.  */
1734*c87b03e5Sespie   else if (length == 0)
1735*c87b03e5Sespie     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1736*c87b03e5Sespie #endif
1737*c87b03e5Sespie 
1738*c87b03e5Sespie   *pstr = p;
1739*c87b03e5Sespie   *pc = code;
1740*c87b03e5Sespie   return 0;
1741*c87b03e5Sespie }
1742*c87b03e5Sespie 
1743*c87b03e5Sespie /* Returns the value of an escape sequence, truncated to the correct
1744*c87b03e5Sespie    target precision.  PSTR points to the input pointer, which is just
1745*c87b03e5Sespie    after the backslash.  LIMIT is how much text we have.  WIDE is true
1746*c87b03e5Sespie    if the escape sequence is part of a wide character constant or
1747*c87b03e5Sespie    string literal.  Handles all relevant diagnostics.  */
1748*c87b03e5Sespie cppchar_t
cpp_parse_escape(pfile,pstr,limit,wide)1749*c87b03e5Sespie cpp_parse_escape (pfile, pstr, limit, wide)
1750*c87b03e5Sespie      cpp_reader *pfile;
1751*c87b03e5Sespie      const unsigned char **pstr;
1752*c87b03e5Sespie      const unsigned char *limit;
1753*c87b03e5Sespie      int wide;
1754*c87b03e5Sespie {
1755*c87b03e5Sespie   int unknown = 0;
1756*c87b03e5Sespie   const unsigned char *str = *pstr;
1757*c87b03e5Sespie   cppchar_t c, mask;
1758*c87b03e5Sespie   unsigned int width;
1759*c87b03e5Sespie 
1760*c87b03e5Sespie   if (wide)
1761*c87b03e5Sespie     width = CPP_OPTION (pfile, wchar_precision);
1762*c87b03e5Sespie   else
1763*c87b03e5Sespie     width = CPP_OPTION (pfile, char_precision);
1764*c87b03e5Sespie   if (width < BITS_PER_CPPCHAR_T)
1765*c87b03e5Sespie     mask = ((cppchar_t) 1 << width) - 1;
1766*c87b03e5Sespie   else
1767*c87b03e5Sespie     mask = ~0;
1768*c87b03e5Sespie 
1769*c87b03e5Sespie   c = *str++;
1770*c87b03e5Sespie   switch (c)
1771*c87b03e5Sespie     {
1772*c87b03e5Sespie     case '\\': case '\'': case '"': case '?': break;
1773*c87b03e5Sespie     case 'b': c = TARGET_BS;	  break;
1774*c87b03e5Sespie     case 'f': c = TARGET_FF;	  break;
1775*c87b03e5Sespie     case 'n': c = TARGET_NEWLINE; break;
1776*c87b03e5Sespie     case 'r': c = TARGET_CR;	  break;
1777*c87b03e5Sespie     case 't': c = TARGET_TAB;	  break;
1778*c87b03e5Sespie     case 'v': c = TARGET_VT;	  break;
1779*c87b03e5Sespie 
1780*c87b03e5Sespie     case '(': case '{': case '[': case '%':
1781*c87b03e5Sespie       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1782*c87b03e5Sespie 	 '\%' is used to prevent SCCS from getting confused.  */
1783*c87b03e5Sespie       unknown = CPP_PEDANTIC (pfile);
1784*c87b03e5Sespie       break;
1785*c87b03e5Sespie 
1786*c87b03e5Sespie     case 'a':
1787*c87b03e5Sespie       if (CPP_WTRADITIONAL (pfile))
1788*c87b03e5Sespie 	cpp_error (pfile, DL_WARNING,
1789*c87b03e5Sespie 		   "the meaning of '\\a' is different in traditional C");
1790*c87b03e5Sespie       c = TARGET_BELL;
1791*c87b03e5Sespie       break;
1792*c87b03e5Sespie 
1793*c87b03e5Sespie     case 'e': case 'E':
1794*c87b03e5Sespie       if (CPP_PEDANTIC (pfile))
1795*c87b03e5Sespie 	cpp_error (pfile, DL_PEDWARN,
1796*c87b03e5Sespie 		   "non-ISO-standard escape sequence, '\\%c'", (int) c);
1797*c87b03e5Sespie       c = TARGET_ESC;
1798*c87b03e5Sespie       break;
1799*c87b03e5Sespie 
1800*c87b03e5Sespie     case 'u': case 'U':
1801*c87b03e5Sespie       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1802*c87b03e5Sespie       break;
1803*c87b03e5Sespie 
1804*c87b03e5Sespie     case 'x':
1805*c87b03e5Sespie       if (CPP_WTRADITIONAL (pfile))
1806*c87b03e5Sespie 	cpp_error (pfile, DL_WARNING,
1807*c87b03e5Sespie 		   "the meaning of '\\x' is different in traditional C");
1808*c87b03e5Sespie 
1809*c87b03e5Sespie       {
1810*c87b03e5Sespie 	cppchar_t i = 0, overflow = 0;
1811*c87b03e5Sespie 	int digits_found = 0;
1812*c87b03e5Sespie 
1813*c87b03e5Sespie 	while (str < limit)
1814*c87b03e5Sespie 	  {
1815*c87b03e5Sespie 	    c = *str;
1816*c87b03e5Sespie 	    if (! ISXDIGIT (c))
1817*c87b03e5Sespie 	      break;
1818*c87b03e5Sespie 	    str++;
1819*c87b03e5Sespie 	    overflow |= i ^ (i << 4 >> 4);
1820*c87b03e5Sespie 	    i = (i << 4) + hex_digit_value (c);
1821*c87b03e5Sespie 	    digits_found = 1;
1822*c87b03e5Sespie 	  }
1823*c87b03e5Sespie 
1824*c87b03e5Sespie 	if (!digits_found)
1825*c87b03e5Sespie 	  cpp_error (pfile, DL_ERROR,
1826*c87b03e5Sespie 		       "\\x used with no following hex digits");
1827*c87b03e5Sespie 
1828*c87b03e5Sespie 	if (overflow | (i != (i & mask)))
1829*c87b03e5Sespie 	  {
1830*c87b03e5Sespie 	    cpp_error (pfile, DL_PEDWARN,
1831*c87b03e5Sespie 		       "hex escape sequence out of range");
1832*c87b03e5Sespie 	    i &= mask;
1833*c87b03e5Sespie 	  }
1834*c87b03e5Sespie 	c = i;
1835*c87b03e5Sespie       }
1836*c87b03e5Sespie       break;
1837*c87b03e5Sespie 
1838*c87b03e5Sespie     case '0':  case '1':  case '2':  case '3':
1839*c87b03e5Sespie     case '4':  case '5':  case '6':  case '7':
1840*c87b03e5Sespie       {
1841*c87b03e5Sespie 	size_t count = 0;
1842*c87b03e5Sespie 	cppchar_t i = c - '0';
1843*c87b03e5Sespie 
1844*c87b03e5Sespie 	while (str < limit && ++count < 3)
1845*c87b03e5Sespie 	  {
1846*c87b03e5Sespie 	    c = *str;
1847*c87b03e5Sespie 	    if (c < '0' || c > '7')
1848*c87b03e5Sespie 	      break;
1849*c87b03e5Sespie 	    str++;
1850*c87b03e5Sespie 	    i = (i << 3) + c - '0';
1851*c87b03e5Sespie 	  }
1852*c87b03e5Sespie 
1853*c87b03e5Sespie 	if (i != (i & mask))
1854*c87b03e5Sespie 	  {
1855*c87b03e5Sespie 	    cpp_error (pfile, DL_PEDWARN,
1856*c87b03e5Sespie 		       "octal escape sequence out of range");
1857*c87b03e5Sespie 	    i &= mask;
1858*c87b03e5Sespie 	  }
1859*c87b03e5Sespie 	c = i;
1860*c87b03e5Sespie       }
1861*c87b03e5Sespie       break;
1862*c87b03e5Sespie 
1863*c87b03e5Sespie     default:
1864*c87b03e5Sespie       unknown = 1;
1865*c87b03e5Sespie       break;
1866*c87b03e5Sespie     }
1867*c87b03e5Sespie 
1868*c87b03e5Sespie   if (unknown)
1869*c87b03e5Sespie     {
1870*c87b03e5Sespie       if (ISGRAPH (c))
1871*c87b03e5Sespie 	cpp_error (pfile, DL_PEDWARN,
1872*c87b03e5Sespie 		   "unknown escape sequence '\\%c'", (int) c);
1873*c87b03e5Sespie       else
1874*c87b03e5Sespie 	cpp_error (pfile, DL_PEDWARN,
1875*c87b03e5Sespie 		   "unknown escape sequence: '\\%03o'", (int) c);
1876*c87b03e5Sespie     }
1877*c87b03e5Sespie 
1878*c87b03e5Sespie   if (c > mask)
1879*c87b03e5Sespie     {
1880*c87b03e5Sespie       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1881*c87b03e5Sespie       c &= mask;
1882*c87b03e5Sespie     }
1883*c87b03e5Sespie 
1884*c87b03e5Sespie   *pstr = str;
1885*c87b03e5Sespie   return c;
1886*c87b03e5Sespie }
1887*c87b03e5Sespie 
1888*c87b03e5Sespie /* Interpret a (possibly wide) character constant in TOKEN.
1889*c87b03e5Sespie    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1890*c87b03e5Sespie    points to a variable that is filled in with the number of
1891*c87b03e5Sespie    characters seen, and UNSIGNEDP to a variable that indicates whether
1892*c87b03e5Sespie    the result has signed type.  */
1893*c87b03e5Sespie cppchar_t
cpp_interpret_charconst(pfile,token,pchars_seen,unsignedp)1894*c87b03e5Sespie cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1895*c87b03e5Sespie      cpp_reader *pfile;
1896*c87b03e5Sespie      const cpp_token *token;
1897*c87b03e5Sespie      unsigned int *pchars_seen;
1898*c87b03e5Sespie      int *unsignedp;
1899*c87b03e5Sespie {
1900*c87b03e5Sespie   const unsigned char *str = token->val.str.text;
1901*c87b03e5Sespie   const unsigned char *limit = str + token->val.str.len;
1902*c87b03e5Sespie   unsigned int chars_seen = 0;
1903*c87b03e5Sespie   size_t width, max_chars;
1904*c87b03e5Sespie   cppchar_t c, mask, result = 0;
1905*c87b03e5Sespie   bool unsigned_p;
1906*c87b03e5Sespie 
1907*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
1908*c87b03e5Sespie   (void) local_mbtowc (NULL, NULL, 0);
1909*c87b03e5Sespie #endif
1910*c87b03e5Sespie 
1911*c87b03e5Sespie   /* Width in bits.  */
1912*c87b03e5Sespie   if (token->type == CPP_CHAR)
1913*c87b03e5Sespie     {
1914*c87b03e5Sespie       width = CPP_OPTION (pfile, char_precision);
1915*c87b03e5Sespie       max_chars = CPP_OPTION (pfile, int_precision) / width;
1916*c87b03e5Sespie       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1917*c87b03e5Sespie     }
1918*c87b03e5Sespie   else
1919*c87b03e5Sespie     {
1920*c87b03e5Sespie       width = CPP_OPTION (pfile, wchar_precision);
1921*c87b03e5Sespie       max_chars = 1;
1922*c87b03e5Sespie       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1923*c87b03e5Sespie     }
1924*c87b03e5Sespie 
1925*c87b03e5Sespie   if (width < BITS_PER_CPPCHAR_T)
1926*c87b03e5Sespie     mask = ((cppchar_t) 1 << width) - 1;
1927*c87b03e5Sespie   else
1928*c87b03e5Sespie     mask = ~0;
1929*c87b03e5Sespie 
1930*c87b03e5Sespie   while (str < limit)
1931*c87b03e5Sespie     {
1932*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
1933*c87b03e5Sespie       wchar_t wc;
1934*c87b03e5Sespie       int char_len;
1935*c87b03e5Sespie 
1936*c87b03e5Sespie       char_len = local_mbtowc (&wc, str, limit - str);
1937*c87b03e5Sespie       if (char_len == -1)
1938*c87b03e5Sespie 	{
1939*c87b03e5Sespie 	  cpp_error (pfile, DL_WARNING,
1940*c87b03e5Sespie 		     "ignoring invalid multibyte character");
1941*c87b03e5Sespie 	  c = *str++;
1942*c87b03e5Sespie 	}
1943*c87b03e5Sespie       else
1944*c87b03e5Sespie 	{
1945*c87b03e5Sespie 	  str += char_len;
1946*c87b03e5Sespie 	  c = wc;
1947*c87b03e5Sespie 	}
1948*c87b03e5Sespie #else
1949*c87b03e5Sespie       c = *str++;
1950*c87b03e5Sespie #endif
1951*c87b03e5Sespie 
1952*c87b03e5Sespie       if (c == '\\')
1953*c87b03e5Sespie 	c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1954*c87b03e5Sespie 
1955*c87b03e5Sespie #ifdef MAP_CHARACTER
1956*c87b03e5Sespie       if (ISPRINT (c))
1957*c87b03e5Sespie 	c = MAP_CHARACTER (c);
1958*c87b03e5Sespie #endif
1959*c87b03e5Sespie 
1960*c87b03e5Sespie       chars_seen++;
1961*c87b03e5Sespie 
1962*c87b03e5Sespie       /* Truncate the character, scale the result and merge the two.  */
1963*c87b03e5Sespie       c &= mask;
1964*c87b03e5Sespie       if (width < BITS_PER_CPPCHAR_T)
1965*c87b03e5Sespie 	result = (result << width) | c;
1966*c87b03e5Sespie       else
1967*c87b03e5Sespie 	result = c;
1968*c87b03e5Sespie     }
1969*c87b03e5Sespie 
1970*c87b03e5Sespie   if (chars_seen == 0)
1971*c87b03e5Sespie     cpp_error (pfile, DL_ERROR, "empty character constant");
1972*c87b03e5Sespie   else if (chars_seen > 1)
1973*c87b03e5Sespie     {
1974*c87b03e5Sespie       /* Multichar charconsts are of type int and therefore signed.  */
1975*c87b03e5Sespie       unsigned_p = 0;
1976*c87b03e5Sespie 
1977*c87b03e5Sespie       if (chars_seen > max_chars)
1978*c87b03e5Sespie 	{
1979*c87b03e5Sespie 	  chars_seen = max_chars;
1980*c87b03e5Sespie 	  cpp_error (pfile, DL_WARNING,
1981*c87b03e5Sespie 		     "character constant too long for its type");
1982*c87b03e5Sespie 	}
1983*c87b03e5Sespie       else if (CPP_OPTION (pfile, warn_multichar))
1984*c87b03e5Sespie 	cpp_error (pfile, DL_WARNING, "multi-character character constant");
1985*c87b03e5Sespie     }
1986*c87b03e5Sespie 
1987*c87b03e5Sespie   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1988*c87b03e5Sespie      in WIDTH bits, but for multi-char charconsts it's value is the
1989*c87b03e5Sespie      full target type's width.  */
1990*c87b03e5Sespie   if (chars_seen > 1)
1991*c87b03e5Sespie     width *= max_chars;
1992*c87b03e5Sespie   if (width < BITS_PER_CPPCHAR_T)
1993*c87b03e5Sespie     {
1994*c87b03e5Sespie       mask = ((cppchar_t) 1 << width) - 1;
1995*c87b03e5Sespie       if (unsigned_p || !(result & (1 << (width - 1))))
1996*c87b03e5Sespie 	result &= mask;
1997*c87b03e5Sespie       else
1998*c87b03e5Sespie 	result |= ~mask;
1999*c87b03e5Sespie     }
2000*c87b03e5Sespie 
2001*c87b03e5Sespie   *pchars_seen = chars_seen;
2002*c87b03e5Sespie   *unsignedp = unsigned_p;
2003*c87b03e5Sespie   return result;
2004*c87b03e5Sespie }
2005*c87b03e5Sespie 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is MIN_EXTRA plus twice the
   number of bytes between BUFF's cur and limit pointers.  Every macro
   argument is parenthesized so that any expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2020*c87b03e5Sespie 
2021*c87b03e5Sespie /* Create a new allocation buffer.  Place the control block at the end
2022*c87b03e5Sespie    of the buffer, so that buffer overflows will cause immediate chaos.  */
2023*c87b03e5Sespie static _cpp_buff *
new_buff(len)2024*c87b03e5Sespie new_buff (len)
2025*c87b03e5Sespie      size_t len;
2026*c87b03e5Sespie {
2027*c87b03e5Sespie   _cpp_buff *result;
2028*c87b03e5Sespie   unsigned char *base;
2029*c87b03e5Sespie 
2030*c87b03e5Sespie   if (len < MIN_BUFF_SIZE)
2031*c87b03e5Sespie     len = MIN_BUFF_SIZE;
2032*c87b03e5Sespie   len = CPP_ALIGN (len);
2033*c87b03e5Sespie 
2034*c87b03e5Sespie   base = xmalloc (len + sizeof (_cpp_buff));
2035*c87b03e5Sespie   result = (_cpp_buff *) (base + len);
2036*c87b03e5Sespie   result->base = base;
2037*c87b03e5Sespie   result->cur = base;
2038*c87b03e5Sespie   result->limit = base + len;
2039*c87b03e5Sespie   result->next = NULL;
2040*c87b03e5Sespie   return result;
2041*c87b03e5Sespie }
2042*c87b03e5Sespie 
2043*c87b03e5Sespie /* Place a chain of unwanted allocation buffers on the free list.  */
2044*c87b03e5Sespie void
_cpp_release_buff(pfile,buff)2045*c87b03e5Sespie _cpp_release_buff (pfile, buff)
2046*c87b03e5Sespie      cpp_reader *pfile;
2047*c87b03e5Sespie      _cpp_buff *buff;
2048*c87b03e5Sespie {
2049*c87b03e5Sespie   _cpp_buff *end = buff;
2050*c87b03e5Sespie 
2051*c87b03e5Sespie   while (end->next)
2052*c87b03e5Sespie     end = end->next;
2053*c87b03e5Sespie   end->next = pfile->free_buffs;
2054*c87b03e5Sespie   pfile->free_buffs = buff;
2055*c87b03e5Sespie }
2056*c87b03e5Sespie 
2057*c87b03e5Sespie /* Return a free buffer of size at least MIN_SIZE.  */
2058*c87b03e5Sespie _cpp_buff *
_cpp_get_buff(pfile,min_size)2059*c87b03e5Sespie _cpp_get_buff (pfile, min_size)
2060*c87b03e5Sespie      cpp_reader *pfile;
2061*c87b03e5Sespie      size_t min_size;
2062*c87b03e5Sespie {
2063*c87b03e5Sespie   _cpp_buff *result, **p;
2064*c87b03e5Sespie 
2065*c87b03e5Sespie   for (p = &pfile->free_buffs;; p = &(*p)->next)
2066*c87b03e5Sespie     {
2067*c87b03e5Sespie       size_t size;
2068*c87b03e5Sespie 
2069*c87b03e5Sespie       if (*p == NULL)
2070*c87b03e5Sespie 	return new_buff (min_size);
2071*c87b03e5Sespie       result = *p;
2072*c87b03e5Sespie       size = result->limit - result->base;
2073*c87b03e5Sespie       /* Return a buffer that's big enough, but don't waste one that's
2074*c87b03e5Sespie          way too big.  */
2075*c87b03e5Sespie       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2076*c87b03e5Sespie 	break;
2077*c87b03e5Sespie     }
2078*c87b03e5Sespie 
2079*c87b03e5Sespie   *p = result->next;
2080*c87b03e5Sespie   result->next = NULL;
2081*c87b03e5Sespie   result->cur = result->base;
2082*c87b03e5Sespie   return result;
2083*c87b03e5Sespie }
2084*c87b03e5Sespie 
2085*c87b03e5Sespie /* Creates a new buffer with enough space to hold the uncommitted
2086*c87b03e5Sespie    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2087*c87b03e5Sespie    the excess bytes to the new buffer.  Chains the new buffer after
2088*c87b03e5Sespie    BUFF, and returns the new buffer.  */
2089*c87b03e5Sespie _cpp_buff *
_cpp_append_extend_buff(pfile,buff,min_extra)2090*c87b03e5Sespie _cpp_append_extend_buff (pfile, buff, min_extra)
2091*c87b03e5Sespie      cpp_reader *pfile;
2092*c87b03e5Sespie      _cpp_buff *buff;
2093*c87b03e5Sespie      size_t min_extra;
2094*c87b03e5Sespie {
2095*c87b03e5Sespie   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2096*c87b03e5Sespie   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2097*c87b03e5Sespie 
2098*c87b03e5Sespie   buff->next = new_buff;
2099*c87b03e5Sespie   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2100*c87b03e5Sespie   return new_buff;
2101*c87b03e5Sespie }
2102*c87b03e5Sespie 
2103*c87b03e5Sespie /* Creates a new buffer with enough space to hold the uncommitted
2104*c87b03e5Sespie    remaining bytes of the buffer pointed to by BUFF, and at least
2105*c87b03e5Sespie    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2106*c87b03e5Sespie    Chains the new buffer before the buffer pointed to by BUFF, and
2107*c87b03e5Sespie    updates the pointer to point to the new buffer.  */
2108*c87b03e5Sespie void
_cpp_extend_buff(pfile,pbuff,min_extra)2109*c87b03e5Sespie _cpp_extend_buff (pfile, pbuff, min_extra)
2110*c87b03e5Sespie      cpp_reader *pfile;
2111*c87b03e5Sespie      _cpp_buff **pbuff;
2112*c87b03e5Sespie      size_t min_extra;
2113*c87b03e5Sespie {
2114*c87b03e5Sespie   _cpp_buff *new_buff, *old_buff = *pbuff;
2115*c87b03e5Sespie   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2116*c87b03e5Sespie 
2117*c87b03e5Sespie   new_buff = _cpp_get_buff (pfile, size);
2118*c87b03e5Sespie   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2119*c87b03e5Sespie   new_buff->next = old_buff;
2120*c87b03e5Sespie   *pbuff = new_buff;
2121*c87b03e5Sespie }
2122*c87b03e5Sespie 
2123*c87b03e5Sespie /* Free a chain of buffers starting at BUFF.  */
2124*c87b03e5Sespie void
_cpp_free_buff(buff)2125*c87b03e5Sespie _cpp_free_buff (buff)
2126*c87b03e5Sespie      _cpp_buff *buff;
2127*c87b03e5Sespie {
2128*c87b03e5Sespie   _cpp_buff *next;
2129*c87b03e5Sespie 
2130*c87b03e5Sespie   for (; buff; buff = next)
2131*c87b03e5Sespie     {
2132*c87b03e5Sespie       next = buff->next;
2133*c87b03e5Sespie       free (buff->base);
2134*c87b03e5Sespie     }
2135*c87b03e5Sespie }
2136*c87b03e5Sespie 
2137*c87b03e5Sespie /* Allocate permanent, unaligned storage of length LEN.  */
2138*c87b03e5Sespie unsigned char *
_cpp_unaligned_alloc(pfile,len)2139*c87b03e5Sespie _cpp_unaligned_alloc (pfile, len)
2140*c87b03e5Sespie      cpp_reader *pfile;
2141*c87b03e5Sespie      size_t len;
2142*c87b03e5Sespie {
2143*c87b03e5Sespie   _cpp_buff *buff = pfile->u_buff;
2144*c87b03e5Sespie   unsigned char *result = buff->cur;
2145*c87b03e5Sespie 
2146*c87b03e5Sespie   if (len > (size_t) (buff->limit - result))
2147*c87b03e5Sespie     {
2148*c87b03e5Sespie       buff = _cpp_get_buff (pfile, len);
2149*c87b03e5Sespie       buff->next = pfile->u_buff;
2150*c87b03e5Sespie       pfile->u_buff = buff;
2151*c87b03e5Sespie       result = buff->cur;
2152*c87b03e5Sespie     }
2153*c87b03e5Sespie 
2154*c87b03e5Sespie   buff->cur = result + len;
2155*c87b03e5Sespie   return result;
2156*c87b03e5Sespie }
2157*c87b03e5Sespie 
2158*c87b03e5Sespie /* Allocate permanent, unaligned storage of length LEN from a_buff.
2159*c87b03e5Sespie    That buffer is used for growing allocations when saving macro
2160*c87b03e5Sespie    replacement lists in a #define, and when parsing an answer to an
2161*c87b03e5Sespie    assertion in #assert, #unassert or #if (and therefore possibly
2162*c87b03e5Sespie    whilst expanding macros).  It therefore must not be used by any
2163*c87b03e5Sespie    code that they might call: specifically the lexer and the guts of
2164*c87b03e5Sespie    the macro expander.
2165*c87b03e5Sespie 
2166*c87b03e5Sespie    All existing other uses clearly fit this restriction: storing
2167*c87b03e5Sespie    registered pragmas during initialization.  */
2168*c87b03e5Sespie unsigned char *
_cpp_aligned_alloc(pfile,len)2169*c87b03e5Sespie _cpp_aligned_alloc (pfile, len)
2170*c87b03e5Sespie      cpp_reader *pfile;
2171*c87b03e5Sespie      size_t len;
2172*c87b03e5Sespie {
2173*c87b03e5Sespie   _cpp_buff *buff = pfile->a_buff;
2174*c87b03e5Sespie   unsigned char *result = buff->cur;
2175*c87b03e5Sespie 
2176*c87b03e5Sespie   if (len > (size_t) (buff->limit - result))
2177*c87b03e5Sespie     {
2178*c87b03e5Sespie       buff = _cpp_get_buff (pfile, len);
2179*c87b03e5Sespie       buff->next = pfile->a_buff;
2180*c87b03e5Sespie       pfile->a_buff = buff;
2181*c87b03e5Sespie       result = buff->cur;
2182*c87b03e5Sespie     }
2183*c87b03e5Sespie 
2184*c87b03e5Sespie   buff->cur = result + len;
2185*c87b03e5Sespie   return result;
2186*c87b03e5Sespie }
2187