1*c87b03e5Sespie /* CPP Library - lexical analysis.
2*c87b03e5Sespie Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3*c87b03e5Sespie Contributed by Per Bothner, 1994-95.
4*c87b03e5Sespie Based on CCCP program by Paul Rubin, June 1986
5*c87b03e5Sespie Adapted to ANSI C, Richard Stallman, Jan 1987
6*c87b03e5Sespie Broken out to separate file, Zack Weinberg, Mar 2000
7*c87b03e5Sespie Single-pass line tokenization by Neil Booth, April 2000
8*c87b03e5Sespie
9*c87b03e5Sespie This program is free software; you can redistribute it and/or modify it
10*c87b03e5Sespie under the terms of the GNU General Public License as published by the
11*c87b03e5Sespie Free Software Foundation; either version 2, or (at your option) any
12*c87b03e5Sespie later version.
13*c87b03e5Sespie
14*c87b03e5Sespie This program is distributed in the hope that it will be useful,
15*c87b03e5Sespie but WITHOUT ANY WARRANTY; without even the implied warranty of
16*c87b03e5Sespie MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17*c87b03e5Sespie GNU General Public License for more details.
18*c87b03e5Sespie
19*c87b03e5Sespie You should have received a copy of the GNU General Public License
20*c87b03e5Sespie along with this program; if not, write to the Free Software
21*c87b03e5Sespie Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22*c87b03e5Sespie
23*c87b03e5Sespie #include "config.h"
24*c87b03e5Sespie #include "system.h"
25*c87b03e5Sespie #include "cpplib.h"
26*c87b03e5Sespie #include "cpphash.h"
27*c87b03e5Sespie
28*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
29*c87b03e5Sespie #include "mbchar.h"
30*c87b03e5Sespie #include <locale.h>
31*c87b03e5Sespie #endif
32*c87b03e5Sespie
/* Spelling category of a token type.  Tokens with SPELL_STRING store
   their spelling in the token list, and its length in the
   token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR     = 1,
  SPELL_IDENT    = 2,
  SPELL_NUMBER   = 3,
  SPELL_STRING   = 4,
  SPELL_NONE     = 5
};

/* One row of the per-token-type spelling table: the spelling category
   of the token type and a name string associated with it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
50*c87b03e5Sespie
51*c87b03e5Sespie static const unsigned char *const digraph_spellings[] =
52*c87b03e5Sespie { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
53*c87b03e5Sespie
54*c87b03e5Sespie #define OP(e, s) { SPELL_OPERATOR, U s },
55*c87b03e5Sespie #define TK(e, s) { s, U STRINGX (e) },
56*c87b03e5Sespie static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
57*c87b03e5Sespie #undef OP
58*c87b03e5Sespie #undef TK
59*c87b03e5Sespie
/* Spelling category recorded in token_spellings for TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)

/* Name string recorded in token_spellings for TOKEN's type.  */
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)

/* Rewind the read position to the saved backup point.  Expects a
   local variable named `buffer' to be in scope at the point of use.  */
#define BACKUP() do { buffer->cur = buffer->backup_to; } while (0)
63*c87b03e5Sespie
64*c87b03e5Sespie static void handle_newline PARAMS ((cpp_reader *));
65*c87b03e5Sespie static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
66*c87b03e5Sespie static cppchar_t get_effective_char PARAMS ((cpp_reader *));
67*c87b03e5Sespie
68*c87b03e5Sespie static int skip_block_comment PARAMS ((cpp_reader *));
69*c87b03e5Sespie static int skip_line_comment PARAMS ((cpp_reader *));
70*c87b03e5Sespie static void adjust_column PARAMS ((cpp_reader *));
71*c87b03e5Sespie static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
72*c87b03e5Sespie static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
73*c87b03e5Sespie static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
74*c87b03e5Sespie unsigned int *));
75*c87b03e5Sespie static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
76*c87b03e5Sespie static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
77*c87b03e5Sespie static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
78*c87b03e5Sespie static bool trigraph_p PARAMS ((cpp_reader *));
79*c87b03e5Sespie static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
80*c87b03e5Sespie cppchar_t));
81*c87b03e5Sespie static bool continue_after_nul PARAMS ((cpp_reader *));
82*c87b03e5Sespie static int name_p PARAMS ((cpp_reader *, const cpp_string *));
83*c87b03e5Sespie static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
84*c87b03e5Sespie const unsigned char *, cppchar_t *));
85*c87b03e5Sespie static tokenrun *next_tokenrun PARAMS ((tokenrun *));
86*c87b03e5Sespie
87*c87b03e5Sespie static unsigned int hex_digit_value PARAMS ((unsigned int));
88*c87b03e5Sespie static _cpp_buff *new_buff PARAMS ((size_t));
89*c87b03e5Sespie
90*c87b03e5Sespie /* Utility routine:
91*c87b03e5Sespie
92*c87b03e5Sespie Compares, the token TOKEN to the NUL-terminated string STRING.
93*c87b03e5Sespie TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
94*c87b03e5Sespie int
cpp_ideq(token,string)95*c87b03e5Sespie cpp_ideq (token, string)
96*c87b03e5Sespie const cpp_token *token;
97*c87b03e5Sespie const char *string;
98*c87b03e5Sespie {
99*c87b03e5Sespie if (token->type != CPP_NAME)
100*c87b03e5Sespie return 0;
101*c87b03e5Sespie
102*c87b03e5Sespie return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
103*c87b03e5Sespie }
104*c87b03e5Sespie
105*c87b03e5Sespie /* Call when meeting a newline, assumed to be in buffer->cur[-1].
106*c87b03e5Sespie Returns with buffer->cur pointing to the character immediately
107*c87b03e5Sespie following the newline (combination). */
108*c87b03e5Sespie static void
handle_newline(pfile)109*c87b03e5Sespie handle_newline (pfile)
110*c87b03e5Sespie cpp_reader *pfile;
111*c87b03e5Sespie {
112*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
113*c87b03e5Sespie
114*c87b03e5Sespie /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
115*c87b03e5Sespie only accept CR-LF; maybe we should fall back to that behavior? */
116*c87b03e5Sespie if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
117*c87b03e5Sespie buffer->cur++;
118*c87b03e5Sespie
119*c87b03e5Sespie buffer->line_base = buffer->cur;
120*c87b03e5Sespie buffer->col_adjust = 0;
121*c87b03e5Sespie pfile->line++;
122*c87b03e5Sespie }
123*c87b03e5Sespie
124*c87b03e5Sespie /* Subroutine of skip_escaped_newlines; called when a 3-character
125*c87b03e5Sespie sequence beginning with "??" is encountered. buffer->cur points to
126*c87b03e5Sespie the second '?'.
127*c87b03e5Sespie
128*c87b03e5Sespie Warn if necessary, and returns true if the sequence forms a
129*c87b03e5Sespie trigraph and the trigraph should be honored. */
130*c87b03e5Sespie static bool
trigraph_p(pfile)131*c87b03e5Sespie trigraph_p (pfile)
132*c87b03e5Sespie cpp_reader *pfile;
133*c87b03e5Sespie {
134*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
135*c87b03e5Sespie cppchar_t from_char = buffer->cur[1];
136*c87b03e5Sespie bool accept;
137*c87b03e5Sespie
138*c87b03e5Sespie if (!_cpp_trigraph_map[from_char])
139*c87b03e5Sespie return false;
140*c87b03e5Sespie
141*c87b03e5Sespie accept = CPP_OPTION (pfile, trigraphs);
142*c87b03e5Sespie
143*c87b03e5Sespie /* Don't warn about trigraphs in comments. */
144*c87b03e5Sespie if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
145*c87b03e5Sespie {
146*c87b03e5Sespie if (accept)
147*c87b03e5Sespie cpp_error_with_line (pfile, DL_WARNING,
148*c87b03e5Sespie pfile->line, CPP_BUF_COL (buffer) - 1,
149*c87b03e5Sespie "trigraph ??%c converted to %c",
150*c87b03e5Sespie (int) from_char,
151*c87b03e5Sespie (int) _cpp_trigraph_map[from_char]);
152*c87b03e5Sespie else if (buffer->cur != buffer->last_Wtrigraphs)
153*c87b03e5Sespie {
154*c87b03e5Sespie buffer->last_Wtrigraphs = buffer->cur;
155*c87b03e5Sespie cpp_error_with_line (pfile, DL_WARNING,
156*c87b03e5Sespie pfile->line, CPP_BUF_COL (buffer) - 1,
157*c87b03e5Sespie "trigraph ??%c ignored", (int) from_char);
158*c87b03e5Sespie }
159*c87b03e5Sespie }
160*c87b03e5Sespie
161*c87b03e5Sespie return accept;
162*c87b03e5Sespie }
163*c87b03e5Sespie
164*c87b03e5Sespie /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
165*c87b03e5Sespie lie in buffer->cur[-1]. Returns the next byte, which will be in
166*c87b03e5Sespie buffer->cur[-1]. This routine performs preprocessing stages 1 and
167*c87b03e5Sespie 2 of the ISO C standard. */
168*c87b03e5Sespie static cppchar_t
skip_escaped_newlines(pfile)169*c87b03e5Sespie skip_escaped_newlines (pfile)
170*c87b03e5Sespie cpp_reader *pfile;
171*c87b03e5Sespie {
172*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
173*c87b03e5Sespie cppchar_t next = buffer->cur[-1];
174*c87b03e5Sespie
175*c87b03e5Sespie /* Only do this if we apply stages 1 and 2. */
176*c87b03e5Sespie if (!buffer->from_stage3)
177*c87b03e5Sespie {
178*c87b03e5Sespie const unsigned char *saved_cur;
179*c87b03e5Sespie cppchar_t next1;
180*c87b03e5Sespie
181*c87b03e5Sespie do
182*c87b03e5Sespie {
183*c87b03e5Sespie if (next == '?')
184*c87b03e5Sespie {
185*c87b03e5Sespie if (buffer->cur[0] != '?' || !trigraph_p (pfile))
186*c87b03e5Sespie break;
187*c87b03e5Sespie
188*c87b03e5Sespie /* Translate the trigraph. */
189*c87b03e5Sespie next = _cpp_trigraph_map[buffer->cur[1]];
190*c87b03e5Sespie buffer->cur += 2;
191*c87b03e5Sespie if (next != '\\')
192*c87b03e5Sespie break;
193*c87b03e5Sespie }
194*c87b03e5Sespie
195*c87b03e5Sespie if (buffer->cur == buffer->rlimit)
196*c87b03e5Sespie break;
197*c87b03e5Sespie
198*c87b03e5Sespie /* We have a backslash, and room for at least one more
199*c87b03e5Sespie character. Skip horizontal whitespace. */
200*c87b03e5Sespie saved_cur = buffer->cur;
201*c87b03e5Sespie do
202*c87b03e5Sespie next1 = *buffer->cur++;
203*c87b03e5Sespie while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
204*c87b03e5Sespie
205*c87b03e5Sespie if (!is_vspace (next1))
206*c87b03e5Sespie {
207*c87b03e5Sespie buffer->cur = saved_cur;
208*c87b03e5Sespie break;
209*c87b03e5Sespie }
210*c87b03e5Sespie
211*c87b03e5Sespie if (saved_cur != buffer->cur - 1
212*c87b03e5Sespie && !pfile->state.lexing_comment)
213*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
214*c87b03e5Sespie "backslash and newline separated by space");
215*c87b03e5Sespie
216*c87b03e5Sespie handle_newline (pfile);
217*c87b03e5Sespie buffer->backup_to = buffer->cur;
218*c87b03e5Sespie if (buffer->cur == buffer->rlimit)
219*c87b03e5Sespie {
220*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
221*c87b03e5Sespie "backslash-newline at end of file");
222*c87b03e5Sespie next = EOF;
223*c87b03e5Sespie }
224*c87b03e5Sespie else
225*c87b03e5Sespie next = *buffer->cur++;
226*c87b03e5Sespie }
227*c87b03e5Sespie while (next == '\\' || next == '?');
228*c87b03e5Sespie }
229*c87b03e5Sespie
230*c87b03e5Sespie return next;
231*c87b03e5Sespie }
232*c87b03e5Sespie
233*c87b03e5Sespie /* Obtain the next character, after trigraph conversion and skipping
234*c87b03e5Sespie an arbitrarily long string of escaped newlines. The common case of
235*c87b03e5Sespie no trigraphs or escaped newlines falls through quickly. On return,
236*c87b03e5Sespie buffer->backup_to points to where to return to if the character is
237*c87b03e5Sespie not to be processed. */
238*c87b03e5Sespie static cppchar_t
get_effective_char(pfile)239*c87b03e5Sespie get_effective_char (pfile)
240*c87b03e5Sespie cpp_reader *pfile;
241*c87b03e5Sespie {
242*c87b03e5Sespie cppchar_t next;
243*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
244*c87b03e5Sespie
245*c87b03e5Sespie buffer->backup_to = buffer->cur;
246*c87b03e5Sespie next = *buffer->cur++;
247*c87b03e5Sespie if (__builtin_expect (next == '?' || next == '\\', 0))
248*c87b03e5Sespie next = skip_escaped_newlines (pfile);
249*c87b03e5Sespie
250*c87b03e5Sespie return next;
251*c87b03e5Sespie }
252*c87b03e5Sespie
253*c87b03e5Sespie /* Skip a C-style block comment. We find the end of the comment by
254*c87b03e5Sespie seeing if an asterisk is before every '/' we encounter. Returns
255*c87b03e5Sespie nonzero if comment terminated by EOF, zero otherwise. */
256*c87b03e5Sespie static int
skip_block_comment(pfile)257*c87b03e5Sespie skip_block_comment (pfile)
258*c87b03e5Sespie cpp_reader *pfile;
259*c87b03e5Sespie {
260*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
261*c87b03e5Sespie cppchar_t c = EOF, prevc = EOF;
262*c87b03e5Sespie
263*c87b03e5Sespie pfile->state.lexing_comment = 1;
264*c87b03e5Sespie while (buffer->cur != buffer->rlimit)
265*c87b03e5Sespie {
266*c87b03e5Sespie prevc = c, c = *buffer->cur++;
267*c87b03e5Sespie
268*c87b03e5Sespie /* FIXME: For speed, create a new character class of characters
269*c87b03e5Sespie of interest inside block comments. */
270*c87b03e5Sespie if (c == '?' || c == '\\')
271*c87b03e5Sespie c = skip_escaped_newlines (pfile);
272*c87b03e5Sespie
273*c87b03e5Sespie /* People like decorating comments with '*', so check for '/'
274*c87b03e5Sespie instead for efficiency. */
275*c87b03e5Sespie if (c == '/')
276*c87b03e5Sespie {
277*c87b03e5Sespie if (prevc == '*')
278*c87b03e5Sespie break;
279*c87b03e5Sespie
280*c87b03e5Sespie /* Warn about potential nested comments, but not if the '/'
281*c87b03e5Sespie comes immediately before the true comment delimiter.
282*c87b03e5Sespie Don't bother to get it right across escaped newlines. */
283*c87b03e5Sespie if (CPP_OPTION (pfile, warn_comments)
284*c87b03e5Sespie && buffer->cur[0] == '*' && buffer->cur[1] != '/')
285*c87b03e5Sespie cpp_error_with_line (pfile, DL_WARNING,
286*c87b03e5Sespie pfile->line, CPP_BUF_COL (buffer),
287*c87b03e5Sespie "\"/*\" within comment");
288*c87b03e5Sespie }
289*c87b03e5Sespie else if (is_vspace (c))
290*c87b03e5Sespie handle_newline (pfile);
291*c87b03e5Sespie else if (c == '\t')
292*c87b03e5Sespie adjust_column (pfile);
293*c87b03e5Sespie }
294*c87b03e5Sespie
295*c87b03e5Sespie pfile->state.lexing_comment = 0;
296*c87b03e5Sespie return c != '/' || prevc != '*';
297*c87b03e5Sespie }
298*c87b03e5Sespie
299*c87b03e5Sespie /* Skip a C++ line comment, leaving buffer->cur pointing to the
300*c87b03e5Sespie terminating newline. Handles escaped newlines. Returns nonzero
301*c87b03e5Sespie if a multiline comment. */
302*c87b03e5Sespie static int
skip_line_comment(pfile)303*c87b03e5Sespie skip_line_comment (pfile)
304*c87b03e5Sespie cpp_reader *pfile;
305*c87b03e5Sespie {
306*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
307*c87b03e5Sespie unsigned int orig_line = pfile->line;
308*c87b03e5Sespie cppchar_t c;
309*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
310*c87b03e5Sespie wchar_t wc;
311*c87b03e5Sespie int char_len;
312*c87b03e5Sespie #endif
313*c87b03e5Sespie
314*c87b03e5Sespie pfile->state.lexing_comment = 1;
315*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
316*c87b03e5Sespie /* Reset multibyte conversion state. */
317*c87b03e5Sespie (void) local_mbtowc (NULL, NULL, 0);
318*c87b03e5Sespie #endif
319*c87b03e5Sespie do
320*c87b03e5Sespie {
321*c87b03e5Sespie if (buffer->cur == buffer->rlimit)
322*c87b03e5Sespie goto at_eof;
323*c87b03e5Sespie
324*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
325*c87b03e5Sespie char_len = local_mbtowc (&wc, (const char *) buffer->cur,
326*c87b03e5Sespie buffer->rlimit - buffer->cur);
327*c87b03e5Sespie if (char_len == -1)
328*c87b03e5Sespie {
329*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
330*c87b03e5Sespie "ignoring invalid multibyte character");
331*c87b03e5Sespie char_len = 1;
332*c87b03e5Sespie c = *buffer->cur++;
333*c87b03e5Sespie }
334*c87b03e5Sespie else
335*c87b03e5Sespie {
336*c87b03e5Sespie buffer->cur += char_len;
337*c87b03e5Sespie c = wc;
338*c87b03e5Sespie }
339*c87b03e5Sespie #else
340*c87b03e5Sespie c = *buffer->cur++;
341*c87b03e5Sespie #endif
342*c87b03e5Sespie if (c == '?' || c == '\\')
343*c87b03e5Sespie c = skip_escaped_newlines (pfile);
344*c87b03e5Sespie }
345*c87b03e5Sespie while (!is_vspace (c));
346*c87b03e5Sespie
347*c87b03e5Sespie /* Step back over the newline, except at EOF. */
348*c87b03e5Sespie buffer->cur--;
349*c87b03e5Sespie at_eof:
350*c87b03e5Sespie
351*c87b03e5Sespie pfile->state.lexing_comment = 0;
352*c87b03e5Sespie return orig_line != pfile->line;
353*c87b03e5Sespie }
354*c87b03e5Sespie
355*c87b03e5Sespie /* pfile->buffer->cur is one beyond the \t character. Update
356*c87b03e5Sespie col_adjust so we track the column correctly. */
357*c87b03e5Sespie static void
adjust_column(pfile)358*c87b03e5Sespie adjust_column (pfile)
359*c87b03e5Sespie cpp_reader *pfile;
360*c87b03e5Sespie {
361*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
362*c87b03e5Sespie unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
363*c87b03e5Sespie
364*c87b03e5Sespie /* Round it up to multiple of the tabstop, but subtract 1 since the
365*c87b03e5Sespie tab itself occupies a character position. */
366*c87b03e5Sespie buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
367*c87b03e5Sespie - col % CPP_OPTION (pfile, tabstop)) - 1;
368*c87b03e5Sespie }
369*c87b03e5Sespie
370*c87b03e5Sespie /* Skips whitespace, saving the next non-whitespace character.
371*c87b03e5Sespie Adjusts pfile->col_adjust to account for tabs. Without this,
372*c87b03e5Sespie tokens might be assigned an incorrect column. */
373*c87b03e5Sespie static int
skip_whitespace(pfile,c)374*c87b03e5Sespie skip_whitespace (pfile, c)
375*c87b03e5Sespie cpp_reader *pfile;
376*c87b03e5Sespie cppchar_t c;
377*c87b03e5Sespie {
378*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
379*c87b03e5Sespie unsigned int warned = 0;
380*c87b03e5Sespie
381*c87b03e5Sespie do
382*c87b03e5Sespie {
383*c87b03e5Sespie /* Horizontal space always OK. */
384*c87b03e5Sespie if (c == ' ')
385*c87b03e5Sespie ;
386*c87b03e5Sespie else if (c == '\t')
387*c87b03e5Sespie adjust_column (pfile);
388*c87b03e5Sespie /* Just \f \v or \0 left. */
389*c87b03e5Sespie else if (c == '\0')
390*c87b03e5Sespie {
391*c87b03e5Sespie if (buffer->cur - 1 == buffer->rlimit)
392*c87b03e5Sespie return 0;
393*c87b03e5Sespie if (!warned)
394*c87b03e5Sespie {
395*c87b03e5Sespie cpp_error (pfile, DL_WARNING, "null character(s) ignored");
396*c87b03e5Sespie warned = 1;
397*c87b03e5Sespie }
398*c87b03e5Sespie }
399*c87b03e5Sespie else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400*c87b03e5Sespie cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
401*c87b03e5Sespie CPP_BUF_COL (buffer),
402*c87b03e5Sespie "%s in preprocessing directive",
403*c87b03e5Sespie c == '\f' ? "form feed" : "vertical tab");
404*c87b03e5Sespie
405*c87b03e5Sespie c = *buffer->cur++;
406*c87b03e5Sespie }
407*c87b03e5Sespie /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
408*c87b03e5Sespie while (is_nvspace (c));
409*c87b03e5Sespie
410*c87b03e5Sespie buffer->cur--;
411*c87b03e5Sespie return 1;
412*c87b03e5Sespie }
413*c87b03e5Sespie
414*c87b03e5Sespie /* See if the characters of a number token are valid in a name (no
415*c87b03e5Sespie '.', '+' or '-'). */
416*c87b03e5Sespie static int
name_p(pfile,string)417*c87b03e5Sespie name_p (pfile, string)
418*c87b03e5Sespie cpp_reader *pfile;
419*c87b03e5Sespie const cpp_string *string;
420*c87b03e5Sespie {
421*c87b03e5Sespie unsigned int i;
422*c87b03e5Sespie
423*c87b03e5Sespie for (i = 0; i < string->len; i++)
424*c87b03e5Sespie if (!is_idchar (string->text[i]))
425*c87b03e5Sespie return 0;
426*c87b03e5Sespie
427*c87b03e5Sespie return 1;
428*c87b03e5Sespie }
429*c87b03e5Sespie
430*c87b03e5Sespie /* Parse an identifier, skipping embedded backslash-newlines. This is
431*c87b03e5Sespie a critical inner loop. The common case is an identifier which has
432*c87b03e5Sespie not been split by backslash-newline, does not contain a dollar
433*c87b03e5Sespie sign, and has already been scanned (roughly 10:1 ratio of
434*c87b03e5Sespie seen:unseen identifiers in normal code; the distribution is
435*c87b03e5Sespie Poisson-like). Second most common case is a new identifier, not
436*c87b03e5Sespie split and no dollar sign. The other possibilities are rare and
437*c87b03e5Sespie have been relegated to parse_slow. */
438*c87b03e5Sespie static cpp_hashnode *
parse_identifier(pfile)439*c87b03e5Sespie parse_identifier (pfile)
440*c87b03e5Sespie cpp_reader *pfile;
441*c87b03e5Sespie {
442*c87b03e5Sespie cpp_hashnode *result;
443*c87b03e5Sespie const uchar *cur, *base;
444*c87b03e5Sespie
445*c87b03e5Sespie /* Fast-path loop. Skim over a normal identifier.
446*c87b03e5Sespie N.B. ISIDNUM does not include $. */
447*c87b03e5Sespie cur = pfile->buffer->cur;
448*c87b03e5Sespie while (ISIDNUM (*cur))
449*c87b03e5Sespie cur++;
450*c87b03e5Sespie
451*c87b03e5Sespie /* Check for slow-path cases. */
452*c87b03e5Sespie if (*cur == '?' || *cur == '\\' || *cur == '$')
453*c87b03e5Sespie {
454*c87b03e5Sespie unsigned int len;
455*c87b03e5Sespie
456*c87b03e5Sespie base = parse_slow (pfile, cur, 0, &len);
457*c87b03e5Sespie result = (cpp_hashnode *)
458*c87b03e5Sespie ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
459*c87b03e5Sespie }
460*c87b03e5Sespie else
461*c87b03e5Sespie {
462*c87b03e5Sespie base = pfile->buffer->cur - 1;
463*c87b03e5Sespie pfile->buffer->cur = cur;
464*c87b03e5Sespie result = (cpp_hashnode *)
465*c87b03e5Sespie ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
466*c87b03e5Sespie }
467*c87b03e5Sespie
468*c87b03e5Sespie /* Rarely, identifiers require diagnostics when lexed.
469*c87b03e5Sespie XXX Has to be forced out of the fast path. */
470*c87b03e5Sespie if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
471*c87b03e5Sespie && !pfile->state.skipping, 0))
472*c87b03e5Sespie {
473*c87b03e5Sespie /* It is allowed to poison the same identifier twice. */
474*c87b03e5Sespie if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
475*c87b03e5Sespie cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
476*c87b03e5Sespie NODE_NAME (result));
477*c87b03e5Sespie
478*c87b03e5Sespie /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
479*c87b03e5Sespie replacement list of a variadic macro. */
480*c87b03e5Sespie if (result == pfile->spec_nodes.n__VA_ARGS__
481*c87b03e5Sespie && !pfile->state.va_args_ok)
482*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
483*c87b03e5Sespie "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
484*c87b03e5Sespie }
485*c87b03e5Sespie
486*c87b03e5Sespie return result;
487*c87b03e5Sespie }
488*c87b03e5Sespie
489*c87b03e5Sespie /* Slow path. This handles numbers and identifiers which have been
490*c87b03e5Sespie split, or contain dollar signs. The part of the token from
491*c87b03e5Sespie PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
492*c87b03e5Sespie 1 if it's a number, and 2 if it has a leading period. Returns a
493*c87b03e5Sespie pointer to the token's NUL-terminated spelling in permanent
494*c87b03e5Sespie storage, and sets PLEN to its length. */
495*c87b03e5Sespie static uchar *
parse_slow(pfile,cur,number_p,plen)496*c87b03e5Sespie parse_slow (pfile, cur, number_p, plen)
497*c87b03e5Sespie cpp_reader *pfile;
498*c87b03e5Sespie const uchar *cur;
499*c87b03e5Sespie int number_p;
500*c87b03e5Sespie unsigned int *plen;
501*c87b03e5Sespie {
502*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
503*c87b03e5Sespie const uchar *base = buffer->cur - 1;
504*c87b03e5Sespie struct obstack *stack = &pfile->hash_table->stack;
505*c87b03e5Sespie unsigned int c, prevc, saw_dollar = 0;
506*c87b03e5Sespie
507*c87b03e5Sespie /* Place any leading period. */
508*c87b03e5Sespie if (number_p == 2)
509*c87b03e5Sespie obstack_1grow (stack, '.');
510*c87b03e5Sespie
511*c87b03e5Sespie /* Copy the part of the token which is known to be okay. */
512*c87b03e5Sespie obstack_grow (stack, base, cur - base);
513*c87b03e5Sespie
514*c87b03e5Sespie /* Now process the part which isn't. We are looking at one of
515*c87b03e5Sespie '$', '\\', or '?' on entry to this loop. */
516*c87b03e5Sespie prevc = cur[-1];
517*c87b03e5Sespie c = *cur++;
518*c87b03e5Sespie buffer->cur = cur;
519*c87b03e5Sespie for (;;)
520*c87b03e5Sespie {
521*c87b03e5Sespie /* Potential escaped newline? */
522*c87b03e5Sespie buffer->backup_to = buffer->cur - 1;
523*c87b03e5Sespie if (c == '?' || c == '\\')
524*c87b03e5Sespie c = skip_escaped_newlines (pfile);
525*c87b03e5Sespie
526*c87b03e5Sespie if (!is_idchar (c))
527*c87b03e5Sespie {
528*c87b03e5Sespie if (!number_p)
529*c87b03e5Sespie break;
530*c87b03e5Sespie if (c != '.' && !VALID_SIGN (c, prevc))
531*c87b03e5Sespie break;
532*c87b03e5Sespie }
533*c87b03e5Sespie
534*c87b03e5Sespie /* Handle normal identifier characters in this loop. */
535*c87b03e5Sespie do
536*c87b03e5Sespie {
537*c87b03e5Sespie prevc = c;
538*c87b03e5Sespie obstack_1grow (stack, c);
539*c87b03e5Sespie
540*c87b03e5Sespie if (c == '$')
541*c87b03e5Sespie saw_dollar++;
542*c87b03e5Sespie
543*c87b03e5Sespie c = *buffer->cur++;
544*c87b03e5Sespie }
545*c87b03e5Sespie while (is_idchar (c));
546*c87b03e5Sespie }
547*c87b03e5Sespie
548*c87b03e5Sespie /* Step back over the unwanted char. */
549*c87b03e5Sespie BACKUP ();
550*c87b03e5Sespie
551*c87b03e5Sespie /* $ is not an identifier character in the standard, but is commonly
552*c87b03e5Sespie accepted as an extension. Don't warn about it in skipped
553*c87b03e5Sespie conditional blocks. */
554*c87b03e5Sespie if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
555*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
556*c87b03e5Sespie
557*c87b03e5Sespie /* Identifiers and numbers are null-terminated. */
558*c87b03e5Sespie *plen = obstack_object_size (stack);
559*c87b03e5Sespie obstack_1grow (stack, '\0');
560*c87b03e5Sespie return obstack_finish (stack);
561*c87b03e5Sespie }
562*c87b03e5Sespie
563*c87b03e5Sespie /* Parse a number, beginning with character C, skipping embedded
564*c87b03e5Sespie backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
565*c87b03e5Sespie before C. Place the result in NUMBER. */
566*c87b03e5Sespie static void
parse_number(pfile,number,leading_period)567*c87b03e5Sespie parse_number (pfile, number, leading_period)
568*c87b03e5Sespie cpp_reader *pfile;
569*c87b03e5Sespie cpp_string *number;
570*c87b03e5Sespie int leading_period;
571*c87b03e5Sespie {
572*c87b03e5Sespie const uchar *cur;
573*c87b03e5Sespie
574*c87b03e5Sespie /* Fast-path loop. Skim over a normal number.
575*c87b03e5Sespie N.B. ISIDNUM does not include $. */
576*c87b03e5Sespie cur = pfile->buffer->cur;
577*c87b03e5Sespie while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
578*c87b03e5Sespie cur++;
579*c87b03e5Sespie
580*c87b03e5Sespie /* Check for slow-path cases. */
581*c87b03e5Sespie if (*cur == '?' || *cur == '\\' || *cur == '$')
582*c87b03e5Sespie number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
583*c87b03e5Sespie else
584*c87b03e5Sespie {
585*c87b03e5Sespie const uchar *base = pfile->buffer->cur - 1;
586*c87b03e5Sespie uchar *dest;
587*c87b03e5Sespie
588*c87b03e5Sespie number->len = cur - base + leading_period;
589*c87b03e5Sespie dest = _cpp_unaligned_alloc (pfile, number->len + 1);
590*c87b03e5Sespie dest[number->len] = '\0';
591*c87b03e5Sespie number->text = dest;
592*c87b03e5Sespie
593*c87b03e5Sespie if (leading_period)
594*c87b03e5Sespie *dest++ = '.';
595*c87b03e5Sespie memcpy (dest, base, cur - base);
596*c87b03e5Sespie pfile->buffer->cur = cur;
597*c87b03e5Sespie }
598*c87b03e5Sespie }
599*c87b03e5Sespie
600*c87b03e5Sespie /* Subroutine of parse_string. */
601*c87b03e5Sespie static int
unescaped_terminator_p(pfile,dest)602*c87b03e5Sespie unescaped_terminator_p (pfile, dest)
603*c87b03e5Sespie cpp_reader *pfile;
604*c87b03e5Sespie const unsigned char *dest;
605*c87b03e5Sespie {
606*c87b03e5Sespie const unsigned char *start, *temp;
607*c87b03e5Sespie
608*c87b03e5Sespie /* In #include-style directives, terminators are not escapeable. */
609*c87b03e5Sespie if (pfile->state.angled_headers)
610*c87b03e5Sespie return 1;
611*c87b03e5Sespie
612*c87b03e5Sespie start = BUFF_FRONT (pfile->u_buff);
613*c87b03e5Sespie
614*c87b03e5Sespie /* An odd number of consecutive backslashes represents an escaped
615*c87b03e5Sespie terminator. */
616*c87b03e5Sespie for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
617*c87b03e5Sespie ;
618*c87b03e5Sespie
619*c87b03e5Sespie return ((dest - temp) & 1) == 0;
620*c87b03e5Sespie }
621*c87b03e5Sespie
622*c87b03e5Sespie /* Parses a string, character constant, or angle-bracketed header file
623*c87b03e5Sespie name. Handles embedded trigraphs and escaped newlines. The stored
624*c87b03e5Sespie string is guaranteed NUL-terminated, but it is not guaranteed that
625*c87b03e5Sespie this is the first NUL since embedded NULs are preserved.
626*c87b03e5Sespie
627*c87b03e5Sespie When this function returns, buffer->cur points to the next
628*c87b03e5Sespie character to be processed. */
629*c87b03e5Sespie static void
parse_string(pfile,token,terminator)630*c87b03e5Sespie parse_string (pfile, token, terminator)
631*c87b03e5Sespie cpp_reader *pfile;
632*c87b03e5Sespie cpp_token *token;
633*c87b03e5Sespie cppchar_t terminator;
634*c87b03e5Sespie {
635*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
636*c87b03e5Sespie unsigned char *dest, *limit;
637*c87b03e5Sespie cppchar_t c;
638*c87b03e5Sespie bool warned_nulls = false;
639*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
640*c87b03e5Sespie wchar_t wc;
641*c87b03e5Sespie int char_len;
642*c87b03e5Sespie #endif
643*c87b03e5Sespie
644*c87b03e5Sespie dest = BUFF_FRONT (pfile->u_buff);
645*c87b03e5Sespie limit = BUFF_LIMIT (pfile->u_buff);
646*c87b03e5Sespie
647*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
648*c87b03e5Sespie /* Reset multibyte conversion state. */
649*c87b03e5Sespie (void) local_mbtowc (NULL, NULL, 0);
650*c87b03e5Sespie #endif
651*c87b03e5Sespie for (;;)
652*c87b03e5Sespie {
653*c87b03e5Sespie /* We need room for another char, possibly the terminating NUL. */
654*c87b03e5Sespie if ((size_t) (limit - dest) < 1)
655*c87b03e5Sespie {
656*c87b03e5Sespie size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
657*c87b03e5Sespie _cpp_extend_buff (pfile, &pfile->u_buff, 2);
658*c87b03e5Sespie dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
659*c87b03e5Sespie limit = BUFF_LIMIT (pfile->u_buff);
660*c87b03e5Sespie }
661*c87b03e5Sespie
662*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
663*c87b03e5Sespie char_len = local_mbtowc (&wc, (const char *) buffer->cur,
664*c87b03e5Sespie buffer->rlimit - buffer->cur);
665*c87b03e5Sespie if (char_len == -1)
666*c87b03e5Sespie {
667*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
668*c87b03e5Sespie "ignoring invalid multibyte character");
669*c87b03e5Sespie char_len = 1;
670*c87b03e5Sespie c = *buffer->cur++;
671*c87b03e5Sespie }
672*c87b03e5Sespie else
673*c87b03e5Sespie {
674*c87b03e5Sespie buffer->cur += char_len;
675*c87b03e5Sespie c = wc;
676*c87b03e5Sespie }
677*c87b03e5Sespie #else
678*c87b03e5Sespie c = *buffer->cur++;
679*c87b03e5Sespie #endif
680*c87b03e5Sespie
681*c87b03e5Sespie /* Handle trigraphs, escaped newlines etc. */
682*c87b03e5Sespie if (c == '?' || c == '\\')
683*c87b03e5Sespie c = skip_escaped_newlines (pfile);
684*c87b03e5Sespie
685*c87b03e5Sespie if (c == terminator)
686*c87b03e5Sespie {
687*c87b03e5Sespie if (unescaped_terminator_p (pfile, dest))
688*c87b03e5Sespie break;
689*c87b03e5Sespie }
690*c87b03e5Sespie else if (is_vspace (c))
691*c87b03e5Sespie {
692*c87b03e5Sespie /* No string literal may extend over multiple lines. In
693*c87b03e5Sespie assembly language, suppress the error except for <>
694*c87b03e5Sespie includes. This is a kludge around not knowing where
695*c87b03e5Sespie comments are. */
696*c87b03e5Sespie unterminated:
697*c87b03e5Sespie if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
698*c87b03e5Sespie cpp_error (pfile, DL_ERROR, "missing terminating %c character",
699*c87b03e5Sespie (int) terminator);
700*c87b03e5Sespie buffer->cur--;
701*c87b03e5Sespie break;
702*c87b03e5Sespie }
703*c87b03e5Sespie else if (c == '\0')
704*c87b03e5Sespie {
705*c87b03e5Sespie if (buffer->cur - 1 == buffer->rlimit)
706*c87b03e5Sespie goto unterminated;
707*c87b03e5Sespie if (!warned_nulls)
708*c87b03e5Sespie {
709*c87b03e5Sespie warned_nulls = true;
710*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
711*c87b03e5Sespie "null character(s) preserved in literal");
712*c87b03e5Sespie }
713*c87b03e5Sespie }
714*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
715*c87b03e5Sespie if (char_len > 1)
716*c87b03e5Sespie {
717*c87b03e5Sespie for ( ; char_len > 0; --char_len)
718*c87b03e5Sespie *dest++ = (*buffer->cur - char_len);
719*c87b03e5Sespie }
720*c87b03e5Sespie else
721*c87b03e5Sespie #endif
722*c87b03e5Sespie *dest++ = c;
723*c87b03e5Sespie }
724*c87b03e5Sespie
725*c87b03e5Sespie *dest = '\0';
726*c87b03e5Sespie
727*c87b03e5Sespie token->val.str.text = BUFF_FRONT (pfile->u_buff);
728*c87b03e5Sespie token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
729*c87b03e5Sespie BUFF_FRONT (pfile->u_buff) = dest + 1;
730*c87b03e5Sespie }
731*c87b03e5Sespie
732*c87b03e5Sespie /* The stored comment includes the comment start and any terminator. */
733*c87b03e5Sespie static void
save_comment(pfile,token,from,type)734*c87b03e5Sespie save_comment (pfile, token, from, type)
735*c87b03e5Sespie cpp_reader *pfile;
736*c87b03e5Sespie cpp_token *token;
737*c87b03e5Sespie const unsigned char *from;
738*c87b03e5Sespie cppchar_t type;
739*c87b03e5Sespie {
740*c87b03e5Sespie unsigned char *buffer;
741*c87b03e5Sespie unsigned int len, clen;
742*c87b03e5Sespie
743*c87b03e5Sespie len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
744*c87b03e5Sespie
745*c87b03e5Sespie /* C++ comments probably (not definitely) have moved past a new
746*c87b03e5Sespie line, which we don't want to save in the comment. */
747*c87b03e5Sespie if (is_vspace (pfile->buffer->cur[-1]))
748*c87b03e5Sespie len--;
749*c87b03e5Sespie
750*c87b03e5Sespie /* If we are currently in a directive, then we need to store all
751*c87b03e5Sespie C++ comments as C comments internally, and so we need to
752*c87b03e5Sespie allocate a little extra space in that case.
753*c87b03e5Sespie
754*c87b03e5Sespie Note that the only time we encounter a directive here is
755*c87b03e5Sespie when we are saving comments in a "#define". */
756*c87b03e5Sespie clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
757*c87b03e5Sespie
758*c87b03e5Sespie buffer = _cpp_unaligned_alloc (pfile, clen);
759*c87b03e5Sespie
760*c87b03e5Sespie token->type = CPP_COMMENT;
761*c87b03e5Sespie token->val.str.len = clen;
762*c87b03e5Sespie token->val.str.text = buffer;
763*c87b03e5Sespie
764*c87b03e5Sespie buffer[0] = '/';
765*c87b03e5Sespie memcpy (buffer + 1, from, len - 1);
766*c87b03e5Sespie
767*c87b03e5Sespie /* Finish conversion to a C comment, if necessary. */
768*c87b03e5Sespie if (pfile->state.in_directive && type == '/')
769*c87b03e5Sespie {
770*c87b03e5Sespie buffer[1] = '*';
771*c87b03e5Sespie buffer[clen - 2] = '*';
772*c87b03e5Sespie buffer[clen - 1] = '/';
773*c87b03e5Sespie }
774*c87b03e5Sespie }
775*c87b03e5Sespie
776*c87b03e5Sespie /* Allocate COUNT tokens for RUN. */
777*c87b03e5Sespie void
_cpp_init_tokenrun(run,count)778*c87b03e5Sespie _cpp_init_tokenrun (run, count)
779*c87b03e5Sespie tokenrun *run;
780*c87b03e5Sespie unsigned int count;
781*c87b03e5Sespie {
782*c87b03e5Sespie run->base = xnewvec (cpp_token, count);
783*c87b03e5Sespie run->limit = run->base + count;
784*c87b03e5Sespie run->next = NULL;
785*c87b03e5Sespie }
786*c87b03e5Sespie
787*c87b03e5Sespie /* Returns the next tokenrun, or creates one if there is none. */
788*c87b03e5Sespie static tokenrun *
next_tokenrun(run)789*c87b03e5Sespie next_tokenrun (run)
790*c87b03e5Sespie tokenrun *run;
791*c87b03e5Sespie {
792*c87b03e5Sespie if (run->next == NULL)
793*c87b03e5Sespie {
794*c87b03e5Sespie run->next = xnew (tokenrun);
795*c87b03e5Sespie run->next->prev = run;
796*c87b03e5Sespie _cpp_init_tokenrun (run->next, 250);
797*c87b03e5Sespie }
798*c87b03e5Sespie
799*c87b03e5Sespie return run->next;
800*c87b03e5Sespie }
801*c87b03e5Sespie
802*c87b03e5Sespie /* Allocate a single token that is invalidated at the same time as the
803*c87b03e5Sespie rest of the tokens on the line. Has its line and col set to the
804*c87b03e5Sespie same as the last lexed token, so that diagnostics appear in the
805*c87b03e5Sespie right place. */
806*c87b03e5Sespie cpp_token *
_cpp_temp_token(pfile)807*c87b03e5Sespie _cpp_temp_token (pfile)
808*c87b03e5Sespie cpp_reader *pfile;
809*c87b03e5Sespie {
810*c87b03e5Sespie cpp_token *old, *result;
811*c87b03e5Sespie
812*c87b03e5Sespie old = pfile->cur_token - 1;
813*c87b03e5Sespie if (pfile->cur_token == pfile->cur_run->limit)
814*c87b03e5Sespie {
815*c87b03e5Sespie pfile->cur_run = next_tokenrun (pfile->cur_run);
816*c87b03e5Sespie pfile->cur_token = pfile->cur_run->base;
817*c87b03e5Sespie }
818*c87b03e5Sespie
819*c87b03e5Sespie result = pfile->cur_token++;
820*c87b03e5Sespie result->line = old->line;
821*c87b03e5Sespie result->col = old->col;
822*c87b03e5Sespie return result;
823*c87b03e5Sespie }
824*c87b03e5Sespie
825*c87b03e5Sespie /* Lex a token into RESULT (external interface). Takes care of issues
826*c87b03e5Sespie like directive handling, token lookahead, multiple include
827*c87b03e5Sespie optimization and skipping. */
828*c87b03e5Sespie const cpp_token *
_cpp_lex_token(pfile)829*c87b03e5Sespie _cpp_lex_token (pfile)
830*c87b03e5Sespie cpp_reader *pfile;
831*c87b03e5Sespie {
832*c87b03e5Sespie cpp_token *result;
833*c87b03e5Sespie
834*c87b03e5Sespie for (;;)
835*c87b03e5Sespie {
836*c87b03e5Sespie if (pfile->cur_token == pfile->cur_run->limit)
837*c87b03e5Sespie {
838*c87b03e5Sespie pfile->cur_run = next_tokenrun (pfile->cur_run);
839*c87b03e5Sespie pfile->cur_token = pfile->cur_run->base;
840*c87b03e5Sespie }
841*c87b03e5Sespie
842*c87b03e5Sespie if (pfile->lookaheads)
843*c87b03e5Sespie {
844*c87b03e5Sespie pfile->lookaheads--;
845*c87b03e5Sespie result = pfile->cur_token++;
846*c87b03e5Sespie }
847*c87b03e5Sespie else
848*c87b03e5Sespie result = _cpp_lex_direct (pfile);
849*c87b03e5Sespie
850*c87b03e5Sespie if (result->flags & BOL)
851*c87b03e5Sespie {
852*c87b03e5Sespie /* Is this a directive. If _cpp_handle_directive returns
853*c87b03e5Sespie false, it is an assembler #. */
854*c87b03e5Sespie if (result->type == CPP_HASH
855*c87b03e5Sespie /* 6.10.3 p 11: Directives in a list of macro arguments
856*c87b03e5Sespie gives undefined behavior. This implementation
857*c87b03e5Sespie handles the directive as normal. */
858*c87b03e5Sespie && pfile->state.parsing_args != 1
859*c87b03e5Sespie && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
860*c87b03e5Sespie continue;
861*c87b03e5Sespie if (pfile->cb.line_change && !pfile->state.skipping)
862*c87b03e5Sespie (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
863*c87b03e5Sespie }
864*c87b03e5Sespie
865*c87b03e5Sespie /* We don't skip tokens in directives. */
866*c87b03e5Sespie if (pfile->state.in_directive)
867*c87b03e5Sespie break;
868*c87b03e5Sespie
869*c87b03e5Sespie /* Outside a directive, invalidate controlling macros. At file
870*c87b03e5Sespie EOF, _cpp_lex_direct takes care of popping the buffer, so we never
871*c87b03e5Sespie get here and MI optimisation works. */
872*c87b03e5Sespie pfile->mi_valid = false;
873*c87b03e5Sespie
874*c87b03e5Sespie if (!pfile->state.skipping || result->type == CPP_EOF)
875*c87b03e5Sespie break;
876*c87b03e5Sespie }
877*c87b03e5Sespie
878*c87b03e5Sespie return result;
879*c87b03e5Sespie }
880*c87b03e5Sespie
881*c87b03e5Sespie /* A NUL terminates the current buffer. For ISO preprocessing this is
882*c87b03e5Sespie EOF, but for traditional preprocessing it indicates we need a line
883*c87b03e5Sespie refill. Returns TRUE to continue preprocessing a new buffer, FALSE
884*c87b03e5Sespie to return a CPP_EOF to the caller. */
885*c87b03e5Sespie static bool
continue_after_nul(pfile)886*c87b03e5Sespie continue_after_nul (pfile)
887*c87b03e5Sespie cpp_reader *pfile;
888*c87b03e5Sespie {
889*c87b03e5Sespie cpp_buffer *buffer = pfile->buffer;
890*c87b03e5Sespie bool more = false;
891*c87b03e5Sespie
892*c87b03e5Sespie buffer->saved_flags = BOL;
893*c87b03e5Sespie if (CPP_OPTION (pfile, traditional))
894*c87b03e5Sespie {
895*c87b03e5Sespie if (pfile->state.in_directive)
896*c87b03e5Sespie return false;
897*c87b03e5Sespie
898*c87b03e5Sespie _cpp_remove_overlay (pfile);
899*c87b03e5Sespie more = _cpp_read_logical_line_trad (pfile);
900*c87b03e5Sespie _cpp_overlay_buffer (pfile, pfile->out.base,
901*c87b03e5Sespie pfile->out.cur - pfile->out.base);
902*c87b03e5Sespie pfile->line = pfile->out.first_line;
903*c87b03e5Sespie }
904*c87b03e5Sespie else
905*c87b03e5Sespie {
906*c87b03e5Sespie /* Stop parsing arguments with a CPP_EOF. When we finally come
907*c87b03e5Sespie back here, do the work of popping the buffer. */
908*c87b03e5Sespie if (!pfile->state.parsing_args)
909*c87b03e5Sespie {
910*c87b03e5Sespie if (buffer->cur != buffer->line_base)
911*c87b03e5Sespie {
912*c87b03e5Sespie /* Non-empty files should end in a newline. Don't warn
913*c87b03e5Sespie for command line and _Pragma buffers. */
914*c87b03e5Sespie handle_newline (pfile);
915*c87b03e5Sespie }
916*c87b03e5Sespie
917*c87b03e5Sespie /* Similarly, finish an in-progress directive with CPP_EOF
918*c87b03e5Sespie before popping the buffer. */
919*c87b03e5Sespie if (!pfile->state.in_directive && buffer->prev)
920*c87b03e5Sespie {
921*c87b03e5Sespie more = !buffer->return_at_eof;
922*c87b03e5Sespie _cpp_pop_buffer (pfile);
923*c87b03e5Sespie }
924*c87b03e5Sespie }
925*c87b03e5Sespie }
926*c87b03e5Sespie
927*c87b03e5Sespie return more;
928*c87b03e5Sespie }
929*c87b03e5Sespie
/* Helper for _cpp_lex_direct.  Reads the next effective character; if
   it is CHAR the token becomes THEN_TYPE, otherwise the character is
   pushed back with BACKUP and the token becomes ELSE_TYPE.  Relies on
   `pfile' and `result' being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do {                                          \
    if (get_effective_char (pfile) != CHAR)     \
      {                                         \
        BACKUP ();                              \
        result->type = ELSE_TYPE;               \
      }                                         \
    else                                        \
      result->type = THEN_TYPE;                 \
  } while (0)
940*c87b03e5Sespie
941*c87b03e5Sespie /* Lex a token into pfile->cur_token, which is also incremented, to
942*c87b03e5Sespie get diagnostics pointing to the correct location.
943*c87b03e5Sespie
944*c87b03e5Sespie Does not handle issues such as token lookahead, multiple-include
945*c87b03e5Sespie optimisation, directives, skipping etc. This function is only
946*c87b03e5Sespie suitable for use by _cpp_lex_token, and in special cases like
947*c87b03e5Sespie lex_expansion_token which doesn't care for any of these issues.
948*c87b03e5Sespie
949*c87b03e5Sespie When meeting a newline, returns CPP_EOF if parsing a directive,
950*c87b03e5Sespie otherwise returns to the start of the token buffer if permissible.
951*c87b03e5Sespie Returns the location of the lexed token. */
952*c87b03e5Sespie cpp_token *
_cpp_lex_direct(pfile)953*c87b03e5Sespie _cpp_lex_direct (pfile)
954*c87b03e5Sespie cpp_reader *pfile;
955*c87b03e5Sespie {
956*c87b03e5Sespie cppchar_t c;
957*c87b03e5Sespie cpp_buffer *buffer;
958*c87b03e5Sespie const unsigned char *comment_start;
/* Claim the slot at cur_token for the token being lexed; cur_token is
   advanced straight away.  */
959*c87b03e5Sespie cpp_token *result = pfile->cur_token++;
960*c87b03e5Sespie
/* Entry point for a new line or buffer: the token takes the flags
   saved for it (e.g. BOL), which are then cleared.  */
961*c87b03e5Sespie fresh_line:
962*c87b03e5Sespie buffer = pfile->buffer;
963*c87b03e5Sespie result->flags = buffer->saved_flags;
964*c87b03e5Sespie buffer->saved_flags = 0;
/* Entry point when the token's flags must be kept but its line may
   have changed (after escaped newlines or a skipped comment).  */
965*c87b03e5Sespie update_tokens_line:
966*c87b03e5Sespie result->line = pfile->line;
967*c87b03e5Sespie
/* Entry point after horizontal whitespace has been skipped: read the
   first character of the token and record its column.  */
968*c87b03e5Sespie skipped_white:
969*c87b03e5Sespie c = *buffer->cur++;
970*c87b03e5Sespie result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
971*c87b03e5Sespie
/* Re-entered from the '?' / '\\' case below with C replaced by what
   skip_escaped_newlines returned.  */
972*c87b03e5Sespie trigraph:
973*c87b03e5Sespie switch (c)
974*c87b03e5Sespie {
975*c87b03e5Sespie case ' ': case '\t': case '\f': case '\v': case '\0':
976*c87b03e5Sespie result->flags |= PREV_WHITE;
977*c87b03e5Sespie if (skip_whitespace (pfile, c))
978*c87b03e5Sespie goto skipped_white;
979*c87b03e5Sespie
980*c87b03e5Sespie /* End of buffer. */
981*c87b03e5Sespie buffer->cur--;
982*c87b03e5Sespie if (continue_after_nul (pfile))
983*c87b03e5Sespie goto fresh_line;
984*c87b03e5Sespie result->type = CPP_EOF;
985*c87b03e5Sespie break;
986*c87b03e5Sespie
987*c87b03e5Sespie case '\n': case '\r':
988*c87b03e5Sespie handle_newline (pfile);
989*c87b03e5Sespie buffer->saved_flags = BOL;
990*c87b03e5Sespie if (! pfile->state.in_directive)
991*c87b03e5Sespie {
992*c87b03e5Sespie if (pfile->state.parsing_args == 2)
993*c87b03e5Sespie buffer->saved_flags |= PREV_WHITE;
994*c87b03e5Sespie if (!pfile->keep_tokens)
995*c87b03e5Sespie {
/* Earlier tokens need not be kept: start again at the first slot of
   the base run.  */
996*c87b03e5Sespie pfile->cur_run = &pfile->base_run;
997*c87b03e5Sespie result = pfile->base_run.base;
998*c87b03e5Sespie pfile->cur_token = result + 1;
999*c87b03e5Sespie }
1000*c87b03e5Sespie goto fresh_line;
1001*c87b03e5Sespie }
/* Inside a directive, the newline is reported as CPP_EOF.  */
1002*c87b03e5Sespie result->type = CPP_EOF;
1003*c87b03e5Sespie break;
1004*c87b03e5Sespie
1005*c87b03e5Sespie case '?':
1006*c87b03e5Sespie case '\\':
1007*c87b03e5Sespie /* These could start an escaped newline, or '?' a trigraph. Let
1008*c87b03e5Sespie skip_escaped_newlines do all the work. */
1009*c87b03e5Sespie {
1010*c87b03e5Sespie unsigned int line = pfile->line;
1011*c87b03e5Sespie
1012*c87b03e5Sespie c = skip_escaped_newlines (pfile);
1013*c87b03e5Sespie if (line != pfile->line)
1014*c87b03e5Sespie {
1015*c87b03e5Sespie buffer->cur--;
1016*c87b03e5Sespie /* We had at least one escaped newline of some sort.
1017*c87b03e5Sespie Update the token's line and column. */
1018*c87b03e5Sespie goto update_tokens_line;
1019*c87b03e5Sespie }
1020*c87b03e5Sespie }
1021*c87b03e5Sespie
1022*c87b03e5Sespie /* We are either the original '?' or '\\', or a trigraph. */
1023*c87b03e5Sespie if (c == '?')
1024*c87b03e5Sespie result->type = CPP_QUERY;
1025*c87b03e5Sespie else if (c == '\\')
1026*c87b03e5Sespie goto random_char;
1027*c87b03e5Sespie else
1028*c87b03e5Sespie goto trigraph;
1029*c87b03e5Sespie break;
1030*c87b03e5Sespie
1031*c87b03e5Sespie case '0': case '1': case '2': case '3': case '4':
1032*c87b03e5Sespie case '5': case '6': case '7': case '8': case '9':
1033*c87b03e5Sespie result->type = CPP_NUMBER;
1034*c87b03e5Sespie parse_number (pfile, &result->val.str, 0);
1035*c87b03e5Sespie break;
1036*c87b03e5Sespie
1037*c87b03e5Sespie case 'L':
1038*c87b03e5Sespie /* 'L' may introduce wide characters or strings. */
1039*c87b03e5Sespie {
1040*c87b03e5Sespie const unsigned char *pos = buffer->cur;
1041*c87b03e5Sespie
1042*c87b03e5Sespie c = get_effective_char (pfile);
1043*c87b03e5Sespie if (c == '\'' || c == '"')
1044*c87b03e5Sespie {
1045*c87b03e5Sespie result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1046*c87b03e5Sespie parse_string (pfile, result, c);
1047*c87b03e5Sespie break;
1048*c87b03e5Sespie }
/* Not a wide literal: restore the read position so that the 'L' is
   lexed as the start of an identifier.  */
1049*c87b03e5Sespie buffer->cur = pos;
1050*c87b03e5Sespie }
1051*c87b03e5Sespie /* Fall through. */
1052*c87b03e5Sespie
1053*c87b03e5Sespie start_ident:
1054*c87b03e5Sespie case '_':
1055*c87b03e5Sespie case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1056*c87b03e5Sespie case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1057*c87b03e5Sespie case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1058*c87b03e5Sespie case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1059*c87b03e5Sespie case 'y': case 'z':
1060*c87b03e5Sespie case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1061*c87b03e5Sespie case 'G': case 'H': case 'I': case 'J': case 'K':
1062*c87b03e5Sespie case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1063*c87b03e5Sespie case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1064*c87b03e5Sespie case 'Y': case 'Z':
1065*c87b03e5Sespie result->type = CPP_NAME;
1066*c87b03e5Sespie result->val.node = parse_identifier (pfile);
1067*c87b03e5Sespie
1068*c87b03e5Sespie /* Convert named operators to their proper types. */
1069*c87b03e5Sespie if (result->val.node->flags & NODE_OPERATOR)
1070*c87b03e5Sespie {
/* val.node is left pointing at the identifier; the NAMED_OP flag
   records that this operator was written as a name.  */
1071*c87b03e5Sespie result->flags |= NAMED_OP;
1072*c87b03e5Sespie result->type = result->val.node->value.operator;
1073*c87b03e5Sespie }
1074*c87b03e5Sespie break;
1075*c87b03e5Sespie
1076*c87b03e5Sespie case '\'':
1077*c87b03e5Sespie case '"':
1078*c87b03e5Sespie result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1079*c87b03e5Sespie parse_string (pfile, result, c);
1080*c87b03e5Sespie break;
1081*c87b03e5Sespie
1082*c87b03e5Sespie case '/':
1083*c87b03e5Sespie /* A potential block or line comment. */
1084*c87b03e5Sespie comment_start = buffer->cur;
1085*c87b03e5Sespie c = get_effective_char (pfile);
1086*c87b03e5Sespie
1087*c87b03e5Sespie if (c == '*')
1088*c87b03e5Sespie {
1089*c87b03e5Sespie if (skip_block_comment (pfile))
1090*c87b03e5Sespie cpp_error (pfile, DL_ERROR, "unterminated comment");
1091*c87b03e5Sespie }
1092*c87b03e5Sespie else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1093*c87b03e5Sespie || CPP_IN_SYSTEM_HEADER (pfile)))
1094*c87b03e5Sespie {
1095*c87b03e5Sespie /* Warn about comments only if pedantically GNUC89, and not
1096*c87b03e5Sespie in system headers. */
1097*c87b03e5Sespie if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1098*c87b03e5Sespie && ! buffer->warned_cplusplus_comments)
1099*c87b03e5Sespie {
1100*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
1101*c87b03e5Sespie "C++ style comments are not allowed in ISO C90");
1102*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
1103*c87b03e5Sespie "(this will be reported only once per input file)");
1104*c87b03e5Sespie buffer->warned_cplusplus_comments = 1;
1105*c87b03e5Sespie }
1106*c87b03e5Sespie
1107*c87b03e5Sespie if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1108*c87b03e5Sespie cpp_error (pfile, DL_WARNING, "multi-line comment");
1109*c87b03e5Sespie }
1110*c87b03e5Sespie else if (c == '=')
1111*c87b03e5Sespie {
1112*c87b03e5Sespie result->type = CPP_DIV_EQ;
1113*c87b03e5Sespie break;
1114*c87b03e5Sespie }
1115*c87b03e5Sespie else
1116*c87b03e5Sespie {
1117*c87b03e5Sespie BACKUP ();
1118*c87b03e5Sespie result->type = CPP_DIV;
1119*c87b03e5Sespie break;
1120*c87b03e5Sespie }
1121*c87b03e5Sespie
/* Only the two comment branches reach this point.  When comments are
   not being saved, the comment counts as whitespace and lexing
   continues with whatever follows it.  */
1122*c87b03e5Sespie if (!pfile->state.save_comments)
1123*c87b03e5Sespie {
1124*c87b03e5Sespie result->flags |= PREV_WHITE;
1125*c87b03e5Sespie goto update_tokens_line;
1126*c87b03e5Sespie }
1127*c87b03e5Sespie
1128*c87b03e5Sespie /* Save the comment as a token in its own right. */
1129*c87b03e5Sespie save_comment (pfile, result, comment_start, c);
1130*c87b03e5Sespie break;
1131*c87b03e5Sespie
1132*c87b03e5Sespie case '<':
/* When angled_headers is set, '<' opens a header name that runs to
   the matching '>'.  */
1133*c87b03e5Sespie if (pfile->state.angled_headers)
1134*c87b03e5Sespie {
1135*c87b03e5Sespie result->type = CPP_HEADER_NAME;
1136*c87b03e5Sespie parse_string (pfile, result, '>');
1137*c87b03e5Sespie break;
1138*c87b03e5Sespie }
1139*c87b03e5Sespie
1140*c87b03e5Sespie c = get_effective_char (pfile);
1141*c87b03e5Sespie if (c == '=')
1142*c87b03e5Sespie result->type = CPP_LESS_EQ;
1143*c87b03e5Sespie else if (c == '<')
1144*c87b03e5Sespie IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1145*c87b03e5Sespie else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1146*c87b03e5Sespie IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1147*c87b03e5Sespie else if (c == ':' && CPP_OPTION (pfile, digraphs))
1148*c87b03e5Sespie {
1149*c87b03e5Sespie result->type = CPP_OPEN_SQUARE;
1150*c87b03e5Sespie result->flags |= DIGRAPH;
1151*c87b03e5Sespie }
1152*c87b03e5Sespie else if (c == '%' && CPP_OPTION (pfile, digraphs))
1153*c87b03e5Sespie {
1154*c87b03e5Sespie result->type = CPP_OPEN_BRACE;
1155*c87b03e5Sespie result->flags |= DIGRAPH;
1156*c87b03e5Sespie }
1157*c87b03e5Sespie else
1158*c87b03e5Sespie {
1159*c87b03e5Sespie BACKUP ();
1160*c87b03e5Sespie result->type = CPP_LESS;
1161*c87b03e5Sespie }
1162*c87b03e5Sespie break;
1163*c87b03e5Sespie
1164*c87b03e5Sespie case '>':
1165*c87b03e5Sespie c = get_effective_char (pfile);
1166*c87b03e5Sespie if (c == '=')
1167*c87b03e5Sespie result->type = CPP_GREATER_EQ;
1168*c87b03e5Sespie else if (c == '>')
1169*c87b03e5Sespie IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1170*c87b03e5Sespie else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1171*c87b03e5Sespie IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1172*c87b03e5Sespie else
1173*c87b03e5Sespie {
1174*c87b03e5Sespie BACKUP ();
1175*c87b03e5Sespie result->type = CPP_GREATER;
1176*c87b03e5Sespie }
1177*c87b03e5Sespie break;
1178*c87b03e5Sespie
1179*c87b03e5Sespie case '%':
1180*c87b03e5Sespie c = get_effective_char (pfile);
1181*c87b03e5Sespie if (c == '=')
1182*c87b03e5Sespie result->type = CPP_MOD_EQ;
1183*c87b03e5Sespie else if (CPP_OPTION (pfile, digraphs) && c == ':')
1184*c87b03e5Sespie {
/* "%:" is the digraph for '#'; "%:%:" is the digraph for "##".  */
1185*c87b03e5Sespie result->flags |= DIGRAPH;
1186*c87b03e5Sespie result->type = CPP_HASH;
1187*c87b03e5Sespie if (get_effective_char (pfile) == '%')
1188*c87b03e5Sespie {
1189*c87b03e5Sespie const unsigned char *pos = buffer->cur;
1190*c87b03e5Sespie
1191*c87b03e5Sespie if (get_effective_char (pfile) == ':')
1192*c87b03e5Sespie result->type = CPP_PASTE;
1193*c87b03e5Sespie else
/* Not "%:%:": rewind to one byte before POS so that the second '%'
   is lexed again as the start of the next token.  */
1194*c87b03e5Sespie buffer->cur = pos - 1;
1195*c87b03e5Sespie }
1196*c87b03e5Sespie else
1197*c87b03e5Sespie BACKUP ();
1198*c87b03e5Sespie }
1199*c87b03e5Sespie else if (CPP_OPTION (pfile, digraphs) && c == '>')
1200*c87b03e5Sespie {
1201*c87b03e5Sespie result->flags |= DIGRAPH;
1202*c87b03e5Sespie result->type = CPP_CLOSE_BRACE;
1203*c87b03e5Sespie }
1204*c87b03e5Sespie else
1205*c87b03e5Sespie {
1206*c87b03e5Sespie BACKUP ();
1207*c87b03e5Sespie result->type = CPP_MOD;
1208*c87b03e5Sespie }
1209*c87b03e5Sespie break;
1210*c87b03e5Sespie
1211*c87b03e5Sespie case '.':
1212*c87b03e5Sespie result->type = CPP_DOT;
1213*c87b03e5Sespie c = get_effective_char (pfile);
1214*c87b03e5Sespie if (c == '.')
1215*c87b03e5Sespie {
1216*c87b03e5Sespie const unsigned char *pos = buffer->cur;
1217*c87b03e5Sespie
1218*c87b03e5Sespie if (get_effective_char (pfile) == '.')
1219*c87b03e5Sespie result->type = CPP_ELLIPSIS;
1220*c87b03e5Sespie else
/* Only two dots: this token stays CPP_DOT, and the read position is
   rewound to one byte before POS so the second '.' is lexed again.  */
1221*c87b03e5Sespie buffer->cur = pos - 1;
1222*c87b03e5Sespie }
1223*c87b03e5Sespie /* All known character sets have 0...9 contiguous. */
1224*c87b03e5Sespie else if (ISDIGIT (c))
1225*c87b03e5Sespie {
1226*c87b03e5Sespie result->type = CPP_NUMBER;
1227*c87b03e5Sespie parse_number (pfile, &result->val.str, 1);
1228*c87b03e5Sespie }
1229*c87b03e5Sespie else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1230*c87b03e5Sespie result->type = CPP_DOT_STAR;
1231*c87b03e5Sespie else
1232*c87b03e5Sespie BACKUP ();
1233*c87b03e5Sespie break;
1234*c87b03e5Sespie
1235*c87b03e5Sespie case '+':
1236*c87b03e5Sespie c = get_effective_char (pfile);
1237*c87b03e5Sespie if (c == '+')
1238*c87b03e5Sespie result->type = CPP_PLUS_PLUS;
1239*c87b03e5Sespie else if (c == '=')
1240*c87b03e5Sespie result->type = CPP_PLUS_EQ;
1241*c87b03e5Sespie else
1242*c87b03e5Sespie {
1243*c87b03e5Sespie BACKUP ();
1244*c87b03e5Sespie result->type = CPP_PLUS;
1245*c87b03e5Sespie }
1246*c87b03e5Sespie break;
1247*c87b03e5Sespie
1248*c87b03e5Sespie case '-':
1249*c87b03e5Sespie c = get_effective_char (pfile);
1250*c87b03e5Sespie if (c == '>')
1251*c87b03e5Sespie {
1252*c87b03e5Sespie result->type = CPP_DEREF;
/* "->*" is recognised only when the cplusplus option is set.  */
1253*c87b03e5Sespie if (CPP_OPTION (pfile, cplusplus))
1254*c87b03e5Sespie {
1255*c87b03e5Sespie if (get_effective_char (pfile) == '*')
1256*c87b03e5Sespie result->type = CPP_DEREF_STAR;
1257*c87b03e5Sespie else
1258*c87b03e5Sespie BACKUP ();
1259*c87b03e5Sespie }
1260*c87b03e5Sespie }
1261*c87b03e5Sespie else if (c == '-')
1262*c87b03e5Sespie result->type = CPP_MINUS_MINUS;
1263*c87b03e5Sespie else if (c == '=')
1264*c87b03e5Sespie result->type = CPP_MINUS_EQ;
1265*c87b03e5Sespie else
1266*c87b03e5Sespie {
1267*c87b03e5Sespie BACKUP ();
1268*c87b03e5Sespie result->type = CPP_MINUS;
1269*c87b03e5Sespie }
1270*c87b03e5Sespie break;
1271*c87b03e5Sespie
1272*c87b03e5Sespie case '&':
1273*c87b03e5Sespie c = get_effective_char (pfile);
1274*c87b03e5Sespie if (c == '&')
1275*c87b03e5Sespie result->type = CPP_AND_AND;
1276*c87b03e5Sespie else if (c == '=')
1277*c87b03e5Sespie result->type = CPP_AND_EQ;
1278*c87b03e5Sespie else
1279*c87b03e5Sespie {
1280*c87b03e5Sespie BACKUP ();
1281*c87b03e5Sespie result->type = CPP_AND;
1282*c87b03e5Sespie }
1283*c87b03e5Sespie break;
1284*c87b03e5Sespie
1285*c87b03e5Sespie case '|':
1286*c87b03e5Sespie c = get_effective_char (pfile);
1287*c87b03e5Sespie if (c == '|')
1288*c87b03e5Sespie result->type = CPP_OR_OR;
1289*c87b03e5Sespie else if (c == '=')
1290*c87b03e5Sespie result->type = CPP_OR_EQ;
1291*c87b03e5Sespie else
1292*c87b03e5Sespie {
1293*c87b03e5Sespie BACKUP ();
1294*c87b03e5Sespie result->type = CPP_OR;
1295*c87b03e5Sespie }
1296*c87b03e5Sespie break;
1297*c87b03e5Sespie
1298*c87b03e5Sespie case ':':
1299*c87b03e5Sespie c = get_effective_char (pfile);
1300*c87b03e5Sespie if (c == ':' && CPP_OPTION (pfile, cplusplus))
1301*c87b03e5Sespie result->type = CPP_SCOPE;
1302*c87b03e5Sespie else if (c == '>' && CPP_OPTION (pfile, digraphs))
1303*c87b03e5Sespie {
1304*c87b03e5Sespie result->flags |= DIGRAPH;
1305*c87b03e5Sespie result->type = CPP_CLOSE_SQUARE;
1306*c87b03e5Sespie }
1307*c87b03e5Sespie else
1308*c87b03e5Sespie {
1309*c87b03e5Sespie BACKUP ();
1310*c87b03e5Sespie result->type = CPP_COLON;
1311*c87b03e5Sespie }
1312*c87b03e5Sespie break;
1313*c87b03e5Sespie
1314*c87b03e5Sespie case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1315*c87b03e5Sespie case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1316*c87b03e5Sespie case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1317*c87b03e5Sespie case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1318*c87b03e5Sespie case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1319*c87b03e5Sespie
1320*c87b03e5Sespie case '~': result->type = CPP_COMPL; break;
1321*c87b03e5Sespie case ',': result->type = CPP_COMMA; break;
1322*c87b03e5Sespie case '(': result->type = CPP_OPEN_PAREN; break;
1323*c87b03e5Sespie case ')': result->type = CPP_CLOSE_PAREN; break;
1324*c87b03e5Sespie case '[': result->type = CPP_OPEN_SQUARE; break;
1325*c87b03e5Sespie case ']': result->type = CPP_CLOSE_SQUARE; break;
1326*c87b03e5Sespie case '{': result->type = CPP_OPEN_BRACE; break;
1327*c87b03e5Sespie case '}': result->type = CPP_CLOSE_BRACE; break;
1328*c87b03e5Sespie case ';': result->type = CPP_SEMICOLON; break;
1329*c87b03e5Sespie
1330*c87b03e5Sespie /* @ is a punctuator in Objective-C. */
1331*c87b03e5Sespie case '@': result->type = CPP_ATSIGN; break;
1332*c87b03e5Sespie
1333*c87b03e5Sespie case '$':
/* With dollars_in_ident set, '$' starts an identifier; otherwise it
   becomes a CPP_OTHER token like any unrecognised character.  */
1334*c87b03e5Sespie if (CPP_OPTION (pfile, dollars_in_ident))
1335*c87b03e5Sespie goto start_ident;
1336*c87b03e5Sespie /* Fall through... */
1337*c87b03e5Sespie
1338*c87b03e5Sespie random_char:
1339*c87b03e5Sespie default:
1340*c87b03e5Sespie result->type = CPP_OTHER;
1341*c87b03e5Sespie result->val.c = c;
1342*c87b03e5Sespie break;
1343*c87b03e5Sespie }
1344*c87b03e5Sespie
1345*c87b03e5Sespie return result;
1346*c87b03e5Sespie }
1347*c87b03e5Sespie
1348*c87b03e5Sespie /* An upper bound on the number of bytes needed to spell TOKEN,
1349*c87b03e5Sespie including preceding whitespace. */
1350*c87b03e5Sespie unsigned int
cpp_token_len(token)1351*c87b03e5Sespie cpp_token_len (token)
1352*c87b03e5Sespie const cpp_token *token;
1353*c87b03e5Sespie {
1354*c87b03e5Sespie unsigned int len;
1355*c87b03e5Sespie
1356*c87b03e5Sespie switch (TOKEN_SPELL (token))
1357*c87b03e5Sespie {
1358*c87b03e5Sespie default: len = 0; break;
1359*c87b03e5Sespie case SPELL_NUMBER:
1360*c87b03e5Sespie case SPELL_STRING: len = token->val.str.len; break;
1361*c87b03e5Sespie case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1362*c87b03e5Sespie }
1363*c87b03e5Sespie /* 1 for whitespace, 4 for comment delimiters. */
1364*c87b03e5Sespie return len + 5;
1365*c87b03e5Sespie }
1366*c87b03e5Sespie
1367*c87b03e5Sespie /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1368*c87b03e5Sespie already contain the enough space to hold the token's spelling.
1369*c87b03e5Sespie Returns a pointer to the character after the last character
1370*c87b03e5Sespie written. */
1371*c87b03e5Sespie unsigned char *
cpp_spell_token(pfile,token,buffer)1372*c87b03e5Sespie cpp_spell_token (pfile, token, buffer)
1373*c87b03e5Sespie cpp_reader *pfile; /* Would be nice to be rid of this... */
1374*c87b03e5Sespie const cpp_token *token;
1375*c87b03e5Sespie unsigned char *buffer;
1376*c87b03e5Sespie {
1377*c87b03e5Sespie switch (TOKEN_SPELL (token))
1378*c87b03e5Sespie {
1379*c87b03e5Sespie case SPELL_OPERATOR:
1380*c87b03e5Sespie {
1381*c87b03e5Sespie const unsigned char *spelling;
1382*c87b03e5Sespie unsigned char c;
1383*c87b03e5Sespie
1384*c87b03e5Sespie if (token->flags & DIGRAPH)
1385*c87b03e5Sespie spelling
1386*c87b03e5Sespie = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1387*c87b03e5Sespie else if (token->flags & NAMED_OP)
1388*c87b03e5Sespie goto spell_ident;
1389*c87b03e5Sespie else
1390*c87b03e5Sespie spelling = TOKEN_NAME (token);
1391*c87b03e5Sespie
1392*c87b03e5Sespie while ((c = *spelling++) != '\0')
1393*c87b03e5Sespie *buffer++ = c;
1394*c87b03e5Sespie }
1395*c87b03e5Sespie break;
1396*c87b03e5Sespie
1397*c87b03e5Sespie case SPELL_CHAR:
1398*c87b03e5Sespie *buffer++ = token->val.c;
1399*c87b03e5Sespie break;
1400*c87b03e5Sespie
1401*c87b03e5Sespie spell_ident:
1402*c87b03e5Sespie case SPELL_IDENT:
1403*c87b03e5Sespie memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1404*c87b03e5Sespie buffer += NODE_LEN (token->val.node);
1405*c87b03e5Sespie break;
1406*c87b03e5Sespie
1407*c87b03e5Sespie case SPELL_NUMBER:
1408*c87b03e5Sespie memcpy (buffer, token->val.str.text, token->val.str.len);
1409*c87b03e5Sespie buffer += token->val.str.len;
1410*c87b03e5Sespie break;
1411*c87b03e5Sespie
1412*c87b03e5Sespie case SPELL_STRING:
1413*c87b03e5Sespie {
1414*c87b03e5Sespie int left, right, tag;
1415*c87b03e5Sespie switch (token->type)
1416*c87b03e5Sespie {
1417*c87b03e5Sespie case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1418*c87b03e5Sespie case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1419*c87b03e5Sespie case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1420*c87b03e5Sespie case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1421*c87b03e5Sespie case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1422*c87b03e5Sespie default:
1423*c87b03e5Sespie cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1424*c87b03e5Sespie TOKEN_NAME (token));
1425*c87b03e5Sespie return buffer;
1426*c87b03e5Sespie }
1427*c87b03e5Sespie if (tag) *buffer++ = tag;
1428*c87b03e5Sespie *buffer++ = left;
1429*c87b03e5Sespie memcpy (buffer, token->val.str.text, token->val.str.len);
1430*c87b03e5Sespie buffer += token->val.str.len;
1431*c87b03e5Sespie *buffer++ = right;
1432*c87b03e5Sespie }
1433*c87b03e5Sespie break;
1434*c87b03e5Sespie
1435*c87b03e5Sespie case SPELL_NONE:
1436*c87b03e5Sespie cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1437*c87b03e5Sespie break;
1438*c87b03e5Sespie }
1439*c87b03e5Sespie
1440*c87b03e5Sespie return buffer;
1441*c87b03e5Sespie }
1442*c87b03e5Sespie
1443*c87b03e5Sespie /* Returns TOKEN spelt as a null-terminated string. The string is
1444*c87b03e5Sespie freed when the reader is destroyed. Useful for diagnostics. */
1445*c87b03e5Sespie unsigned char *
cpp_token_as_text(pfile,token)1446*c87b03e5Sespie cpp_token_as_text (pfile, token)
1447*c87b03e5Sespie cpp_reader *pfile;
1448*c87b03e5Sespie const cpp_token *token;
1449*c87b03e5Sespie {
1450*c87b03e5Sespie unsigned int len = cpp_token_len (token);
1451*c87b03e5Sespie unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1452*c87b03e5Sespie
1453*c87b03e5Sespie end = cpp_spell_token (pfile, token, start);
1454*c87b03e5Sespie end[0] = '\0';
1455*c87b03e5Sespie
1456*c87b03e5Sespie return start;
1457*c87b03e5Sespie }
1458*c87b03e5Sespie
1459*c87b03e5Sespie /* Used by C front ends, which really should move to using
1460*c87b03e5Sespie cpp_token_as_text. */
1461*c87b03e5Sespie const char *
cpp_type2name(type)1462*c87b03e5Sespie cpp_type2name (type)
1463*c87b03e5Sespie enum cpp_ttype type;
1464*c87b03e5Sespie {
1465*c87b03e5Sespie return (const char *) token_spellings[type].name;
1466*c87b03e5Sespie }
1467*c87b03e5Sespie
1468*c87b03e5Sespie /* Writes the spelling of token to FP, without any preceding space.
1469*c87b03e5Sespie Separated from cpp_spell_token for efficiency - to avoid stdio
1470*c87b03e5Sespie double-buffering. */
1471*c87b03e5Sespie void
cpp_output_token(token,fp)1472*c87b03e5Sespie cpp_output_token (token, fp)
1473*c87b03e5Sespie const cpp_token *token;
1474*c87b03e5Sespie FILE *fp;
1475*c87b03e5Sespie {
1476*c87b03e5Sespie switch (TOKEN_SPELL (token))
1477*c87b03e5Sespie {
1478*c87b03e5Sespie case SPELL_OPERATOR:
1479*c87b03e5Sespie {
1480*c87b03e5Sespie const unsigned char *spelling;
1481*c87b03e5Sespie int c;
1482*c87b03e5Sespie
1483*c87b03e5Sespie if (token->flags & DIGRAPH)
1484*c87b03e5Sespie spelling
1485*c87b03e5Sespie = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1486*c87b03e5Sespie else if (token->flags & NAMED_OP)
1487*c87b03e5Sespie goto spell_ident;
1488*c87b03e5Sespie else
1489*c87b03e5Sespie spelling = TOKEN_NAME (token);
1490*c87b03e5Sespie
1491*c87b03e5Sespie c = *spelling;
1492*c87b03e5Sespie do
1493*c87b03e5Sespie putc (c, fp);
1494*c87b03e5Sespie while ((c = *++spelling) != '\0');
1495*c87b03e5Sespie }
1496*c87b03e5Sespie break;
1497*c87b03e5Sespie
1498*c87b03e5Sespie case SPELL_CHAR:
1499*c87b03e5Sespie putc (token->val.c, fp);
1500*c87b03e5Sespie break;
1501*c87b03e5Sespie
1502*c87b03e5Sespie spell_ident:
1503*c87b03e5Sespie case SPELL_IDENT:
1504*c87b03e5Sespie fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1505*c87b03e5Sespie break;
1506*c87b03e5Sespie
1507*c87b03e5Sespie case SPELL_NUMBER:
1508*c87b03e5Sespie fwrite (token->val.str.text, 1, token->val.str.len, fp);
1509*c87b03e5Sespie break;
1510*c87b03e5Sespie
1511*c87b03e5Sespie case SPELL_STRING:
1512*c87b03e5Sespie {
1513*c87b03e5Sespie int left, right, tag;
1514*c87b03e5Sespie switch (token->type)
1515*c87b03e5Sespie {
1516*c87b03e5Sespie case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1517*c87b03e5Sespie case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1518*c87b03e5Sespie case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1519*c87b03e5Sespie case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1520*c87b03e5Sespie case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1521*c87b03e5Sespie default:
1522*c87b03e5Sespie fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1523*c87b03e5Sespie return;
1524*c87b03e5Sespie }
1525*c87b03e5Sespie if (tag) putc (tag, fp);
1526*c87b03e5Sespie putc (left, fp);
1527*c87b03e5Sespie fwrite (token->val.str.text, 1, token->val.str.len, fp);
1528*c87b03e5Sespie putc (right, fp);
1529*c87b03e5Sespie }
1530*c87b03e5Sespie break;
1531*c87b03e5Sespie
1532*c87b03e5Sespie case SPELL_NONE:
1533*c87b03e5Sespie /* An error, most probably. */
1534*c87b03e5Sespie break;
1535*c87b03e5Sespie }
1536*c87b03e5Sespie }
1537*c87b03e5Sespie
1538*c87b03e5Sespie /* Compare two tokens. */
1539*c87b03e5Sespie int
_cpp_equiv_tokens(a,b)1540*c87b03e5Sespie _cpp_equiv_tokens (a, b)
1541*c87b03e5Sespie const cpp_token *a, *b;
1542*c87b03e5Sespie {
1543*c87b03e5Sespie if (a->type == b->type && a->flags == b->flags)
1544*c87b03e5Sespie switch (TOKEN_SPELL (a))
1545*c87b03e5Sespie {
1546*c87b03e5Sespie default: /* Keep compiler happy. */
1547*c87b03e5Sespie case SPELL_OPERATOR:
1548*c87b03e5Sespie return 1;
1549*c87b03e5Sespie case SPELL_CHAR:
1550*c87b03e5Sespie return a->val.c == b->val.c; /* Character. */
1551*c87b03e5Sespie case SPELL_NONE:
1552*c87b03e5Sespie return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1553*c87b03e5Sespie case SPELL_IDENT:
1554*c87b03e5Sespie return a->val.node == b->val.node;
1555*c87b03e5Sespie case SPELL_NUMBER:
1556*c87b03e5Sespie case SPELL_STRING:
1557*c87b03e5Sespie return (a->val.str.len == b->val.str.len
1558*c87b03e5Sespie && !memcmp (a->val.str.text, b->val.str.text,
1559*c87b03e5Sespie a->val.str.len));
1560*c87b03e5Sespie }
1561*c87b03e5Sespie
1562*c87b03e5Sespie return 0;
1563*c87b03e5Sespie }
1564*c87b03e5Sespie
1565*c87b03e5Sespie /* Returns nonzero if a space should be inserted to avoid an
1566*c87b03e5Sespie accidental token paste for output. For simplicity, it is
1567*c87b03e5Sespie conservative, and occasionally advises a space where one is not
1568*c87b03e5Sespie needed, e.g. "." and ".2". */
1569*c87b03e5Sespie int
cpp_avoid_paste(pfile,token1,token2)1570*c87b03e5Sespie cpp_avoid_paste (pfile, token1, token2)
1571*c87b03e5Sespie cpp_reader *pfile;
1572*c87b03e5Sespie const cpp_token *token1, *token2;
1573*c87b03e5Sespie {
1574*c87b03e5Sespie enum cpp_ttype a = token1->type, b = token2->type;
1575*c87b03e5Sespie cppchar_t c;
1576*c87b03e5Sespie
1577*c87b03e5Sespie if (token1->flags & NAMED_OP)
1578*c87b03e5Sespie a = CPP_NAME;
1579*c87b03e5Sespie if (token2->flags & NAMED_OP)
1580*c87b03e5Sespie b = CPP_NAME;
1581*c87b03e5Sespie
1582*c87b03e5Sespie c = EOF;
1583*c87b03e5Sespie if (token2->flags & DIGRAPH)
1584*c87b03e5Sespie c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1585*c87b03e5Sespie else if (token_spellings[b].category == SPELL_OPERATOR)
1586*c87b03e5Sespie c = token_spellings[b].name[0];
1587*c87b03e5Sespie
1588*c87b03e5Sespie /* Quickly get everything that can paste with an '='. */
1589*c87b03e5Sespie if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1590*c87b03e5Sespie return 1;
1591*c87b03e5Sespie
1592*c87b03e5Sespie switch (a)
1593*c87b03e5Sespie {
1594*c87b03e5Sespie case CPP_GREATER: return c == '>' || c == '?';
1595*c87b03e5Sespie case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1596*c87b03e5Sespie case CPP_PLUS: return c == '+';
1597*c87b03e5Sespie case CPP_MINUS: return c == '-' || c == '>';
1598*c87b03e5Sespie case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1599*c87b03e5Sespie case CPP_MOD: return c == ':' || c == '>';
1600*c87b03e5Sespie case CPP_AND: return c == '&';
1601*c87b03e5Sespie case CPP_OR: return c == '|';
1602*c87b03e5Sespie case CPP_COLON: return c == ':' || c == '>';
1603*c87b03e5Sespie case CPP_DEREF: return c == '*';
1604*c87b03e5Sespie case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1605*c87b03e5Sespie case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1606*c87b03e5Sespie case CPP_NAME: return ((b == CPP_NUMBER
1607*c87b03e5Sespie && name_p (pfile, &token2->val.str))
1608*c87b03e5Sespie || b == CPP_NAME
1609*c87b03e5Sespie || b == CPP_CHAR || b == CPP_STRING); /* L */
1610*c87b03e5Sespie case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1611*c87b03e5Sespie || c == '.' || c == '+' || c == '-');
1612*c87b03e5Sespie case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1613*c87b03e5Sespie && token1->val.c == '@'
1614*c87b03e5Sespie && (b == CPP_NAME || b == CPP_STRING));
1615*c87b03e5Sespie default: break;
1616*c87b03e5Sespie }
1617*c87b03e5Sespie
1618*c87b03e5Sespie return 0;
1619*c87b03e5Sespie }
1620*c87b03e5Sespie
1621*c87b03e5Sespie /* Output all the remaining tokens on the current line, and a newline
1622*c87b03e5Sespie character, to FP. Leading whitespace is removed. If there are
1623*c87b03e5Sespie macros, special token padding is not performed. */
1624*c87b03e5Sespie void
cpp_output_line(pfile,fp)1625*c87b03e5Sespie cpp_output_line (pfile, fp)
1626*c87b03e5Sespie cpp_reader *pfile;
1627*c87b03e5Sespie FILE *fp;
1628*c87b03e5Sespie {
1629*c87b03e5Sespie const cpp_token *token;
1630*c87b03e5Sespie
1631*c87b03e5Sespie token = cpp_get_token (pfile);
1632*c87b03e5Sespie while (token->type != CPP_EOF)
1633*c87b03e5Sespie {
1634*c87b03e5Sespie cpp_output_token (token, fp);
1635*c87b03e5Sespie token = cpp_get_token (pfile);
1636*c87b03e5Sespie if (token->flags & PREV_WHITE)
1637*c87b03e5Sespie putc (' ', fp);
1638*c87b03e5Sespie }
1639*c87b03e5Sespie
1640*c87b03e5Sespie putc ('\n', fp);
1641*c87b03e5Sespie }
1642*c87b03e5Sespie
/* Returns the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1653*c87b03e5Sespie
1654*c87b03e5Sespie /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1655*c87b03e5Sespie failure if cpplib is not parsing C++ or C99. Such failure is
1656*c87b03e5Sespie silent, and no variables are updated. Otherwise returns 0, and
1657*c87b03e5Sespie warns if -Wtraditional.
1658*c87b03e5Sespie
1659*c87b03e5Sespie [lex.charset]: The character designated by the universal character
1660*c87b03e5Sespie name \UNNNNNNNN is that character whose character short name in
1661*c87b03e5Sespie ISO/IEC 10646 is NNNNNNNN; the character designated by the
1662*c87b03e5Sespie universal character name \uNNNN is that character whose character
1663*c87b03e5Sespie short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1664*c87b03e5Sespie for a universal character name is less than 0x20 or in the range
1665*c87b03e5Sespie 0x7F-0x9F (inclusive), or if the universal character name
1666*c87b03e5Sespie designates a character in the basic source character set, then the
1667*c87b03e5Sespie program is ill-formed.
1668*c87b03e5Sespie
1669*c87b03e5Sespie We assume that wchar_t is Unicode, so we don't need to do any
1670*c87b03e5Sespie mapping. Is this ever wrong?
1671*c87b03e5Sespie
1672*c87b03e5Sespie PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1673*c87b03e5Sespie LIMIT is the end of the string or charconst. PSTR is updated to
1674*c87b03e5Sespie point after the UCS on return, and the UCS is written into PC. */
1675*c87b03e5Sespie
1676*c87b03e5Sespie static int
maybe_read_ucs(pfile,pstr,limit,pc)1677*c87b03e5Sespie maybe_read_ucs (pfile, pstr, limit, pc)
1678*c87b03e5Sespie cpp_reader *pfile;
1679*c87b03e5Sespie const unsigned char **pstr;
1680*c87b03e5Sespie const unsigned char *limit;
1681*c87b03e5Sespie cppchar_t *pc;
1682*c87b03e5Sespie {
1683*c87b03e5Sespie const unsigned char *p = *pstr;
1684*c87b03e5Sespie unsigned int code = 0;
1685*c87b03e5Sespie unsigned int c = *pc, length;
1686*c87b03e5Sespie
1687*c87b03e5Sespie /* Only attempt to interpret a UCS for C++ and C99. */
1688*c87b03e5Sespie if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1689*c87b03e5Sespie return 1;
1690*c87b03e5Sespie
1691*c87b03e5Sespie if (CPP_WTRADITIONAL (pfile))
1692*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
1693*c87b03e5Sespie "the meaning of '\\%c' is different in traditional C", c);
1694*c87b03e5Sespie
1695*c87b03e5Sespie length = (c == 'u' ? 4: 8);
1696*c87b03e5Sespie
1697*c87b03e5Sespie if ((size_t) (limit - p) < length)
1698*c87b03e5Sespie {
1699*c87b03e5Sespie cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1700*c87b03e5Sespie /* Skip to the end to avoid more diagnostics. */
1701*c87b03e5Sespie p = limit;
1702*c87b03e5Sespie }
1703*c87b03e5Sespie else
1704*c87b03e5Sespie {
1705*c87b03e5Sespie for (; length; length--, p++)
1706*c87b03e5Sespie {
1707*c87b03e5Sespie c = *p;
1708*c87b03e5Sespie if (ISXDIGIT (c))
1709*c87b03e5Sespie code = (code << 4) + hex_digit_value (c);
1710*c87b03e5Sespie else
1711*c87b03e5Sespie {
1712*c87b03e5Sespie cpp_error (pfile, DL_ERROR,
1713*c87b03e5Sespie "non-hex digit '%c' in universal-character-name", c);
1714*c87b03e5Sespie /* We shouldn't skip in case there are multibyte chars. */
1715*c87b03e5Sespie break;
1716*c87b03e5Sespie }
1717*c87b03e5Sespie }
1718*c87b03e5Sespie }
1719*c87b03e5Sespie
1720*c87b03e5Sespie #ifdef TARGET_EBCDIC
1721*c87b03e5Sespie cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1722*c87b03e5Sespie code = 0x3f; /* EBCDIC invalid character */
1723*c87b03e5Sespie #else
1724*c87b03e5Sespie /* True extended characters are OK. */
1725*c87b03e5Sespie if (code >= 0xa0
1726*c87b03e5Sespie && !(code & 0x80000000)
1727*c87b03e5Sespie && !(code >= 0xD800 && code <= 0xDFFF))
1728*c87b03e5Sespie ;
1729*c87b03e5Sespie /* The standard permits $, @ and ` to be specified as UCNs. We use
1730*c87b03e5Sespie hex escapes so that this also works with EBCDIC hosts. */
1731*c87b03e5Sespie else if (code == 0x24 || code == 0x40 || code == 0x60)
1732*c87b03e5Sespie ;
1733*c87b03e5Sespie /* Don't give another error if one occurred above. */
1734*c87b03e5Sespie else if (length == 0)
1735*c87b03e5Sespie cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1736*c87b03e5Sespie #endif
1737*c87b03e5Sespie
1738*c87b03e5Sespie *pstr = p;
1739*c87b03e5Sespie *pc = code;
1740*c87b03e5Sespie return 0;
1741*c87b03e5Sespie }
1742*c87b03e5Sespie
1743*c87b03e5Sespie /* Returns the value of an escape sequence, truncated to the correct
1744*c87b03e5Sespie target precision. PSTR points to the input pointer, which is just
1745*c87b03e5Sespie after the backslash. LIMIT is how much text we have. WIDE is true
1746*c87b03e5Sespie if the escape sequence is part of a wide character constant or
1747*c87b03e5Sespie string literal. Handles all relevant diagnostics. */
1748*c87b03e5Sespie cppchar_t
cpp_parse_escape(pfile,pstr,limit,wide)1749*c87b03e5Sespie cpp_parse_escape (pfile, pstr, limit, wide)
1750*c87b03e5Sespie cpp_reader *pfile;
1751*c87b03e5Sespie const unsigned char **pstr;
1752*c87b03e5Sespie const unsigned char *limit;
1753*c87b03e5Sespie int wide;
1754*c87b03e5Sespie {
1755*c87b03e5Sespie int unknown = 0;
1756*c87b03e5Sespie const unsigned char *str = *pstr;
1757*c87b03e5Sespie cppchar_t c, mask;
1758*c87b03e5Sespie unsigned int width;
1759*c87b03e5Sespie
1760*c87b03e5Sespie if (wide)
1761*c87b03e5Sespie width = CPP_OPTION (pfile, wchar_precision);
1762*c87b03e5Sespie else
1763*c87b03e5Sespie width = CPP_OPTION (pfile, char_precision);
1764*c87b03e5Sespie if (width < BITS_PER_CPPCHAR_T)
1765*c87b03e5Sespie mask = ((cppchar_t) 1 << width) - 1;
1766*c87b03e5Sespie else
1767*c87b03e5Sespie mask = ~0;
1768*c87b03e5Sespie
1769*c87b03e5Sespie c = *str++;
1770*c87b03e5Sespie switch (c)
1771*c87b03e5Sespie {
1772*c87b03e5Sespie case '\\': case '\'': case '"': case '?': break;
1773*c87b03e5Sespie case 'b': c = TARGET_BS; break;
1774*c87b03e5Sespie case 'f': c = TARGET_FF; break;
1775*c87b03e5Sespie case 'n': c = TARGET_NEWLINE; break;
1776*c87b03e5Sespie case 'r': c = TARGET_CR; break;
1777*c87b03e5Sespie case 't': c = TARGET_TAB; break;
1778*c87b03e5Sespie case 'v': c = TARGET_VT; break;
1779*c87b03e5Sespie
1780*c87b03e5Sespie case '(': case '{': case '[': case '%':
1781*c87b03e5Sespie /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1782*c87b03e5Sespie '\%' is used to prevent SCCS from getting confused. */
1783*c87b03e5Sespie unknown = CPP_PEDANTIC (pfile);
1784*c87b03e5Sespie break;
1785*c87b03e5Sespie
1786*c87b03e5Sespie case 'a':
1787*c87b03e5Sespie if (CPP_WTRADITIONAL (pfile))
1788*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
1789*c87b03e5Sespie "the meaning of '\\a' is different in traditional C");
1790*c87b03e5Sespie c = TARGET_BELL;
1791*c87b03e5Sespie break;
1792*c87b03e5Sespie
1793*c87b03e5Sespie case 'e': case 'E':
1794*c87b03e5Sespie if (CPP_PEDANTIC (pfile))
1795*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
1796*c87b03e5Sespie "non-ISO-standard escape sequence, '\\%c'", (int) c);
1797*c87b03e5Sespie c = TARGET_ESC;
1798*c87b03e5Sespie break;
1799*c87b03e5Sespie
1800*c87b03e5Sespie case 'u': case 'U':
1801*c87b03e5Sespie unknown = maybe_read_ucs (pfile, &str, limit, &c);
1802*c87b03e5Sespie break;
1803*c87b03e5Sespie
1804*c87b03e5Sespie case 'x':
1805*c87b03e5Sespie if (CPP_WTRADITIONAL (pfile))
1806*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
1807*c87b03e5Sespie "the meaning of '\\x' is different in traditional C");
1808*c87b03e5Sespie
1809*c87b03e5Sespie {
1810*c87b03e5Sespie cppchar_t i = 0, overflow = 0;
1811*c87b03e5Sespie int digits_found = 0;
1812*c87b03e5Sespie
1813*c87b03e5Sespie while (str < limit)
1814*c87b03e5Sespie {
1815*c87b03e5Sespie c = *str;
1816*c87b03e5Sespie if (! ISXDIGIT (c))
1817*c87b03e5Sespie break;
1818*c87b03e5Sespie str++;
1819*c87b03e5Sespie overflow |= i ^ (i << 4 >> 4);
1820*c87b03e5Sespie i = (i << 4) + hex_digit_value (c);
1821*c87b03e5Sespie digits_found = 1;
1822*c87b03e5Sespie }
1823*c87b03e5Sespie
1824*c87b03e5Sespie if (!digits_found)
1825*c87b03e5Sespie cpp_error (pfile, DL_ERROR,
1826*c87b03e5Sespie "\\x used with no following hex digits");
1827*c87b03e5Sespie
1828*c87b03e5Sespie if (overflow | (i != (i & mask)))
1829*c87b03e5Sespie {
1830*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
1831*c87b03e5Sespie "hex escape sequence out of range");
1832*c87b03e5Sespie i &= mask;
1833*c87b03e5Sespie }
1834*c87b03e5Sespie c = i;
1835*c87b03e5Sespie }
1836*c87b03e5Sespie break;
1837*c87b03e5Sespie
1838*c87b03e5Sespie case '0': case '1': case '2': case '3':
1839*c87b03e5Sespie case '4': case '5': case '6': case '7':
1840*c87b03e5Sespie {
1841*c87b03e5Sespie size_t count = 0;
1842*c87b03e5Sespie cppchar_t i = c - '0';
1843*c87b03e5Sespie
1844*c87b03e5Sespie while (str < limit && ++count < 3)
1845*c87b03e5Sespie {
1846*c87b03e5Sespie c = *str;
1847*c87b03e5Sespie if (c < '0' || c > '7')
1848*c87b03e5Sespie break;
1849*c87b03e5Sespie str++;
1850*c87b03e5Sespie i = (i << 3) + c - '0';
1851*c87b03e5Sespie }
1852*c87b03e5Sespie
1853*c87b03e5Sespie if (i != (i & mask))
1854*c87b03e5Sespie {
1855*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
1856*c87b03e5Sespie "octal escape sequence out of range");
1857*c87b03e5Sespie i &= mask;
1858*c87b03e5Sespie }
1859*c87b03e5Sespie c = i;
1860*c87b03e5Sespie }
1861*c87b03e5Sespie break;
1862*c87b03e5Sespie
1863*c87b03e5Sespie default:
1864*c87b03e5Sespie unknown = 1;
1865*c87b03e5Sespie break;
1866*c87b03e5Sespie }
1867*c87b03e5Sespie
1868*c87b03e5Sespie if (unknown)
1869*c87b03e5Sespie {
1870*c87b03e5Sespie if (ISGRAPH (c))
1871*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
1872*c87b03e5Sespie "unknown escape sequence '\\%c'", (int) c);
1873*c87b03e5Sespie else
1874*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN,
1875*c87b03e5Sespie "unknown escape sequence: '\\%03o'", (int) c);
1876*c87b03e5Sespie }
1877*c87b03e5Sespie
1878*c87b03e5Sespie if (c > mask)
1879*c87b03e5Sespie {
1880*c87b03e5Sespie cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1881*c87b03e5Sespie c &= mask;
1882*c87b03e5Sespie }
1883*c87b03e5Sespie
1884*c87b03e5Sespie *pstr = str;
1885*c87b03e5Sespie return c;
1886*c87b03e5Sespie }
1887*c87b03e5Sespie
1888*c87b03e5Sespie /* Interpret a (possibly wide) character constant in TOKEN.
1889*c87b03e5Sespie WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1890*c87b03e5Sespie points to a variable that is filled in with the number of
1891*c87b03e5Sespie characters seen, and UNSIGNEDP to a variable that indicates whether
1892*c87b03e5Sespie the result has signed type. */
1893*c87b03e5Sespie cppchar_t
cpp_interpret_charconst(pfile,token,pchars_seen,unsignedp)1894*c87b03e5Sespie cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1895*c87b03e5Sespie cpp_reader *pfile;
1896*c87b03e5Sespie const cpp_token *token;
1897*c87b03e5Sespie unsigned int *pchars_seen;
1898*c87b03e5Sespie int *unsignedp;
1899*c87b03e5Sespie {
1900*c87b03e5Sespie const unsigned char *str = token->val.str.text;
1901*c87b03e5Sespie const unsigned char *limit = str + token->val.str.len;
1902*c87b03e5Sespie unsigned int chars_seen = 0;
1903*c87b03e5Sespie size_t width, max_chars;
1904*c87b03e5Sespie cppchar_t c, mask, result = 0;
1905*c87b03e5Sespie bool unsigned_p;
1906*c87b03e5Sespie
1907*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
1908*c87b03e5Sespie (void) local_mbtowc (NULL, NULL, 0);
1909*c87b03e5Sespie #endif
1910*c87b03e5Sespie
1911*c87b03e5Sespie /* Width in bits. */
1912*c87b03e5Sespie if (token->type == CPP_CHAR)
1913*c87b03e5Sespie {
1914*c87b03e5Sespie width = CPP_OPTION (pfile, char_precision);
1915*c87b03e5Sespie max_chars = CPP_OPTION (pfile, int_precision) / width;
1916*c87b03e5Sespie unsigned_p = CPP_OPTION (pfile, unsigned_char);
1917*c87b03e5Sespie }
1918*c87b03e5Sespie else
1919*c87b03e5Sespie {
1920*c87b03e5Sespie width = CPP_OPTION (pfile, wchar_precision);
1921*c87b03e5Sespie max_chars = 1;
1922*c87b03e5Sespie unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1923*c87b03e5Sespie }
1924*c87b03e5Sespie
1925*c87b03e5Sespie if (width < BITS_PER_CPPCHAR_T)
1926*c87b03e5Sespie mask = ((cppchar_t) 1 << width) - 1;
1927*c87b03e5Sespie else
1928*c87b03e5Sespie mask = ~0;
1929*c87b03e5Sespie
1930*c87b03e5Sespie while (str < limit)
1931*c87b03e5Sespie {
1932*c87b03e5Sespie #ifdef MULTIBYTE_CHARS
1933*c87b03e5Sespie wchar_t wc;
1934*c87b03e5Sespie int char_len;
1935*c87b03e5Sespie
1936*c87b03e5Sespie char_len = local_mbtowc (&wc, str, limit - str);
1937*c87b03e5Sespie if (char_len == -1)
1938*c87b03e5Sespie {
1939*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
1940*c87b03e5Sespie "ignoring invalid multibyte character");
1941*c87b03e5Sespie c = *str++;
1942*c87b03e5Sespie }
1943*c87b03e5Sespie else
1944*c87b03e5Sespie {
1945*c87b03e5Sespie str += char_len;
1946*c87b03e5Sespie c = wc;
1947*c87b03e5Sespie }
1948*c87b03e5Sespie #else
1949*c87b03e5Sespie c = *str++;
1950*c87b03e5Sespie #endif
1951*c87b03e5Sespie
1952*c87b03e5Sespie if (c == '\\')
1953*c87b03e5Sespie c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1954*c87b03e5Sespie
1955*c87b03e5Sespie #ifdef MAP_CHARACTER
1956*c87b03e5Sespie if (ISPRINT (c))
1957*c87b03e5Sespie c = MAP_CHARACTER (c);
1958*c87b03e5Sespie #endif
1959*c87b03e5Sespie
1960*c87b03e5Sespie chars_seen++;
1961*c87b03e5Sespie
1962*c87b03e5Sespie /* Truncate the character, scale the result and merge the two. */
1963*c87b03e5Sespie c &= mask;
1964*c87b03e5Sespie if (width < BITS_PER_CPPCHAR_T)
1965*c87b03e5Sespie result = (result << width) | c;
1966*c87b03e5Sespie else
1967*c87b03e5Sespie result = c;
1968*c87b03e5Sespie }
1969*c87b03e5Sespie
1970*c87b03e5Sespie if (chars_seen == 0)
1971*c87b03e5Sespie cpp_error (pfile, DL_ERROR, "empty character constant");
1972*c87b03e5Sespie else if (chars_seen > 1)
1973*c87b03e5Sespie {
1974*c87b03e5Sespie /* Multichar charconsts are of type int and therefore signed. */
1975*c87b03e5Sespie unsigned_p = 0;
1976*c87b03e5Sespie
1977*c87b03e5Sespie if (chars_seen > max_chars)
1978*c87b03e5Sespie {
1979*c87b03e5Sespie chars_seen = max_chars;
1980*c87b03e5Sespie cpp_error (pfile, DL_WARNING,
1981*c87b03e5Sespie "character constant too long for its type");
1982*c87b03e5Sespie }
1983*c87b03e5Sespie else if (CPP_OPTION (pfile, warn_multichar))
1984*c87b03e5Sespie cpp_error (pfile, DL_WARNING, "multi-character character constant");
1985*c87b03e5Sespie }
1986*c87b03e5Sespie
1987*c87b03e5Sespie /* Sign-extend or truncate the constant to cppchar_t. The value is
1988*c87b03e5Sespie in WIDTH bits, but for multi-char charconsts it's value is the
1989*c87b03e5Sespie full target type's width. */
1990*c87b03e5Sespie if (chars_seen > 1)
1991*c87b03e5Sespie width *= max_chars;
1992*c87b03e5Sespie if (width < BITS_PER_CPPCHAR_T)
1993*c87b03e5Sespie {
1994*c87b03e5Sespie mask = ((cppchar_t) 1 << width) - 1;
1995*c87b03e5Sespie if (unsigned_p || !(result & (1 << (width - 1))))
1996*c87b03e5Sespie result &= mask;
1997*c87b03e5Sespie else
1998*c87b03e5Sespie result |= ~mask;
1999*c87b03e5Sespie }
2000*c87b03e5Sespie
2001*c87b03e5Sespie *pchars_seen = chars_seen;
2002*c87b03e5Sespie *unsignedp = unsigned_p;
2003*c87b03e5Sespie return result;
2004*c87b03e5Sespie }
2005*c87b03e5Sespie
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that expression arguments
   expand safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2020*c87b03e5Sespie
2021*c87b03e5Sespie /* Create a new allocation buffer. Place the control block at the end
2022*c87b03e5Sespie of the buffer, so that buffer overflows will cause immediate chaos. */
2023*c87b03e5Sespie static _cpp_buff *
new_buff(len)2024*c87b03e5Sespie new_buff (len)
2025*c87b03e5Sespie size_t len;
2026*c87b03e5Sespie {
2027*c87b03e5Sespie _cpp_buff *result;
2028*c87b03e5Sespie unsigned char *base;
2029*c87b03e5Sespie
2030*c87b03e5Sespie if (len < MIN_BUFF_SIZE)
2031*c87b03e5Sespie len = MIN_BUFF_SIZE;
2032*c87b03e5Sespie len = CPP_ALIGN (len);
2033*c87b03e5Sespie
2034*c87b03e5Sespie base = xmalloc (len + sizeof (_cpp_buff));
2035*c87b03e5Sespie result = (_cpp_buff *) (base + len);
2036*c87b03e5Sespie result->base = base;
2037*c87b03e5Sespie result->cur = base;
2038*c87b03e5Sespie result->limit = base + len;
2039*c87b03e5Sespie result->next = NULL;
2040*c87b03e5Sespie return result;
2041*c87b03e5Sespie }
2042*c87b03e5Sespie
2043*c87b03e5Sespie /* Place a chain of unwanted allocation buffers on the free list. */
2044*c87b03e5Sespie void
_cpp_release_buff(pfile,buff)2045*c87b03e5Sespie _cpp_release_buff (pfile, buff)
2046*c87b03e5Sespie cpp_reader *pfile;
2047*c87b03e5Sespie _cpp_buff *buff;
2048*c87b03e5Sespie {
2049*c87b03e5Sespie _cpp_buff *end = buff;
2050*c87b03e5Sespie
2051*c87b03e5Sespie while (end->next)
2052*c87b03e5Sespie end = end->next;
2053*c87b03e5Sespie end->next = pfile->free_buffs;
2054*c87b03e5Sespie pfile->free_buffs = buff;
2055*c87b03e5Sespie }
2056*c87b03e5Sespie
2057*c87b03e5Sespie /* Return a free buffer of size at least MIN_SIZE. */
2058*c87b03e5Sespie _cpp_buff *
_cpp_get_buff(pfile,min_size)2059*c87b03e5Sespie _cpp_get_buff (pfile, min_size)
2060*c87b03e5Sespie cpp_reader *pfile;
2061*c87b03e5Sespie size_t min_size;
2062*c87b03e5Sespie {
2063*c87b03e5Sespie _cpp_buff *result, **p;
2064*c87b03e5Sespie
2065*c87b03e5Sespie for (p = &pfile->free_buffs;; p = &(*p)->next)
2066*c87b03e5Sespie {
2067*c87b03e5Sespie size_t size;
2068*c87b03e5Sespie
2069*c87b03e5Sespie if (*p == NULL)
2070*c87b03e5Sespie return new_buff (min_size);
2071*c87b03e5Sespie result = *p;
2072*c87b03e5Sespie size = result->limit - result->base;
2073*c87b03e5Sespie /* Return a buffer that's big enough, but don't waste one that's
2074*c87b03e5Sespie way too big. */
2075*c87b03e5Sespie if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2076*c87b03e5Sespie break;
2077*c87b03e5Sespie }
2078*c87b03e5Sespie
2079*c87b03e5Sespie *p = result->next;
2080*c87b03e5Sespie result->next = NULL;
2081*c87b03e5Sespie result->cur = result->base;
2082*c87b03e5Sespie return result;
2083*c87b03e5Sespie }
2084*c87b03e5Sespie
2085*c87b03e5Sespie /* Creates a new buffer with enough space to hold the uncommitted
2086*c87b03e5Sespie remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2087*c87b03e5Sespie the excess bytes to the new buffer. Chains the new buffer after
2088*c87b03e5Sespie BUFF, and returns the new buffer. */
2089*c87b03e5Sespie _cpp_buff *
_cpp_append_extend_buff(pfile,buff,min_extra)2090*c87b03e5Sespie _cpp_append_extend_buff (pfile, buff, min_extra)
2091*c87b03e5Sespie cpp_reader *pfile;
2092*c87b03e5Sespie _cpp_buff *buff;
2093*c87b03e5Sespie size_t min_extra;
2094*c87b03e5Sespie {
2095*c87b03e5Sespie size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2096*c87b03e5Sespie _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2097*c87b03e5Sespie
2098*c87b03e5Sespie buff->next = new_buff;
2099*c87b03e5Sespie memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2100*c87b03e5Sespie return new_buff;
2101*c87b03e5Sespie }
2102*c87b03e5Sespie
2103*c87b03e5Sespie /* Creates a new buffer with enough space to hold the uncommitted
2104*c87b03e5Sespie remaining bytes of the buffer pointed to by BUFF, and at least
2105*c87b03e5Sespie MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2106*c87b03e5Sespie Chains the new buffer before the buffer pointed to by BUFF, and
2107*c87b03e5Sespie updates the pointer to point to the new buffer. */
2108*c87b03e5Sespie void
_cpp_extend_buff(pfile,pbuff,min_extra)2109*c87b03e5Sespie _cpp_extend_buff (pfile, pbuff, min_extra)
2110*c87b03e5Sespie cpp_reader *pfile;
2111*c87b03e5Sespie _cpp_buff **pbuff;
2112*c87b03e5Sespie size_t min_extra;
2113*c87b03e5Sespie {
2114*c87b03e5Sespie _cpp_buff *new_buff, *old_buff = *pbuff;
2115*c87b03e5Sespie size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2116*c87b03e5Sespie
2117*c87b03e5Sespie new_buff = _cpp_get_buff (pfile, size);
2118*c87b03e5Sespie memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2119*c87b03e5Sespie new_buff->next = old_buff;
2120*c87b03e5Sespie *pbuff = new_buff;
2121*c87b03e5Sespie }
2122*c87b03e5Sespie
2123*c87b03e5Sespie /* Free a chain of buffers starting at BUFF. */
2124*c87b03e5Sespie void
_cpp_free_buff(buff)2125*c87b03e5Sespie _cpp_free_buff (buff)
2126*c87b03e5Sespie _cpp_buff *buff;
2127*c87b03e5Sespie {
2128*c87b03e5Sespie _cpp_buff *next;
2129*c87b03e5Sespie
2130*c87b03e5Sespie for (; buff; buff = next)
2131*c87b03e5Sespie {
2132*c87b03e5Sespie next = buff->next;
2133*c87b03e5Sespie free (buff->base);
2134*c87b03e5Sespie }
2135*c87b03e5Sespie }
2136*c87b03e5Sespie
2137*c87b03e5Sespie /* Allocate permanent, unaligned storage of length LEN. */
2138*c87b03e5Sespie unsigned char *
_cpp_unaligned_alloc(pfile,len)2139*c87b03e5Sespie _cpp_unaligned_alloc (pfile, len)
2140*c87b03e5Sespie cpp_reader *pfile;
2141*c87b03e5Sespie size_t len;
2142*c87b03e5Sespie {
2143*c87b03e5Sespie _cpp_buff *buff = pfile->u_buff;
2144*c87b03e5Sespie unsigned char *result = buff->cur;
2145*c87b03e5Sespie
2146*c87b03e5Sespie if (len > (size_t) (buff->limit - result))
2147*c87b03e5Sespie {
2148*c87b03e5Sespie buff = _cpp_get_buff (pfile, len);
2149*c87b03e5Sespie buff->next = pfile->u_buff;
2150*c87b03e5Sespie pfile->u_buff = buff;
2151*c87b03e5Sespie result = buff->cur;
2152*c87b03e5Sespie }
2153*c87b03e5Sespie
2154*c87b03e5Sespie buff->cur = result + len;
2155*c87b03e5Sespie return result;
2156*c87b03e5Sespie }
2157*c87b03e5Sespie
2158*c87b03e5Sespie /* Allocate permanent, unaligned storage of length LEN from a_buff.
2159*c87b03e5Sespie That buffer is used for growing allocations when saving macro
2160*c87b03e5Sespie replacement lists in a #define, and when parsing an answer to an
2161*c87b03e5Sespie assertion in #assert, #unassert or #if (and therefore possibly
2162*c87b03e5Sespie whilst expanding macros). It therefore must not be used by any
2163*c87b03e5Sespie code that they might call: specifically the lexer and the guts of
2164*c87b03e5Sespie the macro expander.
2165*c87b03e5Sespie
2166*c87b03e5Sespie All existing other uses clearly fit this restriction: storing
2167*c87b03e5Sespie registered pragmas during initialization. */
2168*c87b03e5Sespie unsigned char *
_cpp_aligned_alloc(pfile,len)2169*c87b03e5Sespie _cpp_aligned_alloc (pfile, len)
2170*c87b03e5Sespie cpp_reader *pfile;
2171*c87b03e5Sespie size_t len;
2172*c87b03e5Sespie {
2173*c87b03e5Sespie _cpp_buff *buff = pfile->a_buff;
2174*c87b03e5Sespie unsigned char *result = buff->cur;
2175*c87b03e5Sespie
2176*c87b03e5Sespie if (len > (size_t) (buff->limit - result))
2177*c87b03e5Sespie {
2178*c87b03e5Sespie buff = _cpp_get_buff (pfile, len);
2179*c87b03e5Sespie buff->next = pfile->a_buff;
2180*c87b03e5Sespie pfile->a_buff = buff;
2181*c87b03e5Sespie result = buff->cur;
2182*c87b03e5Sespie }
2183*c87b03e5Sespie
2184*c87b03e5Sespie buff->cur = result + len;
2185*c87b03e5Sespie return result;
2186*c87b03e5Sespie }
2187