1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26
/* Spelling category of a token type.  Each row of token_spellings
   carries one of these; OP rows use SPELL_OPERATOR and TK rows name
   their category explicitly.  */
enum spell_type
{
  SPELL_OPERATOR,   /* First enumerator, hence value 0.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
35 struct token_spelling
36 {
37 enum spell_type category;
38 const unsigned char *name;
39 };
40
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { SPELL_ ## s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
63
64 static _cpp_buff *new_buff (size_t);
65
66
67 /* Utility routine:
68
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
71 int
cpp_ideq(const cpp_token * token,const char * string)72 cpp_ideq (const cpp_token *token, const char *string)
73 {
74 if (token->type != CPP_NAME)
75 return 0;
76
77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78 }
79
80 /* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
82 static void
add_line_note(cpp_buffer * buffer,const uchar * pos,unsigned int type)83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84 {
85 if (buffer->notes_used == buffer->notes_cap)
86 {
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
90 }
91
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
95 }
96
97 /* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99 void
_cpp_clean_line(cpp_reader * pfile)100 _cpp_clean_line (cpp_reader *pfile)
101 {
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
105
106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
110 s = buffer->next_line - 1;
111
112 if (!buffer->from_stage3)
113 {
114 /* Short circuit for the common case of an un-escaped line with
115 no trigraphs. The primary win here is by not writing any
116 data back to memory until we have to. */
117 for (;;)
118 {
119 c = *++s;
120 if (c == '\n' || c == '\r')
121 {
122 d = (uchar *) s;
123
124 if (s == buffer->rlimit)
125 goto done;
126
127 /* DOS line ending? */
128 if (c == '\r' && s[1] == '\n')
129 s++;
130
131 if (s == buffer->rlimit)
132 goto done;
133
134 /* check for escaped newline */
135 p = d;
136 while (p != buffer->next_line && is_nvspace (p[-1]))
137 p--;
138 if (p == buffer->next_line || p[-1] != '\\')
139 goto done;
140
141 /* Have an escaped newline; process it and proceed to
142 the slow path. */
143 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144 d = p - 2;
145 buffer->next_line = p - 1;
146 break;
147 }
148 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149 {
150 /* Have a trigraph. We may or may not have to convert
151 it. Add a line note regardless, for -Wtrigraphs. */
152 add_line_note (buffer, s, s[2]);
153 if (CPP_OPTION (pfile, trigraphs))
154 {
155 /* We do, and that means we have to switch to the
156 slow path. */
157 d = (uchar *) s;
158 *d = _cpp_trigraph_map[s[2]];
159 s += 2;
160 break;
161 }
162 }
163 }
164
165
166 for (;;)
167 {
168 c = *++s;
169 *++d = c;
170
171 if (c == '\n' || c == '\r')
172 {
173 /* Handle DOS line endings. */
174 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175 s++;
176 if (s == buffer->rlimit)
177 break;
178
179 /* Escaped? */
180 p = d;
181 while (p != buffer->next_line && is_nvspace (p[-1]))
182 p--;
183 if (p == buffer->next_line || p[-1] != '\\')
184 break;
185
186 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
187 d = p - 2;
188 buffer->next_line = p - 1;
189 }
190 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
191 {
192 /* Add a note regardless, for the benefit of -Wtrigraphs. */
193 add_line_note (buffer, d, s[2]);
194 if (CPP_OPTION (pfile, trigraphs))
195 {
196 *d = _cpp_trigraph_map[s[2]];
197 s += 2;
198 }
199 }
200 }
201 }
202 else
203 {
204 do
205 s++;
206 while (*s != '\n' && *s != '\r');
207 d = (uchar *) s;
208
209 /* Handle DOS line endings. */
210 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211 s++;
212 }
213
214 done:
215 *d = '\n';
216 /* A sentinel note that should never be processed. */
217 add_line_note (buffer, d + 1, '\n');
218 buffer->next_line = s + 1;
219 }
220
221 /* Return true if the trigraph indicated by NOTE should be warned
222 about in a comment. */
223 static bool
warn_in_comment(cpp_reader * pfile,_cpp_line_note * note)224 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
225 {
226 const uchar *p;
227
228 /* Within comments we don't warn about trigraphs, unless the
229 trigraph forms an escaped newline, as that may change
230 behavior. */
231 if (note->type != '/')
232 return false;
233
234 /* If -trigraphs, then this was an escaped newline iff the next note
235 is coincident. */
236 if (CPP_OPTION (pfile, trigraphs))
237 return note[1].pos == note->pos;
238
239 /* Otherwise, see if this forms an escaped newline. */
240 p = note->pos + 3;
241 while (is_nvspace (*p))
242 p++;
243
244 /* There might have been escaped newlines between the trigraph and the
245 newline we found. Hence the position test. */
246 return (*p == '\n' && p < note[1].pos);
247 }
248
249 /* Process the notes created by add_line_note as far as the current
250 location. */
251 void
_cpp_process_line_notes(cpp_reader * pfile,int in_comment)252 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
253 {
254 cpp_buffer *buffer = pfile->buffer;
255
256 for (;;)
257 {
258 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259 unsigned int col;
260
261 if (note->pos > buffer->cur)
262 break;
263
264 buffer->cur_note++;
265 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
266
267 if (note->type == '\\' || note->type == ' ')
268 {
269 if (note->type == ' ' && !in_comment)
270 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
271 "backslash and newline separated by space");
272
273 if (buffer->next_line > buffer->rlimit)
274 {
275 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
276 "backslash-newline at end of file");
277 /* Prevent "no newline at end of file" warning. */
278 buffer->next_line = buffer->rlimit;
279 }
280
281 buffer->line_base = note->pos;
282 CPP_INCREMENT_LINE (pfile, 0);
283 }
284 else if (_cpp_trigraph_map[note->type])
285 {
286 if (CPP_OPTION (pfile, warn_trigraphs)
287 && (!in_comment || warn_in_comment (pfile, note)))
288 {
289 if (CPP_OPTION (pfile, trigraphs))
290 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
291 "trigraph ??%c converted to %c",
292 note->type,
293 (int) _cpp_trigraph_map[note->type]);
294 else
295 {
296 cpp_error_with_line
297 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
298 "trigraph ??%c ignored, use -trigraphs to enable",
299 note->type);
300 }
301 }
302 }
303 else
304 abort ();
305 }
306 }
307
308 /* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
310 nonzero if comment terminated by EOF, zero otherwise.
311
312 Buffer->cur points to the initial asterisk of the comment. */
313 bool
_cpp_skip_block_comment(cpp_reader * pfile)314 _cpp_skip_block_comment (cpp_reader *pfile)
315 {
316 cpp_buffer *buffer = pfile->buffer;
317 const uchar *cur = buffer->cur;
318 uchar c;
319
320 cur++;
321 if (*cur == '/')
322 cur++;
323
324 for (;;)
325 {
326 /* People like decorating comments with '*', so check for '/'
327 instead for efficiency. */
328 c = *cur++;
329
330 if (c == '/')
331 {
332 if (cur[-2] == '*')
333 break;
334
335 /* Warn about potential nested comments, but not if the '/'
336 comes immediately before the true comment delimiter.
337 Don't bother to get it right across escaped newlines. */
338 if (CPP_OPTION (pfile, warn_comments)
339 && cur[0] == '*' && cur[1] != '/')
340 {
341 buffer->cur = cur;
342 cpp_error_with_line (pfile, CPP_DL_WARNING,
343 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
344 "\"/*\" within comment");
345 }
346 }
347 else if (c == '\n')
348 {
349 unsigned int cols;
350 buffer->cur = cur - 1;
351 _cpp_process_line_notes (pfile, true);
352 if (buffer->next_line >= buffer->rlimit)
353 return true;
354 _cpp_clean_line (pfile);
355
356 cols = buffer->next_line - buffer->line_base;
357 CPP_INCREMENT_LINE (pfile, cols);
358
359 cur = buffer->cur;
360 }
361 }
362
363 buffer->cur = cur;
364 _cpp_process_line_notes (pfile, true);
365 return false;
366 }
367
368 /* Skip a C++ line comment, leaving buffer->cur pointing to the
369 terminating newline. Handles escaped newlines. Returns nonzero
370 if a multiline comment. */
371 static int
skip_line_comment(cpp_reader * pfile)372 skip_line_comment (cpp_reader *pfile)
373 {
374 cpp_buffer *buffer = pfile->buffer;
375 unsigned int orig_line = pfile->line_table->highest_line;
376
377 while (*buffer->cur != '\n')
378 buffer->cur++;
379
380 _cpp_process_line_notes (pfile, true);
381 return orig_line != pfile->line_table->highest_line;
382 }
383
384 /* Skips whitespace, saving the next non-whitespace character. */
385 static void
skip_whitespace(cpp_reader * pfile,cppchar_t c)386 skip_whitespace (cpp_reader *pfile, cppchar_t c)
387 {
388 cpp_buffer *buffer = pfile->buffer;
389 bool saw_NUL = false;
390
391 do
392 {
393 /* Horizontal space always OK. */
394 if (c == ' ' || c == '\t')
395 ;
396 /* Just \f \v or \0 left. */
397 else if (c == '\0')
398 saw_NUL = true;
399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
401 CPP_BUF_COL (buffer),
402 "%s in preprocessing directive",
403 c == '\f' ? "form feed" : "vertical tab");
404
405 c = *buffer->cur++;
406 }
407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
408 while (is_nvspace (c));
409
410 if (saw_NUL)
411 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
412
413 buffer->cur--;
414 }
415
416 /* See if the characters of a number token are valid in a name (no
417 '.', '+' or '-'). */
418 static int
name_p(cpp_reader * pfile,const cpp_string * string)419 name_p (cpp_reader *pfile, const cpp_string *string)
420 {
421 unsigned int i;
422
423 for (i = 0; i < string->len; i++)
424 if (!is_idchar (string->text[i]))
425 return 0;
426
427 return 1;
428 }
429
430 /* After parsing an identifier or other sequence, produce a warning about
431 sequences not in NFC/NFKC. */
432 static void
warn_about_normalization(cpp_reader * pfile,const cpp_token * token,const struct normalize_state * s)433 warn_about_normalization (cpp_reader *pfile,
434 const cpp_token *token,
435 const struct normalize_state *s)
436 {
437 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438 && !pfile->state.skipping)
439 {
440 /* Make sure that the token is printed using UCNs, even
441 if we'd otherwise happily print UTF-8. */
442 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
443 size_t sz;
444
445 sz = cpp_spell_token (pfile, token, buf, false) - buf;
446 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
448 "`%.*s' is not in NFKC", (int) sz, buf);
449 else
450 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
451 "`%.*s' is not in NFC", (int) sz, buf);
452 }
453 }
454
455 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
456 an identifier. FIRST is TRUE if this starts an identifier. */
457 static bool
forms_identifier_p(cpp_reader * pfile,int first,struct normalize_state * state)458 forms_identifier_p (cpp_reader *pfile, int first,
459 struct normalize_state *state)
460 {
461 cpp_buffer *buffer = pfile->buffer;
462
463 if (*buffer->cur == '$')
464 {
465 if (!CPP_OPTION (pfile, dollars_in_ident))
466 return false;
467
468 buffer->cur++;
469 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
470 {
471 CPP_OPTION (pfile, warn_dollars) = 0;
472 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
473 }
474
475 return true;
476 }
477
478 /* Is this a syntactically valid UCN? */
479 if (CPP_OPTION (pfile, extended_identifiers)
480 && *buffer->cur == '\\'
481 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
482 {
483 buffer->cur += 2;
484 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485 state))
486 return true;
487 buffer->cur -= 2;
488 }
489
490 return false;
491 }
492
493 /* Lex an identifier starting at BUFFER->CUR - 1. */
494 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * base,bool starts_ucn,struct normalize_state * nst)495 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496 struct normalize_state *nst)
497 {
498 cpp_hashnode *result;
499 const uchar *cur;
500 unsigned int len;
501 unsigned int hash = HT_HASHSTEP (0, *base);
502
503 cur = pfile->buffer->cur;
504 if (! starts_ucn)
505 while (ISIDNUM (*cur))
506 {
507 hash = HT_HASHSTEP (hash, *cur);
508 cur++;
509 }
510 pfile->buffer->cur = cur;
511 if (starts_ucn || forms_identifier_p (pfile, false, nst))
512 {
513 /* Slower version for identifiers containing UCNs (or $). */
514 do {
515 while (ISIDNUM (*pfile->buffer->cur))
516 {
517 pfile->buffer->cur++;
518 NORMALIZE_STATE_UPDATE_IDNUM (nst);
519 }
520 } while (forms_identifier_p (pfile, false, nst));
521 result = _cpp_interpret_identifier (pfile, base,
522 pfile->buffer->cur - base);
523 }
524 else
525 {
526 len = cur - base;
527 hash = HT_HASHFINISH (hash, len);
528
529 result = (cpp_hashnode *)
530 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
531 }
532
533 /* Rarely, identifiers require diagnostics when lexed. */
534 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535 && !pfile->state.skipping, 0))
536 {
537 /* It is allowed to poison the same identifier twice. */
538 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
539 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
540 NODE_NAME (result));
541
542 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543 replacement list of a variadic macro. */
544 if (result == pfile->spec_nodes.n__VA_ARGS__
545 && !pfile->state.va_args_ok)
546 cpp_error (pfile, CPP_DL_PEDWARN,
547 "__VA_ARGS__ can only appear in the expansion"
548 " of a C99 variadic macro");
549 }
550
551 return result;
552 }
553
554 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
555 static void
lex_number(cpp_reader * pfile,cpp_string * number,struct normalize_state * nst)556 lex_number (cpp_reader *pfile, cpp_string *number,
557 struct normalize_state *nst)
558 {
559 const uchar *cur;
560 const uchar *base;
561 uchar *dest;
562
563 base = pfile->buffer->cur - 1;
564 do
565 {
566 cur = pfile->buffer->cur;
567
568 /* N.B. ISIDNUM does not include $. */
569 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
570 {
571 cur++;
572 NORMALIZE_STATE_UPDATE_IDNUM (nst);
573 }
574
575 pfile->buffer->cur = cur;
576 }
577 while (forms_identifier_p (pfile, false, nst));
578
579 number->len = cur - base;
580 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581 memcpy (dest, base, number->len);
582 dest[number->len] = '\0';
583 number->text = dest;
584 }
585
586 /* Create a token of type TYPE with a literal spelling. */
587 static void
create_literal(cpp_reader * pfile,cpp_token * token,const uchar * base,unsigned int len,enum cpp_ttype type)588 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589 unsigned int len, enum cpp_ttype type)
590 {
591 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
592
593 memcpy (dest, base, len);
594 dest[len] = '\0';
595 token->type = type;
596 token->val.str.len = len;
597 token->val.str.text = dest;
598 }
599
600 /* Lexes a string, character constant, or angle-bracketed header file
601 name. The stored string contains the spelling, including opening
602 quote and leading any leading 'L'. It returns the type of the
603 literal, or CPP_OTHER if it was not properly terminated.
604
605 The spelling is NUL-terminated, but it is not guaranteed that this
606 is the first NUL since embedded NULs are preserved. */
607 static void
lex_string(cpp_reader * pfile,cpp_token * token,const uchar * base)608 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
609 {
610 bool saw_NUL = false;
611 const uchar *cur;
612 cppchar_t terminator;
613 enum cpp_ttype type;
614
615 cur = base;
616 terminator = *cur++;
617 if (terminator == 'L')
618 terminator = *cur++;
619 if (terminator == '\"')
620 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621 else if (terminator == '\'')
622 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623 else
624 terminator = '>', type = CPP_HEADER_NAME;
625
626 for (;;)
627 {
628 cppchar_t c = *cur++;
629
630 /* In #include-style directives, terminators are not escapable. */
631 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632 cur++;
633 else if (c == terminator)
634 break;
635 else if (c == '\n')
636 {
637 cur--;
638 type = CPP_OTHER;
639 break;
640 }
641 else if (c == '\0')
642 saw_NUL = true;
643 }
644
645 if (saw_NUL && !pfile->state.skipping)
646 cpp_error (pfile, CPP_DL_WARNING,
647 "null character(s) preserved in literal");
648
649 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
650 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
651 (int) terminator);
652
653 pfile->buffer->cur = cur;
654 create_literal (pfile, token, base, cur - base, type);
655 }
656
657 /* The stored comment includes the comment start and any terminator. */
658 static void
save_comment(cpp_reader * pfile,cpp_token * token,const unsigned char * from,cppchar_t type)659 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
660 cppchar_t type)
661 {
662 unsigned char *buffer;
663 unsigned int len, clen;
664
665 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
666
667 /* C++ comments probably (not definitely) have moved past a new
668 line, which we don't want to save in the comment. */
669 if (is_vspace (pfile->buffer->cur[-1]))
670 len--;
671
672 /* If we are currently in a directive, then we need to store all
673 C++ comments as C comments internally, and so we need to
674 allocate a little extra space in that case.
675
676 Note that the only time we encounter a directive here is
677 when we are saving comments in a "#define". */
678 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
679
680 buffer = _cpp_unaligned_alloc (pfile, clen);
681
682 token->type = CPP_COMMENT;
683 token->val.str.len = clen;
684 token->val.str.text = buffer;
685
686 buffer[0] = '/';
687 memcpy (buffer + 1, from, len - 1);
688
689 /* Finish conversion to a C comment, if necessary. */
690 if (pfile->state.in_directive && type == '/')
691 {
692 buffer[1] = '*';
693 buffer[clen - 2] = '*';
694 buffer[clen - 1] = '/';
695 }
696 }
697
698 /* Allocate COUNT tokens for RUN. */
699 void
_cpp_init_tokenrun(tokenrun * run,unsigned int count)700 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
701 {
702 run->base = XNEWVEC (cpp_token, count);
703 run->limit = run->base + count;
704 run->next = NULL;
705 }
706
707 /* Returns the next tokenrun, or creates one if there is none. */
708 static tokenrun *
next_tokenrun(tokenrun * run)709 next_tokenrun (tokenrun *run)
710 {
711 if (run->next == NULL)
712 {
713 run->next = XNEW (tokenrun);
714 run->next->prev = run;
715 _cpp_init_tokenrun (run->next, 250);
716 }
717
718 return run->next;
719 }
720
721 /* Allocate a single token that is invalidated at the same time as the
722 rest of the tokens on the line. Has its line and col set to the
723 same as the last lexed token, so that diagnostics appear in the
724 right place. */
725 cpp_token *
_cpp_temp_token(cpp_reader * pfile)726 _cpp_temp_token (cpp_reader *pfile)
727 {
728 cpp_token *old, *result;
729
730 old = pfile->cur_token - 1;
731 if (pfile->cur_token == pfile->cur_run->limit)
732 {
733 pfile->cur_run = next_tokenrun (pfile->cur_run);
734 pfile->cur_token = pfile->cur_run->base;
735 }
736
737 result = pfile->cur_token++;
738 result->src_loc = old->src_loc;
739 return result;
740 }
741
742 /* Lex a token into RESULT (external interface). Takes care of issues
743 like directive handling, token lookahead, multiple include
744 optimization and skipping. */
745 const cpp_token *
_cpp_lex_token(cpp_reader * pfile)746 _cpp_lex_token (cpp_reader *pfile)
747 {
748 cpp_token *result;
749
750 for (;;)
751 {
752 if (pfile->cur_token == pfile->cur_run->limit)
753 {
754 pfile->cur_run = next_tokenrun (pfile->cur_run);
755 pfile->cur_token = pfile->cur_run->base;
756 }
757
758 if (pfile->lookaheads)
759 {
760 pfile->lookaheads--;
761 result = pfile->cur_token++;
762 }
763 else
764 result = _cpp_lex_direct (pfile);
765
766 if (result->flags & BOL)
767 {
768 /* Is this a directive. If _cpp_handle_directive returns
769 false, it is an assembler #. */
770 if (result->type == CPP_HASH
771 /* 6.10.3 p 11: Directives in a list of macro arguments
772 gives undefined behavior. This implementation
773 handles the directive as normal. */
774 && pfile->state.parsing_args != 1)
775 {
776 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
777 {
778 if (pfile->directive_result.type == CPP_PADDING)
779 continue;
780 result = &pfile->directive_result;
781 }
782 }
783 else if (pfile->state.in_deferred_pragma)
784 result = &pfile->directive_result;
785
786 if (pfile->cb.line_change && !pfile->state.skipping)
787 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
788 }
789
790 /* We don't skip tokens in directives. */
791 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
792 break;
793
794 /* Outside a directive, invalidate controlling macros. At file
795 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
796 get here and MI optimization works. */
797 pfile->mi_valid = false;
798
799 if (!pfile->state.skipping || result->type == CPP_EOF)
800 break;
801 }
802
803 return result;
804 }
805
806 /* Returns true if a fresh line has been loaded. */
807 bool
_cpp_get_fresh_line(cpp_reader * pfile)808 _cpp_get_fresh_line (cpp_reader *pfile)
809 {
810 int return_at_eof;
811
812 /* We can't get a new line until we leave the current directive. */
813 if (pfile->state.in_directive)
814 return false;
815
816 for (;;)
817 {
818 cpp_buffer *buffer = pfile->buffer;
819
820 if (!buffer->need_line)
821 return true;
822
823 if (buffer->next_line < buffer->rlimit)
824 {
825 _cpp_clean_line (pfile);
826 return true;
827 }
828
829 /* First, get out of parsing arguments state. */
830 if (pfile->state.parsing_args)
831 return false;
832
833 /* End of buffer. Non-empty files should end in a newline. */
834 if (buffer->buf != buffer->rlimit
835 && buffer->next_line > buffer->rlimit
836 && !buffer->from_stage3)
837 {
838 /* Only warn once. */
839 buffer->next_line = buffer->rlimit;
840 }
841
842 return_at_eof = buffer->return_at_eof;
843 _cpp_pop_buffer (pfile);
844 if (pfile->buffer == NULL || return_at_eof)
845 return false;
846 }
847 }
848
/* Helper for two-character operators.  If the next input character
   is CHAR, consume it and give RESULT the type THEN_TYPE; otherwise
   give it ELSE_TYPE.  Relies on `buffer' and `result' being in scope
   at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
857
858 /* Lex a token into pfile->cur_token, which is also incremented, to
859 get diagnostics pointing to the correct location.
860
861 Does not handle issues such as token lookahead, multiple-include
862 optimization, directives, skipping etc. This function is only
863 suitable for use by _cpp_lex_token, and in special cases like
864 lex_expansion_token which doesn't care for any of these issues.
865
866 When meeting a newline, returns CPP_EOF if parsing a directive,
867 otherwise returns to the start of the token buffer if permissible.
868 Returns the location of the lexed token. */
869 cpp_token *
_cpp_lex_direct(cpp_reader * pfile)870 _cpp_lex_direct (cpp_reader *pfile)
871 {
872 cppchar_t c;
873 cpp_buffer *buffer;
874 const unsigned char *comment_start;
875 cpp_token *result = pfile->cur_token++;
876
877 fresh_line:
878 result->flags = 0;
879 buffer = pfile->buffer;
880 if (buffer->need_line)
881 {
882 if (pfile->state.in_deferred_pragma)
883 {
884 result->type = CPP_PRAGMA_EOL;
885 pfile->state.in_deferred_pragma = false;
886 if (!pfile->state.pragma_allow_expansion)
887 pfile->state.prevent_expansion--;
888 return result;
889 }
890 if (!_cpp_get_fresh_line (pfile))
891 {
892 result->type = CPP_EOF;
893 if (!pfile->state.in_directive)
894 {
895 /* Tell the compiler the line number of the EOF token. */
896 result->src_loc = pfile->line_table->highest_line;
897 result->flags = BOL;
898 }
899 return result;
900 }
901 if (!pfile->keep_tokens)
902 {
903 pfile->cur_run = &pfile->base_run;
904 result = pfile->base_run.base;
905 pfile->cur_token = result + 1;
906 }
907 result->flags = BOL;
908 if (pfile->state.parsing_args == 2)
909 result->flags |= PREV_WHITE;
910 }
911 buffer = pfile->buffer;
912 update_tokens_line:
913 result->src_loc = pfile->line_table->highest_line;
914
915 skipped_white:
916 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
917 && !pfile->overlaid_buffer)
918 {
919 _cpp_process_line_notes (pfile, false);
920 result->src_loc = pfile->line_table->highest_line;
921 }
922 c = *buffer->cur++;
923
924 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
925 CPP_BUF_COLUMN (buffer, buffer->cur));
926
927 switch (c)
928 {
929 case ' ': case '\t': case '\f': case '\v': case '\0':
930 result->flags |= PREV_WHITE;
931 skip_whitespace (pfile, c);
932 goto skipped_white;
933
934 case '\n':
935 if (buffer->cur < buffer->rlimit)
936 CPP_INCREMENT_LINE (pfile, 0);
937 buffer->need_line = true;
938 goto fresh_line;
939
940 case '0': case '1': case '2': case '3': case '4':
941 case '5': case '6': case '7': case '8': case '9':
942 {
943 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
944 result->type = CPP_NUMBER;
945 lex_number (pfile, &result->val.str, &nst);
946 warn_about_normalization (pfile, result, &nst);
947 break;
948 }
949
950 case 'L':
951 /* 'L' may introduce wide characters or strings. */
952 if (*buffer->cur == '\'' || *buffer->cur == '"')
953 {
954 lex_string (pfile, result, buffer->cur - 1);
955 break;
956 }
957 /* Fall through. */
958
959 case '_':
960 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
961 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
962 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
963 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
964 case 'y': case 'z':
965 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
966 case 'G': case 'H': case 'I': case 'J': case 'K':
967 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
968 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
969 case 'Y': case 'Z':
970 result->type = CPP_NAME;
971 {
972 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
973 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
974 &nst);
975 warn_about_normalization (pfile, result, &nst);
976 }
977
978 /* Convert named operators to their proper types. */
979 if (result->val.node->flags & NODE_OPERATOR)
980 {
981 result->flags |= NAMED_OP;
982 result->type = (enum cpp_ttype) result->val.node->directive_index;
983 }
984 break;
985
986 case '\'':
987 case '"':
988 lex_string (pfile, result, buffer->cur - 1);
989 break;
990
991 case '/':
992 /* A potential block or line comment. */
993 comment_start = buffer->cur;
994 c = *buffer->cur;
995
996 if (c == '*')
997 {
998 if (_cpp_skip_block_comment (pfile))
999 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1000 }
1001 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1002 || cpp_in_system_header (pfile)))
1003 {
1004 /* Warn about comments only if pedantically GNUC89, and not
1005 in system headers. */
1006 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1007 && ! buffer->warned_cplusplus_comments)
1008 {
1009 cpp_error (pfile, CPP_DL_PEDWARN,
1010 "C++ style comments are not allowed in ISO C90");
1011 cpp_error (pfile, CPP_DL_PEDWARN,
1012 "(this will be reported only once per input file)");
1013 buffer->warned_cplusplus_comments = 1;
1014 }
1015
1016 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1017 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1018 }
1019 else if (c == '=')
1020 {
1021 buffer->cur++;
1022 result->type = CPP_DIV_EQ;
1023 break;
1024 }
1025 else
1026 {
1027 result->type = CPP_DIV;
1028 break;
1029 }
1030
1031 if (!pfile->state.save_comments)
1032 {
1033 result->flags |= PREV_WHITE;
1034 goto update_tokens_line;
1035 }
1036
1037 /* Save the comment as a token in its own right. */
1038 save_comment (pfile, result, comment_start, c);
1039 break;
1040
1041 case '<':
1042 if (pfile->state.angled_headers)
1043 {
1044 lex_string (pfile, result, buffer->cur - 1);
1045 break;
1046 }
1047
1048 result->type = CPP_LESS;
1049 if (*buffer->cur == '=')
1050 buffer->cur++, result->type = CPP_LESS_EQ;
1051 else if (*buffer->cur == '<')
1052 {
1053 buffer->cur++;
1054 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1055 }
1056 else if (CPP_OPTION (pfile, digraphs))
1057 {
1058 if (*buffer->cur == ':')
1059 {
1060 buffer->cur++;
1061 result->flags |= DIGRAPH;
1062 result->type = CPP_OPEN_SQUARE;
1063 }
1064 else if (*buffer->cur == '%')
1065 {
1066 buffer->cur++;
1067 result->flags |= DIGRAPH;
1068 result->type = CPP_OPEN_BRACE;
1069 }
1070 }
1071 break;
1072
1073 case '>':
1074 result->type = CPP_GREATER;
1075 if (*buffer->cur == '=')
1076 buffer->cur++, result->type = CPP_GREATER_EQ;
1077 else if (*buffer->cur == '>')
1078 {
1079 buffer->cur++;
1080 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1081 }
1082 break;
1083
1084 case '%':
1085 result->type = CPP_MOD;
1086 if (*buffer->cur == '=')
1087 buffer->cur++, result->type = CPP_MOD_EQ;
1088 else if (CPP_OPTION (pfile, digraphs))
1089 {
1090 if (*buffer->cur == ':')
1091 {
1092 buffer->cur++;
1093 result->flags |= DIGRAPH;
1094 result->type = CPP_HASH;
1095 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1096 buffer->cur += 2, result->type = CPP_PASTE;
1097 }
1098 else if (*buffer->cur == '>')
1099 {
1100 buffer->cur++;
1101 result->flags |= DIGRAPH;
1102 result->type = CPP_CLOSE_BRACE;
1103 }
1104 }
1105 break;
1106
1107 case '.':
1108 result->type = CPP_DOT;
1109 if (ISDIGIT (*buffer->cur))
1110 {
1111 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1112 result->type = CPP_NUMBER;
1113 lex_number (pfile, &result->val.str, &nst);
1114 warn_about_normalization (pfile, result, &nst);
1115 }
1116 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1117 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1118 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1119 buffer->cur++, result->type = CPP_DOT_STAR;
1120 break;
1121
1122 case '+':
1123 result->type = CPP_PLUS;
1124 if (*buffer->cur == '+')
1125 buffer->cur++, result->type = CPP_PLUS_PLUS;
1126 else if (*buffer->cur == '=')
1127 buffer->cur++, result->type = CPP_PLUS_EQ;
1128 break;
1129
1130 case '-':
1131 result->type = CPP_MINUS;
1132 if (*buffer->cur == '>')
1133 {
1134 buffer->cur++;
1135 result->type = CPP_DEREF;
1136 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1137 buffer->cur++, result->type = CPP_DEREF_STAR;
1138 }
1139 else if (*buffer->cur == '-')
1140 buffer->cur++, result->type = CPP_MINUS_MINUS;
1141 else if (*buffer->cur == '=')
1142 buffer->cur++, result->type = CPP_MINUS_EQ;
1143 break;
1144
1145 case '&':
1146 result->type = CPP_AND;
1147 if (*buffer->cur == '&')
1148 buffer->cur++, result->type = CPP_AND_AND;
1149 else if (*buffer->cur == '=')
1150 buffer->cur++, result->type = CPP_AND_EQ;
1151 break;
1152
1153 case '|':
1154 result->type = CPP_OR;
1155 if (*buffer->cur == '|')
1156 buffer->cur++, result->type = CPP_OR_OR;
1157 else if (*buffer->cur == '=')
1158 buffer->cur++, result->type = CPP_OR_EQ;
1159 break;
1160
1161 case ':':
1162 result->type = CPP_COLON;
1163 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1164 buffer->cur++, result->type = CPP_SCOPE;
1165 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1166 {
1167 buffer->cur++;
1168 result->flags |= DIGRAPH;
1169 result->type = CPP_CLOSE_SQUARE;
1170 }
1171 break;
1172
1173 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1174 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1175 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1176 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1177 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1178
1179 case '?': result->type = CPP_QUERY; break;
1180 case '~': result->type = CPP_COMPL; break;
1181 case ',': result->type = CPP_COMMA; break;
1182 case '(': result->type = CPP_OPEN_PAREN; break;
1183 case ')': result->type = CPP_CLOSE_PAREN; break;
1184 case '[': result->type = CPP_OPEN_SQUARE; break;
1185 case ']': result->type = CPP_CLOSE_SQUARE; break;
1186 case '{': result->type = CPP_OPEN_BRACE; break;
1187 case '}': result->type = CPP_CLOSE_BRACE; break;
1188 case ';': result->type = CPP_SEMICOLON; break;
1189
1190 /* @ is a punctuator in Objective-C. */
1191 case '@': result->type = CPP_ATSIGN; break;
1192
1193 case '$':
1194 case '\\':
1195 {
1196 const uchar *base = --buffer->cur;
1197 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1198
1199 if (forms_identifier_p (pfile, true, &nst))
1200 {
1201 result->type = CPP_NAME;
1202 result->val.node = lex_identifier (pfile, base, true, &nst);
1203 warn_about_normalization (pfile, result, &nst);
1204 break;
1205 }
1206 buffer->cur++;
1207 }
1208
1209 default:
1210 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1211 break;
1212 }
1213
1214 return result;
1215 }
1216
1217 /* An upper bound on the number of bytes needed to spell TOKEN.
1218 Does not include preceding whitespace. */
1219 unsigned int
cpp_token_len(const cpp_token * token)1220 cpp_token_len (const cpp_token *token)
1221 {
1222 unsigned int len;
1223
1224 switch (TOKEN_SPELL (token))
1225 {
1226 default: len = 4; break;
1227 case SPELL_LITERAL: len = token->val.str.len; break;
1228 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1229 }
1230
1231 return len;
1232 }
1233
/* Decode one UTF-8 sequence starting at NAME and store the
   corresponding \U escape (backslash, 'U', then eight lowercase hex
   digits) in BUFFER.  Exactly 10 bytes are written to BUFFER and no
   terminating NUL is added.

   Returns the number of bytes consumed from NAME, which is the count
   of leading one bits in the first byte.  Calls abort () if any
   trailing byte of the sequence is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned int lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int idx;
  int shift;

  /* The number of leading one bits in the first byte gives the
     length of the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* Payload bits of the first byte, then six bits from each
     continuation byte.  */
  code_point = *name & (0x7F >> seq_len);
  for (idx = 1; idx < seq_len; idx++)
    {
      unsigned char trail = name[idx];

      /* Ill-formed UTF-8.  */
      if ((trail & ~0x3F) != 0x80)
	abort ();

      code_point = (code_point << 6) | (trail & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1267
1268
1269 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1270 already contain the enough space to hold the token's spelling.
1271 Returns a pointer to the character after the last character written.
1272 FORSTRING is true if this is to be the spelling after translation
1273 phase 1 (this is different for UCNs).
1274 FIXME: Would be nice if we didn't need the PFILE argument. */
1275 unsigned char *
cpp_spell_token(cpp_reader * pfile,const cpp_token * token,unsigned char * buffer,bool forstring)1276 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1277 unsigned char *buffer, bool forstring)
1278 {
1279 switch (TOKEN_SPELL (token))
1280 {
1281 case SPELL_OPERATOR:
1282 {
1283 const unsigned char *spelling;
1284 unsigned char c;
1285
1286 if (token->flags & DIGRAPH)
1287 spelling
1288 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1289 else if (token->flags & NAMED_OP)
1290 goto spell_ident;
1291 else
1292 spelling = TOKEN_NAME (token);
1293
1294 while ((c = *spelling++) != '\0')
1295 *buffer++ = c;
1296 }
1297 break;
1298
1299 spell_ident:
1300 case SPELL_IDENT:
1301 if (forstring)
1302 {
1303 memcpy (buffer, NODE_NAME (token->val.node),
1304 NODE_LEN (token->val.node));
1305 buffer += NODE_LEN (token->val.node);
1306 }
1307 else
1308 {
1309 size_t i;
1310 const unsigned char * name = NODE_NAME (token->val.node);
1311
1312 for (i = 0; i < NODE_LEN (token->val.node); i++)
1313 if (name[i] & ~0x7F)
1314 {
1315 i += utf8_to_ucn (buffer, name + i) - 1;
1316 buffer += 10;
1317 }
1318 else
1319 *buffer++ = NODE_NAME (token->val.node)[i];
1320 }
1321 break;
1322
1323 case SPELL_LITERAL:
1324 memcpy (buffer, token->val.str.text, token->val.str.len);
1325 buffer += token->val.str.len;
1326 break;
1327
1328 case SPELL_NONE:
1329 cpp_error (pfile, CPP_DL_ICE,
1330 "unspellable token %s", TOKEN_NAME (token));
1331 break;
1332 }
1333
1334 return buffer;
1335 }
1336
1337 /* Returns TOKEN spelt as a null-terminated string. The string is
1338 freed when the reader is destroyed. Useful for diagnostics. */
1339 unsigned char *
cpp_token_as_text(cpp_reader * pfile,const cpp_token * token)1340 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1341 {
1342 unsigned int len = cpp_token_len (token) + 1;
1343 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1344
1345 end = cpp_spell_token (pfile, token, start, false);
1346 end[0] = '\0';
1347
1348 return start;
1349 }
1350
1351 /* Used by C front ends, which really should move to using
1352 cpp_token_as_text. */
1353 const char *
cpp_type2name(enum cpp_ttype type)1354 cpp_type2name (enum cpp_ttype type)
1355 {
1356 return (const char *) token_spellings[type].name;
1357 }
1358
1359 /* Writes the spelling of token to FP, without any preceding space.
1360 Separated from cpp_spell_token for efficiency - to avoid stdio
1361 double-buffering. */
1362 void
cpp_output_token(const cpp_token * token,FILE * fp)1363 cpp_output_token (const cpp_token *token, FILE *fp)
1364 {
1365 switch (TOKEN_SPELL (token))
1366 {
1367 case SPELL_OPERATOR:
1368 {
1369 const unsigned char *spelling;
1370 int c;
1371
1372 if (token->flags & DIGRAPH)
1373 spelling
1374 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1375 else if (token->flags & NAMED_OP)
1376 goto spell_ident;
1377 else
1378 spelling = TOKEN_NAME (token);
1379
1380 c = *spelling;
1381 do
1382 putc (c, fp);
1383 while ((c = *++spelling) != '\0');
1384 }
1385 break;
1386
1387 spell_ident:
1388 case SPELL_IDENT:
1389 {
1390 size_t i;
1391 const unsigned char * name = NODE_NAME (token->val.node);
1392
1393 for (i = 0; i < NODE_LEN (token->val.node); i++)
1394 if (name[i] & ~0x7F)
1395 {
1396 unsigned char buffer[10];
1397 i += utf8_to_ucn (buffer, name + i) - 1;
1398 fwrite (buffer, 1, 10, fp);
1399 }
1400 else
1401 fputc (NODE_NAME (token->val.node)[i], fp);
1402 }
1403 break;
1404
1405 case SPELL_LITERAL:
1406 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1407 break;
1408
1409 case SPELL_NONE:
1410 /* An error, most probably. */
1411 break;
1412 }
1413 }
1414
1415 /* Compare two tokens. */
1416 int
_cpp_equiv_tokens(const cpp_token * a,const cpp_token * b)1417 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1418 {
1419 if (a->type == b->type && a->flags == b->flags)
1420 switch (TOKEN_SPELL (a))
1421 {
1422 default: /* Keep compiler happy. */
1423 case SPELL_OPERATOR:
1424 return 1;
1425 case SPELL_NONE:
1426 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1427 case SPELL_IDENT:
1428 return a->val.node == b->val.node;
1429 case SPELL_LITERAL:
1430 return (a->val.str.len == b->val.str.len
1431 && !memcmp (a->val.str.text, b->val.str.text,
1432 a->val.str.len));
1433 }
1434
1435 return 0;
1436 }
1437
1438 /* Returns nonzero if a space should be inserted to avoid an
1439 accidental token paste for output. For simplicity, it is
1440 conservative, and occasionally advises a space where one is not
1441 needed, e.g. "." and ".2". */
1442 int
cpp_avoid_paste(cpp_reader * pfile,const cpp_token * token1,const cpp_token * token2)1443 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1444 const cpp_token *token2)
1445 {
1446 enum cpp_ttype a = token1->type, b = token2->type;
1447 cppchar_t c;
1448
1449 if (token1->flags & NAMED_OP)
1450 a = CPP_NAME;
1451 if (token2->flags & NAMED_OP)
1452 b = CPP_NAME;
1453
1454 c = EOF;
1455 if (token2->flags & DIGRAPH)
1456 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1457 else if (token_spellings[b].category == SPELL_OPERATOR)
1458 c = token_spellings[b].name[0];
1459
1460 /* Quickly get everything that can paste with an '='. */
1461 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1462 return 1;
1463
1464 switch (a)
1465 {
1466 case CPP_GREATER: return c == '>';
1467 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1468 case CPP_PLUS: return c == '+';
1469 case CPP_MINUS: return c == '-' || c == '>';
1470 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1471 case CPP_MOD: return c == ':' || c == '>';
1472 case CPP_AND: return c == '&';
1473 case CPP_OR: return c == '|';
1474 case CPP_COLON: return c == ':' || c == '>';
1475 case CPP_DEREF: return c == '*';
1476 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1477 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1478 case CPP_NAME: return ((b == CPP_NUMBER
1479 && name_p (pfile, &token2->val.str))
1480 || b == CPP_NAME
1481 || b == CPP_CHAR || b == CPP_STRING); /* L */
1482 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1483 || c == '.' || c == '+' || c == '-');
1484 /* UCNs */
1485 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1486 && b == CPP_NAME)
1487 || (CPP_OPTION (pfile, objc)
1488 && token1->val.str.text[0] == '@'
1489 && (b == CPP_NAME || b == CPP_STRING)));
1490 default: break;
1491 }
1492
1493 return 0;
1494 }
1495
1496 /* Output all the remaining tokens on the current line, and a newline
1497 character, to FP. Leading whitespace is removed. If there are
1498 macros, special token padding is not performed. */
1499 void
cpp_output_line(cpp_reader * pfile,FILE * fp)1500 cpp_output_line (cpp_reader *pfile, FILE *fp)
1501 {
1502 const cpp_token *token;
1503
1504 token = cpp_get_token (pfile);
1505 while (token->type != CPP_EOF)
1506 {
1507 cpp_output_token (token, fp);
1508 token = cpp_get_token (pfile);
1509 if (token->flags & PREV_WHITE)
1510 putc (' ', fp);
1511 }
1512
1513 putc ('\n', fp);
1514 }
1515
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).  */

/* Smallest payload size that new_buff will allocate.  */
#define MIN_BUFF_SIZE 8000

/* Largest free-list buffer that _cpp_get_buff will hand out for a
   request of MIN_SIZE bytes.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size requested when extending BUFF: MIN_EXTRA plus twice the space
   between BUFF's cur and limit.  Both arguments are parenthesized so
   that any expression may be passed safely.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1530
1531 /* Create a new allocation buffer. Place the control block at the end
1532 of the buffer, so that buffer overflows will cause immediate chaos. */
1533 static _cpp_buff *
new_buff(size_t len)1534 new_buff (size_t len)
1535 {
1536 _cpp_buff *result;
1537 unsigned char *base;
1538
1539 if (len < MIN_BUFF_SIZE)
1540 len = MIN_BUFF_SIZE;
1541 len = CPP_ALIGN (len);
1542
1543 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1544 result = (_cpp_buff *) (base + len);
1545 result->base = base;
1546 result->cur = base;
1547 result->limit = base + len;
1548 result->next = NULL;
1549 return result;
1550 }
1551
1552 /* Place a chain of unwanted allocation buffers on the free list. */
1553 void
_cpp_release_buff(cpp_reader * pfile,_cpp_buff * buff)1554 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1555 {
1556 _cpp_buff *end = buff;
1557
1558 while (end->next)
1559 end = end->next;
1560 end->next = pfile->free_buffs;
1561 pfile->free_buffs = buff;
1562 }
1563
1564 /* Return a free buffer of size at least MIN_SIZE. */
1565 _cpp_buff *
_cpp_get_buff(cpp_reader * pfile,size_t min_size)1566 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1567 {
1568 _cpp_buff *result, **p;
1569
1570 for (p = &pfile->free_buffs;; p = &(*p)->next)
1571 {
1572 size_t size;
1573
1574 if (*p == NULL)
1575 return new_buff (min_size);
1576 result = *p;
1577 size = result->limit - result->base;
1578 /* Return a buffer that's big enough, but don't waste one that's
1579 way too big. */
1580 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1581 break;
1582 }
1583
1584 *p = result->next;
1585 result->next = NULL;
1586 result->cur = result->base;
1587 return result;
1588 }
1589
1590 /* Creates a new buffer with enough space to hold the uncommitted
1591 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1592 the excess bytes to the new buffer. Chains the new buffer after
1593 BUFF, and returns the new buffer. */
1594 _cpp_buff *
_cpp_append_extend_buff(cpp_reader * pfile,_cpp_buff * buff,size_t min_extra)1595 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1596 {
1597 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1598 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1599
1600 buff->next = new_buff;
1601 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1602 return new_buff;
1603 }
1604
1605 /* Creates a new buffer with enough space to hold the uncommitted
1606 remaining bytes of the buffer pointed to by BUFF, and at least
1607 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1608 Chains the new buffer before the buffer pointed to by BUFF, and
1609 updates the pointer to point to the new buffer. */
1610 void
_cpp_extend_buff(cpp_reader * pfile,_cpp_buff ** pbuff,size_t min_extra)1611 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1612 {
1613 _cpp_buff *new_buff, *old_buff = *pbuff;
1614 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1615
1616 new_buff = _cpp_get_buff (pfile, size);
1617 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1618 new_buff->next = old_buff;
1619 *pbuff = new_buff;
1620 }
1621
1622 /* Free a chain of buffers starting at BUFF. */
1623 void
_cpp_free_buff(_cpp_buff * buff)1624 _cpp_free_buff (_cpp_buff *buff)
1625 {
1626 _cpp_buff *next;
1627
1628 for (; buff; buff = next)
1629 {
1630 next = buff->next;
1631 free (buff->base);
1632 }
1633 }
1634
1635 /* Allocate permanent, unaligned storage of length LEN. */
1636 unsigned char *
_cpp_unaligned_alloc(cpp_reader * pfile,size_t len)1637 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1638 {
1639 _cpp_buff *buff = pfile->u_buff;
1640 unsigned char *result = buff->cur;
1641
1642 if (len > (size_t) (buff->limit - result))
1643 {
1644 buff = _cpp_get_buff (pfile, len);
1645 buff->next = pfile->u_buff;
1646 pfile->u_buff = buff;
1647 result = buff->cur;
1648 }
1649
1650 buff->cur = result + len;
1651 return result;
1652 }
1653
1654 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1655 That buffer is used for growing allocations when saving macro
1656 replacement lists in a #define, and when parsing an answer to an
1657 assertion in #assert, #unassert or #if (and therefore possibly
1658 whilst expanding macros). It therefore must not be used by any
1659 code that they might call: specifically the lexer and the guts of
1660 the macro expander.
1661
1662 All existing other uses clearly fit this restriction: storing
1663 registered pragmas during initialization. */
1664 unsigned char *
_cpp_aligned_alloc(cpp_reader * pfile,size_t len)1665 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1666 {
1667 _cpp_buff *buff = pfile->a_buff;
1668 unsigned char *result = buff->cur;
1669
1670 if (len > (size_t) (buff->limit - result))
1671 {
1672 buff = _cpp_get_buff (pfile, len);
1673 buff->next = pfile->a_buff;
1674 pfile->a_buff = buff;
1675 result = buff->cur;
1676 }
1677
1678 buff->cur = result + len;
1679 return result;
1680 }
1681
1682 /* Say which field of TOK is in use. */
1683
1684 enum cpp_token_fld_kind
cpp_token_val_index(cpp_token * tok)1685 cpp_token_val_index (cpp_token *tok)
1686 {
1687 switch (TOKEN_SPELL (tok))
1688 {
1689 case SPELL_IDENT:
1690 return CPP_TOKEN_FLD_NODE;
1691 case SPELL_LITERAL:
1692 return CPP_TOKEN_FLD_STR;
1693 case SPELL_NONE:
1694 if (tok->type == CPP_MACRO_ARG)
1695 return CPP_TOKEN_FLD_ARG_NO;
1696 else if (tok->type == CPP_PADDING)
1697 return CPP_TOKEN_FLD_SOURCE;
1698 else if (tok->type == CPP_PRAGMA)
1699 return CPP_TOKEN_FLD_PRAGMA;
1700 /* else fall through */
1701 default:
1702 return CPP_TOKEN_FLD_NONE;
1703 }
1704 }
1705