1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
26
/* Spelling category recorded for each token type in the
   token_spellings table.  Entries produced by the OP() table macro
   are always SPELL_OPERATOR; entries produced by TK() name their
   category explicitly.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
35 struct token_spelling
36 {
37 enum spell_type category;
38 const unsigned char *name;
39 };
40
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
43
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { SPELL_ ## s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
49
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
63
64 static _cpp_buff *new_buff (size_t);
65
66
67 /* Utility routine:
68
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
71 int
cpp_ideq(const cpp_token * token,const char * string)72 cpp_ideq (const cpp_token *token, const char *string)
73 {
74 if (token->type != CPP_NAME)
75 return 0;
76
77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
78 }
79
80 /* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
82 static void
add_line_note(cpp_buffer * buffer,const uchar * pos,unsigned int type)83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
84 {
85 if (buffer->notes_used == buffer->notes_cap)
86 {
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
90 }
91
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
95 }
96
97 /* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99 void
_cpp_clean_line(cpp_reader * pfile)100 _cpp_clean_line (cpp_reader *pfile)
101 {
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
105
106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
110 s = buffer->next_line - 1;
111
112 if (!buffer->from_stage3)
113 {
114 /* Short circuit for the common case of an un-escaped line with
115 no trigraphs. The primary win here is by not writing any
116 data back to memory until we have to. */
117 for (;;)
118 {
119 c = *++s;
120 if (c == '\n' || c == '\r')
121 {
122 d = (uchar *) s;
123
124 if (s == buffer->rlimit)
125 goto done;
126
127 /* DOS line ending? */
128 if (c == '\r' && s[1] == '\n')
129 s++;
130
131 if (s == buffer->rlimit)
132 goto done;
133
134 /* check for escaped newline */
135 p = d;
136 while (p != buffer->next_line && is_nvspace (p[-1]))
137 p--;
138 if (p == buffer->next_line || p[-1] != '\\')
139 goto done;
140
141 /* Have an escaped newline; process it and proceed to
142 the slow path. */
143 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144 d = p - 2;
145 buffer->next_line = p - 1;
146 break;
147 }
148 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149 {
150 /* Have a trigraph. We may or may not have to convert
151 it. Add a line note regardless, for -Wtrigraphs. */
152 add_line_note (buffer, s, s[2]);
153 if (CPP_OPTION (pfile, trigraphs))
154 {
155 /* We do, and that means we have to switch to the
156 slow path. */
157 d = (uchar *) s;
158 *d = _cpp_trigraph_map[s[2]];
159 s += 2;
160 break;
161 }
162 }
163 }
164
165
166 for (;;)
167 {
168 c = *++s;
169 *++d = c;
170
171 if (c == '\n' || c == '\r')
172 {
173 /* Handle DOS line endings. */
174 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175 s++;
176 if (s == buffer->rlimit)
177 break;
178
179 /* Escaped? */
180 p = d;
181 while (p != buffer->next_line && is_nvspace (p[-1]))
182 p--;
183 if (p == buffer->next_line || p[-1] != '\\')
184 break;
185
186 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
187 d = p - 2;
188 buffer->next_line = p - 1;
189 }
190 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
191 {
192 /* Add a note regardless, for the benefit of -Wtrigraphs. */
193 add_line_note (buffer, d, s[2]);
194 if (CPP_OPTION (pfile, trigraphs))
195 {
196 *d = _cpp_trigraph_map[s[2]];
197 s += 2;
198 }
199 }
200 }
201 }
202 else
203 {
204 do
205 s++;
206 while (*s != '\n' && *s != '\r');
207 d = (uchar *) s;
208
209 /* Handle DOS line endings. */
210 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211 s++;
212 }
213
214 done:
215 /* (TIGCC 20050212) Don't convert \r to \n, switch them instead. */
216 if (*d=='\r' && d[1]=='\n')
217 {*d='\n'; d[1]='\r';}
218 else
219 *d = '\n';
220 /* A sentinel note that should never be processed. */
221 add_line_note (buffer, d + 1, '\n');
222 buffer->next_line = s + 1;
223 }
224
225 /* Return true if the trigraph indicated by NOTE should be warned
226 about in a comment. */
227 static bool
warn_in_comment(cpp_reader * pfile,_cpp_line_note * note)228 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
229 {
230 const uchar *p;
231
232 /* Within comments we don't warn about trigraphs, unless the
233 trigraph forms an escaped newline, as that may change
234 behavior. */
235 if (note->type != '/')
236 return false;
237
238 /* If -trigraphs, then this was an escaped newline iff the next note
239 is coincident. */
240 if (CPP_OPTION (pfile, trigraphs))
241 return note[1].pos == note->pos;
242
243 /* Otherwise, see if this forms an escaped newline. */
244 p = note->pos + 3;
245 while (is_nvspace (*p))
246 p++;
247
248 /* There might have been escaped newlines between the trigraph and the
249 newline we found. Hence the position test. */
250 return (*p == '\n' && p < note[1].pos);
251 }
252
253 /* Process the notes created by add_line_note as far as the current
254 location. */
255 void
_cpp_process_line_notes(cpp_reader * pfile,int in_comment)256 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
257 {
258 cpp_buffer *buffer = pfile->buffer;
259
260 for (;;)
261 {
262 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
263 unsigned int col;
264
265 if (note->pos > buffer->cur)
266 break;
267
268 buffer->cur_note++;
269 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
270
271 if (note->type == '\\' || note->type == ' ')
272 {
273 if (note->type == ' ' && !in_comment)
274 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
275 "backslash and newline separated by space");
276
277 if (buffer->next_line > buffer->rlimit)
278 {
279 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
280 "backslash-newline at end of file");
281 /* Prevent "no newline at end of file" warning. */
282 buffer->next_line = buffer->rlimit;
283 }
284
285 buffer->line_base = note->pos;
286 CPP_INCREMENT_LINE (pfile, 0);
287 }
288 else if (_cpp_trigraph_map[note->type])
289 {
290 if (CPP_OPTION (pfile, warn_trigraphs)
291 && (!in_comment || warn_in_comment (pfile, note)))
292 {
293 if (CPP_OPTION (pfile, trigraphs))
294 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
295 "trigraph ??%c converted to %c",
296 note->type,
297 (int) _cpp_trigraph_map[note->type]);
298 else
299 {
300 cpp_error_with_line
301 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
302 "trigraph ??%c ignored, use -trigraphs to enable",
303 note->type);
304 }
305 }
306 }
307 else
308 abort ();
309 }
310 }
311
312 /* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 nonzero if comment terminated by EOF, zero otherwise.
315
316 Buffer->cur points to the initial asterisk of the comment. */
317 bool
_cpp_skip_block_comment(cpp_reader * pfile)318 _cpp_skip_block_comment (cpp_reader *pfile)
319 {
320 cpp_buffer *buffer = pfile->buffer;
321 const uchar *cur = buffer->cur;
322 uchar c;
323
324 cur++;
325 if (*cur == '/')
326 cur++;
327
328 for (;;)
329 {
330 /* People like decorating comments with '*', so check for '/'
331 instead for efficiency. */
332 c = *cur++;
333
334 if (c == '/')
335 {
336 if (cur[-2] == '*')
337 break;
338
339 /* Warn about potential nested comments, but not if the '/'
340 comes immediately before the true comment delimiter.
341 Don't bother to get it right across escaped newlines. */
342 if (CPP_OPTION (pfile, warn_comments)
343 && cur[0] == '*' && cur[1] != '/')
344 {
345 buffer->cur = cur;
346 cpp_error_with_line (pfile, CPP_DL_WARNING,
347 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
348 "\"/*\" within comment");
349 }
350 }
351 else if (c == '\n')
352 {
353 unsigned int cols;
354 buffer->cur = cur - 1;
355 _cpp_process_line_notes (pfile, true);
356 if (buffer->next_line >= buffer->rlimit)
357 return true;
358 _cpp_clean_line (pfile);
359
360 cols = buffer->next_line - buffer->line_base;
361 CPP_INCREMENT_LINE (pfile, cols);
362
363 cur = buffer->cur;
364 }
365 }
366
367 buffer->cur = cur;
368 _cpp_process_line_notes (pfile, true);
369 return false;
370 }
371
372 /* Skip a C++ line comment, leaving buffer->cur pointing to the
373 terminating newline. Handles escaped newlines. Returns nonzero
374 if a multiline comment. */
375 static int
skip_line_comment(cpp_reader * pfile)376 skip_line_comment (cpp_reader *pfile)
377 {
378 cpp_buffer *buffer = pfile->buffer;
379 unsigned int orig_line = pfile->line_table->highest_line;
380
381 while (*buffer->cur != '\n')
382 buffer->cur++;
383
384 _cpp_process_line_notes (pfile, true);
385 return orig_line != pfile->line_table->highest_line;
386 }
387
388 /* Skips whitespace, saving the next non-whitespace character. */
389 static void
skip_whitespace(cpp_reader * pfile,cppchar_t c)390 skip_whitespace (cpp_reader *pfile, cppchar_t c)
391 {
392 cpp_buffer *buffer = pfile->buffer;
393 bool saw_NUL = false;
394
395 do
396 {
397 /* Horizontal space always OK. */
398 if (c == ' ' || c == '\t')
399 ;
400 /* Just \f \v or \0 left. */
401 else if (c == '\0')
402 saw_NUL = true;
403 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
404 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
405 CPP_BUF_COL (buffer),
406 "%s in preprocessing directive",
407 c == '\f' ? "form feed" : "vertical tab");
408
409 c = *buffer->cur++;
410 }
411 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
412 while (is_nvspace (c));
413
414 if (saw_NUL)
415 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
416
417 buffer->cur--;
418 }
419
420 /* See if the characters of a number token are valid in a name (no
421 '.', '+' or '-'). */
422 static int
name_p(cpp_reader * pfile,const cpp_string * string)423 name_p (cpp_reader *pfile, const cpp_string *string)
424 {
425 unsigned int i;
426
427 for (i = 0; i < string->len; i++)
428 if (!is_idchar (string->text[i]))
429 return 0;
430
431 return 1;
432 }
433
434 /* After parsing an identifier or other sequence, produce a warning about
435 sequences not in NFC/NFKC. */
436 static void
warn_about_normalization(cpp_reader * pfile,const cpp_token * token,const struct normalize_state * s)437 warn_about_normalization (cpp_reader *pfile,
438 const cpp_token *token,
439 const struct normalize_state *s)
440 {
441 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
442 && !pfile->state.skipping)
443 {
444 /* Make sure that the token is printed using UCNs, even
445 if we'd otherwise happily print UTF-8. */
446 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
447 size_t sz;
448
449 sz = cpp_spell_token (pfile, token, buf, false) - buf;
450 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
451 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
452 "`%.*s' is not in NFKC", (int) sz, buf);
453 else
454 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
455 "`%.*s' is not in NFC", (int) sz, buf);
456 }
457 }
458
459 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
460 an identifier. FIRST is TRUE if this starts an identifier. */
461 static bool
forms_identifier_p(cpp_reader * pfile,int first,struct normalize_state * state)462 forms_identifier_p (cpp_reader *pfile, int first,
463 struct normalize_state *state)
464 {
465 cpp_buffer *buffer = pfile->buffer;
466
467 if (*buffer->cur == '$')
468 {
469 if (!CPP_OPTION (pfile, dollars_in_ident))
470 return false;
471
472 buffer->cur++;
473 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
474 {
475 CPP_OPTION (pfile, warn_dollars) = 0;
476 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
477 }
478
479 return true;
480 }
481
482 /* Is this a syntactically valid UCN? */
483 if (CPP_OPTION (pfile, extended_identifiers)
484 && *buffer->cur == '\\'
485 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
486 {
487 buffer->cur += 2;
488 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
489 state))
490 return true;
491 buffer->cur -= 2;
492 }
493
494 return false;
495 }
496
497 /* Lex an identifier starting at BUFFER->CUR - 1. */
498 static cpp_hashnode *
lex_identifier(cpp_reader * pfile,const uchar * base,bool starts_ucn,struct normalize_state * nst)499 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
500 struct normalize_state *nst)
501 {
502 cpp_hashnode *result;
503 const uchar *cur;
504 unsigned int len;
505 unsigned int hash = HT_HASHSTEP (0, *base);
506
507 cur = pfile->buffer->cur;
508 if (! starts_ucn)
509 while (ISIDNUM (*cur))
510 {
511 hash = HT_HASHSTEP (hash, *cur);
512 cur++;
513 }
514 pfile->buffer->cur = cur;
515 if (starts_ucn || forms_identifier_p (pfile, false, nst))
516 {
517 /* Slower version for identifiers containing UCNs (or $). */
518 do {
519 while (ISIDNUM (*pfile->buffer->cur))
520 {
521 pfile->buffer->cur++;
522 NORMALIZE_STATE_UPDATE_IDNUM (nst);
523 }
524 } while (forms_identifier_p (pfile, false, nst));
525 result = _cpp_interpret_identifier (pfile, base,
526 pfile->buffer->cur - base);
527 }
528 else
529 {
530 len = cur - base;
531 hash = HT_HASHFINISH (hash, len);
532
533 result = (cpp_hashnode *)
534 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
535 }
536
537 /* Rarely, identifiers require diagnostics when lexed. */
538 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
539 && !pfile->state.skipping, 0))
540 {
541 /* It is allowed to poison the same identifier twice. */
542 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
543 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
544 NODE_NAME (result));
545
546 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
547 replacement list of a variadic macro. */
548 if (result == pfile->spec_nodes.n__VA_ARGS__
549 && !pfile->state.va_args_ok)
550 cpp_error (pfile, CPP_DL_PEDWARN,
551 "__VA_ARGS__ can only appear in the expansion"
552 " of a C99 variadic macro");
553 }
554
555 return result;
556 }
557
558 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
559 static void
lex_number(cpp_reader * pfile,cpp_string * number,struct normalize_state * nst)560 lex_number (cpp_reader *pfile, cpp_string *number,
561 struct normalize_state *nst)
562 {
563 const uchar *cur;
564 const uchar *base;
565 uchar *dest;
566
567 base = pfile->buffer->cur - 1;
568 do
569 {
570 cur = pfile->buffer->cur;
571
572 /* N.B. ISIDNUM does not include $. */
573 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
574 {
575 cur++;
576 NORMALIZE_STATE_UPDATE_IDNUM (nst);
577 }
578
579 pfile->buffer->cur = cur;
580 }
581 while (forms_identifier_p (pfile, false, nst));
582
583 number->len = cur - base;
584 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
585 memcpy (dest, base, number->len);
586 dest[number->len] = '\0';
587 number->text = dest;
588 }
589
590 /* Create a token of type TYPE with a literal spelling. */
591 static void
create_literal(cpp_reader * pfile,cpp_token * token,const uchar * base,unsigned int len,enum cpp_ttype type)592 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
593 unsigned int len, enum cpp_ttype type)
594 {
595 char *p;
596 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
597
598 memcpy (dest, base, len);
599 dest[len] = '\0';
600 /* (TIGCC 20050206) Delete \r characters in multi-line strings. */
601 p = (char *)dest;
602 while (p < (char *)dest + len) {
603 if (*p == '\r') {
604 memmove (p, p + 1, (char *)dest + len - p);
605 len--;
606 } else p++;
607 }
608 token->type = type;
609 token->val.str.len = len;
610 token->val.str.text = dest;
611 }
612
613 /* Lexes a string, character constant, or angle-bracketed header file
614 name. The stored string contains the spelling, including opening
615 quote and leading any leading 'L'. It returns the type of the
616 literal, or CPP_OTHER if it was not properly terminated.
617
618 The spelling is NUL-terminated, but it is not guaranteed that this
619 is the first NUL since embedded NULs are preserved.
620
621 Multi-line strings are allowed as a TIGCC extension (removed in the FSF GCC
622 since version 3.3). */
623 static void
lex_string(cpp_reader * pfile,cpp_token * token,const uchar * base)624 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
625 {
626 bool saw_NUL = false;
627 const uchar *cur;
628 cppchar_t terminator;
629 enum cpp_ttype type;
630 cpp_buffer *buffer;
631 unsigned int startcol;
632
633 buffer = pfile->buffer;
634 startcol = CPP_BUF_COL (buffer);
635 cur = base;
636 terminator = *cur++;
637 if (terminator == 'L')
638 terminator = *cur++;
639 if (terminator == '\"')
640 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
641 else if (terminator == '\'')
642 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
643 else
644 terminator = '>', type = CPP_HEADER_NAME;
645
646 for (;;)
647 {
648 cppchar_t c = *cur++;
649
650 /* In #include-style directives, terminators are not escapable. */
651 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
652 cur++;
653 else if (c == terminator)
654 break;
655 else if (c == '\n')
656 {
657 unsigned int cols;
658
659 /* In assembly language, silently terminate string and
660 character literals at end of line. This is a kludge
661 around not knowing where comments are. */
662 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
663 {
664 cur--;
665 break;
666 }
667
668 /* Character constants and header names may not extend over
669 multiple lines. In Standard C, neither may strings.
670 In TIGCC, we accept multiline strings as an
671 extension, except in #include family directives. */
672 if (terminator != '"' || pfile->state.angled_headers)
673 {
674 cur--;
675 type = CPP_OTHER;
676 break;
677 }
678
679 if (CPP_PEDANTIC (pfile))
680 cpp_error(pfile, CPP_DL_PEDWARN, "ISO C forbids newline in string literal");
681 buffer->cur = cur - 1;
682 _cpp_process_line_notes (pfile, true);
683 if (buffer->next_line >= buffer->rlimit)
684 {
685 cur--;
686 type = CPP_OTHER;
687 break;
688 }
689 _cpp_clean_line (pfile);
690
691 cols = buffer->next_line - buffer->line_base;
692 CPP_INCREMENT_LINE (pfile, cols);
693
694 cur = buffer->cur;
695
696 if (pfile->mls_line == 0)
697 {
698 pfile->mls_line = token->src_loc;
699 pfile->mls_col = startcol;
700 }
701 }
702 else if (c == '\0')
703 saw_NUL = true;
704 }
705
706 if (saw_NUL && !pfile->state.skipping)
707 cpp_error (pfile, CPP_DL_WARNING,
708 "null character(s) preserved in literal");
709
710 pfile->buffer->cur = cur;
711 create_literal (pfile, token, base, cur - base, type);
712 }
713
714 /* The stored comment includes the comment start and any terminator. */
715 static void
save_comment(cpp_reader * pfile,cpp_token * token,const unsigned char * from,cppchar_t type)716 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
717 cppchar_t type)
718 {
719 unsigned char *buffer;
720 unsigned int len, clen;
721
722 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
723
724 /* C++ comments probably (not definitely) have moved past a new
725 line, which we don't want to save in the comment. */
726 if (is_vspace (pfile->buffer->cur[-1]))
727 len--;
728
729 /* If we are currently in a directive, then we need to store all
730 C++ comments as C comments internally, and so we need to
731 allocate a little extra space in that case.
732
733 Note that the only time we encounter a directive here is
734 when we are saving comments in a "#define". */
735 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
736
737 buffer = _cpp_unaligned_alloc (pfile, clen);
738
739 token->type = CPP_COMMENT;
740 token->val.str.len = clen;
741 token->val.str.text = buffer;
742
743 buffer[0] = '/';
744 memcpy (buffer + 1, from, len - 1);
745
746 /* Finish conversion to a C comment, if necessary. */
747 if (pfile->state.in_directive && type == '/')
748 {
749 buffer[1] = '*';
750 buffer[clen - 2] = '*';
751 buffer[clen - 1] = '/';
752 }
753 }
754
755 /* Allocate COUNT tokens for RUN. */
756 void
_cpp_init_tokenrun(tokenrun * run,unsigned int count)757 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
758 {
759 run->base = XNEWVEC (cpp_token, count);
760 run->limit = run->base + count;
761 run->next = NULL;
762 }
763
764 /* Returns the next tokenrun, or creates one if there is none. */
765 static tokenrun *
next_tokenrun(tokenrun * run)766 next_tokenrun (tokenrun *run)
767 {
768 if (run->next == NULL)
769 {
770 run->next = XNEW (tokenrun);
771 run->next->prev = run;
772 _cpp_init_tokenrun (run->next, 250);
773 }
774
775 return run->next;
776 }
777
778 /* Allocate a single token that is invalidated at the same time as the
779 rest of the tokens on the line. Has its line and col set to the
780 same as the last lexed token, so that diagnostics appear in the
781 right place. */
782 cpp_token *
_cpp_temp_token(cpp_reader * pfile)783 _cpp_temp_token (cpp_reader *pfile)
784 {
785 cpp_token *old, *result;
786
787 old = pfile->cur_token - 1;
788 if (pfile->cur_token == pfile->cur_run->limit)
789 {
790 pfile->cur_run = next_tokenrun (pfile->cur_run);
791 pfile->cur_token = pfile->cur_run->base;
792 }
793
794 result = pfile->cur_token++;
795 result->src_loc = old->src_loc;
796 return result;
797 }
798
799 /* Lex a token into RESULT (external interface). Takes care of issues
800 like directive handling, token lookahead, multiple include
801 optimization and skipping. */
802 const cpp_token *
_cpp_lex_token(cpp_reader * pfile)803 _cpp_lex_token (cpp_reader *pfile)
804 {
805 cpp_token *result;
806
807 for (;;)
808 {
809 if (pfile->cur_token == pfile->cur_run->limit)
810 {
811 pfile->cur_run = next_tokenrun (pfile->cur_run);
812 pfile->cur_token = pfile->cur_run->base;
813 }
814
815 if (pfile->lookaheads)
816 {
817 pfile->lookaheads--;
818 result = pfile->cur_token++;
819 }
820 else
821 result = _cpp_lex_direct (pfile);
822
823 if (result->flags & BOL)
824 {
825 /* Is this a directive. If _cpp_handle_directive returns
826 false, it is an assembler #. */
827 if (result->type == CPP_HASH
828 /* 6.10.3 p 11: Directives in a list of macro arguments
829 gives undefined behavior. This implementation
830 handles the directive as normal. */
831 && pfile->state.parsing_args != 1
832 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
833 {
834 if (pfile->directive_result.type == CPP_PADDING)
835 continue;
836 else
837 {
838 result = &pfile->directive_result;
839 break;
840 }
841 }
842
843 if (pfile->cb.line_change && !pfile->state.skipping)
844 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
845 }
846
847 /* We don't skip tokens in directives. */
848 if (pfile->state.in_directive)
849 break;
850
851 /* Outside a directive, invalidate controlling macros. At file
852 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
853 get here and MI optimization works. */
854 pfile->mi_valid = false;
855
856 if (!pfile->state.skipping || result->type == CPP_EOF)
857 break;
858 }
859
860 return result;
861 }
862
863 /* Returns true if a fresh line has been loaded. */
864 bool
_cpp_get_fresh_line(cpp_reader * pfile)865 _cpp_get_fresh_line (cpp_reader *pfile)
866 {
867 int return_at_eof;
868
869 /* We can't get a new line until we leave the current directive. */
870 if (pfile->state.in_directive)
871 return false;
872
873 for (;;)
874 {
875 cpp_buffer *buffer = pfile->buffer;
876
877 if (!buffer->need_line)
878 return true;
879
880 if (buffer->next_line < buffer->rlimit)
881 {
882 _cpp_clean_line (pfile);
883 return true;
884 }
885
886 /* First, get out of parsing arguments state. */
887 if (pfile->state.parsing_args)
888 return false;
889
890 /* End of buffer. Non-empty files should end in a newline. */
891 if (buffer->buf != buffer->rlimit
892 && buffer->next_line > buffer->rlimit
893 && !buffer->from_stage3)
894 {
895 /* Only warn once. */
896 buffer->next_line = buffer->rlimit;
897 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
898 CPP_BUF_COLUMN (buffer, buffer->cur),
899 "no newline at end of file");
900 }
901
902 return_at_eof = buffer->return_at_eof;
903 _cpp_pop_buffer (pfile);
904 if (pfile->buffer == NULL || return_at_eof)
905 return false;
906 }
907 }
908
/* Helper for lexing two-character operators.  Expects `buffer' (with
   a `cur' read pointer) and `result' (the token being built) to be in
   scope at the point of use.  Sets result->type to ELSE_TYPE; if the
   next input character is CHAR, consumes it and sets result->type to
   THEN_TYPE instead.  Every argument is parenthesized so that
   expressions can be passed safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      result->type = (ELSE_TYPE);               \
      if (*buffer->cur == (CHAR))               \
        {                                       \
          buffer->cur++;                        \
          result->type = (THEN_TYPE);           \
        }                                       \
    }                                           \
  while (0)
917
918 /* Lex a token into pfile->cur_token, which is also incremented, to
919 get diagnostics pointing to the correct location.
920
921 Does not handle issues such as token lookahead, multiple-include
922 optimization, directives, skipping etc. This function is only
923 suitable for use by _cpp_lex_token, and in special cases like
924 lex_expansion_token which doesn't care for any of these issues.
925
926 When meeting a newline, returns CPP_EOF if parsing a directive,
927 otherwise returns to the start of the token buffer if permissible.
928 Returns the location of the lexed token. */
929 cpp_token *
_cpp_lex_direct(cpp_reader * pfile)930 _cpp_lex_direct (cpp_reader *pfile)
931 {
932 cppchar_t c;
933 cpp_buffer *buffer;
934 const unsigned char *comment_start;
935 cpp_token *result = pfile->cur_token++;
936
937 fresh_line:
938 result->flags = 0;
939 buffer = pfile->buffer;
940 if (buffer->need_line)
941 {
942 if (!_cpp_get_fresh_line (pfile))
943 {
944 result->type = CPP_EOF;
945 if (!pfile->state.in_directive)
946 {
947 /* Tell the compiler the line number of the EOF token. */
948 result->src_loc = pfile->line_table->highest_line;
949 result->flags = BOL;
950 }
951 return result;
952 }
953 if (!pfile->keep_tokens)
954 {
955 pfile->cur_run = &pfile->base_run;
956 result = pfile->base_run.base;
957 pfile->cur_token = result + 1;
958 }
959 result->flags = BOL;
960 if (pfile->state.parsing_args == 2)
961 result->flags |= PREV_WHITE;
962 }
963 buffer = pfile->buffer;
964 update_tokens_line:
965 result->src_loc = pfile->line_table->highest_line;
966
967 skipped_white:
968 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
969 && !pfile->overlaid_buffer)
970 {
971 _cpp_process_line_notes (pfile, false);
972 result->src_loc = pfile->line_table->highest_line;
973 }
974 c = *buffer->cur++;
975
976 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
977 CPP_BUF_COLUMN (buffer, buffer->cur));
978
979 switch (c)
980 {
981 case ' ': case '\t': case '\f': case '\v': case '\0':
982 result->flags |= PREV_WHITE;
983 skip_whitespace (pfile, c);
984 goto skipped_white;
985
986 case '\n':
987 if (buffer->cur < buffer->rlimit)
988 CPP_INCREMENT_LINE (pfile, 0);
989 buffer->need_line = true;
990 goto fresh_line;
991
992 case '0': case '1': case '2': case '3': case '4':
993 case '5': case '6': case '7': case '8': case '9':
994 {
995 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
996 result->type = CPP_NUMBER;
997 lex_number (pfile, &result->val.str, &nst);
998 warn_about_normalization (pfile, result, &nst);
999 break;
1000 }
1001
1002 case 'L':
1003 /* 'L' may introduce wide characters or strings. */
1004 if (*buffer->cur == '\'' || *buffer->cur == '"')
1005 {
1006 lex_string (pfile, result, buffer->cur - 1);
1007 break;
1008 }
1009 /* Fall through. */
1010
1011 case '_':
1012 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1013 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1014 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1015 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1016 case 'y': case 'z':
1017 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1018 case 'G': case 'H': case 'I': case 'J': case 'K':
1019 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1020 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1021 case 'Y': case 'Z':
1022 result->type = CPP_NAME;
1023 {
1024 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1025 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1026 &nst);
1027 warn_about_normalization (pfile, result, &nst);
1028 }
1029
1030 /* Convert named operators to their proper types. */
1031 if (result->val.node->flags & NODE_OPERATOR)
1032 {
1033 result->flags |= NAMED_OP;
1034 result->type = (enum cpp_ttype) result->val.node->directive_index;
1035 }
1036 break;
1037
1038 case '\'':
1039 case '"':
1040 lex_string (pfile, result, buffer->cur - 1);
1041 break;
1042
1043 case '/':
1044 /* A potential block or line comment. */
1045 comment_start = buffer->cur;
1046 c = *buffer->cur;
1047
1048 if (c == '*')
1049 {
1050 if (_cpp_skip_block_comment (pfile))
1051 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1052 }
1053 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1054 || cpp_in_system_header (pfile)))
1055 {
1056 /* Warn about comments only if pedantically GNUC89, and not
1057 in system headers. */
1058 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1059 && ! buffer->warned_cplusplus_comments)
1060 {
1061 cpp_error (pfile, CPP_DL_PEDWARN,
1062 "C++ style comments are not allowed in ISO C90");
1063 cpp_error (pfile, CPP_DL_PEDWARN,
1064 "(this will be reported only once per input file)");
1065 buffer->warned_cplusplus_comments = 1;
1066 }
1067
1068 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1069 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1070 }
1071 else if (c == '=')
1072 {
1073 buffer->cur++;
1074 result->type = CPP_DIV_EQ;
1075 break;
1076 }
1077 else
1078 {
1079 result->type = CPP_DIV;
1080 break;
1081 }
1082
1083 if (!pfile->state.save_comments)
1084 {
1085 result->flags |= PREV_WHITE;
1086 goto update_tokens_line;
1087 }
1088
1089 /* Save the comment as a token in its own right. */
1090 save_comment (pfile, result, comment_start, c);
1091 break;
1092
1093 case '<':
1094 if (pfile->state.angled_headers)
1095 {
1096 lex_string (pfile, result, buffer->cur - 1);
1097 break;
1098 }
1099
1100 result->type = CPP_LESS;
1101 if (*buffer->cur == '=')
1102 buffer->cur++, result->type = CPP_LESS_EQ;
1103 else if (*buffer->cur == '<')
1104 {
1105 buffer->cur++;
1106 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1107 }
1108 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1109 {
1110 buffer->cur++;
1111 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1112 }
1113 else if (CPP_OPTION (pfile, digraphs))
1114 {
1115 if (*buffer->cur == ':')
1116 {
1117 buffer->cur++;
1118 result->flags |= DIGRAPH;
1119 result->type = CPP_OPEN_SQUARE;
1120 }
1121 else if (*buffer->cur == '%')
1122 {
1123 buffer->cur++;
1124 result->flags |= DIGRAPH;
1125 result->type = CPP_OPEN_BRACE;
1126 }
1127 }
1128 break;
1129
1130 case '>':
1131 result->type = CPP_GREATER;
1132 if (*buffer->cur == '=')
1133 buffer->cur++, result->type = CPP_GREATER_EQ;
1134 else if (*buffer->cur == '>')
1135 {
1136 buffer->cur++;
1137 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1138 }
1139 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1140 {
1141 buffer->cur++;
1142 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1143 }
1144 break;
1145
1146 case '%':
1147 result->type = CPP_MOD;
1148 if (*buffer->cur == '=')
1149 buffer->cur++, result->type = CPP_MOD_EQ;
1150 else if (CPP_OPTION (pfile, digraphs))
1151 {
1152 if (*buffer->cur == ':')
1153 {
1154 buffer->cur++;
1155 result->flags |= DIGRAPH;
1156 result->type = CPP_HASH;
1157 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1158 buffer->cur += 2, result->type = CPP_PASTE;
1159 }
1160 else if (*buffer->cur == '>')
1161 {
1162 buffer->cur++;
1163 result->flags |= DIGRAPH;
1164 result->type = CPP_CLOSE_BRACE;
1165 }
1166 }
1167 break;
1168
1169 case '.':
1170 result->type = CPP_DOT;
1171 if (ISDIGIT (*buffer->cur))
1172 {
1173 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1174 result->type = CPP_NUMBER;
1175 lex_number (pfile, &result->val.str, &nst);
1176 warn_about_normalization (pfile, result, &nst);
1177 }
1178 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1179 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1180 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1181 buffer->cur++, result->type = CPP_DOT_STAR;
1182 break;
1183
1184 case '+':
1185 result->type = CPP_PLUS;
1186 if (*buffer->cur == '+')
1187 buffer->cur++, result->type = CPP_PLUS_PLUS;
1188 else if (*buffer->cur == '=')
1189 buffer->cur++, result->type = CPP_PLUS_EQ;
1190 break;
1191
1192 case '-':
1193 result->type = CPP_MINUS;
1194 if (*buffer->cur == '>')
1195 {
1196 buffer->cur++;
1197 result->type = CPP_DEREF;
1198 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1199 buffer->cur++, result->type = CPP_DEREF_STAR;
1200 }
1201 else if (*buffer->cur == '-')
1202 buffer->cur++, result->type = CPP_MINUS_MINUS;
1203 else if (*buffer->cur == '=')
1204 buffer->cur++, result->type = CPP_MINUS_EQ;
1205 break;
1206
1207 case '&':
1208 result->type = CPP_AND;
1209 if (*buffer->cur == '&')
1210 buffer->cur++, result->type = CPP_AND_AND;
1211 else if (*buffer->cur == '=')
1212 buffer->cur++, result->type = CPP_AND_EQ;
1213 break;
1214
1215 case '|':
1216 result->type = CPP_OR;
1217 if (*buffer->cur == '|')
1218 buffer->cur++, result->type = CPP_OR_OR;
1219 else if (*buffer->cur == '=')
1220 buffer->cur++, result->type = CPP_OR_EQ;
1221 break;
1222
1223 case ':':
1224 result->type = CPP_COLON;
1225 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1226 buffer->cur++, result->type = CPP_SCOPE;
1227 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1228 {
1229 buffer->cur++;
1230 result->flags |= DIGRAPH;
1231 result->type = CPP_CLOSE_SQUARE;
1232 }
1233 break;
1234
1235 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1236 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1237 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1238 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1239 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1240
1241 case '?': result->type = CPP_QUERY; break;
1242 case '~': result->type = CPP_COMPL; break;
1243 case ',': result->type = CPP_COMMA; break;
1244 case '(': result->type = CPP_OPEN_PAREN; break;
1245 case ')': result->type = CPP_CLOSE_PAREN; break;
1246 case '[': result->type = CPP_OPEN_SQUARE; break;
1247 case ']': result->type = CPP_CLOSE_SQUARE; break;
1248 case '{': result->type = CPP_OPEN_BRACE; break;
1249 case '}': result->type = CPP_CLOSE_BRACE; break;
1250 case ';': result->type = CPP_SEMICOLON; break;
1251
1252 /* @ is a punctuator in Objective-C. */
1253 case '@': result->type = CPP_ATSIGN; break;
1254
1255 case '$':
1256 case '\\':
1257 {
1258 const uchar *base = --buffer->cur;
1259 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1260
1261 if (forms_identifier_p (pfile, true, &nst))
1262 {
1263 result->type = CPP_NAME;
1264 result->val.node = lex_identifier (pfile, base, true, &nst);
1265 warn_about_normalization (pfile, result, &nst);
1266 break;
1267 }
1268 buffer->cur++;
1269 }
1270
1271 default:
1272 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1273 break;
1274 }
1275
1276 return result;
1277 }
1278
1279 /* An upper bound on the number of bytes needed to spell TOKEN.
1280 Does not include preceding whitespace. */
1281 unsigned int
cpp_token_len(const cpp_token * token)1282 cpp_token_len (const cpp_token *token)
1283 {
1284 unsigned int len;
1285
1286 switch (TOKEN_SPELL (token))
1287 {
1288 default: len = 4; break;
1289 case SPELL_LITERAL: len = token->val.str.len; break;
1290 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1291 }
1292
1293 return len;
1294 }
1295
/* Decode the UTF-8 sequence that starts at NAME and store the
   equivalent \UXXXXXXXX escape (lower-case hex digits) in BUFFER.
   Exactly 10 bytes are written to BUFFER and no NUL is appended.
   Returns the number of bytes of NAME that made up the sequence.
   Aborts if a continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned int lead;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The count of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  lead = *name;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte are those below the length
     marker.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte contributes its low six bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      name++;

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (*name & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;

  /* Eight hex digits, most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1329
1330
1331 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1332 already contain the enough space to hold the token's spelling.
1333 Returns a pointer to the character after the last character written.
1334 FORSTRING is true if this is to be the spelling after translation
1335 phase 1 (this is different for UCNs).
1336 FIXME: Would be nice if we didn't need the PFILE argument. */
1337 unsigned char *
cpp_spell_token(cpp_reader * pfile,const cpp_token * token,unsigned char * buffer,bool forstring)1338 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1339 unsigned char *buffer, bool forstring)
1340 {
1341 switch (TOKEN_SPELL (token))
1342 {
1343 case SPELL_OPERATOR:
1344 {
1345 const unsigned char *spelling;
1346 unsigned char c;
1347
1348 if (token->flags & DIGRAPH)
1349 spelling
1350 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1351 else if (token->flags & NAMED_OP)
1352 goto spell_ident;
1353 else
1354 spelling = TOKEN_NAME (token);
1355
1356 while ((c = *spelling++) != '\0')
1357 *buffer++ = c;
1358 }
1359 break;
1360
1361 spell_ident:
1362 case SPELL_IDENT:
1363 if (forstring)
1364 {
1365 memcpy (buffer, NODE_NAME (token->val.node),
1366 NODE_LEN (token->val.node));
1367 buffer += NODE_LEN (token->val.node);
1368 }
1369 else
1370 {
1371 size_t i;
1372 const unsigned char * name = NODE_NAME (token->val.node);
1373
1374 for (i = 0; i < NODE_LEN (token->val.node); i++)
1375 if (name[i] & ~0x7F)
1376 {
1377 i += utf8_to_ucn (buffer, name + i) - 1;
1378 buffer += 10;
1379 }
1380 else
1381 *buffer++ = NODE_NAME (token->val.node)[i];
1382 }
1383 break;
1384
1385 case SPELL_LITERAL:
1386 memcpy (buffer, token->val.str.text, token->val.str.len);
1387 buffer += token->val.str.len;
1388 break;
1389
1390 case SPELL_NONE:
1391 cpp_error (pfile, CPP_DL_ICE,
1392 "unspellable token %s", TOKEN_NAME (token));
1393 break;
1394 }
1395
1396 return buffer;
1397 }
1398
1399 /* Returns TOKEN spelt as a null-terminated string. The string is
1400 freed when the reader is destroyed. Useful for diagnostics. */
1401 unsigned char *
cpp_token_as_text(cpp_reader * pfile,const cpp_token * token)1402 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1403 {
1404 unsigned int len = cpp_token_len (token) + 1;
1405 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1406
1407 end = cpp_spell_token (pfile, token, start, false);
1408 end[0] = '\0';
1409
1410 return start;
1411 }
1412
1413 /* Used by C front ends, which really should move to using
1414 cpp_token_as_text. */
1415 const char *
cpp_type2name(enum cpp_ttype type)1416 cpp_type2name (enum cpp_ttype type)
1417 {
1418 return (const char *) token_spellings[type].name;
1419 }
1420
1421 /* Writes the spelling of token to FP, without any preceding space.
1422 Separated from cpp_spell_token for efficiency - to avoid stdio
1423 double-buffering. */
1424 void
cpp_output_token(const cpp_token * token,FILE * fp)1425 cpp_output_token (const cpp_token *token, FILE *fp)
1426 {
1427 switch (TOKEN_SPELL (token))
1428 {
1429 case SPELL_OPERATOR:
1430 {
1431 const unsigned char *spelling;
1432 int c;
1433
1434 if (token->flags & DIGRAPH)
1435 spelling
1436 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1437 else if (token->flags & NAMED_OP)
1438 goto spell_ident;
1439 else
1440 spelling = TOKEN_NAME (token);
1441
1442 c = *spelling;
1443 do
1444 putc (c, fp);
1445 while ((c = *++spelling) != '\0');
1446 }
1447 break;
1448
1449 spell_ident:
1450 case SPELL_IDENT:
1451 {
1452 size_t i;
1453 const unsigned char * name = NODE_NAME (token->val.node);
1454
1455 for (i = 0; i < NODE_LEN (token->val.node); i++)
1456 if (name[i] & ~0x7F)
1457 {
1458 unsigned char buffer[10];
1459 i += utf8_to_ucn (buffer, name + i) - 1;
1460 fwrite (buffer, 1, 10, fp);
1461 }
1462 else
1463 fputc (NODE_NAME (token->val.node)[i], fp);
1464 }
1465 break;
1466
1467 case SPELL_LITERAL:
1468 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1469 break;
1470
1471 case SPELL_NONE:
1472 /* An error, most probably. */
1473 break;
1474 }
1475 }
1476
1477 /* Compare two tokens. */
1478 int
_cpp_equiv_tokens(const cpp_token * a,const cpp_token * b)1479 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1480 {
1481 if (a->type == b->type && a->flags == b->flags)
1482 switch (TOKEN_SPELL (a))
1483 {
1484 default: /* Keep compiler happy. */
1485 case SPELL_OPERATOR:
1486 return 1;
1487 case SPELL_NONE:
1488 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1489 case SPELL_IDENT:
1490 return a->val.node == b->val.node;
1491 case SPELL_LITERAL:
1492 return (a->val.str.len == b->val.str.len
1493 && !memcmp (a->val.str.text, b->val.str.text,
1494 a->val.str.len));
1495 }
1496
1497 return 0;
1498 }
1499
1500 /* Returns nonzero if a space should be inserted to avoid an
1501 accidental token paste for output. For simplicity, it is
1502 conservative, and occasionally advises a space where one is not
1503 needed, e.g. "." and ".2". */
1504 int
cpp_avoid_paste(cpp_reader * pfile,const cpp_token * token1,const cpp_token * token2)1505 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1506 const cpp_token *token2)
1507 {
1508 enum cpp_ttype a = token1->type, b = token2->type;
1509 cppchar_t c;
1510
1511 if (token1->flags & NAMED_OP)
1512 a = CPP_NAME;
1513 if (token2->flags & NAMED_OP)
1514 b = CPP_NAME;
1515
1516 c = EOF;
1517 if (token2->flags & DIGRAPH)
1518 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1519 else if (token_spellings[b].category == SPELL_OPERATOR)
1520 c = token_spellings[b].name[0];
1521
1522 /* Quickly get everything that can paste with an '='. */
1523 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1524 return 1;
1525
1526 switch (a)
1527 {
1528 case CPP_GREATER: return c == '>' || c == '?';
1529 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1530 case CPP_PLUS: return c == '+';
1531 case CPP_MINUS: return c == '-' || c == '>';
1532 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1533 case CPP_MOD: return c == ':' || c == '>';
1534 case CPP_AND: return c == '&';
1535 case CPP_OR: return c == '|';
1536 case CPP_COLON: return c == ':' || c == '>';
1537 case CPP_DEREF: return c == '*';
1538 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1539 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1540 case CPP_NAME: return ((b == CPP_NUMBER
1541 && name_p (pfile, &token2->val.str))
1542 || b == CPP_NAME
1543 || b == CPP_CHAR || b == CPP_STRING); /* L */
1544 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1545 || c == '.' || c == '+' || c == '-');
1546 /* UCNs */
1547 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1548 && b == CPP_NAME)
1549 || (CPP_OPTION (pfile, objc)
1550 && token1->val.str.text[0] == '@'
1551 && (b == CPP_NAME || b == CPP_STRING)));
1552 default: break;
1553 }
1554
1555 return 0;
1556 }
1557
1558 /* Output all the remaining tokens on the current line, and a newline
1559 character, to FP. Leading whitespace is removed. If there are
1560 macros, special token padding is not performed. */
1561 void
cpp_output_line(cpp_reader * pfile,FILE * fp)1562 cpp_output_line (cpp_reader *pfile, FILE *fp)
1563 {
1564 const cpp_token *token;
1565
1566 token = cpp_get_token (pfile);
1567 while (token->type != CPP_EOF)
1568 {
1569 cpp_output_token (token, fp);
1570 token = cpp_get_token (pfile);
1571 if (token->flags & PREV_WHITE)
1572 putc (' ', fp);
1573 }
1574
1575 putc ('\n', fp);
1576 }
1577
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the space between BUFF's
   cur and limit.  Every macro argument is parenthesized so that an
   expression argument cannot be re-associated by the expansion.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1592
1593 /* Create a new allocation buffer. Place the control block at the end
1594 of the buffer, so that buffer overflows will cause immediate chaos. */
1595 static _cpp_buff *
new_buff(size_t len)1596 new_buff (size_t len)
1597 {
1598 _cpp_buff *result;
1599 unsigned char *base;
1600
1601 if (len < MIN_BUFF_SIZE)
1602 len = MIN_BUFF_SIZE;
1603 len = CPP_ALIGN (len);
1604
1605 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1606 result = (_cpp_buff *) (base + len);
1607 result->base = base;
1608 result->cur = base;
1609 result->limit = base + len;
1610 result->next = NULL;
1611 return result;
1612 }
1613
1614 /* Place a chain of unwanted allocation buffers on the free list. */
1615 void
_cpp_release_buff(cpp_reader * pfile,_cpp_buff * buff)1616 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1617 {
1618 _cpp_buff *end = buff;
1619
1620 while (end->next)
1621 end = end->next;
1622 end->next = pfile->free_buffs;
1623 pfile->free_buffs = buff;
1624 }
1625
1626 /* Return a free buffer of size at least MIN_SIZE. */
1627 _cpp_buff *
_cpp_get_buff(cpp_reader * pfile,size_t min_size)1628 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1629 {
1630 _cpp_buff *result, **p;
1631
1632 for (p = &pfile->free_buffs;; p = &(*p)->next)
1633 {
1634 size_t size;
1635
1636 if (*p == NULL)
1637 return new_buff (min_size);
1638 result = *p;
1639 size = result->limit - result->base;
1640 /* Return a buffer that's big enough, but don't waste one that's
1641 way too big. */
1642 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1643 break;
1644 }
1645
1646 *p = result->next;
1647 result->next = NULL;
1648 result->cur = result->base;
1649 return result;
1650 }
1651
1652 /* Creates a new buffer with enough space to hold the uncommitted
1653 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1654 the excess bytes to the new buffer. Chains the new buffer after
1655 BUFF, and returns the new buffer. */
1656 _cpp_buff *
_cpp_append_extend_buff(cpp_reader * pfile,_cpp_buff * buff,size_t min_extra)1657 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1658 {
1659 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1660 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1661
1662 buff->next = new_buff;
1663 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1664 return new_buff;
1665 }
1666
1667 /* Creates a new buffer with enough space to hold the uncommitted
1668 remaining bytes of the buffer pointed to by BUFF, and at least
1669 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1670 Chains the new buffer before the buffer pointed to by BUFF, and
1671 updates the pointer to point to the new buffer. */
1672 void
_cpp_extend_buff(cpp_reader * pfile,_cpp_buff ** pbuff,size_t min_extra)1673 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1674 {
1675 _cpp_buff *new_buff, *old_buff = *pbuff;
1676 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1677
1678 new_buff = _cpp_get_buff (pfile, size);
1679 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1680 new_buff->next = old_buff;
1681 *pbuff = new_buff;
1682 }
1683
1684 /* Free a chain of buffers starting at BUFF. */
1685 void
_cpp_free_buff(_cpp_buff * buff)1686 _cpp_free_buff (_cpp_buff *buff)
1687 {
1688 _cpp_buff *next;
1689
1690 for (; buff; buff = next)
1691 {
1692 next = buff->next;
1693 free (buff->base);
1694 }
1695 }
1696
1697 /* Allocate permanent, unaligned storage of length LEN. */
1698 unsigned char *
_cpp_unaligned_alloc(cpp_reader * pfile,size_t len)1699 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1700 {
1701 _cpp_buff *buff = pfile->u_buff;
1702 unsigned char *result = buff->cur;
1703
1704 if (len > (size_t) (buff->limit - result))
1705 {
1706 buff = _cpp_get_buff (pfile, len);
1707 buff->next = pfile->u_buff;
1708 pfile->u_buff = buff;
1709 result = buff->cur;
1710 }
1711
1712 buff->cur = result + len;
1713 return result;
1714 }
1715
1716 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1717 That buffer is used for growing allocations when saving macro
1718 replacement lists in a #define, and when parsing an answer to an
1719 assertion in #assert, #unassert or #if (and therefore possibly
1720 whilst expanding macros). It therefore must not be used by any
1721 code that they might call: specifically the lexer and the guts of
1722 the macro expander.
1723
1724 All existing other uses clearly fit this restriction: storing
1725 registered pragmas during initialization. */
1726 unsigned char *
_cpp_aligned_alloc(cpp_reader * pfile,size_t len)1727 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1728 {
1729 _cpp_buff *buff = pfile->a_buff;
1730 unsigned char *result = buff->cur;
1731
1732 if (len > (size_t) (buff->limit - result))
1733 {
1734 buff = _cpp_get_buff (pfile, len);
1735 buff->next = pfile->a_buff;
1736 pfile->a_buff = buff;
1737 result = buff->cur;
1738 }
1739
1740 buff->cur = result + len;
1741 return result;
1742 }
1743
1744 /* Say which field of TOK is in use. */
1745
1746 enum cpp_token_fld_kind
cpp_token_val_index(cpp_token * tok)1747 cpp_token_val_index (cpp_token *tok)
1748 {
1749 switch (TOKEN_SPELL (tok))
1750 {
1751 case SPELL_IDENT:
1752 return CPP_TOKEN_FLD_NODE;
1753 case SPELL_LITERAL:
1754 return CPP_TOKEN_FLD_STR;
1755 case SPELL_NONE:
1756 if (tok->type == CPP_MACRO_ARG)
1757 return CPP_TOKEN_FLD_ARG_NO;
1758 else if (tok->type == CPP_PADDING)
1759 return CPP_TOKEN_FLD_SOURCE;
1760 else if (tok->type == CPP_PRAGMA)
1761 return CPP_TOKEN_FLD_STR;
1762 /* else fall through */
1763 default:
1764 return CPP_TOKEN_FLD_NONE;
1765 }
1766 }
1767
1768 /* Emits error for unterminated strings. */
1769 void
cpp_unterminated(cpp_reader * pfile,int term)1770 cpp_unterminated (cpp_reader *pfile, int term)
1771 {
1772 cpp_error (pfile, CPP_DL_ERROR, "missing terminating %c character", term);
1773
1774 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line_table->highest_line)
1775 {
1776 cpp_error_with_line (pfile, CPP_DL_ERROR, pfile->mls_line, pfile->mls_col,
1777 "possible start of unterminated string literal");
1778 pfile->mls_line = 0;
1779 }
1780 }
1781
1782