1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "preprocessor.h"
10 #include "token_t.h"
11 #include "symbol_t.h"
12 #include "adt/util.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
19 #include "input.h"
20
21 #define MAX_PUTBACK 3
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
23
/** One entry of a macro replacement list (see pp_definition_t::token_list). */
typedef struct saved_token_t {
	token_t token;
	/** whether whitespace preceded the token; stringify() turns it into a
	 * single space and expand_next() copies it into info.had_whitespace */
	bool had_whitespace;
} saved_token_t;
28
/** Whitespace and line-begin state recorded in front of a token. */
typedef struct whitespace_info_t {
	/** current token had whitespace in front of it */
	bool had_whitespace;
	/** current token is at the beginning of a line.
	 * => a "#" at line begin starts a preprocessing directive. */
	bool at_line_begin;
	/** number of spaces before the first token in a line */
	unsigned whitespace_at_line_begin;
} whitespace_info_t;
38
/**
 * A preprocessor definition together with the bookkeeping that is used while
 * its replacement list is being replayed.
 */
struct pp_definition_t {
	symbol_t *symbol;
	source_position_t source_position;
	/** expansion that was active when start_expanding() was called on this
	 * definition; reset to NULL by finished_expanding() */
	pp_definition_t *parent_expansion;
	/** index into token_list of the next token to hand out */
	size_t expand_pos;
	whitespace_info_t expand_info;
	bool is_variadic : 1;
	/** set by start_expanding(), cleared by finished_expanding() */
	bool is_expanding : 1;
	bool has_parameters : 1;
	/** expand_next() asserts this for the operand of a '#' operator */
	bool is_parameter : 1;
	pp_definition_t *function_definition;
	size_t n_parameters;
	pp_definition_t *parameters;

	/* replacement */
	/** number of entries in token_list */
	size_t list_len;
	saved_token_t *token_list;
};
57
/** Element of the conditional stack; elements are chained through `parent`. */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
	source_position_t source_position;
	bool condition;
	bool in_else;
	/** conditional in skip mode (then+else gets skipped) */
	bool skip;
	pp_conditional_t *parent;
};
67
/** State of one input that is being read, including its decode buffer. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
	/** stream handed to switch_pp_input(); returned again by close_pp_input() */
	FILE *file;
	/** decoder created with input_from_stream() */
	input_t *input;
	/** current character (EOF at end of input) */
	utf32 c;
	/** decoded characters; the first MAX_PUTBACK slots are never filled by
	 * next_real_char() and so stay available for put_back() */
	utf32 buf[1024+MAX_PUTBACK];
	/** one past the last decoded character in buf */
	const utf32 *bufend;
	/** next character to read from buf */
	const utf32 *bufpos;
	source_position_t position;
	/** next entry of the stack of saved inputs (see push_input()) */
	pp_input_t *parent;
	unsigned output_line;
	searchpath_entry_t *path;
};
81
/** One directory of an include search path; entries form a singly linked list. */
struct searchpath_entry_t {
	const char *path;
	searchpath_entry_t *next;
	bool is_system_path;
};
87
/** the input that is currently being read */
static pp_input_t input;

/** inputs saved by push_input(), most recently saved first */
static pp_input_t *input_stack;
/** number of entries on input_stack */
static unsigned n_inputs;
/** storage for the entries of input_stack */
static struct obstack input_obstack;

static pp_conditional_t *conditional_stack;

/** the token most recently produced by the lexing/expansion routines */
token_t pp_token;
/** when false, the '$' case of SYMBOL_CASES jumps to the dollar_sign label */
bool allow_dollar_in_symbol = true;
/** when false, escape sequences in literals are kept in their spelled form */
static bool resolve_escape_sequences = true;
static bool error_on_unknown_chars = true;
static bool skip_mode;
static FILE *out;
static struct obstack pp_obstack;
static struct obstack config_obstack;
static const char *printed_input_name = NULL;
/** source position stamped onto every token delivered by expand_next() */
static source_position_t expansion_pos;
/** innermost definition whose replacement list is being replayed, or NULL */
static pp_definition_t *current_expansion = NULL;
static pp_definition_t *current_call = NULL;
static pp_definition_t *current_argument = NULL;
/** reset to NULL by finished_expanding() once this definition is done */
static pp_definition_t *argument_expanding = NULL;
static unsigned argument_brace_count;
/** set used by identify_string() to unify equal strings */
static strset_t stringset;
static token_kind_t last_token;
113
/**
 * A list of search path entries.
 * NOTE: the searchpath_t objects in this file are initialized positionally,
 * so the member order must not change.
 */
struct searchpath_t {
	/** head of the list, NULL while the list is empty */
	searchpath_entry_t *first;
	/** initialized to point at `first`; presumably the slot where the next
	 * entry gets linked in -- confirm against the code that appends */
	searchpath_entry_t **anchor;
	bool is_system_path;
};
119
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, "e_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
123 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
124
static whitespace_info_t next_info; /* valid if had_whitespace is true */
/** whitespace state belonging to the current token */
static whitespace_info_t info;

/* forward declarations */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols of the digraph spellings, filled in by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;

/* symbols "L", "U", "u" and "u8", filled in by init_symbols() */
static symbol_t *symbol_L;
static symbol_t *symbol_U;
static symbol_t *symbol_u;
static symbol_t *symbol_u8;
143
init_symbols(void)144 static void init_symbols(void)
145 {
146 symbol_colongreater = symbol_table_insert(":>");
147 symbol_lesscolon = symbol_table_insert("<:");
148 symbol_lesspercent = symbol_table_insert("<%");
149 symbol_percentcolon = symbol_table_insert("%:");
150 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
151 symbol_percentgreater = symbol_table_insert("%>");
152
153 symbol_L = symbol_table_insert("L");
154 symbol_U = symbol_table_insert("U");
155 symbol_u = symbol_table_insert("u");
156 symbol_u8 = symbol_table_insert("u8");
157 }
158
/**
 * Makes @p file the current input.
 *
 * @param file             stream to read from
 * @param filename         name recorded as input name of the position
 * @param path             search path entry stored with the input
 * @param is_system_header recorded in the position of the input
 */
void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
{
	input.file        = file;
	input.input       = input_from_stream(file, NULL);
	input.bufpos      = NULL;
	input.bufend      = NULL;
	input.output_line = 0;
	input.path        = path;

	input.position.input_name       = filename;
	input.position.lineno           = 1;
	input.position.is_system_header = is_system_header;

	/* indicate that we're at a new input; "1" is added when another input
	 * is already on the stack */
	char const *const add = input_stack != NULL ? "1" : NULL;
	print_line_directive(&input.position, add);

	/* place a virtual '\n' so we realize we're at line begin */
	input.c               = '\n';
	input.position.lineno = 0;
}
178
close_pp_input(void)179 FILE *close_pp_input(void)
180 {
181 input_free(input.input);
182
183 FILE* const file = input.file;
184 assert(file);
185
186 input.input = NULL;
187 input.file = NULL;
188 input.bufend = NULL;
189 input.bufpos = NULL;
190 input.c = EOF;
191
192 return file;
193 }
194
push_input(void)195 static void push_input(void)
196 {
197 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
198
199 /* adjust buffer positions */
200 if (input.bufpos != NULL)
201 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
202 if (input.bufend != NULL)
203 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
204
205 saved_input->parent = input_stack;
206 input_stack = saved_input;
207 ++n_inputs;
208 }
209
pop_restore_input(void)210 static void pop_restore_input(void)
211 {
212 assert(n_inputs > 0);
213 assert(input_stack != NULL);
214
215 pp_input_t *saved_input = input_stack;
216
217 memcpy(&input, saved_input, sizeof(input));
218 input.parent = NULL;
219
220 /* adjust buffer positions */
221 if (saved_input->bufpos != NULL)
222 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
223 if (saved_input->bufend != NULL)
224 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
225
226 input_stack = saved_input->parent;
227 obstack_free(&input_obstack, saved_input);
228 --n_inputs;
229 }
230
231 /**
232 * Prints a parse error message at the current token.
233 *
234 * @param msg the error message
235 */
parse_error(const char * msg)236 static void parse_error(const char *msg)
237 {
238 errorf(&pp_token.base.source_position, "%s", msg);
239 }
240
next_real_char(void)241 static inline void next_real_char(void)
242 {
243 assert(input.bufpos <= input.bufend);
244 if (input.bufpos >= input.bufend) {
245 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
246 if (n == 0) {
247 input.c = EOF;
248 return;
249 }
250 input.bufpos = input.buf + MAX_PUTBACK;
251 input.bufend = input.bufpos + n;
252 }
253 input.c = *input.bufpos++;
254 ++input.position.colno;
255 }
256
257 /**
258 * Put a character back into the buffer.
259 *
260 * @param pc the character to put back
261 */
put_back(utf32 const pc)262 static inline void put_back(utf32 const pc)
263 {
264 assert(input.bufpos > input.buf);
265 *(--input.bufpos - input.buf + input.buf) = (char) pc;
266 --input.position.colno;
267 }
268
/*
 * Case label macro matching a line ending; use it as `case NEWLINE:` inside
 * a switch over input.c. It handles "\r", "\n" and "\r\n", advances the
 * line number and resets the column to 1. The statements following the
 * label run with input.c already at the first character of the next line.
 * The expansion defines the label `newline`, so the macro can be used only
 * once per function.
 */
#define NEWLINE \
	'\r': \
		next_char(); \
		if (input.c == '\n') { \
	case '\n': \
			next_char(); \
		} \
		++input.position.lineno; \
		input.position.colno = 1; \
		goto newline; \
		newline // Let it look like an ordinary case label.
280
/* Asserts that the current character is c_type and advances to the next one. */
#define eat(c_type) (assert(input.c == (c_type)), next_char())
282
maybe_concat_lines(void)283 static void maybe_concat_lines(void)
284 {
285 eat('\\');
286
287 switch (input.c) {
288 case NEWLINE:
289 info.whitespace_at_line_begin = 0;
290 return;
291
292 default:
293 break;
294 }
295
296 put_back(input.c);
297 input.c = '\\';
298 }
299
300 /**
301 * Set c to the next input character, ie.
302 * after expanding trigraphs.
303 */
next_char(void)304 static inline void next_char(void)
305 {
306 next_real_char();
307
308 /* filter trigraphs and concatenated lines */
309 if (UNLIKELY(input.c == '\\')) {
310 maybe_concat_lines();
311 goto end_of_next_char;
312 }
313
314 if (LIKELY(input.c != '?'))
315 goto end_of_next_char;
316
317 next_real_char();
318 if (LIKELY(input.c != '?')) {
319 put_back(input.c);
320 input.c = '?';
321 goto end_of_next_char;
322 }
323
324 next_real_char();
325 switch (input.c) {
326 case '=': input.c = '#'; break;
327 case '(': input.c = '['; break;
328 case '/': input.c = '\\'; maybe_concat_lines(); break;
329 case ')': input.c = ']'; break;
330 case '\'': input.c = '^'; break;
331 case '<': input.c = '{'; break;
332 case '!': input.c = '|'; break;
333 case '>': input.c = '}'; break;
334 case '-': input.c = '~'; break;
335 default:
336 put_back(input.c);
337 put_back('?');
338 input.c = '?';
339 break;
340 }
341
342 end_of_next_char:;
343 #ifdef DEBUG_CHARS
344 printf("nchar '%c'\n", input.c);
345 #endif
346 }
347
348
349
/**
 * Tells whether a character is an octal digit.
 *
 * @param chr the character to check
 * @return true for '0' through '7', false for everything else
 */
static inline bool is_octal_digit(int chr)
{
	/* the decimal digits have consecutive codes in every C character set */
	return '0' <= chr && chr <= '7';
}
371
/**
 * Returns the numeric value of a hexadecimal digit.
 *
 * @param digit one of '0'-'9', 'a'-'f' or 'A'-'F'; anything else panics
 * @return the value in the range 0 to 15
 */
static int digit_value(int digit)
{
	/* only the decimal digits are guaranteed to be consecutive */
	if ('0' <= digit && digit <= '9')
		return digit - '0';

	switch (digit) {
	case 'a': case 'A': return 10;
	case 'b': case 'B': return 11;
	case 'c': case 'C': return 12;
	case 'd': case 'D': return 13;
	case 'e': case 'E': return 14;
	case 'f': case 'F': return 15;
	default:
		panic("wrong character given");
	}
}
405
406 /**
407 * Parses an octal character sequence.
408 *
409 * @param first_digit the already read first digit
410 */
parse_octal_sequence(const utf32 first_digit)411 static utf32 parse_octal_sequence(const utf32 first_digit)
412 {
413 assert(is_octal_digit(first_digit));
414 utf32 value = digit_value(first_digit);
415 if (!is_octal_digit(input.c)) return value;
416 value = 8 * value + digit_value(input.c);
417 next_char();
418 if (!is_octal_digit(input.c)) return value;
419 value = 8 * value + digit_value(input.c);
420 next_char();
421 return value;
422
423 }
424
425 /**
426 * Parses a hex character sequence.
427 */
parse_hex_sequence(void)428 static utf32 parse_hex_sequence(void)
429 {
430 utf32 value = 0;
431 while (isxdigit(input.c)) {
432 value = 16 * value + digit_value(input.c);
433 next_char();
434 }
435 return value;
436 }
437
/**
 * Checks the constraint of C11 §6.4.3:2 on universal character names.
 *
 * @param v the code point named by the universal character name
 * @return false for values below 0xA0 other than 0x24, 0x40 and 0x60, and
 *         for values in the range 0xD800 to 0xDFFF; true otherwise
 */
static bool is_universal_char_valid(utf32 const v)
{
	bool const is_exempt    = v == 0x24 || v == 0x40 || v == 0x60;
	bool const is_too_small = v < 0xA0U && !is_exempt;
	bool const is_surrogate = 0xD800 <= v && v <= 0xDFFF;
	return !is_too_small && !is_surrogate;
}
447
parse_universal_char(unsigned const n_digits)448 static utf32 parse_universal_char(unsigned const n_digits)
449 {
450 utf32 v = 0;
451 for (unsigned k = n_digits; k != 0; --k) {
452 if (isxdigit(input.c)) {
453 v = 16 * v + digit_value(input.c);
454 if (!resolve_escape_sequences)
455 obstack_1grow(&symbol_obstack, input.c);
456 next_char();
457 } else {
458 errorf(&input.position,
459 "short universal character name, expected %u more digits",
460 k);
461 break;
462 }
463 }
464 if (!is_universal_char_valid(v)) {
465 errorf(&input.position,
466 "\\%c%0*X is not a valid universal character name",
467 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
468 }
469 return v;
470 }
471
/**
 * Tells whether a code point is contained in the table of characters that
 * are accepted in identifiers in C99 mode.
 *
 * @param v the code point to look up
 * @return true if @p v is one of the listed single characters or lies in
 *         one of the listed inclusive ranges
 */
static bool is_universal_char_valid_identifier_c99(utf32 const v)
{
	static const utf32 singles[] = {
		0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
		0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
		0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
		0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
		0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
		0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
	};

	/* inclusive { first, last } pairs */
	static const utf32 intervals[][2] = {
		{0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
		{0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
		{0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
		{0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
		{0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
		{0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
		{0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
		{0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
		{0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
		{0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
		{0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
		{0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
		{0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
		{0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
		{0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
		{0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
		{0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
		{0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
		{0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
		{0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
		{0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
		{0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
		{0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
		{0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
		{0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
		{0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
		{0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
		{0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
		{0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
		{0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
		{0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
		{0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
		{0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
		{0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
		{0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
		{0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
		{0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
		{0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
		{0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
		{0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
		{0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
		{0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
		{0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
		{0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
		{0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
		{0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
		{0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
		{0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
		{0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
		{0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
		{0x3021, 0x3029},
	};

	size_t const n_singles = sizeof(singles) / sizeof(singles[0]);
	for (size_t s = 0; s != n_singles; ++s) {
		if (singles[s] == v)
			return true;
	}

	size_t const n_intervals = sizeof(intervals) / sizeof(intervals[0]);
	for (size_t r = 0; r != n_intervals; ++r) {
		utf32 const *const bounds = intervals[r];
		if (bounds[0] <= v && v <= bounds[1])
			return true;
	}
	return false;
}
546
/**
 * Tells whether a code point lies in one of the ranges of C11 Annex D.1.
 *
 * @param v the code point to look up
 * @return true if @p v is in one of the ranges
 */
static bool is_universal_char_valid_identifier_c11(utf32 const v)
{
	/* C11 Annex D.1, ranges below 0x10000 as inclusive { first, last } pairs */
	static const utf32 low_ranges[][2] = {
		{ 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD },
		{ 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA },
		{ 0x00BC, 0x00BE }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
		{ 0x00F8, 0x00FF }, { 0x0100, 0x167F }, { 0x1681, 0x180D },
		{ 0x180F, 0x1FFF }, { 0x200B, 0x200D }, { 0x202A, 0x202E },
		{ 0x203F, 0x2040 }, { 0x2054, 0x2054 }, { 0x2060, 0x206F },
		{ 0x2070, 0x218F }, { 0x2460, 0x24FF }, { 0x2776, 0x2793 },
		{ 0x2C00, 0x2DFF }, { 0x2E80, 0x2FFF }, { 0x3004, 0x3007 },
		{ 0x3021, 0x302F }, { 0x3031, 0x303F }, { 0x3040, 0xD7FF },
		{ 0xF900, 0xFD3D }, { 0xFD40, 0xFDCF }, { 0xFDF0, 0xFE44 },
		{ 0xFE47, 0xFFFD },
	};

	if (v < 0x10000) {
		size_t const n_ranges = sizeof(low_ranges) / sizeof(low_ranges[0]);
		for (size_t i = 0; i != n_ranges; ++i) {
			if (low_ranges[i][0] <= v && v <= low_ranges[i][1])
				return true;
		}
		return false;
	}

	/* planes 1 to 14: everything except the last two code points of a plane */
	if (v <= 0xEFFFD)
		return (v & 0xFFFF) <= 0xFFFD;
	return false;
}
597
/**
 * Tells whether a code point is accepted in an identifier; the C11 table is
 * consulted when _C11 is set in c_mode, the C99 table otherwise.
 */
static bool is_universal_char_valid_identifier(utf32 const v)
{
	return (c_mode & _C11)
		? is_universal_char_valid_identifier_c11(v)
		: is_universal_char_valid_identifier_c99(v);
}
604
/**
 * Tells whether a code point lies in one of the ranges of C11 Annex D.2.
 * The result is always false when _C11 is not set in c_mode.
 */
static bool is_universal_char_invalid_identifier_start(utf32 const v)
{
	if ((c_mode & _C11) == 0)
		return false;

	/* C11 Annex D.2, inclusive { first, last } pairs */
	static const utf32 ranges[][2] = {
		{ 0x0300, 0x036F },
		{ 0x1DC0, 0x1DFF },
		{ 0x20D0, 0x20FF },
		{ 0xFE20, 0xFE2F },
	};
	for (size_t i = 0; i != sizeof(ranges) / sizeof(ranges[0]); ++i) {
		if (ranges[i][0] <= v && v <= ranges[i][1])
			return true;
	}
	return false;
}
617
618 /**
619 * Parse an escape sequence.
620 */
parse_escape_sequence(void)621 static utf32 parse_escape_sequence(void)
622 {
623 eat('\\');
624
625 utf32 const ec = input.c;
626 next_char();
627
628 switch (ec) {
629 case '"': return '"';
630 case '\'': return '\'';
631 case '\\': return '\\';
632 case '?': return '\?';
633 case 'a': return '\a';
634 case 'b': return '\b';
635 case 'f': return '\f';
636 case 'n': return '\n';
637 case 'r': return '\r';
638 case 't': return '\t';
639 case 'v': return '\v';
640 case 'x':
641 return parse_hex_sequence();
642 case '0':
643 case '1':
644 case '2':
645 case '3':
646 case '4':
647 case '5':
648 case '6':
649 case '7':
650 return parse_octal_sequence(ec);
651 case EOF:
652 parse_error("reached end of file while parsing escape sequence");
653 return EOF;
654 /* \E is not documented, but handled, by GCC. It is acceptable according
655 * to §6.11.4, whereas \e is not. */
656 case 'E':
657 case 'e':
658 if (c_mode & _GNUC)
659 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
660 break;
661
662 case 'U': return parse_universal_char(8);
663 case 'u': return parse_universal_char(4);
664
665 default:
666 break;
667 }
668 /* §6.4.4.4:8 footnote 64 */
669 parse_error("unknown escape sequence");
670 return EOF;
671 }
672
identify_string(char * string)673 static const char *identify_string(char *string)
674 {
675 const char *result = strset_insert(&stringset, string);
676 if (result != string) {
677 obstack_free(&symbol_obstack, string);
678 }
679 return result;
680 }
681
sym_make_string(string_encoding_t const enc)682 static string_t sym_make_string(string_encoding_t const enc)
683 {
684 obstack_1grow(&symbol_obstack, '\0');
685 size_t const len = obstack_object_size(&symbol_obstack) - 1;
686 char *const string = obstack_finish(&symbol_obstack);
687 char const *const result = identify_string(string);
688 return (string_t){ result, len, enc };
689 }
690
make_string(char const * const string)691 string_t make_string(char const *const string)
692 {
693 obstack_grow(&symbol_obstack, string, strlen(string));
694 return sym_make_string(STRING_ENCODING_CHAR);
695 }
696
get_string_encoding_limit(string_encoding_t const enc)697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
698 {
699 switch (enc) {
700 case STRING_ENCODING_CHAR: return 0xFF;
701 case STRING_ENCODING_CHAR16: return 0xFFFF;
702 case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
703 case STRING_ENCODING_UTF8: return 0xFFFFFFFF;
704 case STRING_ENCODING_WIDE: return 0xFFFFFFFF; // FIXME depends on settings
705 }
706 panic("invalid string encoding");
707 }
708
/**
 * Parses a delimited literal into pp_token; input.c has to be at the opening
 * delimiter.
 *
 * The characters are collected on symbol_obstack. An unterminated literal is
 * reported as an error: a line ending inside the literal does not end it,
 * the end of input does.
 *
 * @param delimiter the character that opens and closes the literal
 * @param kind      token kind stored in pp_token
 * @param enc       encoding of the literal
 * @param context   name of the construct, used in diagnostics
 */
static void parse_string(utf32 const delimiter, token_kind_t const kind,
                         string_encoding_t const enc,
                         char const *const context)
{
	/* Remember where the literal starts for the EOF diagnostic. Copying the
	 * whole position keeps every member of the reported position defined. */
	source_position_t start_position = input.position;

	eat(delimiter);

	utf32 const limit = get_string_encoding_limit(enc);
	while (true) {
		switch (input.c) {
		case '\\': {
			if (resolve_escape_sequences) {
				utf32 const tc = parse_escape_sequence();
				if (tc > limit) {
					warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
				}
				if (enc == STRING_ENCODING_CHAR) {
					obstack_1grow(&symbol_obstack, tc);
				} else {
					obstack_grow_utf8(&symbol_obstack, tc);
				}
			} else {
				/* keep the backslash and the character after it as spelled */
				obstack_1grow(&symbol_obstack, (char)input.c);
				next_char();
				obstack_1grow(&symbol_obstack, (char)input.c);
				next_char();
			}
			break;
		}

		case NEWLINE:
			errorf(&pp_token.base.source_position, "newline while parsing %s", context);
			break;

		case EOF:
			/* the input name is taken from the current token, as before */
			start_position.input_name = pp_token.base.source_position.input_name;
			errorf(&start_position, "EOF while parsing %s", context);
			goto end_of_string;

		default:
			if (input.c == delimiter) {
				next_char();
				goto end_of_string;
			} else {
				obstack_grow_utf8(&symbol_obstack, input.c);
				next_char();
				break;
			}
		}
	}

end_of_string:
	pp_token.kind           = kind;
	pp_token.literal.string = sym_make_string(enc);
}
768
/**
 * Parses a string literal of the given encoding into pp_token; input.c has
 * to be at the opening double quote.
 */
static void parse_string_literal(string_encoding_t const enc)
{
	utf32 const quote = '"';
	parse_string(quote, T_STRING_LITERAL, enc, "string literal");
}
773
/**
 * Parses a character constant of the given encoding into pp_token; input.c
 * has to be at the opening single quote. An empty constant is reported as
 * an error.
 */
static void parse_character_constant(string_encoding_t const enc)
{
	parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
	if (pp_token.literal.string.size != 0)
		return;

	parse_error("empty character constant");
}
781
/*
 * Case label lists; use them as `case SYMBOL_CASES:` inside a switch.
 *
 * SYMBOL_CASES_WITHOUT_E_P matches '$', '_' and all letters except e, p, E
 * and P. For '$' it jumps to a label `dollar_sign`, which the using function
 * has to provide, when allow_dollar_in_symbol is false.
 * NOTE(review): e/p/E/P are presumably left out for the benefit of number
 * lexing (exponent markers) -- confirm at the use site.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
	     '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
	case 'a': \
	case 'b': \
	case 'c': \
	case 'd': \
	case 'f': \
	case 'g': \
	case 'h': \
	case 'i': \
	case 'j': \
	case 'k': \
	case 'l': \
	case 'm': \
	case 'n': \
	case 'o': \
	case 'q': \
	case 'r': \
	case 's': \
	case 't': \
	case 'u': \
	case 'v': \
	case 'w': \
	case 'x': \
	case 'y': \
	case 'z': \
	case 'A': \
	case 'B': \
	case 'C': \
	case 'D': \
	case 'F': \
	case 'G': \
	case 'H': \
	case 'I': \
	case 'J': \
	case 'K': \
	case 'L': \
	case 'M': \
	case 'N': \
	case 'O': \
	case 'Q': \
	case 'R': \
	case 'S': \
	case 'T': \
	case 'U': \
	case 'V': \
	case 'W': \
	case 'X': \
	case 'Y': \
	case 'Z': \
	case '_'

/* SYMBOL_CASES_WITHOUT_E_P plus the letters e, p, E and P. */
#define SYMBOL_CASES \
	     SYMBOL_CASES_WITHOUT_E_P: \
	case 'e': \
	case 'p': \
	case 'E': \
	case 'P'

/* The ten decimal digits. */
#define DIGIT_CASES \
	     '0':  \
	case '1': \
	case '2': \
	case '3': \
	case '4': \
	case '5': \
	case '6': \
	case '7': \
	case '8': \
	case '9'
852
/**
 * Makes a definition the innermost active expansion, positioned at the
 * start of its replacement list.
 *
 * @param definition the definition to expand
 */
static void start_expanding(pp_definition_t *definition)
{
	definition->is_expanding     = true;
	definition->expand_pos       = 0;
	definition->parent_expansion = current_expansion;
	current_expansion            = definition;

	/* the first replacement token takes over the whitespace state of the
	 * current token */
	if (definition->list_len != 0)
		definition->token_list[0].had_whitespace = info.had_whitespace;
}
864
/**
 * Ends the innermost active expansion and reactivates its parent.
 *
 * @param definition has to be the current expansion
 */
static void finished_expanding(pp_definition_t *definition)
{
	assert(definition->is_expanding);
	assert(current_expansion == definition);

	/* stop further expanding once we expanded a parameter used in a
	 * sub macro-call */
	if (argument_expanding == definition)
		argument_expanding = NULL;

	current_expansion            = definition->parent_expansion;
	definition->parent_expansion = NULL;
	definition->is_expanding     = false;
}
880
grow_string_escaped(struct obstack * obst,const string_t * string,char const * delimiter)881 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
882 {
883 char const *prefix = get_string_encoding_prefix(string->encoding);
884 obstack_printf(obst, "%s%s", prefix, delimiter);
885 size_t size = string->size;
886 const char *str = string->begin;
887 if (resolve_escape_sequences) {
888 obstack_grow(obst, str, size);
889 } else {
890 for (size_t i = 0; i < size; ++i) {
891 const char c = str[i];
892 if (c == '\\' || c == '"')
893 obstack_1grow(obst, '\\');
894 obstack_1grow(obst, c);
895 }
896 }
897 obstack_printf(obst, "%s", delimiter);
898 }
899
grow_token(struct obstack * obst,const token_t * token)900 static void grow_token(struct obstack *obst, const token_t *token)
901 {
902 switch (token->kind) {
903 case T_NUMBER:
904 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
905 break;
906
907 case T_STRING_LITERAL: {
908 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
909 grow_string_escaped(obst, &token->literal.string, delimiter);
910 break;
911 }
912
913 case T_CHARACTER_CONSTANT:
914 grow_string_escaped(obst, &token->literal.string, "'");
915 break;
916
917 case T_IDENTIFIER:
918 default: {
919 const char *str = token->base.symbol->string;
920 size_t len = strlen(str);
921 obstack_grow(obst, str, len);
922 break;
923 }
924 }
925 }
926
stringify(const pp_definition_t * definition)927 static void stringify(const pp_definition_t *definition)
928 {
929 assert(obstack_object_size(&symbol_obstack) == 0);
930
931 size_t list_len = definition->list_len;
932 for (size_t p = 0; p < list_len; ++p) {
933 const saved_token_t *saved = &definition->token_list[p];
934 if (p > 0 && saved->had_whitespace)
935 obstack_1grow(&symbol_obstack, ' ');
936 grow_token(&symbol_obstack, &saved->token);
937 }
938 pp_token.kind = T_STRING_LITERAL;
939 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
940 }
941
/**
 * Makes pp_token a punctuator of the given kind, using the symbol that
 * token_symbols holds for that kind.
 */
static inline void set_punctuator(token_kind_t const kind)
{
	symbol_t *const symbol = token_symbols[kind];

	pp_token.kind        = kind;
	pp_token.base.symbol = symbol;
}
947
/**
 * Makes pp_token a punctuator of the given kind with an explicitly given
 * symbol instead of the one from token_symbols.
 */
static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
{
	pp_token.base.symbol = symbol;
	pp_token.kind        = kind;
}
953
954 /**
955 * returns next final token from a preprocessor macro expansion
956 */
expand_next(void)957 static bool expand_next(void)
958 {
959 if (current_expansion == NULL)
960 return false;
961
962 restart:;
963 size_t pos = current_expansion->expand_pos;
964 if (pos >= current_expansion->list_len) {
965 finished_expanding(current_expansion);
966 /* it was the outermost expansion, parse pptoken normally */
967 if (current_expansion == NULL) {
968 return false;
969 }
970 goto restart;
971 }
972 const saved_token_t *saved = ¤t_expansion->token_list[pos++];
973 pp_token = saved->token;
974 if (pp_token.kind == '#') {
975 if (pos < current_expansion->list_len) {
976 const saved_token_t *next = ¤t_expansion->token_list[pos];
977 if (next->token.kind == T_MACRO_PARAMETER) {
978 pp_definition_t *def = next->token.macro_parameter.def;
979 assert(def != NULL && def->is_parameter);
980 stringify(def);
981 ++pos;
982 }
983 }
984 }
985
986 if (current_expansion->expand_pos > 0)
987 info.had_whitespace = saved->had_whitespace;
988 current_expansion->expand_pos = pos;
989 pp_token.base.source_position = expansion_pos;
990
991 return true;
992 }
993
994 /**
995 * Returns the next token kind found when continuing the current expansions
996 * without starting new sub-expansions.
997 */
peek_expansion(void)998 static token_kind_t peek_expansion(void)
999 {
1000 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1001 if (e->expand_pos < e->list_len)
1002 return e->token_list[e->expand_pos].token.kind;
1003 }
1004 return T_EOF;
1005 }
1006
skip_line_comment(void)1007 static void skip_line_comment(void)
1008 {
1009 info.had_whitespace = true;
1010 while (true) {
1011 switch (input.c) {
1012 case EOF:
1013 return;
1014
1015 case '\r':
1016 case '\n':
1017 return;
1018
1019 default:
1020 next_char();
1021 break;
1022 }
1023 }
1024 }
1025
skip_multiline_comment(void)1026 static void skip_multiline_comment(void)
1027 {
1028 info.had_whitespace = true;
1029
1030 unsigned start_linenr = input.position.lineno;
1031 while (true) {
1032 switch (input.c) {
1033 case '/':
1034 next_char();
1035 if (input.c == '*') {
1036 /* TODO: nested comment, warn here */
1037 }
1038 break;
1039 case '*':
1040 next_char();
1041 if (input.c == '/') {
1042 if (input.position.lineno != input.output_line)
1043 info.whitespace_at_line_begin = input.position.colno;
1044 next_char();
1045 return;
1046 }
1047 break;
1048
1049 case NEWLINE:
1050 break;
1051
1052 case EOF: {
1053 source_position_t source_position;
1054 source_position.input_name = pp_token.base.source_position.input_name;
1055 source_position.lineno = start_linenr;
1056 errorf(&source_position, "at end of file while looking for comment end");
1057 return;
1058 }
1059
1060 default:
1061 next_char();
1062 break;
1063 }
1064 }
1065 }
1066
skip_till_newline(bool stop_at_non_whitespace)1067 static bool skip_till_newline(bool stop_at_non_whitespace)
1068 {
1069 bool res = false;
1070 while (true) {
1071 switch (input.c) {
1072 case ' ':
1073 case '\t':
1074 next_char();
1075 continue;
1076
1077 case '/':
1078 next_char();
1079 if (input.c == '/') {
1080 next_char();
1081 skip_line_comment();
1082 continue;
1083 } else if (input.c == '*') {
1084 next_char();
1085 skip_multiline_comment();
1086 continue;
1087 } else {
1088 put_back(input.c);
1089 input.c = '/';
1090 }
1091 return true;
1092
1093 case NEWLINE:
1094 return res;
1095
1096 default:
1097 if (stop_at_non_whitespace)
1098 return false;
1099 res = true;
1100 next_char();
1101 continue;
1102 }
1103 }
1104 }
1105
skip_whitespace(void)1106 static void skip_whitespace(void)
1107 {
1108 while (true) {
1109 switch (input.c) {
1110 case ' ':
1111 case '\t':
1112 ++info.whitespace_at_line_begin;
1113 info.had_whitespace = true;
1114 next_char();
1115 continue;
1116
1117 case NEWLINE:
1118 info.at_line_begin = true;
1119 info.had_whitespace = true;
1120 info.whitespace_at_line_begin = 0;
1121 continue;
1122
1123 case '/':
1124 next_char();
1125 if (input.c == '/') {
1126 next_char();
1127 skip_line_comment();
1128 continue;
1129 } else if (input.c == '*') {
1130 next_char();
1131 skip_multiline_comment();
1132 continue;
1133 } else {
1134 put_back(input.c);
1135 input.c = '/';
1136 }
1137 return;
1138
1139 default:
1140 return;
1141 }
1142 }
1143 }
1144
/**
 * Consumes the current token, which must be the preprocessor keyword given
 * by @p kind (checked by assertion only), and reads the next input token.
 */
static inline void eat_pp(pp_token_kind_t const kind)
{
	(void)kind; /* only referenced by the assertion */
	assert(pp_token.base.symbol->pp_ID == kind);
	next_input_token();
}
1151
/**
 * Consumes the current token, which must be of kind @p kind (checked by
 * assertion only), and reads the next input token.
 */
static inline void eat_token(token_kind_t const kind)
{
	(void)kind; /* only referenced by the assertion */
	assert(pp_token.kind == kind);
	next_input_token();
}
1158
identify_encoding_prefix(symbol_t * const sym)1159 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1160 {
1161 if (sym == symbol_L) return STRING_ENCODING_WIDE;
1162 if (c_mode & _C11) {
1163 if (sym == symbol_U) return STRING_ENCODING_CHAR32;
1164 if (sym == symbol_u) return STRING_ENCODING_CHAR16;
1165 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1166 }
1167 return STRING_ENCODING_CHAR;
1168 }
1169
parse_symbol(void)1170 static void parse_symbol(void)
1171 {
1172 assert(obstack_object_size(&symbol_obstack) == 0);
1173 while (true) {
1174 switch (input.c) {
1175 case DIGIT_CASES:
1176 case SYMBOL_CASES:
1177 obstack_1grow(&symbol_obstack, (char) input.c);
1178 next_char();
1179 break;
1180
1181 case '\\':
1182 next_char();
1183 switch (input.c) {
1184 {
1185 unsigned n;
1186 case 'U': n = 8; goto universal;
1187 case 'u': n = 4; goto universal;
1188 universal:
1189 if (!resolve_escape_sequences) {
1190 obstack_1grow(&symbol_obstack, '\\');
1191 obstack_1grow(&symbol_obstack, input.c);
1192 }
1193 next_char();
1194 utf32 const v = parse_universal_char(n);
1195 if (!is_universal_char_valid_identifier(v)) {
1196 if (is_universal_char_valid(v)) {
1197 errorf(&input.position,
1198 "universal character \\%c%0*X is not valid in an identifier",
1199 n == 4 ? 'u' : 'U', (int)n, v);
1200 }
1201 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1202 errorf(&input.position,
1203 "universal character \\%c%0*X is not valid as start of an identifier",
1204 n == 4 ? 'u' : 'U', (int)n, v);
1205 } else if (resolve_escape_sequences) {
1206 obstack_grow_utf8(&symbol_obstack, v);
1207 }
1208 break;
1209 }
1210
1211 default:
1212 put_back(input.c);
1213 input.c = '\\';
1214 goto end_symbol;
1215 }
1216
1217 default:
1218 dollar_sign:
1219 goto end_symbol;
1220 }
1221 }
1222
1223 end_symbol:
1224 obstack_1grow(&symbol_obstack, '\0');
1225 char *string = obstack_finish(&symbol_obstack);
1226
1227 symbol_t *symbol = symbol_table_insert(string);
1228
1229 /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1230 if (input.c == '"') {
1231 string_encoding_t const enc = identify_encoding_prefix(symbol);
1232 if (enc != STRING_ENCODING_CHAR) {
1233 parse_string_literal(enc);
1234 return;
1235 }
1236 } else if (input.c == '\'') {
1237 string_encoding_t const enc = identify_encoding_prefix(symbol);
1238 if (enc != STRING_ENCODING_CHAR) {
1239 if (enc == STRING_ENCODING_UTF8) {
1240 errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a chracter constant");
1241 }
1242 parse_character_constant(enc);
1243 return;
1244 }
1245 }
1246
1247 pp_token.kind = symbol->ID;
1248 pp_token.base.symbol = symbol;
1249
1250 /* we can free the memory from symbol obstack if we already had an entry in
1251 * the symbol table */
1252 if (symbol->string != string) {
1253 obstack_free(&symbol_obstack, string);
1254 }
1255 }
1256
parse_number(void)1257 static void parse_number(void)
1258 {
1259 obstack_1grow(&symbol_obstack, (char) input.c);
1260 next_char();
1261
1262 while (true) {
1263 switch (input.c) {
1264 case '.':
1265 case DIGIT_CASES:
1266 case SYMBOL_CASES_WITHOUT_E_P:
1267 obstack_1grow(&symbol_obstack, (char) input.c);
1268 next_char();
1269 break;
1270
1271 case 'e':
1272 case 'p':
1273 case 'E':
1274 case 'P':
1275 obstack_1grow(&symbol_obstack, (char) input.c);
1276 next_char();
1277 if (input.c == '+' || input.c == '-') {
1278 obstack_1grow(&symbol_obstack, (char) input.c);
1279 next_char();
1280 }
1281 break;
1282
1283 default:
1284 dollar_sign:
1285 goto end_number;
1286 }
1287 }
1288
1289 end_number:
1290 pp_token.kind = T_NUMBER;
1291 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1292 }
1293
/*
 * Building blocks for the punctuator recognition in next_input_token().
 * A sequence has the shape MAYBE_PROLOG, any number of MAYBE or MAYBE_DIGRAPH
 * alternatives, and a closing ELSE or ELSE_CODE. The braces are intentionally
 * unbalanced: MAYBE_PROLOG opens a switch which ELSE_CODE closes again.
 */

/* Advance past the current character and open a switch on its successor. */
#define MAYBE_PROLOG \
	next_char(); \
	switch (input.c) {

/* Successor is ch: consume it, produce punctuator kind and return. */
#define MAYBE(ch, kind) \
	case ch: \
		next_char(); \
		set_punctuator(kind); \
		return;

/* Like MAYBE, but produce the digraph given by kind and symbol. */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
	case ch: \
		next_char(); \
		set_digraph(kind, symbol); \
		return;

/* No alternative matched: execute code and close the switch opened by
 * MAYBE_PROLOG. */
#define ELSE_CODE(code) \
	default: \
		code \
	}

/* No alternative matched: produce punctuator kind and return. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1316
/**
 * Identifies the next preprocessing token contained in the input stream and
 * stores it in pp_token. No macro expansion is performed.
 *
 * Whitespace, newlines and comments in front of the token are skipped and
 * recorded in info. When the end of an included input is reached, that input
 * is closed, the including input is restored and scanning continues there;
 * only at the end of the outermost input a T_EOF token is produced.
 */
static void next_input_token(void)
{
	/* Whitespace state for the new token: take over what has been recorded
	 * in next_info, otherwise start without whitespace. */
	if (next_info.had_whitespace) {
		info = next_info;
		next_info.had_whitespace = false;
	} else {
		info.at_line_begin  = false;
		info.had_whitespace = false;
	}
restart:
	/* (re)start of a token: remember where it begins */
	pp_token.base.source_position = input.position;
	pp_token.base.symbol          = NULL;

	switch (input.c) {
	case ' ':
	case '\t':
		info.whitespace_at_line_begin++;
		info.had_whitespace = true;
		next_char();
		goto restart;

	case NEWLINE:
		info.at_line_begin            = true;
		info.had_whitespace           = true;
		info.whitespace_at_line_begin = 0;
		goto restart;

	case SYMBOL_CASES:
		parse_symbol();
		return;

	case DIGIT_CASES:
		parse_number();
		return;

	case '"':
		parse_string_literal(STRING_ENCODING_CHAR);
		return;

	case '\'':
		parse_character_constant(STRING_ENCODING_CHAR);
		return;

	case '.':
		MAYBE_PROLOG
			/* a dot followed by a digit starts a number: restore the dot as
			 * current character and let parse_number() take over */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				put_back(input.c);
				input.c = '.';
				parse_number();
				return;

			/* two dots: either "..." or a single '.' with the second dot
			 * put back for the next token */
			case '.':
				MAYBE_PROLOG
				MAYBE('.', T_DOTDOTDOT)
				ELSE_CODE(
					put_back(input.c);
					input.c = '.';
					set_punctuator('.');
					return;
				)
		ELSE('.')
	case '&':
		MAYBE_PROLOG
		MAYBE('&', T_ANDAND)
		MAYBE('=', T_ANDEQUAL)
		ELSE('&')
	case '*':
		MAYBE_PROLOG
		MAYBE('=', T_ASTERISKEQUAL)
		ELSE('*')
	case '+':
		MAYBE_PROLOG
		MAYBE('+', T_PLUSPLUS)
		MAYBE('=', T_PLUSEQUAL)
		ELSE('+')
	case '-':
		MAYBE_PROLOG
		MAYBE('>', T_MINUSGREATER)
		MAYBE('-', T_MINUSMINUS)
		MAYBE('=', T_MINUSEQUAL)
		ELSE('-')
	case '!':
		MAYBE_PROLOG
		MAYBE('=', T_EXCLAMATIONMARKEQUAL)
		ELSE('!')
	case '/':
		MAYBE_PROLOG
		MAYBE('=', T_SLASHEQUAL)
			/* comments count as whitespace: skip them and start over */
			case '*':
				next_char();
				skip_multiline_comment();
				goto restart;
			case '/':
				next_char();
				skip_line_comment();
				goto restart;
		ELSE('/')
	case '%':
		MAYBE_PROLOG
		MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
		MAYBE('=', T_PERCENTEQUAL)
			/* "%:" is the digraph for '#', "%:%:" the one for "##"; a '%'
			 * that is not followed by ':' is put back again */
			case ':':
				MAYBE_PROLOG
					case '%':
						MAYBE_PROLOG
						MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
						ELSE_CODE(
							put_back(input.c);
							input.c = '%';
							goto digraph_percentcolon;
						)
				ELSE_CODE(
digraph_percentcolon:
					set_digraph('#', symbol_percentcolon);
					return;
				)
		ELSE('%')
	case '<':
		MAYBE_PROLOG
		MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
		MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
		MAYBE('=', T_LESSEQUAL)
			case '<':
				MAYBE_PROLOG
				MAYBE('=', T_LESSLESSEQUAL)
				ELSE(T_LESSLESS)
		ELSE('<')
	case '>':
		MAYBE_PROLOG
		MAYBE('=', T_GREATEREQUAL)
			case '>':
				MAYBE_PROLOG
				MAYBE('=', T_GREATERGREATEREQUAL)
				ELSE(T_GREATERGREATER)
		ELSE('>')
	case '^':
		MAYBE_PROLOG
		MAYBE('=', T_CARETEQUAL)
		ELSE('^')
	case '|':
		MAYBE_PROLOG
		MAYBE('=', T_PIPEEQUAL)
		MAYBE('|', T_PIPEPIPE)
		ELSE('|')
	case ':':
		MAYBE_PROLOG
		MAYBE_DIGRAPH('>', ']', symbol_colongreater)
			/* "::" is a token of its own in C++ mode only; otherwise the
			 * second ':' is left for the next token */
			case ':':
				if (c_mode & _CXX) {
					next_char();
					set_punctuator(T_COLONCOLON);
					return;
				}
				/* FALLTHROUGH */
		ELSE(':')
	case '=':
		MAYBE_PROLOG
		MAYBE('=', T_EQUALEQUAL)
		ELSE('=')
	case '#':
		MAYBE_PROLOG
		MAYBE('#', T_HASHHASH)
		ELSE('#')

	/* punctuators that are never the start of a longer token */
	case '?':
	case '[':
	case ']':
	case '(':
	case ')':
	case '{':
	case '}':
	case '~':
	case ';':
	case ',':
		set_punctuator(input.c);
		next_char();
		return;

	case EOF:
		if (input_stack != NULL) {
			/* end of an included input: close it, go back to the including
			 * input, announce that in the output and continue scanning */
			fclose(close_pp_input());
			pop_restore_input();
			if (out)
				fputc('\n', out);
			if (input.c == (utf32)EOF)
				--input.position.lineno;
			print_line_directive(&input.position, "2");
			goto restart;
		} else {
			/* end of the outermost input */
			info.at_line_begin = true;
			set_punctuator(T_EOF);
		}
		return;

	case '\\':
		/* Look at the character behind the backslash and restore the
		 * backslash as current character. Followed by 'U' or 'u' it may
		 * start a universal character name, which parse_symbol() handles;
		 * anything else is treated like an unknown character below. */
		next_char();
		int next_c = input.c;
		put_back(input.c);
		input.c = '\\';
		if (next_c == 'U' || next_c == 'u') {
			parse_symbol();
			return;
		}
		/* FALLTHROUGH */
	default:
dollar_sign:
		if (error_on_unknown_chars) {
			/* report the character, drop it and try again */
			errorf(&pp_token.base.source_position, "unknown character '%lc' found", input.c);
			next_char();
			goto restart;
		} else {
			/* hand the character through as a token of its own, using a
			 * symbol that spells the character */
			assert(obstack_object_size(&symbol_obstack) == 0);
			obstack_grow_utf8(&symbol_obstack, input.c);
			obstack_1grow(&symbol_obstack, '\0');
			char     *const string = obstack_finish(&symbol_obstack);
			symbol_t *const symbol = symbol_table_insert(string);
			if (symbol->string != string)
				obstack_free(&symbol_obstack, string);

			pp_token.kind        = T_UNKNOWN_CHAR;
			pp_token.base.symbol = symbol;
			next_char();
			return;
		}
	}
}
1554
print_quoted_string(const char * const string)1555 static void print_quoted_string(const char *const string)
1556 {
1557 fputc('"', out);
1558 for (const char *c = string; *c != 0; ++c) {
1559 switch (*c) {
1560 case '"': fputs("\\\"", out); break;
1561 case '\\': fputs("\\\\", out); break;
1562 case '\a': fputs("\\a", out); break;
1563 case '\b': fputs("\\b", out); break;
1564 case '\f': fputs("\\f", out); break;
1565 case '\n': fputs("\\n", out); break;
1566 case '\r': fputs("\\r", out); break;
1567 case '\t': fputs("\\t", out); break;
1568 case '\v': fputs("\\v", out); break;
1569 case '\?': fputs("\\?", out); break;
1570 default:
1571 if (!isprint(*c)) {
1572 fprintf(out, "\\%03o", (unsigned)*c);
1573 break;
1574 }
1575 fputc(*c, out);
1576 break;
1577 }
1578 }
1579 fputc('"', out);
1580 }
1581
print_line_directive(const source_position_t * pos,const char * add)1582 static void print_line_directive(const source_position_t *pos, const char *add)
1583 {
1584 if (!out)
1585 return;
1586
1587 fprintf(out, "# %u ", pos->lineno);
1588 print_quoted_string(pos->input_name);
1589 if (add != NULL) {
1590 fputc(' ', out);
1591 fputs(add, out);
1592 }
1593 if (pos->is_system_header) {
1594 fputs(" 3", out);
1595 }
1596
1597 printed_input_name = pos->input_name;
1598 input.output_line = pos->lineno-1;
1599 }
1600
emit_newlines(void)1601 static bool emit_newlines(void)
1602 {
1603 if (!out)
1604 return true;
1605
1606 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1607 if (delta == 0)
1608 return false;
1609
1610 if (delta >= 9) {
1611 fputc('\n', out);
1612 print_line_directive(&pp_token.base.source_position, NULL);
1613 fputc('\n', out);
1614 } else {
1615 for (unsigned i = 0; i < delta; ++i) {
1616 fputc('\n', out);
1617 }
1618 }
1619 input.output_line = pp_token.base.source_position.lineno;
1620
1621 unsigned whitespace = info.whitespace_at_line_begin;
1622 /* make sure there is at least 1 whitespace before a (macro-expanded)
1623 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1624 if (pp_token.kind == '#' && whitespace == 0)
1625 ++whitespace;
1626 for (unsigned i = 0; i < whitespace; ++i)
1627 fputc(' ', out);
1628
1629 return true;
1630 }
1631
set_preprocessor_output(FILE * output)1632 void set_preprocessor_output(FILE *output)
1633 {
1634 out = output;
1635 if (out != NULL) {
1636 error_on_unknown_chars = false;
1637 resolve_escape_sequences = false;
1638 } else {
1639 error_on_unknown_chars = true;
1640 resolve_escape_sequences = true;
1641 }
1642 }
1643
emit_pp_token(void)1644 void emit_pp_token(void)
1645 {
1646 if (!emit_newlines() &&
1647 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1648 fputc(' ', out);
1649
1650 switch (pp_token.kind) {
1651 case T_NUMBER:
1652 fputs(pp_token.literal.string.begin, out);
1653 break;
1654
1655 case T_STRING_LITERAL:
1656 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1657 fputc('"', out);
1658 fputs(pp_token.literal.string.begin, out);
1659 fputc('"', out);
1660 break;
1661
1662 case T_CHARACTER_CONSTANT:
1663 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1664 fputc('\'', out);
1665 fputs(pp_token.literal.string.begin, out);
1666 fputc('\'', out);
1667 break;
1668
1669 case T_MACRO_PARAMETER:
1670 panic("macro parameter not expanded");
1671
1672 default:
1673 fputs(pp_token.base.symbol->string, out);
1674 break;
1675 }
1676 last_token = pp_token.kind;
1677 }
1678
eat_pp_directive(void)1679 static void eat_pp_directive(void)
1680 {
1681 while (!info.at_line_begin) {
1682 next_input_token();
1683 }
1684 }
1685
strings_equal(const string_t * string1,const string_t * string2)1686 static bool strings_equal(const string_t *string1, const string_t *string2)
1687 {
1688 size_t size = string1->size;
1689 if (size != string2->size)
1690 return false;
1691
1692 const char *c1 = string1->begin;
1693 const char *c2 = string2->begin;
1694 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1695 if (*c1 != *c2)
1696 return false;
1697 }
1698 return true;
1699 }
1700
pp_tokens_equal(const token_t * token1,const token_t * token2)1701 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1702 {
1703 if (token1->kind != token2->kind)
1704 return false;
1705
1706 switch (token1->kind) {
1707 case T_NUMBER:
1708 case T_CHARACTER_CONSTANT:
1709 case T_STRING_LITERAL:
1710 return strings_equal(&token1->literal.string, &token2->literal.string);
1711
1712 case T_MACRO_PARAMETER:
1713 return token1->macro_parameter.def->symbol
1714 == token2->macro_parameter.def->symbol;
1715
1716 default:
1717 return token1->base.symbol == token2->base.symbol;
1718 }
1719 }
1720
pp_definitions_equal(const pp_definition_t * definition1,const pp_definition_t * definition2)1721 static bool pp_definitions_equal(const pp_definition_t *definition1,
1722 const pp_definition_t *definition2)
1723 {
1724 if (definition1->list_len != definition2->list_len)
1725 return false;
1726
1727 size_t len = definition1->list_len;
1728 const saved_token_t *t1 = definition1->token_list;
1729 const saved_token_t *t2 = definition2->token_list;
1730 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1731 if (!pp_tokens_equal(&t1->token, &t2->token))
1732 return false;
1733 if (t1->had_whitespace != t2->had_whitespace)
1734 return false;
1735 }
1736 return true;
1737 }
1738
missing_macro_param_error(void)1739 static void missing_macro_param_error(void)
1740 {
1741 errorf(&pp_token.base.source_position,
1742 "'#' is not followed by a macro parameter");
1743 }
1744
is_defineable_token(char const * const context)1745 static bool is_defineable_token(char const *const context)
1746 {
1747 if (info.at_line_begin) {
1748 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1749 }
1750
1751 symbol_t *const symbol = pp_token.base.symbol;
1752 if (!symbol)
1753 goto no_ident;
1754
1755 if (pp_token.kind != T_IDENTIFIER) {
1756 switch (symbol->string[0]) {
1757 case SYMBOL_CASES:
1758 dollar_sign:
1759 break;
1760
1761 default:
1762 no_ident:
1763 errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1764 return false;
1765 }
1766 }
1767
1768 /* TODO turn this into a flag in pp_def. */
1769 switch (symbol->pp_ID) {
1770 /* §6.10.8:4 */
1771 case TP_defined:
1772 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1773 return false;
1774
1775 default:
1776 return true;
1777 }
1778 }
1779
parse_define_directive(void)1780 static void parse_define_directive(void)
1781 {
1782 eat_pp(TP_define);
1783 if (skip_mode) {
1784 eat_pp_directive();
1785 return;
1786 }
1787
1788 assert(obstack_object_size(&pp_obstack) == 0);
1789
1790 if (!is_defineable_token("#define"))
1791 goto error_out;
1792 symbol_t *const symbol = pp_token.base.symbol;
1793
1794 pp_definition_t *new_definition
1795 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1796 memset(new_definition, 0, sizeof(new_definition[0]));
1797 new_definition->symbol = symbol;
1798 new_definition->source_position = input.position;
1799
1800 /* this is probably the only place where spaces are significant in the
1801 * lexer (except for the fact that they separate tokens). #define b(x)
1802 * is something else than #define b (x) */
1803 if (input.c == '(') {
1804 next_input_token();
1805 eat_token('(');
1806
1807 while (true) {
1808 switch (pp_token.kind) {
1809 case T_DOTDOTDOT:
1810 new_definition->is_variadic = true;
1811 eat_token(T_DOTDOTDOT);
1812 if (pp_token.kind != ')') {
1813 errorf(&input.position,
1814 "'...' not at end of macro argument list");
1815 goto error_out;
1816 }
1817 break;
1818
1819 case T_IDENTIFIER: {
1820 pp_definition_t parameter;
1821 memset(¶meter, 0, sizeof(parameter));
1822 parameter.source_position = pp_token.base.source_position;
1823 parameter.symbol = pp_token.base.symbol;
1824 parameter.is_parameter = true;
1825 obstack_grow(&pp_obstack, ¶meter, sizeof(parameter));
1826 eat_token(T_IDENTIFIER);
1827
1828 if (pp_token.kind == ',') {
1829 eat_token(',');
1830 break;
1831 }
1832
1833 if (pp_token.kind != ')') {
1834 errorf(&pp_token.base.source_position,
1835 "expected ',' or ')' after identifier, got %K",
1836 &pp_token);
1837 goto error_out;
1838 }
1839 break;
1840 }
1841
1842 case ')':
1843 eat_token(')');
1844 goto finish_argument_list;
1845
1846 default:
1847 errorf(&pp_token.base.source_position,
1848 "expected identifier, '...' or ')' in #define argument list, got %K",
1849 &pp_token);
1850 goto error_out;
1851 }
1852 }
1853
1854 finish_argument_list:
1855 new_definition->has_parameters = true;
1856 size_t size = obstack_object_size(&pp_obstack);
1857 new_definition->n_parameters
1858 = size / sizeof(new_definition->parameters[0]);
1859 new_definition->parameters = obstack_finish(&pp_obstack);
1860 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1861 pp_definition_t *param = &new_definition->parameters[i];
1862 symbol_t *symbol = param->symbol;
1863 pp_definition_t *previous = symbol->pp_definition;
1864 if (previous != NULL
1865 && previous->function_definition == new_definition) {
1866 errorf(¶m->source_position,
1867 "duplicate macro parameter '%Y'", symbol);
1868 param->symbol = sym_anonymous;
1869 continue;
1870 }
1871 param->parent_expansion = previous;
1872 param->function_definition = new_definition;
1873 symbol->pp_definition = param;
1874 }
1875 } else {
1876 next_input_token();
1877 }
1878
1879 /* construct token list */
1880 assert(obstack_object_size(&pp_obstack) == 0);
1881 bool next_must_be_param = false;
1882 while (!info.at_line_begin) {
1883 if (pp_token.kind == T_IDENTIFIER) {
1884 const symbol_t *symbol = pp_token.base.symbol;
1885 pp_definition_t *definition = symbol->pp_definition;
1886 if (definition != NULL
1887 && definition->function_definition == new_definition) {
1888 pp_token.kind = T_MACRO_PARAMETER;
1889 pp_token.macro_parameter.def = definition;
1890 }
1891 }
1892 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1893 missing_macro_param_error();
1894 }
1895 saved_token_t saved_token;
1896 saved_token.token = pp_token;
1897 saved_token.had_whitespace = info.had_whitespace;
1898 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1899 next_must_be_param
1900 = new_definition->has_parameters && pp_token.kind == '#';
1901 next_input_token();
1902 }
1903 if (next_must_be_param)
1904 missing_macro_param_error();
1905
1906 new_definition->list_len = obstack_object_size(&pp_obstack)
1907 / sizeof(new_definition->token_list[0]);
1908 new_definition->token_list = obstack_finish(&pp_obstack);
1909
1910 if (new_definition->has_parameters) {
1911 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1912 pp_definition_t *param = &new_definition->parameters[i];
1913 symbol_t *symbol = param->symbol;
1914 if (symbol == sym_anonymous)
1915 continue;
1916 assert(symbol->pp_definition == param);
1917 assert(param->function_definition == new_definition);
1918 symbol->pp_definition = param->parent_expansion;
1919 param->parent_expansion = NULL;
1920 }
1921 }
1922
1923 pp_definition_t *old_definition = symbol->pp_definition;
1924 if (old_definition != NULL) {
1925 if (!pp_definitions_equal(old_definition, new_definition)) {
1926 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1927 } else {
1928 /* reuse the old definition */
1929 obstack_free(&pp_obstack, new_definition);
1930 new_definition = old_definition;
1931 }
1932 }
1933
1934 symbol->pp_definition = new_definition;
1935 return;
1936
1937 error_out:
1938 if (obstack_object_size(&pp_obstack) > 0) {
1939 char *ptr = obstack_finish(&pp_obstack);
1940 obstack_free(&pp_obstack, ptr);
1941 }
1942 eat_pp_directive();
1943 }
1944
parse_undef_directive(void)1945 static void parse_undef_directive(void)
1946 {
1947 eat_pp(TP_undef);
1948 if (skip_mode) {
1949 eat_pp_directive();
1950 return;
1951 }
1952
1953 if (!is_defineable_token("#undef")) {
1954 eat_pp_directive();
1955 return;
1956 }
1957
1958 pp_token.base.symbol->pp_definition = NULL;
1959 next_input_token();
1960
1961 if (!info.at_line_begin) {
1962 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1963 }
1964 eat_pp_directive();
1965 }
1966
1967 /** behind an #include we can have the special headername lexems.
1968 * They're only allowed behind an #include so they're not recognized
1969 * by the normal next_preprocessing_token. We handle them as a special
1970 * exception here */
parse_headername(bool * system_include)1971 static const char *parse_headername(bool *system_include)
1972 {
1973 if (info.at_line_begin) {
1974 parse_error("expected headername after #include");
1975 return NULL;
1976 }
1977
1978 /* check wether we have a "... or <... headername */
1979 source_position_t position = input.position;
1980 switch (input.c) {
1981 {
1982 utf32 delimiter;
1983 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1984 case '"': delimiter = '"'; *system_include = false; goto parse_name;
1985 parse_name:
1986 assert(obstack_object_size(&symbol_obstack) == 0);
1987 next_char();
1988 while (true) {
1989 switch (input.c) {
1990 case NEWLINE:
1991 case EOF:
1992 {
1993 char *dummy = obstack_finish(&symbol_obstack);
1994 obstack_free(&symbol_obstack, dummy);
1995 }
1996 errorf(&pp_token.base.source_position,
1997 "header name without closing '%c'", (char)delimiter);
1998 return NULL;
1999
2000 default:
2001 if (input.c == delimiter) {
2002 next_char();
2003 goto finish_headername;
2004 } else {
2005 obstack_1grow(&symbol_obstack, (char)input.c);
2006 next_char();
2007 }
2008 break;
2009 }
2010 }
2011 /* we should never be here */
2012 }
2013
2014 default:
2015 next_preprocessing_token();
2016 if (info.at_line_begin) {
2017 /* TODO: if we are already in the new line then we parsed more than
2018 * wanted. We reuse the token, but could produce following errors
2019 * misbehaviours... */
2020 goto error_invalid_input;
2021 }
2022 if (pp_token.kind == T_STRING_LITERAL) {
2023 *system_include = false;
2024 return pp_token.literal.string.begin;
2025 } else if (pp_token.kind == '<') {
2026 *system_include = true;
2027 assert(obstack_object_size(&pp_obstack) == 0);
2028 while (true) {
2029 next_preprocessing_token();
2030 if (info.at_line_begin) {
2031 /* TODO: we shouldn't have parsed/expanded something on the
2032 * next line yet... */
2033 char *dummy = obstack_finish(&pp_obstack);
2034 obstack_free(&pp_obstack, dummy);
2035 goto error_invalid_input;
2036 }
2037 if (pp_token.kind == '>')
2038 break;
2039
2040 saved_token_t saved;
2041 saved.token = pp_token;
2042 saved.had_whitespace = info.had_whitespace;
2043 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2044 }
2045 size_t size = obstack_object_size(&pp_obstack);
2046 assert(size % sizeof(saved_token_t) == 0);
2047 size_t n_tokens = size / sizeof(saved_token_t);
2048 saved_token_t *tokens = obstack_finish(&pp_obstack);
2049 assert(obstack_object_size(&symbol_obstack) == 0);
2050 for (size_t i = 0; i < n_tokens; ++i) {
2051 const saved_token_t *saved = &tokens[i];
2052 if (i > 0 && saved->had_whitespace)
2053 obstack_1grow(&symbol_obstack, ' ');
2054 grow_token(&symbol_obstack, &saved->token);
2055 }
2056 obstack_free(&pp_obstack, tokens);
2057 goto finish_headername;
2058 } else {
2059 error_invalid_input:
2060 {
2061 char *dummy = obstack_finish(&symbol_obstack);
2062 obstack_free(&symbol_obstack, dummy);
2063 }
2064
2065 errorf(&pp_token.base.source_position,
2066 "expected \"FILENAME\" or <FILENAME> after #include");
2067 return NULL;
2068 }
2069 }
2070
2071 finish_headername:
2072 obstack_1grow(&symbol_obstack, '\0');
2073 char *const headername = obstack_finish(&symbol_obstack);
2074 const char *identified = identify_string(headername);
2075 pp_token.base.source_position = position;
2076 return identified;
2077 }
2078
do_include(bool const bracket_include,bool const include_next,char const * const headername)2079 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2080 {
2081 size_t const headername_len = strlen(headername);
2082 searchpath_entry_t *entry;
2083 if (include_next) {
2084 entry = input.path ? input.path->next
2085 : bracket_include ? bracket_searchpath.first
2086 : quote_searchpath.first;
2087 } else {
2088 if (!bracket_include) {
2089 /* put dirname of current input on obstack */
2090 const char *filename = input.position.input_name;
2091 const char *last_slash = strrchr(filename, '/');
2092 const char *full_name;
2093 if (last_slash != NULL) {
2094 size_t len = last_slash - filename;
2095 obstack_grow(&symbol_obstack, filename, len + 1);
2096 obstack_grow0(&symbol_obstack, headername, headername_len);
2097 char *complete_path = obstack_finish(&symbol_obstack);
2098 full_name = identify_string(complete_path);
2099 } else {
2100 full_name = headername;
2101 }
2102
2103 FILE *file = fopen(full_name, "r");
2104 if (file != NULL) {
2105 switch_pp_input(file, full_name, NULL, false);
2106 return true;
2107 }
2108 entry = quote_searchpath.first;
2109 } else {
2110 entry = bracket_searchpath.first;
2111 }
2112 }
2113
2114 assert(obstack_object_size(&symbol_obstack) == 0);
2115 /* check searchpath */
2116 for (; entry; entry = entry->next) {
2117 const char *path = entry->path;
2118 size_t len = strlen(path);
2119 obstack_grow(&symbol_obstack, path, len);
2120 if (path[len-1] != '/')
2121 obstack_1grow(&symbol_obstack, '/');
2122 obstack_grow(&symbol_obstack, headername, headername_len+1);
2123
2124 char *complete_path = obstack_finish(&symbol_obstack);
2125 FILE *file = fopen(complete_path, "r");
2126 if (file != NULL) {
2127 const char *filename = identify_string(complete_path);
2128 switch_pp_input(file, filename, entry, entry->is_system_path);
2129 return true;
2130 } else {
2131 obstack_free(&symbol_obstack, complete_path);
2132 }
2133 }
2134
2135 return false;
2136 }
2137
parse_include_directive(bool const include_next)2138 static void parse_include_directive(bool const include_next)
2139 {
2140 if (skip_mode) {
2141 eat_pp_directive();
2142 return;
2143 }
2144
2145 /* do not eat the TP_include, since it would already parse the next token
2146 * which needs special handling here. */
2147 skip_till_newline(true);
2148 bool system_include;
2149 const char *headername = parse_headername(&system_include);
2150 if (headername == NULL) {
2151 eat_pp_directive();
2152 return;
2153 }
2154
2155 bool had_nonwhitespace = skip_till_newline(false);
2156 if (had_nonwhitespace) {
2157 warningf(WARN_OTHER, &input.position,
2158 "extra tokens at end of #include directive");
2159 }
2160
2161 if (n_inputs > INCLUDE_LIMIT) {
2162 errorf(&pp_token.base.source_position, "#include nested too deeply");
2163 /* eat \n or EOF */
2164 next_input_token();
2165 return;
2166 }
2167
2168 /* switch inputs */
2169 info.whitespace_at_line_begin = 0;
2170 info.had_whitespace = false;
2171 info.at_line_begin = true;
2172 emit_newlines();
2173 push_input();
2174 bool res = do_include(system_include, include_next, headername);
2175 if (res) {
2176 next_input_token();
2177 } else {
2178 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2179 pop_restore_input();
2180 }
2181 }
2182
push_conditional(void)2183 static pp_conditional_t *push_conditional(void)
2184 {
2185 pp_conditional_t *conditional
2186 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2187 memset(conditional, 0, sizeof(*conditional));
2188
2189 conditional->parent = conditional_stack;
2190 conditional_stack = conditional;
2191
2192 return conditional;
2193 }
2194
pop_conditional(void)2195 static void pop_conditional(void)
2196 {
2197 assert(conditional_stack != NULL);
2198 conditional_stack = conditional_stack->parent;
2199 }
2200
check_unclosed_conditionals(void)2201 void check_unclosed_conditionals(void)
2202 {
2203 while (conditional_stack != NULL) {
2204 pp_conditional_t *conditional = conditional_stack;
2205
2206 if (conditional->in_else) {
2207 errorf(&conditional->source_position, "unterminated #else");
2208 } else {
2209 errorf(&conditional->source_position, "unterminated condition");
2210 }
2211 pop_conditional();
2212 }
2213 }
2214
parse_ifdef_ifndef_directive(bool const is_ifdef)2215 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2216 {
2217 bool condition;
2218 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2219
2220 if (skip_mode) {
2221 eat_pp_directive();
2222 pp_conditional_t *conditional = push_conditional();
2223 conditional->source_position = pp_token.base.source_position;
2224 conditional->skip = true;
2225 return;
2226 }
2227
2228 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2229 errorf(&pp_token.base.source_position,
2230 "expected identifier after #%s, got %K",
2231 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2232 eat_pp_directive();
2233
2234 /* just take the true case in the hope to avoid further errors */
2235 condition = true;
2236 } else {
2237 /* evaluate wether we are in true or false case */
2238 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2239 eat_token(T_IDENTIFIER);
2240
2241 if (!info.at_line_begin) {
2242 errorf(&pp_token.base.source_position,
2243 "extra tokens at end of #%s",
2244 is_ifdef ? "ifdef" : "ifndef");
2245 eat_pp_directive();
2246 }
2247 }
2248
2249 pp_conditional_t *conditional = push_conditional();
2250 conditional->source_position = pp_token.base.source_position;
2251 conditional->condition = condition;
2252
2253 if (!condition) {
2254 skip_mode = true;
2255 }
2256 }
2257
parse_else_directive(void)2258 static void parse_else_directive(void)
2259 {
2260 eat_pp(TP_else);
2261
2262 if (!info.at_line_begin) {
2263 if (!skip_mode) {
2264 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2265 }
2266 eat_pp_directive();
2267 }
2268
2269 pp_conditional_t *conditional = conditional_stack;
2270 if (conditional == NULL) {
2271 errorf(&pp_token.base.source_position, "#else without prior #if");
2272 return;
2273 }
2274
2275 if (conditional->in_else) {
2276 errorf(&pp_token.base.source_position,
2277 "#else after #else (condition started %P)",
2278 &conditional->source_position);
2279 skip_mode = true;
2280 return;
2281 }
2282
2283 conditional->in_else = true;
2284 if (!conditional->skip) {
2285 skip_mode = conditional->condition;
2286 }
2287 conditional->source_position = pp_token.base.source_position;
2288 }
2289
parse_endif_directive(void)2290 static void parse_endif_directive(void)
2291 {
2292 eat_pp(TP_endif);
2293
2294 if (!info.at_line_begin) {
2295 if (!skip_mode) {
2296 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2297 }
2298 eat_pp_directive();
2299 }
2300
2301 pp_conditional_t *conditional = conditional_stack;
2302 if (conditional == NULL) {
2303 errorf(&pp_token.base.source_position, "#endif without prior #if");
2304 return;
2305 }
2306
2307 if (!conditional->skip) {
2308 skip_mode = false;
2309 }
2310 pop_conditional();
2311 }
2312
/** The "#pragma STDC" pragmas distinguished by parse_pragma_directive(). */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN          = 0, /**< not a recognised STDC pragma */
	STDC_FP_CONTRACT      = 1,
	STDC_FENV_ACCESS      = 2,
	STDC_CX_LIMITED_RANGE = 3
} stdc_pragma_kind_t;
2319
/** The argument of a "#pragma STDC" pragma as parsed by parse_pragma_directive(). */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN = 0, /**< anything but ON, OFF or DEFAULT */
	STDC_VALUE_ON      = 1,
	STDC_VALUE_OFF     = 2,
	STDC_VALUE_DEFAULT = 3
} stdc_pragma_value_kind_t;
2326
parse_pragma_directive(void)2327 static void parse_pragma_directive(void)
2328 {
2329 eat_pp(TP_pragma);
2330 if (skip_mode) {
2331 eat_pp_directive();
2332 return;
2333 }
2334
2335 if (pp_token.kind != T_IDENTIFIER) {
2336 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2337 "expected identifier after #pragma");
2338 eat_pp_directive();
2339 return;
2340 }
2341
2342 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2343 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2344 /* a STDC pragma */
2345 next_input_token();
2346
2347 switch (pp_token.base.symbol->pp_ID) {
2348 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2349 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2350 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2351 default: break;
2352 }
2353 if (kind != STDC_UNKNOWN) {
2354 next_input_token();
2355 stdc_pragma_value_kind_t value;
2356 switch (pp_token.base.symbol->pp_ID) {
2357 case TP_ON: value = STDC_VALUE_ON; break;
2358 case TP_OFF: value = STDC_VALUE_OFF; break;
2359 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2360 default: value = STDC_VALUE_UNKNOWN; break;
2361 }
2362 if (value == STDC_VALUE_UNKNOWN) {
2363 kind = STDC_UNKNOWN;
2364 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2365 }
2366 }
2367 }
2368 eat_pp_directive();
2369 if (kind == STDC_UNKNOWN) {
2370 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2371 "encountered unknown #pragma");
2372 }
2373 }
2374
parse_line_directive(void)2375 static void parse_line_directive(void)
2376 {
2377 if (pp_token.kind != T_NUMBER) {
2378 if (!skip_mode)
2379 parse_error("expected integer");
2380 } else {
2381 char *end;
2382 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2383 if (*end == '\0') {
2384 /* use offset -1 as this is about the next line */
2385 input.position.lineno = line - 1;
2386 /* force output of line */
2387 input.output_line = input.position.lineno - 20;
2388 } else {
2389 if (!skip_mode) {
2390 errorf(&input.position, "'%S' is not a valid line number",
2391 &pp_token.literal.string);
2392 }
2393 }
2394 next_input_token();
2395 if (info.at_line_begin)
2396 return;
2397 }
2398 if (pp_token.kind == T_STRING_LITERAL
2399 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2400 input.position.input_name = pp_token.literal.string.begin;
2401 input.position.is_system_header = false;
2402 next_input_token();
2403
2404 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2405 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2406 /* flags:
2407 * 1 - indicates start of a new file
2408 * 2 - indicates return from a file
2409 * 3 - indicates system header
2410 * 4 - indicates implicit extern "C" in C++ mode
2411 *
2412 * currently we're only interested in "3"
2413 */
2414 if (streq(pp_token.literal.string.begin, "3")) {
2415 input.position.is_system_header = true;
2416 }
2417 next_input_token();
2418 }
2419 }
2420
2421 eat_pp_directive();
2422 }
2423
parse_error_directive(void)2424 static void parse_error_directive(void)
2425 {
2426 if (skip_mode) {
2427 eat_pp_directive();
2428 return;
2429 }
2430
2431 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2432 resolve_escape_sequences = false;
2433
2434 source_position_t const pos = pp_token.base.source_position;
2435 do {
2436 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2437 obstack_1grow(&pp_obstack, ' ');
2438
2439 switch (pp_token.kind) {
2440 case T_NUMBER: {
2441 string_t const *const str = &pp_token.literal.string;
2442 obstack_grow(&pp_obstack, str->begin, str->size);
2443 break;
2444 }
2445
2446 {
2447 char delim;
2448 case T_STRING_LITERAL: delim = '"'; goto string;
2449 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2450 string:;
2451 string_t const *const str = &pp_token.literal.string;
2452 char const *const enc = get_string_encoding_prefix(str->encoding);
2453 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2454 break;
2455 }
2456
2457 default: {
2458 char const *const str = pp_token.base.symbol->string;
2459 obstack_grow(&pp_obstack, str, strlen(str));
2460 break;
2461 }
2462 }
2463
2464 next_input_token();
2465 } while (!info.at_line_begin);
2466
2467 resolve_escape_sequences = old_resolve_escape_sequences;
2468
2469 obstack_1grow(&pp_obstack, '\0');
2470 char *const str = obstack_finish(&pp_obstack);
2471 errorf(&pos, "#%s", str);
2472 obstack_free(&pp_obstack, str);
2473 }
2474
parse_preprocessing_directive(void)2475 static void parse_preprocessing_directive(void)
2476 {
2477 eat_token('#');
2478
2479 if (info.at_line_begin) {
2480 /* empty directive */
2481 return;
2482 }
2483
2484 if (pp_token.base.symbol) {
2485 switch (pp_token.base.symbol->pp_ID) {
2486 case TP_define: parse_define_directive(); break;
2487 case TP_else: parse_else_directive(); break;
2488 case TP_endif: parse_endif_directive(); break;
2489 case TP_error: parse_error_directive(); break;
2490 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2491 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2492 case TP_include: parse_include_directive(false); break;
2493 case TP_include_next: parse_include_directive(true); break;
2494 case TP_line: next_input_token(); goto line_directive;
2495 case TP_pragma: parse_pragma_directive(); break;
2496 case TP_undef: parse_undef_directive(); break;
2497 default: goto skip;
2498 }
2499 } else if (pp_token.kind == T_NUMBER) {
2500 line_directive:
2501 parse_line_directive();
2502 } else {
2503 skip:
2504 if (!skip_mode) {
2505 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2506 }
2507 eat_pp_directive();
2508 }
2509
2510 assert(info.at_line_begin);
2511 }
2512
finish_current_argument(void)2513 static void finish_current_argument(void)
2514 {
2515 if (current_argument == NULL)
2516 return;
2517 size_t size = obstack_object_size(&pp_obstack);
2518 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2519 current_argument->token_list = obstack_finish(&pp_obstack);
2520 }
2521
next_preprocessing_token(void)2522 void next_preprocessing_token(void)
2523 {
2524 restart:
2525 if (!expand_next()) {
2526 do {
2527 next_input_token();
2528 while (pp_token.kind == '#' && info.at_line_begin) {
2529 parse_preprocessing_directive();
2530 }
2531 } while (skip_mode && pp_token.kind != T_EOF);
2532 }
2533
2534 const token_kind_t kind = pp_token.kind;
2535 if (current_call == NULL || argument_expanding != NULL) {
2536 symbol_t *const symbol = pp_token.base.symbol;
2537 if (symbol) {
2538 if (kind == T_MACRO_PARAMETER) {
2539 assert(current_expansion != NULL);
2540 start_expanding(pp_token.macro_parameter.def);
2541 goto restart;
2542 }
2543
2544 pp_definition_t *const pp_definition = symbol->pp_definition;
2545 if (pp_definition != NULL && !pp_definition->is_expanding) {
2546 if (pp_definition->has_parameters) {
2547
2548 /* check if next token is a '(' */
2549 whitespace_info_t old_info = info;
2550 token_kind_t next_token = peek_expansion();
2551 if (next_token == T_EOF) {
2552 info.at_line_begin = false;
2553 info.had_whitespace = false;
2554 skip_whitespace();
2555 if (input.c == '(') {
2556 next_token = '(';
2557 }
2558 }
2559
2560 if (next_token == '(') {
2561 if (current_expansion == NULL)
2562 expansion_pos = pp_token.base.source_position;
2563 next_preprocessing_token();
2564 assert(pp_token.kind == '(');
2565
2566 pp_definition->parent_expansion = current_expansion;
2567 current_call = pp_definition;
2568 current_call->expand_pos = 0;
2569 current_call->expand_info = old_info;
2570 if (current_call->n_parameters > 0) {
2571 current_argument = ¤t_call->parameters[0];
2572 assert(argument_brace_count == 0);
2573 }
2574 goto restart;
2575 } else {
2576 /* skip_whitespaces() skipped newlines and whitespace,
2577 * remember results for next token */
2578 next_info = info;
2579 info = old_info;
2580 return;
2581 }
2582 } else {
2583 if (current_expansion == NULL)
2584 expansion_pos = pp_token.base.source_position;
2585 start_expanding(pp_definition);
2586 goto restart;
2587 }
2588 }
2589 }
2590 }
2591
2592 if (current_call != NULL) {
2593 /* current_call != NULL */
2594 if (kind == '(') {
2595 ++argument_brace_count;
2596 } else if (kind == ')') {
2597 if (argument_brace_count > 0) {
2598 --argument_brace_count;
2599 } else {
2600 finish_current_argument();
2601 assert(kind == ')');
2602 start_expanding(current_call);
2603 info = current_call->expand_info;
2604 current_call = NULL;
2605 current_argument = NULL;
2606 goto restart;
2607 }
2608 } else if (kind == ',' && argument_brace_count == 0) {
2609 finish_current_argument();
2610 current_call->expand_pos++;
2611 if (current_call->expand_pos >= current_call->n_parameters) {
2612 errorf(&pp_token.base.source_position,
2613 "too many arguments passed for macro '%Y'",
2614 current_call->symbol);
2615 current_argument = NULL;
2616 } else {
2617 current_argument
2618 = ¤t_call->parameters[current_call->expand_pos];
2619 }
2620 goto restart;
2621 } else if (kind == T_MACRO_PARAMETER) {
2622 /* parameters have to be fully expanded before being used as
2623 * parameters for another macro-call */
2624 assert(current_expansion != NULL);
2625 pp_definition_t *argument = pp_token.macro_parameter.def;
2626 argument_expanding = argument;
2627 start_expanding(argument);
2628 goto restart;
2629 } else if (kind == T_EOF) {
2630 errorf(&expansion_pos,
2631 "reached end of file while parsing arguments for '%Y'",
2632 current_call->symbol);
2633 return;
2634 }
2635 if (current_argument != NULL) {
2636 saved_token_t saved;
2637 saved.token = pp_token;
2638 saved.had_whitespace = info.had_whitespace;
2639 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2640 }
2641 goto restart;
2642 }
2643 }
2644
/**
 * Appends the directory @p path to the end of the search path @p paths.
 *
 * The string is not copied; only the pointer is stored in the new entry,
 * which is allocated on config_obstack.
 */
void append_include_path(searchpath_t *paths, const char *path)
{
	searchpath_entry_t *const node = OALLOCZ(&config_obstack, searchpath_entry_t);
	node->is_system_path = paths->is_system_path;
	node->path           = path;

	/* hook the node into the slot at the end and move the end behind it */
	*paths->anchor = node;
	paths->anchor  = &node->next;
}
2654
/**
 * Appends every element of the ':'-separated list found in the environment
 * variable @p envvar to the search path @p paths.
 *
 * An unset or empty variable adds nothing. An empty element (leading,
 * trailing or between two colons) stands for the current directory.
 */
static void append_env_paths(searchpath_t *paths, const char *envvar)
{
	const char *const val = getenv(envvar);
	if (val == NULL || *val == '\0')
		return;

	const char *begin = val;
	for (;;) {
		/* find the end of the current element */
		const char *end = begin;
		while (*end != '\0' && *end != ':')
			++end;

		size_t const len = (size_t)(end - begin);
		if (len == 0) {
			/* use "." for gcc compatibility (Matze: I would expect that
			 * nothing happens for an empty entry...) */
			append_include_path(paths, ".");
		} else {
			char *const string = obstack_copy0(&config_obstack, begin, len);
			append_include_path(paths, string);
		}

		/* Stop at the terminator before stepping over the separator, so
		 * that nothing behind the end of the string is ever read. */
		if (*end == '\0')
			break;
		begin = end + 1;
	}
}
2683
/**
 * Links the entries of @p append behind the last entry of @p path.
 *
 * Only the slot at the end of @p path is written; path->anchor itself is not
 * advanced, so a later append_include_path() on @p path replaces this link.
 */
static void append_searchpath(searchpath_t *path, const searchpath_t *append)
{
	searchpath_entry_t *const head = append->first;
	*path->anchor = head;
}
2688
setup_include_path(void)2689 static void setup_include_path(void)
2690 {
2691 /* built-in paths */
2692 append_include_path(&system_searchpath, "/usr/include");
2693
2694 /* parse environment variable */
2695 append_env_paths(&bracket_searchpath, "CPATH");
2696 append_env_paths(&system_searchpath,
2697 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2698
2699 /* append system search path to bracket searchpath */
2700 append_searchpath(&system_searchpath, &after_searchpath);
2701 append_searchpath(&bracket_searchpath, &system_searchpath);
2702 append_searchpath("e_searchpath, &bracket_searchpath);
2703 }
2704
input_error(unsigned const delta_lines,unsigned const delta_cols,char const * const message)2705 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2706 {
2707 source_position_t pos = pp_token.base.source_position;
2708 pos.lineno += delta_lines;
2709 pos.colno += delta_cols;
2710 errorf(&pos, "%s", message);
2711 }
2712
init_include_paths(void)2713 void init_include_paths(void)
2714 {
2715 obstack_init(&config_obstack);
2716 }
2717
init_preprocessor(void)2718 void init_preprocessor(void)
2719 {
2720 init_symbols();
2721
2722 obstack_init(&pp_obstack);
2723 obstack_init(&input_obstack);
2724 strset_init(&stringset);
2725
2726 setup_include_path();
2727
2728 set_input_error_callback(input_error);
2729 }
2730
exit_preprocessor(void)2731 void exit_preprocessor(void)
2732 {
2733 obstack_free(&input_obstack, NULL);
2734 obstack_free(&pp_obstack, NULL);
2735 obstack_free(&config_obstack, NULL);
2736
2737 strset_destroy(&stringset);
2738 }
2739
2740 int pptest_main(int argc, char **argv);
pptest_main(int argc,char ** argv)2741 int pptest_main(int argc, char **argv)
2742 {
2743 init_symbol_table();
2744 init_include_paths();
2745 init_preprocessor();
2746 init_tokens();
2747
2748 error_on_unknown_chars = false;
2749 resolve_escape_sequences = false;
2750
2751 /* simplistic commandline parser */
2752 const char *filename = NULL;
2753 const char *output = NULL;
2754 for (int i = 1; i < argc; ++i) {
2755 const char *opt = argv[i];
2756 if (streq(opt, "-I")) {
2757 append_include_path(&bracket_searchpath, argv[++i]);
2758 continue;
2759 } else if (streq(opt, "-E")) {
2760 /* ignore */
2761 } else if (streq(opt, "-o")) {
2762 output = argv[++i];
2763 continue;
2764 } else if (opt[0] == '-') {
2765 fprintf(stderr, "Unknown option '%s'\n", opt);
2766 } else {
2767 if (filename != NULL)
2768 fprintf(stderr, "Multiple inputs not supported\n");
2769 filename = argv[i];
2770 }
2771 }
2772 if (filename == NULL) {
2773 fprintf(stderr, "No input specified\n");
2774 return 1;
2775 }
2776
2777 if (output == NULL) {
2778 out = stdout;
2779 } else {
2780 out = fopen(output, "w");
2781 if (out == NULL) {
2782 fprintf(stderr, "Couldn't open output '%s'\n", output);
2783 return 1;
2784 }
2785 }
2786
2787 /* just here for gcc compatibility */
2788 fprintf(out, "# 1 \"%s\"\n", filename);
2789 fprintf(out, "# 1 \"<built-in>\"\n");
2790 fprintf(out, "# 1 \"<command-line>\"\n");
2791
2792 FILE *file = fopen(filename, "r");
2793 if (file == NULL) {
2794 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2795 return 1;
2796 }
2797 switch_pp_input(file, filename, NULL, false);
2798
2799 for (;;) {
2800 next_preprocessing_token();
2801 if (pp_token.kind == T_EOF)
2802 break;
2803 emit_pp_token();
2804 }
2805
2806 fputc('\n', out);
2807 check_unclosed_conditionals();
2808 fclose(close_pp_input());
2809 if (out != stdout)
2810 fclose(out);
2811
2812 exit_tokens();
2813 exit_preprocessor();
2814 exit_symbol_table();
2815
2816 return 0;
2817 }
2818