1 #include <config.h>
2 
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8 
9 #include "preprocessor.h"
10 #include "token_t.h"
11 #include "symbol_t.h"
12 #include "adt/util.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
19 #include "input.h"
20 
/* Number of slots kept free at the front of pp_input_t::buf; decoded input
 * starts behind them so that read characters can be put back. */
#define MAX_PUTBACK 3
/* 199 is for gcc "compatibility" */
#define INCLUDE_LIMIT 199
23 
24 typedef struct saved_token_t {
25 	token_t token;
26 	bool    had_whitespace;
27 } saved_token_t;
28 
/** Whitespace and line-start bookkeeping for a token. */
typedef struct whitespace_info_t {
	/** the current token was preceded by whitespace */
	bool     had_whitespace;
	/** the current token is the first one on its line, so a "#" found
	 * here starts a preprocessing directive */
	bool     at_line_begin;
	/** count of spaces in front of the first token of the line */
	unsigned whitespace_at_line_begin;
} whitespace_info_t;
38 
39 struct pp_definition_t {
40 	symbol_t          *symbol;
41 	source_position_t  source_position;
42 	pp_definition_t   *parent_expansion;
43 	size_t             expand_pos;
44 	whitespace_info_t  expand_info;
45 	bool               is_variadic    : 1;
46 	bool               is_expanding   : 1;
47 	bool               has_parameters : 1;
48 	bool               is_parameter   : 1;
49 	pp_definition_t   *function_definition;
50 	size_t             n_parameters;
51 	pp_definition_t   *parameters;
52 
53 	/* replacement */
54 	size_t             list_len;
55 	saved_token_t     *token_list;
56 };
57 
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60 	source_position_t  source_position;
61 	bool               condition;
62 	bool               in_else;
63 	/** conditional in skip mode (then+else gets skipped) */
64 	bool               skip;
65 	pp_conditional_t  *parent;
66 };
67 
68 typedef struct pp_input_t pp_input_t;
69 struct pp_input_t {
70 	FILE               *file;
71 	input_t            *input;
72 	utf32               c;
73 	utf32               buf[1024+MAX_PUTBACK];
74 	const utf32        *bufend;
75 	const utf32        *bufpos;
76 	source_position_t   position;
77 	pp_input_t         *parent;
78 	unsigned            output_line;
79 	searchpath_entry_t *path;
80 };
81 
82 struct searchpath_entry_t {
83 	const char         *path;
84 	searchpath_entry_t *next;
85 	bool                is_system_path;
86 };
87 
88 static pp_input_t      input;
89 
90 static pp_input_t     *input_stack;
91 static unsigned        n_inputs;
92 static struct obstack  input_obstack;
93 
94 static pp_conditional_t *conditional_stack;
95 
96 token_t                  pp_token;
97 bool                     allow_dollar_in_symbol   = true;
98 static bool              resolve_escape_sequences = true;
99 static bool              error_on_unknown_chars   = true;
100 static bool              skip_mode;
101 static FILE             *out;
102 static struct obstack    pp_obstack;
103 static struct obstack    config_obstack;
104 static const char       *printed_input_name = NULL;
105 static source_position_t expansion_pos;
106 static pp_definition_t  *current_expansion  = NULL;
107 static pp_definition_t  *current_call       = NULL;
108 static pp_definition_t  *current_argument   = NULL;
109 static pp_definition_t  *argument_expanding = NULL;
110 static unsigned          argument_brace_count;
111 static strset_t          stringset;
112 static token_kind_t      last_token;
113 
114 struct searchpath_t {
115 	searchpath_entry_t  *first;
116 	searchpath_entry_t **anchor;
117 	bool                 is_system_path;
118 };
119 
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
122 searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
123 searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
124 
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
126 static whitespace_info_t info;
127 
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const source_position_t *pos, const char *add);
131 
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;
138 
139 static symbol_t *symbol_L;
140 static symbol_t *symbol_U;
141 static symbol_t *symbol_u;
142 static symbol_t *symbol_u8;
143 
init_symbols(void)144 static void init_symbols(void)
145 {
146 	symbol_colongreater             = symbol_table_insert(":>");
147 	symbol_lesscolon                = symbol_table_insert("<:");
148 	symbol_lesspercent              = symbol_table_insert("<%");
149 	symbol_percentcolon             = symbol_table_insert("%:");
150 	symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
151 	symbol_percentgreater           = symbol_table_insert("%>");
152 
153 	symbol_L  = symbol_table_insert("L");
154 	symbol_U  = symbol_table_insert("U");
155 	symbol_u  = symbol_table_insert("u");
156 	symbol_u8 = symbol_table_insert("u8");
157 }
158 
/**
 * Makes the given stream the current preprocessor input.
 *
 * @param file              stream to read from
 * @param filename          name stored as the input name of the position
 * @param path              search path entry stored with the input
 * @param is_system_header  stored in the position of the new input
 */
void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
{
	/* an empty buffer: the first read has to decode from the stream */
	input.bufpos      = NULL;
	input.bufend      = NULL;
	input.file        = file;
	input.input       = input_from_stream(file, NULL);
	input.path        = path;
	input.output_line = 0;

	input.position.input_name       = filename;
	input.position.is_system_header = is_system_header;
	input.position.lineno           = 1;

	/* announce the new input; the "1" flag is only added when there is a
	 * saved input on the stack */
	char const *const flag = input_stack != NULL ? "1" : NULL;
	print_line_directive(&input.position, flag);

	/* pretend a '\n' was just read so that we notice being at the
	 * beginning of a line */
	input.c               = '\n';
	input.position.lineno = 0;
}
178 
close_pp_input(void)179 FILE *close_pp_input(void)
180 {
181 	input_free(input.input);
182 
183 	FILE* const file = input.file;
184 	assert(file);
185 
186 	input.input  = NULL;
187 	input.file   = NULL;
188 	input.bufend = NULL;
189 	input.bufpos = NULL;
190 	input.c      = EOF;
191 
192 	return file;
193 }
194 
push_input(void)195 static void push_input(void)
196 {
197 	pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
198 
199 	/* adjust buffer positions */
200 	if (input.bufpos != NULL)
201 		saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
202 	if (input.bufend != NULL)
203 		saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
204 
205 	saved_input->parent = input_stack;
206 	input_stack         = saved_input;
207 	++n_inputs;
208 }
209 
pop_restore_input(void)210 static void pop_restore_input(void)
211 {
212 	assert(n_inputs > 0);
213 	assert(input_stack != NULL);
214 
215 	pp_input_t *saved_input = input_stack;
216 
217 	memcpy(&input, saved_input, sizeof(input));
218 	input.parent = NULL;
219 
220 	/* adjust buffer positions */
221 	if (saved_input->bufpos != NULL)
222 		input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
223 	if (saved_input->bufend != NULL)
224 		input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
225 
226 	input_stack = saved_input->parent;
227 	obstack_free(&input_obstack, saved_input);
228 	--n_inputs;
229 }
230 
231 /**
232  * Prints a parse error message at the current token.
233  *
234  * @param msg   the error message
235  */
parse_error(const char * msg)236 static void parse_error(const char *msg)
237 {
238 	errorf(&pp_token.base.source_position,  "%s", msg);
239 }
240 
next_real_char(void)241 static inline void next_real_char(void)
242 {
243 	assert(input.bufpos <= input.bufend);
244 	if (input.bufpos >= input.bufend) {
245 		size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
246 		if (n == 0) {
247 			input.c = EOF;
248 			return;
249 		}
250 		input.bufpos = input.buf + MAX_PUTBACK;
251 		input.bufend = input.bufpos + n;
252 	}
253 	input.c = *input.bufpos++;
254 	++input.position.colno;
255 }
256 
257 /**
258  * Put a character back into the buffer.
259  *
260  * @param pc  the character to put back
261  */
put_back(utf32 const pc)262 static inline void put_back(utf32 const pc)
263 {
264 	assert(input.bufpos > input.buf);
265 	*(--input.bufpos - input.buf + input.buf) = (char) pc;
266 	--input.position.colno;
267 }
268 
/*
 * To be used as `case NEWLINE:` inside a switch over input.c.  It covers
 * '\r', '\r' followed by '\n', and '\n': the line end is consumed, the line
 * number is incremented and the column is reset to 1 before the statements
 * following the label run.  The macro defines the label `newline`, so it can
 * be used only once per function.
 */
#define NEWLINE \
	'\r': \
		next_char(); \
		if (input.c == '\n') { \
	case '\n': \
			next_char(); \
		} \
		++input.position.lineno; \
		input.position.colno = 1; \
		goto newline; \
		newline /* Let it look like an ordinary case label. */

/* Asserts that the current character is c_type and then skips it.  The
 * argument is parenthesized so that any expression can be passed. */
#define eat(c_type) (assert(input.c == (c_type)), next_char())
282 
maybe_concat_lines(void)283 static void maybe_concat_lines(void)
284 {
285 	eat('\\');
286 
287 	switch (input.c) {
288 	case NEWLINE:
289 		info.whitespace_at_line_begin = 0;
290 		return;
291 
292 	default:
293 		break;
294 	}
295 
296 	put_back(input.c);
297 	input.c = '\\';
298 }
299 
300 /**
301  * Set c to the next input character, ie.
302  * after expanding trigraphs.
303  */
next_char(void)304 static inline void next_char(void)
305 {
306 	next_real_char();
307 
308 	/* filter trigraphs and concatenated lines */
309 	if (UNLIKELY(input.c == '\\')) {
310 		maybe_concat_lines();
311 		goto end_of_next_char;
312 	}
313 
314 	if (LIKELY(input.c != '?'))
315 		goto end_of_next_char;
316 
317 	next_real_char();
318 	if (LIKELY(input.c != '?')) {
319 		put_back(input.c);
320 		input.c = '?';
321 		goto end_of_next_char;
322 	}
323 
324 	next_real_char();
325 	switch (input.c) {
326 	case '=': input.c = '#'; break;
327 	case '(': input.c = '['; break;
328 	case '/': input.c = '\\'; maybe_concat_lines(); break;
329 	case ')': input.c = ']'; break;
330 	case '\'': input.c = '^'; break;
331 	case '<': input.c = '{'; break;
332 	case '!': input.c = '|'; break;
333 	case '>': input.c = '}'; break;
334 	case '-': input.c = '~'; break;
335 	default:
336 		put_back(input.c);
337 		put_back('?');
338 		input.c = '?';
339 		break;
340 	}
341 
342 end_of_next_char:;
343 #ifdef DEBUG_CHARS
344 	printf("nchar '%c'\n", input.c);
345 #endif
346 }
347 
348 
349 
/**
 * Tells whether a character is one of the octal digits '0' to '7'.
 * The comparison relies on the C guarantee that the decimal digits have
 * consecutive, ascending character codes.
 *
 * @param chr  the character to check
 * @return true for an octal digit, false for anything else
 */
static inline bool is_octal_digit(int chr)
{
	return '0' <= chr && chr <= '7';
}
371 
/**
 * Returns the numeric value (0 to 15) of a decimal or hexadecimal digit
 * character.  Any other character ends in a panic.
 *
 * Decimal digits are guaranteed to be consecutive in C, so they are handled
 * by subtraction; letters carry no such guarantee and are listed one by one.
 */
static int digit_value(int digit)
{
	if ('0' <= digit && digit <= '9')
		return digit - '0';

	switch (digit) {
	case 'a': case 'A': return 10;
	case 'b': case 'B': return 11;
	case 'c': case 'C': return 12;
	case 'd': case 'D': return 13;
	case 'e': case 'E': return 14;
	case 'f': case 'F': return 15;
	default:
		panic("wrong character given");
	}
}
405 
406 /**
407  * Parses an octal character sequence.
408  *
409  * @param first_digit  the already read first digit
410  */
parse_octal_sequence(const utf32 first_digit)411 static utf32 parse_octal_sequence(const utf32 first_digit)
412 {
413 	assert(is_octal_digit(first_digit));
414 	utf32 value = digit_value(first_digit);
415 	if (!is_octal_digit(input.c)) return value;
416 	value = 8 * value + digit_value(input.c);
417 	next_char();
418 	if (!is_octal_digit(input.c)) return value;
419 	value = 8 * value + digit_value(input.c);
420 	next_char();
421 	return value;
422 
423 }
424 
425 /**
426  * Parses a hex character sequence.
427  */
parse_hex_sequence(void)428 static utf32 parse_hex_sequence(void)
429 {
430 	utf32 value = 0;
431 	while (isxdigit(input.c)) {
432 		value = 16 * value + digit_value(input.c);
433 		next_char();
434 	}
435 	return value;
436 }
437 
/**
 * Checks the constraint of C11 §6.4.3:2: a universal character name must
 * not denote a value below 0xA0 other than 0x24, 0x40 and 0x60, nor a value
 * in the range 0xD800 to 0xDFFF.
 *
 * @param v  the value denoted by the universal character name
 */
static bool is_universal_char_valid(utf32 const v)
{
	bool const is_low_exception = v == 0x24 || v == 0x40 || v == 0x60;
	bool const in_d800_dfff     = 0xD800 <= v && v <= 0xDFFF;

	if (v < 0xA0U && !is_low_exception)
		return false;
	return !in_d800_dfff;
}
447 
parse_universal_char(unsigned const n_digits)448 static utf32 parse_universal_char(unsigned const n_digits)
449 {
450 	utf32 v = 0;
451 	for (unsigned k = n_digits; k != 0; --k) {
452 		if (isxdigit(input.c)) {
453 			v = 16 * v + digit_value(input.c);
454 			if (!resolve_escape_sequences)
455 				obstack_1grow(&symbol_obstack, input.c);
456 			next_char();
457 		} else {
458 			errorf(&input.position,
459 			       "short universal character name, expected %u more digits",
460 				   k);
461 			break;
462 		}
463 	}
464 	if (!is_universal_char_valid(v)) {
465 		errorf(&input.position,
466 		       "\\%c%0*X is not a valid universal character name",
467 		       n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
468 	}
469 	return v;
470 }
471 
/**
 * Tells whether a value is accepted in identifiers when C11 mode is not
 * active: it has to be one of the listed single characters or lie within
 * one of the listed inclusive ranges.
 *
 * @param v  the value to look up
 */
static bool is_universal_char_valid_identifier_c99(utf32 const v)
{
	static const utf32 single_chars[] = {
		0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
		0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
		0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
		0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
		0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
		0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
	};

	/* inclusive { first, last } pairs */
	static const utf32 ranges[][2] = {
		{0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
		{0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
		{0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
		{0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
		{0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
		{0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
		{0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
		{0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
		{0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
		{0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
		{0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
		{0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
		{0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
		{0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
		{0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
		{0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
		{0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
		{0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
		{0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
		{0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
		{0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
		{0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
		{0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
		{0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
		{0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
		{0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
		{0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
		{0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
		{0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
		{0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
		{0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
		{0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
		{0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
		{0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
		{0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
		{0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
		{0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
		{0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
		{0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
		{0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
		{0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
		{0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
		{0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
		{0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
		{0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
		{0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
		{0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
		{0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
		{0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
		{0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
		{0x3021, 0x3029},
	};

	size_t const n_singles = sizeof(single_chars) / sizeof(single_chars[0]);
	for (size_t s = 0; s != n_singles; ++s) {
		if (single_chars[s] == v)
			return true;
	}

	size_t const n_ranges = sizeof(ranges) / sizeof(ranges[0]);
	for (size_t r = 0; r != n_ranges; ++r) {
		utf32 const first = ranges[r][0];
		utf32 const last  = ranges[r][1];
		if (first <= v && v <= last)
			return true;
	}
	return false;
}
546 
/**
 * Tells whether a value lies in one of the ranges of C11 Annex D.1, i.e.
 * whether it is allowed in an identifier.
 *
 * @param v  the value to look up
 */
static bool is_universal_char_valid_identifier_c11(utf32 const v)
{
	/* inclusive { first, last } pairs for values up to 0xFFFF */
	static const utf32 bmp_ranges[][2] = {
		{0x00A8, 0x00A8}, {0x00AA, 0x00AA}, {0x00AD, 0x00AD}, {0x00AF, 0x00AF},
		{0x00B2, 0x00B5}, {0x00B7, 0x00BA}, {0x00BC, 0x00BE}, {0x00C0, 0x00D6},
		{0x00D8, 0x00F6}, {0x00F8, 0x00FF}, {0x0100, 0x167F}, {0x1681, 0x180D},
		{0x180F, 0x1FFF}, {0x200B, 0x200D}, {0x202A, 0x202E}, {0x203F, 0x2040},
		{0x2054, 0x2054}, {0x2060, 0x206F}, {0x2070, 0x218F}, {0x2460, 0x24FF},
		{0x2776, 0x2793}, {0x2C00, 0x2DFF}, {0x2E80, 0x2FFF}, {0x3004, 0x3007},
		{0x3021, 0x302F}, {0x3031, 0x303F}, {0x3040, 0xD7FF}, {0xF900, 0xFD3D},
		{0xFD40, 0xFDCF}, {0xFDF0, 0xFE44}, {0xFE47, 0xFFFD},
	};

	for (size_t r = 0; r != sizeof(bmp_ranges) / sizeof(bmp_ranges[0]); ++r) {
		if (bmp_ranges[r][0] <= v && v <= bmp_ranges[r][1])
			return true;
	}

	/* 0x10000-0x1FFFD, 0x20000-0x2FFFD, ..., 0xE0000-0xEFFFD: everything
	 * in these 64K blocks except the last two values of each block */
	if (0x10000 <= v && v <= 0xEFFFD && (v & 0xFFFF) <= 0xFFFD)
		return true;

	return false;
}
597 
/**
 * Tells whether a value may appear in an identifier, using the C11 rules
 * when c_mode has _C11 set and the C99 rules otherwise.
 */
static bool is_universal_char_valid_identifier(utf32 const v)
{
	return (c_mode & _C11)
		? is_universal_char_valid_identifier_c11(v)
		: is_universal_char_valid_identifier_c99(v);
}
604 
/**
 * Tells whether a value must not be the first character of an identifier.
 * Only C11 mode has such a restriction (C11 Annex D.2); in every other mode
 * the answer is false.
 */
static bool is_universal_char_invalid_identifier_start(utf32 const v)
{
	if (c_mode & _C11) {
		/* C11 Annex D.2 */
		return (0x0300 <= v && v <= 0x036F)
		    || (0x1DC0 <= v && v <= 0x1DFF)
		    || (0x20D0 <= v && v <= 0x20FF)
		    || (0xFE20 <= v && v <= 0xFE2F);
	}
	return false;
}
617 
618 /**
619  * Parse an escape sequence.
620  */
parse_escape_sequence(void)621 static utf32 parse_escape_sequence(void)
622 {
623 	eat('\\');
624 
625 	utf32 const ec = input.c;
626 	next_char();
627 
628 	switch (ec) {
629 	case '"':  return '"';
630 	case '\'': return '\'';
631 	case '\\': return '\\';
632 	case '?': return '\?';
633 	case 'a': return '\a';
634 	case 'b': return '\b';
635 	case 'f': return '\f';
636 	case 'n': return '\n';
637 	case 'r': return '\r';
638 	case 't': return '\t';
639 	case 'v': return '\v';
640 	case 'x':
641 		return parse_hex_sequence();
642 	case '0':
643 	case '1':
644 	case '2':
645 	case '3':
646 	case '4':
647 	case '5':
648 	case '6':
649 	case '7':
650 		return parse_octal_sequence(ec);
651 	case EOF:
652 		parse_error("reached end of file while parsing escape sequence");
653 		return EOF;
654 	/* \E is not documented, but handled, by GCC.  It is acceptable according
655 	 * to §6.11.4, whereas \e is not. */
656 	case 'E':
657 	case 'e':
658 		if (c_mode & _GNUC)
659 			return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
660 		break;
661 
662 	case 'U': return parse_universal_char(8);
663 	case 'u': return parse_universal_char(4);
664 
665 	default:
666 		break;
667 	}
668 	/* §6.4.4.4:8 footnote 64 */
669 	parse_error("unknown escape sequence");
670 	return EOF;
671 }
672 
identify_string(char * string)673 static const char *identify_string(char *string)
674 {
675 	const char *result = strset_insert(&stringset, string);
676 	if (result != string) {
677 		obstack_free(&symbol_obstack, string);
678 	}
679 	return result;
680 }
681 
sym_make_string(string_encoding_t const enc)682 static string_t sym_make_string(string_encoding_t const enc)
683 {
684 	obstack_1grow(&symbol_obstack, '\0');
685 	size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
686 	char       *const string = obstack_finish(&symbol_obstack);
687 	char const *const result = identify_string(string);
688 	return (string_t){ result, len, enc };
689 }
690 
make_string(char const * const string)691 string_t make_string(char const *const string)
692 {
693 	obstack_grow(&symbol_obstack, string, strlen(string));
694 	return sym_make_string(STRING_ENCODING_CHAR);
695 }
696 
get_string_encoding_limit(string_encoding_t const enc)697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
698 {
699 	switch (enc) {
700 	case STRING_ENCODING_CHAR:   return 0xFF;
701 	case STRING_ENCODING_CHAR16: return 0xFFFF;
702 	case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
703 	case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
704 	case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
705 	}
706 	panic("invalid string encoding");
707 }
708 
/**
 * Parses a delimited literal (string literal or character constant) and
 * stores it in pp_token.
 *
 * Reading stops at the closing delimiter or at the end of the input; a
 * newline inside the literal is reported as an error and reading goes on.
 *
 * @param delimiter  opening and closing character; has to be the current
 *                   character
 * @param kind       token kind stored in pp_token
 * @param enc        encoding of the literal
 * @param context    description of the literal used in error messages
 */
static void parse_string(utf32 const delimiter, token_kind_t const kind,
                         string_encoding_t const enc,
                         char const *const context)
{
	const unsigned start_linenr = input.position.lineno;

	eat(delimiter);

	utf32 const limit = get_string_encoding_limit(enc);
	while (true) {
		switch (input.c) {
		case '\\': {
			if (resolve_escape_sequences) {
				utf32 const tc = parse_escape_sequence();
				if (tc > limit) {
					warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
				}
				if (enc == STRING_ENCODING_CHAR) {
					obstack_1grow(&symbol_obstack, tc);
				} else {
					obstack_grow_utf8(&symbol_obstack, tc);
				}
			} else {
				/* keep the escape sequence as written: the backslash ... */
				obstack_1grow(&symbol_obstack, '\\');
				next_char();
				/* ... and the character behind it.  It can be any code
				 * point, so it is stored like ordinary characters instead
				 * of being narrowed to one byte.  An EOF is left to the
				 * EOF case below. */
				if (input.c != (utf32)EOF) {
					obstack_grow_utf8(&symbol_obstack, input.c);
					next_char();
				}
			}
			break;
		}

		case NEWLINE:
			errorf(&pp_token.base.source_position, "newline while parsing %s", context);
			break;

		case EOF: {
			/* start from the token's position so that every field is
			 * initialised, then point at the line where the literal began */
			source_position_t source_position = pp_token.base.source_position;
			source_position.lineno = start_linenr;
			errorf(&source_position, "EOF while parsing %s", context);
			goto end_of_string;
		}

		default:
			if (input.c == delimiter) {
				next_char();
				goto end_of_string;
			} else {
				obstack_grow_utf8(&symbol_obstack, input.c);
				next_char();
				break;
			}
		}
	}

end_of_string:
	pp_token.kind           = kind;
	pp_token.literal.string = sym_make_string(enc);
}
768 
/**
 * Parses a string literal of the given encoding into pp_token; the current
 * character has to be the opening '"'.
 */
static void parse_string_literal(string_encoding_t const enc)
{
	utf32 const quote = '"';
	parse_string(quote, T_STRING_LITERAL, enc, "string literal");
}
773 
/**
 * Parses a character constant of the given encoding into pp_token and
 * reports an error if it turns out to be empty.
 */
static void parse_character_constant(string_encoding_t const enc)
{
	parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");

	bool const is_empty = pp_token.literal.string.size == 0;
	if (is_empty)
		parse_error("empty character constant");
}
781 
/*
 * Case-label lists, to be written as `case SYMBOL_CASES:` and so on.
 *
 * SYMBOL_CASES_WITHOUT_E_P covers '$', '_' and all letters except e, p, E
 * and P.  For '$' control jumps to the label dollar_sign, which the using
 * function has to provide, when allow_dollar_in_symbol is false.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
	     '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
	case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': \
	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': \
	case 'n': case 'o': case 'q': case 'r': case 's': case 't': \
	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': \
	case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': \
	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': \
	case 'N': case 'O': case 'Q': case 'R': case 'S': case 'T': \
	case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': \
	case '_'

/* The list above plus e, p, E and P. */
#define SYMBOL_CASES \
	     SYMBOL_CASES_WITHOUT_E_P: \
	case 'e': case 'p': case 'E': case 'P'

/* The decimal digits. */
#define DIGIT_CASES \
	     '0': case '1': case '2': case '3': case '4': \
	case '5': case '6': case '7': case '8': case '9'
852 
/**
 * Makes a definition the current expansion, starting at the beginning of
 * its replacement list.  The expansion that was current before is kept as
 * its parent.
 */
static void start_expanding(pp_definition_t *definition)
{
	/* the first replacement token takes over the current whitespace flag */
	if (definition->list_len > 0)
		definition->token_list[0].had_whitespace = info.had_whitespace;

	definition->is_expanding     = true;
	definition->expand_pos       = 0;
	definition->parent_expansion = current_expansion;
	current_expansion            = definition;
}
864 
/**
 * Ends the expansion of a definition, which has to be the current
 * expansion, and makes its parent the current expansion again.
 */
static void finished_expanding(pp_definition_t *definition)
{
	assert(definition->is_expanding);
	assert(current_expansion == definition);

	pp_definition_t *const outer = definition->parent_expansion;
	definition->is_expanding     = false;
	definition->parent_expansion = NULL;

	/* stop further expanding once we expanded a parameter used in a
	 * sub macro-call */
	if (argument_expanding == definition)
		argument_expanding = NULL;

	current_expansion = outer;
}
880 
grow_string_escaped(struct obstack * obst,const string_t * string,char const * delimiter)881 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
882 {
883 	char const *prefix = get_string_encoding_prefix(string->encoding);
884 	obstack_printf(obst, "%s%s", prefix, delimiter);
885 	size_t      size = string->size;
886 	const char *str  = string->begin;
887 	if (resolve_escape_sequences) {
888 		obstack_grow(obst, str, size);
889 	} else {
890 		for (size_t i = 0; i < size; ++i) {
891 			const char c = str[i];
892 			if (c == '\\' || c == '"')
893 				obstack_1grow(obst, '\\');
894 			obstack_1grow(obst, c);
895 		}
896 	}
897 	obstack_printf(obst, "%s", delimiter);
898 }
899 
grow_token(struct obstack * obst,const token_t * token)900 static void grow_token(struct obstack *obst, const token_t *token)
901 {
902 	switch (token->kind) {
903 	case T_NUMBER:
904 		obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
905 		break;
906 
907 	case T_STRING_LITERAL: {
908 		char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
909 		grow_string_escaped(obst, &token->literal.string, delimiter);
910 		break;
911 	}
912 
913 	case T_CHARACTER_CONSTANT:
914 		grow_string_escaped(obst, &token->literal.string, "'");
915 		break;
916 
917 	case T_IDENTIFIER:
918 	default: {
919 		const char *str = token->base.symbol->string;
920 		size_t      len = strlen(str);
921 		obstack_grow(obst, str, len);
922 		break;
923 	}
924 	}
925 }
926 
stringify(const pp_definition_t * definition)927 static void stringify(const pp_definition_t *definition)
928 {
929 	assert(obstack_object_size(&symbol_obstack) == 0);
930 
931 	size_t list_len = definition->list_len;
932 	for (size_t p = 0; p < list_len; ++p) {
933 		const saved_token_t *saved = &definition->token_list[p];
934 		if (p > 0 && saved->had_whitespace)
935 			obstack_1grow(&symbol_obstack, ' ');
936 		grow_token(&symbol_obstack, &saved->token);
937 	}
938 	pp_token.kind           = T_STRING_LITERAL;
939 	pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
940 }
941 
/**
 * Makes pp_token a token of the given kind, with the symbol that
 * token_symbols holds for this kind.
 */
static inline void set_punctuator(token_kind_t const kind)
{
	pp_token.base.symbol = token_symbols[kind];
	pp_token.kind        = kind;
}
947 
/**
 * Makes pp_token a token of the given kind with an explicitly given symbol.
 */
static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
{
	pp_token.base.symbol = symbol;
	pp_token.kind        = kind;
}
953 
954 /**
955  * returns next final token from a preprocessor macro expansion
956  */
expand_next(void)957 static bool expand_next(void)
958 {
959 	if (current_expansion == NULL)
960 		return false;
961 
962 restart:;
963 	size_t pos = current_expansion->expand_pos;
964 	if (pos >= current_expansion->list_len) {
965 		finished_expanding(current_expansion);
966 		/* it was the outermost expansion, parse pptoken normally */
967 		if (current_expansion == NULL) {
968 			return false;
969 		}
970 		goto restart;
971 	}
972 	const saved_token_t *saved = &current_expansion->token_list[pos++];
973 	pp_token = saved->token;
974 	if (pp_token.kind == '#') {
975 		if (pos < current_expansion->list_len) {
976 			const saved_token_t *next = &current_expansion->token_list[pos];
977 			if (next->token.kind == T_MACRO_PARAMETER) {
978 				pp_definition_t *def = next->token.macro_parameter.def;
979 				assert(def != NULL && def->is_parameter);
980 				stringify(def);
981 				++pos;
982 			}
983 		}
984 	}
985 
986 	if (current_expansion->expand_pos > 0)
987 		info.had_whitespace = saved->had_whitespace;
988 	current_expansion->expand_pos = pos;
989 	pp_token.base.source_position = expansion_pos;
990 
991 	return true;
992 }
993 
994 /**
995  * Returns the next token kind found when continuing the current expansions
996  * without starting new sub-expansions.
997  */
peek_expansion(void)998 static token_kind_t peek_expansion(void)
999 {
1000 	for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1001 		if (e->expand_pos < e->list_len)
1002 			return e->token_list[e->expand_pos].token.kind;
1003 	}
1004 	return T_EOF;
1005 }
1006 
skip_line_comment(void)1007 static void skip_line_comment(void)
1008 {
1009 	info.had_whitespace = true;
1010 	while (true) {
1011 		switch (input.c) {
1012 		case EOF:
1013 			return;
1014 
1015 		case '\r':
1016 		case '\n':
1017 			return;
1018 
1019 		default:
1020 			next_char();
1021 			break;
1022 		}
1023 	}
1024 }
1025 
skip_multiline_comment(void)1026 static void skip_multiline_comment(void)
1027 {
1028 	info.had_whitespace = true;
1029 
1030 	unsigned start_linenr = input.position.lineno;
1031 	while (true) {
1032 		switch (input.c) {
1033 		case '/':
1034 			next_char();
1035 			if (input.c == '*') {
1036 				/* TODO: nested comment, warn here */
1037 			}
1038 			break;
1039 		case '*':
1040 			next_char();
1041 			if (input.c == '/') {
1042 				if (input.position.lineno != input.output_line)
1043 					info.whitespace_at_line_begin = input.position.colno;
1044 				next_char();
1045 				return;
1046 			}
1047 			break;
1048 
1049 		case NEWLINE:
1050 			break;
1051 
1052 		case EOF: {
1053 			source_position_t source_position;
1054 			source_position.input_name = pp_token.base.source_position.input_name;
1055 			source_position.lineno     = start_linenr;
1056 			errorf(&source_position, "at end of file while looking for comment end");
1057 			return;
1058 		}
1059 
1060 		default:
1061 			next_char();
1062 			break;
1063 		}
1064 	}
1065 }
1066 
/**
 * Skips spaces, tabs and comments on the current line, and optionally all
 * other characters as well, until the NEWLINE case is reached.
 *
 * @param stop_at_non_whitespace  if true, return as soon as a character is
 *                                found that is neither a space/tab nor part
 *                                of a comment, instead of skipping it
 * @return  - at the NEWLINE case: true if at least one such other character
 *            was skipped on the way, false otherwise
 *          - false when stopping early because of @p stop_at_non_whitespace
 *          - true when a '/' that does not start a comment is found; the '/'
 *            is restored as the current input character
 *
 * NOTE(review): the lone-'/' case returns true regardless of
 * @p stop_at_non_whitespace and does not skip the '/' -- confirm this is
 * what callers expect.
 * NOTE(review): there is no explicit EOF case; EOF takes the default branch.
 */
static bool skip_till_newline(bool stop_at_non_whitespace)
{
	/* becomes true once a non-whitespace, non-comment character was skipped */
	bool res = false;
	while (true) {
		switch (input.c) {
		case ' ':
		case '\t':
			next_char();
			continue;

		case '/':
			next_char();
			if (input.c == '/') {
				next_char();
				skip_line_comment();
				continue;
			} else if (input.c == '*') {
				next_char();
				skip_multiline_comment();
				continue;
			} else {
				/* not a comment: undo the lookahead so that '/' is the
				 * current character again */
				put_back(input.c);
				input.c = '/';
			}
			return true;

		case NEWLINE:
			return res;

		default:
			if (stop_at_non_whitespace)
				return false;
			res = true;
			next_char();
			continue;
		}
	}
}
1105 
/**
 * Skips spaces, tabs, newlines and comments in the input and keeps the
 * whitespace bookkeeping in info up to date.
 *
 * Returns with the first character that is none of these as the current
 * input character. A '/' that does not start a comment is restored as the
 * current character.
 */
static void skip_whitespace(void)
{
	while (true) {
		switch (input.c) {
		case ' ':
		case '\t':
			++info.whitespace_at_line_begin;
			info.had_whitespace = true;
			next_char();
			continue;

		case NEWLINE:
			/* a new line starts: reset the indentation counter */
			info.at_line_begin  = true;
			info.had_whitespace = true;
			info.whitespace_at_line_begin = 0;
			continue;

		case '/':
			next_char();
			if (input.c == '/') {
				next_char();
				skip_line_comment();
				continue;
			} else if (input.c == '*') {
				next_char();
				skip_multiline_comment();
				continue;
			} else {
				/* not a comment: undo the lookahead */
				put_back(input.c);
				input.c = '/';
			}
			return;

		default:
			return;
		}
	}
}
1144 
/**
 * Consumes the current token, which must be the preprocessor keyword
 * @p kind, and reads the next input token (without macro expansion).
 *
 * @param kind  preprocessor keyword the current token is expected to be;
 *              only checked by an assertion
 */
static inline void eat_pp(pp_token_kind_t const kind)
{
	(void) kind; /* only read by the assertion below */
	assert(pp_token.base.symbol->pp_ID == kind);
	next_input_token();
}
1151 
/**
 * Consumes the current token, which must be of kind @p kind, and reads the
 * next input token (without macro expansion).
 *
 * @param kind  kind the current token is expected to have; only checked by
 *              an assertion
 */
static inline void eat_token(token_kind_t const kind)
{
	(void) kind; /* only read by the assertion below */
	assert(pp_token.kind == kind);
	next_input_token();
}
1158 
identify_encoding_prefix(symbol_t * const sym)1159 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1160 {
1161 	if (sym == symbol_L) return STRING_ENCODING_WIDE;
1162 	if (c_mode & _C11) {
1163 		if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
1164 		if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
1165 		if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1166 	}
1167 	return STRING_ENCODING_CHAR;
1168 }
1169 
parse_symbol(void)1170 static void parse_symbol(void)
1171 {
1172 	assert(obstack_object_size(&symbol_obstack) == 0);
1173 	while (true) {
1174 		switch (input.c) {
1175 		case DIGIT_CASES:
1176 		case SYMBOL_CASES:
1177 			obstack_1grow(&symbol_obstack, (char) input.c);
1178 			next_char();
1179 			break;
1180 
1181 		case '\\':
1182 			next_char();
1183 			switch (input.c) {
1184 			{
1185 				unsigned n;
1186 			case 'U': n = 8; goto universal;
1187 			case 'u': n = 4; goto universal;
1188 universal:
1189 				if (!resolve_escape_sequences) {
1190 					obstack_1grow(&symbol_obstack, '\\');
1191 					obstack_1grow(&symbol_obstack, input.c);
1192 				}
1193 				next_char();
1194 				utf32 const v = parse_universal_char(n);
1195 				if (!is_universal_char_valid_identifier(v)) {
1196 					if (is_universal_char_valid(v)) {
1197 						errorf(&input.position,
1198 							   "universal character \\%c%0*X is not valid in an identifier",
1199 							   n == 4 ? 'u' : 'U', (int)n, v);
1200 					}
1201 				} else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1202 					errorf(&input.position,
1203 						   "universal character \\%c%0*X is not valid as start of an identifier",
1204 						   n == 4 ? 'u' : 'U', (int)n, v);
1205 				} else if (resolve_escape_sequences) {
1206 					obstack_grow_utf8(&symbol_obstack, v);
1207 				}
1208 				break;
1209 			}
1210 
1211 			default:
1212 				put_back(input.c);
1213 				input.c = '\\';
1214 				goto end_symbol;
1215 			}
1216 
1217 		default:
1218 dollar_sign:
1219 			goto end_symbol;
1220 		}
1221 	}
1222 
1223 end_symbol:
1224 	obstack_1grow(&symbol_obstack, '\0');
1225 	char *string = obstack_finish(&symbol_obstack);
1226 
1227 	symbol_t *symbol = symbol_table_insert(string);
1228 
1229 	/* Might be a prefixed string or character constant: L/U/u/u8"string". */
1230 	if (input.c == '"') {
1231 		string_encoding_t const enc = identify_encoding_prefix(symbol);
1232 		if (enc != STRING_ENCODING_CHAR) {
1233 			parse_string_literal(enc);
1234 			return;
1235 		}
1236 	} else if (input.c == '\'') {
1237 		string_encoding_t const enc = identify_encoding_prefix(symbol);
1238 		if (enc != STRING_ENCODING_CHAR) {
1239 			if (enc == STRING_ENCODING_UTF8) {
1240 				errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a chracter constant");
1241 			}
1242 			parse_character_constant(enc);
1243 			return;
1244 		}
1245 	}
1246 
1247 	pp_token.kind        = symbol->ID;
1248 	pp_token.base.symbol = symbol;
1249 
1250 	/* we can free the memory from symbol obstack if we already had an entry in
1251 	 * the symbol table */
1252 	if (symbol->string != string) {
1253 		obstack_free(&symbol_obstack, string);
1254 	}
1255 }
1256 
/**
 * Lexes a preprocessing number starting at the current input character and
 * stores it in pp_token as a T_NUMBER.
 *
 * The first character is taken unconditionally. After that '.', digits and
 * symbol characters are accepted; 'e', 'E', 'p' and 'P' may additionally be
 * followed by one '+' or '-' sign. No validation of the number is done
 * here, only its spelling is collected.
 */
static void parse_number(void)
{
	obstack_1grow(&symbol_obstack, (char) input.c);
	next_char();

	while (true) {
		switch (input.c) {
		case '.':
		case DIGIT_CASES:
		case SYMBOL_CASES_WITHOUT_E_P:
			obstack_1grow(&symbol_obstack, (char) input.c);
			next_char();
			break;

		case 'e':
		case 'p':
		case 'E':
		case 'P':
			/* exponent marker: a directly following sign belongs to the
			 * number as well */
			obstack_1grow(&symbol_obstack, (char) input.c);
			next_char();
			if (input.c == '+' || input.c == '-') {
				obstack_1grow(&symbol_obstack, (char) input.c);
				next_char();
			}
			break;

		default:
dollar_sign:
			goto end_number;
		}
	}

end_number:
	pp_token.kind           = T_NUMBER;
	pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
}
1293 
/*
 * Helper macros used by next_input_token() to recognize punctuators that
 * consist of more than one character. A group has the shape
 *     MAYBE_PROLOG  MAYBE(...)*  ELSE(...)
 * and expands to a complete switch statement over the character that
 * follows the one already seen. MAYBE, MAYBE_DIGRAPH and ELSE all return
 * from the enclosing function.
 */

/** Consumes the current character and opens a switch over the next one. */
#define MAYBE_PROLOG \
	next_char(); \
	switch (input.c) {

/** If the next character is ch: consume it and produce punctuator kind. */
#define MAYBE(ch, kind) \
	case ch: \
		next_char(); \
		set_punctuator(kind); \
		return;

/** Like MAYBE, but the token gets the given digraph symbol. */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
	case ch: \
		next_char(); \
		set_digraph(kind, symbol); \
		return;

/** Default case running arbitrary code; closes the switch opened by
 * MAYBE_PROLOG. */
#define ELSE_CODE(code) \
	default: \
		code \
	}

/** Default case: produce punctuator kind without consuming the character
 * that was looked at; closes the switch opened by MAYBE_PROLOG. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1316 
/** identifies and returns the next preprocessing token contained in the
 * input stream. No macro expansion is performed.
 *
 * The token is stored in pp_token; info is updated to tell whether the token
 * was preceded by whitespace and whether it is the first one on its line.
 * Whitespace and comments are skipped. When the end of an included file is
 * reached, lexing continues in the including file; T_EOF is only produced at
 * the end of the outermost input. */
static void next_input_token(void)
{
	/* whitespace information that was already collected for this token
	 * takes precedence over a fresh start */
	if (next_info.had_whitespace) {
		info = next_info;
		next_info.had_whitespace = false;
	} else {
		info.at_line_begin  = false;
		info.had_whitespace = false;
	}
restart:
	/* the token starts at the current input position */
	pp_token.base.source_position = input.position;
	pp_token.base.symbol          = NULL;

	switch (input.c) {
	case ' ':
	case '\t':
		info.whitespace_at_line_begin++;
		info.had_whitespace = true;
		next_char();
		goto restart;

	case NEWLINE:
		info.at_line_begin            = true;
		info.had_whitespace           = true;
		info.whitespace_at_line_begin = 0;
		goto restart;

	case SYMBOL_CASES:
		parse_symbol();
		return;

	case DIGIT_CASES:
		parse_number();
		return;

	case '"':
		parse_string_literal(STRING_ENCODING_CHAR);
		return;

	case '\'':
		parse_character_constant(STRING_ENCODING_CHAR);
		return;

	/* '.' may start a number (".5"), an ellipsis or be a plain dot */
	case '.':
		MAYBE_PROLOG
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				put_back(input.c);
				input.c = '.';
				parse_number();
				return;

			case '.':
				MAYBE_PROLOG
				MAYBE('.', T_DOTDOTDOT)
				ELSE_CODE(
					put_back(input.c);
					input.c = '.';
					set_punctuator('.');
					return;
				)
		ELSE('.')
	case '&':
		MAYBE_PROLOG
		MAYBE('&', T_ANDAND)
		MAYBE('=', T_ANDEQUAL)
		ELSE('&')
	case '*':
		MAYBE_PROLOG
		MAYBE('=', T_ASTERISKEQUAL)
		ELSE('*')
	case '+':
		MAYBE_PROLOG
		MAYBE('+', T_PLUSPLUS)
		MAYBE('=', T_PLUSEQUAL)
		ELSE('+')
	case '-':
		MAYBE_PROLOG
		MAYBE('>', T_MINUSGREATER)
		MAYBE('-', T_MINUSMINUS)
		MAYBE('=', T_MINUSEQUAL)
		ELSE('-')
	case '!':
		MAYBE_PROLOG
		MAYBE('=', T_EXCLAMATIONMARKEQUAL)
		ELSE('!')
	/* '/' may also start a comment, which is skipped like whitespace */
	case '/':
		MAYBE_PROLOG
		MAYBE('=', T_SLASHEQUAL)
		case '*':
			next_char();
			skip_multiline_comment();
			goto restart;
		case '/':
			next_char();
			skip_line_comment();
			goto restart;
		ELSE('/')
	/* '%' starts several digraphs; the %:%: form needs two extra characters
	 * of lookahead and falls back to a single %: digraph */
	case '%':
		MAYBE_PROLOG
		MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
		MAYBE('=', T_PERCENTEQUAL)
		case ':':
			MAYBE_PROLOG
			case '%':
				MAYBE_PROLOG
				MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
				ELSE_CODE(
					put_back(input.c);
					input.c = '%';
					goto digraph_percentcolon;
				)
			ELSE_CODE(
digraph_percentcolon:
				set_digraph('#', symbol_percentcolon);
				return;
			)
		ELSE('%')
	case '<':
		MAYBE_PROLOG
		MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
		MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
		MAYBE('=', T_LESSEQUAL)
		case '<':
			MAYBE_PROLOG
			MAYBE('=', T_LESSLESSEQUAL)
			ELSE(T_LESSLESS)
		ELSE('<')
	case '>':
		MAYBE_PROLOG
		MAYBE('=', T_GREATEREQUAL)
		case '>':
			MAYBE_PROLOG
			MAYBE('=', T_GREATERGREATEREQUAL)
			ELSE(T_GREATERGREATER)
		ELSE('>')
	case '^':
		MAYBE_PROLOG
		MAYBE('=', T_CARETEQUAL)
		ELSE('^')
	case '|':
		MAYBE_PROLOG
		MAYBE('=', T_PIPEEQUAL)
		MAYBE('|', T_PIPEPIPE)
		ELSE('|')
	/* "::" is only a token of its own when the _CXX bit is set in c_mode;
	 * otherwise a single ':' is produced and the second ':' is left in the
	 * input */
	case ':':
		MAYBE_PROLOG
		MAYBE_DIGRAPH('>', ']', symbol_colongreater)
		case ':':
			if (c_mode & _CXX) {
				next_char();
				set_punctuator(T_COLONCOLON);
				return;
			}
			/* FALLTHROUGH */
		ELSE(':')
	case '=':
		MAYBE_PROLOG
		MAYBE('=', T_EQUALEQUAL)
		ELSE('=')
	case '#':
		MAYBE_PROLOG
		MAYBE('#', T_HASHHASH)
		ELSE('#')

	/* punctuators that are never part of a longer token */
	case '?':
	case '[':
	case ']':
	case '(':
	case ')':
	case '{':
	case '}':
	case '~':
	case ';':
	case ',':
		set_punctuator(input.c);
		next_char();
		return;

	case EOF:
		if (input_stack != NULL) {
			/* end of an included file: close it, continue with the input
			 * that was active before and announce the switch in the
			 * output with a line directive carrying the flag "2" */
			fclose(close_pp_input());
			pop_restore_input();
			if (out)
				fputc('\n', out);
			if (input.c == (utf32)EOF)
				--input.position.lineno;
			print_line_directive(&input.position, "2");
			goto restart;
		} else {
			info.at_line_begin = true;
			set_punctuator(T_EOF);
		}
		return;

	case '\\':
		/* peek at the character behind the backslash, then restore the
		 * input: a universal character name starts an identifier,
		 * anything else is handled as an unknown character */
		next_char();
		int next_c = input.c;
		put_back(input.c);
		input.c = '\\';
		if (next_c == 'U' || next_c == 'u') {
			parse_symbol();
			return;
		}
		/* FALLTHROUGH */
	default:
dollar_sign:
		if (error_on_unknown_chars) {
			/* report the character and continue with the next one */
			errorf(&pp_token.base.source_position, "unknown character '%lc' found", input.c);
			next_char();
			goto restart;
		} else {
			/* hand the character on as a T_UNKNOWN_CHAR token whose symbol
			 * is the UTF-8 spelling of the character */
			assert(obstack_object_size(&symbol_obstack) == 0);
			obstack_grow_utf8(&symbol_obstack, input.c);
			obstack_1grow(&symbol_obstack, '\0');
			char     *const string = obstack_finish(&symbol_obstack);
			symbol_t *const symbol = symbol_table_insert(string);
			/* the symbol already existed: our copy is not needed */
			if (symbol->string != string)
				obstack_free(&symbol_obstack, string);

			pp_token.kind        = T_UNKNOWN_CHAR;
			pp_token.base.symbol = symbol;
			next_char();
			return;
		}
	}
}
1554 
print_quoted_string(const char * const string)1555 static void print_quoted_string(const char *const string)
1556 {
1557 	fputc('"', out);
1558 	for (const char *c = string; *c != 0; ++c) {
1559 		switch (*c) {
1560 		case '"': fputs("\\\"", out); break;
1561 		case '\\':  fputs("\\\\", out); break;
1562 		case '\a':  fputs("\\a", out); break;
1563 		case '\b':  fputs("\\b", out); break;
1564 		case '\f':  fputs("\\f", out); break;
1565 		case '\n':  fputs("\\n", out); break;
1566 		case '\r':  fputs("\\r", out); break;
1567 		case '\t':  fputs("\\t", out); break;
1568 		case '\v':  fputs("\\v", out); break;
1569 		case '\?':  fputs("\\?", out); break;
1570 		default:
1571 			if (!isprint(*c)) {
1572 				fprintf(out, "\\%03o", (unsigned)*c);
1573 				break;
1574 			}
1575 			fputc(*c, out);
1576 			break;
1577 		}
1578 	}
1579 	fputc('"', out);
1580 }
1581 
print_line_directive(const source_position_t * pos,const char * add)1582 static void print_line_directive(const source_position_t *pos, const char *add)
1583 {
1584 	if (!out)
1585 		return;
1586 
1587 	fprintf(out, "# %u ", pos->lineno);
1588 	print_quoted_string(pos->input_name);
1589 	if (add != NULL) {
1590 		fputc(' ', out);
1591 		fputs(add, out);
1592 	}
1593 	if (pos->is_system_header) {
1594 		fputs(" 3", out);
1595 	}
1596 
1597 	printed_input_name = pos->input_name;
1598 	input.output_line  = pos->lineno-1;
1599 }
1600 
emit_newlines(void)1601 static bool emit_newlines(void)
1602 {
1603 	if (!out)
1604 		return true;
1605 
1606 	unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1607 	if (delta == 0)
1608 		return false;
1609 
1610 	if (delta >= 9) {
1611 		fputc('\n', out);
1612 		print_line_directive(&pp_token.base.source_position, NULL);
1613 		fputc('\n', out);
1614 	} else {
1615 		for (unsigned i = 0; i < delta; ++i) {
1616 			fputc('\n', out);
1617 		}
1618 	}
1619 	input.output_line = pp_token.base.source_position.lineno;
1620 
1621 	unsigned whitespace = info.whitespace_at_line_begin;
1622 	/* make sure there is at least 1 whitespace before a (macro-expanded)
1623 	 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1624 	if (pp_token.kind == '#' && whitespace == 0)
1625 		++whitespace;
1626 	for (unsigned i = 0; i < whitespace; ++i)
1627 		fputc(' ', out);
1628 
1629 	return true;
1630 }
1631 
set_preprocessor_output(FILE * output)1632 void set_preprocessor_output(FILE *output)
1633 {
1634 	out = output;
1635 	if (out != NULL) {
1636 		error_on_unknown_chars   = false;
1637 		resolve_escape_sequences = false;
1638 	} else {
1639 		error_on_unknown_chars   = true;
1640 		resolve_escape_sequences = true;
1641 	}
1642 }
1643 
emit_pp_token(void)1644 void emit_pp_token(void)
1645 {
1646 	if (!emit_newlines() &&
1647 	    (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1648 		fputc(' ', out);
1649 
1650 	switch (pp_token.kind) {
1651 	case T_NUMBER:
1652 		fputs(pp_token.literal.string.begin, out);
1653 		break;
1654 
1655 	case T_STRING_LITERAL:
1656 		fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1657 		fputc('"', out);
1658 		fputs(pp_token.literal.string.begin, out);
1659 		fputc('"', out);
1660 		break;
1661 
1662 	case T_CHARACTER_CONSTANT:
1663 		fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1664 		fputc('\'', out);
1665 		fputs(pp_token.literal.string.begin, out);
1666 		fputc('\'', out);
1667 		break;
1668 
1669 	case T_MACRO_PARAMETER:
1670 		panic("macro parameter not expanded");
1671 
1672 	default:
1673 		fputs(pp_token.base.symbol->string, out);
1674 		break;
1675 	}
1676 	last_token = pp_token.kind;
1677 }
1678 
eat_pp_directive(void)1679 static void eat_pp_directive(void)
1680 {
1681 	while (!info.at_line_begin) {
1682 		next_input_token();
1683 	}
1684 }
1685 
strings_equal(const string_t * string1,const string_t * string2)1686 static bool strings_equal(const string_t *string1, const string_t *string2)
1687 {
1688 	size_t size = string1->size;
1689 	if (size != string2->size)
1690 		return false;
1691 
1692 	const char *c1 = string1->begin;
1693 	const char *c2 = string2->begin;
1694 	for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1695 		if (*c1 != *c2)
1696 			return false;
1697 	}
1698 	return true;
1699 }
1700 
pp_tokens_equal(const token_t * token1,const token_t * token2)1701 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1702 {
1703 	if (token1->kind != token2->kind)
1704 		return false;
1705 
1706 	switch (token1->kind) {
1707 	case T_NUMBER:
1708 	case T_CHARACTER_CONSTANT:
1709 	case T_STRING_LITERAL:
1710 		return strings_equal(&token1->literal.string, &token2->literal.string);
1711 
1712 	case T_MACRO_PARAMETER:
1713 		return token1->macro_parameter.def->symbol
1714 		    == token2->macro_parameter.def->symbol;
1715 
1716 	default:
1717 		return token1->base.symbol == token2->base.symbol;
1718 	}
1719 }
1720 
pp_definitions_equal(const pp_definition_t * definition1,const pp_definition_t * definition2)1721 static bool pp_definitions_equal(const pp_definition_t *definition1,
1722                                  const pp_definition_t *definition2)
1723 {
1724 	if (definition1->list_len != definition2->list_len)
1725 		return false;
1726 
1727 	size_t               len = definition1->list_len;
1728 	const saved_token_t *t1  = definition1->token_list;
1729 	const saved_token_t *t2  = definition2->token_list;
1730 	for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1731 		if (!pp_tokens_equal(&t1->token, &t2->token))
1732 			return false;
1733 		if (t1->had_whitespace != t2->had_whitespace)
1734 			return false;
1735 	}
1736 	return true;
1737 }
1738 
missing_macro_param_error(void)1739 static void missing_macro_param_error(void)
1740 {
1741 	errorf(&pp_token.base.source_position,
1742 	       "'#' is not followed by a macro parameter");
1743 }
1744 
is_defineable_token(char const * const context)1745 static bool is_defineable_token(char const *const context)
1746 {
1747 	if (info.at_line_begin) {
1748 		errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1749 	}
1750 
1751 	symbol_t *const symbol = pp_token.base.symbol;
1752 	if (!symbol)
1753 		goto no_ident;
1754 
1755 	if (pp_token.kind != T_IDENTIFIER) {
1756 		switch (symbol->string[0]) {
1757 		case SYMBOL_CASES:
1758 dollar_sign:
1759 			break;
1760 
1761 		default:
1762 no_ident:
1763 			errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1764 			return false;
1765 		}
1766 	}
1767 
1768 	/* TODO turn this into a flag in pp_def. */
1769 	switch (symbol->pp_ID) {
1770 	/* §6.10.8:4 */
1771 	case TP_defined:
1772 		errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1773 		return false;
1774 
1775 	default:
1776 		return true;
1777 	}
1778 }
1779 
/**
 * Parses a #define directive and installs the resulting definition as the
 * pp_definition of the macro's symbol.
 *
 * The directive consists of the macro name, an optional parameter list
 * (only if '(' follows the name without any whitespace) and the replacement
 * list up to the end of the line. All data is allocated on pp_obstack.
 *
 * In skip mode the directive is consumed without any effect. On a syntax
 * error the rest of the directive is skipped and no definition is
 * installed. Redefining a macro with a different definition produces a
 * warning; an identical redefinition reuses the old definition.
 */
static void parse_define_directive(void)
{
	eat_pp(TP_define);
	if (skip_mode) {
		eat_pp_directive();
		return;
	}

	assert(obstack_object_size(&pp_obstack) == 0);

	if (!is_defineable_token("#define"))
		goto error_out;
	symbol_t *const symbol = pp_token.base.symbol;

	pp_definition_t *new_definition
		= obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
	memset(new_definition, 0, sizeof(new_definition[0]));
	new_definition->symbol          = symbol;
	new_definition->source_position = input.position;

	/* this is probably the only place where spaces are significant in the
	 * lexer (except for the fact that they separate tokens). #define b(x)
	 * is something else than #define b (x) */
	if (input.c == '(') {
		next_input_token();
		eat_token('(');

		/* parse the parameter list; every parameter is appended to the
		 * object growing on pp_obstack */
		while (true) {
			switch (pp_token.kind) {
			case T_DOTDOTDOT:
				new_definition->is_variadic = true;
				eat_token(T_DOTDOTDOT);
				if (pp_token.kind != ')') {
					errorf(&input.position,
							"'...' not at end of macro argument list");
					goto error_out;
				}
				break;

			case T_IDENTIFIER: {
				pp_definition_t parameter;
				memset(&parameter, 0, sizeof(parameter));
				parameter.source_position = pp_token.base.source_position;
				parameter.symbol          = pp_token.base.symbol;
				parameter.is_parameter    = true;
				obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
				eat_token(T_IDENTIFIER);

				if (pp_token.kind == ',') {
					eat_token(',');
					break;
				}

				if (pp_token.kind != ')') {
					errorf(&pp_token.base.source_position,
					       "expected ',' or ')' after identifier, got %K",
					       &pp_token);
					goto error_out;
				}
				break;
			}

			case ')':
				eat_token(')');
				goto finish_argument_list;

			default:
				errorf(&pp_token.base.source_position,
				       "expected identifier, '...' or ')' in #define argument list, got %K",
				       &pp_token);
				goto error_out;
			}
		}

	finish_argument_list:
		new_definition->has_parameters = true;
		size_t size = obstack_object_size(&pp_obstack);
		new_definition->n_parameters
			= size / sizeof(new_definition->parameters[0]);
		new_definition->parameters = obstack_finish(&pp_obstack);
		/* Temporarily make every parameter the pp_definition of its symbol
		 * so that the replacement list can recognize parameter names. The
		 * definition that was there before is kept in parent_expansion and
		 * restored further down. */
		for (size_t i = 0; i < new_definition->n_parameters; ++i) {
			pp_definition_t *param    = &new_definition->parameters[i];
			symbol_t        *symbol   = param->symbol;
			pp_definition_t *previous = symbol->pp_definition;
			/* the symbol already points to a parameter of this very
			 * macro: the name was used twice */
			if (previous != NULL
			    && previous->function_definition == new_definition) {
				errorf(&param->source_position,
				       "duplicate macro parameter '%Y'", symbol);
				param->symbol = sym_anonymous;
				continue;
			}
			param->parent_expansion    = previous;
			param->function_definition = new_definition;
			symbol->pp_definition      = param;
		}
	} else {
		next_input_token();
	}

	/* construct token list */
	assert(obstack_object_size(&pp_obstack) == 0);
	/* set after a '#' in a function-like macro: the following token has to
	 * be a macro parameter */
	bool next_must_be_param = false;
	while (!info.at_line_begin) {
		if (pp_token.kind == T_IDENTIFIER) {
			const symbol_t  *symbol     = pp_token.base.symbol;
			pp_definition_t *definition = symbol->pp_definition;
			/* identifiers naming a parameter of this macro are stored as
			 * references to that parameter */
			if (definition != NULL
			    && definition->function_definition == new_definition) {
			    pp_token.kind                = T_MACRO_PARAMETER;
			    pp_token.macro_parameter.def = definition;
			}
		}
		if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
			missing_macro_param_error();
		}
		saved_token_t saved_token;
		saved_token.token = pp_token;
		saved_token.had_whitespace = info.had_whitespace;
		obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
		next_must_be_param
			= new_definition->has_parameters && pp_token.kind == '#';
		next_input_token();
	}
	/* '#' was the last token of the replacement list */
	if (next_must_be_param)
		missing_macro_param_error();

	new_definition->list_len   = obstack_object_size(&pp_obstack)
		/ sizeof(new_definition->token_list[0]);
	new_definition->token_list = obstack_finish(&pp_obstack);

	/* undo the temporary binding of the parameter names */
	if (new_definition->has_parameters) {
		for (size_t i = 0; i < new_definition->n_parameters; ++i) {
			pp_definition_t *param      = &new_definition->parameters[i];
			symbol_t        *symbol     = param->symbol;
			/* duplicate parameters were never bound */
			if (symbol == sym_anonymous)
				continue;
			assert(symbol->pp_definition == param);
			assert(param->function_definition == new_definition);
			symbol->pp_definition   = param->parent_expansion;
			param->parent_expansion = NULL;
		}
	}

	pp_definition_t *old_definition = symbol->pp_definition;
	if (old_definition != NULL) {
		if (!pp_definitions_equal(old_definition, new_definition)) {
			warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
		} else {
			/* reuse the old definition */
			obstack_free(&pp_obstack, new_definition);
			new_definition = old_definition;
		}
	}

	symbol->pp_definition = new_definition;
	return;

error_out:
	/* discard a partially grown parameter list */
	if (obstack_object_size(&pp_obstack) > 0) {
		char *ptr = obstack_finish(&pp_obstack);
		obstack_free(&pp_obstack, ptr);
	}
	eat_pp_directive();
}
1944 
parse_undef_directive(void)1945 static void parse_undef_directive(void)
1946 {
1947 	eat_pp(TP_undef);
1948 	if (skip_mode) {
1949 		eat_pp_directive();
1950 		return;
1951 	}
1952 
1953 	if (!is_defineable_token("#undef")) {
1954 		eat_pp_directive();
1955 		return;
1956 	}
1957 
1958 	pp_token.base.symbol->pp_definition = NULL;
1959 	next_input_token();
1960 
1961 	if (!info.at_line_begin) {
1962 		warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1963 	}
1964 	eat_pp_directive();
1965 }
1966 
/** Behind an #include we can have the special headername lexemes.
 * They're only allowed behind an #include so they're not recognized
 * by the normal next_preprocessing_token. We handle them as a special
 * exception here.
 *
 * Three forms are accepted: a name in angle brackets, a name in double
 * quotes, and any other token sequence that yields a string literal or a
 * '<' ... '>' token sequence after next_preprocessing_token().
 *
 * @param system_include  set to true for the angle bracket form and to
 *                        false for the quoted form; not written on every
 *                        error path
 * @return the header name, or NULL if an error was reported
 */
static const char *parse_headername(bool *system_include)
{
	if (info.at_line_begin) {
		parse_error("expected headername after #include");
		return NULL;
	}

	/* check whether we have a "... or <... headername */
	source_position_t position = input.position;
	switch (input.c) {
	{
		utf32 delimiter;
	case '<': delimiter = '>'; *system_include = true;  goto parse_name;
	case '"': delimiter = '"'; *system_include = false; goto parse_name;
parse_name:
		/* collect all characters up to the closing delimiter verbatim */
		assert(obstack_object_size(&symbol_obstack) == 0);
		next_char();
		while (true) {
			switch (input.c) {
			case NEWLINE:
			case EOF:
				{
					/* throw away what has been collected so far */
					char *dummy = obstack_finish(&symbol_obstack);
					obstack_free(&symbol_obstack, dummy);
				}
				errorf(&pp_token.base.source_position,
				       "header name without closing '%c'", (char)delimiter);
				return NULL;

			default:
				if (input.c == delimiter) {
					next_char();
					goto finish_headername;
				} else {
					obstack_1grow(&symbol_obstack, (char)input.c);
					next_char();
				}
				break;
			}
		}
		/* we should never be here */
	}

	default:
		/* neither '<' nor '"': read a token with next_preprocessing_token()
		 * and look at the result */
		next_preprocessing_token();
		if (info.at_line_begin) {
			/* TODO: if we are already in the new line then we parsed more than
			 * wanted. We reuse the token, but this could produce follow-up
			 * errors or misbehaviour... */
			goto error_invalid_input;
		}
		if (pp_token.kind == T_STRING_LITERAL) {
			*system_include = false;
			return pp_token.literal.string.begin;
		} else if (pp_token.kind == '<') {
			*system_include = true;
			assert(obstack_object_size(&pp_obstack) == 0);
			/* gather the tokens up to the closing '>' on pp_obstack */
			while (true) {
				next_preprocessing_token();
				if (info.at_line_begin) {
					/* TODO: we shouldn't have parsed/expanded something on the
					 * next line yet... */
					char *dummy = obstack_finish(&pp_obstack);
					obstack_free(&pp_obstack, dummy);
					goto error_invalid_input;
				}
				if (pp_token.kind == '>')
					break;

				saved_token_t saved;
				saved.token          = pp_token;
				saved.had_whitespace = info.had_whitespace;
				obstack_grow(&pp_obstack, &saved, sizeof(saved));
			}
			size_t size = obstack_object_size(&pp_obstack);
			assert(size % sizeof(saved_token_t) == 0);
			size_t n_tokens = size / sizeof(saved_token_t);
			saved_token_t *tokens = obstack_finish(&pp_obstack);
			assert(obstack_object_size(&symbol_obstack) == 0);
			/* spell the gathered tokens out again to form the header name;
			 * whitespace between tokens becomes a single space */
			for (size_t i = 0; i < n_tokens; ++i) {
				const saved_token_t *saved = &tokens[i];
				if (i > 0 && saved->had_whitespace)
					obstack_1grow(&symbol_obstack, ' ');
				grow_token(&symbol_obstack, &saved->token);
			}
			obstack_free(&pp_obstack, tokens);
			goto finish_headername;
		} else {
error_invalid_input:
			{
				char *dummy = obstack_finish(&symbol_obstack);
				obstack_free(&symbol_obstack, dummy);
			}

			errorf(&pp_token.base.source_position,
			       "expected \"FILENAME\" or <FILENAME> after #include");
			return NULL;
		}
	}

finish_headername:
	/* terminate the collected name and pass it through identify_string() */
	obstack_1grow(&symbol_obstack, '\0');
	char *const  headername = obstack_finish(&symbol_obstack);
	const char  *identified = identify_string(headername);
	/* the token position becomes the place where the header name started */
	pp_token.base.source_position = position;
	return identified;
}
2078 
do_include(bool const bracket_include,bool const include_next,char const * const headername)2079 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2080 {
2081 	size_t const        headername_len = strlen(headername);
2082 	searchpath_entry_t *entry;
2083 	if (include_next) {
2084 		entry = input.path      ? input.path->next
2085 		      : bracket_include ? bracket_searchpath.first
2086 		      : quote_searchpath.first;
2087 	} else {
2088 		if (!bracket_include) {
2089 			/* put dirname of current input on obstack */
2090 			const char *filename   = input.position.input_name;
2091 			const char *last_slash = strrchr(filename, '/');
2092 			const char *full_name;
2093 			if (last_slash != NULL) {
2094 				size_t len = last_slash - filename;
2095 				obstack_grow(&symbol_obstack, filename, len + 1);
2096 				obstack_grow0(&symbol_obstack, headername, headername_len);
2097 				char *complete_path = obstack_finish(&symbol_obstack);
2098 				full_name = identify_string(complete_path);
2099 			} else {
2100 				full_name = headername;
2101 			}
2102 
2103 			FILE *file = fopen(full_name, "r");
2104 			if (file != NULL) {
2105 				switch_pp_input(file, full_name, NULL, false);
2106 				return true;
2107 			}
2108 			entry = quote_searchpath.first;
2109 		} else {
2110 			entry = bracket_searchpath.first;
2111 		}
2112 	}
2113 
2114 	assert(obstack_object_size(&symbol_obstack) == 0);
2115 	/* check searchpath */
2116 	for (; entry; entry = entry->next) {
2117 	    const char *path = entry->path;
2118 	    size_t      len  = strlen(path);
2119 		obstack_grow(&symbol_obstack, path, len);
2120 		if (path[len-1] != '/')
2121 			obstack_1grow(&symbol_obstack, '/');
2122 		obstack_grow(&symbol_obstack, headername, headername_len+1);
2123 
2124 		char *complete_path = obstack_finish(&symbol_obstack);
2125 		FILE *file          = fopen(complete_path, "r");
2126 		if (file != NULL) {
2127 			const char *filename = identify_string(complete_path);
2128 			switch_pp_input(file, filename, entry, entry->is_system_path);
2129 			return true;
2130 		} else {
2131 			obstack_free(&symbol_obstack, complete_path);
2132 		}
2133 	}
2134 
2135 	return false;
2136 }
2137 
parse_include_directive(bool const include_next)2138 static void parse_include_directive(bool const include_next)
2139 {
2140 	if (skip_mode) {
2141 		eat_pp_directive();
2142 		return;
2143 	}
2144 
2145 	/* do not eat the TP_include, since it would already parse the next token
2146 	 * which needs special handling here. */
2147 	skip_till_newline(true);
2148 	bool system_include;
2149 	const char *headername = parse_headername(&system_include);
2150 	if (headername == NULL) {
2151 		eat_pp_directive();
2152 		return;
2153 	}
2154 
2155 	bool had_nonwhitespace = skip_till_newline(false);
2156 	if (had_nonwhitespace) {
2157 		warningf(WARN_OTHER, &input.position,
2158 		         "extra tokens at end of #include directive");
2159 	}
2160 
2161 	if (n_inputs > INCLUDE_LIMIT) {
2162 		errorf(&pp_token.base.source_position, "#include nested too deeply");
2163 		/* eat \n or EOF */
2164 		next_input_token();
2165 		return;
2166 	}
2167 
2168 	/* switch inputs */
2169 	info.whitespace_at_line_begin = 0;
2170 	info.had_whitespace           = false;
2171 	info.at_line_begin            = true;
2172 	emit_newlines();
2173 	push_input();
2174 	bool res = do_include(system_include, include_next, headername);
2175 	if (res) {
2176 		next_input_token();
2177 	} else {
2178 		errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2179 		pop_restore_input();
2180 	}
2181 }
2182 
push_conditional(void)2183 static pp_conditional_t *push_conditional(void)
2184 {
2185 	pp_conditional_t *conditional
2186 		= obstack_alloc(&pp_obstack, sizeof(*conditional));
2187 	memset(conditional, 0, sizeof(*conditional));
2188 
2189 	conditional->parent = conditional_stack;
2190 	conditional_stack   = conditional;
2191 
2192 	return conditional;
2193 }
2194 
pop_conditional(void)2195 static void pop_conditional(void)
2196 {
2197 	assert(conditional_stack != NULL);
2198 	conditional_stack = conditional_stack->parent;
2199 }
2200 
check_unclosed_conditionals(void)2201 void check_unclosed_conditionals(void)
2202 {
2203 	while (conditional_stack != NULL) {
2204 		pp_conditional_t *conditional = conditional_stack;
2205 
2206 		if (conditional->in_else) {
2207 			errorf(&conditional->source_position, "unterminated #else");
2208 		} else {
2209 			errorf(&conditional->source_position, "unterminated condition");
2210 		}
2211 		pop_conditional();
2212 	}
2213 }
2214 
parse_ifdef_ifndef_directive(bool const is_ifdef)2215 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2216 {
2217 	bool condition;
2218 	eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2219 
2220 	if (skip_mode) {
2221 		eat_pp_directive();
2222 		pp_conditional_t *conditional = push_conditional();
2223 		conditional->source_position  = pp_token.base.source_position;
2224 		conditional->skip             = true;
2225 		return;
2226 	}
2227 
2228 	if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2229 		errorf(&pp_token.base.source_position,
2230 		       "expected identifier after #%s, got %K",
2231 		       is_ifdef ? "ifdef" : "ifndef", &pp_token);
2232 		eat_pp_directive();
2233 
2234 		/* just take the true case in the hope to avoid further errors */
2235 		condition = true;
2236 	} else {
2237 		/* evaluate wether we are in true or false case */
2238 		condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2239 		eat_token(T_IDENTIFIER);
2240 
2241 		if (!info.at_line_begin) {
2242 			errorf(&pp_token.base.source_position,
2243 			       "extra tokens at end of #%s",
2244 			       is_ifdef ? "ifdef" : "ifndef");
2245 			eat_pp_directive();
2246 		}
2247 	}
2248 
2249 	pp_conditional_t *conditional = push_conditional();
2250 	conditional->source_position  = pp_token.base.source_position;
2251 	conditional->condition        = condition;
2252 
2253 	if (!condition) {
2254 		skip_mode = true;
2255 	}
2256 }
2257 
parse_else_directive(void)2258 static void parse_else_directive(void)
2259 {
2260 	eat_pp(TP_else);
2261 
2262 	if (!info.at_line_begin) {
2263 		if (!skip_mode) {
2264 			warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2265 		}
2266 		eat_pp_directive();
2267 	}
2268 
2269 	pp_conditional_t *conditional = conditional_stack;
2270 	if (conditional == NULL) {
2271 		errorf(&pp_token.base.source_position, "#else without prior #if");
2272 		return;
2273 	}
2274 
2275 	if (conditional->in_else) {
2276 		errorf(&pp_token.base.source_position,
2277 		       "#else after #else (condition started %P)",
2278 		       &conditional->source_position);
2279 		skip_mode = true;
2280 		return;
2281 	}
2282 
2283 	conditional->in_else = true;
2284 	if (!conditional->skip) {
2285 		skip_mode = conditional->condition;
2286 	}
2287 	conditional->source_position = pp_token.base.source_position;
2288 }
2289 
parse_endif_directive(void)2290 static void parse_endif_directive(void)
2291 {
2292 	eat_pp(TP_endif);
2293 
2294 	if (!info.at_line_begin) {
2295 		if (!skip_mode) {
2296 			warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2297 		}
2298 		eat_pp_directive();
2299 	}
2300 
2301 	pp_conditional_t *conditional = conditional_stack;
2302 	if (conditional == NULL) {
2303 		errorf(&pp_token.base.source_position, "#endif without prior #if");
2304 		return;
2305 	}
2306 
2307 	if (!conditional->skip) {
2308 		skip_mode = false;
2309 	}
2310 	pop_conditional();
2311 }
2312 
/** Which "#pragma STDC" switch a pragma directive names. */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN,          /* not (or not yet) recognised as an STDC pragma */
	STDC_FP_CONTRACT,      /* selected by the TP_FP_CONTRACT keyword */
	STDC_FENV_ACCESS,      /* selected by the TP_FENV_ACCESS keyword */
	STDC_CX_LIMITED_RANGE  /* selected by the TP_CX_LIMITED_RANGE keyword */
} stdc_pragma_kind_t;
2319 
/** The on-off-switch argument given to a "#pragma STDC" directive. */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN,  /* argument is none of the keywords below */
	STDC_VALUE_ON,       /* selected by the TP_ON keyword */
	STDC_VALUE_OFF,      /* selected by the TP_OFF keyword */
	STDC_VALUE_DEFAULT   /* selected by the TP_DEFAULT keyword */
} stdc_pragma_value_kind_t;
2326 
parse_pragma_directive(void)2327 static void parse_pragma_directive(void)
2328 {
2329 	eat_pp(TP_pragma);
2330 	if (skip_mode) {
2331 		eat_pp_directive();
2332 		return;
2333 	}
2334 
2335 	if (pp_token.kind != T_IDENTIFIER) {
2336 		warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2337 		         "expected identifier after #pragma");
2338 		eat_pp_directive();
2339 		return;
2340 	}
2341 
2342 	stdc_pragma_kind_t kind = STDC_UNKNOWN;
2343 	if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2344 		/* a STDC pragma */
2345 		next_input_token();
2346 
2347 		switch (pp_token.base.symbol->pp_ID) {
2348 		case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2349 		case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2350 		case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2351 		default:                  break;
2352 		}
2353 		if (kind != STDC_UNKNOWN) {
2354 			next_input_token();
2355 			stdc_pragma_value_kind_t value;
2356 			switch (pp_token.base.symbol->pp_ID) {
2357 			case TP_ON:      value = STDC_VALUE_ON;      break;
2358 			case TP_OFF:     value = STDC_VALUE_OFF;     break;
2359 			case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2360 			default:         value = STDC_VALUE_UNKNOWN; break;
2361 			}
2362 			if (value == STDC_VALUE_UNKNOWN) {
2363 				kind = STDC_UNKNOWN;
2364 				errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2365 			}
2366 		}
2367 	}
2368 	eat_pp_directive();
2369 	if (kind == STDC_UNKNOWN) {
2370 		warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2371 		         "encountered unknown #pragma");
2372 	}
2373 }
2374 
parse_line_directive(void)2375 static void parse_line_directive(void)
2376 {
2377 	if (pp_token.kind != T_NUMBER) {
2378 		if (!skip_mode)
2379 			parse_error("expected integer");
2380 	} else {
2381 		char      *end;
2382 		long const line = strtol(pp_token.literal.string.begin, &end, 0);
2383 		if (*end == '\0') {
2384 			/* use offset -1 as this is about the next line */
2385 			input.position.lineno = line - 1;
2386 			/* force output of line */
2387 			input.output_line = input.position.lineno - 20;
2388 		} else {
2389 			if (!skip_mode) {
2390 				errorf(&input.position, "'%S' is not a valid line number",
2391 					   &pp_token.literal.string);
2392 			}
2393 		}
2394 		next_input_token();
2395 		if (info.at_line_begin)
2396 			return;
2397 	}
2398 	if (pp_token.kind == T_STRING_LITERAL
2399 	    && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2400 		input.position.input_name       = pp_token.literal.string.begin;
2401 		input.position.is_system_header = false;
2402 		next_input_token();
2403 
2404 		/* attempt to parse numeric flags as outputted by gcc preprocessor */
2405 		while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2406 			/* flags:
2407 			 * 1 - indicates start of a new file
2408 			 * 2 - indicates return from a file
2409 			 * 3 - indicates system header
2410 			 * 4 - indicates implicit extern "C" in C++ mode
2411 			 *
2412 			 * currently we're only interested in "3"
2413 			 */
2414 			if (streq(pp_token.literal.string.begin, "3")) {
2415 				input.position.is_system_header = true;
2416 			}
2417 			next_input_token();
2418 		}
2419 	}
2420 
2421 	eat_pp_directive();
2422 }
2423 
parse_error_directive(void)2424 static void parse_error_directive(void)
2425 {
2426 	if (skip_mode) {
2427 		eat_pp_directive();
2428 		return;
2429 	}
2430 
2431 	bool const old_resolve_escape_sequences = resolve_escape_sequences;
2432 	resolve_escape_sequences = false;
2433 
2434 	source_position_t const pos = pp_token.base.source_position;
2435 	do {
2436 		if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2437 			obstack_1grow(&pp_obstack, ' ');
2438 
2439 		switch (pp_token.kind) {
2440 		case T_NUMBER: {
2441 			string_t const *const str = &pp_token.literal.string;
2442 			obstack_grow(&pp_obstack, str->begin, str->size);
2443 			break;
2444 		}
2445 
2446 		{
2447 			char delim;
2448 		case T_STRING_LITERAL:     delim =  '"'; goto string;
2449 		case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2450 string:;
2451 			string_t const *const str = &pp_token.literal.string;
2452 			char     const *const enc = get_string_encoding_prefix(str->encoding);
2453 			obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2454 			break;
2455 		}
2456 
2457 		default: {
2458 			char const *const str = pp_token.base.symbol->string;
2459 			obstack_grow(&pp_obstack, str, strlen(str));
2460 			break;
2461 		}
2462 		}
2463 
2464 		next_input_token();
2465 	} while (!info.at_line_begin);
2466 
2467 	resolve_escape_sequences = old_resolve_escape_sequences;
2468 
2469 	obstack_1grow(&pp_obstack, '\0');
2470 	char *const str = obstack_finish(&pp_obstack);
2471 	errorf(&pos, "#%s", str);
2472 	obstack_free(&pp_obstack, str);
2473 }
2474 
parse_preprocessing_directive(void)2475 static void parse_preprocessing_directive(void)
2476 {
2477 	eat_token('#');
2478 
2479 	if (info.at_line_begin) {
2480 		/* empty directive */
2481 		return;
2482 	}
2483 
2484 	if (pp_token.base.symbol) {
2485 		switch (pp_token.base.symbol->pp_ID) {
2486 		case TP_define:       parse_define_directive();            break;
2487 		case TP_else:         parse_else_directive();              break;
2488 		case TP_endif:        parse_endif_directive();             break;
2489 		case TP_error:        parse_error_directive();             break;
2490 		case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2491 		case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2492 		case TP_include:      parse_include_directive(false);      break;
2493 		case TP_include_next: parse_include_directive(true);       break;
2494 		case TP_line:         next_input_token(); goto line_directive;
2495 		case TP_pragma:       parse_pragma_directive();            break;
2496 		case TP_undef:        parse_undef_directive();             break;
2497 		default:              goto skip;
2498 		}
2499 	} else if (pp_token.kind == T_NUMBER) {
2500 line_directive:
2501 		parse_line_directive();
2502 	} else {
2503 skip:
2504 		if (!skip_mode) {
2505 			errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2506 		}
2507 		eat_pp_directive();
2508 	}
2509 
2510 	assert(info.at_line_begin);
2511 }
2512 
finish_current_argument(void)2513 static void finish_current_argument(void)
2514 {
2515 	if (current_argument == NULL)
2516 		return;
2517 	size_t size = obstack_object_size(&pp_obstack);
2518 	current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2519 	current_argument->token_list = obstack_finish(&pp_obstack);
2520 }
2521 
/**
 * Advances pp_token to the next token after preprocessing.
 *
 * Directives at the beginning of a line are executed, tokens in skipped
 * conditional groups are dropped, and macro names start an expansion instead
 * of being returned. While the arguments of a function-like macro call are
 * being read (current_call != NULL) the tokens are collected into the
 * arguments instead of being returned. Whenever the current token was
 * consumed internally the function jumps back to "restart" to fetch another.
 */
void next_preprocessing_token(void)
{
restart:
	/* NOTE(review): expand_next() presumably delivers the next token of a
	 * running macro expansion; only if it returns false is the input read. */
	if (!expand_next()) {
		do {
			next_input_token();
			/* a '#' at the beginning of a line starts a directive */
			while (pp_token.kind == '#' && info.at_line_begin) {
				parse_preprocessing_directive();
			}
			/* in skip mode drop tokens until skipping ends or at EOF */
		} while (skip_mode && pp_token.kind != T_EOF);
	}

	const token_kind_t kind = pp_token.kind;
	/* Macro expansion is only started outside of a macro call's argument
	 * list, or while an argument is itself being expanded. */
	if (current_call == NULL || argument_expanding != NULL) {
		symbol_t *const symbol = pp_token.base.symbol;
		if (symbol) {
			if (kind == T_MACRO_PARAMETER) {
				/* replace the parameter by its argument */
				assert(current_expansion != NULL);
				start_expanding(pp_token.macro_parameter.def);
				goto restart;
			}

			pp_definition_t *const pp_definition = symbol->pp_definition;
			/* a macro that is already being expanded is not expanded again */
			if (pp_definition != NULL && !pp_definition->is_expanding) {
				if (pp_definition->has_parameters) {

					/* check if next token is a '(' */
					whitespace_info_t old_info   = info;
					token_kind_t      next_token = peek_expansion();
					if (next_token == T_EOF) {
						/* the expansion has no further token: look ahead in
						 * the input instead */
						info.at_line_begin  = false;
						info.had_whitespace = false;
						skip_whitespace();
						if (input.c == '(') {
							next_token = '(';
						}
					}

					if (next_token == '(') {
						/* remember where the outermost expansion started */
						if (current_expansion == NULL)
							expansion_pos = pp_token.base.source_position;
						next_preprocessing_token();
						assert(pp_token.kind == '(');

						/* begin collecting the arguments of the call */
						pp_definition->parent_expansion = current_expansion;
						current_call              = pp_definition;
						current_call->expand_pos  = 0;
						current_call->expand_info = old_info;
						if (current_call->n_parameters > 0) {
							current_argument = &current_call->parameters[0];
							assert(argument_brace_count == 0);
						}
						goto restart;
					} else {
						/* Not a call, the macro name is returned as is.
						 * skip_whitespace() skipped newlines and whitespace,
						 * remember results for next token */
						next_info = info;
						info      = old_info;
						return;
					}
				} else {
					/* object-like macro: expand right away */
					if (current_expansion == NULL)
						expansion_pos = pp_token.base.source_position;
					start_expanding(pp_definition);
					goto restart;
				}
			}
		}
	}

	if (current_call != NULL) {
		/* We are inside the argument list of a macro call:
		 * argument_brace_count tracks the nesting depth of parentheses. */
		if (kind == '(') {
			++argument_brace_count;
		} else if (kind == ')') {
			if (argument_brace_count > 0) {
				--argument_brace_count;
			} else {
				/* closing parenthesis of the call: all arguments are
				 * complete, start expanding the macro itself */
				finish_current_argument();
				assert(kind == ')');
				start_expanding(current_call);
				info = current_call->expand_info;
				current_call     = NULL;
				current_argument = NULL;
				goto restart;
			}
		} else if (kind == ',' && argument_brace_count == 0) {
			/* a top-level comma ends the current argument */
			finish_current_argument();
			current_call->expand_pos++;
			if (current_call->expand_pos >= current_call->n_parameters) {
				errorf(&pp_token.base.source_position,
					   "too many arguments passed for macro '%Y'",
					   current_call->symbol);
				/* tokens of surplus arguments are not recorded */
				current_argument = NULL;
			} else {
				current_argument
					= &current_call->parameters[current_call->expand_pos];
			}
			goto restart;
		} else if (kind == T_MACRO_PARAMETER) {
			/* parameters have to be fully expanded before being used as
			 * parameters for another macro-call */
			assert(current_expansion != NULL);
			pp_definition_t *argument = pp_token.macro_parameter.def;
			argument_expanding = argument;
			start_expanding(argument);
			goto restart;
		} else if (kind == T_EOF) {
			errorf(&expansion_pos,
			       "reached end of file while parsing arguments for '%Y'",
			       current_call->symbol);
			return;
		}
		/* record the token (and whether whitespace preceded it) as part of
		 * the current argument */
		if (current_argument != NULL) {
			saved_token_t saved;
			saved.token = pp_token;
			saved.had_whitespace = info.had_whitespace;
			obstack_grow(&pp_obstack, &saved, sizeof(saved));
		}
		goto restart;
	}
}
2644 
/**
 * Appends a directory to the end of a search path list.
 *
 * @param paths  list to extend; the new entry inherits its is_system_path flag
 * @param path   directory name; only the pointer is stored, so the string
 *               has to stay valid as long as the list is used
 */
void append_include_path(searchpath_t *paths, const char *path)
{
	searchpath_entry_t *const node = OALLOCZ(&config_obstack, searchpath_entry_t);
	node->is_system_path = paths->is_system_path;
	node->path           = path;

	/* store the node in the slot the anchor designates and let the anchor
	 * designate the node's own next pointer afterwards */
	searchpath_entry_t **const slot = paths->anchor;
	*slot         = node;
	paths->anchor = &node->next;
}
2654 
/**
 * Appends the ':'-separated directories found in an environment variable to
 * a search path list. An unset or empty variable adds nothing.
 *
 * @param paths   list to extend
 * @param envvar  name of the environment variable to read
 */
static void append_env_paths(searchpath_t *paths, const char *envvar)
{
	const char *val = getenv(envvar);
	if (val == NULL || *val == '\0')
		return;

	const char *begin = val;
	for (;;) {
		/* find the end of the current element */
		const char *c = begin;
		while (*c != '\0' && *c != ':')
			++c;

		size_t len = c-begin;
		if (len == 0) {
			/* use "." for gcc compatibility (Matze: I would expect that
			 * nothing happens for an empty entry...) */
			append_include_path(paths, ".");
		} else {
			/* the value returned by getenv() is not ours to keep: copy it */
			char *const string = obstack_copy0(&config_obstack, begin, len);
			append_include_path(paths, string);
		}

		/* Stop at the end of the value. Nothing behind the terminating NUL
		 * may be looked at. */
		if (*c == '\0')
			break;

		/* continue behind the separator */
		begin = c+1;
		/* NOTE(review): a directly following second ':' is skipped as well,
		 * so an empty element in the middle ("a::b") does not add ".".
		 * Behaviour kept as found; confirm whether this is wanted. */
		if (*begin == ':')
			++begin;
	}
}
2683 
/**
 * Chains the entries of @p append behind the entries of @p path.
 * The entries are shared, not copied, and the anchor of @p path is left
 * where it is.
 */
static void append_searchpath(searchpath_t *path, const searchpath_t *append)
{
	searchpath_entry_t **const tail = path->anchor;
	*tail = append->first;
}
2688 
setup_include_path(void)2689 static void setup_include_path(void)
2690 {
2691 	/* built-in paths */
2692 	append_include_path(&system_searchpath, "/usr/include");
2693 
2694 	/* parse environment variable */
2695 	append_env_paths(&bracket_searchpath, "CPATH");
2696 	append_env_paths(&system_searchpath,
2697 	                 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2698 
2699 	/* append system search path to bracket searchpath */
2700 	append_searchpath(&system_searchpath,  &after_searchpath);
2701 	append_searchpath(&bracket_searchpath, &system_searchpath);
2702 	append_searchpath(&quote_searchpath, &bracket_searchpath);
2703 }
2704 
input_error(unsigned const delta_lines,unsigned const delta_cols,char const * const message)2705 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2706 {
2707 	source_position_t pos = pp_token.base.source_position;
2708 	pos.lineno += delta_lines;
2709 	pos.colno  += delta_cols;
2710 	errorf(&pos, "%s", message);
2711 }
2712 
/**
 * Initializes config_obstack, the obstack the search path entries are
 * allocated from. Has to run before the first append_include_path() call.
 */
void init_include_paths(void)
{
	obstack_init(&config_obstack);
}
2717 
/**
 * Initializes the preprocessor: its obstacks, the string set, the include
 * search paths and the error callback of the input layer.
 *
 * setup_include_path() allocates from config_obstack, so
 * init_include_paths() has to be called before this function.
 */
void init_preprocessor(void)
{
	/* NOTE(review): presumably registers the preprocessor keywords; defined
	 * outside of this part of the file */
	init_symbols();

	obstack_init(&pp_obstack);
	obstack_init(&input_obstack);
	strset_init(&stringset);

	setup_include_path();

	/* let the input layer report its errors through input_error() */
	set_input_error_callback(input_error);
}
2730 
/**
 * Releases the preprocessor's obstacks (including config_obstack, which is
 * set up by init_include_paths()) and destroys the string set.
 */
void exit_preprocessor(void)
{
	/* a NULL object frees everything allocated in the obstack */
	obstack_free(&input_obstack, NULL);
	obstack_free(&pp_obstack, NULL);
	obstack_free(&config_obstack, NULL);

	strset_destroy(&stringset);
}
2739 
2740 int pptest_main(int argc, char **argv);
pptest_main(int argc,char ** argv)2741 int pptest_main(int argc, char **argv)
2742 {
2743 	init_symbol_table();
2744 	init_include_paths();
2745 	init_preprocessor();
2746 	init_tokens();
2747 
2748 	error_on_unknown_chars   = false;
2749 	resolve_escape_sequences = false;
2750 
2751 	/* simplistic commandline parser */
2752 	const char *filename = NULL;
2753 	const char *output = NULL;
2754 	for (int i = 1; i < argc; ++i) {
2755 		const char *opt = argv[i];
2756 		if (streq(opt, "-I")) {
2757 			append_include_path(&bracket_searchpath, argv[++i]);
2758 			continue;
2759 		} else if (streq(opt, "-E")) {
2760 			/* ignore */
2761 		} else if (streq(opt, "-o")) {
2762 			output = argv[++i];
2763 			continue;
2764 		} else if (opt[0] == '-') {
2765 			fprintf(stderr, "Unknown option '%s'\n", opt);
2766 		} else {
2767 			if (filename != NULL)
2768 				fprintf(stderr, "Multiple inputs not supported\n");
2769 			filename = argv[i];
2770 		}
2771 	}
2772 	if (filename == NULL) {
2773 		fprintf(stderr, "No input specified\n");
2774 		return 1;
2775 	}
2776 
2777 	if (output == NULL) {
2778 		out = stdout;
2779 	} else {
2780 		out = fopen(output, "w");
2781 		if (out == NULL) {
2782 			fprintf(stderr, "Couldn't open output '%s'\n", output);
2783 			return 1;
2784 		}
2785 	}
2786 
2787 	/* just here for gcc compatibility */
2788 	fprintf(out, "# 1 \"%s\"\n", filename);
2789 	fprintf(out, "# 1 \"<built-in>\"\n");
2790 	fprintf(out, "# 1 \"<command-line>\"\n");
2791 
2792 	FILE *file = fopen(filename, "r");
2793 	if (file == NULL) {
2794 		fprintf(stderr, "Couldn't open input '%s'\n", filename);
2795 		return 1;
2796 	}
2797 	switch_pp_input(file, filename, NULL, false);
2798 
2799 	for (;;) {
2800 		next_preprocessing_token();
2801 		if (pp_token.kind == T_EOF)
2802 			break;
2803 		emit_pp_token();
2804 	}
2805 
2806 	fputc('\n', out);
2807 	check_unclosed_conditionals();
2808 	fclose(close_pp_input());
2809 	if (out != stdout)
2810 		fclose(out);
2811 
2812 	exit_tokens();
2813 	exit_preprocessor();
2814 	exit_symbol_table();
2815 
2816 	return 0;
2817 }
2818