1 /*
2  *  TCC - Tiny C Compiler
3  *
4  *  Copyright (c) 2001-2004 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20 
21 #include "tcc.h"
22 #include <math.h>
23 /********************************************************/
24 /* global variables */
25 
/* flag word for the current token; holds the TOK_FLAG_* bits defined below */
ST_DATA int tok_flags;
/* additional information about the token */
#define TOK_FLAG_BOL   0x0001	/* beginning of line before */
#define TOK_FLAG_BOF   0x0002	/* beginning of file before */
#define TOK_FLAG_ENDIF 0x0004	/* an #endif was found matching the starting #ifdef */
#define TOK_FLAG_EOF   0x0008	/* end of file */

/* flag word selecting tokenizer behaviour; holds the PARSE_FLAG_* bits below */
ST_DATA int parse_flags;
#define PARSE_FLAG_PREPROCESS 0x0001	/* activate preprocessing */
#define PARSE_FLAG_TOK_NUM    0x0002	/* return numbers instead of TOK_PPNUM */
#define PARSE_FLAG_LINEFEED   0x0004	/* line feed is returned as a
					token. line feed is also
					returned at eof */
#define PARSE_FLAG_ASM_COMMENTS 0x0008	/* '#' can be used for line comment */
#define PARSE_FLAG_SPACES     0x0010	/* next() returns space tokens (for -E) */
41 
/* input file currently being read; inp () advances file->buf_ptr */
ST_DATA struct BufferedFile *file;
/* 'ch' is the character last loaded by inp (); 'tok' is the current token */
ST_DATA int ch, tok;
/* value attached to 'tok' (saved and restored together with it in ParseState) */
ST_DATA CValue tokc;
/* expr_preprocess () points this at a token string before calling next () and
   resets it to NULL afterwards -- presumably the stream next () reads from
   when it is non-NULL */
ST_DATA const int *macro_ptr;
ST_DATA CString tokcstr;/* current parsed string, if any */

/* benchmark statistics */
ST_DATA int total_lines;
/* number of bytes read from input files, updated by tcc_peekc_slow () */
ST_DATA int total_bytes;
/* next token number to hand out; incremented by tok_alloc_new () */
ST_DATA int tok_ident;
/* table of all allocated TokenSym, indexed by (token number - TOK_IDENT) */
ST_DATA TokenSym **table_ident;
53 
54 /* ------------------------------------------------------------------------- */
55 
static int *macro_ptr_allocated;
static const int *unget_saved_macro_ptr;
static int unget_saved_buffer[TOK_MAX_SIZE + 1];
static int unget_buffer_enabled;
/* bucket heads of the identifier hash table; entries in a bucket are
   chained through TokenSym.hash_next (see tok_alloc ()) */
static TokenSym *hash_ident[TOK_HASH_SIZE];
static char token_buf[STRING_MAX_SIZE + 1];
/* true if isid(c) || isnum(c) || isdot(c) */
static unsigned char isidnum_table[256 - CH_EOF];

/* all keyword spellings from tcctok.h concatenated into one array, each
   followed by a '\0' (DEF expands to the string plus "\0") */
static const char tcc_keywords[] =
#define DEF(id, str) str "\0"
#include "tcctok.h"
#undef DEF
;

/* WARNING: the content of this string encodes token numbers */
/* Layout: consecutive 3-byte records -- the two characters of the operator
   followed by one byte holding its token number (written as an octal
   escape). get_tok_str () walks the records to map a token number back to
   its two-character spelling. */
static const unsigned char tok_two_chars[] =
	"<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253"
	"-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266";
75 
/* node of a list linked through 'prev', carrying a token pointer 'p';
   macro_subst () receives one through its 'can_read_stream' parameter.
   NOTE(review): how 'p' is used is not visible in this part of the file. */
struct macro_level {
	struct macro_level *prev;
	const int *p;
};
80 
81 static void next_nomacro_spc(void);
82 static void macro_subst(
83 	TokenString *tok_str,
84 	Sym **nested_list,
85 	const int *macro_str,
86 	struct macro_level **can_read_stream
87 );
88 
skip(int c)89 ST_FUNC void skip(int c)
90 {
91 	if (tok != c) {
92 		tcc_error ("'%c' expected (got \"%s\")", c, get_tok_str (tok, &tokc));
93 	}
94 	next ();
95 }
96 
expect(const char * msg)97 ST_FUNC void expect(const char *msg)
98 {
99 	tcc_error ("%s expected", msg);
100 }
101 
102 /* ------------------------------------------------------------------------- */
103 /* CString handling */
/* grow the heap buffer of 'cstr' so that it holds at least 'new_size'
   bytes. The capacity starts from the current one (or 8 when nothing is
   allocated yet) and doubles until it is large enough.
   NOTE: the realloc () result is stored without being checked. */
static void cstr_realloc(CString *cstr, int new_size)
{
	int capacity = cstr->size_allocated ? cstr->size_allocated : 8;
	void *buf;

	for (; capacity < new_size; capacity *= 2) {
		/* keep doubling */
	}
	buf = realloc (cstr->data_allocated, capacity);
	cstr->data = buf;
	cstr->data_allocated = buf;
	cstr->size_allocated = capacity;
}
120 
121 /* add a byte */
cstr_ccat(CString * cstr,int ch)122 ST_FUNC void cstr_ccat(CString *cstr, int ch)
123 {
124 	int size;
125 	size = cstr->size + 1;
126 	if (size > cstr->size_allocated) {
127 		cstr_realloc (cstr, size);
128 	}
129 	((unsigned char *) cstr->data)[size - 1] = ch;
130 	cstr->size = size;
131 }
132 
cstr_cat(CString * cstr,const char * str)133 ST_FUNC void cstr_cat(CString *cstr, const char *str)
134 {
135 	int c;
136 	for (;;) {
137 		c = *str;
138 		if (c == '\0') {
139 			break;
140 		}
141 		cstr_ccat (cstr, c);
142 		str++;
143 	}
144 }
145 
146 /* add a wide char */
cstr_wccat(CString * cstr,int ch)147 ST_FUNC void cstr_wccat(CString *cstr, int ch)
148 {
149 	int size;
150 	size = cstr->size + sizeof(nwchar_t);
151 	if (size > cstr->size_allocated) {
152 		cstr_realloc (cstr, size);
153 	}
154 	*(nwchar_t *) (((unsigned char *) cstr->data) + size - sizeof(nwchar_t)) = ch;
155 	cstr->size = size;
156 }
157 
cstr_new(CString * cstr)158 ST_FUNC void cstr_new(CString *cstr)
159 {
160 	memset (cstr, 0, sizeof(CString));
161 }
162 
163 /* free string and reset it to NULL */
cstr_free(CString * cstr)164 ST_FUNC void cstr_free(CString *cstr)
165 {
166 	free (cstr->data_allocated);
167 	cstr_new (cstr);
168 }
169 
170 /* reset string to empty */
cstr_reset(CString * cstr)171 ST_FUNC void cstr_reset(CString *cstr)
172 {
173 	cstr->size = 0;
174 }
175 
176 /* XXX: unicode ? */
add_char(CString * cstr,int c)177 static void add_char(CString *cstr, int c)
178 {
179 	if (c == '\'' || c == '\"' || c == '\\') {
180 		/* XXX: could be more precise if char or string */
181 		cstr_ccat (cstr, '\\');
182 	}
183 	if (c >= 32 && c <= 126) {
184 		cstr_ccat (cstr, c);
185 	} else {
186 		cstr_ccat (cstr, '\\');
187 		if (c == '\n') {
188 			cstr_ccat (cstr, 'n');
189 		} else {
190 			cstr_ccat (cstr, '0' + ((c >> 6) & 7));
191 			cstr_ccat (cstr, '0' + ((c >> 3) & 7));
192 			cstr_ccat (cstr, '0' + (c & 7));
193 		}
194 	}
195 }
196 
197 /* ------------------------------------------------------------------------- */
198 /* allocate a new token */
tok_alloc_new(TokenSym ** pts,const char * str,int len)199 static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len)
200 {
201 	TokenSym *ts, **ptable;
202 	int i;
203 
204 	if (tok_ident >= SYM_FIRST_ANOM) {
205 		tcc_error ("memory full");
206 	}
207 
208 	/* expand token table if needed */
209 	i = tok_ident - TOK_IDENT;
210 	if ((i % TOK_ALLOC_INCR) == 0) {
211 		ptable = realloc (table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *));
212 		table_ident = ptable;
213 	}
214 	ts = malloc (sizeof(TokenSym) + len);
215 	table_ident[i] = ts;
216 	ts->tok = tok_ident++;
217 	ts->sym_define = NULL;
218 	ts->sym_label = NULL;
219 	ts->sym_struct = NULL;
220 	ts->sym_identifier = NULL;
221 	ts->len = len;
222 	ts->hash_next = NULL;
223 	memcpy (ts->str, str, len);
224 	ts->str[len] = '\0';
225 	*pts = ts;
226 	return ts;
227 }
228 
/* string hash used for identifiers and cached include names: start from
   TOK_HASH_INIT and fold in each byte with TOK_HASH_FUNC (h * 263 + c);
   the callers mask the result down to their table size */
#define TOK_HASH_INIT 1
#define TOK_HASH_FUNC(h, c) ((h) * 263 + (c))
231 
232 /* find a token and add it if not found */
tok_alloc(const char * str,int len)233 ST_FUNC TokenSym *tok_alloc(const char *str, int len)
234 {
235 	TokenSym *ts, **pts;
236 	int i;
237 	unsigned int h;
238 
239 	h = TOK_HASH_INIT;
240 	for (i = 0; i < len; i++) {
241 		h = TOK_HASH_FUNC (h, ((unsigned char *) str)[i]);
242 	}
243 	h &= (TOK_HASH_SIZE - 1);
244 
245 	pts = &hash_ident[h];
246 	for (;;) {
247 		ts = *pts;
248 		if (!ts) {
249 			break;
250 		}
251 		if (ts->len == len && !memcmp (ts->str, str, len)) {
252 			return ts;
253 		}
254 		pts = &(ts->hash_next);
255 	}
256 	return tok_alloc_new (pts, str, len);
257 }
258 
259 /* XXX: buffer overflow */
260 /* XXX: float tokens */
get_tok_str(int v,CValue * cv)261 ST_FUNC char *get_tok_str(int v, CValue *cv)
262 {
263 	static char buf[STRING_MAX_SIZE + 1];
264 	static CString cstr_buf;
265 	CString *cstr;
266 	char *p;
267 	int i, len;
268 
269 	/* NOTE: to go faster, we give a fixed buffer for small strings */
270 	cstr_reset (&cstr_buf);
271 	cstr_buf.data = buf;
272 	cstr_buf.size_allocated = sizeof(buf);
273 	p = buf;
274 
275 	switch (v) {
276 	case TOK_CINT:
277 	case TOK_CUINT:
278 		/* XXX: not quite exact, but only useful for testing */
279 		if (cv) {
280 			sprintf (p, "%u", cv->ui);
281 		}
282 		break;
283 	case TOK_CLLONG:
284 	case TOK_CULLONG:
285 		/* XXX: not quite exact, but only useful for testing  */
286 		if (cv) {
287 			sprintf (p, "%"PFMT64u, cv->ull);
288 		}
289 		break;
290 	case TOK_LCHAR:
291 		cstr_ccat (&cstr_buf, 'L');
292 	case TOK_CCHAR:
293 		cstr_ccat (&cstr_buf, '\'');
294 		if (cv) {
295 			add_char (&cstr_buf, cv->i);
296 		}
297 		cstr_ccat (&cstr_buf, '\'');
298 		cstr_ccat (&cstr_buf, '\0');
299 		break;
300 	case TOK_PPNUM:
301 		cstr = cv->cstr;
302 		len = cstr->size - 1;
303 		for (i = 0; i < len; i++) {
304 			add_char (&cstr_buf, ((unsigned char *) cstr->data)[i]);
305 		}
306 		cstr_ccat (&cstr_buf, '\0');
307 		break;
308 	case TOK_LSTR:
309 		cstr_ccat (&cstr_buf, 'L');
310 	case TOK_STR:
311 		if (cv) {
312 			cstr = cv->cstr;
313 			cstr_ccat (&cstr_buf, '\"');
314 			if (v == TOK_STR) {
315 				len = cstr->size - 1;
316 				for (i = 0; i < len; i++) {
317 					add_char (&cstr_buf, ((unsigned char *) cstr->data)[i]);
318 				}
319 			} else {
320 				len = (cstr->size / sizeof(nwchar_t)) - 1;
321 				for (i = 0; i < len; i++) {
322 					add_char (&cstr_buf, ((nwchar_t *) cstr->data)[i]);
323 				}
324 			}
325 			cstr_ccat (&cstr_buf, '\"');
326 			cstr_ccat (&cstr_buf, '\0');
327 		} else {
328 			eprintf ("cv = nil\n");
329 		}
330 		break;
331 	case TOK_LT:
332 		v = '<';
333 		goto addv;
334 	case TOK_GT:
335 		v = '>';
336 		goto addv;
337 	case TOK_DOTS:
338 		return strcpy (p, "...");
339 	case TOK_A_SHL:
340 		return strcpy (p, "<<=");
341 	case TOK_A_SAR:
342 		return strcpy (p, ">>=");
343 	default:
344 		if (v < TOK_IDENT) {
345 			/* search in two bytes table */
346 			const unsigned char *q = tok_two_chars;
347 			while (*q) {
348 				if (q[2] == v) {
349 					*p++ = q[0];
350 					*p++ = q[1];
351 					*p = '\0';
352 					return buf;
353 				}
354 				q += 3;
355 			}
356 addv:
357 			*p++ = v;
358 			*p = '\0';
359 		} else if (v < tok_ident) {
360 			return table_ident[v - TOK_IDENT]->str;
361 		} else if (v >= SYM_FIRST_ANOM) {
362 			/* special name for anonymous symbol */
363 			sprintf (p, "%u", v - SYM_FIRST_ANOM);
364 		} else {
365 			/* should never happen */
366 			return NULL;
367 		}
368 		break;
369 	}
370 	return cstr_buf.data;
371 }
372 
373 /* fill input buffer and peek next char */
tcc_peekc_slow(BufferedFile * bf)374 static int tcc_peekc_slow(BufferedFile *bf)
375 {
376 	int len;
377 	/* only tries to read if really end of buffer */
378 	if (bf->buf_ptr >= bf->buf_end) {
379 		if (bf->fd != -1) {
380 #if defined(PARSE_DEBUG)
381 			len = 8;
382 #else
383 			len = IO_BUF_SIZE;
384 #endif
385 			len = read (bf->fd, bf->buffer, len);
386 			if (len < 0) {
387 				len = 0;
388 			}
389 		} else {
390 			len = 0;
391 		}
392 		total_bytes += len;
393 		bf->buf_ptr = bf->buffer;
394 		bf->buf_end = bf->buffer + len;
395 		*bf->buf_end = CH_EOB;
396 	}
397 	if (bf->buf_ptr < bf->buf_end) {
398 		return bf->buf_ptr[0];
399 	} else {
400 		bf->buf_ptr = bf->buf_end;
401 		return CH_EOF;
402 	}
403 }
404 
405 /* return the current character, handling end of block if necessary
406    (but not stray) */
handle_eob(void)407 ST_FUNC int handle_eob(void)
408 {
409 	return tcc_peekc_slow (file);
410 }
411 
412 /* read next char from current input file and handle end of input buffer */
inp(void)413 ST_INLN void inp(void)
414 {
415 	ch = *(++(file->buf_ptr));
416 	/* end of buffer/file handling */
417 	if (ch == CH_EOB) {
418 		ch = handle_eob ();
419 	}
420 }
421 
422 /* handle '\[\r]\n' */
handle_stray_noerror(void)423 static int handle_stray_noerror(void)
424 {
425 	while (ch == '\\') {
426 		inp ();
427 		if (ch == '\n') {
428 			file->line_num++;
429 			inp ();
430 		} else if (ch == '\r') {
431 			inp ();
432 			if (ch != '\n') {
433 				goto fail;
434 			}
435 			file->line_num++;
436 			inp ();
437 		} else {
438 fail:
439 			return 1;
440 		}
441 	}
442 	return 0;
443 }
444 
/* same as handle_stray_noerror (), but a backslash that does not start a
   line continuation is reported as an error */
static void handle_stray(void)
{
	if (!handle_stray_noerror ()) {
		return;
	}
	tcc_error ("stray '\\' in program");
}
451 
452 /* skip the stray and handle the \\n case. Output an error if
453    incorrect char after the stray */
handle_stray1(uint8_t * p)454 static int handle_stray1(uint8_t *p)
455 {
456 	int c;
457 
458 	if (p >= file->buf_end) {
459 		file->buf_ptr = p;
460 		c = handle_eob ();
461 		p = file->buf_ptr;
462 		if (c == '\\') {
463 			goto parse_stray;
464 		}
465 	} else {
466 parse_stray:
467 		file->buf_ptr = p;
468 		ch = *p;
469 		handle_stray ();
470 		p = file->buf_ptr;
471 		c = *p;
472 	}
473 	return c;
474 }
475 
/* handle just the EOB case, but not stray */
/* PEEKC_EOB(c, p): advance 'p' by one and load the character into 'c'.
   When it is a backslash (presumably also the value of the CH_EOB marker
   stored at the end of the buffer), file->buf_ptr is synchronised with
   'p', handle_eob () supplies the real character and 'p' is reloaded
   from file->buf_ptr. Both arguments are evaluated several times and the
   expansion is a bare { } block. */
#define PEEKC_EOB(c, p)			\
	{				\
		p++;			\
		c = *p;			\
		if (c == '\\') {	\
			file->buf_ptr = p;\
			c = handle_eob ();\
			p = file->buf_ptr;\
		}			\
	}

/* handle the complicated stray case */
/* PEEKC(c, p): like PEEKC_EOB, but a backslash is passed to
   handle_stray1 (), which also skips '\[\r]\n' sequences and reports
   other strays as errors. */
#define PEEKC(c, p)			\
	{				\
		p++;			\
		c = *p;			\
		if (c == '\\') {	\
			c = handle_stray1 (p);\
			p = file->buf_ptr;\
		}			\
	}
498 
499 /* input with '\[\r]\n' handling. Note that this function cannot
500    handle other characters after '\', so you cannot call it inside
501    strings or comments */
minp(void)502 ST_FUNC void minp(void)
503 {
504 	inp ();
505 	if (ch == '\\') {
506 		handle_stray ();
507 	}
508 }
509 
510 
/* single line C++ comments */
/* Skip a '//' comment. 'p' is first advanced by one (preprocess_skip ()
   calls this with 'p' on the second '/'), then the text is scanned up to
   the first '\n' or CH_EOF. The returned pointer is the position where
   the scan stopped; the newline itself is not consumed. A backslash
   followed by '\n' or '\r\n' is skipped and counted in file->line_num,
   so the comment continues on the following line. */
static uint8_t *parse_line_comment(uint8_t *p)
{
	int c;

	p++;
	for (;;) {
		c = *p;
redo:
		if (c == '\n' || c == CH_EOF) {
			break;
		} else if (c == '\\') {
			/* either a real backslash or the end of the buffer:
			   handle_eob () refills the buffer when needed and
			   returns the character actually at the read position */
			file->buf_ptr = p;
			c = handle_eob ();
			p = file->buf_ptr;
			if (c == '\\') {
				/* real backslash: look at what follows it */
				PEEKC_EOB (c, p);
				if (c == '\n') {
					file->line_num++;
					PEEKC_EOB (c, p);
				} else if (c == '\r') {
					PEEKC_EOB (c, p);
					if (c == '\n') {
						file->line_num++;
						PEEKC_EOB (c, p);
					}
				}
			} else {
				/* new block (or end of file): test 'c' again */
				goto redo;
			}
		} else {
			p++;
		}
	}
	return p;
}
547 
548 /* C comments */
parse_comment(uint8_t * p)549 ST_FUNC uint8_t *parse_comment(uint8_t *p)
550 {
551 	int c;
552 
553 	p++;
554 	for (;;) {
555 		/* fast skip loop */
556 		for (;;) {
557 			c = *p;
558 			if (c == '\n' || c == '*' || c == '\\') {
559 				break;
560 			}
561 			p++;
562 			c = *p;
563 			if (c == '\n' || c == '*' || c == '\\') {
564 				break;
565 			}
566 			p++;
567 		}
568 		/* now we can handle all the cases */
569 		if (c == '\n') {
570 			file->line_num++;
571 			p++;
572 		} else if (c == '*') {
573 			p++;
574 			for (;;) {
575 				c = *p;
576 				if (c == '*') {
577 					p++;
578 				} else if (c == '/') {
579 					goto end_of_comment;
580 				} else if (c == '\\') {
581 					file->buf_ptr = p;
582 					c = handle_eob ();
583 					p = file->buf_ptr;
584 					if (c == '\\') {
585 						/* skip '\[\r]\n', otherwise just skip the stray */
586 						while (c == '\\') {
587 							PEEKC_EOB (c, p);
588 							if (c == '\n') {
589 								file->line_num++;
590 								PEEKC_EOB (c, p);
591 							} else if (c == '\r') {
592 								PEEKC_EOB (c, p);
593 								if (c == '\n') {
594 									file->line_num++;
595 									PEEKC_EOB (c, p);
596 								}
597 							} else {
598 								goto after_star;
599 							}
600 						}
601 					}
602 				} else {
603 					break;
604 				}
605 			}
606 after_star:
607 			;
608 		} else {
609 			/* stray, eob or eof */
610 			file->buf_ptr = p;
611 			c = handle_eob ();
612 			p = file->buf_ptr;
613 			if (c == CH_EOF) {
614 				tcc_error ("unexpected end of file in comment");
615 			} else if (c == '\\') {
616 				p++;
617 			}
618 		}
619 	}
620 end_of_comment:
621 	p++;
622 	return p;
623 }
624 
625 #define cinp minp
626 
skip_spaces(void)627 static inline void skip_spaces(void)
628 {
629 	while (is_space (ch))
630 		cinp ();
631 }
632 
/* track runs of space tokens through the state '*spc'.
   Returns 1 when 't' is a space and '*spc' was already non-zero (the
   state is then left alone); otherwise returns 0 after setting '*spc' to
   1 for a space token and to 0 for any other token. */
static inline int check_space(int t, int *spc)
{
	const int was_space = *spc;

	if (!is_space (t)) {
		*spc = 0;
		return 0;
	}
	if (was_space) {
		return 1;
	}
	*spc = 1;
	return 0;
}
645 
646 /* parse a string without interpreting escapes */
parse_pp_string(uint8_t * p,int sep,CString * str)647 static uint8_t *parse_pp_string(uint8_t *p,
648 				int sep, CString *str)
649 {
650 	int c;
651 	p++;
652 	while (tcc_nerr () == 0) {
653 		c = *p;
654 		if (c == sep) {
655 			break;
656 		} else if (c == '\\') {
657 			file->buf_ptr = p;
658 			c = handle_eob ();
659 			p = file->buf_ptr;
660 			if (c == CH_EOF) {
661 unterminated_string:
662 				/* XXX: indicate line number of start of string */
663 				tcc_error ("missing terminating %c character", sep);
664 				return NULL;
665 			} else if (c == '\\') {
666 				/* escape : just skip \[\r]\n */
667 				PEEKC_EOB (c, p);
668 				if (c == '\n') {
669 					file->line_num++;
670 					p++;
671 				} else if (c == '\r') {
672 					PEEKC_EOB (c, p);
673 					if (c != '\n') {
674 						expect ("'\n' after '\r'");
675 						return NULL;
676 					}
677 					file->line_num++;
678 					p++;
679 				} else if (c == CH_EOF) {
680 					goto unterminated_string;
681 				} else {
682 					if (str) {
683 						cstr_ccat (str, '\\');
684 						cstr_ccat (str, c);
685 					}
686 					p++;
687 				}
688 			}
689 		} else if (c == '\n') {
690 			file->line_num++;
691 			goto add_char;
692 		} else if (c == '\r') {
693 			PEEKC_EOB (c, p);
694 			if (c != '\n') {
695 				if (str) {
696 					cstr_ccat (str, '\r');
697 				}
698 			} else {
699 				file->line_num++;
700 				goto add_char;
701 			}
702 		} else {
703 add_char:
704 			if (str) {
705 				cstr_ccat (str, c);
706 			}
707 			p++;
708 		}
709 	}
710 	p++;
711 	return p;
712 }
713 
/* skip block of text until #else, #elif or #endif. skip also pairs of
   #if/#endif */
/* Scanning starts at file->buf_ptr. Strings and comments are stepped over
   so that a '#' inside them is not taken for a directive. On return
   'tok' holds the directive that ended the skip and file->buf_ptr points
   after it. The function also returns early when the input ends
   ("#endif expected") or when parse_pp_string () fails, and the main loop
   stops as soon as tcc_nerr () is non-zero. */
static void preprocess_skip(void)
{
	/* a: nesting depth of #if/#ifdef/#ifndef opened inside the skipped
	      text
	   start_of_line: only blanks were seen since the last newline
	   in_warn_or_error: the current line is a #error/#warning, so quotes
	      and slashes on it are treated as ordinary characters */
	int a, start_of_line, c, in_warn_or_error;
	uint8_t *p;

	p = file->buf_ptr;
	a = 0;
redo_start:
	/* a new line begins: reset the per-line state */
	start_of_line = 1;
	in_warn_or_error = 0;
	while (tcc_nerr () == 0) {
redo_no_start:
		c = *p;
		switch (c) {
		case ' ':
		case '\t':
		case '\f':
		case '\v':
		case '\r':
			/* blanks do not clear start_of_line */
			p++;
			goto redo_no_start;
		case '\n':
			file->line_num++;
			p++;
			goto redo_start;
		case '\\':
			/* end of buffer, end of file or a real backslash */
			file->buf_ptr = p;
			c = handle_eob ();
			if (c == CH_EOF) {
				expect ("#endif");
				return;
			} else if (c == '\\') {
				/* skip line continuations; the result is ignored, so
				   any other stray is not reported here */
				ch = file->buf_ptr[0];
				handle_stray_noerror ();
			}
			p = file->buf_ptr;
			goto redo_no_start;
		/* skip strings */
		case '\"':
		case '\'':
			if (in_warn_or_error) {
				goto _default;
			}
			p = parse_pp_string (p, c, NULL);
			if (p == NULL) {
				return;
			}
			break;
		/* skip comments */
		case '/':
			if (in_warn_or_error) {
				goto _default;
			}
			file->buf_ptr = p;
			ch = *p;
			minp ();
			p = file->buf_ptr;
			if (ch == '*') {
				p = parse_comment (p);
			} else if (ch == '/') {
				p = parse_line_comment (p);
			}
			break;
		case '#':
			p++;
			if (start_of_line) {
				/* read the directive name with the regular tokenizer */
				file->buf_ptr = p;
				next_nomacro ();
				p = file->buf_ptr;
				/* at depth 0 these directives end the skipped block */
				if (a == 0 &&
				    (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF)) {
					goto the_end;
				}
				if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF) {
					a++;
				} else if (tok == TOK_ENDIF) {
					a--;
				} else if (tok == TOK_ERROR || tok == TOK_WARNING) {
					in_warn_or_error = 1;
				} else if (tok == TOK_LINEFEED) {
					/* lone '#': the tokenizer already consumed the
					   line end */
					goto redo_start;
				}
			}
			break;
_default:
		default:
			p++;
			break;
		}
		start_of_line = 0;
	}
the_end:
	;
	file->buf_ptr = p;
}
811 
812 /* ParseState handling */
813 
814 /* XXX: currently, no include file info is stored. Thus, we cannot display
815    accurate messages if the function or data definition spans multiple
816    files */
817 
818 /* save current parse state in 's' */
save_parse_state(ParseState * s)819 ST_FUNC void save_parse_state(ParseState *s)
820 {
821 	s->line_num = file->line_num;
822 	s->macro_ptr = macro_ptr;
823 	s->tok = tok;
824 	s->tokc = tokc;
825 }
826 
827 /* restore parse state from 's' */
restore_parse_state(ParseState * s)828 ST_FUNC void restore_parse_state(ParseState *s)
829 {
830 	file->line_num = s->line_num;
831 	macro_ptr = s->macro_ptr;
832 	tok = s->tok;
833 	tokc = s->tokc;
834 }
835 
836 /* return the number of additional 'ints' necessary to store the
837    token */
tok_ext_size(int t)838 static inline int tok_ext_size(int t)
839 {
840 	switch (t) {
841 	/* 4 bytes */
842 	case TOK_CINT:
843 	case TOK_CUINT:
844 	case TOK_CCHAR:
845 	case TOK_LCHAR:
846 	case TOK_CFLOAT:
847 	case TOK_LINENUM:
848 		return 1;
849 	case TOK_STR:
850 	case TOK_LSTR:
851 	case TOK_PPNUM:
852 		tcc_error ("unsupported token");
853 		return 1;
854 	case TOK_CDOUBLE:
855 	case TOK_CLLONG:
856 	case TOK_CULLONG:
857 		return 2;
858 	case TOK_CLDOUBLE:
859 		return LDOUBLE_SIZE / 4;
860 	default:
861 		return 0;
862 	}
863 }
864 
865 /* token string handling */
866 
tok_str_new(TokenString * s)867 ST_INLN void tok_str_new(TokenString *s)
868 {
869 	s->str = NULL;
870 	s->len = 0;
871 	s->allocated_len = 0;
872 	s->last_line_num = -1;
873 }
874 
tok_str_free(int * str)875 ST_FUNC void tok_str_free(int *str)
876 {
877 	free (str);
878 }
879 
/* enlarge the storage of 's': 8 ints for the first allocation, twice the
   current capacity afterwards. Returns the new storage, which is also
   stored in s->str.
   NOTE: the realloc () result is stored without being checked. */
static int *tok_str_realloc(TokenString *s)
{
	const int new_len = s->allocated_len ? s->allocated_len * 2 : 8;
	int *grown = realloc (s->str, new_len * sizeof(int));

	s->str = grown;
	s->allocated_len = new_len;
	return grown;
}
894 
tok_str_add(TokenString * s,int t)895 ST_FUNC void tok_str_add(TokenString *s, int t)
896 {
897 	int len, *str;
898 
899 	len = s->len;
900 	str = s->str;
901 	if (len >= s->allocated_len) {
902 		str = tok_str_realloc (s);
903 	}
904 	str[len++] = t;
905 	s->len = len;
906 }
907 
/* append token 't' and the value it carries to 's'.
   The token number takes one int. It is followed by
   - one int (cv->tab[0]) for int, char, float and line number tokens,
   - a CString header plus the string bytes, rounded up to whole ints, for
     string and preprocessor-number tokens,
   - two to four ints for 64-bit and long double constants,
   - nothing for all other tokens.
   TOK_GET () reads the same layout back. */
static void tok_str_add2(TokenString *s, int t, CValue *cv)
{
	int len, *str;

	len = s->len;
	str = s->str;

	/* allocate space for worst case */
	if (len + TOK_MAX_SIZE > s->allocated_len) {
		str = tok_str_realloc (s);
	}
	str[len++] = t;
	switch (t) {
	case TOK_CINT:
	case TOK_CUINT:
	case TOK_CCHAR:
	case TOK_LCHAR:
	case TOK_CFLOAT:
	case TOK_LINENUM:
		str[len++] = cv->tab[0];
		break;
	case TOK_PPNUM:
	case TOK_STR:
	case TOK_LSTR:
	{
		int nb_words;

		/* ints needed for the header and the bytes, rounded up */
		nb_words = (sizeof(CString) + cv->cstr->size + 3) >> 2;
		/* strings can exceed the worst case reserved above */
		while ((len + nb_words) > s->allocated_len) {
			str = tok_str_realloc (s);
		}
		/* stored header: sizes only, both pointers NULL (TOK_GET ()
		   sets 'data' again when the token is read back) */
		CString cstr = {0};
		cstr.data = NULL;
		cstr.size = cv->cstr->size;
		cstr.data_allocated = NULL;
		cstr.size_allocated = cstr.size;

		/* copy through a byte pointer with memcpy () */
		ut8 *p = (ut8*)(str + len);
		memcpy (p, &cstr, sizeof (CString));
		memcpy (p + sizeof (CString),
			cv->cstr->data, cstr.size);
		len += nb_words;
	}
	break;
	case TOK_CDOUBLE:
	case TOK_CLLONG:
	case TOK_CULLONG:
#if LDOUBLE_SIZE == 8
	case TOK_CLDOUBLE:
#endif
		str[len++] = cv->tab[0];
		str[len++] = cv->tab[1];
		break;
#if LDOUBLE_SIZE == 12
	case TOK_CLDOUBLE:
		str[len++] = cv->tab[0];
		str[len++] = cv->tab[1];
		str[len++] = cv->tab[2];
#elif LDOUBLE_SIZE == 16
	case TOK_CLDOUBLE:
		str[len++] = cv->tab[0];
		str[len++] = cv->tab[1];
		str[len++] = cv->tab[2];
		str[len++] = cv->tab[3];
#elif LDOUBLE_SIZE != 8
#error add long double size support
#endif
		/* ends the TOK_CLDOUBLE case selected above */
		break;
	default:
		break;
	}
	s->len = len;
}
981 
982 /* add the current parse token in token string 's' */
tok_str_add_tok(TokenString * s)983 ST_FUNC void tok_str_add_tok(TokenString *s)
984 {
985 	CValue cval;
986 
987 	/* save line number info */
988 	if (file->line_num != s->last_line_num) {
989 		s->last_line_num = file->line_num;
990 		cval.i = s->last_line_num;
991 		tok_str_add2 (s, TOK_LINENUM, &cval);
992 	}
993 	tok_str_add2 (s, tok, &tokc);
994 }
995 
/* get a token from an integer array and increment pointer
   accordingly. */
/* t:  receives the token number.
   pp: cursor into the token string; on return it points just after the
       token and its value.
   cv: receives the value. For string and preprocessor-number tokens
       cv->cstr points at the CString header stored inside the token
       string, and that header's 'data' field is set to the bytes that
       follow it -- i.e. the token string is written to although it is
       reached through a const pointer.
   NOTE(review): the header is accessed through an 'int'-aligned address;
   confirm this is acceptable for CString on the targets in use.
   The layout read here is the one produced by tok_str_add2 (). */
static inline void TOK_GET(int *t, const int **pp, CValue *cv)
{
	const int *p = *pp;
	int n, *tab;

	tab = cv->tab;
	switch (*t = *p++) {
	case TOK_CINT:
	case TOK_CUINT:
	case TOK_CCHAR:
	case TOK_LCHAR:
	case TOK_CFLOAT:
	case TOK_LINENUM:
		tab[0] = *p++;
		break;
	case TOK_STR:
	case TOK_LSTR:
	case TOK_PPNUM:
		cv->cstr = (CString *) p;
		cv->cstr->data = (char *) p + sizeof(CString);
		/* skip header and bytes, rounded up to whole ints */
		p += (sizeof(CString) + cv->cstr->size + 3) >> 2;
		break;
	case TOK_CDOUBLE:
	case TOK_CLLONG:
	case TOK_CULLONG:
		n = 2;
		goto copy;
	case TOK_CLDOUBLE:
#if LDOUBLE_SIZE == 16
		n = 4;
#elif LDOUBLE_SIZE == 12
		n = 3;
#elif LDOUBLE_SIZE == 8
		n = 2;
#else
#error add long double size support
#endif
copy:
		/* copy the 'n' ints of the value into cv->tab */
		do {
			*tab++ = *p++;
		} while (--n);
		break;
	default:
		break;
	}
	*pp = p;
}
1045 
macro_is_equal(const int * a,const int * b)1046 static int macro_is_equal(const int *a, const int *b)
1047 {
1048 	char buf[STRING_MAX_SIZE + 1];
1049 	CValue cv;
1050 	int t;
1051 	while (*a && *b) {
1052 		TOK_GET (&t, &a, &cv);
1053 		pstrcpy (buf, sizeof buf, get_tok_str (t, &cv));
1054 		TOK_GET (&t, &b, &cv);
1055 		if (strcmp (buf, get_tok_str (t, &cv))) {
1056 			return 0;
1057 		}
1058 	}
1059 	return !(*a || *b);
1060 }
1061 
1062 /* defines handling */
define_push(int v,int macro_type,int * str,Sym * first_arg)1063 ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg)
1064 {
1065 	Sym *s;
1066 
1067 	s = define_find (v);
1068 	if (s && !macro_is_equal (s->d, str)) {
1069 		tcc_warning ("%s redefined", get_tok_str (v, NULL));
1070 	}
1071 
1072 	s = sym_push2 (&define_stack, v, macro_type, 0);
1073 	if (!s) {
1074 		return;
1075 	}
1076 	s->d = str;
1077 	s->next = first_arg;
1078 	if (v >= TOK_IDENT) {
1079 		table_ident[v - TOK_IDENT]->sym_define = s;
1080 	}
1081 }
1082 
1083 /* undefined a define symbol. Its name is just set to zero */
define_undef(Sym * s)1084 ST_FUNC void define_undef(Sym *s)
1085 {
1086 	int v;
1087 	v = s->v;
1088 	if (v >= TOK_IDENT && v < tok_ident) {
1089 		table_ident[v - TOK_IDENT]->sym_define = NULL;
1090 	}
1091 	s->v = 0;
1092 }
1093 
define_find(int v)1094 ST_INLN Sym *define_find(int v)
1095 {
1096 	v -= TOK_IDENT;
1097 	if ((unsigned) v >= (unsigned) (tok_ident - TOK_IDENT)) {
1098 		return NULL;
1099 	}
1100 	return table_ident[v]->sym_define;
1101 }
1102 
1103 /* free define stack until top reaches 'b' */
free_defines(Sym * b)1104 ST_FUNC void free_defines(Sym *b)
1105 {
1106 	Sym *top, *top1;
1107 	int v;
1108 
1109 	top = define_stack;
1110 	while (top != b) {
1111 		top1 = top->prev;
1112 		/* do not free args or predefined defines */
1113 		if (top->d) {
1114 			tok_str_free (top->d);
1115 		}
1116 		v = top->v;
1117 		if (v >= TOK_IDENT && v < tok_ident) {
1118 			table_ident[v - TOK_IDENT]->sym_define = NULL;
1119 		}
1120 		sym_free (top);
1121 		top = top1;
1122 	}
1123 	define_stack = b;
1124 }
1125 
1126 /* label lookup */
label_find(int v)1127 ST_FUNC Sym *label_find(int v)
1128 {
1129 	v -= TOK_IDENT;
1130 	if ((unsigned) v >= (unsigned) (tok_ident - TOK_IDENT)) {
1131 		return NULL;
1132 	}
1133 	return table_ident[v]->sym_label;
1134 }
1135 
label_push(Sym ** ptop,int v,int flags)1136 ST_FUNC Sym *label_push(Sym **ptop, int v, int flags)
1137 {
1138 	Sym *s, **ps;
1139 	s = sym_push2 (ptop, v, 0, 0);
1140 	if (!s) {
1141 		return s;
1142 	}
1143 	s->r = flags;
1144 	ps = &table_ident[v - TOK_IDENT]->sym_label;
1145 	if (ptop == &global_label_stack) {
1146 		/* modify the top most local identifier, so that
1147 		   sym_identifier will point to 's' when popped */
1148 		while (*ps != NULL)
1149 			ps = &(*ps)->prev_tok;
1150 	}
1151 	s->prev_tok = *ps;
1152 	*ps = s;
1153 	return s;
1154 }
1155 
1156 /* pop labels until element last is reached. Look if any labels are
1157    undefined. Define symbols if '&&label' was used. */
label_pop(Sym ** ptop,Sym * slast)1158 ST_FUNC void label_pop(Sym **ptop, Sym *slast)
1159 {
1160 	Sym *s, *s1;
1161 	for (s = *ptop; s != slast; s = s1) {
1162 		s1 = s->prev;
1163 		if (s->r == LABEL_DECLARED) {
1164 			tcc_warning ("label '%s' declared but not used", get_tok_str (s->v, NULL));
1165 		} else if (s->r == LABEL_FORWARD) {
1166 			tcc_error ("label '%s' used but not defined",
1167 				get_tok_str (s->v, NULL));
1168 		}
1169 		/* remove label */
1170 		table_ident[s->v - TOK_IDENT]->sym_label = s->prev_tok;
1171 		sym_free (s);
1172 	}
1173 	*ptop = slast;
1174 }
1175 
1176 /* eval an expression for #if/#elif */
expr_preprocess(void)1177 static int expr_preprocess(void)
1178 {
1179 	int c, t;
1180 	TokenString str;
1181 
1182 	tok_str_new (&str);
1183 	while (tok != TOK_LINEFEED && tok != TOK_EOF) {
1184 		next ();/* do macro subst */
1185 		if (tok == TOK_DEFINED) {
1186 			next_nomacro ();
1187 			t = tok;
1188 			if (t == '(') {
1189 				next_nomacro ();
1190 			}
1191 			c = define_find (tok) != 0;
1192 			if (t == '(') {
1193 				next_nomacro ();
1194 			}
1195 			tok = TOK_CINT;
1196 			tokc.i = c;
1197 		} else if (tok >= TOK_IDENT) {
1198 			/* if undefined macro */
1199 			tok = TOK_CINT;
1200 			tokc.i = 0;
1201 		}
1202 		tok_str_add_tok (&str);
1203 	}
1204 	tok_str_add (&str, -1);	/* simulate end of file */
1205 	tok_str_add (&str, 0);
1206 	/* now evaluate C constant expression */
1207 	macro_ptr = str.str;
1208 	next ();
1209 	c = expr_const ();
1210 	macro_ptr = NULL;
1211 	tok_str_free (str.str);
1212 	return c != 0;
1213 }
1214 
#if defined(PARSE_DEBUG) || defined(PP_DEBUG)
/* debug helper: print the spelling of every token of the zero-terminated
   token string 'str' between '<' and '>' on stdout */
static void tok_print(int *str)
{
	/* TOK_GET () takes a 'const int **' cursor; '&str' would be an
	   'int **', which is not a compatible pointer type, so walk a
	   const-qualified copy instead */
	const int *cursor = str;
	int t;
	CValue cval;

	printf ("<");
	for (;;) {
		TOK_GET (&t, &cursor, &cval);
		if (!t) {
			break;
		}
		printf ("%s", get_tok_str (t, &cval));
	}
	printf (">\n");
}
#endif
1232 
/* parse after #define */
/* On entry 'tok' is the macro name. An optional parameter list (only when
   '(' follows the name immediately) and the replacement list up to the
   end of the line are read, then the macro is registered with
   define_push (). Returns early, without defining anything, when
   sym_push2 () fails for a parameter. */
ST_FUNC void parse_define(void)
{
	Sym *s, *first, **ps;
	int v, t, varg, is_vaargs, spc;
	TokenString str;

	v = tok;
	if (v < TOK_IDENT) {
		tcc_error ("invalid macro name '%s'", get_tok_str (tok, &tokc));
	}
	/* XXX: should check if same macro (ANSI) */
	first = NULL;
	t = MACRO_OBJ;
	/* '(' must be just after macro definition for MACRO_FUNC */
	next_nomacro_spc ();
	if (tok == '(') {
		next_nomacro ();
		/* 'ps' is where the next parameter symbol gets linked */
		ps = &first;
		while (tok != ')') {
			varg = tok;
			next_nomacro ();
			is_vaargs = 0;
			if (varg == TOK_DOTS) {
				/* '...' alone: the parameter is named __VA_ARGS__ */
				varg = TOK___VA_ARGS__;
				is_vaargs = 1;
			} else if (tok == TOK_DOTS && gnu_ext) {
				/* GNU extension: a named parameter followed by '...' */
				is_vaargs = 1;
				next_nomacro ();
			}
			if (varg < TOK_IDENT) {
				tcc_error ("badly punctuated parameter list");
			}
			s = sym_push2 (&define_stack, varg | SYM_FIELD, is_vaargs, 0);
			if (!s) {
				return;
			}
			*ps = s;
			ps = &s->next;
			if (tok != ',') {
				break;
			}
			next_nomacro ();
		}
		if (tok == ')') {
			next_nomacro_spc ();
		}
		t = MACRO_FUNC;
	}
	tok_str_new (&str);
	/* spc: 0 = last stored token was not a space, 1 = it was a space,
	   2 = drop a following space without having stored one (used at the
	   start of the body and after '#' / '##') */
	spc = 2;
	/* EOF testing necessary for '-D' handling */
	while (tok != TOK_LINEFEED && tok != TOK_EOF) {
		/* remove spaces around ## and after '#' */
		if (TOK_TWOSHARPS == tok) {
			if (1 == spc) {
				/* drop the space stored just before the '##' */
				--str.len;
			}
			spc = 2;
		} else if ('#' == tok) {
			spc = 2;
		} else if (check_space (tok, &spc)) {
			goto skip;
		}
		tok_str_add2 (&str, tok, &tokc);
skip:
		next_nomacro_spc ();
	}
	if (spc == 1) {
		--str.len;	/* remove trailing space */
	}
	tok_str_add (&str, 0);
#ifdef PP_DEBUG
	printf ("define %s %d: ", get_tok_str (v, NULL), t);
	tok_print (str.str);
#endif
	define_push (v, t, str.str, first);
}
1311 
hash_cached_include(const char * filename)1312 static inline int hash_cached_include(const char *filename)
1313 {
1314 	const unsigned char *s;
1315 	unsigned int h;
1316 
1317 	h = TOK_HASH_INIT;
1318 	s = (const unsigned char *) filename;
1319 	while (*s) {
1320 		h = TOK_HASH_FUNC (h, *s);
1321 		s++;
1322 	}
1323 	h &= (CACHED_INCLUDES_HASH_SIZE - 1);
1324 	return h;
1325 }
1326 
search_cached_include(TCCState * s1,const char * filename)1327 static CachedInclude *search_cached_include(TCCState *s1, const char *filename)
1328 {
1329 	CachedInclude *e;
1330 	int i, h;
1331 	h = hash_cached_include (filename);
1332 	i = s1->cached_includes_hash[h];
1333 	for (;;) {
1334 		if (i == 0) {
1335 			break;
1336 		}
1337 		e = s1->cached_includes[i - 1];
1338 		if (0 == PATHCMP (e->filename, filename)) {
1339 			return e;
1340 		}
1341 		i = e->hash_next;
1342 	}
1343 	return NULL;
1344 }
1345 
/* Record that 'filename' is entirely guarded by '#ifndef ifndef_macro',
   so that a later #include of it can be skipped while that macro is
   defined. Does nothing if the file is already recorded or if memory
   for the new entry cannot be obtained. */
static inline void add_cached_include(TCCState *s1, const char *filename, int ifndef_macro)
{
	CachedInclude *e;
	int h;

	if (search_cached_include (s1, filename)) {
		return;
	}
#ifdef INC_DEBUG
	printf ("adding cached '%s' %s\n", filename, get_tok_str (ifndef_macro, NULL));
#endif
	/* NOTE(review): the size presumably relies on CachedInclude ending
	   in a one-byte filename array that holds the terminator - confirm */
	e = malloc (sizeof(CachedInclude) + strlen (filename));
	if (!e) {
		/* the cache only avoids re-reading a guarded header; without
		   an entry the file is simply opened and parsed again */
		return;
	}
	strcpy (e->filename, filename);
	e->ifndef_macro = ifndef_macro;
	dynarray_add ((void ***) &s1->cached_includes, &s1->nb_cached_includes, e);
	/* add in hash table: push the new entry at the head of its chain */
	h = hash_cached_include (filename);
	e->hash_next = s1->cached_includes_hash[h];
	s1->cached_includes_hash[h] = s1->nb_cached_includes;
}
1366 
/* Parse the tokens following '#pragma'. Only 'pack' is interpreted; it
   updates the pack stack of 's1'. Any other pragma is left to the
   caller. */
static void pragma_parse(TCCState *s1)
{
	int alignment;

	next ();
	if (tok != TOK_pack) {
		return;
	}
	/*
	  This may be:
	  #pragma pack(1) // set
	  #pragma pack() // reset to default
	  #pragma pack(push,1) // push & set
	  #pragma pack(pop) // restore previous
	*/
	next ();
	skip ('(');
	if (tok == TOK_ASM_pop) {
		next ();
		if (s1->pack_stack_ptr <= s1->pack_stack) {
stk_error:
			tcc_error ("out of pack stack");
		}
		s1->pack_stack_ptr--;
		return;
	}
	/* set, reset or push: 0 stands for "no explicit value" */
	alignment = 0;
	if (tok != ')') {
		if (tok == TOK_ASM_push) {
			next ();
			if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1) {
				goto stk_error;
			}
			s1->pack_stack_ptr++;
			skip (',');
		}
		if (tok != TOK_CINT) {
pack_error:
			tcc_error ("invalid pack pragma");
		}
		alignment = tokc.i;
		/* accept only the powers of two 1, 2, 4, 8 and 16 */
		if (alignment < 1 || alignment > 16 || (alignment & (alignment - 1)) != 0) {
			goto pack_error;
		}
		next ();
	}
	*s1->pack_stack_ptr = alignment;
	skip (')');
}
1415 
1416 /* is_bof is true if first non space token at beginning of file */
preprocess(int is_bof)1417 ST_FUNC void preprocess(int is_bof)
1418 {
1419 	TCCState *s1 = tcc_state;
1420 	int i, c, n, saved_parse_flags;
1421 	char buf[1024], *q;
1422 	Sym *s;
1423 
1424 	saved_parse_flags = parse_flags;
1425 	parse_flags = PARSE_FLAG_PREPROCESS | PARSE_FLAG_TOK_NUM |
1426 		      PARSE_FLAG_LINEFEED;
1427 	next_nomacro ();
1428 redo:
1429 	switch (tok) {
1430 	case TOK_DEFINE:
1431 		next_nomacro ();
1432 		parse_define ();
1433 		break;
1434 	case TOK_UNDEF:
1435 		next_nomacro ();
1436 		s = define_find (tok);
1437 		/* undefine symbol by putting an invalid name */
1438 		if (s) {
1439 			define_undef (s);
1440 		}
1441 		break;
1442 	case TOK_INCLUDE:
1443 	case TOK_INCLUDE_NEXT:
1444 		ch = file->buf_ptr[0];
1445 		/* XXX: incorrect if comments : use next_nomacro with a special mode */
1446 		skip_spaces ();
1447 		if (ch == '<') {
1448 			c = '>';
1449 			goto read_name;
1450 		} else if (ch == '\"') {
1451 			c = ch;
1452 read_name:
1453 			inp ();
1454 			q = buf;
1455 			while (ch != c && ch != '\n' && ch != CH_EOF) {
1456 				if ((q - buf) < sizeof(buf) - 1) {
1457 					*q++ = ch;
1458 				}
1459 				if (ch == '\\') {
1460 					if (handle_stray_noerror () == 0) {
1461 						--q;
1462 					}
1463 				} else {
1464 					inp ();
1465 				}
1466 			}
1467 			*q = '\0';
1468 			minp ();
1469 #if 0
1470 			/* eat all spaces and comments after include */
1471 			/* XXX: slightly incorrect */
1472 			while (ch1 != '\n' && ch1 != CH_EOF)
1473 				inp ();
1474 #endif
1475 		} else {
1476 			/* computed #include : either we have only strings or
1477 			   we have anything enclosed in '<>' */
1478 			next ();
1479 			buf[0] = '\0';
1480 			if (tok == TOK_STR) {
1481 				while (tok != TOK_LINEFEED) {
1482 					if (tok != TOK_STR) {
1483 include_syntax:
1484 						tcc_error ("'#include' expects \"FILENAME\" or <FILENAME>");
1485 					}
1486 					pstrcat (buf, sizeof(buf), (char *) tokc.cstr->data);
1487 					next ();
1488 				}
1489 				c = '\"';
1490 			} else {
1491 				int len;
1492 				while (tok != TOK_LINEFEED) {
1493 					pstrcat (buf, sizeof(buf), get_tok_str (tok, &tokc));
1494 					next ();
1495 				}
1496 				len = strlen (buf);
1497 				/* check syntax and remove '<>' */
1498 				if (len < 2 || buf[0] != '<' || buf[len - 1] != '>') {
1499 					goto include_syntax;
1500 				}
1501 				memmove (buf, buf + 1, len - 2);
1502 				buf[len - 2] = '\0';
1503 				c = '>';
1504 			}
1505 		}
1506 
1507 		if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE) {
1508 			tcc_error ("#include recursion too deep");
1509 		}
1510 		/* store current file in stack, but increment stack later below */
1511 		*s1->include_stack_ptr = file;
1512 
1513 		n = s1->nb_include_paths + s1->nb_sysinclude_paths;
1514 		for (i = -2; i < n; ++i) {
1515 			char buf1[sizeof file->filename];
1516 			CachedInclude *e;
1517 			BufferedFile **f;
1518 			const char *path;
1519 
1520 			if (i == -2) {
1521 				/* check absolute include path */
1522 				if (!IS_ABSPATH (buf)) {
1523 					continue;
1524 				}
1525 				buf1[0] = 0;
1526 				i = n;	/* force end loop */
1527 
1528 			} else if (i == -1) {
1529 				/* search in current dir if "header.h" */
1530 				if (c != '\"') {
1531 					continue;
1532 				}
1533 				path = file->filename;
1534 				pstrncpy (buf1, path, tcc_basename (path) - path);
1535 
1536 			} else {
1537 				/* search in all the include paths */
1538 				if (i < s1->nb_include_paths) {
1539 					path = s1->include_paths[i];
1540 				} else {
1541 					path = s1->sysinclude_paths[i - s1->nb_include_paths];
1542 				}
1543 				pstrcpy (buf1, sizeof(buf1), path);
1544 				pstrcat (buf1, sizeof(buf1), "/");
1545 			}
1546 
1547 			pstrcat (buf1, sizeof(buf1), buf);
1548 
1549 			if (tok == TOK_INCLUDE_NEXT) {
1550 				for (f = s1->include_stack_ptr; f >= s1->include_stack; --f) {
1551 					if (0 == PATHCMP ((*f)->filename, buf1)) {
1552 #ifdef INC_DEBUG
1553 						printf ("%s: #include_next skipping %s\n", file->filename, buf1);
1554 #endif
1555 						goto include_trynext;
1556 					}
1557 				}
1558 			}
1559 
1560 			e = search_cached_include (s1, buf1);
1561 			if (e && define_find (e->ifndef_macro)) {
1562 				/* no need to parse the include because the 'ifndef macro'
1563 				   is defined */
1564 #ifdef INC_DEBUG
1565 				printf ("%s: skipping cached %s\n", file->filename, buf1);
1566 #endif
1567 				goto include_done;
1568 			}
1569 
1570 			if (tcc_open (s1, buf1) < 0) {
1571 include_trynext:
1572 				continue;
1573 			}
1574 			fprintf (stderr, "#include \"%s\"\n", buf1);
1575 
1576 #ifdef INC_DEBUG
1577 			fprintf (stderr, "%s: including %s\n", file->prev->filename, file->filename);
1578 #endif
1579 			/* update target deps */
1580 			dynarray_add ((void ***) &s1->target_deps, &s1->nb_target_deps,
1581 				strdup (buf1));
1582 			/* push current file in stack */
1583 			++s1->include_stack_ptr;
1584 			tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1585 			ch = file->buf_ptr[0];
1586 			goto the_end;
1587 		}
1588 		/* load include file from the same directory as the parent */
1589 		{
1590 			char filepath[1024];
1591 			int filepath_len;
1592 			char *e = file->filename + strlen (file->filename);
1593 			while (e > file->filename) {
1594 				if (*e == R_SYS_DIR[0]) {
1595 					break;
1596 				}
1597 				e--;
1598 			}
1599 			filepath_len = R_MIN ((size_t) (e - file->filename) + 1, sizeof (filepath) - 1);
1600 			memcpy (filepath, file->filename, filepath_len);
1601 			strcpy (filepath + filepath_len, buf);
1602 			if (tcc_open (s1, filepath) < 0) {
1603 				if (!dir_name) {
1604 					dir_name = "/usr/include";
1605 				}
1606 				int len = snprintf (filepath, sizeof (filepath), "%s/%s", dir_name, buf);
1607 				if (len >= sizeof (filepath) || tcc_open (s1, filepath) < 0) {
1608 					tcc_error ("include file '%s' not found", filepath);
1609 				} else {
1610 					fprintf (stderr, "#include \"%s\"\n", filepath);
1611 					++s1->include_stack_ptr;
1612 					tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1613 					ch = file->buf_ptr[0];
1614 					goto the_end;
1615 				}
1616 			} else {
1617 				fprintf (stderr, "#include \"%s\"\n", filepath);
1618 				++s1->include_stack_ptr;
1619 				tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL;
1620 				ch = file->buf_ptr[0];
1621 				goto the_end;
1622 			}
1623 		}
1624 include_done:
1625 		break;
1626 	case TOK_IFNDEF:
1627 		c = 1;
1628 		goto do_ifdef;
1629 	case TOK_IF:
1630 		c = expr_preprocess ();
1631 		goto do_if;
1632 	case TOK_IFDEF:
1633 		c = 0;
1634 do_ifdef:
1635 		next_nomacro ();
1636 		if (tok < TOK_IDENT) {
1637 			tcc_error ("invalid argument for '#if%sdef'", c? "n": "");
1638 		}
1639 		if (is_bof) {
1640 			if (c) {
1641 #ifdef INC_DEBUG
1642 				printf ("#ifndef %s\n", get_tok_str (tok, NULL));
1643 #endif
1644 				file->ifndef_macro = tok;
1645 			}
1646 		}
1647 		c = (define_find (tok) != 0) ^ c;
1648 do_if:
1649 		if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE) {
1650 			tcc_error ("memory full");
1651 		}
1652 		*s1->ifdef_stack_ptr++ = c;
1653 		goto test_skip;
1654 	case TOK_ELSE:
1655 		if (s1->ifdef_stack_ptr == s1->ifdef_stack) {
1656 			tcc_error ("#else without matching #if");
1657 		}
1658 		if (s1->ifdef_stack_ptr[-1] & 2) {
1659 			tcc_error ("#else after #else");
1660 		}
1661 		c = (s1->ifdef_stack_ptr[-1] ^= 3);
1662 		goto test_else;
1663 	case TOK_ELIF:
1664 		if (s1->ifdef_stack_ptr == s1->ifdef_stack) {
1665 			tcc_error ("#elif without matching #if");
1666 		}
1667 		c = s1->ifdef_stack_ptr[-1];
1668 		if (c > 1) {
1669 			tcc_error ("#elif after #else");
1670 		}
1671 		/* last #if/#elif expression was true: we skip */
1672 		if (c == 1) {
1673 			goto skip;
1674 		}
1675 		c = expr_preprocess ();
1676 		s1->ifdef_stack_ptr[-1] = c;
1677 test_else:
1678 		if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1) {
1679 			file->ifndef_macro = 0;
1680 		}
1681 test_skip:
1682 		if (!(c & 1)) {
1683 skip:
1684 			preprocess_skip ();
1685 			is_bof = 0;
1686 			goto redo;
1687 		}
1688 		break;
1689 	case TOK_ENDIF:
1690 		if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr) {
1691 			tcc_error ("#endif without matching #if");
1692 		}
1693 		s1->ifdef_stack_ptr--;
1694 		/* '#ifndef macro' was at the start of file. Now we check if
1695 		   an '#endif' is exactly at the end of file */
1696 		if (file->ifndef_macro &&
1697 		    s1->ifdef_stack_ptr == file->ifdef_stack_ptr) {
1698 			file->ifndef_macro_saved = file->ifndef_macro;
1699 			/* need to set to zero to avoid false matches if another
1700 			   #ifndef at middle of file */
1701 			file->ifndef_macro = 0;
1702 			while (tok != TOK_LINEFEED)
1703 				next_nomacro ();
1704 			tok_flags |= TOK_FLAG_ENDIF;
1705 			goto the_end;
1706 		}
1707 		break;
1708 	case TOK_LINE:
1709 		next ();
1710 		if (tok != TOK_CINT) {
1711 			tcc_error ("#line");
1712 		}
1713 		file->line_num = tokc.i - 1;	/* the line number will be incremented after */
1714 		next ();
1715 		if (tok != TOK_LINEFEED) {
1716 			if (tok != TOK_STR) {
1717 				tcc_error ("#line");
1718 			}
1719 			pstrcpy (file->filename, sizeof(file->filename),
1720 				(char *) tokc.cstr->data);
1721 		}
1722 		break;
1723 	case TOK_ERROR:
1724 	case TOK_WARNING:
1725 		c = tok;
1726 		ch = file->buf_ptr[0];
1727 		skip_spaces ();
1728 		q = buf;
1729 		while (ch != '\n' && ch != CH_EOF) {
1730 			if ((q - buf) < sizeof(buf) - 1) {
1731 				*q++ = ch;
1732 			}
1733 			if (ch == '\\') {
1734 				if (handle_stray_noerror () == 0) {
1735 					--q;
1736 				}
1737 			} else {
1738 				inp ();
1739 			}
1740 		}
1741 		*q = '\0';
1742 		tcc_warning ("#%s %s", c == TOK_ERROR? "error": "warning", buf);
1743 		break;
1744 	case TOK_PRAGMA:
1745 		pragma_parse (s1);
1746 		break;
1747 	default:
1748 		if (tok == TOK_LINEFEED || tok == '!' || tok == TOK_PPNUM) {
1749 			/* '!' is ignored to allow C scripts. numbers are ignored
1750 			   to emulate cpp behaviour */
1751 		} else {
1752 			if (!(saved_parse_flags & PARSE_FLAG_ASM_COMMENTS)) {
1753 				tcc_warning ("Ignoring unknown preprocessing directive #%s", get_tok_str (tok, &tokc));
1754 			} else {
1755 				/* this is a gas line comment in an 'S' file. */
1756 				file->buf_ptr = parse_line_comment (file->buf_ptr);
1757 				goto the_end;
1758 			}
1759 		}
1760 		break;
1761 	}
1762 	/* ignore other preprocess commands or #! for C scripts */
1763 	while (tok != TOK_LINEFEED)
1764 		next_nomacro ();
1765 the_end:
1766 	parse_flags = saved_parse_flags;
1767 }
1768 
1769 /* evaluate escape codes in a string. */
parse_escape_string(CString * outstr,const uint8_t * buf,int is_long)1770 static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long)
1771 {
1772 	int c, n;
1773 	const uint8_t *p;
1774 
1775 	p = buf;
1776 	for (;;) {
1777 		c = *p;
1778 		if (c == '\0') {
1779 			break;
1780 		}
1781 		if (c == '\\') {
1782 			p++;
1783 			/* escape */
1784 			c = *p;
1785 			switch (c) {
1786 			case '0': case '1': case '2': case '3':
1787 			case '4': case '5': case '6': case '7':
1788 				/* at most three octal digits */
1789 				n = c - '0';
1790 				p++;
1791 				c = *p;
1792 				if (isoct (c)) {
1793 					n = n * 8 + c - '0';
1794 					p++;
1795 					c = *p;
1796 					if (isoct (c)) {
1797 						n = n * 8 + c - '0';
1798 						p++;
1799 					}
1800 				}
1801 				c = n;
1802 				goto add_char_nonext;
1803 			case 'x':
1804 			case 'u':
1805 			case 'U':
1806 				p++;
1807 				n = 0;
1808 				for (;;) {
1809 					c = *p;
1810 					if (c >= 'a' && c <= 'f') {
1811 						c = c - 'a' + 10;
1812 					} else if (c >= 'A' && c <= 'F') {
1813 						c = c - 'A' + 10;
1814 					} else if (isnum (c)) {
1815 						c = c - '0';
1816 					} else {
1817 						break;
1818 					}
1819 					n = n * 16 + c;
1820 					p++;
1821 				}
1822 				c = n;
1823 				goto add_char_nonext;
1824 			case 'a':
1825 				c = '\a';
1826 				break;
1827 			case 'b':
1828 				c = '\b';
1829 				break;
1830 			case 'f':
1831 				c = '\f';
1832 				break;
1833 			case 'n':
1834 				c = '\n';
1835 				break;
1836 			case 'r':
1837 				c = '\r';
1838 				break;
1839 			case 't':
1840 				c = '\t';
1841 				break;
1842 			case 'v':
1843 				c = '\v';
1844 				break;
1845 			case 'e':
1846 				if (!gnu_ext) {
1847 					goto invalid_escape;
1848 				}
1849 				c = 27;
1850 				break;
1851 			case '\'':
1852 			case '\"':
1853 			case '\\':
1854 			case '?':
1855 				break;
1856 			default:
1857 invalid_escape:
1858 				if (c >= '!' && c <= '~') {
1859 					tcc_warning ("unknown escape sequence: \'\\%c\'", c);
1860 				} else {
1861 					tcc_warning ("unknown escape sequence: \'\\x%x\'", c);
1862 				}
1863 				break;
1864 			}
1865 		}
1866 		p++;
1867 add_char_nonext:
1868 		if (!is_long) {
1869 			cstr_ccat (outstr, c);
1870 		} else {
1871 			cstr_wccat (outstr, c);
1872 		}
1873 	}
1874 	/* add a trailing '\0' */
1875 	if (!is_long) {
1876 		cstr_ccat (outstr, '\0');
1877 	} else {
1878 		cstr_wccat (outstr, '\0');
1879 	}
1880 }
1881 
/* we use 64 bit numbers */
#define BN_SIZE 2

/* bn = (bn << shift) | or_val
   'bn' holds BN_SIZE words of 32 bits each, least significant word
   first. The bits shifted out of one word are carried into the next
   one; 'or_val' fills the bits that become free in the lowest word.
   'shift' must be in the range 0..31. */
static void bn_lshift(unsigned int *bn, int shift, int or_val)
{
	int i;
	unsigned int v;

	if (shift == 0) {
		/* no bit changes word; the carry computation below would
		   shift by the full word width, which is undefined */
		bn[0] |= or_val;
		return;
	}
	for (i = 0; i < BN_SIZE; i++) {
		v = bn[i];
		bn[i] = (v << shift) | or_val;
		/* the top 'shift' bits become the low bits of the next word */
		or_val = v >> (32 - shift);
	}
}
1896 
bn_zero(unsigned int * bn)1897 static void bn_zero(unsigned int *bn)
1898 {
1899 	int i;
1900 	for (i = 0; i < BN_SIZE; i++) {
1901 		bn[i] = 0;
1902 	}
1903 }
1904 
1905 /* parse number in null terminated string 'p' and return it in the
1906    current token */
parse_number(const char * p)1907 static void parse_number(const char *p)
1908 {
1909 	int b, t, shift, frac_bits, s, exp_val, ch;
1910 	char *q;
1911 	unsigned int bn[BN_SIZE];
1912 	double d;
1913 
1914 	/* number */
1915 	q = token_buf;
1916 	ch = *p++;
1917 	t = ch;
1918 	ch = *p++;
1919 	*q++ = t;
1920 	b = 10;
1921 	if (t == '.') {
1922 		goto float_frac_parse;
1923 	} else if (t == '0') {
1924 		if (ch == 'x' || ch == 'X') {
1925 			q--;
1926 			ch = *p++;
1927 			b = 16;
1928 		} else if (tcc_ext && (ch == 'b' || ch == 'B')) {
1929 			q--;
1930 			ch = *p++;
1931 			b = 2;
1932 		}
1933 	}
1934 	/* parse all digits. cannot check octal numbers at this stage
1935 	   because of floating point constants */
1936 	while (1) {
1937 		if (ch >= 'a' && ch <= 'f') {
1938 			t = ch - 'a' + 10;
1939 		} else if (ch >= 'A' && ch <= 'F') {
1940 			t = ch - 'A' + 10;
1941 		} else if (isnum (ch)) {
1942 			t = ch - '0';
1943 		} else {
1944 			break;
1945 		}
1946 		if (t >= b) {
1947 			break;
1948 		}
1949 		if (q >= token_buf + STRING_MAX_SIZE) {
1950 num_too_long:
1951 			tcc_error ("number too long");
1952 		}
1953 		*q++ = ch;
1954 		ch = *p++;
1955 	}
1956 	if (ch == '.' ||
1957 	    ((ch == 'e' || ch == 'E') && b == 10) ||
1958 	    ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) {
1959 		if (b != 10) {
1960 			/* NOTE: strtox should support that for hexa numbers, but
1961 			   non ISOC99 libcs do not support it, so we prefer to do
1962 			   it by hand */
1963 			/* hexadecimal or binary floats */
1964 			/* XXX: handle overflows */
1965 			*q = '\0';
1966 			if (b == 16) {
1967 				shift = 4;
1968 			} else {
1969 				shift = 2;
1970 			}
1971 			bn_zero (bn);
1972 			q = token_buf;
1973 			while (1) {
1974 				t = *q++;
1975 				if (t == '\0') {
1976 					break;
1977 				} else if (t >= 'a') {
1978 					t = t - 'a' + 10;
1979 				} else if (t >= 'A') {
1980 					t = t - 'A' + 10;
1981 				} else {
1982 					t = t - '0';
1983 				}
1984 				bn_lshift (bn, shift, t);
1985 			}
1986 			frac_bits = 0;
1987 			if (ch == '.') {
1988 				ch = *p++;
1989 				while (1) {
1990 					t = ch;
1991 					if (t >= 'a' && t <= 'f') {
1992 						t = t - 'a' + 10;
1993 					} else if (t >= 'A' && t <= 'F') {
1994 						t = t - 'A' + 10;
1995 					} else if (t >= '0' && t <= '9') {
1996 						t = t - '0';
1997 					} else {
1998 						break;
1999 					}
2000 					if (t >= b) {
2001 						tcc_error ("invalid digit");
2002 					}
2003 					bn_lshift (bn, shift, t);
2004 					frac_bits += shift;
2005 					ch = *p++;
2006 				}
2007 			}
2008 			if (ch != 'p' && ch != 'P') {
2009 				expect ("exponent");
2010 				return;
2011 			}
2012 			ch = *p++;
2013 			s = 1;
2014 			exp_val = 0;
2015 			if (ch == '+') {
2016 				ch = *p++;
2017 			} else if (ch == '-') {
2018 				s = -1;
2019 				ch = *p++;
2020 			}
2021 			if (ch < '0' || ch > '9') {
2022 				expect ("exponent digits");
2023 				return;
2024 			}
2025 			while (ch >= '0' && ch <= '9') {
2026 				exp_val = exp_val * 10 + ch - '0';
2027 				ch = *p++;
2028 			}
2029 			exp_val = exp_val * s;
2030 
2031 			/* now we can generate the number */
2032 			/* XXX: should patch directly float number */
2033 			d = (double) bn[1] * 4294967296.0 + (double) bn[0];
2034 			d = ldexp (d, exp_val - frac_bits);
2035 			t = toup (ch);
2036 			if (t == 'F') {
2037 				ch = *p++;
2038 				tok = TOK_CFLOAT;
2039 				/* float : should handle overflow */
2040 				tokc.f = (float) d;
2041 			} else if (t == 'L') {
2042 				ch = *p++;
2043 #ifdef TCC_TARGET_PE
2044 				tok = TOK_CDOUBLE;
2045 				tokc.d = d;
2046 #else
2047 				tok = TOK_CLDOUBLE;
2048 				/* XXX: not large enough */
2049 				tokc.ld = (long double) d;
2050 #endif
2051 			} else {
2052 				tok = TOK_CDOUBLE;
2053 				tokc.d = d;
2054 			}
2055 		} else {
2056 			/* decimal floats */
2057 			if (ch == '.') {
2058 				if (q >= token_buf + STRING_MAX_SIZE) {
2059 					goto num_too_long;
2060 				}
2061 				*q++ = ch;
2062 				ch = *p++;
2063 float_frac_parse:
2064 				while (ch >= '0' && ch <= '9') {
2065 					if (q >= token_buf + STRING_MAX_SIZE) {
2066 						goto num_too_long;
2067 					}
2068 					*q++ = ch;
2069 					ch = *p++;
2070 				}
2071 			}
2072 			if (ch == 'e' || ch == 'E') {
2073 				if (q >= token_buf + STRING_MAX_SIZE) {
2074 					goto num_too_long;
2075 				}
2076 				*q++ = ch;
2077 				ch = *p++;
2078 				if (ch == '-' || ch == '+') {
2079 					if (q >= token_buf + STRING_MAX_SIZE) {
2080 						goto num_too_long;
2081 					}
2082 					*q++ = ch;
2083 					ch = *p++;
2084 				}
2085 				if (ch < '0' || ch > '9') {
2086 					expect ("exponent digits");
2087 					return;
2088 				}
2089 				while (ch >= '0' && ch <= '9') {
2090 					if (q >= token_buf + STRING_MAX_SIZE) {
2091 						goto num_too_long;
2092 					}
2093 					*q++ = ch;
2094 					ch = *p++;
2095 				}
2096 			}
2097 			*q = '\0';
2098 			t = toup (ch);
2099 			errno = 0;
2100 			if (t == 'F') {
2101 				ch = *p++;
2102 				tok = TOK_CFLOAT;
2103 				tokc.f = strtof (token_buf, NULL);
2104 			} else if (t == 'L') {
2105 				ch = *p++;
2106 				tok = TOK_CDOUBLE;
2107 				tokc.d = strtod (token_buf, NULL);
2108 #if 0
2109 #ifdef TCC_TARGET_PE
2110 				tok = TOK_CDOUBLE;
2111 				tokc.d = strtod (token_buf, NULL);
2112 #else
2113 				tok = TOK_CLDOUBLE;
2114 				tokc.ld = strtold (token_buf, NULL);
2115 #endif
2116 #endif
2117 			} else {
2118 				tok = TOK_CDOUBLE;
2119 				tokc.d = strtod (token_buf, NULL);
2120 			}
2121 		}
2122 	} else {
2123 		unsigned long long n, n1;
2124 		int lcount, ucount;
2125 
2126 		/* integer number */
2127 		*q = '\0';
2128 		q = token_buf;
2129 		if (b == 10 && *q == '0') {
2130 			b = 8;
2131 			q++;
2132 		}
2133 		n = 0;
2134 		while (1) {
2135 			t = *q++;
2136 			/* no need for checks except for base 10 / 8 errors */
2137 			if (t == '\0') {
2138 				break;
2139 			} else if (t >= 'a') {
2140 				t = t - 'a' + 10;
2141 			} else if (t >= 'A') {
2142 				t = t - 'A' + 10;
2143 			} else {
2144 				t = t - '0';
2145 				if (t >= b) {
2146 					tcc_error ("invalid digit");
2147 				}
2148 			}
2149 			n1 = n;
2150 			n = n * b + t;
2151 			/* detect overflow */
2152 			/* XXX: this test is not reliable */
2153 			if (n < n1) {
2154 				tcc_error ("integer constant overflow");
2155 			}
2156 		}
2157 
2158 		/* XXX: not exactly ANSI compliant */
2159 		if ((n & 0xffffffff00000000LL) != 0) {
2160 			if ((n >> 63) != 0) {
2161 				tok = TOK_CULLONG;
2162 			} else {
2163 				tok = TOK_CLLONG;
2164 			}
2165 		} else if (n > 0x7fffffff) {
2166 			tok = TOK_CUINT;
2167 		} else {
2168 			tok = TOK_CINT;
2169 		}
2170 		lcount = 0;
2171 		ucount = 0;
2172 		for (;;) {
2173 			t = toup (ch);
2174 			if (t == 'L') {
2175 				if (lcount >= 2) {
2176 					tcc_error ("three 'l's in integer constant");
2177 				}
2178 				lcount++;
2179 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2180 				if (lcount == 2) {
2181 #endif
2182 				if (tok == TOK_CINT) {
2183 					tok = TOK_CLLONG;
2184 				} else if (tok == TOK_CUINT) {
2185 					tok = TOK_CULLONG;
2186 				}
2187 #if !defined TCC_TARGET_X86_64 || defined TCC_TARGET_PE
2188 			}
2189 #endif
2190 				ch = *p++;
2191 			} else if (t == 'U') {
2192 				if (ucount >= 1) {
2193 					tcc_error ("two 'u's in integer constant");
2194 				}
2195 				ucount++;
2196 				if (tok == TOK_CINT) {
2197 					tok = TOK_CUINT;
2198 				} else if (tok == TOK_CLLONG) {
2199 					tok = TOK_CULLONG;
2200 				}
2201 				ch = *p++;
2202 			} else {
2203 				break;
2204 			}
2205 		}
2206 		if (tok == TOK_CINT || tok == TOK_CUINT) {
2207 			tokc.ui = n;
2208 		} else {
2209 			tokc.ull = n;
2210 		}
2211 	}
2212 	if (ch) {
2213 		tcc_error ("invalid number\n");
2214 	}
2215 }
2216 
2217 
/* Expands to one complete 'case' of the tokenizer switch for an
   operator that is either the single character c1 (token tok1) or c1
   directly followed by c2 (token tok2). Uses the variables 'c', 'p'
   and 'tok' of the function it is expanded in. */
#define PARSE2(c1, tok1, c2, tok2)	\
case c1:				\
	PEEKC (c, p);			\
	if (c != c2) {			\
		tok = tok1;		\
	} else {			\
		p++;			\
		tok = tok2;		\
	}				\
	break;
2228 
2229 /* return next token without macro substitution */
next_nomacro1(void)2230 static inline void next_nomacro1(void)
2231 {
2232 	int t, c, is_long;
2233 	TokenSym *ts;
2234 	uint8_t *p, *p1;
2235 	unsigned int h;
2236 
2237 	p = file->buf_ptr;
2238 redo_no_start:
2239 	c = *p;
2240 	switch (c) {
2241 	case ' ':
2242 	case '\t':
2243 		tok = c;
2244 		p++;
2245 		goto keep_tok_flags;
2246 	case '\f':
2247 	case '\v':
2248 	case '\r':
2249 		p++;
2250 		goto redo_no_start;
2251 	case '\\':
2252 		/* first look if it is in fact an end of buffer */
2253 		if (p >= file->buf_end) {
2254 			file->buf_ptr = p;
2255 			handle_eob ();
2256 			p = file->buf_ptr;
2257 			if (p >= file->buf_end) {
2258 				goto parse_eof;
2259 			} else {
2260 				goto redo_no_start;
2261 			}
2262 		} else {
2263 			file->buf_ptr = p;
2264 			ch = *p;
2265 			handle_stray ();
2266 			p = file->buf_ptr;
2267 			goto redo_no_start;
2268 		}
2269 parse_eof:
2270 		{
2271 			TCCState *s1 = tcc_state;
2272 			if ((parse_flags & PARSE_FLAG_LINEFEED)
2273 			    && !(tok_flags & TOK_FLAG_EOF)) {
2274 				tok_flags |= TOK_FLAG_EOF;
2275 				tok = TOK_LINEFEED;
2276 				goto keep_tok_flags;
2277 			} else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) {
2278 				tok = TOK_EOF;
2279 			} else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) {
2280 				tcc_error ("missing #endif");
2281 			} else if (s1->include_stack_ptr == s1->include_stack) {
2282 				/* no include left : end of file. */
2283 				tok = TOK_EOF;
2284 			} else {
2285 				tok_flags &= ~TOK_FLAG_EOF;
2286 				/* pop include file */
2287 
2288 				/* test if previous '#endif' was after a #ifdef at
2289 				   start of file */
2290 				if (tok_flags & TOK_FLAG_ENDIF) {
2291 #ifdef INC_DEBUG
2292 					printf ("#endif %s\n", get_tok_str (file->ifndef_macro_saved, NULL));
2293 #endif
2294 					add_cached_include (s1, file->filename, file->ifndef_macro_saved);
2295 					tok_flags &= ~TOK_FLAG_ENDIF;
2296 				}
2297 
2298 				/* pop include stack */
2299 				tcc_close ();
2300 				s1->include_stack_ptr--;
2301 				p = file->buf_ptr;
2302 				goto redo_no_start;
2303 			}
2304 		}
2305 		break;
2306 
2307 	case '\n':
2308 		file->line_num++;
2309 		tok_flags |= TOK_FLAG_BOL;
2310 		p++;
2311 maybe_newline:
2312 		if (0 == (parse_flags & PARSE_FLAG_LINEFEED)) {
2313 			goto redo_no_start;
2314 		}
2315 		tok = TOK_LINEFEED;
2316 		goto keep_tok_flags;
2317 
2318 	case '#':
2319 		/* XXX: simplify */
2320 		PEEKC (c, p);
2321 		if ((tok_flags & TOK_FLAG_BOL) &&
2322 		    (parse_flags & PARSE_FLAG_PREPROCESS)) {
2323 			file->buf_ptr = p;
2324 			preprocess (tok_flags & TOK_FLAG_BOF);
2325 			p = file->buf_ptr;
2326 			goto maybe_newline;
2327 		} else {
2328 			if (c == '#') {
2329 				p++;
2330 				tok = TOK_TWOSHARPS;
2331 			} else {
2332 				if (parse_flags & PARSE_FLAG_ASM_COMMENTS) {
2333 					p = parse_line_comment (p - 1);
2334 					goto redo_no_start;
2335 				} else {
2336 					tok = '#';
2337 				}
2338 			}
2339 		}
2340 		break;
2341 
2342 	case 'a': case 'b': case 'c': case 'd':
2343 	case 'e': case 'f': case 'g': case 'h':
2344 	case 'i': case 'j': case 'k': case 'l':
2345 	case 'm': case 'n': case 'o': case 'p':
2346 	case 'q': case 'r': case 's': case 't':
2347 	case 'u': case 'v': case 'w': case 'x':
2348 	case 'y': case 'z':
2349 	case 'A': case 'B': case 'C': case 'D':
2350 	case 'E': case 'F': case 'G': case 'H':
2351 	case 'I': case 'J': case 'K':
2352 	case 'M': case 'N': case 'O': case 'P':
2353 	case 'Q': case 'R': case 'S': case 'T':
2354 	case 'U': case 'V': case 'W': case 'X':
2355 	case 'Y': case 'Z':
2356 	case '_': case '.':
2357 parse_ident_fast:
2358 		p1 = p;
2359 		h = TOK_HASH_INIT;
2360 		h = TOK_HASH_FUNC (h, c);
2361 		p++;
2362 		for (;;) {
2363 			c = *p;
2364 			if (!isidnum_table[*p - CH_EOF]) {
2365 				break;
2366 			}
2367 			// dot handling here too
2368 			if (isdot (c)) {
2369 				PEEKC (c, p);
2370 				if (isnum (c)) {
2371 					cstr_reset (&tokcstr);
2372 					cstr_ccat (&tokcstr, '.');
2373 					goto parse_num;
2374 				} else if (isdot (c)) {
2375 					goto parse_dots;
2376 				}
2377 			}
2378 			h = TOK_HASH_FUNC (h, *p);
2379 			p++;
2380 		}
2381 		if (c != '\\') {
2382 			TokenSym **pts;
2383 			int len;
2384 
2385 			/* fast case : no stray found, so we have the full token
2386 			   and we have already hashed it */
2387 			len = p - p1;
2388 			h &= (TOK_HASH_SIZE - 1);
2389 			pts = &hash_ident[h];
2390 			for (;;) {
2391 				ts = *pts;
2392 				if (!ts) {
2393 					break;
2394 				}
2395 				if (ts->len == len && !memcmp (ts->str, p1, len)) {
2396 					goto token_found;
2397 				}
2398 				pts = &(ts->hash_next);
2399 			}
2400 			ts = tok_alloc_new (pts, (const char *) p1, len);
2401 token_found:
2402 			;
2403 		} else {
2404 			/* slower case */
2405 			cstr_reset (&tokcstr);
2406 
2407 			while (p1 < p) {
2408 				cstr_ccat (&tokcstr, *p1);
2409 				p1++;
2410 			}
2411 			p--;
2412 			PEEKC (c, p);
2413 parse_ident_slow:
2414 			while (isidnum_table[((c > 255)? 255: c) - CH_EOF]) {
2415 				cstr_ccat (&tokcstr, c);
2416 				PEEKC (c, p);
2417 			}
2418 			ts = tok_alloc (tokcstr.data, tokcstr.size);
2419 		}
2420 		tok = ts->tok;
2421 		break;
2422 	case 'L':
2423 		t = p[1];
2424 		if (t != '\\' && t != '\'' && t != '\"') {
2425 			/* fast case */
2426 			goto parse_ident_fast;
2427 		} else {
2428 			PEEKC (c, p);
2429 			if (c == '\'' || c == '\"') {
2430 				is_long = 1;
2431 				goto str_const;
2432 			} else {
2433 				cstr_reset (&tokcstr);
2434 				cstr_ccat (&tokcstr, 'L');
2435 				goto parse_ident_slow;
2436 			}
2437 		}
2438 		break;
2439 	case '0': case '1': case '2': case '3':
2440 	case '4': case '5': case '6': case '7':
2441 	case '8': case '9':
2442 
2443 		cstr_reset (&tokcstr);
2444 		/* after the first digit, accept digits, alpha, '.' or sign if
2445 		   prefixed by 'eEpP' */
2446 parse_num:
2447 		for (;;) {
2448 			t = c;
2449 			cstr_ccat (&tokcstr, c);
2450 			PEEKC (c, p);
2451 			if (!(isnum (c) || isid (c) || isdot (c) ||
2452 			      ((c == '+' || c == '-') &&
2453 			       (t == 'e' || t == 'E' || t == 'p' || t == 'P')))) {
2454 				break;
2455 			}
2456 		}
2457 		/* We add a trailing '\0' to ease parsing */
2458 		cstr_ccat (&tokcstr, '\0');
2459 		tokc.cstr = &tokcstr;
2460 		tok = TOK_PPNUM;
2461 		break;
2462 		/* special dot handling because it can also start a number */
2463 parse_dots:
2464 		if (!isdot (c)) {
2465 			expect ("'.'");
2466 			return;
2467 		}
2468 		PEEKC (c, p);
2469 		tok = TOK_DOTS;
2470 		break;
2471 	case '\'':
2472 	case '\"':
2473 		is_long = 0;
2474 str_const:
2475 		{
2476 			CString str;
2477 			int sep;
2478 
2479 			sep = c;
2480 
2481 			/* parse the string */
2482 			cstr_new (&str);
2483 			p = parse_pp_string (p, sep, &str);
2484 			if (!p) {
2485 				return;
2486 			}
2487 			cstr_ccat (&str, '\0');
2488 
2489 			/* eval the escape (should be done as TOK_PPNUM) */
2490 			cstr_reset (&tokcstr);
2491 			parse_escape_string (&tokcstr, str.data, is_long);
2492 			cstr_free (&str);
2493 
2494 			if (sep == '\'') {
2495 				int char_size;
2496 				/* XXX: make it portable */
2497 				if (!is_long) {
2498 					char_size = 1;
2499 				} else {
2500 					char_size = sizeof(nwchar_t);
2501 				}
2502 				if (tokcstr.size <= char_size) {
2503 					tcc_error ("empty character constant");
2504 				}
2505 				if (tokcstr.size > 2 * char_size) {
2506 					tcc_warning ("multi-character character constant");
2507 				}
2508 				if (!is_long) {
2509 					tokc.i = *(int8_t *) tokcstr.data;
2510 					tok = TOK_CCHAR;
2511 				} else {
2512 					tokc.i = *(nwchar_t *) tokcstr.data;
2513 					tok = TOK_LCHAR;
2514 				}
2515 			} else {
2516 				tokc.cstr = &tokcstr;
2517 				if (!is_long) {
2518 					tok = TOK_STR;
2519 				} else {
2520 					tok = TOK_LSTR;
2521 				}
2522 			}
2523 		}
2524 		break;
2525 
2526 	case '<':
2527 		PEEKC (c, p);
2528 		if (c == '=') {
2529 			p++;
2530 			tok = TOK_LE;
2531 		} else if (c == '<') {
2532 			PEEKC (c, p);
2533 			if (c == '=') {
2534 				p++;
2535 				tok = TOK_A_SHL;
2536 			} else {
2537 				tok = TOK_SHL;
2538 			}
2539 		} else {
2540 			tok = TOK_LT;
2541 		}
2542 		break;
2543 
2544 	case '>':
2545 		PEEKC (c, p);
2546 		if (c == '=') {
2547 			p++;
2548 			tok = TOK_GE;
2549 		} else if (c == '>') {
2550 			PEEKC (c, p);
2551 			if (c == '=') {
2552 				p++;
2553 				tok = TOK_A_SAR;
2554 			} else {
2555 				tok = TOK_SAR;
2556 			}
2557 		} else {
2558 			tok = TOK_GT;
2559 		}
2560 		break;
2561 
2562 	case '&':
2563 		PEEKC (c, p);
2564 		if (c == '&') {
2565 			p++;
2566 			tok = TOK_LAND;
2567 		} else if (c == '=') {
2568 			p++;
2569 			tok = TOK_A_AND;
2570 		} else {
2571 			tok = '&';
2572 		}
2573 		break;
2574 
2575 	case '|':
2576 		PEEKC (c, p);
2577 		if (c == '|') {
2578 			p++;
2579 			tok = TOK_LOR;
2580 		} else if (c == '=') {
2581 			p++;
2582 			tok = TOK_A_OR;
2583 		} else {
2584 			tok = '|';
2585 		}
2586 		break;
2587 
2588 	case '+':
2589 		PEEKC (c, p);
2590 		if (c == '+') {
2591 			p++;
2592 			tok = TOK_INC;
2593 		} else if (c == '=') {
2594 			p++;
2595 			tok = TOK_A_ADD;
2596 		} else {
2597 			tok = '+';
2598 		}
2599 		break;
2600 
2601 	case '-':
2602 		PEEKC (c, p);
2603 		if (c == '-') {
2604 			p++;
2605 			tok = TOK_DEC;
2606 		} else if (c == '=') {
2607 			p++;
2608 			tok = TOK_A_SUB;
2609 		} else if (c == '>') {
2610 			p++;
2611 			tok = TOK_ARROW;
2612 		} else {
2613 			tok = '-';
2614 		}
2615 		break;
2616 
2617 		PARSE2 ('!', '!', '=', TOK_NE)
2618 		PARSE2 ('=', '=', '=', TOK_EQ)
2619 		PARSE2 ('*', '*', '=', TOK_A_MUL)
2620 		PARSE2 ('%', '%', '=', TOK_A_MOD)
2621 		PARSE2 ('^', '^', '=', TOK_A_XOR)
2622 
2623 	/* comments or operator */
2624 	case '/':
2625 		PEEKC (c, p);
2626 		if (c == '*') {
2627 			p = parse_comment (p);
2628 			/* comments replaced by a blank */
2629 			tok = ' ';
2630 			goto keep_tok_flags;
2631 		} else if (c == '/') {
2632 			p = parse_line_comment (p);
2633 			tok = ' ';
2634 			goto keep_tok_flags;
2635 		} else if (c == '=') {
2636 			p++;
2637 			tok = TOK_A_DIV;
2638 		} else {
2639 			tok = '/';
2640 		}
2641 		break;
2642 
2643 	/* simple tokens */
2644 	case '(':
2645 	case ')':
2646 	case '[':
2647 	case ']':
2648 	case '{':
2649 	case '}':
2650 	case ',':
2651 	case ';':
2652 	case ':':
2653 	case '?':
2654 	case '~':
2655 	case '$':	/* only used in assembler */
2656 	case '@':	/* dito */
2657 		tok = c;
2658 		p++;
2659 		break;
2660 	default:
2661 		tcc_error ("unrecognized character \\x%02x", c);
2662 		break;
2663 	}
2664 	tok_flags = 0;
2665 keep_tok_flags:
2666 	file->buf_ptr = p;
2667 #if defined(PARSE_DEBUG)
2668 	printf ("token = %s\n", get_tok_str (tok, &tokc));
2669 #endif
2670 }
2671 
2672 /* return next token without macro substitution. Can read input from
2673    macro_ptr buffer */
next_nomacro_spc(void)2674 static void next_nomacro_spc(void)
2675 {
2676 	if (macro_ptr) {
2677 redo:
2678 		tok = *macro_ptr;
2679 		if (tok) {
2680 			TOK_GET (&tok, &macro_ptr, &tokc);
2681 			if (tok == TOK_LINENUM) {
2682 				file->line_num = tokc.i;
2683 				goto redo;
2684 			}
2685 		}
2686 	} else {
2687 		next_nomacro1 ();
2688 	}
2689 }
2690 
next_nomacro(void)2691 ST_FUNC void next_nomacro(void)
2692 {
2693 	do {
2694 		next_nomacro_spc ();
2695 	} while (tcc_nerr () == 0 && is_space (tok));
2696 }
2697 
2698 /* substitute args in macro_str and return allocated string */
macro_arg_subst(Sym ** nested_list,const int * macro_str,Sym * args)2699 static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args)
2700 {
2701 	int last_tok, t, spc;
2702 	const int *st;
2703 	Sym *s;
2704 	CValue cval;
2705 	TokenString str;
2706 	CString cstr;
2707 
2708 	tok_str_new (&str);
2709 	last_tok = 0;
2710 	while (tcc_nerr () == 0) {
2711 		TOK_GET (&t, &macro_str, &cval);
2712 		if (!t) {
2713 			break;
2714 		}
2715 		if (t == '#') {
2716 			/* stringize */
2717 			TOK_GET (&t, &macro_str, &cval);
2718 			if (!t) {
2719 				break;
2720 			}
2721 			s = sym_find2 (args, t);
2722 			if (s) {
2723 				cstr_new (&cstr);
2724 				st = s->d;
2725 				spc = 0;
2726 				while (*st) {
2727 					TOK_GET (&t, &st, &cval);
2728 					if (!check_space (t, &spc)) {
2729 						cstr_cat (&cstr, get_tok_str (t, &cval));
2730 					}
2731 				}
2732 				cstr.size -= spc;
2733 				cstr_ccat (&cstr, '\0');
2734 #ifdef PP_DEBUG
2735 				printf ("stringize: %s\n", (char *) cstr.data);
2736 #endif
2737 				/* add string */
2738 				cval.cstr = &cstr;
2739 				tok_str_add2 (&str, TOK_STR, &cval);
2740 				cstr_free (&cstr);
2741 			} else {
2742 				tok_str_add2 (&str, t, &cval);
2743 			}
2744 		} else if (t >= TOK_IDENT) {
2745 			s = sym_find2 (args, t);
2746 			if (s) {
2747 				st = s->d;
2748 				/* if '##' is present before or after, no arg substitution */
2749 				if (*macro_str == TOK_TWOSHARPS || last_tok == TOK_TWOSHARPS) {
2750 					/* special case for var arg macros : ## eats the
2751 					   ',' if empty VA_ARGS variable. */
2752 					/* XXX: test of the ',' is not 100%
2753 					   reliable. should fix it to avoid security
2754 					   problems */
2755 					if (gnu_ext && s->type.t &&
2756 					    last_tok == TOK_TWOSHARPS &&
2757 					    str.len >= 2 && str.str[str.len - 2] == ',') {
2758 						if (*st == 0) {
2759 							/* suppress ',' '##' */
2760 							str.len -= 2;
2761 						} else {
2762 							/* suppress '##' and add variable */
2763 							str.len--;
2764 							goto add_var;
2765 						}
2766 					} else {
2767 						int t1;
2768 add_var:
2769 						for (;;) {
2770 							TOK_GET (&t1, &st, &cval);
2771 							if (!t1) {
2772 								break;
2773 							}
2774 							tok_str_add2 (&str, t1, &cval);
2775 						}
2776 					}
2777 				} else {
2778 					/* NOTE: the stream cannot be read when macro
2779 					   substituing an argument */
2780 					macro_subst (&str, nested_list, st, NULL);
2781 				}
2782 			} else {
2783 				tok_str_add (&str, t);
2784 			}
2785 		} else {
2786 			tok_str_add2 (&str, t, &cval);
2787 		}
2788 		last_tok = t;
2789 	}
2790 	tok_str_add (&str, 0);
2791 	return str.str;
2792 }
2793 
/* Abbreviated English month names, one NUL terminated 3-letter entry
   per month, January first. */
static const char ab_month_name[12][4] = {
	"Jan", "Feb", "Mar",
	"Apr", "May", "Jun",
	"Jul", "Aug", "Sep",
	"Oct", "Nov", "Dec",
};
2799 
2800 /* do macro substitution of current token with macro 's' and add
2801    result to (tok_str,tok_len). 'nested_list' is the list of all
2802    macros we got inside to avoid recursing. Return non zero if no
2803    substitution needs to be done */
macro_subst_tok(TokenString * tok_str,Sym ** nested_list,Sym * s,struct macro_level ** can_read_stream)2804 static int macro_subst_tok(TokenString *tok_str,
2805 			   Sym **nested_list, Sym *s, struct macro_level **can_read_stream)
2806 {
2807 	Sym *args, *sa, *sa1;
2808 	int mstr_allocated, parlevel, *mstr, t, t1, spc;
2809 	const int *p;
2810 	TokenString str;
2811 	char *cstrval;
2812 	CValue cval;
2813 	CString cstr;
2814 	char buf[32];
2815 
2816 	/* if symbol is a macro, prepare substitution */
2817 	/* special macros */
2818 	if (tok == TOK___LINE__) {
2819 		snprintf (buf, sizeof(buf), "%d", file->line_num);
2820 		cstrval = buf;
2821 		t1 = TOK_PPNUM;
2822 		goto add_cstr1;
2823 	} else if (tok == TOK___FILE__) {
2824 		cstrval = file->filename;
2825 		goto add_cstr;
2826 	} else if (tok == TOK___DATE__ || tok == TOK___TIME__) {
2827 		time_t ti;
2828 		struct tm *tm;
2829 
2830 		time (&ti);
2831 		tm = localtime (&ti);
2832 		if (tok == TOK___DATE__) {
2833 			snprintf (buf, sizeof(buf), "%s %2d %d",
2834 				ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900);
2835 		} else {
2836 			snprintf (buf, sizeof(buf), "%02d:%02d:%02d",
2837 				tm->tm_hour, tm->tm_min, tm->tm_sec);
2838 		}
2839 		cstrval = buf;
2840 add_cstr:
2841 		t1 = TOK_STR;
2842 add_cstr1:
2843 		cstr_new (&cstr);
2844 		cstr_cat (&cstr, cstrval);
2845 		cstr_ccat (&cstr, '\0');
2846 		cval.cstr = &cstr;
2847 		tok_str_add2 (tok_str, t1, &cval);
2848 		cstr_free (&cstr);
2849 	} else {
2850 		mstr = s->d;
2851 		mstr_allocated = 0;
2852 		if (s->type.t == MACRO_FUNC) {
2853 			/* NOTE: we do not use next_nomacro to avoid eating the
2854 			   next token. XXX: find better solution */
2855 redo:
2856 			if (macro_ptr) {
2857 				p = macro_ptr;
2858 				while (is_space (t = *p) || TOK_LINEFEED == t)
2859 					++p;
2860 				if (t == 0 && can_read_stream) {
2861 					/* end of macro stream: we must look at the token
2862 					   after in the file */
2863 					struct macro_level *ml = *can_read_stream;
2864 					macro_ptr = NULL;
2865 					if (ml) {
2866 						macro_ptr = ml->p;
2867 						ml->p = NULL;
2868 						*can_read_stream = ml->prev;
2869 					}
2870 					/* also, end of scope for nested defined symbol */
2871 					(*nested_list)->v = -1;
2872 					goto redo;
2873 				}
2874 			} else {
2875 				ch = file->buf_ptr[0];
2876 				while (is_space (ch) || ch == '\n' || ch == '/') {
2877 					if (ch == '/') {
2878 						int c;
2879 						uint8_t *p = file->buf_ptr;
2880 						PEEKC (c, p);
2881 						if (c == '*') {
2882 							p = parse_comment (p);
2883 							file->buf_ptr = p - 1;
2884 						} else if (c == '/') {
2885 							p = parse_line_comment (p);
2886 							file->buf_ptr = p - 1;
2887 						} else {
2888 							break;
2889 						}
2890 					}
2891 					cinp ();
2892 				}
2893 				t = ch;
2894 			}
2895 			if (t != '(') {	/* no macro subst */
2896 				return -1;
2897 			}
2898 
2899 			/* argument macro */
2900 			next_nomacro ();
2901 			next_nomacro ();
2902 			args = NULL;
2903 			sa = s->next;
2904 			/* NOTE: empty args are allowed, except if no args */
2905 			while (tcc_nerr () == 0) {
2906 				/* handle '()' case */
2907 				if (!args && !sa && tok == ')') {
2908 					break;
2909 				}
2910 				if (!sa) {
2911 					tcc_error ("macro '%s' used with too many args",
2912 						get_tok_str (s->v, 0));
2913 				}
2914 				tok_str_new (&str);
2915 				parlevel = spc = 0;
2916 				/* NOTE: non zero sa->t indicates VA_ARGS */
2917 				while ((parlevel > 0 ||
2918 					(tok != ')' &&
2919 					 (tok != ',' || (sa && sa->type.t)))) &&
2920 				       tok != -1) {
2921 					if (tok == '(') {
2922 						parlevel++;
2923 					} else if (tok == ')') {
2924 						parlevel--;
2925 					}
2926 					if (tok == TOK_LINEFEED) {
2927 						tok = ' ';
2928 					}
2929 					if (!check_space (tok, &spc)) {
2930 						tok_str_add2 (&str, tok, &tokc);
2931 					}
2932 					next_nomacro_spc ();
2933 				}
2934 				str.len -= spc;
2935 				tok_str_add (&str, 0);
2936 				sa1 = sa ? sym_push2 (&args, sa->v & ~SYM_FIELD, sa->type.t, 0) : NULL;
2937 				if (!sa1) {
2938 					return -1;
2939 				}
2940 				sa1->d = str.str;
2941 				sa = sa->next;
2942 				if (tok == ')') {
2943 					/* special case for gcc var args: add an empty
2944 					   var arg argument if it is omitted */
2945 					if (sa && sa->type.t && gnu_ext) {
2946 						continue;
2947 					} else {
2948 						break;
2949 					}
2950 				}
2951 				if (tok != ',') {
2952 					expect (",");
2953 					return 1;
2954 				}
2955 				next_nomacro ();
2956 			}
2957 			if (sa) {
2958 				tcc_error ("macro '%s' used with too few args",
2959 					get_tok_str (s->v, 0));
2960 			}
2961 
2962 			/* now subst each arg */
2963 			mstr = macro_arg_subst (nested_list, mstr, args);
2964 			/* free memory */
2965 			sa = args;
2966 			while (sa) {
2967 				sa1 = sa->prev;
2968 				tok_str_free (sa->d);
2969 				sym_free (sa);
2970 				sa = sa1;
2971 			}
2972 			mstr_allocated = 1;
2973 		}
2974 		if (sym_push2 (nested_list, s->v, 0, 0) == 0) {
2975 			return -1;
2976 		}
2977 		macro_subst (tok_str, nested_list, mstr, can_read_stream);
2978 		/* pop nested defined symbol */
2979 		sa1 = *nested_list;
2980 		*nested_list = sa1->prev;
2981 		sym_free (sa1);
2982 		if (mstr_allocated) {
2983 			tok_str_free (mstr);
2984 		}
2985 	}
2986 	return 0;
2987 }
2988 
2989 /* handle the '##' operator. Return NULL if no '##' seen. Otherwise
2990    return the resulting string (which must be freed). */
macro_twosharps(const int * macro_str)2991 static inline int *macro_twosharps(const int *macro_str)
2992 {
2993 	const int *ptr;
2994 	int t;
2995 	TokenString macro_str1;
2996 	CString cstr;
2997 	int n, start_of_nosubsts;
2998 
2999 	/* we search the first '##' */
3000 	for (ptr = macro_str;;) {
3001 		CValue cval;
3002 		TOK_GET (&t, &ptr, &cval);
3003 		if (t == TOK_TWOSHARPS) {
3004 			break;
3005 		}
3006 		/* nothing more to do if end of string */
3007 		if (t == 0) {
3008 			return NULL;
3009 		}
3010 	}
3011 
3012 	/* we saw '##', so we need more processing to handle it */
3013 	start_of_nosubsts = -1;
3014 	tok_str_new (&macro_str1);
3015 	for (ptr = macro_str;;) {
3016 		TOK_GET (&tok, &ptr, &tokc);
3017 		if (tok == 0) {
3018 			break;
3019 		}
3020 		if (tok == TOK_TWOSHARPS) {
3021 			continue;
3022 		}
3023 		if (tok == TOK_NOSUBST && start_of_nosubsts < 0) {
3024 			start_of_nosubsts = macro_str1.len;
3025 		}
3026 		while (*ptr == TOK_TWOSHARPS) {
3027 			/* given 'a##b', remove nosubsts preceding 'a' */
3028 			if (start_of_nosubsts >= 0) {
3029 				macro_str1.len = start_of_nosubsts;
3030 			}
3031 			/* given 'a##b', skip '##' */
3032 			t = *++ptr;
3033 			/* given 'a##b', remove nosubsts preceding 'b' */
3034 			while (t == TOK_NOSUBST)
3035 				t = *++ptr;
3036 			if (t && t != TOK_TWOSHARPS) {
3037 				CValue cval;
3038 				TOK_GET (&t, &ptr, &cval);
3039 				/* We concatenate the two tokens */
3040 				cstr_new (&cstr);
3041 				cstr_cat (&cstr, get_tok_str (tok, &tokc));
3042 				n = cstr.size;
3043 				cstr_cat (&cstr, get_tok_str (t, &cval));
3044 				cstr_ccat (&cstr, '\0');
3045 
3046 				tcc_open_bf (tcc_state, ":paste:", cstr.size);
3047 				memcpy (file->buffer, cstr.data, cstr.size);
3048 				while (tcc_nerr () == 0) {
3049 					next_nomacro1 ();
3050 					if (0 == *file->buf_ptr) {
3051 						break;
3052 					}
3053 					tok_str_add2 (&macro_str1, tok, &tokc);
3054 					tcc_warning ("pasting \"%.*s\" and \"%s\" does not give a valid preprocessing token",
3055 						n, (char *) cstr.data, (char *) cstr.data + n);
3056 				}
3057 				tcc_close ();
3058 				cstr_free (&cstr);
3059 			}
3060 		}
3061 		if (tok != TOK_NOSUBST) {
3062 			start_of_nosubsts = -1;
3063 		}
3064 		tok_str_add2 (&macro_str1, tok, &tokc);
3065 	}
3066 	tok_str_add (&macro_str1, 0);
3067 	return macro_str1.str;
3068 }
3069 
3070 
3071 /* do macro substitution of macro_str and add result to
3072    (tok_str,tok_len). 'nested_list' is the list of all macros we got
3073    inside to avoid recursing. */
macro_subst(TokenString * tok_str,Sym ** nested_list,const int * macro_str,struct macro_level ** can_read_stream)3074 static void macro_subst(TokenString *tok_str, Sym **nested_list,
3075 			const int *macro_str, struct macro_level **can_read_stream)
3076 {
3077 	Sym *s;
3078 	int *macro_str1;
3079 	const int *ptr;
3080 	int t, ret, spc;
3081 	CValue cval;
3082 	struct macro_level ml;
3083 	int force_blank;
3084 
3085 	/* first scan for '##' operator handling */
3086 	ptr = macro_str;
3087 	macro_str1 = macro_twosharps (ptr);
3088 
3089 	if (macro_str1) {
3090 		ptr = macro_str1;
3091 	}
3092 	spc = 0;
3093 	force_blank = 0;
3094 
3095 	while (tcc_nerr () == 0) {
3096 		/* NOTE: ptr == NULL can only happen if tokens are read from
3097 		   file stream due to a macro function call */
3098 		if (ptr == NULL) {
3099 			break;
3100 		}
3101 		TOK_GET (&t, &ptr, &cval);
3102 		if (t == 0) {
3103 			break;
3104 		}
3105 		if (t == TOK_NOSUBST) {
3106 			/* following token has already been subst'd. just copy it on */
3107 			tok_str_add2 (tok_str, TOK_NOSUBST, NULL);
3108 			TOK_GET (&t, &ptr, &cval);
3109 			goto no_subst;
3110 		}
3111 		s = define_find (t);
3112 		if (s != NULL) {
3113 			/* if nested substitution, do nothing */
3114 			if (sym_find2 (*nested_list, t)) {
3115 				/* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */
3116 				tok_str_add2 (tok_str, TOK_NOSUBST, NULL);
3117 				goto no_subst;
3118 			}
3119 			ml.p = macro_ptr;
3120 			if (can_read_stream) {
3121 				ml.prev = *can_read_stream, *can_read_stream = &ml;
3122 			}
3123 			macro_ptr = (int *) ptr;
3124 			tok = t;
3125 			ret = macro_subst_tok (tok_str, nested_list, s, can_read_stream);
3126 			ptr = (int *) macro_ptr;
3127 			macro_ptr = ml.p;
3128 			if (can_read_stream && *can_read_stream == &ml) {
3129 				*can_read_stream = ml.prev;
3130 			}
3131 			if (ret != 0) {
3132 				goto no_subst;
3133 			}
3134 			if (parse_flags & PARSE_FLAG_SPACES) {
3135 				force_blank = 1;
3136 			}
3137 		} else {
3138 no_subst:
3139 			if (force_blank) {
3140 				tok_str_add (tok_str, ' ');
3141 				spc = 1;
3142 				force_blank = 0;
3143 			}
3144 			if (!check_space (t, &spc)) {
3145 				tok_str_add2 (tok_str, t, &cval);
3146 			}
3147 		}
3148 	}
3149 	if (macro_str1) {
3150 		tok_str_free (macro_str1);
3151 	}
3152 }
3153 
3154 /* return next token with macro substitution */
next(void)3155 ST_FUNC void next(void)
3156 {
3157 	Sym *nested_list, *s;
3158 	TokenString str;
3159 	struct macro_level *ml;
3160 
3161 redo:
3162 	if (parse_flags & PARSE_FLAG_SPACES) {
3163 		next_nomacro_spc ();
3164 	} else {
3165 		next_nomacro ();
3166 	}
3167 	if (!macro_ptr) {
3168 		/* if not reading from macro substituted string, then try
3169 		   to substitute macros */
3170 		if (tok >= TOK_IDENT &&
3171 		    (parse_flags & PARSE_FLAG_PREPROCESS)) {
3172 			s = define_find (tok);
3173 			if (s) {
3174 				/* we have a macro: we try to substitute */
3175 				tok_str_new (&str);
3176 				nested_list = NULL;
3177 				ml = NULL;
3178 				if (macro_subst_tok (&str, &nested_list, s, &ml) == 0) {
3179 					/* substitution done, NOTE: maybe empty */
3180 					tok_str_add (&str, 0);
3181 					macro_ptr = str.str;
3182 					macro_ptr_allocated = str.str;
3183 					goto redo;
3184 				}
3185 			}
3186 		}
3187 	} else {
3188 		if (tok == 0) {
3189 			/* end of macro or end of unget buffer */
3190 			if (unget_buffer_enabled) {
3191 				macro_ptr = unget_saved_macro_ptr;
3192 				unget_buffer_enabled = 0;
3193 			} else {
3194 				/* end of macro string: free it */
3195 				tok_str_free (macro_ptr_allocated);
3196 				macro_ptr_allocated = NULL;
3197 				macro_ptr = NULL;
3198 			}
3199 			goto redo;
3200 		} else if (tok == TOK_NOSUBST) {
3201 			/* discard preprocessor's nosubst markers */
3202 			goto redo;
3203 		}
3204 	}
3205 
3206 	/* convert preprocessor tokens into C tokens */
3207 	if (tok == TOK_PPNUM &&
3208 	    (parse_flags & PARSE_FLAG_TOK_NUM)) {
3209 		parse_number ((char *) tokc.cstr->data);
3210 	}
3211 }
3212 
3213 /* push back current token and set current token to 'last_tok'. Only
3214    identifier case handled for labels. */
unget_tok(int last_tok)3215 ST_INLN void unget_tok(int last_tok)
3216 {
3217 	int i, n;
3218 	int *q;
3219 	if (unget_buffer_enabled) {
3220 		/* assert(macro_ptr == unget_saved_buffer + 1);
3221 		   assert(*macro_ptr == 0);  */
3222 	} else {
3223 		unget_saved_macro_ptr = macro_ptr;
3224 		unget_buffer_enabled = 1;
3225 	}
3226 	q = unget_saved_buffer;
3227 	macro_ptr = q;
3228 	*q++ = tok;
3229 	n = tok_ext_size (tok) - 1;
3230 	for (i = 0; i < n; i++) {
3231 		*q++ = tokc.tab[i];
3232 	}
3233 	*q = 0;	/* end of token string */
3234 	tok = last_tok;
3235 }
3236 
3237 
3238 /* better than nothing, but needs extension to handle '-E' option
3239    correctly too */
preprocess_init(TCCState * s1)3240 ST_FUNC void preprocess_init(TCCState *s1)
3241 {
3242 	s1->include_stack_ptr = s1->include_stack;
3243 	/* XXX: move that before to avoid having to initialize
3244 	   file->ifdef_stack_ptr ? */
3245 	s1->ifdef_stack_ptr = s1->ifdef_stack;
3246 	file->ifdef_stack_ptr = s1->ifdef_stack_ptr;
3247 
3248 	vtop = vstack - 1;
3249 	s1->pack_stack[0] = 0;
3250 	s1->pack_stack_ptr = s1->pack_stack;
3251 }
3252 
preprocess_new(void)3253 ST_FUNC void preprocess_new(void)
3254 {
3255 	int i, c;
3256 	const char *p, *r;
3257 
3258 	/* init isid table */
3259 	for (i = CH_EOF; i < 256; i++) {
3260 		isidnum_table[i - CH_EOF] = isid (i) || isnum (i) || isdot (i);
3261 	}
3262 
3263 	/* add all tokens */
3264 	table_ident = NULL;
3265 	memset (hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *));
3266 
3267 	tok_ident = TOK_IDENT;
3268 	p = tcc_keywords;
3269 	while (*p) {
3270 		r = p;
3271 		for (;;) {
3272 			c = *r++;
3273 			if (c == '\0') {
3274 				break;
3275 			}
3276 		}
3277 		tok_alloc (p, r - p - 1);
3278 		p = r;
3279 	}
3280 }
3281 
3282 /* Preprocess the current file */
tcc_preprocess(TCCState * s1)3283 ST_FUNC int tcc_preprocess(TCCState *s1)
3284 {
3285 	Sym *define_start;
3286 
3287 	BufferedFile *file_ref, **iptr, **iptr_new;
3288 	int token_seen, line_ref, d;
3289 	const char *s;
3290 
3291 	preprocess_init (s1);
3292 	define_start = define_stack;
3293 	ch = file->buf_ptr[0];
3294 	tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF;
3295 	parse_flags = PARSE_FLAG_ASM_COMMENTS | PARSE_FLAG_PREPROCESS |
3296 		      PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES;
3297 	token_seen = 0;
3298 	line_ref = 0;
3299 	file_ref = NULL;
3300 	iptr = s1->include_stack_ptr;
3301 
3302 	while (tcc_nerr () == 0) {
3303 		next ();
3304 		if (tok == TOK_EOF) {
3305 			break;
3306 		} else if (file != file_ref) {
3307 			goto print_line;
3308 		} else if (tok == TOK_LINEFEED) {
3309 			if (!token_seen) {
3310 				continue;
3311 			}
3312 			++line_ref;
3313 			token_seen = 0;
3314 		} else if (!token_seen) {
3315 			d = file->line_num - line_ref;
3316 			if (file != file_ref || d < 0 || d >= 8) {
3317 print_line:
3318 				iptr_new = s1->include_stack_ptr;
3319 				s = iptr_new > iptr? " 1"
3320 				    : iptr_new < iptr? " 2"
3321 				    : iptr_new > s1->include_stack? " 3"
3322 				    : ""
3323 				;
3324 				iptr = iptr_new;
3325 				fprintf (s1->ppfp, "# %d \"%s\"%s\n", file->line_num, file->filename, s);
3326 			} else {
3327 				while (d)
3328 					fputs ("\n", s1->ppfp), --d;
3329 			}
3330 			line_ref = (file_ref = file)->line_num;
3331 			token_seen = tok != TOK_LINEFEED;
3332 			if (!token_seen) {
3333 				continue;
3334 			}
3335 		}
3336 		fputs (get_tok_str (tok, &tokc), s1->ppfp);
3337 	}
3338 	free_defines (define_start);
3339 	return 0;
3340 }
3341