xref: /netbsd/usr.bin/xlint/lint1/lex.c (revision 6172b0cb)
1 /* $NetBSD: lex.c,v 1.188 2023/07/15 13:35:24 rillig Exp $ */
2 
3 /*
4  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
5  * Copyright (c) 1994, 1995 Jochen Pohl
6  * All Rights Reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by Jochen Pohl for
19  *	The NetBSD Project.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #if HAVE_NBTOOL_CONFIG_H
36 #include "nbtool_config.h"
37 #endif
38 
39 #include <sys/cdefs.h>
40 #if defined(__RCSID)
41 __RCSID("$NetBSD: lex.c,v 1.188 2023/07/15 13:35:24 rillig Exp $");
42 #endif
43 
44 #include <ctype.h>
45 #include <errno.h>
46 #include <float.h>
47 #include <limits.h>
48 #include <math.h>
49 #include <stdlib.h>
50 #include <string.h>
51 
52 #include "lint1.h"
53 #include "cgram.h"
54 
/*
 * Bit mask that covers the CHAR_SIZE low-order bits.  It is used to truncate
 * the value of octal and hexadecimal escape sequences.
 */
#define CHAR_MASK	((1U << CHAR_SIZE) - 1)
56 
57 
/* Current position (it's also updated when an included file is parsed) */
pos_t	curr_pos = { "", 1, 0 };

/*
 * Current position in C source (not updated when an included file is
 * parsed).
 */
pos_t	csrc_pos = { "", 1, 0 };

/*
 * While set, symtab_search returns the first symbol with a matching name,
 * no matter which kind of symbol is currently expected.
 */
bool in_gcc_attribute;
/* Flag '3' of the most recent '# lineno "filename" flags...' directive. */
bool in_system_header;
69 
/*
 * Valid values for 'since' are 78, 90, 99, 11, 23.
 *
 * The C11 keywords are all taken from the reserved namespace.  They are added
 * in C99 mode as well, to make the parse error messages more useful.  For
 * example, if the keyword '_Generic' were not defined, it would be interpreted
 * as an implicit function call, leading to a parse error.
 *
 * The C23 keywords are not made available in earlier modes, as they may
 * conflict with user-defined identifiers.
 *
 * A 'gcc' value greater than 0 sets kw_gcc.  'deco' is a bit set that selects
 * the spellings under which the keyword is registered: 1 is the plain 'name',
 * 2 is '__name' and 4 is '__name__'.
 */
#define kwdef(name, token, detail,	since, gcc, deco) \
	{ /* CONSTCOND */ \
		name, token, detail, \
		(since) == 90, \
		(since) == 99 || (since) == 11, \
		(since) == 23, \
		(gcc) > 0, \
		((deco) & 1) != 0, ((deco) & 2) != 0, ((deco) & 4) != 0, \
	}
/* A keyword whose token carries no further detail. */
#define kwdef_token(name, token,		since, gcc, deco) \
	kwdef(name, token, {false},		since, gcc, deco)
/* A storage class keyword, returned as T_SCLASS. */
#define kwdef_sclass(name, sclass,		since, gcc, deco) \
	kwdef(name, T_SCLASS, .u.kw_scl = (sclass), since, gcc, deco)
/* A type keyword, returned as T_TYPE; plain spelling only, not GCC-specific. */
#define kwdef_type(name, tspec,			since) \
	kwdef(name, T_TYPE, .u.kw_tspec = (tspec), since, 0, 1)
/* A type qualifier keyword, returned as T_QUAL. */
#define kwdef_tqual(name, tqual,		since, gcc, deco) \
	kwdef(name, T_QUAL, .u.kw_tqual = {.tqual = true}, since, gcc, deco)
/* A keyword without detail, since 78, plain spelling only, not GCC-specific. */
#define kwdef_keyword(name, token) \
	kwdef(name, token, {false},		78, 0, 1)
100 
/*
 * During initialization, these keywords are written to the symbol table.
 *
 * Each entry is built by one of the kwdef macros.  initscan registers an
 * entry under each spelling selected by kw_plain, kw_leading and kw_both,
 * provided that is_keyword_known accepts it.
 */
static const struct keyword {
	const	char kw_name[20];	/* spelling before add_keyword adds
					 * any '__' around it */
	int	kw_token;	/* token to be returned by yylex() */
	union {
		bool kw_dummy;		/* initialized by '{false}' if the
					 * token has no detail */
		scl_t kw_scl;		/* if kw_token is T_SCLASS */
		tspec_t kw_tspec;	/* if kw_token is T_TYPE or
					 * T_STRUCT_OR_UNION */
		type_qualifiers kw_tqual;	/* if kw_token is T_QUAL */
		function_specifier kw_fs;	/* if kw_token is
						 * T_FUNCTION_SPECIFIER */
	} u;
	bool	kw_added_in_c90:1;
	bool	kw_added_in_c99_or_c11:1;
	bool	kw_added_in_c23:1;
	bool	kw_gcc:1;	/* available in GCC mode */
	bool	kw_plain:1;	/* 'name' */
	bool	kw_leading:1;	/* '__name' */
	bool	kw_both:1;	/* '__name__' */
} keywords[] = {
	// TODO: _Alignas is not available in C99.
	kwdef_keyword(	"_Alignas",	T_ALIGNAS),
	// TODO: _Alignof is not available in C99.
	kwdef_keyword(	"_Alignof",	T_ALIGNOF),
	// TODO: alignof is not available in C99.
	kwdef_token(	"alignof",	T_ALIGNOF,		78,0,6),
	kwdef_token(	"asm",		T_ASM,			78,1,7),
	kwdef_token(	"_Atomic",	T_ATOMIC,		11,0,1),
	kwdef_token(	"attribute",	T_ATTRIBUTE,		78,1,6),
	kwdef_sclass(	"auto",		AUTO,			78,0,1),
	kwdef_type(	"_Bool",	BOOL,			99),
	kwdef_keyword(	"break",	T_BREAK),
	kwdef_token(	"__builtin_offsetof", T_BUILTIN_OFFSETOF, 78,1,1),
	kwdef_keyword(	"case",		T_CASE),
	kwdef_type(	"char",		CHAR,			78),
	kwdef_type(	"_Complex",	COMPLEX,		99),
	kwdef_tqual(	"const",	tq_const,		90,0,7),
	kwdef_keyword(	"continue",	T_CONTINUE),
	kwdef_keyword(	"default",	T_DEFAULT),
	kwdef_keyword(	"do",		T_DO),
	kwdef_type(	"double",	DOUBLE,			78),
	kwdef_keyword(	"else",		T_ELSE),
	// XXX: enum is not available in traditional C.
	kwdef_keyword(	"enum",		T_ENUM),
	kwdef_token(	"__extension__",T_EXTENSION,		78,1,1),
	kwdef_sclass(	"extern",	EXTERN,			78,0,1),
	kwdef_type(	"float",	FLOAT,			78),
	kwdef_keyword(	"for",		T_FOR),
	kwdef_token(	"_Generic",	T_GENERIC,		11,0,1),
	kwdef_keyword(	"goto",		T_GOTO),
	kwdef_keyword(	"if",		T_IF),
	kwdef_token(	"__imag__",	T_IMAG,			78,1,1),
	kwdef("inline",	T_FUNCTION_SPECIFIER, .u.kw_fs = FS_INLINE, 99,0,7),
	kwdef_type(	"int",		INT,			78),
#ifdef INT128_SIZE
	kwdef_type(	"__int128_t",	INT128,			99),
#endif
	kwdef_type(	"long",		LONG,			78),
	kwdef("_Noreturn", T_FUNCTION_SPECIFIER, .u.kw_fs = FS_NORETURN, 11,0,1),
	// XXX: __packed is GCC-specific.
	kwdef_token(	"__packed",	T_PACKED,		78,0,1),
	kwdef_token(	"__real__",	T_REAL,			78,1,1),
	kwdef_sclass(	"register",	REG,			78,0,1),
	kwdef_tqual(	"restrict",	tq_restrict,		99,0,7),
	kwdef_keyword(	"return",	T_RETURN),
	kwdef_type(	"short",	SHORT,			78),
	kwdef(		"signed", T_TYPE, .u.kw_tspec = SIGNED,	90,0,3),
	kwdef_keyword(	"sizeof",	T_SIZEOF),
	kwdef_sclass(	"static",	STATIC,			78,0,1),
	// XXX: _Static_assert was added in C11.
	kwdef_keyword(	"_Static_assert",	T_STATIC_ASSERT),
	kwdef("struct",	T_STRUCT_OR_UNION, .u.kw_tspec = STRUCT, 78,0,1),
	kwdef_keyword(	"switch",	T_SWITCH),
	kwdef_token(	"__symbolrename",	T_SYMBOLRENAME,	78,0,1),
	kwdef_sclass(	"__thread",	THREAD_LOCAL,		78,1,1),
	kwdef_sclass(	"_Thread_local", THREAD_LOCAL,		11,0,1),
	kwdef_sclass(	"thread_local", THREAD_LOCAL,		23,0,1),
	kwdef_sclass(	"typedef",	TYPEDEF,		78,0,1),
	kwdef_token(	"typeof",	T_TYPEOF,		78,1,7),
#ifdef INT128_SIZE
	kwdef_type(	"__uint128_t",	UINT128,		99),
#endif
	kwdef("union",	T_STRUCT_OR_UNION, .u.kw_tspec = UNION,	78,0,1),
	kwdef_type(	"unsigned",	UNSIGN,			78),
	// XXX: void is not available in traditional C.
	kwdef_type(	"void",		VOID,			78),
	kwdef_tqual(	"volatile",	tq_volatile,		90,0,7),
	kwdef_keyword(	"while",	T_WHILE),
/* The helper macros are only meant for building this table. */
#undef kwdef
#undef kwdef_token
#undef kwdef_sclass
#undef kwdef_type
#undef kwdef_tqual
#undef kwdef_keyword
};
197 
/*
 * The symbol table containing all keywords, identifiers and labels. The hash
 * entries are linked via sym_t.s_symtab_next.
 *
 * The bucket of a name is computed by hash().  symtab_add inserts at the
 * head of a bucket, so within a bucket the most recently added entry comes
 * first.
 */
static sym_t *symtab[503];

/*
 * The kind of the next expected symbol, to distinguish the namespaces of
 * members, labels, type tags and other identifiers.
 *
 * symtab_search compares it with sym_t.s_kind.
 */
symt_t symtyp;
209 
210 
211 static unsigned int
hash(const char * s)212 hash(const char *s)
213 {
214 	unsigned int v;
215 	const char *p;
216 
217 	v = 0;
218 	for (p = s; *p != '\0'; p++) {
219 		v = (v << 4) + (unsigned char)*p;
220 		v ^= v >> 28;
221 	}
222 	return v % (sizeof(symtab) / sizeof(symtab[0]));
223 }
224 
225 static void
symtab_add(sym_t * sym)226 symtab_add(sym_t *sym)
227 {
228 	unsigned int h;
229 
230 	h = hash(sym->s_name);
231 	if ((sym->s_symtab_next = symtab[h]) != NULL)
232 		symtab[h]->s_symtab_ref = &sym->s_symtab_next;
233 	sym->s_symtab_ref = &symtab[h];
234 	symtab[h] = sym;
235 }
236 
237 static sym_t *
symtab_search(const char * name)238 symtab_search(const char *name)
239 {
240 
241 	unsigned int h = hash(name);
242 	for (sym_t *sym = symtab[h]; sym != NULL; sym = sym->s_symtab_next) {
243 		if (strcmp(sym->s_name, name) != 0)
244 			continue;
245 		if (sym->s_keyword != NULL ||
246 		    sym->s_kind == symtyp ||
247 		    in_gcc_attribute)
248 			return sym;
249 	}
250 
251 	return NULL;
252 }
253 
254 static void
symtab_remove(sym_t * sym)255 symtab_remove(sym_t *sym)
256 {
257 
258 	if ((*sym->s_symtab_ref = sym->s_symtab_next) != NULL)
259 		sym->s_symtab_next->s_symtab_ref = sym->s_symtab_ref;
260 	sym->s_symtab_next = NULL;
261 }
262 
263 static void
symtab_remove_locals(void)264 symtab_remove_locals(void)
265 {
266 
267 	for (size_t i = 0; i < sizeof(symtab) / sizeof(symtab[0]); i++) {
268 		for (sym_t *sym = symtab[i]; sym != NULL; ) {
269 			sym_t *next = sym->s_symtab_next;
270 			if (sym->s_block_level >= 1)
271 				symtab_remove(sym);
272 			sym = next;
273 		}
274 	}
275 }
276 
277 #ifdef DEBUG
278 static int
sym_by_name(const void * va,const void * vb)279 sym_by_name(const void *va, const void *vb)
280 {
281 	const sym_t *a = *(const sym_t *const *)va;
282 	const sym_t *b = *(const sym_t *const *)vb;
283 
284 	return strcmp(a->s_name, b->s_name);
285 }
286 
/* A growable array of symbol pointers, filled by syms_add. */
struct syms {
	const sym_t **items;	/* storage for 'cap' pointers */
	size_t len;		/* number of pointers in use */
	size_t cap;		/* number of pointers that fit into 'items' */
};
292 
293 static void
syms_add(struct syms * syms,const sym_t * sym)294 syms_add(struct syms *syms, const sym_t *sym)
295 {
296 	if (syms->len >= syms->cap) {
297 		syms->cap *= 2;
298 		syms->items = xrealloc(syms->items,
299 		    syms->cap * sizeof(syms->items[0]));
300 	}
301 	syms->items[syms->len++] = sym;
302 }
303 
304 void
debug_symtab(void)305 debug_symtab(void)
306 {
307 	struct syms syms = { xcalloc(64, sizeof(syms.items[0])), 0, 64 };
308 
309 	debug_enter();
310 	for (int level = -1;; level++) {
311 		bool more = false;
312 		size_t n = sizeof(symtab) / sizeof(symtab[0]);
313 
314 		syms.len = 0;
315 		for (size_t i = 0; i < n; i++) {
316 			for (sym_t *sym = symtab[i]; sym != NULL;) {
317 				if (sym->s_block_level == level &&
318 				    sym->s_keyword == NULL)
319 					syms_add(&syms, sym);
320 				if (sym->s_block_level > level)
321 					more = true;
322 				sym = sym->s_symtab_next;
323 			}
324 		}
325 
326 		if (syms.len > 0) {
327 			debug_step("symbol table level %d", level);
328 			debug_indent_inc();
329 			qsort(syms.items, syms.len, sizeof(syms.items[0]),
330 			    sym_by_name);
331 			for (size_t i = 0; i < syms.len; i++)
332 				debug_sym("", syms.items[i], "\n");
333 			debug_indent_dec();
334 
335 			lint_assert(level != -1);
336 		}
337 
338 		if (!more)
339 			break;
340 	}
341 	debug_leave();
342 
343 	free(syms.items);
344 }
345 #endif
346 
347 static void
add_keyword(const struct keyword * kw,bool leading,bool trailing)348 add_keyword(const struct keyword *kw, bool leading, bool trailing)
349 {
350 
351 	const char *name;
352 	if (!leading && !trailing) {
353 		name = kw->kw_name;
354 	} else {
355 		char buf[256];
356 		(void)snprintf(buf, sizeof(buf), "%s%s%s",
357 		    leading ? "__" : "", kw->kw_name, trailing ? "__" : "");
358 		name = xstrdup(buf);
359 	}
360 
361 	sym_t *sym = block_zero_alloc(sizeof(*sym), "sym");
362 	sym->s_name = name;
363 	sym->s_keyword = kw;
364 	int tok = kw->kw_token;
365 	sym->u.s_keyword.sk_token = tok;
366 	if (tok == T_TYPE || tok == T_STRUCT_OR_UNION)
367 		sym->u.s_keyword.u.sk_tspec = kw->u.kw_tspec;
368 	if (tok == T_SCLASS)
369 		sym->s_scl = kw->u.kw_scl;
370 	if (tok == T_QUAL)
371 		sym->u.s_keyword.u.sk_type_qualifier = kw->u.kw_tqual;
372 	if (tok == T_FUNCTION_SPECIFIER)
373 		sym->u.s_keyword.u.function_specifier = kw->u.kw_fs;
374 
375 	symtab_add(sym);
376 }
377 
378 static bool
is_keyword_known(const struct keyword * kw)379 is_keyword_known(const struct keyword *kw)
380 {
381 
382 	if (kw->kw_added_in_c23 && !allow_c23)
383 		return false;
384 	if ((kw->kw_added_in_c90 || kw->kw_added_in_c99_or_c11) && !allow_c90)
385 		return false;
386 
387 	/*
388 	 * In the 1990s, GCC defined several keywords that were later
389 	 * incorporated into C99, therefore in GCC mode, all C99 keywords are
390 	 * made available.  The C11 keywords are made available as well, but
391 	 * there are so few that they don't matter practically.
392 	 */
393 	if (allow_gcc)
394 		return true;
395 	if (kw->kw_gcc)
396 		return false;
397 
398 	if (kw->kw_added_in_c99_or_c11 && !allow_c99)
399 		return false;
400 	return true;
401 }
402 
403 /* Write all keywords to the symbol table. */
404 void
initscan(void)405 initscan(void)
406 {
407 
408 	size_t n = sizeof(keywords) / sizeof(keywords[0]);
409 	for (size_t i = 0; i < n; i++) {
410 		const struct keyword *kw = keywords + i;
411 		if (!is_keyword_known(kw))
412 			continue;
413 		if (kw->kw_plain)
414 			add_keyword(kw, false, false);
415 		if (kw->kw_leading)
416 			add_keyword(kw, true, false);
417 		if (kw->kw_both)
418 			add_keyword(kw, true, true);
419 	}
420 }
421 
/*
 * Read the next byte while scanning the remainder of a long token (see
 * lex_input).
 *
 * Returns the byte as delivered by lex_input, or EOF; a null byte counts as
 * EOF.  After a newline, the line counts are advanced.
 */
static int
read_byte(void)
{
	int c = lex_input();

	switch (c) {
	case '\0':
		return EOF;	/* lex returns 0 on EOF. */
	case '\n':
		lex_next_line();
		break;
	default:
		break;
	}
	return c;
}
441 
442 static int
lex_keyword(sym_t * sym)443 lex_keyword(sym_t *sym)
444 {
445 	int tok = sym->u.s_keyword.sk_token;
446 
447 	if (tok == T_SCLASS)
448 		yylval.y_scl = sym->s_scl;
449 	if (tok == T_TYPE || tok == T_STRUCT_OR_UNION)
450 		yylval.y_tspec = sym->u.s_keyword.u.sk_tspec;
451 	if (tok == T_QUAL)
452 		yylval.y_type_qualifiers =
453 		    sym->u.s_keyword.u.sk_type_qualifier;
454 	if (tok == T_FUNCTION_SPECIFIER)
455 		yylval.y_function_specifier =
456 		    sym->u.s_keyword.u.function_specifier;
457 	return tok;
458 }
459 
460 /*
461  * Look up the definition of a name in the symbol table. This symbol must
462  * either be a keyword or a symbol of the type required by symtyp (label,
463  * member, tag, ...).
464  */
465 extern int
lex_name(const char * yytext,size_t yyleng)466 lex_name(const char *yytext, size_t yyleng)
467 {
468 
469 	sym_t *sym = symtab_search(yytext);
470 	if (sym != NULL && sym->s_keyword != NULL)
471 		return lex_keyword(sym);
472 
473 	sbuf_t *sb = xmalloc(sizeof(*sb));
474 	sb->sb_len = yyleng;
475 	sb->sb_sym = sym;
476 	yylval.y_name = sb;
477 
478 	if (sym != NULL) {
479 		lint_assert(block_level >= sym->s_block_level);
480 		sb->sb_name = sym->s_name;
481 		return sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME;
482 	}
483 
484 	char *name = block_zero_alloc(yyleng + 1, "string");
485 	(void)memcpy(name, yytext, yyleng + 1);
486 	sb->sb_name = name;
487 	return T_NAME;
488 }
489 
490 int
lex_integer_constant(const char * yytext,size_t yyleng,int base)491 lex_integer_constant(const char *yytext, size_t yyleng, int base)
492 {
493 	/* C11 6.4.4.1p5 */
494 	static const tspec_t suffix_type[2][3] = {
495 		{ INT,  LONG,  LLONG, },
496 		{ UINT, ULONG, ULLONG, }
497 	};
498 
499 	const char *cp = yytext;
500 	size_t len = yyleng;
501 
502 	/* skip 0[xX] or 0[bB] */
503 	if (base == 16 || base == 2) {
504 		cp += 2;
505 		len -= 2;
506 	}
507 
508 	/* read suffixes */
509 	unsigned l_suffix = 0, u_suffix = 0;
510 	for (;; len--) {
511 		char c = cp[len - 1];
512 		if (c == 'l' || c == 'L')
513 			l_suffix++;
514 		else if (c == 'u' || c == 'U')
515 			u_suffix++;
516 		else
517 			break;
518 	}
519 	if (l_suffix > 2 || u_suffix > 1) {
520 		/* malformed integer constant */
521 		warning(251);
522 		if (l_suffix > 2)
523 			l_suffix = 2;
524 		if (u_suffix > 1)
525 			u_suffix = 1;
526 	}
527 	if (!allow_c90 && u_suffix > 0) {
528 		/* suffix 'U' is illegal in traditional C */
529 		warning(97);
530 	}
531 	tspec_t typ = suffix_type[u_suffix][l_suffix];
532 
533 	bool warned = false;
534 	errno = 0;
535 	char *eptr;
536 	uint64_t ui = (uint64_t)strtoull(cp, &eptr, base);
537 	lint_assert(eptr == cp + len);
538 	if (errno != 0) {
539 		/* integer constant out of range */
540 		warning(252);
541 		warned = true;
542 	}
543 
544 	if (any_query_enabled && base == 8 && ui != 0) {
545 		/* octal number '%.*s' */
546 		query_message(8, (int)len, cp);
547 	}
548 
549 	/*
550 	 * If the value is too big for the current type, we must choose
551 	 * another type.
552 	 */
553 	bool ansiu = false;
554 	switch (typ) {
555 	case INT:
556 		if (ui <= TARG_INT_MAX) {
557 			/* ok */
558 		} else if (ui <= TARG_UINT_MAX && base != 10) {
559 			typ = UINT;
560 		} else if (ui <= TARG_LONG_MAX) {
561 			typ = LONG;
562 		} else {
563 			typ = ULONG;
564 			if (ui > TARG_ULONG_MAX && !warned) {
565 				/* integer constant out of range */
566 				warning(252);
567 			}
568 		}
569 		if (typ == UINT || typ == ULONG) {
570 			if (!allow_c90) {
571 				typ = LONG;
572 			} else if (allow_trad) {
573 				/*
574 				 * Remember that the constant is unsigned
575 				 * only in ANSI C.
576 				 */
577 				ansiu = true;
578 			}
579 		}
580 		break;
581 	case UINT:
582 		if (ui > TARG_UINT_MAX) {
583 			typ = ULONG;
584 			if (ui > TARG_ULONG_MAX && !warned) {
585 				/* integer constant out of range */
586 				warning(252);
587 			}
588 		}
589 		break;
590 	case LONG:
591 		if (ui > TARG_LONG_MAX && allow_c90) {
592 			typ = ULONG;
593 			if (allow_trad)
594 				ansiu = true;
595 			if (ui > TARG_ULONG_MAX && !warned) {
596 				/* integer constant out of range */
597 				warning(252);
598 			}
599 		}
600 		break;
601 	case ULONG:
602 		if (ui > TARG_ULONG_MAX && !warned) {
603 			/* integer constant out of range */
604 			warning(252);
605 		}
606 		break;
607 	case LLONG:
608 		if (ui > TARG_LLONG_MAX && allow_c90)
609 			typ = ULLONG;
610 		break;
611 	case ULLONG:
612 		if (ui > TARG_ULLONG_MAX && !warned) {
613 			/* integer constant out of range */
614 			warning(252);
615 		}
616 		break;
617 	default:
618 		break;
619 	}
620 
621 	ui = (uint64_t)convert_integer((int64_t)ui, typ, 0);
622 
623 	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
624 	yylval.y_val->v_tspec = typ;
625 	yylval.y_val->v_unsigned_since_c90 = ansiu;
626 	yylval.y_val->u.integer = (int64_t)ui;
627 
628 	return T_CON;
629 }
630 
631 /*
632  * Extend or truncate si to match t.  If t is signed, sign-extend.
633  *
634  * len is the number of significant bits. If len is 0, len is set
635  * to the width of type t.
636  */
637 int64_t
convert_integer(int64_t si,tspec_t t,unsigned int len)638 convert_integer(int64_t si, tspec_t t, unsigned int len)
639 {
640 
641 	if (len == 0)
642 		len = size_in_bits(t);
643 
644 	uint64_t vbits = value_bits(len);
645 	uint64_t ui = (uint64_t)si;
646 	return t == PTR || is_uinteger(t) || ((ui & bit(len - 1)) == 0)
647 	    ? (int64_t)(ui & vbits)
648 	    : (int64_t)(ui | ~vbits);
649 }
650 
651 int
lex_floating_constant(const char * yytext,size_t yyleng)652 lex_floating_constant(const char *yytext, size_t yyleng)
653 {
654 	const char *cp = yytext;
655 	size_t len = yyleng;
656 
657 	bool imaginary = cp[len - 1] == 'i';
658 	if (imaginary)
659 		len--;
660 
661 	char c = cp[len - 1];
662 	tspec_t typ;
663 	if (c == 'f' || c == 'F') {
664 		typ = imaginary ? FCOMPLEX : FLOAT;
665 		len--;
666 	} else if (c == 'l' || c == 'L') {
667 		typ = imaginary ? LCOMPLEX : LDOUBLE;
668 		len--;
669 	} else
670 		typ = imaginary ? DCOMPLEX : DOUBLE;
671 
672 	if (!allow_c90 && typ != DOUBLE) {
673 		/* suffixes 'F' and 'L' are illegal in traditional C */
674 		warning(98);
675 	}
676 
677 	errno = 0;
678 	char *eptr;
679 	long double ld = strtold(cp, &eptr);
680 	lint_assert(eptr == cp + len);
681 	if (errno != 0) {
682 		/* floating-point constant out of range */
683 		warning(248);
684 	} else if (typ == FLOAT) {
685 		ld = (float)ld;
686 		if (isfinite(ld) == 0) {
687 			/* floating-point constant out of range */
688 			warning(248);
689 			ld = ld > 0 ? FLT_MAX : -FLT_MAX;
690 		}
691 	} else if (typ == DOUBLE
692 	    || /* CONSTCOND */LDOUBLE_SIZE == DOUBLE_SIZE) {
693 		ld = (double)ld;
694 		if (isfinite(ld) == 0) {
695 			/* floating-point constant out of range */
696 			warning(248);
697 			ld = ld > 0 ? DBL_MAX : -DBL_MAX;
698 		}
699 	}
700 
701 	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
702 	yylval.y_val->v_tspec = typ;
703 	yylval.y_val->u.floating = ld;
704 
705 	return T_CON;
706 }
707 
708 int
lex_operator(int t,op_t o)709 lex_operator(int t, op_t o)
710 {
711 
712 	yylval.y_op = o;
713 	return t;
714 }
715 
/*
 * The byte that was read while parsing an octal or hexadecimal escape
 * sequence but is not part of it.  get_escaped_char takes it as its next
 * byte instead of reading a new one.  -1 means there is no such byte.
 */
static int prev_byte = -1;
717 
718 static int
read_escaped_oct(int c)719 read_escaped_oct(int c)
720 {
721 	int n = 3;
722 	int value = 0;
723 	do {
724 		value = (value << 3) + (c - '0');
725 		c = read_byte();
726 	} while (--n > 0 && '0' <= c && c <= '7');
727 	prev_byte = c;
728 	if (value > TARG_UCHAR_MAX) {
729 		/* character escape does not fit in character */
730 		warning(76);
731 		value &= CHAR_MASK;
732 	}
733 	return value;
734 }
735 
736 static unsigned int
read_escaped_hex(int c)737 read_escaped_hex(int c)
738 {
739 	if (!allow_c90)
740 		/* \x undefined in traditional C */
741 		warning(82);
742 	unsigned int value = 0;
743 	int state = 0;		/* 0 = no digits, 1 = OK, 2 = overflow */
744 	while (c = read_byte(), isxdigit(c)) {
745 		c = isdigit(c) ? c - '0' : toupper(c) - 'A' + 10;
746 		value = (value << 4) + c;
747 		if (state == 2)
748 			continue;
749 		if ((value & ~CHAR_MASK) != 0) {
750 			/* overflow in hex escape */
751 			warning(75);
752 			state = 2;
753 		} else {
754 			state = 1;
755 		}
756 	}
757 	prev_byte = c;
758 	if (state == 0) {
759 		/* no hex digits follow \x */
760 		error(74);
761 	}
762 	if (state == 2)
763 		value &= CHAR_MASK;
764 	return value;
765 }
766 
767 static int
read_escaped_backslash(int delim)768 read_escaped_backslash(int delim)
769 {
770 	int c;
771 
772 	switch (c = read_byte()) {
773 	case '"':
774 		if (!allow_c90 && delim == '\'')
775 			/* \" inside character constants undef... */
776 			warning(262);
777 		return '"';
778 	case '\'':
779 		return '\'';
780 	case '?':
781 		if (!allow_c90)
782 			/* \? undefined in traditional C */
783 			warning(263);
784 		return '?';
785 	case '\\':
786 		return '\\';
787 	case 'a':
788 		if (!allow_c90)
789 			/* \a undefined in traditional C */
790 			warning(81);
791 		return '\a';
792 	case 'b':
793 		return '\b';
794 	case 'f':
795 		return '\f';
796 	case 'n':
797 		return '\n';
798 	case 'r':
799 		return '\r';
800 	case 't':
801 		return '\t';
802 	case 'v':
803 		if (!allow_c90)
804 			/* \v undefined in traditional C */
805 			warning(264);
806 		return '\v';
807 	case '8': case '9':
808 		/* bad octal digit '%c' */
809 		warning(77, c);
810 		/* FALLTHROUGH */
811 	case '0': case '1': case '2': case '3':
812 	case '4': case '5': case '6': case '7':
813 		return read_escaped_oct(c);
814 	case 'x':
815 		return (int)read_escaped_hex(c);
816 	case '\n':
817 		return -3;
818 	case EOF:
819 		return -2;
820 	default:
821 		if (isprint(c)) {
822 			/* dubious escape \%c */
823 			warning(79, c);
824 		} else {
825 			/* dubious escape \%o */
826 			warning(80, c);
827 		}
828 		return c;
829 	}
830 }
831 
832 /*
833  * Read a character which is part of a character constant or of a string
834  * and handle escapes.
835  *
836  * 'delim' is '\'' for character constants and '"' for string literals.
837  *
838  * Returns -1 if the end of the character constant or string is reached,
839  * -2 if the EOF is reached, and the character otherwise.
840  */
841 static int
get_escaped_char(int delim)842 get_escaped_char(int delim)
843 {
844 
845 	int c = prev_byte;
846 	if (c != -1)
847 		prev_byte = -1;
848 	else
849 		c = read_byte();
850 
851 	if (c == delim)
852 		return -1;
853 	switch (c) {
854 	case '\n':
855 		if (!allow_c90) {
856 			/* newline in string or char constant */
857 			error(254);
858 			return -2;
859 		}
860 		return c;
861 	case '\0':
862 		/* syntax error '%s' */
863 		error(249, "EOF or null byte in literal");
864 		return -2;
865 	case EOF:
866 		return -2;
867 	case '\\':
868 		c = read_escaped_backslash(delim);
869 		if (c == -3)
870 			return get_escaped_char(delim);
871 	}
872 	return c;
873 }
874 
875 /* Called if lex found a leading "'". */
876 int
lex_character_constant(void)877 lex_character_constant(void)
878 {
879 	size_t n;
880 	int val, c;
881 
882 	n = 0;
883 	val = 0;
884 	while ((c = get_escaped_char('\'')) >= 0) {
885 		val = (int)((unsigned int)val << CHAR_SIZE) + c;
886 		n++;
887 	}
888 	if (c == -2) {
889 		/* unterminated character constant */
890 		error(253);
891 	} else if (n > sizeof(int) || (n > 1 && (pflag || hflag))) {
892 		/*
893 		 * XXX: ^^ should rather be sizeof(TARG_INT). Luckily,
894 		 * sizeof(int) is the same on all supported platforms.
895 		 */
896 		/* too many characters in character constant */
897 		error(71);
898 	} else if (n > 1) {
899 		/* multi-character character constant */
900 		warning(294);
901 	} else if (n == 0) {
902 		/* empty character constant */
903 		error(73);
904 	}
905 	if (n == 1)
906 		val = (int)convert_integer(val, CHAR, CHAR_SIZE);
907 
908 	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
909 	yylval.y_val->v_tspec = INT;
910 	yylval.y_val->v_char_constant = true;
911 	yylval.y_val->u.integer = val;
912 
913 	return T_CON;
914 }
915 
916 /*
917  * Called if lex found a leading L\'
918  */
919 int
lex_wide_character_constant(void)920 lex_wide_character_constant(void)
921 {
922 	static char buf[MB_LEN_MAX + 1];
923 	size_t n, nmax;
924 	int c;
925 	wchar_t wc;
926 
927 	nmax = MB_CUR_MAX;
928 
929 	n = 0;
930 	while ((c = get_escaped_char('\'')) >= 0) {
931 		if (n < nmax)
932 			buf[n] = (char)c;
933 		n++;
934 	}
935 
936 	wc = 0;
937 
938 	if (c == -2) {
939 		/* unterminated character constant */
940 		error(253);
941 	} else if (n == 0) {
942 		/* empty character constant */
943 		error(73);
944 	} else if (n > nmax) {
945 		n = nmax;
946 		/* too many characters in character constant */
947 		error(71);
948 	} else {
949 		buf[n] = '\0';
950 		(void)mbtowc(NULL, NULL, 0);
951 		if (mbtowc(&wc, buf, nmax) < 0)
952 			/* invalid multibyte character */
953 			error(291);
954 	}
955 
956 	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
957 	yylval.y_val->v_tspec = WCHAR_TSPEC;
958 	yylval.y_val->v_char_constant = true;
959 	yylval.y_val->u.integer = wc;
960 
961 	return T_CON;
962 }
963 
/*
 * Extract the flags that follow the filename in a line directive.
 *
 * 'p' is split into words at whitespace.  The words "1", "2" and "3" set
 * *is_begin, *is_end and *is_system; all other words are ignored.  All three
 * results start out as false.
 *
 * See https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
 */
static void
parse_line_directive_flags(const char *p,
			   bool *is_begin, bool *is_end, bool *is_system)
{
	*is_begin = false;
	*is_end = false;
	*is_system = false;

	while (*p != '\0') {
		while (ch_isspace(*p))
			p++;

		const char *word = p;
		while (*p != '\0' && !ch_isspace(*p))
			p++;

		/* Every flag is a single digit. */
		if (p - word != 1)
			continue;

		switch (word[0]) {
		case '1':
			*is_begin = true;
			break;
		case '2':
			*is_end = true;
			break;
		case '3':
			*is_system = true;
			break;
		default:
			/* Flag '4' is only interesting for C++. */
			break;
		}
	}
}
992 
993 /*
994  * Called for preprocessor directives. Currently implemented are:
995  *	# pragma [argument...]
996  *	# lineno
997  *	# lineno "filename"
998  *	# lineno "filename" GCC-flag...
999  */
1000 void
lex_directive(const char * yytext)1001 lex_directive(const char *yytext)
1002 {
1003 	const char *cp, *fn;
1004 	char c, *eptr;
1005 	size_t fnl;
1006 	long ln;
1007 	bool is_begin, is_end, is_system;
1008 
1009 	static bool first = true;
1010 
1011 	/* Go to first non-whitespace after # */
1012 	for (cp = yytext + 1; (c = *cp) == ' ' || c == '\t'; cp++)
1013 		continue;
1014 
1015 	if (!ch_isdigit(c)) {
1016 		if (strncmp(cp, "pragma", 6) == 0 && ch_isspace(cp[6]))
1017 			return;
1018 	error:
1019 		/* undefined or invalid '#' directive */
1020 		warning(255);
1021 		return;
1022 	}
1023 	ln = strtol(--cp, &eptr, 10);
1024 	if (eptr == cp)
1025 		goto error;
1026 	if ((c = *(cp = eptr)) != ' ' && c != '\t' && c != '\0')
1027 		goto error;
1028 	while ((c = *cp++) == ' ' || c == '\t')
1029 		continue;
1030 	if (c != '\0') {
1031 		if (c != '"')
1032 			goto error;
1033 		fn = cp;
1034 		while ((c = *cp) != '"' && c != '\0')
1035 			cp++;
1036 		if (c != '"')
1037 			goto error;
1038 		if ((fnl = cp++ - fn) > PATH_MAX)
1039 			goto error;
1040 		/* empty string means stdin */
1041 		if (fnl == 0) {
1042 			fn = "{standard input}";
1043 			fnl = 16;	/* strlen (fn) */
1044 		}
1045 		curr_pos.p_file = record_filename(fn, fnl);
1046 		/*
1047 		 * If this is the first directive, the name is the name
1048 		 * of the C source file as specified at the command line.
1049 		 * It is written to the output file.
1050 		 */
1051 		if (first) {
1052 			csrc_pos.p_file = curr_pos.p_file;
1053 			outsrc(transform_filename(curr_pos.p_file,
1054 			    strlen(curr_pos.p_file)));
1055 			first = false;
1056 		}
1057 
1058 		parse_line_directive_flags(cp, &is_begin, &is_end, &is_system);
1059 		update_location(curr_pos.p_file, (int)ln, is_begin, is_end);
1060 		in_system_header = is_system;
1061 	}
1062 	curr_pos.p_line = (int)ln - 1;
1063 	curr_pos.p_uniq = 0;
1064 	if (curr_pos.p_file == csrc_pos.p_file) {
1065 		csrc_pos.p_line = (int)ln - 1;
1066 		csrc_pos.p_uniq = 0;
1067 	}
1068 }
1069 
1070 /* Handle lint comments such as ARGSUSED. */
1071 void
lex_comment(void)1072 lex_comment(void)
1073 {
1074 	int c;
1075 	static const struct {
1076 		const	char name[18];
1077 		bool	arg;
1078 		lint_comment comment;
1079 	} keywtab[] = {
1080 		{ "ARGSUSED",		true,	LC_ARGSUSED	},
1081 		{ "BITFIELDTYPE",	false,	LC_BITFIELDTYPE	},
1082 		{ "CONSTCOND",		false,	LC_CONSTCOND	},
1083 		{ "CONSTANTCOND",	false,	LC_CONSTCOND	},
1084 		{ "CONSTANTCONDITION",	false,	LC_CONSTCOND	},
1085 		{ "FALLTHRU",		false,	LC_FALLTHROUGH	},
1086 		{ "FALLTHROUGH",	false,	LC_FALLTHROUGH	},
1087 		{ "FALL THROUGH",	false,	LC_FALLTHROUGH	},
1088 		{ "fallthrough",	false,	LC_FALLTHROUGH	},
1089 		{ "LINTLIBRARY",	false,	LC_LINTLIBRARY	},
1090 		{ "LINTED",		true,	LC_LINTED	},
1091 		{ "LONGLONG",		false,	LC_LONGLONG	},
1092 		{ "NOSTRICT",		true,	LC_LINTED	},
1093 		{ "NOTREACHED",		false,	LC_NOTREACHED	},
1094 		{ "PRINTFLIKE",		true,	LC_PRINTFLIKE	},
1095 		{ "PROTOLIB",		true,	LC_PROTOLIB	},
1096 		{ "SCANFLIKE",		true,	LC_SCANFLIKE	},
1097 		{ "VARARGS",		true,	LC_VARARGS	},
1098 	};
1099 	char keywd[32];
1100 	char arg[32];
1101 	size_t l, i;
1102 	int a;
1103 
1104 	bool seen_end_of_comment = false;
1105 
1106 	while (c = read_byte(), isspace(c))
1107 		continue;
1108 
1109 	/* Read the potential keyword to keywd */
1110 	l = 0;
1111 	while (c != EOF && l < sizeof(keywd) - 1 &&
1112 	    (isalpha(c) || isspace(c))) {
1113 		if (islower(c) && l > 0 && ch_isupper(keywd[0]))
1114 			break;
1115 		keywd[l++] = (char)c;
1116 		c = read_byte();
1117 	}
1118 	while (l > 0 && ch_isspace(keywd[l - 1]))
1119 		l--;
1120 	keywd[l] = '\0';
1121 
1122 	/* look for the keyword */
1123 	for (i = 0; i < sizeof(keywtab) / sizeof(keywtab[0]); i++)
1124 		if (strcmp(keywtab[i].name, keywd) == 0)
1125 			goto found_keyword;
1126 	goto skip_rest;
1127 
1128 found_keyword:
1129 	while (isspace(c))
1130 		c = read_byte();
1131 
1132 	/* read the argument, if the keyword accepts one and there is one */
1133 	l = 0;
1134 	if (keywtab[i].arg) {
1135 		while (isdigit(c) && l < sizeof(arg) - 1) {
1136 			arg[l++] = (char)c;
1137 			c = read_byte();
1138 		}
1139 	}
1140 	arg[l] = '\0';
1141 	a = l != 0 ? atoi(arg) : -1;
1142 
1143 	while (isspace(c))
1144 		c = read_byte();
1145 
1146 	seen_end_of_comment = c == '*' && (c = read_byte()) == '/';
1147 	if (!seen_end_of_comment && keywtab[i].comment != LC_LINTED)
1148 		/* extra characters in lint comment */
1149 		warning(257);
1150 
1151 	handle_lint_comment(keywtab[i].comment, a);
1152 
1153 skip_rest:
1154 	while (!seen_end_of_comment) {
1155 		int lc = c;
1156 		if ((c = read_byte()) == EOF) {
1157 			/* unterminated comment */
1158 			error(256);
1159 			break;
1160 		}
1161 		if (lc == '*' && c == '/')
1162 			seen_end_of_comment = true;
1163 	}
1164 }
1165 
1166 void
lex_slash_slash_comment(void)1167 lex_slash_slash_comment(void)
1168 {
1169 	int c;
1170 
1171 	if (!allow_c99 && !allow_gcc)
1172 		/* %s does not support '//' comments */
1173 		gnuism(312, allow_c90 ? "C90" : "traditional C");
1174 
1175 	while ((c = read_byte()) != EOF && c != '\n')
1176 		continue;
1177 }
1178 
1179 /*
1180  * Clear flags for lint comments LINTED, LONGLONG and CONSTCOND.
1181  * clear_warn_flags is called after function definitions and global and
1182  * local declarations and definitions. It is also called between
1183  * the controlling expression and the body of control statements
1184  * (if, switch, for, while).
1185  */
1186 void
clear_warn_flags(void)1187 clear_warn_flags(void)
1188 {
1189 
1190 	lwarn = LWARN_ALL;
1191 	suppress_longlong = false;
1192 	suppress_constcond = false;
1193 }
1194 
1195 int
lex_string(void)1196 lex_string(void)
1197 {
1198 	unsigned char *s;
1199 	int c;
1200 	size_t len, max;
1201 
1202 	s = xmalloc(max = 64);
1203 
1204 	len = 0;
1205 	while ((c = get_escaped_char('"')) >= 0) {
1206 		/* +1 to reserve space for a trailing NUL character */
1207 		if (len + 1 == max)
1208 			s = xrealloc(s, max *= 2);
1209 		s[len++] = (char)c;
1210 	}
1211 	s[len] = '\0';
1212 	if (c == -2)
1213 		/* unterminated string constant */
1214 		error(258);
1215 
1216 	strg_t *strg = xcalloc(1, sizeof(*strg));
1217 	strg->st_char = true;
1218 	strg->st_len = len;
1219 	strg->st_mem = s;
1220 
1221 	yylval.y_string = strg;
1222 	return T_STRING;
1223 }
1224 
1225 int
lex_wide_string(void)1226 lex_wide_string(void)
1227 {
1228 	int c, n;
1229 
1230 	size_t len = 0, max = 64;
1231 	char *s = xmalloc(max);
1232 	while ((c = get_escaped_char('"')) >= 0) {
1233 		/* +1 to save space for a trailing NUL character */
1234 		if (len + 1 >= max)
1235 			s = xrealloc(s, max *= 2);
1236 		s[len++] = (char)c;
1237 	}
1238 	s[len] = '\0';
1239 	if (c == -2)
1240 		/* unterminated string constant */
1241 		error(258);
1242 
1243 	/* get length of wide-character string */
1244 	(void)mblen(NULL, 0);
1245 	size_t wlen = 0;
1246 	for (size_t i = 0; i < len; i += n, wlen++) {
1247 		if ((n = mblen(&s[i], MB_CUR_MAX)) == -1) {
1248 			/* invalid multibyte character */
1249 			error(291);
1250 			break;
1251 		}
1252 		if (n == 0)
1253 			n = 1;
1254 	}
1255 
1256 	wchar_t *ws = xmalloc((wlen + 1) * sizeof(*ws));
1257 	size_t wi = 0;
1258 	/* convert from multibyte to wide char */
1259 	(void)mbtowc(NULL, NULL, 0);
1260 	for (size_t i = 0; i < len; i += n, wi++) {
1261 		if ((n = mbtowc(&ws[wi], &s[i], MB_CUR_MAX)) == -1)
1262 			break;
1263 		if (n == 0)
1264 			n = 1;
1265 	}
1266 	ws[wi] = 0;
1267 	free(s);
1268 
1269 	strg_t *strg = xcalloc(1, sizeof(*strg));
1270 	strg->st_char = false;
1271 	strg->st_len = wlen;
1272 	strg->st_mem = ws;
1273 
1274 	yylval.y_string = strg;
1275 	return T_STRING;
1276 }
1277 
1278 void
lex_next_line(void)1279 lex_next_line(void)
1280 {
1281 	curr_pos.p_line++;
1282 	curr_pos.p_uniq = 0;
1283 	debug_printf("parsing %s:%d\n", curr_pos.p_file, curr_pos.p_line);
1284 	if (curr_pos.p_file == csrc_pos.p_file) {
1285 		csrc_pos.p_line++;
1286 		csrc_pos.p_uniq = 0;
1287 	}
1288 }
1289 
/* Report a character that does not start any token; c is its code. */
void
lex_unknown_character(int c)
{
	/* unknown character \%o */
	error(250, c);
}
1297 
1298 /*
1299  * The scanner does not create new symbol table entries for symbols it cannot
1300  * find in the symbol table. This is to avoid putting undeclared symbols into
1301  * the symbol table if a syntax error occurs.
1302  *
1303  * getsym is called as soon as it is probably ok to put the symbol in the
1304  * symbol table. It is still possible that symbols are put in the symbol
1305  * table that are not completely declared due to syntax errors. To avoid too
1306  * many problems in this case, symbols get type 'int' in getsym.
1307  *
1308  * XXX calls to getsym should be delayed until declare_1_* is called.
1309  */
1310 sym_t *
getsym(sbuf_t * sb)1311 getsym(sbuf_t *sb)
1312 {
1313 
1314 	sym_t *sym = sb->sb_sym;
1315 
1316 	/*
1317 	 * During member declaration it is possible that name() looked
1318 	 * for symbols of type FVFT, although it should have looked for
1319 	 * symbols of type FTAG. Same can happen for labels. Both cases
1320 	 * are compensated here.
1321 	 */
1322 	if (symtyp == FMEMBER || symtyp == FLABEL) {
1323 		if (sym == NULL || sym->s_kind == FVFT)
1324 			sym = symtab_search(sb->sb_name);
1325 	}
1326 
1327 	if (sym != NULL) {
1328 		lint_assert(sym->s_kind == symtyp);
1329 		set_symtyp(FVFT);
1330 		free(sb);
1331 		return sym;
1332 	}
1333 
1334 	/* create a new symbol table entry */
1335 
1336 	/* labels must always be allocated at level 1 (outermost block) */
1337 	decl_level *dl;
1338 	if (symtyp == FLABEL) {
1339 		sym = level_zero_alloc(1, sizeof(*sym), "sym");
1340 		char *s = level_zero_alloc(1, sb->sb_len + 1, "string");
1341 		(void)memcpy(s, sb->sb_name, sb->sb_len + 1);
1342 		sym->s_name = s;
1343 		sym->s_block_level = 1;
1344 		dl = dcs;
1345 		while (dl->d_enclosing != NULL &&
1346 		    dl->d_enclosing->d_enclosing != NULL)
1347 			dl = dl->d_enclosing;
1348 		lint_assert(dl->d_kind == DLK_AUTO);
1349 	} else {
1350 		sym = block_zero_alloc(sizeof(*sym), "sym");
1351 		sym->s_name = sb->sb_name;
1352 		sym->s_block_level = block_level;
1353 		dl = dcs;
1354 	}
1355 
1356 	sym->s_def_pos = unique_curr_pos();
1357 	if ((sym->s_kind = symtyp) != FLABEL)
1358 		sym->s_type = gettyp(INT);
1359 
1360 	set_symtyp(FVFT);
1361 
1362 	if (!in_gcc_attribute) {
1363 		symtab_add(sym);
1364 
1365 		*dl->d_last_dlsym = sym;
1366 		dl->d_last_dlsym = &sym->s_level_next;
1367 	}
1368 
1369 	free(sb);
1370 	return sym;
1371 }
1372 
1373 /*
1374  * Construct a temporary symbol. The symbol name starts with a digit to avoid
1375  * name clashes with other identifiers.
1376  */
1377 sym_t *
mktempsym(type_t * tp)1378 mktempsym(type_t *tp)
1379 {
1380 	static unsigned n = 0;
1381 	char *s = level_zero_alloc((size_t)block_level, 64, "string");
1382 	sym_t *sym = block_zero_alloc(sizeof(*sym), "sym");
1383 	scl_t scl;
1384 
1385 	(void)snprintf(s, 64, "%.8u_tmp", n++);
1386 
1387 	scl = dcs->d_scl;
1388 	if (scl == NOSCL)
1389 		scl = block_level > 0 ? AUTO : EXTERN;
1390 
1391 	sym->s_name = s;
1392 	sym->s_type = tp;
1393 	sym->s_block_level = block_level;
1394 	sym->s_scl = scl;
1395 	sym->s_kind = FVFT;
1396 	sym->s_used = true;
1397 	sym->s_set = true;
1398 
1399 	symtab_add(sym);
1400 
1401 	*dcs->d_last_dlsym = sym;
1402 	dcs->d_last_dlsym = &sym->s_level_next;
1403 
1404 	return sym;
1405 }
1406 
1407 /* Remove a symbol forever from the symbol table. */
1408 void
rmsym(sym_t * sym)1409 rmsym(sym_t *sym)
1410 {
1411 
1412 	debug_step("rmsym '%s' %s '%s'",
1413 	    sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type));
1414 	symtab_remove(sym);
1415 
1416 	/* avoid that the symbol will later be put back to the symbol table */
1417 	sym->s_block_level = -1;
1418 }
1419 
1420 /*
1421  * Remove all symbols from the symbol table that have the same level as the
1422  * given symbol.
1423  */
1424 void
symtab_remove_level(sym_t * syms)1425 symtab_remove_level(sym_t *syms)
1426 {
1427 
1428 	if (syms != NULL)
1429 		debug_step("%s %d", __func__, syms->s_block_level);
1430 
1431 	/* Note the use of s_level_next instead of s_symtab_next. */
1432 	for (sym_t *sym = syms; sym != NULL; sym = sym->s_level_next) {
1433 		if (sym->s_block_level != -1) {
1434 			debug_step("%s '%s' %s '%s' %d", __func__,
1435 			    sym->s_name, symt_name(sym->s_kind),
1436 			    type_name(sym->s_type),
1437 			    sym->s_block_level);
1438 			symtab_remove(sym);
1439 			sym->s_symtab_ref = NULL;
1440 		}
1441 	}
1442 }
1443 
1444 /* Put a symbol into the symbol table. */
1445 void
inssym(int level,sym_t * sym)1446 inssym(int level, sym_t *sym)
1447 {
1448 
1449 	debug_step("%s '%s' %s '%s' %d", __func__,
1450 	    sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type),
1451 	    level);
1452 	sym->s_block_level = level;
1453 	symtab_add(sym);
1454 
1455 	/*
1456 	 * Placing the inner symbols to the beginning of the list ensures
1457 	 * that these symbols are preferred over symbols from the outer
1458 	 * blocks that happen to have the same name.
1459 	 */
1460 	const sym_t *next = sym->s_symtab_next;
1461 	if (next != NULL)
1462 		lint_assert(sym->s_block_level >= next->s_block_level);
1463 }
1464 
1465 /* Called at level 0 after syntax errors. */
1466 void
clean_up_after_error(void)1467 clean_up_after_error(void)
1468 {
1469 
1470 	symtab_remove_locals();
1471 
1472 	while (mem_block_level > 0)
1473 		level_free_all(mem_block_level--);
1474 }
1475 
1476 /* Create a new symbol with the same name as an existing symbol. */
1477 sym_t *
pushdown(const sym_t * sym)1478 pushdown(const sym_t *sym)
1479 {
1480 	sym_t *nsym;
1481 
1482 	debug_step("pushdown '%s' %s '%s'",
1483 	    sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type));
1484 	nsym = block_zero_alloc(sizeof(*nsym), "sym");
1485 	lint_assert(sym->s_block_level <= block_level);
1486 	nsym->s_name = sym->s_name;
1487 	nsym->s_def_pos = unique_curr_pos();
1488 	nsym->s_kind = sym->s_kind;
1489 	nsym->s_block_level = block_level;
1490 
1491 	symtab_add(nsym);
1492 
1493 	*dcs->d_last_dlsym = nsym;
1494 	dcs->d_last_dlsym = &nsym->s_level_next;
1495 
1496 	return nsym;
1497 }
1498 
1499 /*
1500  * Free any dynamically allocated memory referenced by
1501  * the value stack or yylval.
1502  * The type of information in yylval is described by tok.
1503  */
1504 void
freeyyv(void * sp,int tok)1505 freeyyv(void *sp, int tok)
1506 {
1507 	if (tok == T_NAME || tok == T_TYPENAME) {
1508 		sbuf_t *sb = *(sbuf_t **)sp;
1509 		free(sb);
1510 	} else if (tok == T_CON) {
1511 		val_t *val = *(val_t **)sp;
1512 		free(val);
1513 	} else if (tok == T_STRING) {
1514 		strg_t *strg = *(strg_t **)sp;
1515 		free(strg->st_mem);
1516 		free(strg);
1517 	}
1518 }
1519