xref: /netbsd/usr.bin/xlint/lint1/emit1.c (revision 3a0e83fa)
1 /* $NetBSD: emit1.c,v 1.72 2023/07/13 08:40:38 rillig Exp $ */
2 
3 /*
4  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
5  * Copyright (c) 1994, 1995 Jochen Pohl
6  * All Rights Reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by Jochen Pohl for
19  *	The NetBSD Project.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #if HAVE_NBTOOL_CONFIG_H
36 #include "nbtool_config.h"
37 #endif
38 
39 #include <sys/cdefs.h>
40 #if defined(__RCSID)
41 __RCSID("$NetBSD: emit1.c,v 1.72 2023/07/13 08:40:38 rillig Exp $");
42 #endif
43 
44 #include "lint1.h"
45 
46 static	void	outtt(sym_t *, sym_t *);
47 static	void	outfstrg(strg_t *);
48 
49 /*
50  * Write type into the output buffer.
51  * The type is written as a sequence of substrings, each of which describes a
52  * node of type type_t
53  * a node is encoded as follows:
54  *	_Bool			B
55  *	_Complex float		s X
56  *	_Complex double		X
57  *	_Complex long double	l X
58  *	char			C
59  *	signed char		s C
60  *	unsigned char		u C
61  *	short			S
62  *	unsigned short		u S
63  *	int			I
64  *	unsigned int		u I
65  *	long			L
66  *	unsigned long		u L
67  *	long long		Q
68  *	unsigned long long	u Q
69  *	float			s D
70  *	double			D
71  *	long double		l D
72  *	void			V
73  *	*			P
74  *	[n]			A n
75  *	()			F
76  *	(void)			F 0
77  *	(n parameters)		F n arg1 arg2 ... argn
78  *	(n parameters, ...)	F n arg1 arg2 ... argn E
79  *	enum tag		e T tag_or_typename
80  *	struct tag		s T tag_or_typename
81  *	union tag		u T tag_or_typename
82  *
83  *	tag_or_typename		0 (obsolete)		no tag or type name
84  *				1 n tag			tagged type
85  *				2 n typename		only typedef name
86  *				3 line.file.uniq	anonymous types
87  *
88  * spaces are only for better readability
89  * additionally it is possible to prepend the characters 'c' (for const)
90  * and 'v' (for volatile)
91  */
92 void
outtype(const type_t * tp)93 outtype(const type_t *tp)
94 {
95 	/* Available letters: ------GH--K-MNO--R--U-W-YZ */
96 #ifdef INT128_SIZE
97 	static const char tt[NTSPEC] = "???BCCCSSIILLQQJJDDD?XXXVTTTPAF";
98 	static const char ss[NTSPEC] = "???  su u u u u us l?s l sue   ";
99 #else
100 	static const char tt[NTSPEC] = "???BCCCSSIILLQQDDD?XXXVTTTPAF";
101 	static const char ss[NTSPEC] = "???  su u u u us l?s l sue   ";
102 #endif
103 	int na;
104 	sym_t *arg;
105 	tspec_t ts;
106 
107 	while (tp != NULL) {
108 		if ((ts = tp->t_tspec) == INT && tp->t_is_enum)
109 			ts = ENUM;
110 		lint_assert(tt[ts] != '?' && ss[ts] != '?');
111 		if (tp->t_const)
112 			outchar('c');
113 		if (tp->t_volatile)
114 			outchar('v');
115 		if (ss[ts] != ' ')
116 			outchar(ss[ts]);
117 		outchar(tt[ts]);
118 
119 		if (ts == ARRAY) {
120 			outint(tp->t_dim);
121 		} else if (ts == ENUM) {
122 			outtt(tp->t_enum->en_tag, tp->t_enum->en_first_typedef);
123 		} else if (is_struct_or_union(ts)) {
124 			outtt(tp->t_sou->sou_tag, tp->t_sou->sou_first_typedef);
125 		} else if (ts == FUNC && tp->t_proto) {
126 			na = 0;
127 			for (arg = tp->t_args; arg != NULL; arg = arg->s_next)
128 				na++;
129 			if (tp->t_vararg)
130 				na++;
131 			outint(na);
132 			for (arg = tp->t_args; arg != NULL; arg = arg->s_next)
133 				outtype(arg->s_type);
134 			if (tp->t_vararg)
135 				outchar('E');
136 		}
137 		tp = tp->t_subt;
138 	}
139 }
140 
141 /*
142  * write the name of a tag or typename
143  *
144  * if the tag is named, the name of the tag is written,
145  * otherwise, if a typename exists which refers to this tag,
146  * this typename is written
147  */
148 static void
outtt(sym_t * tag,sym_t * tdef)149 outtt(sym_t *tag, sym_t *tdef)
150 {
151 
152 	/* 0 is no longer used. */
153 
154 	if (tag->s_name != unnamed) {
155 		outint(1);
156 		outname(tag->s_name);
157 	} else if (tdef != NULL) {
158 		outint(2);
159 		outname(tdef->s_name);
160 	} else {
161 		outint(3);
162 		outint(tag->s_def_pos.p_line);
163 		outchar('.');
164 		outint(get_filename_id(tag->s_def_pos.p_file));
165 		outchar('.');
166 		outint(tag->s_def_pos.p_uniq);
167 	}
168 }
169 
170 /*
171  * write information about a globally declared/defined symbol
172  * with storage class extern
173  *
174  * information about function definitions are written in outfdef(),
175  * not here
176  */
177 void
outsym(const sym_t * sym,scl_t sc,def_t def)178 outsym(const sym_t *sym, scl_t sc, def_t def)
179 {
180 
181 	/*
182 	 * Static function declarations must also be written to the output
183 	 * file. Compatibility of function declarations (for both static
184 	 * and extern functions) must be checked in lint2. Lint1 can't do
185 	 * this, especially not if functions are declared at block level
186 	 * before their first declaration at level 0.
187 	 */
188 	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
189 		return;
190 	if (ch_isdigit(sym->s_name[0]))	/* 00000000_tmp */
191 		return;
192 
193 	/* reset buffer */
194 	outclr();
195 
196 	outint(csrc_pos.p_line);
197 	outchar('d');		/* declaration */
198 	outint(get_filename_id(sym->s_def_pos.p_file));
199 	outchar('.');
200 	outint(sym->s_def_pos.p_line);
201 
202 	/* flags */
203 
204 	if (def == DEF)
205 		outchar('d');	/* defined */
206 	else if (def == TDEF)
207 		outchar('t');	/* tentative defined */
208 	else {
209 		lint_assert(def == DECL);
210 		outchar('e');	/* declared */
211 	}
212 
213 	if (llibflg && def != DECL) {
214 		/*
215 		 * mark it as used so lint2 does not complain about
216 		 * unused symbols in libraries
217 		 */
218 		outchar('u');
219 	}
220 
221 	if (sc == STATIC)
222 		outchar('s');
223 
224 	/* name of the symbol */
225 	outname(sym->s_name);
226 
227 	/* renamed name of symbol, if necessary */
228 	if (sym->s_rename != NULL) {
229 		outchar('r');
230 		outname(sym->s_rename);
231 	}
232 
233 	/* type of the symbol */
234 	outtype(sym->s_type);
235 }
236 
237 /*
238  * Write information about a function definition. This is also done for static
239  * functions, to later check if they are called with proper argument types.
240  */
241 void
outfdef(const sym_t * fsym,const pos_t * posp,bool rval,bool osdef,const sym_t * args)242 outfdef(const sym_t *fsym, const pos_t *posp, bool rval, bool osdef,
243 	const sym_t *args)
244 {
245 	int narg;
246 	const sym_t *arg;
247 
248 	/* reset the buffer */
249 	outclr();
250 
251 	if (posp->p_file == csrc_pos.p_file) {
252 		outint(posp->p_line);
253 	} else {
254 		outint(csrc_pos.p_line);
255 	}
256 	outchar('d');		/* declaration */
257 	outint(get_filename_id(posp->p_file));
258 	outchar('.');
259 	outint(posp->p_line);
260 
261 	/* flags */
262 
263 	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
264 	if (printflike_argnum != -1) {
265 		nvararg = printflike_argnum;
266 	} else if (scanflike_argnum != -1) {
267 		nvararg = scanflike_argnum;
268 	}
269 
270 	if (nvararg != -1) {
271 		outchar('v');
272 		outint(nvararg);
273 	}
274 	if (scanflike_argnum != -1) {
275 		outchar('S');
276 		outint(scanflike_argnum);
277 	}
278 	if (printflike_argnum != -1) {
279 		outchar('P');
280 		outint(printflike_argnum);
281 	}
282 	nvararg = printflike_argnum = scanflike_argnum = -1;
283 
284 	outchar('d');
285 
286 	if (rval)
287 		outchar('r');	/* has return value */
288 
289 	if (llibflg)
290 		/*
291 		 * mark it as used so lint2 does not complain about
292 		 * unused symbols in libraries
293 		 */
294 		outchar('u');
295 
296 	if (osdef)
297 		outchar('o');	/* old-style function definition */
298 
299 	if (fsym->s_inline)
300 		outchar('i');
301 
302 	if (fsym->s_scl == STATIC)
303 		outchar('s');
304 
305 	/* name of function */
306 	outname(fsym->s_name);
307 
308 	/* renamed name of function, if necessary */
309 	if (fsym->s_rename != NULL) {
310 		outchar('r');
311 		outname(fsym->s_rename);
312 	}
313 
314 	/* argument types and return value */
315 	if (osdef) {
316 		narg = 0;
317 		for (arg = args; arg != NULL; arg = arg->s_next)
318 			narg++;
319 		outchar('f');
320 		outint(narg);
321 		for (arg = args; arg != NULL; arg = arg->s_next)
322 			outtype(arg->s_type);
323 		outtype(fsym->s_type->t_subt);
324 	} else {
325 		outtype(fsym->s_type);
326 	}
327 }
328 
329 /*
330  * write out all information necessary for lint2 to check function
331  * calls
332  *
333  * retval_used is set if the return value is used (assigned to a variable)
334  * retval_discarded is set if the return value is neither used nor ignored
335  * (that is, cast to void)
336  */
337 void
outcall(const tnode_t * tn,bool retval_used,bool retval_discarded)338 outcall(const tnode_t *tn, bool retval_used, bool retval_discarded)
339 {
340 	tnode_t *args, *arg;
341 	int narg, n, i;
342 	tspec_t t;
343 
344 	/* reset buffer */
345 	outclr();
346 
347 	outint(csrc_pos.p_line);
348 	outchar('c');		/* function call */
349 	outint(get_filename_id(curr_pos.p_file));
350 	outchar('.');
351 	outint(curr_pos.p_line);
352 
353 	/*
354 	 * flags; 'u' and 'i' must be last to make sure a letter
355 	 * is between the numeric argument of a flag and the name of
356 	 * the function
357 	 */
358 	narg = 0;
359 	args = tn->tn_right;
360 	for (arg = args; arg != NULL; arg = arg->tn_right)
361 		narg++;
362 	/* information about arguments */
363 	for (n = 1; n <= narg; n++) {
364 		/* the last argument is the top one in the tree */
365 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
366 			continue;
367 		arg = arg->tn_left;
368 		if (arg->tn_op == CON) {
369 			if (is_integer(t = arg->tn_type->t_tspec)) {
370 				/*
371 				 * XXX it would probably be better to
372 				 * explicitly test the sign
373 				 */
374 				int64_t si = arg->tn_val.u.integer;
375 				if (si == 0) {
376 					/* zero constant */
377 					outchar('z');
378 				} else if (!msb(si, t)) {
379 					/* positive if cast to signed */
380 					outchar('p');
381 				} else {
382 					/* negative if cast to signed */
383 					outchar('n');
384 				}
385 				outint(n);
386 			}
387 		} else if (arg->tn_op == ADDR &&
388 			   arg->tn_left->tn_op == STRING &&
389 			   arg->tn_left->tn_string->st_char) {
390 			/* constant string, write all format specifiers */
391 			outchar('s');
392 			outint(n);
393 			outfstrg(arg->tn_left->tn_string);
394 		}
395 
396 	}
397 	/* return value discarded/used/ignored */
398 	outchar((char)(retval_discarded ? 'd' : (retval_used ? 'u' : 'i')));
399 
400 	/* name of the called function */
401 	outname(tn->tn_left->tn_left->tn_sym->s_name);
402 
403 	/* types of arguments */
404 	outchar('f');
405 	outint(narg);
406 	for (n = 1; n <= narg; n++) {
407 		/* the last argument is the top one in the tree */
408 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
409 			continue;
410 		outtype(arg->tn_left->tn_type);
411 	}
412 	/* expected type of return value */
413 	outtype(tn->tn_type);
414 }
415 
/*
 * Write a character to the output buffer, quoting it if necessary.
 *
 * A printable character other than backslash, double quote and single
 * quote is written as is. Everything else is written as a backslash
 * followed by either the usual escape letter or, if there is none, by
 * three octal digits.
 */
static void
outqchar(char c)
{
	unsigned char uc;
	char esc;

	if (ch_isprint(c) && c != '\\' && c != '"' && c != '\'') {
		outchar(c);
		return;
	}

	/* find the character that follows the backslash, if there is one */
	switch (c) {
	case '\\':
	case '"':
	case '\'':
		esc = c;
		break;
	case '\b':
		esc = 'b';
		break;
	case '\t':
		esc = 't';
		break;
	case '\n':
		esc = 'n';
		break;
	case '\f':
		esc = 'f';
		break;
	case '\r':
		esc = 'r';
		break;
	case '\v':
		esc = 'v';
		break;
	case '\a':
		esc = 'a';
		break;
	default:
		esc = '\0';	/* no escape letter, use octal digits */
		break;
	}

	outchar('\\');
	if (esc != '\0') {
		outchar(esc);
		return;
	}

	/* three octal digits, most significant first */
	uc = (unsigned char)c;
	outchar((char)(((uc >> 6) & 07) + '0'));
	outchar((char)(((uc >> 3) & 07) + '0'));
	outchar((char)((uc & 07) + '0'));
}
465 
466 /*
467  * extracts potential format specifiers for printf() and scanf() and
468  * writes them, enclosed in "" and quoted if necessary, to the output buffer
469  */
470 static void
outfstrg(strg_t * strg)471 outfstrg(strg_t *strg)
472 {
473 	char c, oc;
474 	bool first;
475 	const char *cp;
476 
477 	lint_assert(strg->st_char);
478 	cp = strg->st_mem;
479 
480 	outchar('"');
481 
482 	c = *cp++;
483 
484 	while (c != '\0') {
485 
486 		if (c != '%') {
487 			c = *cp++;
488 			continue;
489 		}
490 
491 		outchar('%');
492 		c = *cp++;
493 
494 		/* flags for printf and scanf and *-fieldwidth for printf */
495 		while (c == '-' || c == '+' || c == ' ' ||
496 		       c == '#' || c == '0' || c == '*') {
497 			outchar(c);
498 			c = *cp++;
499 		}
500 
501 		/* numeric field width */
502 		while (ch_isdigit(c)) {
503 			outchar(c);
504 			c = *cp++;
505 		}
506 
507 		/* precision for printf */
508 		if (c == '.') {
509 			outchar(c);
510 			c = *cp++;
511 			if (c == '*') {
512 				outchar(c);
513 				c = *cp++;
514 			} else {
515 				while (ch_isdigit(c)) {
516 					outchar(c);
517 					c = *cp++;
518 				}
519 			}
520 		}
521 
522 		/* h, l, L and q flags for printf and scanf */
523 		if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
524 			outchar(c);
525 			c = *cp++;
526 		}
527 
528 		/*
529 		 * The last character. It is always written, so we can detect
530 		 * invalid format specifiers.
531 		 */
532 		if (c != '\0') {
533 			outqchar(c);
534 			oc = c;
535 			c = *cp++;
536 			/*
537 			 * handle [ for scanf. [-] means that a minus sign
538 			 * was found at an undefined position.
539 			 */
540 			if (oc == '[') {
541 				if (c == '^')
542 					c = *cp++;
543 				if (c == ']')
544 					c = *cp++;
545 				first = true;
546 				while (c != '\0' && c != ']') {
547 					if (c == '-') {
548 						if (!first && *cp != ']')
549 							outchar(c);
550 					}
551 					first = false;
552 					c = *cp++;
553 				}
554 				if (c == ']') {
555 					outchar(c);
556 					c = *cp++;
557 				}
558 			}
559 		}
560 
561 	}
562 
563 	outchar('"');
564 }
565 
566 /*
567  * writes a record if sym was used
568  */
569 void
outusg(const sym_t * sym)570 outusg(const sym_t *sym)
571 {
572 	if (ch_isdigit(sym->s_name[0]))	/* 00000000_tmp, from mktempsym */
573 		return;
574 
575 	/* reset buffer */
576 	outclr();
577 
578 	outint(csrc_pos.p_line);
579 	outchar('u');		/* used */
580 	outint(get_filename_id(curr_pos.p_file));
581 	outchar('.');
582 	outint(curr_pos.p_line);
583 
584 	/* necessary to delimit both numbers */
585 	outchar('x');
586 
587 	outname(sym->s_name);
588 }
589