xref: /netbsd/usr.bin/xlint/lint1/emit1.c (revision 6550d01e)
1 /* $NetBSD: emit1.c,v 1.19 2008/09/26 22:52:24 matt Exp $ */
2 
3 /*
4  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
5  * Copyright (c) 1994, 1995 Jochen Pohl
6  * All Rights Reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Jochen Pohl for
19  *	The NetBSD Project.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #if HAVE_NBTOOL_CONFIG_H
36 #include "nbtool_config.h"
37 #endif
38 
39 #include <sys/cdefs.h>
40 #if defined(__RCSID) && !defined(lint)
41 __RCSID("$NetBSD: emit1.c,v 1.19 2008/09/26 22:52:24 matt Exp $");
42 #endif
43 
44 #include <ctype.h>
45 
46 #include "lint1.h"
47 
48 static	void	outtt(sym_t *, sym_t *);
49 static	void	outfstrg(strg_t *);
50 
51 /*
52  * Write type into the output buffer.
53  * The type is written as a sequence of substrings, each of which describes a
54  * node of type type_t
55  * a node is coded as follows:
56  *	_Bool			B
57  *	_Complex float		s X
58  *	_Complex double		X
59  *	_Complex long double	l X
60  *	char			C
61  *	signed char		s C
62  *	unsigned char		u C
63  *	short			S
64  *	unsigned short		u S
65  *	int			I
66  *	unsigned int		u I
67  *	long			L
68  *	unsigned long		u L
69  *	long long		Q
70  *	unsigned long long	u Q
71  *	float			s D
72  *	double			D
73  *	long double		l D
74  *	void			V
75  *	*			P
76  *	[n]			A n
77  *	()			F
78  *	(void)			F 0
79  *	(n arguments)		F n arg1 arg2 ... argn
80  *	(n arguments, ...)	F n arg1 arg2 ... argn-1 E
81  *	(a, b, c, ...)		f n arg1 arg2 ...
82  *	enum tag		e T tag_or_typename
83  *	struct tag		s T tag_or_typename
84  *	union tag		u T tag_or_typename
85  *
86  *	tag_or_typename		0			no tag or type name
87  *				1 n tag			Tag
88  *				2 n typename		only type name
89  *
90  * spaces are only for better readability
91  * additionaly it is possible to prepend the characters 'c' (for const)
92  * and 'v' (for volatile)
93  */
94 void
95 outtype(type_t *tp)
96 {
97 	int	t, s, na;
98 	sym_t	*arg;
99 	tspec_t	ts;
100 
101 	while (tp != NULL) {
102 		if ((ts = tp->t_tspec) == INT && tp->t_isenum)
103 			ts = ENUM;
104 		switch (ts) {
105 		case BOOL:	t = 'B';	s = '\0';	break;
106 		case CHAR:	t = 'C';	s = '\0';	break;
107 		case SCHAR:	t = 'C';	s = 's';	break;
108 		case UCHAR:	t = 'C';	s = 'u';	break;
109 		case SHORT:	t = 'S';	s = '\0';	break;
110 		case USHORT:	t = 'S';	s = 'u';	break;
111 		case INT:	t = 'I';	s = '\0';	break;
112 		case UINT:	t = 'I';	s = 'u';	break;
113 		case LONG:	t = 'L';	s = '\0';	break;
114 		case ULONG:	t = 'L';	s = 'u';	break;
115 		case QUAD:	t = 'Q';	s = '\0';	break;
116 		case UQUAD:	t = 'Q';	s = 'u';	break;
117 		case FLOAT:	t = 'D';	s = 's';	break;
118 		case DOUBLE:	t = 'D';	s = '\0';	break;
119 		case LDOUBLE:	t = 'D';	s = 'l';	break;
120 		case VOID:	t = 'V';	s = '\0';	break;
121 		case PTR:	t = 'P';	s = '\0';	break;
122 		case ARRAY:	t = 'A';	s = '\0';	break;
123 		case FUNC:	t = 'F';	s = '\0';	break;
124 		case ENUM:	t = 'T';	s = 'e';	break;
125 		case STRUCT:	t = 'T';	s = 's';	break;
126 		case UNION:	t = 'T';	s = 'u';	break;
127 		case FCOMPLEX:	t = 'X';	s = 's';	break;
128 		case DCOMPLEX:	t = 'X';	s = '\0';	break;
129 		case LCOMPLEX:	t = 'X';	s = 'l';	break;
130 		default:
131 			LERROR("outtyp()");
132 		}
133 		if (tp->t_const)
134 			outchar('c');
135 		if (tp->t_volatile)
136 			outchar('v');
137 		if (s != '\0')
138 			outchar(s);
139 		outchar(t);
140 		if (ts == ARRAY) {
141 			outint(tp->t_dim);
142 		} else if (ts == ENUM) {
143 			outtt(tp->t_enum->etag, tp->t_enum->etdef);
144 		} else if (ts == STRUCT || ts == UNION) {
145 			outtt(tp->t_str->stag, tp->t_str->stdef);
146 		} else if (ts == FUNC && tp->t_proto) {
147 			na = 0;
148 			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
149 					na++;
150 			if (tp->t_vararg)
151 				na++;
152 			outint(na);
153 			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
154 				outtype(arg->s_type);
155 			if (tp->t_vararg)
156 				outchar('E');
157 		}
158 		tp = tp->t_subt;
159 	}
160 }
161 
162 /*
163  * type to string
164  * used for debugging output
165  *
166  * it uses its own output buffer for conversion
167  */
168 const char *
169 ttos(type_t *tp)
170 {
171 	static	ob_t	tob;
172 	ob_t	tmp;
173 
174 	if (tob.o_buf == NULL) {
175 		tob.o_len = 64;
176 		tob.o_buf = tob.o_nxt = xmalloc(tob.o_len);
177 		tob.o_end = tob.o_buf + tob.o_len;
178 	}
179 
180 	tmp = ob;
181 	ob = tob;
182 	ob.o_nxt = ob.o_buf;
183 	outtype(tp);
184 	outchar('\0');
185 	tob = ob;
186 	ob = tmp;
187 
188 	return (tob.o_buf);
189 }
190 
191 /*
192  * write the name of a tag or typename
193  *
194  * if the tag is named, the name of the
195  * tag is written, otherwise, if a typename exists which
196  * refers to this tag, this typename is written
197  */
198 static void
199 outtt(sym_t *tag, sym_t *tdef)
200 {
201 
202 	/*
203 	 * 0 is no longer used.
204 	 */
205 	if (tag->s_name != unnamed) {
206 		outint(1);
207 		outname(tag->s_name);
208 	} else if (tdef != NULL) {
209 		outint(2);
210 		outname(tdef->s_name);
211 	} else {
212 		outint(3);
213 		outint(tag->s_dpos.p_line);
214 		outchar('.');
215 		outint(getfnid(tag->s_dpos.p_file));
216 		outchar('.');
217 		outint(tag->s_dpos.p_uniq);
218 	}
219 }
220 
221 /*
222  * write information about an global declared/defined symbol
223  * with storage class extern
224  *
225  * informations about function definitions are written in outfdef(),
226  * not here
227  */
228 void
229 outsym(sym_t *sym, scl_t sc, def_t def)
230 {
231 
232 	/*
233 	 * Static function declarations must also be written to the output
234 	 * file. Compatibility of function declarations (for both static
235 	 * and extern functions) must be checked in lint2. Lint1 can't do
236 	 * this, especially not, if functions are declared at block level
237 	 * before their first declaration at level 0.
238 	 */
239 	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
240 		return;
241 
242 	/* reset buffer */
243 	outclr();
244 
245 	/*
246 	 * line number of .c source, 'd' for declaration, Id of current
247 	 * source (.c or .h), and line in current source.
248 	 */
249 	outint(csrc_pos.p_line);
250 	outchar('d');
251 	outint(getfnid(sym->s_dpos.p_file));
252 	outchar('.');
253 	outint(sym->s_dpos.p_line);
254 
255 	/* flags */
256 
257 	switch (def) {
258 	case DEF:
259 		/* defined */
260 		outchar('d');
261 		break;
262 	case TDEF:
263 		/* tentative defined */
264 		outchar('t');
265 		break;
266 	case DECL:
267 		/* declared */
268 		outchar('e');
269 		break;
270 	default:
271 		LERROR("outsym()");
272 	}
273 	if (llibflg && def != DECL) {
274 		/*
275 		 * mark it as used so we get no warnings from lint2 about
276 		 * unused symbols in libraries.
277 		 */
278 		outchar('u');
279 	}
280 
281 	if (sc == STATIC)
282 		outchar('s');
283 
284 	/* name of the symbol */
285 	outname(sym->s_name);
286 
287 	/* renamed name of symbol, if necessary */
288 	if (sym->s_rename) {
289 		outchar('r');
290 		outname(sym->s_rename);
291 	}
292 
293 	/* type of the symbol */
294 	outtype(sym->s_type);
295 }
296 
297 /*
298  * write information about function definition
299  *
300  * this is also done for static functions so we are able to check if
301  * they are called with proper argument types
302  */
303 void
304 outfdef(sym_t *fsym, pos_t *posp, int rval, int osdef, sym_t *args)
305 {
306 	int	narg;
307 	sym_t	*arg;
308 
309 	/* reset the buffer */
310 	outclr();
311 
312 	/*
313 	 * line number of .c source, 'd' for declaration, Id of current
314 	 * source (.c or .h), and line in current source
315 	 *
316 	 * we are already at the end of the function. If we are in the
317 	 * .c source, posp->p_line is correct, otherwise csrc_pos.p_line
318 	 * (for functions defined in header files).
319 	 */
320 	if (posp->p_file == csrc_pos.p_file) {
321 		outint(posp->p_line);
322 	} else {
323 		outint(csrc_pos.p_line);
324 	}
325 	outchar('d');
326 	outint(getfnid(posp->p_file));
327 	outchar('.');
328 	outint(posp->p_line);
329 
330 	/* flags */
331 
332 	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
333 	if (prflstrg != -1) {
334 		nvararg = prflstrg;
335 	} else if (scflstrg != -1) {
336 		nvararg = scflstrg;
337 	}
338 
339 	if (nvararg != -1) {
340 		outchar('v');
341 		outint(nvararg);
342 	}
343 	if (scflstrg != -1) {
344 		outchar('S');
345 		outint(scflstrg);
346 	}
347 	if (prflstrg != -1) {
348 		outchar('P');
349 		outint(prflstrg);
350 	}
351 	nvararg = prflstrg = scflstrg = -1;
352 
353 	outchar('d');
354 
355 	if (rval)
356 		/* has return value */
357 		outchar('r');
358 
359 	if (llibflg)
360 		/*
361 		 * mark it as used so lint2 does not complain about
362 		 * unused symbols in libraries
363 		 */
364 		outchar('u');
365 
366 	if (osdef)
367 		/* old style function definition */
368 		outchar('o');
369 
370 	if (fsym->s_scl == STATIC)
371 		outchar('s');
372 
373 	/* name of function */
374 	outname(fsym->s_name);
375 
376 	/* renamed name of function, if necessary */
377 	if (fsym->s_rename) {
378 		outchar('r');
379 		outname(fsym->s_rename);
380 	}
381 
382 	/* argument types and return value */
383 	if (osdef) {
384 		narg = 0;
385 		for (arg = args; arg != NULL; arg = arg->s_nxt)
386 			narg++;
387 		outchar('f');
388 		outint(narg);
389 		for (arg = args; arg != NULL; arg = arg->s_nxt)
390 			outtype(arg->s_type);
391 		outtype(fsym->s_type->t_subt);
392 	} else {
393 		outtype(fsym->s_type);
394 	}
395 }
396 
397 /*
398  * write out all information necessary for lint2 to check function
399  * calls
400  *
401  * rvused is set if the return value is used (asigned to a variable)
402  * rvdisc is set if the return value is not used and not ignored
403  * (casted to void)
404  */
405 void
406 outcall(tnode_t *tn, int rvused, int rvdisc)
407 {
408 	tnode_t	*args, *arg;
409 	int	narg, n, i;
410 	int64_t	q;
411 	tspec_t	t;
412 
413 	/* reset buffer */
414 	outclr();
415 
416 	/*
417 	 * line number of .c source, 'c' for function call, Id of current
418 	 * source (.c or .h), and line in current source
419 	 */
420 	outint(csrc_pos.p_line);
421 	outchar('c');
422 	outint(getfnid(curr_pos.p_file));
423 	outchar('.');
424 	outint(curr_pos.p_line);
425 
426 	/*
427 	 * flags; 'u' and 'i' must be last to make sure a letter
428 	 * is between the numeric argument of a flag and the name of
429 	 * the function
430 	 */
431 	narg = 0;
432 	args = tn->tn_right;
433 	for (arg = args; arg != NULL; arg = arg->tn_right)
434 		narg++;
435 	/* informations about arguments */
436 	for (n = 1; n <= narg; n++) {
437 		/* the last argument is the top one in the tree */
438 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
439 			continue;
440 		arg = arg->tn_left;
441 		if (arg->tn_op == CON) {
442 			if (isityp(t = arg->tn_type->t_tspec)) {
443 				/*
444 				 * XXX it would probably be better to
445 				 * explicitly test the sign
446 				 */
447 				if ((q = arg->tn_val->v_quad) == 0) {
448 					/* zero constant */
449 					outchar('z');
450 				} else if (msb(q, t, 0) == 0) {
451 					/* positive if casted to signed */
452 					outchar('p');
453 				} else {
454 					/* negative if casted to signed */
455 					outchar('n');
456 				}
457 				outint(n);
458 			}
459 		} else if (arg->tn_op == AMPER &&
460 			   arg->tn_left->tn_op == STRING &&
461 			   arg->tn_left->tn_strg->st_tspec == CHAR) {
462 			/* constant string, write all format specifiers */
463 			outchar('s');
464 			outint(n);
465 			outfstrg(arg->tn_left->tn_strg);
466 		}
467 
468 	}
469 	/* return value discarded/used/ignored */
470 	outchar(rvdisc ? 'd' : (rvused ? 'u' : 'i'));
471 
472 	/* name of the called function */
473 	outname(tn->tn_left->tn_left->tn_sym->s_name);
474 
475 	/* types of arguments */
476 	outchar('f');
477 	outint(narg);
478 	for (n = 1; n <= narg; n++) {
479 		/* the last argument is the top one in the tree */
480 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
481 			continue;
482 		outtype(arg->tn_left->tn_type);
483 	}
484 	/* expected type of return value */
485 	outtype(tn->tn_type);
486 }
487 
488 /*
489  * extracts potential format specifiers for printf() and scanf() and
490  * writes them, enclosed in "" and qouted if necessary, to the output buffer
491  */
492 static void
493 outfstrg(strg_t *strg)
494 {
495 	int	c, oc, first;
496 	u_char	*cp;
497 
498 	if (strg->st_tspec != CHAR)
499 		LERROR("outfstrg()");
500 
501 	cp = strg->st_cp;
502 
503 	outchar('"');
504 
505 	c = *cp++;
506 
507 	while (c != '\0') {
508 
509 		if (c != '%') {
510 			c = *cp++;
511 			continue;
512 		}
513 
514 		outqchar('%');
515 		c = *cp++;
516 
517 		/* flags for printf and scanf and *-fieldwidth for printf */
518 		while (c != '\0' && (c == '-' || c == '+' || c == ' ' ||
519 				     c == '#' || c == '0' || c == '*')) {
520 			outqchar(c);
521 			c = *cp++;
522 		}
523 
524 		/* numeric field width */
525 		while (c != '\0' && isdigit(c)) {
526 			outqchar(c);
527 			c = *cp++;
528 		}
529 
530 		/* precision for printf */
531 		if (c == '.') {
532 			outqchar(c);
533 			if ((c = *cp++) == '*') {
534 				outqchar(c);
535 				c = *cp++;
536 			} else {
537 				while (c != '\0' && isdigit(c)) {
538 					outqchar(c);
539 					c = *cp++;
540 				}
541 			}
542 		}
543 
544 		/* h, l, L and q flags fpr printf and scanf */
545 		if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
546 			outqchar(c);
547 			c = *cp++;
548 		}
549 
550 		/*
551 		 * The last character. It is always written so we can detect
552 		 * invalid format specifiers.
553 		 */
554 		if (c != '\0') {
555 			outqchar(c);
556 			oc = c;
557 			c = *cp++;
558 			/*
559 			 * handle [ for scanf. [-] means that a minus sign
560 			 * was found at an undefined position.
561 			 */
562 			if (oc == '[') {
563 				if (c == '^')
564 					c = *cp++;
565 				if (c == ']')
566 					c = *cp++;
567 				first = 1;
568 				while (c != '\0' && c != ']') {
569 					if (c == '-') {
570 						if (!first && *cp != ']')
571 							outqchar(c);
572 					}
573 					first = 0;
574 					c = *cp++;
575 				}
576 				if (c == ']') {
577 					outqchar(c);
578 					c = *cp++;
579 				}
580 			}
581 		}
582 
583 	}
584 
585 	outchar('"');
586 }
587 
588 /*
589  * writes a record if sym was used
590  */
591 void
592 outusg(sym_t *sym)
593 {
594 	/* reset buffer */
595 	outclr();
596 
597 	/*
598 	 * line number of .c source, 'u' for used, Id of current
599 	 * source (.c or .h), and line in current source
600 	 */
601 	outint(csrc_pos.p_line);
602 	outchar('u');
603 	outint(getfnid(curr_pos.p_file));
604 	outchar('.');
605 	outint(curr_pos.p_line);
606 
607 	/* necessary to delimit both numbers */
608 	outchar('x');
609 
610 	/* Den Namen des Symbols ausgeben */
611 	outname(sym->s_name);
612 }
613