xref: /netbsd/usr.bin/xlint/lint1/emit1.c (revision bf9ec67e)
1 /* $NetBSD: emit1.c,v 1.12 2002/02/05 03:04:27 thorpej Exp $ */
2 
3 /*
4  * Copyright (c) 1996 Christopher G. Demetriou.  All Rights Reserved.
5  * Copyright (c) 1994, 1995 Jochen Pohl
6  * All Rights Reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Jochen Pohl for
19  *	The NetBSD Project.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #if defined(__RCSID) && !defined(lint)
37 __RCSID("$NetBSD: emit1.c,v 1.12 2002/02/05 03:04:27 thorpej Exp $");
38 #endif
39 
40 #include <ctype.h>
41 
42 #include "lint1.h"
43 
44 static	void	outtt(sym_t *, sym_t *);
45 static	void	outfstrg(strg_t *);
46 
47 /*
48  * Write type into the output buffer.
49  * The type is written as a sequence of substrings, each of which describes a
50  * node of type type_t
51  * a node is coded as follows:
52  *	char			C
53  *	signed char		s C
54  *	unsigned char		u C
55  *	short			S
56  *	unsigned short		u S
57  *	int			I
58  *	unsigned int		u I
59  *	long			L
60  *	unsigned long		u L
61  *	long long		Q
62  *	unsigned long long	u Q
63  *	float			s D
64  *	double			D
65  *	long double		l D
66  *	void			V
67  *	*			P
68  *	[n]			A n
69  *	()			F
70  *	(void)			F 0
71  *	(n arguments)		F n arg1 arg2 ... argn
72  *	(n arguments, ...)	F n arg1 arg2 ... argn-1 E
73  *	(a, b, c, ...)		f n arg1 arg2 ...
74  *	enum tag		e T tag_or_typename
75  *	struct tag		s T tag_or_typename
76  *	union tag		u T tag_or_typename
77  *
78  *	tag_or_typename		0			no tag or type name
79  *				1 n tag			Tag
80  *				2 n typename		only type name
81  *
82  * spaces are only for better readability
83  * additionaly it is possible to prepend the characters 'c' (for const)
84  * and 'v' (for volatile)
85  */
86 void
87 outtype(type_t *tp)
88 {
89 	int	t, s, na;
90 	sym_t	*arg;
91 	tspec_t	ts;
92 
93 	while (tp != NULL) {
94 		if ((ts = tp->t_tspec) == INT && tp->t_isenum)
95 			ts = ENUM;
96 		switch (ts) {
97 		case CHAR:	t = 'C';	s = '\0';	break;
98 		case SCHAR:	t = 'C';	s = 's';	break;
99 		case UCHAR:	t = 'C';	s = 'u';	break;
100 		case SHORT:	t = 'S';	s = '\0';	break;
101 		case USHORT:	t = 'S';	s = 'u';	break;
102 		case INT:	t = 'I';	s = '\0';	break;
103 		case UINT:	t = 'I';	s = 'u';	break;
104 		case LONG:	t = 'L';	s = '\0';	break;
105 		case ULONG:	t = 'L';	s = 'u';	break;
106 		case QUAD:	t = 'Q';	s = '\0';	break;
107 		case UQUAD:	t = 'Q';	s = 'u';	break;
108 		case FLOAT:	t = 'D';	s = 's';	break;
109 		case DOUBLE:	t = 'D';	s = '\0';	break;
110 		case LDOUBLE:	t = 'D';	s = 'l';	break;
111 		case VOID:	t = 'V';	s = '\0';	break;
112 		case PTR:	t = 'P';	s = '\0';	break;
113 		case ARRAY:	t = 'A';	s = '\0';	break;
114 		case FUNC:	t = 'F';	s = '\0';	break;
115 		case ENUM:	t = 'T';	s = 'e';	break;
116 		case STRUCT:	t = 'T';	s = 's';	break;
117 		case UNION:	t = 'T';	s = 'u';	break;
118 		default:
119 			lerror("outtyp() 1");
120 		}
121 		if (tp->t_const)
122 			outchar('c');
123 		if (tp->t_volatile)
124 			outchar('v');
125 		if (s != '\0')
126 			outchar(s);
127 		outchar(t);
128 		if (ts == ARRAY) {
129 			outint(tp->t_dim);
130 		} else if (ts == ENUM) {
131 			outtt(tp->t_enum->etag, tp->t_enum->etdef);
132 		} else if (ts == STRUCT || ts == UNION) {
133 			outtt(tp->t_str->stag, tp->t_str->stdef);
134 		} else if (ts == FUNC && tp->t_proto) {
135 			na = 0;
136 			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
137 					na++;
138 			if (tp->t_vararg)
139 				na++;
140 			outint(na);
141 			for (arg = tp->t_args; arg != NULL; arg = arg->s_nxt)
142 				outtype(arg->s_type);
143 			if (tp->t_vararg)
144 				outchar('E');
145 		}
146 		tp = tp->t_subt;
147 	}
148 }
149 
150 /*
151  * type to string
152  * used for debugging output
153  *
154  * it uses its own output buffer for conversion
155  */
156 const char *
157 ttos(type_t *tp)
158 {
159 	static	ob_t	tob;
160 	ob_t	tmp;
161 
162 	if (tob.o_buf == NULL) {
163 		tob.o_len = 64;
164 		tob.o_buf = tob.o_nxt = xmalloc(tob.o_len);
165 		tob.o_end = tob.o_buf + tob.o_len;
166 	}
167 
168 	tmp = ob;
169 	ob = tob;
170 	ob.o_nxt = ob.o_buf;
171 	outtype(tp);
172 	outchar('\0');
173 	tob = ob;
174 	ob = tmp;
175 
176 	return (tob.o_buf);
177 }
178 
179 /*
180  * write the name of a tag or typename
181  *
182  * if the tag is named, the name of the
183  * tag is written, otherwise, if a typename exists which
184  * refers to this tag, this typename is written
185  */
186 static void
187 outtt(sym_t *tag, sym_t *tdef)
188 {
189 
190 	/*
191 	 * 0 is no longer used.
192 	 */
193 	if (tag->s_name != unnamed) {
194 		outint(1);
195 		outname(tag->s_name);
196 	} else if (tdef != NULL) {
197 		outint(2);
198 		outname(tdef->s_name);
199 	} else {
200 		outint(3);
201 		outint(tag->s_dpos.p_line);
202 		outchar('.');
203 		outint(getfnid(tag->s_dpos.p_file));
204 		outchar('.');
205 		outint(tag->s_dpos.p_uniq);
206 	}
207 }
208 
209 /*
210  * write information about an global declared/defined symbol
211  * with storage class extern
212  *
213  * informations about function definitions are written in outfdef(),
214  * not here
215  */
216 void
217 outsym(sym_t *sym, scl_t sc, def_t def)
218 {
219 
220 	/*
221 	 * Static function declarations must also be written to the output
222 	 * file. Compatibility of function declarations (for both static
223 	 * and extern functions) must be checked in lint2. Lint1 can't do
224 	 * this, especially not, if functions are declared at block level
225 	 * before their first declaration at level 0.
226 	 */
227 	if (sc != EXTERN && !(sc == STATIC && sym->s_type->t_tspec == FUNC))
228 		return;
229 
230 	/* reset buffer */
231 	outclr();
232 
233 	/*
234 	 * line number of .c source, 'd' for declaration, Id of current
235 	 * source (.c or .h), and line in current source.
236 	 */
237 	outint(csrc_pos.p_line);
238 	outchar('d');
239 	outint(getfnid(sym->s_dpos.p_file));
240 	outchar('.');
241 	outint(sym->s_dpos.p_line);
242 
243 	/* flags */
244 
245 	switch (def) {
246 	case DEF:
247 		/* defined */
248 		outchar('d');
249 		break;
250 	case TDEF:
251 		/* tentative defined */
252 		outchar('t');
253 		break;
254 	case DECL:
255 		/* declared */
256 		outchar('e');
257 		break;
258 	default:
259 		lerror("outsym() 2");
260 	}
261 	if (llibflg && def != DECL) {
262 		/*
263 		 * mark it as used so we get no warnings from lint2 about
264 		 * unused symbols in libraries.
265 		 */
266 		outchar('u');
267 	}
268 
269 	if (sc == STATIC)
270 		outchar('s');
271 
272 	/* name of the symbol */
273 	outname(sym->s_name);
274 
275 	/* renamed name of symbol, if necessary */
276 	if (sym->s_rename) {
277 		outchar('r');
278 		outname(sym->s_rename);
279 	}
280 
281 	/* type of the symbol */
282 	outtype(sym->s_type);
283 }
284 
285 /*
286  * write information about function definition
287  *
288  * this is also done for static functions so we are able to check if
289  * they are called with proper argument types
290  */
291 void
292 outfdef(sym_t *fsym, pos_t *posp, int rval, int osdef, sym_t *args)
293 {
294 	int	narg;
295 	sym_t	*arg;
296 
297 	/* reset the buffer */
298 	outclr();
299 
300 	/*
301 	 * line number of .c source, 'd' for declaration, Id of current
302 	 * source (.c or .h), and line in current source
303 	 *
304 	 * we are already at the end of the function. If we are in the
305 	 * .c source, posp->p_line is correct, otherwise csrc_pos.p_line
306 	 * (for functions defined in header files).
307 	 */
308 	if (posp->p_file == csrc_pos.p_file) {
309 		outint(posp->p_line);
310 	} else {
311 		outint(csrc_pos.p_line);
312 	}
313 	outchar('d');
314 	outint(getfnid(posp->p_file));
315 	outchar('.');
316 	outint(posp->p_line);
317 
318 	/* flags */
319 
320 	/* both SCANFLIKE and PRINTFLIKE imply VARARGS */
321 	if (prflstrg != -1) {
322 		nvararg = prflstrg;
323 	} else if (scflstrg != -1) {
324 		nvararg = scflstrg;
325 	}
326 
327 	if (nvararg != -1) {
328 		outchar('v');
329 		outint(nvararg);
330 	}
331 	if (scflstrg != -1) {
332 		outchar('S');
333 		outint(scflstrg);
334 	}
335 	if (prflstrg != -1) {
336 		outchar('P');
337 		outint(prflstrg);
338 	}
339 	nvararg = prflstrg = scflstrg = -1;
340 
341 	outchar('d');
342 
343 	if (rval)
344 		/* has return value */
345 		outchar('r');
346 
347 	if (llibflg)
348 		/*
349 		 * mark it as used so lint2 does not complain about
350 		 * unused symbols in libraries
351 		 */
352 		outchar('u');
353 
354 	if (osdef)
355 		/* old style function definition */
356 		outchar('o');
357 
358 	if (fsym->s_scl == STATIC)
359 		outchar('s');
360 
361 	/* name of function */
362 	outname(fsym->s_name);
363 
364 	/* renamed name of function, if necessary */
365 	if (fsym->s_rename) {
366 		outchar('r');
367 		outname(fsym->s_rename);
368 	}
369 
370 	/* argument types and return value */
371 	if (osdef) {
372 		narg = 0;
373 		for (arg = args; arg != NULL; arg = arg->s_nxt)
374 			narg++;
375 		outchar('f');
376 		outint(narg);
377 		for (arg = args; arg != NULL; arg = arg->s_nxt)
378 			outtype(arg->s_type);
379 		outtype(fsym->s_type->t_subt);
380 	} else {
381 		outtype(fsym->s_type);
382 	}
383 }
384 
385 /*
386  * write out all information necessary for lint2 to check function
387  * calls
388  *
389  * rvused is set if the return value is used (asigned to a variable)
390  * rvdisc is set if the return value is not used and not ignored
391  * (casted to void)
392  */
393 void
394 outcall(tnode_t *tn, int rvused, int rvdisc)
395 {
396 	tnode_t	*args, *arg;
397 	int	narg, n, i;
398 	int64_t	q;
399 	tspec_t	t;
400 
401 	/* reset buffer */
402 	outclr();
403 
404 	/*
405 	 * line number of .c source, 'c' for function call, Id of current
406 	 * source (.c or .h), and line in current source
407 	 */
408 	outint(csrc_pos.p_line);
409 	outchar('c');
410 	outint(getfnid(curr_pos.p_file));
411 	outchar('.');
412 	outint(curr_pos.p_line);
413 
414 	/*
415 	 * flags; 'u' and 'i' must be last to make sure a letter
416 	 * is between the numeric argument of a flag and the name of
417 	 * the function
418 	 */
419 	narg = 0;
420 	args = tn->tn_right;
421 	for (arg = args; arg != NULL; arg = arg->tn_right)
422 		narg++;
423 	/* informations about arguments */
424 	for (n = 1; n <= narg; n++) {
425 		/* the last argument is the top one in the tree */
426 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
427 			continue;
428 		arg = arg->tn_left;
429 		if (arg->tn_op == CON) {
430 			if (isityp(t = arg->tn_type->t_tspec)) {
431 				/*
432 				 * XXX it would probably be better to
433 				 * explizitly test the sign
434 				 */
435 				if ((q = arg->tn_val->v_quad) == 0) {
436 					/* zero constant */
437 					outchar('z');
438 				} else if (msb(q, t, 0) == 0) {
439 					/* positive if casted to signed */
440 					outchar('p');
441 				} else {
442 					/* negative if casted to signed */
443 					outchar('n');
444 				}
445 				outint(n);
446 			}
447 		} else if (arg->tn_op == AMPER &&
448 			   arg->tn_left->tn_op == STRING &&
449 			   arg->tn_left->tn_strg->st_tspec == CHAR) {
450 			/* constant string, write all format specifiers */
451 			outchar('s');
452 			outint(n);
453 			outfstrg(arg->tn_left->tn_strg);
454 		}
455 
456 	}
457 	/* return value discarded/used/ignored */
458 	outchar(rvdisc ? 'd' : (rvused ? 'u' : 'i'));
459 
460 	/* name of the called function */
461 	outname(tn->tn_left->tn_left->tn_sym->s_name);
462 
463 	/* types of arguments */
464 	outchar('f');
465 	outint(narg);
466 	for (n = 1; n <= narg; n++) {
467 		/* the last argument is the top one in the tree */
468 		for (i = narg, arg = args; i > n; i--, arg = arg->tn_right)
469 			continue;
470 		outtype(arg->tn_left->tn_type);
471 	}
472 	/* expected type of return value */
473 	outtype(tn->tn_type);
474 }
475 
476 /*
477  * extracts potential format specifiers for printf() and scanf() and
478  * writes them, enclosed in "" and qouted if necessary, to the output buffer
479  */
480 static void
481 outfstrg(strg_t *strg)
482 {
483 	int	c, oc, first;
484 	u_char	*cp;
485 
486 	if (strg->st_tspec != CHAR)
487 		lerror("outfstrg() 1");
488 
489 	cp = strg->st_cp;
490 
491 	outchar('"');
492 
493 	c = *cp++;
494 
495 	while (c != '\0') {
496 
497 		if (c != '%') {
498 			c = *cp++;
499 			continue;
500 		}
501 
502 		outqchar('%');
503 		c = *cp++;
504 
505 		/* flags for printf and scanf and *-fieldwidth for printf */
506 		while (c != '\0' && (c == '-' || c == '+' || c == ' ' ||
507 				     c == '#' || c == '0' || c == '*')) {
508 			outqchar(c);
509 			c = *cp++;
510 		}
511 
512 		/* numeric field width */
513 		while (c != '\0' && isdigit(c)) {
514 			outqchar(c);
515 			c = *cp++;
516 		}
517 
518 		/* precision for printf */
519 		if (c == '.') {
520 			outqchar(c);
521 			if ((c = *cp++) == '*') {
522 				outqchar(c);
523 				c = *cp++;
524 			} else {
525 				while (c != '\0' && isdigit(c)) {
526 					outqchar(c);
527 					c = *cp++;
528 				}
529 			}
530 		}
531 
532 		/* h, l, L and q flags fpr printf and scanf */
533 		if (c == 'h' || c == 'l' || c == 'L' || c == 'q') {
534 			outqchar(c);
535 			c = *cp++;
536 		}
537 
538 		/*
539 		 * The last character. It is always written so we can detect
540 		 * invalid format specifiers.
541 		 */
542 		if (c != '\0') {
543 			outqchar(c);
544 			oc = c;
545 			c = *cp++;
546 			/*
547 			 * handle [ for scanf. [-] means that a minus sign
548 			 * was found at an undefined position.
549 			 */
550 			if (oc == '[') {
551 				if (c == '^')
552 					c = *cp++;
553 				if (c == ']')
554 					c = *cp++;
555 				first = 1;
556 				while (c != '\0' && c != ']') {
557 					if (c == '-') {
558 						if (!first && *cp != ']')
559 							outqchar(c);
560 					}
561 					first = 0;
562 					c = *cp++;
563 				}
564 				if (c == ']') {
565 					outqchar(c);
566 					c = *cp++;
567 				}
568 			}
569 		}
570 
571 	}
572 
573 	outchar('"');
574 }
575 
576 /*
577  * writes a record if sym was used
578  */
579 void
580 outusg(sym_t *sym)
581 {
582 	/* reset buffer */
583 	outclr();
584 
585 	/*
586 	 * line number of .c source, 'u' for used, Id of current
587 	 * source (.c or .h), and line in current source
588 	 */
589 	outint(csrc_pos.p_line);
590 	outchar('u');
591 	outint(getfnid(curr_pos.p_file));
592 	outchar('.');
593 	outint(curr_pos.p_line);
594 
595 	/* necessary to delimit both numbers */
596 	outchar('x');
597 
598 	/* Den Namen des Symbols ausgeben */
599 	outname(sym->s_name);
600 }
601