xref: /freebsd/usr.bin/ctags/C.c (revision d0b2dbfa)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1987, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if 0
33 #ifndef lint
34 static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
35 #endif
36 #endif
37 
38 #include <sys/cdefs.h>
39 #include <limits.h>
40 #include <stddef.h>
41 #include <stdio.h>
42 #include <string.h>
43 
44 #include "ctags.h"
45 
46 static int	func_entry(void);
47 static void	hash_entry(void);
48 static void	skip_string(int);
49 static int	str_entry(int);
50 
51 /*
52  * c_entries --
53  *	read .c and .h files and call appropriate routines
54  */
55 void
56 c_entries(void)
57 {
58 	int	c;			/* current character */
59 	int	level;			/* brace level */
60 	int	token;			/* if reading a token */
61 	int	t_def;			/* if reading a typedef */
62 	int	t_level;		/* typedef's brace level */
63 	char	*sp;			/* buffer pointer */
64 	char	tok[MAXTOKEN];		/* token buffer */
65 
66 	lineftell = ftell(inf);
67 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
68 	while (GETC(!=, EOF)) {
69 		switch (c) {
70 		/*
71 		 * Here's where it DOESN'T handle: {
72 		 *	foo(a)
73 		 *	{
74 		 *	#ifdef notdef
75 		 *		}
76 		 *	#endif
77 		 *		if (a)
78 		 *			puts("hello, world");
79 		 *	}
80 		 */
81 		case '{':
82 			++level;
83 			goto endtok;
84 		case '}':
85 			/*
86 			 * if level goes below zero, try and fix
87 			 * it, even though we've already messed up
88 			 */
89 			if (--level < 0)
90 				level = 0;
91 			goto endtok;
92 
93 		case '\n':
94 			SETLINE;
95 			/*
96 			 * the above 3 cases are similar in that they
97 			 * are special characters that also end tokens.
98 			 */
99 	endtok:			if (sp > tok) {
100 				*sp = EOS;
101 				token = YES;
102 				sp = tok;
103 			}
104 			else
105 				token = NO;
106 			continue;
107 
108 		/*
109 		 * We ignore quoted strings and character constants
110 		 * completely.
111 		 */
112 		case '"':
113 		case '\'':
114 			skip_string(c);
115 			break;
116 
117 		/*
118 		 * comments can be fun; note the state is unchanged after
119 		 * return, in case we found:
120 		 *	"foo() XX comment XX { int bar; }"
121 		 */
122 		case '/':
123 			if (GETC(==, '*') || c == '/') {
124 				skip_comment(c);
125 				continue;
126 			}
127 			(void)ungetc(c, inf);
128 			c = '/';
129 			goto storec;
130 
131 		/* hash marks flag #define's. */
132 		case '#':
133 			if (sp == tok) {
134 				hash_entry();
135 				break;
136 			}
137 			goto storec;
138 
139 		/*
140 		 * if we have a current token, parenthesis on
141 		 * level zero indicates a function.
142 		 */
143 		case '(':
144 			if (!level && token) {
145 				int	curline;
146 
147 				if (sp != tok)
148 					*sp = EOS;
149 				/*
150 				 * grab the line immediately, we may
151 				 * already be wrong, for example,
152 				 *	foo\n
153 				 *	(arg1,
154 				 */
155 				get_line();
156 				curline = lineno;
157 				if (func_entry()) {
158 					++level;
159 					pfnote(tok, curline);
160 				}
161 				break;
162 			}
163 			goto storec;
164 
165 		/*
166 		 * semi-colons indicate the end of a typedef; if we find a
167 		 * typedef we search for the next semi-colon of the same
168 		 * level as the typedef.  Ignoring "structs", they are
169 		 * tricky, since you can find:
170 		 *
171 		 *	"typedef long time_t;"
172 		 *	"typedef unsigned int u_int;"
173 		 *	"typedef unsigned int u_int [10];"
174 		 *
175 		 * If looking at a typedef, we save a copy of the last token
176 		 * found.  Then, when we find the ';' we take the current
177 		 * token if it starts with a valid token name, else we take
178 		 * the one we saved.  There's probably some reasonable
179 		 * alternative to this...
180 		 */
181 		case ';':
182 			if (t_def && level == t_level) {
183 				t_def = NO;
184 				get_line();
185 				if (sp != tok)
186 					*sp = EOS;
187 				pfnote(tok, lineno);
188 				break;
189 			}
190 			goto storec;
191 
192 		/*
193 		 * store characters until one that can't be part of a token
194 		 * comes along; check the current token against certain
195 		 * reserved words.
196 		 */
197 		default:
198 			/* ignore whitespace */
199 			if (c == ' ' || c == '\t') {
200 				int save = c;
201 				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
202 					;
203 				if (c == EOF)
204 					return;
205 				(void)ungetc(c, inf);
206 				c = save;
207 			}
208 	storec:		if (!intoken(c)) {
209 				if (sp == tok)
210 					break;
211 				*sp = EOS;
212 				if (tflag) {
213 					/* no typedefs inside typedefs */
214 					if (!t_def &&
215 						   !memcmp(tok, "typedef",8)) {
216 						t_def = YES;
217 						t_level = level;
218 						break;
219 					}
220 					/* catch "typedef struct" */
221 					if ((!t_def || t_level < level)
222 					    && (!memcmp(tok, "struct", 7)
223 					    || !memcmp(tok, "union", 6)
224 					    || !memcmp(tok, "enum", 5))) {
225 						/*
226 						 * get line immediately;
227 						 * may change before '{'
228 						 */
229 						get_line();
230 						if (str_entry(c))
231 							++level;
232 						break;
233 						/* } */
234 					}
235 				}
236 				sp = tok;
237 			}
238 			else if (sp != tok || begtoken(c)) {
239 				if (sp == tok + sizeof tok - 1)
240 					/* Too long -- truncate it */
241 					*sp = EOS;
242 				else
243 					*sp++ = c;
244 				token = YES;
245 			}
246 			continue;
247 		}
248 
249 		sp = tok;
250 		token = NO;
251 	}
252 }
253 
254 /*
255  * func_entry --
256  *	handle a function reference
257  */
258 static int
259 func_entry(void)
260 {
261 	int	c;			/* current character */
262 	int	level = 0;		/* for matching '()' */
263 	static char attribute[] = "__attribute__";
264 	char	maybe_attribute[sizeof attribute + 1],
265 		*anext;
266 
267 	/*
268 	 * Find the end of the assumed function declaration.
269 	 * Note that ANSI C functions can have type definitions so keep
270 	 * track of the parentheses nesting level.
271 	 */
272 	while (GETC(!=, EOF)) {
273 		switch (c) {
274 		case '\'':
275 		case '"':
276 			/* skip strings and character constants */
277 			skip_string(c);
278 			break;
279 		case '/':
280 			/* skip comments */
281 			if (GETC(==, '*') || c == '/')
282 				skip_comment(c);
283 			break;
284 		case '(':
285 			level++;
286 			break;
287 		case ')':
288 			if (level == 0)
289 				goto fnd;
290 			level--;
291 			break;
292 		case '\n':
293 			SETLINE;
294 		}
295 	}
296 	return (NO);
297 fnd:
298 	/*
299 	 * we assume that the character after a function's right paren
300 	 * is a token character if it's a function and a non-token
301 	 * character if it's a declaration.  Comments don't count...
302 	 */
303 	for (anext = maybe_attribute;;) {
304 		while (GETC(!=, EOF) && iswhite(c))
305 			if (c == '\n')
306 				SETLINE;
307 		if (c == EOF)
308 			return NO;
309 		/*
310 		 * Recognize the gnu __attribute__ extension, which would
311 		 * otherwise make the heuristic test DTWT
312 		 */
313 		if (anext == maybe_attribute) {
314 			if (intoken(c)) {
315 				*anext++ = c;
316 				continue;
317 			}
318 		} else {
319 			if (intoken(c)) {
320 				if (anext - maybe_attribute
321 				 < (ptrdiff_t)(sizeof attribute - 1))
322 					*anext++ = c;
323 				else	break;
324 				continue;
325 			} else {
326 				*anext++ = '\0';
327 				if (strcmp(maybe_attribute, attribute) == 0) {
328 					(void)ungetc(c, inf);
329 					return NO;
330 				}
331 				break;
332 			}
333 		}
334 		if (intoken(c) || c == '{')
335 			break;
336 		if (c == '/' && (GETC(==, '*') || c == '/'))
337 			skip_comment(c);
338 		else {				/* don't ever "read" '/' */
339 			(void)ungetc(c, inf);
340 			return (NO);
341 		}
342 	}
343 	if (c != '{')
344 		(void)skip_key('{');
345 	return (YES);
346 }
347 
348 /*
349  * hash_entry --
350  *	handle a line starting with a '#'
351  */
352 static void
353 hash_entry(void)
354 {
355 	int	c;			/* character read */
356 	int	curline;		/* line started on */
357 	char	*sp;			/* buffer pointer */
358 	char	tok[MAXTOKEN];		/* storage buffer */
359 
360 	/* ignore leading whitespace */
361 	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
362 		;
363 	(void)ungetc(c, inf);
364 
365 	curline = lineno;
366 	for (sp = tok;;) {		/* get next token */
367 		if (GETC(==, EOF))
368 			return;
369 		if (iswhite(c))
370 			break;
371 		if (sp == tok + sizeof tok - 1)
372 			/* Too long -- truncate it */
373 			*sp = EOS;
374 		else
375 			*sp++ = c;
376 	}
377 	*sp = EOS;
378 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
379 		goto skip;
380 	for (;;) {			/* this doesn't handle "#define \n" */
381 		if (GETC(==, EOF))
382 			return;
383 		if (!iswhite(c))
384 			break;
385 	}
386 	for (sp = tok;;) {		/* get next token */
387 		if (sp == tok + sizeof tok - 1)
388 			/* Too long -- truncate it */
389 			*sp = EOS;
390 		else
391 			*sp++ = c;
392 		if (GETC(==, EOF))
393 			return;
394 		/*
395 		 * this is where it DOESN'T handle
396 		 * "#define \n"
397 		 */
398 		if (!intoken(c))
399 			break;
400 	}
401 	*sp = EOS;
402 	if (dflag || c == '(') {	/* only want macros */
403 		get_line();
404 		pfnote(tok, curline);
405 	}
406 skip:	if (c == '\n') {		/* get rid of rest of define */
407 		SETLINE
408 		if (*(sp - 1) != '\\')
409 			return;
410 	}
411 	(void)skip_key('\n');
412 }
413 
414 /*
415  * str_entry --
416  *	handle a struct, union or enum entry
417  */
418 static int
419 str_entry(int c) /* c is current character */
420 {
421 	int	curline;		/* line started on */
422 	char	*sp;			/* buffer pointer */
423 	char	tok[LINE_MAX];		/* storage buffer */
424 
425 	curline = lineno;
426 	while (iswhite(c))
427 		if (GETC(==, EOF))
428 			return (NO);
429 	if (c == '{')		/* it was "struct {" */
430 		return (YES);
431 	for (sp = tok;;) {		/* get next token */
432 		if (sp == tok + sizeof tok - 1)
433 			/* Too long -- truncate it */
434 			*sp = EOS;
435 		else
436 			*sp++ = c;
437 		if (GETC(==, EOF))
438 			return (NO);
439 		if (!intoken(c))
440 			break;
441 	}
442 	switch (c) {
443 		case '{':		/* it was "struct foo{" */
444 			--sp;
445 			break;
446 		case '\n':		/* it was "struct foo\n" */
447 			SETLINE;
448 			/*FALLTHROUGH*/
449 		default:		/* probably "struct foo " */
450 			while (GETC(!=, EOF))
451 				if (!iswhite(c))
452 					break;
453 			if (c != '{') {
454 				(void)ungetc(c, inf);
455 				return (NO);
456 			}
457 	}
458 	*sp = EOS;
459 	pfnote(tok, curline);
460 	return (YES);
461 }
462 
463 /*
464  * skip_comment --
465  *	skip over comment
466  */
467 void
468 skip_comment(int t) /* t is comment character */
469 {
470 	int	c;			/* character read */
471 	int	star;			/* '*' flag */
472 
473 	for (star = 0; GETC(!=, EOF);)
474 		switch(c) {
475 		/* comments don't nest, nor can they be escaped. */
476 		case '*':
477 			star = YES;
478 			break;
479 		case '/':
480 			if (star && t == '*')
481 				return;
482 			break;
483 		case '\n':
484 			SETLINE;
485 			if (t == '/')
486 				return;
487 			/*FALLTHROUGH*/
488 		default:
489 			star = NO;
490 			break;
491 		}
492 }
493 
494 /*
495  * skip_string --
496  *	skip to the end of a string or character constant.
497  */
498 void
499 skip_string(int key)
500 {
501 	int	c,
502 		skip;
503 
504 	for (skip = NO; GETC(!=, EOF); )
505 		switch (c) {
506 		case '\\':		/* a backslash escapes anything */
507 			skip = !skip;	/* we toggle in case it's "\\" */
508 			break;
509 		case '\n':
510 			SETLINE;
511 			/*FALLTHROUGH*/
512 		default:
513 			if (c == key && !skip)
514 				return;
515 			skip = NO;
516 		}
517 }
518 
519 /*
520  * skip_key --
521  *	skip to next char "key"
522  */
523 int
524 skip_key(int key)
525 {
526 	int	c,
527 		skip,
528 		retval;
529 
530 	for (skip = retval = NO; GETC(!=, EOF);)
531 		switch(c) {
532 		case '\\':		/* a backslash escapes anything */
533 			skip = !skip;	/* we toggle in case it's "\\" */
534 			break;
535 		case ';':		/* special case for yacc; if one */
536 		case '|':		/* of these chars occurs, we may */
537 			retval = YES;	/* have moved out of the rule */
538 			break;		/* not used by C */
539 		case '\'':
540 		case '"':
541 			/* skip strings and character constants */
542 			skip_string(c);
543 			break;
544 		case '/':
545 			/* skip comments */
546 			if (GETC(==, '*') || c == '/') {
547 				skip_comment(c);
548 				break;
549 			}
550 			(void)ungetc(c, inf);
551 			c = '/';
552 			goto norm;
553 		case '\n':
554 			SETLINE;
555 			/*FALLTHROUGH*/
556 		default:
557 		norm:
558 			if (c == key && !skip)
559 				return (retval);
560 			skip = NO;
561 		}
562 	return (retval);
563 }
564