xref: /freebsd/usr.bin/ctags/C.c (revision c697fb7f)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1987, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if 0
33 #ifndef lint
34 static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 4/2/94";
35 #endif
36 #endif
37 
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include <limits.h>
42 #include <stdio.h>
43 #include <string.h>
44 
45 #include "ctags.h"
46 
/* Forward declarations of the helpers that are private to this file. */
static int	func_entry(void);
static void	hash_entry(void);
static void	skip_string(int key);
static int	str_entry(int c);
51 
52 /*
53  * c_entries --
54  *	read .c and .h files and call appropriate routines
55  */
56 void
57 c_entries(void)
58 {
59 	int	c;			/* current character */
60 	int	level;			/* brace level */
61 	int	token;			/* if reading a token */
62 	int	t_def;			/* if reading a typedef */
63 	int	t_level;		/* typedef's brace level */
64 	char	*sp;			/* buffer pointer */
65 	char	tok[MAXTOKEN];		/* token buffer */
66 
67 	lineftell = ftell(inf);
68 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
69 	while (GETC(!=, EOF)) {
70 		switch (c) {
71 		/*
72 		 * Here's where it DOESN'T handle: {
73 		 *	foo(a)
74 		 *	{
75 		 *	#ifdef notdef
76 		 *		}
77 		 *	#endif
78 		 *		if (a)
79 		 *			puts("hello, world");
80 		 *	}
81 		 */
82 		case '{':
83 			++level;
84 			goto endtok;
85 		case '}':
86 			/*
87 			 * if level goes below zero, try and fix
88 			 * it, even though we've already messed up
89 			 */
90 			if (--level < 0)
91 				level = 0;
92 			goto endtok;
93 
94 		case '\n':
95 			SETLINE;
96 			/*
97 			 * the above 3 cases are similar in that they
98 			 * are special characters that also end tokens.
99 			 */
100 	endtok:			if (sp > tok) {
101 				*sp = EOS;
102 				token = YES;
103 				sp = tok;
104 			}
105 			else
106 				token = NO;
107 			continue;
108 
109 		/*
110 		 * We ignore quoted strings and character constants
111 		 * completely.
112 		 */
113 		case '"':
114 		case '\'':
115 			skip_string(c);
116 			break;
117 
118 		/*
119 		 * comments can be fun; note the state is unchanged after
120 		 * return, in case we found:
121 		 *	"foo() XX comment XX { int bar; }"
122 		 */
123 		case '/':
124 			if (GETC(==, '*') || c == '/') {
125 				skip_comment(c);
126 				continue;
127 			}
128 			(void)ungetc(c, inf);
129 			c = '/';
130 			goto storec;
131 
132 		/* hash marks flag #define's. */
133 		case '#':
134 			if (sp == tok) {
135 				hash_entry();
136 				break;
137 			}
138 			goto storec;
139 
140 		/*
141 		 * if we have a current token, parenthesis on
142 		 * level zero indicates a function.
143 		 */
144 		case '(':
145 			if (!level && token) {
146 				int	curline;
147 
148 				if (sp != tok)
149 					*sp = EOS;
150 				/*
151 				 * grab the line immediately, we may
152 				 * already be wrong, for example,
153 				 *	foo\n
154 				 *	(arg1,
155 				 */
156 				get_line();
157 				curline = lineno;
158 				if (func_entry()) {
159 					++level;
160 					pfnote(tok, curline);
161 				}
162 				break;
163 			}
164 			goto storec;
165 
166 		/*
167 		 * semi-colons indicate the end of a typedef; if we find a
168 		 * typedef we search for the next semi-colon of the same
169 		 * level as the typedef.  Ignoring "structs", they are
170 		 * tricky, since you can find:
171 		 *
172 		 *	"typedef long time_t;"
173 		 *	"typedef unsigned int u_int;"
174 		 *	"typedef unsigned int u_int [10];"
175 		 *
176 		 * If looking at a typedef, we save a copy of the last token
177 		 * found.  Then, when we find the ';' we take the current
178 		 * token if it starts with a valid token name, else we take
179 		 * the one we saved.  There's probably some reasonable
180 		 * alternative to this...
181 		 */
182 		case ';':
183 			if (t_def && level == t_level) {
184 				t_def = NO;
185 				get_line();
186 				if (sp != tok)
187 					*sp = EOS;
188 				pfnote(tok, lineno);
189 				break;
190 			}
191 			goto storec;
192 
193 		/*
194 		 * store characters until one that can't be part of a token
195 		 * comes along; check the current token against certain
196 		 * reserved words.
197 		 */
198 		default:
199 			/* ignore whitespace */
200 			if (c == ' ' || c == '\t') {
201 				int save = c;
202 				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
203 					;
204 				if (c == EOF)
205 					return;
206 				(void)ungetc(c, inf);
207 				c = save;
208 			}
209 	storec:		if (!intoken(c)) {
210 				if (sp == tok)
211 					break;
212 				*sp = EOS;
213 				if (tflag) {
214 					/* no typedefs inside typedefs */
215 					if (!t_def &&
216 						   !memcmp(tok, "typedef",8)) {
217 						t_def = YES;
218 						t_level = level;
219 						break;
220 					}
221 					/* catch "typedef struct" */
222 					if ((!t_def || t_level < level)
223 					    && (!memcmp(tok, "struct", 7)
224 					    || !memcmp(tok, "union", 6)
225 					    || !memcmp(tok, "enum", 5))) {
226 						/*
227 						 * get line immediately;
228 						 * may change before '{'
229 						 */
230 						get_line();
231 						if (str_entry(c))
232 							++level;
233 						break;
234 						/* } */
235 					}
236 				}
237 				sp = tok;
238 			}
239 			else if (sp != tok || begtoken(c)) {
240 				if (sp == tok + sizeof tok - 1)
241 					/* Too long -- truncate it */
242 					*sp = EOS;
243 				else
244 					*sp++ = c;
245 				token = YES;
246 			}
247 			continue;
248 		}
249 
250 		sp = tok;
251 		token = NO;
252 	}
253 }
254 
255 /*
256  * func_entry --
257  *	handle a function reference
258  */
259 static int
260 func_entry(void)
261 {
262 	int	c;			/* current character */
263 	int	level = 0;		/* for matching '()' */
264 
265 	/*
266 	 * Find the end of the assumed function declaration.
267 	 * Note that ANSI C functions can have type definitions so keep
268 	 * track of the parentheses nesting level.
269 	 */
270 	while (GETC(!=, EOF)) {
271 		switch (c) {
272 		case '\'':
273 		case '"':
274 			/* skip strings and character constants */
275 			skip_string(c);
276 			break;
277 		case '/':
278 			/* skip comments */
279 			if (GETC(==, '*') || c == '/')
280 				skip_comment(c);
281 			break;
282 		case '(':
283 			level++;
284 			break;
285 		case ')':
286 			if (level == 0)
287 				goto fnd;
288 			level--;
289 			break;
290 		case '\n':
291 			SETLINE;
292 		}
293 	}
294 	return (NO);
295 fnd:
296 	/*
297 	 * we assume that the character after a function's right paren
298 	 * is a token character if it's a function and a non-token
299 	 * character if it's a declaration.  Comments don't count...
300 	 */
301 	for (;;) {
302 		while (GETC(!=, EOF) && iswhite(c))
303 			if (c == '\n')
304 				SETLINE;
305 		if (intoken(c) || c == '{')
306 			break;
307 		if (c == '/' && (GETC(==, '*') || c == '/'))
308 			skip_comment(c);
309 		else {				/* don't ever "read" '/' */
310 			(void)ungetc(c, inf);
311 			return (NO);
312 		}
313 	}
314 	if (c != '{')
315 		(void)skip_key('{');
316 	return (YES);
317 }
318 
319 /*
320  * hash_entry --
321  *	handle a line starting with a '#'
322  */
323 static void
324 hash_entry(void)
325 {
326 	int	c;			/* character read */
327 	int	curline;		/* line started on */
328 	char	*sp;			/* buffer pointer */
329 	char	tok[MAXTOKEN];		/* storage buffer */
330 
331 	/* ignore leading whitespace */
332 	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
333 		;
334 	(void)ungetc(c, inf);
335 
336 	curline = lineno;
337 	for (sp = tok;;) {		/* get next token */
338 		if (GETC(==, EOF))
339 			return;
340 		if (iswhite(c))
341 			break;
342 		if (sp == tok + sizeof tok - 1)
343 			/* Too long -- truncate it */
344 			*sp = EOS;
345 		else
346 			*sp++ = c;
347 	}
348 	*sp = EOS;
349 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
350 		goto skip;
351 	for (;;) {			/* this doesn't handle "#define \n" */
352 		if (GETC(==, EOF))
353 			return;
354 		if (!iswhite(c))
355 			break;
356 	}
357 	for (sp = tok;;) {		/* get next token */
358 		if (sp == tok + sizeof tok - 1)
359 			/* Too long -- truncate it */
360 			*sp = EOS;
361 		else
362 			*sp++ = c;
363 		if (GETC(==, EOF))
364 			return;
365 		/*
366 		 * this is where it DOESN'T handle
367 		 * "#define \n"
368 		 */
369 		if (!intoken(c))
370 			break;
371 	}
372 	*sp = EOS;
373 	if (dflag || c == '(') {	/* only want macros */
374 		get_line();
375 		pfnote(tok, curline);
376 	}
377 skip:	if (c == '\n') {		/* get rid of rest of define */
378 		SETLINE
379 		if (*(sp - 1) != '\\')
380 			return;
381 	}
382 	(void)skip_key('\n');
383 }
384 
385 /*
386  * str_entry --
387  *	handle a struct, union or enum entry
388  */
389 static int
390 str_entry(int c) /* c is current character */
391 {
392 	int	curline;		/* line started on */
393 	char	*sp;			/* buffer pointer */
394 	char	tok[LINE_MAX];		/* storage buffer */
395 
396 	curline = lineno;
397 	while (iswhite(c))
398 		if (GETC(==, EOF))
399 			return (NO);
400 	if (c == '{')		/* it was "struct {" */
401 		return (YES);
402 	for (sp = tok;;) {		/* get next token */
403 		if (sp == tok + sizeof tok - 1)
404 			/* Too long -- truncate it */
405 			*sp = EOS;
406 		else
407 			*sp++ = c;
408 		if (GETC(==, EOF))
409 			return (NO);
410 		if (!intoken(c))
411 			break;
412 	}
413 	switch (c) {
414 		case '{':		/* it was "struct foo{" */
415 			--sp;
416 			break;
417 		case '\n':		/* it was "struct foo\n" */
418 			SETLINE;
419 			/*FALLTHROUGH*/
420 		default:		/* probably "struct foo " */
421 			while (GETC(!=, EOF))
422 				if (!iswhite(c))
423 					break;
424 			if (c != '{') {
425 				(void)ungetc(c, inf);
426 				return (NO);
427 			}
428 	}
429 	*sp = EOS;
430 	pfnote(tok, curline);
431 	return (YES);
432 }
433 
434 /*
435  * skip_comment --
436  *	skip over comment
437  */
438 void
439 skip_comment(int t) /* t is comment character */
440 {
441 	int	c;			/* character read */
442 	int	star;			/* '*' flag */
443 
444 	for (star = 0; GETC(!=, EOF);)
445 		switch(c) {
446 		/* comments don't nest, nor can they be escaped. */
447 		case '*':
448 			star = YES;
449 			break;
450 		case '/':
451 			if (star && t == '*')
452 				return;
453 			break;
454 		case '\n':
455 			if (t == '/')
456 				return;
457 			SETLINE;
458 			/*FALLTHROUGH*/
459 		default:
460 			star = NO;
461 			break;
462 		}
463 }
464 
465 /*
466  * skip_string --
467  *	skip to the end of a string or character constant.
468  */
469 void
470 skip_string(int key)
471 {
472 	int	c,
473 		skip;
474 
475 	for (skip = NO; GETC(!=, EOF); )
476 		switch (c) {
477 		case '\\':		/* a backslash escapes anything */
478 			skip = !skip;	/* we toggle in case it's "\\" */
479 			break;
480 		case '\n':
481 			SETLINE;
482 			/*FALLTHROUGH*/
483 		default:
484 			if (c == key && !skip)
485 				return;
486 			skip = NO;
487 		}
488 }
489 
490 /*
491  * skip_key --
492  *	skip to next char "key"
493  */
494 int
495 skip_key(int key)
496 {
497 	int	c,
498 		skip,
499 		retval;
500 
501 	for (skip = retval = NO; GETC(!=, EOF);)
502 		switch(c) {
503 		case '\\':		/* a backslash escapes anything */
504 			skip = !skip;	/* we toggle in case it's "\\" */
505 			break;
506 		case ';':		/* special case for yacc; if one */
507 		case '|':		/* of these chars occurs, we may */
508 			retval = YES;	/* have moved out of the rule */
509 			break;		/* not used by C */
510 		case '\'':
511 		case '"':
512 			/* skip strings and character constants */
513 			skip_string(c);
514 			break;
515 		case '/':
516 			/* skip comments */
517 			if (GETC(==, '*') || c == '/') {
518 				skip_comment(c);
519 				break;
520 			}
521 			(void)ungetc(c, inf);
522 			c = '/';
523 			goto norm;
524 		case '\n':
525 			SETLINE;
526 			/*FALLTHROUGH*/
527 		default:
528 		norm:
529 			if (c == key && !skip)
530 				return (retval);
531 			skip = NO;
532 		}
533 	return (retval);
534 }
535