xref: /dragonfly/usr.bin/ctags/C.c (revision b40e316c)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#)C.c	8.4 (Berkeley) 4/2/94
34  * $FreeBSD: src/usr.bin/ctags/C.c,v 1.3.2.2 2002/07/30 00:55:07 tjr Exp $
35  * $DragonFly: src/usr.bin/ctags/C.c,v 1.2 2003/06/17 04:29:25 dillon Exp $
36  */
37 
38 #include <limits.h>
39 #include <stdio.h>
40 #include <string.h>
41 
42 #include "ctags.h"
43 
/* Helpers local to the C/yacc scanner in this file. */
static int	func_entry(void);	/* scan past a suspected function header */
static void	hash_entry(void);	/* handle a line starting with '#' */
static void	skip_string(int key);	/* skip to the closing quote `key' */
static int	str_entry(int c);	/* handle a struct, union or enum entry */
48 
49 /*
50  * c_entries --
51  *	read .c and .h files and call appropriate routines
52  */
53 void
54 c_entries(void)
55 {
56 	int	c;			/* current character */
57 	int	level;			/* brace level */
58 	int	token;			/* if reading a token */
59 	int	t_def;			/* if reading a typedef */
60 	int	t_level;		/* typedef's brace level */
61 	char	*sp;			/* buffer pointer */
62 	char	tok[MAXTOKEN];		/* token buffer */
63 
64 	lineftell = ftell(inf);
65 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
66 	while (GETC(!=, EOF)) {
67 		switch (c) {
68 		/*
69 		 * Here's where it DOESN'T handle: {
70 		 *	foo(a)
71 		 *	{
72 		 *	#ifdef notdef
73 		 *		}
74 		 *	#endif
75 		 *		if (a)
76 		 *			puts("hello, world");
77 		 *	}
78 		 */
79 		case '{':
80 			++level;
81 			goto endtok;
82 		case '}':
83 			/*
84 			 * if level goes below zero, try and fix
85 			 * it, even though we've already messed up
86 			 */
87 			if (--level < 0)
88 				level = 0;
89 			goto endtok;
90 
91 		case '\n':
92 			SETLINE;
93 			/*
94 			 * the above 3 cases are similar in that they
95 			 * are special characters that also end tokens.
96 			 */
97 	endtok:			if (sp > tok) {
98 				*sp = EOS;
99 				token = YES;
100 				sp = tok;
101 			}
102 			else
103 				token = NO;
104 			continue;
105 
106 		/*
107 		 * We ignore quoted strings and character constants
108 		 * completely.
109 		 */
110 		case '"':
111 		case '\'':
112 			(void)skip_string(c);
113 			break;
114 
115 		/*
116 		 * comments can be fun; note the state is unchanged after
117 		 * return, in case we found:
118 		 *	"foo() XX comment XX { int bar; }"
119 		 */
120 		case '/':
121 			if (GETC(==, '*') || c == '/') {
122 				skip_comment(c);
123 				continue;
124 			}
125 			(void)ungetc(c, inf);
126 			c = '/';
127 			goto storec;
128 
129 		/* hash marks flag #define's. */
130 		case '#':
131 			if (sp == tok) {
132 				hash_entry();
133 				break;
134 			}
135 			goto storec;
136 
137 		/*
138 		 * if we have a current token, parenthesis on
139 		 * level zero indicates a function.
140 		 */
141 		case '(':
142 			if (!level && token) {
143 				int	curline;
144 
145 				if (sp != tok)
146 					*sp = EOS;
147 				/*
148 				 * grab the line immediately, we may
149 				 * already be wrong, for example,
150 				 *	foo\n
151 				 *	(arg1,
152 				 */
153 				getline();
154 				curline = lineno;
155 				if (func_entry()) {
156 					++level;
157 					pfnote(tok, curline);
158 				}
159 				break;
160 			}
161 			goto storec;
162 
163 		/*
164 		 * semi-colons indicate the end of a typedef; if we find a
165 		 * typedef we search for the next semi-colon of the same
166 		 * level as the typedef.  Ignoring "structs", they are
167 		 * tricky, since you can find:
168 		 *
169 		 *	"typedef long time_t;"
170 		 *	"typedef unsigned int u_int;"
171 		 *	"typedef unsigned int u_int [10];"
172 		 *
173 		 * If looking at a typedef, we save a copy of the last token
174 		 * found.  Then, when we find the ';' we take the current
175 		 * token if it starts with a valid token name, else we take
176 		 * the one we saved.  There's probably some reasonable
177 		 * alternative to this...
178 		 */
179 		case ';':
180 			if (t_def && level == t_level) {
181 				t_def = NO;
182 				getline();
183 				if (sp != tok)
184 					*sp = EOS;
185 				pfnote(tok, lineno);
186 				break;
187 			}
188 			goto storec;
189 
190 		/*
191 		 * store characters until one that can't be part of a token
192 		 * comes along; check the current token against certain
193 		 * reserved words.
194 		 */
195 		default:
196 			/* ignore whitespace */
197 			if (c == ' ' || c == '\t') {
198 				int save = c;
199 				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
200 					;
201 				if (c == EOF)
202 					return;
203 				(void)ungetc(c, inf);
204 				c = save;
205 			}
206 	storec:		if (!intoken(c)) {
207 				if (sp == tok)
208 					break;
209 				*sp = EOS;
210 				if (tflag) {
211 					/* no typedefs inside typedefs */
212 					if (!t_def &&
213 						   !memcmp(tok, "typedef",8)) {
214 						t_def = YES;
215 						t_level = level;
216 						break;
217 					}
218 					/* catch "typedef struct" */
219 					if ((!t_def || t_level < level)
220 					    && (!memcmp(tok, "struct", 7)
221 					    || !memcmp(tok, "union", 6)
222 					    || !memcmp(tok, "enum", 5))) {
223 						/*
224 						 * get line immediately;
225 						 * may change before '{'
226 						 */
227 						getline();
228 						if (str_entry(c))
229 							++level;
230 						break;
231 						/* } */
232 					}
233 				}
234 				sp = tok;
235 			}
236 			else if (sp != tok || begtoken(c)) {
237 				if (sp == tok + sizeof tok - 1)
238 					/* Too long -- truncate it */
239 					*sp = EOS;
240 				else
241 					*sp++ = c;
242 				token = YES;
243 			}
244 			continue;
245 		}
246 
247 		sp = tok;
248 		token = NO;
249 	}
250 }
251 
252 /*
253  * func_entry --
254  *	handle a function reference
255  */
256 static int
257 func_entry(void)
258 {
259 	int	c;			/* current character */
260 	int	level = 0;		/* for matching '()' */
261 
262 	/*
263 	 * Find the end of the assumed function declaration.
264 	 * Note that ANSI C functions can have type definitions so keep
265 	 * track of the parentheses nesting level.
266 	 */
267 	while (GETC(!=, EOF)) {
268 		switch (c) {
269 		case '\'':
270 		case '"':
271 			/* skip strings and character constants */
272 			skip_string(c);
273 			break;
274 		case '/':
275 			/* skip comments */
276 			if (GETC(==, '*') || c == '/')
277 				skip_comment(c);
278 			break;
279 		case '(':
280 			level++;
281 			break;
282 		case ')':
283 			if (level == 0)
284 				goto fnd;
285 			level--;
286 			break;
287 		case '\n':
288 			SETLINE;
289 		}
290 	}
291 	return (NO);
292 fnd:
293 	/*
294 	 * we assume that the character after a function's right paren
295 	 * is a token character if it's a function and a non-token
296 	 * character if it's a declaration.  Comments don't count...
297 	 */
298 	for (;;) {
299 		while (GETC(!=, EOF) && iswhite(c))
300 			if (c == '\n')
301 				SETLINE;
302 		if (intoken(c) || c == '{')
303 			break;
304 		if (c == '/' && (GETC(==, '*') || c == '/'))
305 			skip_comment(c);
306 		else {				/* don't ever "read" '/' */
307 			(void)ungetc(c, inf);
308 			return (NO);
309 		}
310 	}
311 	if (c != '{')
312 		(void)skip_key('{');
313 	return (YES);
314 }
315 
316 /*
317  * hash_entry --
318  *	handle a line starting with a '#'
319  */
320 static void
321 hash_entry(void)
322 {
323 	int	c;			/* character read */
324 	int	curline;		/* line started on */
325 	char	*sp;			/* buffer pointer */
326 	char	tok[MAXTOKEN];		/* storage buffer */
327 
328 	/* ignore leading whitespace */
329 	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
330 		;
331 	(void)ungetc(c, inf);
332 
333 	curline = lineno;
334 	for (sp = tok;;) {		/* get next token */
335 		if (GETC(==, EOF))
336 			return;
337 		if (iswhite(c))
338 			break;
339 		if (sp == tok + sizeof tok - 1)
340 			/* Too long -- truncate it */
341 			*sp = EOS;
342 		else
343 			*sp++ = c;
344 	}
345 	*sp = EOS;
346 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
347 		goto skip;
348 	for (;;) {			/* this doesn't handle "#define \n" */
349 		if (GETC(==, EOF))
350 			return;
351 		if (!iswhite(c))
352 			break;
353 	}
354 	for (sp = tok;;) {		/* get next token */
355 		if (sp == tok + sizeof tok - 1)
356 			/* Too long -- truncate it */
357 			*sp = EOS;
358 		else
359 			*sp++ = c;
360 		if (GETC(==, EOF))
361 			return;
362 		/*
363 		 * this is where it DOESN'T handle
364 		 * "#define \n"
365 		 */
366 		if (!intoken(c))
367 			break;
368 	}
369 	*sp = EOS;
370 	if (dflag || c == '(') {	/* only want macros */
371 		getline();
372 		pfnote(tok, curline);
373 	}
374 skip:	if (c == '\n') {		/* get rid of rest of define */
375 		SETLINE
376 		if (*(sp - 1) != '\\')
377 			return;
378 	}
379 	(void)skip_key('\n');
380 }
381 
382 /*
383  * str_entry --
384  *	handle a struct, union or enum entry
385  */
386 static int
387 str_entry(int c) /* c is current character */
388 {
389 	int	curline;		/* line started on */
390 	char	*sp;			/* buffer pointer */
391 	char	tok[LINE_MAX];		/* storage buffer */
392 
393 	curline = lineno;
394 	while (iswhite(c))
395 		if (GETC(==, EOF))
396 			return (NO);
397 	if (c == '{')		/* it was "struct {" */
398 		return (YES);
399 	for (sp = tok;;) {		/* get next token */
400 		if (sp == tok + sizeof tok - 1)
401 			/* Too long -- truncate it */
402 			*sp = EOS;
403 		else
404 			*sp++ = c;
405 		if (GETC(==, EOF))
406 			return (NO);
407 		if (!intoken(c))
408 			break;
409 	}
410 	switch (c) {
411 		case '{':		/* it was "struct foo{" */
412 			--sp;
413 			break;
414 		case '\n':		/* it was "struct foo\n" */
415 			SETLINE;
416 			/*FALLTHROUGH*/
417 		default:		/* probably "struct foo " */
418 			while (GETC(!=, EOF))
419 				if (!iswhite(c))
420 					break;
421 			if (c != '{') {
422 				(void)ungetc(c, inf);
423 				return (NO);
424 			}
425 	}
426 	*sp = EOS;
427 	pfnote(tok, curline);
428 	return (YES);
429 }
430 
431 /*
432  * skip_comment --
433  *	skip over comment
434  */
435 void
436 skip_comment(int t) /* t is comment character */
437 {
438 	int	c;			/* character read */
439 	int	star;			/* '*' flag */
440 
441 	for (star = 0; GETC(!=, EOF);)
442 		switch(c) {
443 		/* comments don't nest, nor can they be escaped. */
444 		case '*':
445 			star = YES;
446 			break;
447 		case '/':
448 			if (star && t == '*')
449 				return;
450 			break;
451 		case '\n':
452 			if (t == '/')
453 				return;
454 			SETLINE;
455 			/*FALLTHROUGH*/
456 		default:
457 			star = NO;
458 			break;
459 		}
460 }
461 
462 /*
463  * skip_string --
464  *	skip to the end of a string or character constant.
465  */
466 void
467 skip_string(int key)
468 {
469 	int	c,
470 		skip;
471 
472 	for (skip = NO; GETC(!=, EOF); )
473 		switch (c) {
474 		case '\\':		/* a backslash escapes anything */
475 			skip = !skip;	/* we toggle in case it's "\\" */
476 			break;
477 		case '\n':
478 			SETLINE;
479 			/*FALLTHROUGH*/
480 		default:
481 			if (c == key && !skip)
482 				return;
483 			skip = NO;
484 		}
485 }
486 
487 /*
488  * skip_key --
489  *	skip to next char "key"
490  */
491 int
492 skip_key(int key)
493 {
494 	int	c,
495 		skip,
496 		retval;
497 
498 	for (skip = retval = NO; GETC(!=, EOF);)
499 		switch(c) {
500 		case '\\':		/* a backslash escapes anything */
501 			skip = !skip;	/* we toggle in case it's "\\" */
502 			break;
503 		case ';':		/* special case for yacc; if one */
504 		case '|':		/* of these chars occurs, we may */
505 			retval = YES;	/* have moved out of the rule */
506 			break;		/* not used by C */
507 		case '\'':
508 		case '"':
509 			/* skip strings and character constants */
510 			skip_string(c);
511 			break;
512 		case '/':
513 			/* skip comments */
514 			if (GETC(==, '*') || c == '/') {
515 				skip_comment(c);
516 				break;
517 			}
518 			(void)ungetc(c, inf);
519 			c = '/';
520 			goto norm;
521 		case '\n':
522 			SETLINE;
523 			/*FALLTHROUGH*/
524 		default:
525 		norm:
526 			if (c == key && !skip)
527 				return (retval);
528 			skip = NO;
529 		}
530 	return (retval);
531 }
532