xref: /dragonfly/usr.bin/ctags/C.c (revision 279dd846)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#)C.c	8.4 (Berkeley) 4/2/94
30  * $FreeBSD: src/usr.bin/ctags/C.c,v 1.3.2.2 2002/07/30 00:55:07 tjr Exp $
31  * $DragonFly: src/usr.bin/ctags/C.c,v 1.2 2003/06/17 04:29:25 dillon Exp $
32  */
33 
34 #include <limits.h>
35 #include <stdio.h>
36 #include <string.h>
37 
38 #include "ctags.h"
39 
40 static int	func_entry(void);
41 static void	hash_entry(void);
42 static void	skip_string(int);
43 static int	str_entry(int);
44 
45 /*
46  * c_entries --
47  *	read .c and .h files and call appropriate routines
48  */
49 void
50 c_entries(void)
51 {
52 	int	c;			/* current character */
53 	int	level;			/* brace level */
54 	int	token;			/* if reading a token */
55 	int	t_def;			/* if reading a typedef */
56 	int	t_level;		/* typedef's brace level */
57 	char	*sp;			/* buffer pointer */
58 	char	tok[MAXTOKEN];		/* token buffer */
59 
60 	lineftell = ftell(inf);
61 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
62 	while (GETC(!=, EOF)) {
63 		switch (c) {
64 		/*
65 		 * Here's where it DOESN'T handle: {
66 		 *	foo(a)
67 		 *	{
68 		 *	#ifdef notdef
69 		 *		}
70 		 *	#endif
71 		 *		if (a)
72 		 *			puts("hello, world");
73 		 *	}
74 		 */
75 		case '{':
76 			++level;
77 			goto endtok;
78 		case '}':
79 			/*
80 			 * if level goes below zero, try and fix
81 			 * it, even though we've already messed up
82 			 */
83 			if (--level < 0)
84 				level = 0;
85 			goto endtok;
86 
87 		case '\n':
88 			SETLINE;
89 			/*
90 			 * the above 3 cases are similar in that they
91 			 * are special characters that also end tokens.
92 			 */
93 	endtok:			if (sp > tok) {
94 				*sp = EOS;
95 				token = YES;
96 				sp = tok;
97 			}
98 			else
99 				token = NO;
100 			continue;
101 
102 		/*
103 		 * We ignore quoted strings and character constants
104 		 * completely.
105 		 */
106 		case '"':
107 		case '\'':
108 			(void)skip_string(c);
109 			break;
110 
111 		/*
112 		 * comments can be fun; note the state is unchanged after
113 		 * return, in case we found:
114 		 *	"foo() XX comment XX { int bar; }"
115 		 */
116 		case '/':
117 			if (GETC(==, '*') || c == '/') {
118 				skip_comment(c);
119 				continue;
120 			}
121 			(void)ungetc(c, inf);
122 			c = '/';
123 			goto storec;
124 
125 		/* hash marks flag #define's. */
126 		case '#':
127 			if (sp == tok) {
128 				hash_entry();
129 				break;
130 			}
131 			goto storec;
132 
133 		/*
134 		 * if we have a current token, parenthesis on
135 		 * level zero indicates a function.
136 		 */
137 		case '(':
138 			if (!level && token) {
139 				int	curline;
140 
141 				if (sp != tok)
142 					*sp = EOS;
143 				/*
144 				 * grab the line immediately, we may
145 				 * already be wrong, for example,
146 				 *	foo\n
147 				 *	(arg1,
148 				 */
149 				getline();
150 				curline = lineno;
151 				if (func_entry()) {
152 					++level;
153 					pfnote(tok, curline);
154 				}
155 				break;
156 			}
157 			goto storec;
158 
159 		/*
160 		 * semi-colons indicate the end of a typedef; if we find a
161 		 * typedef we search for the next semi-colon of the same
162 		 * level as the typedef.  Ignoring "structs", they are
163 		 * tricky, since you can find:
164 		 *
165 		 *	"typedef long time_t;"
166 		 *	"typedef unsigned int u_int;"
167 		 *	"typedef unsigned int u_int [10];"
168 		 *
169 		 * If looking at a typedef, we save a copy of the last token
170 		 * found.  Then, when we find the ';' we take the current
171 		 * token if it starts with a valid token name, else we take
172 		 * the one we saved.  There's probably some reasonable
173 		 * alternative to this...
174 		 */
175 		case ';':
176 			if (t_def && level == t_level) {
177 				t_def = NO;
178 				getline();
179 				if (sp != tok)
180 					*sp = EOS;
181 				pfnote(tok, lineno);
182 				break;
183 			}
184 			goto storec;
185 
186 		/*
187 		 * store characters until one that can't be part of a token
188 		 * comes along; check the current token against certain
189 		 * reserved words.
190 		 */
191 		default:
192 			/* ignore whitespace */
193 			if (c == ' ' || c == '\t') {
194 				int save = c;
195 				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
196 					;
197 				if (c == EOF)
198 					return;
199 				(void)ungetc(c, inf);
200 				c = save;
201 			}
202 	storec:		if (!intoken(c)) {
203 				if (sp == tok)
204 					break;
205 				*sp = EOS;
206 				if (tflag) {
207 					/* no typedefs inside typedefs */
208 					if (!t_def &&
209 						   !memcmp(tok, "typedef",8)) {
210 						t_def = YES;
211 						t_level = level;
212 						break;
213 					}
214 					/* catch "typedef struct" */
215 					if ((!t_def || t_level < level)
216 					    && (!memcmp(tok, "struct", 7)
217 					    || !memcmp(tok, "union", 6)
218 					    || !memcmp(tok, "enum", 5))) {
219 						/*
220 						 * get line immediately;
221 						 * may change before '{'
222 						 */
223 						getline();
224 						if (str_entry(c))
225 							++level;
226 						break;
227 						/* } */
228 					}
229 				}
230 				sp = tok;
231 			}
232 			else if (sp != tok || begtoken(c)) {
233 				if (sp == tok + sizeof tok - 1)
234 					/* Too long -- truncate it */
235 					*sp = EOS;
236 				else
237 					*sp++ = c;
238 				token = YES;
239 			}
240 			continue;
241 		}
242 
243 		sp = tok;
244 		token = NO;
245 	}
246 }
247 
248 /*
249  * func_entry --
250  *	handle a function reference
251  */
252 static int
253 func_entry(void)
254 {
255 	int	c;			/* current character */
256 	int	level = 0;		/* for matching '()' */
257 
258 	/*
259 	 * Find the end of the assumed function declaration.
260 	 * Note that ANSI C functions can have type definitions so keep
261 	 * track of the parentheses nesting level.
262 	 */
263 	while (GETC(!=, EOF)) {
264 		switch (c) {
265 		case '\'':
266 		case '"':
267 			/* skip strings and character constants */
268 			skip_string(c);
269 			break;
270 		case '/':
271 			/* skip comments */
272 			if (GETC(==, '*') || c == '/')
273 				skip_comment(c);
274 			break;
275 		case '(':
276 			level++;
277 			break;
278 		case ')':
279 			if (level == 0)
280 				goto fnd;
281 			level--;
282 			break;
283 		case '\n':
284 			SETLINE;
285 		}
286 	}
287 	return (NO);
288 fnd:
289 	/*
290 	 * we assume that the character after a function's right paren
291 	 * is a token character if it's a function and a non-token
292 	 * character if it's a declaration.  Comments don't count...
293 	 */
294 	for (;;) {
295 		while (GETC(!=, EOF) && iswhite(c))
296 			if (c == '\n')
297 				SETLINE;
298 		if (intoken(c) || c == '{')
299 			break;
300 		if (c == '/' && (GETC(==, '*') || c == '/'))
301 			skip_comment(c);
302 		else {				/* don't ever "read" '/' */
303 			(void)ungetc(c, inf);
304 			return (NO);
305 		}
306 	}
307 	if (c != '{')
308 		(void)skip_key('{');
309 	return (YES);
310 }
311 
312 /*
313  * hash_entry --
314  *	handle a line starting with a '#'
315  */
316 static void
317 hash_entry(void)
318 {
319 	int	c;			/* character read */
320 	int	curline;		/* line started on */
321 	char	*sp;			/* buffer pointer */
322 	char	tok[MAXTOKEN];		/* storage buffer */
323 
324 	/* ignore leading whitespace */
325 	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
326 		;
327 	(void)ungetc(c, inf);
328 
329 	curline = lineno;
330 	for (sp = tok;;) {		/* get next token */
331 		if (GETC(==, EOF))
332 			return;
333 		if (iswhite(c))
334 			break;
335 		if (sp == tok + sizeof tok - 1)
336 			/* Too long -- truncate it */
337 			*sp = EOS;
338 		else
339 			*sp++ = c;
340 	}
341 	*sp = EOS;
342 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
343 		goto skip;
344 	for (;;) {			/* this doesn't handle "#define \n" */
345 		if (GETC(==, EOF))
346 			return;
347 		if (!iswhite(c))
348 			break;
349 	}
350 	for (sp = tok;;) {		/* get next token */
351 		if (sp == tok + sizeof tok - 1)
352 			/* Too long -- truncate it */
353 			*sp = EOS;
354 		else
355 			*sp++ = c;
356 		if (GETC(==, EOF))
357 			return;
358 		/*
359 		 * this is where it DOESN'T handle
360 		 * "#define \n"
361 		 */
362 		if (!intoken(c))
363 			break;
364 	}
365 	*sp = EOS;
366 	if (dflag || c == '(') {	/* only want macros */
367 		getline();
368 		pfnote(tok, curline);
369 	}
370 skip:	if (c == '\n') {		/* get rid of rest of define */
371 		SETLINE
372 		if (*(sp - 1) != '\\')
373 			return;
374 	}
375 	(void)skip_key('\n');
376 }
377 
378 /*
379  * str_entry --
380  *	handle a struct, union or enum entry
381  */
382 static int
383 str_entry(int c) /* c is current character */
384 {
385 	int	curline;		/* line started on */
386 	char	*sp;			/* buffer pointer */
387 	char	tok[LINE_MAX];		/* storage buffer */
388 
389 	curline = lineno;
390 	while (iswhite(c))
391 		if (GETC(==, EOF))
392 			return (NO);
393 	if (c == '{')		/* it was "struct {" */
394 		return (YES);
395 	for (sp = tok;;) {		/* get next token */
396 		if (sp == tok + sizeof tok - 1)
397 			/* Too long -- truncate it */
398 			*sp = EOS;
399 		else
400 			*sp++ = c;
401 		if (GETC(==, EOF))
402 			return (NO);
403 		if (!intoken(c))
404 			break;
405 	}
406 	switch (c) {
407 		case '{':		/* it was "struct foo{" */
408 			--sp;
409 			break;
410 		case '\n':		/* it was "struct foo\n" */
411 			SETLINE;
412 			/*FALLTHROUGH*/
413 		default:		/* probably "struct foo " */
414 			while (GETC(!=, EOF))
415 				if (!iswhite(c))
416 					break;
417 			if (c != '{') {
418 				(void)ungetc(c, inf);
419 				return (NO);
420 			}
421 	}
422 	*sp = EOS;
423 	pfnote(tok, curline);
424 	return (YES);
425 }
426 
427 /*
428  * skip_comment --
429  *	skip over comment
430  */
431 void
432 skip_comment(int t) /* t is comment character */
433 {
434 	int	c;			/* character read */
435 	int	star;			/* '*' flag */
436 
437 	for (star = 0; GETC(!=, EOF);)
438 		switch(c) {
439 		/* comments don't nest, nor can they be escaped. */
440 		case '*':
441 			star = YES;
442 			break;
443 		case '/':
444 			if (star && t == '*')
445 				return;
446 			break;
447 		case '\n':
448 			if (t == '/')
449 				return;
450 			SETLINE;
451 			/*FALLTHROUGH*/
452 		default:
453 			star = NO;
454 			break;
455 		}
456 }
457 
458 /*
459  * skip_string --
460  *	skip to the end of a string or character constant.
461  */
462 void
463 skip_string(int key)
464 {
465 	int	c,
466 		skip;
467 
468 	for (skip = NO; GETC(!=, EOF); )
469 		switch (c) {
470 		case '\\':		/* a backslash escapes anything */
471 			skip = !skip;	/* we toggle in case it's "\\" */
472 			break;
473 		case '\n':
474 			SETLINE;
475 			/*FALLTHROUGH*/
476 		default:
477 			if (c == key && !skip)
478 				return;
479 			skip = NO;
480 		}
481 }
482 
483 /*
484  * skip_key --
485  *	skip to next char "key"
486  */
487 int
488 skip_key(int key)
489 {
490 	int	c,
491 		skip,
492 		retval;
493 
494 	for (skip = retval = NO; GETC(!=, EOF);)
495 		switch(c) {
496 		case '\\':		/* a backslash escapes anything */
497 			skip = !skip;	/* we toggle in case it's "\\" */
498 			break;
499 		case ';':		/* special case for yacc; if one */
500 		case '|':		/* of these chars occurs, we may */
501 			retval = YES;	/* have moved out of the rule */
502 			break;		/* not used by C */
503 		case '\'':
504 		case '"':
505 			/* skip strings and character constants */
506 			skip_string(c);
507 			break;
508 		case '/':
509 			/* skip comments */
510 			if (GETC(==, '*') || c == '/') {
511 				skip_comment(c);
512 				break;
513 			}
514 			(void)ungetc(c, inf);
515 			c = '/';
516 			goto norm;
517 		case '\n':
518 			SETLINE;
519 			/*FALLTHROUGH*/
520 		default:
521 		norm:
522 			if (c == key && !skip)
523 				return (retval);
524 			skip = NO;
525 		}
526 	return (retval);
527 }
528