xref: /original-bsd/usr.bin/ctags/C.c (revision 7e5c8007)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)C.c	8.4 (Berkeley) 04/02/94";
10 #endif /* not lint */
11 
12 #include <limits.h>
13 #include <stdio.h>
14 #include <string.h>
15 
16 #include "ctags.h"
17 
18 static int	func_entry __P((void));
19 static void	hash_entry __P((void));
20 static void	skip_string __P((int));
21 static int	str_entry __P((int));
22 
23 /*
24  * c_entries --
25  *	read .c and .h files and call appropriate routines
26  */
27 void
28 c_entries()
29 {
30 	int	c;			/* current character */
31 	int	level;			/* brace level */
32 	int	token;			/* if reading a token */
33 	int	t_def;			/* if reading a typedef */
34 	int	t_level;		/* typedef's brace level */
35 	char	*sp;			/* buffer pointer */
36 	char	tok[MAXTOKEN];		/* token buffer */
37 
38 	lineftell = ftell(inf);
39 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
40 	while (GETC(!=, EOF)) {
41 		switch (c) {
42 		/*
43 		 * Here's where it DOESN'T handle: {
44 		 *	foo(a)
45 		 *	{
46 		 *	#ifdef notdef
47 		 *		}
48 		 *	#endif
49 		 *		if (a)
50 		 *			puts("hello, world");
51 		 *	}
52 		 */
53 		case '{':
54 			++level;
55 			goto endtok;
56 		case '}':
57 			/*
58 			 * if level goes below zero, try and fix
59 			 * it, even though we've already messed up
60 			 */
61 			if (--level < 0)
62 				level = 0;
63 			goto endtok;
64 
65 		case '\n':
66 			SETLINE;
67 			/*
68 			 * the above 3 cases are similar in that they
69 			 * are special characters that also end tokens.
70 			 */
71 	endtok:			if (sp > tok) {
72 				*sp = EOS;
73 				token = YES;
74 				sp = tok;
75 			}
76 			else
77 				token = NO;
78 			continue;
79 
80 		/*
81 		 * We ignore quoted strings and character constants
82 		 * completely.
83 		 */
84 		case '"':
85 		case '\'':
86 			(void)skip_string(c);
87 			break;
88 
89 		/*
90 		 * comments can be fun; note the state is unchanged after
91 		 * return, in case we found:
92 		 *	"foo() XX comment XX { int bar; }"
93 		 */
94 		case '/':
95 			if (GETC(==, '*')) {
96 				skip_comment();
97 				continue;
98 			}
99 			(void)ungetc(c, inf);
100 			c = '/';
101 			goto storec;
102 
103 		/* hash marks flag #define's. */
104 		case '#':
105 			if (sp == tok) {
106 				hash_entry();
107 				break;
108 			}
109 			goto storec;
110 
111 		/*
112 		 * if we have a current token, parenthesis on
113 		 * level zero indicates a function.
114 		 */
115 		case '(':
116 			if (!level && token) {
117 				int	curline;
118 
119 				if (sp != tok)
120 					*sp = EOS;
121 				/*
122 				 * grab the line immediately, we may
123 				 * already be wrong, for example,
124 				 *	foo\n
125 				 *	(arg1,
126 				 */
127 				getline();
128 				curline = lineno;
129 				if (func_entry()) {
130 					++level;
131 					pfnote(tok, curline);
132 				}
133 				break;
134 			}
135 			goto storec;
136 
137 		/*
138 		 * semi-colons indicate the end of a typedef; if we find a
139 		 * typedef we search for the next semi-colon of the same
140 		 * level as the typedef.  Ignoring "structs", they are
141 		 * tricky, since you can find:
142 		 *
143 		 *	"typedef long time_t;"
144 		 *	"typedef unsigned int u_int;"
145 		 *	"typedef unsigned int u_int [10];"
146 		 *
147 		 * If looking at a typedef, we save a copy of the last token
148 		 * found.  Then, when we find the ';' we take the current
149 		 * token if it starts with a valid token name, else we take
150 		 * the one we saved.  There's probably some reasonable
151 		 * alternative to this...
152 		 */
153 		case ';':
154 			if (t_def && level == t_level) {
155 				t_def = NO;
156 				getline();
157 				if (sp != tok)
158 					*sp = EOS;
159 				pfnote(tok, lineno);
160 				break;
161 			}
162 			goto storec;
163 
164 		/*
165 		 * store characters until one that can't be part of a token
166 		 * comes along; check the current token against certain
167 		 * reserved words.
168 		 */
169 		default:
170 	storec:		if (!intoken(c)) {
171 				if (sp == tok)
172 					break;
173 				*sp = EOS;
174 				if (tflag) {
175 					/* no typedefs inside typedefs */
176 					if (!t_def &&
177 						   !memcmp(tok, "typedef",8)) {
178 						t_def = YES;
179 						t_level = level;
180 						break;
181 					}
182 					/* catch "typedef struct" */
183 					if ((!t_def || t_level < level)
184 					    && (!memcmp(tok, "struct", 7)
185 					    || !memcmp(tok, "union", 6)
186 					    || !memcmp(tok, "enum", 5))) {
187 						/*
188 						 * get line immediately;
189 						 * may change before '{'
190 						 */
191 						getline();
192 						if (str_entry(c))
193 							++level;
194 						break;
195 						/* } */
196 					}
197 				}
198 				sp = tok;
199 			}
200 			else if (sp != tok || begtoken(c)) {
201 				*sp++ = c;
202 				token = YES;
203 			}
204 			continue;
205 		}
206 
207 		sp = tok;
208 		token = NO;
209 	}
210 }
211 
212 /*
213  * func_entry --
214  *	handle a function reference
215  */
216 static int
217 func_entry()
218 {
219 	int	c;			/* current character */
220 	int	level = 0;		/* for matching '()' */
221 
222 	/*
223 	 * Find the end of the assumed function declaration.
224 	 * Note that ANSI C functions can have type definitions so keep
225 	 * track of the parentheses nesting level.
226 	 */
227 	while (GETC(!=, EOF)) {
228 		switch (c) {
229 		case '\'':
230 		case '"':
231 			/* skip strings and character constants */
232 			skip_string(c);
233 			break;
234 		case '/':
235 			/* skip comments */
236 			if (GETC(==, '*'))
237 				skip_comment();
238 			break;
239 		case '(':
240 			level++;
241 			break;
242 		case ')':
243 			if (level == 0)
244 				goto fnd;
245 			level--;
246 			break;
247 		case '\n':
248 			SETLINE;
249 		}
250 	}
251 	return (NO);
252 fnd:
253 	/*
254 	 * we assume that the character after a function's right paren
255 	 * is a token character if it's a function and a non-token
256 	 * character if it's a declaration.  Comments don't count...
257 	 */
258 	for (;;) {
259 		while (GETC(!=, EOF) && iswhite(c))
260 			if (c == '\n')
261 				SETLINE;
262 		if (intoken(c) || c == '{')
263 			break;
264 		if (c == '/' && GETC(==, '*'))
265 			skip_comment();
266 		else {				/* don't ever "read" '/' */
267 			(void)ungetc(c, inf);
268 			return (NO);
269 		}
270 	}
271 	if (c != '{')
272 		(void)skip_key('{');
273 	return (YES);
274 }
275 
276 /*
277  * hash_entry --
278  *	handle a line starting with a '#'
279  */
280 static void
281 hash_entry()
282 {
283 	int	c;			/* character read */
284 	int	curline;		/* line started on */
285 	char	*sp;			/* buffer pointer */
286 	char	tok[MAXTOKEN];		/* storage buffer */
287 
288 	curline = lineno;
289 	for (sp = tok;;) {		/* get next token */
290 		if (GETC(==, EOF))
291 			return;
292 		if (iswhite(c))
293 			break;
294 		*sp++ = c;
295 	}
296 	*sp = EOS;
297 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
298 		goto skip;
299 	for (;;) {			/* this doesn't handle "#define \n" */
300 		if (GETC(==, EOF))
301 			return;
302 		if (!iswhite(c))
303 			break;
304 	}
305 	for (sp = tok;;) {		/* get next token */
306 		*sp++ = c;
307 		if (GETC(==, EOF))
308 			return;
309 		/*
310 		 * this is where it DOESN'T handle
311 		 * "#define \n"
312 		 */
313 		if (!intoken(c))
314 			break;
315 	}
316 	*sp = EOS;
317 	if (dflag || c == '(') {	/* only want macros */
318 		getline();
319 		pfnote(tok, curline);
320 	}
321 skip:	if (c == '\n') {		/* get rid of rest of define */
322 		SETLINE
323 		if (*(sp - 1) != '\\')
324 			return;
325 	}
326 	(void)skip_key('\n');
327 }
328 
329 /*
330  * str_entry --
331  *	handle a struct, union or enum entry
332  */
333 static int
334 str_entry(c)
335 	int	c;			/* current character */
336 {
337 	int	curline;		/* line started on */
338 	char	*sp;			/* buffer pointer */
339 	char	tok[LINE_MAX];		/* storage buffer */
340 
341 	curline = lineno;
342 	while (iswhite(c))
343 		if (GETC(==, EOF))
344 			return (NO);
345 	if (c == '{')		/* it was "struct {" */
346 		return (YES);
347 	for (sp = tok;;) {		/* get next token */
348 		*sp++ = c;
349 		if (GETC(==, EOF))
350 			return (NO);
351 		if (!intoken(c))
352 			break;
353 	}
354 	switch (c) {
355 		case '{':		/* it was "struct foo{" */
356 			--sp;
357 			break;
358 		case '\n':		/* it was "struct foo\n" */
359 			SETLINE;
360 			/*FALLTHROUGH*/
361 		default:		/* probably "struct foo " */
362 			while (GETC(!=, EOF))
363 				if (!iswhite(c))
364 					break;
365 			if (c != '{') {
366 				(void)ungetc(c, inf);
367 				return (NO);
368 			}
369 	}
370 	*sp = EOS;
371 	pfnote(tok, curline);
372 	return (YES);
373 }
374 
375 /*
376  * skip_comment --
377  *	skip over comment
378  */
379 void
380 skip_comment()
381 {
382 	int	c;			/* character read */
383 	int	star;			/* '*' flag */
384 
385 	for (star = 0; GETC(!=, EOF);)
386 		switch(c) {
387 		/* comments don't nest, nor can they be escaped. */
388 		case '*':
389 			star = YES;
390 			break;
391 		case '/':
392 			if (star)
393 				return;
394 			break;
395 		case '\n':
396 			SETLINE;
397 			/*FALLTHROUGH*/
398 		default:
399 			star = NO;
400 			break;
401 		}
402 }
403 
404 /*
405  * skip_string --
406  *	skip to the end of a string or character constant.
407  */
408 void
409 skip_string(key)
410 	int	key;
411 {
412 	int	c,
413 		skip;
414 
415 	for (skip = NO; GETC(!=, EOF); )
416 		switch (c) {
417 		case '\\':		/* a backslash escapes anything */
418 			skip = !skip;	/* we toggle in case it's "\\" */
419 			break;
420 		case '\n':
421 			SETLINE;
422 			/*FALLTHROUGH*/
423 		default:
424 			if (c == key && !skip)
425 				return;
426 			skip = NO;
427 		}
428 }
429 
430 /*
431  * skip_key --
432  *	skip to next char "key"
433  */
434 int
435 skip_key(key)
436 	int	key;
437 {
438 	int	c,
439 		skip,
440 		retval;
441 
442 	for (skip = retval = NO; GETC(!=, EOF);)
443 		switch(c) {
444 		case '\\':		/* a backslash escapes anything */
445 			skip = !skip;	/* we toggle in case it's "\\" */
446 			break;
447 		case ';':		/* special case for yacc; if one */
448 		case '|':		/* of these chars occurs, we may */
449 			retval = YES;	/* have moved out of the rule */
450 			break;		/* not used by C */
451 		case '\'':
452 		case '"':
453 			/* skip strings and character constants */
454 			skip_string(c);
455 			break;
456 		case '/':
457 			/* skip comments */
458 			if (GETC(==, '*')) {
459 				skip_comment();
460 				break;
461 			}
462 			(void)ungetc(c, inf);
463 			c = '/';
464 			goto norm;
465 		case '\n':
466 			SETLINE;
467 			/*FALLTHROUGH*/
468 		default:
469 		norm:
470 			if (c == key && !skip)
471 				return (retval);
472 			skip = NO;
473 		}
474 	return (retval);
475 }
476