xref: /original-bsd/usr.bin/ctags/C.c (revision 333da485)
1 /*
2  * Copyright (c) 1987, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)C.c	8.2 (Berkeley) 01/07/94";
10 #endif /* not lint */
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include "ctags.h"
15 
/* forward declarations of the file-local helpers defined further down */
static int	func_entry();
static int	str_entry();
static void	hash_entry();
static void	skip_string();
19 
20 /*
21  * c_entries --
22  *	read .c and .h files and call appropriate routines
23  */
24 c_entries()
25 {
26 	extern int	tflag;		/* -t: create tags for typedefs */
27 	register int	c,		/* current character */
28 			level;		/* brace level */
29 	register char	*sp;		/* buffer pointer */
30 	int	token,			/* if reading a token */
31 		t_def,			/* if reading a typedef */
32 		t_level;		/* typedef's brace level */
33 	char	tok[MAXTOKEN];		/* token buffer */
34 
35 	lineftell = ftell(inf);
36 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
37 	while (GETC(!=,EOF)) {
38 
39 	switch ((char)c) {
40 		/*
41 		 * Here's where it DOESN'T handle:
42 		 *	foo(a)
43 		 *	{
44 		 *	#ifdef notdef
45 		 *		}
46 		 *	#endif
47 		 *		if (a)
48 		 *			puts("hello, world");
49 		 *	}
50 		 */
51 		case '{':
52 			++level;
53 			goto endtok;
54 		case '}':
55 			/*
56 			 * if level goes below zero, try and fix
57 			 * it, even though we've already messed up
58 			 */
59 			if (--level < 0)
60 				level = 0;
61 			goto endtok;
62 
63 		case '\n':
64 			SETLINE;
65 			/*
66 			 * the above 3 cases are similar in that they
67 			 * are special characters that also end tokens.
68 			 */
69 endtok:			if (sp > tok) {
70 				*sp = EOS;
71 				token = YES;
72 				sp = tok;
73 			}
74 			else
75 				token = NO;
76 			continue;
77 
78 		/*
79 		 * We ignore quoted strings and character constants
80 		 * completely.
81 		 */
82 		case '"':
83 		case '\'':
84 			(void)skip_string(c);
85 			break;
86 
87 		/*
88 		 * comments can be fun; note the state is unchanged after
89 		 * return, in case we found:
90 		 *	"foo() XX comment XX { int bar; }"
91 		 */
92 		case '/':
93 			if (GETC(==,'*')) {
94 				skip_comment();
95 				continue;
96 			}
97 			(void)ungetc(c,inf);
98 			c = '/';
99 			goto storec;
100 
101 		/* hash marks flag #define's. */
102 		case '#':
103 			if (sp == tok) {
104 				hash_entry();
105 				break;
106 			}
107 			goto storec;
108 
109 		/*
110 	 	 * if we have a current token, parenthesis on
111 		 * level zero indicates a function.
112 		 */
113 		case '(':
114 			if (!level && token) {
115 				int	curline;
116 
117 				if (sp != tok)
118 					*sp = EOS;
119 				/*
120 				 * grab the line immediately, we may
121 				 * already be wrong, for example,
122 				 *	foo\n
123 				 *	(arg1,
124 				 */
125 				getline();
126 				curline = lineno;
127 				if (func_entry()) {
128 					++level;
129 					pfnote(tok,curline);
130 				}
131 				break;
132 			}
133 			goto storec;
134 
135 		/*
136 		 * semi-colons indicate the end of a typedef; if we find a
137 		 * typedef we search for the next semi-colon of the same
138 		 * level as the typedef.  Ignoring "structs", they are
139 		 * tricky, since you can find:
140 		 *
141 		 *	"typedef long time_t;"
142 		 *	"typedef unsigned int u_int;"
143 		 *	"typedef unsigned int u_int [10];"
144 		 *
145 		 * If looking at a typedef, we save a copy of the last token
146 		 * found.  Then, when we find the ';' we take the current
147 		 * token if it starts with a valid token name, else we take
148 		 * the one we saved.  There's probably some reasonable
149 		 * alternative to this...
150 		 */
151 		case ';':
152 			if (t_def && level == t_level) {
153 				t_def = NO;
154 				getline();
155 				if (sp != tok)
156 					*sp = EOS;
157 				pfnote(tok,lineno);
158 				break;
159 			}
160 			goto storec;
161 
162 		/*
163 		 * store characters until one that can't be part of a token
164 		 * comes along; check the current token against certain
165 		 * reserved words.
166 		 */
167 		default:
168 storec:			if (!intoken(c)) {
169 				if (sp == tok)
170 					break;
171 				*sp = EOS;
172 				if (tflag) {
173 					/* no typedefs inside typedefs */
174 					if (!t_def && !bcmp(tok,"typedef",8)) {
175 						t_def = YES;
176 						t_level = level;
177 						break;
178 					}
179 					/* catch "typedef struct" */
180 					if ((!t_def || t_level < level)
181 					    && (!bcmp(tok,"struct",7)
182 					    || !bcmp(tok,"union",6)
183 					    || !bcmp(tok,"enum",5))) {
184 						/*
185 						 * get line immediately;
186 						 * may change before '{'
187 						 */
188 						getline();
189 						if (str_entry(c))
190 							++level;
191 						break;
192 					}
193 				}
194 				sp = tok;
195 			}
196 			else if (sp != tok || begtoken(c)) {
197 				*sp++ = c;
198 				token = YES;
199 			}
200 			continue;
201 		}
202 		sp = tok;
203 		token = NO;
204 	}
205 }
206 
207 /*
208  * func_entry --
209  *	handle a function reference
210  */
211 static int
212 func_entry()
213 {
214 	register int	c;		/* current character */
215 	int		level = 0;	/* for matching '()' */
216 
217 	/*
218 	 * Find the end of the assumed function declaration.
219 	 * Note that ANSI C functions can have type definitions so keep
220 	 * track of the parentheses nesting level.
221 	 */
222 	while (GETC(!=,EOF)) {
223 		switch ((char)c) {
224 		case '\'':
225 		case '"':
226 			/* skip strings and character constants */
227 			skip_string(c);
228 			break;
229 		case '/':
230 			/* skip comments */
231 			if (GETC(==,'*'))
232 				skip_comment();
233 			break;
234 		case '(':
235 			level++;
236 			break;
237 		case ')':
238 			if (level == 0)
239 				goto fnd;
240 			level--;
241 			break;
242 		case '\n':
243 			SETLINE;
244 		}
245 	}
246 	return(NO);
247 fnd:
248 	/*
249 	 * we assume that the character after a function's right paren
250 	 * is a token character if it's a function and a non-token
251 	 * character if it's a declaration.  Comments don't count...
252 	 */
253 	for (;;) {
254 		while (GETC(!=,EOF) && iswhite(c))
255 			if (c == (int)'\n')
256 				SETLINE;
257 		if (intoken(c) || c == (int)'{')
258 			break;
259 		if (c == (int)'/' && GETC(==,'*'))
260 			skip_comment();
261 		else {				/* don't ever "read" '/' */
262 			(void)ungetc(c,inf);
263 			return(NO);
264 		}
265 	}
266 	if (c != (int)'{')
267 		(void)skip_key((int)'{');
268 	return(YES);
269 }
270 
271 /*
272  * hash_entry --
273  *	handle a line starting with a '#'
274  */
275 static void
276 hash_entry()
277 {
278 	extern int	dflag;		/* -d: non-macro defines */
279 	register int	c,		/* character read */
280 			curline;	/* line started on */
281 	register char	*sp;		/* buffer pointer */
282 	char	tok[MAXTOKEN];		/* storage buffer */
283 
284 	curline = lineno;
285 	for (sp = tok;;) {		/* get next token */
286 		if (GETC(==,EOF))
287 			return;
288 		if (iswhite(c))
289 			break;
290 		*sp++ = c;
291 	}
292 	*sp = EOS;
293 	if (bcmp(tok,"define",6))	/* only interested in #define's */
294 		goto skip;
295 	for (;;) {			/* this doesn't handle "#define \n" */
296 		if (GETC(==,EOF))
297 			return;
298 		if (!iswhite(c))
299 			break;
300 	}
301 	for (sp = tok;;) {		/* get next token */
302 		*sp++ = c;
303 		if (GETC(==,EOF))
304 			return;
305 		/*
306 		 * this is where it DOESN'T handle
307 		 * "#define \n"
308 		 */
309 		if (!intoken(c))
310 			break;
311 	}
312 	*sp = EOS;
313 	if (dflag || c == (int)'(') {	/* only want macros */
314 		getline();
315 		pfnote(tok,curline);
316 	}
317 skip:	if (c == (int)'\n') {		/* get rid of rest of define */
318 		SETLINE
319 		if (*(sp - 1) != '\\')
320 			return;
321 	}
322 	(void)skip_key((int)'\n');
323 }
324 
325 /*
326  * str_entry --
327  *	handle a struct, union or enum entry
328  */
329 static
330 str_entry(c)
331 	register int	c;		/* current character */
332 {
333 	register char	*sp;		/* buffer pointer */
334 	int	curline;		/* line started on */
335 	char	tok[BUFSIZ];		/* storage buffer */
336 
337 	curline = lineno;
338 	while (iswhite(c))
339 		if (GETC(==,EOF))
340 			return(NO);
341 	if (c == (int)'{')		/* it was "struct {" */
342 		return(YES);
343 	for (sp = tok;;) {		/* get next token */
344 		*sp++ = c;
345 		if (GETC(==,EOF))
346 			return(NO);
347 		if (!intoken(c))
348 			break;
349 	}
350 	switch ((char)c) {
351 		case '{':		/* it was "struct foo{" */
352 			--sp;
353 			break;
354 		case '\n':		/* it was "struct foo\n" */
355 			SETLINE;
356 			/*FALLTHROUGH*/
357 		default:		/* probably "struct foo " */
358 			while (GETC(!=,EOF))
359 				if (!iswhite(c))
360 					break;
361 			if (c != (int)'{') {
362 				(void)ungetc(c, inf);
363 				return(NO);
364 			}
365 	}
366 	*sp = EOS;
367 	pfnote(tok,curline);
368 	return(YES);
369 }
370 
371 /*
372  * skip_comment --
373  *	skip over comment
374  */
375 skip_comment()
376 {
377 	register int	c,		/* character read */
378 			star;		/* '*' flag */
379 
380 	for (star = 0;GETC(!=,EOF);)
381 		switch((char)c) {
382 			/* comments don't nest, nor can they be escaped. */
383 			case '*':
384 				star = YES;
385 				break;
386 			case '/':
387 				if (star)
388 					return;
389 				break;
390 			case '\n':
391 				SETLINE;
392 				/*FALLTHROUGH*/
393 			default:
394 				star = NO;
395 		}
396 }
397 
398 /*
399  * skip_string --
400  *	skip to the end of a string or character constant.
401  */
402 void
403 skip_string(key)
404 	register int	key;
405 {
406 	register int	c,
407 			skip;
408 
409 	for (skip = NO; GETC(!=,EOF); )
410 		switch ((char)c) {
411 		case '\\':		/* a backslash escapes anything */
412 			skip = !skip;	/* we toggle in case it's "\\" */
413 			break;
414 		case '\n':
415 			SETLINE;
416 			/*FALLTHROUGH*/
417 		default:
418 			if (c == key && !skip)
419 				return;
420 			skip = NO;
421 		}
422 }
423 
424 /*
425  * skip_key --
426  *	skip to next char "key"
427  */
428 int
429 skip_key(key)
430 	register int	key;
431 {
432 	register int	c,
433 			skip,
434 			retval;
435 
436 	for (skip = retval = NO;GETC(!=,EOF);)
437 		switch((char)c) {
438 		case '\\':		/* a backslash escapes anything */
439 			skip = !skip;	/* we toggle in case it's "\\" */
440 			break;
441 		case ';':		/* special case for yacc; if one */
442 		case '|':		/* of these chars occurs, we may */
443 			retval = YES;	/* have moved out of the rule */
444 			break;		/* not used by C */
445 		case '\'':
446 		case '"':
447 			/* skip strings and character constants */
448 			skip_string(c);
449 			break;
450 		case '/':
451 			/* skip comments */
452 			if (GETC(==,'*')) {
453 				skip_comment();
454 				break;
455 			}
456 			(void)ungetc(c,inf);
457 			c = '/';
458 			goto norm;
459 		case '\n':
460 			SETLINE;
461 			/*FALLTHROUGH*/
462 		default:
463 		norm:
464 			if (c == key && !skip)
465 				return(retval);
466 			skip = NO;
467 		}
468 	return(retval);
469 }
470