xref: /original-bsd/usr.bin/ctags/C.c (revision eeb6993a)
1 /*
2  * Copyright (c) 1987 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)C.c	5.4 (Berkeley) 06/01/90";
10 #endif /* not lint */
11 
12 #include <stdio.h>
13 #include <ctags.h>
14 
15 /*
16  * c_entries --
17  *	read .c and .h files and call appropriate routines
18  */
19 c_entries()
20 {
21 	extern int	tflag;		/* -t: create tags for typedefs */
22 	register int	c,		/* current character */
23 			level;		/* brace level */
24 	register char	*sp;		/* buffer pointer */
25 	int	token,			/* if reading a token */
26 		t_def,			/* if reading a typedef */
27 		t_level;		/* typedef's brace level */
28 	char	tok[MAXTOKEN];		/* token buffer */
29 
30 	lineftell = ftell(inf);
31 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
32 	while (GETC(!=,EOF)) {
33 
34 	switch ((char)c) {
35 		/*
36 		 * Here's where it DOESN'T handle:
37 		 *	foo(a)
38 		 *	{
39 		 *	#ifdef notdef
40 		 *		}
41 		 *	#endif
42 		 *		if (a)
43 		 *			puts("hello, world");
44 		 *	}
45 		 */
46 		case '{':
47 			++level;
48 			goto endtok;
49 		case '}':
50 			/*
51 			 * if level goes below zero, try and fix
52 			 * it, even though we've already messed up
53 			 */
54 			if (--level < 0)
55 				level = 0;
56 			goto endtok;
57 
58 		case '\n':
59 			SETLINE;
60 			/*
61 			 * the above 3 cases are similar in that they
62 			 * are special characters that also end tokens.
63 			 */
64 endtok:			if (sp > tok) {
65 				*sp = EOS;
66 				token = YES;
67 				sp = tok;
68 			}
69 			else
70 				token = NO;
71 			continue;
72 
73 		/* we ignore quoted strings and comments in their entirety */
74 		case '"':
75 		case '\'':
76 			(void)skip_key(c);
77 			break;
78 
79 		/*
80 		 * comments can be fun; note the state is unchanged after
81 		 * return, in case we found:
82 		 *	"foo() XX comment XX { int bar; }"
83 		 */
84 		case '/':
85 			if (GETC(==,'*')) {
86 				skip_comment();
87 				continue;
88 			}
89 			(void)ungetc(c,inf);
90 			c = '/';
91 			goto storec;
92 
93 		/* hash marks flag #define's. */
94 		case '#':
95 			if (sp == tok) {
96 				hash_entry();
97 				break;
98 			}
99 			goto storec;
100 
101 		/*
102 	 	 * if we have a current token, parenthesis on
103 		 * level zero indicates a function.
104 		 */
105 		case '(':
106 			if (!level && token) {
107 				int	curline;
108 
109 				if (sp != tok)
110 					*sp = EOS;
111 				/*
112 				 * grab the line immediately, we may
113 				 * already be wrong, for example,
114 				 *	foo\n
115 				 *	(arg1,
116 				 */
117 				getline();
118 				curline = lineno;
119 				if (func_entry()) {
120 					++level;
121 					pfnote(tok,curline);
122 				}
123 				break;
124 			}
125 			goto storec;
126 
127 		/*
128 		 * semi-colons indicate the end of a typedef; if we find a
129 		 * typedef we search for the next semi-colon of the same
130 		 * level as the typedef.  Ignoring "structs", they are
131 		 * tricky, since you can find:
132 		 *
133 		 *	"typedef long time_t;"
134 		 *	"typedef unsigned int u_int;"
135 		 *	"typedef unsigned int u_int [10];"
136 		 *
137 		 * If looking at a typedef, we save a copy of the last token
138 		 * found.  Then, when we find the ';' we take the current
139 		 * token if it starts with a valid token name, else we take
140 		 * the one we saved.  There's probably some reasonable
141 		 * alternative to this...
142 		 */
143 		case ';':
144 			if (t_def && level == t_level) {
145 				t_def = NO;
146 				getline();
147 				if (sp != tok)
148 					*sp = EOS;
149 				pfnote(tok,lineno);
150 				break;
151 			}
152 			goto storec;
153 
154 		/*
155 		 * store characters until one that can't be part of a token
156 		 * comes along; check the current token against certain
157 		 * reserved words.
158 		 */
159 		default:
160 storec:			if (!intoken(c)) {
161 				if (sp == tok)
162 					break;
163 				*sp = EOS;
164 				if (tflag) {
165 					/* no typedefs inside typedefs */
166 					if (!t_def && !bcmp(tok,"typedef",8)) {
167 						t_def = YES;
168 						t_level = level;
169 						break;
170 					}
171 					/* catch "typedef struct" */
172 					if ((!t_def || t_level < level)
173 					    && (!bcmp(tok,"struct",7)
174 					    || !bcmp(tok,"union",6)
175 					    || !bcmp(tok,"enum",5))) {
176 						/*
177 						 * get line immediately;
178 						 * may change before '{'
179 						 */
180 						getline();
181 						if (str_entry(c))
182 							++level;
183 						break;
184 					}
185 				}
186 				sp = tok;
187 			}
188 			else if (sp != tok || begtoken(c)) {
189 				*sp++ = c;
190 				token = YES;
191 			}
192 			continue;
193 		}
194 		sp = tok;
195 		token = NO;
196 	}
197 }
198 
199 /*
200  * func_entry --
201  *	handle a function reference
202  */
203 static
204 func_entry()
205 {
206 	register int	c;		/* current character */
207 
208 	/*
209 	 * we assume that the character after a function's right paren
210 	 * is a token character if it's a function and a non-token
211 	 * character if it's a declaration.  Comments don't count...
212 	 */
213 	(void)skip_key((int)')');
214 	for (;;) {
215 		while (GETC(!=,EOF) && iswhite(c))
216 			if (c == (int)'\n')
217 				SETLINE;
218 		if (intoken(c) || c == (int)'{')
219 			break;
220 		if (c == (int)'/' && GETC(==,'*'))
221 			skip_comment();
222 		else {				/* don't ever "read" '/' */
223 			(void)ungetc(c,inf);
224 			return(NO);
225 		}
226 	}
227 	if (c != (int)'{')
228 		(void)skip_key((int)'{');
229 	return(YES);
230 }
231 
232 /*
233  * hash_entry --
234  *	handle a line starting with a '#'
235  */
236 static
237 hash_entry()
238 {
239 	extern int	dflag;		/* -d: non-macro defines */
240 	register int	c,		/* character read */
241 			curline;	/* line started on */
242 	register char	*sp;		/* buffer pointer */
243 	char	tok[MAXTOKEN];		/* storage buffer */
244 
245 	curline = lineno;
246 	for (sp = tok;;) {		/* get next token */
247 		if (GETC(==,EOF))
248 			return;
249 		if (iswhite(c))
250 			break;
251 		*sp++ = c;
252 	}
253 	*sp = EOS;
254 	if (bcmp(tok,"define",6))	/* only interested in #define's */
255 		goto skip;
256 	for (;;) {			/* this doesn't handle "#define \n" */
257 		if (GETC(==,EOF))
258 			return;
259 		if (!iswhite(c))
260 			break;
261 	}
262 	for (sp = tok;;) {		/* get next token */
263 		*sp++ = c;
264 		if (GETC(==,EOF))
265 			return;
266 		/*
267 		 * this is where it DOESN'T handle
268 		 * "#define \n"
269 		 */
270 		if (!intoken(c))
271 			break;
272 	}
273 	*sp = EOS;
274 	if (dflag || c == (int)'(') {	/* only want macros */
275 		getline();
276 		pfnote(tok,curline);
277 	}
278 skip:	if (c == (int)'\n') {		/* get rid of rest of define */
279 		SETLINE
280 		if (*(sp - 1) != '\\')
281 			return;
282 	}
283 	(void)skip_key((int)'\n');
284 }
285 
286 /*
287  * str_entry --
288  *	handle a struct, union or enum entry
289  */
290 static
291 str_entry(c)
292 	register int	c;		/* current character */
293 {
294 	register char	*sp;		/* buffer pointer */
295 	int	curline;		/* line started on */
296 	char	tok[BUFSIZ];		/* storage buffer */
297 
298 	curline = lineno;
299 	while (iswhite(c))
300 		if (GETC(==,EOF))
301 			return(NO);
302 	if (c == (int)'{')		/* it was "struct {" */
303 		return(YES);
304 	for (sp = tok;;) {		/* get next token */
305 		*sp++ = c;
306 		if (GETC(==,EOF))
307 			return(NO);
308 		if (!intoken(c))
309 			break;
310 	}
311 	switch ((char)c) {
312 		case '{':		/* it was "struct foo{" */
313 			--sp;
314 			break;
315 		case '\n':		/* it was "struct foo\n" */
316 			SETLINE;
317 			/*FALLTHROUGH*/
318 		default:		/* probably "struct foo " */
319 			while (GETC(!=,EOF))
320 				if (!iswhite(c))
321 					break;
322 			if (c != (int)'{') {
323 				(void)ungetc(c, inf);
324 				return(NO);
325 			}
326 	}
327 	*sp = EOS;
328 	pfnote(tok,curline);
329 	return(YES);
330 }
331 
332 /*
333  * skip_comment --
334  *	skip over comment
335  */
336 skip_comment()
337 {
338 	register int	c,		/* character read */
339 			star;		/* '*' flag */
340 
341 	for (star = 0;GETC(!=,EOF);)
342 		switch((char)c) {
343 			/* comments don't nest, nor can they be escaped. */
344 			case '*':
345 				star = YES;
346 				break;
347 			case '/':
348 				if (star)
349 					return;
350 				break;
351 			case '\n':
352 				SETLINE;
353 				/*FALLTHROUGH*/
354 			default:
355 				star = NO;
356 		}
357 }
358 
359 /*
360  * skip_key --
361  *	skip to next char "key"
362  */
363 skip_key(key)
364 	register int	key;
365 {
366 	register int	c,
367 			skip,
368 			retval;
369 
370 	for (skip = retval = NO;GETC(!=,EOF);)
371 		switch((char)c) {
372 		case '\\':		/* a backslash escapes anything */
373 			skip = !skip;	/* we toggle in case it's "\\" */
374 			break;
375 		case ';':		/* special case for yacc; if one */
376 		case '|':		/* of these chars occurs, we may */
377 			retval = YES;	/* have moved out of the rule */
378 			break;		/* not used by C */
379 		case '\n':
380 			SETLINE;
381 			/*FALLTHROUGH*/
382 		default:
383 			if (c == key && !skip)
384 				return(retval);
385 			skip = NO;
386 		}
387 	return(retval);
388 }
389