xref: /original-bsd/usr.bin/ctags/C.c (revision 92c664ec)
1 /*
2  * Copyright (c) 1987 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
7 #ifndef lint
8 static char sccsid[] = "@(#)C.c	5.1 (Berkeley) 03/16/87";
9 #endif not lint
10 
11 #include <stdio.h>
12 #include <ctags.h>
13 
14 /*
15  * c_entries --
16  *	read .c and .h files and call appropriate routines
17  */
18 c_entries()
19 {
20 	extern int	tflag;		/* -t: create tags for typedefs */
21 	register int	c,		/* current character */
22 			level;		/* brace level */
23 	register char	*sp;		/* buffer pointer */
24 	int	token,			/* if reading a token */
25 		t_def,			/* if reading a typedef */
26 		t_level;		/* typedef's brace level */
27 	char	tok[MAXTOKEN];		/* token buffer */
28 
29 	lineftell = ftell(inf);
30 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
31 	while (GETC(!=,EOF)) {
32 
33 	switch ((char)c) {
34 		/*
35 		 * Here's where it DOESN'T handle:
36 		 *	foo(a)
37 		 *	{
38 		 *	#ifdef notdef
39 		 *		}
40 		 *	#endif
41 		 *		if (a)
42 		 *			puts("hello, world");
43 		 *	}
44 		 */
45 		case '{':
46 			++level;
47 			goto endtok;
48 		case '}':
49 			/*
50 			 * if level goes below zero, try and fix
51 			 * it, even though we've already messed up
52 			 */
53 			if (--level < 0)
54 				level = 0;
55 			goto endtok;
56 
57 		case '\n':
58 			SETLINE;
59 			/*
60 			 * the above 3 cases are similar in that they
61 			 * are special characters that also end tokens.
62 			 */
63 endtok:			if (sp > tok) {
64 				*sp = EOS;
65 				token = YES;
66 				sp = tok;
67 			}
68 			else
69 				token = NO;
70 			continue;
71 
72 		/* we ignore quoted strings and comments in their entirety */
73 		case '"':
74 		case '\'':
75 			(void)skip_key(c);
76 			break;
77 
78 		/*
79 		 * comments can be fun; note the state is unchanged after
80 		 * return, in case we found:
81 		 *	"foo() XX comment XX { int bar; }"
82 		 */
83 		case '/':
84 			if (GETC(==,'*')) {
85 				skip_comment();
86 				continue;
87 			}
88 			(void)ungetc(c,inf);
89 			c = '/';
90 			goto storec;
91 
92 		/* hash marks flag #define's. */
93 		case '#':
94 			if (sp == tok) {
95 				hash_entry();
96 				break;
97 			}
98 			goto storec;
99 
100 		/*
101 	 	 * if we have a current token, parenthesis on
102 		 * level zero indicates a function.
103 		 */
104 		case '(':
105 			if (!level && token) {
106 				int	curline;
107 
108 				if (sp != tok)
109 					*sp = EOS;
110 				/*
111 				 * grab the line immediately, we may
112 				 * already be wrong, for example,
113 				 *	foo\n
114 				 *	(arg1,
115 				 */
116 				getline();
117 				curline = lineno;
118 				if (func_entry()) {
119 					++level;
120 					pfnote(tok,curline);
121 				}
122 				break;
123 			}
124 			goto storec;
125 
126 		/*
127 		 * semi-colons indicate the end of a typedef; if we find a
128 		 * typedef we search for the next semi-colon of the same
129 		 * level as the typedef.  Ignoring "structs", they are
130 		 * tricky, since you can find:
131 		 *
132 		 *	"typedef long time_t;"
133 		 *	"typedef unsigned int u_int;"
134 		 *	"typedef unsigned int u_int [10];"
135 		 *
136 		 * If looking at a typedef, we save a copy of the last token
137 		 * found.  Then, when we find the ';' we take the current
138 		 * token if it starts with a valid token name, else we take
139 		 * the one we saved.  There's probably some reasonable
140 		 * alternative to this...
141 		 */
142 		case ';':
143 			if (t_def && level == t_level) {
144 				t_def = NO;
145 				getline();
146 				if (sp != tok)
147 					*sp = EOS;
148 				pfnote(tok,lineno);
149 				break;
150 			}
151 			goto storec;
152 
153 		/*
154 		 * store characters until one that can't be part of a token
155 		 * comes along; check the current token against certain
156 		 * reserved words.
157 		 */
158 		default:
159 storec:			if (!intoken(c)) {
160 				if (sp == tok)
161 					break;
162 				*sp = EOS;
163 				if (tflag) {
164 					/* no typedefs inside typedefs */
165 					if (!t_def && !bcmp(tok,"typedef",8)) {
166 						t_def = YES;
167 						t_level = level;
168 						break;
169 					}
170 					/* catch "typedef struct" */
171 					if ((!t_def || t_level < level)
172 					    && (!bcmp(tok,"struct",7)
173 					    || !bcmp(tok,"union",6)
174 					    || !bcmp(tok,"enum",5))) {
175 						/*
176 						 * get line immediately;
177 						 * may change before '{'
178 						 */
179 						getline();
180 						if (str_entry(c))
181 							++level;
182 						break;
183 					}
184 				}
185 				sp = tok;
186 			}
187 			else if (sp != tok || begtoken(c)) {
188 				*sp++ = c;
189 				token = YES;
190 			}
191 			continue;
192 		}
193 		sp = tok;
194 		token = NO;
195 	}
196 }
197 
198 /*
199  * func_entry --
200  *	handle a function reference
201  */
202 static
203 func_entry()
204 {
205 	register int	c;		/* current character */
206 
207 	/*
208 	 * we assume that the character after a function's right paren
209 	 * is a token character if it's a function and a non-token
210 	 * character if it's a declaration.  Comments don't count...
211 	 */
212 	(void)skip_key((int)')');
213 	for (;;) {
214 		while (GETC(!=,EOF) && iswhite(c))
215 			if (c == (int)'\n')
216 				SETLINE;
217 		if (intoken(c) || c == (int)'{')
218 			break;
219 		if (c == (int)'/' && GETC(==,'*'))
220 			skip_comment();
221 		else {				/* don't ever "read" '/' */
222 			(void)ungetc(c,inf);
223 			return(NO);
224 		}
225 	}
226 	if (c != (int)'{')
227 		(void)skip_key((int)'{');
228 	return(YES);
229 }
230 
231 /*
232  * hash_entry --
233  *	handle a line starting with a '#'
234  */
235 static
236 hash_entry()
237 {
238 	extern int	dflag;		/* -d: non-macro defines */
239 	register int	c,		/* character read */
240 			curline;	/* line started on */
241 	register char	*sp;		/* buffer pointer */
242 	char	tok[MAXTOKEN];		/* storage buffer */
243 
244 	curline = lineno;
245 	for (sp = tok;;) {		/* get next token */
246 		if (GETC(==,EOF))
247 			return;
248 		if (iswhite(c))
249 			break;
250 		*sp++ = c;
251 	}
252 	*sp = EOS;
253 	if (bcmp(tok,"define",6))	/* only interested in #define's */
254 		goto skip;
255 	for (;;) {			/* this doesn't handle "#define \n" */
256 		if (GETC(==,EOF))
257 			return;
258 		if (!iswhite(c))
259 			break;
260 	}
261 	for (sp = tok;;) {		/* get next token */
262 		*sp++ = c;
263 		if (GETC(==,EOF))
264 			return;
265 		/*
266 		 * this is where it DOESN'T handle
267 		 * "#define \n"
268 		 */
269 		if (!intoken(c))
270 			break;
271 	}
272 	*sp = EOS;
273 	if (dflag || c == (int)'(') {	/* only want macros */
274 		getline();
275 		pfnote(tok,curline);
276 	}
277 skip:	if (c == (int)'\n') {		/* get rid of rest of define */
278 		SETLINE
279 		if (*(sp - 1) != '\\')
280 			return;
281 	}
282 	(void)skip_key((int)'\n');
283 }
284 
285 /*
286  * str_entry --
287  *	handle a struct, union or enum entry
288  */
289 static
290 str_entry(c)
291 	register int	c;		/* current character */
292 {
293 	register char	*sp;		/* buffer pointer */
294 	int	curline;		/* line started on */
295 	char	tok[BUFSIZ];		/* storage buffer */
296 
297 	curline = lineno;
298 	while (iswhite(c))
299 		if (GETC(==,EOF))
300 			return(NO);
301 	if (c == (int)'{')		/* it was "struct {" */
302 		return(YES);
303 	for (sp = tok;;) {		/* get next token */
304 		*sp++ = c;
305 		if (GETC(==,EOF))
306 			return(NO);
307 		if (!intoken(c))
308 			break;
309 	}
310 	switch ((char)c) {
311 		case '{':		/* it was "struct foo{" */
312 			--sp;
313 			break;
314 		case '\n':		/* it was "struct foo\n" */
315 			SETLINE;
316 			/*FALLTHROUGH*/
317 		default:		/* probably "struct foo " */
318 			while (GETC(!=,EOF))
319 				if (!iswhite(c))
320 					break;
321 			if (c != (int)'{')
322 				return(NO);
323 	}
324 	*sp = EOS;
325 	pfnote(tok,curline);
326 	return(YES);
327 }
328 
329 /*
330  * skip_comment --
331  *	skip over comment
332  */
333 skip_comment()
334 {
335 	register int	c,		/* character read */
336 			star;		/* '*' flag */
337 
338 	for (star = 0;GETC(!=,EOF);)
339 		switch((char)c) {
340 			/* comments don't nest, nor can they be escaped. */
341 			case '*':
342 				star = YES;
343 				break;
344 			case '/':
345 				if (star)
346 					return;
347 				break;
348 			case '\n':
349 				SETLINE;
350 				/*FALLTHROUGH*/
351 			default:
352 				star = NO;
353 		}
354 }
355 
356 /*
357  * skip_key --
358  *	skip to next char "key"
359  */
360 skip_key(key)
361 	register int	key;
362 {
363 	register int	c,
364 			skip,
365 			retval;
366 
367 	for (skip = retval = NO;GETC(!=,EOF);)
368 		switch((char)c) {
369 		case '\\':		/* a backslash escapes anything */
370 			skip = !skip;	/* we toggle in case it's "\\" */
371 			break;
372 		case ';':		/* special case for yacc; if one */
373 		case '|':		/* of these chars occurs, we may */
374 			retval = YES;	/* have moved out of the rule */
375 			break;		/* not used by C */
376 		case '\n':
377 			SETLINE;
378 			/*FALLTHROUGH*/
379 		default:
380 			if (c == key && !skip)
381 				return(retval);
382 			skip = NO;
383 		}
384 	return(retval);
385 }
386