xref: /original-bsd/usr.bin/ctags/C.c (revision 14054b48)
1 /*
2  * Copyright (c) 1987 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)C.c	5.5 (Berkeley) 02/26/91";
10 #endif /* not lint */
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include "ctags.h"
15 
16 static int func_entry(), str_entry();
17 static void hash_entry();
18 
19 /*
20  * c_entries --
21  *	read .c and .h files and call appropriate routines
22  */
23 c_entries()
24 {
25 	extern int	tflag;		/* -t: create tags for typedefs */
26 	register int	c,		/* current character */
27 			level;		/* brace level */
28 	register char	*sp;		/* buffer pointer */
29 	int	token,			/* if reading a token */
30 		t_def,			/* if reading a typedef */
31 		t_level;		/* typedef's brace level */
32 	char	tok[MAXTOKEN];		/* token buffer */
33 
34 	lineftell = ftell(inf);
35 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
36 	while (GETC(!=,EOF)) {
37 
38 	switch ((char)c) {
39 		/*
40 		 * Here's where it DOESN'T handle:
41 		 *	foo(a)
42 		 *	{
43 		 *	#ifdef notdef
44 		 *		}
45 		 *	#endif
46 		 *		if (a)
47 		 *			puts("hello, world");
48 		 *	}
49 		 */
50 		case '{':
51 			++level;
52 			goto endtok;
53 		case '}':
54 			/*
55 			 * if level goes below zero, try and fix
56 			 * it, even though we've already messed up
57 			 */
58 			if (--level < 0)
59 				level = 0;
60 			goto endtok;
61 
62 		case '\n':
63 			SETLINE;
64 			/*
65 			 * the above 3 cases are similar in that they
66 			 * are special characters that also end tokens.
67 			 */
68 endtok:			if (sp > tok) {
69 				*sp = EOS;
70 				token = YES;
71 				sp = tok;
72 			}
73 			else
74 				token = NO;
75 			continue;
76 
77 		/* we ignore quoted strings and comments in their entirety */
78 		case '"':
79 		case '\'':
80 			(void)skip_key(c);
81 			break;
82 
83 		/*
84 		 * comments can be fun; note the state is unchanged after
85 		 * return, in case we found:
86 		 *	"foo() XX comment XX { int bar; }"
87 		 */
88 		case '/':
89 			if (GETC(==,'*')) {
90 				skip_comment();
91 				continue;
92 			}
93 			(void)ungetc(c,inf);
94 			c = '/';
95 			goto storec;
96 
97 		/* hash marks flag #define's. */
98 		case '#':
99 			if (sp == tok) {
100 				hash_entry();
101 				break;
102 			}
103 			goto storec;
104 
105 		/*
106 	 	 * if we have a current token, parenthesis on
107 		 * level zero indicates a function.
108 		 */
109 		case '(':
110 			if (!level && token) {
111 				int	curline;
112 
113 				if (sp != tok)
114 					*sp = EOS;
115 				/*
116 				 * grab the line immediately, we may
117 				 * already be wrong, for example,
118 				 *	foo\n
119 				 *	(arg1,
120 				 */
121 				getline();
122 				curline = lineno;
123 				if (func_entry()) {
124 					++level;
125 					pfnote(tok,curline);
126 				}
127 				break;
128 			}
129 			goto storec;
130 
131 		/*
132 		 * semi-colons indicate the end of a typedef; if we find a
133 		 * typedef we search for the next semi-colon of the same
134 		 * level as the typedef.  Ignoring "structs", they are
135 		 * tricky, since you can find:
136 		 *
137 		 *	"typedef long time_t;"
138 		 *	"typedef unsigned int u_int;"
139 		 *	"typedef unsigned int u_int [10];"
140 		 *
141 		 * If looking at a typedef, we save a copy of the last token
142 		 * found.  Then, when we find the ';' we take the current
143 		 * token if it starts with a valid token name, else we take
144 		 * the one we saved.  There's probably some reasonable
145 		 * alternative to this...
146 		 */
147 		case ';':
148 			if (t_def && level == t_level) {
149 				t_def = NO;
150 				getline();
151 				if (sp != tok)
152 					*sp = EOS;
153 				pfnote(tok,lineno);
154 				break;
155 			}
156 			goto storec;
157 
158 		/*
159 		 * store characters until one that can't be part of a token
160 		 * comes along; check the current token against certain
161 		 * reserved words.
162 		 */
163 		default:
164 storec:			if (!intoken(c)) {
165 				if (sp == tok)
166 					break;
167 				*sp = EOS;
168 				if (tflag) {
169 					/* no typedefs inside typedefs */
170 					if (!t_def && !bcmp(tok,"typedef",8)) {
171 						t_def = YES;
172 						t_level = level;
173 						break;
174 					}
175 					/* catch "typedef struct" */
176 					if ((!t_def || t_level < level)
177 					    && (!bcmp(tok,"struct",7)
178 					    || !bcmp(tok,"union",6)
179 					    || !bcmp(tok,"enum",5))) {
180 						/*
181 						 * get line immediately;
182 						 * may change before '{'
183 						 */
184 						getline();
185 						if (str_entry(c))
186 							++level;
187 						break;
188 					}
189 				}
190 				sp = tok;
191 			}
192 			else if (sp != tok || begtoken(c)) {
193 				*sp++ = c;
194 				token = YES;
195 			}
196 			continue;
197 		}
198 		sp = tok;
199 		token = NO;
200 	}
201 }
202 
203 /*
204  * func_entry --
205  *	handle a function reference
206  */
207 static
208 func_entry()
209 {
210 	register int	c;		/* current character */
211 
212 	/*
213 	 * we assume that the character after a function's right paren
214 	 * is a token character if it's a function and a non-token
215 	 * character if it's a declaration.  Comments don't count...
216 	 */
217 	(void)skip_key((int)')');
218 	for (;;) {
219 		while (GETC(!=,EOF) && iswhite(c))
220 			if (c == (int)'\n')
221 				SETLINE;
222 		if (intoken(c) || c == (int)'{')
223 			break;
224 		if (c == (int)'/' && GETC(==,'*'))
225 			skip_comment();
226 		else {				/* don't ever "read" '/' */
227 			(void)ungetc(c,inf);
228 			return(NO);
229 		}
230 	}
231 	if (c != (int)'{')
232 		(void)skip_key((int)'{');
233 	return(YES);
234 }
235 
236 /*
237  * hash_entry --
238  *	handle a line starting with a '#'
239  */
240 static void
241 hash_entry()
242 {
243 	extern int	dflag;		/* -d: non-macro defines */
244 	register int	c,		/* character read */
245 			curline;	/* line started on */
246 	register char	*sp;		/* buffer pointer */
247 	char	tok[MAXTOKEN];		/* storage buffer */
248 
249 	curline = lineno;
250 	for (sp = tok;;) {		/* get next token */
251 		if (GETC(==,EOF))
252 			return;
253 		if (iswhite(c))
254 			break;
255 		*sp++ = c;
256 	}
257 	*sp = EOS;
258 	if (bcmp(tok,"define",6))	/* only interested in #define's */
259 		goto skip;
260 	for (;;) {			/* this doesn't handle "#define \n" */
261 		if (GETC(==,EOF))
262 			return;
263 		if (!iswhite(c))
264 			break;
265 	}
266 	for (sp = tok;;) {		/* get next token */
267 		*sp++ = c;
268 		if (GETC(==,EOF))
269 			return;
270 		/*
271 		 * this is where it DOESN'T handle
272 		 * "#define \n"
273 		 */
274 		if (!intoken(c))
275 			break;
276 	}
277 	*sp = EOS;
278 	if (dflag || c == (int)'(') {	/* only want macros */
279 		getline();
280 		pfnote(tok,curline);
281 	}
282 skip:	if (c == (int)'\n') {		/* get rid of rest of define */
283 		SETLINE
284 		if (*(sp - 1) != '\\')
285 			return;
286 	}
287 	(void)skip_key((int)'\n');
288 }
289 
290 /*
291  * str_entry --
292  *	handle a struct, union or enum entry
293  */
294 static
295 str_entry(c)
296 	register int	c;		/* current character */
297 {
298 	register char	*sp;		/* buffer pointer */
299 	int	curline;		/* line started on */
300 	char	tok[BUFSIZ];		/* storage buffer */
301 
302 	curline = lineno;
303 	while (iswhite(c))
304 		if (GETC(==,EOF))
305 			return(NO);
306 	if (c == (int)'{')		/* it was "struct {" */
307 		return(YES);
308 	for (sp = tok;;) {		/* get next token */
309 		*sp++ = c;
310 		if (GETC(==,EOF))
311 			return(NO);
312 		if (!intoken(c))
313 			break;
314 	}
315 	switch ((char)c) {
316 		case '{':		/* it was "struct foo{" */
317 			--sp;
318 			break;
319 		case '\n':		/* it was "struct foo\n" */
320 			SETLINE;
321 			/*FALLTHROUGH*/
322 		default:		/* probably "struct foo " */
323 			while (GETC(!=,EOF))
324 				if (!iswhite(c))
325 					break;
326 			if (c != (int)'{') {
327 				(void)ungetc(c, inf);
328 				return(NO);
329 			}
330 	}
331 	*sp = EOS;
332 	pfnote(tok,curline);
333 	return(YES);
334 }
335 
336 /*
337  * skip_comment --
338  *	skip over comment
339  */
340 skip_comment()
341 {
342 	register int	c,		/* character read */
343 			star;		/* '*' flag */
344 
345 	for (star = 0;GETC(!=,EOF);)
346 		switch((char)c) {
347 			/* comments don't nest, nor can they be escaped. */
348 			case '*':
349 				star = YES;
350 				break;
351 			case '/':
352 				if (star)
353 					return;
354 				break;
355 			case '\n':
356 				SETLINE;
357 				/*FALLTHROUGH*/
358 			default:
359 				star = NO;
360 		}
361 }
362 
363 /*
364  * skip_key --
365  *	skip to next char "key"
366  */
367 skip_key(key)
368 	register int	key;
369 {
370 	register int	c,
371 			skip,
372 			retval;
373 
374 	for (skip = retval = NO;GETC(!=,EOF);)
375 		switch((char)c) {
376 		case '\\':		/* a backslash escapes anything */
377 			skip = !skip;	/* we toggle in case it's "\\" */
378 			break;
379 		case ';':		/* special case for yacc; if one */
380 		case '|':		/* of these chars occurs, we may */
381 			retval = YES;	/* have moved out of the rule */
382 			break;		/* not used by C */
383 		case '\n':
384 			SETLINE;
385 			/*FALLTHROUGH*/
386 		default:
387 			if (c == key && !skip)
388 				return(retval);
389 			skip = NO;
390 		}
391 	return(retval);
392 }
393