xref: /original-bsd/usr.bin/ctags/C.c (revision 75e0fc1d)
1 /*
2  * Copyright (c) 1987 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  */
17 
18 #ifndef lint
19 static char sccsid[] = "@(#)C.c	5.3 (Berkeley) 12/31/88";
20 #endif /* not lint */
21 
22 #include <stdio.h>
23 #include <ctags.h>
24 
25 /*
26  * c_entries --
27  *	read .c and .h files and call appropriate routines
28  */
29 c_entries()
30 {
31 	extern int	tflag;		/* -t: create tags for typedefs */
32 	register int	c,		/* current character */
33 			level;		/* brace level */
34 	register char	*sp;		/* buffer pointer */
35 	int	token,			/* if reading a token */
36 		t_def,			/* if reading a typedef */
37 		t_level;		/* typedef's brace level */
38 	char	tok[MAXTOKEN];		/* token buffer */
39 
40 	lineftell = ftell(inf);
41 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
42 	while (GETC(!=,EOF)) {
43 
44 	switch ((char)c) {
45 		/*
46 		 * Here's where it DOESN'T handle:
47 		 *	foo(a)
48 		 *	{
49 		 *	#ifdef notdef
50 		 *		}
51 		 *	#endif
52 		 *		if (a)
53 		 *			puts("hello, world");
54 		 *	}
55 		 */
56 		case '{':
57 			++level;
58 			goto endtok;
59 		case '}':
60 			/*
61 			 * if level goes below zero, try and fix
62 			 * it, even though we've already messed up
63 			 */
64 			if (--level < 0)
65 				level = 0;
66 			goto endtok;
67 
68 		case '\n':
69 			SETLINE;
70 			/*
71 			 * the above 3 cases are similar in that they
72 			 * are special characters that also end tokens.
73 			 */
74 endtok:			if (sp > tok) {
75 				*sp = EOS;
76 				token = YES;
77 				sp = tok;
78 			}
79 			else
80 				token = NO;
81 			continue;
82 
83 		/* we ignore quoted strings and comments in their entirety */
84 		case '"':
85 		case '\'':
86 			(void)skip_key(c);
87 			break;
88 
89 		/*
90 		 * comments can be fun; note the state is unchanged after
91 		 * return, in case we found:
92 		 *	"foo() XX comment XX { int bar; }"
93 		 */
94 		case '/':
95 			if (GETC(==,'*')) {
96 				skip_comment();
97 				continue;
98 			}
99 			(void)ungetc(c,inf);
100 			c = '/';
101 			goto storec;
102 
103 		/* hash marks flag #define's. */
104 		case '#':
105 			if (sp == tok) {
106 				hash_entry();
107 				break;
108 			}
109 			goto storec;
110 
111 		/*
112 	 	 * if we have a current token, parenthesis on
113 		 * level zero indicates a function.
114 		 */
115 		case '(':
116 			if (!level && token) {
117 				int	curline;
118 
119 				if (sp != tok)
120 					*sp = EOS;
121 				/*
122 				 * grab the line immediately, we may
123 				 * already be wrong, for example,
124 				 *	foo\n
125 				 *	(arg1,
126 				 */
127 				getline();
128 				curline = lineno;
129 				if (func_entry()) {
130 					++level;
131 					pfnote(tok,curline);
132 				}
133 				break;
134 			}
135 			goto storec;
136 
137 		/*
138 		 * semi-colons indicate the end of a typedef; if we find a
139 		 * typedef we search for the next semi-colon of the same
140 		 * level as the typedef.  Ignoring "structs", they are
141 		 * tricky, since you can find:
142 		 *
143 		 *	"typedef long time_t;"
144 		 *	"typedef unsigned int u_int;"
145 		 *	"typedef unsigned int u_int [10];"
146 		 *
147 		 * If looking at a typedef, we save a copy of the last token
148 		 * found.  Then, when we find the ';' we take the current
149 		 * token if it starts with a valid token name, else we take
150 		 * the one we saved.  There's probably some reasonable
151 		 * alternative to this...
152 		 */
153 		case ';':
154 			if (t_def && level == t_level) {
155 				t_def = NO;
156 				getline();
157 				if (sp != tok)
158 					*sp = EOS;
159 				pfnote(tok,lineno);
160 				break;
161 			}
162 			goto storec;
163 
164 		/*
165 		 * store characters until one that can't be part of a token
166 		 * comes along; check the current token against certain
167 		 * reserved words.
168 		 */
169 		default:
170 storec:			if (!intoken(c)) {
171 				if (sp == tok)
172 					break;
173 				*sp = EOS;
174 				if (tflag) {
175 					/* no typedefs inside typedefs */
176 					if (!t_def && !bcmp(tok,"typedef",8)) {
177 						t_def = YES;
178 						t_level = level;
179 						break;
180 					}
181 					/* catch "typedef struct" */
182 					if ((!t_def || t_level < level)
183 					    && (!bcmp(tok,"struct",7)
184 					    || !bcmp(tok,"union",6)
185 					    || !bcmp(tok,"enum",5))) {
186 						/*
187 						 * get line immediately;
188 						 * may change before '{'
189 						 */
190 						getline();
191 						if (str_entry(c))
192 							++level;
193 						break;
194 					}
195 				}
196 				sp = tok;
197 			}
198 			else if (sp != tok || begtoken(c)) {
199 				*sp++ = c;
200 				token = YES;
201 			}
202 			continue;
203 		}
204 		sp = tok;
205 		token = NO;
206 	}
207 }
208 
209 /*
210  * func_entry --
211  *	handle a function reference
212  */
213 static
214 func_entry()
215 {
216 	register int	c;		/* current character */
217 
218 	/*
219 	 * we assume that the character after a function's right paren
220 	 * is a token character if it's a function and a non-token
221 	 * character if it's a declaration.  Comments don't count...
222 	 */
223 	(void)skip_key((int)')');
224 	for (;;) {
225 		while (GETC(!=,EOF) && iswhite(c))
226 			if (c == (int)'\n')
227 				SETLINE;
228 		if (intoken(c) || c == (int)'{')
229 			break;
230 		if (c == (int)'/' && GETC(==,'*'))
231 			skip_comment();
232 		else {				/* don't ever "read" '/' */
233 			(void)ungetc(c,inf);
234 			return(NO);
235 		}
236 	}
237 	if (c != (int)'{')
238 		(void)skip_key((int)'{');
239 	return(YES);
240 }
241 
242 /*
243  * hash_entry --
244  *	handle a line starting with a '#'
245  */
246 static
247 hash_entry()
248 {
249 	extern int	dflag;		/* -d: non-macro defines */
250 	register int	c,		/* character read */
251 			curline;	/* line started on */
252 	register char	*sp;		/* buffer pointer */
253 	char	tok[MAXTOKEN];		/* storage buffer */
254 
255 	curline = lineno;
256 	for (sp = tok;;) {		/* get next token */
257 		if (GETC(==,EOF))
258 			return;
259 		if (iswhite(c))
260 			break;
261 		*sp++ = c;
262 	}
263 	*sp = EOS;
264 	if (bcmp(tok,"define",6))	/* only interested in #define's */
265 		goto skip;
266 	for (;;) {			/* this doesn't handle "#define \n" */
267 		if (GETC(==,EOF))
268 			return;
269 		if (!iswhite(c))
270 			break;
271 	}
272 	for (sp = tok;;) {		/* get next token */
273 		*sp++ = c;
274 		if (GETC(==,EOF))
275 			return;
276 		/*
277 		 * this is where it DOESN'T handle
278 		 * "#define \n"
279 		 */
280 		if (!intoken(c))
281 			break;
282 	}
283 	*sp = EOS;
284 	if (dflag || c == (int)'(') {	/* only want macros */
285 		getline();
286 		pfnote(tok,curline);
287 	}
288 skip:	if (c == (int)'\n') {		/* get rid of rest of define */
289 		SETLINE
290 		if (*(sp - 1) != '\\')
291 			return;
292 	}
293 	(void)skip_key((int)'\n');
294 }
295 
296 /*
297  * str_entry --
298  *	handle a struct, union or enum entry
299  */
300 static
301 str_entry(c)
302 	register int	c;		/* current character */
303 {
304 	register char	*sp;		/* buffer pointer */
305 	int	curline;		/* line started on */
306 	char	tok[BUFSIZ];		/* storage buffer */
307 
308 	curline = lineno;
309 	while (iswhite(c))
310 		if (GETC(==,EOF))
311 			return(NO);
312 	if (c == (int)'{')		/* it was "struct {" */
313 		return(YES);
314 	for (sp = tok;;) {		/* get next token */
315 		*sp++ = c;
316 		if (GETC(==,EOF))
317 			return(NO);
318 		if (!intoken(c))
319 			break;
320 	}
321 	switch ((char)c) {
322 		case '{':		/* it was "struct foo{" */
323 			--sp;
324 			break;
325 		case '\n':		/* it was "struct foo\n" */
326 			SETLINE;
327 			/*FALLTHROUGH*/
328 		default:		/* probably "struct foo " */
329 			while (GETC(!=,EOF))
330 				if (!iswhite(c))
331 					break;
332 			if (c != (int)'{') {
333 				(void)ungetc(c, inf);
334 				return(NO);
335 			}
336 	}
337 	*sp = EOS;
338 	pfnote(tok,curline);
339 	return(YES);
340 }
341 
342 /*
343  * skip_comment --
344  *	skip over comment
345  */
346 skip_comment()
347 {
348 	register int	c,		/* character read */
349 			star;		/* '*' flag */
350 
351 	for (star = 0;GETC(!=,EOF);)
352 		switch((char)c) {
353 			/* comments don't nest, nor can they be escaped. */
354 			case '*':
355 				star = YES;
356 				break;
357 			case '/':
358 				if (star)
359 					return;
360 				break;
361 			case '\n':
362 				SETLINE;
363 				/*FALLTHROUGH*/
364 			default:
365 				star = NO;
366 		}
367 }
368 
369 /*
370  * skip_key --
371  *	skip to next char "key"
372  */
373 skip_key(key)
374 	register int	key;
375 {
376 	register int	c,
377 			skip,
378 			retval;
379 
380 	for (skip = retval = NO;GETC(!=,EOF);)
381 		switch((char)c) {
382 		case '\\':		/* a backslash escapes anything */
383 			skip = !skip;	/* we toggle in case it's "\\" */
384 			break;
385 		case ';':		/* special case for yacc; if one */
386 		case '|':		/* of these chars occurs, we may */
387 			retval = YES;	/* have moved out of the rule */
388 			break;		/* not used by C */
389 		case '\n':
390 			SETLINE;
391 			/*FALLTHROUGH*/
392 		default:
393 			if (c == key && !skip)
394 				return(retval);
395 			skip = NO;
396 		}
397 	return(retval);
398 }
399