xref: /386bsd/usr/src/usr.bin/ctags/C.c (revision a2142627)
1 /*
2  * Copyright (c) 1987 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 static char sccsid[] = "@(#)C.c	5.5 (Berkeley) 2/26/91";
36 #endif /* not lint */
37 
38 #include <stdio.h>
39 #include <string.h>
40 #include "ctags.h"
41 
42 static int func_entry(), str_entry();
43 static void hash_entry();
44 
45 /*
46  * c_entries --
47  *	read .c and .h files and call appropriate routines
48  */
c_entries()49 c_entries()
50 {
51 	extern int	tflag;		/* -t: create tags for typedefs */
52 	register int	c,		/* current character */
53 			level;		/* brace level */
54 	register char	*sp;		/* buffer pointer */
55 	int	token,			/* if reading a token */
56 		t_def,			/* if reading a typedef */
57 		t_level;		/* typedef's brace level */
58 	char	tok[MAXTOKEN];		/* token buffer */
59 
60 	lineftell = ftell(inf);
61 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
62 	while (GETC(!=,EOF)) {
63 
64 	switch ((char)c) {
65 		/*
66 		 * Here's where it DOESN'T handle:
67 		 *	foo(a)
68 		 *	{
69 		 *	#ifdef notdef
70 		 *		}
71 		 *	#endif
72 		 *		if (a)
73 		 *			puts("hello, world");
74 		 *	}
75 		 */
76 		case '{':
77 			++level;
78 			goto endtok;
79 		case '}':
80 			/*
81 			 * if level goes below zero, try and fix
82 			 * it, even though we've already messed up
83 			 */
84 			if (--level < 0)
85 				level = 0;
86 			goto endtok;
87 
88 		case '\n':
89 			SETLINE;
90 			/*
91 			 * the above 3 cases are similar in that they
92 			 * are special characters that also end tokens.
93 			 */
94 endtok:			if (sp > tok) {
95 				*sp = EOS;
96 				token = YES;
97 				sp = tok;
98 			}
99 			else
100 				token = NO;
101 			continue;
102 
103 		/* we ignore quoted strings and comments in their entirety */
104 		case '"':
105 		case '\'':
106 			(void)skip_key(c);
107 			break;
108 
109 		/*
110 		 * comments can be fun; note the state is unchanged after
111 		 * return, in case we found:
112 		 *	"foo() XX comment XX { int bar; }"
113 		 */
114 		case '/':
115 			if (GETC(==,'*')) {
116 				skip_comment();
117 				continue;
118 			}
119 			(void)ungetc(c,inf);
120 			c = '/';
121 			goto storec;
122 
123 		/* hash marks flag #define's. */
124 		case '#':
125 			if (sp == tok) {
126 				hash_entry();
127 				break;
128 			}
129 			goto storec;
130 
131 		/*
132 	 	 * if we have a current token, parenthesis on
133 		 * level zero indicates a function.
134 		 */
135 		case '(':
136 			if (!level && token) {
137 				int	curline;
138 
139 				if (sp != tok)
140 					*sp = EOS;
141 				/*
142 				 * grab the line immediately, we may
143 				 * already be wrong, for example,
144 				 *	foo\n
145 				 *	(arg1,
146 				 */
147 				getline();
148 				curline = lineno;
149 				if (func_entry()) {
150 					++level;
151 					pfnote(tok,curline);
152 				}
153 				break;
154 			}
155 			goto storec;
156 
157 		/*
158 		 * semi-colons indicate the end of a typedef; if we find a
159 		 * typedef we search for the next semi-colon of the same
160 		 * level as the typedef.  Ignoring "structs", they are
161 		 * tricky, since you can find:
162 		 *
163 		 *	"typedef long time_t;"
164 		 *	"typedef unsigned int u_int;"
165 		 *	"typedef unsigned int u_int [10];"
166 		 *
167 		 * If looking at a typedef, we save a copy of the last token
168 		 * found.  Then, when we find the ';' we take the current
169 		 * token if it starts with a valid token name, else we take
170 		 * the one we saved.  There's probably some reasonable
171 		 * alternative to this...
172 		 */
173 		case ';':
174 			if (t_def && level == t_level) {
175 				t_def = NO;
176 				getline();
177 				if (sp != tok)
178 					*sp = EOS;
179 				pfnote(tok,lineno);
180 				break;
181 			}
182 			goto storec;
183 
184 		/*
185 		 * store characters until one that can't be part of a token
186 		 * comes along; check the current token against certain
187 		 * reserved words.
188 		 */
189 		default:
190 storec:			if (!intoken(c)) {
191 				if (sp == tok)
192 					break;
193 				*sp = EOS;
194 				if (tflag) {
195 					/* no typedefs inside typedefs */
196 					if (!t_def && !bcmp(tok,"typedef",8)) {
197 						t_def = YES;
198 						t_level = level;
199 						break;
200 					}
201 					/* catch "typedef struct" */
202 					if ((!t_def || t_level < level)
203 					    && (!bcmp(tok,"struct",7)
204 					    || !bcmp(tok,"union",6)
205 					    || !bcmp(tok,"enum",5))) {
206 						/*
207 						 * get line immediately;
208 						 * may change before '{'
209 						 */
210 						getline();
211 						if (str_entry(c))
212 							++level;
213 						break;
214 					}
215 				}
216 				sp = tok;
217 			}
218 			else if (sp != tok || begtoken(c)) {
219 				*sp++ = c;
220 				token = YES;
221 			}
222 			continue;
223 		}
224 		sp = tok;
225 		token = NO;
226 	}
227 }
228 
229 /*
230  * func_entry --
231  *	handle a function reference
232  */
233 static
func_entry()234 func_entry()
235 {
236 	register int	c;		/* current character */
237 
238 	/*
239 	 * we assume that the character after a function's right paren
240 	 * is a token character if it's a function and a non-token
241 	 * character if it's a declaration.  Comments don't count...
242 	 */
243 	(void)skip_key((int)')');
244 	for (;;) {
245 		while (GETC(!=,EOF) && iswhite(c))
246 			if (c == (int)'\n')
247 				SETLINE;
248 		if (intoken(c) || c == (int)'{')
249 			break;
250 		if (c == (int)'/' && GETC(==,'*'))
251 			skip_comment();
252 		else {				/* don't ever "read" '/' */
253 			(void)ungetc(c,inf);
254 			return(NO);
255 		}
256 	}
257 	if (c != (int)'{')
258 		(void)skip_key((int)'{');
259 	return(YES);
260 }
261 
262 /*
263  * hash_entry --
264  *	handle a line starting with a '#'
265  */
266 static void
hash_entry()267 hash_entry()
268 {
269 	extern int	dflag;		/* -d: non-macro defines */
270 	register int	c,		/* character read */
271 			curline;	/* line started on */
272 	register char	*sp;		/* buffer pointer */
273 	char	tok[MAXTOKEN];		/* storage buffer */
274 
275 	curline = lineno;
276 	for (sp = tok;;) {		/* get next token */
277 		if (GETC(==,EOF))
278 			return;
279 		if (iswhite(c))
280 			break;
281 		*sp++ = c;
282 	}
283 	*sp = EOS;
284 	if (bcmp(tok,"define",6))	/* only interested in #define's */
285 		goto skip;
286 	for (;;) {			/* this doesn't handle "#define \n" */
287 		if (GETC(==,EOF))
288 			return;
289 		if (!iswhite(c))
290 			break;
291 	}
292 	for (sp = tok;;) {		/* get next token */
293 		*sp++ = c;
294 		if (GETC(==,EOF))
295 			return;
296 		/*
297 		 * this is where it DOESN'T handle
298 		 * "#define \n"
299 		 */
300 		if (!intoken(c))
301 			break;
302 	}
303 	*sp = EOS;
304 	if (dflag || c == (int)'(') {	/* only want macros */
305 		getline();
306 		pfnote(tok,curline);
307 	}
308 skip:	if (c == (int)'\n') {		/* get rid of rest of define */
309 		SETLINE
310 		if (*(sp - 1) != '\\')
311 			return;
312 	}
313 	(void)skip_key((int)'\n');
314 }
315 
316 /*
317  * str_entry --
318  *	handle a struct, union or enum entry
319  */
320 static
str_entry(c)321 str_entry(c)
322 	register int	c;		/* current character */
323 {
324 	register char	*sp;		/* buffer pointer */
325 	int	curline;		/* line started on */
326 	char	tok[BUFSIZ];		/* storage buffer */
327 
328 	curline = lineno;
329 	while (iswhite(c))
330 		if (GETC(==,EOF))
331 			return(NO);
332 	if (c == (int)'{')		/* it was "struct {" */
333 		return(YES);
334 	for (sp = tok;;) {		/* get next token */
335 		*sp++ = c;
336 		if (GETC(==,EOF))
337 			return(NO);
338 		if (!intoken(c))
339 			break;
340 	}
341 	switch ((char)c) {
342 		case '{':		/* it was "struct foo{" */
343 			--sp;
344 			break;
345 		case '\n':		/* it was "struct foo\n" */
346 			SETLINE;
347 			/*FALLTHROUGH*/
348 		default:		/* probably "struct foo " */
349 			while (GETC(!=,EOF))
350 				if (!iswhite(c))
351 					break;
352 			if (c != (int)'{') {
353 				(void)ungetc(c, inf);
354 				return(NO);
355 			}
356 	}
357 	*sp = EOS;
358 	pfnote(tok,curline);
359 	return(YES);
360 }
361 
362 /*
363  * skip_comment --
364  *	skip over comment
365  */
skip_comment()366 skip_comment()
367 {
368 	register int	c,		/* character read */
369 			star;		/* '*' flag */
370 
371 	for (star = 0;GETC(!=,EOF);)
372 		switch((char)c) {
373 			/* comments don't nest, nor can they be escaped. */
374 			case '*':
375 				star = YES;
376 				break;
377 			case '/':
378 				if (star)
379 					return;
380 				break;
381 			case '\n':
382 				SETLINE;
383 				/*FALLTHROUGH*/
384 			default:
385 				star = NO;
386 		}
387 }
388 
389 /*
390  * skip_key --
391  *	skip to next char "key"
392  */
skip_key(key)393 skip_key(key)
394 	register int	key;
395 {
396 	register int	c,
397 			skip,
398 			retval;
399 
400 	for (skip = retval = NO;GETC(!=,EOF);)
401 		switch((char)c) {
402 		case '\\':		/* a backslash escapes anything */
403 			skip = !skip;	/* we toggle in case it's "\\" */
404 			break;
405 		case ';':		/* special case for yacc; if one */
406 		case '|':		/* of these chars occurs, we may */
407 			retval = YES;	/* have moved out of the rule */
408 			break;		/* not used by C */
409 		case '\n':
410 			SETLINE;
411 			/*FALLTHROUGH*/
412 		default:
413 			if (c == key && !skip)
414 				return(retval);
415 			skip = NO;
416 		}
417 	return(retval);
418 }
419