xref: /dragonfly/usr.bin/ctags/C.c (revision a4da4a90)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * @(#)C.c	8.4 (Berkeley) 4/2/94
30  * $FreeBSD: head/usr.bin/ctags/C.c 299355 2016-05-10 11:11:23Z bapt $
31  */
32 
33 #include <limits.h>
34 #include <stdio.h>
35 #include <string.h>
36 
37 #include "ctags.h"
38 
/* Forward declarations for the helpers defined later in this file. */
static int	func_entry(void);	/* handle a function reference */
static void	hash_entry(void);	/* handle a line starting with '#' */
static void	skip_string(int);	/* skip a string or character constant */
static int	str_entry(int);		/* handle a struct, union or enum entry */
43 
44 /*
45  * c_entries --
46  *	read .c and .h files and call appropriate routines
47  */
48 void
49 c_entries(void)
50 {
51 	int	c;			/* current character */
52 	int	level;			/* brace level */
53 	int	token;			/* if reading a token */
54 	int	t_def;			/* if reading a typedef */
55 	int	t_level;		/* typedef's brace level */
56 	char	*sp;			/* buffer pointer */
57 	char	tok[MAXTOKEN];		/* token buffer */
58 
59 	lineftell = ftell(inf);
60 	sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
61 	while (GETC(!=, EOF)) {
62 		switch (c) {
63 		/*
64 		 * Here's where it DOESN'T handle: {
65 		 *	foo(a)
66 		 *	{
67 		 *	#ifdef notdef
68 		 *		}
69 		 *	#endif
70 		 *		if (a)
71 		 *			puts("hello, world");
72 		 *	}
73 		 */
74 		case '{':
75 			++level;
76 			goto endtok;
77 		case '}':
78 			/*
79 			 * if level goes below zero, try and fix
80 			 * it, even though we've already messed up
81 			 */
82 			if (--level < 0)
83 				level = 0;
84 			goto endtok;
85 
86 		case '\n':
87 			SETLINE;
88 			/*
89 			 * the above 3 cases are similar in that they
90 			 * are special characters that also end tokens.
91 			 */
92 	endtok:			if (sp > tok) {
93 				*sp = EOS;
94 				token = YES;
95 				sp = tok;
96 			}
97 			else
98 				token = NO;
99 			continue;
100 
101 		/*
102 		 * We ignore quoted strings and character constants
103 		 * completely.
104 		 */
105 		case '"':
106 		case '\'':
107 			skip_string(c);
108 			break;
109 
110 		/*
111 		 * comments can be fun; note the state is unchanged after
112 		 * return, in case we found:
113 		 *	"foo() XX comment XX { int bar; }"
114 		 */
115 		case '/':
116 			if (GETC(==, '*') || c == '/') {
117 				skip_comment(c);
118 				continue;
119 			}
120 			ungetc(c, inf);
121 			c = '/';
122 			goto storec;
123 
124 		/* hash marks flag #define's. */
125 		case '#':
126 			if (sp == tok) {
127 				hash_entry();
128 				break;
129 			}
130 			goto storec;
131 
132 		/*
133 		 * if we have a current token, parenthesis on
134 		 * level zero indicates a function.
135 		 */
136 		case '(':
137 			if (!level && token) {
138 				int	curline;
139 
140 				if (sp != tok)
141 					*sp = EOS;
142 				/*
143 				 * grab the line immediately, we may
144 				 * already be wrong, for example,
145 				 *	foo\n
146 				 *	(arg1,
147 				 */
148 				get_line();
149 				curline = lineno;
150 				if (func_entry()) {
151 					++level;
152 					pfnote(tok, curline);
153 				}
154 				break;
155 			}
156 			goto storec;
157 
158 		/*
159 		 * semi-colons indicate the end of a typedef; if we find a
160 		 * typedef we search for the next semi-colon of the same
161 		 * level as the typedef.  Ignoring "structs", they are
162 		 * tricky, since you can find:
163 		 *
164 		 *	"typedef long time_t;"
165 		 *	"typedef unsigned int u_int;"
166 		 *	"typedef unsigned int u_int [10];"
167 		 *
168 		 * If looking at a typedef, we save a copy of the last token
169 		 * found.  Then, when we find the ';' we take the current
170 		 * token if it starts with a valid token name, else we take
171 		 * the one we saved.  There's probably some reasonable
172 		 * alternative to this...
173 		 */
174 		case ';':
175 			if (t_def && level == t_level) {
176 				t_def = NO;
177 				get_line();
178 				if (sp != tok)
179 					*sp = EOS;
180 				pfnote(tok, lineno);
181 				break;
182 			}
183 			goto storec;
184 
185 		/*
186 		 * store characters until one that can't be part of a token
187 		 * comes along; check the current token against certain
188 		 * reserved words.
189 		 */
190 		default:
191 			/* ignore whitespace */
192 			if (c == ' ' || c == '\t') {
193 				int save = c;
194 				while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
195 					;
196 				if (c == EOF)
197 					return;
198 				ungetc(c, inf);
199 				c = save;
200 			}
201 	storec:		if (!intoken(c)) {
202 				if (sp == tok)
203 					break;
204 				*sp = EOS;
205 				if (tflag) {
206 					/* no typedefs inside typedefs */
207 					if (!t_def &&
208 						   !memcmp(tok, "typedef",8)) {
209 						t_def = YES;
210 						t_level = level;
211 						break;
212 					}
213 					/* catch "typedef struct" */
214 					if ((!t_def || t_level < level)
215 					    && (!memcmp(tok, "struct", 7)
216 					    || !memcmp(tok, "union", 6)
217 					    || !memcmp(tok, "enum", 5))) {
218 						/*
219 						 * get line immediately;
220 						 * may change before '{'
221 						 */
222 						get_line();
223 						if (str_entry(c))
224 							++level;
225 						break;
226 						/* } */
227 					}
228 				}
229 				sp = tok;
230 			}
231 			else if (sp != tok || begtoken(c)) {
232 				if (sp == tok + sizeof tok - 1)
233 					/* Too long -- truncate it */
234 					*sp = EOS;
235 				else
236 					*sp++ = c;
237 				token = YES;
238 			}
239 			continue;
240 		}
241 
242 		sp = tok;
243 		token = NO;
244 	}
245 }
246 
247 /*
248  * func_entry --
249  *	handle a function reference
250  */
251 static int
252 func_entry(void)
253 {
254 	int	c;			/* current character */
255 	int	level = 0;		/* for matching '()' */
256 
257 	/*
258 	 * Find the end of the assumed function declaration.
259 	 * Note that ANSI C functions can have type definitions so keep
260 	 * track of the parentheses nesting level.
261 	 */
262 	while (GETC(!=, EOF)) {
263 		switch (c) {
264 		case '\'':
265 		case '"':
266 			/* skip strings and character constants */
267 			skip_string(c);
268 			break;
269 		case '/':
270 			/* skip comments */
271 			if (GETC(==, '*') || c == '/')
272 				skip_comment(c);
273 			break;
274 		case '(':
275 			level++;
276 			break;
277 		case ')':
278 			if (level == 0)
279 				goto fnd;
280 			level--;
281 			break;
282 		case '\n':
283 			SETLINE;
284 		}
285 	}
286 	return (NO);
287 fnd:
288 	/*
289 	 * we assume that the character after a function's right paren
290 	 * is a token character if it's a function and a non-token
291 	 * character if it's a declaration.  Comments don't count...
292 	 */
293 	for (;;) {
294 		while (GETC(!=, EOF) && iswhite(c))
295 			if (c == '\n')
296 				SETLINE;
297 		if (intoken(c) || c == '{')
298 			break;
299 		if (c == '/' && (GETC(==, '*') || c == '/'))
300 			skip_comment(c);
301 		else {				/* don't ever "read" '/' */
302 			ungetc(c, inf);
303 			return (NO);
304 		}
305 	}
306 	if (c != '{')
307 		skip_key('{');
308 	return (YES);
309 }
310 
311 /*
312  * hash_entry --
313  *	handle a line starting with a '#'
314  */
315 static void
316 hash_entry(void)
317 {
318 	int	c;			/* character read */
319 	int	curline;		/* line started on */
320 	char	*sp;			/* buffer pointer */
321 	char	tok[MAXTOKEN];		/* storage buffer */
322 
323 	/* ignore leading whitespace */
324 	while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
325 		;
326 	ungetc(c, inf);
327 
328 	curline = lineno;
329 	for (sp = tok;;) {		/* get next token */
330 		if (GETC(==, EOF))
331 			return;
332 		if (iswhite(c))
333 			break;
334 		if (sp == tok + sizeof tok - 1)
335 			/* Too long -- truncate it */
336 			*sp = EOS;
337 		else
338 			*sp++ = c;
339 	}
340 	*sp = EOS;
341 	if (memcmp(tok, "define", 6))	/* only interested in #define's */
342 		goto skip;
343 	for (;;) {			/* this doesn't handle "#define \n" */
344 		if (GETC(==, EOF))
345 			return;
346 		if (!iswhite(c))
347 			break;
348 	}
349 	for (sp = tok;;) {		/* get next token */
350 		if (sp == tok + sizeof tok - 1)
351 			/* Too long -- truncate it */
352 			*sp = EOS;
353 		else
354 			*sp++ = c;
355 		if (GETC(==, EOF))
356 			return;
357 		/*
358 		 * this is where it DOESN'T handle
359 		 * "#define \n"
360 		 */
361 		if (!intoken(c))
362 			break;
363 	}
364 	*sp = EOS;
365 	if (dflag || c == '(') {	/* only want macros */
366 		get_line();
367 		pfnote(tok, curline);
368 	}
369 skip:	if (c == '\n') {		/* get rid of rest of define */
370 		SETLINE
371 		if (*(sp - 1) != '\\')
372 			return;
373 	}
374 	skip_key('\n');
375 }
376 
377 /*
378  * str_entry --
379  *	handle a struct, union or enum entry
380  */
381 static int
382 str_entry(int c) /* c is current character */
383 {
384 	int	curline;		/* line started on */
385 	char	*sp;			/* buffer pointer */
386 	char	tok[LINE_MAX];		/* storage buffer */
387 
388 	curline = lineno;
389 	while (iswhite(c))
390 		if (GETC(==, EOF))
391 			return (NO);
392 	if (c == '{')		/* it was "struct {" */
393 		return (YES);
394 	for (sp = tok;;) {		/* get next token */
395 		if (sp == tok + sizeof tok - 1)
396 			/* Too long -- truncate it */
397 			*sp = EOS;
398 		else
399 			*sp++ = c;
400 		if (GETC(==, EOF))
401 			return (NO);
402 		if (!intoken(c))
403 			break;
404 	}
405 	switch (c) {
406 		case '{':		/* it was "struct foo{" */
407 			--sp;
408 			break;
409 		case '\n':		/* it was "struct foo\n" */
410 			SETLINE;
411 			/*FALLTHROUGH*/
412 		default:		/* probably "struct foo " */
413 			while (GETC(!=, EOF))
414 				if (!iswhite(c))
415 					break;
416 			if (c != '{') {
417 				ungetc(c, inf);
418 				return (NO);
419 			}
420 	}
421 	*sp = EOS;
422 	pfnote(tok, curline);
423 	return (YES);
424 }
425 
426 /*
427  * skip_comment --
428  *	skip over comment
429  */
430 void
431 skip_comment(int t) /* t is comment character */
432 {
433 	int	c;			/* character read */
434 	int	star;			/* '*' flag */
435 
436 	for (star = 0; GETC(!=, EOF);)
437 		switch(c) {
438 		/* comments don't nest, nor can they be escaped. */
439 		case '*':
440 			star = YES;
441 			break;
442 		case '/':
443 			if (star && t == '*')
444 				return;
445 			break;
446 		case '\n':
447 			if (t == '/')
448 				return;
449 			SETLINE;
450 			/*FALLTHROUGH*/
451 		default:
452 			star = NO;
453 			break;
454 		}
455 }
456 
457 /*
458  * skip_string --
459  *	skip to the end of a string or character constant.
460  */
461 void
462 skip_string(int key)
463 {
464 	int	c,
465 		skip;
466 
467 	for (skip = NO; GETC(!=, EOF); )
468 		switch (c) {
469 		case '\\':		/* a backslash escapes anything */
470 			skip = !skip;	/* we toggle in case it's "\\" */
471 			break;
472 		case '\n':
473 			SETLINE;
474 			/*FALLTHROUGH*/
475 		default:
476 			if (c == key && !skip)
477 				return;
478 			skip = NO;
479 		}
480 }
481 
482 /*
483  * skip_key --
484  *	skip to next char "key"
485  */
486 int
487 skip_key(int key)
488 {
489 	int	c,
490 		skip,
491 		retval;
492 
493 	for (skip = retval = NO; GETC(!=, EOF);)
494 		switch(c) {
495 		case '\\':		/* a backslash escapes anything */
496 			skip = !skip;	/* we toggle in case it's "\\" */
497 			break;
498 		case ';':		/* special case for yacc; if one */
499 		case '|':		/* of these chars occurs, we may */
500 			retval = YES;	/* have moved out of the rule */
501 			break;		/* not used by C */
502 		case '\'':
503 		case '"':
504 			/* skip strings and character constants */
505 			skip_string(c);
506 			break;
507 		case '/':
508 			/* skip comments */
509 			if (GETC(==, '*') || c == '/') {
510 				skip_comment(c);
511 				break;
512 			}
513 			ungetc(c, inf);
514 			c = '/';
515 			goto norm;
516 		case '\n':
517 			SETLINE;
518 			/*FALLTHROUGH*/
519 		default:
520 		norm:
521 			if (c == key && !skip)
522 				return (retval);
523 			skip = NO;
524 		}
525 	return (retval);
526 }
527