1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1987, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #include <limits.h>
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <string.h>
37
38 #include "ctags.h"
39
/* Forward declarations for the helpers defined later in this file. */
static bool func_entry(void);	/* handle a function reference */
static void hash_entry(void);	/* handle a line starting with a '#' */
static void skip_string(int);	/* skip to the end of a string or character constant */
static bool str_entry(int);	/* handle a struct, union or enum entry */
44
45 /*
46 * c_entries --
47 * read .c and .h files and call appropriate routines
48 */
49 void
c_entries(void)50 c_entries(void)
51 {
52 int c; /* current character */
53 int level; /* brace level */
54 int token; /* if reading a token */
55 bool t_def; /* if reading a typedef */
56 int t_level; /* typedef's brace level */
57 char *sp; /* buffer pointer */
58 char tok[MAXTOKEN]; /* token buffer */
59
60 lineftell = ftell(inf);
61 sp = tok; token = t_def = false; t_level = -1; level = 0; lineno = 1;
62 while (GETC(!=, EOF)) {
63 switch (c) {
64 /*
65 * Here's where it DOESN'T handle: {
66 * foo(a)
67 * {
68 * #ifdef notdef
69 * }
70 * #endif
71 * if (a)
72 * puts("hello, world");
73 * }
74 */
75 case '{':
76 ++level;
77 goto endtok;
78 case '}':
79 /*
80 * if level goes below zero, try and fix
81 * it, even though we've already messed up
82 */
83 if (--level < 0)
84 level = 0;
85 goto endtok;
86
87 case '\n':
88 SETLINE;
89 /*
90 * the above 3 cases are similar in that they
91 * are special characters that also end tokens.
92 */
93 endtok: if (sp > tok) {
94 *sp = EOS;
95 token = true;
96 sp = tok;
97 }
98 else
99 token = false;
100 continue;
101
102 /*
103 * We ignore quoted strings and character constants
104 * completely.
105 */
106 case '"':
107 case '\'':
108 skip_string(c);
109 break;
110
111 /*
112 * comments can be fun; note the state is unchanged after
113 * return, in case we found:
114 * "foo() XX comment XX { int bar; }"
115 */
116 case '/':
117 if (GETC(==, '*') || c == '/') {
118 skip_comment(c);
119 continue;
120 }
121 (void)ungetc(c, inf);
122 c = '/';
123 goto storec;
124
125 /* hash marks flag #define's. */
126 case '#':
127 if (sp == tok) {
128 hash_entry();
129 break;
130 }
131 goto storec;
132
133 /*
134 * if we have a current token, parenthesis on
135 * level zero indicates a function.
136 */
137 case '(':
138 if (!level && token) {
139 int curline;
140
141 if (sp != tok)
142 *sp = EOS;
143 /*
144 * grab the line immediately, we may
145 * already be wrong, for example,
146 * foo\n
147 * (arg1,
148 */
149 get_line();
150 curline = lineno;
151 if (func_entry()) {
152 ++level;
153 pfnote(tok, curline);
154 }
155 break;
156 }
157 goto storec;
158
159 /*
160 * semi-colons indicate the end of a typedef; if we find a
161 * typedef we search for the next semi-colon of the same
162 * level as the typedef. Ignoring "structs", they are
163 * tricky, since you can find:
164 *
165 * "typedef long time_t;"
166 * "typedef unsigned int u_int;"
167 * "typedef unsigned int u_int [10];"
168 *
169 * If looking at a typedef, we save a copy of the last token
170 * found. Then, when we find the ';' we take the current
171 * token if it starts with a valid token name, else we take
172 * the one we saved. There's probably some reasonable
173 * alternative to this...
174 */
175 case ';':
176 if (t_def && level == t_level) {
177 t_def = false;
178 get_line();
179 if (sp != tok)
180 *sp = EOS;
181 pfnote(tok, lineno);
182 break;
183 }
184 goto storec;
185
186 /*
187 * store characters until one that can't be part of a token
188 * comes along; check the current token against certain
189 * reserved words.
190 */
191 default:
192 /* ignore whitespace */
193 if (c == ' ' || c == '\t') {
194 int save = c;
195 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
196 ;
197 if (c == EOF)
198 return;
199 (void)ungetc(c, inf);
200 c = save;
201 }
202 storec: if (!intoken(c)) {
203 if (sp == tok)
204 break;
205 *sp = EOS;
206 if (tflag) {
207 /* no typedefs inside typedefs */
208 if (!t_def &&
209 !memcmp(tok, "typedef",8)) {
210 t_def = true;
211 t_level = level;
212 break;
213 }
214 /* catch "typedef struct" */
215 if ((!t_def || t_level < level)
216 && (!memcmp(tok, "struct", 7)
217 || !memcmp(tok, "union", 6)
218 || !memcmp(tok, "enum", 5))) {
219 /*
220 * get line immediately;
221 * may change before '{'
222 */
223 get_line();
224 if (str_entry(c))
225 ++level;
226 break;
227 /* } */
228 }
229 }
230 sp = tok;
231 }
232 else if (sp != tok || begtoken(c)) {
233 if (sp == tok + sizeof tok - 1)
234 /* Too long -- truncate it */
235 *sp = EOS;
236 else
237 *sp++ = c;
238 token = true;
239 }
240 continue;
241 }
242
243 sp = tok;
244 token = false;
245 }
246 }
247
248 /*
249 * func_entry --
250 * handle a function reference
251 */
252 static bool
func_entry(void)253 func_entry(void)
254 {
255 int c; /* current character */
256 int level = 0; /* for matching '()' */
257 static char attribute[] = "__attribute__";
258 char maybe_attribute[sizeof attribute + 1],
259 *anext;
260
261 /*
262 * Find the end of the assumed function declaration.
263 * Note that ANSI C functions can have type definitions so keep
264 * track of the parentheses nesting level.
265 */
266 while (GETC(!=, EOF)) {
267 switch (c) {
268 case '\'':
269 case '"':
270 /* skip strings and character constants */
271 skip_string(c);
272 break;
273 case '/':
274 /* skip comments */
275 if (GETC(==, '*') || c == '/')
276 skip_comment(c);
277 break;
278 case '(':
279 level++;
280 break;
281 case ')':
282 if (level == 0)
283 goto fnd;
284 level--;
285 break;
286 case '\n':
287 SETLINE;
288 }
289 }
290 return (false);
291 fnd:
292 /*
293 * we assume that the character after a function's right paren
294 * is a token character if it's a function and a non-token
295 * character if it's a declaration. Comments don't count...
296 */
297 for (anext = maybe_attribute;;) {
298 while (GETC(!=, EOF) && iswhite(c))
299 if (c == '\n')
300 SETLINE;
301 if (c == EOF)
302 return false;
303 /*
304 * Recognize the gnu __attribute__ extension, which would
305 * otherwise make the heuristic test DTWT
306 */
307 if (anext == maybe_attribute) {
308 if (intoken(c)) {
309 *anext++ = c;
310 continue;
311 }
312 } else {
313 if (intoken(c)) {
314 if (anext - maybe_attribute
315 < (ptrdiff_t)(sizeof attribute - 1))
316 *anext++ = c;
317 else break;
318 continue;
319 } else {
320 *anext++ = '\0';
321 if (strcmp(maybe_attribute, attribute) == 0) {
322 (void)ungetc(c, inf);
323 return false;
324 }
325 break;
326 }
327 }
328 if (intoken(c) || c == '{')
329 break;
330 if (c == '/' && (GETC(==, '*') || c == '/'))
331 skip_comment(c);
332 else { /* don't ever "read" '/' */
333 (void)ungetc(c, inf);
334 return (false);
335 }
336 }
337 if (c != '{')
338 (void)skip_key('{');
339 return (true);
340 }
341
342 /*
343 * hash_entry --
344 * handle a line starting with a '#'
345 */
346 static void
hash_entry(void)347 hash_entry(void)
348 {
349 int c; /* character read */
350 int curline; /* line started on */
351 char *sp; /* buffer pointer */
352 char tok[MAXTOKEN]; /* storage buffer */
353
354 /* ignore leading whitespace */
355 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
356 ;
357 (void)ungetc(c, inf);
358
359 curline = lineno;
360 for (sp = tok;;) { /* get next token */
361 if (GETC(==, EOF))
362 return;
363 if (iswhite(c))
364 break;
365 if (sp == tok + sizeof tok - 1)
366 /* Too long -- truncate it */
367 *sp = EOS;
368 else
369 *sp++ = c;
370 }
371 *sp = EOS;
372 if (memcmp(tok, "define", 6)) /* only interested in #define's */
373 goto skip;
374 for (;;) { /* this doesn't handle "#define \n" */
375 if (GETC(==, EOF))
376 return;
377 if (!iswhite(c))
378 break;
379 }
380 for (sp = tok;;) { /* get next token */
381 if (sp == tok + sizeof tok - 1)
382 /* Too long -- truncate it */
383 *sp = EOS;
384 else
385 *sp++ = c;
386 if (GETC(==, EOF))
387 return;
388 /*
389 * this is where it DOESN'T handle
390 * "#define \n"
391 */
392 if (!intoken(c))
393 break;
394 }
395 *sp = EOS;
396 if (dflag || c == '(') { /* only want macros */
397 get_line();
398 pfnote(tok, curline);
399 }
400 skip: if (c == '\n') { /* get rid of rest of define */
401 SETLINE
402 if (*(sp - 1) != '\\')
403 return;
404 }
405 (void)skip_key('\n');
406 }
407
408 /*
409 * str_entry --
410 * handle a struct, union or enum entry
411 */
412 static bool
str_entry(int c)413 str_entry(int c) /* c is current character */
414 {
415 int curline; /* line started on */
416 char *sp; /* buffer pointer */
417 char tok[LINE_MAX]; /* storage buffer */
418
419 curline = lineno;
420 while (iswhite(c))
421 if (GETC(==, EOF))
422 return (false);
423 if (c == '{') /* it was "struct {" */
424 return (true);
425 for (sp = tok;;) { /* get next token */
426 if (sp == tok + sizeof tok - 1)
427 /* Too long -- truncate it */
428 *sp = EOS;
429 else
430 *sp++ = c;
431 if (GETC(==, EOF))
432 return (false);
433 if (!intoken(c))
434 break;
435 }
436 switch (c) {
437 case '{': /* it was "struct foo{" */
438 --sp;
439 break;
440 case '\n': /* it was "struct foo\n" */
441 SETLINE;
442 /*FALLTHROUGH*/
443 default: /* probably "struct foo " */
444 while (GETC(!=, EOF))
445 if (!iswhite(c))
446 break;
447 if (c != '{') {
448 (void)ungetc(c, inf);
449 return (false);
450 }
451 }
452 *sp = EOS;
453 pfnote(tok, curline);
454 return (true);
455 }
456
457 /*
458 * skip_comment --
459 * skip over comment
460 */
461 void
skip_comment(int t)462 skip_comment(int t) /* t is comment character */
463 {
464 int c; /* character read */
465 int star; /* '*' flag */
466
467 for (star = 0; GETC(!=, EOF);)
468 switch(c) {
469 /* comments don't nest, nor can they be escaped. */
470 case '*':
471 star = true;
472 break;
473 case '/':
474 if (star && t == '*')
475 return;
476 break;
477 case '\n':
478 SETLINE;
479 if (t == '/')
480 return;
481 /*FALLTHROUGH*/
482 default:
483 star = false;
484 break;
485 }
486 }
487
488 /*
489 * skip_string --
490 * skip to the end of a string or character constant.
491 */
492 void
skip_string(int key)493 skip_string(int key)
494 {
495 int c,
496 skip;
497
498 for (skip = false; GETC(!=, EOF); )
499 switch (c) {
500 case '\\': /* a backslash escapes anything */
501 skip = !skip; /* we toggle in case it's "\\" */
502 break;
503 case '\n':
504 SETLINE;
505 /*FALLTHROUGH*/
506 default:
507 if (c == key && !skip)
508 return;
509 skip = false;
510 }
511 }
512
513 /*
514 * skip_key --
515 * skip to next char "key"
516 */
517 bool
skip_key(int key)518 skip_key(int key)
519 {
520 int c;
521 bool skip;
522 bool retval;
523
524 for (skip = retval = false; GETC(!=, EOF);)
525 switch(c) {
526 case '\\': /* a backslash escapes anything */
527 skip = !skip; /* we toggle in case it's "\\" */
528 break;
529 case ';': /* special case for yacc; if one */
530 case '|': /* of these chars occurs, we may */
531 retval = true; /* have moved out of the rule */
532 break; /* not used by C */
533 case '\'':
534 case '"':
535 /* skip strings and character constants */
536 skip_string(c);
537 break;
538 case '/':
539 /* skip comments */
540 if (GETC(==, '*') || c == '/') {
541 skip_comment(c);
542 break;
543 }
544 (void)ungetc(c, inf);
545 c = '/';
546 goto norm;
547 case '\n':
548 SETLINE;
549 /*FALLTHROUGH*/
550 default:
551 norm:
552 if (c == key && !skip)
553 return (retval);
554 skip = false;
555 }
556 return (retval);
557 }
558