1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1987, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #if 0 33 #ifndef lint 34 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 35 #endif 36 #endif 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include <limits.h> 42 #include <stdio.h> 43 #include <string.h> 44 45 #include "ctags.h" 46 47 static int func_entry(void); 48 static void hash_entry(void); 49 static void skip_string(int); 50 static int str_entry(int); 51 52 /* 53 * c_entries -- 54 * read .c and .h files and call appropriate routines 55 */ 56 void 57 c_entries(void) 58 { 59 int c; /* current character */ 60 int level; /* brace level */ 61 int token; /* if reading a token */ 62 int t_def; /* if reading a typedef */ 63 int t_level; /* typedef's brace level */ 64 char *sp; /* buffer pointer */ 65 char tok[MAXTOKEN]; /* token buffer */ 66 67 lineftell = ftell(inf); 68 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 69 while (GETC(!=, EOF)) { 70 switch (c) { 71 /* 72 * Here's where it DOESN'T handle: { 73 * foo(a) 74 * { 75 * #ifdef notdef 76 * } 77 * #endif 78 * if (a) 79 * puts("hello, world"); 80 * } 81 */ 82 case '{': 83 ++level; 84 goto endtok; 85 case '}': 86 /* 87 * if level goes below zero, try and fix 88 * it, even though we've already messed up 89 */ 90 if (--level < 0) 91 level = 0; 92 goto endtok; 93 94 case '\n': 95 SETLINE; 96 /* 97 * the above 3 cases are similar in that they 98 * are special characters that also end tokens. 99 */ 100 endtok: if (sp > tok) { 101 *sp = EOS; 102 token = YES; 103 sp = tok; 104 } 105 else 106 token = NO; 107 continue; 108 109 /* 110 * We ignore quoted strings and character constants 111 * completely. 112 */ 113 case '"': 114 case '\'': 115 skip_string(c); 116 break; 117 118 /* 119 * comments can be fun; note the state is unchanged after 120 * return, in case we found: 121 * "foo() XX comment XX { int bar; }" 122 */ 123 case '/': 124 if (GETC(==, '*') || c == '/') { 125 skip_comment(c); 126 continue; 127 } 128 (void)ungetc(c, inf); 129 c = '/'; 130 goto storec; 131 132 /* hash marks flag #define's. */ 133 case '#': 134 if (sp == tok) { 135 hash_entry(); 136 break; 137 } 138 goto storec; 139 140 /* 141 * if we have a current token, parenthesis on 142 * level zero indicates a function. 143 */ 144 case '(': 145 if (!level && token) { 146 int curline; 147 148 if (sp != tok) 149 *sp = EOS; 150 /* 151 * grab the line immediately, we may 152 * already be wrong, for example, 153 * foo\n 154 * (arg1, 155 */ 156 get_line(); 157 curline = lineno; 158 if (func_entry()) { 159 ++level; 160 pfnote(tok, curline); 161 } 162 break; 163 } 164 goto storec; 165 166 /* 167 * semi-colons indicate the end of a typedef; if we find a 168 * typedef we search for the next semi-colon of the same 169 * level as the typedef. Ignoring "structs", they are 170 * tricky, since you can find: 171 * 172 * "typedef long time_t;" 173 * "typedef unsigned int u_int;" 174 * "typedef unsigned int u_int [10];" 175 * 176 * If looking at a typedef, we save a copy of the last token 177 * found. Then, when we find the ';' we take the current 178 * token if it starts with a valid token name, else we take 179 * the one we saved. There's probably some reasonable 180 * alternative to this... 181 */ 182 case ';': 183 if (t_def && level == t_level) { 184 t_def = NO; 185 get_line(); 186 if (sp != tok) 187 *sp = EOS; 188 pfnote(tok, lineno); 189 break; 190 } 191 goto storec; 192 193 /* 194 * store characters until one that can't be part of a token 195 * comes along; check the current token against certain 196 * reserved words. 197 */ 198 default: 199 /* ignore whitespace */ 200 if (c == ' ' || c == '\t') { 201 int save = c; 202 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 203 ; 204 if (c == EOF) 205 return; 206 (void)ungetc(c, inf); 207 c = save; 208 } 209 storec: if (!intoken(c)) { 210 if (sp == tok) 211 break; 212 *sp = EOS; 213 if (tflag) { 214 /* no typedefs inside typedefs */ 215 if (!t_def && 216 !memcmp(tok, "typedef",8)) { 217 t_def = YES; 218 t_level = level; 219 break; 220 } 221 /* catch "typedef struct" */ 222 if ((!t_def || t_level < level) 223 && (!memcmp(tok, "struct", 7) 224 || !memcmp(tok, "union", 6) 225 || !memcmp(tok, "enum", 5))) { 226 /* 227 * get line immediately; 228 * may change before '{' 229 */ 230 get_line(); 231 if (str_entry(c)) 232 ++level; 233 break; 234 /* } */ 235 } 236 } 237 sp = tok; 238 } 239 else if (sp != tok || begtoken(c)) { 240 if (sp == tok + sizeof tok - 1) 241 /* Too long -- truncate it */ 242 *sp = EOS; 243 else 244 *sp++ = c; 245 token = YES; 246 } 247 continue; 248 } 249 250 sp = tok; 251 token = NO; 252 } 253 } 254 255 /* 256 * func_entry -- 257 * handle a function reference 258 */ 259 static int 260 func_entry(void) 261 { 262 int c; /* current character */ 263 int level = 0; /* for matching '()' */ 264 265 /* 266 * Find the end of the assumed function declaration. 267 * Note that ANSI C functions can have type definitions so keep 268 * track of the parentheses nesting level. 269 */ 270 while (GETC(!=, EOF)) { 271 switch (c) { 272 case '\'': 273 case '"': 274 /* skip strings and character constants */ 275 skip_string(c); 276 break; 277 case '/': 278 /* skip comments */ 279 if (GETC(==, '*') || c == '/') 280 skip_comment(c); 281 break; 282 case '(': 283 level++; 284 break; 285 case ')': 286 if (level == 0) 287 goto fnd; 288 level--; 289 break; 290 case '\n': 291 SETLINE; 292 } 293 } 294 return (NO); 295 fnd: 296 /* 297 * we assume that the character after a function's right paren 298 * is a token character if it's a function and a non-token 299 * character if it's a declaration. Comments don't count... 300 */ 301 for (;;) { 302 while (GETC(!=, EOF) && iswhite(c)) 303 if (c == '\n') 304 SETLINE; 305 if (intoken(c) || c == '{') 306 break; 307 if (c == '/' && (GETC(==, '*') || c == '/')) 308 skip_comment(c); 309 else { /* don't ever "read" '/' */ 310 (void)ungetc(c, inf); 311 return (NO); 312 } 313 } 314 if (c != '{') 315 (void)skip_key('{'); 316 return (YES); 317 } 318 319 /* 320 * hash_entry -- 321 * handle a line starting with a '#' 322 */ 323 static void 324 hash_entry(void) 325 { 326 int c; /* character read */ 327 int curline; /* line started on */ 328 char *sp; /* buffer pointer */ 329 char tok[MAXTOKEN]; /* storage buffer */ 330 331 /* ignore leading whitespace */ 332 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 333 ; 334 (void)ungetc(c, inf); 335 336 curline = lineno; 337 for (sp = tok;;) { /* get next token */ 338 if (GETC(==, EOF)) 339 return; 340 if (iswhite(c)) 341 break; 342 if (sp == tok + sizeof tok - 1) 343 /* Too long -- truncate it */ 344 *sp = EOS; 345 else 346 *sp++ = c; 347 } 348 *sp = EOS; 349 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 350 goto skip; 351 for (;;) { /* this doesn't handle "#define \n" */ 352 if (GETC(==, EOF)) 353 return; 354 if (!iswhite(c)) 355 break; 356 } 357 for (sp = tok;;) { /* get next token */ 358 if (sp == tok + sizeof tok - 1) 359 /* Too long -- truncate it */ 360 *sp = EOS; 361 else 362 *sp++ = c; 363 if (GETC(==, EOF)) 364 return; 365 /* 366 * this is where it DOESN'T handle 367 * "#define \n" 368 */ 369 if (!intoken(c)) 370 break; 371 } 372 *sp = EOS; 373 if (dflag || c == '(') { /* only want macros */ 374 get_line(); 375 pfnote(tok, curline); 376 } 377 skip: if (c == '\n') { /* get rid of rest of define */ 378 SETLINE 379 if (*(sp - 1) != '\\') 380 return; 381 } 382 (void)skip_key('\n'); 383 } 384 385 /* 386 * str_entry -- 387 * handle a struct, union or enum entry 388 */ 389 static int 390 str_entry(int c) /* c is current character */ 391 { 392 int curline; /* line started on */ 393 char *sp; /* buffer pointer */ 394 char tok[LINE_MAX]; /* storage buffer */ 395 396 curline = lineno; 397 while (iswhite(c)) 398 if (GETC(==, EOF)) 399 return (NO); 400 if (c == '{') /* it was "struct {" */ 401 return (YES); 402 for (sp = tok;;) { /* get next token */ 403 if (sp == tok + sizeof tok - 1) 404 /* Too long -- truncate it */ 405 *sp = EOS; 406 else 407 *sp++ = c; 408 if (GETC(==, EOF)) 409 return (NO); 410 if (!intoken(c)) 411 break; 412 } 413 switch (c) { 414 case '{': /* it was "struct foo{" */ 415 --sp; 416 break; 417 case '\n': /* it was "struct foo\n" */ 418 SETLINE; 419 /*FALLTHROUGH*/ 420 default: /* probably "struct foo " */ 421 while (GETC(!=, EOF)) 422 if (!iswhite(c)) 423 break; 424 if (c != '{') { 425 (void)ungetc(c, inf); 426 return (NO); 427 } 428 } 429 *sp = EOS; 430 pfnote(tok, curline); 431 return (YES); 432 } 433 434 /* 435 * skip_comment -- 436 * skip over comment 437 */ 438 void 439 skip_comment(int t) /* t is comment character */ 440 { 441 int c; /* character read */ 442 int star; /* '*' flag */ 443 444 for (star = 0; GETC(!=, EOF);) 445 switch(c) { 446 /* comments don't nest, nor can they be escaped. */ 447 case '*': 448 star = YES; 449 break; 450 case '/': 451 if (star && t == '*') 452 return; 453 break; 454 case '\n': 455 if (t == '/') 456 return; 457 SETLINE; 458 /*FALLTHROUGH*/ 459 default: 460 star = NO; 461 break; 462 } 463 } 464 465 /* 466 * skip_string -- 467 * skip to the end of a string or character constant. 468 */ 469 void 470 skip_string(int key) 471 { 472 int c, 473 skip; 474 475 for (skip = NO; GETC(!=, EOF); ) 476 switch (c) { 477 case '\\': /* a backslash escapes anything */ 478 skip = !skip; /* we toggle in case it's "\\" */ 479 break; 480 case '\n': 481 SETLINE; 482 /*FALLTHROUGH*/ 483 default: 484 if (c == key && !skip) 485 return; 486 skip = NO; 487 } 488 } 489 490 /* 491 * skip_key -- 492 * skip to next char "key" 493 */ 494 int 495 skip_key(int key) 496 { 497 int c, 498 skip, 499 retval; 500 501 for (skip = retval = NO; GETC(!=, EOF);) 502 switch(c) { 503 case '\\': /* a backslash escapes anything */ 504 skip = !skip; /* we toggle in case it's "\\" */ 505 break; 506 case ';': /* special case for yacc; if one */ 507 case '|': /* of these chars occurs, we may */ 508 retval = YES; /* have moved out of the rule */ 509 break; /* not used by C */ 510 case '\'': 511 case '"': 512 /* skip strings and character constants */ 513 skip_string(c); 514 break; 515 case '/': 516 /* skip comments */ 517 if (GETC(==, '*') || c == '/') { 518 skip_comment(c); 519 break; 520 } 521 (void)ungetc(c, inf); 522 c = '/'; 523 goto norm; 524 case '\n': 525 SETLINE; 526 /*FALLTHROUGH*/ 527 default: 528 norm: 529 if (c == key && !skip) 530 return (retval); 531 skip = NO; 532 } 533 return (retval); 534 } 535