1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1987, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #if 0 33 #ifndef lint 34 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94"; 35 #endif 36 #endif 37 38 #include <sys/cdefs.h> 39 #include <limits.h> 40 #include <stddef.h> 41 #include <stdio.h> 42 #include <string.h> 43 44 #include "ctags.h" 45 46 static int func_entry(void); 47 static void hash_entry(void); 48 static void skip_string(int); 49 static int str_entry(int); 50 51 /* 52 * c_entries -- 53 * read .c and .h files and call appropriate routines 54 */ 55 void 56 c_entries(void) 57 { 58 int c; /* current character */ 59 int level; /* brace level */ 60 int token; /* if reading a token */ 61 int t_def; /* if reading a typedef */ 62 int t_level; /* typedef's brace level */ 63 char *sp; /* buffer pointer */ 64 char tok[MAXTOKEN]; /* token buffer */ 65 66 lineftell = ftell(inf); 67 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 68 while (GETC(!=, EOF)) { 69 switch (c) { 70 /* 71 * Here's where it DOESN'T handle: { 72 * foo(a) 73 * { 74 * #ifdef notdef 75 * } 76 * #endif 77 * if (a) 78 * puts("hello, world"); 79 * } 80 */ 81 case '{': 82 ++level; 83 goto endtok; 84 case '}': 85 /* 86 * if level goes below zero, try and fix 87 * it, even though we've already messed up 88 */ 89 if (--level < 0) 90 level = 0; 91 goto endtok; 92 93 case '\n': 94 SETLINE; 95 /* 96 * the above 3 cases are similar in that they 97 * are special characters that also end tokens. 98 */ 99 endtok: if (sp > tok) { 100 *sp = EOS; 101 token = YES; 102 sp = tok; 103 } 104 else 105 token = NO; 106 continue; 107 108 /* 109 * We ignore quoted strings and character constants 110 * completely. 111 */ 112 case '"': 113 case '\'': 114 skip_string(c); 115 break; 116 117 /* 118 * comments can be fun; note the state is unchanged after 119 * return, in case we found: 120 * "foo() XX comment XX { int bar; }" 121 */ 122 case '/': 123 if (GETC(==, '*') || c == '/') { 124 skip_comment(c); 125 continue; 126 } 127 (void)ungetc(c, inf); 128 c = '/'; 129 goto storec; 130 131 /* hash marks flag #define's. */ 132 case '#': 133 if (sp == tok) { 134 hash_entry(); 135 break; 136 } 137 goto storec; 138 139 /* 140 * if we have a current token, parenthesis on 141 * level zero indicates a function. 142 */ 143 case '(': 144 if (!level && token) { 145 int curline; 146 147 if (sp != tok) 148 *sp = EOS; 149 /* 150 * grab the line immediately, we may 151 * already be wrong, for example, 152 * foo\n 153 * (arg1, 154 */ 155 get_line(); 156 curline = lineno; 157 if (func_entry()) { 158 ++level; 159 pfnote(tok, curline); 160 } 161 break; 162 } 163 goto storec; 164 165 /* 166 * semi-colons indicate the end of a typedef; if we find a 167 * typedef we search for the next semi-colon of the same 168 * level as the typedef. Ignoring "structs", they are 169 * tricky, since you can find: 170 * 171 * "typedef long time_t;" 172 * "typedef unsigned int u_int;" 173 * "typedef unsigned int u_int [10];" 174 * 175 * If looking at a typedef, we save a copy of the last token 176 * found. Then, when we find the ';' we take the current 177 * token if it starts with a valid token name, else we take 178 * the one we saved. There's probably some reasonable 179 * alternative to this... 180 */ 181 case ';': 182 if (t_def && level == t_level) { 183 t_def = NO; 184 get_line(); 185 if (sp != tok) 186 *sp = EOS; 187 pfnote(tok, lineno); 188 break; 189 } 190 goto storec; 191 192 /* 193 * store characters until one that can't be part of a token 194 * comes along; check the current token against certain 195 * reserved words. 196 */ 197 default: 198 /* ignore whitespace */ 199 if (c == ' ' || c == '\t') { 200 int save = c; 201 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 202 ; 203 if (c == EOF) 204 return; 205 (void)ungetc(c, inf); 206 c = save; 207 } 208 storec: if (!intoken(c)) { 209 if (sp == tok) 210 break; 211 *sp = EOS; 212 if (tflag) { 213 /* no typedefs inside typedefs */ 214 if (!t_def && 215 !memcmp(tok, "typedef",8)) { 216 t_def = YES; 217 t_level = level; 218 break; 219 } 220 /* catch "typedef struct" */ 221 if ((!t_def || t_level < level) 222 && (!memcmp(tok, "struct", 7) 223 || !memcmp(tok, "union", 6) 224 || !memcmp(tok, "enum", 5))) { 225 /* 226 * get line immediately; 227 * may change before '{' 228 */ 229 get_line(); 230 if (str_entry(c)) 231 ++level; 232 break; 233 /* } */ 234 } 235 } 236 sp = tok; 237 } 238 else if (sp != tok || begtoken(c)) { 239 if (sp == tok + sizeof tok - 1) 240 /* Too long -- truncate it */ 241 *sp = EOS; 242 else 243 *sp++ = c; 244 token = YES; 245 } 246 continue; 247 } 248 249 sp = tok; 250 token = NO; 251 } 252 } 253 254 /* 255 * func_entry -- 256 * handle a function reference 257 */ 258 static int 259 func_entry(void) 260 { 261 int c; /* current character */ 262 int level = 0; /* for matching '()' */ 263 static char attribute[] = "__attribute__"; 264 char maybe_attribute[sizeof attribute + 1], 265 *anext; 266 267 /* 268 * Find the end of the assumed function declaration. 269 * Note that ANSI C functions can have type definitions so keep 270 * track of the parentheses nesting level. 271 */ 272 while (GETC(!=, EOF)) { 273 switch (c) { 274 case '\'': 275 case '"': 276 /* skip strings and character constants */ 277 skip_string(c); 278 break; 279 case '/': 280 /* skip comments */ 281 if (GETC(==, '*') || c == '/') 282 skip_comment(c); 283 break; 284 case '(': 285 level++; 286 break; 287 case ')': 288 if (level == 0) 289 goto fnd; 290 level--; 291 break; 292 case '\n': 293 SETLINE; 294 } 295 } 296 return (NO); 297 fnd: 298 /* 299 * we assume that the character after a function's right paren 300 * is a token character if it's a function and a non-token 301 * character if it's a declaration. Comments don't count... 302 */ 303 for (anext = maybe_attribute;;) { 304 while (GETC(!=, EOF) && iswhite(c)) 305 if (c == '\n') 306 SETLINE; 307 if (c == EOF) 308 return NO; 309 /* 310 * Recognize the gnu __attribute__ extension, which would 311 * otherwise make the heuristic test DTWT 312 */ 313 if (anext == maybe_attribute) { 314 if (intoken(c)) { 315 *anext++ = c; 316 continue; 317 } 318 } else { 319 if (intoken(c)) { 320 if (anext - maybe_attribute 321 < (ptrdiff_t)(sizeof attribute - 1)) 322 *anext++ = c; 323 else break; 324 continue; 325 } else { 326 *anext++ = '\0'; 327 if (strcmp(maybe_attribute, attribute) == 0) { 328 (void)ungetc(c, inf); 329 return NO; 330 } 331 break; 332 } 333 } 334 if (intoken(c) || c == '{') 335 break; 336 if (c == '/' && (GETC(==, '*') || c == '/')) 337 skip_comment(c); 338 else { /* don't ever "read" '/' */ 339 (void)ungetc(c, inf); 340 return (NO); 341 } 342 } 343 if (c != '{') 344 (void)skip_key('{'); 345 return (YES); 346 } 347 348 /* 349 * hash_entry -- 350 * handle a line starting with a '#' 351 */ 352 static void 353 hash_entry(void) 354 { 355 int c; /* character read */ 356 int curline; /* line started on */ 357 char *sp; /* buffer pointer */ 358 char tok[MAXTOKEN]; /* storage buffer */ 359 360 /* ignore leading whitespace */ 361 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 362 ; 363 (void)ungetc(c, inf); 364 365 curline = lineno; 366 for (sp = tok;;) { /* get next token */ 367 if (GETC(==, EOF)) 368 return; 369 if (iswhite(c)) 370 break; 371 if (sp == tok + sizeof tok - 1) 372 /* Too long -- truncate it */ 373 *sp = EOS; 374 else 375 *sp++ = c; 376 } 377 *sp = EOS; 378 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 379 goto skip; 380 for (;;) { /* this doesn't handle "#define \n" */ 381 if (GETC(==, EOF)) 382 return; 383 if (!iswhite(c)) 384 break; 385 } 386 for (sp = tok;;) { /* get next token */ 387 if (sp == tok + sizeof tok - 1) 388 /* Too long -- truncate it */ 389 *sp = EOS; 390 else 391 *sp++ = c; 392 if (GETC(==, EOF)) 393 return; 394 /* 395 * this is where it DOESN'T handle 396 * "#define \n" 397 */ 398 if (!intoken(c)) 399 break; 400 } 401 *sp = EOS; 402 if (dflag || c == '(') { /* only want macros */ 403 get_line(); 404 pfnote(tok, curline); 405 } 406 skip: if (c == '\n') { /* get rid of rest of define */ 407 SETLINE 408 if (*(sp - 1) != '\\') 409 return; 410 } 411 (void)skip_key('\n'); 412 } 413 414 /* 415 * str_entry -- 416 * handle a struct, union or enum entry 417 */ 418 static int 419 str_entry(int c) /* c is current character */ 420 { 421 int curline; /* line started on */ 422 char *sp; /* buffer pointer */ 423 char tok[LINE_MAX]; /* storage buffer */ 424 425 curline = lineno; 426 while (iswhite(c)) 427 if (GETC(==, EOF)) 428 return (NO); 429 if (c == '{') /* it was "struct {" */ 430 return (YES); 431 for (sp = tok;;) { /* get next token */ 432 if (sp == tok + sizeof tok - 1) 433 /* Too long -- truncate it */ 434 *sp = EOS; 435 else 436 *sp++ = c; 437 if (GETC(==, EOF)) 438 return (NO); 439 if (!intoken(c)) 440 break; 441 } 442 switch (c) { 443 case '{': /* it was "struct foo{" */ 444 --sp; 445 break; 446 case '\n': /* it was "struct foo\n" */ 447 SETLINE; 448 /*FALLTHROUGH*/ 449 default: /* probably "struct foo " */ 450 while (GETC(!=, EOF)) 451 if (!iswhite(c)) 452 break; 453 if (c != '{') { 454 (void)ungetc(c, inf); 455 return (NO); 456 } 457 } 458 *sp = EOS; 459 pfnote(tok, curline); 460 return (YES); 461 } 462 463 /* 464 * skip_comment -- 465 * skip over comment 466 */ 467 void 468 skip_comment(int t) /* t is comment character */ 469 { 470 int c; /* character read */ 471 int star; /* '*' flag */ 472 473 for (star = 0; GETC(!=, EOF);) 474 switch(c) { 475 /* comments don't nest, nor can they be escaped. */ 476 case '*': 477 star = YES; 478 break; 479 case '/': 480 if (star && t == '*') 481 return; 482 break; 483 case '\n': 484 SETLINE; 485 if (t == '/') 486 return; 487 /*FALLTHROUGH*/ 488 default: 489 star = NO; 490 break; 491 } 492 } 493 494 /* 495 * skip_string -- 496 * skip to the end of a string or character constant. 497 */ 498 void 499 skip_string(int key) 500 { 501 int c, 502 skip; 503 504 for (skip = NO; GETC(!=, EOF); ) 505 switch (c) { 506 case '\\': /* a backslash escapes anything */ 507 skip = !skip; /* we toggle in case it's "\\" */ 508 break; 509 case '\n': 510 SETLINE; 511 /*FALLTHROUGH*/ 512 default: 513 if (c == key && !skip) 514 return; 515 skip = NO; 516 } 517 } 518 519 /* 520 * skip_key -- 521 * skip to next char "key" 522 */ 523 int 524 skip_key(int key) 525 { 526 int c, 527 skip, 528 retval; 529 530 for (skip = retval = NO; GETC(!=, EOF);) 531 switch(c) { 532 case '\\': /* a backslash escapes anything */ 533 skip = !skip; /* we toggle in case it's "\\" */ 534 break; 535 case ';': /* special case for yacc; if one */ 536 case '|': /* of these chars occurs, we may */ 537 retval = YES; /* have moved out of the rule */ 538 break; /* not used by C */ 539 case '\'': 540 case '"': 541 /* skip strings and character constants */ 542 skip_string(c); 543 break; 544 case '/': 545 /* skip comments */ 546 if (GETC(==, '*') || c == '/') { 547 skip_comment(c); 548 break; 549 } 550 (void)ungetc(c, inf); 551 c = '/'; 552 goto norm; 553 case '\n': 554 SETLINE; 555 /*FALLTHROUGH*/ 556 default: 557 norm: 558 if (c == key && !skip) 559 return (retval); 560 skip = NO; 561 } 562 return (retval); 563 } 564