1 /* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)C.c 8.4 (Berkeley) 4/2/94 30 * $FreeBSD: head/usr.bin/ctags/C.c 299355 2016-05-10 11:11:23Z bapt $ 31 */ 32 33 #include <limits.h> 34 #include <stdio.h> 35 #include <string.h> 36 37 #include "ctags.h" 38 39 static int func_entry(void); 40 static void hash_entry(void); 41 static void skip_string(int); 42 static int str_entry(int); 43 44 /* 45 * c_entries -- 46 * read .c and .h files and call appropriate routines 47 */ 48 void 49 c_entries(void) 50 { 51 int c; /* current character */ 52 int level; /* brace level */ 53 int token; /* if reading a token */ 54 int t_def; /* if reading a typedef */ 55 int t_level; /* typedef's brace level */ 56 char *sp; /* buffer pointer */ 57 char tok[MAXTOKEN]; /* token buffer */ 58 59 lineftell = ftell(inf); 60 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 61 while (GETC(!=, EOF)) { 62 switch (c) { 63 /* 64 * Here's where it DOESN'T handle: { 65 * foo(a) 66 * { 67 * #ifdef notdef 68 * } 69 * #endif 70 * if (a) 71 * puts("hello, world"); 72 * } 73 */ 74 case '{': 75 ++level; 76 goto endtok; 77 case '}': 78 /* 79 * if level goes below zero, try and fix 80 * it, even though we've already messed up 81 */ 82 if (--level < 0) 83 level = 0; 84 goto endtok; 85 86 case '\n': 87 SETLINE; 88 /* 89 * the above 3 cases are similar in that they 90 * are special characters that also end tokens. 91 */ 92 endtok: if (sp > tok) { 93 *sp = EOS; 94 token = YES; 95 sp = tok; 96 } 97 else 98 token = NO; 99 continue; 100 101 /* 102 * We ignore quoted strings and character constants 103 * completely. 104 */ 105 case '"': 106 case '\'': 107 skip_string(c); 108 break; 109 110 /* 111 * comments can be fun; note the state is unchanged after 112 * return, in case we found: 113 * "foo() XX comment XX { int bar; }" 114 */ 115 case '/': 116 if (GETC(==, '*') || c == '/') { 117 skip_comment(c); 118 continue; 119 } 120 ungetc(c, inf); 121 c = '/'; 122 goto storec; 123 124 /* hash marks flag #define's. */ 125 case '#': 126 if (sp == tok) { 127 hash_entry(); 128 break; 129 } 130 goto storec; 131 132 /* 133 * if we have a current token, parenthesis on 134 * level zero indicates a function. 135 */ 136 case '(': 137 if (!level && token) { 138 int curline; 139 140 if (sp != tok) 141 *sp = EOS; 142 /* 143 * grab the line immediately, we may 144 * already be wrong, for example, 145 * foo\n 146 * (arg1, 147 */ 148 get_line(); 149 curline = lineno; 150 if (func_entry()) { 151 ++level; 152 pfnote(tok, curline); 153 } 154 break; 155 } 156 goto storec; 157 158 /* 159 * semi-colons indicate the end of a typedef; if we find a 160 * typedef we search for the next semi-colon of the same 161 * level as the typedef. Ignoring "structs", they are 162 * tricky, since you can find: 163 * 164 * "typedef long time_t;" 165 * "typedef unsigned int u_int;" 166 * "typedef unsigned int u_int [10];" 167 * 168 * If looking at a typedef, we save a copy of the last token 169 * found. Then, when we find the ';' we take the current 170 * token if it starts with a valid token name, else we take 171 * the one we saved. There's probably some reasonable 172 * alternative to this... 173 */ 174 case ';': 175 if (t_def && level == t_level) { 176 t_def = NO; 177 get_line(); 178 if (sp != tok) 179 *sp = EOS; 180 pfnote(tok, lineno); 181 break; 182 } 183 goto storec; 184 185 /* 186 * store characters until one that can't be part of a token 187 * comes along; check the current token against certain 188 * reserved words. 189 */ 190 default: 191 /* ignore whitespace */ 192 if (c == ' ' || c == '\t') { 193 int save = c; 194 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 195 ; 196 if (c == EOF) 197 return; 198 ungetc(c, inf); 199 c = save; 200 } 201 storec: if (!intoken(c)) { 202 if (sp == tok) 203 break; 204 *sp = EOS; 205 if (tflag) { 206 /* no typedefs inside typedefs */ 207 if (!t_def && 208 !memcmp(tok, "typedef",8)) { 209 t_def = YES; 210 t_level = level; 211 break; 212 } 213 /* catch "typedef struct" */ 214 if ((!t_def || t_level < level) 215 && (!memcmp(tok, "struct", 7) 216 || !memcmp(tok, "union", 6) 217 || !memcmp(tok, "enum", 5))) { 218 /* 219 * get line immediately; 220 * may change before '{' 221 */ 222 get_line(); 223 if (str_entry(c)) 224 ++level; 225 break; 226 /* } */ 227 } 228 } 229 sp = tok; 230 } 231 else if (sp != tok || begtoken(c)) { 232 if (sp == tok + sizeof tok - 1) 233 /* Too long -- truncate it */ 234 *sp = EOS; 235 else 236 *sp++ = c; 237 token = YES; 238 } 239 continue; 240 } 241 242 sp = tok; 243 token = NO; 244 } 245 } 246 247 /* 248 * func_entry -- 249 * handle a function reference 250 */ 251 static int 252 func_entry(void) 253 { 254 int c; /* current character */ 255 int level = 0; /* for matching '()' */ 256 257 /* 258 * Find the end of the assumed function declaration. 259 * Note that ANSI C functions can have type definitions so keep 260 * track of the parentheses nesting level. 261 */ 262 while (GETC(!=, EOF)) { 263 switch (c) { 264 case '\'': 265 case '"': 266 /* skip strings and character constants */ 267 skip_string(c); 268 break; 269 case '/': 270 /* skip comments */ 271 if (GETC(==, '*') || c == '/') 272 skip_comment(c); 273 break; 274 case '(': 275 level++; 276 break; 277 case ')': 278 if (level == 0) 279 goto fnd; 280 level--; 281 break; 282 case '\n': 283 SETLINE; 284 } 285 } 286 return (NO); 287 fnd: 288 /* 289 * we assume that the character after a function's right paren 290 * is a token character if it's a function and a non-token 291 * character if it's a declaration. Comments don't count... 292 */ 293 for (;;) { 294 while (GETC(!=, EOF) && iswhite(c)) 295 if (c == '\n') 296 SETLINE; 297 if (intoken(c) || c == '{') 298 break; 299 if (c == '/' && (GETC(==, '*') || c == '/')) 300 skip_comment(c); 301 else { /* don't ever "read" '/' */ 302 ungetc(c, inf); 303 return (NO); 304 } 305 } 306 if (c != '{') 307 skip_key('{'); 308 return (YES); 309 } 310 311 /* 312 * hash_entry -- 313 * handle a line starting with a '#' 314 */ 315 static void 316 hash_entry(void) 317 { 318 int c; /* character read */ 319 int curline; /* line started on */ 320 char *sp; /* buffer pointer */ 321 char tok[MAXTOKEN]; /* storage buffer */ 322 323 /* ignore leading whitespace */ 324 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 325 ; 326 ungetc(c, inf); 327 328 curline = lineno; 329 for (sp = tok;;) { /* get next token */ 330 if (GETC(==, EOF)) 331 return; 332 if (iswhite(c)) 333 break; 334 if (sp == tok + sizeof tok - 1) 335 /* Too long -- truncate it */ 336 *sp = EOS; 337 else 338 *sp++ = c; 339 } 340 *sp = EOS; 341 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 342 goto skip; 343 for (;;) { /* this doesn't handle "#define \n" */ 344 if (GETC(==, EOF)) 345 return; 346 if (!iswhite(c)) 347 break; 348 } 349 for (sp = tok;;) { /* get next token */ 350 if (sp == tok + sizeof tok - 1) 351 /* Too long -- truncate it */ 352 *sp = EOS; 353 else 354 *sp++ = c; 355 if (GETC(==, EOF)) 356 return; 357 /* 358 * this is where it DOESN'T handle 359 * "#define \n" 360 */ 361 if (!intoken(c)) 362 break; 363 } 364 *sp = EOS; 365 if (dflag || c == '(') { /* only want macros */ 366 get_line(); 367 pfnote(tok, curline); 368 } 369 skip: if (c == '\n') { /* get rid of rest of define */ 370 SETLINE 371 if (*(sp - 1) != '\\') 372 return; 373 } 374 skip_key('\n'); 375 } 376 377 /* 378 * str_entry -- 379 * handle a struct, union or enum entry 380 */ 381 static int 382 str_entry(int c) /* c is current character */ 383 { 384 int curline; /* line started on */ 385 char *sp; /* buffer pointer */ 386 char tok[LINE_MAX]; /* storage buffer */ 387 388 curline = lineno; 389 while (iswhite(c)) 390 if (GETC(==, EOF)) 391 return (NO); 392 if (c == '{') /* it was "struct {" */ 393 return (YES); 394 for (sp = tok;;) { /* get next token */ 395 if (sp == tok + sizeof tok - 1) 396 /* Too long -- truncate it */ 397 *sp = EOS; 398 else 399 *sp++ = c; 400 if (GETC(==, EOF)) 401 return (NO); 402 if (!intoken(c)) 403 break; 404 } 405 switch (c) { 406 case '{': /* it was "struct foo{" */ 407 --sp; 408 break; 409 case '\n': /* it was "struct foo\n" */ 410 SETLINE; 411 /*FALLTHROUGH*/ 412 default: /* probably "struct foo " */ 413 while (GETC(!=, EOF)) 414 if (!iswhite(c)) 415 break; 416 if (c != '{') { 417 ungetc(c, inf); 418 return (NO); 419 } 420 } 421 *sp = EOS; 422 pfnote(tok, curline); 423 return (YES); 424 } 425 426 /* 427 * skip_comment -- 428 * skip over comment 429 */ 430 void 431 skip_comment(int t) /* t is comment character */ 432 { 433 int c; /* character read */ 434 int star; /* '*' flag */ 435 436 for (star = 0; GETC(!=, EOF);) 437 switch(c) { 438 /* comments don't nest, nor can they be escaped. */ 439 case '*': 440 star = YES; 441 break; 442 case '/': 443 if (star && t == '*') 444 return; 445 break; 446 case '\n': 447 if (t == '/') 448 return; 449 SETLINE; 450 /*FALLTHROUGH*/ 451 default: 452 star = NO; 453 break; 454 } 455 } 456 457 /* 458 * skip_string -- 459 * skip to the end of a string or character constant. 460 */ 461 void 462 skip_string(int key) 463 { 464 int c, 465 skip; 466 467 for (skip = NO; GETC(!=, EOF); ) 468 switch (c) { 469 case '\\': /* a backslash escapes anything */ 470 skip = !skip; /* we toggle in case it's "\\" */ 471 break; 472 case '\n': 473 SETLINE; 474 /*FALLTHROUGH*/ 475 default: 476 if (c == key && !skip) 477 return; 478 skip = NO; 479 } 480 } 481 482 /* 483 * skip_key -- 484 * skip to next char "key" 485 */ 486 int 487 skip_key(int key) 488 { 489 int c, 490 skip, 491 retval; 492 493 for (skip = retval = NO; GETC(!=, EOF);) 494 switch(c) { 495 case '\\': /* a backslash escapes anything */ 496 skip = !skip; /* we toggle in case it's "\\" */ 497 break; 498 case ';': /* special case for yacc; if one */ 499 case '|': /* of these chars occurs, we may */ 500 retval = YES; /* have moved out of the rule */ 501 break; /* not used by C */ 502 case '\'': 503 case '"': 504 /* skip strings and character constants */ 505 skip_string(c); 506 break; 507 case '/': 508 /* skip comments */ 509 if (GETC(==, '*') || c == '/') { 510 skip_comment(c); 511 break; 512 } 513 ungetc(c, inf); 514 c = '/'; 515 goto norm; 516 case '\n': 517 SETLINE; 518 /*FALLTHROUGH*/ 519 default: 520 norm: 521 if (c == key && !skip) 522 return (retval); 523 skip = NO; 524 } 525 return (retval); 526 } 527