1 /* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)C.c 8.4 (Berkeley) 4/2/94 30 * $FreeBSD: src/usr.bin/ctags/C.c,v 1.3.2.2 2002/07/30 00:55:07 tjr Exp $ 31 * $DragonFly: src/usr.bin/ctags/C.c,v 1.2 2003/06/17 04:29:25 dillon Exp $ 32 */ 33 34 #include <limits.h> 35 #include <stdio.h> 36 #include <string.h> 37 38 #include "ctags.h" 39 40 static int func_entry(void); 41 static void hash_entry(void); 42 static void skip_string(int); 43 static int str_entry(int); 44 45 /* 46 * c_entries -- 47 * read .c and .h files and call appropriate routines 48 */ 49 void 50 c_entries(void) 51 { 52 int c; /* current character */ 53 int level; /* brace level */ 54 int token; /* if reading a token */ 55 int t_def; /* if reading a typedef */ 56 int t_level; /* typedef's brace level */ 57 char *sp; /* buffer pointer */ 58 char tok[MAXTOKEN]; /* token buffer */ 59 60 lineftell = ftell(inf); 61 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 62 while (GETC(!=, EOF)) { 63 switch (c) { 64 /* 65 * Here's where it DOESN'T handle: { 66 * foo(a) 67 * { 68 * #ifdef notdef 69 * } 70 * #endif 71 * if (a) 72 * puts("hello, world"); 73 * } 74 */ 75 case '{': 76 ++level; 77 goto endtok; 78 case '}': 79 /* 80 * if level goes below zero, try and fix 81 * it, even though we've already messed up 82 */ 83 if (--level < 0) 84 level = 0; 85 goto endtok; 86 87 case '\n': 88 SETLINE; 89 /* 90 * the above 3 cases are similar in that they 91 * are special characters that also end tokens. 92 */ 93 endtok: if (sp > tok) { 94 *sp = EOS; 95 token = YES; 96 sp = tok; 97 } 98 else 99 token = NO; 100 continue; 101 102 /* 103 * We ignore quoted strings and character constants 104 * completely. 105 */ 106 case '"': 107 case '\'': 108 (void)skip_string(c); 109 break; 110 111 /* 112 * comments can be fun; note the state is unchanged after 113 * return, in case we found: 114 * "foo() XX comment XX { int bar; }" 115 */ 116 case '/': 117 if (GETC(==, '*') || c == '/') { 118 skip_comment(c); 119 continue; 120 } 121 (void)ungetc(c, inf); 122 c = '/'; 123 goto storec; 124 125 /* hash marks flag #define's. */ 126 case '#': 127 if (sp == tok) { 128 hash_entry(); 129 break; 130 } 131 goto storec; 132 133 /* 134 * if we have a current token, parenthesis on 135 * level zero indicates a function. 136 */ 137 case '(': 138 if (!level && token) { 139 int curline; 140 141 if (sp != tok) 142 *sp = EOS; 143 /* 144 * grab the line immediately, we may 145 * already be wrong, for example, 146 * foo\n 147 * (arg1, 148 */ 149 getline(); 150 curline = lineno; 151 if (func_entry()) { 152 ++level; 153 pfnote(tok, curline); 154 } 155 break; 156 } 157 goto storec; 158 159 /* 160 * semi-colons indicate the end of a typedef; if we find a 161 * typedef we search for the next semi-colon of the same 162 * level as the typedef. Ignoring "structs", they are 163 * tricky, since you can find: 164 * 165 * "typedef long time_t;" 166 * "typedef unsigned int u_int;" 167 * "typedef unsigned int u_int [10];" 168 * 169 * If looking at a typedef, we save a copy of the last token 170 * found. Then, when we find the ';' we take the current 171 * token if it starts with a valid token name, else we take 172 * the one we saved. There's probably some reasonable 173 * alternative to this... 174 */ 175 case ';': 176 if (t_def && level == t_level) { 177 t_def = NO; 178 getline(); 179 if (sp != tok) 180 *sp = EOS; 181 pfnote(tok, lineno); 182 break; 183 } 184 goto storec; 185 186 /* 187 * store characters until one that can't be part of a token 188 * comes along; check the current token against certain 189 * reserved words. 190 */ 191 default: 192 /* ignore whitespace */ 193 if (c == ' ' || c == '\t') { 194 int save = c; 195 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 196 ; 197 if (c == EOF) 198 return; 199 (void)ungetc(c, inf); 200 c = save; 201 } 202 storec: if (!intoken(c)) { 203 if (sp == tok) 204 break; 205 *sp = EOS; 206 if (tflag) { 207 /* no typedefs inside typedefs */ 208 if (!t_def && 209 !memcmp(tok, "typedef",8)) { 210 t_def = YES; 211 t_level = level; 212 break; 213 } 214 /* catch "typedef struct" */ 215 if ((!t_def || t_level < level) 216 && (!memcmp(tok, "struct", 7) 217 || !memcmp(tok, "union", 6) 218 || !memcmp(tok, "enum", 5))) { 219 /* 220 * get line immediately; 221 * may change before '{' 222 */ 223 getline(); 224 if (str_entry(c)) 225 ++level; 226 break; 227 /* } */ 228 } 229 } 230 sp = tok; 231 } 232 else if (sp != tok || begtoken(c)) { 233 if (sp == tok + sizeof tok - 1) 234 /* Too long -- truncate it */ 235 *sp = EOS; 236 else 237 *sp++ = c; 238 token = YES; 239 } 240 continue; 241 } 242 243 sp = tok; 244 token = NO; 245 } 246 } 247 248 /* 249 * func_entry -- 250 * handle a function reference 251 */ 252 static int 253 func_entry(void) 254 { 255 int c; /* current character */ 256 int level = 0; /* for matching '()' */ 257 258 /* 259 * Find the end of the assumed function declaration. 260 * Note that ANSI C functions can have type definitions so keep 261 * track of the parentheses nesting level. 262 */ 263 while (GETC(!=, EOF)) { 264 switch (c) { 265 case '\'': 266 case '"': 267 /* skip strings and character constants */ 268 skip_string(c); 269 break; 270 case '/': 271 /* skip comments */ 272 if (GETC(==, '*') || c == '/') 273 skip_comment(c); 274 break; 275 case '(': 276 level++; 277 break; 278 case ')': 279 if (level == 0) 280 goto fnd; 281 level--; 282 break; 283 case '\n': 284 SETLINE; 285 } 286 } 287 return (NO); 288 fnd: 289 /* 290 * we assume that the character after a function's right paren 291 * is a token character if it's a function and a non-token 292 * character if it's a declaration. Comments don't count... 293 */ 294 for (;;) { 295 while (GETC(!=, EOF) && iswhite(c)) 296 if (c == '\n') 297 SETLINE; 298 if (intoken(c) || c == '{') 299 break; 300 if (c == '/' && (GETC(==, '*') || c == '/')) 301 skip_comment(c); 302 else { /* don't ever "read" '/' */ 303 (void)ungetc(c, inf); 304 return (NO); 305 } 306 } 307 if (c != '{') 308 (void)skip_key('{'); 309 return (YES); 310 } 311 312 /* 313 * hash_entry -- 314 * handle a line starting with a '#' 315 */ 316 static void 317 hash_entry(void) 318 { 319 int c; /* character read */ 320 int curline; /* line started on */ 321 char *sp; /* buffer pointer */ 322 char tok[MAXTOKEN]; /* storage buffer */ 323 324 /* ignore leading whitespace */ 325 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 326 ; 327 (void)ungetc(c, inf); 328 329 curline = lineno; 330 for (sp = tok;;) { /* get next token */ 331 if (GETC(==, EOF)) 332 return; 333 if (iswhite(c)) 334 break; 335 if (sp == tok + sizeof tok - 1) 336 /* Too long -- truncate it */ 337 *sp = EOS; 338 else 339 *sp++ = c; 340 } 341 *sp = EOS; 342 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 343 goto skip; 344 for (;;) { /* this doesn't handle "#define \n" */ 345 if (GETC(==, EOF)) 346 return; 347 if (!iswhite(c)) 348 break; 349 } 350 for (sp = tok;;) { /* get next token */ 351 if (sp == tok + sizeof tok - 1) 352 /* Too long -- truncate it */ 353 *sp = EOS; 354 else 355 *sp++ = c; 356 if (GETC(==, EOF)) 357 return; 358 /* 359 * this is where it DOESN'T handle 360 * "#define \n" 361 */ 362 if (!intoken(c)) 363 break; 364 } 365 *sp = EOS; 366 if (dflag || c == '(') { /* only want macros */ 367 getline(); 368 pfnote(tok, curline); 369 } 370 skip: if (c == '\n') { /* get rid of rest of define */ 371 SETLINE 372 if (*(sp - 1) != '\\') 373 return; 374 } 375 (void)skip_key('\n'); 376 } 377 378 /* 379 * str_entry -- 380 * handle a struct, union or enum entry 381 */ 382 static int 383 str_entry(int c) /* c is current character */ 384 { 385 int curline; /* line started on */ 386 char *sp; /* buffer pointer */ 387 char tok[LINE_MAX]; /* storage buffer */ 388 389 curline = lineno; 390 while (iswhite(c)) 391 if (GETC(==, EOF)) 392 return (NO); 393 if (c == '{') /* it was "struct {" */ 394 return (YES); 395 for (sp = tok;;) { /* get next token */ 396 if (sp == tok + sizeof tok - 1) 397 /* Too long -- truncate it */ 398 *sp = EOS; 399 else 400 *sp++ = c; 401 if (GETC(==, EOF)) 402 return (NO); 403 if (!intoken(c)) 404 break; 405 } 406 switch (c) { 407 case '{': /* it was "struct foo{" */ 408 --sp; 409 break; 410 case '\n': /* it was "struct foo\n" */ 411 SETLINE; 412 /*FALLTHROUGH*/ 413 default: /* probably "struct foo " */ 414 while (GETC(!=, EOF)) 415 if (!iswhite(c)) 416 break; 417 if (c != '{') { 418 (void)ungetc(c, inf); 419 return (NO); 420 } 421 } 422 *sp = EOS; 423 pfnote(tok, curline); 424 return (YES); 425 } 426 427 /* 428 * skip_comment -- 429 * skip over comment 430 */ 431 void 432 skip_comment(int t) /* t is comment character */ 433 { 434 int c; /* character read */ 435 int star; /* '*' flag */ 436 437 for (star = 0; GETC(!=, EOF);) 438 switch(c) { 439 /* comments don't nest, nor can they be escaped. */ 440 case '*': 441 star = YES; 442 break; 443 case '/': 444 if (star && t == '*') 445 return; 446 break; 447 case '\n': 448 if (t == '/') 449 return; 450 SETLINE; 451 /*FALLTHROUGH*/ 452 default: 453 star = NO; 454 break; 455 } 456 } 457 458 /* 459 * skip_string -- 460 * skip to the end of a string or character constant. 461 */ 462 void 463 skip_string(int key) 464 { 465 int c, 466 skip; 467 468 for (skip = NO; GETC(!=, EOF); ) 469 switch (c) { 470 case '\\': /* a backslash escapes anything */ 471 skip = !skip; /* we toggle in case it's "\\" */ 472 break; 473 case '\n': 474 SETLINE; 475 /*FALLTHROUGH*/ 476 default: 477 if (c == key && !skip) 478 return; 479 skip = NO; 480 } 481 } 482 483 /* 484 * skip_key -- 485 * skip to next char "key" 486 */ 487 int 488 skip_key(int key) 489 { 490 int c, 491 skip, 492 retval; 493 494 for (skip = retval = NO; GETC(!=, EOF);) 495 switch(c) { 496 case '\\': /* a backslash escapes anything */ 497 skip = !skip; /* we toggle in case it's "\\" */ 498 break; 499 case ';': /* special case for yacc; if one */ 500 case '|': /* of these chars occurs, we may */ 501 retval = YES; /* have moved out of the rule */ 502 break; /* not used by C */ 503 case '\'': 504 case '"': 505 /* skip strings and character constants */ 506 skip_string(c); 507 break; 508 case '/': 509 /* skip comments */ 510 if (GETC(==, '*') || c == '/') { 511 skip_comment(c); 512 break; 513 } 514 (void)ungetc(c, inf); 515 c = '/'; 516 goto norm; 517 case '\n': 518 SETLINE; 519 /*FALLTHROUGH*/ 520 default: 521 norm: 522 if (c == key && !skip) 523 return (retval); 524 skip = NO; 525 } 526 return (retval); 527 } 528