1 /* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)C.c 8.4 (Berkeley) 4/2/94 34 * $FreeBSD: src/usr.bin/ctags/C.c,v 1.3.2.2 2002/07/30 00:55:07 tjr Exp $ 35 * $DragonFly: src/usr.bin/ctags/C.c,v 1.2 2003/06/17 04:29:25 dillon Exp $ 36 */ 37 38 #include <limits.h> 39 #include <stdio.h> 40 #include <string.h> 41 42 #include "ctags.h" 43 44 static int func_entry(void); 45 static void hash_entry(void); 46 static void skip_string(int); 47 static int str_entry(int); 48 49 /* 50 * c_entries -- 51 * read .c and .h files and call appropriate routines 52 */ 53 void 54 c_entries(void) 55 { 56 int c; /* current character */ 57 int level; /* brace level */ 58 int token; /* if reading a token */ 59 int t_def; /* if reading a typedef */ 60 int t_level; /* typedef's brace level */ 61 char *sp; /* buffer pointer */ 62 char tok[MAXTOKEN]; /* token buffer */ 63 64 lineftell = ftell(inf); 65 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 66 while (GETC(!=, EOF)) { 67 switch (c) { 68 /* 69 * Here's where it DOESN'T handle: { 70 * foo(a) 71 * { 72 * #ifdef notdef 73 * } 74 * #endif 75 * if (a) 76 * puts("hello, world"); 77 * } 78 */ 79 case '{': 80 ++level; 81 goto endtok; 82 case '}': 83 /* 84 * if level goes below zero, try and fix 85 * it, even though we've already messed up 86 */ 87 if (--level < 0) 88 level = 0; 89 goto endtok; 90 91 case '\n': 92 SETLINE; 93 /* 94 * the above 3 cases are similar in that they 95 * are special characters that also end tokens. 96 */ 97 endtok: if (sp > tok) { 98 *sp = EOS; 99 token = YES; 100 sp = tok; 101 } 102 else 103 token = NO; 104 continue; 105 106 /* 107 * We ignore quoted strings and character constants 108 * completely. 109 */ 110 case '"': 111 case '\'': 112 (void)skip_string(c); 113 break; 114 115 /* 116 * comments can be fun; note the state is unchanged after 117 * return, in case we found: 118 * "foo() XX comment XX { int bar; }" 119 */ 120 case '/': 121 if (GETC(==, '*') || c == '/') { 122 skip_comment(c); 123 continue; 124 } 125 (void)ungetc(c, inf); 126 c = '/'; 127 goto storec; 128 129 /* hash marks flag #define's. */ 130 case '#': 131 if (sp == tok) { 132 hash_entry(); 133 break; 134 } 135 goto storec; 136 137 /* 138 * if we have a current token, parenthesis on 139 * level zero indicates a function. 140 */ 141 case '(': 142 if (!level && token) { 143 int curline; 144 145 if (sp != tok) 146 *sp = EOS; 147 /* 148 * grab the line immediately, we may 149 * already be wrong, for example, 150 * foo\n 151 * (arg1, 152 */ 153 getline(); 154 curline = lineno; 155 if (func_entry()) { 156 ++level; 157 pfnote(tok, curline); 158 } 159 break; 160 } 161 goto storec; 162 163 /* 164 * semi-colons indicate the end of a typedef; if we find a 165 * typedef we search for the next semi-colon of the same 166 * level as the typedef. Ignoring "structs", they are 167 * tricky, since you can find: 168 * 169 * "typedef long time_t;" 170 * "typedef unsigned int u_int;" 171 * "typedef unsigned int u_int [10];" 172 * 173 * If looking at a typedef, we save a copy of the last token 174 * found. Then, when we find the ';' we take the current 175 * token if it starts with a valid token name, else we take 176 * the one we saved. There's probably some reasonable 177 * alternative to this... 178 */ 179 case ';': 180 if (t_def && level == t_level) { 181 t_def = NO; 182 getline(); 183 if (sp != tok) 184 *sp = EOS; 185 pfnote(tok, lineno); 186 break; 187 } 188 goto storec; 189 190 /* 191 * store characters until one that can't be part of a token 192 * comes along; check the current token against certain 193 * reserved words. 194 */ 195 default: 196 /* ignore whitespace */ 197 if (c == ' ' || c == '\t') { 198 int save = c; 199 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 200 ; 201 if (c == EOF) 202 return; 203 (void)ungetc(c, inf); 204 c = save; 205 } 206 storec: if (!intoken(c)) { 207 if (sp == tok) 208 break; 209 *sp = EOS; 210 if (tflag) { 211 /* no typedefs inside typedefs */ 212 if (!t_def && 213 !memcmp(tok, "typedef",8)) { 214 t_def = YES; 215 t_level = level; 216 break; 217 } 218 /* catch "typedef struct" */ 219 if ((!t_def || t_level < level) 220 && (!memcmp(tok, "struct", 7) 221 || !memcmp(tok, "union", 6) 222 || !memcmp(tok, "enum", 5))) { 223 /* 224 * get line immediately; 225 * may change before '{' 226 */ 227 getline(); 228 if (str_entry(c)) 229 ++level; 230 break; 231 /* } */ 232 } 233 } 234 sp = tok; 235 } 236 else if (sp != tok || begtoken(c)) { 237 if (sp == tok + sizeof tok - 1) 238 /* Too long -- truncate it */ 239 *sp = EOS; 240 else 241 *sp++ = c; 242 token = YES; 243 } 244 continue; 245 } 246 247 sp = tok; 248 token = NO; 249 } 250 } 251 252 /* 253 * func_entry -- 254 * handle a function reference 255 */ 256 static int 257 func_entry(void) 258 { 259 int c; /* current character */ 260 int level = 0; /* for matching '()' */ 261 262 /* 263 * Find the end of the assumed function declaration. 264 * Note that ANSI C functions can have type definitions so keep 265 * track of the parentheses nesting level. 266 */ 267 while (GETC(!=, EOF)) { 268 switch (c) { 269 case '\'': 270 case '"': 271 /* skip strings and character constants */ 272 skip_string(c); 273 break; 274 case '/': 275 /* skip comments */ 276 if (GETC(==, '*') || c == '/') 277 skip_comment(c); 278 break; 279 case '(': 280 level++; 281 break; 282 case ')': 283 if (level == 0) 284 goto fnd; 285 level--; 286 break; 287 case '\n': 288 SETLINE; 289 } 290 } 291 return (NO); 292 fnd: 293 /* 294 * we assume that the character after a function's right paren 295 * is a token character if it's a function and a non-token 296 * character if it's a declaration. Comments don't count... 297 */ 298 for (;;) { 299 while (GETC(!=, EOF) && iswhite(c)) 300 if (c == '\n') 301 SETLINE; 302 if (intoken(c) || c == '{') 303 break; 304 if (c == '/' && (GETC(==, '*') || c == '/')) 305 skip_comment(c); 306 else { /* don't ever "read" '/' */ 307 (void)ungetc(c, inf); 308 return (NO); 309 } 310 } 311 if (c != '{') 312 (void)skip_key('{'); 313 return (YES); 314 } 315 316 /* 317 * hash_entry -- 318 * handle a line starting with a '#' 319 */ 320 static void 321 hash_entry(void) 322 { 323 int c; /* character read */ 324 int curline; /* line started on */ 325 char *sp; /* buffer pointer */ 326 char tok[MAXTOKEN]; /* storage buffer */ 327 328 /* ignore leading whitespace */ 329 while (GETC(!=, EOF) && (c == ' ' || c == '\t')) 330 ; 331 (void)ungetc(c, inf); 332 333 curline = lineno; 334 for (sp = tok;;) { /* get next token */ 335 if (GETC(==, EOF)) 336 return; 337 if (iswhite(c)) 338 break; 339 if (sp == tok + sizeof tok - 1) 340 /* Too long -- truncate it */ 341 *sp = EOS; 342 else 343 *sp++ = c; 344 } 345 *sp = EOS; 346 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 347 goto skip; 348 for (;;) { /* this doesn't handle "#define \n" */ 349 if (GETC(==, EOF)) 350 return; 351 if (!iswhite(c)) 352 break; 353 } 354 for (sp = tok;;) { /* get next token */ 355 if (sp == tok + sizeof tok - 1) 356 /* Too long -- truncate it */ 357 *sp = EOS; 358 else 359 *sp++ = c; 360 if (GETC(==, EOF)) 361 return; 362 /* 363 * this is where it DOESN'T handle 364 * "#define \n" 365 */ 366 if (!intoken(c)) 367 break; 368 } 369 *sp = EOS; 370 if (dflag || c == '(') { /* only want macros */ 371 getline(); 372 pfnote(tok, curline); 373 } 374 skip: if (c == '\n') { /* get rid of rest of define */ 375 SETLINE 376 if (*(sp - 1) != '\\') 377 return; 378 } 379 (void)skip_key('\n'); 380 } 381 382 /* 383 * str_entry -- 384 * handle a struct, union or enum entry 385 */ 386 static int 387 str_entry(int c) /* c is current character */ 388 { 389 int curline; /* line started on */ 390 char *sp; /* buffer pointer */ 391 char tok[LINE_MAX]; /* storage buffer */ 392 393 curline = lineno; 394 while (iswhite(c)) 395 if (GETC(==, EOF)) 396 return (NO); 397 if (c == '{') /* it was "struct {" */ 398 return (YES); 399 for (sp = tok;;) { /* get next token */ 400 if (sp == tok + sizeof tok - 1) 401 /* Too long -- truncate it */ 402 *sp = EOS; 403 else 404 *sp++ = c; 405 if (GETC(==, EOF)) 406 return (NO); 407 if (!intoken(c)) 408 break; 409 } 410 switch (c) { 411 case '{': /* it was "struct foo{" */ 412 --sp; 413 break; 414 case '\n': /* it was "struct foo\n" */ 415 SETLINE; 416 /*FALLTHROUGH*/ 417 default: /* probably "struct foo " */ 418 while (GETC(!=, EOF)) 419 if (!iswhite(c)) 420 break; 421 if (c != '{') { 422 (void)ungetc(c, inf); 423 return (NO); 424 } 425 } 426 *sp = EOS; 427 pfnote(tok, curline); 428 return (YES); 429 } 430 431 /* 432 * skip_comment -- 433 * skip over comment 434 */ 435 void 436 skip_comment(int t) /* t is comment character */ 437 { 438 int c; /* character read */ 439 int star; /* '*' flag */ 440 441 for (star = 0; GETC(!=, EOF);) 442 switch(c) { 443 /* comments don't nest, nor can they be escaped. */ 444 case '*': 445 star = YES; 446 break; 447 case '/': 448 if (star && t == '*') 449 return; 450 break; 451 case '\n': 452 if (t == '/') 453 return; 454 SETLINE; 455 /*FALLTHROUGH*/ 456 default: 457 star = NO; 458 break; 459 } 460 } 461 462 /* 463 * skip_string -- 464 * skip to the end of a string or character constant. 465 */ 466 void 467 skip_string(int key) 468 { 469 int c, 470 skip; 471 472 for (skip = NO; GETC(!=, EOF); ) 473 switch (c) { 474 case '\\': /* a backslash escapes anything */ 475 skip = !skip; /* we toggle in case it's "\\" */ 476 break; 477 case '\n': 478 SETLINE; 479 /*FALLTHROUGH*/ 480 default: 481 if (c == key && !skip) 482 return; 483 skip = NO; 484 } 485 } 486 487 /* 488 * skip_key -- 489 * skip to next char "key" 490 */ 491 int 492 skip_key(int key) 493 { 494 int c, 495 skip, 496 retval; 497 498 for (skip = retval = NO; GETC(!=, EOF);) 499 switch(c) { 500 case '\\': /* a backslash escapes anything */ 501 skip = !skip; /* we toggle in case it's "\\" */ 502 break; 503 case ';': /* special case for yacc; if one */ 504 case '|': /* of these chars occurs, we may */ 505 retval = YES; /* have moved out of the rule */ 506 break; /* not used by C */ 507 case '\'': 508 case '"': 509 /* skip strings and character constants */ 510 skip_string(c); 511 break; 512 case '/': 513 /* skip comments */ 514 if (GETC(==, '*') || c == '/') { 515 skip_comment(c); 516 break; 517 } 518 (void)ungetc(c, inf); 519 c = '/'; 520 goto norm; 521 case '\n': 522 SETLINE; 523 /*FALLTHROUGH*/ 524 default: 525 norm: 526 if (c == key && !skip) 527 return (retval); 528 skip = NO; 529 } 530 return (retval); 531 } 532