1 /* 2 * Copyright (c) 1987, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 */ 7 8 #ifndef lint 9 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 04/02/94"; 10 #endif /* not lint */ 11 12 #include <limits.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 #include "ctags.h" 17 18 static int func_entry __P((void)); 19 static void hash_entry __P((void)); 20 static void skip_string __P((int)); 21 static int str_entry __P((int)); 22 23 /* 24 * c_entries -- 25 * read .c and .h files and call appropriate routines 26 */ 27 void 28 c_entries() 29 { 30 int c; /* current character */ 31 int level; /* brace level */ 32 int token; /* if reading a token */ 33 int t_def; /* if reading a typedef */ 34 int t_level; /* typedef's brace level */ 35 char *sp; /* buffer pointer */ 36 char tok[MAXTOKEN]; /* token buffer */ 37 38 lineftell = ftell(inf); 39 sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1; 40 while (GETC(!=, EOF)) { 41 switch (c) { 42 /* 43 * Here's where it DOESN'T handle: { 44 * foo(a) 45 * { 46 * #ifdef notdef 47 * } 48 * #endif 49 * if (a) 50 * puts("hello, world"); 51 * } 52 */ 53 case '{': 54 ++level; 55 goto endtok; 56 case '}': 57 /* 58 * if level goes below zero, try and fix 59 * it, even though we've already messed up 60 */ 61 if (--level < 0) 62 level = 0; 63 goto endtok; 64 65 case '\n': 66 SETLINE; 67 /* 68 * the above 3 cases are similar in that they 69 * are special characters that also end tokens. 70 */ 71 endtok: if (sp > tok) { 72 *sp = EOS; 73 token = YES; 74 sp = tok; 75 } 76 else 77 token = NO; 78 continue; 79 80 /* 81 * We ignore quoted strings and character constants 82 * completely. 83 */ 84 case '"': 85 case '\'': 86 (void)skip_string(c); 87 break; 88 89 /* 90 * comments can be fun; note the state is unchanged after 91 * return, in case we found: 92 * "foo() XX comment XX { int bar; }" 93 */ 94 case '/': 95 if (GETC(==, '*')) { 96 skip_comment(); 97 continue; 98 } 99 (void)ungetc(c, inf); 100 c = '/'; 101 goto storec; 102 103 /* hash marks flag #define's. */ 104 case '#': 105 if (sp == tok) { 106 hash_entry(); 107 break; 108 } 109 goto storec; 110 111 /* 112 * if we have a current token, parenthesis on 113 * level zero indicates a function. 114 */ 115 case '(': 116 if (!level && token) { 117 int curline; 118 119 if (sp != tok) 120 *sp = EOS; 121 /* 122 * grab the line immediately, we may 123 * already be wrong, for example, 124 * foo\n 125 * (arg1, 126 */ 127 getline(); 128 curline = lineno; 129 if (func_entry()) { 130 ++level; 131 pfnote(tok, curline); 132 } 133 break; 134 } 135 goto storec; 136 137 /* 138 * semi-colons indicate the end of a typedef; if we find a 139 * typedef we search for the next semi-colon of the same 140 * level as the typedef. Ignoring "structs", they are 141 * tricky, since you can find: 142 * 143 * "typedef long time_t;" 144 * "typedef unsigned int u_int;" 145 * "typedef unsigned int u_int [10];" 146 * 147 * If looking at a typedef, we save a copy of the last token 148 * found. Then, when we find the ';' we take the current 149 * token if it starts with a valid token name, else we take 150 * the one we saved. There's probably some reasonable 151 * alternative to this... 152 */ 153 case ';': 154 if (t_def && level == t_level) { 155 t_def = NO; 156 getline(); 157 if (sp != tok) 158 *sp = EOS; 159 pfnote(tok, lineno); 160 break; 161 } 162 goto storec; 163 164 /* 165 * store characters until one that can't be part of a token 166 * comes along; check the current token against certain 167 * reserved words. 168 */ 169 default: 170 storec: if (!intoken(c)) { 171 if (sp == tok) 172 break; 173 *sp = EOS; 174 if (tflag) { 175 /* no typedefs inside typedefs */ 176 if (!t_def && 177 !memcmp(tok, "typedef",8)) { 178 t_def = YES; 179 t_level = level; 180 break; 181 } 182 /* catch "typedef struct" */ 183 if ((!t_def || t_level < level) 184 && (!memcmp(tok, "struct", 7) 185 || !memcmp(tok, "union", 6) 186 || !memcmp(tok, "enum", 5))) { 187 /* 188 * get line immediately; 189 * may change before '{' 190 */ 191 getline(); 192 if (str_entry(c)) 193 ++level; 194 break; 195 /* } */ 196 } 197 } 198 sp = tok; 199 } 200 else if (sp != tok || begtoken(c)) { 201 *sp++ = c; 202 token = YES; 203 } 204 continue; 205 } 206 207 sp = tok; 208 token = NO; 209 } 210 } 211 212 /* 213 * func_entry -- 214 * handle a function reference 215 */ 216 static int 217 func_entry() 218 { 219 int c; /* current character */ 220 int level = 0; /* for matching '()' */ 221 222 /* 223 * Find the end of the assumed function declaration. 224 * Note that ANSI C functions can have type definitions so keep 225 * track of the parentheses nesting level. 226 */ 227 while (GETC(!=, EOF)) { 228 switch (c) { 229 case '\'': 230 case '"': 231 /* skip strings and character constants */ 232 skip_string(c); 233 break; 234 case '/': 235 /* skip comments */ 236 if (GETC(==, '*')) 237 skip_comment(); 238 break; 239 case '(': 240 level++; 241 break; 242 case ')': 243 if (level == 0) 244 goto fnd; 245 level--; 246 break; 247 case '\n': 248 SETLINE; 249 } 250 } 251 return (NO); 252 fnd: 253 /* 254 * we assume that the character after a function's right paren 255 * is a token character if it's a function and a non-token 256 * character if it's a declaration. Comments don't count... 257 */ 258 for (;;) { 259 while (GETC(!=, EOF) && iswhite(c)) 260 if (c == '\n') 261 SETLINE; 262 if (intoken(c) || c == '{') 263 break; 264 if (c == '/' && GETC(==, '*')) 265 skip_comment(); 266 else { /* don't ever "read" '/' */ 267 (void)ungetc(c, inf); 268 return (NO); 269 } 270 } 271 if (c != '{') 272 (void)skip_key('{'); 273 return (YES); 274 } 275 276 /* 277 * hash_entry -- 278 * handle a line starting with a '#' 279 */ 280 static void 281 hash_entry() 282 { 283 int c; /* character read */ 284 int curline; /* line started on */ 285 char *sp; /* buffer pointer */ 286 char tok[MAXTOKEN]; /* storage buffer */ 287 288 curline = lineno; 289 for (sp = tok;;) { /* get next token */ 290 if (GETC(==, EOF)) 291 return; 292 if (iswhite(c)) 293 break; 294 *sp++ = c; 295 } 296 *sp = EOS; 297 if (memcmp(tok, "define", 6)) /* only interested in #define's */ 298 goto skip; 299 for (;;) { /* this doesn't handle "#define \n" */ 300 if (GETC(==, EOF)) 301 return; 302 if (!iswhite(c)) 303 break; 304 } 305 for (sp = tok;;) { /* get next token */ 306 *sp++ = c; 307 if (GETC(==, EOF)) 308 return; 309 /* 310 * this is where it DOESN'T handle 311 * "#define \n" 312 */ 313 if (!intoken(c)) 314 break; 315 } 316 *sp = EOS; 317 if (dflag || c == '(') { /* only want macros */ 318 getline(); 319 pfnote(tok, curline); 320 } 321 skip: if (c == '\n') { /* get rid of rest of define */ 322 SETLINE 323 if (*(sp - 1) != '\\') 324 return; 325 } 326 (void)skip_key('\n'); 327 } 328 329 /* 330 * str_entry -- 331 * handle a struct, union or enum entry 332 */ 333 static int 334 str_entry(c) 335 int c; /* current character */ 336 { 337 int curline; /* line started on */ 338 char *sp; /* buffer pointer */ 339 char tok[LINE_MAX]; /* storage buffer */ 340 341 curline = lineno; 342 while (iswhite(c)) 343 if (GETC(==, EOF)) 344 return (NO); 345 if (c == '{') /* it was "struct {" */ 346 return (YES); 347 for (sp = tok;;) { /* get next token */ 348 *sp++ = c; 349 if (GETC(==, EOF)) 350 return (NO); 351 if (!intoken(c)) 352 break; 353 } 354 switch (c) { 355 case '{': /* it was "struct foo{" */ 356 --sp; 357 break; 358 case '\n': /* it was "struct foo\n" */ 359 SETLINE; 360 /*FALLTHROUGH*/ 361 default: /* probably "struct foo " */ 362 while (GETC(!=, EOF)) 363 if (!iswhite(c)) 364 break; 365 if (c != '{') { 366 (void)ungetc(c, inf); 367 return (NO); 368 } 369 } 370 *sp = EOS; 371 pfnote(tok, curline); 372 return (YES); 373 } 374 375 /* 376 * skip_comment -- 377 * skip over comment 378 */ 379 void 380 skip_comment() 381 { 382 int c; /* character read */ 383 int star; /* '*' flag */ 384 385 for (star = 0; GETC(!=, EOF);) 386 switch(c) { 387 /* comments don't nest, nor can they be escaped. */ 388 case '*': 389 star = YES; 390 break; 391 case '/': 392 if (star) 393 return; 394 break; 395 case '\n': 396 SETLINE; 397 /*FALLTHROUGH*/ 398 default: 399 star = NO; 400 break; 401 } 402 } 403 404 /* 405 * skip_string -- 406 * skip to the end of a string or character constant. 407 */ 408 void 409 skip_string(key) 410 int key; 411 { 412 int c, 413 skip; 414 415 for (skip = NO; GETC(!=, EOF); ) 416 switch (c) { 417 case '\\': /* a backslash escapes anything */ 418 skip = !skip; /* we toggle in case it's "\\" */ 419 break; 420 case '\n': 421 SETLINE; 422 /*FALLTHROUGH*/ 423 default: 424 if (c == key && !skip) 425 return; 426 skip = NO; 427 } 428 } 429 430 /* 431 * skip_key -- 432 * skip to next char "key" 433 */ 434 int 435 skip_key(key) 436 int key; 437 { 438 int c, 439 skip, 440 retval; 441 442 for (skip = retval = NO; GETC(!=, EOF);) 443 switch(c) { 444 case '\\': /* a backslash escapes anything */ 445 skip = !skip; /* we toggle in case it's "\\" */ 446 break; 447 case ';': /* special case for yacc; if one */ 448 case '|': /* of these chars occurs, we may */ 449 retval = YES; /* have moved out of the rule */ 450 break; /* not used by C */ 451 case '\'': 452 case '"': 453 /* skip strings and character constants */ 454 skip_string(c); 455 break; 456 case '/': 457 /* skip comments */ 458 if (GETC(==, '*')) { 459 skip_comment(); 460 break; 461 } 462 (void)ungetc(c, inf); 463 c = '/'; 464 goto norm; 465 case '\n': 466 SETLINE; 467 /*FALLTHROUGH*/ 468 default: 469 norm: 470 if (c == key && !skip) 471 return (retval); 472 skip = NO; 473 } 474 return (retval); 475 } 476