#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * This program examines each of its arguments for C function
 * definitions, and puts them in a file "tags" for use by the editor
 * (and anyone else who wants to).
 */

/*
 * program history:
 *	ken arnold wrote this program.  ask him.
 *	brought over to the vax by peter b. kessler 7/79
 *	who disavows any knowledge of its actions,
 *	except for the stuff related to the construction
 *	of the search patterns.
 *	Some additional enhancements made by Mark Horton, involving
 *	the options and special treatment of "main", "}" at beginning
 *	of line, and a few bug fixes.
 */

#define reg	register
#define logical	char

#define TRUE	(1)
#define FALSE	(0)

#define NCHARS		256	/* one table slot per unsigned char value */
#define LINESIZE	256	/* capacity of the current-line buffer */

/*
 * Character-class lookups.  The argument is always reduced to an
 * unsigned char so that DEL, 8-bit characters and EOF can never index
 * outside the tables.
 */
#define iswhite(arg)	(_wht[(unsigned char)(arg)])	/* T if char is white */
#define begtoken(arg)	(_btk[(unsigned char)(arg)])	/* T if char can start token */
#define intoken(arg)	(_itk[(unsigned char)(arg)])	/* T if char can be in token */
#define endtoken(arg)	(_etk[(unsigned char)(arg)])	/* T if char ends tokens */
#define isgood(arg)	(_gd[(unsigned char)(arg)])	/* T if char can be after ')' */

#define max(I1,I2)	((I1) > (I2) ? (I1) : (I2))

struct nd_st {			/* sorting structure */
	char	*func;		/* function name */
	char	*file;		/* file name */
	char	*pat;		/* search pattern */
	logical	been_warned;	/* set if noticed dup */
	struct nd_st *left,*right;	/* left and right sons */
};

typedef struct nd_st NODE;

#ifdef DEBUG
char	*unctrl(char c);
#endif

logical	number,			/* T if on line starting with # */
	term	= FALSE,	/* T if print on terminal */
	makefile= TRUE,		/* T if to creat "tags" file */
	gotone,			/* found a func already on line */
				/* boolean "func" (see init) */
	_wht[NCHARS],_etk[NCHARS],_itk[NCHARS],_btk[NCHARS],_gd[NCHARS];

char	searchar = '?';		/* use ?...? searches */
#define MAXPATTERN	50	/* according to bill */

int	lineno;			/* line number of current line */
char	line[LINESIZE],		/* current input line */
	*curfile;		/* current input file name */
const char
	*outfile= "tags",	/* output file */
	*white	= " \f\t\n",	/* white chars */
	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
				/* token ending chars */
	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
				/* token starting chars */
	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",
				/* valid in-token chars */
	*notgd	= ",;";		/* non-valid after-function chars */

int	file_num;		/* current file number */
int	aflag;			/* -a: append to tags */
int	uflag;			/* -u: update tags */
int	wflag;			/* -w: suppress warnings */

FILE	*inf,			/* ioptr for current input file */
	*outf;			/* ioptr for tags file */

long	lineftell;		/* ftell after getc( inf ) == '\n' */

NODE	*head;			/* the head of the sorted binary tree */

int	main(int ac, char *av[]);
void	init(void);
void	find_funcs(char *file);
void	C_funcs(void);
int	start_func(char **lp, char *token, char *tp);
void	add_func(char *token);
void	free_tree(NODE *node);
void	add_node(NODE *node, NODE *cur_node);
void	put_funcs(NODE *node);

/*
 * Print the usage message and give up.
 */
static void
usage(void)
{
	printf("Usage: ctags [-auw] file ...\n");
	exit(1);
}

/*
 * Allocate "size" zero-filled bytes, or exit with a diagnostic; the
 * rest of the program has no way to carry on without memory.
 */
static void *
xalloc(size_t size)
{
	void *p = calloc(1, size);

	if (p == NULL) {
		fprintf(stderr, "ctags: out of memory\n");
		exit(1);
	}
	return p;
}

/*
 * Step to the next slot of line[] and store c there.  Once the last
 * slot is reached the pointer stays put, so an over-long line garbles
 * its own tail instead of running off the end of the buffer.
 */
static char *
next_slot(char *sp, int c)
{
	if (sp < &line[LINESIZE - 1])
		sp++;
	*sp = (char) c;
	return sp;
}

/*
 * For -u: strip every entry that refers to source file "src" out of
 * the existing tags file.  The work is still done by the shell, but
 * the command buffer is sized to fit.  The file name is placed inside
 * single quotes, so a name that itself contains a quote cannot be
 * passed safely and is skipped with a diagnostic.
 */
static void
purge_old_tags(const char *src)
{
	static const char fmt[] =
	    "mv %s OTAGS ; fgrep -v '\t%s\t' OTAGS > %s ; rm OTAGS";
	char *cmd;

	if (strchr(src, '\'') != NULL) {
		fprintf(stderr,
		    "ctags: cannot update entries for %s: name contains a quote\n",
		    src);
		return;
	}
	/* sizeof fmt already covers the NUL and the three "%s" */
	cmd = xalloc(sizeof fmt + 2 * strlen(outfile) + strlen(src));
	sprintf(cmd, fmt, outfile, src, outfile);
	if (system(cmd) == -1)
		perror("system");
	free(cmd);
}

#ifdef DEBUG
/*
 * Print one line of scanner state; "sep" is the character printed
 * right after the current input char (':' or '-').
 */
static void
trace_state(int sep, int c, const char *sp, const char *tp, const char *token,
    long insub, int incom, int midtoken, int inquote, int inchar)
{
	printf("%2.2s%c ",unctrl(c),sep);
	printf("%2.2s ",unctrl(*sp));
	printf("%2.2s ",unctrl(*tp));
	printf("%2.2s ",unctrl(*token));
	printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number);
}
#endif

/*
 * Parse the option letters (-a, -u, -w, possibly combined), scan every
 * named file for function definitions, then write the sorted entries
 * to the tags file.  With -u the old entries for the named files are
 * removed first and the new ones appended.
 */
int
main(int ac, char *av[])
{
	int i;

	while (ac > 1 && av[1][0] == '-') {
		for (i = 1; av[1][i]; i++) {
			switch (av[1][i]) {
			case 'a':
				aflag++;
				break;
			case 'u':
				uflag++;
				break;
			case 'w':
				wflag++;
				break;
			default:
				usage();
			}
		}
		ac--; av++;
	}

	if (ac <= 1)
		usage();

	init();			/* set up boolean "functions" */
	/*
	 * loop through files finding functions
	 */
	for (file_num = 1; file_num < ac; file_num++)
		find_funcs(av[file_num]);

	if (uflag) {
		for (i = 1; i < ac; i++)
			purge_old_tags(av[i]);
		aflag++;
	}

	if ((outf = fopen(outfile, aflag ? "a" : "w")) == NULL) {
		perror(outfile);
		exit(1);
	}
	put_funcs(head);	/* put the data in "tags" */
	if (fclose(outf) == EOF) {	/* catches a failed final flush */
		perror(outfile);
		exit(1);
	}
	exit(0);
}

/*
 * This routine sets up the boolean pseudo-functions, which work by
 * looking a character up in a table of flags.  Each table is first
 * cleared and then the entries subscripted by the characters of the
 * matching string are set: "_wht" of a char is TRUE if it is in the
 * string "white", else FALSE, and likewise for "_etk", "_itk" and
 * "_btk".  "_gd" is the other way round: it starts out all TRUE and
 * the characters in "notgd" are set to FALSE.
 */
void
init(void)
{
	const char *sp;
	int i;

	for (i = 0; i < NCHARS; i++) {
		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
		_gd[i] = TRUE;
	}
	for (sp = white; *sp; sp++)
		_wht[(unsigned char) *sp] = TRUE;
	for (sp = endtk; *sp; sp++)
		_etk[(unsigned char) *sp] = TRUE;
	for (sp = intk; *sp; sp++)
		_itk[(unsigned char) *sp] = TRUE;
	for (sp = begtk; *sp; sp++)
		_btk[(unsigned char) *sp] = TRUE;
	for (sp = notgd; *sp; sp++)
		_gd[(unsigned char) *sp] = FALSE;
}

/*
 * This routine opens the specified file and calls the function
 * which finds the function definitions.  A file that cannot be
 * opened is reported and skipped.  The copy of the name made here is
 * shared by every tree node created for the file, so it is never
 * freed.
 */
void
find_funcs(char *file)
{
	if ((inf = fopen(file, "r")) == NULL) {
		perror(file);
		return;
	}

	curfile = xalloc(strlen(file) + 1);
	strcpy(curfile, file);
	lineno = 1;
	lineftell = 0L;		/* first line of this file starts at 0 */
	C_funcs();		/* find the C-style functions */
	fclose(inf);
}

/*
 * This routine finds functions in C syntax and adds them
 * to the list.  It reads "inf" a character at a time, keeping the
 * current line in line[], and hands each candidate token that is
 * found outside braces, comments and quotes to start_func().
 */
void
C_funcs(void)
{
	int	c;		/* current input char, or EOF */
	char	*token,		/* start of current token */
		*tp;		/* end of current token */
	logical	incom,		/* T if inside a comment */
		inquote,	/* T if inside a quoted string */
		inchar,		/* T if inside a single char ' */
		midtoken;	/* T if in middle of token */
	char	*sp;		/* slot of line[] for the current char */
	char	*last = &line[LINESIZE - 1];	/* final usable slot */
	char	tok[LINESIZE + 1];	/* copy of a token, NUL terminated */
	long	insub;		/* level of "{}"s deep */

	/*
	 * init boolean flags, counters, and pointers
	 */
	number = gotone = midtoken = inquote = inchar = incom = FALSE;
	insub = 0L;
	sp = tp = token = line;
#ifdef DEBUG
	printf(" t s c m q c g n\n");
	printf(" s t k u o i u h o u\n");
	printf(" c p p n b m d o r t m\n");
#endif
	while ((c = getc(inf)) != EOF) {
		*sp = (char) c;
#ifdef DEBUG
		trace_state(':',c,sp,tp,token,insub,incom,midtoken,inquote,inchar);
#endif
		/*
		 * action based on mixture of character type, *sp,
		 * and logical flags
		 */
		if (c == '\\') {
			c = getc(inf);
			sp = next_slot(sp, c);
			/*
			 * Handling of backslash is very naive.
			 * We do, however, turn escaped newlines
			 * into spaces.
			 */
			if (c == '\n')
				c = ' ';
		}
		else if (incom) {
			if (c == '*') {
				/* swallow the run of '*'s and the char after it */
				do {
					c = getc(inf);
					sp = next_slot(sp, c);
#ifdef DEBUG
					trace_state('-',c,sp,tp,token,insub,incom,midtoken,inquote,inchar);
#endif
				} while (c == '*');
				if (c == '/')
					incom = FALSE;
			}
		}
		else if (inquote) {
			/*
			 * Escaped quotes never get here; the backslash
			 * case above has already eaten them.  An unescaped
			 * newline ends the string, so that one stray quote
			 * cannot hide the rest of the file.
			 */
			if (c == '"' || c == '\n')
				inquote = FALSE;
			if (c != '\n')
				continue;
		}
		else if (inchar) {
			if (c == '\'' || c == '\n')
				inchar = FALSE;
			if (c != '\n')
				continue;
		}
		else if (c == '"')
			inquote = TRUE;
		else if (c == '\'')
			inchar = TRUE;
		else if (c == '/') {
			int peek = getc(inf);

			if (peek == '*') {
				sp = next_slot(sp, peek);
				incom = TRUE;
			}
			else {
				/*
				 * Not a comment: put the char back and leave
				 * c alone, so that a newline after the '/'
				 * is only seen once.
				 */
				ungetc(peek, inf);
			}
		}
		else if (c == '#' && sp == line)
			number = TRUE;
		else if (c == '{')
			insub++;
		else if (c == '}') {
			if (sp == line)
				/*
				 * Kludge to get back in sync after getting confused.
				 * We really shouldn't be looking at indenting style,
				 * but tricking with the preprocessor can get us off,
				 * and most people indent this way anyway.
				 * This resets level of indenting to zero if '}' is
				 * found at beginning of line.
				 */
				insub = 0;
			else
				insub--;
		}
		else if (!insub && !inquote && !inchar && !gotone) {
			if (midtoken) {
				if (endtoken(c)) {
					if (start_func(&sp, token, tp)) {
						size_t len = (size_t) (tp - token) + 1;

						/* len <= LINESIZE, so tok has room */
						memcpy(tok, token, len);
						tok[len] = '\0';
						add_func(tok);
						gotone = TRUE;
					}
					midtoken = FALSE;
					token = sp;
				}
				else if (intoken(c) && tp < sp)
					tp++;	/* tp trails sp; never passes it */
			}
			else if (begtoken(c)) {
				token = tp = sp;
				midtoken = TRUE;
			}
		}

		/*
		 * move on to next char, and set flags accordingly
		 */
		if (sp < last)
			sp++;
		if (c == '\n') {
			tp = token = sp = line;
			lineftell = ftell( inf );
#ifdef DEBUG
			printf("lineftell saved as %ld\n",lineftell);
#endif
			number = gotone = midtoken = inquote = inchar = FALSE;
			lineno++;
		}
	}
}

/*
 * This routine checks to see if the current token is
 * at the start of a function.  It updates the input line
 * so that the '(' will be in it when it returns.
 *
 * lp points at the caller's position in line[]; it is updated to the
 * slot just before the last character read, and that character is
 * pushed back on the input.  token and tp delimit the token (tp is
 * its last character).  Returns non-zero if the token is followed by
 * a parenthesized list and then by a character that may follow a
 * function header.
 */
int
start_func(char **lp, char *token, char *tp)
{
	int	c;
	char	*sp;
	static logical found;	/* non-"define" tokens seen on # lines */
	logical	firsttok;	/* T if have seen first token in ()'s */
	int	bad;

	sp = *lp;
	c = (unsigned char) *sp;
	bad = FALSE;
	if (!number) {		/* space is not allowed in macro defs */
		while (iswhite(c)) {
			c = getc(inf);
			sp = next_slot(sp, c);
#ifdef DEBUG
			printf("%2.2s:\n",unctrl(c));
#endif
		}
	}
	/* the following tries to make it so that a #define a b(c) */
	/* doesn't count as a define of b. */
	else {
		logical define;

		define = (tp - token + 1 == 6 &&
		    strncmp(token, "define", 6) == 0);
		if (define)
			found = 0;
		else if (found < 2)
			found++;	/* saturate; a char counter would wrap */
		if (found >= 2) {
			gotone = TRUE;
			bad = TRUE;
			goto ret;
		}
	}
	if (c != '(') {
		bad = TRUE;
		goto ret;
	}
	firsttok = FALSE;
	for (;;) {
		c = getc(inf);
		sp = next_slot(sp, c);
		if (c == ')')
			break;
		if (c == EOF) {		/* unterminated list */
			bad = TRUE;
			goto ret;
		}
		/*
		 * This line used to confuse ctags:
		 *	int	(*oldhup)();
		 * This fixes it. A nonwhite char before the first
		 * token, other than a / (in case of a comment in there)
		 * makes this not a declaration.
		 */
		if (begtoken(c) || c == '/')
			firsttok = TRUE;
		else if (!iswhite(c) && !firsttok) {
			bad = TRUE;
			goto ret;
		}
#ifdef DEBUG
		printf("%2.2s:\n",unctrl(c));
#endif
	}
#ifdef DEBUG
	printf("%2.2s:\n",unctrl(c));
#endif
	/* find the first non-white char after the ')' */
	do {
		c = getc(inf);
		sp = next_slot(sp, c);
#ifdef DEBUG
		printf("%2.2s:\n",unctrl(c));
#endif
	} while (iswhite(c));
ret:
	*lp = --sp;
	ungetc(c, inf);		/* does nothing when c is EOF */
	return !bad && c != EOF && isgood(c);
}

/*
 * This routine adds a function to the list.  It builds a node holding
 * the function name, the current file name and a search pattern made
 * from the start of the line the function was found on, and inserts
 * the node in the tree.  Running out of memory is fatal.
 */
void
add_func(char *token)
{
	char	*fp, *pp;
	NODE	*np;

	if ((np = calloc(1, sizeof *np)) == NULL) {
		fprintf(stderr, "too many functions to sort\n");
		exit(1);
	}
	if (strcmp(token, "main") == 0) {
		size_t len;

		/*
		 * Since there are so many directories with lots of
		 * misc. complete programs in them, main tends to get
		 * redefined a lot. So we change all mains to instead
		 * refer to the name of the file, without leading
		 * pathname components and without a trailing .c.
		 */
		fp = curfile;
		for (pp = curfile; *pp; pp++)
			if (*pp == '/')
				fp = pp + 1;
		len = strlen(fp) + 1;		/* 'M' plus the base name */
		np->func = xalloc(len + 1);
		np->func[0] = 'M';
		strcpy(np->func + 1, fp);
		/* drop a two character suffix such as ".c" */
		if (len >= 2 && np->func[len - 2] == '.')
			np->func[len - 2] = '\0';
	}
	else {
		np->func = xalloc(strlen(token) + 1);
		strcpy(np->func, token);
	}
	np->file = curfile;
	{	/*
		 * this change to make the whole line the pattern
		 */
		long	saveftell = ftell( inf );
		char	buf[MAXPATTERN + 2];	/* pattern, '$' and NUL */
		int	patlen = 0;
		int	ch;

		fseek( inf , lineftell , SEEK_SET );
#ifdef DEBUG
		printf("saveftell=%ld, lseek back to %ld\n",saveftell,lineftell);
#endif
		/* copy up to end of line, the search char, or the limit */
		while ((ch = getc( inf )) != EOF && ch != '\n' &&
		    ch != searchar && patlen < MAXPATTERN)
			buf[patlen++] = (char) ch;
		if (ch == '\n')
			buf[patlen++] = '$';
		buf[patlen] = '\0';
		np->pat = xalloc((size_t) patlen + 1);
		strcpy(np->pat, buf);
		fseek( inf , saveftell , SEEK_SET );
#ifdef DEBUG
		printf("seek back to %ld, ftell is now %ld\n",saveftell,ftell(inf));
#endif
	}
#ifdef DEBUG
	printf("\"%s\"\t\"%s\"\t\"%s\"\n",np->func,np->file,np->pat);
#endif
	if (head == NULL)
		head = np;
	else
		add_node(np, head);
}

/*
 * This routine frees the entire tree from the node down, along with
 * each node's name and pattern strings.  The file name is shared
 * between nodes and is left alone.
 */
void
free_tree(NODE *node)
{
	NODE *left;

	while (node) {
		free_tree(node->right);
		left = node->left;	/* fetch before the node goes away */
		free(node->func);
		free(node->pat);
		free(node);
		node = left;
	}
}

/*
 * This routine finds the node where the new function node
 * should be added, walking down from cur_node, and links it in.
 * A second definition of a name in the same file is reported (unless
 * -w was given) and the new node is discarded and freed.  The same
 * name in another file only draws a warning, once per existing node,
 * and the new node is still added.
 */
void
add_node(NODE *node, NODE *cur_node)
{
	int	dif;
	NODE	**link;

	for (;;) {
		dif = strcmp(node->func, cur_node->func);
#ifdef DEBUG
		printf("strcmp(\"%s\",\"%s\") == %d\n",node->func,cur_node->func,dif);
#endif
		if (dif == 0) {
			/* compare names too: a file may be listed twice */
			if (node->file == cur_node->file ||
			    strcmp(node->file, cur_node->file) == 0) {
				if (!wflag) {
					fprintf(stderr,"Duplicate function in file \"%s\", line %d: %s\n",node->file,lineno,node->func);
					fprintf(stderr,"Second entry ignored\n");
				}
				free(node->func);
				free(node->pat);
				free(node);
				return;
			}
			if (!cur_node->been_warned && !wflag)
				fprintf(stderr,"Duplicate function name in files %s and %s: %s (Warning only)\n",
					node->file, cur_node->file, node->func);
			cur_node->been_warned = TRUE;
		}
		link = (dif < 0) ? &cur_node->left : &cur_node->right;
		if (*link == NULL) {
#ifdef DEBUG
			if (dif < 0)
				printf("adding to left branch\n");
			else
				printf("adding to right branch\n");
#endif
			*link = node;
			return;
		}
		cur_node = *link;
	}
}

/*
 * This routine puts the functions in the file: the tree is walked in
 * order and one line (name, file and search pattern, separated by
 * tabs) is written to "outf" for each node.
 */
void
put_funcs(NODE *node)
{
	if (node == NULL)
		return;
	put_funcs(node->left);
	fprintf(outf,"%s\t%s\t%c^%s%c\n",node->func,node->file
		,searchar,node->pat,searchar);
	put_funcs(node->right);
}

#ifdef DEBUG
/*
 * Return a printable form of c in a static buffer, which the next
 * call overwrites.
 */
char *
unctrl(char c)
{
	static char buf[3];

	if (c>=' ' && c<='~') {
		buf[0] = c;
		buf[1] = 0;
	} else if (c > '~') {
		buf[0] = '^';
		buf[1] = '?';
		buf[2] = 0;
	} else if (c < 0) {
		buf[0] = buf[1] = '?';
		buf[2] = 0;
	} else {
		buf[0] = '\\';
		buf[2] = 0;
		switch(c) {
		case '\b':
			buf[1] = 'b';
			break;
		case '\t':
			buf[1] = 't';
			break;
		case '\n':
			buf[1] = 'n';
			break;
		default:
			buf[0] = '^';
			buf[1] = c + 64;
		}
	}
	return(buf);
}
#endif