#ifndef lint
static char *sccsid = "@(#)ctags.c 4.4 (Berkeley) 8/30/82";
#endif

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/*
 * ctags: create a tags file
 *
 * Scans C sources (and, for files with a one-letter suffix other than
 * .c/.h, Fortran/Pascal/Ratfor sources) for function and typedef
 * definitions, collects them in a sorted binary tree and writes them
 * either as a "tags" file or, with -x/-v, as an index on stdout.
 */

typedef char logical;

#define TRUE	(1)
#define FALSE	(0)

#define NCHARS	256		/* one table slot per unsigned char value */

/*
 * Character-class predicates.  The argument is reduced to unsigned char
 * so that EOF and bytes with the high bit set index inside the tables.
 */
#define iswhite(arg)	(wht_tbl[(unsigned char)(arg)])	/* T if char is white		*/
#define begtoken(arg)	(btk_tbl[(unsigned char)(arg)])	/* T if char can start token	*/
#define intoken(arg)	(itk_tbl[(unsigned char)(arg)])	/* T if char can be in token	*/
#define endtoken(arg)	(etk_tbl[(unsigned char)(arg)])	/* T if char ends tokens	*/
#define isgood(arg)	(gd_tbl[(unsigned char)(arg)])	/* T if char can be after ')'	*/

struct nd_st {			/* sorting structure			*/
	char	*entry;		/* function or type name		*/
	char	*file;		/* file name				*/
	logical	f;		/* use pattern or line no		*/
	int	lno;		/* for -x option			*/
	char	*pat;		/* search pattern			*/
	logical	been_warned;	/* set if noticed dup			*/
	struct nd_st *left, *right;	/* left and right sons		*/
};

typedef struct nd_st NODE;

static logical	number,		/* T if on line starting with #		*/
		gotone,		/* found a func already on line		*/
				/* boolean "func" tables (see init)	*/
		wht_tbl[NCHARS], etk_tbl[NCHARS], itk_tbl[NCHARS],
		btk_tbl[NCHARS], gd_tbl[NCHARS];

/*
 * typedefs are recognized using a simple finite automaton;
 * tydef is its state variable.
 */
typedef enum { none, begin, middle, end } TYST;

static TYST tydef = none;

static char	searchar = '/';		/* use /.../ searches		*/

static int	lineno;			/* line number of current line	*/
static char	line[4*BUFSIZ],		/* current input line		*/
		*curfile,		/* current input file name	*/
		*outfile = "tags";	/* output file			*/
static const char
		*white = " \f\t\n",	/* white chars			*/
		*endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
					/* token ending chars		*/
		*begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
					/* token starting chars		*/
		*intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",
					/* valid in-token chars		*/
		*notgd = ",;";		/* non-valid after-function chars */

/*
 * Once the write pointer into line[] passes LINE_SOFT_END the scanner
 * starts a fresh logical line; LINE_HARD_END is the last slot a
 * look-ahead may ever write to.
 */
#define LINE_SOFT_END	(&line[sizeof (line) - BUFSIZ])
#define LINE_HARD_END	(&line[sizeof (line) - 2])

static int	file_num;	/* current file number			*/
static int	aflag;		/* -a: append to tags			*/
static int	tflag;		/* -t: create tags for typedefs		*/
static int	uflag;		/* -u: update tags			*/
static int	wflag;		/* -w: suppress warnings		*/
static int	vflag;		/* -v: create vgrind style index output	*/
static int	xflag;		/* -x: create cxref style output	*/

static char	lbuf[BUFSIZ];	/* text of the line holding the current entry */

static FILE	*inf,		/* ioptr for current input file		*/
		*outf;		/* ioptr for tags file			*/

static long	lineftell;	/* ftell after getc( inf ) == '\n'	*/

static NODE	*head;		/* the head of the sorted binary tree	*/

static void	usage(void);
static void	*xmalloc(size_t size);
static void	run_command(const char *fmt, const char *a, const char *b,
		    const char *c);
static char	*bump(char *p);
static void	init(void);
static void	find_entries(char *file);
static void	pfnote(char *name, int ln, int f);
static void	C_entries(void);
static int	start_entry(char **lp, char *token, int *f);
static void	fetch_line(void);
static void	free_tree(NODE *node);
static int	add_node(NODE *node, NODE *cur_node);
static void	put_entries(NODE *node);
static int	PF_funcs(FILE *fi);
static int	tail(char *cp);
static void	takeprec(void);
static void	getit(void);
static char	*savestr(const char *cp);

/*
 * Parse the option letters, scan every named file, then write the
 * collected entries: to stdout for -x/-v, otherwise to the tags file
 * (replacing stale entries first and re-sorting afterwards for -u).
 */
int
main(int ac, char *av[])
{
	int i;

	while (ac > 1 && av[1][0] == '-') {
		for (i = 1; av[1][i]; i++) {
			switch (av[1][i]) {
			case 'B':
				searchar = '?';
				break;
			case 'F':
				searchar = '/';
				break;
			case 'a':
				aflag++;
				break;
			case 't':
				tflag++;
				break;
			case 'u':
				uflag++;
				break;
			case 'w':
				wflag++;
				break;
			case 'v':
				vflag++;
				xflag++;
				break;
			case 'x':
				xflag++;
				break;
			default:
				usage();
			}
		}
		ac--; av++;
	}

	if (ac <= 1)
		usage();

	init();			/* set up boolean "functions"		*/
	/*
	 * loop through files finding functions
	 */
	for (file_num = 1; file_num < ac; file_num++)
		find_entries(av[file_num]);

	if (xflag) {
		put_entries(head);
		exit(0);
	}
	if (uflag) {
		/* drop the old entries of every file we just rescanned */
		for (i = 1; i < ac; i++)
			run_command(
			    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
			    outfile, av[i], outfile);
		aflag++;
	}
	outf = fopen(outfile, aflag ? "a" : "w");
	if (outf == NULL) {
		perror(outfile);
		exit(1);
	}
	put_entries(head);
	fclose(outf);
	if (uflag)
		run_command("sort %s -o %s", outfile, outfile, "");
	exit(0);
}

/*
 * Print the usage line and terminate with status 1.
 */
static void
usage(void)
{
	printf("Usage: ctags [-BFatuwvx] file ...\n");
	exit(1);
}

/*
 * malloc() that never returns NULL: reports the failure and exits.
 */
static void *
xmalloc(size_t size)
{
	void *p = malloc(size);

	if (p == NULL) {
		fprintf(stderr, "ctags: out of memory\n");
		exit(1);
	}
	return (p);
}

/*
 * Format a shell command from fmt (which may use up to three %s
 * conversions, filled from a, b and c in order) and run it with
 * system().  The buffer is sized from the arguments, so long file
 * names cannot overflow it.
 * NOTE(review): the names are still interpolated into a shell command
 * unquoted, exactly as before; file names containing shell
 * metacharacters are not safe here.
 */
static void
run_command(const char *fmt, const char *a, const char *b, const char *c)
{
	char *cmd;

	/* every %s only ever grows by the length of its argument */
	cmd = xmalloc(strlen(fmt) + strlen(a) + strlen(b) + strlen(c) + 1);
	sprintf(cmd, fmt, a, b, c);
	(void) system(cmd);
	free(cmd);
}

/*
 * Advance a write pointer into line[] by one, but never past
 * LINE_HARD_END, so look-ahead reads cannot run off the buffer.
 */
static char *
bump(char *p)
{
	return (p < LINE_HARD_END ? p + 1 : p);
}

/*
 * This routine sets up the boolean pseudo-functions, which work by
 * table lookup on the character.  Each table is first cleared (the
 * "good after ')'" table is set) for every character, and then the
 * slots subscripted by the characters of the corresponding string are
 * flipped.  Thus iswhite(c) is TRUE exactly when c occurs in the
 * string "white", and likewise for the other classes.
 */
static void
init(void)
{
	const char *sp;
	int i;

	for (i = 0; i < NCHARS; i++) {
		wht_tbl[i] = etk_tbl[i] = itk_tbl[i] = btk_tbl[i] = FALSE;
		gd_tbl[i] = TRUE;
	}
	for (sp = white; *sp; sp++)
		wht_tbl[(unsigned char)*sp] = TRUE;
	for (sp = endtk; *sp; sp++)
		etk_tbl[(unsigned char)*sp] = TRUE;
	for (sp = intk; *sp; sp++)
		itk_tbl[(unsigned char)*sp] = TRUE;
	for (sp = begtk; *sp; sp++)
		btk_tbl[(unsigned char)*sp] = TRUE;
	for (sp = notgd; *sp; sp++)
		gd_tbl[(unsigned char)*sp] = FALSE;
}

/*
 * This routine opens the specified file and calls the function
 * which finds the function and type definitions.
 *
 * A file whose name ends in a one-letter suffix other than ".c" or
 * ".h" is first tried with PF_funcs(); if that finds nothing the file
 * is rewound and scanned as C.  Everything else is scanned as C.
 */
static void
find_entries(char *file)
{
	char *cp;

	if ((inf = fopen(file, "r")) == NULL) {
		perror(file);
		return;
	}
	curfile = savestr(file);
	cp = strrchr(file, '.');
	/* cp[1] is tested for NUL first so cp[2] is never read past the end */
	if (cp && cp[1] != '\0' && cp[1] != 'c' && cp[1] != 'h' &&
	    cp[2] == '\0') {
		if (PF_funcs(inf) == 0) {
			rewind(inf);
			C_entries();
		}
	} else
		C_entries();
	fclose(inf);
}

/*
 * Record one entry in the sorted tree.
 *
 *	name	the function or type name
 *	ln	its line number
 *	f	TRUE when the entry is a function (written with a search
 *		pattern), FALSE when it is written with a line number
 *
 * The search pattern is taken from lbuf, which the caller must have
 * filled with the text of the defining line.  Unless -x is in effect,
 * "main" is entered as "M<file>" with a one-letter suffix stripped.
 */
static void
pfnote(char *name, int ln, int f)
{
	char *fp;
	NODE *np;
	char nbuf[BUFSIZ];

	if ((np = malloc(sizeof *np)) == NULL) {
		fprintf(stderr, "ctags: too many entries to sort\n");
		/*
		 * Flush what we have to make room.  The tags file is not
		 * open yet while files are still being scanned, so the
		 * entries can only be written out in -x mode.
		 */
		if (xflag || outf != NULL)
			put_entries(head);
		free_tree(head);
		head = NULL;
		np = xmalloc(sizeof *np);
	}
	if (xflag == 0 && !strcmp(name, "main")) {
		fp = strrchr(curfile, '/');
		if (fp == NULL)
			fp = curfile;
		else
			fp++;
		sprintf(nbuf, "M%.*s", (int)(sizeof nbuf - 2), fp);
		fp = strrchr(nbuf, '.');
		if (fp && fp[1] != '\0' && fp[2] == '\0')
			*fp = 0;
		name = nbuf;
	}
	np->entry = savestr(name);
	np->file = curfile;
	np->f = f;
	np->lno = ln;
	np->been_warned = FALSE;
	np->left = np->right = NULL;
	if (xflag == 0) {
		/* keep at most 50 chars; anchor with '$' if the line is shorter */
		lbuf[50] = 0;
		strcat(lbuf, "$");
		lbuf[50] = 0;
	}
	np->pat = savestr(lbuf);
	if (head == NULL)
		head = np;
	else if (!add_node(np, head)) {
		/* duplicate: the node was not linked in, so release it */
		free(np->entry);
		free(np->pat);
		free(np);
	}
}

/*
 * This routine finds functions and typedefs in C syntax and adds them
 * to the list.
 *
 * Characters are read one at a time from inf into line[]; comments,
 * string and character constants and brace nesting are tracked so that
 * only tokens at the outermost level are offered to start_entry().
 */
static void
C_entries(void)
{
	int c;
	char *token, *tp;
	logical incomm, inquote, inchar, midtoken;
	int level;
	char *sp;
	char tok[BUFSIZ];

	lineno = 1;
	lineftell = ftell(inf);	/* don't inherit the previous file's offset */
	tydef = none;		/* nor its half-finished typedef	*/
	number = gotone = midtoken = inquote = inchar = incomm = FALSE;
	level = 0;
	sp = tp = token = line;
	for (;;) {
		*sp = c = getc(inf);
		if (c == EOF)		/* end of file or read error	*/
			break;
		if (c == '\n')
			lineno++;
		if (c == '\\') {
			sp = bump(sp);
			*sp = c = getc(inf);
			if (c == '\n') {
				/* continuation: same logical line, new physical one */
				lineno++;
				c = ' ';
			}
		} else if (incomm) {
			if (c == '*') {
				do {
					sp = bump(sp);
					*sp = c = getc(inf);
				} while (c == '*');
				if (c == '\n')
					lineno++;
				if (c == '/')
					incomm = FALSE;
			}
		} else if (inquote) {
			/*
			 * An escaped quote never gets here: the backslash
			 * case above has already swallowed it.
			 */
			if (c == '"')
				inquote = FALSE;
			continue;
		} else if (inchar) {
			if (c == '\'')
				inchar = FALSE;
			continue;
		} else switch (c) {
		case '"':
			inquote = TRUE;
			continue;
		case '\'':
			inchar = TRUE;
			continue;
		case '/':
			sp = bump(sp);
			*sp = c = getc(inf);
			if (c == '*')
				incomm = TRUE;
			else
				ungetc(c, inf);
			continue;
		case '#':
			if (sp == line)
				number = TRUE;
			continue;
		case '{':
			if (tydef == begin) {
				tydef = middle;
			}
			level++;
			continue;
		case '}':
			if (sp == line)
				level = 0;	/* reset */
			else
				level--;
			if (!level && tydef == middle) {
				tydef = end;
			}
			continue;
		}
		if (!level && !inquote && !incomm && gotone == FALSE) {
			if (midtoken) {
				if (endtoken(c)) {
					int f;
					int pfline = lineno;

					if (start_entry(&sp, token, &f)) {
						size_t len = (size_t)(tp - token) + 1;

						/* tok is smaller than line */
						if (len > sizeof tok - 1)
							len = sizeof tok - 1;
						strncpy(tok, token, len);
						tok[len] = 0;
						fetch_line();
						pfnote(tok, pfline, f);
						gotone = f;	/* function */
					}
					midtoken = FALSE;
					token = sp;
				} else if (intoken(c))
					tp++;
			} else if (begtoken(c)) {
				token = tp = sp;
				midtoken = TRUE;
			}
		}
		if (c == ';' && tydef == end)	/* clean with typedefs */
			tydef = none;
		sp++;
		if (c == '\n' || sp > LINE_SOFT_END) {
			tp = token = sp = line;
			lineftell = ftell(inf);
			number = gotone = midtoken = inquote = inchar = FALSE;
		}
	}
}

/*
 * This routine checks to see if the current token is
 * at the start of a function, or corresponds to a typedef.
 * It updates the input line so that the '(' will be
 * in it when it returns.
 *
 *	lp	in/out: current position in line[]
 *	token	start of the token that has just ended
 *	f	out: 1 if the token names a function, 0 for a typedef
 *
 * Returns non-zero when the token should be entered in the tags tree.
 * The last character read is pushed back on inf before returning.
 */
static int
start_entry(char **lp, char *token, int *f)
{
	static int found;	/* tokens seen on '#' lines since "define" */
	int c;			/* int, so that EOF is distinguishable	*/
	char *sp;
	logical firsttok;	/* T if have seen first token in ()'s	*/
	int bad;

	*f = 1;			/* a function */
	sp = *lp;
	c = (unsigned char)*sp;
	bad = FALSE;
	if (!number) {		/* space is not allowed in macro defs	*/
		while (iswhite(c)) {
			sp = bump(sp);
			*sp = c = getc(inf);
			if (c == '\n') {
				lineno++;
				if (sp > LINE_SOFT_END)
					goto ret;
			}
		}
	/* the following tries to make it so that a #define a b(c)	*/
	/* doesn't count as a define of b.				*/
	} else {
		if (!strncmp(token, "define", 6))
			found = 0;
		else if (found < 2)	/* saturate: only ">= 2" matters */
			found++;
		if (found >= 2) {
			gotone = TRUE;
			goto badone;
		}
	}
	/* check for the typedef cases		*/
	if (tflag && !strncmp(token, "typedef", 7)) {
		tydef = begin;
		goto badone;
	}
	if (tydef == begin && (!strncmp(token, "struct", 6) ||
	    !strncmp(token, "union", 5) || !strncmp(token, "enum", 4))) {
		goto badone;
	}
	if (tydef == begin) {
		tydef = end;
		goto badone;
	}
	if (tydef == end) {
		*f = 0;
		goto ret;
	}
	if (c != '(')
		goto badone;
	firsttok = FALSE;
	for (;;) {
		sp = bump(sp);
		*sp = c = getc(inf);
		if (c == ')')
			break;
		if (c == EOF)		/* unterminated argument list	*/
			goto badone;
		if (c == '\n') {
			lineno++;
			if (sp > LINE_SOFT_END)
				goto ret;
		}
		/*
		 * This line used to confuse ctags:
		 *	int	(*oldhup)();
		 * This fixes it.  A nonwhite char before the first
		 * token, other than a / (in case of a comment in there)
		 * makes this not a declaration.
		 */
		if (begtoken(c) || c == '/')
			firsttok = TRUE;
		else if (!iswhite(c) && !firsttok)
			goto badone;
	}
	/* skip white space so that c is the first char after the ')' */
	for (;;) {
		sp = bump(sp);
		*sp = c = getc(inf);
		if (!iswhite(c))
			break;
		if (c == '\n') {
			lineno++;
			if (sp > LINE_SOFT_END)
				break;
		}
	}
	goto ret;
badone:
	bad = TRUE;
ret:
	*lp = --sp;
	if (c == '\n')
		lineno--;	/* it is pushed back and will be counted again */
	ungetc(c, inf);		/* a no-op when c is EOF */
	return !bad && (!*f || isgood(c));
					/* hack for typedefs */
}

/*
 * Load lbuf with the text of the current logical line (the one that
 * starts at offset lineftell), without its trailing newline, and then
 * restore the read position of inf.
 */
static void
fetch_line(void)
{
	long saveftell = ftell(inf);
	char *cp;

	fseek(inf, lineftell, SEEK_SET);
	if (fgets(lbuf, sizeof lbuf, inf) == NULL)
		lbuf[0] = 0;
	cp = strrchr(lbuf, '\n');
	if (cp)
		*cp = 0;
	fseek(inf, saveftell, SEEK_SET);
}

/*
 * Release a whole (sub)tree together with the strings each node owns.
 * node->file is shared between all entries of one input file and is
 * therefore left alone.
 */
static void
free_tree(NODE *node)
{
	NODE *left;

	while (node) {
		left = node->left;	/* fetch before the node is freed */
		free_tree(node->right);
		free(node->entry);
		free(node->pat);
		free(node);
		node = left;
	}
}

/*
 * Insert node into the tree rooted at cur_node, ordered by entry name.
 * A name that is already present is reported (unless -w) and dropped.
 *
 * Returns 1 if the node was linked into the tree, 0 if it was dropped.
 */
static int
add_node(NODE *node, NODE *cur_node)
{
	NODE **link;
	int dif;

	for (;;) {
		dif = strcmp(node->entry, cur_node->entry);
		if (dif == 0)
			break;
		link = dif < 0 ? &cur_node->left : &cur_node->right;
		if (*link == NULL) {
			*link = node;
			return (1);
		}
		cur_node = *link;
	}

	/* same name as cur_node */
	if (node->file == cur_node->file) {
		if (!wflag) {
			fprintf(stderr,"Duplicate entry in file %s, line %d: %s\n",
			    node->file,lineno,node->entry);
			fprintf(stderr,"Second entry ignored\n");
		}
		return (0);
	}
	if (!cur_node->been_warned)
		if (!wflag)
			fprintf(stderr,"Duplicate entry in files %s and %s: %s (Warning only)\n",
			    node->file, cur_node->file, node->entry);
	cur_node->been_warned = TRUE;
	return (0);
}

/*
 * Write the tree rooted at node in sorted (in-order) sequence: tags
 * lines to outf normally, vgrind index lines to stdout for -v, and
 * cxref style lines to stdout for -x.
 */
static void
put_entries(NODE *node)
{
	char *sp;

	if (node == NULL)
		return;
	put_entries(node->left);
	if (xflag == 0) {
		if (node->f) {		/* a function */
			fprintf(outf, "%s\t%s\t%c^",
			    node->entry, node->file, searchar);
			/* escape backslashes and the search delimiter */
			for (sp = node->pat; *sp; sp++)
				if (*sp == '\\')
					fprintf(outf, "\\\\");
				else if (*sp == searchar)
					fprintf(outf, "\\%c", searchar);
				else
					putc(*sp, outf);
			fprintf(outf, "%c\n", searchar);
		} else {		/* a typedef; text pattern inadequate */
			fprintf(outf, "%s\t%s\t%d\n",
			    node->entry, node->file, node->lno);
		}
	} else if (vflag)
		fprintf(stdout, "%s %s %d\n",
		    node->entry, node->file, (node->lno+63)/64);
	else
		fprintf(stdout, "%-16s%4d %-16s %s\n",
		    node->entry, node->lno, node->file, node->pat);
	put_entries(node->right);
}

static char	*dbp = lbuf;	/* scan position within lbuf		*/
static int	pfcnt;		/* entries found by PF_funcs		*/

/*
 * Scan fi line by line for "function", "subroutine", "program" and
 * "procedure" headers, each optionally preceded by a type keyword
 * (integer, real, logical, complex, character, double precision), and
 * enter the name that follows.  Keywords are matched without regard
 * to case.
 *
 * Returns the number of entries found.
 */
static int
PF_funcs(FILE *fi)
{
	lineno = 0;
	pfcnt = 0;
	while (fgets(lbuf, sizeof(lbuf), fi)) {
		lineno++;
		dbp = lbuf;
		if (*dbp == '%')
			dbp++;		/* Ratfor escape to fortran */
		while (isspace((unsigned char)*dbp))
			dbp++;
		if (*dbp == 0)
			continue;
		/* optional leading type; "| ' '" folds letters to lower case */
		switch (*dbp | ' ') {

		case 'i':
			if (tail("integer"))
				takeprec();
			break;
		case 'r':
			if (tail("real"))
				takeprec();
			break;
		case 'l':
			if (tail("logical"))
				takeprec();
			break;
		case 'c':
			if (tail("complex") || tail("character"))
				takeprec();
			break;
		case 'd':
			if (tail("double")) {
				while (isspace((unsigned char)*dbp))
					dbp++;
				if (*dbp == 0)
					continue;
				if (tail("precision"))
					break;
				continue;
			}
			break;
		}
		while (isspace((unsigned char)*dbp))
			dbp++;
		if (*dbp == 0)
			continue;
		switch (*dbp | ' ') {

		case 'f':
			if (tail("function"))
				getit();
			continue;
		case 's':
			if (tail("subroutine"))
				getit();
			continue;
		case 'p':
			if (tail("program")) {
				getit();
				continue;
			}
			if (tail("procedure"))
				getit();
			continue;
		}
	}
	return (pfcnt);
}

/*
 * If the text at dbp starts with the keyword cp (compared with the
 * 040 bit of each character masked off, i.e. ignoring letter case),
 * step dbp over it and return 1; otherwise leave dbp alone and
 * return 0.
 */
static int
tail(char *cp)
{
	int len = 0;

	while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
		cp++, len++;
	if (*cp == 0) {
		dbp += len;
		return (1);
	}
	return (0);
}

/*
 * Step dbp over an optional "* <digits>" length that may follow a
 * type keyword.  A '*' that is not followed by a digit leaves dbp on
 * the character before the non-digit so that the keyword match that
 * comes next fails.
 */
static void
takeprec(void)
{
	while (isspace((unsigned char)*dbp))
		dbp++;
	if (*dbp != '*')
		return;
	dbp++;
	while (isspace((unsigned char)*dbp))
		dbp++;
	if (!isdigit((unsigned char)*dbp)) {
		--dbp;		/* force failure */
		return;
	}
	do
		dbp++;
	while (isdigit((unsigned char)*dbp));
}

/*
 * Enter the identifier at dbp (a letter followed by letters and
 * digits) as found on the current line, and count it in pfcnt.
 * Does nothing if dbp is not at an identifier.
 */
static void
getit(void)
{
	char *cp;
	char c;
	char nambuf[BUFSIZ];

	/* zap the newline, if the line has one */
	cp = lbuf + strlen(lbuf);
	if (cp > lbuf && cp[-1] == '\n')
		*--cp = 0;
	while (isspace((unsigned char)*dbp))
		dbp++;
	if (*dbp == 0 || !isalpha((unsigned char)*dbp))
		return;
	for (cp = dbp+1; *cp && (isalpha((unsigned char)*cp) ||
	    isdigit((unsigned char)*cp)); cp++)
		continue;
	/* terminate the name temporarily so it can be copied out */
	c = cp[0];
	cp[0] = 0;
	strcpy(nambuf, dbp);
	cp[0] = c;
	pfnote(nambuf, lineno, FALSE);
	pfcnt++;
}

/*
 * Return a freshly allocated copy of the string cp.  Exits the
 * program if memory cannot be obtained.
 */
static char *
savestr(const char *cp)
{
	char *dp;

	dp = xmalloc(strlen(cp) + 1);
	strcpy(dp, cp);
	return (dp);
}