1 /* 2 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* Copyright (c) 1988 AT&T */ 6 /* All Rights Reserved */ 7 8 /* 9 * Copyright (c) 1980 Regents of the University of California. 10 * All rights reserved. The Berkeley software License Agreement 11 * specifies the terms and conditions for redistribution. 12 */ 13 14 /* 15 * Modify ctags to handle C++ in C_entries(), etc: 16 * - Handles C++ comment token "//" 17 * - Handles C++ scope operator "::". 18 * This helps to distinguish between xyz() 19 * definition and X::xyz() definition. 20 * - Recognizes C++ reserved word "class" in typedef processing 21 * (for "-t" option) 22 * - Handles Sun C++ special file name extensions: .c, .C, .cc, and .cxx. 23 * - Handles overloaded unary/binary operator names 24 * Doesn't handle yet: 25 * - inline functions in class definition (currently they get 26 * swallowed within a class definition) 27 * - Tags with scope operator :: with spaces in between, 28 * e.g. classz ::afunc 29 * 30 * Enhance operator functions support: 31 * - Control flow involving operator tokens scanning are 32 * consistent with that of other function tokens - original 33 * hacking method for 2.0 is removed. This will accurately 34 * identify tags for declarations of the form 'operator+()' 35 * (bugid 1027806) as well as allowing spaces in between 36 * 'operator' and 'oprtk', e.g. 'operator + ()'. 37 * 38 */ 39 40 #ifndef lint 41 char copyright[] = "@(#) Copyright (c) 1980 Regents of the University of " 42 "California.\nAll rights reserved.\n"; 43 #endif 44 45 #include <stdio.h> 46 #include <ctype.h> 47 #include <locale.h> 48 #include <unistd.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <strings.h> 52 #include <limits.h> 53 #include <sys/types.h> 54 #include <sys/stat.h> 55 56 /* 57 * ctags: create a tags file 58 */ 59 60 #define bool char 61 62 #define TRUE (1) 63 #define FALSE (0) 64 65 #define CPFLAG 3 /* # of bytes in a flag */ 66 67 #define iswhite(arg) (_wht[arg]) /* T if char is white */ 68 #define begtoken(arg) (_btk[arg]) /* T if char can start token */ 69 #define intoken(arg) (_itk[arg]) /* T if char can be in token */ 70 #define endtoken(arg) (_etk[arg]) /* T if char ends tokens */ 71 #define isgood(arg) (_gd[arg]) /* T if char can be after ')' */ 72 73 #define optoken(arg) (_opr[arg]) /* T if char can be */ 74 /* an overloaded operator token */ 75 76 #define max(I1, I2) (I1 > I2 ? I1 : I2) 77 78 struct nd_st { /* sorting structure */ 79 char *entry; /* function or type name */ 80 char *file; /* file name */ 81 bool f; /* use pattern or line no */ 82 int lno; /* for -x option */ 83 char *pat; /* search pattern */ 84 bool been_warned; /* set if noticed dup */ 85 struct nd_st *left, *right; /* left and right sons */ 86 }; 87 88 long ftell(); 89 typedef struct nd_st NODE; 90 91 static bool 92 number, /* T if on line starting with # */ 93 gotone, /* found a func already on line */ 94 /* boolean "func" (see init) */ 95 _wht[0177], _etk[0177], _itk[0177], _btk[0177], _gd[0177]; 96 97 /* boolean array for overloadable operator symbols */ 98 static bool _opr[0177]; 99 100 /* 101 * typedefs are recognized using a simple finite automata, 102 * tydef is its state variable. 103 */ 104 typedef enum {none, begin, begin_rec, begin_tag, middle, end } TYST; 105 106 static TYST tydef = none; 107 108 static char searchar = '/'; /* use /.../ searches */ 109 110 #define LINEBUFSIZ 4*BUFSIZ 111 112 static int lineno; /* line number of current line */ 113 static char 114 line[LINEBUFSIZ], /* current input line */ 115 *curfile, /* current input file name */ 116 *outfile = "tags", /* output file */ 117 *white = " \f\t\n", /* white chars */ 118 *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?", 119 /* token ending chars */ 120 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz", 121 /* token starting chars */ 122 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" 123 "0123456789", 124 /* valid in-token chars */ 125 *notgd = ",;"; /* non-valid after-function chars */ 126 127 static char *oprtk = " =-+%*/&|^~!<>[]()"; /* overloadable operators */ 128 129 static int file_num; /* current file number */ 130 static int aflag; /* -a: append to tags */ 131 132 #ifndef XPG4 /* XPG4: handle typedefs by default */ 133 static int tflag; /* -t: create tags for typedefs */ 134 #endif /* !XPG4 */ 135 136 static int uflag; /* -u: update tags */ 137 static int wflag; /* -w: suppress warnings */ 138 static int vflag; /* -v: create vgrind style index output */ 139 static int xflag; /* -x: create cxref style output */ 140 141 static char lbuf[LINEBUFSIZ]; 142 143 static FILE 144 *inf, /* ioptr for current input file */ 145 *outf; /* ioptr for tags file */ 146 147 static long lineftell; /* ftell after getc( inf ) == '\n' */ 148 149 static NODE *head; /* the head of the sorted binary tree */ 150 151 static int infile_fail; /* Count of bad opens. Fix bug ID #1082298 */ 152 153 static char *dbp = lbuf; 154 static int pfcnt; 155 156 static int mac; /* our modified argc, after parseargs() */ 157 static char **mav; /* our modified argv, after parseargs() */ 158 159 160 /* our local functions: */ 161 static void init(void); 162 static void find_entries(char *); 163 static void pfnote(char *, int, bool); 164 static void C_entries(void); 165 static int start_entry(char **, char *, int *); 166 static void Y_entries(void); 167 static char *toss_comment(char *); 168 static void getaline(long int); 169 static void free_tree(NODE *); 170 static void add_node(NODE *, NODE *); 171 static void put_entries(NODE *); 172 static int PF_funcs(FILE *); 173 static int tail(char *); 174 static void takeprec(void); 175 static void getit(void); 176 static char *savestr(char *); 177 static void L_funcs(FILE *); 178 static void L_getit(int); 179 static int striccmp(char *, char *); 180 static int first_char(void); 181 static void toss_yysec(void); 182 static void Usage(void); 183 static void parseargs(int, char **); 184 185 int 186 main(int ac, char *av[]) 187 { 188 int i; 189 char cmd[100]; 190 191 (void) setlocale(LC_ALL, ""); 192 #if !defined(TEXT_DOMAIN) 193 #define TEXT_DOMAIN "SYS_TEST" 194 #endif 195 (void) textdomain(TEXT_DOMAIN); 196 197 parseargs(ac, av); 198 199 while ((i = getopt(mac, mav, "aBFtuvwxf:")) != EOF) { 200 switch (i) { 201 case 'a': /* -a: Append output to existing tags file */ 202 aflag++; 203 break; 204 205 case 'B': /* -B: Use backward search patterns (?...?) */ 206 searchar = '?'; 207 break; 208 209 case 'F': /* -F: Use forward search patterns (/.../) */ 210 searchar = '/'; 211 break; 212 213 case 't': /* -t: Create tags for typedefs. */ 214 /* for XPG4 , we silently ignore "-t". */ 215 #ifndef XPG4 216 tflag++; 217 #endif /* !XPG4 */ 218 break; 219 220 case 'u': /* -u: Update the specified tags file */ 221 uflag++; 222 break; 223 224 case 'v': /* -v: Index listing on stdout */ 225 vflag++; 226 xflag++; 227 break; 228 229 case 'w': /* -w: Suppress warnings */ 230 wflag++; 231 break; 232 233 case 'x': /* -x: Produce a simple index */ 234 xflag++; 235 break; 236 237 case 'f': /* -f tagsfile: output to tagsfile */ 238 outfile = strdup(optarg); 239 break; 240 241 default: 242 Usage(); /* never returns */ 243 break; 244 } 245 } 246 247 /* if we didn't specify any source code to parse, complain and die. */ 248 if (optind == mac) { 249 Usage(); /* never returns */ 250 } 251 252 253 init(); /* set up boolean "functions" */ 254 /* 255 * loop through files finding functions 256 */ 257 for (file_num = optind; file_num < mac; file_num++) 258 find_entries(mav[file_num]); 259 260 if (xflag) { 261 put_entries(head); 262 exit(infile_fail > 0 ? 2 : 0); /* Fix for 1082298 */ 263 } 264 if (uflag) { 265 for (i = 1; i < mac; i++) { 266 (void) sprintf(cmd, 267 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", 268 outfile, mav[i], outfile); 269 (void) system(cmd); 270 } 271 aflag++; 272 } 273 outf = fopen(outfile, aflag ? "a" : "w"); 274 if (outf == NULL) { 275 perror(outfile); 276 exit(1); 277 } 278 put_entries(head); 279 (void) fclose(outf); 280 if (uflag) { 281 (void) sprintf(cmd, "sort %s -o %s", outfile, outfile); 282 (void) system(cmd); 283 } 284 return (infile_fail > 0 ? 2 : 0); /* Fix for #1082298 */ 285 } 286 287 /* 288 * This routine sets up the boolean psuedo-functions which work 289 * by seting boolean flags dependent upon the corresponding character 290 * Every char which is NOT in that string is not a white char. Therefore, 291 * all of the array "_wht" is set to FALSE, and then the elements 292 * subscripted by the chars in "white" are set to TRUE. Thus "_wht" 293 * of a char is TRUE if it is the string "white", else FALSE. 294 */ 295 static void 296 init(void) 297 { 298 char *sp; 299 int i; 300 301 for (i = 0; i < 0177; i++) { 302 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; 303 _opr[i] = FALSE; /* initialize boolean */ 304 /* array of operator symbols */ 305 _gd[i] = TRUE; 306 } 307 for (sp = white; *sp; sp++) 308 _wht[*sp] = TRUE; 309 for (sp = endtk; *sp; sp++) 310 _etk[*sp] = TRUE; 311 for (sp = intk; *sp; sp++) 312 _itk[*sp] = TRUE; 313 for (sp = begtk; *sp; sp++) 314 _btk[*sp] = TRUE; 315 316 /* mark overloadable operator symbols */ 317 for (sp = oprtk; *sp; sp++) 318 _opr[*sp] = TRUE; 319 320 for (sp = notgd; *sp; sp++) 321 _gd[*sp] = FALSE; 322 } 323 324 /* 325 * This routine opens the specified file and calls the function 326 * which finds the function and type definitions. 327 */ 328 static void 329 find_entries(char *file) 330 { 331 char *cp; 332 struct stat st; 333 334 /* skip anything that isn't a regular file */ 335 if (stat(file, &st) == 0 && !S_ISREG(st.st_mode)) 336 return; 337 338 if ((inf = fopen(file, "r")) == NULL) { 339 perror(file); 340 infile_fail++; /* Count bad opens. ID #1082298 */ 341 return; 342 } 343 curfile = savestr(file); 344 lineno = 0; 345 cp = strrchr(file, '.'); 346 /* .l implies lisp or lex source code */ 347 if (cp && cp[1] == 'l' && cp[2] == '\0') { 348 if (strchr(";([", first_char()) != NULL) /* lisp */ 349 { 350 L_funcs(inf); 351 (void) fclose(inf); 352 return; 353 } else { /* lex */ 354 /* 355 * throw away all the code before the second "%%" 356 */ 357 toss_yysec(); 358 getaline(lineftell); 359 pfnote("yylex", lineno, TRUE); 360 toss_yysec(); 361 C_entries(); 362 (void) fclose(inf); 363 return; 364 } 365 } 366 /* .y implies a yacc file */ 367 if (cp && cp[1] == 'y' && cp[2] == '\0') { 368 toss_yysec(); 369 Y_entries(); 370 C_entries(); 371 (void) fclose(inf); 372 return; 373 } 374 375 /* 376 * Add in file name extension support for Sun C++ which 377 * permits .C/.c (AT&T), .cc (G++) and .cxx (Gloksp.) 378 */ 379 380 /* if not a .c, .C, .cc, .cxx or .h file, try fortran */ 381 if (cp && (cp[1] != 'C' && cp[1] != 'c' && cp[1] != 'h') && 382 cp[2] == '\0' && (strcmp(cp, ".cc") == 0) && 383 (strcmp(cp, ".cxx") == 0)) { 384 if (PF_funcs(inf) != 0) { 385 (void) fclose(inf); 386 return; 387 } 388 rewind(inf); /* no fortran tags found, try C */ 389 } 390 C_entries(); 391 (void) fclose(inf); 392 } 393 394 static void 395 pfnote(char *name, int ln, bool f) 396 { 397 char *fp; 398 NODE *np; 399 char *nametk; /* hold temporary tokens from name */ 400 char nbuf[BUFSIZ]; 401 402 if ((np = malloc(sizeof (NODE))) == NULL) { 403 (void) fprintf(stderr, 404 gettext("ctags: too many entries to sort\n")); 405 put_entries(head); 406 free_tree(head); 407 head = np = (NODE *) malloc(sizeof (NODE)); 408 } 409 if (xflag == 0 && (strcmp(name, "main") == 0)) { 410 fp = strrchr(curfile, '/'); 411 412 if (fp == 0) 413 fp = curfile; 414 else 415 fp++; 416 (void) sprintf(nbuf, "M%s", fp); 417 fp = strrchr(nbuf, '.'); 418 /* Chop off .cc and .cxx as well as .c, .h, etc */ 419 if (fp && ((fp[2] == 0) || (fp[2] == 'c' && fp[3] == 0) || 420 (fp[3] == 'x' && fp[4] == 0))) 421 *fp = 0; 422 name = nbuf; 423 } 424 425 /* remove in-between blanks operator function tags */ 426 if (strchr(name, ' ') != NULL) 427 { 428 (void) strcpy(name, strtok(name, " ")); 429 while ((nametk = strtok(0, " ")) != NULL) 430 (void) strcat(name, nametk); 431 } 432 np->entry = savestr(name); 433 np->file = curfile; 434 np->f = f; 435 np->lno = ln; 436 np->left = np->right = 0; 437 if (xflag == 0) { 438 lbuf[50] = 0; 439 (void) strcat(lbuf, "$"); 440 lbuf[50] = 0; 441 } 442 np->pat = savestr(lbuf); 443 if (head == NULL) 444 head = np; 445 else 446 add_node(np, head); 447 } 448 449 /* 450 * This routine finds functions and typedefs in C syntax and adds them 451 * to the list. 452 */ 453 static void 454 C_entries(void) 455 { 456 int c; 457 char *token, *tp; 458 bool incomm, inquote, inchar, midtoken, isoperator, optfound; 459 int level; 460 char *sp; 461 char tok[BUFSIZ]; 462 long int tokftell; 463 464 number = gotone = midtoken = inquote = inchar = 465 incomm = isoperator = optfound = FALSE; 466 467 level = 0; 468 sp = tp = token = line; 469 lineno++; 470 lineftell = tokftell = ftell(inf); 471 for (;;) { 472 *sp = c = getc(inf); 473 if (feof(inf)) 474 break; 475 if (c == '\n') { 476 lineftell = ftell(inf); 477 lineno++; 478 } else if (c == '\\') { 479 c = *++sp = getc(inf); 480 if ((c == '\n') || (c == EOF)) { /* c == EOF, 1091005 */ 481 lineftell = ftell(inf); 482 lineno++; 483 c = ' '; 484 } 485 } else if (incomm) { 486 if (c == '*') { 487 while ((*++sp = c = getc(inf)) == '*') 488 continue; 489 490 /* c == EOF 1091005 */ 491 if ((c == '\n') || (c == EOF)) { 492 lineftell = ftell(inf); 493 lineno++; 494 } 495 496 if (c == '/') 497 incomm = FALSE; 498 } 499 } else if (inquote) { 500 /* 501 * Too dumb to know about \" not being magic, but 502 * they usually occur in pairs anyway. 503 */ 504 if (c == '"') 505 inquote = FALSE; 506 continue; 507 } else if (inchar) { 508 if (c == '\'') 509 inchar = FALSE; 510 continue; 511 } else if (midtoken == TRUE) { /* if white space omitted */ 512 goto dotoken; 513 } else switch (c) { 514 case '"': 515 inquote = TRUE; 516 continue; 517 case '\'': 518 inchar = TRUE; 519 continue; 520 case '/': 521 *++sp = c = getc(inf); 522 /* Handles the C++ comment token "//" */ 523 if (c == '*') 524 incomm = TRUE; 525 else if (c == '/') { 526 /* 527 * Skip over all the characters after 528 * "//" until a newline character. Now also 529 * includes fix for 1091005, check for EOF. 530 */ 531 do { 532 c = getc(inf); 533 /* 1091005: */ 534 } while ((c != '\n') && (c != EOF)); 535 536 537 /* 538 * Fixed bugid 1030014 539 * Return the current position of the 540 * file after the newline. 541 */ 542 lineftell = ftell(inf); 543 lineno++; 544 *--sp = c; 545 } 546 else 547 (void) ungetc(*sp, inf); 548 continue; 549 case '#': 550 if (sp == line) 551 number = TRUE; 552 continue; 553 case '{': 554 if ((tydef == begin_rec) || (tydef == begin_tag)) { 555 tydef = middle; 556 } 557 level++; 558 continue; 559 case '}': 560 /* 561 * Heuristic for function or structure end; 562 * common for #ifdef/#else blocks to add extra "{" 563 */ 564 if (sp == line) 565 level = 0; /* reset */ 566 else 567 level--; 568 if (!level && tydef == middle) { 569 tydef = end; 570 } 571 if (!level && tydef == none) /* Fix for #1034126 */ 572 goto dotoken; 573 continue; 574 } 575 576 dotoken: 577 578 579 if (!level && !inquote && !incomm && gotone == FALSE) { 580 if (midtoken) { 581 if (endtoken(c)) { 582 583 /* 584 * 585 * ':' +---> ':' -> midtok 586 * 587 * +---> operator{+,-, etc} -> midtok 588 * (continue) 589 * +---> endtok 590 */ 591 /* 592 * Enhance operator function support and 593 * fix bugid 1027806 594 * 595 * For operator token, scanning will continue until 596 * '(' is found. Spaces between 'operater' and 597 * 'oprtk' are allowed (e.g. 'operator + ()'), but 598 * will be removed when the actual entry for the tag 599 * is made. 600 * Note that functions of the form 'operator ()(int)' 601 * will be recognized, but 'operator ()' will not, 602 * even though this is legitimate in C. 603 */ 604 605 if (optoken(c)) { 606 if (isoperator) { 607 if (optfound) { 608 if (c != '(') { 609 tp++; 610 goto next_char; 611 } 612 } else { 613 if (c != ' ') { 614 optfound = TRUE; 615 } 616 tp++; 617 goto next_char; 618 } 619 } else { 620 /* start: this code shifted left for cstyle */ 621 char *backptr = tp - 7; 622 if (strncmp(backptr, "operator", 8) == 0) { 623 /* This is an overloaded operator */ 624 isoperator = TRUE; 625 if (c != ' ') { 626 optfound = TRUE; 627 } 628 629 tp++; 630 goto next_char; 631 } else if (c == '~') { 632 /* This is a destructor */ 633 tp++; 634 goto next_char; 635 } 636 /* end: above code shifted left for cstyle */ 637 } 638 } else if (c == ':') { 639 if ((*++sp = getc(inf)) == ':') { 640 tp += 2; 641 c = *sp; 642 goto next_char; 643 } else { 644 (void) ungetc (*sp, inf); 645 --sp; 646 } 647 } 648 649 /* start: this code shifted left for cstyle */ 650 { 651 int f; 652 int pfline = lineno; 653 654 if (start_entry(&sp, token, &f)) { 655 (void) strncpy(tok, token, tp-token+1); 656 tok[tp-token+1] = 0; 657 getaline(tokftell); 658 pfnote(tok, pfline, f); 659 gotone = f; /* function */ 660 } 661 662 isoperator = optfound = midtoken = FALSE; 663 token = sp; 664 } 665 /* end: above code shifted left for cstyle */ 666 } else if (intoken(c)) 667 tp++; 668 } else if (begtoken(c)) { 669 token = tp = sp; 670 midtoken = TRUE; 671 tokftell = lineftell; 672 } 673 } 674 next_char: 675 if (c == ';' && tydef == end) /* clean with typedefs */ 676 tydef = none; 677 sp++; 678 /* The "c == }" was added to fix #1034126 */ 679 if (c == '\n' ||c == '}'|| sp > &line[sizeof (line) - BUFSIZ]) { 680 tp = token = sp = line; 681 number = gotone = midtoken = inquote = 682 inchar = isoperator = optfound = FALSE; 683 } 684 } 685 } 686 687 /* 688 * This routine checks to see if the current token is 689 * at the start of a function, or corresponds to a typedef 690 * It updates the input line * so that the '(' will be 691 * in it when it returns. 692 */ 693 static int 694 start_entry(char **lp, char *token, int *f) 695 { 696 char *sp; 697 int c; 698 static bool found; 699 bool firsttok; /* T if have seen first token in ()'s */ 700 int bad; 701 702 *f = 1; /* a function */ 703 sp = *lp; 704 c = *sp; 705 bad = FALSE; 706 if (!number) { /* space is not allowed in macro defs */ 707 while (iswhite(c)) { 708 *++sp = c = getc(inf); 709 if ((c == '\n') || (c == EOF)) { /* c==EOF, #1091005 */ 710 lineno++; 711 lineftell = ftell(inf); 712 if (sp > &line[sizeof (line) - BUFSIZ]) 713 goto ret; 714 } 715 } 716 /* the following tries to make it so that a #define a b(c) */ 717 /* doesn't count as a define of b. */ 718 } else { 719 if (strncmp(token, "define", 6) == 0) 720 found = 0; 721 else 722 found++; 723 if (found >= 2) { 724 gotone = TRUE; 725 badone: bad = TRUE; 726 goto ret; 727 } 728 } 729 /* check for the typedef cases */ 730 #ifdef XPG4 731 if (strncmp(token, "typedef", 7) == 0) { 732 #else /* !XPG4 */ 733 if (tflag && (strncmp(token, "typedef", 7) == 0)) { 734 #endif /* XPG4 */ 735 tydef = begin; 736 goto badone; 737 } 738 /* Handles 'class' besides 'struct' etc. */ 739 if (tydef == begin && ((strncmp(token, "struct", 6) == 0) || 740 (strncmp(token, "class", 5) == 0) || 741 (strncmp(token, "union", 5) == 0)|| 742 (strncmp(token, "enum", 4) == 0))) { 743 tydef = begin_rec; 744 goto badone; 745 } 746 if (tydef == begin) { 747 tydef = end; 748 goto badone; 749 } 750 if (tydef == begin_rec) { 751 tydef = begin_tag; 752 goto badone; 753 } 754 if (tydef == begin_tag) { 755 tydef = end; 756 goto gottydef; /* Fall through to "tydef==end" */ 757 } 758 759 gottydef: 760 if (tydef == end) { 761 *f = 0; 762 goto ret; 763 } 764 if (c != '(') 765 goto badone; 766 firsttok = FALSE; 767 while ((*++sp = c = getc(inf)) != ')') { 768 if ((c == '\n') || (c == EOF)) { /* c == EOF Fix for #1091005 */ 769 lineftell = ftell(inf); 770 lineno++; 771 if (sp > &line[sizeof (line) - BUFSIZ]) 772 goto ret; 773 } 774 /* 775 * This line used to confuse ctags: 776 * int (*oldhup)(); 777 * This fixes it. A nonwhite char before the first 778 * token, other than a / (in case of a comment in there) 779 * makes this not a declaration. 780 */ 781 if (begtoken(c) || c == '/') 782 firsttok = TRUE; 783 else if (!iswhite(c) && !firsttok) 784 goto badone; 785 } 786 while (iswhite(*++sp = c = getc(inf))) 787 if ((c == '\n') || (c == EOF)) { /* c == EOF fix for #1091005 */ 788 lineno++; 789 lineftell = ftell(inf); 790 if (sp > &line[sizeof (line) - BUFSIZ]) 791 break; 792 } 793 ret: 794 *lp = --sp; 795 if (c == '\n') 796 lineno--; 797 (void) ungetc(c, inf); 798 return (!bad && (!*f || isgood(c))); 799 /* hack for typedefs */ 800 } 801 802 /* 803 * Y_entries: 804 * Find the yacc tags and put them in. 805 */ 806 static void 807 Y_entries(void) 808 { 809 char *sp, *orig_sp; 810 int brace; 811 bool in_rule = FALSE; 812 size_t toklen; 813 char tok[LINEBUFSIZ]; 814 815 brace = 0; 816 getaline(lineftell); 817 pfnote("yyparse", lineno, TRUE); 818 while (fgets(line, sizeof (line), inf) != NULL) { 819 for (sp = line; *sp; sp++) { 820 switch (*sp) { 821 case '\n': 822 lineno++; 823 /* FALLTHROUGH */ 824 case ' ': 825 case '\t': 826 case '\f': 827 case '\r': 828 break; 829 case '"': 830 case '\'': { 831 char start = *sp; 832 sp++; 833 834 while ((*sp != '\0') && (*sp != start)) { 835 if (*sp == '\\') 836 sp++; /* Skip escaped thing */ 837 sp++; 838 } 839 840 if (*sp == '\0') 841 sp--; 842 break; 843 } 844 case '/': 845 if (*++sp == '*') 846 sp = toss_comment(sp); 847 else 848 --sp; 849 break; 850 case '{': 851 brace++; 852 break; 853 case '}': 854 brace--; 855 break; 856 case '%': 857 if (sp[1] == '%' && sp == line) 858 return; 859 break; 860 case '|': 861 case ';': 862 in_rule = FALSE; 863 break; 864 default: 865 if (brace == 0 && !in_rule && (isalpha(*sp) || 866 *sp == '.' || 867 *sp == '_')) { 868 orig_sp = sp; 869 ++sp; 870 while (isalnum(*sp) || *sp == '_' || 871 *sp == '.') 872 sp++; 873 toklen = sp - orig_sp; 874 while (isspace(*sp)) 875 sp++; 876 if (*sp == ':' || (*sp == '\0' && 877 first_char() == ':')) { 878 (void) strncpy(tok, 879 orig_sp, toklen); 880 tok[toklen] = '\0'; 881 (void) strcpy(lbuf, line); 882 lbuf[strlen(lbuf) - 1] = '\0'; 883 pfnote(tok, lineno, TRUE); 884 in_rule = TRUE; 885 /* 886 * if we read NUL, leave it so 887 * we read the next line 888 */ 889 if (*sp == '\0') 890 sp--; 891 } else { 892 sp--; 893 } 894 } 895 break; 896 } 897 } 898 } 899 } 900 901 static char * 902 toss_comment(char *start) 903 { 904 char *sp; 905 906 /* 907 * first, see if the end-of-comment is on the same line 908 */ 909 do { 910 while ((sp = strchr(start, '*')) != NULL) 911 if (sp[1] == '/') 912 return (++sp); 913 else 914 start = (++sp); 915 start = line; 916 lineno++; 917 } while (fgets(line, sizeof (line), inf) != NULL); 918 919 /* 920 * running this through lint revealed that the original version 921 * of this routine didn't explicitly return something; while 922 * the return value was always used!. so i've added this 923 * next line. 924 */ 925 return (sp); 926 } 927 928 static void 929 getaline(long int where) 930 { 931 long saveftell = ftell(inf); 932 char *cp; 933 934 (void) fseek(inf, where, 0); 935 (void) fgets(lbuf, sizeof (lbuf), inf); 936 cp = strrchr(lbuf, '\n'); 937 if (cp) 938 *cp = 0; 939 (void) fseek(inf, saveftell, 0); 940 } 941 942 static void 943 free_tree(NODE *node) 944 { 945 NODE *next; 946 while (node) { 947 free_tree(node->right); 948 next = node->left; 949 free(node); 950 node = next; 951 } 952 } 953 954 static void 955 add_node(NODE *node, NODE *cur_node) 956 { 957 int dif; 958 959 dif = strcmp(node->entry, cur_node->entry); 960 if (dif == 0) { 961 if (node->file == cur_node->file) { 962 if (!wflag) { 963 (void) fprintf(stderr, 964 gettext("Duplicate entry in file %s, line %d: %s\n"), 965 node->file, lineno, node->entry); 966 (void) fprintf(stderr, 967 gettext("Second entry ignored\n")); 968 } 969 return; 970 } 971 if (!cur_node->been_warned) 972 if (!wflag) { 973 (void) fprintf(stderr, gettext("Duplicate " 974 "entry in files %s and %s: %s " 975 "(Warning only)\n"), 976 node->file, cur_node->file, 977 node->entry); 978 } 979 cur_node->been_warned = TRUE; 980 return; 981 } 982 983 if (dif < 0) { 984 if (cur_node->left != NULL) 985 add_node(node, cur_node->left); 986 else 987 cur_node->left = node; 988 return; 989 } 990 if (cur_node->right != NULL) 991 add_node(node, cur_node->right); 992 else 993 cur_node->right = node; 994 } 995 996 static void 997 put_entries(NODE *node) 998 { 999 char *sp; 1000 1001 if (node == NULL) 1002 return; 1003 put_entries(node->left); 1004 1005 /* 1006 * while the code in the following #ifdef section could be combined, 1007 * it's explicitly separated here to make maintainance easier. 1008 */ 1009 #ifdef XPG4 1010 /* 1011 * POSIX 2003: we no longer have a "-t" flag; the logic is 1012 * automatically assumed to be "turned on" here. 1013 */ 1014 if (xflag == 0) { 1015 (void) fprintf(outf, "%s\t%s\t%c^", 1016 node->entry, node->file, searchar); 1017 for (sp = node->pat; *sp; sp++) 1018 if (*sp == '\\') 1019 (void) fprintf(outf, "\\\\"); 1020 else if (*sp == searchar) 1021 (void) fprintf(outf, "\\%c", searchar); 1022 else 1023 (void) putc(*sp, outf); 1024 (void) fprintf(outf, "%c\n", searchar); 1025 } else if (vflag) 1026 (void) fprintf(stdout, "%s %s %d\n", 1027 node->entry, node->file, (node->lno+63)/64); 1028 else 1029 (void) fprintf(stdout, "%-16s %4d %-16s %s\n", 1030 node->entry, node->lno, node->file, node->pat); 1031 #else /* XPG4 */ 1032 /* 1033 * original way of doing things. "-t" logic is only turned on 1034 * when the user has specified it via a command-line argument. 1035 */ 1036 if (xflag == 0) 1037 if (node->f) { /* a function */ 1038 (void) fprintf(outf, "%s\t%s\t%c^", 1039 node->entry, node->file, searchar); 1040 for (sp = node->pat; *sp; sp++) 1041 if (*sp == '\\') 1042 (void) fprintf(outf, "\\\\"); 1043 else if (*sp == searchar) 1044 (void) fprintf(outf, "\\%c", searchar); 1045 else 1046 (void) putc(*sp, outf); 1047 (void) fprintf(outf, "%c\n", searchar); 1048 } else { /* a typedef; text pattern inadequate */ 1049 (void) fprintf(outf, "%s\t%s\t%d\n", 1050 node->entry, node->file, node->lno); 1051 } else if (vflag) 1052 (void) fprintf(stdout, "%s %s %d\n", 1053 node->entry, node->file, (node->lno+63)/64); 1054 else 1055 (void) fprintf(stdout, "%-16s %4d %-16s %s\n", 1056 node->entry, node->lno, node->file, node->pat); 1057 #endif /* XPG4 */ 1058 put_entries(node->right); 1059 } 1060 1061 1062 static int 1063 PF_funcs(FILE *fi) 1064 { 1065 1066 pfcnt = 0; 1067 while (fgets(lbuf, sizeof (lbuf), fi)) { 1068 lineno++; 1069 dbp = lbuf; 1070 if (*dbp == '%') dbp++; /* Ratfor escape to fortran */ 1071 while (isspace(*dbp)) 1072 dbp++; 1073 if (*dbp == 0) 1074 continue; 1075 switch (*dbp |' ') { 1076 1077 case 'i': 1078 if (tail("integer")) 1079 takeprec(); 1080 break; 1081 case 'r': 1082 if (tail("real")) 1083 takeprec(); 1084 break; 1085 case 'l': 1086 if (tail("logical")) 1087 takeprec(); 1088 break; 1089 case 'c': 1090 if (tail("complex") || tail("character")) 1091 takeprec(); 1092 break; 1093 case 'd': 1094 if (tail("double")) { 1095 while (isspace(*dbp)) 1096 dbp++; 1097 if (*dbp == 0) 1098 continue; 1099 if (tail("precision")) 1100 break; 1101 continue; 1102 } 1103 break; 1104 } 1105 while (isspace(*dbp)) 1106 dbp++; 1107 if (*dbp == 0) 1108 continue; 1109 switch (*dbp|' ') { 1110 1111 case 'f': 1112 if (tail("function")) 1113 getit(); 1114 continue; 1115 case 's': 1116 if (tail("subroutine")) 1117 getit(); 1118 continue; 1119 case 'p': 1120 if (tail("program")) { 1121 getit(); 1122 continue; 1123 } 1124 if (tail("procedure")) 1125 getit(); 1126 continue; 1127 } 1128 } 1129 return (pfcnt); 1130 } 1131 1132 static int 1133 tail(char *cp) 1134 { 1135 int len = 0; 1136 1137 while (*cp && (*cp&~' ') == ((*(dbp+len))&~' ')) 1138 cp++, len++; 1139 if (*cp == 0) { 1140 dbp += len; 1141 return (1); 1142 } 1143 return (0); 1144 } 1145 1146 static void 1147 takeprec(void) 1148 { 1149 while (isspace(*dbp)) 1150 dbp++; 1151 if (*dbp != '*') 1152 return; 1153 dbp++; 1154 while (isspace(*dbp)) 1155 dbp++; 1156 if (!isdigit(*dbp)) { 1157 --dbp; /* force failure */ 1158 return; 1159 } 1160 do 1161 dbp++; 1162 while (isdigit(*dbp)); 1163 } 1164 1165 static void 1166 getit(void) 1167 { 1168 char *cp; 1169 char c; 1170 char nambuf[LINEBUFSIZ]; 1171 1172 for (cp = lbuf; *cp; cp++) 1173 ; 1174 *--cp = 0; /* zap newline */ 1175 while (isspace(*dbp)) 1176 dbp++; 1177 if (*dbp == 0 || !isalpha(*dbp) || !isascii(*dbp)) 1178 return; 1179 for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++) 1180 continue; 1181 c = cp[0]; 1182 cp[0] = 0; 1183 (void) strcpy(nambuf, dbp); 1184 cp[0] = c; 1185 pfnote(nambuf, lineno, TRUE); 1186 pfcnt++; 1187 } 1188 1189 static char * 1190 savestr(char *cp) 1191 { 1192 int len; 1193 char *dp; 1194 1195 len = strlen(cp); 1196 dp = (char *)malloc(len+1); 1197 (void) strcpy(dp, cp); 1198 1199 return (dp); 1200 } 1201 1202 /* 1203 * lisp tag functions 1204 * just look for (def or (DEF 1205 */ 1206 1207 static void 1208 L_funcs(FILE *fi) 1209 { 1210 int special; 1211 1212 pfcnt = 0; 1213 while (fgets(lbuf, sizeof (lbuf), fi)) { 1214 lineno++; 1215 dbp = lbuf; 1216 if (dbp[0] == '(' && 1217 (dbp[1] == 'D' || dbp[1] == 'd') && 1218 (dbp[2] == 'E' || dbp[2] == 'e') && 1219 (dbp[3] == 'F' || dbp[3] == 'f')) { 1220 dbp += 4; 1221 if (striccmp(dbp, "method") == 0 || 1222 striccmp(dbp, "wrapper") == 0 || 1223 striccmp(dbp, "whopper") == 0) 1224 special = TRUE; 1225 else 1226 special = FALSE; 1227 while (!isspace(*dbp)) 1228 dbp++; 1229 while (isspace(*dbp)) 1230 dbp++; 1231 L_getit(special); 1232 } 1233 } 1234 } 1235 1236 static void 1237 L_getit(int special) 1238 { 1239 char *cp; 1240 char c; 1241 char nambuf[LINEBUFSIZ]; 1242 1243 for (cp = lbuf; *cp; cp++) 1244 continue; 1245 *--cp = 0; /* zap newline */ 1246 if (*dbp == 0) 1247 return; 1248 if (special) { 1249 if ((cp = strchr(dbp, ')')) == NULL) 1250 return; 1251 while (cp >= dbp && *cp != ':') 1252 cp--; 1253 if (cp < dbp) 1254 return; 1255 dbp = cp; 1256 while (*cp && *cp != ')' && *cp != ' ') 1257 cp++; 1258 } 1259 else 1260 for (cp = dbp + 1; *cp && *cp != '(' && *cp != ' '; cp++) 1261 continue; 1262 c = cp[0]; 1263 cp[0] = 0; 1264 (void) strcpy(nambuf, dbp); 1265 cp[0] = c; 1266 pfnote(nambuf, lineno, TRUE); 1267 pfcnt++; 1268 } 1269 1270 /* 1271 * striccmp: 1272 * Compare two strings over the length of the second, ignoring 1273 * case distinctions. If they are the same, return 0. If they 1274 * are different, return the difference of the first two different 1275 * characters. It is assumed that the pattern (second string) is 1276 * completely lower case. 1277 */ 1278 static int 1279 striccmp(char *str, char *pat) 1280 { 1281 int c1; 1282 1283 while (*pat) { 1284 if (isupper(*str)) 1285 c1 = tolower(*str); 1286 else 1287 c1 = *str; 1288 if (c1 != *pat) 1289 return (c1 - *pat); 1290 pat++; 1291 str++; 1292 } 1293 return (0); 1294 } 1295 1296 /* 1297 * first_char: 1298 * Return the first non-blank character in the file. After 1299 * finding it, rewind the input file so we start at the beginning 1300 * again. 1301 */ 1302 static int 1303 first_char(void) 1304 { 1305 int c; 1306 long off; 1307 1308 off = ftell(inf); 1309 while ((c = getc(inf)) != EOF) 1310 if (!isspace(c) && c != '\r') { 1311 (void) fseek(inf, off, 0); 1312 return (c); 1313 } 1314 (void) fseek(inf, off, 0); 1315 return (EOF); 1316 } 1317 1318 /* 1319 * toss_yysec: 1320 * Toss away code until the next "%%" line. 1321 */ 1322 static void 1323 toss_yysec(void) 1324 { 1325 char buf[BUFSIZ]; 1326 1327 for (;;) { 1328 lineftell = ftell(inf); 1329 if (fgets(buf, BUFSIZ, inf) == NULL) 1330 return; 1331 lineno++; 1332 if (strncmp(buf, "%%", 2) == 0) 1333 return; 1334 } 1335 } 1336 1337 static void 1338 Usage(void) 1339 { 1340 #ifdef XPG4 1341 (void) fprintf(stderr, gettext("Usage:\tctags [-aBFuvw] " 1342 #else /* !XPG4 */ 1343 (void) fprintf(stderr, gettext("Usage:\tctags [-aBFtuvw] " 1344 #endif /* XPG4 */ 1345 "[-f tagsfile] file ...\n")); 1346 (void) fprintf(stderr, gettext("OR:\tctags [-x] file ...\n")); 1347 exit(1); 1348 } 1349 1350 1351 /* 1352 * parseargs(): modify the args 1353 * the purpose of this routine is to transform any ancient argument 1354 * usage into a format which is acceptable to getopt(3C), so that we 1355 * retain backwards Solaris 2.[0-4] compatibility. 1356 * 1357 * This routine allows us to make full use of getopts, without any 1358 * funny argument processing in main(). 1359 * 1360 * The other alternative would be to hand-craft the processed arguments 1361 * during and after getopt(3C) - which usually leads to uglier code 1362 * in main(). I've opted to keep the ugliness isolated down here, 1363 * instead of in main(). 1364 * 1365 * In a nutshell, if the user has used the old Solaris syntax of: 1366 * ctags [-aBFtuvwx] [-f tagsfile] filename ... 1367 * We simply change this into: 1368 * ctags [-a] [-B] [-F] [-t] [-u] [-v] [-w] [-x] [-f tags] file... 1369 * 1370 * If the user has specified the new getopt(3C) syntax, we merely 1371 * copy that into our modified argument space. 1372 */ 1373 static void 1374 parseargs(int ac, char **av) 1375 { 1376 int i; /* current argument */ 1377 int a; /* used to parse combined arguments */ 1378 int fflag; /* 1 = we're only parsing filenames */ 1379 size_t sz; /* size of the argument */ 1380 size_t mav_sz; /* size of our psuedo argument space */ 1381 1382 i = mac = fflag = 0; /* proper initializations */ 1383 1384 mav_sz = ((ac + 1) * sizeof (char *)); 1385 if ((mav = malloc(mav_sz)) == (char **)NULL) { 1386 perror("Can't malloc argument space"); 1387 exit(1); 1388 } 1389 1390 /* for each argument, see if we need to change things: */ 1391 for (; (av[i] != NULL) && (av[i][0] != '\0'); i++) { 1392 1393 if (strcmp(av[i], "--") == 0) { 1394 fflag = 1; /* just handle filenames now */ 1395 } 1396 1397 sz = strlen(&av[i][0]); /* get this arg's size */ 1398 1399 /* 1400 * if the argument starts with a "-", and has more than 1401 * 1 flag, then we have to search through each character, 1402 * and separate any flags which have been combined. 1403 * 1404 * so, if we've found a "-" string which needs separating: 1405 */ 1406 if (fflag == 0 && /* not handling filename args */ 1407 av[i][0] == '-' && /* and this is a flag */ 1408 sz > 2) { /* and there's more than 1 flag */ 1409 /* then for each flag after the "-" sign: */ 1410 for (a = 1; av[i][a]; a++) { 1411 /* copy the flag into mav space. */ 1412 if (a > 1) { 1413 /* 1414 * we need to call realloc() after the 1415 * 1st combined flag, because "ac" 1416 * doesn't include combined args. 1417 */ 1418 mav_sz += sizeof (char *); 1419 if ((mav = realloc(mav, mav_sz)) == 1420 (char **)NULL) { 1421 perror("Can't realloc " 1422 "argument space"); 1423 exit(1); 1424 } 1425 } 1426 1427 if ((mav[mac] = malloc((size_t)CPFLAG)) == 1428 (char *)NULL) { 1429 perror("Can't malloc argument space"); 1430 exit(1); 1431 } 1432 (void) sprintf(mav[mac], "-%c", av[i][a]); 1433 ++mac; 1434 } 1435 } else { 1436 /* otherwise, just copy the argument: */ 1437 if ((mav[mac] = malloc(sz + 1)) == (char *)NULL) { 1438 perror("Can't malloc argument space"); 1439 exit(1); 1440 } 1441 (void) strcpy(mav[mac], av[i]); 1442 ++mac; 1443 } 1444 } 1445 1446 mav[mac] = (char *)NULL; 1447 } 1448