xref: /original-bsd/usr.bin/ctags/ctags.c (revision f82e54c4)
1 #ifndef	lint
2 static char *sccsid = "@(#)ctags.c	4.4 (Berkeley) 8/30/82";
3 #endif
4 
5 #include <stdio.h>
6 #include <ctype.h>
7 
8 /*
9  * ctags: create a tags file
10  */
11 
#define	reg	register
#define	logical	char

#define	TRUE	(1)
#define	FALSE	(0)

/*
 * Character-class lookups.  Each macro subscripts one of the tables
 * declared at the end of this section.  The subscript is masked to the
 * range 0..0377 and the tables hold 0400 entries, so a plain (possibly
 * signed) char with the high bit set can never index outside a table.
 */
#define	iswhite(arg)	(_wht[(arg) & 0377])	/* T if char is white		*/
#define	begtoken(arg)	(_btk[(arg) & 0377])	/* T if char can start token	*/
#define	intoken(arg)	(_itk[(arg) & 0377])	/* T if char can be in token	*/
#define	endtoken(arg)	(_etk[(arg) & 0377])	/* T if char ends tokens	*/
#define	isgood(arg)	(_gd[(arg) & 0377])	/* T if char can be after ')'	*/

/* arguments are parenthesized so operators in them keep their meaning */
#define	max(I1,I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char	*entry;			/* function or type name	*/
	char	*file;			/* file name			*/
	logical f;			/* use pattern or line no	*/
	int	lno;			/* for -x option		*/
	char	*pat;			/* search pattern		*/
	logical	been_warned;		/* set if noticed dup		*/
	struct	nd_st	*left,*right;	/* left and right sons		*/
};

long	ftell();
typedef	struct	nd_st	NODE;

logical	number,				/* T if on line starting with #	*/
	gotone,				/* found a func already on line	*/
					/* boolean "func" (see init)	*/
	_wht[0400],_etk[0400],_itk[0400],_btk[0400],_gd[0400];
43 
44 	/* typedefs are recognized using a simple finite automata,
45 	 * tydef is its state variable.
46 	 */
47 typedef enum {none, begin, middle, end } TYST;
48 
49 TYST tydef = none;
50 
51 char	searchar = '/';			/* use /.../ searches 		*/
52 
53 int	lineno;				/* line number of current line */
54 char	line[4*BUFSIZ],		/* current input line			*/
55 	*curfile,		/* current input file name		*/
56 	*outfile= "tags",	/* output file				*/
57 	*white	= " \f\t\n",	/* white chars				*/
58 	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
59 				/* token ending chars			*/
60 	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
61 				/* token starting chars			*/
62 	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",
63 				/* valid in-token chars			*/
64 	*notgd	= ",;";		/* non-valid after-function chars	*/
65 
66 int	file_num;		/* current file number			*/
67 int	aflag;			/* -a: append to tags */
68 int	tflag;			/* -t: create tags for typedefs */
69 int	uflag;			/* -u: update tags */
70 int	wflag;			/* -w: suppress warnings */
71 int	vflag;			/* -v: create vgrind style index output */
72 int	xflag;			/* -x: create cxref style output */
73 
74 char	lbuf[BUFSIZ];
75 
76 FILE	*inf,			/* ioptr for current input file		*/
77 	*outf;			/* ioptr for tags file			*/
78 
79 long	lineftell;		/* ftell after getc( inf ) == '\n' 	*/
80 
81 NODE	*head;			/* the head of the sorted binary tree	*/
82 
83 char	*savestr();
84 char	*rindex();
85 main(ac,av)
86 int	ac;
87 char	*av[];
88 {
89 	char cmd[100];
90 	int i;
91 
92 	while (ac > 1 && av[1][0] == '-') {
93 		for (i=1; av[1][i]; i++) {
94 			switch(av[1][i]) {
95 				case 'B':
96 					searchar='?';
97 					break;
98 				case 'F':
99 					searchar='/';
100 					break;
101 				case 'a':
102 					aflag++;
103 					break;
104 				case 't':
105 					tflag++;
106 					break;
107 				case 'u':
108 					uflag++;
109 					break;
110 				case 'w':
111 					wflag++;
112 					break;
113 				case 'v':
114 					vflag++;
115 					xflag++;
116 					break;
117 				case 'x':
118 					xflag++;
119 					break;
120 				default:
121 					goto usage;
122 			}
123 		}
124 		ac--; av++;
125 	}
126 
127 	if (ac <= 1) {
128 usage:		printf("Usage: ctags [-BFatuwvx] file ...\n");
129 		exit(1);
130 	}
131 
132 	init();			/* set up boolean "functions"		*/
133 	/*
134 	 * loop through files finding functions
135 	 */
136 	for (file_num = 1; file_num < ac; file_num++)
137 		find_entries(av[file_num]);
138 
139 	if (xflag) {
140 		put_entries(head);
141 		exit(0);
142 	}
143 	if (uflag) {
144 		for (i=1; i<ac; i++) {
145 			sprintf(cmd,
146 				"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
147 				outfile, av[i], outfile);
148 			system(cmd);
149 		}
150 		aflag++;
151 	}
152 	outf = fopen(outfile, aflag ? "a" : "w");
153 	if (outf == NULL) {
154 		perror(outfile);
155 		exit(1);
156 	}
157 	put_entries(head);
158 	fclose(outf);
159 	if (uflag) {
160 		sprintf(cmd, "sort %s -o %s", outfile, outfile);
161 		system(cmd);
162 	}
163 	exit(0);
164 }
165 
166 /*
167  * This routine sets up the boolean psuedo-functions which work
168  * by seting boolean flags dependent upon the corresponding character
169  * Every char which is NOT in that string is not a white char.  Therefore,
170  * all of the array "_wht" is set to FALSE, and then the elements
171  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
172  * of a char is TRUE if it is the string "white", else FALSE.
173  */
174 init()
175 {
176 
177 	reg	char	*sp;
178 	reg	int	i;
179 
180 	for (i = 0; i < 0177; i++) {
181 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
182 		_gd[i] = TRUE;
183 	}
184 	for (sp = white; *sp; sp++)
185 		_wht[*sp] = TRUE;
186 	for (sp = endtk; *sp; sp++)
187 		_etk[*sp] = TRUE;
188 	for (sp = intk; *sp; sp++)
189 		_itk[*sp] = TRUE;
190 	for (sp = begtk; *sp; sp++)
191 		_btk[*sp] = TRUE;
192 	for (sp = notgd; *sp; sp++)
193 		_gd[*sp] = FALSE;
194 }
195 
196 /*
197  * This routine opens the specified file and calls the function
198  * which finds the function and type definitions.
199  */
200 find_entries(file)
201 char	*file;
202 {
203 	char *cp;
204 
205 	if ((inf=fopen(file,"r")) == NULL) {
206 		perror(file);
207 		return;
208 	}
209 	curfile = savestr(file);
210 	cp = rindex(file, '.');
211 	if (cp && (cp[1] != 'c' || cp[1] != 'h') && cp[2] == 0) {
212 		if (PF_funcs(inf) == 0) {
213 			rewind(inf);
214 			C_entries();
215 		}
216 	} else
217 		C_entries();
218 	fclose(inf);
219 }
220 
221 pfnote(name, ln, f)
222 	char *name;
223 	logical f;			/* f == TRUE when function */
224 {
225 	register char *fp;
226 	register NODE *np;
227 	char nbuf[BUFSIZ];
228 
229 	if ((np = (NODE *) malloc(sizeof (NODE))) == NULL) {
230 		fprintf(stderr, "ctags: too many entries to sort\n");
231 		put_entries(head);
232 		free_tree(head);
233 		head = np = (NODE *) malloc(sizeof (NODE));
234 	}
235 	if (xflag == 0 && !strcmp(name, "main")) {
236 		fp = rindex(curfile, '/');
237 		if (fp == 0)
238 			fp = curfile;
239 		else
240 			fp++;
241 		sprintf(nbuf, "M%s", fp);
242 		fp = rindex(nbuf, '.');
243 		if (fp && fp[2] == 0)
244 			*fp = 0;
245 		name = nbuf;
246 	}
247 	np->entry = savestr(name);
248 	np->file = curfile;
249 	np->f = f;
250 	np->lno = ln;
251 	np->left = np->right = 0;
252 	if (xflag == 0) {
253 		lbuf[50] = 0;
254 		strcat(lbuf, "$");
255 		lbuf[50] = 0;
256 	}
257 	np->pat = savestr(lbuf);
258 	if (head == NULL)
259 		head = np;
260 	else
261 		add_node(np, head);
262 }
263 
264 /*
265  * This routine finds functions and typedefs in C syntax and adds them
266  * to the list.
267  */
268 C_entries()
269 {
270 	register int c;
271 	register char *token, *tp;
272 	logical incomm, inquote, inchar, midtoken;
273 	int level;
274 	char *sp;
275 	char tok[BUFSIZ];
276 
277 	lineno = 1;
278 	number = gotone = midtoken = inquote = inchar = incomm = FALSE;
279 	level = 0;
280 	sp = tp = token = line;
281 	for (;;) {
282 		*sp=c=getc(inf);
283 		if (feof(inf))
284 			break;
285 		if (c == '\n')
286 			lineno++;
287 		if (c == '\\') {
288 			c = *++sp = getc(inf);
289 			if (c = '\n')
290 				c = ' ';
291 		} else if (incomm) {
292 			if (c == '*') {
293 				while ((*++sp=c=getc(inf)) == '*')
294 					continue;
295 				if (c == '\n')
296 					lineno++;
297 				if (c == '/')
298 					incomm = FALSE;
299 			}
300 		} else if (inquote) {
301 			/*
302 			 * Too dumb to know about \" not being magic, but
303 			 * they usually occur in pairs anyway.
304 			 */
305 			if (c == '"')
306 				inquote = FALSE;
307 			continue;
308 		} else if (inchar) {
309 			if (c == '\'')
310 				inchar = FALSE;
311 			continue;
312 		} else switch (c) {
313 		case '"':
314 			inquote = TRUE;
315 			continue;
316 		case '\'':
317 			inchar = TRUE;
318 			continue;
319 		case '/':
320 			if ((*++sp=c=getc(inf)) == '*')
321 				incomm = TRUE;
322 			else
323 				ungetc(*sp, inf);
324 			continue;
325 		case '#':
326 			if (sp == line)
327 				number = TRUE;
328 			continue;
329 		case '{':
330 			if (tydef == begin) {
331 				tydef=middle;
332 			}
333 			level++;
334 			continue;
335 		case '}':
336 			if (sp == line)
337 				level = 0;	/* reset */
338 			else
339 				level--;
340 			if (!level && tydef==middle) {
341 				tydef=end;
342 			}
343 			continue;
344 		}
345 		if (!level && !inquote && !incomm && gotone == FALSE) {
346 			if (midtoken) {
347 				if (endtoken(c)) {
348 					int f;
349 					int pfline = lineno;
350 					if (start_entry(&sp,token,&f)) {
351 						strncpy(tok,token,tp-token+1);
352 						tok[tp-token+1] = 0;
353 						getline();
354 						pfnote(tok, pfline, f);
355 						gotone = f;	/* function */
356 					}
357 					midtoken = FALSE;
358 					token = sp;
359 				} else if (intoken(c))
360 					tp++;
361 			} else if (begtoken(c)) {
362 				token = tp = sp;
363 				midtoken = TRUE;
364 			}
365 		}
366 		if (c == ';'  &&  tydef==end)	/* clean with typedefs */
367 			tydef=none;
368 		sp++;
369 		if (c == '\n' || sp > &line[sizeof (line) - BUFSIZ]) {
370 			tp = token = sp = line;
371 			lineftell = ftell(inf);
372 			number = gotone = midtoken = inquote = inchar = FALSE;
373 		}
374 	}
375 }
376 
377 /*
378  * This routine  checks to see if the current token is
379  * at the start of a function, or corresponds to a typedef
380  * It updates the input line * so that the '(' will be
381  * in it when it returns.
382  */
383 start_entry(lp,token,f)
384 char	**lp;
385 register char *token;
386 int	*f;
387 {
388 
389 	reg	char	c,*sp;
390 	static	logical	found;
391 	logical	firsttok;		/* T if have seen first token in ()'s */
392 	int	bad;
393 
394 	*f = 1;			/* a function */
395 	sp = *lp;
396 	c = *sp;
397 	bad = FALSE;
398 	if (!number) {		/* space is not allowed in macro defs	*/
399 		while (iswhite(c)) {
400 			*++sp = c = getc(inf);
401 			if (c == '\n') {
402 				lineno++;
403 				if (sp > &line[sizeof (line) - BUFSIZ])
404 					goto ret;
405 			}
406 		}
407 	/* the following tries to make it so that a #define a b(c)	*/
408 	/* doesn't count as a define of b.				*/
409 	} else {
410 		if (!strncmp(token, "define", 6))
411 			found = 0;
412 		else
413 			found++;
414 		if (found >= 2) {
415 			gotone = TRUE;
416 badone:			bad = TRUE;
417 			goto ret;
418 		}
419 	}
420 	/* check for the typedef cases		*/
421 	if (tflag && !strncmp(token, "typedef", 7)) {
422 		tydef=begin;
423 		goto badone;
424 	}
425 	if (tydef==begin && (!strncmp(token, "struct", 6) ||
426 	    !strncmp(token, "union", 5) || !strncmp(token, "enum", 4))) {
427 		goto badone;
428 	}
429 	if (tydef==begin) {
430 		tydef=end;
431 		goto badone;
432 	}
433 	if (tydef==end) {
434 		*f = 0;
435 		goto ret;
436 	}
437 	if (c != '(')
438 		goto badone;
439 	firsttok = FALSE;
440 	while ((*++sp=c=getc(inf)) != ')') {
441 		if (c == '\n') {
442 			lineno++;
443 			if (sp > &line[sizeof (line) - BUFSIZ])
444 				goto ret;
445 		}
446 		/*
447 		 * This line used to confuse ctags:
448 		 *	int	(*oldhup)();
449 		 * This fixes it. A nonwhite char before the first
450 		 * token, other than a / (in case of a comment in there)
451 		 * makes this not a declaration.
452 		 */
453 		if (begtoken(c) || c=='/') firsttok++;
454 		else if (!iswhite(c) && !firsttok) goto badone;
455 	}
456 	while (iswhite(*++sp=c=getc(inf)))
457 		if (c == '\n') {
458 			lineno++;
459 			if (sp > &line[sizeof (line) - BUFSIZ])
460 				break;
461 		}
462 ret:
463 	*lp = --sp;
464 	if (c == '\n')
465 		lineno--;
466 	ungetc(c,inf);
467 	return !bad && (!*f || isgood(c));
468 					/* hack for typedefs */
469 }
470 
471 getline()
472 {
473 	long saveftell = ftell( inf );
474 	register char *cp;
475 
476 	fseek( inf , lineftell , 0 );
477 	fgets(lbuf, sizeof lbuf, inf);
478 	cp = rindex(lbuf, '\n');
479 	if (cp)
480 		*cp = 0;
481 	fseek(inf, saveftell, 0);
482 }
483 
484 free_tree(node)
485 NODE	*node;
486 {
487 
488 	while (node) {
489 		free_tree(node->right);
490 		cfree(node);
491 		node = node->left;
492 	}
493 }
494 
495 add_node(node, cur_node)
496 	NODE *node,*cur_node;
497 {
498 	register int dif;
499 
500 	dif = strcmp(node->entry, cur_node->entry);
501 	if (dif == 0) {
502 		if (node->file == cur_node->file) {
503 			if (!wflag) {
504 fprintf(stderr,"Duplicate entry in file %s, line %d: %s\n",
505     node->file,lineno,node->entry);
506 fprintf(stderr,"Second entry ignored\n");
507 			}
508 			return;
509 		}
510 		if (!cur_node->been_warned)
511 			if (!wflag)
512 fprintf(stderr,"Duplicate entry in files %s and %s: %s (Warning only)\n",
513     node->file, cur_node->file, node->entry);
514 		cur_node->been_warned = TRUE;
515 		return;
516 	}
517 	if (dif < 0) {
518 		if (cur_node->left != NULL)
519 			add_node(node,cur_node->left);
520 		else
521 			cur_node->left = node;
522 		return;
523 	}
524 	if (cur_node->right != NULL)
525 		add_node(node,cur_node->right);
526 	else
527 		cur_node->right = node;
528 }
529 
530 put_entries(node)
531 reg NODE	*node;
532 {
533 	reg char	*sp;
534 
535 	if (node == NULL)
536 		return;
537 	put_entries(node->left);
538 	if (xflag == 0)
539 		if (node->f) {		/* a function */
540 			fprintf(outf, "%s\t%s\t%c^",
541 				node->entry, node->file, searchar);
542 			for (sp = node->pat; *sp; sp++)
543 				if (*sp == '\\')
544 					fprintf(outf, "\\\\");
545 				else if (*sp == searchar)
546 					fprintf(outf, "\\%c", searchar);
547 				else
548 					putc(*sp, outf);
549 			fprintf(outf, "%c\n", searchar);
550 		} else {		/* a typedef; text pattern inadequate */
551 			fprintf(outf, "%s\t%s\t%d\n",
552 				node->entry, node->file, node->lno);
553 		}
554 	else if (vflag)
555 		fprintf(stdout, "%s %s %d\n",
556 				node->entry, node->file, (node->lno+63)/64);
557 	else
558 		fprintf(stdout, "%-16s%4d %-16s %s\n",
559 			node->entry, node->lno, node->file, node->pat);
560 	put_entries(node->right);
561 }
562 
563 char	*dbp = lbuf;
564 int	pfcnt;
565 
566 PF_funcs(fi)
567 	FILE *fi;
568 {
569 
570 	lineno = 0;
571 	pfcnt = 0;
572 	while (fgets(lbuf, sizeof(lbuf), fi)) {
573 		lineno++;
574 		dbp = lbuf;
575 		if ( *dbp == '%' ) dbp++ ;	/* Ratfor escape to fortran */
576 		while (isspace(*dbp))
577 			dbp++;
578 		if (*dbp == 0)
579 			continue;
580 		switch (*dbp |' ') {
581 
582 		case 'i':
583 			if (tail("integer"))
584 				takeprec();
585 			break;
586 		case 'r':
587 			if (tail("real"))
588 				takeprec();
589 			break;
590 		case 'l':
591 			if (tail("logical"))
592 				takeprec();
593 			break;
594 		case 'c':
595 			if (tail("complex") || tail("character"))
596 				takeprec();
597 			break;
598 		case 'd':
599 			if (tail("double")) {
600 				while (isspace(*dbp))
601 					dbp++;
602 				if (*dbp == 0)
603 					continue;
604 				if (tail("precision"))
605 					break;
606 				continue;
607 			}
608 			break;
609 		}
610 		while (isspace(*dbp))
611 			dbp++;
612 		if (*dbp == 0)
613 			continue;
614 		switch (*dbp|' ') {
615 
616 		case 'f':
617 			if (tail("function"))
618 				getit();
619 			continue;
620 		case 's':
621 			if (tail("subroutine"))
622 				getit();
623 			continue;
624 		case 'p':
625 			if (tail("program")) {
626 				getit();
627 				continue;
628 			}
629 			if (tail("procedure"))
630 				getit();
631 			continue;
632 		}
633 	}
634 	return (pfcnt);
635 }
636 
637 tail(cp)
638 	char *cp;
639 {
640 	register int len = 0;
641 
642 	while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
643 		cp++, len++;
644 	if (*cp == 0) {
645 		dbp += len;
646 		return (1);
647 	}
648 	return (0);
649 }
650 
651 takeprec()
652 {
653 
654 	while (isspace(*dbp))
655 		dbp++;
656 	if (*dbp != '*')
657 		return;
658 	dbp++;
659 	while (isspace(*dbp))
660 		dbp++;
661 	if (!isdigit(*dbp)) {
662 		--dbp;		/* force failure */
663 		return;
664 	}
665 	do
666 		dbp++;
667 	while (isdigit(*dbp));
668 }
669 
670 getit()
671 {
672 	register char *cp;
673 	char c;
674 	char nambuf[BUFSIZ];
675 
676 	for (cp = lbuf; *cp; cp++)
677 		;
678 	*--cp = 0;	/* zap newline */
679 	while (isspace(*dbp))
680 		dbp++;
681 	if (*dbp == 0 || !isalpha(*dbp))
682 		return;
683 	for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
684 		continue;
685 	c = cp[0];
686 	cp[0] = 0;
687 	strcpy(nambuf, dbp);
688 	cp[0] = c;
689 	pfnote(nambuf, lineno, FALSE);
690 	pfcnt++;
691 }
692 
/*
 * savestr: return a freshly allocated copy of the string "cp".
 * The caller owns the copy.  If no memory is available a message is
 * printed and the program exits with status 1, so the result is
 * never NULL.
 */
char *
savestr(cp)
	char *cp;
{
	register int len;
	register char *dp;

	len = strlen(cp);
	dp = (char *)malloc(len+1);
	if (dp == NULL) {
		fprintf(stderr, "ctags: out of memory\n");
		exit(1);
	}
	strcpy(dp, cp);
	return (dp);
}
705 
/*
 * Return a pointer to the last place the character c appears in
 * the string sp, or NULL if it does not appear.  The terminating
 * NUL is part of the search, so rindex(sp, 0) finds the end of sp.
 *
 * Identical to v7 rindex, included for portability.
 */

char *
rindex(sp, c)
register char *sp, c;
{
	register char *last;

	for (last = NULL; ; sp++) {
		if (*sp == c)
			last = sp;
		if (*sp == 0)
			break;
	}
	return(last);
}
726