xref: /original-bsd/usr.bin/ctags/ctags.c (revision 6c57d260)
/* SCCS-style identification string ("@(#)" marker) naming this file, its version and date. */
static char *sccsid = "@(#)ctags.c	4.3 (Berkeley) 11/24/80";
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
4 
5 /*
6  * ctags: create a tags file
7  */
8 
#define	reg	register
#define	logical	char

#define	TRUE	(1)
#define	FALSE	(0)

/*
 * Character-class pseudo-functions.  Each one is a lookup in a table
 * that init() fills in.  The argument is folded into 0..0377 before it
 * is used as a subscript, so EOF and characters with the top bit set
 * land inside the tables instead of outside them.
 */
#define	NCHARS		0400		/* one slot per 8-bit code	*/
#define	chslot(arg)	((arg) & 0377)	/* table subscript for a char	*/

#define	iswhite(arg)	(_wht[chslot(arg)])	/* T if char is white		*/
#define	begtoken(arg)	(_btk[chslot(arg)])	/* T if char can start token	*/
#define	intoken(arg)	(_itk[chslot(arg)])	/* T if char can be in token	*/
#define	endtoken(arg)	(_etk[chslot(arg)])	/* T if char ends tokens	*/
#define	isgood(arg)	(_gd[chslot(arg)])	/* T if char can be after ')'	*/

/* arguments are parenthesized so that operators inside them bind as written */
#define	max(I1,I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char	*func;			/* function name		*/
	char	*file;			/* file name			*/
	int	lno;			/* for -x option		*/
	char	*pat;			/* search pattern		*/
	logical	been_warned;		/* set if noticed dup		*/
	struct	nd_st	*left,*right;	/* left and right sons		*/
};

long	ftell();
typedef	struct	nd_st	NODE;

logical	number,				/* T if on line starting with #	*/
	term	= FALSE,		/* T if print on terminal	*/
	makefile= TRUE,			/* T if to creat "tags" file	*/
	gotone,				/* found a func already on line	*/
					/* tables behind the boolean	*/
					/* pseudo-functions (see init)	*/
	_wht[NCHARS],_etk[NCHARS],_itk[NCHARS],_btk[NCHARS],_gd[NCHARS];

char	searchar = '?';			/* use ?...? searches 		*/

int	lineno;				/* line number of current line */
char	line[4*BUFSIZ],		/* current input line			*/
	*curfile,		/* current input file name		*/
	*outfile= "tags",	/* output file				*/
	*white	= " \f\t\n",	/* white chars				*/
	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
				/* token ending chars			*/
	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
				/* token starting chars			*/
	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",
				/* valid in-token chars			*/
	*notgd	= ",;";		/* non-valid after-function chars	*/

int	file_num;		/* current file number			*/
int	aflag;			/* -a: append to tags */
int	uflag;			/* -u: update tags */
int	wflag;			/* -w: suppress warnings */
int	vflag;			/* -v: create vgrind style index output */
int	xflag;			/* -x: create cxref style output */

char	lbuf[BUFSIZ];		/* text of the line a tag was found on	*/

FILE	*inf,			/* ioptr for current input file		*/
	*outf;			/* ioptr for tags file			*/

long	lineftell;		/* ftell after getc( inf ) == '\n' 	*/

NODE	*head;			/* the head of the sorted binary tree	*/

char	*savestr();
char	*rindex();
74 main(ac,av)
75 int	ac;
76 char	*av[];
77 {
78 	char cmd[100];
79 	int i;
80 
81 	while (ac > 1 && av[1][0] == '-') {
82 		for (i=1; av[1][i]; i++) {
83 			switch(av[1][i]) {
84 				case 'a':
85 					aflag++;
86 					break;
87 				case 'u':
88 					uflag++;
89 					break;
90 				case 'w':
91 					wflag++;
92 					break;
93 				case 'v':
94 					vflag++;
95 					xflag++;
96 					break;
97 				case 'x':
98 					xflag++;
99 					break;
100 				default:
101 					goto usage;
102 			}
103 		}
104 		ac--; av++;
105 	}
106 
107 	if (ac <= 1) {
108 		usage: printf("Usage: ctags [-au] file ...\n");
109 		exit(1);
110 	}
111 
112 	init();			/* set up boolean "functions"		*/
113 	/*
114 	 * loop through files finding functions
115 	 */
116 	for (file_num = 1; file_num < ac; file_num++)
117 		find_funcs(av[file_num]);
118 
119 	if (xflag) {
120 		put_funcs(head);
121 		exit(0);
122 	}
123 	if (uflag) {
124 		for (i=1; i<ac; i++) {
125 			sprintf(cmd,
126 				"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
127 				outfile, av[i], outfile);
128 			system(cmd);
129 		}
130 		aflag++;
131 	}
132 	outf = fopen(outfile, aflag ? "a" : "w");
133 	if (outf == NULL) {
134 		perror(outfile);
135 		exit(1);
136 	}
137 	put_funcs(head);
138 	fclose(outf);
139 	if (uflag) {
140 		sprintf(cmd, "sort %s -o %s", outfile, outfile);
141 		system(cmd);
142 	}
143 	exit(0);
144 }
145 
146 /*
147  * This routine sets up the boolean psuedo-functions which work
148  * by seting boolean flags dependent upon the corresponding character
149  * Every char which is NOT in that string is not a white char.  Therefore,
150  * all of the array "_wht" is set to FALSE, and then the elements
151  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
152  * of a char is TRUE if it is the string "white", else FALSE.
153  */
154 init()
155 {
156 
157 	reg	char	*sp;
158 	reg	int	i;
159 
160 	for (i = 0; i < 0177; i++) {
161 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
162 		_gd[i] = TRUE;
163 	}
164 	for (sp = white; *sp; sp++)
165 		_wht[*sp] = TRUE;
166 	for (sp = endtk; *sp; sp++)
167 		_etk[*sp] = TRUE;
168 	for (sp = intk; *sp; sp++)
169 		_itk[*sp] = TRUE;
170 	for (sp = begtk; *sp; sp++)
171 		_btk[*sp] = TRUE;
172 	for (sp = notgd; *sp; sp++)
173 		_gd[*sp] = FALSE;
174 }
175 
176 /*
177  * This routine opens the specified file and calls the function
178  * which finds the function definitions.
179  */
180 find_funcs(file)
181 char	*file;
182 {
183 	char *cp;
184 
185 	if ((inf=fopen(file,"r")) == NULL) {
186 		perror(file);
187 		return;
188 	}
189 	curfile = savestr(file);
190 	cp = rindex(file, '.');
191 	if (cp && (cp[1] != 'c' || cp[1] != 'h') && cp[2] == 0) {
192 		if (PF_funcs(inf) == 0) {
193 			rewind(inf);
194 			C_funcs();
195 		}
196 	} else
197 		C_funcs();
198 	fclose(inf);
199 }
200 
201 pfnote(name, ln)
202 	char *name;
203 {
204 	register char *fp;
205 	register NODE *np;
206 	char nbuf[BUFSIZ];
207 
208 	if ((np = (NODE *) malloc(sizeof (NODE))) == NULL) {
209 		fprintf(stderr, "ctags: too many functions to sort\n");
210 		put_funcs(head);
211 		free_tree(head);
212 		head = np = (NODE *) malloc(sizeof (NODE));
213 	}
214 	if (xflag == 0 && !strcmp(name, "main")) {
215 		fp = rindex(curfile, '/');
216 		if (fp == 0)
217 			fp = curfile;
218 		else
219 			fp++;
220 		sprintf(nbuf, "M%s", fp);
221 		fp = rindex(nbuf, '.');
222 		if (fp && fp[2] == 0)
223 			*fp = 0;
224 		name = nbuf;
225 	}
226 	np->func = savestr(name);
227 	np->file = curfile;
228 	np->lno = ln;
229 	np->left = np->right = 0;
230 	if (xflag == 0) {
231 		lbuf[50] = 0;
232 		strcat(lbuf, "$");
233 		lbuf[50] = 0;
234 	}
235 	np->pat = savestr(lbuf);
236 	if (head == NULL)
237 		head = np;
238 	else
239 		add_node(np, head);
240 }
241 
242 /*
243  * This routine finds functions in C syntax and adds them
244  * to the list.
245  */
246 C_funcs()
247 {
248 	register int c;
249 	register char *token, *tp;
250 	int incomm, inquote, inchar, midtoken, level;
251 	char *sp;
252 	char tok[BUFSIZ];
253 
254 	lineno = 1;
255 	number = gotone = midtoken = inquote = inchar = incomm = FALSE;
256 	level = 0;
257 	sp = tp = token = line;
258 	for (;;) {
259 		*sp=c=getc(inf);
260 		if (feof(inf))
261 			break;
262 		if (c == '\n')
263 			lineno++;
264 		if (c == '\\') {
265 			c = *++sp = getc(inf);
266 			if (c = '\n')
267 				c = ' ';
268 		} else if (incomm) {
269 			if (c == '*') {
270 				while ((*++sp=c=getc(inf)) == '*')
271 					continue;
272 				if (c == '\n')
273 					lineno++;
274 				if (c == '/')
275 					incomm = FALSE;
276 			}
277 		} else if (inquote) {
278 			/*
279 			 * Too dumb to know about \" not being magic, but
280 			 * they usually occur in pairs anyway.
281 			 */
282 			if (c == '"')
283 				inquote = FALSE;
284 			continue;
285 		} else if (inchar) {
286 			if (c == '\'')
287 				inchar = FALSE;
288 			continue;
289 		} else switch (c) {
290 		case '"':
291 			inquote = TRUE;
292 			continue;
293 		case '\'':
294 			inchar = TRUE;
295 			continue;
296 		case '/':
297 			if ((*++sp=c=getc(inf)) == '*')
298 				incomm = TRUE;
299 			else
300 				ungetc(*sp, inf);
301 			continue;
302 		case '#':
303 			if (sp == line)
304 				number = TRUE;
305 			continue;
306 		case '{':
307 			level++;
308 			continue;
309 		case '}':
310 			if (sp == line)
311 				level = 0;	/* reset */
312 			else
313 				level--;
314 			continue;
315 		}
316 		if (!level && !inquote && !incomm && gotone == 0) {
317 			if (midtoken) {
318 				if (endtoken(c)) {
319 					int pfline = lineno;
320 					if (start_func(&sp,token,tp)) {
321 						strncpy(tok,token,tp-token+1);
322 						tok[tp-token+1] = 0;
323 						getline();
324 						pfnote(tok, pfline);
325 						gotone = TRUE;
326 					}
327 					midtoken = FALSE;
328 					token = sp;
329 				} else if (intoken(c))
330 					tp++;
331 			} else if (begtoken(c)) {
332 				token = tp = sp;
333 				midtoken = TRUE;
334 			}
335 		}
336 		sp++;
337 		if (c == '\n' || sp > &line[sizeof (line) - BUFSIZ]) {
338 			tp = token = sp = line;
339 			lineftell = ftell(inf);
340 			number = gotone = midtoken = inquote = inchar = FALSE;
341 		}
342 	}
343 }
344 
345 /*
346  *	This routine  checks to see if the current token is
347  * at the start of a function.  It updates the input line
348  * so that the '(' will be in it when it returns.
349  */
350 start_func(lp,token,tp)
351 char	**lp,*token,*tp;
352 {
353 
354 	reg	char	c,*sp,*tsp;
355 	static	logical	found;
356 	logical	firsttok;		/* T if have seen first token in ()'s */
357 	int	bad;
358 
359 	sp = *lp;
360 	c = *sp;
361 	bad = FALSE;
362 	if (!number) {		/* space is not allowed in macro defs	*/
363 		while (iswhite(c)) {
364 			*++sp = c = getc(inf);
365 			if (c == '\n') {
366 				lineno++;
367 				if (sp > &line[sizeof (line) - BUFSIZ])
368 					goto ret;
369 			}
370 		}
371 	/* the following tries to make it so that a #define a b(c)	*/
372 	/* doesn't count as a define of b.				*/
373 	} else {
374 		logical	define;
375 
376 		define = TRUE;
377 		for (tsp = "define"; *tsp && token < tp; tsp++)
378 			if (*tsp != *token++) {
379 				define = FALSE;
380 				break;
381 			}
382 		if (define)
383 			found = 0;
384 		else
385 			found++;
386 		if (found >= 2) {
387 			gotone = TRUE;
388 badone:			bad = TRUE;
389 			goto ret;
390 		}
391 	}
392 	if (c != '(')
393 		goto badone;
394 	firsttok = FALSE;
395 	while ((*++sp=c=getc(inf)) != ')') {
396 		if (c == '\n') {
397 			lineno++;
398 			if (sp > &line[sizeof (line) - BUFSIZ])
399 				goto ret;
400 		}
401 		/*
402 		 * This line used to confuse ctags:
403 		 *	int	(*oldhup)();
404 		 * This fixes it. A nonwhite char before the first
405 		 * token, other than a / (in case of a comment in there)
406 		 * makes this not a declaration.
407 		 */
408 		if (begtoken(c) || c=='/') firsttok++;
409 		else if (!iswhite(c) && !firsttok) goto badone;
410 	}
411 	while (iswhite(*++sp=c=getc(inf)))
412 		if (c == '\n') {
413 			lineno++;
414 			if (sp > &line[sizeof (line) - BUFSIZ])
415 				break;
416 		}
417 ret:
418 	*lp = --sp;
419 	if (c == '\n')
420 		lineno--;
421 	ungetc(c,inf);
422 	return !bad && isgood(c);
423 }
424 
425 getline()
426 {
427 	long saveftell = ftell( inf );
428 	register char *cp;
429 
430 	fseek( inf , lineftell , 0 );
431 	fgets(lbuf, sizeof lbuf, inf);
432 	cp = rindex(lbuf, '\n');
433 	if (cp)
434 		*cp = 0;
435 	fseek(inf, saveftell, 0);
436 }
437 
438 free_tree(node)
439 NODE	*node;
440 {
441 
442 	while (node) {
443 		free_tree(node->right);
444 		cfree(node);
445 		node = node->left;
446 	}
447 }
448 
449 add_node(node, cur_node)
450 	NODE *node,*cur_node;
451 {
452 	register int dif;
453 
454 	dif = strcmp(node->func,cur_node->func);
455 	if (dif == 0) {
456 		if (node->file == cur_node->file) {
457 			if (!wflag) {
458 fprintf(stderr,"Duplicate function in file %s, line %d: %s\n",
459     node->file,lineno,node->func);
460 fprintf(stderr,"Second entry ignored\n");
461 			}
462 			return;
463 		}
464 		if (!cur_node->been_warned)
465 			if (!wflag)
466 fprintf(stderr,"Duplicate function in files %s and %s: %s (Warning only)\n",
467     node->file, cur_node->file, node->func);
468 		cur_node->been_warned = TRUE;
469 		return;
470 	}
471 	if (dif < 0) {
472 		if (cur_node->left != NULL)
473 			add_node(node,cur_node->left);
474 		else
475 			cur_node->left = node;
476 		return;
477 	}
478 	if (cur_node->right != NULL)
479 		add_node(node,cur_node->right);
480 	else
481 		cur_node->right = node;
482 }
483 
484 put_funcs(node)
485 reg NODE	*node;
486 {
487 	reg char	*sp;
488 
489 	if (node == NULL)
490 		return;
491 	put_funcs(node->left);
492 	if (xflag == 0) {
493 		fprintf(outf, "%s\t%s\t%c^", node->func, node->file ,searchar);
494 		for (sp = node->pat; *sp; sp++)
495 			if (*sp == '\\')
496 				fprintf(outf, "\\\\");
497 			else
498 				putc(*sp, outf);
499 		fprintf(outf, "%c\n", searchar);
500 	}
501 	else if (vflag)
502 		fprintf(stdout, "%s %s %d\n", node->func, node->file, (node->lno+63)/64);
503 	else
504 		fprintf(stdout, "%-16s%4d %-16s %s\n",
505 		    node->func, node->lno, node->file, node->pat);
506 	put_funcs(node->right);
507 }
508 
509 char	*dbp = lbuf;
510 int	pfcnt;
511 
512 PF_funcs(fi)
513 	FILE *fi;
514 {
515 
516 	lineno = 0;
517 	pfcnt = 0;
518 	while (fgets(lbuf, sizeof(lbuf), fi)) {
519 		lineno++;
520 		dbp = lbuf;
521 		if ( *dbp == '%' ) dbp++ ;	/* Ratfor escape to fortran */
522 		while (isspace(*dbp))
523 			dbp++;
524 		if (*dbp == 0)
525 			continue;
526 		switch (*dbp |' ') {
527 
528 		case 'i':
529 			if (tail("integer"))
530 				takeprec();
531 			break;
532 		case 'r':
533 			if (tail("real"))
534 				takeprec();
535 			break;
536 		case 'l':
537 			if (tail("logical"))
538 				takeprec();
539 			break;
540 		case 'c':
541 			if (tail("complex") || tail("character"))
542 				takeprec();
543 			break;
544 		case 'd':
545 			if (tail("double")) {
546 				while (isspace(*dbp))
547 					dbp++;
548 				if (*dbp == 0)
549 					continue;
550 				if (tail("precision"))
551 					break;
552 				continue;
553 			}
554 			break;
555 		}
556 		while (isspace(*dbp))
557 			dbp++;
558 		if (*dbp == 0)
559 			continue;
560 		switch (*dbp|' ') {
561 
562 		case 'f':
563 			if (tail("function"))
564 				getit();
565 			continue;
566 		case 's':
567 			if (tail("subroutine"))
568 				getit();
569 			continue;
570 		case 'p':
571 			if (tail("program")) {
572 				getit();
573 				continue;
574 			}
575 			if (tail("procedure"))
576 				getit();
577 			continue;
578 		}
579 	}
580 	return (pfcnt);
581 }
582 
583 tail(cp)
584 	char *cp;
585 {
586 	register int len = 0;
587 
588 	while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
589 		cp++, len++;
590 	if (*cp == 0) {
591 		dbp += len;
592 		return (1);
593 	}
594 	return (0);
595 }
596 
597 takeprec()
598 {
599 
600 	while (isspace(*dbp))
601 		dbp++;
602 	if (*dbp != '*')
603 		return;
604 	dbp++;
605 	while (isspace(*dbp))
606 		dbp++;
607 	if (!isdigit(*dbp)) {
608 		--dbp;		/* force failure */
609 		return;
610 	}
611 	do
612 		dbp++;
613 	while (isdigit(*dbp));
614 }
615 
616 getit()
617 {
618 	register char *cp;
619 	char c;
620 	char nambuf[BUFSIZ];
621 
622 	for (cp = lbuf; *cp; cp++)
623 		;
624 	*--cp = 0;	/* zap newline */
625 	while (isspace(*dbp))
626 		dbp++;
627 	if (*dbp == 0 || !isalpha(*dbp))
628 		return;
629 	for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
630 		continue;
631 	c = cp[0];
632 	cp[0] = 0;
633 	strcpy(nambuf, dbp);
634 	cp[0] = c;
635 	pfnote(nambuf, lineno);
636 	pfcnt++;
637 }
638 
/*
 * savestr: return a freshly allocated copy of the string "cp".
 * The copy belongs to the caller.  When no memory is left a message
 * is printed and the program exits with status 1, so the result is
 * never NULL.
 */
char *
savestr(cp)
	char *cp;
{
	register int len;
	register char *dp;

	len = strlen(cp);
	dp = (char *)malloc(len+1);
	if (dp == NULL) {
		fprintf(stderr, "ctags: out of memory\n");
		exit(1);
	}
	strcpy(dp, cp);
	return (dp);
}
651 
/*
 * rindex: return a pointer to the last place the character c
 * appears in the string sp, or NULL when it does not appear.  The
 * terminating NUL takes part in the search, so looking for 0 yields
 * the end of the string.
 *
 * Identical to v7 rindex, included for portability.
 */

char *
rindex(sp, c)
register char *sp, c;
{
	register char *last;

	last = NULL;
	for (;; sp++) {
		if (*sp == c)
			last = sp;
		if (*sp == 0)
			break;
	}
	return(last);
}
672