xref: /original-bsd/usr.bin/ctags/ctags.c (revision ba72ef4c)
1 static char *sccsid = "@(#)ctags.c	4.2 (Berkeley) 10/3/80";
2 #include <stdio.h>
3 #include <ctype.h>
4 
5 /*
6  * ctags
7  */
8 
#define	reg	register
#define	logical	char

#define	TRUE	(1)
#define	FALSE	(0)

/*
 * Character-class predicates.  Each one is a lookup in a table of
 * flags indexed by the character value; init() fills the tables in.
 */
#define	iswhite(arg)	(_wht[arg])	/* T if char is white		*/
#define	begtoken(arg)	(_btk[arg])	/* T if char can start token	*/
#define	intoken(arg)	(_itk[arg])	/* T if char can be in token	*/
#define	endtoken(arg)	(_etk[arg])	/* T if char ends tokens	*/
#define	isgood(arg)	(_gd[arg])	/* T if char can be after ')'	*/

/*
 * Larger of two values.  Every use of an argument is parenthesized so
 * that an argument containing a low-precedence operator (?:, &, |, ...)
 * cannot regroup with the comparison.  Each argument is still evaluated
 * twice, so do not pass expressions with side effects.
 */
#define	max(I1,I2)	((I1) > (I2) ? (I1) : (I2))
22 
23 struct	nd_st {			/* sorting structure			*/
24 	char	*func;			/* function name		*/
25 	char	*file;			/* file name			*/
26 	int	lno;			/* for -x option		*/
27 	char	*pat;			/* search pattern		*/
28 	logical	been_warned;		/* set if noticed dup		*/
29 	struct	nd_st	*left,*right;	/* left and right sons		*/
30 };
31 
32 long	ftell();
33 typedef	struct	nd_st	NODE;
34 
35 logical	number,				/* T if on line starting with #	*/
36 	term	= FALSE,		/* T if print on terminal	*/
37 	makefile= TRUE,			/* T if to creat "tags" file	*/
38 	gotone,				/* found a func already on line	*/
39 					/* boolean "func" (see init)	*/
40 	_wht[0177],_etk[0177],_itk[0177],_btk[0177],_gd[0177];
41 
42 char	searchar = '?';			/* use ?...? searches 		*/
43 
44 int	lineno;				/* line number of current line */
45 char	line[4*BUFSIZ],		/* current input line			*/
46 	*curfile,		/* current input file name		*/
47 	*outfile= "tags",	/* output file				*/
48 	*white	= " \f\t\n",	/* white chars				*/
49 	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
50 				/* token ending chars			*/
51 	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
52 				/* token starting chars			*/
53 	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",				/* valid in-token chars			*/
54 	*notgd	= ",;";		/* non-valid after-function chars	*/
55 
56 int	file_num;		/* current file number			*/
57 int	aflag;			/* -a: append to tags */
58 int	uflag;			/* -u: update tags */
59 int	wflag;			/* -w: suppress warnings */
60 int	xflag;			/* -x: create cxref style output */
61 
62 char	lbuf[BUFSIZ];
63 
64 FILE	*inf,			/* ioptr for current input file		*/
65 	*outf;			/* ioptr for tags file			*/
66 
67 long	lineftell;		/* ftell after getc( inf ) == '\n' 	*/
68 
69 NODE	*head;			/* the head of the sorted binary tree	*/
70 
71 char	*savestr();
72 char	*rindex();
73 main(ac,av)
74 int	ac;
75 char	*av[];
76 {
77 	char cmd[100];
78 	int i;
79 
80 	while (ac > 1 && av[1][0] == '-') {
81 		for (i=1; av[1][i]; i++) {
82 			switch(av[1][i]) {
83 				case 'a':
84 					aflag++;
85 					break;
86 				case 'u':
87 					uflag++;
88 					break;
89 				case 'w':
90 					wflag++;
91 					break;
92 				case 'x':
93 					xflag++;
94 					break;
95 				default:
96 					goto usage;
97 			}
98 		}
99 		ac--; av++;
100 	}
101 
102 	if (ac <= 1) {
103 		usage: printf("Usage: ctags [-au] file ...\n");
104 		exit(1);
105 	}
106 
107 	init();			/* set up boolean "functions"		*/
108 	/*
109 	 * loop through files finding functions
110 	 */
111 	for (file_num = 1; file_num < ac; file_num++)
112 		find_funcs(av[file_num]);
113 
114 	if (xflag) {
115 		put_funcs(head);
116 		exit(0);
117 	}
118 	if (uflag) {
119 		for (i=1; i<ac; i++) {
120 			sprintf(cmd,
121 				"mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
122 				outfile, av[i], outfile);
123 			system(cmd);
124 		}
125 		aflag++;
126 	}
127 	outf = fopen(outfile, aflag ? "a" : "w");
128 	if (outf == NULL) {
129 		perror(outfile);
130 		exit(1);
131 	}
132 	put_funcs(head);
133 	fclose(outf);
134 	if (uflag) {
135 		sprintf(cmd, "sort %s -o %s", outfile, outfile);
136 		system(cmd);
137 	}
138 	exit(0);
139 }
140 
141 /*
142  * This routine sets up the boolean psuedo-functions which work
143  * by seting boolean flags dependent upon the corresponding character
144  * Every char which is NOT in that string is not a white char.  Therefore,
145  * all of the array "_wht" is set to FALSE, and then the elements
146  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
147  * of a char is TRUE if it is the string "white", else FALSE.
148  */
149 init()
150 {
151 
152 	reg	char	*sp;
153 	reg	int	i;
154 
155 	for (i = 0; i < 0177; i++) {
156 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
157 		_gd[i] = TRUE;
158 	}
159 	for (sp = white; *sp; sp++)
160 		_wht[*sp] = TRUE;
161 	for (sp = endtk; *sp; sp++)
162 		_etk[*sp] = TRUE;
163 	for (sp = intk; *sp; sp++)
164 		_itk[*sp] = TRUE;
165 	for (sp = begtk; *sp; sp++)
166 		_btk[*sp] = TRUE;
167 	for (sp = notgd; *sp; sp++)
168 		_gd[*sp] = FALSE;
169 }
170 
171 /*
172  * This routine opens the specified file and calls the function
173  * which finds the function definitions.
174  */
175 find_funcs(file)
176 char	*file;
177 {
178 	char *cp;
179 
180 	if ((inf=fopen(file,"r")) == NULL) {
181 		perror(file);
182 		return;
183 	}
184 	curfile = savestr(file);
185 	cp = rindex(file, '.');
186 	if (cp && (cp[1] != 'c' || cp[1] != 'h') && cp[2] == 0) {
187 		if (PF_funcs(inf) == 0) {
188 			rewind(inf);
189 			C_funcs();
190 		}
191 	} else
192 		C_funcs();
193 	fclose(inf);
194 }
195 
196 pfnote(name, ln)
197 	char *name;
198 {
199 	register char *fp;
200 	register NODE *np;
201 	char nbuf[BUFSIZ];
202 
203 	if ((np = (NODE *) malloc(sizeof (NODE))) == NULL) {
204 		fprintf(stderr, "ctags: too many functions to sort\n");
205 		put_funcs(head);
206 		free_tree(head);
207 		head = np = (NODE *) malloc(sizeof (NODE));
208 	}
209 	if (xflag == 0 && !strcmp(name, "main")) {
210 		fp = rindex(curfile, '/');
211 		if (fp == 0)
212 			fp = curfile;
213 		else
214 			fp++;
215 		sprintf(nbuf, "M%s", fp);
216 		fp = rindex(nbuf, '.');
217 		if (fp && fp[2] == 0)
218 			*fp = 0;
219 		name = nbuf;
220 	}
221 	np->func = savestr(name);
222 	np->file = curfile;
223 	np->lno = ln;
224 	np->left = np->right = 0;
225 	if (xflag == 0) {
226 		lbuf[50] = 0;
227 		strcat(lbuf, "$");
228 		lbuf[50] = 0;
229 	}
230 	np->pat = savestr(lbuf);
231 	if (head == NULL)
232 		head = np;
233 	else
234 		add_node(np, head);
235 }
236 
237 /*
238  * This routine finds functions in C syntax and adds them
239  * to the list.
240  */
241 C_funcs()
242 {
243 	register int c;
244 	register char *token, *tp;
245 	int incomm, inquote, inchar, midtoken, level;
246 	char *sp;
247 	char tok[BUFSIZ];
248 
249 	lineno = 1;
250 	number = gotone = midtoken = inquote = inchar = incomm = FALSE;
251 	level = 0;
252 	sp = tp = token = line;
253 	for (;;) {
254 		*sp=c=getc(inf);
255 		if (feof(inf))
256 			break;
257 		if (c == '\n')
258 			lineno++;
259 		if (c == '\\') {
260 			c = *++sp = getc(inf);
261 			if (c = '\n')
262 				c = ' ';
263 		} else if (incomm) {
264 			if (c == '*') {
265 				while ((*++sp=c=getc(inf)) == '*')
266 					continue;
267 				if (c == '\n')
268 					lineno++;
269 				if (c == '/')
270 					incomm = FALSE;
271 			}
272 		} else if (inquote) {
273 			/*
274 			 * Too dumb to know about \" not being magic, but
275 			 * they usually occur in pairs anyway.
276 			 */
277 			if (c == '"')
278 				inquote = FALSE;
279 			continue;
280 		} else if (inchar) {
281 			if (c == '\'')
282 				inchar = FALSE;
283 			continue;
284 		} else switch (c) {
285 		case '"':
286 			inquote = TRUE;
287 			continue;
288 		case '\'':
289 			inchar = TRUE;
290 			continue;
291 		case '/':
292 			if ((*++sp=c=getc(inf)) == '*')
293 				incomm = TRUE;
294 			else
295 				ungetc(*sp, inf);
296 			continue;
297 		case '#':
298 			if (sp == line)
299 				number = TRUE;
300 			continue;
301 		case '{':
302 			level++;
303 			continue;
304 		case '}':
305 			if (sp == line)
306 				level = 0;	/* reset */
307 			else
308 				level--;
309 			continue;
310 		}
311 		if (!level && !inquote && !incomm && gotone == 0) {
312 			if (midtoken) {
313 				if (endtoken(c)) {
314 					int pfline = lineno;
315 					if (start_func(&sp,token,tp)) {
316 						strncpy(tok,token,tp-token+1);
317 						tok[tp-token+1] = 0;
318 						getline();
319 						pfnote(tok, pfline);
320 						gotone = TRUE;
321 					}
322 					midtoken = FALSE;
323 					token = sp;
324 				} else if (intoken(c))
325 					tp++;
326 			} else if (begtoken(c)) {
327 				token = tp = sp;
328 				midtoken = TRUE;
329 			}
330 		}
331 		sp++;
332 		if (c == '\n' || sp > &line[sizeof (line) - BUFSIZ]) {
333 			tp = token = sp = line;
334 			lineftell = ftell(inf);
335 			number = gotone = midtoken = inquote = inchar = FALSE;
336 		}
337 	}
338 }
339 
340 /*
341  *	This routine  checks to see if the current token is
342  * at the start of a function.  It updates the input line
343  * so that the '(' will be in it when it returns.
344  */
345 start_func(lp,token,tp)
346 char	**lp,*token,*tp;
347 {
348 
349 	reg	char	c,*sp,*tsp;
350 	static	logical	found;
351 	logical	firsttok;		/* T if have seen first token in ()'s */
352 	int	bad;
353 
354 	sp = *lp;
355 	c = *sp;
356 	bad = FALSE;
357 	if (!number) {		/* space is not allowed in macro defs	*/
358 		while (iswhite(c)) {
359 			*++sp = c = getc(inf);
360 			if (c == '\n') {
361 				lineno++;
362 				if (sp > &line[sizeof (line) - BUFSIZ])
363 					goto ret;
364 			}
365 		}
366 	/* the following tries to make it so that a #define a b(c)	*/
367 	/* doesn't count as a define of b.				*/
368 	} else {
369 		logical	define;
370 
371 		define = TRUE;
372 		for (tsp = "define"; *tsp && token < tp; tsp++)
373 			if (*tsp != *token++) {
374 				define = FALSE;
375 				break;
376 			}
377 		if (define)
378 			found = 0;
379 		else
380 			found++;
381 		if (found >= 2) {
382 			gotone = TRUE;
383 badone:			bad = TRUE;
384 			goto ret;
385 		}
386 	}
387 	if (c != '(')
388 		goto badone;
389 	firsttok = FALSE;
390 	while ((*++sp=c=getc(inf)) != ')') {
391 		if (c == '\n') {
392 			lineno++;
393 			if (sp > &line[sizeof (line) - BUFSIZ])
394 				goto ret;
395 		}
396 		/*
397 		 * This line used to confuse ctags:
398 		 *	int	(*oldhup)();
399 		 * This fixes it. A nonwhite char before the first
400 		 * token, other than a / (in case of a comment in there)
401 		 * makes this not a declaration.
402 		 */
403 		if (begtoken(c) || c=='/') firsttok++;
404 		else if (!iswhite(c) && !firsttok) goto badone;
405 	}
406 	while (iswhite(*++sp=c=getc(inf)))
407 		if (c == '\n') {
408 			lineno++;
409 			if (sp > &line[sizeof (line) - BUFSIZ])
410 				break;
411 		}
412 ret:
413 	*lp = --sp;
414 	if (c == '\n')
415 		lineno--;
416 	ungetc(c,inf);
417 	return !bad && isgood(c);
418 }
419 
420 getline()
421 {
422 	long saveftell = ftell( inf );
423 	register char *cp;
424 
425 	fseek( inf , lineftell , 0 );
426 	fgets(lbuf, sizeof lbuf, inf);
427 	cp = rindex(lbuf, '\n');
428 	if (cp)
429 		*cp = 0;
430 	fseek(inf, saveftell, 0);
431 }
432 
433 free_tree(node)
434 NODE	*node;
435 {
436 
437 	while (node) {
438 		free_tree(node->right);
439 		cfree(node);
440 		node = node->left;
441 	}
442 }
443 
444 add_node(node, cur_node)
445 	NODE *node,*cur_node;
446 {
447 	register int dif;
448 
449 	dif = strcmp(node->func,cur_node->func);
450 	if (dif == 0) {
451 		if (node->file == cur_node->file) {
452 			if (!wflag) {
453 fprintf(stderr,"Duplicate function in file %s, line %d: %s\n",
454     node->file,lineno,node->func);
455 fprintf(stderr,"Second entry ignored\n");
456 			}
457 			return;
458 		}
459 		if (!cur_node->been_warned)
460 			if (!wflag)
461 fprintf(stderr,"Duplicate function in files %s and %s: %s (Warning only)\n",
462     node->file, cur_node->file, node->func);
463 		cur_node->been_warned = TRUE;
464 		return;
465 	}
466 	if (dif < 0) {
467 		if (cur_node->left != NULL)
468 			add_node(node,cur_node->left);
469 		else
470 			cur_node->left = node;
471 		return;
472 	}
473 	if (cur_node->right != NULL)
474 		add_node(node,cur_node->right);
475 	else
476 		cur_node->right = node;
477 }
478 
479 put_funcs(node)
480 reg NODE	*node;
481 {
482 	reg char	*sp;
483 
484 	if (node == NULL)
485 		return;
486 	put_funcs(node->left);
487 	if (xflag == 0) {
488 		fprintf(outf, "%s\t%s\t%c^", node->func, node->file ,searchar);
489 		for (sp = node->pat; *sp; sp++)
490 			if (*sp == '\\')
491 				fprintf(outf, "\\\\");
492 			else
493 				putc(*sp, outf);
494 		fprintf(outf, "%c\n", searchar);
495 	}
496 	else
497 		fprintf(stdout, "%-16s%4d %-16s %s\n",
498 		    node->func, node->lno, node->file, node->pat);
499 	put_funcs(node->right);
500 }
501 
502 char	*dbp = lbuf;
503 int	pfcnt;
504 
505 PF_funcs(fi)
506 	FILE *fi;
507 {
508 
509 	lineno = 0;
510 	pfcnt = 0;
511 	while (fgets(lbuf, sizeof(lbuf), fi)) {
512 		lineno++;
513 		dbp = lbuf;
514 		if ( *dbp == '%' ) dbp++ ;	/* Ratfor escape to fortran */
515 		while (isspace(*dbp))
516 			dbp++;
517 		if (*dbp == 0)
518 			continue;
519 		switch (*dbp |' ') {
520 
521 		case 'i':
522 			if (tail("integer"))
523 				takeprec();
524 			break;
525 		case 'r':
526 			if (tail("real"))
527 				takeprec();
528 			break;
529 		case 'l':
530 			if (tail("logical"))
531 				takeprec();
532 			break;
533 		case 'c':
534 			if (tail("complex") || tail("character"))
535 				takeprec();
536 			break;
537 		case 'd':
538 			if (tail("double")) {
539 				while (isspace(*dbp))
540 					dbp++;
541 				if (*dbp == 0)
542 					continue;
543 				if (tail("precision"))
544 					break;
545 				continue;
546 			}
547 			break;
548 		}
549 		while (isspace(*dbp))
550 			dbp++;
551 		if (*dbp == 0)
552 			continue;
553 		switch (*dbp|' ') {
554 
555 		case 'f':
556 			if (tail("function"))
557 				getit();
558 			continue;
559 		case 's':
560 			if (tail("subroutine"))
561 				getit();
562 			continue;
563 		case 'p':
564 			if (tail("program")) {
565 				getit();
566 				continue;
567 			}
568 			if (tail("procedure"))
569 				getit();
570 			continue;
571 		}
572 	}
573 	return (pfcnt);
574 }
575 
576 tail(cp)
577 	char *cp;
578 {
579 	register int len = 0;
580 
581 	while (*cp && (*cp&~' ') == ((*(dbp+len))&~' '))
582 		cp++, len++;
583 	if (*cp == 0) {
584 		dbp += len;
585 		return (1);
586 	}
587 	return (0);
588 }
589 
590 takeprec()
591 {
592 
593 	while (isspace(*dbp))
594 		dbp++;
595 	if (*dbp != '*')
596 		return;
597 	dbp++;
598 	while (isspace(*dbp))
599 		dbp++;
600 	if (!isdigit(*dbp)) {
601 		--dbp;		/* force failure */
602 		return;
603 	}
604 	do
605 		dbp++;
606 	while (isdigit(*dbp));
607 }
608 
609 getit()
610 {
611 	register char *cp;
612 	char c;
613 	char nambuf[BUFSIZ];
614 
615 	for (cp = lbuf; *cp; cp++)
616 		;
617 	*--cp = 0;	/* zap newline */
618 	while (isspace(*dbp))
619 		dbp++;
620 	if (*dbp == 0 || !isalpha(*dbp))
621 		return;
622 	for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++)
623 		continue;
624 	c = cp[0];
625 	cp[0] = 0;
626 	strcpy(nambuf, dbp);
627 	cp[0] = c;
628 	pfnote(nambuf, lineno);
629 	pfcnt++;
630 }
631 
/*
 * Return a freshly allocated copy of the string cp.  The copy belongs
 * to the caller.  If there is no memory left the program gives up with
 * a message, so the result is never NULL.
 */
char *
savestr(cp)
	char *cp;
{
	register int len;
	register char *dp;

	len = strlen(cp);
	dp = (char *)malloc(len+1);
	if (dp == NULL) {
		fprintf(stderr, "ctags: out of memory\n");
		exit(1);
	}
	strcpy(dp, cp);
	return (dp);
}
644 
/*
 * Find the last place the character c appears in the string sp and
 * return a pointer to it; return NULL if it does not appear.  The
 * terminating NUL counts as part of the string.
 *
 * Identical to v7 rindex, included for portability.
 */

char *
rindex(sp, c)
register char *sp, c;
{
	register char *last;

	for (last = NULL; ; sp++) {
		if (*sp == c)
			last = sp;
		if (*sp == 0)
			break;
	}
	return(last);
}
665