xref: /original-bsd/usr.bin/ctags/ctags.c (revision 96c0f48c)
#include	<stdio.h>
#include	<stdlib.h>
#include	<string.h>
2 
3 /*
4  *	This program examines each of its arguments for C function
5  * definitions, and puts them in a file "tags" for use by the editor
6  * (and anyone else who wants to).
7  */
8 
9 /*
10  *	program history:
11  *	ken arnold wrote this program.  ask him.
12  *	brought over to the vax by peter b. kessler 7/79
13  *	who disavows any knowledge of its actions,
14  *	except for the stuff related to the construction
15  *	of the search patterns.
16  *	Some additional enhancements made by Mark Horton, involving
17  *	the options and special treatment of "main", "}" at beginning
18  *	of line, and a few bug fixes.
19  */
20 
#define	reg	register
#define	logical	char

#define	TRUE	(1)
#define	FALSE	(0)

#define	NCHARS	256		/* one table slot per unsigned char value */

/*
 * Character-class lookups.  The subscript is forced through
 * (unsigned char) so that a sign-extended char, an 8-bit byte or EOF
 * can never index outside the NCHARS-entry tables.
 */
#define	iswhite(arg)	(_wht[(unsigned char)(arg)])	/* T if char is white		*/
#define	begtoken(arg)	(_btk[(unsigned char)(arg)])	/* T if char can start token	*/
#define	intoken(arg)	(_itk[(unsigned char)(arg)])	/* T if char can be in token	*/
#define	endtoken(arg)	(_etk[(unsigned char)(arg)])	/* T if char ends tokens	*/
#define	isgood(arg)	(_gd[(unsigned char)(arg)])	/* T if char can be after ')'	*/

/* arguments are parenthesized so that operator expressions expand safely */
#define	max(I1,I2)	((I1) > (I2) ? (I1) : (I2))

struct	nd_st {			/* sorting structure			*/
	char	*func;			/* function name		*/
	char	*file;			/* file name			*/
	char	*pat;			/* search pattern		*/
	logical	been_warned;		/* set if noticed dup		*/
	struct	nd_st	*left,*right;	/* left and right sons		*/
};

long	ftell();
#ifdef DEBUG
char	*unctrl();
#endif
typedef	struct	nd_st	NODE;

logical	number,				/* T if on line starting with #	*/
	term	= FALSE,		/* T if print on terminal	*/
	makefile= TRUE,			/* T if to creat "tags" file	*/
	gotone,				/* found a func already on line	*/
					/* boolean "func" (see init)	*/
	_wht[NCHARS],_etk[NCHARS],_itk[NCHARS],_btk[NCHARS],_gd[NCHARS];

char	searchar = '?';			/* use ?...? searches 		*/
#define	MAXPATTERN	50		/* according to bill		*/

int	lineno;				/* line number of current line */
char	line[256],		/* current input line			*/
	*curfile,		/* current input file name		*/
	*outfile= "tags",	/* output file				*/
	*white	= " \f\t\n",	/* white chars				*/
	*endtk	= " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?",
				/* token ending chars			*/
	*begtk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
				/* token starting chars			*/
	*intk	= "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789",
				/* valid in-token chars			*/
	*notgd	= ",;";		/* non-valid after-function chars	*/

int	file_num;		/* current file number			*/
int	aflag;			/* -a: append to tags */
int	uflag;			/* -u: update tags */
int	wflag;			/* -w: suppress warnings */

FILE	*inf,			/* ioptr for current input file		*/
	*outf;			/* ioptr for tags file			*/

long	lineftell;		/* ftell after getc( inf ) == '\n' 	*/

NODE	*head;			/* the head of the sorted binary tree	*/
82 
83 main(ac,av)
84 int	ac;
85 char	*av[];
86 {
87 	char cmd[100];
88 	int i;
89 
90 	while (ac > 1 && av[1][0] == '-') {
91 		for (i=1; av[1][i]; i++) {
92 			switch(av[1][i]) {
93 				case 'a':
94 					aflag++;
95 					break;
96 				case 'u':
97 					uflag++;
98 					break;
99 				case 'w':
100 					wflag++;
101 					break;
102 
103 				default:
104 					goto usage;
105 			}
106 		}
107 		ac--; av++;
108 	}
109 
110 	if (ac <= 1) {
111 		usage: printf("Usage: ctags [-au] file ...\n");
112 		exit(1);
113 	}
114 
115 	init();			/* set up boolean "functions"		*/
116 	/*
117 	 * loop through files finding functions
118 	 */
119 	for (file_num = 1; file_num < ac; file_num++)
120 		find_funcs(av[file_num]);
121 
122 	if (uflag) {
123 		for (i=1; i<ac; i++) {
124 			sprintf(cmd, "mv %s OTAGS ; fgrep -v '\t%s\t' OTAGS > %s ; rm OTAGS", outfile, av[i], outfile);
125 			system(cmd);
126 		}
127 		aflag++;
128 	}
129 
130 	if ((outf = fopen(outfile, aflag ? "a" : "w")) == NULL) {
131 		perror(outfile);
132 		exit(1);
133 	}
134 	put_funcs(head);	/* put the data in "tags"		*/
135 	exit(0);
136 }
137 
138 /*
139  *	This routine sets up the boolean psuedo-functions which work
140  * by seting boolean flags dependent upon the corresponding character
141 
142  * Every char which is NOT in that string is not a white char.  Therefore,
143  * all of the array "_wht" is set to FALSE, and then the elements
144  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
145  * of a char is TRUE if it is the string "white", else FALSE.
146  * It also open up the "tags" output file.
147  */
148 init()
149 {
150 
151 	reg	char	*sp;
152 	reg	int	i;
153 
154 	for (i = 0; i < 0177; i++) {
155 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
156 		_gd[i] = TRUE;
157 	}
158 	for (sp = white; *sp; sp++)
159 		_wht[*sp] = TRUE;
160 	for (sp = endtk; *sp; sp++)
161 		_etk[*sp] = TRUE;
162 	for (sp = intk; *sp; sp++)
163 		_itk[*sp] = TRUE;
164 	for (sp = begtk; *sp; sp++)
165 		_btk[*sp] = TRUE;
166 	for (sp = notgd; *sp; sp++)
167 		_gd[*sp] = FALSE;
168 }
169 
170 /*
171  *	This program opens the specified file and calls the function
172  * which finds the function defenitions.
173  */
174 find_funcs(file)
175 char	*file;
176 {
177 
178 	if ((inf=fopen(file,"r")) == NULL) {
179 		perror(file);
180 		return;
181 	}
182 
183 	curfile = (char *) calloc(strlen(file)+1,1);
184 	strcpy(curfile, file);
185 	lineno = 1;
186 	C_funcs();		/* find the C-style functions		*/
187 	fclose(inf);
188 }
189 
190 /*
191  *	This routine finds functions in C syntax and adds them
192  * to the list.
193  */
194 C_funcs()
195 {
196 
197 	reg	char	c,		/* current input char		*/
198 			*token,		/* start of current token	*/
199 			*tp;		/* end of current token		*/
200 	logical	incom,			/* T if inside a comment	*/
201 		inquote,		/* T if inside a quoted string	*/
202 		inchar,			/* T if inside a single char '	*/
203 		midtoken;		/* T if in middle of token	*/
204 	char	*sp;			/* current input char		*/
205 	char	tok[100];
206 	long	insub;			/* level of "{}"s deep		*/
207 
208 	/*
209 	 * init boolean flags, counters, and pointers
210 	 */
211 
212 	number = gotone = midtoken = inquote = inchar = incom = FALSE;
213 	insub = 0L;
214 	sp = tp = token = line;
215 #ifdef DEBUG
216 	printf("           t  s c m q c g n\n");
217 	printf("     s  t  k  u o i u h o u\n");
218 	printf(" c   p  p  n  b m d o r t m\n");
219 #endif
220 	while ((*sp=c=getc(inf)) != EOF) {
221 #ifdef DEBUG
222 		printf("%2.2s: ",unctrl(c));
223 		printf("%2.2s ",unctrl(*sp));
224 		printf("%2.2s ",unctrl(*tp));
225 		printf("%2.2s ",unctrl(*token));
226 		printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number);
227 #endif
228 		/*
229 		 * action based on mixture of character type, *sp,
230 		 * and logical flags
231 		 */
232 
233 		if (c == '\\') {
234 			c = *++sp = getc(inf);
235 			/*
236 			 * Handling of backslash is very naive.
237 			 * We do, however, turn escaped newlines
238 			 * into spaces.
239 			 */
240 			if (c = '\n')
241 				c = ' ';
242 		}
243 		else if (incom) {
244 			if (c == '*') {
245 				while ((*++sp=c=getc(inf)) == '*') {
246 #ifdef DEBUG
247 					printf("%2.2s- ",unctrl(c));
248 					printf("%2.2s ",unctrl(*sp));
249 					printf("%2.2s ",unctrl(*tp));
250 					printf("%2.2s ",unctrl(*token));
251 					printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number);
252 #endif
253 					continue;
254 				}
255 #ifdef DEBUG
256 				printf("%2.2s- ",unctrl(c));
257 				printf("%2.2s ",unctrl(*sp));
258 				printf("%2.2s ",unctrl(*tp));
259 				printf("%2.2s ",unctrl(*token));
260 				printf("%2ld %d %d %d %d %d %d\n",insub,incom,midtoken,inquote,inchar,gotone,number);
261 #endif
262 				if (c == '/')
263 					incom = FALSE;
264 			}
265 		}
266 		else if (inquote) {
267 			/*
268 			 * Too dumb to know about \" not being magic, but
269 			 * they usually occur in pairs anyway.
270 			 */
271 			if ( c == '"' )
272 				inquote = FALSE;
273 			continue;
274 		     }
275 		else if (inchar) {
276 			if ( c == '\'' )
277 				inchar = FALSE;
278 			continue;
279 		     }
280 		else if (c == '"')
281 			inquote = TRUE;
282 		else if (c == '\'')
283 			inchar = TRUE;
284 		else if (c == '/')
285 			if ((*++sp=c=getc(inf)) == '*')
286 				incom = TRUE;
287 			else
288 				ungetc(*sp,inf);
289 		else if (c == '#' && sp == line)
290 			number = TRUE;
291 		else if (c == '{')
292 			insub++;
293 		else if (c == '}')
294 			if (sp == line)
295 				/*
296 				 * Kludge to get back in sync after getting confused.
297 				 * We really shouldn't be looking at indenting style,
298 				 * but tricking with the preprocessor can get us off,
299 				 * and most people indent this way anyway.
300 				 * This resets level of indenting to zero if '}' is
301 				 * found at beginning of line.
302 				 */
303 				insub = 0;
304 			else
305 				insub--;
306 		else if (!insub && !inquote && !inchar && !gotone) {
307 			if (midtoken) {
308 				if (endtoken(c)) {
309 					if (start_func(&sp,token,tp)) {
310 						strncpy(tok,token,tp-token+1);
311 						tok[tp-token+1] = 0;
312 						add_func(tok);
313 						gotone = TRUE;
314 					}
315 					midtoken = FALSE;
316 					token = sp;
317 				}
318 				else if (intoken(c))
319 					tp++;
320 			}
321 			else if (begtoken(c)) {
322 				token = tp = sp;
323 				midtoken = TRUE;
324 			}
325 		}
326 
327 		/*
328 		 * move on to next char, and set flags accordingly
329 		 */
330 
331 		sp++;
332 		if (c == '\n') {
333 			tp = token = sp = line;
334 			lineftell = ftell( inf );
335 #ifdef DEBUG
336 			printf("lineftell saved as %ld\n",lineftell);
337 #endif
338 			number = gotone = midtoken = inquote = inchar = FALSE;
339 			lineno++;
340 		}
341 	}
342 }
343 
344 /*
345  *	This routine  checks to see if the current token is
346  * at the start of a function.  It updates the input line
347  * so that the '(' will be in it when it returns.
348  */
349 start_func(lp,token,tp)
350 char	**lp,*token,*tp;
351 {
352 
353 	reg	char	c,*sp,*tsp;
354 	static	logical	found;
355 	logical	firsttok;		/* T if have seen first token in ()'s */
356 	int	bad;
357 
358 	sp = *lp;
359 	c = *sp;
360 	bad = FALSE;
361 	if (!number)		/* space is not allowed in macro defs	*/
362 		while (iswhite(c)) {
363 			*++sp = c = getc(inf);
364 #ifdef DEBUG
365 			printf("%2.2s:\n",unctrl(c));
366 #endif
367 		}
368 	/* the following tries to make it so that a #define a b(c)	*/
369 	/* doesn't count as a define of b.				*/
370 	else {
371 		logical	define;
372 
373 		define = TRUE;
374 		for (tsp = "define"; *tsp && token < tp; tsp++)
375 			if (*tsp != *token++) {
376 				define = FALSE;
377 				break;
378 			}
379 		if (define)
380 			found = 0;
381 		else
382 			found++;
383 		if (found >= 2) {
384 			gotone = TRUE;
385 badone:			bad = TRUE;
386 			goto ret;
387 		}
388 	}
389 	if (c != '(')
390 		goto badone;
391 	firsttok = FALSE;
392 	while ((*++sp=c=getc(inf)) != ')') {
393 		/*
394 		 * This line used to confuse ctags:
395 		 *	int	(*oldhup)();
396 		 * This fixes it. A nonwhite char before the first
397 		 * token, other than a / (in case of a comment in there)
398 		 * makes this not a declaration.
399 		 */
400 		if (begtoken(c) || c=='/') firsttok++;
401 		else if (!iswhite(c) && !firsttok) goto badone;
402 #ifdef DEBUG
403 		printf("%2.2s:\n",unctrl(c));
404 #endif
405 	}
406 #ifdef DEBUG
407 	printf("%2.2s:\n",unctrl(c));
408 #endif
409 	while (iswhite(*++sp=c=getc(inf)))
410 #ifdef DEBUG
411 		printf("%2.2s:\n",unctrl(c))
412 #endif
413 		;
414 #ifdef DEBUG
415 	printf("%2.2s:\n",unctrl(c));
416 #endif
417 ret:
418 	*lp = --sp;
419 	ungetc(c,inf);
420 	return !bad && isgood(c);
421 }
422 
423 /*
424  *	This routine adds a function to the list
425  */
426 add_func(token)
427 char *token;
428 {
429 	reg	char	*fp,*pp;
430 	reg	NODE	*np;
431 
432 	if ((np = (NODE *) calloc(1,sizeof (NODE))) == NULL) {
433 		printf("too many functions to sort\n");
434 		put_funcs(head);
435 		free_tree(head);
436 		head = np = (NODE *) calloc(1,sizeof (NODE));
437 	}
438 	if (strcmp(token,"main") == 0) {
439 		/*
440 		 * Since there are so many directories with lots of
441 		 * misc. complete programs in them, main tends to get
442 		 * redefined a lot. So we change all mains to instead
443 		 * refer to the name of the file, without leading
444 		 * pathname components and without a trailing .c.
445 		 */
446 		fp = curfile;
447 		for (pp=curfile; *pp; pp++)
448 			if (*pp == '/')
449 				fp = pp+1;
450 		*token = 'M';
451 		strcpy(token+1, fp);
452 		pp = &token[strlen(token)-2];
453 		if (*pp == '.')
454 			*pp = 0;
455 	}
456 	fp = np->func = (char *) calloc(strlen(token)+1,sizeof (char));
457 	np->file = curfile;
458 	strcpy(fp, token);
459 	{	/*
460 		 * this change to make the whole line the pattern
461 		 */
462 	    long	saveftell = ftell( inf );
463 	    int		patlen;
464 	    char	ch;
465 
466 	    patlen = 0;
467 	    fseek( inf , lineftell , 0 );
468 #ifdef DEBUG
469 	    printf("saveftell=%ld, lseek back to %ld\n",saveftell,lineftell);
470 #endif
471 	    ch = getc( inf );
472 	    while ( ch != '\n' && ch != searchar && patlen < MAXPATTERN ) {
473 		patlen ++;
474 		ch = getc( inf );
475 	    }
476 	    pp = np -> pat = (char *) calloc( patlen + 2 , sizeof( char ) );
477 	    fseek( inf , lineftell , 0 );
478 	    ch = getc( inf );
479 	    while ( patlen -- ) {
480 		*pp ++ = ch;
481 		ch = getc( inf );
482 	    }
483 	    if ( ch == '\n' )
484 		*pp ++ = '$';
485 	    *pp = '\0';
486 	    fseek( inf , saveftell , 0 );
487 #ifdef DEBUG
488 	    printf("seek back to %ld, ftell is now %ld\n",saveftell,ftell(inf));
489 #endif
490 	}
491 #ifdef DEBUG
492 	printf("\"%s\"\t\"%s\"\t\"%s\"\n",np->func,np->file,np->pat);
493 #endif
494 	if (head == NULL)
495 		head = np;
496 	else
497 		add_node(np,head);
498 }
499 
500 /*
501  *	This routine cfrees the entire tree from the node down.
502  */
503 free_tree(node)
504 NODE	*node;
505 {
506 
507 	while (node) {
508 		free_tree(node->right);
509 		cfree(node);
510 		node = node->left;
511 	}
512 }
513 
514 /*
515  *	This routine finds the node where the new function node
516  * should be added.
517  */
518 add_node(node,cur_node)
519 NODE	*node,*cur_node;
520 {
521 
522 	reg	int	dif;
523 
524 	dif = strcmp(node->func,cur_node->func);
525 #ifdef DEBUG
526 	printf("strcmp(\"%s\",\"%s\") == %d\n",node->func,cur_node->func,dif);
527 #endif
528 	if (dif == 0) {
529 		if (node->file == cur_node->file) {
530 			if (!wflag) {
531 				fprintf(stderr,"Duplicate function in file \"%s\", line %d: %s\n",node->file,lineno,node->func);
532 				fprintf(stderr,"Second entry ignored\n");
533 			}
534 			return;
535 		}
536 		else {
537 			if (!cur_node->been_warned)
538 				if (!wflag)
539 					fprintf(stderr,"Duplicate function name in files %s and %s: %s (Warning only)\n",
540 						node->file, cur_node->file, node->func);
541 			cur_node->been_warned = TRUE;
542 		}
543 	}
544 	if (dif < 0)
545 		if (cur_node->left != NULL)
546 			add_node(node,cur_node->left);
547 		else {
548 #ifdef DEBUG
549 			printf("adding to left branch\n");
550 #endif
551 			cur_node->left = node;
552 		}
553 	else
554 		if (cur_node->right != NULL)
555 			add_node(node,cur_node->right);
556 		else {
557 #ifdef DEBUG
558 			printf("adding to right branch\n");
559 #endif
560 			cur_node->right = node;
561 		}
562 }
563 
564 /*
565  *	This routine puts the functions in the file.
566  */
567 put_funcs(node)
568 NODE	*node;
569 {
570 
571 	if (node == NULL)
572 		return;
573 	put_funcs(node->left);
574 	fprintf(outf,"%s\t%s\t%c^%s%c\n",node->func,node->file
575 	       ,searchar,node->pat,searchar);
576 	put_funcs(node->right);
577 }
578 
#ifdef DEBUG
/*
 *	Debugging aid: returns a short printable spelling of c in a
 * static buffer, which is overwritten by the next call.
 *	printable chars		themselves
 *	chars above '~'		"^?"
 *	negative chars		"??"
 *	\b, \t, \n		backslash and the letter
 *	other control chars	'^' and the char plus 64 (e.g. "^A")
 */
char *
unctrl(c)
char c;
{
	static char buf[3];
	char esc;		/* letter for a backslash escape, or 0 */

	if (c < 0) {
		buf[0] = buf[1] = '?';
		buf[2] = 0;
		return(buf);
	}
	if (c > '~') {
		buf[0] = '^';
		buf[1] = '?';
		buf[2] = 0;
		return(buf);
	}
	if (c >= ' ') {
		buf[0] = c;
		buf[1] = 0;
		return(buf);
	}

	/* what is left is a control char */
	switch(c) {
	case '\b':
		esc = 'b';
		break;
	case '\t':
		esc = 't';
		break;
	case '\n':
		esc = 'n';
		break;
	default:
		esc = 0;
		break;
	}
	if (esc) {
		buf[0] = '\\';
		buf[1] = esc;
	} else {
		buf[0] = '^';
		buf[1] = c + 64;
	}
	buf[2] = 0;
	return(buf);
}
#endif
616