xref: /freebsd/usr.bin/ctags/ctags.c (revision 315ee00f)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1987, 1993, 1994, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #ifndef lint
33 static const char copyright[] =
34 "@(#) Copyright (c) 1987, 1993, 1994, 1995\n\
35 	The Regents of the University of California.  All rights reserved.\n";
36 #endif
37 
38 #if 0
39 #ifndef lint
40 static char sccsid[] = "@(#)ctags.c	8.4 (Berkeley) 2/7/95";
41 #endif
42 #endif
43 
44 #include <sys/cdefs.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <sys/wait.h>
48 
49 #include <err.h>
50 #include <errno.h>
51 #include <limits.h>
52 #include <locale.h>
53 #include <regex.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58 
59 #include "ctags.h"
60 
61 /*
62  * ctags: create a tags file
63  */
64 
65 NODE	*head;			/* head of the sorted binary tree */
66 
67 				/* boolean "func" (see init()) */
68 bool	_wht[256], _etk[256], _itk[256], _btk[256], _gd[256];
69 
70 FILE	*inf;			/* ioptr for current input file */
71 FILE	*outf;			/* ioptr for tags file */
72 
73 long	lineftell;		/* ftell after getc( inf ) == '\n' */
74 
75 int	lineno;			/* line number of current line */
76 int	dflag;			/* -d: non-macro defines */
77 int	tflag;			/* -t: create tags for typedefs */
78 int	vflag;			/* -v: vgrind style index output */
79 int	wflag;			/* -w: suppress warnings */
80 int	xflag;			/* -x: cxref style output */
81 
82 char	*curfile;		/* current input file name */
83 char	searchar = '/';		/* use /.../ searches by default */
84 char	lbuf[LINE_MAX];
85 
86 void	init(void);
87 void	find_entries(char *);
88 static void usage(void) __dead2;
89 
90 int
91 main(int argc, char **argv)
92 {
93 	static const char	*outfile = "tags";	/* output file */
94 	int	aflag;				/* -a: append to tags */
95 	int	uflag;				/* -u: update tags */
96 	int	exit_val;			/* exit value */
97 	int	step;				/* step through args */
98 	int	ch;				/* getopts char */
99 
100 	setlocale(LC_ALL, "");
101 
102 	aflag = uflag = NO;
103 	tflag = YES;
104 	while ((ch = getopt(argc, argv, "BFTadf:tuwvx")) != -1)
105 		switch(ch) {
106 		case 'B':
107 			searchar = '?';
108 			break;
109 		case 'F':
110 			searchar = '/';
111 			break;
112 		case 'T':
113 			tflag = NO;
114 			break;
115 		case 'a':
116 			aflag++;
117 			break;
118 		case 'd':
119 			dflag++;
120 			break;
121 		case 'f':
122 			outfile = optarg;
123 			break;
124 		case 't':
125 			tflag = YES;
126 			break;
127 		case 'u':
128 			uflag++;
129 			break;
130 		case 'w':
131 			wflag++;
132 			break;
133 		case 'v':
134 			vflag++;
135 		case 'x':
136 			xflag++;
137 			break;
138 		case '?':
139 		default:
140 			usage();
141 		}
142 	argv += optind;
143 	argc -= optind;
144 	if (!argc)
145 		usage();
146 
147 	if (strcmp(outfile, "-") == 0)
148 		outfile = "/dev/stdout";
149 
150 	if (!xflag)
151 		setlocale(LC_COLLATE, "C");
152 
153 	init();
154 
155 	for (exit_val = step = 0; step < argc; ++step)
156 		if (!(inf = fopen(argv[step], "r"))) {
157 			warn("%s", argv[step]);
158 			exit_val = 1;
159 		}
160 		else {
161 			curfile = argv[step];
162 			find_entries(argv[step]);
163 			(void)fclose(inf);
164 		}
165 
166 	if (head) {
167 		if (xflag)
168 			put_entries(head);
169 		else {
170 			if (uflag) {
171 				struct stat sb;
172 				FILE *oldf;
173 				regex_t *regx;
174 
175 				if ((oldf = fopen(outfile, "r")) == NULL) {
176 					if (errno == ENOENT) {
177 						uflag = 0;
178 						goto udone;
179 					}
180 					err(1, "opening %s", outfile);
181 				}
182 				if (fstat(fileno(oldf), &sb) != 0 ||
183 				    !S_ISREG(sb.st_mode)) {
184 					fclose(oldf);
185 					uflag = 0;
186 					goto udone;
187 				}
188 				if (unlink(outfile))
189 					err(1, "unlinking %s", outfile);
190 				if ((outf = fopen(outfile, "w")) == NULL)
191 					err(1, "recreating %s", outfile);
192 				if ((regx = calloc(argc, sizeof(regex_t))) == NULL)
193 					err(1, "RE alloc");
194 				for (step = 0; step < argc; step++) {
195 					(void)strcpy(lbuf, "\t");
196 					(void)strlcat(lbuf, argv[step], LINE_MAX);
197 					(void)strlcat(lbuf, "\t", LINE_MAX);
198 					if (regcomp(regx + step, lbuf,
199 					    REG_NOSPEC))
200 						warn("RE compilation failed");
201 				}
202 nextline:
203 				while (fgets(lbuf, LINE_MAX, oldf)) {
204 					for (step = 0; step < argc; step++)
205 						if (regexec(regx + step,
206 						    lbuf, 0, NULL, 0) == 0)
207 							goto nextline;
208 					fputs(lbuf, outf);
209 				}
210 				for (step = 0; step < argc; step++)
211 					regfree(regx + step);
212 				free(regx);
213 				fclose(oldf);
214 				fclose(outf);
215 				++aflag;
216 			}
217 udone:
218 			if (!(outf = fopen(outfile, aflag ? "a" : "w")))
219 				err(1, "%s", outfile);
220 			put_entries(head);
221 			(void)fclose(outf);
222 			if (uflag) {
223 				pid_t pid;
224 
225 				if ((pid = fork()) == -1)
226 					err(1, "fork failed");
227 				else if (pid == 0) {
228 					execlp("sort", "sort", "-o", outfile,
229 					    outfile, NULL);
230 					err(1, "exec of sort failed");
231 				}
232 				/* Just assume the sort went OK. The old code
233 				   did not do any checks either. */
234 				(void)wait(NULL);
235 			}
236 		}
237 	}
238 	if (ferror(stdout) != 0 || fflush(stdout) != 0)
239 		err(1, "stdout");
240 	exit(exit_val);
241 }
242 
/*
 * usage --
 *	write the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: ctags [-BFTaduwvx] [-f tagsfile] file ...\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
249 
250 /*
251  * init --
252  *	this routine sets up the boolean pseudo-functions which work by
253  *	setting boolean flags dependent upon the corresponding character.
254  *	Every char which is NOT in that string is false with respect to
255  *	the pseudo-function.  Therefore, all of the array "_wht" is NO
256  *	by default and then the elements subscripted by the chars in
257  *	CWHITE are set to YES.  Thus, "_wht" of a char is YES if it is in
258  *	the string CWHITE, else NO.
259  */
260 void
261 init(void)
262 {
263 	int		i;
264 	const unsigned char	*sp;
265 
266 	for (i = 0; i < 256; i++) {
267 		_wht[i] = _etk[i] = _itk[i] = _btk[i] = NO;
268 		_gd[i] = YES;
269 	}
270 #define	CWHITE	" \f\t\n"
271 	for (sp = CWHITE; *sp; sp++)	/* white space chars */
272 		_wht[*sp] = YES;
273 #define	CTOKEN	" \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?"
274 	for (sp = CTOKEN; *sp; sp++)	/* token ending chars */
275 		_etk[*sp] = YES;
276 #define	CINTOK	"ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789"
277 	for (sp = CINTOK; *sp; sp++)	/* valid in-token chars */
278 		_itk[*sp] = YES;
279 #define	CBEGIN	"ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
280 	for (sp = CBEGIN; *sp; sp++)	/* token starting chars */
281 		_btk[*sp] = YES;
282 #define	CNOTGD	",;"
283 	for (sp = CNOTGD; *sp; sp++)	/* invalid after-function chars */
284 		_gd[*sp] = NO;
285 }
286 
287 /*
288  * find_entries --
289  *	this routine opens the specified file and calls the function
290  *	which searches the file.
291  */
292 void
293 find_entries(char *file)
294 {
295 	char	*cp;
296 
297 	lineno = 0;				/* should be 1 ?? KB */
298 	if ((cp = strrchr(file, '.'))) {
299 		if (cp[1] == 'l' && !cp[2]) {
300 			int	c;
301 
302 			for (;;) {
303 				if (GETC(==, EOF))
304 					return;
305 				if (!iswhite(c)) {
306 					rewind(inf);
307 					break;
308 				}
309 			}
310 #define	LISPCHR	";(["
311 /* lisp */		if (strchr(LISPCHR, c)) {
312 				l_entries();
313 				return;
314 			}
315 /* lex */		else {
316 				/*
317 				 * we search all 3 parts of a lex file
318 				 * for C references.  This may be wrong.
319 				 */
320 				toss_yysec();
321 				(void)strcpy(lbuf, "%%$");
322 				pfnote("yylex", lineno);
323 				rewind(inf);
324 			}
325 		}
326 /* yacc */	else if (cp[1] == 'y' && !cp[2]) {
327 			/*
328 			 * we search only the 3rd part of a yacc file
329 			 * for C references.  This may be wrong.
330 			 */
331 			toss_yysec();
332 			(void)strcpy(lbuf, "%%$");
333 			pfnote("yyparse", lineno);
334 			y_entries();
335 		}
336 /* fortran */	else if ((cp[1] != 'c' && cp[1] != 'h') && !cp[2]) {
337 			if (PF_funcs())
338 				return;
339 			rewind(inf);
340 		}
341 	}
342 /* C */	c_entries();
343 }
344