xref: /freebsd/usr.bin/whereis/whereis.c (revision 315ee00f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright © 2002, Jörg Wunsch
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT,
19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  * POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * 4.3BSD UI-compatible whereis(1) utility.  Rewritten from scratch
30  * since the original 4.3BSD version suffers legal problems that
31  * prevent it from being redistributed, and since the 4.4BSD version
32  * was pretty inferior in functionality.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/sysctl.h>
38 
39 #include <dirent.h>
40 #include <err.h>
41 #include <errno.h>
42 #include <locale.h>
43 #include <regex.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <sysexits.h>
48 #include <unistd.h>
49 
50 #include "pathnames.h"
51 
/*
 * Bits collected in main()'s `unusual' mask.  Each bit is set while the
 * corresponding search (binary, manual page, source) has found nothing
 * for the current query and is cleared on the first hit.
 */
#define	NO_BIN_FOUND	1
#define	NO_MAN_FOUND	2
#define	NO_SRC_FOUND	4

/* Pointer to a constant string; the directory lists are `ccharp *'. */
typedef const char *ccharp;

/* One flag per single-letter command-line option, set by scanopts(). */
static int opt_a, opt_b, opt_m, opt_q, opt_s, opt_u, opt_x;
/*
 * NULL-terminated directory lists for binaries, manual pages and
 * sources.  A list that is still NULL after scanopts() is filled in by
 * defaults().
 */
static ccharp *bindirs, *mandirs, *sourcedirs;
/* Remaining command-line arguments (the names to look up). */
static char **query;

/* Colon-separated default source directories; split by defaults(). */
static const char *sourcepath = PATH_SOURCES;

/* Forward declarations of the file-local helpers defined below. */
static char	*colonify(ccharp *);
static int	 contains(ccharp *, const char *);
static void	 decolonify(char *, ccharp **, int *);
static void	 defaults(void);
static void	 scanopts(int, char **);
static void	 usage(void);
70 
/*
 * Throughout this program, a number of strings are dynamically
 * allocated but never freed.  Their memory is written to when
 * splitting the strings into string lists which will be processed
 * later.  Since those string lists must remain valid even after the
 * functions that allocated the memory have returned, those functions
 * cannot free them.  They could be freed only at the end of main(),
 * which would be pointless anyway.
 *
 * The overall amount of memory allocated for processing the strings
 * is not expected to exceed a few kilobytes.  For that reason,
 * allocation can normally be assumed to succeed (within a virtual
 * memory environment), so we simply bail out using abort(3) in case
 * of an allocation failure.
 */
86 
/*
 * Write the command synopsis to stderr and terminate the program with
 * exit status EX_USAGE.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: whereis [-abmqsux] [-BMS dir ... -f] program ...\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
94 
95 /*
96  * Scan options passed to program.
97  *
98  * Note that the -B/-M/-S options expect a list of directory
99  * names that must be terminated with -f.
100  */
101 static void
102 scanopts(int argc, char **argv)
103 {
104 	int c, i;
105 	ccharp **dirlist;
106 
107 	while ((c = getopt(argc, argv, "BMSabfmqsux")) != -1)
108 		switch (c) {
109 		case 'B':
110 			dirlist = &bindirs;
111 			goto dolist;
112 
113 		case 'M':
114 			dirlist = &mandirs;
115 			goto dolist;
116 
117 		case 'S':
118 			dirlist = &sourcedirs;
119 		  dolist:
120 			i = 0;
121 			*dirlist = realloc(*dirlist, (i + 1) * sizeof(char *));
122 			(*dirlist)[i] = NULL;
123 			while (optind < argc &&
124 			       strcmp(argv[optind], "-f") != 0 &&
125 			       strcmp(argv[optind], "-B") != 0 &&
126 			       strcmp(argv[optind], "-M") != 0 &&
127 			       strcmp(argv[optind], "-S") != 0) {
128 				decolonify(argv[optind], dirlist, &i);
129 				optind++;
130 			}
131 			break;
132 
133 		case 'a':
134 			opt_a = 1;
135 			break;
136 
137 		case 'b':
138 			opt_b = 1;
139 			break;
140 
141 		case 'f':
142 			goto breakout;
143 
144 		case 'm':
145 			opt_m = 1;
146 			break;
147 
148 		case 'q':
149 			opt_q = 1;
150 			break;
151 
152 		case 's':
153 			opt_s = 1;
154 			break;
155 
156 		case 'u':
157 			opt_u = 1;
158 			break;
159 
160 		case 'x':
161 			opt_x = 1;
162 			break;
163 
164 		default:
165 			usage();
166 		}
167   breakout:
168 	if (optind == argc)
169 		usage();
170 	query = argv + optind;
171 }
172 
/*
 * Report whether the NULL-terminated string list `cpp' has an entry
 * equal to `s'.  A NULL list is treated as empty.  Returns 1 when a
 * matching entry exists and 0 otherwise.
 */
static int
contains(ccharp *cpp, const char *s)
{
	size_t idx;

	if (cpp == NULL)
		return (0);

	for (idx = 0; cpp[idx] != NULL; idx++) {
		if (strcmp(cpp[idx], s) == 0)
			return (1);
	}
	return (0);
}
191 
192 /*
193  * Split string `s' at colons, and pass it to the string list pointed
194  * to by `cppp' (which has `*ip' elements).  Note that the original
195  * string is modified by replacing the colon with a NUL byte.  The
196  * partial string is only added if it has a length greater than 0, and
197  * if it's not already contained in the string list.
198  */
199 static void
200 decolonify(char *s, ccharp **cppp, int *ip)
201 {
202 	char *cp;
203 
204 	while ((cp = strchr(s, ':')), *s != '\0') {
205 		if (cp)
206 			*cp = '\0';
207 		if (strlen(s) && !contains(*cppp, s)) {
208 			*cppp = realloc(*cppp, (*ip + 2) * sizeof(char *));
209 			if (*cppp == NULL)
210 				abort();
211 			(*cppp)[*ip] = s;
212 			(*cppp)[*ip + 1] = NULL;
213 			(*ip)++;
214 		}
215 		if (cp)
216 			s = cp + 1;
217 		else
218 			break;
219 	}
220 }
221 
/*
 * Join string list `cpp' into a colon-separated string.
 *
 * Returns a newly allocated string the caller owns, or NULL when `cpp'
 * itself is NULL.  A list without entries yields an empty string.
 * Aborts if the result cannot be allocated.
 */
static char *
colonify(ccharp *cpp)
{
	size_t len, total;
	char *dst, *out;
	int i;

	if (cpp == NULL)
		return (NULL);

	/* Every entry needs its text plus one separator or terminator. */
	for (total = 0, i = 0; cpp[i] != NULL; i++)
		total += strlen(cpp[i]) + 1;

	/*
	 * The extra byte guarantees room for the terminating NUL even
	 * when the list has no entries (total == 0).
	 */
	if ((out = malloc(total + 1)) == NULL)
		abort();

	/*
	 * Copy through a running write pointer: a colon goes in front of
	 * every entry but the first, so there is no trailing colon to
	 * strip afterwards and nothing is written outside the buffer for
	 * an empty list.
	 */
	dst = out;
	for (i = 0; cpp[i] != NULL; i++) {
		if (i > 0)
			*dst++ = ':';
		len = strlen(cpp[i]);
		memcpy(dst, cpp[i], len);
		dst += len;
	}
	*dst = '\0';

	return (out);
}
247 
/*
 * Fill `bindirs' with the default path (sysctl user.cs_path), then
 * PATH_LIBEXEC, then those entries of $PATH not already listed.
 */
static void
default_bindirs(void)
{
	const int oid[2] = {CTL_USER, USER_CS_PATH};
	size_t s;
	char *b, *cp;
	int nele;

	/* First call only asks for the size, second one fetches the value. */
	if (sysctl(oid, 2, NULL, &s, NULL, 0) == -1)
		err(EX_OSERR, "sysctl(\"user.cs_path\")");
	if ((b = malloc(s + 1)) == NULL)
		abort();
	if (sysctl(oid, 2, b, &s, NULL, 0) == -1)
		err(EX_OSERR, "sysctl(\"user.cs_path\")");
	/*
	 * Use the spare byte to make sure the value is NUL-terminated
	 * before it is handed to the string functions.
	 */
	b[s] = '\0';
	nele = 0;
	decolonify(b, &bindirs, &nele);

	bindirs = realloc(bindirs, (nele + 2) * sizeof(char *));
	if (bindirs == NULL)
		abort();
	bindirs[nele++] = PATH_LIBEXEC;
	bindirs[nele] = NULL;

	if ((cp = getenv("PATH")) != NULL) {
		/* don't destroy the original environment... */
		b = strdup(cp);
		if (b == NULL)
			abort();
		decolonify(b, &bindirs, &nele);
	}
}

/*
 * Fill `mandirs' from the first output line of MANPATHCMD.
 */
static void
default_mandirs(void)
{
	char *b, buf[BUFSIZ];
	FILE *p;
	int nele;

	if ((p = popen(MANPATHCMD, "r")) == NULL)
		err(EX_OSERR, "cannot execute manpath command");
	if (fgets(buf, BUFSIZ - 1, p) == NULL ||
	    pclose(p))
		err(EX_OSERR, "error processing manpath results");
	if ((b = strchr(buf, '\n')) != NULL)
		*b = '\0';
	/* The list entries point into the string, so keep a heap copy. */
	b = strdup(buf);
	if (b == NULL)
		abort();
	nele = 0;
	decolonify(b, &mandirs, &nele);
}

/*
 * Fill `sourcedirs' from the precompiled `sourcepath' list plus the
 * eligible subdirectories of PATH_PORTS.
 */
static void
default_sourcedirs(void)
{
	struct dirent *dirp;
	struct stat sb;
	DIR *dir;
	char *b;
	int nele;

	b = strdup(sourcepath);
	if (b == NULL)
		abort();
	nele = 0;
	decolonify(b, &sourcedirs, &nele);

	if (stat(PATH_PORTS, &sb) == -1) {
		if (errno == ENOENT)
			/* no /usr/ports, we are done */
			return;
		err(EX_OSERR, "stat(" PATH_PORTS ")");
	}
	if ((sb.st_mode & S_IFMT) != S_IFDIR)
		/* /usr/ports is not a directory, ignore */
		return;
	if (access(PATH_PORTS, R_OK | X_OK) != 0)
		return;
	if ((dir = opendir(PATH_PORTS)) == NULL)
		err(EX_OSERR, "opendir(" PATH_PORTS ")");
	while ((dirp = readdir(dir)) != NULL) {
		/*
		 * Not everything below PATH_PORTS is of
		 * interest.  First, all dot files and
		 * directories (e. g. .snap) can be ignored.
		 * Also, all subdirectories starting with a
		 * capital letter are not going to be
		 * examined, as they are used for internal
		 * purposes (Mk, Tools, ...).  This also
		 * matches a possible CVS subdirectory.
		 * Finally, the distfiles subdirectory is also
		 * special, and should not be considered to
		 * avoid false matches.
		 */
		if (dirp->d_name[0] == '.' ||
		    /*
		     * isupper() not used on purpose: the
		     * check is supposed to default to the C
		     * locale instead of the current user's
		     * locale.
		     */
		    (dirp->d_name[0] >= 'A' && dirp->d_name[0] <= 'Z') ||
		    strcmp(dirp->d_name, "distfiles") == 0)
			continue;
		/* sizeof covers the NUL; one more byte for the slash. */
		if ((b = malloc(sizeof PATH_PORTS + 1 + dirp->d_namlen))
		    == NULL)
			abort();
		strcpy(b, PATH_PORTS);
		strcat(b, "/");
		strcat(b, dirp->d_name);
		/* Only readable, searchable directories are added. */
		if (stat(b, &sb) == -1 ||
		    (sb.st_mode & S_IFMT) != S_IFDIR ||
		    access(b, R_OK | X_OK) != 0) {
			free(b);
			continue;
		}
		sourcedirs = realloc(sourcedirs,
				     (nele + 2) * sizeof(char *));
		if (sourcedirs == NULL)
			abort();
		sourcedirs[nele++] = b;
		sourcedirs[nele] = NULL;
	}
	closedir(dir);
}

/*
 * Provide defaults for all options and directory lists.
 *
 * If none of -b/-m/-s was given, all three searches are enabled.  Each
 * directory list that is still NULL (i.e. was not set through
 * -B/-M/-S) is then filled with its default; this happens for all
 * three lists regardless of which searches are enabled.  Fatal
 * errors terminate the program through err(3) or abort(3).
 */
static void
defaults(void)
{
	/* default to -bms if none has been specified */
	if (!opt_b && !opt_m && !opt_s)
		opt_b = opt_m = opt_s = 1;

	/* -b defaults to default path + /usr/libexec +
	 * user's path */
	if (!bindirs)
		default_bindirs();

	/* -m defaults to $(manpath) */
	if (!mandirs)
		default_mandirs();

	/* -s defaults to precompiled list, plus subdirs of /usr/ports */
	if (!sourcedirs)
		default_sourcedirs();
}
375 
376 int
377 main(int argc, char **argv)
378 {
379 	int unusual, i, printed;
380 	char *bin, buf[BUFSIZ], *cp, *cp2, *man, *name, *src;
381 	ccharp *dp;
382 	size_t nlen, olen, s;
383 	struct stat sb;
384 	regex_t re, re2;
385 	regmatch_t matches[2];
386 	regoff_t rlen;
387 	FILE *p;
388 
389 	setlocale(LC_ALL, "");
390 
391 	scanopts(argc, argv);
392 	defaults();
393 
394 	if (mandirs == NULL)
395 		opt_m = 0;
396 	if (bindirs == NULL)
397 		opt_b = 0;
398 	if (sourcedirs == NULL)
399 		opt_s = 0;
400 	if (opt_m + opt_b + opt_s == 0)
401 		errx(EX_DATAERR, "no directories to search");
402 
403 	if (opt_m) {
404 		setenv("MANPATH", colonify(mandirs), 1);
405 		if ((i = regcomp(&re, MANWHEREISMATCH, REG_EXTENDED)) != 0) {
406 			regerror(i, &re, buf, BUFSIZ - 1);
407 			errx(EX_UNAVAILABLE, "regcomp(%s) failed: %s",
408 			     MANWHEREISMATCH, buf);
409 		}
410 	}
411 
412 	for (; (name = *query) != NULL; query++) {
413 		/* strip leading path name component */
414 		if ((cp = strrchr(name, '/')) != NULL)
415 			name = cp + 1;
416 		/* strip SCCS or RCS suffix/prefix */
417 		if (strlen(name) > 2 && strncmp(name, "s.", 2) == 0)
418 			name += 2;
419 		if ((s = strlen(name)) > 2 && strcmp(name + s - 2, ",v") == 0)
420 			name[s - 2] = '\0';
421 		/* compression suffix */
422 		s = strlen(name);
423 		if (s > 2 &&
424 		    (strcmp(name + s - 2, ".z") == 0 ||
425 		     strcmp(name + s - 2, ".Z") == 0))
426 			name[s - 2] = '\0';
427 		else if (s > 3 &&
428 			 strcmp(name + s - 3, ".gz") == 0)
429 			name[s - 3] = '\0';
430 		else if (s > 4 &&
431 			 strcmp(name + s - 4, ".bz2") == 0)
432 			name[s - 4] = '\0';
433 
434 		unusual = 0;
435 		bin = man = src = NULL;
436 		s = strlen(name);
437 
438 		if (opt_b) {
439 			/*
440 			 * Binaries have to match exactly, and must be regular
441 			 * executable files.
442 			 */
443 			unusual = unusual | NO_BIN_FOUND;
444 			for (dp = bindirs; *dp != NULL; dp++) {
445 				cp = malloc(strlen(*dp) + 1 + s + 1);
446 				if (cp == NULL)
447 					abort();
448 				strcpy(cp, *dp);
449 				strcat(cp, "/");
450 				strcat(cp, name);
451 				if (stat(cp, &sb) == 0 &&
452 				    (sb.st_mode & S_IFMT) == S_IFREG &&
453 				    (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
454 				    != 0) {
455 					unusual = unusual & ~NO_BIN_FOUND;
456 					if (bin == NULL) {
457 						bin = strdup(cp);
458 					} else {
459 						olen = strlen(bin);
460 						nlen = strlen(cp);
461 						bin = realloc(bin,
462 							      olen + nlen + 2);
463 						if (bin == NULL)
464 							abort();
465 						strcat(bin, " ");
466 						strcat(bin, cp);
467 					}
468 					if (!opt_a) {
469 						free(cp);
470 						break;
471 					}
472 				}
473 				free(cp);
474 			}
475 		}
476 
477 		if (opt_m) {
478 			/*
479 			 * Ask the man command to perform the search for us.
480 			 */
481 			unusual = unusual | NO_MAN_FOUND;
482 			if (opt_a)
483 				cp = malloc(sizeof MANWHEREISALLCMD - 2 + s);
484 			else
485 				cp = malloc(sizeof MANWHEREISCMD - 2 + s);
486 
487 			if (cp == NULL)
488 				abort();
489 
490 			if (opt_a)
491 				sprintf(cp, MANWHEREISALLCMD, name);
492 			else
493 				sprintf(cp, MANWHEREISCMD, name);
494 
495 			if ((p = popen(cp, "r")) != NULL) {
496 
497 				while (fgets(buf, BUFSIZ - 1, p) != NULL) {
498 					unusual = unusual & ~NO_MAN_FOUND;
499 
500 					if ((cp2 = strchr(buf, '\n')) != NULL)
501 						*cp2 = '\0';
502 					if (regexec(&re, buf, 2,
503 						    matches, 0) == 0 &&
504 					    (rlen = matches[1].rm_eo -
505 					     matches[1].rm_so) > 0) {
506 						/*
507 						 * man -w found formatted
508 						 * page, need to pick up
509 						 * source page name.
510 						 */
511 						cp2 = malloc(rlen + 1);
512 						if (cp2 == NULL)
513 							abort();
514 						memcpy(cp2,
515 						       buf + matches[1].rm_so,
516 						       rlen);
517 						cp2[rlen] = '\0';
518 					} else {
519 						/*
520 						 * man -w found plain source
521 						 * page, use it.
522 						 */
523 						cp2 = strdup(buf);
524 						if (cp2 == NULL)
525 							abort();
526 					}
527 
528 					if (man == NULL) {
529 						man = strdup(cp2);
530 					} else {
531 						olen = strlen(man);
532 						nlen = strlen(cp2);
533 						man = realloc(man,
534 							      olen + nlen + 2);
535 						if (man == NULL)
536 							abort();
537 						strcat(man, " ");
538 						strcat(man, cp2);
539 					}
540 
541 					free(cp2);
542 
543 					if (!opt_a)
544 						break;
545 				}
546 				pclose(p);
547 				free(cp);
548 			}
549 		}
550 
551 		if (opt_s) {
552 			/*
553 			 * Sources match if a subdir with the exact
554 			 * name is found.
555 			 */
556 			unusual = unusual | NO_SRC_FOUND;
557 			for (dp = sourcedirs; *dp != NULL; dp++) {
558 				cp = malloc(strlen(*dp) + 1 + s + 1);
559 				if (cp == NULL)
560 					abort();
561 				strcpy(cp, *dp);
562 				strcat(cp, "/");
563 				strcat(cp, name);
564 				if (stat(cp, &sb) == 0 &&
565 				    (sb.st_mode & S_IFMT) == S_IFDIR) {
566 					unusual = unusual & ~NO_SRC_FOUND;
567 					if (src == NULL) {
568 						src = strdup(cp);
569 					} else {
570 						olen = strlen(src);
571 						nlen = strlen(cp);
572 						src = realloc(src,
573 							      olen + nlen + 2);
574 						if (src == NULL)
575 							abort();
576 						strcat(src, " ");
577 						strcat(src, cp);
578 					}
579 					if (!opt_a) {
580 						free(cp);
581 						break;
582 					}
583 				}
584 				free(cp);
585 			}
586 			/*
587 			 * If still not found, ask locate to search it
588 			 * for us.  This will find sources for things
589 			 * like lpr that are well hidden in the
590 			 * /usr/src tree, but takes a lot longer.
591 			 * Thus, option -x (`expensive') prevents this
592 			 * search.
593 			 *
594 			 * Do only match locate output that starts
595 			 * with one of our source directories, and at
596 			 * least one further level of subdirectories.
597 			 */
598 			if (opt_x || (src && !opt_a))
599 				goto done_sources;
600 
601 			cp = malloc(sizeof LOCATECMD - 2 + s);
602 			if (cp == NULL)
603 				abort();
604 			sprintf(cp, LOCATECMD, name);
605 			if ((p = popen(cp, "r")) == NULL)
606 				goto done_sources;
607 			while ((src == NULL || opt_a) &&
608 			       (fgets(buf, BUFSIZ - 1, p)) != NULL) {
609 				if ((cp2 = strchr(buf, '\n')) != NULL)
610 					*cp2 = '\0';
611 				for (dp = sourcedirs;
612 				     (src == NULL || opt_a) && *dp != NULL;
613 				     dp++) {
614 					cp2 = malloc(strlen(*dp) + 9);
615 					if (cp2 == NULL)
616 						abort();
617 					strcpy(cp2, "^");
618 					strcat(cp2, *dp);
619 					strcat(cp2, "/[^/]+/");
620 					if ((i = regcomp(&re2, cp2,
621 							 REG_EXTENDED|REG_NOSUB))
622 					    != 0) {
623 						regerror(i, &re, buf,
624 							 BUFSIZ - 1);
625 						errx(EX_UNAVAILABLE,
626 						     "regcomp(%s) failed: %s",
627 						     cp2, buf);
628 					}
629 					free(cp2);
630 					if (regexec(&re2, buf, 0,
631 						    (regmatch_t *)NULL, 0)
632 					    == 0) {
633 						unusual = unusual &
634 						          ~NO_SRC_FOUND;
635 						if (src == NULL) {
636 							src = strdup(buf);
637 						} else {
638 							olen = strlen(src);
639 							nlen = strlen(buf);
640 							src = realloc(src,
641 								      olen +
642 								      nlen + 2);
643 							if (src == NULL)
644 								abort();
645 							strcat(src, " ");
646 							strcat(src, buf);
647 						}
648 					}
649 					regfree(&re2);
650 				}
651 			}
652 			pclose(p);
653 			free(cp);
654 		}
655 	  done_sources:
656 
657 		if (opt_u && !unusual)
658 			continue;
659 
660 		printed = 0;
661 		if (!opt_q) {
662 			printf("%s:", name);
663 			printed++;
664 		}
665 		if (bin) {
666 			if (printed++)
667 				putchar(' ');
668 			fputs(bin, stdout);
669 		}
670 		if (man) {
671 			if (printed++)
672 				putchar(' ');
673 			fputs(man, stdout);
674 		}
675 		if (src) {
676 			if (printed++)
677 				putchar(' ');
678 			fputs(src, stdout);
679 		}
680 		if (printed)
681 			putchar('\n');
682 	}
683 
684 	if (opt_m)
685 		regfree(&re);
686 
687 	return (0);
688 }
689