xref: /original-bsd/usr.bin/locate/locate/locate.c (revision df23cbe6)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * James A. Woods.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 char copyright[] =
13 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\
14  All rights reserved.\n";
15 #endif /* not lint */
16 
17 #ifndef lint
18 static char sccsid[] = "@(#)locate.c	5.4 (Berkeley) 06/29/92";
19 #endif /* not lint */
20 
21 /*
22  * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
23  *
24  * Locate scans a file list for the full pathname of a file given only part
25  * of the name.  The list has been processed with with "front-compression"
26  * and bigram coding.  Front compression reduces space by a factor of 4-5,
27  * bigram coding by a further 20-25%.
28  *
29  * The codes are:
30  *
31  * 	0-28	likeliest differential counts + offset to make nonnegative
32  *	30	switch code for out-of-range count to follow in next word
33  *	128-255 bigram codes (128 most common, as determined by 'updatedb')
34  *	32-127  single character (printable) ascii residue (ie, literal)
35  *
36  * A novel two-tiered string search technique is employed:
37  *
38  * First, a metacharacter-free subpattern and partial pathname is matched
39  * BACKWARDS to avoid full expansion of the pathname list.  The time savings
40  * is 40-50% over forward matching, which cannot efficiently handle
41  * overlapped search patterns and compressed path residue.
42  *
43  * Then, the actual shell glob-style regular expression (if in this form) is
44  * matched against the candidate pathnames using the slower routines provided
45  * in the standard 'find'.
46  */
47 
48 #include <sys/param.h>
49 
50 #include <fnmatch.h>
51 #include <unistd.h>
52 #include <stdio.h>
53 #include "locate.h"
54 #include "pathnames.h"
55 
56 FILE *fp;
57 
58 main(argc, argv)
59 	int argc;
60 	char **argv;
61 {
62 	if (argc != 2) {
63 		(void)fprintf(stderr, "usage: locate pattern\n");
64 		exit(1);
65 	}
66 	if (!(fp = fopen(_PATH_FCODES, "r"))) {
67 		(void)fprintf(stderr, "locate: no database file %s.\n",
68 		    _PATH_FCODES);
69 		exit(1);
70 	}
71 	while (*++argv)
72 		fastfind(*argv);
73 	exit(0);
74 }
75 
76 fastfind(pathpart)
77 	char *pathpart;
78 {
79 	register char *p, *s;
80 	register int c;
81 	int count, found, globflag;
82 	char *cutoff, *patend, *q, *index(), *patprep();
83 	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
84 
85 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
86 		p[c] = getc(fp), s[c] = getc(fp);
87 
88 	p = pathpart;
89 	globflag = index(p, '*') || index(p, '?') || index(p, '[');
90 	patend = patprep(p);
91 
92 	found = 0;
93 	for (c = getc(fp), count = 0; c != EOF;) {
94 		count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
95 		/* overlay old path */
96 		for (p = path + count; (c = getc(fp)) > SWITCH;)
97 			if (c < PARITY)
98 				*p++ = c;
99 			else {		/* bigrams are parity-marked */
100 				c &= PARITY - 1;
101 				*p++ = bigram1[c], *p++ = bigram2[c];
102 			}
103 		*p-- = NULL;
104 		cutoff = (found ? path : path + count);
105 		for (found = 0, s = p; s >= cutoff; s--)
106 			if (*s == *patend) {	/* fast first char check */
107 				for (p = patend - 1, q = s - 1; *p != NULL;
108 				    p--, q--)
109 					if (*q != *p)
110 						break;
111 				if (*p == NULL) {	/* fast match success */
112 					found = 1;
113 					if (!globflag ||
114 					    !fnmatch(pathpart, path, 0))
115 						(void)printf("%s\n", path);
116 					break;
117 				}
118 			}
119 	}
120 }
121 
122 /*
123  * extract last glob-free subpattern in name for fast pre-match; prepend
124  * '\0' for backwards match; return end of new pattern
125  */
126 static char globfree[100];
127 
128 char *
129 patprep(name)
130 	char *name;
131 {
132 	register char *endmark, *p, *subp;
133 
134 	subp = globfree;
135 	*subp++ = '\0';
136 	p = name + strlen(name) - 1;
137 	/* skip trailing metacharacters (and [] ranges) */
138 	for (; p >= name; p--)
139 		if (index("*?", *p) == 0)
140 			break;
141 	if (p < name)
142 		p = name;
143 	if (*p == ']')
144 		for (p--; p >= name; p--)
145 			if (*p == '[') {
146 				p--;
147 				break;
148 			}
149 	if (p < name)
150 		p = name;
151 	/*
152 	 * if pattern has only metacharacters, check every path (force '/'
153 	 * search)
154 	 */
155 	if ((p == name) && index("?*[]", *p) != 0)
156 		*subp++ = '/';
157 	else {
158 		for (endmark = p; p >= name; p--)
159 			if (index("]*?", *p) != 0)
160 				break;
161 		for (++p;
162 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
163 			*subp++ = *p++;
164 	}
165 	*subp = '\0';
166 	return(--subp);
167 }
168