xref: /original-bsd/usr.bin/locate/locate/locate.c (revision cef711d2)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * James A. Woods.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char copyright[] =
13 "@(#) Copyright (c) 1989, 1993\n\
14 	The Regents of the University of California.  All rights reserved.\n";
15 #endif /* not lint */
16 
17 #ifndef lint
18 static char sccsid[] = "@(#)locate.c	8.1 (Berkeley) 06/06/93";
19 #endif /* not lint */
20 
21 /*
22  * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
23  *
24  * Locate scans a file list for the full pathname of a file given only part
25  * of the name.  The list has been processed with with "front-compression"
26  * and bigram coding.  Front compression reduces space by a factor of 4-5,
27  * bigram coding by a further 20-25%.
28  *
29  * The codes are:
30  *
31  * 	0-28	likeliest differential counts + offset to make nonnegative
32  *	30	switch code for out-of-range count to follow in next word
33  *	128-255 bigram codes (128 most common, as determined by 'updatedb')
34  *	32-127  single character (printable) ascii residue (ie, literal)
35  *
36  * A novel two-tiered string search technique is employed:
37  *
38  * First, a metacharacter-free subpattern and partial pathname is matched
39  * BACKWARDS to avoid full expansion of the pathname list.  The time savings
40  * is 40-50% over forward matching, which cannot efficiently handle
41  * overlapped search patterns and compressed path residue.
42  *
43  * Then, the actual shell glob-style regular expression (if in this form) is
44  * matched against the candidate pathnames using the slower routines provided
45  * in the standard 'find'.
46  */
47 
48 #include <sys/param.h>
49 
50 #include <fnmatch.h>
51 #include <unistd.h>
52 #include <stdio.h>
53 #include <string.h>
54 
55 #include "locate.h"
56 #include "pathnames.h"
57 
58 FILE *fp;
59 
60 int
61 main(argc, argv)
62 	int argc;
63 	char *argv[];
64 {
65 	if (argc != 2) {
66 		(void)fprintf(stderr, "usage: locate pattern\n");
67 		exit(1);
68 	}
69 	if (!(fp = fopen(_PATH_FCODES, "r"))) {
70 		(void)fprintf(stderr, "locate: no database file %s.\n",
71 		    _PATH_FCODES);
72 		exit(1);
73 	}
74 	while (*++argv)
75 		fastfind(*argv);
76 	exit(0);
77 }
78 
79 fastfind(pathpart)
80 	char *pathpart;
81 {
82 	register char *p, *s;
83 	register int c;
84 	int count, found, globflag;
85 	char *cutoff, *patend, *q, *patprep();
86 	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
87 
88 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
89 		p[c] = getc(fp), s[c] = getc(fp);
90 
91 	p = pathpart;
92 	globflag = index(p, '*') || index(p, '?') || index(p, '[');
93 	patend = patprep(p);
94 
95 	found = 0;
96 	for (c = getc(fp), count = 0; c != EOF;) {
97 		count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
98 		/* overlay old path */
99 		for (p = path + count; (c = getc(fp)) > SWITCH;)
100 			if (c < PARITY)
101 				*p++ = c;
102 			else {		/* bigrams are parity-marked */
103 				c &= PARITY - 1;
104 				*p++ = bigram1[c], *p++ = bigram2[c];
105 			}
106 		*p-- = NULL;
107 		cutoff = (found ? path : path + count);
108 		for (found = 0, s = p; s >= cutoff; s--)
109 			if (*s == *patend) {	/* fast first char check */
110 				for (p = patend - 1, q = s - 1; *p != NULL;
111 				    p--, q--)
112 					if (*q != *p)
113 						break;
114 				if (*p == NULL) {	/* fast match success */
115 					found = 1;
116 					if (!globflag ||
117 					    !fnmatch(pathpart, path, 0))
118 						(void)printf("%s\n", path);
119 					break;
120 				}
121 			}
122 	}
123 }
124 
125 /*
126  * extract last glob-free subpattern in name for fast pre-match; prepend
127  * '\0' for backwards match; return end of new pattern
128  */
129 static char globfree[100];
130 
131 char *
132 patprep(name)
133 	char *name;
134 {
135 	register char *endmark, *p, *subp;
136 
137 	subp = globfree;
138 	*subp++ = '\0';
139 	p = name + strlen(name) - 1;
140 	/* skip trailing metacharacters (and [] ranges) */
141 	for (; p >= name; p--)
142 		if (index("*?", *p) == 0)
143 			break;
144 	if (p < name)
145 		p = name;
146 	if (*p == ']')
147 		for (p--; p >= name; p--)
148 			if (*p == '[') {
149 				p--;
150 				break;
151 			}
152 	if (p < name)
153 		p = name;
154 	/*
155 	 * if pattern has only metacharacters, check every path (force '/'
156 	 * search)
157 	 */
158 	if ((p == name) && index("?*[]", *p) != 0)
159 		*subp++ = '/';
160 	else {
161 		for (endmark = p; p >= name; p--)
162 			if (index("]*?", *p) != 0)
163 				break;
164 		for (++p;
165 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
166 			*subp++ = *p++;
167 	}
168 	*subp = '\0';
169 	return(--subp);
170 }
171