1 /*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * James A. Woods.
7 *
8 * %sccs.include.redist.c%
9 */
10
11 #ifndef lint
12 static char copyright[] =
13 "@(#) Copyright (c) 1989, 1993\n\
14 The Regents of the University of California. All rights reserved.\n";
15 #endif /* not lint */
16
17 #ifndef lint
18 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 06/06/93";
19 #endif /* not lint */
20
21 /*
22 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
23 *
24 * Locate scans a file list for the full pathname of a file given only part
25 * of the name. The list has been processed with with "front-compression"
26 * and bigram coding. Front compression reduces space by a factor of 4-5,
27 * bigram coding by a further 20-25%.
28 *
29 * The codes are:
30 *
31 * 0-28 likeliest differential counts + offset to make nonnegative
32 * 30 switch code for out-of-range count to follow in next word
33 * 128-255 bigram codes (128 most common, as determined by 'updatedb')
34 * 32-127 single character (printable) ascii residue (ie, literal)
35 *
36 * A novel two-tiered string search technique is employed:
37 *
38 * First, a metacharacter-free subpattern and partial pathname is matched
39 * BACKWARDS to avoid full expansion of the pathname list. The time savings
40 * is 40-50% over forward matching, which cannot efficiently handle
41 * overlapped search patterns and compressed path residue.
42 *
43 * Then, the actual shell glob-style regular expression (if in this form) is
44 * matched against the candidate pathnames using the slower routines provided
45 * in the standard 'find'.
46 */
47
48 #include <sys/param.h>
49
50 #include <fnmatch.h>
51 #include <unistd.h>
52 #include <stdio.h>
53 #include <string.h>
54
55 #include "locate.h"
56 #include "pathnames.h"
57
58 FILE *fp;
59
60 int
main(argc,argv)61 main(argc, argv)
62 int argc;
63 char *argv[];
64 {
65 if (argc != 2) {
66 (void)fprintf(stderr, "usage: locate pattern\n");
67 exit(1);
68 }
69 if (!(fp = fopen(_PATH_FCODES, "r"))) {
70 (void)fprintf(stderr, "locate: no database file %s.\n",
71 _PATH_FCODES);
72 exit(1);
73 }
74 while (*++argv)
75 fastfind(*argv);
76 exit(0);
77 }
78
fastfind(pathpart)79 fastfind(pathpart)
80 char *pathpart;
81 {
82 register char *p, *s;
83 register int c;
84 int count, found, globflag;
85 char *cutoff, *patend, *q, *patprep();
86 char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
87
88 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
89 p[c] = getc(fp), s[c] = getc(fp);
90
91 p = pathpart;
92 globflag = index(p, '*') || index(p, '?') || index(p, '[');
93 patend = patprep(p);
94
95 found = 0;
96 for (c = getc(fp), count = 0; c != EOF;) {
97 count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
98 /* overlay old path */
99 for (p = path + count; (c = getc(fp)) > SWITCH;)
100 if (c < PARITY)
101 *p++ = c;
102 else { /* bigrams are parity-marked */
103 c &= PARITY - 1;
104 *p++ = bigram1[c], *p++ = bigram2[c];
105 }
106 *p-- = NULL;
107 cutoff = (found ? path : path + count);
108 for (found = 0, s = p; s >= cutoff; s--)
109 if (*s == *patend) { /* fast first char check */
110 for (p = patend - 1, q = s - 1; *p != NULL;
111 p--, q--)
112 if (*q != *p)
113 break;
114 if (*p == NULL) { /* fast match success */
115 found = 1;
116 if (!globflag ||
117 !fnmatch(pathpart, path, 0))
118 (void)printf("%s\n", path);
119 break;
120 }
121 }
122 }
123 }
124
125 /*
126 * extract last glob-free subpattern in name for fast pre-match; prepend
127 * '\0' for backwards match; return end of new pattern
128 */
129 static char globfree[100];
130
131 char *
patprep(name)132 patprep(name)
133 char *name;
134 {
135 register char *endmark, *p, *subp;
136
137 subp = globfree;
138 *subp++ = '\0';
139 p = name + strlen(name) - 1;
140 /* skip trailing metacharacters (and [] ranges) */
141 for (; p >= name; p--)
142 if (index("*?", *p) == 0)
143 break;
144 if (p < name)
145 p = name;
146 if (*p == ']')
147 for (p--; p >= name; p--)
148 if (*p == '[') {
149 p--;
150 break;
151 }
152 if (p < name)
153 p = name;
154 /*
155 * if pattern has only metacharacters, check every path (force '/'
156 * search)
157 */
158 if ((p == name) && index("?*[]", *p) != 0)
159 *subp++ = '/';
160 else {
161 for (endmark = p; p >= name; p--)
162 if (index("]*?", *p) != 0)
163 break;
164 for (++p;
165 (p <= endmark) && subp < (globfree + sizeof(globfree));)
166 *subp++ = *p++;
167 }
168 *subp = '\0';
169 return(--subp);
170 }
171