xref: /original-bsd/usr.bin/locate/locate/locate.c (revision 72b6fd44)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * James A. Woods.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 char copyright[] =
13 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\
14  All rights reserved.\n";
15 #endif /* not lint */
16 
17 #ifndef lint
18 static char sccsid[] = "@(#)locate.c	5.2 (Berkeley) 06/01/90";
19 #endif /* not lint */
20 
21 /*
22  * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
23  *
24  * Locate scans a file list for the full pathname of a file given only part
25  * of the name.  The list has been processed with with "front-compression"
26  * and bigram coding.  Front compression reduces space by a factor of 4-5,
27  * bigram coding by a further 20-25%.
28  *
29  * The codes are:
30  *
31  * 	0-28	likeliest differential counts + offset to make nonnegative
32  *	30	switch code for out-of-range count to follow in next word
33  *	128-255 bigram codes (128 most common, as determined by 'updatedb')
34  *	32-127  single character (printable) ascii residue (ie, literal)
35  *
36  * A novel two-tiered string search technique is employed:
37  *
38  * First, a metacharacter-free subpattern and partial pathname is matched
39  * BACKWARDS to avoid full expansion of the pathname list.  The time savings
40  * is 40-50% over forward matching, which cannot efficiently handle
41  * overlapped search patterns and compressed path residue.
42  *
43  * Then, the actual shell glob-style regular expression (if in this form) is
44  * matched against the candidate pathnames using the slower routines provided
45  * in the standard 'find'.
46  */
47 
48 #include <sys/param.h>
49 #include <unistd.h>
50 #include <stdio.h>
51 #include "locate.h"
52 #include "pathnames.h"
53 
54 FILE *fp;
55 
56 main(argc, argv)
57 	int argc;
58 	char **argv;
59 {
60 	if (argc != 2) {
61 		(void)fprintf(stderr, "usage: locate pattern\n");
62 		exit(1);
63 	}
64 	if (!(fp = fopen(_PATH_FCODES, "r"))) {
65 		(void)fprintf(stderr, "locate: no database file %s.\n",
66 		    _PATH_FCODES);
67 		exit(1);
68 	}
69 	while (*++argv)
70 		fastfind(*argv);
71 	exit(0);
72 }
73 
74 fastfind(pathpart)
75 	char *pathpart;
76 {
77 	register char *p, *s;
78 	register int c;
79 	int count, found, globflag;
80 	char *cutoff, *patend, *q, *index(), *patprep();
81 	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
82 
83 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
84 		p[c] = getc(fp), s[c] = getc(fp);
85 
86 	p = pathpart;
87 	globflag = index(p, '*') || index(p, '?') || index(p, '[');
88 	patend = patprep(p);
89 
90 	found = 0;
91 	for (c = getc(fp), count = 0; c != EOF;) {
92 		count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
93 		/* overlay old path */
94 		for (p = path + count; (c = getc(fp)) > SWITCH;)
95 			if (c < PARITY)
96 				*p++ = c;
97 			else {		/* bigrams are parity-marked */
98 				c &= PARITY - 1;
99 				*p++ = bigram1[c], *p++ = bigram2[c];
100 			}
101 		*p-- = NULL;
102 		cutoff = (found ? path : path + count);
103 		for (found = 0, s = p; s >= cutoff; s--)
104 			if (*s == *patend) {	/* fast first char check */
105 				for (p = patend - 1, q = s - 1; *p != NULL;
106 				    p--, q--)
107 					if (*q != *p)
108 						break;
109 				if (*p == NULL) {	/* fast match success */
110 					found = 1;
111 					if (!globflag ||
112 					    fnmatch(pathpart, path, FNM_QUOTE))
113 						(void)printf("%s\n", path);
114 					break;
115 				}
116 			}
117 	}
118 }
119 
120 /*
121  * extract last glob-free subpattern in name for fast pre-match; prepend
122  * '\0' for backwards match; return end of new pattern
123  */
124 static char globfree[100];
125 
126 char *
127 patprep(name)
128 	char *name;
129 {
130 	register char *endmark, *p, *subp;
131 
132 	subp = globfree;
133 	*subp++ = '\0';
134 	p = name + strlen(name) - 1;
135 	/* skip trailing metacharacters (and [] ranges) */
136 	for (; p >= name; p--)
137 		if (index("*?", *p) == 0)
138 			break;
139 	if (p < name)
140 		p = name;
141 	if (*p == ']')
142 		for (p--; p >= name; p--)
143 			if (*p == '[') {
144 				p--;
145 				break;
146 			}
147 	if (p < name)
148 		p = name;
149 	/*
150 	 * if pattern has only metacharacters, check every path (force '/'
151 	 * search)
152 	 */
153 	if ((p == name) && index("?*[]", *p) != 0)
154 		*subp++ = '/';
155 	else {
156 		for (endmark = p; p >= name; p--)
157 			if (index("]*?", *p) != 0)
158 				break;
159 		for (++p;
160 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
161 			*subp++ = *p++;
162 	}
163 	*subp = '\0';
164 	return(--subp);
165 }
166