xref: /original-bsd/usr.bin/locate/locate/locate.c (revision 188f7363)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * James A. Woods.
7  *
8  * Redistribution and use in source and binary forms are permitted
9  * provided that the above copyright notice and this paragraph are
10  * duplicated in all such forms and that any documentation,
11  * advertising materials, and other materials related to such
12  * distribution and use acknowledge that the software was developed
13  * by the University of California, Berkeley.  The name of the
14  * University may not be used to endorse or promote products derived
15  * from this software without specific prior written permission.
16  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19  */
20 
21 #ifndef lint
22 char copyright[] =
23 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\
24  All rights reserved.\n";
25 #endif /* not lint */
26 
27 #ifndef lint
28 static char sccsid[] = "@(#)locate.c	5.1 (Berkeley) 03/06/90";
29 #endif /* not lint */
30 
31 /*
32  * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
33  *
34  * Locate scans a file list for the full pathname of a file given only part
35  * of the name.  The list has been processed with with "front-compression"
36  * and bigram coding.  Front compression reduces space by a factor of 4-5,
37  * bigram coding by a further 20-25%.
38  *
39  * The codes are:
40  *
41  * 	0-28	likeliest differential counts + offset to make nonnegative
42  *	30	switch code for out-of-range count to follow in next word
43  *	128-255 bigram codes (128 most common, as determined by 'updatedb')
44  *	32-127  single character (printable) ascii residue (ie, literal)
45  *
46  * A novel two-tiered string search technique is employed:
47  *
48  * First, a metacharacter-free subpattern and partial pathname is matched
49  * BACKWARDS to avoid full expansion of the pathname list.  The time savings
50  * is 40-50% over forward matching, which cannot efficiently handle
51  * overlapped search patterns and compressed path residue.
52  *
53  * Then, the actual shell glob-style regular expression (if in this form) is
54  * matched against the candidate pathnames using the slower routines provided
55  * in the standard 'find'.
56  */
57 
58 #include <sys/param.h>
59 #include <unistd.h>
60 #include <stdio.h>
61 #include "locate.h"
62 #include "pathnames.h"
63 
64 FILE *fp;
65 
66 main(argc, argv)
67 	int argc;
68 	char **argv;
69 {
70 	if (argc != 2) {
71 		(void)fprintf(stderr, "usage: locate pattern\n");
72 		exit(1);
73 	}
74 	if (!(fp = fopen(_PATH_FCODES, "r"))) {
75 		(void)fprintf(stderr, "locate: no database file %s.\n",
76 		    _PATH_FCODES);
77 		exit(1);
78 	}
79 	while (*++argv)
80 		fastfind(*argv);
81 	exit(0);
82 }
83 
84 fastfind(pathpart)
85 	char *pathpart;
86 {
87 	register char *p, *s;
88 	register int c;
89 	int count, found, globflag;
90 	char *cutoff, *patend, *q, *index(), *patprep();
91 	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
92 
93 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
94 		p[c] = getc(fp), s[c] = getc(fp);
95 
96 	p = pathpart;
97 	globflag = index(p, '*') || index(p, '?') || index(p, '[');
98 	patend = patprep(p);
99 
100 	found = 0;
101 	for (c = getc(fp), count = 0; c != EOF;) {
102 		count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
103 		/* overlay old path */
104 		for (p = path + count; (c = getc(fp)) > SWITCH;)
105 			if (c < PARITY)
106 				*p++ = c;
107 			else {		/* bigrams are parity-marked */
108 				c &= PARITY - 1;
109 				*p++ = bigram1[c], *p++ = bigram2[c];
110 			}
111 		*p-- = NULL;
112 		cutoff = (found ? path : path + count);
113 		for (found = 0, s = p; s >= cutoff; s--)
114 			if (*s == *patend) {	/* fast first char check */
115 				for (p = patend - 1, q = s - 1; *p != NULL;
116 				    p--, q--)
117 					if (*q != *p)
118 						break;
119 				if (*p == NULL) {	/* fast match success */
120 					found = 1;
121 					if (!globflag ||
122 					    fnmatch(pathpart, path, FNM_QUOTE))
123 						(void)printf("%s\n", path);
124 					break;
125 				}
126 			}
127 	}
128 }
129 
130 /*
131  * extract last glob-free subpattern in name for fast pre-match; prepend
132  * '\0' for backwards match; return end of new pattern
133  */
134 static char globfree[100];
135 
136 char *
137 patprep(name)
138 	char *name;
139 {
140 	register char *endmark, *p, *subp;
141 
142 	subp = globfree;
143 	*subp++ = '\0';
144 	p = name + strlen(name) - 1;
145 	/* skip trailing metacharacters (and [] ranges) */
146 	for (; p >= name; p--)
147 		if (index("*?", *p) == 0)
148 			break;
149 	if (p < name)
150 		p = name;
151 	if (*p == ']')
152 		for (p--; p >= name; p--)
153 			if (*p == '[') {
154 				p--;
155 				break;
156 			}
157 	if (p < name)
158 		p = name;
159 	/*
160 	 * if pattern has only metacharacters, check every path (force '/'
161 	 * search)
162 	 */
163 	if ((p == name) && index("?*[]", *p) != 0)
164 		*subp++ = '/';
165 	else {
166 		for (endmark = p; p >= name; p--)
167 			if (index("]*?", *p) != 0)
168 				break;
169 		for (++p;
170 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
171 			*subp++ = *p++;
172 	}
173 	*subp = '\0';
174 	return(--subp);
175 }
176