1 /* $NetBSD: locate.c,v 1.9 1999/08/16 01:41:17 sjg Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * James A. Woods. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\ 42 The Regents of the University of California. All rights reserved.\n"); 43 #endif /* not lint */ 44 45 #ifndef lint 46 #if 0 47 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 48 #endif 49 __RCSID("$NetBSD: locate.c,v 1.9 1999/08/16 01:41:17 sjg Exp $"); 50 #endif /* not lint */ 51 52 /* 53 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 54 * 55 * Locate scans a file list for the full pathname of a file given only part 56 * of the name. The list has been processed with with "front-compression" 57 * and bigram coding. Front compression reduces space by a factor of 4-5, 58 * bigram coding by a further 20-25%. 59 * 60 * The codes are: 61 * 62 * 0-28 likeliest differential counts + offset to make nonnegative 63 * 30 switch code for out-of-range count to follow in next word 64 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 65 * 32-127 single character (printable) ascii residue (ie, literal) 66 * 67 * A novel two-tiered string search technique is employed: 68 * 69 * First, a metacharacter-free subpattern and partial pathname is matched 70 * BACKWARDS to avoid full expansion of the pathname list. The time savings 71 * is 40-50% over forward matching, which cannot efficiently handle 72 * overlapped search patterns and compressed path residue. 73 * 74 * Then, the actual shell glob-style regular expression (if in this form) is 75 * matched against the candidate pathnames using the slower routines provided 76 * in the standard 'find'. 77 */ 78 79 #include <sys/param.h> 80 81 #include <fnmatch.h> 82 #include <unistd.h> 83 #include <stdio.h> 84 #include <string.h> 85 #include <stdlib.h> 86 #include <sys/queue.h> 87 88 #include "locate.h" 89 #include "pathnames.h" 90 91 92 struct locate_db { 93 LIST_ENTRY(locate_db) db_link; 94 FILE *db_fp; 95 }; 96 LIST_HEAD(db_list, locate_db) db_list; 97 98 #ifndef NEW 99 # define NEW(type) (type *) malloc(sizeof (type)) 100 #endif 101 102 void add_db __P((char *)); 103 int fastfind __P((FILE *, char *)); 104 int main __P((int, char **)); 105 char *patprep __P((char *)); 106 107 108 void 109 add_db(path) 110 char *path; 111 { 112 FILE *fp; 113 struct locate_db *dbp; 114 115 if (!(path && *path)) 116 path = _PATH_FCODES; 117 if ((fp = fopen(path, "r"))) { 118 dbp = NEW(struct locate_db); 119 dbp->db_fp = fp; 120 LIST_INSERT_HEAD(&db_list, dbp, db_link); 121 } else { 122 (void)fprintf(stderr, "locate: no database file %s.\n", path); 123 } 124 } 125 126 int 127 main(argc, argv) 128 int argc; 129 char *argv[]; 130 { 131 char *locate_path = getenv("LOCATE_PATH"); 132 char *cp; 133 struct locate_db *dbp; 134 int c; 135 int found = 0; 136 137 LIST_INIT(&db_list); 138 139 while ((c = getopt(argc, argv, "d:")) != EOF) { 140 switch (c) { 141 case 'd': 142 locate_path = optarg; 143 break; 144 } 145 } 146 if (argc <= optind) { 147 (void)fprintf(stderr, "usage: locate [-d dbpath] pattern ...\n"); 148 exit(1); 149 } 150 if (!locate_path) 151 locate_path = _PATH_FCODES; 152 if ((cp = strrchr(locate_path, ':'))) { 153 locate_path = strdup(locate_path); 154 while ((cp = strrchr(locate_path, ':'))) { 155 *cp++ = '\0'; 156 add_db(cp); 157 } 158 } 159 add_db(locate_path); 160 if (db_list.lh_first == NULL) 161 exit(1); 162 for (; optind < argc; ++optind) { 163 for (dbp = db_list.lh_first; dbp != NULL; 164 dbp = dbp->db_link.le_next) { 165 found |= fastfind(dbp->db_fp, argv[optind]); 166 } 167 } 168 exit(found == 0); 169 } 170 171 int 172 fastfind(fp, pathpart) 173 FILE *fp; 174 char *pathpart; 175 { 176 char *p, *s; 177 int c; 178 int count, found, globflag, printed; 179 char *cutoff, *patend, *q; 180 char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 181 182 rewind(fp); 183 184 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) 185 p[c] = getc(fp), s[c] = getc(fp); 186 187 p = pathpart; 188 globflag = strchr(p, '*') || strchr(p, '?') || strchr(p, '['); 189 patend = patprep(p); 190 191 found = printed = 0; 192 for (c = getc(fp), count = 0; c != EOF;) { 193 count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; 194 /* overlay old path */ 195 for (p = path + count; (c = getc(fp)) > SWITCH;) 196 if (c < PARITY) 197 *p++ = c; 198 else { /* bigrams are parity-marked */ 199 c &= PARITY - 1; 200 *p++ = bigram1[c], *p++ = bigram2[c]; 201 } 202 *p-- = '\0'; 203 cutoff = (found ? path : path + count); 204 for (found = 0, s = p; s >= cutoff; s--) 205 if (*s == *patend) { /* fast first char check */ 206 for (p = patend - 1, q = s - 1; *p != '\0'; 207 p--, q--) 208 if (*q != *p) 209 break; 210 if (*p == '\0') { /* fast match success */ 211 found = 1; 212 if (!globflag || 213 !fnmatch(pathpart, path, 0)) { 214 (void)printf("%s\n", path); 215 ++printed; 216 } 217 break; 218 } 219 } 220 } 221 return (printed); 222 } 223 224 /* 225 * extract last glob-free subpattern in name for fast pre-match; prepend 226 * '\0' for backwards match; return end of new pattern 227 */ 228 static char globfree[100]; 229 230 char * 231 patprep(name) 232 char *name; 233 { 234 char *endmark, *p, *subp; 235 236 subp = globfree; 237 *subp++ = '\0'; 238 p = name + strlen(name) - 1; 239 /* skip trailing metacharacters (and [] ranges) */ 240 for (; p >= name; p--) 241 if (strchr("*?", *p) == 0) 242 break; 243 if (p < name) 244 p = name; 245 if (*p == ']') 246 for (p--; p >= name; p--) 247 if (*p == '[') { 248 p--; 249 break; 250 } 251 if (p < name) 252 p = name; 253 /* 254 * if pattern has only metacharacters, check every path (force '/' 255 * search) 256 */ 257 if ((p == name) && strchr("?*[]", *p) != 0) 258 *subp++ = '/'; 259 else { 260 for (endmark = p; p >= name; p--) 261 if (strchr("]*?", *p) != 0) 262 break; 263 for (++p; 264 (p <= endmark) && subp < (globfree + sizeof(globfree));) 265 *subp++ = *p++; 266 } 267 *subp = '\0'; 268 return(--subp); 269 } 270