1 /* 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * James A. Woods. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #ifndef lint 41 static const char copyright[] = 42 "@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ 43 @(#) Copyright (c) 1989, 1993\n\ 44 The Regents of the University of California. All rights reserved.\n"; 45 #endif /* not lint */ 46 47 #ifndef lint 48 #if 0 49 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 50 #endif 51 static const char rcsid[] = 52 "$FreeBSD$"; 53 #endif /* not lint */ 54 55 /* 56 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 57 * 58 * Locate scans a file list for the full pathname of a file given only part 59 * of the name. The list has been processed with with "front-compression" 60 * and bigram coding. Front compression reduces space by a factor of 4-5, 61 * bigram coding by a further 20-25%. 62 * 63 * The codes are: 64 * 65 * 0-28 likeliest differential counts + offset to make nonnegative 66 * 30 switch code for out-of-range count to follow in next word 67 * 31 an 8 bit char followed 68 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 69 * 32-127 single character (printable) ascii residue (ie, literal) 70 * 71 * A novel two-tiered string search technique is employed: 72 * 73 * First, a metacharacter-free subpattern and partial pathname is matched 74 * BACKWARDS to avoid full expansion of the pathname list. The time savings 75 * is 40-50% over forward matching, which cannot efficiently handle 76 * overlapped search patterns and compressed path residue. 77 * 78 * Then, the actual shell glob-style regular expression (if in this form) is 79 * matched against the candidate pathnames using the slower routines provided 80 * in the standard 'find'. 81 */ 82 83 #include <sys/param.h> 84 #include <ctype.h> 85 #include <err.h> 86 #include <fnmatch.h> 87 #include <locale.h> 88 #include <stdio.h> 89 #include <stdlib.h> 90 #include <string.h> 91 #include <unistd.h> 92 93 #ifdef MMAP 94 # include <sys/types.h> 95 # include <sys/stat.h> 96 # include <sys/mman.h> 97 # include <fcntl.h> 98 #endif 99 100 101 #include "locate.h" 102 #include "pathnames.h" 103 104 #ifdef DEBUG 105 # include <sys/time.h> 106 # include <sys/types.h> 107 # include <sys/resource.h> 108 #endif 109 110 int f_mmap; /* use mmap */ 111 int f_icase; /* ignore case */ 112 int f_stdin; /* read database from stdin */ 113 int f_statistic; /* print statistic */ 114 int f_silent; /* suppress output, show only count of matches */ 115 int f_limit; /* limit number of output lines, 0 == infinite */ 116 u_int counter; /* counter for matches [-c] */ 117 char separator='\n'; /* line separator */ 118 119 u_char myctype[UCHAR_MAX + 1]; 120 121 void usage(void); 122 void statistic(FILE *, char *); 123 void fastfind(FILE *, char *, char *); 124 void fastfind_icase(FILE *, char *, char *); 125 void fastfind_mmap(char *, caddr_t, int, char *); 126 void fastfind_mmap_icase(char *, caddr_t, int, char *); 127 void search_mmap(char *, char **); 128 void search_fopen(char *, char **); 129 unsigned long cputime(void); 130 131 extern char **colon(char **, char*, char*); 132 extern void print_matches(u_int); 133 extern int getwm(caddr_t); 134 extern int getwf(FILE *); 135 extern u_char *tolower_word(u_char *); 136 extern int check_bigram_char(int); 137 extern char *patprep(char *); 138 139 int 140 main(int argc, char **argv) 141 { 142 register int ch; 143 char **dbv = NULL; 144 char *path_fcodes; /* locate database */ 145 #ifdef MMAP 146 f_mmap = 1; /* mmap is default */ 147 #endif 148 (void) setlocale(LC_ALL, ""); 149 150 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) 151 switch(ch) { 152 case '0': /* 'find -print0' style */ 153 separator = '\0'; 154 break; 155 case 'S': /* statistic lines */ 156 f_statistic = 1; 157 break; 158 case 'l': /* limit number of output lines, 0 == infinite */ 159 f_limit = atoi(optarg); 160 break; 161 case 'd': /* database */ 162 dbv = colon(dbv, optarg, _PATH_FCODES); 163 break; 164 case 'i': /* ignore case */ 165 f_icase = 1; 166 break; 167 case 'm': /* mmap */ 168 #ifdef MMAP 169 f_mmap = 1; 170 #else 171 warnx("mmap(2) not implemented"); 172 #endif 173 break; 174 case 's': /* stdio lib */ 175 f_mmap = 0; 176 break; 177 case 'c': /* suppress output, show only count of matches */ 178 f_silent = 1; 179 break; 180 default: 181 usage(); 182 } 183 argv += optind; 184 argc -= optind; 185 186 /* to few arguments */ 187 if (argc < 1 && !(f_statistic)) 188 usage(); 189 190 /* no (valid) database as argument */ 191 if (dbv == NULL || *dbv == NULL) { 192 /* try to read database from environment */ 193 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || 194 *path_fcodes == '\0') 195 /* use default database */ 196 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); 197 else /* $LOCATE_PATH */ 198 dbv = colon(dbv, path_fcodes, _PATH_FCODES); 199 } 200 201 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ 202 for (ch = 0; ch < UCHAR_MAX + 1; ch++) 203 myctype[ch] = tolower(ch); 204 205 /* foreach database ... */ 206 while((path_fcodes = *dbv) != NULL) { 207 dbv++; 208 209 if (!strcmp(path_fcodes, "-")) 210 f_stdin = 1; 211 else 212 f_stdin = 0; 213 214 #ifndef MMAP 215 f_mmap = 0; /* be paranoid */ 216 #endif 217 if (!f_mmap || f_stdin || f_statistic) 218 search_fopen(path_fcodes, argv); 219 else 220 search_mmap(path_fcodes, argv); 221 } 222 223 if (f_silent) 224 print_matches(counter); 225 exit(0); 226 } 227 228 229 /* 230 * Arguments: 231 * db database 232 * s search strings 233 */ 234 void 235 search_fopen(char *db, char **s) 236 { 237 FILE *fp; 238 #ifdef DEBUG 239 long t0; 240 #endif 241 242 /* can only read stdin once */ 243 if (f_stdin) { 244 fp = stdin; 245 if (*(s+1) != NULL) { 246 warnx("read database from stdin, use only `%s' as pattern", *s); 247 *(s+1) = NULL; 248 } 249 } 250 else if ((fp = fopen(db, "r")) == NULL) 251 err(1, "`%s'", db); 252 253 /* count only chars or lines */ 254 if (f_statistic) { 255 statistic(fp, db); 256 (void)fclose(fp); 257 return; 258 } 259 260 /* foreach search string ... */ 261 while(*s != NULL) { 262 #ifdef DEBUG 263 t0 = cputime(); 264 #endif 265 if (!f_stdin && 266 fseek(fp, (long)0, SEEK_SET) == -1) 267 err(1, "fseek to begin of ``%s''\n", db); 268 269 if (f_icase) 270 fastfind_icase(fp, *s, db); 271 else 272 fastfind(fp, *s, db); 273 #ifdef DEBUG 274 warnx("fastfind %ld ms", cputime () - t0); 275 #endif 276 s++; 277 } 278 (void)fclose(fp); 279 } 280 281 #ifdef MMAP 282 /* 283 * Arguments: 284 * db database 285 * s search strings 286 */ 287 void 288 search_mmap(char *db, char **s) 289 { 290 struct stat sb; 291 int fd; 292 caddr_t p; 293 off_t len; 294 #ifdef DEBUG 295 long t0; 296 #endif 297 if ((fd = open(db, O_RDONLY)) == -1 || 298 fstat(fd, &sb) == -1) 299 err(1, "`%s'", db); 300 len = sb.st_size; 301 if (len < (2*NBG)) 302 errx(1, 303 "database too small: %s\nRun /usr/libexec/locate.updatedb", 304 db); 305 306 if ((p = mmap((caddr_t)0, (size_t)len, 307 PROT_READ, MAP_SHARED, 308 fd, (off_t)0)) == MAP_FAILED) 309 err(1, "mmap ``%s''", db); 310 311 /* foreach search string ... */ 312 while (*s != NULL) { 313 #ifdef DEBUG 314 t0 = cputime(); 315 #endif 316 if (f_icase) 317 fastfind_mmap_icase(*s, p, (int)len, db); 318 else 319 fastfind_mmap(*s, p, (int)len, db); 320 #ifdef DEBUG 321 warnx("fastfind %ld ms", cputime () - t0); 322 #endif 323 s++; 324 } 325 326 if (munmap(p, (size_t)len) == -1) 327 warn("munmap %s\n", db); 328 329 (void)close(fd); 330 } 331 #endif /* MMAP */ 332 333 #ifdef DEBUG 334 unsigned long 335 cputime () 336 { 337 struct rusage rus; 338 339 getrusage(RUSAGE_SELF, &rus); 340 return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); 341 } 342 #endif /* DEBUG */ 343 344 void 345 usage () 346 { 347 (void)fprintf(stderr, 348 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); 349 (void)fprintf(stderr, 350 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); 351 exit(1); 352 } 353 354 355 /* load fastfind functions */ 356 357 /* statistic */ 358 /* fastfind_mmap, fastfind_mmap_icase */ 359 #ifdef MMAP 360 #undef FF_MMAP 361 #undef FF_ICASE 362 363 #define FF_MMAP 364 #include "fastfind.c" 365 #define FF_ICASE 366 #include "fastfind.c" 367 #endif /* MMAP */ 368 369 /* fopen */ 370 /* fastfind, fastfind_icase */ 371 #undef FF_MMAP 372 #undef FF_ICASE 373 #include "fastfind.c" 374 #define FF_ICASE 375 #include "fastfind.c" 376