1 /* 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * James A. Woods. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #ifndef lint 41 static const char copyright[] = 42 "@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ 43 @(#) Copyright (c) 1989, 1993\n\ 44 The Regents of the University of California. All rights reserved.\n"; 45 #endif /* not lint */ 46 47 #ifndef lint 48 #if 0 49 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 50 #endif 51 static const char rcsid[] = 52 "$FreeBSD$"; 53 #endif /* not lint */ 54 55 /* 56 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 57 * 58 * Locate scans a file list for the full pathname of a file given only part 59 * of the name. The list has been processed with with "front-compression" 60 * and bigram coding. Front compression reduces space by a factor of 4-5, 61 * bigram coding by a further 20-25%. 62 * 63 * The codes are: 64 * 65 * 0-28 likeliest differential counts + offset to make nonnegative 66 * 30 switch code for out-of-range count to follow in next word 67 * 31 an 8 bit char followed 68 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 69 * 32-127 single character (printable) ascii residue (ie, literal) 70 * 71 * A novel two-tiered string search technique is employed: 72 * 73 * First, a metacharacter-free subpattern and partial pathname is matched 74 * BACKWARDS to avoid full expansion of the pathname list. The time savings 75 * is 40-50% over forward matching, which cannot efficiently handle 76 * overlapped search patterns and compressed path residue. 77 * 78 * Then, the actual shell glob-style regular expression (if in this form) is 79 * matched against the candidate pathnames using the slower routines provided 80 * in the standard 'find'. 81 */ 82 83 #include <sys/param.h> 84 #include <ctype.h> 85 #include <err.h> 86 #include <fnmatch.h> 87 #include <locale.h> 88 #include <stdio.h> 89 #include <stdlib.h> 90 #include <string.h> 91 #include <unistd.h> 92 93 #ifdef MMAP 94 # include <sys/types.h> 95 # include <sys/stat.h> 96 # include <sys/mman.h> 97 # include <fcntl.h> 98 #endif 99 100 101 #include "locate.h" 102 #include "pathnames.h" 103 104 #ifdef DEBUG 105 # include <sys/time.h> 106 # include <sys/types.h> 107 # include <sys/resource.h> 108 #endif 109 110 int f_mmap; /* use mmap */ 111 int f_icase; /* ignore case */ 112 int f_stdin; /* read database from stdin */ 113 int f_statistic; /* print statistic */ 114 int f_silent; /* suppress output, show only count of matches */ 115 int f_limit; /* limit number of output lines, 0 == infinite */ 116 u_int counter; /* counter for matches [-c] */ 117 char separator='\n'; /* line separator */ 118 119 120 void usage(void); 121 void statistic(FILE *, char *); 122 void fastfind(FILE *, char *, char *); 123 void fastfind_icase(FILE *, char *, char *); 124 void fastfind_mmap(char *, caddr_t, int, char *); 125 void fastfind_mmap_icase(char *, caddr_t, int, char *); 126 void search_mmap(char *, char **); 127 void search_fopen(char *, char **); 128 unsigned long cputime(void); 129 130 extern char **colon(char **, char*, char*); 131 extern void print_matches(u_int); 132 extern int getwm(caddr_t); 133 extern int getwf(FILE *); 134 extern u_char *tolower_word(u_char *); 135 extern int check_bigram_char(int); 136 extern char *patprep(char *); 137 138 int 139 main(int argc, char **argv) 140 { 141 register int ch; 142 char **dbv = NULL; 143 char *path_fcodes; /* locate database */ 144 #ifdef MMAP 145 f_mmap = 1; /* mmap is default */ 146 #endif 147 (void) setlocale(LC_ALL, ""); 148 149 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) 150 switch(ch) { 151 case '0': /* 'find -print0' style */ 152 separator = '\0'; 153 break; 154 case 'S': /* statistic lines */ 155 f_statistic = 1; 156 break; 157 case 'l': /* limit number of output lines, 0 == infinite */ 158 f_limit = atoi(optarg); 159 break; 160 case 'd': /* database */ 161 dbv = colon(dbv, optarg, _PATH_FCODES); 162 break; 163 case 'i': /* ignore case */ 164 f_icase = 1; 165 break; 166 case 'm': /* mmap */ 167 #ifdef MMAP 168 f_mmap = 1; 169 #else 170 warnx("mmap(2) not implemented"); 171 #endif 172 break; 173 case 's': /* stdio lib */ 174 f_mmap = 0; 175 break; 176 case 'c': /* suppress output, show only count of matches */ 177 f_silent = 1; 178 break; 179 default: 180 usage(); 181 } 182 argv += optind; 183 argc -= optind; 184 185 /* to few arguments */ 186 if (argc < 1 && !(f_statistic)) 187 usage(); 188 189 /* no (valid) database as argument */ 190 if (dbv == NULL || *dbv == NULL) { 191 /* try to read database from environment */ 192 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || 193 *path_fcodes == '\0') 194 /* use default database */ 195 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); 196 else /* $LOCATE_PATH */ 197 dbv = colon(dbv, path_fcodes, _PATH_FCODES); 198 } 199 200 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ 201 for (ch = 0; ch < UCHAR_MAX + 1; ch++) 202 myctype[ch] = tolower(ch); 203 204 /* foreach database ... */ 205 while((path_fcodes = *dbv) != NULL) { 206 dbv++; 207 208 if (!strcmp(path_fcodes, "-")) 209 f_stdin = 1; 210 else 211 f_stdin = 0; 212 213 #ifndef MMAP 214 f_mmap = 0; /* be paranoid */ 215 #endif 216 if (!f_mmap || f_stdin || f_statistic) 217 search_fopen(path_fcodes, argv); 218 else 219 search_mmap(path_fcodes, argv); 220 } 221 222 if (f_silent) 223 print_matches(counter); 224 exit(0); 225 } 226 227 228 /* 229 * Arguments: 230 * db database 231 * s search strings 232 */ 233 void 234 search_fopen(char *db, char **s) 235 { 236 FILE *fp; 237 #ifdef DEBUG 238 long t0; 239 #endif 240 241 /* can only read stdin once */ 242 if (f_stdin) { 243 fp = stdin; 244 if (*(s+1) != NULL) { 245 warnx("read database from stdin, use only `%s' as pattern", *s); 246 *(s+1) = NULL; 247 } 248 } 249 else if ((fp = fopen(db, "r")) == NULL) 250 err(1, "`%s'", db); 251 252 /* count only chars or lines */ 253 if (f_statistic) { 254 statistic(fp, db); 255 (void)fclose(fp); 256 return; 257 } 258 259 /* foreach search string ... */ 260 while(*s != NULL) { 261 #ifdef DEBUG 262 t0 = cputime(); 263 #endif 264 if (!f_stdin && 265 fseek(fp, (long)0, SEEK_SET) == -1) 266 err(1, "fseek to begin of ``%s''\n", db); 267 268 if (f_icase) 269 fastfind_icase(fp, *s, db); 270 else 271 fastfind(fp, *s, db); 272 #ifdef DEBUG 273 warnx("fastfind %ld ms", cputime () - t0); 274 #endif 275 s++; 276 } 277 (void)fclose(fp); 278 } 279 280 #ifdef MMAP 281 /* 282 * Arguments: 283 * db database 284 * s search strings 285 */ 286 void 287 search_mmap(char *db, char **s) 288 { 289 struct stat sb; 290 int fd; 291 caddr_t p; 292 off_t len; 293 #ifdef DEBUG 294 long t0; 295 #endif 296 if ((fd = open(db, O_RDONLY)) == -1 || 297 fstat(fd, &sb) == -1) 298 err(1, "`%s'", db); 299 len = sb.st_size; 300 if (len < (2*NBG)) 301 errx(1, 302 "database too small: %s\nRun /usr/libexec/locate.updatedb", 303 db); 304 305 if ((p = mmap((caddr_t)0, (size_t)len, 306 PROT_READ, MAP_SHARED, 307 fd, (off_t)0)) == MAP_FAILED) 308 err(1, "mmap ``%s''", db); 309 310 /* foreach search string ... */ 311 while (*s != NULL) { 312 #ifdef DEBUG 313 t0 = cputime(); 314 #endif 315 if (f_icase) 316 fastfind_mmap_icase(*s, p, (int)len, db); 317 else 318 fastfind_mmap(*s, p, (int)len, db); 319 #ifdef DEBUG 320 warnx("fastfind %ld ms", cputime () - t0); 321 #endif 322 s++; 323 } 324 325 if (munmap(p, (size_t)len) == -1) 326 warn("munmap %s\n", db); 327 328 (void)close(fd); 329 } 330 #endif /* MMAP */ 331 332 #ifdef DEBUG 333 unsigned long 334 cputime () 335 { 336 struct rusage rus; 337 338 getrusage(RUSAGE_SELF, &rus); 339 return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); 340 } 341 #endif /* DEBUG */ 342 343 void 344 usage () 345 { 346 (void)fprintf(stderr, 347 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); 348 (void)fprintf(stderr, 349 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); 350 exit(1); 351 } 352 353 354 /* load fastfind functions */ 355 356 /* statistic */ 357 /* fastfind_mmap, fastfind_mmap_icase */ 358 #ifdef MMAP 359 #undef FF_MMAP 360 #undef FF_ICASE 361 362 #define FF_MMAP 363 #include "fastfind.c" 364 #define FF_ICASE 365 #include "fastfind.c" 366 #endif /* MMAP */ 367 368 /* fopen */ 369 /* fastfind, fastfind_icase */ 370 #undef FF_MMAP 371 #undef FF_ICASE 372 #include "fastfind.c" 373 #define FF_ICASE 374 #include "fastfind.c" 375