1 /* 2 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 3 * Copyright (c) 1989, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * James A. Woods. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 static const char copyright[] = 40 "@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ 41 @(#) Copyright (c) 1989, 1993\n\ 42 The Regents of the University of California. All rights reserved.\n"; 43 #endif /* not lint */ 44 45 #ifndef lint 46 #if 0 47 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 48 #endif 49 static const char rcsid[] = 50 "$FreeBSD$"; 51 #endif /* not lint */ 52 53 /* 54 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 55 * 56 * Locate scans a file list for the full pathname of a file given only part 57 * of the name. The list has been processed with with "front-compression" 58 * and bigram coding. Front compression reduces space by a factor of 4-5, 59 * bigram coding by a further 20-25%. 60 * 61 * The codes are: 62 * 63 * 0-28 likeliest differential counts + offset to make nonnegative 64 * 30 switch code for out-of-range count to follow in next word 65 * 31 an 8 bit char followed 66 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 67 * 32-127 single character (printable) ascii residue (ie, literal) 68 * 69 * A novel two-tiered string search technique is employed: 70 * 71 * First, a metacharacter-free subpattern and partial pathname is matched 72 * BACKWARDS to avoid full expansion of the pathname list. The time savings 73 * is 40-50% over forward matching, which cannot efficiently handle 74 * overlapped search patterns and compressed path residue. 75 * 76 * Then, the actual shell glob-style regular expression (if in this form) is 77 * matched against the candidate pathnames using the slower routines provided 78 * in the standard 'find'. 79 */ 80 81 #include <sys/param.h> 82 #include <ctype.h> 83 #include <err.h> 84 #include <fnmatch.h> 85 #include <locale.h> 86 #include <stdio.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <unistd.h> 90 91 #ifdef MMAP 92 # include <sys/types.h> 93 # include <sys/stat.h> 94 # include <sys/mman.h> 95 # include <fcntl.h> 96 #endif 97 98 99 #include "locate.h" 100 #include "pathnames.h" 101 102 #ifdef DEBUG 103 # include <sys/time.h> 104 # include <sys/types.h> 105 # include <sys/resource.h> 106 #endif 107 108 int f_mmap; /* use mmap */ 109 int f_icase; /* ignore case */ 110 int f_stdin; /* read database from stdin */ 111 int f_statistic; /* print statistic */ 112 int f_silent; /* suppress output, show only count of matches */ 113 int f_limit; /* limit number of output lines, 0 == infinite */ 114 u_int counter; /* counter for matches [-c] */ 115 char separator='\n'; /* line separator */ 116 117 118 void usage(void); 119 void statistic(FILE *, char *); 120 void fastfind(FILE *, char *, char *); 121 void fastfind_icase(FILE *, char *, char *); 122 void fastfind_mmap(char *, caddr_t, int, char *); 123 void fastfind_mmap_icase(char *, caddr_t, int, char *); 124 void search_mmap(char *, char **); 125 void search_fopen(char *, char **); 126 unsigned long cputime(void); 127 128 extern char **colon(char **, char*, char*); 129 extern void print_matches(u_int); 130 extern int getwm(caddr_t); 131 extern int getwf(FILE *); 132 extern u_char *tolower_word(u_char *); 133 extern int check_bigram_char(int); 134 extern char *patprep(char *); 135 136 int 137 main(int argc, char **argv) 138 { 139 register int ch; 140 char **dbv = NULL; 141 char *path_fcodes; /* locate database */ 142 #ifdef MMAP 143 f_mmap = 1; /* mmap is default */ 144 #endif 145 (void) setlocale(LC_ALL, ""); 146 147 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) 148 switch(ch) { 149 case '0': /* 'find -print0' style */ 150 separator = '\0'; 151 break; 152 case 'S': /* statistic lines */ 153 f_statistic = 1; 154 break; 155 case 'l': /* limit number of output lines, 0 == infinite */ 156 f_limit = atoi(optarg); 157 break; 158 case 'd': /* database */ 159 dbv = colon(dbv, optarg, _PATH_FCODES); 160 break; 161 case 'i': /* ignore case */ 162 f_icase = 1; 163 break; 164 case 'm': /* mmap */ 165 #ifdef MMAP 166 f_mmap = 1; 167 #else 168 warnx("mmap(2) not implemented"); 169 #endif 170 break; 171 case 's': /* stdio lib */ 172 f_mmap = 0; 173 break; 174 case 'c': /* suppress output, show only count of matches */ 175 f_silent = 1; 176 break; 177 default: 178 usage(); 179 } 180 argv += optind; 181 argc -= optind; 182 183 /* to few arguments */ 184 if (argc < 1 && !(f_statistic)) 185 usage(); 186 187 /* no (valid) database as argument */ 188 if (dbv == NULL || *dbv == NULL) { 189 /* try to read database from environment */ 190 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || 191 *path_fcodes == '\0') 192 /* use default database */ 193 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); 194 else /* $LOCATE_PATH */ 195 dbv = colon(dbv, path_fcodes, _PATH_FCODES); 196 } 197 198 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ 199 for (ch = 0; ch < UCHAR_MAX + 1; ch++) 200 myctype[ch] = tolower(ch); 201 202 /* foreach database ... */ 203 while((path_fcodes = *dbv) != NULL) { 204 dbv++; 205 206 if (!strcmp(path_fcodes, "-")) 207 f_stdin = 1; 208 else 209 f_stdin = 0; 210 211 #ifndef MMAP 212 f_mmap = 0; /* be paranoid */ 213 #endif 214 if (!f_mmap || f_stdin || f_statistic) 215 search_fopen(path_fcodes, argv); 216 else 217 search_mmap(path_fcodes, argv); 218 } 219 220 if (f_silent) 221 print_matches(counter); 222 exit(0); 223 } 224 225 226 /* 227 * Arguments: 228 * db database 229 * s search strings 230 */ 231 void 232 search_fopen(char *db, char **s) 233 { 234 FILE *fp; 235 #ifdef DEBUG 236 long t0; 237 #endif 238 239 /* can only read stdin once */ 240 if (f_stdin) { 241 fp = stdin; 242 if (*(s+1) != NULL) { 243 warnx("read database from stdin, use only `%s' as pattern", *s); 244 *(s+1) = NULL; 245 } 246 } 247 else if ((fp = fopen(db, "r")) == NULL) 248 err(1, "`%s'", db); 249 250 /* count only chars or lines */ 251 if (f_statistic) { 252 statistic(fp, db); 253 (void)fclose(fp); 254 return; 255 } 256 257 /* foreach search string ... */ 258 while(*s != NULL) { 259 #ifdef DEBUG 260 t0 = cputime(); 261 #endif 262 if (!f_stdin && 263 fseek(fp, (long)0, SEEK_SET) == -1) 264 err(1, "fseek to begin of ``%s''\n", db); 265 266 if (f_icase) 267 fastfind_icase(fp, *s, db); 268 else 269 fastfind(fp, *s, db); 270 #ifdef DEBUG 271 warnx("fastfind %ld ms", cputime () - t0); 272 #endif 273 s++; 274 } 275 (void)fclose(fp); 276 } 277 278 #ifdef MMAP 279 /* 280 * Arguments: 281 * db database 282 * s search strings 283 */ 284 void 285 search_mmap(char *db, char **s) 286 { 287 struct stat sb; 288 int fd; 289 caddr_t p; 290 off_t len; 291 #ifdef DEBUG 292 long t0; 293 #endif 294 if ((fd = open(db, O_RDONLY)) == -1 || 295 fstat(fd, &sb) == -1) 296 err(1, "`%s'", db); 297 len = sb.st_size; 298 if (len < (2*NBG)) 299 errx(1, 300 "database too small: %s\nRun /usr/libexec/locate.updatedb", 301 db); 302 303 if ((p = mmap((caddr_t)0, (size_t)len, 304 PROT_READ, MAP_SHARED, 305 fd, (off_t)0)) == MAP_FAILED) 306 err(1, "mmap ``%s''", db); 307 308 /* foreach search string ... */ 309 while (*s != NULL) { 310 #ifdef DEBUG 311 t0 = cputime(); 312 #endif 313 if (f_icase) 314 fastfind_mmap_icase(*s, p, (int)len, db); 315 else 316 fastfind_mmap(*s, p, (int)len, db); 317 #ifdef DEBUG 318 warnx("fastfind %ld ms", cputime () - t0); 319 #endif 320 s++; 321 } 322 323 if (munmap(p, (size_t)len) == -1) 324 warn("munmap %s\n", db); 325 326 (void)close(fd); 327 } 328 #endif /* MMAP */ 329 330 #ifdef DEBUG 331 unsigned long 332 cputime () 333 { 334 struct rusage rus; 335 336 getrusage(RUSAGE_SELF, &rus); 337 return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); 338 } 339 #endif /* DEBUG */ 340 341 void 342 usage () 343 { 344 (void)fprintf(stderr, 345 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); 346 (void)fprintf(stderr, 347 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); 348 exit(1); 349 } 350 351 352 /* load fastfind functions */ 353 354 /* statistic */ 355 /* fastfind_mmap, fastfind_mmap_icase */ 356 #ifdef MMAP 357 #undef FF_MMAP 358 #undef FF_ICASE 359 360 #define FF_MMAP 361 #include "fastfind.c" 362 #define FF_ICASE 363 #include "fastfind.c" 364 #endif /* MMAP */ 365 366 /* fopen */ 367 /* fastfind, fastfind_icase */ 368 #undef FF_MMAP 369 #undef FF_ICASE 370 #include "fastfind.c" 371 #define FF_ICASE 372 #include "fastfind.c" 373