1 /* 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org> 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * James A. Woods. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 /* 37 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 38 * 39 * Locate scans a file list for the full pathname of a file given only part 40 * of the name. The list has been processed with with "front-compression" 41 * and bigram coding. Front compression reduces space by a factor of 4-5, 42 * bigram coding by a further 20-25%. 43 * 44 * The codes are: 45 * 46 * 0-28 likeliest differential counts + offset to make nonnegative 47 * 30 switch code for out-of-range count to follow in next word 48 * 31 an 8 bit char followed 49 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 50 * 32-127 single character (printable) ascii residue (ie, literal) 51 * 52 * A novel two-tiered string search technique is employed: 53 * 54 * First, a metacharacter-free subpattern and partial pathname is matched 55 * BACKWARDS to avoid full expansion of the pathname list. The time savings 56 * is 40-50% over forward matching, which cannot efficiently handle 57 * overlapped search patterns and compressed path residue. 58 * 59 * Then, the actual shell glob-style regular expression (if in this form) is 60 * matched against the candidate pathnames using the slower routines provided 61 * in the standard 'find'. 62 */ 63 64 #include <sys/param.h> 65 #include <ctype.h> 66 #include <err.h> 67 #include <fnmatch.h> 68 #include <locale.h> 69 #include <stdio.h> 70 #include <stdlib.h> 71 #include <string.h> 72 #include <unistd.h> 73 74 #ifdef MMAP 75 # include <sys/types.h> 76 # include <sys/stat.h> 77 # include <sys/mman.h> 78 # include <fcntl.h> 79 #endif 80 81 #include "locate.h" 82 #include "pathnames.h" 83 84 85 int f_mmap; /* use mmap */ 86 int f_icase; /* ignore case */ 87 int f_stdin; /* read database from stdin */ 88 int f_statistic; /* print statistic */ 89 int f_silent; /* suppress output, show only count of matches */ 90 long f_limit; /* limit number of output lines, 0 == infinite */ 91 long counter; /* counter for matches [-c] */ 92 char separator='\n'; /* line separator */ 93 94 u_char myctype[UCHAR_MAX + 1]; 95 96 void usage(void); 97 void statistic(FILE *, char *); 98 void fastfind(FILE *, char *, char *); 99 void fastfind_icase(FILE *, char *, char *); 100 void fastfind_mmap(char *, caddr_t, off_t, char *); 101 void fastfind_mmap_icase(char *, caddr_t, off_t, char *); 102 void search_mmap(char *, char **); 103 void search_fopen(char *, char **); 104 unsigned long cputime(void); 105 106 extern char **colon(char **, char*, char*); 107 extern int getwm(caddr_t); 108 extern int getwf(FILE *); 109 extern u_char *tolower_word(u_char *); 110 extern int check_bigram_char(int); 111 extern char *patprep(char *); 112 extern void rebuild_message(char *db); 113 extern int check_size(char *db); 114 115 int 116 main(int argc, char **argv) 117 { 118 int ch; 119 char **dbv = NULL; 120 char *path_fcodes; /* locate database */ 121 #ifdef MMAP 122 f_mmap = 1; /* mmap is default */ 123 #endif 124 (void) setlocale(LC_ALL, ""); 125 126 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) 127 switch(ch) { 128 case '0': /* 'find -print0' style */ 129 separator = '\0'; 130 break; 131 case 'S': /* statistic lines */ 132 f_statistic = 1; 133 break; 134 case 'l': /* limit number of output lines, 0 == infinite */ 135 f_limit = atol(optarg); 136 if (f_limit < 0 ) 137 errx(1, "invalid argument for -l: '%s'", optarg); 138 break; 139 case 'd': /* database */ 140 dbv = colon(dbv, optarg, _PATH_FCODES); 141 break; 142 case 'i': /* ignore case */ 143 f_icase = 1; 144 break; 145 case 'm': /* mmap */ 146 #ifdef MMAP 147 f_mmap = 1; 148 #else 149 warnx("mmap(2) not implemented"); 150 #endif 151 break; 152 case 's': /* stdio lib */ 153 f_mmap = 0; 154 break; 155 case 'c': /* suppress output, show only count of matches */ 156 f_silent = 1; 157 break; 158 default: 159 usage(); 160 } 161 argv += optind; 162 argc -= optind; 163 164 /* to few arguments */ 165 if (argc < 1 && !(f_statistic)) 166 usage(); 167 168 /* no (valid) database as argument */ 169 if (dbv == NULL || *dbv == NULL) { 170 /* try to read database from environment */ 171 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || 172 *path_fcodes == '\0') 173 /* use default database */ 174 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); 175 else /* $LOCATE_PATH */ 176 dbv = colon(dbv, path_fcodes, _PATH_FCODES); 177 } 178 179 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ 180 for (ch = 0; ch < UCHAR_MAX + 1; ch++) 181 myctype[ch] = tolower(ch); 182 183 /* foreach database ... */ 184 while((path_fcodes = *dbv) != NULL) { 185 dbv++; 186 187 if (!strcmp(path_fcodes, "-")) 188 f_stdin = 1; 189 else 190 f_stdin = 0; 191 192 #ifndef MMAP 193 f_mmap = 0; /* be paranoid */ 194 #endif 195 if (!f_mmap || f_stdin || f_statistic) 196 search_fopen(path_fcodes, argv); 197 else 198 search_mmap(path_fcodes, argv); 199 } 200 201 if (f_silent) 202 printf("%ld\n", counter); 203 exit(0); 204 } 205 206 /* 207 * Arguments: 208 * db database 209 * s search strings 210 */ 211 void 212 search_fopen(char *db, char **s) 213 { 214 FILE *fp; 215 216 /* can only read stdin once */ 217 if (f_stdin) { 218 fp = stdin; 219 if (*(s+1) != NULL) { 220 warnx("read database from stdin, use only `%s' as pattern", *s); 221 *(s+1) = NULL; 222 } 223 } 224 else { 225 if (!check_size(db)) 226 exit(1); 227 228 if ((fp = fopen(db, "r")) == NULL) { 229 warn("`%s'", db); 230 rebuild_message(db); 231 exit(1); 232 } 233 } 234 235 /* count only chars or lines */ 236 if (f_statistic) { 237 statistic(fp, db); 238 (void)fclose(fp); 239 return; 240 } 241 242 /* foreach search string ... */ 243 while(*s != NULL) { 244 if (!f_stdin && 245 fseek(fp, (long)0, SEEK_SET) == -1) 246 err(1, "fseek to begin of ``%s''\n", db); 247 248 if (f_icase) 249 fastfind_icase(fp, *s, db); 250 else 251 fastfind(fp, *s, db); 252 s++; 253 } 254 (void)fclose(fp); 255 } 256 257 #ifdef MMAP 258 /* 259 * Arguments: 260 * db database 261 * s search strings 262 */ 263 void 264 search_mmap(char *db, char **s) 265 { 266 struct stat sb; 267 int fd; 268 caddr_t p; 269 off_t len; 270 271 if (!check_size(db)) 272 exit(1); 273 274 if (stat(db, &sb) == -1) 275 err(1, "stat"); 276 277 len = sb.st_size; 278 279 if ((fd = open(db, O_RDONLY)) == -1) { 280 warn("%s", db); 281 rebuild_message(db); 282 exit(1); 283 } 284 285 if ((p = mmap((caddr_t)0, (size_t)len, 286 PROT_READ, MAP_SHARED, 287 fd, (off_t)0)) == MAP_FAILED) 288 err(1, "mmap ``%s''", db); 289 290 /* foreach search string ... */ 291 while (*s != NULL) { 292 if (f_icase) 293 fastfind_mmap_icase(*s, p, len, db); 294 else 295 fastfind_mmap(*s, p, len, db); 296 s++; 297 } 298 299 if (munmap(p, (size_t)len) == -1) 300 warn("munmap %s\n", db); 301 302 (void)close(fd); 303 } 304 #endif /* MMAP */ 305 306 void 307 usage () 308 { 309 (void)fprintf(stderr, 310 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); 311 (void)fprintf(stderr, 312 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); 313 exit(1); 314 } 315 316 317 /* load fastfind functions */ 318 319 /* statistic */ 320 /* fastfind_mmap, fastfind_mmap_icase */ 321 #ifdef MMAP 322 #undef FF_MMAP 323 #undef FF_ICASE 324 325 #define FF_MMAP 326 #include "fastfind.c" 327 #define FF_ICASE 328 #include "fastfind.c" 329 #endif /* MMAP */ 330 331 /* fopen */ 332 /* fastfind, fastfind_icase */ 333 #undef FF_MMAP 334 #undef FF_ICASE 335 #include "fastfind.c" 336 #define FF_ICASE 337 #include "fastfind.c" 338