1 /* 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org> 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * James A. Woods. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 static const char copyright[] = 38 "@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ 39 @(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 #endif /* not lint */ 42 43 #ifndef lint 44 #if 0 45 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 46 #endif 47 static const char rcsid[] = 48 "$FreeBSD$"; 49 #endif /* not lint */ 50 51 /* 52 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 53 * 54 * Locate scans a file list for the full pathname of a file given only part 55 * of the name. The list has been processed with with "front-compression" 56 * and bigram coding. Front compression reduces space by a factor of 4-5, 57 * bigram coding by a further 20-25%. 58 * 59 * The codes are: 60 * 61 * 0-28 likeliest differential counts + offset to make nonnegative 62 * 30 switch code for out-of-range count to follow in next word 63 * 31 an 8 bit char followed 64 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 65 * 32-127 single character (printable) ascii residue (ie, literal) 66 * 67 * A novel two-tiered string search technique is employed: 68 * 69 * First, a metacharacter-free subpattern and partial pathname is matched 70 * BACKWARDS to avoid full expansion of the pathname list. The time savings 71 * is 40-50% over forward matching, which cannot efficiently handle 72 * overlapped search patterns and compressed path residue. 73 * 74 * Then, the actual shell glob-style regular expression (if in this form) is 75 * matched against the candidate pathnames using the slower routines provided 76 * in the standard 'find'. 77 */ 78 79 #include <sys/param.h> 80 #include <ctype.h> 81 #include <err.h> 82 #include <fnmatch.h> 83 #include <locale.h> 84 #include <stdio.h> 85 #include <stdlib.h> 86 #include <string.h> 87 #include <unistd.h> 88 89 #ifdef MMAP 90 # include <sys/types.h> 91 # include <sys/stat.h> 92 # include <sys/mman.h> 93 # include <fcntl.h> 94 #endif 95 96 #include "locate.h" 97 #include "pathnames.h" 98 99 100 int f_mmap; /* use mmap */ 101 int f_icase; /* ignore case */ 102 int f_stdin; /* read database from stdin */ 103 int f_statistic; /* print statistic */ 104 int f_silent; /* suppress output, show only count of matches */ 105 long f_limit; /* limit number of output lines, 0 == infinite */ 106 long counter; /* counter for matches [-c] */ 107 char separator='\n'; /* line separator */ 108 109 u_char myctype[UCHAR_MAX + 1]; 110 111 void usage(void); 112 void statistic(FILE *, char *); 113 void fastfind(FILE *, char *, char *); 114 void fastfind_icase(FILE *, char *, char *); 115 void fastfind_mmap(char *, caddr_t, off_t, char *); 116 void fastfind_mmap_icase(char *, caddr_t, off_t, char *); 117 void search_mmap(char *, char **); 118 void search_fopen(char *, char **); 119 unsigned long cputime(void); 120 121 extern char **colon(char **, char*, char*); 122 extern int getwm(caddr_t); 123 extern int getwf(FILE *); 124 extern u_char *tolower_word(u_char *); 125 extern int check_bigram_char(int); 126 extern char *patprep(char *); 127 extern void rebuild_message(char *db); 128 extern int check_size(char *db); 129 130 int 131 main(int argc, char **argv) 132 { 133 int ch; 134 char **dbv = NULL; 135 char *path_fcodes; /* locate database */ 136 #ifdef MMAP 137 f_mmap = 1; /* mmap is default */ 138 #endif 139 (void) setlocale(LC_ALL, ""); 140 141 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) 142 switch(ch) { 143 case '0': /* 'find -print0' style */ 144 separator = '\0'; 145 break; 146 case 'S': /* statistic lines */ 147 f_statistic = 1; 148 break; 149 case 'l': /* limit number of output lines, 0 == infinite */ 150 f_limit = atol(optarg); 151 if (f_limit < 0 ) 152 errx(1, "invalid argument for -l: '%s'", optarg); 153 break; 154 case 'd': /* database */ 155 dbv = colon(dbv, optarg, _PATH_FCODES); 156 break; 157 case 'i': /* ignore case */ 158 f_icase = 1; 159 break; 160 case 'm': /* mmap */ 161 #ifdef MMAP 162 f_mmap = 1; 163 #else 164 warnx("mmap(2) not implemented"); 165 #endif 166 break; 167 case 's': /* stdio lib */ 168 f_mmap = 0; 169 break; 170 case 'c': /* suppress output, show only count of matches */ 171 f_silent = 1; 172 break; 173 default: 174 usage(); 175 } 176 argv += optind; 177 argc -= optind; 178 179 /* to few arguments */ 180 if (argc < 1 && !(f_statistic)) 181 usage(); 182 183 /* no (valid) database as argument */ 184 if (dbv == NULL || *dbv == NULL) { 185 /* try to read database from environment */ 186 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || 187 *path_fcodes == '\0') 188 /* use default database */ 189 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); 190 else /* $LOCATE_PATH */ 191 dbv = colon(dbv, path_fcodes, _PATH_FCODES); 192 } 193 194 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ 195 for (ch = 0; ch < UCHAR_MAX + 1; ch++) 196 myctype[ch] = tolower(ch); 197 198 /* foreach database ... */ 199 while((path_fcodes = *dbv) != NULL) { 200 dbv++; 201 202 if (!strcmp(path_fcodes, "-")) 203 f_stdin = 1; 204 else 205 f_stdin = 0; 206 207 #ifndef MMAP 208 f_mmap = 0; /* be paranoid */ 209 #endif 210 if (!f_mmap || f_stdin || f_statistic) 211 search_fopen(path_fcodes, argv); 212 else 213 search_mmap(path_fcodes, argv); 214 } 215 216 if (f_silent) 217 printf("%ld\n", counter); 218 exit(0); 219 } 220 221 /* 222 * Arguments: 223 * db database 224 * s search strings 225 */ 226 void 227 search_fopen(char *db, char **s) 228 { 229 FILE *fp; 230 231 /* can only read stdin once */ 232 if (f_stdin) { 233 fp = stdin; 234 if (*(s+1) != NULL) { 235 warnx("read database from stdin, use only `%s' as pattern", *s); 236 *(s+1) = NULL; 237 } 238 } 239 else { 240 if (!check_size(db)) 241 exit(1); 242 243 if ((fp = fopen(db, "r")) == NULL) { 244 warn("`%s'", db); 245 rebuild_message(db); 246 exit(1); 247 } 248 } 249 250 /* count only chars or lines */ 251 if (f_statistic) { 252 statistic(fp, db); 253 (void)fclose(fp); 254 return; 255 } 256 257 /* foreach search string ... */ 258 while(*s != NULL) { 259 if (!f_stdin && 260 fseek(fp, (long)0, SEEK_SET) == -1) 261 err(1, "fseek to begin of ``%s''\n", db); 262 263 if (f_icase) 264 fastfind_icase(fp, *s, db); 265 else 266 fastfind(fp, *s, db); 267 s++; 268 } 269 (void)fclose(fp); 270 } 271 272 #ifdef MMAP 273 /* 274 * Arguments: 275 * db database 276 * s search strings 277 */ 278 void 279 search_mmap(char *db, char **s) 280 { 281 struct stat sb; 282 int fd; 283 caddr_t p; 284 off_t len; 285 286 if (!check_size(db)) 287 exit(1); 288 289 if (stat(db, &sb) == -1) 290 err(1, "stat"); 291 292 len = sb.st_size; 293 294 if ((fd = open(db, O_RDONLY)) == -1) { 295 warn("%s", db); 296 rebuild_message(db); 297 exit(1); 298 } 299 300 if ((p = mmap((caddr_t)0, (size_t)len, 301 PROT_READ, MAP_SHARED, 302 fd, (off_t)0)) == MAP_FAILED) 303 err(1, "mmap ``%s''", db); 304 305 /* foreach search string ... */ 306 while (*s != NULL) { 307 if (f_icase) 308 fastfind_mmap_icase(*s, p, len, db); 309 else 310 fastfind_mmap(*s, p, len, db); 311 s++; 312 } 313 314 if (munmap(p, (size_t)len) == -1) 315 warn("munmap %s\n", db); 316 317 (void)close(fd); 318 } 319 #endif /* MMAP */ 320 321 void 322 usage () 323 { 324 (void)fprintf(stderr, 325 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); 326 (void)fprintf(stderr, 327 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); 328 exit(1); 329 } 330 331 332 /* load fastfind functions */ 333 334 /* statistic */ 335 /* fastfind_mmap, fastfind_mmap_icase */ 336 #ifdef MMAP 337 #undef FF_MMAP 338 #undef FF_ICASE 339 340 #define FF_MMAP 341 #include "fastfind.c" 342 #define FF_ICASE 343 #include "fastfind.c" 344 #endif /* MMAP */ 345 346 /* fopen */ 347 /* fastfind, fastfind_icase */ 348 #undef FF_MMAP 349 #undef FF_ICASE 350 #include "fastfind.c" 351 #define FF_ICASE 352 #include "fastfind.c" 353