1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char copyright[] = 13 "@(#) Copyright (c) 1989, 1993\n\ 14 The Regents of the University of California. All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 06/06/93"; 19 #endif /* not lint */ 20 21 /* 22 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 23 * 24 * Locate scans a file list for the full pathname of a file given only part 25 * of the name. The list has been processed with with "front-compression" 26 * and bigram coding. Front compression reduces space by a factor of 4-5, 27 * bigram coding by a further 20-25%. 28 * 29 * The codes are: 30 * 31 * 0-28 likeliest differential counts + offset to make nonnegative 32 * 30 switch code for out-of-range count to follow in next word 33 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 34 * 32-127 single character (printable) ascii residue (ie, literal) 35 * 36 * A novel two-tiered string search technique is employed: 37 * 38 * First, a metacharacter-free subpattern and partial pathname is matched 39 * BACKWARDS to avoid full expansion of the pathname list. The time savings 40 * is 40-50% over forward matching, which cannot efficiently handle 41 * overlapped search patterns and compressed path residue. 42 * 43 * Then, the actual shell glob-style regular expression (if in this form) is 44 * matched against the candidate pathnames using the slower routines provided 45 * in the standard 'find'. 46 */ 47 48 #include <sys/param.h> 49 50 #include <fnmatch.h> 51 #include <unistd.h> 52 #include <stdio.h> 53 #include <string.h> 54 55 #include "locate.h" 56 #include "pathnames.h" 57 58 FILE *fp; 59 60 int 61 main(argc, argv) 62 int argc; 63 char *argv[]; 64 { 65 if (argc != 2) { 66 (void)fprintf(stderr, "usage: locate pattern\n"); 67 exit(1); 68 } 69 if (!(fp = fopen(_PATH_FCODES, "r"))) { 70 (void)fprintf(stderr, "locate: no database file %s.\n", 71 _PATH_FCODES); 72 exit(1); 73 } 74 while (*++argv) 75 fastfind(*argv); 76 exit(0); 77 } 78 79 fastfind(pathpart) 80 char *pathpart; 81 { 82 register char *p, *s; 83 register int c; 84 int count, found, globflag; 85 char *cutoff, *patend, *q, *patprep(); 86 char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 87 88 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) 89 p[c] = getc(fp), s[c] = getc(fp); 90 91 p = pathpart; 92 globflag = index(p, '*') || index(p, '?') || index(p, '['); 93 patend = patprep(p); 94 95 found = 0; 96 for (c = getc(fp), count = 0; c != EOF;) { 97 count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; 98 /* overlay old path */ 99 for (p = path + count; (c = getc(fp)) > SWITCH;) 100 if (c < PARITY) 101 *p++ = c; 102 else { /* bigrams are parity-marked */ 103 c &= PARITY - 1; 104 *p++ = bigram1[c], *p++ = bigram2[c]; 105 } 106 *p-- = NULL; 107 cutoff = (found ? path : path + count); 108 for (found = 0, s = p; s >= cutoff; s--) 109 if (*s == *patend) { /* fast first char check */ 110 for (p = patend - 1, q = s - 1; *p != NULL; 111 p--, q--) 112 if (*q != *p) 113 break; 114 if (*p == NULL) { /* fast match success */ 115 found = 1; 116 if (!globflag || 117 !fnmatch(pathpart, path, 0)) 118 (void)printf("%s\n", path); 119 break; 120 } 121 } 122 } 123 } 124 125 /* 126 * extract last glob-free subpattern in name for fast pre-match; prepend 127 * '\0' for backwards match; return end of new pattern 128 */ 129 static char globfree[100]; 130 131 char * 132 patprep(name) 133 char *name; 134 { 135 register char *endmark, *p, *subp; 136 137 subp = globfree; 138 *subp++ = '\0'; 139 p = name + strlen(name) - 1; 140 /* skip trailing metacharacters (and [] ranges) */ 141 for (; p >= name; p--) 142 if (index("*?", *p) == 0) 143 break; 144 if (p < name) 145 p = name; 146 if (*p == ']') 147 for (p--; p >= name; p--) 148 if (*p == '[') { 149 p--; 150 break; 151 } 152 if (p < name) 153 p = name; 154 /* 155 * if pattern has only metacharacters, check every path (force '/' 156 * search) 157 */ 158 if ((p == name) && index("?*[]", *p) != 0) 159 *subp++ = '/'; 160 else { 161 for (endmark = p; p >= name; p--) 162 if (index("]*?", *p) != 0) 163 break; 164 for (++p; 165 (p <= endmark) && subp < (globfree + sizeof(globfree));) 166 *subp++ = *p++; 167 } 168 *subp = '\0'; 169 return(--subp); 170 } 171