1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 char copyright[] = 13 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 14 All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)locate.c 5.4 (Berkeley) 06/29/92"; 19 #endif /* not lint */ 20 21 /* 22 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 23 * 24 * Locate scans a file list for the full pathname of a file given only part 25 * of the name. The list has been processed with with "front-compression" 26 * and bigram coding. Front compression reduces space by a factor of 4-5, 27 * bigram coding by a further 20-25%. 28 * 29 * The codes are: 30 * 31 * 0-28 likeliest differential counts + offset to make nonnegative 32 * 30 switch code for out-of-range count to follow in next word 33 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 34 * 32-127 single character (printable) ascii residue (ie, literal) 35 * 36 * A novel two-tiered string search technique is employed: 37 * 38 * First, a metacharacter-free subpattern and partial pathname is matched 39 * BACKWARDS to avoid full expansion of the pathname list. The time savings 40 * is 40-50% over forward matching, which cannot efficiently handle 41 * overlapped search patterns and compressed path residue. 42 * 43 * Then, the actual shell glob-style regular expression (if in this form) is 44 * matched against the candidate pathnames using the slower routines provided 45 * in the standard 'find'. 46 */ 47 48 #include <sys/param.h> 49 50 #include <fnmatch.h> 51 #include <unistd.h> 52 #include <stdio.h> 53 #include "locate.h" 54 #include "pathnames.h" 55 56 FILE *fp; 57 58 main(argc, argv) 59 int argc; 60 char **argv; 61 { 62 if (argc != 2) { 63 (void)fprintf(stderr, "usage: locate pattern\n"); 64 exit(1); 65 } 66 if (!(fp = fopen(_PATH_FCODES, "r"))) { 67 (void)fprintf(stderr, "locate: no database file %s.\n", 68 _PATH_FCODES); 69 exit(1); 70 } 71 while (*++argv) 72 fastfind(*argv); 73 exit(0); 74 } 75 76 fastfind(pathpart) 77 char *pathpart; 78 { 79 register char *p, *s; 80 register int c; 81 int count, found, globflag; 82 char *cutoff, *patend, *q, *index(), *patprep(); 83 char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 84 85 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) 86 p[c] = getc(fp), s[c] = getc(fp); 87 88 p = pathpart; 89 globflag = index(p, '*') || index(p, '?') || index(p, '['); 90 patend = patprep(p); 91 92 found = 0; 93 for (c = getc(fp), count = 0; c != EOF;) { 94 count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; 95 /* overlay old path */ 96 for (p = path + count; (c = getc(fp)) > SWITCH;) 97 if (c < PARITY) 98 *p++ = c; 99 else { /* bigrams are parity-marked */ 100 c &= PARITY - 1; 101 *p++ = bigram1[c], *p++ = bigram2[c]; 102 } 103 *p-- = NULL; 104 cutoff = (found ? path : path + count); 105 for (found = 0, s = p; s >= cutoff; s--) 106 if (*s == *patend) { /* fast first char check */ 107 for (p = patend - 1, q = s - 1; *p != NULL; 108 p--, q--) 109 if (*q != *p) 110 break; 111 if (*p == NULL) { /* fast match success */ 112 found = 1; 113 if (!globflag || 114 !fnmatch(pathpart, path, 0)) 115 (void)printf("%s\n", path); 116 break; 117 } 118 } 119 } 120 } 121 122 /* 123 * extract last glob-free subpattern in name for fast pre-match; prepend 124 * '\0' for backwards match; return end of new pattern 125 */ 126 static char globfree[100]; 127 128 char * 129 patprep(name) 130 char *name; 131 { 132 register char *endmark, *p, *subp; 133 134 subp = globfree; 135 *subp++ = '\0'; 136 p = name + strlen(name) - 1; 137 /* skip trailing metacharacters (and [] ranges) */ 138 for (; p >= name; p--) 139 if (index("*?", *p) == 0) 140 break; 141 if (p < name) 142 p = name; 143 if (*p == ']') 144 for (p--; p >= name; p--) 145 if (*p == '[') { 146 p--; 147 break; 148 } 149 if (p < name) 150 p = name; 151 /* 152 * if pattern has only metacharacters, check every path (force '/' 153 * search) 154 */ 155 if ((p == name) && index("?*[]", *p) != 0) 156 *subp++ = '/'; 157 else { 158 for (endmark = p; p >= name; p--) 159 if (index("]*?", *p) != 0) 160 break; 161 for (++p; 162 (p <= endmark) && subp < (globfree + sizeof(globfree));) 163 *subp++ = *p++; 164 } 165 *subp = '\0'; 166 return(--subp); 167 } 168