1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 char copyright[] = 13 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 14 All rights reserved.\n"; 15 #endif /* not lint */ 16 17 #ifndef lint 18 static char sccsid[] = "@(#)locate.c 5.2 (Berkeley) 06/01/90"; 19 #endif /* not lint */ 20 21 /* 22 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 23 * 24 * Locate scans a file list for the full pathname of a file given only part 25 * of the name. The list has been processed with with "front-compression" 26 * and bigram coding. Front compression reduces space by a factor of 4-5, 27 * bigram coding by a further 20-25%. 28 * 29 * The codes are: 30 * 31 * 0-28 likeliest differential counts + offset to make nonnegative 32 * 30 switch code for out-of-range count to follow in next word 33 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 34 * 32-127 single character (printable) ascii residue (ie, literal) 35 * 36 * A novel two-tiered string search technique is employed: 37 * 38 * First, a metacharacter-free subpattern and partial pathname is matched 39 * BACKWARDS to avoid full expansion of the pathname list. The time savings 40 * is 40-50% over forward matching, which cannot efficiently handle 41 * overlapped search patterns and compressed path residue. 42 * 43 * Then, the actual shell glob-style regular expression (if in this form) is 44 * matched against the candidate pathnames using the slower routines provided 45 * in the standard 'find'. 46 */ 47 48 #include <sys/param.h> 49 #include <unistd.h> 50 #include <stdio.h> 51 #include "locate.h" 52 #include "pathnames.h" 53 54 FILE *fp; 55 56 main(argc, argv) 57 int argc; 58 char **argv; 59 { 60 if (argc != 2) { 61 (void)fprintf(stderr, "usage: locate pattern\n"); 62 exit(1); 63 } 64 if (!(fp = fopen(_PATH_FCODES, "r"))) { 65 (void)fprintf(stderr, "locate: no database file %s.\n", 66 _PATH_FCODES); 67 exit(1); 68 } 69 while (*++argv) 70 fastfind(*argv); 71 exit(0); 72 } 73 74 fastfind(pathpart) 75 char *pathpart; 76 { 77 register char *p, *s; 78 register int c; 79 int count, found, globflag; 80 char *cutoff, *patend, *q, *index(), *patprep(); 81 char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 82 83 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) 84 p[c] = getc(fp), s[c] = getc(fp); 85 86 p = pathpart; 87 globflag = index(p, '*') || index(p, '?') || index(p, '['); 88 patend = patprep(p); 89 90 found = 0; 91 for (c = getc(fp), count = 0; c != EOF;) { 92 count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; 93 /* overlay old path */ 94 for (p = path + count; (c = getc(fp)) > SWITCH;) 95 if (c < PARITY) 96 *p++ = c; 97 else { /* bigrams are parity-marked */ 98 c &= PARITY - 1; 99 *p++ = bigram1[c], *p++ = bigram2[c]; 100 } 101 *p-- = NULL; 102 cutoff = (found ? path : path + count); 103 for (found = 0, s = p; s >= cutoff; s--) 104 if (*s == *patend) { /* fast first char check */ 105 for (p = patend - 1, q = s - 1; *p != NULL; 106 p--, q--) 107 if (*q != *p) 108 break; 109 if (*p == NULL) { /* fast match success */ 110 found = 1; 111 if (!globflag || 112 fnmatch(pathpart, path, FNM_QUOTE)) 113 (void)printf("%s\n", path); 114 break; 115 } 116 } 117 } 118 } 119 120 /* 121 * extract last glob-free subpattern in name for fast pre-match; prepend 122 * '\0' for backwards match; return end of new pattern 123 */ 124 static char globfree[100]; 125 126 char * 127 patprep(name) 128 char *name; 129 { 130 register char *endmark, *p, *subp; 131 132 subp = globfree; 133 *subp++ = '\0'; 134 p = name + strlen(name) - 1; 135 /* skip trailing metacharacters (and [] ranges) */ 136 for (; p >= name; p--) 137 if (index("*?", *p) == 0) 138 break; 139 if (p < name) 140 p = name; 141 if (*p == ']') 142 for (p--; p >= name; p--) 143 if (*p == '[') { 144 p--; 145 break; 146 } 147 if (p < name) 148 p = name; 149 /* 150 * if pattern has only metacharacters, check every path (force '/' 151 * search) 152 */ 153 if ((p == name) && index("?*[]", *p) != 0) 154 *subp++ = '/'; 155 else { 156 for (endmark = p; p >= name; p--) 157 if (index("]*?", *p) != 0) 158 break; 159 for (++p; 160 (p <= endmark) && subp < (globfree + sizeof(globfree));) 161 *subp++ = *p++; 162 } 163 *subp = '\0'; 164 return(--subp); 165 } 166