1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods. 7 * 8 * Redistribution and use in source and binary forms are permitted 9 * provided that the above copyright notice and this paragraph are 10 * duplicated in all such forms and that any documentation, 11 * advertising materials, and other materials related to such 12 * distribution and use acknowledge that the software was developed 13 * by the University of California, Berkeley. The name of the 14 * University may not be used to endorse or promote products derived 15 * from this software without specific prior written permission. 16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 19 */ 20 21 #ifndef lint 22 char copyright[] = 23 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\ 24 All rights reserved.\n"; 25 #endif /* not lint */ 26 27 #ifndef lint 28 static char sccsid[] = "@(#)locate.c 5.1 (Berkeley) 03/06/90"; 29 #endif /* not lint */ 30 31 /* 32 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 33 * 34 * Locate scans a file list for the full pathname of a file given only part 35 * of the name. The list has been processed with with "front-compression" 36 * and bigram coding. Front compression reduces space by a factor of 4-5, 37 * bigram coding by a further 20-25%. 38 * 39 * The codes are: 40 * 41 * 0-28 likeliest differential counts + offset to make nonnegative 42 * 30 switch code for out-of-range count to follow in next word 43 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 44 * 32-127 single character (printable) ascii residue (ie, literal) 45 * 46 * A novel two-tiered string search technique is employed: 47 * 48 * First, a metacharacter-free subpattern and partial pathname is matched 49 * BACKWARDS to avoid full expansion of the pathname list. The time savings 50 * is 40-50% over forward matching, which cannot efficiently handle 51 * overlapped search patterns and compressed path residue. 52 * 53 * Then, the actual shell glob-style regular expression (if in this form) is 54 * matched against the candidate pathnames using the slower routines provided 55 * in the standard 'find'. 56 */ 57 58 #include <sys/param.h> 59 #include <unistd.h> 60 #include <stdio.h> 61 #include "locate.h" 62 #include "pathnames.h" 63 64 FILE *fp; 65 66 main(argc, argv) 67 int argc; 68 char **argv; 69 { 70 if (argc != 2) { 71 (void)fprintf(stderr, "usage: locate pattern\n"); 72 exit(1); 73 } 74 if (!(fp = fopen(_PATH_FCODES, "r"))) { 75 (void)fprintf(stderr, "locate: no database file %s.\n", 76 _PATH_FCODES); 77 exit(1); 78 } 79 while (*++argv) 80 fastfind(*argv); 81 exit(0); 82 } 83 84 fastfind(pathpart) 85 char *pathpart; 86 { 87 register char *p, *s; 88 register int c; 89 int count, found, globflag; 90 char *cutoff, *patend, *q, *index(), *patprep(); 91 char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 92 93 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) 94 p[c] = getc(fp), s[c] = getc(fp); 95 96 p = pathpart; 97 globflag = index(p, '*') || index(p, '?') || index(p, '['); 98 patend = patprep(p); 99 100 found = 0; 101 for (c = getc(fp), count = 0; c != EOF;) { 102 count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; 103 /* overlay old path */ 104 for (p = path + count; (c = getc(fp)) > SWITCH;) 105 if (c < PARITY) 106 *p++ = c; 107 else { /* bigrams are parity-marked */ 108 c &= PARITY - 1; 109 *p++ = bigram1[c], *p++ = bigram2[c]; 110 } 111 *p-- = NULL; 112 cutoff = (found ? path : path + count); 113 for (found = 0, s = p; s >= cutoff; s--) 114 if (*s == *patend) { /* fast first char check */ 115 for (p = patend - 1, q = s - 1; *p != NULL; 116 p--, q--) 117 if (*q != *p) 118 break; 119 if (*p == NULL) { /* fast match success */ 120 found = 1; 121 if (!globflag || 122 fnmatch(pathpart, path, FNM_QUOTE)) 123 (void)printf("%s\n", path); 124 break; 125 } 126 } 127 } 128 } 129 130 /* 131 * extract last glob-free subpattern in name for fast pre-match; prepend 132 * '\0' for backwards match; return end of new pattern 133 */ 134 static char globfree[100]; 135 136 char * 137 patprep(name) 138 char *name; 139 { 140 register char *endmark, *p, *subp; 141 142 subp = globfree; 143 *subp++ = '\0'; 144 p = name + strlen(name) - 1; 145 /* skip trailing metacharacters (and [] ranges) */ 146 for (; p >= name; p--) 147 if (index("*?", *p) == 0) 148 break; 149 if (p < name) 150 p = name; 151 if (*p == ']') 152 for (p--; p >= name; p--) 153 if (*p == '[') { 154 p--; 155 break; 156 } 157 if (p < name) 158 p = name; 159 /* 160 * if pattern has only metacharacters, check every path (force '/' 161 * search) 162 */ 163 if ((p == name) && index("?*[]", *p) != 0) 164 *subp++ = '/'; 165 else { 166 for (endmark = p; p >= name; p--) 167 if (index("]*?", *p) != 0) 168 break; 169 for (++p; 170 (p <= endmark) && subp < (globfree + sizeof(globfree));) 171 *subp++ = *p++; 172 } 173 *subp = '\0'; 174 return(--subp); 175 } 176