1 /* 2 * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin. 3 * Copyright (c) 1989, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * James A. Woods. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * $FreeBSD: src/usr.bin/locate/locate/fastfind.c,v 1.12.2.1 2001/02/22 13:28:52 wosch Exp $ 38 */ 39 40 41 #ifndef _LOCATE_STATISTIC_ 42 #define _LOCATE_STATISTIC_ 43 44 void 45 statistic (FILE *fp, char *path_fcodes) 46 { 47 int lines, chars, size, big, zwerg; 48 u_char *p, *s; 49 int c; 50 int count, umlaut; 51 u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 52 53 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 54 p[c] = check_bigram_char(getc(fp)); 55 s[c] = check_bigram_char(getc(fp)); 56 } 57 58 lines = chars = big = zwerg = umlaut = 0; 59 size = NBG + NBG; 60 61 for (c = getc(fp), count = 0; c != EOF; size++) { 62 if (c == SWITCH) { 63 count += getwf(fp) - OFFSET; 64 size += sizeof(int); 65 zwerg++; 66 } else 67 count += c - OFFSET; 68 69 for (p = path + count; (c = getc(fp)) > SWITCH; size++) 70 if (c < PARITY) { 71 if (c == UMLAUT) { 72 c = getc(fp); 73 size++; 74 umlaut++; 75 } 76 p++; 77 } else { 78 /* bigram char */ 79 big++; 80 p += 2; 81 } 82 83 p++; 84 lines++; 85 chars += (p - path); 86 } 87 88 (void)printf("\nDatabase: %s\n", path_fcodes); 89 (void)printf("Compression: Front: %2.2f%%, ", 90 (size + big - (2 * NBG)) / (chars / (float)100)); 91 (void)printf("Bigram: %2.2f%%, ", (size - big) / (size / (float)100)); 92 (void)printf("Total: %2.2f%%\n", 93 (size - (2 * NBG)) / (chars / (float)100)); 94 (void)printf("Filenames: %d, ", lines); 95 (void)printf("Characters: %d, ", chars); 96 (void)printf("Database size: %d\n", size); 97 (void)printf("Bigram characters: %d, ", big); 98 (void)printf("Integers: %d, ", zwerg); 99 (void)printf("8-Bit characters: %d\n", umlaut); 100 101 } 102 #endif /* _LOCATE_STATISTIC_ */ 103 104 105 void 106 #ifdef FF_MMAP 107 108 109 #ifdef FF_ICASE 110 fastfind_mmap_icase 111 #else 112 fastfind_mmap 113 #endif /* FF_ICASE */ 114 (char *pathpart, caddr_t paddr, int len, char *database) 115 116 117 #else /* MMAP */ 118 119 120 #ifdef FF_ICASE 121 fastfind_icase 122 #else 123 fastfind 124 #endif /* FF_ICASE */ 125 126 (FILE *fp, char *pathpart, char *database) 127 128 129 #endif /* MMAP */ 130 131 { 132 u_char *p, *s, *patend, *q, *foundchar; 133 int c, cc; 134 int count, found, globflag; 135 u_char *cutoff; 136 u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 137 138 #ifdef FF_ICASE 139 /* use a lookup table for case insensitive search */ 140 u_char table[UCHAR_MAX + 1]; 141 142 tolower_word(pathpart); 143 #endif /* FF_ICASE*/ 144 145 /* init bigram table */ 146 #ifdef FF_MMAP 147 if (len < (2*NBG)) 148 errx(1, "database too small: %s", database); 149 150 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { 151 p[c] = check_bigram_char(*paddr++); 152 s[c] = check_bigram_char(*paddr++); 153 } 154 #else 155 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { 156 p[c] = check_bigram_char(getc(fp)); 157 s[c] = check_bigram_char(getc(fp)); 158 } 159 #endif /* FF_MMAP */ 160 161 /* find optimal (last) char for searching */ 162 for (p = pathpart; *p != '\0'; p++) 163 if (index(LOCATE_REG, *p) != NULL) 164 break; 165 166 if (*p == '\0') 167 globflag = 0; 168 else 169 globflag = 1; 170 171 p = pathpart; 172 patend = patprep(p); 173 cc = *patend; 174 175 #ifdef FF_ICASE 176 /* set patend char to true */ 177 for (c = 0; c < UCHAR_MAX + 1; c++) 178 table[c] = 0; 179 180 table[TOLOWER(*patend)] = 1; 181 table[toupper(*patend)] = 1; 182 #endif /* FF_ICASE */ 183 184 185 /* main loop */ 186 found = count = 0; 187 foundchar = 0; 188 189 #ifdef FF_MMAP 190 c = (u_char)*paddr++; len--; 191 for (; len > 0; ) { 192 #else 193 c = getc(fp); 194 for (; c != EOF; ) { 195 #endif /* FF_MMAP */ 196 197 /* go forward or backward */ 198 if (c == SWITCH) { /* big step, an integer */ 199 #ifdef FF_MMAP 200 count += getwm(paddr) - OFFSET; 201 len -= INTSIZE; paddr += INTSIZE; 202 #else 203 count += getwf(fp) - OFFSET; 204 #endif /* FF_MMAP */ 205 } else { /* slow step, =< 14 chars */ 206 count += c - OFFSET; 207 } 208 209 /* overlay old path */ 210 p = path + count; 211 foundchar = p - 1; 212 213 #ifdef FF_MMAP 214 for (; len > 0;) { 215 c = (u_char)*paddr++; 216 len--; 217 #else 218 for (;;) { 219 c = getc(fp); 220 #endif /* FF_MMAP */ 221 /* 222 * == UMLAUT: 8 bit char followed 223 * <= SWITCH: offset 224 * >= PARITY: bigram 225 * rest: single ascii char 226 * 227 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram 228 */ 229 if (c < PARITY) { 230 if (c <= UMLAUT) { 231 if (c == UMLAUT) { 232 #ifdef FF_MMAP 233 c = (u_char)*paddr++; 234 len--; 235 #else 236 c = getc(fp); 237 #endif /* FF_MMAP */ 238 239 } else 240 break; /* SWITCH */ 241 } 242 #ifdef FF_ICASE 243 if (table[c]) 244 #else 245 if (c == cc) 246 #endif /* FF_ICASE */ 247 foundchar = p; 248 *p++ = c; 249 } 250 else { 251 /* bigrams are parity-marked */ 252 TO7BIT(c); 253 254 #ifndef FF_ICASE 255 if (bigram1[c] == cc || 256 bigram2[c] == cc) 257 #else 258 259 if (table[bigram1[c]] || 260 table[bigram2[c]]) 261 #endif /* FF_ICASE */ 262 foundchar = p + 1; 263 264 *p++ = bigram1[c]; 265 *p++ = bigram2[c]; 266 } 267 } 268 269 if (found) { /* previous line matched */ 270 cutoff = path; 271 *p-- = '\0'; 272 foundchar = p; 273 } else if (foundchar >= path + count) { /* a char matched */ 274 *p-- = '\0'; 275 cutoff = path + count; 276 } else /* nothing to do */ 277 continue; 278 279 found = 0; 280 for (s = foundchar; s >= cutoff; s--) { 281 if (*s == cc 282 #ifdef FF_ICASE 283 || TOLOWER(*s) == cc 284 #endif /* FF_ICASE */ 285 ) { /* fast first char check */ 286 for (p = patend - 1, q = s - 1; *p != '\0'; 287 p--, q--) 288 if (*q != *p 289 #ifdef FF_ICASE 290 && TOLOWER(*q) != *p 291 #endif /* FF_ICASE */ 292 ) 293 break; 294 if (*p == '\0') { /* fast match success */ 295 found = 1; 296 if (!globflag || 297 #ifndef FF_ICASE 298 !fnmatch(pathpart, path, 0)) 299 #else 300 !fnmatch(pathpart, path, 301 FNM_CASEFOLD)) 302 #endif /* !FF_ICASE */ 303 { 304 if (f_silent) 305 counter++; 306 else if (f_limit) { 307 counter++; 308 if (f_limit >= counter) 309 (void)puts(path); 310 else 311 errx(0, "[show only %d lines]", counter - 1); 312 } else 313 (void)puts(path); 314 } 315 break; 316 } 317 } 318 } 319 } 320 } 321