1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.proprietary.c% 6 * 7 * @(#)spell.h 8.1 (Berkeley) 06/06/93 8 */ 9 10 #include <stdio.h> 11 #include <ctype.h> 12 13 #ifndef unix 14 #define SHIFT 5 15 #define TABSIZE (int)(400000/(1<<SHIFT)) 16 int *tab; /*honeywell loader deficiency*/ 17 #else 18 #define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */ 19 #define SHIFT 4 20 #define TABSIZE 25000 /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/ 21 short tab[TABSIZE]; 22 #endif 23 long p[] = { 24 399871, 25 399887, 26 399899, 27 399911, 28 399913, 29 399937, 30 399941, 31 399953, 32 399979, 33 399983, 34 399989, 35 }; 36 #define NP (sizeof(p)/sizeof(p[0])) 37 #define NW 30 38 39 /* 40 * Hash table for spelling checker has n bits. 41 * Each word w is hashed by k different (modular) hash functions, hi. 42 * The bits hi(w), i=1..k, are set for words in the dictionary. 43 * Assuming independence, the probability that no word of a d-word 44 * dictionary sets a particular bit is given by the Poisson formula 45 * P = exp(-y)*y**0/0!, where y=d*k/n. 46 * The probability that a random string is recognized as a word is then 47 * (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2, 48 * whence one finds, for example, that a 25000-word dictionary in a 49 * 400000-bit table works best with k=11. 50 */ 51 52 long pow2[NP][NW]; 53 54 prime(argc, argv) 55 int argc; 56 register char **argv; 57 { 58 int i, j; 59 long h; 60 register long *lp; 61 62 #ifndef unix 63 if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL) 64 return(0); 65 #endif 66 if (argc > 1) { 67 FILE *f; 68 if ((f = fopen(argv[1], "ri")) == NULL) 69 return(0); 70 if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE) 71 return(0); 72 fclose(f); 73 } 74 for (i=0; i<NP; i++) { 75 h = *(lp = pow2[i]) = 1<<14; 76 for (j=1; j<NW; j++) 77 h = *++lp = (h<<7) % p[i]; 78 } 79 return(1); 80 } 81 82 #define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1)))) 83 #define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1)) 84