1 /* @(#)spell.h 4.2 02/02/91 */ 2 3 #include <stdio.h> 4 #include <ctype.h> 5 6 #ifndef unix 7 #define SHIFT 5 8 #define TABSIZE (int)(400000/(1<<SHIFT)) 9 int *tab; /*honeywell loader deficiency*/ 10 #else 11 #define Tolower(c) (isupper(c)?tolower(c):c) /* ugh!!! */ 12 #define SHIFT 4 13 #define TABSIZE 25000 /*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/ 14 short tab[TABSIZE]; 15 #endif 16 long p[] = { 17 399871, 18 399887, 19 399899, 20 399911, 21 399913, 22 399937, 23 399941, 24 399953, 25 399979, 26 399983, 27 399989, 28 }; 29 #define NP (sizeof(p)/sizeof(p[0])) 30 #define NW 30 31 32 /* 33 * Hash table for spelling checker has n bits. 34 * Each word w is hashed by k different (modular) hash functions, hi. 35 * The bits hi(w), i=1..k, are set for words in the dictionary. 36 * Assuming independence, the probability that no word of a d-word 37 * dictionary sets a particular bit is given by the Poisson formula 38 * P = exp(-y)*y**0/0!, where y=d*k/n. 39 * The probability that a random string is recognized as a word is then 40 * (1-P)**k. For given n and d this is minimum when y=log(2), P=1/2, 41 * whence one finds, for example, that a 25000-word dictionary in a 42 * 400000-bit table works best with k=11. 43 */ 44 45 long pow2[NP][NW]; 46 47 prime(argc, argv) 48 int argc; 49 register char **argv; 50 { 51 int i, j; 52 long h; 53 register long *lp; 54 55 #ifndef unix 56 if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL) 57 return(0); 58 #endif 59 if (argc > 1) { 60 FILE *f; 61 if ((f = fopen(argv[1], "ri")) == NULL) 62 return(0); 63 if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE) 64 return(0); 65 fclose(f); 66 } 67 for (i=0; i<NP; i++) { 68 h = *(lp = pow2[i]) = 1<<14; 69 for (j=1; j<NW; j++) 70 h = *++lp = (h<<7) % p[i]; 71 } 72 return(1); 73 } 74 75 #define get(h) (tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1)))) 76 #define set(h) tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1)) 77