xref: /original-bsd/usr.bin/spell/spell.h (revision f0fd5f8a)
1 /*	@(#)spell.h	4.1	12/18/82	*/
2 
3 #include <stdio.h>
4 #include <ctype.h>
5 
6 #ifndef unix
7 #define SHIFT	5
8 #define TABSIZE (int)(400000/(1<<SHIFT))
9 int	*tab;	/*honeywell loader deficiency*/
10 #else
11 #define Tolower(c)	(isupper(c)?tolower(c):c) /* ugh!!! */
12 #define SHIFT	4
13 #define TABSIZE 25000	/*(int)(400000/(1<<shift))--pdp11 compiler deficiency*/
14 short	tab[TABSIZE];
15 #endif
16 long	p[] = {
17 	399871,
18 	399887,
19 	399899,
20 	399911,
21 	399913,
22 	399937,
23 	399941,
24 	399953,
25 	399979,
26 	399983,
27 	399989,
28 };
29 #define	NP	(sizeof(p)/sizeof(p[0]))
30 #define	NW	30
31 
32 /*
33 * Hash table for spelling checker has n bits.
34 * Each word w is hashed by k different (modular) hash functions, hi.
35 * The bits hi(w), i=1..k, are set for words in the dictionary.
36 * Assuming independence, the probability that no word of a d-word
37 * dictionary sets a particular bit is given by the Poisson formula
38 * P = exp(-y)*y**0/0!, where y=d*k/n.
39 * The probability that a random string is recognized as a word is then
40 * (1-P)**k.  For given n and d this is minimum when y=log(2), P=1/2,
41 * whence one finds, for example, that a 25000-word dictionary in a
42 * 400000-bit table works best with k=11.
43 */
44 
45 long	pow2[NP][NW];
46 
47 prime(argc, argv) register char **argv;
48 {
49 	int i, j;
50 	long h;
51 	register long *lp;
52 
53 #ifndef unix
54 	if ((tab = (int *)calloc(sizeof(*tab), TABSIZE)) == NULL)
55 		return(0);
56 #endif
57 	if (argc > 1) {
58 		FILE *f;
59 		if ((f = fopen(argv[1], "ri")) == NULL)
60 			return(0);
61 		if (fread((char *)tab, sizeof(*tab), TABSIZE, f) != TABSIZE)
62 			return(0);
63 		fclose(f);
64 	}
65 	for (i=0; i<NP; i++) {
66 		h = *(lp = pow2[i]) = 1<<14;
67 		for (j=1; j<NW; j++)
68 			h = *++lp = (h<<7) % p[i];
69 	}
70 	return(1);
71 }
72 
73 #define get(h)	(tab[h>>SHIFT]&(1<<((int)h&((1<<SHIFT)-1))))
74 #define set(h)	tab[h>>SHIFT] |= 1<<((int)h&((1<<SHIFT)-1))
75