1 /*-
2 * %sccs.include.proprietary.c%
3 */
4
5 #ifndef lint
6 static char sccsid[] = "@(#)what4.c 4.3 (Berkeley) 04/18/91";
7 #endif /* not lint */
8
9 #include "what..c"
/*
 * Sizing constants for the word-frequency table.
 *
 * NW*ZIPF      cap on the number of distinct words freqwd() will store
 * TSIZE        number of slots in the word[] table (HASHF times that cap)
 * NW*ZIPF*WLEN size in bytes of the text buffer holding the stored words
 * NF           maximum number of input files freqwd() opens
 * SAME         the strcmp() result that means "equal"
 *
 * TSIZE is parenthesized so that it expands as a single value inside
 * larger expressions such as x / TSIZE or x % TSIZE.
 */
#define NW 5
#define ZIPF 10
#define HASHF 3
#define WLEN 10
#define SAME 0
#define TSIZE (HASHF*ZIPF*NW)
#define NF 10
17
/* One slot of the word table: a word and the number of times it was seen. */
struct wst {
	char *tx;	/* word text; a null pointer marks an unused slot */
	int ct;		/* occurrence count */
};
23 int HSIZE;
24 static struct wst word[TSIZE];
25 static char tbuf[NW*ZIPF*WLEN], *tp = tbuf;
26
freqwd(fn,wd,nin)27 freqwd ( fn, wd, nin )
28 char *fn[], *wd[];
29 {
30 FILE *fi[NF];
31 int nw = 0, i, any, nf, j, wexch(), wcomp();
32 char tw[20];
33 for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--);
34 for(nf=0; fn[nf] && nf<NF; nf++)
35 fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL;
36 do {
37 any=0;
38 for(i=0; i<nf; i++)
39 {
40 if (fi[i]==NULL) continue;
41 if (gw(fi[i], tw)==0)
42 {
43 fclose(fi[i]);
44 fi[i]==NULL;
45 continue;
46 }
47 any=1;
48 if (common(tw)) continue;
49 if (strlen(tw)<3) continue;
50 j = lookup (tw);
51 if (j<0 && nw < ZIPF*NW)
52 {
53 j = -j;
54 strcpy (tp, tw);
55 word[j].tx = tp;
56 while (*tp++);
57 _assert (tp < tbuf+NW*ZIPF*WLEN);
58 word[j].ct = 1;
59 nw++;
60 }
61 else if (j>0)
62 word[j].ct++;
63 }
64 }
65 while (any>0);
66 shell ( TSIZE, wcomp, wexch );
67 for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++)
68 if (nw>=nin*2 && word[nw].ct != word[0].ct)
69 break;
70 for(i=0; i<nw; i++)
71 wd[i] = word[i].tx;
72 return(nw);
73 }
74
lookup(wt)75 lookup (wt)
76 char *wt;
77 {
78 int h;
79 h = hash(wt);
80 for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE)
81 {
82 if (h==0) continue;
83 if (strcmp(wt, word[h].tx) == SAME)
84 return (h);
85 }
86 return ( -h );
87 }
88
/*
 * hash - compute a small non-negative hash of a NUL-terminated string.
 *
 * Each byte is shifted left by its position modulo 5 and XORed into the
 * result.  Bytes are read as unsigned char so that characters with the
 * high bit set are never negative; shifting a negative value is undefined.
 * With unsigned bytes the result cannot be negative, so no sign fix-up is
 * needed.
 */
int
hash(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;
	int k = 0;
	int i;

	for (i = 0; *p != '\0'; p++, i++)
		k ^= *p << (i % 5);
	return k;
}
97
/*
 * gw - read the next word from a stream.
 *
 * f    stream to read; a null pointer is treated as end of input.
 * t    buffer receiving the word, lower-cased and NUL-terminated.  It must
 *      have room for at least GW_MAXWORD bytes; the caller in this file
 *      passes a 20-byte buffer.
 *
 * Returns 1 if a word was stored in t, 0 at end of input.
 *
 * Characters are skipped until alphanum() accepts one, then collected
 * until alphanum() rejects one.  Characters beyond GW_MAXWORD-1 are read
 * and discarded so an over-long word cannot overrun t.  A word that runs
 * up to end of file is returned rather than dropped.
 */
int
gw(FILE *f, char *t)
{
	enum { GW_MAXWORD = 20 };
	int alphanum();
	int in_word = 0;
	int oldc = ' ';
	int len = 0;
	int c;

	if (f == NULL)
		return 0;

	while ((c = getc(f)) != EOF) {
		if (isupper(c))
			c = tolower(c);
		if (!alphanum(c, oldc)) {
			if (in_word) {
				t[len] = '\0';
				return 1;
			}
			/* Still skipping: oldc is deliberately left alone,
			 * so a word can only begin with a letter or digit. */
			continue;
		}
		in_word = 1;
		if (len < GW_MAXWORD - 1)
			t[len++] = (char)c;
		oldc = c;
	}

	/* End of file inside a word: hand back what was collected. */
	if (in_word) {
		t[len] = '\0';
		return 1;
	}
	return 0;
}
124
/*
 * alphanum - decide whether a character belongs to a word.
 *
 * c     the character being tested.
 * oldc  the character that preceded it.
 *
 * Returns 1 if c is a letter or digit, or if c is an apostrophe or hyphen
 * that directly follows a letter; otherwise returns 0.
 *
 * Both arguments go to <ctype.h> functions, so each must be representable
 * as unsigned char or be EOF.
 */
int
alphanum(int c, int oldc)
{
	if (isalpha(c) || isdigit(c))
		return 1;
	return isalpha(oldc) && (c == '\'' || c == '-');
}
132
wcomp(n1,n2)133 wcomp (n1, n2)
134 {
135 return (word[n1].ct >= word[n2].ct);
136 }
137
wexch(n1,n2)138 wexch (n1, n2)
139 {
140 struct wst tt;
141 tt.tx = word[n1].tx;
142 tt.ct = word[n1].ct;
143 word[n1].tx = word[n2].tx;
144 word[n1].ct = word[n2].ct;
145 word[n2].tx = tt.tx;
146 word[n2].ct = tt.ct;
147 }
148
/*
 * prime - test whether n is a prime number.
 *
 * Returns 1 if n is prime, 0 otherwise.  Values below 2 are not prime,
 * and 2 is.  Trial division by odd numbers is slow but adequate: it is
 * only executed once.
 */
int
prime(int n)
{
	int i;

	if (n < 2)
		return 0;
	if (n % 2 == 0)
		return n == 2;
	/* i <= n / i is i*i <= n without the risk of overflowing int. */
	for (i = 3; i <= n / i; i += 2)
		if (n % i == 0)
			return 0;
	return 1;
}
158
/*
 * trimnl - remove a single trailing newline from a string, in place.
 *
 * s  NUL-terminated string to modify.
 *
 * An empty string is left untouched; nothing before s is ever examined.
 * The return type stays int so existing old-style callers are unaffected;
 * the value is always 0.
 */
int
trimnl(char *s)
{
	size_t len = strlen(s);

	if (len > 0 && s[len - 1] == '\n')
		s[len - 1] = '\0';
	return 0;
}
165
/*
 * Standalone test driver for what4.c: prints the words freqwd() selects
 * from the files named on the command line.  Disabled; change "#if 0" to
 * "#if 1" to build it.
 */
#if 0
int
main(int argc, char *argv[])
{
	char *ff[NF + 1], *wd[TSIZE], **ffp = ff;
	int n, i;

	/* freqwd() uses at most NF names, so collect no more than that. */
	while (--argc > 0 && ffp < ff + NF)
		*ffp++ = *++argv;
	*ffp = 0;
	/* NOTE(review): the original call omitted nin; 10 is a guess based on
	 * the old 20-entry wd array -- confirm the intended value. */
	n = freqwd(ff, wd, 10);
	for (i = 0; i < n; i++)
		printf("%s\n", wd[i]);
	printf("total of %d items\n", n);
	return 0;
}
#endif
183