1 /*- 2 * %sccs.include.proprietary.c% 3 */ 4 5 #ifndef lint 6 static char sccsid[] = "@(#)what4.c 4.3 (Berkeley) 04/18/91"; 7 #endif /* not lint */ 8 9 #include "what..c" 10 #define NW 5 11 #define ZIPF 10 12 #define HASHF 3 13 #define WLEN 10 14 #define SAME 0 15 #define TSIZE HASHF*ZIPF*NW 16 #define NF 10 17 18 struct wst { 19 char *tx; 20 int ct; 21 } 22 ; 23 int HSIZE; 24 static struct wst word[TSIZE]; 25 static char tbuf[NW*ZIPF*WLEN], *tp = tbuf; 26 27 freqwd ( fn, wd, nin ) 28 char *fn[], *wd[]; 29 { 30 FILE *fi[NF]; 31 int nw = 0, i, any, nf, j, wexch(), wcomp(); 32 char tw[20]; 33 for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--); 34 for(nf=0; fn[nf] && nf<NF; nf++) 35 fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL; 36 do { 37 any=0; 38 for(i=0; i<nf; i++) 39 { 40 if (fi[i]==NULL) continue; 41 if (gw(fi[i], tw)==0) 42 { 43 fclose(fi[i]); 44 fi[i]==NULL; 45 continue; 46 } 47 any=1; 48 if (common(tw)) continue; 49 if (strlen(tw)<3) continue; 50 j = lookup (tw); 51 if (j<0 && nw < ZIPF*NW) 52 { 53 j = -j; 54 strcpy (tp, tw); 55 word[j].tx = tp; 56 while (*tp++); 57 _assert (tp < tbuf+NW*ZIPF*WLEN); 58 word[j].ct = 1; 59 nw++; 60 } 61 else if (j>0) 62 word[j].ct++; 63 } 64 } 65 while (any>0); 66 shell ( TSIZE, wcomp, wexch ); 67 for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++) 68 if (nw>=nin*2 && word[nw].ct != word[0].ct) 69 break; 70 for(i=0; i<nw; i++) 71 wd[i] = word[i].tx; 72 return(nw); 73 } 74 75 lookup (wt) 76 char *wt; 77 { 78 int h; 79 h = hash(wt); 80 for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE) 81 { 82 if (h==0) continue; 83 if (strcmp(wt, word[h].tx) == SAME) 84 return (h); 85 } 86 return ( -h ); 87 } 88 89 hash (s) 90 char *s; 91 { 92 int k = 0, c = 0, i = 0; 93 while ( c = *s++ ) 94 k ^= (c << (i++%5) ); 95 return (k>0 ? k : -k); 96 } 97 98 gw (f, t) 99 char *t; 100 FILE *f; 101 { 102 int start = 1, oldc = ' ', c; 103 if (f==NULL) return (0); 104 while ( (c=getc(f)) != EOF) 105 { 106 if (isupper(c)) c= tolower(c); 107 if (start==1) 108 if (!alphanum(c, oldc)) 109 continue; 110 else 111 start=0; 112 if (start==0) 113 if (alphanum(c, oldc)) 114 *t++ = c; 115 else 116 { 117 *t=0; 118 return(1); 119 } 120 oldc=c; 121 } 122 return(0); 123 } 124 125 alphanum( c, oldc ) 126 { 127 if (isalpha(c) || isdigit(c)) return(1); 128 if (isalpha(oldc)) 129 if (c== '\'' || c == '-') return(1); 130 return(0); 131 } 132 133 wcomp (n1, n2) 134 { 135 return (word[n1].ct >= word[n2].ct); 136 } 137 138 wexch (n1, n2) 139 { 140 struct wst tt; 141 tt.tx = word[n1].tx; 142 tt.ct = word[n1].ct; 143 word[n1].tx = word[n2].tx; 144 word[n1].ct = word[n2].ct; 145 word[n2].tx = tt.tx; 146 word[n2].ct = tt.ct; 147 } 148 149 prime(n) 150 { 151 /* only executed once- slow is ok */ 152 int i; 153 if (n%2==0) return(0); 154 for(i=3; i*i<=n; i+= 2) 155 if (n%i ==0 ) return(0); 156 return(1); 157 } 158 159 trimnl(s) 160 char *s; 161 { 162 while (*s)s++; 163 if (*--s=='\n') *s=0; 164 } 165 166 /* this is the test for what4.c as a standalone prog ... */ 167 # ifdef 0 168 main (argc, argv) 169 char *argv[]; 170 { 171 char *ff[10], *wd[20], **ffp ff; 172 int n, i; 173 174 while (--argc) 175 *ffp++ = *++argv; 176 *ffp=0; 177 n=freqwd(ff,wd); 178 for(i=0; i<n; i++) 179 printf("%s\n",wd[i]); 180 printf("total of %d items\n",n); 181 } 182 # endif 0 183