1 #ifndef lint 2 static char *sccsid = "@(#)what4.c 4.2 (Berkeley) 09/11/87"; 3 #endif 4 5 #include "what..c" 6 #define NW 5 7 #define ZIPF 10 8 #define HASHF 3 9 #define WLEN 10 10 #define SAME 0 11 #define TSIZE HASHF*ZIPF*NW 12 #define NF 10 13 14 struct wst { 15 char *tx; 16 int ct; 17 } 18 ; 19 int HSIZE; 20 static struct wst word[TSIZE]; 21 static char tbuf[NW*ZIPF*WLEN], *tp = tbuf; 22 23 freqwd ( fn, wd, nin ) 24 char *fn[], *wd[]; 25 { 26 FILE *fi[NF]; 27 int nw = 0, i, any, nf, j, wexch(), wcomp(); 28 char tw[20]; 29 for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--); 30 for(nf=0; fn[nf] && nf<NF; nf++) 31 fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL; 32 do { 33 any=0; 34 for(i=0; i<nf; i++) 35 { 36 if (fi[i]==NULL) continue; 37 if (gw(fi[i], tw)==0) 38 { 39 fclose(fi[i]); 40 fi[i]==NULL; 41 continue; 42 } 43 any=1; 44 if (common(tw)) continue; 45 if (strlen(tw)<3) continue; 46 j = lookup (tw); 47 if (j<0 && nw < ZIPF*NW) 48 { 49 j = -j; 50 strcpy (tp, tw); 51 word[j].tx = tp; 52 while (*tp++); 53 _assert (tp < tbuf+NW*ZIPF*WLEN); 54 word[j].ct = 1; 55 nw++; 56 } 57 else if (j>0) 58 word[j].ct++; 59 } 60 } 61 while (any>0); 62 shell ( TSIZE, wcomp, wexch ); 63 for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++) 64 if (nw>=nin*2 && word[nw].ct != word[0].ct) 65 break; 66 for(i=0; i<nw; i++) 67 wd[i] = word[i].tx; 68 return(nw); 69 } 70 71 lookup (wt) 72 char *wt; 73 { 74 int h; 75 h = hash(wt); 76 for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE) 77 { 78 if (h==0) continue; 79 if (strcmp(wt, word[h].tx) == SAME) 80 return (h); 81 } 82 return ( -h ); 83 } 84 85 hash (s) 86 char *s; 87 { 88 int k = 0, c = 0, i = 0; 89 while ( c = *s++ ) 90 k ^= (c << (i++%5) ); 91 return (k>0 ? k : -k); 92 } 93 94 gw (f, t) 95 char *t; 96 FILE *f; 97 { 98 int start = 1, oldc = ' ', c; 99 if (f==NULL) return (0); 100 while ( (c=getc(f)) != EOF) 101 { 102 if (isupper(c)) c= tolower(c); 103 if (start==1) 104 if (!alphanum(c, oldc)) 105 continue; 106 else 107 start=0; 108 if (start==0) 109 if (alphanum(c, oldc)) 110 *t++ = c; 111 else 112 { 113 *t=0; 114 return(1); 115 } 116 oldc=c; 117 } 118 return(0); 119 } 120 121 alphanum( c, oldc ) 122 { 123 if (isalpha(c) || isdigit(c)) return(1); 124 if (isalpha(oldc)) 125 if (c== '\'' || c == '-') return(1); 126 return(0); 127 } 128 129 wcomp (n1, n2) 130 { 131 return (word[n1].ct >= word[n2].ct); 132 } 133 134 wexch (n1, n2) 135 { 136 struct wst tt; 137 tt.tx = word[n1].tx; 138 tt.ct = word[n1].ct; 139 word[n1].tx = word[n2].tx; 140 word[n1].ct = word[n2].ct; 141 word[n2].tx = tt.tx; 142 word[n2].ct = tt.ct; 143 } 144 145 prime(n) 146 { 147 /* only executed once- slow is ok */ 148 int i; 149 if (n%2==0) return(0); 150 for(i=3; i*i<=n; i+= 2) 151 if (n%i ==0 ) return(0); 152 return(1); 153 } 154 155 trimnl(s) 156 char *s; 157 { 158 while (*s)s++; 159 if (*--s=='\n') *s=0; 160 } 161 162 /* this is the test for what4.c as a standalone prog ... */ 163 # ifdef 0 164 main (argc, argv) 165 char *argv[]; 166 { 167 char *ff[10], *wd[20], **ffp ff; 168 int n, i; 169 170 while (--argc) 171 *ffp++ = *++argv; 172 *ffp=0; 173 n=freqwd(ff,wd); 174 for(i=0; i<n; i++) 175 printf("%s\n",wd[i]); 176 printf("total of %d items\n",n); 177 } 178 # endif 0 179