1 #ifndef lint 2 static char sccsid[] = "@(#)dprog.c 4.2 (Berkeley) 82/11/06"; 3 #endif not lint 4 5 /* 6 * diction -- print all sentences containing one of default phrases 7 * 8 * status returns: 9 * 0 - ok, and some matches 10 * 1 - ok, but no matches 11 * 2 - some error 12 */ 13 14 #include <stdio.h> 15 #include <ctype.h> 16 17 #define MAXSIZ 6500 18 #define QSIZE 650 19 int linemsg; 20 long olcount; 21 long lcount; 22 struct words { 23 char inp; 24 char out; 25 struct words *nst; 26 struct words *link; 27 struct words *fail; 28 } w[MAXSIZ], *smax, *q; 29 30 char table[128] = { 31 0, 0, 0, 0, 0, 0, 0, 0, 32 0, 0, ' ', 0, 0, 0, 0, 0, 33 0, 0, 0, 0, 0, 0, 0, 0, 34 0, 0, 0, 0, 0, 0, 0, 0, 35 ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', 36 ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ', 37 '0', '1', '2', '3', '4', '5', '6', '7', 38 '8', '9', ' ', ' ', ' ', ' ', ' ', '.', 39 ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 40 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 41 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 42 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ', 43 ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 44 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 45 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 46 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ' 47 }; 48 int caps = 0; 49 int lineno = 0; 50 int fflag; 51 int nflag = 1; /*use default file*/ 52 char *filename; 53 int mflg = 0; /*don't catch output*/ 54 int nfile; 55 int nsucc; 56 long nsent = 0; 57 long nhits = 0; 58 char *nlp; 59 char *begp, *endp; 60 int beg, last; 61 char *myst; 62 int myct = 0; 63 int oct = 0; 64 FILE *wordf; 65 FILE *mine; 66 char *argptr; 67 long tl = 0; 68 long th = 0; 69 70 main(argc, argv) 71 char *argv[]; 72 { 73 int sv; 74 while (--argc > 0 && (++argv)[0][0]=='-') 75 switch (argv[0][1]) { 76 77 case 'f': 78 fflag++; 79 filename = (++argv)[0]; 80 argc--; 81 continue; 82 83 case 'n': 84 nflag = 0; 85 continue; 86 case 'd': 87 mflg=0; 88 continue; 89 case 'c': 90 caps++; 91 continue; 92 case 'l': 93 lineno++; 94 continue; 95 default: 96 fprintf(stderr, "diction: unknown flag\n"); 97 continue; 98 } 99 out: 100 if(nflag){ 101 wordf = fopen(DICT,"r"); 102 if(wordf == NULL){ 103 fprintf(stderr,"diction: can't open default dictionary\n"); 104 exit(2); 105 } 106 } 107 else { 108 wordf = fopen(filename,"r"); 109 if(wordf == NULL){ 110 fprintf(stderr,"diction: can't open %s\n",filename); 111 exit(2); 112 } 113 } 114 115 #ifdef CATCH 116 if(fopen(CATCH,"r") != NULL) 117 if((mine=fopen(CATCH,"a"))!=NULL)mflg=1; 118 #endif 119 #ifdef MACS 120 if(caps){ 121 printf(".so "); 122 printf(MACS); 123 printf("\n"); 124 } 125 #endif 126 cgotofn(); 127 cfail(); 128 nfile = argc; 129 if (argc<=0) { 130 execute((char *)NULL); 131 } 132 else while (--argc >= 0) { 133 execute(*argv); 134 if(lineno){ 135 printf("file %s: number of lines %ld number of phrases found %ld\n", 136 *argv, lcount-1, nhits); 137 tl += lcount-1; 138 th += nhits; 139 sv = lcount-1; 140 lcount = nhits = 0; 141 } 142 argv++; 143 } 144 if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); 145 if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); 146 else if(tl != sv) 147 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); 148 exit(nsucc == 0); 149 } 150 151 execute(file) 152 char *file; 153 { 154 register char *p; 155 register struct words *c; 156 register ccount; 157 int count1; 158 char *beg1; 159 struct words *savc; 160 char *savp; 161 int savct; 162 int scr; 163 char buf[1024]; 164 int f; 165 int hit; 166 last = 0; 167 if (file) { 168 if ((f = open(file, 0)) < 0) { 169 fprintf(stderr, "diction: can't open %s\n", file); 170 exit(2); 171 } 172 } 173 else f = 0; 174 lcount = olcount = 1; 175 linemsg = 1; 176 ccount = 0; 177 count1 = -1; 178 p = buf; 179 nlp = p; 180 c = w; 181 oct = hit = 0; 182 savc = (struct words *) 0; 183 savp = (char *) 0; 184 for (;;) { 185 if(--ccount <= 0) { 186 if (p == &buf[1024]) p = buf; 187 if (p > &buf[512]) { 188 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 189 } 190 else if ((ccount = read(f, p, 512)) <= 0) break; 191 if(caps && (count1 > 0)) 192 fwrite(beg1,sizeof(*beg1),count1,stdout); 193 count1 = ccount; 194 beg1 = p; 195 } 196 if(p == &buf[1024])p=buf; 197 nstate: 198 if (c->inp == table[*p]) { 199 c = c->nst; 200 } 201 else if (c->link != 0) { 202 c = c->link; 203 goto nstate; 204 } 205 else { 206 if(savp != 0){ 207 c=savc; 208 p=savp; 209 if(ccount > savct)ccount += savct; 210 else ccount = savct; 211 savc = (struct words *) 0; 212 savp = (char *) 0; 213 goto hadone; 214 } 215 c = c->fail; 216 if (c==0) { 217 c = w; 218 istate: 219 if (c->inp == table[*p]) { 220 c = c->nst; 221 } 222 else if (c->link != 0) { 223 c = c->link; 224 goto istate; 225 } 226 } 227 else goto nstate; 228 } 229 if(c->out){ 230 if((c->inp == table[*(p+1)]) && (c->nst != 0)){ 231 savp=p; 232 savc=c; 233 savct=ccount; 234 goto cont; 235 } 236 else if(c->link != 0){ 237 savc=c; 238 while((savc=savc->link)!= 0){ 239 if(savc->inp == table[*(p+1)]){ 240 savp=p; 241 savc=c; 242 savct=ccount; 243 goto cont; 244 } 245 } 246 } 247 hadone: 248 savc = (struct words *) 0; 249 savp = (char *) 0; 250 if(c->out == (char)(0377)){ 251 c=w; 252 goto nstate; 253 } 254 begp = p - (c->out); 255 if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); 256 endp=p; 257 if(mflg){ 258 if(begp-20 < &buf[0]){ 259 myst = &buf[1024]-20; 260 if(nlp < &buf[512])myst=nlp; 261 } 262 else myst = begp-20; 263 if(myst < nlp)myst = nlp; 264 beg = 0; 265 } 266 hit = 1; 267 nhits++; 268 if(*p == '\n')lcount++; 269 if (table[*p++] == '.') { 270 linemsg = 1; 271 if (--ccount <= 0) { 272 if (p == &buf[1024]) p = buf; 273 if (p > &buf[512]) { 274 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 275 } 276 else if ((ccount = read(f, p, 512)) <= 0) break; 277 if(caps && (count1 > 0)) 278 fwrite(beg1,sizeof(*beg1),count1,stdout); 279 count1=ccount; 280 beg1=p; 281 } 282 } 283 succeed: nsucc = 1; 284 { 285 if (p <= nlp) { 286 outc(&buf[1024],file); 287 nlp = buf; 288 } 289 outc(p,file); 290 } 291 if(mflg)last=1; 292 nomatch: 293 nlp = p; 294 c = w; 295 begp = endp = 0; 296 continue; 297 } 298 cont: 299 if(*p == '\n')lcount++; 300 if (table[*p++] == '.'){ 301 if(hit){ 302 if(p <= nlp){ 303 outc(&buf[1024],file); 304 nlp = buf; 305 } 306 outc(p,file); 307 if(!caps)printf("\n\n"); 308 if(mflg && last){putc('\n',mine);myct = 0;} 309 } 310 linemsg = 1; 311 if(*p == '\n')olcount = lcount+1; 312 else 313 olcount=lcount; 314 last = 0; 315 hit = 0; 316 oct = 0; 317 nlp = p; 318 c = w; 319 begp = endp = 0; 320 nsent++; 321 } 322 } 323 if(caps && (count1 > 0)) 324 fwrite(beg1,sizeof(*beg1),count1,stdout); 325 close(f); 326 } 327 328 getargc() 329 { 330 register c; 331 if (wordf){ 332 if((c=getc(wordf))==EOF){ 333 fclose(wordf); 334 if(nflag && fflag){ 335 nflag=0; 336 wordf=fopen(filename,"r"); 337 if(wordf == NULL){ 338 fprintf("diction can't open %s\n",filename); 339 exit(2); 340 } 341 return(getc(wordf)); 342 } 343 else return(EOF); 344 } 345 else return(c); 346 } 347 if ((c = *argptr++) == '\0') 348 return(EOF); 349 return(c); 350 } 351 352 cgotofn() { 353 register c; 354 register struct words *s; 355 register ct; 356 int neg; 357 358 s = smax = w; 359 neg = ct = 0; 360 nword: for(;;) { 361 c = getargc(); 362 if(c == '~'){ 363 neg++; 364 c = getargc(); 365 } 366 if (c==EOF) 367 return; 368 if (c == '\n') { 369 if(neg)s->out = 0377; 370 else s->out = ct-1; 371 neg = ct = 0; 372 s = w; 373 } else { 374 loop: if (s->inp == c) { 375 s = s->nst; 376 ct++; 377 continue; 378 } 379 if (s->inp == 0) goto enter; 380 if (s->link == 0) { 381 if (smax >= &w[MAXSIZ - 1]) overflo(); 382 s->link = ++smax; 383 s = smax; 384 goto enter; 385 } 386 s = s->link; 387 goto loop; 388 } 389 } 390 391 enter: 392 do { 393 s->inp = c; 394 ct++; 395 if (smax >= &w[MAXSIZ - 1]) overflo(); 396 s->nst = ++smax; 397 s = smax; 398 } while ((c = getargc()) != '\n' && c!=EOF); 399 if(neg)smax->out = 0377; 400 else smax->out = ct-1; 401 neg = ct = 0; 402 s = w; 403 if (c != EOF) 404 goto nword; 405 } 406 407 overflo() { 408 fprintf(stderr, "wordlist too large\n"); 409 exit(2); 410 } 411 cfail() { 412 struct words *queue[QSIZE]; 413 struct words **front, **rear; 414 struct words *state; 415 int bstart; 416 register char c; 417 register struct words *s; 418 s = w; 419 front = rear = queue; 420 init: if ((s->inp) != 0) { 421 *rear++ = s->nst; 422 if (rear >= &queue[QSIZE - 1]) overflo(); 423 } 424 if ((s = s->link) != 0) { 425 goto init; 426 } 427 428 while (rear!=front) { 429 s = *front; 430 if (front == &queue[QSIZE-1]) 431 front = queue; 432 else front++; 433 cloop: if ((c = s->inp) != 0) { 434 bstart=0; 435 *rear = (q = s->nst); 436 if (front < rear) 437 if (rear >= &queue[QSIZE-1]) 438 if (front == queue) overflo(); 439 else rear = queue; 440 else rear++; 441 else 442 if (++rear == front) overflo(); 443 state = s->fail; 444 floop: if (state == 0){ state = w;bstart=1;} 445 if (state->inp == c) { 446 qloop: q->fail = state->nst; 447 if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; 448 if((q=q->link) != 0)goto qloop; 449 } 450 else if ((state = state->link) != 0) 451 goto floop; 452 else if(bstart==0){state=0; goto floop;} 453 } 454 if ((s = s->link) != 0) 455 goto cloop; 456 } 457 /* for(s=w;s<=smax;s++) 458 printf("s %d ch %c out %d nst %d link %d fail %d\n",s, 459 s->inp,s->out,s->nst,s->link,s->fail); 460 */ 461 } 462 outc(addr,file) 463 char *addr; 464 char *file; 465 { 466 int inside; 467 468 inside = 0; 469 if(!caps && lineno && linemsg){ 470 printf("beginning line %ld",olcount); 471 if(file != (char *)NULL)printf(" %s\n",file); 472 else printf("\n"); 473 linemsg = 0; 474 } 475 while(nlp < addr){ 476 if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ 477 oct=0; 478 putchar('\n'); 479 } 480 if(nlp == begp){ 481 if(caps)inside++; 482 else { 483 if( oct >45){putchar('\n'); 484 oct=0; 485 } 486 if( oct==0 || table[*nlp] != ' '){ 487 printf("*["); 488 oct+=2; 489 } 490 else {printf(" *[");; 491 oct+=3; 492 } 493 } 494 if(mflg)putc('[',mine); 495 } 496 if(inside){ 497 if(islower(*nlp))*nlp = toupper(*nlp); 498 } 499 else { 500 if(!caps && *nlp == '\n')*nlp = ' '; 501 if(*nlp == ' ' && oct==0); 502 else if(!caps) {putchar(*nlp); oct++;} 503 } 504 if(nlp == endp){ 505 if(caps) 506 inside= 0; 507 else { 508 if(*(nlp) != ' '){printf("]*"); 509 oct+=2; 510 } 511 else {printf("]* "); 512 oct+=3; 513 } 514 if(oct >60){putchar('\n'); 515 oct=0; 516 } 517 } 518 if(mflg)putc(']',mine); 519 beg = 0; 520 } 521 if(mflg){ 522 if(nlp == myst)beg = 1; 523 if(beg || last){ 524 putc(*nlp,mine); 525 if(myct++ >= 72 || last == 20){ 526 putc('\n',mine); 527 if(last == 20)last=myct=0; 528 else myct=0; 529 } 530 if(last)last++; 531 } 532 } 533 nlp++; 534 } 535 } 536