1 /*- 2 * %sccs.include.proprietary.c% 3 */ 4 5 #ifndef lint 6 static char sccsid[] = "@(#)dprog.c 4.5 (Berkeley) 04/17/91"; 7 #endif /* not lint */ 8 9 /* 10 * diction -- print all sentences containing one of default phrases 11 * 12 * status returns: 13 * 0 - ok, and some matches 14 * 1 - ok, but no matches 15 * 2 - some error 16 */ 17 18 #include <stdio.h> 19 #include <ctype.h> 20 #include "pathnames.h" 21 22 #define MAXSIZ 6500 23 #define QSIZE 650 24 int linemsg; 25 long olcount; 26 long lcount; 27 struct words { 28 char inp; 29 char out; 30 struct words *nst; 31 struct words *link; 32 struct words *fail; 33 } w[MAXSIZ], *smax, *q; 34 35 char table[128] = { 36 0, 0, 0, 0, 0, 0, 0, 0, 37 0, 0, ' ', 0, 0, 0, 0, 0, 38 0, 0, 0, 0, 0, 0, 0, 0, 39 0, 0, 0, 0, 0, 0, 0, 0, 40 ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', 41 ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ', 42 '0', '1', '2', '3', '4', '5', '6', '7', 43 '8', '9', ' ', ' ', ' ', ' ', ' ', '.', 44 ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 45 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 46 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 47 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ', 48 ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 49 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 50 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 51 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ' 52 }; 53 int caps = 0; 54 int lineno = 0; 55 int fflag; 56 int nflag = 1; /*use default file*/ 57 char *filename; 58 int mflg = 0; /*don't catch output*/ 59 int nfile; 60 int nsucc; 61 long nsent = 0; 62 long nhits = 0; 63 char *nlp; 64 char *begp, *endp; 65 int beg, last; 66 char *myst; 67 int myct = 0; 68 int oct = 0; 69 FILE *wordf; 70 FILE *mine; 71 char *argptr; 72 long tl = 0; 73 long th = 0; 74 75 main(argc, argv) 76 char *argv[]; 77 { 78 int sv; 79 while (--argc > 0 && (++argv)[0][0]=='-') 80 switch (argv[0][1]) { 81 82 case 'f': 83 fflag++; 84 filename = (++argv)[0]; 85 argc--; 86 continue; 87 88 case 'n': 89 nflag = 0; 90 continue; 91 case 'd': 92 mflg=0; 93 continue; 94 case 'c': 95 caps++; 96 continue; 97 case 'l': 98 lineno++; 99 continue; 100 default: 101 fprintf(stderr, "diction: unknown flag\n"); 102 continue; 103 } 104 out: 105 if(nflag){ 106 wordf = fopen(_PATH_DICT,"r"); 107 if(wordf == NULL){ 108 fprintf(stderr,"diction: can't open default dictionary\n"); 109 exit(2); 110 } 111 } 112 else { 113 wordf = fopen(filename,"r"); 114 if(wordf == NULL){ 115 fprintf(stderr,"diction: can't open %s\n",filename); 116 exit(2); 117 } 118 } 119 120 #ifdef CATCH 121 if(fopen(CATCH,"r") != NULL) 122 if((mine=fopen(CATCH,"a"))!=NULL)mflg=1; 123 #endif 124 #ifdef MACS 125 if(caps){ 126 printf(".so "); 127 printf(MACS); 128 printf("\n"); 129 } 130 #endif 131 cgotofn(); 132 cfail(); 133 nfile = argc; 134 if (argc<=0) { 135 execute((char *)NULL); 136 } 137 else while (--argc >= 0) { 138 execute(*argv); 139 if(lineno){ 140 printf("file %s: number of lines %ld number of phrases found %ld\n", 141 *argv, lcount-1, nhits); 142 tl += lcount-1; 143 th += nhits; 144 sv = lcount-1; 145 lcount = nhits = 0; 146 } 147 argv++; 148 } 149 if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); 150 if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); 151 else if(tl != sv) 152 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); 153 exit(nsucc == 0); 154 } 155 156 execute(file) 157 char *file; 158 { 159 register char *p; 160 register struct words *c; 161 register ccount; 162 int count1; 163 char *beg1; 164 struct words *savc; 165 char *savp; 166 int savct; 167 int scr; 168 char buf[1024]; 169 int f; 170 int hit; 171 last = 0; 172 if (file) { 173 if ((f = open(file, 0)) < 0) { 174 fprintf(stderr, "diction: can't open %s\n", file); 175 exit(2); 176 } 177 } 178 else f = 0; 179 lcount = olcount = 1; 180 linemsg = 1; 181 ccount = 0; 182 count1 = -1; 183 p = buf; 184 nlp = p; 185 c = w; 186 oct = hit = 0; 187 savc = (struct words *) 0; 188 savp = (char *) 0; 189 for (;;) { 190 if(--ccount <= 0) { 191 if (p == &buf[1024]) p = buf; 192 if (p > &buf[512]) { 193 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 194 } 195 else if ((ccount = read(f, p, 512)) <= 0) break; 196 if(caps && (count1 > 0)) 197 fwrite(beg1,sizeof(*beg1),count1,stdout); 198 count1 = ccount; 199 beg1 = p; 200 } 201 if(p == &buf[1024])p=buf; 202 nstate: 203 if (c->inp == table[*p]) { 204 c = c->nst; 205 } 206 else if (c->link != 0) { 207 c = c->link; 208 goto nstate; 209 } 210 else { 211 if(savp != 0){ 212 c=savc; 213 p=savp; 214 if(ccount > savct)ccount += savct; 215 else ccount = savct; 216 savc = (struct words *) 0; 217 savp = (char *) 0; 218 goto hadone; 219 } 220 c = c->fail; 221 if (c==0) { 222 c = w; 223 istate: 224 if (c->inp == table[*p]) { 225 c = c->nst; 226 } 227 else if (c->link != 0) { 228 c = c->link; 229 goto istate; 230 } 231 } 232 else goto nstate; 233 } 234 if(c->out){ 235 if((c->inp == table[*(p+1)]) && (c->nst != 0)){ 236 savp=p; 237 savc=c; 238 savct=ccount; 239 goto cont; 240 } 241 else if(c->link != 0){ 242 savc=c; 243 while((savc=savc->link)!= 0){ 244 if(savc->inp == table[*(p+1)]){ 245 savp=p; 246 savc=c; 247 savct=ccount; 248 goto cont; 249 } 250 } 251 } 252 hadone: 253 savc = (struct words *) 0; 254 savp = (char *) 0; 255 if(c->out == (char)(0377)){ 256 c=w; 257 goto nstate; 258 } 259 begp = p - (c->out); 260 if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); 261 endp=p; 262 if(mflg){ 263 if(begp-20 < &buf[0]){ 264 myst = &buf[1024]-20; 265 if(nlp < &buf[512])myst=nlp; 266 } 267 else myst = begp-20; 268 if(myst < nlp)myst = nlp; 269 beg = 0; 270 } 271 hit = 1; 272 nhits++; 273 if(*p == '\n')lcount++; 274 if (table[*p++] == '.') { 275 linemsg = 1; 276 if (--ccount <= 0) { 277 if (p == &buf[1024]) p = buf; 278 if (p > &buf[512]) { 279 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 280 } 281 else if ((ccount = read(f, p, 512)) <= 0) break; 282 if(caps && (count1 > 0)) 283 fwrite(beg1,sizeof(*beg1),count1,stdout); 284 count1=ccount; 285 beg1=p; 286 } 287 } 288 succeed: nsucc = 1; 289 { 290 if (p <= nlp) { 291 outc(&buf[1024],file); 292 nlp = buf; 293 } 294 outc(p,file); 295 } 296 if(mflg)last=1; 297 nomatch: 298 nlp = p; 299 c = w; 300 begp = endp = 0; 301 continue; 302 } 303 cont: 304 if(*p == '\n')lcount++; 305 if (table[*p++] == '.'){ 306 if(hit){ 307 if(p <= nlp){ 308 outc(&buf[1024],file); 309 nlp = buf; 310 } 311 outc(p,file); 312 if(!caps)printf("\n\n"); 313 if(mflg && last){putc('\n',mine);myct = 0;} 314 } 315 linemsg = 1; 316 if(*p == '\n')olcount = lcount+1; 317 else 318 olcount=lcount; 319 last = 0; 320 hit = 0; 321 oct = 0; 322 nlp = p; 323 c = w; 324 begp = endp = 0; 325 nsent++; 326 } 327 } 328 if(caps && (count1 > 0)) 329 fwrite(beg1,sizeof(*beg1),count1,stdout); 330 close(f); 331 } 332 333 getargc() 334 { 335 register c; 336 if (wordf){ 337 if((c=getc(wordf))==EOF){ 338 fclose(wordf); 339 if(nflag && fflag){ 340 nflag=0; 341 wordf=fopen(filename,"r"); 342 if(wordf == NULL){ 343 fprintf(stderr, 344 "diction can't open %s\n",filename); 345 exit(2); 346 } 347 return(getc(wordf)); 348 } 349 else return(EOF); 350 } 351 else return(c); 352 } 353 if ((c = *argptr++) == '\0') 354 return(EOF); 355 return(c); 356 } 357 358 cgotofn() { 359 register c; 360 register struct words *s; 361 register ct; 362 int neg; 363 364 s = smax = w; 365 neg = ct = 0; 366 nword: for(;;) { 367 c = getargc(); 368 if(c == '~'){ 369 neg++; 370 c = getargc(); 371 } 372 if (c==EOF) 373 return; 374 if (c == '\n') { 375 if(neg)s->out = 0377; 376 else s->out = ct-1; 377 neg = ct = 0; 378 s = w; 379 } else { 380 loop: if (s->inp == c) { 381 s = s->nst; 382 ct++; 383 continue; 384 } 385 if (s->inp == 0) goto enter; 386 if (s->link == 0) { 387 if (smax >= &w[MAXSIZ - 1]) overflo(); 388 s->link = ++smax; 389 s = smax; 390 goto enter; 391 } 392 s = s->link; 393 goto loop; 394 } 395 } 396 397 enter: 398 do { 399 s->inp = c; 400 ct++; 401 if (smax >= &w[MAXSIZ - 1]) overflo(); 402 s->nst = ++smax; 403 s = smax; 404 } while ((c = getargc()) != '\n' && c!=EOF); 405 if(neg)smax->out = 0377; 406 else smax->out = ct-1; 407 neg = ct = 0; 408 s = w; 409 if (c != EOF) 410 goto nword; 411 } 412 413 overflo() { 414 fprintf(stderr, "wordlist too large\n"); 415 exit(2); 416 } 417 cfail() { 418 struct words *queue[QSIZE]; 419 struct words **front, **rear; 420 struct words *state; 421 int bstart; 422 register char c; 423 register struct words *s; 424 s = w; 425 front = rear = queue; 426 init: if ((s->inp) != 0) { 427 *rear++ = s->nst; 428 if (rear >= &queue[QSIZE - 1]) overflo(); 429 } 430 if ((s = s->link) != 0) { 431 goto init; 432 } 433 434 while (rear!=front) { 435 s = *front; 436 if (front == &queue[QSIZE-1]) 437 front = queue; 438 else front++; 439 cloop: if ((c = s->inp) != 0) { 440 bstart=0; 441 *rear = (q = s->nst); 442 if (front < rear) 443 if (rear >= &queue[QSIZE-1]) 444 if (front == queue) overflo(); 445 else rear = queue; 446 else rear++; 447 else 448 if (++rear == front) overflo(); 449 state = s->fail; 450 floop: if (state == 0){ state = w;bstart=1;} 451 if (state->inp == c) { 452 qloop: q->fail = state->nst; 453 if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; 454 if((q=q->link) != 0)goto qloop; 455 } 456 else if ((state = state->link) != 0) 457 goto floop; 458 else if(bstart==0){state=0; goto floop;} 459 } 460 if ((s = s->link) != 0) 461 goto cloop; 462 } 463 /* for(s=w;s<=smax;s++) 464 printf("s %d ch %c out %d nst %d link %d fail %d\n",s, 465 s->inp,s->out,s->nst,s->link,s->fail); 466 */ 467 } 468 outc(addr,file) 469 char *addr; 470 char *file; 471 { 472 int inside; 473 474 inside = 0; 475 if(!caps && lineno && linemsg){ 476 printf("beginning line %ld",olcount); 477 if(file != (char *)NULL)printf(" %s\n",file); 478 else printf("\n"); 479 linemsg = 0; 480 } 481 while(nlp < addr){ 482 if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ 483 oct=0; 484 putchar('\n'); 485 } 486 if(nlp == begp){ 487 if(caps)inside++; 488 else { 489 if( oct >45){putchar('\n'); 490 oct=0; 491 } 492 if( oct==0 || table[*nlp] != ' '){ 493 printf("*["); 494 oct+=2; 495 } 496 else {printf(" *[");; 497 oct+=3; 498 } 499 } 500 if(mflg)putc('[',mine); 501 } 502 if(inside){ 503 if(islower(*nlp))*nlp = toupper(*nlp); 504 } 505 else { 506 if(!caps && *nlp == '\n')*nlp = ' '; 507 if(*nlp == ' ' && oct==0); 508 else if(!caps) {putchar(*nlp); oct++;} 509 } 510 if(nlp == endp){ 511 if(caps) 512 inside= 0; 513 else { 514 if(*(nlp) != ' '){printf("]*"); 515 oct+=2; 516 } 517 else {printf("]* "); 518 oct+=3; 519 } 520 if(oct >60){putchar('\n'); 521 oct=0; 522 } 523 } 524 if(mflg)putc(']',mine); 525 beg = 0; 526 } 527 if(mflg){ 528 if(nlp == myst)beg = 1; 529 if(beg || last){ 530 putc(*nlp,mine); 531 if(myct++ >= 72 || last == 20){ 532 putc('\n',mine); 533 if(last == 20)last=myct=0; 534 else myct=0; 535 } 536 if(last)last++; 537 } 538 } 539 nlp++; 540 } 541 } 542