1 #ifndef lint 2 static char sccsid[] = "@(#)dprog.c 4.4 (Berkeley) 91/03/01"; 3 #endif not lint 4 5 /* 6 * diction -- print all sentences containing one of default phrases 7 * 8 * status returns: 9 * 0 - ok, and some matches 10 * 1 - ok, but no matches 11 * 2 - some error 12 */ 13 14 #include <stdio.h> 15 #include <ctype.h> 16 #include "pathnames.h" 17 18 #define MAXSIZ 6500 19 #define QSIZE 650 20 int linemsg; 21 long olcount; 22 long lcount; 23 struct words { 24 char inp; 25 char out; 26 struct words *nst; 27 struct words *link; 28 struct words *fail; 29 } w[MAXSIZ], *smax, *q; 30 31 char table[128] = { 32 0, 0, 0, 0, 0, 0, 0, 0, 33 0, 0, ' ', 0, 0, 0, 0, 0, 34 0, 0, 0, 0, 0, 0, 0, 0, 35 0, 0, 0, 0, 0, 0, 0, 0, 36 ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', 37 ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ', 38 '0', '1', '2', '3', '4', '5', '6', '7', 39 '8', '9', ' ', ' ', ' ', ' ', ' ', '.', 40 ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 41 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 42 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 43 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ', 44 ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 45 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 46 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 47 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ' 48 }; 49 int caps = 0; 50 int lineno = 0; 51 int fflag; 52 int nflag = 1; /*use default file*/ 53 char *filename; 54 int mflg = 0; /*don't catch output*/ 55 int nfile; 56 int nsucc; 57 long nsent = 0; 58 long nhits = 0; 59 char *nlp; 60 char *begp, *endp; 61 int beg, last; 62 char *myst; 63 int myct = 0; 64 int oct = 0; 65 FILE *wordf; 66 FILE *mine; 67 char *argptr; 68 long tl = 0; 69 long th = 0; 70 71 main(argc, argv) 72 char *argv[]; 73 { 74 int sv; 75 while (--argc > 0 && (++argv)[0][0]=='-') 76 switch (argv[0][1]) { 77 78 case 'f': 79 fflag++; 80 filename = (++argv)[0]; 81 argc--; 82 continue; 83 84 case 'n': 85 nflag = 0; 86 continue; 87 case 'd': 88 mflg=0; 89 continue; 90 case 'c': 91 caps++; 92 continue; 93 case 'l': 94 lineno++; 95 continue; 96 default: 97 fprintf(stderr, "diction: unknown flag\n"); 98 continue; 99 } 100 out: 101 if(nflag){ 102 wordf = fopen(_PATH_DICT,"r"); 103 if(wordf == NULL){ 104 fprintf(stderr,"diction: can't open default dictionary\n"); 105 exit(2); 106 } 107 } 108 else { 109 wordf = fopen(filename,"r"); 110 if(wordf == NULL){ 111 fprintf(stderr,"diction: can't open %s\n",filename); 112 exit(2); 113 } 114 } 115 116 #ifdef CATCH 117 if(fopen(CATCH,"r") != NULL) 118 if((mine=fopen(CATCH,"a"))!=NULL)mflg=1; 119 #endif 120 #ifdef MACS 121 if(caps){ 122 printf(".so "); 123 printf(MACS); 124 printf("\n"); 125 } 126 #endif 127 cgotofn(); 128 cfail(); 129 nfile = argc; 130 if (argc<=0) { 131 execute((char *)NULL); 132 } 133 else while (--argc >= 0) { 134 execute(*argv); 135 if(lineno){ 136 printf("file %s: number of lines %ld number of phrases found %ld\n", 137 *argv, lcount-1, nhits); 138 tl += lcount-1; 139 th += nhits; 140 sv = lcount-1; 141 lcount = nhits = 0; 142 } 143 argv++; 144 } 145 if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); 146 if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); 147 else if(tl != sv) 148 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); 149 exit(nsucc == 0); 150 } 151 152 execute(file) 153 char *file; 154 { 155 register char *p; 156 register struct words *c; 157 register ccount; 158 int count1; 159 char *beg1; 160 struct words *savc; 161 char *savp; 162 int savct; 163 int scr; 164 char buf[1024]; 165 int f; 166 int hit; 167 last = 0; 168 if (file) { 169 if ((f = open(file, 0)) < 0) { 170 fprintf(stderr, "diction: can't open %s\n", file); 171 exit(2); 172 } 173 } 174 else f = 0; 175 lcount = olcount = 1; 176 linemsg = 1; 177 ccount = 0; 178 count1 = -1; 179 p = buf; 180 nlp = p; 181 c = w; 182 oct = hit = 0; 183 savc = (struct words *) 0; 184 savp = (char *) 0; 185 for (;;) { 186 if(--ccount <= 0) { 187 if (p == &buf[1024]) p = buf; 188 if (p > &buf[512]) { 189 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 190 } 191 else if ((ccount = read(f, p, 512)) <= 0) break; 192 if(caps && (count1 > 0)) 193 fwrite(beg1,sizeof(*beg1),count1,stdout); 194 count1 = ccount; 195 beg1 = p; 196 } 197 if(p == &buf[1024])p=buf; 198 nstate: 199 if (c->inp == table[*p]) { 200 c = c->nst; 201 } 202 else if (c->link != 0) { 203 c = c->link; 204 goto nstate; 205 } 206 else { 207 if(savp != 0){ 208 c=savc; 209 p=savp; 210 if(ccount > savct)ccount += savct; 211 else ccount = savct; 212 savc = (struct words *) 0; 213 savp = (char *) 0; 214 goto hadone; 215 } 216 c = c->fail; 217 if (c==0) { 218 c = w; 219 istate: 220 if (c->inp == table[*p]) { 221 c = c->nst; 222 } 223 else if (c->link != 0) { 224 c = c->link; 225 goto istate; 226 } 227 } 228 else goto nstate; 229 } 230 if(c->out){ 231 if((c->inp == table[*(p+1)]) && (c->nst != 0)){ 232 savp=p; 233 savc=c; 234 savct=ccount; 235 goto cont; 236 } 237 else if(c->link != 0){ 238 savc=c; 239 while((savc=savc->link)!= 0){ 240 if(savc->inp == table[*(p+1)]){ 241 savp=p; 242 savc=c; 243 savct=ccount; 244 goto cont; 245 } 246 } 247 } 248 hadone: 249 savc = (struct words *) 0; 250 savp = (char *) 0; 251 if(c->out == (char)(0377)){ 252 c=w; 253 goto nstate; 254 } 255 begp = p - (c->out); 256 if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); 257 endp=p; 258 if(mflg){ 259 if(begp-20 < &buf[0]){ 260 myst = &buf[1024]-20; 261 if(nlp < &buf[512])myst=nlp; 262 } 263 else myst = begp-20; 264 if(myst < nlp)myst = nlp; 265 beg = 0; 266 } 267 hit = 1; 268 nhits++; 269 if(*p == '\n')lcount++; 270 if (table[*p++] == '.') { 271 linemsg = 1; 272 if (--ccount <= 0) { 273 if (p == &buf[1024]) p = buf; 274 if (p > &buf[512]) { 275 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 276 } 277 else if ((ccount = read(f, p, 512)) <= 0) break; 278 if(caps && (count1 > 0)) 279 fwrite(beg1,sizeof(*beg1),count1,stdout); 280 count1=ccount; 281 beg1=p; 282 } 283 } 284 succeed: nsucc = 1; 285 { 286 if (p <= nlp) { 287 outc(&buf[1024],file); 288 nlp = buf; 289 } 290 outc(p,file); 291 } 292 if(mflg)last=1; 293 nomatch: 294 nlp = p; 295 c = w; 296 begp = endp = 0; 297 continue; 298 } 299 cont: 300 if(*p == '\n')lcount++; 301 if (table[*p++] == '.'){ 302 if(hit){ 303 if(p <= nlp){ 304 outc(&buf[1024],file); 305 nlp = buf; 306 } 307 outc(p,file); 308 if(!caps)printf("\n\n"); 309 if(mflg && last){putc('\n',mine);myct = 0;} 310 } 311 linemsg = 1; 312 if(*p == '\n')olcount = lcount+1; 313 else 314 olcount=lcount; 315 last = 0; 316 hit = 0; 317 oct = 0; 318 nlp = p; 319 c = w; 320 begp = endp = 0; 321 nsent++; 322 } 323 } 324 if(caps && (count1 > 0)) 325 fwrite(beg1,sizeof(*beg1),count1,stdout); 326 close(f); 327 } 328 329 getargc() 330 { 331 register c; 332 if (wordf){ 333 if((c=getc(wordf))==EOF){ 334 fclose(wordf); 335 if(nflag && fflag){ 336 nflag=0; 337 wordf=fopen(filename,"r"); 338 if(wordf == NULL){ 339 fprintf(stderr, 340 "diction can't open %s\n",filename); 341 exit(2); 342 } 343 return(getc(wordf)); 344 } 345 else return(EOF); 346 } 347 else return(c); 348 } 349 if ((c = *argptr++) == '\0') 350 return(EOF); 351 return(c); 352 } 353 354 cgotofn() { 355 register c; 356 register struct words *s; 357 register ct; 358 int neg; 359 360 s = smax = w; 361 neg = ct = 0; 362 nword: for(;;) { 363 c = getargc(); 364 if(c == '~'){ 365 neg++; 366 c = getargc(); 367 } 368 if (c==EOF) 369 return; 370 if (c == '\n') { 371 if(neg)s->out = 0377; 372 else s->out = ct-1; 373 neg = ct = 0; 374 s = w; 375 } else { 376 loop: if (s->inp == c) { 377 s = s->nst; 378 ct++; 379 continue; 380 } 381 if (s->inp == 0) goto enter; 382 if (s->link == 0) { 383 if (smax >= &w[MAXSIZ - 1]) overflo(); 384 s->link = ++smax; 385 s = smax; 386 goto enter; 387 } 388 s = s->link; 389 goto loop; 390 } 391 } 392 393 enter: 394 do { 395 s->inp = c; 396 ct++; 397 if (smax >= &w[MAXSIZ - 1]) overflo(); 398 s->nst = ++smax; 399 s = smax; 400 } while ((c = getargc()) != '\n' && c!=EOF); 401 if(neg)smax->out = 0377; 402 else smax->out = ct-1; 403 neg = ct = 0; 404 s = w; 405 if (c != EOF) 406 goto nword; 407 } 408 409 overflo() { 410 fprintf(stderr, "wordlist too large\n"); 411 exit(2); 412 } 413 cfail() { 414 struct words *queue[QSIZE]; 415 struct words **front, **rear; 416 struct words *state; 417 int bstart; 418 register char c; 419 register struct words *s; 420 s = w; 421 front = rear = queue; 422 init: if ((s->inp) != 0) { 423 *rear++ = s->nst; 424 if (rear >= &queue[QSIZE - 1]) overflo(); 425 } 426 if ((s = s->link) != 0) { 427 goto init; 428 } 429 430 while (rear!=front) { 431 s = *front; 432 if (front == &queue[QSIZE-1]) 433 front = queue; 434 else front++; 435 cloop: if ((c = s->inp) != 0) { 436 bstart=0; 437 *rear = (q = s->nst); 438 if (front < rear) 439 if (rear >= &queue[QSIZE-1]) 440 if (front == queue) overflo(); 441 else rear = queue; 442 else rear++; 443 else 444 if (++rear == front) overflo(); 445 state = s->fail; 446 floop: if (state == 0){ state = w;bstart=1;} 447 if (state->inp == c) { 448 qloop: q->fail = state->nst; 449 if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; 450 if((q=q->link) != 0)goto qloop; 451 } 452 else if ((state = state->link) != 0) 453 goto floop; 454 else if(bstart==0){state=0; goto floop;} 455 } 456 if ((s = s->link) != 0) 457 goto cloop; 458 } 459 /* for(s=w;s<=smax;s++) 460 printf("s %d ch %c out %d nst %d link %d fail %d\n",s, 461 s->inp,s->out,s->nst,s->link,s->fail); 462 */ 463 } 464 outc(addr,file) 465 char *addr; 466 char *file; 467 { 468 int inside; 469 470 inside = 0; 471 if(!caps && lineno && linemsg){ 472 printf("beginning line %ld",olcount); 473 if(file != (char *)NULL)printf(" %s\n",file); 474 else printf("\n"); 475 linemsg = 0; 476 } 477 while(nlp < addr){ 478 if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ 479 oct=0; 480 putchar('\n'); 481 } 482 if(nlp == begp){ 483 if(caps)inside++; 484 else { 485 if( oct >45){putchar('\n'); 486 oct=0; 487 } 488 if( oct==0 || table[*nlp] != ' '){ 489 printf("*["); 490 oct+=2; 491 } 492 else {printf(" *[");; 493 oct+=3; 494 } 495 } 496 if(mflg)putc('[',mine); 497 } 498 if(inside){ 499 if(islower(*nlp))*nlp = toupper(*nlp); 500 } 501 else { 502 if(!caps && *nlp == '\n')*nlp = ' '; 503 if(*nlp == ' ' && oct==0); 504 else if(!caps) {putchar(*nlp); oct++;} 505 } 506 if(nlp == endp){ 507 if(caps) 508 inside= 0; 509 else { 510 if(*(nlp) != ' '){printf("]*"); 511 oct+=2; 512 } 513 else {printf("]* "); 514 oct+=3; 515 } 516 if(oct >60){putchar('\n'); 517 oct=0; 518 } 519 } 520 if(mflg)putc(']',mine); 521 beg = 0; 522 } 523 if(mflg){ 524 if(nlp == myst)beg = 1; 525 if(beg || last){ 526 putc(*nlp,mine); 527 if(myct++ >= 72 || last == 20){ 528 putc('\n',mine); 529 if(last == 20)last=myct=0; 530 else myct=0; 531 } 532 if(last)last++; 533 } 534 } 535 nlp++; 536 } 537 } 538