1 /*-
2 * %sccs.include.proprietary.c%
3 */
4
5 #ifndef lint
6 static char sccsid[] = "@(#)dprog.c 4.5 (Berkeley) 04/17/91";
7 #endif /* not lint */
8
9 /*
10 * diction -- print all sentences containing one of default phrases
11 *
12 * status returns:
13 * 0 - ok, and some matches
14 * 1 - ok, but no matches
15 * 2 - some error
16 */
17
18 #include <stdio.h>
19 #include <ctype.h>
20 #include "pathnames.h"
21
/* Capacity limits. */
#define MAXSIZ 6500		/* nodes available in w[] for the phrase automaton */
#define QSIZE 650		/* slots in the work queue used by cfail() */

int linemsg;			/* nonzero: outc() still owes a "beginning line" header */
long olcount;			/* line number printed in that header */
long lcount;			/* running line counter for the current file */

/*
 * One node of the phrase automaton.
 *	inp	character class this node matches (0 while unused)
 *	out	nonzero when reaching this node completes a phrase:
 *		phrase length - 1, or 0377 for a phrase entered with '~'
 *	nst	node entered after a match on inp
 *	link	next alternative to try when inp does not match
 *	fail	node to resume from on failure (0 means restart at w)
 */
struct words {
	char inp;
	char out;
	struct words *nst;
	struct words *link;
	struct words *fail;
};

struct words w[MAXSIZ];		/* node pool; w[0] is the start node */
struct words *smax;		/* last node handed out from w[] */
struct words *q;		/* scratch node pointer used by cfail() */

/*
 * Character class for each 7-bit ASCII code: letters fold to lower
 * case, digits stand for themselves, '.', '!' and '?' become '.',
 * newline and the remaining printable characters become ' ', and
 * the other control characters are 0.
 */
char table[128] = {
	/* 000 */ 0, 0, 0, 0, 0, 0, 0, 0,
	/* 010 */ 0, 0, ' ', 0, 0, 0, 0, 0,
	/* 020 */ 0, 0, 0, 0, 0, 0, 0, 0,
	/* 030 */ 0, 0, 0, 0, 0, 0, 0, 0,
	/* 040 */ ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
	/* 050 */ ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/* 060 */ '0', '1', '2', '3', '4', '5', '6', '7',
	/* 070 */ '8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/* 100 */ ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* 110 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 120 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* 130 */ 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/* 140 */ ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* 150 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 160 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* 170 */ 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};

/* Command-line state. */
int caps = 0;			/* -c given */
int lineno = 0;			/* -l given */
int fflag;			/* -f given */
int nflag = 1;			/* use the default phrase file (cleared by -n) */
char *filename;			/* operand of -f */
int mflg = 0;			/* nonzero: copy hits to the `mine' stream */
int nfile;			/* number of file operands */

/* Results. */
int nsucc;			/* set once any phrase has been reported */
long nsent = 0;			/* sentences seen */
long nhits = 0;			/* phrases found */

/* Output state shared by execute() and outc(). */
char *nlp;			/* next input byte outc() has not handled yet */
char *begp;			/* first byte of the phrase being marked */
char *endp;			/* last byte of the phrase being marked */
int beg;			/* outc(): currently copying context to `mine' */
int last;			/* outc(): counts bytes copied to `mine' after a hit */
char *myst;			/* where copying to `mine' starts for this hit */
int myct = 0;			/* column on the current `mine' line */
int oct = 0;			/* column on the current stdout line */

FILE *wordf;			/* phrase file being read by getargc() */
FILE *mine;			/* log stream opened on CATCH, when defined */
char *argptr;			/* in-memory phrase text for getargc() when wordf is null */

/* Totals over all files, used with -l. */
long tl = 0;			/* lines */
long th = 0;			/* phrases found */
74
main(argc,argv)75 main(argc, argv)
76 char *argv[];
77 {
78 int sv;
79 while (--argc > 0 && (++argv)[0][0]=='-')
80 switch (argv[0][1]) {
81
82 case 'f':
83 fflag++;
84 filename = (++argv)[0];
85 argc--;
86 continue;
87
88 case 'n':
89 nflag = 0;
90 continue;
91 case 'd':
92 mflg=0;
93 continue;
94 case 'c':
95 caps++;
96 continue;
97 case 'l':
98 lineno++;
99 continue;
100 default:
101 fprintf(stderr, "diction: unknown flag\n");
102 continue;
103 }
104 out:
105 if(nflag){
106 wordf = fopen(_PATH_DICT,"r");
107 if(wordf == NULL){
108 fprintf(stderr,"diction: can't open default dictionary\n");
109 exit(2);
110 }
111 }
112 else {
113 wordf = fopen(filename,"r");
114 if(wordf == NULL){
115 fprintf(stderr,"diction: can't open %s\n",filename);
116 exit(2);
117 }
118 }
119
120 #ifdef CATCH
121 if(fopen(CATCH,"r") != NULL)
122 if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
123 #endif
124 #ifdef MACS
125 if(caps){
126 printf(".so ");
127 printf(MACS);
128 printf("\n");
129 }
130 #endif
131 cgotofn();
132 cfail();
133 nfile = argc;
134 if (argc<=0) {
135 execute((char *)NULL);
136 }
137 else while (--argc >= 0) {
138 execute(*argv);
139 if(lineno){
140 printf("file %s: number of lines %ld number of phrases found %ld\n",
141 *argv, lcount-1, nhits);
142 tl += lcount-1;
143 th += nhits;
144 sv = lcount-1;
145 lcount = nhits = 0;
146 }
147 argv++;
148 }
149 if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
150 if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
151 else if(tl != sv)
152 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
153 exit(nsucc == 0);
154 }
155
execute(file)156 execute(file)
157 char *file;
158 {
159 register char *p;
160 register struct words *c;
161 register ccount;
162 int count1;
163 char *beg1;
164 struct words *savc;
165 char *savp;
166 int savct;
167 int scr;
168 char buf[1024];
169 int f;
170 int hit;
171 last = 0;
172 if (file) {
173 if ((f = open(file, 0)) < 0) {
174 fprintf(stderr, "diction: can't open %s\n", file);
175 exit(2);
176 }
177 }
178 else f = 0;
179 lcount = olcount = 1;
180 linemsg = 1;
181 ccount = 0;
182 count1 = -1;
183 p = buf;
184 nlp = p;
185 c = w;
186 oct = hit = 0;
187 savc = (struct words *) 0;
188 savp = (char *) 0;
189 for (;;) {
190 if(--ccount <= 0) {
191 if (p == &buf[1024]) p = buf;
192 if (p > &buf[512]) {
193 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
194 }
195 else if ((ccount = read(f, p, 512)) <= 0) break;
196 if(caps && (count1 > 0))
197 fwrite(beg1,sizeof(*beg1),count1,stdout);
198 count1 = ccount;
199 beg1 = p;
200 }
201 if(p == &buf[1024])p=buf;
202 nstate:
203 if (c->inp == table[*p]) {
204 c = c->nst;
205 }
206 else if (c->link != 0) {
207 c = c->link;
208 goto nstate;
209 }
210 else {
211 if(savp != 0){
212 c=savc;
213 p=savp;
214 if(ccount > savct)ccount += savct;
215 else ccount = savct;
216 savc = (struct words *) 0;
217 savp = (char *) 0;
218 goto hadone;
219 }
220 c = c->fail;
221 if (c==0) {
222 c = w;
223 istate:
224 if (c->inp == table[*p]) {
225 c = c->nst;
226 }
227 else if (c->link != 0) {
228 c = c->link;
229 goto istate;
230 }
231 }
232 else goto nstate;
233 }
234 if(c->out){
235 if((c->inp == table[*(p+1)]) && (c->nst != 0)){
236 savp=p;
237 savc=c;
238 savct=ccount;
239 goto cont;
240 }
241 else if(c->link != 0){
242 savc=c;
243 while((savc=savc->link)!= 0){
244 if(savc->inp == table[*(p+1)]){
245 savp=p;
246 savc=c;
247 savct=ccount;
248 goto cont;
249 }
250 }
251 }
252 hadone:
253 savc = (struct words *) 0;
254 savp = (char *) 0;
255 if(c->out == (char)(0377)){
256 c=w;
257 goto nstate;
258 }
259 begp = p - (c->out);
260 if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
261 endp=p;
262 if(mflg){
263 if(begp-20 < &buf[0]){
264 myst = &buf[1024]-20;
265 if(nlp < &buf[512])myst=nlp;
266 }
267 else myst = begp-20;
268 if(myst < nlp)myst = nlp;
269 beg = 0;
270 }
271 hit = 1;
272 nhits++;
273 if(*p == '\n')lcount++;
274 if (table[*p++] == '.') {
275 linemsg = 1;
276 if (--ccount <= 0) {
277 if (p == &buf[1024]) p = buf;
278 if (p > &buf[512]) {
279 if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
280 }
281 else if ((ccount = read(f, p, 512)) <= 0) break;
282 if(caps && (count1 > 0))
283 fwrite(beg1,sizeof(*beg1),count1,stdout);
284 count1=ccount;
285 beg1=p;
286 }
287 }
288 succeed: nsucc = 1;
289 {
290 if (p <= nlp) {
291 outc(&buf[1024],file);
292 nlp = buf;
293 }
294 outc(p,file);
295 }
296 if(mflg)last=1;
297 nomatch:
298 nlp = p;
299 c = w;
300 begp = endp = 0;
301 continue;
302 }
303 cont:
304 if(*p == '\n')lcount++;
305 if (table[*p++] == '.'){
306 if(hit){
307 if(p <= nlp){
308 outc(&buf[1024],file);
309 nlp = buf;
310 }
311 outc(p,file);
312 if(!caps)printf("\n\n");
313 if(mflg && last){putc('\n',mine);myct = 0;}
314 }
315 linemsg = 1;
316 if(*p == '\n')olcount = lcount+1;
317 else
318 olcount=lcount;
319 last = 0;
320 hit = 0;
321 oct = 0;
322 nlp = p;
323 c = w;
324 begp = endp = 0;
325 nsent++;
326 }
327 }
328 if(caps && (count1 > 0))
329 fwrite(beg1,sizeof(*beg1),count1,stdout);
330 close(f);
331 }
332
getargc()333 getargc()
334 {
335 register c;
336 if (wordf){
337 if((c=getc(wordf))==EOF){
338 fclose(wordf);
339 if(nflag && fflag){
340 nflag=0;
341 wordf=fopen(filename,"r");
342 if(wordf == NULL){
343 fprintf(stderr,
344 "diction can't open %s\n",filename);
345 exit(2);
346 }
347 return(getc(wordf));
348 }
349 else return(EOF);
350 }
351 else return(c);
352 }
353 if ((c = *argptr++) == '\0')
354 return(EOF);
355 return(c);
356 }
357
cgotofn()358 cgotofn() {
359 register c;
360 register struct words *s;
361 register ct;
362 int neg;
363
364 s = smax = w;
365 neg = ct = 0;
366 nword: for(;;) {
367 c = getargc();
368 if(c == '~'){
369 neg++;
370 c = getargc();
371 }
372 if (c==EOF)
373 return;
374 if (c == '\n') {
375 if(neg)s->out = 0377;
376 else s->out = ct-1;
377 neg = ct = 0;
378 s = w;
379 } else {
380 loop: if (s->inp == c) {
381 s = s->nst;
382 ct++;
383 continue;
384 }
385 if (s->inp == 0) goto enter;
386 if (s->link == 0) {
387 if (smax >= &w[MAXSIZ - 1]) overflo();
388 s->link = ++smax;
389 s = smax;
390 goto enter;
391 }
392 s = s->link;
393 goto loop;
394 }
395 }
396
397 enter:
398 do {
399 s->inp = c;
400 ct++;
401 if (smax >= &w[MAXSIZ - 1]) overflo();
402 s->nst = ++smax;
403 s = smax;
404 } while ((c = getargc()) != '\n' && c!=EOF);
405 if(neg)smax->out = 0377;
406 else smax->out = ct-1;
407 neg = ct = 0;
408 s = w;
409 if (c != EOF)
410 goto nword;
411 }
412
/*
 * overflo -- report that a fixed-size table is full and give up with
 * exit status 2.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
cfail()417 cfail() {
418 struct words *queue[QSIZE];
419 struct words **front, **rear;
420 struct words *state;
421 int bstart;
422 register char c;
423 register struct words *s;
424 s = w;
425 front = rear = queue;
426 init: if ((s->inp) != 0) {
427 *rear++ = s->nst;
428 if (rear >= &queue[QSIZE - 1]) overflo();
429 }
430 if ((s = s->link) != 0) {
431 goto init;
432 }
433
434 while (rear!=front) {
435 s = *front;
436 if (front == &queue[QSIZE-1])
437 front = queue;
438 else front++;
439 cloop: if ((c = s->inp) != 0) {
440 bstart=0;
441 *rear = (q = s->nst);
442 if (front < rear)
443 if (rear >= &queue[QSIZE-1])
444 if (front == queue) overflo();
445 else rear = queue;
446 else rear++;
447 else
448 if (++rear == front) overflo();
449 state = s->fail;
450 floop: if (state == 0){ state = w;bstart=1;}
451 if (state->inp == c) {
452 qloop: q->fail = state->nst;
453 if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
454 if((q=q->link) != 0)goto qloop;
455 }
456 else if ((state = state->link) != 0)
457 goto floop;
458 else if(bstart==0){state=0; goto floop;}
459 }
460 if ((s = s->link) != 0)
461 goto cloop;
462 }
463 /* for(s=w;s<=smax;s++)
464 printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
465 s->inp,s->out,s->nst,s->link,s->fail);
466 */
467 }
outc(addr,file)468 outc(addr,file)
469 char *addr;
470 char *file;
471 {
472 int inside;
473
474 inside = 0;
475 if(!caps && lineno && linemsg){
476 printf("beginning line %ld",olcount);
477 if(file != (char *)NULL)printf(" %s\n",file);
478 else printf("\n");
479 linemsg = 0;
480 }
481 while(nlp < addr){
482 if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
483 oct=0;
484 putchar('\n');
485 }
486 if(nlp == begp){
487 if(caps)inside++;
488 else {
489 if( oct >45){putchar('\n');
490 oct=0;
491 }
492 if( oct==0 || table[*nlp] != ' '){
493 printf("*[");
494 oct+=2;
495 }
496 else {printf(" *[");;
497 oct+=3;
498 }
499 }
500 if(mflg)putc('[',mine);
501 }
502 if(inside){
503 if(islower(*nlp))*nlp = toupper(*nlp);
504 }
505 else {
506 if(!caps && *nlp == '\n')*nlp = ' ';
507 if(*nlp == ' ' && oct==0);
508 else if(!caps) {putchar(*nlp); oct++;}
509 }
510 if(nlp == endp){
511 if(caps)
512 inside= 0;
513 else {
514 if(*(nlp) != ' '){printf("]*");
515 oct+=2;
516 }
517 else {printf("]* ");
518 oct+=3;
519 }
520 if(oct >60){putchar('\n');
521 oct=0;
522 }
523 }
524 if(mflg)putc(']',mine);
525 beg = 0;
526 }
527 if(mflg){
528 if(nlp == myst)beg = 1;
529 if(beg || last){
530 putc(*nlp,mine);
531 if(myct++ >= 72 || last == 20){
532 putc('\n',mine);
533 if(last == 20)last=myct=0;
534 else myct=0;
535 }
536 if(last)last++;
537 }
538 }
539 nlp++;
540 }
541 }
542