1 #ifndef lint
2 static char sccsid[] = "@(#)dprog.c	4.4	(Berkeley)	91/03/01";
3 #endif not lint
4 
5 /*
6  * diction -- print all sentences containing one of default phrases
7  *
8  *	status returns:
9  *		0 - ok, and some matches
10  *		1 - ok, but no matches
11  *		2 - some error
12  */
13 
14 #include <stdio.h>
15 #include <ctype.h>
16 #include "pathnames.h"
17 
18 #define	MAXSIZ 6500
19 #define QSIZE 650
20 int linemsg;
21 long olcount;
22 long lcount;
23 struct words {
24 	char 	inp;
25 	char	out;
26 	struct	words *nst;
27 	struct	words *link;
28 	struct	words *fail;
29 } w[MAXSIZ], *smax, *q;
30 
31 char table[128] = {
32 	0, 0, 0, 0, 0, 0, 0, 0,
33 	0, 0, ' ', 0, 0, 0, 0, 0,
34 	0, 0, 0, 0, 0, 0, 0, 0,
35 	0, 0, 0, 0, 0, 0, 0, 0,
36 	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
37 	' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
38 	'0', '1', '2', '3', '4', '5', '6', '7',
39 	'8', '9', ' ', ' ', ' ', ' ', ' ', '.',
40 	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
41 	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
42 	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
43 	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
44 	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
45 	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
46 	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
47 	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
48 	};
49 int	caps = 0;
50 int	lineno = 0;
51 int fflag;
52 int nflag	= 1; /*use default file*/
53 char *filename;
54 int	mflg	= 0;	/*don't catch output*/
55 int	nfile;
56 int	nsucc;
57 long nsent = 0;
58 long nhits = 0;
59 char *nlp;
60 char *begp, *endp;
61 int beg, last;
62 char *myst;
63 int myct = 0;
64 int oct = 0;
65 FILE	*wordf;
66 FILE *mine;
67 char	*argptr;
68 long tl = 0;
69 long th = 0;
70 
71 main(argc, argv)
72 char *argv[];
73 {
74 	int sv;
75 	while (--argc > 0 && (++argv)[0][0]=='-')
76 		switch (argv[0][1]) {
77 
78 		case 'f':
79 			fflag++;
80 			filename = (++argv)[0];
81 			argc--;
82 			continue;
83 
84 		case 'n':
85 			nflag = 0;
86 			continue;
87 		case 'd':
88 			mflg=0;
89 			continue;
90 		case 'c':
91 			caps++;
92 			continue;
93 		case 'l':
94 			lineno++;
95 			continue;
96 		default:
97 			fprintf(stderr, "diction: unknown flag\n");
98 			continue;
99 		}
100 out:
101 	if(nflag){
102 		wordf = fopen(_PATH_DICT,"r");
103 		if(wordf == NULL){
104 			fprintf(stderr,"diction: can't open default dictionary\n");
105 			exit(2);
106 		}
107 	}
108 	else {
109 		wordf = fopen(filename,"r");
110 		if(wordf == NULL){
111 			fprintf(stderr,"diction: can't open %s\n",filename);
112 			exit(2);
113 		}
114 	}
115 
116 #ifdef CATCH
117 	if(fopen(CATCH,"r") != NULL)
118 		if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
119 #endif
120 #ifdef MACS
121 	if(caps){
122 		printf(".so ");
123 		printf(MACS);
124 		printf("\n");
125 	}
126 #endif
127 	cgotofn();
128 	cfail();
129 	nfile = argc;
130 	if (argc<=0) {
131 		execute((char *)NULL);
132 	}
133 	else while (--argc >= 0) {
134 		execute(*argv);
135 		if(lineno){
136 			printf("file %s: number of lines %ld number of phrases found %ld\n",
137 				*argv, lcount-1, nhits);
138 			tl += lcount-1;
139 			th += nhits;
140 			sv = lcount-1;
141 			lcount = nhits = 0;
142 		}
143 		argv++;
144 	}
145 	if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
146 	if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
147 	else if(tl != sv)
148 		 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
149 	exit(nsucc == 0);
150 }
151 
152 execute(file)
153 char *file;
154 {
155 	register char *p;
156 	register struct words *c;
157 	register ccount;
158 	int count1;
159 	char *beg1;
160 	struct words *savc;
161 	char *savp;
162 	int savct;
163 	int scr;
164 	char buf[1024];
165 	int f;
166 	int hit;
167 	last = 0;
168 	if (file) {
169 		if ((f = open(file, 0)) < 0) {
170 			fprintf(stderr, "diction: can't open %s\n", file);
171 			exit(2);
172 		}
173 	}
174 	else f = 0;
175 	lcount = olcount = 1;
176 	linemsg = 1;
177 	ccount = 0;
178 	count1 = -1;
179 	p = buf;
180 	nlp = p;
181 	c = w;
182 	oct = hit = 0;
183 	savc = (struct words *) 0;
184 	savp = (char *) 0;
185 	for (;;) {
186 		if(--ccount <= 0) {
187 			if (p == &buf[1024]) p = buf;
188 			if (p > &buf[512]) {
189 				if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
190 			}
191 			else if ((ccount = read(f, p, 512)) <= 0) break;
192 			if(caps && (count1 > 0))
193 				fwrite(beg1,sizeof(*beg1),count1,stdout);
194 			count1 = ccount;
195 			beg1 = p;
196 		}
197 		if(p == &buf[1024])p=buf;
198 		nstate:
199 			if (c->inp == table[*p]) {
200 				c = c->nst;
201 			}
202 			else if (c->link != 0) {
203 				c = c->link;
204 				goto nstate;
205 			}
206 			else {
207 				if(savp != 0){
208 					c=savc;
209 					p=savp;
210 					if(ccount > savct)ccount += savct;
211 					else ccount = savct;
212 					savc = (struct words *) 0;
213 					savp = (char *) 0;
214 					goto hadone;
215 				}
216 				c = c->fail;
217 				if (c==0) {
218 					c = w;
219 					istate:
220 					if (c->inp == table[*p]) {
221 						c = c->nst;
222 					}
223 					else if (c->link != 0) {
224 						c = c->link;
225 						goto istate;
226 					}
227 				}
228 				else goto nstate;
229 			}
230 		if(c->out){
231 			if((c->inp == table[*(p+1)]) && (c->nst != 0)){
232 				savp=p;
233 				savc=c;
234 				savct=ccount;
235 				goto cont;
236 			}
237 			else if(c->link != 0){
238 				savc=c;
239 				while((savc=savc->link)!= 0){
240 					if(savc->inp == table[*(p+1)]){
241 						savp=p;
242 						savc=c;
243 						savct=ccount;
244 						goto cont;
245 					}
246 				}
247 			}
248 		hadone:
249 			savc = (struct words *) 0;
250 			savp = (char *) 0;
251 			if(c->out == (char)(0377)){
252 				c=w;
253 				goto nstate;
254 			}
255 			begp = p - (c->out);
256 			if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
257 			endp=p;
258 			if(mflg){
259 				if(begp-20 < &buf[0]){
260 					myst = &buf[1024]-20;
261 					if(nlp < &buf[512])myst=nlp;
262 				}
263 				else myst = begp-20;
264 				if(myst < nlp)myst = nlp;
265 				beg = 0;
266 			}
267 			hit = 1;
268 			nhits++;
269 			if(*p == '\n')lcount++;
270 			if (table[*p++] == '.') {
271 				linemsg = 1;
272 				if (--ccount <= 0) {
273 					if (p == &buf[1024]) p = buf;
274 					if (p > &buf[512]) {
275 						if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
276 					}
277 					else if ((ccount = read(f, p, 512)) <= 0) break;
278 					if(caps && (count1 > 0))
279 						fwrite(beg1,sizeof(*beg1),count1,stdout);
280 					count1=ccount;
281 					beg1=p;
282 				}
283 			}
284 	succeed:	nsucc = 1;
285 			{
286 				if (p <= nlp) {
287 					outc(&buf[1024],file);
288 					nlp = buf;
289 				}
290 				outc(p,file);
291 			}
292 			if(mflg)last=1;
293 	nomatch:
294 			nlp = p;
295 			c = w;
296 			begp = endp = 0;
297 			continue;
298 		}
299 	cont:
300 		if(*p == '\n')lcount++;
301 		if (table[*p++] == '.'){
302 				if(hit){
303 					if(p <= nlp){
304 						outc(&buf[1024],file);
305 						nlp = buf;
306 					}
307 					outc(p,file);
308 					if(!caps)printf("\n\n");
309 					if(mflg && last){putc('\n',mine);myct = 0;}
310 					}
311 				linemsg = 1;
312 				if(*p == '\n')olcount = lcount+1;
313 				else
314 					olcount=lcount;
315 				last = 0;
316 				hit = 0;
317 				oct = 0;
318 				nlp = p;
319 				c = w;
320 				begp = endp = 0;
321 				nsent++;
322 			}
323 	}
324 	if(caps && (count1 > 0))
325 		fwrite(beg1,sizeof(*beg1),count1,stdout);
326 	close(f);
327 }
328 
329 getargc()
330 {
331 	register c;
332 	if (wordf){
333 		if((c=getc(wordf))==EOF){
334 			fclose(wordf);
335 			if(nflag && fflag){
336 				nflag=0;
337 				wordf=fopen(filename,"r");
338 				if(wordf == NULL){
339 					fprintf(stderr,
340 					    "diction can't open %s\n",filename);
341 					exit(2);
342 				}
343 				return(getc(wordf));
344 			}
345 			else return(EOF);
346 		}
347 		else return(c);
348 	}
349 	if ((c = *argptr++) == '\0')
350 		return(EOF);
351 	return(c);
352 }
353 
354 cgotofn() {
355 	register c;
356 	register struct words *s;
357 	register ct;
358 	int neg;
359 
360 	s = smax = w;
361 	neg = ct = 0;
362 nword:	for(;;) {
363 		c = getargc();
364 		if(c == '~'){
365 			neg++;
366 			c = getargc();
367 		}
368 		if (c==EOF)
369 			return;
370 		if (c == '\n') {
371 			if(neg)s->out = 0377;
372 			else s->out = ct-1;
373 			neg = ct = 0;
374 			s = w;
375 		} else {
376 		loop:	if (s->inp == c) {
377 				s = s->nst;
378 				ct++;
379 				continue;
380 			}
381 			if (s->inp == 0) goto enter;
382 			if (s->link == 0) {
383 				if (smax >= &w[MAXSIZ - 1]) overflo();
384 				s->link = ++smax;
385 				s = smax;
386 				goto enter;
387 			}
388 			s = s->link;
389 			goto loop;
390 		}
391 	}
392 
393 	enter:
394 	do {
395 		s->inp = c;
396 		ct++;
397 		if (smax >= &w[MAXSIZ - 1]) overflo();
398 		s->nst = ++smax;
399 		s = smax;
400 	} while ((c = getargc()) != '\n' && c!=EOF);
401 	if(neg)smax->out = 0377;
402 	else smax->out = ct-1;
403 	neg = ct = 0;
404 	s = w;
405 	if (c != EOF)
406 		goto nword;
407 }
408 
409 overflo() {
410 	fprintf(stderr, "wordlist too large\n");
411 	exit(2);
412 }
413 cfail() {
414 	struct words *queue[QSIZE];
415 	struct words **front, **rear;
416 	struct words *state;
417 	int bstart;
418 	register char c;
419 	register struct words *s;
420 	s = w;
421 	front = rear = queue;
422 init:	if ((s->inp) != 0) {
423 		*rear++ = s->nst;
424 		if (rear >= &queue[QSIZE - 1]) overflo();
425 	}
426 	if ((s = s->link) != 0) {
427 		goto init;
428 	}
429 
430 	while (rear!=front) {
431 		s = *front;
432 		if (front == &queue[QSIZE-1])
433 			front = queue;
434 		else front++;
435 	cloop:	if ((c = s->inp) != 0) {
436 			bstart=0;
437 			*rear = (q = s->nst);
438 			if (front < rear)
439 				if (rear >= &queue[QSIZE-1])
440 					if (front == queue) overflo();
441 					else rear = queue;
442 				else rear++;
443 			else
444 				if (++rear == front) overflo();
445 			state = s->fail;
446 		floop:	if (state == 0){ state = w;bstart=1;}
447 			if (state->inp == c) {
448 			qloop:	q->fail = state->nst;
449 				if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
450 				if((q=q->link) != 0)goto qloop;
451 			}
452 			else if ((state = state->link) != 0)
453 				goto floop;
454 			else if(bstart==0){state=0; goto floop;}
455 		}
456 		if ((s = s->link) != 0)
457 			goto cloop;
458 	}
459 /*	for(s=w;s<=smax;s++)
460 		printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
461 			s->inp,s->out,s->nst,s->link,s->fail);
462 */
463 }
464 outc(addr,file)
465 char *addr;
466 char *file;
467 {
468 	int inside;
469 
470 	inside = 0;
471 	if(!caps && lineno && linemsg){
472 		printf("beginning line %ld",olcount);
473 		if(file != (char *)NULL)printf(" %s\n",file);
474 		else printf("\n");
475 		linemsg = 0;
476 	}
477 	while(nlp < addr){
478 		if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
479 			oct=0;
480 			putchar('\n');
481 		}
482 		if(nlp == begp){
483 			if(caps)inside++;
484 			else {
485 				if( oct >45){putchar('\n');
486 					oct=0;
487 				}
488 				if( oct==0 || table[*nlp] != ' '){
489 					printf("*[");
490 					oct+=2;
491 				}
492 				else {printf(" *[");;
493 					oct+=3;
494 				}
495 			}
496 			if(mflg)putc('[',mine);
497 		}
498 		if(inside){
499 			if(islower(*nlp))*nlp = toupper(*nlp);
500 		}
501 		else {
502 			if(!caps && *nlp == '\n')*nlp = ' ';
503 			if(*nlp == ' ' && oct==0);
504 			else if(!caps) {putchar(*nlp); oct++;}
505 		}
506 		if(nlp == endp){
507 			if(caps)
508 				inside= 0;
509 			else {
510 				if(*(nlp) != ' '){printf("]*");
511 					oct+=2;
512 				}
513 				else {printf("]* ");
514 					oct+=3;
515 				}
516 				if(oct >60){putchar('\n');
517 					oct=0;
518 				}
519 			}
520 			if(mflg)putc(']',mine);
521 			beg = 0;
522 		}
523 		if(mflg){
524 			if(nlp == myst)beg = 1;
525 			if(beg || last){
526 				putc(*nlp,mine);
527 				if(myct++ >= 72 || last == 20){
528 					putc('\n',mine);
529 					if(last == 20)last=myct=0;
530 					else myct=0;
531 				}
532 				if(last)last++;
533 			}
534 		}
535 		nlp++;
536 	}
537 }
538