1 /*-
2  * %sccs.include.proprietary.c%
3  */
4 
5 #ifndef lint
6 static char sccsid[] = "@(#)dprog.c	4.5 (Berkeley) 04/17/91";
7 #endif /* not lint */
8 
9 /*
10  * diction -- print all sentences containing one of default phrases
11  *
12  *	status returns:
13  *		0 - ok, and some matches
14  *		1 - ok, but no matches
15  *		2 - some error
16  */
17 
18 #include <stdio.h>
19 #include <ctype.h>
20 #include "pathnames.h"
21 
/* Fixed capacities: nodes available in w[] and slots in cfail()'s queue. */
#define MAXSIZ	6500
#define QSIZE	650

int	linemsg;	/* nonzero while outc() still owes a "beginning line" header */
long	olcount;	/* line number outc() prints in that header */
long	lcount;		/* line counter execute() bumps on every newline it scans */

/*
 * One node of the phrase-matching automaton.  A state is a chain of
 * nodes joined through `link'; each node carries one transition.
 */
struct words {
	char	inp;		/* character that selects this transition; 0 = unused */
	char	out;		/* 0 = no phrase ends here, 0377 = "~" phrase,
				   otherwise the count cgotofn() stored (ct-1) */
	struct words *nst;	/* node reached when inp matches */
	struct words *link;	/* next alternative within the same state */
	struct words *fail;	/* where to resume after a mismatch (set by cfail) */
};

struct words w[MAXSIZ];	/* node pool; w[0] is the start state */
struct words *smax;	/* last node of w[] handed out so far */
struct words *q;	/* scratch node pointer used by cfail() */
34 
/*
 * table -- character classes, indexed by 7-bit ASCII code:
 *	letters (either case)	-> the lower-case letter
 *	digits			-> themselves
 *	'.', '!' and '?'	-> '.'
 *	newline, space, DEL and the remaining punctuation -> ' '
 *	all other control characters (tab included)	  -> 0
 */
char table[128] = {
	0,   0,   0,   0,   0,   0,   0,   0,	/* 000-007 */
	0,   0,   ' ', 0,   0,   0,   0,   0,	/* 010-017: only newline maps */
	0,   0,   0,   0,   0,   0,   0,   0,	/* 020-027 */
	0,   0,   0,   0,   0,   0,   0,   0,	/* 030-037 */
	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',	/* 040-047: '!' ends a sentence */
	' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',	/* 050-057: '.' ends a sentence */
	'0', '1', '2', '3', '4', '5', '6', '7',	/* 060-067 */
	'8', '9', ' ', ' ', ' ', ' ', ' ', '.',	/* 070-077: '?' ends a sentence */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',	/* 100-107: upper case folds */
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',	/* 110-117 */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',	/* 120-127 */
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',	/* 130-137 */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',	/* 140-147 */
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',	/* 150-157 */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',	/* 160-167 */
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '	/* 170-177 */
};
/* Command-line state, set by main(). */
int	caps = 0;	/* -c: matched text is upper-cased instead of bracketed */
int	lineno = 0;	/* -l: report line numbers and per-file counts */
int	fflag;		/* -f seen */
int	nflag = 1;	/* nonzero: read the default dictionary (cleared by -n) */
char	*filename;	/* operand of -f */
int	mflg = 0;	/* nonzero: also copy matches to the catch file `mine' */
int	nfile;		/* number of file operands left after the options */

/* Results. */
int	nsucc;		/* set to 1 by execute() once any phrase is found */
long	nsent = 0;	/* sentences seen */
long	nhits = 0;	/* phrases found */
long	tl = 0;		/* lines, summed over all files (with -l) */
long	th = 0;		/* phrases found, summed over all files (with -l) */

/* Positions in execute()'s buffer that are shared with outc(). */
char	*nlp;		/* next byte outc() will emit */
char	*begp;		/* first byte of the current match */
char	*endp;		/* last byte of the current match */
char	*myst;		/* where copying to the catch file starts */

/* Output bookkeeping used by outc(). */
int	beg;		/* nonzero while leading context goes to the catch file */
int	last;		/* counts context bytes copied after a match (up to 20) */
int	myct = 0;	/* bytes on the current catch-file line */
int	oct = 0;	/* bytes on the current standard-output line */

/* Streams and phrase source. */
FILE	*wordf;		/* dictionary currently being read by getargc() */
FILE	*mine;		/* catch file; only valid while mflg is set */
char	*argptr;	/* in-memory phrase text getargc() reads when wordf is null */
74 
75 main(argc, argv)
76 char *argv[];
77 {
78 	int sv;
79 	while (--argc > 0 && (++argv)[0][0]=='-')
80 		switch (argv[0][1]) {
81 
82 		case 'f':
83 			fflag++;
84 			filename = (++argv)[0];
85 			argc--;
86 			continue;
87 
88 		case 'n':
89 			nflag = 0;
90 			continue;
91 		case 'd':
92 			mflg=0;
93 			continue;
94 		case 'c':
95 			caps++;
96 			continue;
97 		case 'l':
98 			lineno++;
99 			continue;
100 		default:
101 			fprintf(stderr, "diction: unknown flag\n");
102 			continue;
103 		}
104 out:
105 	if(nflag){
106 		wordf = fopen(_PATH_DICT,"r");
107 		if(wordf == NULL){
108 			fprintf(stderr,"diction: can't open default dictionary\n");
109 			exit(2);
110 		}
111 	}
112 	else {
113 		wordf = fopen(filename,"r");
114 		if(wordf == NULL){
115 			fprintf(stderr,"diction: can't open %s\n",filename);
116 			exit(2);
117 		}
118 	}
119 
120 #ifdef CATCH
121 	if(fopen(CATCH,"r") != NULL)
122 		if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
123 #endif
124 #ifdef MACS
125 	if(caps){
126 		printf(".so ");
127 		printf(MACS);
128 		printf("\n");
129 	}
130 #endif
131 	cgotofn();
132 	cfail();
133 	nfile = argc;
134 	if (argc<=0) {
135 		execute((char *)NULL);
136 	}
137 	else while (--argc >= 0) {
138 		execute(*argv);
139 		if(lineno){
140 			printf("file %s: number of lines %ld number of phrases found %ld\n",
141 				*argv, lcount-1, nhits);
142 			tl += lcount-1;
143 			th += nhits;
144 			sv = lcount-1;
145 			lcount = nhits = 0;
146 		}
147 		argv++;
148 	}
149 	if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
150 	if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
151 	else if(tl != sv)
152 		 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
153 	exit(nsucc == 0);
154 }
155 
156 execute(file)
157 char *file;
158 {
159 	register char *p;
160 	register struct words *c;
161 	register ccount;
162 	int count1;
163 	char *beg1;
164 	struct words *savc;
165 	char *savp;
166 	int savct;
167 	int scr;
168 	char buf[1024];
169 	int f;
170 	int hit;
171 	last = 0;
172 	if (file) {
173 		if ((f = open(file, 0)) < 0) {
174 			fprintf(stderr, "diction: can't open %s\n", file);
175 			exit(2);
176 		}
177 	}
178 	else f = 0;
179 	lcount = olcount = 1;
180 	linemsg = 1;
181 	ccount = 0;
182 	count1 = -1;
183 	p = buf;
184 	nlp = p;
185 	c = w;
186 	oct = hit = 0;
187 	savc = (struct words *) 0;
188 	savp = (char *) 0;
189 	for (;;) {
190 		if(--ccount <= 0) {
191 			if (p == &buf[1024]) p = buf;
192 			if (p > &buf[512]) {
193 				if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
194 			}
195 			else if ((ccount = read(f, p, 512)) <= 0) break;
196 			if(caps && (count1 > 0))
197 				fwrite(beg1,sizeof(*beg1),count1,stdout);
198 			count1 = ccount;
199 			beg1 = p;
200 		}
201 		if(p == &buf[1024])p=buf;
202 		nstate:
203 			if (c->inp == table[*p]) {
204 				c = c->nst;
205 			}
206 			else if (c->link != 0) {
207 				c = c->link;
208 				goto nstate;
209 			}
210 			else {
211 				if(savp != 0){
212 					c=savc;
213 					p=savp;
214 					if(ccount > savct)ccount += savct;
215 					else ccount = savct;
216 					savc = (struct words *) 0;
217 					savp = (char *) 0;
218 					goto hadone;
219 				}
220 				c = c->fail;
221 				if (c==0) {
222 					c = w;
223 					istate:
224 					if (c->inp == table[*p]) {
225 						c = c->nst;
226 					}
227 					else if (c->link != 0) {
228 						c = c->link;
229 						goto istate;
230 					}
231 				}
232 				else goto nstate;
233 			}
234 		if(c->out){
235 			if((c->inp == table[*(p+1)]) && (c->nst != 0)){
236 				savp=p;
237 				savc=c;
238 				savct=ccount;
239 				goto cont;
240 			}
241 			else if(c->link != 0){
242 				savc=c;
243 				while((savc=savc->link)!= 0){
244 					if(savc->inp == table[*(p+1)]){
245 						savp=p;
246 						savc=c;
247 						savct=ccount;
248 						goto cont;
249 					}
250 				}
251 			}
252 		hadone:
253 			savc = (struct words *) 0;
254 			savp = (char *) 0;
255 			if(c->out == (char)(0377)){
256 				c=w;
257 				goto nstate;
258 			}
259 			begp = p - (c->out);
260 			if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
261 			endp=p;
262 			if(mflg){
263 				if(begp-20 < &buf[0]){
264 					myst = &buf[1024]-20;
265 					if(nlp < &buf[512])myst=nlp;
266 				}
267 				else myst = begp-20;
268 				if(myst < nlp)myst = nlp;
269 				beg = 0;
270 			}
271 			hit = 1;
272 			nhits++;
273 			if(*p == '\n')lcount++;
274 			if (table[*p++] == '.') {
275 				linemsg = 1;
276 				if (--ccount <= 0) {
277 					if (p == &buf[1024]) p = buf;
278 					if (p > &buf[512]) {
279 						if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
280 					}
281 					else if ((ccount = read(f, p, 512)) <= 0) break;
282 					if(caps && (count1 > 0))
283 						fwrite(beg1,sizeof(*beg1),count1,stdout);
284 					count1=ccount;
285 					beg1=p;
286 				}
287 			}
288 	succeed:	nsucc = 1;
289 			{
290 				if (p <= nlp) {
291 					outc(&buf[1024],file);
292 					nlp = buf;
293 				}
294 				outc(p,file);
295 			}
296 			if(mflg)last=1;
297 	nomatch:
298 			nlp = p;
299 			c = w;
300 			begp = endp = 0;
301 			continue;
302 		}
303 	cont:
304 		if(*p == '\n')lcount++;
305 		if (table[*p++] == '.'){
306 				if(hit){
307 					if(p <= nlp){
308 						outc(&buf[1024],file);
309 						nlp = buf;
310 					}
311 					outc(p,file);
312 					if(!caps)printf("\n\n");
313 					if(mflg && last){putc('\n',mine);myct = 0;}
314 					}
315 				linemsg = 1;
316 				if(*p == '\n')olcount = lcount+1;
317 				else
318 					olcount=lcount;
319 				last = 0;
320 				hit = 0;
321 				oct = 0;
322 				nlp = p;
323 				c = w;
324 				begp = endp = 0;
325 				nsent++;
326 			}
327 	}
328 	if(caps && (count1 > 0))
329 		fwrite(beg1,sizeof(*beg1),count1,stdout);
330 	close(f);
331 }
332 
333 getargc()
334 {
335 	register c;
336 	if (wordf){
337 		if((c=getc(wordf))==EOF){
338 			fclose(wordf);
339 			if(nflag && fflag){
340 				nflag=0;
341 				wordf=fopen(filename,"r");
342 				if(wordf == NULL){
343 					fprintf(stderr,
344 					    "diction can't open %s\n",filename);
345 					exit(2);
346 				}
347 				return(getc(wordf));
348 			}
349 			else return(EOF);
350 		}
351 		else return(c);
352 	}
353 	if ((c = *argptr++) == '\0')
354 		return(EOF);
355 	return(c);
356 }
357 
358 cgotofn() {
359 	register c;
360 	register struct words *s;
361 	register ct;
362 	int neg;
363 
364 	s = smax = w;
365 	neg = ct = 0;
366 nword:	for(;;) {
367 		c = getargc();
368 		if(c == '~'){
369 			neg++;
370 			c = getargc();
371 		}
372 		if (c==EOF)
373 			return;
374 		if (c == '\n') {
375 			if(neg)s->out = 0377;
376 			else s->out = ct-1;
377 			neg = ct = 0;
378 			s = w;
379 		} else {
380 		loop:	if (s->inp == c) {
381 				s = s->nst;
382 				ct++;
383 				continue;
384 			}
385 			if (s->inp == 0) goto enter;
386 			if (s->link == 0) {
387 				if (smax >= &w[MAXSIZ - 1]) overflo();
388 				s->link = ++smax;
389 				s = smax;
390 				goto enter;
391 			}
392 			s = s->link;
393 			goto loop;
394 		}
395 	}
396 
397 	enter:
398 	do {
399 		s->inp = c;
400 		ct++;
401 		if (smax >= &w[MAXSIZ - 1]) overflo();
402 		s->nst = ++smax;
403 		s = smax;
404 	} while ((c = getargc()) != '\n' && c!=EOF);
405 	if(neg)smax->out = 0377;
406 	else smax->out = ct-1;
407 	neg = ct = 0;
408 	s = w;
409 	if (c != EOF)
410 		goto nword;
411 }
412 
/*
 * overflo -- a fixed-size table is full: say so on the standard error
 * and leave with status 2.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
417 cfail() {
418 	struct words *queue[QSIZE];
419 	struct words **front, **rear;
420 	struct words *state;
421 	int bstart;
422 	register char c;
423 	register struct words *s;
424 	s = w;
425 	front = rear = queue;
426 init:	if ((s->inp) != 0) {
427 		*rear++ = s->nst;
428 		if (rear >= &queue[QSIZE - 1]) overflo();
429 	}
430 	if ((s = s->link) != 0) {
431 		goto init;
432 	}
433 
434 	while (rear!=front) {
435 		s = *front;
436 		if (front == &queue[QSIZE-1])
437 			front = queue;
438 		else front++;
439 	cloop:	if ((c = s->inp) != 0) {
440 			bstart=0;
441 			*rear = (q = s->nst);
442 			if (front < rear)
443 				if (rear >= &queue[QSIZE-1])
444 					if (front == queue) overflo();
445 					else rear = queue;
446 				else rear++;
447 			else
448 				if (++rear == front) overflo();
449 			state = s->fail;
450 		floop:	if (state == 0){ state = w;bstart=1;}
451 			if (state->inp == c) {
452 			qloop:	q->fail = state->nst;
453 				if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
454 				if((q=q->link) != 0)goto qloop;
455 			}
456 			else if ((state = state->link) != 0)
457 				goto floop;
458 			else if(bstart==0){state=0; goto floop;}
459 		}
460 		if ((s = s->link) != 0)
461 			goto cloop;
462 	}
463 /*	for(s=w;s<=smax;s++)
464 		printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
465 			s->inp,s->out,s->nst,s->link,s->fail);
466 */
467 }
468 outc(addr,file)
469 char *addr;
470 char *file;
471 {
472 	int inside;
473 
474 	inside = 0;
475 	if(!caps && lineno && linemsg){
476 		printf("beginning line %ld",olcount);
477 		if(file != (char *)NULL)printf(" %s\n",file);
478 		else printf("\n");
479 		linemsg = 0;
480 	}
481 	while(nlp < addr){
482 		if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
483 			oct=0;
484 			putchar('\n');
485 		}
486 		if(nlp == begp){
487 			if(caps)inside++;
488 			else {
489 				if( oct >45){putchar('\n');
490 					oct=0;
491 				}
492 				if( oct==0 || table[*nlp] != ' '){
493 					printf("*[");
494 					oct+=2;
495 				}
496 				else {printf(" *[");;
497 					oct+=3;
498 				}
499 			}
500 			if(mflg)putc('[',mine);
501 		}
502 		if(inside){
503 			if(islower(*nlp))*nlp = toupper(*nlp);
504 		}
505 		else {
506 			if(!caps && *nlp == '\n')*nlp = ' ';
507 			if(*nlp == ' ' && oct==0);
508 			else if(!caps) {putchar(*nlp); oct++;}
509 		}
510 		if(nlp == endp){
511 			if(caps)
512 				inside= 0;
513 			else {
514 				if(*(nlp) != ' '){printf("]*");
515 					oct+=2;
516 				}
517 				else {printf("]* ");
518 					oct+=3;
519 				}
520 				if(oct >60){putchar('\n');
521 					oct=0;
522 				}
523 			}
524 			if(mflg)putc(']',mine);
525 			beg = 0;
526 		}
527 		if(mflg){
528 			if(nlp == myst)beg = 1;
529 			if(beg || last){
530 				putc(*nlp,mine);
531 				if(myct++ >= 72 || last == 20){
532 					putc('\n',mine);
533 					if(last == 20)last=myct=0;
534 					else myct=0;
535 				}
536 				if(last)last++;
537 			}
538 		}
539 		nlp++;
540 	}
541 }
542