#ifndef lint
/* SCCS identification string; compiled out when running lint. */
static char sccsid[] = "@(#)dprog.c	4.2	(Berkeley)	82/11/06";
#endif /* not lint */
4 
5 /*
 * diction -- print all sentences containing one of the default phrases
7  *
8  *	status returns:
9  *		0 - ok, and some matches
10  *		1 - ok, but no matches
11  *		2 - some error
12  */
13 
14 #include <stdio.h>
15 #include <ctype.h>
16 
/* Capacity limits for the phrase automaton and for the queue in cfail(). */
#define MAXSIZ	6500	/* number of nodes available in w[] */
#define QSIZE	650	/* number of slots in cfail()'s work queue */

int	linemsg;	/* nonzero: outc() still owes a "beginning line" header */
long	olcount;	/* line number printed in that header */
long	lcount;		/* running line count for the input being scanned */

/*
 * One node of the phrase-matching automaton built by cgotofn() and
 * completed by cfail().
 */
struct words {
	char	inp;		/* character that advances to nst; 0 while the node is unused */
	char	out;		/* nonzero at a phrase end: phrase length - 1, or 0377 for a "~" phrase */
	struct	words *nst;	/* node to move to when inp matches */
	struct	words *link;	/* alternative node to try when inp does not match */
	struct	words *fail;	/* fallback node filled in by cfail() */
};

struct words w[MAXSIZ];		/* node pool; scanning starts at w[0] */
struct words *smax;		/* most recently allocated node in w[] */
struct words *q;		/* scratch pointer used by cfail() */
29 
/*
 * table -- character class for each 7-bit ASCII code.
 *
 *	letters		fold to lower case
 *	digits		map to themselves
 *	'!' '.' '?'	map to '.'
 *	newline, space, other punctuation and DEL map to ' '
 *	remaining control characters (tab included) map to 0
 */
char table[128] = {
	/* 000-007 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 010-017 */	0, 0, ' ', 0, 0, 0, 0, 0,
	/* 020-027 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 030-037 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* sp ! " # $ % & ' */
			' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
	/* ( ) * + , - . / */
			' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/* 0 - 7 */	'0', '1', '2', '3', '4', '5', '6', '7',
	/* 8 9 : ; < = > ? */
			'8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/* @ A - G */	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* H - O */	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* P - W */	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* X Y Z [ \ ] ^ _ */
			'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/* ` a - g */	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/* h - o */	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* p - w */	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/* x y z { | } ~ DEL */
			'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
/* Command-line options (set in main). */
int	caps = 0;		/* -c: upper-case matched phrases and copy the input through */
int	lineno = 0;		/* -l: report line numbers and per-file counts */
int	fflag;			/* -f: a user pattern file was named */
int	nflag = 1;		/* use the default dictionary; cleared by -n */
char	*filename;		/* pattern file named by -f */
int	mflg = 0;		/* nonzero: also log matches to the catch file `mine' */

/* Counters. */
int	nfile;			/* number of input file arguments (set in main) */
int	nsucc;			/* set to 1 by execute() once any phrase is found */
long	nsent = 0;		/* sentences seen */
long	nhits = 0;		/* phrases found */

/* Positions inside execute()'s input buffer, shared with outc(). */
char	*nlp;			/* next character outc() has not yet emitted */
char	*begp;			/* first character of the current matched phrase */
char	*endp;			/* last character of the current matched phrase */

/* State for the catch-file copy written by outc(). */
int	beg;			/* nonzero while copying the context that precedes a match */
int	last;			/* counts characters copied after a match */
char	*myst;			/* where the catch-file context begins */
int	myct = 0;		/* characters written on the current catch-file line */

int	oct = 0;		/* characters written on the current stdout line */

FILE	*wordf;			/* pattern file being read by getargc() */
FILE	*mine;			/* catch file, valid when mflg is set */
char	*argptr;		/* in-memory pattern text read by getargc() when wordf is null */

/* Totals accumulated across files when -l is given. */
long	tl = 0;			/* lines */
long	th = 0;			/* phrases found */
69 
70 main(argc, argv)
71 char *argv[];
72 {
73 	int sv;
74 	while (--argc > 0 && (++argv)[0][0]=='-')
75 		switch (argv[0][1]) {
76 
77 		case 'f':
78 			fflag++;
79 			filename = (++argv)[0];
80 			argc--;
81 			continue;
82 
83 		case 'n':
84 			nflag = 0;
85 			continue;
86 		case 'd':
87 			mflg=0;
88 			continue;
89 		case 'c':
90 			caps++;
91 			continue;
92 		case 'l':
93 			lineno++;
94 			continue;
95 		default:
96 			fprintf(stderr, "diction: unknown flag\n");
97 			continue;
98 		}
99 out:
100 	if(nflag){
101 		wordf = fopen(DICT,"r");
102 		if(wordf == NULL){
103 			fprintf(stderr,"diction: can't open default dictionary\n");
104 			exit(2);
105 		}
106 	}
107 	else {
108 		wordf = fopen(filename,"r");
109 		if(wordf == NULL){
110 			fprintf(stderr,"diction: can't open %s\n",filename);
111 			exit(2);
112 		}
113 	}
114 
115 #ifdef CATCH
116 	if(fopen(CATCH,"r") != NULL)
117 		if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
118 #endif
119 #ifdef MACS
120 	if(caps){
121 		printf(".so ");
122 		printf(MACS);
123 		printf("\n");
124 	}
125 #endif
126 	cgotofn();
127 	cfail();
128 	nfile = argc;
129 	if (argc<=0) {
130 		execute((char *)NULL);
131 	}
132 	else while (--argc >= 0) {
133 		execute(*argv);
134 		if(lineno){
135 			printf("file %s: number of lines %ld number of phrases found %ld\n",
136 				*argv, lcount-1, nhits);
137 			tl += lcount-1;
138 			th += nhits;
139 			sv = lcount-1;
140 			lcount = nhits = 0;
141 		}
142 		argv++;
143 	}
144 	if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
145 	if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
146 	else if(tl != sv)
147 		 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
148 	exit(nsucc == 0);
149 }
150 
151 execute(file)
152 char *file;
153 {
154 	register char *p;
155 	register struct words *c;
156 	register ccount;
157 	int count1;
158 	char *beg1;
159 	struct words *savc;
160 	char *savp;
161 	int savct;
162 	int scr;
163 	char buf[1024];
164 	int f;
165 	int hit;
166 	last = 0;
167 	if (file) {
168 		if ((f = open(file, 0)) < 0) {
169 			fprintf(stderr, "diction: can't open %s\n", file);
170 			exit(2);
171 		}
172 	}
173 	else f = 0;
174 	lcount = olcount = 1;
175 	linemsg = 1;
176 	ccount = 0;
177 	count1 = -1;
178 	p = buf;
179 	nlp = p;
180 	c = w;
181 	oct = hit = 0;
182 	savc = (struct words *) 0;
183 	savp = (char *) 0;
184 	for (;;) {
185 		if(--ccount <= 0) {
186 			if (p == &buf[1024]) p = buf;
187 			if (p > &buf[512]) {
188 				if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
189 			}
190 			else if ((ccount = read(f, p, 512)) <= 0) break;
191 			if(caps && (count1 > 0))
192 				fwrite(beg1,sizeof(*beg1),count1,stdout);
193 			count1 = ccount;
194 			beg1 = p;
195 		}
196 		if(p == &buf[1024])p=buf;
197 		nstate:
198 			if (c->inp == table[*p]) {
199 				c = c->nst;
200 			}
201 			else if (c->link != 0) {
202 				c = c->link;
203 				goto nstate;
204 			}
205 			else {
206 				if(savp != 0){
207 					c=savc;
208 					p=savp;
209 					if(ccount > savct)ccount += savct;
210 					else ccount = savct;
211 					savc = (struct words *) 0;
212 					savp = (char *) 0;
213 					goto hadone;
214 				}
215 				c = c->fail;
216 				if (c==0) {
217 					c = w;
218 					istate:
219 					if (c->inp == table[*p]) {
220 						c = c->nst;
221 					}
222 					else if (c->link != 0) {
223 						c = c->link;
224 						goto istate;
225 					}
226 				}
227 				else goto nstate;
228 			}
229 		if(c->out){
230 			if((c->inp == table[*(p+1)]) && (c->nst != 0)){
231 				savp=p;
232 				savc=c;
233 				savct=ccount;
234 				goto cont;
235 			}
236 			else if(c->link != 0){
237 				savc=c;
238 				while((savc=savc->link)!= 0){
239 					if(savc->inp == table[*(p+1)]){
240 						savp=p;
241 						savc=c;
242 						savct=ccount;
243 						goto cont;
244 					}
245 				}
246 			}
247 		hadone:
248 			savc = (struct words *) 0;
249 			savp = (char *) 0;
250 			if(c->out == (char)(0377)){
251 				c=w;
252 				goto nstate;
253 			}
254 			begp = p - (c->out);
255 			if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
256 			endp=p;
257 			if(mflg){
258 				if(begp-20 < &buf[0]){
259 					myst = &buf[1024]-20;
260 					if(nlp < &buf[512])myst=nlp;
261 				}
262 				else myst = begp-20;
263 				if(myst < nlp)myst = nlp;
264 				beg = 0;
265 			}
266 			hit = 1;
267 			nhits++;
268 			if(*p == '\n')lcount++;
269 			if (table[*p++] == '.') {
270 				linemsg = 1;
271 				if (--ccount <= 0) {
272 					if (p == &buf[1024]) p = buf;
273 					if (p > &buf[512]) {
274 						if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
275 					}
276 					else if ((ccount = read(f, p, 512)) <= 0) break;
277 					if(caps && (count1 > 0))
278 						fwrite(beg1,sizeof(*beg1),count1,stdout);
279 					count1=ccount;
280 					beg1=p;
281 				}
282 			}
283 	succeed:	nsucc = 1;
284 			{
285 				if (p <= nlp) {
286 					outc(&buf[1024],file);
287 					nlp = buf;
288 				}
289 				outc(p,file);
290 			}
291 			if(mflg)last=1;
292 	nomatch:
293 			nlp = p;
294 			c = w;
295 			begp = endp = 0;
296 			continue;
297 		}
298 	cont:
299 		if(*p == '\n')lcount++;
300 		if (table[*p++] == '.'){
301 				if(hit){
302 					if(p <= nlp){
303 						outc(&buf[1024],file);
304 						nlp = buf;
305 					}
306 					outc(p,file);
307 					if(!caps)printf("\n\n");
308 					if(mflg && last){putc('\n',mine);myct = 0;}
309 					}
310 				linemsg = 1;
311 				if(*p == '\n')olcount = lcount+1;
312 				else
313 					olcount=lcount;
314 				last = 0;
315 				hit = 0;
316 				oct = 0;
317 				nlp = p;
318 				c = w;
319 				begp = endp = 0;
320 				nsent++;
321 			}
322 	}
323 	if(caps && (count1 > 0))
324 		fwrite(beg1,sizeof(*beg1),count1,stdout);
325 	close(f);
326 }
327 
328 getargc()
329 {
330 	register c;
331 	if (wordf){
332 		if((c=getc(wordf))==EOF){
333 			fclose(wordf);
334 			if(nflag && fflag){
335 				nflag=0;
336 				wordf=fopen(filename,"r");
337 				if(wordf == NULL){
338 					fprintf("diction can't open %s\n",filename);
339 					exit(2);
340 				}
341 				return(getc(wordf));
342 			}
343 			else return(EOF);
344 		}
345 		else return(c);
346 	}
347 	if ((c = *argptr++) == '\0')
348 		return(EOF);
349 	return(c);
350 }
351 
352 cgotofn() {
353 	register c;
354 	register struct words *s;
355 	register ct;
356 	int neg;
357 
358 	s = smax = w;
359 	neg = ct = 0;
360 nword:	for(;;) {
361 		c = getargc();
362 		if(c == '~'){
363 			neg++;
364 			c = getargc();
365 		}
366 		if (c==EOF)
367 			return;
368 		if (c == '\n') {
369 			if(neg)s->out = 0377;
370 			else s->out = ct-1;
371 			neg = ct = 0;
372 			s = w;
373 		} else {
374 		loop:	if (s->inp == c) {
375 				s = s->nst;
376 				ct++;
377 				continue;
378 			}
379 			if (s->inp == 0) goto enter;
380 			if (s->link == 0) {
381 				if (smax >= &w[MAXSIZ - 1]) overflo();
382 				s->link = ++smax;
383 				s = smax;
384 				goto enter;
385 			}
386 			s = s->link;
387 			goto loop;
388 		}
389 	}
390 
391 	enter:
392 	do {
393 		s->inp = c;
394 		ct++;
395 		if (smax >= &w[MAXSIZ - 1]) overflo();
396 		s->nst = ++smax;
397 		s = smax;
398 	} while ((c = getargc()) != '\n' && c!=EOF);
399 	if(neg)smax->out = 0377;
400 	else smax->out = ct-1;
401 	neg = ct = 0;
402 	s = w;
403 	if (c != EOF)
404 		goto nword;
405 }
406 
/*
 * overflo -- report that the phrase list does not fit and exit with
 * status 2.  Called when w[] or cfail()'s queue runs out of room.
 */
int
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
411 cfail() {
412 	struct words *queue[QSIZE];
413 	struct words **front, **rear;
414 	struct words *state;
415 	int bstart;
416 	register char c;
417 	register struct words *s;
418 	s = w;
419 	front = rear = queue;
420 init:	if ((s->inp) != 0) {
421 		*rear++ = s->nst;
422 		if (rear >= &queue[QSIZE - 1]) overflo();
423 	}
424 	if ((s = s->link) != 0) {
425 		goto init;
426 	}
427 
428 	while (rear!=front) {
429 		s = *front;
430 		if (front == &queue[QSIZE-1])
431 			front = queue;
432 		else front++;
433 	cloop:	if ((c = s->inp) != 0) {
434 			bstart=0;
435 			*rear = (q = s->nst);
436 			if (front < rear)
437 				if (rear >= &queue[QSIZE-1])
438 					if (front == queue) overflo();
439 					else rear = queue;
440 				else rear++;
441 			else
442 				if (++rear == front) overflo();
443 			state = s->fail;
444 		floop:	if (state == 0){ state = w;bstart=1;}
445 			if (state->inp == c) {
446 			qloop:	q->fail = state->nst;
447 				if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
448 				if((q=q->link) != 0)goto qloop;
449 			}
450 			else if ((state = state->link) != 0)
451 				goto floop;
452 			else if(bstart==0){state=0; goto floop;}
453 		}
454 		if ((s = s->link) != 0)
455 			goto cloop;
456 	}
457 /*	for(s=w;s<=smax;s++)
458 		printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
459 			s->inp,s->out,s->nst,s->link,s->fail);
460 */
461 }
462 outc(addr,file)
463 char *addr;
464 char *file;
465 {
466 	int inside;
467 
468 	inside = 0;
469 	if(!caps && lineno && linemsg){
470 		printf("beginning line %ld",olcount);
471 		if(file != (char *)NULL)printf(" %s\n",file);
472 		else printf("\n");
473 		linemsg = 0;
474 	}
475 	while(nlp < addr){
476 		if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
477 			oct=0;
478 			putchar('\n');
479 		}
480 		if(nlp == begp){
481 			if(caps)inside++;
482 			else {
483 				if( oct >45){putchar('\n');
484 					oct=0;
485 				}
486 				if( oct==0 || table[*nlp] != ' '){
487 					printf("*[");
488 					oct+=2;
489 				}
490 				else {printf(" *[");;
491 					oct+=3;
492 				}
493 			}
494 			if(mflg)putc('[',mine);
495 		}
496 		if(inside){
497 			if(islower(*nlp))*nlp = toupper(*nlp);
498 		}
499 		else {
500 			if(!caps && *nlp == '\n')*nlp = ' ';
501 			if(*nlp == ' ' && oct==0);
502 			else if(!caps) {putchar(*nlp); oct++;}
503 		}
504 		if(nlp == endp){
505 			if(caps)
506 				inside= 0;
507 			else {
508 				if(*(nlp) != ' '){printf("]*");
509 					oct+=2;
510 				}
511 				else {printf("]* ");
512 					oct+=3;
513 				}
514 				if(oct >60){putchar('\n');
515 					oct=0;
516 				}
517 			}
518 			if(mflg)putc(']',mine);
519 			beg = 0;
520 		}
521 		if(mflg){
522 			if(nlp == myst)beg = 1;
523 			if(beg || last){
524 				putc(*nlp,mine);
525 				if(myct++ >= 72 || last == 20){
526 					putc('\n',mine);
527 					if(last == 20)last=myct=0;
528 					else myct=0;
529 				}
530 				if(last)last++;
531 			}
532 		}
533 		nlp++;
534 	}
535 }
536