xref: /original-bsd/old/awk/b.c (revision 7a4e9f34)
1 /*	b.c	4.1	82/05/07	*/
2 
3 #include "awk.def"
4 #include "stdio.h"
5 #include "awk.h"
6 
7 extern node *op2();
8 extern struct fa *cgotofn();
9 #define MAXLIN 256
10 #define NCHARS 128
11 #define NSTATES 256
12 
13 #define type(v)	v->nobj
14 #define left(v)	v->narg[0]
15 #define right(v)	v->narg[1]
16 #define parent(v)	v->nnext
17 
18 #define LEAF	case CCL: case NCCL: case CHAR: case DOT:
19 #define UNARY	case FINAL: case STAR: case PLUS: case QUEST:
20 
21 /* encoding in tree nodes:
22 	leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value
23 	unary (FINAL, STAR, PLUS, QUEST): left is child, right is null
24 	binary (CAT, OR): left and right are children
25 	parent contains pointer to parent
26 */
27 
28 struct fa {
29 	int cch;
30 	struct fa *st;
31 };
32 
33 int	*state[NSTATES];
34 int	*foll[MAXLIN];
35 char	chars[MAXLIN];
36 int	setvec[MAXLIN];
37 node	*point[MAXLIN];
38 
39 int	setcnt;
40 int	line;
41 
42 
43 struct fa *makedfa(p)	/* returns dfa for tree pointed to by p */
44 node *p;
45 {
46 	node *p1;
47 	struct fa *fap;
48 	p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p);
49 		/* put DOT STAR in front of reg. exp. */
50 	p1 = op2(FINAL, p1, (node *) 0);		/* install FINAL node */
51 
52 	line = 0;
53 	penter(p1);	/* enter parent pointers and leaf indices */
54 	point[line] = p1;	/* FINAL node */
55 	setvec[0] = 1;		/* for initial DOT STAR */
56 	cfoll(p1);	/* set up follow sets */
57 	fap = cgotofn();
58 	freetr(p1);	/* add this when alloc works */
59 	return(fap);
60 }
61 
62 penter(p)	/* set up parent pointers and leaf indices */
63 node *p;
64 {
65 	switch(type(p)) {
66 		LEAF
67 			left(p) = (node *) line;
68 			point[line++] = p;
69 			break;
70 		UNARY
71 			penter(left(p));
72 			parent(left(p)) = p;
73 			break;
74 		case CAT:
75 		case OR:
76 			penter(left(p));
77 			penter(right(p));
78 			parent(left(p)) = p;
79 			parent(right(p)) = p;
80 			break;
81 		default:
82 			error(FATAL, "unknown type %d in penter\n", type(p));
83 			break;
84 	}
85 }
86 
87 freetr(p)	/* free parse tree and follow sets */
88 node *p;
89 {
90 	switch(type(p)) {
91 		LEAF
92 			xfree(foll[(int) left(p)]);
93 			xfree(p);
94 			break;
95 		UNARY
96 			freetr(left(p));
97 			xfree(p);
98 			break;
99 		case CAT:
100 		case OR:
101 			freetr(left(p));
102 			freetr(right(p));
103 			xfree(p);
104 			break;
105 		default:
106 			error(FATAL, "unknown type %d in freetr", type(p));
107 			break;
108 	}
109 }
110 char *cclenter(p)
111 register char *p;
112 {
113 	register i, c;
114 	char *op;
115 
116 	op = p;
117 	i = 0;
118 	while ((c = *p++) != 0) {
119 		if (c == '-' && i > 0 && chars[i-1] != 0) {
120 			if (*p != 0) {
121 				c = chars[i-1];
122 				while (c < *p) {
123 					if (i >= MAXLIN)
124 						overflo();
125 					chars[i++] = ++c;
126 				}
127 				p++;
128 				continue;
129 			}
130 		}
131 		if (i >= MAXLIN)
132 			overflo();
133 		chars[i++] = c;
134 	}
135 	chars[i++] = '\0';
136 	dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL);
137 	xfree(op);
138 	return(tostring(chars));
139 }
140 
141 overflo()
142 {
143 	error(FATAL, "regular expression too long\n");
144 }
145 
146 cfoll(v)		/* enter follow set of each leaf of vertex v into foll[leaf] */
147 register node *v;
148 {
149 	register i;
150 	int prev;
151 	int *add();
152 
153 	switch(type(v)) {
154 		LEAF
155 			setcnt = 0;
156 			for (i=1; i<=line; i++)
157 				setvec[i] = 0;
158 			follow(v);
159 			if (notin(foll, ( (int) left(v))-1, &prev)) {
160 				foll[(int) left(v)] = add(setcnt);
161 			}
162 			else
163 				foll[ (int) left(v)] = foll[prev];
164 			break;
165 		UNARY
166 			cfoll(left(v));
167 			break;
168 		case CAT:
169 		case OR:
170 			cfoll(left(v));
171 			cfoll(right(v));
172 			break;
173 		default:
174 			error(FATAL, "unknown type %d in cfoll", type(v));
175 	}
176 }
177 
178 first(p)			/* collects initially active leaves of p into setvec */
179 register node *p;		/* returns 0 or 1 depending on whether p matches empty string */
180 {
181 	register b;
182 
183 	switch(type(p)) {
184 		LEAF
185 			if (setvec[(int) left(p)] != 1) {
186 				setvec[(int) left(p)] = 1;
187 				setcnt++;
188 			}
189 			if (type(p) == CCL && (*(char *) right(p)) == '\0')
190 				return(0);		/* empty CCL */
191 			else return(1);
192 		case FINAL:
193 		case PLUS:
194 			if (first(left(p)) == 0) return(0);
195 			return(1);
196 		case STAR:
197 		case QUEST:
198 			first(left(p));
199 			return(0);
200 		case CAT:
201 			if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
202 			return(1);
203 		case OR:
204 			b = first(right(p));
205 			if (first(left(p)) == 0 || b == 0) return(0);
206 			return(1);
207 	}
208 	error(FATAL, "unknown type %d in first\n", type(p));
209 	return(-1);
210 }
211 
212 follow(v)
213 node *v;		/* collects leaves that can follow v into setvec */
214 {
215 	node *p;
216 
217 	if (type(v) == FINAL)
218 		return;
219 	p = parent(v);
220 	switch (type(p)) {
221 		case STAR:
222 		case PLUS:	first(v);
223 				follow(p);
224 				return;
225 
226 		case OR:
227 		case QUEST:	follow(p);
228 				return;
229 
230 		case CAT:	if (v == left(p)) {	/* v is left child of p */
231 					if (first(right(p)) == 0) {
232 						follow(p);
233 						return;
234 					}
235 				}
236 				else		/* v is right child */
237 					follow(p);
238 				return;
239 		case FINAL:	if (setvec[line] != 1) {
240 					setvec[line] = 1;
241 					setcnt++;
242 				}
243 				return;
244 	}
245 }
246 
247 member(c, s)	/* is c in s? */
248 register char c, *s;
249 {
250 	while (*s)
251 		if (c == *s++)
252 			return(1);
253 	return(0);
254 }
255 
256 notin(array, n, prev)		/* is setvec in array[0] thru array[n]? */
257 int **array;
258 int *prev; {
259 	register i, j;
260 	int *ptr;
261 	for (i=0; i<=n; i++) {
262 		ptr = array[i];
263 		if (*ptr == setcnt) {
264 			for (j=0; j < setcnt; j++)
265 				if (setvec[*(++ptr)] != 1) goto nxt;
266 			*prev = i;
267 			return(0);
268 		}
269 		nxt: ;
270 	}
271 	return(1);
272 }
273 
274 int *add(n) {		/* remember setvec */
275 	int *ptr, *p;
276 	register i;
277 	if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL)
278 		overflo();
279 	*ptr = n;
280 	dprintf("add(%d)\n", n, NULL, NULL);
281 	for (i=1; i <= line; i++)
282 		if (setvec[i] == 1) {
283 			*(++ptr) = i;
284 			dprintf("  ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i);
285 		}
286 	dprintf("\n", NULL, NULL, NULL);
287 	return(p);
288 }
289 
290 struct fa *cgotofn()
291 {
292 	register i, k;
293 	register int *ptr;
294 	char c;
295 	char *p;
296 	node *cp;
297 	int j, n, s, ind, numtrans;
298 	int finflg;
299 	int curpos, num, prev;
300 	struct fa *where[NSTATES];
301 
302 	int fatab[257];
303 	struct fa *pfa;
304 
305 	char index[MAXLIN];
306 	char iposns[MAXLIN];
307 	int sposns[MAXLIN];
308 	int spmax, spinit;
309 
310 	char symbol[NCHARS];
311 	char isyms[NCHARS];
312 	char ssyms[NCHARS];
313 	int ssmax, ssinit;
314 
315 	for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0;
316 	for (i=0; i<NCHARS; i++)  isyms[i] = symbol[i] = 0;
317 	setcnt = 0;
318 	/* compute initial positions and symbols of state 0 */
319 	ssmax = 0;
320 	ptr = state[0] = foll[0];
321 	spinit = *ptr;
322 	for (i=0; i<spinit; i++) {
323 		curpos = *(++ptr);
324 		sposns[i] = curpos;
325 		iposns[curpos] = 1;
326 		cp = point[curpos];
327 		dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos);
328 		switch (type(cp)) {
329 			case CHAR:
330 				k = (int) right(cp);
331 				if (isyms[k] != 1) {
332 					isyms[k] = 1;
333 					ssyms[ssmax++] = k;
334 				}
335 				break;
336 			case DOT:
337 				for (k=1; k<NCHARS; k++) {
338 					if (k != HAT) {
339 						if (isyms[k] != 1) {
340 							isyms[k] = 1;
341 							ssyms[ssmax++] = k;
342 						}
343 					}
344 				}
345 				break;
346 			case CCL:
347 				for (p = (char *) right(cp); *p; p++) {
348 					if (*p != HAT) {
349 						if (isyms[*p] != 1) {
350 							isyms[*p] = 1;
351 							ssyms[ssmax++] = *p;
352 						}
353 					}
354 				}
355 				break;
356 			case NCCL:
357 				for (k=1; k<NCHARS; k++) {
358 					if (k != HAT && !member(k, (char *) right(cp))) {
359 						if (isyms[k] != 1) {
360 							isyms[k] = 1;
361 							ssyms[ssmax++] = k;
362 						}
363 					}
364 				}
365 		}
366 	}
367 	ssinit = ssmax;
368 	n = 0;
369 	for (s=0; s<=n; s++)  {
370 	dprintf("s = %d\n", s, NULL, NULL);
371 		ind = 0;
372 		numtrans = 0;
373 		finflg = 0;
374 		if (*(state[s] + *state[s]) == line) {		/* s final? */
375 			finflg = 1;
376 			goto tenter;
377 		}
378 		spmax = spinit;
379 		ssmax = ssinit;
380 		ptr = state[s];
381 		num = *ptr;
382 		for (i=0; i<num; i++) {
383 			curpos = *(++ptr);
384 			if (iposns[curpos] != 1 && index[curpos] != 1) {
385 				index[curpos] = 1;
386 				sposns[spmax++] = curpos;
387 			}
388 			cp = point[curpos];
389 			switch (type(cp)) {
390 				case CHAR:
391 					k = (int) right(cp);
392 					if (isyms[k] == 0 && symbol[k] == 0) {
393 						symbol[k] = 1;
394 						ssyms[ssmax++] = k;
395 					}
396 					break;
397 				case DOT:
398 					for (k=1; k<NCHARS; k++) {
399 						if (k != HAT) {
400 							if (isyms[k] == 0 && symbol[k] == 0) {
401 								symbol[k] = 1;
402 								ssyms[ssmax++] = k;
403 							}
404 						}
405 					}
406 					break;
407 				case CCL:
408 					for (p = (char *) right(cp); *p; p++) {
409 						if (*p != HAT) {
410 							if (isyms[*p] == 0 && symbol[*p] == 0) {
411 								symbol[*p] = 1;
412 								ssyms[ssmax++] = *p;
413 							}
414 						}
415 					}
416 					break;
417 				case NCCL:
418 					for (k=1; k<NCHARS; k++) {
419 						if (k != HAT && !member(k, (char *) right(cp))) {
420 							if (isyms[k] == 0 && symbol[k] == 0) {
421 								symbol[k] = 1;
422 								ssyms[ssmax++] = k;
423 							}
424 						}
425 					}
426 			}
427 		}
428 		for (j=0; j<ssmax; j++) {	/* nextstate(s, ssyms[j]) */
429 			c = ssyms[j];
430 			symbol[c] = 0;
431 			setcnt = 0;
432 			for (k=0; k<=line; k++) setvec[k] = 0;
433 			for (i=0; i<spmax; i++) {
434 				index[sposns[i]] = 0;
435 				cp = point[sposns[i]];
436 				if ((k = type(cp)) != FINAL)
437 					if (k == CHAR && c == (int) right(cp)
438 					 || k == DOT
439 					 || k == CCL && member(c, (char *) right(cp))
440 					 || k == NCCL && !member(c, (char *) right(cp))) {
441 						ptr = foll[sposns[i]];
442 						num = *ptr;
443 						for (k=0; k<num; k++) {
444 							if (setvec[*(++ptr)] != 1
445 								&& iposns[*ptr] != 1) {
446 								setvec[*ptr] = 1;
447 								setcnt++;
448 							}
449 						}
450 					}
451 			} /* end nextstate */
452 			if (notin(state, n, &prev)) {
453 				if (n >= NSTATES) {
454 					dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL);
455 					overflo();
456 				}
457 				state[++n] = add(setcnt);
458 				dprintf("	delta(%d,%o) = %d", s,c,n);
459 				dprintf(", ind = %d\n", ind+1, NULL, NULL);
460 				fatab[++ind] = c;
461 				fatab[++ind] = n;
462 				numtrans++;
463 			}
464 			else {
465 				if (prev != 0) {
466 					dprintf("	delta(%d,%o) = %d", s,c,prev);
467 					dprintf(", ind = %d\n", ind+1, NULL, NULL);
468 					fatab[++ind] = c;
469 					fatab[++ind] = prev;
470 					numtrans++;
471 				}
472 			}
473 		}
474 	tenter:
475 		if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL)
476 			overflo();
477 		where[s] = pfa;
478 		if (finflg)
479 			pfa->cch = -1;		/* s is a final state */
480 		else
481 			pfa->cch = numtrans;
482 		pfa->st = 0;
483 		for (i=1, pfa += 1; i<=numtrans; i++, pfa++) {
484 			pfa->cch = fatab[2*i-1];
485 			pfa->st = (struct fa *) fatab[2*i];
486 		}
487 	}
488 	for (i=0; i<=n; i++) {
489 		xfree(state[i]);	/* free state[i] */
490 		pfa = where[i];
491 		pfa->st = where[0];
492 		dprintf("state %d: (%o)\n", i, pfa, NULL);
493 		dprintf("	numtrans = %d,	default = %o\n", pfa->cch, pfa->st, NULL);
494 		for (k=1; k<=pfa->cch; k++) {
495 			(pfa+k)->st = where[ (int) (pfa+k)->st];
496 			dprintf("	char = %o,	nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL);
497 		}
498 	}
499 	pfa = where[0];
500 	if ((num = pfa->cch) < 0)
501 		return(where[0]);
502 	for (pfa += num; num; num--, pfa--)
503 		if (pfa->cch == HAT) {
504 			return(pfa->st);
505 		}
506 	return(where[0]);
507 }
508 
509 match(pfa, p)
510 register struct fa *pfa;
511 register char *p;
512 {
513 	register count;
514 	char c;
515 	if (p == 0) return(0);
516 	if (pfa->cch == 1) {		/* fast test for first character, if possible */
517 		c = (++pfa)->cch;
518 		do
519 			if (c == *p) {
520 				p++;
521 				pfa = pfa->st;
522 				goto adv;
523 			}
524 		while (*p++ != 0);
525 		return(0);
526 	}
527    adv: if ((count = pfa->cch) < 0) return(1);
528 	do {
529 		for (pfa += count; count; count--, pfa--)
530 			if (pfa->cch == *p) {
531 				break;
532 			}
533 		pfa = pfa->st;
534 		if ((count = pfa->cch) < 0) return(1);
535 	} while(*p++ != 0);
536 	return(0);
537 }
538