1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 /* lasciate ogne speranza, voi ch'entrate. */
26 
27 #define	DEBUG
28 
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include "awk.h"
34 #include "ytab.h"
35 
36 #define	HAT	(NCHARS-2)	/* matches ^ in regular expr */
37 				/* NCHARS is 2**n */
38 #define MAXLIN 22
39 
40 #define type(v)		(v)->nobj	/* badly overloaded here */
41 #define info(v)		(v)->ntype	/* badly overloaded here */
42 #define left(v)		(v)->narg[0]
43 #define right(v)	(v)->narg[1]
44 #define parent(v)	(v)->nnext
45 
46 #define LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47 #define UNARY	case STAR: case PLUS: case QUEST:
48 
49 /* encoding in tree Nodes:
50 	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51 		left is index, right contains value or pointer to value
52 	unary (STAR, PLUS, QUEST): left is child, right is null
53 	binary (CAT, OR): left and right are children
54 	parent contains pointer to parent
55 */
56 
57 
58 int	*setvec;
59 int	*tmpset;
60 int	maxsetvec = 0;
61 
62 int	rtok;		/* next token in current re */
63 int	rlxval;
64 static uschar	*rlxstr;
65 static uschar	*prestr;	/* current position in current re */
66 static uschar	*lastre;	/* origin of last re */
67 
68 static	int setcnt;
69 static	int poscnt;
70 
71 char	*patbeg;
72 int	patlen;
73 
74 #define	NFA	20	/* cache this many dynamic fa's */
75 fa	*fatab[NFA];
76 int	nfatab	= 0;	/* entries in fatab */
77 
makedfa(const char * s,int anchor)78 fa *makedfa(const char *s, int anchor)	/* returns dfa for reg expr s */
79 {
80 	int i, use, nuse;
81 	fa *pfa;
82 	static int now = 1;
83 
84 	if (setvec == 0) {	/* first time through any RE */
85 		maxsetvec = MAXLIN;
86 		setvec = (int *) malloc(maxsetvec * sizeof(int));
87 		tmpset = (int *) malloc(maxsetvec * sizeof(int));
88 		if (setvec == 0 || tmpset == 0)
89 			overflo("out of space initializing makedfa");
90 	}
91 
92 	if (compile_time)	/* a constant for sure */
93 		return mkdfa(s, anchor);
94 	for (i = 0; i < nfatab; i++)	/* is it there already? */
95 		if (fatab[i]->anchor == anchor
96 		  && strcmp((const char *) fatab[i]->restr, s) == 0) {
97 			fatab[i]->use = now++;
98 			return fatab[i];
99 		}
100 	pfa = mkdfa(s, anchor);
101 	if (nfatab < NFA) {	/* room for another */
102 		fatab[nfatab] = pfa;
103 		fatab[nfatab]->use = now++;
104 		nfatab++;
105 		return pfa;
106 	}
107 	use = fatab[0]->use;	/* replace least-recently used */
108 	nuse = 0;
109 	for (i = 1; i < nfatab; i++)
110 		if (fatab[i]->use < use) {
111 			use = fatab[i]->use;
112 			nuse = i;
113 		}
114 	freefa(fatab[nuse]);
115 	fatab[nuse] = pfa;
116 	pfa->use = now++;
117 	return pfa;
118 }
119 
mkdfa(const char * s,int anchor)120 fa *mkdfa(const char *s, int anchor)	/* does the real work of making a dfa */
121 				/* anchor = 1 for anchored matches, else 0 */
122 {
123 	Node *p, *p1;
124 	fa *f;
125 
126 	p = reparse(s);
127 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128 		/* put ALL STAR in front of reg.  exp. */
129 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130 		/* put FINAL after reg.  exp. */
131 
132 	poscnt = 0;
133 	penter(p1);	/* enter parent pointers and leaf indices */
134 	if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135 		overflo("out of space for fa");
136 	f->accept = poscnt-1;	/* penter has computed number of positions in re */
137 	cfoll(f, p1);	/* set up follow sets */
138 	freetr(p1);
139 	if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140 			overflo("out of space in makedfa");
141 	if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142 		overflo("out of space in makedfa");
143 	*f->posns[1] = 0;
144 	f->initstat = makeinit(f, anchor);
145 	f->anchor = anchor;
146 	f->restr = (uschar *) tostring(s);
147 	return f;
148 }
149 
makeinit(fa * f,int anchor)150 int makeinit(fa *f, int anchor)
151 {
152 	int i, k;
153 
154 	f->curstat = 2;
155 	f->out[2] = 0;
156 	f->reset = 0;
157 	k = *(f->re[0].lfollow);
158 	xfree(f->posns[2]);
159 	if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160 		overflo("out of space in makeinit");
161 	for (i=0; i <= k; i++) {
162 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
163 	}
164 	if ((f->posns[2])[1] == f->accept)
165 		f->out[2] = 1;
166 	for (i=0; i < NCHARS; i++)
167 		f->gototab[2][i] = 0;
168 	f->curstat = cgoto(f, 2, HAT);
169 	if (anchor) {
170 		*f->posns[2] = k-1;	/* leave out position 0 */
171 		for (i=0; i < k; i++) {
172 			(f->posns[0])[i] = (f->posns[2])[i];
173 		}
174 
175 		f->out[0] = f->out[2];
176 		if (f->curstat != 2)
177 			--(*f->posns[f->curstat]);
178 	}
179 	return f->curstat;
180 }
181 
penter(Node * p)182 void penter(Node *p)	/* set up parent pointers and leaf indices */
183 {
184 	switch (type(p)) {
185 	LEAF
186 		info(p) = poscnt;
187 		poscnt++;
188 		break;
189 	UNARY
190 		penter(left(p));
191 		parent(left(p)) = p;
192 		break;
193 	case CAT:
194 	case OR:
195 		penter(left(p));
196 		penter(right(p));
197 		parent(left(p)) = p;
198 		parent(right(p)) = p;
199 		break;
200 	default:	/* can't happen */
201 		FATAL("can't happen: unknown type %d in penter", type(p));
202 		break;
203 	}
204 }
205 
freetr(Node * p)206 void freetr(Node *p)	/* free parse tree */
207 {
208 	switch (type(p)) {
209 	LEAF
210 		xfree(p);
211 		break;
212 	UNARY
213 		freetr(left(p));
214 		xfree(p);
215 		break;
216 	case CAT:
217 	case OR:
218 		freetr(left(p));
219 		freetr(right(p));
220 		xfree(p);
221 		break;
222 	default:	/* can't happen */
223 		FATAL("can't happen: unknown type %d in freetr", type(p));
224 		break;
225 	}
226 }
227 
228 /* in the parsing of regular expressions, metacharacters like . have */
229 /* to be seen literally;  \056 is not a metacharacter. */
230 
hexstr(char ** pp)231 int hexstr(char **pp)	/* find and eval hex string at pp, return new p */
232 {			/* only pick up one 8-bit byte (2 chars) */
233 	uschar *p;
234 	int n = 0;
235 	int i;
236 
237 	for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238 		if (isdigit(*p))
239 			n = 16 * n + *p - '0';
240 		else if (*p >= 'a' && *p <= 'f')
241 			n = 16 * n + *p - 'a' + 10;
242 		else if (*p >= 'A' && *p <= 'F')
243 			n = 16 * n + *p - 'A' + 10;
244 	}
245 	*pp = (char *) p;
246 	return n;
247 }
248 
249 #define isoctdigit(c) ((c) >= '0' && (c) <= '7')	/* multiple use of arg */
250 
quoted(char ** pp)251 int quoted(char **pp)	/* pick up next thing after a \\ */
252 			/* and increment *pp */
253 {
254 	char *p = *pp;
255 	int c;
256 
257 	if ((c = *p++) == 't')
258 		c = '\t';
259 	else if (c == 'n')
260 		c = '\n';
261 	else if (c == 'f')
262 		c = '\f';
263 	else if (c == 'r')
264 		c = '\r';
265 	else if (c == 'b')
266 		c = '\b';
267 	else if (c == '\\')
268 		c = '\\';
269 	else if (c == 'x') {	/* hexadecimal goo follows */
270 		c = hexstr(&p);	/* this adds a null if number is invalid */
271 	} else if (isoctdigit(c)) {	/* \d \dd \ddd */
272 		int n = c - '0';
273 		if (isoctdigit(*p)) {
274 			n = 8 * n + *p++ - '0';
275 			if (isoctdigit(*p))
276 				n = 8 * n + *p++ - '0';
277 		}
278 		c = n;
279 	} /* else */
280 		/* c = c; */
281 	*pp = p;
282 	return c;
283 }
284 
collate_range_cmp(int a,int b)285 static int collate_range_cmp(int a, int b)
286 {
287 	int r;
288 	static char s[2][2];
289 
290 	if ((uschar)a == (uschar)b)
291 		return 0;
292 	s[0][0] = a;
293 	s[1][0] = b;
294 	if ((r = strcoll(s[0], s[1])) == 0)
295 		r = (uschar)a - (uschar)b;
296 	return r;
297 }
298 
cclenter(const char * argp)299 char *cclenter(const char *argp)	/* add a character class */
300 {
301 	int i, c, c2;
302 	int j;
303 	uschar *p = (uschar *) argp;
304 	uschar *op, *bp;
305 	static uschar *buf = 0;
306 	static int bufsz = 100;
307 
308 	op = p;
309 	if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
310 		FATAL("out of space for character class [%.10s...] 1", p);
311 	bp = buf;
312 	for (i = 0; (c = *p++) != 0; ) {
313 		if (c == '\\') {
314 			c = quoted((char **) &p);
315 		} else if (c == '-' && i > 0 && bp[-1] != 0) {
316 			if (*p != 0) {
317 				c = bp[-1];
318 				c2 = *p++;
319 				if (c2 == '\\')
320 					c2 = quoted((char **) &p);
321 				if (collate_range_cmp(c, c2) > 0) {	/* empty; ignore */
322 					bp--;
323 					i--;
324 					continue;
325 				}
326 				for (j = 0; j < NCHARS; j++) {
327 					if ((collate_range_cmp(c, j) > 0) ||
328 					    collate_range_cmp(j, c2) > 0)
329 						continue;
330 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
331 						FATAL("out of space for character class [%.10s...] 2", p);
332 					*bp++ = j;
333 					i++;
334 				}
335 				continue;
336 			}
337 		}
338 		if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
339 			FATAL("out of space for character class [%.10s...] 3", p);
340 		*bp++ = c;
341 		i++;
342 	}
343 	*bp = 0;
344 	dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
345 	xfree(op);
346 	return (char *) tostring((char *) buf);
347 }
348 
overflo(const char * s)349 void overflo(const char *s)
350 {
351 	FATAL("regular expression too big: %.30s...", s);
352 }
353 
cfoll(fa * f,Node * v)354 void cfoll(fa *f, Node *v)	/* enter follow set of each leaf of vertex v into lfollow[leaf] */
355 {
356 	int i;
357 	int *p;
358 
359 	switch (type(v)) {
360 	LEAF
361 		f->re[info(v)].ltype = type(v);
362 		f->re[info(v)].lval.np = right(v);
363 		while (f->accept >= maxsetvec) {	/* guessing here! */
364 			maxsetvec *= 4;
365 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
366 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
367 			if (setvec == 0 || tmpset == 0)
368 				overflo("out of space in cfoll()");
369 		}
370 		for (i = 0; i <= f->accept; i++)
371 			setvec[i] = 0;
372 		setcnt = 0;
373 		follow(v);	/* computes setvec and setcnt */
374 		if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
375 			overflo("out of space building follow set");
376 		f->re[info(v)].lfollow = p;
377 		*p = setcnt;
378 		for (i = f->accept; i >= 0; i--)
379 			if (setvec[i] == 1)
380 				*++p = i;
381 		break;
382 	UNARY
383 		cfoll(f,left(v));
384 		break;
385 	case CAT:
386 	case OR:
387 		cfoll(f,left(v));
388 		cfoll(f,right(v));
389 		break;
390 	default:	/* can't happen */
391 		FATAL("can't happen: unknown type %d in cfoll", type(v));
392 	}
393 }
394 
first(Node * p)395 int first(Node *p)	/* collects initially active leaves of p into setvec */
396 			/* returns 1 if p matches empty string */
397 {
398 	int b, lp;
399 
400 	switch (type(p)) {
401 	LEAF
402 		lp = info(p);	/* look for high-water mark of subscripts */
403 		while (setcnt >= maxsetvec || lp >= maxsetvec) {	/* guessing here! */
404 			maxsetvec *= 4;
405 			setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
406 			tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
407 			if (setvec == 0 || tmpset == 0)
408 				overflo("out of space in first()");
409 		}
410 		if (setvec[lp] != 1) {
411 			setvec[lp] = 1;
412 			setcnt++;
413 		}
414 		if (type(p) == CCL && (*(char *) right(p)) == '\0')
415 			return(0);		/* empty CCL */
416 		else return(1);
417 	case PLUS:
418 		if (first(left(p)) == 0) return(0);
419 		return(1);
420 	case STAR:
421 	case QUEST:
422 		first(left(p));
423 		return(0);
424 	case CAT:
425 		if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
426 		return(1);
427 	case OR:
428 		b = first(right(p));
429 		if (first(left(p)) == 0 || b == 0) return(0);
430 		return(1);
431 	}
432 	FATAL("can't happen: unknown type %d in first", type(p));	/* can't happen */
433 	return(-1);
434 }
435 
follow(Node * v)436 void follow(Node *v)	/* collects leaves that can follow v into setvec */
437 {
438 	Node *p;
439 
440 	if (type(v) == FINAL)
441 		return;
442 	p = parent(v);
443 	switch (type(p)) {
444 	case STAR:
445 	case PLUS:
446 		first(v);
447 		follow(p);
448 		return;
449 
450 	case OR:
451 	case QUEST:
452 		follow(p);
453 		return;
454 
455 	case CAT:
456 		if (v == left(p)) {	/* v is left child of p */
457 			if (first(right(p)) == 0) {
458 				follow(p);
459 				return;
460 			}
461 		} else		/* v is right child */
462 			follow(p);
463 		return;
464 	}
465 }
466 
member(int c,const char * sarg)467 int member(int c, const char *sarg)	/* is c in s? */
468 {
469 	uschar *s = (uschar *) sarg;
470 
471 	while (*s)
472 		if (c == *s++)
473 			return(1);
474 	return(0);
475 }
476 
match(fa * f,const char * p0)477 int match(fa *f, const char *p0)	/* shortest match ? */
478 {
479 	int s, ns;
480 	uschar *p = (uschar *) p0;
481 
482 	s = f->reset ? makeinit(f,0) : f->initstat;
483 	if (f->out[s])
484 		return(1);
485 	do {
486 		if ((ns = f->gototab[s][*p]) != 0)
487 			s = ns;
488 		else
489 			s = cgoto(f, s, *p);
490 		if (f->out[s])
491 			return(1);
492 	} while (*p++ != 0);
493 	return(0);
494 }
495 
pmatch(fa * f,const char * p0)496 int pmatch(fa *f, const char *p0)	/* longest match, for sub */
497 {
498 	int s, ns;
499 	uschar *p = (uschar *) p0;
500 	uschar *q;
501 	int i, k;
502 
503 	s = f->reset ? makeinit(f,1) : f->initstat;
504 	patbeg = (char *) p;
505 	patlen = -1;
506 	do {
507 		q = p;
508 		do {
509 			if (f->out[s])		/* final state */
510 				patlen = q-p;
511 			if ((ns = f->gototab[s][*q]) != 0)
512 				s = ns;
513 			else
514 				s = cgoto(f, s, *q);
515 			if (s == 1) {	/* no transition */
516 				if (patlen >= 0) {
517 					patbeg = (char *) p;
518 					return(1);
519 				}
520 				else
521 					goto nextin;	/* no match */
522 			}
523 		} while (*q++ != 0);
524 		if (f->out[s])
525 			patlen = q-p-1;	/* don't count $ */
526 		if (patlen >= 0) {
527 			patbeg = (char *) p;
528 			return(1);
529 		}
530 	nextin:
531 		s = 2;
532 		if (f->reset) {
533 			for (i = 2; i <= f->curstat; i++)
534 				xfree(f->posns[i]);
535 			k = *f->posns[0];
536 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
537 				overflo("out of space in pmatch");
538 			for (i = 0; i <= k; i++)
539 				(f->posns[2])[i] = (f->posns[0])[i];
540 			f->initstat = f->curstat = 2;
541 			f->out[2] = f->out[0];
542 			for (i = 0; i < NCHARS; i++)
543 				f->gototab[2][i] = 0;
544 		}
545 	} while (*p++ != 0);
546 	return (0);
547 }
548 
nematch(fa * f,const char * p0)549 int nematch(fa *f, const char *p0)	/* non-empty match, for sub */
550 {
551 	int s, ns;
552 	uschar *p = (uschar *) p0;
553 	uschar *q;
554 	int i, k;
555 
556 	s = f->reset ? makeinit(f,1) : f->initstat;
557 	patlen = -1;
558 	while (*p) {
559 		q = p;
560 		do {
561 			if (f->out[s])		/* final state */
562 				patlen = q-p;
563 			if ((ns = f->gototab[s][*q]) != 0)
564 				s = ns;
565 			else
566 				s = cgoto(f, s, *q);
567 			if (s == 1) {	/* no transition */
568 				if (patlen > 0) {
569 					patbeg = (char *) p;
570 					return(1);
571 				} else
572 					goto nnextin;	/* no nonempty match */
573 			}
574 		} while (*q++ != 0);
575 		if (f->out[s])
576 			patlen = q-p-1;	/* don't count $ */
577 		if (patlen > 0 ) {
578 			patbeg = (char *) p;
579 			return(1);
580 		}
581 	nnextin:
582 		s = 2;
583 		if (f->reset) {
584 			for (i = 2; i <= f->curstat; i++)
585 				xfree(f->posns[i]);
586 			k = *f->posns[0];
587 			if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
588 				overflo("out of state space");
589 			for (i = 0; i <= k; i++)
590 				(f->posns[2])[i] = (f->posns[0])[i];
591 			f->initstat = f->curstat = 2;
592 			f->out[2] = f->out[0];
593 			for (i = 0; i < NCHARS; i++)
594 				f->gototab[2][i] = 0;
595 		}
596 		p++;
597 	}
598 	return (0);
599 }
600 
reparse(const char * p)601 Node *reparse(const char *p)	/* parses regular expression pointed to by p */
602 {			/* uses relex() to scan regular expression */
603 	Node *np;
604 
605 	dprintf( ("reparse <%s>\n", p) );
606 	lastre = prestr = (uschar *) p;	/* prestr points to string to be parsed */
607 	rtok = relex();
608 	/* GNU compatibility: an empty regexp matches anything */
609 	if (rtok == '\0')
610 		/* FATAL("empty regular expression"); previous */
611 		return(op2(ALL, NIL, NIL));
612 	np = regexp();
613 	if (rtok != '\0')
614 		FATAL("syntax error in regular expression %s at %s", lastre, prestr);
615 	return(np);
616 }
617 
regexp(void)618 Node *regexp(void)	/* top-level parse of reg expr */
619 {
620 	return (alt(concat(primary())));
621 }
622 
primary(void)623 Node *primary(void)
624 {
625 	Node *np;
626 
627 	switch (rtok) {
628 	case CHAR:
629 		np = op2(CHAR, NIL, itonp(rlxval));
630 		rtok = relex();
631 		return (unary(np));
632 	case ALL:
633 		rtok = relex();
634 		return (unary(op2(ALL, NIL, NIL)));
635 	case DOT:
636 		rtok = relex();
637 		return (unary(op2(DOT, NIL, NIL)));
638 	case CCL:
639 		np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
640 		rtok = relex();
641 		return (unary(np));
642 	case NCCL:
643 		np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
644 		rtok = relex();
645 		return (unary(np));
646 	case '^':
647 		rtok = relex();
648 		return (unary(op2(CHAR, NIL, itonp(HAT))));
649 	case '$':
650 		rtok = relex();
651 		return (unary(op2(CHAR, NIL, NIL)));
652 	case '(':
653 		rtok = relex();
654 		if (rtok == ')') {	/* special pleading for () */
655 			rtok = relex();
656 			return unary(op2(CCL, NIL, (Node *) tostring("")));
657 		}
658 		np = regexp();
659 		if (rtok == ')') {
660 			rtok = relex();
661 			return (unary(np));
662 		}
663 		else
664 			FATAL("syntax error in regular expression %s at %s", lastre, prestr);
665 	default:
666 		FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
667 	}
668 	return 0;	/*NOTREACHED*/
669 }
670 
concat(Node * np)671 Node *concat(Node *np)
672 {
673 	switch (rtok) {
674 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
675 		return (concat(op2(CAT, np, primary())));
676 	}
677 	return (np);
678 }
679 
alt(Node * np)680 Node *alt(Node *np)
681 {
682 	if (rtok == OR) {
683 		rtok = relex();
684 		return (alt(op2(OR, np, concat(primary()))));
685 	}
686 	return (np);
687 }
688 
unary(Node * np)689 Node *unary(Node *np)
690 {
691 	switch (rtok) {
692 	case STAR:
693 		rtok = relex();
694 		return (unary(op2(STAR, np, NIL)));
695 	case PLUS:
696 		rtok = relex();
697 		return (unary(op2(PLUS, np, NIL)));
698 	case QUEST:
699 		rtok = relex();
700 		return (unary(op2(QUEST, np, NIL)));
701 	default:
702 		return (np);
703 	}
704 }
705 
706 /*
707  * Character class definitions conformant to the POSIX locale as
708  * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
709  * and operating character sets are both ASCII (ISO646) or supersets
710  * thereof.
711  *
712  * Note that to avoid overflowing the temporary buffer used in
713  * relex(), the expanded character class (prior to range expansion)
714  * must be less than twice the size of their full name.
715  */
716 
717 /* Because isblank doesn't show up in any of the header files on any
718  * system i use, it's defined here.  if some other locale has a richer
719  * definition of "blank", define HAS_ISBLANK and provide your own
720  * version.
721  */
722 
723 #ifndef HAS_ISBLANK
724 
isblank(int c)725 int isblank(int c)
726 {
727 	return c==' ' || c=='\t';
728 }
729 
730 #endif
731 
732 struct charclass {
733 	const char *cc_name;
734 	int cc_namelen;
735 	int (*cc_func)(int);
736 } charclasses[] = {
737 	{ "alnum",	5,	isalnum },
738 	{ "alpha",	5,	isalpha },
739 	{ "blank",	5,	isblank },
740 	{ "cntrl",	5,	iscntrl },
741 	{ "digit",	5,	isdigit },
742 	{ "graph",	5,	isgraph },
743 	{ "lower",	5,	islower },
744 	{ "print",	5,	isprint },
745 	{ "punct",	5,	ispunct },
746 	{ "space",	5,	isspace },
747 	{ "upper",	5,	isupper },
748 	{ "xdigit",	6,	isxdigit },
749 	{ NULL,		0,	NULL },
750 };
751 
752 
relex(void)753 int relex(void)		/* lexical analyzer for reparse */
754 {
755 	int c, n;
756 	int cflag;
757 	static uschar *buf = 0;
758 	static int bufsz = 100;
759 	uschar *bp;
760 	struct charclass *cc;
761 	int i;
762 
763 	switch (c = *prestr++) {
764 	case '|': return OR;
765 	case '*': return STAR;
766 	case '+': return PLUS;
767 	case '?': return QUEST;
768 	case '.': return DOT;
769 	case '\0': prestr--; return '\0';
770 	case '^':
771 	case '$':
772 	case '(':
773 	case ')':
774 		return c;
775 	case '\\':
776 		rlxval = quoted((char **) &prestr);
777 		return CHAR;
778 	default:
779 		rlxval = c;
780 		return CHAR;
781 	case '[':
782 		if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
783 			FATAL("out of space in reg expr %.10s..", lastre);
784 		bp = buf;
785 		if (*prestr == '^') {
786 			cflag = 1;
787 			prestr++;
788 		}
789 		else
790 			cflag = 0;
791 		n = 2 * strlen((const char *) prestr)+1;
792 		if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
793 			FATAL("out of space for reg expr %.10s...", lastre);
794 		for (; ; ) {
795 			if ((c = *prestr++) == '\\') {
796 				*bp++ = '\\';
797 				if ((c = *prestr++) == '\0')
798 					FATAL("nonterminated character class %.20s...", lastre);
799 				*bp++ = c;
800 			/* } else if (c == '\n') { */
801 			/* 	FATAL("newline in character class %.20s...", lastre); */
802 			} else if (c == '[' && *prestr == ':') {
803 				/* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
804 				for (cc = charclasses; cc->cc_name; cc++)
805 					if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
806 						break;
807 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
808 				    prestr[2 + cc->cc_namelen] == ']') {
809 					prestr += cc->cc_namelen + 3;
810 					for (i = 0; i < NCHARS; i++) {
811 						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
812 						    FATAL("out of space for reg expr %.10s...", lastre);
813 						if (cc->cc_func(i)) {
814 							*bp++ = i;
815 							n++;
816 						}
817 					}
818 				} else
819 					*bp++ = c;
820 			} else if (c == '\0') {
821 				FATAL("nonterminated character class %.20s", lastre);
822 			} else if (bp == buf) {	/* 1st char is special */
823 				*bp++ = c;
824 			} else if (c == ']') {
825 				*bp++ = 0;
826 				rlxstr = (uschar *) tostring((char *) buf);
827 				if (cflag == 0)
828 					return CCL;
829 				else
830 					return NCCL;
831 			} else
832 				*bp++ = c;
833 		}
834 	}
835 }
836 
cgoto(fa * f,int s,int c)837 int cgoto(fa *f, int s, int c)
838 {
839 	int i, j, k;
840 	int *p, *q;
841 
842 	if (c < 0 || c > 255)
843 		FATAL("can't happen: neg char %d in cgoto", c);
844 	while (f->accept >= maxsetvec) {	/* guessing here! */
845 		maxsetvec *= 4;
846 		setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
847 		tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
848 		if (setvec == 0 || tmpset == 0)
849 			overflo("out of space in cgoto()");
850 	}
851 	for (i = 0; i <= f->accept; i++)
852 		setvec[i] = 0;
853 	setcnt = 0;
854 	/* compute positions of gototab[s,c] into setvec */
855 	p = f->posns[s];
856 	for (i = 1; i <= *p; i++) {
857 		if ((k = f->re[p[i]].ltype) != FINAL) {
858 			if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
859 			 || (k == DOT && c != 0 && c != HAT)
860 			 || (k == ALL && c != 0)
861 			 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
862 			 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
863 				q = f->re[p[i]].lfollow;
864 				for (j = 1; j <= *q; j++) {
865 					if (q[j] >= maxsetvec) {
866 						maxsetvec *= 4;
867 						setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
868 						tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
869 						if (setvec == 0 || tmpset == 0)
870 							overflo("cgoto overflow");
871 					}
872 					if (setvec[q[j]] == 0) {
873 						setcnt++;
874 						setvec[q[j]] = 1;
875 					}
876 				}
877 			}
878 		}
879 	}
880 	/* determine if setvec is a previous state */
881 	tmpset[0] = setcnt;
882 	j = 1;
883 	for (i = f->accept; i >= 0; i--)
884 		if (setvec[i]) {
885 			tmpset[j++] = i;
886 		}
887 	/* tmpset == previous state? */
888 	for (i = 1; i <= f->curstat; i++) {
889 		p = f->posns[i];
890 		if ((k = tmpset[0]) != p[0])
891 			goto different;
892 		for (j = 1; j <= k; j++)
893 			if (tmpset[j] != p[j])
894 				goto different;
895 		/* setvec is state i */
896 		f->gototab[s][c] = i;
897 		return i;
898 	  different:;
899 	}
900 
901 	/* add tmpset to current set of states */
902 	if (f->curstat >= NSTATES-1) {
903 		f->curstat = 2;
904 		f->reset = 1;
905 		for (i = 2; i < NSTATES; i++)
906 			xfree(f->posns[i]);
907 	} else
908 		++(f->curstat);
909 	for (i = 0; i < NCHARS; i++)
910 		f->gototab[f->curstat][i] = 0;
911 	xfree(f->posns[f->curstat]);
912 	if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
913 		overflo("out of space in cgoto");
914 
915 	f->posns[f->curstat] = p;
916 	f->gototab[s][c] = f->curstat;
917 	for (i = 0; i <= setcnt; i++)
918 		p[i] = tmpset[i];
919 	if (setvec[f->accept])
920 		f->out[f->curstat] = 1;
921 	else
922 		f->out[f->curstat] = 0;
923 	return f->curstat;
924 }
925 
926 
freefa(fa * f)927 void freefa(fa *f)	/* free a finite automaton */
928 {
929 	int i;
930 
931 	if (f == NULL)
932 		return;
933 	for (i = 0; i <= f->curstat; i++)
934 		xfree(f->posns[i]);
935 	for (i = 0; i <= f->accept; i++) {
936 		xfree(f->re[i].lfollow);
937 		if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
938 			xfree((f->re[i].lval.np));
939 	}
940 	xfree(f->restr);
941 	xfree(f);
942 }
943