xref: /original-bsd/lib/libc/regex/engine.c (revision 860e07fc)
1 /*-
2  * Copyright (c) 1992 Henry Spencer.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Henry Spencer of the University of Toronto.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)engine.c	5.2 (Berkeley) 08/10/92
12  */
13 
14 /*
15  * The matching engine and friends.  This file is #included by regexec.c
16  * after suitable #defines of a variety of macros used herein, so that
17  * different state representations can be used without duplicating masses
18  * of code.
19  */
20 
21 #ifdef SNAMES
22 #define	matcher	smatcher
23 #define	fast	sfast
24 #define	slow	sslow
25 #define	dissect	sdissect
26 #define	backref	sbackref
27 #define	expand	sexpand
28 #define	step	sstep
29 #define	print	sprint
30 #define	match	smat
31 #endif
32 #ifdef LNAMES
33 #define	matcher	lmatcher
34 #define	fast	lfast
35 #define	slow	lslow
36 #define	dissect	ldissect
37 #define	backref	lbackref
38 #define	expand	lexpand
39 #define	step	lstep
40 #define	print	lprint
41 #define	match	lmat
42 #endif
43 
44 /* another structure passed up and down to avoid zillions of parameters */
45 struct match {
46 	struct re_guts *g;
47 	int eflags;
48 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
49 	uchar *offp;		/* offsets work from here */
50 	uchar *beginp;		/* start of string -- virtual NUL precedes */
51 	uchar *endp;		/* end of string -- virtual NUL here */
52 	uchar *coldp;		/* can be no match starting before here */
53 	uchar **lastpos;	/* [nplus+1] */
54 	STATEVARS;
55 	states st;		/* current states */
56 	states fresh;		/* states for a fresh start */
57 	states tmp;		/* temporary */
58 	states empty;		/* empty set of states */
59 };
60 
61 #ifndef NDEBUG
62 STATIC void print();
63 extern char *regchar();
64 #define	SP(t, s, c)	{ if (m->eflags&REG_TRACE) print(m->g, t, s, c, stdout); }
65 #define	DO(t, p1, p2, s1, s2)	{ if (m->eflags&REG_TRACE) { \
66 					printf("%s %s-", t, regchar(*(p1))); \
67 					printf("%s ", regchar(*(p2))); \
68 					printf("%ld-%ld\n", s1, s2); } }
69 #else
70 #define	SP(t, s, c)	/* nothing */
71 #define	DO(t, p1, p2, s1, s2)	/* nothing */
72 #endif
73 
74 STATIC uchar *fast();
75 STATIC uchar *slow();
76 STATIC uchar *dissect();
77 STATIC uchar *backref();
78 STATIC states expand();
79 STATIC states step();
80 
81 /*
82  - matcher - the actual matching engine
83  */
84 static int			/* 0 success, REG_NOMATCH failure */
85 matcher(g, string, nmatch, pmatch, eflags)
86 register struct re_guts *g;
87 uchar *string;
88 size_t nmatch;
89 regmatch_t pmatch[];
90 int eflags;
91 {
92 	register uchar *endp;
93 	register int i;
94 	struct match mv;
95 	register struct match *m = &mv;
96 	register uchar *dp;
97 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
98 	const register sopno gl = g->laststate;
99 	uchar *start;
100 	uchar *stop;
101 
102 	if (g->cflags&REG_NOSUB)
103 		nmatch = 0;		/* simplify tests */
104 	if (eflags&REG_STARTEND) {
105 		start = string + pmatch[0].rm_so;
106 		stop = string + pmatch[0].rm_eo;
107 	} else {
108 		start = string;
109 		stop = start + strlen((char *)start);
110 	}
111 
112 	/* prescreening; this does wonders for this rather slow code */
113 	if (g->must != NULL) {
114 		for (dp = start; dp < stop; dp++)
115 			if (*dp == (uchar)g->must[0] && stop - dp >= g->mlen &&
116 			strncmp((char *)dp, g->must, (size_t)g->mlen) == 0)
117 				break;
118 		if (dp == stop)		/* we didn't find g->must */
119 			return(REG_NOMATCH);
120 	}
121 
122 	/* match struct setup */
123 	m->g = g;
124 	m->eflags = eflags;
125 	m->pmatch = NULL;
126 	m->lastpos = NULL;
127 	m->offp = string;
128 	m->beginp = start;
129 	m->endp = stop;
130 	STATESETUP(m, 4);
131 	SETUP(m->st);
132 	SETUP(m->fresh);
133 	SETUP(m->tmp);
134 	SETUP(m->empty);
135 	CLEAR(m->empty);
136 
137 	/* this loop does only one repetition except for backrefs */
138 	for (;;) {
139 		endp = fast(m, start, stop, gf, gl);
140 		if (endp == NULL) {		/* a miss */
141 			STATETEARDOWN(m);
142 			return(REG_NOMATCH);
143 		}
144 		if (nmatch == 0 && !g->backrefs)
145 			break;		/* no further info needed */
146 
147 		/* where? */
148 		assert(m->coldp != NULL);
149 		for (;;) {
150 			endp = slow(m, m->coldp, stop, gf, gl);
151 			if (endp != NULL)
152 				break;
153 			assert(*m->coldp != '\0');
154 			m->coldp++;
155 		}
156 		if (nmatch == 1 && !g->backrefs)
157 			break;		/* no further info needed */
158 
159 		/* oh my, he wants the subexpressions... */
160 		if (m->pmatch == NULL)
161 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
162 							sizeof(regmatch_t));
163 		if (m->pmatch == NULL) {
164 			STATETEARDOWN(m);
165 			return(REG_ESPACE);
166 		}
167 		for (i = 1; i <= m->g->nsub; i++) {
168 			m->pmatch[i].rm_so = -1;
169 			m->pmatch[i].rm_eo = -1;
170 		}
171 		if (!g->backrefs)
172 			dp = dissect(m, m->coldp, endp, gf, gl);
173 		else {
174 			if (g->nplus > 0 && m->lastpos == NULL)
175 				m->lastpos = (uchar **)malloc((g->nplus+1) *
176 							sizeof(uchar *));
177 			if (g->nplus > 0 && m->lastpos == NULL) {
178 				free(m->pmatch);
179 				STATETEARDOWN(m);
180 				return(REG_ESPACE);
181 			}
182 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
183 		}
184 		if (dp != NULL)
185 			break;
186 
187 		/* uh-oh... we couldn't find a subexpression-level match */
188 		assert(g->backrefs);	/* must be back references doing it */
189 		assert(g->nplus == 0 || m->lastpos != NULL);
190 		while (dp == NULL && endp > m->coldp &&
191 			(endp = slow(m, m->coldp, endp-1, gf, gl)) != NULL) {
192 			/* try it on a shorter possibility */
193 			for (i = 1; i <= m->g->nsub; i++) {
194 				m->pmatch[i].rm_so = -1;
195 				m->pmatch[i].rm_eo = -1;
196 			}
197 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
198 		}
199 		assert(dp == NULL || dp == endp);
200 		if (dp != NULL)		/* found a shorter one */
201 			break;
202 
203 		/* despite initial appearances, there is no match here */
204 		start = m->coldp + 1;	/* recycle starting later */
205 		assert(start <= stop);
206 	}
207 
208 	/* fill in the details if requested */
209 	if (nmatch > 0) {
210 		pmatch[0].rm_so = m->coldp - m->offp;
211 		pmatch[0].rm_eo = endp - m->offp;
212 	}
213 	if (nmatch > 1) {
214 		assert(m->pmatch != NULL);
215 		for (i = 1; i < nmatch; i++)
216 			if (i <= m->g->nsub)
217 				pmatch[i] = m->pmatch[i];
218 			else {
219 				pmatch[i].rm_so = -1;
220 				pmatch[i].rm_eo = -1;
221 			}
222 	}
223 
224 	if (m->pmatch != NULL)
225 		free((char *)m->pmatch);
226 	if (m->lastpos != NULL)
227 		free((char *)m->lastpos);
228 	STATETEARDOWN(m);
229 	return(0);
230 }
231 
232 /*
233  - dissect - figure out what matched what, no back references
234  */
235 static uchar *			/* == stop (success) always */
236 dissect(m, start, stop, startst, stopst)
237 register struct match *m;
238 uchar *start;
239 uchar *stop;
240 sopno startst;
241 sopno stopst;
242 {
243 	register int i;
244 	register sopno ss;	/* start sop of current subRE */
245 	register sopno es;	/* end sop of current subRE */
246 	register uchar *sp;	/* start of string matched by it */
247 	register uchar *stp;	/* string matched by it cannot pass here */
248 	register uchar *rest;	/* start of rest of string */
249 	register uchar *tail;	/* string unmatched by rest of RE */
250 	register sopno ssub;	/* start sop of subsubRE */
251 	register sopno esub;	/* end sop of subsubRE */
252 	register uchar *ssp;	/* start of string matched by subsubRE */
253 	register uchar *sep;	/* end of string matched by subsubRE */
254 	register uchar *oldssp;	/* previous ssp */
255 	register uchar *dp;
256 	register size_t len;
257 
258 	DO("diss", start, stop, startst, stopst);
259 	sp = start;
260 	for (ss = startst; ss < stopst; ss = es) {
261 		/* identify end of subRE */
262 		es = ss;
263 		switch (OP(m->g->strip[es])) {
264 		case OPLUS_:
265 		case OQUEST_:
266 			es += OPND(m->g->strip[es]);
267 			break;
268 		case OCH_:
269 			while (OP(m->g->strip[es]) != O_CH)
270 				es += OPND(m->g->strip[es]);
271 			break;
272 		}
273 		es++;
274 
275 		/* figure out what it matched */
276 		switch (OP(m->g->strip[ss])) {
277 		case OEND:
278 			assert(0);
279 			break;
280 		case OCHAR:
281 			sp++;
282 			break;
283 		case OBOL:
284 		case OEOL:
285 			break;
286 		case OANY:
287 		case OANYOF:
288 			sp++;
289 			break;
290 		case OBACK_:
291 		case O_BACK:
292 			assert(0);
293 			break;
294 		/* cases where length of match is hard to find */
295 		case OQUEST_:
296 			stp = stop;
297 			for (;;) {
298 				/* how long could this one be? */
299 				rest = slow(m, sp, stp, ss, es);
300 				assert(rest != NULL);	/* it did match */
301 				/* could the rest match the rest? */
302 				tail = slow(m, rest, stop, es, stopst);
303 				if (tail == stop)
304 					break;		/* yes! */
305 				/* no -- try a shorter match for this one */
306 				stp = rest - 1;
307 				assert(stp >= sp);	/* it did work */
308 			}
309 			ssub = ss + 1;
310 			esub = es - 1;
311 			/* did innards match? */
312 			if (slow(m, sp, rest, ssub, esub) != NULL) {
313 				dp = dissect(m, sp, rest, ssub, esub);
314 				assert(dp == rest);
315 			} else		/* no */
316 				assert(sp == rest);
317 			sp = rest;
318 			break;
319 		case OPLUS_:
320 			stp = stop;
321 			for (;;) {
322 				/* how long could this one be? */
323 				rest = slow(m, sp, stp, ss, es);
324 				assert(rest != NULL);	/* it did match */
325 				/* could the rest match the rest? */
326 				tail = slow(m, rest, stop, es, stopst);
327 				if (tail == stop)
328 					break;		/* yes! */
329 				/* no -- try a shorter match for this one */
330 				stp = rest - 1;
331 				assert(stp >= sp);	/* it did work */
332 			}
333 			ssub = ss + 1;
334 			esub = es - 1;
335 			ssp = sp;
336 			oldssp = ssp;
337 			for (;;) {	/* find last match of innards */
338 				sep = slow(m, ssp, rest, ssub, esub);
339 				if (sep == NULL || sep == ssp)
340 					break;	/* failed or matched null */
341 				oldssp = ssp;	/* on to next try */
342 				ssp = sep;
343 			}
344 			if (sep == NULL) {
345 				/* last successful match */
346 				sep = ssp;
347 				ssp = oldssp;
348 			}
349 			assert(sep == rest);	/* must exhaust substring */
350 			assert(slow(m, ssp, sep, ssub, esub) == rest);
351 			dp = dissect(m, ssp, sep, ssub, esub);
352 			assert(dp == sep);
353 			sp = rest;
354 			break;
355 		case OCH_:
356 			stp = stop;
357 			for (;;) {
358 				/* how long could this one be? */
359 				rest = slow(m, sp, stp, ss, es);
360 				assert(rest != NULL);	/* it did match */
361 				/* could the rest match the rest? */
362 				tail = slow(m, rest, stop, es, stopst);
363 				if (tail == stop)
364 					break;		/* yes! */
365 				/* no -- try a shorter match for this one */
366 				stp = rest - 1;
367 				assert(stp >= sp);	/* it did work */
368 			}
369 			ssub = ss + 1;
370 			esub = ss + OPND(m->g->strip[ss]) - 1;
371 			assert(OP(m->g->strip[esub]) == OOR1);
372 			for (;;) {	/* find first matching branch */
373 				if (slow(m, sp, rest, ssub, esub) == rest)
374 					break;	/* it matched all of it */
375 				/* that one missed, try next one */
376 				assert(OP(m->g->strip[esub]) == OOR1);
377 				esub++;
378 				assert(OP(m->g->strip[esub]) == OOR2);
379 				ssub = esub + 1;
380 				esub += OPND(m->g->strip[esub]);
381 				if (OP(m->g->strip[esub]) == OOR2)
382 					esub--;
383 				else
384 					assert(OP(m->g->strip[esub]) == O_CH);
385 			}
386 			dp = dissect(m, sp, rest, ssub, esub);
387 			assert(dp == rest);
388 			sp = rest;
389 			break;
390 		case O_PLUS:
391 		case O_QUEST:
392 		case OOR1:
393 		case OOR2:
394 		case O_CH:
395 			assert(0);
396 			break;
397 		case OLPAREN:
398 			i = OPND(m->g->strip[ss]);
399 			m->pmatch[i].rm_so = sp - m->offp;
400 			break;
401 		case ORPAREN:
402 			i = OPND(m->g->strip[ss]);
403 			m->pmatch[i].rm_eo = sp - m->offp;
404 			break;
405 		default:		/* uh oh */
406 			assert(0);
407 			break;
408 		}
409 	}
410 
411 	assert(sp == stop);
412 	return(sp);
413 }
414 
415 /*
416  - backref - figure out what matched what, figuring in back references
417  */
418 static uchar *			/* == stop (success) or NULL (failure) */
419 backref(m, start, stop, startst, stopst, lev)
420 register struct match *m;
421 uchar *start;
422 uchar *stop;
423 sopno startst;
424 sopno stopst;
425 sopno lev;			/* PLUS nesting level */
426 {
427 	register int i;
428 	register sopno ss;	/* start sop of current subRE */
429 	register uchar *sp;	/* start of string matched by it */
430 	register sopno ssub;	/* start sop of subsubRE */
431 	register sopno esub;	/* end sop of subsubRE */
432 	register uchar *ssp;	/* start of string matched by subsubRE */
433 	register uchar *dp;
434 	register size_t len;
435 	register int hard;
436 	register sop s;
437 	register regoff_t offsave;
438 	register cset *cs;
439 
440 	DO("back", start, stop, startst, stopst);
441 	sp = start;
442 
443 	/* get as far as we can with easy stuff */
444 	hard = 0;
445 	for (ss = startst; !hard && ss < stopst; ss++)
446 		switch (OP(s = m->g->strip[ss])) {
447 		case OCHAR:
448 			if (sp == stop || *sp++ != OPND(s))
449 				return(NULL);
450 			break;
451 		case OANY:
452 			if (sp == stop)
453 				return(NULL);
454 			sp++;
455 			break;
456 		case OANYOF:
457 			cs = &m->g->sets[OPND(s)];
458 			if (sp == stop || !CHIN(cs, *sp++))
459 				return(NULL);
460 			break;
461 		case OBOL:
462 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
463 					(sp < m->endp && *(sp-1) == '\n' &&
464 						(m->g->cflags&REG_NEWLINE)) )
465 				{ /* yes */ }
466 			else
467 				return(NULL);
468 			break;
469 		case OEOL:
470 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
471 					(sp < m->endp && *sp == '\n' &&
472 						(m->g->cflags&REG_NEWLINE)) )
473 				{ /* yes */ }
474 			else
475 				return(NULL);
476 			break;
477 		case O_QUEST:
478 			break;
479 		case OOR1:	/* matches null but needs to skip */
480 			ss++;
481 			s = m->g->strip[ss];
482 			do {
483 				assert(OP(s) == OOR2);
484 				ss += OPND(s);
485 			} while (OP(s = m->g->strip[ss]) != O_CH);
486 			/* note that the ss++ gets us past the O_CH */
487 			break;
488 		default:	/* have to make a choice */
489 			hard = 1;
490 			break;
491 		}
492 	if (!hard) {		/* that was it! */
493 		if (sp != stop)
494 			return(NULL);
495 		return(sp);
496 	}
497 	ss--;			/* adjust for the for's final increment */
498 
499 	/* the hard stuff */
500 	DO("hard", sp, stop, ss, stopst);
501 	s = m->g->strip[ss];
502 	switch (OP(s)) {
503 	case OBACK_:		/* the vilest depths */
504 		i = OPND(s);
505 		if (m->pmatch[i].rm_eo == -1)
506 			return(NULL);
507 		assert(m->pmatch[i].rm_so != -1);
508 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
509 		assert(stop - m->beginp >= len);
510 		if (sp > stop - len)
511 			return(NULL);	/* not enough left to match */
512 		ssp = m->offp + m->pmatch[i].rm_so;
513 		if (strncmp((char *)sp, (char *)ssp, len) != 0)
514 			return(NULL);
515 		while (m->g->strip[ss] != SOP(O_BACK, i))
516 			ss++;
517 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
518 		break;
519 	case OQUEST_:		/* to null or not */
520 		dp = backref(m, sp, stop, ss+1, stopst, lev);
521 		if (dp != NULL)
522 			return(dp);	/* not */
523 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
524 		break;
525 	case OPLUS_:
526 		assert(m->lastpos != NULL);
527 		assert(lev+1 <= m->g->nplus);
528 		m->lastpos[lev+1] = sp;
529 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
530 		break;
531 	case O_PLUS:
532 		if (sp == m->lastpos[lev])	/* last pass matched null */
533 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
534 		/* try another pass */
535 		m->lastpos[lev] = sp;
536 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
537 		if (dp == NULL)
538 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
539 		else
540 			return(dp);
541 		break;
542 	case OCH_:		/* find the right one, if any */
543 		ssub = ss + 1;
544 		esub = ss + OPND(s) - 1;
545 		assert(OP(m->g->strip[esub]) == OOR1);
546 		for (;;) {	/* find first matching branch */
547 			dp = backref(m, sp, stop, ssub, esub, lev);
548 			if (dp != NULL)
549 				return(dp);
550 			/* that one missed, try next one */
551 			if (OP(m->g->strip[esub]) == O_CH)
552 				return(NULL);	/* there is none */
553 			esub++;
554 			assert(OP(m->g->strip[esub]) == OOR2);
555 			ssub = esub + 1;
556 			esub += OPND(m->g->strip[esub]);
557 			if (OP(m->g->strip[esub]) == OOR2)
558 				esub--;
559 			else
560 				assert(OP(m->g->strip[esub]) == O_CH);
561 		}
562 		break;
563 	case OLPAREN:		/* must undo assignment if rest fails */
564 		i = OPND(s);
565 		offsave = m->pmatch[i].rm_so;
566 		m->pmatch[i].rm_so = sp - m->offp;
567 		dp = backref(m, sp, stop, ss+1, stopst, lev);
568 		if (dp != NULL)
569 			return(dp);
570 		m->pmatch[i].rm_so = offsave;
571 		return(NULL);
572 		break;
573 	case ORPAREN:		/* must undo assignment if rest fails */
574 		i = OPND(s);
575 		offsave = m->pmatch[i].rm_eo;
576 		m->pmatch[i].rm_eo = sp - m->offp;
577 		dp = backref(m, sp, stop, ss+1, stopst, lev);
578 		if (dp != NULL)
579 			return(dp);
580 		m->pmatch[i].rm_eo = offsave;
581 		return(NULL);
582 		break;
583 	default:		/* uh oh */
584 		assert(0);
585 		break;
586 	}
587 
588 	/* "can't happen" */
589 	assert(0);
590 	/* NOTREACHED */
591 }
592 
593 /*
594  - fast - step through the string at top speed
595  */
596 static uchar *			/* where tentative match ended, or NULL */
597 fast(m, start, stop, startst, stopst)
598 register struct match *m;
599 uchar *start;
600 uchar *stop;
601 sopno startst;
602 sopno stopst;
603 {
604 	register states st = m->st;
605 	register states fresh = m->fresh;
606 	register states tmp = m->tmp;
607 	register uchar *p = start;
608 	register uchar c;
609 	register uchar lastc;	/* previous c */
610 	register int atbol;
611 	register int ateol;
612 	register uchar *coldp;	/* last p after which no match was underway */
613 
614 	CLEAR(st);
615 	SET1(st, startst);
616 	st = expand(m->g, startst, stopst, st, 0, 0);
617 	ASSIGN(fresh, st);
618 	SP("start", st, *p);
619 	c = '\0';
620 	coldp = NULL;
621 	for (;;) {
622 		/* next character */
623 		lastc = c;
624 		c = (p == m->endp) ? '\0' : *p;
625 		if (EQ(st, fresh))
626 			coldp = p;
627 
628 		/* is there an EOL and/or BOL between lastc and c? */
629 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
630 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
631 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
632 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
633 		if (atbol || ateol) {
634 			st = expand(m->g, startst, stopst, st, atbol, ateol);
635 			SP("bef", st, c);
636 		}
637 
638 		/* are we done? */
639 		if (ISSET(st, stopst) || p == stop)
640 			break;		/* NOTE BREAK OUT */
641 
642 		/* no, we must deal with this character */
643 		ASSIGN(tmp, st);
644 		ASSIGN(st, fresh);
645 		st = step(m->g, startst, stopst, tmp, c, st);
646 		SP("aft", st, c);
647 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
648 		p++;
649 	}
650 
651 	assert(coldp != NULL);
652 	m->coldp = coldp;
653 	if (ISSET(st, stopst))
654 		return(p+1);
655 	else
656 		return(NULL);
657 }
658 
659 /*
660  - slow - step through the string more deliberately
661  */
662 static uchar *			/* where it ended */
663 slow(m, start, stop, startst, stopst)
664 register struct match *m;
665 uchar *start;
666 uchar *stop;
667 sopno startst;
668 sopno stopst;
669 {
670 	register states st = m->st;
671 	register states empty = m->empty;
672 	register states tmp = m->tmp;
673 	register uchar *p = start;
674 	register uchar c = (start == m->beginp) ? '\0' : *(start-1);
675 	register uchar lastc;	/* previous c */
676 	register int atbol;
677 	register int ateol;
678 	register uchar *matchp;	/* last p at which a match ended */
679 
680 	DO("slow", start, stop, startst, stopst);
681 	CLEAR(st);
682 	SET1(st, startst);
683 	SP("sstart", st, *p);
684 	matchp = NULL;
685 	for (;;) {
686 		/* next character */
687 		lastc = c;
688 		c = (p == m->endp) ? '\0' : *p;
689 
690 		/* is there an EOL and/or BOL between lastc and c? */
691 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
692 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
693 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
694 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
695 
696 		/* do we need an expansion before looking at the char? */
697 		if (p == start || atbol || ateol) {
698 			st = expand(m->g, startst, stopst, st, atbol, ateol);
699 			SP("sbef", st, c);
700 		}
701 
702 		/* are we done? */
703 		if (ISSET(st, stopst))
704 			matchp = p;
705 		if (EQ(st, empty) || p == stop)
706 			break;		/* NOTE BREAK OUT */
707 
708 		/* no, we must deal with this character */
709 		ASSIGN(tmp, st);
710 		ASSIGN(st, empty);
711 		st = step(m->g, startst, stopst, tmp, c, st);
712 		SP("saft", st, c);
713 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
714 		p++;
715 	}
716 
717 	return(matchp);
718 }
719 
720 
721 /*
722  - expand - return set of states reachable from an initial set
723  */
724 static states
725 expand(g, start, stop, st, atbol, ateol)
726 register struct re_guts *g;
727 int start;			/* start state within strip */
728 int stop;			/* state after stop state within strip */
729 register states st;
730 int atbol;			/* at start or just after \n? (for BOL) */
731 int ateol;			/* just before \n or \0? (for EOL) */
732 {
733 	register sop s;
734 	register sopno pc;
735 	register onestate here;		/* note, macros know this name */
736 	register sopno look;
737 	register int i;
738 
739 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
740 		s = g->strip[pc];
741 		switch (OP(s)) {
742 		case OEND:
743 			assert(pc == stop-1);
744 			break;
745 		case OCHAR:		/* can't get past this */
746 			break;
747 		case OBOL:
748 			if (atbol)
749 				FWD(st, st, 1);
750 			break;
751 		case OEOL:
752 			if (ateol)
753 				FWD(st, st, 1);
754 			break;
755 		case OANY:		/* can't get past this either */
756 			break;
757 		case OANYOF:		/* or this */
758 			break;
759 		case OBACK_:		/* not significant here */
760 		case O_BACK:
761 			FWD(st, st, 1);
762 			break;
763 		case OPLUS_:		/* forward, this is just an empty */
764 			FWD(st, st, 1);
765 			break;
766 		case O_PLUS:		/* both forward and back */
767 			FWD(st, st, 1);
768 			i = ISSETBACK(st, OPND(s));
769 			BACK(st, st, OPND(s));
770 			if (!i && ISSETBACK(st, OPND(s))) {
771 				/* oho, must reconsider loop body */
772 				pc -= OPND(s) + 1;
773 				INIT(here, pc);
774 			}
775 			break;
776 		case OQUEST_:		/* two branches, both forward */
777 			FWD(st, st, 1);
778 			FWD(st, st, OPND(s));
779 			break;
780 		case O_QUEST:		/* just an empty */
781 			FWD(st, st, 1);
782 			break;
783 		case OLPAREN:		/* not significant here */
784 		case ORPAREN:
785 			FWD(st, st, 1);
786 			break;
787 		case OCH_:		/* mark the first two branches */
788 			FWD(st, st, 1);
789 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
790 			FWD(st, st, OPND(s));
791 			break;
792 		case OOR1:		/* done a branch, find the O_CH */
793 			if (ISSTATEIN(st, here)) {
794 				for (look = 1;
795 						OP(s = g->strip[pc+look]) != O_CH;
796 						look += OPND(s))
797 					assert(OP(s) == OOR2);
798 				FWD(st, st, look);
799 			}
800 			break;
801 		case OOR2:		/* propagate OCH_'s marking onward */
802 			FWD(st, st, 1);
803 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
804 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
805 				FWD(st, st, OPND(s));
806 			}
807 			break;
808 		case O_CH:		/* just an empty */
809 			FWD(st, st, 1);
810 			break;
811 		default:		/* ooooops... */
812 			assert(0);
813 			break;
814 		}
815 	}
816 
817 	return(st);
818 }
819 
820 /*
821  - step - map set of states reachable before char to set reachable after
822  */
823 static states
824 step(g, start, stop, bef, ch, aft)
825 register struct re_guts *g;
826 int start;			/* start state within strip */
827 int stop;			/* state after stop state within strip */
828 register states bef;		/* states reachable before */
829 uchar ch;
830 register states aft;		/* states already known reachable after */
831 {
832 	register cset *cs;
833 	register sop s;
834 	register sopno pc;
835 	register onestate here;		/* note, macros know this name */
836 	register sopno look;
837 	register int i;
838 
839 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
840 		s = g->strip[pc];
841 		switch (OP(s)) {
842 		case OEND:
843 			assert(pc == stop-1);
844 			break;
845 		case OCHAR:
846 			if (ch == OPND(s))
847 				FWD(aft, bef, 1);
848 			break;
849 		case OBOL:	/* these are looked after by expand() */
850 		case OEOL:
851 			break;
852 		case OANY:
853 			FWD(aft, bef, 1);
854 			break;
855 		case OANYOF:
856 			cs = &g->sets[OPND(s)];
857 			if (CHIN(cs, ch))
858 				FWD(aft, bef, 1);
859 			break;
860 		case OBACK_:		/* ignored here */
861 		case O_BACK:
862 			FWD(aft, aft, 1);
863 			break;
864 		case OPLUS_:		/* forward, this is just an empty */
865 			FWD(aft, aft, 1);
866 			break;
867 		case O_PLUS:		/* both forward and back */
868 			FWD(aft, aft, 1);
869 			i = ISSETBACK(aft, OPND(s));
870 			BACK(aft, aft, OPND(s));
871 			if (!i && ISSETBACK(aft, OPND(s))) {
872 				/* oho, must reconsider loop body */
873 				pc -= OPND(s) + 1;
874 				INIT(here, pc);
875 			}
876 			break;
877 		case OQUEST_:		/* two branches, both forward */
878 			FWD(aft, aft, 1);
879 			FWD(aft, aft, OPND(s));
880 			break;
881 		case O_QUEST:		/* just an empty */
882 			FWD(aft, aft, 1);
883 			break;
884 		case OLPAREN:		/* not significant here */
885 		case ORPAREN:
886 			FWD(aft, aft, 1);
887 			break;
888 		case OCH_:		/* mark the first two branches */
889 			FWD(aft, aft, 1);
890 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
891 			FWD(aft, aft, OPND(s));
892 			break;
893 		case OOR1:		/* done a branch, find the O_CH */
894 			if (ISSTATEIN(aft, here)) {
895 				for (look = 1;
896 						OP(s = g->strip[pc+look]) != O_CH;
897 						look += OPND(s))
898 					assert(OP(s) == OOR2);
899 				FWD(aft, aft, look);
900 			}
901 			break;
902 		case OOR2:		/* propagate OCH_'s marking */
903 			FWD(aft, aft, 1);
904 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
905 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
906 				FWD(aft, aft, OPND(s));
907 			}
908 			break;
909 		case O_CH:		/* just empty */
910 			FWD(aft, aft, 1);
911 			break;
912 		default:		/* ooooops... */
913 			assert(0);
914 			break;
915 		}
916 	}
917 
918 	return(aft);
919 }
920 
921 #ifndef NDEBUG
922 /*
923  - print - print a set of states
924  */
925 static void
926 print(g, caption, st, ch, d)
927 struct re_guts *g;
928 char *caption;
929 states st;
930 uchar ch;
931 FILE *d;
932 {
933 	register int i;
934 	register int first = 1;
935 
936 	fprintf(d, "%s", caption);
937 	if (ch != '\0')
938 		fprintf(d, " %s", regchar(ch));
939 	for (i = 0; i < g->nstates; i++)
940 		if (ISSET(st, i)) {
941 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
942 			first = 0;
943 		}
944 	fprintf(d, "\n");
945 }
946 #endif
947 
948 #undef	matcher
949 #undef	fast
950 #undef	slow
951 #undef	dissect
952 #undef	backref
953 #undef	expand
954 #undef	step
955 #undef	print
956 #undef	match
957