xref: /original-bsd/lib/libc/regex/engine.c (revision a8f82b20)
1 /*-
2  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Henry Spencer of the University of Toronto.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)engine.c	8.3 (Berkeley) 03/16/94
12  */
13 
14 /*
15  * The matching engine and friends.  This file is #included by regexec.c
16  * after suitable #defines of a variety of macros used herein, so that
17  * different state representations can be used without duplicating masses
18  * of code.
19  */
20 
21 #ifdef SNAMES
22 #define	matcher	smatcher
23 #define	fast	sfast
24 #define	slow	sslow
25 #define	dissect	sdissect
26 #define	backref	sbackref
27 #define	step	sstep
28 #define	print	sprint
29 #define	at	sat
30 #define	match	smat
31 #endif
32 #ifdef LNAMES
33 #define	matcher	lmatcher
34 #define	fast	lfast
35 #define	slow	lslow
36 #define	dissect	ldissect
37 #define	backref	lbackref
38 #define	step	lstep
39 #define	print	lprint
40 #define	at	lat
41 #define	match	lmat
42 #endif
43 
44 /* another structure passed up and down to avoid zillions of parameters */
45 struct match {
46 	struct re_guts *g;
47 	int eflags;
48 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
49 	char *offp;		/* offsets work from here */
50 	char *beginp;		/* start of string -- virtual NUL precedes */
51 	char *endp;		/* end of string -- virtual NUL here */
52 	char *coldp;		/* can be no match starting before here */
53 	char **lastpos;		/* [nplus+1] */
54 	STATEVARS;
55 	states st;		/* current states */
56 	states fresh;		/* states for a fresh start */
57 	states tmp;		/* temporary */
58 	states empty;		/* empty set of states */
59 };
60 
61 /* ========= begin header generated by ./mkh ========= */
62 #ifdef __cplusplus
63 extern "C" {
64 #endif
65 
66 /* === engine.c === */
67 static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
68 static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
69 static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
70 static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
71 static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
72 static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
73 #define	BOL	(OUT+1)
74 #define	EOL	(BOL+1)
75 #define	BOLEOL	(BOL+2)
76 #define	NOTHING	(BOL+3)
77 #define	BOW	(BOL+4)
78 #define	EOW	(BOL+5)
79 #define	CODEMAX	(BOL+5)		/* highest code used */
80 #define	NONCHAR(c)	((c) > CHAR_MAX)
81 #define	NNONCHAR	(CODEMAX-CHAR_MAX)
82 #ifdef REDEBUG
83 static void print(struct match *m, char *caption, states st, int ch, FILE *d);
84 #endif
85 #ifdef REDEBUG
86 static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
87 #endif
88 #ifdef REDEBUG
89 static char *pchar(int ch);
90 #endif
91 
92 #ifdef __cplusplus
93 }
94 #endif
95 /* ========= end header generated by ./mkh ========= */
96 
97 #ifdef REDEBUG
98 #define	SP(t, s, c)	print(m, t, s, c, stdout)
99 #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
100 #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
101 #else
102 #define	SP(t, s, c)	/* nothing */
103 #define	AT(t, p1, p2, s1, s2)	/* nothing */
104 #define	NOTE(s)	/* nothing */
105 #endif
106 
107 /*
108  - matcher - the actual matching engine
109  == static int matcher(register struct re_guts *g, char *string, \
110  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
111  */
112 static int			/* 0 success, REG_NOMATCH failure */
113 matcher(g, string, nmatch, pmatch, eflags)
114 register struct re_guts *g;
115 char *string;
116 size_t nmatch;
117 regmatch_t pmatch[];
118 int eflags;
119 {
120 	register char *endp;
121 	register int i;
122 	struct match mv;
123 	register struct match *m = &mv;
124 	register char *dp;
125 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
126 	const register sopno gl = g->laststate;
127 	char *start;
128 	char *stop;
129 
130 	/* simplify the situation where possible */
131 	if (g->cflags&REG_NOSUB)
132 		nmatch = 0;
133 	if (eflags&REG_STARTEND) {
134 		start = string + pmatch[0].rm_so;
135 		stop = string + pmatch[0].rm_eo;
136 	} else {
137 		start = string;
138 		stop = start + strlen(start);
139 	}
140 	if (stop < start)
141 		return(REG_INVARG);
142 
143 	/* prescreening; this does wonders for this rather slow code */
144 	if (g->must != NULL) {
145 		for (dp = start; dp < stop; dp++)
146 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
147 				memcmp(dp, g->must, (size_t)g->mlen) == 0)
148 				break;
149 		if (dp == stop)		/* we didn't find g->must */
150 			return(REG_NOMATCH);
151 	}
152 
153 	/* match struct setup */
154 	m->g = g;
155 	m->eflags = eflags;
156 	m->pmatch = NULL;
157 	m->lastpos = NULL;
158 	m->offp = string;
159 	m->beginp = start;
160 	m->endp = stop;
161 	STATESETUP(m, 4);
162 	SETUP(m->st);
163 	SETUP(m->fresh);
164 	SETUP(m->tmp);
165 	SETUP(m->empty);
166 	CLEAR(m->empty);
167 
168 	/* this loop does only one repetition except for backrefs */
169 	for (;;) {
170 		endp = fast(m, start, stop, gf, gl);
171 		if (endp == NULL) {		/* a miss */
172 			STATETEARDOWN(m);
173 			return(REG_NOMATCH);
174 		}
175 		if (nmatch == 0 && !g->backrefs)
176 			break;		/* no further info needed */
177 
178 		/* where? */
179 		assert(m->coldp != NULL);
180 		for (;;) {
181 			NOTE("finding start");
182 			endp = slow(m, m->coldp, stop, gf, gl);
183 			if (endp != NULL)
184 				break;
185 			assert(m->coldp < m->endp);
186 			m->coldp++;
187 		}
188 		if (nmatch == 1 && !g->backrefs)
189 			break;		/* no further info needed */
190 
191 		/* oh my, he wants the subexpressions... */
192 		if (m->pmatch == NULL)
193 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
194 							sizeof(regmatch_t));
195 		if (m->pmatch == NULL) {
196 			STATETEARDOWN(m);
197 			return(REG_ESPACE);
198 		}
199 		for (i = 1; i <= m->g->nsub; i++)
200 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
201 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
202 			NOTE("dissecting");
203 			dp = dissect(m, m->coldp, endp, gf, gl);
204 		} else {
205 			if (g->nplus > 0 && m->lastpos == NULL)
206 				m->lastpos = (char **)malloc((g->nplus+1) *
207 							sizeof(char *));
208 			if (g->nplus > 0 && m->lastpos == NULL) {
209 				free(m->pmatch);
210 				STATETEARDOWN(m);
211 				return(REG_ESPACE);
212 			}
213 			NOTE("backref dissect");
214 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
215 		}
216 		if (dp != NULL)
217 			break;
218 
219 		/* uh-oh... we couldn't find a subexpression-level match */
220 		assert(g->backrefs);	/* must be back references doing it */
221 		assert(g->nplus == 0 || m->lastpos != NULL);
222 		for (;;) {
223 			if (dp != NULL || endp <= m->coldp)
224 				break;		/* defeat */
225 			NOTE("backoff");
226 			endp = slow(m, m->coldp, endp-1, gf, gl);
227 			if (endp == NULL)
228 				break;		/* defeat */
229 			/* try it on a shorter possibility */
230 #ifndef NDEBUG
231 			for (i = 1; i <= m->g->nsub; i++) {
232 				assert(m->pmatch[i].rm_so == -1);
233 				assert(m->pmatch[i].rm_eo == -1);
234 			}
235 #endif
236 			NOTE("backoff dissect");
237 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
238 		}
239 		assert(dp == NULL || dp == endp);
240 		if (dp != NULL)		/* found a shorter one */
241 			break;
242 
243 		/* despite initial appearances, there is no match here */
244 		NOTE("false alarm");
245 		start = m->coldp + 1;	/* recycle starting later */
246 		assert(start <= stop);
247 	}
248 
249 	/* fill in the details if requested */
250 	if (nmatch > 0) {
251 		pmatch[0].rm_so = m->coldp - m->offp;
252 		pmatch[0].rm_eo = endp - m->offp;
253 	}
254 	if (nmatch > 1) {
255 		assert(m->pmatch != NULL);
256 		for (i = 1; i < nmatch; i++)
257 			if (i <= m->g->nsub)
258 				pmatch[i] = m->pmatch[i];
259 			else {
260 				pmatch[i].rm_so = -1;
261 				pmatch[i].rm_eo = -1;
262 			}
263 	}
264 
265 	if (m->pmatch != NULL)
266 		free((char *)m->pmatch);
267 	if (m->lastpos != NULL)
268 		free((char *)m->lastpos);
269 	STATETEARDOWN(m);
270 	return(0);
271 }
272 
273 /*
274  - dissect - figure out what matched what, no back references
275  == static char *dissect(register struct match *m, char *start, \
276  ==	char *stop, sopno startst, sopno stopst);
277  */
278 static char *			/* == stop (success) always */
279 dissect(m, start, stop, startst, stopst)
280 register struct match *m;
281 char *start;
282 char *stop;
283 sopno startst;
284 sopno stopst;
285 {
286 	register int i;
287 	register sopno ss;	/* start sop of current subRE */
288 	register sopno es;	/* end sop of current subRE */
289 	register char *sp;	/* start of string matched by it */
290 	register char *stp;	/* string matched by it cannot pass here */
291 	register char *rest;	/* start of rest of string */
292 	register char *tail;	/* string unmatched by rest of RE */
293 	register sopno ssub;	/* start sop of subsubRE */
294 	register sopno esub;	/* end sop of subsubRE */
295 	register char *ssp;	/* start of string matched by subsubRE */
296 	register char *sep;	/* end of string matched by subsubRE */
297 	register char *oldssp;	/* previous ssp */
298 	register char *dp;
299 
300 	AT("diss", start, stop, startst, stopst);
301 	sp = start;
302 	for (ss = startst; ss < stopst; ss = es) {
303 		/* identify end of subRE */
304 		es = ss;
305 		switch (OP(m->g->strip[es])) {
306 		case OPLUS_:
307 		case OQUEST_:
308 			es += OPND(m->g->strip[es]);
309 			break;
310 		case OCH_:
311 			while (OP(m->g->strip[es]) != O_CH)
312 				es += OPND(m->g->strip[es]);
313 			break;
314 		}
315 		es++;
316 
317 		/* figure out what it matched */
318 		switch (OP(m->g->strip[ss])) {
319 		case OEND:
320 			assert(nope);
321 			break;
322 		case OCHAR:
323 			sp++;
324 			break;
325 		case OBOL:
326 		case OEOL:
327 		case OBOW:
328 		case OEOW:
329 			break;
330 		case OANY:
331 		case OANYOF:
332 			sp++;
333 			break;
334 		case OBACK_:
335 		case O_BACK:
336 			assert(nope);
337 			break;
338 		/* cases where length of match is hard to find */
339 		case OQUEST_:
340 			stp = stop;
341 			for (;;) {
342 				/* how long could this one be? */
343 				rest = slow(m, sp, stp, ss, es);
344 				assert(rest != NULL);	/* it did match */
345 				/* could the rest match the rest? */
346 				tail = slow(m, rest, stop, es, stopst);
347 				if (tail == stop)
348 					break;		/* yes! */
349 				/* no -- try a shorter match for this one */
350 				stp = rest - 1;
351 				assert(stp >= sp);	/* it did work */
352 			}
353 			ssub = ss + 1;
354 			esub = es - 1;
355 			/* did innards match? */
356 			if (slow(m, sp, rest, ssub, esub) != NULL) {
357 				dp = dissect(m, sp, rest, ssub, esub);
358 				assert(dp == rest);
359 			} else		/* no */
360 				assert(sp == rest);
361 			sp = rest;
362 			break;
363 		case OPLUS_:
364 			stp = stop;
365 			for (;;) {
366 				/* how long could this one be? */
367 				rest = slow(m, sp, stp, ss, es);
368 				assert(rest != NULL);	/* it did match */
369 				/* could the rest match the rest? */
370 				tail = slow(m, rest, stop, es, stopst);
371 				if (tail == stop)
372 					break;		/* yes! */
373 				/* no -- try a shorter match for this one */
374 				stp = rest - 1;
375 				assert(stp >= sp);	/* it did work */
376 			}
377 			ssub = ss + 1;
378 			esub = es - 1;
379 			ssp = sp;
380 			oldssp = ssp;
381 			for (;;) {	/* find last match of innards */
382 				sep = slow(m, ssp, rest, ssub, esub);
383 				if (sep == NULL || sep == ssp)
384 					break;	/* failed or matched null */
385 				oldssp = ssp;	/* on to next try */
386 				ssp = sep;
387 			}
388 			if (sep == NULL) {
389 				/* last successful match */
390 				sep = ssp;
391 				ssp = oldssp;
392 			}
393 			assert(sep == rest);	/* must exhaust substring */
394 			assert(slow(m, ssp, sep, ssub, esub) == rest);
395 			dp = dissect(m, ssp, sep, ssub, esub);
396 			assert(dp == sep);
397 			sp = rest;
398 			break;
399 		case OCH_:
400 			stp = stop;
401 			for (;;) {
402 				/* how long could this one be? */
403 				rest = slow(m, sp, stp, ss, es);
404 				assert(rest != NULL);	/* it did match */
405 				/* could the rest match the rest? */
406 				tail = slow(m, rest, stop, es, stopst);
407 				if (tail == stop)
408 					break;		/* yes! */
409 				/* no -- try a shorter match for this one */
410 				stp = rest - 1;
411 				assert(stp >= sp);	/* it did work */
412 			}
413 			ssub = ss + 1;
414 			esub = ss + OPND(m->g->strip[ss]) - 1;
415 			assert(OP(m->g->strip[esub]) == OOR1);
416 			for (;;) {	/* find first matching branch */
417 				if (slow(m, sp, rest, ssub, esub) == rest)
418 					break;	/* it matched all of it */
419 				/* that one missed, try next one */
420 				assert(OP(m->g->strip[esub]) == OOR1);
421 				esub++;
422 				assert(OP(m->g->strip[esub]) == OOR2);
423 				ssub = esub + 1;
424 				esub += OPND(m->g->strip[esub]);
425 				if (OP(m->g->strip[esub]) == OOR2)
426 					esub--;
427 				else
428 					assert(OP(m->g->strip[esub]) == O_CH);
429 			}
430 			dp = dissect(m, sp, rest, ssub, esub);
431 			assert(dp == rest);
432 			sp = rest;
433 			break;
434 		case O_PLUS:
435 		case O_QUEST:
436 		case OOR1:
437 		case OOR2:
438 		case O_CH:
439 			assert(nope);
440 			break;
441 		case OLPAREN:
442 			i = OPND(m->g->strip[ss]);
443 			assert(0 < i && i <= m->g->nsub);
444 			m->pmatch[i].rm_so = sp - m->offp;
445 			break;
446 		case ORPAREN:
447 			i = OPND(m->g->strip[ss]);
448 			assert(0 < i && i <= m->g->nsub);
449 			m->pmatch[i].rm_eo = sp - m->offp;
450 			break;
451 		default:		/* uh oh */
452 			assert(nope);
453 			break;
454 		}
455 	}
456 
457 	assert(sp == stop);
458 	return(sp);
459 }
460 
461 /*
462  - backref - figure out what matched what, figuring in back references
463  == static char *backref(register struct match *m, char *start, \
464  ==	char *stop, sopno startst, sopno stopst, sopno lev);
465  */
466 static char *			/* == stop (success) or NULL (failure) */
467 backref(m, start, stop, startst, stopst, lev)
468 register struct match *m;
469 char *start;
470 char *stop;
471 sopno startst;
472 sopno stopst;
473 sopno lev;			/* PLUS nesting level */
474 {
475 	register int i;
476 	register sopno ss;	/* start sop of current subRE */
477 	register char *sp;	/* start of string matched by it */
478 	register sopno ssub;	/* start sop of subsubRE */
479 	register sopno esub;	/* end sop of subsubRE */
480 	register char *ssp;	/* start of string matched by subsubRE */
481 	register char *dp;
482 	register size_t len;
483 	register int hard;
484 	register sop s;
485 	register regoff_t offsave;
486 	register cset *cs;
487 
488 	AT("back", start, stop, startst, stopst);
489 	sp = start;
490 
491 	/* get as far as we can with easy stuff */
492 	hard = 0;
493 	for (ss = startst; !hard && ss < stopst; ss++)
494 		switch (OP(s = m->g->strip[ss])) {
495 		case OCHAR:
496 			if (sp == stop || *sp++ != (char)OPND(s))
497 				return(NULL);
498 			break;
499 		case OANY:
500 			if (sp == stop)
501 				return(NULL);
502 			sp++;
503 			break;
504 		case OANYOF:
505 			cs = &m->g->sets[OPND(s)];
506 			if (sp == stop || !CHIN(cs, *sp++))
507 				return(NULL);
508 			break;
509 		case OBOL:
510 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
511 					(sp < m->endp && *(sp-1) == '\n' &&
512 						(m->g->cflags&REG_NEWLINE)) )
513 				{ /* yes */ }
514 			else
515 				return(NULL);
516 			break;
517 		case OEOL:
518 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
519 					(sp < m->endp && *sp == '\n' &&
520 						(m->g->cflags&REG_NEWLINE)) )
521 				{ /* yes */ }
522 			else
523 				return(NULL);
524 			break;
525 		case OBOW:
526 			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
527 					(sp < m->endp && *(sp-1) == '\n' &&
528 						(m->g->cflags&REG_NEWLINE)) ||
529 					(sp > m->beginp &&
530 							!ISWORD(*(sp-1))) ) &&
531 					(sp < m->endp && ISWORD(*sp)) )
532 				{ /* yes */ }
533 			else
534 				return(NULL);
535 			break;
536 		case OEOW:
537 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
538 					(sp < m->endp && *sp == '\n' &&
539 						(m->g->cflags&REG_NEWLINE)) ||
540 					(sp < m->endp && !ISWORD(*sp)) ) &&
541 					(sp > m->beginp && ISWORD(*(sp-1))) )
542 				{ /* yes */ }
543 			else
544 				return(NULL);
545 			break;
546 		case O_QUEST:
547 			break;
548 		case OOR1:	/* matches null but needs to skip */
549 			ss++;
550 			s = m->g->strip[ss];
551 			do {
552 				assert(OP(s) == OOR2);
553 				ss += OPND(s);
554 			} while (OP(s = m->g->strip[ss]) != O_CH);
555 			/* note that the ss++ gets us past the O_CH */
556 			break;
557 		default:	/* have to make a choice */
558 			hard = 1;
559 			break;
560 		}
561 	if (!hard) {		/* that was it! */
562 		if (sp != stop)
563 			return(NULL);
564 		return(sp);
565 	}
566 	ss--;			/* adjust for the for's final increment */
567 
568 	/* the hard stuff */
569 	AT("hard", sp, stop, ss, stopst);
570 	s = m->g->strip[ss];
571 	switch (OP(s)) {
572 	case OBACK_:		/* the vilest depths */
573 		i = OPND(s);
574 		assert(0 < i && i <= m->g->nsub);
575 		if (m->pmatch[i].rm_eo == -1)
576 			return(NULL);
577 		assert(m->pmatch[i].rm_so != -1);
578 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
579 		assert(stop - m->beginp >= len);
580 		if (sp > stop - len)
581 			return(NULL);	/* not enough left to match */
582 		ssp = m->offp + m->pmatch[i].rm_so;
583 		if (memcmp(sp, ssp, len) != 0)
584 			return(NULL);
585 		while (m->g->strip[ss] != SOP(O_BACK, i))
586 			ss++;
587 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
588 		break;
589 	case OQUEST_:		/* to null or not */
590 		dp = backref(m, sp, stop, ss+1, stopst, lev);
591 		if (dp != NULL)
592 			return(dp);	/* not */
593 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
594 		break;
595 	case OPLUS_:
596 		assert(m->lastpos != NULL);
597 		assert(lev+1 <= m->g->nplus);
598 		m->lastpos[lev+1] = sp;
599 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
600 		break;
601 	case O_PLUS:
602 		if (sp == m->lastpos[lev])	/* last pass matched null */
603 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
604 		/* try another pass */
605 		m->lastpos[lev] = sp;
606 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
607 		if (dp == NULL)
608 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
609 		else
610 			return(dp);
611 		break;
612 	case OCH_:		/* find the right one, if any */
613 		ssub = ss + 1;
614 		esub = ss + OPND(s) - 1;
615 		assert(OP(m->g->strip[esub]) == OOR1);
616 		for (;;) {	/* find first matching branch */
617 			dp = backref(m, sp, stop, ssub, esub, lev);
618 			if (dp != NULL)
619 				return(dp);
620 			/* that one missed, try next one */
621 			if (OP(m->g->strip[esub]) == O_CH)
622 				return(NULL);	/* there is none */
623 			esub++;
624 			assert(OP(m->g->strip[esub]) == OOR2);
625 			ssub = esub + 1;
626 			esub += OPND(m->g->strip[esub]);
627 			if (OP(m->g->strip[esub]) == OOR2)
628 				esub--;
629 			else
630 				assert(OP(m->g->strip[esub]) == O_CH);
631 		}
632 		break;
633 	case OLPAREN:		/* must undo assignment if rest fails */
634 		i = OPND(s);
635 		assert(0 < i && i <= m->g->nsub);
636 		offsave = m->pmatch[i].rm_so;
637 		m->pmatch[i].rm_so = sp - m->offp;
638 		dp = backref(m, sp, stop, ss+1, stopst, lev);
639 		if (dp != NULL)
640 			return(dp);
641 		m->pmatch[i].rm_so = offsave;
642 		return(NULL);
643 		break;
644 	case ORPAREN:		/* must undo assignment if rest fails */
645 		i = OPND(s);
646 		assert(0 < i && i <= m->g->nsub);
647 		offsave = m->pmatch[i].rm_eo;
648 		m->pmatch[i].rm_eo = sp - m->offp;
649 		dp = backref(m, sp, stop, ss+1, stopst, lev);
650 		if (dp != NULL)
651 			return(dp);
652 		m->pmatch[i].rm_eo = offsave;
653 		return(NULL);
654 		break;
655 	default:		/* uh oh */
656 		assert(nope);
657 		break;
658 	}
659 
660 	/* "can't happen" */
661 	assert(nope);
662 	/* NOTREACHED */
663 }
664 
665 /*
666  - fast - step through the string at top speed
667  == static char *fast(register struct match *m, char *start, \
668  ==	char *stop, sopno startst, sopno stopst);
669  */
670 static char *			/* where tentative match ended, or NULL */
671 fast(m, start, stop, startst, stopst)
672 register struct match *m;
673 char *start;
674 char *stop;
675 sopno startst;
676 sopno stopst;
677 {
678 	register states st = m->st;
679 	register states fresh = m->fresh;
680 	register states tmp = m->tmp;
681 	register char *p = start;
682 	register int c = (start == m->beginp) ? OUT : *(start-1);
683 	register int lastc;	/* previous c */
684 	register int flagch;
685 	register int i;
686 	register char *coldp;	/* last p after which no match was underway */
687 
688 	CLEAR(st);
689 	SET1(st, startst);
690 	st = step(m->g, startst, stopst, st, NOTHING, st);
691 	ASSIGN(fresh, st);
692 	SP("start", st, *p);
693 	coldp = NULL;
694 	for (;;) {
695 		/* next character */
696 		lastc = c;
697 		c = (p == m->endp) ? OUT : *p;
698 		if (EQ(st, fresh))
699 			coldp = p;
700 
701 		/* is there an EOL and/or BOL between lastc and c? */
702 		flagch = '\0';
703 		i = 0;
704 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
705 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
706 			flagch = BOL;
707 			i = m->g->nbol;
708 		}
709 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
710 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
711 			flagch = (flagch == BOL) ? BOLEOL : EOL;
712 			i += m->g->neol;
713 		}
714 		if (i != 0) {
715 			for (; i > 0; i--)
716 				st = step(m->g, startst, stopst, st, flagch, st);
717 			SP("boleol", st, c);
718 		}
719 
720 		/* how about a word boundary? */
721 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
722 					(c != OUT && ISWORD(c)) ) {
723 			flagch = BOW;
724 		}
725 		if ( (lastc != OUT && ISWORD(lastc)) &&
726 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
727 			flagch = EOW;
728 		}
729 		if (flagch == BOW || flagch == EOW) {
730 			st = step(m->g, startst, stopst, st, flagch, st);
731 			SP("boweow", st, c);
732 		}
733 
734 		/* are we done? */
735 		if (ISSET(st, stopst) || p == stop)
736 			break;		/* NOTE BREAK OUT */
737 
738 		/* no, we must deal with this character */
739 		ASSIGN(tmp, st);
740 		ASSIGN(st, fresh);
741 		assert(c != OUT);
742 		st = step(m->g, startst, stopst, tmp, c, st);
743 		SP("aft", st, c);
744 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
745 		p++;
746 	}
747 
748 	assert(coldp != NULL);
749 	m->coldp = coldp;
750 	if (ISSET(st, stopst))
751 		return(p+1);
752 	else
753 		return(NULL);
754 }
755 
756 /*
757  - slow - step through the string more deliberately
758  == static char *slow(register struct match *m, char *start, \
759  ==	char *stop, sopno startst, sopno stopst);
760  */
761 static char *			/* where it ended */
762 slow(m, start, stop, startst, stopst)
763 register struct match *m;
764 char *start;
765 char *stop;
766 sopno startst;
767 sopno stopst;
768 {
769 	register states st = m->st;
770 	register states empty = m->empty;
771 	register states tmp = m->tmp;
772 	register char *p = start;
773 	register int c = (start == m->beginp) ? OUT : *(start-1);
774 	register int lastc;	/* previous c */
775 	register int flagch;
776 	register int i;
777 	register char *matchp;	/* last p at which a match ended */
778 
779 	AT("slow", start, stop, startst, stopst);
780 	CLEAR(st);
781 	SET1(st, startst);
782 	SP("sstart", st, *p);
783 	st = step(m->g, startst, stopst, st, NOTHING, st);
784 	matchp = NULL;
785 	for (;;) {
786 		/* next character */
787 		lastc = c;
788 		c = (p == m->endp) ? OUT : *p;
789 
790 		/* is there an EOL and/or BOL between lastc and c? */
791 		flagch = '\0';
792 		i = 0;
793 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
794 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
795 			flagch = BOL;
796 			i = m->g->nbol;
797 		}
798 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
799 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
800 			flagch = (flagch == BOL) ? BOLEOL : EOL;
801 			i += m->g->neol;
802 		}
803 		if (i != 0) {
804 			for (; i > 0; i--)
805 				st = step(m->g, startst, stopst, st, flagch, st);
806 			SP("sboleol", st, c);
807 		}
808 
809 		/* how about a word boundary? */
810 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
811 					(c != OUT && ISWORD(c)) ) {
812 			flagch = BOW;
813 		}
814 		if ( (lastc != OUT && ISWORD(lastc)) &&
815 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
816 			flagch = EOW;
817 		}
818 		if (flagch == BOW || flagch == EOW) {
819 			st = step(m->g, startst, stopst, st, flagch, st);
820 			SP("sboweow", st, c);
821 		}
822 
823 		/* are we done? */
824 		if (ISSET(st, stopst))
825 			matchp = p;
826 		if (EQ(st, empty) || p == stop)
827 			break;		/* NOTE BREAK OUT */
828 
829 		/* no, we must deal with this character */
830 		ASSIGN(tmp, st);
831 		ASSIGN(st, empty);
832 		assert(c != OUT);
833 		st = step(m->g, startst, stopst, tmp, c, st);
834 		SP("saft", st, c);
835 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
836 		p++;
837 	}
838 
839 	return(matchp);
840 }
841 
842 
843 /*
844  - step - map set of states reachable before char to set reachable after
845  == static states step(register struct re_guts *g, sopno start, sopno stop, \
846  ==	register states bef, int ch, register states aft);
847  == #define	BOL	(OUT+1)
848  == #define	EOL	(BOL+1)
849  == #define	BOLEOL	(BOL+2)
850  == #define	NOTHING	(BOL+3)
851  == #define	BOW	(BOL+4)
852  == #define	EOW	(BOL+5)
853  == #define	CODEMAX	(BOL+5)		// highest code used
854  == #define	NONCHAR(c)	((c) > CHAR_MAX)
855  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
856  */
857 static states
858 step(g, start, stop, bef, ch, aft)
859 register struct re_guts *g;
860 sopno start;			/* start state within strip */
861 sopno stop;			/* state after stop state within strip */
862 register states bef;		/* states reachable before */
863 int ch;				/* character or NONCHAR code */
864 register states aft;		/* states already known reachable after */
865 {
866 	register cset *cs;
867 	register sop s;
868 	register sopno pc;
869 	register onestate here;		/* note, macros know this name */
870 	register sopno look;
871 	register int i;
872 
873 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
874 		s = g->strip[pc];
875 		switch (OP(s)) {
876 		case OEND:
877 			assert(pc == stop-1);
878 			break;
879 		case OCHAR:
880 			/* only characters can match */
881 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
882 			if (ch == (char)OPND(s))
883 				FWD(aft, bef, 1);
884 			break;
885 		case OBOL:
886 			if (ch == BOL || ch == BOLEOL)
887 				FWD(aft, bef, 1);
888 			break;
889 		case OEOL:
890 			if (ch == EOL || ch == BOLEOL)
891 				FWD(aft, bef, 1);
892 			break;
893 		case OBOW:
894 			if (ch == BOW)
895 				FWD(aft, bef, 1);
896 			break;
897 		case OEOW:
898 			if (ch == EOW)
899 				FWD(aft, bef, 1);
900 			break;
901 		case OANY:
902 			if (!NONCHAR(ch))
903 				FWD(aft, bef, 1);
904 			break;
905 		case OANYOF:
906 			cs = &g->sets[OPND(s)];
907 			if (!NONCHAR(ch) && CHIN(cs, ch))
908 				FWD(aft, bef, 1);
909 			break;
910 		case OBACK_:		/* ignored here */
911 		case O_BACK:
912 			FWD(aft, aft, 1);
913 			break;
914 		case OPLUS_:		/* forward, this is just an empty */
915 			FWD(aft, aft, 1);
916 			break;
917 		case O_PLUS:		/* both forward and back */
918 			FWD(aft, aft, 1);
919 			i = ISSETBACK(aft, OPND(s));
920 			BACK(aft, aft, OPND(s));
921 			if (!i && ISSETBACK(aft, OPND(s))) {
922 				/* oho, must reconsider loop body */
923 				pc -= OPND(s) + 1;
924 				INIT(here, pc);
925 			}
926 			break;
927 		case OQUEST_:		/* two branches, both forward */
928 			FWD(aft, aft, 1);
929 			FWD(aft, aft, OPND(s));
930 			break;
931 		case O_QUEST:		/* just an empty */
932 			FWD(aft, aft, 1);
933 			break;
934 		case OLPAREN:		/* not significant here */
935 		case ORPAREN:
936 			FWD(aft, aft, 1);
937 			break;
938 		case OCH_:		/* mark the first two branches */
939 			FWD(aft, aft, 1);
940 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
941 			FWD(aft, aft, OPND(s));
942 			break;
943 		case OOR1:		/* done a branch, find the O_CH */
944 			if (ISSTATEIN(aft, here)) {
945 				for (look = 1;
946 						OP(s = g->strip[pc+look]) != O_CH;
947 						look += OPND(s))
948 					assert(OP(s) == OOR2);
949 				FWD(aft, aft, look);
950 			}
951 			break;
952 		case OOR2:		/* propagate OCH_'s marking */
953 			FWD(aft, aft, 1);
954 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
955 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
956 				FWD(aft, aft, OPND(s));
957 			}
958 			break;
959 		case O_CH:		/* just empty */
960 			FWD(aft, aft, 1);
961 			break;
962 		default:		/* ooooops... */
963 			assert(nope);
964 			break;
965 		}
966 	}
967 
968 	return(aft);
969 }
970 
971 #ifdef REDEBUG
972 /*
973  - print - print a set of states
974  == #ifdef REDEBUG
975  == static void print(struct match *m, char *caption, states st, \
976  ==	int ch, FILE *d);
977  == #endif
978  */
979 static void
980 print(m, caption, st, ch, d)
981 struct match *m;
982 char *caption;
983 states st;
984 int ch;
985 FILE *d;
986 {
987 	register struct re_guts *g = m->g;
988 	register int i;
989 	register int first = 1;
990 
991 	if (!(m->eflags&REG_TRACE))
992 		return;
993 
994 	fprintf(d, "%s", caption);
995 	if (ch != '\0')
996 		fprintf(d, " %s", pchar(ch));
997 	for (i = 0; i < g->nstates; i++)
998 		if (ISSET(st, i)) {
999 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1000 			first = 0;
1001 		}
1002 	fprintf(d, "\n");
1003 }
1004 
1005 /*
1006  - at - print current situation
1007  == #ifdef REDEBUG
1008  == static void at(struct match *m, char *title, char *start, char *stop, \
1009  ==						sopno startst, sopno stopst);
1010  == #endif
1011  */
1012 static void
1013 at(m, title, start, stop, startst, stopst)
1014 struct match *m;
1015 char *title;
1016 char *start;
1017 char *stop;
1018 sopno startst;
1019 sopno stopst;
1020 {
1021 	if (!(m->eflags&REG_TRACE))
1022 		return;
1023 
1024 	printf("%s %s-", title, pchar(*start));
1025 	printf("%s ", pchar(*stop));
1026 	printf("%ld-%ld\n", (long)startst, (long)stopst);
1027 }
1028 
1029 #ifndef PCHARDONE
1030 #define	PCHARDONE	/* never again */
1031 /*
1032  - pchar - make a character printable
1033  == #ifdef REDEBUG
1034  == static char *pchar(int ch);
1035  == #endif
1036  *
1037  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1038  * duplicate here avoids having a debugging-capable regexec.o tied to
1039  * a matching debug.o, and this is convenient.  It all disappears in
1040  * the non-debug compilation anyway, so it doesn't matter much.
1041  */
1042 static char *			/* -> representation */
1043 pchar(ch)
1044 int ch;
1045 {
1046 	static char pbuf[10];
1047 
1048 	if (isprint(ch) || ch == ' ')
1049 		sprintf(pbuf, "%c", ch);
1050 	else
1051 		sprintf(pbuf, "\\%o", ch);
1052 	return(pbuf);
1053 }
1054 #endif
1055 #endif
1056 
1057 #undef	matcher
1058 #undef	fast
1059 #undef	slow
1060 #undef	dissect
1061 #undef	backref
1062 #undef	step
1063 #undef	print
1064 #undef	at
1065 #undef	match
1066