xref: /original-bsd/lib/libc/regex/engine.c (revision 13950b19)
1641822ebSbostic /*-
2fdc51b3dSbostic  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
3fdc51b3dSbostic  * Copyright (c) 1992, 1993, 1994
43a0c2316Sbostic  *	The Regents of the University of California.  All rights reserved.
5641822ebSbostic  *
6641822ebSbostic  * This code is derived from software contributed to Berkeley by
7*13950b19Sbostic  * Henry Spencer.
8641822ebSbostic  *
9641822ebSbostic  * %sccs.include.redist.c%
10641822ebSbostic  *
11*13950b19Sbostic  *	@(#)engine.c	8.5 (Berkeley) 03/20/94
12641822ebSbostic  */
13641822ebSbostic 
14641822ebSbostic /*
15641822ebSbostic  * The matching engine and friends.  This file is #included by regexec.c
16641822ebSbostic  * after suitable #defines of a variety of macros used herein, so that
17641822ebSbostic  * different state representations can be used without duplicating masses
18641822ebSbostic  * of code.
19641822ebSbostic  */
20641822ebSbostic 
21641822ebSbostic #ifdef SNAMES
22641822ebSbostic #define	matcher	smatcher
23641822ebSbostic #define	fast	sfast
24641822ebSbostic #define	slow	sslow
25641822ebSbostic #define	dissect	sdissect
26641822ebSbostic #define	backref	sbackref
27641822ebSbostic #define	step	sstep
28641822ebSbostic #define	print	sprint
29e51f590cSbostic #define	at	sat
30641822ebSbostic #define	match	smat
31641822ebSbostic #endif
32641822ebSbostic #ifdef LNAMES
33641822ebSbostic #define	matcher	lmatcher
34641822ebSbostic #define	fast	lfast
35641822ebSbostic #define	slow	lslow
36641822ebSbostic #define	dissect	ldissect
37641822ebSbostic #define	backref	lbackref
38641822ebSbostic #define	step	lstep
39641822ebSbostic #define	print	lprint
40e51f590cSbostic #define	at	lat
41641822ebSbostic #define	match	lmat
42641822ebSbostic #endif
43641822ebSbostic 
44641822ebSbostic /* another structure passed up and down to avoid zillions of parameters */
45641822ebSbostic struct match {
46641822ebSbostic 	struct re_guts *g;
47641822ebSbostic 	int eflags;
48641822ebSbostic 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
49188b6717Sbostic 	char *offp;		/* offsets work from here */
50188b6717Sbostic 	char *beginp;		/* start of string -- virtual NUL precedes */
51188b6717Sbostic 	char *endp;		/* end of string -- virtual NUL here */
52188b6717Sbostic 	char *coldp;		/* can be no match starting before here */
53188b6717Sbostic 	char **lastpos;		/* [nplus+1] */
54641822ebSbostic 	STATEVARS;
55641822ebSbostic 	states st;		/* current states */
56641822ebSbostic 	states fresh;		/* states for a fresh start */
57641822ebSbostic 	states tmp;		/* temporary */
58641822ebSbostic 	states empty;		/* empty set of states */
59641822ebSbostic };
60641822ebSbostic 
61fdc51b3dSbostic /* ========= begin header generated by ./mkh ========= */
62fdc51b3dSbostic #ifdef __cplusplus
63fdc51b3dSbostic extern "C" {
64fdc51b3dSbostic #endif
65fdc51b3dSbostic 
66fdc51b3dSbostic /* === engine.c === */
670ca11009Sbostic static int matcher __P((struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags));
680ca11009Sbostic static char *dissect __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
690ca11009Sbostic static char *backref __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev));
700ca11009Sbostic static char *fast __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
710ca11009Sbostic static char *slow __P((struct match *m, char *start, char *stop, sopno startst, sopno stopst));
720ca11009Sbostic static states step __P((struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft));
73188b6717Sbostic #define	BOL	(OUT+1)
74188b6717Sbostic #define	EOL	(BOL+1)
75188b6717Sbostic #define	BOLEOL	(BOL+2)
76188b6717Sbostic #define	NOTHING	(BOL+3)
77188b6717Sbostic #define	BOW	(BOL+4)
78188b6717Sbostic #define	EOW	(BOL+5)
79188b6717Sbostic #define	CODEMAX	(BOL+5)		/* highest code used */
80188b6717Sbostic #define	NONCHAR(c)	((c) > CHAR_MAX)
81188b6717Sbostic #define	NNONCHAR	(CODEMAX-CHAR_MAX)
82188b6717Sbostic #ifdef REDEBUG
830ca11009Sbostic static void print __P((struct match *m, char *caption, states st, int ch, FILE *d));
84188b6717Sbostic #endif
85188b6717Sbostic #ifdef REDEBUG
860ca11009Sbostic static void at __P((struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst));
87188b6717Sbostic #endif
88188b6717Sbostic #ifdef REDEBUG
890ca11009Sbostic static char *pchar __P((int ch));
90188b6717Sbostic #endif
91188b6717Sbostic 
92fdc51b3dSbostic #ifdef __cplusplus
93fdc51b3dSbostic }
94fdc51b3dSbostic #endif
95fdc51b3dSbostic /* ========= end header generated by ./mkh ========= */
96fdc51b3dSbostic 
97e51f590cSbostic #ifdef REDEBUG
98e51f590cSbostic #define	SP(t, s, c)	print(m, t, s, c, stdout)
99e51f590cSbostic #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
100e51f590cSbostic #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
101641822ebSbostic #else
102641822ebSbostic #define	SP(t, s, c)	/* nothing */
103e51f590cSbostic #define	AT(t, p1, p2, s1, s2)	/* nothing */
104e51f590cSbostic #define	NOTE(s)	/* nothing */
105641822ebSbostic #endif
106641822ebSbostic 
107641822ebSbostic /*
108641822ebSbostic  - matcher - the actual matching engine
109188b6717Sbostic  == static int matcher(register struct re_guts *g, char *string, \
110188b6717Sbostic  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
111641822ebSbostic  */
112641822ebSbostic static int			/* 0 success, REG_NOMATCH failure */
matcher(g,string,nmatch,pmatch,eflags)113641822ebSbostic matcher(g, string, nmatch, pmatch, eflags)
114641822ebSbostic register struct re_guts *g;
115188b6717Sbostic char *string;
116641822ebSbostic size_t nmatch;
117641822ebSbostic regmatch_t pmatch[];
118641822ebSbostic int eflags;
119641822ebSbostic {
120188b6717Sbostic 	register char *endp;
121641822ebSbostic 	register int i;
122641822ebSbostic 	struct match mv;
123641822ebSbostic 	register struct match *m = &mv;
124188b6717Sbostic 	register char *dp;
125641822ebSbostic 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
126641822ebSbostic 	const register sopno gl = g->laststate;
127188b6717Sbostic 	char *start;
128188b6717Sbostic 	char *stop;
129641822ebSbostic 
130e51f590cSbostic 	/* simplify the situation where possible */
131641822ebSbostic 	if (g->cflags&REG_NOSUB)
132e51f590cSbostic 		nmatch = 0;
133641822ebSbostic 	if (eflags&REG_STARTEND) {
134641822ebSbostic 		start = string + pmatch[0].rm_so;
135641822ebSbostic 		stop = string + pmatch[0].rm_eo;
136641822ebSbostic 	} else {
137641822ebSbostic 		start = string;
138188b6717Sbostic 		stop = start + strlen(start);
139641822ebSbostic 	}
140188b6717Sbostic 	if (stop < start)
141188b6717Sbostic 		return(REG_INVARG);
142641822ebSbostic 
1438afd419dSbostic 	/* prescreening; this does wonders for this rather slow code */
1448afd419dSbostic 	if (g->must != NULL) {
1458afd419dSbostic 		for (dp = start; dp < stop; dp++)
146188b6717Sbostic 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
147188b6717Sbostic 				memcmp(dp, g->must, (size_t)g->mlen) == 0)
1488afd419dSbostic 				break;
1498afd419dSbostic 		if (dp == stop)		/* we didn't find g->must */
1508afd419dSbostic 			return(REG_NOMATCH);
1518afd419dSbostic 	}
1528afd419dSbostic 
153641822ebSbostic 	/* match struct setup */
154641822ebSbostic 	m->g = g;
155641822ebSbostic 	m->eflags = eflags;
156641822ebSbostic 	m->pmatch = NULL;
157641822ebSbostic 	m->lastpos = NULL;
158641822ebSbostic 	m->offp = string;
159641822ebSbostic 	m->beginp = start;
160641822ebSbostic 	m->endp = stop;
161641822ebSbostic 	STATESETUP(m, 4);
162641822ebSbostic 	SETUP(m->st);
163641822ebSbostic 	SETUP(m->fresh);
164641822ebSbostic 	SETUP(m->tmp);
165641822ebSbostic 	SETUP(m->empty);
166641822ebSbostic 	CLEAR(m->empty);
167641822ebSbostic 
168641822ebSbostic 	/* this loop does only one repetition except for backrefs */
169641822ebSbostic 	for (;;) {
170641822ebSbostic 		endp = fast(m, start, stop, gf, gl);
171641822ebSbostic 		if (endp == NULL) {		/* a miss */
172641822ebSbostic 			STATETEARDOWN(m);
173641822ebSbostic 			return(REG_NOMATCH);
174641822ebSbostic 		}
175641822ebSbostic 		if (nmatch == 0 && !g->backrefs)
176641822ebSbostic 			break;		/* no further info needed */
177641822ebSbostic 
178641822ebSbostic 		/* where? */
179641822ebSbostic 		assert(m->coldp != NULL);
180641822ebSbostic 		for (;;) {
181e51f590cSbostic 			NOTE("finding start");
182641822ebSbostic 			endp = slow(m, m->coldp, stop, gf, gl);
183641822ebSbostic 			if (endp != NULL)
184641822ebSbostic 				break;
185e51f590cSbostic 			assert(m->coldp < m->endp);
186641822ebSbostic 			m->coldp++;
187641822ebSbostic 		}
188641822ebSbostic 		if (nmatch == 1 && !g->backrefs)
189641822ebSbostic 			break;		/* no further info needed */
190641822ebSbostic 
191641822ebSbostic 		/* oh my, he wants the subexpressions... */
192641822ebSbostic 		if (m->pmatch == NULL)
193641822ebSbostic 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
194641822ebSbostic 							sizeof(regmatch_t));
195641822ebSbostic 		if (m->pmatch == NULL) {
196641822ebSbostic 			STATETEARDOWN(m);
197641822ebSbostic 			return(REG_ESPACE);
198641822ebSbostic 		}
199e51f590cSbostic 		for (i = 1; i <= m->g->nsub; i++)
200e51f590cSbostic 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
201e51f590cSbostic 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
202e51f590cSbostic 			NOTE("dissecting");
203641822ebSbostic 			dp = dissect(m, m->coldp, endp, gf, gl);
204e51f590cSbostic 		} else {
205641822ebSbostic 			if (g->nplus > 0 && m->lastpos == NULL)
206188b6717Sbostic 				m->lastpos = (char **)malloc((g->nplus+1) *
207188b6717Sbostic 							sizeof(char *));
208641822ebSbostic 			if (g->nplus > 0 && m->lastpos == NULL) {
209641822ebSbostic 				free(m->pmatch);
210641822ebSbostic 				STATETEARDOWN(m);
211641822ebSbostic 				return(REG_ESPACE);
212641822ebSbostic 			}
213e51f590cSbostic 			NOTE("backref dissect");
214641822ebSbostic 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
215641822ebSbostic 		}
216641822ebSbostic 		if (dp != NULL)
217641822ebSbostic 			break;
218641822ebSbostic 
219641822ebSbostic 		/* uh-oh... we couldn't find a subexpression-level match */
220641822ebSbostic 		assert(g->backrefs);	/* must be back references doing it */
221641822ebSbostic 		assert(g->nplus == 0 || m->lastpos != NULL);
222e51f590cSbostic 		for (;;) {
223e51f590cSbostic 			if (dp != NULL || endp <= m->coldp)
224e51f590cSbostic 				break;		/* defeat */
225e51f590cSbostic 			NOTE("backoff");
226e51f590cSbostic 			endp = slow(m, m->coldp, endp-1, gf, gl);
227e51f590cSbostic 			if (endp == NULL)
228e51f590cSbostic 				break;		/* defeat */
229641822ebSbostic 			/* try it on a shorter possibility */
230e51f590cSbostic #ifndef NDEBUG
231641822ebSbostic 			for (i = 1; i <= m->g->nsub; i++) {
232e51f590cSbostic 				assert(m->pmatch[i].rm_so == -1);
233e51f590cSbostic 				assert(m->pmatch[i].rm_eo == -1);
234641822ebSbostic 			}
235e51f590cSbostic #endif
236e51f590cSbostic 			NOTE("backoff dissect");
237641822ebSbostic 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
238641822ebSbostic 		}
239641822ebSbostic 		assert(dp == NULL || dp == endp);
240641822ebSbostic 		if (dp != NULL)		/* found a shorter one */
241641822ebSbostic 			break;
242641822ebSbostic 
243641822ebSbostic 		/* despite initial appearances, there is no match here */
244e51f590cSbostic 		NOTE("false alarm");
245641822ebSbostic 		start = m->coldp + 1;	/* recycle starting later */
246641822ebSbostic 		assert(start <= stop);
247641822ebSbostic 	}
248641822ebSbostic 
249641822ebSbostic 	/* fill in the details if requested */
250641822ebSbostic 	if (nmatch > 0) {
251641822ebSbostic 		pmatch[0].rm_so = m->coldp - m->offp;
252641822ebSbostic 		pmatch[0].rm_eo = endp - m->offp;
253641822ebSbostic 	}
254641822ebSbostic 	if (nmatch > 1) {
255641822ebSbostic 		assert(m->pmatch != NULL);
256641822ebSbostic 		for (i = 1; i < nmatch; i++)
257641822ebSbostic 			if (i <= m->g->nsub)
258641822ebSbostic 				pmatch[i] = m->pmatch[i];
259641822ebSbostic 			else {
260641822ebSbostic 				pmatch[i].rm_so = -1;
261641822ebSbostic 				pmatch[i].rm_eo = -1;
262641822ebSbostic 			}
263641822ebSbostic 	}
264641822ebSbostic 
265641822ebSbostic 	if (m->pmatch != NULL)
266641822ebSbostic 		free((char *)m->pmatch);
267641822ebSbostic 	if (m->lastpos != NULL)
268641822ebSbostic 		free((char *)m->lastpos);
269641822ebSbostic 	STATETEARDOWN(m);
270641822ebSbostic 	return(0);
271641822ebSbostic }
272641822ebSbostic 
273641822ebSbostic /*
274641822ebSbostic  - dissect - figure out what matched what, no back references
275188b6717Sbostic  == static char *dissect(register struct match *m, char *start, \
276188b6717Sbostic  ==	char *stop, sopno startst, sopno stopst);
277641822ebSbostic  */
278188b6717Sbostic static char *			/* == stop (success) always */
dissect(m,start,stop,startst,stopst)279641822ebSbostic dissect(m, start, stop, startst, stopst)
280641822ebSbostic register struct match *m;
281188b6717Sbostic char *start;
282188b6717Sbostic char *stop;
283641822ebSbostic sopno startst;
284641822ebSbostic sopno stopst;
285641822ebSbostic {
286641822ebSbostic 	register int i;
287641822ebSbostic 	register sopno ss;	/* start sop of current subRE */
288641822ebSbostic 	register sopno es;	/* end sop of current subRE */
289188b6717Sbostic 	register char *sp;	/* start of string matched by it */
290188b6717Sbostic 	register char *stp;	/* string matched by it cannot pass here */
291188b6717Sbostic 	register char *rest;	/* start of rest of string */
292188b6717Sbostic 	register char *tail;	/* string unmatched by rest of RE */
293641822ebSbostic 	register sopno ssub;	/* start sop of subsubRE */
294641822ebSbostic 	register sopno esub;	/* end sop of subsubRE */
295188b6717Sbostic 	register char *ssp;	/* start of string matched by subsubRE */
296188b6717Sbostic 	register char *sep;	/* end of string matched by subsubRE */
297188b6717Sbostic 	register char *oldssp;	/* previous ssp */
298188b6717Sbostic 	register char *dp;
299641822ebSbostic 
300e51f590cSbostic 	AT("diss", start, stop, startst, stopst);
301641822ebSbostic 	sp = start;
302641822ebSbostic 	for (ss = startst; ss < stopst; ss = es) {
303641822ebSbostic 		/* identify end of subRE */
304641822ebSbostic 		es = ss;
305641822ebSbostic 		switch (OP(m->g->strip[es])) {
306641822ebSbostic 		case OPLUS_:
307641822ebSbostic 		case OQUEST_:
308641822ebSbostic 			es += OPND(m->g->strip[es]);
309641822ebSbostic 			break;
310641822ebSbostic 		case OCH_:
311641822ebSbostic 			while (OP(m->g->strip[es]) != O_CH)
312641822ebSbostic 				es += OPND(m->g->strip[es]);
313641822ebSbostic 			break;
314641822ebSbostic 		}
315641822ebSbostic 		es++;
316641822ebSbostic 
317641822ebSbostic 		/* figure out what it matched */
318641822ebSbostic 		switch (OP(m->g->strip[ss])) {
319641822ebSbostic 		case OEND:
320188b6717Sbostic 			assert(nope);
321641822ebSbostic 			break;
322641822ebSbostic 		case OCHAR:
323641822ebSbostic 			sp++;
324641822ebSbostic 			break;
325641822ebSbostic 		case OBOL:
326641822ebSbostic 		case OEOL:
327188b6717Sbostic 		case OBOW:
328188b6717Sbostic 		case OEOW:
329641822ebSbostic 			break;
330641822ebSbostic 		case OANY:
331641822ebSbostic 		case OANYOF:
332641822ebSbostic 			sp++;
333641822ebSbostic 			break;
334641822ebSbostic 		case OBACK_:
335641822ebSbostic 		case O_BACK:
336188b6717Sbostic 			assert(nope);
337641822ebSbostic 			break;
338641822ebSbostic 		/* cases where length of match is hard to find */
339641822ebSbostic 		case OQUEST_:
340641822ebSbostic 			stp = stop;
341641822ebSbostic 			for (;;) {
342641822ebSbostic 				/* how long could this one be? */
343641822ebSbostic 				rest = slow(m, sp, stp, ss, es);
344641822ebSbostic 				assert(rest != NULL);	/* it did match */
345641822ebSbostic 				/* could the rest match the rest? */
346641822ebSbostic 				tail = slow(m, rest, stop, es, stopst);
347641822ebSbostic 				if (tail == stop)
348641822ebSbostic 					break;		/* yes! */
349641822ebSbostic 				/* no -- try a shorter match for this one */
350641822ebSbostic 				stp = rest - 1;
351641822ebSbostic 				assert(stp >= sp);	/* it did work */
352641822ebSbostic 			}
353641822ebSbostic 			ssub = ss + 1;
354641822ebSbostic 			esub = es - 1;
355641822ebSbostic 			/* did innards match? */
356641822ebSbostic 			if (slow(m, sp, rest, ssub, esub) != NULL) {
357641822ebSbostic 				dp = dissect(m, sp, rest, ssub, esub);
358641822ebSbostic 				assert(dp == rest);
359641822ebSbostic 			} else		/* no */
360641822ebSbostic 				assert(sp == rest);
361641822ebSbostic 			sp = rest;
362641822ebSbostic 			break;
363641822ebSbostic 		case OPLUS_:
364641822ebSbostic 			stp = stop;
365641822ebSbostic 			for (;;) {
366641822ebSbostic 				/* how long could this one be? */
367641822ebSbostic 				rest = slow(m, sp, stp, ss, es);
368641822ebSbostic 				assert(rest != NULL);	/* it did match */
369641822ebSbostic 				/* could the rest match the rest? */
370641822ebSbostic 				tail = slow(m, rest, stop, es, stopst);
371641822ebSbostic 				if (tail == stop)
372641822ebSbostic 					break;		/* yes! */
373641822ebSbostic 				/* no -- try a shorter match for this one */
374641822ebSbostic 				stp = rest - 1;
375641822ebSbostic 				assert(stp >= sp);	/* it did work */
376641822ebSbostic 			}
377641822ebSbostic 			ssub = ss + 1;
378641822ebSbostic 			esub = es - 1;
379641822ebSbostic 			ssp = sp;
380641822ebSbostic 			oldssp = ssp;
381641822ebSbostic 			for (;;) {	/* find last match of innards */
382641822ebSbostic 				sep = slow(m, ssp, rest, ssub, esub);
383641822ebSbostic 				if (sep == NULL || sep == ssp)
384641822ebSbostic 					break;	/* failed or matched null */
385641822ebSbostic 				oldssp = ssp;	/* on to next try */
386641822ebSbostic 				ssp = sep;
387641822ebSbostic 			}
388641822ebSbostic 			if (sep == NULL) {
389641822ebSbostic 				/* last successful match */
390641822ebSbostic 				sep = ssp;
391641822ebSbostic 				ssp = oldssp;
392641822ebSbostic 			}
393641822ebSbostic 			assert(sep == rest);	/* must exhaust substring */
394641822ebSbostic 			assert(slow(m, ssp, sep, ssub, esub) == rest);
395641822ebSbostic 			dp = dissect(m, ssp, sep, ssub, esub);
396641822ebSbostic 			assert(dp == sep);
397641822ebSbostic 			sp = rest;
398641822ebSbostic 			break;
399641822ebSbostic 		case OCH_:
400641822ebSbostic 			stp = stop;
401641822ebSbostic 			for (;;) {
402641822ebSbostic 				/* how long could this one be? */
403641822ebSbostic 				rest = slow(m, sp, stp, ss, es);
404641822ebSbostic 				assert(rest != NULL);	/* it did match */
405641822ebSbostic 				/* could the rest match the rest? */
406641822ebSbostic 				tail = slow(m, rest, stop, es, stopst);
407641822ebSbostic 				if (tail == stop)
408641822ebSbostic 					break;		/* yes! */
409641822ebSbostic 				/* no -- try a shorter match for this one */
410641822ebSbostic 				stp = rest - 1;
411641822ebSbostic 				assert(stp >= sp);	/* it did work */
412641822ebSbostic 			}
413641822ebSbostic 			ssub = ss + 1;
414641822ebSbostic 			esub = ss + OPND(m->g->strip[ss]) - 1;
415641822ebSbostic 			assert(OP(m->g->strip[esub]) == OOR1);
416641822ebSbostic 			for (;;) {	/* find first matching branch */
417641822ebSbostic 				if (slow(m, sp, rest, ssub, esub) == rest)
418641822ebSbostic 					break;	/* it matched all of it */
419641822ebSbostic 				/* that one missed, try next one */
420641822ebSbostic 				assert(OP(m->g->strip[esub]) == OOR1);
421641822ebSbostic 				esub++;
422641822ebSbostic 				assert(OP(m->g->strip[esub]) == OOR2);
423641822ebSbostic 				ssub = esub + 1;
424641822ebSbostic 				esub += OPND(m->g->strip[esub]);
425641822ebSbostic 				if (OP(m->g->strip[esub]) == OOR2)
426641822ebSbostic 					esub--;
427641822ebSbostic 				else
428641822ebSbostic 					assert(OP(m->g->strip[esub]) == O_CH);
429641822ebSbostic 			}
430641822ebSbostic 			dp = dissect(m, sp, rest, ssub, esub);
431641822ebSbostic 			assert(dp == rest);
432641822ebSbostic 			sp = rest;
433641822ebSbostic 			break;
434641822ebSbostic 		case O_PLUS:
435641822ebSbostic 		case O_QUEST:
436641822ebSbostic 		case OOR1:
437641822ebSbostic 		case OOR2:
438641822ebSbostic 		case O_CH:
439188b6717Sbostic 			assert(nope);
440641822ebSbostic 			break;
441641822ebSbostic 		case OLPAREN:
442641822ebSbostic 			i = OPND(m->g->strip[ss]);
443fdc51b3dSbostic 			assert(0 < i && i <= m->g->nsub);
444641822ebSbostic 			m->pmatch[i].rm_so = sp - m->offp;
445641822ebSbostic 			break;
446641822ebSbostic 		case ORPAREN:
447641822ebSbostic 			i = OPND(m->g->strip[ss]);
448fdc51b3dSbostic 			assert(0 < i && i <= m->g->nsub);
449641822ebSbostic 			m->pmatch[i].rm_eo = sp - m->offp;
450641822ebSbostic 			break;
451641822ebSbostic 		default:		/* uh oh */
452188b6717Sbostic 			assert(nope);
453641822ebSbostic 			break;
454641822ebSbostic 		}
455641822ebSbostic 	}
456641822ebSbostic 
457641822ebSbostic 	assert(sp == stop);
458641822ebSbostic 	return(sp);
459641822ebSbostic }
460641822ebSbostic 
461641822ebSbostic /*
462641822ebSbostic  - backref - figure out what matched what, figuring in back references
463188b6717Sbostic  == static char *backref(register struct match *m, char *start, \
464188b6717Sbostic  ==	char *stop, sopno startst, sopno stopst, sopno lev);
465641822ebSbostic  */
466188b6717Sbostic static char *			/* == stop (success) or NULL (failure) */
backref(m,start,stop,startst,stopst,lev)467641822ebSbostic backref(m, start, stop, startst, stopst, lev)
468641822ebSbostic register struct match *m;
469188b6717Sbostic char *start;
470188b6717Sbostic char *stop;
471641822ebSbostic sopno startst;
472641822ebSbostic sopno stopst;
473641822ebSbostic sopno lev;			/* PLUS nesting level */
474641822ebSbostic {
475641822ebSbostic 	register int i;
476641822ebSbostic 	register sopno ss;	/* start sop of current subRE */
477188b6717Sbostic 	register char *sp;	/* start of string matched by it */
478641822ebSbostic 	register sopno ssub;	/* start sop of subsubRE */
479641822ebSbostic 	register sopno esub;	/* end sop of subsubRE */
480188b6717Sbostic 	register char *ssp;	/* start of string matched by subsubRE */
481188b6717Sbostic 	register char *dp;
482641822ebSbostic 	register size_t len;
483641822ebSbostic 	register int hard;
484641822ebSbostic 	register sop s;
485641822ebSbostic 	register regoff_t offsave;
486641822ebSbostic 	register cset *cs;
487641822ebSbostic 
488e51f590cSbostic 	AT("back", start, stop, startst, stopst);
489641822ebSbostic 	sp = start;
490641822ebSbostic 
491641822ebSbostic 	/* get as far as we can with easy stuff */
492641822ebSbostic 	hard = 0;
493641822ebSbostic 	for (ss = startst; !hard && ss < stopst; ss++)
494641822ebSbostic 		switch (OP(s = m->g->strip[ss])) {
495641822ebSbostic 		case OCHAR:
496188b6717Sbostic 			if (sp == stop || *sp++ != (char)OPND(s))
497641822ebSbostic 				return(NULL);
498641822ebSbostic 			break;
499641822ebSbostic 		case OANY:
500641822ebSbostic 			if (sp == stop)
501641822ebSbostic 				return(NULL);
502641822ebSbostic 			sp++;
503641822ebSbostic 			break;
504641822ebSbostic 		case OANYOF:
505641822ebSbostic 			cs = &m->g->sets[OPND(s)];
506641822ebSbostic 			if (sp == stop || !CHIN(cs, *sp++))
507641822ebSbostic 				return(NULL);
508641822ebSbostic 			break;
509641822ebSbostic 		case OBOL:
510641822ebSbostic 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
511641822ebSbostic 					(sp < m->endp && *(sp-1) == '\n' &&
512641822ebSbostic 						(m->g->cflags&REG_NEWLINE)) )
513641822ebSbostic 				{ /* yes */ }
514641822ebSbostic 			else
515641822ebSbostic 				return(NULL);
516641822ebSbostic 			break;
517641822ebSbostic 		case OEOL:
518641822ebSbostic 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
519641822ebSbostic 					(sp < m->endp && *sp == '\n' &&
520641822ebSbostic 						(m->g->cflags&REG_NEWLINE)) )
521641822ebSbostic 				{ /* yes */ }
522641822ebSbostic 			else
523641822ebSbostic 				return(NULL);
524641822ebSbostic 			break;
525188b6717Sbostic 		case OBOW:
526188b6717Sbostic 			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
527188b6717Sbostic 					(sp < m->endp && *(sp-1) == '\n' &&
528188b6717Sbostic 						(m->g->cflags&REG_NEWLINE)) ||
529188b6717Sbostic 					(sp > m->beginp &&
530c50aad69Sbostic 							!ISWORD(*(sp-1))) ) &&
531c50aad69Sbostic 					(sp < m->endp && ISWORD(*sp)) )
532188b6717Sbostic 				{ /* yes */ }
533188b6717Sbostic 			else
534188b6717Sbostic 				return(NULL);
535188b6717Sbostic 			break;
536188b6717Sbostic 		case OEOW:
537188b6717Sbostic 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
538188b6717Sbostic 					(sp < m->endp && *sp == '\n' &&
539188b6717Sbostic 						(m->g->cflags&REG_NEWLINE)) ||
540c50aad69Sbostic 					(sp < m->endp && !ISWORD(*sp)) ) &&
541c50aad69Sbostic 					(sp > m->beginp && ISWORD(*(sp-1))) )
542188b6717Sbostic 				{ /* yes */ }
543188b6717Sbostic 			else
544188b6717Sbostic 				return(NULL);
545188b6717Sbostic 			break;
546641822ebSbostic 		case O_QUEST:
547641822ebSbostic 			break;
548641822ebSbostic 		case OOR1:	/* matches null but needs to skip */
549641822ebSbostic 			ss++;
550641822ebSbostic 			s = m->g->strip[ss];
551641822ebSbostic 			do {
552641822ebSbostic 				assert(OP(s) == OOR2);
553641822ebSbostic 				ss += OPND(s);
554641822ebSbostic 			} while (OP(s = m->g->strip[ss]) != O_CH);
555641822ebSbostic 			/* note that the ss++ gets us past the O_CH */
556641822ebSbostic 			break;
557641822ebSbostic 		default:	/* have to make a choice */
558641822ebSbostic 			hard = 1;
559641822ebSbostic 			break;
560641822ebSbostic 		}
561641822ebSbostic 	if (!hard) {		/* that was it! */
562641822ebSbostic 		if (sp != stop)
563641822ebSbostic 			return(NULL);
564641822ebSbostic 		return(sp);
565641822ebSbostic 	}
566641822ebSbostic 	ss--;			/* adjust for the for's final increment */
567641822ebSbostic 
568641822ebSbostic 	/* the hard stuff */
569e51f590cSbostic 	AT("hard", sp, stop, ss, stopst);
570641822ebSbostic 	s = m->g->strip[ss];
571641822ebSbostic 	switch (OP(s)) {
572641822ebSbostic 	case OBACK_:		/* the vilest depths */
573641822ebSbostic 		i = OPND(s);
574fdc51b3dSbostic 		assert(0 < i && i <= m->g->nsub);
575641822ebSbostic 		if (m->pmatch[i].rm_eo == -1)
576641822ebSbostic 			return(NULL);
577641822ebSbostic 		assert(m->pmatch[i].rm_so != -1);
578641822ebSbostic 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
579641822ebSbostic 		assert(stop - m->beginp >= len);
580641822ebSbostic 		if (sp > stop - len)
581641822ebSbostic 			return(NULL);	/* not enough left to match */
582641822ebSbostic 		ssp = m->offp + m->pmatch[i].rm_so;
583188b6717Sbostic 		if (memcmp(sp, ssp, len) != 0)
584641822ebSbostic 			return(NULL);
585641822ebSbostic 		while (m->g->strip[ss] != SOP(O_BACK, i))
586641822ebSbostic 			ss++;
587641822ebSbostic 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
588641822ebSbostic 		break;
589641822ebSbostic 	case OQUEST_:		/* to null or not */
590641822ebSbostic 		dp = backref(m, sp, stop, ss+1, stopst, lev);
591641822ebSbostic 		if (dp != NULL)
592641822ebSbostic 			return(dp);	/* not */
593641822ebSbostic 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
594641822ebSbostic 		break;
595641822ebSbostic 	case OPLUS_:
596641822ebSbostic 		assert(m->lastpos != NULL);
597641822ebSbostic 		assert(lev+1 <= m->g->nplus);
598641822ebSbostic 		m->lastpos[lev+1] = sp;
599641822ebSbostic 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
600641822ebSbostic 		break;
601641822ebSbostic 	case O_PLUS:
602641822ebSbostic 		if (sp == m->lastpos[lev])	/* last pass matched null */
603641822ebSbostic 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
604641822ebSbostic 		/* try another pass */
605641822ebSbostic 		m->lastpos[lev] = sp;
606641822ebSbostic 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
607641822ebSbostic 		if (dp == NULL)
608641822ebSbostic 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
609641822ebSbostic 		else
610641822ebSbostic 			return(dp);
611641822ebSbostic 		break;
612641822ebSbostic 	case OCH_:		/* find the right one, if any */
613641822ebSbostic 		ssub = ss + 1;
614641822ebSbostic 		esub = ss + OPND(s) - 1;
615641822ebSbostic 		assert(OP(m->g->strip[esub]) == OOR1);
616641822ebSbostic 		for (;;) {	/* find first matching branch */
617641822ebSbostic 			dp = backref(m, sp, stop, ssub, esub, lev);
618641822ebSbostic 			if (dp != NULL)
619641822ebSbostic 				return(dp);
620641822ebSbostic 			/* that one missed, try next one */
621641822ebSbostic 			if (OP(m->g->strip[esub]) == O_CH)
622641822ebSbostic 				return(NULL);	/* there is none */
623641822ebSbostic 			esub++;
624641822ebSbostic 			assert(OP(m->g->strip[esub]) == OOR2);
625641822ebSbostic 			ssub = esub + 1;
626641822ebSbostic 			esub += OPND(m->g->strip[esub]);
627641822ebSbostic 			if (OP(m->g->strip[esub]) == OOR2)
628641822ebSbostic 				esub--;
629641822ebSbostic 			else
630641822ebSbostic 				assert(OP(m->g->strip[esub]) == O_CH);
631641822ebSbostic 		}
632641822ebSbostic 		break;
633641822ebSbostic 	case OLPAREN:		/* must undo assignment if rest fails */
634641822ebSbostic 		i = OPND(s);
635fdc51b3dSbostic 		assert(0 < i && i <= m->g->nsub);
636641822ebSbostic 		offsave = m->pmatch[i].rm_so;
637641822ebSbostic 		m->pmatch[i].rm_so = sp - m->offp;
638641822ebSbostic 		dp = backref(m, sp, stop, ss+1, stopst, lev);
639641822ebSbostic 		if (dp != NULL)
640641822ebSbostic 			return(dp);
641641822ebSbostic 		m->pmatch[i].rm_so = offsave;
642641822ebSbostic 		return(NULL);
643641822ebSbostic 		break;
644641822ebSbostic 	case ORPAREN:		/* must undo assignment if rest fails */
645641822ebSbostic 		i = OPND(s);
646fdc51b3dSbostic 		assert(0 < i && i <= m->g->nsub);
647641822ebSbostic 		offsave = m->pmatch[i].rm_eo;
648641822ebSbostic 		m->pmatch[i].rm_eo = sp - m->offp;
649641822ebSbostic 		dp = backref(m, sp, stop, ss+1, stopst, lev);
650641822ebSbostic 		if (dp != NULL)
651641822ebSbostic 			return(dp);
652641822ebSbostic 		m->pmatch[i].rm_eo = offsave;
653641822ebSbostic 		return(NULL);
654641822ebSbostic 		break;
655641822ebSbostic 	default:		/* uh oh */
656188b6717Sbostic 		assert(nope);
657641822ebSbostic 		break;
658641822ebSbostic 	}
659641822ebSbostic 
660641822ebSbostic 	/* "can't happen" */
661188b6717Sbostic 	assert(nope);
662641822ebSbostic 	/* NOTREACHED */
663641822ebSbostic }
664641822ebSbostic 
665641822ebSbostic /*
666641822ebSbostic  - fast - step through the string at top speed
667188b6717Sbostic  == static char *fast(register struct match *m, char *start, \
668188b6717Sbostic  ==	char *stop, sopno startst, sopno stopst);
669641822ebSbostic  */
670188b6717Sbostic static char *			/* where tentative match ended, or NULL */
fast(m,start,stop,startst,stopst)671641822ebSbostic fast(m, start, stop, startst, stopst)
672641822ebSbostic register struct match *m;
673188b6717Sbostic char *start;
674188b6717Sbostic char *stop;
675641822ebSbostic sopno startst;
676641822ebSbostic sopno stopst;
677641822ebSbostic {
678641822ebSbostic 	register states st = m->st;
679641822ebSbostic 	register states fresh = m->fresh;
680641822ebSbostic 	register states tmp = m->tmp;
681188b6717Sbostic 	register char *p = start;
682188b6717Sbostic 	register int c = (start == m->beginp) ? OUT : *(start-1);
683188b6717Sbostic 	register int lastc;	/* previous c */
684188b6717Sbostic 	register int flagch;
685188b6717Sbostic 	register int i;
686188b6717Sbostic 	register char *coldp;	/* last p after which no match was underway */
687641822ebSbostic 
688641822ebSbostic 	CLEAR(st);
689641822ebSbostic 	SET1(st, startst);
690188b6717Sbostic 	st = step(m->g, startst, stopst, st, NOTHING, st);
691641822ebSbostic 	ASSIGN(fresh, st);
692641822ebSbostic 	SP("start", st, *p);
693641822ebSbostic 	coldp = NULL;
694641822ebSbostic 	for (;;) {
695641822ebSbostic 		/* next character */
696641822ebSbostic 		lastc = c;
697188b6717Sbostic 		c = (p == m->endp) ? OUT : *p;
698641822ebSbostic 		if (EQ(st, fresh))
699641822ebSbostic 			coldp = p;
700641822ebSbostic 
701641822ebSbostic 		/* is there an EOL and/or BOL between lastc and c? */
702188b6717Sbostic 		flagch = '\0';
703188b6717Sbostic 		i = 0;
704188b6717Sbostic 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
705188b6717Sbostic 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
706188b6717Sbostic 			flagch = BOL;
707188b6717Sbostic 			i = m->g->nbol;
708188b6717Sbostic 		}
709188b6717Sbostic 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
710188b6717Sbostic 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
711188b6717Sbostic 			flagch = (flagch == BOL) ? BOLEOL : EOL;
712188b6717Sbostic 			i += m->g->neol;
713188b6717Sbostic 		}
714188b6717Sbostic 		if (i != 0) {
715188b6717Sbostic 			for (; i > 0; i--)
716188b6717Sbostic 				st = step(m->g, startst, stopst, st, flagch, st);
717188b6717Sbostic 			SP("boleol", st, c);
718188b6717Sbostic 		}
719188b6717Sbostic 
720188b6717Sbostic 		/* how about a word boundary? */
721c50aad69Sbostic 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
722c50aad69Sbostic 					(c != OUT && ISWORD(c)) ) {
723188b6717Sbostic 			flagch = BOW;
724188b6717Sbostic 		}
725c50aad69Sbostic 		if ( (lastc != OUT && ISWORD(lastc)) &&
726c50aad69Sbostic 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
727188b6717Sbostic 			flagch = EOW;
728188b6717Sbostic 		}
729188b6717Sbostic 		if (flagch == BOW || flagch == EOW) {
730188b6717Sbostic 			st = step(m->g, startst, stopst, st, flagch, st);
731188b6717Sbostic 			SP("boweow", st, c);
732641822ebSbostic 		}
733641822ebSbostic 
734641822ebSbostic 		/* are we done? */
735641822ebSbostic 		if (ISSET(st, stopst) || p == stop)
736641822ebSbostic 			break;		/* NOTE BREAK OUT */
737641822ebSbostic 
738641822ebSbostic 		/* no, we must deal with this character */
739641822ebSbostic 		ASSIGN(tmp, st);
740641822ebSbostic 		ASSIGN(st, fresh);
741188b6717Sbostic 		assert(c != OUT);
742641822ebSbostic 		st = step(m->g, startst, stopst, tmp, c, st);
743641822ebSbostic 		SP("aft", st, c);
744188b6717Sbostic 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
745641822ebSbostic 		p++;
746641822ebSbostic 	}
747641822ebSbostic 
748641822ebSbostic 	assert(coldp != NULL);
749641822ebSbostic 	m->coldp = coldp;
750641822ebSbostic 	if (ISSET(st, stopst))
751641822ebSbostic 		return(p+1);
752641822ebSbostic 	else
753641822ebSbostic 		return(NULL);
754641822ebSbostic }
755641822ebSbostic 
756641822ebSbostic /*
757641822ebSbostic  - slow - step through the string more deliberately
758188b6717Sbostic  == static char *slow(register struct match *m, char *start, \
759188b6717Sbostic  ==	char *stop, sopno startst, sopno stopst);
760641822ebSbostic  */
761188b6717Sbostic static char *			/* where it ended */
slow(m,start,stop,startst,stopst)762641822ebSbostic slow(m, start, stop, startst, stopst)
763641822ebSbostic register struct match *m;
764188b6717Sbostic char *start;
765188b6717Sbostic char *stop;
766641822ebSbostic sopno startst;
767641822ebSbostic sopno stopst;
768641822ebSbostic {
769641822ebSbostic 	register states st = m->st;
770641822ebSbostic 	register states empty = m->empty;
771641822ebSbostic 	register states tmp = m->tmp;
772188b6717Sbostic 	register char *p = start;
773188b6717Sbostic 	register int c = (start == m->beginp) ? OUT : *(start-1);
774188b6717Sbostic 	register int lastc;	/* previous c */
775188b6717Sbostic 	register int flagch;
776188b6717Sbostic 	register int i;
777188b6717Sbostic 	register char *matchp;	/* last p at which a match ended */
778641822ebSbostic 
779e51f590cSbostic 	AT("slow", start, stop, startst, stopst);
780641822ebSbostic 	CLEAR(st);
781641822ebSbostic 	SET1(st, startst);
782641822ebSbostic 	SP("sstart", st, *p);
783188b6717Sbostic 	st = step(m->g, startst, stopst, st, NOTHING, st);
784641822ebSbostic 	matchp = NULL;
785641822ebSbostic 	for (;;) {
786641822ebSbostic 		/* next character */
787641822ebSbostic 		lastc = c;
788188b6717Sbostic 		c = (p == m->endp) ? OUT : *p;
789641822ebSbostic 
790641822ebSbostic 		/* is there an EOL and/or BOL between lastc and c? */
791188b6717Sbostic 		flagch = '\0';
792188b6717Sbostic 		i = 0;
793188b6717Sbostic 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
794188b6717Sbostic 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
795188b6717Sbostic 			flagch = BOL;
796188b6717Sbostic 			i = m->g->nbol;
797188b6717Sbostic 		}
798188b6717Sbostic 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
799188b6717Sbostic 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
800188b6717Sbostic 			flagch = (flagch == BOL) ? BOLEOL : EOL;
801188b6717Sbostic 			i += m->g->neol;
802188b6717Sbostic 		}
803188b6717Sbostic 		if (i != 0) {
804188b6717Sbostic 			for (; i > 0; i--)
805188b6717Sbostic 				st = step(m->g, startst, stopst, st, flagch, st);
806188b6717Sbostic 			SP("sboleol", st, c);
807188b6717Sbostic 		}
808641822ebSbostic 
809188b6717Sbostic 		/* how about a word boundary? */
810c50aad69Sbostic 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
811c50aad69Sbostic 					(c != OUT && ISWORD(c)) ) {
812188b6717Sbostic 			flagch = BOW;
813188b6717Sbostic 		}
814c50aad69Sbostic 		if ( (lastc != OUT && ISWORD(lastc)) &&
815c50aad69Sbostic 				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
816188b6717Sbostic 			flagch = EOW;
817188b6717Sbostic 		}
818188b6717Sbostic 		if (flagch == BOW || flagch == EOW) {
819188b6717Sbostic 			st = step(m->g, startst, stopst, st, flagch, st);
820188b6717Sbostic 			SP("sboweow", st, c);
821641822ebSbostic 		}
822641822ebSbostic 
823641822ebSbostic 		/* are we done? */
824641822ebSbostic 		if (ISSET(st, stopst))
825641822ebSbostic 			matchp = p;
826641822ebSbostic 		if (EQ(st, empty) || p == stop)
827641822ebSbostic 			break;		/* NOTE BREAK OUT */
828641822ebSbostic 
829641822ebSbostic 		/* no, we must deal with this character */
830641822ebSbostic 		ASSIGN(tmp, st);
831641822ebSbostic 		ASSIGN(st, empty);
832188b6717Sbostic 		assert(c != OUT);
833641822ebSbostic 		st = step(m->g, startst, stopst, tmp, c, st);
834641822ebSbostic 		SP("saft", st, c);
835188b6717Sbostic 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
836641822ebSbostic 		p++;
837641822ebSbostic 	}
838641822ebSbostic 
839641822ebSbostic 	return(matchp);
840641822ebSbostic }
841641822ebSbostic 
842641822ebSbostic 
843641822ebSbostic /*
844641822ebSbostic  - step - map set of states reachable before char to set reachable after
845fdc51b3dSbostic  == static states step(register struct re_guts *g, sopno start, sopno stop, \
846188b6717Sbostic  ==	register states bef, int ch, register states aft);
847188b6717Sbostic  == #define	BOL	(OUT+1)
848188b6717Sbostic  == #define	EOL	(BOL+1)
849188b6717Sbostic  == #define	BOLEOL	(BOL+2)
850188b6717Sbostic  == #define	NOTHING	(BOL+3)
851188b6717Sbostic  == #define	BOW	(BOL+4)
852188b6717Sbostic  == #define	EOW	(BOL+5)
853188b6717Sbostic  == #define	CODEMAX	(BOL+5)		// highest code used
854188b6717Sbostic  == #define	NONCHAR(c)	((c) > CHAR_MAX)
855188b6717Sbostic  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
856641822ebSbostic  */
857641822ebSbostic static states
step(g,start,stop,bef,ch,aft)858641822ebSbostic step(g, start, stop, bef, ch, aft)
859641822ebSbostic register struct re_guts *g;
860fdc51b3dSbostic sopno start;			/* start state within strip */
861fdc51b3dSbostic sopno stop;			/* state after stop state within strip */
862641822ebSbostic register states bef;		/* states reachable before */
863188b6717Sbostic int ch;				/* character or NONCHAR code */
864641822ebSbostic register states aft;		/* states already known reachable after */
865641822ebSbostic {
866641822ebSbostic 	register cset *cs;
867641822ebSbostic 	register sop s;
868641822ebSbostic 	register sopno pc;
869641822ebSbostic 	register onestate here;		/* note, macros know this name */
870641822ebSbostic 	register sopno look;
871641822ebSbostic 	register int i;
872641822ebSbostic 
873641822ebSbostic 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
874641822ebSbostic 		s = g->strip[pc];
875641822ebSbostic 		switch (OP(s)) {
876641822ebSbostic 		case OEND:
877641822ebSbostic 			assert(pc == stop-1);
878641822ebSbostic 			break;
879641822ebSbostic 		case OCHAR:
880188b6717Sbostic 			/* only characters can match */
881188b6717Sbostic 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
882188b6717Sbostic 			if (ch == (char)OPND(s))
883641822ebSbostic 				FWD(aft, bef, 1);
884641822ebSbostic 			break;
885188b6717Sbostic 		case OBOL:
886188b6717Sbostic 			if (ch == BOL || ch == BOLEOL)
887188b6717Sbostic 				FWD(aft, bef, 1);
888188b6717Sbostic 			break;
889641822ebSbostic 		case OEOL:
890188b6717Sbostic 			if (ch == EOL || ch == BOLEOL)
891188b6717Sbostic 				FWD(aft, bef, 1);
892188b6717Sbostic 			break;
893188b6717Sbostic 		case OBOW:
894188b6717Sbostic 			if (ch == BOW)
895188b6717Sbostic 				FWD(aft, bef, 1);
896188b6717Sbostic 			break;
897188b6717Sbostic 		case OEOW:
898188b6717Sbostic 			if (ch == EOW)
899188b6717Sbostic 				FWD(aft, bef, 1);
900641822ebSbostic 			break;
901641822ebSbostic 		case OANY:
902188b6717Sbostic 			if (!NONCHAR(ch))
903641822ebSbostic 				FWD(aft, bef, 1);
904641822ebSbostic 			break;
905641822ebSbostic 		case OANYOF:
906641822ebSbostic 			cs = &g->sets[OPND(s)];
907188b6717Sbostic 			if (!NONCHAR(ch) && CHIN(cs, ch))
908641822ebSbostic 				FWD(aft, bef, 1);
909641822ebSbostic 			break;
910641822ebSbostic 		case OBACK_:		/* ignored here */
911641822ebSbostic 		case O_BACK:
912641822ebSbostic 			FWD(aft, aft, 1);
913641822ebSbostic 			break;
914641822ebSbostic 		case OPLUS_:		/* forward, this is just an empty */
915641822ebSbostic 			FWD(aft, aft, 1);
916641822ebSbostic 			break;
917641822ebSbostic 		case O_PLUS:		/* both forward and back */
918641822ebSbostic 			FWD(aft, aft, 1);
919641822ebSbostic 			i = ISSETBACK(aft, OPND(s));
920641822ebSbostic 			BACK(aft, aft, OPND(s));
921641822ebSbostic 			if (!i && ISSETBACK(aft, OPND(s))) {
922641822ebSbostic 				/* oho, must reconsider loop body */
923641822ebSbostic 				pc -= OPND(s) + 1;
924641822ebSbostic 				INIT(here, pc);
925641822ebSbostic 			}
926641822ebSbostic 			break;
927641822ebSbostic 		case OQUEST_:		/* two branches, both forward */
928641822ebSbostic 			FWD(aft, aft, 1);
929641822ebSbostic 			FWD(aft, aft, OPND(s));
930641822ebSbostic 			break;
931641822ebSbostic 		case O_QUEST:		/* just an empty */
932641822ebSbostic 			FWD(aft, aft, 1);
933641822ebSbostic 			break;
934641822ebSbostic 		case OLPAREN:		/* not significant here */
935641822ebSbostic 		case ORPAREN:
936641822ebSbostic 			FWD(aft, aft, 1);
937641822ebSbostic 			break;
938641822ebSbostic 		case OCH_:		/* mark the first two branches */
939641822ebSbostic 			FWD(aft, aft, 1);
940641822ebSbostic 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
941641822ebSbostic 			FWD(aft, aft, OPND(s));
942641822ebSbostic 			break;
943641822ebSbostic 		case OOR1:		/* done a branch, find the O_CH */
944641822ebSbostic 			if (ISSTATEIN(aft, here)) {
945641822ebSbostic 				for (look = 1;
946641822ebSbostic 						OP(s = g->strip[pc+look]) != O_CH;
947641822ebSbostic 						look += OPND(s))
948641822ebSbostic 					assert(OP(s) == OOR2);
949641822ebSbostic 				FWD(aft, aft, look);
950641822ebSbostic 			}
951641822ebSbostic 			break;
952641822ebSbostic 		case OOR2:		/* propagate OCH_'s marking */
953641822ebSbostic 			FWD(aft, aft, 1);
954641822ebSbostic 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
955641822ebSbostic 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
956641822ebSbostic 				FWD(aft, aft, OPND(s));
957641822ebSbostic 			}
958641822ebSbostic 			break;
959641822ebSbostic 		case O_CH:		/* just empty */
960641822ebSbostic 			FWD(aft, aft, 1);
961641822ebSbostic 			break;
962641822ebSbostic 		default:		/* ooooops... */
963188b6717Sbostic 			assert(nope);
964641822ebSbostic 			break;
965641822ebSbostic 		}
966641822ebSbostic 	}
967641822ebSbostic 
968641822ebSbostic 	return(aft);
969641822ebSbostic }
970641822ebSbostic 
971e51f590cSbostic #ifdef REDEBUG
972641822ebSbostic /*
973641822ebSbostic  - print - print a set of states
974188b6717Sbostic  == #ifdef REDEBUG
975188b6717Sbostic  == static void print(struct match *m, char *caption, states st, \
976188b6717Sbostic  ==	int ch, FILE *d);
977188b6717Sbostic  == #endif
978641822ebSbostic  */
979641822ebSbostic static void
print(m,caption,st,ch,d)980e51f590cSbostic print(m, caption, st, ch, d)
981e51f590cSbostic struct match *m;
982641822ebSbostic char *caption;
983641822ebSbostic states st;
984188b6717Sbostic int ch;
985641822ebSbostic FILE *d;
986641822ebSbostic {
987e51f590cSbostic 	register struct re_guts *g = m->g;
988641822ebSbostic 	register int i;
989641822ebSbostic 	register int first = 1;
990641822ebSbostic 
991e51f590cSbostic 	if (!(m->eflags&REG_TRACE))
992e51f590cSbostic 		return;
993e51f590cSbostic 
994641822ebSbostic 	fprintf(d, "%s", caption);
995641822ebSbostic 	if (ch != '\0')
996e51f590cSbostic 		fprintf(d, " %s", pchar(ch));
997641822ebSbostic 	for (i = 0; i < g->nstates; i++)
998641822ebSbostic 		if (ISSET(st, i)) {
999641822ebSbostic 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
1000641822ebSbostic 			first = 0;
1001641822ebSbostic 		}
1002641822ebSbostic 	fprintf(d, "\n");
1003641822ebSbostic }
1004e51f590cSbostic 
1005e51f590cSbostic /*
1006e51f590cSbostic  - at - print current situation
1007188b6717Sbostic  == #ifdef REDEBUG
1008188b6717Sbostic  == static void at(struct match *m, char *title, char *start, char *stop, \
1009fdc51b3dSbostic  ==						sopno startst, sopno stopst);
1010188b6717Sbostic  == #endif
1011e51f590cSbostic  */
1012e51f590cSbostic static void
at(m,title,start,stop,startst,stopst)1013e51f590cSbostic at(m, title, start, stop, startst, stopst)
1014e51f590cSbostic struct match *m;
1015e51f590cSbostic char *title;
1016188b6717Sbostic char *start;
1017188b6717Sbostic char *stop;
1018e51f590cSbostic sopno startst;
1019e51f590cSbostic sopno stopst;
1020e51f590cSbostic {
1021e51f590cSbostic 	if (!(m->eflags&REG_TRACE))
1022e51f590cSbostic 		return;
1023e51f590cSbostic 
1024e51f590cSbostic 	printf("%s %s-", title, pchar(*start));
1025e51f590cSbostic 	printf("%s ", pchar(*stop));
1026e51f590cSbostic 	printf("%ld-%ld\n", (long)startst, (long)stopst);
1027e51f590cSbostic }
1028e51f590cSbostic 
1029e51f590cSbostic #ifndef PCHARDONE
1030e51f590cSbostic #define	PCHARDONE	/* never again */
1031e51f590cSbostic /*
1032e51f590cSbostic  - pchar - make a character printable
1033188b6717Sbostic  == #ifdef REDEBUG
1034188b6717Sbostic  == static char *pchar(int ch);
1035188b6717Sbostic  == #endif
1036e51f590cSbostic  *
1037e51f590cSbostic  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1038e51f590cSbostic  * duplicate here avoids having a debugging-capable regexec.o tied to
1039e51f590cSbostic  * a matching debug.o, and this is convenient.  It all disappears in
1040e51f590cSbostic  * the non-debug compilation anyway, so it doesn't matter much.
1041e51f590cSbostic  */
1042e51f590cSbostic static char *			/* -> representation */
pchar(ch)1043e51f590cSbostic pchar(ch)
1044e51f590cSbostic int ch;
1045e51f590cSbostic {
1046e51f590cSbostic 	static char pbuf[10];
1047e51f590cSbostic 
1048e51f590cSbostic 	if (isprint(ch) || ch == ' ')
1049e51f590cSbostic 		sprintf(pbuf, "%c", ch);
1050e51f590cSbostic 	else
1051e51f590cSbostic 		sprintf(pbuf, "\\%o", ch);
1052e51f590cSbostic 	return(pbuf);
1053e51f590cSbostic }
1054e51f590cSbostic #endif
1055641822ebSbostic #endif
1056641822ebSbostic 
1057641822ebSbostic #undef	matcher
1058641822ebSbostic #undef	fast
1059641822ebSbostic #undef	slow
1060641822ebSbostic #undef	dissect
1061641822ebSbostic #undef	backref
1062641822ebSbostic #undef	step
1063641822ebSbostic #undef	print
1064e51f590cSbostic #undef	at
1065641822ebSbostic #undef	match
1066