xref: /original-bsd/lib/libc/regex/engine.c (revision d45ebeed)
1 /*-
2  * Copyright (c) 1992 Henry Spencer.
3  * Copyright (c) 1992 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Henry Spencer of the University of Toronto.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)engine.c	5.4 (Berkeley) 09/30/92
12  */
13 
14 /*
15  * The matching engine and friends.  This file is #included by regexec.c
16  * after suitable #defines of a variety of macros used herein, so that
17  * different state representations can be used without duplicating masses
18  * of code.
19  */
20 
21 #ifdef SNAMES
22 #define	matcher	smatcher
23 #define	fast	sfast
24 #define	slow	sslow
25 #define	dissect	sdissect
26 #define	backref	sbackref
27 #define	expand	sexpand
28 #define	step	sstep
29 #define	print	sprint
30 #define	at	sat
31 #define	match	smat
32 #endif
33 #ifdef LNAMES
34 #define	matcher	lmatcher
35 #define	fast	lfast
36 #define	slow	lslow
37 #define	dissect	ldissect
38 #define	backref	lbackref
39 #define	expand	lexpand
40 #define	step	lstep
41 #define	print	lprint
42 #define	at	lat
43 #define	match	lmat
44 #endif
45 
46 /* another structure passed up and down to avoid zillions of parameters */
47 struct match {
48 	struct re_guts *g;
49 	int eflags;
50 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
51 	uchar *offp;		/* offsets work from here */
52 	uchar *beginp;		/* start of string -- virtual NUL precedes */
53 	uchar *endp;		/* end of string -- virtual NUL here */
54 	uchar *coldp;		/* can be no match starting before here */
55 	uchar **lastpos;	/* [nplus+1] */
56 	STATEVARS;
57 	states st;		/* current states */
58 	states fresh;		/* states for a fresh start */
59 	states tmp;		/* temporary */
60 	states empty;		/* empty set of states */
61 };
62 
63 #ifdef REDEBUG
64 #define	SP(t, s, c)	print(m, t, s, c, stdout)
65 #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
66 #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
67 #else
68 #define	SP(t, s, c)	/* nothing */
69 #define	AT(t, p1, p2, s1, s2)	/* nothing */
70 #define	NOTE(s)	/* nothing */
71 #endif
72 
73 static uchar	*backref
74 	__P((struct match *, uchar *, uchar *, sopno, sopno, sopno));
75 static uchar	*dissect __P((struct match *, uchar *, uchar *, sopno, sopno));
76 static states	 expand __P((struct re_guts *, int, int, states, int, int));
77 static uchar	*fast __P((struct match *, uchar *, uchar *, sopno, sopno));
78 static int	 matcher
79 	__P((struct re_guts *, uchar *, size_t, regmatch_t[], int));
80 static uchar	*slow __P((struct match *, uchar *, uchar *, sopno, sopno));
81 static states	 step __P((struct re_guts *, int, int, states, u_int, states));
82 
83 #ifdef REDEBUG
84 static void  at __P((struct match *, char *, uchar *, uchar *, sopno, stopno));
85 static char *pchar __P((int));
86 static void  print __P((struct match *, char *, states, u_int, FILE *));
87 #endif
88 
89 /*
90  - matcher - the actual matching engine
91  */
92 static int			/* 0 success, REG_NOMATCH failure */
93 matcher(g, string, nmatch, pmatch, eflags)
94 register struct re_guts *g;
95 uchar *string;
96 size_t nmatch;
97 regmatch_t pmatch[];
98 int eflags;
99 {
100 	register uchar *endp;
101 	register int i;
102 	struct match mv;
103 	register struct match *m = &mv;
104 	register uchar *dp;
105 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
106 	const register sopno gl = g->laststate;
107 	uchar *start;
108 	uchar *stop;
109 
110 	/* simplify the situation where possible */
111 	if (g->cflags&REG_NOSUB)
112 		nmatch = 0;
113 	if (eflags&REG_STARTEND) {
114 		start = string + pmatch[0].rm_so;
115 		stop = string + pmatch[0].rm_eo;
116 	} else {
117 		start = string;
118 		stop = start + strlen((char *)start);
119 	}
120 
121 	/* prescreening; this does wonders for this rather slow code */
122 	if (g->must != NULL) {
123 		for (dp = start; dp < stop; dp++)
124 			if (*dp == (uchar)g->must[0] && stop - dp >= g->mlen &&
125 			strncmp((char *)dp, g->must, (size_t)g->mlen) == 0)
126 				break;
127 		if (dp == stop)		/* we didn't find g->must */
128 			return(REG_NOMATCH);
129 	}
130 
131 	/* match struct setup */
132 	m->g = g;
133 	m->eflags = eflags;
134 	m->pmatch = NULL;
135 	m->lastpos = NULL;
136 	m->offp = string;
137 	m->beginp = start;
138 	m->endp = stop;
139 	STATESETUP(m, 4);
140 	SETUP(m->st);
141 	SETUP(m->fresh);
142 	SETUP(m->tmp);
143 	SETUP(m->empty);
144 	CLEAR(m->empty);
145 
146 	/* this loop does only one repetition except for backrefs */
147 	for (;;) {
148 		endp = fast(m, start, stop, gf, gl);
149 		if (endp == NULL) {		/* a miss */
150 			STATETEARDOWN(m);
151 			return(REG_NOMATCH);
152 		}
153 		if (nmatch == 0 && !g->backrefs)
154 			break;		/* no further info needed */
155 
156 		/* where? */
157 		assert(m->coldp != NULL);
158 		for (;;) {
159 			NOTE("finding start");
160 			endp = slow(m, m->coldp, stop, gf, gl);
161 			if (endp != NULL)
162 				break;
163 			assert(m->coldp < m->endp);
164 			m->coldp++;
165 		}
166 		if (nmatch == 1 && !g->backrefs)
167 			break;		/* no further info needed */
168 
169 		/* oh my, he wants the subexpressions... */
170 		if (m->pmatch == NULL)
171 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
172 							sizeof(regmatch_t));
173 		if (m->pmatch == NULL) {
174 			STATETEARDOWN(m);
175 			return(REG_ESPACE);
176 		}
177 		for (i = 1; i <= m->g->nsub; i++)
178 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
179 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
180 			NOTE("dissecting");
181 			dp = dissect(m, m->coldp, endp, gf, gl);
182 		} else {
183 			if (g->nplus > 0 && m->lastpos == NULL)
184 				m->lastpos = (uchar **)malloc((g->nplus+1) *
185 							sizeof(uchar *));
186 			if (g->nplus > 0 && m->lastpos == NULL) {
187 				free(m->pmatch);
188 				STATETEARDOWN(m);
189 				return(REG_ESPACE);
190 			}
191 			NOTE("backref dissect");
192 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
193 		}
194 		if (dp != NULL)
195 			break;
196 
197 		/* uh-oh... we couldn't find a subexpression-level match */
198 		assert(g->backrefs);	/* must be back references doing it */
199 		assert(g->nplus == 0 || m->lastpos != NULL);
200 		for (;;) {
201 			if (dp != NULL || endp <= m->coldp)
202 				break;		/* defeat */
203 			NOTE("backoff");
204 			endp = slow(m, m->coldp, endp-1, gf, gl);
205 			if (endp == NULL)
206 				break;		/* defeat */
207 			/* try it on a shorter possibility */
208 #ifndef NDEBUG
209 			for (i = 1; i <= m->g->nsub; i++) {
210 				assert(m->pmatch[i].rm_so == -1);
211 				assert(m->pmatch[i].rm_eo == -1);
212 			}
213 #endif
214 			NOTE("backoff dissect");
215 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
216 		}
217 		assert(dp == NULL || dp == endp);
218 		if (dp != NULL)		/* found a shorter one */
219 			break;
220 
221 		/* despite initial appearances, there is no match here */
222 		NOTE("false alarm");
223 		start = m->coldp + 1;	/* recycle starting later */
224 		assert(start <= stop);
225 	}
226 
227 	/* fill in the details if requested */
228 	if (nmatch > 0) {
229 		pmatch[0].rm_so = m->coldp - m->offp;
230 		pmatch[0].rm_eo = endp - m->offp;
231 	}
232 	if (nmatch > 1) {
233 		assert(m->pmatch != NULL);
234 		for (i = 1; i < nmatch; i++)
235 			if (i <= m->g->nsub)
236 				pmatch[i] = m->pmatch[i];
237 			else {
238 				pmatch[i].rm_so = -1;
239 				pmatch[i].rm_eo = -1;
240 			}
241 	}
242 
243 	if (m->pmatch != NULL)
244 		free((char *)m->pmatch);
245 	if (m->lastpos != NULL)
246 		free((char *)m->lastpos);
247 	STATETEARDOWN(m);
248 	return(0);
249 }
250 
251 /*
252  - dissect - figure out what matched what, no back references
253  */
254 static uchar *			/* == stop (success) always */
255 dissect(m, start, stop, startst, stopst)
256 register struct match *m;
257 uchar *start;
258 uchar *stop;
259 sopno startst;
260 sopno stopst;
261 {
262 	register int i;
263 	register sopno ss;	/* start sop of current subRE */
264 	register sopno es;	/* end sop of current subRE */
265 	register uchar *sp;	/* start of string matched by it */
266 	register uchar *stp;	/* string matched by it cannot pass here */
267 	register uchar *rest;	/* start of rest of string */
268 	register uchar *tail;	/* string unmatched by rest of RE */
269 	register sopno ssub;	/* start sop of subsubRE */
270 	register sopno esub;	/* end sop of subsubRE */
271 	register uchar *ssp;	/* start of string matched by subsubRE */
272 	register uchar *sep;	/* end of string matched by subsubRE */
273 	register uchar *oldssp;	/* previous ssp */
274 	register uchar *dp;
275 
276 	AT("diss", start, stop, startst, stopst);
277 	sp = start;
278 	for (ss = startst; ss < stopst; ss = es) {
279 		/* identify end of subRE */
280 		es = ss;
281 		switch (OP(m->g->strip[es])) {
282 		case OPLUS_:
283 		case OQUEST_:
284 			es += OPND(m->g->strip[es]);
285 			break;
286 		case OCH_:
287 			while (OP(m->g->strip[es]) != O_CH)
288 				es += OPND(m->g->strip[es]);
289 			break;
290 		}
291 		es++;
292 
293 		/* figure out what it matched */
294 		switch (OP(m->g->strip[ss])) {
295 		case OEND:
296 			assert(0);
297 			break;
298 		case OCHAR:
299 			sp++;
300 			break;
301 		case OBOL:
302 		case OEOL:
303 			break;
304 		case OANY:
305 		case OANYOF:
306 			sp++;
307 			break;
308 		case OBACK_:
309 		case O_BACK:
310 			assert(0);
311 			break;
312 		/* cases where length of match is hard to find */
313 		case OQUEST_:
314 			stp = stop;
315 			for (;;) {
316 				/* how long could this one be? */
317 				rest = slow(m, sp, stp, ss, es);
318 				assert(rest != NULL);	/* it did match */
319 				/* could the rest match the rest? */
320 				tail = slow(m, rest, stop, es, stopst);
321 				if (tail == stop)
322 					break;		/* yes! */
323 				/* no -- try a shorter match for this one */
324 				stp = rest - 1;
325 				assert(stp >= sp);	/* it did work */
326 			}
327 			ssub = ss + 1;
328 			esub = es - 1;
329 			/* did innards match? */
330 			if (slow(m, sp, rest, ssub, esub) != NULL) {
331 				dp = dissect(m, sp, rest, ssub, esub);
332 				assert(dp == rest);
333 			} else		/* no */
334 				assert(sp == rest);
335 			sp = rest;
336 			break;
337 		case OPLUS_:
338 			stp = stop;
339 			for (;;) {
340 				/* how long could this one be? */
341 				rest = slow(m, sp, stp, ss, es);
342 				assert(rest != NULL);	/* it did match */
343 				/* could the rest match the rest? */
344 				tail = slow(m, rest, stop, es, stopst);
345 				if (tail == stop)
346 					break;		/* yes! */
347 				/* no -- try a shorter match for this one */
348 				stp = rest - 1;
349 				assert(stp >= sp);	/* it did work */
350 			}
351 			ssub = ss + 1;
352 			esub = es - 1;
353 			ssp = sp;
354 			oldssp = ssp;
355 			for (;;) {	/* find last match of innards */
356 				sep = slow(m, ssp, rest, ssub, esub);
357 				if (sep == NULL || sep == ssp)
358 					break;	/* failed or matched null */
359 				oldssp = ssp;	/* on to next try */
360 				ssp = sep;
361 			}
362 			if (sep == NULL) {
363 				/* last successful match */
364 				sep = ssp;
365 				ssp = oldssp;
366 			}
367 			assert(sep == rest);	/* must exhaust substring */
368 			assert(slow(m, ssp, sep, ssub, esub) == rest);
369 			dp = dissect(m, ssp, sep, ssub, esub);
370 			assert(dp == sep);
371 			sp = rest;
372 			break;
373 		case OCH_:
374 			stp = stop;
375 			for (;;) {
376 				/* how long could this one be? */
377 				rest = slow(m, sp, stp, ss, es);
378 				assert(rest != NULL);	/* it did match */
379 				/* could the rest match the rest? */
380 				tail = slow(m, rest, stop, es, stopst);
381 				if (tail == stop)
382 					break;		/* yes! */
383 				/* no -- try a shorter match for this one */
384 				stp = rest - 1;
385 				assert(stp >= sp);	/* it did work */
386 			}
387 			ssub = ss + 1;
388 			esub = ss + OPND(m->g->strip[ss]) - 1;
389 			assert(OP(m->g->strip[esub]) == OOR1);
390 			for (;;) {	/* find first matching branch */
391 				if (slow(m, sp, rest, ssub, esub) == rest)
392 					break;	/* it matched all of it */
393 				/* that one missed, try next one */
394 				assert(OP(m->g->strip[esub]) == OOR1);
395 				esub++;
396 				assert(OP(m->g->strip[esub]) == OOR2);
397 				ssub = esub + 1;
398 				esub += OPND(m->g->strip[esub]);
399 				if (OP(m->g->strip[esub]) == OOR2)
400 					esub--;
401 				else
402 					assert(OP(m->g->strip[esub]) == O_CH);
403 			}
404 			dp = dissect(m, sp, rest, ssub, esub);
405 			assert(dp == rest);
406 			sp = rest;
407 			break;
408 		case O_PLUS:
409 		case O_QUEST:
410 		case OOR1:
411 		case OOR2:
412 		case O_CH:
413 			assert(0);
414 			break;
415 		case OLPAREN:
416 			i = OPND(m->g->strip[ss]);
417 			m->pmatch[i].rm_so = sp - m->offp;
418 			break;
419 		case ORPAREN:
420 			i = OPND(m->g->strip[ss]);
421 			m->pmatch[i].rm_eo = sp - m->offp;
422 			break;
423 		default:		/* uh oh */
424 			assert(0);
425 			break;
426 		}
427 	}
428 
429 	assert(sp == stop);
430 	return(sp);
431 }
432 
433 /*
434  - backref - figure out what matched what, figuring in back references
435  */
436 static uchar *			/* == stop (success) or NULL (failure) */
437 backref(m, start, stop, startst, stopst, lev)
438 register struct match *m;
439 uchar *start;
440 uchar *stop;
441 sopno startst;
442 sopno stopst;
443 sopno lev;			/* PLUS nesting level */
444 {
445 	register int i;
446 	register sopno ss;	/* start sop of current subRE */
447 	register uchar *sp;	/* start of string matched by it */
448 	register sopno ssub;	/* start sop of subsubRE */
449 	register sopno esub;	/* end sop of subsubRE */
450 	register uchar *ssp;	/* start of string matched by subsubRE */
451 	register uchar *dp;
452 	register size_t len;
453 	register int hard;
454 	register sop s;
455 	register regoff_t offsave;
456 	register cset *cs;
457 
458 	AT("back", start, stop, startst, stopst);
459 	sp = start;
460 
461 	/* get as far as we can with easy stuff */
462 	hard = 0;
463 	for (ss = startst; !hard && ss < stopst; ss++)
464 		switch (OP(s = m->g->strip[ss])) {
465 		case OCHAR:
466 			if (sp == stop || *sp++ != OPND(s))
467 				return(NULL);
468 			break;
469 		case OANY:
470 			if (sp == stop)
471 				return(NULL);
472 			sp++;
473 			break;
474 		case OANYOF:
475 			cs = &m->g->sets[OPND(s)];
476 			if (sp == stop || !CHIN(cs, *sp++))
477 				return(NULL);
478 			break;
479 		case OBOL:
480 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
481 					(sp < m->endp && *(sp-1) == '\n' &&
482 						(m->g->cflags&REG_NEWLINE)) )
483 				{ /* yes */ }
484 			else
485 				return(NULL);
486 			break;
487 		case OEOL:
488 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
489 					(sp < m->endp && *sp == '\n' &&
490 						(m->g->cflags&REG_NEWLINE)) )
491 				{ /* yes */ }
492 			else
493 				return(NULL);
494 			break;
495 		case O_QUEST:
496 			break;
497 		case OOR1:	/* matches null but needs to skip */
498 			ss++;
499 			s = m->g->strip[ss];
500 			do {
501 				assert(OP(s) == OOR2);
502 				ss += OPND(s);
503 			} while (OP(s = m->g->strip[ss]) != O_CH);
504 			/* note that the ss++ gets us past the O_CH */
505 			break;
506 		default:	/* have to make a choice */
507 			hard = 1;
508 			break;
509 		}
510 	if (!hard) {		/* that was it! */
511 		if (sp != stop)
512 			return(NULL);
513 		return(sp);
514 	}
515 	ss--;			/* adjust for the for's final increment */
516 
517 	/* the hard stuff */
518 	AT("hard", sp, stop, ss, stopst);
519 	s = m->g->strip[ss];
520 	switch (OP(s)) {
521 	case OBACK_:		/* the vilest depths */
522 		i = OPND(s);
523 		if (m->pmatch[i].rm_eo == -1)
524 			return(NULL);
525 		assert(m->pmatch[i].rm_so != -1);
526 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
527 		assert(stop - m->beginp >= len);
528 		if (sp > stop - len)
529 			return(NULL);	/* not enough left to match */
530 		ssp = m->offp + m->pmatch[i].rm_so;
531 		if (strncmp((char *)sp, (char *)ssp, len) != 0)
532 			return(NULL);
533 		while (m->g->strip[ss] != SOP(O_BACK, i))
534 			ss++;
535 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
536 		break;
537 	case OQUEST_:		/* to null or not */
538 		dp = backref(m, sp, stop, ss+1, stopst, lev);
539 		if (dp != NULL)
540 			return(dp);	/* not */
541 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
542 		break;
543 	case OPLUS_:
544 		assert(m->lastpos != NULL);
545 		assert(lev+1 <= m->g->nplus);
546 		m->lastpos[lev+1] = sp;
547 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
548 		break;
549 	case O_PLUS:
550 		if (sp == m->lastpos[lev])	/* last pass matched null */
551 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
552 		/* try another pass */
553 		m->lastpos[lev] = sp;
554 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
555 		if (dp == NULL)
556 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
557 		else
558 			return(dp);
559 		break;
560 	case OCH_:		/* find the right one, if any */
561 		ssub = ss + 1;
562 		esub = ss + OPND(s) - 1;
563 		assert(OP(m->g->strip[esub]) == OOR1);
564 		for (;;) {	/* find first matching branch */
565 			dp = backref(m, sp, stop, ssub, esub, lev);
566 			if (dp != NULL)
567 				return(dp);
568 			/* that one missed, try next one */
569 			if (OP(m->g->strip[esub]) == O_CH)
570 				return(NULL);	/* there is none */
571 			esub++;
572 			assert(OP(m->g->strip[esub]) == OOR2);
573 			ssub = esub + 1;
574 			esub += OPND(m->g->strip[esub]);
575 			if (OP(m->g->strip[esub]) == OOR2)
576 				esub--;
577 			else
578 				assert(OP(m->g->strip[esub]) == O_CH);
579 		}
580 		break;
581 	case OLPAREN:		/* must undo assignment if rest fails */
582 		i = OPND(s);
583 		offsave = m->pmatch[i].rm_so;
584 		m->pmatch[i].rm_so = sp - m->offp;
585 		dp = backref(m, sp, stop, ss+1, stopst, lev);
586 		if (dp != NULL)
587 			return(dp);
588 		m->pmatch[i].rm_so = offsave;
589 		return(NULL);
590 		break;
591 	case ORPAREN:		/* must undo assignment if rest fails */
592 		i = OPND(s);
593 		offsave = m->pmatch[i].rm_eo;
594 		m->pmatch[i].rm_eo = sp - m->offp;
595 		dp = backref(m, sp, stop, ss+1, stopst, lev);
596 		if (dp != NULL)
597 			return(dp);
598 		m->pmatch[i].rm_eo = offsave;
599 		return(NULL);
600 		break;
601 	default:		/* uh oh */
602 		assert(0);
603 		break;
604 	}
605 
606 	/* "can't happen" */
607 	assert(0);
608 	/* NOTREACHED */
609 }
610 
611 /*
612  - fast - step through the string at top speed
613  */
614 static uchar *			/* where tentative match ended, or NULL */
615 fast(m, start, stop, startst, stopst)
616 register struct match *m;
617 uchar *start;
618 uchar *stop;
619 sopno startst;
620 sopno stopst;
621 {
622 	register states st = m->st;
623 	register states fresh = m->fresh;
624 	register states tmp = m->tmp;
625 	register uchar *p = start;
626 	register uchar c = (start == m->beginp) ? '\0' : *(start-1);
627 	register uchar lastc;	/* previous c */
628 	register int atbol;
629 	register int ateol;
630 	register uchar *coldp;	/* last p after which no match was underway */
631 
632 	CLEAR(st);
633 	SET1(st, startst);
634 	st = expand(m->g, startst, stopst, st, 0, 0);
635 	ASSIGN(fresh, st);
636 	SP("start", st, *p);
637 	coldp = NULL;
638 	for (;;) {
639 		/* next character */
640 		lastc = c;
641 		c = (p == m->endp) ? '\0' : *p;
642 		if (EQ(st, fresh))
643 			coldp = p;
644 
645 		/* is there an EOL and/or BOL between lastc and c? */
646 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
647 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
648 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
649 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
650 		if (atbol || ateol) {
651 			st = expand(m->g, startst, stopst, st, atbol, ateol);
652 			SP("bef", st, c);
653 		}
654 
655 		/* are we done? */
656 		if (ISSET(st, stopst) || p == stop)
657 			break;		/* NOTE BREAK OUT */
658 
659 		/* no, we must deal with this character */
660 		ASSIGN(tmp, st);
661 		ASSIGN(st, fresh);
662 		st = step(m->g, startst, stopst, tmp, c, st);
663 		SP("aft", st, c);
664 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
665 		p++;
666 	}
667 
668 	assert(coldp != NULL);
669 	m->coldp = coldp;
670 	if (ISSET(st, stopst))
671 		return(p+1);
672 	else
673 		return(NULL);
674 }
675 
676 /*
677  - slow - step through the string more deliberately
678  */
679 static uchar *			/* where it ended */
680 slow(m, start, stop, startst, stopst)
681 register struct match *m;
682 uchar *start;
683 uchar *stop;
684 sopno startst;
685 sopno stopst;
686 {
687 	register states st = m->st;
688 	register states empty = m->empty;
689 	register states tmp = m->tmp;
690 	register uchar *p = start;
691 	register uchar c = (start == m->beginp) ? '\0' : *(start-1);
692 	register uchar lastc;	/* previous c */
693 	register int atbol;
694 	register int ateol;
695 	register uchar *matchp;	/* last p at which a match ended */
696 
697 	AT("slow", start, stop, startst, stopst);
698 	CLEAR(st);
699 	SET1(st, startst);
700 	SP("sstart", st, *p);
701 	matchp = NULL;
702 	for (;;) {
703 		/* next character */
704 		lastc = c;
705 		c = (p == m->endp) ? '\0' : *p;
706 
707 		/* is there an EOL and/or BOL between lastc and c? */
708 		atbol = ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
709 				(lastc == '\0' && !(m->eflags&REG_NOTBOL)) );
710 		ateol = ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
711 				(c == '\0' && !(m->eflags&REG_NOTEOL)) );
712 
713 		/* do we need an expansion before looking at the char? */
714 		if (p == start || atbol || ateol) {
715 			st = expand(m->g, startst, stopst, st, atbol, ateol);
716 			SP("sbef", st, c);
717 		}
718 
719 		/* are we done? */
720 		if (ISSET(st, stopst))
721 			matchp = p;
722 		if (EQ(st, empty) || p == stop)
723 			break;		/* NOTE BREAK OUT */
724 
725 		/* no, we must deal with this character */
726 		ASSIGN(tmp, st);
727 		ASSIGN(st, empty);
728 		st = step(m->g, startst, stopst, tmp, c, st);
729 		SP("saft", st, c);
730 		assert(EQ(expand(m->g, startst, stopst, st, 0, 0), st));
731 		p++;
732 	}
733 
734 	return(matchp);
735 }
736 
737 
738 /*
739  - expand - return set of states reachable from an initial set
740  */
741 static states
742 expand(g, start, stop, st, atbol, ateol)
743 register struct re_guts *g;
744 int start;			/* start state within strip */
745 int stop;			/* state after stop state within strip */
746 register states st;
747 int atbol;			/* at start or just after \n? (for BOL) */
748 int ateol;			/* just before \n or \0? (for EOL) */
749 {
750 	register sop s;
751 	register sopno pc;
752 	register onestate here;		/* note, macros know this name */
753 	register sopno look;
754 	register int i;
755 
756 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
757 		s = g->strip[pc];
758 		switch (OP(s)) {
759 		case OEND:
760 			assert(pc == stop-1);
761 			break;
762 		case OCHAR:		/* can't get past this */
763 			break;
764 		case OBOL:
765 			if (atbol)
766 				FWD(st, st, 1);
767 			break;
768 		case OEOL:
769 			if (ateol)
770 				FWD(st, st, 1);
771 			break;
772 		case OANY:		/* can't get past this either */
773 			break;
774 		case OANYOF:		/* or this */
775 			break;
776 		case OBACK_:		/* not significant here */
777 		case O_BACK:
778 			FWD(st, st, 1);
779 			break;
780 		case OPLUS_:		/* forward, this is just an empty */
781 			FWD(st, st, 1);
782 			break;
783 		case O_PLUS:		/* both forward and back */
784 			FWD(st, st, 1);
785 			i = ISSETBACK(st, OPND(s));
786 			BACK(st, st, OPND(s));
787 			if (!i && ISSETBACK(st, OPND(s))) {
788 				/* oho, must reconsider loop body */
789 				pc -= OPND(s) + 1;
790 				INIT(here, pc);
791 			}
792 			break;
793 		case OQUEST_:		/* two branches, both forward */
794 			FWD(st, st, 1);
795 			FWD(st, st, OPND(s));
796 			break;
797 		case O_QUEST:		/* just an empty */
798 			FWD(st, st, 1);
799 			break;
800 		case OLPAREN:		/* not significant here */
801 		case ORPAREN:
802 			FWD(st, st, 1);
803 			break;
804 		case OCH_:		/* mark the first two branches */
805 			FWD(st, st, 1);
806 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
807 			FWD(st, st, OPND(s));
808 			break;
809 		case OOR1:		/* done a branch, find the O_CH */
810 			if (ISSTATEIN(st, here)) {
811 				for (look = 1;
812 						OP(s = g->strip[pc+look]) != O_CH;
813 						look += OPND(s))
814 					assert(OP(s) == OOR2);
815 				FWD(st, st, look);
816 			}
817 			break;
818 		case OOR2:		/* propagate OCH_'s marking onward */
819 			FWD(st, st, 1);
820 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
821 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
822 				FWD(st, st, OPND(s));
823 			}
824 			break;
825 		case O_CH:		/* just an empty */
826 			FWD(st, st, 1);
827 			break;
828 		default:		/* ooooops... */
829 			assert(0);
830 			break;
831 		}
832 	}
833 
834 	return(st);
835 }
836 
837 /*
838  - step - map set of states reachable before char to set reachable after
839  */
840 static states
841 step(g, start, stop, bef, ch, aft)
842 register struct re_guts *g;
843 int start;			/* start state within strip */
844 int stop;			/* state after stop state within strip */
845 register states bef;		/* states reachable before */
846 u_int ch;
847 register states aft;		/* states already known reachable after */
848 {
849 	register cset *cs;
850 	register sop s;
851 	register sopno pc;
852 	register onestate here;		/* note, macros know this name */
853 	register sopno look;
854 	register int i;
855 
856 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
857 		s = g->strip[pc];
858 		switch (OP(s)) {
859 		case OEND:
860 			assert(pc == stop-1);
861 			break;
862 		case OCHAR:
863 			if (ch == OPND(s))
864 				FWD(aft, bef, 1);
865 			break;
866 		case OBOL:	/* these are looked after by expand() */
867 		case OEOL:
868 			break;
869 		case OANY:
870 			FWD(aft, bef, 1);
871 			break;
872 		case OANYOF:
873 			cs = &g->sets[OPND(s)];
874 			if (CHIN(cs, ch))
875 				FWD(aft, bef, 1);
876 			break;
877 		case OBACK_:		/* ignored here */
878 		case O_BACK:
879 			FWD(aft, aft, 1);
880 			break;
881 		case OPLUS_:		/* forward, this is just an empty */
882 			FWD(aft, aft, 1);
883 			break;
884 		case O_PLUS:		/* both forward and back */
885 			FWD(aft, aft, 1);
886 			i = ISSETBACK(aft, OPND(s));
887 			BACK(aft, aft, OPND(s));
888 			if (!i && ISSETBACK(aft, OPND(s))) {
889 				/* oho, must reconsider loop body */
890 				pc -= OPND(s) + 1;
891 				INIT(here, pc);
892 			}
893 			break;
894 		case OQUEST_:		/* two branches, both forward */
895 			FWD(aft, aft, 1);
896 			FWD(aft, aft, OPND(s));
897 			break;
898 		case O_QUEST:		/* just an empty */
899 			FWD(aft, aft, 1);
900 			break;
901 		case OLPAREN:		/* not significant here */
902 		case ORPAREN:
903 			FWD(aft, aft, 1);
904 			break;
905 		case OCH_:		/* mark the first two branches */
906 			FWD(aft, aft, 1);
907 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
908 			FWD(aft, aft, OPND(s));
909 			break;
910 		case OOR1:		/* done a branch, find the O_CH */
911 			if (ISSTATEIN(aft, here)) {
912 				for (look = 1;
913 						OP(s = g->strip[pc+look]) != O_CH;
914 						look += OPND(s))
915 					assert(OP(s) == OOR2);
916 				FWD(aft, aft, look);
917 			}
918 			break;
919 		case OOR2:		/* propagate OCH_'s marking */
920 			FWD(aft, aft, 1);
921 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
922 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
923 				FWD(aft, aft, OPND(s));
924 			}
925 			break;
926 		case O_CH:		/* just empty */
927 			FWD(aft, aft, 1);
928 			break;
929 		default:		/* ooooops... */
930 			assert(0);
931 			break;
932 		}
933 	}
934 
935 	return(aft);
936 }
937 
938 #ifdef REDEBUG
939 /*
940  - print - print a set of states
941  */
942 static void
943 print(m, caption, st, ch, d)
944 struct match *m;
945 char *caption;
946 states st;
947 u_int ch;
948 FILE *d;
949 {
950 	register struct re_guts *g = m->g;
951 	register int i;
952 	register int first = 1;
953 
954 	if (!(m->eflags&REG_TRACE))
955 		return;
956 
957 	fprintf(d, "%s", caption);
958 	if (ch != '\0')
959 		fprintf(d, " %s", pchar(ch));
960 	for (i = 0; i < g->nstates; i++)
961 		if (ISSET(st, i)) {
962 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
963 			first = 0;
964 		}
965 	fprintf(d, "\n");
966 }
967 
968 /*
969  - at - print current situation
970  */
971 static void
972 at(m, title, start, stop, startst, stopst)
973 struct match *m;
974 char *title;
975 uchar *start;
976 uchar *stop;
977 sopno startst;
978 sopno stopst;
979 {
980 	if (!(m->eflags&REG_TRACE))
981 		return;
982 
983 	printf("%s %s-", title, pchar(*start));
984 	printf("%s ", pchar(*stop));
985 	printf("%ld-%ld\n", (long)startst, (long)stopst);
986 }
987 
988 #ifndef PCHARDONE
989 #define	PCHARDONE	/* never again */
990 /*
991  - pchar - make a character printable
992  *
993  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
994  * duplicate here avoids having a debugging-capable regexec.o tied to
995  * a matching debug.o, and this is convenient.  It all disappears in
996  * the non-debug compilation anyway, so it doesn't matter much.
997  */
998 static char *			/* -> representation */
999 pchar(ch)
1000 int ch;
1001 {
1002 	static char pbuf[10];
1003 
1004 	if (isprint(ch) || ch == ' ')
1005 		sprintf(pbuf, "%c", ch);
1006 	else
1007 		sprintf(pbuf, "\\%o", ch);
1008 	return(pbuf);
1009 }
1010 #endif
1011 #endif
1012 
1013 #undef	matcher
1014 #undef	fast
1015 #undef	slow
1016 #undef	dissect
1017 #undef	backref
1018 #undef	expand
1019 #undef	step
1020 #undef	print
1021 #undef	at
1022 #undef	match
1023