xref: /original-bsd/lib/libc/regex/engine.c (revision 4092c5cc)
1 /*-
2  * Copyright (c) 1992 Henry Spencer.
3  * Copyright (c) 1992, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Henry Spencer of the University of Toronto.
8  *
9  * %sccs.include.redist.c%
10  *
11  *	@(#)engine.c	8.1 (Berkeley) 06/04/93
12  */
13 
14 /*
15  * The matching engine and friends.  This file is #included by regexec.c
16  * after suitable #defines of a variety of macros used herein, so that
17  * different state representations can be used without duplicating masses
18  * of code.
19  */
20 
21 #ifdef SNAMES
22 #define	matcher	smatcher
23 #define	fast	sfast
24 #define	slow	sslow
25 #define	dissect	sdissect
26 #define	backref	sbackref
27 #define	step	sstep
28 #define	print	sprint
29 #define	at	sat
30 #define	match	smat
31 #endif
32 #ifdef LNAMES
33 #define	matcher	lmatcher
34 #define	fast	lfast
35 #define	slow	lslow
36 #define	dissect	ldissect
37 #define	backref	lbackref
38 #define	step	lstep
39 #define	print	lprint
40 #define	at	lat
41 #define	match	lmat
42 #endif
43 
44 /* another structure passed up and down to avoid zillions of parameters */
45 struct match {
46 	struct re_guts *g;
47 	int eflags;
48 	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
49 	char *offp;		/* offsets work from here */
50 	char *beginp;		/* start of string -- virtual NUL precedes */
51 	char *endp;		/* end of string -- virtual NUL here */
52 	char *coldp;		/* can be no match starting before here */
53 	char **lastpos;		/* [nplus+1] */
54 	STATEVARS;
55 	states st;		/* current states */
56 	states fresh;		/* states for a fresh start */
57 	states tmp;		/* temporary */
58 	states empty;		/* empty set of states */
59 };
60 
61 static int matcher(/*register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags*/);
62 static char *dissect(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst*/);
63 static char *backref(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev*/);
64 static char *fast(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst*/);
65 static char *slow(/*register struct match *m, char *start, char *stop, sopno startst, sopno stopst*/);
66 static states step(/*register struct re_guts *g, int start, int stop, register states bef, int ch, register states aft*/);
67 #define	BOL	(OUT+1)
68 #define	EOL	(BOL+1)
69 #define	BOLEOL	(BOL+2)
70 #define	NOTHING	(BOL+3)
71 #define	BOW	(BOL+4)
72 #define	EOW	(BOL+5)
73 #define	CODEMAX	(BOL+5)		/* highest code used */
74 #define	NONCHAR(c)	((c) > CHAR_MAX)
75 #define	NNONCHAR	(CODEMAX-CHAR_MAX)
76 #ifdef REDEBUG
77 static void print(/*struct match *m, char *caption, states st, int ch, FILE *d*/);
78 #endif
79 #ifdef REDEBUG
80 static void at(/*struct match *m, char *title, char *start, char *stop, sopno startst, stopno stopst*/);
81 #endif
82 #ifdef REDEBUG
83 static char *pchar(/*int ch*/);
84 #endif
85 
86 #ifdef REDEBUG
87 #define	SP(t, s, c)	print(m, t, s, c, stdout)
88 #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
89 #define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
90 #else
91 #define	SP(t, s, c)	/* nothing */
92 #define	AT(t, p1, p2, s1, s2)	/* nothing */
93 #define	NOTE(s)	/* nothing */
94 #endif
95 
96 /*
97  - matcher - the actual matching engine
98  == static int matcher(register struct re_guts *g, char *string, \
99  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
100  */
101 static int			/* 0 success, REG_NOMATCH failure */
102 matcher(g, string, nmatch, pmatch, eflags)
103 register struct re_guts *g;
104 char *string;
105 size_t nmatch;
106 regmatch_t pmatch[];
107 int eflags;
108 {
109 	register char *endp;
110 	register int i;
111 	struct match mv;
112 	register struct match *m = &mv;
113 	register char *dp;
114 	const register sopno gf = g->firststate+1;	/* +1 for OEND */
115 	const register sopno gl = g->laststate;
116 	char *start;
117 	char *stop;
118 
119 	/* simplify the situation where possible */
120 	if (g->cflags&REG_NOSUB)
121 		nmatch = 0;
122 	if (eflags&REG_STARTEND) {
123 		start = string + pmatch[0].rm_so;
124 		stop = string + pmatch[0].rm_eo;
125 	} else {
126 		start = string;
127 		stop = start + strlen(start);
128 	}
129 	if (stop < start)
130 		return(REG_INVARG);
131 
132 	/* prescreening; this does wonders for this rather slow code */
133 	if (g->must != NULL) {
134 		for (dp = start; dp < stop; dp++)
135 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
136 				memcmp(dp, g->must, (size_t)g->mlen) == 0)
137 				break;
138 		if (dp == stop)		/* we didn't find g->must */
139 			return(REG_NOMATCH);
140 	}
141 
142 	/* match struct setup */
143 	m->g = g;
144 	m->eflags = eflags;
145 	m->pmatch = NULL;
146 	m->lastpos = NULL;
147 	m->offp = string;
148 	m->beginp = start;
149 	m->endp = stop;
150 	STATESETUP(m, 4);
151 	SETUP(m->st);
152 	SETUP(m->fresh);
153 	SETUP(m->tmp);
154 	SETUP(m->empty);
155 	CLEAR(m->empty);
156 
157 	/* this loop does only one repetition except for backrefs */
158 	for (;;) {
159 		endp = fast(m, start, stop, gf, gl);
160 		if (endp == NULL) {		/* a miss */
161 			STATETEARDOWN(m);
162 			return(REG_NOMATCH);
163 		}
164 		if (nmatch == 0 && !g->backrefs)
165 			break;		/* no further info needed */
166 
167 		/* where? */
168 		assert(m->coldp != NULL);
169 		for (;;) {
170 			NOTE("finding start");
171 			endp = slow(m, m->coldp, stop, gf, gl);
172 			if (endp != NULL)
173 				break;
174 			assert(m->coldp < m->endp);
175 			m->coldp++;
176 		}
177 		if (nmatch == 1 && !g->backrefs)
178 			break;		/* no further info needed */
179 
180 		/* oh my, he wants the subexpressions... */
181 		if (m->pmatch == NULL)
182 			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
183 							sizeof(regmatch_t));
184 		if (m->pmatch == NULL) {
185 			STATETEARDOWN(m);
186 			return(REG_ESPACE);
187 		}
188 		for (i = 1; i <= m->g->nsub; i++)
189 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
190 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
191 			NOTE("dissecting");
192 			dp = dissect(m, m->coldp, endp, gf, gl);
193 		} else {
194 			if (g->nplus > 0 && m->lastpos == NULL)
195 				m->lastpos = (char **)malloc((g->nplus+1) *
196 							sizeof(char *));
197 			if (g->nplus > 0 && m->lastpos == NULL) {
198 				free(m->pmatch);
199 				STATETEARDOWN(m);
200 				return(REG_ESPACE);
201 			}
202 			NOTE("backref dissect");
203 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
204 		}
205 		if (dp != NULL)
206 			break;
207 
208 		/* uh-oh... we couldn't find a subexpression-level match */
209 		assert(g->backrefs);	/* must be back references doing it */
210 		assert(g->nplus == 0 || m->lastpos != NULL);
211 		for (;;) {
212 			if (dp != NULL || endp <= m->coldp)
213 				break;		/* defeat */
214 			NOTE("backoff");
215 			endp = slow(m, m->coldp, endp-1, gf, gl);
216 			if (endp == NULL)
217 				break;		/* defeat */
218 			/* try it on a shorter possibility */
219 #ifndef NDEBUG
220 			for (i = 1; i <= m->g->nsub; i++) {
221 				assert(m->pmatch[i].rm_so == -1);
222 				assert(m->pmatch[i].rm_eo == -1);
223 			}
224 #endif
225 			NOTE("backoff dissect");
226 			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
227 		}
228 		assert(dp == NULL || dp == endp);
229 		if (dp != NULL)		/* found a shorter one */
230 			break;
231 
232 		/* despite initial appearances, there is no match here */
233 		NOTE("false alarm");
234 		start = m->coldp + 1;	/* recycle starting later */
235 		assert(start <= stop);
236 	}
237 
238 	/* fill in the details if requested */
239 	if (nmatch > 0) {
240 		pmatch[0].rm_so = m->coldp - m->offp;
241 		pmatch[0].rm_eo = endp - m->offp;
242 	}
243 	if (nmatch > 1) {
244 		assert(m->pmatch != NULL);
245 		for (i = 1; i < nmatch; i++)
246 			if (i <= m->g->nsub)
247 				pmatch[i] = m->pmatch[i];
248 			else {
249 				pmatch[i].rm_so = -1;
250 				pmatch[i].rm_eo = -1;
251 			}
252 	}
253 
254 	if (m->pmatch != NULL)
255 		free((char *)m->pmatch);
256 	if (m->lastpos != NULL)
257 		free((char *)m->lastpos);
258 	STATETEARDOWN(m);
259 	return(0);
260 }
261 
262 /*
263  - dissect - figure out what matched what, no back references
264  == static char *dissect(register struct match *m, char *start, \
265  ==	char *stop, sopno startst, sopno stopst);
266  */
267 static char *			/* == stop (success) always */
268 dissect(m, start, stop, startst, stopst)
269 register struct match *m;
270 char *start;
271 char *stop;
272 sopno startst;
273 sopno stopst;
274 {
275 	register int i;
276 	register sopno ss;	/* start sop of current subRE */
277 	register sopno es;	/* end sop of current subRE */
278 	register char *sp;	/* start of string matched by it */
279 	register char *stp;	/* string matched by it cannot pass here */
280 	register char *rest;	/* start of rest of string */
281 	register char *tail;	/* string unmatched by rest of RE */
282 	register sopno ssub;	/* start sop of subsubRE */
283 	register sopno esub;	/* end sop of subsubRE */
284 	register char *ssp;	/* start of string matched by subsubRE */
285 	register char *sep;	/* end of string matched by subsubRE */
286 	register char *oldssp;	/* previous ssp */
287 	register char *dp;
288 
289 	AT("diss", start, stop, startst, stopst);
290 	sp = start;
291 	for (ss = startst; ss < stopst; ss = es) {
292 		/* identify end of subRE */
293 		es = ss;
294 		switch (OP(m->g->strip[es])) {
295 		case OPLUS_:
296 		case OQUEST_:
297 			es += OPND(m->g->strip[es]);
298 			break;
299 		case OCH_:
300 			while (OP(m->g->strip[es]) != O_CH)
301 				es += OPND(m->g->strip[es]);
302 			break;
303 		}
304 		es++;
305 
306 		/* figure out what it matched */
307 		switch (OP(m->g->strip[ss])) {
308 		case OEND:
309 			assert(nope);
310 			break;
311 		case OCHAR:
312 			sp++;
313 			break;
314 		case OBOL:
315 		case OEOL:
316 		case OBOW:
317 		case OEOW:
318 			break;
319 		case OANY:
320 		case OANYOF:
321 			sp++;
322 			break;
323 		case OBACK_:
324 		case O_BACK:
325 			assert(nope);
326 			break;
327 		/* cases where length of match is hard to find */
328 		case OQUEST_:
329 			stp = stop;
330 			for (;;) {
331 				/* how long could this one be? */
332 				rest = slow(m, sp, stp, ss, es);
333 				assert(rest != NULL);	/* it did match */
334 				/* could the rest match the rest? */
335 				tail = slow(m, rest, stop, es, stopst);
336 				if (tail == stop)
337 					break;		/* yes! */
338 				/* no -- try a shorter match for this one */
339 				stp = rest - 1;
340 				assert(stp >= sp);	/* it did work */
341 			}
342 			ssub = ss + 1;
343 			esub = es - 1;
344 			/* did innards match? */
345 			if (slow(m, sp, rest, ssub, esub) != NULL) {
346 				dp = dissect(m, sp, rest, ssub, esub);
347 				assert(dp == rest);
348 			} else		/* no */
349 				assert(sp == rest);
350 			sp = rest;
351 			break;
352 		case OPLUS_:
353 			stp = stop;
354 			for (;;) {
355 				/* how long could this one be? */
356 				rest = slow(m, sp, stp, ss, es);
357 				assert(rest != NULL);	/* it did match */
358 				/* could the rest match the rest? */
359 				tail = slow(m, rest, stop, es, stopst);
360 				if (tail == stop)
361 					break;		/* yes! */
362 				/* no -- try a shorter match for this one */
363 				stp = rest - 1;
364 				assert(stp >= sp);	/* it did work */
365 			}
366 			ssub = ss + 1;
367 			esub = es - 1;
368 			ssp = sp;
369 			oldssp = ssp;
370 			for (;;) {	/* find last match of innards */
371 				sep = slow(m, ssp, rest, ssub, esub);
372 				if (sep == NULL || sep == ssp)
373 					break;	/* failed or matched null */
374 				oldssp = ssp;	/* on to next try */
375 				ssp = sep;
376 			}
377 			if (sep == NULL) {
378 				/* last successful match */
379 				sep = ssp;
380 				ssp = oldssp;
381 			}
382 			assert(sep == rest);	/* must exhaust substring */
383 			assert(slow(m, ssp, sep, ssub, esub) == rest);
384 			dp = dissect(m, ssp, sep, ssub, esub);
385 			assert(dp == sep);
386 			sp = rest;
387 			break;
388 		case OCH_:
389 			stp = stop;
390 			for (;;) {
391 				/* how long could this one be? */
392 				rest = slow(m, sp, stp, ss, es);
393 				assert(rest != NULL);	/* it did match */
394 				/* could the rest match the rest? */
395 				tail = slow(m, rest, stop, es, stopst);
396 				if (tail == stop)
397 					break;		/* yes! */
398 				/* no -- try a shorter match for this one */
399 				stp = rest - 1;
400 				assert(stp >= sp);	/* it did work */
401 			}
402 			ssub = ss + 1;
403 			esub = ss + OPND(m->g->strip[ss]) - 1;
404 			assert(OP(m->g->strip[esub]) == OOR1);
405 			for (;;) {	/* find first matching branch */
406 				if (slow(m, sp, rest, ssub, esub) == rest)
407 					break;	/* it matched all of it */
408 				/* that one missed, try next one */
409 				assert(OP(m->g->strip[esub]) == OOR1);
410 				esub++;
411 				assert(OP(m->g->strip[esub]) == OOR2);
412 				ssub = esub + 1;
413 				esub += OPND(m->g->strip[esub]);
414 				if (OP(m->g->strip[esub]) == OOR2)
415 					esub--;
416 				else
417 					assert(OP(m->g->strip[esub]) == O_CH);
418 			}
419 			dp = dissect(m, sp, rest, ssub, esub);
420 			assert(dp == rest);
421 			sp = rest;
422 			break;
423 		case O_PLUS:
424 		case O_QUEST:
425 		case OOR1:
426 		case OOR2:
427 		case O_CH:
428 			assert(nope);
429 			break;
430 		case OLPAREN:
431 			i = OPND(m->g->strip[ss]);
432 			m->pmatch[i].rm_so = sp - m->offp;
433 			break;
434 		case ORPAREN:
435 			i = OPND(m->g->strip[ss]);
436 			m->pmatch[i].rm_eo = sp - m->offp;
437 			break;
438 		default:		/* uh oh */
439 			assert(nope);
440 			break;
441 		}
442 	}
443 
444 	assert(sp == stop);
445 	return(sp);
446 }
447 
448 /*
449  - backref - figure out what matched what, figuring in back references
450  == static char *backref(register struct match *m, char *start, \
451  ==	char *stop, sopno startst, sopno stopst, sopno lev);
452  */
453 static char *			/* == stop (success) or NULL (failure) */
454 backref(m, start, stop, startst, stopst, lev)
455 register struct match *m;
456 char *start;
457 char *stop;
458 sopno startst;
459 sopno stopst;
460 sopno lev;			/* PLUS nesting level */
461 {
462 	register int i;
463 	register sopno ss;	/* start sop of current subRE */
464 	register char *sp;	/* start of string matched by it */
465 	register sopno ssub;	/* start sop of subsubRE */
466 	register sopno esub;	/* end sop of subsubRE */
467 	register char *ssp;	/* start of string matched by subsubRE */
468 	register char *dp;
469 	register size_t len;
470 	register int hard;
471 	register sop s;
472 	register regoff_t offsave;
473 	register cset *cs;
474 
475 	AT("back", start, stop, startst, stopst);
476 	sp = start;
477 
478 	/* get as far as we can with easy stuff */
479 	hard = 0;
480 	for (ss = startst; !hard && ss < stopst; ss++)
481 		switch (OP(s = m->g->strip[ss])) {
482 		case OCHAR:
483 			if (sp == stop || *sp++ != (char)OPND(s))
484 				return(NULL);
485 			break;
486 		case OANY:
487 			if (sp == stop)
488 				return(NULL);
489 			sp++;
490 			break;
491 		case OANYOF:
492 			cs = &m->g->sets[OPND(s)];
493 			if (sp == stop || !CHIN(cs, *sp++))
494 				return(NULL);
495 			break;
496 		case OBOL:
497 			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
498 					(sp < m->endp && *(sp-1) == '\n' &&
499 						(m->g->cflags&REG_NEWLINE)) )
500 				{ /* yes */ }
501 			else
502 				return(NULL);
503 			break;
504 		case OEOL:
505 			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
506 					(sp < m->endp && *sp == '\n' &&
507 						(m->g->cflags&REG_NEWLINE)) )
508 				{ /* yes */ }
509 			else
510 				return(NULL);
511 			break;
512 		case OBOW:
513 			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
514 					(sp < m->endp && *(sp-1) == '\n' &&
515 						(m->g->cflags&REG_NEWLINE)) ||
516 					(sp > m->beginp &&
517 							!isalnum(*(sp-1))) ) &&
518 					(sp < m->endp && isalnum(*sp)) )
519 				{ /* yes */ }
520 			else
521 				return(NULL);
522 			break;
523 		case OEOW:
524 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
525 					(sp < m->endp && *sp == '\n' &&
526 						(m->g->cflags&REG_NEWLINE)) ||
527 					(sp < m->endp && !isalnum(*sp)) ) &&
528 					(sp > m->beginp && isalnum(*(sp-1))) )
529 				{ /* yes */ }
530 			else
531 				return(NULL);
532 			break;
533 		case O_QUEST:
534 			break;
535 		case OOR1:	/* matches null but needs to skip */
536 			ss++;
537 			s = m->g->strip[ss];
538 			do {
539 				assert(OP(s) == OOR2);
540 				ss += OPND(s);
541 			} while (OP(s = m->g->strip[ss]) != O_CH);
542 			/* note that the ss++ gets us past the O_CH */
543 			break;
544 		default:	/* have to make a choice */
545 			hard = 1;
546 			break;
547 		}
548 	if (!hard) {		/* that was it! */
549 		if (sp != stop)
550 			return(NULL);
551 		return(sp);
552 	}
553 	ss--;			/* adjust for the for's final increment */
554 
555 	/* the hard stuff */
556 	AT("hard", sp, stop, ss, stopst);
557 	s = m->g->strip[ss];
558 	switch (OP(s)) {
559 	case OBACK_:		/* the vilest depths */
560 		i = OPND(s);
561 		if (m->pmatch[i].rm_eo == -1)
562 			return(NULL);
563 		assert(m->pmatch[i].rm_so != -1);
564 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
565 		assert(stop - m->beginp >= len);
566 		if (sp > stop - len)
567 			return(NULL);	/* not enough left to match */
568 		ssp = m->offp + m->pmatch[i].rm_so;
569 		if (memcmp(sp, ssp, len) != 0)
570 			return(NULL);
571 		while (m->g->strip[ss] != SOP(O_BACK, i))
572 			ss++;
573 		return(backref(m, sp+len, stop, ss+1, stopst, lev));
574 		break;
575 	case OQUEST_:		/* to null or not */
576 		dp = backref(m, sp, stop, ss+1, stopst, lev);
577 		if (dp != NULL)
578 			return(dp);	/* not */
579 		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
580 		break;
581 	case OPLUS_:
582 		assert(m->lastpos != NULL);
583 		assert(lev+1 <= m->g->nplus);
584 		m->lastpos[lev+1] = sp;
585 		return(backref(m, sp, stop, ss+1, stopst, lev+1));
586 		break;
587 	case O_PLUS:
588 		if (sp == m->lastpos[lev])	/* last pass matched null */
589 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
590 		/* try another pass */
591 		m->lastpos[lev] = sp;
592 		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
593 		if (dp == NULL)
594 			return(backref(m, sp, stop, ss+1, stopst, lev-1));
595 		else
596 			return(dp);
597 		break;
598 	case OCH_:		/* find the right one, if any */
599 		ssub = ss + 1;
600 		esub = ss + OPND(s) - 1;
601 		assert(OP(m->g->strip[esub]) == OOR1);
602 		for (;;) {	/* find first matching branch */
603 			dp = backref(m, sp, stop, ssub, esub, lev);
604 			if (dp != NULL)
605 				return(dp);
606 			/* that one missed, try next one */
607 			if (OP(m->g->strip[esub]) == O_CH)
608 				return(NULL);	/* there is none */
609 			esub++;
610 			assert(OP(m->g->strip[esub]) == OOR2);
611 			ssub = esub + 1;
612 			esub += OPND(m->g->strip[esub]);
613 			if (OP(m->g->strip[esub]) == OOR2)
614 				esub--;
615 			else
616 				assert(OP(m->g->strip[esub]) == O_CH);
617 		}
618 		break;
619 	case OLPAREN:		/* must undo assignment if rest fails */
620 		i = OPND(s);
621 		offsave = m->pmatch[i].rm_so;
622 		m->pmatch[i].rm_so = sp - m->offp;
623 		dp = backref(m, sp, stop, ss+1, stopst, lev);
624 		if (dp != NULL)
625 			return(dp);
626 		m->pmatch[i].rm_so = offsave;
627 		return(NULL);
628 		break;
629 	case ORPAREN:		/* must undo assignment if rest fails */
630 		i = OPND(s);
631 		offsave = m->pmatch[i].rm_eo;
632 		m->pmatch[i].rm_eo = sp - m->offp;
633 		dp = backref(m, sp, stop, ss+1, stopst, lev);
634 		if (dp != NULL)
635 			return(dp);
636 		m->pmatch[i].rm_eo = offsave;
637 		return(NULL);
638 		break;
639 	default:		/* uh oh */
640 		assert(nope);
641 		break;
642 	}
643 
644 	/* "can't happen" */
645 	assert(nope);
646 	/* NOTREACHED */
647 }
648 
649 /*
650  - fast - step through the string at top speed
651  == static char *fast(register struct match *m, char *start, \
652  ==	char *stop, sopno startst, sopno stopst);
653  */
654 static char *			/* where tentative match ended, or NULL */
655 fast(m, start, stop, startst, stopst)
656 register struct match *m;
657 char *start;
658 char *stop;
659 sopno startst;
660 sopno stopst;
661 {
662 	register states st = m->st;
663 	register states fresh = m->fresh;
664 	register states tmp = m->tmp;
665 	register char *p = start;
666 	register int c = (start == m->beginp) ? OUT : *(start-1);
667 	register int lastc;	/* previous c */
668 	register int flagch;
669 	register int i;
670 	register char *coldp;	/* last p after which no match was underway */
671 
672 	CLEAR(st);
673 	SET1(st, startst);
674 	st = step(m->g, startst, stopst, st, NOTHING, st);
675 	ASSIGN(fresh, st);
676 	SP("start", st, *p);
677 	coldp = NULL;
678 	for (;;) {
679 		/* next character */
680 		lastc = c;
681 		c = (p == m->endp) ? OUT : *p;
682 		if (EQ(st, fresh))
683 			coldp = p;
684 
685 		/* is there an EOL and/or BOL between lastc and c? */
686 		flagch = '\0';
687 		i = 0;
688 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
689 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
690 			flagch = BOL;
691 			i = m->g->nbol;
692 		}
693 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
694 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
695 			flagch = (flagch == BOL) ? BOLEOL : EOL;
696 			i += m->g->neol;
697 		}
698 		if (i != 0) {
699 			for (; i > 0; i--)
700 				st = step(m->g, startst, stopst, st, flagch, st);
701 			SP("boleol", st, c);
702 		}
703 
704 		/* how about a word boundary? */
705 		if ( (flagch == BOL || (lastc != OUT && !isalnum(lastc))) &&
706 					(c != OUT && isalnum(c)) ) {
707 			flagch = BOW;
708 		}
709 		if ( (lastc != OUT && isalnum(lastc)) &&
710 				(flagch == EOL || (c != OUT && !isalnum(c))) ) {
711 			flagch = EOW;
712 		}
713 		if (flagch == BOW || flagch == EOW) {
714 			st = step(m->g, startst, stopst, st, flagch, st);
715 			SP("boweow", st, c);
716 		}
717 
718 		/* are we done? */
719 		if (ISSET(st, stopst) || p == stop)
720 			break;		/* NOTE BREAK OUT */
721 
722 		/* no, we must deal with this character */
723 		ASSIGN(tmp, st);
724 		ASSIGN(st, fresh);
725 		assert(c != OUT);
726 		st = step(m->g, startst, stopst, tmp, c, st);
727 		SP("aft", st, c);
728 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
729 		p++;
730 	}
731 
732 	assert(coldp != NULL);
733 	m->coldp = coldp;
734 	if (ISSET(st, stopst))
735 		return(p+1);
736 	else
737 		return(NULL);
738 }
739 
740 /*
741  - slow - step through the string more deliberately
742  == static char *slow(register struct match *m, char *start, \
743  ==	char *stop, sopno startst, sopno stopst);
744  */
745 static char *			/* where it ended */
746 slow(m, start, stop, startst, stopst)
747 register struct match *m;
748 char *start;
749 char *stop;
750 sopno startst;
751 sopno stopst;
752 {
753 	register states st = m->st;
754 	register states empty = m->empty;
755 	register states tmp = m->tmp;
756 	register char *p = start;
757 	register int c = (start == m->beginp) ? OUT : *(start-1);
758 	register int lastc;	/* previous c */
759 	register int flagch;
760 	register int i;
761 	register char *matchp;	/* last p at which a match ended */
762 
763 	AT("slow", start, stop, startst, stopst);
764 	CLEAR(st);
765 	SET1(st, startst);
766 	SP("sstart", st, *p);
767 	st = step(m->g, startst, stopst, st, NOTHING, st);
768 	matchp = NULL;
769 	for (;;) {
770 		/* next character */
771 		lastc = c;
772 		c = (p == m->endp) ? OUT : *p;
773 
774 		/* is there an EOL and/or BOL between lastc and c? */
775 		flagch = '\0';
776 		i = 0;
777 		if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
778 				(lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
779 			flagch = BOL;
780 			i = m->g->nbol;
781 		}
782 		if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
783 				(c == OUT && !(m->eflags&REG_NOTEOL)) ) {
784 			flagch = (flagch == BOL) ? BOLEOL : EOL;
785 			i += m->g->neol;
786 		}
787 		if (i != 0) {
788 			for (; i > 0; i--)
789 				st = step(m->g, startst, stopst, st, flagch, st);
790 			SP("sboleol", st, c);
791 		}
792 
793 		/* how about a word boundary? */
794 		if ( (flagch == BOL || (lastc != OUT && !isalnum(lastc))) &&
795 					(c != OUT && isalnum(c)) ) {
796 			flagch = BOW;
797 		}
798 		if ( (lastc != OUT && isalnum(lastc)) &&
799 				(flagch == EOL || (c != OUT && !isalnum(c))) ) {
800 			flagch = EOW;
801 		}
802 		if (flagch == BOW || flagch == EOW) {
803 			st = step(m->g, startst, stopst, st, flagch, st);
804 			SP("sboweow", st, c);
805 		}
806 
807 		/* are we done? */
808 		if (ISSET(st, stopst))
809 			matchp = p;
810 		if (EQ(st, empty) || p == stop)
811 			break;		/* NOTE BREAK OUT */
812 
813 		/* no, we must deal with this character */
814 		ASSIGN(tmp, st);
815 		ASSIGN(st, empty);
816 		assert(c != OUT);
817 		st = step(m->g, startst, stopst, tmp, c, st);
818 		SP("saft", st, c);
819 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
820 		p++;
821 	}
822 
823 	return(matchp);
824 }
825 
826 
827 /*
828  - step - map set of states reachable before char to set reachable after
829  == static states step(register struct re_guts *g, int start, int stop, \
830  ==	register states bef, int ch, register states aft);
831  == #define	BOL	(OUT+1)
832  == #define	EOL	(BOL+1)
833  == #define	BOLEOL	(BOL+2)
834  == #define	NOTHING	(BOL+3)
835  == #define	BOW	(BOL+4)
836  == #define	EOW	(BOL+5)
837  == #define	CODEMAX	(BOL+5)		// highest code used
838  == #define	NONCHAR(c)	((c) > CHAR_MAX)
839  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
840  */
841 static states
842 step(g, start, stop, bef, ch, aft)
843 register struct re_guts *g;
844 int start;			/* start state within strip */
845 int stop;			/* state after stop state within strip */
846 register states bef;		/* states reachable before */
847 int ch;				/* character or NONCHAR code */
848 register states aft;		/* states already known reachable after */
849 {
850 	register cset *cs;
851 	register sop s;
852 	register sopno pc;
853 	register onestate here;		/* note, macros know this name */
854 	register sopno look;
855 	register int i;
856 
857 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
858 		s = g->strip[pc];
859 		switch (OP(s)) {
860 		case OEND:
861 			assert(pc == stop-1);
862 			break;
863 		case OCHAR:
864 			/* only characters can match */
865 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
866 			if (ch == (char)OPND(s))
867 				FWD(aft, bef, 1);
868 			break;
869 		case OBOL:
870 			if (ch == BOL || ch == BOLEOL)
871 				FWD(aft, bef, 1);
872 			break;
873 		case OEOL:
874 			if (ch == EOL || ch == BOLEOL)
875 				FWD(aft, bef, 1);
876 			break;
877 		case OBOW:
878 			if (ch == BOW)
879 				FWD(aft, bef, 1);
880 			break;
881 		case OEOW:
882 			if (ch == EOW)
883 				FWD(aft, bef, 1);
884 			break;
885 		case OANY:
886 			if (!NONCHAR(ch))
887 				FWD(aft, bef, 1);
888 			break;
889 		case OANYOF:
890 			cs = &g->sets[OPND(s)];
891 			if (!NONCHAR(ch) && CHIN(cs, ch))
892 				FWD(aft, bef, 1);
893 			break;
894 		case OBACK_:		/* ignored here */
895 		case O_BACK:
896 			FWD(aft, aft, 1);
897 			break;
898 		case OPLUS_:		/* forward, this is just an empty */
899 			FWD(aft, aft, 1);
900 			break;
901 		case O_PLUS:		/* both forward and back */
902 			FWD(aft, aft, 1);
903 			i = ISSETBACK(aft, OPND(s));
904 			BACK(aft, aft, OPND(s));
905 			if (!i && ISSETBACK(aft, OPND(s))) {
906 				/* oho, must reconsider loop body */
907 				pc -= OPND(s) + 1;
908 				INIT(here, pc);
909 			}
910 			break;
911 		case OQUEST_:		/* two branches, both forward */
912 			FWD(aft, aft, 1);
913 			FWD(aft, aft, OPND(s));
914 			break;
915 		case O_QUEST:		/* just an empty */
916 			FWD(aft, aft, 1);
917 			break;
918 		case OLPAREN:		/* not significant here */
919 		case ORPAREN:
920 			FWD(aft, aft, 1);
921 			break;
922 		case OCH_:		/* mark the first two branches */
923 			FWD(aft, aft, 1);
924 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
925 			FWD(aft, aft, OPND(s));
926 			break;
927 		case OOR1:		/* done a branch, find the O_CH */
928 			if (ISSTATEIN(aft, here)) {
929 				for (look = 1;
930 						OP(s = g->strip[pc+look]) != O_CH;
931 						look += OPND(s))
932 					assert(OP(s) == OOR2);
933 				FWD(aft, aft, look);
934 			}
935 			break;
936 		case OOR2:		/* propagate OCH_'s marking */
937 			FWD(aft, aft, 1);
938 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
939 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
940 				FWD(aft, aft, OPND(s));
941 			}
942 			break;
943 		case O_CH:		/* just empty */
944 			FWD(aft, aft, 1);
945 			break;
946 		default:		/* ooooops... */
947 			assert(nope);
948 			break;
949 		}
950 	}
951 
952 	return(aft);
953 }
954 
955 #ifdef REDEBUG
956 /*
957  - print - print a set of states
958  == #ifdef REDEBUG
959  == static void print(struct match *m, char *caption, states st, \
960  ==	int ch, FILE *d);
961  == #endif
962  */
963 static void
964 print(m, caption, st, ch, d)
965 struct match *m;
966 char *caption;
967 states st;
968 int ch;
969 FILE *d;
970 {
971 	register struct re_guts *g = m->g;
972 	register int i;
973 	register int first = 1;
974 
975 	if (!(m->eflags&REG_TRACE))
976 		return;
977 
978 	fprintf(d, "%s", caption);
979 	if (ch != '\0')
980 		fprintf(d, " %s", pchar(ch));
981 	for (i = 0; i < g->nstates; i++)
982 		if (ISSET(st, i)) {
983 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
984 			first = 0;
985 		}
986 	fprintf(d, "\n");
987 }
988 
989 /*
990  - at - print current situation
991  == #ifdef REDEBUG
992  == static void at(struct match *m, char *title, char *start, char *stop, \
993  ==						sopno startst, stopno stopst);
994  == #endif
995  */
996 static void
997 at(m, title, start, stop, startst, stopst)
998 struct match *m;
999 char *title;
1000 char *start;
1001 char *stop;
1002 sopno startst;
1003 sopno stopst;
1004 {
1005 	if (!(m->eflags&REG_TRACE))
1006 		return;
1007 
1008 	printf("%s %s-", title, pchar(*start));
1009 	printf("%s ", pchar(*stop));
1010 	printf("%ld-%ld\n", (long)startst, (long)stopst);
1011 }
1012 
1013 #ifndef PCHARDONE
1014 #define	PCHARDONE	/* never again */
1015 /*
1016  - pchar - make a character printable
1017  == #ifdef REDEBUG
1018  == static char *pchar(int ch);
1019  == #endif
1020  *
1021  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1022  * duplicate here avoids having a debugging-capable regexec.o tied to
1023  * a matching debug.o, and this is convenient.  It all disappears in
1024  * the non-debug compilation anyway, so it doesn't matter much.
1025  */
1026 static char *			/* -> representation */
1027 pchar(ch)
1028 int ch;
1029 {
1030 	static char pbuf[10];
1031 
1032 	if (isprint(ch) || ch == ' ')
1033 		sprintf(pbuf, "%c", ch);
1034 	else
1035 		sprintf(pbuf, "\\%o", ch);
1036 	return(pbuf);
1037 }
1038 #endif
1039 #endif
1040 
1041 #undef	matcher
1042 #undef	fast
1043 #undef	slow
1044 #undef	dissect
1045 #undef	backref
1046 #undef	step
1047 #undef	print
1048 #undef	at
1049 #undef	match
1050