1 /*
2  * The matching engine and friends.  This file is #included by regexec.c
3  * after suitable #defines of a variety of macros used herein, so that
4  * different state representations can be used without duplicating masses
5  * of code.
6  */
7 
8 #ifdef SNAMES
9 #define	matcher	smatcher
10 #define	fast	sfast
11 #define	slow	sslow
12 #define	dissect	sdissect
13 #define	backref	sbackref
14 #define	step	sstep
15 #define	print	sprint
16 #define	at	sat
17 #define	match	smat
18 #endif
19 #ifdef LNAMES
20 #define	matcher	lmatcher
21 #define	fast	lfast
22 #define	slow	lslow
23 #define	dissect	ldissect
24 #define	backref	lbackref
25 #define	step	lstep
26 #define	print	lprint
27 #define	at	lat
28 #define	match	lmat
29 #endif
30 
31 /* another structure passed up and down to avoid zillions of parameters */
32 struct match {
33 	struct re_guts *g;
34 	int eflags;
35 	my_regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
36 	char *offp;		/* offsets work from here */
37 	char *beginp;		/* start of string -- virtual NUL precedes */
38 	char *endp;		/* end of string -- virtual NUL here */
39 	char *coldp;		/* can be no match starting before here */
40 	char **lastpos;		/* [nplus+1] */
41 	STATEVARS;
42 	states st;		/* current states */
43 	states fresh;		/* states for a fresh start */
44 	states tmp;		/* temporary */
45 	states empty;		/* empty set of states */
46 };
47 
48 #include "engine.ih"
49 
50 #ifdef REDEBUG
51 #define	SP(t, s, c)	print(m, t, s, c, stdout)
52 #define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
53 #define	NOTE(str)	{ if (m->eflags&MY_REG_TRACE) printf("=%s\n", (str)); }
54 #else
55 #define	SP(t, s, c)	/* nothing */
56 #define	AT(t, p1, p2, s1, s2)	/* nothing */
57 #define	NOTE(s)	/* nothing */
58 #endif
59 
60 /*
61  - matcher - the actual matching engine
62  == static int matcher(register struct re_guts *g, char *string, \
63  ==	size_t nmatch, regmatch_t pmatch[], int eflags);
64  */
65 static int			/* 0 success, MY_REG_NOMATCH failure */
matcher(charset,g,str,nmatch,pmatch,eflags)66 matcher(charset,g, str, nmatch, pmatch, eflags)
67 const CHARSET_INFO *charset;
68 struct re_guts *g;
69 char *str;
70 size_t nmatch;
71 my_regmatch_t pmatch[];
72 int eflags;
73 {
74 	char *endp;
75 	uint i;
76 	struct match mv;
77 	struct match *m = &mv;
78 	char *dp;
79 	const sopno gf = g->firststate+1;	/* +1 for OEND */
80 	const sopno gl = g->laststate;
81 	char *start;
82 	char *stop;
83 
84 	/* simplify the situation where possible */
85 	if (g->cflags&MY_REG_NOSUB)
86 		nmatch = 0;
87 	if (eflags&MY_REG_STARTEND) {
88 		start = str + pmatch[0].rm_so;
89 		stop = str + pmatch[0].rm_eo;
90 	} else {
91 		start = str;
92 		stop = start + strlen(start);
93 	}
94 	if (stop < start)
95 		return(MY_REG_INVARG);
96 
97 	/* prescreening; this does wonders for this rather slow code */
98 	if (g->must != NULL) {
99 		for (dp = start; dp < stop; dp++)
100 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
101 				memcmp(dp, g->must, (size_t)g->mlen) == 0)
102 				break;
103 		if (dp == stop)		/* we didn't find g->must */
104 			return(MY_REG_NOMATCH);
105 	}
106 
107 	/* match struct setup */
108 	m->g = g;
109 	m->eflags = eflags;
110 	m->pmatch = NULL;
111 	m->lastpos = NULL;
112 	m->offp = str;
113 	m->beginp = start;
114 	m->endp = stop;
115 	STATESETUP(m, 4);
116 	SETUP(m->st);
117 	SETUP(m->fresh);
118 	SETUP(m->tmp);
119 	SETUP(m->empty);
120 	CLEAR(m->empty);
121 
122 	/* this loop does only one repetition except for backrefs */
123 	for (;;) {
124 		endp = fast(charset, m, start, stop, gf, gl);
125 		if (endp == NULL) {		/* a miss */
126 		  if (m->pmatch != NULL)
127 		    free((char *)m->pmatch);
128 		  if (m->lastpos != NULL)
129 		    free((char *)m->lastpos);
130 		  STATETEARDOWN(m);
131 		  return(MY_REG_NOMATCH);
132 		}
133 		if (nmatch == 0 && !g->backrefs)
134 			break;		/* no further info needed */
135 
136 		/* where? */
137 		assert(m->coldp != NULL);
138 		for (;;) {
139 			NOTE("finding start");
140 			endp = slow(charset, m, m->coldp, stop, gf, gl);
141 			if (endp != NULL)
142 				break;
143 			assert(m->coldp < m->endp);
144 			m->coldp++;
145 		}
146 		if (nmatch == 1 && !g->backrefs)
147 			break;		/* no further info needed */
148 
149 		/* oh my, he wants the subexpressions... */
150 		if (m->pmatch == NULL)
151 			m->pmatch = (my_regmatch_t *)malloc((m->g->nsub + 1) *
152 							sizeof(my_regmatch_t));
153 		if (m->pmatch == NULL) {
154 		  	if (m->lastpos != NULL)
155 		    	free((char *)m->lastpos);
156 			STATETEARDOWN(m);
157 			return(MY_REG_ESPACE);
158 		}
159 		for (i = 1; i <= m->g->nsub; i++)
160 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
161 		if (!g->backrefs && !(m->eflags&MY_REG_BACKR)) {
162 			NOTE("dissecting");
163 			dp = dissect(charset, m, m->coldp, endp, gf, gl);
164 		} else {
165 			if (g->nplus > 0 && m->lastpos == NULL)
166 				m->lastpos = (char **)malloc((g->nplus+1) *
167 							sizeof(char *));
168 			if (g->nplus > 0 && m->lastpos == NULL) {
169 				free(m->pmatch);
170 				STATETEARDOWN(m);
171 				return(MY_REG_ESPACE);
172 			}
173 			NOTE("backref dissect");
174 			dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
175 		}
176 		if (dp != NULL)
177 			break;
178 
179 		/* uh-oh... we couldn't find a subexpression-level match */
180 		assert(g->backrefs);	/* must be back references doing it */
181 		assert(g->nplus == 0 || m->lastpos != NULL);
182 		for (;;) {
183 			if (dp != NULL || endp <= m->coldp)
184 				break;		/* defeat */
185 			NOTE("backoff");
186 			endp = slow(charset, m, m->coldp, endp-1, gf, gl);
187 			if (endp == NULL)
188 				break;		/* defeat */
189 			/* try it on a shorter possibility */
190 #ifndef NDEBUG
191 			for (i = 1; i <= m->g->nsub; i++) {
192 				assert(m->pmatch[i].rm_so == -1);
193 				assert(m->pmatch[i].rm_eo == -1);
194 			}
195 #endif
196 			NOTE("backoff dissect");
197 			dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
198 		}
199 		assert(dp == NULL || dp == endp);
200 		if (dp != NULL)		/* found a shorter one */
201 			break;
202 
203 		/* despite initial appearances, there is no match here */
204 		NOTE("false alarm");
205 		start = m->coldp + 1;	/* recycle starting later */
206 		assert(start <= stop);
207 	}
208 
209 	/* fill in the details if requested */
210 	if (nmatch > 0) {
211 		pmatch[0].rm_so = m->coldp - m->offp;
212 		pmatch[0].rm_eo = endp - m->offp;
213 	}
214 	if (nmatch > 1) {
215 		assert(m->pmatch != NULL);
216 		for (i = 1; i < nmatch; i++)
217 			if (i <= m->g->nsub)
218 				pmatch[i] = m->pmatch[i];
219 			else {
220 				pmatch[i].rm_so = -1;
221 				pmatch[i].rm_eo = -1;
222 			}
223 	}
224 
225 	if (m->pmatch != NULL)
226 		free((char *)m->pmatch);
227 	if (m->lastpos != NULL)
228 		free((char *)m->lastpos);
229 	STATETEARDOWN(m);
230 	return(0);
231 }
232 
233 /*
234  - dissect - figure out what matched what, no back references
235  == static char *dissect(register struct match *m, char *start, \
236  ==	char *stop, sopno startst, sopno stopst);
237  */
238 static char *			/* == stop (success) always */
dissect(charset,m,start,stop,startst,stopst)239 dissect(charset, m, start, stop, startst, stopst)
240 const CHARSET_INFO *charset;
241 struct match *m;
242 char *start;
243 char *stop;
244 sopno startst;
245 sopno stopst;
246 {
247 	uint i;
248 	sopno ss;	/* start sop of current subRE */
249 	sopno es;	/* end sop of current subRE */
250 	char *sp;	/* start of string matched by it */
251 	char *stp;	/* string matched by it cannot pass here */
252 	char *rest;	/* start of rest of string */
253 	char *tail;	/* string unmatched by rest of RE */
254 	sopno ssub;	/* start sop of subsubRE */
255 	sopno esub;	/* end sop of subsubRE */
256 	char *ssp;	/* start of string matched by subsubRE */
257 	char *sep;	/* end of string matched by subsubRE */
258 	char *oldssp;	/* previous ssp */
259 
260 	AT("diss", start, stop, startst, stopst);
261 	sp = start;
262 	for (ss = startst; ss < stopst; ss = es) {
263 		/* identify end of subRE */
264 		es = ss;
265 		switch (OP(m->g->strip[es])) {
266 		case OPLUS_:
267 		case OQUEST_:
268 			es += OPND(m->g->strip[es]);
269 			break;
270 		case OCH_:
271 			while (OP(m->g->strip[es]) != O_CH)
272 				es += OPND(m->g->strip[es]);
273 			break;
274 		}
275 		es++;
276 
277 		/* figure out what it matched */
278 		switch (OP(m->g->strip[ss])) {
279 		case OEND:
280 			assert(nope);
281 			break;
282 		case OCHAR:
283 			sp++;
284 			break;
285 		case OBOL:
286 		case OEOL:
287 		case OBOW:
288 		case OEOW:
289 			break;
290 		case OANY:
291 		case OANYOF:
292 			sp++;
293 			break;
294 		case OBACK_:
295 		case O_BACK:
296 			assert(nope);
297 			break;
298 		/* cases where length of match is hard to find */
299 		case OQUEST_:
300 			stp = stop;
301 			for (;;) {
302 				/* how long could this one be? */
303 				rest = slow(charset, m, sp, stp, ss, es);
304 				assert(rest != NULL);	/* it did match */
305 				/* could the rest match the rest? */
306 				tail = slow(charset, m, rest, stop, es, stopst);
307 				if (tail == stop)
308 					break;		/* yes! */
309 				/* no -- try a shorter match for this one */
310 				stp = rest - 1;
311 				assert(stp >= sp);	/* it did work */
312 			}
313 			ssub = ss + 1;
314 			esub = es - 1;
315 			/* did innards match? */
316 			if (slow(charset, m, sp, rest, ssub, esub) != NULL)
317 				sp = dissect(charset, m, sp, rest, ssub, esub);
318 			assert(sp == rest);
319 			sp = rest;
320 			break;
321 		case OPLUS_:
322 			stp = stop;
323 			for (;;) {
324 				/* how long could this one be? */
325 				rest = slow(charset, m, sp, stp, ss, es);
326 				assert(rest != NULL);	/* it did match */
327 				/* could the rest match the rest? */
328 				tail = slow(charset, m, rest, stop, es, stopst);
329 				if (tail == stop)
330 					break;		/* yes! */
331 				/* no -- try a shorter match for this one */
332 				stp = rest - 1;
333 				assert(stp >= sp);	/* it did work */
334 			}
335 			ssub = ss + 1;
336 			esub = es - 1;
337 			ssp = sp;
338 			oldssp = ssp;
339 			for (;;) {	/* find last match of innards */
340 				sep = slow(charset, m, ssp, rest, ssub, esub);
341 				if (sep == NULL || sep == ssp)
342 					break;	/* failed or matched null */
343 				oldssp = ssp;	/* on to next try */
344 				ssp = sep;
345 			}
346 			if (sep == NULL) {
347 				/* last successful match */
348 				sep = ssp;
349 				ssp = oldssp;
350 			}
351 			assert(sep == rest);	/* must exhaust substring */
352 			assert(slow(charset, m, ssp, sep, ssub, esub) == rest);
353 			sp = dissect(charset, m, ssp, sep, ssub, esub);
354 			assert(sp == sep);
355 			sp = rest;
356 			break;
357 		case OCH_:
358 			stp = stop;
359 			for (;;) {
360 				/* how long could this one be? */
361 				rest = slow(charset, m, sp, stp, ss, es);
362 				assert(rest != NULL);	/* it did match */
363 				/* could the rest match the rest? */
364 				tail = slow(charset, m, rest, stop, es, stopst);
365 				if (tail == stop)
366 					break;		/* yes! */
367 				/* no -- try a shorter match for this one */
368 				stp = rest - 1;
369 				assert(stp >= sp);	/* it did work */
370 			}
371 			ssub = ss + 1;
372 			esub = ss + OPND(m->g->strip[ss]) - 1;
373 			assert(OP(m->g->strip[esub]) == OOR1);
374 			for (;;) {	/* find first matching branch */
375 				if (slow(charset, m, sp, rest, ssub, esub) == rest)
376 					break;	/* it matched all of it */
377 				/* that one missed, try next one */
378 				assert(OP(m->g->strip[esub]) == OOR1);
379 				esub++;
380 				assert(OP(m->g->strip[esub]) == OOR2);
381 				ssub = esub + 1;
382 				esub += OPND(m->g->strip[esub]);
383 				if (OP(m->g->strip[esub]) == OOR2)
384 					esub--;
385 				else
386 					assert(OP(m->g->strip[esub]) == O_CH);
387 			}
388 			sp = dissect(charset, m, sp, rest, ssub, esub);
389 			assert(sp == rest);
390 			sp = rest;
391 			break;
392 		case O_PLUS:
393 		case O_QUEST:
394 		case OOR1:
395 		case OOR2:
396 		case O_CH:
397 			assert(nope);
398 			break;
399 		case OLPAREN:
400 			i = OPND(m->g->strip[ss]);
401 			assert(0 < i && i <= m->g->nsub);
402 			m->pmatch[i].rm_so = sp - m->offp;
403 			break;
404 		case ORPAREN:
405 			i = OPND(m->g->strip[ss]);
406 			assert(0 < i && i <= m->g->nsub);
407 			m->pmatch[i].rm_eo = sp - m->offp;
408 			break;
409 		default:		/* uh oh */
410 			assert(nope);
411 			break;
412 		}
413 	}
414 
415 	assert(sp == stop);
416 	return(sp);
417 }
418 
419 /*
420  - backref - figure out what matched what, figuring in back references
421  == static char *backref(register struct match *m, char *start, \
422  ==	char *stop, sopno startst, sopno stopst, sopno lev);
423  */
424 static char *			/* == stop (success) or NULL (failure) */
backref(charset,m,start,stop,startst,stopst,lev)425 backref(charset,m, start, stop, startst, stopst, lev)
426 const CHARSET_INFO *charset;
427 struct match *m;
428 char *start;
429 char *stop;
430 sopno startst;
431 sopno stopst;
432 sopno lev;			/* PLUS nesting level */
433 {
434 	uint i;
435 	sopno ss;	/* start sop of current subRE */
436 	char *sp;	/* start of string matched by it */
437 	sopno ssub;	/* start sop of subsubRE */
438 	sopno esub;	/* end sop of subsubRE */
439 	char *ssp;	/* start of string matched by subsubRE */
440 	char *dp;
441 	size_t len;
442 	int hard;
443 	sop s;
444 	my_regoff_t offsave;
445 	cset *cs;
446 
447 	AT("back", start, stop, startst, stopst);
448 	sp = start;
449 
450 	/* get as far as we can with easy stuff */
451 	hard = 0;
452 	for (ss = startst; !hard && ss < stopst; ss++)
453 		switch (OP(s = m->g->strip[ss])) {
454 		case OCHAR:
455 			if (sp == stop || *sp++ != (char)OPND(s))
456 				return(NULL);
457 			break;
458 		case OANY:
459 			if (sp == stop)
460 				return(NULL);
461 			sp++;
462 			break;
463 		case OANYOF:
464 			cs = &m->g->sets[OPND(s)];
465 			if (sp == stop || !CHIN(cs, *sp++))
466 				return(NULL);
467 			break;
468 		case OBOL:
469 			if ( (sp == m->beginp && !(m->eflags&MY_REG_NOTBOL)) ||
470 					(sp < m->endp && *(sp-1) == '\n' &&
471 						(m->g->cflags&MY_REG_NEWLINE)) )
472 				{ /* yes */ }
473 			else
474 				return(NULL);
475 			break;
476 		case OEOL:
477 			if ( (sp == m->endp && !(m->eflags&MY_REG_NOTEOL)) ||
478 					(sp < m->endp && *sp == '\n' &&
479 						(m->g->cflags&MY_REG_NEWLINE)) )
480 				{ /* yes */ }
481 			else
482 				return(NULL);
483 			break;
484 		case OBOW:
485 			if (( (sp == m->beginp && !(m->eflags&MY_REG_NOTBOL)) ||
486 					(sp < m->endp && *(sp-1) == '\n' &&
487 						(m->g->cflags&MY_REG_NEWLINE)) ||
488 					(sp > m->beginp &&
489 							!ISWORD(charset,*(sp-1))) ) &&
490 					(sp < m->endp && ISWORD(charset,*sp)) )
491 				{ /* yes */ }
492 			else
493 				return(NULL);
494 			break;
495 		case OEOW:
496 			if (( (sp == m->endp && !(m->eflags&MY_REG_NOTEOL)) ||
497 					(sp < m->endp && *sp == '\n' &&
498 						(m->g->cflags&MY_REG_NEWLINE)) ||
499 					(sp < m->endp && !ISWORD(charset,*sp)) ) &&
500 					(sp > m->beginp && ISWORD(charset,*(sp-1))) )
501 				{ /* yes */ }
502 			else
503 				return(NULL);
504 			break;
505 		case O_QUEST:
506 			break;
507 		case OOR1:	/* matches null but needs to skip */
508 			ss++;
509 			s = m->g->strip[ss];
510 			do {
511 				assert(OP(s) == OOR2);
512 				ss += OPND(s);
513 			} while (OP(s = m->g->strip[ss]) != O_CH);
514 			/* note that the ss++ gets us past the O_CH */
515 			break;
516 		default:	/* have to make a choice */
517 			hard = 1;
518 			break;
519 		}
520 	if (!hard) {		/* that was it! */
521 		if (sp != stop)
522 			return(NULL);
523 		return(sp);
524 	}
525 	ss--;			/* adjust for the for's final increment */
526 
527 	/* the hard stuff */
528 	AT("hard", sp, stop, ss, stopst);
529 	s = m->g->strip[ss];
530 	switch (OP(s)) {
531 	case OBACK_:		/* the vilest depths */
532 		i = OPND(s);
533 		assert(0 < i && i <= m->g->nsub);
534 		if (m->pmatch[i].rm_eo == -1)
535 			return(NULL);
536 		assert(m->pmatch[i].rm_so != -1);
537 		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
538 		assert((size_t) (stop - m->beginp) >= len);
539 		if (sp > stop - len)
540 			return(NULL);	/* not enough left to match */
541 		ssp = m->offp + m->pmatch[i].rm_so;
542 		if (memcmp(sp, ssp, len) != 0)
543 			return(NULL);
544 		while (m->g->strip[ss] != SOP(O_BACK, i))
545 			ss++;
546 		return(backref(charset, m, sp+len, stop, ss+1, stopst, lev));
547 		break;
548 	case OQUEST_:		/* to null or not */
549 		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
550 		if (dp != NULL)
551 			return(dp);	/* not */
552 		return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev));
553 		break;
554 	case OPLUS_:
555 		assert(m->lastpos != NULL);
556 		assert(lev+1 <= m->g->nplus);
557 		m->lastpos[lev+1] = sp;
558 		return(backref(charset, m, sp, stop, ss+1, stopst, lev+1));
559 		break;
560 	case O_PLUS:
561 		if (sp == m->lastpos[lev])	/* last pass matched null */
562 			return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
563 		/* try another pass */
564 		m->lastpos[lev] = sp;
565 		dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev);
566 		if (dp == NULL)
567 			return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
568 		else
569 			return(dp);
570 		break;
571 	case OCH_:		/* find the right one, if any */
572 		ssub = ss + 1;
573 		esub = ss + OPND(s) - 1;
574 		assert(OP(m->g->strip[esub]) == OOR1);
575 		for (;;) {	/* find first matching branch */
576 			dp = backref(charset, m, sp, stop, ssub, esub, lev);
577 			if (dp != NULL)
578 				return(dp);
579 			/* that one missed, try next one */
580 			if (OP(m->g->strip[esub]) == O_CH)
581 				return(NULL);	/* there is none */
582 			esub++;
583 			assert(OP(m->g->strip[esub]) == OOR2);
584 			ssub = esub + 1;
585 			esub += OPND(m->g->strip[esub]);
586 			if (OP(m->g->strip[esub]) == OOR2)
587 				esub--;
588 			else
589 				assert(OP(m->g->strip[esub]) == O_CH);
590 		}
591 		break;
592 	case OLPAREN:		/* must undo assignment if rest fails */
593 		i = OPND(s);
594 		assert(0 < i && i <= m->g->nsub);
595 		offsave = m->pmatch[i].rm_so;
596 		m->pmatch[i].rm_so = sp - m->offp;
597 		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
598 		if (dp != NULL)
599 			return(dp);
600 		m->pmatch[i].rm_so = offsave;
601 		return(NULL);
602 		break;
603 	case ORPAREN:		/* must undo assignment if rest fails */
604 		i = OPND(s);
605 		assert(0 < i && i <= m->g->nsub);
606 		offsave = m->pmatch[i].rm_eo;
607 		m->pmatch[i].rm_eo = sp - m->offp;
608 		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
609 		if (dp != NULL)
610 			return(dp);
611 		m->pmatch[i].rm_eo = offsave;
612 		return(NULL);
613 		break;
614 	default:		/* uh oh */
615 		assert(nope);
616 		break;
617 	}
618 
619 	/* "can't happen" */
620 	assert(nope);
621 	/* NOTREACHED */
622 	return 0;				/* Keep gcc happy */
623 }
624 
625 /*
626  - fast - step through the string at top speed
627  == static char *fast(register struct match *m, char *start, \
628  ==	char *stop, sopno startst, sopno stopst);
629  */
630 static char *			/* where tentative match ended, or NULL */
fast(charset,m,start,stop,startst,stopst)631 fast(charset, m, start, stop, startst, stopst)
632 const CHARSET_INFO *charset;
633 struct match *m;
634 char *start;
635 char *stop;
636 sopno startst;
637 sopno stopst;
638 {
639 	states st = m->st;
640 	states fresh = m->fresh;
641 	states tmp = m->tmp;
642 	char *p = start;
643 	int c = (start == m->beginp) ? OUT : *(start-1);
644 	int lastc;	/* previous c */
645 	int flagch;
646 	int i;
647 	char *coldp;	/* last p after which no match was underway */
648 
649 	CLEAR(st);
650 	SET1(st, startst);
651 	st = step(m->g, startst, stopst, st, NOTHING, st);
652 	ASSIGN(fresh, st);
653 	SP("start", st, *p);
654 	coldp = NULL;
655 	for (;;) {
656 		/* next character */
657 		lastc = c;
658 		c = (p == m->endp) ? OUT : *p;
659 		if (EQ(st, fresh))
660 			coldp = p;
661 
662 		/* is there an EOL and/or BOL between lastc and c? */
663 		flagch = '\0';
664 		i = 0;
665 		if ( (lastc == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
666 				(lastc == OUT && !(m->eflags&MY_REG_NOTBOL)) ) {
667 			flagch = BOL;
668 			i = m->g->nbol;
669 		}
670 		if ( (c == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
671 				(c == OUT && !(m->eflags&MY_REG_NOTEOL)) ) {
672 			flagch = (flagch == BOL) ? BOLEOL : EOL;
673 			i += m->g->neol;
674 		}
675 		if (i != 0) {
676 			for (; i > 0; i--)
677 				st = step(m->g, startst, stopst, st, flagch, st);
678 			SP("boleol", st, c);
679 		}
680 
681 		/* how about a word boundary? */
682 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
683 					(c != OUT && ISWORD(charset,c)) ) {
684 			flagch = BOW;
685 		}
686 		if ( (lastc != OUT && ISWORD(charset,lastc)) &&
687 				(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
688 			flagch = EOW;
689 		}
690 		if (flagch == BOW || flagch == EOW) {
691 			st = step(m->g, startst, stopst, st, flagch, st);
692 			SP("boweow", st, c);
693 		}
694 
695 		/* are we done? */
696 		if (ISSET(st, stopst) || p == stop)
697 			break;		/* NOTE BREAK OUT */
698 
699 		/* no, we must deal with this character */
700 		ASSIGN(tmp, st);
701 		ASSIGN(st, fresh);
702 		assert(c != OUT);
703 		st = step(m->g, startst, stopst, tmp, c, st);
704 		SP("aft", st, c);
705 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
706 		p++;
707 	}
708 
709 	assert(coldp != NULL);
710 	m->coldp = coldp;
711 	if (ISSET(st, stopst))
712 		return(p+1);
713 	else
714 		return(NULL);
715 }
716 
717 /*
718  - slow - step through the string more deliberately
719  == static char *slow(register struct match *m, char *start, \
720  ==	char *stop, sopno startst, sopno stopst);
721  */
722 static char *			/* where it ended */
slow(charset,m,start,stop,startst,stopst)723 slow(charset, m, start, stop, startst, stopst)
724 const CHARSET_INFO *charset;
725 struct match *m;
726 char *start;
727 char *stop;
728 sopno startst;
729 sopno stopst;
730 {
731 	states st = m->st;
732 	states empty = m->empty;
733 	states tmp = m->tmp;
734 	char *p = start;
735 	int c = (start == m->beginp) ? OUT : *(start-1);
736 	int lastc;	/* previous c */
737 	int flagch;
738 	int i;
739 	char *matchp;	/* last p at which a match ended */
740 
741 	AT("slow", start, stop, startst, stopst);
742 	CLEAR(st);
743 	SET1(st, startst);
744 	SP("sstart", st, *p);
745 	st = step(m->g, startst, stopst, st, NOTHING, st);
746 	matchp = NULL;
747 	for (;;) {
748 		/* next character */
749 		lastc = c;
750 		c = (p == m->endp) ? OUT : *p;
751 
752 		/* is there an EOL and/or BOL between lastc and c? */
753 		flagch = '\0';
754 		i = 0;
755 		if ( (lastc == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
756 				(lastc == OUT && !(m->eflags&MY_REG_NOTBOL)) ) {
757 			flagch = BOL;
758 			i = m->g->nbol;
759 		}
760 		if ( (c == '\n' && m->g->cflags&MY_REG_NEWLINE) ||
761 				(c == OUT && !(m->eflags&MY_REG_NOTEOL)) ) {
762 			flagch = (flagch == BOL) ? BOLEOL : EOL;
763 			i += m->g->neol;
764 		}
765 		if (i != 0) {
766 			for (; i > 0; i--)
767 				st = step(m->g, startst, stopst, st, flagch, st);
768 			SP("sboleol", st, c);
769 		}
770 
771 		/* how about a word boundary? */
772 		if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
773 					(c != OUT && ISWORD(charset,c)) ) {
774 			flagch = BOW;
775 		}
776 		if ( (lastc != OUT && ISWORD(charset,lastc)) &&
777 				(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
778 			flagch = EOW;
779 		}
780 		if (flagch == BOW || flagch == EOW) {
781 			st = step(m->g, startst, stopst, st, flagch, st);
782 			SP("sboweow", st, c);
783 		}
784 
785 		/* are we done? */
786 		if (ISSET(st, stopst))
787 			matchp = p;
788 		if (EQ(st, empty) || p == stop)
789 			break;		/* NOTE BREAK OUT */
790 
791 		/* no, we must deal with this character */
792 		ASSIGN(tmp, st);
793 		ASSIGN(st, empty);
794 		assert(c != OUT);
795 		st = step(m->g, startst, stopst, tmp, c, st);
796 		SP("saft", st, c);
797 		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
798 		p++;
799 	}
800 
801 	return(matchp);
802 }
803 
804 
805 /*
806  - step - map set of states reachable before char to set reachable after
807  == static states step(register struct re_guts *g, sopno start, sopno stop, \
808  ==	register states bef, int ch, register states aft);
809  == #define	BOL	(OUT+1)
810  == #define	EOL	(BOL+1)
811  == #define	BOLEOL	(BOL+2)
812  == #define	NOTHING	(BOL+3)
813  == #define	BOW	(BOL+4)
814  == #define	EOW	(BOL+5)
815  == #define	CODEMAX	(BOL+5)		// highest code used
816  == #define	NONCHAR(c)	((c) > CHAR_MAX)
817  == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
818  */
819 static states
step(g,start,stop,bef,ch,aft)820 step(g, start, stop, bef, ch, aft)
821 struct re_guts *g;
822 sopno start;			/* start state within strip */
823 sopno stop;			/* state after stop state within strip */
824 states bef;		/* states reachable before */
825 int ch;				/* character or NONCHAR code */
826 states aft;		/* states already known reachable after */
827 {
828 	cset *cs;
829 	sop s;
830 	sopno pc;
831 	onestate here;		/* note, macros know this name */
832 	sopno look;
833 	onestate i;		/* Changed from int by Monty */
834 
835 	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
836 		s = g->strip[pc];
837 		switch (OP(s)) {
838 		case OEND:
839 			assert(pc == stop-1);
840 			break;
841 		case OCHAR:
842 			/* only characters can match */
843 			assert(!NONCHAR(ch) || ch != (char)OPND(s));
844 			if (ch == (char)OPND(s))
845 				FWD(aft, bef, 1);
846 			break;
847 		case OBOL:
848 			if (ch == BOL || ch == BOLEOL)
849 				FWD(aft, bef, 1);
850 			break;
851 		case OEOL:
852 			if (ch == EOL || ch == BOLEOL)
853 				FWD(aft, bef, 1);
854 			break;
855 		case OBOW:
856 			if (ch == BOW)
857 				FWD(aft, bef, 1);
858 			break;
859 		case OEOW:
860 			if (ch == EOW)
861 				FWD(aft, bef, 1);
862 			break;
863 		case OANY:
864 			if (!NONCHAR(ch))
865 				FWD(aft, bef, 1);
866 			break;
867 		case OANYOF:
868 			cs = &g->sets[OPND(s)];
869 			if (!NONCHAR(ch) && CHIN(cs, ch))
870 				FWD(aft, bef, 1);
871 			break;
872 		case OBACK_:		/* ignored here */
873 		case O_BACK:
874 			FWD(aft, aft, 1);
875 			break;
876 		case OPLUS_:		/* forward, this is just an empty */
877 			FWD(aft, aft, 1);
878 			break;
879 		case O_PLUS:		/* both forward and back */
880 			FWD(aft, aft, 1);
881 			i = ISSETBACK(aft, OPND(s));
882 			BACK(aft, aft, OPND(s));
883 			if (!i && ISSETBACK(aft, OPND(s))) {
884 				/* oho, must reconsider loop body */
885 				pc -= OPND(s) + 1;
886 				INIT(here, pc);
887 			}
888 			break;
889 		case OQUEST_:		/* two branches, both forward */
890 			FWD(aft, aft, 1);
891 			FWD(aft, aft, OPND(s));
892 			break;
893 		case O_QUEST:		/* just an empty */
894 			FWD(aft, aft, 1);
895 			break;
896 		case OLPAREN:		/* not significant here */
897 		case ORPAREN:
898 			FWD(aft, aft, 1);
899 			break;
900 		case OCH_:		/* mark the first two branches */
901 			FWD(aft, aft, 1);
902 			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
903 			FWD(aft, aft, OPND(s));
904 			break;
905 		case OOR1:		/* done a branch, find the O_CH */
906 			if (ISSTATEIN(aft, here)) {
907 				for (look = 1;
908 						OP(s = g->strip[pc+look]) != O_CH;
909 						look += OPND(s))
910 					assert(OP(s) == OOR2);
911 				FWD(aft, aft, look);
912 			}
913 			break;
914 		case OOR2:		/* propagate OCH_'s marking */
915 			FWD(aft, aft, 1);
916 			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
917 				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
918 				FWD(aft, aft, OPND(s));
919 			}
920 			break;
921 		case O_CH:		/* just empty */
922 			FWD(aft, aft, 1);
923 			break;
924 		default:		/* ooooops... */
925 			assert(nope);
926 			break;
927 		}
928 	}
929 
930 	return(aft);
931 }
932 
933 #ifdef REDEBUG
934 /*
935  - print - print a set of states
936  == #ifdef REDEBUG
937  == static void print(struct match *m, char *caption, states st, \
938  ==	int ch, FILE *d);
939  == #endif
940  */
941 static void
print(m,caption,st,ch,d)942 print(m, caption, st, ch, d)
943 struct match *m;
944 char *caption;
945 states st;
946 int ch;
947 FILE *d;
948 {
949 	struct re_guts *g = m->g;
950 	int i;
951 	int first = 1;
952 	char buf[10];
953 
954 	if (!(m->eflags&MY_REG_TRACE))
955 		return;
956 
957 	fprintf(d, "%s", caption);
958 	if (ch != '\0')
959 		fprintf(d, " %s", printchar(ch,buf));
960 	for (i = 0; i < g->nstates; i++)
961 		if (ISSET(st, i)) {
962 			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
963 			first = 0;
964 		}
965 	fprintf(d, "\n");
966 }
967 
968 /*
969  - at - print current situation
970  == #ifdef REDEBUG
971  == static void at(struct match *m, char *title, char *start, char *stop, \
972  ==						sopno startst, sopno stopst);
973  == #endif
974  */
975 static void
at(m,title,start,stop,startst,stopst)976 at(m, title, start, stop, startst, stopst)
977 struct match *m;
978 char *title;
979 char *start;
980 char *stop;
981 sopno startst;
982 sopno stopst;
983 {
984 	char buf[10];
985 	if (!(m->eflags&MY_REG_TRACE))
986 		return;
987 
988 	printf("%s %s-", title, printchar(*start,buf));
989 	printf("%s ", printchar(*stop,buf));
990 	printf("%ld-%ld\n", (long)startst, (long)stopst,buf);
991 }
992 
993 #ifndef PCHARDONE
994 #define	PCHARDONE	/* never again */
995 /*
996  - printchar - make a character printable
997  == #ifdef REDEBUG
998  == static char *printchar(int ch);
999  == #endif
1000  *
1001  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
1002  * duplicate here avoids having a debugging-capable regexec.o tied to
1003  * a matching debug.o, and this is convenient.  It all disappears in
1004  * the non-debug compilation anyway, so it doesn't matter much.
1005  */
1006 static char *			/* -> representation */
printchar(ch,pbuf)1007 printchar(ch,pbuf)
1008 int ch;
1009 char *pbuf;
1010 {
1011 	if (isprint(ch) || ch == ' ')
1012 		sprintf(pbuf, "%c", ch);
1013 	else
1014 		sprintf(pbuf, "\\%o", ch);
1015 	return(pbuf);
1016 }
1017 #endif
1018 #endif
1019 
1020 #undef	matcher
1021 #undef	fast
1022 #undef	slow
1023 #undef	dissect
1024 #undef	backref
1025 #undef	step
1026 #undef	print
1027 #undef	at
1028 #undef	match
1029