1 /*
2  * re_*exec and friends - match REs
3  *
4  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
5  *
6  * Development of this software was funded, in part, by Cray Research Inc.,
7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8  * Corporation, none of whom are responsible for the results.  The author
9  * thanks all of them.
10  *
11  * Redistribution and use in source and binary forms -- with or without
12  * modification -- are permitted for any purpose, provided that
13  * redistributions in source form retain this entire copyright notice and
14  * indicate the origin and nature of any modifications.
15  *
16  * I'd appreciate being given credit for this package in the documentation
17  * of software which uses it, but that is not a requirement.
18  *
19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * src/backend/regex/regexec.c
31  *
32  */
33 
34 #include "regex/regguts.h"
35 
36 
37 
38 /* lazy-DFA representation */
39 struct arcp
40 {								/* "pointer" to an outarc */
41 	struct sset *ss;
42 	color		co;
43 };
44 
45 struct sset
46 {								/* state set */
47 	unsigned   *states;			/* pointer to bitvector */
48 	unsigned	hash;			/* hash of bitvector */
49 #define  HASH(bv, nw)	 (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52 	int			flags;
53 #define  STARTER	 01			/* the initial state set */
54 #define  POSTSTATE	 02			/* includes the goal state */
55 #define  LOCKED		 04			/* locked in cache */
56 #define  NOPROGRESS  010		/* zero-progress state set */
57 	struct arcp ins;			/* chain of inarcs pointing here */
58 	chr		   *lastseen;		/* last entered on arrival here */
59 	struct sset **outs;			/* outarc vector indexed by color */
60 	struct arcp *inchain;		/* chain-pointer vector for outarcs */
61 };
62 
63 struct dfa
64 {
65 	int			nssets;			/* size of cache */
66 	int			nssused;		/* how many entries occupied yet */
67 	int			nstates;		/* number of states */
68 	int			ncolors;		/* length of outarc and inchain vectors */
69 	int			wordsper;		/* length of state-set bitvectors */
70 	struct sset *ssets;			/* state-set cache */
71 	unsigned   *statesarea;		/* bitvector storage */
72 	unsigned   *work;			/* pointer to work area within statesarea */
73 	struct sset **outsarea;		/* outarc-vector storage */
74 	struct arcp *incarea;		/* inchain storage */
75 	struct cnfa *cnfa;
76 	struct colormap *cm;
77 	chr		   *lastpost;		/* location of last cache-flushed success */
78 	chr		   *lastnopr;		/* location of last cache-flushed NOPROGRESS */
79 	struct sset *search;		/* replacement-search-pointer memory */
80 	int			cptsmalloced;	/* were the areas individually malloced? */
81 	char	   *mallocarea;		/* self, or master malloced area, or NULL */
82 };
83 
84 #define WORK	1				/* number of work bitvectors needed */
85 
86 /* setup for non-malloc allocation for small cases */
87 #define FEWSTATES	20			/* must be less than UBITS */
88 #define FEWCOLORS	15
89 struct smalldfa
90 {
91 	struct dfa	dfa;
92 	struct sset ssets[FEWSTATES * 2];
93 	unsigned	statesarea[FEWSTATES * 2 + WORK];
94 	struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
95 	struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
96 };
97 
98 #define DOMALLOC	((struct smalldfa *)NULL)	/* force malloc */
99 
100 
101 
102 /* internal variables, bundled for easy passing around */
103 struct vars
104 {
105 	regex_t    *re;
106 	struct guts *g;
107 	int			eflags;			/* copies of arguments */
108 	size_t		nmatch;
109 	regmatch_t *pmatch;
110 	rm_detail_t *details;
111 	chr		   *start;			/* start of string */
112 	chr		   *search_start;	/* search start of string */
113 	chr		   *stop;			/* just past end of string */
114 	int			err;			/* error code if any (0 none) */
115 	struct dfa **subdfas;		/* per-tree-subre DFAs */
116 	struct dfa **ladfas;		/* per-lacon-subre DFAs */
117 	struct sset **lblastcss;	/* per-lacon-subre lookbehind restart data */
118 	chr		  **lblastcp;		/* per-lacon-subre lookbehind restart data */
119 	struct smalldfa dfa1;
120 	struct smalldfa dfa2;
121 };
122 
123 #define VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
124 #define ISERR() VISERR(v)
125 #define VERR(vv,e)	((vv)->err = ((vv)->err ? (vv)->err : (e)))
126 #define ERR(e)	VERR(v, e)		/* record an error */
127 #define NOERR() {if (ISERR()) return v->err;}	/* if error seen, return it */
128 #define OFF(p)	((p) - v->start)
129 #define LOFF(p) ((long)OFF(p))
130 
131 
132 
133 /*
134  * forward declarations
135  */
136 /* === regexec.c === */
137 static struct dfa *getsubdfa(struct vars *, struct subre *);
138 static struct dfa *getladfa(struct vars *, int);
139 static int	find(struct vars *, struct cnfa *, struct colormap *);
140 static int	cfind(struct vars *, struct cnfa *, struct colormap *);
141 static int	cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
142 static void zapallsubs(regmatch_t *, size_t);
143 static void zaptreesubs(struct vars *, struct subre *);
144 static void subset(struct vars *, struct subre *, chr *, chr *);
145 static int	cdissect(struct vars *, struct subre *, chr *, chr *);
146 static int	ccondissect(struct vars *, struct subre *, chr *, chr *);
147 static int	crevcondissect(struct vars *, struct subre *, chr *, chr *);
148 static int	cbrdissect(struct vars *, struct subre *, chr *, chr *);
149 static int	caltdissect(struct vars *, struct subre *, chr *, chr *);
150 static int	citerdissect(struct vars *, struct subre *, chr *, chr *);
151 static int	creviterdissect(struct vars *, struct subre *, chr *, chr *);
152 
153 /* === rege_dfa.c === */
154 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
155 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
156 static int	matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
157 static chr *lastcold(struct vars *, struct dfa *);
158 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
159 static void freedfa(struct dfa *);
160 static unsigned hash(unsigned *, int);
161 static struct sset *initialize(struct vars *, struct dfa *, chr *);
162 static struct sset *miss(struct vars *, struct dfa *, struct sset *, color, chr *, chr *);
163 static int	lacon(struct vars *, struct cnfa *, chr *, color);
164 static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
165 static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
166 
167 
168 /*
169  * pg_regexec - match regular expression
170  */
171 int
pg_regexec(regex_t * re,const chr * string,size_t len,size_t search_start,rm_detail_t * details,size_t nmatch,regmatch_t pmatch[],int flags)172 pg_regexec(regex_t *re,
173 		   const chr *string,
174 		   size_t len,
175 		   size_t search_start,
176 		   rm_detail_t *details,
177 		   size_t nmatch,
178 		   regmatch_t pmatch[],
179 		   int flags)
180 {
181 	struct vars var;
182 	register struct vars *v = &var;
183 	int			st;
184 	size_t		n;
185 	size_t		i;
186 	int			backref;
187 
188 #define  LOCALMAT	 20
189 	regmatch_t	mat[LOCALMAT];
190 
191 #define  LOCALDFAS	 40
192 	struct dfa *subdfas[LOCALDFAS];
193 
194 	/* sanity checks */
195 	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
196 		return REG_INVARG;
197 	if (re->re_csize != sizeof(chr))
198 		return REG_MIXED;
199 	if (search_start > len)
200 		return REG_NOMATCH;
201 
202 	/* Initialize locale-dependent support */
203 	pg_set_regex_collation(re->re_collation);
204 
205 	/* setup */
206 	v->re = re;
207 	v->g = (struct guts *) re->re_guts;
208 	if ((v->g->cflags & REG_EXPECT) && details == NULL)
209 		return REG_INVARG;
210 	if (v->g->info & REG_UIMPOSSIBLE)
211 		return REG_NOMATCH;
212 	backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
213 	v->eflags = flags;
214 	if (v->g->cflags & REG_NOSUB)
215 		nmatch = 0;				/* override client */
216 	v->nmatch = nmatch;
217 	if (backref)
218 	{
219 		/* need work area */
220 		if (v->g->nsub + 1 <= LOCALMAT)
221 			v->pmatch = mat;
222 		else
223 			v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
224 											  sizeof(regmatch_t));
225 		if (v->pmatch == NULL)
226 			return REG_ESPACE;
227 		v->nmatch = v->g->nsub + 1;
228 	}
229 	else
230 		v->pmatch = pmatch;
231 	if (v->nmatch > 0)
232 		zapallsubs(v->pmatch, v->nmatch);
233 	v->details = details;
234 	v->start = (chr *) string;
235 	v->search_start = (chr *) string + search_start;
236 	v->stop = (chr *) string + len;
237 	v->err = 0;
238 	v->subdfas = NULL;
239 	v->ladfas = NULL;
240 	v->lblastcss = NULL;
241 	v->lblastcp = NULL;
242 	/* below this point, "goto cleanup" will behave sanely */
243 
244 	assert(v->g->ntree >= 0);
245 	n = (size_t) v->g->ntree;
246 	if (n <= LOCALDFAS)
247 		v->subdfas = subdfas;
248 	else
249 	{
250 		v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
251 		if (v->subdfas == NULL)
252 		{
253 			st = REG_ESPACE;
254 			goto cleanup;
255 		}
256 	}
257 	for (i = 0; i < n; i++)
258 		v->subdfas[i] = NULL;
259 
260 	assert(v->g->nlacons >= 0);
261 	n = (size_t) v->g->nlacons;
262 	if (n > 0)
263 	{
264 		v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
265 		if (v->ladfas == NULL)
266 		{
267 			st = REG_ESPACE;
268 			goto cleanup;
269 		}
270 		for (i = 0; i < n; i++)
271 			v->ladfas[i] = NULL;
272 		v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
273 		v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
274 		if (v->lblastcss == NULL || v->lblastcp == NULL)
275 		{
276 			st = REG_ESPACE;
277 			goto cleanup;
278 		}
279 		for (i = 0; i < n; i++)
280 		{
281 			v->lblastcss[i] = NULL;
282 			v->lblastcp[i] = NULL;
283 		}
284 	}
285 
286 	/* do it */
287 	assert(v->g->tree != NULL);
288 	if (backref)
289 		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
290 	else
291 		st = find(v, &v->g->tree->cnfa, &v->g->cmap);
292 
293 	/* copy (portion of) match vector over if necessary */
294 	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
295 	{
296 		zapallsubs(pmatch, nmatch);
297 		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
298 		memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
299 	}
300 
301 	/* clean up */
302 cleanup:
303 	if (v->pmatch != pmatch && v->pmatch != mat)
304 		FREE(v->pmatch);
305 	if (v->subdfas != NULL)
306 	{
307 		n = (size_t) v->g->ntree;
308 		for (i = 0; i < n; i++)
309 		{
310 			if (v->subdfas[i] != NULL)
311 				freedfa(v->subdfas[i]);
312 		}
313 		if (v->subdfas != subdfas)
314 			FREE(v->subdfas);
315 	}
316 	if (v->ladfas != NULL)
317 	{
318 		n = (size_t) v->g->nlacons;
319 		for (i = 0; i < n; i++)
320 		{
321 			if (v->ladfas[i] != NULL)
322 				freedfa(v->ladfas[i]);
323 		}
324 		FREE(v->ladfas);
325 	}
326 	if (v->lblastcss != NULL)
327 		FREE(v->lblastcss);
328 	if (v->lblastcp != NULL)
329 		FREE(v->lblastcp);
330 
331 	return st;
332 }
333 
334 /*
335  * getsubdfa - create or re-fetch the DFA for a tree subre node
336  *
337  * We only need to create the DFA once per overall regex execution.
338  * The DFA will be freed by the cleanup step in pg_regexec().
339  */
340 static struct dfa *
getsubdfa(struct vars * v,struct subre * t)341 getsubdfa(struct vars *v,
342 		  struct subre *t)
343 {
344 	if (v->subdfas[t->id] == NULL)
345 	{
346 		v->subdfas[t->id] = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
347 		if (ISERR())
348 			return NULL;
349 	}
350 	return v->subdfas[t->id];
351 }
352 
353 /*
354  * getladfa - create or re-fetch the DFA for a LACON subre node
355  *
356  * Same as above, but for LACONs.
357  */
358 static struct dfa *
getladfa(struct vars * v,int n)359 getladfa(struct vars *v,
360 		 int n)
361 {
362 	assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
363 
364 	if (v->ladfas[n] == NULL)
365 	{
366 		struct subre *sub = &v->g->lacons[n];
367 
368 		v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
369 		if (ISERR())
370 			return NULL;
371 	}
372 	return v->ladfas[n];
373 }
374 
375 /*
376  * find - find a match for the main NFA (no-complications case)
377  */
378 static int
find(struct vars * v,struct cnfa * cnfa,struct colormap * cm)379 find(struct vars *v,
380 	 struct cnfa *cnfa,
381 	 struct colormap *cm)
382 {
383 	struct dfa *s;
384 	struct dfa *d;
385 	chr		   *begin;
386 	chr		   *end = NULL;
387 	chr		   *cold;
388 	chr		   *open;			/* open and close of range of possible starts */
389 	chr		   *close;
390 	int			hitend;
391 	int			shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
392 
393 	/* first, a shot with the search RE */
394 	s = newdfa(v, &v->g->search, cm, &v->dfa1);
395 	assert(!(ISERR() && s != NULL));
396 	NOERR();
397 	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
398 	cold = NULL;
399 	close = shortest(v, s, v->search_start, v->search_start, v->stop,
400 					 &cold, (int *) NULL);
401 	freedfa(s);
402 	NOERR();
403 	if (v->g->cflags & REG_EXPECT)
404 	{
405 		assert(v->details != NULL);
406 		if (cold != NULL)
407 			v->details->rm_extend.rm_so = OFF(cold);
408 		else
409 			v->details->rm_extend.rm_so = OFF(v->stop);
410 		v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
411 	}
412 	if (close == NULL)			/* not found */
413 		return REG_NOMATCH;
414 	if (v->nmatch == 0)			/* found, don't need exact location */
415 		return REG_OKAY;
416 
417 	/* find starting point and match */
418 	assert(cold != NULL);
419 	open = cold;
420 	cold = NULL;
421 	MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
422 	d = newdfa(v, cnfa, cm, &v->dfa1);
423 	assert(!(ISERR() && d != NULL));
424 	NOERR();
425 	for (begin = open; begin <= close; begin++)
426 	{
427 		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
428 		if (shorter)
429 			end = shortest(v, d, begin, begin, v->stop,
430 						   (chr **) NULL, &hitend);
431 		else
432 			end = longest(v, d, begin, v->stop, &hitend);
433 		if (ISERR())
434 		{
435 			freedfa(d);
436 			return v->err;
437 		}
438 		if (hitend && cold == NULL)
439 			cold = begin;
440 		if (end != NULL)
441 			break;				/* NOTE BREAK OUT */
442 	}
443 	assert(end != NULL);		/* search RE succeeded so loop should */
444 	freedfa(d);
445 
446 	/* and pin down details */
447 	assert(v->nmatch > 0);
448 	v->pmatch[0].rm_so = OFF(begin);
449 	v->pmatch[0].rm_eo = OFF(end);
450 	if (v->g->cflags & REG_EXPECT)
451 	{
452 		if (cold != NULL)
453 			v->details->rm_extend.rm_so = OFF(cold);
454 		else
455 			v->details->rm_extend.rm_so = OFF(v->stop);
456 		v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
457 	}
458 	if (v->nmatch == 1)			/* no need for submatches */
459 		return REG_OKAY;
460 
461 	/* find submatches */
462 	return cdissect(v, v->g->tree, begin, end);
463 }
464 
465 /*
466  * cfind - find a match for the main NFA (with complications)
467  */
468 static int
cfind(struct vars * v,struct cnfa * cnfa,struct colormap * cm)469 cfind(struct vars *v,
470 	  struct cnfa *cnfa,
471 	  struct colormap *cm)
472 {
473 	struct dfa *s;
474 	struct dfa *d;
475 	chr		   *cold;
476 	int			ret;
477 
478 	s = newdfa(v, &v->g->search, cm, &v->dfa1);
479 	NOERR();
480 	d = newdfa(v, cnfa, cm, &v->dfa2);
481 	if (ISERR())
482 	{
483 		assert(d == NULL);
484 		freedfa(s);
485 		return v->err;
486 	}
487 
488 	ret = cfindloop(v, cnfa, cm, d, s, &cold);
489 
490 	freedfa(d);
491 	freedfa(s);
492 	NOERR();
493 	if (v->g->cflags & REG_EXPECT)
494 	{
495 		assert(v->details != NULL);
496 		if (cold != NULL)
497 			v->details->rm_extend.rm_so = OFF(cold);
498 		else
499 			v->details->rm_extend.rm_so = OFF(v->stop);
500 		v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
501 	}
502 	return ret;
503 }
504 
505 /*
506  * cfindloop - the heart of cfind
507  */
508 static int
cfindloop(struct vars * v,struct cnfa * cnfa,struct colormap * cm,struct dfa * d,struct dfa * s,chr ** coldp)509 cfindloop(struct vars *v,
510 		  struct cnfa *cnfa,
511 		  struct colormap *cm,
512 		  struct dfa *d,
513 		  struct dfa *s,
514 		  chr **coldp)			/* where to put coldstart pointer */
515 {
516 	chr		   *begin;
517 	chr		   *end;
518 	chr		   *cold;
519 	chr		   *open;			/* open and close of range of possible starts */
520 	chr		   *close;
521 	chr		   *estart;
522 	chr		   *estop;
523 	int			er;
524 	int			shorter = v->g->tree->flags & SHORTER;
525 	int			hitend;
526 
527 	assert(d != NULL && s != NULL);
528 	cold = NULL;
529 	close = v->search_start;
530 	do
531 	{
532 		/* Search with the search RE for match range at/beyond "close" */
533 		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
534 		close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
535 		if (ISERR())
536 		{
537 			*coldp = cold;
538 			return v->err;
539 		}
540 		if (close == NULL)
541 			break;				/* no more possible match anywhere */
542 		assert(cold != NULL);
543 		open = cold;
544 		cold = NULL;
545 		/* Search for matches starting between "open" and "close" inclusive */
546 		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
547 		for (begin = open; begin <= close; begin++)
548 		{
549 			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
550 			estart = begin;
551 			estop = v->stop;
552 			for (;;)
553 			{
554 				/* Here we use the top node's detailed RE */
555 				if (shorter)
556 					end = shortest(v, d, begin, estart,
557 								   estop, (chr **) NULL, &hitend);
558 				else
559 					end = longest(v, d, begin, estop,
560 								  &hitend);
561 				if (ISERR())
562 				{
563 					*coldp = cold;
564 					return v->err;
565 				}
566 				if (hitend && cold == NULL)
567 					cold = begin;
568 				if (end == NULL)
569 					break;		/* no match with this begin point, try next */
570 				MDEBUG(("tentative end %ld\n", LOFF(end)));
571 				/* Dissect the potential match to see if it really matches */
572 				er = cdissect(v, v->g->tree, begin, end);
573 				if (er == REG_OKAY)
574 				{
575 					if (v->nmatch > 0)
576 					{
577 						v->pmatch[0].rm_so = OFF(begin);
578 						v->pmatch[0].rm_eo = OFF(end);
579 					}
580 					*coldp = cold;
581 					return REG_OKAY;
582 				}
583 				if (er != REG_NOMATCH)
584 				{
585 					ERR(er);
586 					*coldp = cold;
587 					return er;
588 				}
589 				/* Try next longer/shorter match with same begin point */
590 				if (shorter)
591 				{
592 					if (end == estop)
593 						break;	/* no more, so try next begin point */
594 					estart = end + 1;
595 				}
596 				else
597 				{
598 					if (end == begin)
599 						break;	/* no more, so try next begin point */
600 					estop = end - 1;
601 				}
602 			}					/* end loop over endpoint positions */
603 		}						/* end loop over beginning positions */
604 
605 		/*
606 		 * If we get here, there is no possible match starting at or before
607 		 * "close", so consider matches beyond that.  We'll do a fresh search
608 		 * with the search RE to find a new promising match range.
609 		 */
610 		close++;
611 	} while (close < v->stop);
612 
613 	*coldp = cold;
614 	return REG_NOMATCH;
615 }
616 
617 /*
618  * zapallsubs - initialize all subexpression matches to "no match"
619  *
620  * Note that p[0], the overall-match location, is not touched.
621  */
622 static void
zapallsubs(regmatch_t * p,size_t n)623 zapallsubs(regmatch_t *p,
624 		   size_t n)
625 {
626 	size_t		i;
627 
628 	for (i = n - 1; i > 0; i--)
629 	{
630 		p[i].rm_so = -1;
631 		p[i].rm_eo = -1;
632 	}
633 }
634 
635 /*
636  * zaptreesubs - initialize subexpressions within subtree to "no match"
637  */
638 static void
zaptreesubs(struct vars * v,struct subre * t)639 zaptreesubs(struct vars *v,
640 			struct subre *t)
641 {
642 	if (t->op == '(')
643 	{
644 		int			n = t->subno;
645 
646 		assert(n > 0);
647 		if ((size_t) n < v->nmatch)
648 		{
649 			v->pmatch[n].rm_so = -1;
650 			v->pmatch[n].rm_eo = -1;
651 		}
652 	}
653 
654 	if (t->left != NULL)
655 		zaptreesubs(v, t->left);
656 	if (t->right != NULL)
657 		zaptreesubs(v, t->right);
658 }
659 
660 /*
661  * subset - set subexpression match data for a successful subre
662  */
663 static void
subset(struct vars * v,struct subre * sub,chr * begin,chr * end)664 subset(struct vars *v,
665 	   struct subre *sub,
666 	   chr *begin,
667 	   chr *end)
668 {
669 	int			n = sub->subno;
670 
671 	assert(n > 0);
672 	if ((size_t) n >= v->nmatch)
673 		return;
674 
675 	MDEBUG(("setting %d\n", n));
676 	v->pmatch[n].rm_so = OFF(begin);
677 	v->pmatch[n].rm_eo = OFF(end);
678 }
679 
680 /*
681  * cdissect - check backrefs and determine subexpression matches
682  *
683  * cdissect recursively processes a subre tree to check matching of backrefs
684  * and/or identify submatch boundaries for capture nodes.  The proposed match
685  * runs from "begin" to "end" (not including "end"), and we are basically
686  * "dissecting" it to see where the submatches are.
687  *
688  * Before calling any level of cdissect, the caller must have run the node's
689  * DFA and found that the proposed substring satisfies the DFA.  (We make
690  * the caller do that because in concatenation and iteration nodes, it's
691  * much faster to check all the substrings against the child DFAs before we
692  * recurse.)
693  *
694  * A side-effect of a successful match is to save match locations for
695  * capturing subexpressions in v->pmatch[].  This is a little bit tricky,
696  * so we make the following rules:
697  * 1. Before initial entry to cdissect, all match data must have been
698  *    cleared (this is seen to by zapallsubs).
699  * 2. Before any recursive entry to cdissect, the match data for that
700  *    subexpression tree must be guaranteed clear (see zaptreesubs).
701  * 3. When returning REG_OKAY, each level of cdissect will have saved
702  *    any relevant match locations.
703  * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
704  *    that its subexpression match locations are again clear.
705  * 5. No guarantees are made for error cases (i.e., other result codes).
706  * 6. When a level of cdissect abandons a successful sub-match, it will
707  *    clear that subtree's match locations with zaptreesubs before trying
708  *    any new DFA match or cdissect call for that subtree or any subtree
709  *    to its right (that is, any subtree that could have a backref into the
710  *    abandoned match).
711  * This may seem overly complicated, but it's difficult to simplify it
712  * because of the provision that match locations must be reset before
713  * any fresh DFA match (a rule that is needed to make dfa_backref safe).
714  * That means it won't work to just reset relevant match locations at the
715  * start of each cdissect level.
716  */
717 static int						/* regexec return code */
cdissect(struct vars * v,struct subre * t,chr * begin,chr * end)718 cdissect(struct vars *v,
719 		 struct subre *t,
720 		 chr *begin,			/* beginning of relevant substring */
721 		 chr *end)				/* end of same */
722 {
723 	int			er;
724 
725 	assert(t != NULL);
726 	MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
727 
728 	/* handy place to check for operation cancel */
729 	if (CANCEL_REQUESTED(v->re))
730 		return REG_CANCEL;
731 	/* ... and stack overrun */
732 	if (STACK_TOO_DEEP(v->re))
733 		return REG_ETOOBIG;
734 
735 	switch (t->op)
736 	{
737 		case '=':				/* terminal node */
738 			assert(t->left == NULL && t->right == NULL);
739 			er = REG_OKAY;		/* no action, parent did the work */
740 			break;
741 		case 'b':				/* back reference */
742 			assert(t->left == NULL && t->right == NULL);
743 			er = cbrdissect(v, t, begin, end);
744 			break;
745 		case '.':				/* concatenation */
746 			assert(t->left != NULL && t->right != NULL);
747 			if (t->left->flags & SHORTER)	/* reverse scan */
748 				er = crevcondissect(v, t, begin, end);
749 			else
750 				er = ccondissect(v, t, begin, end);
751 			break;
752 		case '|':				/* alternation */
753 			assert(t->left != NULL);
754 			er = caltdissect(v, t, begin, end);
755 			break;
756 		case '*':				/* iteration */
757 			assert(t->left != NULL);
758 			if (t->left->flags & SHORTER)	/* reverse scan */
759 				er = creviterdissect(v, t, begin, end);
760 			else
761 				er = citerdissect(v, t, begin, end);
762 			break;
763 		case '(':				/* capturing */
764 			assert(t->left != NULL && t->right == NULL);
765 			assert(t->subno > 0);
766 			er = cdissect(v, t->left, begin, end);
767 			if (er == REG_OKAY)
768 				subset(v, t, begin, end);
769 			break;
770 		default:
771 			er = REG_ASSERT;
772 			break;
773 	}
774 
775 	/*
776 	 * We should never have a match failure unless backrefs lurk below;
777 	 * otherwise, either caller failed to check the DFA, or there's some
778 	 * inconsistency between the DFA and the node's innards.
779 	 */
780 	assert(er != REG_NOMATCH || (t->flags & BACKR));
781 
782 	return er;
783 }
784 
785 /*
786  * ccondissect - dissect match for concatenation node
787  */
788 static int						/* regexec return code */
ccondissect(struct vars * v,struct subre * t,chr * begin,chr * end)789 ccondissect(struct vars *v,
790 			struct subre *t,
791 			chr *begin,			/* beginning of relevant substring */
792 			chr *end)			/* end of same */
793 {
794 	struct dfa *d;
795 	struct dfa *d2;
796 	chr		   *mid;
797 	int			er;
798 
799 	assert(t->op == '.');
800 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
801 	assert(t->right != NULL && t->right->cnfa.nstates > 0);
802 	assert(!(t->left->flags & SHORTER));
803 
804 	d = getsubdfa(v, t->left);
805 	NOERR();
806 	d2 = getsubdfa(v, t->right);
807 	NOERR();
808 	MDEBUG(("cconcat %d\n", t->id));
809 
810 	/* pick a tentative midpoint */
811 	mid = longest(v, d, begin, end, (int *) NULL);
812 	NOERR();
813 	if (mid == NULL)
814 		return REG_NOMATCH;
815 	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
816 
817 	/* iterate until satisfaction or failure */
818 	for (;;)
819 	{
820 		/* try this midpoint on for size */
821 		if (longest(v, d2, mid, end, (int *) NULL) == end)
822 		{
823 			er = cdissect(v, t->left, begin, mid);
824 			if (er == REG_OKAY)
825 			{
826 				er = cdissect(v, t->right, mid, end);
827 				if (er == REG_OKAY)
828 				{
829 					/* satisfaction */
830 					MDEBUG(("successful\n"));
831 					return REG_OKAY;
832 				}
833 				/* Reset left's matches (right should have done so itself) */
834 				zaptreesubs(v, t->left);
835 			}
836 			if (er != REG_NOMATCH)
837 				return er;
838 		}
839 		NOERR();
840 
841 		/* that midpoint didn't work, find a new one */
842 		if (mid == begin)
843 		{
844 			/* all possibilities exhausted */
845 			MDEBUG(("%d no midpoint\n", t->id));
846 			return REG_NOMATCH;
847 		}
848 		mid = longest(v, d, begin, mid - 1, (int *) NULL);
849 		NOERR();
850 		if (mid == NULL)
851 		{
852 			/* failed to find a new one */
853 			MDEBUG(("%d failed midpoint\n", t->id));
854 			return REG_NOMATCH;
855 		}
856 		MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
857 	}
858 
859 	/* can't get here */
860 	return REG_ASSERT;
861 }
862 
863 /*
864  * crevcondissect - dissect match for concatenation node, shortest-first
865  */
866 static int						/* regexec return code */
crevcondissect(struct vars * v,struct subre * t,chr * begin,chr * end)867 crevcondissect(struct vars *v,
868 			   struct subre *t,
869 			   chr *begin,		/* beginning of relevant substring */
870 			   chr *end)		/* end of same */
871 {
872 	struct dfa *d;
873 	struct dfa *d2;
874 	chr		   *mid;
875 	int			er;
876 
877 	assert(t->op == '.');
878 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
879 	assert(t->right != NULL && t->right->cnfa.nstates > 0);
880 	assert(t->left->flags & SHORTER);
881 
882 	d = getsubdfa(v, t->left);
883 	NOERR();
884 	d2 = getsubdfa(v, t->right);
885 	NOERR();
886 	MDEBUG(("crevcon %d\n", t->id));
887 
888 	/* pick a tentative midpoint */
889 	mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
890 	NOERR();
891 	if (mid == NULL)
892 		return REG_NOMATCH;
893 	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
894 
895 	/* iterate until satisfaction or failure */
896 	for (;;)
897 	{
898 		/* try this midpoint on for size */
899 		if (longest(v, d2, mid, end, (int *) NULL) == end)
900 		{
901 			er = cdissect(v, t->left, begin, mid);
902 			if (er == REG_OKAY)
903 			{
904 				er = cdissect(v, t->right, mid, end);
905 				if (er == REG_OKAY)
906 				{
907 					/* satisfaction */
908 					MDEBUG(("successful\n"));
909 					return REG_OKAY;
910 				}
911 				/* Reset left's matches (right should have done so itself) */
912 				zaptreesubs(v, t->left);
913 			}
914 			if (er != REG_NOMATCH)
915 				return er;
916 		}
917 		NOERR();
918 
919 		/* that midpoint didn't work, find a new one */
920 		if (mid == end)
921 		{
922 			/* all possibilities exhausted */
923 			MDEBUG(("%d no midpoint\n", t->id));
924 			return REG_NOMATCH;
925 		}
926 		mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
927 		NOERR();
928 		if (mid == NULL)
929 		{
930 			/* failed to find a new one */
931 			MDEBUG(("%d failed midpoint\n", t->id));
932 			return REG_NOMATCH;
933 		}
934 		MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
935 	}
936 
937 	/* can't get here */
938 	return REG_ASSERT;
939 }
940 
941 /*
942  * cbrdissect - dissect match for backref node
943  */
944 static int						/* regexec return code */
cbrdissect(struct vars * v,struct subre * t,chr * begin,chr * end)945 cbrdissect(struct vars *v,
946 		   struct subre *t,
947 		   chr *begin,			/* beginning of relevant substring */
948 		   chr *end)			/* end of same */
949 {
950 	int			n = t->subno;
951 	size_t		numreps;
952 	size_t		tlen;
953 	size_t		brlen;
954 	chr		   *brstring;
955 	chr		   *p;
956 	int			min = t->min;
957 	int			max = t->max;
958 
959 	assert(t != NULL);
960 	assert(t->op == 'b');
961 	assert(n >= 0);
962 	assert((size_t) n < v->nmatch);
963 
964 	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
965 
966 	/* get the backreferenced string */
967 	if (v->pmatch[n].rm_so == -1)
968 		return REG_NOMATCH;
969 	brstring = v->start + v->pmatch[n].rm_so;
970 	brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
971 
972 	/* special cases for zero-length strings */
973 	if (brlen == 0)
974 	{
975 		/*
976 		 * matches only if target is zero length, but any number of
977 		 * repetitions can be considered to be present
978 		 */
979 		if (begin == end && min <= max)
980 		{
981 			MDEBUG(("cbackref matched trivially\n"));
982 			return REG_OKAY;
983 		}
984 		return REG_NOMATCH;
985 	}
986 	if (begin == end)
987 	{
988 		/* matches only if zero repetitions are okay */
989 		if (min == 0)
990 		{
991 			MDEBUG(("cbackref matched trivially\n"));
992 			return REG_OKAY;
993 		}
994 		return REG_NOMATCH;
995 	}
996 
997 	/*
998 	 * check target length to see if it could possibly be an allowed number of
999 	 * repetitions of brstring
1000 	 */
1001 	assert(end > begin);
1002 	tlen = end - begin;
1003 	if (tlen % brlen != 0)
1004 		return REG_NOMATCH;
1005 	numreps = tlen / brlen;
1006 	if (numreps < min || (numreps > max && max != DUPINF))
1007 		return REG_NOMATCH;
1008 
1009 	/* okay, compare the actual string contents */
1010 	p = begin;
1011 	while (numreps-- > 0)
1012 	{
1013 		if ((*v->g->compare) (brstring, p, brlen) != 0)
1014 			return REG_NOMATCH;
1015 		p += brlen;
1016 	}
1017 
1018 	MDEBUG(("cbackref matched\n"));
1019 	return REG_OKAY;
1020 }
1021 
1022 /*
1023  * caltdissect - dissect match for alternation node
1024  */
1025 static int						/* regexec return code */
caltdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1026 caltdissect(struct vars *v,
1027 			struct subre *t,
1028 			chr *begin,			/* beginning of relevant substring */
1029 			chr *end)			/* end of same */
1030 {
1031 	struct dfa *d;
1032 	int			er;
1033 
1034 	/* We loop, rather than tail-recurse, to handle a chain of alternatives */
1035 	while (t != NULL)
1036 	{
1037 		assert(t->op == '|');
1038 		assert(t->left != NULL && t->left->cnfa.nstates > 0);
1039 
1040 		MDEBUG(("calt n%d\n", t->id));
1041 
1042 		d = getsubdfa(v, t->left);
1043 		NOERR();
1044 		if (longest(v, d, begin, end, (int *) NULL) == end)
1045 		{
1046 			MDEBUG(("calt matched\n"));
1047 			er = cdissect(v, t->left, begin, end);
1048 			if (er != REG_NOMATCH)
1049 				return er;
1050 		}
1051 		NOERR();
1052 
1053 		t = t->right;
1054 	}
1055 
1056 	return REG_NOMATCH;
1057 }
1058 
1059 /*
1060  * citerdissect - dissect match for iteration node
1061  */
1062 static int						/* regexec return code */
citerdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1063 citerdissect(struct vars *v,
1064 			 struct subre *t,
1065 			 chr *begin,		/* beginning of relevant substring */
1066 			 chr *end)			/* end of same */
1067 {
1068 	struct dfa *d;
1069 	chr		  **endpts;
1070 	chr		   *limit;
1071 	int			min_matches;
1072 	size_t		max_matches;
1073 	int			nverified;
1074 	int			k;
1075 	int			i;
1076 	int			er;
1077 
1078 	assert(t->op == '*');
1079 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
1080 	assert(!(t->left->flags & SHORTER));
1081 	assert(begin <= end);
1082 
1083 	/*
1084 	 * For the moment, assume the minimum number of matches is 1.  If zero
1085 	 * matches are allowed, and the target string is empty, we are allowed to
1086 	 * match regardless of the contents of the iter node --- but we would
1087 	 * prefer to match once, so that capturing parens get set.  (An example of
1088 	 * the concern here is a pattern like "()*\1", which historically this
1089 	 * code has allowed to succeed.)  Therefore, we deal with the zero-matches
1090 	 * case at the bottom, after failing to find any other way to match.
1091 	 */
1092 	min_matches = t->min;
1093 	if (min_matches <= 0)
1094 		min_matches = 1;
1095 
1096 	/*
1097 	 * We need workspace to track the endpoints of each sub-match.  Normally
1098 	 * we consider only nonzero-length sub-matches, so there can be at most
1099 	 * end-begin of them.  However, if min is larger than that, we will also
1100 	 * consider zero-length sub-matches in order to find enough matches.
1101 	 *
1102 	 * For convenience, endpts[0] contains the "begin" pointer and we store
1103 	 * sub-match endpoints in endpts[1..max_matches].
1104 	 */
1105 	max_matches = end - begin;
1106 	if (max_matches > t->max && t->max != DUPINF)
1107 		max_matches = t->max;
1108 	if (max_matches < min_matches)
1109 		max_matches = min_matches;
1110 	endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1111 	if (endpts == NULL)
1112 		return REG_ESPACE;
1113 	endpts[0] = begin;
1114 
1115 	d = getsubdfa(v, t->left);
1116 	if (ISERR())
1117 	{
1118 		FREE(endpts);
1119 		return v->err;
1120 	}
1121 	MDEBUG(("citer %d\n", t->id));
1122 
1123 	/*
1124 	 * Our strategy is to first find a set of sub-match endpoints that are
1125 	 * valid according to the child node's DFA, and then recursively dissect
1126 	 * each sub-match to confirm validity.  If any validity check fails,
1127 	 * backtrack that sub-match and try again.  And, when we next try for a
1128 	 * validity check, we need not recheck any successfully verified
1129 	 * sub-matches that we didn't move the endpoints of.  nverified remembers
1130 	 * how many sub-matches are currently known okay.
1131 	 */
1132 
1133 	/* initialize to consider first sub-match */
1134 	nverified = 0;
1135 	k = 1;
1136 	limit = end;
1137 
1138 	/* iterate until satisfaction or failure */
1139 	while (k > 0)
1140 	{
1141 		/* try to find an endpoint for the k'th sub-match */
1142 		endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1143 		if (ISERR())
1144 		{
1145 			FREE(endpts);
1146 			return v->err;
1147 		}
1148 		if (endpts[k] == NULL)
1149 		{
1150 			/* no match possible, so see if we can shorten previous one */
1151 			k--;
1152 			goto backtrack;
1153 		}
1154 		MDEBUG(("%d: working endpoint %d: %ld\n",
1155 				t->id, k, LOFF(endpts[k])));
1156 
1157 		/* k'th sub-match can no longer be considered verified */
1158 		if (nverified >= k)
1159 			nverified = k - 1;
1160 
1161 		if (endpts[k] != end)
1162 		{
1163 			/* haven't reached end yet, try another iteration if allowed */
1164 			if (k >= max_matches)
1165 			{
1166 				/* must try to shorten some previous match */
1167 				k--;
1168 				goto backtrack;
1169 			}
1170 
1171 			/* reject zero-length match unless necessary to achieve min */
1172 			if (endpts[k] == endpts[k - 1] &&
1173 				(k >= min_matches || min_matches - k < end - endpts[k]))
1174 				goto backtrack;
1175 
1176 			k++;
1177 			limit = end;
1178 			continue;
1179 		}
1180 
1181 		/*
1182 		 * We've identified a way to divide the string into k sub-matches that
1183 		 * works so far as the child DFA can tell.  If k is an allowed number
1184 		 * of matches, start the slow part: recurse to verify each sub-match.
1185 		 * We always have k <= max_matches, needn't check that.
1186 		 */
1187 		if (k < min_matches)
1188 			goto backtrack;
1189 
1190 		MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1191 
1192 		for (i = nverified + 1; i <= k; i++)
1193 		{
1194 			/* zap any match data from a non-last iteration */
1195 			zaptreesubs(v, t->left);
1196 			er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
1197 			if (er == REG_OKAY)
1198 			{
1199 				nverified = i;
1200 				continue;
1201 			}
1202 			if (er == REG_NOMATCH)
1203 				break;
1204 			/* oops, something failed */
1205 			FREE(endpts);
1206 			return er;
1207 		}
1208 
1209 		if (i > k)
1210 		{
1211 			/* satisfaction */
1212 			MDEBUG(("%d successful\n", t->id));
1213 			FREE(endpts);
1214 			return REG_OKAY;
1215 		}
1216 
1217 		/* i'th match failed to verify, so backtrack it */
1218 		k = i;
1219 
1220 backtrack:
1221 
1222 		/*
1223 		 * Must consider shorter versions of the k'th sub-match.  However,
1224 		 * we'll only ask for a zero-length match if necessary.
1225 		 */
1226 		while (k > 0)
1227 		{
1228 			chr		   *prev_end = endpts[k - 1];
1229 
1230 			if (endpts[k] > prev_end)
1231 			{
1232 				limit = endpts[k] - 1;
1233 				if (limit > prev_end ||
1234 					(k < min_matches && min_matches - k >= end - prev_end))
1235 				{
1236 					/* break out of backtrack loop, continue the outer one */
1237 					break;
1238 				}
1239 			}
1240 			/* can't shorten k'th sub-match any more, consider previous one */
1241 			k--;
1242 		}
1243 	}
1244 
1245 	/* all possibilities exhausted */
1246 	FREE(endpts);
1247 
1248 	/*
1249 	 * Now consider the possibility that we can match to a zero-length string
1250 	 * by using zero repetitions.
1251 	 */
1252 	if (t->min == 0 && begin == end)
1253 	{
1254 		MDEBUG(("%d allowing zero matches\n", t->id));
1255 		return REG_OKAY;
1256 	}
1257 
1258 	MDEBUG(("%d failed\n", t->id));
1259 	return REG_NOMATCH;
1260 }
1261 
1262 /*
1263  * creviterdissect - dissect match for iteration node, shortest-first
1264  */
1265 static int						/* regexec return code */
creviterdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1266 creviterdissect(struct vars *v,
1267 				struct subre *t,
1268 				chr *begin,		/* beginning of relevant substring */
1269 				chr *end)		/* end of same */
1270 {
1271 	struct dfa *d;
1272 	chr		  **endpts;
1273 	chr		   *limit;
1274 	int			min_matches;
1275 	size_t		max_matches;
1276 	int			nverified;
1277 	int			k;
1278 	int			i;
1279 	int			er;
1280 
1281 	assert(t->op == '*');
1282 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
1283 	assert(t->left->flags & SHORTER);
1284 	assert(begin <= end);
1285 
1286 	/*
1287 	 * If zero matches are allowed, and target string is empty, just declare
1288 	 * victory.  OTOH, if target string isn't empty, zero matches can't work
1289 	 * so we pretend the min is 1.
1290 	 */
1291 	min_matches = t->min;
1292 	if (min_matches <= 0)
1293 	{
1294 		if (begin == end)
1295 			return REG_OKAY;
1296 		min_matches = 1;
1297 	}
1298 
1299 	/*
1300 	 * We need workspace to track the endpoints of each sub-match.  Normally
1301 	 * we consider only nonzero-length sub-matches, so there can be at most
1302 	 * end-begin of them.  However, if min is larger than that, we will also
1303 	 * consider zero-length sub-matches in order to find enough matches.
1304 	 *
1305 	 * For convenience, endpts[0] contains the "begin" pointer and we store
1306 	 * sub-match endpoints in endpts[1..max_matches].
1307 	 */
1308 	max_matches = end - begin;
1309 	if (max_matches > t->max && t->max != DUPINF)
1310 		max_matches = t->max;
1311 	if (max_matches < min_matches)
1312 		max_matches = min_matches;
1313 	endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1314 	if (endpts == NULL)
1315 		return REG_ESPACE;
1316 	endpts[0] = begin;
1317 
1318 	d = getsubdfa(v, t->left);
1319 	if (ISERR())
1320 	{
1321 		FREE(endpts);
1322 		return v->err;
1323 	}
1324 	MDEBUG(("creviter %d\n", t->id));
1325 
1326 	/*
1327 	 * Our strategy is to first find a set of sub-match endpoints that are
1328 	 * valid according to the child node's DFA, and then recursively dissect
1329 	 * each sub-match to confirm validity.  If any validity check fails,
1330 	 * backtrack that sub-match and try again.  And, when we next try for a
1331 	 * validity check, we need not recheck any successfully verified
1332 	 * sub-matches that we didn't move the endpoints of.  nverified remembers
1333 	 * how many sub-matches are currently known okay.
1334 	 */
1335 
1336 	/* initialize to consider first sub-match */
1337 	nverified = 0;
1338 	k = 1;
1339 	limit = begin;
1340 
1341 	/* iterate until satisfaction or failure */
1342 	while (k > 0)
1343 	{
1344 		/* disallow zero-length match unless necessary to achieve min */
1345 		if (limit == endpts[k - 1] &&
1346 			limit != end &&
1347 			(k >= min_matches || min_matches - k < end - limit))
1348 			limit++;
1349 
1350 		/* if this is the last allowed sub-match, it must reach to the end */
1351 		if (k >= max_matches)
1352 			limit = end;
1353 
1354 		/* try to find an endpoint for the k'th sub-match */
1355 		endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1356 							 (chr **) NULL, (int *) NULL);
1357 		if (ISERR())
1358 		{
1359 			FREE(endpts);
1360 			return v->err;
1361 		}
1362 		if (endpts[k] == NULL)
1363 		{
1364 			/* no match possible, so see if we can lengthen previous one */
1365 			k--;
1366 			goto backtrack;
1367 		}
1368 		MDEBUG(("%d: working endpoint %d: %ld\n",
1369 				t->id, k, LOFF(endpts[k])));
1370 
1371 		/* k'th sub-match can no longer be considered verified */
1372 		if (nverified >= k)
1373 			nverified = k - 1;
1374 
1375 		if (endpts[k] != end)
1376 		{
1377 			/* haven't reached end yet, try another iteration if allowed */
1378 			if (k >= max_matches)
1379 			{
1380 				/* must try to lengthen some previous match */
1381 				k--;
1382 				goto backtrack;
1383 			}
1384 
1385 			k++;
1386 			limit = endpts[k - 1];
1387 			continue;
1388 		}
1389 
1390 		/*
1391 		 * We've identified a way to divide the string into k sub-matches that
1392 		 * works so far as the child DFA can tell.  If k is an allowed number
1393 		 * of matches, start the slow part: recurse to verify each sub-match.
1394 		 * We always have k <= max_matches, needn't check that.
1395 		 */
1396 		if (k < min_matches)
1397 			goto backtrack;
1398 
1399 		MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1400 
1401 		for (i = nverified + 1; i <= k; i++)
1402 		{
1403 			/* zap any match data from a non-last iteration */
1404 			zaptreesubs(v, t->left);
1405 			er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
1406 			if (er == REG_OKAY)
1407 			{
1408 				nverified = i;
1409 				continue;
1410 			}
1411 			if (er == REG_NOMATCH)
1412 				break;
1413 			/* oops, something failed */
1414 			FREE(endpts);
1415 			return er;
1416 		}
1417 
1418 		if (i > k)
1419 		{
1420 			/* satisfaction */
1421 			MDEBUG(("%d successful\n", t->id));
1422 			FREE(endpts);
1423 			return REG_OKAY;
1424 		}
1425 
1426 		/* i'th match failed to verify, so backtrack it */
1427 		k = i;
1428 
1429 backtrack:
1430 
1431 		/*
1432 		 * Must consider longer versions of the k'th sub-match.
1433 		 */
1434 		while (k > 0)
1435 		{
1436 			if (endpts[k] < end)
1437 			{
1438 				limit = endpts[k] + 1;
1439 				/* break out of backtrack loop, continue the outer one */
1440 				break;
1441 			}
1442 			/* can't lengthen k'th sub-match any more, consider previous one */
1443 			k--;
1444 		}
1445 	}
1446 
1447 	/* all possibilities exhausted */
1448 	MDEBUG(("%d failed\n", t->id));
1449 	FREE(endpts);
1450 	return REG_NOMATCH;
1451 }
1452 
1453 
1454 
1455 #include "rege_dfa.c"
1456