1 /*
2  * re_*exec and friends - match REs
3  *
4  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
5  *
6  * Development of this software was funded, in part, by Cray Research Inc.,
7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8  * Corporation, none of whom are responsible for the results.  The author
9  * thanks all of them.
10  *
11  * Redistribution and use in source and binary forms -- with or without
12  * modification -- are permitted for any purpose, provided that
13  * redistributions in source form retain this entire copyright notice and
14  * indicate the origin and nature of any modifications.
15  *
16  * I'd appreciate being given credit for this package in the documentation
17  * of software which uses it, but that is not a requirement.
18  *
19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * src/backend/regex/regexec.c
31  *
32  */
33 
34 #include "regex/regguts.h"
35 
36 
37 
38 /* lazy-DFA representation */
39 struct arcp
40 {								/* "pointer" to an outarc */
41 	struct sset *ss;
42 	color		co;
43 };
44 
45 struct sset
46 {								/* state set */
47 	unsigned   *states;			/* pointer to bitvector */
48 	unsigned	hash;			/* hash of bitvector */
49 #define  HASH(bv, nw)	 (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52 	int			flags;
53 #define  STARTER	 01			/* the initial state set */
54 #define  POSTSTATE	 02			/* includes the goal state */
55 #define  LOCKED		 04			/* locked in cache */
56 #define  NOPROGRESS  010		/* zero-progress state set */
57 	struct arcp ins;			/* chain of inarcs pointing here */
58 	chr		   *lastseen;		/* last entered on arrival here */
59 	struct sset **outs;			/* outarc vector indexed by color */
60 	struct arcp *inchain;		/* chain-pointer vector for outarcs */
61 };
62 
63 struct dfa
64 {
65 	int			nssets;			/* size of cache */
66 	int			nssused;		/* how many entries occupied yet */
67 	int			nstates;		/* number of states */
68 	int			ncolors;		/* length of outarc and inchain vectors */
69 	int			wordsper;		/* length of state-set bitvectors */
70 	struct sset *ssets;			/* state-set cache */
71 	unsigned   *statesarea;		/* bitvector storage */
72 	unsigned   *work;			/* pointer to work area within statesarea */
73 	struct sset **outsarea;		/* outarc-vector storage */
74 	struct arcp *incarea;		/* inchain storage */
75 	struct cnfa *cnfa;
76 	struct colormap *cm;
77 	chr		   *lastpost;		/* location of last cache-flushed success */
78 	chr		   *lastnopr;		/* location of last cache-flushed NOPROGRESS */
79 	struct sset *search;		/* replacement-search-pointer memory */
80 	int			backno;			/* if DFA for a backref, subno it refers to */
81 	short		backmin;		/* min repetitions for backref */
82 	short		backmax;		/* max repetitions for backref */
83 	bool		ismalloced;		/* should this struct dfa be freed? */
84 	bool		arraysmalloced; /* should its subsidiary arrays be freed? */
85 };
86 
87 #define WORK	1				/* number of work bitvectors needed */
88 
89 /* setup for non-malloc allocation for small cases */
90 #define FEWSTATES	20			/* must be less than UBITS */
91 #define FEWCOLORS	15
92 struct smalldfa
93 {
94 	struct dfa	dfa;			/* must be first */
95 	struct sset ssets[FEWSTATES * 2];
96 	unsigned	statesarea[FEWSTATES * 2 + WORK];
97 	struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
98 	struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
99 };
100 
101 #define DOMALLOC	((struct smalldfa *)NULL)	/* force malloc */
102 
103 
104 
105 /* internal variables, bundled for easy passing around */
106 struct vars
107 {
108 	regex_t    *re;
109 	struct guts *g;
110 	int			eflags;			/* copies of arguments */
111 	size_t		nmatch;
112 	regmatch_t *pmatch;
113 	rm_detail_t *details;
114 	chr		   *start;			/* start of string */
115 	chr		   *search_start;	/* search start of string */
116 	chr		   *stop;			/* just past end of string */
117 	int			err;			/* error code if any (0 none) */
118 	struct dfa **subdfas;		/* per-tree-subre DFAs */
119 	struct dfa **ladfas;		/* per-lacon-subre DFAs */
120 	struct sset **lblastcss;	/* per-lacon-subre lookbehind restart data */
121 	chr		  **lblastcp;		/* per-lacon-subre lookbehind restart data */
122 	struct smalldfa dfa1;
123 	struct smalldfa dfa2;
124 };
125 
126 #define VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
127 #define ISERR() VISERR(v)
128 #define VERR(vv,e)	((vv)->err = ((vv)->err ? (vv)->err : (e)))
129 #define ERR(e)	VERR(v, e)		/* record an error */
130 #define NOERR() {if (ISERR()) return v->err;}	/* if error seen, return it */
131 #define OFF(p)	((p) - v->start)
132 #define LOFF(p) ((long)OFF(p))
133 
134 
135 
136 /*
137  * forward declarations
138  */
139 /* === regexec.c === */
140 static struct dfa *getsubdfa(struct vars *, struct subre *);
141 static struct dfa *getladfa(struct vars *, int);
142 static int	find(struct vars *, struct cnfa *, struct colormap *);
143 static int	cfind(struct vars *, struct cnfa *, struct colormap *);
144 static int	cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
145 static void zapallsubs(regmatch_t *, size_t);
146 static void zaptreesubs(struct vars *, struct subre *);
147 static void subset(struct vars *, struct subre *, chr *, chr *);
148 static int	cdissect(struct vars *, struct subre *, chr *, chr *);
149 static int	ccondissect(struct vars *, struct subre *, chr *, chr *);
150 static int	crevcondissect(struct vars *, struct subre *, chr *, chr *);
151 static int	cbrdissect(struct vars *, struct subre *, chr *, chr *);
152 static int	caltdissect(struct vars *, struct subre *, chr *, chr *);
153 static int	citerdissect(struct vars *, struct subre *, chr *, chr *);
154 static int	creviterdissect(struct vars *, struct subre *, chr *, chr *);
155 
156 /* === rege_dfa.c === */
157 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
158 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
159 static int	matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
160 static chr *dfa_backref(struct vars *, struct dfa *, chr *, chr *, chr *, bool);
161 static chr *lastcold(struct vars *, struct dfa *);
162 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
163 static void freedfa(struct dfa *);
164 static unsigned hash(unsigned *, int);
165 static struct sset *initialize(struct vars *, struct dfa *, chr *);
166 static struct sset *miss(struct vars *, struct dfa *, struct sset *, color, chr *, chr *);
167 static int	lacon(struct vars *, struct cnfa *, chr *, color);
168 static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
169 static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
170 
171 
172 /*
173  * pg_regexec - match regular expression
174  */
175 int
pg_regexec(regex_t * re,const chr * string,size_t len,size_t search_start,rm_detail_t * details,size_t nmatch,regmatch_t pmatch[],int flags)176 pg_regexec(regex_t *re,
177 		   const chr *string,
178 		   size_t len,
179 		   size_t search_start,
180 		   rm_detail_t *details,
181 		   size_t nmatch,
182 		   regmatch_t pmatch[],
183 		   int flags)
184 {
185 	struct vars var;
186 	register struct vars *v = &var;
187 	int			st;
188 	size_t		n;
189 	size_t		i;
190 	int			backref;
191 
192 #define  LOCALMAT	 20
193 	regmatch_t	mat[LOCALMAT];
194 
195 #define  LOCALDFAS	 40
196 	struct dfa *subdfas[LOCALDFAS];
197 
198 	/* sanity checks */
199 	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
200 		return REG_INVARG;
201 	if (re->re_csize != sizeof(chr))
202 		return REG_MIXED;
203 	if (search_start > len)
204 		return REG_NOMATCH;
205 
206 	/* Initialize locale-dependent support */
207 	pg_set_regex_collation(re->re_collation);
208 
209 	/* setup */
210 	v->re = re;
211 	v->g = (struct guts *) re->re_guts;
212 	if ((v->g->cflags & REG_EXPECT) && details == NULL)
213 		return REG_INVARG;
214 	if (v->g->info & REG_UIMPOSSIBLE)
215 		return REG_NOMATCH;
216 	backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
217 	v->eflags = flags;
218 	if (v->g->cflags & REG_NOSUB)
219 		nmatch = 0;				/* override client */
220 	v->nmatch = nmatch;
221 	if (backref)
222 	{
223 		/* need work area */
224 		if (v->g->nsub + 1 <= LOCALMAT)
225 			v->pmatch = mat;
226 		else
227 			v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
228 											  sizeof(regmatch_t));
229 		if (v->pmatch == NULL)
230 			return REG_ESPACE;
231 		v->nmatch = v->g->nsub + 1;
232 	}
233 	else
234 		v->pmatch = pmatch;
235 	if (v->nmatch > 0)
236 		zapallsubs(v->pmatch, v->nmatch);
237 	v->details = details;
238 	v->start = (chr *) string;
239 	v->search_start = (chr *) string + search_start;
240 	v->stop = (chr *) string + len;
241 	v->err = 0;
242 	v->subdfas = NULL;
243 	v->ladfas = NULL;
244 	v->lblastcss = NULL;
245 	v->lblastcp = NULL;
246 	/* below this point, "goto cleanup" will behave sanely */
247 
248 	assert(v->g->ntree >= 0);
249 	n = (size_t) v->g->ntree;
250 	if (n <= LOCALDFAS)
251 		v->subdfas = subdfas;
252 	else
253 	{
254 		v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
255 		if (v->subdfas == NULL)
256 		{
257 			st = REG_ESPACE;
258 			goto cleanup;
259 		}
260 	}
261 	for (i = 0; i < n; i++)
262 		v->subdfas[i] = NULL;
263 
264 	assert(v->g->nlacons >= 0);
265 	n = (size_t) v->g->nlacons;
266 	if (n > 0)
267 	{
268 		v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
269 		if (v->ladfas == NULL)
270 		{
271 			st = REG_ESPACE;
272 			goto cleanup;
273 		}
274 		for (i = 0; i < n; i++)
275 			v->ladfas[i] = NULL;
276 		v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
277 		v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
278 		if (v->lblastcss == NULL || v->lblastcp == NULL)
279 		{
280 			st = REG_ESPACE;
281 			goto cleanup;
282 		}
283 		for (i = 0; i < n; i++)
284 		{
285 			v->lblastcss[i] = NULL;
286 			v->lblastcp[i] = NULL;
287 		}
288 	}
289 
290 	/* do it */
291 	assert(v->g->tree != NULL);
292 	if (backref)
293 		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
294 	else
295 		st = find(v, &v->g->tree->cnfa, &v->g->cmap);
296 
297 	/* copy (portion of) match vector over if necessary */
298 	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
299 	{
300 		zapallsubs(pmatch, nmatch);
301 		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
302 		memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
303 	}
304 
305 	/* clean up */
306 cleanup:
307 	if (v->pmatch != pmatch && v->pmatch != mat)
308 		FREE(v->pmatch);
309 	if (v->subdfas != NULL)
310 	{
311 		n = (size_t) v->g->ntree;
312 		for (i = 0; i < n; i++)
313 		{
314 			if (v->subdfas[i] != NULL)
315 				freedfa(v->subdfas[i]);
316 		}
317 		if (v->subdfas != subdfas)
318 			FREE(v->subdfas);
319 	}
320 	if (v->ladfas != NULL)
321 	{
322 		n = (size_t) v->g->nlacons;
323 		for (i = 0; i < n; i++)
324 		{
325 			if (v->ladfas[i] != NULL)
326 				freedfa(v->ladfas[i]);
327 		}
328 		FREE(v->ladfas);
329 	}
330 	if (v->lblastcss != NULL)
331 		FREE(v->lblastcss);
332 	if (v->lblastcp != NULL)
333 		FREE(v->lblastcp);
334 
335 #ifdef REG_DEBUG
336 	if (v->eflags & (REG_FTRACE | REG_MTRACE))
337 		fflush(stdout);
338 #endif
339 
340 	return st;
341 }
342 
343 /*
344  * getsubdfa - create or re-fetch the DFA for a tree subre node
345  *
346  * We only need to create the DFA once per overall regex execution.
347  * The DFA will be freed by the cleanup step in pg_regexec().
348  */
349 static struct dfa *
getsubdfa(struct vars * v,struct subre * t)350 getsubdfa(struct vars *v,
351 		  struct subre *t)
352 {
353 	struct dfa *d = v->subdfas[t->id];
354 
355 	if (d == NULL)
356 	{
357 		d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
358 		if (d == NULL)
359 			return NULL;
360 		/* set up additional info if this is a backref node */
361 		if (t->op == 'b')
362 		{
363 			d->backno = t->backno;
364 			d->backmin = t->min;
365 			d->backmax = t->max;
366 		}
367 		v->subdfas[t->id] = d;
368 	}
369 	return d;
370 }
371 
372 /*
373  * getladfa - create or re-fetch the DFA for a LACON subre node
374  *
375  * Same as above, but for LACONs.
376  */
377 static struct dfa *
getladfa(struct vars * v,int n)378 getladfa(struct vars *v,
379 		 int n)
380 {
381 	assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
382 
383 	if (v->ladfas[n] == NULL)
384 	{
385 		struct subre *sub = &v->g->lacons[n];
386 
387 		v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
388 		/* a LACON can't contain a backref, so nothing else to do */
389 	}
390 	return v->ladfas[n];
391 }
392 
393 /*
394  * find - find a match for the main NFA (no-complications case)
395  */
396 static int
find(struct vars * v,struct cnfa * cnfa,struct colormap * cm)397 find(struct vars *v,
398 	 struct cnfa *cnfa,
399 	 struct colormap *cm)
400 {
401 	struct dfa *s;
402 	struct dfa *d;
403 	chr		   *begin;
404 	chr		   *end = NULL;
405 	chr		   *cold;
406 	chr		   *open;			/* open and close of range of possible starts */
407 	chr		   *close;
408 	int			hitend;
409 	int			shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
410 
411 	/* first, a shot with the search RE */
412 	s = newdfa(v, &v->g->search, cm, &v->dfa1);
413 	if (s == NULL)
414 		return v->err;
415 	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
416 	cold = NULL;
417 	close = shortest(v, s, v->search_start, v->search_start, v->stop,
418 					 &cold, (int *) NULL);
419 	freedfa(s);
420 	NOERR();
421 	if (v->g->cflags & REG_EXPECT)
422 	{
423 		assert(v->details != NULL);
424 		if (cold != NULL)
425 			v->details->rm_extend.rm_so = OFF(cold);
426 		else
427 			v->details->rm_extend.rm_so = OFF(v->stop);
428 		v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
429 	}
430 	if (close == NULL)			/* not found */
431 		return REG_NOMATCH;
432 	if (v->nmatch == 0)			/* found, don't need exact location */
433 		return REG_OKAY;
434 
435 	/* find starting point and match */
436 	assert(cold != NULL);
437 	open = cold;
438 	cold = NULL;
439 	MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
440 	d = newdfa(v, cnfa, cm, &v->dfa1);
441 	if (d == NULL)
442 		return v->err;
443 	for (begin = open; begin <= close; begin++)
444 	{
445 		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
446 		if (shorter)
447 			end = shortest(v, d, begin, begin, v->stop,
448 						   (chr **) NULL, &hitend);
449 		else
450 			end = longest(v, d, begin, v->stop, &hitend);
451 		if (ISERR())
452 		{
453 			freedfa(d);
454 			return v->err;
455 		}
456 		if (hitend && cold == NULL)
457 			cold = begin;
458 		if (end != NULL)
459 			break;				/* NOTE BREAK OUT */
460 	}
461 	assert(end != NULL);		/* search RE succeeded so loop should */
462 	freedfa(d);
463 
464 	/* and pin down details */
465 	assert(v->nmatch > 0);
466 	v->pmatch[0].rm_so = OFF(begin);
467 	v->pmatch[0].rm_eo = OFF(end);
468 	if (v->g->cflags & REG_EXPECT)
469 	{
470 		if (cold != NULL)
471 			v->details->rm_extend.rm_so = OFF(cold);
472 		else
473 			v->details->rm_extend.rm_so = OFF(v->stop);
474 		v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
475 	}
476 	if (v->nmatch == 1)			/* no need for submatches */
477 		return REG_OKAY;
478 
479 	/* find submatches */
480 	return cdissect(v, v->g->tree, begin, end);
481 }
482 
483 /*
484  * cfind - find a match for the main NFA (with complications)
485  */
486 static int
cfind(struct vars * v,struct cnfa * cnfa,struct colormap * cm)487 cfind(struct vars *v,
488 	  struct cnfa *cnfa,
489 	  struct colormap *cm)
490 {
491 	struct dfa *s;
492 	struct dfa *d;
493 	chr		   *cold;
494 	int			ret;
495 
496 	s = newdfa(v, &v->g->search, cm, &v->dfa1);
497 	if (s == NULL)
498 		return v->err;
499 	d = newdfa(v, cnfa, cm, &v->dfa2);
500 	if (d == NULL)
501 	{
502 		freedfa(s);
503 		return v->err;
504 	}
505 
506 	ret = cfindloop(v, cnfa, cm, d, s, &cold);
507 
508 	freedfa(d);
509 	freedfa(s);
510 	NOERR();
511 	if (v->g->cflags & REG_EXPECT)
512 	{
513 		assert(v->details != NULL);
514 		if (cold != NULL)
515 			v->details->rm_extend.rm_so = OFF(cold);
516 		else
517 			v->details->rm_extend.rm_so = OFF(v->stop);
518 		v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
519 	}
520 	return ret;
521 }
522 
523 /*
524  * cfindloop - the heart of cfind
525  */
526 static int
cfindloop(struct vars * v,struct cnfa * cnfa,struct colormap * cm,struct dfa * d,struct dfa * s,chr ** coldp)527 cfindloop(struct vars *v,
528 		  struct cnfa *cnfa,
529 		  struct colormap *cm,
530 		  struct dfa *d,
531 		  struct dfa *s,
532 		  chr **coldp)			/* where to put coldstart pointer */
533 {
534 	chr		   *begin;
535 	chr		   *end;
536 	chr		   *cold;
537 	chr		   *open;			/* open and close of range of possible starts */
538 	chr		   *close;
539 	chr		   *estart;
540 	chr		   *estop;
541 	int			er;
542 	int			shorter = v->g->tree->flags & SHORTER;
543 	int			hitend;
544 
545 	assert(d != NULL && s != NULL);
546 	cold = NULL;
547 	close = v->search_start;
548 	do
549 	{
550 		/* Search with the search RE for match range at/beyond "close" */
551 		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
552 		close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
553 		if (ISERR())
554 		{
555 			*coldp = cold;
556 			return v->err;
557 		}
558 		if (close == NULL)
559 			break;				/* no more possible match anywhere */
560 		assert(cold != NULL);
561 		open = cold;
562 		cold = NULL;
563 		/* Search for matches starting between "open" and "close" inclusive */
564 		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
565 		for (begin = open; begin <= close; begin++)
566 		{
567 			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
568 			estart = begin;
569 			estop = v->stop;
570 			for (;;)
571 			{
572 				/* Here we use the top node's detailed RE */
573 				if (shorter)
574 					end = shortest(v, d, begin, estart,
575 								   estop, (chr **) NULL, &hitend);
576 				else
577 					end = longest(v, d, begin, estop,
578 								  &hitend);
579 				if (ISERR())
580 				{
581 					*coldp = cold;
582 					return v->err;
583 				}
584 				if (hitend && cold == NULL)
585 					cold = begin;
586 				if (end == NULL)
587 					break;		/* no match with this begin point, try next */
588 				MDEBUG(("tentative end %ld\n", LOFF(end)));
589 				/* Dissect the potential match to see if it really matches */
590 				er = cdissect(v, v->g->tree, begin, end);
591 				if (er == REG_OKAY)
592 				{
593 					if (v->nmatch > 0)
594 					{
595 						v->pmatch[0].rm_so = OFF(begin);
596 						v->pmatch[0].rm_eo = OFF(end);
597 					}
598 					*coldp = cold;
599 					return REG_OKAY;
600 				}
601 				if (er != REG_NOMATCH)
602 				{
603 					ERR(er);
604 					*coldp = cold;
605 					return er;
606 				}
607 				/* Try next longer/shorter match with same begin point */
608 				if (shorter)
609 				{
610 					if (end == estop)
611 						break;	/* no more, so try next begin point */
612 					estart = end + 1;
613 				}
614 				else
615 				{
616 					if (end == begin)
617 						break;	/* no more, so try next begin point */
618 					estop = end - 1;
619 				}
620 			}					/* end loop over endpoint positions */
621 		}						/* end loop over beginning positions */
622 
623 		/*
624 		 * If we get here, there is no possible match starting at or before
625 		 * "close", so consider matches beyond that.  We'll do a fresh search
626 		 * with the search RE to find a new promising match range.
627 		 */
628 		close++;
629 	} while (close < v->stop);
630 
631 	*coldp = cold;
632 	return REG_NOMATCH;
633 }
634 
635 /*
636  * zapallsubs - initialize all subexpression matches to "no match"
637  *
638  * Note that p[0], the overall-match location, is not touched.
639  */
640 static void
zapallsubs(regmatch_t * p,size_t n)641 zapallsubs(regmatch_t *p,
642 		   size_t n)
643 {
644 	size_t		i;
645 
646 	for (i = n - 1; i > 0; i--)
647 	{
648 		p[i].rm_so = -1;
649 		p[i].rm_eo = -1;
650 	}
651 }
652 
653 /*
654  * zaptreesubs - initialize subexpressions within subtree to "no match"
655  */
656 static void
zaptreesubs(struct vars * v,struct subre * t)657 zaptreesubs(struct vars *v,
658 			struct subre *t)
659 {
660 	int			n = t->capno;
661 	struct subre *t2;
662 
663 	if (n > 0)
664 	{
665 		if ((size_t) n < v->nmatch)
666 		{
667 			v->pmatch[n].rm_so = -1;
668 			v->pmatch[n].rm_eo = -1;
669 		}
670 	}
671 
672 	for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
673 		zaptreesubs(v, t2);
674 }
675 
676 /*
677  * subset - set subexpression match data for a successful subre
678  */
679 static void
subset(struct vars * v,struct subre * sub,chr * begin,chr * end)680 subset(struct vars *v,
681 	   struct subre *sub,
682 	   chr *begin,
683 	   chr *end)
684 {
685 	int			n = sub->capno;
686 
687 	assert(n > 0);
688 	if ((size_t) n >= v->nmatch)
689 		return;
690 
691 	MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
692 	v->pmatch[n].rm_so = OFF(begin);
693 	v->pmatch[n].rm_eo = OFF(end);
694 }
695 
696 /*
697  * cdissect - check backrefs and determine subexpression matches
698  *
699  * cdissect recursively processes a subre tree to check matching of backrefs
700  * and/or identify submatch boundaries for capture nodes.  The proposed match
701  * runs from "begin" to "end" (not including "end"), and we are basically
702  * "dissecting" it to see where the submatches are.
703  *
704  * Before calling any level of cdissect, the caller must have run the node's
705  * DFA and found that the proposed substring satisfies the DFA.  (We make
706  * the caller do that because in concatenation and iteration nodes, it's
707  * much faster to check all the substrings against the child DFAs before we
708  * recurse.)
709  *
710  * A side-effect of a successful match is to save match locations for
711  * capturing subexpressions in v->pmatch[].  This is a little bit tricky,
712  * so we make the following rules:
713  * 1. Before initial entry to cdissect, all match data must have been
714  *    cleared (this is seen to by zapallsubs).
715  * 2. Before any recursive entry to cdissect, the match data for that
716  *    subexpression tree must be guaranteed clear (see zaptreesubs).
717  * 3. When returning REG_OKAY, each level of cdissect will have saved
718  *    any relevant match locations.
719  * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
720  *    that its subexpression match locations are again clear.
721  * 5. No guarantees are made for error cases (i.e., other result codes).
722  * 6. When a level of cdissect abandons a successful sub-match, it will
723  *    clear that subtree's match locations with zaptreesubs before trying
724  *    any new DFA match or cdissect call for that subtree or any subtree
725  *    to its right (that is, any subtree that could have a backref into the
726  *    abandoned match).
727  * This may seem overly complicated, but it's difficult to simplify it
728  * because of the provision that match locations must be reset before
729  * any fresh DFA match (a rule that is needed to make dfa_backref safe).
730  * That means it won't work to just reset relevant match locations at the
731  * start of each cdissect level.
732  */
733 static int						/* regexec return code */
cdissect(struct vars * v,struct subre * t,chr * begin,chr * end)734 cdissect(struct vars *v,
735 		 struct subre *t,
736 		 chr *begin,			/* beginning of relevant substring */
737 		 chr *end)				/* end of same */
738 {
739 	int			er;
740 
741 	assert(t != NULL);
742 	MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
743 
744 	/* handy place to check for operation cancel */
745 	if (CANCEL_REQUESTED(v->re))
746 		return REG_CANCEL;
747 	/* ... and stack overrun */
748 	if (STACK_TOO_DEEP(v->re))
749 		return REG_ETOOBIG;
750 
751 	switch (t->op)
752 	{
753 		case '=':				/* terminal node */
754 			assert(t->child == NULL);
755 			er = REG_OKAY;		/* no action, parent did the work */
756 			break;
757 		case 'b':				/* back reference */
758 			assert(t->child == NULL);
759 			er = cbrdissect(v, t, begin, end);
760 			break;
761 		case '.':				/* concatenation */
762 			assert(t->child != NULL);
763 			if (t->child->flags & SHORTER)	/* reverse scan */
764 				er = crevcondissect(v, t, begin, end);
765 			else
766 				er = ccondissect(v, t, begin, end);
767 			break;
768 		case '|':				/* alternation */
769 			assert(t->child != NULL);
770 			er = caltdissect(v, t, begin, end);
771 			break;
772 		case '*':				/* iteration */
773 			assert(t->child != NULL);
774 			if (t->child->flags & SHORTER)	/* reverse scan */
775 				er = creviterdissect(v, t, begin, end);
776 			else
777 				er = citerdissect(v, t, begin, end);
778 			break;
779 		case '(':				/* no-op capture node */
780 			assert(t->child != NULL);
781 			assert(t->capno > 0);
782 			er = cdissect(v, t->child, begin, end);
783 			break;
784 		default:
785 			er = REG_ASSERT;
786 			break;
787 	}
788 
789 	/*
790 	 * We should never have a match failure unless backrefs lurk below;
791 	 * otherwise, either caller failed to check the DFA, or there's some
792 	 * inconsistency between the DFA and the node's innards.
793 	 */
794 	assert(er != REG_NOMATCH || (t->flags & BACKR));
795 
796 	/*
797 	 * If this node is marked as capturing, save successful match's location.
798 	 */
799 	if (t->capno > 0 && er == REG_OKAY)
800 		subset(v, t, begin, end);
801 
802 	return er;
803 }
804 
805 /*
806  * ccondissect - dissect match for concatenation node
807  */
808 static int						/* regexec return code */
ccondissect(struct vars * v,struct subre * t,chr * begin,chr * end)809 ccondissect(struct vars *v,
810 			struct subre *t,
811 			chr *begin,			/* beginning of relevant substring */
812 			chr *end)			/* end of same */
813 {
814 	struct subre *left = t->child;
815 	struct subre *right = left->sibling;
816 	struct dfa *d;
817 	struct dfa *d2;
818 	chr		   *mid;
819 	int			er;
820 
821 	assert(t->op == '.');
822 	assert(left != NULL && left->cnfa.nstates > 0);
823 	assert(right != NULL && right->cnfa.nstates > 0);
824 	assert(right->sibling == NULL);
825 	assert(!(left->flags & SHORTER));
826 
827 	d = getsubdfa(v, left);
828 	NOERR();
829 	d2 = getsubdfa(v, right);
830 	NOERR();
831 	MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
832 
833 	/* pick a tentative midpoint */
834 	mid = longest(v, d, begin, end, (int *) NULL);
835 	NOERR();
836 	if (mid == NULL)
837 		return REG_NOMATCH;
838 	MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
839 
840 	/* iterate until satisfaction or failure */
841 	for (;;)
842 	{
843 		/* try this midpoint on for size */
844 		if (longest(v, d2, mid, end, (int *) NULL) == end)
845 		{
846 			er = cdissect(v, left, begin, mid);
847 			if (er == REG_OKAY)
848 			{
849 				er = cdissect(v, right, mid, end);
850 				if (er == REG_OKAY)
851 				{
852 					/* satisfaction */
853 					MDEBUG(("%d: successful\n", t->id));
854 					return REG_OKAY;
855 				}
856 				/* Reset left's matches (right should have done so itself) */
857 				zaptreesubs(v, left);
858 			}
859 			if (er != REG_NOMATCH)
860 				return er;
861 		}
862 		NOERR();
863 
864 		/* that midpoint didn't work, find a new one */
865 		if (mid == begin)
866 		{
867 			/* all possibilities exhausted */
868 			MDEBUG(("%d: no midpoint\n", t->id));
869 			return REG_NOMATCH;
870 		}
871 		mid = longest(v, d, begin, mid - 1, (int *) NULL);
872 		NOERR();
873 		if (mid == NULL)
874 		{
875 			/* failed to find a new one */
876 			MDEBUG(("%d: failed midpoint\n", t->id));
877 			return REG_NOMATCH;
878 		}
879 		MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
880 	}
881 
882 	/* can't get here */
883 	return REG_ASSERT;
884 }
885 
886 /*
887  * crevcondissect - dissect match for concatenation node, shortest-first
888  */
889 static int						/* regexec return code */
crevcondissect(struct vars * v,struct subre * t,chr * begin,chr * end)890 crevcondissect(struct vars *v,
891 			   struct subre *t,
892 			   chr *begin,		/* beginning of relevant substring */
893 			   chr *end)		/* end of same */
894 {
895 	struct subre *left = t->child;
896 	struct subre *right = left->sibling;
897 	struct dfa *d;
898 	struct dfa *d2;
899 	chr		   *mid;
900 	int			er;
901 
902 	assert(t->op == '.');
903 	assert(left != NULL && left->cnfa.nstates > 0);
904 	assert(right != NULL && right->cnfa.nstates > 0);
905 	assert(right->sibling == NULL);
906 	assert(left->flags & SHORTER);
907 
908 	d = getsubdfa(v, left);
909 	NOERR();
910 	d2 = getsubdfa(v, right);
911 	NOERR();
912 	MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
913 
914 	/* pick a tentative midpoint */
915 	mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
916 	NOERR();
917 	if (mid == NULL)
918 		return REG_NOMATCH;
919 	MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
920 
921 	/* iterate until satisfaction or failure */
922 	for (;;)
923 	{
924 		/* try this midpoint on for size */
925 		if (longest(v, d2, mid, end, (int *) NULL) == end)
926 		{
927 			er = cdissect(v, left, begin, mid);
928 			if (er == REG_OKAY)
929 			{
930 				er = cdissect(v, right, mid, end);
931 				if (er == REG_OKAY)
932 				{
933 					/* satisfaction */
934 					MDEBUG(("%d: successful\n", t->id));
935 					return REG_OKAY;
936 				}
937 				/* Reset left's matches (right should have done so itself) */
938 				zaptreesubs(v, left);
939 			}
940 			if (er != REG_NOMATCH)
941 				return er;
942 		}
943 		NOERR();
944 
945 		/* that midpoint didn't work, find a new one */
946 		if (mid == end)
947 		{
948 			/* all possibilities exhausted */
949 			MDEBUG(("%d: no midpoint\n", t->id));
950 			return REG_NOMATCH;
951 		}
952 		mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
953 		NOERR();
954 		if (mid == NULL)
955 		{
956 			/* failed to find a new one */
957 			MDEBUG(("%d: failed midpoint\n", t->id));
958 			return REG_NOMATCH;
959 		}
960 		MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
961 	}
962 
963 	/* can't get here */
964 	return REG_ASSERT;
965 }
966 
967 /*
968  * cbrdissect - dissect match for backref node
969  *
970  * The backref match might already have been verified by dfa_backref(),
971  * but we don't know that for sure so must check it here.
972  */
973 static int						/* regexec return code */
cbrdissect(struct vars * v,struct subre * t,chr * begin,chr * end)974 cbrdissect(struct vars *v,
975 		   struct subre *t,
976 		   chr *begin,			/* beginning of relevant substring */
977 		   chr *end)			/* end of same */
978 {
979 	int			n = t->backno;
980 	size_t		numreps;
981 	size_t		tlen;
982 	size_t		brlen;
983 	chr		   *brstring;
984 	chr		   *p;
985 	int			min = t->min;
986 	int			max = t->max;
987 
988 	assert(t != NULL);
989 	assert(t->op == 'b');
990 	assert(n >= 0);
991 	assert((size_t) n < v->nmatch);
992 
993 	MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
994 			LOFF(begin), LOFF(end)));
995 
996 	/* get the backreferenced string */
997 	if (v->pmatch[n].rm_so == -1)
998 		return REG_NOMATCH;
999 	brstring = v->start + v->pmatch[n].rm_so;
1000 	brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1001 
1002 	/* special cases for zero-length strings */
1003 	if (brlen == 0)
1004 	{
1005 		/*
1006 		 * matches only if target is zero length, but any number of
1007 		 * repetitions can be considered to be present
1008 		 */
1009 		if (begin == end && min <= max)
1010 		{
1011 			MDEBUG(("%d: backref matched trivially\n", t->id));
1012 			return REG_OKAY;
1013 		}
1014 		return REG_NOMATCH;
1015 	}
1016 	if (begin == end)
1017 	{
1018 		/* matches only if zero repetitions are okay */
1019 		if (min == 0)
1020 		{
1021 			MDEBUG(("%d: backref matched trivially\n", t->id));
1022 			return REG_OKAY;
1023 		}
1024 		return REG_NOMATCH;
1025 	}
1026 
1027 	/*
1028 	 * check target length to see if it could possibly be an allowed number of
1029 	 * repetitions of brstring
1030 	 */
1031 	assert(end > begin);
1032 	tlen = end - begin;
1033 	if (tlen % brlen != 0)
1034 		return REG_NOMATCH;
1035 	numreps = tlen / brlen;
1036 	if (numreps < min || (numreps > max && max != DUPINF))
1037 		return REG_NOMATCH;
1038 
1039 	/* okay, compare the actual string contents */
1040 	p = begin;
1041 	while (numreps-- > 0)
1042 	{
1043 		if ((*v->g->compare) (brstring, p, brlen) != 0)
1044 			return REG_NOMATCH;
1045 		p += brlen;
1046 	}
1047 
1048 	MDEBUG(("%d: backref matched\n", t->id));
1049 	return REG_OKAY;
1050 }
1051 
1052 /*
1053  * caltdissect - dissect match for alternation node
1054  */
1055 static int						/* regexec return code */
caltdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1056 caltdissect(struct vars *v,
1057 			struct subre *t,
1058 			chr *begin,			/* beginning of relevant substring */
1059 			chr *end)			/* end of same */
1060 {
1061 	struct dfa *d;
1062 	int			er;
1063 
1064 	assert(t->op == '|');
1065 
1066 	t = t->child;
1067 	/* there should be at least 2 alternatives */
1068 	assert(t != NULL && t->sibling != NULL);
1069 
1070 	while (t != NULL)
1071 	{
1072 		assert(t->cnfa.nstates > 0);
1073 
1074 		MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1075 
1076 		d = getsubdfa(v, t);
1077 		NOERR();
1078 		if (longest(v, d, begin, end, (int *) NULL) == end)
1079 		{
1080 			MDEBUG(("%d: caltdissect matched\n", t->id));
1081 			er = cdissect(v, t, begin, end);
1082 			if (er != REG_NOMATCH)
1083 				return er;
1084 		}
1085 		NOERR();
1086 
1087 		t = t->sibling;
1088 	}
1089 
1090 	return REG_NOMATCH;
1091 }
1092 
1093 /*
1094  * citerdissect - dissect match for iteration node
1095  */
1096 static int						/* regexec return code */
citerdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1097 citerdissect(struct vars *v,
1098 			 struct subre *t,
1099 			 chr *begin,		/* beginning of relevant substring */
1100 			 chr *end)			/* end of same */
1101 {
1102 	struct dfa *d;
1103 	chr		  **endpts;
1104 	chr		   *limit;
1105 	int			min_matches;
1106 	size_t		max_matches;
1107 	int			nverified;
1108 	int			k;
1109 	int			i;
1110 	int			er;
1111 
1112 	assert(t->op == '*');
1113 	assert(t->child != NULL && t->child->cnfa.nstates > 0);
1114 	assert(!(t->child->flags & SHORTER));
1115 	assert(begin <= end);
1116 
1117 	MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1118 
1119 	/*
1120 	 * For the moment, assume the minimum number of matches is 1.  If zero
1121 	 * matches are allowed, and the target string is empty, we are allowed to
1122 	 * match regardless of the contents of the iter node --- but we would
1123 	 * prefer to match once, so that capturing parens get set.  (An example of
1124 	 * the concern here is a pattern like "()*\1", which historically this
1125 	 * code has allowed to succeed.)  Therefore, we deal with the zero-matches
1126 	 * case at the bottom, after failing to find any other way to match.
1127 	 */
1128 	min_matches = t->min;
1129 	if (min_matches <= 0)
1130 		min_matches = 1;
1131 
1132 	/*
1133 	 * We need workspace to track the endpoints of each sub-match.  Normally
1134 	 * we consider only nonzero-length sub-matches, so there can be at most
1135 	 * end-begin of them.  However, if min is larger than that, we will also
1136 	 * consider zero-length sub-matches in order to find enough matches.
1137 	 *
1138 	 * For convenience, endpts[0] contains the "begin" pointer and we store
1139 	 * sub-match endpoints in endpts[1..max_matches].
1140 	 */
1141 	max_matches = end - begin;
1142 	if (max_matches > t->max && t->max != DUPINF)
1143 		max_matches = t->max;
1144 	if (max_matches < min_matches)
1145 		max_matches = min_matches;
1146 	endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1147 	if (endpts == NULL)
1148 		return REG_ESPACE;
1149 	endpts[0] = begin;
1150 
1151 	d = getsubdfa(v, t->child);
1152 	if (ISERR())
1153 	{
1154 		FREE(endpts);
1155 		return v->err;
1156 	}
1157 
1158 	/*
1159 	 * Our strategy is to first find a set of sub-match endpoints that are
1160 	 * valid according to the child node's DFA, and then recursively dissect
1161 	 * each sub-match to confirm validity.  If any validity check fails,
1162 	 * backtrack that sub-match and try again.  And, when we next try for a
1163 	 * validity check, we need not recheck any successfully verified
1164 	 * sub-matches that we didn't move the endpoints of.  nverified remembers
1165 	 * how many sub-matches are currently known okay.
1166 	 */
1167 
1168 	/* initialize to consider first sub-match */
1169 	nverified = 0;
1170 	k = 1;
1171 	limit = end;
1172 
1173 	/* iterate until satisfaction or failure */
1174 	while (k > 0)
1175 	{
1176 		/* try to find an endpoint for the k'th sub-match */
1177 		endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1178 		if (ISERR())
1179 		{
1180 			FREE(endpts);
1181 			return v->err;
1182 		}
1183 		if (endpts[k] == NULL)
1184 		{
1185 			/* no match possible, so see if we can shorten previous one */
1186 			k--;
1187 			goto backtrack;
1188 		}
1189 		MDEBUG(("%d: working endpoint %d: %ld\n",
1190 				t->id, k, LOFF(endpts[k])));
1191 
1192 		/* k'th sub-match can no longer be considered verified */
1193 		if (nverified >= k)
1194 			nverified = k - 1;
1195 
1196 		if (endpts[k] != end)
1197 		{
1198 			/* haven't reached end yet, try another iteration if allowed */
1199 			if (k >= max_matches)
1200 			{
1201 				/* must try to shorten some previous match */
1202 				k--;
1203 				goto backtrack;
1204 			}
1205 
1206 			/* reject zero-length match unless necessary to achieve min */
1207 			if (endpts[k] == endpts[k - 1] &&
1208 				(k >= min_matches || min_matches - k < end - endpts[k]))
1209 				goto backtrack;
1210 
1211 			k++;
1212 			limit = end;
1213 			continue;
1214 		}
1215 
1216 		/*
1217 		 * We've identified a way to divide the string into k sub-matches that
1218 		 * works so far as the child DFA can tell.  If k is an allowed number
1219 		 * of matches, start the slow part: recurse to verify each sub-match.
1220 		 * We always have k <= max_matches, needn't check that.
1221 		 */
1222 		if (k < min_matches)
1223 			goto backtrack;
1224 
1225 		MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1226 
1227 		for (i = nverified + 1; i <= k; i++)
1228 		{
1229 			/* zap any match data from a non-last iteration */
1230 			zaptreesubs(v, t->child);
1231 			er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1232 			if (er == REG_OKAY)
1233 			{
1234 				nverified = i;
1235 				continue;
1236 			}
1237 			if (er == REG_NOMATCH)
1238 				break;
1239 			/* oops, something failed */
1240 			FREE(endpts);
1241 			return er;
1242 		}
1243 
1244 		if (i > k)
1245 		{
1246 			/* satisfaction */
1247 			MDEBUG(("%d: successful\n", t->id));
1248 			FREE(endpts);
1249 			return REG_OKAY;
1250 		}
1251 
1252 		/* i'th match failed to verify, so backtrack it */
1253 		k = i;
1254 
1255 backtrack:
1256 
1257 		/*
1258 		 * Must consider shorter versions of the k'th sub-match.  However,
1259 		 * we'll only ask for a zero-length match if necessary.
1260 		 */
1261 		while (k > 0)
1262 		{
1263 			chr		   *prev_end = endpts[k - 1];
1264 
1265 			if (endpts[k] > prev_end)
1266 			{
1267 				limit = endpts[k] - 1;
1268 				if (limit > prev_end ||
1269 					(k < min_matches && min_matches - k >= end - prev_end))
1270 				{
1271 					/* break out of backtrack loop, continue the outer one */
1272 					break;
1273 				}
1274 			}
1275 			/* can't shorten k'th sub-match any more, consider previous one */
1276 			k--;
1277 		}
1278 	}
1279 
1280 	/* all possibilities exhausted */
1281 	FREE(endpts);
1282 
1283 	/*
1284 	 * Now consider the possibility that we can match to a zero-length string
1285 	 * by using zero repetitions.
1286 	 */
1287 	if (t->min == 0 && begin == end)
1288 	{
1289 		MDEBUG(("%d: allowing zero matches\n", t->id));
1290 		return REG_OKAY;
1291 	}
1292 
1293 	MDEBUG(("%d: failed\n", t->id));
1294 	return REG_NOMATCH;
1295 }
1296 
1297 /*
1298  * creviterdissect - dissect match for iteration node, shortest-first
1299  */
1300 static int						/* regexec return code */
creviterdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1301 creviterdissect(struct vars *v,
1302 				struct subre *t,
1303 				chr *begin,		/* beginning of relevant substring */
1304 				chr *end)		/* end of same */
1305 {
1306 	struct dfa *d;
1307 	chr		  **endpts;
1308 	chr		   *limit;
1309 	int			min_matches;
1310 	size_t		max_matches;
1311 	int			nverified;
1312 	int			k;
1313 	int			i;
1314 	int			er;
1315 
1316 	assert(t->op == '*');
1317 	assert(t->child != NULL && t->child->cnfa.nstates > 0);
1318 	assert(t->child->flags & SHORTER);
1319 	assert(begin <= end);
1320 
1321 	MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1322 
1323 	/*
1324 	 * If zero matches are allowed, and target string is empty, just declare
1325 	 * victory.  OTOH, if target string isn't empty, zero matches can't work
1326 	 * so we pretend the min is 1.
1327 	 */
1328 	min_matches = t->min;
1329 	if (min_matches <= 0)
1330 	{
1331 		if (begin == end)
1332 		{
1333 			MDEBUG(("%d: allowing zero matches\n", t->id));
1334 			return REG_OKAY;
1335 		}
1336 		min_matches = 1;
1337 	}
1338 
1339 	/*
1340 	 * We need workspace to track the endpoints of each sub-match.  Normally
1341 	 * we consider only nonzero-length sub-matches, so there can be at most
1342 	 * end-begin of them.  However, if min is larger than that, we will also
1343 	 * consider zero-length sub-matches in order to find enough matches.
1344 	 *
1345 	 * For convenience, endpts[0] contains the "begin" pointer and we store
1346 	 * sub-match endpoints in endpts[1..max_matches].
1347 	 */
1348 	max_matches = end - begin;
1349 	if (max_matches > t->max && t->max != DUPINF)
1350 		max_matches = t->max;
1351 	if (max_matches < min_matches)
1352 		max_matches = min_matches;
1353 	endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1354 	if (endpts == NULL)
1355 		return REG_ESPACE;
1356 	endpts[0] = begin;
1357 
1358 	d = getsubdfa(v, t->child);
1359 	if (ISERR())
1360 	{
1361 		FREE(endpts);
1362 		return v->err;
1363 	}
1364 
1365 	/*
1366 	 * Our strategy is to first find a set of sub-match endpoints that are
1367 	 * valid according to the child node's DFA, and then recursively dissect
1368 	 * each sub-match to confirm validity.  If any validity check fails,
1369 	 * backtrack that sub-match and try again.  And, when we next try for a
1370 	 * validity check, we need not recheck any successfully verified
1371 	 * sub-matches that we didn't move the endpoints of.  nverified remembers
1372 	 * how many sub-matches are currently known okay.
1373 	 */
1374 
1375 	/* initialize to consider first sub-match */
1376 	nverified = 0;
1377 	k = 1;
1378 	limit = begin;
1379 
1380 	/* iterate until satisfaction or failure */
1381 	while (k > 0)
1382 	{
1383 		/* disallow zero-length match unless necessary to achieve min */
1384 		if (limit == endpts[k - 1] &&
1385 			limit != end &&
1386 			(k >= min_matches || min_matches - k < end - limit))
1387 			limit++;
1388 
1389 		/* if this is the last allowed sub-match, it must reach to the end */
1390 		if (k >= max_matches)
1391 			limit = end;
1392 
1393 		/* try to find an endpoint for the k'th sub-match */
1394 		endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1395 							 (chr **) NULL, (int *) NULL);
1396 		if (ISERR())
1397 		{
1398 			FREE(endpts);
1399 			return v->err;
1400 		}
1401 		if (endpts[k] == NULL)
1402 		{
1403 			/* no match possible, so see if we can lengthen previous one */
1404 			k--;
1405 			goto backtrack;
1406 		}
1407 		MDEBUG(("%d: working endpoint %d: %ld\n",
1408 				t->id, k, LOFF(endpts[k])));
1409 
1410 		/* k'th sub-match can no longer be considered verified */
1411 		if (nverified >= k)
1412 			nverified = k - 1;
1413 
1414 		if (endpts[k] != end)
1415 		{
1416 			/* haven't reached end yet, try another iteration if allowed */
1417 			if (k >= max_matches)
1418 			{
1419 				/* must try to lengthen some previous match */
1420 				k--;
1421 				goto backtrack;
1422 			}
1423 
1424 			k++;
1425 			limit = endpts[k - 1];
1426 			continue;
1427 		}
1428 
1429 		/*
1430 		 * We've identified a way to divide the string into k sub-matches that
1431 		 * works so far as the child DFA can tell.  If k is an allowed number
1432 		 * of matches, start the slow part: recurse to verify each sub-match.
1433 		 * We always have k <= max_matches, needn't check that.
1434 		 */
1435 		if (k < min_matches)
1436 			goto backtrack;
1437 
1438 		MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1439 
1440 		for (i = nverified + 1; i <= k; i++)
1441 		{
1442 			/* zap any match data from a non-last iteration */
1443 			zaptreesubs(v, t->child);
1444 			er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1445 			if (er == REG_OKAY)
1446 			{
1447 				nverified = i;
1448 				continue;
1449 			}
1450 			if (er == REG_NOMATCH)
1451 				break;
1452 			/* oops, something failed */
1453 			FREE(endpts);
1454 			return er;
1455 		}
1456 
1457 		if (i > k)
1458 		{
1459 			/* satisfaction */
1460 			MDEBUG(("%d: successful\n", t->id));
1461 			FREE(endpts);
1462 			return REG_OKAY;
1463 		}
1464 
1465 		/* i'th match failed to verify, so backtrack it */
1466 		k = i;
1467 
1468 backtrack:
1469 
1470 		/*
1471 		 * Must consider longer versions of the k'th sub-match.
1472 		 */
1473 		while (k > 0)
1474 		{
1475 			if (endpts[k] < end)
1476 			{
1477 				limit = endpts[k] + 1;
1478 				/* break out of backtrack loop, continue the outer one */
1479 				break;
1480 			}
1481 			/* can't lengthen k'th sub-match any more, consider previous one */
1482 			k--;
1483 		}
1484 	}
1485 
1486 	/* all possibilities exhausted */
1487 	MDEBUG(("%d: failed\n", t->id));
1488 	FREE(endpts);
1489 	return REG_NOMATCH;
1490 }
1491 
1492 
1493 
1494 #include "rege_dfa.c"
1495