1 /*
2 * re_*exec and friends - match REs
3 *
4 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
5 *
6 * Development of this software was funded, in part, by Cray Research Inc.,
7 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8 * Corporation, none of whom are responsible for the results. The author
9 * thanks all of them.
10 *
11 * Redistribution and use in source and binary forms -- with or without
12 * modification -- are permitted for any purpose, provided that
13 * redistributions in source form retain this entire copyright notice and
14 * indicate the origin and nature of any modifications.
15 *
16 * I'd appreciate being given credit for this package in the documentation
17 * of software which uses it, but that is not a requirement.
18 *
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * src/backend/regex/regexec.c
31 *
32 */
33
34 #include "regex/regguts.h"
35
36
37
38 /* lazy-DFA representation */
39 struct arcp
40 { /* "pointer" to an outarc */
41 struct sset *ss;
42 color co;
43 };
44
45 struct sset
46 { /* state set */
47 unsigned *states; /* pointer to bitvector */
48 unsigned hash; /* hash of bitvector */
49 #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52 int flags;
53 #define STARTER 01 /* the initial state set */
54 #define POSTSTATE 02 /* includes the goal state */
55 #define LOCKED 04 /* locked in cache */
56 #define NOPROGRESS 010 /* zero-progress state set */
57 struct arcp ins; /* chain of inarcs pointing here */
58 chr *lastseen; /* last entered on arrival here */
59 struct sset **outs; /* outarc vector indexed by color */
60 struct arcp *inchain; /* chain-pointer vector for outarcs */
61 };
62
63 struct dfa
64 {
65 int nssets; /* size of cache */
66 int nssused; /* how many entries occupied yet */
67 int nstates; /* number of states */
68 int ncolors; /* length of outarc and inchain vectors */
69 int wordsper; /* length of state-set bitvectors */
70 struct sset *ssets; /* state-set cache */
71 unsigned *statesarea; /* bitvector storage */
72 unsigned *work; /* pointer to work area within statesarea */
73 struct sset **outsarea; /* outarc-vector storage */
74 struct arcp *incarea; /* inchain storage */
75 struct cnfa *cnfa;
76 struct colormap *cm;
77 chr *lastpost; /* location of last cache-flushed success */
78 chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
79 struct sset *search; /* replacement-search-pointer memory */
80 int backno; /* if DFA for a backref, subno it refers to */
81 short backmin; /* min repetitions for backref */
82 short backmax; /* max repetitions for backref */
83 bool ismalloced; /* should this struct dfa be freed? */
84 bool arraysmalloced; /* should its subsidiary arrays be freed? */
85 };
86
87 #define WORK 1 /* number of work bitvectors needed */
88
89 /* setup for non-malloc allocation for small cases */
90 #define FEWSTATES 20 /* must be less than UBITS */
91 #define FEWCOLORS 15
92 struct smalldfa
93 {
94 struct dfa dfa; /* must be first */
95 struct sset ssets[FEWSTATES * 2];
96 unsigned statesarea[FEWSTATES * 2 + WORK];
97 struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
98 struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
99 };
100
101 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
102
103
104
105 /* internal variables, bundled for easy passing around */
106 struct vars
107 {
108 regex_t *re;
109 struct guts *g;
110 int eflags; /* copies of arguments */
111 size_t nmatch;
112 regmatch_t *pmatch;
113 rm_detail_t *details;
114 chr *start; /* start of string */
115 chr *search_start; /* search start of string */
116 chr *stop; /* just past end of string */
117 int err; /* error code if any (0 none) */
118 struct dfa **subdfas; /* per-tree-subre DFAs */
119 struct dfa **ladfas; /* per-lacon-subre DFAs */
120 struct sset **lblastcss; /* per-lacon-subre lookbehind restart data */
121 chr **lblastcp; /* per-lacon-subre lookbehind restart data */
122 struct smalldfa dfa1;
123 struct smalldfa dfa2;
124 };
125
126 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
127 #define ISERR() VISERR(v)
128 #define VERR(vv,e) ((vv)->err = ((vv)->err ? (vv)->err : (e)))
129 #define ERR(e) VERR(v, e) /* record an error */
130 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
131 #define OFF(p) ((p) - v->start)
132 #define LOFF(p) ((long)OFF(p))
133
134
135
136 /*
137 * forward declarations
138 */
139 /* === regexec.c === */
140 static struct dfa *getsubdfa(struct vars *, struct subre *);
141 static struct dfa *getladfa(struct vars *, int);
142 static int find(struct vars *, struct cnfa *, struct colormap *);
143 static int cfind(struct vars *, struct cnfa *, struct colormap *);
144 static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
145 static void zapallsubs(regmatch_t *, size_t);
146 static void zaptreesubs(struct vars *, struct subre *);
147 static void subset(struct vars *, struct subre *, chr *, chr *);
148 static int cdissect(struct vars *, struct subre *, chr *, chr *);
149 static int ccondissect(struct vars *, struct subre *, chr *, chr *);
150 static int crevcondissect(struct vars *, struct subre *, chr *, chr *);
151 static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
152 static int caltdissect(struct vars *, struct subre *, chr *, chr *);
153 static int citerdissect(struct vars *, struct subre *, chr *, chr *);
154 static int creviterdissect(struct vars *, struct subre *, chr *, chr *);
155
156 /* === rege_dfa.c === */
157 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
158 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
159 static int matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
160 static chr *dfa_backref(struct vars *, struct dfa *, chr *, chr *, chr *, bool);
161 static chr *lastcold(struct vars *, struct dfa *);
162 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
163 static void freedfa(struct dfa *);
164 static unsigned hash(unsigned *, int);
165 static struct sset *initialize(struct vars *, struct dfa *, chr *);
166 static struct sset *miss(struct vars *, struct dfa *, struct sset *, color, chr *, chr *);
167 static int lacon(struct vars *, struct cnfa *, chr *, color);
168 static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
169 static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
170
171
172 /*
173 * pg_regexec - match regular expression
174 */
175 int
pg_regexec(regex_t * re,const chr * string,size_t len,size_t search_start,rm_detail_t * details,size_t nmatch,regmatch_t pmatch[],int flags)176 pg_regexec(regex_t *re,
177 const chr *string,
178 size_t len,
179 size_t search_start,
180 rm_detail_t *details,
181 size_t nmatch,
182 regmatch_t pmatch[],
183 int flags)
184 {
185 struct vars var;
186 register struct vars *v = &var;
187 int st;
188 size_t n;
189 size_t i;
190 int backref;
191
192 #define LOCALMAT 20
193 regmatch_t mat[LOCALMAT];
194
195 #define LOCALDFAS 40
196 struct dfa *subdfas[LOCALDFAS];
197
198 /* sanity checks */
199 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
200 return REG_INVARG;
201 if (re->re_csize != sizeof(chr))
202 return REG_MIXED;
203 if (search_start > len)
204 return REG_NOMATCH;
205
206 /* Initialize locale-dependent support */
207 pg_set_regex_collation(re->re_collation);
208
209 /* setup */
210 v->re = re;
211 v->g = (struct guts *) re->re_guts;
212 if ((v->g->cflags & REG_EXPECT) && details == NULL)
213 return REG_INVARG;
214 if (v->g->info & REG_UIMPOSSIBLE)
215 return REG_NOMATCH;
216 backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
217 v->eflags = flags;
218 if (v->g->cflags & REG_NOSUB)
219 nmatch = 0; /* override client */
220 v->nmatch = nmatch;
221 if (backref)
222 {
223 /* need work area */
224 if (v->g->nsub + 1 <= LOCALMAT)
225 v->pmatch = mat;
226 else
227 v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
228 sizeof(regmatch_t));
229 if (v->pmatch == NULL)
230 return REG_ESPACE;
231 v->nmatch = v->g->nsub + 1;
232 }
233 else
234 v->pmatch = pmatch;
235 if (v->nmatch > 0)
236 zapallsubs(v->pmatch, v->nmatch);
237 v->details = details;
238 v->start = (chr *) string;
239 v->search_start = (chr *) string + search_start;
240 v->stop = (chr *) string + len;
241 v->err = 0;
242 v->subdfas = NULL;
243 v->ladfas = NULL;
244 v->lblastcss = NULL;
245 v->lblastcp = NULL;
246 /* below this point, "goto cleanup" will behave sanely */
247
248 assert(v->g->ntree >= 0);
249 n = (size_t) v->g->ntree;
250 if (n <= LOCALDFAS)
251 v->subdfas = subdfas;
252 else
253 {
254 v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
255 if (v->subdfas == NULL)
256 {
257 st = REG_ESPACE;
258 goto cleanup;
259 }
260 }
261 for (i = 0; i < n; i++)
262 v->subdfas[i] = NULL;
263
264 assert(v->g->nlacons >= 0);
265 n = (size_t) v->g->nlacons;
266 if (n > 0)
267 {
268 v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
269 if (v->ladfas == NULL)
270 {
271 st = REG_ESPACE;
272 goto cleanup;
273 }
274 for (i = 0; i < n; i++)
275 v->ladfas[i] = NULL;
276 v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
277 v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
278 if (v->lblastcss == NULL || v->lblastcp == NULL)
279 {
280 st = REG_ESPACE;
281 goto cleanup;
282 }
283 for (i = 0; i < n; i++)
284 {
285 v->lblastcss[i] = NULL;
286 v->lblastcp[i] = NULL;
287 }
288 }
289
290 /* do it */
291 assert(v->g->tree != NULL);
292 if (backref)
293 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
294 else
295 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
296
297 /* copy (portion of) match vector over if necessary */
298 if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
299 {
300 zapallsubs(pmatch, nmatch);
301 n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
302 memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
303 }
304
305 /* clean up */
306 cleanup:
307 if (v->pmatch != pmatch && v->pmatch != mat)
308 FREE(v->pmatch);
309 if (v->subdfas != NULL)
310 {
311 n = (size_t) v->g->ntree;
312 for (i = 0; i < n; i++)
313 {
314 if (v->subdfas[i] != NULL)
315 freedfa(v->subdfas[i]);
316 }
317 if (v->subdfas != subdfas)
318 FREE(v->subdfas);
319 }
320 if (v->ladfas != NULL)
321 {
322 n = (size_t) v->g->nlacons;
323 for (i = 0; i < n; i++)
324 {
325 if (v->ladfas[i] != NULL)
326 freedfa(v->ladfas[i]);
327 }
328 FREE(v->ladfas);
329 }
330 if (v->lblastcss != NULL)
331 FREE(v->lblastcss);
332 if (v->lblastcp != NULL)
333 FREE(v->lblastcp);
334
335 #ifdef REG_DEBUG
336 if (v->eflags & (REG_FTRACE | REG_MTRACE))
337 fflush(stdout);
338 #endif
339
340 return st;
341 }
342
343 /*
344 * getsubdfa - create or re-fetch the DFA for a tree subre node
345 *
346 * We only need to create the DFA once per overall regex execution.
347 * The DFA will be freed by the cleanup step in pg_regexec().
348 */
349 static struct dfa *
getsubdfa(struct vars * v,struct subre * t)350 getsubdfa(struct vars *v,
351 struct subre *t)
352 {
353 struct dfa *d = v->subdfas[t->id];
354
355 if (d == NULL)
356 {
357 d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
358 if (d == NULL)
359 return NULL;
360 /* set up additional info if this is a backref node */
361 if (t->op == 'b')
362 {
363 d->backno = t->backno;
364 d->backmin = t->min;
365 d->backmax = t->max;
366 }
367 v->subdfas[t->id] = d;
368 }
369 return d;
370 }
371
372 /*
373 * getladfa - create or re-fetch the DFA for a LACON subre node
374 *
375 * Same as above, but for LACONs.
376 */
377 static struct dfa *
getladfa(struct vars * v,int n)378 getladfa(struct vars *v,
379 int n)
380 {
381 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
382
383 if (v->ladfas[n] == NULL)
384 {
385 struct subre *sub = &v->g->lacons[n];
386
387 v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
388 /* a LACON can't contain a backref, so nothing else to do */
389 }
390 return v->ladfas[n];
391 }
392
393 /*
394 * find - find a match for the main NFA (no-complications case)
395 */
396 static int
find(struct vars * v,struct cnfa * cnfa,struct colormap * cm)397 find(struct vars *v,
398 struct cnfa *cnfa,
399 struct colormap *cm)
400 {
401 struct dfa *s;
402 struct dfa *d;
403 chr *begin;
404 chr *end = NULL;
405 chr *cold;
406 chr *open; /* open and close of range of possible starts */
407 chr *close;
408 int hitend;
409 int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
410
411 /* first, a shot with the search RE */
412 s = newdfa(v, &v->g->search, cm, &v->dfa1);
413 if (s == NULL)
414 return v->err;
415 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
416 cold = NULL;
417 close = shortest(v, s, v->search_start, v->search_start, v->stop,
418 &cold, (int *) NULL);
419 freedfa(s);
420 NOERR();
421 if (v->g->cflags & REG_EXPECT)
422 {
423 assert(v->details != NULL);
424 if (cold != NULL)
425 v->details->rm_extend.rm_so = OFF(cold);
426 else
427 v->details->rm_extend.rm_so = OFF(v->stop);
428 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
429 }
430 if (close == NULL) /* not found */
431 return REG_NOMATCH;
432 if (v->nmatch == 0) /* found, don't need exact location */
433 return REG_OKAY;
434
435 /* find starting point and match */
436 assert(cold != NULL);
437 open = cold;
438 cold = NULL;
439 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
440 d = newdfa(v, cnfa, cm, &v->dfa1);
441 if (d == NULL)
442 return v->err;
443 for (begin = open; begin <= close; begin++)
444 {
445 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
446 if (shorter)
447 end = shortest(v, d, begin, begin, v->stop,
448 (chr **) NULL, &hitend);
449 else
450 end = longest(v, d, begin, v->stop, &hitend);
451 if (ISERR())
452 {
453 freedfa(d);
454 return v->err;
455 }
456 if (hitend && cold == NULL)
457 cold = begin;
458 if (end != NULL)
459 break; /* NOTE BREAK OUT */
460 }
461 assert(end != NULL); /* search RE succeeded so loop should */
462 freedfa(d);
463
464 /* and pin down details */
465 assert(v->nmatch > 0);
466 v->pmatch[0].rm_so = OFF(begin);
467 v->pmatch[0].rm_eo = OFF(end);
468 if (v->g->cflags & REG_EXPECT)
469 {
470 if (cold != NULL)
471 v->details->rm_extend.rm_so = OFF(cold);
472 else
473 v->details->rm_extend.rm_so = OFF(v->stop);
474 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
475 }
476 if (v->nmatch == 1) /* no need for submatches */
477 return REG_OKAY;
478
479 /* find submatches */
480 return cdissect(v, v->g->tree, begin, end);
481 }
482
483 /*
484 * cfind - find a match for the main NFA (with complications)
485 */
486 static int
cfind(struct vars * v,struct cnfa * cnfa,struct colormap * cm)487 cfind(struct vars *v,
488 struct cnfa *cnfa,
489 struct colormap *cm)
490 {
491 struct dfa *s;
492 struct dfa *d;
493 chr *cold;
494 int ret;
495
496 s = newdfa(v, &v->g->search, cm, &v->dfa1);
497 if (s == NULL)
498 return v->err;
499 d = newdfa(v, cnfa, cm, &v->dfa2);
500 if (d == NULL)
501 {
502 freedfa(s);
503 return v->err;
504 }
505
506 ret = cfindloop(v, cnfa, cm, d, s, &cold);
507
508 freedfa(d);
509 freedfa(s);
510 NOERR();
511 if (v->g->cflags & REG_EXPECT)
512 {
513 assert(v->details != NULL);
514 if (cold != NULL)
515 v->details->rm_extend.rm_so = OFF(cold);
516 else
517 v->details->rm_extend.rm_so = OFF(v->stop);
518 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
519 }
520 return ret;
521 }
522
523 /*
524 * cfindloop - the heart of cfind
525 */
526 static int
cfindloop(struct vars * v,struct cnfa * cnfa,struct colormap * cm,struct dfa * d,struct dfa * s,chr ** coldp)527 cfindloop(struct vars *v,
528 struct cnfa *cnfa,
529 struct colormap *cm,
530 struct dfa *d,
531 struct dfa *s,
532 chr **coldp) /* where to put coldstart pointer */
533 {
534 chr *begin;
535 chr *end;
536 chr *cold;
537 chr *open; /* open and close of range of possible starts */
538 chr *close;
539 chr *estart;
540 chr *estop;
541 int er;
542 int shorter = v->g->tree->flags & SHORTER;
543 int hitend;
544
545 assert(d != NULL && s != NULL);
546 cold = NULL;
547 close = v->search_start;
548 do
549 {
550 /* Search with the search RE for match range at/beyond "close" */
551 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
552 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
553 if (ISERR())
554 {
555 *coldp = cold;
556 return v->err;
557 }
558 if (close == NULL)
559 break; /* no more possible match anywhere */
560 assert(cold != NULL);
561 open = cold;
562 cold = NULL;
563 /* Search for matches starting between "open" and "close" inclusive */
564 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
565 for (begin = open; begin <= close; begin++)
566 {
567 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
568 estart = begin;
569 estop = v->stop;
570 for (;;)
571 {
572 /* Here we use the top node's detailed RE */
573 if (shorter)
574 end = shortest(v, d, begin, estart,
575 estop, (chr **) NULL, &hitend);
576 else
577 end = longest(v, d, begin, estop,
578 &hitend);
579 if (ISERR())
580 {
581 *coldp = cold;
582 return v->err;
583 }
584 if (hitend && cold == NULL)
585 cold = begin;
586 if (end == NULL)
587 break; /* no match with this begin point, try next */
588 MDEBUG(("tentative end %ld\n", LOFF(end)));
589 /* Dissect the potential match to see if it really matches */
590 er = cdissect(v, v->g->tree, begin, end);
591 if (er == REG_OKAY)
592 {
593 if (v->nmatch > 0)
594 {
595 v->pmatch[0].rm_so = OFF(begin);
596 v->pmatch[0].rm_eo = OFF(end);
597 }
598 *coldp = cold;
599 return REG_OKAY;
600 }
601 if (er != REG_NOMATCH)
602 {
603 ERR(er);
604 *coldp = cold;
605 return er;
606 }
607 /* Try next longer/shorter match with same begin point */
608 if (shorter)
609 {
610 if (end == estop)
611 break; /* no more, so try next begin point */
612 estart = end + 1;
613 }
614 else
615 {
616 if (end == begin)
617 break; /* no more, so try next begin point */
618 estop = end - 1;
619 }
620 } /* end loop over endpoint positions */
621 } /* end loop over beginning positions */
622
623 /*
624 * If we get here, there is no possible match starting at or before
625 * "close", so consider matches beyond that. We'll do a fresh search
626 * with the search RE to find a new promising match range.
627 */
628 close++;
629 } while (close < v->stop);
630
631 *coldp = cold;
632 return REG_NOMATCH;
633 }
634
635 /*
636 * zapallsubs - initialize all subexpression matches to "no match"
637 *
638 * Note that p[0], the overall-match location, is not touched.
639 */
640 static void
zapallsubs(regmatch_t * p,size_t n)641 zapallsubs(regmatch_t *p,
642 size_t n)
643 {
644 size_t i;
645
646 for (i = n - 1; i > 0; i--)
647 {
648 p[i].rm_so = -1;
649 p[i].rm_eo = -1;
650 }
651 }
652
653 /*
654 * zaptreesubs - initialize subexpressions within subtree to "no match"
655 */
656 static void
zaptreesubs(struct vars * v,struct subre * t)657 zaptreesubs(struct vars *v,
658 struct subre *t)
659 {
660 int n = t->capno;
661 struct subre *t2;
662
663 if (n > 0)
664 {
665 if ((size_t) n < v->nmatch)
666 {
667 v->pmatch[n].rm_so = -1;
668 v->pmatch[n].rm_eo = -1;
669 }
670 }
671
672 for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
673 zaptreesubs(v, t2);
674 }
675
676 /*
677 * subset - set subexpression match data for a successful subre
678 */
679 static void
subset(struct vars * v,struct subre * sub,chr * begin,chr * end)680 subset(struct vars *v,
681 struct subre *sub,
682 chr *begin,
683 chr *end)
684 {
685 int n = sub->capno;
686
687 assert(n > 0);
688 if ((size_t) n >= v->nmatch)
689 return;
690
691 MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
692 v->pmatch[n].rm_so = OFF(begin);
693 v->pmatch[n].rm_eo = OFF(end);
694 }
695
696 /*
697 * cdissect - check backrefs and determine subexpression matches
698 *
699 * cdissect recursively processes a subre tree to check matching of backrefs
700 * and/or identify submatch boundaries for capture nodes. The proposed match
701 * runs from "begin" to "end" (not including "end"), and we are basically
702 * "dissecting" it to see where the submatches are.
703 *
704 * Before calling any level of cdissect, the caller must have run the node's
705 * DFA and found that the proposed substring satisfies the DFA. (We make
706 * the caller do that because in concatenation and iteration nodes, it's
707 * much faster to check all the substrings against the child DFAs before we
708 * recurse.)
709 *
710 * A side-effect of a successful match is to save match locations for
711 * capturing subexpressions in v->pmatch[]. This is a little bit tricky,
712 * so we make the following rules:
713 * 1. Before initial entry to cdissect, all match data must have been
714 * cleared (this is seen to by zapallsubs).
715 * 2. Before any recursive entry to cdissect, the match data for that
716 * subexpression tree must be guaranteed clear (see zaptreesubs).
717 * 3. When returning REG_OKAY, each level of cdissect will have saved
718 * any relevant match locations.
719 * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
720 * that its subexpression match locations are again clear.
721 * 5. No guarantees are made for error cases (i.e., other result codes).
722 * 6. When a level of cdissect abandons a successful sub-match, it will
723 * clear that subtree's match locations with zaptreesubs before trying
724 * any new DFA match or cdissect call for that subtree or any subtree
725 * to its right (that is, any subtree that could have a backref into the
726 * abandoned match).
727 * This may seem overly complicated, but it's difficult to simplify it
728 * because of the provision that match locations must be reset before
729 * any fresh DFA match (a rule that is needed to make dfa_backref safe).
730 * That means it won't work to just reset relevant match locations at the
731 * start of each cdissect level.
732 */
733 static int /* regexec return code */
cdissect(struct vars * v,struct subre * t,chr * begin,chr * end)734 cdissect(struct vars *v,
735 struct subre *t,
736 chr *begin, /* beginning of relevant substring */
737 chr *end) /* end of same */
738 {
739 int er;
740
741 assert(t != NULL);
742 MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
743
744 /* handy place to check for operation cancel */
745 if (CANCEL_REQUESTED(v->re))
746 return REG_CANCEL;
747 /* ... and stack overrun */
748 if (STACK_TOO_DEEP(v->re))
749 return REG_ETOOBIG;
750
751 switch (t->op)
752 {
753 case '=': /* terminal node */
754 assert(t->child == NULL);
755 er = REG_OKAY; /* no action, parent did the work */
756 break;
757 case 'b': /* back reference */
758 assert(t->child == NULL);
759 er = cbrdissect(v, t, begin, end);
760 break;
761 case '.': /* concatenation */
762 assert(t->child != NULL);
763 if (t->child->flags & SHORTER) /* reverse scan */
764 er = crevcondissect(v, t, begin, end);
765 else
766 er = ccondissect(v, t, begin, end);
767 break;
768 case '|': /* alternation */
769 assert(t->child != NULL);
770 er = caltdissect(v, t, begin, end);
771 break;
772 case '*': /* iteration */
773 assert(t->child != NULL);
774 if (t->child->flags & SHORTER) /* reverse scan */
775 er = creviterdissect(v, t, begin, end);
776 else
777 er = citerdissect(v, t, begin, end);
778 break;
779 case '(': /* no-op capture node */
780 assert(t->child != NULL);
781 assert(t->capno > 0);
782 er = cdissect(v, t->child, begin, end);
783 break;
784 default:
785 er = REG_ASSERT;
786 break;
787 }
788
789 /*
790 * We should never have a match failure unless backrefs lurk below;
791 * otherwise, either caller failed to check the DFA, or there's some
792 * inconsistency between the DFA and the node's innards.
793 */
794 assert(er != REG_NOMATCH || (t->flags & BACKR));
795
796 /*
797 * If this node is marked as capturing, save successful match's location.
798 */
799 if (t->capno > 0 && er == REG_OKAY)
800 subset(v, t, begin, end);
801
802 return er;
803 }
804
805 /*
806 * ccondissect - dissect match for concatenation node
807 */
808 static int /* regexec return code */
ccondissect(struct vars * v,struct subre * t,chr * begin,chr * end)809 ccondissect(struct vars *v,
810 struct subre *t,
811 chr *begin, /* beginning of relevant substring */
812 chr *end) /* end of same */
813 {
814 struct subre *left = t->child;
815 struct subre *right = left->sibling;
816 struct dfa *d;
817 struct dfa *d2;
818 chr *mid;
819 int er;
820
821 assert(t->op == '.');
822 assert(left != NULL && left->cnfa.nstates > 0);
823 assert(right != NULL && right->cnfa.nstates > 0);
824 assert(right->sibling == NULL);
825 assert(!(left->flags & SHORTER));
826
827 d = getsubdfa(v, left);
828 NOERR();
829 d2 = getsubdfa(v, right);
830 NOERR();
831 MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
832
833 /* pick a tentative midpoint */
834 mid = longest(v, d, begin, end, (int *) NULL);
835 NOERR();
836 if (mid == NULL)
837 return REG_NOMATCH;
838 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
839
840 /* iterate until satisfaction or failure */
841 for (;;)
842 {
843 /* try this midpoint on for size */
844 if (longest(v, d2, mid, end, (int *) NULL) == end)
845 {
846 er = cdissect(v, left, begin, mid);
847 if (er == REG_OKAY)
848 {
849 er = cdissect(v, right, mid, end);
850 if (er == REG_OKAY)
851 {
852 /* satisfaction */
853 MDEBUG(("%d: successful\n", t->id));
854 return REG_OKAY;
855 }
856 /* Reset left's matches (right should have done so itself) */
857 zaptreesubs(v, left);
858 }
859 if (er != REG_NOMATCH)
860 return er;
861 }
862 NOERR();
863
864 /* that midpoint didn't work, find a new one */
865 if (mid == begin)
866 {
867 /* all possibilities exhausted */
868 MDEBUG(("%d: no midpoint\n", t->id));
869 return REG_NOMATCH;
870 }
871 mid = longest(v, d, begin, mid - 1, (int *) NULL);
872 NOERR();
873 if (mid == NULL)
874 {
875 /* failed to find a new one */
876 MDEBUG(("%d: failed midpoint\n", t->id));
877 return REG_NOMATCH;
878 }
879 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
880 }
881
882 /* can't get here */
883 return REG_ASSERT;
884 }
885
886 /*
887 * crevcondissect - dissect match for concatenation node, shortest-first
888 */
889 static int /* regexec return code */
crevcondissect(struct vars * v,struct subre * t,chr * begin,chr * end)890 crevcondissect(struct vars *v,
891 struct subre *t,
892 chr *begin, /* beginning of relevant substring */
893 chr *end) /* end of same */
894 {
895 struct subre *left = t->child;
896 struct subre *right = left->sibling;
897 struct dfa *d;
898 struct dfa *d2;
899 chr *mid;
900 int er;
901
902 assert(t->op == '.');
903 assert(left != NULL && left->cnfa.nstates > 0);
904 assert(right != NULL && right->cnfa.nstates > 0);
905 assert(right->sibling == NULL);
906 assert(left->flags & SHORTER);
907
908 d = getsubdfa(v, left);
909 NOERR();
910 d2 = getsubdfa(v, right);
911 NOERR();
912 MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
913
914 /* pick a tentative midpoint */
915 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
916 NOERR();
917 if (mid == NULL)
918 return REG_NOMATCH;
919 MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
920
921 /* iterate until satisfaction or failure */
922 for (;;)
923 {
924 /* try this midpoint on for size */
925 if (longest(v, d2, mid, end, (int *) NULL) == end)
926 {
927 er = cdissect(v, left, begin, mid);
928 if (er == REG_OKAY)
929 {
930 er = cdissect(v, right, mid, end);
931 if (er == REG_OKAY)
932 {
933 /* satisfaction */
934 MDEBUG(("%d: successful\n", t->id));
935 return REG_OKAY;
936 }
937 /* Reset left's matches (right should have done so itself) */
938 zaptreesubs(v, left);
939 }
940 if (er != REG_NOMATCH)
941 return er;
942 }
943 NOERR();
944
945 /* that midpoint didn't work, find a new one */
946 if (mid == end)
947 {
948 /* all possibilities exhausted */
949 MDEBUG(("%d: no midpoint\n", t->id));
950 return REG_NOMATCH;
951 }
952 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
953 NOERR();
954 if (mid == NULL)
955 {
956 /* failed to find a new one */
957 MDEBUG(("%d: failed midpoint\n", t->id));
958 return REG_NOMATCH;
959 }
960 MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
961 }
962
963 /* can't get here */
964 return REG_ASSERT;
965 }
966
967 /*
968 * cbrdissect - dissect match for backref node
969 *
970 * The backref match might already have been verified by dfa_backref(),
971 * but we don't know that for sure so must check it here.
972 */
973 static int /* regexec return code */
cbrdissect(struct vars * v,struct subre * t,chr * begin,chr * end)974 cbrdissect(struct vars *v,
975 struct subre *t,
976 chr *begin, /* beginning of relevant substring */
977 chr *end) /* end of same */
978 {
979 int n = t->backno;
980 size_t numreps;
981 size_t tlen;
982 size_t brlen;
983 chr *brstring;
984 chr *p;
985 int min = t->min;
986 int max = t->max;
987
988 assert(t != NULL);
989 assert(t->op == 'b');
990 assert(n >= 0);
991 assert((size_t) n < v->nmatch);
992
993 MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
994 LOFF(begin), LOFF(end)));
995
996 /* get the backreferenced string */
997 if (v->pmatch[n].rm_so == -1)
998 return REG_NOMATCH;
999 brstring = v->start + v->pmatch[n].rm_so;
1000 brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
1001
1002 /* special cases for zero-length strings */
1003 if (brlen == 0)
1004 {
1005 /*
1006 * matches only if target is zero length, but any number of
1007 * repetitions can be considered to be present
1008 */
1009 if (begin == end && min <= max)
1010 {
1011 MDEBUG(("%d: backref matched trivially\n", t->id));
1012 return REG_OKAY;
1013 }
1014 return REG_NOMATCH;
1015 }
1016 if (begin == end)
1017 {
1018 /* matches only if zero repetitions are okay */
1019 if (min == 0)
1020 {
1021 MDEBUG(("%d: backref matched trivially\n", t->id));
1022 return REG_OKAY;
1023 }
1024 return REG_NOMATCH;
1025 }
1026
1027 /*
1028 * check target length to see if it could possibly be an allowed number of
1029 * repetitions of brstring
1030 */
1031 assert(end > begin);
1032 tlen = end - begin;
1033 if (tlen % brlen != 0)
1034 return REG_NOMATCH;
1035 numreps = tlen / brlen;
1036 if (numreps < min || (numreps > max && max != DUPINF))
1037 return REG_NOMATCH;
1038
1039 /* okay, compare the actual string contents */
1040 p = begin;
1041 while (numreps-- > 0)
1042 {
1043 if ((*v->g->compare) (brstring, p, brlen) != 0)
1044 return REG_NOMATCH;
1045 p += brlen;
1046 }
1047
1048 MDEBUG(("%d: backref matched\n", t->id));
1049 return REG_OKAY;
1050 }
1051
1052 /*
1053 * caltdissect - dissect match for alternation node
1054 */
1055 static int /* regexec return code */
caltdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1056 caltdissect(struct vars *v,
1057 struct subre *t,
1058 chr *begin, /* beginning of relevant substring */
1059 chr *end) /* end of same */
1060 {
1061 struct dfa *d;
1062 int er;
1063
1064 assert(t->op == '|');
1065
1066 t = t->child;
1067 /* there should be at least 2 alternatives */
1068 assert(t != NULL && t->sibling != NULL);
1069
1070 while (t != NULL)
1071 {
1072 assert(t->cnfa.nstates > 0);
1073
1074 MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1075
1076 d = getsubdfa(v, t);
1077 NOERR();
1078 if (longest(v, d, begin, end, (int *) NULL) == end)
1079 {
1080 MDEBUG(("%d: caltdissect matched\n", t->id));
1081 er = cdissect(v, t, begin, end);
1082 if (er != REG_NOMATCH)
1083 return er;
1084 }
1085 NOERR();
1086
1087 t = t->sibling;
1088 }
1089
1090 return REG_NOMATCH;
1091 }
1092
1093 /*
1094 * citerdissect - dissect match for iteration node
1095 */
1096 static int /* regexec return code */
citerdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1097 citerdissect(struct vars *v,
1098 struct subre *t,
1099 chr *begin, /* beginning of relevant substring */
1100 chr *end) /* end of same */
1101 {
1102 struct dfa *d;
1103 chr **endpts;
1104 chr *limit;
1105 int min_matches;
1106 size_t max_matches;
1107 int nverified;
1108 int k;
1109 int i;
1110 int er;
1111
1112 assert(t->op == '*');
1113 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1114 assert(!(t->child->flags & SHORTER));
1115 assert(begin <= end);
1116
1117 MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1118
1119 /*
1120 * For the moment, assume the minimum number of matches is 1. If zero
1121 * matches are allowed, and the target string is empty, we are allowed to
1122 * match regardless of the contents of the iter node --- but we would
1123 * prefer to match once, so that capturing parens get set. (An example of
1124 * the concern here is a pattern like "()*\1", which historically this
1125 * code has allowed to succeed.) Therefore, we deal with the zero-matches
1126 * case at the bottom, after failing to find any other way to match.
1127 */
1128 min_matches = t->min;
1129 if (min_matches <= 0)
1130 min_matches = 1;
1131
1132 /*
1133 * We need workspace to track the endpoints of each sub-match. Normally
1134 * we consider only nonzero-length sub-matches, so there can be at most
1135 * end-begin of them. However, if min is larger than that, we will also
1136 * consider zero-length sub-matches in order to find enough matches.
1137 *
1138 * For convenience, endpts[0] contains the "begin" pointer and we store
1139 * sub-match endpoints in endpts[1..max_matches].
1140 */
1141 max_matches = end - begin;
1142 if (max_matches > t->max && t->max != DUPINF)
1143 max_matches = t->max;
1144 if (max_matches < min_matches)
1145 max_matches = min_matches;
1146 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1147 if (endpts == NULL)
1148 return REG_ESPACE;
1149 endpts[0] = begin;
1150
1151 d = getsubdfa(v, t->child);
1152 if (ISERR())
1153 {
1154 FREE(endpts);
1155 return v->err;
1156 }
1157
1158 /*
1159 * Our strategy is to first find a set of sub-match endpoints that are
1160 * valid according to the child node's DFA, and then recursively dissect
1161 * each sub-match to confirm validity. If any validity check fails,
1162 * backtrack that sub-match and try again. And, when we next try for a
1163 * validity check, we need not recheck any successfully verified
1164 * sub-matches that we didn't move the endpoints of. nverified remembers
1165 * how many sub-matches are currently known okay.
1166 */
1167
1168 /* initialize to consider first sub-match */
1169 nverified = 0;
1170 k = 1;
1171 limit = end;
1172
1173 /* iterate until satisfaction or failure */
1174 while (k > 0)
1175 {
1176 /* try to find an endpoint for the k'th sub-match */
1177 endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
1178 if (ISERR())
1179 {
1180 FREE(endpts);
1181 return v->err;
1182 }
1183 if (endpts[k] == NULL)
1184 {
1185 /* no match possible, so see if we can shorten previous one */
1186 k--;
1187 goto backtrack;
1188 }
1189 MDEBUG(("%d: working endpoint %d: %ld\n",
1190 t->id, k, LOFF(endpts[k])));
1191
1192 /* k'th sub-match can no longer be considered verified */
1193 if (nverified >= k)
1194 nverified = k - 1;
1195
1196 if (endpts[k] != end)
1197 {
1198 /* haven't reached end yet, try another iteration if allowed */
1199 if (k >= max_matches)
1200 {
1201 /* must try to shorten some previous match */
1202 k--;
1203 goto backtrack;
1204 }
1205
1206 /* reject zero-length match unless necessary to achieve min */
1207 if (endpts[k] == endpts[k - 1] &&
1208 (k >= min_matches || min_matches - k < end - endpts[k]))
1209 goto backtrack;
1210
1211 k++;
1212 limit = end;
1213 continue;
1214 }
1215
1216 /*
1217 * We've identified a way to divide the string into k sub-matches that
1218 * works so far as the child DFA can tell. If k is an allowed number
1219 * of matches, start the slow part: recurse to verify each sub-match.
1220 * We always have k <= max_matches, needn't check that.
1221 */
1222 if (k < min_matches)
1223 goto backtrack;
1224
1225 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1226
1227 for (i = nverified + 1; i <= k; i++)
1228 {
1229 /* zap any match data from a non-last iteration */
1230 zaptreesubs(v, t->child);
1231 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1232 if (er == REG_OKAY)
1233 {
1234 nverified = i;
1235 continue;
1236 }
1237 if (er == REG_NOMATCH)
1238 break;
1239 /* oops, something failed */
1240 FREE(endpts);
1241 return er;
1242 }
1243
1244 if (i > k)
1245 {
1246 /* satisfaction */
1247 MDEBUG(("%d: successful\n", t->id));
1248 FREE(endpts);
1249 return REG_OKAY;
1250 }
1251
1252 /* i'th match failed to verify, so backtrack it */
1253 k = i;
1254
1255 backtrack:
1256
1257 /*
1258 * Must consider shorter versions of the k'th sub-match. However,
1259 * we'll only ask for a zero-length match if necessary.
1260 */
1261 while (k > 0)
1262 {
1263 chr *prev_end = endpts[k - 1];
1264
1265 if (endpts[k] > prev_end)
1266 {
1267 limit = endpts[k] - 1;
1268 if (limit > prev_end ||
1269 (k < min_matches && min_matches - k >= end - prev_end))
1270 {
1271 /* break out of backtrack loop, continue the outer one */
1272 break;
1273 }
1274 }
1275 /* can't shorten k'th sub-match any more, consider previous one */
1276 k--;
1277 }
1278 }
1279
1280 /* all possibilities exhausted */
1281 FREE(endpts);
1282
1283 /*
1284 * Now consider the possibility that we can match to a zero-length string
1285 * by using zero repetitions.
1286 */
1287 if (t->min == 0 && begin == end)
1288 {
1289 MDEBUG(("%d: allowing zero matches\n", t->id));
1290 return REG_OKAY;
1291 }
1292
1293 MDEBUG(("%d: failed\n", t->id));
1294 return REG_NOMATCH;
1295 }
1296
1297 /*
1298 * creviterdissect - dissect match for iteration node, shortest-first
1299 */
1300 static int /* regexec return code */
creviterdissect(struct vars * v,struct subre * t,chr * begin,chr * end)1301 creviterdissect(struct vars *v,
1302 struct subre *t,
1303 chr *begin, /* beginning of relevant substring */
1304 chr *end) /* end of same */
1305 {
1306 struct dfa *d;
1307 chr **endpts;
1308 chr *limit;
1309 int min_matches;
1310 size_t max_matches;
1311 int nverified;
1312 int k;
1313 int i;
1314 int er;
1315
1316 assert(t->op == '*');
1317 assert(t->child != NULL && t->child->cnfa.nstates > 0);
1318 assert(t->child->flags & SHORTER);
1319 assert(begin <= end);
1320
1321 MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
1322
1323 /*
1324 * If zero matches are allowed, and target string is empty, just declare
1325 * victory. OTOH, if target string isn't empty, zero matches can't work
1326 * so we pretend the min is 1.
1327 */
1328 min_matches = t->min;
1329 if (min_matches <= 0)
1330 {
1331 if (begin == end)
1332 {
1333 MDEBUG(("%d: allowing zero matches\n", t->id));
1334 return REG_OKAY;
1335 }
1336 min_matches = 1;
1337 }
1338
1339 /*
1340 * We need workspace to track the endpoints of each sub-match. Normally
1341 * we consider only nonzero-length sub-matches, so there can be at most
1342 * end-begin of them. However, if min is larger than that, we will also
1343 * consider zero-length sub-matches in order to find enough matches.
1344 *
1345 * For convenience, endpts[0] contains the "begin" pointer and we store
1346 * sub-match endpoints in endpts[1..max_matches].
1347 */
1348 max_matches = end - begin;
1349 if (max_matches > t->max && t->max != DUPINF)
1350 max_matches = t->max;
1351 if (max_matches < min_matches)
1352 max_matches = min_matches;
1353 endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
1354 if (endpts == NULL)
1355 return REG_ESPACE;
1356 endpts[0] = begin;
1357
1358 d = getsubdfa(v, t->child);
1359 if (ISERR())
1360 {
1361 FREE(endpts);
1362 return v->err;
1363 }
1364
1365 /*
1366 * Our strategy is to first find a set of sub-match endpoints that are
1367 * valid according to the child node's DFA, and then recursively dissect
1368 * each sub-match to confirm validity. If any validity check fails,
1369 * backtrack that sub-match and try again. And, when we next try for a
1370 * validity check, we need not recheck any successfully verified
1371 * sub-matches that we didn't move the endpoints of. nverified remembers
1372 * how many sub-matches are currently known okay.
1373 */
1374
1375 /* initialize to consider first sub-match */
1376 nverified = 0;
1377 k = 1;
1378 limit = begin;
1379
1380 /* iterate until satisfaction or failure */
1381 while (k > 0)
1382 {
1383 /* disallow zero-length match unless necessary to achieve min */
1384 if (limit == endpts[k - 1] &&
1385 limit != end &&
1386 (k >= min_matches || min_matches - k < end - limit))
1387 limit++;
1388
1389 /* if this is the last allowed sub-match, it must reach to the end */
1390 if (k >= max_matches)
1391 limit = end;
1392
1393 /* try to find an endpoint for the k'th sub-match */
1394 endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
1395 (chr **) NULL, (int *) NULL);
1396 if (ISERR())
1397 {
1398 FREE(endpts);
1399 return v->err;
1400 }
1401 if (endpts[k] == NULL)
1402 {
1403 /* no match possible, so see if we can lengthen previous one */
1404 k--;
1405 goto backtrack;
1406 }
1407 MDEBUG(("%d: working endpoint %d: %ld\n",
1408 t->id, k, LOFF(endpts[k])));
1409
1410 /* k'th sub-match can no longer be considered verified */
1411 if (nverified >= k)
1412 nverified = k - 1;
1413
1414 if (endpts[k] != end)
1415 {
1416 /* haven't reached end yet, try another iteration if allowed */
1417 if (k >= max_matches)
1418 {
1419 /* must try to lengthen some previous match */
1420 k--;
1421 goto backtrack;
1422 }
1423
1424 k++;
1425 limit = endpts[k - 1];
1426 continue;
1427 }
1428
1429 /*
1430 * We've identified a way to divide the string into k sub-matches that
1431 * works so far as the child DFA can tell. If k is an allowed number
1432 * of matches, start the slow part: recurse to verify each sub-match.
1433 * We always have k <= max_matches, needn't check that.
1434 */
1435 if (k < min_matches)
1436 goto backtrack;
1437
1438 MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
1439
1440 for (i = nverified + 1; i <= k; i++)
1441 {
1442 /* zap any match data from a non-last iteration */
1443 zaptreesubs(v, t->child);
1444 er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
1445 if (er == REG_OKAY)
1446 {
1447 nverified = i;
1448 continue;
1449 }
1450 if (er == REG_NOMATCH)
1451 break;
1452 /* oops, something failed */
1453 FREE(endpts);
1454 return er;
1455 }
1456
1457 if (i > k)
1458 {
1459 /* satisfaction */
1460 MDEBUG(("%d: successful\n", t->id));
1461 FREE(endpts);
1462 return REG_OKAY;
1463 }
1464
1465 /* i'th match failed to verify, so backtrack it */
1466 k = i;
1467
1468 backtrack:
1469
1470 /*
1471 * Must consider longer versions of the k'th sub-match.
1472 */
1473 while (k > 0)
1474 {
1475 if (endpts[k] < end)
1476 {
1477 limit = endpts[k] + 1;
1478 /* break out of backtrack loop, continue the outer one */
1479 break;
1480 }
1481 /* can't lengthen k'th sub-match any more, consider previous one */
1482 k--;
1483 }
1484 }
1485
1486 /* all possibilities exhausted */
1487 MDEBUG(("%d: failed\n", t->id));
1488 FREE(endpts);
1489 return REG_NOMATCH;
1490 }
1491
1492
1493
1494 #include "rege_dfa.c"
1495