xref: /dragonfly/contrib/grep/lib/regexec.c (revision 3bafb5c1)
1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Extended regular expression matching and search library.
4    Copyright (C) 2002-2011 Free Software Foundation, Inc.
5    This file is part of the GNU C Library.
6    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License along
19    with this program; if not, write to the Free Software Foundation,
20    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 
22 #include "verify.h"
23 #include "intprops.h"
24 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
25 				     Idx n) internal_function;
26 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
27 static void match_ctx_free (re_match_context_t *cache) internal_function;
28 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
29 					  Idx str_idx, Idx from, Idx to)
30      internal_function;
31 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
32      internal_function;
33 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
34 					   Idx str_idx) internal_function;
35 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
36 						    Idx node, Idx str_idx)
37      internal_function;
38 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
39 			   re_dfastate_t **limited_sts, Idx last_node,
40 			   Idx last_str_idx)
41      internal_function;
42 static reg_errcode_t re_search_internal (const regex_t *preg,
43 					 const char *string, Idx length,
44 					 Idx start, Idx last_start, Idx stop,
45 					 size_t nmatch, regmatch_t pmatch[],
46 					 int eflags) internal_function;
47 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
48 				  const char *string1, Idx length1,
49 				  const char *string2, Idx length2,
50 				  Idx start, regoff_t range,
51 				  struct re_registers *regs,
52 				  Idx stop, bool ret_len) internal_function;
53 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
54 				const char *string, Idx length, Idx start,
55 				regoff_t range, Idx stop,
56 				struct re_registers *regs,
57 				bool ret_len) internal_function;
58 static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
59 				  Idx nregs, int regs_allocated)
60      internal_function;
61 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
62      internal_function;
63 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
64 			   Idx *p_match_first) internal_function;
65 static Idx check_halt_state_context (const re_match_context_t *mctx,
66 				     const re_dfastate_t *state, Idx idx)
67      internal_function;
68 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
69 			 regmatch_t *prev_idx_match, Idx cur_node,
70 			 Idx cur_idx, Idx nmatch) internal_function;
71 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
72 				      Idx str_idx, Idx dest_node, Idx nregs,
73 				      regmatch_t *regs,
74 				      re_node_set *eps_via_nodes)
75      internal_function;
76 static reg_errcode_t set_regs (const regex_t *preg,
77 			       const re_match_context_t *mctx,
78 			       size_t nmatch, regmatch_t *pmatch,
79 			       bool fl_backtrack) internal_function;
80 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
81      internal_function;
82 
83 #ifdef RE_ENABLE_I18N
84 static int sift_states_iter_mb (const re_match_context_t *mctx,
85 				re_sift_context_t *sctx,
86 				Idx node_idx, Idx str_idx, Idx max_str_idx)
87      internal_function;
88 #endif /* RE_ENABLE_I18N */
89 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
90 					   re_sift_context_t *sctx)
91      internal_function;
92 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
93 					  re_sift_context_t *sctx, Idx str_idx,
94 					  re_node_set *cur_dest)
95      internal_function;
96 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
97 					      re_sift_context_t *sctx,
98 					      Idx str_idx,
99 					      re_node_set *dest_nodes)
100      internal_function;
101 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
102 					    re_node_set *dest_nodes,
103 					    const re_node_set *candidates)
104      internal_function;
105 static bool check_dst_limits (const re_match_context_t *mctx,
106 			      const re_node_set *limits,
107 			      Idx dst_node, Idx dst_idx, Idx src_node,
108 			      Idx src_idx) internal_function;
109 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
110 					int boundaries, Idx subexp_idx,
111 					Idx from_node, Idx bkref_idx)
112      internal_function;
113 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
114 				      Idx limit, Idx subexp_idx,
115 				      Idx node, Idx str_idx,
116 				      Idx bkref_idx) internal_function;
117 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
118 					  re_node_set *dest_nodes,
119 					  const re_node_set *candidates,
120 					  re_node_set *limits,
121 					  struct re_backref_cache_entry *bkref_ents,
122 					  Idx str_idx) internal_function;
123 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
124 					re_sift_context_t *sctx,
125 					Idx str_idx, const re_node_set *candidates)
126      internal_function;
127 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
128 					re_dfastate_t **dst,
129 					re_dfastate_t **src, Idx num)
130      internal_function;
131 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
132 					 re_match_context_t *mctx) internal_function;
133 static re_dfastate_t *transit_state (reg_errcode_t *err,
134 				     re_match_context_t *mctx,
135 				     re_dfastate_t *state) internal_function;
136 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
137 					    re_match_context_t *mctx,
138 					    re_dfastate_t *next_state)
139      internal_function;
140 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
141 						re_node_set *cur_nodes,
142 						Idx str_idx) internal_function;
143 #if 0
144 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
145 					re_match_context_t *mctx,
146 					re_dfastate_t *pstate)
147      internal_function;
148 #endif
149 #ifdef RE_ENABLE_I18N
150 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
151 				       re_dfastate_t *pstate)
152      internal_function;
153 #endif /* RE_ENABLE_I18N */
154 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
155 					  const re_node_set *nodes)
156      internal_function;
157 static reg_errcode_t get_subexp (re_match_context_t *mctx,
158 				 Idx bkref_node, Idx bkref_str_idx)
159      internal_function;
160 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
161 				     const re_sub_match_top_t *sub_top,
162 				     re_sub_match_last_t *sub_last,
163 				     Idx bkref_node, Idx bkref_str)
164      internal_function;
165 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
166 			     Idx subexp_idx, int type) internal_function;
167 static reg_errcode_t check_arrival (re_match_context_t *mctx,
168 				    state_array_t *path, Idx top_node,
169 				    Idx top_str, Idx last_node, Idx last_str,
170 				    int type) internal_function;
171 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
172 						   Idx str_idx,
173 						   re_node_set *cur_nodes,
174 						   re_node_set *next_nodes)
175      internal_function;
176 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
177 					       re_node_set *cur_nodes,
178 					       Idx ex_subexp, int type)
179      internal_function;
180 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
181 						   re_node_set *dst_nodes,
182 						   Idx target, Idx ex_subexp,
183 						   int type) internal_function;
184 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
185 					 re_node_set *cur_nodes, Idx cur_str,
186 					 Idx subexp_num, int type)
187      internal_function;
188 static bool build_trtable (const re_dfa_t *dfa,
189 			   re_dfastate_t *state) internal_function;
190 #ifdef RE_ENABLE_I18N
191 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
192 				    const re_string_t *input, Idx idx)
193      internal_function;
194 # ifdef _LIBC
195 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
196 						   size_t name_len)
197      internal_function;
198 # endif /* _LIBC */
199 #endif /* RE_ENABLE_I18N */
200 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
201 				       const re_dfastate_t *state,
202 				       re_node_set *states_node,
203 				       bitset_t *states_ch) internal_function;
204 static bool check_node_accept (const re_match_context_t *mctx,
205 			       const re_token_t *node, Idx idx)
206      internal_function;
207 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
208      internal_function;
209 
210 /* Entry point for POSIX code.  */
211 
212 /* regexec searches for a given pattern, specified by PREG, in the
213    string STRING.
214 
215    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
216    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
217    least NMATCH elements, and we set them to the offsets of the
218    corresponding matched substrings.
219 
220    EFLAGS specifies `execution flags' which affect matching: if
221    REG_NOTBOL is set, then ^ does not match at the beginning of the
222    string; if REG_NOTEOL is set, then $ does not match at the end.
223 
224    We return 0 if we find a match and REG_NOMATCH if not.  */
225 
226 int
227 regexec (preg, string, nmatch, pmatch, eflags)
228     const regex_t *_Restrict_ preg;
229     const char *_Restrict_ string;
230     size_t nmatch;
231     regmatch_t pmatch[_Restrict_arr_];
232     int eflags;
233 {
234   reg_errcode_t err;
235   Idx start, length;
236 #ifdef _LIBC
237   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
238 #endif
239 
240   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
241     return REG_BADPAT;
242 
243   if (eflags & REG_STARTEND)
244     {
245       start = pmatch[0].rm_so;
246       length = pmatch[0].rm_eo;
247     }
248   else
249     {
250       start = 0;
251       length = strlen (string);
252     }
253 
254   __libc_lock_lock (dfa->lock);
255   if (preg->no_sub)
256     err = re_search_internal (preg, string, length, start, length,
257 			      length, 0, NULL, eflags);
258   else
259     err = re_search_internal (preg, string, length, start, length,
260 			      length, nmatch, pmatch, eflags);
261   __libc_lock_unlock (dfa->lock);
262   return err != REG_NOERROR;
263 }
264 
265 #ifdef _LIBC
266 # include <shlib-compat.h>
267 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
268 
269 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
270 __typeof__ (__regexec) __compat_regexec;
271 
272 int
273 attribute_compat_text_section
274 __compat_regexec (const regex_t *_Restrict_ preg,
275 		  const char *_Restrict_ string, size_t nmatch,
276 		  regmatch_t pmatch[], int eflags)
277 {
278   return regexec (preg, string, nmatch, pmatch,
279 		  eflags & (REG_NOTBOL | REG_NOTEOL));
280 }
281 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
282 # endif
283 #endif
284 
285 /* Entry points for GNU code.  */
286 
287 /* re_match, re_search, re_match_2, re_search_2
288 
289    The former two functions operate on STRING with length LENGTH,
290    while the later two operate on concatenation of STRING1 and STRING2
291    with lengths LENGTH1 and LENGTH2, respectively.
292 
293    re_match() matches the compiled pattern in BUFP against the string,
294    starting at index START.
295 
296    re_search() first tries matching at index START, then it tries to match
297    starting from index START + 1, and so on.  The last start position tried
298    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
299    way as re_match().)
300 
301    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
302    the first STOP characters of the concatenation of the strings should be
303    concerned.
304 
305    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
306    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
307    computed relative to the concatenation, not relative to the individual
308    strings.)
309 
310    On success, re_match* functions return the length of the match, re_search*
311    return the position of the start of the match.  Return value -1 means no
312    match was found and -2 indicates an internal error.  */
313 
314 regoff_t
315 re_match (bufp, string, length, start, regs)
316     struct re_pattern_buffer *bufp;
317     const char *string;
318     Idx length, start;
319     struct re_registers *regs;
320 {
321   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
322 }
323 #ifdef _LIBC
324 weak_alias (__re_match, re_match)
325 #endif
326 
327 regoff_t
328 re_search (bufp, string, length, start, range, regs)
329     struct re_pattern_buffer *bufp;
330     const char *string;
331     Idx length, start;
332     regoff_t range;
333     struct re_registers *regs;
334 {
335   return re_search_stub (bufp, string, length, start, range, length, regs,
336 			 false);
337 }
338 #ifdef _LIBC
339 weak_alias (__re_search, re_search)
340 #endif
341 
342 regoff_t
343 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
344     struct re_pattern_buffer *bufp;
345     const char *string1, *string2;
346     Idx length1, length2, start, stop;
347     struct re_registers *regs;
348 {
349   return re_search_2_stub (bufp, string1, length1, string2, length2,
350 			   start, 0, regs, stop, true);
351 }
352 #ifdef _LIBC
353 weak_alias (__re_match_2, re_match_2)
354 #endif
355 
356 regoff_t
357 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
358     struct re_pattern_buffer *bufp;
359     const char *string1, *string2;
360     Idx length1, length2, start, stop;
361     regoff_t range;
362     struct re_registers *regs;
363 {
364   return re_search_2_stub (bufp, string1, length1, string2, length2,
365 			   start, range, regs, stop, false);
366 }
367 #ifdef _LIBC
368 weak_alias (__re_search_2, re_search_2)
369 #endif
370 
371 static regoff_t
372 internal_function
373 re_search_2_stub (struct re_pattern_buffer *bufp,
374 		  const char *string1, Idx length1,
375 		  const char *string2, Idx length2,
376 		  Idx start, regoff_t range, struct re_registers *regs,
377 		  Idx stop, bool ret_len)
378 {
379   const char *str;
380   regoff_t rval;
381   Idx len = length1 + length2;
382   char *s = NULL;
383 
384   verify (! TYPE_SIGNED (Idx));
385   if (BE (len < length1, 0))
386      return -2;
387   /* if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
388      return -2; */
389 
390   /* Concatenate the strings.  */
391   if (length2 > 0)
392     if (length1 > 0)
393       {
394 	s = re_malloc (char, len);
395 
396 	if (BE (s == NULL, 0))
397 	  return -2;
398 #ifdef _LIBC
399 	memcpy (__mempcpy (s, string1, length1), string2, length2);
400 #else
401 	memcpy (s, string1, length1);
402 	memcpy (s + length1, string2, length2);
403 #endif
404 	str = s;
405       }
406     else
407       str = string2;
408   else
409     str = string1;
410 
411   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
412 			 ret_len);
413   re_free (s);
414   return rval;
415 }
416 
417 /* The parameters have the same meaning as those of re_search.
418    Additional parameters:
419    If RET_LEN is true the length of the match is returned (re_match style);
420    otherwise the position of the match is returned.  */
421 
422 static regoff_t
423 internal_function
424 re_search_stub (struct re_pattern_buffer *bufp,
425 		const char *string, Idx length,
426 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
427 		bool ret_len)
428 {
429   reg_errcode_t result;
430   regmatch_t *pmatch;
431   Idx nregs;
432   regoff_t rval;
433   int eflags = 0;
434 #ifdef _LIBC
435   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
436 #endif
437   Idx last_start = start + range;
438 
439   /* Check for out-of-range.  */
440   verify (! TYPE_SIGNED (Idx));
441   /* if (BE (start < 0, 0))
442      return -1; */
443   if (BE (start > length, 0))
444      return -1;
445   if (BE (length < last_start || (0 <= range && last_start < start), 0))
446     last_start = length;
447   else if (BE (/* last_start < 0 || */ (range < 0 && start <= last_start), 0))
448     last_start = 0;
449 
450   __libc_lock_lock (dfa->lock);
451 
452   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
453   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
454 
455   /* Compile fastmap if we haven't yet.  */
456   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
457     re_compile_fastmap (bufp);
458 
459   if (BE (bufp->no_sub, 0))
460     regs = NULL;
461 
462   /* We need at least 1 register.  */
463   if (regs == NULL)
464     nregs = 1;
465   else if (BE (bufp->regs_allocated == REGS_FIXED
466 	       && regs->num_regs <= bufp->re_nsub, 0))
467     {
468       nregs = regs->num_regs;
469       if (BE (nregs < 1, 0))
470 	{
471 	  /* Nothing can be copied to regs.  */
472 	  regs = NULL;
473 	  nregs = 1;
474 	}
475     }
476   else
477     nregs = bufp->re_nsub + 1;
478   pmatch = re_malloc (regmatch_t, nregs);
479   if (BE (pmatch == NULL, 0))
480     {
481       rval = -2;
482       goto out;
483     }
484 
485   result = re_search_internal (bufp, string, length, start, last_start, stop,
486 			       nregs, pmatch, eflags);
487 
488   rval = 0;
489 
490   /* I hope we needn't fill ther regs with -1's when no match was found.  */
491   if (result != REG_NOERROR)
492     rval = -1;
493   else if (regs != NULL)
494     {
495       /* If caller wants register contents data back, copy them.  */
496       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
497 					   bufp->regs_allocated);
498       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
499 	rval = -2;
500     }
501 
502   if (BE (rval == 0, 1))
503     {
504       if (ret_len)
505 	{
506 	  assert (pmatch[0].rm_so == start);
507 	  rval = pmatch[0].rm_eo - start;
508 	}
509       else
510 	rval = pmatch[0].rm_so;
511     }
512   re_free (pmatch);
513  out:
514   __libc_lock_unlock (dfa->lock);
515   return rval;
516 }
517 
518 static unsigned int
519 internal_function
520 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
521 	      int regs_allocated)
522 {
523   int rval = REGS_REALLOCATE;
524   Idx i;
525   Idx need_regs = nregs + 1;
526   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
527      uses.  */
528 
529   /* Have the register data arrays been allocated?  */
530   if (regs_allocated == REGS_UNALLOCATED)
531     { /* No.  So allocate them with malloc.  */
532       regs->start = re_malloc (regoff_t, need_regs);
533       if (BE (regs->start == NULL, 0))
534 	return REGS_UNALLOCATED;
535       regs->end = re_malloc (regoff_t, need_regs);
536       if (BE (regs->end == NULL, 0))
537 	{
538 	  re_free (regs->start);
539 	  return REGS_UNALLOCATED;
540 	}
541       regs->num_regs = need_regs;
542     }
543   else if (regs_allocated == REGS_REALLOCATE)
544     { /* Yes.  If we need more elements than were already
545 	 allocated, reallocate them.  If we need fewer, just
546 	 leave it alone.  */
547       if (BE (need_regs > regs->num_regs, 0))
548 	{
549 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
550 	  regoff_t *new_end;
551 	  if (BE (new_start == NULL, 0))
552 	    return REGS_UNALLOCATED;
553 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
554 	  if (BE (new_end == NULL, 0))
555 	    {
556 	      re_free (new_start);
557 	      return REGS_UNALLOCATED;
558 	    }
559 	  regs->start = new_start;
560 	  regs->end = new_end;
561 	  regs->num_regs = need_regs;
562 	}
563     }
564   else
565     {
566       assert (regs_allocated == REGS_FIXED);
567       /* This function may not be called with REGS_FIXED and nregs too big.  */
568       assert (regs->num_regs >= nregs);
569       rval = REGS_FIXED;
570     }
571 
572   /* Copy the regs.  */
573   for (i = 0; i < nregs; ++i)
574     {
575       regs->start[i] = pmatch[i].rm_so;
576       regs->end[i] = pmatch[i].rm_eo;
577     }
578   for ( ; i < regs->num_regs; ++i)
579     regs->start[i] = regs->end[i] = -1;
580 
581   return rval;
582 }
583 
584 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
585    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
586    this memory for recording register information.  STARTS and ENDS
587    must be allocated using the malloc library routine, and must each
588    be at least NUM_REGS * sizeof (regoff_t) bytes long.
589 
590    If NUM_REGS == 0, then subsequent matches should allocate their own
591    register data.
592 
593    Unless this function is called, the first search or match using
594    PATTERN_BUFFER will allocate its own register data, without
595    freeing the old data.  */
596 
597 void
598 re_set_registers (bufp, regs, num_regs, starts, ends)
599     struct re_pattern_buffer *bufp;
600     struct re_registers *regs;
601     __re_size_t num_regs;
602     regoff_t *starts, *ends;
603 {
604   if (num_regs)
605     {
606       bufp->regs_allocated = REGS_REALLOCATE;
607       regs->num_regs = num_regs;
608       regs->start = starts;
609       regs->end = ends;
610     }
611   else
612     {
613       bufp->regs_allocated = REGS_UNALLOCATED;
614       regs->num_regs = 0;
615       regs->start = regs->end = NULL;
616     }
617 }
618 #ifdef _LIBC
619 weak_alias (__re_set_registers, re_set_registers)
620 #endif
621 
622 /* Entry points compatible with 4.2 BSD regex library.  We don't define
623    them unless specifically requested.  */
624 
625 #if defined _REGEX_RE_COMP || defined _LIBC
626 int
627 # ifdef _LIBC
628 weak_function
629 # endif
630 re_exec (s)
631      const char *s;
632 {
633   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
634 }
635 #endif /* _REGEX_RE_COMP */
636 
637 /* Internal entry point.  */
638 
639 /* Searches for a compiled pattern PREG in the string STRING, whose
640    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
641    meaning as with regexec.  LAST_START is START + RANGE, where
642    START and RANGE have the same meaning as with re_search.
643    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
644    otherwise return the error code.
645    Note: We assume front end functions already check ranges.
646    (0 <= LAST_START && LAST_START <= LENGTH)  */
647 
648 static reg_errcode_t
649 internal_function __attribute_warn_unused_result__
650 re_search_internal (const regex_t *preg,
651 		    const char *string, Idx length,
652 		    Idx start, Idx last_start, Idx stop,
653 		    size_t nmatch, regmatch_t pmatch[],
654 		    int eflags)
655 {
656   reg_errcode_t err;
657   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
658   Idx left_lim, right_lim;
659   int incr;
660   bool fl_longest_match;
661   int match_kind;
662   Idx match_first;
663   Idx match_last = REG_MISSING;
664   Idx extra_nmatch;
665   bool sb;
666   int ch;
667 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
668   re_match_context_t mctx = { .dfa = dfa };
669 #else
670   re_match_context_t mctx;
671 #endif
672   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
673 		    && start != last_start && !preg->can_be_null)
674 		   ? preg->fastmap : NULL);
675   RE_TRANSLATE_TYPE t = preg->translate;
676 
677 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
678   memset (&mctx, '\0', sizeof (re_match_context_t));
679   mctx.dfa = dfa;
680 #endif
681 
682   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
683   nmatch -= extra_nmatch;
684 
685   /* Check if the DFA haven't been compiled.  */
686   if (BE (preg->used == 0 || dfa->init_state == NULL
687 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
688 	  || dfa->init_state_begbuf == NULL, 0))
689     return REG_NOMATCH;
690 
691 #ifdef DEBUG
692   /* We assume front-end functions already check them.  */
693   assert (0 <= last_start && last_start <= length);
694 #endif
695 
696   /* If initial states with non-begbuf contexts have no elements,
697      the regex must be anchored.  If preg->newline_anchor is set,
698      we'll never use init_state_nl, so do not check it.  */
699   if (dfa->init_state->nodes.nelem == 0
700       && dfa->init_state_word->nodes.nelem == 0
701       && (dfa->init_state_nl->nodes.nelem == 0
702 	  || !preg->newline_anchor))
703     {
704       if (start != 0 && last_start != 0)
705         return REG_NOMATCH;
706       start = last_start = 0;
707     }
708 
709   /* We must check the longest matching, if nmatch > 0.  */
710   fl_longest_match = (nmatch != 0 || dfa->nbackref);
711 
712   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
713 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
714 			    dfa);
715   if (BE (err != REG_NOERROR, 0))
716     goto free_return;
717   mctx.input.stop = stop;
718   mctx.input.raw_stop = stop;
719   mctx.input.newline_anchor = preg->newline_anchor;
720 
721   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
722   if (BE (err != REG_NOERROR, 0))
723     goto free_return;
724 
725   /* We will log all the DFA states through which the dfa pass,
726      if nmatch > 1, or this dfa has "multibyte node", which is a
727      back-reference or a node which can accept multibyte character or
728      multi character collating element.  */
729   if (nmatch > 1 || dfa->has_mb_node)
730     {
731       /* Avoid overflow.  */
732       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
733 	{
734 	  err = REG_ESPACE;
735 	  goto free_return;
736 	}
737 
738       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
739       if (BE (mctx.state_log == NULL, 0))
740 	{
741 	  err = REG_ESPACE;
742 	  goto free_return;
743 	}
744     }
745   else
746     mctx.state_log = NULL;
747 
748   match_first = start;
749   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
750 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
751 
752   /* Check incrementally whether of not the input string match.  */
753   incr = (last_start < start) ? -1 : 1;
754   left_lim = (last_start < start) ? last_start : start;
755   right_lim = (last_start < start) ? start : last_start;
756   sb = dfa->mb_cur_max == 1;
757   match_kind =
758     (fastmap
759      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
760 	| (start <= last_start ? 2 : 0)
761 	| (t != NULL ? 1 : 0))
762      : 8);
763 
764   for (;; match_first += incr)
765     {
766       err = REG_NOMATCH;
767       if (match_first < left_lim || right_lim < match_first)
768 	goto free_return;
769 
770       /* Advance as rapidly as possible through the string, until we
771 	 find a plausible place to start matching.  This may be done
772 	 with varying efficiency, so there are various possibilities:
773 	 only the most common of them are specialized, in order to
774 	 save on code size.  We use a switch statement for speed.  */
775       switch (match_kind)
776 	{
777 	case 8:
778 	  /* No fastmap.  */
779 	  break;
780 
781 	case 7:
782 	  /* Fastmap with single-byte translation, match forward.  */
783 	  while (BE (match_first < right_lim, 1)
784 		 && !fastmap[t[(unsigned char) string[match_first]]])
785 	    ++match_first;
786 	  goto forward_match_found_start_or_reached_end;
787 
788 	case 6:
789 	  /* Fastmap without translation, match forward.  */
790 	  while (BE (match_first < right_lim, 1)
791 		 && !fastmap[(unsigned char) string[match_first]])
792 	    ++match_first;
793 
794 	forward_match_found_start_or_reached_end:
795 	  if (BE (match_first == right_lim, 0))
796 	    {
797 	      ch = match_first >= length
798 		       ? 0 : (unsigned char) string[match_first];
799 	      if (!fastmap[t ? t[ch] : ch])
800 		goto free_return;
801 	    }
802 	  break;
803 
804 	case 4:
805 	case 5:
806 	  /* Fastmap without multi-byte translation, match backwards.  */
807 	  while (match_first >= left_lim)
808 	    {
809 	      ch = match_first >= length
810 		       ? 0 : (unsigned char) string[match_first];
811 	      if (fastmap[t ? t[ch] : ch])
812 		break;
813 	      --match_first;
814 	    }
815 	  if (match_first < left_lim)
816 	    goto free_return;
817 	  break;
818 
819 	default:
820 	  /* In this case, we can't determine easily the current byte,
821 	     since it might be a component byte of a multibyte
822 	     character.  Then we use the constructed buffer instead.  */
823 	  for (;;)
824 	    {
825 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
826 		 buffers.  */
827 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
828 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
829 		{
830 		  err = re_string_reconstruct (&mctx.input, match_first,
831 					       eflags);
832 		  if (BE (err != REG_NOERROR, 0))
833 		    goto free_return;
834 
835 		  offset = match_first - mctx.input.raw_mbs_idx;
836 		}
837 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
838 		 Note that MATCH_FIRST must not be smaller than 0.  */
839 	      ch = (match_first >= length
840 		    ? 0 : re_string_byte_at (&mctx.input, offset));
841 	      if (fastmap[ch])
842 		break;
843 	      match_first += incr;
844 	      if (match_first < left_lim || match_first > right_lim)
845 		{
846 		  err = REG_NOMATCH;
847 		  goto free_return;
848 		}
849 	    }
850 	  break;
851 	}
852 
853       /* Reconstruct the buffers so that the matcher can assume that
854 	 the matching starts from the beginning of the buffer.  */
855       err = re_string_reconstruct (&mctx.input, match_first, eflags);
856       if (BE (err != REG_NOERROR, 0))
857 	goto free_return;
858 
859 #ifdef RE_ENABLE_I18N
860      /* Don't consider this char as a possible match start if it part,
861 	yet isn't the head, of a multibyte character.  */
862       if (!sb && !re_string_first_byte (&mctx.input, 0))
863 	continue;
864 #endif
865 
866       /* It seems to be appropriate one, then use the matcher.  */
867       /* We assume that the matching starts from 0.  */
868       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
869       match_last = check_matching (&mctx, fl_longest_match,
870 				   start <= last_start ? &match_first : NULL);
871       if (match_last != REG_MISSING)
872 	{
873 	  if (BE (match_last == REG_ERROR, 0))
874 	    {
875 	      err = REG_ESPACE;
876 	      goto free_return;
877 	    }
878 	  else
879 	    {
880 	      mctx.match_last = match_last;
881 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
882 		{
883 		  re_dfastate_t *pstate = mctx.state_log[match_last];
884 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
885 							     match_last);
886 		}
887 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
888 		  || dfa->nbackref)
889 		{
890 		  err = prune_impossible_nodes (&mctx);
891 		  if (err == REG_NOERROR)
892 		    break;
893 		  if (BE (err != REG_NOMATCH, 0))
894 		    goto free_return;
895 		  match_last = REG_MISSING;
896 		}
897 	      else
898 		break; /* We found a match.  */
899 	    }
900 	}
901 
902       match_ctx_clean (&mctx);
903     }
904 
905 #ifdef DEBUG
906   assert (match_last != REG_MISSING);
907   assert (err == REG_NOERROR);
908 #endif
909 
910   /* Set pmatch[] if we need.  */
911   if (nmatch > 0)
912     {
913       Idx reg_idx;
914 
915       /* Initialize registers.  */
916       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
917 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
918 
919       /* Set the points where matching start/end.  */
920       pmatch[0].rm_so = 0;
921       pmatch[0].rm_eo = mctx.match_last;
922       /* FIXME: This function should fail if mctx.match_last exceeds
923 	 the maximum possible regoff_t value.  We need a new error
924 	 code REG_OVERFLOW.  */
925 
926       if (!preg->no_sub && nmatch > 1)
927 	{
928 	  err = set_regs (preg, &mctx, nmatch, pmatch,
929 			  dfa->has_plural_match && dfa->nbackref > 0);
930 	  if (BE (err != REG_NOERROR, 0))
931 	    goto free_return;
932 	}
933 
934       /* At last, add the offset to the each registers, since we slided
935 	 the buffers so that we could assume that the matching starts
936 	 from 0.  */
937       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
938 	if (pmatch[reg_idx].rm_so != -1)
939 	  {
940 #ifdef RE_ENABLE_I18N
941 	    if (BE (mctx.input.offsets_needed != 0, 0))
942 	      {
943 		pmatch[reg_idx].rm_so =
944 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
945 		   ? mctx.input.valid_raw_len
946 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
947 		pmatch[reg_idx].rm_eo =
948 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
949 		   ? mctx.input.valid_raw_len
950 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
951 	      }
952 #else
953 	    assert (mctx.input.offsets_needed == 0);
954 #endif
955 	    pmatch[reg_idx].rm_so += match_first;
956 	    pmatch[reg_idx].rm_eo += match_first;
957 	  }
958       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
959 	{
960 	  pmatch[nmatch + reg_idx].rm_so = -1;
961 	  pmatch[nmatch + reg_idx].rm_eo = -1;
962 	}
963 
964       if (dfa->subexp_map)
965 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
966 	  if (dfa->subexp_map[reg_idx] != reg_idx)
967 	    {
968 	      pmatch[reg_idx + 1].rm_so
969 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
970 	      pmatch[reg_idx + 1].rm_eo
971 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
972 	    }
973     }
974 
975  free_return:
976   re_free (mctx.state_log);
977   if (dfa->nbackref)
978     match_ctx_free (&mctx);
979   re_string_destruct (&mctx.input);
980   return err;
981 }
982 
983 static reg_errcode_t
984 internal_function __attribute_warn_unused_result__
985 prune_impossible_nodes (re_match_context_t *mctx)
986 {
987   const re_dfa_t *const dfa = mctx->dfa;
988   Idx halt_node, match_last;
989   reg_errcode_t ret;
990   re_dfastate_t **sifted_states;
991   re_dfastate_t **lim_states = NULL;
992   re_sift_context_t sctx;
993 #ifdef DEBUG
994   assert (mctx->state_log != NULL);
995 #endif
996   match_last = mctx->match_last;
997   halt_node = mctx->last_node;
998 
999   /* Avoid overflow.  */
1000   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
1001     return REG_ESPACE;
1002 
1003   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
1004   if (BE (sifted_states == NULL, 0))
1005     {
1006       ret = REG_ESPACE;
1007       goto free_return;
1008     }
1009   if (dfa->nbackref)
1010     {
1011       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1012       if (BE (lim_states == NULL, 0))
1013 	{
1014 	  ret = REG_ESPACE;
1015 	  goto free_return;
1016 	}
1017       while (1)
1018 	{
1019 	  memset (lim_states, '\0',
1020 		  sizeof (re_dfastate_t *) * (match_last + 1));
1021 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1022 			 match_last);
1023 	  ret = sift_states_backward (mctx, &sctx);
1024 	  re_node_set_free (&sctx.limits);
1025 	  if (BE (ret != REG_NOERROR, 0))
1026 	      goto free_return;
1027 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1028 	    break;
1029 	  do
1030 	    {
1031 	      --match_last;
1032 	      if (! REG_VALID_INDEX (match_last))
1033 		{
1034 		  ret = REG_NOMATCH;
1035 		  goto free_return;
1036 		}
1037 	    } while (mctx->state_log[match_last] == NULL
1038 		     || !mctx->state_log[match_last]->halt);
1039 	  halt_node = check_halt_state_context (mctx,
1040 						mctx->state_log[match_last],
1041 						match_last);
1042 	}
1043       ret = merge_state_array (dfa, sifted_states, lim_states,
1044 			       match_last + 1);
1045       re_free (lim_states);
1046       lim_states = NULL;
1047       if (BE (ret != REG_NOERROR, 0))
1048 	goto free_return;
1049     }
1050   else
1051     {
1052       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1053       ret = sift_states_backward (mctx, &sctx);
1054       re_node_set_free (&sctx.limits);
1055       if (BE (ret != REG_NOERROR, 0))
1056 	goto free_return;
1057       if (sifted_states[0] == NULL)
1058 	{
1059 	  ret = REG_NOMATCH;
1060 	  goto free_return;
1061 	}
1062     }
1063   re_free (mctx->state_log);
1064   mctx->state_log = sifted_states;
1065   sifted_states = NULL;
1066   mctx->last_node = halt_node;
1067   mctx->match_last = match_last;
1068   ret = REG_NOERROR;
1069  free_return:
1070   re_free (sifted_states);
1071   re_free (lim_states);
1072   return ret;
1073 }
1074 
1075 /* Acquire an initial state and return it.
1076    We must select appropriate initial state depending on the context,
1077    since initial states may have constraints like "\<", "^", etc..  */
1078 
1079 static inline re_dfastate_t *
1080 __attribute ((always_inline)) internal_function
1081 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1082 			    Idx idx)
1083 {
1084   const re_dfa_t *const dfa = mctx->dfa;
1085   if (dfa->init_state->has_constraint)
1086     {
1087       unsigned int context;
1088       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1089       if (IS_WORD_CONTEXT (context))
1090 	return dfa->init_state_word;
1091       else if (IS_ORDINARY_CONTEXT (context))
1092 	return dfa->init_state;
1093       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1094 	return dfa->init_state_begbuf;
1095       else if (IS_NEWLINE_CONTEXT (context))
1096 	return dfa->init_state_nl;
1097       else if (IS_BEGBUF_CONTEXT (context))
1098 	{
1099 	  /* It is relatively rare case, then calculate on demand.  */
1100 	  return re_acquire_state_context (err, dfa,
1101 					   dfa->init_state->entrance_nodes,
1102 					   context);
1103 	}
1104       else
1105 	/* Must not happen?  */
1106 	return dfa->init_state;
1107     }
1108   else
1109     return dfa->init_state;
1110 }
1111 
1112 /* Check whether the regular expression match input string INPUT or not,
1113    and return the index where the matching end.  Return REG_MISSING if
1114    there is no match, and return REG_ERROR in case of an error.
1115    FL_LONGEST_MATCH means we want the POSIX longest matching.
1116    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1117    next place where we may want to try matching.
1118    Note that the matcher assume that the maching starts from the current
1119    index of the buffer.  */
1120 
1121 static Idx
1122 internal_function __attribute_warn_unused_result__
1123 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1124 		Idx *p_match_first)
1125 {
1126   const re_dfa_t *const dfa = mctx->dfa;
1127   reg_errcode_t err;
1128   Idx match = 0;
1129   Idx match_last = REG_MISSING;
1130   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1131   re_dfastate_t *cur_state;
1132   bool at_init_state = p_match_first != NULL;
1133   Idx next_start_idx = cur_str_idx;
1134 
1135   err = REG_NOERROR;
1136   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1137   /* An initial state must not be NULL (invalid).  */
1138   if (BE (cur_state == NULL, 0))
1139     {
1140       assert (err == REG_ESPACE);
1141       return REG_ERROR;
1142     }
1143 
1144   if (mctx->state_log != NULL)
1145     {
1146       mctx->state_log[cur_str_idx] = cur_state;
1147 
1148       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1149 	 later.  E.g. Processing back references.  */
1150       if (BE (dfa->nbackref, 0))
1151 	{
1152 	  at_init_state = false;
1153 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1154 	  if (BE (err != REG_NOERROR, 0))
1155 	    return err;
1156 
1157 	  if (cur_state->has_backref)
1158 	    {
1159 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1160 	      if (BE (err != REG_NOERROR, 0))
1161 		return err;
1162 	    }
1163 	}
1164     }
1165 
1166   /* If the RE accepts NULL string.  */
1167   if (BE (cur_state->halt, 0))
1168     {
1169       if (!cur_state->has_constraint
1170 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1171 	{
1172 	  if (!fl_longest_match)
1173 	    return cur_str_idx;
1174 	  else
1175 	    {
1176 	      match_last = cur_str_idx;
1177 	      match = 1;
1178 	    }
1179 	}
1180     }
1181 
1182   while (!re_string_eoi (&mctx->input))
1183     {
1184       re_dfastate_t *old_state = cur_state;
1185       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1186 
1187       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1188 	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1189 	      && mctx->input.valid_len < mctx->input.len))
1190 	{
1191 	  err = extend_buffers (mctx);
1192 	  if (BE (err != REG_NOERROR, 0))
1193 	    {
1194 	      assert (err == REG_ESPACE);
1195 	      return REG_ERROR;
1196 	    }
1197 	}
1198 
1199       cur_state = transit_state (&err, mctx, cur_state);
1200       if (mctx->state_log != NULL)
1201 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1202 
1203       if (cur_state == NULL)
1204 	{
1205 	  /* Reached the invalid state or an error.  Try to recover a valid
1206 	     state using the state log, if available and if we have not
1207 	     already found a valid (even if not the longest) match.  */
1208 	  if (BE (err != REG_NOERROR, 0))
1209 	    return REG_ERROR;
1210 
1211 	  if (mctx->state_log == NULL
1212 	      || (match && !fl_longest_match)
1213 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1214 	    break;
1215 	}
1216 
1217       if (BE (at_init_state, 0))
1218 	{
1219 	  if (old_state == cur_state)
1220 	    next_start_idx = next_char_idx;
1221 	  else
1222 	    at_init_state = false;
1223 	}
1224 
1225       if (cur_state->halt)
1226 	{
1227 	  /* Reached a halt state.
1228 	     Check the halt state can satisfy the current context.  */
1229 	  if (!cur_state->has_constraint
1230 	      || check_halt_state_context (mctx, cur_state,
1231 					   re_string_cur_idx (&mctx->input)))
1232 	    {
1233 	      /* We found an appropriate halt state.  */
1234 	      match_last = re_string_cur_idx (&mctx->input);
1235 	      match = 1;
1236 
1237 	      /* We found a match, do not modify match_first below.  */
1238 	      p_match_first = NULL;
1239 	      if (!fl_longest_match)
1240 		break;
1241 	    }
1242 	}
1243     }
1244 
1245   if (p_match_first)
1246     *p_match_first += next_start_idx;
1247 
1248   return match_last;
1249 }
1250 
1251 /* Check NODE match the current context.  */
1252 
1253 static bool
1254 internal_function
1255 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1256 {
1257   re_token_type_t type = dfa->nodes[node].type;
1258   unsigned int constraint = dfa->nodes[node].constraint;
1259   if (type != END_OF_RE)
1260     return false;
1261   if (!constraint)
1262     return true;
1263   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1264     return false;
1265   return true;
1266 }
1267 
1268 /* Check the halt state STATE match the current context.
1269    Return 0 if not match, if the node, STATE has, is a halt node and
1270    match the context, return the node.  */
1271 
1272 static Idx
1273 internal_function
1274 check_halt_state_context (const re_match_context_t *mctx,
1275 			  const re_dfastate_t *state, Idx idx)
1276 {
1277   Idx i;
1278   unsigned int context;
1279 #ifdef DEBUG
1280   assert (state->halt);
1281 #endif
1282   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1283   for (i = 0; i < state->nodes.nelem; ++i)
1284     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1285       return state->nodes.elems[i];
1286   return 0;
1287 }
1288 
1289 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1290    corresponding to the DFA).
1291    Return the destination node, and update EPS_VIA_NODES;
1292    return REG_MISSING in case of errors.  */
1293 
1294 static Idx
1295 internal_function
1296 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1297 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1298 		   struct re_fail_stack_t *fs)
1299 {
1300   const re_dfa_t *const dfa = mctx->dfa;
1301   Idx i;
1302   bool ok;
1303   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1304     {
1305       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1306       re_node_set *edests = &dfa->edests[node];
1307       Idx dest_node;
1308       ok = re_node_set_insert (eps_via_nodes, node);
1309       if (BE (! ok, 0))
1310 	return REG_ERROR;
1311       /* Pick up a valid destination, or return REG_MISSING if none
1312 	 is found.  */
1313       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1314 	{
1315 	  Idx candidate = edests->elems[i];
1316 	  if (!re_node_set_contains (cur_nodes, candidate))
1317 	    continue;
1318           if (dest_node == REG_MISSING)
1319 	    dest_node = candidate;
1320 
1321 	  else
1322 	    {
1323 	      /* In order to avoid infinite loop like "(a*)*", return the second
1324 		 epsilon-transition if the first was already considered.  */
1325 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1326 		return candidate;
1327 
1328 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1329 	      else if (fs != NULL
1330 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1331 					   eps_via_nodes))
1332 		return REG_ERROR;
1333 
1334 	      /* We know we are going to exit.  */
1335 	      break;
1336 	    }
1337 	}
1338       return dest_node;
1339     }
1340   else
1341     {
1342       Idx naccepted = 0;
1343       re_token_type_t type = dfa->nodes[node].type;
1344 
1345 #ifdef RE_ENABLE_I18N
1346       if (dfa->nodes[node].accept_mb)
1347 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1348       else
1349 #endif /* RE_ENABLE_I18N */
1350       if (type == OP_BACK_REF)
1351 	{
1352 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1353 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1354 	  if (fs != NULL)
1355 	    {
1356 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1357 		return REG_MISSING;
1358 	      else if (naccepted)
1359 		{
1360 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1361 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1362 			      naccepted) != 0)
1363 		    return REG_MISSING;
1364 		}
1365 	    }
1366 
1367 	  if (naccepted == 0)
1368 	    {
1369 	      Idx dest_node;
1370 	      ok = re_node_set_insert (eps_via_nodes, node);
1371 	      if (BE (! ok, 0))
1372 		return REG_ERROR;
1373 	      dest_node = dfa->edests[node].elems[0];
1374 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1375 					dest_node))
1376 		return dest_node;
1377 	    }
1378 	}
1379 
1380       if (naccepted != 0
1381 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1382 	{
1383 	  Idx dest_node = dfa->nexts[node];
1384 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1385 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1386 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1387 					       dest_node)))
1388 	    return REG_MISSING;
1389 	  re_node_set_empty (eps_via_nodes);
1390 	  return dest_node;
1391 	}
1392     }
1393   return REG_MISSING;
1394 }
1395 
1396 static reg_errcode_t
1397 internal_function __attribute_warn_unused_result__
1398 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1399 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1400 {
1401   reg_errcode_t err;
1402   Idx num = fs->num++;
1403   if (fs->num == fs->alloc)
1404     {
1405       struct re_fail_stack_ent_t *new_array;
1406       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1407 				       * fs->alloc * 2));
1408       if (new_array == NULL)
1409 	return REG_ESPACE;
1410       fs->alloc *= 2;
1411       fs->stack = new_array;
1412     }
1413   fs->stack[num].idx = str_idx;
1414   fs->stack[num].node = dest_node;
1415   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1416   if (fs->stack[num].regs == NULL)
1417     return REG_ESPACE;
1418   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1419   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1420   return err;
1421 }
1422 
1423 static Idx
1424 internal_function
1425 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1426 		regmatch_t *regs, re_node_set *eps_via_nodes)
1427 {
1428   Idx num = --fs->num;
1429   assert (REG_VALID_INDEX (num));
1430   *pidx = fs->stack[num].idx;
1431   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1432   re_node_set_free (eps_via_nodes);
1433   re_free (fs->stack[num].regs);
1434   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1435   return fs->stack[num].node;
1436 }
1437 
1438 /* Set the positions where the subexpressions are starts/ends to registers
1439    PMATCH.
1440    Note: We assume that pmatch[0] is already set, and
1441    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1442 
1443 static reg_errcode_t
1444 internal_function __attribute_warn_unused_result__
1445 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1446 	  regmatch_t *pmatch, bool fl_backtrack)
1447 {
1448   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1449   Idx idx, cur_node;
1450   re_node_set eps_via_nodes;
1451   struct re_fail_stack_t *fs;
1452   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1453   regmatch_t *prev_idx_match;
1454   bool prev_idx_match_malloced = false;
1455 
1456 #ifdef DEBUG
1457   assert (nmatch > 1);
1458   assert (mctx->state_log != NULL);
1459 #endif
1460   if (fl_backtrack)
1461     {
1462       fs = &fs_body;
1463       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1464       if (fs->stack == NULL)
1465 	return REG_ESPACE;
1466     }
1467   else
1468     fs = NULL;
1469 
1470   cur_node = dfa->init_node;
1471   re_node_set_init_empty (&eps_via_nodes);
1472 
1473   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1474     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1475   else
1476     {
1477       prev_idx_match = re_malloc (regmatch_t, nmatch);
1478       if (prev_idx_match == NULL)
1479 	{
1480 	  free_fail_stack_return (fs);
1481 	  return REG_ESPACE;
1482 	}
1483       prev_idx_match_malloced = true;
1484     }
1485   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1486 
1487   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1488     {
1489       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1490 
1491       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1492 	{
1493 	  Idx reg_idx;
1494 	  if (fs)
1495 	    {
1496 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1497 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1498 		  break;
1499 	      if (reg_idx == nmatch)
1500 		{
1501 		  re_node_set_free (&eps_via_nodes);
1502 		  if (prev_idx_match_malloced)
1503 		    re_free (prev_idx_match);
1504 		  return free_fail_stack_return (fs);
1505 		}
1506 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1507 					 &eps_via_nodes);
1508 	    }
1509 	  else
1510 	    {
1511 	      re_node_set_free (&eps_via_nodes);
1512 	      if (prev_idx_match_malloced)
1513 		re_free (prev_idx_match);
1514 	      return REG_NOERROR;
1515 	    }
1516 	}
1517 
1518       /* Proceed to next node.  */
1519       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1520 				    &eps_via_nodes, fs);
1521 
1522       if (BE (! REG_VALID_INDEX (cur_node), 0))
1523 	{
1524 	  if (BE (cur_node == REG_ERROR, 0))
1525 	    {
1526 	      re_node_set_free (&eps_via_nodes);
1527 	      if (prev_idx_match_malloced)
1528 		re_free (prev_idx_match);
1529 	      free_fail_stack_return (fs);
1530 	      return REG_ESPACE;
1531 	    }
1532 	  if (fs)
1533 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1534 				       &eps_via_nodes);
1535 	  else
1536 	    {
1537 	      re_node_set_free (&eps_via_nodes);
1538 	      if (prev_idx_match_malloced)
1539 		re_free (prev_idx_match);
1540 	      return REG_NOMATCH;
1541 	    }
1542 	}
1543     }
1544   re_node_set_free (&eps_via_nodes);
1545   if (prev_idx_match_malloced)
1546     re_free (prev_idx_match);
1547   return free_fail_stack_return (fs);
1548 }
1549 
1550 static reg_errcode_t
1551 internal_function
1552 free_fail_stack_return (struct re_fail_stack_t *fs)
1553 {
1554   if (fs)
1555     {
1556       Idx fs_idx;
1557       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1558 	{
1559 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1560 	  re_free (fs->stack[fs_idx].regs);
1561 	}
1562       re_free (fs->stack);
1563     }
1564   return REG_NOERROR;
1565 }
1566 
1567 static void
1568 internal_function
1569 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1570 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1571 {
1572   int type = dfa->nodes[cur_node].type;
1573   if (type == OP_OPEN_SUBEXP)
1574     {
1575       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1576 
1577       /* We are at the first node of this sub expression.  */
1578       if (reg_num < nmatch)
1579 	{
1580 	  pmatch[reg_num].rm_so = cur_idx;
1581 	  pmatch[reg_num].rm_eo = -1;
1582 	}
1583     }
1584   else if (type == OP_CLOSE_SUBEXP)
1585     {
1586       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1587       if (reg_num < nmatch)
1588 	{
1589 	  /* We are at the last node of this sub expression.  */
1590 	  if (pmatch[reg_num].rm_so < cur_idx)
1591 	    {
1592 	      pmatch[reg_num].rm_eo = cur_idx;
1593 	      /* This is a non-empty match or we are not inside an optional
1594 		 subexpression.  Accept this right away.  */
1595 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1596 	    }
1597 	  else
1598 	    {
1599 	      if (dfa->nodes[cur_node].opt_subexp
1600 		  && prev_idx_match[reg_num].rm_so != -1)
1601 		/* We transited through an empty match for an optional
1602 		   subexpression, like (a?)*, and this is not the subexp's
1603 		   first match.  Copy back the old content of the registers
1604 		   so that matches of an inner subexpression are undone as
1605 		   well, like in ((a?))*.  */
1606 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1607 	      else
1608 		/* We completed a subexpression, but it may be part of
1609 		   an optional one, so do not update PREV_IDX_MATCH.  */
1610 		pmatch[reg_num].rm_eo = cur_idx;
1611 	    }
1612 	}
1613     }
1614 }
1615 
1616 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1617    and sift the nodes in each states according to the following rules.
1618    Updated state_log will be wrote to STATE_LOG.
1619 
1620    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1621      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1622 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1623 	the LAST_NODE, we throw away the node `a'.
1624      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1625 	string `s' and transit to `b':
1626 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1627 	   away the node `a'.
1628 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1629 	    thrown away, we throw away the node `a'.
1630      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1631 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1632 	   node `a'.
1633 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1634 	    we throw away the node `a'.  */
1635 
1636 #define STATE_NODE_CONTAINS(state,node) \
1637   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1638 
1639 static reg_errcode_t
1640 internal_function
1641 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1642 {
1643   reg_errcode_t err;
1644   int null_cnt = 0;
1645   Idx str_idx = sctx->last_str_idx;
1646   re_node_set cur_dest;
1647 
1648 #ifdef DEBUG
1649   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1650 #endif
1651 
1652   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1653      transit to the last_node and the last_node itself.  */
1654   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1655   if (BE (err != REG_NOERROR, 0))
1656     return err;
1657   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1658   if (BE (err != REG_NOERROR, 0))
1659     goto free_return;
1660 
1661   /* Then check each states in the state_log.  */
1662   while (str_idx > 0)
1663     {
1664       /* Update counters.  */
1665       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1666       if (null_cnt > mctx->max_mb_elem_len)
1667 	{
1668 	  memset (sctx->sifted_states, '\0',
1669 		  sizeof (re_dfastate_t *) * str_idx);
1670 	  re_node_set_free (&cur_dest);
1671 	  return REG_NOERROR;
1672 	}
1673       re_node_set_empty (&cur_dest);
1674       --str_idx;
1675 
1676       if (mctx->state_log[str_idx])
1677 	{
1678 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1679 	  if (BE (err != REG_NOERROR, 0))
1680 	    goto free_return;
1681 	}
1682 
1683       /* Add all the nodes which satisfy the following conditions:
1684 	 - It can epsilon transit to a node in CUR_DEST.
1685 	 - It is in CUR_SRC.
1686 	 And update state_log.  */
1687       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1688       if (BE (err != REG_NOERROR, 0))
1689 	goto free_return;
1690     }
1691   err = REG_NOERROR;
1692  free_return:
1693   re_node_set_free (&cur_dest);
1694   return err;
1695 }
1696 
1697 static reg_errcode_t
1698 internal_function __attribute_warn_unused_result__
1699 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1700 		     Idx str_idx, re_node_set *cur_dest)
1701 {
1702   const re_dfa_t *const dfa = mctx->dfa;
1703   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1704   Idx i;
1705 
1706   /* Then build the next sifted state.
1707      We build the next sifted state on `cur_dest', and update
1708      `sifted_states[str_idx]' with `cur_dest'.
1709      Note:
1710      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1711      `cur_src' points the node_set of the old `state_log[str_idx]'
1712      (with the epsilon nodes pre-filtered out).  */
1713   for (i = 0; i < cur_src->nelem; i++)
1714     {
1715       Idx prev_node = cur_src->elems[i];
1716       int naccepted = 0;
1717       bool ok;
1718 
1719 #ifdef DEBUG
1720       re_token_type_t type = dfa->nodes[prev_node].type;
1721       assert (!IS_EPSILON_NODE (type));
1722 #endif
1723 #ifdef RE_ENABLE_I18N
1724       /* If the node may accept `multi byte'.  */
1725       if (dfa->nodes[prev_node].accept_mb)
1726 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1727 					 str_idx, sctx->last_str_idx);
1728 #endif /* RE_ENABLE_I18N */
1729 
1730       /* We don't check backreferences here.
1731 	 See update_cur_sifted_state().  */
1732       if (!naccepted
1733 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1734 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1735 				  dfa->nexts[prev_node]))
1736 	naccepted = 1;
1737 
1738       if (naccepted == 0)
1739 	continue;
1740 
1741       if (sctx->limits.nelem)
1742 	{
1743 	  Idx to_idx = str_idx + naccepted;
1744 	  if (check_dst_limits (mctx, &sctx->limits,
1745 				dfa->nexts[prev_node], to_idx,
1746 				prev_node, str_idx))
1747 	    continue;
1748 	}
1749       ok = re_node_set_insert (cur_dest, prev_node);
1750       if (BE (! ok, 0))
1751 	return REG_ESPACE;
1752     }
1753 
1754   return REG_NOERROR;
1755 }
1756 
1757 /* Helper functions.  */
1758 
1759 static reg_errcode_t
1760 internal_function
1761 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1762 {
1763   Idx top = mctx->state_log_top;
1764 
1765   if (next_state_log_idx >= mctx->input.bufs_len
1766       || (next_state_log_idx >= mctx->input.valid_len
1767 	  && mctx->input.valid_len < mctx->input.len))
1768     {
1769       reg_errcode_t err;
1770       err = extend_buffers (mctx);
1771       if (BE (err != REG_NOERROR, 0))
1772 	return err;
1773     }
1774 
1775   if (top < next_state_log_idx)
1776     {
1777       memset (mctx->state_log + top + 1, '\0',
1778 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1779       mctx->state_log_top = next_state_log_idx;
1780     }
1781   return REG_NOERROR;
1782 }
1783 
1784 static reg_errcode_t
1785 internal_function
1786 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1787 		   re_dfastate_t **src, Idx num)
1788 {
1789   Idx st_idx;
1790   reg_errcode_t err;
1791   for (st_idx = 0; st_idx < num; ++st_idx)
1792     {
1793       if (dst[st_idx] == NULL)
1794 	dst[st_idx] = src[st_idx];
1795       else if (src[st_idx] != NULL)
1796 	{
1797 	  re_node_set merged_set;
1798 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1799 					&src[st_idx]->nodes);
1800 	  if (BE (err != REG_NOERROR, 0))
1801 	    return err;
1802 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1803 	  re_node_set_free (&merged_set);
1804 	  if (BE (err != REG_NOERROR, 0))
1805 	    return err;
1806 	}
1807     }
1808   return REG_NOERROR;
1809 }
1810 
1811 static reg_errcode_t
1812 internal_function
1813 update_cur_sifted_state (const re_match_context_t *mctx,
1814 			 re_sift_context_t *sctx, Idx str_idx,
1815 			 re_node_set *dest_nodes)
1816 {
1817   const re_dfa_t *const dfa = mctx->dfa;
1818   reg_errcode_t err = REG_NOERROR;
1819   const re_node_set *candidates;
1820   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1821 		: &mctx->state_log[str_idx]->nodes);
1822 
1823   if (dest_nodes->nelem == 0)
1824     sctx->sifted_states[str_idx] = NULL;
1825   else
1826     {
1827       if (candidates)
1828 	{
1829 	  /* At first, add the nodes which can epsilon transit to a node in
1830 	     DEST_NODE.  */
1831 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1832 	  if (BE (err != REG_NOERROR, 0))
1833 	    return err;
1834 
1835 	  /* Then, check the limitations in the current sift_context.  */
1836 	  if (sctx->limits.nelem)
1837 	    {
1838 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1839 					 mctx->bkref_ents, str_idx);
1840 	      if (BE (err != REG_NOERROR, 0))
1841 		return err;
1842 	    }
1843 	}
1844 
1845       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1846       if (BE (err != REG_NOERROR, 0))
1847 	return err;
1848     }
1849 
1850   if (candidates && mctx->state_log[str_idx]->has_backref)
1851     {
1852       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1853       if (BE (err != REG_NOERROR, 0))
1854 	return err;
1855     }
1856   return REG_NOERROR;
1857 }
1858 
1859 static reg_errcode_t
1860 internal_function __attribute_warn_unused_result__
1861 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1862 		       const re_node_set *candidates)
1863 {
1864   reg_errcode_t err = REG_NOERROR;
1865   Idx i;
1866 
1867   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1868   if (BE (err != REG_NOERROR, 0))
1869     return err;
1870 
1871   if (!state->inveclosure.alloc)
1872     {
1873       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1874       if (BE (err != REG_NOERROR, 0))
1875 	return REG_ESPACE;
1876       for (i = 0; i < dest_nodes->nelem; i++)
1877 	{
1878 	  err = re_node_set_merge (&state->inveclosure,
1879 				   dfa->inveclosures + dest_nodes->elems[i]);
1880 	  if (BE (err != REG_NOERROR, 0))
1881 	    return REG_ESPACE;
1882 	}
1883     }
1884   return re_node_set_add_intersect (dest_nodes, candidates,
1885 				    &state->inveclosure);
1886 }
1887 
1888 static reg_errcode_t
1889 internal_function
1890 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1891 		       const re_node_set *candidates)
1892 {
1893     Idx ecl_idx;
1894     reg_errcode_t err;
1895     re_node_set *inv_eclosure = dfa->inveclosures + node;
1896     re_node_set except_nodes;
1897     re_node_set_init_empty (&except_nodes);
1898     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1899       {
1900 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1901 	if (cur_node == node)
1902 	  continue;
1903 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1904 	  {
1905 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1906 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1907 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1908 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1909 		 && re_node_set_contains (dest_nodes, edst1))
1910 		|| (REG_VALID_NONZERO_INDEX (edst2)
1911 		    && !re_node_set_contains (inv_eclosure, edst2)
1912 		    && re_node_set_contains (dest_nodes, edst2)))
1913 	      {
1914 		err = re_node_set_add_intersect (&except_nodes, candidates,
1915 						 dfa->inveclosures + cur_node);
1916 		if (BE (err != REG_NOERROR, 0))
1917 		  {
1918 		    re_node_set_free (&except_nodes);
1919 		    return err;
1920 		  }
1921 	      }
1922 	  }
1923       }
1924     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1925       {
1926 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1927 	if (!re_node_set_contains (&except_nodes, cur_node))
1928 	  {
1929 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1930 	    re_node_set_remove_at (dest_nodes, idx);
1931 	  }
1932       }
1933     re_node_set_free (&except_nodes);
1934     return REG_NOERROR;
1935 }
1936 
1937 static bool
1938 internal_function
1939 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1940 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1941 {
1942   const re_dfa_t *const dfa = mctx->dfa;
1943   Idx lim_idx, src_pos, dst_pos;
1944 
1945   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1946   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1947   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1948     {
1949       Idx subexp_idx;
1950       struct re_backref_cache_entry *ent;
1951       ent = mctx->bkref_ents + limits->elems[lim_idx];
1952       subexp_idx = dfa->nodes[ent->node].opr.idx;
1953 
1954       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1955 					   subexp_idx, dst_node, dst_idx,
1956 					   dst_bkref_idx);
1957       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1958 					   subexp_idx, src_node, src_idx,
1959 					   src_bkref_idx);
1960 
1961       /* In case of:
1962 	 <src> <dst> ( <subexp> )
1963 	 ( <subexp> ) <src> <dst>
1964 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1965       if (src_pos == dst_pos)
1966 	continue; /* This is unrelated limitation.  */
1967       else
1968 	return true;
1969     }
1970   return false;
1971 }
1972 
1973 static int
1974 internal_function
1975 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1976 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1977 {
1978   const re_dfa_t *const dfa = mctx->dfa;
1979   const re_node_set *eclosures = dfa->eclosures + from_node;
1980   Idx node_idx;
1981 
1982   /* Else, we are on the boundary: examine the nodes on the epsilon
1983      closure.  */
1984   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1985     {
1986       Idx node = eclosures->elems[node_idx];
1987       switch (dfa->nodes[node].type)
1988 	{
1989 	case OP_BACK_REF:
1990 	  if (bkref_idx != REG_MISSING)
1991 	    {
1992 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1993 	      do
1994 		{
1995 		  Idx dst;
1996 		  int cpos;
1997 
1998 		  if (ent->node != node)
1999 		    continue;
2000 
2001 		  if (subexp_idx < BITSET_WORD_BITS
2002 		      && !(ent->eps_reachable_subexps_map
2003 			   & ((bitset_word_t) 1 << subexp_idx)))
2004 		    continue;
2005 
2006 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
2007 		     OP_CLOSE_SUBEXP cases below.  But, if the
2008 		     destination node is the same node as the source
2009 		     node, don't recurse because it would cause an
2010 		     infinite loop: a regex that exhibits this behavior
2011 		     is ()\1*\1*  */
2012 		  dst = dfa->edests[node].elems[0];
2013 		  if (dst == from_node)
2014 		    {
2015 		      if (boundaries & 1)
2016 			return -1;
2017 		      else /* if (boundaries & 2) */
2018 			return 0;
2019 		    }
2020 
2021 		  cpos =
2022 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2023 						 dst, bkref_idx);
2024 		  if (cpos == -1 /* && (boundaries & 1) */)
2025 		    return -1;
2026 		  if (cpos == 0 && (boundaries & 2))
2027 		    return 0;
2028 
2029 		  if (subexp_idx < BITSET_WORD_BITS)
2030 		    ent->eps_reachable_subexps_map
2031 		      &= ~((bitset_word_t) 1 << subexp_idx);
2032 		}
2033 	      while (ent++->more);
2034 	    }
2035 	  break;
2036 
2037 	case OP_OPEN_SUBEXP:
2038 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2039 	    return -1;
2040 	  break;
2041 
2042 	case OP_CLOSE_SUBEXP:
2043 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2044 	    return 0;
2045 	  break;
2046 
2047 	default:
2048 	    break;
2049 	}
2050     }
2051 
2052   return (boundaries & 2) ? 1 : 0;
2053 }
2054 
2055 static int
2056 internal_function
2057 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2058 			   Idx subexp_idx, Idx from_node, Idx str_idx,
2059 			   Idx bkref_idx)
2060 {
2061   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2062   int boundaries;
2063 
2064   /* If we are outside the range of the subexpression, return -1 or 1.  */
2065   if (str_idx < lim->subexp_from)
2066     return -1;
2067 
2068   if (lim->subexp_to < str_idx)
2069     return 1;
2070 
2071   /* If we are within the subexpression, return 0.  */
2072   boundaries = (str_idx == lim->subexp_from);
2073   boundaries |= (str_idx == lim->subexp_to) << 1;
2074   if (boundaries == 0)
2075     return 0;
2076 
2077   /* Else, examine epsilon closure.  */
2078   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2079 				      from_node, bkref_idx);
2080 }
2081 
2082 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2083    which are against limitations from DEST_NODES. */
2084 
2085 static reg_errcode_t
2086 internal_function
2087 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2088 		     const re_node_set *candidates, re_node_set *limits,
2089 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2090 {
2091   reg_errcode_t err;
2092   Idx node_idx, lim_idx;
2093 
2094   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2095     {
2096       Idx subexp_idx;
2097       struct re_backref_cache_entry *ent;
2098       ent = bkref_ents + limits->elems[lim_idx];
2099 
2100       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2101 	continue; /* This is unrelated limitation.  */
2102 
2103       subexp_idx = dfa->nodes[ent->node].opr.idx;
2104       if (ent->subexp_to == str_idx)
2105 	{
2106 	  Idx ops_node = REG_MISSING;
2107 	  Idx cls_node = REG_MISSING;
2108 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2109 	    {
2110 	      Idx node = dest_nodes->elems[node_idx];
2111 	      re_token_type_t type = dfa->nodes[node].type;
2112 	      if (type == OP_OPEN_SUBEXP
2113 		  && subexp_idx == dfa->nodes[node].opr.idx)
2114 		ops_node = node;
2115 	      else if (type == OP_CLOSE_SUBEXP
2116 		       && subexp_idx == dfa->nodes[node].opr.idx)
2117 		cls_node = node;
2118 	    }
2119 
2120 	  /* Check the limitation of the open subexpression.  */
2121 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2122 	  if (REG_VALID_INDEX (ops_node))
2123 	    {
2124 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2125 					   candidates);
2126 	      if (BE (err != REG_NOERROR, 0))
2127 		return err;
2128 	    }
2129 
2130 	  /* Check the limitation of the close subexpression.  */
2131 	  if (REG_VALID_INDEX (cls_node))
2132 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2133 	      {
2134 		Idx node = dest_nodes->elems[node_idx];
2135 		if (!re_node_set_contains (dfa->inveclosures + node,
2136 					   cls_node)
2137 		    && !re_node_set_contains (dfa->eclosures + node,
2138 					      cls_node))
2139 		  {
2140 		    /* It is against this limitation.
2141 		       Remove it form the current sifted state.  */
2142 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2143 						 candidates);
2144 		    if (BE (err != REG_NOERROR, 0))
2145 		      return err;
2146 		    --node_idx;
2147 		  }
2148 	      }
2149 	}
2150       else /* (ent->subexp_to != str_idx)  */
2151 	{
2152 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2153 	    {
2154 	      Idx node = dest_nodes->elems[node_idx];
2155 	      re_token_type_t type = dfa->nodes[node].type;
2156 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2157 		{
2158 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2159 		    continue;
2160 		  /* It is against this limitation.
2161 		     Remove it form the current sifted state.  */
2162 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2163 					       candidates);
2164 		  if (BE (err != REG_NOERROR, 0))
2165 		    return err;
2166 		}
2167 	    }
2168 	}
2169     }
2170   return REG_NOERROR;
2171 }
2172 
2173 static reg_errcode_t
2174 internal_function __attribute_warn_unused_result__
2175 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2176 		   Idx str_idx, const re_node_set *candidates)
2177 {
2178   const re_dfa_t *const dfa = mctx->dfa;
2179   reg_errcode_t err;
2180   Idx node_idx, node;
2181   re_sift_context_t local_sctx;
2182   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2183 
2184   if (first_idx == REG_MISSING)
2185     return REG_NOERROR;
2186 
2187   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2188 
2189   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2190     {
2191       Idx enabled_idx;
2192       re_token_type_t type;
2193       struct re_backref_cache_entry *entry;
2194       node = candidates->elems[node_idx];
2195       type = dfa->nodes[node].type;
2196       /* Avoid infinite loop for the REs like "()\1+".  */
2197       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2198 	continue;
2199       if (type != OP_BACK_REF)
2200 	continue;
2201 
2202       entry = mctx->bkref_ents + first_idx;
2203       enabled_idx = first_idx;
2204       do
2205 	{
2206 	  Idx subexp_len;
2207 	  Idx to_idx;
2208 	  Idx dst_node;
2209 	  bool ok;
2210 	  re_dfastate_t *cur_state;
2211 
2212 	  if (entry->node != node)
2213 	    continue;
2214 	  subexp_len = entry->subexp_to - entry->subexp_from;
2215 	  to_idx = str_idx + subexp_len;
2216 	  dst_node = (subexp_len ? dfa->nexts[node]
2217 		      : dfa->edests[node].elems[0]);
2218 
2219 	  if (to_idx > sctx->last_str_idx
2220 	      || sctx->sifted_states[to_idx] == NULL
2221 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2222 	      || check_dst_limits (mctx, &sctx->limits, node,
2223 				   str_idx, dst_node, to_idx))
2224 	    continue;
2225 
2226 	  if (local_sctx.sifted_states == NULL)
2227 	    {
2228 	      local_sctx = *sctx;
2229 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2230 	      if (BE (err != REG_NOERROR, 0))
2231 		goto free_return;
2232 	    }
2233 	  local_sctx.last_node = node;
2234 	  local_sctx.last_str_idx = str_idx;
2235 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2236 	  if (BE (! ok, 0))
2237 	    {
2238 	      err = REG_ESPACE;
2239 	      goto free_return;
2240 	    }
2241 	  cur_state = local_sctx.sifted_states[str_idx];
2242 	  err = sift_states_backward (mctx, &local_sctx);
2243 	  if (BE (err != REG_NOERROR, 0))
2244 	    goto free_return;
2245 	  if (sctx->limited_states != NULL)
2246 	    {
2247 	      err = merge_state_array (dfa, sctx->limited_states,
2248 				       local_sctx.sifted_states,
2249 				       str_idx + 1);
2250 	      if (BE (err != REG_NOERROR, 0))
2251 		goto free_return;
2252 	    }
2253 	  local_sctx.sifted_states[str_idx] = cur_state;
2254 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2255 
2256 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2257 	  entry = mctx->bkref_ents + enabled_idx;
2258 	}
2259       while (enabled_idx++, entry++->more);
2260     }
2261   err = REG_NOERROR;
2262  free_return:
2263   if (local_sctx.sifted_states != NULL)
2264     {
2265       re_node_set_free (&local_sctx.limits);
2266     }
2267 
2268   return err;
2269 }
2270 
2271 
2272 #ifdef RE_ENABLE_I18N
2273 static int
2274 internal_function
2275 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2276 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2277 {
2278   const re_dfa_t *const dfa = mctx->dfa;
2279   int naccepted;
2280   /* Check the node can accept `multi byte'.  */
2281   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2282   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2283       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2284 			    dfa->nexts[node_idx]))
2285     /* The node can't accept the `multi byte', or the
2286        destination was already thrown away, then the node
2287        could't accept the current input `multi byte'.   */
2288     naccepted = 0;
2289   /* Otherwise, it is sure that the node could accept
2290      `naccepted' bytes input.  */
2291   return naccepted;
2292 }
2293 #endif /* RE_ENABLE_I18N */
2294 
2295 
2296 /* Functions for state transition.  */
2297 
2298 /* Return the next state to which the current state STATE will transit by
2299    accepting the current input byte, and update STATE_LOG if necessary.
2300    If STATE can accept a multibyte char/collating element/back reference
2301    update the destination of STATE_LOG.  */
2302 
2303 static re_dfastate_t *
2304 internal_function __attribute_warn_unused_result__
2305 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2306 	       re_dfastate_t *state)
2307 {
2308   re_dfastate_t **trtable;
2309   unsigned char ch;
2310 
2311 #ifdef RE_ENABLE_I18N
2312   /* If the current state can accept multibyte.  */
2313   if (BE (state->accept_mb, 0))
2314     {
2315       *err = transit_state_mb (mctx, state);
2316       if (BE (*err != REG_NOERROR, 0))
2317 	return NULL;
2318     }
2319 #endif /* RE_ENABLE_I18N */
2320 
2321   /* Then decide the next state with the single byte.  */
2322 #if 0
2323   if (0)
2324     /* don't use transition table  */
2325     return transit_state_sb (err, mctx, state);
2326 #endif
2327 
2328   /* Use transition table  */
2329   ch = re_string_fetch_byte (&mctx->input);
2330   for (;;)
2331     {
2332       trtable = state->trtable;
2333       if (BE (trtable != NULL, 1))
2334 	return trtable[ch];
2335 
2336       trtable = state->word_trtable;
2337       if (BE (trtable != NULL, 1))
2338 	{
2339 	  unsigned int context;
2340 	  context
2341 	    = re_string_context_at (&mctx->input,
2342 				    re_string_cur_idx (&mctx->input) - 1,
2343 				    mctx->eflags);
2344 	  if (IS_WORD_CONTEXT (context))
2345 	    return trtable[ch + SBC_MAX];
2346 	  else
2347 	    return trtable[ch];
2348 	}
2349 
2350       if (!build_trtable (mctx->dfa, state))
2351 	{
2352 	  *err = REG_ESPACE;
2353 	  return NULL;
2354 	}
2355 
2356       /* Retry, we now have a transition table.  */
2357     }
2358 }
2359 
2360 /* Update the state_log if we need */
2361 static re_dfastate_t *
2362 internal_function
2363 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2364 		      re_dfastate_t *next_state)
2365 {
2366   const re_dfa_t *const dfa = mctx->dfa;
2367   Idx cur_idx = re_string_cur_idx (&mctx->input);
2368 
2369   if (cur_idx > mctx->state_log_top)
2370     {
2371       mctx->state_log[cur_idx] = next_state;
2372       mctx->state_log_top = cur_idx;
2373     }
2374   else if (mctx->state_log[cur_idx] == 0)
2375     {
2376       mctx->state_log[cur_idx] = next_state;
2377     }
2378   else
2379     {
2380       re_dfastate_t *pstate;
2381       unsigned int context;
2382       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2383       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2384 	 the destination of a multibyte char/collating element/
2385 	 back reference.  Then the next state is the union set of
2386 	 these destinations and the results of the transition table.  */
2387       pstate = mctx->state_log[cur_idx];
2388       log_nodes = pstate->entrance_nodes;
2389       if (next_state != NULL)
2390 	{
2391 	  table_nodes = next_state->entrance_nodes;
2392 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2393 					     log_nodes);
2394 	  if (BE (*err != REG_NOERROR, 0))
2395 	    return NULL;
2396 	}
2397       else
2398 	next_nodes = *log_nodes;
2399       /* Note: We already add the nodes of the initial state,
2400 	 then we don't need to add them here.  */
2401 
2402       context = re_string_context_at (&mctx->input,
2403 				      re_string_cur_idx (&mctx->input) - 1,
2404 				      mctx->eflags);
2405       next_state = mctx->state_log[cur_idx]
2406 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2407       /* We don't need to check errors here, since the return value of
2408 	 this function is next_state and ERR is already set.  */
2409 
2410       if (table_nodes != NULL)
2411 	re_node_set_free (&next_nodes);
2412     }
2413 
2414   if (BE (dfa->nbackref, 0) && next_state != NULL)
2415     {
2416       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2417 	 later.  We must check them here, since the back references in the
2418 	 next state might use them.  */
2419       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2420 					cur_idx);
2421       if (BE (*err != REG_NOERROR, 0))
2422 	return NULL;
2423 
2424       /* If the next state has back references.  */
2425       if (next_state->has_backref)
2426 	{
2427 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2428 	  if (BE (*err != REG_NOERROR, 0))
2429 	    return NULL;
2430 	  next_state = mctx->state_log[cur_idx];
2431 	}
2432     }
2433 
2434   return next_state;
2435 }
2436 
2437 /* Skip bytes in the input that correspond to part of a
2438    multi-byte match, then look in the log for a state
2439    from which to restart matching.  */
2440 static re_dfastate_t *
2441 internal_function
2442 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2443 {
2444   re_dfastate_t *cur_state;
2445   do
2446     {
2447       Idx max = mctx->state_log_top;
2448       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2449 
2450       do
2451 	{
2452 	  if (++cur_str_idx > max)
2453 	    return NULL;
2454 	  re_string_skip_bytes (&mctx->input, 1);
2455 	}
2456       while (mctx->state_log[cur_str_idx] == NULL);
2457 
2458       cur_state = merge_state_with_log (err, mctx, NULL);
2459     }
2460   while (*err == REG_NOERROR && cur_state == NULL);
2461   return cur_state;
2462 }
2463 
2464 /* Helper functions for transit_state.  */
2465 
2466 /* From the node set CUR_NODES, pick up the nodes whose types are
2467    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2468    expression. And register them to use them later for evaluating the
2469    correspoding back references.  */
2470 
2471 static reg_errcode_t
2472 internal_function
2473 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2474 			   Idx str_idx)
2475 {
2476   const re_dfa_t *const dfa = mctx->dfa;
2477   Idx node_idx;
2478   reg_errcode_t err;
2479 
2480   /* TODO: This isn't efficient.
2481 	   Because there might be more than one nodes whose types are
2482 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2483 	   nodes.
2484 	   E.g. RE: (a){2}  */
2485   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2486     {
2487       Idx node = cur_nodes->elems[node_idx];
2488       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2489 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2490 	  && (dfa->used_bkref_map
2491 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2492 	{
2493 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2494 	  if (BE (err != REG_NOERROR, 0))
2495 	    return err;
2496 	}
2497     }
2498   return REG_NOERROR;
2499 }
2500 
2501 #if 0
2502 /* Return the next state to which the current state STATE will transit by
2503    accepting the current input byte.  */
2504 
2505 static re_dfastate_t *
2506 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2507 		  re_dfastate_t *state)
2508 {
2509   const re_dfa_t *const dfa = mctx->dfa;
2510   re_node_set next_nodes;
2511   re_dfastate_t *next_state;
2512   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2513   unsigned int context;
2514 
2515   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2516   if (BE (*err != REG_NOERROR, 0))
2517     return NULL;
2518   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2519     {
2520       Idx cur_node = state->nodes.elems[node_cnt];
2521       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2522 	{
2523 	  *err = re_node_set_merge (&next_nodes,
2524 				    dfa->eclosures + dfa->nexts[cur_node]);
2525 	  if (BE (*err != REG_NOERROR, 0))
2526 	    {
2527 	      re_node_set_free (&next_nodes);
2528 	      return NULL;
2529 	    }
2530 	}
2531     }
2532   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2533   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2534   /* We don't need to check errors here, since the return value of
2535      this function is next_state and ERR is already set.  */
2536 
2537   re_node_set_free (&next_nodes);
2538   re_string_skip_bytes (&mctx->input, 1);
2539   return next_state;
2540 }
2541 #endif
2542 
2543 #ifdef RE_ENABLE_I18N
2544 static reg_errcode_t
2545 internal_function
2546 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2547 {
2548   const re_dfa_t *const dfa = mctx->dfa;
2549   reg_errcode_t err;
2550   Idx i;
2551 
2552   for (i = 0; i < pstate->nodes.nelem; ++i)
2553     {
2554       re_node_set dest_nodes, *new_nodes;
2555       Idx cur_node_idx = pstate->nodes.elems[i];
2556       int naccepted;
2557       Idx dest_idx;
2558       unsigned int context;
2559       re_dfastate_t *dest_state;
2560 
2561       if (!dfa->nodes[cur_node_idx].accept_mb)
2562 	continue;
2563 
2564       if (dfa->nodes[cur_node_idx].constraint)
2565 	{
2566 	  context = re_string_context_at (&mctx->input,
2567 					  re_string_cur_idx (&mctx->input),
2568 					  mctx->eflags);
2569 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2570 					   context))
2571 	    continue;
2572 	}
2573 
2574       /* How many bytes the node can accept?  */
2575       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2576 					   re_string_cur_idx (&mctx->input));
2577       if (naccepted == 0)
2578 	continue;
2579 
2580       /* The node can accepts `naccepted' bytes.  */
2581       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2582       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2583 			       : mctx->max_mb_elem_len);
2584       err = clean_state_log_if_needed (mctx, dest_idx);
2585       if (BE (err != REG_NOERROR, 0))
2586 	return err;
2587 #ifdef DEBUG
2588       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2589 #endif
2590       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2591 
2592       dest_state = mctx->state_log[dest_idx];
2593       if (dest_state == NULL)
2594 	dest_nodes = *new_nodes;
2595       else
2596 	{
2597 	  err = re_node_set_init_union (&dest_nodes,
2598 					dest_state->entrance_nodes, new_nodes);
2599 	  if (BE (err != REG_NOERROR, 0))
2600 	    return err;
2601 	}
2602       context = re_string_context_at (&mctx->input, dest_idx - 1,
2603 				      mctx->eflags);
2604       mctx->state_log[dest_idx]
2605 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2606       if (dest_state != NULL)
2607 	re_node_set_free (&dest_nodes);
2608       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2609 	return err;
2610     }
2611   return REG_NOERROR;
2612 }
2613 #endif /* RE_ENABLE_I18N */
2614 
2615 static reg_errcode_t
2616 internal_function
2617 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2618 {
2619   const re_dfa_t *const dfa = mctx->dfa;
2620   reg_errcode_t err;
2621   Idx i;
2622   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2623 
2624   for (i = 0; i < nodes->nelem; ++i)
2625     {
2626       Idx dest_str_idx, prev_nelem, bkc_idx;
2627       Idx node_idx = nodes->elems[i];
2628       unsigned int context;
2629       const re_token_t *node = dfa->nodes + node_idx;
2630       re_node_set *new_dest_nodes;
2631 
2632       /* Check whether `node' is a backreference or not.  */
2633       if (node->type != OP_BACK_REF)
2634 	continue;
2635 
2636       if (node->constraint)
2637 	{
2638 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2639 					  mctx->eflags);
2640 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2641 	    continue;
2642 	}
2643 
2644       /* `node' is a backreference.
2645 	 Check the substring which the substring matched.  */
2646       bkc_idx = mctx->nbkref_ents;
2647       err = get_subexp (mctx, node_idx, cur_str_idx);
2648       if (BE (err != REG_NOERROR, 0))
2649 	goto free_return;
2650 
2651       /* And add the epsilon closures (which is `new_dest_nodes') of
2652 	 the backreference to appropriate state_log.  */
2653 #ifdef DEBUG
2654       assert (dfa->nexts[node_idx] != REG_MISSING);
2655 #endif
2656       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2657 	{
2658 	  Idx subexp_len;
2659 	  re_dfastate_t *dest_state;
2660 	  struct re_backref_cache_entry *bkref_ent;
2661 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2662 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2663 	    continue;
2664 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2665 	  new_dest_nodes = (subexp_len == 0
2666 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2667 			    : dfa->eclosures + dfa->nexts[node_idx]);
2668 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2669 			  - bkref_ent->subexp_from);
2670 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2671 					  mctx->eflags);
2672 	  dest_state = mctx->state_log[dest_str_idx];
2673 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2674 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2675 	  /* Add `new_dest_node' to state_log.  */
2676 	  if (dest_state == NULL)
2677 	    {
2678 	      mctx->state_log[dest_str_idx]
2679 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2680 					    context);
2681 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2682 		      && err != REG_NOERROR, 0))
2683 		goto free_return;
2684 	    }
2685 	  else
2686 	    {
2687 	      re_node_set dest_nodes;
2688 	      err = re_node_set_init_union (&dest_nodes,
2689 					    dest_state->entrance_nodes,
2690 					    new_dest_nodes);
2691 	      if (BE (err != REG_NOERROR, 0))
2692 		{
2693 		  re_node_set_free (&dest_nodes);
2694 		  goto free_return;
2695 		}
2696 	      mctx->state_log[dest_str_idx]
2697 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2698 	      re_node_set_free (&dest_nodes);
2699 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2700 		      && err != REG_NOERROR, 0))
2701 		goto free_return;
2702 	    }
2703 	  /* We need to check recursively if the backreference can epsilon
2704 	     transit.  */
2705 	  if (subexp_len == 0
2706 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2707 	    {
2708 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2709 					       cur_str_idx);
2710 	      if (BE (err != REG_NOERROR, 0))
2711 		goto free_return;
2712 	      err = transit_state_bkref (mctx, new_dest_nodes);
2713 	      if (BE (err != REG_NOERROR, 0))
2714 		goto free_return;
2715 	    }
2716 	}
2717     }
2718   err = REG_NOERROR;
2719  free_return:
2720   return err;
2721 }
2722 
2723 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2724    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2725    Note that we might collect inappropriate candidates here.
2726    However, the cost of checking them strictly here is too high, then we
2727    delay these checking for prune_impossible_nodes().  */
2728 
2729 static reg_errcode_t
2730 internal_function __attribute_warn_unused_result__
2731 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2732 {
2733   const re_dfa_t *const dfa = mctx->dfa;
2734   Idx subexp_num, sub_top_idx;
2735   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2736   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2737   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2738   if (cache_idx != REG_MISSING)
2739     {
2740       const struct re_backref_cache_entry *entry
2741 	= mctx->bkref_ents + cache_idx;
2742       do
2743 	if (entry->node == bkref_node)
2744 	  return REG_NOERROR; /* We already checked it.  */
2745       while (entry++->more);
2746     }
2747 
2748   subexp_num = dfa->nodes[bkref_node].opr.idx;
2749 
2750   /* For each sub expression  */
2751   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2752     {
2753       reg_errcode_t err;
2754       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2755       re_sub_match_last_t *sub_last;
2756       Idx sub_last_idx, sl_str, bkref_str_off;
2757 
2758       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2759 	continue; /* It isn't related.  */
2760 
2761       sl_str = sub_top->str_idx;
2762       bkref_str_off = bkref_str_idx;
2763       /* At first, check the last node of sub expressions we already
2764 	 evaluated.  */
2765       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2766 	{
2767 	  regoff_t sl_str_diff;
2768 	  sub_last = sub_top->lasts[sub_last_idx];
2769 	  sl_str_diff = sub_last->str_idx - sl_str;
2770 	  /* The matched string by the sub expression match with the substring
2771 	     at the back reference?  */
2772 	  if (sl_str_diff > 0)
2773 	    {
2774 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2775 		{
2776 		  /* Not enough chars for a successful match.  */
2777 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2778 		    break;
2779 
2780 		  err = clean_state_log_if_needed (mctx,
2781 						   bkref_str_off
2782 						   + sl_str_diff);
2783 		  if (BE (err != REG_NOERROR, 0))
2784 		    return err;
2785 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2786 		}
2787 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2788 		/* We don't need to search this sub expression any more.  */
2789 		break;
2790 	    }
2791 	  bkref_str_off += sl_str_diff;
2792 	  sl_str += sl_str_diff;
2793 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2794 				bkref_str_idx);
2795 
2796 	  /* Reload buf, since the preceding call might have reallocated
2797 	     the buffer.  */
2798 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2799 
2800 	  if (err == REG_NOMATCH)
2801 	    continue;
2802 	  if (BE (err != REG_NOERROR, 0))
2803 	    return err;
2804 	}
2805 
2806       if (sub_last_idx < sub_top->nlasts)
2807 	continue;
2808       if (sub_last_idx > 0)
2809 	++sl_str;
2810       /* Then, search for the other last nodes of the sub expression.  */
2811       for (; sl_str <= bkref_str_idx; ++sl_str)
2812 	{
2813 	  Idx cls_node;
2814 	  regoff_t sl_str_off;
2815 	  const re_node_set *nodes;
2816 	  sl_str_off = sl_str - sub_top->str_idx;
2817 	  /* The matched string by the sub expression match with the substring
2818 	     at the back reference?  */
2819 	  if (sl_str_off > 0)
2820 	    {
2821 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2822 		{
2823 		  /* If we are at the end of the input, we cannot match.  */
2824 		  if (bkref_str_off >= mctx->input.len)
2825 		    break;
2826 
2827 		  err = extend_buffers (mctx);
2828 		  if (BE (err != REG_NOERROR, 0))
2829 		    return err;
2830 
2831 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2832 		}
2833 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2834 		break; /* We don't need to search this sub expression
2835 			  any more.  */
2836 	    }
2837 	  if (mctx->state_log[sl_str] == NULL)
2838 	    continue;
2839 	  /* Does this state have a ')' of the sub expression?  */
2840 	  nodes = &mctx->state_log[sl_str]->nodes;
2841 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2842 				       OP_CLOSE_SUBEXP);
2843 	  if (cls_node == REG_MISSING)
2844 	    continue; /* No.  */
2845 	  if (sub_top->path == NULL)
2846 	    {
2847 	      sub_top->path = calloc (sizeof (state_array_t),
2848 				      sl_str - sub_top->str_idx + 1);
2849 	      if (sub_top->path == NULL)
2850 		return REG_ESPACE;
2851 	    }
2852 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2853 	     in the current context?  */
2854 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2855 			       sub_top->str_idx, cls_node, sl_str,
2856 			       OP_CLOSE_SUBEXP);
2857 	  if (err == REG_NOMATCH)
2858 	      continue;
2859 	  if (BE (err != REG_NOERROR, 0))
2860 	      return err;
2861 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2862 	  if (BE (sub_last == NULL, 0))
2863 	    return REG_ESPACE;
2864 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2865 				bkref_str_idx);
2866 	  if (err == REG_NOMATCH)
2867 	    continue;
2868 	}
2869     }
2870   return REG_NOERROR;
2871 }
2872 
2873 /* Helper functions for get_subexp().  */
2874 
2875 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2876    If it can arrive, register the sub expression expressed with SUB_TOP
2877    and SUB_LAST.  */
2878 
2879 static reg_errcode_t
2880 internal_function
2881 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2882 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2883 {
2884   reg_errcode_t err;
2885   Idx to_idx;
2886   /* Can the subexpression arrive the back reference?  */
2887   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2888 		       sub_last->str_idx, bkref_node, bkref_str,
2889 		       OP_OPEN_SUBEXP);
2890   if (err != REG_NOERROR)
2891     return err;
2892   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2893 			     sub_last->str_idx);
2894   if (BE (err != REG_NOERROR, 0))
2895     return err;
2896   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2897   return clean_state_log_if_needed (mctx, to_idx);
2898 }
2899 
2900 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2901    Search '(' if FL_OPEN, or search ')' otherwise.
2902    TODO: This function isn't efficient...
2903 	 Because there might be more than one nodes whose types are
2904 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2905 	 nodes.
2906 	 E.g. RE: (a){2}  */
2907 
2908 static Idx
2909 internal_function
2910 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2911 		  Idx subexp_idx, int type)
2912 {
2913   Idx cls_idx;
2914   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2915     {
2916       Idx cls_node = nodes->elems[cls_idx];
2917       const re_token_t *node = dfa->nodes + cls_node;
2918       if (node->type == type
2919 	  && node->opr.idx == subexp_idx)
2920 	return cls_node;
2921     }
2922   return REG_MISSING;
2923 }
2924 
2925 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2926    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2927    heavily reused.
2928    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2929 
2930 static reg_errcode_t
2931 internal_function __attribute_warn_unused_result__
2932 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2933 	       Idx top_str, Idx last_node, Idx last_str, int type)
2934 {
2935   const re_dfa_t *const dfa = mctx->dfa;
2936   reg_errcode_t err = REG_NOERROR;
2937   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2938   re_dfastate_t *cur_state = NULL;
2939   re_node_set *cur_nodes, next_nodes;
2940   re_dfastate_t **backup_state_log;
2941   unsigned int context;
2942 
2943   subexp_num = dfa->nodes[top_node].opr.idx;
2944   /* Extend the buffer if we need.  */
2945   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2946     {
2947       re_dfastate_t **new_array;
2948       Idx old_alloc = path->alloc;
2949       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2950       if (BE (new_alloc < old_alloc, 0)
2951 	  || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2952 	return REG_ESPACE;
2953       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2954       if (BE (new_array == NULL, 0))
2955 	return REG_ESPACE;
2956       path->array = new_array;
2957       path->alloc = new_alloc;
2958       memset (new_array + old_alloc, '\0',
2959 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2960     }
2961 
2962   str_idx = path->next_idx ? path->next_idx : top_str;
2963 
2964   /* Temporary modify MCTX.  */
2965   backup_state_log = mctx->state_log;
2966   backup_cur_idx = mctx->input.cur_idx;
2967   mctx->state_log = path->array;
2968   mctx->input.cur_idx = str_idx;
2969 
2970   /* Setup initial node set.  */
2971   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2972   if (str_idx == top_str)
2973     {
2974       err = re_node_set_init_1 (&next_nodes, top_node);
2975       if (BE (err != REG_NOERROR, 0))
2976 	return err;
2977       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2978       if (BE (err != REG_NOERROR, 0))
2979 	{
2980 	  re_node_set_free (&next_nodes);
2981 	  return err;
2982 	}
2983     }
2984   else
2985     {
2986       cur_state = mctx->state_log[str_idx];
2987       if (cur_state && cur_state->has_backref)
2988 	{
2989 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2990 	  if (BE (err != REG_NOERROR, 0))
2991 	    return err;
2992 	}
2993       else
2994 	re_node_set_init_empty (&next_nodes);
2995     }
2996   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2997     {
2998       if (next_nodes.nelem)
2999 	{
3000 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3001 				    subexp_num, type);
3002 	  if (BE (err != REG_NOERROR, 0))
3003 	    {
3004 	      re_node_set_free (&next_nodes);
3005 	      return err;
3006 	    }
3007 	}
3008       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3009       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3010 	{
3011 	  re_node_set_free (&next_nodes);
3012 	  return err;
3013 	}
3014       mctx->state_log[str_idx] = cur_state;
3015     }
3016 
3017   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3018     {
3019       re_node_set_empty (&next_nodes);
3020       if (mctx->state_log[str_idx + 1])
3021 	{
3022 	  err = re_node_set_merge (&next_nodes,
3023 				   &mctx->state_log[str_idx + 1]->nodes);
3024 	  if (BE (err != REG_NOERROR, 0))
3025 	    {
3026 	      re_node_set_free (&next_nodes);
3027 	      return err;
3028 	    }
3029 	}
3030       if (cur_state)
3031 	{
3032 	  err = check_arrival_add_next_nodes (mctx, str_idx,
3033 					      &cur_state->non_eps_nodes,
3034 					      &next_nodes);
3035 	  if (BE (err != REG_NOERROR, 0))
3036 	    {
3037 	      re_node_set_free (&next_nodes);
3038 	      return err;
3039 	    }
3040 	}
3041       ++str_idx;
3042       if (next_nodes.nelem)
3043 	{
3044 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3045 	  if (BE (err != REG_NOERROR, 0))
3046 	    {
3047 	      re_node_set_free (&next_nodes);
3048 	      return err;
3049 	    }
3050 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3051 				    subexp_num, type);
3052 	  if (BE (err != REG_NOERROR, 0))
3053 	    {
3054 	      re_node_set_free (&next_nodes);
3055 	      return err;
3056 	    }
3057 	}
3058       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3059       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3060       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3061 	{
3062 	  re_node_set_free (&next_nodes);
3063 	  return err;
3064 	}
3065       mctx->state_log[str_idx] = cur_state;
3066       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3067     }
3068   re_node_set_free (&next_nodes);
3069   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3070 	       : &mctx->state_log[last_str]->nodes);
3071   path->next_idx = str_idx;
3072 
3073   /* Fix MCTX.  */
3074   mctx->state_log = backup_state_log;
3075   mctx->input.cur_idx = backup_cur_idx;
3076 
3077   /* Then check the current node set has the node LAST_NODE.  */
3078   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3079     return REG_NOERROR;
3080 
3081   return REG_NOMATCH;
3082 }
3083 
3084 /* Helper functions for check_arrival.  */
3085 
3086 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3087    to NEXT_NODES.
3088    TODO: This function is similar to the functions transit_state*(),
3089 	 however this function has many additional works.
3090 	 Can't we unify them?  */
3091 
3092 static reg_errcode_t
3093 internal_function __attribute_warn_unused_result__
3094 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3095 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3096 {
3097   const re_dfa_t *const dfa = mctx->dfa;
3098   bool ok;
3099   Idx cur_idx;
3100 #ifdef RE_ENABLE_I18N
3101   reg_errcode_t err = REG_NOERROR;
3102 #endif
3103   re_node_set union_set;
3104   re_node_set_init_empty (&union_set);
3105   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3106     {
3107       int naccepted = 0;
3108       Idx cur_node = cur_nodes->elems[cur_idx];
3109 #ifdef DEBUG
3110       re_token_type_t type = dfa->nodes[cur_node].type;
3111       assert (!IS_EPSILON_NODE (type));
3112 #endif
3113 #ifdef RE_ENABLE_I18N
3114       /* If the node may accept `multi byte'.  */
3115       if (dfa->nodes[cur_node].accept_mb)
3116 	{
3117 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3118 					       str_idx);
3119 	  if (naccepted > 1)
3120 	    {
3121 	      re_dfastate_t *dest_state;
3122 	      Idx next_node = dfa->nexts[cur_node];
3123 	      Idx next_idx = str_idx + naccepted;
3124 	      dest_state = mctx->state_log[next_idx];
3125 	      re_node_set_empty (&union_set);
3126 	      if (dest_state)
3127 		{
3128 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3129 		  if (BE (err != REG_NOERROR, 0))
3130 		    {
3131 		      re_node_set_free (&union_set);
3132 		      return err;
3133 		    }
3134 		}
3135 	      ok = re_node_set_insert (&union_set, next_node);
3136 	      if (BE (! ok, 0))
3137 		{
3138 		  re_node_set_free (&union_set);
3139 		  return REG_ESPACE;
3140 		}
3141 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3142 							    &union_set);
3143 	      if (BE (mctx->state_log[next_idx] == NULL
3144 		      && err != REG_NOERROR, 0))
3145 		{
3146 		  re_node_set_free (&union_set);
3147 		  return err;
3148 		}
3149 	    }
3150 	}
3151 #endif /* RE_ENABLE_I18N */
3152       if (naccepted
3153 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3154 	{
3155 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3156 	  if (BE (! ok, 0))
3157 	    {
3158 	      re_node_set_free (&union_set);
3159 	      return REG_ESPACE;
3160 	    }
3161 	}
3162     }
3163   re_node_set_free (&union_set);
3164   return REG_NOERROR;
3165 }
3166 
3167 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3168    CUR_NODES, however exclude the nodes which are:
3169     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3170     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3171 */
3172 
3173 static reg_errcode_t
3174 internal_function
3175 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3176 			  Idx ex_subexp, int type)
3177 {
3178   reg_errcode_t err;
3179   Idx idx, outside_node;
3180   re_node_set new_nodes;
3181 #ifdef DEBUG
3182   assert (cur_nodes->nelem);
3183 #endif
3184   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3185   if (BE (err != REG_NOERROR, 0))
3186     return err;
3187   /* Create a new node set NEW_NODES with the nodes which are epsilon
3188      closures of the node in CUR_NODES.  */
3189 
3190   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3191     {
3192       Idx cur_node = cur_nodes->elems[idx];
3193       const re_node_set *eclosure = dfa->eclosures + cur_node;
3194       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3195       if (outside_node == REG_MISSING)
3196 	{
3197 	  /* There are no problematic nodes, just merge them.  */
3198 	  err = re_node_set_merge (&new_nodes, eclosure);
3199 	  if (BE (err != REG_NOERROR, 0))
3200 	    {
3201 	      re_node_set_free (&new_nodes);
3202 	      return err;
3203 	    }
3204 	}
3205       else
3206 	{
3207 	  /* There are problematic nodes, re-calculate incrementally.  */
3208 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3209 					      ex_subexp, type);
3210 	  if (BE (err != REG_NOERROR, 0))
3211 	    {
3212 	      re_node_set_free (&new_nodes);
3213 	      return err;
3214 	    }
3215 	}
3216     }
3217   re_node_set_free (cur_nodes);
3218   *cur_nodes = new_nodes;
3219   return REG_NOERROR;
3220 }
3221 
3222 /* Helper function for check_arrival_expand_ecl.
3223    Check incrementally the epsilon closure of TARGET, and if it isn't
3224    problematic append it to DST_NODES.  */
3225 
3226 static reg_errcode_t
3227 internal_function __attribute_warn_unused_result__
3228 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3229 			      Idx target, Idx ex_subexp, int type)
3230 {
3231   Idx cur_node;
3232   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3233     {
3234       bool ok;
3235 
3236       if (dfa->nodes[cur_node].type == type
3237 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3238 	{
3239 	  if (type == OP_CLOSE_SUBEXP)
3240 	    {
3241 	      ok = re_node_set_insert (dst_nodes, cur_node);
3242 	      if (BE (! ok, 0))
3243 		return REG_ESPACE;
3244 	    }
3245 	  break;
3246 	}
3247       ok = re_node_set_insert (dst_nodes, cur_node);
3248       if (BE (! ok, 0))
3249 	return REG_ESPACE;
3250       if (dfa->edests[cur_node].nelem == 0)
3251 	break;
3252       if (dfa->edests[cur_node].nelem == 2)
3253 	{
3254 	  reg_errcode_t err;
3255 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3256 					      dfa->edests[cur_node].elems[1],
3257 					      ex_subexp, type);
3258 	  if (BE (err != REG_NOERROR, 0))
3259 	    return err;
3260 	}
3261       cur_node = dfa->edests[cur_node].elems[0];
3262     }
3263   return REG_NOERROR;
3264 }
3265 
3266 
3267 /* For all the back references in the current state, calculate the
3268    destination of the back references by the appropriate entry
3269    in MCTX->BKREF_ENTS.  */
3270 
3271 static reg_errcode_t
3272 internal_function __attribute_warn_unused_result__
3273 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3274 		    Idx cur_str, Idx subexp_num, int type)
3275 {
3276   const re_dfa_t *const dfa = mctx->dfa;
3277   reg_errcode_t err;
3278   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3279   struct re_backref_cache_entry *ent;
3280 
3281   if (cache_idx_start == REG_MISSING)
3282     return REG_NOERROR;
3283 
3284  restart:
3285   ent = mctx->bkref_ents + cache_idx_start;
3286   do
3287     {
3288       Idx to_idx, next_node;
3289 
3290       /* Is this entry ENT is appropriate?  */
3291       if (!re_node_set_contains (cur_nodes, ent->node))
3292 	continue; /* No.  */
3293 
3294       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3295       /* Calculate the destination of the back reference, and append it
3296 	 to MCTX->STATE_LOG.  */
3297       if (to_idx == cur_str)
3298 	{
3299 	  /* The backreference did epsilon transit, we must re-check all the
3300 	     node in the current state.  */
3301 	  re_node_set new_dests;
3302 	  reg_errcode_t err2, err3;
3303 	  next_node = dfa->edests[ent->node].elems[0];
3304 	  if (re_node_set_contains (cur_nodes, next_node))
3305 	    continue;
3306 	  err = re_node_set_init_1 (&new_dests, next_node);
3307 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3308 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3309 	  re_node_set_free (&new_dests);
3310 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3311 		  || err3 != REG_NOERROR, 0))
3312 	    {
3313 	      err = (err != REG_NOERROR ? err
3314 		     : (err2 != REG_NOERROR ? err2 : err3));
3315 	      return err;
3316 	    }
3317 	  /* TODO: It is still inefficient...  */
3318 	  goto restart;
3319 	}
3320       else
3321 	{
3322 	  re_node_set union_set;
3323 	  next_node = dfa->nexts[ent->node];
3324 	  if (mctx->state_log[to_idx])
3325 	    {
3326 	      bool ok;
3327 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3328 					next_node))
3329 		continue;
3330 	      err = re_node_set_init_copy (&union_set,
3331 					   &mctx->state_log[to_idx]->nodes);
3332 	      ok = re_node_set_insert (&union_set, next_node);
3333 	      if (BE (err != REG_NOERROR || ! ok, 0))
3334 		{
3335 		  re_node_set_free (&union_set);
3336 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3337 		  return err;
3338 		}
3339 	    }
3340 	  else
3341 	    {
3342 	      err = re_node_set_init_1 (&union_set, next_node);
3343 	      if (BE (err != REG_NOERROR, 0))
3344 		return err;
3345 	    }
3346 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3347 	  re_node_set_free (&union_set);
3348 	  if (BE (mctx->state_log[to_idx] == NULL
3349 		  && err != REG_NOERROR, 0))
3350 	    return err;
3351 	}
3352     }
3353   while (ent++->more);
3354   return REG_NOERROR;
3355 }
3356 
3357 /* Build transition table for the state.
3358    Return true if successful.  */
3359 
3360 static bool
3361 internal_function
3362 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3363 {
3364   reg_errcode_t err;
3365   Idx i, j;
3366   int ch;
3367   bool need_word_trtable = false;
3368   bitset_word_t elem, mask;
3369   bool dests_node_malloced = false;
3370   bool dest_states_malloced = false;
3371   Idx ndests; /* Number of the destination states from `state'.  */
3372   re_dfastate_t **trtable;
3373   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3374   re_node_set follows, *dests_node;
3375   bitset_t *dests_ch;
3376   bitset_t acceptable;
3377 
3378   struct dests_alloc
3379   {
3380     re_node_set dests_node[SBC_MAX];
3381     bitset_t dests_ch[SBC_MAX];
3382   } *dests_alloc;
3383 
3384   /* We build DFA states which corresponds to the destination nodes
3385      from `state'.  `dests_node[i]' represents the nodes which i-th
3386      destination state contains, and `dests_ch[i]' represents the
3387      characters which i-th destination state accepts.  */
3388   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3389     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3390   else
3391     {
3392       dests_alloc = re_malloc (struct dests_alloc, 1);
3393       if (BE (dests_alloc == NULL, 0))
3394 	return false;
3395       dests_node_malloced = true;
3396     }
3397   dests_node = dests_alloc->dests_node;
3398   dests_ch = dests_alloc->dests_ch;
3399 
3400   /* Initialize transiton table.  */
3401   state->word_trtable = state->trtable = NULL;
3402 
3403   /* At first, group all nodes belonging to `state' into several
3404      destinations.  */
3405   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3406   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3407     {
3408       if (dests_node_malloced)
3409 	free (dests_alloc);
3410       if (ndests == 0)
3411 	{
3412 	  state->trtable = (re_dfastate_t **)
3413 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3414           if (BE (state->trtable == NULL, 0))
3415             return false;
3416 	  return true;
3417 	}
3418       return false;
3419     }
3420 
3421   err = re_node_set_alloc (&follows, ndests + 1);
3422   if (BE (err != REG_NOERROR, 0))
3423     goto out_free;
3424 
3425   /* Avoid arithmetic overflow in size calculation.  */
3426   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3427 	    / (3 * sizeof (re_dfastate_t *)))
3428 	   < ndests),
3429 	  0))
3430     goto out_free;
3431 
3432   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3433 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3434     dest_states = (re_dfastate_t **)
3435       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3436   else
3437     {
3438       dest_states = (re_dfastate_t **)
3439 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3440       if (BE (dest_states == NULL, 0))
3441 	{
3442 out_free:
3443 	  if (dest_states_malloced)
3444 	    free (dest_states);
3445 	  re_node_set_free (&follows);
3446 	  for (i = 0; i < ndests; ++i)
3447 	    re_node_set_free (dests_node + i);
3448 	  if (dests_node_malloced)
3449 	    free (dests_alloc);
3450 	  return false;
3451 	}
3452       dest_states_malloced = true;
3453     }
3454   dest_states_word = dest_states + ndests;
3455   dest_states_nl = dest_states_word + ndests;
3456   bitset_empty (acceptable);
3457 
3458   /* Then build the states for all destinations.  */
3459   for (i = 0; i < ndests; ++i)
3460     {
3461       Idx next_node;
3462       re_node_set_empty (&follows);
3463       /* Merge the follows of this destination states.  */
3464       for (j = 0; j < dests_node[i].nelem; ++j)
3465 	{
3466 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3467 	  if (next_node != REG_MISSING)
3468 	    {
3469 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3470 	      if (BE (err != REG_NOERROR, 0))
3471 		goto out_free;
3472 	    }
3473 	}
3474       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3475       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3476 	goto out_free;
3477       /* If the new state has context constraint,
3478 	 build appropriate states for these contexts.  */
3479       if (dest_states[i]->has_constraint)
3480 	{
3481 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3482 							  CONTEXT_WORD);
3483 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3484 	    goto out_free;
3485 
3486 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3487 	    need_word_trtable = true;
3488 
3489 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3490 							CONTEXT_NEWLINE);
3491 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3492 	    goto out_free;
3493 	}
3494       else
3495 	{
3496 	  dest_states_word[i] = dest_states[i];
3497 	  dest_states_nl[i] = dest_states[i];
3498 	}
3499       bitset_merge (acceptable, dests_ch[i]);
3500     }
3501 
3502   if (!BE (need_word_trtable, 0))
3503     {
3504       /* We don't care about whether the following character is a word
3505 	 character, or we are in a single-byte character set so we can
3506 	 discern by looking at the character code: allocate a
3507 	 256-entry transition table.  */
3508       trtable = state->trtable =
3509 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3510       if (BE (trtable == NULL, 0))
3511 	goto out_free;
3512 
3513       /* For all characters ch...:  */
3514       for (i = 0; i < BITSET_WORDS; ++i)
3515 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3516 	     elem;
3517 	     mask <<= 1, elem >>= 1, ++ch)
3518 	  if (BE (elem & 1, 0))
3519 	    {
3520 	      /* There must be exactly one destination which accepts
3521 		 character ch.  See group_nodes_into_DFAstates.  */
3522 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3523 		;
3524 
3525 	      /* j-th destination accepts the word character ch.  */
3526 	      if (dfa->word_char[i] & mask)
3527 		trtable[ch] = dest_states_word[j];
3528 	      else
3529 		trtable[ch] = dest_states[j];
3530 	    }
3531     }
3532   else
3533     {
3534       /* We care about whether the following character is a word
3535 	 character, and we are in a multi-byte character set: discern
3536 	 by looking at the character code: build two 256-entry
3537 	 transition tables, one starting at trtable[0] and one
3538 	 starting at trtable[SBC_MAX].  */
3539       trtable = state->word_trtable =
3540 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3541       if (BE (trtable == NULL, 0))
3542 	goto out_free;
3543 
3544       /* For all characters ch...:  */
3545       for (i = 0; i < BITSET_WORDS; ++i)
3546 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3547 	     elem;
3548 	     mask <<= 1, elem >>= 1, ++ch)
3549 	  if (BE (elem & 1, 0))
3550 	    {
3551 	      /* There must be exactly one destination which accepts
3552 		 character ch.  See group_nodes_into_DFAstates.  */
3553 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3554 		;
3555 
3556 	      /* j-th destination accepts the word character ch.  */
3557 	      trtable[ch] = dest_states[j];
3558 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3559 	    }
3560     }
3561 
3562   /* new line */
3563   if (bitset_contain (acceptable, NEWLINE_CHAR))
3564     {
3565       /* The current state accepts newline character.  */
3566       for (j = 0; j < ndests; ++j)
3567 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3568 	  {
3569 	    /* k-th destination accepts newline character.  */
3570 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3571 	    if (need_word_trtable)
3572 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3573 	    /* There must be only one destination which accepts
3574 	       newline.  See group_nodes_into_DFAstates.  */
3575 	    break;
3576 	  }
3577     }
3578 
3579   if (dest_states_malloced)
3580     free (dest_states);
3581 
3582   re_node_set_free (&follows);
3583   for (i = 0; i < ndests; ++i)
3584     re_node_set_free (dests_node + i);
3585 
3586   if (dests_node_malloced)
3587     free (dests_alloc);
3588 
3589   return true;
3590 }
3591 
3592 /* Group all nodes belonging to STATE into several destinations.
3593    Then for all destinations, set the nodes belonging to the destination
3594    to DESTS_NODE[i] and set the characters accepted by the destination
3595    to DEST_CH[i].  This function return the number of destinations.  */
3596 
3597 static Idx
3598 internal_function
3599 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3600 			    re_node_set *dests_node, bitset_t *dests_ch)
3601 {
3602   reg_errcode_t err;
3603   bool ok;
3604   Idx i, j, k;
3605   Idx ndests; /* Number of the destinations from `state'.  */
3606   bitset_t accepts; /* Characters a node can accept.  */
3607   const re_node_set *cur_nodes = &state->nodes;
3608   bitset_empty (accepts);
3609   ndests = 0;
3610 
3611   /* For all the nodes belonging to `state',  */
3612   for (i = 0; i < cur_nodes->nelem; ++i)
3613     {
3614       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3615       re_token_type_t type = node->type;
3616       unsigned int constraint = node->constraint;
3617 
3618       /* Enumerate all single byte character this node can accept.  */
3619       if (type == CHARACTER)
3620 	bitset_set (accepts, node->opr.c);
3621       else if (type == SIMPLE_BRACKET)
3622 	{
3623 	  bitset_merge (accepts, node->opr.sbcset);
3624 	}
3625       else if (type == OP_PERIOD)
3626 	{
3627 #ifdef RE_ENABLE_I18N
3628 	  if (dfa->mb_cur_max > 1)
3629 	    bitset_merge (accepts, dfa->sb_char);
3630 	  else
3631 #endif
3632 	    bitset_set_all (accepts);
3633 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3634 	    bitset_clear (accepts, '\n');
3635 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3636 	    bitset_clear (accepts, '\0');
3637 	}
3638 #ifdef RE_ENABLE_I18N
3639       else if (type == OP_UTF8_PERIOD)
3640 	{
3641 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3642 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3643 	  else
3644 	    bitset_merge (accepts, utf8_sb_map);
3645 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3646 	    bitset_clear (accepts, '\n');
3647 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3648 	    bitset_clear (accepts, '\0');
3649 	}
3650 #endif
3651       else
3652 	continue;
3653 
3654       /* Check the `accepts' and sift the characters which are not
3655 	 match it the context.  */
3656       if (constraint)
3657 	{
3658 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3659 	    {
3660 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3661 	      bitset_empty (accepts);
3662 	      if (accepts_newline)
3663 		bitset_set (accepts, NEWLINE_CHAR);
3664 	      else
3665 		continue;
3666 	    }
3667 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3668 	    {
3669 	      bitset_empty (accepts);
3670 	      continue;
3671 	    }
3672 
3673 	  if (constraint & NEXT_WORD_CONSTRAINT)
3674 	    {
3675 	      bitset_word_t any_set = 0;
3676 	      if (type == CHARACTER && !node->word_char)
3677 		{
3678 		  bitset_empty (accepts);
3679 		  continue;
3680 		}
3681 #ifdef RE_ENABLE_I18N
3682 	      if (dfa->mb_cur_max > 1)
3683 		for (j = 0; j < BITSET_WORDS; ++j)
3684 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3685 	      else
3686 #endif
3687 		for (j = 0; j < BITSET_WORDS; ++j)
3688 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3689 	      if (!any_set)
3690 		continue;
3691 	    }
3692 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3693 	    {
3694 	      bitset_word_t any_set = 0;
3695 	      if (type == CHARACTER && node->word_char)
3696 		{
3697 		  bitset_empty (accepts);
3698 		  continue;
3699 		}
3700 #ifdef RE_ENABLE_I18N
3701 	      if (dfa->mb_cur_max > 1)
3702 		for (j = 0; j < BITSET_WORDS; ++j)
3703 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3704 	      else
3705 #endif
3706 		for (j = 0; j < BITSET_WORDS; ++j)
3707 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3708 	      if (!any_set)
3709 		continue;
3710 	    }
3711 	}
3712 
3713       /* Then divide `accepts' into DFA states, or create a new
3714 	 state.  Above, we make sure that accepts is not empty.  */
3715       for (j = 0; j < ndests; ++j)
3716 	{
3717 	  bitset_t intersec; /* Intersection sets, see below.  */
3718 	  bitset_t remains;
3719 	  /* Flags, see below.  */
3720 	  bitset_word_t has_intersec, not_subset, not_consumed;
3721 
3722 	  /* Optimization, skip if this state doesn't accept the character.  */
3723 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3724 	    continue;
3725 
3726 	  /* Enumerate the intersection set of this state and `accepts'.  */
3727 	  has_intersec = 0;
3728 	  for (k = 0; k < BITSET_WORDS; ++k)
3729 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3730 	  /* And skip if the intersection set is empty.  */
3731 	  if (!has_intersec)
3732 	    continue;
3733 
3734 	  /* Then check if this state is a subset of `accepts'.  */
3735 	  not_subset = not_consumed = 0;
3736 	  for (k = 0; k < BITSET_WORDS; ++k)
3737 	    {
3738 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3739 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3740 	    }
3741 
3742 	  /* If this state isn't a subset of `accepts', create a
3743 	     new group state, which has the `remains'. */
3744 	  if (not_subset)
3745 	    {
3746 	      bitset_copy (dests_ch[ndests], remains);
3747 	      bitset_copy (dests_ch[j], intersec);
3748 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3749 	      if (BE (err != REG_NOERROR, 0))
3750 		goto error_return;
3751 	      ++ndests;
3752 	    }
3753 
3754 	  /* Put the position in the current group. */
3755 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3756 	  if (BE (! ok, 0))
3757 	    goto error_return;
3758 
3759 	  /* If all characters are consumed, go to next node. */
3760 	  if (!not_consumed)
3761 	    break;
3762 	}
3763       /* Some characters remain, create a new group. */
3764       if (j == ndests)
3765 	{
3766 	  bitset_copy (dests_ch[ndests], accepts);
3767 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3768 	  if (BE (err != REG_NOERROR, 0))
3769 	    goto error_return;
3770 	  ++ndests;
3771 	  bitset_empty (accepts);
3772 	}
3773     }
3774   return ndests;
3775  error_return:
3776   for (j = 0; j < ndests; ++j)
3777     re_node_set_free (dests_node + j);
3778   return REG_MISSING;
3779 }
3780 
3781 #ifdef RE_ENABLE_I18N
3782 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3783    Return the number of the bytes the node accepts.
3784    STR_IDX is the current index of the input string.
3785 
3786    This function handles the nodes which can accept one character, or
3787    one collating element like '.', '[a-z]', opposite to the other nodes
3788    can only accept one byte.  */
3789 
3790 static int
3791 internal_function
3792 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3793 			 const re_string_t *input, Idx str_idx)
3794 {
3795   const re_token_t *node = dfa->nodes + node_idx;
3796   int char_len, elem_len;
3797   Idx i;
3798 
3799   if (BE (node->type == OP_UTF8_PERIOD, 0))
3800     {
3801       unsigned char c = re_string_byte_at (input, str_idx), d;
3802       if (BE (c < 0xc2, 1))
3803 	return 0;
3804 
3805       if (str_idx + 2 > input->len)
3806 	return 0;
3807 
3808       d = re_string_byte_at (input, str_idx + 1);
3809       if (c < 0xe0)
3810 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3811       else if (c < 0xf0)
3812 	{
3813 	  char_len = 3;
3814 	  if (c == 0xe0 && d < 0xa0)
3815 	    return 0;
3816 	}
3817       else if (c < 0xf8)
3818 	{
3819 	  char_len = 4;
3820 	  if (c == 0xf0 && d < 0x90)
3821 	    return 0;
3822 	}
3823       else if (c < 0xfc)
3824 	{
3825 	  char_len = 5;
3826 	  if (c == 0xf8 && d < 0x88)
3827 	    return 0;
3828 	}
3829       else if (c < 0xfe)
3830 	{
3831 	  char_len = 6;
3832 	  if (c == 0xfc && d < 0x84)
3833 	    return 0;
3834 	}
3835       else
3836 	return 0;
3837 
3838       if (str_idx + char_len > input->len)
3839 	return 0;
3840 
3841       for (i = 1; i < char_len; ++i)
3842 	{
3843 	  d = re_string_byte_at (input, str_idx + i);
3844 	  if (d < 0x80 || d > 0xbf)
3845 	    return 0;
3846 	}
3847       return char_len;
3848     }
3849 
3850   char_len = re_string_char_size_at (input, str_idx);
3851   if (node->type == OP_PERIOD)
3852     {
3853       if (char_len <= 1)
3854 	return 0;
3855       /* FIXME: I don't think this if is needed, as both '\n'
3856 	 and '\0' are char_len == 1.  */
3857       /* '.' accepts any one character except the following two cases.  */
3858       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3859 	   re_string_byte_at (input, str_idx) == '\n') ||
3860 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3861 	   re_string_byte_at (input, str_idx) == '\0'))
3862 	return 0;
3863       return char_len;
3864     }
3865 
3866   elem_len = re_string_elem_size_at (input, str_idx);
3867   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3868     return 0;
3869 
3870   if (node->type == COMPLEX_BRACKET)
3871     {
3872       const re_charset_t *cset = node->opr.mbcset;
3873 # ifdef _LIBC
3874       const unsigned char *pin
3875 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3876       Idx j;
3877       uint32_t nrules;
3878 # endif /* _LIBC */
3879       int match_len = 0;
3880       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3881 		    ? re_string_wchar_at (input, str_idx) : 0);
3882 
3883       /* match with multibyte character?  */
3884       for (i = 0; i < cset->nmbchars; ++i)
3885 	if (wc == cset->mbchars[i])
3886 	  {
3887 	    match_len = char_len;
3888 	    goto check_node_accept_bytes_match;
3889 	  }
3890       /* match with character_class?  */
3891       for (i = 0; i < cset->nchar_classes; ++i)
3892 	{
3893 	  wctype_t wt = cset->char_classes[i];
3894 	  if (__iswctype (wc, wt))
3895 	    {
3896 	      match_len = char_len;
3897 	      goto check_node_accept_bytes_match;
3898 	    }
3899 	}
3900 
3901 # ifdef _LIBC
3902       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3903       if (nrules != 0)
3904 	{
3905 	  unsigned int in_collseq = 0;
3906 	  const int32_t *table, *indirect;
3907 	  const unsigned char *weights, *extra;
3908 	  const char *collseqwc;
3909 	  int32_t idx;
3910 	  /* This #include defines a local function!  */
3911 #  include <locale/weight.h>
3912 
3913 	  /* match with collating_symbol?  */
3914 	  if (cset->ncoll_syms)
3915 	    extra = (const unsigned char *)
3916 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3917 	  for (i = 0; i < cset->ncoll_syms; ++i)
3918 	    {
3919 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3920 	      /* Compare the length of input collating element and
3921 		 the length of current collating element.  */
3922 	      if (*coll_sym != elem_len)
3923 		continue;
3924 	      /* Compare each bytes.  */
3925 	      for (j = 0; j < *coll_sym; j++)
3926 		if (pin[j] != coll_sym[1 + j])
3927 		  break;
3928 	      if (j == *coll_sym)
3929 		{
3930 		  /* Match if every bytes is equal.  */
3931 		  match_len = j;
3932 		  goto check_node_accept_bytes_match;
3933 		}
3934 	    }
3935 
3936 	  if (cset->nranges)
3937 	    {
3938 	      if (elem_len <= char_len)
3939 		{
3940 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3941 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3942 		}
3943 	      else
3944 		in_collseq = find_collation_sequence_value (pin, elem_len);
3945 	    }
3946 	  /* match with range expression?  */
3947 	  for (i = 0; i < cset->nranges; ++i)
3948 	    if (cset->range_starts[i] <= in_collseq
3949 		&& in_collseq <= cset->range_ends[i])
3950 	      {
3951 		match_len = elem_len;
3952 		goto check_node_accept_bytes_match;
3953 	      }
3954 
3955 	  /* match with equivalence_class?  */
3956 	  if (cset->nequiv_classes)
3957 	    {
3958 	      const unsigned char *cp = pin;
3959 	      table = (const int32_t *)
3960 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3961 	      weights = (const unsigned char *)
3962 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3963 	      extra = (const unsigned char *)
3964 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3965 	      indirect = (const int32_t *)
3966 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3967 	      int32_t idx = findidx (&cp);
3968 	      if (idx > 0)
3969 		for (i = 0; i < cset->nequiv_classes; ++i)
3970 		  {
3971 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3972 		    size_t weight_len = weights[idx & 0xffffff];
3973 		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3974 			&& (idx >> 24) == (equiv_class_idx >> 24))
3975 		      {
3976 			Idx cnt = 0;
3977 
3978 			idx &= 0xffffff;
3979 			equiv_class_idx &= 0xffffff;
3980 
3981 			while (cnt <= weight_len
3982 			       && (weights[equiv_class_idx + 1 + cnt]
3983 				   == weights[idx + 1 + cnt]))
3984 			  ++cnt;
3985 			if (cnt > weight_len)
3986 			  {
3987 			    match_len = elem_len;
3988 			    goto check_node_accept_bytes_match;
3989 			  }
3990 		      }
3991 		  }
3992 	    }
3993 	}
3994       else
3995 # endif /* _LIBC */
3996 	{
3997 	  /* match with range expression?  */
3998 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3999 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
4000 #else
4001 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
4002 	  cmp_buf[2] = wc;
4003 #endif
4004 	  for (i = 0; i < cset->nranges; ++i)
4005 	    {
4006 	      cmp_buf[0] = cset->range_starts[i];
4007 	      cmp_buf[4] = cset->range_ends[i];
4008 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
4009 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
4010 		{
4011 		  match_len = char_len;
4012 		  goto check_node_accept_bytes_match;
4013 		}
4014 	    }
4015 	}
4016     check_node_accept_bytes_match:
4017       if (!cset->non_match)
4018 	return match_len;
4019       else
4020 	{
4021 	  if (match_len > 0)
4022 	    return 0;
4023 	  else
4024 	    return (elem_len > char_len) ? elem_len : char_len;
4025 	}
4026     }
4027   return 0;
4028 }
4029 
4030 # ifdef _LIBC
4031 static unsigned int
4032 internal_function
4033 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4034 {
4035   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4036   if (nrules == 0)
4037     {
4038       if (mbs_len == 1)
4039 	{
4040 	  /* No valid character.  Match it as a single byte character.  */
4041 	  const unsigned char *collseq = (const unsigned char *)
4042 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4043 	  return collseq[mbs[0]];
4044 	}
4045       return UINT_MAX;
4046     }
4047   else
4048     {
4049       int32_t idx;
4050       const unsigned char *extra = (const unsigned char *)
4051 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4052       int32_t extrasize = (const unsigned char *)
4053 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4054 
4055       for (idx = 0; idx < extrasize;)
4056 	{
4057 	  int mbs_cnt;
4058 	  bool found = false;
4059 	  int32_t elem_mbs_len;
4060 	  /* Skip the name of collating element name.  */
4061 	  idx = idx + extra[idx] + 1;
4062 	  elem_mbs_len = extra[idx++];
4063 	  if (mbs_len == elem_mbs_len)
4064 	    {
4065 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4066 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4067 		  break;
4068 	      if (mbs_cnt == elem_mbs_len)
4069 		/* Found the entry.  */
4070 		found = true;
4071 	    }
4072 	  /* Skip the byte sequence of the collating element.  */
4073 	  idx += elem_mbs_len;
4074 	  /* Adjust for the alignment.  */
4075 	  idx = (idx + 3) & ~3;
4076 	  /* Skip the collation sequence value.  */
4077 	  idx += sizeof (uint32_t);
4078 	  /* Skip the wide char sequence of the collating element.  */
4079 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4080 	  /* If we found the entry, return the sequence value.  */
4081 	  if (found)
4082 	    return *(uint32_t *) (extra + idx);
4083 	  /* Skip the collation sequence value.  */
4084 	  idx += sizeof (uint32_t);
4085 	}
4086       return UINT_MAX;
4087     }
4088 }
4089 # endif /* _LIBC */
4090 #endif /* RE_ENABLE_I18N */
4091 
4092 /* Check whether the node accepts the byte which is IDX-th
4093    byte of the INPUT.  */
4094 
4095 static bool
4096 internal_function
4097 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4098 		   Idx idx)
4099 {
4100   unsigned char ch;
4101   ch = re_string_byte_at (&mctx->input, idx);
4102   switch (node->type)
4103     {
4104     case CHARACTER:
4105       if (node->opr.c != ch)
4106         return false;
4107       break;
4108 
4109     case SIMPLE_BRACKET:
4110       if (!bitset_contain (node->opr.sbcset, ch))
4111         return false;
4112       break;
4113 
4114 #ifdef RE_ENABLE_I18N
4115     case OP_UTF8_PERIOD:
4116       if (ch >= ASCII_CHARS)
4117         return false;
4118       /* FALLTHROUGH */
4119 #endif
4120     case OP_PERIOD:
4121       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4122 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4123 	return false;
4124       break;
4125 
4126     default:
4127       return false;
4128     }
4129 
4130   if (node->constraint)
4131     {
4132       /* The node has constraints.  Check whether the current context
4133 	 satisfies the constraints.  */
4134       unsigned int context = re_string_context_at (&mctx->input, idx,
4135 						   mctx->eflags);
4136       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4137 	return false;
4138     }
4139 
4140   return true;
4141 }
4142 
4143 /* Extend the buffers, if the buffers have run out.  */
4144 
4145 static reg_errcode_t
4146 internal_function __attribute_warn_unused_result__
4147 extend_buffers (re_match_context_t *mctx)
4148 {
4149   reg_errcode_t ret;
4150   re_string_t *pstr = &mctx->input;
4151 
4152   /* Avoid overflow.  */
4153   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4154     return REG_ESPACE;
4155 
4156   /* Double the lengthes of the buffers.  */
4157   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4158   if (BE (ret != REG_NOERROR, 0))
4159     return ret;
4160 
4161   if (mctx->state_log != NULL)
4162     {
4163       /* And double the length of state_log.  */
4164       /* XXX We have no indication of the size of this buffer.  If this
4165 	 allocation fail we have no indication that the state_log array
4166 	 does not have the right size.  */
4167       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4168 					      pstr->bufs_len + 1);
4169       if (BE (new_array == NULL, 0))
4170 	return REG_ESPACE;
4171       mctx->state_log = new_array;
4172     }
4173 
4174   /* Then reconstruct the buffers.  */
4175   if (pstr->icase)
4176     {
4177 #ifdef RE_ENABLE_I18N
4178       if (pstr->mb_cur_max > 1)
4179 	{
4180 	  ret = build_wcs_upper_buffer (pstr);
4181 	  if (BE (ret != REG_NOERROR, 0))
4182 	    return ret;
4183 	}
4184       else
4185 #endif /* RE_ENABLE_I18N  */
4186 	build_upper_buffer (pstr);
4187     }
4188   else
4189     {
4190 #ifdef RE_ENABLE_I18N
4191       if (pstr->mb_cur_max > 1)
4192 	build_wcs_buffer (pstr);
4193       else
4194 #endif /* RE_ENABLE_I18N  */
4195 	{
4196 	  if (pstr->trans != NULL)
4197 	    re_string_translate_buffer (pstr);
4198 	}
4199     }
4200   return REG_NOERROR;
4201 }
4202 
4203 
4204 /* Functions for matching context.  */
4205 
4206 /* Initialize MCTX.  */
4207 
4208 static reg_errcode_t
4209 internal_function __attribute_warn_unused_result__
4210 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4211 {
4212   mctx->eflags = eflags;
4213   mctx->match_last = REG_MISSING;
4214   if (n > 0)
4215     {
4216       /* Avoid overflow.  */
4217       size_t max_object_size =
4218 	MAX (sizeof (struct re_backref_cache_entry),
4219 	     sizeof (re_sub_match_top_t *));
4220       if (BE (SIZE_MAX / max_object_size < n, 0))
4221 	return REG_ESPACE;
4222 
4223       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4224       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4225       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4226 	return REG_ESPACE;
4227     }
4228   /* Already zero-ed by the caller.
4229      else
4230        mctx->bkref_ents = NULL;
4231      mctx->nbkref_ents = 0;
4232      mctx->nsub_tops = 0;  */
4233   mctx->abkref_ents = n;
4234   mctx->max_mb_elem_len = 1;
4235   mctx->asub_tops = n;
4236   return REG_NOERROR;
4237 }
4238 
4239 /* Clean the entries which depend on the current input in MCTX.
4240    This function must be invoked when the matcher changes the start index
4241    of the input, or changes the input string.  */
4242 
4243 static void
4244 internal_function
4245 match_ctx_clean (re_match_context_t *mctx)
4246 {
4247   Idx st_idx;
4248   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4249     {
4250       Idx sl_idx;
4251       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4252       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4253 	{
4254 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4255 	  re_free (last->path.array);
4256 	  re_free (last);
4257 	}
4258       re_free (top->lasts);
4259       if (top->path)
4260 	{
4261 	  re_free (top->path->array);
4262 	  re_free (top->path);
4263 	}
4264       free (top);
4265     }
4266 
4267   mctx->nsub_tops = 0;
4268   mctx->nbkref_ents = 0;
4269 }
4270 
4271 /* Free all the memory associated with MCTX.  */
4272 
4273 static void
4274 internal_function
4275 match_ctx_free (re_match_context_t *mctx)
4276 {
4277   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4278   match_ctx_clean (mctx);
4279   re_free (mctx->sub_tops);
4280   re_free (mctx->bkref_ents);
4281 }
4282 
4283 /* Add a new backreference entry to MCTX.
4284    Note that we assume that caller never call this function with duplicate
4285    entry, and call with STR_IDX which isn't smaller than any existing entry.
4286 */
4287 
4288 static reg_errcode_t
4289 internal_function __attribute_warn_unused_result__
4290 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4291 		     Idx to)
4292 {
4293   if (mctx->nbkref_ents >= mctx->abkref_ents)
4294     {
4295       struct re_backref_cache_entry* new_entry;
4296       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4297 			      mctx->abkref_ents * 2);
4298       if (BE (new_entry == NULL, 0))
4299 	{
4300 	  re_free (mctx->bkref_ents);
4301 	  return REG_ESPACE;
4302 	}
4303       mctx->bkref_ents = new_entry;
4304       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4305 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4306       mctx->abkref_ents *= 2;
4307     }
4308   if (mctx->nbkref_ents > 0
4309       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4310     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4311 
4312   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4313   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4314   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4315   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4316 
4317   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4318      If bit N is clear, means that this entry won't epsilon-transition to
4319      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4320      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4321      such node.
4322 
4323      A backreference does not epsilon-transition unless it is empty, so set
4324      to all zeros if FROM != TO.  */
4325   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4326     = (from == to ? -1 : 0);
4327 
4328   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4329   if (mctx->max_mb_elem_len < to - from)
4330     mctx->max_mb_elem_len = to - from;
4331   return REG_NOERROR;
4332 }
4333 
4334 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4335    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4336 
4337 static Idx
4338 internal_function
4339 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4340 {
4341   Idx left, right, mid, last;
4342   last = right = mctx->nbkref_ents;
4343   for (left = 0; left < right;)
4344     {
4345       mid = (left + right) / 2;
4346       if (mctx->bkref_ents[mid].str_idx < str_idx)
4347 	left = mid + 1;
4348       else
4349 	right = mid;
4350     }
4351   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4352     return left;
4353   else
4354     return REG_MISSING;
4355 }
4356 
4357 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4358    at STR_IDX.  */
4359 
4360 static reg_errcode_t
4361 internal_function __attribute_warn_unused_result__
4362 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4363 {
4364 #ifdef DEBUG
4365   assert (mctx->sub_tops != NULL);
4366   assert (mctx->asub_tops > 0);
4367 #endif
4368   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4369     {
4370       Idx new_asub_tops = mctx->asub_tops * 2;
4371       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4372 						   re_sub_match_top_t *,
4373 						   new_asub_tops);
4374       if (BE (new_array == NULL, 0))
4375 	return REG_ESPACE;
4376       mctx->sub_tops = new_array;
4377       mctx->asub_tops = new_asub_tops;
4378     }
4379   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4380   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4381     return REG_ESPACE;
4382   mctx->sub_tops[mctx->nsub_tops]->node = node;
4383   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4384   return REG_NOERROR;
4385 }
4386 
4387 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4388    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4389 
4390 static re_sub_match_last_t *
4391 internal_function
4392 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4393 {
4394   re_sub_match_last_t *new_entry;
4395   if (BE (subtop->nlasts == subtop->alasts, 0))
4396     {
4397       Idx new_alasts = 2 * subtop->alasts + 1;
4398       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4399 						    re_sub_match_last_t *,
4400 						    new_alasts);
4401       if (BE (new_array == NULL, 0))
4402 	return NULL;
4403       subtop->lasts = new_array;
4404       subtop->alasts = new_alasts;
4405     }
4406   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4407   if (BE (new_entry != NULL, 1))
4408     {
4409       subtop->lasts[subtop->nlasts] = new_entry;
4410       new_entry->node = node;
4411       new_entry->str_idx = str_idx;
4412       ++subtop->nlasts;
4413     }
4414   return new_entry;
4415 }
4416 
4417 static void
4418 internal_function
4419 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4420 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4421 {
4422   sctx->sifted_states = sifted_sts;
4423   sctx->limited_states = limited_sts;
4424   sctx->last_node = last_node;
4425   sctx->last_str_idx = last_str_idx;
4426   re_node_set_init_empty (&sctx->limits);
4427 }
4428