xref: /dragonfly/contrib/grep/lib/regexec.c (revision cecb9aae)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002-2012 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License along
17    with this program; if not, see <http://www.gnu.org/licenses/>.  */
18 
19 #include "verify.h"
20 #include "intprops.h"
21 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22 				     Idx n) internal_function;
23 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
24 static void match_ctx_free (re_match_context_t *cache) internal_function;
25 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
26 					  Idx str_idx, Idx from, Idx to)
27      internal_function;
28 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
29      internal_function;
30 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
31 					   Idx str_idx) internal_function;
32 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
33 						    Idx node, Idx str_idx)
34      internal_function;
35 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
36 			   re_dfastate_t **limited_sts, Idx last_node,
37 			   Idx last_str_idx)
38      internal_function;
39 static reg_errcode_t re_search_internal (const regex_t *preg,
40 					 const char *string, Idx length,
41 					 Idx start, Idx last_start, Idx stop,
42 					 size_t nmatch, regmatch_t pmatch[],
43 					 int eflags) internal_function;
44 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
45 				  const char *string1, Idx length1,
46 				  const char *string2, Idx length2,
47 				  Idx start, regoff_t range,
48 				  struct re_registers *regs,
49 				  Idx stop, bool ret_len) internal_function;
50 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
51 				const char *string, Idx length, Idx start,
52 				regoff_t range, Idx stop,
53 				struct re_registers *regs,
54 				bool ret_len) internal_function;
55 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
56                               Idx nregs, int regs_allocated) internal_function;
57 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
58      internal_function;
59 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
60 			   Idx *p_match_first) internal_function;
61 static Idx check_halt_state_context (const re_match_context_t *mctx,
62 				     const re_dfastate_t *state, Idx idx)
63      internal_function;
64 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
65 			 regmatch_t *prev_idx_match, Idx cur_node,
66 			 Idx cur_idx, Idx nmatch) internal_function;
67 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
68 				      Idx str_idx, Idx dest_node, Idx nregs,
69 				      regmatch_t *regs,
70 				      re_node_set *eps_via_nodes)
71      internal_function;
72 static reg_errcode_t set_regs (const regex_t *preg,
73 			       const re_match_context_t *mctx,
74 			       size_t nmatch, regmatch_t *pmatch,
75 			       bool fl_backtrack) internal_function;
76 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
77      internal_function;
78 
79 #ifdef RE_ENABLE_I18N
80 static int sift_states_iter_mb (const re_match_context_t *mctx,
81 				re_sift_context_t *sctx,
82 				Idx node_idx, Idx str_idx, Idx max_str_idx)
83      internal_function;
84 #endif /* RE_ENABLE_I18N */
85 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
86 					   re_sift_context_t *sctx)
87      internal_function;
88 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
89 					  re_sift_context_t *sctx, Idx str_idx,
90 					  re_node_set *cur_dest)
91      internal_function;
92 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
93 					      re_sift_context_t *sctx,
94 					      Idx str_idx,
95 					      re_node_set *dest_nodes)
96      internal_function;
97 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
98 					    re_node_set *dest_nodes,
99 					    const re_node_set *candidates)
100      internal_function;
101 static bool check_dst_limits (const re_match_context_t *mctx,
102 			      const re_node_set *limits,
103 			      Idx dst_node, Idx dst_idx, Idx src_node,
104 			      Idx src_idx) internal_function;
105 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
106 					int boundaries, Idx subexp_idx,
107 					Idx from_node, Idx bkref_idx)
108      internal_function;
109 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
110 				      Idx limit, Idx subexp_idx,
111 				      Idx node, Idx str_idx,
112 				      Idx bkref_idx) internal_function;
113 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
114 					  re_node_set *dest_nodes,
115 					  const re_node_set *candidates,
116 					  re_node_set *limits,
117 					  struct re_backref_cache_entry *bkref_ents,
118 					  Idx str_idx) internal_function;
119 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
120 					re_sift_context_t *sctx,
121 					Idx str_idx, const re_node_set *candidates)
122      internal_function;
123 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
124 					re_dfastate_t **dst,
125 					re_dfastate_t **src, Idx num)
126      internal_function;
127 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
128 					 re_match_context_t *mctx) internal_function;
129 static re_dfastate_t *transit_state (reg_errcode_t *err,
130 				     re_match_context_t *mctx,
131 				     re_dfastate_t *state) internal_function;
132 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
133 					    re_match_context_t *mctx,
134 					    re_dfastate_t *next_state)
135      internal_function;
136 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
137 						re_node_set *cur_nodes,
138 						Idx str_idx) internal_function;
139 #if 0
140 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
141 					re_match_context_t *mctx,
142 					re_dfastate_t *pstate)
143      internal_function;
144 #endif
145 #ifdef RE_ENABLE_I18N
146 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
147 				       re_dfastate_t *pstate)
148      internal_function;
149 #endif /* RE_ENABLE_I18N */
150 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
151 					  const re_node_set *nodes)
152      internal_function;
153 static reg_errcode_t get_subexp (re_match_context_t *mctx,
154 				 Idx bkref_node, Idx bkref_str_idx)
155      internal_function;
156 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
157 				     const re_sub_match_top_t *sub_top,
158 				     re_sub_match_last_t *sub_last,
159 				     Idx bkref_node, Idx bkref_str)
160      internal_function;
161 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
162 			     Idx subexp_idx, int type) internal_function;
163 static reg_errcode_t check_arrival (re_match_context_t *mctx,
164 				    state_array_t *path, Idx top_node,
165 				    Idx top_str, Idx last_node, Idx last_str,
166 				    int type) internal_function;
167 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
168 						   Idx str_idx,
169 						   re_node_set *cur_nodes,
170 						   re_node_set *next_nodes)
171      internal_function;
172 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
173 					       re_node_set *cur_nodes,
174 					       Idx ex_subexp, int type)
175      internal_function;
176 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
177 						   re_node_set *dst_nodes,
178 						   Idx target, Idx ex_subexp,
179 						   int type) internal_function;
180 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
181 					 re_node_set *cur_nodes, Idx cur_str,
182 					 Idx subexp_num, int type)
183      internal_function;
184 static bool build_trtable (const re_dfa_t *dfa,
185 			   re_dfastate_t *state) internal_function;
186 #ifdef RE_ENABLE_I18N
187 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
188 				    const re_string_t *input, Idx idx)
189      internal_function;
190 # ifdef _LIBC
191 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
192 						   size_t name_len)
193      internal_function;
194 # endif /* _LIBC */
195 #endif /* RE_ENABLE_I18N */
196 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
197 				       const re_dfastate_t *state,
198 				       re_node_set *states_node,
199 				       bitset_t *states_ch) internal_function;
200 static bool check_node_accept (const re_match_context_t *mctx,
201 			       const re_token_t *node, Idx idx)
202      internal_function;
203 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
204      internal_function;
205 
206 /* Entry point for POSIX code.  */
207 
208 /* regexec searches for a given pattern, specified by PREG, in the
209    string STRING.
210 
211    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
212    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
213    least NMATCH elements, and we set them to the offsets of the
214    corresponding matched substrings.
215 
216    EFLAGS specifies "execution flags" which affect matching: if
217    REG_NOTBOL is set, then ^ does not match at the beginning of the
218    string; if REG_NOTEOL is set, then $ does not match at the end.
219 
220    We return 0 if we find a match and REG_NOMATCH if not.  */
221 
222 int
223 regexec (preg, string, nmatch, pmatch, eflags)
224     const regex_t *_Restrict_ preg;
225     const char *_Restrict_ string;
226     size_t nmatch;
227     regmatch_t pmatch[_Restrict_arr_];
228     int eflags;
229 {
230   reg_errcode_t err;
231   Idx start, length;
232 #ifdef _LIBC
233   re_dfa_t *dfa = preg->buffer;
234 #endif
235 
236   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
237     return REG_BADPAT;
238 
239   if (eflags & REG_STARTEND)
240     {
241       start = pmatch[0].rm_so;
242       length = pmatch[0].rm_eo;
243     }
244   else
245     {
246       start = 0;
247       length = strlen (string);
248     }
249 
250   __libc_lock_lock (dfa->lock);
251   if (preg->no_sub)
252     err = re_search_internal (preg, string, length, start, length,
253 			      length, 0, NULL, eflags);
254   else
255     err = re_search_internal (preg, string, length, start, length,
256 			      length, nmatch, pmatch, eflags);
257   __libc_lock_unlock (dfa->lock);
258   return err != REG_NOERROR;
259 }
260 
261 #ifdef _LIBC
262 # include <shlib-compat.h>
263 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
264 
265 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
266 __typeof__ (__regexec) __compat_regexec;
267 
268 int
269 attribute_compat_text_section
270 __compat_regexec (const regex_t *_Restrict_ preg,
271 		  const char *_Restrict_ string, size_t nmatch,
272 		  regmatch_t pmatch[], int eflags)
273 {
274   return regexec (preg, string, nmatch, pmatch,
275 		  eflags & (REG_NOTBOL | REG_NOTEOL));
276 }
277 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
278 # endif
279 #endif
280 
281 /* Entry points for GNU code.  */
282 
283 /* re_match, re_search, re_match_2, re_search_2
284 
285    The former two functions operate on STRING with length LENGTH,
286    while the later two operate on concatenation of STRING1 and STRING2
287    with lengths LENGTH1 and LENGTH2, respectively.
288 
289    re_match() matches the compiled pattern in BUFP against the string,
290    starting at index START.
291 
292    re_search() first tries matching at index START, then it tries to match
293    starting from index START + 1, and so on.  The last start position tried
294    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
295    way as re_match().)
296 
297    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
298    the first STOP characters of the concatenation of the strings should be
299    concerned.
300 
301    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
302    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
303    computed relative to the concatenation, not relative to the individual
304    strings.)
305 
306    On success, re_match* functions return the length of the match, re_search*
307    return the position of the start of the match.  Return value -1 means no
308    match was found and -2 indicates an internal error.  */
309 
310 regoff_t
311 re_match (bufp, string, length, start, regs)
312     struct re_pattern_buffer *bufp;
313     const char *string;
314     Idx length, start;
315     struct re_registers *regs;
316 {
317   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
318 }
319 #ifdef _LIBC
320 weak_alias (__re_match, re_match)
321 #endif
322 
323 regoff_t
324 re_search (bufp, string, length, start, range, regs)
325     struct re_pattern_buffer *bufp;
326     const char *string;
327     Idx length, start;
328     regoff_t range;
329     struct re_registers *regs;
330 {
331   return re_search_stub (bufp, string, length, start, range, length, regs,
332 			 false);
333 }
334 #ifdef _LIBC
335 weak_alias (__re_search, re_search)
336 #endif
337 
338 regoff_t
339 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
340     struct re_pattern_buffer *bufp;
341     const char *string1, *string2;
342     Idx length1, length2, start, stop;
343     struct re_registers *regs;
344 {
345   return re_search_2_stub (bufp, string1, length1, string2, length2,
346 			   start, 0, regs, stop, true);
347 }
348 #ifdef _LIBC
349 weak_alias (__re_match_2, re_match_2)
350 #endif
351 
352 regoff_t
353 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
354     struct re_pattern_buffer *bufp;
355     const char *string1, *string2;
356     Idx length1, length2, start, stop;
357     regoff_t range;
358     struct re_registers *regs;
359 {
360   return re_search_2_stub (bufp, string1, length1, string2, length2,
361 			   start, range, regs, stop, false);
362 }
363 #ifdef _LIBC
364 weak_alias (__re_search_2, re_search_2)
365 #endif
366 
367 static regoff_t
368 re_search_2_stub (struct re_pattern_buffer *bufp,
369 		  const char *string1, Idx length1,
370 		  const char *string2, Idx length2,
371 		  Idx start, regoff_t range, struct re_registers *regs,
372 		  Idx stop, bool ret_len)
373 {
374   const char *str;
375   regoff_t rval;
376   Idx len = length1 + length2;
377   char *s = NULL;
378 
379   verify (! TYPE_SIGNED (Idx));
380   if (BE (len < length1, 0))
381      return -2;
382   /* if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
383      return -2; */
384 
385   /* Concatenate the strings.  */
386   if (length2 > 0)
387     if (length1 > 0)
388       {
389 	s = re_malloc (char, len);
390 
391 	if (BE (s == NULL, 0))
392 	  return -2;
393 #ifdef _LIBC
394 	memcpy (__mempcpy (s, string1, length1), string2, length2);
395 #else
396 	memcpy (s, string1, length1);
397 	memcpy (s + length1, string2, length2);
398 #endif
399 	str = s;
400       }
401     else
402       str = string2;
403   else
404     str = string1;
405 
406   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
407 			 ret_len);
408   re_free (s);
409   return rval;
410 }
411 
412 /* The parameters have the same meaning as those of re_search.
413    Additional parameters:
414    If RET_LEN is true the length of the match is returned (re_match style);
415    otherwise the position of the match is returned.  */
416 
417 static regoff_t
418 re_search_stub (struct re_pattern_buffer *bufp,
419 		const char *string, Idx length,
420 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
421 		bool ret_len)
422 {
423   reg_errcode_t result;
424   regmatch_t *pmatch;
425   Idx nregs;
426   regoff_t rval;
427   int eflags = 0;
428 #ifdef _LIBC
429   re_dfa_t *dfa = bufp->buffer;
430 #endif
431   Idx last_start = start + range;
432 
433   /* Check for out-of-range.  */
434   verify (! TYPE_SIGNED (Idx));
435   /* if (BE (start < 0, 0))
436      return -1; */
437   if (BE (start > length, 0))
438      return -1;
439   if (BE (length < last_start || (0 <= range && last_start < start), 0))
440     last_start = length;
441   else if (BE (/* last_start < 0 || */ (range < 0 && start <= last_start), 0))
442     last_start = 0;
443 
444   __libc_lock_lock (dfa->lock);
445 
446   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
447   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
448 
449   /* Compile fastmap if we haven't yet.  */
450   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
451     re_compile_fastmap (bufp);
452 
453   if (BE (bufp->no_sub, 0))
454     regs = NULL;
455 
456   /* We need at least 1 register.  */
457   if (regs == NULL)
458     nregs = 1;
459   else if (BE (bufp->regs_allocated == REGS_FIXED
460 	       && regs->num_regs <= bufp->re_nsub, 0))
461     {
462       nregs = regs->num_regs;
463       if (BE (nregs < 1, 0))
464 	{
465 	  /* Nothing can be copied to regs.  */
466 	  regs = NULL;
467 	  nregs = 1;
468 	}
469     }
470   else
471     nregs = bufp->re_nsub + 1;
472   pmatch = re_malloc (regmatch_t, nregs);
473   if (BE (pmatch == NULL, 0))
474     {
475       rval = -2;
476       goto out;
477     }
478 
479   result = re_search_internal (bufp, string, length, start, last_start, stop,
480 			       nregs, pmatch, eflags);
481 
482   rval = 0;
483 
484   /* I hope we needn't fill ther regs with -1's when no match was found.  */
485   if (result != REG_NOERROR)
486     rval = result == REG_NOMATCH ? -1 : -2;
487   else if (regs != NULL)
488     {
489       /* If caller wants register contents data back, copy them.  */
490       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
491 					   bufp->regs_allocated);
492       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
493 	rval = -2;
494     }
495 
496   if (BE (rval == 0, 1))
497     {
498       if (ret_len)
499 	{
500 	  assert (pmatch[0].rm_so == start);
501 	  rval = pmatch[0].rm_eo - start;
502 	}
503       else
504 	rval = pmatch[0].rm_so;
505     }
506   re_free (pmatch);
507  out:
508   __libc_lock_unlock (dfa->lock);
509   return rval;
510 }
511 
512 static unsigned
513 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
514 	      int regs_allocated)
515 {
516   int rval = REGS_REALLOCATE;
517   Idx i;
518   Idx need_regs = nregs + 1;
519   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
520      uses.  */
521 
522   /* Have the register data arrays been allocated?  */
523   if (regs_allocated == REGS_UNALLOCATED)
524     { /* No.  So allocate them with malloc.  */
525       regs->start = re_malloc (regoff_t, need_regs);
526       if (BE (regs->start == NULL, 0))
527 	return REGS_UNALLOCATED;
528       regs->end = re_malloc (regoff_t, need_regs);
529       if (BE (regs->end == NULL, 0))
530 	{
531 	  re_free (regs->start);
532 	  return REGS_UNALLOCATED;
533 	}
534       regs->num_regs = need_regs;
535     }
536   else if (regs_allocated == REGS_REALLOCATE)
537     { /* Yes.  If we need more elements than were already
538 	 allocated, reallocate them.  If we need fewer, just
539 	 leave it alone.  */
540       if (BE (need_regs > regs->num_regs, 0))
541 	{
542 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
543 	  regoff_t *new_end;
544 	  if (BE (new_start == NULL, 0))
545 	    return REGS_UNALLOCATED;
546 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
547 	  if (BE (new_end == NULL, 0))
548 	    {
549 	      re_free (new_start);
550 	      return REGS_UNALLOCATED;
551 	    }
552 	  regs->start = new_start;
553 	  regs->end = new_end;
554 	  regs->num_regs = need_regs;
555 	}
556     }
557   else
558     {
559       assert (regs_allocated == REGS_FIXED);
560       /* This function may not be called with REGS_FIXED and nregs too big.  */
561       assert (regs->num_regs >= nregs);
562       rval = REGS_FIXED;
563     }
564 
565   /* Copy the regs.  */
566   for (i = 0; i < nregs; ++i)
567     {
568       regs->start[i] = pmatch[i].rm_so;
569       regs->end[i] = pmatch[i].rm_eo;
570     }
571   for ( ; i < regs->num_regs; ++i)
572     regs->start[i] = regs->end[i] = -1;
573 
574   return rval;
575 }
576 
577 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
578    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
579    this memory for recording register information.  STARTS and ENDS
580    must be allocated using the malloc library routine, and must each
581    be at least NUM_REGS * sizeof (regoff_t) bytes long.
582 
583    If NUM_REGS == 0, then subsequent matches should allocate their own
584    register data.
585 
586    Unless this function is called, the first search or match using
587    PATTERN_BUFFER will allocate its own register data, without
588    freeing the old data.  */
589 
590 void
591 re_set_registers (bufp, regs, num_regs, starts, ends)
592     struct re_pattern_buffer *bufp;
593     struct re_registers *regs;
594     __re_size_t num_regs;
595     regoff_t *starts, *ends;
596 {
597   if (num_regs)
598     {
599       bufp->regs_allocated = REGS_REALLOCATE;
600       regs->num_regs = num_regs;
601       regs->start = starts;
602       regs->end = ends;
603     }
604   else
605     {
606       bufp->regs_allocated = REGS_UNALLOCATED;
607       regs->num_regs = 0;
608       regs->start = regs->end = NULL;
609     }
610 }
611 #ifdef _LIBC
612 weak_alias (__re_set_registers, re_set_registers)
613 #endif
614 
615 /* Entry points compatible with 4.2 BSD regex library.  We don't define
616    them unless specifically requested.  */
617 
618 #if defined _REGEX_RE_COMP || defined _LIBC
619 int
620 # ifdef _LIBC
621 weak_function
622 # endif
623 re_exec (s)
624      const char *s;
625 {
626   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
627 }
628 #endif /* _REGEX_RE_COMP */
629 
630 /* Internal entry point.  */
631 
632 /* Searches for a compiled pattern PREG in the string STRING, whose
633    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
634    meaning as with regexec.  LAST_START is START + RANGE, where
635    START and RANGE have the same meaning as with re_search.
636    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
637    otherwise return the error code.
638    Note: We assume front end functions already check ranges.
639    (0 <= LAST_START && LAST_START <= LENGTH)  */
640 
641 static reg_errcode_t
642 __attribute_warn_unused_result__
643 re_search_internal (const regex_t *preg,
644 		    const char *string, Idx length,
645 		    Idx start, Idx last_start, Idx stop,
646 		    size_t nmatch, regmatch_t pmatch[],
647 		    int eflags)
648 {
649   reg_errcode_t err;
650   const re_dfa_t *dfa = preg->buffer;
651   Idx left_lim, right_lim;
652   int incr;
653   bool fl_longest_match;
654   int match_kind;
655   Idx match_first;
656   Idx match_last = REG_MISSING;
657   Idx extra_nmatch;
658   bool sb;
659   int ch;
660 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
661   re_match_context_t mctx = { .dfa = dfa };
662 #else
663   re_match_context_t mctx;
664 #endif
665   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
666 		    && start != last_start && !preg->can_be_null)
667 		   ? preg->fastmap : NULL);
668   RE_TRANSLATE_TYPE t = preg->translate;
669 
670 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
671   memset (&mctx, '\0', sizeof (re_match_context_t));
672   mctx.dfa = dfa;
673 #endif
674 
675   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
676   nmatch -= extra_nmatch;
677 
678   /* Check if the DFA haven't been compiled.  */
679   if (BE (preg->used == 0 || dfa->init_state == NULL
680 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
681 	  || dfa->init_state_begbuf == NULL, 0))
682     return REG_NOMATCH;
683 
684 #ifdef DEBUG
685   /* We assume front-end functions already check them.  */
686   assert (0 <= last_start && last_start <= length);
687 #endif
688 
689   /* If initial states with non-begbuf contexts have no elements,
690      the regex must be anchored.  If preg->newline_anchor is set,
691      we'll never use init_state_nl, so do not check it.  */
692   if (dfa->init_state->nodes.nelem == 0
693       && dfa->init_state_word->nodes.nelem == 0
694       && (dfa->init_state_nl->nodes.nelem == 0
695 	  || !preg->newline_anchor))
696     {
697       if (start != 0 && last_start != 0)
698         return REG_NOMATCH;
699       start = last_start = 0;
700     }
701 
702   /* We must check the longest matching, if nmatch > 0.  */
703   fl_longest_match = (nmatch != 0 || dfa->nbackref);
704 
705   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
706 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
707 			    dfa);
708   if (BE (err != REG_NOERROR, 0))
709     goto free_return;
710   mctx.input.stop = stop;
711   mctx.input.raw_stop = stop;
712   mctx.input.newline_anchor = preg->newline_anchor;
713 
714   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
715   if (BE (err != REG_NOERROR, 0))
716     goto free_return;
717 
718   /* We will log all the DFA states through which the dfa pass,
719      if nmatch > 1, or this dfa has "multibyte node", which is a
720      back-reference or a node which can accept multibyte character or
721      multi character collating element.  */
722   if (nmatch > 1 || dfa->has_mb_node)
723     {
724       /* Avoid overflow.  */
725       if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
726                <= mctx.input.bufs_len), 0))
727 	{
728 	  err = REG_ESPACE;
729 	  goto free_return;
730 	}
731 
732       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
733       if (BE (mctx.state_log == NULL, 0))
734 	{
735 	  err = REG_ESPACE;
736 	  goto free_return;
737 	}
738     }
739   else
740     mctx.state_log = NULL;
741 
742   match_first = start;
743   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
744 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
745 
746   /* Check incrementally whether of not the input string match.  */
747   incr = (last_start < start) ? -1 : 1;
748   left_lim = (last_start < start) ? last_start : start;
749   right_lim = (last_start < start) ? start : last_start;
750   sb = dfa->mb_cur_max == 1;
751   match_kind =
752     (fastmap
753      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
754 	| (start <= last_start ? 2 : 0)
755 	| (t != NULL ? 1 : 0))
756      : 8);
757 
758   for (;; match_first += incr)
759     {
760       err = REG_NOMATCH;
761       if (match_first < left_lim || right_lim < match_first)
762 	goto free_return;
763 
764       /* Advance as rapidly as possible through the string, until we
765 	 find a plausible place to start matching.  This may be done
766 	 with varying efficiency, so there are various possibilities:
767 	 only the most common of them are specialized, in order to
768 	 save on code size.  We use a switch statement for speed.  */
769       switch (match_kind)
770 	{
771 	case 8:
772 	  /* No fastmap.  */
773 	  break;
774 
775 	case 7:
776 	  /* Fastmap with single-byte translation, match forward.  */
777 	  while (BE (match_first < right_lim, 1)
778 		 && !fastmap[t[(unsigned char) string[match_first]]])
779 	    ++match_first;
780 	  goto forward_match_found_start_or_reached_end;
781 
782 	case 6:
783 	  /* Fastmap without translation, match forward.  */
784 	  while (BE (match_first < right_lim, 1)
785 		 && !fastmap[(unsigned char) string[match_first]])
786 	    ++match_first;
787 
788 	forward_match_found_start_or_reached_end:
789 	  if (BE (match_first == right_lim, 0))
790 	    {
791 	      ch = match_first >= length
792 		       ? 0 : (unsigned char) string[match_first];
793 	      if (!fastmap[t ? t[ch] : ch])
794 		goto free_return;
795 	    }
796 	  break;
797 
798 	case 4:
799 	case 5:
800 	  /* Fastmap without multi-byte translation, match backwards.  */
801 	  while (match_first >= left_lim)
802 	    {
803 	      ch = match_first >= length
804 		       ? 0 : (unsigned char) string[match_first];
805 	      if (fastmap[t ? t[ch] : ch])
806 		break;
807 	      --match_first;
808 	    }
809 	  if (match_first < left_lim)
810 	    goto free_return;
811 	  break;
812 
813 	default:
814 	  /* In this case, we can't determine easily the current byte,
815 	     since it might be a component byte of a multibyte
816 	     character.  Then we use the constructed buffer instead.  */
817 	  for (;;)
818 	    {
819 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
820 		 buffers.  */
821 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
822 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
823 		{
824 		  err = re_string_reconstruct (&mctx.input, match_first,
825 					       eflags);
826 		  if (BE (err != REG_NOERROR, 0))
827 		    goto free_return;
828 
829 		  offset = match_first - mctx.input.raw_mbs_idx;
830 		}
831 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
832 		 Note that MATCH_FIRST must not be smaller than 0.  */
833 	      ch = (match_first >= length
834 		    ? 0 : re_string_byte_at (&mctx.input, offset));
835 	      if (fastmap[ch])
836 		break;
837 	      match_first += incr;
838 	      if (match_first < left_lim || match_first > right_lim)
839 		{
840 		  err = REG_NOMATCH;
841 		  goto free_return;
842 		}
843 	    }
844 	  break;
845 	}
846 
847       /* Reconstruct the buffers so that the matcher can assume that
848 	 the matching starts from the beginning of the buffer.  */
849       err = re_string_reconstruct (&mctx.input, match_first, eflags);
850       if (BE (err != REG_NOERROR, 0))
851 	goto free_return;
852 
853 #ifdef RE_ENABLE_I18N
854      /* Don't consider this char as a possible match start if it part,
855 	yet isn't the head, of a multibyte character.  */
856       if (!sb && !re_string_first_byte (&mctx.input, 0))
857 	continue;
858 #endif
859 
860       /* It seems to be appropriate one, then use the matcher.  */
861       /* We assume that the matching starts from 0.  */
862       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
863       match_last = check_matching (&mctx, fl_longest_match,
864 				   start <= last_start ? &match_first : NULL);
865       if (match_last != REG_MISSING)
866 	{
867 	  if (BE (match_last == REG_ERROR, 0))
868 	    {
869 	      err = REG_ESPACE;
870 	      goto free_return;
871 	    }
872 	  else
873 	    {
874 	      mctx.match_last = match_last;
875 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
876 		{
877 		  re_dfastate_t *pstate = mctx.state_log[match_last];
878 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
879 							     match_last);
880 		}
881 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
882 		  || dfa->nbackref)
883 		{
884 		  err = prune_impossible_nodes (&mctx);
885 		  if (err == REG_NOERROR)
886 		    break;
887 		  if (BE (err != REG_NOMATCH, 0))
888 		    goto free_return;
889 		  match_last = REG_MISSING;
890 		}
891 	      else
892 		break; /* We found a match.  */
893 	    }
894 	}
895 
896       match_ctx_clean (&mctx);
897     }
898 
899 #ifdef DEBUG
900   assert (match_last != REG_MISSING);
901   assert (err == REG_NOERROR);
902 #endif
903 
904   /* Set pmatch[] if we need.  */
905   if (nmatch > 0)
906     {
907       Idx reg_idx;
908 
909       /* Initialize registers.  */
910       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
911 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
912 
913       /* Set the points where matching start/end.  */
914       pmatch[0].rm_so = 0;
915       pmatch[0].rm_eo = mctx.match_last;
916       /* FIXME: This function should fail if mctx.match_last exceeds
917 	 the maximum possible regoff_t value.  We need a new error
918 	 code REG_OVERFLOW.  */
919 
920       if (!preg->no_sub && nmatch > 1)
921 	{
922 	  err = set_regs (preg, &mctx, nmatch, pmatch,
923 			  dfa->has_plural_match && dfa->nbackref > 0);
924 	  if (BE (err != REG_NOERROR, 0))
925 	    goto free_return;
926 	}
927 
928       /* At last, add the offset to each register, since we slid
929 	 the buffers so that we could assume that the matching starts
930 	 from 0.  */
931       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
932 	if (pmatch[reg_idx].rm_so != -1)
933 	  {
934 #ifdef RE_ENABLE_I18N
935 	    if (BE (mctx.input.offsets_needed != 0, 0))
936 	      {
937 		pmatch[reg_idx].rm_so =
938 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
939 		   ? mctx.input.valid_raw_len
940 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
941 		pmatch[reg_idx].rm_eo =
942 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
943 		   ? mctx.input.valid_raw_len
944 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
945 	      }
946 #else
947 	    assert (mctx.input.offsets_needed == 0);
948 #endif
949 	    pmatch[reg_idx].rm_so += match_first;
950 	    pmatch[reg_idx].rm_eo += match_first;
951 	  }
952       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
953 	{
954 	  pmatch[nmatch + reg_idx].rm_so = -1;
955 	  pmatch[nmatch + reg_idx].rm_eo = -1;
956 	}
957 
958       if (dfa->subexp_map)
959 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
960 	  if (dfa->subexp_map[reg_idx] != reg_idx)
961 	    {
962 	      pmatch[reg_idx + 1].rm_so
963 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
964 	      pmatch[reg_idx + 1].rm_eo
965 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
966 	    }
967     }
968 
969  free_return:
970   re_free (mctx.state_log);
971   if (dfa->nbackref)
972     match_ctx_free (&mctx);
973   re_string_destruct (&mctx.input);
974   return err;
975 }
976 
977 static reg_errcode_t
978 __attribute_warn_unused_result__
979 prune_impossible_nodes (re_match_context_t *mctx)
980 {
981   const re_dfa_t *const dfa = mctx->dfa;
982   Idx halt_node, match_last;
983   reg_errcode_t ret;
984   re_dfastate_t **sifted_states;
985   re_dfastate_t **lim_states = NULL;
986   re_sift_context_t sctx;
987 #ifdef DEBUG
988   assert (mctx->state_log != NULL);
989 #endif
990   match_last = mctx->match_last;
991   halt_node = mctx->last_node;
992 
993   /* Avoid overflow.  */
994   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
995     return REG_ESPACE;
996 
997   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
998   if (BE (sifted_states == NULL, 0))
999     {
1000       ret = REG_ESPACE;
1001       goto free_return;
1002     }
1003   if (dfa->nbackref)
1004     {
1005       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1006       if (BE (lim_states == NULL, 0))
1007 	{
1008 	  ret = REG_ESPACE;
1009 	  goto free_return;
1010 	}
1011       while (1)
1012 	{
1013 	  memset (lim_states, '\0',
1014 		  sizeof (re_dfastate_t *) * (match_last + 1));
1015 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1016 			 match_last);
1017 	  ret = sift_states_backward (mctx, &sctx);
1018 	  re_node_set_free (&sctx.limits);
1019 	  if (BE (ret != REG_NOERROR, 0))
1020 	      goto free_return;
1021 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1022 	    break;
1023 	  do
1024 	    {
1025 	      --match_last;
1026 	      if (! REG_VALID_INDEX (match_last))
1027 		{
1028 		  ret = REG_NOMATCH;
1029 		  goto free_return;
1030 		}
1031 	    } while (mctx->state_log[match_last] == NULL
1032 		     || !mctx->state_log[match_last]->halt);
1033 	  halt_node = check_halt_state_context (mctx,
1034 						mctx->state_log[match_last],
1035 						match_last);
1036 	}
1037       ret = merge_state_array (dfa, sifted_states, lim_states,
1038 			       match_last + 1);
1039       re_free (lim_states);
1040       lim_states = NULL;
1041       if (BE (ret != REG_NOERROR, 0))
1042 	goto free_return;
1043     }
1044   else
1045     {
1046       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1047       ret = sift_states_backward (mctx, &sctx);
1048       re_node_set_free (&sctx.limits);
1049       if (BE (ret != REG_NOERROR, 0))
1050 	goto free_return;
1051       if (sifted_states[0] == NULL)
1052 	{
1053 	  ret = REG_NOMATCH;
1054 	  goto free_return;
1055 	}
1056     }
1057   re_free (mctx->state_log);
1058   mctx->state_log = sifted_states;
1059   sifted_states = NULL;
1060   mctx->last_node = halt_node;
1061   mctx->match_last = match_last;
1062   ret = REG_NOERROR;
1063  free_return:
1064   re_free (sifted_states);
1065   re_free (lim_states);
1066   return ret;
1067 }
1068 
1069 /* Acquire an initial state and return it.
1070    We must select appropriate initial state depending on the context,
1071    since initial states may have constraints like "\<", "^", etc..  */
1072 
1073 static inline re_dfastate_t *
1074 __attribute ((always_inline)) internal_function
1075 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1076 			    Idx idx)
1077 {
1078   const re_dfa_t *const dfa = mctx->dfa;
1079   if (dfa->init_state->has_constraint)
1080     {
1081       unsigned int context;
1082       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1083       if (IS_WORD_CONTEXT (context))
1084 	return dfa->init_state_word;
1085       else if (IS_ORDINARY_CONTEXT (context))
1086 	return dfa->init_state;
1087       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1088 	return dfa->init_state_begbuf;
1089       else if (IS_NEWLINE_CONTEXT (context))
1090 	return dfa->init_state_nl;
1091       else if (IS_BEGBUF_CONTEXT (context))
1092 	{
1093 	  /* It is relatively rare case, then calculate on demand.  */
1094 	  return re_acquire_state_context (err, dfa,
1095 					   dfa->init_state->entrance_nodes,
1096 					   context);
1097 	}
1098       else
1099 	/* Must not happen?  */
1100 	return dfa->init_state;
1101     }
1102   else
1103     return dfa->init_state;
1104 }
1105 
1106 /* Check whether the regular expression match input string INPUT or not,
1107    and return the index where the matching end.  Return REG_MISSING if
1108    there is no match, and return REG_ERROR in case of an error.
1109    FL_LONGEST_MATCH means we want the POSIX longest matching.
1110    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1111    next place where we may want to try matching.
1112    Note that the matcher assumes that the matching starts from the current
1113    index of the buffer.  */
1114 
1115 static Idx
1116 internal_function __attribute_warn_unused_result__
1117 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1118 		Idx *p_match_first)
1119 {
1120   const re_dfa_t *const dfa = mctx->dfa;
1121   reg_errcode_t err;
1122   Idx match = 0;
1123   Idx match_last = REG_MISSING;
1124   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1125   re_dfastate_t *cur_state;
1126   bool at_init_state = p_match_first != NULL;
1127   Idx next_start_idx = cur_str_idx;
1128 
1129   err = REG_NOERROR;
1130   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1131   /* An initial state must not be NULL (invalid).  */
1132   if (BE (cur_state == NULL, 0))
1133     {
1134       assert (err == REG_ESPACE);
1135       return REG_ERROR;
1136     }
1137 
1138   if (mctx->state_log != NULL)
1139     {
1140       mctx->state_log[cur_str_idx] = cur_state;
1141 
1142       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1143 	 later.  E.g. Processing back references.  */
1144       if (BE (dfa->nbackref, 0))
1145 	{
1146 	  at_init_state = false;
1147 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1148 	  if (BE (err != REG_NOERROR, 0))
1149 	    return err;
1150 
1151 	  if (cur_state->has_backref)
1152 	    {
1153 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1154 	      if (BE (err != REG_NOERROR, 0))
1155 		return err;
1156 	    }
1157 	}
1158     }
1159 
1160   /* If the RE accepts NULL string.  */
1161   if (BE (cur_state->halt, 0))
1162     {
1163       if (!cur_state->has_constraint
1164 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1165 	{
1166 	  if (!fl_longest_match)
1167 	    return cur_str_idx;
1168 	  else
1169 	    {
1170 	      match_last = cur_str_idx;
1171 	      match = 1;
1172 	    }
1173 	}
1174     }
1175 
1176   while (!re_string_eoi (&mctx->input))
1177     {
1178       re_dfastate_t *old_state = cur_state;
1179       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1180 
1181       if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1182 	   && mctx->input.bufs_len < mctx->input.len)
1183 	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1184 	      && mctx->input.valid_len < mctx->input.len))
1185 	{
1186 	  err = extend_buffers (mctx);
1187 	  if (BE (err != REG_NOERROR, 0))
1188 	    {
1189 	      assert (err == REG_ESPACE);
1190 	      return REG_ERROR;
1191 	    }
1192 	}
1193 
1194       cur_state = transit_state (&err, mctx, cur_state);
1195       if (mctx->state_log != NULL)
1196 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1197 
1198       if (cur_state == NULL)
1199 	{
1200 	  /* Reached the invalid state or an error.  Try to recover a valid
1201 	     state using the state log, if available and if we have not
1202 	     already found a valid (even if not the longest) match.  */
1203 	  if (BE (err != REG_NOERROR, 0))
1204 	    return REG_ERROR;
1205 
1206 	  if (mctx->state_log == NULL
1207 	      || (match && !fl_longest_match)
1208 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1209 	    break;
1210 	}
1211 
1212       if (BE (at_init_state, 0))
1213 	{
1214 	  if (old_state == cur_state)
1215 	    next_start_idx = next_char_idx;
1216 	  else
1217 	    at_init_state = false;
1218 	}
1219 
1220       if (cur_state->halt)
1221 	{
1222 	  /* Reached a halt state.
1223 	     Check the halt state can satisfy the current context.  */
1224 	  if (!cur_state->has_constraint
1225 	      || check_halt_state_context (mctx, cur_state,
1226 					   re_string_cur_idx (&mctx->input)))
1227 	    {
1228 	      /* We found an appropriate halt state.  */
1229 	      match_last = re_string_cur_idx (&mctx->input);
1230 	      match = 1;
1231 
1232 	      /* We found a match, do not modify match_first below.  */
1233 	      p_match_first = NULL;
1234 	      if (!fl_longest_match)
1235 		break;
1236 	    }
1237 	}
1238     }
1239 
1240   if (p_match_first)
1241     *p_match_first += next_start_idx;
1242 
1243   return match_last;
1244 }
1245 
1246 /* Check NODE match the current context.  */
1247 
1248 static bool
1249 internal_function
1250 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1251 {
1252   re_token_type_t type = dfa->nodes[node].type;
1253   unsigned int constraint = dfa->nodes[node].constraint;
1254   if (type != END_OF_RE)
1255     return false;
1256   if (!constraint)
1257     return true;
1258   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1259     return false;
1260   return true;
1261 }
1262 
1263 /* Check the halt state STATE match the current context.
1264    Return 0 if not match, if the node, STATE has, is a halt node and
1265    match the context, return the node.  */
1266 
1267 static Idx
1268 internal_function
1269 check_halt_state_context (const re_match_context_t *mctx,
1270 			  const re_dfastate_t *state, Idx idx)
1271 {
1272   Idx i;
1273   unsigned int context;
1274 #ifdef DEBUG
1275   assert (state->halt);
1276 #endif
1277   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1278   for (i = 0; i < state->nodes.nelem; ++i)
1279     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1280       return state->nodes.elems[i];
1281   return 0;
1282 }
1283 
1284 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1285    corresponding to the DFA).
1286    Return the destination node, and update EPS_VIA_NODES;
1287    return REG_MISSING in case of errors.  */
1288 
1289 static Idx
1290 internal_function
1291 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1292 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1293 		   struct re_fail_stack_t *fs)
1294 {
1295   const re_dfa_t *const dfa = mctx->dfa;
1296   Idx i;
1297   bool ok;
1298   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1299     {
1300       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1301       re_node_set *edests = &dfa->edests[node];
1302       Idx dest_node;
1303       ok = re_node_set_insert (eps_via_nodes, node);
1304       if (BE (! ok, 0))
1305 	return REG_ERROR;
1306       /* Pick up a valid destination, or return REG_MISSING if none
1307 	 is found.  */
1308       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1309 	{
1310 	  Idx candidate = edests->elems[i];
1311 	  if (!re_node_set_contains (cur_nodes, candidate))
1312 	    continue;
1313           if (dest_node == REG_MISSING)
1314 	    dest_node = candidate;
1315 
1316 	  else
1317 	    {
1318 	      /* In order to avoid infinite loop like "(a*)*", return the second
1319 		 epsilon-transition if the first was already considered.  */
1320 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1321 		return candidate;
1322 
1323 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1324 	      else if (fs != NULL
1325 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1326 					   eps_via_nodes))
1327 		return REG_ERROR;
1328 
1329 	      /* We know we are going to exit.  */
1330 	      break;
1331 	    }
1332 	}
1333       return dest_node;
1334     }
1335   else
1336     {
1337       Idx naccepted = 0;
1338       re_token_type_t type = dfa->nodes[node].type;
1339 
1340 #ifdef RE_ENABLE_I18N
1341       if (dfa->nodes[node].accept_mb)
1342 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1343       else
1344 #endif /* RE_ENABLE_I18N */
1345       if (type == OP_BACK_REF)
1346 	{
1347 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1348 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1349 	  if (fs != NULL)
1350 	    {
1351 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1352 		return REG_MISSING;
1353 	      else if (naccepted)
1354 		{
1355 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1356 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1357 			      naccepted) != 0)
1358 		    return REG_MISSING;
1359 		}
1360 	    }
1361 
1362 	  if (naccepted == 0)
1363 	    {
1364 	      Idx dest_node;
1365 	      ok = re_node_set_insert (eps_via_nodes, node);
1366 	      if (BE (! ok, 0))
1367 		return REG_ERROR;
1368 	      dest_node = dfa->edests[node].elems[0];
1369 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1370 					dest_node))
1371 		return dest_node;
1372 	    }
1373 	}
1374 
1375       if (naccepted != 0
1376 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1377 	{
1378 	  Idx dest_node = dfa->nexts[node];
1379 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1380 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1381 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1382 					       dest_node)))
1383 	    return REG_MISSING;
1384 	  re_node_set_empty (eps_via_nodes);
1385 	  return dest_node;
1386 	}
1387     }
1388   return REG_MISSING;
1389 }
1390 
1391 static reg_errcode_t
1392 internal_function __attribute_warn_unused_result__
1393 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1394 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1395 {
1396   reg_errcode_t err;
1397   Idx num = fs->num++;
1398   if (fs->num == fs->alloc)
1399     {
1400       struct re_fail_stack_ent_t *new_array;
1401       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1402 				       * fs->alloc * 2));
1403       if (new_array == NULL)
1404 	return REG_ESPACE;
1405       fs->alloc *= 2;
1406       fs->stack = new_array;
1407     }
1408   fs->stack[num].idx = str_idx;
1409   fs->stack[num].node = dest_node;
1410   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1411   if (fs->stack[num].regs == NULL)
1412     return REG_ESPACE;
1413   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1414   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1415   return err;
1416 }
1417 
1418 static Idx
1419 internal_function
1420 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1421 		regmatch_t *regs, re_node_set *eps_via_nodes)
1422 {
1423   Idx num = --fs->num;
1424   assert (REG_VALID_INDEX (num));
1425   *pidx = fs->stack[num].idx;
1426   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1427   re_node_set_free (eps_via_nodes);
1428   re_free (fs->stack[num].regs);
1429   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1430   return fs->stack[num].node;
1431 }
1432 
1433 /* Set the positions where the subexpressions are starts/ends to registers
1434    PMATCH.
1435    Note: We assume that pmatch[0] is already set, and
1436    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1437 
1438 static reg_errcode_t
1439 internal_function __attribute_warn_unused_result__
1440 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1441 	  regmatch_t *pmatch, bool fl_backtrack)
1442 {
1443   const re_dfa_t *dfa = preg->buffer;
1444   Idx idx, cur_node;
1445   re_node_set eps_via_nodes;
1446   struct re_fail_stack_t *fs;
1447   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1448   regmatch_t *prev_idx_match;
1449   bool prev_idx_match_malloced = false;
1450 
1451 #ifdef DEBUG
1452   assert (nmatch > 1);
1453   assert (mctx->state_log != NULL);
1454 #endif
1455   if (fl_backtrack)
1456     {
1457       fs = &fs_body;
1458       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1459       if (fs->stack == NULL)
1460 	return REG_ESPACE;
1461     }
1462   else
1463     fs = NULL;
1464 
1465   cur_node = dfa->init_node;
1466   re_node_set_init_empty (&eps_via_nodes);
1467 
1468   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1469     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1470   else
1471     {
1472       prev_idx_match = re_malloc (regmatch_t, nmatch);
1473       if (prev_idx_match == NULL)
1474 	{
1475 	  free_fail_stack_return (fs);
1476 	  return REG_ESPACE;
1477 	}
1478       prev_idx_match_malloced = true;
1479     }
1480   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1481 
1482   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1483     {
1484       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1485 
1486       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1487 	{
1488 	  Idx reg_idx;
1489 	  if (fs)
1490 	    {
1491 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1492 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1493 		  break;
1494 	      if (reg_idx == nmatch)
1495 		{
1496 		  re_node_set_free (&eps_via_nodes);
1497 		  if (prev_idx_match_malloced)
1498 		    re_free (prev_idx_match);
1499 		  return free_fail_stack_return (fs);
1500 		}
1501 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1502 					 &eps_via_nodes);
1503 	    }
1504 	  else
1505 	    {
1506 	      re_node_set_free (&eps_via_nodes);
1507 	      if (prev_idx_match_malloced)
1508 		re_free (prev_idx_match);
1509 	      return REG_NOERROR;
1510 	    }
1511 	}
1512 
1513       /* Proceed to next node.  */
1514       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1515 				    &eps_via_nodes, fs);
1516 
1517       if (BE (! REG_VALID_INDEX (cur_node), 0))
1518 	{
1519 	  if (BE (cur_node == REG_ERROR, 0))
1520 	    {
1521 	      re_node_set_free (&eps_via_nodes);
1522 	      if (prev_idx_match_malloced)
1523 		re_free (prev_idx_match);
1524 	      free_fail_stack_return (fs);
1525 	      return REG_ESPACE;
1526 	    }
1527 	  if (fs)
1528 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1529 				       &eps_via_nodes);
1530 	  else
1531 	    {
1532 	      re_node_set_free (&eps_via_nodes);
1533 	      if (prev_idx_match_malloced)
1534 		re_free (prev_idx_match);
1535 	      return REG_NOMATCH;
1536 	    }
1537 	}
1538     }
1539   re_node_set_free (&eps_via_nodes);
1540   if (prev_idx_match_malloced)
1541     re_free (prev_idx_match);
1542   return free_fail_stack_return (fs);
1543 }
1544 
1545 static reg_errcode_t
1546 internal_function
1547 free_fail_stack_return (struct re_fail_stack_t *fs)
1548 {
1549   if (fs)
1550     {
1551       Idx fs_idx;
1552       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1553 	{
1554 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1555 	  re_free (fs->stack[fs_idx].regs);
1556 	}
1557       re_free (fs->stack);
1558     }
1559   return REG_NOERROR;
1560 }
1561 
1562 static void
1563 internal_function
1564 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1565 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1566 {
1567   int type = dfa->nodes[cur_node].type;
1568   if (type == OP_OPEN_SUBEXP)
1569     {
1570       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1571 
1572       /* We are at the first node of this sub expression.  */
1573       if (reg_num < nmatch)
1574 	{
1575 	  pmatch[reg_num].rm_so = cur_idx;
1576 	  pmatch[reg_num].rm_eo = -1;
1577 	}
1578     }
1579   else if (type == OP_CLOSE_SUBEXP)
1580     {
1581       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1582       if (reg_num < nmatch)
1583 	{
1584 	  /* We are at the last node of this sub expression.  */
1585 	  if (pmatch[reg_num].rm_so < cur_idx)
1586 	    {
1587 	      pmatch[reg_num].rm_eo = cur_idx;
1588 	      /* This is a non-empty match or we are not inside an optional
1589 		 subexpression.  Accept this right away.  */
1590 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1591 	    }
1592 	  else
1593 	    {
1594 	      if (dfa->nodes[cur_node].opt_subexp
1595 		  && prev_idx_match[reg_num].rm_so != -1)
1596 		/* We transited through an empty match for an optional
1597 		   subexpression, like (a?)*, and this is not the subexp's
1598 		   first match.  Copy back the old content of the registers
1599 		   so that matches of an inner subexpression are undone as
1600 		   well, like in ((a?))*.  */
1601 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1602 	      else
1603 		/* We completed a subexpression, but it may be part of
1604 		   an optional one, so do not update PREV_IDX_MATCH.  */
1605 		pmatch[reg_num].rm_eo = cur_idx;
1606 	    }
1607 	}
1608     }
1609 }
1610 
1611 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1612    and sift the nodes in each states according to the following rules.
1613    Updated state_log will be wrote to STATE_LOG.
1614 
1615    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1616      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1617 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1618 	the LAST_NODE, we throw away the node 'a'.
1619      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1620 	string 's' and transit to 'b':
1621 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1622 	   away the node 'a'.
1623 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1624 	    thrown away, we throw away the node 'a'.
1625      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1626 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1627 	   node 'a'.
1628 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1629 	    we throw away the node 'a'.  */
1630 
1631 #define STATE_NODE_CONTAINS(state,node) \
1632   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1633 
1634 static reg_errcode_t
1635 internal_function
1636 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1637 {
1638   reg_errcode_t err;
1639   int null_cnt = 0;
1640   Idx str_idx = sctx->last_str_idx;
1641   re_node_set cur_dest;
1642 
1643 #ifdef DEBUG
1644   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1645 #endif
1646 
1647   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1648      transit to the last_node and the last_node itself.  */
1649   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1650   if (BE (err != REG_NOERROR, 0))
1651     return err;
1652   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1653   if (BE (err != REG_NOERROR, 0))
1654     goto free_return;
1655 
1656   /* Then check each states in the state_log.  */
1657   while (str_idx > 0)
1658     {
1659       /* Update counters.  */
1660       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1661       if (null_cnt > mctx->max_mb_elem_len)
1662 	{
1663 	  memset (sctx->sifted_states, '\0',
1664 		  sizeof (re_dfastate_t *) * str_idx);
1665 	  re_node_set_free (&cur_dest);
1666 	  return REG_NOERROR;
1667 	}
1668       re_node_set_empty (&cur_dest);
1669       --str_idx;
1670 
1671       if (mctx->state_log[str_idx])
1672 	{
1673 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1674 	  if (BE (err != REG_NOERROR, 0))
1675 	    goto free_return;
1676 	}
1677 
1678       /* Add all the nodes which satisfy the following conditions:
1679 	 - It can epsilon transit to a node in CUR_DEST.
1680 	 - It is in CUR_SRC.
1681 	 And update state_log.  */
1682       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1683       if (BE (err != REG_NOERROR, 0))
1684 	goto free_return;
1685     }
1686   err = REG_NOERROR;
1687  free_return:
1688   re_node_set_free (&cur_dest);
1689   return err;
1690 }
1691 
1692 static reg_errcode_t
1693 internal_function __attribute_warn_unused_result__
1694 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1695 		     Idx str_idx, re_node_set *cur_dest)
1696 {
1697   const re_dfa_t *const dfa = mctx->dfa;
1698   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1699   Idx i;
1700 
1701   /* Then build the next sifted state.
1702      We build the next sifted state on 'cur_dest', and update
1703      'sifted_states[str_idx]' with 'cur_dest'.
1704      Note:
1705      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1706      'cur_src' points the node_set of the old 'state_log[str_idx]'
1707      (with the epsilon nodes pre-filtered out).  */
1708   for (i = 0; i < cur_src->nelem; i++)
1709     {
1710       Idx prev_node = cur_src->elems[i];
1711       int naccepted = 0;
1712       bool ok;
1713 
1714 #ifdef DEBUG
1715       re_token_type_t type = dfa->nodes[prev_node].type;
1716       assert (!IS_EPSILON_NODE (type));
1717 #endif
1718 #ifdef RE_ENABLE_I18N
1719       /* If the node may accept "multi byte".  */
1720       if (dfa->nodes[prev_node].accept_mb)
1721 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1722 					 str_idx, sctx->last_str_idx);
1723 #endif /* RE_ENABLE_I18N */
1724 
1725       /* We don't check backreferences here.
1726 	 See update_cur_sifted_state().  */
1727       if (!naccepted
1728 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1729 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1730 				  dfa->nexts[prev_node]))
1731 	naccepted = 1;
1732 
1733       if (naccepted == 0)
1734 	continue;
1735 
1736       if (sctx->limits.nelem)
1737 	{
1738 	  Idx to_idx = str_idx + naccepted;
1739 	  if (check_dst_limits (mctx, &sctx->limits,
1740 				dfa->nexts[prev_node], to_idx,
1741 				prev_node, str_idx))
1742 	    continue;
1743 	}
1744       ok = re_node_set_insert (cur_dest, prev_node);
1745       if (BE (! ok, 0))
1746 	return REG_ESPACE;
1747     }
1748 
1749   return REG_NOERROR;
1750 }
1751 
1752 /* Helper functions.  */
1753 
1754 static reg_errcode_t
1755 internal_function
1756 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1757 {
1758   Idx top = mctx->state_log_top;
1759 
1760   if ((next_state_log_idx >= mctx->input.bufs_len
1761        && mctx->input.bufs_len < mctx->input.len)
1762       || (next_state_log_idx >= mctx->input.valid_len
1763 	  && mctx->input.valid_len < mctx->input.len))
1764     {
1765       reg_errcode_t err;
1766       err = extend_buffers (mctx);
1767       if (BE (err != REG_NOERROR, 0))
1768 	return err;
1769     }
1770 
1771   if (top < next_state_log_idx)
1772     {
1773       memset (mctx->state_log + top + 1, '\0',
1774 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1775       mctx->state_log_top = next_state_log_idx;
1776     }
1777   return REG_NOERROR;
1778 }
1779 
1780 static reg_errcode_t
1781 internal_function
1782 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1783 		   re_dfastate_t **src, Idx num)
1784 {
1785   Idx st_idx;
1786   reg_errcode_t err;
1787   for (st_idx = 0; st_idx < num; ++st_idx)
1788     {
1789       if (dst[st_idx] == NULL)
1790 	dst[st_idx] = src[st_idx];
1791       else if (src[st_idx] != NULL)
1792 	{
1793 	  re_node_set merged_set;
1794 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1795 					&src[st_idx]->nodes);
1796 	  if (BE (err != REG_NOERROR, 0))
1797 	    return err;
1798 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1799 	  re_node_set_free (&merged_set);
1800 	  if (BE (err != REG_NOERROR, 0))
1801 	    return err;
1802 	}
1803     }
1804   return REG_NOERROR;
1805 }
1806 
1807 static reg_errcode_t
1808 internal_function
1809 update_cur_sifted_state (const re_match_context_t *mctx,
1810 			 re_sift_context_t *sctx, Idx str_idx,
1811 			 re_node_set *dest_nodes)
1812 {
1813   const re_dfa_t *const dfa = mctx->dfa;
1814   reg_errcode_t err = REG_NOERROR;
1815   const re_node_set *candidates;
1816   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1817 		: &mctx->state_log[str_idx]->nodes);
1818 
1819   if (dest_nodes->nelem == 0)
1820     sctx->sifted_states[str_idx] = NULL;
1821   else
1822     {
1823       if (candidates)
1824 	{
1825 	  /* At first, add the nodes which can epsilon transit to a node in
1826 	     DEST_NODE.  */
1827 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1828 	  if (BE (err != REG_NOERROR, 0))
1829 	    return err;
1830 
1831 	  /* Then, check the limitations in the current sift_context.  */
1832 	  if (sctx->limits.nelem)
1833 	    {
1834 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1835 					 mctx->bkref_ents, str_idx);
1836 	      if (BE (err != REG_NOERROR, 0))
1837 		return err;
1838 	    }
1839 	}
1840 
1841       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1842       if (BE (err != REG_NOERROR, 0))
1843 	return err;
1844     }
1845 
1846   if (candidates && mctx->state_log[str_idx]->has_backref)
1847     {
1848       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1849       if (BE (err != REG_NOERROR, 0))
1850 	return err;
1851     }
1852   return REG_NOERROR;
1853 }
1854 
1855 static reg_errcode_t
1856 internal_function __attribute_warn_unused_result__
1857 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1858 		       const re_node_set *candidates)
1859 {
1860   reg_errcode_t err = REG_NOERROR;
1861   Idx i;
1862 
1863   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1864   if (BE (err != REG_NOERROR, 0))
1865     return err;
1866 
1867   if (!state->inveclosure.alloc)
1868     {
1869       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1870       if (BE (err != REG_NOERROR, 0))
1871 	return REG_ESPACE;
1872       for (i = 0; i < dest_nodes->nelem; i++)
1873 	{
1874 	  err = re_node_set_merge (&state->inveclosure,
1875 				   dfa->inveclosures + dest_nodes->elems[i]);
1876 	  if (BE (err != REG_NOERROR, 0))
1877 	    return REG_ESPACE;
1878 	}
1879     }
1880   return re_node_set_add_intersect (dest_nodes, candidates,
1881 				    &state->inveclosure);
1882 }
1883 
1884 static reg_errcode_t
1885 internal_function
1886 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1887 		       const re_node_set *candidates)
1888 {
1889     Idx ecl_idx;
1890     reg_errcode_t err;
1891     re_node_set *inv_eclosure = dfa->inveclosures + node;
1892     re_node_set except_nodes;
1893     re_node_set_init_empty (&except_nodes);
1894     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1895       {
1896 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1897 	if (cur_node == node)
1898 	  continue;
1899 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1900 	  {
1901 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1902 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1903 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1904 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1905 		 && re_node_set_contains (dest_nodes, edst1))
1906 		|| (REG_VALID_NONZERO_INDEX (edst2)
1907 		    && !re_node_set_contains (inv_eclosure, edst2)
1908 		    && re_node_set_contains (dest_nodes, edst2)))
1909 	      {
1910 		err = re_node_set_add_intersect (&except_nodes, candidates,
1911 						 dfa->inveclosures + cur_node);
1912 		if (BE (err != REG_NOERROR, 0))
1913 		  {
1914 		    re_node_set_free (&except_nodes);
1915 		    return err;
1916 		  }
1917 	      }
1918 	  }
1919       }
1920     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1921       {
1922 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1923 	if (!re_node_set_contains (&except_nodes, cur_node))
1924 	  {
1925 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1926 	    re_node_set_remove_at (dest_nodes, idx);
1927 	  }
1928       }
1929     re_node_set_free (&except_nodes);
1930     return REG_NOERROR;
1931 }
1932 
1933 static bool
1934 internal_function
1935 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1936 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1937 {
1938   const re_dfa_t *const dfa = mctx->dfa;
1939   Idx lim_idx, src_pos, dst_pos;
1940 
1941   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1942   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1943   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1944     {
1945       Idx subexp_idx;
1946       struct re_backref_cache_entry *ent;
1947       ent = mctx->bkref_ents + limits->elems[lim_idx];
1948       subexp_idx = dfa->nodes[ent->node].opr.idx;
1949 
1950       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1951 					   subexp_idx, dst_node, dst_idx,
1952 					   dst_bkref_idx);
1953       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1954 					   subexp_idx, src_node, src_idx,
1955 					   src_bkref_idx);
1956 
1957       /* In case of:
1958 	 <src> <dst> ( <subexp> )
1959 	 ( <subexp> ) <src> <dst>
1960 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1961       if (src_pos == dst_pos)
1962 	continue; /* This is unrelated limitation.  */
1963       else
1964 	return true;
1965     }
1966   return false;
1967 }
1968 
1969 static int
1970 internal_function
1971 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1972 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1973 {
1974   const re_dfa_t *const dfa = mctx->dfa;
1975   const re_node_set *eclosures = dfa->eclosures + from_node;
1976   Idx node_idx;
1977 
1978   /* Else, we are on the boundary: examine the nodes on the epsilon
1979      closure.  */
1980   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1981     {
1982       Idx node = eclosures->elems[node_idx];
1983       switch (dfa->nodes[node].type)
1984 	{
1985 	case OP_BACK_REF:
1986 	  if (bkref_idx != REG_MISSING)
1987 	    {
1988 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1989 	      do
1990 		{
1991 		  Idx dst;
1992 		  int cpos;
1993 
1994 		  if (ent->node != node)
1995 		    continue;
1996 
1997 		  if (subexp_idx < BITSET_WORD_BITS
1998 		      && !(ent->eps_reachable_subexps_map
1999 			   & ((bitset_word_t) 1 << subexp_idx)))
2000 		    continue;
2001 
2002 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
2003 		     OP_CLOSE_SUBEXP cases below.  But, if the
2004 		     destination node is the same node as the source
2005 		     node, don't recurse because it would cause an
2006 		     infinite loop: a regex that exhibits this behavior
2007 		     is ()\1*\1*  */
2008 		  dst = dfa->edests[node].elems[0];
2009 		  if (dst == from_node)
2010 		    {
2011 		      if (boundaries & 1)
2012 			return -1;
2013 		      else /* if (boundaries & 2) */
2014 			return 0;
2015 		    }
2016 
2017 		  cpos =
2018 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2019 						 dst, bkref_idx);
2020 		  if (cpos == -1 /* && (boundaries & 1) */)
2021 		    return -1;
2022 		  if (cpos == 0 && (boundaries & 2))
2023 		    return 0;
2024 
2025 		  if (subexp_idx < BITSET_WORD_BITS)
2026 		    ent->eps_reachable_subexps_map
2027 		      &= ~((bitset_word_t) 1 << subexp_idx);
2028 		}
2029 	      while (ent++->more);
2030 	    }
2031 	  break;
2032 
2033 	case OP_OPEN_SUBEXP:
2034 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2035 	    return -1;
2036 	  break;
2037 
2038 	case OP_CLOSE_SUBEXP:
2039 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2040 	    return 0;
2041 	  break;
2042 
2043 	default:
2044 	    break;
2045 	}
2046     }
2047 
2048   return (boundaries & 2) ? 1 : 0;
2049 }
2050 
2051 static int
2052 internal_function
2053 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2054 			   Idx subexp_idx, Idx from_node, Idx str_idx,
2055 			   Idx bkref_idx)
2056 {
2057   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2058   int boundaries;
2059 
2060   /* If we are outside the range of the subexpression, return -1 or 1.  */
2061   if (str_idx < lim->subexp_from)
2062     return -1;
2063 
2064   if (lim->subexp_to < str_idx)
2065     return 1;
2066 
2067   /* If we are within the subexpression, return 0.  */
2068   boundaries = (str_idx == lim->subexp_from);
2069   boundaries |= (str_idx == lim->subexp_to) << 1;
2070   if (boundaries == 0)
2071     return 0;
2072 
2073   /* Else, examine epsilon closure.  */
2074   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2075 				      from_node, bkref_idx);
2076 }
2077 
2078 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2079    which are against limitations from DEST_NODES. */
2080 
2081 static reg_errcode_t
2082 internal_function
2083 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2084 		     const re_node_set *candidates, re_node_set *limits,
2085 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2086 {
2087   reg_errcode_t err;
2088   Idx node_idx, lim_idx;
2089 
2090   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2091     {
2092       Idx subexp_idx;
2093       struct re_backref_cache_entry *ent;
2094       ent = bkref_ents + limits->elems[lim_idx];
2095 
2096       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2097 	continue; /* This is unrelated limitation.  */
2098 
2099       subexp_idx = dfa->nodes[ent->node].opr.idx;
2100       if (ent->subexp_to == str_idx)
2101 	{
2102 	  Idx ops_node = REG_MISSING;
2103 	  Idx cls_node = REG_MISSING;
2104 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2105 	    {
2106 	      Idx node = dest_nodes->elems[node_idx];
2107 	      re_token_type_t type = dfa->nodes[node].type;
2108 	      if (type == OP_OPEN_SUBEXP
2109 		  && subexp_idx == dfa->nodes[node].opr.idx)
2110 		ops_node = node;
2111 	      else if (type == OP_CLOSE_SUBEXP
2112 		       && subexp_idx == dfa->nodes[node].opr.idx)
2113 		cls_node = node;
2114 	    }
2115 
2116 	  /* Check the limitation of the open subexpression.  */
2117 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2118 	  if (REG_VALID_INDEX (ops_node))
2119 	    {
2120 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2121 					   candidates);
2122 	      if (BE (err != REG_NOERROR, 0))
2123 		return err;
2124 	    }
2125 
2126 	  /* Check the limitation of the close subexpression.  */
2127 	  if (REG_VALID_INDEX (cls_node))
2128 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2129 	      {
2130 		Idx node = dest_nodes->elems[node_idx];
2131 		if (!re_node_set_contains (dfa->inveclosures + node,
2132 					   cls_node)
2133 		    && !re_node_set_contains (dfa->eclosures + node,
2134 					      cls_node))
2135 		  {
2136 		    /* It is against this limitation.
2137 		       Remove it form the current sifted state.  */
2138 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2139 						 candidates);
2140 		    if (BE (err != REG_NOERROR, 0))
2141 		      return err;
2142 		    --node_idx;
2143 		  }
2144 	      }
2145 	}
2146       else /* (ent->subexp_to != str_idx)  */
2147 	{
2148 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2149 	    {
2150 	      Idx node = dest_nodes->elems[node_idx];
2151 	      re_token_type_t type = dfa->nodes[node].type;
2152 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2153 		{
2154 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2155 		    continue;
2156 		  /* It is against this limitation.
2157 		     Remove it form the current sifted state.  */
2158 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2159 					       candidates);
2160 		  if (BE (err != REG_NOERROR, 0))
2161 		    return err;
2162 		}
2163 	    }
2164 	}
2165     }
2166   return REG_NOERROR;
2167 }
2168 
2169 static reg_errcode_t
2170 internal_function __attribute_warn_unused_result__
2171 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2172 		   Idx str_idx, const re_node_set *candidates)
2173 {
2174   const re_dfa_t *const dfa = mctx->dfa;
2175   reg_errcode_t err;
2176   Idx node_idx, node;
2177   re_sift_context_t local_sctx;
2178   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2179 
2180   if (first_idx == REG_MISSING)
2181     return REG_NOERROR;
2182 
2183   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2184 
2185   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2186     {
2187       Idx enabled_idx;
2188       re_token_type_t type;
2189       struct re_backref_cache_entry *entry;
2190       node = candidates->elems[node_idx];
2191       type = dfa->nodes[node].type;
2192       /* Avoid infinite loop for the REs like "()\1+".  */
2193       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2194 	continue;
2195       if (type != OP_BACK_REF)
2196 	continue;
2197 
2198       entry = mctx->bkref_ents + first_idx;
2199       enabled_idx = first_idx;
2200       do
2201 	{
2202 	  Idx subexp_len;
2203 	  Idx to_idx;
2204 	  Idx dst_node;
2205 	  bool ok;
2206 	  re_dfastate_t *cur_state;
2207 
2208 	  if (entry->node != node)
2209 	    continue;
2210 	  subexp_len = entry->subexp_to - entry->subexp_from;
2211 	  to_idx = str_idx + subexp_len;
2212 	  dst_node = (subexp_len ? dfa->nexts[node]
2213 		      : dfa->edests[node].elems[0]);
2214 
2215 	  if (to_idx > sctx->last_str_idx
2216 	      || sctx->sifted_states[to_idx] == NULL
2217 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2218 	      || check_dst_limits (mctx, &sctx->limits, node,
2219 				   str_idx, dst_node, to_idx))
2220 	    continue;
2221 
2222 	  if (local_sctx.sifted_states == NULL)
2223 	    {
2224 	      local_sctx = *sctx;
2225 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2226 	      if (BE (err != REG_NOERROR, 0))
2227 		goto free_return;
2228 	    }
2229 	  local_sctx.last_node = node;
2230 	  local_sctx.last_str_idx = str_idx;
2231 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2232 	  if (BE (! ok, 0))
2233 	    {
2234 	      err = REG_ESPACE;
2235 	      goto free_return;
2236 	    }
2237 	  cur_state = local_sctx.sifted_states[str_idx];
2238 	  err = sift_states_backward (mctx, &local_sctx);
2239 	  if (BE (err != REG_NOERROR, 0))
2240 	    goto free_return;
2241 	  if (sctx->limited_states != NULL)
2242 	    {
2243 	      err = merge_state_array (dfa, sctx->limited_states,
2244 				       local_sctx.sifted_states,
2245 				       str_idx + 1);
2246 	      if (BE (err != REG_NOERROR, 0))
2247 		goto free_return;
2248 	    }
2249 	  local_sctx.sifted_states[str_idx] = cur_state;
2250 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2251 
2252 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2253 	  entry = mctx->bkref_ents + enabled_idx;
2254 	}
2255       while (enabled_idx++, entry++->more);
2256     }
2257   err = REG_NOERROR;
2258  free_return:
2259   if (local_sctx.sifted_states != NULL)
2260     {
2261       re_node_set_free (&local_sctx.limits);
2262     }
2263 
2264   return err;
2265 }
2266 
2267 
2268 #ifdef RE_ENABLE_I18N
2269 static int
2270 internal_function
2271 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2272 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2273 {
2274   const re_dfa_t *const dfa = mctx->dfa;
2275   int naccepted;
2276   /* Check the node can accept "multi byte".  */
2277   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2278   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2279       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2280 			    dfa->nexts[node_idx]))
2281     /* The node can't accept the "multi byte", or the
2282        destination was already thrown away, then the node
2283        could't accept the current input "multi byte".   */
2284     naccepted = 0;
2285   /* Otherwise, it is sure that the node could accept
2286      'naccepted' bytes input.  */
2287   return naccepted;
2288 }
2289 #endif /* RE_ENABLE_I18N */
2290 
2291 
2292 /* Functions for state transition.  */
2293 
2294 /* Return the next state to which the current state STATE will transit by
2295    accepting the current input byte, and update STATE_LOG if necessary.
2296    If STATE can accept a multibyte char/collating element/back reference
2297    update the destination of STATE_LOG.  */
2298 
2299 static re_dfastate_t *
2300 internal_function __attribute_warn_unused_result__
2301 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2302 	       re_dfastate_t *state)
2303 {
2304   re_dfastate_t **trtable;
2305   unsigned char ch;
2306 
2307 #ifdef RE_ENABLE_I18N
2308   /* If the current state can accept multibyte.  */
2309   if (BE (state->accept_mb, 0))
2310     {
2311       *err = transit_state_mb (mctx, state);
2312       if (BE (*err != REG_NOERROR, 0))
2313 	return NULL;
2314     }
2315 #endif /* RE_ENABLE_I18N */
2316 
2317   /* Then decide the next state with the single byte.  */
2318 #if 0
2319   if (0)
2320     /* don't use transition table  */
2321     return transit_state_sb (err, mctx, state);
2322 #endif
2323 
2324   /* Use transition table  */
2325   ch = re_string_fetch_byte (&mctx->input);
2326   for (;;)
2327     {
2328       trtable = state->trtable;
2329       if (BE (trtable != NULL, 1))
2330 	return trtable[ch];
2331 
2332       trtable = state->word_trtable;
2333       if (BE (trtable != NULL, 1))
2334 	{
2335 	  unsigned int context;
2336 	  context
2337 	    = re_string_context_at (&mctx->input,
2338 				    re_string_cur_idx (&mctx->input) - 1,
2339 				    mctx->eflags);
2340 	  if (IS_WORD_CONTEXT (context))
2341 	    return trtable[ch + SBC_MAX];
2342 	  else
2343 	    return trtable[ch];
2344 	}
2345 
2346       if (!build_trtable (mctx->dfa, state))
2347 	{
2348 	  *err = REG_ESPACE;
2349 	  return NULL;
2350 	}
2351 
2352       /* Retry, we now have a transition table.  */
2353     }
2354 }
2355 
2356 /* Update the state_log if we need */
2357 static re_dfastate_t *
2358 internal_function
2359 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2360 		      re_dfastate_t *next_state)
2361 {
2362   const re_dfa_t *const dfa = mctx->dfa;
2363   Idx cur_idx = re_string_cur_idx (&mctx->input);
2364 
2365   if (cur_idx > mctx->state_log_top)
2366     {
2367       mctx->state_log[cur_idx] = next_state;
2368       mctx->state_log_top = cur_idx;
2369     }
2370   else if (mctx->state_log[cur_idx] == 0)
2371     {
2372       mctx->state_log[cur_idx] = next_state;
2373     }
2374   else
2375     {
2376       re_dfastate_t *pstate;
2377       unsigned int context;
2378       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2379       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2380 	 the destination of a multibyte char/collating element/
2381 	 back reference.  Then the next state is the union set of
2382 	 these destinations and the results of the transition table.  */
2383       pstate = mctx->state_log[cur_idx];
2384       log_nodes = pstate->entrance_nodes;
2385       if (next_state != NULL)
2386 	{
2387 	  table_nodes = next_state->entrance_nodes;
2388 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2389 					     log_nodes);
2390 	  if (BE (*err != REG_NOERROR, 0))
2391 	    return NULL;
2392 	}
2393       else
2394 	next_nodes = *log_nodes;
2395       /* Note: We already add the nodes of the initial state,
2396 	 then we don't need to add them here.  */
2397 
2398       context = re_string_context_at (&mctx->input,
2399 				      re_string_cur_idx (&mctx->input) - 1,
2400 				      mctx->eflags);
2401       next_state = mctx->state_log[cur_idx]
2402 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2403       /* We don't need to check errors here, since the return value of
2404 	 this function is next_state and ERR is already set.  */
2405 
2406       if (table_nodes != NULL)
2407 	re_node_set_free (&next_nodes);
2408     }
2409 
2410   if (BE (dfa->nbackref, 0) && next_state != NULL)
2411     {
2412       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2413 	 later.  We must check them here, since the back references in the
2414 	 next state might use them.  */
2415       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2416 					cur_idx);
2417       if (BE (*err != REG_NOERROR, 0))
2418 	return NULL;
2419 
2420       /* If the next state has back references.  */
2421       if (next_state->has_backref)
2422 	{
2423 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2424 	  if (BE (*err != REG_NOERROR, 0))
2425 	    return NULL;
2426 	  next_state = mctx->state_log[cur_idx];
2427 	}
2428     }
2429 
2430   return next_state;
2431 }
2432 
2433 /* Skip bytes in the input that correspond to part of a
2434    multi-byte match, then look in the log for a state
2435    from which to restart matching.  */
2436 static re_dfastate_t *
2437 internal_function
2438 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2439 {
2440   re_dfastate_t *cur_state;
2441   do
2442     {
2443       Idx max = mctx->state_log_top;
2444       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2445 
2446       do
2447 	{
2448 	  if (++cur_str_idx > max)
2449 	    return NULL;
2450 	  re_string_skip_bytes (&mctx->input, 1);
2451 	}
2452       while (mctx->state_log[cur_str_idx] == NULL);
2453 
2454       cur_state = merge_state_with_log (err, mctx, NULL);
2455     }
2456   while (*err == REG_NOERROR && cur_state == NULL);
2457   return cur_state;
2458 }
2459 
2460 /* Helper functions for transit_state.  */
2461 
2462 /* From the node set CUR_NODES, pick up the nodes whose types are
2463    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2464    expression. And register them to use them later for evaluating the
2465    corresponding back references.  */
2466 
2467 static reg_errcode_t
2468 internal_function
2469 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2470 			   Idx str_idx)
2471 {
2472   const re_dfa_t *const dfa = mctx->dfa;
2473   Idx node_idx;
2474   reg_errcode_t err;
2475 
2476   /* TODO: This isn't efficient.
2477 	   Because there might be more than one nodes whose types are
2478 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2479 	   nodes.
2480 	   E.g. RE: (a){2}  */
2481   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2482     {
2483       Idx node = cur_nodes->elems[node_idx];
2484       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2485 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2486 	  && (dfa->used_bkref_map
2487 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2488 	{
2489 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2490 	  if (BE (err != REG_NOERROR, 0))
2491 	    return err;
2492 	}
2493     }
2494   return REG_NOERROR;
2495 }
2496 
2497 #if 0
2498 /* Return the next state to which the current state STATE will transit by
2499    accepting the current input byte.  */
2500 
2501 static re_dfastate_t *
2502 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2503 		  re_dfastate_t *state)
2504 {
2505   const re_dfa_t *const dfa = mctx->dfa;
2506   re_node_set next_nodes;
2507   re_dfastate_t *next_state;
2508   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2509   unsigned int context;
2510 
2511   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2512   if (BE (*err != REG_NOERROR, 0))
2513     return NULL;
2514   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2515     {
2516       Idx cur_node = state->nodes.elems[node_cnt];
2517       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2518 	{
2519 	  *err = re_node_set_merge (&next_nodes,
2520 				    dfa->eclosures + dfa->nexts[cur_node]);
2521 	  if (BE (*err != REG_NOERROR, 0))
2522 	    {
2523 	      re_node_set_free (&next_nodes);
2524 	      return NULL;
2525 	    }
2526 	}
2527     }
2528   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2529   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2530   /* We don't need to check errors here, since the return value of
2531      this function is next_state and ERR is already set.  */
2532 
2533   re_node_set_free (&next_nodes);
2534   re_string_skip_bytes (&mctx->input, 1);
2535   return next_state;
2536 }
2537 #endif
2538 
2539 #ifdef RE_ENABLE_I18N
2540 static reg_errcode_t
2541 internal_function
2542 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2543 {
2544   const re_dfa_t *const dfa = mctx->dfa;
2545   reg_errcode_t err;
2546   Idx i;
2547 
2548   for (i = 0; i < pstate->nodes.nelem; ++i)
2549     {
2550       re_node_set dest_nodes, *new_nodes;
2551       Idx cur_node_idx = pstate->nodes.elems[i];
2552       int naccepted;
2553       Idx dest_idx;
2554       unsigned int context;
2555       re_dfastate_t *dest_state;
2556 
2557       if (!dfa->nodes[cur_node_idx].accept_mb)
2558 	continue;
2559 
2560       if (dfa->nodes[cur_node_idx].constraint)
2561 	{
2562 	  context = re_string_context_at (&mctx->input,
2563 					  re_string_cur_idx (&mctx->input),
2564 					  mctx->eflags);
2565 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2566 					   context))
2567 	    continue;
2568 	}
2569 
2570       /* How many bytes the node can accept?  */
2571       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2572 					   re_string_cur_idx (&mctx->input));
2573       if (naccepted == 0)
2574 	continue;
2575 
2576       /* The node can accepts 'naccepted' bytes.  */
2577       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2578       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2579 			       : mctx->max_mb_elem_len);
2580       err = clean_state_log_if_needed (mctx, dest_idx);
2581       if (BE (err != REG_NOERROR, 0))
2582 	return err;
2583 #ifdef DEBUG
2584       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2585 #endif
2586       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2587 
2588       dest_state = mctx->state_log[dest_idx];
2589       if (dest_state == NULL)
2590 	dest_nodes = *new_nodes;
2591       else
2592 	{
2593 	  err = re_node_set_init_union (&dest_nodes,
2594 					dest_state->entrance_nodes, new_nodes);
2595 	  if (BE (err != REG_NOERROR, 0))
2596 	    return err;
2597 	}
2598       context = re_string_context_at (&mctx->input, dest_idx - 1,
2599 				      mctx->eflags);
2600       mctx->state_log[dest_idx]
2601 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2602       if (dest_state != NULL)
2603 	re_node_set_free (&dest_nodes);
2604       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2605 	return err;
2606     }
2607   return REG_NOERROR;
2608 }
2609 #endif /* RE_ENABLE_I18N */
2610 
2611 static reg_errcode_t
2612 internal_function
2613 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2614 {
2615   const re_dfa_t *const dfa = mctx->dfa;
2616   reg_errcode_t err;
2617   Idx i;
2618   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2619 
2620   for (i = 0; i < nodes->nelem; ++i)
2621     {
2622       Idx dest_str_idx, prev_nelem, bkc_idx;
2623       Idx node_idx = nodes->elems[i];
2624       unsigned int context;
2625       const re_token_t *node = dfa->nodes + node_idx;
2626       re_node_set *new_dest_nodes;
2627 
2628       /* Check whether 'node' is a backreference or not.  */
2629       if (node->type != OP_BACK_REF)
2630 	continue;
2631 
2632       if (node->constraint)
2633 	{
2634 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2635 					  mctx->eflags);
2636 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2637 	    continue;
2638 	}
2639 
2640       /* 'node' is a backreference.
2641 	 Check the substring which the substring matched.  */
2642       bkc_idx = mctx->nbkref_ents;
2643       err = get_subexp (mctx, node_idx, cur_str_idx);
2644       if (BE (err != REG_NOERROR, 0))
2645 	goto free_return;
2646 
2647       /* And add the epsilon closures (which is 'new_dest_nodes') of
2648 	 the backreference to appropriate state_log.  */
2649 #ifdef DEBUG
2650       assert (dfa->nexts[node_idx] != REG_MISSING);
2651 #endif
2652       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2653 	{
2654 	  Idx subexp_len;
2655 	  re_dfastate_t *dest_state;
2656 	  struct re_backref_cache_entry *bkref_ent;
2657 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2658 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2659 	    continue;
2660 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2661 	  new_dest_nodes = (subexp_len == 0
2662 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2663 			    : dfa->eclosures + dfa->nexts[node_idx]);
2664 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2665 			  - bkref_ent->subexp_from);
2666 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2667 					  mctx->eflags);
2668 	  dest_state = mctx->state_log[dest_str_idx];
2669 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2670 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2671 	  /* Add 'new_dest_node' to state_log.  */
2672 	  if (dest_state == NULL)
2673 	    {
2674 	      mctx->state_log[dest_str_idx]
2675 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2676 					    context);
2677 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2678 		      && err != REG_NOERROR, 0))
2679 		goto free_return;
2680 	    }
2681 	  else
2682 	    {
2683 	      re_node_set dest_nodes;
2684 	      err = re_node_set_init_union (&dest_nodes,
2685 					    dest_state->entrance_nodes,
2686 					    new_dest_nodes);
2687 	      if (BE (err != REG_NOERROR, 0))
2688 		{
2689 		  re_node_set_free (&dest_nodes);
2690 		  goto free_return;
2691 		}
2692 	      mctx->state_log[dest_str_idx]
2693 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2694 	      re_node_set_free (&dest_nodes);
2695 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2696 		      && err != REG_NOERROR, 0))
2697 		goto free_return;
2698 	    }
2699 	  /* We need to check recursively if the backreference can epsilon
2700 	     transit.  */
2701 	  if (subexp_len == 0
2702 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2703 	    {
2704 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2705 					       cur_str_idx);
2706 	      if (BE (err != REG_NOERROR, 0))
2707 		goto free_return;
2708 	      err = transit_state_bkref (mctx, new_dest_nodes);
2709 	      if (BE (err != REG_NOERROR, 0))
2710 		goto free_return;
2711 	    }
2712 	}
2713     }
2714   err = REG_NOERROR;
2715  free_return:
2716   return err;
2717 }
2718 
2719 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2720    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2721    Note that we might collect inappropriate candidates here.
2722    However, the cost of checking them strictly here is too high, then we
2723    delay these checking for prune_impossible_nodes().  */
2724 
2725 static reg_errcode_t
2726 internal_function __attribute_warn_unused_result__
2727 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2728 {
2729   const re_dfa_t *const dfa = mctx->dfa;
2730   Idx subexp_num, sub_top_idx;
2731   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2732   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2733   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2734   if (cache_idx != REG_MISSING)
2735     {
2736       const struct re_backref_cache_entry *entry
2737 	= mctx->bkref_ents + cache_idx;
2738       do
2739 	if (entry->node == bkref_node)
2740 	  return REG_NOERROR; /* We already checked it.  */
2741       while (entry++->more);
2742     }
2743 
2744   subexp_num = dfa->nodes[bkref_node].opr.idx;
2745 
2746   /* For each sub expression  */
2747   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2748     {
2749       reg_errcode_t err;
2750       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2751       re_sub_match_last_t *sub_last;
2752       Idx sub_last_idx, sl_str, bkref_str_off;
2753 
2754       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2755 	continue; /* It isn't related.  */
2756 
2757       sl_str = sub_top->str_idx;
2758       bkref_str_off = bkref_str_idx;
2759       /* At first, check the last node of sub expressions we already
2760 	 evaluated.  */
2761       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2762 	{
2763 	  regoff_t sl_str_diff;
2764 	  sub_last = sub_top->lasts[sub_last_idx];
2765 	  sl_str_diff = sub_last->str_idx - sl_str;
2766 	  /* The matched string by the sub expression match with the substring
2767 	     at the back reference?  */
2768 	  if (sl_str_diff > 0)
2769 	    {
2770 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2771 		{
2772 		  /* Not enough chars for a successful match.  */
2773 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2774 		    break;
2775 
2776 		  err = clean_state_log_if_needed (mctx,
2777 						   bkref_str_off
2778 						   + sl_str_diff);
2779 		  if (BE (err != REG_NOERROR, 0))
2780 		    return err;
2781 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2782 		}
2783 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2784 		/* We don't need to search this sub expression any more.  */
2785 		break;
2786 	    }
2787 	  bkref_str_off += sl_str_diff;
2788 	  sl_str += sl_str_diff;
2789 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2790 				bkref_str_idx);
2791 
2792 	  /* Reload buf, since the preceding call might have reallocated
2793 	     the buffer.  */
2794 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2795 
2796 	  if (err == REG_NOMATCH)
2797 	    continue;
2798 	  if (BE (err != REG_NOERROR, 0))
2799 	    return err;
2800 	}
2801 
2802       if (sub_last_idx < sub_top->nlasts)
2803 	continue;
2804       if (sub_last_idx > 0)
2805 	++sl_str;
2806       /* Then, search for the other last nodes of the sub expression.  */
2807       for (; sl_str <= bkref_str_idx; ++sl_str)
2808 	{
2809 	  Idx cls_node;
2810 	  regoff_t sl_str_off;
2811 	  const re_node_set *nodes;
2812 	  sl_str_off = sl_str - sub_top->str_idx;
2813 	  /* The matched string by the sub expression match with the substring
2814 	     at the back reference?  */
2815 	  if (sl_str_off > 0)
2816 	    {
2817 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2818 		{
2819 		  /* If we are at the end of the input, we cannot match.  */
2820 		  if (bkref_str_off >= mctx->input.len)
2821 		    break;
2822 
2823 		  err = extend_buffers (mctx);
2824 		  if (BE (err != REG_NOERROR, 0))
2825 		    return err;
2826 
2827 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2828 		}
2829 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2830 		break; /* We don't need to search this sub expression
2831 			  any more.  */
2832 	    }
2833 	  if (mctx->state_log[sl_str] == NULL)
2834 	    continue;
2835 	  /* Does this state have a ')' of the sub expression?  */
2836 	  nodes = &mctx->state_log[sl_str]->nodes;
2837 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2838 				       OP_CLOSE_SUBEXP);
2839 	  if (cls_node == REG_MISSING)
2840 	    continue; /* No.  */
2841 	  if (sub_top->path == NULL)
2842 	    {
2843 	      sub_top->path = calloc (sizeof (state_array_t),
2844 				      sl_str - sub_top->str_idx + 1);
2845 	      if (sub_top->path == NULL)
2846 		return REG_ESPACE;
2847 	    }
2848 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2849 	     in the current context?  */
2850 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2851 			       sub_top->str_idx, cls_node, sl_str,
2852 			       OP_CLOSE_SUBEXP);
2853 	  if (err == REG_NOMATCH)
2854 	      continue;
2855 	  if (BE (err != REG_NOERROR, 0))
2856 	      return err;
2857 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2858 	  if (BE (sub_last == NULL, 0))
2859 	    return REG_ESPACE;
2860 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2861 				bkref_str_idx);
2862 	  if (err == REG_NOMATCH)
2863 	    continue;
2864 	}
2865     }
2866   return REG_NOERROR;
2867 }
2868 
2869 /* Helper functions for get_subexp().  */
2870 
2871 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2872    If it can arrive, register the sub expression expressed with SUB_TOP
2873    and SUB_LAST.  */
2874 
2875 static reg_errcode_t
2876 internal_function
2877 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2878 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2879 {
2880   reg_errcode_t err;
2881   Idx to_idx;
2882   /* Can the subexpression arrive the back reference?  */
2883   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2884 		       sub_last->str_idx, bkref_node, bkref_str,
2885 		       OP_OPEN_SUBEXP);
2886   if (err != REG_NOERROR)
2887     return err;
2888   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2889 			     sub_last->str_idx);
2890   if (BE (err != REG_NOERROR, 0))
2891     return err;
2892   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2893   return clean_state_log_if_needed (mctx, to_idx);
2894 }
2895 
2896 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2897    Search '(' if FL_OPEN, or search ')' otherwise.
2898    TODO: This function isn't efficient...
2899 	 Because there might be more than one nodes whose types are
2900 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2901 	 nodes.
2902 	 E.g. RE: (a){2}  */
2903 
2904 static Idx
2905 internal_function
2906 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2907 		  Idx subexp_idx, int type)
2908 {
2909   Idx cls_idx;
2910   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2911     {
2912       Idx cls_node = nodes->elems[cls_idx];
2913       const re_token_t *node = dfa->nodes + cls_node;
2914       if (node->type == type
2915 	  && node->opr.idx == subexp_idx)
2916 	return cls_node;
2917     }
2918   return REG_MISSING;
2919 }
2920 
2921 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2922    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2923    heavily reused.
2924    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2925 
2926 static reg_errcode_t
2927 internal_function __attribute_warn_unused_result__
2928 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2929 	       Idx top_str, Idx last_node, Idx last_str, int type)
2930 {
2931   const re_dfa_t *const dfa = mctx->dfa;
2932   reg_errcode_t err = REG_NOERROR;
2933   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2934   re_dfastate_t *cur_state = NULL;
2935   re_node_set *cur_nodes, next_nodes;
2936   re_dfastate_t **backup_state_log;
2937   unsigned int context;
2938 
2939   subexp_num = dfa->nodes[top_node].opr.idx;
2940   /* Extend the buffer if we need.  */
2941   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2942     {
2943       re_dfastate_t **new_array;
2944       Idx old_alloc = path->alloc;
2945       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2946       Idx new_alloc;
2947       if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2948 	return REG_ESPACE;
2949       new_alloc = old_alloc + incr_alloc;
2950       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2951 	return REG_ESPACE;
2952       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2953       if (BE (new_array == NULL, 0))
2954 	return REG_ESPACE;
2955       path->array = new_array;
2956       path->alloc = new_alloc;
2957       memset (new_array + old_alloc, '\0',
2958 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2959     }
2960 
2961   str_idx = path->next_idx ? path->next_idx : top_str;
2962 
2963   /* Temporary modify MCTX.  */
2964   backup_state_log = mctx->state_log;
2965   backup_cur_idx = mctx->input.cur_idx;
2966   mctx->state_log = path->array;
2967   mctx->input.cur_idx = str_idx;
2968 
2969   /* Setup initial node set.  */
2970   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2971   if (str_idx == top_str)
2972     {
2973       err = re_node_set_init_1 (&next_nodes, top_node);
2974       if (BE (err != REG_NOERROR, 0))
2975 	return err;
2976       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2977       if (BE (err != REG_NOERROR, 0))
2978 	{
2979 	  re_node_set_free (&next_nodes);
2980 	  return err;
2981 	}
2982     }
2983   else
2984     {
2985       cur_state = mctx->state_log[str_idx];
2986       if (cur_state && cur_state->has_backref)
2987 	{
2988 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2989 	  if (BE (err != REG_NOERROR, 0))
2990 	    return err;
2991 	}
2992       else
2993 	re_node_set_init_empty (&next_nodes);
2994     }
2995   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2996     {
2997       if (next_nodes.nelem)
2998 	{
2999 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3000 				    subexp_num, type);
3001 	  if (BE (err != REG_NOERROR, 0))
3002 	    {
3003 	      re_node_set_free (&next_nodes);
3004 	      return err;
3005 	    }
3006 	}
3007       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3008       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3009 	{
3010 	  re_node_set_free (&next_nodes);
3011 	  return err;
3012 	}
3013       mctx->state_log[str_idx] = cur_state;
3014     }
3015 
3016   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3017     {
3018       re_node_set_empty (&next_nodes);
3019       if (mctx->state_log[str_idx + 1])
3020 	{
3021 	  err = re_node_set_merge (&next_nodes,
3022 				   &mctx->state_log[str_idx + 1]->nodes);
3023 	  if (BE (err != REG_NOERROR, 0))
3024 	    {
3025 	      re_node_set_free (&next_nodes);
3026 	      return err;
3027 	    }
3028 	}
3029       if (cur_state)
3030 	{
3031 	  err = check_arrival_add_next_nodes (mctx, str_idx,
3032 					      &cur_state->non_eps_nodes,
3033 					      &next_nodes);
3034 	  if (BE (err != REG_NOERROR, 0))
3035 	    {
3036 	      re_node_set_free (&next_nodes);
3037 	      return err;
3038 	    }
3039 	}
3040       ++str_idx;
3041       if (next_nodes.nelem)
3042 	{
3043 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3044 	  if (BE (err != REG_NOERROR, 0))
3045 	    {
3046 	      re_node_set_free (&next_nodes);
3047 	      return err;
3048 	    }
3049 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3050 				    subexp_num, type);
3051 	  if (BE (err != REG_NOERROR, 0))
3052 	    {
3053 	      re_node_set_free (&next_nodes);
3054 	      return err;
3055 	    }
3056 	}
3057       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3058       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3059       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3060 	{
3061 	  re_node_set_free (&next_nodes);
3062 	  return err;
3063 	}
3064       mctx->state_log[str_idx] = cur_state;
3065       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3066     }
3067   re_node_set_free (&next_nodes);
3068   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3069 	       : &mctx->state_log[last_str]->nodes);
3070   path->next_idx = str_idx;
3071 
3072   /* Fix MCTX.  */
3073   mctx->state_log = backup_state_log;
3074   mctx->input.cur_idx = backup_cur_idx;
3075 
3076   /* Then check the current node set has the node LAST_NODE.  */
3077   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3078     return REG_NOERROR;
3079 
3080   return REG_NOMATCH;
3081 }
3082 
3083 /* Helper functions for check_arrival.  */
3084 
3085 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3086    to NEXT_NODES.
3087    TODO: This function is similar to the functions transit_state*(),
3088 	 however this function has many additional works.
3089 	 Can't we unify them?  */
3090 
3091 static reg_errcode_t
3092 internal_function __attribute_warn_unused_result__
3093 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3094 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3095 {
3096   const re_dfa_t *const dfa = mctx->dfa;
3097   bool ok;
3098   Idx cur_idx;
3099 #ifdef RE_ENABLE_I18N
3100   reg_errcode_t err = REG_NOERROR;
3101 #endif
3102   re_node_set union_set;
3103   re_node_set_init_empty (&union_set);
3104   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3105     {
3106       int naccepted = 0;
3107       Idx cur_node = cur_nodes->elems[cur_idx];
3108 #ifdef DEBUG
3109       re_token_type_t type = dfa->nodes[cur_node].type;
3110       assert (!IS_EPSILON_NODE (type));
3111 #endif
3112 #ifdef RE_ENABLE_I18N
3113       /* If the node may accept "multi byte".  */
3114       if (dfa->nodes[cur_node].accept_mb)
3115 	{
3116 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3117 					       str_idx);
3118 	  if (naccepted > 1)
3119 	    {
3120 	      re_dfastate_t *dest_state;
3121 	      Idx next_node = dfa->nexts[cur_node];
3122 	      Idx next_idx = str_idx + naccepted;
3123 	      dest_state = mctx->state_log[next_idx];
3124 	      re_node_set_empty (&union_set);
3125 	      if (dest_state)
3126 		{
3127 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3128 		  if (BE (err != REG_NOERROR, 0))
3129 		    {
3130 		      re_node_set_free (&union_set);
3131 		      return err;
3132 		    }
3133 		}
3134 	      ok = re_node_set_insert (&union_set, next_node);
3135 	      if (BE (! ok, 0))
3136 		{
3137 		  re_node_set_free (&union_set);
3138 		  return REG_ESPACE;
3139 		}
3140 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3141 							    &union_set);
3142 	      if (BE (mctx->state_log[next_idx] == NULL
3143 		      && err != REG_NOERROR, 0))
3144 		{
3145 		  re_node_set_free (&union_set);
3146 		  return err;
3147 		}
3148 	    }
3149 	}
3150 #endif /* RE_ENABLE_I18N */
3151       if (naccepted
3152 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3153 	{
3154 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3155 	  if (BE (! ok, 0))
3156 	    {
3157 	      re_node_set_free (&union_set);
3158 	      return REG_ESPACE;
3159 	    }
3160 	}
3161     }
3162   re_node_set_free (&union_set);
3163   return REG_NOERROR;
3164 }
3165 
3166 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3167    CUR_NODES, however exclude the nodes which are:
3168     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3169     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3170 */
3171 
3172 static reg_errcode_t
3173 internal_function
3174 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3175 			  Idx ex_subexp, int type)
3176 {
3177   reg_errcode_t err;
3178   Idx idx, outside_node;
3179   re_node_set new_nodes;
3180 #ifdef DEBUG
3181   assert (cur_nodes->nelem);
3182 #endif
3183   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3184   if (BE (err != REG_NOERROR, 0))
3185     return err;
3186   /* Create a new node set NEW_NODES with the nodes which are epsilon
3187      closures of the node in CUR_NODES.  */
3188 
3189   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3190     {
3191       Idx cur_node = cur_nodes->elems[idx];
3192       const re_node_set *eclosure = dfa->eclosures + cur_node;
3193       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3194       if (outside_node == REG_MISSING)
3195 	{
3196 	  /* There are no problematic nodes, just merge them.  */
3197 	  err = re_node_set_merge (&new_nodes, eclosure);
3198 	  if (BE (err != REG_NOERROR, 0))
3199 	    {
3200 	      re_node_set_free (&new_nodes);
3201 	      return err;
3202 	    }
3203 	}
3204       else
3205 	{
3206 	  /* There are problematic nodes, re-calculate incrementally.  */
3207 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3208 					      ex_subexp, type);
3209 	  if (BE (err != REG_NOERROR, 0))
3210 	    {
3211 	      re_node_set_free (&new_nodes);
3212 	      return err;
3213 	    }
3214 	}
3215     }
3216   re_node_set_free (cur_nodes);
3217   *cur_nodes = new_nodes;
3218   return REG_NOERROR;
3219 }
3220 
3221 /* Helper function for check_arrival_expand_ecl.
3222    Check incrementally the epsilon closure of TARGET, and if it isn't
3223    problematic append it to DST_NODES.  */
3224 
3225 static reg_errcode_t
3226 internal_function __attribute_warn_unused_result__
3227 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3228 			      Idx target, Idx ex_subexp, int type)
3229 {
3230   Idx cur_node;
3231   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3232     {
3233       bool ok;
3234 
3235       if (dfa->nodes[cur_node].type == type
3236 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3237 	{
3238 	  if (type == OP_CLOSE_SUBEXP)
3239 	    {
3240 	      ok = re_node_set_insert (dst_nodes, cur_node);
3241 	      if (BE (! ok, 0))
3242 		return REG_ESPACE;
3243 	    }
3244 	  break;
3245 	}
3246       ok = re_node_set_insert (dst_nodes, cur_node);
3247       if (BE (! ok, 0))
3248 	return REG_ESPACE;
3249       if (dfa->edests[cur_node].nelem == 0)
3250 	break;
3251       if (dfa->edests[cur_node].nelem == 2)
3252 	{
3253 	  reg_errcode_t err;
3254 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3255 					      dfa->edests[cur_node].elems[1],
3256 					      ex_subexp, type);
3257 	  if (BE (err != REG_NOERROR, 0))
3258 	    return err;
3259 	}
3260       cur_node = dfa->edests[cur_node].elems[0];
3261     }
3262   return REG_NOERROR;
3263 }
3264 
3265 
3266 /* For all the back references in the current state, calculate the
3267    destination of the back references by the appropriate entry
3268    in MCTX->BKREF_ENTS.  */
3269 
3270 static reg_errcode_t
3271 internal_function __attribute_warn_unused_result__
3272 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3273 		    Idx cur_str, Idx subexp_num, int type)
3274 {
3275   const re_dfa_t *const dfa = mctx->dfa;
3276   reg_errcode_t err;
3277   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3278   struct re_backref_cache_entry *ent;
3279 
3280   if (cache_idx_start == REG_MISSING)
3281     return REG_NOERROR;
3282 
3283  restart:
3284   ent = mctx->bkref_ents + cache_idx_start;
3285   do
3286     {
3287       Idx to_idx, next_node;
3288 
3289       /* Is this entry ENT is appropriate?  */
3290       if (!re_node_set_contains (cur_nodes, ent->node))
3291 	continue; /* No.  */
3292 
3293       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3294       /* Calculate the destination of the back reference, and append it
3295 	 to MCTX->STATE_LOG.  */
3296       if (to_idx == cur_str)
3297 	{
3298 	  /* The backreference did epsilon transit, we must re-check all the
3299 	     node in the current state.  */
3300 	  re_node_set new_dests;
3301 	  reg_errcode_t err2, err3;
3302 	  next_node = dfa->edests[ent->node].elems[0];
3303 	  if (re_node_set_contains (cur_nodes, next_node))
3304 	    continue;
3305 	  err = re_node_set_init_1 (&new_dests, next_node);
3306 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3307 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3308 	  re_node_set_free (&new_dests);
3309 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3310 		  || err3 != REG_NOERROR, 0))
3311 	    {
3312 	      err = (err != REG_NOERROR ? err
3313 		     : (err2 != REG_NOERROR ? err2 : err3));
3314 	      return err;
3315 	    }
3316 	  /* TODO: It is still inefficient...  */
3317 	  goto restart;
3318 	}
3319       else
3320 	{
3321 	  re_node_set union_set;
3322 	  next_node = dfa->nexts[ent->node];
3323 	  if (mctx->state_log[to_idx])
3324 	    {
3325 	      bool ok;
3326 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3327 					next_node))
3328 		continue;
3329 	      err = re_node_set_init_copy (&union_set,
3330 					   &mctx->state_log[to_idx]->nodes);
3331 	      ok = re_node_set_insert (&union_set, next_node);
3332 	      if (BE (err != REG_NOERROR || ! ok, 0))
3333 		{
3334 		  re_node_set_free (&union_set);
3335 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3336 		  return err;
3337 		}
3338 	    }
3339 	  else
3340 	    {
3341 	      err = re_node_set_init_1 (&union_set, next_node);
3342 	      if (BE (err != REG_NOERROR, 0))
3343 		return err;
3344 	    }
3345 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3346 	  re_node_set_free (&union_set);
3347 	  if (BE (mctx->state_log[to_idx] == NULL
3348 		  && err != REG_NOERROR, 0))
3349 	    return err;
3350 	}
3351     }
3352   while (ent++->more);
3353   return REG_NOERROR;
3354 }
3355 
3356 /* Build transition table for the state.
3357    Return true if successful.  */
3358 
3359 static bool
3360 internal_function
3361 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3362 {
3363   reg_errcode_t err;
3364   Idx i, j;
3365   int ch;
3366   bool need_word_trtable = false;
3367   bitset_word_t elem, mask;
3368   bool dests_node_malloced = false;
3369   bool dest_states_malloced = false;
3370   Idx ndests; /* Number of the destination states from 'state'.  */
3371   re_dfastate_t **trtable;
3372   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3373   re_node_set follows, *dests_node;
3374   bitset_t *dests_ch;
3375   bitset_t acceptable;
3376 
3377   struct dests_alloc
3378   {
3379     re_node_set dests_node[SBC_MAX];
3380     bitset_t dests_ch[SBC_MAX];
3381   } *dests_alloc;
3382 
3383   /* We build DFA states which corresponds to the destination nodes
3384      from 'state'.  'dests_node[i]' represents the nodes which i-th
3385      destination state contains, and 'dests_ch[i]' represents the
3386      characters which i-th destination state accepts.  */
3387   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3388     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3389   else
3390     {
3391       dests_alloc = re_malloc (struct dests_alloc, 1);
3392       if (BE (dests_alloc == NULL, 0))
3393 	return false;
3394       dests_node_malloced = true;
3395     }
3396   dests_node = dests_alloc->dests_node;
3397   dests_ch = dests_alloc->dests_ch;
3398 
3399   /* Initialize transition table.  */
3400   state->word_trtable = state->trtable = NULL;
3401 
3402   /* At first, group all nodes belonging to 'state' into several
3403      destinations.  */
3404   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3405   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3406     {
3407       if (dests_node_malloced)
3408 	free (dests_alloc);
3409       /* Return false in case of an error, true otherwise.  */
3410       if (ndests == 0)
3411 	{
3412 	  state->trtable = (re_dfastate_t **)
3413 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3414           if (BE (state->trtable == NULL, 0))
3415             return false;
3416 	  return true;
3417 	}
3418       return false;
3419     }
3420 
3421   err = re_node_set_alloc (&follows, ndests + 1);
3422   if (BE (err != REG_NOERROR, 0))
3423     goto out_free;
3424 
3425   /* Avoid arithmetic overflow in size calculation.  */
3426   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3427 	    / (3 * sizeof (re_dfastate_t *)))
3428 	   < ndests),
3429 	  0))
3430     goto out_free;
3431 
3432   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3433 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3434     dest_states = (re_dfastate_t **)
3435       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3436   else
3437     {
3438       dest_states = (re_dfastate_t **)
3439 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3440       if (BE (dest_states == NULL, 0))
3441 	{
3442 out_free:
3443 	  if (dest_states_malloced)
3444 	    free (dest_states);
3445 	  re_node_set_free (&follows);
3446 	  for (i = 0; i < ndests; ++i)
3447 	    re_node_set_free (dests_node + i);
3448 	  if (dests_node_malloced)
3449 	    free (dests_alloc);
3450 	  return false;
3451 	}
3452       dest_states_malloced = true;
3453     }
3454   dest_states_word = dest_states + ndests;
3455   dest_states_nl = dest_states_word + ndests;
3456   bitset_empty (acceptable);
3457 
3458   /* Then build the states for all destinations.  */
3459   for (i = 0; i < ndests; ++i)
3460     {
3461       Idx next_node;
3462       re_node_set_empty (&follows);
3463       /* Merge the follows of this destination states.  */
3464       for (j = 0; j < dests_node[i].nelem; ++j)
3465 	{
3466 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3467 	  if (next_node != REG_MISSING)
3468 	    {
3469 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3470 	      if (BE (err != REG_NOERROR, 0))
3471 		goto out_free;
3472 	    }
3473 	}
3474       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3475       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3476 	goto out_free;
3477       /* If the new state has context constraint,
3478 	 build appropriate states for these contexts.  */
3479       if (dest_states[i]->has_constraint)
3480 	{
3481 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3482 							  CONTEXT_WORD);
3483 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3484 	    goto out_free;
3485 
3486 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3487 	    need_word_trtable = true;
3488 
3489 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3490 							CONTEXT_NEWLINE);
3491 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3492 	    goto out_free;
3493 	}
3494       else
3495 	{
3496 	  dest_states_word[i] = dest_states[i];
3497 	  dest_states_nl[i] = dest_states[i];
3498 	}
3499       bitset_merge (acceptable, dests_ch[i]);
3500     }
3501 
3502   if (!BE (need_word_trtable, 0))
3503     {
3504       /* We don't care about whether the following character is a word
3505 	 character, or we are in a single-byte character set so we can
3506 	 discern by looking at the character code: allocate a
3507 	 256-entry transition table.  */
3508       trtable = state->trtable =
3509 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3510       if (BE (trtable == NULL, 0))
3511 	goto out_free;
3512 
3513       /* For all characters ch...:  */
3514       for (i = 0; i < BITSET_WORDS; ++i)
3515 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3516 	     elem;
3517 	     mask <<= 1, elem >>= 1, ++ch)
3518 	  if (BE (elem & 1, 0))
3519 	    {
3520 	      /* There must be exactly one destination which accepts
3521 		 character ch.  See group_nodes_into_DFAstates.  */
3522 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3523 		;
3524 
3525 	      /* j-th destination accepts the word character ch.  */
3526 	      if (dfa->word_char[i] & mask)
3527 		trtable[ch] = dest_states_word[j];
3528 	      else
3529 		trtable[ch] = dest_states[j];
3530 	    }
3531     }
3532   else
3533     {
3534       /* We care about whether the following character is a word
3535 	 character, and we are in a multi-byte character set: discern
3536 	 by looking at the character code: build two 256-entry
3537 	 transition tables, one starting at trtable[0] and one
3538 	 starting at trtable[SBC_MAX].  */
3539       trtable = state->word_trtable =
3540 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3541       if (BE (trtable == NULL, 0))
3542 	goto out_free;
3543 
3544       /* For all characters ch...:  */
3545       for (i = 0; i < BITSET_WORDS; ++i)
3546 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3547 	     elem;
3548 	     mask <<= 1, elem >>= 1, ++ch)
3549 	  if (BE (elem & 1, 0))
3550 	    {
3551 	      /* There must be exactly one destination which accepts
3552 		 character ch.  See group_nodes_into_DFAstates.  */
3553 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3554 		;
3555 
3556 	      /* j-th destination accepts the word character ch.  */
3557 	      trtable[ch] = dest_states[j];
3558 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3559 	    }
3560     }
3561 
3562   /* new line */
3563   if (bitset_contain (acceptable, NEWLINE_CHAR))
3564     {
3565       /* The current state accepts newline character.  */
3566       for (j = 0; j < ndests; ++j)
3567 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3568 	  {
3569 	    /* k-th destination accepts newline character.  */
3570 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3571 	    if (need_word_trtable)
3572 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3573 	    /* There must be only one destination which accepts
3574 	       newline.  See group_nodes_into_DFAstates.  */
3575 	    break;
3576 	  }
3577     }
3578 
3579   if (dest_states_malloced)
3580     free (dest_states);
3581 
3582   re_node_set_free (&follows);
3583   for (i = 0; i < ndests; ++i)
3584     re_node_set_free (dests_node + i);
3585 
3586   if (dests_node_malloced)
3587     free (dests_alloc);
3588 
3589   return true;
3590 }
3591 
3592 /* Group all nodes belonging to STATE into several destinations.
3593    Then for all destinations, set the nodes belonging to the destination
3594    to DESTS_NODE[i] and set the characters accepted by the destination
3595    to DEST_CH[i].  This function return the number of destinations.  */
3596 
3597 static Idx
3598 internal_function
3599 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3600 			    re_node_set *dests_node, bitset_t *dests_ch)
3601 {
3602   reg_errcode_t err;
3603   bool ok;
3604   Idx i, j, k;
3605   Idx ndests; /* Number of the destinations from 'state'.  */
3606   bitset_t accepts; /* Characters a node can accept.  */
3607   const re_node_set *cur_nodes = &state->nodes;
3608   bitset_empty (accepts);
3609   ndests = 0;
3610 
3611   /* For all the nodes belonging to 'state',  */
3612   for (i = 0; i < cur_nodes->nelem; ++i)
3613     {
3614       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3615       re_token_type_t type = node->type;
3616       unsigned int constraint = node->constraint;
3617 
3618       /* Enumerate all single byte character this node can accept.  */
3619       if (type == CHARACTER)
3620 	bitset_set (accepts, node->opr.c);
3621       else if (type == SIMPLE_BRACKET)
3622 	{
3623 	  bitset_merge (accepts, node->opr.sbcset);
3624 	}
3625       else if (type == OP_PERIOD)
3626 	{
3627 #ifdef RE_ENABLE_I18N
3628 	  if (dfa->mb_cur_max > 1)
3629 	    bitset_merge (accepts, dfa->sb_char);
3630 	  else
3631 #endif
3632 	    bitset_set_all (accepts);
3633 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3634 	    bitset_clear (accepts, '\n');
3635 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3636 	    bitset_clear (accepts, '\0');
3637 	}
3638 #ifdef RE_ENABLE_I18N
3639       else if (type == OP_UTF8_PERIOD)
3640 	{
3641 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3642 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3643 	  else
3644 	    bitset_merge (accepts, utf8_sb_map);
3645 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3646 	    bitset_clear (accepts, '\n');
3647 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3648 	    bitset_clear (accepts, '\0');
3649 	}
3650 #endif
3651       else
3652 	continue;
3653 
3654       /* Check the 'accepts' and sift the characters which are not
3655 	 match it the context.  */
3656       if (constraint)
3657 	{
3658 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3659 	    {
3660 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3661 	      bitset_empty (accepts);
3662 	      if (accepts_newline)
3663 		bitset_set (accepts, NEWLINE_CHAR);
3664 	      else
3665 		continue;
3666 	    }
3667 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3668 	    {
3669 	      bitset_empty (accepts);
3670 	      continue;
3671 	    }
3672 
3673 	  if (constraint & NEXT_WORD_CONSTRAINT)
3674 	    {
3675 	      bitset_word_t any_set = 0;
3676 	      if (type == CHARACTER && !node->word_char)
3677 		{
3678 		  bitset_empty (accepts);
3679 		  continue;
3680 		}
3681 #ifdef RE_ENABLE_I18N
3682 	      if (dfa->mb_cur_max > 1)
3683 		for (j = 0; j < BITSET_WORDS; ++j)
3684 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3685 	      else
3686 #endif
3687 		for (j = 0; j < BITSET_WORDS; ++j)
3688 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3689 	      if (!any_set)
3690 		continue;
3691 	    }
3692 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3693 	    {
3694 	      bitset_word_t any_set = 0;
3695 	      if (type == CHARACTER && node->word_char)
3696 		{
3697 		  bitset_empty (accepts);
3698 		  continue;
3699 		}
3700 #ifdef RE_ENABLE_I18N
3701 	      if (dfa->mb_cur_max > 1)
3702 		for (j = 0; j < BITSET_WORDS; ++j)
3703 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3704 	      else
3705 #endif
3706 		for (j = 0; j < BITSET_WORDS; ++j)
3707 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3708 	      if (!any_set)
3709 		continue;
3710 	    }
3711 	}
3712 
3713       /* Then divide 'accepts' into DFA states, or create a new
3714 	 state.  Above, we make sure that accepts is not empty.  */
3715       for (j = 0; j < ndests; ++j)
3716 	{
3717 	  bitset_t intersec; /* Intersection sets, see below.  */
3718 	  bitset_t remains;
3719 	  /* Flags, see below.  */
3720 	  bitset_word_t has_intersec, not_subset, not_consumed;
3721 
3722 	  /* Optimization, skip if this state doesn't accept the character.  */
3723 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3724 	    continue;
3725 
3726 	  /* Enumerate the intersection set of this state and 'accepts'.  */
3727 	  has_intersec = 0;
3728 	  for (k = 0; k < BITSET_WORDS; ++k)
3729 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3730 	  /* And skip if the intersection set is empty.  */
3731 	  if (!has_intersec)
3732 	    continue;
3733 
3734 	  /* Then check if this state is a subset of 'accepts'.  */
3735 	  not_subset = not_consumed = 0;
3736 	  for (k = 0; k < BITSET_WORDS; ++k)
3737 	    {
3738 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3739 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3740 	    }
3741 
3742 	  /* If this state isn't a subset of 'accepts', create a
3743 	     new group state, which has the 'remains'. */
3744 	  if (not_subset)
3745 	    {
3746 	      bitset_copy (dests_ch[ndests], remains);
3747 	      bitset_copy (dests_ch[j], intersec);
3748 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3749 	      if (BE (err != REG_NOERROR, 0))
3750 		goto error_return;
3751 	      ++ndests;
3752 	    }
3753 
3754 	  /* Put the position in the current group. */
3755 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3756 	  if (BE (! ok, 0))
3757 	    goto error_return;
3758 
3759 	  /* If all characters are consumed, go to next node. */
3760 	  if (!not_consumed)
3761 	    break;
3762 	}
3763       /* Some characters remain, create a new group. */
3764       if (j == ndests)
3765 	{
3766 	  bitset_copy (dests_ch[ndests], accepts);
3767 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3768 	  if (BE (err != REG_NOERROR, 0))
3769 	    goto error_return;
3770 	  ++ndests;
3771 	  bitset_empty (accepts);
3772 	}
3773     }
3774   return ndests;
3775  error_return:
3776   for (j = 0; j < ndests; ++j)
3777     re_node_set_free (dests_node + j);
3778   return REG_MISSING;
3779 }
3780 
3781 #ifdef RE_ENABLE_I18N
3782 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3783    Return the number of the bytes the node accepts.
3784    STR_IDX is the current index of the input string.
3785 
3786    This function handles the nodes which can accept one character, or
3787    one collating element like '.', '[a-z]', opposite to the other nodes
3788    can only accept one byte.  */
3789 
3790 static int
3791 internal_function
3792 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3793 			 const re_string_t *input, Idx str_idx)
3794 {
3795   const re_token_t *node = dfa->nodes + node_idx;
3796   int char_len, elem_len;
3797   Idx i;
3798 
3799   if (BE (node->type == OP_UTF8_PERIOD, 0))
3800     {
3801       unsigned char c = re_string_byte_at (input, str_idx), d;
3802       if (BE (c < 0xc2, 1))
3803 	return 0;
3804 
3805       if (str_idx + 2 > input->len)
3806 	return 0;
3807 
3808       d = re_string_byte_at (input, str_idx + 1);
3809       if (c < 0xe0)
3810 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3811       else if (c < 0xf0)
3812 	{
3813 	  char_len = 3;
3814 	  if (c == 0xe0 && d < 0xa0)
3815 	    return 0;
3816 	}
3817       else if (c < 0xf8)
3818 	{
3819 	  char_len = 4;
3820 	  if (c == 0xf0 && d < 0x90)
3821 	    return 0;
3822 	}
3823       else if (c < 0xfc)
3824 	{
3825 	  char_len = 5;
3826 	  if (c == 0xf8 && d < 0x88)
3827 	    return 0;
3828 	}
3829       else if (c < 0xfe)
3830 	{
3831 	  char_len = 6;
3832 	  if (c == 0xfc && d < 0x84)
3833 	    return 0;
3834 	}
3835       else
3836 	return 0;
3837 
3838       if (str_idx + char_len > input->len)
3839 	return 0;
3840 
3841       for (i = 1; i < char_len; ++i)
3842 	{
3843 	  d = re_string_byte_at (input, str_idx + i);
3844 	  if (d < 0x80 || d > 0xbf)
3845 	    return 0;
3846 	}
3847       return char_len;
3848     }
3849 
3850   char_len = re_string_char_size_at (input, str_idx);
3851   if (node->type == OP_PERIOD)
3852     {
3853       if (char_len <= 1)
3854 	return 0;
3855       /* FIXME: I don't think this if is needed, as both '\n'
3856 	 and '\0' are char_len == 1.  */
3857       /* '.' accepts any one character except the following two cases.  */
3858       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3859 	   re_string_byte_at (input, str_idx) == '\n') ||
3860 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3861 	   re_string_byte_at (input, str_idx) == '\0'))
3862 	return 0;
3863       return char_len;
3864     }
3865 
3866   elem_len = re_string_elem_size_at (input, str_idx);
3867   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3868     return 0;
3869 
3870   if (node->type == COMPLEX_BRACKET)
3871     {
3872       const re_charset_t *cset = node->opr.mbcset;
3873 # ifdef _LIBC
3874       const unsigned char *pin
3875 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3876       Idx j;
3877       uint32_t nrules;
3878 # endif /* _LIBC */
3879       int match_len = 0;
3880       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3881 		    ? re_string_wchar_at (input, str_idx) : 0);
3882 
3883       /* match with multibyte character?  */
3884       for (i = 0; i < cset->nmbchars; ++i)
3885 	if (wc == cset->mbchars[i])
3886 	  {
3887 	    match_len = char_len;
3888 	    goto check_node_accept_bytes_match;
3889 	  }
3890       /* match with character_class?  */
3891       for (i = 0; i < cset->nchar_classes; ++i)
3892 	{
3893 	  wctype_t wt = cset->char_classes[i];
3894 	  if (__iswctype (wc, wt))
3895 	    {
3896 	      match_len = char_len;
3897 	      goto check_node_accept_bytes_match;
3898 	    }
3899 	}
3900 
3901 # ifdef _LIBC
3902       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3903       if (nrules != 0)
3904 	{
3905 	  unsigned int in_collseq = 0;
3906 	  const int32_t *table, *indirect;
3907 	  const unsigned char *weights, *extra;
3908 	  const char *collseqwc;
3909 	  /* This #include defines a local function!  */
3910 #  include <locale/weight.h>
3911 
3912 	  /* match with collating_symbol?  */
3913 	  if (cset->ncoll_syms)
3914 	    extra = (const unsigned char *)
3915 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3916 	  for (i = 0; i < cset->ncoll_syms; ++i)
3917 	    {
3918 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3919 	      /* Compare the length of input collating element and
3920 		 the length of current collating element.  */
3921 	      if (*coll_sym != elem_len)
3922 		continue;
3923 	      /* Compare each bytes.  */
3924 	      for (j = 0; j < *coll_sym; j++)
3925 		if (pin[j] != coll_sym[1 + j])
3926 		  break;
3927 	      if (j == *coll_sym)
3928 		{
3929 		  /* Match if every bytes is equal.  */
3930 		  match_len = j;
3931 		  goto check_node_accept_bytes_match;
3932 		}
3933 	    }
3934 
3935 	  if (cset->nranges)
3936 	    {
3937 	      if (elem_len <= char_len)
3938 		{
3939 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3940 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3941 		}
3942 	      else
3943 		in_collseq = find_collation_sequence_value (pin, elem_len);
3944 	    }
3945 	  /* match with range expression?  */
3946 	  for (i = 0; i < cset->nranges; ++i)
3947 	    if (cset->range_starts[i] <= in_collseq
3948 		&& in_collseq <= cset->range_ends[i])
3949 	      {
3950 		match_len = elem_len;
3951 		goto check_node_accept_bytes_match;
3952 	      }
3953 
3954 	  /* match with equivalence_class?  */
3955 	  if (cset->nequiv_classes)
3956 	    {
3957 	      const unsigned char *cp = pin;
3958 	      table = (const int32_t *)
3959 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3960 	      weights = (const unsigned char *)
3961 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3962 	      extra = (const unsigned char *)
3963 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3964 	      indirect = (const int32_t *)
3965 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3966 	      int32_t idx = findidx (&cp, elem_len);
3967 	      if (idx > 0)
3968 		for (i = 0; i < cset->nequiv_classes; ++i)
3969 		  {
3970 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3971 		    size_t weight_len = weights[idx & 0xffffff];
3972 		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3973 			&& (idx >> 24) == (equiv_class_idx >> 24))
3974 		      {
3975 			Idx cnt = 0;
3976 
3977 			idx &= 0xffffff;
3978 			equiv_class_idx &= 0xffffff;
3979 
3980 			while (cnt <= weight_len
3981 			       && (weights[equiv_class_idx + 1 + cnt]
3982 				   == weights[idx + 1 + cnt]))
3983 			  ++cnt;
3984 			if (cnt > weight_len)
3985 			  {
3986 			    match_len = elem_len;
3987 			    goto check_node_accept_bytes_match;
3988 			  }
3989 		      }
3990 		  }
3991 	    }
3992 	}
3993       else
3994 # endif /* _LIBC */
3995 	{
3996 	  /* match with range expression?  */
3997 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && defined __STRICT_ANSI__)
3998 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3999 #else
4000 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
4001 	  cmp_buf[2] = wc;
4002 #endif
4003 	  for (i = 0; i < cset->nranges; ++i)
4004 	    {
4005 	      cmp_buf[0] = cset->range_starts[i];
4006 	      cmp_buf[4] = cset->range_ends[i];
4007 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
4008 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
4009 		{
4010 		  match_len = char_len;
4011 		  goto check_node_accept_bytes_match;
4012 		}
4013 	    }
4014 	}
4015     check_node_accept_bytes_match:
4016       if (!cset->non_match)
4017 	return match_len;
4018       else
4019 	{
4020 	  if (match_len > 0)
4021 	    return 0;
4022 	  else
4023 	    return (elem_len > char_len) ? elem_len : char_len;
4024 	}
4025     }
4026   return 0;
4027 }
4028 
4029 # ifdef _LIBC
4030 static unsigned int
4031 internal_function
4032 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4033 {
4034   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4035   if (nrules == 0)
4036     {
4037       if (mbs_len == 1)
4038 	{
4039 	  /* No valid character.  Match it as a single byte character.  */
4040 	  const unsigned char *collseq = (const unsigned char *)
4041 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4042 	  return collseq[mbs[0]];
4043 	}
4044       return UINT_MAX;
4045     }
4046   else
4047     {
4048       int32_t idx;
4049       const unsigned char *extra = (const unsigned char *)
4050 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4051       int32_t extrasize = (const unsigned char *)
4052 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4053 
4054       for (idx = 0; idx < extrasize;)
4055 	{
4056 	  int mbs_cnt;
4057 	  bool found = false;
4058 	  int32_t elem_mbs_len;
4059 	  /* Skip the name of collating element name.  */
4060 	  idx = idx + extra[idx] + 1;
4061 	  elem_mbs_len = extra[idx++];
4062 	  if (mbs_len == elem_mbs_len)
4063 	    {
4064 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4065 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4066 		  break;
4067 	      if (mbs_cnt == elem_mbs_len)
4068 		/* Found the entry.  */
4069 		found = true;
4070 	    }
4071 	  /* Skip the byte sequence of the collating element.  */
4072 	  idx += elem_mbs_len;
4073 	  /* Adjust for the alignment.  */
4074 	  idx = (idx + 3) & ~3;
4075 	  /* Skip the collation sequence value.  */
4076 	  idx += sizeof (uint32_t);
4077 	  /* Skip the wide char sequence of the collating element.  */
4078 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4079 	  /* If we found the entry, return the sequence value.  */
4080 	  if (found)
4081 	    return *(uint32_t *) (extra + idx);
4082 	  /* Skip the collation sequence value.  */
4083 	  idx += sizeof (uint32_t);
4084 	}
4085       return UINT_MAX;
4086     }
4087 }
4088 # endif /* _LIBC */
4089 #endif /* RE_ENABLE_I18N */
4090 
4091 /* Check whether the node accepts the byte which is IDX-th
4092    byte of the INPUT.  */
4093 
4094 static bool
4095 internal_function
4096 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4097 		   Idx idx)
4098 {
4099   unsigned char ch;
4100   ch = re_string_byte_at (&mctx->input, idx);
4101   switch (node->type)
4102     {
4103     case CHARACTER:
4104       if (node->opr.c != ch)
4105         return false;
4106       break;
4107 
4108     case SIMPLE_BRACKET:
4109       if (!bitset_contain (node->opr.sbcset, ch))
4110         return false;
4111       break;
4112 
4113 #ifdef RE_ENABLE_I18N
4114     case OP_UTF8_PERIOD:
4115       if (ch >= ASCII_CHARS)
4116         return false;
4117       /* FALLTHROUGH */
4118 #endif
4119     case OP_PERIOD:
4120       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4121 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4122 	return false;
4123       break;
4124 
4125     default:
4126       return false;
4127     }
4128 
4129   if (node->constraint)
4130     {
4131       /* The node has constraints.  Check whether the current context
4132 	 satisfies the constraints.  */
4133       unsigned int context = re_string_context_at (&mctx->input, idx,
4134 						   mctx->eflags);
4135       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4136 	return false;
4137     }
4138 
4139   return true;
4140 }
4141 
4142 /* Extend the buffers, if the buffers have run out.  */
4143 
4144 static reg_errcode_t
4145 internal_function __attribute_warn_unused_result__
4146 extend_buffers (re_match_context_t *mctx)
4147 {
4148   reg_errcode_t ret;
4149   re_string_t *pstr = &mctx->input;
4150 
4151   /* Avoid overflow.  */
4152   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4153           <= pstr->bufs_len, 0))
4154     return REG_ESPACE;
4155 
4156   /* Double the lengths of the buffers.  */
4157   ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2));
4158   if (BE (ret != REG_NOERROR, 0))
4159     return ret;
4160 
4161   if (mctx->state_log != NULL)
4162     {
4163       /* And double the length of state_log.  */
4164       /* XXX We have no indication of the size of this buffer.  If this
4165 	 allocation fail we have no indication that the state_log array
4166 	 does not have the right size.  */
4167       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4168 					      pstr->bufs_len + 1);
4169       if (BE (new_array == NULL, 0))
4170 	return REG_ESPACE;
4171       mctx->state_log = new_array;
4172     }
4173 
4174   /* Then reconstruct the buffers.  */
4175   if (pstr->icase)
4176     {
4177 #ifdef RE_ENABLE_I18N
4178       if (pstr->mb_cur_max > 1)
4179 	{
4180 	  ret = build_wcs_upper_buffer (pstr);
4181 	  if (BE (ret != REG_NOERROR, 0))
4182 	    return ret;
4183 	}
4184       else
4185 #endif /* RE_ENABLE_I18N  */
4186 	build_upper_buffer (pstr);
4187     }
4188   else
4189     {
4190 #ifdef RE_ENABLE_I18N
4191       if (pstr->mb_cur_max > 1)
4192 	build_wcs_buffer (pstr);
4193       else
4194 #endif /* RE_ENABLE_I18N  */
4195 	{
4196 	  if (pstr->trans != NULL)
4197 	    re_string_translate_buffer (pstr);
4198 	}
4199     }
4200   return REG_NOERROR;
4201 }
4202 
4203 
4204 /* Functions for matching context.  */
4205 
4206 /* Initialize MCTX.  */
4207 
4208 static reg_errcode_t
4209 internal_function __attribute_warn_unused_result__
4210 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4211 {
4212   mctx->eflags = eflags;
4213   mctx->match_last = REG_MISSING;
4214   if (n > 0)
4215     {
4216       /* Avoid overflow.  */
4217       size_t max_object_size =
4218 	MAX (sizeof (struct re_backref_cache_entry),
4219 	     sizeof (re_sub_match_top_t *));
4220       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4221 	return REG_ESPACE;
4222 
4223       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4224       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4225       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4226 	return REG_ESPACE;
4227     }
4228   /* Already zero-ed by the caller.
4229      else
4230        mctx->bkref_ents = NULL;
4231      mctx->nbkref_ents = 0;
4232      mctx->nsub_tops = 0;  */
4233   mctx->abkref_ents = n;
4234   mctx->max_mb_elem_len = 1;
4235   mctx->asub_tops = n;
4236   return REG_NOERROR;
4237 }
4238 
4239 /* Clean the entries which depend on the current input in MCTX.
4240    This function must be invoked when the matcher changes the start index
4241    of the input, or changes the input string.  */
4242 
4243 static void
4244 internal_function
4245 match_ctx_clean (re_match_context_t *mctx)
4246 {
4247   Idx st_idx;
4248   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4249     {
4250       Idx sl_idx;
4251       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4252       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4253 	{
4254 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4255 	  re_free (last->path.array);
4256 	  re_free (last);
4257 	}
4258       re_free (top->lasts);
4259       if (top->path)
4260 	{
4261 	  re_free (top->path->array);
4262 	  re_free (top->path);
4263 	}
4264       free (top);
4265     }
4266 
4267   mctx->nsub_tops = 0;
4268   mctx->nbkref_ents = 0;
4269 }
4270 
4271 /* Free all the memory associated with MCTX.  */
4272 
4273 static void
4274 internal_function
4275 match_ctx_free (re_match_context_t *mctx)
4276 {
4277   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4278   match_ctx_clean (mctx);
4279   re_free (mctx->sub_tops);
4280   re_free (mctx->bkref_ents);
4281 }
4282 
4283 /* Add a new backreference entry to MCTX.
4284    Note that we assume that caller never call this function with duplicate
4285    entry, and call with STR_IDX which isn't smaller than any existing entry.
4286 */
4287 
4288 static reg_errcode_t
4289 internal_function __attribute_warn_unused_result__
4290 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4291 		     Idx to)
4292 {
4293   if (mctx->nbkref_ents >= mctx->abkref_ents)
4294     {
4295       struct re_backref_cache_entry* new_entry;
4296       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4297 			      mctx->abkref_ents * 2);
4298       if (BE (new_entry == NULL, 0))
4299 	{
4300 	  re_free (mctx->bkref_ents);
4301 	  return REG_ESPACE;
4302 	}
4303       mctx->bkref_ents = new_entry;
4304       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4305 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4306       mctx->abkref_ents *= 2;
4307     }
4308   if (mctx->nbkref_ents > 0
4309       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4310     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4311 
4312   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4313   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4314   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4315   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4316 
4317   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4318      If bit N is clear, means that this entry won't epsilon-transition to
4319      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4320      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4321      such node.
4322 
4323      A backreference does not epsilon-transition unless it is empty, so set
4324      to all zeros if FROM != TO.  */
4325   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4326     = (from == to ? -1 : 0);
4327 
4328   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4329   if (mctx->max_mb_elem_len < to - from)
4330     mctx->max_mb_elem_len = to - from;
4331   return REG_NOERROR;
4332 }
4333 
4334 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4335    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4336 
4337 static Idx
4338 internal_function
4339 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4340 {
4341   Idx left, right, mid, last;
4342   last = right = mctx->nbkref_ents;
4343   for (left = 0; left < right;)
4344     {
4345       mid = (left + right) / 2;
4346       if (mctx->bkref_ents[mid].str_idx < str_idx)
4347 	left = mid + 1;
4348       else
4349 	right = mid;
4350     }
4351   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4352     return left;
4353   else
4354     return REG_MISSING;
4355 }
4356 
4357 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4358    at STR_IDX.  */
4359 
4360 static reg_errcode_t
4361 internal_function __attribute_warn_unused_result__
4362 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4363 {
4364 #ifdef DEBUG
4365   assert (mctx->sub_tops != NULL);
4366   assert (mctx->asub_tops > 0);
4367 #endif
4368   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4369     {
4370       Idx new_asub_tops = mctx->asub_tops * 2;
4371       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4372 						   re_sub_match_top_t *,
4373 						   new_asub_tops);
4374       if (BE (new_array == NULL, 0))
4375 	return REG_ESPACE;
4376       mctx->sub_tops = new_array;
4377       mctx->asub_tops = new_asub_tops;
4378     }
4379   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4380   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4381     return REG_ESPACE;
4382   mctx->sub_tops[mctx->nsub_tops]->node = node;
4383   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4384   return REG_NOERROR;
4385 }
4386 
4387 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4388    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4389 
4390 static re_sub_match_last_t *
4391 internal_function
4392 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4393 {
4394   re_sub_match_last_t *new_entry;
4395   if (BE (subtop->nlasts == subtop->alasts, 0))
4396     {
4397       Idx new_alasts = 2 * subtop->alasts + 1;
4398       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4399 						    re_sub_match_last_t *,
4400 						    new_alasts);
4401       if (BE (new_array == NULL, 0))
4402 	return NULL;
4403       subtop->lasts = new_array;
4404       subtop->alasts = new_alasts;
4405     }
4406   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4407   if (BE (new_entry != NULL, 1))
4408     {
4409       subtop->lasts[subtop->nlasts] = new_entry;
4410       new_entry->node = node;
4411       new_entry->str_idx = str_idx;
4412       ++subtop->nlasts;
4413     }
4414   return new_entry;
4415 }
4416 
4417 static void
4418 internal_function
4419 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4420 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4421 {
4422   sctx->sifted_states = sifted_sts;
4423   sctx->limited_states = limited_sts;
4424   sctx->last_node = last_node;
4425   sctx->last_str_idx = last_str_idx;
4426   re_node_set_init_empty (&sctx->limits);
4427 }
4428