xref: /dragonfly/contrib/cvs-1.12/lib/regexec.c (revision d4ef6694)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License along
17    with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 				     Idx n) internal_function;
22 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
23 static void match_ctx_free (re_match_context_t *cache) internal_function;
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 					  Idx str_idx, Idx from, Idx to)
26      internal_function;
27 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
28      internal_function;
29 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
30 					   Idx str_idx) internal_function;
31 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32 						    Idx node, Idx str_idx)
33      internal_function;
34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35 			   re_dfastate_t **limited_sts, Idx last_node,
36 			   Idx last_str_idx)
37      internal_function;
38 static reg_errcode_t re_search_internal (const regex_t *preg,
39 					 const char *string, Idx length,
40 					 Idx start, Idx last_start, Idx stop,
41 					 size_t nmatch, regmatch_t pmatch[],
42 					 int eflags) internal_function;
43 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
44 				  const char *string1, Idx length1,
45 				  const char *string2, Idx length2,
46 				  Idx start, regoff_t range,
47 				  struct re_registers *regs,
48 				  Idx stop, bool ret_len) internal_function;
49 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
50 				const char *string, Idx length, Idx start,
51 				regoff_t range, Idx stop,
52 				struct re_registers *regs,
53 				bool ret_len) internal_function;
54 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
55 			      Idx nregs, int regs_allocated) internal_function;
56 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
57      internal_function;
58 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
59 			   Idx *p_match_first)
60      internal_function;
61 static Idx check_halt_state_context (const re_match_context_t *mctx,
62 				     const re_dfastate_t *state, Idx idx)
63      internal_function;
64 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
65 			 regmatch_t *prev_idx_match, Idx cur_node,
66 			 Idx cur_idx, Idx nmatch) internal_function;
67 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
68 				      Idx str_idx, Idx dest_node, Idx nregs,
69 				      regmatch_t *regs,
70 				      re_node_set *eps_via_nodes) internal_function;
71 static reg_errcode_t set_regs (const regex_t *preg,
72 			       const re_match_context_t *mctx,
73 			       size_t nmatch, regmatch_t *pmatch,
74 			       bool fl_backtrack) internal_function;
75 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
76 
77 #ifdef RE_ENABLE_I18N
78 static int sift_states_iter_mb (const re_match_context_t *mctx,
79 				re_sift_context_t *sctx,
80 				Idx node_idx, Idx str_idx, Idx max_str_idx) internal_function;
81 #endif /* RE_ENABLE_I18N */
82 static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
83 					   re_sift_context_t *sctx) internal_function;
84 static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
85 					  re_sift_context_t *sctx, Idx str_idx,
86 					  re_node_set *cur_dest) internal_function;
87 static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
88 					      re_sift_context_t *sctx,
89 					      Idx str_idx,
90 					      re_node_set *dest_nodes) internal_function;
91 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
92 					    re_node_set *dest_nodes,
93 					    const re_node_set *candidates) internal_function;
94 static bool check_dst_limits (const re_match_context_t *mctx,
95 			      const re_node_set *limits,
96 			      Idx dst_node, Idx dst_idx, Idx src_node,
97 			      Idx src_idx) internal_function;
98 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
99 					int boundaries, Idx subexp_idx,
100 					Idx from_node, Idx bkref_idx) internal_function;
101 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
102 				      Idx limit, Idx subexp_idx,
103 				      Idx node, Idx str_idx,
104 				      Idx bkref_idx) internal_function;
105 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
106 					  re_node_set *dest_nodes,
107 					  const re_node_set *candidates,
108 					  re_node_set *limits,
109 					  struct re_backref_cache_entry *bkref_ents,
110 					  Idx str_idx) internal_function;
111 static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
112 					re_sift_context_t *sctx,
113 					Idx str_idx, const re_node_set *candidates) internal_function;
114 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
115 					re_dfastate_t **src, Idx num) internal_function;
116 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
117 					 re_match_context_t *mctx) internal_function;
118 static re_dfastate_t *transit_state (reg_errcode_t *err,
119 				     re_match_context_t *mctx,
120 				     re_dfastate_t *state) internal_function;
121 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
122 					    re_match_context_t *mctx,
123 					    re_dfastate_t *next_state) internal_function;
124 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
125 						re_node_set *cur_nodes,
126 						Idx str_idx) internal_function;
127 #if 0
128 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
129 					re_match_context_t *mctx,
130 					re_dfastate_t *pstate) internal_function;
131 #endif
132 #ifdef RE_ENABLE_I18N
133 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
134 				       re_dfastate_t *pstate) internal_function;
135 #endif /* RE_ENABLE_I18N */
136 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
137 					  const re_node_set *nodes) internal_function;
138 static reg_errcode_t get_subexp (re_match_context_t *mctx,
139 				 Idx bkref_node, Idx bkref_str_idx) internal_function;
140 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
141 				     const re_sub_match_top_t *sub_top,
142 				     re_sub_match_last_t *sub_last,
143 				     Idx bkref_node, Idx bkref_str) internal_function;
144 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
145 			     Idx subexp_idx, int type) internal_function;
146 static reg_errcode_t check_arrival (re_match_context_t *mctx,
147 				    state_array_t *path, Idx top_node,
148 				    Idx top_str, Idx last_node, Idx last_str,
149 				    int type) internal_function;
150 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
151 						   Idx str_idx,
152 						   re_node_set *cur_nodes,
153 						   re_node_set *next_nodes) internal_function;
154 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
155 					       re_node_set *cur_nodes,
156 					       Idx ex_subexp, int type) internal_function;
157 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
158 						   re_node_set *dst_nodes,
159 						   Idx target, Idx ex_subexp,
160 						   int type) internal_function;
161 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
162 					 re_node_set *cur_nodes, Idx cur_str,
163 					 Idx subexp_num, int type) internal_function;
164 static bool build_trtable (re_dfa_t *dfa,
165 			   re_dfastate_t *state) internal_function;
166 #ifdef RE_ENABLE_I18N
167 static int check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
168 				    const re_string_t *input, Idx idx) internal_function;
169 # ifdef _LIBC
170 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
171 						   size_t name_len) internal_function;
172 # endif /* _LIBC */
173 #endif /* RE_ENABLE_I18N */
174 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
175 				       const re_dfastate_t *state,
176 				       re_node_set *states_node,
177 				       bitset *states_ch) internal_function;
178 static bool check_node_accept (const re_match_context_t *mctx,
179 			       const re_token_t *node, Idx idx)
180      internal_function;
181 static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
182 
183 /* Entry point for POSIX code.  */
184 
185 /* regexec searches for a given pattern, specified by PREG, in the
186    string STRING.
187 
188    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
189    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
190    least NMATCH elements, and we set them to the offsets of the
191    corresponding matched substrings.
192 
193    EFLAGS specifies `execution flags' which affect matching: if
194    REG_NOTBOL is set, then ^ does not match at the beginning of the
195    string; if REG_NOTEOL is set, then $ does not match at the end.
196 
197    We return 0 if we find a match and REG_NOMATCH if not.  */
198 
199 int
200 regexec (const regex_t *__restrict preg, const char *__restrict string,
201 	 size_t nmatch, regmatch_t pmatch[], int eflags)
202 {
203   reg_errcode_t err;
204   Idx start, length;
205 #ifdef _LIBC
206   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
207 #endif
208 
209   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
210     return REG_BADPAT;
211 
212   if (eflags & REG_STARTEND)
213     {
214       start = pmatch[0].rm_so;
215       length = pmatch[0].rm_eo;
216     }
217   else
218     {
219       start = 0;
220       length = strlen (string);
221     }
222 
223   __libc_lock_lock (dfa->lock);
224   if (preg->re_no_sub)
225     err = re_search_internal (preg, string, length, start, length,
226 			      length, 0, NULL, eflags);
227   else
228     err = re_search_internal (preg, string, length, start, length,
229 			      length, nmatch, pmatch, eflags);
230   __libc_lock_unlock (dfa->lock);
231   return err != REG_NOERROR;
232 }
233 
234 #ifdef _LIBC
235 # include <shlib-compat.h>
236 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
237 
238 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
239 __typeof__ (__regexec) __compat_regexec;
240 
241 int
242 attribute_compat_text_section
243 __compat_regexec (const regex_t *__restrict preg,
244 		  const char *__restrict string, size_t nmatch,
245 		  regmatch_t pmatch[], int eflags)
246 {
247   return regexec (preg, string, nmatch, pmatch,
248 		  eflags & (REG_NOTBOL | REG_NOTEOL));
249 }
250 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
251 # endif
252 #endif
253 
254 /* Entry points for GNU code.  */
255 
256 /* re_match, re_search, re_match_2, re_search_2
257 
258    The former two functions operate on STRING with length LENGTH,
259    while the later two operate on concatenation of STRING1 and STRING2
260    with lengths LENGTH1 and LENGTH2, respectively.
261 
262    re_match() matches the compiled pattern in BUFP against the string,
263    starting at index START.
264 
265    re_search() first tries matching at index START, then it tries to match
266    starting from index START + 1, and so on.  The last start position tried
267    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
268    way as re_match().)
269 
270    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
271    the first STOP characters of the concatenation of the strings should be
272    concerned.
273 
274    If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match
275    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
276    computed relative to the concatenation, not relative to the individual
277    strings.)
278 
279    On success, re_match* functions return the length of the match, re_search*
280    return the position of the start of the match.  Return value -1 means no
281    match was found and -2 indicates an internal error.  */
282 
283 regoff_t
284 re_match (struct re_pattern_buffer *bufp, const char *string,
285 	  Idx length, Idx start, struct re_registers *regs)
286 {
287   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
288 }
289 #ifdef _LIBC
290 weak_alias (__re_match, re_match)
291 #endif
292 
293 regoff_t
294 re_search (struct re_pattern_buffer *bufp, const char *string,
295 	   Idx length, Idx start, regoff_t range, struct re_registers *regs)
296 {
297   return re_search_stub (bufp, string, length, start, range, length, regs,
298 			 false);
299 }
300 #ifdef _LIBC
301 weak_alias (__re_search, re_search)
302 #endif
303 
304 regoff_t
305 re_match_2 (struct re_pattern_buffer *bufp,
306 	    const char *string1, Idx length1,
307 	    const char *string2, Idx length2,
308 	    Idx start, struct re_registers *regs, Idx stop)
309 {
310   return re_search_2_stub (bufp, string1, length1, string2, length2,
311 			   start, 0, regs, stop, true);
312 }
313 #ifdef _LIBC
314 weak_alias (__re_match_2, re_match_2)
315 #endif
316 
317 regoff_t
318 re_search_2 (struct re_pattern_buffer *bufp,
319 	     const char *string1, Idx length1,
320 	     const char *string2, Idx length2,
321 	     Idx start, regoff_t range, struct re_registers *regs, Idx stop)
322 {
323   return re_search_2_stub (bufp, string1, length1, string2, length2,
324 			   start, range, regs, stop, false);
325 }
326 #ifdef _LIBC
327 weak_alias (__re_search_2, re_search_2)
328 #endif
329 
330 static regoff_t
331 internal_function
332 re_search_2_stub (struct re_pattern_buffer *bufp,
333 		  const char *string1, Idx length1,
334 		  const char *string2, Idx length2,
335 		  Idx start, regoff_t range, struct re_registers *regs,
336 		  Idx stop, bool ret_len)
337 {
338   const char *str;
339   regoff_t rval;
340   Idx len = length1 + length2;
341   char *s = NULL;
342 
343   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
344     return -2;
345 
346   /* Concatenate the strings.  */
347   if (length2 > 0)
348     if (length1 > 0)
349       {
350 	s = re_malloc (char, len);
351 
352 	if (BE (s == NULL, 0))
353 	  return -2;
354 	memcpy (s, string1, length1);
355 	memcpy (s + length1, string2, length2);
356 	str = s;
357       }
358     else
359       str = string2;
360   else
361     str = string1;
362 
363   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
364 			 ret_len);
365   re_free (s);
366   return rval;
367 }
368 
369 /* The parameters have the same meaning as those of re_search.
370    Additional parameters:
371    If RET_LEN is true the length of the match is returned (re_match style);
372    otherwise the position of the match is returned.  */
373 
374 static regoff_t
375 internal_function
376 re_search_stub (struct re_pattern_buffer *bufp,
377 		const char *string, Idx length,
378 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
379 		bool ret_len)
380 {
381   reg_errcode_t result;
382   regmatch_t *pmatch;
383   Idx nregs;
384   regoff_t rval;
385   int eflags = 0;
386 #ifdef _LIBC
387   re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
388 #endif
389   Idx last_start = start + range;
390 
391   /* Check for out-of-range.  */
392   if (BE (start < 0 || start > length, 0))
393     return -1;
394   if (sizeof start < sizeof range)
395     {
396       regoff_t length_offset = length;
397       regoff_t start_offset = start;
398       if (BE (length_offset - start_offset < range, 0))
399 	last_start = length;
400       else if (BE (range < - start_offset, 0))
401 	last_start = 0;
402     }
403   else
404     {
405       if (BE ((last_start < start) != (range < 0), 0))
406 	{
407 	  /* Overflow occurred when computing last_start; substitute
408 	     the extreme value.  */
409 	  last_start = range < 0 ? 0 : length;
410 	}
411       else
412 	{
413 	  if (BE (length < last_start, 0))
414 	    last_start = length;
415 	  else if (BE (last_start < 0, 0))
416 	    last_start = 0;
417 	}
418     }
419 
420   __libc_lock_lock (dfa->lock);
421 
422   eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0;
423   eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0;
424 
425   /* Compile fastmap if we haven't yet.  */
426   if (start < last_start && bufp->re_fastmap != NULL
427       && !bufp->re_fastmap_accurate)
428     re_compile_fastmap (bufp);
429 
430   if (BE (bufp->re_no_sub, 0))
431     regs = NULL;
432 
433   /* We need at least 1 register.  */
434   if (regs == NULL)
435     nregs = 1;
436   else if (BE (bufp->re_regs_allocated == REG_FIXED
437 	       && regs->rm_num_regs <= bufp->re_nsub, 0))
438     {
439       nregs = regs->rm_num_regs;
440       if (BE (nregs < 1, 0))
441 	{
442 	  /* Nothing can be copied to regs.  */
443 	  regs = NULL;
444 	  nregs = 1;
445 	}
446     }
447   else
448     nregs = bufp->re_nsub + 1;
449   pmatch = re_xmalloc (regmatch_t, nregs);
450   if (BE (pmatch == NULL, 0))
451     {
452       rval = -2;
453       goto out;
454     }
455 
456   result = re_search_internal (bufp, string, length, start, last_start, stop,
457 			       nregs, pmatch, eflags);
458 
459   rval = 0;
460 
461   /* I hope we needn't fill ther regs with -1's when no match was found.  */
462   if (result != REG_NOERROR)
463     rval = -1;
464   else if (regs != NULL)
465     {
466       /* If caller wants register contents data back, copy them.  */
467       bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs,
468 					      bufp->re_regs_allocated);
469       if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0))
470 	rval = -2;
471     }
472 
473   if (BE (rval == 0, 1))
474     {
475       if (ret_len)
476 	{
477 	  assert (pmatch[0].rm_so == start);
478 	  rval = pmatch[0].rm_eo - start;
479 	}
480       else
481 	rval = pmatch[0].rm_so;
482     }
483   re_free (pmatch);
484  out:
485   __libc_lock_unlock (dfa->lock);
486   return rval;
487 }
488 
489 static unsigned
490 internal_function
491 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
492 	      int regs_allocated)
493 {
494   int rval = REG_REALLOCATE;
495   Idx i;
496   Idx need_regs = nregs + 1;
497   /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code
498      uses.  */
499 
500   /* Have the register data arrays been allocated?  */
501   if (regs_allocated == REG_UNALLOCATED)
502     { /* No.  So allocate them with malloc.  */
503       regs->rm_start = re_xmalloc (regoff_t, need_regs);
504       regs->rm_end = re_malloc (regoff_t, need_regs);
505       if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0))
506 	return REG_UNALLOCATED;
507       regs->rm_num_regs = need_regs;
508     }
509   else if (regs_allocated == REG_REALLOCATE)
510     { /* Yes.  If we need more elements than were already
511 	 allocated, reallocate them.  If we need fewer, just
512 	 leave it alone.  */
513       if (BE (need_regs > regs->rm_num_regs, 0))
514 	{
515 	  regoff_t *new_start =
516 	    re_xrealloc (regs->rm_start, regoff_t, need_regs);
517 	  regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs);
518 	  if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
519 	    return REG_UNALLOCATED;
520 	  regs->rm_start = new_start;
521 	  regs->rm_end = new_end;
522 	  regs->rm_num_regs = need_regs;
523 	}
524     }
525   else
526     {
527       assert (regs_allocated == REG_FIXED);
528       /* This function may not be called with REG_FIXED and nregs too big.  */
529       assert (regs->rm_num_regs >= nregs);
530       rval = REG_FIXED;
531     }
532 
533   /* Copy the regs.  */
534   for (i = 0; i < nregs; ++i)
535     {
536       regs->rm_start[i] = pmatch[i].rm_so;
537       regs->rm_end[i] = pmatch[i].rm_eo;
538     }
539   for ( ; i < regs->rm_num_regs; ++i)
540     regs->rm_start[i] = regs->rm_end[i] = -1;
541 
542   return rval;
543 }
544 
545 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
546    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
547    this memory for recording register information.  STARTS and ENDS
548    must be allocated using the malloc library routine, and must each
549    be at least NUM_REGS * sizeof (regoff_t) bytes long.
550 
551    If NUM_REGS == 0, then subsequent matches should allocate their own
552    register data.
553 
554    Unless this function is called, the first search or match using
555    PATTERN_BUFFER will allocate its own register data, without
556    freeing the old data.  */
557 
558 void
559 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
560 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
561 {
562   if (num_regs)
563     {
564       bufp->re_regs_allocated = REG_REALLOCATE;
565       regs->rm_num_regs = num_regs;
566       regs->rm_start = starts;
567       regs->rm_end = ends;
568     }
569   else
570     {
571       bufp->re_regs_allocated = REG_UNALLOCATED;
572       regs->rm_num_regs = 0;
573       regs->rm_start = regs->rm_end = NULL;
574     }
575 }
576 #ifdef _LIBC
577 weak_alias (__re_set_registers, re_set_registers)
578 #endif
579 
580 /* Entry points compatible with 4.2 BSD regex library.  We don't define
581    them unless specifically requested.  */
582 
583 #if defined _REGEX_RE_COMP || defined _LIBC
584 int
585 # ifdef _LIBC
586 weak_function
587 # endif
588 re_exec (const char *s)
589 {
590   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
591 }
592 #endif /* _REGEX_RE_COMP */
593 
594 /* Internal entry point.  */
595 
596 /* Searches for a compiled pattern PREG in the string STRING, whose
597    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
598    meaning as with regexec.  LAST_START is START + RANGE, where
599    START and RANGE have the same meaning as with re_search.
600    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
601    otherwise return the error code.
602    Note: We assume front end functions already check ranges.
603    (0 <= LAST_START && LAST_START <= LENGTH)  */
604 
605 static reg_errcode_t
606 internal_function
607 re_search_internal (const regex_t *preg,
608 		    const char *string, Idx length,
609 		    Idx start, Idx last_start, Idx stop,
610 		    size_t nmatch, regmatch_t pmatch[],
611 		    int eflags)
612 {
613   reg_errcode_t err;
614   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
615   Idx left_lim, right_lim;
616   int incr;
617   bool fl_longest_match;
618   int match_kind;
619   Idx match_first, match_last = REG_MISSING;
620   Idx extra_nmatch;
621   bool sb;
622   int ch;
623 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
624   re_match_context_t mctx = { .dfa = dfa };
625 #else
626   re_match_context_t mctx;
627 #endif
628   char *fastmap = ((preg->re_fastmap != NULL && preg->re_fastmap_accurate
629 		    && start != last_start && !preg->re_can_be_null)
630 		   ? preg->re_fastmap : NULL);
631   unsigned REG_TRANSLATE_TYPE t =
632     (unsigned REG_TRANSLATE_TYPE) preg->re_translate;
633 
634 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
635   memset (&mctx, '\0', sizeof (re_match_context_t));
636   mctx.dfa = dfa;
637 #endif
638 
639   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
640   nmatch -= extra_nmatch;
641 
642   /* Check if the DFA haven't been compiled.  */
643   if (BE (preg->re_used == 0 || dfa->init_state == NULL
644 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
645 	  || dfa->init_state_begbuf == NULL, 0))
646     return REG_NOMATCH;
647 
648 #ifdef DEBUG
649   /* We assume front-end functions already check them.  */
650   assert (0 <= last_start && last_start <= length);
651 #endif
652 
653   /* If initial states with non-begbuf contexts have no elements,
654      the regex must be anchored.  If preg->re_newline_anchor is set,
655      we'll never use init_state_nl, so do not check it.  */
656   if (dfa->init_state->nodes.nelem == 0
657       && dfa->init_state_word->nodes.nelem == 0
658       && (dfa->init_state_nl->nodes.nelem == 0
659 	  || !preg->re_newline_anchor))
660     {
661       if (start != 0 && last_start != 0)
662         return REG_NOMATCH;
663       start = last_start = 0;
664     }
665 
666   /* We must check the longest matching, if nmatch > 0.  */
667   fl_longest_match = (nmatch != 0 || dfa->nbackref);
668 
669   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
670 			    preg->re_translate,
671 			    preg->re_syntax & REG_IGNORE_CASE, dfa);
672   if (BE (err != REG_NOERROR, 0))
673     goto free_return;
674   mctx.input.stop = stop;
675   mctx.input.raw_stop = stop;
676   mctx.input.newline_anchor = preg->re_newline_anchor;
677 
678   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
679   if (BE (err != REG_NOERROR, 0))
680     goto free_return;
681 
682   /* We will log all the DFA states through which the dfa pass,
683      if nmatch > 1, or this dfa has "multibyte node", which is a
684      back-reference or a node which can accept multibyte character or
685      multi character collating element.  */
686   if (nmatch > 1 || dfa->has_mb_node)
687     {
688       mctx.state_log = re_xmalloc (re_dfastate_t *, mctx.input.bufs_len + 1);
689       if (BE (mctx.state_log == NULL, 0))
690 	{
691 	  err = REG_ESPACE;
692 	  goto free_return;
693 	}
694     }
695   else
696     mctx.state_log = NULL;
697 
698   match_first = start;
699   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
700 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
701 
702   /* Check incrementally whether of not the input string match.  */
703   incr = (last_start < start) ? -1 : 1;
704   left_lim = (last_start < start) ? last_start : start;
705   right_lim = (last_start < start) ? start : last_start;
706   sb = dfa->mb_cur_max == 1;
707   match_kind =
708     (fastmap
709      ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0)
710 	| (start <= last_start ? 2 : 0)
711 	| (t != NULL ? 1 : 0))
712      : 8);
713 
714   for (;; match_first += incr)
715     {
716       err = REG_NOMATCH;
717       if (match_first < left_lim || right_lim < match_first)
718 	goto free_return;
719 
720       /* Advance as rapidly as possible through the string, until we
721 	 find a plausible place to start matching.  This may be done
722 	 with varying efficiency, so there are various possibilities:
723 	 only the most common of them are specialized, in order to
724 	 save on code size.  We use a switch statement for speed.  */
725       switch (match_kind)
726 	{
727 	case 8:
728 	  /* No fastmap.  */
729 	  break;
730 
731 	case 7:
732 	  /* Fastmap with single-byte translation, match forward.  */
733 	  while (BE (match_first < right_lim, 1)
734 		 && !fastmap[t[(unsigned char) string[match_first]]])
735 	    ++match_first;
736 	  goto forward_match_found_start_or_reached_end;
737 
738 	case 6:
739 	  /* Fastmap without translation, match forward.  */
740 	  while (BE (match_first < right_lim, 1)
741 		 && !fastmap[(unsigned char) string[match_first]])
742 	    ++match_first;
743 
744 	forward_match_found_start_or_reached_end:
745 	  if (BE (match_first == right_lim, 0))
746 	    {
747 	      ch = match_first >= length
748 		       ? 0 : (unsigned char) string[match_first];
749 	      if (!fastmap[t ? t[ch] : ch])
750 		goto free_return;
751 	    }
752 	  break;
753 
754 	case 4:
755 	case 5:
756 	  /* Fastmap without multi-byte translation, match backwards.  */
757 	  while (match_first >= left_lim)
758 	    {
759 	      ch = match_first >= length
760 		       ? 0 : (unsigned char) string[match_first];
761 	      if (fastmap[t ? t[ch] : ch])
762 		break;
763 	      --match_first;
764 	    }
765 	  if (match_first < left_lim)
766 	    goto free_return;
767 	  break;
768 
769 	default:
770 	  /* In this case, we can't determine easily the current byte,
771 	     since it might be a component byte of a multibyte
772 	     character.  Then we use the constructed buffer instead.  */
773 	  for (;;)
774 	    {
775 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
776 		 buffers.  */
777 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
778 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
779 		{
780 		  err = re_string_reconstruct (&mctx.input, match_first,
781 					       eflags);
782 		  if (BE (err != REG_NOERROR, 0))
783 		    goto free_return;
784 
785 		  offset = match_first - mctx.input.raw_mbs_idx;
786 		}
787 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
788 		 Note that MATCH_FIRST must not be smaller than 0.  */
789 	      ch = (match_first >= length
790 		    ? 0 : re_string_byte_at (&mctx.input, offset));
791 	      if (fastmap[ch])
792 		break;
793 	      match_first += incr;
794 	      if (match_first < left_lim || match_first > right_lim)
795 	        {
796 	          err = REG_NOMATCH;
797 	          goto free_return;
798 	        }
799 	    }
800 	  break;
801 	}
802 
803       /* Reconstruct the buffers so that the matcher can assume that
804 	 the matching starts from the beginning of the buffer.  */
805       err = re_string_reconstruct (&mctx.input, match_first, eflags);
806       if (BE (err != REG_NOERROR, 0))
807 	goto free_return;
808 
809 #ifdef RE_ENABLE_I18N
810      /* Don't consider this char as a possible match start if it part,
811 	yet isn't the head, of a multibyte character.  */
812       if (!sb && !re_string_first_byte (&mctx.input, 0))
813 	continue;
814 #endif
815 
816       /* It seems to be appropriate one, then use the matcher.  */
817       /* We assume that the matching starts from 0.  */
818       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
819       match_last = check_matching (&mctx, fl_longest_match,
820 				   start <= last_start ? &match_first : NULL);
821       if (match_last != REG_MISSING)
822 	{
823 	  if (BE (match_last == REG_ERROR, 0))
824 	    {
825 	      err = REG_ESPACE;
826 	      goto free_return;
827 	    }
828 	  else
829 	    {
830 	      mctx.match_last = match_last;
831 	      if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref)
832 		{
833 		  re_dfastate_t *pstate = mctx.state_log[match_last];
834 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
835 							     match_last);
836 		}
837 	      if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match)
838 		  || dfa->nbackref)
839 		{
840 		  err = prune_impossible_nodes (&mctx);
841 		  if (err == REG_NOERROR)
842 		    break;
843 		  if (BE (err != REG_NOMATCH, 0))
844 		    goto free_return;
845 		  match_last = REG_MISSING;
846 		}
847 	      else
848 		break; /* We found a match.  */
849 	    }
850 	}
851 
852       match_ctx_clean (&mctx);
853     }
854 
855 #ifdef DEBUG
856   assert (match_last != REG_MISSING);
857   assert (err == REG_NOERROR);
858 #endif
859 
860   /* Set pmatch[] if we need.  */
861   if (nmatch > 0)
862     {
863       Idx reg_idx;
864 
865       /* Initialize registers.  */
866       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
867 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
868 
869       /* Set the points where matching start/end.  */
870       pmatch[0].rm_so = 0;
871       pmatch[0].rm_eo = mctx.match_last;
872       /* FIXME: This function should fail if mctx.match_last exceeds
873 	 the maximum possible regoff_t value.  We need a new error
874 	 code REG_OVERFLOW.  */
875 
876       if (!preg->re_no_sub && nmatch > 1)
877 	{
878 	  err = set_regs (preg, &mctx, nmatch, pmatch,
879 			  dfa->has_plural_match && dfa->nbackref > 0);
880 	  if (BE (err != REG_NOERROR, 0))
881 	    goto free_return;
882 	}
883 
884       /* At last, add the offset to the each registers, since we slided
885 	 the buffers so that we could assume that the matching starts
886 	 from 0.  */
887       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
888 	if (pmatch[reg_idx].rm_so != -1)
889 	  {
890 #ifdef RE_ENABLE_I18N
891 	    if (BE (mctx.input.offsets_needed != 0, 0))
892 	      {
893 		pmatch[reg_idx].rm_so =
894 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
895 		   ? mctx.input.valid_raw_len
896 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
897 		pmatch[reg_idx].rm_eo =
898 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
899 		   ? mctx.input.valid_raw_len
900 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
901 	      }
902 #else
903 	    assert (mctx.input.offsets_needed == 0);
904 #endif
905 	    pmatch[reg_idx].rm_so += match_first;
906 	    pmatch[reg_idx].rm_eo += match_first;
907 	  }
908       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
909 	{
910 	  pmatch[nmatch + reg_idx].rm_so = -1;
911 	  pmatch[nmatch + reg_idx].rm_eo = -1;
912 	}
913 
914       if (dfa->subexp_map)
915         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
916           if (dfa->subexp_map[reg_idx] != reg_idx)
917             {
918               pmatch[reg_idx + 1].rm_so
919                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
920               pmatch[reg_idx + 1].rm_eo
921                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
922             }
923     }
924 
925  free_return:
926   re_free (mctx.state_log);
927   if (dfa->nbackref)
928     match_ctx_free (&mctx);
929   re_string_destruct (&mctx.input);
930   return err;
931 }
932 
933 static reg_errcode_t
934 internal_function
935 prune_impossible_nodes (re_match_context_t *mctx)
936 {
937   re_dfa_t *const dfa = mctx->dfa;
938   Idx halt_node, match_last;
939   reg_errcode_t ret;
940   re_dfastate_t **sifted_states;
941   re_dfastate_t **lim_states = NULL;
942   re_sift_context_t sctx;
943 #ifdef DEBUG
944   assert (mctx->state_log != NULL);
945 #endif
946   match_last = mctx->match_last;
947   halt_node = mctx->last_node;
948   sifted_states = re_xmalloc (re_dfastate_t *, match_last + 1);
949   if (BE (sifted_states == NULL, 0))
950     {
951       ret = REG_ESPACE;
952       goto free_return;
953     }
954   if (dfa->nbackref)
955     {
956       lim_states = re_xmalloc (re_dfastate_t *, match_last + 1);
957       if (BE (lim_states == NULL, 0))
958 	{
959 	  ret = REG_ESPACE;
960 	  goto free_return;
961 	}
962       while (1)
963 	{
964 	  memset (lim_states, '\0',
965 		  sizeof (re_dfastate_t *) * (match_last + 1));
966 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
967 			 match_last);
968 	  ret = sift_states_backward (mctx, &sctx);
969 	  re_node_set_free (&sctx.limits);
970 	  if (BE (ret != REG_NOERROR, 0))
971 	      goto free_return;
972 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
973 	    break;
974 	  do
975 	    {
976 	      --match_last;
977 	      if (! REG_VALID_INDEX (match_last))
978 		{
979 		  ret = REG_NOMATCH;
980 		  goto free_return;
981 		}
982 	    } while (mctx->state_log[match_last] == NULL
983 		     || !mctx->state_log[match_last]->halt);
984 	  halt_node = check_halt_state_context (mctx,
985 						mctx->state_log[match_last],
986 						match_last);
987 	}
988       ret = merge_state_array (dfa, sifted_states, lim_states,
989 			       match_last + 1);
990       re_free (lim_states);
991       lim_states = NULL;
992       if (BE (ret != REG_NOERROR, 0))
993 	goto free_return;
994     }
995   else
996     {
997       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
998       ret = sift_states_backward (mctx, &sctx);
999       re_node_set_free (&sctx.limits);
1000       if (BE (ret != REG_NOERROR, 0))
1001 	goto free_return;
1002     }
1003   re_free (mctx->state_log);
1004   mctx->state_log = sifted_states;
1005   sifted_states = NULL;
1006   mctx->last_node = halt_node;
1007   mctx->match_last = match_last;
1008   ret = REG_NOERROR;
1009  free_return:
1010   re_free (sifted_states);
1011   re_free (lim_states);
1012   return ret;
1013 }
1014 
1015 /* Acquire an initial state and return it.
1016    We must select appropriate initial state depending on the context,
1017    since initial states may have constraints like "\<", "^", etc..  */
1018 
1019 static inline re_dfastate_t *
1020 __attribute ((always_inline)) internal_function
1021 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1022 			    Idx idx)
1023 {
1024   re_dfa_t *const dfa = mctx->dfa;
1025   if (dfa->init_state->has_constraint)
1026     {
1027       unsigned int context;
1028       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1029       if (IS_WORD_CONTEXT (context))
1030 	return dfa->init_state_word;
1031       else if (IS_ORDINARY_CONTEXT (context))
1032 	return dfa->init_state;
1033       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1034 	return dfa->init_state_begbuf;
1035       else if (IS_NEWLINE_CONTEXT (context))
1036 	return dfa->init_state_nl;
1037       else if (IS_BEGBUF_CONTEXT (context))
1038 	{
1039 	  /* It is relatively rare case, then calculate on demand.  */
1040 	  return re_acquire_state_context (err, dfa,
1041 					   dfa->init_state->entrance_nodes,
1042 					   context);
1043 	}
1044       else
1045 	/* Must not happen?  */
1046 	return dfa->init_state;
1047     }
1048   else
1049     return dfa->init_state;
1050 }
1051 
1052 /* Check whether the regular expression match input string INPUT or not,
1053    and return the index where the matching end.  Return REG_MISSING if
1054    there is no match, and return REG_ERROR in case of an error.
1055    FL_LONGEST_MATCH means we want the POSIX longest matching.
1056    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1057    next place where we may want to try matching.
1058    Note that the matcher assume that the maching starts from the current
1059    index of the buffer.  */
1060 
1061 static Idx
1062 internal_function
1063 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1064 		Idx *p_match_first)
1065 {
1066   re_dfa_t *const dfa = mctx->dfa;
1067   reg_errcode_t err;
1068   Idx match = 0;
1069   Idx match_last = REG_MISSING;
1070   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1071   re_dfastate_t *cur_state;
1072   bool at_init_state = p_match_first != NULL;
1073   Idx next_start_idx = cur_str_idx;
1074 
1075   err = REG_NOERROR;
1076   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1077   /* An initial state must not be NULL (invalid).  */
1078   if (BE (cur_state == NULL, 0))
1079     {
1080       assert (err == REG_ESPACE);
1081       return REG_ERROR;
1082     }
1083 
1084   if (mctx->state_log != NULL)
1085     {
1086       mctx->state_log[cur_str_idx] = cur_state;
1087 
1088       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1089 	 later.  E.g. Processing back references.  */
1090       if (BE (dfa->nbackref, 0))
1091 	{
1092 	  at_init_state = false;
1093 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1094 	  if (BE (err != REG_NOERROR, 0))
1095 	    return err;
1096 
1097 	  if (cur_state->has_backref)
1098 	    {
1099 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1100 	      if (BE (err != REG_NOERROR, 0))
1101 	        return err;
1102 	    }
1103 	}
1104     }
1105 
1106   /* If the RE accepts NULL string.  */
1107   if (BE (cur_state->halt, 0))
1108     {
1109       if (!cur_state->has_constraint
1110 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1111 	{
1112 	  if (!fl_longest_match)
1113 	    return cur_str_idx;
1114 	  else
1115 	    {
1116 	      match_last = cur_str_idx;
1117 	      match = 1;
1118 	    }
1119 	}
1120     }
1121 
1122   while (!re_string_eoi (&mctx->input))
1123     {
1124       re_dfastate_t *old_state = cur_state;
1125       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1126 
1127       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1128           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1129               && mctx->input.valid_len < mctx->input.len))
1130         {
1131           err = extend_buffers (mctx);
1132           if (BE (err != REG_NOERROR, 0))
1133 	    {
1134 	      assert (err == REG_ESPACE);
1135 	      return REG_ERROR;
1136 	    }
1137         }
1138 
1139       cur_state = transit_state (&err, mctx, cur_state);
1140       if (mctx->state_log != NULL)
1141 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1142 
1143       if (cur_state == NULL)
1144 	{
1145 	  /* Reached the invalid state or an error.  Try to recover a valid
1146 	     state using the state log, if available and if we have not
1147 	     already found a valid (even if not the longest) match.  */
1148 	  if (BE (err != REG_NOERROR, 0))
1149 	    return REG_ERROR;
1150 
1151 	  if (mctx->state_log == NULL
1152 	      || (match && !fl_longest_match)
1153 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1154 	    break;
1155 	}
1156 
1157       if (BE (at_init_state, 0))
1158 	{
1159 	  if (old_state == cur_state)
1160 	    next_start_idx = next_char_idx;
1161 	  else
1162 	    at_init_state = false;
1163 	}
1164 
1165       if (cur_state->halt)
1166 	{
1167 	  /* Reached a halt state.
1168 	     Check the halt state can satisfy the current context.  */
1169 	  if (!cur_state->has_constraint
1170 	      || check_halt_state_context (mctx, cur_state,
1171 					   re_string_cur_idx (&mctx->input)))
1172 	    {
1173 	      /* We found an appropriate halt state.  */
1174 	      match_last = re_string_cur_idx (&mctx->input);
1175 	      match = 1;
1176 
1177 	      /* We found a match, do not modify match_first below.  */
1178 	      p_match_first = NULL;
1179 	      if (!fl_longest_match)
1180 		break;
1181 	    }
1182 	}
1183     }
1184 
1185   if (p_match_first)
1186     *p_match_first += next_start_idx;
1187 
1188   return match_last;
1189 }
1190 
1191 /* Check NODE match the current context.  */
1192 
1193 static bool
1194 internal_function
1195 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1196 {
1197   re_token_type_t type = dfa->nodes[node].type;
1198   unsigned int constraint = dfa->nodes[node].constraint;
1199   if (type != END_OF_RE)
1200     return false;
1201   if (!constraint)
1202     return true;
1203   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1204     return false;
1205   return true;
1206 }
1207 
1208 /* Check the halt state STATE match the current context.
1209    Return 0 if not match, if the node, STATE has, is a halt node and
1210    match the context, return the node.  */
1211 
1212 static Idx
1213 internal_function
1214 check_halt_state_context (const re_match_context_t *mctx,
1215 			  const re_dfastate_t *state, Idx idx)
1216 {
1217   Idx i;
1218   unsigned int context;
1219 #ifdef DEBUG
1220   assert (state->halt);
1221 #endif
1222   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1223   for (i = 0; i < state->nodes.nelem; ++i)
1224     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1225       return state->nodes.elems[i];
1226   return 0;
1227 }
1228 
1229 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1230    corresponding to the DFA).
1231    Return the destination node, and update EPS_VIA_NODES;
1232    return REG_MISSING in case of errors.  */
1233 
1234 static Idx
1235 internal_function
1236 proceed_next_node (const re_match_context_t *mctx,
1237 		   Idx nregs, regmatch_t *regs, Idx *pidx, Idx node,
1238 		   re_node_set *eps_via_nodes, struct re_fail_stack_t *fs)
1239 {
1240   re_dfa_t *const dfa = mctx->dfa;
1241   Idx i;
1242   bool ok;
1243   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1244     {
1245       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1246       re_node_set *edests = &dfa->edests[node];
1247       Idx dest_node;
1248       ok = re_node_set_insert (eps_via_nodes, node);
1249       if (BE (! ok, 0))
1250 	return REG_ERROR;
1251       /* Pick up a valid destination, or return REG_MISSING if none
1252 	 is found.  */
1253       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1254 	{
1255 	  Idx candidate = edests->elems[i];
1256 	  if (!re_node_set_contains (cur_nodes, candidate))
1257 	    continue;
1258           if (dest_node == REG_MISSING)
1259 	    dest_node = candidate;
1260 
1261           else
1262 	    {
1263 	      /* In order to avoid infinite loop like "(a*)*", return the second
1264 	         epsilon-transition if the first was already considered.  */
1265 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1266 	        return candidate;
1267 
1268 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1269 	      else if (fs != NULL
1270 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1271 				           eps_via_nodes))
1272 		return REG_ERROR;
1273 
1274 	      /* We know we are going to exit.  */
1275 	      break;
1276 	    }
1277 	}
1278       return dest_node;
1279     }
1280   else
1281     {
1282       Idx naccepted = 0;
1283       re_token_type_t type = dfa->nodes[node].type;
1284 
1285 #ifdef RE_ENABLE_I18N
1286       if (dfa->nodes[node].accept_mb)
1287 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1288       else
1289 #endif /* RE_ENABLE_I18N */
1290       if (type == OP_BACK_REF)
1291 	{
1292 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1293 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1294 	  if (fs != NULL)
1295 	    {
1296 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1297 		return REG_MISSING;
1298 	      else if (naccepted)
1299 		{
1300 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1301 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1302 			      naccepted) != 0)
1303 		    return REG_MISSING;
1304 		}
1305 	    }
1306 
1307 	  if (naccepted == 0)
1308 	    {
1309 	      Idx dest_node;
1310 	      ok = re_node_set_insert (eps_via_nodes, node);
1311 	      if (BE (! ok, 0))
1312 		return REG_ERROR;
1313 	      dest_node = dfa->edests[node].elems[0];
1314 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1315 					dest_node))
1316 		return dest_node;
1317 	    }
1318 	}
1319 
1320       if (naccepted != 0
1321 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1322 	{
1323 	  Idx dest_node = dfa->nexts[node];
1324 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1325 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1326 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1327 					       dest_node)))
1328 	    return REG_MISSING;
1329 	  re_node_set_empty (eps_via_nodes);
1330 	  return dest_node;
1331 	}
1332     }
1333   return REG_MISSING;
1334 }
1335 
1336 static reg_errcode_t
1337 internal_function
1338 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1339 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1340 {
1341   reg_errcode_t err;
1342   Idx num = fs->num++;
1343   if (fs->num == fs->alloc)
1344     {
1345       struct re_fail_stack_ent_t *new_array =
1346 	re_x2realloc (fs->stack, struct re_fail_stack_ent_t, &fs->alloc);
1347       if (new_array == NULL)
1348 	return REG_ESPACE;
1349       fs->stack = new_array;
1350     }
1351   fs->stack[num].idx = str_idx;
1352   fs->stack[num].node = dest_node;
1353   fs->stack[num].regs = re_xmalloc (regmatch_t, nregs);
1354   if (fs->stack[num].regs == NULL)
1355     return REG_ESPACE;
1356   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1357   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1358   return err;
1359 }
1360 
1361 static Idx
1362 internal_function
1363 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx,
1364 		Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1365 {
1366   Idx num = --fs->num;
1367   assert (REG_VALID_INDEX (num));
1368   *pidx = fs->stack[num].idx;
1369   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1370   re_node_set_free (eps_via_nodes);
1371   re_free (fs->stack[num].regs);
1372   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1373   return fs->stack[num].node;
1374 }
1375 
1376 /* Set the positions where the subexpressions are starts/ends to registers
1377    PMATCH.
1378    Note: We assume that pmatch[0] is already set, and
1379    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1380 
1381 static reg_errcode_t
1382 internal_function
1383 set_regs (const regex_t *preg, const re_match_context_t *mctx,
1384 	  size_t nmatch, regmatch_t *pmatch, bool fl_backtrack)
1385 {
1386   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
1387   Idx idx, cur_node;
1388   re_node_set eps_via_nodes;
1389   struct re_fail_stack_t *fs;
1390   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1391   regmatch_t *prev_idx_match;
1392   bool prev_idx_match_malloced = false;
1393 
1394 #ifdef DEBUG
1395   assert (nmatch > 1);
1396   assert (mctx->state_log != NULL);
1397 #endif
1398   if (fl_backtrack)
1399     {
1400       fs = &fs_body;
1401       fs->stack = re_xmalloc (struct re_fail_stack_ent_t, fs->alloc);
1402       if (fs->stack == NULL)
1403 	return REG_ESPACE;
1404     }
1405   else
1406     fs = NULL;
1407 
1408   cur_node = dfa->init_node;
1409   re_node_set_init_empty (&eps_via_nodes);
1410 
1411   if (re_alloc_oversized (nmatch, sizeof (regmatch_t)))
1412     {
1413       free_fail_stack_return (fs);
1414       return REG_ESPACE;
1415     }
1416   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1417     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1418   else
1419     {
1420       prev_idx_match = re_malloc (regmatch_t, nmatch);
1421       if (prev_idx_match == NULL)
1422 	{
1423 	  free_fail_stack_return (fs);
1424 	  return REG_ESPACE;
1425 	}
1426       prev_idx_match_malloced = true;
1427     }
1428   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1429 
1430   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1431     {
1432       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1433 
1434       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1435 	{
1436 	  Idx reg_idx;
1437 	  if (fs)
1438 	    {
1439 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1440 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1441 		  break;
1442 	      if (reg_idx == nmatch)
1443 		{
1444 		  re_node_set_free (&eps_via_nodes);
1445 		  if (prev_idx_match_malloced)
1446 		    re_free (prev_idx_match);
1447 		  return free_fail_stack_return (fs);
1448 		}
1449 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1450 					 &eps_via_nodes);
1451 	    }
1452 	  else
1453 	    {
1454 	      re_node_set_free (&eps_via_nodes);
1455 	      if (prev_idx_match_malloced)
1456 		re_free (prev_idx_match);
1457 	      return REG_NOERROR;
1458 	    }
1459 	}
1460 
1461       /* Proceed to next node.  */
1462       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1463 				    &eps_via_nodes, fs);
1464 
1465       if (BE (! REG_VALID_INDEX (cur_node), 0))
1466 	{
1467 	  if (BE (cur_node == REG_ERROR, 0))
1468 	    {
1469 	      re_node_set_free (&eps_via_nodes);
1470 	      if (prev_idx_match_malloced)
1471 		re_free (prev_idx_match);
1472 	      free_fail_stack_return (fs);
1473 	      return REG_ESPACE;
1474 	    }
1475 	  if (fs)
1476 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1477 				       &eps_via_nodes);
1478 	  else
1479 	    {
1480 	      re_node_set_free (&eps_via_nodes);
1481 	      if (prev_idx_match_malloced)
1482 		re_free (prev_idx_match);
1483 	      return REG_NOMATCH;
1484 	    }
1485 	}
1486     }
1487   re_node_set_free (&eps_via_nodes);
1488   if (prev_idx_match_malloced)
1489     re_free (prev_idx_match);
1490   return free_fail_stack_return (fs);
1491 }
1492 
1493 static reg_errcode_t
1494 internal_function
1495 free_fail_stack_return (struct re_fail_stack_t *fs)
1496 {
1497   if (fs)
1498     {
1499       Idx fs_idx;
1500       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1501 	{
1502 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1503 	  re_free (fs->stack[fs_idx].regs);
1504 	}
1505       re_free (fs->stack);
1506     }
1507   return REG_NOERROR;
1508 }
1509 
1510 static void
1511 internal_function
1512 update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match,
1513 	     Idx cur_node, Idx cur_idx, Idx nmatch)
1514 {
1515   int type = dfa->nodes[cur_node].type;
1516   if (type == OP_OPEN_SUBEXP)
1517     {
1518       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1519 
1520       /* We are at the first node of this sub expression.  */
1521       if (reg_num < nmatch)
1522 	{
1523 	  pmatch[reg_num].rm_so = cur_idx;
1524 	  pmatch[reg_num].rm_eo = -1;
1525 	}
1526     }
1527   else if (type == OP_CLOSE_SUBEXP)
1528     {
1529       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1530       if (reg_num < nmatch)
1531 	{
1532 	  /* We are at the last node of this sub expression.  */
1533 	  if (pmatch[reg_num].rm_so < cur_idx)
1534 	    {
1535 	      pmatch[reg_num].rm_eo = cur_idx;
1536 	      /* This is a non-empty match or we are not inside an optional
1537 		 subexpression.  Accept this right away.  */
1538 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1539 	    }
1540 	  else
1541 	    {
1542 	      if (dfa->nodes[cur_node].opt_subexp
1543 		  && prev_idx_match[reg_num].rm_so != -1)
1544 		/* We transited through an empty match for an optional
1545 		   subexpression, like (a?)*, and this is not the subexp's
1546 		   first match.  Copy back the old content of the registers
1547 		   so that matches of an inner subexpression are undone as
1548 		   well, like in ((a?))*.  */
1549 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1550 	      else
1551 		/* We completed a subexpression, but it may be part of
1552 		   an optional one, so do not update PREV_IDX_MATCH.  */
1553 		pmatch[reg_num].rm_eo = cur_idx;
1554 	    }
1555 	}
1556     }
1557 }
1558 
1559 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1560    and sift the nodes in each states according to the following rules.
1561    Updated state_log will be wrote to STATE_LOG.
1562 
1563    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1564      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1565 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1566 	the LAST_NODE, we throw away the node `a'.
1567      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1568 	string `s' and transit to `b':
1569 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1570 	   away the node `a'.
1571 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1572 	    thrown away, we throw away the node `a'.
1573      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1574 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1575 	   node `a'.
1576 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1577 	    we throw away the node `a'.  */
1578 
1579 #define STATE_NODE_CONTAINS(state,node) \
1580   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1581 
1582 static reg_errcode_t
1583 internal_function
1584 sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx)
1585 {
1586   reg_errcode_t err;
1587   int null_cnt = 0;
1588   Idx str_idx = sctx->last_str_idx;
1589   re_node_set cur_dest;
1590 
1591 #ifdef DEBUG
1592   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1593 #endif
1594 
1595   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1596      transit to the last_node and the last_node itself.  */
1597   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1598   if (BE (err != REG_NOERROR, 0))
1599     return err;
1600   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1601   if (BE (err != REG_NOERROR, 0))
1602     goto free_return;
1603 
1604   /* Then check each states in the state_log.  */
1605   while (str_idx > 0)
1606     {
1607       /* Update counters.  */
1608       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1609       if (null_cnt > mctx->max_mb_elem_len)
1610 	{
1611 	  memset (sctx->sifted_states, '\0',
1612 		  sizeof (re_dfastate_t *) * str_idx);
1613 	  re_node_set_free (&cur_dest);
1614 	  return REG_NOERROR;
1615 	}
1616       re_node_set_empty (&cur_dest);
1617       --str_idx;
1618 
1619       if (mctx->state_log[str_idx])
1620 	{
1621 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1622           if (BE (err != REG_NOERROR, 0))
1623 	    goto free_return;
1624 	}
1625 
1626       /* Add all the nodes which satisfy the following conditions:
1627 	 - It can epsilon transit to a node in CUR_DEST.
1628 	 - It is in CUR_SRC.
1629 	 And update state_log.  */
1630       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1631       if (BE (err != REG_NOERROR, 0))
1632 	goto free_return;
1633     }
1634   err = REG_NOERROR;
1635  free_return:
1636   re_node_set_free (&cur_dest);
1637   return err;
1638 }
1639 
1640 static reg_errcode_t
1641 internal_function
1642 build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx,
1643 		     Idx str_idx, re_node_set *cur_dest)
1644 {
1645   re_dfa_t *const dfa = mctx->dfa;
1646   re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1647   Idx i;
1648 
1649   /* Then build the next sifted state.
1650      We build the next sifted state on `cur_dest', and update
1651      `sifted_states[str_idx]' with `cur_dest'.
1652      Note:
1653      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1654      `cur_src' points the node_set of the old `state_log[str_idx]'
1655      (with the epsilon nodes pre-filtered out).  */
1656   for (i = 0; i < cur_src->nelem; i++)
1657     {
1658       Idx prev_node = cur_src->elems[i];
1659       int naccepted = 0;
1660       bool ok;
1661 
1662 #ifdef DEBUG
1663       re_token_type_t type = dfa->nodes[prev_node].type;
1664       assert (!IS_EPSILON_NODE (type));
1665 #endif
1666 #ifdef RE_ENABLE_I18N
1667       /* If the node may accept `multi byte'.  */
1668       if (dfa->nodes[prev_node].accept_mb)
1669 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1670 					 str_idx, sctx->last_str_idx);
1671 #endif /* RE_ENABLE_I18N */
1672 
1673       /* We don't check backreferences here.
1674 	 See update_cur_sifted_state().  */
1675       if (!naccepted
1676 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1677 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1678 				  dfa->nexts[prev_node]))
1679 	naccepted = 1;
1680 
1681       if (naccepted == 0)
1682 	continue;
1683 
1684       if (sctx->limits.nelem)
1685 	{
1686 	  Idx to_idx = str_idx + naccepted;
1687 	  if (check_dst_limits (mctx, &sctx->limits,
1688 				dfa->nexts[prev_node], to_idx,
1689 				prev_node, str_idx))
1690 	    continue;
1691 	}
1692       ok = re_node_set_insert (cur_dest, prev_node);
1693       if (BE (! ok, 0))
1694 	return REG_ESPACE;
1695     }
1696 
1697   return REG_NOERROR;
1698 }
1699 
1700 /* Helper functions.  */
1701 
1702 static reg_errcode_t
1703 internal_function
1704 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1705 {
1706   Idx top = mctx->state_log_top;
1707 
1708   if (next_state_log_idx >= mctx->input.bufs_len
1709       || (next_state_log_idx >= mctx->input.valid_len
1710 	  && mctx->input.valid_len < mctx->input.len))
1711     {
1712       reg_errcode_t err;
1713       err = extend_buffers (mctx);
1714       if (BE (err != REG_NOERROR, 0))
1715 	return err;
1716     }
1717 
1718   if (top < next_state_log_idx)
1719     {
1720       memset (mctx->state_log + top + 1, '\0',
1721 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1722       mctx->state_log_top = next_state_log_idx;
1723     }
1724   return REG_NOERROR;
1725 }
1726 
1727 static reg_errcode_t
1728 internal_function
1729 merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src,
1730 		   Idx num)
1731 {
1732   Idx st_idx;
1733   reg_errcode_t err;
1734   for (st_idx = 0; st_idx < num; ++st_idx)
1735     {
1736       if (dst[st_idx] == NULL)
1737 	dst[st_idx] = src[st_idx];
1738       else if (src[st_idx] != NULL)
1739 	{
1740 	  re_node_set merged_set;
1741 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1742 					&src[st_idx]->nodes);
1743 	  if (BE (err != REG_NOERROR, 0))
1744 	    return err;
1745 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1746 	  re_node_set_free (&merged_set);
1747 	  if (BE (err != REG_NOERROR, 0))
1748 	    return err;
1749 	}
1750     }
1751   return REG_NOERROR;
1752 }
1753 
1754 static reg_errcode_t
1755 internal_function
1756 update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx,
1757 			 Idx str_idx, re_node_set *dest_nodes)
1758 {
1759   re_dfa_t *const dfa = mctx->dfa;
1760   reg_errcode_t err;
1761   const re_node_set *candidates;
1762   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1763 		: &mctx->state_log[str_idx]->nodes);
1764 
1765   if (dest_nodes->nelem == 0)
1766     sctx->sifted_states[str_idx] = NULL;
1767   else
1768     {
1769       if (candidates)
1770 	{
1771 	  /* At first, add the nodes which can epsilon transit to a node in
1772 	     DEST_NODE.  */
1773 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1774 	  if (BE (err != REG_NOERROR, 0))
1775 	    return err;
1776 
1777 	  /* Then, check the limitations in the current sift_context.  */
1778 	  if (sctx->limits.nelem)
1779 	    {
1780 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1781 					 mctx->bkref_ents, str_idx);
1782 	      if (BE (err != REG_NOERROR, 0))
1783 		return err;
1784 	    }
1785 	}
1786 
1787       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1788       if (BE (err != REG_NOERROR, 0))
1789 	return err;
1790     }
1791 
1792   if (candidates && mctx->state_log[str_idx]->has_backref)
1793     {
1794       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1795       if (BE (err != REG_NOERROR, 0))
1796 	return err;
1797     }
1798   return REG_NOERROR;
1799 }
1800 
1801 static reg_errcode_t
1802 internal_function
1803 add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes,
1804 		       const re_node_set *candidates)
1805 {
1806   reg_errcode_t err = REG_NOERROR;
1807   Idx i;
1808 
1809   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1810   if (BE (err != REG_NOERROR, 0))
1811     return err;
1812 
1813   if (!state->inveclosure.alloc)
1814     {
1815       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1816       if (BE (err != REG_NOERROR, 0))
1817         return REG_ESPACE;
1818       for (i = 0; i < dest_nodes->nelem; i++)
1819         re_node_set_merge (&state->inveclosure,
1820 			   dfa->inveclosures + dest_nodes->elems[i]);
1821     }
1822   return re_node_set_add_intersect (dest_nodes, candidates,
1823 				    &state->inveclosure);
1824 }
1825 
1826 static reg_errcode_t
1827 internal_function
1828 sub_epsilon_src_nodes (re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1829 		       const re_node_set *candidates)
1830 {
1831     Idx ecl_idx;
1832     reg_errcode_t err;
1833     re_node_set *inv_eclosure = dfa->inveclosures + node;
1834     re_node_set except_nodes;
1835     re_node_set_init_empty (&except_nodes);
1836     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1837       {
1838 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1839 	if (cur_node == node)
1840 	  continue;
1841 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1842 	  {
1843 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1844 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1845 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1846 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1847 		 && re_node_set_contains (dest_nodes, edst1))
1848 		|| (REG_VALID_NONZERO_INDEX (edst2)
1849 		    && !re_node_set_contains (inv_eclosure, edst2)
1850 		    && re_node_set_contains (dest_nodes, edst2)))
1851 	      {
1852 		err = re_node_set_add_intersect (&except_nodes, candidates,
1853 						 dfa->inveclosures + cur_node);
1854 		if (BE (err != REG_NOERROR, 0))
1855 		  {
1856 		    re_node_set_free (&except_nodes);
1857 		    return err;
1858 		  }
1859 	      }
1860 	  }
1861       }
1862     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1863       {
1864 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1865 	if (!re_node_set_contains (&except_nodes, cur_node))
1866 	  {
1867 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1868 	    re_node_set_remove_at (dest_nodes, idx);
1869 	  }
1870       }
1871     re_node_set_free (&except_nodes);
1872     return REG_NOERROR;
1873 }
1874 
1875 static bool
1876 internal_function
1877 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1878 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1879 {
1880   re_dfa_t *const dfa = mctx->dfa;
1881   Idx lim_idx, src_pos, dst_pos;
1882 
1883   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1884   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1885   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1886     {
1887       Idx subexp_idx;
1888       struct re_backref_cache_entry *ent;
1889       ent = mctx->bkref_ents + limits->elems[lim_idx];
1890       subexp_idx = dfa->nodes[ent->node].opr.idx;
1891 
1892       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1893 					   subexp_idx, dst_node, dst_idx,
1894 					   dst_bkref_idx);
1895       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1896 					   subexp_idx, src_node, src_idx,
1897 					   src_bkref_idx);
1898 
1899       /* In case of:
1900 	 <src> <dst> ( <subexp> )
1901 	 ( <subexp> ) <src> <dst>
1902 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1903       if (src_pos == dst_pos)
1904 	continue; /* This is unrelated limitation.  */
1905       else
1906 	return true;
1907     }
1908   return false;
1909 }
1910 
1911 static int
1912 internal_function
1913 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1914 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1915 {
1916   re_dfa_t *const dfa = mctx->dfa;
1917   re_node_set *eclosures = dfa->eclosures + from_node;
1918   Idx node_idx;
1919 
1920   /* Else, we are on the boundary: examine the nodes on the epsilon
1921      closure.  */
1922   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1923     {
1924       Idx node = eclosures->elems[node_idx];
1925       switch (dfa->nodes[node].type)
1926 	{
1927 	case OP_BACK_REF:
1928 	  if (bkref_idx != REG_MISSING)
1929 	    {
1930 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1931 	      do
1932 	        {
1933 		  Idx dst;
1934 		  int cpos;
1935 
1936 		  if (ent->node != node)
1937 		    continue;
1938 
1939 		  if (subexp_idx < BITSET_WORD_BITS
1940 		      && !(ent->eps_reachable_subexps_map
1941 			   & ((bitset_word) 1 << subexp_idx)))
1942 		    continue;
1943 
1944 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1945 		     OP_CLOSE_SUBEXP cases below.  But, if the
1946 		     destination node is the same node as the source
1947 		     node, don't recurse because it would cause an
1948 		     infinite loop: a regex that exhibits this behavior
1949 		     is ()\1*\1*  */
1950 		  dst = dfa->edests[node].elems[0];
1951 		  if (dst == from_node)
1952 		    {
1953 		      if (boundaries & 1)
1954 		        return -1;
1955 		      else /* if (boundaries & 2) */
1956 		        return 0;
1957 		    }
1958 
1959 		  cpos =
1960 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1961 						 dst, bkref_idx);
1962 		  if (cpos == -1 /* && (boundaries & 1) */)
1963 		    return -1;
1964 		  if (cpos == 0 && (boundaries & 2))
1965 		    return 0;
1966 
1967 		  if (subexp_idx < BITSET_WORD_BITS)
1968 		    ent->eps_reachable_subexps_map &=
1969 		      ~ ((bitset_word) 1 << subexp_idx);
1970 	        }
1971 	      while (ent++->more);
1972 	    }
1973 	  break;
1974 
1975 	case OP_OPEN_SUBEXP:
1976 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1977 	    return -1;
1978 	  break;
1979 
1980 	case OP_CLOSE_SUBEXP:
1981 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1982 	    return 0;
1983 	  break;
1984 
1985 	default:
1986 	    break;
1987 	}
1988     }
1989 
1990   return (boundaries & 2) ? 1 : 0;
1991 }
1992 
1993 static int
1994 internal_function
1995 check_dst_limits_calc_pos (const re_match_context_t *mctx,
1996 			   Idx limit, Idx subexp_idx,
1997 			   Idx from_node, Idx str_idx, Idx bkref_idx)
1998 {
1999   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2000   int boundaries;
2001 
2002   /* If we are outside the range of the subexpression, return -1 or 1.  */
2003   if (str_idx < lim->subexp_from)
2004     return -1;
2005 
2006   if (lim->subexp_to < str_idx)
2007     return 1;
2008 
2009   /* If we are within the subexpression, return 0.  */
2010   boundaries = (str_idx == lim->subexp_from);
2011   boundaries |= (str_idx == lim->subexp_to) << 1;
2012   if (boundaries == 0)
2013     return 0;
2014 
2015   /* Else, examine epsilon closure.  */
2016   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2017 				      from_node, bkref_idx);
2018 }
2019 
2020 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2021    which are against limitations from DEST_NODES. */
2022 
2023 static reg_errcode_t
2024 internal_function
2025 check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes,
2026 		     const re_node_set *candidates, re_node_set *limits,
2027 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2028 {
2029   reg_errcode_t err;
2030   Idx node_idx, lim_idx;
2031 
2032   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2033     {
2034       Idx subexp_idx;
2035       struct re_backref_cache_entry *ent;
2036       ent = bkref_ents + limits->elems[lim_idx];
2037 
2038       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2039 	continue; /* This is unrelated limitation.  */
2040 
2041       subexp_idx = dfa->nodes[ent->node].opr.idx;
2042       if (ent->subexp_to == str_idx)
2043 	{
2044 	  Idx ops_node = REG_MISSING;
2045 	  Idx cls_node = REG_MISSING;
2046 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2047 	    {
2048 	      Idx node = dest_nodes->elems[node_idx];
2049 	      re_token_type_t type = dfa->nodes[node].type;
2050 	      if (type == OP_OPEN_SUBEXP
2051 		  && subexp_idx == dfa->nodes[node].opr.idx)
2052 		ops_node = node;
2053 	      else if (type == OP_CLOSE_SUBEXP
2054 		       && subexp_idx == dfa->nodes[node].opr.idx)
2055 		cls_node = node;
2056 	    }
2057 
2058 	  /* Check the limitation of the open subexpression.  */
2059 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2060 	  if (REG_VALID_INDEX (ops_node))
2061 	    {
2062 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2063 					   candidates);
2064 	      if (BE (err != REG_NOERROR, 0))
2065 		return err;
2066 	    }
2067 
2068 	  /* Check the limitation of the close subexpression.  */
2069 	  if (REG_VALID_INDEX (cls_node))
2070 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2071 	      {
2072 		Idx node = dest_nodes->elems[node_idx];
2073 		if (!re_node_set_contains (dfa->inveclosures + node,
2074 					   cls_node)
2075 		    && !re_node_set_contains (dfa->eclosures + node,
2076 					      cls_node))
2077 		  {
2078 		    /* It is against this limitation.
2079 		       Remove it form the current sifted state.  */
2080 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2081 						 candidates);
2082 		    if (BE (err != REG_NOERROR, 0))
2083 		      return err;
2084 		    --node_idx;
2085 		  }
2086 	      }
2087 	}
2088       else /* (ent->subexp_to != str_idx)  */
2089 	{
2090 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2091 	    {
2092 	      Idx node = dest_nodes->elems[node_idx];
2093 	      re_token_type_t type = dfa->nodes[node].type;
2094 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2095 		{
2096 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2097 		    continue;
2098 		  /* It is against this limitation.
2099 		     Remove it form the current sifted state.  */
2100 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2101 					       candidates);
2102 		  if (BE (err != REG_NOERROR, 0))
2103 		    return err;
2104 		}
2105 	    }
2106 	}
2107     }
2108   return REG_NOERROR;
2109 }
2110 
2111 static reg_errcode_t
2112 internal_function
2113 sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx,
2114 		   Idx str_idx, const re_node_set *candidates)
2115 {
2116   re_dfa_t *const dfa = mctx->dfa;
2117   reg_errcode_t err;
2118   Idx node_idx, node;
2119   re_sift_context_t local_sctx;
2120   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2121 
2122   if (first_idx == REG_MISSING)
2123     return REG_NOERROR;
2124 
2125   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2126 
2127   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2128     {
2129       Idx enabled_idx;
2130       re_token_type_t type;
2131       struct re_backref_cache_entry *entry;
2132       node = candidates->elems[node_idx];
2133       type = dfa->nodes[node].type;
2134       /* Avoid infinite loop for the REs like "()\1+".  */
2135       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2136 	continue;
2137       if (type != OP_BACK_REF)
2138 	continue;
2139 
2140       entry = mctx->bkref_ents + first_idx;
2141       enabled_idx = first_idx;
2142       do
2143 	{
2144 	  bool ok;
2145 	  Idx subexp_len, to_idx, dst_node;
2146 	  re_dfastate_t *cur_state;
2147 
2148 	  if (entry->node != node)
2149 	    continue;
2150 	  subexp_len = entry->subexp_to - entry->subexp_from;
2151 	  to_idx = str_idx + subexp_len;
2152 	  dst_node = (subexp_len ? dfa->nexts[node]
2153 		      : dfa->edests[node].elems[0]);
2154 
2155 	  if (to_idx > sctx->last_str_idx
2156 	      || sctx->sifted_states[to_idx] == NULL
2157 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2158 	      || check_dst_limits (mctx, &sctx->limits, node,
2159 				   str_idx, dst_node, to_idx))
2160 	    continue;
2161 
2162 	  if (local_sctx.sifted_states == NULL)
2163 	    {
2164 	      local_sctx = *sctx;
2165 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2166 	      if (BE (err != REG_NOERROR, 0))
2167 		goto free_return;
2168 	    }
2169 	  local_sctx.last_node = node;
2170 	  local_sctx.last_str_idx = str_idx;
2171 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2172 	  if (BE (! ok, 0))
2173 	    {
2174 	      err = REG_ESPACE;
2175 	      goto free_return;
2176 	    }
2177 	  cur_state = local_sctx.sifted_states[str_idx];
2178 	  err = sift_states_backward (mctx, &local_sctx);
2179 	  if (BE (err != REG_NOERROR, 0))
2180 	    goto free_return;
2181 	  if (sctx->limited_states != NULL)
2182 	    {
2183 	      err = merge_state_array (dfa, sctx->limited_states,
2184 				       local_sctx.sifted_states,
2185 				       str_idx + 1);
2186 	      if (BE (err != REG_NOERROR, 0))
2187 		goto free_return;
2188 	    }
2189 	  local_sctx.sifted_states[str_idx] = cur_state;
2190 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2191 
2192 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2193           entry = mctx->bkref_ents + enabled_idx;
2194 	}
2195       while (enabled_idx++, entry++->more);
2196     }
2197   err = REG_NOERROR;
2198  free_return:
2199   if (local_sctx.sifted_states != NULL)
2200     {
2201       re_node_set_free (&local_sctx.limits);
2202     }
2203 
2204   return err;
2205 }
2206 
2207 
2208 #ifdef RE_ENABLE_I18N
2209 static int
2210 internal_function
2211 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2212 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2213 {
2214   re_dfa_t *const dfa = mctx->dfa;
2215   int naccepted;
2216   /* Check the node can accept `multi byte'.  */
2217   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2218   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2219       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2220 			    dfa->nexts[node_idx]))
2221     /* The node can't accept the `multi byte', or the
2222        destination was already thrown away, then the node
2223        could't accept the current input `multi byte'.   */
2224     naccepted = 0;
2225   /* Otherwise, it is sure that the node could accept
2226      `naccepted' bytes input.  */
2227   return naccepted;
2228 }
2229 #endif /* RE_ENABLE_I18N */
2230 
2231 
2232 /* Functions for state transition.  */
2233 
2234 /* Return the next state to which the current state STATE will transit by
2235    accepting the current input byte, and update STATE_LOG if necessary.
2236    If STATE can accept a multibyte char/collating element/back reference
2237    update the destination of STATE_LOG.  */
2238 
2239 static re_dfastate_t *
2240 internal_function
2241 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2242 	       re_dfastate_t *state)
2243 {
2244   re_dfastate_t **trtable;
2245   unsigned char ch;
2246 
2247 #ifdef RE_ENABLE_I18N
2248   /* If the current state can accept multibyte.  */
2249   if (BE (state->accept_mb, 0))
2250     {
2251       *err = transit_state_mb (mctx, state);
2252       if (BE (*err != REG_NOERROR, 0))
2253 	return NULL;
2254     }
2255 #endif /* RE_ENABLE_I18N */
2256 
2257   /* Then decide the next state with the single byte.  */
2258 #if 0
2259   if (0)
2260     /* don't use transition table  */
2261     return transit_state_sb (err, mctx, state);
2262 #endif
2263 
2264   /* Use transition table  */
2265   ch = re_string_fetch_byte (&mctx->input);
2266   for (;;)
2267     {
2268       trtable = state->trtable;
2269       if (BE (trtable != NULL, 1))
2270 	return trtable[ch];
2271 
2272       trtable = state->word_trtable;
2273       if (BE (trtable != NULL, 1))
2274         {
2275 	  unsigned int context;
2276 	  context
2277 	    = re_string_context_at (&mctx->input,
2278 				    re_string_cur_idx (&mctx->input) - 1,
2279 				    mctx->eflags);
2280 	  if (IS_WORD_CONTEXT (context))
2281 	    return trtable[ch + SBC_MAX];
2282 	  else
2283 	    return trtable[ch];
2284 	}
2285 
2286       if (!build_trtable (mctx->dfa, state))
2287 	{
2288 	  *err = REG_ESPACE;
2289 	  return NULL;
2290 	}
2291 
2292       /* Retry, we now have a transition table.  */
2293     }
2294 }
2295 
2296 /* Update the state_log if we need */
2297 re_dfastate_t *
2298 internal_function
2299 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2300 		      re_dfastate_t *next_state)
2301 {
2302   re_dfa_t *const dfa = mctx->dfa;
2303   Idx cur_idx = re_string_cur_idx (&mctx->input);
2304 
2305   if (cur_idx > mctx->state_log_top)
2306     {
2307       mctx->state_log[cur_idx] = next_state;
2308       mctx->state_log_top = cur_idx;
2309     }
2310   else if (mctx->state_log[cur_idx] == 0)
2311     {
2312       mctx->state_log[cur_idx] = next_state;
2313     }
2314   else
2315     {
2316       re_dfastate_t *pstate;
2317       unsigned int context;
2318       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2319       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2320          the destination of a multibyte char/collating element/
2321          back reference.  Then the next state is the union set of
2322          these destinations and the results of the transition table.  */
2323       pstate = mctx->state_log[cur_idx];
2324       log_nodes = pstate->entrance_nodes;
2325       if (next_state != NULL)
2326         {
2327           table_nodes = next_state->entrance_nodes;
2328           *err = re_node_set_init_union (&next_nodes, table_nodes,
2329 					     log_nodes);
2330           if (BE (*err != REG_NOERROR, 0))
2331 	    return NULL;
2332         }
2333       else
2334         next_nodes = *log_nodes;
2335       /* Note: We already add the nodes of the initial state,
2336 	 then we don't need to add them here.  */
2337 
2338       context = re_string_context_at (&mctx->input,
2339 				      re_string_cur_idx (&mctx->input) - 1,
2340 				      mctx->eflags);
2341       next_state = mctx->state_log[cur_idx]
2342         = re_acquire_state_context (err, dfa, &next_nodes, context);
2343       /* We don't need to check errors here, since the return value of
2344          this function is next_state and ERR is already set.  */
2345 
2346       if (table_nodes != NULL)
2347         re_node_set_free (&next_nodes);
2348     }
2349 
2350   if (BE (dfa->nbackref, 0) && next_state != NULL)
2351     {
2352       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2353 	 later.  We must check them here, since the back references in the
2354 	 next state might use them.  */
2355       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2356 					cur_idx);
2357       if (BE (*err != REG_NOERROR, 0))
2358 	return NULL;
2359 
2360       /* If the next state has back references.  */
2361       if (next_state->has_backref)
2362 	{
2363 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2364 	  if (BE (*err != REG_NOERROR, 0))
2365 	    return NULL;
2366 	  next_state = mctx->state_log[cur_idx];
2367 	}
2368     }
2369 
2370   return next_state;
2371 }
2372 
2373 /* Skip bytes in the input that correspond to part of a
2374    multi-byte match, then look in the log for a state
2375    from which to restart matching.  */
2376 static re_dfastate_t *
2377 internal_function
2378 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2379 {
2380   re_dfastate_t *cur_state = NULL;
2381   do
2382     {
2383       Idx max = mctx->state_log_top;
2384       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2385 
2386       do
2387 	{
2388           if (++cur_str_idx > max)
2389             return NULL;
2390           re_string_skip_bytes (&mctx->input, 1);
2391 	}
2392       while (mctx->state_log[cur_str_idx] == NULL);
2393 
2394       cur_state = merge_state_with_log (err, mctx, NULL);
2395     }
2396   while (*err == REG_NOERROR && cur_state == NULL);
2397   return cur_state;
2398 }
2399 
2400 /* Helper functions for transit_state.  */
2401 
2402 /* From the node set CUR_NODES, pick up the nodes whose types are
2403    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2404    expression. And register them to use them later for evaluating the
2405    correspoding back references.  */
2406 
2407 static reg_errcode_t
2408 internal_function
2409 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2410 			   Idx str_idx)
2411 {
2412   re_dfa_t *const dfa = mctx->dfa;
2413   Idx node_idx;
2414   reg_errcode_t err;
2415 
2416   /* TODO: This isn't efficient.
2417 	   Because there might be more than one nodes whose types are
2418 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2419 	   nodes.
2420 	   E.g. RE: (a){2}  */
2421   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2422     {
2423       Idx node = cur_nodes->elems[node_idx];
2424       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2425 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2426 	  && (dfa->used_bkref_map
2427 	      & ((bitset_word) 1 << dfa->nodes[node].opr.idx)))
2428 	{
2429 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2430 	  if (BE (err != REG_NOERROR, 0))
2431 	    return err;
2432 	}
2433     }
2434   return REG_NOERROR;
2435 }
2436 
2437 #if 0
2438 /* Return the next state to which the current state STATE will transit by
2439    accepting the current input byte.  */
2440 
2441 static re_dfastate_t *
2442 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2443 		  re_dfastate_t *state)
2444 {
2445   re_dfa_t *const dfa = mctx->dfa;
2446   re_node_set next_nodes;
2447   re_dfastate_t *next_state;
2448   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2449   unsigned int context;
2450 
2451   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2452   if (BE (*err != REG_NOERROR, 0))
2453     return NULL;
2454   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2455     {
2456       Idx cur_node = state->nodes.elems[node_cnt];
2457       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2458 	{
2459 	  *err = re_node_set_merge (&next_nodes,
2460 				    dfa->eclosures + dfa->nexts[cur_node]);
2461 	  if (BE (*err != REG_NOERROR, 0))
2462 	    {
2463 	      re_node_set_free (&next_nodes);
2464 	      return NULL;
2465 	    }
2466 	}
2467     }
2468   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2469   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2470   /* We don't need to check errors here, since the return value of
2471      this function is next_state and ERR is already set.  */
2472 
2473   re_node_set_free (&next_nodes);
2474   re_string_skip_bytes (&mctx->input, 1);
2475   return next_state;
2476 }
2477 #endif
2478 
2479 #ifdef RE_ENABLE_I18N
2480 static reg_errcode_t
2481 internal_function
2482 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2483 {
2484   re_dfa_t *const dfa = mctx->dfa;
2485   reg_errcode_t err;
2486   Idx i;
2487 
2488   for (i = 0; i < pstate->nodes.nelem; ++i)
2489     {
2490       re_node_set dest_nodes, *new_nodes;
2491       Idx cur_node_idx = pstate->nodes.elems[i];
2492       int naccepted;
2493       Idx dest_idx;
2494       unsigned int context;
2495       re_dfastate_t *dest_state;
2496 
2497       if (!dfa->nodes[cur_node_idx].accept_mb)
2498         continue;
2499 
2500       if (dfa->nodes[cur_node_idx].constraint)
2501 	{
2502 	  context = re_string_context_at (&mctx->input,
2503 					  re_string_cur_idx (&mctx->input),
2504 					  mctx->eflags);
2505 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2506 					   context))
2507 	    continue;
2508 	}
2509 
2510       /* How many bytes the node can accept?  */
2511       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2512 					   re_string_cur_idx (&mctx->input));
2513       if (naccepted == 0)
2514 	continue;
2515 
2516       /* The node can accepts `naccepted' bytes.  */
2517       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2518       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2519 			       : mctx->max_mb_elem_len);
2520       err = clean_state_log_if_needed (mctx, dest_idx);
2521       if (BE (err != REG_NOERROR, 0))
2522 	return err;
2523 #ifdef DEBUG
2524       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2525 #endif
2526       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2527 
2528       dest_state = mctx->state_log[dest_idx];
2529       if (dest_state == NULL)
2530 	dest_nodes = *new_nodes;
2531       else
2532 	{
2533 	  err = re_node_set_init_union (&dest_nodes,
2534 					dest_state->entrance_nodes, new_nodes);
2535 	  if (BE (err != REG_NOERROR, 0))
2536 	    return err;
2537 	}
2538       context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2539       mctx->state_log[dest_idx]
2540 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2541       if (dest_state != NULL)
2542 	re_node_set_free (&dest_nodes);
2543       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2544 	return err;
2545     }
2546   return REG_NOERROR;
2547 }
2548 #endif /* RE_ENABLE_I18N */
2549 
2550 static reg_errcode_t
2551 internal_function
2552 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2553 {
2554   re_dfa_t *const dfa = mctx->dfa;
2555   reg_errcode_t err;
2556   Idx i;
2557   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2558 
2559   for (i = 0; i < nodes->nelem; ++i)
2560     {
2561       Idx dest_str_idx, prev_nelem, bkc_idx;
2562       Idx node_idx = nodes->elems[i];
2563       unsigned int context;
2564       const re_token_t *node = dfa->nodes + node_idx;
2565       re_node_set *new_dest_nodes;
2566 
2567       /* Check whether `node' is a backreference or not.  */
2568       if (node->type != OP_BACK_REF)
2569 	continue;
2570 
2571       if (node->constraint)
2572 	{
2573 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2574 					  mctx->eflags);
2575 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2576 	    continue;
2577 	}
2578 
2579       /* `node' is a backreference.
2580 	 Check the substring which the substring matched.  */
2581       bkc_idx = mctx->nbkref_ents;
2582       err = get_subexp (mctx, node_idx, cur_str_idx);
2583       if (BE (err != REG_NOERROR, 0))
2584 	goto free_return;
2585 
2586       /* And add the epsilon closures (which is `new_dest_nodes') of
2587 	 the backreference to appropriate state_log.  */
2588 #ifdef DEBUG
2589       assert (dfa->nexts[node_idx] != REG_MISSING);
2590 #endif
2591       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2592 	{
2593 	  Idx subexp_len;
2594 	  re_dfastate_t *dest_state;
2595 	  struct re_backref_cache_entry *bkref_ent;
2596 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2597 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2598 	    continue;
2599 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2600 	  new_dest_nodes = (subexp_len == 0
2601 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2602 			    : dfa->eclosures + dfa->nexts[node_idx]);
2603 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2604 			  - bkref_ent->subexp_from);
2605 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2606 					  mctx->eflags);
2607 	  dest_state = mctx->state_log[dest_str_idx];
2608 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2609 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2610 	  /* Add `new_dest_node' to state_log.  */
2611 	  if (dest_state == NULL)
2612 	    {
2613 	      mctx->state_log[dest_str_idx]
2614 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2615 					    context);
2616 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2617 		      && err != REG_NOERROR, 0))
2618 		goto free_return;
2619 	    }
2620 	  else
2621 	    {
2622 	      re_node_set dest_nodes;
2623 	      err = re_node_set_init_union (&dest_nodes,
2624 					    dest_state->entrance_nodes,
2625 					    new_dest_nodes);
2626 	      if (BE (err != REG_NOERROR, 0))
2627 		{
2628 		  re_node_set_free (&dest_nodes);
2629 		  goto free_return;
2630 		}
2631 	      mctx->state_log[dest_str_idx]
2632 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2633 	      re_node_set_free (&dest_nodes);
2634 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2635 		      && err != REG_NOERROR, 0))
2636 		goto free_return;
2637 	    }
2638 	  /* We need to check recursively if the backreference can epsilon
2639 	     transit.  */
2640 	  if (subexp_len == 0
2641 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2642 	    {
2643 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2644 					       cur_str_idx);
2645 	      if (BE (err != REG_NOERROR, 0))
2646 		goto free_return;
2647 	      err = transit_state_bkref (mctx, new_dest_nodes);
2648 	      if (BE (err != REG_NOERROR, 0))
2649 		goto free_return;
2650 	    }
2651 	}
2652     }
2653   err = REG_NOERROR;
2654  free_return:
2655   return err;
2656 }
2657 
2658 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2659    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2660    Note that we might collect inappropriate candidates here.
2661    However, the cost of checking them strictly here is too high, then we
2662    delay these checking for prune_impossible_nodes().  */
2663 
2664 static reg_errcode_t
2665 internal_function
2666 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2667 {
2668   re_dfa_t *const dfa = mctx->dfa;
2669   Idx subexp_num, sub_top_idx;
2670   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2671   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2672   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2673   if (cache_idx != REG_MISSING)
2674     {
2675       const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2676       do
2677         if (entry->node == bkref_node)
2678 	  return REG_NOERROR; /* We already checked it.  */
2679       while (entry++->more);
2680     }
2681 
2682   subexp_num = dfa->nodes[bkref_node].opr.idx;
2683 
2684   /* For each sub expression  */
2685   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2686     {
2687       reg_errcode_t err;
2688       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2689       re_sub_match_last_t *sub_last;
2690       Idx sub_last_idx, sl_str, bkref_str_off;
2691 
2692       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2693 	continue; /* It isn't related.  */
2694 
2695       sl_str = sub_top->str_idx;
2696       bkref_str_off = bkref_str_idx;
2697       /* At first, check the last node of sub expressions we already
2698 	 evaluated.  */
2699       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2700 	{
2701 	  regoff_t sl_str_diff;
2702 	  sub_last = sub_top->lasts[sub_last_idx];
2703 	  sl_str_diff = sub_last->str_idx - sl_str;
2704 	  /* The matched string by the sub expression match with the substring
2705 	     at the back reference?  */
2706 	  if (sl_str_diff > 0)
2707 	    {
2708 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2709 		{
2710 		  /* Not enough chars for a successful match.  */
2711 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2712 		    break;
2713 
2714 		  err = clean_state_log_if_needed (mctx,
2715 						   bkref_str_off
2716 						   + sl_str_diff);
2717 		  if (BE (err != REG_NOERROR, 0))
2718 		    return err;
2719 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2720 		}
2721 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2722 		break; /* We don't need to search this sub expression any more.  */
2723 	    }
2724 	  bkref_str_off += sl_str_diff;
2725 	  sl_str += sl_str_diff;
2726 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2727 				bkref_str_idx);
2728 
2729 	  /* Reload buf, since the preceding call might have reallocated
2730 	     the buffer.  */
2731 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2732 
2733 	  if (err == REG_NOMATCH)
2734 	    continue;
2735 	  if (BE (err != REG_NOERROR, 0))
2736 	    return err;
2737 	}
2738 
2739       if (sub_last_idx < sub_top->nlasts)
2740 	continue;
2741       if (sub_last_idx > 0)
2742 	++sl_str;
2743       /* Then, search for the other last nodes of the sub expression.  */
2744       for (; sl_str <= bkref_str_idx; ++sl_str)
2745 	{
2746 	  Idx cls_node;
2747 	  regoff_t sl_str_off;
2748 	  const re_node_set *nodes;
2749 	  sl_str_off = sl_str - sub_top->str_idx;
2750 	  /* The matched string by the sub expression match with the substring
2751 	     at the back reference?  */
2752 	  if (sl_str_off > 0)
2753 	    {
2754 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2755 		{
2756 		  /* If we are at the end of the input, we cannot match.  */
2757 		  if (bkref_str_off >= mctx->input.len)
2758 		    break;
2759 
2760 		  err = extend_buffers (mctx);
2761 		  if (BE (err != REG_NOERROR, 0))
2762 		    return err;
2763 
2764 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2765 		}
2766 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2767 		break; /* We don't need to search this sub expression
2768 			  any more.  */
2769 	    }
2770 	  if (mctx->state_log[sl_str] == NULL)
2771 	    continue;
2772 	  /* Does this state have a ')' of the sub expression?  */
2773 	  nodes = &mctx->state_log[sl_str]->nodes;
2774 	  cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2775 	  if (cls_node == REG_MISSING)
2776 	    continue; /* No.  */
2777 	  if (sub_top->path == NULL)
2778 	    {
2779 	      sub_top->path = re_calloc (state_array_t,
2780 					 sl_str - sub_top->str_idx + 1);
2781 	      if (sub_top->path == NULL)
2782 		return REG_ESPACE;
2783 	    }
2784 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2785 	     in the current context?  */
2786 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2787 			       sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2788 	  if (err == REG_NOMATCH)
2789 	      continue;
2790 	  if (BE (err != REG_NOERROR, 0))
2791 	      return err;
2792 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2793 	  if (BE (sub_last == NULL, 0))
2794 	    return REG_ESPACE;
2795 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2796 				bkref_str_idx);
2797 	  if (err == REG_NOMATCH)
2798 	    continue;
2799 	}
2800     }
2801   return REG_NOERROR;
2802 }
2803 
2804 /* Helper functions for get_subexp().  */
2805 
2806 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2807    If it can arrive, register the sub expression expressed with SUB_TOP
2808    and SUB_LAST.  */
2809 
2810 static reg_errcode_t
2811 internal_function
2812 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2813 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2814 {
2815   reg_errcode_t err;
2816   Idx to_idx;
2817   /* Can the subexpression arrive the back reference?  */
2818   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2819 		       sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2820   if (err != REG_NOERROR)
2821     return err;
2822   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2823 			     sub_last->str_idx);
2824   if (BE (err != REG_NOERROR, 0))
2825     return err;
2826   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2827   return clean_state_log_if_needed (mctx, to_idx);
2828 }
2829 
2830 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2831    Search '(' if FL_OPEN, or search ')' otherwise.
2832    TODO: This function isn't efficient...
2833 	 Because there might be more than one nodes whose types are
2834 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2835 	 nodes.
2836 	 E.g. RE: (a){2}  */
2837 
2838 static Idx
2839 internal_function
2840 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2841 		  Idx subexp_idx, int type)
2842 {
2843   Idx cls_idx;
2844   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2845     {
2846       Idx cls_node = nodes->elems[cls_idx];
2847       const re_token_t *node = dfa->nodes + cls_node;
2848       if (node->type == type
2849 	  && node->opr.idx == subexp_idx)
2850 	return cls_node;
2851     }
2852   return REG_MISSING;
2853 }
2854 
2855 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2856    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2857    heavily reused.
2858    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2859 
2860 static reg_errcode_t
2861 internal_function
2862 check_arrival (re_match_context_t *mctx, state_array_t *path,
2863 	       Idx top_node, Idx top_str, Idx last_node, Idx last_str,
2864 	       int type)
2865 {
2866   re_dfa_t *const dfa = mctx->dfa;
2867   reg_errcode_t err;
2868   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2869   re_dfastate_t *cur_state = NULL;
2870   re_node_set *cur_nodes, next_nodes;
2871   re_dfastate_t **backup_state_log;
2872   unsigned int context;
2873 
2874   subexp_num = dfa->nodes[top_node].opr.idx;
2875   /* Extend the buffer if we need.  */
2876   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2877     {
2878       re_dfastate_t **new_array;
2879       Idx old_alloc = path->alloc;
2880       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2881       if (BE (new_alloc < old_alloc, 0))
2882 	return REG_ESPACE;
2883       new_array = re_xrealloc (path->array, re_dfastate_t *, new_alloc);
2884       if (BE (new_array == NULL, 0))
2885 	return REG_ESPACE;
2886       path->array = new_array;
2887       path->alloc = new_alloc;
2888       memset (new_array + old_alloc, '\0',
2889 	      sizeof (re_dfastate_t *) * (new_alloc - old_alloc));
2890     }
2891 
2892   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2893 
2894   /* Temporary modify MCTX.  */
2895   backup_state_log = mctx->state_log;
2896   backup_cur_idx = mctx->input.cur_idx;
2897   mctx->state_log = path->array;
2898   mctx->input.cur_idx = str_idx;
2899 
2900   /* Setup initial node set.  */
2901   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2902   if (str_idx == top_str)
2903     {
2904       err = re_node_set_init_1 (&next_nodes, top_node);
2905       if (BE (err != REG_NOERROR, 0))
2906 	return err;
2907       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2908       if (BE (err != REG_NOERROR, 0))
2909 	{
2910 	  re_node_set_free (&next_nodes);
2911 	  return err;
2912 	}
2913     }
2914   else
2915     {
2916       cur_state = mctx->state_log[str_idx];
2917       if (cur_state && cur_state->has_backref)
2918 	{
2919 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2920 	  if (BE ( err != REG_NOERROR, 0))
2921 	    return err;
2922 	}
2923       else
2924 	re_node_set_init_empty (&next_nodes);
2925     }
2926   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2927     {
2928       if (next_nodes.nelem)
2929 	{
2930 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2931 				    subexp_num, type);
2932 	  if (BE ( err != REG_NOERROR, 0))
2933 	    {
2934 	      re_node_set_free (&next_nodes);
2935 	      return err;
2936 	    }
2937 	}
2938       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2939       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2940 	{
2941 	  re_node_set_free (&next_nodes);
2942 	  return err;
2943 	}
2944       mctx->state_log[str_idx] = cur_state;
2945     }
2946 
2947   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2948     {
2949       re_node_set_empty (&next_nodes);
2950       if (mctx->state_log[str_idx + 1])
2951 	{
2952 	  err = re_node_set_merge (&next_nodes,
2953 				   &mctx->state_log[str_idx + 1]->nodes);
2954 	  if (BE (err != REG_NOERROR, 0))
2955 	    {
2956 	      re_node_set_free (&next_nodes);
2957 	      return err;
2958 	    }
2959 	}
2960       if (cur_state)
2961 	{
2962 	  err = check_arrival_add_next_nodes (mctx, str_idx,
2963 					      &cur_state->non_eps_nodes, &next_nodes);
2964 	  if (BE (err != REG_NOERROR, 0))
2965 	    {
2966 	      re_node_set_free (&next_nodes);
2967 	      return err;
2968 	    }
2969 	}
2970       ++str_idx;
2971       if (next_nodes.nelem)
2972 	{
2973 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2974 	  if (BE (err != REG_NOERROR, 0))
2975 	    {
2976 	      re_node_set_free (&next_nodes);
2977 	      return err;
2978 	    }
2979 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2980 				    subexp_num, type);
2981 	  if (BE ( err != REG_NOERROR, 0))
2982 	    {
2983 	      re_node_set_free (&next_nodes);
2984 	      return err;
2985 	    }
2986 	}
2987       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2988       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2989       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2990 	{
2991 	  re_node_set_free (&next_nodes);
2992 	  return err;
2993 	}
2994       mctx->state_log[str_idx] = cur_state;
2995       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2996     }
2997   re_node_set_free (&next_nodes);
2998   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2999 	       : &mctx->state_log[last_str]->nodes);
3000   path->next_idx = str_idx;
3001 
3002   /* Fix MCTX.  */
3003   mctx->state_log = backup_state_log;
3004   mctx->input.cur_idx = backup_cur_idx;
3005 
3006   /* Then check the current node set has the node LAST_NODE.  */
3007   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3008     return REG_NOERROR;
3009 
3010   return REG_NOMATCH;
3011 }
3012 
3013 /* Helper functions for check_arrival.  */
3014 
3015 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3016    to NEXT_NODES.
3017    TODO: This function is similar to the functions transit_state*(),
3018 	 however this function has many additional works.
3019 	 Can't we unify them?  */
3020 
3021 static reg_errcode_t
3022 internal_function
3023 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3024 			      re_node_set *cur_nodes,
3025 			      re_node_set *next_nodes)
3026 {
3027   re_dfa_t *const dfa = mctx->dfa;
3028   bool ok;
3029   Idx cur_idx;
3030   reg_errcode_t err;
3031   re_node_set union_set;
3032   re_node_set_init_empty (&union_set);
3033   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3034     {
3035       int naccepted = 0;
3036       Idx cur_node = cur_nodes->elems[cur_idx];
3037 #ifdef DEBUG
3038       re_token_type_t type = dfa->nodes[cur_node].type;
3039       assert (!IS_EPSILON_NODE (type));
3040 #endif
3041 #ifdef RE_ENABLE_I18N
3042       /* If the node may accept `multi byte'.  */
3043       if (dfa->nodes[cur_node].accept_mb)
3044 	{
3045 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3046 					       str_idx);
3047 	  if (naccepted > 1)
3048 	    {
3049 	      re_dfastate_t *dest_state;
3050 	      Idx next_node = dfa->nexts[cur_node];
3051 	      Idx next_idx = str_idx + naccepted;
3052 	      dest_state = mctx->state_log[next_idx];
3053 	      re_node_set_empty (&union_set);
3054 	      if (dest_state)
3055 		{
3056 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3057 		  if (BE (err != REG_NOERROR, 0))
3058 		    {
3059 		      re_node_set_free (&union_set);
3060 		      return err;
3061 		    }
3062 		}
3063 	      ok = re_node_set_insert (&union_set, next_node);
3064 	      if (BE (! ok, 0))
3065 		{
3066 		  re_node_set_free (&union_set);
3067 		  return REG_ESPACE;
3068 		}
3069 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3070 							    &union_set);
3071 	      if (BE (mctx->state_log[next_idx] == NULL
3072 		      && err != REG_NOERROR, 0))
3073 		{
3074 		  re_node_set_free (&union_set);
3075 		  return err;
3076 		}
3077 	    }
3078 	}
3079 #endif /* RE_ENABLE_I18N */
3080       if (naccepted
3081 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3082 	{
3083 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3084 	  if (BE (! ok, 0))
3085 	    {
3086 	      re_node_set_free (&union_set);
3087 	      return REG_ESPACE;
3088 	    }
3089 	}
3090     }
3091   re_node_set_free (&union_set);
3092   return REG_NOERROR;
3093 }
3094 
3095 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3096    CUR_NODES, however exclude the nodes which are:
3097     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3098     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3099 */
3100 
3101 static reg_errcode_t
3102 internal_function
3103 check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes,
3104 			  Idx ex_subexp, int type)
3105 {
3106   reg_errcode_t err;
3107   Idx idx, outside_node;
3108   re_node_set new_nodes;
3109 #ifdef DEBUG
3110   assert (cur_nodes->nelem);
3111 #endif
3112   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3113   if (BE (err != REG_NOERROR, 0))
3114     return err;
3115   /* Create a new node set NEW_NODES with the nodes which are epsilon
3116      closures of the node in CUR_NODES.  */
3117 
3118   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3119     {
3120       Idx cur_node = cur_nodes->elems[idx];
3121       re_node_set *eclosure = dfa->eclosures + cur_node;
3122       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3123       if (outside_node == REG_MISSING)
3124 	{
3125 	  /* There are no problematic nodes, just merge them.  */
3126 	  err = re_node_set_merge (&new_nodes, eclosure);
3127 	  if (BE (err != REG_NOERROR, 0))
3128 	    {
3129 	      re_node_set_free (&new_nodes);
3130 	      return err;
3131 	    }
3132 	}
3133       else
3134 	{
3135 	  /* There are problematic nodes, re-calculate incrementally.  */
3136 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3137 					      ex_subexp, type);
3138 	  if (BE (err != REG_NOERROR, 0))
3139 	    {
3140 	      re_node_set_free (&new_nodes);
3141 	      return err;
3142 	    }
3143 	}
3144     }
3145   re_node_set_free (cur_nodes);
3146   *cur_nodes = new_nodes;
3147   return REG_NOERROR;
3148 }
3149 
3150 /* Helper function for check_arrival_expand_ecl.
3151    Check incrementally the epsilon closure of TARGET, and if it isn't
3152    problematic append it to DST_NODES.  */
3153 
3154 static reg_errcode_t
3155 internal_function
3156 check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes,
3157 			      Idx target, Idx ex_subexp, int type)
3158 {
3159   Idx cur_node;
3160   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3161     {
3162       bool ok;
3163 
3164       if (dfa->nodes[cur_node].type == type
3165 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3166 	{
3167 	  if (type == OP_CLOSE_SUBEXP)
3168 	    {
3169 	      ok = re_node_set_insert (dst_nodes, cur_node);
3170 	      if (BE (! ok, 0))
3171 		return REG_ESPACE;
3172 	    }
3173 	  break;
3174 	}
3175       ok = re_node_set_insert (dst_nodes, cur_node);
3176       if (BE (! ok, 0))
3177 	return REG_ESPACE;
3178       if (dfa->edests[cur_node].nelem == 0)
3179 	break;
3180       if (dfa->edests[cur_node].nelem == 2)
3181 	{
3182 	  reg_errcode_t ret =
3183 	    check_arrival_expand_ecl_sub (dfa, dst_nodes,
3184 					  dfa->edests[cur_node].elems[1],
3185 					  ex_subexp, type);
3186 	  if (BE (ret != REG_NOERROR, 0))
3187 	    return ret;
3188 	}
3189       cur_node = dfa->edests[cur_node].elems[0];
3190     }
3191   return REG_NOERROR;
3192 }
3193 
3194 
3195 /* For all the back references in the current state, calculate the
3196    destination of the back references by the appropriate entry
3197    in MCTX->BKREF_ENTS.  */
3198 
3199 static reg_errcode_t
3200 internal_function
3201 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3202 		    Idx cur_str, Idx subexp_num, int type)
3203 {
3204   re_dfa_t *const dfa = mctx->dfa;
3205   reg_errcode_t err;
3206   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3207   struct re_backref_cache_entry *ent;
3208 
3209   if (cache_idx_start == REG_MISSING)
3210     return REG_NOERROR;
3211 
3212  restart:
3213   ent = mctx->bkref_ents + cache_idx_start;
3214   do
3215     {
3216       Idx to_idx, next_node;
3217 
3218       /* Is this entry ENT is appropriate?  */
3219       if (!re_node_set_contains (cur_nodes, ent->node))
3220 	continue; /* No.  */
3221 
3222       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3223       /* Calculate the destination of the back reference, and append it
3224 	 to MCTX->STATE_LOG.  */
3225       if (to_idx == cur_str)
3226 	{
3227 	  /* The backreference did epsilon transit, we must re-check all the
3228 	     node in the current state.  */
3229 	  re_node_set new_dests;
3230 	  reg_errcode_t err2, err3;
3231 	  next_node = dfa->edests[ent->node].elems[0];
3232 	  if (re_node_set_contains (cur_nodes, next_node))
3233 	    continue;
3234 	  err = re_node_set_init_1 (&new_dests, next_node);
3235 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3236 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3237 	  re_node_set_free (&new_dests);
3238 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3239 		  || err3 != REG_NOERROR, 0))
3240 	    {
3241 	      err = (err != REG_NOERROR ? err
3242 		     : (err2 != REG_NOERROR ? err2 : err3));
3243 	      return err;
3244 	    }
3245 	  /* TODO: It is still inefficient...  */
3246 	  goto restart;
3247 	}
3248       else
3249 	{
3250 	  re_node_set union_set;
3251 	  next_node = dfa->nexts[ent->node];
3252 	  if (mctx->state_log[to_idx])
3253 	    {
3254 	      bool ok;
3255 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3256 					next_node))
3257 		continue;
3258 	      err = re_node_set_init_copy (&union_set,
3259 					   &mctx->state_log[to_idx]->nodes);
3260 	      ok = re_node_set_insert (&union_set, next_node);
3261 	      if (BE (err != REG_NOERROR || ! ok, 0))
3262 		{
3263 		  re_node_set_free (&union_set);
3264 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3265 		  return err;
3266 		}
3267 	    }
3268 	  else
3269 	    {
3270 	      err = re_node_set_init_1 (&union_set, next_node);
3271 	      if (BE (err != REG_NOERROR, 0))
3272 		return err;
3273 	    }
3274 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3275 	  re_node_set_free (&union_set);
3276 	  if (BE (mctx->state_log[to_idx] == NULL
3277 		  && err != REG_NOERROR, 0))
3278 	    return err;
3279 	}
3280     }
3281   while (ent++->more);
3282   return REG_NOERROR;
3283 }
3284 
3285 /* Build transition table for the state.
3286    Return true if successful.  */
3287 
3288 static bool
3289 internal_function
3290 build_trtable (re_dfa_t *dfa, re_dfastate_t *state)
3291 {
3292   reg_errcode_t err;
3293   Idx i, j;
3294   int ch;
3295   bool need_word_trtable = false;
3296   bitset_word elem, mask;
3297   bool dests_node_malloced = false, dest_states_malloced = false;
3298   Idx ndests; /* Number of the destination states from `state'.  */
3299   re_dfastate_t **trtable;
3300   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3301   re_node_set follows, *dests_node;
3302   bitset *dests_ch;
3303   bitset acceptable;
3304 
3305   struct dests_alloc
3306   {
3307     re_node_set dests_node[SBC_MAX];
3308     bitset dests_ch[SBC_MAX];
3309   } *dests_alloc;
3310 
3311   /* We build DFA states which corresponds to the destination nodes
3312      from `state'.  `dests_node[i]' represents the nodes which i-th
3313      destination state contains, and `dests_ch[i]' represents the
3314      characters which i-th destination state accepts.  */
3315   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3316     dests_alloc = (struct dests_alloc *) alloca (sizeof dests_alloc[0]);
3317   else
3318     {
3319       dests_alloc = re_malloc (struct dests_alloc, 1);
3320       if (BE (dests_alloc == NULL, 0))
3321 	return false;
3322       dests_node_malloced = true;
3323     }
3324   dests_node = dests_alloc->dests_node;
3325   dests_ch = dests_alloc->dests_ch;
3326 
3327   /* Initialize transiton table.  */
3328   state->word_trtable = state->trtable = NULL;
3329 
3330   /* At first, group all nodes belonging to `state' into several
3331      destinations.  */
3332   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3333   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3334     {
3335       if (dests_node_malloced)
3336 	free (dests_alloc);
3337       if (ndests == 0)
3338 	{
3339 	  state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3340 	  return true;
3341 	}
3342       return false;
3343     }
3344 
3345   err = re_node_set_alloc (&follows, ndests + 1);
3346   if (BE (err != REG_NOERROR, 0))
3347     goto out_free;
3348 
3349   /* Avoid arithmetic overflow in size calculation.  */
3350   if (BE (((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)
3351 	   / (3 * sizeof (re_dfastate_t *)))
3352 	  < ndests, 0))
3353     goto out_free;
3354 
3355   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3356 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3357     dest_states = (re_dfastate_t **)
3358       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3359   else
3360     {
3361       dest_states = (re_dfastate_t **)
3362 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3363       if (BE (dest_states == NULL, 0))
3364 	{
3365 out_free:
3366 	  if (dest_states_malloced)
3367 	    free (dest_states);
3368 	  re_node_set_free (&follows);
3369 	  for (i = 0; i < ndests; ++i)
3370 	    re_node_set_free (dests_node + i);
3371 	  if (dests_node_malloced)
3372 	    free (dests_alloc);
3373 	  return false;
3374 	}
3375       dest_states_malloced = true;
3376     }
3377   dest_states_word = dest_states + ndests;
3378   dest_states_nl = dest_states_word + ndests;
3379   bitset_empty (acceptable);
3380 
3381   /* Then build the states for all destinations.  */
3382   for (i = 0; i < ndests; ++i)
3383     {
3384       Idx next_node;
3385       re_node_set_empty (&follows);
3386       /* Merge the follows of this destination states.  */
3387       for (j = 0; j < dests_node[i].nelem; ++j)
3388 	{
3389 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3390 	  if (next_node != REG_MISSING)
3391 	    {
3392 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3393 	      if (BE (err != REG_NOERROR, 0))
3394 		goto out_free;
3395 	    }
3396 	}
3397       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3398       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3399 	goto out_free;
3400       /* If the new state has context constraint,
3401 	 build appropriate states for these contexts.  */
3402       if (dest_states[i]->has_constraint)
3403 	{
3404 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3405 							  CONTEXT_WORD);
3406 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3407 	    goto out_free;
3408 
3409 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3410 	    need_word_trtable = true;
3411 
3412 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3413 							CONTEXT_NEWLINE);
3414 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3415 	    goto out_free;
3416  	}
3417       else
3418 	{
3419 	  dest_states_word[i] = dest_states[i];
3420 	  dest_states_nl[i] = dest_states[i];
3421 	}
3422       bitset_merge (acceptable, dests_ch[i]);
3423     }
3424 
3425   if (!BE (need_word_trtable, 0))
3426     {
3427       /* We don't care about whether the following character is a word
3428 	 character, or we are in a single-byte character set so we can
3429 	 discern by looking at the character code: allocate a
3430 	 256-entry transition table.  */
3431       trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3432       if (BE (trtable == NULL, 0))
3433 	goto out_free;
3434 
3435       /* For all characters ch...:  */
3436       for (i = 0; i < BITSET_WORDS; ++i)
3437 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3438 	     elem;
3439 	     mask <<= 1, elem >>= 1, ++ch)
3440 	  if (BE (elem & 1, 0))
3441 	    {
3442 	      /* There must be exactly one destination which accepts
3443 		 character ch.  See group_nodes_into_DFAstates.  */
3444 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3445 		;
3446 
3447 	      /* j-th destination accepts the word character ch.  */
3448 	      if (dfa->word_char[i] & mask)
3449 		trtable[ch] = dest_states_word[j];
3450 	      else
3451 		trtable[ch] = dest_states[j];
3452 	    }
3453     }
3454   else
3455     {
3456       /* We care about whether the following character is a word
3457 	 character, and we are in a multi-byte character set: discern
3458 	 by looking at the character code: build two 256-entry
3459 	 transition tables, one starting at trtable[0] and one
3460 	 starting at trtable[SBC_MAX].  */
3461       trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX);
3462       if (BE (trtable == NULL, 0))
3463 	goto out_free;
3464 
3465       /* For all characters ch...:  */
3466       for (i = 0; i < BITSET_WORDS; ++i)
3467 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3468 	     elem;
3469 	     mask <<= 1, elem >>= 1, ++ch)
3470 	  if (BE (elem & 1, 0))
3471 	    {
3472 	      /* There must be exactly one destination which accepts
3473 		 character ch.  See group_nodes_into_DFAstates.  */
3474 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3475 		;
3476 
3477 	      /* j-th destination accepts the word character ch.  */
3478 	      trtable[ch] = dest_states[j];
3479 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3480 	    }
3481     }
3482 
3483   /* new line */
3484   if (bitset_contain (acceptable, NEWLINE_CHAR))
3485     {
3486       /* The current state accepts newline character.  */
3487       for (j = 0; j < ndests; ++j)
3488 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3489 	  {
3490 	    /* k-th destination accepts newline character.  */
3491 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3492 	    if (need_word_trtable)
3493 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3494 	    /* There must be only one destination which accepts
3495 	       newline.  See group_nodes_into_DFAstates.  */
3496 	    break;
3497 	  }
3498     }
3499 
3500   if (dest_states_malloced)
3501     free (dest_states);
3502 
3503   re_node_set_free (&follows);
3504   for (i = 0; i < ndests; ++i)
3505     re_node_set_free (dests_node + i);
3506 
3507   if (dests_node_malloced)
3508     free (dests_alloc);
3509 
3510   return true;
3511 }
3512 
3513 /* Group all nodes belonging to STATE into several destinations.
3514    Then for all destinations, set the nodes belonging to the destination
3515    to DESTS_NODE[i] and set the characters accepted by the destination
3516    to DEST_CH[i].  This function return the number of destinations.  */
3517 
3518 static Idx
3519 internal_function
3520 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3521 			    re_node_set *dests_node, bitset *dests_ch)
3522 {
3523   reg_errcode_t err;
3524   bool ok;
3525   Idx i, j, k;
3526   Idx ndests; /* Number of the destinations from `state'.  */
3527   bitset accepts; /* Characters a node can accept.  */
3528   const re_node_set *cur_nodes = &state->nodes;
3529   bitset_empty (accepts);
3530   ndests = 0;
3531 
3532   /* For all the nodes belonging to `state',  */
3533   for (i = 0; i < cur_nodes->nelem; ++i)
3534     {
3535       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3536       re_token_type_t type = node->type;
3537       unsigned int constraint = node->constraint;
3538 
3539       /* Enumerate all single byte character this node can accept.  */
3540       if (type == CHARACTER)
3541 	bitset_set (accepts, node->opr.c);
3542       else if (type == SIMPLE_BRACKET)
3543 	{
3544 	  bitset_merge (accepts, node->opr.sbcset);
3545 	}
3546       else if (type == OP_PERIOD)
3547 	{
3548 #ifdef RE_ENABLE_I18N
3549 	  if (dfa->mb_cur_max > 1)
3550 	    bitset_merge (accepts, dfa->sb_char);
3551 	  else
3552 #endif
3553 	    bitset_set_all (accepts);
3554 	  if (!(dfa->syntax & REG_DOT_NEWLINE))
3555 	    bitset_clear (accepts, '\n');
3556 	  if (dfa->syntax & REG_DOT_NOT_NULL)
3557 	    bitset_clear (accepts, '\0');
3558 	}
3559 #ifdef RE_ENABLE_I18N
3560       else if (type == OP_UTF8_PERIOD)
3561         {
3562 	  if (SBC_MAX / 2 % BITSET_WORD_BITS == 0)
3563 	    memset (accepts, -1, sizeof accepts / 2);
3564 	  else
3565 	    bitset_merge (accepts, utf8_sb_map);
3566 	  if (!(dfa->syntax & REG_DOT_NEWLINE))
3567 	    bitset_clear (accepts, '\n');
3568 	  if (dfa->syntax & REG_DOT_NOT_NULL)
3569 	    bitset_clear (accepts, '\0');
3570         }
3571 #endif
3572       else
3573 	continue;
3574 
3575       /* Check the `accepts' and sift the characters which are not
3576 	 match it the context.  */
3577       if (constraint)
3578 	{
3579 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3580 	    {
3581 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3582 	      bitset_empty (accepts);
3583 	      if (accepts_newline)
3584 		bitset_set (accepts, NEWLINE_CHAR);
3585 	      else
3586 		continue;
3587 	    }
3588 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3589 	    {
3590 	      bitset_empty (accepts);
3591 	      continue;
3592 	    }
3593 
3594 	  if (constraint & NEXT_WORD_CONSTRAINT)
3595 	    {
3596 	      bitset_word any_set = 0;
3597 	      if (type == CHARACTER && !node->word_char)
3598 		{
3599 		  bitset_empty (accepts);
3600 		  continue;
3601 		}
3602 #ifdef RE_ENABLE_I18N
3603 	      if (dfa->mb_cur_max > 1)
3604 		for (j = 0; j < BITSET_WORDS; ++j)
3605 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3606 	      else
3607 #endif
3608 		for (j = 0; j < BITSET_WORDS; ++j)
3609 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3610 	      if (!any_set)
3611 		continue;
3612 	    }
3613 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3614 	    {
3615 	      bitset_word any_set = 0;
3616 	      if (type == CHARACTER && node->word_char)
3617 		{
3618 		  bitset_empty (accepts);
3619 		  continue;
3620 		}
3621 #ifdef RE_ENABLE_I18N
3622 	      if (dfa->mb_cur_max > 1)
3623 		for (j = 0; j < BITSET_WORDS; ++j)
3624 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3625 	      else
3626 #endif
3627 		for (j = 0; j < BITSET_WORDS; ++j)
3628 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3629 	      if (!any_set)
3630 		continue;
3631 	    }
3632 	}
3633 
3634       /* Then divide `accepts' into DFA states, or create a new
3635 	 state.  Above, we make sure that accepts is not empty.  */
3636       for (j = 0; j < ndests; ++j)
3637 	{
3638 	  bitset intersec; /* Intersection sets, see below.  */
3639 	  bitset remains;
3640 	  /* Flags, see below.  */
3641 	  bitset_word has_intersec, not_subset, not_consumed;
3642 
3643 	  /* Optimization, skip if this state doesn't accept the character.  */
3644 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3645 	    continue;
3646 
3647 	  /* Enumerate the intersection set of this state and `accepts'.  */
3648 	  has_intersec = 0;
3649 	  for (k = 0; k < BITSET_WORDS; ++k)
3650 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3651 	  /* And skip if the intersection set is empty.  */
3652 	  if (!has_intersec)
3653 	    continue;
3654 
3655 	  /* Then check if this state is a subset of `accepts'.  */
3656 	  not_subset = not_consumed = 0;
3657 	  for (k = 0; k < BITSET_WORDS; ++k)
3658 	    {
3659 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3660 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3661 	    }
3662 
3663 	  /* If this state isn't a subset of `accepts', create a
3664 	     new group state, which has the `remains'. */
3665 	  if (not_subset)
3666 	    {
3667 	      bitset_copy (dests_ch[ndests], remains);
3668 	      bitset_copy (dests_ch[j], intersec);
3669 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3670 	      if (BE (err != REG_NOERROR, 0))
3671 		goto error_return;
3672 	      ++ndests;
3673 	    }
3674 
3675 	  /* Put the position in the current group. */
3676 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3677 	  if (BE (! ok, 0))
3678 	    goto error_return;
3679 
3680 	  /* If all characters are consumed, go to next node. */
3681 	  if (!not_consumed)
3682 	    break;
3683 	}
3684       /* Some characters remain, create a new group. */
3685       if (j == ndests)
3686 	{
3687 	  bitset_copy (dests_ch[ndests], accepts);
3688 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3689 	  if (BE (err != REG_NOERROR, 0))
3690 	    goto error_return;
3691 	  ++ndests;
3692 	  bitset_empty (accepts);
3693 	}
3694     }
3695   return ndests;
3696  error_return:
3697   for (j = 0; j < ndests; ++j)
3698     re_node_set_free (dests_node + j);
3699   return REG_MISSING;
3700 }
3701 
3702 #ifdef RE_ENABLE_I18N
3703 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3704    Return the number of the bytes the node accepts.
3705    STR_IDX is the current index of the input string.
3706 
3707    This function handles the nodes which can accept one character, or
3708    one collating element like '.', '[a-z]', opposite to the other nodes
3709    can only accept one byte.  */
3710 
3711 static int
3712 internal_function
3713 check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
3714 			 const re_string_t *input, Idx str_idx)
3715 {
3716   const re_token_t *node = dfa->nodes + node_idx;
3717   int char_len, elem_len;
3718   Idx i;
3719 
3720   if (BE (node->type == OP_UTF8_PERIOD, 0))
3721     {
3722       unsigned char c = re_string_byte_at (input, str_idx), d;
3723       if (BE (c < 0xc2, 1))
3724 	return 0;
3725 
3726       if (str_idx + 2 > input->len)
3727 	return 0;
3728 
3729       d = re_string_byte_at (input, str_idx + 1);
3730       if (c < 0xe0)
3731 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3732       else if (c < 0xf0)
3733 	{
3734 	  char_len = 3;
3735 	  if (c == 0xe0 && d < 0xa0)
3736 	    return 0;
3737 	}
3738       else if (c < 0xf8)
3739 	{
3740 	  char_len = 4;
3741 	  if (c == 0xf0 && d < 0x90)
3742 	    return 0;
3743 	}
3744       else if (c < 0xfc)
3745 	{
3746 	  char_len = 5;
3747 	  if (c == 0xf8 && d < 0x88)
3748 	    return 0;
3749 	}
3750       else if (c < 0xfe)
3751 	{
3752 	  char_len = 6;
3753 	  if (c == 0xfc && d < 0x84)
3754 	    return 0;
3755 	}
3756       else
3757 	return 0;
3758 
3759       if (str_idx + char_len > input->len)
3760 	return 0;
3761 
3762       for (i = 1; i < char_len; ++i)
3763 	{
3764 	  d = re_string_byte_at (input, str_idx + i);
3765 	  if (d < 0x80 || d > 0xbf)
3766 	    return 0;
3767 	}
3768       return char_len;
3769     }
3770 
3771   char_len = re_string_char_size_at (input, str_idx);
3772   if (node->type == OP_PERIOD)
3773     {
3774       if (char_len <= 1)
3775         return 0;
3776       /* FIXME: I don't think this if is needed, as both '\n'
3777 	 and '\0' are char_len == 1.  */
3778       /* '.' accepts any one character except the following two cases.  */
3779       if ((!(dfa->syntax & REG_DOT_NEWLINE) &&
3780 	   re_string_byte_at (input, str_idx) == '\n') ||
3781 	  ((dfa->syntax & REG_DOT_NOT_NULL) &&
3782 	   re_string_byte_at (input, str_idx) == '\0'))
3783 	return 0;
3784       return char_len;
3785     }
3786 
3787   elem_len = re_string_elem_size_at (input, str_idx);
3788   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3789     return 0;
3790 
3791   if (node->type == COMPLEX_BRACKET)
3792     {
3793       const re_charset_t *cset = node->opr.mbcset;
3794 # ifdef _LIBC
3795       const unsigned char *pin
3796 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3797       Idx j;
3798       uint32_t nrules;
3799 # endif /* _LIBC */
3800       int match_len = 0;
3801       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3802 		    ? re_string_wchar_at (input, str_idx) : 0);
3803 
3804       /* match with multibyte character?  */
3805       for (i = 0; i < cset->nmbchars; ++i)
3806 	if (wc == cset->mbchars[i])
3807 	  {
3808 	    match_len = char_len;
3809 	    goto check_node_accept_bytes_match;
3810 	  }
3811       /* match with character_class?  */
3812       for (i = 0; i < cset->nchar_classes; ++i)
3813 	{
3814 	  wctype_t wt = cset->char_classes[i];
3815 	  if (__iswctype (wc, wt))
3816 	    {
3817 	      match_len = char_len;
3818 	      goto check_node_accept_bytes_match;
3819 	    }
3820 	}
3821 
3822 # ifdef _LIBC
3823       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3824       if (nrules != 0)
3825 	{
3826 	  unsigned int in_collseq = 0;
3827 	  const int32_t *table, *indirect;
3828 	  const unsigned char *weights, *extra;
3829 	  const char *collseqwc;
3830 	  int32_t idx;
3831 	  /* This #include defines a local function!  */
3832 #  include <locale/weight.h>
3833 
3834 	  /* match with collating_symbol?  */
3835 	  if (cset->ncoll_syms)
3836 	    extra = (const unsigned char *)
3837 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3838 	  for (i = 0; i < cset->ncoll_syms; ++i)
3839 	    {
3840 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3841 	      /* Compare the length of input collating element and
3842 		 the length of current collating element.  */
3843 	      if (*coll_sym != elem_len)
3844 		continue;
3845 	      /* Compare each bytes.  */
3846 	      for (j = 0; j < *coll_sym; j++)
3847 		if (pin[j] != coll_sym[1 + j])
3848 		  break;
3849 	      if (j == *coll_sym)
3850 		{
3851 		  /* Match if every bytes is equal.  */
3852 		  match_len = j;
3853 		  goto check_node_accept_bytes_match;
3854 		}
3855 	    }
3856 
3857 	  if (cset->nranges)
3858 	    {
3859 	      if (elem_len <= char_len)
3860 		{
3861 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3862 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3863 		}
3864 	      else
3865 		in_collseq = find_collation_sequence_value (pin, elem_len);
3866 	    }
3867 	  /* match with range expression?  */
3868 	  for (i = 0; i < cset->nranges; ++i)
3869 	    if (cset->range_starts[i] <= in_collseq
3870 		&& in_collseq <= cset->range_ends[i])
3871 	      {
3872 		match_len = elem_len;
3873 		goto check_node_accept_bytes_match;
3874 	      }
3875 
3876 	  /* match with equivalence_class?  */
3877 	  if (cset->nequiv_classes)
3878 	    {
3879 	      const unsigned char *cp = pin;
3880 	      table = (const int32_t *)
3881 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3882 	      weights = (const unsigned char *)
3883 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3884 	      extra = (const unsigned char *)
3885 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3886 	      indirect = (const int32_t *)
3887 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3888 	      idx = findidx (&cp);
3889 	      if (idx > 0)
3890 		for (i = 0; i < cset->nequiv_classes; ++i)
3891 		  {
3892 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3893 		    size_t weight_len = weights[idx];
3894 		    if (weight_len == weights[equiv_class_idx])
3895 		      {
3896 			Idx cnt = 0;
3897 			while (cnt <= weight_len
3898 			       && (weights[equiv_class_idx + 1 + cnt]
3899 				   == weights[idx + 1 + cnt]))
3900 			  ++cnt;
3901 			if (cnt > weight_len)
3902 			  {
3903 			    match_len = elem_len;
3904 			    goto check_node_accept_bytes_match;
3905 			  }
3906 		      }
3907 		  }
3908 	    }
3909 	}
3910       else
3911 # endif /* _LIBC */
3912 	{
3913 	  /* match with range expression?  */
3914 #if __GNUC__ >= 2
3915 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3916 #else
3917 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3918 	  cmp_buf[2] = wc;
3919 #endif
3920 	  for (i = 0; i < cset->nranges; ++i)
3921 	    {
3922 	      cmp_buf[0] = cset->range_starts[i];
3923 	      cmp_buf[4] = cset->range_ends[i];
3924 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3925 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3926 		{
3927 		  match_len = char_len;
3928 		  goto check_node_accept_bytes_match;
3929 		}
3930 	    }
3931 	}
3932     check_node_accept_bytes_match:
3933       if (!cset->non_match)
3934 	return match_len;
3935       else
3936 	{
3937 	  if (match_len > 0)
3938 	    return 0;
3939 	  else
3940 	    return (elem_len > char_len) ? elem_len : char_len;
3941 	}
3942     }
3943   return 0;
3944 }
3945 
3946 # ifdef _LIBC
3947 static unsigned int
3948 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3949 {
3950   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3951   if (nrules == 0)
3952     {
3953       if (mbs_len == 1)
3954 	{
3955 	  /* No valid character.  Match it as a single byte character.  */
3956 	  const unsigned char *collseq = (const unsigned char *)
3957 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3958 	  return collseq[mbs[0]];
3959 	}
3960       return UINT_MAX;
3961     }
3962   else
3963     {
3964       int32_t idx;
3965       const unsigned char *extra = (const unsigned char *)
3966 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3967       int32_t extrasize = (const unsigned char *)
3968 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3969 
3970       for (idx = 0; idx < extrasize;)
3971 	{
3972 	  int mbs_cnt;
3973 	  bool found = false;
3974 	  int32_t elem_mbs_len;
3975 	  /* Skip the name of collating element name.  */
3976 	  idx = idx + extra[idx] + 1;
3977 	  elem_mbs_len = extra[idx++];
3978 	  if (mbs_len == elem_mbs_len)
3979 	    {
3980 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3981 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3982 		  break;
3983 	      if (mbs_cnt == elem_mbs_len)
3984 		/* Found the entry.  */
3985 		found = true;
3986 	    }
3987 	  /* Skip the byte sequence of the collating element.  */
3988 	  idx += elem_mbs_len;
3989 	  /* Adjust for the alignment.  */
3990 	  idx = (idx + 3) & ~3;
3991 	  /* Skip the collation sequence value.  */
3992 	  idx += sizeof (uint32_t);
3993 	  /* Skip the wide char sequence of the collating element.  */
3994 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
3995 	  /* If we found the entry, return the sequence value.  */
3996 	  if (found)
3997 	    return *(uint32_t *) (extra + idx);
3998 	  /* Skip the collation sequence value.  */
3999 	  idx += sizeof (uint32_t);
4000 	}
4001       return UINT_MAX;
4002     }
4003 }
4004 # endif /* _LIBC */
4005 #endif /* RE_ENABLE_I18N */
4006 
4007 /* Check whether the node accepts the byte which is IDX-th
4008    byte of the INPUT.  */
4009 
4010 static bool
4011 internal_function
4012 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4013 		   Idx idx)
4014 {
4015   unsigned char ch;
4016   ch = re_string_byte_at (&mctx->input, idx);
4017   switch (node->type)
4018     {
4019     case CHARACTER:
4020       if (node->opr.c != ch)
4021         return false;
4022       break;
4023 
4024     case SIMPLE_BRACKET:
4025       if (!bitset_contain (node->opr.sbcset, ch))
4026         return false;
4027       break;
4028 
4029 #ifdef RE_ENABLE_I18N
4030     case OP_UTF8_PERIOD:
4031       if (ch >= 0x80)
4032         return false;
4033       /* FALLTHROUGH */
4034 #endif
4035     case OP_PERIOD:
4036       if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE))
4037 	  || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL)))
4038 	return false;
4039       break;
4040 
4041     default:
4042       return false;
4043     }
4044 
4045   if (node->constraint)
4046     {
4047       /* The node has constraints.  Check whether the current context
4048 	 satisfies the constraints.  */
4049       unsigned int context = re_string_context_at (&mctx->input, idx,
4050 						   mctx->eflags);
4051       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4052 	return false;
4053     }
4054 
4055   return true;
4056 }
4057 
4058 /* Extend the buffers, if the buffers have run out.  */
4059 
4060 static reg_errcode_t
4061 internal_function
4062 extend_buffers (re_match_context_t *mctx)
4063 {
4064   reg_errcode_t ret;
4065   re_string_t *pstr = &mctx->input;
4066 
4067   /* Double the lengthes of the buffers.  */
4068   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4069   if (BE (ret != REG_NOERROR, 0))
4070     return ret;
4071 
4072   if (mctx->state_log != NULL)
4073     {
4074       /* And double the length of state_log.  */
4075       /* XXX We have no indication of the size of this buffer.  If this
4076 	 allocation fail we have no indication that the state_log array
4077 	 does not have the right size.  */
4078       re_dfastate_t **new_array = re_xrealloc (mctx->state_log, re_dfastate_t *,
4079 					       pstr->bufs_len + 1);
4080       if (BE (new_array == NULL, 0))
4081 	return REG_ESPACE;
4082       mctx->state_log = new_array;
4083     }
4084 
4085   /* Then reconstruct the buffers.  */
4086   if (pstr->icase)
4087     {
4088 #ifdef RE_ENABLE_I18N
4089       if (pstr->mb_cur_max > 1)
4090 	{
4091 	  ret = build_wcs_upper_buffer (pstr);
4092 	  if (BE (ret != REG_NOERROR, 0))
4093 	    return ret;
4094 	}
4095       else
4096 #endif /* RE_ENABLE_I18N  */
4097 	build_upper_buffer (pstr);
4098     }
4099   else
4100     {
4101 #ifdef RE_ENABLE_I18N
4102       if (pstr->mb_cur_max > 1)
4103 	build_wcs_buffer (pstr);
4104       else
4105 #endif /* RE_ENABLE_I18N  */
4106 	{
4107 	  if (pstr->trans != NULL)
4108 	    re_string_translate_buffer (pstr);
4109 	}
4110     }
4111   return REG_NOERROR;
4112 }
4113 
4114 
4115 /* Functions for matching context.  */
4116 
4117 /* Initialize MCTX.  */
4118 
4119 static reg_errcode_t
4120 internal_function
4121 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4122 {
4123   mctx->eflags = eflags;
4124   mctx->match_last = REG_MISSING;
4125   if (n > 0)
4126     {
4127       mctx->bkref_ents = re_xmalloc (struct re_backref_cache_entry, n);
4128       mctx->sub_tops = re_xmalloc (re_sub_match_top_t *, n);
4129       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4130 	return REG_ESPACE;
4131     }
4132   /* Already zero-ed by the caller.
4133      else
4134        mctx->bkref_ents = NULL;
4135      mctx->nbkref_ents = 0;
4136      mctx->nsub_tops = 0;  */
4137   mctx->abkref_ents = n;
4138   mctx->max_mb_elem_len = 1;
4139   mctx->asub_tops = n;
4140   return REG_NOERROR;
4141 }
4142 
4143 /* Clean the entries which depend on the current input in MCTX.
4144    This function must be invoked when the matcher changes the start index
4145    of the input, or changes the input string.  */
4146 
4147 static void
4148 internal_function
4149 match_ctx_clean (re_match_context_t *mctx)
4150 {
4151   Idx st_idx;
4152   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4153     {
4154       Idx sl_idx;
4155       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4156       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4157 	{
4158 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4159 	  re_free (last->path.array);
4160 	  re_free (last);
4161 	}
4162       re_free (top->lasts);
4163       if (top->path)
4164 	{
4165 	  re_free (top->path->array);
4166 	  re_free (top->path);
4167 	}
4168       free (top);
4169     }
4170 
4171   mctx->nsub_tops = 0;
4172   mctx->nbkref_ents = 0;
4173 }
4174 
4175 /* Free all the memory associated with MCTX.  */
4176 
4177 static void
4178 internal_function
4179 match_ctx_free (re_match_context_t *mctx)
4180 {
4181   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4182   match_ctx_clean (mctx);
4183   re_free (mctx->sub_tops);
4184   re_free (mctx->bkref_ents);
4185 }
4186 
4187 /* Add a new backreference entry to MCTX.
4188    Note that we assume that caller never call this function with duplicate
4189    entry, and call with STR_IDX which isn't smaller than any existing entry.
4190 */
4191 
4192 static reg_errcode_t
4193 internal_function
4194 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx,
4195 		     Idx from, Idx to)
4196 {
4197   if (mctx->nbkref_ents >= mctx->abkref_ents)
4198     {
4199       struct re_backref_cache_entry* new_entry;
4200       new_entry = re_x2realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4201 				&mctx->abkref_ents);
4202       if (BE (new_entry == NULL, 0))
4203 	{
4204 	  re_free (mctx->bkref_ents);
4205 	  return REG_ESPACE;
4206 	}
4207       mctx->bkref_ents = new_entry;
4208       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4209 	      (sizeof (struct re_backref_cache_entry)
4210 	       * (mctx->abkref_ents - mctx->nbkref_ents)));
4211     }
4212   if (mctx->nbkref_ents > 0
4213       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4214     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4215 
4216   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4217   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4218   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4219   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4220 
4221   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4222      If bit N is clear, means that this entry won't epsilon-transition to
4223      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4224      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4225      such node.
4226 
4227      A backreference does not epsilon-transition unless it is empty, so set
4228      to all zeros if FROM != TO.  */
4229   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4230     = (from == to ? -1 : 0);
4231 
4232   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4233   if (mctx->max_mb_elem_len < to - from)
4234     mctx->max_mb_elem_len = to - from;
4235   return REG_NOERROR;
4236 }
4237 
4238 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4239    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4240 
4241 static Idx
4242 internal_function
4243 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4244 {
4245   Idx left, right, mid, last;
4246   last = right = mctx->nbkref_ents;
4247   for (left = 0; left < right;)
4248     {
4249       mid = (left + right) / 2;
4250       if (mctx->bkref_ents[mid].str_idx < str_idx)
4251 	left = mid + 1;
4252       else
4253 	right = mid;
4254     }
4255   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4256     return left;
4257   else
4258     return REG_MISSING;
4259 }
4260 
4261 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4262    at STR_IDX.  */
4263 
4264 static reg_errcode_t
4265 internal_function
4266 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4267 {
4268 #ifdef DEBUG
4269   assert (mctx->sub_tops != NULL);
4270   assert (mctx->asub_tops > 0);
4271 #endif
4272   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4273     {
4274       Idx new_asub_tops = mctx->asub_tops;
4275       re_sub_match_top_t **new_array = re_x2realloc (mctx->sub_tops,
4276 						     re_sub_match_top_t *,
4277 						     &new_asub_tops);
4278       if (BE (new_array == NULL, 0))
4279 	return REG_ESPACE;
4280       mctx->sub_tops = new_array;
4281       mctx->asub_tops = new_asub_tops;
4282     }
4283   mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1);
4284   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4285     return REG_ESPACE;
4286   mctx->sub_tops[mctx->nsub_tops]->node = node;
4287   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4288   return REG_NOERROR;
4289 }
4290 
4291 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4292    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4293 
4294 static re_sub_match_last_t *
4295 internal_function
4296 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4297 {
4298   re_sub_match_last_t *new_entry;
4299   if (BE (subtop->nlasts == subtop->alasts, 0))
4300     {
4301       Idx new_alasts = subtop->alasts;
4302       re_sub_match_last_t **new_array = re_x2realloc (subtop->lasts,
4303 						      re_sub_match_last_t *,
4304 						      &new_alasts);
4305       if (BE (new_array == NULL, 0))
4306 	return NULL;
4307       subtop->lasts = new_array;
4308       subtop->alasts = new_alasts;
4309     }
4310   new_entry = re_calloc (re_sub_match_last_t, 1);
4311   if (BE (new_entry != NULL, 1))
4312     {
4313       subtop->lasts[subtop->nlasts] = new_entry;
4314       new_entry->node = node;
4315       new_entry->str_idx = str_idx;
4316       ++subtop->nlasts;
4317     }
4318   return new_entry;
4319 }
4320 
4321 static void
4322 internal_function
4323 sift_ctx_init (re_sift_context_t *sctx,
4324 	       re_dfastate_t **sifted_sts,
4325 	       re_dfastate_t **limited_sts,
4326 	       Idx last_node, Idx last_str_idx)
4327 {
4328   sctx->sifted_states = sifted_sts;
4329   sctx->limited_states = limited_sts;
4330   sctx->last_node = last_node;
4331   sctx->last_str_idx = last_str_idx;
4332   re_node_set_init_empty (&sctx->limits);
4333 }
4334