xref: /dragonfly/contrib/grep/lib/regexec.c (revision e6d22e9b)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002-2020 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU General Public
8    License as published by the Free Software Foundation; either
9    version 3 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    General Public License for more details.
15 
16    You should have received a copy of the GNU General Public
17    License along with the GNU C Library; if not, see
18    <https://www.gnu.org/licenses/>.  */
19 
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 				     Idx n);
22 static void match_ctx_clean (re_match_context_t *mctx);
23 static void match_ctx_free (re_match_context_t *cache);
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 					  Idx str_idx, Idx from, Idx to);
26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28 					   Idx str_idx);
29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30 						    Idx node, Idx str_idx);
31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
32 			   re_dfastate_t **limited_sts, Idx last_node,
33 			   Idx last_str_idx);
34 static reg_errcode_t re_search_internal (const regex_t *preg,
35 					 const char *string, Idx length,
36 					 Idx start, Idx last_start, Idx stop,
37 					 size_t nmatch, regmatch_t pmatch[],
38 					 int eflags);
39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40 				  const char *string1, Idx length1,
41 				  const char *string2, Idx length2,
42 				  Idx start, regoff_t range,
43 				  struct re_registers *regs,
44 				  Idx stop, bool ret_len);
45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46 				const char *string, Idx length, Idx start,
47 				regoff_t range, Idx stop,
48 				struct re_registers *regs,
49 				bool ret_len);
50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51                               Idx nregs, int regs_allocated);
52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54 			   Idx *p_match_first);
55 static Idx check_halt_state_context (const re_match_context_t *mctx,
56 				     const re_dfastate_t *state, Idx idx);
57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
58 			 regmatch_t *prev_idx_match, Idx cur_node,
59 			 Idx cur_idx, Idx nmatch);
60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61 				      Idx str_idx, Idx dest_node, Idx nregs,
62 				      regmatch_t *regs,
63 				      re_node_set *eps_via_nodes);
64 static reg_errcode_t set_regs (const regex_t *preg,
65 			       const re_match_context_t *mctx,
66 			       size_t nmatch, regmatch_t *pmatch,
67 			       bool fl_backtrack);
68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69 
70 #ifdef RE_ENABLE_I18N
71 static int sift_states_iter_mb (const re_match_context_t *mctx,
72 				re_sift_context_t *sctx,
73 				Idx node_idx, Idx str_idx, Idx max_str_idx);
74 #endif /* RE_ENABLE_I18N */
75 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76 					   re_sift_context_t *sctx);
77 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
78 					  re_sift_context_t *sctx, Idx str_idx,
79 					  re_node_set *cur_dest);
80 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
81 					      re_sift_context_t *sctx,
82 					      Idx str_idx,
83 					      re_node_set *dest_nodes);
84 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
85 					    re_node_set *dest_nodes,
86 					    const re_node_set *candidates);
87 static bool check_dst_limits (const re_match_context_t *mctx,
88 			      const re_node_set *limits,
89 			      Idx dst_node, Idx dst_idx, Idx src_node,
90 			      Idx src_idx);
91 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
92 					int boundaries, Idx subexp_idx,
93 					Idx from_node, Idx bkref_idx);
94 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
95 				      Idx limit, Idx subexp_idx,
96 				      Idx node, Idx str_idx,
97 				      Idx bkref_idx);
98 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
99 					  re_node_set *dest_nodes,
100 					  const re_node_set *candidates,
101 					  re_node_set *limits,
102 					  struct re_backref_cache_entry *bkref_ents,
103 					  Idx str_idx);
104 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
105 					re_sift_context_t *sctx,
106 					Idx str_idx, const re_node_set *candidates);
107 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
108 					re_dfastate_t **dst,
109 					re_dfastate_t **src, Idx num);
110 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
111 					 re_match_context_t *mctx);
112 static re_dfastate_t *transit_state (reg_errcode_t *err,
113 				     re_match_context_t *mctx,
114 				     re_dfastate_t *state);
115 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
116 					    re_match_context_t *mctx,
117 					    re_dfastate_t *next_state);
118 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
119 						re_node_set *cur_nodes,
120 						Idx str_idx);
121 #if 0
122 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
123 					re_match_context_t *mctx,
124 					re_dfastate_t *pstate);
125 #endif
126 #ifdef RE_ENABLE_I18N
127 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128 				       re_dfastate_t *pstate);
129 #endif /* RE_ENABLE_I18N */
130 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131 					  const re_node_set *nodes);
132 static reg_errcode_t get_subexp (re_match_context_t *mctx,
133 				 Idx bkref_node, Idx bkref_str_idx);
134 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
135 				     const re_sub_match_top_t *sub_top,
136 				     re_sub_match_last_t *sub_last,
137 				     Idx bkref_node, Idx bkref_str);
138 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
139 			     Idx subexp_idx, int type);
140 static reg_errcode_t check_arrival (re_match_context_t *mctx,
141 				    state_array_t *path, Idx top_node,
142 				    Idx top_str, Idx last_node, Idx last_str,
143 				    int type);
144 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
145 						   Idx str_idx,
146 						   re_node_set *cur_nodes,
147 						   re_node_set *next_nodes);
148 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
149 					       re_node_set *cur_nodes,
150 					       Idx ex_subexp, int type);
151 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
152 						   re_node_set *dst_nodes,
153 						   Idx target, Idx ex_subexp,
154 						   int type);
155 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
156 					 re_node_set *cur_nodes, Idx cur_str,
157 					 Idx subexp_num, int type);
158 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
159 #ifdef RE_ENABLE_I18N
160 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161 				    const re_string_t *input, Idx idx);
162 # ifdef _LIBC
163 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164 						   size_t name_len);
165 # endif /* _LIBC */
166 #endif /* RE_ENABLE_I18N */
167 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
168 				       const re_dfastate_t *state,
169 				       re_node_set *states_node,
170 				       bitset_t *states_ch);
171 static bool check_node_accept (const re_match_context_t *mctx,
172 			       const re_token_t *node, Idx idx);
173 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
174 
175 /* Entry point for POSIX code.  */
176 
177 /* regexec searches for a given pattern, specified by PREG, in the
178    string STRING.
179 
180    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
182    least NMATCH elements, and we set them to the offsets of the
183    corresponding matched substrings.
184 
185    EFLAGS specifies "execution flags" which affect matching: if
186    REG_NOTBOL is set, then ^ does not match at the beginning of the
187    string; if REG_NOTEOL is set, then $ does not match at the end.
188 
189    We return 0 if we find a match and REG_NOMATCH if not.  */
190 
191 int
192 regexec (const regex_t *__restrict preg, const char *__restrict string,
193 	 size_t nmatch, regmatch_t pmatch[], int eflags)
194 {
195   reg_errcode_t err;
196   Idx start, length;
197   re_dfa_t *dfa = preg->buffer;
198 
199   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
200     return REG_BADPAT;
201 
202   if (eflags & REG_STARTEND)
203     {
204       start = pmatch[0].rm_so;
205       length = pmatch[0].rm_eo;
206     }
207   else
208     {
209       start = 0;
210       length = strlen (string);
211     }
212 
213   lock_lock (dfa->lock);
214   if (preg->no_sub)
215     err = re_search_internal (preg, string, length, start, length,
216 			      length, 0, NULL, eflags);
217   else
218     err = re_search_internal (preg, string, length, start, length,
219 			      length, nmatch, pmatch, eflags);
220   lock_unlock (dfa->lock);
221   return err != REG_NOERROR;
222 }
223 
224 #ifdef _LIBC
225 libc_hidden_def (__regexec)
226 
227 # include <shlib-compat.h>
228 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
229 
230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231 __typeof__ (__regexec) __compat_regexec;
232 
233 int
234 attribute_compat_text_section
235 __compat_regexec (const regex_t *__restrict preg,
236 		  const char *__restrict string, size_t nmatch,
237 		  regmatch_t pmatch[], int eflags)
238 {
239   return regexec (preg, string, nmatch, pmatch,
240 		  eflags & (REG_NOTBOL | REG_NOTEOL));
241 }
242 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
243 # endif
244 #endif
245 
246 /* Entry points for GNU code.  */
247 
248 /* re_match, re_search, re_match_2, re_search_2
249 
250    The former two functions operate on STRING with length LENGTH,
251    while the later two operate on concatenation of STRING1 and STRING2
252    with lengths LENGTH1 and LENGTH2, respectively.
253 
254    re_match() matches the compiled pattern in BUFP against the string,
255    starting at index START.
256 
257    re_search() first tries matching at index START, then it tries to match
258    starting from index START + 1, and so on.  The last start position tried
259    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
260    way as re_match().)
261 
262    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263    the first STOP characters of the concatenation of the strings should be
264    concerned.
265 
266    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
268    computed relative to the concatenation, not relative to the individual
269    strings.)
270 
271    On success, re_match* functions return the length of the match, re_search*
272    return the position of the start of the match.  Return value -1 means no
273    match was found and -2 indicates an internal error.  */
274 
275 regoff_t
276 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
277 	  Idx start, struct re_registers *regs)
278 {
279   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
280 }
281 #ifdef _LIBC
282 weak_alias (__re_match, re_match)
283 #endif
284 
285 regoff_t
286 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
287 	   Idx start, regoff_t range, struct re_registers *regs)
288 {
289   return re_search_stub (bufp, string, length, start, range, length, regs,
290 			 false);
291 }
292 #ifdef _LIBC
293 weak_alias (__re_search, re_search)
294 #endif
295 
296 regoff_t
297 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
298 	    const char *string2, Idx length2, Idx start,
299 	    struct re_registers *regs, Idx stop)
300 {
301   return re_search_2_stub (bufp, string1, length1, string2, length2,
302 			   start, 0, regs, stop, true);
303 }
304 #ifdef _LIBC
305 weak_alias (__re_match_2, re_match_2)
306 #endif
307 
308 regoff_t
309 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
310 	     const char *string2, Idx length2, Idx start, regoff_t range,
311 	     struct re_registers *regs, Idx stop)
312 {
313   return re_search_2_stub (bufp, string1, length1, string2, length2,
314 			   start, range, regs, stop, false);
315 }
316 #ifdef _LIBC
317 weak_alias (__re_search_2, re_search_2)
318 #endif
319 
320 static regoff_t
321 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
322 		  Idx length1, const char *string2, Idx length2, Idx start,
323 		  regoff_t range, struct re_registers *regs,
324 		  Idx stop, bool ret_len)
325 {
326   const char *str;
327   regoff_t rval;
328   Idx len;
329   char *s = NULL;
330 
331   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
332 			 || INT_ADD_WRAPV (length1, length2, &len))))
333     return -2;
334 
335   /* Concatenate the strings.  */
336   if (length2 > 0)
337     if (length1 > 0)
338       {
339 	s = re_malloc (char, len);
340 
341 	if (__glibc_unlikely (s == NULL))
342 	  return -2;
343 #ifdef _LIBC
344 	memcpy (__mempcpy (s, string1, length1), string2, length2);
345 #else
346 	memcpy (s, string1, length1);
347 	memcpy (s + length1, string2, length2);
348 #endif
349 	str = s;
350       }
351     else
352       str = string2;
353   else
354     str = string1;
355 
356   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
357 			 ret_len);
358   re_free (s);
359   return rval;
360 }
361 
362 /* The parameters have the same meaning as those of re_search.
363    Additional parameters:
364    If RET_LEN is true the length of the match is returned (re_match style);
365    otherwise the position of the match is returned.  */
366 
367 static regoff_t
368 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
369 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
370 		bool ret_len)
371 {
372   reg_errcode_t result;
373   regmatch_t *pmatch;
374   Idx nregs;
375   regoff_t rval;
376   int eflags = 0;
377   re_dfa_t *dfa = bufp->buffer;
378   Idx last_start = start + range;
379 
380   /* Check for out-of-range.  */
381   if (__glibc_unlikely (start < 0 || start > length))
382     return -1;
383   if (__glibc_unlikely (length < last_start
384 			|| (0 <= range && last_start < start)))
385     last_start = length;
386   else if (__glibc_unlikely (last_start < 0
387 			     || (range < 0 && start <= last_start)))
388     last_start = 0;
389 
390   lock_lock (dfa->lock);
391 
392   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
393   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
394 
395   /* Compile fastmap if we haven't yet.  */
396   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
397     re_compile_fastmap (bufp);
398 
399   if (__glibc_unlikely (bufp->no_sub))
400     regs = NULL;
401 
402   /* We need at least 1 register.  */
403   if (regs == NULL)
404     nregs = 1;
405   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
406 			     && regs->num_regs <= bufp->re_nsub))
407     {
408       nregs = regs->num_regs;
409       if (__glibc_unlikely (nregs < 1))
410 	{
411 	  /* Nothing can be copied to regs.  */
412 	  regs = NULL;
413 	  nregs = 1;
414 	}
415     }
416   else
417     nregs = bufp->re_nsub + 1;
418   pmatch = re_malloc (regmatch_t, nregs);
419   if (__glibc_unlikely (pmatch == NULL))
420     {
421       rval = -2;
422       goto out;
423     }
424 
425   result = re_search_internal (bufp, string, length, start, last_start, stop,
426 			       nregs, pmatch, eflags);
427 
428   rval = 0;
429 
430   /* I hope we needn't fill their regs with -1's when no match was found.  */
431   if (result != REG_NOERROR)
432     rval = result == REG_NOMATCH ? -1 : -2;
433   else if (regs != NULL)
434     {
435       /* If caller wants register contents data back, copy them.  */
436       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
437 					   bufp->regs_allocated);
438       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
439 	rval = -2;
440     }
441 
442   if (__glibc_likely (rval == 0))
443     {
444       if (ret_len)
445 	{
446 	  DEBUG_ASSERT (pmatch[0].rm_so == start);
447 	  rval = pmatch[0].rm_eo - start;
448 	}
449       else
450 	rval = pmatch[0].rm_so;
451     }
452   re_free (pmatch);
453  out:
454   lock_unlock (dfa->lock);
455   return rval;
456 }
457 
458 static unsigned
459 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
460 	      int regs_allocated)
461 {
462   int rval = REGS_REALLOCATE;
463   Idx i;
464   Idx need_regs = nregs + 1;
465   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
466      uses.  */
467 
468   /* Have the register data arrays been allocated?  */
469   if (regs_allocated == REGS_UNALLOCATED)
470     { /* No.  So allocate them with malloc.  */
471       regs->start = re_malloc (regoff_t, need_regs);
472       if (__glibc_unlikely (regs->start == NULL))
473 	return REGS_UNALLOCATED;
474       regs->end = re_malloc (regoff_t, need_regs);
475       if (__glibc_unlikely (regs->end == NULL))
476 	{
477 	  re_free (regs->start);
478 	  return REGS_UNALLOCATED;
479 	}
480       regs->num_regs = need_regs;
481     }
482   else if (regs_allocated == REGS_REALLOCATE)
483     { /* Yes.  If we need more elements than were already
484 	 allocated, reallocate them.  If we need fewer, just
485 	 leave it alone.  */
486       if (__glibc_unlikely (need_regs > regs->num_regs))
487 	{
488 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
489 	  regoff_t *new_end;
490 	  if (__glibc_unlikely (new_start == NULL))
491 	    return REGS_UNALLOCATED;
492 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
493 	  if (__glibc_unlikely (new_end == NULL))
494 	    {
495 	      re_free (new_start);
496 	      return REGS_UNALLOCATED;
497 	    }
498 	  regs->start = new_start;
499 	  regs->end = new_end;
500 	  regs->num_regs = need_regs;
501 	}
502     }
503   else
504     {
505       DEBUG_ASSERT (regs_allocated == REGS_FIXED);
506       /* This function may not be called with REGS_FIXED and nregs too big.  */
507       DEBUG_ASSERT (nregs <= regs->num_regs);
508       rval = REGS_FIXED;
509     }
510 
511   /* Copy the regs.  */
512   for (i = 0; i < nregs; ++i)
513     {
514       regs->start[i] = pmatch[i].rm_so;
515       regs->end[i] = pmatch[i].rm_eo;
516     }
517   for ( ; i < regs->num_regs; ++i)
518     regs->start[i] = regs->end[i] = -1;
519 
520   return rval;
521 }
522 
523 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
524    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
525    this memory for recording register information.  STARTS and ENDS
526    must be allocated using the malloc library routine, and must each
527    be at least NUM_REGS * sizeof (regoff_t) bytes long.
528 
529    If NUM_REGS == 0, then subsequent matches should allocate their own
530    register data.
531 
532    Unless this function is called, the first search or match using
533    PATTERN_BUFFER will allocate its own register data, without
534    freeing the old data.  */
535 
536 void
537 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
538 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
539 {
540   if (num_regs)
541     {
542       bufp->regs_allocated = REGS_REALLOCATE;
543       regs->num_regs = num_regs;
544       regs->start = starts;
545       regs->end = ends;
546     }
547   else
548     {
549       bufp->regs_allocated = REGS_UNALLOCATED;
550       regs->num_regs = 0;
551       regs->start = regs->end = NULL;
552     }
553 }
554 #ifdef _LIBC
555 weak_alias (__re_set_registers, re_set_registers)
556 #endif
557 
558 /* Entry points compatible with 4.2 BSD regex library.  We don't define
559    them unless specifically requested.  */
560 
561 #if defined _REGEX_RE_COMP || defined _LIBC
562 int
563 # ifdef _LIBC
564 weak_function
565 # endif
566 re_exec (const char *s)
567 {
568   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
569 }
570 #endif /* _REGEX_RE_COMP */
571 
572 /* Internal entry point.  */
573 
574 /* Searches for a compiled pattern PREG in the string STRING, whose
575    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
576    meaning as with regexec.  LAST_START is START + RANGE, where
577    START and RANGE have the same meaning as with re_search.
578    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
579    otherwise return the error code.
580    Note: We assume front end functions already check ranges.
581    (0 <= LAST_START && LAST_START <= LENGTH)  */
582 
583 static reg_errcode_t
584 __attribute_warn_unused_result__
585 re_search_internal (const regex_t *preg, const char *string, Idx length,
586 		    Idx start, Idx last_start, Idx stop, size_t nmatch,
587 		    regmatch_t pmatch[], int eflags)
588 {
589   reg_errcode_t err;
590   const re_dfa_t *dfa = preg->buffer;
591   Idx left_lim, right_lim;
592   int incr;
593   bool fl_longest_match;
594   int match_kind;
595   Idx match_first;
596   Idx match_last = -1;
597   Idx extra_nmatch;
598   bool sb;
599   int ch;
600   re_match_context_t mctx = { .dfa = dfa };
601   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
602 		    && start != last_start && !preg->can_be_null)
603 		   ? preg->fastmap : NULL);
604   RE_TRANSLATE_TYPE t = preg->translate;
605 
606   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
607   nmatch -= extra_nmatch;
608 
609   /* Check if the DFA haven't been compiled.  */
610   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
611 			|| dfa->init_state_word == NULL
612 			|| dfa->init_state_nl == NULL
613 			|| dfa->init_state_begbuf == NULL))
614     return REG_NOMATCH;
615 
616   /* We assume front-end functions already check them.  */
617   DEBUG_ASSERT (0 <= last_start && last_start <= length);
618 
619   /* If initial states with non-begbuf contexts have no elements,
620      the regex must be anchored.  If preg->newline_anchor is set,
621      we'll never use init_state_nl, so do not check it.  */
622   if (dfa->init_state->nodes.nelem == 0
623       && dfa->init_state_word->nodes.nelem == 0
624       && (dfa->init_state_nl->nodes.nelem == 0
625 	  || !preg->newline_anchor))
626     {
627       if (start != 0 && last_start != 0)
628         return REG_NOMATCH;
629       start = last_start = 0;
630     }
631 
632   /* We must check the longest matching, if nmatch > 0.  */
633   fl_longest_match = (nmatch != 0 || dfa->nbackref);
634 
635   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
636 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
637 			    dfa);
638   if (__glibc_unlikely (err != REG_NOERROR))
639     goto free_return;
640   mctx.input.stop = stop;
641   mctx.input.raw_stop = stop;
642   mctx.input.newline_anchor = preg->newline_anchor;
643 
644   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
645   if (__glibc_unlikely (err != REG_NOERROR))
646     goto free_return;
647 
648   /* We will log all the DFA states through which the dfa pass,
649      if nmatch > 1, or this dfa has "multibyte node", which is a
650      back-reference or a node which can accept multibyte character or
651      multi character collating element.  */
652   if (nmatch > 1 || dfa->has_mb_node)
653     {
654       /* Avoid overflow.  */
655       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
656 			     <= mctx.input.bufs_len)))
657 	{
658 	  err = REG_ESPACE;
659 	  goto free_return;
660 	}
661 
662       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
663       if (__glibc_unlikely (mctx.state_log == NULL))
664 	{
665 	  err = REG_ESPACE;
666 	  goto free_return;
667 	}
668     }
669 
670   match_first = start;
671   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
672 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
673 
674   /* Check incrementally whether the input string matches.  */
675   incr = (last_start < start) ? -1 : 1;
676   left_lim = (last_start < start) ? last_start : start;
677   right_lim = (last_start < start) ? start : last_start;
678   sb = dfa->mb_cur_max == 1;
679   match_kind =
680     (fastmap
681      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
682 	| (start <= last_start ? 2 : 0)
683 	| (t != NULL ? 1 : 0))
684      : 8);
685 
686   for (;; match_first += incr)
687     {
688       err = REG_NOMATCH;
689       if (match_first < left_lim || right_lim < match_first)
690 	goto free_return;
691 
692       /* Advance as rapidly as possible through the string, until we
693 	 find a plausible place to start matching.  This may be done
694 	 with varying efficiency, so there are various possibilities:
695 	 only the most common of them are specialized, in order to
696 	 save on code size.  We use a switch statement for speed.  */
697       switch (match_kind)
698 	{
699 	case 8:
700 	  /* No fastmap.  */
701 	  break;
702 
703 	case 7:
704 	  /* Fastmap with single-byte translation, match forward.  */
705 	  while (__glibc_likely (match_first < right_lim)
706 		 && !fastmap[t[(unsigned char) string[match_first]]])
707 	    ++match_first;
708 	  goto forward_match_found_start_or_reached_end;
709 
710 	case 6:
711 	  /* Fastmap without translation, match forward.  */
712 	  while (__glibc_likely (match_first < right_lim)
713 		 && !fastmap[(unsigned char) string[match_first]])
714 	    ++match_first;
715 
716 	forward_match_found_start_or_reached_end:
717 	  if (__glibc_unlikely (match_first == right_lim))
718 	    {
719 	      ch = match_first >= length
720 		       ? 0 : (unsigned char) string[match_first];
721 	      if (!fastmap[t ? t[ch] : ch])
722 		goto free_return;
723 	    }
724 	  break;
725 
726 	case 4:
727 	case 5:
728 	  /* Fastmap without multi-byte translation, match backwards.  */
729 	  while (match_first >= left_lim)
730 	    {
731 	      ch = match_first >= length
732 		       ? 0 : (unsigned char) string[match_first];
733 	      if (fastmap[t ? t[ch] : ch])
734 		break;
735 	      --match_first;
736 	    }
737 	  if (match_first < left_lim)
738 	    goto free_return;
739 	  break;
740 
741 	default:
742 	  /* In this case, we can't determine easily the current byte,
743 	     since it might be a component byte of a multibyte
744 	     character.  Then we use the constructed buffer instead.  */
745 	  for (;;)
746 	    {
747 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
748 		 buffers.  */
749 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
750 	      if (__glibc_unlikely (offset
751 				    >= (__re_size_t) mctx.input.valid_raw_len))
752 		{
753 		  err = re_string_reconstruct (&mctx.input, match_first,
754 					       eflags);
755 		  if (__glibc_unlikely (err != REG_NOERROR))
756 		    goto free_return;
757 
758 		  offset = match_first - mctx.input.raw_mbs_idx;
759 		}
760 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
761 		 Note that MATCH_FIRST must not be smaller than 0.  */
762 	      ch = (match_first >= length
763 		    ? 0 : re_string_byte_at (&mctx.input, offset));
764 	      if (fastmap[ch])
765 		break;
766 	      match_first += incr;
767 	      if (match_first < left_lim || match_first > right_lim)
768 		{
769 		  err = REG_NOMATCH;
770 		  goto free_return;
771 		}
772 	    }
773 	  break;
774 	}
775 
776       /* Reconstruct the buffers so that the matcher can assume that
777 	 the matching starts from the beginning of the buffer.  */
778       err = re_string_reconstruct (&mctx.input, match_first, eflags);
779       if (__glibc_unlikely (err != REG_NOERROR))
780 	goto free_return;
781 
782 #ifdef RE_ENABLE_I18N
783      /* Don't consider this char as a possible match start if it part,
784 	yet isn't the head, of a multibyte character.  */
785       if (!sb && !re_string_first_byte (&mctx.input, 0))
786 	continue;
787 #endif
788 
789       /* It seems to be appropriate one, then use the matcher.  */
790       /* We assume that the matching starts from 0.  */
791       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
792       match_last = check_matching (&mctx, fl_longest_match,
793 				   start <= last_start ? &match_first : NULL);
794       if (match_last != -1)
795 	{
796 	  if (__glibc_unlikely (match_last == -2))
797 	    {
798 	      err = REG_ESPACE;
799 	      goto free_return;
800 	    }
801 	  else
802 	    {
803 	      mctx.match_last = match_last;
804 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
805 		{
806 		  re_dfastate_t *pstate = mctx.state_log[match_last];
807 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
808 							     match_last);
809 		}
810 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
811 		  || dfa->nbackref)
812 		{
813 		  err = prune_impossible_nodes (&mctx);
814 		  if (err == REG_NOERROR)
815 		    break;
816 		  if (__glibc_unlikely (err != REG_NOMATCH))
817 		    goto free_return;
818 		  match_last = -1;
819 		}
820 	      else
821 		break; /* We found a match.  */
822 	    }
823 	}
824 
825       match_ctx_clean (&mctx);
826     }
827 
828   DEBUG_ASSERT (match_last != -1);
829   DEBUG_ASSERT (err == REG_NOERROR);
830 
831   /* Set pmatch[] if we need.  */
832   if (nmatch > 0)
833     {
834       Idx reg_idx;
835 
836       /* Initialize registers.  */
837       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
838 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
839 
840       /* Set the points where matching start/end.  */
841       pmatch[0].rm_so = 0;
842       pmatch[0].rm_eo = mctx.match_last;
843       /* FIXME: This function should fail if mctx.match_last exceeds
844 	 the maximum possible regoff_t value.  We need a new error
845 	 code REG_OVERFLOW.  */
846 
847       if (!preg->no_sub && nmatch > 1)
848 	{
849 	  err = set_regs (preg, &mctx, nmatch, pmatch,
850 			  dfa->has_plural_match && dfa->nbackref > 0);
851 	  if (__glibc_unlikely (err != REG_NOERROR))
852 	    goto free_return;
853 	}
854 
855       /* At last, add the offset to each register, since we slid
856 	 the buffers so that we could assume that the matching starts
857 	 from 0.  */
858       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
859 	if (pmatch[reg_idx].rm_so != -1)
860 	  {
861 #ifdef RE_ENABLE_I18N
862 	    if (__glibc_unlikely (mctx.input.offsets_needed != 0))
863 	      {
864 		pmatch[reg_idx].rm_so =
865 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
866 		   ? mctx.input.valid_raw_len
867 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
868 		pmatch[reg_idx].rm_eo =
869 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
870 		   ? mctx.input.valid_raw_len
871 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
872 	      }
873 #else
874 	    DEBUG_ASSERT (mctx.input.offsets_needed == 0);
875 #endif
876 	    pmatch[reg_idx].rm_so += match_first;
877 	    pmatch[reg_idx].rm_eo += match_first;
878 	  }
879       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
880 	{
881 	  pmatch[nmatch + reg_idx].rm_so = -1;
882 	  pmatch[nmatch + reg_idx].rm_eo = -1;
883 	}
884 
885       if (dfa->subexp_map)
886 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
887 	  if (dfa->subexp_map[reg_idx] != reg_idx)
888 	    {
889 	      pmatch[reg_idx + 1].rm_so
890 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
891 	      pmatch[reg_idx + 1].rm_eo
892 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
893 	    }
894     }
895 
896  free_return:
897   re_free (mctx.state_log);
898   if (dfa->nbackref)
899     match_ctx_free (&mctx);
900   re_string_destruct (&mctx.input);
901   return err;
902 }
903 
904 static reg_errcode_t
905 __attribute_warn_unused_result__
906 prune_impossible_nodes (re_match_context_t *mctx)
907 {
908   const re_dfa_t *const dfa = mctx->dfa;
909   Idx halt_node, match_last;
910   reg_errcode_t ret;
911   re_dfastate_t **sifted_states;
912   re_dfastate_t **lim_states = NULL;
913   re_sift_context_t sctx;
914   DEBUG_ASSERT (mctx->state_log != NULL);
915   match_last = mctx->match_last;
916   halt_node = mctx->last_node;
917 
918   /* Avoid overflow.  */
919   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
920 			<= match_last))
921     return REG_ESPACE;
922 
923   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
924   if (__glibc_unlikely (sifted_states == NULL))
925     {
926       ret = REG_ESPACE;
927       goto free_return;
928     }
929   if (dfa->nbackref)
930     {
931       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
932       if (__glibc_unlikely (lim_states == NULL))
933 	{
934 	  ret = REG_ESPACE;
935 	  goto free_return;
936 	}
937       while (1)
938 	{
939 	  memset (lim_states, '\0',
940 		  sizeof (re_dfastate_t *) * (match_last + 1));
941 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
942 			 match_last);
943 	  ret = sift_states_backward (mctx, &sctx);
944 	  re_node_set_free (&sctx.limits);
945 	  if (__glibc_unlikely (ret != REG_NOERROR))
946 	      goto free_return;
947 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
948 	    break;
949 	  do
950 	    {
951 	      --match_last;
952 	      if (match_last < 0)
953 		{
954 		  ret = REG_NOMATCH;
955 		  goto free_return;
956 		}
957 	    } while (mctx->state_log[match_last] == NULL
958 		     || !mctx->state_log[match_last]->halt);
959 	  halt_node = check_halt_state_context (mctx,
960 						mctx->state_log[match_last],
961 						match_last);
962 	}
963       ret = merge_state_array (dfa, sifted_states, lim_states,
964 			       match_last + 1);
965       re_free (lim_states);
966       lim_states = NULL;
967       if (__glibc_unlikely (ret != REG_NOERROR))
968 	goto free_return;
969     }
970   else
971     {
972       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
973       ret = sift_states_backward (mctx, &sctx);
974       re_node_set_free (&sctx.limits);
975       if (__glibc_unlikely (ret != REG_NOERROR))
976 	goto free_return;
977       if (sifted_states[0] == NULL)
978 	{
979 	  ret = REG_NOMATCH;
980 	  goto free_return;
981 	}
982     }
983   re_free (mctx->state_log);
984   mctx->state_log = sifted_states;
985   sifted_states = NULL;
986   mctx->last_node = halt_node;
987   mctx->match_last = match_last;
988   ret = REG_NOERROR;
989  free_return:
990   re_free (sifted_states);
991   re_free (lim_states);
992   return ret;
993 }
994 
995 /* Acquire an initial state and return it.
996    We must select appropriate initial state depending on the context,
997    since initial states may have constraints like "\<", "^", etc..  */
998 
999 static inline re_dfastate_t *
1000 __attribute__ ((always_inline))
1001 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1002 			    Idx idx)
1003 {
1004   const re_dfa_t *const dfa = mctx->dfa;
1005   if (dfa->init_state->has_constraint)
1006     {
1007       unsigned int context;
1008       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1009       if (IS_WORD_CONTEXT (context))
1010 	return dfa->init_state_word;
1011       else if (IS_ORDINARY_CONTEXT (context))
1012 	return dfa->init_state;
1013       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1014 	return dfa->init_state_begbuf;
1015       else if (IS_NEWLINE_CONTEXT (context))
1016 	return dfa->init_state_nl;
1017       else if (IS_BEGBUF_CONTEXT (context))
1018 	{
1019 	  /* It is relatively rare case, then calculate on demand.  */
1020 	  return re_acquire_state_context (err, dfa,
1021 					   dfa->init_state->entrance_nodes,
1022 					   context);
1023 	}
1024       else
1025 	/* Must not happen?  */
1026 	return dfa->init_state;
1027     }
1028   else
1029     return dfa->init_state;
1030 }
1031 
1032 /* Check whether the regular expression match input string INPUT or not,
1033    and return the index where the matching end.  Return -1 if
1034    there is no match, and return -2 in case of an error.
1035    FL_LONGEST_MATCH means we want the POSIX longest matching.
1036    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1037    next place where we may want to try matching.
1038    Note that the matcher assumes that the matching starts from the current
1039    index of the buffer.  */
1040 
1041 static Idx
1042 __attribute_warn_unused_result__
1043 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1044 		Idx *p_match_first)
1045 {
1046   const re_dfa_t *const dfa = mctx->dfa;
1047   reg_errcode_t err;
1048   Idx match = 0;
1049   Idx match_last = -1;
1050   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1051   re_dfastate_t *cur_state;
1052   bool at_init_state = p_match_first != NULL;
1053   Idx next_start_idx = cur_str_idx;
1054 
1055   err = REG_NOERROR;
1056   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1057   /* An initial state must not be NULL (invalid).  */
1058   if (__glibc_unlikely (cur_state == NULL))
1059     {
1060       DEBUG_ASSERT (err == REG_ESPACE);
1061       return -2;
1062     }
1063 
1064   if (mctx->state_log != NULL)
1065     {
1066       mctx->state_log[cur_str_idx] = cur_state;
1067 
1068       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1069 	 later.  E.g. Processing back references.  */
1070       if (__glibc_unlikely (dfa->nbackref))
1071 	{
1072 	  at_init_state = false;
1073 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1074 	  if (__glibc_unlikely (err != REG_NOERROR))
1075 	    return err;
1076 
1077 	  if (cur_state->has_backref)
1078 	    {
1079 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1080 	      if (__glibc_unlikely (err != REG_NOERROR))
1081 		return err;
1082 	    }
1083 	}
1084     }
1085 
1086   /* If the RE accepts NULL string.  */
1087   if (__glibc_unlikely (cur_state->halt))
1088     {
1089       if (!cur_state->has_constraint
1090 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1091 	{
1092 	  if (!fl_longest_match)
1093 	    return cur_str_idx;
1094 	  else
1095 	    {
1096 	      match_last = cur_str_idx;
1097 	      match = 1;
1098 	    }
1099 	}
1100     }
1101 
1102   while (!re_string_eoi (&mctx->input))
1103     {
1104       re_dfastate_t *old_state = cur_state;
1105       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1106 
1107       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1108 	   && mctx->input.bufs_len < mctx->input.len)
1109 	  || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1110 	      && mctx->input.valid_len < mctx->input.len))
1111 	{
1112 	  err = extend_buffers (mctx, next_char_idx + 1);
1113 	  if (__glibc_unlikely (err != REG_NOERROR))
1114 	    {
1115 	      DEBUG_ASSERT (err == REG_ESPACE);
1116 	      return -2;
1117 	    }
1118 	}
1119 
1120       cur_state = transit_state (&err, mctx, cur_state);
1121       if (mctx->state_log != NULL)
1122 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1123 
1124       if (cur_state == NULL)
1125 	{
1126 	  /* Reached the invalid state or an error.  Try to recover a valid
1127 	     state using the state log, if available and if we have not
1128 	     already found a valid (even if not the longest) match.  */
1129 	  if (__glibc_unlikely (err != REG_NOERROR))
1130 	    return -2;
1131 
1132 	  if (mctx->state_log == NULL
1133 	      || (match && !fl_longest_match)
1134 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1135 	    break;
1136 	}
1137 
1138       if (__glibc_unlikely (at_init_state))
1139 	{
1140 	  if (old_state == cur_state)
1141 	    next_start_idx = next_char_idx;
1142 	  else
1143 	    at_init_state = false;
1144 	}
1145 
1146       if (cur_state->halt)
1147 	{
1148 	  /* Reached a halt state.
1149 	     Check the halt state can satisfy the current context.  */
1150 	  if (!cur_state->has_constraint
1151 	      || check_halt_state_context (mctx, cur_state,
1152 					   re_string_cur_idx (&mctx->input)))
1153 	    {
1154 	      /* We found an appropriate halt state.  */
1155 	      match_last = re_string_cur_idx (&mctx->input);
1156 	      match = 1;
1157 
1158 	      /* We found a match, do not modify match_first below.  */
1159 	      p_match_first = NULL;
1160 	      if (!fl_longest_match)
1161 		break;
1162 	    }
1163 	}
1164     }
1165 
1166   if (p_match_first)
1167     *p_match_first += next_start_idx;
1168 
1169   return match_last;
1170 }
1171 
1172 /* Check NODE match the current context.  */
1173 
1174 static bool
1175 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1176 {
1177   re_token_type_t type = dfa->nodes[node].type;
1178   unsigned int constraint = dfa->nodes[node].constraint;
1179   if (type != END_OF_RE)
1180     return false;
1181   if (!constraint)
1182     return true;
1183   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1184     return false;
1185   return true;
1186 }
1187 
1188 /* Check the halt state STATE match the current context.
1189    Return 0 if not match, if the node, STATE has, is a halt node and
1190    match the context, return the node.  */
1191 
1192 static Idx
1193 check_halt_state_context (const re_match_context_t *mctx,
1194 			  const re_dfastate_t *state, Idx idx)
1195 {
1196   Idx i;
1197   unsigned int context;
1198   DEBUG_ASSERT (state->halt);
1199   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1200   for (i = 0; i < state->nodes.nelem; ++i)
1201     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1202       return state->nodes.elems[i];
1203   return 0;
1204 }
1205 
1206 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1207    corresponding to the DFA).
1208    Return the destination node, and update EPS_VIA_NODES;
1209    return -1 in case of errors.  */
1210 
1211 static Idx
1212 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1213 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1214 		   struct re_fail_stack_t *fs)
1215 {
1216   const re_dfa_t *const dfa = mctx->dfa;
1217   Idx i;
1218   bool ok;
1219   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1220     {
1221       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1222       re_node_set *edests = &dfa->edests[node];
1223       Idx dest_node;
1224       ok = re_node_set_insert (eps_via_nodes, node);
1225       if (__glibc_unlikely (! ok))
1226 	return -2;
1227       /* Pick up a valid destination, or return -1 if none
1228 	 is found.  */
1229       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1230 	{
1231 	  Idx candidate = edests->elems[i];
1232 	  if (!re_node_set_contains (cur_nodes, candidate))
1233 	    continue;
1234           if (dest_node == -1)
1235 	    dest_node = candidate;
1236 
1237 	  else
1238 	    {
1239 	      /* In order to avoid infinite loop like "(a*)*", return the second
1240 		 epsilon-transition if the first was already considered.  */
1241 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1242 		return candidate;
1243 
1244 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1245 	      else if (fs != NULL
1246 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1247 					   eps_via_nodes))
1248 		return -2;
1249 
1250 	      /* We know we are going to exit.  */
1251 	      break;
1252 	    }
1253 	}
1254       return dest_node;
1255     }
1256   else
1257     {
1258       Idx naccepted = 0;
1259       re_token_type_t type = dfa->nodes[node].type;
1260 
1261 #ifdef RE_ENABLE_I18N
1262       if (dfa->nodes[node].accept_mb)
1263 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1264       else
1265 #endif /* RE_ENABLE_I18N */
1266       if (type == OP_BACK_REF)
1267 	{
1268 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1269 	  if (subexp_idx < nregs)
1270 	    naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1271 	  if (fs != NULL)
1272 	    {
1273 	      if (subexp_idx >= nregs
1274 		  || regs[subexp_idx].rm_so == -1
1275 		  || regs[subexp_idx].rm_eo == -1)
1276 		return -1;
1277 	      else if (naccepted)
1278 		{
1279 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1280 		  if (mctx->input.valid_len - *pidx < naccepted
1281 		      || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1282 				  naccepted)
1283 			  != 0))
1284 		    return -1;
1285 		}
1286 	    }
1287 
1288 	  if (naccepted == 0)
1289 	    {
1290 	      Idx dest_node;
1291 	      ok = re_node_set_insert (eps_via_nodes, node);
1292 	      if (__glibc_unlikely (! ok))
1293 		return -2;
1294 	      dest_node = dfa->edests[node].elems[0];
1295 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1296 					dest_node))
1297 		return dest_node;
1298 	    }
1299 	}
1300 
1301       if (naccepted != 0
1302 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1303 	{
1304 	  Idx dest_node = dfa->nexts[node];
1305 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1306 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1307 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1308 					       dest_node)))
1309 	    return -1;
1310 	  re_node_set_empty (eps_via_nodes);
1311 	  return dest_node;
1312 	}
1313     }
1314   return -1;
1315 }
1316 
1317 static reg_errcode_t
1318 __attribute_warn_unused_result__
1319 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1320 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1321 {
1322   reg_errcode_t err;
1323   Idx num = fs->num++;
1324   if (fs->num == fs->alloc)
1325     {
1326       struct re_fail_stack_ent_t *new_array;
1327       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1328                               fs->alloc * 2);
1329       if (new_array == NULL)
1330 	return REG_ESPACE;
1331       fs->alloc *= 2;
1332       fs->stack = new_array;
1333     }
1334   fs->stack[num].idx = str_idx;
1335   fs->stack[num].node = dest_node;
1336   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1337   if (fs->stack[num].regs == NULL)
1338     return REG_ESPACE;
1339   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1340   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1341   return err;
1342 }
1343 
1344 static Idx
1345 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1346 		regmatch_t *regs, re_node_set *eps_via_nodes)
1347 {
1348   Idx num = --fs->num;
1349   DEBUG_ASSERT (num >= 0);
1350   *pidx = fs->stack[num].idx;
1351   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1352   re_node_set_free (eps_via_nodes);
1353   re_free (fs->stack[num].regs);
1354   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1355   return fs->stack[num].node;
1356 }
1357 
1358 /* Set the positions where the subexpressions are starts/ends to registers
1359    PMATCH.
1360    Note: We assume that pmatch[0] is already set, and
1361    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1362 
1363 static reg_errcode_t
1364 __attribute_warn_unused_result__
1365 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1366 	  regmatch_t *pmatch, bool fl_backtrack)
1367 {
1368   const re_dfa_t *dfa = preg->buffer;
1369   Idx idx, cur_node;
1370   re_node_set eps_via_nodes;
1371   struct re_fail_stack_t *fs;
1372   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1373   regmatch_t *prev_idx_match;
1374   bool prev_idx_match_malloced = false;
1375 
1376   DEBUG_ASSERT (nmatch > 1);
1377   DEBUG_ASSERT (mctx->state_log != NULL);
1378   if (fl_backtrack)
1379     {
1380       fs = &fs_body;
1381       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1382       if (fs->stack == NULL)
1383 	return REG_ESPACE;
1384     }
1385   else
1386     fs = NULL;
1387 
1388   cur_node = dfa->init_node;
1389   re_node_set_init_empty (&eps_via_nodes);
1390 
1391   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1392     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1393   else
1394     {
1395       prev_idx_match = re_malloc (regmatch_t, nmatch);
1396       if (prev_idx_match == NULL)
1397 	{
1398 	  free_fail_stack_return (fs);
1399 	  return REG_ESPACE;
1400 	}
1401       prev_idx_match_malloced = true;
1402     }
1403   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1404 
1405   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1406     {
1407       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1408 
1409       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1410 	{
1411 	  Idx reg_idx;
1412 	  if (fs)
1413 	    {
1414 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1415 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1416 		  break;
1417 	      if (reg_idx == nmatch)
1418 		{
1419 		  re_node_set_free (&eps_via_nodes);
1420 		  if (prev_idx_match_malloced)
1421 		    re_free (prev_idx_match);
1422 		  return free_fail_stack_return (fs);
1423 		}
1424 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1425 					 &eps_via_nodes);
1426 	    }
1427 	  else
1428 	    {
1429 	      re_node_set_free (&eps_via_nodes);
1430 	      if (prev_idx_match_malloced)
1431 		re_free (prev_idx_match);
1432 	      return REG_NOERROR;
1433 	    }
1434 	}
1435 
1436       /* Proceed to next node.  */
1437       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1438 				    &eps_via_nodes, fs);
1439 
1440       if (__glibc_unlikely (cur_node < 0))
1441 	{
1442 	  if (__glibc_unlikely (cur_node == -2))
1443 	    {
1444 	      re_node_set_free (&eps_via_nodes);
1445 	      if (prev_idx_match_malloced)
1446 		re_free (prev_idx_match);
1447 	      free_fail_stack_return (fs);
1448 	      return REG_ESPACE;
1449 	    }
1450 	  if (fs)
1451 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1452 				       &eps_via_nodes);
1453 	  else
1454 	    {
1455 	      re_node_set_free (&eps_via_nodes);
1456 	      if (prev_idx_match_malloced)
1457 		re_free (prev_idx_match);
1458 	      return REG_NOMATCH;
1459 	    }
1460 	}
1461     }
1462   re_node_set_free (&eps_via_nodes);
1463   if (prev_idx_match_malloced)
1464     re_free (prev_idx_match);
1465   return free_fail_stack_return (fs);
1466 }
1467 
1468 static reg_errcode_t
1469 free_fail_stack_return (struct re_fail_stack_t *fs)
1470 {
1471   if (fs)
1472     {
1473       Idx fs_idx;
1474       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1475 	{
1476 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1477 	  re_free (fs->stack[fs_idx].regs);
1478 	}
1479       re_free (fs->stack);
1480     }
1481   return REG_NOERROR;
1482 }
1483 
1484 static void
1485 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1486 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1487 {
1488   int type = dfa->nodes[cur_node].type;
1489   if (type == OP_OPEN_SUBEXP)
1490     {
1491       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1492 
1493       /* We are at the first node of this sub expression.  */
1494       if (reg_num < nmatch)
1495 	{
1496 	  pmatch[reg_num].rm_so = cur_idx;
1497 	  pmatch[reg_num].rm_eo = -1;
1498 	}
1499     }
1500   else if (type == OP_CLOSE_SUBEXP)
1501     {
1502       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1503       if (reg_num < nmatch)
1504 	{
1505 	  /* We are at the last node of this sub expression.  */
1506 	  if (pmatch[reg_num].rm_so < cur_idx)
1507 	    {
1508 	      pmatch[reg_num].rm_eo = cur_idx;
1509 	      /* This is a non-empty match or we are not inside an optional
1510 		 subexpression.  Accept this right away.  */
1511 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1512 	    }
1513 	  else
1514 	    {
1515 	      if (dfa->nodes[cur_node].opt_subexp
1516 		  && prev_idx_match[reg_num].rm_so != -1)
1517 		/* We transited through an empty match for an optional
1518 		   subexpression, like (a?)*, and this is not the subexp's
1519 		   first match.  Copy back the old content of the registers
1520 		   so that matches of an inner subexpression are undone as
1521 		   well, like in ((a?))*.  */
1522 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1523 	      else
1524 		/* We completed a subexpression, but it may be part of
1525 		   an optional one, so do not update PREV_IDX_MATCH.  */
1526 		pmatch[reg_num].rm_eo = cur_idx;
1527 	    }
1528 	}
1529     }
1530 }
1531 
1532 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1533    and sift the nodes in each states according to the following rules.
1534    Updated state_log will be wrote to STATE_LOG.
1535 
1536    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1537      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1538 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1539 	the LAST_NODE, we throw away the node 'a'.
1540      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1541 	string 's' and transit to 'b':
1542 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1543 	   away the node 'a'.
1544 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1545 	    thrown away, we throw away the node 'a'.
1546      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1547 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1548 	   node 'a'.
1549 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1550 	    we throw away the node 'a'.  */
1551 
1552 #define STATE_NODE_CONTAINS(state,node) \
1553   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1554 
1555 static reg_errcode_t
1556 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1557 {
1558   reg_errcode_t err;
1559   int null_cnt = 0;
1560   Idx str_idx = sctx->last_str_idx;
1561   re_node_set cur_dest;
1562 
1563   DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1564 
1565   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1566      transit to the last_node and the last_node itself.  */
1567   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1568   if (__glibc_unlikely (err != REG_NOERROR))
1569     return err;
1570   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1571   if (__glibc_unlikely (err != REG_NOERROR))
1572     goto free_return;
1573 
1574   /* Then check each states in the state_log.  */
1575   while (str_idx > 0)
1576     {
1577       /* Update counters.  */
1578       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1579       if (null_cnt > mctx->max_mb_elem_len)
1580 	{
1581 	  memset (sctx->sifted_states, '\0',
1582 		  sizeof (re_dfastate_t *) * str_idx);
1583 	  re_node_set_free (&cur_dest);
1584 	  return REG_NOERROR;
1585 	}
1586       re_node_set_empty (&cur_dest);
1587       --str_idx;
1588 
1589       if (mctx->state_log[str_idx])
1590 	{
1591 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1592 	  if (__glibc_unlikely (err != REG_NOERROR))
1593 	    goto free_return;
1594 	}
1595 
1596       /* Add all the nodes which satisfy the following conditions:
1597 	 - It can epsilon transit to a node in CUR_DEST.
1598 	 - It is in CUR_SRC.
1599 	 And update state_log.  */
1600       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1601       if (__glibc_unlikely (err != REG_NOERROR))
1602 	goto free_return;
1603     }
1604   err = REG_NOERROR;
1605  free_return:
1606   re_node_set_free (&cur_dest);
1607   return err;
1608 }
1609 
1610 static reg_errcode_t
1611 __attribute_warn_unused_result__
1612 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1613 		     Idx str_idx, re_node_set *cur_dest)
1614 {
1615   const re_dfa_t *const dfa = mctx->dfa;
1616   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1617   Idx i;
1618 
1619   /* Then build the next sifted state.
1620      We build the next sifted state on 'cur_dest', and update
1621      'sifted_states[str_idx]' with 'cur_dest'.
1622      Note:
1623      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1624      'cur_src' points the node_set of the old 'state_log[str_idx]'
1625      (with the epsilon nodes pre-filtered out).  */
1626   for (i = 0; i < cur_src->nelem; i++)
1627     {
1628       Idx prev_node = cur_src->elems[i];
1629       int naccepted = 0;
1630       bool ok;
1631       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
1632 
1633 #ifdef RE_ENABLE_I18N
1634       /* If the node may accept "multi byte".  */
1635       if (dfa->nodes[prev_node].accept_mb)
1636 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1637 					 str_idx, sctx->last_str_idx);
1638 #endif /* RE_ENABLE_I18N */
1639 
1640       /* We don't check backreferences here.
1641 	 See update_cur_sifted_state().  */
1642       if (!naccepted
1643 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1644 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1645 				  dfa->nexts[prev_node]))
1646 	naccepted = 1;
1647 
1648       if (naccepted == 0)
1649 	continue;
1650 
1651       if (sctx->limits.nelem)
1652 	{
1653 	  Idx to_idx = str_idx + naccepted;
1654 	  if (check_dst_limits (mctx, &sctx->limits,
1655 				dfa->nexts[prev_node], to_idx,
1656 				prev_node, str_idx))
1657 	    continue;
1658 	}
1659       ok = re_node_set_insert (cur_dest, prev_node);
1660       if (__glibc_unlikely (! ok))
1661 	return REG_ESPACE;
1662     }
1663 
1664   return REG_NOERROR;
1665 }
1666 
1667 /* Helper functions.  */
1668 
1669 static reg_errcode_t
1670 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1671 {
1672   Idx top = mctx->state_log_top;
1673 
1674   if ((next_state_log_idx >= mctx->input.bufs_len
1675        && mctx->input.bufs_len < mctx->input.len)
1676       || (next_state_log_idx >= mctx->input.valid_len
1677 	  && mctx->input.valid_len < mctx->input.len))
1678     {
1679       reg_errcode_t err;
1680       err = extend_buffers (mctx, next_state_log_idx + 1);
1681       if (__glibc_unlikely (err != REG_NOERROR))
1682 	return err;
1683     }
1684 
1685   if (top < next_state_log_idx)
1686     {
1687       memset (mctx->state_log + top + 1, '\0',
1688 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1689       mctx->state_log_top = next_state_log_idx;
1690     }
1691   return REG_NOERROR;
1692 }
1693 
1694 static reg_errcode_t
1695 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1696 		   re_dfastate_t **src, Idx num)
1697 {
1698   Idx st_idx;
1699   reg_errcode_t err;
1700   for (st_idx = 0; st_idx < num; ++st_idx)
1701     {
1702       if (dst[st_idx] == NULL)
1703 	dst[st_idx] = src[st_idx];
1704       else if (src[st_idx] != NULL)
1705 	{
1706 	  re_node_set merged_set;
1707 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1708 					&src[st_idx]->nodes);
1709 	  if (__glibc_unlikely (err != REG_NOERROR))
1710 	    return err;
1711 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1712 	  re_node_set_free (&merged_set);
1713 	  if (__glibc_unlikely (err != REG_NOERROR))
1714 	    return err;
1715 	}
1716     }
1717   return REG_NOERROR;
1718 }
1719 
1720 static reg_errcode_t
1721 update_cur_sifted_state (const re_match_context_t *mctx,
1722 			 re_sift_context_t *sctx, Idx str_idx,
1723 			 re_node_set *dest_nodes)
1724 {
1725   const re_dfa_t *const dfa = mctx->dfa;
1726   reg_errcode_t err = REG_NOERROR;
1727   const re_node_set *candidates;
1728   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1729 		: &mctx->state_log[str_idx]->nodes);
1730 
1731   if (dest_nodes->nelem == 0)
1732     sctx->sifted_states[str_idx] = NULL;
1733   else
1734     {
1735       if (candidates)
1736 	{
1737 	  /* At first, add the nodes which can epsilon transit to a node in
1738 	     DEST_NODE.  */
1739 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1740 	  if (__glibc_unlikely (err != REG_NOERROR))
1741 	    return err;
1742 
1743 	  /* Then, check the limitations in the current sift_context.  */
1744 	  if (sctx->limits.nelem)
1745 	    {
1746 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1747 					 mctx->bkref_ents, str_idx);
1748 	      if (__glibc_unlikely (err != REG_NOERROR))
1749 		return err;
1750 	    }
1751 	}
1752 
1753       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1754       if (__glibc_unlikely (err != REG_NOERROR))
1755 	return err;
1756     }
1757 
1758   if (candidates && mctx->state_log[str_idx]->has_backref)
1759     {
1760       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1761       if (__glibc_unlikely (err != REG_NOERROR))
1762 	return err;
1763     }
1764   return REG_NOERROR;
1765 }
1766 
1767 static reg_errcode_t
1768 __attribute_warn_unused_result__
1769 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1770 		       const re_node_set *candidates)
1771 {
1772   reg_errcode_t err = REG_NOERROR;
1773   Idx i;
1774 
1775   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1776   if (__glibc_unlikely (err != REG_NOERROR))
1777     return err;
1778 
1779   if (!state->inveclosure.alloc)
1780     {
1781       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1782       if (__glibc_unlikely (err != REG_NOERROR))
1783 	return REG_ESPACE;
1784       for (i = 0; i < dest_nodes->nelem; i++)
1785 	{
1786 	  err = re_node_set_merge (&state->inveclosure,
1787 				   dfa->inveclosures + dest_nodes->elems[i]);
1788 	  if (__glibc_unlikely (err != REG_NOERROR))
1789 	    return REG_ESPACE;
1790 	}
1791     }
1792   return re_node_set_add_intersect (dest_nodes, candidates,
1793 				    &state->inveclosure);
1794 }
1795 
1796 static reg_errcode_t
1797 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1798 		       const re_node_set *candidates)
1799 {
1800     Idx ecl_idx;
1801     reg_errcode_t err;
1802     re_node_set *inv_eclosure = dfa->inveclosures + node;
1803     re_node_set except_nodes;
1804     re_node_set_init_empty (&except_nodes);
1805     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1806       {
1807 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1808 	if (cur_node == node)
1809 	  continue;
1810 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1811 	  {
1812 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1813 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1814 			 ? dfa->edests[cur_node].elems[1] : -1);
1815 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1816 		 && re_node_set_contains (dest_nodes, edst1))
1817 		|| (edst2 > 0
1818 		    && !re_node_set_contains (inv_eclosure, edst2)
1819 		    && re_node_set_contains (dest_nodes, edst2)))
1820 	      {
1821 		err = re_node_set_add_intersect (&except_nodes, candidates,
1822 						 dfa->inveclosures + cur_node);
1823 		if (__glibc_unlikely (err != REG_NOERROR))
1824 		  {
1825 		    re_node_set_free (&except_nodes);
1826 		    return err;
1827 		  }
1828 	      }
1829 	  }
1830       }
1831     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1832       {
1833 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1834 	if (!re_node_set_contains (&except_nodes, cur_node))
1835 	  {
1836 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1837 	    re_node_set_remove_at (dest_nodes, idx);
1838 	  }
1839       }
1840     re_node_set_free (&except_nodes);
1841     return REG_NOERROR;
1842 }
1843 
1844 static bool
1845 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1846 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1847 {
1848   const re_dfa_t *const dfa = mctx->dfa;
1849   Idx lim_idx, src_pos, dst_pos;
1850 
1851   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1852   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1853   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1854     {
1855       Idx subexp_idx;
1856       struct re_backref_cache_entry *ent;
1857       ent = mctx->bkref_ents + limits->elems[lim_idx];
1858       subexp_idx = dfa->nodes[ent->node].opr.idx;
1859 
1860       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1861 					   subexp_idx, dst_node, dst_idx,
1862 					   dst_bkref_idx);
1863       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1864 					   subexp_idx, src_node, src_idx,
1865 					   src_bkref_idx);
1866 
1867       /* In case of:
1868 	 <src> <dst> ( <subexp> )
1869 	 ( <subexp> ) <src> <dst>
1870 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1871       if (src_pos == dst_pos)
1872 	continue; /* This is unrelated limitation.  */
1873       else
1874 	return true;
1875     }
1876   return false;
1877 }
1878 
1879 static int
1880 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1881 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1882 {
1883   const re_dfa_t *const dfa = mctx->dfa;
1884   const re_node_set *eclosures = dfa->eclosures + from_node;
1885   Idx node_idx;
1886 
1887   /* Else, we are on the boundary: examine the nodes on the epsilon
1888      closure.  */
1889   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1890     {
1891       Idx node = eclosures->elems[node_idx];
1892       switch (dfa->nodes[node].type)
1893 	{
1894 	case OP_BACK_REF:
1895 	  if (bkref_idx != -1)
1896 	    {
1897 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1898 	      do
1899 		{
1900 		  Idx dst;
1901 		  int cpos;
1902 
1903 		  if (ent->node != node)
1904 		    continue;
1905 
1906 		  if (subexp_idx < BITSET_WORD_BITS
1907 		      && !(ent->eps_reachable_subexps_map
1908 			   & ((bitset_word_t) 1 << subexp_idx)))
1909 		    continue;
1910 
1911 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1912 		     OP_CLOSE_SUBEXP cases below.  But, if the
1913 		     destination node is the same node as the source
1914 		     node, don't recurse because it would cause an
1915 		     infinite loop: a regex that exhibits this behavior
1916 		     is ()\1*\1*  */
1917 		  dst = dfa->edests[node].elems[0];
1918 		  if (dst == from_node)
1919 		    {
1920 		      if (boundaries & 1)
1921 			return -1;
1922 		      else /* if (boundaries & 2) */
1923 			return 0;
1924 		    }
1925 
1926 		  cpos =
1927 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1928 						 dst, bkref_idx);
1929 		  if (cpos == -1 /* && (boundaries & 1) */)
1930 		    return -1;
1931 		  if (cpos == 0 && (boundaries & 2))
1932 		    return 0;
1933 
1934 		  if (subexp_idx < BITSET_WORD_BITS)
1935 		    ent->eps_reachable_subexps_map
1936 		      &= ~((bitset_word_t) 1 << subexp_idx);
1937 		}
1938 	      while (ent++->more);
1939 	    }
1940 	  break;
1941 
1942 	case OP_OPEN_SUBEXP:
1943 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1944 	    return -1;
1945 	  break;
1946 
1947 	case OP_CLOSE_SUBEXP:
1948 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1949 	    return 0;
1950 	  break;
1951 
1952 	default:
1953 	    break;
1954 	}
1955     }
1956 
1957   return (boundaries & 2) ? 1 : 0;
1958 }
1959 
1960 static int
1961 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1962 			   Idx subexp_idx, Idx from_node, Idx str_idx,
1963 			   Idx bkref_idx)
1964 {
1965   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1966   int boundaries;
1967 
1968   /* If we are outside the range of the subexpression, return -1 or 1.  */
1969   if (str_idx < lim->subexp_from)
1970     return -1;
1971 
1972   if (lim->subexp_to < str_idx)
1973     return 1;
1974 
1975   /* If we are within the subexpression, return 0.  */
1976   boundaries = (str_idx == lim->subexp_from);
1977   boundaries |= (str_idx == lim->subexp_to) << 1;
1978   if (boundaries == 0)
1979     return 0;
1980 
1981   /* Else, examine epsilon closure.  */
1982   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1983 				      from_node, bkref_idx);
1984 }
1985 
1986 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1987    which are against limitations from DEST_NODES. */
1988 
1989 static reg_errcode_t
1990 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
1991 		     const re_node_set *candidates, re_node_set *limits,
1992 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
1993 {
1994   reg_errcode_t err;
1995   Idx node_idx, lim_idx;
1996 
1997   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1998     {
1999       Idx subexp_idx;
2000       struct re_backref_cache_entry *ent;
2001       ent = bkref_ents + limits->elems[lim_idx];
2002 
2003       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2004 	continue; /* This is unrelated limitation.  */
2005 
2006       subexp_idx = dfa->nodes[ent->node].opr.idx;
2007       if (ent->subexp_to == str_idx)
2008 	{
2009 	  Idx ops_node = -1;
2010 	  Idx cls_node = -1;
2011 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2012 	    {
2013 	      Idx node = dest_nodes->elems[node_idx];
2014 	      re_token_type_t type = dfa->nodes[node].type;
2015 	      if (type == OP_OPEN_SUBEXP
2016 		  && subexp_idx == dfa->nodes[node].opr.idx)
2017 		ops_node = node;
2018 	      else if (type == OP_CLOSE_SUBEXP
2019 		       && subexp_idx == dfa->nodes[node].opr.idx)
2020 		cls_node = node;
2021 	    }
2022 
2023 	  /* Check the limitation of the open subexpression.  */
2024 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2025 	  if (ops_node >= 0)
2026 	    {
2027 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2028 					   candidates);
2029 	      if (__glibc_unlikely (err != REG_NOERROR))
2030 		return err;
2031 	    }
2032 
2033 	  /* Check the limitation of the close subexpression.  */
2034 	  if (cls_node >= 0)
2035 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2036 	      {
2037 		Idx node = dest_nodes->elems[node_idx];
2038 		if (!re_node_set_contains (dfa->inveclosures + node,
2039 					   cls_node)
2040 		    && !re_node_set_contains (dfa->eclosures + node,
2041 					      cls_node))
2042 		  {
2043 		    /* It is against this limitation.
2044 		       Remove it form the current sifted state.  */
2045 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2046 						 candidates);
2047 		    if (__glibc_unlikely (err != REG_NOERROR))
2048 		      return err;
2049 		    --node_idx;
2050 		  }
2051 	      }
2052 	}
2053       else /* (ent->subexp_to != str_idx)  */
2054 	{
2055 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2056 	    {
2057 	      Idx node = dest_nodes->elems[node_idx];
2058 	      re_token_type_t type = dfa->nodes[node].type;
2059 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2060 		{
2061 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2062 		    continue;
2063 		  /* It is against this limitation.
2064 		     Remove it form the current sifted state.  */
2065 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2066 					       candidates);
2067 		  if (__glibc_unlikely (err != REG_NOERROR))
2068 		    return err;
2069 		}
2070 	    }
2071 	}
2072     }
2073   return REG_NOERROR;
2074 }
2075 
2076 static reg_errcode_t
2077 __attribute_warn_unused_result__
2078 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2079 		   Idx str_idx, const re_node_set *candidates)
2080 {
2081   const re_dfa_t *const dfa = mctx->dfa;
2082   reg_errcode_t err;
2083   Idx node_idx, node;
2084   re_sift_context_t local_sctx;
2085   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2086 
2087   if (first_idx == -1)
2088     return REG_NOERROR;
2089 
2090   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2091 
2092   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2093     {
2094       Idx enabled_idx;
2095       re_token_type_t type;
2096       struct re_backref_cache_entry *entry;
2097       node = candidates->elems[node_idx];
2098       type = dfa->nodes[node].type;
2099       /* Avoid infinite loop for the REs like "()\1+".  */
2100       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2101 	continue;
2102       if (type != OP_BACK_REF)
2103 	continue;
2104 
2105       entry = mctx->bkref_ents + first_idx;
2106       enabled_idx = first_idx;
2107       do
2108 	{
2109 	  Idx subexp_len;
2110 	  Idx to_idx;
2111 	  Idx dst_node;
2112 	  bool ok;
2113 	  re_dfastate_t *cur_state;
2114 
2115 	  if (entry->node != node)
2116 	    continue;
2117 	  subexp_len = entry->subexp_to - entry->subexp_from;
2118 	  to_idx = str_idx + subexp_len;
2119 	  dst_node = (subexp_len ? dfa->nexts[node]
2120 		      : dfa->edests[node].elems[0]);
2121 
2122 	  if (to_idx > sctx->last_str_idx
2123 	      || sctx->sifted_states[to_idx] == NULL
2124 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2125 	      || check_dst_limits (mctx, &sctx->limits, node,
2126 				   str_idx, dst_node, to_idx))
2127 	    continue;
2128 
2129 	  if (local_sctx.sifted_states == NULL)
2130 	    {
2131 	      local_sctx = *sctx;
2132 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2133 	      if (__glibc_unlikely (err != REG_NOERROR))
2134 		goto free_return;
2135 	    }
2136 	  local_sctx.last_node = node;
2137 	  local_sctx.last_str_idx = str_idx;
2138 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2139 	  if (__glibc_unlikely (! ok))
2140 	    {
2141 	      err = REG_ESPACE;
2142 	      goto free_return;
2143 	    }
2144 	  cur_state = local_sctx.sifted_states[str_idx];
2145 	  err = sift_states_backward (mctx, &local_sctx);
2146 	  if (__glibc_unlikely (err != REG_NOERROR))
2147 	    goto free_return;
2148 	  if (sctx->limited_states != NULL)
2149 	    {
2150 	      err = merge_state_array (dfa, sctx->limited_states,
2151 				       local_sctx.sifted_states,
2152 				       str_idx + 1);
2153 	      if (__glibc_unlikely (err != REG_NOERROR))
2154 		goto free_return;
2155 	    }
2156 	  local_sctx.sifted_states[str_idx] = cur_state;
2157 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2158 
2159 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2160 	  entry = mctx->bkref_ents + enabled_idx;
2161 	}
2162       while (enabled_idx++, entry++->more);
2163     }
2164   err = REG_NOERROR;
2165  free_return:
2166   if (local_sctx.sifted_states != NULL)
2167     {
2168       re_node_set_free (&local_sctx.limits);
2169     }
2170 
2171   return err;
2172 }
2173 
2174 
2175 #ifdef RE_ENABLE_I18N
2176 static int
2177 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2178 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2179 {
2180   const re_dfa_t *const dfa = mctx->dfa;
2181   int naccepted;
2182   /* Check the node can accept "multi byte".  */
2183   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2184   if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2185       && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2186 			       dfa->nexts[node_idx]))
2187     /* The node can't accept the "multi byte", or the
2188        destination was already thrown away, then the node
2189        couldn't accept the current input "multi byte".   */
2190     naccepted = 0;
2191   /* Otherwise, it is sure that the node could accept
2192      'naccepted' bytes input.  */
2193   return naccepted;
2194 }
2195 #endif /* RE_ENABLE_I18N */
2196 
2197 
2198 /* Functions for state transition.  */
2199 
2200 /* Return the next state to which the current state STATE will transit by
2201    accepting the current input byte, and update STATE_LOG if necessary.
2202    If STATE can accept a multibyte char/collating element/back reference
2203    update the destination of STATE_LOG.  */
2204 
2205 static re_dfastate_t *
2206 __attribute_warn_unused_result__
2207 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2208 	       re_dfastate_t *state)
2209 {
2210   re_dfastate_t **trtable;
2211   unsigned char ch;
2212 
2213 #ifdef RE_ENABLE_I18N
2214   /* If the current state can accept multibyte.  */
2215   if (__glibc_unlikely (state->accept_mb))
2216     {
2217       *err = transit_state_mb (mctx, state);
2218       if (__glibc_unlikely (*err != REG_NOERROR))
2219 	return NULL;
2220     }
2221 #endif /* RE_ENABLE_I18N */
2222 
2223   /* Then decide the next state with the single byte.  */
2224 #if 0
2225   if (0)
2226     /* don't use transition table  */
2227     return transit_state_sb (err, mctx, state);
2228 #endif
2229 
2230   /* Use transition table  */
2231   ch = re_string_fetch_byte (&mctx->input);
2232   for (;;)
2233     {
2234       trtable = state->trtable;
2235       if (__glibc_likely (trtable != NULL))
2236 	return trtable[ch];
2237 
2238       trtable = state->word_trtable;
2239       if (__glibc_likely (trtable != NULL))
2240 	{
2241 	  unsigned int context;
2242 	  context
2243 	    = re_string_context_at (&mctx->input,
2244 				    re_string_cur_idx (&mctx->input) - 1,
2245 				    mctx->eflags);
2246 	  if (IS_WORD_CONTEXT (context))
2247 	    return trtable[ch + SBC_MAX];
2248 	  else
2249 	    return trtable[ch];
2250 	}
2251 
2252       if (!build_trtable (mctx->dfa, state))
2253 	{
2254 	  *err = REG_ESPACE;
2255 	  return NULL;
2256 	}
2257 
2258       /* Retry, we now have a transition table.  */
2259     }
2260 }
2261 
2262 /* Update the state_log if we need */
2263 static re_dfastate_t *
2264 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2265 		      re_dfastate_t *next_state)
2266 {
2267   const re_dfa_t *const dfa = mctx->dfa;
2268   Idx cur_idx = re_string_cur_idx (&mctx->input);
2269 
2270   if (cur_idx > mctx->state_log_top)
2271     {
2272       mctx->state_log[cur_idx] = next_state;
2273       mctx->state_log_top = cur_idx;
2274     }
2275   else if (mctx->state_log[cur_idx] == 0)
2276     {
2277       mctx->state_log[cur_idx] = next_state;
2278     }
2279   else
2280     {
2281       re_dfastate_t *pstate;
2282       unsigned int context;
2283       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2284       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2285 	 the destination of a multibyte char/collating element/
2286 	 back reference.  Then the next state is the union set of
2287 	 these destinations and the results of the transition table.  */
2288       pstate = mctx->state_log[cur_idx];
2289       log_nodes = pstate->entrance_nodes;
2290       if (next_state != NULL)
2291 	{
2292 	  table_nodes = next_state->entrance_nodes;
2293 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2294 					     log_nodes);
2295 	  if (__glibc_unlikely (*err != REG_NOERROR))
2296 	    return NULL;
2297 	}
2298       else
2299 	next_nodes = *log_nodes;
2300       /* Note: We already add the nodes of the initial state,
2301 	 then we don't need to add them here.  */
2302 
2303       context = re_string_context_at (&mctx->input,
2304 				      re_string_cur_idx (&mctx->input) - 1,
2305 				      mctx->eflags);
2306       next_state = mctx->state_log[cur_idx]
2307 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2308       /* We don't need to check errors here, since the return value of
2309 	 this function is next_state and ERR is already set.  */
2310 
2311       if (table_nodes != NULL)
2312 	re_node_set_free (&next_nodes);
2313     }
2314 
2315   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2316     {
2317       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2318 	 later.  We must check them here, since the back references in the
2319 	 next state might use them.  */
2320       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2321 					cur_idx);
2322       if (__glibc_unlikely (*err != REG_NOERROR))
2323 	return NULL;
2324 
2325       /* If the next state has back references.  */
2326       if (next_state->has_backref)
2327 	{
2328 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2329 	  if (__glibc_unlikely (*err != REG_NOERROR))
2330 	    return NULL;
2331 	  next_state = mctx->state_log[cur_idx];
2332 	}
2333     }
2334 
2335   return next_state;
2336 }
2337 
2338 /* Skip bytes in the input that correspond to part of a
2339    multi-byte match, then look in the log for a state
2340    from which to restart matching.  */
2341 static re_dfastate_t *
2342 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2343 {
2344   re_dfastate_t *cur_state;
2345   do
2346     {
2347       Idx max = mctx->state_log_top;
2348       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2349 
2350       do
2351 	{
2352 	  if (++cur_str_idx > max)
2353 	    return NULL;
2354 	  re_string_skip_bytes (&mctx->input, 1);
2355 	}
2356       while (mctx->state_log[cur_str_idx] == NULL);
2357 
2358       cur_state = merge_state_with_log (err, mctx, NULL);
2359     }
2360   while (*err == REG_NOERROR && cur_state == NULL);
2361   return cur_state;
2362 }
2363 
2364 /* Helper functions for transit_state.  */
2365 
2366 /* From the node set CUR_NODES, pick up the nodes whose types are
2367    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2368    expression. And register them to use them later for evaluating the
2369    corresponding back references.  */
2370 
2371 static reg_errcode_t
2372 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2373 			   Idx str_idx)
2374 {
2375   const re_dfa_t *const dfa = mctx->dfa;
2376   Idx node_idx;
2377   reg_errcode_t err;
2378 
2379   /* TODO: This isn't efficient.
2380 	   Because there might be more than one nodes whose types are
2381 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2382 	   nodes.
2383 	   E.g. RE: (a){2}  */
2384   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2385     {
2386       Idx node = cur_nodes->elems[node_idx];
2387       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2388 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2389 	  && (dfa->used_bkref_map
2390 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2391 	{
2392 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2393 	  if (__glibc_unlikely (err != REG_NOERROR))
2394 	    return err;
2395 	}
2396     }
2397   return REG_NOERROR;
2398 }
2399 
2400 #if 0
2401 /* Return the next state to which the current state STATE will transit by
2402    accepting the current input byte.  */
2403 
2404 static re_dfastate_t *
2405 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2406 		  re_dfastate_t *state)
2407 {
2408   const re_dfa_t *const dfa = mctx->dfa;
2409   re_node_set next_nodes;
2410   re_dfastate_t *next_state;
2411   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2412   unsigned int context;
2413 
2414   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2415   if (__glibc_unlikely (*err != REG_NOERROR))
2416     return NULL;
2417   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2418     {
2419       Idx cur_node = state->nodes.elems[node_cnt];
2420       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2421 	{
2422 	  *err = re_node_set_merge (&next_nodes,
2423 				    dfa->eclosures + dfa->nexts[cur_node]);
2424 	  if (__glibc_unlikely (*err != REG_NOERROR))
2425 	    {
2426 	      re_node_set_free (&next_nodes);
2427 	      return NULL;
2428 	    }
2429 	}
2430     }
2431   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2432   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2433   /* We don't need to check errors here, since the return value of
2434      this function is next_state and ERR is already set.  */
2435 
2436   re_node_set_free (&next_nodes);
2437   re_string_skip_bytes (&mctx->input, 1);
2438   return next_state;
2439 }
2440 #endif
2441 
2442 #ifdef RE_ENABLE_I18N
2443 static reg_errcode_t
2444 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2445 {
2446   const re_dfa_t *const dfa = mctx->dfa;
2447   reg_errcode_t err;
2448   Idx i;
2449 
2450   for (i = 0; i < pstate->nodes.nelem; ++i)
2451     {
2452       re_node_set dest_nodes, *new_nodes;
2453       Idx cur_node_idx = pstate->nodes.elems[i];
2454       int naccepted;
2455       Idx dest_idx;
2456       unsigned int context;
2457       re_dfastate_t *dest_state;
2458 
2459       if (!dfa->nodes[cur_node_idx].accept_mb)
2460 	continue;
2461 
2462       if (dfa->nodes[cur_node_idx].constraint)
2463 	{
2464 	  context = re_string_context_at (&mctx->input,
2465 					  re_string_cur_idx (&mctx->input),
2466 					  mctx->eflags);
2467 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2468 					   context))
2469 	    continue;
2470 	}
2471 
2472       /* How many bytes the node can accept?  */
2473       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2474 					   re_string_cur_idx (&mctx->input));
2475       if (naccepted == 0)
2476 	continue;
2477 
2478       /* The node can accepts 'naccepted' bytes.  */
2479       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2480       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2481 			       : mctx->max_mb_elem_len);
2482       err = clean_state_log_if_needed (mctx, dest_idx);
2483       if (__glibc_unlikely (err != REG_NOERROR))
2484 	return err;
2485       DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
2486       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2487 
2488       dest_state = mctx->state_log[dest_idx];
2489       if (dest_state == NULL)
2490 	dest_nodes = *new_nodes;
2491       else
2492 	{
2493 	  err = re_node_set_init_union (&dest_nodes,
2494 					dest_state->entrance_nodes, new_nodes);
2495 	  if (__glibc_unlikely (err != REG_NOERROR))
2496 	    return err;
2497 	}
2498       context = re_string_context_at (&mctx->input, dest_idx - 1,
2499 				      mctx->eflags);
2500       mctx->state_log[dest_idx]
2501 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2502       if (dest_state != NULL)
2503 	re_node_set_free (&dest_nodes);
2504       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2505 			    && err != REG_NOERROR))
2506 	return err;
2507     }
2508   return REG_NOERROR;
2509 }
2510 #endif /* RE_ENABLE_I18N */
2511 
2512 static reg_errcode_t
2513 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2514 {
2515   const re_dfa_t *const dfa = mctx->dfa;
2516   reg_errcode_t err;
2517   Idx i;
2518   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2519 
2520   for (i = 0; i < nodes->nelem; ++i)
2521     {
2522       Idx dest_str_idx, prev_nelem, bkc_idx;
2523       Idx node_idx = nodes->elems[i];
2524       unsigned int context;
2525       const re_token_t *node = dfa->nodes + node_idx;
2526       re_node_set *new_dest_nodes;
2527 
2528       /* Check whether 'node' is a backreference or not.  */
2529       if (node->type != OP_BACK_REF)
2530 	continue;
2531 
2532       if (node->constraint)
2533 	{
2534 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2535 					  mctx->eflags);
2536 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2537 	    continue;
2538 	}
2539 
2540       /* 'node' is a backreference.
2541 	 Check the substring which the substring matched.  */
2542       bkc_idx = mctx->nbkref_ents;
2543       err = get_subexp (mctx, node_idx, cur_str_idx);
2544       if (__glibc_unlikely (err != REG_NOERROR))
2545 	goto free_return;
2546 
2547       /* And add the epsilon closures (which is 'new_dest_nodes') of
2548 	 the backreference to appropriate state_log.  */
2549       DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
2550       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2551 	{
2552 	  Idx subexp_len;
2553 	  re_dfastate_t *dest_state;
2554 	  struct re_backref_cache_entry *bkref_ent;
2555 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2556 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2557 	    continue;
2558 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2559 	  new_dest_nodes = (subexp_len == 0
2560 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2561 			    : dfa->eclosures + dfa->nexts[node_idx]);
2562 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2563 			  - bkref_ent->subexp_from);
2564 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2565 					  mctx->eflags);
2566 	  dest_state = mctx->state_log[dest_str_idx];
2567 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2568 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2569 	  /* Add 'new_dest_node' to state_log.  */
2570 	  if (dest_state == NULL)
2571 	    {
2572 	      mctx->state_log[dest_str_idx]
2573 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2574 					    context);
2575 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2576 				    && err != REG_NOERROR))
2577 		goto free_return;
2578 	    }
2579 	  else
2580 	    {
2581 	      re_node_set dest_nodes;
2582 	      err = re_node_set_init_union (&dest_nodes,
2583 					    dest_state->entrance_nodes,
2584 					    new_dest_nodes);
2585 	      if (__glibc_unlikely (err != REG_NOERROR))
2586 		{
2587 		  re_node_set_free (&dest_nodes);
2588 		  goto free_return;
2589 		}
2590 	      mctx->state_log[dest_str_idx]
2591 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2592 	      re_node_set_free (&dest_nodes);
2593 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2594 				    && err != REG_NOERROR))
2595 		goto free_return;
2596 	    }
2597 	  /* We need to check recursively if the backreference can epsilon
2598 	     transit.  */
2599 	  if (subexp_len == 0
2600 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2601 	    {
2602 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2603 					       cur_str_idx);
2604 	      if (__glibc_unlikely (err != REG_NOERROR))
2605 		goto free_return;
2606 	      err = transit_state_bkref (mctx, new_dest_nodes);
2607 	      if (__glibc_unlikely (err != REG_NOERROR))
2608 		goto free_return;
2609 	    }
2610 	}
2611     }
2612   err = REG_NOERROR;
2613  free_return:
2614   return err;
2615 }
2616 
2617 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2618    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2619    Note that we might collect inappropriate candidates here.
2620    However, the cost of checking them strictly here is too high, then we
2621    delay these checking for prune_impossible_nodes().  */
2622 
2623 static reg_errcode_t
2624 __attribute_warn_unused_result__
2625 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2626 {
2627   const re_dfa_t *const dfa = mctx->dfa;
2628   Idx subexp_num, sub_top_idx;
2629   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2630   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2631   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2632   if (cache_idx != -1)
2633     {
2634       const struct re_backref_cache_entry *entry
2635 	= mctx->bkref_ents + cache_idx;
2636       do
2637 	if (entry->node == bkref_node)
2638 	  return REG_NOERROR; /* We already checked it.  */
2639       while (entry++->more);
2640     }
2641 
2642   subexp_num = dfa->nodes[bkref_node].opr.idx;
2643 
2644   /* For each sub expression  */
2645   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2646     {
2647       reg_errcode_t err;
2648       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2649       re_sub_match_last_t *sub_last;
2650       Idx sub_last_idx, sl_str, bkref_str_off;
2651 
2652       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2653 	continue; /* It isn't related.  */
2654 
2655       sl_str = sub_top->str_idx;
2656       bkref_str_off = bkref_str_idx;
2657       /* At first, check the last node of sub expressions we already
2658 	 evaluated.  */
2659       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2660 	{
2661 	  regoff_t sl_str_diff;
2662 	  sub_last = sub_top->lasts[sub_last_idx];
2663 	  sl_str_diff = sub_last->str_idx - sl_str;
2664 	  /* The matched string by the sub expression match with the substring
2665 	     at the back reference?  */
2666 	  if (sl_str_diff > 0)
2667 	    {
2668 	      if (__glibc_unlikely (bkref_str_off + sl_str_diff
2669 				    > mctx->input.valid_len))
2670 		{
2671 		  /* Not enough chars for a successful match.  */
2672 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2673 		    break;
2674 
2675 		  err = clean_state_log_if_needed (mctx,
2676 						   bkref_str_off
2677 						   + sl_str_diff);
2678 		  if (__glibc_unlikely (err != REG_NOERROR))
2679 		    return err;
2680 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2681 		}
2682 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2683 		/* We don't need to search this sub expression any more.  */
2684 		break;
2685 	    }
2686 	  bkref_str_off += sl_str_diff;
2687 	  sl_str += sl_str_diff;
2688 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2689 				bkref_str_idx);
2690 
2691 	  /* Reload buf, since the preceding call might have reallocated
2692 	     the buffer.  */
2693 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2694 
2695 	  if (err == REG_NOMATCH)
2696 	    continue;
2697 	  if (__glibc_unlikely (err != REG_NOERROR))
2698 	    return err;
2699 	}
2700 
2701       if (sub_last_idx < sub_top->nlasts)
2702 	continue;
2703       if (sub_last_idx > 0)
2704 	++sl_str;
2705       /* Then, search for the other last nodes of the sub expression.  */
2706       for (; sl_str <= bkref_str_idx; ++sl_str)
2707 	{
2708 	  Idx cls_node;
2709 	  regoff_t sl_str_off;
2710 	  const re_node_set *nodes;
2711 	  sl_str_off = sl_str - sub_top->str_idx;
2712 	  /* The matched string by the sub expression match with the substring
2713 	     at the back reference?  */
2714 	  if (sl_str_off > 0)
2715 	    {
2716 	      if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2717 		{
2718 		  /* If we are at the end of the input, we cannot match.  */
2719 		  if (bkref_str_off >= mctx->input.len)
2720 		    break;
2721 
2722 		  err = extend_buffers (mctx, bkref_str_off + 1);
2723 		  if (__glibc_unlikely (err != REG_NOERROR))
2724 		    return err;
2725 
2726 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2727 		}
2728 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2729 		break; /* We don't need to search this sub expression
2730 			  any more.  */
2731 	    }
2732 	  if (mctx->state_log[sl_str] == NULL)
2733 	    continue;
2734 	  /* Does this state have a ')' of the sub expression?  */
2735 	  nodes = &mctx->state_log[sl_str]->nodes;
2736 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2737 				       OP_CLOSE_SUBEXP);
2738 	  if (cls_node == -1)
2739 	    continue; /* No.  */
2740 	  if (sub_top->path == NULL)
2741 	    {
2742 	      sub_top->path = calloc (sizeof (state_array_t),
2743 				      sl_str - sub_top->str_idx + 1);
2744 	      if (sub_top->path == NULL)
2745 		return REG_ESPACE;
2746 	    }
2747 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2748 	     in the current context?  */
2749 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2750 			       sub_top->str_idx, cls_node, sl_str,
2751 			       OP_CLOSE_SUBEXP);
2752 	  if (err == REG_NOMATCH)
2753 	      continue;
2754 	  if (__glibc_unlikely (err != REG_NOERROR))
2755 	      return err;
2756 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2757 	  if (__glibc_unlikely (sub_last == NULL))
2758 	    return REG_ESPACE;
2759 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2760 				bkref_str_idx);
2761 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2762 	  if (err == REG_NOMATCH)
2763 	    continue;
2764 	  if (__glibc_unlikely (err != REG_NOERROR))
2765 	    return err;
2766 	}
2767     }
2768   return REG_NOERROR;
2769 }
2770 
2771 /* Helper functions for get_subexp().  */
2772 
2773 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2774    If it can arrive, register the sub expression expressed with SUB_TOP
2775    and SUB_LAST.  */
2776 
2777 static reg_errcode_t
2778 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2779 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2780 {
2781   reg_errcode_t err;
2782   Idx to_idx;
2783   /* Can the subexpression arrive the back reference?  */
2784   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2785 		       sub_last->str_idx, bkref_node, bkref_str,
2786 		       OP_OPEN_SUBEXP);
2787   if (err != REG_NOERROR)
2788     return err;
2789   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2790 			     sub_last->str_idx);
2791   if (__glibc_unlikely (err != REG_NOERROR))
2792     return err;
2793   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2794   return clean_state_log_if_needed (mctx, to_idx);
2795 }
2796 
2797 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2798    Search '(' if FL_OPEN, or search ')' otherwise.
2799    TODO: This function isn't efficient...
2800 	 Because there might be more than one nodes whose types are
2801 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2802 	 nodes.
2803 	 E.g. RE: (a){2}  */
2804 
2805 static Idx
2806 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2807 		  Idx subexp_idx, int type)
2808 {
2809   Idx cls_idx;
2810   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2811     {
2812       Idx cls_node = nodes->elems[cls_idx];
2813       const re_token_t *node = dfa->nodes + cls_node;
2814       if (node->type == type
2815 	  && node->opr.idx == subexp_idx)
2816 	return cls_node;
2817     }
2818   return -1;
2819 }
2820 
2821 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2822    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2823    heavily reused.
2824    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2825 
2826 static reg_errcode_t
2827 __attribute_warn_unused_result__
2828 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2829 	       Idx top_str, Idx last_node, Idx last_str, int type)
2830 {
2831   const re_dfa_t *const dfa = mctx->dfa;
2832   reg_errcode_t err = REG_NOERROR;
2833   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2834   re_dfastate_t *cur_state = NULL;
2835   re_node_set *cur_nodes, next_nodes;
2836   re_dfastate_t **backup_state_log;
2837   unsigned int context;
2838 
2839   subexp_num = dfa->nodes[top_node].opr.idx;
2840   /* Extend the buffer if we need.  */
2841   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2842     {
2843       re_dfastate_t **new_array;
2844       Idx old_alloc = path->alloc;
2845       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2846       Idx new_alloc;
2847       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2848 	return REG_ESPACE;
2849       new_alloc = old_alloc + incr_alloc;
2850       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2851 	return REG_ESPACE;
2852       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2853       if (__glibc_unlikely (new_array == NULL))
2854 	return REG_ESPACE;
2855       path->array = new_array;
2856       path->alloc = new_alloc;
2857       memset (new_array + old_alloc, '\0',
2858 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2859     }
2860 
2861   str_idx = path->next_idx ? path->next_idx : top_str;
2862 
2863   /* Temporary modify MCTX.  */
2864   backup_state_log = mctx->state_log;
2865   backup_cur_idx = mctx->input.cur_idx;
2866   mctx->state_log = path->array;
2867   mctx->input.cur_idx = str_idx;
2868 
2869   /* Setup initial node set.  */
2870   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2871   if (str_idx == top_str)
2872     {
2873       err = re_node_set_init_1 (&next_nodes, top_node);
2874       if (__glibc_unlikely (err != REG_NOERROR))
2875 	return err;
2876       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2877       if (__glibc_unlikely (err != REG_NOERROR))
2878 	{
2879 	  re_node_set_free (&next_nodes);
2880 	  return err;
2881 	}
2882     }
2883   else
2884     {
2885       cur_state = mctx->state_log[str_idx];
2886       if (cur_state && cur_state->has_backref)
2887 	{
2888 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2889 	  if (__glibc_unlikely (err != REG_NOERROR))
2890 	    return err;
2891 	}
2892       else
2893 	re_node_set_init_empty (&next_nodes);
2894     }
2895   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2896     {
2897       if (next_nodes.nelem)
2898 	{
2899 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2900 				    subexp_num, type);
2901 	  if (__glibc_unlikely (err != REG_NOERROR))
2902 	    {
2903 	      re_node_set_free (&next_nodes);
2904 	      return err;
2905 	    }
2906 	}
2907       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2908       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2909 	{
2910 	  re_node_set_free (&next_nodes);
2911 	  return err;
2912 	}
2913       mctx->state_log[str_idx] = cur_state;
2914     }
2915 
2916   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2917     {
2918       re_node_set_empty (&next_nodes);
2919       if (mctx->state_log[str_idx + 1])
2920 	{
2921 	  err = re_node_set_merge (&next_nodes,
2922 				   &mctx->state_log[str_idx + 1]->nodes);
2923 	  if (__glibc_unlikely (err != REG_NOERROR))
2924 	    {
2925 	      re_node_set_free (&next_nodes);
2926 	      return err;
2927 	    }
2928 	}
2929       if (cur_state)
2930 	{
2931 	  err = check_arrival_add_next_nodes (mctx, str_idx,
2932 					      &cur_state->non_eps_nodes,
2933 					      &next_nodes);
2934 	  if (__glibc_unlikely (err != REG_NOERROR))
2935 	    {
2936 	      re_node_set_free (&next_nodes);
2937 	      return err;
2938 	    }
2939 	}
2940       ++str_idx;
2941       if (next_nodes.nelem)
2942 	{
2943 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2944 	  if (__glibc_unlikely (err != REG_NOERROR))
2945 	    {
2946 	      re_node_set_free (&next_nodes);
2947 	      return err;
2948 	    }
2949 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2950 				    subexp_num, type);
2951 	  if (__glibc_unlikely (err != REG_NOERROR))
2952 	    {
2953 	      re_node_set_free (&next_nodes);
2954 	      return err;
2955 	    }
2956 	}
2957       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2958       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2959       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2960 	{
2961 	  re_node_set_free (&next_nodes);
2962 	  return err;
2963 	}
2964       mctx->state_log[str_idx] = cur_state;
2965       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2966     }
2967   re_node_set_free (&next_nodes);
2968   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2969 	       : &mctx->state_log[last_str]->nodes);
2970   path->next_idx = str_idx;
2971 
2972   /* Fix MCTX.  */
2973   mctx->state_log = backup_state_log;
2974   mctx->input.cur_idx = backup_cur_idx;
2975 
2976   /* Then check the current node set has the node LAST_NODE.  */
2977   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2978     return REG_NOERROR;
2979 
2980   return REG_NOMATCH;
2981 }
2982 
2983 /* Helper functions for check_arrival.  */
2984 
2985 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2986    to NEXT_NODES.
2987    TODO: This function is similar to the functions transit_state*(),
2988 	 however this function has many additional works.
2989 	 Can't we unify them?  */
2990 
2991 static reg_errcode_t
2992 __attribute_warn_unused_result__
2993 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
2994 			      re_node_set *cur_nodes, re_node_set *next_nodes)
2995 {
2996   const re_dfa_t *const dfa = mctx->dfa;
2997   bool ok;
2998   Idx cur_idx;
2999 #ifdef RE_ENABLE_I18N
3000   reg_errcode_t err = REG_NOERROR;
3001 #endif
3002   re_node_set union_set;
3003   re_node_set_init_empty (&union_set);
3004   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3005     {
3006       int naccepted = 0;
3007       Idx cur_node = cur_nodes->elems[cur_idx];
3008       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
3009 
3010 #ifdef RE_ENABLE_I18N
3011       /* If the node may accept "multi byte".  */
3012       if (dfa->nodes[cur_node].accept_mb)
3013 	{
3014 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3015 					       str_idx);
3016 	  if (naccepted > 1)
3017 	    {
3018 	      re_dfastate_t *dest_state;
3019 	      Idx next_node = dfa->nexts[cur_node];
3020 	      Idx next_idx = str_idx + naccepted;
3021 	      dest_state = mctx->state_log[next_idx];
3022 	      re_node_set_empty (&union_set);
3023 	      if (dest_state)
3024 		{
3025 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3026 		  if (__glibc_unlikely (err != REG_NOERROR))
3027 		    {
3028 		      re_node_set_free (&union_set);
3029 		      return err;
3030 		    }
3031 		}
3032 	      ok = re_node_set_insert (&union_set, next_node);
3033 	      if (__glibc_unlikely (! ok))
3034 		{
3035 		  re_node_set_free (&union_set);
3036 		  return REG_ESPACE;
3037 		}
3038 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3039 							    &union_set);
3040 	      if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3041 				    && err != REG_NOERROR))
3042 		{
3043 		  re_node_set_free (&union_set);
3044 		  return err;
3045 		}
3046 	    }
3047 	}
3048 #endif /* RE_ENABLE_I18N */
3049       if (naccepted
3050 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3051 	{
3052 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3053 	  if (__glibc_unlikely (! ok))
3054 	    {
3055 	      re_node_set_free (&union_set);
3056 	      return REG_ESPACE;
3057 	    }
3058 	}
3059     }
3060   re_node_set_free (&union_set);
3061   return REG_NOERROR;
3062 }
3063 
3064 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3065    CUR_NODES, however exclude the nodes which are:
3066     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3067     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3068 */
3069 
3070 static reg_errcode_t
3071 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3072 			  Idx ex_subexp, int type)
3073 {
3074   reg_errcode_t err;
3075   Idx idx, outside_node;
3076   re_node_set new_nodes;
3077   DEBUG_ASSERT (cur_nodes->nelem);
3078   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3079   if (__glibc_unlikely (err != REG_NOERROR))
3080     return err;
3081   /* Create a new node set NEW_NODES with the nodes which are epsilon
3082      closures of the node in CUR_NODES.  */
3083 
3084   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3085     {
3086       Idx cur_node = cur_nodes->elems[idx];
3087       const re_node_set *eclosure = dfa->eclosures + cur_node;
3088       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3089       if (outside_node == -1)
3090 	{
3091 	  /* There are no problematic nodes, just merge them.  */
3092 	  err = re_node_set_merge (&new_nodes, eclosure);
3093 	  if (__glibc_unlikely (err != REG_NOERROR))
3094 	    {
3095 	      re_node_set_free (&new_nodes);
3096 	      return err;
3097 	    }
3098 	}
3099       else
3100 	{
3101 	  /* There are problematic nodes, re-calculate incrementally.  */
3102 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3103 					      ex_subexp, type);
3104 	  if (__glibc_unlikely (err != REG_NOERROR))
3105 	    {
3106 	      re_node_set_free (&new_nodes);
3107 	      return err;
3108 	    }
3109 	}
3110     }
3111   re_node_set_free (cur_nodes);
3112   *cur_nodes = new_nodes;
3113   return REG_NOERROR;
3114 }
3115 
3116 /* Helper function for check_arrival_expand_ecl.
3117    Check incrementally the epsilon closure of TARGET, and if it isn't
3118    problematic append it to DST_NODES.  */
3119 
3120 static reg_errcode_t
3121 __attribute_warn_unused_result__
3122 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3123 			      Idx target, Idx ex_subexp, int type)
3124 {
3125   Idx cur_node;
3126   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3127     {
3128       bool ok;
3129 
3130       if (dfa->nodes[cur_node].type == type
3131 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3132 	{
3133 	  if (type == OP_CLOSE_SUBEXP)
3134 	    {
3135 	      ok = re_node_set_insert (dst_nodes, cur_node);
3136 	      if (__glibc_unlikely (! ok))
3137 		return REG_ESPACE;
3138 	    }
3139 	  break;
3140 	}
3141       ok = re_node_set_insert (dst_nodes, cur_node);
3142       if (__glibc_unlikely (! ok))
3143 	return REG_ESPACE;
3144       if (dfa->edests[cur_node].nelem == 0)
3145 	break;
3146       if (dfa->edests[cur_node].nelem == 2)
3147 	{
3148 	  reg_errcode_t err;
3149 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3150 					      dfa->edests[cur_node].elems[1],
3151 					      ex_subexp, type);
3152 	  if (__glibc_unlikely (err != REG_NOERROR))
3153 	    return err;
3154 	}
3155       cur_node = dfa->edests[cur_node].elems[0];
3156     }
3157   return REG_NOERROR;
3158 }
3159 
3160 
3161 /* For all the back references in the current state, calculate the
3162    destination of the back references by the appropriate entry
3163    in MCTX->BKREF_ENTS.  */
3164 
3165 static reg_errcode_t
3166 __attribute_warn_unused_result__
3167 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3168 		    Idx cur_str, Idx subexp_num, int type)
3169 {
3170   const re_dfa_t *const dfa = mctx->dfa;
3171   reg_errcode_t err;
3172   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3173   struct re_backref_cache_entry *ent;
3174 
3175   if (cache_idx_start == -1)
3176     return REG_NOERROR;
3177 
3178  restart:
3179   ent = mctx->bkref_ents + cache_idx_start;
3180   do
3181     {
3182       Idx to_idx, next_node;
3183 
3184       /* Is this entry ENT is appropriate?  */
3185       if (!re_node_set_contains (cur_nodes, ent->node))
3186 	continue; /* No.  */
3187 
3188       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3189       /* Calculate the destination of the back reference, and append it
3190 	 to MCTX->STATE_LOG.  */
3191       if (to_idx == cur_str)
3192 	{
3193 	  /* The backreference did epsilon transit, we must re-check all the
3194 	     node in the current state.  */
3195 	  re_node_set new_dests;
3196 	  reg_errcode_t err2, err3;
3197 	  next_node = dfa->edests[ent->node].elems[0];
3198 	  if (re_node_set_contains (cur_nodes, next_node))
3199 	    continue;
3200 	  err = re_node_set_init_1 (&new_dests, next_node);
3201 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3202 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3203 	  re_node_set_free (&new_dests);
3204 	  if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3205 				|| err3 != REG_NOERROR))
3206 	    {
3207 	      err = (err != REG_NOERROR ? err
3208 		     : (err2 != REG_NOERROR ? err2 : err3));
3209 	      return err;
3210 	    }
3211 	  /* TODO: It is still inefficient...  */
3212 	  goto restart;
3213 	}
3214       else
3215 	{
3216 	  re_node_set union_set;
3217 	  next_node = dfa->nexts[ent->node];
3218 	  if (mctx->state_log[to_idx])
3219 	    {
3220 	      bool ok;
3221 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3222 					next_node))
3223 		continue;
3224 	      err = re_node_set_init_copy (&union_set,
3225 					   &mctx->state_log[to_idx]->nodes);
3226 	      ok = re_node_set_insert (&union_set, next_node);
3227 	      if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3228 		{
3229 		  re_node_set_free (&union_set);
3230 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3231 		  return err;
3232 		}
3233 	    }
3234 	  else
3235 	    {
3236 	      err = re_node_set_init_1 (&union_set, next_node);
3237 	      if (__glibc_unlikely (err != REG_NOERROR))
3238 		return err;
3239 	    }
3240 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3241 	  re_node_set_free (&union_set);
3242 	  if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3243 				&& err != REG_NOERROR))
3244 	    return err;
3245 	}
3246     }
3247   while (ent++->more);
3248   return REG_NOERROR;
3249 }
3250 
3251 /* Build transition table for the state.
3252    Return true if successful.  */
3253 
3254 static bool
3255 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3256 {
3257   reg_errcode_t err;
3258   Idx i, j;
3259   int ch;
3260   bool need_word_trtable = false;
3261   bitset_word_t elem, mask;
3262   bool dests_node_malloced = false;
3263   bool dest_states_malloced = false;
3264   Idx ndests; /* Number of the destination states from 'state'.  */
3265   re_dfastate_t **trtable;
3266   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3267   re_node_set follows, *dests_node;
3268   bitset_t *dests_ch;
3269   bitset_t acceptable;
3270 
3271   struct dests_alloc
3272   {
3273     re_node_set dests_node[SBC_MAX];
3274     bitset_t dests_ch[SBC_MAX];
3275   } *dests_alloc;
3276 
3277   /* We build DFA states which corresponds to the destination nodes
3278      from 'state'.  'dests_node[i]' represents the nodes which i-th
3279      destination state contains, and 'dests_ch[i]' represents the
3280      characters which i-th destination state accepts.  */
3281   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3282     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3283   else
3284     {
3285       dests_alloc = re_malloc (struct dests_alloc, 1);
3286       if (__glibc_unlikely (dests_alloc == NULL))
3287 	return false;
3288       dests_node_malloced = true;
3289     }
3290   dests_node = dests_alloc->dests_node;
3291   dests_ch = dests_alloc->dests_ch;
3292 
3293   /* Initialize transition table.  */
3294   state->word_trtable = state->trtable = NULL;
3295 
3296   /* At first, group all nodes belonging to 'state' into several
3297      destinations.  */
3298   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3299   if (__glibc_unlikely (ndests <= 0))
3300     {
3301       if (dests_node_malloced)
3302 	re_free (dests_alloc);
3303       /* Return false in case of an error, true otherwise.  */
3304       if (ndests == 0)
3305 	{
3306 	  state->trtable = (re_dfastate_t **)
3307 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3308           if (__glibc_unlikely (state->trtable == NULL))
3309             return false;
3310 	  return true;
3311 	}
3312       return false;
3313     }
3314 
3315   err = re_node_set_alloc (&follows, ndests + 1);
3316   if (__glibc_unlikely (err != REG_NOERROR))
3317     goto out_free;
3318 
3319   /* Avoid arithmetic overflow in size calculation.  */
3320   size_t ndests_max
3321     = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3322        / (3 * sizeof (re_dfastate_t *)));
3323   if (__glibc_unlikely (ndests_max < ndests))
3324     goto out_free;
3325 
3326   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3327 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3328     dest_states = (re_dfastate_t **)
3329       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3330   else
3331     {
3332       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
3333       if (__glibc_unlikely (dest_states == NULL))
3334 	{
3335 out_free:
3336 	  if (dest_states_malloced)
3337 	    re_free (dest_states);
3338 	  re_node_set_free (&follows);
3339 	  for (i = 0; i < ndests; ++i)
3340 	    re_node_set_free (dests_node + i);
3341 	  if (dests_node_malloced)
3342 	    re_free (dests_alloc);
3343 	  return false;
3344 	}
3345       dest_states_malloced = true;
3346     }
3347   dest_states_word = dest_states + ndests;
3348   dest_states_nl = dest_states_word + ndests;
3349   bitset_empty (acceptable);
3350 
3351   /* Then build the states for all destinations.  */
3352   for (i = 0; i < ndests; ++i)
3353     {
3354       Idx next_node;
3355       re_node_set_empty (&follows);
3356       /* Merge the follows of this destination states.  */
3357       for (j = 0; j < dests_node[i].nelem; ++j)
3358 	{
3359 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3360 	  if (next_node != -1)
3361 	    {
3362 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3363 	      if (__glibc_unlikely (err != REG_NOERROR))
3364 		goto out_free;
3365 	    }
3366 	}
3367       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3368       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3369 	goto out_free;
3370       /* If the new state has context constraint,
3371 	 build appropriate states for these contexts.  */
3372       if (dest_states[i]->has_constraint)
3373 	{
3374 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3375 							  CONTEXT_WORD);
3376 	  if (__glibc_unlikely (dest_states_word[i] == NULL
3377 				&& err != REG_NOERROR))
3378 	    goto out_free;
3379 
3380 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3381 	    need_word_trtable = true;
3382 
3383 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3384 							CONTEXT_NEWLINE);
3385 	  if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3386 	    goto out_free;
3387 	}
3388       else
3389 	{
3390 	  dest_states_word[i] = dest_states[i];
3391 	  dest_states_nl[i] = dest_states[i];
3392 	}
3393       bitset_merge (acceptable, dests_ch[i]);
3394     }
3395 
3396   if (!__glibc_unlikely (need_word_trtable))
3397     {
3398       /* We don't care about whether the following character is a word
3399 	 character, or we are in a single-byte character set so we can
3400 	 discern by looking at the character code: allocate a
3401 	 256-entry transition table.  */
3402       trtable = state->trtable =
3403 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3404       if (__glibc_unlikely (trtable == NULL))
3405 	goto out_free;
3406 
3407       /* For all characters ch...:  */
3408       for (i = 0; i < BITSET_WORDS; ++i)
3409 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3410 	     elem;
3411 	     mask <<= 1, elem >>= 1, ++ch)
3412 	  if (__glibc_unlikely (elem & 1))
3413 	    {
3414 	      /* There must be exactly one destination which accepts
3415 		 character ch.  See group_nodes_into_DFAstates.  */
3416 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3417 		;
3418 
3419 	      /* j-th destination accepts the word character ch.  */
3420 	      if (dfa->word_char[i] & mask)
3421 		trtable[ch] = dest_states_word[j];
3422 	      else
3423 		trtable[ch] = dest_states[j];
3424 	    }
3425     }
3426   else
3427     {
3428       /* We care about whether the following character is a word
3429 	 character, and we are in a multi-byte character set: discern
3430 	 by looking at the character code: build two 256-entry
3431 	 transition tables, one starting at trtable[0] and one
3432 	 starting at trtable[SBC_MAX].  */
3433       trtable = state->word_trtable =
3434 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3435       if (__glibc_unlikely (trtable == NULL))
3436 	goto out_free;
3437 
3438       /* For all characters ch...:  */
3439       for (i = 0; i < BITSET_WORDS; ++i)
3440 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3441 	     elem;
3442 	     mask <<= 1, elem >>= 1, ++ch)
3443 	  if (__glibc_unlikely (elem & 1))
3444 	    {
3445 	      /* There must be exactly one destination which accepts
3446 		 character ch.  See group_nodes_into_DFAstates.  */
3447 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3448 		;
3449 
3450 	      /* j-th destination accepts the word character ch.  */
3451 	      trtable[ch] = dest_states[j];
3452 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3453 	    }
3454     }
3455 
3456   /* new line */
3457   if (bitset_contain (acceptable, NEWLINE_CHAR))
3458     {
3459       /* The current state accepts newline character.  */
3460       for (j = 0; j < ndests; ++j)
3461 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3462 	  {
3463 	    /* k-th destination accepts newline character.  */
3464 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3465 	    if (need_word_trtable)
3466 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3467 	    /* There must be only one destination which accepts
3468 	       newline.  See group_nodes_into_DFAstates.  */
3469 	    break;
3470 	  }
3471     }
3472 
3473   if (dest_states_malloced)
3474     re_free (dest_states);
3475 
3476   re_node_set_free (&follows);
3477   for (i = 0; i < ndests; ++i)
3478     re_node_set_free (dests_node + i);
3479 
3480   if (dests_node_malloced)
3481     re_free (dests_alloc);
3482 
3483   return true;
3484 }
3485 
3486 /* Group all nodes belonging to STATE into several destinations.
3487    Then for all destinations, set the nodes belonging to the destination
3488    to DESTS_NODE[i] and set the characters accepted by the destination
3489    to DEST_CH[i].  This function return the number of destinations.  */
3490 
3491 static Idx
3492 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3493 			    re_node_set *dests_node, bitset_t *dests_ch)
3494 {
3495   reg_errcode_t err;
3496   bool ok;
3497   Idx i, j, k;
3498   Idx ndests; /* Number of the destinations from 'state'.  */
3499   bitset_t accepts; /* Characters a node can accept.  */
3500   const re_node_set *cur_nodes = &state->nodes;
3501   bitset_empty (accepts);
3502   ndests = 0;
3503 
3504   /* For all the nodes belonging to 'state',  */
3505   for (i = 0; i < cur_nodes->nelem; ++i)
3506     {
3507       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3508       re_token_type_t type = node->type;
3509       unsigned int constraint = node->constraint;
3510 
3511       /* Enumerate all single byte character this node can accept.  */
3512       if (type == CHARACTER)
3513 	bitset_set (accepts, node->opr.c);
3514       else if (type == SIMPLE_BRACKET)
3515 	{
3516 	  bitset_merge (accepts, node->opr.sbcset);
3517 	}
3518       else if (type == OP_PERIOD)
3519 	{
3520 #ifdef RE_ENABLE_I18N
3521 	  if (dfa->mb_cur_max > 1)
3522 	    bitset_merge (accepts, dfa->sb_char);
3523 	  else
3524 #endif
3525 	    bitset_set_all (accepts);
3526 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3527 	    bitset_clear (accepts, '\n');
3528 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3529 	    bitset_clear (accepts, '\0');
3530 	}
3531 #ifdef RE_ENABLE_I18N
3532       else if (type == OP_UTF8_PERIOD)
3533 	{
3534 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3535 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3536 	  else
3537 	    bitset_merge (accepts, utf8_sb_map);
3538 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3539 	    bitset_clear (accepts, '\n');
3540 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3541 	    bitset_clear (accepts, '\0');
3542 	}
3543 #endif
3544       else
3545 	continue;
3546 
3547       /* Check the 'accepts' and sift the characters which are not
3548 	 match it the context.  */
3549       if (constraint)
3550 	{
3551 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3552 	    {
3553 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3554 	      bitset_empty (accepts);
3555 	      if (accepts_newline)
3556 		bitset_set (accepts, NEWLINE_CHAR);
3557 	      else
3558 		continue;
3559 	    }
3560 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3561 	    {
3562 	      bitset_empty (accepts);
3563 	      continue;
3564 	    }
3565 
3566 	  if (constraint & NEXT_WORD_CONSTRAINT)
3567 	    {
3568 	      bitset_word_t any_set = 0;
3569 	      if (type == CHARACTER && !node->word_char)
3570 		{
3571 		  bitset_empty (accepts);
3572 		  continue;
3573 		}
3574 #ifdef RE_ENABLE_I18N
3575 	      if (dfa->mb_cur_max > 1)
3576 		for (j = 0; j < BITSET_WORDS; ++j)
3577 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3578 	      else
3579 #endif
3580 		for (j = 0; j < BITSET_WORDS; ++j)
3581 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3582 	      if (!any_set)
3583 		continue;
3584 	    }
3585 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3586 	    {
3587 	      bitset_word_t any_set = 0;
3588 	      if (type == CHARACTER && node->word_char)
3589 		{
3590 		  bitset_empty (accepts);
3591 		  continue;
3592 		}
3593 #ifdef RE_ENABLE_I18N
3594 	      if (dfa->mb_cur_max > 1)
3595 		for (j = 0; j < BITSET_WORDS; ++j)
3596 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3597 	      else
3598 #endif
3599 		for (j = 0; j < BITSET_WORDS; ++j)
3600 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3601 	      if (!any_set)
3602 		continue;
3603 	    }
3604 	}
3605 
3606       /* Then divide 'accepts' into DFA states, or create a new
3607 	 state.  Above, we make sure that accepts is not empty.  */
3608       for (j = 0; j < ndests; ++j)
3609 	{
3610 	  bitset_t intersec; /* Intersection sets, see below.  */
3611 	  bitset_t remains;
3612 	  /* Flags, see below.  */
3613 	  bitset_word_t has_intersec, not_subset, not_consumed;
3614 
3615 	  /* Optimization, skip if this state doesn't accept the character.  */
3616 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3617 	    continue;
3618 
3619 	  /* Enumerate the intersection set of this state and 'accepts'.  */
3620 	  has_intersec = 0;
3621 	  for (k = 0; k < BITSET_WORDS; ++k)
3622 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3623 	  /* And skip if the intersection set is empty.  */
3624 	  if (!has_intersec)
3625 	    continue;
3626 
3627 	  /* Then check if this state is a subset of 'accepts'.  */
3628 	  not_subset = not_consumed = 0;
3629 	  for (k = 0; k < BITSET_WORDS; ++k)
3630 	    {
3631 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3632 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3633 	    }
3634 
3635 	  /* If this state isn't a subset of 'accepts', create a
3636 	     new group state, which has the 'remains'. */
3637 	  if (not_subset)
3638 	    {
3639 	      bitset_copy (dests_ch[ndests], remains);
3640 	      bitset_copy (dests_ch[j], intersec);
3641 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3642 	      if (__glibc_unlikely (err != REG_NOERROR))
3643 		goto error_return;
3644 	      ++ndests;
3645 	    }
3646 
3647 	  /* Put the position in the current group. */
3648 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3649 	  if (__glibc_unlikely (! ok))
3650 	    goto error_return;
3651 
3652 	  /* If all characters are consumed, go to next node. */
3653 	  if (!not_consumed)
3654 	    break;
3655 	}
3656       /* Some characters remain, create a new group. */
3657       if (j == ndests)
3658 	{
3659 	  bitset_copy (dests_ch[ndests], accepts);
3660 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3661 	  if (__glibc_unlikely (err != REG_NOERROR))
3662 	    goto error_return;
3663 	  ++ndests;
3664 	  bitset_empty (accepts);
3665 	}
3666     }
3667   assume (ndests <= SBC_MAX);
3668   return ndests;
3669  error_return:
3670   for (j = 0; j < ndests; ++j)
3671     re_node_set_free (dests_node + j);
3672   return -1;
3673 }
3674 
3675 #ifdef RE_ENABLE_I18N
3676 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3677    Return the number of the bytes the node accepts.
3678    STR_IDX is the current index of the input string.
3679 
3680    This function handles the nodes which can accept one character, or
3681    one collating element like '.', '[a-z]', opposite to the other nodes
3682    can only accept one byte.  */
3683 
3684 # ifdef _LIBC
3685 #  include <locale/weight.h>
3686 # endif
3687 
3688 static int
3689 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3690 			 const re_string_t *input, Idx str_idx)
3691 {
3692   const re_token_t *node = dfa->nodes + node_idx;
3693   int char_len, elem_len;
3694   Idx i;
3695 
3696   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3697     {
3698       unsigned char c = re_string_byte_at (input, str_idx), d;
3699       if (__glibc_likely (c < 0xc2))
3700 	return 0;
3701 
3702       if (str_idx + 2 > input->len)
3703 	return 0;
3704 
3705       d = re_string_byte_at (input, str_idx + 1);
3706       if (c < 0xe0)
3707 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3708       else if (c < 0xf0)
3709 	{
3710 	  char_len = 3;
3711 	  if (c == 0xe0 && d < 0xa0)
3712 	    return 0;
3713 	}
3714       else if (c < 0xf8)
3715 	{
3716 	  char_len = 4;
3717 	  if (c == 0xf0 && d < 0x90)
3718 	    return 0;
3719 	}
3720       else if (c < 0xfc)
3721 	{
3722 	  char_len = 5;
3723 	  if (c == 0xf8 && d < 0x88)
3724 	    return 0;
3725 	}
3726       else if (c < 0xfe)
3727 	{
3728 	  char_len = 6;
3729 	  if (c == 0xfc && d < 0x84)
3730 	    return 0;
3731 	}
3732       else
3733 	return 0;
3734 
3735       if (str_idx + char_len > input->len)
3736 	return 0;
3737 
3738       for (i = 1; i < char_len; ++i)
3739 	{
3740 	  d = re_string_byte_at (input, str_idx + i);
3741 	  if (d < 0x80 || d > 0xbf)
3742 	    return 0;
3743 	}
3744       return char_len;
3745     }
3746 
3747   char_len = re_string_char_size_at (input, str_idx);
3748   if (node->type == OP_PERIOD)
3749     {
3750       if (char_len <= 1)
3751 	return 0;
3752       /* FIXME: I don't think this if is needed, as both '\n'
3753 	 and '\0' are char_len == 1.  */
3754       /* '.' accepts any one character except the following two cases.  */
3755       if ((!(dfa->syntax & RE_DOT_NEWLINE)
3756 	   && re_string_byte_at (input, str_idx) == '\n')
3757 	  || ((dfa->syntax & RE_DOT_NOT_NULL)
3758 	      && re_string_byte_at (input, str_idx) == '\0'))
3759 	return 0;
3760       return char_len;
3761     }
3762 
3763   elem_len = re_string_elem_size_at (input, str_idx);
3764   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3765     return 0;
3766 
3767   if (node->type == COMPLEX_BRACKET)
3768     {
3769       const re_charset_t *cset = node->opr.mbcset;
3770 # ifdef _LIBC
3771       const unsigned char *pin
3772 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3773       Idx j;
3774       uint32_t nrules;
3775 # endif /* _LIBC */
3776       int match_len = 0;
3777       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3778 		    ? re_string_wchar_at (input, str_idx) : 0);
3779 
3780       /* match with multibyte character?  */
3781       for (i = 0; i < cset->nmbchars; ++i)
3782 	if (wc == cset->mbchars[i])
3783 	  {
3784 	    match_len = char_len;
3785 	    goto check_node_accept_bytes_match;
3786 	  }
3787       /* match with character_class?  */
3788       for (i = 0; i < cset->nchar_classes; ++i)
3789 	{
3790 	  wctype_t wt = cset->char_classes[i];
3791 	  if (__iswctype (wc, wt))
3792 	    {
3793 	      match_len = char_len;
3794 	      goto check_node_accept_bytes_match;
3795 	    }
3796 	}
3797 
3798 # ifdef _LIBC
3799       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3800       if (nrules != 0)
3801 	{
3802 	  unsigned int in_collseq = 0;
3803 	  const int32_t *table, *indirect;
3804 	  const unsigned char *weights, *extra;
3805 	  const char *collseqwc;
3806 
3807 	  /* match with collating_symbol?  */
3808 	  if (cset->ncoll_syms)
3809 	    extra = (const unsigned char *)
3810 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3811 	  for (i = 0; i < cset->ncoll_syms; ++i)
3812 	    {
3813 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3814 	      /* Compare the length of input collating element and
3815 		 the length of current collating element.  */
3816 	      if (*coll_sym != elem_len)
3817 		continue;
3818 	      /* Compare each bytes.  */
3819 	      for (j = 0; j < *coll_sym; j++)
3820 		if (pin[j] != coll_sym[1 + j])
3821 		  break;
3822 	      if (j == *coll_sym)
3823 		{
3824 		  /* Match if every bytes is equal.  */
3825 		  match_len = j;
3826 		  goto check_node_accept_bytes_match;
3827 		}
3828 	    }
3829 
3830 	  if (cset->nranges)
3831 	    {
3832 	      if (elem_len <= char_len)
3833 		{
3834 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3835 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3836 		}
3837 	      else
3838 		in_collseq = find_collation_sequence_value (pin, elem_len);
3839 	    }
3840 	  /* match with range expression?  */
3841 	  /* FIXME: Implement rational ranges here, too.  */
3842 	  for (i = 0; i < cset->nranges; ++i)
3843 	    if (cset->range_starts[i] <= in_collseq
3844 		&& in_collseq <= cset->range_ends[i])
3845 	      {
3846 		match_len = elem_len;
3847 		goto check_node_accept_bytes_match;
3848 	      }
3849 
3850 	  /* match with equivalence_class?  */
3851 	  if (cset->nequiv_classes)
3852 	    {
3853 	      const unsigned char *cp = pin;
3854 	      table = (const int32_t *)
3855 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3856 	      weights = (const unsigned char *)
3857 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3858 	      extra = (const unsigned char *)
3859 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3860 	      indirect = (const int32_t *)
3861 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3862 	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3863 	      int32_t rule = idx >> 24;
3864 	      idx &= 0xffffff;
3865 	      if (idx > 0)
3866 		{
3867 		  size_t weight_len = weights[idx];
3868 		  for (i = 0; i < cset->nequiv_classes; ++i)
3869 		    {
3870 		      int32_t equiv_class_idx = cset->equiv_classes[i];
3871 		      int32_t equiv_class_rule = equiv_class_idx >> 24;
3872 		      equiv_class_idx &= 0xffffff;
3873 		      if (weights[equiv_class_idx] == weight_len
3874 			  && equiv_class_rule == rule
3875 			  && memcmp (weights + idx + 1,
3876 				     weights + equiv_class_idx + 1,
3877 				     weight_len) == 0)
3878 			{
3879 			  match_len = elem_len;
3880 			  goto check_node_accept_bytes_match;
3881 			}
3882 		    }
3883 		}
3884 	    }
3885 	}
3886       else
3887 # endif /* _LIBC */
3888 	{
3889 	  /* match with range expression?  */
3890 	  for (i = 0; i < cset->nranges; ++i)
3891 	    {
3892 	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3893 		{
3894 		  match_len = char_len;
3895 		  goto check_node_accept_bytes_match;
3896 		}
3897 	    }
3898 	}
3899     check_node_accept_bytes_match:
3900       if (!cset->non_match)
3901 	return match_len;
3902       else
3903 	{
3904 	  if (match_len > 0)
3905 	    return 0;
3906 	  else
3907 	    return (elem_len > char_len) ? elem_len : char_len;
3908 	}
3909     }
3910   return 0;
3911 }
3912 
3913 # ifdef _LIBC
3914 static unsigned int
3915 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3916 {
3917   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3918   if (nrules == 0)
3919     {
3920       if (mbs_len == 1)
3921 	{
3922 	  /* No valid character.  Match it as a single byte character.  */
3923 	  const unsigned char *collseq = (const unsigned char *)
3924 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3925 	  return collseq[mbs[0]];
3926 	}
3927       return UINT_MAX;
3928     }
3929   else
3930     {
3931       int32_t idx;
3932       const unsigned char *extra = (const unsigned char *)
3933 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3934       int32_t extrasize = (const unsigned char *)
3935 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3936 
3937       for (idx = 0; idx < extrasize;)
3938 	{
3939 	  int mbs_cnt;
3940 	  bool found = false;
3941 	  int32_t elem_mbs_len;
3942 	  /* Skip the name of collating element name.  */
3943 	  idx = idx + extra[idx] + 1;
3944 	  elem_mbs_len = extra[idx++];
3945 	  if (mbs_len == elem_mbs_len)
3946 	    {
3947 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3948 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3949 		  break;
3950 	      if (mbs_cnt == elem_mbs_len)
3951 		/* Found the entry.  */
3952 		found = true;
3953 	    }
3954 	  /* Skip the byte sequence of the collating element.  */
3955 	  idx += elem_mbs_len;
3956 	  /* Adjust for the alignment.  */
3957 	  idx = (idx + 3) & ~3;
3958 	  /* Skip the collation sequence value.  */
3959 	  idx += sizeof (uint32_t);
3960 	  /* Skip the wide char sequence of the collating element.  */
3961 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3962 	  /* If we found the entry, return the sequence value.  */
3963 	  if (found)
3964 	    return *(uint32_t *) (extra + idx);
3965 	  /* Skip the collation sequence value.  */
3966 	  idx += sizeof (uint32_t);
3967 	}
3968       return UINT_MAX;
3969     }
3970 }
3971 # endif /* _LIBC */
3972 #endif /* RE_ENABLE_I18N */
3973 
3974 /* Check whether the node accepts the byte which is IDX-th
3975    byte of the INPUT.  */
3976 
3977 static bool
3978 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3979 		   Idx idx)
3980 {
3981   unsigned char ch;
3982   ch = re_string_byte_at (&mctx->input, idx);
3983   switch (node->type)
3984     {
3985     case CHARACTER:
3986       if (node->opr.c != ch)
3987         return false;
3988       break;
3989 
3990     case SIMPLE_BRACKET:
3991       if (!bitset_contain (node->opr.sbcset, ch))
3992         return false;
3993       break;
3994 
3995 #ifdef RE_ENABLE_I18N
3996     case OP_UTF8_PERIOD:
3997       if (ch >= ASCII_CHARS)
3998         return false;
3999       FALLTHROUGH;
4000 #endif
4001     case OP_PERIOD:
4002       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4003 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4004 	return false;
4005       break;
4006 
4007     default:
4008       return false;
4009     }
4010 
4011   if (node->constraint)
4012     {
4013       /* The node has constraints.  Check whether the current context
4014 	 satisfies the constraints.  */
4015       unsigned int context = re_string_context_at (&mctx->input, idx,
4016 						   mctx->eflags);
4017       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4018 	return false;
4019     }
4020 
4021   return true;
4022 }
4023 
4024 /* Extend the buffers, if the buffers have run out.  */
4025 
4026 static reg_errcode_t
4027 __attribute_warn_unused_result__
4028 extend_buffers (re_match_context_t *mctx, int min_len)
4029 {
4030   reg_errcode_t ret;
4031   re_string_t *pstr = &mctx->input;
4032 
4033   /* Avoid overflow.  */
4034   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4035 			<= pstr->bufs_len))
4036     return REG_ESPACE;
4037 
4038   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4039   ret = re_string_realloc_buffers (pstr,
4040 				   MAX (min_len,
4041 					MIN (pstr->len, pstr->bufs_len * 2)));
4042   if (__glibc_unlikely (ret != REG_NOERROR))
4043     return ret;
4044 
4045   if (mctx->state_log != NULL)
4046     {
4047       /* And double the length of state_log.  */
4048       /* XXX We have no indication of the size of this buffer.  If this
4049 	 allocation fail we have no indication that the state_log array
4050 	 does not have the right size.  */
4051       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4052 					      pstr->bufs_len + 1);
4053       if (__glibc_unlikely (new_array == NULL))
4054 	return REG_ESPACE;
4055       mctx->state_log = new_array;
4056     }
4057 
4058   /* Then reconstruct the buffers.  */
4059   if (pstr->icase)
4060     {
4061 #ifdef RE_ENABLE_I18N
4062       if (pstr->mb_cur_max > 1)
4063 	{
4064 	  ret = build_wcs_upper_buffer (pstr);
4065 	  if (__glibc_unlikely (ret != REG_NOERROR))
4066 	    return ret;
4067 	}
4068       else
4069 #endif /* RE_ENABLE_I18N  */
4070 	build_upper_buffer (pstr);
4071     }
4072   else
4073     {
4074 #ifdef RE_ENABLE_I18N
4075       if (pstr->mb_cur_max > 1)
4076 	build_wcs_buffer (pstr);
4077       else
4078 #endif /* RE_ENABLE_I18N  */
4079 	{
4080 	  if (pstr->trans != NULL)
4081 	    re_string_translate_buffer (pstr);
4082 	}
4083     }
4084   return REG_NOERROR;
4085 }
4086 
4087 
4088 /* Functions for matching context.  */
4089 
4090 /* Initialize MCTX.  */
4091 
4092 static reg_errcode_t
4093 __attribute_warn_unused_result__
4094 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4095 {
4096   mctx->eflags = eflags;
4097   mctx->match_last = -1;
4098   if (n > 0)
4099     {
4100       /* Avoid overflow.  */
4101       size_t max_object_size =
4102 	MAX (sizeof (struct re_backref_cache_entry),
4103 	     sizeof (re_sub_match_top_t *));
4104       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4105 	return REG_ESPACE;
4106 
4107       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4108       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4109       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4110 	return REG_ESPACE;
4111     }
4112   /* Already zero-ed by the caller.
4113      else
4114        mctx->bkref_ents = NULL;
4115      mctx->nbkref_ents = 0;
4116      mctx->nsub_tops = 0;  */
4117   mctx->abkref_ents = n;
4118   mctx->max_mb_elem_len = 1;
4119   mctx->asub_tops = n;
4120   return REG_NOERROR;
4121 }
4122 
4123 /* Clean the entries which depend on the current input in MCTX.
4124    This function must be invoked when the matcher changes the start index
4125    of the input, or changes the input string.  */
4126 
4127 static void
4128 match_ctx_clean (re_match_context_t *mctx)
4129 {
4130   Idx st_idx;
4131   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4132     {
4133       Idx sl_idx;
4134       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4135       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4136 	{
4137 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4138 	  re_free (last->path.array);
4139 	  re_free (last);
4140 	}
4141       re_free (top->lasts);
4142       if (top->path)
4143 	{
4144 	  re_free (top->path->array);
4145 	  re_free (top->path);
4146 	}
4147       re_free (top);
4148     }
4149 
4150   mctx->nsub_tops = 0;
4151   mctx->nbkref_ents = 0;
4152 }
4153 
4154 /* Free all the memory associated with MCTX.  */
4155 
4156 static void
4157 match_ctx_free (re_match_context_t *mctx)
4158 {
4159   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4160   match_ctx_clean (mctx);
4161   re_free (mctx->sub_tops);
4162   re_free (mctx->bkref_ents);
4163 }
4164 
4165 /* Add a new backreference entry to MCTX.
4166    Note that we assume that caller never call this function with duplicate
4167    entry, and call with STR_IDX which isn't smaller than any existing entry.
4168 */
4169 
4170 static reg_errcode_t
4171 __attribute_warn_unused_result__
4172 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4173 		     Idx to)
4174 {
4175   if (mctx->nbkref_ents >= mctx->abkref_ents)
4176     {
4177       struct re_backref_cache_entry* new_entry;
4178       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4179 			      mctx->abkref_ents * 2);
4180       if (__glibc_unlikely (new_entry == NULL))
4181 	{
4182 	  re_free (mctx->bkref_ents);
4183 	  return REG_ESPACE;
4184 	}
4185       mctx->bkref_ents = new_entry;
4186       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4187 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4188       mctx->abkref_ents *= 2;
4189     }
4190   if (mctx->nbkref_ents > 0
4191       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4192     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4193 
4194   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4195   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4196   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4197   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4198 
4199   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4200      If bit N is clear, means that this entry won't epsilon-transition to
4201      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4202      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4203      such node.
4204 
4205      A backreference does not epsilon-transition unless it is empty, so set
4206      to all zeros if FROM != TO.  */
4207   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4208     = (from == to ? -1 : 0);
4209 
4210   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4211   if (mctx->max_mb_elem_len < to - from)
4212     mctx->max_mb_elem_len = to - from;
4213   return REG_NOERROR;
4214 }
4215 
4216 /* Return the first entry with the same str_idx, or -1 if none is
4217    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4218 
4219 static Idx
4220 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4221 {
4222   Idx left, right, mid, last;
4223   last = right = mctx->nbkref_ents;
4224   for (left = 0; left < right;)
4225     {
4226       mid = (left + right) / 2;
4227       if (mctx->bkref_ents[mid].str_idx < str_idx)
4228 	left = mid + 1;
4229       else
4230 	right = mid;
4231     }
4232   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4233     return left;
4234   else
4235     return -1;
4236 }
4237 
4238 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4239    at STR_IDX.  */
4240 
4241 static reg_errcode_t
4242 __attribute_warn_unused_result__
4243 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4244 {
4245   DEBUG_ASSERT (mctx->sub_tops != NULL);
4246   DEBUG_ASSERT (mctx->asub_tops > 0);
4247   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4248     {
4249       Idx new_asub_tops = mctx->asub_tops * 2;
4250       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4251 						   re_sub_match_top_t *,
4252 						   new_asub_tops);
4253       if (__glibc_unlikely (new_array == NULL))
4254 	return REG_ESPACE;
4255       mctx->sub_tops = new_array;
4256       mctx->asub_tops = new_asub_tops;
4257     }
4258   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4259   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4260     return REG_ESPACE;
4261   mctx->sub_tops[mctx->nsub_tops]->node = node;
4262   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4263   return REG_NOERROR;
4264 }
4265 
4266 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4267    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4268 
4269 static re_sub_match_last_t *
4270 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4271 {
4272   re_sub_match_last_t *new_entry;
4273   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4274     {
4275       Idx new_alasts = 2 * subtop->alasts + 1;
4276       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4277 						    re_sub_match_last_t *,
4278 						    new_alasts);
4279       if (__glibc_unlikely (new_array == NULL))
4280 	return NULL;
4281       subtop->lasts = new_array;
4282       subtop->alasts = new_alasts;
4283     }
4284   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4285   if (__glibc_likely (new_entry != NULL))
4286     {
4287       subtop->lasts[subtop->nlasts] = new_entry;
4288       new_entry->node = node;
4289       new_entry->str_idx = str_idx;
4290       ++subtop->nlasts;
4291     }
4292   return new_entry;
4293 }
4294 
4295 static void
4296 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4297 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4298 {
4299   sctx->sifted_states = sifted_sts;
4300   sctx->limited_states = limited_sts;
4301   sctx->last_node = last_node;
4302   sctx->last_str_idx = last_str_idx;
4303   re_node_set_init_empty (&sctx->limits);
4304 }
4305