xref: /dragonfly/contrib/grep/lib/regexec.c (revision fcf53d9b)
1 /* -*- buffer-read-only: t -*- vi: set ro: */
2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
3 /* Extended regular expression matching and search library.
4    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
5    Software Foundation, Inc.
6    This file is part of the GNU C Library.
7    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3, or (at your option)
12    any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License along
20    with this program; if not, write to the Free Software Foundation,
21    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include "verify.h"
24 #include "intprops.h"
25 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
26 				     Idx n) internal_function;
27 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
28 static void match_ctx_free (re_match_context_t *cache) internal_function;
29 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
30 					  Idx str_idx, Idx from, Idx to)
31      internal_function;
32 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
33      internal_function;
34 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
35 					   Idx str_idx) internal_function;
36 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
37 						    Idx node, Idx str_idx)
38      internal_function;
39 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
40 			   re_dfastate_t **limited_sts, Idx last_node,
41 			   Idx last_str_idx)
42      internal_function;
43 static reg_errcode_t re_search_internal (const regex_t *preg,
44 					 const char *string, Idx length,
45 					 Idx start, Idx last_start, Idx stop,
46 					 size_t nmatch, regmatch_t pmatch[],
47 					 int eflags) internal_function;
48 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
49 				  const char *string1, Idx length1,
50 				  const char *string2, Idx length2,
51 				  Idx start, regoff_t range,
52 				  struct re_registers *regs,
53 				  Idx stop, bool ret_len) internal_function;
54 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
55 				const char *string, Idx length, Idx start,
56 				regoff_t range, Idx stop,
57 				struct re_registers *regs,
58 				bool ret_len) internal_function;
59 static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
60 				  Idx nregs, int regs_allocated)
61      internal_function;
62 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
63      internal_function;
64 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
65 			   Idx *p_match_first) internal_function;
66 static Idx check_halt_state_context (const re_match_context_t *mctx,
67 				     const re_dfastate_t *state, Idx idx)
68      internal_function;
69 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
70 			 regmatch_t *prev_idx_match, Idx cur_node,
71 			 Idx cur_idx, Idx nmatch) internal_function;
72 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
73 				      Idx str_idx, Idx dest_node, Idx nregs,
74 				      regmatch_t *regs,
75 				      re_node_set *eps_via_nodes)
76      internal_function;
77 static reg_errcode_t set_regs (const regex_t *preg,
78 			       const re_match_context_t *mctx,
79 			       size_t nmatch, regmatch_t *pmatch,
80 			       bool fl_backtrack) internal_function;
81 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
82      internal_function;
83 
84 #ifdef RE_ENABLE_I18N
85 static int sift_states_iter_mb (const re_match_context_t *mctx,
86 				re_sift_context_t *sctx,
87 				Idx node_idx, Idx str_idx, Idx max_str_idx)
88      internal_function;
89 #endif /* RE_ENABLE_I18N */
90 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
91 					   re_sift_context_t *sctx)
92      internal_function;
93 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
94 					  re_sift_context_t *sctx, Idx str_idx,
95 					  re_node_set *cur_dest)
96      internal_function;
97 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
98 					      re_sift_context_t *sctx,
99 					      Idx str_idx,
100 					      re_node_set *dest_nodes)
101      internal_function;
102 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
103 					    re_node_set *dest_nodes,
104 					    const re_node_set *candidates)
105      internal_function;
106 static bool check_dst_limits (const re_match_context_t *mctx,
107 			      const re_node_set *limits,
108 			      Idx dst_node, Idx dst_idx, Idx src_node,
109 			      Idx src_idx) internal_function;
110 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
111 					int boundaries, Idx subexp_idx,
112 					Idx from_node, Idx bkref_idx)
113      internal_function;
114 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
115 				      Idx limit, Idx subexp_idx,
116 				      Idx node, Idx str_idx,
117 				      Idx bkref_idx) internal_function;
118 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
119 					  re_node_set *dest_nodes,
120 					  const re_node_set *candidates,
121 					  re_node_set *limits,
122 					  struct re_backref_cache_entry *bkref_ents,
123 					  Idx str_idx) internal_function;
124 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
125 					re_sift_context_t *sctx,
126 					Idx str_idx, const re_node_set *candidates)
127      internal_function;
128 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
129 					re_dfastate_t **dst,
130 					re_dfastate_t **src, Idx num)
131      internal_function;
132 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
133 					 re_match_context_t *mctx) internal_function;
134 static re_dfastate_t *transit_state (reg_errcode_t *err,
135 				     re_match_context_t *mctx,
136 				     re_dfastate_t *state) internal_function;
137 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
138 					    re_match_context_t *mctx,
139 					    re_dfastate_t *next_state)
140      internal_function;
141 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
142 						re_node_set *cur_nodes,
143 						Idx str_idx) internal_function;
144 #if 0
145 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
146 					re_match_context_t *mctx,
147 					re_dfastate_t *pstate)
148      internal_function;
149 #endif
150 #ifdef RE_ENABLE_I18N
151 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
152 				       re_dfastate_t *pstate)
153      internal_function;
154 #endif /* RE_ENABLE_I18N */
155 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
156 					  const re_node_set *nodes)
157      internal_function;
158 static reg_errcode_t get_subexp (re_match_context_t *mctx,
159 				 Idx bkref_node, Idx bkref_str_idx)
160      internal_function;
161 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
162 				     const re_sub_match_top_t *sub_top,
163 				     re_sub_match_last_t *sub_last,
164 				     Idx bkref_node, Idx bkref_str)
165      internal_function;
166 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
167 			     Idx subexp_idx, int type) internal_function;
168 static reg_errcode_t check_arrival (re_match_context_t *mctx,
169 				    state_array_t *path, Idx top_node,
170 				    Idx top_str, Idx last_node, Idx last_str,
171 				    int type) internal_function;
172 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
173 						   Idx str_idx,
174 						   re_node_set *cur_nodes,
175 						   re_node_set *next_nodes)
176      internal_function;
177 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
178 					       re_node_set *cur_nodes,
179 					       Idx ex_subexp, int type)
180      internal_function;
181 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
182 						   re_node_set *dst_nodes,
183 						   Idx target, Idx ex_subexp,
184 						   int type) internal_function;
185 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
186 					 re_node_set *cur_nodes, Idx cur_str,
187 					 Idx subexp_num, int type)
188      internal_function;
189 static bool build_trtable (const re_dfa_t *dfa,
190 			   re_dfastate_t *state) internal_function;
191 #ifdef RE_ENABLE_I18N
192 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
193 				    const re_string_t *input, Idx idx)
194      internal_function;
195 # ifdef _LIBC
196 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
197 						   size_t name_len)
198      internal_function;
199 # endif /* _LIBC */
200 #endif /* RE_ENABLE_I18N */
201 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
202 				       const re_dfastate_t *state,
203 				       re_node_set *states_node,
204 				       bitset_t *states_ch) internal_function;
205 static bool check_node_accept (const re_match_context_t *mctx,
206 			       const re_token_t *node, Idx idx)
207      internal_function;
208 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
209      internal_function;
210 
211 /* Entry point for POSIX code.  */
212 
213 /* regexec searches for a given pattern, specified by PREG, in the
214    string STRING.
215 
216    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
217    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
218    least NMATCH elements, and we set them to the offsets of the
219    corresponding matched substrings.
220 
221    EFLAGS specifies `execution flags' which affect matching: if
222    REG_NOTBOL is set, then ^ does not match at the beginning of the
223    string; if REG_NOTEOL is set, then $ does not match at the end.
224 
225    We return 0 if we find a match and REG_NOMATCH if not.  */
226 
227 int
228 regexec (preg, string, nmatch, pmatch, eflags)
229     const regex_t *_Restrict_ preg;
230     const char *_Restrict_ string;
231     size_t nmatch;
232     regmatch_t pmatch[_Restrict_arr_];
233     int eflags;
234 {
235   reg_errcode_t err;
236   Idx start, length;
237 #ifdef _LIBC
238   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
239 #endif
240 
241   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
242     return REG_BADPAT;
243 
244   if (eflags & REG_STARTEND)
245     {
246       start = pmatch[0].rm_so;
247       length = pmatch[0].rm_eo;
248     }
249   else
250     {
251       start = 0;
252       length = strlen (string);
253     }
254 
255   __libc_lock_lock (dfa->lock);
256   if (preg->no_sub)
257     err = re_search_internal (preg, string, length, start, length,
258 			      length, 0, NULL, eflags);
259   else
260     err = re_search_internal (preg, string, length, start, length,
261 			      length, nmatch, pmatch, eflags);
262   __libc_lock_unlock (dfa->lock);
263   return err != REG_NOERROR;
264 }
265 
266 #ifdef _LIBC
267 # include <shlib-compat.h>
268 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
269 
270 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
271 __typeof__ (__regexec) __compat_regexec;
272 
273 int
274 attribute_compat_text_section
275 __compat_regexec (const regex_t *_Restrict_ preg,
276 		  const char *_Restrict_ string, size_t nmatch,
277 		  regmatch_t pmatch[], int eflags)
278 {
279   return regexec (preg, string, nmatch, pmatch,
280 		  eflags & (REG_NOTBOL | REG_NOTEOL));
281 }
282 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
283 # endif
284 #endif
285 
286 /* Entry points for GNU code.  */
287 
288 /* re_match, re_search, re_match_2, re_search_2
289 
290    The former two functions operate on STRING with length LENGTH,
291    while the later two operate on concatenation of STRING1 and STRING2
292    with lengths LENGTH1 and LENGTH2, respectively.
293 
294    re_match() matches the compiled pattern in BUFP against the string,
295    starting at index START.
296 
297    re_search() first tries matching at index START, then it tries to match
298    starting from index START + 1, and so on.  The last start position tried
299    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
300    way as re_match().)
301 
302    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
303    the first STOP characters of the concatenation of the strings should be
304    concerned.
305 
306    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
307    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
308    computed relative to the concatenation, not relative to the individual
309    strings.)
310 
311    On success, re_match* functions return the length of the match, re_search*
312    return the position of the start of the match.  Return value -1 means no
313    match was found and -2 indicates an internal error.  */
314 
315 regoff_t
316 re_match (bufp, string, length, start, regs)
317     struct re_pattern_buffer *bufp;
318     const char *string;
319     Idx length, start;
320     struct re_registers *regs;
321 {
322   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
323 }
324 #ifdef _LIBC
325 weak_alias (__re_match, re_match)
326 #endif
327 
328 regoff_t
329 re_search (bufp, string, length, start, range, regs)
330     struct re_pattern_buffer *bufp;
331     const char *string;
332     Idx length, start;
333     regoff_t range;
334     struct re_registers *regs;
335 {
336   return re_search_stub (bufp, string, length, start, range, length, regs,
337 			 false);
338 }
339 #ifdef _LIBC
340 weak_alias (__re_search, re_search)
341 #endif
342 
343 regoff_t
344 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
345     struct re_pattern_buffer *bufp;
346     const char *string1, *string2;
347     Idx length1, length2, start, stop;
348     struct re_registers *regs;
349 {
350   return re_search_2_stub (bufp, string1, length1, string2, length2,
351 			   start, 0, regs, stop, true);
352 }
353 #ifdef _LIBC
354 weak_alias (__re_match_2, re_match_2)
355 #endif
356 
357 regoff_t
358 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
359     struct re_pattern_buffer *bufp;
360     const char *string1, *string2;
361     Idx length1, length2, start, stop;
362     regoff_t range;
363     struct re_registers *regs;
364 {
365   return re_search_2_stub (bufp, string1, length1, string2, length2,
366 			   start, range, regs, stop, false);
367 }
368 #ifdef _LIBC
369 weak_alias (__re_search_2, re_search_2)
370 #endif
371 
372 static regoff_t
373 internal_function
374 re_search_2_stub (struct re_pattern_buffer *bufp,
375 		  const char *string1, Idx length1,
376 		  const char *string2, Idx length2,
377 		  Idx start, regoff_t range, struct re_registers *regs,
378 		  Idx stop, bool ret_len)
379 {
380   const char *str;
381   regoff_t rval;
382   Idx len = length1 + length2;
383   char *s = NULL;
384 
385   verify (! TYPE_SIGNED (Idx));
386   if (BE (len < length1, 0))
387      return -2;
388   /* if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
389      return -2; */
390 
391   /* Concatenate the strings.  */
392   if (length2 > 0)
393     if (length1 > 0)
394       {
395 	s = re_malloc (char, len);
396 
397 	if (BE (s == NULL, 0))
398 	  return -2;
399 #ifdef _LIBC
400 	memcpy (__mempcpy (s, string1, length1), string2, length2);
401 #else
402 	memcpy (s, string1, length1);
403 	memcpy (s + length1, string2, length2);
404 #endif
405 	str = s;
406       }
407     else
408       str = string2;
409   else
410     str = string1;
411 
412   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
413 			 ret_len);
414   re_free (s);
415   return rval;
416 }
417 
418 /* The parameters have the same meaning as those of re_search.
419    Additional parameters:
420    If RET_LEN is true the length of the match is returned (re_match style);
421    otherwise the position of the match is returned.  */
422 
423 static regoff_t
424 internal_function
425 re_search_stub (struct re_pattern_buffer *bufp,
426 		const char *string, Idx length,
427 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
428 		bool ret_len)
429 {
430   reg_errcode_t result;
431   regmatch_t *pmatch;
432   Idx nregs;
433   regoff_t rval;
434   int eflags = 0;
435 #ifdef _LIBC
436   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
437 #endif
438   Idx last_start = start + range;
439 
440   /* Check for out-of-range.  */
441   verify (! TYPE_SIGNED (Idx));
442   /* if (BE (start < 0, 0))
443      return -1; */
444   if (BE (start > length, 0))
445      return -1;
446   if (BE (length < last_start || (0 <= range && last_start < start), 0))
447     last_start = length;
448   else if (BE (/* last_start < 0 || */ (range < 0 && start <= last_start), 0))
449     last_start = 0;
450 
451   __libc_lock_lock (dfa->lock);
452 
453   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
454   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
455 
456   /* Compile fastmap if we haven't yet.  */
457   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
458     re_compile_fastmap (bufp);
459 
460   if (BE (bufp->no_sub, 0))
461     regs = NULL;
462 
463   /* We need at least 1 register.  */
464   if (regs == NULL)
465     nregs = 1;
466   else if (BE (bufp->regs_allocated == REGS_FIXED
467 	       && regs->num_regs <= bufp->re_nsub, 0))
468     {
469       nregs = regs->num_regs;
470       if (BE (nregs < 1, 0))
471 	{
472 	  /* Nothing can be copied to regs.  */
473 	  regs = NULL;
474 	  nregs = 1;
475 	}
476     }
477   else
478     nregs = bufp->re_nsub + 1;
479   pmatch = re_malloc (regmatch_t, nregs);
480   if (BE (pmatch == NULL, 0))
481     {
482       rval = -2;
483       goto out;
484     }
485 
486   result = re_search_internal (bufp, string, length, start, last_start, stop,
487 			       nregs, pmatch, eflags);
488 
489   rval = 0;
490 
491   /* I hope we needn't fill ther regs with -1's when no match was found.  */
492   if (result != REG_NOERROR)
493     rval = -1;
494   else if (regs != NULL)
495     {
496       /* If caller wants register contents data back, copy them.  */
497       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
498 					   bufp->regs_allocated);
499       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
500 	rval = -2;
501     }
502 
503   if (BE (rval == 0, 1))
504     {
505       if (ret_len)
506 	{
507 	  assert (pmatch[0].rm_so == start);
508 	  rval = pmatch[0].rm_eo - start;
509 	}
510       else
511 	rval = pmatch[0].rm_so;
512     }
513   re_free (pmatch);
514  out:
515   __libc_lock_unlock (dfa->lock);
516   return rval;
517 }
518 
519 static unsigned int
520 internal_function
521 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
522 	      int regs_allocated)
523 {
524   int rval = REGS_REALLOCATE;
525   Idx i;
526   Idx need_regs = nregs + 1;
527   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
528      uses.  */
529 
530   /* Have the register data arrays been allocated?  */
531   if (regs_allocated == REGS_UNALLOCATED)
532     { /* No.  So allocate them with malloc.  */
533       regs->start = re_malloc (regoff_t, need_regs);
534       if (BE (regs->start == NULL, 0))
535 	return REGS_UNALLOCATED;
536       regs->end = re_malloc (regoff_t, need_regs);
537       if (BE (regs->end == NULL, 0))
538 	{
539 	  re_free (regs->start);
540 	  return REGS_UNALLOCATED;
541 	}
542       regs->num_regs = need_regs;
543     }
544   else if (regs_allocated == REGS_REALLOCATE)
545     { /* Yes.  If we need more elements than were already
546 	 allocated, reallocate them.  If we need fewer, just
547 	 leave it alone.  */
548       if (BE (need_regs > regs->num_regs, 0))
549 	{
550 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
551 	  regoff_t *new_end;
552 	  if (BE (new_start == NULL, 0))
553 	    return REGS_UNALLOCATED;
554 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
555 	  if (BE (new_end == NULL, 0))
556 	    {
557 	      re_free (new_start);
558 	      return REGS_UNALLOCATED;
559 	    }
560 	  regs->start = new_start;
561 	  regs->end = new_end;
562 	  regs->num_regs = need_regs;
563 	}
564     }
565   else
566     {
567       assert (regs_allocated == REGS_FIXED);
568       /* This function may not be called with REGS_FIXED and nregs too big.  */
569       assert (regs->num_regs >= nregs);
570       rval = REGS_FIXED;
571     }
572 
573   /* Copy the regs.  */
574   for (i = 0; i < nregs; ++i)
575     {
576       regs->start[i] = pmatch[i].rm_so;
577       regs->end[i] = pmatch[i].rm_eo;
578     }
579   for ( ; i < regs->num_regs; ++i)
580     regs->start[i] = regs->end[i] = -1;
581 
582   return rval;
583 }
584 
585 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
586    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
587    this memory for recording register information.  STARTS and ENDS
588    must be allocated using the malloc library routine, and must each
589    be at least NUM_REGS * sizeof (regoff_t) bytes long.
590 
591    If NUM_REGS == 0, then subsequent matches should allocate their own
592    register data.
593 
594    Unless this function is called, the first search or match using
595    PATTERN_BUFFER will allocate its own register data, without
596    freeing the old data.  */
597 
598 void
599 re_set_registers (bufp, regs, num_regs, starts, ends)
600     struct re_pattern_buffer *bufp;
601     struct re_registers *regs;
602     __re_size_t num_regs;
603     regoff_t *starts, *ends;
604 {
605   if (num_regs)
606     {
607       bufp->regs_allocated = REGS_REALLOCATE;
608       regs->num_regs = num_regs;
609       regs->start = starts;
610       regs->end = ends;
611     }
612   else
613     {
614       bufp->regs_allocated = REGS_UNALLOCATED;
615       regs->num_regs = 0;
616       regs->start = regs->end = NULL;
617     }
618 }
619 #ifdef _LIBC
620 weak_alias (__re_set_registers, re_set_registers)
621 #endif
622 
623 /* Entry points compatible with 4.2 BSD regex library.  We don't define
624    them unless specifically requested.  */
625 
626 #if defined _REGEX_RE_COMP || defined _LIBC
627 int
628 # ifdef _LIBC
629 weak_function
630 # endif
631 re_exec (s)
632      const char *s;
633 {
634   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
635 }
636 #endif /* _REGEX_RE_COMP */
637 
638 /* Internal entry point.  */
639 
640 /* Searches for a compiled pattern PREG in the string STRING, whose
641    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
642    meaning as with regexec.  LAST_START is START + RANGE, where
643    START and RANGE have the same meaning as with re_search.
644    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
645    otherwise return the error code.
646    Note: We assume front end functions already check ranges.
647    (0 <= LAST_START && LAST_START <= LENGTH)  */
648 
649 static reg_errcode_t
650 internal_function __attribute_warn_unused_result__
651 re_search_internal (const regex_t *preg,
652 		    const char *string, Idx length,
653 		    Idx start, Idx last_start, Idx stop,
654 		    size_t nmatch, regmatch_t pmatch[],
655 		    int eflags)
656 {
657   reg_errcode_t err;
658   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
659   Idx left_lim, right_lim;
660   int incr;
661   bool fl_longest_match;
662   int match_kind;
663   Idx match_first;
664   Idx match_last = REG_MISSING;
665   Idx extra_nmatch;
666   bool sb;
667   int ch;
668 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
669   re_match_context_t mctx = { .dfa = dfa };
670 #else
671   re_match_context_t mctx;
672 #endif
673   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
674 		    && start != last_start && !preg->can_be_null)
675 		   ? preg->fastmap : NULL);
676   RE_TRANSLATE_TYPE t = preg->translate;
677 
678 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
679   memset (&mctx, '\0', sizeof (re_match_context_t));
680   mctx.dfa = dfa;
681 #endif
682 
683   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
684   nmatch -= extra_nmatch;
685 
686   /* Check if the DFA haven't been compiled.  */
687   if (BE (preg->used == 0 || dfa->init_state == NULL
688 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
689 	  || dfa->init_state_begbuf == NULL, 0))
690     return REG_NOMATCH;
691 
692 #ifdef DEBUG
693   /* We assume front-end functions already check them.  */
694   assert (0 <= last_start && last_start <= length);
695 #endif
696 
697   /* If initial states with non-begbuf contexts have no elements,
698      the regex must be anchored.  If preg->newline_anchor is set,
699      we'll never use init_state_nl, so do not check it.  */
700   if (dfa->init_state->nodes.nelem == 0
701       && dfa->init_state_word->nodes.nelem == 0
702       && (dfa->init_state_nl->nodes.nelem == 0
703 	  || !preg->newline_anchor))
704     {
705       if (start != 0 && last_start != 0)
706         return REG_NOMATCH;
707       start = last_start = 0;
708     }
709 
710   /* We must check the longest matching, if nmatch > 0.  */
711   fl_longest_match = (nmatch != 0 || dfa->nbackref);
712 
713   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
714 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
715 			    dfa);
716   if (BE (err != REG_NOERROR, 0))
717     goto free_return;
718   mctx.input.stop = stop;
719   mctx.input.raw_stop = stop;
720   mctx.input.newline_anchor = preg->newline_anchor;
721 
722   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
723   if (BE (err != REG_NOERROR, 0))
724     goto free_return;
725 
726   /* We will log all the DFA states through which the dfa pass,
727      if nmatch > 1, or this dfa has "multibyte node", which is a
728      back-reference or a node which can accept multibyte character or
729      multi character collating element.  */
730   if (nmatch > 1 || dfa->has_mb_node)
731     {
732       /* Avoid overflow.  */
733       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
734 	{
735 	  err = REG_ESPACE;
736 	  goto free_return;
737 	}
738 
739       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
740       if (BE (mctx.state_log == NULL, 0))
741 	{
742 	  err = REG_ESPACE;
743 	  goto free_return;
744 	}
745     }
746   else
747     mctx.state_log = NULL;
748 
749   match_first = start;
750   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
751 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
752 
753   /* Check incrementally whether of not the input string match.  */
754   incr = (last_start < start) ? -1 : 1;
755   left_lim = (last_start < start) ? last_start : start;
756   right_lim = (last_start < start) ? start : last_start;
757   sb = dfa->mb_cur_max == 1;
758   match_kind =
759     (fastmap
760      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
761 	| (start <= last_start ? 2 : 0)
762 	| (t != NULL ? 1 : 0))
763      : 8);
764 
765   for (;; match_first += incr)
766     {
767       err = REG_NOMATCH;
768       if (match_first < left_lim || right_lim < match_first)
769 	goto free_return;
770 
771       /* Advance as rapidly as possible through the string, until we
772 	 find a plausible place to start matching.  This may be done
773 	 with varying efficiency, so there are various possibilities:
774 	 only the most common of them are specialized, in order to
775 	 save on code size.  We use a switch statement for speed.  */
776       switch (match_kind)
777 	{
778 	case 8:
779 	  /* No fastmap.  */
780 	  break;
781 
782 	case 7:
783 	  /* Fastmap with single-byte translation, match forward.  */
784 	  while (BE (match_first < right_lim, 1)
785 		 && !fastmap[t[(unsigned char) string[match_first]]])
786 	    ++match_first;
787 	  goto forward_match_found_start_or_reached_end;
788 
789 	case 6:
790 	  /* Fastmap without translation, match forward.  */
791 	  while (BE (match_first < right_lim, 1)
792 		 && !fastmap[(unsigned char) string[match_first]])
793 	    ++match_first;
794 
795 	forward_match_found_start_or_reached_end:
796 	  if (BE (match_first == right_lim, 0))
797 	    {
798 	      ch = match_first >= length
799 		       ? 0 : (unsigned char) string[match_first];
800 	      if (!fastmap[t ? t[ch] : ch])
801 		goto free_return;
802 	    }
803 	  break;
804 
805 	case 4:
806 	case 5:
807 	  /* Fastmap without multi-byte translation, match backwards.  */
808 	  while (match_first >= left_lim)
809 	    {
810 	      ch = match_first >= length
811 		       ? 0 : (unsigned char) string[match_first];
812 	      if (fastmap[t ? t[ch] : ch])
813 		break;
814 	      --match_first;
815 	    }
816 	  if (match_first < left_lim)
817 	    goto free_return;
818 	  break;
819 
820 	default:
821 	  /* In this case, we can't determine easily the current byte,
822 	     since it might be a component byte of a multibyte
823 	     character.  Then we use the constructed buffer instead.  */
824 	  for (;;)
825 	    {
826 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
827 		 buffers.  */
828 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
829 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
830 		{
831 		  err = re_string_reconstruct (&mctx.input, match_first,
832 					       eflags);
833 		  if (BE (err != REG_NOERROR, 0))
834 		    goto free_return;
835 
836 		  offset = match_first - mctx.input.raw_mbs_idx;
837 		}
838 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
839 		 Note that MATCH_FIRST must not be smaller than 0.  */
840 	      ch = (match_first >= length
841 		    ? 0 : re_string_byte_at (&mctx.input, offset));
842 	      if (fastmap[ch])
843 		break;
844 	      match_first += incr;
845 	      if (match_first < left_lim || match_first > right_lim)
846 		{
847 		  err = REG_NOMATCH;
848 		  goto free_return;
849 		}
850 	    }
851 	  break;
852 	}
853 
854       /* Reconstruct the buffers so that the matcher can assume that
855 	 the matching starts from the beginning of the buffer.  */
856       err = re_string_reconstruct (&mctx.input, match_first, eflags);
857       if (BE (err != REG_NOERROR, 0))
858 	goto free_return;
859 
860 #ifdef RE_ENABLE_I18N
861      /* Don't consider this char as a possible match start if it part,
862 	yet isn't the head, of a multibyte character.  */
863       if (!sb && !re_string_first_byte (&mctx.input, 0))
864 	continue;
865 #endif
866 
867       /* It seems to be appropriate one, then use the matcher.  */
868       /* We assume that the matching starts from 0.  */
869       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
870       match_last = check_matching (&mctx, fl_longest_match,
871 				   start <= last_start ? &match_first : NULL);
872       if (match_last != REG_MISSING)
873 	{
874 	  if (BE (match_last == REG_ERROR, 0))
875 	    {
876 	      err = REG_ESPACE;
877 	      goto free_return;
878 	    }
879 	  else
880 	    {
881 	      mctx.match_last = match_last;
882 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
883 		{
884 		  re_dfastate_t *pstate = mctx.state_log[match_last];
885 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
886 							     match_last);
887 		}
888 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
889 		  || dfa->nbackref)
890 		{
891 		  err = prune_impossible_nodes (&mctx);
892 		  if (err == REG_NOERROR)
893 		    break;
894 		  if (BE (err != REG_NOMATCH, 0))
895 		    goto free_return;
896 		  match_last = REG_MISSING;
897 		}
898 	      else
899 		break; /* We found a match.  */
900 	    }
901 	}
902 
903       match_ctx_clean (&mctx);
904     }
905 
906 #ifdef DEBUG
907   assert (match_last != REG_MISSING);
908   assert (err == REG_NOERROR);
909 #endif
910 
911   /* Set pmatch[] if we need.  */
912   if (nmatch > 0)
913     {
914       Idx reg_idx;
915 
916       /* Initialize registers.  */
917       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
918 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
919 
920       /* Set the points where matching start/end.  */
921       pmatch[0].rm_so = 0;
922       pmatch[0].rm_eo = mctx.match_last;
923       /* FIXME: This function should fail if mctx.match_last exceeds
924 	 the maximum possible regoff_t value.  We need a new error
925 	 code REG_OVERFLOW.  */
926 
927       if (!preg->no_sub && nmatch > 1)
928 	{
929 	  err = set_regs (preg, &mctx, nmatch, pmatch,
930 			  dfa->has_plural_match && dfa->nbackref > 0);
931 	  if (BE (err != REG_NOERROR, 0))
932 	    goto free_return;
933 	}
934 
935       /* At last, add the offset to the each registers, since we slided
936 	 the buffers so that we could assume that the matching starts
937 	 from 0.  */
938       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
939 	if (pmatch[reg_idx].rm_so != -1)
940 	  {
941 #ifdef RE_ENABLE_I18N
942 	    if (BE (mctx.input.offsets_needed != 0, 0))
943 	      {
944 		pmatch[reg_idx].rm_so =
945 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
946 		   ? mctx.input.valid_raw_len
947 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
948 		pmatch[reg_idx].rm_eo =
949 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
950 		   ? mctx.input.valid_raw_len
951 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
952 	      }
953 #else
954 	    assert (mctx.input.offsets_needed == 0);
955 #endif
956 	    pmatch[reg_idx].rm_so += match_first;
957 	    pmatch[reg_idx].rm_eo += match_first;
958 	  }
959       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
960 	{
961 	  pmatch[nmatch + reg_idx].rm_so = -1;
962 	  pmatch[nmatch + reg_idx].rm_eo = -1;
963 	}
964 
965       if (dfa->subexp_map)
966 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
967 	  if (dfa->subexp_map[reg_idx] != reg_idx)
968 	    {
969 	      pmatch[reg_idx + 1].rm_so
970 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
971 	      pmatch[reg_idx + 1].rm_eo
972 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
973 	    }
974     }
975 
976  free_return:
977   re_free (mctx.state_log);
978   if (dfa->nbackref)
979     match_ctx_free (&mctx);
980   re_string_destruct (&mctx.input);
981   return err;
982 }
983 
984 static reg_errcode_t
985 internal_function __attribute_warn_unused_result__
986 prune_impossible_nodes (re_match_context_t *mctx)
987 {
988   const re_dfa_t *const dfa = mctx->dfa;
989   Idx halt_node, match_last;
990   reg_errcode_t ret;
991   re_dfastate_t **sifted_states;
992   re_dfastate_t **lim_states = NULL;
993   re_sift_context_t sctx;
994 #ifdef DEBUG
995   assert (mctx->state_log != NULL);
996 #endif
997   match_last = mctx->match_last;
998   halt_node = mctx->last_node;
999 
1000   /* Avoid overflow.  */
1001   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
1002     return REG_ESPACE;
1003 
1004   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
1005   if (BE (sifted_states == NULL, 0))
1006     {
1007       ret = REG_ESPACE;
1008       goto free_return;
1009     }
1010   if (dfa->nbackref)
1011     {
1012       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1013       if (BE (lim_states == NULL, 0))
1014 	{
1015 	  ret = REG_ESPACE;
1016 	  goto free_return;
1017 	}
1018       while (1)
1019 	{
1020 	  memset (lim_states, '\0',
1021 		  sizeof (re_dfastate_t *) * (match_last + 1));
1022 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1023 			 match_last);
1024 	  ret = sift_states_backward (mctx, &sctx);
1025 	  re_node_set_free (&sctx.limits);
1026 	  if (BE (ret != REG_NOERROR, 0))
1027 	      goto free_return;
1028 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1029 	    break;
1030 	  do
1031 	    {
1032 	      --match_last;
1033 	      if (! REG_VALID_INDEX (match_last))
1034 		{
1035 		  ret = REG_NOMATCH;
1036 		  goto free_return;
1037 		}
1038 	    } while (mctx->state_log[match_last] == NULL
1039 		     || !mctx->state_log[match_last]->halt);
1040 	  halt_node = check_halt_state_context (mctx,
1041 						mctx->state_log[match_last],
1042 						match_last);
1043 	}
1044       ret = merge_state_array (dfa, sifted_states, lim_states,
1045 			       match_last + 1);
1046       re_free (lim_states);
1047       lim_states = NULL;
1048       if (BE (ret != REG_NOERROR, 0))
1049 	goto free_return;
1050     }
1051   else
1052     {
1053       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1054       ret = sift_states_backward (mctx, &sctx);
1055       re_node_set_free (&sctx.limits);
1056       if (BE (ret != REG_NOERROR, 0))
1057 	goto free_return;
1058       if (sifted_states[0] == NULL)
1059 	{
1060 	  ret = REG_NOMATCH;
1061 	  goto free_return;
1062 	}
1063     }
1064   re_free (mctx->state_log);
1065   mctx->state_log = sifted_states;
1066   sifted_states = NULL;
1067   mctx->last_node = halt_node;
1068   mctx->match_last = match_last;
1069   ret = REG_NOERROR;
1070  free_return:
1071   re_free (sifted_states);
1072   re_free (lim_states);
1073   return ret;
1074 }
1075 
1076 /* Acquire an initial state and return it.
1077    We must select appropriate initial state depending on the context,
1078    since initial states may have constraints like "\<", "^", etc..  */
1079 
1080 static inline re_dfastate_t *
1081 __attribute ((always_inline)) internal_function
1082 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1083 			    Idx idx)
1084 {
1085   const re_dfa_t *const dfa = mctx->dfa;
1086   if (dfa->init_state->has_constraint)
1087     {
1088       unsigned int context;
1089       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1090       if (IS_WORD_CONTEXT (context))
1091 	return dfa->init_state_word;
1092       else if (IS_ORDINARY_CONTEXT (context))
1093 	return dfa->init_state;
1094       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1095 	return dfa->init_state_begbuf;
1096       else if (IS_NEWLINE_CONTEXT (context))
1097 	return dfa->init_state_nl;
1098       else if (IS_BEGBUF_CONTEXT (context))
1099 	{
1100 	  /* It is relatively rare case, then calculate on demand.  */
1101 	  return re_acquire_state_context (err, dfa,
1102 					   dfa->init_state->entrance_nodes,
1103 					   context);
1104 	}
1105       else
1106 	/* Must not happen?  */
1107 	return dfa->init_state;
1108     }
1109   else
1110     return dfa->init_state;
1111 }
1112 
1113 /* Check whether the regular expression match input string INPUT or not,
1114    and return the index where the matching end.  Return REG_MISSING if
1115    there is no match, and return REG_ERROR in case of an error.
1116    FL_LONGEST_MATCH means we want the POSIX longest matching.
1117    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1118    next place where we may want to try matching.
1119    Note that the matcher assume that the maching starts from the current
1120    index of the buffer.  */
1121 
1122 static Idx
1123 internal_function __attribute_warn_unused_result__
1124 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1125 		Idx *p_match_first)
1126 {
1127   const re_dfa_t *const dfa = mctx->dfa;
1128   reg_errcode_t err;
1129   Idx match = 0;
1130   Idx match_last = REG_MISSING;
1131   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1132   re_dfastate_t *cur_state;
1133   bool at_init_state = p_match_first != NULL;
1134   Idx next_start_idx = cur_str_idx;
1135 
1136   err = REG_NOERROR;
1137   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1138   /* An initial state must not be NULL (invalid).  */
1139   if (BE (cur_state == NULL, 0))
1140     {
1141       assert (err == REG_ESPACE);
1142       return REG_ERROR;
1143     }
1144 
1145   if (mctx->state_log != NULL)
1146     {
1147       mctx->state_log[cur_str_idx] = cur_state;
1148 
1149       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1150 	 later.  E.g. Processing back references.  */
1151       if (BE (dfa->nbackref, 0))
1152 	{
1153 	  at_init_state = false;
1154 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1155 	  if (BE (err != REG_NOERROR, 0))
1156 	    return err;
1157 
1158 	  if (cur_state->has_backref)
1159 	    {
1160 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1161 	      if (BE (err != REG_NOERROR, 0))
1162 		return err;
1163 	    }
1164 	}
1165     }
1166 
1167   /* If the RE accepts NULL string.  */
1168   if (BE (cur_state->halt, 0))
1169     {
1170       if (!cur_state->has_constraint
1171 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1172 	{
1173 	  if (!fl_longest_match)
1174 	    return cur_str_idx;
1175 	  else
1176 	    {
1177 	      match_last = cur_str_idx;
1178 	      match = 1;
1179 	    }
1180 	}
1181     }
1182 
1183   while (!re_string_eoi (&mctx->input))
1184     {
1185       re_dfastate_t *old_state = cur_state;
1186       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1187 
1188       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1189 	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1190 	      && mctx->input.valid_len < mctx->input.len))
1191 	{
1192 	  err = extend_buffers (mctx);
1193 	  if (BE (err != REG_NOERROR, 0))
1194 	    {
1195 	      assert (err == REG_ESPACE);
1196 	      return REG_ERROR;
1197 	    }
1198 	}
1199 
1200       cur_state = transit_state (&err, mctx, cur_state);
1201       if (mctx->state_log != NULL)
1202 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1203 
1204       if (cur_state == NULL)
1205 	{
1206 	  /* Reached the invalid state or an error.  Try to recover a valid
1207 	     state using the state log, if available and if we have not
1208 	     already found a valid (even if not the longest) match.  */
1209 	  if (BE (err != REG_NOERROR, 0))
1210 	    return REG_ERROR;
1211 
1212 	  if (mctx->state_log == NULL
1213 	      || (match && !fl_longest_match)
1214 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1215 	    break;
1216 	}
1217 
1218       if (BE (at_init_state, 0))
1219 	{
1220 	  if (old_state == cur_state)
1221 	    next_start_idx = next_char_idx;
1222 	  else
1223 	    at_init_state = false;
1224 	}
1225 
1226       if (cur_state->halt)
1227 	{
1228 	  /* Reached a halt state.
1229 	     Check the halt state can satisfy the current context.  */
1230 	  if (!cur_state->has_constraint
1231 	      || check_halt_state_context (mctx, cur_state,
1232 					   re_string_cur_idx (&mctx->input)))
1233 	    {
1234 	      /* We found an appropriate halt state.  */
1235 	      match_last = re_string_cur_idx (&mctx->input);
1236 	      match = 1;
1237 
1238 	      /* We found a match, do not modify match_first below.  */
1239 	      p_match_first = NULL;
1240 	      if (!fl_longest_match)
1241 		break;
1242 	    }
1243 	}
1244     }
1245 
1246   if (p_match_first)
1247     *p_match_first += next_start_idx;
1248 
1249   return match_last;
1250 }
1251 
1252 /* Check NODE match the current context.  */
1253 
1254 static bool
1255 internal_function
1256 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1257 {
1258   re_token_type_t type = dfa->nodes[node].type;
1259   unsigned int constraint = dfa->nodes[node].constraint;
1260   if (type != END_OF_RE)
1261     return false;
1262   if (!constraint)
1263     return true;
1264   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1265     return false;
1266   return true;
1267 }
1268 
1269 /* Check the halt state STATE match the current context.
1270    Return 0 if not match, if the node, STATE has, is a halt node and
1271    match the context, return the node.  */
1272 
1273 static Idx
1274 internal_function
1275 check_halt_state_context (const re_match_context_t *mctx,
1276 			  const re_dfastate_t *state, Idx idx)
1277 {
1278   Idx i;
1279   unsigned int context;
1280 #ifdef DEBUG
1281   assert (state->halt);
1282 #endif
1283   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1284   for (i = 0; i < state->nodes.nelem; ++i)
1285     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1286       return state->nodes.elems[i];
1287   return 0;
1288 }
1289 
1290 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1291    corresponding to the DFA).
1292    Return the destination node, and update EPS_VIA_NODES;
1293    return REG_MISSING in case of errors.  */
1294 
1295 static Idx
1296 internal_function
1297 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1298 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1299 		   struct re_fail_stack_t *fs)
1300 {
1301   const re_dfa_t *const dfa = mctx->dfa;
1302   Idx i;
1303   bool ok;
1304   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1305     {
1306       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1307       re_node_set *edests = &dfa->edests[node];
1308       Idx dest_node;
1309       ok = re_node_set_insert (eps_via_nodes, node);
1310       if (BE (! ok, 0))
1311 	return REG_ERROR;
1312       /* Pick up a valid destination, or return REG_MISSING if none
1313 	 is found.  */
1314       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1315 	{
1316 	  Idx candidate = edests->elems[i];
1317 	  if (!re_node_set_contains (cur_nodes, candidate))
1318 	    continue;
1319           if (dest_node == REG_MISSING)
1320 	    dest_node = candidate;
1321 
1322 	  else
1323 	    {
1324 	      /* In order to avoid infinite loop like "(a*)*", return the second
1325 		 epsilon-transition if the first was already considered.  */
1326 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1327 		return candidate;
1328 
1329 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1330 	      else if (fs != NULL
1331 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1332 					   eps_via_nodes))
1333 		return REG_ERROR;
1334 
1335 	      /* We know we are going to exit.  */
1336 	      break;
1337 	    }
1338 	}
1339       return dest_node;
1340     }
1341   else
1342     {
1343       Idx naccepted = 0;
1344       re_token_type_t type = dfa->nodes[node].type;
1345 
1346 #ifdef RE_ENABLE_I18N
1347       if (dfa->nodes[node].accept_mb)
1348 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1349       else
1350 #endif /* RE_ENABLE_I18N */
1351       if (type == OP_BACK_REF)
1352 	{
1353 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1354 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1355 	  if (fs != NULL)
1356 	    {
1357 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1358 		return REG_MISSING;
1359 	      else if (naccepted)
1360 		{
1361 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1362 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1363 			      naccepted) != 0)
1364 		    return REG_MISSING;
1365 		}
1366 	    }
1367 
1368 	  if (naccepted == 0)
1369 	    {
1370 	      Idx dest_node;
1371 	      ok = re_node_set_insert (eps_via_nodes, node);
1372 	      if (BE (! ok, 0))
1373 		return REG_ERROR;
1374 	      dest_node = dfa->edests[node].elems[0];
1375 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1376 					dest_node))
1377 		return dest_node;
1378 	    }
1379 	}
1380 
1381       if (naccepted != 0
1382 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1383 	{
1384 	  Idx dest_node = dfa->nexts[node];
1385 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1386 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1387 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1388 					       dest_node)))
1389 	    return REG_MISSING;
1390 	  re_node_set_empty (eps_via_nodes);
1391 	  return dest_node;
1392 	}
1393     }
1394   return REG_MISSING;
1395 }
1396 
1397 static reg_errcode_t
1398 internal_function __attribute_warn_unused_result__
1399 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1400 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1401 {
1402   reg_errcode_t err;
1403   Idx num = fs->num++;
1404   if (fs->num == fs->alloc)
1405     {
1406       struct re_fail_stack_ent_t *new_array;
1407       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1408 				       * fs->alloc * 2));
1409       if (new_array == NULL)
1410 	return REG_ESPACE;
1411       fs->alloc *= 2;
1412       fs->stack = new_array;
1413     }
1414   fs->stack[num].idx = str_idx;
1415   fs->stack[num].node = dest_node;
1416   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1417   if (fs->stack[num].regs == NULL)
1418     return REG_ESPACE;
1419   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1420   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1421   return err;
1422 }
1423 
1424 static Idx
1425 internal_function
1426 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1427 		regmatch_t *regs, re_node_set *eps_via_nodes)
1428 {
1429   Idx num = --fs->num;
1430   assert (REG_VALID_INDEX (num));
1431   *pidx = fs->stack[num].idx;
1432   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1433   re_node_set_free (eps_via_nodes);
1434   re_free (fs->stack[num].regs);
1435   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1436   return fs->stack[num].node;
1437 }
1438 
1439 /* Set the positions where the subexpressions are starts/ends to registers
1440    PMATCH.
1441    Note: We assume that pmatch[0] is already set, and
1442    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1443 
1444 static reg_errcode_t
1445 internal_function __attribute_warn_unused_result__
1446 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1447 	  regmatch_t *pmatch, bool fl_backtrack)
1448 {
1449   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1450   Idx idx, cur_node;
1451   re_node_set eps_via_nodes;
1452   struct re_fail_stack_t *fs;
1453   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1454   regmatch_t *prev_idx_match;
1455   bool prev_idx_match_malloced = false;
1456 
1457 #ifdef DEBUG
1458   assert (nmatch > 1);
1459   assert (mctx->state_log != NULL);
1460 #endif
1461   if (fl_backtrack)
1462     {
1463       fs = &fs_body;
1464       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1465       if (fs->stack == NULL)
1466 	return REG_ESPACE;
1467     }
1468   else
1469     fs = NULL;
1470 
1471   cur_node = dfa->init_node;
1472   re_node_set_init_empty (&eps_via_nodes);
1473 
1474   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1475     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1476   else
1477     {
1478       prev_idx_match = re_malloc (regmatch_t, nmatch);
1479       if (prev_idx_match == NULL)
1480 	{
1481 	  free_fail_stack_return (fs);
1482 	  return REG_ESPACE;
1483 	}
1484       prev_idx_match_malloced = true;
1485     }
1486   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1487 
1488   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1489     {
1490       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1491 
1492       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1493 	{
1494 	  Idx reg_idx;
1495 	  if (fs)
1496 	    {
1497 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1498 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1499 		  break;
1500 	      if (reg_idx == nmatch)
1501 		{
1502 		  re_node_set_free (&eps_via_nodes);
1503 		  if (prev_idx_match_malloced)
1504 		    re_free (prev_idx_match);
1505 		  return free_fail_stack_return (fs);
1506 		}
1507 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1508 					 &eps_via_nodes);
1509 	    }
1510 	  else
1511 	    {
1512 	      re_node_set_free (&eps_via_nodes);
1513 	      if (prev_idx_match_malloced)
1514 		re_free (prev_idx_match);
1515 	      return REG_NOERROR;
1516 	    }
1517 	}
1518 
1519       /* Proceed to next node.  */
1520       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1521 				    &eps_via_nodes, fs);
1522 
1523       if (BE (! REG_VALID_INDEX (cur_node), 0))
1524 	{
1525 	  if (BE (cur_node == REG_ERROR, 0))
1526 	    {
1527 	      re_node_set_free (&eps_via_nodes);
1528 	      if (prev_idx_match_malloced)
1529 		re_free (prev_idx_match);
1530 	      free_fail_stack_return (fs);
1531 	      return REG_ESPACE;
1532 	    }
1533 	  if (fs)
1534 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1535 				       &eps_via_nodes);
1536 	  else
1537 	    {
1538 	      re_node_set_free (&eps_via_nodes);
1539 	      if (prev_idx_match_malloced)
1540 		re_free (prev_idx_match);
1541 	      return REG_NOMATCH;
1542 	    }
1543 	}
1544     }
1545   re_node_set_free (&eps_via_nodes);
1546   if (prev_idx_match_malloced)
1547     re_free (prev_idx_match);
1548   return free_fail_stack_return (fs);
1549 }
1550 
1551 static reg_errcode_t
1552 internal_function
1553 free_fail_stack_return (struct re_fail_stack_t *fs)
1554 {
1555   if (fs)
1556     {
1557       Idx fs_idx;
1558       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1559 	{
1560 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1561 	  re_free (fs->stack[fs_idx].regs);
1562 	}
1563       re_free (fs->stack);
1564     }
1565   return REG_NOERROR;
1566 }
1567 
1568 static void
1569 internal_function
1570 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1571 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1572 {
1573   int type = dfa->nodes[cur_node].type;
1574   if (type == OP_OPEN_SUBEXP)
1575     {
1576       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1577 
1578       /* We are at the first node of this sub expression.  */
1579       if (reg_num < nmatch)
1580 	{
1581 	  pmatch[reg_num].rm_so = cur_idx;
1582 	  pmatch[reg_num].rm_eo = -1;
1583 	}
1584     }
1585   else if (type == OP_CLOSE_SUBEXP)
1586     {
1587       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1588       if (reg_num < nmatch)
1589 	{
1590 	  /* We are at the last node of this sub expression.  */
1591 	  if (pmatch[reg_num].rm_so < cur_idx)
1592 	    {
1593 	      pmatch[reg_num].rm_eo = cur_idx;
1594 	      /* This is a non-empty match or we are not inside an optional
1595 		 subexpression.  Accept this right away.  */
1596 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1597 	    }
1598 	  else
1599 	    {
1600 	      if (dfa->nodes[cur_node].opt_subexp
1601 		  && prev_idx_match[reg_num].rm_so != -1)
1602 		/* We transited through an empty match for an optional
1603 		   subexpression, like (a?)*, and this is not the subexp's
1604 		   first match.  Copy back the old content of the registers
1605 		   so that matches of an inner subexpression are undone as
1606 		   well, like in ((a?))*.  */
1607 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1608 	      else
1609 		/* We completed a subexpression, but it may be part of
1610 		   an optional one, so do not update PREV_IDX_MATCH.  */
1611 		pmatch[reg_num].rm_eo = cur_idx;
1612 	    }
1613 	}
1614     }
1615 }
1616 
1617 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1618    and sift the nodes in each states according to the following rules.
1619    Updated state_log will be wrote to STATE_LOG.
1620 
1621    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1622      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1623 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1624 	the LAST_NODE, we throw away the node `a'.
1625      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1626 	string `s' and transit to `b':
1627 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1628 	   away the node `a'.
1629 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1630 	    thrown away, we throw away the node `a'.
1631      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1632 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1633 	   node `a'.
1634 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1635 	    we throw away the node `a'.  */
1636 
1637 #define STATE_NODE_CONTAINS(state,node) \
1638   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1639 
1640 static reg_errcode_t
1641 internal_function
1642 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1643 {
1644   reg_errcode_t err;
1645   int null_cnt = 0;
1646   Idx str_idx = sctx->last_str_idx;
1647   re_node_set cur_dest;
1648 
1649 #ifdef DEBUG
1650   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1651 #endif
1652 
1653   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1654      transit to the last_node and the last_node itself.  */
1655   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1656   if (BE (err != REG_NOERROR, 0))
1657     return err;
1658   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1659   if (BE (err != REG_NOERROR, 0))
1660     goto free_return;
1661 
1662   /* Then check each states in the state_log.  */
1663   while (str_idx > 0)
1664     {
1665       /* Update counters.  */
1666       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1667       if (null_cnt > mctx->max_mb_elem_len)
1668 	{
1669 	  memset (sctx->sifted_states, '\0',
1670 		  sizeof (re_dfastate_t *) * str_idx);
1671 	  re_node_set_free (&cur_dest);
1672 	  return REG_NOERROR;
1673 	}
1674       re_node_set_empty (&cur_dest);
1675       --str_idx;
1676 
1677       if (mctx->state_log[str_idx])
1678 	{
1679 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1680 	  if (BE (err != REG_NOERROR, 0))
1681 	    goto free_return;
1682 	}
1683 
1684       /* Add all the nodes which satisfy the following conditions:
1685 	 - It can epsilon transit to a node in CUR_DEST.
1686 	 - It is in CUR_SRC.
1687 	 And update state_log.  */
1688       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1689       if (BE (err != REG_NOERROR, 0))
1690 	goto free_return;
1691     }
1692   err = REG_NOERROR;
1693  free_return:
1694   re_node_set_free (&cur_dest);
1695   return err;
1696 }
1697 
1698 static reg_errcode_t
1699 internal_function __attribute_warn_unused_result__
1700 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1701 		     Idx str_idx, re_node_set *cur_dest)
1702 {
1703   const re_dfa_t *const dfa = mctx->dfa;
1704   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1705   Idx i;
1706 
1707   /* Then build the next sifted state.
1708      We build the next sifted state on `cur_dest', and update
1709      `sifted_states[str_idx]' with `cur_dest'.
1710      Note:
1711      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1712      `cur_src' points the node_set of the old `state_log[str_idx]'
1713      (with the epsilon nodes pre-filtered out).  */
1714   for (i = 0; i < cur_src->nelem; i++)
1715     {
1716       Idx prev_node = cur_src->elems[i];
1717       int naccepted = 0;
1718       bool ok;
1719 
1720 #ifdef DEBUG
1721       re_token_type_t type = dfa->nodes[prev_node].type;
1722       assert (!IS_EPSILON_NODE (type));
1723 #endif
1724 #ifdef RE_ENABLE_I18N
1725       /* If the node may accept `multi byte'.  */
1726       if (dfa->nodes[prev_node].accept_mb)
1727 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1728 					 str_idx, sctx->last_str_idx);
1729 #endif /* RE_ENABLE_I18N */
1730 
1731       /* We don't check backreferences here.
1732 	 See update_cur_sifted_state().  */
1733       if (!naccepted
1734 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1735 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1736 				  dfa->nexts[prev_node]))
1737 	naccepted = 1;
1738 
1739       if (naccepted == 0)
1740 	continue;
1741 
1742       if (sctx->limits.nelem)
1743 	{
1744 	  Idx to_idx = str_idx + naccepted;
1745 	  if (check_dst_limits (mctx, &sctx->limits,
1746 				dfa->nexts[prev_node], to_idx,
1747 				prev_node, str_idx))
1748 	    continue;
1749 	}
1750       ok = re_node_set_insert (cur_dest, prev_node);
1751       if (BE (! ok, 0))
1752 	return REG_ESPACE;
1753     }
1754 
1755   return REG_NOERROR;
1756 }
1757 
1758 /* Helper functions.  */
1759 
1760 static reg_errcode_t
1761 internal_function
1762 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1763 {
1764   Idx top = mctx->state_log_top;
1765 
1766   if (next_state_log_idx >= mctx->input.bufs_len
1767       || (next_state_log_idx >= mctx->input.valid_len
1768 	  && mctx->input.valid_len < mctx->input.len))
1769     {
1770       reg_errcode_t err;
1771       err = extend_buffers (mctx);
1772       if (BE (err != REG_NOERROR, 0))
1773 	return err;
1774     }
1775 
1776   if (top < next_state_log_idx)
1777     {
1778       memset (mctx->state_log + top + 1, '\0',
1779 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1780       mctx->state_log_top = next_state_log_idx;
1781     }
1782   return REG_NOERROR;
1783 }
1784 
1785 static reg_errcode_t
1786 internal_function
1787 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1788 		   re_dfastate_t **src, Idx num)
1789 {
1790   Idx st_idx;
1791   reg_errcode_t err;
1792   for (st_idx = 0; st_idx < num; ++st_idx)
1793     {
1794       if (dst[st_idx] == NULL)
1795 	dst[st_idx] = src[st_idx];
1796       else if (src[st_idx] != NULL)
1797 	{
1798 	  re_node_set merged_set;
1799 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1800 					&src[st_idx]->nodes);
1801 	  if (BE (err != REG_NOERROR, 0))
1802 	    return err;
1803 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1804 	  re_node_set_free (&merged_set);
1805 	  if (BE (err != REG_NOERROR, 0))
1806 	    return err;
1807 	}
1808     }
1809   return REG_NOERROR;
1810 }
1811 
1812 static reg_errcode_t
1813 internal_function
1814 update_cur_sifted_state (const re_match_context_t *mctx,
1815 			 re_sift_context_t *sctx, Idx str_idx,
1816 			 re_node_set *dest_nodes)
1817 {
1818   const re_dfa_t *const dfa = mctx->dfa;
1819   reg_errcode_t err = REG_NOERROR;
1820   const re_node_set *candidates;
1821   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1822 		: &mctx->state_log[str_idx]->nodes);
1823 
1824   if (dest_nodes->nelem == 0)
1825     sctx->sifted_states[str_idx] = NULL;
1826   else
1827     {
1828       if (candidates)
1829 	{
1830 	  /* At first, add the nodes which can epsilon transit to a node in
1831 	     DEST_NODE.  */
1832 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1833 	  if (BE (err != REG_NOERROR, 0))
1834 	    return err;
1835 
1836 	  /* Then, check the limitations in the current sift_context.  */
1837 	  if (sctx->limits.nelem)
1838 	    {
1839 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1840 					 mctx->bkref_ents, str_idx);
1841 	      if (BE (err != REG_NOERROR, 0))
1842 		return err;
1843 	    }
1844 	}
1845 
1846       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1847       if (BE (err != REG_NOERROR, 0))
1848 	return err;
1849     }
1850 
1851   if (candidates && mctx->state_log[str_idx]->has_backref)
1852     {
1853       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1854       if (BE (err != REG_NOERROR, 0))
1855 	return err;
1856     }
1857   return REG_NOERROR;
1858 }
1859 
1860 static reg_errcode_t
1861 internal_function __attribute_warn_unused_result__
1862 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1863 		       const re_node_set *candidates)
1864 {
1865   reg_errcode_t err = REG_NOERROR;
1866   Idx i;
1867 
1868   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1869   if (BE (err != REG_NOERROR, 0))
1870     return err;
1871 
1872   if (!state->inveclosure.alloc)
1873     {
1874       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1875       if (BE (err != REG_NOERROR, 0))
1876 	return REG_ESPACE;
1877       for (i = 0; i < dest_nodes->nelem; i++)
1878 	{
1879 	  err = re_node_set_merge (&state->inveclosure,
1880 				   dfa->inveclosures + dest_nodes->elems[i]);
1881 	  if (BE (err != REG_NOERROR, 0))
1882 	    return REG_ESPACE;
1883 	}
1884     }
1885   return re_node_set_add_intersect (dest_nodes, candidates,
1886 				    &state->inveclosure);
1887 }
1888 
1889 static reg_errcode_t
1890 internal_function
1891 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1892 		       const re_node_set *candidates)
1893 {
1894     Idx ecl_idx;
1895     reg_errcode_t err;
1896     re_node_set *inv_eclosure = dfa->inveclosures + node;
1897     re_node_set except_nodes;
1898     re_node_set_init_empty (&except_nodes);
1899     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1900       {
1901 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1902 	if (cur_node == node)
1903 	  continue;
1904 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1905 	  {
1906 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1907 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1908 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1909 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1910 		 && re_node_set_contains (dest_nodes, edst1))
1911 		|| (REG_VALID_NONZERO_INDEX (edst2)
1912 		    && !re_node_set_contains (inv_eclosure, edst2)
1913 		    && re_node_set_contains (dest_nodes, edst2)))
1914 	      {
1915 		err = re_node_set_add_intersect (&except_nodes, candidates,
1916 						 dfa->inveclosures + cur_node);
1917 		if (BE (err != REG_NOERROR, 0))
1918 		  {
1919 		    re_node_set_free (&except_nodes);
1920 		    return err;
1921 		  }
1922 	      }
1923 	  }
1924       }
1925     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1926       {
1927 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1928 	if (!re_node_set_contains (&except_nodes, cur_node))
1929 	  {
1930 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1931 	    re_node_set_remove_at (dest_nodes, idx);
1932 	  }
1933       }
1934     re_node_set_free (&except_nodes);
1935     return REG_NOERROR;
1936 }
1937 
1938 static bool
1939 internal_function
1940 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1941 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1942 {
1943   const re_dfa_t *const dfa = mctx->dfa;
1944   Idx lim_idx, src_pos, dst_pos;
1945 
1946   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1947   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1948   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1949     {
1950       Idx subexp_idx;
1951       struct re_backref_cache_entry *ent;
1952       ent = mctx->bkref_ents + limits->elems[lim_idx];
1953       subexp_idx = dfa->nodes[ent->node].opr.idx;
1954 
1955       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1956 					   subexp_idx, dst_node, dst_idx,
1957 					   dst_bkref_idx);
1958       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1959 					   subexp_idx, src_node, src_idx,
1960 					   src_bkref_idx);
1961 
1962       /* In case of:
1963 	 <src> <dst> ( <subexp> )
1964 	 ( <subexp> ) <src> <dst>
1965 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1966       if (src_pos == dst_pos)
1967 	continue; /* This is unrelated limitation.  */
1968       else
1969 	return true;
1970     }
1971   return false;
1972 }
1973 
1974 static int
1975 internal_function
1976 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1977 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1978 {
1979   const re_dfa_t *const dfa = mctx->dfa;
1980   const re_node_set *eclosures = dfa->eclosures + from_node;
1981   Idx node_idx;
1982 
1983   /* Else, we are on the boundary: examine the nodes on the epsilon
1984      closure.  */
1985   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1986     {
1987       Idx node = eclosures->elems[node_idx];
1988       switch (dfa->nodes[node].type)
1989 	{
1990 	case OP_BACK_REF:
1991 	  if (bkref_idx != REG_MISSING)
1992 	    {
1993 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1994 	      do
1995 		{
1996 		  Idx dst;
1997 		  int cpos;
1998 
1999 		  if (ent->node != node)
2000 		    continue;
2001 
2002 		  if (subexp_idx < BITSET_WORD_BITS
2003 		      && !(ent->eps_reachable_subexps_map
2004 			   & ((bitset_word_t) 1 << subexp_idx)))
2005 		    continue;
2006 
2007 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
2008 		     OP_CLOSE_SUBEXP cases below.  But, if the
2009 		     destination node is the same node as the source
2010 		     node, don't recurse because it would cause an
2011 		     infinite loop: a regex that exhibits this behavior
2012 		     is ()\1*\1*  */
2013 		  dst = dfa->edests[node].elems[0];
2014 		  if (dst == from_node)
2015 		    {
2016 		      if (boundaries & 1)
2017 			return -1;
2018 		      else /* if (boundaries & 2) */
2019 			return 0;
2020 		    }
2021 
2022 		  cpos =
2023 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2024 						 dst, bkref_idx);
2025 		  if (cpos == -1 /* && (boundaries & 1) */)
2026 		    return -1;
2027 		  if (cpos == 0 && (boundaries & 2))
2028 		    return 0;
2029 
2030 		  if (subexp_idx < BITSET_WORD_BITS)
2031 		    ent->eps_reachable_subexps_map
2032 		      &= ~((bitset_word_t) 1 << subexp_idx);
2033 		}
2034 	      while (ent++->more);
2035 	    }
2036 	  break;
2037 
2038 	case OP_OPEN_SUBEXP:
2039 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2040 	    return -1;
2041 	  break;
2042 
2043 	case OP_CLOSE_SUBEXP:
2044 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2045 	    return 0;
2046 	  break;
2047 
2048 	default:
2049 	    break;
2050 	}
2051     }
2052 
2053   return (boundaries & 2) ? 1 : 0;
2054 }
2055 
2056 static int
2057 internal_function
2058 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2059 			   Idx subexp_idx, Idx from_node, Idx str_idx,
2060 			   Idx bkref_idx)
2061 {
2062   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2063   int boundaries;
2064 
2065   /* If we are outside the range of the subexpression, return -1 or 1.  */
2066   if (str_idx < lim->subexp_from)
2067     return -1;
2068 
2069   if (lim->subexp_to < str_idx)
2070     return 1;
2071 
2072   /* If we are within the subexpression, return 0.  */
2073   boundaries = (str_idx == lim->subexp_from);
2074   boundaries |= (str_idx == lim->subexp_to) << 1;
2075   if (boundaries == 0)
2076     return 0;
2077 
2078   /* Else, examine epsilon closure.  */
2079   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2080 				      from_node, bkref_idx);
2081 }
2082 
2083 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2084    which are against limitations from DEST_NODES. */
2085 
2086 static reg_errcode_t
2087 internal_function
2088 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2089 		     const re_node_set *candidates, re_node_set *limits,
2090 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2091 {
2092   reg_errcode_t err;
2093   Idx node_idx, lim_idx;
2094 
2095   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2096     {
2097       Idx subexp_idx;
2098       struct re_backref_cache_entry *ent;
2099       ent = bkref_ents + limits->elems[lim_idx];
2100 
2101       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2102 	continue; /* This is unrelated limitation.  */
2103 
2104       subexp_idx = dfa->nodes[ent->node].opr.idx;
2105       if (ent->subexp_to == str_idx)
2106 	{
2107 	  Idx ops_node = REG_MISSING;
2108 	  Idx cls_node = REG_MISSING;
2109 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2110 	    {
2111 	      Idx node = dest_nodes->elems[node_idx];
2112 	      re_token_type_t type = dfa->nodes[node].type;
2113 	      if (type == OP_OPEN_SUBEXP
2114 		  && subexp_idx == dfa->nodes[node].opr.idx)
2115 		ops_node = node;
2116 	      else if (type == OP_CLOSE_SUBEXP
2117 		       && subexp_idx == dfa->nodes[node].opr.idx)
2118 		cls_node = node;
2119 	    }
2120 
2121 	  /* Check the limitation of the open subexpression.  */
2122 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2123 	  if (REG_VALID_INDEX (ops_node))
2124 	    {
2125 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2126 					   candidates);
2127 	      if (BE (err != REG_NOERROR, 0))
2128 		return err;
2129 	    }
2130 
2131 	  /* Check the limitation of the close subexpression.  */
2132 	  if (REG_VALID_INDEX (cls_node))
2133 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2134 	      {
2135 		Idx node = dest_nodes->elems[node_idx];
2136 		if (!re_node_set_contains (dfa->inveclosures + node,
2137 					   cls_node)
2138 		    && !re_node_set_contains (dfa->eclosures + node,
2139 					      cls_node))
2140 		  {
2141 		    /* It is against this limitation.
2142 		       Remove it form the current sifted state.  */
2143 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2144 						 candidates);
2145 		    if (BE (err != REG_NOERROR, 0))
2146 		      return err;
2147 		    --node_idx;
2148 		  }
2149 	      }
2150 	}
2151       else /* (ent->subexp_to != str_idx)  */
2152 	{
2153 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2154 	    {
2155 	      Idx node = dest_nodes->elems[node_idx];
2156 	      re_token_type_t type = dfa->nodes[node].type;
2157 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2158 		{
2159 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2160 		    continue;
2161 		  /* It is against this limitation.
2162 		     Remove it form the current sifted state.  */
2163 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2164 					       candidates);
2165 		  if (BE (err != REG_NOERROR, 0))
2166 		    return err;
2167 		}
2168 	    }
2169 	}
2170     }
2171   return REG_NOERROR;
2172 }
2173 
2174 static reg_errcode_t
2175 internal_function __attribute_warn_unused_result__
2176 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2177 		   Idx str_idx, const re_node_set *candidates)
2178 {
2179   const re_dfa_t *const dfa = mctx->dfa;
2180   reg_errcode_t err;
2181   Idx node_idx, node;
2182   re_sift_context_t local_sctx;
2183   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2184 
2185   if (first_idx == REG_MISSING)
2186     return REG_NOERROR;
2187 
2188   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2189 
2190   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2191     {
2192       Idx enabled_idx;
2193       re_token_type_t type;
2194       struct re_backref_cache_entry *entry;
2195       node = candidates->elems[node_idx];
2196       type = dfa->nodes[node].type;
2197       /* Avoid infinite loop for the REs like "()\1+".  */
2198       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2199 	continue;
2200       if (type != OP_BACK_REF)
2201 	continue;
2202 
2203       entry = mctx->bkref_ents + first_idx;
2204       enabled_idx = first_idx;
2205       do
2206 	{
2207 	  Idx subexp_len;
2208 	  Idx to_idx;
2209 	  Idx dst_node;
2210 	  bool ok;
2211 	  re_dfastate_t *cur_state;
2212 
2213 	  if (entry->node != node)
2214 	    continue;
2215 	  subexp_len = entry->subexp_to - entry->subexp_from;
2216 	  to_idx = str_idx + subexp_len;
2217 	  dst_node = (subexp_len ? dfa->nexts[node]
2218 		      : dfa->edests[node].elems[0]);
2219 
2220 	  if (to_idx > sctx->last_str_idx
2221 	      || sctx->sifted_states[to_idx] == NULL
2222 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2223 	      || check_dst_limits (mctx, &sctx->limits, node,
2224 				   str_idx, dst_node, to_idx))
2225 	    continue;
2226 
2227 	  if (local_sctx.sifted_states == NULL)
2228 	    {
2229 	      local_sctx = *sctx;
2230 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2231 	      if (BE (err != REG_NOERROR, 0))
2232 		goto free_return;
2233 	    }
2234 	  local_sctx.last_node = node;
2235 	  local_sctx.last_str_idx = str_idx;
2236 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2237 	  if (BE (! ok, 0))
2238 	    {
2239 	      err = REG_ESPACE;
2240 	      goto free_return;
2241 	    }
2242 	  cur_state = local_sctx.sifted_states[str_idx];
2243 	  err = sift_states_backward (mctx, &local_sctx);
2244 	  if (BE (err != REG_NOERROR, 0))
2245 	    goto free_return;
2246 	  if (sctx->limited_states != NULL)
2247 	    {
2248 	      err = merge_state_array (dfa, sctx->limited_states,
2249 				       local_sctx.sifted_states,
2250 				       str_idx + 1);
2251 	      if (BE (err != REG_NOERROR, 0))
2252 		goto free_return;
2253 	    }
2254 	  local_sctx.sifted_states[str_idx] = cur_state;
2255 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2256 
2257 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2258 	  entry = mctx->bkref_ents + enabled_idx;
2259 	}
2260       while (enabled_idx++, entry++->more);
2261     }
2262   err = REG_NOERROR;
2263  free_return:
2264   if (local_sctx.sifted_states != NULL)
2265     {
2266       re_node_set_free (&local_sctx.limits);
2267     }
2268 
2269   return err;
2270 }
2271 
2272 
2273 #ifdef RE_ENABLE_I18N
2274 static int
2275 internal_function
2276 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2277 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2278 {
2279   const re_dfa_t *const dfa = mctx->dfa;
2280   int naccepted;
2281   /* Check the node can accept `multi byte'.  */
2282   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2283   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2284       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2285 			    dfa->nexts[node_idx]))
2286     /* The node can't accept the `multi byte', or the
2287        destination was already thrown away, then the node
2288        could't accept the current input `multi byte'.   */
2289     naccepted = 0;
2290   /* Otherwise, it is sure that the node could accept
2291      `naccepted' bytes input.  */
2292   return naccepted;
2293 }
2294 #endif /* RE_ENABLE_I18N */
2295 
2296 
2297 /* Functions for state transition.  */
2298 
2299 /* Return the next state to which the current state STATE will transit by
2300    accepting the current input byte, and update STATE_LOG if necessary.
2301    If STATE can accept a multibyte char/collating element/back reference
2302    update the destination of STATE_LOG.  */
2303 
2304 static re_dfastate_t *
2305 internal_function __attribute_warn_unused_result__
2306 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2307 	       re_dfastate_t *state)
2308 {
2309   re_dfastate_t **trtable;
2310   unsigned char ch;
2311 
2312 #ifdef RE_ENABLE_I18N
2313   /* If the current state can accept multibyte.  */
2314   if (BE (state->accept_mb, 0))
2315     {
2316       *err = transit_state_mb (mctx, state);
2317       if (BE (*err != REG_NOERROR, 0))
2318 	return NULL;
2319     }
2320 #endif /* RE_ENABLE_I18N */
2321 
2322   /* Then decide the next state with the single byte.  */
2323 #if 0
2324   if (0)
2325     /* don't use transition table  */
2326     return transit_state_sb (err, mctx, state);
2327 #endif
2328 
2329   /* Use transition table  */
2330   ch = re_string_fetch_byte (&mctx->input);
2331   for (;;)
2332     {
2333       trtable = state->trtable;
2334       if (BE (trtable != NULL, 1))
2335 	return trtable[ch];
2336 
2337       trtable = state->word_trtable;
2338       if (BE (trtable != NULL, 1))
2339 	{
2340 	  unsigned int context;
2341 	  context
2342 	    = re_string_context_at (&mctx->input,
2343 				    re_string_cur_idx (&mctx->input) - 1,
2344 				    mctx->eflags);
2345 	  if (IS_WORD_CONTEXT (context))
2346 	    return trtable[ch + SBC_MAX];
2347 	  else
2348 	    return trtable[ch];
2349 	}
2350 
2351       if (!build_trtable (mctx->dfa, state))
2352 	{
2353 	  *err = REG_ESPACE;
2354 	  return NULL;
2355 	}
2356 
2357       /* Retry, we now have a transition table.  */
2358     }
2359 }
2360 
2361 /* Update the state_log if we need */
2362 static re_dfastate_t *
2363 internal_function
2364 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2365 		      re_dfastate_t *next_state)
2366 {
2367   const re_dfa_t *const dfa = mctx->dfa;
2368   Idx cur_idx = re_string_cur_idx (&mctx->input);
2369 
2370   if (cur_idx > mctx->state_log_top)
2371     {
2372       mctx->state_log[cur_idx] = next_state;
2373       mctx->state_log_top = cur_idx;
2374     }
2375   else if (mctx->state_log[cur_idx] == 0)
2376     {
2377       mctx->state_log[cur_idx] = next_state;
2378     }
2379   else
2380     {
2381       re_dfastate_t *pstate;
2382       unsigned int context;
2383       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2384       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2385 	 the destination of a multibyte char/collating element/
2386 	 back reference.  Then the next state is the union set of
2387 	 these destinations and the results of the transition table.  */
2388       pstate = mctx->state_log[cur_idx];
2389       log_nodes = pstate->entrance_nodes;
2390       if (next_state != NULL)
2391 	{
2392 	  table_nodes = next_state->entrance_nodes;
2393 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2394 					     log_nodes);
2395 	  if (BE (*err != REG_NOERROR, 0))
2396 	    return NULL;
2397 	}
2398       else
2399 	next_nodes = *log_nodes;
2400       /* Note: We already add the nodes of the initial state,
2401 	 then we don't need to add them here.  */
2402 
2403       context = re_string_context_at (&mctx->input,
2404 				      re_string_cur_idx (&mctx->input) - 1,
2405 				      mctx->eflags);
2406       next_state = mctx->state_log[cur_idx]
2407 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2408       /* We don't need to check errors here, since the return value of
2409 	 this function is next_state and ERR is already set.  */
2410 
2411       if (table_nodes != NULL)
2412 	re_node_set_free (&next_nodes);
2413     }
2414 
2415   if (BE (dfa->nbackref, 0) && next_state != NULL)
2416     {
2417       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2418 	 later.  We must check them here, since the back references in the
2419 	 next state might use them.  */
2420       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2421 					cur_idx);
2422       if (BE (*err != REG_NOERROR, 0))
2423 	return NULL;
2424 
2425       /* If the next state has back references.  */
2426       if (next_state->has_backref)
2427 	{
2428 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2429 	  if (BE (*err != REG_NOERROR, 0))
2430 	    return NULL;
2431 	  next_state = mctx->state_log[cur_idx];
2432 	}
2433     }
2434 
2435   return next_state;
2436 }
2437 
2438 /* Skip bytes in the input that correspond to part of a
2439    multi-byte match, then look in the log for a state
2440    from which to restart matching.  */
2441 static re_dfastate_t *
2442 internal_function
2443 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2444 {
2445   re_dfastate_t *cur_state;
2446   do
2447     {
2448       Idx max = mctx->state_log_top;
2449       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2450 
2451       do
2452 	{
2453 	  if (++cur_str_idx > max)
2454 	    return NULL;
2455 	  re_string_skip_bytes (&mctx->input, 1);
2456 	}
2457       while (mctx->state_log[cur_str_idx] == NULL);
2458 
2459       cur_state = merge_state_with_log (err, mctx, NULL);
2460     }
2461   while (*err == REG_NOERROR && cur_state == NULL);
2462   return cur_state;
2463 }
2464 
2465 /* Helper functions for transit_state.  */
2466 
2467 /* From the node set CUR_NODES, pick up the nodes whose types are
2468    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2469    expression. And register them to use them later for evaluating the
2470    correspoding back references.  */
2471 
2472 static reg_errcode_t
2473 internal_function
2474 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2475 			   Idx str_idx)
2476 {
2477   const re_dfa_t *const dfa = mctx->dfa;
2478   Idx node_idx;
2479   reg_errcode_t err;
2480 
2481   /* TODO: This isn't efficient.
2482 	   Because there might be more than one nodes whose types are
2483 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2484 	   nodes.
2485 	   E.g. RE: (a){2}  */
2486   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2487     {
2488       Idx node = cur_nodes->elems[node_idx];
2489       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2490 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2491 	  && (dfa->used_bkref_map
2492 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2493 	{
2494 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2495 	  if (BE (err != REG_NOERROR, 0))
2496 	    return err;
2497 	}
2498     }
2499   return REG_NOERROR;
2500 }
2501 
2502 #if 0
2503 /* Return the next state to which the current state STATE will transit by
2504    accepting the current input byte.  */
2505 
2506 static re_dfastate_t *
2507 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2508 		  re_dfastate_t *state)
2509 {
2510   const re_dfa_t *const dfa = mctx->dfa;
2511   re_node_set next_nodes;
2512   re_dfastate_t *next_state;
2513   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2514   unsigned int context;
2515 
2516   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2517   if (BE (*err != REG_NOERROR, 0))
2518     return NULL;
2519   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2520     {
2521       Idx cur_node = state->nodes.elems[node_cnt];
2522       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2523 	{
2524 	  *err = re_node_set_merge (&next_nodes,
2525 				    dfa->eclosures + dfa->nexts[cur_node]);
2526 	  if (BE (*err != REG_NOERROR, 0))
2527 	    {
2528 	      re_node_set_free (&next_nodes);
2529 	      return NULL;
2530 	    }
2531 	}
2532     }
2533   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2534   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2535   /* We don't need to check errors here, since the return value of
2536      this function is next_state and ERR is already set.  */
2537 
2538   re_node_set_free (&next_nodes);
2539   re_string_skip_bytes (&mctx->input, 1);
2540   return next_state;
2541 }
2542 #endif
2543 
2544 #ifdef RE_ENABLE_I18N
2545 static reg_errcode_t
2546 internal_function
2547 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2548 {
2549   const re_dfa_t *const dfa = mctx->dfa;
2550   reg_errcode_t err;
2551   Idx i;
2552 
2553   for (i = 0; i < pstate->nodes.nelem; ++i)
2554     {
2555       re_node_set dest_nodes, *new_nodes;
2556       Idx cur_node_idx = pstate->nodes.elems[i];
2557       int naccepted;
2558       Idx dest_idx;
2559       unsigned int context;
2560       re_dfastate_t *dest_state;
2561 
2562       if (!dfa->nodes[cur_node_idx].accept_mb)
2563 	continue;
2564 
2565       if (dfa->nodes[cur_node_idx].constraint)
2566 	{
2567 	  context = re_string_context_at (&mctx->input,
2568 					  re_string_cur_idx (&mctx->input),
2569 					  mctx->eflags);
2570 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2571 					   context))
2572 	    continue;
2573 	}
2574 
2575       /* How many bytes the node can accept?  */
2576       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2577 					   re_string_cur_idx (&mctx->input));
2578       if (naccepted == 0)
2579 	continue;
2580 
2581       /* The node can accepts `naccepted' bytes.  */
2582       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2583       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2584 			       : mctx->max_mb_elem_len);
2585       err = clean_state_log_if_needed (mctx, dest_idx);
2586       if (BE (err != REG_NOERROR, 0))
2587 	return err;
2588 #ifdef DEBUG
2589       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2590 #endif
2591       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2592 
2593       dest_state = mctx->state_log[dest_idx];
2594       if (dest_state == NULL)
2595 	dest_nodes = *new_nodes;
2596       else
2597 	{
2598 	  err = re_node_set_init_union (&dest_nodes,
2599 					dest_state->entrance_nodes, new_nodes);
2600 	  if (BE (err != REG_NOERROR, 0))
2601 	    return err;
2602 	}
2603       context = re_string_context_at (&mctx->input, dest_idx - 1,
2604 				      mctx->eflags);
2605       mctx->state_log[dest_idx]
2606 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2607       if (dest_state != NULL)
2608 	re_node_set_free (&dest_nodes);
2609       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2610 	return err;
2611     }
2612   return REG_NOERROR;
2613 }
2614 #endif /* RE_ENABLE_I18N */
2615 
2616 static reg_errcode_t
2617 internal_function
2618 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2619 {
2620   const re_dfa_t *const dfa = mctx->dfa;
2621   reg_errcode_t err;
2622   Idx i;
2623   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2624 
2625   for (i = 0; i < nodes->nelem; ++i)
2626     {
2627       Idx dest_str_idx, prev_nelem, bkc_idx;
2628       Idx node_idx = nodes->elems[i];
2629       unsigned int context;
2630       const re_token_t *node = dfa->nodes + node_idx;
2631       re_node_set *new_dest_nodes;
2632 
2633       /* Check whether `node' is a backreference or not.  */
2634       if (node->type != OP_BACK_REF)
2635 	continue;
2636 
2637       if (node->constraint)
2638 	{
2639 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2640 					  mctx->eflags);
2641 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2642 	    continue;
2643 	}
2644 
2645       /* `node' is a backreference.
2646 	 Check the substring which the substring matched.  */
2647       bkc_idx = mctx->nbkref_ents;
2648       err = get_subexp (mctx, node_idx, cur_str_idx);
2649       if (BE (err != REG_NOERROR, 0))
2650 	goto free_return;
2651 
2652       /* And add the epsilon closures (which is `new_dest_nodes') of
2653 	 the backreference to appropriate state_log.  */
2654 #ifdef DEBUG
2655       assert (dfa->nexts[node_idx] != REG_MISSING);
2656 #endif
2657       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2658 	{
2659 	  Idx subexp_len;
2660 	  re_dfastate_t *dest_state;
2661 	  struct re_backref_cache_entry *bkref_ent;
2662 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2663 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2664 	    continue;
2665 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2666 	  new_dest_nodes = (subexp_len == 0
2667 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2668 			    : dfa->eclosures + dfa->nexts[node_idx]);
2669 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2670 			  - bkref_ent->subexp_from);
2671 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2672 					  mctx->eflags);
2673 	  dest_state = mctx->state_log[dest_str_idx];
2674 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2675 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2676 	  /* Add `new_dest_node' to state_log.  */
2677 	  if (dest_state == NULL)
2678 	    {
2679 	      mctx->state_log[dest_str_idx]
2680 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2681 					    context);
2682 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2683 		      && err != REG_NOERROR, 0))
2684 		goto free_return;
2685 	    }
2686 	  else
2687 	    {
2688 	      re_node_set dest_nodes;
2689 	      err = re_node_set_init_union (&dest_nodes,
2690 					    dest_state->entrance_nodes,
2691 					    new_dest_nodes);
2692 	      if (BE (err != REG_NOERROR, 0))
2693 		{
2694 		  re_node_set_free (&dest_nodes);
2695 		  goto free_return;
2696 		}
2697 	      mctx->state_log[dest_str_idx]
2698 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2699 	      re_node_set_free (&dest_nodes);
2700 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2701 		      && err != REG_NOERROR, 0))
2702 		goto free_return;
2703 	    }
2704 	  /* We need to check recursively if the backreference can epsilon
2705 	     transit.  */
2706 	  if (subexp_len == 0
2707 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2708 	    {
2709 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2710 					       cur_str_idx);
2711 	      if (BE (err != REG_NOERROR, 0))
2712 		goto free_return;
2713 	      err = transit_state_bkref (mctx, new_dest_nodes);
2714 	      if (BE (err != REG_NOERROR, 0))
2715 		goto free_return;
2716 	    }
2717 	}
2718     }
2719   err = REG_NOERROR;
2720  free_return:
2721   return err;
2722 }
2723 
2724 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2725    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2726    Note that we might collect inappropriate candidates here.
2727    However, the cost of checking them strictly here is too high, then we
2728    delay these checking for prune_impossible_nodes().  */
2729 
2730 static reg_errcode_t
2731 internal_function __attribute_warn_unused_result__
2732 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2733 {
2734   const re_dfa_t *const dfa = mctx->dfa;
2735   Idx subexp_num, sub_top_idx;
2736   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2737   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2738   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2739   if (cache_idx != REG_MISSING)
2740     {
2741       const struct re_backref_cache_entry *entry
2742 	= mctx->bkref_ents + cache_idx;
2743       do
2744 	if (entry->node == bkref_node)
2745 	  return REG_NOERROR; /* We already checked it.  */
2746       while (entry++->more);
2747     }
2748 
2749   subexp_num = dfa->nodes[bkref_node].opr.idx;
2750 
2751   /* For each sub expression  */
2752   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2753     {
2754       reg_errcode_t err;
2755       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2756       re_sub_match_last_t *sub_last;
2757       Idx sub_last_idx, sl_str, bkref_str_off;
2758 
2759       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2760 	continue; /* It isn't related.  */
2761 
2762       sl_str = sub_top->str_idx;
2763       bkref_str_off = bkref_str_idx;
2764       /* At first, check the last node of sub expressions we already
2765 	 evaluated.  */
2766       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2767 	{
2768 	  regoff_t sl_str_diff;
2769 	  sub_last = sub_top->lasts[sub_last_idx];
2770 	  sl_str_diff = sub_last->str_idx - sl_str;
2771 	  /* The matched string by the sub expression match with the substring
2772 	     at the back reference?  */
2773 	  if (sl_str_diff > 0)
2774 	    {
2775 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2776 		{
2777 		  /* Not enough chars for a successful match.  */
2778 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2779 		    break;
2780 
2781 		  err = clean_state_log_if_needed (mctx,
2782 						   bkref_str_off
2783 						   + sl_str_diff);
2784 		  if (BE (err != REG_NOERROR, 0))
2785 		    return err;
2786 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2787 		}
2788 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2789 		/* We don't need to search this sub expression any more.  */
2790 		break;
2791 	    }
2792 	  bkref_str_off += sl_str_diff;
2793 	  sl_str += sl_str_diff;
2794 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2795 				bkref_str_idx);
2796 
2797 	  /* Reload buf, since the preceding call might have reallocated
2798 	     the buffer.  */
2799 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2800 
2801 	  if (err == REG_NOMATCH)
2802 	    continue;
2803 	  if (BE (err != REG_NOERROR, 0))
2804 	    return err;
2805 	}
2806 
2807       if (sub_last_idx < sub_top->nlasts)
2808 	continue;
2809       if (sub_last_idx > 0)
2810 	++sl_str;
2811       /* Then, search for the other last nodes of the sub expression.  */
2812       for (; sl_str <= bkref_str_idx; ++sl_str)
2813 	{
2814 	  Idx cls_node;
2815 	  regoff_t sl_str_off;
2816 	  const re_node_set *nodes;
2817 	  sl_str_off = sl_str - sub_top->str_idx;
2818 	  /* The matched string by the sub expression match with the substring
2819 	     at the back reference?  */
2820 	  if (sl_str_off > 0)
2821 	    {
2822 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2823 		{
2824 		  /* If we are at the end of the input, we cannot match.  */
2825 		  if (bkref_str_off >= mctx->input.len)
2826 		    break;
2827 
2828 		  err = extend_buffers (mctx);
2829 		  if (BE (err != REG_NOERROR, 0))
2830 		    return err;
2831 
2832 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2833 		}
2834 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2835 		break; /* We don't need to search this sub expression
2836 			  any more.  */
2837 	    }
2838 	  if (mctx->state_log[sl_str] == NULL)
2839 	    continue;
2840 	  /* Does this state have a ')' of the sub expression?  */
2841 	  nodes = &mctx->state_log[sl_str]->nodes;
2842 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2843 				       OP_CLOSE_SUBEXP);
2844 	  if (cls_node == REG_MISSING)
2845 	    continue; /* No.  */
2846 	  if (sub_top->path == NULL)
2847 	    {
2848 	      sub_top->path = calloc (sizeof (state_array_t),
2849 				      sl_str - sub_top->str_idx + 1);
2850 	      if (sub_top->path == NULL)
2851 		return REG_ESPACE;
2852 	    }
2853 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2854 	     in the current context?  */
2855 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2856 			       sub_top->str_idx, cls_node, sl_str,
2857 			       OP_CLOSE_SUBEXP);
2858 	  if (err == REG_NOMATCH)
2859 	      continue;
2860 	  if (BE (err != REG_NOERROR, 0))
2861 	      return err;
2862 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2863 	  if (BE (sub_last == NULL, 0))
2864 	    return REG_ESPACE;
2865 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2866 				bkref_str_idx);
2867 	  if (err == REG_NOMATCH)
2868 	    continue;
2869 	}
2870     }
2871   return REG_NOERROR;
2872 }
2873 
2874 /* Helper functions for get_subexp().  */
2875 
2876 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2877    If it can arrive, register the sub expression expressed with SUB_TOP
2878    and SUB_LAST.  */
2879 
2880 static reg_errcode_t
2881 internal_function
2882 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2883 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2884 {
2885   reg_errcode_t err;
2886   Idx to_idx;
2887   /* Can the subexpression arrive the back reference?  */
2888   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2889 		       sub_last->str_idx, bkref_node, bkref_str,
2890 		       OP_OPEN_SUBEXP);
2891   if (err != REG_NOERROR)
2892     return err;
2893   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2894 			     sub_last->str_idx);
2895   if (BE (err != REG_NOERROR, 0))
2896     return err;
2897   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2898   return clean_state_log_if_needed (mctx, to_idx);
2899 }
2900 
2901 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2902    Search '(' if FL_OPEN, or search ')' otherwise.
2903    TODO: This function isn't efficient...
2904 	 Because there might be more than one nodes whose types are
2905 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2906 	 nodes.
2907 	 E.g. RE: (a){2}  */
2908 
2909 static Idx
2910 internal_function
2911 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2912 		  Idx subexp_idx, int type)
2913 {
2914   Idx cls_idx;
2915   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2916     {
2917       Idx cls_node = nodes->elems[cls_idx];
2918       const re_token_t *node = dfa->nodes + cls_node;
2919       if (node->type == type
2920 	  && node->opr.idx == subexp_idx)
2921 	return cls_node;
2922     }
2923   return REG_MISSING;
2924 }
2925 
2926 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2927    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2928    heavily reused.
2929    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2930 
2931 static reg_errcode_t
2932 internal_function __attribute_warn_unused_result__
2933 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2934 	       Idx top_str, Idx last_node, Idx last_str, int type)
2935 {
2936   const re_dfa_t *const dfa = mctx->dfa;
2937   reg_errcode_t err = REG_NOERROR;
2938   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2939   re_dfastate_t *cur_state = NULL;
2940   re_node_set *cur_nodes, next_nodes;
2941   re_dfastate_t **backup_state_log;
2942   unsigned int context;
2943 
2944   subexp_num = dfa->nodes[top_node].opr.idx;
2945   /* Extend the buffer if we need.  */
2946   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2947     {
2948       re_dfastate_t **new_array;
2949       Idx old_alloc = path->alloc;
2950       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2951       if (BE (new_alloc < old_alloc, 0)
2952 	  || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2953 	return REG_ESPACE;
2954       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2955       if (BE (new_array == NULL, 0))
2956 	return REG_ESPACE;
2957       path->array = new_array;
2958       path->alloc = new_alloc;
2959       memset (new_array + old_alloc, '\0',
2960 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2961     }
2962 
2963   str_idx = path->next_idx ? path->next_idx : top_str;
2964 
2965   /* Temporary modify MCTX.  */
2966   backup_state_log = mctx->state_log;
2967   backup_cur_idx = mctx->input.cur_idx;
2968   mctx->state_log = path->array;
2969   mctx->input.cur_idx = str_idx;
2970 
2971   /* Setup initial node set.  */
2972   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2973   if (str_idx == top_str)
2974     {
2975       err = re_node_set_init_1 (&next_nodes, top_node);
2976       if (BE (err != REG_NOERROR, 0))
2977 	return err;
2978       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2979       if (BE (err != REG_NOERROR, 0))
2980 	{
2981 	  re_node_set_free (&next_nodes);
2982 	  return err;
2983 	}
2984     }
2985   else
2986     {
2987       cur_state = mctx->state_log[str_idx];
2988       if (cur_state && cur_state->has_backref)
2989 	{
2990 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2991 	  if (BE (err != REG_NOERROR, 0))
2992 	    return err;
2993 	}
2994       else
2995 	re_node_set_init_empty (&next_nodes);
2996     }
2997   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2998     {
2999       if (next_nodes.nelem)
3000 	{
3001 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3002 				    subexp_num, type);
3003 	  if (BE (err != REG_NOERROR, 0))
3004 	    {
3005 	      re_node_set_free (&next_nodes);
3006 	      return err;
3007 	    }
3008 	}
3009       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3010       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3011 	{
3012 	  re_node_set_free (&next_nodes);
3013 	  return err;
3014 	}
3015       mctx->state_log[str_idx] = cur_state;
3016     }
3017 
3018   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3019     {
3020       re_node_set_empty (&next_nodes);
3021       if (mctx->state_log[str_idx + 1])
3022 	{
3023 	  err = re_node_set_merge (&next_nodes,
3024 				   &mctx->state_log[str_idx + 1]->nodes);
3025 	  if (BE (err != REG_NOERROR, 0))
3026 	    {
3027 	      re_node_set_free (&next_nodes);
3028 	      return err;
3029 	    }
3030 	}
3031       if (cur_state)
3032 	{
3033 	  err = check_arrival_add_next_nodes (mctx, str_idx,
3034 					      &cur_state->non_eps_nodes,
3035 					      &next_nodes);
3036 	  if (BE (err != REG_NOERROR, 0))
3037 	    {
3038 	      re_node_set_free (&next_nodes);
3039 	      return err;
3040 	    }
3041 	}
3042       ++str_idx;
3043       if (next_nodes.nelem)
3044 	{
3045 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3046 	  if (BE (err != REG_NOERROR, 0))
3047 	    {
3048 	      re_node_set_free (&next_nodes);
3049 	      return err;
3050 	    }
3051 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3052 				    subexp_num, type);
3053 	  if (BE (err != REG_NOERROR, 0))
3054 	    {
3055 	      re_node_set_free (&next_nodes);
3056 	      return err;
3057 	    }
3058 	}
3059       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3060       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3061       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3062 	{
3063 	  re_node_set_free (&next_nodes);
3064 	  return err;
3065 	}
3066       mctx->state_log[str_idx] = cur_state;
3067       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3068     }
3069   re_node_set_free (&next_nodes);
3070   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3071 	       : &mctx->state_log[last_str]->nodes);
3072   path->next_idx = str_idx;
3073 
3074   /* Fix MCTX.  */
3075   mctx->state_log = backup_state_log;
3076   mctx->input.cur_idx = backup_cur_idx;
3077 
3078   /* Then check the current node set has the node LAST_NODE.  */
3079   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3080     return REG_NOERROR;
3081 
3082   return REG_NOMATCH;
3083 }
3084 
3085 /* Helper functions for check_arrival.  */
3086 
3087 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3088    to NEXT_NODES.
3089    TODO: This function is similar to the functions transit_state*(),
3090 	 however this function has many additional works.
3091 	 Can't we unify them?  */
3092 
3093 static reg_errcode_t
3094 internal_function __attribute_warn_unused_result__
3095 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3096 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3097 {
3098   const re_dfa_t *const dfa = mctx->dfa;
3099   bool ok;
3100   Idx cur_idx;
3101 #ifdef RE_ENABLE_I18N
3102   reg_errcode_t err = REG_NOERROR;
3103 #endif
3104   re_node_set union_set;
3105   re_node_set_init_empty (&union_set);
3106   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3107     {
3108       int naccepted = 0;
3109       Idx cur_node = cur_nodes->elems[cur_idx];
3110 #ifdef DEBUG
3111       re_token_type_t type = dfa->nodes[cur_node].type;
3112       assert (!IS_EPSILON_NODE (type));
3113 #endif
3114 #ifdef RE_ENABLE_I18N
3115       /* If the node may accept `multi byte'.  */
3116       if (dfa->nodes[cur_node].accept_mb)
3117 	{
3118 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3119 					       str_idx);
3120 	  if (naccepted > 1)
3121 	    {
3122 	      re_dfastate_t *dest_state;
3123 	      Idx next_node = dfa->nexts[cur_node];
3124 	      Idx next_idx = str_idx + naccepted;
3125 	      dest_state = mctx->state_log[next_idx];
3126 	      re_node_set_empty (&union_set);
3127 	      if (dest_state)
3128 		{
3129 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3130 		  if (BE (err != REG_NOERROR, 0))
3131 		    {
3132 		      re_node_set_free (&union_set);
3133 		      return err;
3134 		    }
3135 		}
3136 	      ok = re_node_set_insert (&union_set, next_node);
3137 	      if (BE (! ok, 0))
3138 		{
3139 		  re_node_set_free (&union_set);
3140 		  return REG_ESPACE;
3141 		}
3142 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3143 							    &union_set);
3144 	      if (BE (mctx->state_log[next_idx] == NULL
3145 		      && err != REG_NOERROR, 0))
3146 		{
3147 		  re_node_set_free (&union_set);
3148 		  return err;
3149 		}
3150 	    }
3151 	}
3152 #endif /* RE_ENABLE_I18N */
3153       if (naccepted
3154 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3155 	{
3156 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3157 	  if (BE (! ok, 0))
3158 	    {
3159 	      re_node_set_free (&union_set);
3160 	      return REG_ESPACE;
3161 	    }
3162 	}
3163     }
3164   re_node_set_free (&union_set);
3165   return REG_NOERROR;
3166 }
3167 
3168 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3169    CUR_NODES, however exclude the nodes which are:
3170     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3171     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3172 */
3173 
3174 static reg_errcode_t
3175 internal_function
3176 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3177 			  Idx ex_subexp, int type)
3178 {
3179   reg_errcode_t err;
3180   Idx idx, outside_node;
3181   re_node_set new_nodes;
3182 #ifdef DEBUG
3183   assert (cur_nodes->nelem);
3184 #endif
3185   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3186   if (BE (err != REG_NOERROR, 0))
3187     return err;
3188   /* Create a new node set NEW_NODES with the nodes which are epsilon
3189      closures of the node in CUR_NODES.  */
3190 
3191   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3192     {
3193       Idx cur_node = cur_nodes->elems[idx];
3194       const re_node_set *eclosure = dfa->eclosures + cur_node;
3195       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3196       if (outside_node == REG_MISSING)
3197 	{
3198 	  /* There are no problematic nodes, just merge them.  */
3199 	  err = re_node_set_merge (&new_nodes, eclosure);
3200 	  if (BE (err != REG_NOERROR, 0))
3201 	    {
3202 	      re_node_set_free (&new_nodes);
3203 	      return err;
3204 	    }
3205 	}
3206       else
3207 	{
3208 	  /* There are problematic nodes, re-calculate incrementally.  */
3209 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3210 					      ex_subexp, type);
3211 	  if (BE (err != REG_NOERROR, 0))
3212 	    {
3213 	      re_node_set_free (&new_nodes);
3214 	      return err;
3215 	    }
3216 	}
3217     }
3218   re_node_set_free (cur_nodes);
3219   *cur_nodes = new_nodes;
3220   return REG_NOERROR;
3221 }
3222 
3223 /* Helper function for check_arrival_expand_ecl.
3224    Check incrementally the epsilon closure of TARGET, and if it isn't
3225    problematic append it to DST_NODES.  */
3226 
3227 static reg_errcode_t
3228 internal_function __attribute_warn_unused_result__
3229 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3230 			      Idx target, Idx ex_subexp, int type)
3231 {
3232   Idx cur_node;
3233   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3234     {
3235       bool ok;
3236 
3237       if (dfa->nodes[cur_node].type == type
3238 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3239 	{
3240 	  if (type == OP_CLOSE_SUBEXP)
3241 	    {
3242 	      ok = re_node_set_insert (dst_nodes, cur_node);
3243 	      if (BE (! ok, 0))
3244 		return REG_ESPACE;
3245 	    }
3246 	  break;
3247 	}
3248       ok = re_node_set_insert (dst_nodes, cur_node);
3249       if (BE (! ok, 0))
3250 	return REG_ESPACE;
3251       if (dfa->edests[cur_node].nelem == 0)
3252 	break;
3253       if (dfa->edests[cur_node].nelem == 2)
3254 	{
3255 	  reg_errcode_t err;
3256 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3257 					      dfa->edests[cur_node].elems[1],
3258 					      ex_subexp, type);
3259 	  if (BE (err != REG_NOERROR, 0))
3260 	    return err;
3261 	}
3262       cur_node = dfa->edests[cur_node].elems[0];
3263     }
3264   return REG_NOERROR;
3265 }
3266 
3267 
3268 /* For all the back references in the current state, calculate the
3269    destination of the back references by the appropriate entry
3270    in MCTX->BKREF_ENTS.  */
3271 
3272 static reg_errcode_t
3273 internal_function __attribute_warn_unused_result__
3274 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3275 		    Idx cur_str, Idx subexp_num, int type)
3276 {
3277   const re_dfa_t *const dfa = mctx->dfa;
3278   reg_errcode_t err;
3279   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3280   struct re_backref_cache_entry *ent;
3281 
3282   if (cache_idx_start == REG_MISSING)
3283     return REG_NOERROR;
3284 
3285  restart:
3286   ent = mctx->bkref_ents + cache_idx_start;
3287   do
3288     {
3289       Idx to_idx, next_node;
3290 
3291       /* Is this entry ENT is appropriate?  */
3292       if (!re_node_set_contains (cur_nodes, ent->node))
3293 	continue; /* No.  */
3294 
3295       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3296       /* Calculate the destination of the back reference, and append it
3297 	 to MCTX->STATE_LOG.  */
3298       if (to_idx == cur_str)
3299 	{
3300 	  /* The backreference did epsilon transit, we must re-check all the
3301 	     node in the current state.  */
3302 	  re_node_set new_dests;
3303 	  reg_errcode_t err2, err3;
3304 	  next_node = dfa->edests[ent->node].elems[0];
3305 	  if (re_node_set_contains (cur_nodes, next_node))
3306 	    continue;
3307 	  err = re_node_set_init_1 (&new_dests, next_node);
3308 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3309 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3310 	  re_node_set_free (&new_dests);
3311 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3312 		  || err3 != REG_NOERROR, 0))
3313 	    {
3314 	      err = (err != REG_NOERROR ? err
3315 		     : (err2 != REG_NOERROR ? err2 : err3));
3316 	      return err;
3317 	    }
3318 	  /* TODO: It is still inefficient...  */
3319 	  goto restart;
3320 	}
3321       else
3322 	{
3323 	  re_node_set union_set;
3324 	  next_node = dfa->nexts[ent->node];
3325 	  if (mctx->state_log[to_idx])
3326 	    {
3327 	      bool ok;
3328 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3329 					next_node))
3330 		continue;
3331 	      err = re_node_set_init_copy (&union_set,
3332 					   &mctx->state_log[to_idx]->nodes);
3333 	      ok = re_node_set_insert (&union_set, next_node);
3334 	      if (BE (err != REG_NOERROR || ! ok, 0))
3335 		{
3336 		  re_node_set_free (&union_set);
3337 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3338 		  return err;
3339 		}
3340 	    }
3341 	  else
3342 	    {
3343 	      err = re_node_set_init_1 (&union_set, next_node);
3344 	      if (BE (err != REG_NOERROR, 0))
3345 		return err;
3346 	    }
3347 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3348 	  re_node_set_free (&union_set);
3349 	  if (BE (mctx->state_log[to_idx] == NULL
3350 		  && err != REG_NOERROR, 0))
3351 	    return err;
3352 	}
3353     }
3354   while (ent++->more);
3355   return REG_NOERROR;
3356 }
3357 
3358 /* Build transition table for the state.
3359    Return true if successful.  */
3360 
3361 static bool
3362 internal_function
3363 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3364 {
3365   reg_errcode_t err;
3366   Idx i, j;
3367   int ch;
3368   bool need_word_trtable = false;
3369   bitset_word_t elem, mask;
3370   bool dests_node_malloced = false;
3371   bool dest_states_malloced = false;
3372   Idx ndests; /* Number of the destination states from `state'.  */
3373   re_dfastate_t **trtable;
3374   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3375   re_node_set follows, *dests_node;
3376   bitset_t *dests_ch;
3377   bitset_t acceptable;
3378 
3379   struct dests_alloc
3380   {
3381     re_node_set dests_node[SBC_MAX];
3382     bitset_t dests_ch[SBC_MAX];
3383   } *dests_alloc;
3384 
3385   /* We build DFA states which corresponds to the destination nodes
3386      from `state'.  `dests_node[i]' represents the nodes which i-th
3387      destination state contains, and `dests_ch[i]' represents the
3388      characters which i-th destination state accepts.  */
3389   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3390     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3391   else
3392     {
3393       dests_alloc = re_malloc (struct dests_alloc, 1);
3394       if (BE (dests_alloc == NULL, 0))
3395 	return false;
3396       dests_node_malloced = true;
3397     }
3398   dests_node = dests_alloc->dests_node;
3399   dests_ch = dests_alloc->dests_ch;
3400 
3401   /* Initialize transiton table.  */
3402   state->word_trtable = state->trtable = NULL;
3403 
3404   /* At first, group all nodes belonging to `state' into several
3405      destinations.  */
3406   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3407   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3408     {
3409       if (dests_node_malloced)
3410 	free (dests_alloc);
3411       if (ndests == 0)
3412 	{
3413 	  state->trtable = (re_dfastate_t **)
3414 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3415 	  return true;
3416 	}
3417       return false;
3418     }
3419 
3420   err = re_node_set_alloc (&follows, ndests + 1);
3421   if (BE (err != REG_NOERROR, 0))
3422     goto out_free;
3423 
3424   /* Avoid arithmetic overflow in size calculation.  */
3425   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3426 	    / (3 * sizeof (re_dfastate_t *)))
3427 	   < ndests),
3428 	  0))
3429     goto out_free;
3430 
3431   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3432 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3433     dest_states = (re_dfastate_t **)
3434       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3435   else
3436     {
3437       dest_states = (re_dfastate_t **)
3438 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3439       if (BE (dest_states == NULL, 0))
3440 	{
3441 out_free:
3442 	  if (dest_states_malloced)
3443 	    free (dest_states);
3444 	  re_node_set_free (&follows);
3445 	  for (i = 0; i < ndests; ++i)
3446 	    re_node_set_free (dests_node + i);
3447 	  if (dests_node_malloced)
3448 	    free (dests_alloc);
3449 	  return false;
3450 	}
3451       dest_states_malloced = true;
3452     }
3453   dest_states_word = dest_states + ndests;
3454   dest_states_nl = dest_states_word + ndests;
3455   bitset_empty (acceptable);
3456 
3457   /* Then build the states for all destinations.  */
3458   for (i = 0; i < ndests; ++i)
3459     {
3460       Idx next_node;
3461       re_node_set_empty (&follows);
3462       /* Merge the follows of this destination states.  */
3463       for (j = 0; j < dests_node[i].nelem; ++j)
3464 	{
3465 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3466 	  if (next_node != REG_MISSING)
3467 	    {
3468 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3469 	      if (BE (err != REG_NOERROR, 0))
3470 		goto out_free;
3471 	    }
3472 	}
3473       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3474       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3475 	goto out_free;
3476       /* If the new state has context constraint,
3477 	 build appropriate states for these contexts.  */
3478       if (dest_states[i]->has_constraint)
3479 	{
3480 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3481 							  CONTEXT_WORD);
3482 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3483 	    goto out_free;
3484 
3485 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3486 	    need_word_trtable = true;
3487 
3488 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3489 							CONTEXT_NEWLINE);
3490 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3491 	    goto out_free;
3492 	}
3493       else
3494 	{
3495 	  dest_states_word[i] = dest_states[i];
3496 	  dest_states_nl[i] = dest_states[i];
3497 	}
3498       bitset_merge (acceptable, dests_ch[i]);
3499     }
3500 
3501   if (!BE (need_word_trtable, 0))
3502     {
3503       /* We don't care about whether the following character is a word
3504 	 character, or we are in a single-byte character set so we can
3505 	 discern by looking at the character code: allocate a
3506 	 256-entry transition table.  */
3507       trtable = state->trtable =
3508 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3509       if (BE (trtable == NULL, 0))
3510 	goto out_free;
3511 
3512       /* For all characters ch...:  */
3513       for (i = 0; i < BITSET_WORDS; ++i)
3514 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3515 	     elem;
3516 	     mask <<= 1, elem >>= 1, ++ch)
3517 	  if (BE (elem & 1, 0))
3518 	    {
3519 	      /* There must be exactly one destination which accepts
3520 		 character ch.  See group_nodes_into_DFAstates.  */
3521 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3522 		;
3523 
3524 	      /* j-th destination accepts the word character ch.  */
3525 	      if (dfa->word_char[i] & mask)
3526 		trtable[ch] = dest_states_word[j];
3527 	      else
3528 		trtable[ch] = dest_states[j];
3529 	    }
3530     }
3531   else
3532     {
3533       /* We care about whether the following character is a word
3534 	 character, and we are in a multi-byte character set: discern
3535 	 by looking at the character code: build two 256-entry
3536 	 transition tables, one starting at trtable[0] and one
3537 	 starting at trtable[SBC_MAX].  */
3538       trtable = state->word_trtable =
3539 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3540       if (BE (trtable == NULL, 0))
3541 	goto out_free;
3542 
3543       /* For all characters ch...:  */
3544       for (i = 0; i < BITSET_WORDS; ++i)
3545 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3546 	     elem;
3547 	     mask <<= 1, elem >>= 1, ++ch)
3548 	  if (BE (elem & 1, 0))
3549 	    {
3550 	      /* There must be exactly one destination which accepts
3551 		 character ch.  See group_nodes_into_DFAstates.  */
3552 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3553 		;
3554 
3555 	      /* j-th destination accepts the word character ch.  */
3556 	      trtable[ch] = dest_states[j];
3557 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3558 	    }
3559     }
3560 
3561   /* new line */
3562   if (bitset_contain (acceptable, NEWLINE_CHAR))
3563     {
3564       /* The current state accepts newline character.  */
3565       for (j = 0; j < ndests; ++j)
3566 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3567 	  {
3568 	    /* k-th destination accepts newline character.  */
3569 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3570 	    if (need_word_trtable)
3571 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3572 	    /* There must be only one destination which accepts
3573 	       newline.  See group_nodes_into_DFAstates.  */
3574 	    break;
3575 	  }
3576     }
3577 
3578   if (dest_states_malloced)
3579     free (dest_states);
3580 
3581   re_node_set_free (&follows);
3582   for (i = 0; i < ndests; ++i)
3583     re_node_set_free (dests_node + i);
3584 
3585   if (dests_node_malloced)
3586     free (dests_alloc);
3587 
3588   return true;
3589 }
3590 
3591 /* Group all nodes belonging to STATE into several destinations.
3592    Then for all destinations, set the nodes belonging to the destination
3593    to DESTS_NODE[i] and set the characters accepted by the destination
3594    to DEST_CH[i].  This function return the number of destinations.  */
3595 
3596 static Idx
3597 internal_function
3598 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3599 			    re_node_set *dests_node, bitset_t *dests_ch)
3600 {
3601   reg_errcode_t err;
3602   bool ok;
3603   Idx i, j, k;
3604   Idx ndests; /* Number of the destinations from `state'.  */
3605   bitset_t accepts; /* Characters a node can accept.  */
3606   const re_node_set *cur_nodes = &state->nodes;
3607   bitset_empty (accepts);
3608   ndests = 0;
3609 
3610   /* For all the nodes belonging to `state',  */
3611   for (i = 0; i < cur_nodes->nelem; ++i)
3612     {
3613       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3614       re_token_type_t type = node->type;
3615       unsigned int constraint = node->constraint;
3616 
3617       /* Enumerate all single byte character this node can accept.  */
3618       if (type == CHARACTER)
3619 	bitset_set (accepts, node->opr.c);
3620       else if (type == SIMPLE_BRACKET)
3621 	{
3622 	  bitset_merge (accepts, node->opr.sbcset);
3623 	}
3624       else if (type == OP_PERIOD)
3625 	{
3626 #ifdef RE_ENABLE_I18N
3627 	  if (dfa->mb_cur_max > 1)
3628 	    bitset_merge (accepts, dfa->sb_char);
3629 	  else
3630 #endif
3631 	    bitset_set_all (accepts);
3632 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3633 	    bitset_clear (accepts, '\n');
3634 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3635 	    bitset_clear (accepts, '\0');
3636 	}
3637 #ifdef RE_ENABLE_I18N
3638       else if (type == OP_UTF8_PERIOD)
3639 	{
3640 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3641 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3642 	  else
3643 	    bitset_merge (accepts, utf8_sb_map);
3644 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3645 	    bitset_clear (accepts, '\n');
3646 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3647 	    bitset_clear (accepts, '\0');
3648 	}
3649 #endif
3650       else
3651 	continue;
3652 
3653       /* Check the `accepts' and sift the characters which are not
3654 	 match it the context.  */
3655       if (constraint)
3656 	{
3657 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3658 	    {
3659 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3660 	      bitset_empty (accepts);
3661 	      if (accepts_newline)
3662 		bitset_set (accepts, NEWLINE_CHAR);
3663 	      else
3664 		continue;
3665 	    }
3666 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3667 	    {
3668 	      bitset_empty (accepts);
3669 	      continue;
3670 	    }
3671 
3672 	  if (constraint & NEXT_WORD_CONSTRAINT)
3673 	    {
3674 	      bitset_word_t any_set = 0;
3675 	      if (type == CHARACTER && !node->word_char)
3676 		{
3677 		  bitset_empty (accepts);
3678 		  continue;
3679 		}
3680 #ifdef RE_ENABLE_I18N
3681 	      if (dfa->mb_cur_max > 1)
3682 		for (j = 0; j < BITSET_WORDS; ++j)
3683 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3684 	      else
3685 #endif
3686 		for (j = 0; j < BITSET_WORDS; ++j)
3687 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3688 	      if (!any_set)
3689 		continue;
3690 	    }
3691 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3692 	    {
3693 	      bitset_word_t any_set = 0;
3694 	      if (type == CHARACTER && node->word_char)
3695 		{
3696 		  bitset_empty (accepts);
3697 		  continue;
3698 		}
3699 #ifdef RE_ENABLE_I18N
3700 	      if (dfa->mb_cur_max > 1)
3701 		for (j = 0; j < BITSET_WORDS; ++j)
3702 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3703 	      else
3704 #endif
3705 		for (j = 0; j < BITSET_WORDS; ++j)
3706 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3707 	      if (!any_set)
3708 		continue;
3709 	    }
3710 	}
3711 
3712       /* Then divide `accepts' into DFA states, or create a new
3713 	 state.  Above, we make sure that accepts is not empty.  */
3714       for (j = 0; j < ndests; ++j)
3715 	{
3716 	  bitset_t intersec; /* Intersection sets, see below.  */
3717 	  bitset_t remains;
3718 	  /* Flags, see below.  */
3719 	  bitset_word_t has_intersec, not_subset, not_consumed;
3720 
3721 	  /* Optimization, skip if this state doesn't accept the character.  */
3722 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3723 	    continue;
3724 
3725 	  /* Enumerate the intersection set of this state and `accepts'.  */
3726 	  has_intersec = 0;
3727 	  for (k = 0; k < BITSET_WORDS; ++k)
3728 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3729 	  /* And skip if the intersection set is empty.  */
3730 	  if (!has_intersec)
3731 	    continue;
3732 
3733 	  /* Then check if this state is a subset of `accepts'.  */
3734 	  not_subset = not_consumed = 0;
3735 	  for (k = 0; k < BITSET_WORDS; ++k)
3736 	    {
3737 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3738 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3739 	    }
3740 
3741 	  /* If this state isn't a subset of `accepts', create a
3742 	     new group state, which has the `remains'. */
3743 	  if (not_subset)
3744 	    {
3745 	      bitset_copy (dests_ch[ndests], remains);
3746 	      bitset_copy (dests_ch[j], intersec);
3747 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3748 	      if (BE (err != REG_NOERROR, 0))
3749 		goto error_return;
3750 	      ++ndests;
3751 	    }
3752 
3753 	  /* Put the position in the current group. */
3754 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3755 	  if (BE (! ok, 0))
3756 	    goto error_return;
3757 
3758 	  /* If all characters are consumed, go to next node. */
3759 	  if (!not_consumed)
3760 	    break;
3761 	}
3762       /* Some characters remain, create a new group. */
3763       if (j == ndests)
3764 	{
3765 	  bitset_copy (dests_ch[ndests], accepts);
3766 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3767 	  if (BE (err != REG_NOERROR, 0))
3768 	    goto error_return;
3769 	  ++ndests;
3770 	  bitset_empty (accepts);
3771 	}
3772     }
3773   return ndests;
3774  error_return:
3775   for (j = 0; j < ndests; ++j)
3776     re_node_set_free (dests_node + j);
3777   return REG_MISSING;
3778 }
3779 
3780 #ifdef RE_ENABLE_I18N
3781 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3782    Return the number of the bytes the node accepts.
3783    STR_IDX is the current index of the input string.
3784 
3785    This function handles the nodes which can accept one character, or
3786    one collating element like '.', '[a-z]', opposite to the other nodes
3787    can only accept one byte.  */
3788 
3789 static int
3790 internal_function
3791 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3792 			 const re_string_t *input, Idx str_idx)
3793 {
3794   const re_token_t *node = dfa->nodes + node_idx;
3795   int char_len, elem_len;
3796   Idx i;
3797 
3798   if (BE (node->type == OP_UTF8_PERIOD, 0))
3799     {
3800       unsigned char c = re_string_byte_at (input, str_idx), d;
3801       if (BE (c < 0xc2, 1))
3802 	return 0;
3803 
3804       if (str_idx + 2 > input->len)
3805 	return 0;
3806 
3807       d = re_string_byte_at (input, str_idx + 1);
3808       if (c < 0xe0)
3809 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3810       else if (c < 0xf0)
3811 	{
3812 	  char_len = 3;
3813 	  if (c == 0xe0 && d < 0xa0)
3814 	    return 0;
3815 	}
3816       else if (c < 0xf8)
3817 	{
3818 	  char_len = 4;
3819 	  if (c == 0xf0 && d < 0x90)
3820 	    return 0;
3821 	}
3822       else if (c < 0xfc)
3823 	{
3824 	  char_len = 5;
3825 	  if (c == 0xf8 && d < 0x88)
3826 	    return 0;
3827 	}
3828       else if (c < 0xfe)
3829 	{
3830 	  char_len = 6;
3831 	  if (c == 0xfc && d < 0x84)
3832 	    return 0;
3833 	}
3834       else
3835 	return 0;
3836 
3837       if (str_idx + char_len > input->len)
3838 	return 0;
3839 
3840       for (i = 1; i < char_len; ++i)
3841 	{
3842 	  d = re_string_byte_at (input, str_idx + i);
3843 	  if (d < 0x80 || d > 0xbf)
3844 	    return 0;
3845 	}
3846       return char_len;
3847     }
3848 
3849   char_len = re_string_char_size_at (input, str_idx);
3850   if (node->type == OP_PERIOD)
3851     {
3852       if (char_len <= 1)
3853 	return 0;
3854       /* FIXME: I don't think this if is needed, as both '\n'
3855 	 and '\0' are char_len == 1.  */
3856       /* '.' accepts any one character except the following two cases.  */
3857       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3858 	   re_string_byte_at (input, str_idx) == '\n') ||
3859 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3860 	   re_string_byte_at (input, str_idx) == '\0'))
3861 	return 0;
3862       return char_len;
3863     }
3864 
3865   elem_len = re_string_elem_size_at (input, str_idx);
3866   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3867     return 0;
3868 
3869   if (node->type == COMPLEX_BRACKET)
3870     {
3871       const re_charset_t *cset = node->opr.mbcset;
3872 # ifdef _LIBC
3873       const unsigned char *pin
3874 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3875       Idx j;
3876       uint32_t nrules;
3877 # endif /* _LIBC */
3878       int match_len = 0;
3879       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3880 		    ? re_string_wchar_at (input, str_idx) : 0);
3881 
3882       /* match with multibyte character?  */
3883       for (i = 0; i < cset->nmbchars; ++i)
3884 	if (wc == cset->mbchars[i])
3885 	  {
3886 	    match_len = char_len;
3887 	    goto check_node_accept_bytes_match;
3888 	  }
3889       /* match with character_class?  */
3890       for (i = 0; i < cset->nchar_classes; ++i)
3891 	{
3892 	  wctype_t wt = cset->char_classes[i];
3893 	  if (__iswctype (wc, wt))
3894 	    {
3895 	      match_len = char_len;
3896 	      goto check_node_accept_bytes_match;
3897 	    }
3898 	}
3899 
3900 # ifdef _LIBC
3901       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3902       if (nrules != 0)
3903 	{
3904 	  unsigned int in_collseq = 0;
3905 	  const int32_t *table, *indirect;
3906 	  const unsigned char *weights, *extra;
3907 	  const char *collseqwc;
3908 	  int32_t idx;
3909 	  /* This #include defines a local function!  */
3910 #  include <locale/weight.h>
3911 
3912 	  /* match with collating_symbol?  */
3913 	  if (cset->ncoll_syms)
3914 	    extra = (const unsigned char *)
3915 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3916 	  for (i = 0; i < cset->ncoll_syms; ++i)
3917 	    {
3918 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3919 	      /* Compare the length of input collating element and
3920 		 the length of current collating element.  */
3921 	      if (*coll_sym != elem_len)
3922 		continue;
3923 	      /* Compare each bytes.  */
3924 	      for (j = 0; j < *coll_sym; j++)
3925 		if (pin[j] != coll_sym[1 + j])
3926 		  break;
3927 	      if (j == *coll_sym)
3928 		{
3929 		  /* Match if every bytes is equal.  */
3930 		  match_len = j;
3931 		  goto check_node_accept_bytes_match;
3932 		}
3933 	    }
3934 
3935 	  if (cset->nranges)
3936 	    {
3937 	      if (elem_len <= char_len)
3938 		{
3939 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3940 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3941 		}
3942 	      else
3943 		in_collseq = find_collation_sequence_value (pin, elem_len);
3944 	    }
3945 	  /* match with range expression?  */
3946 	  for (i = 0; i < cset->nranges; ++i)
3947 	    if (cset->range_starts[i] <= in_collseq
3948 		&& in_collseq <= cset->range_ends[i])
3949 	      {
3950 		match_len = elem_len;
3951 		goto check_node_accept_bytes_match;
3952 	      }
3953 
3954 	  /* match with equivalence_class?  */
3955 	  if (cset->nequiv_classes)
3956 	    {
3957 	      const unsigned char *cp = pin;
3958 	      table = (const int32_t *)
3959 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3960 	      weights = (const unsigned char *)
3961 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3962 	      extra = (const unsigned char *)
3963 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3964 	      indirect = (const int32_t *)
3965 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3966 	      int32_t idx = findidx (&cp);
3967 	      if (idx > 0)
3968 		for (i = 0; i < cset->nequiv_classes; ++i)
3969 		  {
3970 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3971 		    size_t weight_len = weights[idx & 0xffffff];
3972 		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3973 			&& (idx >> 24) == (equiv_class_idx >> 24))
3974 		      {
3975 			Idx cnt = 0;
3976 
3977 			idx &= 0xffffff;
3978 			equiv_class_idx &= 0xffffff;
3979 
3980 			while (cnt <= weight_len
3981 			       && (weights[equiv_class_idx + 1 + cnt]
3982 				   == weights[idx + 1 + cnt]))
3983 			  ++cnt;
3984 			if (cnt > weight_len)
3985 			  {
3986 			    match_len = elem_len;
3987 			    goto check_node_accept_bytes_match;
3988 			  }
3989 		      }
3990 		  }
3991 	    }
3992 	}
3993       else
3994 # endif /* _LIBC */
3995 	{
3996 	  /* match with range expression?  */
3997 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
3998 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3999 #else
4000 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
4001 	  cmp_buf[2] = wc;
4002 #endif
4003 	  for (i = 0; i < cset->nranges; ++i)
4004 	    {
4005 	      cmp_buf[0] = cset->range_starts[i];
4006 	      cmp_buf[4] = cset->range_ends[i];
4007 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
4008 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
4009 		{
4010 		  match_len = char_len;
4011 		  goto check_node_accept_bytes_match;
4012 		}
4013 	    }
4014 	}
4015     check_node_accept_bytes_match:
4016       if (!cset->non_match)
4017 	return match_len;
4018       else
4019 	{
4020 	  if (match_len > 0)
4021 	    return 0;
4022 	  else
4023 	    return (elem_len > char_len) ? elem_len : char_len;
4024 	}
4025     }
4026   return 0;
4027 }
4028 
4029 # ifdef _LIBC
4030 static unsigned int
4031 internal_function
4032 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4033 {
4034   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4035   if (nrules == 0)
4036     {
4037       if (mbs_len == 1)
4038 	{
4039 	  /* No valid character.  Match it as a single byte character.  */
4040 	  const unsigned char *collseq = (const unsigned char *)
4041 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4042 	  return collseq[mbs[0]];
4043 	}
4044       return UINT_MAX;
4045     }
4046   else
4047     {
4048       int32_t idx;
4049       const unsigned char *extra = (const unsigned char *)
4050 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4051       int32_t extrasize = (const unsigned char *)
4052 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4053 
4054       for (idx = 0; idx < extrasize;)
4055 	{
4056 	  int mbs_cnt;
4057 	  bool found = false;
4058 	  int32_t elem_mbs_len;
4059 	  /* Skip the name of collating element name.  */
4060 	  idx = idx + extra[idx] + 1;
4061 	  elem_mbs_len = extra[idx++];
4062 	  if (mbs_len == elem_mbs_len)
4063 	    {
4064 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4065 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4066 		  break;
4067 	      if (mbs_cnt == elem_mbs_len)
4068 		/* Found the entry.  */
4069 		found = true;
4070 	    }
4071 	  /* Skip the byte sequence of the collating element.  */
4072 	  idx += elem_mbs_len;
4073 	  /* Adjust for the alignment.  */
4074 	  idx = (idx + 3) & ~3;
4075 	  /* Skip the collation sequence value.  */
4076 	  idx += sizeof (uint32_t);
4077 	  /* Skip the wide char sequence of the collating element.  */
4078 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4079 	  /* If we found the entry, return the sequence value.  */
4080 	  if (found)
4081 	    return *(uint32_t *) (extra + idx);
4082 	  /* Skip the collation sequence value.  */
4083 	  idx += sizeof (uint32_t);
4084 	}
4085       return UINT_MAX;
4086     }
4087 }
4088 # endif /* _LIBC */
4089 #endif /* RE_ENABLE_I18N */
4090 
4091 /* Check whether the node accepts the byte which is IDX-th
4092    byte of the INPUT.  */
4093 
4094 static bool
4095 internal_function
4096 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4097 		   Idx idx)
4098 {
4099   unsigned char ch;
4100   ch = re_string_byte_at (&mctx->input, idx);
4101   switch (node->type)
4102     {
4103     case CHARACTER:
4104       if (node->opr.c != ch)
4105         return false;
4106       break;
4107 
4108     case SIMPLE_BRACKET:
4109       if (!bitset_contain (node->opr.sbcset, ch))
4110         return false;
4111       break;
4112 
4113 #ifdef RE_ENABLE_I18N
4114     case OP_UTF8_PERIOD:
4115       if (ch >= ASCII_CHARS)
4116         return false;
4117       /* FALLTHROUGH */
4118 #endif
4119     case OP_PERIOD:
4120       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4121 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4122 	return false;
4123       break;
4124 
4125     default:
4126       return false;
4127     }
4128 
4129   if (node->constraint)
4130     {
4131       /* The node has constraints.  Check whether the current context
4132 	 satisfies the constraints.  */
4133       unsigned int context = re_string_context_at (&mctx->input, idx,
4134 						   mctx->eflags);
4135       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4136 	return false;
4137     }
4138 
4139   return true;
4140 }
4141 
4142 /* Extend the buffers, if the buffers have run out.  */
4143 
4144 static reg_errcode_t
4145 internal_function __attribute_warn_unused_result__
4146 extend_buffers (re_match_context_t *mctx)
4147 {
4148   reg_errcode_t ret;
4149   re_string_t *pstr = &mctx->input;
4150 
4151   /* Avoid overflow.  */
4152   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4153     return REG_ESPACE;
4154 
4155   /* Double the lengthes of the buffers.  */
4156   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4157   if (BE (ret != REG_NOERROR, 0))
4158     return ret;
4159 
4160   if (mctx->state_log != NULL)
4161     {
4162       /* And double the length of state_log.  */
4163       /* XXX We have no indication of the size of this buffer.  If this
4164 	 allocation fail we have no indication that the state_log array
4165 	 does not have the right size.  */
4166       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4167 					      pstr->bufs_len + 1);
4168       if (BE (new_array == NULL, 0))
4169 	return REG_ESPACE;
4170       mctx->state_log = new_array;
4171     }
4172 
4173   /* Then reconstruct the buffers.  */
4174   if (pstr->icase)
4175     {
4176 #ifdef RE_ENABLE_I18N
4177       if (pstr->mb_cur_max > 1)
4178 	{
4179 	  ret = build_wcs_upper_buffer (pstr);
4180 	  if (BE (ret != REG_NOERROR, 0))
4181 	    return ret;
4182 	}
4183       else
4184 #endif /* RE_ENABLE_I18N  */
4185 	build_upper_buffer (pstr);
4186     }
4187   else
4188     {
4189 #ifdef RE_ENABLE_I18N
4190       if (pstr->mb_cur_max > 1)
4191 	build_wcs_buffer (pstr);
4192       else
4193 #endif /* RE_ENABLE_I18N  */
4194 	{
4195 	  if (pstr->trans != NULL)
4196 	    re_string_translate_buffer (pstr);
4197 	}
4198     }
4199   return REG_NOERROR;
4200 }
4201 
4202 
4203 /* Functions for matching context.  */
4204 
4205 /* Initialize MCTX.  */
4206 
4207 static reg_errcode_t
4208 internal_function __attribute_warn_unused_result__
4209 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4210 {
4211   mctx->eflags = eflags;
4212   mctx->match_last = REG_MISSING;
4213   if (n > 0)
4214     {
4215       /* Avoid overflow.  */
4216       size_t max_object_size =
4217 	MAX (sizeof (struct re_backref_cache_entry),
4218 	     sizeof (re_sub_match_top_t *));
4219       if (BE (SIZE_MAX / max_object_size < n, 0))
4220 	return REG_ESPACE;
4221 
4222       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4223       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4224       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4225 	return REG_ESPACE;
4226     }
4227   /* Already zero-ed by the caller.
4228      else
4229        mctx->bkref_ents = NULL;
4230      mctx->nbkref_ents = 0;
4231      mctx->nsub_tops = 0;  */
4232   mctx->abkref_ents = n;
4233   mctx->max_mb_elem_len = 1;
4234   mctx->asub_tops = n;
4235   return REG_NOERROR;
4236 }
4237 
4238 /* Clean the entries which depend on the current input in MCTX.
4239    This function must be invoked when the matcher changes the start index
4240    of the input, or changes the input string.  */
4241 
4242 static void
4243 internal_function
4244 match_ctx_clean (re_match_context_t *mctx)
4245 {
4246   Idx st_idx;
4247   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4248     {
4249       Idx sl_idx;
4250       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4251       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4252 	{
4253 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4254 	  re_free (last->path.array);
4255 	  re_free (last);
4256 	}
4257       re_free (top->lasts);
4258       if (top->path)
4259 	{
4260 	  re_free (top->path->array);
4261 	  re_free (top->path);
4262 	}
4263       free (top);
4264     }
4265 
4266   mctx->nsub_tops = 0;
4267   mctx->nbkref_ents = 0;
4268 }
4269 
4270 /* Free all the memory associated with MCTX.  */
4271 
4272 static void
4273 internal_function
4274 match_ctx_free (re_match_context_t *mctx)
4275 {
4276   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4277   match_ctx_clean (mctx);
4278   re_free (mctx->sub_tops);
4279   re_free (mctx->bkref_ents);
4280 }
4281 
4282 /* Add a new backreference entry to MCTX.
4283    Note that we assume that caller never call this function with duplicate
4284    entry, and call with STR_IDX which isn't smaller than any existing entry.
4285 */
4286 
4287 static reg_errcode_t
4288 internal_function __attribute_warn_unused_result__
4289 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4290 		     Idx to)
4291 {
4292   if (mctx->nbkref_ents >= mctx->abkref_ents)
4293     {
4294       struct re_backref_cache_entry* new_entry;
4295       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4296 			      mctx->abkref_ents * 2);
4297       if (BE (new_entry == NULL, 0))
4298 	{
4299 	  re_free (mctx->bkref_ents);
4300 	  return REG_ESPACE;
4301 	}
4302       mctx->bkref_ents = new_entry;
4303       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4304 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4305       mctx->abkref_ents *= 2;
4306     }
4307   if (mctx->nbkref_ents > 0
4308       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4309     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4310 
4311   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4312   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4313   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4314   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4315 
4316   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4317      If bit N is clear, means that this entry won't epsilon-transition to
4318      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4319      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4320      such node.
4321 
4322      A backreference does not epsilon-transition unless it is empty, so set
4323      to all zeros if FROM != TO.  */
4324   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4325     = (from == to ? -1 : 0);
4326 
4327   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4328   if (mctx->max_mb_elem_len < to - from)
4329     mctx->max_mb_elem_len = to - from;
4330   return REG_NOERROR;
4331 }
4332 
4333 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4334    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4335 
4336 static Idx
4337 internal_function
4338 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4339 {
4340   Idx left, right, mid, last;
4341   last = right = mctx->nbkref_ents;
4342   for (left = 0; left < right;)
4343     {
4344       mid = (left + right) / 2;
4345       if (mctx->bkref_ents[mid].str_idx < str_idx)
4346 	left = mid + 1;
4347       else
4348 	right = mid;
4349     }
4350   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4351     return left;
4352   else
4353     return REG_MISSING;
4354 }
4355 
4356 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4357    at STR_IDX.  */
4358 
4359 static reg_errcode_t
4360 internal_function __attribute_warn_unused_result__
4361 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4362 {
4363 #ifdef DEBUG
4364   assert (mctx->sub_tops != NULL);
4365   assert (mctx->asub_tops > 0);
4366 #endif
4367   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4368     {
4369       Idx new_asub_tops = mctx->asub_tops * 2;
4370       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4371 						   re_sub_match_top_t *,
4372 						   new_asub_tops);
4373       if (BE (new_array == NULL, 0))
4374 	return REG_ESPACE;
4375       mctx->sub_tops = new_array;
4376       mctx->asub_tops = new_asub_tops;
4377     }
4378   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4379   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4380     return REG_ESPACE;
4381   mctx->sub_tops[mctx->nsub_tops]->node = node;
4382   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4383   return REG_NOERROR;
4384 }
4385 
4386 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4387    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4388 
4389 static re_sub_match_last_t *
4390 internal_function
4391 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4392 {
4393   re_sub_match_last_t *new_entry;
4394   if (BE (subtop->nlasts == subtop->alasts, 0))
4395     {
4396       Idx new_alasts = 2 * subtop->alasts + 1;
4397       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4398 						    re_sub_match_last_t *,
4399 						    new_alasts);
4400       if (BE (new_array == NULL, 0))
4401 	return NULL;
4402       subtop->lasts = new_array;
4403       subtop->alasts = new_alasts;
4404     }
4405   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4406   if (BE (new_entry != NULL, 1))
4407     {
4408       subtop->lasts[subtop->nlasts] = new_entry;
4409       new_entry->node = node;
4410       new_entry->str_idx = str_idx;
4411       ++subtop->nlasts;
4412     }
4413   return new_entry;
4414 }
4415 
4416 static void
4417 internal_function
4418 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4419 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4420 {
4421   sctx->sifted_states = sifted_sts;
4422   sctx->limited_states = limited_sts;
4423   sctx->last_node = last_node;
4424   sctx->last_str_idx = last_str_idx;
4425   re_node_set_init_empty (&sctx->limits);
4426 }
4427