xref: /dragonfly/contrib/diffutils/lib/regexec.c (revision 0db87cb7)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002-2013 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU General Public
8    License as published by the Free Software Foundation; either
9    version 3 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    General Public License for more details.
15 
16    You should have received a copy of the GNU General Public
17    License along with the GNU C Library; if not, see
18    <http://www.gnu.org/licenses/>.  */
19 
20 #include "verify.h"
21 #include "intprops.h"
22 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
23 				     Idx n) internal_function;
24 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
25 static void match_ctx_free (re_match_context_t *cache) internal_function;
26 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
27 					  Idx str_idx, Idx from, Idx to)
28      internal_function;
29 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
30      internal_function;
31 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
32 					   Idx str_idx) internal_function;
33 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
34 						    Idx node, Idx str_idx)
35      internal_function;
36 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
37 			   re_dfastate_t **limited_sts, Idx last_node,
38 			   Idx last_str_idx)
39      internal_function;
40 static reg_errcode_t re_search_internal (const regex_t *preg,
41 					 const char *string, Idx length,
42 					 Idx start, Idx last_start, Idx stop,
43 					 size_t nmatch, regmatch_t pmatch[],
44 					 int eflags) internal_function;
45 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
46 				  const char *string1, Idx length1,
47 				  const char *string2, Idx length2,
48 				  Idx start, regoff_t range,
49 				  struct re_registers *regs,
50 				  Idx stop, bool ret_len) internal_function;
51 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
52 				const char *string, Idx length, Idx start,
53 				regoff_t range, Idx stop,
54 				struct re_registers *regs,
55 				bool ret_len) internal_function;
56 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
57                               Idx nregs, int regs_allocated) internal_function;
58 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
59      internal_function;
60 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
61 			   Idx *p_match_first) internal_function;
62 static Idx check_halt_state_context (const re_match_context_t *mctx,
63 				     const re_dfastate_t *state, Idx idx)
64      internal_function;
65 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
66 			 regmatch_t *prev_idx_match, Idx cur_node,
67 			 Idx cur_idx, Idx nmatch) internal_function;
68 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
69 				      Idx str_idx, Idx dest_node, Idx nregs,
70 				      regmatch_t *regs,
71 				      re_node_set *eps_via_nodes)
72      internal_function;
73 static reg_errcode_t set_regs (const regex_t *preg,
74 			       const re_match_context_t *mctx,
75 			       size_t nmatch, regmatch_t *pmatch,
76 			       bool fl_backtrack) internal_function;
77 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
78      internal_function;
79 
80 #ifdef RE_ENABLE_I18N
81 static int sift_states_iter_mb (const re_match_context_t *mctx,
82 				re_sift_context_t *sctx,
83 				Idx node_idx, Idx str_idx, Idx max_str_idx)
84      internal_function;
85 #endif /* RE_ENABLE_I18N */
86 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
87 					   re_sift_context_t *sctx)
88      internal_function;
89 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
90 					  re_sift_context_t *sctx, Idx str_idx,
91 					  re_node_set *cur_dest)
92      internal_function;
93 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
94 					      re_sift_context_t *sctx,
95 					      Idx str_idx,
96 					      re_node_set *dest_nodes)
97      internal_function;
98 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
99 					    re_node_set *dest_nodes,
100 					    const re_node_set *candidates)
101      internal_function;
102 static bool check_dst_limits (const re_match_context_t *mctx,
103 			      const re_node_set *limits,
104 			      Idx dst_node, Idx dst_idx, Idx src_node,
105 			      Idx src_idx) internal_function;
106 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
107 					int boundaries, Idx subexp_idx,
108 					Idx from_node, Idx bkref_idx)
109      internal_function;
110 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
111 				      Idx limit, Idx subexp_idx,
112 				      Idx node, Idx str_idx,
113 				      Idx bkref_idx) internal_function;
114 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
115 					  re_node_set *dest_nodes,
116 					  const re_node_set *candidates,
117 					  re_node_set *limits,
118 					  struct re_backref_cache_entry *bkref_ents,
119 					  Idx str_idx) internal_function;
120 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
121 					re_sift_context_t *sctx,
122 					Idx str_idx, const re_node_set *candidates)
123      internal_function;
124 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
125 					re_dfastate_t **dst,
126 					re_dfastate_t **src, Idx num)
127      internal_function;
128 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
129 					 re_match_context_t *mctx) internal_function;
130 static re_dfastate_t *transit_state (reg_errcode_t *err,
131 				     re_match_context_t *mctx,
132 				     re_dfastate_t *state) internal_function;
133 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
134 					    re_match_context_t *mctx,
135 					    re_dfastate_t *next_state)
136      internal_function;
137 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
138 						re_node_set *cur_nodes,
139 						Idx str_idx) internal_function;
140 #if 0
141 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
142 					re_match_context_t *mctx,
143 					re_dfastate_t *pstate)
144      internal_function;
145 #endif
146 #ifdef RE_ENABLE_I18N
147 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
148 				       re_dfastate_t *pstate)
149      internal_function;
150 #endif /* RE_ENABLE_I18N */
151 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
152 					  const re_node_set *nodes)
153      internal_function;
154 static reg_errcode_t get_subexp (re_match_context_t *mctx,
155 				 Idx bkref_node, Idx bkref_str_idx)
156      internal_function;
157 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
158 				     const re_sub_match_top_t *sub_top,
159 				     re_sub_match_last_t *sub_last,
160 				     Idx bkref_node, Idx bkref_str)
161      internal_function;
162 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
163 			     Idx subexp_idx, int type) internal_function;
164 static reg_errcode_t check_arrival (re_match_context_t *mctx,
165 				    state_array_t *path, Idx top_node,
166 				    Idx top_str, Idx last_node, Idx last_str,
167 				    int type) internal_function;
168 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
169 						   Idx str_idx,
170 						   re_node_set *cur_nodes,
171 						   re_node_set *next_nodes)
172      internal_function;
173 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
174 					       re_node_set *cur_nodes,
175 					       Idx ex_subexp, int type)
176      internal_function;
177 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
178 						   re_node_set *dst_nodes,
179 						   Idx target, Idx ex_subexp,
180 						   int type) internal_function;
181 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
182 					 re_node_set *cur_nodes, Idx cur_str,
183 					 Idx subexp_num, int type)
184      internal_function;
185 static bool build_trtable (const re_dfa_t *dfa,
186 			   re_dfastate_t *state) internal_function;
187 #ifdef RE_ENABLE_I18N
188 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
189 				    const re_string_t *input, Idx idx)
190      internal_function;
191 # ifdef _LIBC
192 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
193 						   size_t name_len)
194      internal_function;
195 # endif /* _LIBC */
196 #endif /* RE_ENABLE_I18N */
197 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
198 				       const re_dfastate_t *state,
199 				       re_node_set *states_node,
200 				       bitset_t *states_ch) internal_function;
201 static bool check_node_accept (const re_match_context_t *mctx,
202 			       const re_token_t *node, Idx idx)
203      internal_function;
204 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
205      internal_function;
206 
207 /* Entry point for POSIX code.  */
208 
209 /* regexec searches for a given pattern, specified by PREG, in the
210    string STRING.
211 
212    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
213    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
214    least NMATCH elements, and we set them to the offsets of the
215    corresponding matched substrings.
216 
217    EFLAGS specifies "execution flags" which affect matching: if
218    REG_NOTBOL is set, then ^ does not match at the beginning of the
219    string; if REG_NOTEOL is set, then $ does not match at the end.
220 
221    We return 0 if we find a match and REG_NOMATCH if not.  */
222 
223 int
224 regexec (preg, string, nmatch, pmatch, eflags)
225     const regex_t *_Restrict_ preg;
226     const char *_Restrict_ string;
227     size_t nmatch;
228     regmatch_t pmatch[_Restrict_arr_];
229     int eflags;
230 {
231   reg_errcode_t err;
232   Idx start, length;
233 #ifdef _LIBC
234   re_dfa_t *dfa = preg->buffer;
235 #endif
236 
237   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
238     return REG_BADPAT;
239 
240   if (eflags & REG_STARTEND)
241     {
242       start = pmatch[0].rm_so;
243       length = pmatch[0].rm_eo;
244     }
245   else
246     {
247       start = 0;
248       length = strlen (string);
249     }
250 
251   __libc_lock_lock (dfa->lock);
252   if (preg->no_sub)
253     err = re_search_internal (preg, string, length, start, length,
254 			      length, 0, NULL, eflags);
255   else
256     err = re_search_internal (preg, string, length, start, length,
257 			      length, nmatch, pmatch, eflags);
258   __libc_lock_unlock (dfa->lock);
259   return err != REG_NOERROR;
260 }
261 
262 #ifdef _LIBC
263 # include <shlib-compat.h>
264 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
265 
266 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
267 __typeof__ (__regexec) __compat_regexec;
268 
269 int
270 attribute_compat_text_section
271 __compat_regexec (const regex_t *_Restrict_ preg,
272 		  const char *_Restrict_ string, size_t nmatch,
273 		  regmatch_t pmatch[], int eflags)
274 {
275   return regexec (preg, string, nmatch, pmatch,
276 		  eflags & (REG_NOTBOL | REG_NOTEOL));
277 }
278 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
279 # endif
280 #endif
281 
282 /* Entry points for GNU code.  */
283 
284 /* re_match, re_search, re_match_2, re_search_2
285 
286    The former two functions operate on STRING with length LENGTH,
287    while the later two operate on concatenation of STRING1 and STRING2
288    with lengths LENGTH1 and LENGTH2, respectively.
289 
290    re_match() matches the compiled pattern in BUFP against the string,
291    starting at index START.
292 
293    re_search() first tries matching at index START, then it tries to match
294    starting from index START + 1, and so on.  The last start position tried
295    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
296    way as re_match().)
297 
298    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
299    the first STOP characters of the concatenation of the strings should be
300    concerned.
301 
302    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
303    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
304    computed relative to the concatenation, not relative to the individual
305    strings.)
306 
307    On success, re_match* functions return the length of the match, re_search*
308    return the position of the start of the match.  Return value -1 means no
309    match was found and -2 indicates an internal error.  */
310 
311 regoff_t
312 re_match (bufp, string, length, start, regs)
313     struct re_pattern_buffer *bufp;
314     const char *string;
315     Idx length, start;
316     struct re_registers *regs;
317 {
318   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
319 }
320 #ifdef _LIBC
321 weak_alias (__re_match, re_match)
322 #endif
323 
324 regoff_t
325 re_search (bufp, string, length, start, range, regs)
326     struct re_pattern_buffer *bufp;
327     const char *string;
328     Idx length, start;
329     regoff_t range;
330     struct re_registers *regs;
331 {
332   return re_search_stub (bufp, string, length, start, range, length, regs,
333 			 false);
334 }
335 #ifdef _LIBC
336 weak_alias (__re_search, re_search)
337 #endif
338 
339 regoff_t
340 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
341     struct re_pattern_buffer *bufp;
342     const char *string1, *string2;
343     Idx length1, length2, start, stop;
344     struct re_registers *regs;
345 {
346   return re_search_2_stub (bufp, string1, length1, string2, length2,
347 			   start, 0, regs, stop, true);
348 }
349 #ifdef _LIBC
350 weak_alias (__re_match_2, re_match_2)
351 #endif
352 
353 regoff_t
354 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
355     struct re_pattern_buffer *bufp;
356     const char *string1, *string2;
357     Idx length1, length2, start, stop;
358     regoff_t range;
359     struct re_registers *regs;
360 {
361   return re_search_2_stub (bufp, string1, length1, string2, length2,
362 			   start, range, regs, stop, false);
363 }
364 #ifdef _LIBC
365 weak_alias (__re_search_2, re_search_2)
366 #endif
367 
368 static regoff_t
369 re_search_2_stub (struct re_pattern_buffer *bufp,
370 		  const char *string1, Idx length1,
371 		  const char *string2, Idx length2,
372 		  Idx start, regoff_t range, struct re_registers *regs,
373 		  Idx stop, bool ret_len)
374 {
375   const char *str;
376   regoff_t rval;
377   Idx len = length1 + length2;
378   char *s = NULL;
379 
380   verify (! TYPE_SIGNED (Idx));
381   if (BE (len < length1, 0))
382      return -2;
383   /* if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
384      return -2; */
385 
386   /* Concatenate the strings.  */
387   if (length2 > 0)
388     if (length1 > 0)
389       {
390 	s = re_malloc (char, len);
391 
392 	if (BE (s == NULL, 0))
393 	  return -2;
394 #ifdef _LIBC
395 	memcpy (__mempcpy (s, string1, length1), string2, length2);
396 #else
397 	memcpy (s, string1, length1);
398 	memcpy (s + length1, string2, length2);
399 #endif
400 	str = s;
401       }
402     else
403       str = string2;
404   else
405     str = string1;
406 
407   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
408 			 ret_len);
409   re_free (s);
410   return rval;
411 }
412 
413 /* The parameters have the same meaning as those of re_search.
414    Additional parameters:
415    If RET_LEN is true the length of the match is returned (re_match style);
416    otherwise the position of the match is returned.  */
417 
418 static regoff_t
419 re_search_stub (struct re_pattern_buffer *bufp,
420 		const char *string, Idx length,
421 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
422 		bool ret_len)
423 {
424   reg_errcode_t result;
425   regmatch_t *pmatch;
426   Idx nregs;
427   regoff_t rval;
428   int eflags = 0;
429 #ifdef _LIBC
430   re_dfa_t *dfa = bufp->buffer;
431 #endif
432   Idx last_start = start + range;
433 
434   /* Check for out-of-range.  */
435   verify (! TYPE_SIGNED (Idx));
436   /* if (BE (start < 0, 0))
437      return -1; */
438   if (BE (start > length, 0))
439      return -1;
440   if (BE (length < last_start || (0 <= range && last_start < start), 0))
441     last_start = length;
442   else if (BE (/* last_start < 0 || */ (range < 0 && start <= last_start), 0))
443     last_start = 0;
444 
445   __libc_lock_lock (dfa->lock);
446 
447   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
448   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
449 
450   /* Compile fastmap if we haven't yet.  */
451   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
452     re_compile_fastmap (bufp);
453 
454   if (BE (bufp->no_sub, 0))
455     regs = NULL;
456 
457   /* We need at least 1 register.  */
458   if (regs == NULL)
459     nregs = 1;
460   else if (BE (bufp->regs_allocated == REGS_FIXED
461 	       && regs->num_regs <= bufp->re_nsub, 0))
462     {
463       nregs = regs->num_regs;
464       if (BE (nregs < 1, 0))
465 	{
466 	  /* Nothing can be copied to regs.  */
467 	  regs = NULL;
468 	  nregs = 1;
469 	}
470     }
471   else
472     nregs = bufp->re_nsub + 1;
473   pmatch = re_malloc (regmatch_t, nregs);
474   if (BE (pmatch == NULL, 0))
475     {
476       rval = -2;
477       goto out;
478     }
479 
480   result = re_search_internal (bufp, string, length, start, last_start, stop,
481 			       nregs, pmatch, eflags);
482 
483   rval = 0;
484 
485   /* I hope we needn't fill their regs with -1's when no match was found.  */
486   if (result != REG_NOERROR)
487     rval = result == REG_NOMATCH ? -1 : -2;
488   else if (regs != NULL)
489     {
490       /* If caller wants register contents data back, copy them.  */
491       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
492 					   bufp->regs_allocated);
493       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
494 	rval = -2;
495     }
496 
497   if (BE (rval == 0, 1))
498     {
499       if (ret_len)
500 	{
501 	  assert (pmatch[0].rm_so == start);
502 	  rval = pmatch[0].rm_eo - start;
503 	}
504       else
505 	rval = pmatch[0].rm_so;
506     }
507   re_free (pmatch);
508  out:
509   __libc_lock_unlock (dfa->lock);
510   return rval;
511 }
512 
513 static unsigned
514 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
515 	      int regs_allocated)
516 {
517   int rval = REGS_REALLOCATE;
518   Idx i;
519   Idx need_regs = nregs + 1;
520   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
521      uses.  */
522 
523   /* Have the register data arrays been allocated?  */
524   if (regs_allocated == REGS_UNALLOCATED)
525     { /* No.  So allocate them with malloc.  */
526       regs->start = re_malloc (regoff_t, need_regs);
527       if (BE (regs->start == NULL, 0))
528 	return REGS_UNALLOCATED;
529       regs->end = re_malloc (regoff_t, need_regs);
530       if (BE (regs->end == NULL, 0))
531 	{
532 	  re_free (regs->start);
533 	  return REGS_UNALLOCATED;
534 	}
535       regs->num_regs = need_regs;
536     }
537   else if (regs_allocated == REGS_REALLOCATE)
538     { /* Yes.  If we need more elements than were already
539 	 allocated, reallocate them.  If we need fewer, just
540 	 leave it alone.  */
541       if (BE (need_regs > regs->num_regs, 0))
542 	{
543 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
544 	  regoff_t *new_end;
545 	  if (BE (new_start == NULL, 0))
546 	    return REGS_UNALLOCATED;
547 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
548 	  if (BE (new_end == NULL, 0))
549 	    {
550 	      re_free (new_start);
551 	      return REGS_UNALLOCATED;
552 	    }
553 	  regs->start = new_start;
554 	  regs->end = new_end;
555 	  regs->num_regs = need_regs;
556 	}
557     }
558   else
559     {
560       assert (regs_allocated == REGS_FIXED);
561       /* This function may not be called with REGS_FIXED and nregs too big.  */
562       assert (regs->num_regs >= nregs);
563       rval = REGS_FIXED;
564     }
565 
566   /* Copy the regs.  */
567   for (i = 0; i < nregs; ++i)
568     {
569       regs->start[i] = pmatch[i].rm_so;
570       regs->end[i] = pmatch[i].rm_eo;
571     }
572   for ( ; i < regs->num_regs; ++i)
573     regs->start[i] = regs->end[i] = -1;
574 
575   return rval;
576 }
577 
578 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
579    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
580    this memory for recording register information.  STARTS and ENDS
581    must be allocated using the malloc library routine, and must each
582    be at least NUM_REGS * sizeof (regoff_t) bytes long.
583 
584    If NUM_REGS == 0, then subsequent matches should allocate their own
585    register data.
586 
587    Unless this function is called, the first search or match using
588    PATTERN_BUFFER will allocate its own register data, without
589    freeing the old data.  */
590 
591 void
592 re_set_registers (bufp, regs, num_regs, starts, ends)
593     struct re_pattern_buffer *bufp;
594     struct re_registers *regs;
595     __re_size_t num_regs;
596     regoff_t *starts, *ends;
597 {
598   if (num_regs)
599     {
600       bufp->regs_allocated = REGS_REALLOCATE;
601       regs->num_regs = num_regs;
602       regs->start = starts;
603       regs->end = ends;
604     }
605   else
606     {
607       bufp->regs_allocated = REGS_UNALLOCATED;
608       regs->num_regs = 0;
609       regs->start = regs->end = NULL;
610     }
611 }
612 #ifdef _LIBC
613 weak_alias (__re_set_registers, re_set_registers)
614 #endif
615 
616 /* Entry points compatible with 4.2 BSD regex library.  We don't define
617    them unless specifically requested.  */
618 
619 #if defined _REGEX_RE_COMP || defined _LIBC
620 int
621 # ifdef _LIBC
622 weak_function
623 # endif
624 re_exec (s)
625      const char *s;
626 {
627   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
628 }
629 #endif /* _REGEX_RE_COMP */
630 
631 /* Internal entry point.  */
632 
633 /* Searches for a compiled pattern PREG in the string STRING, whose
634    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
635    meaning as with regexec.  LAST_START is START + RANGE, where
636    START and RANGE have the same meaning as with re_search.
637    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
638    otherwise return the error code.
639    Note: We assume front end functions already check ranges.
640    (0 <= LAST_START && LAST_START <= LENGTH)  */
641 
642 static reg_errcode_t
643 __attribute_warn_unused_result__
644 re_search_internal (const regex_t *preg,
645 		    const char *string, Idx length,
646 		    Idx start, Idx last_start, Idx stop,
647 		    size_t nmatch, regmatch_t pmatch[],
648 		    int eflags)
649 {
650   reg_errcode_t err;
651   const re_dfa_t *dfa = preg->buffer;
652   Idx left_lim, right_lim;
653   int incr;
654   bool fl_longest_match;
655   int match_kind;
656   Idx match_first;
657   Idx match_last = REG_MISSING;
658   Idx extra_nmatch;
659   bool sb;
660   int ch;
661 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
662   re_match_context_t mctx = { .dfa = dfa };
663 #else
664   re_match_context_t mctx;
665 #endif
666   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
667 		    && start != last_start && !preg->can_be_null)
668 		   ? preg->fastmap : NULL);
669   RE_TRANSLATE_TYPE t = preg->translate;
670 
671 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
672   memset (&mctx, '\0', sizeof (re_match_context_t));
673   mctx.dfa = dfa;
674 #endif
675 
676   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
677   nmatch -= extra_nmatch;
678 
679   /* Check if the DFA haven't been compiled.  */
680   if (BE (preg->used == 0 || dfa->init_state == NULL
681 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
682 	  || dfa->init_state_begbuf == NULL, 0))
683     return REG_NOMATCH;
684 
685 #ifdef DEBUG
686   /* We assume front-end functions already check them.  */
687   assert (0 <= last_start && last_start <= length);
688 #endif
689 
690   /* If initial states with non-begbuf contexts have no elements,
691      the regex must be anchored.  If preg->newline_anchor is set,
692      we'll never use init_state_nl, so do not check it.  */
693   if (dfa->init_state->nodes.nelem == 0
694       && dfa->init_state_word->nodes.nelem == 0
695       && (dfa->init_state_nl->nodes.nelem == 0
696 	  || !preg->newline_anchor))
697     {
698       if (start != 0 && last_start != 0)
699         return REG_NOMATCH;
700       start = last_start = 0;
701     }
702 
703   /* We must check the longest matching, if nmatch > 0.  */
704   fl_longest_match = (nmatch != 0 || dfa->nbackref);
705 
706   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
707 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
708 			    dfa);
709   if (BE (err != REG_NOERROR, 0))
710     goto free_return;
711   mctx.input.stop = stop;
712   mctx.input.raw_stop = stop;
713   mctx.input.newline_anchor = preg->newline_anchor;
714 
715   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
716   if (BE (err != REG_NOERROR, 0))
717     goto free_return;
718 
719   /* We will log all the DFA states through which the dfa pass,
720      if nmatch > 1, or this dfa has "multibyte node", which is a
721      back-reference or a node which can accept multibyte character or
722      multi character collating element.  */
723   if (nmatch > 1 || dfa->has_mb_node)
724     {
725       /* Avoid overflow.  */
726       if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
727                <= mctx.input.bufs_len), 0))
728 	{
729 	  err = REG_ESPACE;
730 	  goto free_return;
731 	}
732 
733       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
734       if (BE (mctx.state_log == NULL, 0))
735 	{
736 	  err = REG_ESPACE;
737 	  goto free_return;
738 	}
739     }
740   else
741     mctx.state_log = NULL;
742 
743   match_first = start;
744   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
745 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
746 
747   /* Check incrementally whether the input string matches.  */
748   incr = (last_start < start) ? -1 : 1;
749   left_lim = (last_start < start) ? last_start : start;
750   right_lim = (last_start < start) ? start : last_start;
751   sb = dfa->mb_cur_max == 1;
752   match_kind =
753     (fastmap
754      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
755 	| (start <= last_start ? 2 : 0)
756 	| (t != NULL ? 1 : 0))
757      : 8);
758 
759   for (;; match_first += incr)
760     {
761       err = REG_NOMATCH;
762       if (match_first < left_lim || right_lim < match_first)
763 	goto free_return;
764 
765       /* Advance as rapidly as possible through the string, until we
766 	 find a plausible place to start matching.  This may be done
767 	 with varying efficiency, so there are various possibilities:
768 	 only the most common of them are specialized, in order to
769 	 save on code size.  We use a switch statement for speed.  */
770       switch (match_kind)
771 	{
772 	case 8:
773 	  /* No fastmap.  */
774 	  break;
775 
776 	case 7:
777 	  /* Fastmap with single-byte translation, match forward.  */
778 	  while (BE (match_first < right_lim, 1)
779 		 && !fastmap[t[(unsigned char) string[match_first]]])
780 	    ++match_first;
781 	  goto forward_match_found_start_or_reached_end;
782 
783 	case 6:
784 	  /* Fastmap without translation, match forward.  */
785 	  while (BE (match_first < right_lim, 1)
786 		 && !fastmap[(unsigned char) string[match_first]])
787 	    ++match_first;
788 
789 	forward_match_found_start_or_reached_end:
790 	  if (BE (match_first == right_lim, 0))
791 	    {
792 	      ch = match_first >= length
793 		       ? 0 : (unsigned char) string[match_first];
794 	      if (!fastmap[t ? t[ch] : ch])
795 		goto free_return;
796 	    }
797 	  break;
798 
799 	case 4:
800 	case 5:
801 	  /* Fastmap without multi-byte translation, match backwards.  */
802 	  while (match_first >= left_lim)
803 	    {
804 	      ch = match_first >= length
805 		       ? 0 : (unsigned char) string[match_first];
806 	      if (fastmap[t ? t[ch] : ch])
807 		break;
808 	      --match_first;
809 	    }
810 	  if (match_first < left_lim)
811 	    goto free_return;
812 	  break;
813 
814 	default:
815 	  /* In this case, we can't determine easily the current byte,
816 	     since it might be a component byte of a multibyte
817 	     character.  Then we use the constructed buffer instead.  */
818 	  for (;;)
819 	    {
820 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
821 		 buffers.  */
822 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
823 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
824 		{
825 		  err = re_string_reconstruct (&mctx.input, match_first,
826 					       eflags);
827 		  if (BE (err != REG_NOERROR, 0))
828 		    goto free_return;
829 
830 		  offset = match_first - mctx.input.raw_mbs_idx;
831 		}
832 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
833 		 Note that MATCH_FIRST must not be smaller than 0.  */
834 	      ch = (match_first >= length
835 		    ? 0 : re_string_byte_at (&mctx.input, offset));
836 	      if (fastmap[ch])
837 		break;
838 	      match_first += incr;
839 	      if (match_first < left_lim || match_first > right_lim)
840 		{
841 		  err = REG_NOMATCH;
842 		  goto free_return;
843 		}
844 	    }
845 	  break;
846 	}
847 
848       /* Reconstruct the buffers so that the matcher can assume that
849 	 the matching starts from the beginning of the buffer.  */
850       err = re_string_reconstruct (&mctx.input, match_first, eflags);
851       if (BE (err != REG_NOERROR, 0))
852 	goto free_return;
853 
854 #ifdef RE_ENABLE_I18N
855      /* Don't consider this char as a possible match start if it part,
856 	yet isn't the head, of a multibyte character.  */
857       if (!sb && !re_string_first_byte (&mctx.input, 0))
858 	continue;
859 #endif
860 
861       /* It seems to be appropriate one, then use the matcher.  */
862       /* We assume that the matching starts from 0.  */
863       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
864       match_last = check_matching (&mctx, fl_longest_match,
865 				   start <= last_start ? &match_first : NULL);
866       if (match_last != REG_MISSING)
867 	{
868 	  if (BE (match_last == REG_ERROR, 0))
869 	    {
870 	      err = REG_ESPACE;
871 	      goto free_return;
872 	    }
873 	  else
874 	    {
875 	      mctx.match_last = match_last;
876 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
877 		{
878 		  re_dfastate_t *pstate = mctx.state_log[match_last];
879 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
880 							     match_last);
881 		}
882 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
883 		  || dfa->nbackref)
884 		{
885 		  err = prune_impossible_nodes (&mctx);
886 		  if (err == REG_NOERROR)
887 		    break;
888 		  if (BE (err != REG_NOMATCH, 0))
889 		    goto free_return;
890 		  match_last = REG_MISSING;
891 		}
892 	      else
893 		break; /* We found a match.  */
894 	    }
895 	}
896 
897       match_ctx_clean (&mctx);
898     }
899 
900 #ifdef DEBUG
901   assert (match_last != REG_MISSING);
902   assert (err == REG_NOERROR);
903 #endif
904 
905   /* Set pmatch[] if we need.  */
906   if (nmatch > 0)
907     {
908       Idx reg_idx;
909 
910       /* Initialize registers.  */
911       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
912 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
913 
914       /* Set the points where matching start/end.  */
915       pmatch[0].rm_so = 0;
916       pmatch[0].rm_eo = mctx.match_last;
917       /* FIXME: This function should fail if mctx.match_last exceeds
918 	 the maximum possible regoff_t value.  We need a new error
919 	 code REG_OVERFLOW.  */
920 
921       if (!preg->no_sub && nmatch > 1)
922 	{
923 	  err = set_regs (preg, &mctx, nmatch, pmatch,
924 			  dfa->has_plural_match && dfa->nbackref > 0);
925 	  if (BE (err != REG_NOERROR, 0))
926 	    goto free_return;
927 	}
928 
929       /* At last, add the offset to each register, since we slid
930 	 the buffers so that we could assume that the matching starts
931 	 from 0.  */
932       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
933 	if (pmatch[reg_idx].rm_so != -1)
934 	  {
935 #ifdef RE_ENABLE_I18N
936 	    if (BE (mctx.input.offsets_needed != 0, 0))
937 	      {
938 		pmatch[reg_idx].rm_so =
939 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
940 		   ? mctx.input.valid_raw_len
941 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
942 		pmatch[reg_idx].rm_eo =
943 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
944 		   ? mctx.input.valid_raw_len
945 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
946 	      }
947 #else
948 	    assert (mctx.input.offsets_needed == 0);
949 #endif
950 	    pmatch[reg_idx].rm_so += match_first;
951 	    pmatch[reg_idx].rm_eo += match_first;
952 	  }
953       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
954 	{
955 	  pmatch[nmatch + reg_idx].rm_so = -1;
956 	  pmatch[nmatch + reg_idx].rm_eo = -1;
957 	}
958 
959       if (dfa->subexp_map)
960 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
961 	  if (dfa->subexp_map[reg_idx] != reg_idx)
962 	    {
963 	      pmatch[reg_idx + 1].rm_so
964 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
965 	      pmatch[reg_idx + 1].rm_eo
966 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
967 	    }
968     }
969 
970  free_return:
971   re_free (mctx.state_log);
972   if (dfa->nbackref)
973     match_ctx_free (&mctx);
974   re_string_destruct (&mctx.input);
975   return err;
976 }
977 
978 static reg_errcode_t
979 __attribute_warn_unused_result__
980 prune_impossible_nodes (re_match_context_t *mctx)
981 {
982   const re_dfa_t *const dfa = mctx->dfa;
983   Idx halt_node, match_last;
984   reg_errcode_t ret;
985   re_dfastate_t **sifted_states;
986   re_dfastate_t **lim_states = NULL;
987   re_sift_context_t sctx;
988 #ifdef DEBUG
989   assert (mctx->state_log != NULL);
990 #endif
991   match_last = mctx->match_last;
992   halt_node = mctx->last_node;
993 
994   /* Avoid overflow.  */
995   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
996     return REG_ESPACE;
997 
998   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
999   if (BE (sifted_states == NULL, 0))
1000     {
1001       ret = REG_ESPACE;
1002       goto free_return;
1003     }
1004   if (dfa->nbackref)
1005     {
1006       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
1007       if (BE (lim_states == NULL, 0))
1008 	{
1009 	  ret = REG_ESPACE;
1010 	  goto free_return;
1011 	}
1012       while (1)
1013 	{
1014 	  memset (lim_states, '\0',
1015 		  sizeof (re_dfastate_t *) * (match_last + 1));
1016 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1017 			 match_last);
1018 	  ret = sift_states_backward (mctx, &sctx);
1019 	  re_node_set_free (&sctx.limits);
1020 	  if (BE (ret != REG_NOERROR, 0))
1021 	      goto free_return;
1022 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1023 	    break;
1024 	  do
1025 	    {
1026 	      --match_last;
1027 	      if (! REG_VALID_INDEX (match_last))
1028 		{
1029 		  ret = REG_NOMATCH;
1030 		  goto free_return;
1031 		}
1032 	    } while (mctx->state_log[match_last] == NULL
1033 		     || !mctx->state_log[match_last]->halt);
1034 	  halt_node = check_halt_state_context (mctx,
1035 						mctx->state_log[match_last],
1036 						match_last);
1037 	}
1038       ret = merge_state_array (dfa, sifted_states, lim_states,
1039 			       match_last + 1);
1040       re_free (lim_states);
1041       lim_states = NULL;
1042       if (BE (ret != REG_NOERROR, 0))
1043 	goto free_return;
1044     }
1045   else
1046     {
1047       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1048       ret = sift_states_backward (mctx, &sctx);
1049       re_node_set_free (&sctx.limits);
1050       if (BE (ret != REG_NOERROR, 0))
1051 	goto free_return;
1052       if (sifted_states[0] == NULL)
1053 	{
1054 	  ret = REG_NOMATCH;
1055 	  goto free_return;
1056 	}
1057     }
1058   re_free (mctx->state_log);
1059   mctx->state_log = sifted_states;
1060   sifted_states = NULL;
1061   mctx->last_node = halt_node;
1062   mctx->match_last = match_last;
1063   ret = REG_NOERROR;
1064  free_return:
1065   re_free (sifted_states);
1066   re_free (lim_states);
1067   return ret;
1068 }
1069 
1070 /* Acquire an initial state and return it.
1071    We must select appropriate initial state depending on the context,
1072    since initial states may have constraints like "\<", "^", etc..  */
1073 
1074 static inline re_dfastate_t *
1075 __attribute__ ((always_inline)) internal_function
1076 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1077 			    Idx idx)
1078 {
1079   const re_dfa_t *const dfa = mctx->dfa;
1080   if (dfa->init_state->has_constraint)
1081     {
1082       unsigned int context;
1083       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1084       if (IS_WORD_CONTEXT (context))
1085 	return dfa->init_state_word;
1086       else if (IS_ORDINARY_CONTEXT (context))
1087 	return dfa->init_state;
1088       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1089 	return dfa->init_state_begbuf;
1090       else if (IS_NEWLINE_CONTEXT (context))
1091 	return dfa->init_state_nl;
1092       else if (IS_BEGBUF_CONTEXT (context))
1093 	{
1094 	  /* It is relatively rare case, then calculate on demand.  */
1095 	  return re_acquire_state_context (err, dfa,
1096 					   dfa->init_state->entrance_nodes,
1097 					   context);
1098 	}
1099       else
1100 	/* Must not happen?  */
1101 	return dfa->init_state;
1102     }
1103   else
1104     return dfa->init_state;
1105 }
1106 
1107 /* Check whether the regular expression match input string INPUT or not,
1108    and return the index where the matching end.  Return REG_MISSING if
1109    there is no match, and return REG_ERROR in case of an error.
1110    FL_LONGEST_MATCH means we want the POSIX longest matching.
1111    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1112    next place where we may want to try matching.
1113    Note that the matcher assumes that the matching starts from the current
1114    index of the buffer.  */
1115 
1116 static Idx
1117 internal_function __attribute_warn_unused_result__
1118 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1119 		Idx *p_match_first)
1120 {
1121   const re_dfa_t *const dfa = mctx->dfa;
1122   reg_errcode_t err;
1123   Idx match = 0;
1124   Idx match_last = REG_MISSING;
1125   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1126   re_dfastate_t *cur_state;
1127   bool at_init_state = p_match_first != NULL;
1128   Idx next_start_idx = cur_str_idx;
1129 
1130   err = REG_NOERROR;
1131   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1132   /* An initial state must not be NULL (invalid).  */
1133   if (BE (cur_state == NULL, 0))
1134     {
1135       assert (err == REG_ESPACE);
1136       return REG_ERROR;
1137     }
1138 
1139   if (mctx->state_log != NULL)
1140     {
1141       mctx->state_log[cur_str_idx] = cur_state;
1142 
1143       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1144 	 later.  E.g. Processing back references.  */
1145       if (BE (dfa->nbackref, 0))
1146 	{
1147 	  at_init_state = false;
1148 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1149 	  if (BE (err != REG_NOERROR, 0))
1150 	    return err;
1151 
1152 	  if (cur_state->has_backref)
1153 	    {
1154 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1155 	      if (BE (err != REG_NOERROR, 0))
1156 		return err;
1157 	    }
1158 	}
1159     }
1160 
1161   /* If the RE accepts NULL string.  */
1162   if (BE (cur_state->halt, 0))
1163     {
1164       if (!cur_state->has_constraint
1165 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1166 	{
1167 	  if (!fl_longest_match)
1168 	    return cur_str_idx;
1169 	  else
1170 	    {
1171 	      match_last = cur_str_idx;
1172 	      match = 1;
1173 	    }
1174 	}
1175     }
1176 
1177   while (!re_string_eoi (&mctx->input))
1178     {
1179       re_dfastate_t *old_state = cur_state;
1180       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1181 
1182       if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1183 	   && mctx->input.bufs_len < mctx->input.len)
1184 	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1185 	      && mctx->input.valid_len < mctx->input.len))
1186 	{
1187 	  err = extend_buffers (mctx, next_char_idx + 1);
1188 	  if (BE (err != REG_NOERROR, 0))
1189 	    {
1190 	      assert (err == REG_ESPACE);
1191 	      return REG_ERROR;
1192 	    }
1193 	}
1194 
1195       cur_state = transit_state (&err, mctx, cur_state);
1196       if (mctx->state_log != NULL)
1197 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1198 
1199       if (cur_state == NULL)
1200 	{
1201 	  /* Reached the invalid state or an error.  Try to recover a valid
1202 	     state using the state log, if available and if we have not
1203 	     already found a valid (even if not the longest) match.  */
1204 	  if (BE (err != REG_NOERROR, 0))
1205 	    return REG_ERROR;
1206 
1207 	  if (mctx->state_log == NULL
1208 	      || (match && !fl_longest_match)
1209 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1210 	    break;
1211 	}
1212 
1213       if (BE (at_init_state, 0))
1214 	{
1215 	  if (old_state == cur_state)
1216 	    next_start_idx = next_char_idx;
1217 	  else
1218 	    at_init_state = false;
1219 	}
1220 
1221       if (cur_state->halt)
1222 	{
1223 	  /* Reached a halt state.
1224 	     Check the halt state can satisfy the current context.  */
1225 	  if (!cur_state->has_constraint
1226 	      || check_halt_state_context (mctx, cur_state,
1227 					   re_string_cur_idx (&mctx->input)))
1228 	    {
1229 	      /* We found an appropriate halt state.  */
1230 	      match_last = re_string_cur_idx (&mctx->input);
1231 	      match = 1;
1232 
1233 	      /* We found a match, do not modify match_first below.  */
1234 	      p_match_first = NULL;
1235 	      if (!fl_longest_match)
1236 		break;
1237 	    }
1238 	}
1239     }
1240 
1241   if (p_match_first)
1242     *p_match_first += next_start_idx;
1243 
1244   return match_last;
1245 }
1246 
1247 /* Check NODE match the current context.  */
1248 
1249 static bool
1250 internal_function
1251 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1252 {
1253   re_token_type_t type = dfa->nodes[node].type;
1254   unsigned int constraint = dfa->nodes[node].constraint;
1255   if (type != END_OF_RE)
1256     return false;
1257   if (!constraint)
1258     return true;
1259   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1260     return false;
1261   return true;
1262 }
1263 
1264 /* Check the halt state STATE match the current context.
1265    Return 0 if not match, if the node, STATE has, is a halt node and
1266    match the context, return the node.  */
1267 
1268 static Idx
1269 internal_function
1270 check_halt_state_context (const re_match_context_t *mctx,
1271 			  const re_dfastate_t *state, Idx idx)
1272 {
1273   Idx i;
1274   unsigned int context;
1275 #ifdef DEBUG
1276   assert (state->halt);
1277 #endif
1278   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1279   for (i = 0; i < state->nodes.nelem; ++i)
1280     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1281       return state->nodes.elems[i];
1282   return 0;
1283 }
1284 
1285 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1286    corresponding to the DFA).
1287    Return the destination node, and update EPS_VIA_NODES;
1288    return REG_MISSING in case of errors.  */
1289 
1290 static Idx
1291 internal_function
1292 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1293 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1294 		   struct re_fail_stack_t *fs)
1295 {
1296   const re_dfa_t *const dfa = mctx->dfa;
1297   Idx i;
1298   bool ok;
1299   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1300     {
1301       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1302       re_node_set *edests = &dfa->edests[node];
1303       Idx dest_node;
1304       ok = re_node_set_insert (eps_via_nodes, node);
1305       if (BE (! ok, 0))
1306 	return REG_ERROR;
1307       /* Pick up a valid destination, or return REG_MISSING if none
1308 	 is found.  */
1309       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1310 	{
1311 	  Idx candidate = edests->elems[i];
1312 	  if (!re_node_set_contains (cur_nodes, candidate))
1313 	    continue;
1314           if (dest_node == REG_MISSING)
1315 	    dest_node = candidate;
1316 
1317 	  else
1318 	    {
1319 	      /* In order to avoid infinite loop like "(a*)*", return the second
1320 		 epsilon-transition if the first was already considered.  */
1321 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1322 		return candidate;
1323 
1324 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1325 	      else if (fs != NULL
1326 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1327 					   eps_via_nodes))
1328 		return REG_ERROR;
1329 
1330 	      /* We know we are going to exit.  */
1331 	      break;
1332 	    }
1333 	}
1334       return dest_node;
1335     }
1336   else
1337     {
1338       Idx naccepted = 0;
1339       re_token_type_t type = dfa->nodes[node].type;
1340 
1341 #ifdef RE_ENABLE_I18N
1342       if (dfa->nodes[node].accept_mb)
1343 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1344       else
1345 #endif /* RE_ENABLE_I18N */
1346       if (type == OP_BACK_REF)
1347 	{
1348 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1349 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1350 	  if (fs != NULL)
1351 	    {
1352 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1353 		return REG_MISSING;
1354 	      else if (naccepted)
1355 		{
1356 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1357 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1358 			      naccepted) != 0)
1359 		    return REG_MISSING;
1360 		}
1361 	    }
1362 
1363 	  if (naccepted == 0)
1364 	    {
1365 	      Idx dest_node;
1366 	      ok = re_node_set_insert (eps_via_nodes, node);
1367 	      if (BE (! ok, 0))
1368 		return REG_ERROR;
1369 	      dest_node = dfa->edests[node].elems[0];
1370 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1371 					dest_node))
1372 		return dest_node;
1373 	    }
1374 	}
1375 
1376       if (naccepted != 0
1377 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1378 	{
1379 	  Idx dest_node = dfa->nexts[node];
1380 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1381 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1382 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1383 					       dest_node)))
1384 	    return REG_MISSING;
1385 	  re_node_set_empty (eps_via_nodes);
1386 	  return dest_node;
1387 	}
1388     }
1389   return REG_MISSING;
1390 }
1391 
1392 static reg_errcode_t
1393 internal_function __attribute_warn_unused_result__
1394 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1395 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1396 {
1397   reg_errcode_t err;
1398   Idx num = fs->num++;
1399   if (fs->num == fs->alloc)
1400     {
1401       struct re_fail_stack_ent_t *new_array;
1402       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1403 				       * fs->alloc * 2));
1404       if (new_array == NULL)
1405 	return REG_ESPACE;
1406       fs->alloc *= 2;
1407       fs->stack = new_array;
1408     }
1409   fs->stack[num].idx = str_idx;
1410   fs->stack[num].node = dest_node;
1411   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1412   if (fs->stack[num].regs == NULL)
1413     return REG_ESPACE;
1414   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1415   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1416   return err;
1417 }
1418 
1419 static Idx
1420 internal_function
1421 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1422 		regmatch_t *regs, re_node_set *eps_via_nodes)
1423 {
1424   Idx num = --fs->num;
1425   assert (REG_VALID_INDEX (num));
1426   *pidx = fs->stack[num].idx;
1427   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1428   re_node_set_free (eps_via_nodes);
1429   re_free (fs->stack[num].regs);
1430   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1431   return fs->stack[num].node;
1432 }
1433 
1434 /* Set the positions where the subexpressions are starts/ends to registers
1435    PMATCH.
1436    Note: We assume that pmatch[0] is already set, and
1437    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1438 
1439 static reg_errcode_t
1440 internal_function __attribute_warn_unused_result__
1441 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1442 	  regmatch_t *pmatch, bool fl_backtrack)
1443 {
1444   const re_dfa_t *dfa = preg->buffer;
1445   Idx idx, cur_node;
1446   re_node_set eps_via_nodes;
1447   struct re_fail_stack_t *fs;
1448   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1449   regmatch_t *prev_idx_match;
1450   bool prev_idx_match_malloced = false;
1451 
1452 #ifdef DEBUG
1453   assert (nmatch > 1);
1454   assert (mctx->state_log != NULL);
1455 #endif
1456   if (fl_backtrack)
1457     {
1458       fs = &fs_body;
1459       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1460       if (fs->stack == NULL)
1461 	return REG_ESPACE;
1462     }
1463   else
1464     fs = NULL;
1465 
1466   cur_node = dfa->init_node;
1467   re_node_set_init_empty (&eps_via_nodes);
1468 
1469   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1470     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1471   else
1472     {
1473       prev_idx_match = re_malloc (regmatch_t, nmatch);
1474       if (prev_idx_match == NULL)
1475 	{
1476 	  free_fail_stack_return (fs);
1477 	  return REG_ESPACE;
1478 	}
1479       prev_idx_match_malloced = true;
1480     }
1481   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1482 
1483   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1484     {
1485       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1486 
1487       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1488 	{
1489 	  Idx reg_idx;
1490 	  if (fs)
1491 	    {
1492 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1493 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1494 		  break;
1495 	      if (reg_idx == nmatch)
1496 		{
1497 		  re_node_set_free (&eps_via_nodes);
1498 		  if (prev_idx_match_malloced)
1499 		    re_free (prev_idx_match);
1500 		  return free_fail_stack_return (fs);
1501 		}
1502 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1503 					 &eps_via_nodes);
1504 	    }
1505 	  else
1506 	    {
1507 	      re_node_set_free (&eps_via_nodes);
1508 	      if (prev_idx_match_malloced)
1509 		re_free (prev_idx_match);
1510 	      return REG_NOERROR;
1511 	    }
1512 	}
1513 
1514       /* Proceed to next node.  */
1515       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1516 				    &eps_via_nodes, fs);
1517 
1518       if (BE (! REG_VALID_INDEX (cur_node), 0))
1519 	{
1520 	  if (BE (cur_node == REG_ERROR, 0))
1521 	    {
1522 	      re_node_set_free (&eps_via_nodes);
1523 	      if (prev_idx_match_malloced)
1524 		re_free (prev_idx_match);
1525 	      free_fail_stack_return (fs);
1526 	      return REG_ESPACE;
1527 	    }
1528 	  if (fs)
1529 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1530 				       &eps_via_nodes);
1531 	  else
1532 	    {
1533 	      re_node_set_free (&eps_via_nodes);
1534 	      if (prev_idx_match_malloced)
1535 		re_free (prev_idx_match);
1536 	      return REG_NOMATCH;
1537 	    }
1538 	}
1539     }
1540   re_node_set_free (&eps_via_nodes);
1541   if (prev_idx_match_malloced)
1542     re_free (prev_idx_match);
1543   return free_fail_stack_return (fs);
1544 }
1545 
1546 static reg_errcode_t
1547 internal_function
1548 free_fail_stack_return (struct re_fail_stack_t *fs)
1549 {
1550   if (fs)
1551     {
1552       Idx fs_idx;
1553       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1554 	{
1555 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1556 	  re_free (fs->stack[fs_idx].regs);
1557 	}
1558       re_free (fs->stack);
1559     }
1560   return REG_NOERROR;
1561 }
1562 
1563 static void
1564 internal_function
1565 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1566 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1567 {
1568   int type = dfa->nodes[cur_node].type;
1569   if (type == OP_OPEN_SUBEXP)
1570     {
1571       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1572 
1573       /* We are at the first node of this sub expression.  */
1574       if (reg_num < nmatch)
1575 	{
1576 	  pmatch[reg_num].rm_so = cur_idx;
1577 	  pmatch[reg_num].rm_eo = -1;
1578 	}
1579     }
1580   else if (type == OP_CLOSE_SUBEXP)
1581     {
1582       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1583       if (reg_num < nmatch)
1584 	{
1585 	  /* We are at the last node of this sub expression.  */
1586 	  if (pmatch[reg_num].rm_so < cur_idx)
1587 	    {
1588 	      pmatch[reg_num].rm_eo = cur_idx;
1589 	      /* This is a non-empty match or we are not inside an optional
1590 		 subexpression.  Accept this right away.  */
1591 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1592 	    }
1593 	  else
1594 	    {
1595 	      if (dfa->nodes[cur_node].opt_subexp
1596 		  && prev_idx_match[reg_num].rm_so != -1)
1597 		/* We transited through an empty match for an optional
1598 		   subexpression, like (a?)*, and this is not the subexp's
1599 		   first match.  Copy back the old content of the registers
1600 		   so that matches of an inner subexpression are undone as
1601 		   well, like in ((a?))*.  */
1602 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1603 	      else
1604 		/* We completed a subexpression, but it may be part of
1605 		   an optional one, so do not update PREV_IDX_MATCH.  */
1606 		pmatch[reg_num].rm_eo = cur_idx;
1607 	    }
1608 	}
1609     }
1610 }
1611 
1612 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1613    and sift the nodes in each states according to the following rules.
1614    Updated state_log will be wrote to STATE_LOG.
1615 
1616    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1617      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1618 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1619 	the LAST_NODE, we throw away the node 'a'.
1620      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1621 	string 's' and transit to 'b':
1622 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1623 	   away the node 'a'.
1624 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1625 	    thrown away, we throw away the node 'a'.
1626      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1627 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1628 	   node 'a'.
1629 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1630 	    we throw away the node 'a'.  */
1631 
1632 #define STATE_NODE_CONTAINS(state,node) \
1633   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1634 
1635 static reg_errcode_t
1636 internal_function
1637 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1638 {
1639   reg_errcode_t err;
1640   int null_cnt = 0;
1641   Idx str_idx = sctx->last_str_idx;
1642   re_node_set cur_dest;
1643 
1644 #ifdef DEBUG
1645   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1646 #endif
1647 
1648   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1649      transit to the last_node and the last_node itself.  */
1650   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1651   if (BE (err != REG_NOERROR, 0))
1652     return err;
1653   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1654   if (BE (err != REG_NOERROR, 0))
1655     goto free_return;
1656 
1657   /* Then check each states in the state_log.  */
1658   while (str_idx > 0)
1659     {
1660       /* Update counters.  */
1661       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1662       if (null_cnt > mctx->max_mb_elem_len)
1663 	{
1664 	  memset (sctx->sifted_states, '\0',
1665 		  sizeof (re_dfastate_t *) * str_idx);
1666 	  re_node_set_free (&cur_dest);
1667 	  return REG_NOERROR;
1668 	}
1669       re_node_set_empty (&cur_dest);
1670       --str_idx;
1671 
1672       if (mctx->state_log[str_idx])
1673 	{
1674 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1675 	  if (BE (err != REG_NOERROR, 0))
1676 	    goto free_return;
1677 	}
1678 
1679       /* Add all the nodes which satisfy the following conditions:
1680 	 - It can epsilon transit to a node in CUR_DEST.
1681 	 - It is in CUR_SRC.
1682 	 And update state_log.  */
1683       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1684       if (BE (err != REG_NOERROR, 0))
1685 	goto free_return;
1686     }
1687   err = REG_NOERROR;
1688  free_return:
1689   re_node_set_free (&cur_dest);
1690   return err;
1691 }
1692 
1693 static reg_errcode_t
1694 internal_function __attribute_warn_unused_result__
1695 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1696 		     Idx str_idx, re_node_set *cur_dest)
1697 {
1698   const re_dfa_t *const dfa = mctx->dfa;
1699   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1700   Idx i;
1701 
1702   /* Then build the next sifted state.
1703      We build the next sifted state on 'cur_dest', and update
1704      'sifted_states[str_idx]' with 'cur_dest'.
1705      Note:
1706      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1707      'cur_src' points the node_set of the old 'state_log[str_idx]'
1708      (with the epsilon nodes pre-filtered out).  */
1709   for (i = 0; i < cur_src->nelem; i++)
1710     {
1711       Idx prev_node = cur_src->elems[i];
1712       int naccepted = 0;
1713       bool ok;
1714 
1715 #ifdef DEBUG
1716       re_token_type_t type = dfa->nodes[prev_node].type;
1717       assert (!IS_EPSILON_NODE (type));
1718 #endif
1719 #ifdef RE_ENABLE_I18N
1720       /* If the node may accept "multi byte".  */
1721       if (dfa->nodes[prev_node].accept_mb)
1722 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1723 					 str_idx, sctx->last_str_idx);
1724 #endif /* RE_ENABLE_I18N */
1725 
1726       /* We don't check backreferences here.
1727 	 See update_cur_sifted_state().  */
1728       if (!naccepted
1729 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1730 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1731 				  dfa->nexts[prev_node]))
1732 	naccepted = 1;
1733 
1734       if (naccepted == 0)
1735 	continue;
1736 
1737       if (sctx->limits.nelem)
1738 	{
1739 	  Idx to_idx = str_idx + naccepted;
1740 	  if (check_dst_limits (mctx, &sctx->limits,
1741 				dfa->nexts[prev_node], to_idx,
1742 				prev_node, str_idx))
1743 	    continue;
1744 	}
1745       ok = re_node_set_insert (cur_dest, prev_node);
1746       if (BE (! ok, 0))
1747 	return REG_ESPACE;
1748     }
1749 
1750   return REG_NOERROR;
1751 }
1752 
1753 /* Helper functions.  */
1754 
1755 static reg_errcode_t
1756 internal_function
1757 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1758 {
1759   Idx top = mctx->state_log_top;
1760 
1761   if ((next_state_log_idx >= mctx->input.bufs_len
1762        && mctx->input.bufs_len < mctx->input.len)
1763       || (next_state_log_idx >= mctx->input.valid_len
1764 	  && mctx->input.valid_len < mctx->input.len))
1765     {
1766       reg_errcode_t err;
1767       err = extend_buffers (mctx, next_state_log_idx + 1);
1768       if (BE (err != REG_NOERROR, 0))
1769 	return err;
1770     }
1771 
1772   if (top < next_state_log_idx)
1773     {
1774       memset (mctx->state_log + top + 1, '\0',
1775 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1776       mctx->state_log_top = next_state_log_idx;
1777     }
1778   return REG_NOERROR;
1779 }
1780 
1781 static reg_errcode_t
1782 internal_function
1783 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1784 		   re_dfastate_t **src, Idx num)
1785 {
1786   Idx st_idx;
1787   reg_errcode_t err;
1788   for (st_idx = 0; st_idx < num; ++st_idx)
1789     {
1790       if (dst[st_idx] == NULL)
1791 	dst[st_idx] = src[st_idx];
1792       else if (src[st_idx] != NULL)
1793 	{
1794 	  re_node_set merged_set;
1795 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1796 					&src[st_idx]->nodes);
1797 	  if (BE (err != REG_NOERROR, 0))
1798 	    return err;
1799 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1800 	  re_node_set_free (&merged_set);
1801 	  if (BE (err != REG_NOERROR, 0))
1802 	    return err;
1803 	}
1804     }
1805   return REG_NOERROR;
1806 }
1807 
1808 static reg_errcode_t
1809 internal_function
1810 update_cur_sifted_state (const re_match_context_t *mctx,
1811 			 re_sift_context_t *sctx, Idx str_idx,
1812 			 re_node_set *dest_nodes)
1813 {
1814   const re_dfa_t *const dfa = mctx->dfa;
1815   reg_errcode_t err = REG_NOERROR;
1816   const re_node_set *candidates;
1817   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1818 		: &mctx->state_log[str_idx]->nodes);
1819 
1820   if (dest_nodes->nelem == 0)
1821     sctx->sifted_states[str_idx] = NULL;
1822   else
1823     {
1824       if (candidates)
1825 	{
1826 	  /* At first, add the nodes which can epsilon transit to a node in
1827 	     DEST_NODE.  */
1828 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1829 	  if (BE (err != REG_NOERROR, 0))
1830 	    return err;
1831 
1832 	  /* Then, check the limitations in the current sift_context.  */
1833 	  if (sctx->limits.nelem)
1834 	    {
1835 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1836 					 mctx->bkref_ents, str_idx);
1837 	      if (BE (err != REG_NOERROR, 0))
1838 		return err;
1839 	    }
1840 	}
1841 
1842       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1843       if (BE (err != REG_NOERROR, 0))
1844 	return err;
1845     }
1846 
1847   if (candidates && mctx->state_log[str_idx]->has_backref)
1848     {
1849       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1850       if (BE (err != REG_NOERROR, 0))
1851 	return err;
1852     }
1853   return REG_NOERROR;
1854 }
1855 
1856 static reg_errcode_t
1857 internal_function __attribute_warn_unused_result__
1858 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1859 		       const re_node_set *candidates)
1860 {
1861   reg_errcode_t err = REG_NOERROR;
1862   Idx i;
1863 
1864   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1865   if (BE (err != REG_NOERROR, 0))
1866     return err;
1867 
1868   if (!state->inveclosure.alloc)
1869     {
1870       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1871       if (BE (err != REG_NOERROR, 0))
1872 	return REG_ESPACE;
1873       for (i = 0; i < dest_nodes->nelem; i++)
1874 	{
1875 	  err = re_node_set_merge (&state->inveclosure,
1876 				   dfa->inveclosures + dest_nodes->elems[i]);
1877 	  if (BE (err != REG_NOERROR, 0))
1878 	    return REG_ESPACE;
1879 	}
1880     }
1881   return re_node_set_add_intersect (dest_nodes, candidates,
1882 				    &state->inveclosure);
1883 }
1884 
1885 static reg_errcode_t
1886 internal_function
1887 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1888 		       const re_node_set *candidates)
1889 {
1890     Idx ecl_idx;
1891     reg_errcode_t err;
1892     re_node_set *inv_eclosure = dfa->inveclosures + node;
1893     re_node_set except_nodes;
1894     re_node_set_init_empty (&except_nodes);
1895     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1896       {
1897 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1898 	if (cur_node == node)
1899 	  continue;
1900 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1901 	  {
1902 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1903 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1904 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1905 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1906 		 && re_node_set_contains (dest_nodes, edst1))
1907 		|| (REG_VALID_NONZERO_INDEX (edst2)
1908 		    && !re_node_set_contains (inv_eclosure, edst2)
1909 		    && re_node_set_contains (dest_nodes, edst2)))
1910 	      {
1911 		err = re_node_set_add_intersect (&except_nodes, candidates,
1912 						 dfa->inveclosures + cur_node);
1913 		if (BE (err != REG_NOERROR, 0))
1914 		  {
1915 		    re_node_set_free (&except_nodes);
1916 		    return err;
1917 		  }
1918 	      }
1919 	  }
1920       }
1921     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1922       {
1923 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1924 	if (!re_node_set_contains (&except_nodes, cur_node))
1925 	  {
1926 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1927 	    re_node_set_remove_at (dest_nodes, idx);
1928 	  }
1929       }
1930     re_node_set_free (&except_nodes);
1931     return REG_NOERROR;
1932 }
1933 
1934 static bool
1935 internal_function
1936 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1937 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1938 {
1939   const re_dfa_t *const dfa = mctx->dfa;
1940   Idx lim_idx, src_pos, dst_pos;
1941 
1942   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1943   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1944   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1945     {
1946       Idx subexp_idx;
1947       struct re_backref_cache_entry *ent;
1948       ent = mctx->bkref_ents + limits->elems[lim_idx];
1949       subexp_idx = dfa->nodes[ent->node].opr.idx;
1950 
1951       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1952 					   subexp_idx, dst_node, dst_idx,
1953 					   dst_bkref_idx);
1954       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1955 					   subexp_idx, src_node, src_idx,
1956 					   src_bkref_idx);
1957 
1958       /* In case of:
1959 	 <src> <dst> ( <subexp> )
1960 	 ( <subexp> ) <src> <dst>
1961 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1962       if (src_pos == dst_pos)
1963 	continue; /* This is unrelated limitation.  */
1964       else
1965 	return true;
1966     }
1967   return false;
1968 }
1969 
1970 static int
1971 internal_function
1972 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1973 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1974 {
1975   const re_dfa_t *const dfa = mctx->dfa;
1976   const re_node_set *eclosures = dfa->eclosures + from_node;
1977   Idx node_idx;
1978 
1979   /* Else, we are on the boundary: examine the nodes on the epsilon
1980      closure.  */
1981   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1982     {
1983       Idx node = eclosures->elems[node_idx];
1984       switch (dfa->nodes[node].type)
1985 	{
1986 	case OP_BACK_REF:
1987 	  if (bkref_idx != REG_MISSING)
1988 	    {
1989 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1990 	      do
1991 		{
1992 		  Idx dst;
1993 		  int cpos;
1994 
1995 		  if (ent->node != node)
1996 		    continue;
1997 
1998 		  if (subexp_idx < BITSET_WORD_BITS
1999 		      && !(ent->eps_reachable_subexps_map
2000 			   & ((bitset_word_t) 1 << subexp_idx)))
2001 		    continue;
2002 
2003 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
2004 		     OP_CLOSE_SUBEXP cases below.  But, if the
2005 		     destination node is the same node as the source
2006 		     node, don't recurse because it would cause an
2007 		     infinite loop: a regex that exhibits this behavior
2008 		     is ()\1*\1*  */
2009 		  dst = dfa->edests[node].elems[0];
2010 		  if (dst == from_node)
2011 		    {
2012 		      if (boundaries & 1)
2013 			return -1;
2014 		      else /* if (boundaries & 2) */
2015 			return 0;
2016 		    }
2017 
2018 		  cpos =
2019 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2020 						 dst, bkref_idx);
2021 		  if (cpos == -1 /* && (boundaries & 1) */)
2022 		    return -1;
2023 		  if (cpos == 0 && (boundaries & 2))
2024 		    return 0;
2025 
2026 		  if (subexp_idx < BITSET_WORD_BITS)
2027 		    ent->eps_reachable_subexps_map
2028 		      &= ~((bitset_word_t) 1 << subexp_idx);
2029 		}
2030 	      while (ent++->more);
2031 	    }
2032 	  break;
2033 
2034 	case OP_OPEN_SUBEXP:
2035 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2036 	    return -1;
2037 	  break;
2038 
2039 	case OP_CLOSE_SUBEXP:
2040 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2041 	    return 0;
2042 	  break;
2043 
2044 	default:
2045 	    break;
2046 	}
2047     }
2048 
2049   return (boundaries & 2) ? 1 : 0;
2050 }
2051 
2052 static int
2053 internal_function
2054 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2055 			   Idx subexp_idx, Idx from_node, Idx str_idx,
2056 			   Idx bkref_idx)
2057 {
2058   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2059   int boundaries;
2060 
2061   /* If we are outside the range of the subexpression, return -1 or 1.  */
2062   if (str_idx < lim->subexp_from)
2063     return -1;
2064 
2065   if (lim->subexp_to < str_idx)
2066     return 1;
2067 
2068   /* If we are within the subexpression, return 0.  */
2069   boundaries = (str_idx == lim->subexp_from);
2070   boundaries |= (str_idx == lim->subexp_to) << 1;
2071   if (boundaries == 0)
2072     return 0;
2073 
2074   /* Else, examine epsilon closure.  */
2075   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2076 				      from_node, bkref_idx);
2077 }
2078 
2079 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2080    which are against limitations from DEST_NODES. */
2081 
2082 static reg_errcode_t
2083 internal_function
2084 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2085 		     const re_node_set *candidates, re_node_set *limits,
2086 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2087 {
2088   reg_errcode_t err;
2089   Idx node_idx, lim_idx;
2090 
2091   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2092     {
2093       Idx subexp_idx;
2094       struct re_backref_cache_entry *ent;
2095       ent = bkref_ents + limits->elems[lim_idx];
2096 
2097       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2098 	continue; /* This is unrelated limitation.  */
2099 
2100       subexp_idx = dfa->nodes[ent->node].opr.idx;
2101       if (ent->subexp_to == str_idx)
2102 	{
2103 	  Idx ops_node = REG_MISSING;
2104 	  Idx cls_node = REG_MISSING;
2105 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2106 	    {
2107 	      Idx node = dest_nodes->elems[node_idx];
2108 	      re_token_type_t type = dfa->nodes[node].type;
2109 	      if (type == OP_OPEN_SUBEXP
2110 		  && subexp_idx == dfa->nodes[node].opr.idx)
2111 		ops_node = node;
2112 	      else if (type == OP_CLOSE_SUBEXP
2113 		       && subexp_idx == dfa->nodes[node].opr.idx)
2114 		cls_node = node;
2115 	    }
2116 
2117 	  /* Check the limitation of the open subexpression.  */
2118 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2119 	  if (REG_VALID_INDEX (ops_node))
2120 	    {
2121 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2122 					   candidates);
2123 	      if (BE (err != REG_NOERROR, 0))
2124 		return err;
2125 	    }
2126 
2127 	  /* Check the limitation of the close subexpression.  */
2128 	  if (REG_VALID_INDEX (cls_node))
2129 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2130 	      {
2131 		Idx node = dest_nodes->elems[node_idx];
2132 		if (!re_node_set_contains (dfa->inveclosures + node,
2133 					   cls_node)
2134 		    && !re_node_set_contains (dfa->eclosures + node,
2135 					      cls_node))
2136 		  {
2137 		    /* It is against this limitation.
2138 		       Remove it form the current sifted state.  */
2139 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2140 						 candidates);
2141 		    if (BE (err != REG_NOERROR, 0))
2142 		      return err;
2143 		    --node_idx;
2144 		  }
2145 	      }
2146 	}
2147       else /* (ent->subexp_to != str_idx)  */
2148 	{
2149 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2150 	    {
2151 	      Idx node = dest_nodes->elems[node_idx];
2152 	      re_token_type_t type = dfa->nodes[node].type;
2153 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2154 		{
2155 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2156 		    continue;
2157 		  /* It is against this limitation.
2158 		     Remove it form the current sifted state.  */
2159 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2160 					       candidates);
2161 		  if (BE (err != REG_NOERROR, 0))
2162 		    return err;
2163 		}
2164 	    }
2165 	}
2166     }
2167   return REG_NOERROR;
2168 }
2169 
2170 static reg_errcode_t
2171 internal_function __attribute_warn_unused_result__
2172 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2173 		   Idx str_idx, const re_node_set *candidates)
2174 {
2175   const re_dfa_t *const dfa = mctx->dfa;
2176   reg_errcode_t err;
2177   Idx node_idx, node;
2178   re_sift_context_t local_sctx;
2179   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2180 
2181   if (first_idx == REG_MISSING)
2182     return REG_NOERROR;
2183 
2184   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2185 
2186   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2187     {
2188       Idx enabled_idx;
2189       re_token_type_t type;
2190       struct re_backref_cache_entry *entry;
2191       node = candidates->elems[node_idx];
2192       type = dfa->nodes[node].type;
2193       /* Avoid infinite loop for the REs like "()\1+".  */
2194       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2195 	continue;
2196       if (type != OP_BACK_REF)
2197 	continue;
2198 
2199       entry = mctx->bkref_ents + first_idx;
2200       enabled_idx = first_idx;
2201       do
2202 	{
2203 	  Idx subexp_len;
2204 	  Idx to_idx;
2205 	  Idx dst_node;
2206 	  bool ok;
2207 	  re_dfastate_t *cur_state;
2208 
2209 	  if (entry->node != node)
2210 	    continue;
2211 	  subexp_len = entry->subexp_to - entry->subexp_from;
2212 	  to_idx = str_idx + subexp_len;
2213 	  dst_node = (subexp_len ? dfa->nexts[node]
2214 		      : dfa->edests[node].elems[0]);
2215 
2216 	  if (to_idx > sctx->last_str_idx
2217 	      || sctx->sifted_states[to_idx] == NULL
2218 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2219 	      || check_dst_limits (mctx, &sctx->limits, node,
2220 				   str_idx, dst_node, to_idx))
2221 	    continue;
2222 
2223 	  if (local_sctx.sifted_states == NULL)
2224 	    {
2225 	      local_sctx = *sctx;
2226 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2227 	      if (BE (err != REG_NOERROR, 0))
2228 		goto free_return;
2229 	    }
2230 	  local_sctx.last_node = node;
2231 	  local_sctx.last_str_idx = str_idx;
2232 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2233 	  if (BE (! ok, 0))
2234 	    {
2235 	      err = REG_ESPACE;
2236 	      goto free_return;
2237 	    }
2238 	  cur_state = local_sctx.sifted_states[str_idx];
2239 	  err = sift_states_backward (mctx, &local_sctx);
2240 	  if (BE (err != REG_NOERROR, 0))
2241 	    goto free_return;
2242 	  if (sctx->limited_states != NULL)
2243 	    {
2244 	      err = merge_state_array (dfa, sctx->limited_states,
2245 				       local_sctx.sifted_states,
2246 				       str_idx + 1);
2247 	      if (BE (err != REG_NOERROR, 0))
2248 		goto free_return;
2249 	    }
2250 	  local_sctx.sifted_states[str_idx] = cur_state;
2251 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2252 
2253 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2254 	  entry = mctx->bkref_ents + enabled_idx;
2255 	}
2256       while (enabled_idx++, entry++->more);
2257     }
2258   err = REG_NOERROR;
2259  free_return:
2260   if (local_sctx.sifted_states != NULL)
2261     {
2262       re_node_set_free (&local_sctx.limits);
2263     }
2264 
2265   return err;
2266 }
2267 
2268 
2269 #ifdef RE_ENABLE_I18N
2270 static int
2271 internal_function
2272 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2273 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2274 {
2275   const re_dfa_t *const dfa = mctx->dfa;
2276   int naccepted;
2277   /* Check the node can accept "multi byte".  */
2278   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2279   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2280       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2281 			    dfa->nexts[node_idx]))
2282     /* The node can't accept the "multi byte", or the
2283        destination was already thrown away, then the node
2284        could't accept the current input "multi byte".   */
2285     naccepted = 0;
2286   /* Otherwise, it is sure that the node could accept
2287      'naccepted' bytes input.  */
2288   return naccepted;
2289 }
2290 #endif /* RE_ENABLE_I18N */
2291 
2292 
2293 /* Functions for state transition.  */
2294 
2295 /* Return the next state to which the current state STATE will transit by
2296    accepting the current input byte, and update STATE_LOG if necessary.
2297    If STATE can accept a multibyte char/collating element/back reference
2298    update the destination of STATE_LOG.  */
2299 
2300 static re_dfastate_t *
2301 internal_function __attribute_warn_unused_result__
2302 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2303 	       re_dfastate_t *state)
2304 {
2305   re_dfastate_t **trtable;
2306   unsigned char ch;
2307 
2308 #ifdef RE_ENABLE_I18N
2309   /* If the current state can accept multibyte.  */
2310   if (BE (state->accept_mb, 0))
2311     {
2312       *err = transit_state_mb (mctx, state);
2313       if (BE (*err != REG_NOERROR, 0))
2314 	return NULL;
2315     }
2316 #endif /* RE_ENABLE_I18N */
2317 
2318   /* Then decide the next state with the single byte.  */
2319 #if 0
2320   if (0)
2321     /* don't use transition table  */
2322     return transit_state_sb (err, mctx, state);
2323 #endif
2324 
2325   /* Use transition table  */
2326   ch = re_string_fetch_byte (&mctx->input);
2327   for (;;)
2328     {
2329       trtable = state->trtable;
2330       if (BE (trtable != NULL, 1))
2331 	return trtable[ch];
2332 
2333       trtable = state->word_trtable;
2334       if (BE (trtable != NULL, 1))
2335 	{
2336 	  unsigned int context;
2337 	  context
2338 	    = re_string_context_at (&mctx->input,
2339 				    re_string_cur_idx (&mctx->input) - 1,
2340 				    mctx->eflags);
2341 	  if (IS_WORD_CONTEXT (context))
2342 	    return trtable[ch + SBC_MAX];
2343 	  else
2344 	    return trtable[ch];
2345 	}
2346 
2347       if (!build_trtable (mctx->dfa, state))
2348 	{
2349 	  *err = REG_ESPACE;
2350 	  return NULL;
2351 	}
2352 
2353       /* Retry, we now have a transition table.  */
2354     }
2355 }
2356 
2357 /* Update the state_log if we need */
2358 static re_dfastate_t *
2359 internal_function
2360 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2361 		      re_dfastate_t *next_state)
2362 {
2363   const re_dfa_t *const dfa = mctx->dfa;
2364   Idx cur_idx = re_string_cur_idx (&mctx->input);
2365 
2366   if (cur_idx > mctx->state_log_top)
2367     {
2368       mctx->state_log[cur_idx] = next_state;
2369       mctx->state_log_top = cur_idx;
2370     }
2371   else if (mctx->state_log[cur_idx] == 0)
2372     {
2373       mctx->state_log[cur_idx] = next_state;
2374     }
2375   else
2376     {
2377       re_dfastate_t *pstate;
2378       unsigned int context;
2379       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2380       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2381 	 the destination of a multibyte char/collating element/
2382 	 back reference.  Then the next state is the union set of
2383 	 these destinations and the results of the transition table.  */
2384       pstate = mctx->state_log[cur_idx];
2385       log_nodes = pstate->entrance_nodes;
2386       if (next_state != NULL)
2387 	{
2388 	  table_nodes = next_state->entrance_nodes;
2389 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2390 					     log_nodes);
2391 	  if (BE (*err != REG_NOERROR, 0))
2392 	    return NULL;
2393 	}
2394       else
2395 	next_nodes = *log_nodes;
2396       /* Note: We already add the nodes of the initial state,
2397 	 then we don't need to add them here.  */
2398 
2399       context = re_string_context_at (&mctx->input,
2400 				      re_string_cur_idx (&mctx->input) - 1,
2401 				      mctx->eflags);
2402       next_state = mctx->state_log[cur_idx]
2403 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2404       /* We don't need to check errors here, since the return value of
2405 	 this function is next_state and ERR is already set.  */
2406 
2407       if (table_nodes != NULL)
2408 	re_node_set_free (&next_nodes);
2409     }
2410 
2411   if (BE (dfa->nbackref, 0) && next_state != NULL)
2412     {
2413       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2414 	 later.  We must check them here, since the back references in the
2415 	 next state might use them.  */
2416       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2417 					cur_idx);
2418       if (BE (*err != REG_NOERROR, 0))
2419 	return NULL;
2420 
2421       /* If the next state has back references.  */
2422       if (next_state->has_backref)
2423 	{
2424 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2425 	  if (BE (*err != REG_NOERROR, 0))
2426 	    return NULL;
2427 	  next_state = mctx->state_log[cur_idx];
2428 	}
2429     }
2430 
2431   return next_state;
2432 }
2433 
2434 /* Skip bytes in the input that correspond to part of a
2435    multi-byte match, then look in the log for a state
2436    from which to restart matching.  */
2437 static re_dfastate_t *
2438 internal_function
2439 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2440 {
2441   re_dfastate_t *cur_state;
2442   do
2443     {
2444       Idx max = mctx->state_log_top;
2445       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2446 
2447       do
2448 	{
2449 	  if (++cur_str_idx > max)
2450 	    return NULL;
2451 	  re_string_skip_bytes (&mctx->input, 1);
2452 	}
2453       while (mctx->state_log[cur_str_idx] == NULL);
2454 
2455       cur_state = merge_state_with_log (err, mctx, NULL);
2456     }
2457   while (*err == REG_NOERROR && cur_state == NULL);
2458   return cur_state;
2459 }
2460 
2461 /* Helper functions for transit_state.  */
2462 
2463 /* From the node set CUR_NODES, pick up the nodes whose types are
2464    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2465    expression. And register them to use them later for evaluating the
2466    corresponding back references.  */
2467 
2468 static reg_errcode_t
2469 internal_function
2470 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2471 			   Idx str_idx)
2472 {
2473   const re_dfa_t *const dfa = mctx->dfa;
2474   Idx node_idx;
2475   reg_errcode_t err;
2476 
2477   /* TODO: This isn't efficient.
2478 	   Because there might be more than one nodes whose types are
2479 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2480 	   nodes.
2481 	   E.g. RE: (a){2}  */
2482   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2483     {
2484       Idx node = cur_nodes->elems[node_idx];
2485       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2486 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2487 	  && (dfa->used_bkref_map
2488 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2489 	{
2490 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2491 	  if (BE (err != REG_NOERROR, 0))
2492 	    return err;
2493 	}
2494     }
2495   return REG_NOERROR;
2496 }
2497 
2498 #if 0
2499 /* Return the next state to which the current state STATE will transit by
2500    accepting the current input byte.  */
2501 
2502 static re_dfastate_t *
2503 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2504 		  re_dfastate_t *state)
2505 {
2506   const re_dfa_t *const dfa = mctx->dfa;
2507   re_node_set next_nodes;
2508   re_dfastate_t *next_state;
2509   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2510   unsigned int context;
2511 
2512   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2513   if (BE (*err != REG_NOERROR, 0))
2514     return NULL;
2515   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2516     {
2517       Idx cur_node = state->nodes.elems[node_cnt];
2518       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2519 	{
2520 	  *err = re_node_set_merge (&next_nodes,
2521 				    dfa->eclosures + dfa->nexts[cur_node]);
2522 	  if (BE (*err != REG_NOERROR, 0))
2523 	    {
2524 	      re_node_set_free (&next_nodes);
2525 	      return NULL;
2526 	    }
2527 	}
2528     }
2529   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2530   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2531   /* We don't need to check errors here, since the return value of
2532      this function is next_state and ERR is already set.  */
2533 
2534   re_node_set_free (&next_nodes);
2535   re_string_skip_bytes (&mctx->input, 1);
2536   return next_state;
2537 }
2538 #endif
2539 
2540 #ifdef RE_ENABLE_I18N
2541 static reg_errcode_t
2542 internal_function
2543 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2544 {
2545   const re_dfa_t *const dfa = mctx->dfa;
2546   reg_errcode_t err;
2547   Idx i;
2548 
2549   for (i = 0; i < pstate->nodes.nelem; ++i)
2550     {
2551       re_node_set dest_nodes, *new_nodes;
2552       Idx cur_node_idx = pstate->nodes.elems[i];
2553       int naccepted;
2554       Idx dest_idx;
2555       unsigned int context;
2556       re_dfastate_t *dest_state;
2557 
2558       if (!dfa->nodes[cur_node_idx].accept_mb)
2559 	continue;
2560 
2561       if (dfa->nodes[cur_node_idx].constraint)
2562 	{
2563 	  context = re_string_context_at (&mctx->input,
2564 					  re_string_cur_idx (&mctx->input),
2565 					  mctx->eflags);
2566 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2567 					   context))
2568 	    continue;
2569 	}
2570 
2571       /* How many bytes the node can accept?  */
2572       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2573 					   re_string_cur_idx (&mctx->input));
2574       if (naccepted == 0)
2575 	continue;
2576 
2577       /* The node can accepts 'naccepted' bytes.  */
2578       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2579       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2580 			       : mctx->max_mb_elem_len);
2581       err = clean_state_log_if_needed (mctx, dest_idx);
2582       if (BE (err != REG_NOERROR, 0))
2583 	return err;
2584 #ifdef DEBUG
2585       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2586 #endif
2587       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2588 
2589       dest_state = mctx->state_log[dest_idx];
2590       if (dest_state == NULL)
2591 	dest_nodes = *new_nodes;
2592       else
2593 	{
2594 	  err = re_node_set_init_union (&dest_nodes,
2595 					dest_state->entrance_nodes, new_nodes);
2596 	  if (BE (err != REG_NOERROR, 0))
2597 	    return err;
2598 	}
2599       context = re_string_context_at (&mctx->input, dest_idx - 1,
2600 				      mctx->eflags);
2601       mctx->state_log[dest_idx]
2602 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2603       if (dest_state != NULL)
2604 	re_node_set_free (&dest_nodes);
2605       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2606 	return err;
2607     }
2608   return REG_NOERROR;
2609 }
2610 #endif /* RE_ENABLE_I18N */
2611 
2612 static reg_errcode_t
2613 internal_function
2614 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2615 {
2616   const re_dfa_t *const dfa = mctx->dfa;
2617   reg_errcode_t err;
2618   Idx i;
2619   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2620 
2621   for (i = 0; i < nodes->nelem; ++i)
2622     {
2623       Idx dest_str_idx, prev_nelem, bkc_idx;
2624       Idx node_idx = nodes->elems[i];
2625       unsigned int context;
2626       const re_token_t *node = dfa->nodes + node_idx;
2627       re_node_set *new_dest_nodes;
2628 
2629       /* Check whether 'node' is a backreference or not.  */
2630       if (node->type != OP_BACK_REF)
2631 	continue;
2632 
2633       if (node->constraint)
2634 	{
2635 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2636 					  mctx->eflags);
2637 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2638 	    continue;
2639 	}
2640 
2641       /* 'node' is a backreference.
2642 	 Check the substring which the substring matched.  */
2643       bkc_idx = mctx->nbkref_ents;
2644       err = get_subexp (mctx, node_idx, cur_str_idx);
2645       if (BE (err != REG_NOERROR, 0))
2646 	goto free_return;
2647 
2648       /* And add the epsilon closures (which is 'new_dest_nodes') of
2649 	 the backreference to appropriate state_log.  */
2650 #ifdef DEBUG
2651       assert (dfa->nexts[node_idx] != REG_MISSING);
2652 #endif
2653       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2654 	{
2655 	  Idx subexp_len;
2656 	  re_dfastate_t *dest_state;
2657 	  struct re_backref_cache_entry *bkref_ent;
2658 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2659 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2660 	    continue;
2661 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2662 	  new_dest_nodes = (subexp_len == 0
2663 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2664 			    : dfa->eclosures + dfa->nexts[node_idx]);
2665 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2666 			  - bkref_ent->subexp_from);
2667 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2668 					  mctx->eflags);
2669 	  dest_state = mctx->state_log[dest_str_idx];
2670 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2671 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2672 	  /* Add 'new_dest_node' to state_log.  */
2673 	  if (dest_state == NULL)
2674 	    {
2675 	      mctx->state_log[dest_str_idx]
2676 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2677 					    context);
2678 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2679 		      && err != REG_NOERROR, 0))
2680 		goto free_return;
2681 	    }
2682 	  else
2683 	    {
2684 	      re_node_set dest_nodes;
2685 	      err = re_node_set_init_union (&dest_nodes,
2686 					    dest_state->entrance_nodes,
2687 					    new_dest_nodes);
2688 	      if (BE (err != REG_NOERROR, 0))
2689 		{
2690 		  re_node_set_free (&dest_nodes);
2691 		  goto free_return;
2692 		}
2693 	      mctx->state_log[dest_str_idx]
2694 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2695 	      re_node_set_free (&dest_nodes);
2696 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2697 		      && err != REG_NOERROR, 0))
2698 		goto free_return;
2699 	    }
2700 	  /* We need to check recursively if the backreference can epsilon
2701 	     transit.  */
2702 	  if (subexp_len == 0
2703 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2704 	    {
2705 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2706 					       cur_str_idx);
2707 	      if (BE (err != REG_NOERROR, 0))
2708 		goto free_return;
2709 	      err = transit_state_bkref (mctx, new_dest_nodes);
2710 	      if (BE (err != REG_NOERROR, 0))
2711 		goto free_return;
2712 	    }
2713 	}
2714     }
2715   err = REG_NOERROR;
2716  free_return:
2717   return err;
2718 }
2719 
2720 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2721    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2722    Note that we might collect inappropriate candidates here.
2723    However, the cost of checking them strictly here is too high, then we
2724    delay these checking for prune_impossible_nodes().  */
2725 
2726 static reg_errcode_t
2727 internal_function __attribute_warn_unused_result__
2728 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2729 {
2730   const re_dfa_t *const dfa = mctx->dfa;
2731   Idx subexp_num, sub_top_idx;
2732   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2733   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2734   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2735   if (cache_idx != REG_MISSING)
2736     {
2737       const struct re_backref_cache_entry *entry
2738 	= mctx->bkref_ents + cache_idx;
2739       do
2740 	if (entry->node == bkref_node)
2741 	  return REG_NOERROR; /* We already checked it.  */
2742       while (entry++->more);
2743     }
2744 
2745   subexp_num = dfa->nodes[bkref_node].opr.idx;
2746 
2747   /* For each sub expression  */
2748   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2749     {
2750       reg_errcode_t err;
2751       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2752       re_sub_match_last_t *sub_last;
2753       Idx sub_last_idx, sl_str, bkref_str_off;
2754 
2755       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2756 	continue; /* It isn't related.  */
2757 
2758       sl_str = sub_top->str_idx;
2759       bkref_str_off = bkref_str_idx;
2760       /* At first, check the last node of sub expressions we already
2761 	 evaluated.  */
2762       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2763 	{
2764 	  regoff_t sl_str_diff;
2765 	  sub_last = sub_top->lasts[sub_last_idx];
2766 	  sl_str_diff = sub_last->str_idx - sl_str;
2767 	  /* The matched string by the sub expression match with the substring
2768 	     at the back reference?  */
2769 	  if (sl_str_diff > 0)
2770 	    {
2771 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2772 		{
2773 		  /* Not enough chars for a successful match.  */
2774 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2775 		    break;
2776 
2777 		  err = clean_state_log_if_needed (mctx,
2778 						   bkref_str_off
2779 						   + sl_str_diff);
2780 		  if (BE (err != REG_NOERROR, 0))
2781 		    return err;
2782 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2783 		}
2784 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2785 		/* We don't need to search this sub expression any more.  */
2786 		break;
2787 	    }
2788 	  bkref_str_off += sl_str_diff;
2789 	  sl_str += sl_str_diff;
2790 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2791 				bkref_str_idx);
2792 
2793 	  /* Reload buf, since the preceding call might have reallocated
2794 	     the buffer.  */
2795 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2796 
2797 	  if (err == REG_NOMATCH)
2798 	    continue;
2799 	  if (BE (err != REG_NOERROR, 0))
2800 	    return err;
2801 	}
2802 
2803       if (sub_last_idx < sub_top->nlasts)
2804 	continue;
2805       if (sub_last_idx > 0)
2806 	++sl_str;
2807       /* Then, search for the other last nodes of the sub expression.  */
2808       for (; sl_str <= bkref_str_idx; ++sl_str)
2809 	{
2810 	  Idx cls_node;
2811 	  regoff_t sl_str_off;
2812 	  const re_node_set *nodes;
2813 	  sl_str_off = sl_str - sub_top->str_idx;
2814 	  /* The matched string by the sub expression match with the substring
2815 	     at the back reference?  */
2816 	  if (sl_str_off > 0)
2817 	    {
2818 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2819 		{
2820 		  /* If we are at the end of the input, we cannot match.  */
2821 		  if (bkref_str_off >= mctx->input.len)
2822 		    break;
2823 
2824 		  err = extend_buffers (mctx, bkref_str_off + 1);
2825 		  if (BE (err != REG_NOERROR, 0))
2826 		    return err;
2827 
2828 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2829 		}
2830 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2831 		break; /* We don't need to search this sub expression
2832 			  any more.  */
2833 	    }
2834 	  if (mctx->state_log[sl_str] == NULL)
2835 	    continue;
2836 	  /* Does this state have a ')' of the sub expression?  */
2837 	  nodes = &mctx->state_log[sl_str]->nodes;
2838 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2839 				       OP_CLOSE_SUBEXP);
2840 	  if (cls_node == REG_MISSING)
2841 	    continue; /* No.  */
2842 	  if (sub_top->path == NULL)
2843 	    {
2844 	      sub_top->path = calloc (sizeof (state_array_t),
2845 				      sl_str - sub_top->str_idx + 1);
2846 	      if (sub_top->path == NULL)
2847 		return REG_ESPACE;
2848 	    }
2849 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2850 	     in the current context?  */
2851 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2852 			       sub_top->str_idx, cls_node, sl_str,
2853 			       OP_CLOSE_SUBEXP);
2854 	  if (err == REG_NOMATCH)
2855 	      continue;
2856 	  if (BE (err != REG_NOERROR, 0))
2857 	      return err;
2858 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2859 	  if (BE (sub_last == NULL, 0))
2860 	    return REG_ESPACE;
2861 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2862 				bkref_str_idx);
2863 	  if (err == REG_NOMATCH)
2864 	    continue;
2865 	}
2866     }
2867   return REG_NOERROR;
2868 }
2869 
2870 /* Helper functions for get_subexp().  */
2871 
2872 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2873    If it can arrive, register the sub expression expressed with SUB_TOP
2874    and SUB_LAST.  */
2875 
2876 static reg_errcode_t
2877 internal_function
2878 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2879 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2880 {
2881   reg_errcode_t err;
2882   Idx to_idx;
2883   /* Can the subexpression arrive the back reference?  */
2884   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2885 		       sub_last->str_idx, bkref_node, bkref_str,
2886 		       OP_OPEN_SUBEXP);
2887   if (err != REG_NOERROR)
2888     return err;
2889   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2890 			     sub_last->str_idx);
2891   if (BE (err != REG_NOERROR, 0))
2892     return err;
2893   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2894   return clean_state_log_if_needed (mctx, to_idx);
2895 }
2896 
2897 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2898    Search '(' if FL_OPEN, or search ')' otherwise.
2899    TODO: This function isn't efficient...
2900 	 Because there might be more than one nodes whose types are
2901 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2902 	 nodes.
2903 	 E.g. RE: (a){2}  */
2904 
2905 static Idx
2906 internal_function
2907 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2908 		  Idx subexp_idx, int type)
2909 {
2910   Idx cls_idx;
2911   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2912     {
2913       Idx cls_node = nodes->elems[cls_idx];
2914       const re_token_t *node = dfa->nodes + cls_node;
2915       if (node->type == type
2916 	  && node->opr.idx == subexp_idx)
2917 	return cls_node;
2918     }
2919   return REG_MISSING;
2920 }
2921 
2922 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2923    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2924    heavily reused.
2925    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2926 
2927 static reg_errcode_t
2928 internal_function __attribute_warn_unused_result__
2929 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2930 	       Idx top_str, Idx last_node, Idx last_str, int type)
2931 {
2932   const re_dfa_t *const dfa = mctx->dfa;
2933   reg_errcode_t err = REG_NOERROR;
2934   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2935   re_dfastate_t *cur_state = NULL;
2936   re_node_set *cur_nodes, next_nodes;
2937   re_dfastate_t **backup_state_log;
2938   unsigned int context;
2939 
2940   subexp_num = dfa->nodes[top_node].opr.idx;
2941   /* Extend the buffer if we need.  */
2942   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2943     {
2944       re_dfastate_t **new_array;
2945       Idx old_alloc = path->alloc;
2946       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2947       Idx new_alloc;
2948       if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2949 	return REG_ESPACE;
2950       new_alloc = old_alloc + incr_alloc;
2951       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2952 	return REG_ESPACE;
2953       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2954       if (BE (new_array == NULL, 0))
2955 	return REG_ESPACE;
2956       path->array = new_array;
2957       path->alloc = new_alloc;
2958       memset (new_array + old_alloc, '\0',
2959 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2960     }
2961 
2962   str_idx = path->next_idx ? path->next_idx : top_str;
2963 
2964   /* Temporary modify MCTX.  */
2965   backup_state_log = mctx->state_log;
2966   backup_cur_idx = mctx->input.cur_idx;
2967   mctx->state_log = path->array;
2968   mctx->input.cur_idx = str_idx;
2969 
2970   /* Setup initial node set.  */
2971   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2972   if (str_idx == top_str)
2973     {
2974       err = re_node_set_init_1 (&next_nodes, top_node);
2975       if (BE (err != REG_NOERROR, 0))
2976 	return err;
2977       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2978       if (BE (err != REG_NOERROR, 0))
2979 	{
2980 	  re_node_set_free (&next_nodes);
2981 	  return err;
2982 	}
2983     }
2984   else
2985     {
2986       cur_state = mctx->state_log[str_idx];
2987       if (cur_state && cur_state->has_backref)
2988 	{
2989 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2990 	  if (BE (err != REG_NOERROR, 0))
2991 	    return err;
2992 	}
2993       else
2994 	re_node_set_init_empty (&next_nodes);
2995     }
2996   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2997     {
2998       if (next_nodes.nelem)
2999 	{
3000 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3001 				    subexp_num, type);
3002 	  if (BE (err != REG_NOERROR, 0))
3003 	    {
3004 	      re_node_set_free (&next_nodes);
3005 	      return err;
3006 	    }
3007 	}
3008       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3009       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3010 	{
3011 	  re_node_set_free (&next_nodes);
3012 	  return err;
3013 	}
3014       mctx->state_log[str_idx] = cur_state;
3015     }
3016 
3017   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3018     {
3019       re_node_set_empty (&next_nodes);
3020       if (mctx->state_log[str_idx + 1])
3021 	{
3022 	  err = re_node_set_merge (&next_nodes,
3023 				   &mctx->state_log[str_idx + 1]->nodes);
3024 	  if (BE (err != REG_NOERROR, 0))
3025 	    {
3026 	      re_node_set_free (&next_nodes);
3027 	      return err;
3028 	    }
3029 	}
3030       if (cur_state)
3031 	{
3032 	  err = check_arrival_add_next_nodes (mctx, str_idx,
3033 					      &cur_state->non_eps_nodes,
3034 					      &next_nodes);
3035 	  if (BE (err != REG_NOERROR, 0))
3036 	    {
3037 	      re_node_set_free (&next_nodes);
3038 	      return err;
3039 	    }
3040 	}
3041       ++str_idx;
3042       if (next_nodes.nelem)
3043 	{
3044 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3045 	  if (BE (err != REG_NOERROR, 0))
3046 	    {
3047 	      re_node_set_free (&next_nodes);
3048 	      return err;
3049 	    }
3050 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3051 				    subexp_num, type);
3052 	  if (BE (err != REG_NOERROR, 0))
3053 	    {
3054 	      re_node_set_free (&next_nodes);
3055 	      return err;
3056 	    }
3057 	}
3058       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3059       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3060       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3061 	{
3062 	  re_node_set_free (&next_nodes);
3063 	  return err;
3064 	}
3065       mctx->state_log[str_idx] = cur_state;
3066       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3067     }
3068   re_node_set_free (&next_nodes);
3069   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3070 	       : &mctx->state_log[last_str]->nodes);
3071   path->next_idx = str_idx;
3072 
3073   /* Fix MCTX.  */
3074   mctx->state_log = backup_state_log;
3075   mctx->input.cur_idx = backup_cur_idx;
3076 
3077   /* Then check the current node set has the node LAST_NODE.  */
3078   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3079     return REG_NOERROR;
3080 
3081   return REG_NOMATCH;
3082 }
3083 
3084 /* Helper functions for check_arrival.  */
3085 
3086 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3087    to NEXT_NODES.
3088    TODO: This function is similar to the functions transit_state*(),
3089 	 however this function has many additional works.
3090 	 Can't we unify them?  */
3091 
3092 static reg_errcode_t
3093 internal_function __attribute_warn_unused_result__
3094 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3095 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3096 {
3097   const re_dfa_t *const dfa = mctx->dfa;
3098   bool ok;
3099   Idx cur_idx;
3100 #ifdef RE_ENABLE_I18N
3101   reg_errcode_t err = REG_NOERROR;
3102 #endif
3103   re_node_set union_set;
3104   re_node_set_init_empty (&union_set);
3105   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3106     {
3107       int naccepted = 0;
3108       Idx cur_node = cur_nodes->elems[cur_idx];
3109 #ifdef DEBUG
3110       re_token_type_t type = dfa->nodes[cur_node].type;
3111       assert (!IS_EPSILON_NODE (type));
3112 #endif
3113 #ifdef RE_ENABLE_I18N
3114       /* If the node may accept "multi byte".  */
3115       if (dfa->nodes[cur_node].accept_mb)
3116 	{
3117 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3118 					       str_idx);
3119 	  if (naccepted > 1)
3120 	    {
3121 	      re_dfastate_t *dest_state;
3122 	      Idx next_node = dfa->nexts[cur_node];
3123 	      Idx next_idx = str_idx + naccepted;
3124 	      dest_state = mctx->state_log[next_idx];
3125 	      re_node_set_empty (&union_set);
3126 	      if (dest_state)
3127 		{
3128 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3129 		  if (BE (err != REG_NOERROR, 0))
3130 		    {
3131 		      re_node_set_free (&union_set);
3132 		      return err;
3133 		    }
3134 		}
3135 	      ok = re_node_set_insert (&union_set, next_node);
3136 	      if (BE (! ok, 0))
3137 		{
3138 		  re_node_set_free (&union_set);
3139 		  return REG_ESPACE;
3140 		}
3141 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3142 							    &union_set);
3143 	      if (BE (mctx->state_log[next_idx] == NULL
3144 		      && err != REG_NOERROR, 0))
3145 		{
3146 		  re_node_set_free (&union_set);
3147 		  return err;
3148 		}
3149 	    }
3150 	}
3151 #endif /* RE_ENABLE_I18N */
3152       if (naccepted
3153 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3154 	{
3155 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3156 	  if (BE (! ok, 0))
3157 	    {
3158 	      re_node_set_free (&union_set);
3159 	      return REG_ESPACE;
3160 	    }
3161 	}
3162     }
3163   re_node_set_free (&union_set);
3164   return REG_NOERROR;
3165 }
3166 
3167 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3168    CUR_NODES, however exclude the nodes which are:
3169     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3170     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3171 */
3172 
3173 static reg_errcode_t
3174 internal_function
3175 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3176 			  Idx ex_subexp, int type)
3177 {
3178   reg_errcode_t err;
3179   Idx idx, outside_node;
3180   re_node_set new_nodes;
3181 #ifdef DEBUG
3182   assert (cur_nodes->nelem);
3183 #endif
3184   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3185   if (BE (err != REG_NOERROR, 0))
3186     return err;
3187   /* Create a new node set NEW_NODES with the nodes which are epsilon
3188      closures of the node in CUR_NODES.  */
3189 
3190   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3191     {
3192       Idx cur_node = cur_nodes->elems[idx];
3193       const re_node_set *eclosure = dfa->eclosures + cur_node;
3194       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3195       if (outside_node == REG_MISSING)
3196 	{
3197 	  /* There are no problematic nodes, just merge them.  */
3198 	  err = re_node_set_merge (&new_nodes, eclosure);
3199 	  if (BE (err != REG_NOERROR, 0))
3200 	    {
3201 	      re_node_set_free (&new_nodes);
3202 	      return err;
3203 	    }
3204 	}
3205       else
3206 	{
3207 	  /* There are problematic nodes, re-calculate incrementally.  */
3208 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3209 					      ex_subexp, type);
3210 	  if (BE (err != REG_NOERROR, 0))
3211 	    {
3212 	      re_node_set_free (&new_nodes);
3213 	      return err;
3214 	    }
3215 	}
3216     }
3217   re_node_set_free (cur_nodes);
3218   *cur_nodes = new_nodes;
3219   return REG_NOERROR;
3220 }
3221 
3222 /* Helper function for check_arrival_expand_ecl.
3223    Check incrementally the epsilon closure of TARGET, and if it isn't
3224    problematic append it to DST_NODES.  */
3225 
3226 static reg_errcode_t
3227 internal_function __attribute_warn_unused_result__
3228 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3229 			      Idx target, Idx ex_subexp, int type)
3230 {
3231   Idx cur_node;
3232   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3233     {
3234       bool ok;
3235 
3236       if (dfa->nodes[cur_node].type == type
3237 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3238 	{
3239 	  if (type == OP_CLOSE_SUBEXP)
3240 	    {
3241 	      ok = re_node_set_insert (dst_nodes, cur_node);
3242 	      if (BE (! ok, 0))
3243 		return REG_ESPACE;
3244 	    }
3245 	  break;
3246 	}
3247       ok = re_node_set_insert (dst_nodes, cur_node);
3248       if (BE (! ok, 0))
3249 	return REG_ESPACE;
3250       if (dfa->edests[cur_node].nelem == 0)
3251 	break;
3252       if (dfa->edests[cur_node].nelem == 2)
3253 	{
3254 	  reg_errcode_t err;
3255 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3256 					      dfa->edests[cur_node].elems[1],
3257 					      ex_subexp, type);
3258 	  if (BE (err != REG_NOERROR, 0))
3259 	    return err;
3260 	}
3261       cur_node = dfa->edests[cur_node].elems[0];
3262     }
3263   return REG_NOERROR;
3264 }
3265 
3266 
3267 /* For all the back references in the current state, calculate the
3268    destination of the back references by the appropriate entry
3269    in MCTX->BKREF_ENTS.  */
3270 
3271 static reg_errcode_t
3272 internal_function __attribute_warn_unused_result__
3273 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3274 		    Idx cur_str, Idx subexp_num, int type)
3275 {
3276   const re_dfa_t *const dfa = mctx->dfa;
3277   reg_errcode_t err;
3278   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3279   struct re_backref_cache_entry *ent;
3280 
3281   if (cache_idx_start == REG_MISSING)
3282     return REG_NOERROR;
3283 
3284  restart:
3285   ent = mctx->bkref_ents + cache_idx_start;
3286   do
3287     {
3288       Idx to_idx, next_node;
3289 
3290       /* Is this entry ENT is appropriate?  */
3291       if (!re_node_set_contains (cur_nodes, ent->node))
3292 	continue; /* No.  */
3293 
3294       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3295       /* Calculate the destination of the back reference, and append it
3296 	 to MCTX->STATE_LOG.  */
3297       if (to_idx == cur_str)
3298 	{
3299 	  /* The backreference did epsilon transit, we must re-check all the
3300 	     node in the current state.  */
3301 	  re_node_set new_dests;
3302 	  reg_errcode_t err2, err3;
3303 	  next_node = dfa->edests[ent->node].elems[0];
3304 	  if (re_node_set_contains (cur_nodes, next_node))
3305 	    continue;
3306 	  err = re_node_set_init_1 (&new_dests, next_node);
3307 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3308 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3309 	  re_node_set_free (&new_dests);
3310 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3311 		  || err3 != REG_NOERROR, 0))
3312 	    {
3313 	      err = (err != REG_NOERROR ? err
3314 		     : (err2 != REG_NOERROR ? err2 : err3));
3315 	      return err;
3316 	    }
3317 	  /* TODO: It is still inefficient...  */
3318 	  goto restart;
3319 	}
3320       else
3321 	{
3322 	  re_node_set union_set;
3323 	  next_node = dfa->nexts[ent->node];
3324 	  if (mctx->state_log[to_idx])
3325 	    {
3326 	      bool ok;
3327 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3328 					next_node))
3329 		continue;
3330 	      err = re_node_set_init_copy (&union_set,
3331 					   &mctx->state_log[to_idx]->nodes);
3332 	      ok = re_node_set_insert (&union_set, next_node);
3333 	      if (BE (err != REG_NOERROR || ! ok, 0))
3334 		{
3335 		  re_node_set_free (&union_set);
3336 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3337 		  return err;
3338 		}
3339 	    }
3340 	  else
3341 	    {
3342 	      err = re_node_set_init_1 (&union_set, next_node);
3343 	      if (BE (err != REG_NOERROR, 0))
3344 		return err;
3345 	    }
3346 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3347 	  re_node_set_free (&union_set);
3348 	  if (BE (mctx->state_log[to_idx] == NULL
3349 		  && err != REG_NOERROR, 0))
3350 	    return err;
3351 	}
3352     }
3353   while (ent++->more);
3354   return REG_NOERROR;
3355 }
3356 
3357 /* Build transition table for the state.
3358    Return true if successful.  */
3359 
3360 static bool
3361 internal_function
3362 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3363 {
3364   reg_errcode_t err;
3365   Idx i, j;
3366   int ch;
3367   bool need_word_trtable = false;
3368   bitset_word_t elem, mask;
3369   bool dests_node_malloced = false;
3370   bool dest_states_malloced = false;
3371   Idx ndests; /* Number of the destination states from 'state'.  */
3372   re_dfastate_t **trtable;
3373   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3374   re_node_set follows, *dests_node;
3375   bitset_t *dests_ch;
3376   bitset_t acceptable;
3377 
3378   struct dests_alloc
3379   {
3380     re_node_set dests_node[SBC_MAX];
3381     bitset_t dests_ch[SBC_MAX];
3382   } *dests_alloc;
3383 
3384   /* We build DFA states which corresponds to the destination nodes
3385      from 'state'.  'dests_node[i]' represents the nodes which i-th
3386      destination state contains, and 'dests_ch[i]' represents the
3387      characters which i-th destination state accepts.  */
3388   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3389     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3390   else
3391     {
3392       dests_alloc = re_malloc (struct dests_alloc, 1);
3393       if (BE (dests_alloc == NULL, 0))
3394 	return false;
3395       dests_node_malloced = true;
3396     }
3397   dests_node = dests_alloc->dests_node;
3398   dests_ch = dests_alloc->dests_ch;
3399 
3400   /* Initialize transition table.  */
3401   state->word_trtable = state->trtable = NULL;
3402 
3403   /* At first, group all nodes belonging to 'state' into several
3404      destinations.  */
3405   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3406   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3407     {
3408       if (dests_node_malloced)
3409 	free (dests_alloc);
3410       /* Return false in case of an error, true otherwise.  */
3411       if (ndests == 0)
3412 	{
3413 	  state->trtable = (re_dfastate_t **)
3414 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3415           if (BE (state->trtable == NULL, 0))
3416             return false;
3417 	  return true;
3418 	}
3419       return false;
3420     }
3421 
3422   err = re_node_set_alloc (&follows, ndests + 1);
3423   if (BE (err != REG_NOERROR, 0))
3424     goto out_free;
3425 
3426   /* Avoid arithmetic overflow in size calculation.  */
3427   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3428 	    / (3 * sizeof (re_dfastate_t *)))
3429 	   < ndests),
3430 	  0))
3431     goto out_free;
3432 
3433   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3434 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3435     dest_states = (re_dfastate_t **)
3436       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3437   else
3438     {
3439       dest_states = (re_dfastate_t **)
3440 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3441       if (BE (dest_states == NULL, 0))
3442 	{
3443 out_free:
3444 	  if (dest_states_malloced)
3445 	    free (dest_states);
3446 	  re_node_set_free (&follows);
3447 	  for (i = 0; i < ndests; ++i)
3448 	    re_node_set_free (dests_node + i);
3449 	  if (dests_node_malloced)
3450 	    free (dests_alloc);
3451 	  return false;
3452 	}
3453       dest_states_malloced = true;
3454     }
3455   dest_states_word = dest_states + ndests;
3456   dest_states_nl = dest_states_word + ndests;
3457   bitset_empty (acceptable);
3458 
3459   /* Then build the states for all destinations.  */
3460   for (i = 0; i < ndests; ++i)
3461     {
3462       Idx next_node;
3463       re_node_set_empty (&follows);
3464       /* Merge the follows of this destination states.  */
3465       for (j = 0; j < dests_node[i].nelem; ++j)
3466 	{
3467 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3468 	  if (next_node != REG_MISSING)
3469 	    {
3470 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3471 	      if (BE (err != REG_NOERROR, 0))
3472 		goto out_free;
3473 	    }
3474 	}
3475       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3476       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3477 	goto out_free;
3478       /* If the new state has context constraint,
3479 	 build appropriate states for these contexts.  */
3480       if (dest_states[i]->has_constraint)
3481 	{
3482 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3483 							  CONTEXT_WORD);
3484 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3485 	    goto out_free;
3486 
3487 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3488 	    need_word_trtable = true;
3489 
3490 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3491 							CONTEXT_NEWLINE);
3492 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3493 	    goto out_free;
3494 	}
3495       else
3496 	{
3497 	  dest_states_word[i] = dest_states[i];
3498 	  dest_states_nl[i] = dest_states[i];
3499 	}
3500       bitset_merge (acceptable, dests_ch[i]);
3501     }
3502 
3503   if (!BE (need_word_trtable, 0))
3504     {
3505       /* We don't care about whether the following character is a word
3506 	 character, or we are in a single-byte character set so we can
3507 	 discern by looking at the character code: allocate a
3508 	 256-entry transition table.  */
3509       trtable = state->trtable =
3510 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3511       if (BE (trtable == NULL, 0))
3512 	goto out_free;
3513 
3514       /* For all characters ch...:  */
3515       for (i = 0; i < BITSET_WORDS; ++i)
3516 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3517 	     elem;
3518 	     mask <<= 1, elem >>= 1, ++ch)
3519 	  if (BE (elem & 1, 0))
3520 	    {
3521 	      /* There must be exactly one destination which accepts
3522 		 character ch.  See group_nodes_into_DFAstates.  */
3523 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3524 		;
3525 
3526 	      /* j-th destination accepts the word character ch.  */
3527 	      if (dfa->word_char[i] & mask)
3528 		trtable[ch] = dest_states_word[j];
3529 	      else
3530 		trtable[ch] = dest_states[j];
3531 	    }
3532     }
3533   else
3534     {
3535       /* We care about whether the following character is a word
3536 	 character, and we are in a multi-byte character set: discern
3537 	 by looking at the character code: build two 256-entry
3538 	 transition tables, one starting at trtable[0] and one
3539 	 starting at trtable[SBC_MAX].  */
3540       trtable = state->word_trtable =
3541 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3542       if (BE (trtable == NULL, 0))
3543 	goto out_free;
3544 
3545       /* For all characters ch...:  */
3546       for (i = 0; i < BITSET_WORDS; ++i)
3547 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3548 	     elem;
3549 	     mask <<= 1, elem >>= 1, ++ch)
3550 	  if (BE (elem & 1, 0))
3551 	    {
3552 	      /* There must be exactly one destination which accepts
3553 		 character ch.  See group_nodes_into_DFAstates.  */
3554 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3555 		;
3556 
3557 	      /* j-th destination accepts the word character ch.  */
3558 	      trtable[ch] = dest_states[j];
3559 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3560 	    }
3561     }
3562 
3563   /* new line */
3564   if (bitset_contain (acceptable, NEWLINE_CHAR))
3565     {
3566       /* The current state accepts newline character.  */
3567       for (j = 0; j < ndests; ++j)
3568 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3569 	  {
3570 	    /* k-th destination accepts newline character.  */
3571 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3572 	    if (need_word_trtable)
3573 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3574 	    /* There must be only one destination which accepts
3575 	       newline.  See group_nodes_into_DFAstates.  */
3576 	    break;
3577 	  }
3578     }
3579 
3580   if (dest_states_malloced)
3581     free (dest_states);
3582 
3583   re_node_set_free (&follows);
3584   for (i = 0; i < ndests; ++i)
3585     re_node_set_free (dests_node + i);
3586 
3587   if (dests_node_malloced)
3588     free (dests_alloc);
3589 
3590   return true;
3591 }
3592 
3593 /* Group all nodes belonging to STATE into several destinations.
3594    Then for all destinations, set the nodes belonging to the destination
3595    to DESTS_NODE[i] and set the characters accepted by the destination
3596    to DEST_CH[i].  This function return the number of destinations.  */
3597 
3598 static Idx
3599 internal_function
3600 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3601 			    re_node_set *dests_node, bitset_t *dests_ch)
3602 {
3603   reg_errcode_t err;
3604   bool ok;
3605   Idx i, j, k;
3606   Idx ndests; /* Number of the destinations from 'state'.  */
3607   bitset_t accepts; /* Characters a node can accept.  */
3608   const re_node_set *cur_nodes = &state->nodes;
3609   bitset_empty (accepts);
3610   ndests = 0;
3611 
3612   /* For all the nodes belonging to 'state',  */
3613   for (i = 0; i < cur_nodes->nelem; ++i)
3614     {
3615       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3616       re_token_type_t type = node->type;
3617       unsigned int constraint = node->constraint;
3618 
3619       /* Enumerate all single byte character this node can accept.  */
3620       if (type == CHARACTER)
3621 	bitset_set (accepts, node->opr.c);
3622       else if (type == SIMPLE_BRACKET)
3623 	{
3624 	  bitset_merge (accepts, node->opr.sbcset);
3625 	}
3626       else if (type == OP_PERIOD)
3627 	{
3628 #ifdef RE_ENABLE_I18N
3629 	  if (dfa->mb_cur_max > 1)
3630 	    bitset_merge (accepts, dfa->sb_char);
3631 	  else
3632 #endif
3633 	    bitset_set_all (accepts);
3634 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3635 	    bitset_clear (accepts, '\n');
3636 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3637 	    bitset_clear (accepts, '\0');
3638 	}
3639 #ifdef RE_ENABLE_I18N
3640       else if (type == OP_UTF8_PERIOD)
3641 	{
3642 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3643 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3644 	  else
3645 	    bitset_merge (accepts, utf8_sb_map);
3646 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3647 	    bitset_clear (accepts, '\n');
3648 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3649 	    bitset_clear (accepts, '\0');
3650 	}
3651 #endif
3652       else
3653 	continue;
3654 
3655       /* Check the 'accepts' and sift the characters which are not
3656 	 match it the context.  */
3657       if (constraint)
3658 	{
3659 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3660 	    {
3661 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3662 	      bitset_empty (accepts);
3663 	      if (accepts_newline)
3664 		bitset_set (accepts, NEWLINE_CHAR);
3665 	      else
3666 		continue;
3667 	    }
3668 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3669 	    {
3670 	      bitset_empty (accepts);
3671 	      continue;
3672 	    }
3673 
3674 	  if (constraint & NEXT_WORD_CONSTRAINT)
3675 	    {
3676 	      bitset_word_t any_set = 0;
3677 	      if (type == CHARACTER && !node->word_char)
3678 		{
3679 		  bitset_empty (accepts);
3680 		  continue;
3681 		}
3682 #ifdef RE_ENABLE_I18N
3683 	      if (dfa->mb_cur_max > 1)
3684 		for (j = 0; j < BITSET_WORDS; ++j)
3685 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3686 	      else
3687 #endif
3688 		for (j = 0; j < BITSET_WORDS; ++j)
3689 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3690 	      if (!any_set)
3691 		continue;
3692 	    }
3693 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3694 	    {
3695 	      bitset_word_t any_set = 0;
3696 	      if (type == CHARACTER && node->word_char)
3697 		{
3698 		  bitset_empty (accepts);
3699 		  continue;
3700 		}
3701 #ifdef RE_ENABLE_I18N
3702 	      if (dfa->mb_cur_max > 1)
3703 		for (j = 0; j < BITSET_WORDS; ++j)
3704 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3705 	      else
3706 #endif
3707 		for (j = 0; j < BITSET_WORDS; ++j)
3708 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3709 	      if (!any_set)
3710 		continue;
3711 	    }
3712 	}
3713 
3714       /* Then divide 'accepts' into DFA states, or create a new
3715 	 state.  Above, we make sure that accepts is not empty.  */
3716       for (j = 0; j < ndests; ++j)
3717 	{
3718 	  bitset_t intersec; /* Intersection sets, see below.  */
3719 	  bitset_t remains;
3720 	  /* Flags, see below.  */
3721 	  bitset_word_t has_intersec, not_subset, not_consumed;
3722 
3723 	  /* Optimization, skip if this state doesn't accept the character.  */
3724 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3725 	    continue;
3726 
3727 	  /* Enumerate the intersection set of this state and 'accepts'.  */
3728 	  has_intersec = 0;
3729 	  for (k = 0; k < BITSET_WORDS; ++k)
3730 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3731 	  /* And skip if the intersection set is empty.  */
3732 	  if (!has_intersec)
3733 	    continue;
3734 
3735 	  /* Then check if this state is a subset of 'accepts'.  */
3736 	  not_subset = not_consumed = 0;
3737 	  for (k = 0; k < BITSET_WORDS; ++k)
3738 	    {
3739 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3740 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3741 	    }
3742 
3743 	  /* If this state isn't a subset of 'accepts', create a
3744 	     new group state, which has the 'remains'. */
3745 	  if (not_subset)
3746 	    {
3747 	      bitset_copy (dests_ch[ndests], remains);
3748 	      bitset_copy (dests_ch[j], intersec);
3749 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3750 	      if (BE (err != REG_NOERROR, 0))
3751 		goto error_return;
3752 	      ++ndests;
3753 	    }
3754 
3755 	  /* Put the position in the current group. */
3756 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3757 	  if (BE (! ok, 0))
3758 	    goto error_return;
3759 
3760 	  /* If all characters are consumed, go to next node. */
3761 	  if (!not_consumed)
3762 	    break;
3763 	}
3764       /* Some characters remain, create a new group. */
3765       if (j == ndests)
3766 	{
3767 	  bitset_copy (dests_ch[ndests], accepts);
3768 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3769 	  if (BE (err != REG_NOERROR, 0))
3770 	    goto error_return;
3771 	  ++ndests;
3772 	  bitset_empty (accepts);
3773 	}
3774     }
3775   return ndests;
3776  error_return:
3777   for (j = 0; j < ndests; ++j)
3778     re_node_set_free (dests_node + j);
3779   return REG_MISSING;
3780 }
3781 
3782 #ifdef RE_ENABLE_I18N
3783 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3784    Return the number of the bytes the node accepts.
3785    STR_IDX is the current index of the input string.
3786 
3787    This function handles the nodes which can accept one character, or
3788    one collating element like '.', '[a-z]', opposite to the other nodes
3789    can only accept one byte.  */
3790 
3791 static int
3792 internal_function
3793 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3794 			 const re_string_t *input, Idx str_idx)
3795 {
3796   const re_token_t *node = dfa->nodes + node_idx;
3797   int char_len, elem_len;
3798   Idx i;
3799 
3800   if (BE (node->type == OP_UTF8_PERIOD, 0))
3801     {
3802       unsigned char c = re_string_byte_at (input, str_idx), d;
3803       if (BE (c < 0xc2, 1))
3804 	return 0;
3805 
3806       if (str_idx + 2 > input->len)
3807 	return 0;
3808 
3809       d = re_string_byte_at (input, str_idx + 1);
3810       if (c < 0xe0)
3811 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3812       else if (c < 0xf0)
3813 	{
3814 	  char_len = 3;
3815 	  if (c == 0xe0 && d < 0xa0)
3816 	    return 0;
3817 	}
3818       else if (c < 0xf8)
3819 	{
3820 	  char_len = 4;
3821 	  if (c == 0xf0 && d < 0x90)
3822 	    return 0;
3823 	}
3824       else if (c < 0xfc)
3825 	{
3826 	  char_len = 5;
3827 	  if (c == 0xf8 && d < 0x88)
3828 	    return 0;
3829 	}
3830       else if (c < 0xfe)
3831 	{
3832 	  char_len = 6;
3833 	  if (c == 0xfc && d < 0x84)
3834 	    return 0;
3835 	}
3836       else
3837 	return 0;
3838 
3839       if (str_idx + char_len > input->len)
3840 	return 0;
3841 
3842       for (i = 1; i < char_len; ++i)
3843 	{
3844 	  d = re_string_byte_at (input, str_idx + i);
3845 	  if (d < 0x80 || d > 0xbf)
3846 	    return 0;
3847 	}
3848       return char_len;
3849     }
3850 
3851   char_len = re_string_char_size_at (input, str_idx);
3852   if (node->type == OP_PERIOD)
3853     {
3854       if (char_len <= 1)
3855 	return 0;
3856       /* FIXME: I don't think this if is needed, as both '\n'
3857 	 and '\0' are char_len == 1.  */
3858       /* '.' accepts any one character except the following two cases.  */
3859       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3860 	   re_string_byte_at (input, str_idx) == '\n') ||
3861 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3862 	   re_string_byte_at (input, str_idx) == '\0'))
3863 	return 0;
3864       return char_len;
3865     }
3866 
3867   elem_len = re_string_elem_size_at (input, str_idx);
3868   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3869     return 0;
3870 
3871   if (node->type == COMPLEX_BRACKET)
3872     {
3873       const re_charset_t *cset = node->opr.mbcset;
3874 # ifdef _LIBC
3875       const unsigned char *pin
3876 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3877       Idx j;
3878       uint32_t nrules;
3879 # endif /* _LIBC */
3880       int match_len = 0;
3881       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3882 		    ? re_string_wchar_at (input, str_idx) : 0);
3883 
3884       /* match with multibyte character?  */
3885       for (i = 0; i < cset->nmbchars; ++i)
3886 	if (wc == cset->mbchars[i])
3887 	  {
3888 	    match_len = char_len;
3889 	    goto check_node_accept_bytes_match;
3890 	  }
3891       /* match with character_class?  */
3892       for (i = 0; i < cset->nchar_classes; ++i)
3893 	{
3894 	  wctype_t wt = cset->char_classes[i];
3895 	  if (__iswctype (wc, wt))
3896 	    {
3897 	      match_len = char_len;
3898 	      goto check_node_accept_bytes_match;
3899 	    }
3900 	}
3901 
3902 # ifdef _LIBC
3903       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3904       if (nrules != 0)
3905 	{
3906 	  unsigned int in_collseq = 0;
3907 	  const int32_t *table, *indirect;
3908 	  const unsigned char *weights, *extra;
3909 	  const char *collseqwc;
3910 	  /* This #include defines a local function!  */
3911 #  include <locale/weight.h>
3912 
3913 	  /* match with collating_symbol?  */
3914 	  if (cset->ncoll_syms)
3915 	    extra = (const unsigned char *)
3916 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3917 	  for (i = 0; i < cset->ncoll_syms; ++i)
3918 	    {
3919 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3920 	      /* Compare the length of input collating element and
3921 		 the length of current collating element.  */
3922 	      if (*coll_sym != elem_len)
3923 		continue;
3924 	      /* Compare each bytes.  */
3925 	      for (j = 0; j < *coll_sym; j++)
3926 		if (pin[j] != coll_sym[1 + j])
3927 		  break;
3928 	      if (j == *coll_sym)
3929 		{
3930 		  /* Match if every bytes is equal.  */
3931 		  match_len = j;
3932 		  goto check_node_accept_bytes_match;
3933 		}
3934 	    }
3935 
3936 	  if (cset->nranges)
3937 	    {
3938 	      if (elem_len <= char_len)
3939 		{
3940 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3941 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3942 		}
3943 	      else
3944 		in_collseq = find_collation_sequence_value (pin, elem_len);
3945 	    }
3946 	  /* match with range expression?  */
3947 	  /* FIXME: Implement rational ranges here, too.  */
3948 	  for (i = 0; i < cset->nranges; ++i)
3949 	    if (cset->range_starts[i] <= in_collseq
3950 		&& in_collseq <= cset->range_ends[i])
3951 	      {
3952 		match_len = elem_len;
3953 		goto check_node_accept_bytes_match;
3954 	      }
3955 
3956 	  /* match with equivalence_class?  */
3957 	  if (cset->nequiv_classes)
3958 	    {
3959 	      const unsigned char *cp = pin;
3960 	      table = (const int32_t *)
3961 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3962 	      weights = (const unsigned char *)
3963 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3964 	      extra = (const unsigned char *)
3965 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3966 	      indirect = (const int32_t *)
3967 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3968 	      int32_t idx = findidx (&cp, elem_len);
3969 	      if (idx > 0)
3970 		for (i = 0; i < cset->nequiv_classes; ++i)
3971 		  {
3972 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3973 		    size_t weight_len = weights[idx & 0xffffff];
3974 		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3975 			&& (idx >> 24) == (equiv_class_idx >> 24))
3976 		      {
3977 			Idx cnt = 0;
3978 
3979 			idx &= 0xffffff;
3980 			equiv_class_idx &= 0xffffff;
3981 
3982 			while (cnt <= weight_len
3983 			       && (weights[equiv_class_idx + 1 + cnt]
3984 				   == weights[idx + 1 + cnt]))
3985 			  ++cnt;
3986 			if (cnt > weight_len)
3987 			  {
3988 			    match_len = elem_len;
3989 			    goto check_node_accept_bytes_match;
3990 			  }
3991 		      }
3992 		  }
3993 	    }
3994 	}
3995       else
3996 # endif /* _LIBC */
3997 	{
3998 	  /* match with range expression?  */
3999 	  for (i = 0; i < cset->nranges; ++i)
4000 	    {
4001 	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
4002 		{
4003 		  match_len = char_len;
4004 		  goto check_node_accept_bytes_match;
4005 		}
4006 	    }
4007 	}
4008     check_node_accept_bytes_match:
4009       if (!cset->non_match)
4010 	return match_len;
4011       else
4012 	{
4013 	  if (match_len > 0)
4014 	    return 0;
4015 	  else
4016 	    return (elem_len > char_len) ? elem_len : char_len;
4017 	}
4018     }
4019   return 0;
4020 }
4021 
4022 # ifdef _LIBC
4023 static unsigned int
4024 internal_function
4025 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4026 {
4027   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4028   if (nrules == 0)
4029     {
4030       if (mbs_len == 1)
4031 	{
4032 	  /* No valid character.  Match it as a single byte character.  */
4033 	  const unsigned char *collseq = (const unsigned char *)
4034 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4035 	  return collseq[mbs[0]];
4036 	}
4037       return UINT_MAX;
4038     }
4039   else
4040     {
4041       int32_t idx;
4042       const unsigned char *extra = (const unsigned char *)
4043 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4044       int32_t extrasize = (const unsigned char *)
4045 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4046 
4047       for (idx = 0; idx < extrasize;)
4048 	{
4049 	  int mbs_cnt;
4050 	  bool found = false;
4051 	  int32_t elem_mbs_len;
4052 	  /* Skip the name of collating element name.  */
4053 	  idx = idx + extra[idx] + 1;
4054 	  elem_mbs_len = extra[idx++];
4055 	  if (mbs_len == elem_mbs_len)
4056 	    {
4057 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4058 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4059 		  break;
4060 	      if (mbs_cnt == elem_mbs_len)
4061 		/* Found the entry.  */
4062 		found = true;
4063 	    }
4064 	  /* Skip the byte sequence of the collating element.  */
4065 	  idx += elem_mbs_len;
4066 	  /* Adjust for the alignment.  */
4067 	  idx = (idx + 3) & ~3;
4068 	  /* Skip the collation sequence value.  */
4069 	  idx += sizeof (uint32_t);
4070 	  /* Skip the wide char sequence of the collating element.  */
4071 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4072 	  /* If we found the entry, return the sequence value.  */
4073 	  if (found)
4074 	    return *(uint32_t *) (extra + idx);
4075 	  /* Skip the collation sequence value.  */
4076 	  idx += sizeof (uint32_t);
4077 	}
4078       return UINT_MAX;
4079     }
4080 }
4081 # endif /* _LIBC */
4082 #endif /* RE_ENABLE_I18N */
4083 
4084 /* Check whether the node accepts the byte which is IDX-th
4085    byte of the INPUT.  */
4086 
4087 static bool
4088 internal_function
4089 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4090 		   Idx idx)
4091 {
4092   unsigned char ch;
4093   ch = re_string_byte_at (&mctx->input, idx);
4094   switch (node->type)
4095     {
4096     case CHARACTER:
4097       if (node->opr.c != ch)
4098         return false;
4099       break;
4100 
4101     case SIMPLE_BRACKET:
4102       if (!bitset_contain (node->opr.sbcset, ch))
4103         return false;
4104       break;
4105 
4106 #ifdef RE_ENABLE_I18N
4107     case OP_UTF8_PERIOD:
4108       if (ch >= ASCII_CHARS)
4109         return false;
4110       /* FALLTHROUGH */
4111 #endif
4112     case OP_PERIOD:
4113       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4114 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4115 	return false;
4116       break;
4117 
4118     default:
4119       return false;
4120     }
4121 
4122   if (node->constraint)
4123     {
4124       /* The node has constraints.  Check whether the current context
4125 	 satisfies the constraints.  */
4126       unsigned int context = re_string_context_at (&mctx->input, idx,
4127 						   mctx->eflags);
4128       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4129 	return false;
4130     }
4131 
4132   return true;
4133 }
4134 
4135 /* Extend the buffers, if the buffers have run out.  */
4136 
4137 static reg_errcode_t
4138 internal_function __attribute_warn_unused_result__
4139 extend_buffers (re_match_context_t *mctx, int min_len)
4140 {
4141   reg_errcode_t ret;
4142   re_string_t *pstr = &mctx->input;
4143 
4144   /* Avoid overflow.  */
4145   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4146           <= pstr->bufs_len, 0))
4147     return REG_ESPACE;
4148 
4149   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4150   ret = re_string_realloc_buffers (pstr,
4151 				   MAX (min_len,
4152 					MIN (pstr->len, pstr->bufs_len * 2)));
4153   if (BE (ret != REG_NOERROR, 0))
4154     return ret;
4155 
4156   if (mctx->state_log != NULL)
4157     {
4158       /* And double the length of state_log.  */
4159       /* XXX We have no indication of the size of this buffer.  If this
4160 	 allocation fail we have no indication that the state_log array
4161 	 does not have the right size.  */
4162       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4163 					      pstr->bufs_len + 1);
4164       if (BE (new_array == NULL, 0))
4165 	return REG_ESPACE;
4166       mctx->state_log = new_array;
4167     }
4168 
4169   /* Then reconstruct the buffers.  */
4170   if (pstr->icase)
4171     {
4172 #ifdef RE_ENABLE_I18N
4173       if (pstr->mb_cur_max > 1)
4174 	{
4175 	  ret = build_wcs_upper_buffer (pstr);
4176 	  if (BE (ret != REG_NOERROR, 0))
4177 	    return ret;
4178 	}
4179       else
4180 #endif /* RE_ENABLE_I18N  */
4181 	build_upper_buffer (pstr);
4182     }
4183   else
4184     {
4185 #ifdef RE_ENABLE_I18N
4186       if (pstr->mb_cur_max > 1)
4187 	build_wcs_buffer (pstr);
4188       else
4189 #endif /* RE_ENABLE_I18N  */
4190 	{
4191 	  if (pstr->trans != NULL)
4192 	    re_string_translate_buffer (pstr);
4193 	}
4194     }
4195   return REG_NOERROR;
4196 }
4197 
4198 
4199 /* Functions for matching context.  */
4200 
4201 /* Initialize MCTX.  */
4202 
4203 static reg_errcode_t
4204 internal_function __attribute_warn_unused_result__
4205 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4206 {
4207   mctx->eflags = eflags;
4208   mctx->match_last = REG_MISSING;
4209   if (n > 0)
4210     {
4211       /* Avoid overflow.  */
4212       size_t max_object_size =
4213 	MAX (sizeof (struct re_backref_cache_entry),
4214 	     sizeof (re_sub_match_top_t *));
4215       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4216 	return REG_ESPACE;
4217 
4218       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4219       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4220       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4221 	return REG_ESPACE;
4222     }
4223   /* Already zero-ed by the caller.
4224      else
4225        mctx->bkref_ents = NULL;
4226      mctx->nbkref_ents = 0;
4227      mctx->nsub_tops = 0;  */
4228   mctx->abkref_ents = n;
4229   mctx->max_mb_elem_len = 1;
4230   mctx->asub_tops = n;
4231   return REG_NOERROR;
4232 }
4233 
4234 /* Clean the entries which depend on the current input in MCTX.
4235    This function must be invoked when the matcher changes the start index
4236    of the input, or changes the input string.  */
4237 
4238 static void
4239 internal_function
4240 match_ctx_clean (re_match_context_t *mctx)
4241 {
4242   Idx st_idx;
4243   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4244     {
4245       Idx sl_idx;
4246       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4247       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4248 	{
4249 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4250 	  re_free (last->path.array);
4251 	  re_free (last);
4252 	}
4253       re_free (top->lasts);
4254       if (top->path)
4255 	{
4256 	  re_free (top->path->array);
4257 	  re_free (top->path);
4258 	}
4259       free (top);
4260     }
4261 
4262   mctx->nsub_tops = 0;
4263   mctx->nbkref_ents = 0;
4264 }
4265 
4266 /* Free all the memory associated with MCTX.  */
4267 
4268 static void
4269 internal_function
4270 match_ctx_free (re_match_context_t *mctx)
4271 {
4272   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4273   match_ctx_clean (mctx);
4274   re_free (mctx->sub_tops);
4275   re_free (mctx->bkref_ents);
4276 }
4277 
4278 /* Add a new backreference entry to MCTX.
4279    Note that we assume that caller never call this function with duplicate
4280    entry, and call with STR_IDX which isn't smaller than any existing entry.
4281 */
4282 
4283 static reg_errcode_t
4284 internal_function __attribute_warn_unused_result__
4285 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4286 		     Idx to)
4287 {
4288   if (mctx->nbkref_ents >= mctx->abkref_ents)
4289     {
4290       struct re_backref_cache_entry* new_entry;
4291       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4292 			      mctx->abkref_ents * 2);
4293       if (BE (new_entry == NULL, 0))
4294 	{
4295 	  re_free (mctx->bkref_ents);
4296 	  return REG_ESPACE;
4297 	}
4298       mctx->bkref_ents = new_entry;
4299       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4300 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4301       mctx->abkref_ents *= 2;
4302     }
4303   if (mctx->nbkref_ents > 0
4304       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4305     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4306 
4307   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4308   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4309   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4310   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4311 
4312   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4313      If bit N is clear, means that this entry won't epsilon-transition to
4314      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4315      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4316      such node.
4317 
4318      A backreference does not epsilon-transition unless it is empty, so set
4319      to all zeros if FROM != TO.  */
4320   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4321     = (from == to ? -1 : 0);
4322 
4323   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4324   if (mctx->max_mb_elem_len < to - from)
4325     mctx->max_mb_elem_len = to - from;
4326   return REG_NOERROR;
4327 }
4328 
4329 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4330    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4331 
4332 static Idx
4333 internal_function
4334 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4335 {
4336   Idx left, right, mid, last;
4337   last = right = mctx->nbkref_ents;
4338   for (left = 0; left < right;)
4339     {
4340       mid = (left + right) / 2;
4341       if (mctx->bkref_ents[mid].str_idx < str_idx)
4342 	left = mid + 1;
4343       else
4344 	right = mid;
4345     }
4346   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4347     return left;
4348   else
4349     return REG_MISSING;
4350 }
4351 
4352 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4353    at STR_IDX.  */
4354 
4355 static reg_errcode_t
4356 internal_function __attribute_warn_unused_result__
4357 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4358 {
4359 #ifdef DEBUG
4360   assert (mctx->sub_tops != NULL);
4361   assert (mctx->asub_tops > 0);
4362 #endif
4363   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4364     {
4365       Idx new_asub_tops = mctx->asub_tops * 2;
4366       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4367 						   re_sub_match_top_t *,
4368 						   new_asub_tops);
4369       if (BE (new_array == NULL, 0))
4370 	return REG_ESPACE;
4371       mctx->sub_tops = new_array;
4372       mctx->asub_tops = new_asub_tops;
4373     }
4374   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4375   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4376     return REG_ESPACE;
4377   mctx->sub_tops[mctx->nsub_tops]->node = node;
4378   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4379   return REG_NOERROR;
4380 }
4381 
4382 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4383    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4384 
4385 static re_sub_match_last_t *
4386 internal_function
4387 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4388 {
4389   re_sub_match_last_t *new_entry;
4390   if (BE (subtop->nlasts == subtop->alasts, 0))
4391     {
4392       Idx new_alasts = 2 * subtop->alasts + 1;
4393       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4394 						    re_sub_match_last_t *,
4395 						    new_alasts);
4396       if (BE (new_array == NULL, 0))
4397 	return NULL;
4398       subtop->lasts = new_array;
4399       subtop->alasts = new_alasts;
4400     }
4401   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4402   if (BE (new_entry != NULL, 1))
4403     {
4404       subtop->lasts[subtop->nlasts] = new_entry;
4405       new_entry->node = node;
4406       new_entry->str_idx = str_idx;
4407       ++subtop->nlasts;
4408     }
4409   return new_entry;
4410 }
4411 
4412 static void
4413 internal_function
4414 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4415 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4416 {
4417   sctx->sifted_states = sifted_sts;
4418   sctx->limited_states = limited_sts;
4419   sctx->last_node = last_node;
4420   sctx->last_str_idx = last_str_idx;
4421   re_node_set_init_empty (&sctx->limits);
4422 }
4423