xref: /dragonfly/contrib/diffutils/lib/regexec.c (revision 6ea1f93e)
144b87433SJohn Marino /* Extended regular expression matching and search library.
2*6ea1f93eSDaniel Fojt    Copyright (C) 2002-2018 Free Software Foundation, Inc.
344b87433SJohn Marino    This file is part of the GNU C Library.
444b87433SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
544b87433SJohn Marino 
64536c563SJohn Marino    The GNU C Library is free software; you can redistribute it and/or
74536c563SJohn Marino    modify it under the terms of the GNU General Public
84536c563SJohn Marino    License as published by the Free Software Foundation; either
94536c563SJohn Marino    version 3 of the License, or (at your option) any later version.
1044b87433SJohn Marino 
114536c563SJohn Marino    The GNU C Library is distributed in the hope that it will be useful,
1244b87433SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
134536c563SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
144536c563SJohn Marino    General Public License for more details.
1544b87433SJohn Marino 
164536c563SJohn Marino    You should have received a copy of the GNU General Public
174536c563SJohn Marino    License along with the GNU C Library; if not, see
18*6ea1f93eSDaniel Fojt    <https://www.gnu.org/licenses/>.  */
1944b87433SJohn Marino 
2044b87433SJohn Marino static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21*6ea1f93eSDaniel Fojt 				     Idx n);
22*6ea1f93eSDaniel Fojt static void match_ctx_clean (re_match_context_t *mctx);
23*6ea1f93eSDaniel Fojt static void match_ctx_free (re_match_context_t *cache);
2444b87433SJohn Marino static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25*6ea1f93eSDaniel Fojt 					  Idx str_idx, Idx from, Idx to);
26*6ea1f93eSDaniel Fojt static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
2744b87433SJohn Marino static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28*6ea1f93eSDaniel Fojt 					   Idx str_idx);
2944b87433SJohn Marino static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30*6ea1f93eSDaniel Fojt 						    Idx node, Idx str_idx);
3144b87433SJohn Marino static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
3244b87433SJohn Marino 			   re_dfastate_t **limited_sts, Idx last_node,
33*6ea1f93eSDaniel Fojt 			   Idx last_str_idx);
3444b87433SJohn Marino static reg_errcode_t re_search_internal (const regex_t *preg,
3544b87433SJohn Marino 					 const char *string, Idx length,
3644b87433SJohn Marino 					 Idx start, Idx last_start, Idx stop,
3744b87433SJohn Marino 					 size_t nmatch, regmatch_t pmatch[],
38*6ea1f93eSDaniel Fojt 					 int eflags);
3944b87433SJohn Marino static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
4044b87433SJohn Marino 				  const char *string1, Idx length1,
4144b87433SJohn Marino 				  const char *string2, Idx length2,
4244b87433SJohn Marino 				  Idx start, regoff_t range,
4344b87433SJohn Marino 				  struct re_registers *regs,
44*6ea1f93eSDaniel Fojt 				  Idx stop, bool ret_len);
4544b87433SJohn Marino static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
4644b87433SJohn Marino 				const char *string, Idx length, Idx start,
4744b87433SJohn Marino 				regoff_t range, Idx stop,
4844b87433SJohn Marino 				struct re_registers *regs,
49*6ea1f93eSDaniel Fojt 				bool ret_len);
504536c563SJohn Marino static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51*6ea1f93eSDaniel Fojt                               Idx nregs, int regs_allocated);
52*6ea1f93eSDaniel Fojt static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
5344b87433SJohn Marino static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54*6ea1f93eSDaniel Fojt 			   Idx *p_match_first);
5544b87433SJohn Marino static Idx check_halt_state_context (const re_match_context_t *mctx,
56*6ea1f93eSDaniel Fojt 				     const re_dfastate_t *state, Idx idx);
5744b87433SJohn Marino static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
5844b87433SJohn Marino 			 regmatch_t *prev_idx_match, Idx cur_node,
59*6ea1f93eSDaniel Fojt 			 Idx cur_idx, Idx nmatch);
6044b87433SJohn Marino static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
6144b87433SJohn Marino 				      Idx str_idx, Idx dest_node, Idx nregs,
6244b87433SJohn Marino 				      regmatch_t *regs,
63*6ea1f93eSDaniel Fojt 				      re_node_set *eps_via_nodes);
6444b87433SJohn Marino static reg_errcode_t set_regs (const regex_t *preg,
6544b87433SJohn Marino 			       const re_match_context_t *mctx,
6644b87433SJohn Marino 			       size_t nmatch, regmatch_t *pmatch,
67*6ea1f93eSDaniel Fojt 			       bool fl_backtrack);
68*6ea1f93eSDaniel Fojt static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
6944b87433SJohn Marino 
7044b87433SJohn Marino #ifdef RE_ENABLE_I18N
7144b87433SJohn Marino static int sift_states_iter_mb (const re_match_context_t *mctx,
7244b87433SJohn Marino 				re_sift_context_t *sctx,
73*6ea1f93eSDaniel Fojt 				Idx node_idx, Idx str_idx, Idx max_str_idx);
7444b87433SJohn Marino #endif /* RE_ENABLE_I18N */
7544b87433SJohn Marino static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76*6ea1f93eSDaniel Fojt 					   re_sift_context_t *sctx);
7744b87433SJohn Marino static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
7844b87433SJohn Marino 					  re_sift_context_t *sctx, Idx str_idx,
79*6ea1f93eSDaniel Fojt 					  re_node_set *cur_dest);
8044b87433SJohn Marino static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
8144b87433SJohn Marino 					      re_sift_context_t *sctx,
8244b87433SJohn Marino 					      Idx str_idx,
83*6ea1f93eSDaniel Fojt 					      re_node_set *dest_nodes);
8444b87433SJohn Marino static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
8544b87433SJohn Marino 					    re_node_set *dest_nodes,
86*6ea1f93eSDaniel Fojt 					    const re_node_set *candidates);
8744b87433SJohn Marino static bool check_dst_limits (const re_match_context_t *mctx,
8844b87433SJohn Marino 			      const re_node_set *limits,
8944b87433SJohn Marino 			      Idx dst_node, Idx dst_idx, Idx src_node,
90*6ea1f93eSDaniel Fojt 			      Idx src_idx);
9144b87433SJohn Marino static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
9244b87433SJohn Marino 					int boundaries, Idx subexp_idx,
93*6ea1f93eSDaniel Fojt 					Idx from_node, Idx bkref_idx);
9444b87433SJohn Marino static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
9544b87433SJohn Marino 				      Idx limit, Idx subexp_idx,
9644b87433SJohn Marino 				      Idx node, Idx str_idx,
97*6ea1f93eSDaniel Fojt 				      Idx bkref_idx);
9844b87433SJohn Marino static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
9944b87433SJohn Marino 					  re_node_set *dest_nodes,
10044b87433SJohn Marino 					  const re_node_set *candidates,
10144b87433SJohn Marino 					  re_node_set *limits,
10244b87433SJohn Marino 					  struct re_backref_cache_entry *bkref_ents,
103*6ea1f93eSDaniel Fojt 					  Idx str_idx);
10444b87433SJohn Marino static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
10544b87433SJohn Marino 					re_sift_context_t *sctx,
106*6ea1f93eSDaniel Fojt 					Idx str_idx, const re_node_set *candidates);
10744b87433SJohn Marino static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
10844b87433SJohn Marino 					re_dfastate_t **dst,
109*6ea1f93eSDaniel Fojt 					re_dfastate_t **src, Idx num);
11044b87433SJohn Marino static re_dfastate_t *find_recover_state (reg_errcode_t *err,
111*6ea1f93eSDaniel Fojt 					 re_match_context_t *mctx);
11244b87433SJohn Marino static re_dfastate_t *transit_state (reg_errcode_t *err,
11344b87433SJohn Marino 				     re_match_context_t *mctx,
114*6ea1f93eSDaniel Fojt 				     re_dfastate_t *state);
11544b87433SJohn Marino static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
11644b87433SJohn Marino 					    re_match_context_t *mctx,
117*6ea1f93eSDaniel Fojt 					    re_dfastate_t *next_state);
11844b87433SJohn Marino static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
11944b87433SJohn Marino 						re_node_set *cur_nodes,
120*6ea1f93eSDaniel Fojt 						Idx str_idx);
12144b87433SJohn Marino #if 0
12244b87433SJohn Marino static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
12344b87433SJohn Marino 					re_match_context_t *mctx,
124*6ea1f93eSDaniel Fojt 					re_dfastate_t *pstate);
12544b87433SJohn Marino #endif
12644b87433SJohn Marino #ifdef RE_ENABLE_I18N
12744b87433SJohn Marino static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128*6ea1f93eSDaniel Fojt 				       re_dfastate_t *pstate);
12944b87433SJohn Marino #endif /* RE_ENABLE_I18N */
13044b87433SJohn Marino static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131*6ea1f93eSDaniel Fojt 					  const re_node_set *nodes);
13244b87433SJohn Marino static reg_errcode_t get_subexp (re_match_context_t *mctx,
133*6ea1f93eSDaniel Fojt 				 Idx bkref_node, Idx bkref_str_idx);
13444b87433SJohn Marino static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
13544b87433SJohn Marino 				     const re_sub_match_top_t *sub_top,
13644b87433SJohn Marino 				     re_sub_match_last_t *sub_last,
137*6ea1f93eSDaniel Fojt 				     Idx bkref_node, Idx bkref_str);
13844b87433SJohn Marino static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
139*6ea1f93eSDaniel Fojt 			     Idx subexp_idx, int type);
14044b87433SJohn Marino static reg_errcode_t check_arrival (re_match_context_t *mctx,
14144b87433SJohn Marino 				    state_array_t *path, Idx top_node,
14244b87433SJohn Marino 				    Idx top_str, Idx last_node, Idx last_str,
143*6ea1f93eSDaniel Fojt 				    int type);
14444b87433SJohn Marino static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
14544b87433SJohn Marino 						   Idx str_idx,
14644b87433SJohn Marino 						   re_node_set *cur_nodes,
147*6ea1f93eSDaniel Fojt 						   re_node_set *next_nodes);
14844b87433SJohn Marino static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
14944b87433SJohn Marino 					       re_node_set *cur_nodes,
150*6ea1f93eSDaniel Fojt 					       Idx ex_subexp, int type);
15144b87433SJohn Marino static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
15244b87433SJohn Marino 						   re_node_set *dst_nodes,
15344b87433SJohn Marino 						   Idx target, Idx ex_subexp,
154*6ea1f93eSDaniel Fojt 						   int type);
15544b87433SJohn Marino static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
15644b87433SJohn Marino 					 re_node_set *cur_nodes, Idx cur_str,
157*6ea1f93eSDaniel Fojt 					 Idx subexp_num, int type);
158*6ea1f93eSDaniel Fojt static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
15944b87433SJohn Marino #ifdef RE_ENABLE_I18N
16044b87433SJohn Marino static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161*6ea1f93eSDaniel Fojt 				    const re_string_t *input, Idx idx);
16244b87433SJohn Marino # ifdef _LIBC
16344b87433SJohn Marino static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164*6ea1f93eSDaniel Fojt 						   size_t name_len);
16544b87433SJohn Marino # endif /* _LIBC */
16644b87433SJohn Marino #endif /* RE_ENABLE_I18N */
16744b87433SJohn Marino static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
16844b87433SJohn Marino 				       const re_dfastate_t *state,
16944b87433SJohn Marino 				       re_node_set *states_node,
170*6ea1f93eSDaniel Fojt 				       bitset_t *states_ch);
17144b87433SJohn Marino static bool check_node_accept (const re_match_context_t *mctx,
172*6ea1f93eSDaniel Fojt 			       const re_token_t *node, Idx idx);
173*6ea1f93eSDaniel Fojt static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
17444b87433SJohn Marino 
17544b87433SJohn Marino /* Entry point for POSIX code.  */
17644b87433SJohn Marino 
17744b87433SJohn Marino /* regexec searches for a given pattern, specified by PREG, in the
17844b87433SJohn Marino    string STRING.
17944b87433SJohn Marino 
18044b87433SJohn Marino    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
1814536c563SJohn Marino    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
18244b87433SJohn Marino    least NMATCH elements, and we set them to the offsets of the
18344b87433SJohn Marino    corresponding matched substrings.
18444b87433SJohn Marino 
1854536c563SJohn Marino    EFLAGS specifies "execution flags" which affect matching: if
18644b87433SJohn Marino    REG_NOTBOL is set, then ^ does not match at the beginning of the
18744b87433SJohn Marino    string; if REG_NOTEOL is set, then $ does not match at the end.
18844b87433SJohn Marino 
18944b87433SJohn Marino    We return 0 if we find a match and REG_NOMATCH if not.  */
19044b87433SJohn Marino 
19144b87433SJohn Marino int
regexec(const regex_t * _Restrict_ preg,const char * _Restrict_ string,size_t nmatch,regmatch_t pmatch[],int eflags)192*6ea1f93eSDaniel Fojt regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string,
193*6ea1f93eSDaniel Fojt 	 size_t nmatch, regmatch_t pmatch[], int eflags)
19444b87433SJohn Marino {
19544b87433SJohn Marino   reg_errcode_t err;
19644b87433SJohn Marino   Idx start, length;
1974536c563SJohn Marino   re_dfa_t *dfa = preg->buffer;
19844b87433SJohn Marino 
19944b87433SJohn Marino   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
20044b87433SJohn Marino     return REG_BADPAT;
20144b87433SJohn Marino 
20244b87433SJohn Marino   if (eflags & REG_STARTEND)
20344b87433SJohn Marino     {
20444b87433SJohn Marino       start = pmatch[0].rm_so;
20544b87433SJohn Marino       length = pmatch[0].rm_eo;
20644b87433SJohn Marino     }
20744b87433SJohn Marino   else
20844b87433SJohn Marino     {
20944b87433SJohn Marino       start = 0;
21044b87433SJohn Marino       length = strlen (string);
21144b87433SJohn Marino     }
21244b87433SJohn Marino 
213*6ea1f93eSDaniel Fojt   lock_lock (dfa->lock);
21444b87433SJohn Marino   if (preg->no_sub)
21544b87433SJohn Marino     err = re_search_internal (preg, string, length, start, length,
21644b87433SJohn Marino 			      length, 0, NULL, eflags);
21744b87433SJohn Marino   else
21844b87433SJohn Marino     err = re_search_internal (preg, string, length, start, length,
21944b87433SJohn Marino 			      length, nmatch, pmatch, eflags);
220*6ea1f93eSDaniel Fojt   lock_unlock (dfa->lock);
22144b87433SJohn Marino   return err != REG_NOERROR;
22244b87433SJohn Marino }
22344b87433SJohn Marino 
22444b87433SJohn Marino #ifdef _LIBC
225*6ea1f93eSDaniel Fojt libc_hidden_def (__regexec)
226*6ea1f93eSDaniel Fojt 
22744b87433SJohn Marino # include <shlib-compat.h>
22844b87433SJohn Marino versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
22944b87433SJohn Marino 
23044b87433SJohn Marino # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
23144b87433SJohn Marino __typeof__ (__regexec) __compat_regexec;
23244b87433SJohn Marino 
23344b87433SJohn Marino int
23444b87433SJohn Marino attribute_compat_text_section
__compat_regexec(const regex_t * _Restrict_ preg,const char * _Restrict_ string,size_t nmatch,regmatch_t pmatch[],int eflags)23544b87433SJohn Marino __compat_regexec (const regex_t *_Restrict_ preg,
23644b87433SJohn Marino 		  const char *_Restrict_ string, size_t nmatch,
23744b87433SJohn Marino 		  regmatch_t pmatch[], int eflags)
23844b87433SJohn Marino {
23944b87433SJohn Marino   return regexec (preg, string, nmatch, pmatch,
24044b87433SJohn Marino 		  eflags & (REG_NOTBOL | REG_NOTEOL));
24144b87433SJohn Marino }
24244b87433SJohn Marino compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
24344b87433SJohn Marino # endif
24444b87433SJohn Marino #endif
24544b87433SJohn Marino 
24644b87433SJohn Marino /* Entry points for GNU code.  */
24744b87433SJohn Marino 
24844b87433SJohn Marino /* re_match, re_search, re_match_2, re_search_2
24944b87433SJohn Marino 
25044b87433SJohn Marino    The former two functions operate on STRING with length LENGTH,
25144b87433SJohn Marino    while the later two operate on concatenation of STRING1 and STRING2
25244b87433SJohn Marino    with lengths LENGTH1 and LENGTH2, respectively.
25344b87433SJohn Marino 
25444b87433SJohn Marino    re_match() matches the compiled pattern in BUFP against the string,
25544b87433SJohn Marino    starting at index START.
25644b87433SJohn Marino 
25744b87433SJohn Marino    re_search() first tries matching at index START, then it tries to match
25844b87433SJohn Marino    starting from index START + 1, and so on.  The last start position tried
25944b87433SJohn Marino    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
26044b87433SJohn Marino    way as re_match().)
26144b87433SJohn Marino 
26244b87433SJohn Marino    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
26344b87433SJohn Marino    the first STOP characters of the concatenation of the strings should be
26444b87433SJohn Marino    concerned.
26544b87433SJohn Marino 
26644b87433SJohn Marino    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
26744b87433SJohn Marino    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
26844b87433SJohn Marino    computed relative to the concatenation, not relative to the individual
26944b87433SJohn Marino    strings.)
27044b87433SJohn Marino 
27144b87433SJohn Marino    On success, re_match* functions return the length of the match, re_search*
27244b87433SJohn Marino    return the position of the start of the match.  Return value -1 means no
27344b87433SJohn Marino    match was found and -2 indicates an internal error.  */
27444b87433SJohn Marino 
27544b87433SJohn Marino regoff_t
re_match(struct re_pattern_buffer * bufp,const char * string,Idx length,Idx start,struct re_registers * regs)276*6ea1f93eSDaniel Fojt re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
277*6ea1f93eSDaniel Fojt 	  Idx start, struct re_registers *regs)
27844b87433SJohn Marino {
27944b87433SJohn Marino   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
28044b87433SJohn Marino }
28144b87433SJohn Marino #ifdef _LIBC
weak_alias(__re_match,re_match)28244b87433SJohn Marino weak_alias (__re_match, re_match)
28344b87433SJohn Marino #endif
28444b87433SJohn Marino 
28544b87433SJohn Marino regoff_t
286*6ea1f93eSDaniel Fojt re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
287*6ea1f93eSDaniel Fojt 	   Idx start, regoff_t range, struct re_registers *regs)
28844b87433SJohn Marino {
28944b87433SJohn Marino   return re_search_stub (bufp, string, length, start, range, length, regs,
29044b87433SJohn Marino 			 false);
29144b87433SJohn Marino }
29244b87433SJohn Marino #ifdef _LIBC
weak_alias(__re_search,re_search)29344b87433SJohn Marino weak_alias (__re_search, re_search)
29444b87433SJohn Marino #endif
29544b87433SJohn Marino 
29644b87433SJohn Marino regoff_t
297*6ea1f93eSDaniel Fojt re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
298*6ea1f93eSDaniel Fojt 	    const char *string2, Idx length2, Idx start,
299*6ea1f93eSDaniel Fojt 	    struct re_registers *regs, Idx stop)
30044b87433SJohn Marino {
30144b87433SJohn Marino   return re_search_2_stub (bufp, string1, length1, string2, length2,
30244b87433SJohn Marino 			   start, 0, regs, stop, true);
30344b87433SJohn Marino }
30444b87433SJohn Marino #ifdef _LIBC
weak_alias(__re_match_2,re_match_2)30544b87433SJohn Marino weak_alias (__re_match_2, re_match_2)
30644b87433SJohn Marino #endif
30744b87433SJohn Marino 
30844b87433SJohn Marino regoff_t
309*6ea1f93eSDaniel Fojt re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
310*6ea1f93eSDaniel Fojt 	     const char *string2, Idx length2, Idx start, regoff_t range,
311*6ea1f93eSDaniel Fojt 	     struct re_registers *regs, Idx stop)
31244b87433SJohn Marino {
31344b87433SJohn Marino   return re_search_2_stub (bufp, string1, length1, string2, length2,
31444b87433SJohn Marino 			   start, range, regs, stop, false);
31544b87433SJohn Marino }
31644b87433SJohn Marino #ifdef _LIBC
weak_alias(__re_search_2,re_search_2)31744b87433SJohn Marino weak_alias (__re_search_2, re_search_2)
31844b87433SJohn Marino #endif
31944b87433SJohn Marino 
32044b87433SJohn Marino static regoff_t
321*6ea1f93eSDaniel Fojt re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
322*6ea1f93eSDaniel Fojt 		  Idx length1, const char *string2, Idx length2, Idx start,
323*6ea1f93eSDaniel Fojt 		  regoff_t range, struct re_registers *regs,
32444b87433SJohn Marino 		  Idx stop, bool ret_len)
32544b87433SJohn Marino {
32644b87433SJohn Marino   const char *str;
32744b87433SJohn Marino   regoff_t rval;
328*6ea1f93eSDaniel Fojt   Idx len;
32944b87433SJohn Marino   char *s = NULL;
33044b87433SJohn Marino 
331*6ea1f93eSDaniel Fojt   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
332*6ea1f93eSDaniel Fojt 			 || INT_ADD_WRAPV (length1, length2, &len))))
33344b87433SJohn Marino     return -2;
33444b87433SJohn Marino 
33544b87433SJohn Marino   /* Concatenate the strings.  */
33644b87433SJohn Marino   if (length2 > 0)
33744b87433SJohn Marino     if (length1 > 0)
33844b87433SJohn Marino       {
33944b87433SJohn Marino 	s = re_malloc (char, len);
34044b87433SJohn Marino 
341*6ea1f93eSDaniel Fojt 	if (__glibc_unlikely (s == NULL))
34244b87433SJohn Marino 	  return -2;
34344b87433SJohn Marino #ifdef _LIBC
34444b87433SJohn Marino 	memcpy (__mempcpy (s, string1, length1), string2, length2);
34544b87433SJohn Marino #else
34644b87433SJohn Marino 	memcpy (s, string1, length1);
34744b87433SJohn Marino 	memcpy (s + length1, string2, length2);
34844b87433SJohn Marino #endif
34944b87433SJohn Marino 	str = s;
35044b87433SJohn Marino       }
35144b87433SJohn Marino     else
35244b87433SJohn Marino       str = string2;
35344b87433SJohn Marino   else
35444b87433SJohn Marino     str = string1;
35544b87433SJohn Marino 
35644b87433SJohn Marino   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
35744b87433SJohn Marino 			 ret_len);
35844b87433SJohn Marino   re_free (s);
35944b87433SJohn Marino   return rval;
36044b87433SJohn Marino }
36144b87433SJohn Marino 
36244b87433SJohn Marino /* The parameters have the same meaning as those of re_search.
36344b87433SJohn Marino    Additional parameters:
36444b87433SJohn Marino    If RET_LEN is true the length of the match is returned (re_match style);
36544b87433SJohn Marino    otherwise the position of the match is returned.  */
36644b87433SJohn Marino 
36744b87433SJohn Marino static regoff_t
re_search_stub(struct re_pattern_buffer * bufp,const char * string,Idx length,Idx start,regoff_t range,Idx stop,struct re_registers * regs,bool ret_len)368*6ea1f93eSDaniel Fojt re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
36944b87433SJohn Marino 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
37044b87433SJohn Marino 		bool ret_len)
37144b87433SJohn Marino {
37244b87433SJohn Marino   reg_errcode_t result;
37344b87433SJohn Marino   regmatch_t *pmatch;
37444b87433SJohn Marino   Idx nregs;
37544b87433SJohn Marino   regoff_t rval;
37644b87433SJohn Marino   int eflags = 0;
3774536c563SJohn Marino   re_dfa_t *dfa = bufp->buffer;
37844b87433SJohn Marino   Idx last_start = start + range;
37944b87433SJohn Marino 
38044b87433SJohn Marino   /* Check for out-of-range.  */
381*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (start < 0 || start > length))
38244b87433SJohn Marino     return -1;
383*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (length < last_start
384*6ea1f93eSDaniel Fojt 			|| (0 <= range && last_start < start)))
38544b87433SJohn Marino     last_start = length;
386*6ea1f93eSDaniel Fojt   else if (__glibc_unlikely (last_start < 0
387*6ea1f93eSDaniel Fojt 			     || (range < 0 && start <= last_start)))
38844b87433SJohn Marino     last_start = 0;
38944b87433SJohn Marino 
390*6ea1f93eSDaniel Fojt   lock_lock (dfa->lock);
39144b87433SJohn Marino 
39244b87433SJohn Marino   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
39344b87433SJohn Marino   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
39444b87433SJohn Marino 
39544b87433SJohn Marino   /* Compile fastmap if we haven't yet.  */
39644b87433SJohn Marino   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
39744b87433SJohn Marino     re_compile_fastmap (bufp);
39844b87433SJohn Marino 
399*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (bufp->no_sub))
40044b87433SJohn Marino     regs = NULL;
40144b87433SJohn Marino 
40244b87433SJohn Marino   /* We need at least 1 register.  */
40344b87433SJohn Marino   if (regs == NULL)
40444b87433SJohn Marino     nregs = 1;
405*6ea1f93eSDaniel Fojt   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
406*6ea1f93eSDaniel Fojt 			     && regs->num_regs <= bufp->re_nsub))
40744b87433SJohn Marino     {
40844b87433SJohn Marino       nregs = regs->num_regs;
409*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (nregs < 1))
41044b87433SJohn Marino 	{
41144b87433SJohn Marino 	  /* Nothing can be copied to regs.  */
41244b87433SJohn Marino 	  regs = NULL;
41344b87433SJohn Marino 	  nregs = 1;
41444b87433SJohn Marino 	}
41544b87433SJohn Marino     }
41644b87433SJohn Marino   else
41744b87433SJohn Marino     nregs = bufp->re_nsub + 1;
41844b87433SJohn Marino   pmatch = re_malloc (regmatch_t, nregs);
419*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (pmatch == NULL))
42044b87433SJohn Marino     {
42144b87433SJohn Marino       rval = -2;
42244b87433SJohn Marino       goto out;
42344b87433SJohn Marino     }
42444b87433SJohn Marino 
42544b87433SJohn Marino   result = re_search_internal (bufp, string, length, start, last_start, stop,
42644b87433SJohn Marino 			       nregs, pmatch, eflags);
42744b87433SJohn Marino 
42844b87433SJohn Marino   rval = 0;
42944b87433SJohn Marino 
4304536c563SJohn Marino   /* I hope we needn't fill their regs with -1's when no match was found.  */
43144b87433SJohn Marino   if (result != REG_NOERROR)
4324536c563SJohn Marino     rval = result == REG_NOMATCH ? -1 : -2;
43344b87433SJohn Marino   else if (regs != NULL)
43444b87433SJohn Marino     {
43544b87433SJohn Marino       /* If caller wants register contents data back, copy them.  */
43644b87433SJohn Marino       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
43744b87433SJohn Marino 					   bufp->regs_allocated);
438*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
43944b87433SJohn Marino 	rval = -2;
44044b87433SJohn Marino     }
44144b87433SJohn Marino 
442*6ea1f93eSDaniel Fojt   if (__glibc_likely (rval == 0))
44344b87433SJohn Marino     {
44444b87433SJohn Marino       if (ret_len)
44544b87433SJohn Marino 	{
44644b87433SJohn Marino 	  assert (pmatch[0].rm_so == start);
44744b87433SJohn Marino 	  rval = pmatch[0].rm_eo - start;
44844b87433SJohn Marino 	}
44944b87433SJohn Marino       else
45044b87433SJohn Marino 	rval = pmatch[0].rm_so;
45144b87433SJohn Marino     }
45244b87433SJohn Marino   re_free (pmatch);
45344b87433SJohn Marino  out:
454*6ea1f93eSDaniel Fojt   lock_unlock (dfa->lock);
45544b87433SJohn Marino   return rval;
45644b87433SJohn Marino }
45744b87433SJohn Marino 
4584536c563SJohn Marino static unsigned
re_copy_regs(struct re_registers * regs,regmatch_t * pmatch,Idx nregs,int regs_allocated)45944b87433SJohn Marino re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
46044b87433SJohn Marino 	      int regs_allocated)
46144b87433SJohn Marino {
46244b87433SJohn Marino   int rval = REGS_REALLOCATE;
46344b87433SJohn Marino   Idx i;
46444b87433SJohn Marino   Idx need_regs = nregs + 1;
4654536c563SJohn Marino   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
46644b87433SJohn Marino      uses.  */
46744b87433SJohn Marino 
46844b87433SJohn Marino   /* Have the register data arrays been allocated?  */
46944b87433SJohn Marino   if (regs_allocated == REGS_UNALLOCATED)
47044b87433SJohn Marino     { /* No.  So allocate them with malloc.  */
47144b87433SJohn Marino       regs->start = re_malloc (regoff_t, need_regs);
472*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (regs->start == NULL))
47344b87433SJohn Marino 	return REGS_UNALLOCATED;
47444b87433SJohn Marino       regs->end = re_malloc (regoff_t, need_regs);
475*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (regs->end == NULL))
47644b87433SJohn Marino 	{
47744b87433SJohn Marino 	  re_free (regs->start);
47844b87433SJohn Marino 	  return REGS_UNALLOCATED;
47944b87433SJohn Marino 	}
48044b87433SJohn Marino       regs->num_regs = need_regs;
48144b87433SJohn Marino     }
48244b87433SJohn Marino   else if (regs_allocated == REGS_REALLOCATE)
48344b87433SJohn Marino     { /* Yes.  If we need more elements than were already
48444b87433SJohn Marino 	 allocated, reallocate them.  If we need fewer, just
48544b87433SJohn Marino 	 leave it alone.  */
486*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (need_regs > regs->num_regs))
48744b87433SJohn Marino 	{
48844b87433SJohn Marino 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
48944b87433SJohn Marino 	  regoff_t *new_end;
490*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (new_start == NULL))
49144b87433SJohn Marino 	    return REGS_UNALLOCATED;
49244b87433SJohn Marino 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
493*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (new_end == NULL))
49444b87433SJohn Marino 	    {
49544b87433SJohn Marino 	      re_free (new_start);
49644b87433SJohn Marino 	      return REGS_UNALLOCATED;
49744b87433SJohn Marino 	    }
49844b87433SJohn Marino 	  regs->start = new_start;
49944b87433SJohn Marino 	  regs->end = new_end;
50044b87433SJohn Marino 	  regs->num_regs = need_regs;
50144b87433SJohn Marino 	}
50244b87433SJohn Marino     }
50344b87433SJohn Marino   else
50444b87433SJohn Marino     {
50544b87433SJohn Marino       assert (regs_allocated == REGS_FIXED);
50644b87433SJohn Marino       /* This function may not be called with REGS_FIXED and nregs too big.  */
50744b87433SJohn Marino       assert (regs->num_regs >= nregs);
50844b87433SJohn Marino       rval = REGS_FIXED;
50944b87433SJohn Marino     }
51044b87433SJohn Marino 
51144b87433SJohn Marino   /* Copy the regs.  */
51244b87433SJohn Marino   for (i = 0; i < nregs; ++i)
51344b87433SJohn Marino     {
51444b87433SJohn Marino       regs->start[i] = pmatch[i].rm_so;
51544b87433SJohn Marino       regs->end[i] = pmatch[i].rm_eo;
51644b87433SJohn Marino     }
51744b87433SJohn Marino   for ( ; i < regs->num_regs; ++i)
51844b87433SJohn Marino     regs->start[i] = regs->end[i] = -1;
51944b87433SJohn Marino 
52044b87433SJohn Marino   return rval;
52144b87433SJohn Marino }
52244b87433SJohn Marino 
52344b87433SJohn Marino /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
52444b87433SJohn Marino    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
52544b87433SJohn Marino    this memory for recording register information.  STARTS and ENDS
52644b87433SJohn Marino    must be allocated using the malloc library routine, and must each
52744b87433SJohn Marino    be at least NUM_REGS * sizeof (regoff_t) bytes long.
52844b87433SJohn Marino 
52944b87433SJohn Marino    If NUM_REGS == 0, then subsequent matches should allocate their own
53044b87433SJohn Marino    register data.
53144b87433SJohn Marino 
53244b87433SJohn Marino    Unless this function is called, the first search or match using
53344b87433SJohn Marino    PATTERN_BUFFER will allocate its own register data, without
53444b87433SJohn Marino    freeing the old data.  */
53544b87433SJohn Marino 
53644b87433SJohn Marino void
re_set_registers(struct re_pattern_buffer * bufp,struct re_registers * regs,__re_size_t num_regs,regoff_t * starts,regoff_t * ends)537*6ea1f93eSDaniel Fojt re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
538*6ea1f93eSDaniel Fojt 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
53944b87433SJohn Marino {
54044b87433SJohn Marino   if (num_regs)
54144b87433SJohn Marino     {
54244b87433SJohn Marino       bufp->regs_allocated = REGS_REALLOCATE;
54344b87433SJohn Marino       regs->num_regs = num_regs;
54444b87433SJohn Marino       regs->start = starts;
54544b87433SJohn Marino       regs->end = ends;
54644b87433SJohn Marino     }
54744b87433SJohn Marino   else
54844b87433SJohn Marino     {
54944b87433SJohn Marino       bufp->regs_allocated = REGS_UNALLOCATED;
55044b87433SJohn Marino       regs->num_regs = 0;
55144b87433SJohn Marino       regs->start = regs->end = NULL;
55244b87433SJohn Marino     }
55344b87433SJohn Marino }
55444b87433SJohn Marino #ifdef _LIBC
weak_alias(__re_set_registers,re_set_registers)55544b87433SJohn Marino weak_alias (__re_set_registers, re_set_registers)
55644b87433SJohn Marino #endif
55744b87433SJohn Marino 
55844b87433SJohn Marino /* Entry points compatible with 4.2 BSD regex library.  We don't define
55944b87433SJohn Marino    them unless specifically requested.  */
56044b87433SJohn Marino 
56144b87433SJohn Marino #if defined _REGEX_RE_COMP || defined _LIBC
56244b87433SJohn Marino int
56344b87433SJohn Marino # ifdef _LIBC
56444b87433SJohn Marino weak_function
56544b87433SJohn Marino # endif
566*6ea1f93eSDaniel Fojt re_exec (const char *s)
56744b87433SJohn Marino {
56844b87433SJohn Marino   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
56944b87433SJohn Marino }
57044b87433SJohn Marino #endif /* _REGEX_RE_COMP */
57144b87433SJohn Marino 
57244b87433SJohn Marino /* Internal entry point.  */
57344b87433SJohn Marino 
57444b87433SJohn Marino /* Searches for a compiled pattern PREG in the string STRING, whose
57544b87433SJohn Marino    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
57644b87433SJohn Marino    meaning as with regexec.  LAST_START is START + RANGE, where
57744b87433SJohn Marino    START and RANGE have the same meaning as with re_search.
57844b87433SJohn Marino    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
57944b87433SJohn Marino    otherwise return the error code.
58044b87433SJohn Marino    Note: We assume front end functions already check ranges.
58144b87433SJohn Marino    (0 <= LAST_START && LAST_START <= LENGTH)  */
58244b87433SJohn Marino 
58344b87433SJohn Marino static reg_errcode_t
5844536c563SJohn Marino __attribute_warn_unused_result__
re_search_internal(const regex_t * preg,const char * string,Idx length,Idx start,Idx last_start,Idx stop,size_t nmatch,regmatch_t pmatch[],int eflags)585*6ea1f93eSDaniel Fojt re_search_internal (const regex_t *preg, const char *string, Idx length,
586*6ea1f93eSDaniel Fojt 		    Idx start, Idx last_start, Idx stop, size_t nmatch,
587*6ea1f93eSDaniel Fojt 		    regmatch_t pmatch[], int eflags)
58844b87433SJohn Marino {
58944b87433SJohn Marino   reg_errcode_t err;
5904536c563SJohn Marino   const re_dfa_t *dfa = preg->buffer;
59144b87433SJohn Marino   Idx left_lim, right_lim;
59244b87433SJohn Marino   int incr;
59344b87433SJohn Marino   bool fl_longest_match;
59444b87433SJohn Marino   int match_kind;
59544b87433SJohn Marino   Idx match_first;
596*6ea1f93eSDaniel Fojt   Idx match_last = -1;
59744b87433SJohn Marino   Idx extra_nmatch;
59844b87433SJohn Marino   bool sb;
59944b87433SJohn Marino   int ch;
60044b87433SJohn Marino #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
60144b87433SJohn Marino   re_match_context_t mctx = { .dfa = dfa };
60244b87433SJohn Marino #else
60344b87433SJohn Marino   re_match_context_t mctx;
60444b87433SJohn Marino #endif
60544b87433SJohn Marino   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
60644b87433SJohn Marino 		    && start != last_start && !preg->can_be_null)
60744b87433SJohn Marino 		   ? preg->fastmap : NULL);
60844b87433SJohn Marino   RE_TRANSLATE_TYPE t = preg->translate;
60944b87433SJohn Marino 
61044b87433SJohn Marino #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
61144b87433SJohn Marino   memset (&mctx, '\0', sizeof (re_match_context_t));
61244b87433SJohn Marino   mctx.dfa = dfa;
61344b87433SJohn Marino #endif
61444b87433SJohn Marino 
61544b87433SJohn Marino   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
61644b87433SJohn Marino   nmatch -= extra_nmatch;
61744b87433SJohn Marino 
61844b87433SJohn Marino   /* Check if the DFA haven't been compiled.  */
619*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
620*6ea1f93eSDaniel Fojt 			|| dfa->init_state_word == NULL
621*6ea1f93eSDaniel Fojt 			|| dfa->init_state_nl == NULL
622*6ea1f93eSDaniel Fojt 			|| dfa->init_state_begbuf == NULL))
62344b87433SJohn Marino     return REG_NOMATCH;
62444b87433SJohn Marino 
62544b87433SJohn Marino #ifdef DEBUG
62644b87433SJohn Marino   /* We assume front-end functions already check them.  */
62744b87433SJohn Marino   assert (0 <= last_start && last_start <= length);
62844b87433SJohn Marino #endif
62944b87433SJohn Marino 
63044b87433SJohn Marino   /* If initial states with non-begbuf contexts have no elements,
63144b87433SJohn Marino      the regex must be anchored.  If preg->newline_anchor is set,
63244b87433SJohn Marino      we'll never use init_state_nl, so do not check it.  */
63344b87433SJohn Marino   if (dfa->init_state->nodes.nelem == 0
63444b87433SJohn Marino       && dfa->init_state_word->nodes.nelem == 0
63544b87433SJohn Marino       && (dfa->init_state_nl->nodes.nelem == 0
63644b87433SJohn Marino 	  || !preg->newline_anchor))
63744b87433SJohn Marino     {
63844b87433SJohn Marino       if (start != 0 && last_start != 0)
63944b87433SJohn Marino         return REG_NOMATCH;
64044b87433SJohn Marino       start = last_start = 0;
64144b87433SJohn Marino     }
64244b87433SJohn Marino 
64344b87433SJohn Marino   /* We must check the longest matching, if nmatch > 0.  */
64444b87433SJohn Marino   fl_longest_match = (nmatch != 0 || dfa->nbackref);
64544b87433SJohn Marino 
64644b87433SJohn Marino   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
64744b87433SJohn Marino 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
64844b87433SJohn Marino 			    dfa);
649*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
65044b87433SJohn Marino     goto free_return;
65144b87433SJohn Marino   mctx.input.stop = stop;
65244b87433SJohn Marino   mctx.input.raw_stop = stop;
65344b87433SJohn Marino   mctx.input.newline_anchor = preg->newline_anchor;
65444b87433SJohn Marino 
65544b87433SJohn Marino   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
656*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
65744b87433SJohn Marino     goto free_return;
65844b87433SJohn Marino 
65944b87433SJohn Marino   /* We will log all the DFA states through which the dfa pass,
66044b87433SJohn Marino      if nmatch > 1, or this dfa has "multibyte node", which is a
66144b87433SJohn Marino      back-reference or a node which can accept multibyte character or
66244b87433SJohn Marino      multi character collating element.  */
66344b87433SJohn Marino   if (nmatch > 1 || dfa->has_mb_node)
66444b87433SJohn Marino     {
66544b87433SJohn Marino       /* Avoid overflow.  */
666*6ea1f93eSDaniel Fojt       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
667*6ea1f93eSDaniel Fojt 			     <= mctx.input.bufs_len)))
66844b87433SJohn Marino 	{
66944b87433SJohn Marino 	  err = REG_ESPACE;
67044b87433SJohn Marino 	  goto free_return;
67144b87433SJohn Marino 	}
67244b87433SJohn Marino 
67344b87433SJohn Marino       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
674*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (mctx.state_log == NULL))
67544b87433SJohn Marino 	{
67644b87433SJohn Marino 	  err = REG_ESPACE;
67744b87433SJohn Marino 	  goto free_return;
67844b87433SJohn Marino 	}
67944b87433SJohn Marino     }
68044b87433SJohn Marino   else
68144b87433SJohn Marino     mctx.state_log = NULL;
68244b87433SJohn Marino 
68344b87433SJohn Marino   match_first = start;
68444b87433SJohn Marino   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
68544b87433SJohn Marino 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
68644b87433SJohn Marino 
6874536c563SJohn Marino   /* Check incrementally whether the input string matches.  */
68844b87433SJohn Marino   incr = (last_start < start) ? -1 : 1;
68944b87433SJohn Marino   left_lim = (last_start < start) ? last_start : start;
69044b87433SJohn Marino   right_lim = (last_start < start) ? start : last_start;
69144b87433SJohn Marino   sb = dfa->mb_cur_max == 1;
69244b87433SJohn Marino   match_kind =
69344b87433SJohn Marino     (fastmap
69444b87433SJohn Marino      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
69544b87433SJohn Marino 	| (start <= last_start ? 2 : 0)
69644b87433SJohn Marino 	| (t != NULL ? 1 : 0))
69744b87433SJohn Marino      : 8);
69844b87433SJohn Marino 
69944b87433SJohn Marino   for (;; match_first += incr)
70044b87433SJohn Marino     {
70144b87433SJohn Marino       err = REG_NOMATCH;
70244b87433SJohn Marino       if (match_first < left_lim || right_lim < match_first)
70344b87433SJohn Marino 	goto free_return;
70444b87433SJohn Marino 
70544b87433SJohn Marino       /* Advance as rapidly as possible through the string, until we
70644b87433SJohn Marino 	 find a plausible place to start matching.  This may be done
70744b87433SJohn Marino 	 with varying efficiency, so there are various possibilities:
70844b87433SJohn Marino 	 only the most common of them are specialized, in order to
70944b87433SJohn Marino 	 save on code size.  We use a switch statement for speed.  */
71044b87433SJohn Marino       switch (match_kind)
71144b87433SJohn Marino 	{
71244b87433SJohn Marino 	case 8:
71344b87433SJohn Marino 	  /* No fastmap.  */
71444b87433SJohn Marino 	  break;
71544b87433SJohn Marino 
71644b87433SJohn Marino 	case 7:
71744b87433SJohn Marino 	  /* Fastmap with single-byte translation, match forward.  */
718*6ea1f93eSDaniel Fojt 	  while (__glibc_likely (match_first < right_lim)
71944b87433SJohn Marino 		 && !fastmap[t[(unsigned char) string[match_first]]])
72044b87433SJohn Marino 	    ++match_first;
72144b87433SJohn Marino 	  goto forward_match_found_start_or_reached_end;
72244b87433SJohn Marino 
72344b87433SJohn Marino 	case 6:
72444b87433SJohn Marino 	  /* Fastmap without translation, match forward.  */
725*6ea1f93eSDaniel Fojt 	  while (__glibc_likely (match_first < right_lim)
72644b87433SJohn Marino 		 && !fastmap[(unsigned char) string[match_first]])
72744b87433SJohn Marino 	    ++match_first;
72844b87433SJohn Marino 
72944b87433SJohn Marino 	forward_match_found_start_or_reached_end:
730*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (match_first == right_lim))
73144b87433SJohn Marino 	    {
73244b87433SJohn Marino 	      ch = match_first >= length
73344b87433SJohn Marino 		       ? 0 : (unsigned char) string[match_first];
73444b87433SJohn Marino 	      if (!fastmap[t ? t[ch] : ch])
73544b87433SJohn Marino 		goto free_return;
73644b87433SJohn Marino 	    }
73744b87433SJohn Marino 	  break;
73844b87433SJohn Marino 
73944b87433SJohn Marino 	case 4:
74044b87433SJohn Marino 	case 5:
74144b87433SJohn Marino 	  /* Fastmap without multi-byte translation, match backwards.  */
74244b87433SJohn Marino 	  while (match_first >= left_lim)
74344b87433SJohn Marino 	    {
74444b87433SJohn Marino 	      ch = match_first >= length
74544b87433SJohn Marino 		       ? 0 : (unsigned char) string[match_first];
74644b87433SJohn Marino 	      if (fastmap[t ? t[ch] : ch])
74744b87433SJohn Marino 		break;
74844b87433SJohn Marino 	      --match_first;
74944b87433SJohn Marino 	    }
75044b87433SJohn Marino 	  if (match_first < left_lim)
75144b87433SJohn Marino 	    goto free_return;
75244b87433SJohn Marino 	  break;
75344b87433SJohn Marino 
75444b87433SJohn Marino 	default:
75544b87433SJohn Marino 	  /* In this case, we can't determine easily the current byte,
75644b87433SJohn Marino 	     since it might be a component byte of a multibyte
75744b87433SJohn Marino 	     character.  Then we use the constructed buffer instead.  */
75844b87433SJohn Marino 	  for (;;)
75944b87433SJohn Marino 	    {
76044b87433SJohn Marino 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
76144b87433SJohn Marino 		 buffers.  */
76244b87433SJohn Marino 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
763*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (offset
764*6ea1f93eSDaniel Fojt 				    >= (__re_size_t) mctx.input.valid_raw_len))
76544b87433SJohn Marino 		{
76644b87433SJohn Marino 		  err = re_string_reconstruct (&mctx.input, match_first,
76744b87433SJohn Marino 					       eflags);
768*6ea1f93eSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
76944b87433SJohn Marino 		    goto free_return;
77044b87433SJohn Marino 
77144b87433SJohn Marino 		  offset = match_first - mctx.input.raw_mbs_idx;
77244b87433SJohn Marino 		}
77344b87433SJohn Marino 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
77444b87433SJohn Marino 		 Note that MATCH_FIRST must not be smaller than 0.  */
77544b87433SJohn Marino 	      ch = (match_first >= length
77644b87433SJohn Marino 		    ? 0 : re_string_byte_at (&mctx.input, offset));
77744b87433SJohn Marino 	      if (fastmap[ch])
77844b87433SJohn Marino 		break;
77944b87433SJohn Marino 	      match_first += incr;
78044b87433SJohn Marino 	      if (match_first < left_lim || match_first > right_lim)
78144b87433SJohn Marino 		{
78244b87433SJohn Marino 		  err = REG_NOMATCH;
78344b87433SJohn Marino 		  goto free_return;
78444b87433SJohn Marino 		}
78544b87433SJohn Marino 	    }
78644b87433SJohn Marino 	  break;
78744b87433SJohn Marino 	}
78844b87433SJohn Marino 
78944b87433SJohn Marino       /* Reconstruct the buffers so that the matcher can assume that
79044b87433SJohn Marino 	 the matching starts from the beginning of the buffer.  */
79144b87433SJohn Marino       err = re_string_reconstruct (&mctx.input, match_first, eflags);
792*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
79344b87433SJohn Marino 	goto free_return;
79444b87433SJohn Marino 
79544b87433SJohn Marino #ifdef RE_ENABLE_I18N
79644b87433SJohn Marino      /* Don't consider this char as a possible match start if it part,
79744b87433SJohn Marino 	yet isn't the head, of a multibyte character.  */
79844b87433SJohn Marino       if (!sb && !re_string_first_byte (&mctx.input, 0))
79944b87433SJohn Marino 	continue;
80044b87433SJohn Marino #endif
80144b87433SJohn Marino 
80244b87433SJohn Marino       /* It seems to be appropriate one, then use the matcher.  */
80344b87433SJohn Marino       /* We assume that the matching starts from 0.  */
80444b87433SJohn Marino       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
80544b87433SJohn Marino       match_last = check_matching (&mctx, fl_longest_match,
80644b87433SJohn Marino 				   start <= last_start ? &match_first : NULL);
807*6ea1f93eSDaniel Fojt       if (match_last != -1)
80844b87433SJohn Marino 	{
809*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (match_last == -2))
81044b87433SJohn Marino 	    {
81144b87433SJohn Marino 	      err = REG_ESPACE;
81244b87433SJohn Marino 	      goto free_return;
81344b87433SJohn Marino 	    }
81444b87433SJohn Marino 	  else
81544b87433SJohn Marino 	    {
81644b87433SJohn Marino 	      mctx.match_last = match_last;
81744b87433SJohn Marino 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
81844b87433SJohn Marino 		{
81944b87433SJohn Marino 		  re_dfastate_t *pstate = mctx.state_log[match_last];
82044b87433SJohn Marino 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
82144b87433SJohn Marino 							     match_last);
82244b87433SJohn Marino 		}
82344b87433SJohn Marino 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
82444b87433SJohn Marino 		  || dfa->nbackref)
82544b87433SJohn Marino 		{
82644b87433SJohn Marino 		  err = prune_impossible_nodes (&mctx);
82744b87433SJohn Marino 		  if (err == REG_NOERROR)
82844b87433SJohn Marino 		    break;
829*6ea1f93eSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOMATCH))
83044b87433SJohn Marino 		    goto free_return;
831*6ea1f93eSDaniel Fojt 		  match_last = -1;
83244b87433SJohn Marino 		}
83344b87433SJohn Marino 	      else
83444b87433SJohn Marino 		break; /* We found a match.  */
83544b87433SJohn Marino 	    }
83644b87433SJohn Marino 	}
83744b87433SJohn Marino 
83844b87433SJohn Marino       match_ctx_clean (&mctx);
83944b87433SJohn Marino     }
84044b87433SJohn Marino 
84144b87433SJohn Marino #ifdef DEBUG
842*6ea1f93eSDaniel Fojt   assert (match_last != -1);
84344b87433SJohn Marino   assert (err == REG_NOERROR);
84444b87433SJohn Marino #endif
84544b87433SJohn Marino 
84644b87433SJohn Marino   /* Set pmatch[] if we need.  */
84744b87433SJohn Marino   if (nmatch > 0)
84844b87433SJohn Marino     {
84944b87433SJohn Marino       Idx reg_idx;
85044b87433SJohn Marino 
85144b87433SJohn Marino       /* Initialize registers.  */
85244b87433SJohn Marino       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
85344b87433SJohn Marino 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
85444b87433SJohn Marino 
85544b87433SJohn Marino       /* Set the points where matching start/end.  */
85644b87433SJohn Marino       pmatch[0].rm_so = 0;
85744b87433SJohn Marino       pmatch[0].rm_eo = mctx.match_last;
85844b87433SJohn Marino       /* FIXME: This function should fail if mctx.match_last exceeds
85944b87433SJohn Marino 	 the maximum possible regoff_t value.  We need a new error
86044b87433SJohn Marino 	 code REG_OVERFLOW.  */
86144b87433SJohn Marino 
86244b87433SJohn Marino       if (!preg->no_sub && nmatch > 1)
86344b87433SJohn Marino 	{
86444b87433SJohn Marino 	  err = set_regs (preg, &mctx, nmatch, pmatch,
86544b87433SJohn Marino 			  dfa->has_plural_match && dfa->nbackref > 0);
866*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
86744b87433SJohn Marino 	    goto free_return;
86844b87433SJohn Marino 	}
86944b87433SJohn Marino 
8704536c563SJohn Marino       /* At last, add the offset to each register, since we slid
87144b87433SJohn Marino 	 the buffers so that we could assume that the matching starts
87244b87433SJohn Marino 	 from 0.  */
87344b87433SJohn Marino       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
87444b87433SJohn Marino 	if (pmatch[reg_idx].rm_so != -1)
87544b87433SJohn Marino 	  {
87644b87433SJohn Marino #ifdef RE_ENABLE_I18N
877*6ea1f93eSDaniel Fojt 	    if (__glibc_unlikely (mctx.input.offsets_needed != 0))
87844b87433SJohn Marino 	      {
87944b87433SJohn Marino 		pmatch[reg_idx].rm_so =
88044b87433SJohn Marino 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
88144b87433SJohn Marino 		   ? mctx.input.valid_raw_len
88244b87433SJohn Marino 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
88344b87433SJohn Marino 		pmatch[reg_idx].rm_eo =
88444b87433SJohn Marino 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
88544b87433SJohn Marino 		   ? mctx.input.valid_raw_len
88644b87433SJohn Marino 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
88744b87433SJohn Marino 	      }
88844b87433SJohn Marino #else
88944b87433SJohn Marino 	    assert (mctx.input.offsets_needed == 0);
89044b87433SJohn Marino #endif
89144b87433SJohn Marino 	    pmatch[reg_idx].rm_so += match_first;
89244b87433SJohn Marino 	    pmatch[reg_idx].rm_eo += match_first;
89344b87433SJohn Marino 	  }
89444b87433SJohn Marino       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
89544b87433SJohn Marino 	{
89644b87433SJohn Marino 	  pmatch[nmatch + reg_idx].rm_so = -1;
89744b87433SJohn Marino 	  pmatch[nmatch + reg_idx].rm_eo = -1;
89844b87433SJohn Marino 	}
89944b87433SJohn Marino 
90044b87433SJohn Marino       if (dfa->subexp_map)
90144b87433SJohn Marino 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
90244b87433SJohn Marino 	  if (dfa->subexp_map[reg_idx] != reg_idx)
90344b87433SJohn Marino 	    {
90444b87433SJohn Marino 	      pmatch[reg_idx + 1].rm_so
90544b87433SJohn Marino 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
90644b87433SJohn Marino 	      pmatch[reg_idx + 1].rm_eo
90744b87433SJohn Marino 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
90844b87433SJohn Marino 	    }
90944b87433SJohn Marino     }
91044b87433SJohn Marino 
91144b87433SJohn Marino  free_return:
91244b87433SJohn Marino   re_free (mctx.state_log);
91344b87433SJohn Marino   if (dfa->nbackref)
91444b87433SJohn Marino     match_ctx_free (&mctx);
91544b87433SJohn Marino   re_string_destruct (&mctx.input);
91644b87433SJohn Marino   return err;
91744b87433SJohn Marino }
91844b87433SJohn Marino 
91944b87433SJohn Marino static reg_errcode_t
9204536c563SJohn Marino __attribute_warn_unused_result__
prune_impossible_nodes(re_match_context_t * mctx)92144b87433SJohn Marino prune_impossible_nodes (re_match_context_t *mctx)
92244b87433SJohn Marino {
92344b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
92444b87433SJohn Marino   Idx halt_node, match_last;
92544b87433SJohn Marino   reg_errcode_t ret;
92644b87433SJohn Marino   re_dfastate_t **sifted_states;
92744b87433SJohn Marino   re_dfastate_t **lim_states = NULL;
92844b87433SJohn Marino   re_sift_context_t sctx;
92944b87433SJohn Marino #ifdef DEBUG
93044b87433SJohn Marino   assert (mctx->state_log != NULL);
93144b87433SJohn Marino #endif
93244b87433SJohn Marino   match_last = mctx->match_last;
93344b87433SJohn Marino   halt_node = mctx->last_node;
93444b87433SJohn Marino 
93544b87433SJohn Marino   /* Avoid overflow.  */
936*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
937*6ea1f93eSDaniel Fojt 			<= match_last))
93844b87433SJohn Marino     return REG_ESPACE;
93944b87433SJohn Marino 
94044b87433SJohn Marino   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
941*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (sifted_states == NULL))
94244b87433SJohn Marino     {
94344b87433SJohn Marino       ret = REG_ESPACE;
94444b87433SJohn Marino       goto free_return;
94544b87433SJohn Marino     }
94644b87433SJohn Marino   if (dfa->nbackref)
94744b87433SJohn Marino     {
94844b87433SJohn Marino       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
949*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (lim_states == NULL))
95044b87433SJohn Marino 	{
95144b87433SJohn Marino 	  ret = REG_ESPACE;
95244b87433SJohn Marino 	  goto free_return;
95344b87433SJohn Marino 	}
95444b87433SJohn Marino       while (1)
95544b87433SJohn Marino 	{
95644b87433SJohn Marino 	  memset (lim_states, '\0',
95744b87433SJohn Marino 		  sizeof (re_dfastate_t *) * (match_last + 1));
95844b87433SJohn Marino 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
95944b87433SJohn Marino 			 match_last);
96044b87433SJohn Marino 	  ret = sift_states_backward (mctx, &sctx);
96144b87433SJohn Marino 	  re_node_set_free (&sctx.limits);
962*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (ret != REG_NOERROR))
96344b87433SJohn Marino 	      goto free_return;
96444b87433SJohn Marino 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
96544b87433SJohn Marino 	    break;
96644b87433SJohn Marino 	  do
96744b87433SJohn Marino 	    {
96844b87433SJohn Marino 	      --match_last;
969*6ea1f93eSDaniel Fojt 	      if (match_last < 0)
97044b87433SJohn Marino 		{
97144b87433SJohn Marino 		  ret = REG_NOMATCH;
97244b87433SJohn Marino 		  goto free_return;
97344b87433SJohn Marino 		}
97444b87433SJohn Marino 	    } while (mctx->state_log[match_last] == NULL
97544b87433SJohn Marino 		     || !mctx->state_log[match_last]->halt);
97644b87433SJohn Marino 	  halt_node = check_halt_state_context (mctx,
97744b87433SJohn Marino 						mctx->state_log[match_last],
97844b87433SJohn Marino 						match_last);
97944b87433SJohn Marino 	}
98044b87433SJohn Marino       ret = merge_state_array (dfa, sifted_states, lim_states,
98144b87433SJohn Marino 			       match_last + 1);
98244b87433SJohn Marino       re_free (lim_states);
98344b87433SJohn Marino       lim_states = NULL;
984*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (ret != REG_NOERROR))
98544b87433SJohn Marino 	goto free_return;
98644b87433SJohn Marino     }
98744b87433SJohn Marino   else
98844b87433SJohn Marino     {
98944b87433SJohn Marino       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
99044b87433SJohn Marino       ret = sift_states_backward (mctx, &sctx);
99144b87433SJohn Marino       re_node_set_free (&sctx.limits);
992*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (ret != REG_NOERROR))
99344b87433SJohn Marino 	goto free_return;
99444b87433SJohn Marino       if (sifted_states[0] == NULL)
99544b87433SJohn Marino 	{
99644b87433SJohn Marino 	  ret = REG_NOMATCH;
99744b87433SJohn Marino 	  goto free_return;
99844b87433SJohn Marino 	}
99944b87433SJohn Marino     }
100044b87433SJohn Marino   re_free (mctx->state_log);
100144b87433SJohn Marino   mctx->state_log = sifted_states;
100244b87433SJohn Marino   sifted_states = NULL;
100344b87433SJohn Marino   mctx->last_node = halt_node;
100444b87433SJohn Marino   mctx->match_last = match_last;
100544b87433SJohn Marino   ret = REG_NOERROR;
100644b87433SJohn Marino  free_return:
100744b87433SJohn Marino   re_free (sifted_states);
100844b87433SJohn Marino   re_free (lim_states);
100944b87433SJohn Marino   return ret;
101044b87433SJohn Marino }
101144b87433SJohn Marino 
101244b87433SJohn Marino /* Acquire an initial state and return it.
101344b87433SJohn Marino    We must select appropriate initial state depending on the context,
101444b87433SJohn Marino    since initial states may have constraints like "\<", "^", etc..  */
101544b87433SJohn Marino 
101644b87433SJohn Marino static inline re_dfastate_t *
1017*6ea1f93eSDaniel Fojt __attribute__ ((always_inline))
acquire_init_state_context(reg_errcode_t * err,const re_match_context_t * mctx,Idx idx)101844b87433SJohn Marino acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
101944b87433SJohn Marino 			    Idx idx)
102044b87433SJohn Marino {
102144b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
102244b87433SJohn Marino   if (dfa->init_state->has_constraint)
102344b87433SJohn Marino     {
102444b87433SJohn Marino       unsigned int context;
102544b87433SJohn Marino       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
102644b87433SJohn Marino       if (IS_WORD_CONTEXT (context))
102744b87433SJohn Marino 	return dfa->init_state_word;
102844b87433SJohn Marino       else if (IS_ORDINARY_CONTEXT (context))
102944b87433SJohn Marino 	return dfa->init_state;
103044b87433SJohn Marino       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
103144b87433SJohn Marino 	return dfa->init_state_begbuf;
103244b87433SJohn Marino       else if (IS_NEWLINE_CONTEXT (context))
103344b87433SJohn Marino 	return dfa->init_state_nl;
103444b87433SJohn Marino       else if (IS_BEGBUF_CONTEXT (context))
103544b87433SJohn Marino 	{
103644b87433SJohn Marino 	  /* It is relatively rare case, then calculate on demand.  */
103744b87433SJohn Marino 	  return re_acquire_state_context (err, dfa,
103844b87433SJohn Marino 					   dfa->init_state->entrance_nodes,
103944b87433SJohn Marino 					   context);
104044b87433SJohn Marino 	}
104144b87433SJohn Marino       else
104244b87433SJohn Marino 	/* Must not happen?  */
104344b87433SJohn Marino 	return dfa->init_state;
104444b87433SJohn Marino     }
104544b87433SJohn Marino   else
104644b87433SJohn Marino     return dfa->init_state;
104744b87433SJohn Marino }
104844b87433SJohn Marino 
104944b87433SJohn Marino /* Check whether the regular expression match input string INPUT or not,
1050*6ea1f93eSDaniel Fojt    and return the index where the matching end.  Return -1 if
1051*6ea1f93eSDaniel Fojt    there is no match, and return -2 in case of an error.
105244b87433SJohn Marino    FL_LONGEST_MATCH means we want the POSIX longest matching.
105344b87433SJohn Marino    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
105444b87433SJohn Marino    next place where we may want to try matching.
10554536c563SJohn Marino    Note that the matcher assumes that the matching starts from the current
105644b87433SJohn Marino    index of the buffer.  */
105744b87433SJohn Marino 
105844b87433SJohn Marino static Idx
1059*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
check_matching(re_match_context_t * mctx,bool fl_longest_match,Idx * p_match_first)106044b87433SJohn Marino check_matching (re_match_context_t *mctx, bool fl_longest_match,
106144b87433SJohn Marino 		Idx *p_match_first)
106244b87433SJohn Marino {
106344b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
106444b87433SJohn Marino   reg_errcode_t err;
106544b87433SJohn Marino   Idx match = 0;
1066*6ea1f93eSDaniel Fojt   Idx match_last = -1;
106744b87433SJohn Marino   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
106844b87433SJohn Marino   re_dfastate_t *cur_state;
106944b87433SJohn Marino   bool at_init_state = p_match_first != NULL;
107044b87433SJohn Marino   Idx next_start_idx = cur_str_idx;
107144b87433SJohn Marino 
107244b87433SJohn Marino   err = REG_NOERROR;
107344b87433SJohn Marino   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
107444b87433SJohn Marino   /* An initial state must not be NULL (invalid).  */
1075*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (cur_state == NULL))
107644b87433SJohn Marino     {
107744b87433SJohn Marino       assert (err == REG_ESPACE);
1078*6ea1f93eSDaniel Fojt       return -2;
107944b87433SJohn Marino     }
108044b87433SJohn Marino 
108144b87433SJohn Marino   if (mctx->state_log != NULL)
108244b87433SJohn Marino     {
108344b87433SJohn Marino       mctx->state_log[cur_str_idx] = cur_state;
108444b87433SJohn Marino 
108544b87433SJohn Marino       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
108644b87433SJohn Marino 	 later.  E.g. Processing back references.  */
1087*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (dfa->nbackref))
108844b87433SJohn Marino 	{
108944b87433SJohn Marino 	  at_init_state = false;
109044b87433SJohn Marino 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1091*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
109244b87433SJohn Marino 	    return err;
109344b87433SJohn Marino 
109444b87433SJohn Marino 	  if (cur_state->has_backref)
109544b87433SJohn Marino 	    {
109644b87433SJohn Marino 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1097*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
109844b87433SJohn Marino 		return err;
109944b87433SJohn Marino 	    }
110044b87433SJohn Marino 	}
110144b87433SJohn Marino     }
110244b87433SJohn Marino 
110344b87433SJohn Marino   /* If the RE accepts NULL string.  */
1104*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (cur_state->halt))
110544b87433SJohn Marino     {
110644b87433SJohn Marino       if (!cur_state->has_constraint
110744b87433SJohn Marino 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
110844b87433SJohn Marino 	{
110944b87433SJohn Marino 	  if (!fl_longest_match)
111044b87433SJohn Marino 	    return cur_str_idx;
111144b87433SJohn Marino 	  else
111244b87433SJohn Marino 	    {
111344b87433SJohn Marino 	      match_last = cur_str_idx;
111444b87433SJohn Marino 	      match = 1;
111544b87433SJohn Marino 	    }
111644b87433SJohn Marino 	}
111744b87433SJohn Marino     }
111844b87433SJohn Marino 
111944b87433SJohn Marino   while (!re_string_eoi (&mctx->input))
112044b87433SJohn Marino     {
112144b87433SJohn Marino       re_dfastate_t *old_state = cur_state;
112244b87433SJohn Marino       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
112344b87433SJohn Marino 
1124*6ea1f93eSDaniel Fojt       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
11254536c563SJohn Marino 	   && mctx->input.bufs_len < mctx->input.len)
1126*6ea1f93eSDaniel Fojt 	  || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
112744b87433SJohn Marino 	      && mctx->input.valid_len < mctx->input.len))
112844b87433SJohn Marino 	{
11294536c563SJohn Marino 	  err = extend_buffers (mctx, next_char_idx + 1);
1130*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
113144b87433SJohn Marino 	    {
113244b87433SJohn Marino 	      assert (err == REG_ESPACE);
1133*6ea1f93eSDaniel Fojt 	      return -2;
113444b87433SJohn Marino 	    }
113544b87433SJohn Marino 	}
113644b87433SJohn Marino 
113744b87433SJohn Marino       cur_state = transit_state (&err, mctx, cur_state);
113844b87433SJohn Marino       if (mctx->state_log != NULL)
113944b87433SJohn Marino 	cur_state = merge_state_with_log (&err, mctx, cur_state);
114044b87433SJohn Marino 
114144b87433SJohn Marino       if (cur_state == NULL)
114244b87433SJohn Marino 	{
114344b87433SJohn Marino 	  /* Reached the invalid state or an error.  Try to recover a valid
114444b87433SJohn Marino 	     state using the state log, if available and if we have not
114544b87433SJohn Marino 	     already found a valid (even if not the longest) match.  */
1146*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
1147*6ea1f93eSDaniel Fojt 	    return -2;
114844b87433SJohn Marino 
114944b87433SJohn Marino 	  if (mctx->state_log == NULL
115044b87433SJohn Marino 	      || (match && !fl_longest_match)
115144b87433SJohn Marino 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
115244b87433SJohn Marino 	    break;
115344b87433SJohn Marino 	}
115444b87433SJohn Marino 
1155*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (at_init_state))
115644b87433SJohn Marino 	{
115744b87433SJohn Marino 	  if (old_state == cur_state)
115844b87433SJohn Marino 	    next_start_idx = next_char_idx;
115944b87433SJohn Marino 	  else
116044b87433SJohn Marino 	    at_init_state = false;
116144b87433SJohn Marino 	}
116244b87433SJohn Marino 
116344b87433SJohn Marino       if (cur_state->halt)
116444b87433SJohn Marino 	{
116544b87433SJohn Marino 	  /* Reached a halt state.
116644b87433SJohn Marino 	     Check the halt state can satisfy the current context.  */
116744b87433SJohn Marino 	  if (!cur_state->has_constraint
116844b87433SJohn Marino 	      || check_halt_state_context (mctx, cur_state,
116944b87433SJohn Marino 					   re_string_cur_idx (&mctx->input)))
117044b87433SJohn Marino 	    {
117144b87433SJohn Marino 	      /* We found an appropriate halt state.  */
117244b87433SJohn Marino 	      match_last = re_string_cur_idx (&mctx->input);
117344b87433SJohn Marino 	      match = 1;
117444b87433SJohn Marino 
117544b87433SJohn Marino 	      /* We found a match, do not modify match_first below.  */
117644b87433SJohn Marino 	      p_match_first = NULL;
117744b87433SJohn Marino 	      if (!fl_longest_match)
117844b87433SJohn Marino 		break;
117944b87433SJohn Marino 	    }
118044b87433SJohn Marino 	}
118144b87433SJohn Marino     }
118244b87433SJohn Marino 
118344b87433SJohn Marino   if (p_match_first)
118444b87433SJohn Marino     *p_match_first += next_start_idx;
118544b87433SJohn Marino 
118644b87433SJohn Marino   return match_last;
118744b87433SJohn Marino }
118844b87433SJohn Marino 
118944b87433SJohn Marino /* Check NODE match the current context.  */
119044b87433SJohn Marino 
119144b87433SJohn Marino static bool
check_halt_node_context(const re_dfa_t * dfa,Idx node,unsigned int context)119244b87433SJohn Marino check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
119344b87433SJohn Marino {
119444b87433SJohn Marino   re_token_type_t type = dfa->nodes[node].type;
119544b87433SJohn Marino   unsigned int constraint = dfa->nodes[node].constraint;
119644b87433SJohn Marino   if (type != END_OF_RE)
119744b87433SJohn Marino     return false;
119844b87433SJohn Marino   if (!constraint)
119944b87433SJohn Marino     return true;
120044b87433SJohn Marino   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
120144b87433SJohn Marino     return false;
120244b87433SJohn Marino   return true;
120344b87433SJohn Marino }
120444b87433SJohn Marino 
120544b87433SJohn Marino /* Check the halt state STATE match the current context.
120644b87433SJohn Marino    Return 0 if not match, if the node, STATE has, is a halt node and
120744b87433SJohn Marino    match the context, return the node.  */
120844b87433SJohn Marino 
120944b87433SJohn Marino static Idx
check_halt_state_context(const re_match_context_t * mctx,const re_dfastate_t * state,Idx idx)121044b87433SJohn Marino check_halt_state_context (const re_match_context_t *mctx,
121144b87433SJohn Marino 			  const re_dfastate_t *state, Idx idx)
121244b87433SJohn Marino {
121344b87433SJohn Marino   Idx i;
121444b87433SJohn Marino   unsigned int context;
121544b87433SJohn Marino #ifdef DEBUG
121644b87433SJohn Marino   assert (state->halt);
121744b87433SJohn Marino #endif
121844b87433SJohn Marino   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
121944b87433SJohn Marino   for (i = 0; i < state->nodes.nelem; ++i)
122044b87433SJohn Marino     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
122144b87433SJohn Marino       return state->nodes.elems[i];
122244b87433SJohn Marino   return 0;
122344b87433SJohn Marino }
122444b87433SJohn Marino 
122544b87433SJohn Marino /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
122644b87433SJohn Marino    corresponding to the DFA).
122744b87433SJohn Marino    Return the destination node, and update EPS_VIA_NODES;
1228*6ea1f93eSDaniel Fojt    return -1 in case of errors.  */
122944b87433SJohn Marino 
123044b87433SJohn Marino static Idx
proceed_next_node(const re_match_context_t * mctx,Idx nregs,regmatch_t * regs,Idx * pidx,Idx node,re_node_set * eps_via_nodes,struct re_fail_stack_t * fs)123144b87433SJohn Marino proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
123244b87433SJohn Marino 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
123344b87433SJohn Marino 		   struct re_fail_stack_t *fs)
123444b87433SJohn Marino {
123544b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
123644b87433SJohn Marino   Idx i;
123744b87433SJohn Marino   bool ok;
123844b87433SJohn Marino   if (IS_EPSILON_NODE (dfa->nodes[node].type))
123944b87433SJohn Marino     {
124044b87433SJohn Marino       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
124144b87433SJohn Marino       re_node_set *edests = &dfa->edests[node];
124244b87433SJohn Marino       Idx dest_node;
124344b87433SJohn Marino       ok = re_node_set_insert (eps_via_nodes, node);
1244*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (! ok))
1245*6ea1f93eSDaniel Fojt 	return -2;
1246*6ea1f93eSDaniel Fojt       /* Pick up a valid destination, or return -1 if none
124744b87433SJohn Marino 	 is found.  */
1248*6ea1f93eSDaniel Fojt       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
124944b87433SJohn Marino 	{
125044b87433SJohn Marino 	  Idx candidate = edests->elems[i];
125144b87433SJohn Marino 	  if (!re_node_set_contains (cur_nodes, candidate))
125244b87433SJohn Marino 	    continue;
1253*6ea1f93eSDaniel Fojt           if (dest_node == -1)
125444b87433SJohn Marino 	    dest_node = candidate;
125544b87433SJohn Marino 
125644b87433SJohn Marino 	  else
125744b87433SJohn Marino 	    {
125844b87433SJohn Marino 	      /* In order to avoid infinite loop like "(a*)*", return the second
125944b87433SJohn Marino 		 epsilon-transition if the first was already considered.  */
126044b87433SJohn Marino 	      if (re_node_set_contains (eps_via_nodes, dest_node))
126144b87433SJohn Marino 		return candidate;
126244b87433SJohn Marino 
126344b87433SJohn Marino 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
126444b87433SJohn Marino 	      else if (fs != NULL
126544b87433SJohn Marino 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
126644b87433SJohn Marino 					   eps_via_nodes))
1267*6ea1f93eSDaniel Fojt 		return -2;
126844b87433SJohn Marino 
126944b87433SJohn Marino 	      /* We know we are going to exit.  */
127044b87433SJohn Marino 	      break;
127144b87433SJohn Marino 	    }
127244b87433SJohn Marino 	}
127344b87433SJohn Marino       return dest_node;
127444b87433SJohn Marino     }
127544b87433SJohn Marino   else
127644b87433SJohn Marino     {
127744b87433SJohn Marino       Idx naccepted = 0;
127844b87433SJohn Marino       re_token_type_t type = dfa->nodes[node].type;
127944b87433SJohn Marino 
128044b87433SJohn Marino #ifdef RE_ENABLE_I18N
128144b87433SJohn Marino       if (dfa->nodes[node].accept_mb)
128244b87433SJohn Marino 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
128344b87433SJohn Marino       else
128444b87433SJohn Marino #endif /* RE_ENABLE_I18N */
128544b87433SJohn Marino       if (type == OP_BACK_REF)
128644b87433SJohn Marino 	{
128744b87433SJohn Marino 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
128844b87433SJohn Marino 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
128944b87433SJohn Marino 	  if (fs != NULL)
129044b87433SJohn Marino 	    {
129144b87433SJohn Marino 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1292*6ea1f93eSDaniel Fojt 		return -1;
129344b87433SJohn Marino 	      else if (naccepted)
129444b87433SJohn Marino 		{
129544b87433SJohn Marino 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
129644b87433SJohn Marino 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
129744b87433SJohn Marino 			      naccepted) != 0)
1298*6ea1f93eSDaniel Fojt 		    return -1;
129944b87433SJohn Marino 		}
130044b87433SJohn Marino 	    }
130144b87433SJohn Marino 
130244b87433SJohn Marino 	  if (naccepted == 0)
130344b87433SJohn Marino 	    {
130444b87433SJohn Marino 	      Idx dest_node;
130544b87433SJohn Marino 	      ok = re_node_set_insert (eps_via_nodes, node);
1306*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (! ok))
1307*6ea1f93eSDaniel Fojt 		return -2;
130844b87433SJohn Marino 	      dest_node = dfa->edests[node].elems[0];
130944b87433SJohn Marino 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
131044b87433SJohn Marino 					dest_node))
131144b87433SJohn Marino 		return dest_node;
131244b87433SJohn Marino 	    }
131344b87433SJohn Marino 	}
131444b87433SJohn Marino 
131544b87433SJohn Marino       if (naccepted != 0
131644b87433SJohn Marino 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
131744b87433SJohn Marino 	{
131844b87433SJohn Marino 	  Idx dest_node = dfa->nexts[node];
131944b87433SJohn Marino 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
132044b87433SJohn Marino 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
132144b87433SJohn Marino 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
132244b87433SJohn Marino 					       dest_node)))
1323*6ea1f93eSDaniel Fojt 	    return -1;
132444b87433SJohn Marino 	  re_node_set_empty (eps_via_nodes);
132544b87433SJohn Marino 	  return dest_node;
132644b87433SJohn Marino 	}
132744b87433SJohn Marino     }
1328*6ea1f93eSDaniel Fojt   return -1;
132944b87433SJohn Marino }
133044b87433SJohn Marino 
133144b87433SJohn Marino static reg_errcode_t
1332*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
push_fail_stack(struct re_fail_stack_t * fs,Idx str_idx,Idx dest_node,Idx nregs,regmatch_t * regs,re_node_set * eps_via_nodes)133344b87433SJohn Marino push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
133444b87433SJohn Marino 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
133544b87433SJohn Marino {
133644b87433SJohn Marino   reg_errcode_t err;
133744b87433SJohn Marino   Idx num = fs->num++;
133844b87433SJohn Marino   if (fs->num == fs->alloc)
133944b87433SJohn Marino     {
134044b87433SJohn Marino       struct re_fail_stack_ent_t *new_array;
1341*6ea1f93eSDaniel Fojt       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1342*6ea1f93eSDaniel Fojt                               fs->alloc * 2);
134344b87433SJohn Marino       if (new_array == NULL)
134444b87433SJohn Marino 	return REG_ESPACE;
134544b87433SJohn Marino       fs->alloc *= 2;
134644b87433SJohn Marino       fs->stack = new_array;
134744b87433SJohn Marino     }
134844b87433SJohn Marino   fs->stack[num].idx = str_idx;
134944b87433SJohn Marino   fs->stack[num].node = dest_node;
135044b87433SJohn Marino   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
135144b87433SJohn Marino   if (fs->stack[num].regs == NULL)
135244b87433SJohn Marino     return REG_ESPACE;
135344b87433SJohn Marino   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
135444b87433SJohn Marino   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
135544b87433SJohn Marino   return err;
135644b87433SJohn Marino }
135744b87433SJohn Marino 
135844b87433SJohn Marino static Idx
pop_fail_stack(struct re_fail_stack_t * fs,Idx * pidx,Idx nregs,regmatch_t * regs,re_node_set * eps_via_nodes)135944b87433SJohn Marino pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
136044b87433SJohn Marino 		regmatch_t *regs, re_node_set *eps_via_nodes)
136144b87433SJohn Marino {
136244b87433SJohn Marino   Idx num = --fs->num;
1363*6ea1f93eSDaniel Fojt   assert (num >= 0);
136444b87433SJohn Marino   *pidx = fs->stack[num].idx;
136544b87433SJohn Marino   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
136644b87433SJohn Marino   re_node_set_free (eps_via_nodes);
136744b87433SJohn Marino   re_free (fs->stack[num].regs);
136844b87433SJohn Marino   *eps_via_nodes = fs->stack[num].eps_via_nodes;
136944b87433SJohn Marino   return fs->stack[num].node;
137044b87433SJohn Marino }
137144b87433SJohn Marino 
137244b87433SJohn Marino /* Set the positions where the subexpressions are starts/ends to registers
137344b87433SJohn Marino    PMATCH.
137444b87433SJohn Marino    Note: We assume that pmatch[0] is already set, and
137544b87433SJohn Marino    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
137644b87433SJohn Marino 
137744b87433SJohn Marino static reg_errcode_t
1378*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
set_regs(const regex_t * preg,const re_match_context_t * mctx,size_t nmatch,regmatch_t * pmatch,bool fl_backtrack)137944b87433SJohn Marino set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
138044b87433SJohn Marino 	  regmatch_t *pmatch, bool fl_backtrack)
138144b87433SJohn Marino {
13824536c563SJohn Marino   const re_dfa_t *dfa = preg->buffer;
138344b87433SJohn Marino   Idx idx, cur_node;
138444b87433SJohn Marino   re_node_set eps_via_nodes;
138544b87433SJohn Marino   struct re_fail_stack_t *fs;
138644b87433SJohn Marino   struct re_fail_stack_t fs_body = { 0, 2, NULL };
138744b87433SJohn Marino   regmatch_t *prev_idx_match;
138844b87433SJohn Marino   bool prev_idx_match_malloced = false;
138944b87433SJohn Marino 
139044b87433SJohn Marino #ifdef DEBUG
139144b87433SJohn Marino   assert (nmatch > 1);
139244b87433SJohn Marino   assert (mctx->state_log != NULL);
139344b87433SJohn Marino #endif
139444b87433SJohn Marino   if (fl_backtrack)
139544b87433SJohn Marino     {
139644b87433SJohn Marino       fs = &fs_body;
139744b87433SJohn Marino       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
139844b87433SJohn Marino       if (fs->stack == NULL)
139944b87433SJohn Marino 	return REG_ESPACE;
140044b87433SJohn Marino     }
140144b87433SJohn Marino   else
140244b87433SJohn Marino     fs = NULL;
140344b87433SJohn Marino 
140444b87433SJohn Marino   cur_node = dfa->init_node;
140544b87433SJohn Marino   re_node_set_init_empty (&eps_via_nodes);
140644b87433SJohn Marino 
140744b87433SJohn Marino   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
140844b87433SJohn Marino     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
140944b87433SJohn Marino   else
141044b87433SJohn Marino     {
141144b87433SJohn Marino       prev_idx_match = re_malloc (regmatch_t, nmatch);
141244b87433SJohn Marino       if (prev_idx_match == NULL)
141344b87433SJohn Marino 	{
141444b87433SJohn Marino 	  free_fail_stack_return (fs);
141544b87433SJohn Marino 	  return REG_ESPACE;
141644b87433SJohn Marino 	}
141744b87433SJohn Marino       prev_idx_match_malloced = true;
141844b87433SJohn Marino     }
141944b87433SJohn Marino   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
142044b87433SJohn Marino 
142144b87433SJohn Marino   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
142244b87433SJohn Marino     {
142344b87433SJohn Marino       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
142444b87433SJohn Marino 
142544b87433SJohn Marino       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
142644b87433SJohn Marino 	{
142744b87433SJohn Marino 	  Idx reg_idx;
142844b87433SJohn Marino 	  if (fs)
142944b87433SJohn Marino 	    {
143044b87433SJohn Marino 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
143144b87433SJohn Marino 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
143244b87433SJohn Marino 		  break;
143344b87433SJohn Marino 	      if (reg_idx == nmatch)
143444b87433SJohn Marino 		{
143544b87433SJohn Marino 		  re_node_set_free (&eps_via_nodes);
143644b87433SJohn Marino 		  if (prev_idx_match_malloced)
143744b87433SJohn Marino 		    re_free (prev_idx_match);
143844b87433SJohn Marino 		  return free_fail_stack_return (fs);
143944b87433SJohn Marino 		}
144044b87433SJohn Marino 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
144144b87433SJohn Marino 					 &eps_via_nodes);
144244b87433SJohn Marino 	    }
144344b87433SJohn Marino 	  else
144444b87433SJohn Marino 	    {
144544b87433SJohn Marino 	      re_node_set_free (&eps_via_nodes);
144644b87433SJohn Marino 	      if (prev_idx_match_malloced)
144744b87433SJohn Marino 		re_free (prev_idx_match);
144844b87433SJohn Marino 	      return REG_NOERROR;
144944b87433SJohn Marino 	    }
145044b87433SJohn Marino 	}
145144b87433SJohn Marino 
145244b87433SJohn Marino       /* Proceed to next node.  */
145344b87433SJohn Marino       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
145444b87433SJohn Marino 				    &eps_via_nodes, fs);
145544b87433SJohn Marino 
1456*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (cur_node < 0))
145744b87433SJohn Marino 	{
1458*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (cur_node == -2))
145944b87433SJohn Marino 	    {
146044b87433SJohn Marino 	      re_node_set_free (&eps_via_nodes);
146144b87433SJohn Marino 	      if (prev_idx_match_malloced)
146244b87433SJohn Marino 		re_free (prev_idx_match);
146344b87433SJohn Marino 	      free_fail_stack_return (fs);
146444b87433SJohn Marino 	      return REG_ESPACE;
146544b87433SJohn Marino 	    }
146644b87433SJohn Marino 	  if (fs)
146744b87433SJohn Marino 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
146844b87433SJohn Marino 				       &eps_via_nodes);
146944b87433SJohn Marino 	  else
147044b87433SJohn Marino 	    {
147144b87433SJohn Marino 	      re_node_set_free (&eps_via_nodes);
147244b87433SJohn Marino 	      if (prev_idx_match_malloced)
147344b87433SJohn Marino 		re_free (prev_idx_match);
147444b87433SJohn Marino 	      return REG_NOMATCH;
147544b87433SJohn Marino 	    }
147644b87433SJohn Marino 	}
147744b87433SJohn Marino     }
147844b87433SJohn Marino   re_node_set_free (&eps_via_nodes);
147944b87433SJohn Marino   if (prev_idx_match_malloced)
148044b87433SJohn Marino     re_free (prev_idx_match);
148144b87433SJohn Marino   return free_fail_stack_return (fs);
148244b87433SJohn Marino }
148344b87433SJohn Marino 
148444b87433SJohn Marino static reg_errcode_t
free_fail_stack_return(struct re_fail_stack_t * fs)148544b87433SJohn Marino free_fail_stack_return (struct re_fail_stack_t *fs)
148644b87433SJohn Marino {
148744b87433SJohn Marino   if (fs)
148844b87433SJohn Marino     {
148944b87433SJohn Marino       Idx fs_idx;
149044b87433SJohn Marino       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
149144b87433SJohn Marino 	{
149244b87433SJohn Marino 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
149344b87433SJohn Marino 	  re_free (fs->stack[fs_idx].regs);
149444b87433SJohn Marino 	}
149544b87433SJohn Marino       re_free (fs->stack);
149644b87433SJohn Marino     }
149744b87433SJohn Marino   return REG_NOERROR;
149844b87433SJohn Marino }
149944b87433SJohn Marino 
150044b87433SJohn Marino static void
update_regs(const re_dfa_t * dfa,regmatch_t * pmatch,regmatch_t * prev_idx_match,Idx cur_node,Idx cur_idx,Idx nmatch)150144b87433SJohn Marino update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
150244b87433SJohn Marino 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
150344b87433SJohn Marino {
150444b87433SJohn Marino   int type = dfa->nodes[cur_node].type;
150544b87433SJohn Marino   if (type == OP_OPEN_SUBEXP)
150644b87433SJohn Marino     {
150744b87433SJohn Marino       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
150844b87433SJohn Marino 
150944b87433SJohn Marino       /* We are at the first node of this sub expression.  */
151044b87433SJohn Marino       if (reg_num < nmatch)
151144b87433SJohn Marino 	{
151244b87433SJohn Marino 	  pmatch[reg_num].rm_so = cur_idx;
151344b87433SJohn Marino 	  pmatch[reg_num].rm_eo = -1;
151444b87433SJohn Marino 	}
151544b87433SJohn Marino     }
151644b87433SJohn Marino   else if (type == OP_CLOSE_SUBEXP)
151744b87433SJohn Marino     {
151844b87433SJohn Marino       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
151944b87433SJohn Marino       if (reg_num < nmatch)
152044b87433SJohn Marino 	{
152144b87433SJohn Marino 	  /* We are at the last node of this sub expression.  */
152244b87433SJohn Marino 	  if (pmatch[reg_num].rm_so < cur_idx)
152344b87433SJohn Marino 	    {
152444b87433SJohn Marino 	      pmatch[reg_num].rm_eo = cur_idx;
152544b87433SJohn Marino 	      /* This is a non-empty match or we are not inside an optional
152644b87433SJohn Marino 		 subexpression.  Accept this right away.  */
152744b87433SJohn Marino 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
152844b87433SJohn Marino 	    }
152944b87433SJohn Marino 	  else
153044b87433SJohn Marino 	    {
153144b87433SJohn Marino 	      if (dfa->nodes[cur_node].opt_subexp
153244b87433SJohn Marino 		  && prev_idx_match[reg_num].rm_so != -1)
153344b87433SJohn Marino 		/* We transited through an empty match for an optional
153444b87433SJohn Marino 		   subexpression, like (a?)*, and this is not the subexp's
153544b87433SJohn Marino 		   first match.  Copy back the old content of the registers
153644b87433SJohn Marino 		   so that matches of an inner subexpression are undone as
153744b87433SJohn Marino 		   well, like in ((a?))*.  */
153844b87433SJohn Marino 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
153944b87433SJohn Marino 	      else
154044b87433SJohn Marino 		/* We completed a subexpression, but it may be part of
154144b87433SJohn Marino 		   an optional one, so do not update PREV_IDX_MATCH.  */
154244b87433SJohn Marino 		pmatch[reg_num].rm_eo = cur_idx;
154344b87433SJohn Marino 	    }
154444b87433SJohn Marino 	}
154544b87433SJohn Marino     }
154644b87433SJohn Marino }
154744b87433SJohn Marino 
154844b87433SJohn Marino /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
154944b87433SJohn Marino    and sift the nodes in each states according to the following rules.
155044b87433SJohn Marino    Updated state_log will be wrote to STATE_LOG.
155144b87433SJohn Marino 
15524536c563SJohn Marino    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
155344b87433SJohn Marino      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
15544536c563SJohn Marino 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
15554536c563SJohn Marino 	the LAST_NODE, we throw away the node 'a'.
15564536c563SJohn Marino      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
15574536c563SJohn Marino 	string 's' and transit to 'b':
155844b87433SJohn Marino 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
15594536c563SJohn Marino 	   away the node 'a'.
156044b87433SJohn Marino 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
15614536c563SJohn Marino 	    thrown away, we throw away the node 'a'.
156244b87433SJohn Marino      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
156344b87433SJohn Marino 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
15644536c563SJohn Marino 	   node 'a'.
156544b87433SJohn Marino 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
15664536c563SJohn Marino 	    we throw away the node 'a'.  */
156744b87433SJohn Marino 
156844b87433SJohn Marino #define STATE_NODE_CONTAINS(state,node) \
156944b87433SJohn Marino   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
157044b87433SJohn Marino 
157144b87433SJohn Marino static reg_errcode_t
sift_states_backward(const re_match_context_t * mctx,re_sift_context_t * sctx)157244b87433SJohn Marino sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
157344b87433SJohn Marino {
157444b87433SJohn Marino   reg_errcode_t err;
157544b87433SJohn Marino   int null_cnt = 0;
157644b87433SJohn Marino   Idx str_idx = sctx->last_str_idx;
157744b87433SJohn Marino   re_node_set cur_dest;
157844b87433SJohn Marino 
157944b87433SJohn Marino #ifdef DEBUG
158044b87433SJohn Marino   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
158144b87433SJohn Marino #endif
158244b87433SJohn Marino 
158344b87433SJohn Marino   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
158444b87433SJohn Marino      transit to the last_node and the last_node itself.  */
158544b87433SJohn Marino   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1586*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
158744b87433SJohn Marino     return err;
158844b87433SJohn Marino   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1589*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
159044b87433SJohn Marino     goto free_return;
159144b87433SJohn Marino 
159244b87433SJohn Marino   /* Then check each states in the state_log.  */
159344b87433SJohn Marino   while (str_idx > 0)
159444b87433SJohn Marino     {
159544b87433SJohn Marino       /* Update counters.  */
159644b87433SJohn Marino       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
159744b87433SJohn Marino       if (null_cnt > mctx->max_mb_elem_len)
159844b87433SJohn Marino 	{
159944b87433SJohn Marino 	  memset (sctx->sifted_states, '\0',
160044b87433SJohn Marino 		  sizeof (re_dfastate_t *) * str_idx);
160144b87433SJohn Marino 	  re_node_set_free (&cur_dest);
160244b87433SJohn Marino 	  return REG_NOERROR;
160344b87433SJohn Marino 	}
160444b87433SJohn Marino       re_node_set_empty (&cur_dest);
160544b87433SJohn Marino       --str_idx;
160644b87433SJohn Marino 
160744b87433SJohn Marino       if (mctx->state_log[str_idx])
160844b87433SJohn Marino 	{
160944b87433SJohn Marino 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1610*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
161144b87433SJohn Marino 	    goto free_return;
161244b87433SJohn Marino 	}
161344b87433SJohn Marino 
161444b87433SJohn Marino       /* Add all the nodes which satisfy the following conditions:
161544b87433SJohn Marino 	 - It can epsilon transit to a node in CUR_DEST.
161644b87433SJohn Marino 	 - It is in CUR_SRC.
161744b87433SJohn Marino 	 And update state_log.  */
161844b87433SJohn Marino       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1619*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
162044b87433SJohn Marino 	goto free_return;
162144b87433SJohn Marino     }
162244b87433SJohn Marino   err = REG_NOERROR;
162344b87433SJohn Marino  free_return:
162444b87433SJohn Marino   re_node_set_free (&cur_dest);
162544b87433SJohn Marino   return err;
162644b87433SJohn Marino }
162744b87433SJohn Marino 
162844b87433SJohn Marino static reg_errcode_t
1629*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
build_sifted_states(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,re_node_set * cur_dest)163044b87433SJohn Marino build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
163144b87433SJohn Marino 		     Idx str_idx, re_node_set *cur_dest)
163244b87433SJohn Marino {
163344b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
163444b87433SJohn Marino   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
163544b87433SJohn Marino   Idx i;
163644b87433SJohn Marino 
163744b87433SJohn Marino   /* Then build the next sifted state.
16384536c563SJohn Marino      We build the next sifted state on 'cur_dest', and update
16394536c563SJohn Marino      'sifted_states[str_idx]' with 'cur_dest'.
164044b87433SJohn Marino      Note:
16414536c563SJohn Marino      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
16424536c563SJohn Marino      'cur_src' points the node_set of the old 'state_log[str_idx]'
164344b87433SJohn Marino      (with the epsilon nodes pre-filtered out).  */
164444b87433SJohn Marino   for (i = 0; i < cur_src->nelem; i++)
164544b87433SJohn Marino     {
164644b87433SJohn Marino       Idx prev_node = cur_src->elems[i];
164744b87433SJohn Marino       int naccepted = 0;
164844b87433SJohn Marino       bool ok;
164944b87433SJohn Marino 
165044b87433SJohn Marino #ifdef DEBUG
165144b87433SJohn Marino       re_token_type_t type = dfa->nodes[prev_node].type;
165244b87433SJohn Marino       assert (!IS_EPSILON_NODE (type));
165344b87433SJohn Marino #endif
165444b87433SJohn Marino #ifdef RE_ENABLE_I18N
16554536c563SJohn Marino       /* If the node may accept "multi byte".  */
165644b87433SJohn Marino       if (dfa->nodes[prev_node].accept_mb)
165744b87433SJohn Marino 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
165844b87433SJohn Marino 					 str_idx, sctx->last_str_idx);
165944b87433SJohn Marino #endif /* RE_ENABLE_I18N */
166044b87433SJohn Marino 
166144b87433SJohn Marino       /* We don't check backreferences here.
166244b87433SJohn Marino 	 See update_cur_sifted_state().  */
166344b87433SJohn Marino       if (!naccepted
166444b87433SJohn Marino 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
166544b87433SJohn Marino 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
166644b87433SJohn Marino 				  dfa->nexts[prev_node]))
166744b87433SJohn Marino 	naccepted = 1;
166844b87433SJohn Marino 
166944b87433SJohn Marino       if (naccepted == 0)
167044b87433SJohn Marino 	continue;
167144b87433SJohn Marino 
167244b87433SJohn Marino       if (sctx->limits.nelem)
167344b87433SJohn Marino 	{
167444b87433SJohn Marino 	  Idx to_idx = str_idx + naccepted;
167544b87433SJohn Marino 	  if (check_dst_limits (mctx, &sctx->limits,
167644b87433SJohn Marino 				dfa->nexts[prev_node], to_idx,
167744b87433SJohn Marino 				prev_node, str_idx))
167844b87433SJohn Marino 	    continue;
167944b87433SJohn Marino 	}
168044b87433SJohn Marino       ok = re_node_set_insert (cur_dest, prev_node);
1681*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (! ok))
168244b87433SJohn Marino 	return REG_ESPACE;
168344b87433SJohn Marino     }
168444b87433SJohn Marino 
168544b87433SJohn Marino   return REG_NOERROR;
168644b87433SJohn Marino }
168744b87433SJohn Marino 
168844b87433SJohn Marino /* Helper functions.  */
168944b87433SJohn Marino 
169044b87433SJohn Marino static reg_errcode_t
clean_state_log_if_needed(re_match_context_t * mctx,Idx next_state_log_idx)169144b87433SJohn Marino clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
169244b87433SJohn Marino {
169344b87433SJohn Marino   Idx top = mctx->state_log_top;
169444b87433SJohn Marino 
16954536c563SJohn Marino   if ((next_state_log_idx >= mctx->input.bufs_len
16964536c563SJohn Marino        && mctx->input.bufs_len < mctx->input.len)
169744b87433SJohn Marino       || (next_state_log_idx >= mctx->input.valid_len
169844b87433SJohn Marino 	  && mctx->input.valid_len < mctx->input.len))
169944b87433SJohn Marino     {
170044b87433SJohn Marino       reg_errcode_t err;
17014536c563SJohn Marino       err = extend_buffers (mctx, next_state_log_idx + 1);
1702*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
170344b87433SJohn Marino 	return err;
170444b87433SJohn Marino     }
170544b87433SJohn Marino 
170644b87433SJohn Marino   if (top < next_state_log_idx)
170744b87433SJohn Marino     {
170844b87433SJohn Marino       memset (mctx->state_log + top + 1, '\0',
170944b87433SJohn Marino 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
171044b87433SJohn Marino       mctx->state_log_top = next_state_log_idx;
171144b87433SJohn Marino     }
171244b87433SJohn Marino   return REG_NOERROR;
171344b87433SJohn Marino }
171444b87433SJohn Marino 
171544b87433SJohn Marino static reg_errcode_t
merge_state_array(const re_dfa_t * dfa,re_dfastate_t ** dst,re_dfastate_t ** src,Idx num)171644b87433SJohn Marino merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
171744b87433SJohn Marino 		   re_dfastate_t **src, Idx num)
171844b87433SJohn Marino {
171944b87433SJohn Marino   Idx st_idx;
172044b87433SJohn Marino   reg_errcode_t err;
172144b87433SJohn Marino   for (st_idx = 0; st_idx < num; ++st_idx)
172244b87433SJohn Marino     {
172344b87433SJohn Marino       if (dst[st_idx] == NULL)
172444b87433SJohn Marino 	dst[st_idx] = src[st_idx];
172544b87433SJohn Marino       else if (src[st_idx] != NULL)
172644b87433SJohn Marino 	{
172744b87433SJohn Marino 	  re_node_set merged_set;
172844b87433SJohn Marino 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
172944b87433SJohn Marino 					&src[st_idx]->nodes);
1730*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
173144b87433SJohn Marino 	    return err;
173244b87433SJohn Marino 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
173344b87433SJohn Marino 	  re_node_set_free (&merged_set);
1734*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
173544b87433SJohn Marino 	    return err;
173644b87433SJohn Marino 	}
173744b87433SJohn Marino     }
173844b87433SJohn Marino   return REG_NOERROR;
173944b87433SJohn Marino }
174044b87433SJohn Marino 
174144b87433SJohn Marino static reg_errcode_t
update_cur_sifted_state(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,re_node_set * dest_nodes)174244b87433SJohn Marino update_cur_sifted_state (const re_match_context_t *mctx,
174344b87433SJohn Marino 			 re_sift_context_t *sctx, Idx str_idx,
174444b87433SJohn Marino 			 re_node_set *dest_nodes)
174544b87433SJohn Marino {
174644b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
174744b87433SJohn Marino   reg_errcode_t err = REG_NOERROR;
174844b87433SJohn Marino   const re_node_set *candidates;
174944b87433SJohn Marino   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
175044b87433SJohn Marino 		: &mctx->state_log[str_idx]->nodes);
175144b87433SJohn Marino 
175244b87433SJohn Marino   if (dest_nodes->nelem == 0)
175344b87433SJohn Marino     sctx->sifted_states[str_idx] = NULL;
175444b87433SJohn Marino   else
175544b87433SJohn Marino     {
175644b87433SJohn Marino       if (candidates)
175744b87433SJohn Marino 	{
175844b87433SJohn Marino 	  /* At first, add the nodes which can epsilon transit to a node in
175944b87433SJohn Marino 	     DEST_NODE.  */
176044b87433SJohn Marino 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1761*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
176244b87433SJohn Marino 	    return err;
176344b87433SJohn Marino 
176444b87433SJohn Marino 	  /* Then, check the limitations in the current sift_context.  */
176544b87433SJohn Marino 	  if (sctx->limits.nelem)
176644b87433SJohn Marino 	    {
176744b87433SJohn Marino 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
176844b87433SJohn Marino 					 mctx->bkref_ents, str_idx);
1769*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
177044b87433SJohn Marino 		return err;
177144b87433SJohn Marino 	    }
177244b87433SJohn Marino 	}
177344b87433SJohn Marino 
177444b87433SJohn Marino       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1775*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
177644b87433SJohn Marino 	return err;
177744b87433SJohn Marino     }
177844b87433SJohn Marino 
177944b87433SJohn Marino   if (candidates && mctx->state_log[str_idx]->has_backref)
178044b87433SJohn Marino     {
178144b87433SJohn Marino       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1782*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
178344b87433SJohn Marino 	return err;
178444b87433SJohn Marino     }
178544b87433SJohn Marino   return REG_NOERROR;
178644b87433SJohn Marino }
178744b87433SJohn Marino 
178844b87433SJohn Marino static reg_errcode_t
1789*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
add_epsilon_src_nodes(const re_dfa_t * dfa,re_node_set * dest_nodes,const re_node_set * candidates)179044b87433SJohn Marino add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
179144b87433SJohn Marino 		       const re_node_set *candidates)
179244b87433SJohn Marino {
179344b87433SJohn Marino   reg_errcode_t err = REG_NOERROR;
179444b87433SJohn Marino   Idx i;
179544b87433SJohn Marino 
179644b87433SJohn Marino   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1797*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
179844b87433SJohn Marino     return err;
179944b87433SJohn Marino 
180044b87433SJohn Marino   if (!state->inveclosure.alloc)
180144b87433SJohn Marino     {
180244b87433SJohn Marino       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1803*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
180444b87433SJohn Marino 	return REG_ESPACE;
180544b87433SJohn Marino       for (i = 0; i < dest_nodes->nelem; i++)
180644b87433SJohn Marino 	{
180744b87433SJohn Marino 	  err = re_node_set_merge (&state->inveclosure,
180844b87433SJohn Marino 				   dfa->inveclosures + dest_nodes->elems[i]);
1809*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
181044b87433SJohn Marino 	    return REG_ESPACE;
181144b87433SJohn Marino 	}
181244b87433SJohn Marino     }
181344b87433SJohn Marino   return re_node_set_add_intersect (dest_nodes, candidates,
181444b87433SJohn Marino 				    &state->inveclosure);
181544b87433SJohn Marino }
181644b87433SJohn Marino 
181744b87433SJohn Marino static reg_errcode_t
sub_epsilon_src_nodes(const re_dfa_t * dfa,Idx node,re_node_set * dest_nodes,const re_node_set * candidates)181844b87433SJohn Marino sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
181944b87433SJohn Marino 		       const re_node_set *candidates)
182044b87433SJohn Marino {
182144b87433SJohn Marino     Idx ecl_idx;
182244b87433SJohn Marino     reg_errcode_t err;
182344b87433SJohn Marino     re_node_set *inv_eclosure = dfa->inveclosures + node;
182444b87433SJohn Marino     re_node_set except_nodes;
182544b87433SJohn Marino     re_node_set_init_empty (&except_nodes);
182644b87433SJohn Marino     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
182744b87433SJohn Marino       {
182844b87433SJohn Marino 	Idx cur_node = inv_eclosure->elems[ecl_idx];
182944b87433SJohn Marino 	if (cur_node == node)
183044b87433SJohn Marino 	  continue;
183144b87433SJohn Marino 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
183244b87433SJohn Marino 	  {
183344b87433SJohn Marino 	    Idx edst1 = dfa->edests[cur_node].elems[0];
183444b87433SJohn Marino 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1835*6ea1f93eSDaniel Fojt 			 ? dfa->edests[cur_node].elems[1] : -1);
183644b87433SJohn Marino 	    if ((!re_node_set_contains (inv_eclosure, edst1)
183744b87433SJohn Marino 		 && re_node_set_contains (dest_nodes, edst1))
1838*6ea1f93eSDaniel Fojt 		|| (edst2 > 0
183944b87433SJohn Marino 		    && !re_node_set_contains (inv_eclosure, edst2)
184044b87433SJohn Marino 		    && re_node_set_contains (dest_nodes, edst2)))
184144b87433SJohn Marino 	      {
184244b87433SJohn Marino 		err = re_node_set_add_intersect (&except_nodes, candidates,
184344b87433SJohn Marino 						 dfa->inveclosures + cur_node);
1844*6ea1f93eSDaniel Fojt 		if (__glibc_unlikely (err != REG_NOERROR))
184544b87433SJohn Marino 		  {
184644b87433SJohn Marino 		    re_node_set_free (&except_nodes);
184744b87433SJohn Marino 		    return err;
184844b87433SJohn Marino 		  }
184944b87433SJohn Marino 	      }
185044b87433SJohn Marino 	  }
185144b87433SJohn Marino       }
185244b87433SJohn Marino     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
185344b87433SJohn Marino       {
185444b87433SJohn Marino 	Idx cur_node = inv_eclosure->elems[ecl_idx];
185544b87433SJohn Marino 	if (!re_node_set_contains (&except_nodes, cur_node))
185644b87433SJohn Marino 	  {
185744b87433SJohn Marino 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
185844b87433SJohn Marino 	    re_node_set_remove_at (dest_nodes, idx);
185944b87433SJohn Marino 	  }
186044b87433SJohn Marino       }
186144b87433SJohn Marino     re_node_set_free (&except_nodes);
186244b87433SJohn Marino     return REG_NOERROR;
186344b87433SJohn Marino }
186444b87433SJohn Marino 
186544b87433SJohn Marino static bool
check_dst_limits(const re_match_context_t * mctx,const re_node_set * limits,Idx dst_node,Idx dst_idx,Idx src_node,Idx src_idx)186644b87433SJohn Marino check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
186744b87433SJohn Marino 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
186844b87433SJohn Marino {
186944b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
187044b87433SJohn Marino   Idx lim_idx, src_pos, dst_pos;
187144b87433SJohn Marino 
187244b87433SJohn Marino   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
187344b87433SJohn Marino   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
187444b87433SJohn Marino   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
187544b87433SJohn Marino     {
187644b87433SJohn Marino       Idx subexp_idx;
187744b87433SJohn Marino       struct re_backref_cache_entry *ent;
187844b87433SJohn Marino       ent = mctx->bkref_ents + limits->elems[lim_idx];
187944b87433SJohn Marino       subexp_idx = dfa->nodes[ent->node].opr.idx;
188044b87433SJohn Marino 
188144b87433SJohn Marino       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
188244b87433SJohn Marino 					   subexp_idx, dst_node, dst_idx,
188344b87433SJohn Marino 					   dst_bkref_idx);
188444b87433SJohn Marino       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
188544b87433SJohn Marino 					   subexp_idx, src_node, src_idx,
188644b87433SJohn Marino 					   src_bkref_idx);
188744b87433SJohn Marino 
188844b87433SJohn Marino       /* In case of:
188944b87433SJohn Marino 	 <src> <dst> ( <subexp> )
189044b87433SJohn Marino 	 ( <subexp> ) <src> <dst>
189144b87433SJohn Marino 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
189244b87433SJohn Marino       if (src_pos == dst_pos)
189344b87433SJohn Marino 	continue; /* This is unrelated limitation.  */
189444b87433SJohn Marino       else
189544b87433SJohn Marino 	return true;
189644b87433SJohn Marino     }
189744b87433SJohn Marino   return false;
189844b87433SJohn Marino }
189944b87433SJohn Marino 
190044b87433SJohn Marino static int
check_dst_limits_calc_pos_1(const re_match_context_t * mctx,int boundaries,Idx subexp_idx,Idx from_node,Idx bkref_idx)190144b87433SJohn Marino check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
190244b87433SJohn Marino 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
190344b87433SJohn Marino {
190444b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
190544b87433SJohn Marino   const re_node_set *eclosures = dfa->eclosures + from_node;
190644b87433SJohn Marino   Idx node_idx;
190744b87433SJohn Marino 
190844b87433SJohn Marino   /* Else, we are on the boundary: examine the nodes on the epsilon
190944b87433SJohn Marino      closure.  */
191044b87433SJohn Marino   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
191144b87433SJohn Marino     {
191244b87433SJohn Marino       Idx node = eclosures->elems[node_idx];
191344b87433SJohn Marino       switch (dfa->nodes[node].type)
191444b87433SJohn Marino 	{
191544b87433SJohn Marino 	case OP_BACK_REF:
1916*6ea1f93eSDaniel Fojt 	  if (bkref_idx != -1)
191744b87433SJohn Marino 	    {
191844b87433SJohn Marino 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
191944b87433SJohn Marino 	      do
192044b87433SJohn Marino 		{
192144b87433SJohn Marino 		  Idx dst;
192244b87433SJohn Marino 		  int cpos;
192344b87433SJohn Marino 
192444b87433SJohn Marino 		  if (ent->node != node)
192544b87433SJohn Marino 		    continue;
192644b87433SJohn Marino 
192744b87433SJohn Marino 		  if (subexp_idx < BITSET_WORD_BITS
192844b87433SJohn Marino 		      && !(ent->eps_reachable_subexps_map
192944b87433SJohn Marino 			   & ((bitset_word_t) 1 << subexp_idx)))
193044b87433SJohn Marino 		    continue;
193144b87433SJohn Marino 
193244b87433SJohn Marino 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
193344b87433SJohn Marino 		     OP_CLOSE_SUBEXP cases below.  But, if the
193444b87433SJohn Marino 		     destination node is the same node as the source
193544b87433SJohn Marino 		     node, don't recurse because it would cause an
193644b87433SJohn Marino 		     infinite loop: a regex that exhibits this behavior
193744b87433SJohn Marino 		     is ()\1*\1*  */
193844b87433SJohn Marino 		  dst = dfa->edests[node].elems[0];
193944b87433SJohn Marino 		  if (dst == from_node)
194044b87433SJohn Marino 		    {
194144b87433SJohn Marino 		      if (boundaries & 1)
194244b87433SJohn Marino 			return -1;
194344b87433SJohn Marino 		      else /* if (boundaries & 2) */
194444b87433SJohn Marino 			return 0;
194544b87433SJohn Marino 		    }
194644b87433SJohn Marino 
194744b87433SJohn Marino 		  cpos =
194844b87433SJohn Marino 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
194944b87433SJohn Marino 						 dst, bkref_idx);
195044b87433SJohn Marino 		  if (cpos == -1 /* && (boundaries & 1) */)
195144b87433SJohn Marino 		    return -1;
195244b87433SJohn Marino 		  if (cpos == 0 && (boundaries & 2))
195344b87433SJohn Marino 		    return 0;
195444b87433SJohn Marino 
195544b87433SJohn Marino 		  if (subexp_idx < BITSET_WORD_BITS)
195644b87433SJohn Marino 		    ent->eps_reachable_subexps_map
195744b87433SJohn Marino 		      &= ~((bitset_word_t) 1 << subexp_idx);
195844b87433SJohn Marino 		}
195944b87433SJohn Marino 	      while (ent++->more);
196044b87433SJohn Marino 	    }
196144b87433SJohn Marino 	  break;
196244b87433SJohn Marino 
196344b87433SJohn Marino 	case OP_OPEN_SUBEXP:
196444b87433SJohn Marino 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
196544b87433SJohn Marino 	    return -1;
196644b87433SJohn Marino 	  break;
196744b87433SJohn Marino 
196844b87433SJohn Marino 	case OP_CLOSE_SUBEXP:
196944b87433SJohn Marino 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
197044b87433SJohn Marino 	    return 0;
197144b87433SJohn Marino 	  break;
197244b87433SJohn Marino 
197344b87433SJohn Marino 	default:
197444b87433SJohn Marino 	    break;
197544b87433SJohn Marino 	}
197644b87433SJohn Marino     }
197744b87433SJohn Marino 
197844b87433SJohn Marino   return (boundaries & 2) ? 1 : 0;
197944b87433SJohn Marino }
198044b87433SJohn Marino 
198144b87433SJohn Marino static int
check_dst_limits_calc_pos(const re_match_context_t * mctx,Idx limit,Idx subexp_idx,Idx from_node,Idx str_idx,Idx bkref_idx)198244b87433SJohn Marino check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
198344b87433SJohn Marino 			   Idx subexp_idx, Idx from_node, Idx str_idx,
198444b87433SJohn Marino 			   Idx bkref_idx)
198544b87433SJohn Marino {
198644b87433SJohn Marino   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
198744b87433SJohn Marino   int boundaries;
198844b87433SJohn Marino 
198944b87433SJohn Marino   /* If we are outside the range of the subexpression, return -1 or 1.  */
199044b87433SJohn Marino   if (str_idx < lim->subexp_from)
199144b87433SJohn Marino     return -1;
199244b87433SJohn Marino 
199344b87433SJohn Marino   if (lim->subexp_to < str_idx)
199444b87433SJohn Marino     return 1;
199544b87433SJohn Marino 
199644b87433SJohn Marino   /* If we are within the subexpression, return 0.  */
199744b87433SJohn Marino   boundaries = (str_idx == lim->subexp_from);
199844b87433SJohn Marino   boundaries |= (str_idx == lim->subexp_to) << 1;
199944b87433SJohn Marino   if (boundaries == 0)
200044b87433SJohn Marino     return 0;
200144b87433SJohn Marino 
200244b87433SJohn Marino   /* Else, examine epsilon closure.  */
200344b87433SJohn Marino   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
200444b87433SJohn Marino 				      from_node, bkref_idx);
200544b87433SJohn Marino }
200644b87433SJohn Marino 
200744b87433SJohn Marino /* Check the limitations of sub expressions LIMITS, and remove the nodes
200844b87433SJohn Marino    which are against limitations from DEST_NODES. */
200944b87433SJohn Marino 
201044b87433SJohn Marino static reg_errcode_t
check_subexp_limits(const re_dfa_t * dfa,re_node_set * dest_nodes,const re_node_set * candidates,re_node_set * limits,struct re_backref_cache_entry * bkref_ents,Idx str_idx)201144b87433SJohn Marino check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
201244b87433SJohn Marino 		     const re_node_set *candidates, re_node_set *limits,
201344b87433SJohn Marino 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
201444b87433SJohn Marino {
201544b87433SJohn Marino   reg_errcode_t err;
201644b87433SJohn Marino   Idx node_idx, lim_idx;
201744b87433SJohn Marino 
201844b87433SJohn Marino   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
201944b87433SJohn Marino     {
202044b87433SJohn Marino       Idx subexp_idx;
202144b87433SJohn Marino       struct re_backref_cache_entry *ent;
202244b87433SJohn Marino       ent = bkref_ents + limits->elems[lim_idx];
202344b87433SJohn Marino 
202444b87433SJohn Marino       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
202544b87433SJohn Marino 	continue; /* This is unrelated limitation.  */
202644b87433SJohn Marino 
202744b87433SJohn Marino       subexp_idx = dfa->nodes[ent->node].opr.idx;
202844b87433SJohn Marino       if (ent->subexp_to == str_idx)
202944b87433SJohn Marino 	{
2030*6ea1f93eSDaniel Fojt 	  Idx ops_node = -1;
2031*6ea1f93eSDaniel Fojt 	  Idx cls_node = -1;
203244b87433SJohn Marino 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
203344b87433SJohn Marino 	    {
203444b87433SJohn Marino 	      Idx node = dest_nodes->elems[node_idx];
203544b87433SJohn Marino 	      re_token_type_t type = dfa->nodes[node].type;
203644b87433SJohn Marino 	      if (type == OP_OPEN_SUBEXP
203744b87433SJohn Marino 		  && subexp_idx == dfa->nodes[node].opr.idx)
203844b87433SJohn Marino 		ops_node = node;
203944b87433SJohn Marino 	      else if (type == OP_CLOSE_SUBEXP
204044b87433SJohn Marino 		       && subexp_idx == dfa->nodes[node].opr.idx)
204144b87433SJohn Marino 		cls_node = node;
204244b87433SJohn Marino 	    }
204344b87433SJohn Marino 
204444b87433SJohn Marino 	  /* Check the limitation of the open subexpression.  */
204544b87433SJohn Marino 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2046*6ea1f93eSDaniel Fojt 	  if (ops_node >= 0)
204744b87433SJohn Marino 	    {
204844b87433SJohn Marino 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
204944b87433SJohn Marino 					   candidates);
2050*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
205144b87433SJohn Marino 		return err;
205244b87433SJohn Marino 	    }
205344b87433SJohn Marino 
205444b87433SJohn Marino 	  /* Check the limitation of the close subexpression.  */
2055*6ea1f93eSDaniel Fojt 	  if (cls_node >= 0)
205644b87433SJohn Marino 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
205744b87433SJohn Marino 	      {
205844b87433SJohn Marino 		Idx node = dest_nodes->elems[node_idx];
205944b87433SJohn Marino 		if (!re_node_set_contains (dfa->inveclosures + node,
206044b87433SJohn Marino 					   cls_node)
206144b87433SJohn Marino 		    && !re_node_set_contains (dfa->eclosures + node,
206244b87433SJohn Marino 					      cls_node))
206344b87433SJohn Marino 		  {
206444b87433SJohn Marino 		    /* It is against this limitation.
206544b87433SJohn Marino 		       Remove it form the current sifted state.  */
206644b87433SJohn Marino 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
206744b87433SJohn Marino 						 candidates);
2068*6ea1f93eSDaniel Fojt 		    if (__glibc_unlikely (err != REG_NOERROR))
206944b87433SJohn Marino 		      return err;
207044b87433SJohn Marino 		    --node_idx;
207144b87433SJohn Marino 		  }
207244b87433SJohn Marino 	      }
207344b87433SJohn Marino 	}
207444b87433SJohn Marino       else /* (ent->subexp_to != str_idx)  */
207544b87433SJohn Marino 	{
207644b87433SJohn Marino 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
207744b87433SJohn Marino 	    {
207844b87433SJohn Marino 	      Idx node = dest_nodes->elems[node_idx];
207944b87433SJohn Marino 	      re_token_type_t type = dfa->nodes[node].type;
208044b87433SJohn Marino 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
208144b87433SJohn Marino 		{
208244b87433SJohn Marino 		  if (subexp_idx != dfa->nodes[node].opr.idx)
208344b87433SJohn Marino 		    continue;
208444b87433SJohn Marino 		  /* It is against this limitation.
208544b87433SJohn Marino 		     Remove it form the current sifted state.  */
208644b87433SJohn Marino 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
208744b87433SJohn Marino 					       candidates);
2088*6ea1f93eSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
208944b87433SJohn Marino 		    return err;
209044b87433SJohn Marino 		}
209144b87433SJohn Marino 	    }
209244b87433SJohn Marino 	}
209344b87433SJohn Marino     }
209444b87433SJohn Marino   return REG_NOERROR;
209544b87433SJohn Marino }
209644b87433SJohn Marino 
209744b87433SJohn Marino static reg_errcode_t
2098*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
sift_states_bkref(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,const re_node_set * candidates)209944b87433SJohn Marino sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
210044b87433SJohn Marino 		   Idx str_idx, const re_node_set *candidates)
210144b87433SJohn Marino {
210244b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
210344b87433SJohn Marino   reg_errcode_t err;
210444b87433SJohn Marino   Idx node_idx, node;
210544b87433SJohn Marino   re_sift_context_t local_sctx;
210644b87433SJohn Marino   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
210744b87433SJohn Marino 
2108*6ea1f93eSDaniel Fojt   if (first_idx == -1)
210944b87433SJohn Marino     return REG_NOERROR;
211044b87433SJohn Marino 
211144b87433SJohn Marino   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
211244b87433SJohn Marino 
211344b87433SJohn Marino   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
211444b87433SJohn Marino     {
211544b87433SJohn Marino       Idx enabled_idx;
211644b87433SJohn Marino       re_token_type_t type;
211744b87433SJohn Marino       struct re_backref_cache_entry *entry;
211844b87433SJohn Marino       node = candidates->elems[node_idx];
211944b87433SJohn Marino       type = dfa->nodes[node].type;
212044b87433SJohn Marino       /* Avoid infinite loop for the REs like "()\1+".  */
212144b87433SJohn Marino       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
212244b87433SJohn Marino 	continue;
212344b87433SJohn Marino       if (type != OP_BACK_REF)
212444b87433SJohn Marino 	continue;
212544b87433SJohn Marino 
212644b87433SJohn Marino       entry = mctx->bkref_ents + first_idx;
212744b87433SJohn Marino       enabled_idx = first_idx;
212844b87433SJohn Marino       do
212944b87433SJohn Marino 	{
213044b87433SJohn Marino 	  Idx subexp_len;
213144b87433SJohn Marino 	  Idx to_idx;
213244b87433SJohn Marino 	  Idx dst_node;
213344b87433SJohn Marino 	  bool ok;
213444b87433SJohn Marino 	  re_dfastate_t *cur_state;
213544b87433SJohn Marino 
213644b87433SJohn Marino 	  if (entry->node != node)
213744b87433SJohn Marino 	    continue;
213844b87433SJohn Marino 	  subexp_len = entry->subexp_to - entry->subexp_from;
213944b87433SJohn Marino 	  to_idx = str_idx + subexp_len;
214044b87433SJohn Marino 	  dst_node = (subexp_len ? dfa->nexts[node]
214144b87433SJohn Marino 		      : dfa->edests[node].elems[0]);
214244b87433SJohn Marino 
214344b87433SJohn Marino 	  if (to_idx > sctx->last_str_idx
214444b87433SJohn Marino 	      || sctx->sifted_states[to_idx] == NULL
214544b87433SJohn Marino 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
214644b87433SJohn Marino 	      || check_dst_limits (mctx, &sctx->limits, node,
214744b87433SJohn Marino 				   str_idx, dst_node, to_idx))
214844b87433SJohn Marino 	    continue;
214944b87433SJohn Marino 
215044b87433SJohn Marino 	  if (local_sctx.sifted_states == NULL)
215144b87433SJohn Marino 	    {
215244b87433SJohn Marino 	      local_sctx = *sctx;
215344b87433SJohn Marino 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2154*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
215544b87433SJohn Marino 		goto free_return;
215644b87433SJohn Marino 	    }
215744b87433SJohn Marino 	  local_sctx.last_node = node;
215844b87433SJohn Marino 	  local_sctx.last_str_idx = str_idx;
215944b87433SJohn Marino 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2160*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (! ok))
216144b87433SJohn Marino 	    {
216244b87433SJohn Marino 	      err = REG_ESPACE;
216344b87433SJohn Marino 	      goto free_return;
216444b87433SJohn Marino 	    }
216544b87433SJohn Marino 	  cur_state = local_sctx.sifted_states[str_idx];
216644b87433SJohn Marino 	  err = sift_states_backward (mctx, &local_sctx);
2167*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
216844b87433SJohn Marino 	    goto free_return;
216944b87433SJohn Marino 	  if (sctx->limited_states != NULL)
217044b87433SJohn Marino 	    {
217144b87433SJohn Marino 	      err = merge_state_array (dfa, sctx->limited_states,
217244b87433SJohn Marino 				       local_sctx.sifted_states,
217344b87433SJohn Marino 				       str_idx + 1);
2174*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
217544b87433SJohn Marino 		goto free_return;
217644b87433SJohn Marino 	    }
217744b87433SJohn Marino 	  local_sctx.sifted_states[str_idx] = cur_state;
217844b87433SJohn Marino 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
217944b87433SJohn Marino 
218044b87433SJohn Marino 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
218144b87433SJohn Marino 	  entry = mctx->bkref_ents + enabled_idx;
218244b87433SJohn Marino 	}
218344b87433SJohn Marino       while (enabled_idx++, entry++->more);
218444b87433SJohn Marino     }
218544b87433SJohn Marino   err = REG_NOERROR;
218644b87433SJohn Marino  free_return:
218744b87433SJohn Marino   if (local_sctx.sifted_states != NULL)
218844b87433SJohn Marino     {
218944b87433SJohn Marino       re_node_set_free (&local_sctx.limits);
219044b87433SJohn Marino     }
219144b87433SJohn Marino 
219244b87433SJohn Marino   return err;
219344b87433SJohn Marino }
219444b87433SJohn Marino 
219544b87433SJohn Marino 
219644b87433SJohn Marino #ifdef RE_ENABLE_I18N
219744b87433SJohn Marino static int
sift_states_iter_mb(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx node_idx,Idx str_idx,Idx max_str_idx)219844b87433SJohn Marino sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
219944b87433SJohn Marino 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
220044b87433SJohn Marino {
220144b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
220244b87433SJohn Marino   int naccepted;
22034536c563SJohn Marino   /* Check the node can accept "multi byte".  */
220444b87433SJohn Marino   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
220544b87433SJohn Marino   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
220644b87433SJohn Marino       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
220744b87433SJohn Marino 			    dfa->nexts[node_idx]))
22084536c563SJohn Marino     /* The node can't accept the "multi byte", or the
220944b87433SJohn Marino        destination was already thrown away, then the node
22104536c563SJohn Marino        could't accept the current input "multi byte".   */
221144b87433SJohn Marino     naccepted = 0;
221244b87433SJohn Marino   /* Otherwise, it is sure that the node could accept
22134536c563SJohn Marino      'naccepted' bytes input.  */
221444b87433SJohn Marino   return naccepted;
221544b87433SJohn Marino }
221644b87433SJohn Marino #endif /* RE_ENABLE_I18N */
221744b87433SJohn Marino 
221844b87433SJohn Marino 
221944b87433SJohn Marino /* Functions for state transition.  */
222044b87433SJohn Marino 
222144b87433SJohn Marino /* Return the next state to which the current state STATE will transit by
222244b87433SJohn Marino    accepting the current input byte, and update STATE_LOG if necessary.
222344b87433SJohn Marino    If STATE can accept a multibyte char/collating element/back reference
222444b87433SJohn Marino    update the destination of STATE_LOG.  */
222544b87433SJohn Marino 
222644b87433SJohn Marino static re_dfastate_t *
2227*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
transit_state(reg_errcode_t * err,re_match_context_t * mctx,re_dfastate_t * state)222844b87433SJohn Marino transit_state (reg_errcode_t *err, re_match_context_t *mctx,
222944b87433SJohn Marino 	       re_dfastate_t *state)
223044b87433SJohn Marino {
223144b87433SJohn Marino   re_dfastate_t **trtable;
223244b87433SJohn Marino   unsigned char ch;
223344b87433SJohn Marino 
223444b87433SJohn Marino #ifdef RE_ENABLE_I18N
223544b87433SJohn Marino   /* If the current state can accept multibyte.  */
2236*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (state->accept_mb))
223744b87433SJohn Marino     {
223844b87433SJohn Marino       *err = transit_state_mb (mctx, state);
2239*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (*err != REG_NOERROR))
224044b87433SJohn Marino 	return NULL;
224144b87433SJohn Marino     }
224244b87433SJohn Marino #endif /* RE_ENABLE_I18N */
224344b87433SJohn Marino 
224444b87433SJohn Marino   /* Then decide the next state with the single byte.  */
224544b87433SJohn Marino #if 0
224644b87433SJohn Marino   if (0)
224744b87433SJohn Marino     /* don't use transition table  */
224844b87433SJohn Marino     return transit_state_sb (err, mctx, state);
224944b87433SJohn Marino #endif
225044b87433SJohn Marino 
225144b87433SJohn Marino   /* Use transition table  */
225244b87433SJohn Marino   ch = re_string_fetch_byte (&mctx->input);
225344b87433SJohn Marino   for (;;)
225444b87433SJohn Marino     {
225544b87433SJohn Marino       trtable = state->trtable;
2256*6ea1f93eSDaniel Fojt       if (__glibc_likely (trtable != NULL))
225744b87433SJohn Marino 	return trtable[ch];
225844b87433SJohn Marino 
225944b87433SJohn Marino       trtable = state->word_trtable;
2260*6ea1f93eSDaniel Fojt       if (__glibc_likely (trtable != NULL))
226144b87433SJohn Marino 	{
226244b87433SJohn Marino 	  unsigned int context;
226344b87433SJohn Marino 	  context
226444b87433SJohn Marino 	    = re_string_context_at (&mctx->input,
226544b87433SJohn Marino 				    re_string_cur_idx (&mctx->input) - 1,
226644b87433SJohn Marino 				    mctx->eflags);
226744b87433SJohn Marino 	  if (IS_WORD_CONTEXT (context))
226844b87433SJohn Marino 	    return trtable[ch + SBC_MAX];
226944b87433SJohn Marino 	  else
227044b87433SJohn Marino 	    return trtable[ch];
227144b87433SJohn Marino 	}
227244b87433SJohn Marino 
227344b87433SJohn Marino       if (!build_trtable (mctx->dfa, state))
227444b87433SJohn Marino 	{
227544b87433SJohn Marino 	  *err = REG_ESPACE;
227644b87433SJohn Marino 	  return NULL;
227744b87433SJohn Marino 	}
227844b87433SJohn Marino 
227944b87433SJohn Marino       /* Retry, we now have a transition table.  */
228044b87433SJohn Marino     }
228144b87433SJohn Marino }
228244b87433SJohn Marino 
228344b87433SJohn Marino /* Update the state_log if we need */
228444b87433SJohn Marino static re_dfastate_t *
merge_state_with_log(reg_errcode_t * err,re_match_context_t * mctx,re_dfastate_t * next_state)228544b87433SJohn Marino merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
228644b87433SJohn Marino 		      re_dfastate_t *next_state)
228744b87433SJohn Marino {
228844b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
228944b87433SJohn Marino   Idx cur_idx = re_string_cur_idx (&mctx->input);
229044b87433SJohn Marino 
229144b87433SJohn Marino   if (cur_idx > mctx->state_log_top)
229244b87433SJohn Marino     {
229344b87433SJohn Marino       mctx->state_log[cur_idx] = next_state;
229444b87433SJohn Marino       mctx->state_log_top = cur_idx;
229544b87433SJohn Marino     }
229644b87433SJohn Marino   else if (mctx->state_log[cur_idx] == 0)
229744b87433SJohn Marino     {
229844b87433SJohn Marino       mctx->state_log[cur_idx] = next_state;
229944b87433SJohn Marino     }
230044b87433SJohn Marino   else
230144b87433SJohn Marino     {
230244b87433SJohn Marino       re_dfastate_t *pstate;
230344b87433SJohn Marino       unsigned int context;
230444b87433SJohn Marino       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
230544b87433SJohn Marino       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
230644b87433SJohn Marino 	 the destination of a multibyte char/collating element/
230744b87433SJohn Marino 	 back reference.  Then the next state is the union set of
230844b87433SJohn Marino 	 these destinations and the results of the transition table.  */
230944b87433SJohn Marino       pstate = mctx->state_log[cur_idx];
231044b87433SJohn Marino       log_nodes = pstate->entrance_nodes;
231144b87433SJohn Marino       if (next_state != NULL)
231244b87433SJohn Marino 	{
231344b87433SJohn Marino 	  table_nodes = next_state->entrance_nodes;
231444b87433SJohn Marino 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
231544b87433SJohn Marino 					     log_nodes);
2316*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (*err != REG_NOERROR))
231744b87433SJohn Marino 	    return NULL;
231844b87433SJohn Marino 	}
231944b87433SJohn Marino       else
232044b87433SJohn Marino 	next_nodes = *log_nodes;
232144b87433SJohn Marino       /* Note: We already add the nodes of the initial state,
232244b87433SJohn Marino 	 then we don't need to add them here.  */
232344b87433SJohn Marino 
232444b87433SJohn Marino       context = re_string_context_at (&mctx->input,
232544b87433SJohn Marino 				      re_string_cur_idx (&mctx->input) - 1,
232644b87433SJohn Marino 				      mctx->eflags);
232744b87433SJohn Marino       next_state = mctx->state_log[cur_idx]
232844b87433SJohn Marino 	= re_acquire_state_context (err, dfa, &next_nodes, context);
232944b87433SJohn Marino       /* We don't need to check errors here, since the return value of
233044b87433SJohn Marino 	 this function is next_state and ERR is already set.  */
233144b87433SJohn Marino 
233244b87433SJohn Marino       if (table_nodes != NULL)
233344b87433SJohn Marino 	re_node_set_free (&next_nodes);
233444b87433SJohn Marino     }
233544b87433SJohn Marino 
2336*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
233744b87433SJohn Marino     {
233844b87433SJohn Marino       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
233944b87433SJohn Marino 	 later.  We must check them here, since the back references in the
234044b87433SJohn Marino 	 next state might use them.  */
234144b87433SJohn Marino       *err = check_subexp_matching_top (mctx, &next_state->nodes,
234244b87433SJohn Marino 					cur_idx);
2343*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (*err != REG_NOERROR))
234444b87433SJohn Marino 	return NULL;
234544b87433SJohn Marino 
234644b87433SJohn Marino       /* If the next state has back references.  */
234744b87433SJohn Marino       if (next_state->has_backref)
234844b87433SJohn Marino 	{
234944b87433SJohn Marino 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2350*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (*err != REG_NOERROR))
235144b87433SJohn Marino 	    return NULL;
235244b87433SJohn Marino 	  next_state = mctx->state_log[cur_idx];
235344b87433SJohn Marino 	}
235444b87433SJohn Marino     }
235544b87433SJohn Marino 
235644b87433SJohn Marino   return next_state;
235744b87433SJohn Marino }
235844b87433SJohn Marino 
235944b87433SJohn Marino /* Skip bytes in the input that correspond to part of a
236044b87433SJohn Marino    multi-byte match, then look in the log for a state
236144b87433SJohn Marino    from which to restart matching.  */
236244b87433SJohn Marino static re_dfastate_t *
find_recover_state(reg_errcode_t * err,re_match_context_t * mctx)236344b87433SJohn Marino find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
236444b87433SJohn Marino {
236544b87433SJohn Marino   re_dfastate_t *cur_state;
236644b87433SJohn Marino   do
236744b87433SJohn Marino     {
236844b87433SJohn Marino       Idx max = mctx->state_log_top;
236944b87433SJohn Marino       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
237044b87433SJohn Marino 
237144b87433SJohn Marino       do
237244b87433SJohn Marino 	{
237344b87433SJohn Marino 	  if (++cur_str_idx > max)
237444b87433SJohn Marino 	    return NULL;
237544b87433SJohn Marino 	  re_string_skip_bytes (&mctx->input, 1);
237644b87433SJohn Marino 	}
237744b87433SJohn Marino       while (mctx->state_log[cur_str_idx] == NULL);
237844b87433SJohn Marino 
237944b87433SJohn Marino       cur_state = merge_state_with_log (err, mctx, NULL);
238044b87433SJohn Marino     }
238144b87433SJohn Marino   while (*err == REG_NOERROR && cur_state == NULL);
238244b87433SJohn Marino   return cur_state;
238344b87433SJohn Marino }
238444b87433SJohn Marino 
238544b87433SJohn Marino /* Helper functions for transit_state.  */
238644b87433SJohn Marino 
238744b87433SJohn Marino /* From the node set CUR_NODES, pick up the nodes whose types are
238844b87433SJohn Marino    OP_OPEN_SUBEXP and which have corresponding back references in the regular
238944b87433SJohn Marino    expression. And register them to use them later for evaluating the
23904536c563SJohn Marino    corresponding back references.  */
239144b87433SJohn Marino 
239244b87433SJohn Marino static reg_errcode_t
check_subexp_matching_top(re_match_context_t * mctx,re_node_set * cur_nodes,Idx str_idx)239344b87433SJohn Marino check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
239444b87433SJohn Marino 			   Idx str_idx)
239544b87433SJohn Marino {
239644b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
239744b87433SJohn Marino   Idx node_idx;
239844b87433SJohn Marino   reg_errcode_t err;
239944b87433SJohn Marino 
240044b87433SJohn Marino   /* TODO: This isn't efficient.
240144b87433SJohn Marino 	   Because there might be more than one nodes whose types are
240244b87433SJohn Marino 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
240344b87433SJohn Marino 	   nodes.
240444b87433SJohn Marino 	   E.g. RE: (a){2}  */
240544b87433SJohn Marino   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
240644b87433SJohn Marino     {
240744b87433SJohn Marino       Idx node = cur_nodes->elems[node_idx];
240844b87433SJohn Marino       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
240944b87433SJohn Marino 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
241044b87433SJohn Marino 	  && (dfa->used_bkref_map
241144b87433SJohn Marino 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
241244b87433SJohn Marino 	{
241344b87433SJohn Marino 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2414*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
241544b87433SJohn Marino 	    return err;
241644b87433SJohn Marino 	}
241744b87433SJohn Marino     }
241844b87433SJohn Marino   return REG_NOERROR;
241944b87433SJohn Marino }
242044b87433SJohn Marino 
242144b87433SJohn Marino #if 0
242244b87433SJohn Marino /* Return the next state to which the current state STATE will transit by
242344b87433SJohn Marino    accepting the current input byte.  */
242444b87433SJohn Marino 
242544b87433SJohn Marino static re_dfastate_t *
242644b87433SJohn Marino transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
242744b87433SJohn Marino 		  re_dfastate_t *state)
242844b87433SJohn Marino {
242944b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
243044b87433SJohn Marino   re_node_set next_nodes;
243144b87433SJohn Marino   re_dfastate_t *next_state;
243244b87433SJohn Marino   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
243344b87433SJohn Marino   unsigned int context;
243444b87433SJohn Marino 
243544b87433SJohn Marino   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2436*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (*err != REG_NOERROR))
243744b87433SJohn Marino     return NULL;
243844b87433SJohn Marino   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
243944b87433SJohn Marino     {
244044b87433SJohn Marino       Idx cur_node = state->nodes.elems[node_cnt];
244144b87433SJohn Marino       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
244244b87433SJohn Marino 	{
244344b87433SJohn Marino 	  *err = re_node_set_merge (&next_nodes,
244444b87433SJohn Marino 				    dfa->eclosures + dfa->nexts[cur_node]);
2445*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (*err != REG_NOERROR))
244644b87433SJohn Marino 	    {
244744b87433SJohn Marino 	      re_node_set_free (&next_nodes);
244844b87433SJohn Marino 	      return NULL;
244944b87433SJohn Marino 	    }
245044b87433SJohn Marino 	}
245144b87433SJohn Marino     }
245244b87433SJohn Marino   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
245344b87433SJohn Marino   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
245444b87433SJohn Marino   /* We don't need to check errors here, since the return value of
245544b87433SJohn Marino      this function is next_state and ERR is already set.  */
245644b87433SJohn Marino 
245744b87433SJohn Marino   re_node_set_free (&next_nodes);
245844b87433SJohn Marino   re_string_skip_bytes (&mctx->input, 1);
245944b87433SJohn Marino   return next_state;
246044b87433SJohn Marino }
246144b87433SJohn Marino #endif
246244b87433SJohn Marino 
246344b87433SJohn Marino #ifdef RE_ENABLE_I18N
246444b87433SJohn Marino static reg_errcode_t
transit_state_mb(re_match_context_t * mctx,re_dfastate_t * pstate)246544b87433SJohn Marino transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
246644b87433SJohn Marino {
246744b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
246844b87433SJohn Marino   reg_errcode_t err;
246944b87433SJohn Marino   Idx i;
247044b87433SJohn Marino 
247144b87433SJohn Marino   for (i = 0; i < pstate->nodes.nelem; ++i)
247244b87433SJohn Marino     {
247344b87433SJohn Marino       re_node_set dest_nodes, *new_nodes;
247444b87433SJohn Marino       Idx cur_node_idx = pstate->nodes.elems[i];
247544b87433SJohn Marino       int naccepted;
247644b87433SJohn Marino       Idx dest_idx;
247744b87433SJohn Marino       unsigned int context;
247844b87433SJohn Marino       re_dfastate_t *dest_state;
247944b87433SJohn Marino 
248044b87433SJohn Marino       if (!dfa->nodes[cur_node_idx].accept_mb)
248144b87433SJohn Marino 	continue;
248244b87433SJohn Marino 
248344b87433SJohn Marino       if (dfa->nodes[cur_node_idx].constraint)
248444b87433SJohn Marino 	{
248544b87433SJohn Marino 	  context = re_string_context_at (&mctx->input,
248644b87433SJohn Marino 					  re_string_cur_idx (&mctx->input),
248744b87433SJohn Marino 					  mctx->eflags);
248844b87433SJohn Marino 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
248944b87433SJohn Marino 					   context))
249044b87433SJohn Marino 	    continue;
249144b87433SJohn Marino 	}
249244b87433SJohn Marino 
249344b87433SJohn Marino       /* How many bytes the node can accept?  */
249444b87433SJohn Marino       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
249544b87433SJohn Marino 					   re_string_cur_idx (&mctx->input));
249644b87433SJohn Marino       if (naccepted == 0)
249744b87433SJohn Marino 	continue;
249844b87433SJohn Marino 
24994536c563SJohn Marino       /* The node can accepts 'naccepted' bytes.  */
250044b87433SJohn Marino       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
250144b87433SJohn Marino       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
250244b87433SJohn Marino 			       : mctx->max_mb_elem_len);
250344b87433SJohn Marino       err = clean_state_log_if_needed (mctx, dest_idx);
2504*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
250544b87433SJohn Marino 	return err;
250644b87433SJohn Marino #ifdef DEBUG
2507*6ea1f93eSDaniel Fojt       assert (dfa->nexts[cur_node_idx] != -1);
250844b87433SJohn Marino #endif
250944b87433SJohn Marino       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
251044b87433SJohn Marino 
251144b87433SJohn Marino       dest_state = mctx->state_log[dest_idx];
251244b87433SJohn Marino       if (dest_state == NULL)
251344b87433SJohn Marino 	dest_nodes = *new_nodes;
251444b87433SJohn Marino       else
251544b87433SJohn Marino 	{
251644b87433SJohn Marino 	  err = re_node_set_init_union (&dest_nodes,
251744b87433SJohn Marino 					dest_state->entrance_nodes, new_nodes);
2518*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
251944b87433SJohn Marino 	    return err;
252044b87433SJohn Marino 	}
252144b87433SJohn Marino       context = re_string_context_at (&mctx->input, dest_idx - 1,
252244b87433SJohn Marino 				      mctx->eflags);
252344b87433SJohn Marino       mctx->state_log[dest_idx]
252444b87433SJohn Marino 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
252544b87433SJohn Marino       if (dest_state != NULL)
252644b87433SJohn Marino 	re_node_set_free (&dest_nodes);
2527*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2528*6ea1f93eSDaniel Fojt 			    && err != REG_NOERROR))
252944b87433SJohn Marino 	return err;
253044b87433SJohn Marino     }
253144b87433SJohn Marino   return REG_NOERROR;
253244b87433SJohn Marino }
253344b87433SJohn Marino #endif /* RE_ENABLE_I18N */
253444b87433SJohn Marino 
253544b87433SJohn Marino static reg_errcode_t
transit_state_bkref(re_match_context_t * mctx,const re_node_set * nodes)253644b87433SJohn Marino transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
253744b87433SJohn Marino {
253844b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
253944b87433SJohn Marino   reg_errcode_t err;
254044b87433SJohn Marino   Idx i;
254144b87433SJohn Marino   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
254244b87433SJohn Marino 
254344b87433SJohn Marino   for (i = 0; i < nodes->nelem; ++i)
254444b87433SJohn Marino     {
254544b87433SJohn Marino       Idx dest_str_idx, prev_nelem, bkc_idx;
254644b87433SJohn Marino       Idx node_idx = nodes->elems[i];
254744b87433SJohn Marino       unsigned int context;
254844b87433SJohn Marino       const re_token_t *node = dfa->nodes + node_idx;
254944b87433SJohn Marino       re_node_set *new_dest_nodes;
255044b87433SJohn Marino 
25514536c563SJohn Marino       /* Check whether 'node' is a backreference or not.  */
255244b87433SJohn Marino       if (node->type != OP_BACK_REF)
255344b87433SJohn Marino 	continue;
255444b87433SJohn Marino 
255544b87433SJohn Marino       if (node->constraint)
255644b87433SJohn Marino 	{
255744b87433SJohn Marino 	  context = re_string_context_at (&mctx->input, cur_str_idx,
255844b87433SJohn Marino 					  mctx->eflags);
255944b87433SJohn Marino 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
256044b87433SJohn Marino 	    continue;
256144b87433SJohn Marino 	}
256244b87433SJohn Marino 
25634536c563SJohn Marino       /* 'node' is a backreference.
256444b87433SJohn Marino 	 Check the substring which the substring matched.  */
256544b87433SJohn Marino       bkc_idx = mctx->nbkref_ents;
256644b87433SJohn Marino       err = get_subexp (mctx, node_idx, cur_str_idx);
2567*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
256844b87433SJohn Marino 	goto free_return;
256944b87433SJohn Marino 
25704536c563SJohn Marino       /* And add the epsilon closures (which is 'new_dest_nodes') of
257144b87433SJohn Marino 	 the backreference to appropriate state_log.  */
257244b87433SJohn Marino #ifdef DEBUG
2573*6ea1f93eSDaniel Fojt       assert (dfa->nexts[node_idx] != -1);
257444b87433SJohn Marino #endif
257544b87433SJohn Marino       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
257644b87433SJohn Marino 	{
257744b87433SJohn Marino 	  Idx subexp_len;
257844b87433SJohn Marino 	  re_dfastate_t *dest_state;
257944b87433SJohn Marino 	  struct re_backref_cache_entry *bkref_ent;
258044b87433SJohn Marino 	  bkref_ent = mctx->bkref_ents + bkc_idx;
258144b87433SJohn Marino 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
258244b87433SJohn Marino 	    continue;
258344b87433SJohn Marino 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
258444b87433SJohn Marino 	  new_dest_nodes = (subexp_len == 0
258544b87433SJohn Marino 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
258644b87433SJohn Marino 			    : dfa->eclosures + dfa->nexts[node_idx]);
258744b87433SJohn Marino 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
258844b87433SJohn Marino 			  - bkref_ent->subexp_from);
258944b87433SJohn Marino 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
259044b87433SJohn Marino 					  mctx->eflags);
259144b87433SJohn Marino 	  dest_state = mctx->state_log[dest_str_idx];
259244b87433SJohn Marino 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
259344b87433SJohn Marino 			: mctx->state_log[cur_str_idx]->nodes.nelem);
25944536c563SJohn Marino 	  /* Add 'new_dest_node' to state_log.  */
259544b87433SJohn Marino 	  if (dest_state == NULL)
259644b87433SJohn Marino 	    {
259744b87433SJohn Marino 	      mctx->state_log[dest_str_idx]
259844b87433SJohn Marino 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
259944b87433SJohn Marino 					    context);
2600*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2601*6ea1f93eSDaniel Fojt 				    && err != REG_NOERROR))
260244b87433SJohn Marino 		goto free_return;
260344b87433SJohn Marino 	    }
260444b87433SJohn Marino 	  else
260544b87433SJohn Marino 	    {
260644b87433SJohn Marino 	      re_node_set dest_nodes;
260744b87433SJohn Marino 	      err = re_node_set_init_union (&dest_nodes,
260844b87433SJohn Marino 					    dest_state->entrance_nodes,
260944b87433SJohn Marino 					    new_dest_nodes);
2610*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
261144b87433SJohn Marino 		{
261244b87433SJohn Marino 		  re_node_set_free (&dest_nodes);
261344b87433SJohn Marino 		  goto free_return;
261444b87433SJohn Marino 		}
261544b87433SJohn Marino 	      mctx->state_log[dest_str_idx]
261644b87433SJohn Marino 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
261744b87433SJohn Marino 	      re_node_set_free (&dest_nodes);
2618*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2619*6ea1f93eSDaniel Fojt 				    && err != REG_NOERROR))
262044b87433SJohn Marino 		goto free_return;
262144b87433SJohn Marino 	    }
262244b87433SJohn Marino 	  /* We need to check recursively if the backreference can epsilon
262344b87433SJohn Marino 	     transit.  */
262444b87433SJohn Marino 	  if (subexp_len == 0
262544b87433SJohn Marino 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
262644b87433SJohn Marino 	    {
262744b87433SJohn Marino 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
262844b87433SJohn Marino 					       cur_str_idx);
2629*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
263044b87433SJohn Marino 		goto free_return;
263144b87433SJohn Marino 	      err = transit_state_bkref (mctx, new_dest_nodes);
2632*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
263344b87433SJohn Marino 		goto free_return;
263444b87433SJohn Marino 	    }
263544b87433SJohn Marino 	}
263644b87433SJohn Marino     }
263744b87433SJohn Marino   err = REG_NOERROR;
263844b87433SJohn Marino  free_return:
263944b87433SJohn Marino   return err;
264044b87433SJohn Marino }
264144b87433SJohn Marino 
264244b87433SJohn Marino /* Enumerate all the candidates which the backreference BKREF_NODE can match
264344b87433SJohn Marino    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
264444b87433SJohn Marino    Note that we might collect inappropriate candidates here.
264544b87433SJohn Marino    However, the cost of checking them strictly here is too high, then we
264644b87433SJohn Marino    delay these checking for prune_impossible_nodes().  */
264744b87433SJohn Marino 
264844b87433SJohn Marino static reg_errcode_t
2649*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
get_subexp(re_match_context_t * mctx,Idx bkref_node,Idx bkref_str_idx)265044b87433SJohn Marino get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
265144b87433SJohn Marino {
265244b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
265344b87433SJohn Marino   Idx subexp_num, sub_top_idx;
265444b87433SJohn Marino   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
265544b87433SJohn Marino   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
265644b87433SJohn Marino   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2657*6ea1f93eSDaniel Fojt   if (cache_idx != -1)
265844b87433SJohn Marino     {
265944b87433SJohn Marino       const struct re_backref_cache_entry *entry
266044b87433SJohn Marino 	= mctx->bkref_ents + cache_idx;
266144b87433SJohn Marino       do
266244b87433SJohn Marino 	if (entry->node == bkref_node)
266344b87433SJohn Marino 	  return REG_NOERROR; /* We already checked it.  */
266444b87433SJohn Marino       while (entry++->more);
266544b87433SJohn Marino     }
266644b87433SJohn Marino 
266744b87433SJohn Marino   subexp_num = dfa->nodes[bkref_node].opr.idx;
266844b87433SJohn Marino 
266944b87433SJohn Marino   /* For each sub expression  */
267044b87433SJohn Marino   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
267144b87433SJohn Marino     {
267244b87433SJohn Marino       reg_errcode_t err;
267344b87433SJohn Marino       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
267444b87433SJohn Marino       re_sub_match_last_t *sub_last;
267544b87433SJohn Marino       Idx sub_last_idx, sl_str, bkref_str_off;
267644b87433SJohn Marino 
267744b87433SJohn Marino       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
267844b87433SJohn Marino 	continue; /* It isn't related.  */
267944b87433SJohn Marino 
268044b87433SJohn Marino       sl_str = sub_top->str_idx;
268144b87433SJohn Marino       bkref_str_off = bkref_str_idx;
268244b87433SJohn Marino       /* At first, check the last node of sub expressions we already
268344b87433SJohn Marino 	 evaluated.  */
268444b87433SJohn Marino       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
268544b87433SJohn Marino 	{
268644b87433SJohn Marino 	  regoff_t sl_str_diff;
268744b87433SJohn Marino 	  sub_last = sub_top->lasts[sub_last_idx];
268844b87433SJohn Marino 	  sl_str_diff = sub_last->str_idx - sl_str;
268944b87433SJohn Marino 	  /* The matched string by the sub expression match with the substring
269044b87433SJohn Marino 	     at the back reference?  */
269144b87433SJohn Marino 	  if (sl_str_diff > 0)
269244b87433SJohn Marino 	    {
2693*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (bkref_str_off + sl_str_diff
2694*6ea1f93eSDaniel Fojt 				    > mctx->input.valid_len))
269544b87433SJohn Marino 		{
269644b87433SJohn Marino 		  /* Not enough chars for a successful match.  */
269744b87433SJohn Marino 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
269844b87433SJohn Marino 		    break;
269944b87433SJohn Marino 
270044b87433SJohn Marino 		  err = clean_state_log_if_needed (mctx,
270144b87433SJohn Marino 						   bkref_str_off
270244b87433SJohn Marino 						   + sl_str_diff);
2703*6ea1f93eSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
270444b87433SJohn Marino 		    return err;
270544b87433SJohn Marino 		  buf = (const char *) re_string_get_buffer (&mctx->input);
270644b87433SJohn Marino 		}
270744b87433SJohn Marino 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
270844b87433SJohn Marino 		/* We don't need to search this sub expression any more.  */
270944b87433SJohn Marino 		break;
271044b87433SJohn Marino 	    }
271144b87433SJohn Marino 	  bkref_str_off += sl_str_diff;
271244b87433SJohn Marino 	  sl_str += sl_str_diff;
271344b87433SJohn Marino 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
271444b87433SJohn Marino 				bkref_str_idx);
271544b87433SJohn Marino 
271644b87433SJohn Marino 	  /* Reload buf, since the preceding call might have reallocated
271744b87433SJohn Marino 	     the buffer.  */
271844b87433SJohn Marino 	  buf = (const char *) re_string_get_buffer (&mctx->input);
271944b87433SJohn Marino 
272044b87433SJohn Marino 	  if (err == REG_NOMATCH)
272144b87433SJohn Marino 	    continue;
2722*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
272344b87433SJohn Marino 	    return err;
272444b87433SJohn Marino 	}
272544b87433SJohn Marino 
272644b87433SJohn Marino       if (sub_last_idx < sub_top->nlasts)
272744b87433SJohn Marino 	continue;
272844b87433SJohn Marino       if (sub_last_idx > 0)
272944b87433SJohn Marino 	++sl_str;
273044b87433SJohn Marino       /* Then, search for the other last nodes of the sub expression.  */
273144b87433SJohn Marino       for (; sl_str <= bkref_str_idx; ++sl_str)
273244b87433SJohn Marino 	{
273344b87433SJohn Marino 	  Idx cls_node;
273444b87433SJohn Marino 	  regoff_t sl_str_off;
273544b87433SJohn Marino 	  const re_node_set *nodes;
273644b87433SJohn Marino 	  sl_str_off = sl_str - sub_top->str_idx;
273744b87433SJohn Marino 	  /* The matched string by the sub expression match with the substring
273844b87433SJohn Marino 	     at the back reference?  */
273944b87433SJohn Marino 	  if (sl_str_off > 0)
274044b87433SJohn Marino 	    {
2741*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
274244b87433SJohn Marino 		{
274344b87433SJohn Marino 		  /* If we are at the end of the input, we cannot match.  */
274444b87433SJohn Marino 		  if (bkref_str_off >= mctx->input.len)
274544b87433SJohn Marino 		    break;
274644b87433SJohn Marino 
27474536c563SJohn Marino 		  err = extend_buffers (mctx, bkref_str_off + 1);
2748*6ea1f93eSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
274944b87433SJohn Marino 		    return err;
275044b87433SJohn Marino 
275144b87433SJohn Marino 		  buf = (const char *) re_string_get_buffer (&mctx->input);
275244b87433SJohn Marino 		}
275344b87433SJohn Marino 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
275444b87433SJohn Marino 		break; /* We don't need to search this sub expression
275544b87433SJohn Marino 			  any more.  */
275644b87433SJohn Marino 	    }
275744b87433SJohn Marino 	  if (mctx->state_log[sl_str] == NULL)
275844b87433SJohn Marino 	    continue;
275944b87433SJohn Marino 	  /* Does this state have a ')' of the sub expression?  */
276044b87433SJohn Marino 	  nodes = &mctx->state_log[sl_str]->nodes;
276144b87433SJohn Marino 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
276244b87433SJohn Marino 				       OP_CLOSE_SUBEXP);
2763*6ea1f93eSDaniel Fojt 	  if (cls_node == -1)
276444b87433SJohn Marino 	    continue; /* No.  */
276544b87433SJohn Marino 	  if (sub_top->path == NULL)
276644b87433SJohn Marino 	    {
276744b87433SJohn Marino 	      sub_top->path = calloc (sizeof (state_array_t),
276844b87433SJohn Marino 				      sl_str - sub_top->str_idx + 1);
276944b87433SJohn Marino 	      if (sub_top->path == NULL)
277044b87433SJohn Marino 		return REG_ESPACE;
277144b87433SJohn Marino 	    }
277244b87433SJohn Marino 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
277344b87433SJohn Marino 	     in the current context?  */
277444b87433SJohn Marino 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
277544b87433SJohn Marino 			       sub_top->str_idx, cls_node, sl_str,
277644b87433SJohn Marino 			       OP_CLOSE_SUBEXP);
277744b87433SJohn Marino 	  if (err == REG_NOMATCH)
277844b87433SJohn Marino 	      continue;
2779*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
278044b87433SJohn Marino 	      return err;
278144b87433SJohn Marino 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2782*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (sub_last == NULL))
278344b87433SJohn Marino 	    return REG_ESPACE;
278444b87433SJohn Marino 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
278544b87433SJohn Marino 				bkref_str_idx);
2786*6ea1f93eSDaniel Fojt 	  buf = (const char *) re_string_get_buffer (&mctx->input);
278744b87433SJohn Marino 	  if (err == REG_NOMATCH)
278844b87433SJohn Marino 	    continue;
2789*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
2790*6ea1f93eSDaniel Fojt 	    return err;
279144b87433SJohn Marino 	}
279244b87433SJohn Marino     }
279344b87433SJohn Marino   return REG_NOERROR;
279444b87433SJohn Marino }
279544b87433SJohn Marino 
279644b87433SJohn Marino /* Helper functions for get_subexp().  */
279744b87433SJohn Marino 
279844b87433SJohn Marino /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
279944b87433SJohn Marino    If it can arrive, register the sub expression expressed with SUB_TOP
280044b87433SJohn Marino    and SUB_LAST.  */
280144b87433SJohn Marino 
280244b87433SJohn Marino static reg_errcode_t
get_subexp_sub(re_match_context_t * mctx,const re_sub_match_top_t * sub_top,re_sub_match_last_t * sub_last,Idx bkref_node,Idx bkref_str)280344b87433SJohn Marino get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
280444b87433SJohn Marino 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
280544b87433SJohn Marino {
280644b87433SJohn Marino   reg_errcode_t err;
280744b87433SJohn Marino   Idx to_idx;
280844b87433SJohn Marino   /* Can the subexpression arrive the back reference?  */
280944b87433SJohn Marino   err = check_arrival (mctx, &sub_last->path, sub_last->node,
281044b87433SJohn Marino 		       sub_last->str_idx, bkref_node, bkref_str,
281144b87433SJohn Marino 		       OP_OPEN_SUBEXP);
281244b87433SJohn Marino   if (err != REG_NOERROR)
281344b87433SJohn Marino     return err;
281444b87433SJohn Marino   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
281544b87433SJohn Marino 			     sub_last->str_idx);
2816*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
281744b87433SJohn Marino     return err;
281844b87433SJohn Marino   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
281944b87433SJohn Marino   return clean_state_log_if_needed (mctx, to_idx);
282044b87433SJohn Marino }
282144b87433SJohn Marino 
282244b87433SJohn Marino /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
282344b87433SJohn Marino    Search '(' if FL_OPEN, or search ')' otherwise.
282444b87433SJohn Marino    TODO: This function isn't efficient...
282544b87433SJohn Marino 	 Because there might be more than one nodes whose types are
282644b87433SJohn Marino 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
282744b87433SJohn Marino 	 nodes.
282844b87433SJohn Marino 	 E.g. RE: (a){2}  */
282944b87433SJohn Marino 
283044b87433SJohn Marino static Idx
find_subexp_node(const re_dfa_t * dfa,const re_node_set * nodes,Idx subexp_idx,int type)283144b87433SJohn Marino find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
283244b87433SJohn Marino 		  Idx subexp_idx, int type)
283344b87433SJohn Marino {
283444b87433SJohn Marino   Idx cls_idx;
283544b87433SJohn Marino   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
283644b87433SJohn Marino     {
283744b87433SJohn Marino       Idx cls_node = nodes->elems[cls_idx];
283844b87433SJohn Marino       const re_token_t *node = dfa->nodes + cls_node;
283944b87433SJohn Marino       if (node->type == type
284044b87433SJohn Marino 	  && node->opr.idx == subexp_idx)
284144b87433SJohn Marino 	return cls_node;
284244b87433SJohn Marino     }
2843*6ea1f93eSDaniel Fojt   return -1;
284444b87433SJohn Marino }
284544b87433SJohn Marino 
284644b87433SJohn Marino /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
284744b87433SJohn Marino    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
284844b87433SJohn Marino    heavily reused.
284944b87433SJohn Marino    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
285044b87433SJohn Marino 
285144b87433SJohn Marino static reg_errcode_t
2852*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
check_arrival(re_match_context_t * mctx,state_array_t * path,Idx top_node,Idx top_str,Idx last_node,Idx last_str,int type)285344b87433SJohn Marino check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
285444b87433SJohn Marino 	       Idx top_str, Idx last_node, Idx last_str, int type)
285544b87433SJohn Marino {
285644b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
285744b87433SJohn Marino   reg_errcode_t err = REG_NOERROR;
285844b87433SJohn Marino   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
285944b87433SJohn Marino   re_dfastate_t *cur_state = NULL;
286044b87433SJohn Marino   re_node_set *cur_nodes, next_nodes;
286144b87433SJohn Marino   re_dfastate_t **backup_state_log;
286244b87433SJohn Marino   unsigned int context;
286344b87433SJohn Marino 
286444b87433SJohn Marino   subexp_num = dfa->nodes[top_node].opr.idx;
286544b87433SJohn Marino   /* Extend the buffer if we need.  */
2866*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
286744b87433SJohn Marino     {
286844b87433SJohn Marino       re_dfastate_t **new_array;
286944b87433SJohn Marino       Idx old_alloc = path->alloc;
28704536c563SJohn Marino       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
28714536c563SJohn Marino       Idx new_alloc;
2872*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
28734536c563SJohn Marino 	return REG_ESPACE;
28744536c563SJohn Marino       new_alloc = old_alloc + incr_alloc;
2875*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
287644b87433SJohn Marino 	return REG_ESPACE;
287744b87433SJohn Marino       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2878*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
287944b87433SJohn Marino 	return REG_ESPACE;
288044b87433SJohn Marino       path->array = new_array;
288144b87433SJohn Marino       path->alloc = new_alloc;
288244b87433SJohn Marino       memset (new_array + old_alloc, '\0',
288344b87433SJohn Marino 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
288444b87433SJohn Marino     }
288544b87433SJohn Marino 
288644b87433SJohn Marino   str_idx = path->next_idx ? path->next_idx : top_str;
288744b87433SJohn Marino 
288844b87433SJohn Marino   /* Temporary modify MCTX.  */
288944b87433SJohn Marino   backup_state_log = mctx->state_log;
289044b87433SJohn Marino   backup_cur_idx = mctx->input.cur_idx;
289144b87433SJohn Marino   mctx->state_log = path->array;
289244b87433SJohn Marino   mctx->input.cur_idx = str_idx;
289344b87433SJohn Marino 
289444b87433SJohn Marino   /* Setup initial node set.  */
289544b87433SJohn Marino   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
289644b87433SJohn Marino   if (str_idx == top_str)
289744b87433SJohn Marino     {
289844b87433SJohn Marino       err = re_node_set_init_1 (&next_nodes, top_node);
2899*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
290044b87433SJohn Marino 	return err;
290144b87433SJohn Marino       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2902*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
290344b87433SJohn Marino 	{
290444b87433SJohn Marino 	  re_node_set_free (&next_nodes);
290544b87433SJohn Marino 	  return err;
290644b87433SJohn Marino 	}
290744b87433SJohn Marino     }
290844b87433SJohn Marino   else
290944b87433SJohn Marino     {
291044b87433SJohn Marino       cur_state = mctx->state_log[str_idx];
291144b87433SJohn Marino       if (cur_state && cur_state->has_backref)
291244b87433SJohn Marino 	{
291344b87433SJohn Marino 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2914*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
291544b87433SJohn Marino 	    return err;
291644b87433SJohn Marino 	}
291744b87433SJohn Marino       else
291844b87433SJohn Marino 	re_node_set_init_empty (&next_nodes);
291944b87433SJohn Marino     }
292044b87433SJohn Marino   if (str_idx == top_str || (cur_state && cur_state->has_backref))
292144b87433SJohn Marino     {
292244b87433SJohn Marino       if (next_nodes.nelem)
292344b87433SJohn Marino 	{
292444b87433SJohn Marino 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
292544b87433SJohn Marino 				    subexp_num, type);
2926*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
292744b87433SJohn Marino 	    {
292844b87433SJohn Marino 	      re_node_set_free (&next_nodes);
292944b87433SJohn Marino 	      return err;
293044b87433SJohn Marino 	    }
293144b87433SJohn Marino 	}
293244b87433SJohn Marino       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2933*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
293444b87433SJohn Marino 	{
293544b87433SJohn Marino 	  re_node_set_free (&next_nodes);
293644b87433SJohn Marino 	  return err;
293744b87433SJohn Marino 	}
293844b87433SJohn Marino       mctx->state_log[str_idx] = cur_state;
293944b87433SJohn Marino     }
294044b87433SJohn Marino 
294144b87433SJohn Marino   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
294244b87433SJohn Marino     {
294344b87433SJohn Marino       re_node_set_empty (&next_nodes);
294444b87433SJohn Marino       if (mctx->state_log[str_idx + 1])
294544b87433SJohn Marino 	{
294644b87433SJohn Marino 	  err = re_node_set_merge (&next_nodes,
294744b87433SJohn Marino 				   &mctx->state_log[str_idx + 1]->nodes);
2948*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
294944b87433SJohn Marino 	    {
295044b87433SJohn Marino 	      re_node_set_free (&next_nodes);
295144b87433SJohn Marino 	      return err;
295244b87433SJohn Marino 	    }
295344b87433SJohn Marino 	}
295444b87433SJohn Marino       if (cur_state)
295544b87433SJohn Marino 	{
295644b87433SJohn Marino 	  err = check_arrival_add_next_nodes (mctx, str_idx,
295744b87433SJohn Marino 					      &cur_state->non_eps_nodes,
295844b87433SJohn Marino 					      &next_nodes);
2959*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
296044b87433SJohn Marino 	    {
296144b87433SJohn Marino 	      re_node_set_free (&next_nodes);
296244b87433SJohn Marino 	      return err;
296344b87433SJohn Marino 	    }
296444b87433SJohn Marino 	}
296544b87433SJohn Marino       ++str_idx;
296644b87433SJohn Marino       if (next_nodes.nelem)
296744b87433SJohn Marino 	{
296844b87433SJohn Marino 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2969*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
297044b87433SJohn Marino 	    {
297144b87433SJohn Marino 	      re_node_set_free (&next_nodes);
297244b87433SJohn Marino 	      return err;
297344b87433SJohn Marino 	    }
297444b87433SJohn Marino 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
297544b87433SJohn Marino 				    subexp_num, type);
2976*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
297744b87433SJohn Marino 	    {
297844b87433SJohn Marino 	      re_node_set_free (&next_nodes);
297944b87433SJohn Marino 	      return err;
298044b87433SJohn Marino 	    }
298144b87433SJohn Marino 	}
298244b87433SJohn Marino       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
298344b87433SJohn Marino       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2984*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
298544b87433SJohn Marino 	{
298644b87433SJohn Marino 	  re_node_set_free (&next_nodes);
298744b87433SJohn Marino 	  return err;
298844b87433SJohn Marino 	}
298944b87433SJohn Marino       mctx->state_log[str_idx] = cur_state;
299044b87433SJohn Marino       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
299144b87433SJohn Marino     }
299244b87433SJohn Marino   re_node_set_free (&next_nodes);
299344b87433SJohn Marino   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
299444b87433SJohn Marino 	       : &mctx->state_log[last_str]->nodes);
299544b87433SJohn Marino   path->next_idx = str_idx;
299644b87433SJohn Marino 
299744b87433SJohn Marino   /* Fix MCTX.  */
299844b87433SJohn Marino   mctx->state_log = backup_state_log;
299944b87433SJohn Marino   mctx->input.cur_idx = backup_cur_idx;
300044b87433SJohn Marino 
300144b87433SJohn Marino   /* Then check the current node set has the node LAST_NODE.  */
300244b87433SJohn Marino   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
300344b87433SJohn Marino     return REG_NOERROR;
300444b87433SJohn Marino 
300544b87433SJohn Marino   return REG_NOMATCH;
300644b87433SJohn Marino }
300744b87433SJohn Marino 
300844b87433SJohn Marino /* Helper functions for check_arrival.  */
300944b87433SJohn Marino 
301044b87433SJohn Marino /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
301144b87433SJohn Marino    to NEXT_NODES.
301244b87433SJohn Marino    TODO: This function is similar to the functions transit_state*(),
301344b87433SJohn Marino 	 however this function has many additional works.
301444b87433SJohn Marino 	 Can't we unify them?  */
301544b87433SJohn Marino 
301644b87433SJohn Marino static reg_errcode_t
3017*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
check_arrival_add_next_nodes(re_match_context_t * mctx,Idx str_idx,re_node_set * cur_nodes,re_node_set * next_nodes)301844b87433SJohn Marino check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
301944b87433SJohn Marino 			      re_node_set *cur_nodes, re_node_set *next_nodes)
302044b87433SJohn Marino {
302144b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
302244b87433SJohn Marino   bool ok;
302344b87433SJohn Marino   Idx cur_idx;
302444b87433SJohn Marino #ifdef RE_ENABLE_I18N
302544b87433SJohn Marino   reg_errcode_t err = REG_NOERROR;
302644b87433SJohn Marino #endif
302744b87433SJohn Marino   re_node_set union_set;
302844b87433SJohn Marino   re_node_set_init_empty (&union_set);
302944b87433SJohn Marino   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
303044b87433SJohn Marino     {
303144b87433SJohn Marino       int naccepted = 0;
303244b87433SJohn Marino       Idx cur_node = cur_nodes->elems[cur_idx];
303344b87433SJohn Marino #ifdef DEBUG
303444b87433SJohn Marino       re_token_type_t type = dfa->nodes[cur_node].type;
303544b87433SJohn Marino       assert (!IS_EPSILON_NODE (type));
303644b87433SJohn Marino #endif
303744b87433SJohn Marino #ifdef RE_ENABLE_I18N
30384536c563SJohn Marino       /* If the node may accept "multi byte".  */
303944b87433SJohn Marino       if (dfa->nodes[cur_node].accept_mb)
304044b87433SJohn Marino 	{
304144b87433SJohn Marino 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
304244b87433SJohn Marino 					       str_idx);
304344b87433SJohn Marino 	  if (naccepted > 1)
304444b87433SJohn Marino 	    {
304544b87433SJohn Marino 	      re_dfastate_t *dest_state;
304644b87433SJohn Marino 	      Idx next_node = dfa->nexts[cur_node];
304744b87433SJohn Marino 	      Idx next_idx = str_idx + naccepted;
304844b87433SJohn Marino 	      dest_state = mctx->state_log[next_idx];
304944b87433SJohn Marino 	      re_node_set_empty (&union_set);
305044b87433SJohn Marino 	      if (dest_state)
305144b87433SJohn Marino 		{
305244b87433SJohn Marino 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3053*6ea1f93eSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
305444b87433SJohn Marino 		    {
305544b87433SJohn Marino 		      re_node_set_free (&union_set);
305644b87433SJohn Marino 		      return err;
305744b87433SJohn Marino 		    }
305844b87433SJohn Marino 		}
305944b87433SJohn Marino 	      ok = re_node_set_insert (&union_set, next_node);
3060*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (! ok))
306144b87433SJohn Marino 		{
306244b87433SJohn Marino 		  re_node_set_free (&union_set);
306344b87433SJohn Marino 		  return REG_ESPACE;
306444b87433SJohn Marino 		}
306544b87433SJohn Marino 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
306644b87433SJohn Marino 							    &union_set);
3067*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3068*6ea1f93eSDaniel Fojt 				    && err != REG_NOERROR))
306944b87433SJohn Marino 		{
307044b87433SJohn Marino 		  re_node_set_free (&union_set);
307144b87433SJohn Marino 		  return err;
307244b87433SJohn Marino 		}
307344b87433SJohn Marino 	    }
307444b87433SJohn Marino 	}
307544b87433SJohn Marino #endif /* RE_ENABLE_I18N */
307644b87433SJohn Marino       if (naccepted
307744b87433SJohn Marino 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
307844b87433SJohn Marino 	{
307944b87433SJohn Marino 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3080*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (! ok))
308144b87433SJohn Marino 	    {
308244b87433SJohn Marino 	      re_node_set_free (&union_set);
308344b87433SJohn Marino 	      return REG_ESPACE;
308444b87433SJohn Marino 	    }
308544b87433SJohn Marino 	}
308644b87433SJohn Marino     }
308744b87433SJohn Marino   re_node_set_free (&union_set);
308844b87433SJohn Marino   return REG_NOERROR;
308944b87433SJohn Marino }
309044b87433SJohn Marino 
309144b87433SJohn Marino /* For all the nodes in CUR_NODES, add the epsilon closures of them to
309244b87433SJohn Marino    CUR_NODES, however exclude the nodes which are:
309344b87433SJohn Marino     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
309444b87433SJohn Marino     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
309544b87433SJohn Marino */
309644b87433SJohn Marino 
309744b87433SJohn Marino static reg_errcode_t
check_arrival_expand_ecl(const re_dfa_t * dfa,re_node_set * cur_nodes,Idx ex_subexp,int type)309844b87433SJohn Marino check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
309944b87433SJohn Marino 			  Idx ex_subexp, int type)
310044b87433SJohn Marino {
310144b87433SJohn Marino   reg_errcode_t err;
310244b87433SJohn Marino   Idx idx, outside_node;
310344b87433SJohn Marino   re_node_set new_nodes;
310444b87433SJohn Marino #ifdef DEBUG
310544b87433SJohn Marino   assert (cur_nodes->nelem);
310644b87433SJohn Marino #endif
310744b87433SJohn Marino   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3108*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
310944b87433SJohn Marino     return err;
311044b87433SJohn Marino   /* Create a new node set NEW_NODES with the nodes which are epsilon
311144b87433SJohn Marino      closures of the node in CUR_NODES.  */
311244b87433SJohn Marino 
311344b87433SJohn Marino   for (idx = 0; idx < cur_nodes->nelem; ++idx)
311444b87433SJohn Marino     {
311544b87433SJohn Marino       Idx cur_node = cur_nodes->elems[idx];
311644b87433SJohn Marino       const re_node_set *eclosure = dfa->eclosures + cur_node;
311744b87433SJohn Marino       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3118*6ea1f93eSDaniel Fojt       if (outside_node == -1)
311944b87433SJohn Marino 	{
312044b87433SJohn Marino 	  /* There are no problematic nodes, just merge them.  */
312144b87433SJohn Marino 	  err = re_node_set_merge (&new_nodes, eclosure);
3122*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
312344b87433SJohn Marino 	    {
312444b87433SJohn Marino 	      re_node_set_free (&new_nodes);
312544b87433SJohn Marino 	      return err;
312644b87433SJohn Marino 	    }
312744b87433SJohn Marino 	}
312844b87433SJohn Marino       else
312944b87433SJohn Marino 	{
313044b87433SJohn Marino 	  /* There are problematic nodes, re-calculate incrementally.  */
313144b87433SJohn Marino 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
313244b87433SJohn Marino 					      ex_subexp, type);
3133*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
313444b87433SJohn Marino 	    {
313544b87433SJohn Marino 	      re_node_set_free (&new_nodes);
313644b87433SJohn Marino 	      return err;
313744b87433SJohn Marino 	    }
313844b87433SJohn Marino 	}
313944b87433SJohn Marino     }
314044b87433SJohn Marino   re_node_set_free (cur_nodes);
314144b87433SJohn Marino   *cur_nodes = new_nodes;
314244b87433SJohn Marino   return REG_NOERROR;
314344b87433SJohn Marino }
314444b87433SJohn Marino 
314544b87433SJohn Marino /* Helper function for check_arrival_expand_ecl.
314644b87433SJohn Marino    Check incrementally the epsilon closure of TARGET, and if it isn't
314744b87433SJohn Marino    problematic append it to DST_NODES.  */
314844b87433SJohn Marino 
314944b87433SJohn Marino static reg_errcode_t
3150*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
check_arrival_expand_ecl_sub(const re_dfa_t * dfa,re_node_set * dst_nodes,Idx target,Idx ex_subexp,int type)315144b87433SJohn Marino check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
315244b87433SJohn Marino 			      Idx target, Idx ex_subexp, int type)
315344b87433SJohn Marino {
315444b87433SJohn Marino   Idx cur_node;
315544b87433SJohn Marino   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
315644b87433SJohn Marino     {
315744b87433SJohn Marino       bool ok;
315844b87433SJohn Marino 
315944b87433SJohn Marino       if (dfa->nodes[cur_node].type == type
316044b87433SJohn Marino 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
316144b87433SJohn Marino 	{
316244b87433SJohn Marino 	  if (type == OP_CLOSE_SUBEXP)
316344b87433SJohn Marino 	    {
316444b87433SJohn Marino 	      ok = re_node_set_insert (dst_nodes, cur_node);
3165*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (! ok))
316644b87433SJohn Marino 		return REG_ESPACE;
316744b87433SJohn Marino 	    }
316844b87433SJohn Marino 	  break;
316944b87433SJohn Marino 	}
317044b87433SJohn Marino       ok = re_node_set_insert (dst_nodes, cur_node);
3171*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (! ok))
317244b87433SJohn Marino 	return REG_ESPACE;
317344b87433SJohn Marino       if (dfa->edests[cur_node].nelem == 0)
317444b87433SJohn Marino 	break;
317544b87433SJohn Marino       if (dfa->edests[cur_node].nelem == 2)
317644b87433SJohn Marino 	{
317744b87433SJohn Marino 	  reg_errcode_t err;
317844b87433SJohn Marino 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
317944b87433SJohn Marino 					      dfa->edests[cur_node].elems[1],
318044b87433SJohn Marino 					      ex_subexp, type);
3181*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
318244b87433SJohn Marino 	    return err;
318344b87433SJohn Marino 	}
318444b87433SJohn Marino       cur_node = dfa->edests[cur_node].elems[0];
318544b87433SJohn Marino     }
318644b87433SJohn Marino   return REG_NOERROR;
318744b87433SJohn Marino }
318844b87433SJohn Marino 
318944b87433SJohn Marino 
319044b87433SJohn Marino /* For all the back references in the current state, calculate the
319144b87433SJohn Marino    destination of the back references by the appropriate entry
319244b87433SJohn Marino    in MCTX->BKREF_ENTS.  */
319344b87433SJohn Marino 
319444b87433SJohn Marino static reg_errcode_t
3195*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
expand_bkref_cache(re_match_context_t * mctx,re_node_set * cur_nodes,Idx cur_str,Idx subexp_num,int type)319644b87433SJohn Marino expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
319744b87433SJohn Marino 		    Idx cur_str, Idx subexp_num, int type)
319844b87433SJohn Marino {
319944b87433SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
320044b87433SJohn Marino   reg_errcode_t err;
320144b87433SJohn Marino   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
320244b87433SJohn Marino   struct re_backref_cache_entry *ent;
320344b87433SJohn Marino 
3204*6ea1f93eSDaniel Fojt   if (cache_idx_start == -1)
320544b87433SJohn Marino     return REG_NOERROR;
320644b87433SJohn Marino 
320744b87433SJohn Marino  restart:
320844b87433SJohn Marino   ent = mctx->bkref_ents + cache_idx_start;
320944b87433SJohn Marino   do
321044b87433SJohn Marino     {
321144b87433SJohn Marino       Idx to_idx, next_node;
321244b87433SJohn Marino 
321344b87433SJohn Marino       /* Is this entry ENT is appropriate?  */
321444b87433SJohn Marino       if (!re_node_set_contains (cur_nodes, ent->node))
321544b87433SJohn Marino 	continue; /* No.  */
321644b87433SJohn Marino 
321744b87433SJohn Marino       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
321844b87433SJohn Marino       /* Calculate the destination of the back reference, and append it
321944b87433SJohn Marino 	 to MCTX->STATE_LOG.  */
322044b87433SJohn Marino       if (to_idx == cur_str)
322144b87433SJohn Marino 	{
322244b87433SJohn Marino 	  /* The backreference did epsilon transit, we must re-check all the
322344b87433SJohn Marino 	     node in the current state.  */
322444b87433SJohn Marino 	  re_node_set new_dests;
322544b87433SJohn Marino 	  reg_errcode_t err2, err3;
322644b87433SJohn Marino 	  next_node = dfa->edests[ent->node].elems[0];
322744b87433SJohn Marino 	  if (re_node_set_contains (cur_nodes, next_node))
322844b87433SJohn Marino 	    continue;
322944b87433SJohn Marino 	  err = re_node_set_init_1 (&new_dests, next_node);
323044b87433SJohn Marino 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
323144b87433SJohn Marino 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
323244b87433SJohn Marino 	  re_node_set_free (&new_dests);
3233*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3234*6ea1f93eSDaniel Fojt 				|| err3 != REG_NOERROR))
323544b87433SJohn Marino 	    {
323644b87433SJohn Marino 	      err = (err != REG_NOERROR ? err
323744b87433SJohn Marino 		     : (err2 != REG_NOERROR ? err2 : err3));
323844b87433SJohn Marino 	      return err;
323944b87433SJohn Marino 	    }
324044b87433SJohn Marino 	  /* TODO: It is still inefficient...  */
324144b87433SJohn Marino 	  goto restart;
324244b87433SJohn Marino 	}
324344b87433SJohn Marino       else
324444b87433SJohn Marino 	{
324544b87433SJohn Marino 	  re_node_set union_set;
324644b87433SJohn Marino 	  next_node = dfa->nexts[ent->node];
324744b87433SJohn Marino 	  if (mctx->state_log[to_idx])
324844b87433SJohn Marino 	    {
324944b87433SJohn Marino 	      bool ok;
325044b87433SJohn Marino 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
325144b87433SJohn Marino 					next_node))
325244b87433SJohn Marino 		continue;
325344b87433SJohn Marino 	      err = re_node_set_init_copy (&union_set,
325444b87433SJohn Marino 					   &mctx->state_log[to_idx]->nodes);
325544b87433SJohn Marino 	      ok = re_node_set_insert (&union_set, next_node);
3256*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR || ! ok))
325744b87433SJohn Marino 		{
325844b87433SJohn Marino 		  re_node_set_free (&union_set);
325944b87433SJohn Marino 		  err = err != REG_NOERROR ? err : REG_ESPACE;
326044b87433SJohn Marino 		  return err;
326144b87433SJohn Marino 		}
326244b87433SJohn Marino 	    }
326344b87433SJohn Marino 	  else
326444b87433SJohn Marino 	    {
326544b87433SJohn Marino 	      err = re_node_set_init_1 (&union_set, next_node);
3266*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
326744b87433SJohn Marino 		return err;
326844b87433SJohn Marino 	    }
326944b87433SJohn Marino 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
327044b87433SJohn Marino 	  re_node_set_free (&union_set);
3271*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3272*6ea1f93eSDaniel Fojt 				&& err != REG_NOERROR))
327344b87433SJohn Marino 	    return err;
327444b87433SJohn Marino 	}
327544b87433SJohn Marino     }
327644b87433SJohn Marino   while (ent++->more);
327744b87433SJohn Marino   return REG_NOERROR;
327844b87433SJohn Marino }
327944b87433SJohn Marino 
328044b87433SJohn Marino /* Build transition table for the state.
328144b87433SJohn Marino    Return true if successful.  */
328244b87433SJohn Marino 
328344b87433SJohn Marino static bool
build_trtable(const re_dfa_t * dfa,re_dfastate_t * state)328444b87433SJohn Marino build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
328544b87433SJohn Marino {
328644b87433SJohn Marino   reg_errcode_t err;
328744b87433SJohn Marino   Idx i, j;
328844b87433SJohn Marino   int ch;
328944b87433SJohn Marino   bool need_word_trtable = false;
329044b87433SJohn Marino   bitset_word_t elem, mask;
329144b87433SJohn Marino   bool dests_node_malloced = false;
329244b87433SJohn Marino   bool dest_states_malloced = false;
32934536c563SJohn Marino   Idx ndests; /* Number of the destination states from 'state'.  */
329444b87433SJohn Marino   re_dfastate_t **trtable;
329544b87433SJohn Marino   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
329644b87433SJohn Marino   re_node_set follows, *dests_node;
329744b87433SJohn Marino   bitset_t *dests_ch;
329844b87433SJohn Marino   bitset_t acceptable;
329944b87433SJohn Marino 
330044b87433SJohn Marino   struct dests_alloc
330144b87433SJohn Marino   {
330244b87433SJohn Marino     re_node_set dests_node[SBC_MAX];
330344b87433SJohn Marino     bitset_t dests_ch[SBC_MAX];
330444b87433SJohn Marino   } *dests_alloc;
330544b87433SJohn Marino 
330644b87433SJohn Marino   /* We build DFA states which corresponds to the destination nodes
33074536c563SJohn Marino      from 'state'.  'dests_node[i]' represents the nodes which i-th
33084536c563SJohn Marino      destination state contains, and 'dests_ch[i]' represents the
330944b87433SJohn Marino      characters which i-th destination state accepts.  */
331044b87433SJohn Marino   if (__libc_use_alloca (sizeof (struct dests_alloc)))
331144b87433SJohn Marino     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
331244b87433SJohn Marino   else
331344b87433SJohn Marino     {
331444b87433SJohn Marino       dests_alloc = re_malloc (struct dests_alloc, 1);
3315*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (dests_alloc == NULL))
331644b87433SJohn Marino 	return false;
331744b87433SJohn Marino       dests_node_malloced = true;
331844b87433SJohn Marino     }
331944b87433SJohn Marino   dests_node = dests_alloc->dests_node;
332044b87433SJohn Marino   dests_ch = dests_alloc->dests_ch;
332144b87433SJohn Marino 
33224536c563SJohn Marino   /* Initialize transition table.  */
332344b87433SJohn Marino   state->word_trtable = state->trtable = NULL;
332444b87433SJohn Marino 
33254536c563SJohn Marino   /* At first, group all nodes belonging to 'state' into several
332644b87433SJohn Marino      destinations.  */
332744b87433SJohn Marino   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3328*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (ndests <= 0))
332944b87433SJohn Marino     {
333044b87433SJohn Marino       if (dests_node_malloced)
3331*6ea1f93eSDaniel Fojt 	re_free (dests_alloc);
33324536c563SJohn Marino       /* Return false in case of an error, true otherwise.  */
333344b87433SJohn Marino       if (ndests == 0)
333444b87433SJohn Marino 	{
333544b87433SJohn Marino 	  state->trtable = (re_dfastate_t **)
333644b87433SJohn Marino 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3337*6ea1f93eSDaniel Fojt           if (__glibc_unlikely (state->trtable == NULL))
3338008e37b6SJohn Marino             return false;
333944b87433SJohn Marino 	  return true;
334044b87433SJohn Marino 	}
334144b87433SJohn Marino       return false;
334244b87433SJohn Marino     }
334344b87433SJohn Marino 
334444b87433SJohn Marino   err = re_node_set_alloc (&follows, ndests + 1);
3345*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
334644b87433SJohn Marino     goto out_free;
334744b87433SJohn Marino 
334844b87433SJohn Marino   /* Avoid arithmetic overflow in size calculation.  */
3349*6ea1f93eSDaniel Fojt   size_t ndests_max
3350*6ea1f93eSDaniel Fojt     = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3351*6ea1f93eSDaniel Fojt        / (3 * sizeof (re_dfastate_t *)));
3352*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (ndests_max < ndests))
335344b87433SJohn Marino     goto out_free;
335444b87433SJohn Marino 
335544b87433SJohn Marino   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
335644b87433SJohn Marino 			 + ndests * 3 * sizeof (re_dfastate_t *)))
335744b87433SJohn Marino     dest_states = (re_dfastate_t **)
335844b87433SJohn Marino       alloca (ndests * 3 * sizeof (re_dfastate_t *));
335944b87433SJohn Marino   else
336044b87433SJohn Marino     {
3361*6ea1f93eSDaniel Fojt       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
3362*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (dest_states == NULL))
336344b87433SJohn Marino 	{
336444b87433SJohn Marino out_free:
336544b87433SJohn Marino 	  if (dest_states_malloced)
3366*6ea1f93eSDaniel Fojt 	    re_free (dest_states);
336744b87433SJohn Marino 	  re_node_set_free (&follows);
336844b87433SJohn Marino 	  for (i = 0; i < ndests; ++i)
336944b87433SJohn Marino 	    re_node_set_free (dests_node + i);
337044b87433SJohn Marino 	  if (dests_node_malloced)
3371*6ea1f93eSDaniel Fojt 	    re_free (dests_alloc);
337244b87433SJohn Marino 	  return false;
337344b87433SJohn Marino 	}
337444b87433SJohn Marino       dest_states_malloced = true;
337544b87433SJohn Marino     }
337644b87433SJohn Marino   dest_states_word = dest_states + ndests;
337744b87433SJohn Marino   dest_states_nl = dest_states_word + ndests;
337844b87433SJohn Marino   bitset_empty (acceptable);
337944b87433SJohn Marino 
338044b87433SJohn Marino   /* Then build the states for all destinations.  */
338144b87433SJohn Marino   for (i = 0; i < ndests; ++i)
338244b87433SJohn Marino     {
338344b87433SJohn Marino       Idx next_node;
338444b87433SJohn Marino       re_node_set_empty (&follows);
338544b87433SJohn Marino       /* Merge the follows of this destination states.  */
338644b87433SJohn Marino       for (j = 0; j < dests_node[i].nelem; ++j)
338744b87433SJohn Marino 	{
338844b87433SJohn Marino 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3389*6ea1f93eSDaniel Fojt 	  if (next_node != -1)
339044b87433SJohn Marino 	    {
339144b87433SJohn Marino 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3392*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
339344b87433SJohn Marino 		goto out_free;
339444b87433SJohn Marino 	    }
339544b87433SJohn Marino 	}
339644b87433SJohn Marino       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3397*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
339844b87433SJohn Marino 	goto out_free;
339944b87433SJohn Marino       /* If the new state has context constraint,
340044b87433SJohn Marino 	 build appropriate states for these contexts.  */
340144b87433SJohn Marino       if (dest_states[i]->has_constraint)
340244b87433SJohn Marino 	{
340344b87433SJohn Marino 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
340444b87433SJohn Marino 							  CONTEXT_WORD);
3405*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (dest_states_word[i] == NULL
3406*6ea1f93eSDaniel Fojt 				&& err != REG_NOERROR))
340744b87433SJohn Marino 	    goto out_free;
340844b87433SJohn Marino 
340944b87433SJohn Marino 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
341044b87433SJohn Marino 	    need_word_trtable = true;
341144b87433SJohn Marino 
341244b87433SJohn Marino 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
341344b87433SJohn Marino 							CONTEXT_NEWLINE);
3414*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
341544b87433SJohn Marino 	    goto out_free;
341644b87433SJohn Marino 	}
341744b87433SJohn Marino       else
341844b87433SJohn Marino 	{
341944b87433SJohn Marino 	  dest_states_word[i] = dest_states[i];
342044b87433SJohn Marino 	  dest_states_nl[i] = dest_states[i];
342144b87433SJohn Marino 	}
342244b87433SJohn Marino       bitset_merge (acceptable, dests_ch[i]);
342344b87433SJohn Marino     }
342444b87433SJohn Marino 
3425*6ea1f93eSDaniel Fojt   if (!__glibc_unlikely (need_word_trtable))
342644b87433SJohn Marino     {
342744b87433SJohn Marino       /* We don't care about whether the following character is a word
342844b87433SJohn Marino 	 character, or we are in a single-byte character set so we can
342944b87433SJohn Marino 	 discern by looking at the character code: allocate a
343044b87433SJohn Marino 	 256-entry transition table.  */
343144b87433SJohn Marino       trtable = state->trtable =
343244b87433SJohn Marino 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3433*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (trtable == NULL))
343444b87433SJohn Marino 	goto out_free;
343544b87433SJohn Marino 
343644b87433SJohn Marino       /* For all characters ch...:  */
343744b87433SJohn Marino       for (i = 0; i < BITSET_WORDS; ++i)
343844b87433SJohn Marino 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
343944b87433SJohn Marino 	     elem;
344044b87433SJohn Marino 	     mask <<= 1, elem >>= 1, ++ch)
3441*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (elem & 1))
344244b87433SJohn Marino 	    {
344344b87433SJohn Marino 	      /* There must be exactly one destination which accepts
344444b87433SJohn Marino 		 character ch.  See group_nodes_into_DFAstates.  */
344544b87433SJohn Marino 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
344644b87433SJohn Marino 		;
344744b87433SJohn Marino 
344844b87433SJohn Marino 	      /* j-th destination accepts the word character ch.  */
344944b87433SJohn Marino 	      if (dfa->word_char[i] & mask)
345044b87433SJohn Marino 		trtable[ch] = dest_states_word[j];
345144b87433SJohn Marino 	      else
345244b87433SJohn Marino 		trtable[ch] = dest_states[j];
345344b87433SJohn Marino 	    }
345444b87433SJohn Marino     }
345544b87433SJohn Marino   else
345644b87433SJohn Marino     {
345744b87433SJohn Marino       /* We care about whether the following character is a word
345844b87433SJohn Marino 	 character, and we are in a multi-byte character set: discern
345944b87433SJohn Marino 	 by looking at the character code: build two 256-entry
346044b87433SJohn Marino 	 transition tables, one starting at trtable[0] and one
346144b87433SJohn Marino 	 starting at trtable[SBC_MAX].  */
346244b87433SJohn Marino       trtable = state->word_trtable =
346344b87433SJohn Marino 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3464*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (trtable == NULL))
346544b87433SJohn Marino 	goto out_free;
346644b87433SJohn Marino 
346744b87433SJohn Marino       /* For all characters ch...:  */
346844b87433SJohn Marino       for (i = 0; i < BITSET_WORDS; ++i)
346944b87433SJohn Marino 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
347044b87433SJohn Marino 	     elem;
347144b87433SJohn Marino 	     mask <<= 1, elem >>= 1, ++ch)
3472*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (elem & 1))
347344b87433SJohn Marino 	    {
347444b87433SJohn Marino 	      /* There must be exactly one destination which accepts
347544b87433SJohn Marino 		 character ch.  See group_nodes_into_DFAstates.  */
347644b87433SJohn Marino 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
347744b87433SJohn Marino 		;
347844b87433SJohn Marino 
347944b87433SJohn Marino 	      /* j-th destination accepts the word character ch.  */
348044b87433SJohn Marino 	      trtable[ch] = dest_states[j];
348144b87433SJohn Marino 	      trtable[ch + SBC_MAX] = dest_states_word[j];
348244b87433SJohn Marino 	    }
348344b87433SJohn Marino     }
348444b87433SJohn Marino 
348544b87433SJohn Marino   /* new line */
348644b87433SJohn Marino   if (bitset_contain (acceptable, NEWLINE_CHAR))
348744b87433SJohn Marino     {
348844b87433SJohn Marino       /* The current state accepts newline character.  */
348944b87433SJohn Marino       for (j = 0; j < ndests; ++j)
349044b87433SJohn Marino 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
349144b87433SJohn Marino 	  {
349244b87433SJohn Marino 	    /* k-th destination accepts newline character.  */
349344b87433SJohn Marino 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
349444b87433SJohn Marino 	    if (need_word_trtable)
349544b87433SJohn Marino 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
349644b87433SJohn Marino 	    /* There must be only one destination which accepts
349744b87433SJohn Marino 	       newline.  See group_nodes_into_DFAstates.  */
349844b87433SJohn Marino 	    break;
349944b87433SJohn Marino 	  }
350044b87433SJohn Marino     }
350144b87433SJohn Marino 
350244b87433SJohn Marino   if (dest_states_malloced)
3503*6ea1f93eSDaniel Fojt     re_free (dest_states);
350444b87433SJohn Marino 
350544b87433SJohn Marino   re_node_set_free (&follows);
350644b87433SJohn Marino   for (i = 0; i < ndests; ++i)
350744b87433SJohn Marino     re_node_set_free (dests_node + i);
350844b87433SJohn Marino 
350944b87433SJohn Marino   if (dests_node_malloced)
3510*6ea1f93eSDaniel Fojt     re_free (dests_alloc);
351144b87433SJohn Marino 
351244b87433SJohn Marino   return true;
351344b87433SJohn Marino }
351444b87433SJohn Marino 
351544b87433SJohn Marino /* Group all nodes belonging to STATE into several destinations.
351644b87433SJohn Marino    Then for all destinations, set the nodes belonging to the destination
351744b87433SJohn Marino    to DESTS_NODE[i] and set the characters accepted by the destination
351844b87433SJohn Marino    to DEST_CH[i].  This function return the number of destinations.  */
351944b87433SJohn Marino 
352044b87433SJohn Marino static Idx
group_nodes_into_DFAstates(const re_dfa_t * dfa,const re_dfastate_t * state,re_node_set * dests_node,bitset_t * dests_ch)352144b87433SJohn Marino group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
352244b87433SJohn Marino 			    re_node_set *dests_node, bitset_t *dests_ch)
352344b87433SJohn Marino {
352444b87433SJohn Marino   reg_errcode_t err;
352544b87433SJohn Marino   bool ok;
352644b87433SJohn Marino   Idx i, j, k;
35274536c563SJohn Marino   Idx ndests; /* Number of the destinations from 'state'.  */
352844b87433SJohn Marino   bitset_t accepts; /* Characters a node can accept.  */
352944b87433SJohn Marino   const re_node_set *cur_nodes = &state->nodes;
353044b87433SJohn Marino   bitset_empty (accepts);
353144b87433SJohn Marino   ndests = 0;
353244b87433SJohn Marino 
35334536c563SJohn Marino   /* For all the nodes belonging to 'state',  */
353444b87433SJohn Marino   for (i = 0; i < cur_nodes->nelem; ++i)
353544b87433SJohn Marino     {
353644b87433SJohn Marino       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
353744b87433SJohn Marino       re_token_type_t type = node->type;
353844b87433SJohn Marino       unsigned int constraint = node->constraint;
353944b87433SJohn Marino 
354044b87433SJohn Marino       /* Enumerate all single byte character this node can accept.  */
354144b87433SJohn Marino       if (type == CHARACTER)
354244b87433SJohn Marino 	bitset_set (accepts, node->opr.c);
354344b87433SJohn Marino       else if (type == SIMPLE_BRACKET)
354444b87433SJohn Marino 	{
354544b87433SJohn Marino 	  bitset_merge (accepts, node->opr.sbcset);
354644b87433SJohn Marino 	}
354744b87433SJohn Marino       else if (type == OP_PERIOD)
354844b87433SJohn Marino 	{
354944b87433SJohn Marino #ifdef RE_ENABLE_I18N
355044b87433SJohn Marino 	  if (dfa->mb_cur_max > 1)
355144b87433SJohn Marino 	    bitset_merge (accepts, dfa->sb_char);
355244b87433SJohn Marino 	  else
355344b87433SJohn Marino #endif
355444b87433SJohn Marino 	    bitset_set_all (accepts);
355544b87433SJohn Marino 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
355644b87433SJohn Marino 	    bitset_clear (accepts, '\n');
355744b87433SJohn Marino 	  if (dfa->syntax & RE_DOT_NOT_NULL)
355844b87433SJohn Marino 	    bitset_clear (accepts, '\0');
355944b87433SJohn Marino 	}
356044b87433SJohn Marino #ifdef RE_ENABLE_I18N
356144b87433SJohn Marino       else if (type == OP_UTF8_PERIOD)
356244b87433SJohn Marino 	{
356344b87433SJohn Marino 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
356444b87433SJohn Marino 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
356544b87433SJohn Marino 	  else
356644b87433SJohn Marino 	    bitset_merge (accepts, utf8_sb_map);
356744b87433SJohn Marino 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
356844b87433SJohn Marino 	    bitset_clear (accepts, '\n');
356944b87433SJohn Marino 	  if (dfa->syntax & RE_DOT_NOT_NULL)
357044b87433SJohn Marino 	    bitset_clear (accepts, '\0');
357144b87433SJohn Marino 	}
357244b87433SJohn Marino #endif
357344b87433SJohn Marino       else
357444b87433SJohn Marino 	continue;
357544b87433SJohn Marino 
35764536c563SJohn Marino       /* Check the 'accepts' and sift the characters which are not
357744b87433SJohn Marino 	 match it the context.  */
357844b87433SJohn Marino       if (constraint)
357944b87433SJohn Marino 	{
358044b87433SJohn Marino 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
358144b87433SJohn Marino 	    {
358244b87433SJohn Marino 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
358344b87433SJohn Marino 	      bitset_empty (accepts);
358444b87433SJohn Marino 	      if (accepts_newline)
358544b87433SJohn Marino 		bitset_set (accepts, NEWLINE_CHAR);
358644b87433SJohn Marino 	      else
358744b87433SJohn Marino 		continue;
358844b87433SJohn Marino 	    }
358944b87433SJohn Marino 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
359044b87433SJohn Marino 	    {
359144b87433SJohn Marino 	      bitset_empty (accepts);
359244b87433SJohn Marino 	      continue;
359344b87433SJohn Marino 	    }
359444b87433SJohn Marino 
359544b87433SJohn Marino 	  if (constraint & NEXT_WORD_CONSTRAINT)
359644b87433SJohn Marino 	    {
359744b87433SJohn Marino 	      bitset_word_t any_set = 0;
359844b87433SJohn Marino 	      if (type == CHARACTER && !node->word_char)
359944b87433SJohn Marino 		{
360044b87433SJohn Marino 		  bitset_empty (accepts);
360144b87433SJohn Marino 		  continue;
360244b87433SJohn Marino 		}
360344b87433SJohn Marino #ifdef RE_ENABLE_I18N
360444b87433SJohn Marino 	      if (dfa->mb_cur_max > 1)
360544b87433SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
360644b87433SJohn Marino 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
360744b87433SJohn Marino 	      else
360844b87433SJohn Marino #endif
360944b87433SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
361044b87433SJohn Marino 		  any_set |= (accepts[j] &= dfa->word_char[j]);
361144b87433SJohn Marino 	      if (!any_set)
361244b87433SJohn Marino 		continue;
361344b87433SJohn Marino 	    }
361444b87433SJohn Marino 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
361544b87433SJohn Marino 	    {
361644b87433SJohn Marino 	      bitset_word_t any_set = 0;
361744b87433SJohn Marino 	      if (type == CHARACTER && node->word_char)
361844b87433SJohn Marino 		{
361944b87433SJohn Marino 		  bitset_empty (accepts);
362044b87433SJohn Marino 		  continue;
362144b87433SJohn Marino 		}
362244b87433SJohn Marino #ifdef RE_ENABLE_I18N
362344b87433SJohn Marino 	      if (dfa->mb_cur_max > 1)
362444b87433SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
362544b87433SJohn Marino 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
362644b87433SJohn Marino 	      else
362744b87433SJohn Marino #endif
362844b87433SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
362944b87433SJohn Marino 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
363044b87433SJohn Marino 	      if (!any_set)
363144b87433SJohn Marino 		continue;
363244b87433SJohn Marino 	    }
363344b87433SJohn Marino 	}
363444b87433SJohn Marino 
36354536c563SJohn Marino       /* Then divide 'accepts' into DFA states, or create a new
363644b87433SJohn Marino 	 state.  Above, we make sure that accepts is not empty.  */
363744b87433SJohn Marino       for (j = 0; j < ndests; ++j)
363844b87433SJohn Marino 	{
363944b87433SJohn Marino 	  bitset_t intersec; /* Intersection sets, see below.  */
364044b87433SJohn Marino 	  bitset_t remains;
364144b87433SJohn Marino 	  /* Flags, see below.  */
364244b87433SJohn Marino 	  bitset_word_t has_intersec, not_subset, not_consumed;
364344b87433SJohn Marino 
364444b87433SJohn Marino 	  /* Optimization, skip if this state doesn't accept the character.  */
364544b87433SJohn Marino 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
364644b87433SJohn Marino 	    continue;
364744b87433SJohn Marino 
36484536c563SJohn Marino 	  /* Enumerate the intersection set of this state and 'accepts'.  */
364944b87433SJohn Marino 	  has_intersec = 0;
365044b87433SJohn Marino 	  for (k = 0; k < BITSET_WORDS; ++k)
365144b87433SJohn Marino 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
365244b87433SJohn Marino 	  /* And skip if the intersection set is empty.  */
365344b87433SJohn Marino 	  if (!has_intersec)
365444b87433SJohn Marino 	    continue;
365544b87433SJohn Marino 
36564536c563SJohn Marino 	  /* Then check if this state is a subset of 'accepts'.  */
365744b87433SJohn Marino 	  not_subset = not_consumed = 0;
365844b87433SJohn Marino 	  for (k = 0; k < BITSET_WORDS; ++k)
365944b87433SJohn Marino 	    {
366044b87433SJohn Marino 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
366144b87433SJohn Marino 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
366244b87433SJohn Marino 	    }
366344b87433SJohn Marino 
36644536c563SJohn Marino 	  /* If this state isn't a subset of 'accepts', create a
36654536c563SJohn Marino 	     new group state, which has the 'remains'. */
366644b87433SJohn Marino 	  if (not_subset)
366744b87433SJohn Marino 	    {
366844b87433SJohn Marino 	      bitset_copy (dests_ch[ndests], remains);
366944b87433SJohn Marino 	      bitset_copy (dests_ch[j], intersec);
367044b87433SJohn Marino 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3671*6ea1f93eSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
367244b87433SJohn Marino 		goto error_return;
367344b87433SJohn Marino 	      ++ndests;
367444b87433SJohn Marino 	    }
367544b87433SJohn Marino 
367644b87433SJohn Marino 	  /* Put the position in the current group. */
367744b87433SJohn Marino 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3678*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (! ok))
367944b87433SJohn Marino 	    goto error_return;
368044b87433SJohn Marino 
368144b87433SJohn Marino 	  /* If all characters are consumed, go to next node. */
368244b87433SJohn Marino 	  if (!not_consumed)
368344b87433SJohn Marino 	    break;
368444b87433SJohn Marino 	}
368544b87433SJohn Marino       /* Some characters remain, create a new group. */
368644b87433SJohn Marino       if (j == ndests)
368744b87433SJohn Marino 	{
368844b87433SJohn Marino 	  bitset_copy (dests_ch[ndests], accepts);
368944b87433SJohn Marino 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3690*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
369144b87433SJohn Marino 	    goto error_return;
369244b87433SJohn Marino 	  ++ndests;
369344b87433SJohn Marino 	  bitset_empty (accepts);
369444b87433SJohn Marino 	}
369544b87433SJohn Marino     }
369644b87433SJohn Marino   return ndests;
369744b87433SJohn Marino  error_return:
369844b87433SJohn Marino   for (j = 0; j < ndests; ++j)
369944b87433SJohn Marino     re_node_set_free (dests_node + j);
3700*6ea1f93eSDaniel Fojt   return -1;
370144b87433SJohn Marino }
370244b87433SJohn Marino 
370344b87433SJohn Marino #ifdef RE_ENABLE_I18N
37044536c563SJohn Marino /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
370544b87433SJohn Marino    Return the number of the bytes the node accepts.
370644b87433SJohn Marino    STR_IDX is the current index of the input string.
370744b87433SJohn Marino 
370844b87433SJohn Marino    This function handles the nodes which can accept one character, or
370944b87433SJohn Marino    one collating element like '.', '[a-z]', opposite to the other nodes
371044b87433SJohn Marino    can only accept one byte.  */
371144b87433SJohn Marino 
3712*6ea1f93eSDaniel Fojt # ifdef _LIBC
3713*6ea1f93eSDaniel Fojt #  include <locale/weight.h>
3714*6ea1f93eSDaniel Fojt # endif
3715*6ea1f93eSDaniel Fojt 
371644b87433SJohn Marino static int
check_node_accept_bytes(const re_dfa_t * dfa,Idx node_idx,const re_string_t * input,Idx str_idx)371744b87433SJohn Marino check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
371844b87433SJohn Marino 			 const re_string_t *input, Idx str_idx)
371944b87433SJohn Marino {
372044b87433SJohn Marino   const re_token_t *node = dfa->nodes + node_idx;
372144b87433SJohn Marino   int char_len, elem_len;
372244b87433SJohn Marino   Idx i;
372344b87433SJohn Marino 
3724*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
372544b87433SJohn Marino     {
372644b87433SJohn Marino       unsigned char c = re_string_byte_at (input, str_idx), d;
3727*6ea1f93eSDaniel Fojt       if (__glibc_likely (c < 0xc2))
372844b87433SJohn Marino 	return 0;
372944b87433SJohn Marino 
373044b87433SJohn Marino       if (str_idx + 2 > input->len)
373144b87433SJohn Marino 	return 0;
373244b87433SJohn Marino 
373344b87433SJohn Marino       d = re_string_byte_at (input, str_idx + 1);
373444b87433SJohn Marino       if (c < 0xe0)
373544b87433SJohn Marino 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
373644b87433SJohn Marino       else if (c < 0xf0)
373744b87433SJohn Marino 	{
373844b87433SJohn Marino 	  char_len = 3;
373944b87433SJohn Marino 	  if (c == 0xe0 && d < 0xa0)
374044b87433SJohn Marino 	    return 0;
374144b87433SJohn Marino 	}
374244b87433SJohn Marino       else if (c < 0xf8)
374344b87433SJohn Marino 	{
374444b87433SJohn Marino 	  char_len = 4;
374544b87433SJohn Marino 	  if (c == 0xf0 && d < 0x90)
374644b87433SJohn Marino 	    return 0;
374744b87433SJohn Marino 	}
374844b87433SJohn Marino       else if (c < 0xfc)
374944b87433SJohn Marino 	{
375044b87433SJohn Marino 	  char_len = 5;
375144b87433SJohn Marino 	  if (c == 0xf8 && d < 0x88)
375244b87433SJohn Marino 	    return 0;
375344b87433SJohn Marino 	}
375444b87433SJohn Marino       else if (c < 0xfe)
375544b87433SJohn Marino 	{
375644b87433SJohn Marino 	  char_len = 6;
375744b87433SJohn Marino 	  if (c == 0xfc && d < 0x84)
375844b87433SJohn Marino 	    return 0;
375944b87433SJohn Marino 	}
376044b87433SJohn Marino       else
376144b87433SJohn Marino 	return 0;
376244b87433SJohn Marino 
376344b87433SJohn Marino       if (str_idx + char_len > input->len)
376444b87433SJohn Marino 	return 0;
376544b87433SJohn Marino 
376644b87433SJohn Marino       for (i = 1; i < char_len; ++i)
376744b87433SJohn Marino 	{
376844b87433SJohn Marino 	  d = re_string_byte_at (input, str_idx + i);
376944b87433SJohn Marino 	  if (d < 0x80 || d > 0xbf)
377044b87433SJohn Marino 	    return 0;
377144b87433SJohn Marino 	}
377244b87433SJohn Marino       return char_len;
377344b87433SJohn Marino     }
377444b87433SJohn Marino 
377544b87433SJohn Marino   char_len = re_string_char_size_at (input, str_idx);
377644b87433SJohn Marino   if (node->type == OP_PERIOD)
377744b87433SJohn Marino     {
377844b87433SJohn Marino       if (char_len <= 1)
377944b87433SJohn Marino 	return 0;
378044b87433SJohn Marino       /* FIXME: I don't think this if is needed, as both '\n'
378144b87433SJohn Marino 	 and '\0' are char_len == 1.  */
378244b87433SJohn Marino       /* '.' accepts any one character except the following two cases.  */
378344b87433SJohn Marino       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
378444b87433SJohn Marino 	   re_string_byte_at (input, str_idx) == '\n') ||
378544b87433SJohn Marino 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
378644b87433SJohn Marino 	   re_string_byte_at (input, str_idx) == '\0'))
378744b87433SJohn Marino 	return 0;
378844b87433SJohn Marino       return char_len;
378944b87433SJohn Marino     }
379044b87433SJohn Marino 
379144b87433SJohn Marino   elem_len = re_string_elem_size_at (input, str_idx);
379244b87433SJohn Marino   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
379344b87433SJohn Marino     return 0;
379444b87433SJohn Marino 
379544b87433SJohn Marino   if (node->type == COMPLEX_BRACKET)
379644b87433SJohn Marino     {
379744b87433SJohn Marino       const re_charset_t *cset = node->opr.mbcset;
379844b87433SJohn Marino # ifdef _LIBC
379944b87433SJohn Marino       const unsigned char *pin
380044b87433SJohn Marino 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
380144b87433SJohn Marino       Idx j;
380244b87433SJohn Marino       uint32_t nrules;
380344b87433SJohn Marino # endif /* _LIBC */
380444b87433SJohn Marino       int match_len = 0;
380544b87433SJohn Marino       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
380644b87433SJohn Marino 		    ? re_string_wchar_at (input, str_idx) : 0);
380744b87433SJohn Marino 
380844b87433SJohn Marino       /* match with multibyte character?  */
380944b87433SJohn Marino       for (i = 0; i < cset->nmbchars; ++i)
381044b87433SJohn Marino 	if (wc == cset->mbchars[i])
381144b87433SJohn Marino 	  {
381244b87433SJohn Marino 	    match_len = char_len;
381344b87433SJohn Marino 	    goto check_node_accept_bytes_match;
381444b87433SJohn Marino 	  }
381544b87433SJohn Marino       /* match with character_class?  */
381644b87433SJohn Marino       for (i = 0; i < cset->nchar_classes; ++i)
381744b87433SJohn Marino 	{
381844b87433SJohn Marino 	  wctype_t wt = cset->char_classes[i];
381944b87433SJohn Marino 	  if (__iswctype (wc, wt))
382044b87433SJohn Marino 	    {
382144b87433SJohn Marino 	      match_len = char_len;
382244b87433SJohn Marino 	      goto check_node_accept_bytes_match;
382344b87433SJohn Marino 	    }
382444b87433SJohn Marino 	}
382544b87433SJohn Marino 
382644b87433SJohn Marino # ifdef _LIBC
382744b87433SJohn Marino       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
382844b87433SJohn Marino       if (nrules != 0)
382944b87433SJohn Marino 	{
383044b87433SJohn Marino 	  unsigned int in_collseq = 0;
383144b87433SJohn Marino 	  const int32_t *table, *indirect;
383244b87433SJohn Marino 	  const unsigned char *weights, *extra;
383344b87433SJohn Marino 	  const char *collseqwc;
383444b87433SJohn Marino 
383544b87433SJohn Marino 	  /* match with collating_symbol?  */
383644b87433SJohn Marino 	  if (cset->ncoll_syms)
383744b87433SJohn Marino 	    extra = (const unsigned char *)
383844b87433SJohn Marino 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
383944b87433SJohn Marino 	  for (i = 0; i < cset->ncoll_syms; ++i)
384044b87433SJohn Marino 	    {
384144b87433SJohn Marino 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
384244b87433SJohn Marino 	      /* Compare the length of input collating element and
384344b87433SJohn Marino 		 the length of current collating element.  */
384444b87433SJohn Marino 	      if (*coll_sym != elem_len)
384544b87433SJohn Marino 		continue;
384644b87433SJohn Marino 	      /* Compare each bytes.  */
384744b87433SJohn Marino 	      for (j = 0; j < *coll_sym; j++)
384844b87433SJohn Marino 		if (pin[j] != coll_sym[1 + j])
384944b87433SJohn Marino 		  break;
385044b87433SJohn Marino 	      if (j == *coll_sym)
385144b87433SJohn Marino 		{
385244b87433SJohn Marino 		  /* Match if every bytes is equal.  */
385344b87433SJohn Marino 		  match_len = j;
385444b87433SJohn Marino 		  goto check_node_accept_bytes_match;
385544b87433SJohn Marino 		}
385644b87433SJohn Marino 	    }
385744b87433SJohn Marino 
385844b87433SJohn Marino 	  if (cset->nranges)
385944b87433SJohn Marino 	    {
386044b87433SJohn Marino 	      if (elem_len <= char_len)
386144b87433SJohn Marino 		{
386244b87433SJohn Marino 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
386344b87433SJohn Marino 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
386444b87433SJohn Marino 		}
386544b87433SJohn Marino 	      else
386644b87433SJohn Marino 		in_collseq = find_collation_sequence_value (pin, elem_len);
386744b87433SJohn Marino 	    }
386844b87433SJohn Marino 	  /* match with range expression?  */
38694536c563SJohn Marino 	  /* FIXME: Implement rational ranges here, too.  */
387044b87433SJohn Marino 	  for (i = 0; i < cset->nranges; ++i)
387144b87433SJohn Marino 	    if (cset->range_starts[i] <= in_collseq
387244b87433SJohn Marino 		&& in_collseq <= cset->range_ends[i])
387344b87433SJohn Marino 	      {
387444b87433SJohn Marino 		match_len = elem_len;
387544b87433SJohn Marino 		goto check_node_accept_bytes_match;
387644b87433SJohn Marino 	      }
387744b87433SJohn Marino 
387844b87433SJohn Marino 	  /* match with equivalence_class?  */
387944b87433SJohn Marino 	  if (cset->nequiv_classes)
388044b87433SJohn Marino 	    {
388144b87433SJohn Marino 	      const unsigned char *cp = pin;
388244b87433SJohn Marino 	      table = (const int32_t *)
388344b87433SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
388444b87433SJohn Marino 	      weights = (const unsigned char *)
388544b87433SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
388644b87433SJohn Marino 	      extra = (const unsigned char *)
388744b87433SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
388844b87433SJohn Marino 	      indirect = (const int32_t *)
388944b87433SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3890*6ea1f93eSDaniel Fojt 	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3891*6ea1f93eSDaniel Fojt 	      int32_t rule = idx >> 24;
3892*6ea1f93eSDaniel Fojt 	      idx &= 0xffffff;
389344b87433SJohn Marino 	      if (idx > 0)
3894*6ea1f93eSDaniel Fojt 		{
3895*6ea1f93eSDaniel Fojt 		  size_t weight_len = weights[idx];
389644b87433SJohn Marino 		  for (i = 0; i < cset->nequiv_classes; ++i)
389744b87433SJohn Marino 		    {
389844b87433SJohn Marino 		      int32_t equiv_class_idx = cset->equiv_classes[i];
3899*6ea1f93eSDaniel Fojt 		      int32_t equiv_class_rule = equiv_class_idx >> 24;
390044b87433SJohn Marino 		      equiv_class_idx &= 0xffffff;
3901*6ea1f93eSDaniel Fojt 		      if (weights[equiv_class_idx] == weight_len
3902*6ea1f93eSDaniel Fojt 			  && equiv_class_rule == rule
3903*6ea1f93eSDaniel Fojt 			  && memcmp (weights + idx + 1,
3904*6ea1f93eSDaniel Fojt 				     weights + equiv_class_idx + 1,
3905*6ea1f93eSDaniel Fojt 				     weight_len) == 0)
390644b87433SJohn Marino 			{
390744b87433SJohn Marino 			  match_len = elem_len;
390844b87433SJohn Marino 			  goto check_node_accept_bytes_match;
390944b87433SJohn Marino 			}
391044b87433SJohn Marino 		    }
391144b87433SJohn Marino 		}
391244b87433SJohn Marino 	    }
391344b87433SJohn Marino 	}
391444b87433SJohn Marino       else
391544b87433SJohn Marino # endif /* _LIBC */
391644b87433SJohn Marino 	{
391744b87433SJohn Marino 	  /* match with range expression?  */
391844b87433SJohn Marino 	  for (i = 0; i < cset->nranges; ++i)
391944b87433SJohn Marino 	    {
39204536c563SJohn Marino 	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
392144b87433SJohn Marino 		{
392244b87433SJohn Marino 		  match_len = char_len;
392344b87433SJohn Marino 		  goto check_node_accept_bytes_match;
392444b87433SJohn Marino 		}
392544b87433SJohn Marino 	    }
392644b87433SJohn Marino 	}
392744b87433SJohn Marino     check_node_accept_bytes_match:
392844b87433SJohn Marino       if (!cset->non_match)
392944b87433SJohn Marino 	return match_len;
393044b87433SJohn Marino       else
393144b87433SJohn Marino 	{
393244b87433SJohn Marino 	  if (match_len > 0)
393344b87433SJohn Marino 	    return 0;
393444b87433SJohn Marino 	  else
393544b87433SJohn Marino 	    return (elem_len > char_len) ? elem_len : char_len;
393644b87433SJohn Marino 	}
393744b87433SJohn Marino     }
393844b87433SJohn Marino   return 0;
393944b87433SJohn Marino }
394044b87433SJohn Marino 
394144b87433SJohn Marino # ifdef _LIBC
394244b87433SJohn Marino static unsigned int
find_collation_sequence_value(const unsigned char * mbs,size_t mbs_len)394344b87433SJohn Marino find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
394444b87433SJohn Marino {
394544b87433SJohn Marino   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
394644b87433SJohn Marino   if (nrules == 0)
394744b87433SJohn Marino     {
394844b87433SJohn Marino       if (mbs_len == 1)
394944b87433SJohn Marino 	{
395044b87433SJohn Marino 	  /* No valid character.  Match it as a single byte character.  */
395144b87433SJohn Marino 	  const unsigned char *collseq = (const unsigned char *)
395244b87433SJohn Marino 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
395344b87433SJohn Marino 	  return collseq[mbs[0]];
395444b87433SJohn Marino 	}
395544b87433SJohn Marino       return UINT_MAX;
395644b87433SJohn Marino     }
395744b87433SJohn Marino   else
395844b87433SJohn Marino     {
395944b87433SJohn Marino       int32_t idx;
396044b87433SJohn Marino       const unsigned char *extra = (const unsigned char *)
396144b87433SJohn Marino 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
396244b87433SJohn Marino       int32_t extrasize = (const unsigned char *)
396344b87433SJohn Marino 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
396444b87433SJohn Marino 
396544b87433SJohn Marino       for (idx = 0; idx < extrasize;)
396644b87433SJohn Marino 	{
396744b87433SJohn Marino 	  int mbs_cnt;
396844b87433SJohn Marino 	  bool found = false;
396944b87433SJohn Marino 	  int32_t elem_mbs_len;
397044b87433SJohn Marino 	  /* Skip the name of collating element name.  */
397144b87433SJohn Marino 	  idx = idx + extra[idx] + 1;
397244b87433SJohn Marino 	  elem_mbs_len = extra[idx++];
397344b87433SJohn Marino 	  if (mbs_len == elem_mbs_len)
397444b87433SJohn Marino 	    {
397544b87433SJohn Marino 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
397644b87433SJohn Marino 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
397744b87433SJohn Marino 		  break;
397844b87433SJohn Marino 	      if (mbs_cnt == elem_mbs_len)
397944b87433SJohn Marino 		/* Found the entry.  */
398044b87433SJohn Marino 		found = true;
398144b87433SJohn Marino 	    }
398244b87433SJohn Marino 	  /* Skip the byte sequence of the collating element.  */
398344b87433SJohn Marino 	  idx += elem_mbs_len;
398444b87433SJohn Marino 	  /* Adjust for the alignment.  */
398544b87433SJohn Marino 	  idx = (idx + 3) & ~3;
398644b87433SJohn Marino 	  /* Skip the collation sequence value.  */
398744b87433SJohn Marino 	  idx += sizeof (uint32_t);
398844b87433SJohn Marino 	  /* Skip the wide char sequence of the collating element.  */
39894536c563SJohn Marino 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
399044b87433SJohn Marino 	  /* If we found the entry, return the sequence value.  */
399144b87433SJohn Marino 	  if (found)
399244b87433SJohn Marino 	    return *(uint32_t *) (extra + idx);
399344b87433SJohn Marino 	  /* Skip the collation sequence value.  */
399444b87433SJohn Marino 	  idx += sizeof (uint32_t);
399544b87433SJohn Marino 	}
399644b87433SJohn Marino       return UINT_MAX;
399744b87433SJohn Marino     }
399844b87433SJohn Marino }
399944b87433SJohn Marino # endif /* _LIBC */
400044b87433SJohn Marino #endif /* RE_ENABLE_I18N */
400144b87433SJohn Marino 
400244b87433SJohn Marino /* Check whether the node accepts the byte which is IDX-th
400344b87433SJohn Marino    byte of the INPUT.  */
400444b87433SJohn Marino 
400544b87433SJohn Marino static bool
check_node_accept(const re_match_context_t * mctx,const re_token_t * node,Idx idx)400644b87433SJohn Marino check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
400744b87433SJohn Marino 		   Idx idx)
400844b87433SJohn Marino {
400944b87433SJohn Marino   unsigned char ch;
401044b87433SJohn Marino   ch = re_string_byte_at (&mctx->input, idx);
401144b87433SJohn Marino   switch (node->type)
401244b87433SJohn Marino     {
401344b87433SJohn Marino     case CHARACTER:
401444b87433SJohn Marino       if (node->opr.c != ch)
401544b87433SJohn Marino         return false;
401644b87433SJohn Marino       break;
401744b87433SJohn Marino 
401844b87433SJohn Marino     case SIMPLE_BRACKET:
401944b87433SJohn Marino       if (!bitset_contain (node->opr.sbcset, ch))
402044b87433SJohn Marino         return false;
402144b87433SJohn Marino       break;
402244b87433SJohn Marino 
402344b87433SJohn Marino #ifdef RE_ENABLE_I18N
402444b87433SJohn Marino     case OP_UTF8_PERIOD:
402544b87433SJohn Marino       if (ch >= ASCII_CHARS)
402644b87433SJohn Marino         return false;
4027*6ea1f93eSDaniel Fojt       FALLTHROUGH;
402844b87433SJohn Marino #endif
402944b87433SJohn Marino     case OP_PERIOD:
403044b87433SJohn Marino       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
403144b87433SJohn Marino 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
403244b87433SJohn Marino 	return false;
403344b87433SJohn Marino       break;
403444b87433SJohn Marino 
403544b87433SJohn Marino     default:
403644b87433SJohn Marino       return false;
403744b87433SJohn Marino     }
403844b87433SJohn Marino 
403944b87433SJohn Marino   if (node->constraint)
404044b87433SJohn Marino     {
404144b87433SJohn Marino       /* The node has constraints.  Check whether the current context
404244b87433SJohn Marino 	 satisfies the constraints.  */
404344b87433SJohn Marino       unsigned int context = re_string_context_at (&mctx->input, idx,
404444b87433SJohn Marino 						   mctx->eflags);
404544b87433SJohn Marino       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
404644b87433SJohn Marino 	return false;
404744b87433SJohn Marino     }
404844b87433SJohn Marino 
404944b87433SJohn Marino   return true;
405044b87433SJohn Marino }
405144b87433SJohn Marino 
405244b87433SJohn Marino /* Extend the buffers, if the buffers have run out.  */
405344b87433SJohn Marino 
405444b87433SJohn Marino static reg_errcode_t
4055*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
extend_buffers(re_match_context_t * mctx,int min_len)40564536c563SJohn Marino extend_buffers (re_match_context_t *mctx, int min_len)
405744b87433SJohn Marino {
405844b87433SJohn Marino   reg_errcode_t ret;
405944b87433SJohn Marino   re_string_t *pstr = &mctx->input;
406044b87433SJohn Marino 
406144b87433SJohn Marino   /* Avoid overflow.  */
4062*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4063*6ea1f93eSDaniel Fojt 			<= pstr->bufs_len))
406444b87433SJohn Marino     return REG_ESPACE;
406544b87433SJohn Marino 
40664536c563SJohn Marino   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
40674536c563SJohn Marino   ret = re_string_realloc_buffers (pstr,
40684536c563SJohn Marino 				   MAX (min_len,
40694536c563SJohn Marino 					MIN (pstr->len, pstr->bufs_len * 2)));
4070*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (ret != REG_NOERROR))
407144b87433SJohn Marino     return ret;
407244b87433SJohn Marino 
407344b87433SJohn Marino   if (mctx->state_log != NULL)
407444b87433SJohn Marino     {
407544b87433SJohn Marino       /* And double the length of state_log.  */
407644b87433SJohn Marino       /* XXX We have no indication of the size of this buffer.  If this
407744b87433SJohn Marino 	 allocation fail we have no indication that the state_log array
407844b87433SJohn Marino 	 does not have the right size.  */
407944b87433SJohn Marino       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
408044b87433SJohn Marino 					      pstr->bufs_len + 1);
4081*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
408244b87433SJohn Marino 	return REG_ESPACE;
408344b87433SJohn Marino       mctx->state_log = new_array;
408444b87433SJohn Marino     }
408544b87433SJohn Marino 
408644b87433SJohn Marino   /* Then reconstruct the buffers.  */
408744b87433SJohn Marino   if (pstr->icase)
408844b87433SJohn Marino     {
408944b87433SJohn Marino #ifdef RE_ENABLE_I18N
409044b87433SJohn Marino       if (pstr->mb_cur_max > 1)
409144b87433SJohn Marino 	{
409244b87433SJohn Marino 	  ret = build_wcs_upper_buffer (pstr);
4093*6ea1f93eSDaniel Fojt 	  if (__glibc_unlikely (ret != REG_NOERROR))
409444b87433SJohn Marino 	    return ret;
409544b87433SJohn Marino 	}
409644b87433SJohn Marino       else
409744b87433SJohn Marino #endif /* RE_ENABLE_I18N  */
409844b87433SJohn Marino 	build_upper_buffer (pstr);
409944b87433SJohn Marino     }
410044b87433SJohn Marino   else
410144b87433SJohn Marino     {
410244b87433SJohn Marino #ifdef RE_ENABLE_I18N
410344b87433SJohn Marino       if (pstr->mb_cur_max > 1)
410444b87433SJohn Marino 	build_wcs_buffer (pstr);
410544b87433SJohn Marino       else
410644b87433SJohn Marino #endif /* RE_ENABLE_I18N  */
410744b87433SJohn Marino 	{
410844b87433SJohn Marino 	  if (pstr->trans != NULL)
410944b87433SJohn Marino 	    re_string_translate_buffer (pstr);
411044b87433SJohn Marino 	}
411144b87433SJohn Marino     }
411244b87433SJohn Marino   return REG_NOERROR;
411344b87433SJohn Marino }
411444b87433SJohn Marino 
411544b87433SJohn Marino 
411644b87433SJohn Marino /* Functions for matching context.  */
411744b87433SJohn Marino 
411844b87433SJohn Marino /* Initialize MCTX.  */
411944b87433SJohn Marino 
412044b87433SJohn Marino static reg_errcode_t
4121*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
match_ctx_init(re_match_context_t * mctx,int eflags,Idx n)412244b87433SJohn Marino match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
412344b87433SJohn Marino {
412444b87433SJohn Marino   mctx->eflags = eflags;
4125*6ea1f93eSDaniel Fojt   mctx->match_last = -1;
412644b87433SJohn Marino   if (n > 0)
412744b87433SJohn Marino     {
412844b87433SJohn Marino       /* Avoid overflow.  */
412944b87433SJohn Marino       size_t max_object_size =
413044b87433SJohn Marino 	MAX (sizeof (struct re_backref_cache_entry),
413144b87433SJohn Marino 	     sizeof (re_sub_match_top_t *));
4132*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
413344b87433SJohn Marino 	return REG_ESPACE;
413444b87433SJohn Marino 
413544b87433SJohn Marino       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
413644b87433SJohn Marino       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4137*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
413844b87433SJohn Marino 	return REG_ESPACE;
413944b87433SJohn Marino     }
414044b87433SJohn Marino   /* Already zero-ed by the caller.
414144b87433SJohn Marino      else
414244b87433SJohn Marino        mctx->bkref_ents = NULL;
414344b87433SJohn Marino      mctx->nbkref_ents = 0;
414444b87433SJohn Marino      mctx->nsub_tops = 0;  */
414544b87433SJohn Marino   mctx->abkref_ents = n;
414644b87433SJohn Marino   mctx->max_mb_elem_len = 1;
414744b87433SJohn Marino   mctx->asub_tops = n;
414844b87433SJohn Marino   return REG_NOERROR;
414944b87433SJohn Marino }
415044b87433SJohn Marino 
415144b87433SJohn Marino /* Clean the entries which depend on the current input in MCTX.
415244b87433SJohn Marino    This function must be invoked when the matcher changes the start index
415344b87433SJohn Marino    of the input, or changes the input string.  */
415444b87433SJohn Marino 
415544b87433SJohn Marino static void
match_ctx_clean(re_match_context_t * mctx)415644b87433SJohn Marino match_ctx_clean (re_match_context_t *mctx)
415744b87433SJohn Marino {
415844b87433SJohn Marino   Idx st_idx;
415944b87433SJohn Marino   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
416044b87433SJohn Marino     {
416144b87433SJohn Marino       Idx sl_idx;
416244b87433SJohn Marino       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
416344b87433SJohn Marino       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
416444b87433SJohn Marino 	{
416544b87433SJohn Marino 	  re_sub_match_last_t *last = top->lasts[sl_idx];
416644b87433SJohn Marino 	  re_free (last->path.array);
416744b87433SJohn Marino 	  re_free (last);
416844b87433SJohn Marino 	}
416944b87433SJohn Marino       re_free (top->lasts);
417044b87433SJohn Marino       if (top->path)
417144b87433SJohn Marino 	{
417244b87433SJohn Marino 	  re_free (top->path->array);
417344b87433SJohn Marino 	  re_free (top->path);
417444b87433SJohn Marino 	}
4175*6ea1f93eSDaniel Fojt       re_free (top);
417644b87433SJohn Marino     }
417744b87433SJohn Marino 
417844b87433SJohn Marino   mctx->nsub_tops = 0;
417944b87433SJohn Marino   mctx->nbkref_ents = 0;
418044b87433SJohn Marino }
418144b87433SJohn Marino 
418244b87433SJohn Marino /* Free all the memory associated with MCTX.  */
418344b87433SJohn Marino 
418444b87433SJohn Marino static void
match_ctx_free(re_match_context_t * mctx)418544b87433SJohn Marino match_ctx_free (re_match_context_t *mctx)
418644b87433SJohn Marino {
418744b87433SJohn Marino   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
418844b87433SJohn Marino   match_ctx_clean (mctx);
418944b87433SJohn Marino   re_free (mctx->sub_tops);
419044b87433SJohn Marino   re_free (mctx->bkref_ents);
419144b87433SJohn Marino }
419244b87433SJohn Marino 
419344b87433SJohn Marino /* Add a new backreference entry to MCTX.
419444b87433SJohn Marino    Note that we assume that caller never call this function with duplicate
419544b87433SJohn Marino    entry, and call with STR_IDX which isn't smaller than any existing entry.
419644b87433SJohn Marino */
419744b87433SJohn Marino 
419844b87433SJohn Marino static reg_errcode_t
4199*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
match_ctx_add_entry(re_match_context_t * mctx,Idx node,Idx str_idx,Idx from,Idx to)420044b87433SJohn Marino match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
420144b87433SJohn Marino 		     Idx to)
420244b87433SJohn Marino {
420344b87433SJohn Marino   if (mctx->nbkref_ents >= mctx->abkref_ents)
420444b87433SJohn Marino     {
420544b87433SJohn Marino       struct re_backref_cache_entry* new_entry;
420644b87433SJohn Marino       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
420744b87433SJohn Marino 			      mctx->abkref_ents * 2);
4208*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (new_entry == NULL))
420944b87433SJohn Marino 	{
421044b87433SJohn Marino 	  re_free (mctx->bkref_ents);
421144b87433SJohn Marino 	  return REG_ESPACE;
421244b87433SJohn Marino 	}
421344b87433SJohn Marino       mctx->bkref_ents = new_entry;
421444b87433SJohn Marino       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
421544b87433SJohn Marino 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
421644b87433SJohn Marino       mctx->abkref_ents *= 2;
421744b87433SJohn Marino     }
421844b87433SJohn Marino   if (mctx->nbkref_ents > 0
421944b87433SJohn Marino       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
422044b87433SJohn Marino     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
422144b87433SJohn Marino 
422244b87433SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].node = node;
422344b87433SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
422444b87433SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
422544b87433SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
422644b87433SJohn Marino 
422744b87433SJohn Marino   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
422844b87433SJohn Marino      If bit N is clear, means that this entry won't epsilon-transition to
422944b87433SJohn Marino      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
423044b87433SJohn Marino      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
423144b87433SJohn Marino      such node.
423244b87433SJohn Marino 
423344b87433SJohn Marino      A backreference does not epsilon-transition unless it is empty, so set
423444b87433SJohn Marino      to all zeros if FROM != TO.  */
423544b87433SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
423644b87433SJohn Marino     = (from == to ? -1 : 0);
423744b87433SJohn Marino 
423844b87433SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
423944b87433SJohn Marino   if (mctx->max_mb_elem_len < to - from)
424044b87433SJohn Marino     mctx->max_mb_elem_len = to - from;
424144b87433SJohn Marino   return REG_NOERROR;
424244b87433SJohn Marino }
424344b87433SJohn Marino 
4244*6ea1f93eSDaniel Fojt /* Return the first entry with the same str_idx, or -1 if none is
424544b87433SJohn Marino    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
424644b87433SJohn Marino 
424744b87433SJohn Marino static Idx
search_cur_bkref_entry(const re_match_context_t * mctx,Idx str_idx)424844b87433SJohn Marino search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
424944b87433SJohn Marino {
425044b87433SJohn Marino   Idx left, right, mid, last;
425144b87433SJohn Marino   last = right = mctx->nbkref_ents;
425244b87433SJohn Marino   for (left = 0; left < right;)
425344b87433SJohn Marino     {
425444b87433SJohn Marino       mid = (left + right) / 2;
425544b87433SJohn Marino       if (mctx->bkref_ents[mid].str_idx < str_idx)
425644b87433SJohn Marino 	left = mid + 1;
425744b87433SJohn Marino       else
425844b87433SJohn Marino 	right = mid;
425944b87433SJohn Marino     }
426044b87433SJohn Marino   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
426144b87433SJohn Marino     return left;
426244b87433SJohn Marino   else
4263*6ea1f93eSDaniel Fojt     return -1;
426444b87433SJohn Marino }
426544b87433SJohn Marino 
426644b87433SJohn Marino /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
426744b87433SJohn Marino    at STR_IDX.  */
426844b87433SJohn Marino 
426944b87433SJohn Marino static reg_errcode_t
4270*6ea1f93eSDaniel Fojt __attribute_warn_unused_result__
match_ctx_add_subtop(re_match_context_t * mctx,Idx node,Idx str_idx)427144b87433SJohn Marino match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
427244b87433SJohn Marino {
427344b87433SJohn Marino #ifdef DEBUG
427444b87433SJohn Marino   assert (mctx->sub_tops != NULL);
427544b87433SJohn Marino   assert (mctx->asub_tops > 0);
427644b87433SJohn Marino #endif
4277*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
427844b87433SJohn Marino     {
427944b87433SJohn Marino       Idx new_asub_tops = mctx->asub_tops * 2;
428044b87433SJohn Marino       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
428144b87433SJohn Marino 						   re_sub_match_top_t *,
428244b87433SJohn Marino 						   new_asub_tops);
4283*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
428444b87433SJohn Marino 	return REG_ESPACE;
428544b87433SJohn Marino       mctx->sub_tops = new_array;
428644b87433SJohn Marino       mctx->asub_tops = new_asub_tops;
428744b87433SJohn Marino     }
428844b87433SJohn Marino   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4289*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
429044b87433SJohn Marino     return REG_ESPACE;
429144b87433SJohn Marino   mctx->sub_tops[mctx->nsub_tops]->node = node;
429244b87433SJohn Marino   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
429344b87433SJohn Marino   return REG_NOERROR;
429444b87433SJohn Marino }
429544b87433SJohn Marino 
429644b87433SJohn Marino /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
429744b87433SJohn Marino    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
429844b87433SJohn Marino 
429944b87433SJohn Marino static re_sub_match_last_t *
match_ctx_add_sublast(re_sub_match_top_t * subtop,Idx node,Idx str_idx)430044b87433SJohn Marino match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
430144b87433SJohn Marino {
430244b87433SJohn Marino   re_sub_match_last_t *new_entry;
4303*6ea1f93eSDaniel Fojt   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
430444b87433SJohn Marino     {
430544b87433SJohn Marino       Idx new_alasts = 2 * subtop->alasts + 1;
430644b87433SJohn Marino       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
430744b87433SJohn Marino 						    re_sub_match_last_t *,
430844b87433SJohn Marino 						    new_alasts);
4309*6ea1f93eSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
431044b87433SJohn Marino 	return NULL;
431144b87433SJohn Marino       subtop->lasts = new_array;
431244b87433SJohn Marino       subtop->alasts = new_alasts;
431344b87433SJohn Marino     }
431444b87433SJohn Marino   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4315*6ea1f93eSDaniel Fojt   if (__glibc_likely (new_entry != NULL))
431644b87433SJohn Marino     {
431744b87433SJohn Marino       subtop->lasts[subtop->nlasts] = new_entry;
431844b87433SJohn Marino       new_entry->node = node;
431944b87433SJohn Marino       new_entry->str_idx = str_idx;
432044b87433SJohn Marino       ++subtop->nlasts;
432144b87433SJohn Marino     }
432244b87433SJohn Marino   return new_entry;
432344b87433SJohn Marino }
432444b87433SJohn Marino 
432544b87433SJohn Marino static void
sift_ctx_init(re_sift_context_t * sctx,re_dfastate_t ** sifted_sts,re_dfastate_t ** limited_sts,Idx last_node,Idx last_str_idx)432644b87433SJohn Marino sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
432744b87433SJohn Marino 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
432844b87433SJohn Marino {
432944b87433SJohn Marino   sctx->sifted_states = sifted_sts;
433044b87433SJohn Marino   sctx->limited_states = limited_sts;
433144b87433SJohn Marino   sctx->last_node = last_node;
433244b87433SJohn Marino   sctx->last_str_idx = last_str_idx;
433344b87433SJohn Marino   re_node_set_init_empty (&sctx->limits);
433444b87433SJohn Marino }
4335