xref: /dragonfly/contrib/grep/lib/regex_internal.c (revision 09d4459f)
195b7b453SJohn Marino /* Extended regular expression matching and search library.
2*09d4459fSDaniel Fojt    Copyright (C) 2002-2020 Free Software Foundation, Inc.
395b7b453SJohn Marino    This file is part of the GNU C Library.
495b7b453SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
595b7b453SJohn Marino 
6680a9cb8SJohn Marino    The GNU C Library is free software; you can redistribute it and/or
7680a9cb8SJohn Marino    modify it under the terms of the GNU General Public
8680a9cb8SJohn Marino    License as published by the Free Software Foundation; either
9680a9cb8SJohn Marino    version 3 of the License, or (at your option) any later version.
1095b7b453SJohn Marino 
11680a9cb8SJohn Marino    The GNU C Library is distributed in the hope that it will be useful,
1295b7b453SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
13680a9cb8SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14680a9cb8SJohn Marino    General Public License for more details.
1595b7b453SJohn Marino 
16680a9cb8SJohn Marino    You should have received a copy of the GNU General Public
17680a9cb8SJohn Marino    License along with the GNU C Library; if not, see
18*09d4459fSDaniel Fojt    <https://www.gnu.org/licenses/>.  */
1995b7b453SJohn Marino 
2095b7b453SJohn Marino static void re_string_construct_common (const char *str, Idx len,
2195b7b453SJohn Marino 					re_string_t *pstr,
2295b7b453SJohn Marino 					RE_TRANSLATE_TYPE trans, bool icase,
23*09d4459fSDaniel Fojt 					const re_dfa_t *dfa);
2495b7b453SJohn Marino static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
2595b7b453SJohn Marino 					  const re_node_set *nodes,
26*09d4459fSDaniel Fojt 					  re_hashval_t hash);
2795b7b453SJohn Marino static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
2895b7b453SJohn Marino 					  const re_node_set *nodes,
2995b7b453SJohn Marino 					  unsigned int context,
30*09d4459fSDaniel Fojt 					  re_hashval_t hash);
31*09d4459fSDaniel Fojt static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
32*09d4459fSDaniel Fojt 						Idx new_buf_len);
33*09d4459fSDaniel Fojt #ifdef RE_ENABLE_I18N
34*09d4459fSDaniel Fojt static void build_wcs_buffer (re_string_t *pstr);
35*09d4459fSDaniel Fojt static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
36*09d4459fSDaniel Fojt #endif /* RE_ENABLE_I18N */
37*09d4459fSDaniel Fojt static void build_upper_buffer (re_string_t *pstr);
38*09d4459fSDaniel Fojt static void re_string_translate_buffer (re_string_t *pstr);
39*09d4459fSDaniel Fojt static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
40*09d4459fSDaniel Fojt 					  int eflags) __attribute__ ((pure));
4195b7b453SJohn Marino 
4295b7b453SJohn Marino /* Functions for string operation.  */
4395b7b453SJohn Marino 
4495b7b453SJohn Marino /* This function allocate the buffers.  It is necessary to call
4595b7b453SJohn Marino    re_string_reconstruct before using the object.  */
4695b7b453SJohn Marino 
4795b7b453SJohn Marino static reg_errcode_t
48*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_string_allocate(re_string_t * pstr,const char * str,Idx len,Idx init_len,RE_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)4995b7b453SJohn Marino re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
5095b7b453SJohn Marino 		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
5195b7b453SJohn Marino {
5295b7b453SJohn Marino   reg_errcode_t ret;
5395b7b453SJohn Marino   Idx init_buf_len;
5495b7b453SJohn Marino 
5595b7b453SJohn Marino   /* Ensure at least one character fits into the buffers.  */
5695b7b453SJohn Marino   if (init_len < dfa->mb_cur_max)
5795b7b453SJohn Marino     init_len = dfa->mb_cur_max;
5895b7b453SJohn Marino   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
5995b7b453SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
6095b7b453SJohn Marino 
6195b7b453SJohn Marino   ret = re_string_realloc_buffers (pstr, init_buf_len);
62*09d4459fSDaniel Fojt   if (__glibc_unlikely (ret != REG_NOERROR))
6395b7b453SJohn Marino     return ret;
6495b7b453SJohn Marino 
6595b7b453SJohn Marino   pstr->word_char = dfa->word_char;
6695b7b453SJohn Marino   pstr->word_ops_used = dfa->word_ops_used;
6795b7b453SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
6895b7b453SJohn Marino   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
6995b7b453SJohn Marino   pstr->valid_raw_len = pstr->valid_len;
7095b7b453SJohn Marino   return REG_NOERROR;
7195b7b453SJohn Marino }
7295b7b453SJohn Marino 
7395b7b453SJohn Marino /* This function allocate the buffers, and initialize them.  */
7495b7b453SJohn Marino 
7595b7b453SJohn Marino static reg_errcode_t
76*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_string_construct(re_string_t * pstr,const char * str,Idx len,RE_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)7795b7b453SJohn Marino re_string_construct (re_string_t *pstr, const char *str, Idx len,
7895b7b453SJohn Marino 		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
7995b7b453SJohn Marino {
8095b7b453SJohn Marino   reg_errcode_t ret;
8195b7b453SJohn Marino   memset (pstr, '\0', sizeof (re_string_t));
8295b7b453SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
8395b7b453SJohn Marino 
8495b7b453SJohn Marino   if (len > 0)
8595b7b453SJohn Marino     {
8695b7b453SJohn Marino       ret = re_string_realloc_buffers (pstr, len + 1);
87*09d4459fSDaniel Fojt       if (__glibc_unlikely (ret != REG_NOERROR))
8895b7b453SJohn Marino 	return ret;
8995b7b453SJohn Marino     }
9095b7b453SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
9195b7b453SJohn Marino 
9295b7b453SJohn Marino   if (icase)
9395b7b453SJohn Marino     {
9495b7b453SJohn Marino #ifdef RE_ENABLE_I18N
9595b7b453SJohn Marino       if (dfa->mb_cur_max > 1)
9695b7b453SJohn Marino 	{
9795b7b453SJohn Marino 	  while (1)
9895b7b453SJohn Marino 	    {
9995b7b453SJohn Marino 	      ret = build_wcs_upper_buffer (pstr);
100*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (ret != REG_NOERROR))
10195b7b453SJohn Marino 		return ret;
10295b7b453SJohn Marino 	      if (pstr->valid_raw_len >= len)
10395b7b453SJohn Marino 		break;
10495b7b453SJohn Marino 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
10595b7b453SJohn Marino 		break;
10695b7b453SJohn Marino 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
107*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (ret != REG_NOERROR))
10895b7b453SJohn Marino 		return ret;
10995b7b453SJohn Marino 	    }
11095b7b453SJohn Marino 	}
11195b7b453SJohn Marino       else
11295b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
11395b7b453SJohn Marino 	build_upper_buffer (pstr);
11495b7b453SJohn Marino     }
11595b7b453SJohn Marino   else
11695b7b453SJohn Marino     {
11795b7b453SJohn Marino #ifdef RE_ENABLE_I18N
11895b7b453SJohn Marino       if (dfa->mb_cur_max > 1)
11995b7b453SJohn Marino 	build_wcs_buffer (pstr);
12095b7b453SJohn Marino       else
12195b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
12295b7b453SJohn Marino 	{
12395b7b453SJohn Marino 	  if (trans != NULL)
12495b7b453SJohn Marino 	    re_string_translate_buffer (pstr);
12595b7b453SJohn Marino 	  else
12695b7b453SJohn Marino 	    {
12795b7b453SJohn Marino 	      pstr->valid_len = pstr->bufs_len;
12895b7b453SJohn Marino 	      pstr->valid_raw_len = pstr->bufs_len;
12995b7b453SJohn Marino 	    }
13095b7b453SJohn Marino 	}
13195b7b453SJohn Marino     }
13295b7b453SJohn Marino 
13395b7b453SJohn Marino   return REG_NOERROR;
13495b7b453SJohn Marino }
13595b7b453SJohn Marino 
13695b7b453SJohn Marino /* Helper functions for re_string_allocate, and re_string_construct.  */
13795b7b453SJohn Marino 
13895b7b453SJohn Marino static reg_errcode_t
139*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_string_realloc_buffers(re_string_t * pstr,Idx new_buf_len)14095b7b453SJohn Marino re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
14195b7b453SJohn Marino {
14295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
14395b7b453SJohn Marino   if (pstr->mb_cur_max > 1)
14495b7b453SJohn Marino     {
14595b7b453SJohn Marino       wint_t *new_wcs;
14695b7b453SJohn Marino 
147cf28ed85SJohn Marino       /* Avoid overflow in realloc.  */
148cf28ed85SJohn Marino       const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
149*09d4459fSDaniel Fojt       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
150*09d4459fSDaniel Fojt 			    < new_buf_len))
15195b7b453SJohn Marino 	return REG_ESPACE;
15295b7b453SJohn Marino 
15395b7b453SJohn Marino       new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
154*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_wcs == NULL))
15595b7b453SJohn Marino 	return REG_ESPACE;
15695b7b453SJohn Marino       pstr->wcs = new_wcs;
15795b7b453SJohn Marino       if (pstr->offsets != NULL)
15895b7b453SJohn Marino 	{
15995b7b453SJohn Marino 	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
160*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (new_offsets == NULL))
16195b7b453SJohn Marino 	    return REG_ESPACE;
16295b7b453SJohn Marino 	  pstr->offsets = new_offsets;
16395b7b453SJohn Marino 	}
16495b7b453SJohn Marino     }
16595b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
16695b7b453SJohn Marino   if (pstr->mbs_allocated)
16795b7b453SJohn Marino     {
16895b7b453SJohn Marino       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
16995b7b453SJohn Marino 					   new_buf_len);
170*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_mbs == NULL))
17195b7b453SJohn Marino 	return REG_ESPACE;
17295b7b453SJohn Marino       pstr->mbs = new_mbs;
17395b7b453SJohn Marino     }
17495b7b453SJohn Marino   pstr->bufs_len = new_buf_len;
17595b7b453SJohn Marino   return REG_NOERROR;
17695b7b453SJohn Marino }
17795b7b453SJohn Marino 
17895b7b453SJohn Marino 
17995b7b453SJohn Marino static void
re_string_construct_common(const char * str,Idx len,re_string_t * pstr,RE_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)18095b7b453SJohn Marino re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
18195b7b453SJohn Marino 			    RE_TRANSLATE_TYPE trans, bool icase,
18295b7b453SJohn Marino 			    const re_dfa_t *dfa)
18395b7b453SJohn Marino {
18495b7b453SJohn Marino   pstr->raw_mbs = (const unsigned char *) str;
18595b7b453SJohn Marino   pstr->len = len;
18695b7b453SJohn Marino   pstr->raw_len = len;
18795b7b453SJohn Marino   pstr->trans = trans;
18895b7b453SJohn Marino   pstr->icase = icase;
18995b7b453SJohn Marino   pstr->mbs_allocated = (trans != NULL || icase);
19095b7b453SJohn Marino   pstr->mb_cur_max = dfa->mb_cur_max;
19195b7b453SJohn Marino   pstr->is_utf8 = dfa->is_utf8;
19295b7b453SJohn Marino   pstr->map_notascii = dfa->map_notascii;
19395b7b453SJohn Marino   pstr->stop = pstr->len;
19495b7b453SJohn Marino   pstr->raw_stop = pstr->stop;
19595b7b453SJohn Marino }
19695b7b453SJohn Marino 
19795b7b453SJohn Marino #ifdef RE_ENABLE_I18N
19895b7b453SJohn Marino 
19995b7b453SJohn Marino /* Build wide character buffer PSTR->WCS.
20095b7b453SJohn Marino    If the byte sequence of the string are:
20195b7b453SJohn Marino      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
20295b7b453SJohn Marino    Then wide character buffer will be:
20395b7b453SJohn Marino      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
20495b7b453SJohn Marino    We use WEOF for padding, they indicate that the position isn't
20595b7b453SJohn Marino    a first byte of a multibyte character.
20695b7b453SJohn Marino 
20795b7b453SJohn Marino    Note that this function assumes PSTR->VALID_LEN elements are already
20895b7b453SJohn Marino    built and starts from PSTR->VALID_LEN.  */
20995b7b453SJohn Marino 
21095b7b453SJohn Marino static void
build_wcs_buffer(re_string_t * pstr)21195b7b453SJohn Marino build_wcs_buffer (re_string_t *pstr)
21295b7b453SJohn Marino {
21395b7b453SJohn Marino #ifdef _LIBC
21495b7b453SJohn Marino   unsigned char buf[MB_LEN_MAX];
215*09d4459fSDaniel Fojt   DEBUG_ASSERT (MB_LEN_MAX >= pstr->mb_cur_max);
21695b7b453SJohn Marino #else
21795b7b453SJohn Marino   unsigned char buf[64];
21895b7b453SJohn Marino #endif
21995b7b453SJohn Marino   mbstate_t prev_st;
22095b7b453SJohn Marino   Idx byte_idx, end_idx, remain_len;
22195b7b453SJohn Marino   size_t mbclen;
22295b7b453SJohn Marino 
22395b7b453SJohn Marino   /* Build the buffers from pstr->valid_len to either pstr->len or
22495b7b453SJohn Marino      pstr->bufs_len.  */
22595b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
22695b7b453SJohn Marino   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
22795b7b453SJohn Marino     {
22895b7b453SJohn Marino       wchar_t wc;
22995b7b453SJohn Marino       const char *p;
23095b7b453SJohn Marino 
23195b7b453SJohn Marino       remain_len = end_idx - byte_idx;
23295b7b453SJohn Marino       prev_st = pstr->cur_state;
23395b7b453SJohn Marino       /* Apply the translation if we need.  */
234*09d4459fSDaniel Fojt       if (__glibc_unlikely (pstr->trans != NULL))
23595b7b453SJohn Marino 	{
23695b7b453SJohn Marino 	  int i, ch;
23795b7b453SJohn Marino 
23895b7b453SJohn Marino 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
23995b7b453SJohn Marino 	    {
24095b7b453SJohn Marino 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
24195b7b453SJohn Marino 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
24295b7b453SJohn Marino 	    }
24395b7b453SJohn Marino 	  p = (const char *) buf;
24495b7b453SJohn Marino 	}
24595b7b453SJohn Marino       else
24695b7b453SJohn Marino 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
24795b7b453SJohn Marino       mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
248*09d4459fSDaniel Fojt       if (__glibc_unlikely (mbclen == (size_t) -1 || mbclen == 0
249*09d4459fSDaniel Fojt 			    || (mbclen == (size_t) -2
250*09d4459fSDaniel Fojt 				&& pstr->bufs_len >= pstr->len)))
25195b7b453SJohn Marino 	{
25295b7b453SJohn Marino 	  /* We treat these cases as a singlebyte character.  */
25395b7b453SJohn Marino 	  mbclen = 1;
25495b7b453SJohn Marino 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
255*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (pstr->trans != NULL))
25695b7b453SJohn Marino 	    wc = pstr->trans[wc];
25795b7b453SJohn Marino 	  pstr->cur_state = prev_st;
25895b7b453SJohn Marino 	}
259*09d4459fSDaniel Fojt       else if (__glibc_unlikely (mbclen == (size_t) -2))
260cf28ed85SJohn Marino 	{
261cf28ed85SJohn Marino 	  /* The buffer doesn't have enough space, finish to build.  */
262cf28ed85SJohn Marino 	  pstr->cur_state = prev_st;
263cf28ed85SJohn Marino 	  break;
264cf28ed85SJohn Marino 	}
26595b7b453SJohn Marino 
26695b7b453SJohn Marino       /* Write wide character and padding.  */
26795b7b453SJohn Marino       pstr->wcs[byte_idx++] = wc;
26895b7b453SJohn Marino       /* Write paddings.  */
26995b7b453SJohn Marino       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
27095b7b453SJohn Marino 	pstr->wcs[byte_idx++] = WEOF;
27195b7b453SJohn Marino     }
27295b7b453SJohn Marino   pstr->valid_len = byte_idx;
27395b7b453SJohn Marino   pstr->valid_raw_len = byte_idx;
27495b7b453SJohn Marino }
27595b7b453SJohn Marino 
27695b7b453SJohn Marino /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
27795b7b453SJohn Marino    but for REG_ICASE.  */
27895b7b453SJohn Marino 
27995b7b453SJohn Marino static reg_errcode_t
280*09d4459fSDaniel Fojt __attribute_warn_unused_result__
build_wcs_upper_buffer(re_string_t * pstr)28195b7b453SJohn Marino build_wcs_upper_buffer (re_string_t *pstr)
28295b7b453SJohn Marino {
28395b7b453SJohn Marino   mbstate_t prev_st;
28495b7b453SJohn Marino   Idx src_idx, byte_idx, end_idx, remain_len;
28595b7b453SJohn Marino   size_t mbclen;
28695b7b453SJohn Marino #ifdef _LIBC
28795b7b453SJohn Marino   char buf[MB_LEN_MAX];
288*09d4459fSDaniel Fojt   DEBUG_ASSERT (pstr->mb_cur_max <= MB_LEN_MAX);
28995b7b453SJohn Marino #else
29095b7b453SJohn Marino   char buf[64];
29195b7b453SJohn Marino #endif
29295b7b453SJohn Marino 
29395b7b453SJohn Marino   byte_idx = pstr->valid_len;
29495b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
29595b7b453SJohn Marino 
29695b7b453SJohn Marino   /* The following optimization assumes that ASCII characters can be
29795b7b453SJohn Marino      mapped to wide characters with a simple cast.  */
29895b7b453SJohn Marino   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
29995b7b453SJohn Marino     {
30095b7b453SJohn Marino       while (byte_idx < end_idx)
30195b7b453SJohn Marino 	{
30295b7b453SJohn Marino 	  wchar_t wc;
30395b7b453SJohn Marino 
30495b7b453SJohn Marino 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
30595b7b453SJohn Marino 	      && mbsinit (&pstr->cur_state))
30695b7b453SJohn Marino 	    {
30795b7b453SJohn Marino 	      /* In case of a singlebyte character.  */
30895b7b453SJohn Marino 	      pstr->mbs[byte_idx]
30995b7b453SJohn Marino 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
31095b7b453SJohn Marino 	      /* The next step uses the assumption that wchar_t is encoded
31195b7b453SJohn Marino 		 ASCII-safe: all ASCII values can be converted like this.  */
31295b7b453SJohn Marino 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
31395b7b453SJohn Marino 	      ++byte_idx;
31495b7b453SJohn Marino 	      continue;
31595b7b453SJohn Marino 	    }
31695b7b453SJohn Marino 
31795b7b453SJohn Marino 	  remain_len = end_idx - byte_idx;
31895b7b453SJohn Marino 	  prev_st = pstr->cur_state;
31995b7b453SJohn Marino 	  mbclen = __mbrtowc (&wc,
32095b7b453SJohn Marino 			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
32195b7b453SJohn Marino 			       + byte_idx), remain_len, &pstr->cur_state);
322*09d4459fSDaniel Fojt 	  if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
32395b7b453SJohn Marino 	    {
324dc7c36e4SJohn Marino 	      wchar_t wcu = __towupper (wc);
325680a9cb8SJohn Marino 	      if (wcu != wc)
32695b7b453SJohn Marino 		{
32795b7b453SJohn Marino 		  size_t mbcdlen;
32895b7b453SJohn Marino 
329dc7c36e4SJohn Marino 		  mbcdlen = __wcrtomb (buf, wcu, &prev_st);
330*09d4459fSDaniel Fojt 		  if (__glibc_likely (mbclen == mbcdlen))
33195b7b453SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
33295b7b453SJohn Marino 		  else
33395b7b453SJohn Marino 		    {
33495b7b453SJohn Marino 		      src_idx = byte_idx;
33595b7b453SJohn Marino 		      goto offsets_needed;
33695b7b453SJohn Marino 		    }
33795b7b453SJohn Marino 		}
33895b7b453SJohn Marino 	      else
33995b7b453SJohn Marino 		memcpy (pstr->mbs + byte_idx,
34095b7b453SJohn Marino 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
34195b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = wcu;
34295b7b453SJohn Marino 	      /* Write paddings.  */
34395b7b453SJohn Marino 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
34495b7b453SJohn Marino 		pstr->wcs[byte_idx++] = WEOF;
34595b7b453SJohn Marino 	    }
346cf28ed85SJohn Marino 	  else if (mbclen == (size_t) -1 || mbclen == 0
347cf28ed85SJohn Marino 		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
34895b7b453SJohn Marino 	    {
349cf28ed85SJohn Marino 	      /* It is an invalid character, an incomplete character
350cf28ed85SJohn Marino 		 at the end of the string, or '\0'.  Just use the byte.  */
35195b7b453SJohn Marino 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
35295b7b453SJohn Marino 	      pstr->mbs[byte_idx] = ch;
35395b7b453SJohn Marino 	      /* And also cast it to wide char.  */
35495b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
355*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (mbclen == (size_t) -1))
35695b7b453SJohn Marino 		pstr->cur_state = prev_st;
35795b7b453SJohn Marino 	    }
35895b7b453SJohn Marino 	  else
35995b7b453SJohn Marino 	    {
36095b7b453SJohn Marino 	      /* The buffer doesn't have enough space, finish to build.  */
36195b7b453SJohn Marino 	      pstr->cur_state = prev_st;
36295b7b453SJohn Marino 	      break;
36395b7b453SJohn Marino 	    }
36495b7b453SJohn Marino 	}
36595b7b453SJohn Marino       pstr->valid_len = byte_idx;
36695b7b453SJohn Marino       pstr->valid_raw_len = byte_idx;
36795b7b453SJohn Marino       return REG_NOERROR;
36895b7b453SJohn Marino     }
36995b7b453SJohn Marino   else
37095b7b453SJohn Marino     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
37195b7b453SJohn Marino       {
37295b7b453SJohn Marino 	wchar_t wc;
37395b7b453SJohn Marino 	const char *p;
37495b7b453SJohn Marino       offsets_needed:
37595b7b453SJohn Marino 	remain_len = end_idx - byte_idx;
37695b7b453SJohn Marino 	prev_st = pstr->cur_state;
377*09d4459fSDaniel Fojt 	if (__glibc_unlikely (pstr->trans != NULL))
37895b7b453SJohn Marino 	  {
37995b7b453SJohn Marino 	    int i, ch;
38095b7b453SJohn Marino 
38195b7b453SJohn Marino 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
38295b7b453SJohn Marino 	      {
38395b7b453SJohn Marino 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
38495b7b453SJohn Marino 		buf[i] = pstr->trans[ch];
38595b7b453SJohn Marino 	      }
38695b7b453SJohn Marino 	    p = (const char *) buf;
38795b7b453SJohn Marino 	  }
38895b7b453SJohn Marino 	else
38995b7b453SJohn Marino 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
39095b7b453SJohn Marino 	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
391*09d4459fSDaniel Fojt 	if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
39295b7b453SJohn Marino 	  {
393dc7c36e4SJohn Marino 	    wchar_t wcu = __towupper (wc);
394680a9cb8SJohn Marino 	    if (wcu != wc)
39595b7b453SJohn Marino 	      {
39695b7b453SJohn Marino 		size_t mbcdlen;
39795b7b453SJohn Marino 
398*09d4459fSDaniel Fojt 		mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st);
399*09d4459fSDaniel Fojt 		if (__glibc_likely (mbclen == mbcdlen))
40095b7b453SJohn Marino 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
40195b7b453SJohn Marino 		else if (mbcdlen != (size_t) -1)
40295b7b453SJohn Marino 		  {
40395b7b453SJohn Marino 		    size_t i;
40495b7b453SJohn Marino 
40595b7b453SJohn Marino 		    if (byte_idx + mbcdlen > pstr->bufs_len)
40695b7b453SJohn Marino 		      {
40795b7b453SJohn Marino 			pstr->cur_state = prev_st;
40895b7b453SJohn Marino 			break;
40995b7b453SJohn Marino 		      }
41095b7b453SJohn Marino 
41195b7b453SJohn Marino 		    if (pstr->offsets == NULL)
41295b7b453SJohn Marino 		      {
41395b7b453SJohn Marino 			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
41495b7b453SJohn Marino 
41595b7b453SJohn Marino 			if (pstr->offsets == NULL)
41695b7b453SJohn Marino 			  return REG_ESPACE;
41795b7b453SJohn Marino 		      }
41895b7b453SJohn Marino 		    if (!pstr->offsets_needed)
41995b7b453SJohn Marino 		      {
42095b7b453SJohn Marino 			for (i = 0; i < (size_t) byte_idx; ++i)
42195b7b453SJohn Marino 			  pstr->offsets[i] = i;
42295b7b453SJohn Marino 			pstr->offsets_needed = 1;
42395b7b453SJohn Marino 		      }
42495b7b453SJohn Marino 
42595b7b453SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
42695b7b453SJohn Marino 		    pstr->wcs[byte_idx] = wcu;
42795b7b453SJohn Marino 		    pstr->offsets[byte_idx] = src_idx;
42895b7b453SJohn Marino 		    for (i = 1; i < mbcdlen; ++i)
42995b7b453SJohn Marino 		      {
43095b7b453SJohn Marino 			pstr->offsets[byte_idx + i]
43195b7b453SJohn Marino 			  = src_idx + (i < mbclen ? i : mbclen - 1);
43295b7b453SJohn Marino 			pstr->wcs[byte_idx + i] = WEOF;
43395b7b453SJohn Marino 		      }
43495b7b453SJohn Marino 		    pstr->len += mbcdlen - mbclen;
43595b7b453SJohn Marino 		    if (pstr->raw_stop > src_idx)
43695b7b453SJohn Marino 		      pstr->stop += mbcdlen - mbclen;
43795b7b453SJohn Marino 		    end_idx = (pstr->bufs_len > pstr->len)
43895b7b453SJohn Marino 			      ? pstr->len : pstr->bufs_len;
43995b7b453SJohn Marino 		    byte_idx += mbcdlen;
44095b7b453SJohn Marino 		    src_idx += mbclen;
44195b7b453SJohn Marino 		    continue;
44295b7b453SJohn Marino 		  }
44395b7b453SJohn Marino 		else
44495b7b453SJohn Marino 		  memcpy (pstr->mbs + byte_idx, p, mbclen);
44595b7b453SJohn Marino 	      }
44695b7b453SJohn Marino 	    else
44795b7b453SJohn Marino 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
44895b7b453SJohn Marino 
449*09d4459fSDaniel Fojt 	    if (__glibc_unlikely (pstr->offsets_needed != 0))
45095b7b453SJohn Marino 	      {
45195b7b453SJohn Marino 		size_t i;
45295b7b453SJohn Marino 		for (i = 0; i < mbclen; ++i)
45395b7b453SJohn Marino 		  pstr->offsets[byte_idx + i] = src_idx + i;
45495b7b453SJohn Marino 	      }
45595b7b453SJohn Marino 	    src_idx += mbclen;
45695b7b453SJohn Marino 
45795b7b453SJohn Marino 	    pstr->wcs[byte_idx++] = wcu;
45895b7b453SJohn Marino 	    /* Write paddings.  */
45995b7b453SJohn Marino 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
46095b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = WEOF;
46195b7b453SJohn Marino 	  }
462cf28ed85SJohn Marino 	else if (mbclen == (size_t) -1 || mbclen == 0
463cf28ed85SJohn Marino 		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
46495b7b453SJohn Marino 	  {
46595b7b453SJohn Marino 	    /* It is an invalid character or '\0'.  Just use the byte.  */
46695b7b453SJohn Marino 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
46795b7b453SJohn Marino 
468*09d4459fSDaniel Fojt 	    if (__glibc_unlikely (pstr->trans != NULL))
46995b7b453SJohn Marino 	      ch = pstr->trans [ch];
47095b7b453SJohn Marino 	    pstr->mbs[byte_idx] = ch;
47195b7b453SJohn Marino 
472*09d4459fSDaniel Fojt 	    if (__glibc_unlikely (pstr->offsets_needed != 0))
47395b7b453SJohn Marino 	      pstr->offsets[byte_idx] = src_idx;
47495b7b453SJohn Marino 	    ++src_idx;
47595b7b453SJohn Marino 
47695b7b453SJohn Marino 	    /* And also cast it to wide char.  */
47795b7b453SJohn Marino 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
478*09d4459fSDaniel Fojt 	    if (__glibc_unlikely (mbclen == (size_t) -1))
47995b7b453SJohn Marino 	      pstr->cur_state = prev_st;
48095b7b453SJohn Marino 	  }
48195b7b453SJohn Marino 	else
48295b7b453SJohn Marino 	  {
48395b7b453SJohn Marino 	    /* The buffer doesn't have enough space, finish to build.  */
48495b7b453SJohn Marino 	    pstr->cur_state = prev_st;
48595b7b453SJohn Marino 	    break;
48695b7b453SJohn Marino 	  }
48795b7b453SJohn Marino       }
48895b7b453SJohn Marino   pstr->valid_len = byte_idx;
48995b7b453SJohn Marino   pstr->valid_raw_len = src_idx;
49095b7b453SJohn Marino   return REG_NOERROR;
49195b7b453SJohn Marino }
49295b7b453SJohn Marino 
49395b7b453SJohn Marino /* Skip characters until the index becomes greater than NEW_RAW_IDX.
49495b7b453SJohn Marino    Return the index.  */
49595b7b453SJohn Marino 
49695b7b453SJohn Marino static Idx
re_string_skip_chars(re_string_t * pstr,Idx new_raw_idx,wint_t * last_wc)49795b7b453SJohn Marino re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
49895b7b453SJohn Marino {
49995b7b453SJohn Marino   mbstate_t prev_st;
50095b7b453SJohn Marino   Idx rawbuf_idx;
50195b7b453SJohn Marino   size_t mbclen;
50295b7b453SJohn Marino   wint_t wc = WEOF;
50395b7b453SJohn Marino 
50495b7b453SJohn Marino   /* Skip the characters which are not necessary to check.  */
50595b7b453SJohn Marino   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
50695b7b453SJohn Marino        rawbuf_idx < new_raw_idx;)
50795b7b453SJohn Marino     {
50895b7b453SJohn Marino       wchar_t wc2;
509cf28ed85SJohn Marino       Idx remain_len = pstr->raw_len - rawbuf_idx;
51095b7b453SJohn Marino       prev_st = pstr->cur_state;
51195b7b453SJohn Marino       mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
51295b7b453SJohn Marino 			  remain_len, &pstr->cur_state);
513*09d4459fSDaniel Fojt       if (__glibc_unlikely (mbclen == (size_t) -2 || mbclen == (size_t) -1
514*09d4459fSDaniel Fojt 			    || mbclen == 0))
51595b7b453SJohn Marino 	{
51695b7b453SJohn Marino 	  /* We treat these cases as a single byte character.  */
51795b7b453SJohn Marino 	  if (mbclen == 0 || remain_len == 0)
51895b7b453SJohn Marino 	    wc = L'\0';
51995b7b453SJohn Marino 	  else
52095b7b453SJohn Marino 	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
52195b7b453SJohn Marino 	  mbclen = 1;
52295b7b453SJohn Marino 	  pstr->cur_state = prev_st;
52395b7b453SJohn Marino 	}
52495b7b453SJohn Marino       else
52595b7b453SJohn Marino 	wc = wc2;
52695b7b453SJohn Marino       /* Then proceed the next character.  */
52795b7b453SJohn Marino       rawbuf_idx += mbclen;
52895b7b453SJohn Marino     }
52995b7b453SJohn Marino   *last_wc = wc;
53095b7b453SJohn Marino   return rawbuf_idx;
53195b7b453SJohn Marino }
53295b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
53395b7b453SJohn Marino 
53495b7b453SJohn Marino /* Build the buffer PSTR->MBS, and apply the translation if we need.
53595b7b453SJohn Marino    This function is used in case of REG_ICASE.  */
53695b7b453SJohn Marino 
53795b7b453SJohn Marino static void
build_upper_buffer(re_string_t * pstr)53895b7b453SJohn Marino build_upper_buffer (re_string_t *pstr)
53995b7b453SJohn Marino {
54095b7b453SJohn Marino   Idx char_idx, end_idx;
54195b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
54295b7b453SJohn Marino 
54395b7b453SJohn Marino   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
54495b7b453SJohn Marino     {
54595b7b453SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
546*09d4459fSDaniel Fojt       if (__glibc_unlikely (pstr->trans != NULL))
54795b7b453SJohn Marino 	ch = pstr->trans[ch];
54895b7b453SJohn Marino       pstr->mbs[char_idx] = toupper (ch);
54995b7b453SJohn Marino     }
55095b7b453SJohn Marino   pstr->valid_len = char_idx;
55195b7b453SJohn Marino   pstr->valid_raw_len = char_idx;
55295b7b453SJohn Marino }
55395b7b453SJohn Marino 
55495b7b453SJohn Marino /* Apply TRANS to the buffer in PSTR.  */
55595b7b453SJohn Marino 
55695b7b453SJohn Marino static void
re_string_translate_buffer(re_string_t * pstr)55795b7b453SJohn Marino re_string_translate_buffer (re_string_t *pstr)
55895b7b453SJohn Marino {
55995b7b453SJohn Marino   Idx buf_idx, end_idx;
56095b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
56195b7b453SJohn Marino 
56295b7b453SJohn Marino   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
56395b7b453SJohn Marino     {
56495b7b453SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
56595b7b453SJohn Marino       pstr->mbs[buf_idx] = pstr->trans[ch];
56695b7b453SJohn Marino     }
56795b7b453SJohn Marino 
56895b7b453SJohn Marino   pstr->valid_len = buf_idx;
56995b7b453SJohn Marino   pstr->valid_raw_len = buf_idx;
57095b7b453SJohn Marino }
57195b7b453SJohn Marino 
57295b7b453SJohn Marino /* This function re-construct the buffers.
57395b7b453SJohn Marino    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
57495b7b453SJohn Marino    convert to upper case in case of REG_ICASE, apply translation.  */
57595b7b453SJohn Marino 
57695b7b453SJohn Marino static reg_errcode_t
577*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_string_reconstruct(re_string_t * pstr,Idx idx,int eflags)57895b7b453SJohn Marino re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
57995b7b453SJohn Marino {
58095b7b453SJohn Marino   Idx offset;
58195b7b453SJohn Marino 
582*09d4459fSDaniel Fojt   if (__glibc_unlikely (pstr->raw_mbs_idx <= idx))
58395b7b453SJohn Marino     offset = idx - pstr->raw_mbs_idx;
58495b7b453SJohn Marino   else
58595b7b453SJohn Marino     {
58695b7b453SJohn Marino       /* Reset buffer.  */
58795b7b453SJohn Marino #ifdef RE_ENABLE_I18N
58895b7b453SJohn Marino       if (pstr->mb_cur_max > 1)
58995b7b453SJohn Marino 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
59095b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
59195b7b453SJohn Marino       pstr->len = pstr->raw_len;
59295b7b453SJohn Marino       pstr->stop = pstr->raw_stop;
59395b7b453SJohn Marino       pstr->valid_len = 0;
59495b7b453SJohn Marino       pstr->raw_mbs_idx = 0;
59595b7b453SJohn Marino       pstr->valid_raw_len = 0;
59695b7b453SJohn Marino       pstr->offsets_needed = 0;
59795b7b453SJohn Marino       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
59895b7b453SJohn Marino 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
59995b7b453SJohn Marino       if (!pstr->mbs_allocated)
60095b7b453SJohn Marino 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
60195b7b453SJohn Marino       offset = idx;
60295b7b453SJohn Marino     }
60395b7b453SJohn Marino 
604*09d4459fSDaniel Fojt   if (__glibc_likely (offset != 0))
60595b7b453SJohn Marino     {
60695b7b453SJohn Marino       /* Should the already checked characters be kept?  */
607*09d4459fSDaniel Fojt       if (__glibc_likely (offset < pstr->valid_raw_len))
60895b7b453SJohn Marino 	{
60995b7b453SJohn Marino 	  /* Yes, move them to the front of the buffer.  */
61095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
611*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (pstr->offsets_needed))
61295b7b453SJohn Marino 	    {
61395b7b453SJohn Marino 	      Idx low = 0, high = pstr->valid_len, mid;
61495b7b453SJohn Marino 	      do
61595b7b453SJohn Marino 		{
61695b7b453SJohn Marino 		  mid = (high + low) / 2;
61795b7b453SJohn Marino 		  if (pstr->offsets[mid] > offset)
61895b7b453SJohn Marino 		    high = mid;
61995b7b453SJohn Marino 		  else if (pstr->offsets[mid] < offset)
62095b7b453SJohn Marino 		    low = mid + 1;
62195b7b453SJohn Marino 		  else
62295b7b453SJohn Marino 		    break;
62395b7b453SJohn Marino 		}
62495b7b453SJohn Marino 	      while (low < high);
62595b7b453SJohn Marino 	      if (pstr->offsets[mid] < offset)
62695b7b453SJohn Marino 		++mid;
62795b7b453SJohn Marino 	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
62895b7b453SJohn Marino 							eflags);
62995b7b453SJohn Marino 	      /* This can be quite complicated, so handle specially
63095b7b453SJohn Marino 		 only the common and easy case where the character with
63195b7b453SJohn Marino 		 different length representation of lower and upper
63295b7b453SJohn Marino 		 case is present at or after offset.  */
63395b7b453SJohn Marino 	      if (pstr->valid_len > offset
63495b7b453SJohn Marino 		  && mid == offset && pstr->offsets[mid] == offset)
63595b7b453SJohn Marino 		{
63695b7b453SJohn Marino 		  memmove (pstr->wcs, pstr->wcs + offset,
63795b7b453SJohn Marino 			   (pstr->valid_len - offset) * sizeof (wint_t));
63895b7b453SJohn Marino 		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
63995b7b453SJohn Marino 		  pstr->valid_len -= offset;
64095b7b453SJohn Marino 		  pstr->valid_raw_len -= offset;
64195b7b453SJohn Marino 		  for (low = 0; low < pstr->valid_len; low++)
64295b7b453SJohn Marino 		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
64395b7b453SJohn Marino 		}
64495b7b453SJohn Marino 	      else
64595b7b453SJohn Marino 		{
64695b7b453SJohn Marino 		  /* Otherwise, just find out how long the partial multibyte
64795b7b453SJohn Marino 		     character at offset is and fill it with WEOF/255.  */
64895b7b453SJohn Marino 		  pstr->len = pstr->raw_len - idx + offset;
64995b7b453SJohn Marino 		  pstr->stop = pstr->raw_stop - idx + offset;
65095b7b453SJohn Marino 		  pstr->offsets_needed = 0;
65195b7b453SJohn Marino 		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
65295b7b453SJohn Marino 		    --mid;
65395b7b453SJohn Marino 		  while (mid < pstr->valid_len)
65495b7b453SJohn Marino 		    if (pstr->wcs[mid] != WEOF)
65595b7b453SJohn Marino 		      break;
65695b7b453SJohn Marino 		    else
65795b7b453SJohn Marino 		      ++mid;
65895b7b453SJohn Marino 		  if (mid == pstr->valid_len)
65995b7b453SJohn Marino 		    pstr->valid_len = 0;
66095b7b453SJohn Marino 		  else
66195b7b453SJohn Marino 		    {
66295b7b453SJohn Marino 		      pstr->valid_len = pstr->offsets[mid] - offset;
66395b7b453SJohn Marino 		      if (pstr->valid_len)
66495b7b453SJohn Marino 			{
66595b7b453SJohn Marino 			  for (low = 0; low < pstr->valid_len; ++low)
66695b7b453SJohn Marino 			    pstr->wcs[low] = WEOF;
66795b7b453SJohn Marino 			  memset (pstr->mbs, 255, pstr->valid_len);
66895b7b453SJohn Marino 			}
66995b7b453SJohn Marino 		    }
67095b7b453SJohn Marino 		  pstr->valid_raw_len = pstr->valid_len;
67195b7b453SJohn Marino 		}
67295b7b453SJohn Marino 	    }
67395b7b453SJohn Marino 	  else
67495b7b453SJohn Marino #endif
67595b7b453SJohn Marino 	    {
67695b7b453SJohn Marino 	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
67795b7b453SJohn Marino 							eflags);
67895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
67995b7b453SJohn Marino 	      if (pstr->mb_cur_max > 1)
68095b7b453SJohn Marino 		memmove (pstr->wcs, pstr->wcs + offset,
68195b7b453SJohn Marino 			 (pstr->valid_len - offset) * sizeof (wint_t));
68295b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
683*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (pstr->mbs_allocated))
68495b7b453SJohn Marino 		memmove (pstr->mbs, pstr->mbs + offset,
68595b7b453SJohn Marino 			 pstr->valid_len - offset);
68695b7b453SJohn Marino 	      pstr->valid_len -= offset;
68795b7b453SJohn Marino 	      pstr->valid_raw_len -= offset;
688*09d4459fSDaniel Fojt 	      DEBUG_ASSERT (pstr->valid_len > 0);
68995b7b453SJohn Marino 	    }
69095b7b453SJohn Marino 	}
69195b7b453SJohn Marino       else
69295b7b453SJohn Marino 	{
69395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
69495b7b453SJohn Marino 	  /* No, skip all characters until IDX.  */
69595b7b453SJohn Marino 	  Idx prev_valid_len = pstr->valid_len;
69695b7b453SJohn Marino 
697*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (pstr->offsets_needed))
69895b7b453SJohn Marino 	    {
69995b7b453SJohn Marino 	      pstr->len = pstr->raw_len - idx + offset;
70095b7b453SJohn Marino 	      pstr->stop = pstr->raw_stop - idx + offset;
70195b7b453SJohn Marino 	      pstr->offsets_needed = 0;
70295b7b453SJohn Marino 	    }
70395b7b453SJohn Marino #endif
70495b7b453SJohn Marino 	  pstr->valid_len = 0;
70595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
70695b7b453SJohn Marino 	  if (pstr->mb_cur_max > 1)
70795b7b453SJohn Marino 	    {
70895b7b453SJohn Marino 	      Idx wcs_idx;
70995b7b453SJohn Marino 	      wint_t wc = WEOF;
71095b7b453SJohn Marino 
71195b7b453SJohn Marino 	      if (pstr->is_utf8)
71295b7b453SJohn Marino 		{
71395b7b453SJohn Marino 		  const unsigned char *raw, *p, *end;
71495b7b453SJohn Marino 
71595b7b453SJohn Marino 		  /* Special case UTF-8.  Multi-byte chars start with any
71695b7b453SJohn Marino 		     byte other than 0x80 - 0xbf.  */
71795b7b453SJohn Marino 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
71895b7b453SJohn Marino 		  end = raw + (offset - pstr->mb_cur_max);
71995b7b453SJohn Marino 		  if (end < pstr->raw_mbs)
72095b7b453SJohn Marino 		    end = pstr->raw_mbs;
72195b7b453SJohn Marino 		  p = raw + offset - 1;
72295b7b453SJohn Marino #ifdef _LIBC
72395b7b453SJohn Marino 		  /* We know the wchar_t encoding is UCS4, so for the simple
72495b7b453SJohn Marino 		     case, ASCII characters, skip the conversion step.  */
725*09d4459fSDaniel Fojt 		  if (isascii (*p) && __glibc_likely (pstr->trans == NULL))
72695b7b453SJohn Marino 		    {
72795b7b453SJohn Marino 		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
72895b7b453SJohn Marino 		      /* pstr->valid_len = 0; */
72995b7b453SJohn Marino 		      wc = (wchar_t) *p;
73095b7b453SJohn Marino 		    }
73195b7b453SJohn Marino 		  else
73295b7b453SJohn Marino #endif
73395b7b453SJohn Marino 		    for (; p >= end; --p)
73495b7b453SJohn Marino 		      if ((*p & 0xc0) != 0x80)
73595b7b453SJohn Marino 			{
73695b7b453SJohn Marino 			  mbstate_t cur_state;
73795b7b453SJohn Marino 			  wchar_t wc2;
73895b7b453SJohn Marino 			  Idx mlen = raw + pstr->len - p;
739cf28ed85SJohn Marino 			  unsigned char buf[6];
74095b7b453SJohn Marino 			  size_t mbclen;
74195b7b453SJohn Marino 
742cf28ed85SJohn Marino 			  const unsigned char *pp = p;
743*09d4459fSDaniel Fojt 			  if (__glibc_unlikely (pstr->trans != NULL))
74495b7b453SJohn Marino 			    {
74595b7b453SJohn Marino 			      int i = mlen < 6 ? mlen : 6;
74695b7b453SJohn Marino 			      while (--i >= 0)
74795b7b453SJohn Marino 				buf[i] = pstr->trans[p[i]];
748cf28ed85SJohn Marino 			      pp = buf;
74995b7b453SJohn Marino 			    }
75095b7b453SJohn Marino 			  /* XXX Don't use mbrtowc, we know which conversion
75195b7b453SJohn Marino 			     to use (UTF-8 -> UCS4).  */
75295b7b453SJohn Marino 			  memset (&cur_state, 0, sizeof (cur_state));
753cf28ed85SJohn Marino 			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
75495b7b453SJohn Marino 					      &cur_state);
75595b7b453SJohn Marino 			  if (raw + offset - p <= mbclen
75695b7b453SJohn Marino 			      && mbclen < (size_t) -2)
75795b7b453SJohn Marino 			    {
75895b7b453SJohn Marino 			      memset (&pstr->cur_state, '\0',
75995b7b453SJohn Marino 				      sizeof (mbstate_t));
76095b7b453SJohn Marino 			      pstr->valid_len = mbclen - (raw + offset - p);
76195b7b453SJohn Marino 			      wc = wc2;
76295b7b453SJohn Marino 			    }
76395b7b453SJohn Marino 			  break;
76495b7b453SJohn Marino 			}
76595b7b453SJohn Marino 		}
76695b7b453SJohn Marino 
76795b7b453SJohn Marino 	      if (wc == WEOF)
76895b7b453SJohn Marino 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
76995b7b453SJohn Marino 	      if (wc == WEOF)
77095b7b453SJohn Marino 		pstr->tip_context
77195b7b453SJohn Marino 		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
77295b7b453SJohn Marino 	      else
773*09d4459fSDaniel Fojt 		pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != 0)
77495b7b453SJohn Marino 				      && IS_WIDE_WORD_CHAR (wc))
77595b7b453SJohn Marino 				     ? CONTEXT_WORD
77695b7b453SJohn Marino 				     : ((IS_WIDE_NEWLINE (wc)
77795b7b453SJohn Marino 					 && pstr->newline_anchor)
77895b7b453SJohn Marino 					? CONTEXT_NEWLINE : 0));
779*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (pstr->valid_len))
78095b7b453SJohn Marino 		{
78195b7b453SJohn Marino 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
78295b7b453SJohn Marino 		    pstr->wcs[wcs_idx] = WEOF;
78395b7b453SJohn Marino 		  if (pstr->mbs_allocated)
78495b7b453SJohn Marino 		    memset (pstr->mbs, 255, pstr->valid_len);
78595b7b453SJohn Marino 		}
78695b7b453SJohn Marino 	      pstr->valid_raw_len = pstr->valid_len;
78795b7b453SJohn Marino 	    }
78895b7b453SJohn Marino 	  else
78995b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
79095b7b453SJohn Marino 	    {
79195b7b453SJohn Marino 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
79295b7b453SJohn Marino 	      pstr->valid_raw_len = 0;
79395b7b453SJohn Marino 	      if (pstr->trans)
79495b7b453SJohn Marino 		c = pstr->trans[c];
79595b7b453SJohn Marino 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
79695b7b453SJohn Marino 				   ? CONTEXT_WORD
79795b7b453SJohn Marino 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
79895b7b453SJohn Marino 				      ? CONTEXT_NEWLINE : 0));
79995b7b453SJohn Marino 	    }
80095b7b453SJohn Marino 	}
801*09d4459fSDaniel Fojt       if (!__glibc_unlikely (pstr->mbs_allocated))
80295b7b453SJohn Marino 	pstr->mbs += offset;
80395b7b453SJohn Marino     }
80495b7b453SJohn Marino   pstr->raw_mbs_idx = idx;
80595b7b453SJohn Marino   pstr->len -= offset;
80695b7b453SJohn Marino   pstr->stop -= offset;
80795b7b453SJohn Marino 
80895b7b453SJohn Marino   /* Then build the buffers.  */
80995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
81095b7b453SJohn Marino   if (pstr->mb_cur_max > 1)
81195b7b453SJohn Marino     {
81295b7b453SJohn Marino       if (pstr->icase)
81395b7b453SJohn Marino 	{
81495b7b453SJohn Marino 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
815*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (ret != REG_NOERROR))
81695b7b453SJohn Marino 	    return ret;
81795b7b453SJohn Marino 	}
81895b7b453SJohn Marino       else
81995b7b453SJohn Marino 	build_wcs_buffer (pstr);
82095b7b453SJohn Marino     }
82195b7b453SJohn Marino   else
82295b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
823*09d4459fSDaniel Fojt     if (__glibc_unlikely (pstr->mbs_allocated))
82495b7b453SJohn Marino       {
82595b7b453SJohn Marino 	if (pstr->icase)
82695b7b453SJohn Marino 	  build_upper_buffer (pstr);
82795b7b453SJohn Marino 	else if (pstr->trans != NULL)
82895b7b453SJohn Marino 	  re_string_translate_buffer (pstr);
82995b7b453SJohn Marino       }
83095b7b453SJohn Marino     else
83195b7b453SJohn Marino       pstr->valid_len = pstr->len;
83295b7b453SJohn Marino 
83395b7b453SJohn Marino   pstr->cur_idx = 0;
83495b7b453SJohn Marino   return REG_NOERROR;
83595b7b453SJohn Marino }
83695b7b453SJohn Marino 
83795b7b453SJohn Marino static unsigned char
838*09d4459fSDaniel Fojt __attribute__ ((pure))
re_string_peek_byte_case(const re_string_t * pstr,Idx idx)83995b7b453SJohn Marino re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
84095b7b453SJohn Marino {
84195b7b453SJohn Marino   int ch;
84295b7b453SJohn Marino   Idx off;
84395b7b453SJohn Marino 
84495b7b453SJohn Marino   /* Handle the common (easiest) cases first.  */
845*09d4459fSDaniel Fojt   if (__glibc_likely (!pstr->mbs_allocated))
84695b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
84795b7b453SJohn Marino 
84895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
84995b7b453SJohn Marino   if (pstr->mb_cur_max > 1
85095b7b453SJohn Marino       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
85195b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
85295b7b453SJohn Marino #endif
85395b7b453SJohn Marino 
85495b7b453SJohn Marino   off = pstr->cur_idx + idx;
85595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
85695b7b453SJohn Marino   if (pstr->offsets_needed)
85795b7b453SJohn Marino     off = pstr->offsets[off];
85895b7b453SJohn Marino #endif
85995b7b453SJohn Marino 
86095b7b453SJohn Marino   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
86195b7b453SJohn Marino 
86295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
86395b7b453SJohn Marino   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
86495b7b453SJohn Marino      this function returns CAPITAL LETTER I instead of first byte of
86595b7b453SJohn Marino      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
86695b7b453SJohn Marino      since peek_byte_case doesn't advance cur_idx in any way.  */
86795b7b453SJohn Marino   if (pstr->offsets_needed && !isascii (ch))
86895b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
86995b7b453SJohn Marino #endif
87095b7b453SJohn Marino 
87195b7b453SJohn Marino   return ch;
87295b7b453SJohn Marino }
87395b7b453SJohn Marino 
87495b7b453SJohn Marino static unsigned char
re_string_fetch_byte_case(re_string_t * pstr)87595b7b453SJohn Marino re_string_fetch_byte_case (re_string_t *pstr)
87695b7b453SJohn Marino {
877*09d4459fSDaniel Fojt   if (__glibc_likely (!pstr->mbs_allocated))
87895b7b453SJohn Marino     return re_string_fetch_byte (pstr);
87995b7b453SJohn Marino 
88095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
88195b7b453SJohn Marino   if (pstr->offsets_needed)
88295b7b453SJohn Marino     {
88395b7b453SJohn Marino       Idx off;
88495b7b453SJohn Marino       int ch;
88595b7b453SJohn Marino 
88695b7b453SJohn Marino       /* For tr_TR.UTF-8 [[:islower:]] there is
88795b7b453SJohn Marino 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
88895b7b453SJohn Marino 	 in that case the whole multi-byte character and return
88995b7b453SJohn Marino 	 the original letter.  On the other side, with
89095b7b453SJohn Marino 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
89195b7b453SJohn Marino 	 anything else would complicate things too much.  */
89295b7b453SJohn Marino 
89395b7b453SJohn Marino       if (!re_string_first_byte (pstr, pstr->cur_idx))
89495b7b453SJohn Marino 	return re_string_fetch_byte (pstr);
89595b7b453SJohn Marino 
89695b7b453SJohn Marino       off = pstr->offsets[pstr->cur_idx];
89795b7b453SJohn Marino       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
89895b7b453SJohn Marino 
89995b7b453SJohn Marino       if (! isascii (ch))
90095b7b453SJohn Marino 	return re_string_fetch_byte (pstr);
90195b7b453SJohn Marino 
90295b7b453SJohn Marino       re_string_skip_bytes (pstr,
90395b7b453SJohn Marino 			    re_string_char_size_at (pstr, pstr->cur_idx));
90495b7b453SJohn Marino       return ch;
90595b7b453SJohn Marino     }
90695b7b453SJohn Marino #endif
90795b7b453SJohn Marino 
90895b7b453SJohn Marino   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
90995b7b453SJohn Marino }
91095b7b453SJohn Marino 
91195b7b453SJohn Marino static void
re_string_destruct(re_string_t * pstr)91295b7b453SJohn Marino re_string_destruct (re_string_t *pstr)
91395b7b453SJohn Marino {
91495b7b453SJohn Marino #ifdef RE_ENABLE_I18N
91595b7b453SJohn Marino   re_free (pstr->wcs);
91695b7b453SJohn Marino   re_free (pstr->offsets);
91795b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
91895b7b453SJohn Marino   if (pstr->mbs_allocated)
91995b7b453SJohn Marino     re_free (pstr->mbs);
92095b7b453SJohn Marino }
92195b7b453SJohn Marino 
92295b7b453SJohn Marino /* Return the context at IDX in INPUT.  */
92395b7b453SJohn Marino 
92495b7b453SJohn Marino static unsigned int
re_string_context_at(const re_string_t * input,Idx idx,int eflags)92595b7b453SJohn Marino re_string_context_at (const re_string_t *input, Idx idx, int eflags)
92695b7b453SJohn Marino {
92795b7b453SJohn Marino   int c;
928*09d4459fSDaniel Fojt   if (__glibc_unlikely (idx < 0))
92995b7b453SJohn Marino     /* In this case, we use the value stored in input->tip_context,
93095b7b453SJohn Marino        since we can't know the character in input->mbs[-1] here.  */
93195b7b453SJohn Marino     return input->tip_context;
932*09d4459fSDaniel Fojt   if (__glibc_unlikely (idx == input->len))
93395b7b453SJohn Marino     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
93495b7b453SJohn Marino 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
93595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
93695b7b453SJohn Marino   if (input->mb_cur_max > 1)
93795b7b453SJohn Marino     {
93895b7b453SJohn Marino       wint_t wc;
93995b7b453SJohn Marino       Idx wc_idx = idx;
94095b7b453SJohn Marino       while(input->wcs[wc_idx] == WEOF)
94195b7b453SJohn Marino 	{
942*09d4459fSDaniel Fojt 	  DEBUG_ASSERT (wc_idx >= 0);
94395b7b453SJohn Marino 	  --wc_idx;
944*09d4459fSDaniel Fojt 	  if (wc_idx < 0)
94595b7b453SJohn Marino 	    return input->tip_context;
94695b7b453SJohn Marino 	}
94795b7b453SJohn Marino       wc = input->wcs[wc_idx];
948*09d4459fSDaniel Fojt       if (__glibc_unlikely (input->word_ops_used != 0)
949*09d4459fSDaniel Fojt 	  && IS_WIDE_WORD_CHAR (wc))
95095b7b453SJohn Marino 	return CONTEXT_WORD;
95195b7b453SJohn Marino       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
95295b7b453SJohn Marino 	      ? CONTEXT_NEWLINE : 0);
95395b7b453SJohn Marino     }
95495b7b453SJohn Marino   else
95595b7b453SJohn Marino #endif
95695b7b453SJohn Marino     {
95795b7b453SJohn Marino       c = re_string_byte_at (input, idx);
95895b7b453SJohn Marino       if (bitset_contain (input->word_char, c))
95995b7b453SJohn Marino 	return CONTEXT_WORD;
96095b7b453SJohn Marino       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
96195b7b453SJohn Marino     }
96295b7b453SJohn Marino }
96395b7b453SJohn Marino 
96495b7b453SJohn Marino /* Functions for set operation.  */
96595b7b453SJohn Marino 
96695b7b453SJohn Marino static reg_errcode_t
967*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_alloc(re_node_set * set,Idx size)96895b7b453SJohn Marino re_node_set_alloc (re_node_set *set, Idx size)
96995b7b453SJohn Marino {
97095b7b453SJohn Marino   set->alloc = size;
97195b7b453SJohn Marino   set->nelem = 0;
97295b7b453SJohn Marino   set->elems = re_malloc (Idx, size);
973*09d4459fSDaniel Fojt   if (__glibc_unlikely (set->elems == NULL)
974*09d4459fSDaniel Fojt       && (MALLOC_0_IS_NONNULL || size != 0))
97595b7b453SJohn Marino     return REG_ESPACE;
97695b7b453SJohn Marino   return REG_NOERROR;
97795b7b453SJohn Marino }
97895b7b453SJohn Marino 
97995b7b453SJohn Marino static reg_errcode_t
980*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_init_1(re_node_set * set,Idx elem)98195b7b453SJohn Marino re_node_set_init_1 (re_node_set *set, Idx elem)
98295b7b453SJohn Marino {
98395b7b453SJohn Marino   set->alloc = 1;
98495b7b453SJohn Marino   set->nelem = 1;
98595b7b453SJohn Marino   set->elems = re_malloc (Idx, 1);
986*09d4459fSDaniel Fojt   if (__glibc_unlikely (set->elems == NULL))
98795b7b453SJohn Marino     {
98895b7b453SJohn Marino       set->alloc = set->nelem = 0;
98995b7b453SJohn Marino       return REG_ESPACE;
99095b7b453SJohn Marino     }
99195b7b453SJohn Marino   set->elems[0] = elem;
99295b7b453SJohn Marino   return REG_NOERROR;
99395b7b453SJohn Marino }
99495b7b453SJohn Marino 
99595b7b453SJohn Marino static reg_errcode_t
996*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_init_2(re_node_set * set,Idx elem1,Idx elem2)99795b7b453SJohn Marino re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
99895b7b453SJohn Marino {
99995b7b453SJohn Marino   set->alloc = 2;
100095b7b453SJohn Marino   set->elems = re_malloc (Idx, 2);
1001*09d4459fSDaniel Fojt   if (__glibc_unlikely (set->elems == NULL))
100295b7b453SJohn Marino     return REG_ESPACE;
100395b7b453SJohn Marino   if (elem1 == elem2)
100495b7b453SJohn Marino     {
100595b7b453SJohn Marino       set->nelem = 1;
100695b7b453SJohn Marino       set->elems[0] = elem1;
100795b7b453SJohn Marino     }
100895b7b453SJohn Marino   else
100995b7b453SJohn Marino     {
101095b7b453SJohn Marino       set->nelem = 2;
101195b7b453SJohn Marino       if (elem1 < elem2)
101295b7b453SJohn Marino 	{
101395b7b453SJohn Marino 	  set->elems[0] = elem1;
101495b7b453SJohn Marino 	  set->elems[1] = elem2;
101595b7b453SJohn Marino 	}
101695b7b453SJohn Marino       else
101795b7b453SJohn Marino 	{
101895b7b453SJohn Marino 	  set->elems[0] = elem2;
101995b7b453SJohn Marino 	  set->elems[1] = elem1;
102095b7b453SJohn Marino 	}
102195b7b453SJohn Marino     }
102295b7b453SJohn Marino   return REG_NOERROR;
102395b7b453SJohn Marino }
102495b7b453SJohn Marino 
102595b7b453SJohn Marino static reg_errcode_t
1026*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_init_copy(re_node_set * dest,const re_node_set * src)102795b7b453SJohn Marino re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
102895b7b453SJohn Marino {
102995b7b453SJohn Marino   dest->nelem = src->nelem;
103095b7b453SJohn Marino   if (src->nelem > 0)
103195b7b453SJohn Marino     {
103295b7b453SJohn Marino       dest->alloc = dest->nelem;
103395b7b453SJohn Marino       dest->elems = re_malloc (Idx, dest->alloc);
1034*09d4459fSDaniel Fojt       if (__glibc_unlikely (dest->elems == NULL))
103595b7b453SJohn Marino 	{
103695b7b453SJohn Marino 	  dest->alloc = dest->nelem = 0;
103795b7b453SJohn Marino 	  return REG_ESPACE;
103895b7b453SJohn Marino 	}
103995b7b453SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
104095b7b453SJohn Marino     }
104195b7b453SJohn Marino   else
104295b7b453SJohn Marino     re_node_set_init_empty (dest);
104395b7b453SJohn Marino   return REG_NOERROR;
104495b7b453SJohn Marino }
104595b7b453SJohn Marino 
104695b7b453SJohn Marino /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
104795b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
104895b7b453SJohn Marino    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
104995b7b453SJohn Marino 
105095b7b453SJohn Marino static reg_errcode_t
1051*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_add_intersect(re_node_set * dest,const re_node_set * src1,const re_node_set * src2)105295b7b453SJohn Marino re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
105395b7b453SJohn Marino 			   const re_node_set *src2)
105495b7b453SJohn Marino {
105595b7b453SJohn Marino   Idx i1, i2, is, id, delta, sbase;
105695b7b453SJohn Marino   if (src1->nelem == 0 || src2->nelem == 0)
105795b7b453SJohn Marino     return REG_NOERROR;
105895b7b453SJohn Marino 
105995b7b453SJohn Marino   /* We need dest->nelem + 2 * elems_in_intersection; this is a
106095b7b453SJohn Marino      conservative estimate.  */
106195b7b453SJohn Marino   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
106295b7b453SJohn Marino     {
106395b7b453SJohn Marino       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
106495b7b453SJohn Marino       Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1065*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_elems == NULL))
106695b7b453SJohn Marino 	return REG_ESPACE;
106795b7b453SJohn Marino       dest->elems = new_elems;
106895b7b453SJohn Marino       dest->alloc = new_alloc;
106995b7b453SJohn Marino     }
107095b7b453SJohn Marino 
107195b7b453SJohn Marino   /* Find the items in the intersection of SRC1 and SRC2, and copy
107295b7b453SJohn Marino      into the top of DEST those that are not already in DEST itself.  */
107395b7b453SJohn Marino   sbase = dest->nelem + src1->nelem + src2->nelem;
107495b7b453SJohn Marino   i1 = src1->nelem - 1;
107595b7b453SJohn Marino   i2 = src2->nelem - 1;
107695b7b453SJohn Marino   id = dest->nelem - 1;
107795b7b453SJohn Marino   for (;;)
107895b7b453SJohn Marino     {
107995b7b453SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
108095b7b453SJohn Marino 	{
108195b7b453SJohn Marino 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
1082*09d4459fSDaniel Fojt 	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
108395b7b453SJohn Marino 	    --id;
108495b7b453SJohn Marino 
1085*09d4459fSDaniel Fojt 	  if (id < 0 || dest->elems[id] != src1->elems[i1])
108695b7b453SJohn Marino             dest->elems[--sbase] = src1->elems[i1];
108795b7b453SJohn Marino 
1088*09d4459fSDaniel Fojt 	  if (--i1 < 0 || --i2 < 0)
108995b7b453SJohn Marino 	    break;
109095b7b453SJohn Marino 	}
109195b7b453SJohn Marino 
109295b7b453SJohn Marino       /* Lower the highest of the two items.  */
109395b7b453SJohn Marino       else if (src1->elems[i1] < src2->elems[i2])
109495b7b453SJohn Marino 	{
1095*09d4459fSDaniel Fojt 	  if (--i2 < 0)
109695b7b453SJohn Marino 	    break;
109795b7b453SJohn Marino 	}
109895b7b453SJohn Marino       else
109995b7b453SJohn Marino 	{
1100*09d4459fSDaniel Fojt 	  if (--i1 < 0)
110195b7b453SJohn Marino 	    break;
110295b7b453SJohn Marino 	}
110395b7b453SJohn Marino     }
110495b7b453SJohn Marino 
110595b7b453SJohn Marino   id = dest->nelem - 1;
110695b7b453SJohn Marino   is = dest->nelem + src1->nelem + src2->nelem - 1;
110795b7b453SJohn Marino   delta = is - sbase + 1;
110895b7b453SJohn Marino 
110995b7b453SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
111095b7b453SJohn Marino      DEST elements are already in place; this is more or
111195b7b453SJohn Marino      less the same loop that is in re_node_set_merge.  */
111295b7b453SJohn Marino   dest->nelem += delta;
1113*09d4459fSDaniel Fojt   if (delta > 0 && id >= 0)
111495b7b453SJohn Marino     for (;;)
111595b7b453SJohn Marino       {
111695b7b453SJohn Marino 	if (dest->elems[is] > dest->elems[id])
111795b7b453SJohn Marino 	  {
111895b7b453SJohn Marino 	    /* Copy from the top.  */
111995b7b453SJohn Marino 	    dest->elems[id + delta--] = dest->elems[is--];
112095b7b453SJohn Marino 	    if (delta == 0)
112195b7b453SJohn Marino 	      break;
112295b7b453SJohn Marino 	  }
112395b7b453SJohn Marino 	else
112495b7b453SJohn Marino 	  {
112595b7b453SJohn Marino 	    /* Slide from the bottom.  */
112695b7b453SJohn Marino 	    dest->elems[id + delta] = dest->elems[id];
1127*09d4459fSDaniel Fojt 	    if (--id < 0)
112895b7b453SJohn Marino 	      break;
112995b7b453SJohn Marino 	  }
113095b7b453SJohn Marino       }
113195b7b453SJohn Marino 
113295b7b453SJohn Marino   /* Copy remaining SRC elements.  */
113395b7b453SJohn Marino   memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
113495b7b453SJohn Marino 
113595b7b453SJohn Marino   return REG_NOERROR;
113695b7b453SJohn Marino }
113795b7b453SJohn Marino 
113895b7b453SJohn Marino /* Calculate the union set of the sets SRC1 and SRC2. And store it to
113995b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
114095b7b453SJohn Marino 
114195b7b453SJohn Marino static reg_errcode_t
1142*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_init_union(re_node_set * dest,const re_node_set * src1,const re_node_set * src2)114395b7b453SJohn Marino re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
114495b7b453SJohn Marino 			const re_node_set *src2)
114595b7b453SJohn Marino {
114695b7b453SJohn Marino   Idx i1, i2, id;
114795b7b453SJohn Marino   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
114895b7b453SJohn Marino     {
114995b7b453SJohn Marino       dest->alloc = src1->nelem + src2->nelem;
115095b7b453SJohn Marino       dest->elems = re_malloc (Idx, dest->alloc);
1151*09d4459fSDaniel Fojt       if (__glibc_unlikely (dest->elems == NULL))
115295b7b453SJohn Marino 	return REG_ESPACE;
115395b7b453SJohn Marino     }
115495b7b453SJohn Marino   else
115595b7b453SJohn Marino     {
115695b7b453SJohn Marino       if (src1 != NULL && src1->nelem > 0)
115795b7b453SJohn Marino 	return re_node_set_init_copy (dest, src1);
115895b7b453SJohn Marino       else if (src2 != NULL && src2->nelem > 0)
115995b7b453SJohn Marino 	return re_node_set_init_copy (dest, src2);
116095b7b453SJohn Marino       else
116195b7b453SJohn Marino 	re_node_set_init_empty (dest);
116295b7b453SJohn Marino       return REG_NOERROR;
116395b7b453SJohn Marino     }
116495b7b453SJohn Marino   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
116595b7b453SJohn Marino     {
116695b7b453SJohn Marino       if (src1->elems[i1] > src2->elems[i2])
116795b7b453SJohn Marino 	{
116895b7b453SJohn Marino 	  dest->elems[id++] = src2->elems[i2++];
116995b7b453SJohn Marino 	  continue;
117095b7b453SJohn Marino 	}
117195b7b453SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
117295b7b453SJohn Marino 	++i2;
117395b7b453SJohn Marino       dest->elems[id++] = src1->elems[i1++];
117495b7b453SJohn Marino     }
117595b7b453SJohn Marino   if (i1 < src1->nelem)
117695b7b453SJohn Marino     {
117795b7b453SJohn Marino       memcpy (dest->elems + id, src1->elems + i1,
117895b7b453SJohn Marino 	     (src1->nelem - i1) * sizeof (Idx));
117995b7b453SJohn Marino       id += src1->nelem - i1;
118095b7b453SJohn Marino     }
118195b7b453SJohn Marino   else if (i2 < src2->nelem)
118295b7b453SJohn Marino     {
118395b7b453SJohn Marino       memcpy (dest->elems + id, src2->elems + i2,
118495b7b453SJohn Marino 	     (src2->nelem - i2) * sizeof (Idx));
118595b7b453SJohn Marino       id += src2->nelem - i2;
118695b7b453SJohn Marino     }
118795b7b453SJohn Marino   dest->nelem = id;
118895b7b453SJohn Marino   return REG_NOERROR;
118995b7b453SJohn Marino }
119095b7b453SJohn Marino 
119195b7b453SJohn Marino /* Calculate the union set of the sets DEST and SRC. And store it to
119295b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
119395b7b453SJohn Marino 
119495b7b453SJohn Marino static reg_errcode_t
1195*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_merge(re_node_set * dest,const re_node_set * src)119695b7b453SJohn Marino re_node_set_merge (re_node_set *dest, const re_node_set *src)
119795b7b453SJohn Marino {
119895b7b453SJohn Marino   Idx is, id, sbase, delta;
119995b7b453SJohn Marino   if (src == NULL || src->nelem == 0)
120095b7b453SJohn Marino     return REG_NOERROR;
120195b7b453SJohn Marino   if (dest->alloc < 2 * src->nelem + dest->nelem)
120295b7b453SJohn Marino     {
120395b7b453SJohn Marino       Idx new_alloc = 2 * (src->nelem + dest->alloc);
120495b7b453SJohn Marino       Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1205*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_buffer == NULL))
120695b7b453SJohn Marino 	return REG_ESPACE;
120795b7b453SJohn Marino       dest->elems = new_buffer;
120895b7b453SJohn Marino       dest->alloc = new_alloc;
120995b7b453SJohn Marino     }
121095b7b453SJohn Marino 
1211*09d4459fSDaniel Fojt   if (__glibc_unlikely (dest->nelem == 0))
121295b7b453SJohn Marino     {
121395b7b453SJohn Marino       dest->nelem = src->nelem;
121495b7b453SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
121595b7b453SJohn Marino       return REG_NOERROR;
121695b7b453SJohn Marino     }
121795b7b453SJohn Marino 
121895b7b453SJohn Marino   /* Copy into the top of DEST the items of SRC that are not
121995b7b453SJohn Marino      found in DEST.  Maybe we could binary search in DEST?  */
122095b7b453SJohn Marino   for (sbase = dest->nelem + 2 * src->nelem,
1221*09d4459fSDaniel Fojt        is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
122295b7b453SJohn Marino     {
122395b7b453SJohn Marino       if (dest->elems[id] == src->elems[is])
122495b7b453SJohn Marino 	is--, id--;
122595b7b453SJohn Marino       else if (dest->elems[id] < src->elems[is])
122695b7b453SJohn Marino 	dest->elems[--sbase] = src->elems[is--];
122795b7b453SJohn Marino       else /* if (dest->elems[id] > src->elems[is]) */
122895b7b453SJohn Marino 	--id;
122995b7b453SJohn Marino     }
123095b7b453SJohn Marino 
1231*09d4459fSDaniel Fojt   if (is >= 0)
123295b7b453SJohn Marino     {
123395b7b453SJohn Marino       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
123495b7b453SJohn Marino       sbase -= is + 1;
123595b7b453SJohn Marino       memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
123695b7b453SJohn Marino     }
123795b7b453SJohn Marino 
123895b7b453SJohn Marino   id = dest->nelem - 1;
123995b7b453SJohn Marino   is = dest->nelem + 2 * src->nelem - 1;
124095b7b453SJohn Marino   delta = is - sbase + 1;
124195b7b453SJohn Marino   if (delta == 0)
124295b7b453SJohn Marino     return REG_NOERROR;
124395b7b453SJohn Marino 
124495b7b453SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
124595b7b453SJohn Marino      DEST elements are already in place.  */
124695b7b453SJohn Marino   dest->nelem += delta;
124795b7b453SJohn Marino   for (;;)
124895b7b453SJohn Marino     {
124995b7b453SJohn Marino       if (dest->elems[is] > dest->elems[id])
125095b7b453SJohn Marino 	{
125195b7b453SJohn Marino 	  /* Copy from the top.  */
125295b7b453SJohn Marino 	  dest->elems[id + delta--] = dest->elems[is--];
125395b7b453SJohn Marino 	  if (delta == 0)
125495b7b453SJohn Marino 	    break;
125595b7b453SJohn Marino 	}
125695b7b453SJohn Marino       else
125795b7b453SJohn Marino 	{
125895b7b453SJohn Marino 	  /* Slide from the bottom.  */
125995b7b453SJohn Marino 	  dest->elems[id + delta] = dest->elems[id];
1260*09d4459fSDaniel Fojt 	  if (--id < 0)
126195b7b453SJohn Marino 	    {
126295b7b453SJohn Marino 	      /* Copy remaining SRC elements.  */
126395b7b453SJohn Marino 	      memcpy (dest->elems, dest->elems + sbase,
126495b7b453SJohn Marino 		      delta * sizeof (Idx));
126595b7b453SJohn Marino 	      break;
126695b7b453SJohn Marino 	    }
126795b7b453SJohn Marino 	}
126895b7b453SJohn Marino     }
126995b7b453SJohn Marino 
127095b7b453SJohn Marino   return REG_NOERROR;
127195b7b453SJohn Marino }
127295b7b453SJohn Marino 
127395b7b453SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
127495b7b453SJohn Marino    SET should not already have ELEM.
127595b7b453SJohn Marino    Return true if successful.  */
127695b7b453SJohn Marino 
127795b7b453SJohn Marino static bool
1278*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_insert(re_node_set * set,Idx elem)127995b7b453SJohn Marino re_node_set_insert (re_node_set *set, Idx elem)
128095b7b453SJohn Marino {
128195b7b453SJohn Marino   Idx idx;
128295b7b453SJohn Marino   /* In case the set is empty.  */
128395b7b453SJohn Marino   if (set->alloc == 0)
1284*09d4459fSDaniel Fojt     return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR);
128595b7b453SJohn Marino 
1286*09d4459fSDaniel Fojt   if (__glibc_unlikely (set->nelem) == 0)
128795b7b453SJohn Marino     {
128895b7b453SJohn Marino       /* We already guaranteed above that set->alloc != 0.  */
128995b7b453SJohn Marino       set->elems[0] = elem;
129095b7b453SJohn Marino       ++set->nelem;
129195b7b453SJohn Marino       return true;
129295b7b453SJohn Marino     }
129395b7b453SJohn Marino 
129495b7b453SJohn Marino   /* Realloc if we need.  */
129595b7b453SJohn Marino   if (set->alloc == set->nelem)
129695b7b453SJohn Marino     {
129795b7b453SJohn Marino       Idx *new_elems;
129895b7b453SJohn Marino       set->alloc = set->alloc * 2;
129995b7b453SJohn Marino       new_elems = re_realloc (set->elems, Idx, set->alloc);
1300*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_elems == NULL))
130195b7b453SJohn Marino 	return false;
130295b7b453SJohn Marino       set->elems = new_elems;
130395b7b453SJohn Marino     }
130495b7b453SJohn Marino 
130595b7b453SJohn Marino   /* Move the elements which follows the new element.  Test the
130695b7b453SJohn Marino      first element separately to skip a check in the inner loop.  */
130795b7b453SJohn Marino   if (elem < set->elems[0])
130895b7b453SJohn Marino     {
130995b7b453SJohn Marino       for (idx = set->nelem; idx > 0; idx--)
131095b7b453SJohn Marino 	set->elems[idx] = set->elems[idx - 1];
131195b7b453SJohn Marino     }
131295b7b453SJohn Marino   else
131395b7b453SJohn Marino     {
131495b7b453SJohn Marino       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
131595b7b453SJohn Marino 	set->elems[idx] = set->elems[idx - 1];
131695b7b453SJohn Marino     }
131795b7b453SJohn Marino 
131895b7b453SJohn Marino   /* Insert the new element.  */
131995b7b453SJohn Marino   set->elems[idx] = elem;
132095b7b453SJohn Marino   ++set->nelem;
132195b7b453SJohn Marino   return true;
132295b7b453SJohn Marino }
132395b7b453SJohn Marino 
132495b7b453SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
132595b7b453SJohn Marino    SET should not already have any element greater than or equal to ELEM.
132695b7b453SJohn Marino    Return true if successful.  */
132795b7b453SJohn Marino 
132895b7b453SJohn Marino static bool
1329*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_node_set_insert_last(re_node_set * set,Idx elem)133095b7b453SJohn Marino re_node_set_insert_last (re_node_set *set, Idx elem)
133195b7b453SJohn Marino {
133295b7b453SJohn Marino   /* Realloc if we need.  */
133395b7b453SJohn Marino   if (set->alloc == set->nelem)
133495b7b453SJohn Marino     {
133595b7b453SJohn Marino       Idx *new_elems;
133695b7b453SJohn Marino       set->alloc = (set->alloc + 1) * 2;
133795b7b453SJohn Marino       new_elems = re_realloc (set->elems, Idx, set->alloc);
1338*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_elems == NULL))
133995b7b453SJohn Marino 	return false;
134095b7b453SJohn Marino       set->elems = new_elems;
134195b7b453SJohn Marino     }
134295b7b453SJohn Marino 
134395b7b453SJohn Marino   /* Insert the new element.  */
134495b7b453SJohn Marino   set->elems[set->nelem++] = elem;
134595b7b453SJohn Marino   return true;
134695b7b453SJohn Marino }
134795b7b453SJohn Marino 
134895b7b453SJohn Marino /* Compare two node sets SET1 and SET2.
134995b7b453SJohn Marino    Return true if SET1 and SET2 are equivalent.  */
135095b7b453SJohn Marino 
135195b7b453SJohn Marino static bool
1352*09d4459fSDaniel Fojt __attribute__ ((pure))
re_node_set_compare(const re_node_set * set1,const re_node_set * set2)135395b7b453SJohn Marino re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
135495b7b453SJohn Marino {
135595b7b453SJohn Marino   Idx i;
135695b7b453SJohn Marino   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
135795b7b453SJohn Marino     return false;
1358*09d4459fSDaniel Fojt   for (i = set1->nelem ; --i >= 0 ; )
135995b7b453SJohn Marino     if (set1->elems[i] != set2->elems[i])
136095b7b453SJohn Marino       return false;
136195b7b453SJohn Marino   return true;
136295b7b453SJohn Marino }
136395b7b453SJohn Marino 
136495b7b453SJohn Marino /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
136595b7b453SJohn Marino 
136695b7b453SJohn Marino static Idx
1367*09d4459fSDaniel Fojt __attribute__ ((pure))
re_node_set_contains(const re_node_set * set,Idx elem)136895b7b453SJohn Marino re_node_set_contains (const re_node_set *set, Idx elem)
136995b7b453SJohn Marino {
137095b7b453SJohn Marino   __re_size_t idx, right, mid;
1371*09d4459fSDaniel Fojt   if (set->nelem <= 0)
137295b7b453SJohn Marino     return 0;
137395b7b453SJohn Marino 
137495b7b453SJohn Marino   /* Binary search the element.  */
137595b7b453SJohn Marino   idx = 0;
137695b7b453SJohn Marino   right = set->nelem - 1;
137795b7b453SJohn Marino   while (idx < right)
137895b7b453SJohn Marino     {
137995b7b453SJohn Marino       mid = (idx + right) / 2;
138095b7b453SJohn Marino       if (set->elems[mid] < elem)
138195b7b453SJohn Marino 	idx = mid + 1;
138295b7b453SJohn Marino       else
138395b7b453SJohn Marino 	right = mid;
138495b7b453SJohn Marino     }
138595b7b453SJohn Marino   return set->elems[idx] == elem ? idx + 1 : 0;
138695b7b453SJohn Marino }
138795b7b453SJohn Marino 
138895b7b453SJohn Marino static void
re_node_set_remove_at(re_node_set * set,Idx idx)138995b7b453SJohn Marino re_node_set_remove_at (re_node_set *set, Idx idx)
139095b7b453SJohn Marino {
1391680a9cb8SJohn Marino   if (idx < 0 || idx >= set->nelem)
139295b7b453SJohn Marino     return;
139395b7b453SJohn Marino   --set->nelem;
139495b7b453SJohn Marino   for (; idx < set->nelem; idx++)
139595b7b453SJohn Marino     set->elems[idx] = set->elems[idx + 1];
139695b7b453SJohn Marino }
139795b7b453SJohn Marino 
139895b7b453SJohn Marino 
139995b7b453SJohn Marino /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1400*09d4459fSDaniel Fojt    Or return -1 if an error occurred.  */
140195b7b453SJohn Marino 
140295b7b453SJohn Marino static Idx
re_dfa_add_node(re_dfa_t * dfa,re_token_t token)140395b7b453SJohn Marino re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
140495b7b453SJohn Marino {
1405*09d4459fSDaniel Fojt   if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc))
140695b7b453SJohn Marino     {
140795b7b453SJohn Marino       size_t new_nodes_alloc = dfa->nodes_alloc * 2;
140895b7b453SJohn Marino       Idx *new_nexts, *new_indices;
140995b7b453SJohn Marino       re_node_set *new_edests, *new_eclosures;
141095b7b453SJohn Marino       re_token_t *new_nodes;
1411cf28ed85SJohn Marino 
1412cf28ed85SJohn Marino       /* Avoid overflows in realloc.  */
1413cf28ed85SJohn Marino       const size_t max_object_size = MAX (sizeof (re_token_t),
141495b7b453SJohn Marino 					  MAX (sizeof (re_node_set),
141595b7b453SJohn Marino 					       sizeof (Idx)));
1416*09d4459fSDaniel Fojt       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
1417*09d4459fSDaniel Fojt 			    < new_nodes_alloc))
1418*09d4459fSDaniel Fojt 	return -1;
141995b7b453SJohn Marino 
142095b7b453SJohn Marino       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1421*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_nodes == NULL))
1422*09d4459fSDaniel Fojt 	return -1;
142395b7b453SJohn Marino       dfa->nodes = new_nodes;
142495b7b453SJohn Marino       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
142595b7b453SJohn Marino       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
142695b7b453SJohn Marino       new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
142795b7b453SJohn Marino       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1428*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL
1429*09d4459fSDaniel Fojt 			    || new_edests == NULL || new_eclosures == NULL))
1430*09d4459fSDaniel Fojt 	{
1431*09d4459fSDaniel Fojt 	   re_free (new_nexts);
1432*09d4459fSDaniel Fojt 	   re_free (new_indices);
1433*09d4459fSDaniel Fojt 	   re_free (new_edests);
1434*09d4459fSDaniel Fojt 	   re_free (new_eclosures);
1435*09d4459fSDaniel Fojt 	   return -1;
1436*09d4459fSDaniel Fojt 	}
143795b7b453SJohn Marino       dfa->nexts = new_nexts;
143895b7b453SJohn Marino       dfa->org_indices = new_indices;
143995b7b453SJohn Marino       dfa->edests = new_edests;
144095b7b453SJohn Marino       dfa->eclosures = new_eclosures;
144195b7b453SJohn Marino       dfa->nodes_alloc = new_nodes_alloc;
144295b7b453SJohn Marino     }
144395b7b453SJohn Marino   dfa->nodes[dfa->nodes_len] = token;
144495b7b453SJohn Marino   dfa->nodes[dfa->nodes_len].constraint = 0;
144595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
144695b7b453SJohn Marino   dfa->nodes[dfa->nodes_len].accept_mb =
1447680a9cb8SJohn Marino     ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1448680a9cb8SJohn Marino      || token.type == COMPLEX_BRACKET);
144995b7b453SJohn Marino #endif
1450*09d4459fSDaniel Fojt   dfa->nexts[dfa->nodes_len] = -1;
145195b7b453SJohn Marino   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
145295b7b453SJohn Marino   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
145395b7b453SJohn Marino   return dfa->nodes_len++;
145495b7b453SJohn Marino }
145595b7b453SJohn Marino 
1456680a9cb8SJohn Marino static re_hashval_t
calc_state_hash(const re_node_set * nodes,unsigned int context)145795b7b453SJohn Marino calc_state_hash (const re_node_set *nodes, unsigned int context)
145895b7b453SJohn Marino {
145995b7b453SJohn Marino   re_hashval_t hash = nodes->nelem + context;
146095b7b453SJohn Marino   Idx i;
146195b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
146295b7b453SJohn Marino     hash += nodes->elems[i];
146395b7b453SJohn Marino   return hash;
146495b7b453SJohn Marino }
146595b7b453SJohn Marino 
146695b7b453SJohn Marino /* Search for the state whose node_set is equivalent to NODES.
146795b7b453SJohn Marino    Return the pointer to the state, if we found it in the DFA.
146895b7b453SJohn Marino    Otherwise create the new one and return it.  In case of an error
146995b7b453SJohn Marino    return NULL and set the error code in ERR.
147095b7b453SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
147195b7b453SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
147295b7b453SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
147395b7b453SJohn Marino 	   optimization.  */
147495b7b453SJohn Marino 
147595b7b453SJohn Marino static re_dfastate_t *
1476*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_acquire_state(reg_errcode_t * err,const re_dfa_t * dfa,const re_node_set * nodes)147795b7b453SJohn Marino re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
147895b7b453SJohn Marino 		  const re_node_set *nodes)
147995b7b453SJohn Marino {
148095b7b453SJohn Marino   re_hashval_t hash;
148195b7b453SJohn Marino   re_dfastate_t *new_state;
148295b7b453SJohn Marino   struct re_state_table_entry *spot;
148395b7b453SJohn Marino   Idx i;
1484*09d4459fSDaniel Fojt #if defined GCC_LINT || defined lint
148595b7b453SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
148695b7b453SJohn Marino   *err = REG_NOERROR;
148795b7b453SJohn Marino #endif
1488*09d4459fSDaniel Fojt   if (__glibc_unlikely (nodes->nelem == 0))
148995b7b453SJohn Marino     {
149095b7b453SJohn Marino       *err = REG_NOERROR;
149195b7b453SJohn Marino       return NULL;
149295b7b453SJohn Marino     }
149395b7b453SJohn Marino   hash = calc_state_hash (nodes, 0);
149495b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
149595b7b453SJohn Marino 
149695b7b453SJohn Marino   for (i = 0 ; i < spot->num ; i++)
149795b7b453SJohn Marino     {
149895b7b453SJohn Marino       re_dfastate_t *state = spot->array[i];
149995b7b453SJohn Marino       if (hash != state->hash)
150095b7b453SJohn Marino 	continue;
150195b7b453SJohn Marino       if (re_node_set_compare (&state->nodes, nodes))
150295b7b453SJohn Marino 	return state;
150395b7b453SJohn Marino     }
150495b7b453SJohn Marino 
150595b7b453SJohn Marino   /* There are no appropriate state in the dfa, create the new one.  */
150695b7b453SJohn Marino   new_state = create_ci_newstate (dfa, nodes, hash);
1507*09d4459fSDaniel Fojt   if (__glibc_unlikely (new_state == NULL))
150895b7b453SJohn Marino     *err = REG_ESPACE;
150995b7b453SJohn Marino 
151095b7b453SJohn Marino   return new_state;
151195b7b453SJohn Marino }
151295b7b453SJohn Marino 
151395b7b453SJohn Marino /* Search for the state whose node_set is equivalent to NODES and
151495b7b453SJohn Marino    whose context is equivalent to CONTEXT.
151595b7b453SJohn Marino    Return the pointer to the state, if we found it in the DFA.
151695b7b453SJohn Marino    Otherwise create the new one and return it.  In case of an error
151795b7b453SJohn Marino    return NULL and set the error code in ERR.
151895b7b453SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
151995b7b453SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
152095b7b453SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
152195b7b453SJohn Marino 	   optimization.  */
152295b7b453SJohn Marino 
152395b7b453SJohn Marino static re_dfastate_t *
1524*09d4459fSDaniel Fojt __attribute_warn_unused_result__
re_acquire_state_context(reg_errcode_t * err,const re_dfa_t * dfa,const re_node_set * nodes,unsigned int context)152595b7b453SJohn Marino re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
152695b7b453SJohn Marino 			  const re_node_set *nodes, unsigned int context)
152795b7b453SJohn Marino {
152895b7b453SJohn Marino   re_hashval_t hash;
152995b7b453SJohn Marino   re_dfastate_t *new_state;
153095b7b453SJohn Marino   struct re_state_table_entry *spot;
153195b7b453SJohn Marino   Idx i;
1532*09d4459fSDaniel Fojt #if defined GCC_LINT || defined lint
153395b7b453SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
153495b7b453SJohn Marino   *err = REG_NOERROR;
153595b7b453SJohn Marino #endif
153695b7b453SJohn Marino   if (nodes->nelem == 0)
153795b7b453SJohn Marino     {
153895b7b453SJohn Marino       *err = REG_NOERROR;
153995b7b453SJohn Marino       return NULL;
154095b7b453SJohn Marino     }
154195b7b453SJohn Marino   hash = calc_state_hash (nodes, context);
154295b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
154395b7b453SJohn Marino 
154495b7b453SJohn Marino   for (i = 0 ; i < spot->num ; i++)
154595b7b453SJohn Marino     {
154695b7b453SJohn Marino       re_dfastate_t *state = spot->array[i];
154795b7b453SJohn Marino       if (state->hash == hash
154895b7b453SJohn Marino 	  && state->context == context
154995b7b453SJohn Marino 	  && re_node_set_compare (state->entrance_nodes, nodes))
155095b7b453SJohn Marino 	return state;
155195b7b453SJohn Marino     }
1552cf28ed85SJohn Marino   /* There are no appropriate state in 'dfa', create the new one.  */
155395b7b453SJohn Marino   new_state = create_cd_newstate (dfa, nodes, context, hash);
1554*09d4459fSDaniel Fojt   if (__glibc_unlikely (new_state == NULL))
155595b7b453SJohn Marino     *err = REG_ESPACE;
155695b7b453SJohn Marino 
155795b7b453SJohn Marino   return new_state;
155895b7b453SJohn Marino }
155995b7b453SJohn Marino 
156095b7b453SJohn Marino /* Finish initialization of the new state NEWSTATE, and using its hash value
156195b7b453SJohn Marino    HASH put in the appropriate bucket of DFA's state table.  Return value
156295b7b453SJohn Marino    indicates the error code if failed.  */
156395b7b453SJohn Marino 
156495b7b453SJohn Marino static reg_errcode_t
156595b7b453SJohn Marino __attribute_warn_unused_result__
register_state(const re_dfa_t * dfa,re_dfastate_t * newstate,re_hashval_t hash)156695b7b453SJohn Marino register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
156795b7b453SJohn Marino 		re_hashval_t hash)
156895b7b453SJohn Marino {
156995b7b453SJohn Marino   struct re_state_table_entry *spot;
157095b7b453SJohn Marino   reg_errcode_t err;
157195b7b453SJohn Marino   Idx i;
157295b7b453SJohn Marino 
157395b7b453SJohn Marino   newstate->hash = hash;
157495b7b453SJohn Marino   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1575*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
157695b7b453SJohn Marino     return REG_ESPACE;
157795b7b453SJohn Marino   for (i = 0; i < newstate->nodes.nelem; i++)
157895b7b453SJohn Marino     {
157995b7b453SJohn Marino       Idx elem = newstate->nodes.elems[i];
158095b7b453SJohn Marino       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1581cf28ed85SJohn Marino 	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
158295b7b453SJohn Marino 	  return REG_ESPACE;
158395b7b453SJohn Marino     }
158495b7b453SJohn Marino 
158595b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
1586*09d4459fSDaniel Fojt   if (__glibc_unlikely (spot->alloc <= spot->num))
158795b7b453SJohn Marino     {
158895b7b453SJohn Marino       Idx new_alloc = 2 * spot->num + 2;
158995b7b453SJohn Marino       re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
159095b7b453SJohn Marino 					      new_alloc);
1591*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
159295b7b453SJohn Marino 	return REG_ESPACE;
159395b7b453SJohn Marino       spot->array = new_array;
159495b7b453SJohn Marino       spot->alloc = new_alloc;
159595b7b453SJohn Marino     }
159695b7b453SJohn Marino   spot->array[spot->num++] = newstate;
159795b7b453SJohn Marino   return REG_NOERROR;
159895b7b453SJohn Marino }
159995b7b453SJohn Marino 
160095b7b453SJohn Marino static void
free_state(re_dfastate_t * state)160195b7b453SJohn Marino free_state (re_dfastate_t *state)
160295b7b453SJohn Marino {
160395b7b453SJohn Marino   re_node_set_free (&state->non_eps_nodes);
160495b7b453SJohn Marino   re_node_set_free (&state->inveclosure);
160595b7b453SJohn Marino   if (state->entrance_nodes != &state->nodes)
160695b7b453SJohn Marino     {
160795b7b453SJohn Marino       re_node_set_free (state->entrance_nodes);
160895b7b453SJohn Marino       re_free (state->entrance_nodes);
160995b7b453SJohn Marino     }
161095b7b453SJohn Marino   re_node_set_free (&state->nodes);
161195b7b453SJohn Marino   re_free (state->word_trtable);
161295b7b453SJohn Marino   re_free (state->trtable);
161395b7b453SJohn Marino   re_free (state);
161495b7b453SJohn Marino }
161595b7b453SJohn Marino 
1616cf28ed85SJohn Marino /* Create the new state which is independent of contexts.
161795b7b453SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
161895b7b453SJohn Marino 
161995b7b453SJohn Marino static re_dfastate_t *
1620*09d4459fSDaniel Fojt __attribute_warn_unused_result__
create_ci_newstate(const re_dfa_t * dfa,const re_node_set * nodes,re_hashval_t hash)162195b7b453SJohn Marino create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
162295b7b453SJohn Marino 		    re_hashval_t hash)
162395b7b453SJohn Marino {
162495b7b453SJohn Marino   Idx i;
162595b7b453SJohn Marino   reg_errcode_t err;
162695b7b453SJohn Marino   re_dfastate_t *newstate;
162795b7b453SJohn Marino 
162895b7b453SJohn Marino   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1629*09d4459fSDaniel Fojt   if (__glibc_unlikely (newstate == NULL))
163095b7b453SJohn Marino     return NULL;
163195b7b453SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
1632*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
163395b7b453SJohn Marino     {
163495b7b453SJohn Marino       re_free (newstate);
163595b7b453SJohn Marino       return NULL;
163695b7b453SJohn Marino     }
163795b7b453SJohn Marino 
163895b7b453SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
163995b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
164095b7b453SJohn Marino     {
164195b7b453SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
164295b7b453SJohn Marino       re_token_type_t type = node->type;
164395b7b453SJohn Marino       if (type == CHARACTER && !node->constraint)
164495b7b453SJohn Marino 	continue;
164595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
164695b7b453SJohn Marino       newstate->accept_mb |= node->accept_mb;
164795b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
164895b7b453SJohn Marino 
164995b7b453SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
165095b7b453SJohn Marino       if (type == END_OF_RE)
165195b7b453SJohn Marino 	newstate->halt = 1;
165295b7b453SJohn Marino       else if (type == OP_BACK_REF)
165395b7b453SJohn Marino 	newstate->has_backref = 1;
165495b7b453SJohn Marino       else if (type == ANCHOR || node->constraint)
165595b7b453SJohn Marino 	newstate->has_constraint = 1;
165695b7b453SJohn Marino     }
165795b7b453SJohn Marino   err = register_state (dfa, newstate, hash);
1658*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
165995b7b453SJohn Marino     {
166095b7b453SJohn Marino       free_state (newstate);
166195b7b453SJohn Marino       newstate = NULL;
166295b7b453SJohn Marino     }
166395b7b453SJohn Marino   return newstate;
166495b7b453SJohn Marino }
166595b7b453SJohn Marino 
166695b7b453SJohn Marino /* Create the new state which is depend on the context CONTEXT.
166795b7b453SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
166895b7b453SJohn Marino 
166995b7b453SJohn Marino static re_dfastate_t *
1670*09d4459fSDaniel Fojt __attribute_warn_unused_result__
create_cd_newstate(const re_dfa_t * dfa,const re_node_set * nodes,unsigned int context,re_hashval_t hash)167195b7b453SJohn Marino create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
167295b7b453SJohn Marino 		    unsigned int context, re_hashval_t hash)
167395b7b453SJohn Marino {
167495b7b453SJohn Marino   Idx i, nctx_nodes = 0;
167595b7b453SJohn Marino   reg_errcode_t err;
167695b7b453SJohn Marino   re_dfastate_t *newstate;
167795b7b453SJohn Marino 
167895b7b453SJohn Marino   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1679*09d4459fSDaniel Fojt   if (__glibc_unlikely (newstate == NULL))
168095b7b453SJohn Marino     return NULL;
168195b7b453SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
1682*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
168395b7b453SJohn Marino     {
168495b7b453SJohn Marino       re_free (newstate);
168595b7b453SJohn Marino       return NULL;
168695b7b453SJohn Marino     }
168795b7b453SJohn Marino 
168895b7b453SJohn Marino   newstate->context = context;
168995b7b453SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
169095b7b453SJohn Marino 
169195b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
169295b7b453SJohn Marino     {
169395b7b453SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
169495b7b453SJohn Marino       re_token_type_t type = node->type;
169595b7b453SJohn Marino       unsigned int constraint = node->constraint;
169695b7b453SJohn Marino 
169795b7b453SJohn Marino       if (type == CHARACTER && !constraint)
169895b7b453SJohn Marino 	continue;
169995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
170095b7b453SJohn Marino       newstate->accept_mb |= node->accept_mb;
170195b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
170295b7b453SJohn Marino 
170395b7b453SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
170495b7b453SJohn Marino       if (type == END_OF_RE)
170595b7b453SJohn Marino 	newstate->halt = 1;
170695b7b453SJohn Marino       else if (type == OP_BACK_REF)
170795b7b453SJohn Marino 	newstate->has_backref = 1;
170895b7b453SJohn Marino 
170995b7b453SJohn Marino       if (constraint)
171095b7b453SJohn Marino 	{
171195b7b453SJohn Marino 	  if (newstate->entrance_nodes == &newstate->nodes)
171295b7b453SJohn Marino 	    {
1713*09d4459fSDaniel Fojt 	      re_node_set *entrance_nodes = re_malloc (re_node_set, 1);
1714*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (entrance_nodes == NULL))
171595b7b453SJohn Marino 		{
171695b7b453SJohn Marino 		  free_state (newstate);
171795b7b453SJohn Marino 		  return NULL;
171895b7b453SJohn Marino 		}
1719*09d4459fSDaniel Fojt 	      newstate->entrance_nodes = entrance_nodes;
172095b7b453SJohn Marino 	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
172195b7b453SJohn Marino 		  != REG_NOERROR)
1722*09d4459fSDaniel Fojt 		{
1723*09d4459fSDaniel Fojt 		  free_state (newstate);
172495b7b453SJohn Marino 		  return NULL;
1725*09d4459fSDaniel Fojt 		}
172695b7b453SJohn Marino 	      nctx_nodes = 0;
172795b7b453SJohn Marino 	      newstate->has_constraint = 1;
172895b7b453SJohn Marino 	    }
172995b7b453SJohn Marino 
173095b7b453SJohn Marino 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
173195b7b453SJohn Marino 	    {
173295b7b453SJohn Marino 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
173395b7b453SJohn Marino 	      ++nctx_nodes;
173495b7b453SJohn Marino 	    }
173595b7b453SJohn Marino 	}
173695b7b453SJohn Marino     }
173795b7b453SJohn Marino   err = register_state (dfa, newstate, hash);
1738*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
173995b7b453SJohn Marino     {
174095b7b453SJohn Marino       free_state (newstate);
174195b7b453SJohn Marino       newstate = NULL;
174295b7b453SJohn Marino     }
174395b7b453SJohn Marino   return  newstate;
174495b7b453SJohn Marino }
1745