xref: /dragonfly/contrib/grep/lib/regex_internal.h (revision 09d4459f)
195b7b453SJohn Marino /* Extended regular expression matching and search library.
2*09d4459fSDaniel Fojt    Copyright (C) 2002-2020 Free Software Foundation, Inc.
395b7b453SJohn Marino    This file is part of the GNU C Library.
495b7b453SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
595b7b453SJohn Marino 
6680a9cb8SJohn Marino    The GNU C Library is free software; you can redistribute it and/or
7680a9cb8SJohn Marino    modify it under the terms of the GNU General Public
8680a9cb8SJohn Marino    License as published by the Free Software Foundation; either
9680a9cb8SJohn Marino    version 3 of the License, or (at your option) any later version.
1095b7b453SJohn Marino 
11680a9cb8SJohn Marino    The GNU C Library is distributed in the hope that it will be useful,
1295b7b453SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
13680a9cb8SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14680a9cb8SJohn Marino    General Public License for more details.
1595b7b453SJohn Marino 
16680a9cb8SJohn Marino    You should have received a copy of the GNU General Public
17680a9cb8SJohn Marino    License along with the GNU C Library; if not, see
18*09d4459fSDaniel Fojt    <https://www.gnu.org/licenses/>.  */
1995b7b453SJohn Marino 
2095b7b453SJohn Marino #ifndef _REGEX_INTERNAL_H
2195b7b453SJohn Marino #define _REGEX_INTERNAL_H 1
2295b7b453SJohn Marino 
2395b7b453SJohn Marino #include <ctype.h>
2495b7b453SJohn Marino #include <stdio.h>
2595b7b453SJohn Marino #include <stdlib.h>
2695b7b453SJohn Marino #include <string.h>
2795b7b453SJohn Marino 
2895b7b453SJohn Marino #include <langinfo.h>
2995b7b453SJohn Marino #include <locale.h>
3095b7b453SJohn Marino #include <wchar.h>
3195b7b453SJohn Marino #include <wctype.h>
32cf28ed85SJohn Marino #include <stdbool.h>
3395b7b453SJohn Marino #include <stdint.h>
34680a9cb8SJohn Marino 
35*09d4459fSDaniel Fojt #include <intprops.h>
36*09d4459fSDaniel Fojt #include <verify.h>
37*09d4459fSDaniel Fojt 
38*09d4459fSDaniel Fojt #if defined DEBUG && DEBUG != 0
39*09d4459fSDaniel Fojt # include <assert.h>
40*09d4459fSDaniel Fojt # define DEBUG_ASSERT(x) assert (x)
41*09d4459fSDaniel Fojt #else
42*09d4459fSDaniel Fojt # define DEBUG_ASSERT(x) assume (x)
43*09d4459fSDaniel Fojt #endif
44*09d4459fSDaniel Fojt 
45680a9cb8SJohn Marino #ifdef _LIBC
46dc7c36e4SJohn Marino # include <libc-lock.h>
47680a9cb8SJohn Marino # define lock_define(name) __libc_lock_define (, name)
48680a9cb8SJohn Marino # define lock_init(lock) (__libc_lock_init (lock), 0)
49*09d4459fSDaniel Fojt # define lock_fini(lock) ((void) 0)
50680a9cb8SJohn Marino # define lock_lock(lock) __libc_lock_lock (lock)
51680a9cb8SJohn Marino # define lock_unlock(lock) __libc_lock_unlock (lock)
52dc7c36e4SJohn Marino #elif defined GNULIB_LOCK && !defined USE_UNLOCKED_IO
53680a9cb8SJohn Marino # include "glthread/lock.h"
54680a9cb8SJohn Marino # define lock_define(name) gl_lock_define (, name)
55680a9cb8SJohn Marino # define lock_init(lock) glthread_lock_init (&(lock))
56680a9cb8SJohn Marino # define lock_fini(lock) glthread_lock_destroy (&(lock))
57680a9cb8SJohn Marino # define lock_lock(lock) glthread_lock_lock (&(lock))
58680a9cb8SJohn Marino # define lock_unlock(lock) glthread_lock_unlock (&(lock))
59dc7c36e4SJohn Marino #elif defined GNULIB_PTHREAD && !defined USE_UNLOCKED_IO
60680a9cb8SJohn Marino # include <pthread.h>
61680a9cb8SJohn Marino # define lock_define(name) pthread_mutex_t name;
62680a9cb8SJohn Marino # define lock_init(lock) pthread_mutex_init (&(lock), 0)
63680a9cb8SJohn Marino # define lock_fini(lock) pthread_mutex_destroy (&(lock))
64680a9cb8SJohn Marino # define lock_lock(lock) pthread_mutex_lock (&(lock))
65680a9cb8SJohn Marino # define lock_unlock(lock) pthread_mutex_unlock (&(lock))
66680a9cb8SJohn Marino #else
67680a9cb8SJohn Marino # define lock_define(name)
68680a9cb8SJohn Marino # define lock_init(lock) 0
69680a9cb8SJohn Marino # define lock_fini(lock) ((void) 0)
70680a9cb8SJohn Marino   /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC.  */
71680a9cb8SJohn Marino # define lock_lock(lock) ((void) dfa)
72680a9cb8SJohn Marino # define lock_unlock(lock) ((void) 0)
7395b7b453SJohn Marino #endif
7495b7b453SJohn Marino 
7595b7b453SJohn Marino /* In case that the system doesn't have isblank().  */
7695b7b453SJohn Marino #if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK))
7795b7b453SJohn Marino # define isblank(ch) ((ch) == ' ' || (ch) == '\t')
7895b7b453SJohn Marino #endif
7995b7b453SJohn Marino 
8095b7b453SJohn Marino #ifdef _LIBC
8195b7b453SJohn Marino # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
8295b7b453SJohn Marino #  define _RE_DEFINE_LOCALE_FUNCTIONS 1
8395b7b453SJohn Marino #   include <locale/localeinfo.h>
8495b7b453SJohn Marino #   include <locale/coll-lookup.h>
8595b7b453SJohn Marino # endif
8695b7b453SJohn Marino #endif
8795b7b453SJohn Marino 
8895b7b453SJohn Marino /* This is for other GNU distributions with internationalized messages.  */
8995b7b453SJohn Marino #if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
9095b7b453SJohn Marino # include <libintl.h>
9195b7b453SJohn Marino # ifdef _LIBC
9295b7b453SJohn Marino #  undef gettext
9395b7b453SJohn Marino #  define gettext(msgid) \
94680a9cb8SJohn Marino   __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES)
9595b7b453SJohn Marino # endif
9695b7b453SJohn Marino #else
97*09d4459fSDaniel Fojt # undef gettext
9895b7b453SJohn Marino # define gettext(msgid) (msgid)
9995b7b453SJohn Marino #endif
10095b7b453SJohn Marino 
10195b7b453SJohn Marino #ifndef gettext_noop
10295b7b453SJohn Marino /* This define is so xgettext can find the internationalizable
10395b7b453SJohn Marino    strings.  */
10495b7b453SJohn Marino # define gettext_noop(String) String
10595b7b453SJohn Marino #endif
10695b7b453SJohn Marino 
107680a9cb8SJohn Marino #if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC
10895b7b453SJohn Marino # define RE_ENABLE_I18N
10995b7b453SJohn Marino #endif
11095b7b453SJohn Marino 
11195b7b453SJohn Marino /* Number of ASCII characters.  */
11295b7b453SJohn Marino #define ASCII_CHARS 0x80
11395b7b453SJohn Marino 
11495b7b453SJohn Marino /* Number of single byte characters.  */
11595b7b453SJohn Marino #define SBC_MAX (UCHAR_MAX + 1)
11695b7b453SJohn Marino 
11795b7b453SJohn Marino #define COLL_ELEM_LEN_MAX 8
11895b7b453SJohn Marino 
11995b7b453SJohn Marino /* The character which represents newline.  */
12095b7b453SJohn Marino #define NEWLINE_CHAR '\n'
12195b7b453SJohn Marino #define WIDE_NEWLINE_CHAR L'\n'
12295b7b453SJohn Marino 
12395b7b453SJohn Marino /* Rename to standard API for using out of glibc.  */
12495b7b453SJohn Marino #ifndef _LIBC
125680a9cb8SJohn Marino # undef __wctype
126*09d4459fSDaniel Fojt # undef __iswalnum
127680a9cb8SJohn Marino # undef __iswctype
128*09d4459fSDaniel Fojt # undef __towlower
129*09d4459fSDaniel Fojt # undef __towupper
13095b7b453SJohn Marino # define __wctype wctype
131dc7c36e4SJohn Marino # define __iswalnum iswalnum
13295b7b453SJohn Marino # define __iswctype iswctype
133dc7c36e4SJohn Marino # define __towlower towlower
134dc7c36e4SJohn Marino # define __towupper towupper
13595b7b453SJohn Marino # define __btowc btowc
13695b7b453SJohn Marino # define __mbrtowc mbrtowc
137cf28ed85SJohn Marino # define __wcrtomb wcrtomb
13895b7b453SJohn Marino # define __regfree regfree
13995b7b453SJohn Marino #endif /* not _LIBC */
14095b7b453SJohn Marino 
141*09d4459fSDaniel Fojt #ifndef SSIZE_MAX
142*09d4459fSDaniel Fojt # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
14395b7b453SJohn Marino #endif
14495b7b453SJohn Marino 
145*09d4459fSDaniel Fojt /* The type of indexes into strings.  This is signed, not size_t,
146*09d4459fSDaniel Fojt    since the API requires indexes to fit in regoff_t anyway, and using
147*09d4459fSDaniel Fojt    signed integers makes the code a bit smaller and presumably faster.
148*09d4459fSDaniel Fojt    The traditional GNU regex implementation uses int for indexes.
149*09d4459fSDaniel Fojt    The POSIX-compatible implementation uses a possibly-wider type.
150*09d4459fSDaniel Fojt    The name 'Idx' is three letters to minimize the hassle of
151*09d4459fSDaniel Fojt    reindenting a lot of regex code that formerly used 'int'.  */
152*09d4459fSDaniel Fojt typedef regoff_t Idx;
153cf28ed85SJohn Marino #ifdef _REGEX_LARGE_OFFSETS
154*09d4459fSDaniel Fojt # define IDX_MAX SSIZE_MAX
155cf28ed85SJohn Marino #else
156cf28ed85SJohn Marino # define IDX_MAX INT_MAX
157cf28ed85SJohn Marino #endif
15895b7b453SJohn Marino 
15995b7b453SJohn Marino /* A hash value, suitable for computing hash tables.  */
16095b7b453SJohn Marino typedef __re_size_t re_hashval_t;
16195b7b453SJohn Marino 
16295b7b453SJohn Marino /* An integer used to represent a set of bits.  It must be unsigned,
16395b7b453SJohn Marino    and must be at least as wide as unsigned int.  */
16495b7b453SJohn Marino typedef unsigned long int bitset_word_t;
16595b7b453SJohn Marino /* All bits set in a bitset_word_t.  */
16695b7b453SJohn Marino #define BITSET_WORD_MAX ULONG_MAX
16795b7b453SJohn Marino 
16895b7b453SJohn Marino /* Number of bits in a bitset_word_t.  For portability to hosts with
16995b7b453SJohn Marino    padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)';
17095b7b453SJohn Marino    instead, deduce it directly from BITSET_WORD_MAX.  Avoid
17195b7b453SJohn Marino    greater-than-32-bit integers and unconditional shifts by more than
17295b7b453SJohn Marino    31 bits, as they're not portable.  */
17395b7b453SJohn Marino #if BITSET_WORD_MAX == 0xffffffffUL
17495b7b453SJohn Marino # define BITSET_WORD_BITS 32
17595b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 4 == 1
17695b7b453SJohn Marino # define BITSET_WORD_BITS 36
17795b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 16 == 1
17895b7b453SJohn Marino # define BITSET_WORD_BITS 48
17995b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 28 == 1
18095b7b453SJohn Marino # define BITSET_WORD_BITS 60
18195b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
18295b7b453SJohn Marino # define BITSET_WORD_BITS 64
18395b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
18495b7b453SJohn Marino # define BITSET_WORD_BITS 72
18595b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
18695b7b453SJohn Marino # define BITSET_WORD_BITS 128
18795b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
18895b7b453SJohn Marino # define BITSET_WORD_BITS 256
18995b7b453SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
19095b7b453SJohn Marino # define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
19195b7b453SJohn Marino # if BITSET_WORD_BITS <= SBC_MAX
19295b7b453SJohn Marino #  error "Invalid SBC_MAX"
19395b7b453SJohn Marino # endif
19495b7b453SJohn Marino #else
19595b7b453SJohn Marino # error "Add case for new bitset_word_t size"
19695b7b453SJohn Marino #endif
19795b7b453SJohn Marino 
19895b7b453SJohn Marino /* Number of bitset_word_t values in a bitset_t.  */
19995b7b453SJohn Marino #define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
20095b7b453SJohn Marino 
20195b7b453SJohn Marino typedef bitset_word_t bitset_t[BITSET_WORDS];
20295b7b453SJohn Marino typedef bitset_word_t *re_bitset_ptr_t;
20395b7b453SJohn Marino typedef const bitset_word_t *re_const_bitset_ptr_t;
20495b7b453SJohn Marino 
20595b7b453SJohn Marino #define PREV_WORD_CONSTRAINT 0x0001
20695b7b453SJohn Marino #define PREV_NOTWORD_CONSTRAINT 0x0002
20795b7b453SJohn Marino #define NEXT_WORD_CONSTRAINT 0x0004
20895b7b453SJohn Marino #define NEXT_NOTWORD_CONSTRAINT 0x0008
20995b7b453SJohn Marino #define PREV_NEWLINE_CONSTRAINT 0x0010
21095b7b453SJohn Marino #define NEXT_NEWLINE_CONSTRAINT 0x0020
21195b7b453SJohn Marino #define PREV_BEGBUF_CONSTRAINT 0x0040
21295b7b453SJohn Marino #define NEXT_ENDBUF_CONSTRAINT 0x0080
21395b7b453SJohn Marino #define WORD_DELIM_CONSTRAINT 0x0100
21495b7b453SJohn Marino #define NOT_WORD_DELIM_CONSTRAINT 0x0200
21595b7b453SJohn Marino 
21695b7b453SJohn Marino typedef enum
21795b7b453SJohn Marino {
21895b7b453SJohn Marino   INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
21995b7b453SJohn Marino   WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
22095b7b453SJohn Marino   WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
22195b7b453SJohn Marino   INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
22295b7b453SJohn Marino   LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
22395b7b453SJohn Marino   LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
22495b7b453SJohn Marino   BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
22595b7b453SJohn Marino   BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
22695b7b453SJohn Marino   WORD_DELIM = WORD_DELIM_CONSTRAINT,
22795b7b453SJohn Marino   NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
22895b7b453SJohn Marino } re_context_type;
22995b7b453SJohn Marino 
23095b7b453SJohn Marino typedef struct
23195b7b453SJohn Marino {
23295b7b453SJohn Marino   Idx alloc;
23395b7b453SJohn Marino   Idx nelem;
23495b7b453SJohn Marino   Idx *elems;
23595b7b453SJohn Marino } re_node_set;
23695b7b453SJohn Marino 
23795b7b453SJohn Marino typedef enum
23895b7b453SJohn Marino {
23995b7b453SJohn Marino   NON_TYPE = 0,
24095b7b453SJohn Marino 
24195b7b453SJohn Marino   /* Node type, These are used by token, node, tree.  */
24295b7b453SJohn Marino   CHARACTER = 1,
24395b7b453SJohn Marino   END_OF_RE = 2,
24495b7b453SJohn Marino   SIMPLE_BRACKET = 3,
24595b7b453SJohn Marino   OP_BACK_REF = 4,
24695b7b453SJohn Marino   OP_PERIOD = 5,
24795b7b453SJohn Marino #ifdef RE_ENABLE_I18N
24895b7b453SJohn Marino   COMPLEX_BRACKET = 6,
24995b7b453SJohn Marino   OP_UTF8_PERIOD = 7,
25095b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
25195b7b453SJohn Marino 
25295b7b453SJohn Marino   /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
25395b7b453SJohn Marino      when the debugger shows values of this enum type.  */
25495b7b453SJohn Marino #define EPSILON_BIT 8
25595b7b453SJohn Marino   OP_OPEN_SUBEXP = EPSILON_BIT | 0,
25695b7b453SJohn Marino   OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
25795b7b453SJohn Marino   OP_ALT = EPSILON_BIT | 2,
25895b7b453SJohn Marino   OP_DUP_ASTERISK = EPSILON_BIT | 3,
25995b7b453SJohn Marino   ANCHOR = EPSILON_BIT | 4,
26095b7b453SJohn Marino 
26195b7b453SJohn Marino   /* Tree type, these are used only by tree. */
26295b7b453SJohn Marino   CONCAT = 16,
26395b7b453SJohn Marino   SUBEXP = 17,
26495b7b453SJohn Marino 
26595b7b453SJohn Marino   /* Token type, these are used only by token.  */
26695b7b453SJohn Marino   OP_DUP_PLUS = 18,
26795b7b453SJohn Marino   OP_DUP_QUESTION,
26895b7b453SJohn Marino   OP_OPEN_BRACKET,
26995b7b453SJohn Marino   OP_CLOSE_BRACKET,
27095b7b453SJohn Marino   OP_CHARSET_RANGE,
27195b7b453SJohn Marino   OP_OPEN_DUP_NUM,
27295b7b453SJohn Marino   OP_CLOSE_DUP_NUM,
27395b7b453SJohn Marino   OP_NON_MATCH_LIST,
27495b7b453SJohn Marino   OP_OPEN_COLL_ELEM,
27595b7b453SJohn Marino   OP_CLOSE_COLL_ELEM,
27695b7b453SJohn Marino   OP_OPEN_EQUIV_CLASS,
27795b7b453SJohn Marino   OP_CLOSE_EQUIV_CLASS,
27895b7b453SJohn Marino   OP_OPEN_CHAR_CLASS,
27995b7b453SJohn Marino   OP_CLOSE_CHAR_CLASS,
28095b7b453SJohn Marino   OP_WORD,
28195b7b453SJohn Marino   OP_NOTWORD,
28295b7b453SJohn Marino   OP_SPACE,
28395b7b453SJohn Marino   OP_NOTSPACE,
28495b7b453SJohn Marino   BACK_SLASH
28595b7b453SJohn Marino 
28695b7b453SJohn Marino } re_token_type_t;
28795b7b453SJohn Marino 
28895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
28995b7b453SJohn Marino typedef struct
29095b7b453SJohn Marino {
29195b7b453SJohn Marino   /* Multibyte characters.  */
29295b7b453SJohn Marino   wchar_t *mbchars;
29395b7b453SJohn Marino 
29495b7b453SJohn Marino   /* Collating symbols.  */
29595b7b453SJohn Marino # ifdef _LIBC
29695b7b453SJohn Marino   int32_t *coll_syms;
29795b7b453SJohn Marino # endif
29895b7b453SJohn Marino 
29995b7b453SJohn Marino   /* Equivalence classes. */
30095b7b453SJohn Marino # ifdef _LIBC
30195b7b453SJohn Marino   int32_t *equiv_classes;
30295b7b453SJohn Marino # endif
30395b7b453SJohn Marino 
30495b7b453SJohn Marino   /* Range expressions. */
30595b7b453SJohn Marino # ifdef _LIBC
30695b7b453SJohn Marino   uint32_t *range_starts;
30795b7b453SJohn Marino   uint32_t *range_ends;
30895b7b453SJohn Marino # else /* not _LIBC */
30995b7b453SJohn Marino   wchar_t *range_starts;
31095b7b453SJohn Marino   wchar_t *range_ends;
31195b7b453SJohn Marino # endif /* not _LIBC */
31295b7b453SJohn Marino 
31395b7b453SJohn Marino   /* Character classes. */
31495b7b453SJohn Marino   wctype_t *char_classes;
31595b7b453SJohn Marino 
31695b7b453SJohn Marino   /* If this character set is the non-matching list.  */
31795b7b453SJohn Marino   unsigned int non_match : 1;
31895b7b453SJohn Marino 
31995b7b453SJohn Marino   /* # of multibyte characters.  */
32095b7b453SJohn Marino   Idx nmbchars;
32195b7b453SJohn Marino 
32295b7b453SJohn Marino   /* # of collating symbols.  */
32395b7b453SJohn Marino   Idx ncoll_syms;
32495b7b453SJohn Marino 
32595b7b453SJohn Marino   /* # of equivalence classes. */
32695b7b453SJohn Marino   Idx nequiv_classes;
32795b7b453SJohn Marino 
32895b7b453SJohn Marino   /* # of range expressions. */
32995b7b453SJohn Marino   Idx nranges;
33095b7b453SJohn Marino 
33195b7b453SJohn Marino   /* # of character classes. */
33295b7b453SJohn Marino   Idx nchar_classes;
33395b7b453SJohn Marino } re_charset_t;
33495b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
33595b7b453SJohn Marino 
33695b7b453SJohn Marino typedef struct
33795b7b453SJohn Marino {
33895b7b453SJohn Marino   union
33995b7b453SJohn Marino   {
34095b7b453SJohn Marino     unsigned char c;		/* for CHARACTER */
34195b7b453SJohn Marino     re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
34295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
34395b7b453SJohn Marino     re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
34495b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
34595b7b453SJohn Marino     Idx idx;			/* for BACK_REF */
34695b7b453SJohn Marino     re_context_type ctx_type;	/* for ANCHOR */
34795b7b453SJohn Marino   } opr;
348cf28ed85SJohn Marino #if __GNUC__ >= 2 && !defined __STRICT_ANSI__
34995b7b453SJohn Marino   re_token_type_t type : 8;
35095b7b453SJohn Marino #else
35195b7b453SJohn Marino   re_token_type_t type;
35295b7b453SJohn Marino #endif
35395b7b453SJohn Marino   unsigned int constraint : 10;	/* context constraint */
35495b7b453SJohn Marino   unsigned int duplicated : 1;
35595b7b453SJohn Marino   unsigned int opt_subexp : 1;
35695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
35795b7b453SJohn Marino   unsigned int accept_mb : 1;
35895b7b453SJohn Marino   /* These 2 bits can be moved into the union if needed (e.g. if running out
35995b7b453SJohn Marino      of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
36095b7b453SJohn Marino   unsigned int mb_partial : 1;
36195b7b453SJohn Marino #endif
36295b7b453SJohn Marino   unsigned int word_char : 1;
36395b7b453SJohn Marino } re_token_t;
36495b7b453SJohn Marino 
36595b7b453SJohn Marino #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
36695b7b453SJohn Marino 
36795b7b453SJohn Marino struct re_string_t
36895b7b453SJohn Marino {
36995b7b453SJohn Marino   /* Indicate the raw buffer which is the original string passed as an
37095b7b453SJohn Marino      argument of regexec(), re_search(), etc..  */
37195b7b453SJohn Marino   const unsigned char *raw_mbs;
37295b7b453SJohn Marino   /* Store the multibyte string.  In case of "case insensitive mode" like
37395b7b453SJohn Marino      REG_ICASE, upper cases of the string are stored, otherwise MBS points
37495b7b453SJohn Marino      the same address that RAW_MBS points.  */
37595b7b453SJohn Marino   unsigned char *mbs;
37695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
37795b7b453SJohn Marino   /* Store the wide character string which is corresponding to MBS.  */
37895b7b453SJohn Marino   wint_t *wcs;
37995b7b453SJohn Marino   Idx *offsets;
38095b7b453SJohn Marino   mbstate_t cur_state;
38195b7b453SJohn Marino #endif
38295b7b453SJohn Marino   /* Index in RAW_MBS.  Each character mbs[i] corresponds to
38395b7b453SJohn Marino      raw_mbs[raw_mbs_idx + i].  */
38495b7b453SJohn Marino   Idx raw_mbs_idx;
38595b7b453SJohn Marino   /* The length of the valid characters in the buffers.  */
38695b7b453SJohn Marino   Idx valid_len;
38795b7b453SJohn Marino   /* The corresponding number of bytes in raw_mbs array.  */
38895b7b453SJohn Marino   Idx valid_raw_len;
38995b7b453SJohn Marino   /* The length of the buffers MBS and WCS.  */
39095b7b453SJohn Marino   Idx bufs_len;
39195b7b453SJohn Marino   /* The index in MBS, which is updated by re_string_fetch_byte.  */
39295b7b453SJohn Marino   Idx cur_idx;
39395b7b453SJohn Marino   /* length of RAW_MBS array.  */
39495b7b453SJohn Marino   Idx raw_len;
39595b7b453SJohn Marino   /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
39695b7b453SJohn Marino   Idx len;
39795b7b453SJohn Marino   /* End of the buffer may be shorter than its length in the cases such
39895b7b453SJohn Marino      as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
39995b7b453SJohn Marino      instead of LEN.  */
40095b7b453SJohn Marino   Idx raw_stop;
40195b7b453SJohn Marino   /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
40295b7b453SJohn Marino   Idx stop;
40395b7b453SJohn Marino 
40495b7b453SJohn Marino   /* The context of mbs[0].  We store the context independently, since
40595b7b453SJohn Marino      the context of mbs[0] may be different from raw_mbs[0], which is
40695b7b453SJohn Marino      the beginning of the input string.  */
40795b7b453SJohn Marino   unsigned int tip_context;
40895b7b453SJohn Marino   /* The translation passed as a part of an argument of re_compile_pattern.  */
40995b7b453SJohn Marino   RE_TRANSLATE_TYPE trans;
41095b7b453SJohn Marino   /* Copy of re_dfa_t's word_char.  */
41195b7b453SJohn Marino   re_const_bitset_ptr_t word_char;
41295b7b453SJohn Marino   /* true if REG_ICASE.  */
41395b7b453SJohn Marino   unsigned char icase;
41495b7b453SJohn Marino   unsigned char is_utf8;
41595b7b453SJohn Marino   unsigned char map_notascii;
41695b7b453SJohn Marino   unsigned char mbs_allocated;
41795b7b453SJohn Marino   unsigned char offsets_needed;
41895b7b453SJohn Marino   unsigned char newline_anchor;
41995b7b453SJohn Marino   unsigned char word_ops_used;
42095b7b453SJohn Marino   int mb_cur_max;
42195b7b453SJohn Marino };
42295b7b453SJohn Marino typedef struct re_string_t re_string_t;
42395b7b453SJohn Marino 
42495b7b453SJohn Marino 
42595b7b453SJohn Marino struct re_dfa_t;
42695b7b453SJohn Marino typedef struct re_dfa_t re_dfa_t;
42795b7b453SJohn Marino 
42895b7b453SJohn Marino #ifndef _LIBC
429dc7c36e4SJohn Marino # define IS_IN(libc) false
43095b7b453SJohn Marino #endif
43195b7b453SJohn Marino 
43295b7b453SJohn Marino #define re_string_peek_byte(pstr, offset) \
43395b7b453SJohn Marino   ((pstr)->mbs[(pstr)->cur_idx + offset])
43495b7b453SJohn Marino #define re_string_fetch_byte(pstr) \
43595b7b453SJohn Marino   ((pstr)->mbs[(pstr)->cur_idx++])
43695b7b453SJohn Marino #define re_string_first_byte(pstr, idx) \
43795b7b453SJohn Marino   ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
43895b7b453SJohn Marino #define re_string_is_single_byte_char(pstr, idx) \
43995b7b453SJohn Marino   ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
44095b7b453SJohn Marino 				|| (pstr)->wcs[(idx) + 1] != WEOF))
44195b7b453SJohn Marino #define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
44295b7b453SJohn Marino #define re_string_cur_idx(pstr) ((pstr)->cur_idx)
44395b7b453SJohn Marino #define re_string_get_buffer(pstr) ((pstr)->mbs)
44495b7b453SJohn Marino #define re_string_length(pstr) ((pstr)->len)
44595b7b453SJohn Marino #define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
44695b7b453SJohn Marino #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
44795b7b453SJohn Marino #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
44895b7b453SJohn Marino 
449680a9cb8SJohn Marino #if defined _LIBC || HAVE_ALLOCA
45095b7b453SJohn Marino # include <alloca.h>
451680a9cb8SJohn Marino #endif
45295b7b453SJohn Marino 
45395b7b453SJohn Marino #ifndef _LIBC
45495b7b453SJohn Marino # if HAVE_ALLOCA
45595b7b453SJohn Marino /* The OS usually guarantees only one guard page at the bottom of the stack,
45695b7b453SJohn Marino    and a page size can be as small as 4096 bytes.  So we cannot safely
45795b7b453SJohn Marino    allocate anything larger than 4096 bytes.  Also care for the possibility
45895b7b453SJohn Marino    of a few compiler-allocated temporary stack slots.  */
45995b7b453SJohn Marino #  define __libc_use_alloca(n) ((n) < 4032)
46095b7b453SJohn Marino # else
46195b7b453SJohn Marino /* alloca is implemented with malloc, so just use malloc.  */
46295b7b453SJohn Marino #  define __libc_use_alloca(n) 0
46395b7b453SJohn Marino #  undef alloca
46495b7b453SJohn Marino #  define alloca(n) malloc (n)
46595b7b453SJohn Marino # endif
46695b7b453SJohn Marino #endif
46795b7b453SJohn Marino 
468680a9cb8SJohn Marino #ifdef _LIBC
469680a9cb8SJohn Marino # define MALLOC_0_IS_NONNULL 1
470680a9cb8SJohn Marino #elif !defined MALLOC_0_IS_NONNULL
471680a9cb8SJohn Marino # define MALLOC_0_IS_NONNULL 0
472680a9cb8SJohn Marino #endif
473680a9cb8SJohn Marino 
47495b7b453SJohn Marino #ifndef MAX
47595b7b453SJohn Marino # define MAX(a,b) ((a) < (b) ? (b) : (a))
47695b7b453SJohn Marino #endif
477cf28ed85SJohn Marino #ifndef MIN
478cf28ed85SJohn Marino # define MIN(a,b) ((a) < (b) ? (a) : (b))
479cf28ed85SJohn Marino #endif
48095b7b453SJohn Marino 
48195b7b453SJohn Marino #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
48295b7b453SJohn Marino #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
48395b7b453SJohn Marino #define re_free(p) free (p)
48495b7b453SJohn Marino 
48595b7b453SJohn Marino struct bin_tree_t
48695b7b453SJohn Marino {
48795b7b453SJohn Marino   struct bin_tree_t *parent;
48895b7b453SJohn Marino   struct bin_tree_t *left;
48995b7b453SJohn Marino   struct bin_tree_t *right;
49095b7b453SJohn Marino   struct bin_tree_t *first;
49195b7b453SJohn Marino   struct bin_tree_t *next;
49295b7b453SJohn Marino 
49395b7b453SJohn Marino   re_token_t token;
49495b7b453SJohn Marino 
495cf28ed85SJohn Marino   /* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
496cf28ed85SJohn Marino      Otherwise 'type' indicate the type of this node.  */
49795b7b453SJohn Marino   Idx node_idx;
49895b7b453SJohn Marino };
49995b7b453SJohn Marino typedef struct bin_tree_t bin_tree_t;
50095b7b453SJohn Marino 
50195b7b453SJohn Marino #define BIN_TREE_STORAGE_SIZE \
50295b7b453SJohn Marino   ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
50395b7b453SJohn Marino 
50495b7b453SJohn Marino struct bin_tree_storage_t
50595b7b453SJohn Marino {
50695b7b453SJohn Marino   struct bin_tree_storage_t *next;
50795b7b453SJohn Marino   bin_tree_t data[BIN_TREE_STORAGE_SIZE];
50895b7b453SJohn Marino };
50995b7b453SJohn Marino typedef struct bin_tree_storage_t bin_tree_storage_t;
51095b7b453SJohn Marino 
51195b7b453SJohn Marino #define CONTEXT_WORD 1
51295b7b453SJohn Marino #define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
51395b7b453SJohn Marino #define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
51495b7b453SJohn Marino #define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
51595b7b453SJohn Marino 
51695b7b453SJohn Marino #define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
51795b7b453SJohn Marino #define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
51895b7b453SJohn Marino #define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
51995b7b453SJohn Marino #define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
52095b7b453SJohn Marino #define IS_ORDINARY_CONTEXT(c) ((c) == 0)
52195b7b453SJohn Marino 
52295b7b453SJohn Marino #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
52395b7b453SJohn Marino #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
524dc7c36e4SJohn Marino #define IS_WIDE_WORD_CHAR(ch) (__iswalnum (ch) || (ch) == L'_')
52595b7b453SJohn Marino #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
52695b7b453SJohn Marino 
52795b7b453SJohn Marino #define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
52895b7b453SJohn Marino  ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
52995b7b453SJohn Marino   || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
53095b7b453SJohn Marino   || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
53195b7b453SJohn Marino   || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
53295b7b453SJohn Marino 
53395b7b453SJohn Marino #define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
53495b7b453SJohn Marino  ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
53595b7b453SJohn Marino   || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
53695b7b453SJohn Marino   || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
53795b7b453SJohn Marino   || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
53895b7b453SJohn Marino 
53995b7b453SJohn Marino struct re_dfastate_t
54095b7b453SJohn Marino {
54195b7b453SJohn Marino   re_hashval_t hash;
54295b7b453SJohn Marino   re_node_set nodes;
54395b7b453SJohn Marino   re_node_set non_eps_nodes;
54495b7b453SJohn Marino   re_node_set inveclosure;
54595b7b453SJohn Marino   re_node_set *entrance_nodes;
54695b7b453SJohn Marino   struct re_dfastate_t **trtable, **word_trtable;
54795b7b453SJohn Marino   unsigned int context : 4;
54895b7b453SJohn Marino   unsigned int halt : 1;
549cf28ed85SJohn Marino   /* If this state can accept "multi byte".
55095b7b453SJohn Marino      Note that we refer to multibyte characters, and multi character
551cf28ed85SJohn Marino      collating elements as "multi byte".  */
55295b7b453SJohn Marino   unsigned int accept_mb : 1;
55395b7b453SJohn Marino   /* If this state has backreference node(s).  */
55495b7b453SJohn Marino   unsigned int has_backref : 1;
55595b7b453SJohn Marino   unsigned int has_constraint : 1;
55695b7b453SJohn Marino };
55795b7b453SJohn Marino typedef struct re_dfastate_t re_dfastate_t;
55895b7b453SJohn Marino 
55995b7b453SJohn Marino struct re_state_table_entry
56095b7b453SJohn Marino {
56195b7b453SJohn Marino   Idx num;
56295b7b453SJohn Marino   Idx alloc;
56395b7b453SJohn Marino   re_dfastate_t **array;
56495b7b453SJohn Marino };
56595b7b453SJohn Marino 
56695b7b453SJohn Marino /* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
56795b7b453SJohn Marino 
56895b7b453SJohn Marino typedef struct
56995b7b453SJohn Marino {
57095b7b453SJohn Marino   Idx next_idx;
57195b7b453SJohn Marino   Idx alloc;
57295b7b453SJohn Marino   re_dfastate_t **array;
57395b7b453SJohn Marino } state_array_t;
57495b7b453SJohn Marino 
57595b7b453SJohn Marino /* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
57695b7b453SJohn Marino 
57795b7b453SJohn Marino typedef struct
57895b7b453SJohn Marino {
57995b7b453SJohn Marino   Idx node;
58095b7b453SJohn Marino   Idx str_idx; /* The position NODE match at.  */
58195b7b453SJohn Marino   state_array_t path;
58295b7b453SJohn Marino } re_sub_match_last_t;
58395b7b453SJohn Marino 
58495b7b453SJohn Marino /* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
58595b7b453SJohn Marino    And information about the node, whose type is OP_CLOSE_SUBEXP,
58695b7b453SJohn Marino    corresponding to NODE is stored in LASTS.  */
58795b7b453SJohn Marino 
58895b7b453SJohn Marino typedef struct
58995b7b453SJohn Marino {
59095b7b453SJohn Marino   Idx str_idx;
59195b7b453SJohn Marino   Idx node;
59295b7b453SJohn Marino   state_array_t *path;
59395b7b453SJohn Marino   Idx alasts; /* Allocation size of LASTS.  */
59495b7b453SJohn Marino   Idx nlasts; /* The number of LASTS.  */
59595b7b453SJohn Marino   re_sub_match_last_t **lasts;
59695b7b453SJohn Marino } re_sub_match_top_t;
59795b7b453SJohn Marino 
59895b7b453SJohn Marino struct re_backref_cache_entry
59995b7b453SJohn Marino {
60095b7b453SJohn Marino   Idx node;
60195b7b453SJohn Marino   Idx str_idx;
60295b7b453SJohn Marino   Idx subexp_from;
60395b7b453SJohn Marino   Idx subexp_to;
60495b7b453SJohn Marino   char more;
60595b7b453SJohn Marino   char unused;
60695b7b453SJohn Marino   unsigned short int eps_reachable_subexps_map;
60795b7b453SJohn Marino };
60895b7b453SJohn Marino 
60995b7b453SJohn Marino typedef struct
61095b7b453SJohn Marino {
61195b7b453SJohn Marino   /* The string object corresponding to the input string.  */
61295b7b453SJohn Marino   re_string_t input;
61395b7b453SJohn Marino   const re_dfa_t *const dfa;
61495b7b453SJohn Marino   /* EFLAGS of the argument of regexec.  */
61595b7b453SJohn Marino   int eflags;
61695b7b453SJohn Marino   /* Where the matching ends.  */
61795b7b453SJohn Marino   Idx match_last;
61895b7b453SJohn Marino   Idx last_node;
61995b7b453SJohn Marino   /* The state log used by the matcher.  */
62095b7b453SJohn Marino   re_dfastate_t **state_log;
62195b7b453SJohn Marino   Idx state_log_top;
62295b7b453SJohn Marino   /* Back reference cache.  */
62395b7b453SJohn Marino   Idx nbkref_ents;
62495b7b453SJohn Marino   Idx abkref_ents;
62595b7b453SJohn Marino   struct re_backref_cache_entry *bkref_ents;
62695b7b453SJohn Marino   int max_mb_elem_len;
62795b7b453SJohn Marino   Idx nsub_tops;
62895b7b453SJohn Marino   Idx asub_tops;
62995b7b453SJohn Marino   re_sub_match_top_t **sub_tops;
63095b7b453SJohn Marino } re_match_context_t;
63195b7b453SJohn Marino 
63295b7b453SJohn Marino typedef struct
63395b7b453SJohn Marino {
63495b7b453SJohn Marino   re_dfastate_t **sifted_states;
63595b7b453SJohn Marino   re_dfastate_t **limited_states;
63695b7b453SJohn Marino   Idx last_node;
63795b7b453SJohn Marino   Idx last_str_idx;
63895b7b453SJohn Marino   re_node_set limits;
63995b7b453SJohn Marino } re_sift_context_t;
64095b7b453SJohn Marino 
64195b7b453SJohn Marino struct re_fail_stack_ent_t
64295b7b453SJohn Marino {
64395b7b453SJohn Marino   Idx idx;
64495b7b453SJohn Marino   Idx node;
64595b7b453SJohn Marino   regmatch_t *regs;
64695b7b453SJohn Marino   re_node_set eps_via_nodes;
64795b7b453SJohn Marino };
64895b7b453SJohn Marino 
64995b7b453SJohn Marino struct re_fail_stack_t
65095b7b453SJohn Marino {
65195b7b453SJohn Marino   Idx num;
65295b7b453SJohn Marino   Idx alloc;
65395b7b453SJohn Marino   struct re_fail_stack_ent_t *stack;
65495b7b453SJohn Marino };
65595b7b453SJohn Marino 
65695b7b453SJohn Marino struct re_dfa_t
65795b7b453SJohn Marino {
65895b7b453SJohn Marino   re_token_t *nodes;
65995b7b453SJohn Marino   size_t nodes_alloc;
66095b7b453SJohn Marino   size_t nodes_len;
66195b7b453SJohn Marino   Idx *nexts;
66295b7b453SJohn Marino   Idx *org_indices;
66395b7b453SJohn Marino   re_node_set *edests;
66495b7b453SJohn Marino   re_node_set *eclosures;
66595b7b453SJohn Marino   re_node_set *inveclosures;
66695b7b453SJohn Marino   struct re_state_table_entry *state_table;
66795b7b453SJohn Marino   re_dfastate_t *init_state;
66895b7b453SJohn Marino   re_dfastate_t *init_state_word;
66995b7b453SJohn Marino   re_dfastate_t *init_state_nl;
67095b7b453SJohn Marino   re_dfastate_t *init_state_begbuf;
67195b7b453SJohn Marino   bin_tree_t *str_tree;
67295b7b453SJohn Marino   bin_tree_storage_t *str_tree_storage;
67395b7b453SJohn Marino   re_bitset_ptr_t sb_char;
67495b7b453SJohn Marino   int str_tree_storage_idx;
67595b7b453SJohn Marino 
676cf28ed85SJohn Marino   /* number of subexpressions 're_nsub' is in regex_t.  */
67795b7b453SJohn Marino   re_hashval_t state_hash_mask;
67895b7b453SJohn Marino   Idx init_node;
67995b7b453SJohn Marino   Idx nbackref; /* The number of backreference in this dfa.  */
68095b7b453SJohn Marino 
68195b7b453SJohn Marino   /* Bitmap expressing which backreference is used.  */
68295b7b453SJohn Marino   bitset_word_t used_bkref_map;
68395b7b453SJohn Marino   bitset_word_t completed_bkref_map;
68495b7b453SJohn Marino 
68595b7b453SJohn Marino   unsigned int has_plural_match : 1;
68695b7b453SJohn Marino   /* If this dfa has "multibyte node", which is a backreference or
68795b7b453SJohn Marino      a node which can accept multibyte character or multi character
68895b7b453SJohn Marino      collating element.  */
68995b7b453SJohn Marino   unsigned int has_mb_node : 1;
69095b7b453SJohn Marino   unsigned int is_utf8 : 1;
69195b7b453SJohn Marino   unsigned int map_notascii : 1;
69295b7b453SJohn Marino   unsigned int word_ops_used : 1;
69395b7b453SJohn Marino   int mb_cur_max;
69495b7b453SJohn Marino   bitset_t word_char;
69595b7b453SJohn Marino   reg_syntax_t syntax;
69695b7b453SJohn Marino   Idx *subexp_map;
69795b7b453SJohn Marino #ifdef DEBUG
69895b7b453SJohn Marino   char* re_str;
69995b7b453SJohn Marino #endif
700680a9cb8SJohn Marino   lock_define (lock)
70195b7b453SJohn Marino };
70295b7b453SJohn Marino 
70395b7b453SJohn Marino #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
70495b7b453SJohn Marino #define re_node_set_remove(set,id) \
70595b7b453SJohn Marino   (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
70695b7b453SJohn Marino #define re_node_set_empty(p) ((p)->nelem = 0)
70795b7b453SJohn Marino #define re_node_set_free(set) re_free ((set)->elems)
70895b7b453SJohn Marino 
70995b7b453SJohn Marino 
71095b7b453SJohn Marino typedef enum
71195b7b453SJohn Marino {
71295b7b453SJohn Marino   SB_CHAR,
71395b7b453SJohn Marino   MB_CHAR,
71495b7b453SJohn Marino   EQUIV_CLASS,
71595b7b453SJohn Marino   COLL_SYM,
71695b7b453SJohn Marino   CHAR_CLASS
71795b7b453SJohn Marino } bracket_elem_type;
71895b7b453SJohn Marino 
71995b7b453SJohn Marino typedef struct
72095b7b453SJohn Marino {
72195b7b453SJohn Marino   bracket_elem_type type;
72295b7b453SJohn Marino   union
72395b7b453SJohn Marino   {
72495b7b453SJohn Marino     unsigned char ch;
72595b7b453SJohn Marino     unsigned char *name;
72695b7b453SJohn Marino     wchar_t wch;
72795b7b453SJohn Marino   } opr;
72895b7b453SJohn Marino } bracket_elem_t;
72995b7b453SJohn Marino 
73095b7b453SJohn Marino 
731680a9cb8SJohn Marino /* Functions for bitset_t operation.  */
73295b7b453SJohn Marino 
733*09d4459fSDaniel Fojt static inline void
bitset_set(bitset_t set,Idx i)73495b7b453SJohn Marino bitset_set (bitset_t set, Idx i)
73595b7b453SJohn Marino {
73695b7b453SJohn Marino   set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS;
73795b7b453SJohn Marino }
73895b7b453SJohn Marino 
739*09d4459fSDaniel Fojt static inline void
bitset_clear(bitset_t set,Idx i)74095b7b453SJohn Marino bitset_clear (bitset_t set, Idx i)
74195b7b453SJohn Marino {
74295b7b453SJohn Marino   set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS);
74395b7b453SJohn Marino }
74495b7b453SJohn Marino 
745*09d4459fSDaniel Fojt static inline bool
bitset_contain(const bitset_t set,Idx i)74695b7b453SJohn Marino bitset_contain (const bitset_t set, Idx i)
74795b7b453SJohn Marino {
74895b7b453SJohn Marino   return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
74995b7b453SJohn Marino }
75095b7b453SJohn Marino 
751*09d4459fSDaniel Fojt static inline void
bitset_empty(bitset_t set)75295b7b453SJohn Marino bitset_empty (bitset_t set)
75395b7b453SJohn Marino {
75495b7b453SJohn Marino   memset (set, '\0', sizeof (bitset_t));
75595b7b453SJohn Marino }
75695b7b453SJohn Marino 
757*09d4459fSDaniel Fojt static inline void
bitset_set_all(bitset_t set)75895b7b453SJohn Marino bitset_set_all (bitset_t set)
75995b7b453SJohn Marino {
76095b7b453SJohn Marino   memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS));
76195b7b453SJohn Marino   if (SBC_MAX % BITSET_WORD_BITS != 0)
76295b7b453SJohn Marino     set[BITSET_WORDS - 1] =
76395b7b453SJohn Marino       ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
76495b7b453SJohn Marino }
76595b7b453SJohn Marino 
766*09d4459fSDaniel Fojt static inline void
bitset_copy(bitset_t dest,const bitset_t src)76795b7b453SJohn Marino bitset_copy (bitset_t dest, const bitset_t src)
76895b7b453SJohn Marino {
76995b7b453SJohn Marino   memcpy (dest, src, sizeof (bitset_t));
77095b7b453SJohn Marino }
77195b7b453SJohn Marino 
772*09d4459fSDaniel Fojt static inline void
bitset_not(bitset_t set)77395b7b453SJohn Marino bitset_not (bitset_t set)
77495b7b453SJohn Marino {
77595b7b453SJohn Marino   int bitset_i;
77695b7b453SJohn Marino   for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i)
77795b7b453SJohn Marino     set[bitset_i] = ~set[bitset_i];
77895b7b453SJohn Marino   if (SBC_MAX % BITSET_WORD_BITS != 0)
77995b7b453SJohn Marino     set[BITSET_WORDS - 1] =
78095b7b453SJohn Marino       ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1)
78195b7b453SJohn Marino        & ~set[BITSET_WORDS - 1]);
78295b7b453SJohn Marino }
78395b7b453SJohn Marino 
784*09d4459fSDaniel Fojt static inline void
bitset_merge(bitset_t dest,const bitset_t src)78595b7b453SJohn Marino bitset_merge (bitset_t dest, const bitset_t src)
78695b7b453SJohn Marino {
78795b7b453SJohn Marino   int bitset_i;
78895b7b453SJohn Marino   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
78995b7b453SJohn Marino     dest[bitset_i] |= src[bitset_i];
79095b7b453SJohn Marino }
79195b7b453SJohn Marino 
792*09d4459fSDaniel Fojt static inline void
bitset_mask(bitset_t dest,const bitset_t src)79395b7b453SJohn Marino bitset_mask (bitset_t dest, const bitset_t src)
79495b7b453SJohn Marino {
79595b7b453SJohn Marino   int bitset_i;
79695b7b453SJohn Marino   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
79795b7b453SJohn Marino     dest[bitset_i] &= src[bitset_i];
79895b7b453SJohn Marino }
79995b7b453SJohn Marino 
80095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
801680a9cb8SJohn Marino /* Functions for re_string.  */
802680a9cb8SJohn Marino static int
803*09d4459fSDaniel Fojt __attribute__ ((pure, unused))
re_string_char_size_at(const re_string_t * pstr,Idx idx)80495b7b453SJohn Marino re_string_char_size_at (const re_string_t *pstr, Idx idx)
80595b7b453SJohn Marino {
80695b7b453SJohn Marino   int byte_idx;
80795b7b453SJohn Marino   if (pstr->mb_cur_max == 1)
80895b7b453SJohn Marino     return 1;
80995b7b453SJohn Marino   for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
81095b7b453SJohn Marino     if (pstr->wcs[idx + byte_idx] != WEOF)
81195b7b453SJohn Marino       break;
81295b7b453SJohn Marino   return byte_idx;
81395b7b453SJohn Marino }
81495b7b453SJohn Marino 
815680a9cb8SJohn Marino static wint_t
816*09d4459fSDaniel Fojt __attribute__ ((pure, unused))
re_string_wchar_at(const re_string_t * pstr,Idx idx)81795b7b453SJohn Marino re_string_wchar_at (const re_string_t *pstr, Idx idx)
81895b7b453SJohn Marino {
81995b7b453SJohn Marino   if (pstr->mb_cur_max == 1)
82095b7b453SJohn Marino     return (wint_t) pstr->mbs[idx];
82195b7b453SJohn Marino   return (wint_t) pstr->wcs[idx];
82295b7b453SJohn Marino }
82395b7b453SJohn Marino 
824dc7c36e4SJohn Marino # ifdef _LIBC
825dc7c36e4SJohn Marino #  include <locale/weight.h>
826dc7c36e4SJohn Marino # endif
827dc7c36e4SJohn Marino 
82895b7b453SJohn Marino static int
829*09d4459fSDaniel Fojt __attribute__ ((pure, unused))
re_string_elem_size_at(const re_string_t * pstr,Idx idx)830680a9cb8SJohn Marino re_string_elem_size_at (const re_string_t *pstr, Idx idx)
83195b7b453SJohn Marino {
83295b7b453SJohn Marino # ifdef _LIBC
83395b7b453SJohn Marino   const unsigned char *p, *extra;
83495b7b453SJohn Marino   const int32_t *table, *indirect;
83595b7b453SJohn Marino   uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
83695b7b453SJohn Marino 
83795b7b453SJohn Marino   if (nrules != 0)
83895b7b453SJohn Marino     {
83995b7b453SJohn Marino       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
84095b7b453SJohn Marino       extra = (const unsigned char *)
84195b7b453SJohn Marino 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
84295b7b453SJohn Marino       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
84395b7b453SJohn Marino 						_NL_COLLATE_INDIRECTMB);
84495b7b453SJohn Marino       p = pstr->mbs + idx;
845dc7c36e4SJohn Marino       findidx (table, indirect, extra, &p, pstr->len - idx);
84695b7b453SJohn Marino       return p - pstr->mbs - idx;
84795b7b453SJohn Marino     }
84895b7b453SJohn Marino   else
84995b7b453SJohn Marino # endif /* _LIBC */
85095b7b453SJohn Marino     return 1;
85195b7b453SJohn Marino }
85295b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
85395b7b453SJohn Marino 
854*09d4459fSDaniel Fojt #ifndef FALLTHROUGH
855*09d4459fSDaniel Fojt # if __GNUC__ < 7
856*09d4459fSDaniel Fojt #  define FALLTHROUGH ((void) 0)
85795b7b453SJohn Marino # else
858*09d4459fSDaniel Fojt #  define FALLTHROUGH __attribute__ ((__fallthrough__))
85995b7b453SJohn Marino # endif
86095b7b453SJohn Marino #endif
86195b7b453SJohn Marino 
86295b7b453SJohn Marino #endif /*  _REGEX_INTERNAL_H */
863