144b87433SJohn Marino /* Extended regular expression matching and search library.
2*6ea1f93eSDaniel Fojt    Copyright (C) 2002-2018 Free Software Foundation, Inc.
344b87433SJohn Marino    This file is part of the GNU C Library.
444b87433SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
544b87433SJohn Marino 
64536c563SJohn Marino    The GNU C Library is free software; you can redistribute it and/or
74536c563SJohn Marino    modify it under the terms of the GNU General Public
84536c563SJohn Marino    License as published by the Free Software Foundation; either
94536c563SJohn Marino    version 3 of the License, or (at your option) any later version.
1044b87433SJohn Marino 
114536c563SJohn Marino    The GNU C Library is distributed in the hope that it will be useful,
1244b87433SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
134536c563SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
144536c563SJohn Marino    General Public License for more details.
1544b87433SJohn Marino 
164536c563SJohn Marino    You should have received a copy of the GNU General Public
174536c563SJohn Marino    License along with the GNU C Library; if not, see
18*6ea1f93eSDaniel Fojt    <https://www.gnu.org/licenses/>.  */
1944b87433SJohn Marino 
2044b87433SJohn Marino #ifndef _REGEX_INTERNAL_H
2144b87433SJohn Marino #define _REGEX_INTERNAL_H 1
2244b87433SJohn Marino 
2344b87433SJohn Marino #include <assert.h>
2444b87433SJohn Marino #include <ctype.h>
2544b87433SJohn Marino #include <stdio.h>
2644b87433SJohn Marino #include <stdlib.h>
2744b87433SJohn Marino #include <string.h>
2844b87433SJohn Marino 
2944b87433SJohn Marino #include <langinfo.h>
3044b87433SJohn Marino #include <locale.h>
3144b87433SJohn Marino #include <wchar.h>
3244b87433SJohn Marino #include <wctype.h>
334536c563SJohn Marino #include <stdbool.h>
3444b87433SJohn Marino #include <stdint.h>
35*6ea1f93eSDaniel Fojt 
36*6ea1f93eSDaniel Fojt #include <intprops.h>
37*6ea1f93eSDaniel Fojt 
38*6ea1f93eSDaniel Fojt #ifdef _LIBC
39*6ea1f93eSDaniel Fojt # include <libc-lock.h>
40*6ea1f93eSDaniel Fojt # define lock_define(name) __libc_lock_define (, name)
41*6ea1f93eSDaniel Fojt # define lock_init(lock) (__libc_lock_init (lock), 0)
42*6ea1f93eSDaniel Fojt # define lock_fini(lock) ((void) 0)
43*6ea1f93eSDaniel Fojt # define lock_lock(lock) __libc_lock_lock (lock)
44*6ea1f93eSDaniel Fojt # define lock_unlock(lock) __libc_lock_unlock (lock)
45*6ea1f93eSDaniel Fojt #elif defined GNULIB_LOCK && !defined USE_UNLOCKED_IO
46*6ea1f93eSDaniel Fojt # include "glthread/lock.h"
47*6ea1f93eSDaniel Fojt   /* Use gl_lock_define if empty macro arguments are known to work.
48*6ea1f93eSDaniel Fojt      Otherwise, fall back on less-portable substitutes.  */
49*6ea1f93eSDaniel Fojt # if ((defined __GNUC__ && !defined __STRICT_ANSI__) \
50*6ea1f93eSDaniel Fojt       || (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__))
51*6ea1f93eSDaniel Fojt #  define lock_define(name) gl_lock_define (, name)
52*6ea1f93eSDaniel Fojt # elif USE_POSIX_THREADS
53*6ea1f93eSDaniel Fojt #  define lock_define(name) pthread_mutex_t name;
54*6ea1f93eSDaniel Fojt # elif USE_PTH_THREADS
55*6ea1f93eSDaniel Fojt #  define lock_define(name) pth_mutex_t name;
56*6ea1f93eSDaniel Fojt # elif USE_SOLARIS_THREADS
57*6ea1f93eSDaniel Fojt #  define lock_define(name) mutex_t name;
58*6ea1f93eSDaniel Fojt # elif USE_WINDOWS_THREADS
59*6ea1f93eSDaniel Fojt #  define lock_define(name) gl_lock_t name;
6044b87433SJohn Marino # else
61*6ea1f93eSDaniel Fojt #  define lock_define(name)
62*6ea1f93eSDaniel Fojt # endif
63*6ea1f93eSDaniel Fojt # define lock_init(lock) glthread_lock_init (&(lock))
64*6ea1f93eSDaniel Fojt # define lock_fini(lock) glthread_lock_destroy (&(lock))
65*6ea1f93eSDaniel Fojt # define lock_lock(lock) glthread_lock_lock (&(lock))
66*6ea1f93eSDaniel Fojt # define lock_unlock(lock) glthread_lock_unlock (&(lock))
67*6ea1f93eSDaniel Fojt #elif defined GNULIB_PTHREAD && !defined USE_UNLOCKED_IO
68*6ea1f93eSDaniel Fojt # include <pthread.h>
69*6ea1f93eSDaniel Fojt # define lock_define(name) pthread_mutex_t name;
70*6ea1f93eSDaniel Fojt # define lock_init(lock) pthread_mutex_init (&(lock), 0)
71*6ea1f93eSDaniel Fojt # define lock_fini(lock) pthread_mutex_destroy (&(lock))
72*6ea1f93eSDaniel Fojt # define lock_lock(lock) pthread_mutex_lock (&(lock))
73*6ea1f93eSDaniel Fojt # define lock_unlock(lock) pthread_mutex_unlock (&(lock))
74*6ea1f93eSDaniel Fojt #else
75*6ea1f93eSDaniel Fojt # define lock_define(name)
76*6ea1f93eSDaniel Fojt # define lock_init(lock) 0
77*6ea1f93eSDaniel Fojt # define lock_fini(lock) ((void) 0)
78*6ea1f93eSDaniel Fojt   /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC.  */
79*6ea1f93eSDaniel Fojt # define lock_lock(lock) ((void) dfa)
80*6ea1f93eSDaniel Fojt # define lock_unlock(lock) ((void) 0)
8144b87433SJohn Marino #endif
8244b87433SJohn Marino 
8344b87433SJohn Marino /* In case that the system doesn't have isblank().  */
8444b87433SJohn Marino #if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK))
8544b87433SJohn Marino # define isblank(ch) ((ch) == ' ' || (ch) == '\t')
8644b87433SJohn Marino #endif
8744b87433SJohn Marino 
8844b87433SJohn Marino #ifdef _LIBC
8944b87433SJohn Marino # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
9044b87433SJohn Marino #  define _RE_DEFINE_LOCALE_FUNCTIONS 1
9144b87433SJohn Marino #   include <locale/localeinfo.h>
9244b87433SJohn Marino #   include <locale/coll-lookup.h>
9344b87433SJohn Marino # endif
9444b87433SJohn Marino #endif
9544b87433SJohn Marino 
9644b87433SJohn Marino /* This is for other GNU distributions with internationalized messages.  */
9744b87433SJohn Marino #if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
9844b87433SJohn Marino # include <libintl.h>
9944b87433SJohn Marino # ifdef _LIBC
10044b87433SJohn Marino #  undef gettext
10144b87433SJohn Marino #  define gettext(msgid) \
1024536c563SJohn Marino   __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES)
10344b87433SJohn Marino # endif
10444b87433SJohn Marino #else
105*6ea1f93eSDaniel Fojt # undef gettext
10644b87433SJohn Marino # define gettext(msgid) (msgid)
10744b87433SJohn Marino #endif
10844b87433SJohn Marino 
10944b87433SJohn Marino #ifndef gettext_noop
11044b87433SJohn Marino /* This define is so xgettext can find the internationalizable
11144b87433SJohn Marino    strings.  */
11244b87433SJohn Marino # define gettext_noop(String) String
11344b87433SJohn Marino #endif
11444b87433SJohn Marino 
115*6ea1f93eSDaniel Fojt #if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC
11644b87433SJohn Marino # define RE_ENABLE_I18N
11744b87433SJohn Marino #endif
11844b87433SJohn Marino 
11944b87433SJohn Marino /* Number of ASCII characters.  */
12044b87433SJohn Marino #define ASCII_CHARS 0x80
12144b87433SJohn Marino 
12244b87433SJohn Marino /* Number of single byte characters.  */
12344b87433SJohn Marino #define SBC_MAX (UCHAR_MAX + 1)
12444b87433SJohn Marino 
12544b87433SJohn Marino #define COLL_ELEM_LEN_MAX 8
12644b87433SJohn Marino 
12744b87433SJohn Marino /* The character which represents newline.  */
12844b87433SJohn Marino #define NEWLINE_CHAR '\n'
12944b87433SJohn Marino #define WIDE_NEWLINE_CHAR L'\n'
13044b87433SJohn Marino 
13144b87433SJohn Marino /* Rename to standard API for using out of glibc.  */
13244b87433SJohn Marino #ifndef _LIBC
1334536c563SJohn Marino # undef __wctype
134*6ea1f93eSDaniel Fojt # undef __iswalnum
1354536c563SJohn Marino # undef __iswctype
136*6ea1f93eSDaniel Fojt # undef __towlower
137*6ea1f93eSDaniel Fojt # undef __towupper
13844b87433SJohn Marino # define __wctype wctype
139*6ea1f93eSDaniel Fojt # define __iswalnum iswalnum
14044b87433SJohn Marino # define __iswctype iswctype
141*6ea1f93eSDaniel Fojt # define __towlower towlower
142*6ea1f93eSDaniel Fojt # define __towupper towupper
14344b87433SJohn Marino # define __btowc btowc
14444b87433SJohn Marino # define __mbrtowc mbrtowc
1454536c563SJohn Marino # define __wcrtomb wcrtomb
14644b87433SJohn Marino # define __regfree regfree
14744b87433SJohn Marino # define attribute_hidden
14844b87433SJohn Marino #endif /* not _LIBC */
14944b87433SJohn Marino 
1504536c563SJohn Marino #if __GNUC__ < 3 + (__GNUC_MINOR__ < 1)
1514536c563SJohn Marino # define __attribute__(arg)
15244b87433SJohn Marino #endif
15344b87433SJohn Marino 
154*6ea1f93eSDaniel Fojt #ifndef SSIZE_MAX
155*6ea1f93eSDaniel Fojt # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
156*6ea1f93eSDaniel Fojt #endif
157*6ea1f93eSDaniel Fojt 
158*6ea1f93eSDaniel Fojt /* The type of indexes into strings.  This is signed, not size_t,
159*6ea1f93eSDaniel Fojt    since the API requires indexes to fit in regoff_t anyway, and using
160*6ea1f93eSDaniel Fojt    signed integers makes the code a bit smaller and presumably faster.
161*6ea1f93eSDaniel Fojt    The traditional GNU regex implementation uses int for indexes.
162*6ea1f93eSDaniel Fojt    The POSIX-compatible implementation uses a possibly-wider type.
163*6ea1f93eSDaniel Fojt    The name 'Idx' is three letters to minimize the hassle of
164*6ea1f93eSDaniel Fojt    reindenting a lot of regex code that formerly used 'int'.  */
165*6ea1f93eSDaniel Fojt typedef regoff_t Idx;
1664536c563SJohn Marino #ifdef _REGEX_LARGE_OFFSETS
167*6ea1f93eSDaniel Fojt # define IDX_MAX SSIZE_MAX
1684536c563SJohn Marino #else
1694536c563SJohn Marino # define IDX_MAX INT_MAX
1704536c563SJohn Marino #endif
17144b87433SJohn Marino 
17244b87433SJohn Marino /* A hash value, suitable for computing hash tables.  */
17344b87433SJohn Marino typedef __re_size_t re_hashval_t;
17444b87433SJohn Marino 
17544b87433SJohn Marino /* An integer used to represent a set of bits.  It must be unsigned,
17644b87433SJohn Marino    and must be at least as wide as unsigned int.  */
17744b87433SJohn Marino typedef unsigned long int bitset_word_t;
17844b87433SJohn Marino /* All bits set in a bitset_word_t.  */
17944b87433SJohn Marino #define BITSET_WORD_MAX ULONG_MAX
18044b87433SJohn Marino 
18144b87433SJohn Marino /* Number of bits in a bitset_word_t.  For portability to hosts with
18244b87433SJohn Marino    padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)';
18344b87433SJohn Marino    instead, deduce it directly from BITSET_WORD_MAX.  Avoid
18444b87433SJohn Marino    greater-than-32-bit integers and unconditional shifts by more than
18544b87433SJohn Marino    31 bits, as they're not portable.  */
18644b87433SJohn Marino #if BITSET_WORD_MAX == 0xffffffffUL
18744b87433SJohn Marino # define BITSET_WORD_BITS 32
18844b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 4 == 1
18944b87433SJohn Marino # define BITSET_WORD_BITS 36
19044b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 16 == 1
19144b87433SJohn Marino # define BITSET_WORD_BITS 48
19244b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 28 == 1
19344b87433SJohn Marino # define BITSET_WORD_BITS 60
19444b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
19544b87433SJohn Marino # define BITSET_WORD_BITS 64
19644b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
19744b87433SJohn Marino # define BITSET_WORD_BITS 72
19844b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
19944b87433SJohn Marino # define BITSET_WORD_BITS 128
20044b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
20144b87433SJohn Marino # define BITSET_WORD_BITS 256
20244b87433SJohn Marino #elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
20344b87433SJohn Marino # define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
20444b87433SJohn Marino # if BITSET_WORD_BITS <= SBC_MAX
20544b87433SJohn Marino #  error "Invalid SBC_MAX"
20644b87433SJohn Marino # endif
20744b87433SJohn Marino #else
20844b87433SJohn Marino # error "Add case for new bitset_word_t size"
20944b87433SJohn Marino #endif
21044b87433SJohn Marino 
21144b87433SJohn Marino /* Number of bitset_word_t values in a bitset_t.  */
21244b87433SJohn Marino #define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
21344b87433SJohn Marino 
21444b87433SJohn Marino typedef bitset_word_t bitset_t[BITSET_WORDS];
21544b87433SJohn Marino typedef bitset_word_t *re_bitset_ptr_t;
21644b87433SJohn Marino typedef const bitset_word_t *re_const_bitset_ptr_t;
21744b87433SJohn Marino 
21844b87433SJohn Marino #define PREV_WORD_CONSTRAINT 0x0001
21944b87433SJohn Marino #define PREV_NOTWORD_CONSTRAINT 0x0002
22044b87433SJohn Marino #define NEXT_WORD_CONSTRAINT 0x0004
22144b87433SJohn Marino #define NEXT_NOTWORD_CONSTRAINT 0x0008
22244b87433SJohn Marino #define PREV_NEWLINE_CONSTRAINT 0x0010
22344b87433SJohn Marino #define NEXT_NEWLINE_CONSTRAINT 0x0020
22444b87433SJohn Marino #define PREV_BEGBUF_CONSTRAINT 0x0040
22544b87433SJohn Marino #define NEXT_ENDBUF_CONSTRAINT 0x0080
22644b87433SJohn Marino #define WORD_DELIM_CONSTRAINT 0x0100
22744b87433SJohn Marino #define NOT_WORD_DELIM_CONSTRAINT 0x0200
22844b87433SJohn Marino 
22944b87433SJohn Marino typedef enum
23044b87433SJohn Marino {
23144b87433SJohn Marino   INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
23244b87433SJohn Marino   WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
23344b87433SJohn Marino   WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
23444b87433SJohn Marino   INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
23544b87433SJohn Marino   LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
23644b87433SJohn Marino   LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
23744b87433SJohn Marino   BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
23844b87433SJohn Marino   BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
23944b87433SJohn Marino   WORD_DELIM = WORD_DELIM_CONSTRAINT,
24044b87433SJohn Marino   NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
24144b87433SJohn Marino } re_context_type;
24244b87433SJohn Marino 
24344b87433SJohn Marino typedef struct
24444b87433SJohn Marino {
24544b87433SJohn Marino   Idx alloc;
24644b87433SJohn Marino   Idx nelem;
24744b87433SJohn Marino   Idx *elems;
24844b87433SJohn Marino } re_node_set;
24944b87433SJohn Marino 
25044b87433SJohn Marino typedef enum
25144b87433SJohn Marino {
25244b87433SJohn Marino   NON_TYPE = 0,
25344b87433SJohn Marino 
25444b87433SJohn Marino   /* Node type, These are used by token, node, tree.  */
25544b87433SJohn Marino   CHARACTER = 1,
25644b87433SJohn Marino   END_OF_RE = 2,
25744b87433SJohn Marino   SIMPLE_BRACKET = 3,
25844b87433SJohn Marino   OP_BACK_REF = 4,
25944b87433SJohn Marino   OP_PERIOD = 5,
26044b87433SJohn Marino #ifdef RE_ENABLE_I18N
26144b87433SJohn Marino   COMPLEX_BRACKET = 6,
26244b87433SJohn Marino   OP_UTF8_PERIOD = 7,
26344b87433SJohn Marino #endif /* RE_ENABLE_I18N */
26444b87433SJohn Marino 
26544b87433SJohn Marino   /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
26644b87433SJohn Marino      when the debugger shows values of this enum type.  */
26744b87433SJohn Marino #define EPSILON_BIT 8
26844b87433SJohn Marino   OP_OPEN_SUBEXP = EPSILON_BIT | 0,
26944b87433SJohn Marino   OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
27044b87433SJohn Marino   OP_ALT = EPSILON_BIT | 2,
27144b87433SJohn Marino   OP_DUP_ASTERISK = EPSILON_BIT | 3,
27244b87433SJohn Marino   ANCHOR = EPSILON_BIT | 4,
27344b87433SJohn Marino 
27444b87433SJohn Marino   /* Tree type, these are used only by tree. */
27544b87433SJohn Marino   CONCAT = 16,
27644b87433SJohn Marino   SUBEXP = 17,
27744b87433SJohn Marino 
27844b87433SJohn Marino   /* Token type, these are used only by token.  */
27944b87433SJohn Marino   OP_DUP_PLUS = 18,
28044b87433SJohn Marino   OP_DUP_QUESTION,
28144b87433SJohn Marino   OP_OPEN_BRACKET,
28244b87433SJohn Marino   OP_CLOSE_BRACKET,
28344b87433SJohn Marino   OP_CHARSET_RANGE,
28444b87433SJohn Marino   OP_OPEN_DUP_NUM,
28544b87433SJohn Marino   OP_CLOSE_DUP_NUM,
28644b87433SJohn Marino   OP_NON_MATCH_LIST,
28744b87433SJohn Marino   OP_OPEN_COLL_ELEM,
28844b87433SJohn Marino   OP_CLOSE_COLL_ELEM,
28944b87433SJohn Marino   OP_OPEN_EQUIV_CLASS,
29044b87433SJohn Marino   OP_CLOSE_EQUIV_CLASS,
29144b87433SJohn Marino   OP_OPEN_CHAR_CLASS,
29244b87433SJohn Marino   OP_CLOSE_CHAR_CLASS,
29344b87433SJohn Marino   OP_WORD,
29444b87433SJohn Marino   OP_NOTWORD,
29544b87433SJohn Marino   OP_SPACE,
29644b87433SJohn Marino   OP_NOTSPACE,
29744b87433SJohn Marino   BACK_SLASH
29844b87433SJohn Marino 
29944b87433SJohn Marino } re_token_type_t;
30044b87433SJohn Marino 
30144b87433SJohn Marino #ifdef RE_ENABLE_I18N
30244b87433SJohn Marino typedef struct
30344b87433SJohn Marino {
30444b87433SJohn Marino   /* Multibyte characters.  */
30544b87433SJohn Marino   wchar_t *mbchars;
30644b87433SJohn Marino 
30744b87433SJohn Marino   /* Collating symbols.  */
30844b87433SJohn Marino # ifdef _LIBC
30944b87433SJohn Marino   int32_t *coll_syms;
31044b87433SJohn Marino # endif
31144b87433SJohn Marino 
31244b87433SJohn Marino   /* Equivalence classes. */
31344b87433SJohn Marino # ifdef _LIBC
31444b87433SJohn Marino   int32_t *equiv_classes;
31544b87433SJohn Marino # endif
31644b87433SJohn Marino 
31744b87433SJohn Marino   /* Range expressions. */
31844b87433SJohn Marino # ifdef _LIBC
31944b87433SJohn Marino   uint32_t *range_starts;
32044b87433SJohn Marino   uint32_t *range_ends;
32144b87433SJohn Marino # else /* not _LIBC */
32244b87433SJohn Marino   wchar_t *range_starts;
32344b87433SJohn Marino   wchar_t *range_ends;
32444b87433SJohn Marino # endif /* not _LIBC */
32544b87433SJohn Marino 
32644b87433SJohn Marino   /* Character classes. */
32744b87433SJohn Marino   wctype_t *char_classes;
32844b87433SJohn Marino 
32944b87433SJohn Marino   /* If this character set is the non-matching list.  */
33044b87433SJohn Marino   unsigned int non_match : 1;
33144b87433SJohn Marino 
33244b87433SJohn Marino   /* # of multibyte characters.  */
33344b87433SJohn Marino   Idx nmbchars;
33444b87433SJohn Marino 
33544b87433SJohn Marino   /* # of collating symbols.  */
33644b87433SJohn Marino   Idx ncoll_syms;
33744b87433SJohn Marino 
33844b87433SJohn Marino   /* # of equivalence classes. */
33944b87433SJohn Marino   Idx nequiv_classes;
34044b87433SJohn Marino 
34144b87433SJohn Marino   /* # of range expressions. */
34244b87433SJohn Marino   Idx nranges;
34344b87433SJohn Marino 
34444b87433SJohn Marino   /* # of character classes. */
34544b87433SJohn Marino   Idx nchar_classes;
34644b87433SJohn Marino } re_charset_t;
34744b87433SJohn Marino #endif /* RE_ENABLE_I18N */
34844b87433SJohn Marino 
34944b87433SJohn Marino typedef struct
35044b87433SJohn Marino {
35144b87433SJohn Marino   union
35244b87433SJohn Marino   {
35344b87433SJohn Marino     unsigned char c;		/* for CHARACTER */
35444b87433SJohn Marino     re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
35544b87433SJohn Marino #ifdef RE_ENABLE_I18N
35644b87433SJohn Marino     re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
35744b87433SJohn Marino #endif /* RE_ENABLE_I18N */
35844b87433SJohn Marino     Idx idx;			/* for BACK_REF */
35944b87433SJohn Marino     re_context_type ctx_type;	/* for ANCHOR */
36044b87433SJohn Marino   } opr;
3614536c563SJohn Marino #if __GNUC__ >= 2 && !defined __STRICT_ANSI__
36244b87433SJohn Marino   re_token_type_t type : 8;
36344b87433SJohn Marino #else
36444b87433SJohn Marino   re_token_type_t type;
36544b87433SJohn Marino #endif
36644b87433SJohn Marino   unsigned int constraint : 10;	/* context constraint */
36744b87433SJohn Marino   unsigned int duplicated : 1;
36844b87433SJohn Marino   unsigned int opt_subexp : 1;
36944b87433SJohn Marino #ifdef RE_ENABLE_I18N
37044b87433SJohn Marino   unsigned int accept_mb : 1;
37144b87433SJohn Marino   /* These 2 bits can be moved into the union if needed (e.g. if running out
37244b87433SJohn Marino      of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
37344b87433SJohn Marino   unsigned int mb_partial : 1;
37444b87433SJohn Marino #endif
37544b87433SJohn Marino   unsigned int word_char : 1;
37644b87433SJohn Marino } re_token_t;
37744b87433SJohn Marino 
37844b87433SJohn Marino #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
37944b87433SJohn Marino 
38044b87433SJohn Marino struct re_string_t
38144b87433SJohn Marino {
38244b87433SJohn Marino   /* Indicate the raw buffer which is the original string passed as an
38344b87433SJohn Marino      argument of regexec(), re_search(), etc..  */
38444b87433SJohn Marino   const unsigned char *raw_mbs;
38544b87433SJohn Marino   /* Store the multibyte string.  In case of "case insensitive mode" like
38644b87433SJohn Marino      REG_ICASE, upper cases of the string are stored, otherwise MBS points
38744b87433SJohn Marino      the same address that RAW_MBS points.  */
38844b87433SJohn Marino   unsigned char *mbs;
38944b87433SJohn Marino #ifdef RE_ENABLE_I18N
39044b87433SJohn Marino   /* Store the wide character string which is corresponding to MBS.  */
39144b87433SJohn Marino   wint_t *wcs;
39244b87433SJohn Marino   Idx *offsets;
39344b87433SJohn Marino   mbstate_t cur_state;
39444b87433SJohn Marino #endif
39544b87433SJohn Marino   /* Index in RAW_MBS.  Each character mbs[i] corresponds to
39644b87433SJohn Marino      raw_mbs[raw_mbs_idx + i].  */
39744b87433SJohn Marino   Idx raw_mbs_idx;
39844b87433SJohn Marino   /* The length of the valid characters in the buffers.  */
39944b87433SJohn Marino   Idx valid_len;
40044b87433SJohn Marino   /* The corresponding number of bytes in raw_mbs array.  */
40144b87433SJohn Marino   Idx valid_raw_len;
40244b87433SJohn Marino   /* The length of the buffers MBS and WCS.  */
40344b87433SJohn Marino   Idx bufs_len;
40444b87433SJohn Marino   /* The index in MBS, which is updated by re_string_fetch_byte.  */
40544b87433SJohn Marino   Idx cur_idx;
40644b87433SJohn Marino   /* length of RAW_MBS array.  */
40744b87433SJohn Marino   Idx raw_len;
40844b87433SJohn Marino   /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
40944b87433SJohn Marino   Idx len;
41044b87433SJohn Marino   /* End of the buffer may be shorter than its length in the cases such
41144b87433SJohn Marino      as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
41244b87433SJohn Marino      instead of LEN.  */
41344b87433SJohn Marino   Idx raw_stop;
41444b87433SJohn Marino   /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
41544b87433SJohn Marino   Idx stop;
41644b87433SJohn Marino 
41744b87433SJohn Marino   /* The context of mbs[0].  We store the context independently, since
41844b87433SJohn Marino      the context of mbs[0] may be different from raw_mbs[0], which is
41944b87433SJohn Marino      the beginning of the input string.  */
42044b87433SJohn Marino   unsigned int tip_context;
42144b87433SJohn Marino   /* The translation passed as a part of an argument of re_compile_pattern.  */
42244b87433SJohn Marino   RE_TRANSLATE_TYPE trans;
42344b87433SJohn Marino   /* Copy of re_dfa_t's word_char.  */
42444b87433SJohn Marino   re_const_bitset_ptr_t word_char;
42544b87433SJohn Marino   /* true if REG_ICASE.  */
42644b87433SJohn Marino   unsigned char icase;
42744b87433SJohn Marino   unsigned char is_utf8;
42844b87433SJohn Marino   unsigned char map_notascii;
42944b87433SJohn Marino   unsigned char mbs_allocated;
43044b87433SJohn Marino   unsigned char offsets_needed;
43144b87433SJohn Marino   unsigned char newline_anchor;
43244b87433SJohn Marino   unsigned char word_ops_used;
43344b87433SJohn Marino   int mb_cur_max;
43444b87433SJohn Marino };
43544b87433SJohn Marino typedef struct re_string_t re_string_t;
43644b87433SJohn Marino 
43744b87433SJohn Marino 
43844b87433SJohn Marino struct re_dfa_t;
43944b87433SJohn Marino typedef struct re_dfa_t re_dfa_t;
44044b87433SJohn Marino 
44144b87433SJohn Marino #ifndef _LIBC
442*6ea1f93eSDaniel Fojt # define IS_IN(libc) false
44344b87433SJohn Marino #endif
44444b87433SJohn Marino 
44544b87433SJohn Marino #define re_string_peek_byte(pstr, offset) \
44644b87433SJohn Marino   ((pstr)->mbs[(pstr)->cur_idx + offset])
44744b87433SJohn Marino #define re_string_fetch_byte(pstr) \
44844b87433SJohn Marino   ((pstr)->mbs[(pstr)->cur_idx++])
44944b87433SJohn Marino #define re_string_first_byte(pstr, idx) \
45044b87433SJohn Marino   ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
45144b87433SJohn Marino #define re_string_is_single_byte_char(pstr, idx) \
45244b87433SJohn Marino   ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
45344b87433SJohn Marino 				|| (pstr)->wcs[(idx) + 1] != WEOF))
45444b87433SJohn Marino #define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
45544b87433SJohn Marino #define re_string_cur_idx(pstr) ((pstr)->cur_idx)
45644b87433SJohn Marino #define re_string_get_buffer(pstr) ((pstr)->mbs)
45744b87433SJohn Marino #define re_string_length(pstr) ((pstr)->len)
45844b87433SJohn Marino #define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
45944b87433SJohn Marino #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
46044b87433SJohn Marino #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
46144b87433SJohn Marino 
4624536c563SJohn Marino #if defined _LIBC || HAVE_ALLOCA
46344b87433SJohn Marino # include <alloca.h>
4644536c563SJohn Marino #endif
46544b87433SJohn Marino 
46644b87433SJohn Marino #ifndef _LIBC
46744b87433SJohn Marino # if HAVE_ALLOCA
46844b87433SJohn Marino /* The OS usually guarantees only one guard page at the bottom of the stack,
46944b87433SJohn Marino    and a page size can be as small as 4096 bytes.  So we cannot safely
47044b87433SJohn Marino    allocate anything larger than 4096 bytes.  Also care for the possibility
47144b87433SJohn Marino    of a few compiler-allocated temporary stack slots.  */
47244b87433SJohn Marino #  define __libc_use_alloca(n) ((n) < 4032)
47344b87433SJohn Marino # else
47444b87433SJohn Marino /* alloca is implemented with malloc, so just use malloc.  */
47544b87433SJohn Marino #  define __libc_use_alloca(n) 0
476008e37b6SJohn Marino #  undef alloca
477008e37b6SJohn Marino #  define alloca(n) malloc (n)
47844b87433SJohn Marino # endif
47944b87433SJohn Marino #endif
48044b87433SJohn Marino 
4814536c563SJohn Marino #ifdef _LIBC
4824536c563SJohn Marino # define MALLOC_0_IS_NONNULL 1
4834536c563SJohn Marino #elif !defined MALLOC_0_IS_NONNULL
4844536c563SJohn Marino # define MALLOC_0_IS_NONNULL 0
4854536c563SJohn Marino #endif
4864536c563SJohn Marino 
48744b87433SJohn Marino #ifndef MAX
48844b87433SJohn Marino # define MAX(a,b) ((a) < (b) ? (b) : (a))
48944b87433SJohn Marino #endif
4904536c563SJohn Marino #ifndef MIN
4914536c563SJohn Marino # define MIN(a,b) ((a) < (b) ? (a) : (b))
4924536c563SJohn Marino #endif
49344b87433SJohn Marino 
49444b87433SJohn Marino #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
49544b87433SJohn Marino #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
49644b87433SJohn Marino #define re_free(p) free (p)
49744b87433SJohn Marino 
49844b87433SJohn Marino struct bin_tree_t
49944b87433SJohn Marino {
50044b87433SJohn Marino   struct bin_tree_t *parent;
50144b87433SJohn Marino   struct bin_tree_t *left;
50244b87433SJohn Marino   struct bin_tree_t *right;
50344b87433SJohn Marino   struct bin_tree_t *first;
50444b87433SJohn Marino   struct bin_tree_t *next;
50544b87433SJohn Marino 
50644b87433SJohn Marino   re_token_t token;
50744b87433SJohn Marino 
5084536c563SJohn Marino   /* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
5094536c563SJohn Marino      Otherwise 'type' indicate the type of this node.  */
51044b87433SJohn Marino   Idx node_idx;
51144b87433SJohn Marino };
51244b87433SJohn Marino typedef struct bin_tree_t bin_tree_t;
51344b87433SJohn Marino 
51444b87433SJohn Marino #define BIN_TREE_STORAGE_SIZE \
51544b87433SJohn Marino   ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
51644b87433SJohn Marino 
51744b87433SJohn Marino struct bin_tree_storage_t
51844b87433SJohn Marino {
51944b87433SJohn Marino   struct bin_tree_storage_t *next;
52044b87433SJohn Marino   bin_tree_t data[BIN_TREE_STORAGE_SIZE];
52144b87433SJohn Marino };
52244b87433SJohn Marino typedef struct bin_tree_storage_t bin_tree_storage_t;
52344b87433SJohn Marino 
52444b87433SJohn Marino #define CONTEXT_WORD 1
52544b87433SJohn Marino #define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
52644b87433SJohn Marino #define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
52744b87433SJohn Marino #define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
52844b87433SJohn Marino 
52944b87433SJohn Marino #define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
53044b87433SJohn Marino #define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
53144b87433SJohn Marino #define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
53244b87433SJohn Marino #define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
53344b87433SJohn Marino #define IS_ORDINARY_CONTEXT(c) ((c) == 0)
53444b87433SJohn Marino 
53544b87433SJohn Marino #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
53644b87433SJohn Marino #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
537*6ea1f93eSDaniel Fojt #define IS_WIDE_WORD_CHAR(ch) (__iswalnum (ch) || (ch) == L'_')
53844b87433SJohn Marino #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
53944b87433SJohn Marino 
54044b87433SJohn Marino #define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
54144b87433SJohn Marino  ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
54244b87433SJohn Marino   || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
54344b87433SJohn Marino   || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
54444b87433SJohn Marino   || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
54544b87433SJohn Marino 
54644b87433SJohn Marino #define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
54744b87433SJohn Marino  ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
54844b87433SJohn Marino   || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
54944b87433SJohn Marino   || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
55044b87433SJohn Marino   || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
55144b87433SJohn Marino 
55244b87433SJohn Marino struct re_dfastate_t
55344b87433SJohn Marino {
55444b87433SJohn Marino   re_hashval_t hash;
55544b87433SJohn Marino   re_node_set nodes;
55644b87433SJohn Marino   re_node_set non_eps_nodes;
55744b87433SJohn Marino   re_node_set inveclosure;
55844b87433SJohn Marino   re_node_set *entrance_nodes;
55944b87433SJohn Marino   struct re_dfastate_t **trtable, **word_trtable;
56044b87433SJohn Marino   unsigned int context : 4;
56144b87433SJohn Marino   unsigned int halt : 1;
5624536c563SJohn Marino   /* If this state can accept "multi byte".
56344b87433SJohn Marino      Note that we refer to multibyte characters, and multi character
5644536c563SJohn Marino      collating elements as "multi byte".  */
56544b87433SJohn Marino   unsigned int accept_mb : 1;
56644b87433SJohn Marino   /* If this state has backreference node(s).  */
56744b87433SJohn Marino   unsigned int has_backref : 1;
56844b87433SJohn Marino   unsigned int has_constraint : 1;
56944b87433SJohn Marino };
57044b87433SJohn Marino typedef struct re_dfastate_t re_dfastate_t;
57144b87433SJohn Marino 
57244b87433SJohn Marino struct re_state_table_entry
57344b87433SJohn Marino {
57444b87433SJohn Marino   Idx num;
57544b87433SJohn Marino   Idx alloc;
57644b87433SJohn Marino   re_dfastate_t **array;
57744b87433SJohn Marino };
57844b87433SJohn Marino 
57944b87433SJohn Marino /* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
58044b87433SJohn Marino 
58144b87433SJohn Marino typedef struct
58244b87433SJohn Marino {
58344b87433SJohn Marino   Idx next_idx;
58444b87433SJohn Marino   Idx alloc;
58544b87433SJohn Marino   re_dfastate_t **array;
58644b87433SJohn Marino } state_array_t;
58744b87433SJohn Marino 
58844b87433SJohn Marino /* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
58944b87433SJohn Marino 
59044b87433SJohn Marino typedef struct
59144b87433SJohn Marino {
59244b87433SJohn Marino   Idx node;
59344b87433SJohn Marino   Idx str_idx; /* The position NODE match at.  */
59444b87433SJohn Marino   state_array_t path;
59544b87433SJohn Marino } re_sub_match_last_t;
59644b87433SJohn Marino 
59744b87433SJohn Marino /* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
59844b87433SJohn Marino    And information about the node, whose type is OP_CLOSE_SUBEXP,
59944b87433SJohn Marino    corresponding to NODE is stored in LASTS.  */
60044b87433SJohn Marino 
60144b87433SJohn Marino typedef struct
60244b87433SJohn Marino {
60344b87433SJohn Marino   Idx str_idx;
60444b87433SJohn Marino   Idx node;
60544b87433SJohn Marino   state_array_t *path;
60644b87433SJohn Marino   Idx alasts; /* Allocation size of LASTS.  */
60744b87433SJohn Marino   Idx nlasts; /* The number of LASTS.  */
60844b87433SJohn Marino   re_sub_match_last_t **lasts;
60944b87433SJohn Marino } re_sub_match_top_t;
61044b87433SJohn Marino 
61144b87433SJohn Marino struct re_backref_cache_entry
61244b87433SJohn Marino {
61344b87433SJohn Marino   Idx node;
61444b87433SJohn Marino   Idx str_idx;
61544b87433SJohn Marino   Idx subexp_from;
61644b87433SJohn Marino   Idx subexp_to;
61744b87433SJohn Marino   char more;
61844b87433SJohn Marino   char unused;
61944b87433SJohn Marino   unsigned short int eps_reachable_subexps_map;
62044b87433SJohn Marino };
62144b87433SJohn Marino 
62244b87433SJohn Marino typedef struct
62344b87433SJohn Marino {
62444b87433SJohn Marino   /* The string object corresponding to the input string.  */
62544b87433SJohn Marino   re_string_t input;
62644b87433SJohn Marino #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
62744b87433SJohn Marino   const re_dfa_t *const dfa;
62844b87433SJohn Marino #else
62944b87433SJohn Marino   const re_dfa_t *dfa;
63044b87433SJohn Marino #endif
63144b87433SJohn Marino   /* EFLAGS of the argument of regexec.  */
63244b87433SJohn Marino   int eflags;
63344b87433SJohn Marino   /* Where the matching ends.  */
63444b87433SJohn Marino   Idx match_last;
63544b87433SJohn Marino   Idx last_node;
63644b87433SJohn Marino   /* The state log used by the matcher.  */
63744b87433SJohn Marino   re_dfastate_t **state_log;
63844b87433SJohn Marino   Idx state_log_top;
63944b87433SJohn Marino   /* Back reference cache.  */
64044b87433SJohn Marino   Idx nbkref_ents;
64144b87433SJohn Marino   Idx abkref_ents;
64244b87433SJohn Marino   struct re_backref_cache_entry *bkref_ents;
64344b87433SJohn Marino   int max_mb_elem_len;
64444b87433SJohn Marino   Idx nsub_tops;
64544b87433SJohn Marino   Idx asub_tops;
64644b87433SJohn Marino   re_sub_match_top_t **sub_tops;
64744b87433SJohn Marino } re_match_context_t;
64844b87433SJohn Marino 
64944b87433SJohn Marino typedef struct
65044b87433SJohn Marino {
65144b87433SJohn Marino   re_dfastate_t **sifted_states;
65244b87433SJohn Marino   re_dfastate_t **limited_states;
65344b87433SJohn Marino   Idx last_node;
65444b87433SJohn Marino   Idx last_str_idx;
65544b87433SJohn Marino   re_node_set limits;
65644b87433SJohn Marino } re_sift_context_t;
65744b87433SJohn Marino 
65844b87433SJohn Marino struct re_fail_stack_ent_t
65944b87433SJohn Marino {
66044b87433SJohn Marino   Idx idx;
66144b87433SJohn Marino   Idx node;
66244b87433SJohn Marino   regmatch_t *regs;
66344b87433SJohn Marino   re_node_set eps_via_nodes;
66444b87433SJohn Marino };
66544b87433SJohn Marino 
66644b87433SJohn Marino struct re_fail_stack_t
66744b87433SJohn Marino {
66844b87433SJohn Marino   Idx num;
66944b87433SJohn Marino   Idx alloc;
67044b87433SJohn Marino   struct re_fail_stack_ent_t *stack;
67144b87433SJohn Marino };
67244b87433SJohn Marino 
67344b87433SJohn Marino struct re_dfa_t
67444b87433SJohn Marino {
67544b87433SJohn Marino   re_token_t *nodes;
67644b87433SJohn Marino   size_t nodes_alloc;
67744b87433SJohn Marino   size_t nodes_len;
67844b87433SJohn Marino   Idx *nexts;
67944b87433SJohn Marino   Idx *org_indices;
68044b87433SJohn Marino   re_node_set *edests;
68144b87433SJohn Marino   re_node_set *eclosures;
68244b87433SJohn Marino   re_node_set *inveclosures;
68344b87433SJohn Marino   struct re_state_table_entry *state_table;
68444b87433SJohn Marino   re_dfastate_t *init_state;
68544b87433SJohn Marino   re_dfastate_t *init_state_word;
68644b87433SJohn Marino   re_dfastate_t *init_state_nl;
68744b87433SJohn Marino   re_dfastate_t *init_state_begbuf;
68844b87433SJohn Marino   bin_tree_t *str_tree;
68944b87433SJohn Marino   bin_tree_storage_t *str_tree_storage;
69044b87433SJohn Marino   re_bitset_ptr_t sb_char;
69144b87433SJohn Marino   int str_tree_storage_idx;
69244b87433SJohn Marino 
6934536c563SJohn Marino   /* number of subexpressions 're_nsub' is in regex_t.  */
69444b87433SJohn Marino   re_hashval_t state_hash_mask;
69544b87433SJohn Marino   Idx init_node;
69644b87433SJohn Marino   Idx nbackref; /* The number of backreference in this dfa.  */
69744b87433SJohn Marino 
69844b87433SJohn Marino   /* Bitmap expressing which backreference is used.  */
69944b87433SJohn Marino   bitset_word_t used_bkref_map;
70044b87433SJohn Marino   bitset_word_t completed_bkref_map;
70144b87433SJohn Marino 
70244b87433SJohn Marino   unsigned int has_plural_match : 1;
70344b87433SJohn Marino   /* If this dfa has "multibyte node", which is a backreference or
70444b87433SJohn Marino      a node which can accept multibyte character or multi character
70544b87433SJohn Marino      collating element.  */
70644b87433SJohn Marino   unsigned int has_mb_node : 1;
70744b87433SJohn Marino   unsigned int is_utf8 : 1;
70844b87433SJohn Marino   unsigned int map_notascii : 1;
70944b87433SJohn Marino   unsigned int word_ops_used : 1;
71044b87433SJohn Marino   int mb_cur_max;
71144b87433SJohn Marino   bitset_t word_char;
71244b87433SJohn Marino   reg_syntax_t syntax;
71344b87433SJohn Marino   Idx *subexp_map;
71444b87433SJohn Marino #ifdef DEBUG
71544b87433SJohn Marino   char* re_str;
71644b87433SJohn Marino #endif
717*6ea1f93eSDaniel Fojt   lock_define (lock)
71844b87433SJohn Marino };
71944b87433SJohn Marino 
72044b87433SJohn Marino #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
72144b87433SJohn Marino #define re_node_set_remove(set,id) \
72244b87433SJohn Marino   (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
72344b87433SJohn Marino #define re_node_set_empty(p) ((p)->nelem = 0)
72444b87433SJohn Marino #define re_node_set_free(set) re_free ((set)->elems)
72544b87433SJohn Marino 
72644b87433SJohn Marino 
72744b87433SJohn Marino typedef enum
72844b87433SJohn Marino {
72944b87433SJohn Marino   SB_CHAR,
73044b87433SJohn Marino   MB_CHAR,
73144b87433SJohn Marino   EQUIV_CLASS,
73244b87433SJohn Marino   COLL_SYM,
73344b87433SJohn Marino   CHAR_CLASS
73444b87433SJohn Marino } bracket_elem_type;
73544b87433SJohn Marino 
73644b87433SJohn Marino typedef struct
73744b87433SJohn Marino {
73844b87433SJohn Marino   bracket_elem_type type;
73944b87433SJohn Marino   union
74044b87433SJohn Marino   {
74144b87433SJohn Marino     unsigned char ch;
74244b87433SJohn Marino     unsigned char *name;
74344b87433SJohn Marino     wchar_t wch;
74444b87433SJohn Marino   } opr;
74544b87433SJohn Marino } bracket_elem_t;
74644b87433SJohn Marino 
74744b87433SJohn Marino 
7484536c563SJohn Marino /* Functions for bitset_t operation.  */
74944b87433SJohn Marino 
750*6ea1f93eSDaniel Fojt static inline void
bitset_set(bitset_t set,Idx i)75144b87433SJohn Marino bitset_set (bitset_t set, Idx i)
75244b87433SJohn Marino {
75344b87433SJohn Marino   set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS;
75444b87433SJohn Marino }
75544b87433SJohn Marino 
756*6ea1f93eSDaniel Fojt static inline void
bitset_clear(bitset_t set,Idx i)75744b87433SJohn Marino bitset_clear (bitset_t set, Idx i)
75844b87433SJohn Marino {
75944b87433SJohn Marino   set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS);
76044b87433SJohn Marino }
76144b87433SJohn Marino 
762*6ea1f93eSDaniel Fojt static inline bool
bitset_contain(const bitset_t set,Idx i)76344b87433SJohn Marino bitset_contain (const bitset_t set, Idx i)
76444b87433SJohn Marino {
76544b87433SJohn Marino   return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
76644b87433SJohn Marino }
76744b87433SJohn Marino 
768*6ea1f93eSDaniel Fojt static inline void
bitset_empty(bitset_t set)76944b87433SJohn Marino bitset_empty (bitset_t set)
77044b87433SJohn Marino {
77144b87433SJohn Marino   memset (set, '\0', sizeof (bitset_t));
77244b87433SJohn Marino }
77344b87433SJohn Marino 
774*6ea1f93eSDaniel Fojt static inline void
bitset_set_all(bitset_t set)77544b87433SJohn Marino bitset_set_all (bitset_t set)
77644b87433SJohn Marino {
77744b87433SJohn Marino   memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS));
77844b87433SJohn Marino   if (SBC_MAX % BITSET_WORD_BITS != 0)
77944b87433SJohn Marino     set[BITSET_WORDS - 1] =
78044b87433SJohn Marino       ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
78144b87433SJohn Marino }
78244b87433SJohn Marino 
783*6ea1f93eSDaniel Fojt static inline void
bitset_copy(bitset_t dest,const bitset_t src)78444b87433SJohn Marino bitset_copy (bitset_t dest, const bitset_t src)
78544b87433SJohn Marino {
78644b87433SJohn Marino   memcpy (dest, src, sizeof (bitset_t));
78744b87433SJohn Marino }
78844b87433SJohn Marino 
789*6ea1f93eSDaniel Fojt static inline void
bitset_not(bitset_t set)79044b87433SJohn Marino bitset_not (bitset_t set)
79144b87433SJohn Marino {
79244b87433SJohn Marino   int bitset_i;
79344b87433SJohn Marino   for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i)
79444b87433SJohn Marino     set[bitset_i] = ~set[bitset_i];
79544b87433SJohn Marino   if (SBC_MAX % BITSET_WORD_BITS != 0)
79644b87433SJohn Marino     set[BITSET_WORDS - 1] =
79744b87433SJohn Marino       ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1)
79844b87433SJohn Marino        & ~set[BITSET_WORDS - 1]);
79944b87433SJohn Marino }
80044b87433SJohn Marino 
801*6ea1f93eSDaniel Fojt static inline void
bitset_merge(bitset_t dest,const bitset_t src)80244b87433SJohn Marino bitset_merge (bitset_t dest, const bitset_t src)
80344b87433SJohn Marino {
80444b87433SJohn Marino   int bitset_i;
80544b87433SJohn Marino   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
80644b87433SJohn Marino     dest[bitset_i] |= src[bitset_i];
80744b87433SJohn Marino }
80844b87433SJohn Marino 
809*6ea1f93eSDaniel Fojt static inline void
bitset_mask(bitset_t dest,const bitset_t src)81044b87433SJohn Marino bitset_mask (bitset_t dest, const bitset_t src)
81144b87433SJohn Marino {
81244b87433SJohn Marino   int bitset_i;
81344b87433SJohn Marino   for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
81444b87433SJohn Marino     dest[bitset_i] &= src[bitset_i];
81544b87433SJohn Marino }
81644b87433SJohn Marino 
81744b87433SJohn Marino #ifdef RE_ENABLE_I18N
8184536c563SJohn Marino /* Functions for re_string.  */
8194536c563SJohn Marino static int
820*6ea1f93eSDaniel Fojt __attribute__ ((pure, unused))
re_string_char_size_at(const re_string_t * pstr,Idx idx)82144b87433SJohn Marino re_string_char_size_at (const re_string_t *pstr, Idx idx)
82244b87433SJohn Marino {
82344b87433SJohn Marino   int byte_idx;
82444b87433SJohn Marino   if (pstr->mb_cur_max == 1)
82544b87433SJohn Marino     return 1;
82644b87433SJohn Marino   for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
82744b87433SJohn Marino     if (pstr->wcs[idx + byte_idx] != WEOF)
82844b87433SJohn Marino       break;
82944b87433SJohn Marino   return byte_idx;
83044b87433SJohn Marino }
83144b87433SJohn Marino 
8324536c563SJohn Marino static wint_t
833*6ea1f93eSDaniel Fojt __attribute__ ((pure, unused))
re_string_wchar_at(const re_string_t * pstr,Idx idx)83444b87433SJohn Marino re_string_wchar_at (const re_string_t *pstr, Idx idx)
83544b87433SJohn Marino {
83644b87433SJohn Marino   if (pstr->mb_cur_max == 1)
83744b87433SJohn Marino     return (wint_t) pstr->mbs[idx];
83844b87433SJohn Marino   return (wint_t) pstr->wcs[idx];
83944b87433SJohn Marino }
84044b87433SJohn Marino 
841*6ea1f93eSDaniel Fojt # ifdef _LIBC
842*6ea1f93eSDaniel Fojt #  include <locale/weight.h>
843*6ea1f93eSDaniel Fojt # endif
844*6ea1f93eSDaniel Fojt 
84544b87433SJohn Marino static int
846*6ea1f93eSDaniel Fojt __attribute__ ((pure, unused))
re_string_elem_size_at(const re_string_t * pstr,Idx idx)8474536c563SJohn Marino re_string_elem_size_at (const re_string_t *pstr, Idx idx)
84844b87433SJohn Marino {
84944b87433SJohn Marino # ifdef _LIBC
85044b87433SJohn Marino   const unsigned char *p, *extra;
85144b87433SJohn Marino   const int32_t *table, *indirect;
85244b87433SJohn Marino   uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
85344b87433SJohn Marino 
85444b87433SJohn Marino   if (nrules != 0)
85544b87433SJohn Marino     {
85644b87433SJohn Marino       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
85744b87433SJohn Marino       extra = (const unsigned char *)
85844b87433SJohn Marino 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
85944b87433SJohn Marino       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
86044b87433SJohn Marino 						_NL_COLLATE_INDIRECTMB);
86144b87433SJohn Marino       p = pstr->mbs + idx;
862*6ea1f93eSDaniel Fojt       findidx (table, indirect, extra, &p, pstr->len - idx);
86344b87433SJohn Marino       return p - pstr->mbs - idx;
86444b87433SJohn Marino     }
86544b87433SJohn Marino   else
86644b87433SJohn Marino # endif /* _LIBC */
86744b87433SJohn Marino     return 1;
86844b87433SJohn Marino }
86944b87433SJohn Marino #endif /* RE_ENABLE_I18N */
87044b87433SJohn Marino 
87144b87433SJohn Marino #ifndef __GNUC_PREREQ
87244b87433SJohn Marino # if defined __GNUC__ && defined __GNUC_MINOR__
87344b87433SJohn Marino #  define __GNUC_PREREQ(maj, min) \
87444b87433SJohn Marino          ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
87544b87433SJohn Marino # else
87644b87433SJohn Marino #  define __GNUC_PREREQ(maj, min) 0
87744b87433SJohn Marino # endif
87844b87433SJohn Marino #endif
87944b87433SJohn Marino 
88044b87433SJohn Marino #if __GNUC_PREREQ (3,4)
88144b87433SJohn Marino # undef __attribute_warn_unused_result__
88244b87433SJohn Marino # define __attribute_warn_unused_result__ \
88344b87433SJohn Marino    __attribute__ ((__warn_unused_result__))
88444b87433SJohn Marino #else
88544b87433SJohn Marino # define __attribute_warn_unused_result__ /* empty */
88644b87433SJohn Marino #endif
88744b87433SJohn Marino 
888*6ea1f93eSDaniel Fojt #ifndef FALLTHROUGH
889*6ea1f93eSDaniel Fojt # if __GNUC__ < 7
890*6ea1f93eSDaniel Fojt #  define FALLTHROUGH ((void) 0)
891*6ea1f93eSDaniel Fojt # else
892*6ea1f93eSDaniel Fojt #  define FALLTHROUGH __attribute__ ((__fallthrough__))
893*6ea1f93eSDaniel Fojt # endif
894*6ea1f93eSDaniel Fojt #endif
895*6ea1f93eSDaniel Fojt 
89644b87433SJohn Marino #endif /*  _REGEX_INTERNAL_H */
897