110d565efSmrg /* Extended regular expression matching and search library,
210d565efSmrg version 0.12.
310d565efSmrg (Implements POSIX draft P1003.2/D11.2, except for some of the
410d565efSmrg internationalization features.)
510d565efSmrg
6*ec02198aSmrg Copyright (C) 1993-2020 Free Software Foundation, Inc.
710d565efSmrg This file is part of the GNU C Library.
810d565efSmrg
910d565efSmrg The GNU C Library is free software; you can redistribute it and/or
1010d565efSmrg modify it under the terms of the GNU Lesser General Public
1110d565efSmrg License as published by the Free Software Foundation; either
1210d565efSmrg version 2.1 of the License, or (at your option) any later version.
1310d565efSmrg
1410d565efSmrg The GNU C Library is distributed in the hope that it will be useful,
1510d565efSmrg but WITHOUT ANY WARRANTY; without even the implied warranty of
1610d565efSmrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1710d565efSmrg Lesser General Public License for more details.
1810d565efSmrg
1910d565efSmrg You should have received a copy of the GNU Lesser General Public
2010d565efSmrg License along with the GNU C Library; if not, write to the Free
2110d565efSmrg Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
2210d565efSmrg 02110-1301 USA. */
2310d565efSmrg
2410d565efSmrg /* This file has been modified for usage in libiberty. It includes "xregex.h"
2510d565efSmrg instead of <regex.h>. The "xregex.h" header file renames all external
2610d565efSmrg routines with an "x" prefix so they do not collide with the native regex
2710d565efSmrg routines or with other components regex routines. */
2810d565efSmrg /* AIX requires this to be the first thing in the file. */
2910d565efSmrg #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
3010d565efSmrg #pragma alloca
3110d565efSmrg #endif
3210d565efSmrg
3310d565efSmrg #undef _GNU_SOURCE
3410d565efSmrg #define _GNU_SOURCE
3510d565efSmrg
3610d565efSmrg #ifndef INSIDE_RECURSION
3710d565efSmrg # ifdef HAVE_CONFIG_H
3810d565efSmrg # include <config.h>
3910d565efSmrg # endif
4010d565efSmrg #endif
4110d565efSmrg
4210d565efSmrg #include <ansidecl.h>
4310d565efSmrg
4410d565efSmrg #ifndef INSIDE_RECURSION
4510d565efSmrg
4610d565efSmrg # if defined STDC_HEADERS && !defined emacs
4710d565efSmrg # include <stddef.h>
4810d565efSmrg # define PTR_INT_TYPE ptrdiff_t
4910d565efSmrg # else
5010d565efSmrg /* We need this for `regex.h', and perhaps for the Emacs include files. */
5110d565efSmrg # include <sys/types.h>
5210d565efSmrg # define PTR_INT_TYPE long
5310d565efSmrg # endif
5410d565efSmrg
5510d565efSmrg # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
5610d565efSmrg
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
5910d565efSmrg # if defined _LIBC || WIDE_CHAR_SUPPORT
6010d565efSmrg /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
6110d565efSmrg # include <wchar.h>
6210d565efSmrg # include <wctype.h>
6310d565efSmrg # endif
6410d565efSmrg
6510d565efSmrg # ifdef _LIBC
6610d565efSmrg /* We have to keep the namespace clean. */
6710d565efSmrg # define regfree(preg) __regfree (preg)
6810d565efSmrg # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
6910d565efSmrg # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
7010d565efSmrg # define regerror(errcode, preg, errbuf, errbuf_size) \
7110d565efSmrg __regerror(errcode, preg, errbuf, errbuf_size)
7210d565efSmrg # define re_set_registers(bu, re, nu, st, en) \
7310d565efSmrg __re_set_registers (bu, re, nu, st, en)
7410d565efSmrg # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
7510d565efSmrg __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
7610d565efSmrg # define re_match(bufp, string, size, pos, regs) \
7710d565efSmrg __re_match (bufp, string, size, pos, regs)
7810d565efSmrg # define re_search(bufp, string, size, startpos, range, regs) \
7910d565efSmrg __re_search (bufp, string, size, startpos, range, regs)
8010d565efSmrg # define re_compile_pattern(pattern, length, bufp) \
8110d565efSmrg __re_compile_pattern (pattern, length, bufp)
8210d565efSmrg # define re_set_syntax(syntax) __re_set_syntax (syntax)
8310d565efSmrg # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
8410d565efSmrg __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
8510d565efSmrg # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
8610d565efSmrg
8710d565efSmrg # define btowc __btowc
8810d565efSmrg
8910d565efSmrg /* We are also using some library internals. */
9010d565efSmrg # include <locale/localeinfo.h>
9110d565efSmrg # include <locale/elem-hash.h>
9210d565efSmrg # include <langinfo.h>
9310d565efSmrg # include <locale/coll-lookup.h>
9410d565efSmrg # endif
9510d565efSmrg
9610d565efSmrg /* This is for other GNU distributions with internationalized messages. */
9710d565efSmrg # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
9810d565efSmrg # include <libintl.h>
9910d565efSmrg # ifdef _LIBC
10010d565efSmrg # undef gettext
10110d565efSmrg # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
10210d565efSmrg # endif
10310d565efSmrg # else
10410d565efSmrg # define gettext(msgid) (msgid)
10510d565efSmrg # endif
10610d565efSmrg
10710d565efSmrg # ifndef gettext_noop
10810d565efSmrg /* This define is so xgettext can find the internationalizable
10910d565efSmrg strings. */
11010d565efSmrg # define gettext_noop(String) String
11110d565efSmrg # endif
11210d565efSmrg
11310d565efSmrg /* The `emacs' switch turns on certain matching commands
11410d565efSmrg that make sense only in Emacs. */
11510d565efSmrg # ifdef emacs
11610d565efSmrg
11710d565efSmrg # include "lisp.h"
11810d565efSmrg # include "buffer.h"
11910d565efSmrg # include "syntax.h"
12010d565efSmrg
12110d565efSmrg # else /* not emacs */
12210d565efSmrg
12310d565efSmrg /* If we are not linking with Emacs proper,
12410d565efSmrg we can't use the relocating allocator
12510d565efSmrg even if config.h says that we can. */
12610d565efSmrg # undef REL_ALLOC
12710d565efSmrg
12810d565efSmrg # if defined STDC_HEADERS || defined _LIBC
12910d565efSmrg # include <stdlib.h>
13010d565efSmrg # else
13110d565efSmrg char *malloc ();
13210d565efSmrg char *realloc ();
13310d565efSmrg # endif
13410d565efSmrg
13510d565efSmrg /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
13610d565efSmrg If nothing else has been done, use the method below. */
13710d565efSmrg # ifdef INHIBIT_STRING_HEADER
13810d565efSmrg # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
13910d565efSmrg # if !defined bzero && !defined bcopy
14010d565efSmrg # undef INHIBIT_STRING_HEADER
14110d565efSmrg # endif
14210d565efSmrg # endif
14310d565efSmrg # endif
14410d565efSmrg
14510d565efSmrg /* This is the normal way of making sure we have a bcopy and a bzero.
14610d565efSmrg This is used in most programs--a few other programs avoid this
14710d565efSmrg by defining INHIBIT_STRING_HEADER. */
14810d565efSmrg # ifndef INHIBIT_STRING_HEADER
14910d565efSmrg # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
15010d565efSmrg # include <string.h>
15110d565efSmrg # ifndef bzero
15210d565efSmrg # ifndef _LIBC
15310d565efSmrg # define bzero(s, n) ((void) memset (s, '\0', n))
15410d565efSmrg # else
15510d565efSmrg # define bzero(s, n) __bzero (s, n)
15610d565efSmrg # endif
15710d565efSmrg # endif
15810d565efSmrg # else
15910d565efSmrg # include <strings.h>
16010d565efSmrg # ifndef memcmp
16110d565efSmrg # define memcmp(s1, s2, n) bcmp (s1, s2, n)
16210d565efSmrg # endif
16310d565efSmrg # ifndef memcpy
16410d565efSmrg # define memcpy(d, s, n) (bcopy (s, d, n), (d))
16510d565efSmrg # endif
16610d565efSmrg # endif
16710d565efSmrg # endif
16810d565efSmrg
16910d565efSmrg /* Define the syntax stuff for \<, \>, etc. */
17010d565efSmrg
17110d565efSmrg /* This must be nonzero for the wordchar and notwordchar pattern
17210d565efSmrg commands in re_match_2. */
17310d565efSmrg # ifndef Sword
17410d565efSmrg # define Sword 1
17510d565efSmrg # endif
17610d565efSmrg
17710d565efSmrg # ifdef SWITCH_ENUM_BUG
17810d565efSmrg # define SWITCH_ENUM_CAST(x) ((int)(x))
17910d565efSmrg # else
18010d565efSmrg # define SWITCH_ENUM_CAST(x) (x)
18110d565efSmrg # endif
18210d565efSmrg
18310d565efSmrg # endif /* not emacs */
18410d565efSmrg
18510d565efSmrg # if defined _LIBC || HAVE_LIMITS_H
18610d565efSmrg # include <limits.h>
18710d565efSmrg # endif
18810d565efSmrg
18910d565efSmrg # ifndef MB_LEN_MAX
19010d565efSmrg # define MB_LEN_MAX 1
19110d565efSmrg # endif
19210d565efSmrg
19310d565efSmrg /* Get the interface, including the syntax bits. */
19410d565efSmrg # include "xregex.h" /* change for libiberty */
19510d565efSmrg
19610d565efSmrg /* isalpha etc. are used for the character classes. */
19710d565efSmrg # include <ctype.h>
19810d565efSmrg
19910d565efSmrg /* Jim Meyering writes:
20010d565efSmrg
20110d565efSmrg "... Some ctype macros are valid only for character codes that
20210d565efSmrg isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
20310d565efSmrg using /bin/cc or gcc but without giving an ansi option). So, all
20410d565efSmrg ctype uses should be through macros like ISPRINT... If
20510d565efSmrg STDC_HEADERS is defined, then autoconf has verified that the ctype
20610d565efSmrg macros don't need to be guarded with references to isascii. ...
20710d565efSmrg Defining isascii to 1 should let any compiler worth its salt
20810d565efSmrg eliminate the && through constant folding."
20910d565efSmrg Solaris defines some of these symbols so we must undefine them first. */
21010d565efSmrg
21110d565efSmrg # undef ISASCII
21210d565efSmrg # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
21310d565efSmrg # define ISASCII(c) 1
21410d565efSmrg # else
21510d565efSmrg # define ISASCII(c) isascii(c)
21610d565efSmrg # endif
21710d565efSmrg
21810d565efSmrg # ifdef isblank
21910d565efSmrg # define ISBLANK(c) (ISASCII (c) && isblank (c))
22010d565efSmrg # else
22110d565efSmrg # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
22210d565efSmrg # endif
22310d565efSmrg # ifdef isgraph
22410d565efSmrg # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
22510d565efSmrg # else
22610d565efSmrg # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
22710d565efSmrg # endif
22810d565efSmrg
22910d565efSmrg # undef ISPRINT
23010d565efSmrg # define ISPRINT(c) (ISASCII (c) && isprint (c))
23110d565efSmrg # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
23210d565efSmrg # define ISALNUM(c) (ISASCII (c) && isalnum (c))
23310d565efSmrg # define ISALPHA(c) (ISASCII (c) && isalpha (c))
23410d565efSmrg # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
23510d565efSmrg # define ISLOWER(c) (ISASCII (c) && islower (c))
23610d565efSmrg # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
23710d565efSmrg # define ISSPACE(c) (ISASCII (c) && isspace (c))
23810d565efSmrg # define ISUPPER(c) (ISASCII (c) && isupper (c))
23910d565efSmrg # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
24010d565efSmrg
24110d565efSmrg # ifdef _tolower
24210d565efSmrg # define TOLOWER(c) _tolower(c)
24310d565efSmrg # else
24410d565efSmrg # define TOLOWER(c) tolower(c)
24510d565efSmrg # endif
24610d565efSmrg
24710d565efSmrg # ifndef NULL
24810d565efSmrg # define NULL (void *)0
24910d565efSmrg # endif
25010d565efSmrg
25110d565efSmrg /* We remove any previous definition of `SIGN_EXTEND_CHAR',
25210d565efSmrg since ours (we hope) works properly with all combinations of
25310d565efSmrg machines, compilers, `char' and `unsigned char' argument types.
25410d565efSmrg (Per Bothner suggested the basic approach.) */
25510d565efSmrg # undef SIGN_EXTEND_CHAR
25610d565efSmrg # if __STDC__
25710d565efSmrg # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
25810d565efSmrg # else /* not __STDC__ */
25910d565efSmrg /* As in Harbison and Steele. */
26010d565efSmrg # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
26110d565efSmrg # endif
26210d565efSmrg
26310d565efSmrg # ifndef emacs
26410d565efSmrg /* How many characters in the character set. */
26510d565efSmrg # define CHAR_SET_SIZE 256
26610d565efSmrg
26710d565efSmrg # ifdef SYNTAX_TABLE
26810d565efSmrg
26910d565efSmrg extern char *re_syntax_table;
27010d565efSmrg
27110d565efSmrg # else /* not SYNTAX_TABLE */
27210d565efSmrg
27310d565efSmrg static char re_syntax_table[CHAR_SET_SIZE];
27410d565efSmrg
27510d565efSmrg static void init_syntax_once (void);
27610d565efSmrg
27710d565efSmrg static void
init_syntax_once(void)27810d565efSmrg init_syntax_once (void)
27910d565efSmrg {
28010d565efSmrg register int c;
28110d565efSmrg static int done = 0;
28210d565efSmrg
28310d565efSmrg if (done)
28410d565efSmrg return;
28510d565efSmrg bzero (re_syntax_table, sizeof re_syntax_table);
28610d565efSmrg
28710d565efSmrg for (c = 0; c < CHAR_SET_SIZE; ++c)
28810d565efSmrg if (ISALNUM (c))
28910d565efSmrg re_syntax_table[c] = Sword;
29010d565efSmrg
29110d565efSmrg re_syntax_table['_'] = Sword;
29210d565efSmrg
29310d565efSmrg done = 1;
29410d565efSmrg }
29510d565efSmrg
29610d565efSmrg # endif /* not SYNTAX_TABLE */
29710d565efSmrg
29810d565efSmrg # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
29910d565efSmrg
30010d565efSmrg # endif /* emacs */
30110d565efSmrg
30210d565efSmrg /* Integer type for pointers. */
30310d565efSmrg # if !defined _LIBC && !defined HAVE_UINTPTR_T
30410d565efSmrg typedef unsigned long int uintptr_t;
30510d565efSmrg # endif
30610d565efSmrg
30710d565efSmrg /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
30810d565efSmrg use `alloca' instead of `malloc'. This is because using malloc in
30910d565efSmrg re_search* or re_match* could cause memory leaks when C-g is used in
31010d565efSmrg Emacs; also, malloc is slower and causes storage fragmentation. On
31110d565efSmrg the other hand, malloc is more portable, and easier to debug.
31210d565efSmrg
31310d565efSmrg Because we sometimes use alloca, some routines have to be macros,
31410d565efSmrg not functions -- `alloca'-allocated space disappears at the end of the
31510d565efSmrg function it is called in. */
31610d565efSmrg
31710d565efSmrg # ifdef REGEX_MALLOC
31810d565efSmrg
31910d565efSmrg # define REGEX_ALLOCATE malloc
32010d565efSmrg # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
32110d565efSmrg # define REGEX_FREE free
32210d565efSmrg
32310d565efSmrg # else /* not REGEX_MALLOC */
32410d565efSmrg
32510d565efSmrg /* Emacs already defines alloca, sometimes. */
32610d565efSmrg # ifndef alloca
32710d565efSmrg
32810d565efSmrg /* Make alloca work the best possible way. */
32910d565efSmrg # ifdef __GNUC__
33010d565efSmrg # define alloca __builtin_alloca
33110d565efSmrg # else /* not __GNUC__ */
33210d565efSmrg # if HAVE_ALLOCA_H
33310d565efSmrg # include <alloca.h>
33410d565efSmrg # endif /* HAVE_ALLOCA_H */
33510d565efSmrg # endif /* not __GNUC__ */
33610d565efSmrg
33710d565efSmrg # endif /* not alloca */
33810d565efSmrg
33910d565efSmrg # define REGEX_ALLOCATE alloca
34010d565efSmrg
34110d565efSmrg /* Assumes a `char *destination' variable. */
34210d565efSmrg # define REGEX_REALLOCATE(source, osize, nsize) \
34310d565efSmrg (destination = (char *) alloca (nsize), \
34410d565efSmrg memcpy (destination, source, osize))
34510d565efSmrg
34610d565efSmrg /* No need to do anything to free, after alloca. */
34710d565efSmrg # define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
34810d565efSmrg
34910d565efSmrg # endif /* not REGEX_MALLOC */
35010d565efSmrg
35110d565efSmrg /* Define how to allocate the failure stack. */
35210d565efSmrg
35310d565efSmrg # if defined REL_ALLOC && defined REGEX_MALLOC
35410d565efSmrg
35510d565efSmrg # define REGEX_ALLOCATE_STACK(size) \
35610d565efSmrg r_alloc (&failure_stack_ptr, (size))
35710d565efSmrg # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
35810d565efSmrg r_re_alloc (&failure_stack_ptr, (nsize))
35910d565efSmrg # define REGEX_FREE_STACK(ptr) \
36010d565efSmrg r_alloc_free (&failure_stack_ptr)
36110d565efSmrg
36210d565efSmrg # else /* not using relocating allocator */
36310d565efSmrg
36410d565efSmrg # ifdef REGEX_MALLOC
36510d565efSmrg
36610d565efSmrg # define REGEX_ALLOCATE_STACK malloc
36710d565efSmrg # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
36810d565efSmrg # define REGEX_FREE_STACK free
36910d565efSmrg
37010d565efSmrg # else /* not REGEX_MALLOC */
37110d565efSmrg
37210d565efSmrg # define REGEX_ALLOCATE_STACK alloca
37310d565efSmrg
37410d565efSmrg # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
37510d565efSmrg REGEX_REALLOCATE (source, osize, nsize)
37610d565efSmrg /* No need to explicitly free anything. */
37710d565efSmrg # define REGEX_FREE_STACK(arg)
37810d565efSmrg
37910d565efSmrg # endif /* not REGEX_MALLOC */
38010d565efSmrg # endif /* not using relocating allocator */
38110d565efSmrg
38210d565efSmrg
38310d565efSmrg /* True if `size1' is non-NULL and PTR is pointing anywhere inside
38410d565efSmrg `string1' or just past its end. This works if PTR is NULL, which is
38510d565efSmrg a good thing. */
38610d565efSmrg # define FIRST_STRING_P(ptr) \
38710d565efSmrg (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
38810d565efSmrg
/* Heap helpers for COUNT objects of type TYPE.  The pointer returned by
   malloc/realloc is cast and handed back as is.  */
# define TALLOC(count, type) ((type *) malloc ((count) * sizeof (type)))
# define RETALLOC(ptr, count, type) \
  ((ptr) = (type *) realloc (ptr, (count) * sizeof (type)))
/* Resize PTR when it is already set, otherwise allocate it afresh.  */
# define RETALLOC_IF(ptr, count, type) \
  if (ptr) RETALLOC ((ptr), (count), type); else (ptr) = TALLOC ((count), type)
/* Same as TALLOC, except that it goes through REGEX_ALLOCATE.  */
# define REGEX_TALLOC(count, type) \
  ((type *) REGEX_ALLOCATE ((count) * sizeof (type)))

/* Width of a byte, in bits.  */
# define BYTEWIDTH 8

/* Nonzero when strcmp reports the two strings as equal.  */
# define STREQ(s1, s2) (strcmp (s1, s2) == 0)

/* Drop any earlier definitions before supplying our own.  */
# undef MAX
# undef MIN
# define MAX(x, y) ((x) > (y) ? (x) : (y))
# define MIN(x, y) ((x) < (y) ? (x) : (y))

/* Truth type used in this file, and its two values.  */
typedef char boolean;
# define false 0
# define true 1
40810d565efSmrg
40910d565efSmrg static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
41010d565efSmrg reg_syntax_t syntax,
41110d565efSmrg struct re_pattern_buffer *bufp);
41210d565efSmrg
41310d565efSmrg static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
41410d565efSmrg const char *string1, int size1,
41510d565efSmrg const char *string2, int size2,
41610d565efSmrg int pos,
41710d565efSmrg struct re_registers *regs,
41810d565efSmrg int stop);
41910d565efSmrg static int byte_re_search_2 (struct re_pattern_buffer *bufp,
42010d565efSmrg const char *string1, int size1,
42110d565efSmrg const char *string2, int size2,
42210d565efSmrg int startpos, int range,
42310d565efSmrg struct re_registers *regs, int stop);
42410d565efSmrg static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
42510d565efSmrg
42610d565efSmrg #ifdef MBS_SUPPORT
42710d565efSmrg static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
42810d565efSmrg reg_syntax_t syntax,
42910d565efSmrg struct re_pattern_buffer *bufp);
43010d565efSmrg
43110d565efSmrg
43210d565efSmrg static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
43310d565efSmrg const char *cstring1, int csize1,
43410d565efSmrg const char *cstring2, int csize2,
43510d565efSmrg int pos,
43610d565efSmrg struct re_registers *regs,
43710d565efSmrg int stop,
43810d565efSmrg wchar_t *string1, int size1,
43910d565efSmrg wchar_t *string2, int size2,
44010d565efSmrg int *mbs_offset1, int *mbs_offset2);
44110d565efSmrg static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
44210d565efSmrg const char *string1, int size1,
44310d565efSmrg const char *string2, int size2,
44410d565efSmrg int startpos, int range,
44510d565efSmrg struct re_registers *regs, int stop);
44610d565efSmrg static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
44710d565efSmrg #endif
44810d565efSmrg
/* Opcodes that make up a compiled regular expression.  Some of them
   are followed by argument bytes, and each opcode is free to interpret
   its own arguments in any way whatsoever.  The compiled pattern may
   contain zero bytes.  */

typedef enum
{
  no_op = 0,

  /* Succeed immediately; no further backtracking is done.  */
  succeed,

  /* One byte holding a count n, followed by n literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
  /* As exactn, but the data is binary.  */
  exactn_bin,
# endif

  /* Matches (more or less) any character.  */
  anychar,

  /* Matches one character out of a given set.  The first byte after
     the opcode is the number of bitmap bytes, and the bitmap itself
     follows.  Within each bitmap byte the bits are ordered
     low-bit-first, and a character belongs to the set when its bit
     is 1.  A character too large to have a bit in the map is
     automatically not in the set.  */
  /* With MBS_SUPPORT, what follows is the length of the character
     classes, of the collating symbols, of the equivalence classes, of
     the character ranges, and of the characters.  After those come
     the character class elements, collating symbol elements,
     equivalence class elements, range elements, and character
     elements.  See the regex_compile function.  */
  charset,

  /* Takes the same parameters as charset, but matches any character
     that is NOT among those specified.  */
  charset_not,

  /* Begin remembering the matched text so that it can be stored in a
     register.  One byte with the register number follows (0 up to
     one less than the pattern buffer's re_nsub field), and then one
     byte with the number of groups inner to this one.  (That second
     byte is part of start_memory only because the on_failure_jump of
     re_match_2 needs it.)  */
  start_memory,

  /* Finish remembering the matched text and store it in a memory
     register.  Just as for `start_memory', one byte with the register
     number (0 up to one less than `re_nsub' in the pattern buffer)
     and one byte with the number of inner groups follow.  (The inner
     group count is needed here because there is no easy way to find
     the corresponding start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of previously remembered text.  One byte with
     the register number follows.  */
  duplicate,

  /* Fail unless at the beginning of a line.  */
  begline,

  /* Fail unless at the end of a line.  */
  endline,

  /* Succeed at the beginning of the buffer (if emacs) or at the
     beginning of the string to be matched (if not).  */
  begbuf,

  /* The analogous test for the end of the buffer/string.  */
  endbuf,

  /* Followed by a two-byte relative address to jump to.  */
  jump,

  /* Like jump, but additionally marks the end of an alternative.  */
  jump_past_alt,

  /* Followed by the two-byte relative address of the place to resume
     at in case of failure.  */
  /* With MBS_SUPPORT, the size of the address is 1.  */
  on_failure_jump,

  /* Like on_failure_jump, except that when executed it pushes a
     placeholder rather than the current string position.  */
  on_failure_keep_string_jump,

  /* Discard the latest failure point, then jump to the two-byte
     relative address that follows.  */
  /* With MBS_SUPPORT, the size of the address is 1.  */
  pop_failure_jump,

  /* Becomes pop_failure_jump when it is known that no backtracking
     will be needed to match, and jump otherwise.  It is used to jump
     back to the beginning of a repeat: if what follows this jump
     clearly won't match what the repeat does, so that backtracking
     out of repetitions already matched is of no use, it is changed to
     a pop_failure_jump.  Followed by a two-byte address.  */
  /* With MBS_SUPPORT, the size of the address is 1.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy
     failure point, which is thrown away if an attempt is made to use
     it for a failure.  A `+' construct makes this before the first
     repeat.  Also serves as an intermediary kind of jump when
     compiling an alternative.  */
  /* With MBS_SUPPORT, the size of the address is 1.  */
  dummy_failure_jump,

  /* Push a dummy failure point and carry on.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* Followed by a two-byte relative address and a two-byte number n.
     After matching N times, jump to the address upon failure.  */
  /* With MBS_SUPPORT, the size of the address is 1.  */
  succeed_n,

  /* Followed by a two-byte relative address and a two-byte number n.
     Jump to the address N times, then fail.  */
  /* With MBS_SUPPORT, the size of the address is 1.  */
  jump_n,

  /* Set the location named by the following two-byte relative address
     to the two-byte number that comes after it.  The address
     *includes* the two bytes of number.  */
  /* With MBS_SUPPORT, the size of the address is 1.  */
  set_number_at,

  wordchar,	/* Matches any word-constituent character.  */
  notwordchar,	/* Matches any character that is not a word-constituent.  */

  wordbeg,	/* Succeeds at the beginning of a word.  */
  wordend,	/* Succeeds at the end of a word.  */

  wordbound,	/* Succeeds at a word boundary.  */
  notwordbound	/* Succeeds when not at a word boundary.  */

# ifdef emacs
  ,before_dot,	/* Succeeds if before point.  */
  at_dot,	/* Succeeds if at point.  */
  after_dot,	/* Succeeds if after point.  */

  /* Matches any character whose syntax is the one specified.  One
     byte containing a syntax code (e.g., Sword) follows.  */
  syntaxspec,

  /* Matches any character whose syntax is not the one specified.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
60610d565efSmrg #endif /* not INSIDE_RECURSION */
60710d565efSmrg
60810d565efSmrg
60910d565efSmrg #ifdef BYTE
61010d565efSmrg # define CHAR_T char
61110d565efSmrg # define UCHAR_T unsigned char
61210d565efSmrg # define COMPILED_BUFFER_VAR bufp->buffer
61310d565efSmrg # define OFFSET_ADDRESS_SIZE 2
61410d565efSmrg # define PREFIX(name) byte_##name
61510d565efSmrg # define ARG_PREFIX(name) name
61610d565efSmrg # define PUT_CHAR(c) putchar (c)
61710d565efSmrg #else
61810d565efSmrg # ifdef WCHAR
61910d565efSmrg # define CHAR_T wchar_t
62010d565efSmrg # define UCHAR_T wchar_t
62110d565efSmrg # define COMPILED_BUFFER_VAR wc_buffer
62210d565efSmrg # define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
62310d565efSmrg # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
62410d565efSmrg # define PREFIX(name) wcs_##name
62510d565efSmrg # define ARG_PREFIX(name) c##name
62610d565efSmrg /* Should we use wide stream?? */
/* Deliberately no trailing semicolon: the call site supplies it, just
   as it does for the BYTE definition of PUT_CHAR.  A semicolon inside
   the macro would turn `if (x) PUT_CHAR (c); else ...' into a syntax
   error.  */
#  define PUT_CHAR(c) printf ("%C", c)
62810d565efSmrg # define TRUE 1
62910d565efSmrg # define FALSE 0
63010d565efSmrg # else
63110d565efSmrg # ifdef MBS_SUPPORT
63210d565efSmrg # define WCHAR
63310d565efSmrg # define INSIDE_RECURSION
63410d565efSmrg # include "regex.c"
63510d565efSmrg # undef INSIDE_RECURSION
63610d565efSmrg # endif
63710d565efSmrg # define BYTE
63810d565efSmrg # define INSIDE_RECURSION
63910d565efSmrg # include "regex.c"
64010d565efSmrg # undef INSIDE_RECURSION
64110d565efSmrg # endif
64210d565efSmrg #endif
64310d565efSmrg
64410d565efSmrg #ifdef INSIDE_RECURSION
64510d565efSmrg /* Common operations on the compiled pattern. */
64610d565efSmrg
/* STORE_NUMBER (DST, NUM) writes NUM into the compiled pattern at DST.
   The BYTE variant uses two consecutive bytes, low-order byte first;
   the WCHAR variant (MBS_SUPPORT) uses a single element.  */

# ifdef WCHAR
#  define STORE_NUMBER(dst, num)					\
  do {									\
    (dst)[0] = (UCHAR_T) (num);						\
  } while (0)
# else /* BYTE */
#  define STORE_NUMBER(dst, num)					\
  do {									\
    (dst)[0] = (num) & 0xff;						\
    (dst)[1] = (num) >> 8;						\
  } while (0)
# endif /* WCHAR */

/* STORE_NUMBER_AND_INCR (DST, NUM) stores NUM the same way and then
   advances DST by OFFSET_ADDRESS_SIZE elements, i.e. to just after the
   stored number.  DST therefore has to be an lvalue.  */

# define STORE_NUMBER_AND_INCR(dst, num)				\
  do {									\
    STORE_NUMBER (dst, num);						\
    (dst) += OFFSET_ADDRESS_SIZE;					\
  } while (0)
67310d565efSmrg
/* EXTRACT_NUMBER (DST, SRC) loads into DST the number encoded at SRC.
   The BYTE variant reads two consecutive bytes: the first is the
   low-order byte and the second, sign-extended, is the high-order
   byte.  The WCHAR variant (MBS_SUPPORT) reads a single element.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(dst, src)					\
  do {									\
    (dst) = (src)[0];							\
  } while (0)
# else /* BYTE */
#  define EXTRACT_NUMBER(dst, src)					\
  do {									\
    (dst) = (src)[0] & 0xff;						\
    (dst) += ((unsigned) SIGN_EXTEND_CHAR ((src)[1])) << 8;		\
  } while (0)
# endif
69010d565efSmrg
69110d565efSmrg # ifdef DEBUG
69210d565efSmrg static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
69310d565efSmrg static void
PREFIX(extract_number)69410d565efSmrg PREFIX(extract_number) (int *dest, UCHAR_T *source)
69510d565efSmrg {
69610d565efSmrg # ifdef WCHAR
69710d565efSmrg *dest = *source;
69810d565efSmrg # else /* BYTE */
69910d565efSmrg int temp = SIGN_EXTEND_CHAR (*(source + 1));
70010d565efSmrg *dest = *source & 0377;
70110d565efSmrg *dest += temp << 8;
70210d565efSmrg # endif
70310d565efSmrg }
70410d565efSmrg
70510d565efSmrg # ifndef EXTRACT_MACROS /* To debug the macros. */
70610d565efSmrg # undef EXTRACT_NUMBER
70710d565efSmrg # define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
70810d565efSmrg # endif /* not EXTRACT_MACROS */
70910d565efSmrg
71010d565efSmrg # endif /* DEBUG */
71110d565efSmrg
/* Read a number with EXTRACT_NUMBER, then step SOURCE past it by
   OFFSET_ADDRESS_SIZE elements.  SOURCE is assigned to, so it must be
   an lvalue.  */

# define EXTRACT_NUMBER_AND_INCR(destination, source) \
  do \
    { \
      EXTRACT_NUMBER (destination, source); \
      (source) += OFFSET_ADDRESS_SIZE; \
    } \
  while (0)
72010d565efSmrg
72110d565efSmrg # ifdef DEBUG
72210d565efSmrg static void PREFIX(extract_number_and_incr) (int *destination,
72310d565efSmrg UCHAR_T **source);
72410d565efSmrg static void
PREFIX(extract_number_and_incr)72510d565efSmrg PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
72610d565efSmrg {
72710d565efSmrg PREFIX(extract_number) (destination, *source);
72810d565efSmrg *source += OFFSET_ADDRESS_SIZE;
72910d565efSmrg }
73010d565efSmrg
73110d565efSmrg # ifndef EXTRACT_MACROS
73210d565efSmrg # undef EXTRACT_NUMBER_AND_INCR
73310d565efSmrg # define EXTRACT_NUMBER_AND_INCR(dest, src) \
73410d565efSmrg PREFIX(extract_number_and_incr) (&dest, &src)
73510d565efSmrg # endif /* not EXTRACT_MACROS */
73610d565efSmrg
73710d565efSmrg # endif /* DEBUG */
73810d565efSmrg
73910d565efSmrg
74010d565efSmrg
74110d565efSmrg /* If DEBUG is defined, Regex prints many voluminous messages about what
74210d565efSmrg it is doing (if the variable `debug' is nonzero). If linked with the
74310d565efSmrg main program in `iregex.c', you can enter patterns and strings
74410d565efSmrg interactively. And if linked with the main program in `main.c' and
74510d565efSmrg the other test files, you can run the already-written tests. */
74610d565efSmrg
74710d565efSmrg # ifdef DEBUG
74810d565efSmrg
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* Tracing switch: the DEBUG_PRINT* macros below print only while this
   is nonzero.  */
static int debug;

/* DEBUG_STATEMENT keeps its argument; DEBUG_PRINTn forwards its n
   arguments to printf when `debug' is nonzero.  Each DEBUG_PRINTn
   expands to a bare `if' statement without a terminating semicolon.  */
#   define DEBUG_STATEMENT(e) e
#   define DEBUG_PRINT1(x) if (debug != 0) printf (x)
#   define DEBUG_PRINT2(x1, x2) if (debug != 0) printf (x1, x2)
#   define DEBUG_PRINT3(x1, x2, x3) if (debug != 0) printf (x1, x2, x3)
#   define DEBUG_PRINT4(x1, x2, x3, x4) \
  if (debug != 0) printf (x1, x2, x3, x4)
#  endif /* not DEFINED_ONCE */

/* Dump the compiled pattern between S and E, or the remainder of a
   two-part subject string starting at W, when `debug' is nonzero.  The
   P argument of DEBUG_PRINT_COMPILED_PATTERN is not used.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
  if (debug != 0) PREFIX(print_partial_compiled_pattern) (s, e)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
  if (debug != 0) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
77010d565efSmrg
77110d565efSmrg
77210d565efSmrg /* Print the fastmap in human-readable form. */
77310d565efSmrg
77410d565efSmrg # ifndef DEFINED_ONCE
77510d565efSmrg void
print_fastmap(char * fastmap)77610d565efSmrg print_fastmap (char *fastmap)
77710d565efSmrg {
77810d565efSmrg unsigned was_a_range = 0;
77910d565efSmrg unsigned i = 0;
78010d565efSmrg
78110d565efSmrg while (i < (1 << BYTEWIDTH))
78210d565efSmrg {
78310d565efSmrg if (fastmap[i++])
78410d565efSmrg {
78510d565efSmrg was_a_range = 0;
78610d565efSmrg putchar (i - 1);
78710d565efSmrg while (i < (1 << BYTEWIDTH) && fastmap[i])
78810d565efSmrg {
78910d565efSmrg was_a_range = 1;
79010d565efSmrg i++;
79110d565efSmrg }
79210d565efSmrg if (was_a_range)
79310d565efSmrg {
79410d565efSmrg printf ("-");
79510d565efSmrg putchar (i - 1);
79610d565efSmrg }
79710d565efSmrg }
79810d565efSmrg }
79910d565efSmrg putchar ('\n');
80010d565efSmrg }
80110d565efSmrg # endif /* not DEFINED_ONCE */
80210d565efSmrg
80310d565efSmrg
80410d565efSmrg /* Print a compiled pattern string in human-readable form, starting at
80510d565efSmrg the START pointer into it and ending just before the pointer END. */
80610d565efSmrg
80710d565efSmrg void
PREFIX(print_partial_compiled_pattern)80810d565efSmrg PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
80910d565efSmrg {
81010d565efSmrg int mcnt, mcnt2;
81110d565efSmrg UCHAR_T *p1;
81210d565efSmrg UCHAR_T *p = start;
81310d565efSmrg UCHAR_T *pend = end;
81410d565efSmrg
81510d565efSmrg if (start == NULL)
81610d565efSmrg {
81710d565efSmrg printf ("(null)\n");
81810d565efSmrg return;
81910d565efSmrg }
82010d565efSmrg
82110d565efSmrg /* Loop over pattern commands. */
82210d565efSmrg while (p < pend)
82310d565efSmrg {
82410d565efSmrg # ifdef _LIBC
82510d565efSmrg printf ("%td:\t", p - start);
82610d565efSmrg # else
82710d565efSmrg printf ("%ld:\t", (long int) (p - start));
82810d565efSmrg # endif
82910d565efSmrg
83010d565efSmrg switch ((re_opcode_t) *p++)
83110d565efSmrg {
83210d565efSmrg case no_op:
83310d565efSmrg printf ("/no_op");
83410d565efSmrg break;
83510d565efSmrg
83610d565efSmrg case exactn:
83710d565efSmrg mcnt = *p++;
83810d565efSmrg printf ("/exactn/%d", mcnt);
83910d565efSmrg do
84010d565efSmrg {
84110d565efSmrg putchar ('/');
84210d565efSmrg PUT_CHAR (*p++);
84310d565efSmrg }
84410d565efSmrg while (--mcnt);
84510d565efSmrg break;
84610d565efSmrg
84710d565efSmrg # ifdef MBS_SUPPORT
84810d565efSmrg case exactn_bin:
84910d565efSmrg mcnt = *p++;
85010d565efSmrg printf ("/exactn_bin/%d", mcnt);
85110d565efSmrg do
85210d565efSmrg {
85310d565efSmrg printf("/%lx", (long int) *p++);
85410d565efSmrg }
85510d565efSmrg while (--mcnt);
85610d565efSmrg break;
85710d565efSmrg # endif /* MBS_SUPPORT */
85810d565efSmrg
85910d565efSmrg case start_memory:
86010d565efSmrg mcnt = *p++;
86110d565efSmrg printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
86210d565efSmrg break;
86310d565efSmrg
86410d565efSmrg case stop_memory:
86510d565efSmrg mcnt = *p++;
86610d565efSmrg printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
86710d565efSmrg break;
86810d565efSmrg
86910d565efSmrg case duplicate:
87010d565efSmrg printf ("/duplicate/%ld", (long int) *p++);
87110d565efSmrg break;
87210d565efSmrg
87310d565efSmrg case anychar:
87410d565efSmrg printf ("/anychar");
87510d565efSmrg break;
87610d565efSmrg
87710d565efSmrg case charset:
87810d565efSmrg case charset_not:
87910d565efSmrg {
88010d565efSmrg # ifdef WCHAR
88110d565efSmrg int i, length;
88210d565efSmrg wchar_t *workp = p;
88310d565efSmrg printf ("/charset [%s",
88410d565efSmrg (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
88510d565efSmrg p += 5;
88610d565efSmrg length = *workp++; /* the length of char_classes */
88710d565efSmrg for (i=0 ; i<length ; i++)
88810d565efSmrg printf("[:%lx:]", (long int) *p++);
88910d565efSmrg length = *workp++; /* the length of collating_symbol */
89010d565efSmrg for (i=0 ; i<length ;)
89110d565efSmrg {
89210d565efSmrg printf("[.");
89310d565efSmrg while(*p != 0)
89410d565efSmrg PUT_CHAR((i++,*p++));
89510d565efSmrg i++,p++;
89610d565efSmrg printf(".]");
89710d565efSmrg }
89810d565efSmrg length = *workp++; /* the length of equivalence_class */
89910d565efSmrg for (i=0 ; i<length ;)
90010d565efSmrg {
90110d565efSmrg printf("[=");
90210d565efSmrg while(*p != 0)
90310d565efSmrg PUT_CHAR((i++,*p++));
90410d565efSmrg i++,p++;
90510d565efSmrg printf("=]");
90610d565efSmrg }
90710d565efSmrg length = *workp++; /* the length of char_range */
90810d565efSmrg for (i=0 ; i<length ; i++)
90910d565efSmrg {
91010d565efSmrg wchar_t range_start = *p++;
91110d565efSmrg wchar_t range_end = *p++;
91210d565efSmrg printf("%C-%C", range_start, range_end);
91310d565efSmrg }
91410d565efSmrg length = *workp++; /* the length of char */
91510d565efSmrg for (i=0 ; i<length ; i++)
91610d565efSmrg printf("%C", *p++);
91710d565efSmrg putchar (']');
91810d565efSmrg # else
91910d565efSmrg register int c, last = -100;
92010d565efSmrg register int in_range = 0;
92110d565efSmrg
92210d565efSmrg printf ("/charset [%s",
92310d565efSmrg (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
92410d565efSmrg
92510d565efSmrg assert (p + *p < pend);
92610d565efSmrg
92710d565efSmrg for (c = 0; c < 256; c++)
92810d565efSmrg if (c / 8 < *p
92910d565efSmrg && (p[1 + (c/8)] & (1 << (c % 8))))
93010d565efSmrg {
93110d565efSmrg /* Are we starting a range? */
93210d565efSmrg if (last + 1 == c && ! in_range)
93310d565efSmrg {
93410d565efSmrg putchar ('-');
93510d565efSmrg in_range = 1;
93610d565efSmrg }
93710d565efSmrg /* Have we broken a range? */
93810d565efSmrg else if (last + 1 != c && in_range)
93910d565efSmrg {
94010d565efSmrg putchar (last);
94110d565efSmrg in_range = 0;
94210d565efSmrg }
94310d565efSmrg
94410d565efSmrg if (! in_range)
94510d565efSmrg putchar (c);
94610d565efSmrg
94710d565efSmrg last = c;
94810d565efSmrg }
94910d565efSmrg
95010d565efSmrg if (in_range)
95110d565efSmrg putchar (last);
95210d565efSmrg
95310d565efSmrg putchar (']');
95410d565efSmrg
95510d565efSmrg p += 1 + *p;
95610d565efSmrg # endif /* WCHAR */
95710d565efSmrg }
95810d565efSmrg break;
95910d565efSmrg
96010d565efSmrg case begline:
96110d565efSmrg printf ("/begline");
96210d565efSmrg break;
96310d565efSmrg
96410d565efSmrg case endline:
96510d565efSmrg printf ("/endline");
96610d565efSmrg break;
96710d565efSmrg
96810d565efSmrg case on_failure_jump:
96910d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
97010d565efSmrg # ifdef _LIBC
97110d565efSmrg printf ("/on_failure_jump to %td", p + mcnt - start);
97210d565efSmrg # else
97310d565efSmrg printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
97410d565efSmrg # endif
97510d565efSmrg break;
97610d565efSmrg
97710d565efSmrg case on_failure_keep_string_jump:
97810d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
97910d565efSmrg # ifdef _LIBC
98010d565efSmrg printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
98110d565efSmrg # else
98210d565efSmrg printf ("/on_failure_keep_string_jump to %ld",
98310d565efSmrg (long int) (p + mcnt - start));
98410d565efSmrg # endif
98510d565efSmrg break;
98610d565efSmrg
98710d565efSmrg case dummy_failure_jump:
98810d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
98910d565efSmrg # ifdef _LIBC
99010d565efSmrg printf ("/dummy_failure_jump to %td", p + mcnt - start);
99110d565efSmrg # else
99210d565efSmrg printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
99310d565efSmrg # endif
99410d565efSmrg break;
99510d565efSmrg
99610d565efSmrg case push_dummy_failure:
99710d565efSmrg printf ("/push_dummy_failure");
99810d565efSmrg break;
99910d565efSmrg
100010d565efSmrg case maybe_pop_jump:
100110d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
100210d565efSmrg # ifdef _LIBC
100310d565efSmrg printf ("/maybe_pop_jump to %td", p + mcnt - start);
100410d565efSmrg # else
100510d565efSmrg printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
100610d565efSmrg # endif
100710d565efSmrg break;
100810d565efSmrg
100910d565efSmrg case pop_failure_jump:
101010d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
101110d565efSmrg # ifdef _LIBC
101210d565efSmrg printf ("/pop_failure_jump to %td", p + mcnt - start);
101310d565efSmrg # else
101410d565efSmrg printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
101510d565efSmrg # endif
101610d565efSmrg break;
101710d565efSmrg
101810d565efSmrg case jump_past_alt:
101910d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
102010d565efSmrg # ifdef _LIBC
102110d565efSmrg printf ("/jump_past_alt to %td", p + mcnt - start);
102210d565efSmrg # else
102310d565efSmrg printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
102410d565efSmrg # endif
102510d565efSmrg break;
102610d565efSmrg
102710d565efSmrg case jump:
102810d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
102910d565efSmrg # ifdef _LIBC
103010d565efSmrg printf ("/jump to %td", p + mcnt - start);
103110d565efSmrg # else
103210d565efSmrg printf ("/jump to %ld", (long int) (p + mcnt - start));
103310d565efSmrg # endif
103410d565efSmrg break;
103510d565efSmrg
103610d565efSmrg case succeed_n:
103710d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
103810d565efSmrg p1 = p + mcnt;
103910d565efSmrg PREFIX(extract_number_and_incr) (&mcnt2, &p);
104010d565efSmrg # ifdef _LIBC
104110d565efSmrg printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
104210d565efSmrg # else
104310d565efSmrg printf ("/succeed_n to %ld, %d times",
104410d565efSmrg (long int) (p1 - start), mcnt2);
104510d565efSmrg # endif
104610d565efSmrg break;
104710d565efSmrg
104810d565efSmrg case jump_n:
104910d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
105010d565efSmrg p1 = p + mcnt;
105110d565efSmrg PREFIX(extract_number_and_incr) (&mcnt2, &p);
105210d565efSmrg printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
105310d565efSmrg break;
105410d565efSmrg
105510d565efSmrg case set_number_at:
105610d565efSmrg PREFIX(extract_number_and_incr) (&mcnt, &p);
105710d565efSmrg p1 = p + mcnt;
105810d565efSmrg PREFIX(extract_number_and_incr) (&mcnt2, &p);
105910d565efSmrg # ifdef _LIBC
106010d565efSmrg printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
106110d565efSmrg # else
106210d565efSmrg printf ("/set_number_at location %ld to %d",
106310d565efSmrg (long int) (p1 - start), mcnt2);
106410d565efSmrg # endif
106510d565efSmrg break;
106610d565efSmrg
106710d565efSmrg case wordbound:
106810d565efSmrg printf ("/wordbound");
106910d565efSmrg break;
107010d565efSmrg
107110d565efSmrg case notwordbound:
107210d565efSmrg printf ("/notwordbound");
107310d565efSmrg break;
107410d565efSmrg
107510d565efSmrg case wordbeg:
107610d565efSmrg printf ("/wordbeg");
107710d565efSmrg break;
107810d565efSmrg
107910d565efSmrg case wordend:
108010d565efSmrg printf ("/wordend");
108110d565efSmrg break;
108210d565efSmrg
108310d565efSmrg # ifdef emacs
108410d565efSmrg case before_dot:
108510d565efSmrg printf ("/before_dot");
108610d565efSmrg break;
108710d565efSmrg
108810d565efSmrg case at_dot:
108910d565efSmrg printf ("/at_dot");
109010d565efSmrg break;
109110d565efSmrg
109210d565efSmrg case after_dot:
109310d565efSmrg printf ("/after_dot");
109410d565efSmrg break;
109510d565efSmrg
109610d565efSmrg case syntaxspec:
109710d565efSmrg printf ("/syntaxspec");
109810d565efSmrg mcnt = *p++;
109910d565efSmrg printf ("/%d", mcnt);
110010d565efSmrg break;
110110d565efSmrg
110210d565efSmrg case notsyntaxspec:
110310d565efSmrg printf ("/notsyntaxspec");
110410d565efSmrg mcnt = *p++;
110510d565efSmrg printf ("/%d", mcnt);
110610d565efSmrg break;
110710d565efSmrg # endif /* emacs */
110810d565efSmrg
110910d565efSmrg case wordchar:
111010d565efSmrg printf ("/wordchar");
111110d565efSmrg break;
111210d565efSmrg
111310d565efSmrg case notwordchar:
111410d565efSmrg printf ("/notwordchar");
111510d565efSmrg break;
111610d565efSmrg
111710d565efSmrg case begbuf:
111810d565efSmrg printf ("/begbuf");
111910d565efSmrg break;
112010d565efSmrg
112110d565efSmrg case endbuf:
112210d565efSmrg printf ("/endbuf");
112310d565efSmrg break;
112410d565efSmrg
112510d565efSmrg default:
112610d565efSmrg printf ("?%ld", (long int) *(p-1));
112710d565efSmrg }
112810d565efSmrg
112910d565efSmrg putchar ('\n');
113010d565efSmrg }
113110d565efSmrg
113210d565efSmrg # ifdef _LIBC
113310d565efSmrg printf ("%td:\tend of pattern.\n", p - start);
113410d565efSmrg # else
113510d565efSmrg printf ("%ld:\tend of pattern.\n", (long int) (p - start));
113610d565efSmrg # endif
113710d565efSmrg }
113810d565efSmrg
113910d565efSmrg
114010d565efSmrg void
PREFIX(print_compiled_pattern)114110d565efSmrg PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
114210d565efSmrg {
114310d565efSmrg UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
114410d565efSmrg
114510d565efSmrg PREFIX(print_partial_compiled_pattern) (buffer, buffer
114610d565efSmrg + bufp->used / sizeof(UCHAR_T));
114710d565efSmrg printf ("%ld bytes used/%ld bytes allocated.\n",
114810d565efSmrg bufp->used, bufp->allocated);
114910d565efSmrg
115010d565efSmrg if (bufp->fastmap_accurate && bufp->fastmap)
115110d565efSmrg {
115210d565efSmrg printf ("fastmap: ");
115310d565efSmrg print_fastmap (bufp->fastmap);
115410d565efSmrg }
115510d565efSmrg
115610d565efSmrg # ifdef _LIBC
115710d565efSmrg printf ("re_nsub: %Zd\t", bufp->re_nsub);
115810d565efSmrg # else
115910d565efSmrg printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
116010d565efSmrg # endif
116110d565efSmrg printf ("regs_alloc: %d\t", bufp->regs_allocated);
116210d565efSmrg printf ("can_be_null: %d\t", bufp->can_be_null);
116310d565efSmrg printf ("newline_anchor: %d\n", bufp->newline_anchor);
116410d565efSmrg printf ("no_sub: %d\t", bufp->no_sub);
116510d565efSmrg printf ("not_bol: %d\t", bufp->not_bol);
116610d565efSmrg printf ("not_eol: %d\t", bufp->not_eol);
116710d565efSmrg printf ("syntax: %lx\n", bufp->syntax);
116810d565efSmrg /* Perhaps we should print the translate table? */
116910d565efSmrg }
117010d565efSmrg
117110d565efSmrg
117210d565efSmrg void
PREFIX(print_double_string)117310d565efSmrg PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
117410d565efSmrg int size1, const CHAR_T *string2, int size2)
117510d565efSmrg {
117610d565efSmrg int this_char;
117710d565efSmrg
117810d565efSmrg if (where == NULL)
117910d565efSmrg printf ("(null)");
118010d565efSmrg else
118110d565efSmrg {
118210d565efSmrg int cnt;
118310d565efSmrg
118410d565efSmrg if (FIRST_STRING_P (where))
118510d565efSmrg {
118610d565efSmrg for (this_char = where - string1; this_char < size1; this_char++)
118710d565efSmrg PUT_CHAR (string1[this_char]);
118810d565efSmrg
118910d565efSmrg where = string2;
119010d565efSmrg }
119110d565efSmrg
119210d565efSmrg cnt = 0;
119310d565efSmrg for (this_char = where - string2; this_char < size2; this_char++)
119410d565efSmrg {
119510d565efSmrg PUT_CHAR (string2[this_char]);
119610d565efSmrg if (++cnt > 100)
119710d565efSmrg {
119810d565efSmrg fputs ("...", stdout);
119910d565efSmrg break;
120010d565efSmrg }
120110d565efSmrg }
120210d565efSmrg }
120310d565efSmrg }
120410d565efSmrg
120510d565efSmrg # ifndef DEFINED_ONCE
/* Write the single character C to stderr.  */
void
printchar (int c)
{
  fputc (c, stderr);
}
121110d565efSmrg # endif
121210d565efSmrg
121310d565efSmrg # else /* not DEBUG */
121410d565efSmrg
#  ifndef DEFINED_ONCE
/* Without DEBUG, assertions and the tracing helpers expand to nothing,
   so their arguments are never evaluated.  */
#   undef assert
#   define assert(e)

#   define DEBUG_STATEMENT(e)
#   define DEBUG_PRINT1(x)
#   define DEBUG_PRINT2(x1, x2)
#   define DEBUG_PRINT3(x1, x2, x3)
#   define DEBUG_PRINT4(x1, x2, x3, x4)
#  endif /* not DEFINED_ONCE */
/* The pattern and string dumpers are likewise empty.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
122710d565efSmrg
122810d565efSmrg # endif /* not DEBUG */
122910d565efSmrg
123010d565efSmrg
123110d565efSmrg
123210d565efSmrg # ifdef WCHAR
123310d565efSmrg /* This convert a multibyte string to a wide character string.
123410d565efSmrg And write their correspondances to offset_buffer(see below)
123510d565efSmrg and write whether each wchar_t is binary data to is_binary.
123610d565efSmrg This assume invalid multibyte sequences as binary data.
123710d565efSmrg We assume offset_buffer and is_binary is already allocated
123810d565efSmrg enough space. */
123910d565efSmrg
124010d565efSmrg static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
124110d565efSmrg size_t len, int *offset_buffer,
124210d565efSmrg char *is_binary);
124310d565efSmrg static size_t
convert_mbs_to_wcs(CHAR_T * dest,const unsigned char * src,size_t len,int * offset_buffer,char * is_binary)124410d565efSmrg convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
124510d565efSmrg int *offset_buffer, char *is_binary)
124610d565efSmrg /* It hold correspondances between src(char string) and
124710d565efSmrg dest(wchar_t string) for optimization.
124810d565efSmrg e.g. src = "xxxyzz"
124910d565efSmrg dest = {'X', 'Y', 'Z'}
125010d565efSmrg (each "xxx", "y" and "zz" represent one multibyte character
125110d565efSmrg corresponding to 'X', 'Y' and 'Z'.)
125210d565efSmrg offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
125310d565efSmrg = {0, 3, 4, 6}
125410d565efSmrg */
125510d565efSmrg {
125610d565efSmrg wchar_t *pdest = dest;
125710d565efSmrg const unsigned char *psrc = src;
125810d565efSmrg size_t wc_count = 0;
125910d565efSmrg
126010d565efSmrg mbstate_t mbs;
126110d565efSmrg int i, consumed;
126210d565efSmrg size_t mb_remain = len;
126310d565efSmrg size_t mb_count = 0;
126410d565efSmrg
126510d565efSmrg /* Initialize the conversion state. */
126610d565efSmrg memset (&mbs, 0, sizeof (mbstate_t));
126710d565efSmrg
126810d565efSmrg offset_buffer[0] = 0;
126910d565efSmrg for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
127010d565efSmrg psrc += consumed)
127110d565efSmrg {
127210d565efSmrg #ifdef _LIBC
127310d565efSmrg consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
127410d565efSmrg #else
127510d565efSmrg consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
127610d565efSmrg #endif
127710d565efSmrg
127810d565efSmrg if (consumed <= 0)
127910d565efSmrg /* failed to convert. maybe src contains binary data.
128010d565efSmrg So we consume 1 byte manualy. */
128110d565efSmrg {
128210d565efSmrg *pdest = *psrc;
128310d565efSmrg consumed = 1;
128410d565efSmrg is_binary[wc_count] = TRUE;
128510d565efSmrg }
128610d565efSmrg else
128710d565efSmrg is_binary[wc_count] = FALSE;
128810d565efSmrg /* In sjis encoding, we use yen sign as escape character in
128910d565efSmrg place of reverse solidus. So we convert 0x5c(yen sign in
129010d565efSmrg sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
129110d565efSmrg solidus in UCS2). */
129210d565efSmrg if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
129310d565efSmrg *pdest = (wchar_t) *psrc;
129410d565efSmrg
129510d565efSmrg offset_buffer[wc_count + 1] = mb_count += consumed;
129610d565efSmrg }
129710d565efSmrg
129810d565efSmrg /* Fill remain of the buffer with sentinel. */
129910d565efSmrg for (i = wc_count + 1 ; i <= len ; i++)
130010d565efSmrg offset_buffer[i] = mb_count + 1;
130110d565efSmrg
130210d565efSmrg return wc_count;
130310d565efSmrg }
130410d565efSmrg
130510d565efSmrg # endif /* WCHAR */
130610d565efSmrg
130710d565efSmrg #else /* not INSIDE_RECURSION */
130810d565efSmrg
130910d565efSmrg /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
131010d565efSmrg also be assigned to arbitrarily: each pattern buffer stores its own
131110d565efSmrg syntax, so it can be changed between regex compilations. */
131210d565efSmrg /* This has no initializer because initialized variables in Emacs
131310d565efSmrg become read-only after dumping. */
131410d565efSmrg reg_syntax_t re_syntax_options;
131510d565efSmrg
131610d565efSmrg
131710d565efSmrg /* Specify the precise syntax of regexps for compilation. This provides
131810d565efSmrg for compatibility for various utilities which historically have
131910d565efSmrg different, incompatible syntaxes.
132010d565efSmrg
132110d565efSmrg The argument SYNTAX is a bit mask comprised of the various bits
132210d565efSmrg defined in regex.h. We return the old syntax. */
132310d565efSmrg
132410d565efSmrg reg_syntax_t
re_set_syntax(reg_syntax_t syntax)132510d565efSmrg re_set_syntax (reg_syntax_t syntax)
132610d565efSmrg {
132710d565efSmrg reg_syntax_t ret = re_syntax_options;
132810d565efSmrg
132910d565efSmrg re_syntax_options = syntax;
133010d565efSmrg # ifdef DEBUG
133110d565efSmrg if (syntax & RE_DEBUG)
133210d565efSmrg debug = 1;
133310d565efSmrg else if (debug) /* was on but now is not */
133410d565efSmrg debug = 0;
133510d565efSmrg # endif /* DEBUG */
133610d565efSmrg return ret;
133710d565efSmrg }
133810d565efSmrg # ifdef _LIBC
133910d565efSmrg weak_alias (__re_set_syntax, re_set_syntax)
134010d565efSmrg # endif
134110d565efSmrg
134210d565efSmrg /* This table gives an error message for each of the error codes listed
134310d565efSmrg in regex.h. Obviously the order here has to be same as there.
134410d565efSmrg POSIX doesn't require that we do anything for REG_NOERROR,
134510d565efSmrg but why not be nice? */
134610d565efSmrg
134710d565efSmrg static const char *re_error_msgid[] =
134810d565efSmrg {
134910d565efSmrg gettext_noop ("Success"), /* REG_NOERROR */
135010d565efSmrg gettext_noop ("No match"), /* REG_NOMATCH */
135110d565efSmrg gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
135210d565efSmrg gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
135310d565efSmrg gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
135410d565efSmrg gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
135510d565efSmrg gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
135610d565efSmrg gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
135710d565efSmrg gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
135810d565efSmrg gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
135910d565efSmrg gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
136010d565efSmrg gettext_noop ("Invalid range end"), /* REG_ERANGE */
136110d565efSmrg gettext_noop ("Memory exhausted"), /* REG_ESPACE */
136210d565efSmrg gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
136310d565efSmrg gettext_noop ("Premature end of regular expression"), /* REG_EEND */
136410d565efSmrg gettext_noop ("Regular expression too big"), /* REG_ESIZE */
136510d565efSmrg gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
136610d565efSmrg };
136710d565efSmrg
136810d565efSmrg #endif /* INSIDE_RECURSION */
136910d565efSmrg
#ifndef DEFINED_ONCE
/* Avoiding alloca during matching, to placate r_alloc.  */

/* MATCH_MAY_ALLOCATE is defined unless the searching and matching
   functions must not call alloca.  On some systems alloca is
   implemented in terms of malloc; with the relocating allocator
   routines, malloc could cause a relocation, which might (if the
   strings being searched are in the ralloc heap) shift the data out
   from underneath the regexp routines.

   A second reason to avoid allocation: Emacs processes input from X in
   a signal handler, and processing X input may call malloc.  If input
   arrives while a matching routine is calling malloc, then we're
   scrod.  Emacs can't just block input while calling matching
   routines, because then interrupts would go unnoticed when they come
   in.  So Emacs blocks input around all regexp calls except the
   matching calls, which it leaves unprotected, in the faith that they
   will not malloc.  */

/* Normally, this is fine.  */
# define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
# ifdef __GNUC__
#  undef C_ALLOCA
# endif

/* The match routines may not allocate if (1) they would do it with
   malloc and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for
   the failure stack, but we would still use it for the register
   vectors; so REL_ALLOC should not affect this.  */
# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
#  undef MATCH_MAY_ALLOCATE
# endif
#endif /* not DEFINED_ONCE */
140810d565efSmrg
140910d565efSmrg #ifdef INSIDE_RECURSION
141010d565efSmrg /* Failure stack declarations and macros; both re_compile_fastmap and
141110d565efSmrg re_match_2 use a failure stack. These have to be macros because of
141210d565efSmrg REGEX_ALLOCATE_STACK. */
141310d565efSmrg
141410d565efSmrg
/* How many failure points get space when the failure stack is first
   allocated for matching.  The stack is enlarged when this is exceeded,
   so it is not a hard limit.  May be predefined to override the
   default.  */
# ifndef INIT_FAILURE_ALLOC
#  define INIT_FAILURE_ALLOC 5
# endif
142110d565efSmrg
142210d565efSmrg /* Roughly the maximum number of failure points on the stack. Would be
142310d565efSmrg exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
142410d565efSmrg This is a variable only so users of regex can assign to it; we never
142510d565efSmrg change it ourselves. */
142610d565efSmrg
142710d565efSmrg # ifdef INT_IS_16BIT
142810d565efSmrg
142910d565efSmrg # ifndef DEFINED_ONCE
143010d565efSmrg # if defined MATCH_MAY_ALLOCATE
143110d565efSmrg /* 4400 was enough to cause a crash on Alpha OSF/1,
143210d565efSmrg whose default stack limit is 2mb. */
143310d565efSmrg long int re_max_failures = 4000;
143410d565efSmrg # else
143510d565efSmrg long int re_max_failures = 2000;
143610d565efSmrg # endif
143710d565efSmrg # endif
143810d565efSmrg
PREFIX(fail_stack_elt)143910d565efSmrg union PREFIX(fail_stack_elt)
144010d565efSmrg {
144110d565efSmrg UCHAR_T *pointer;
144210d565efSmrg long int integer;
144310d565efSmrg };
144410d565efSmrg
144510d565efSmrg typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
144610d565efSmrg
144710d565efSmrg typedef struct
144810d565efSmrg {
144910d565efSmrg PREFIX(fail_stack_elt_t) *stack;
145010d565efSmrg unsigned long int size;
145110d565efSmrg unsigned long int avail; /* Offset of next open position. */
145210d565efSmrg } PREFIX(fail_stack_type);
145310d565efSmrg
145410d565efSmrg # else /* not INT_IS_16BIT */
145510d565efSmrg
145610d565efSmrg # ifndef DEFINED_ONCE
145710d565efSmrg # if defined MATCH_MAY_ALLOCATE
145810d565efSmrg /* 4400 was enough to cause a crash on Alpha OSF/1,
145910d565efSmrg whose default stack limit is 2mb. */
146010d565efSmrg int re_max_failures = 4000;
146110d565efSmrg # else
146210d565efSmrg int re_max_failures = 2000;
146310d565efSmrg # endif
146410d565efSmrg # endif
146510d565efSmrg
PREFIX(fail_stack_elt)146610d565efSmrg union PREFIX(fail_stack_elt)
146710d565efSmrg {
146810d565efSmrg UCHAR_T *pointer;
146910d565efSmrg int integer;
147010d565efSmrg };
147110d565efSmrg
147210d565efSmrg typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
147310d565efSmrg
147410d565efSmrg typedef struct
147510d565efSmrg {
147610d565efSmrg PREFIX(fail_stack_elt_t) *stack;
147710d565efSmrg unsigned size;
147810d565efSmrg unsigned avail; /* Offset of next open position. */
147910d565efSmrg } PREFIX(fail_stack_type);
148010d565efSmrg
148110d565efSmrg # endif /* INT_IS_16BIT */
148210d565efSmrg
# ifndef DEFINED_ONCE
/* Predicates on the failure stack.  They refer to a variable named
   `fail_stack' (or a pointer named `fail_stack_ptr') that must be in
   scope where they are used.  */
#  define FAIL_STACK_EMPTY()      (fail_stack.avail == 0)
#  define FAIL_STACK_PTR_EMPTY()  (fail_stack_ptr->avail == 0)
#  define FAIL_STACK_FULL()       (fail_stack.avail == fail_stack.size)
# endif
148810d565efSmrg
148910d565efSmrg
/* Macros that set up and release the failure stack held in the variable
   `fail_stack'.  When matching may allocate, INIT_FAIL_STACK obtains
   room for INIT_FAILURE_ALLOC elements and makes the enclosing function
   `return -2' if that allocation fails.  Otherwise it only resets the
   stack to empty, and RESET_FAIL_STACK does nothing.  */

# ifdef MATCH_MAY_ALLOCATE
#  define INIT_FAIL_STACK() \
  do \
    { \
      fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
        REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC \
                              * sizeof (PREFIX(fail_stack_elt_t))); \
      \
      if (fail_stack.stack == NULL) \
        return -2; \
      \
      fail_stack.size = INIT_FAILURE_ALLOC; \
      fail_stack.avail = 0; \
    } \
  while (0)

#  define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
# else
#  define INIT_FAIL_STACK() \
  do \
    { \
      fail_stack.avail = 0; \
    } \
  while (0)

#  define RESET_FAIL_STACK()
# endif
151510d565efSmrg
151610d565efSmrg
/* Grow FAIL_STACK to twice its size, as long as its current size does not
   exceed `re_max_failures' * MAX_FAILURE_ITEMS slots.

   Evaluates to 1 on success.  Evaluates to 0 when the stack is already
   beyond that limit or when REGEX_REALLOCATE_STACK returns NULL; in the
   NULL case the `stack' member has been overwritten with NULL and `size'
   is left untouched.

   REGEX_REALLOCATE_STACK requires `destination' be declared.  */

# define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
   ? 0									\
   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
	  (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
	  ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1,					\
	 1)))
153610d565efSmrg
153710d565efSmrg
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The fullness test is made on FAIL_STACK itself rather than through
   FAIL_STACK_FULL (), which always inspects the variable `fail_stack';
   otherwise passing any other stack would check one stack and write to
   another.  POINTER is parenthesized so that an expression argument is
   assigned as a whole.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  ((((FAIL_STACK).avail == (FAIL_STACK).size)				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
154710d565efSmrg
/* Primitive pushes onto the failure stack.  Each one stores ITEM in the
   slot at `fail_stack.avail' and then advances `avail'; none of them
   checks for room, so they probably should only be used from within
   `PUSH_FAILURE_POINT', which reserves the space first.  */

/* Store ITEM, converted to UCHAR_T *, in the `pointer' member.  */
# define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)

/* Store ITEM in the `integer' member.  */
# define PUSH_FAILURE_INT(item)						\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Store ITEM, a whole fail_stack_elt_t value.  */
# define PUSH_FAILURE_ELT(item)						\
  fail_stack.stack[fail_stack.avail++] = (item)

/* The matching pops: each decrements `avail' and yields the slot (or the
   named member of it) that is now on top.  All assume that `fail_stack'
   is nonempty.  */
# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
157110d565efSmrg
/* Push/pop of the failure point id.  With DEBUG these go through the
   integer push and pop; without DEBUG they expand to nothing, so the id
   is never stored and the argument is not evaluated.  */
# ifdef DEBUG
#  define DEBUG_PUSH PUSH_FAILURE_INT
#  define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
# else
#  define DEBUG_PUSH(item)
#  define DEBUG_POP(item_addr)
# endif
158010d565efSmrg
158110d565efSmrg
/* Save the state needed to resume matching at PATTERN_PLACE and
   STRING_PLACE if the current path later fails.

   Pushed, in order: for every register from lowest_active_reg through
   highest_active_reg its regstart, regend and reg_info word; then
   lowest_active_reg, highest_active_reg, PATTERN_PLACE, STRING_PLACE and,
   when DEBUG is defined, the failure id.

   Requires variables fail_stack, regstart, regend, reg_info, and
   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination',
   which this macro declares itself.

   Does `return FAILURE_CODE' if the stack cannot be grown far enough.  */

# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
  do									\
    {									\
      char *destination;						\
      /* Declared as active_reg_t rather than int: the original notes	\
	 that int is not guaranteed to be wide enough for the values	\
	 this counter has to hold.  */					\
      active_reg_t this_reg;						\
									\
      DEBUG_STATEMENT (failure_id++);					\
      DEBUG_STATEMENT (nfailure_points_pushed++);			\
      DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
      DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
      DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
									\
      DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
      DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS);		\
									\
      /* Keep doubling until everything we are about to push fits.  */	\
      while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
	{								\
	  if (!DOUBLE_FAIL_STACK (fail_stack))				\
	    return failure_code;					\
									\
	  DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n",		\
			(fail_stack).size);				\
	  DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
	}								\
									\
      /* Registers go first.  */					\
      DEBUG_PRINT1 ("\n");						\
									\
      if (1)								\
	for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
	     this_reg++)						\
	  {								\
	    DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg);		\
	    DEBUG_STATEMENT (num_regs_pushed++);			\
									\
	    DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]);		\
	    PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
	    DEBUG_PRINT2 (" end: %p\n", regend[this_reg]);		\
	    PUSH_FAILURE_POINTER (regend[this_reg]);			\
									\
	    DEBUG_PRINT2 (" info: %p\n ",				\
			  reg_info[this_reg].word.pointer);		\
	    DEBUG_PRINT2 (" match_null=%d",				\
			  REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
	    DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
	    DEBUG_PRINT2 (" matched_something=%d",			\
			  MATCHED_SOMETHING (reg_info[this_reg]));	\
	    DEBUG_PRINT2 (" ever_matched=%d",				\
			  EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
	    DEBUG_PRINT1 ("\n");					\
	    PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
	  }								\
									\
      /* Then the bounds of the active register range.  */		\
      DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
      PUSH_FAILURE_INT (lowest_active_reg);				\
									\
      DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
      PUSH_FAILURE_INT (highest_active_reg);				\
									\
      /* Then where to resume in the pattern and in the string.  */	\
      DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place);		\
      DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
      PUSH_FAILURE_POINTER (pattern_place);				\
									\
      DEBUG_PRINT2 (" Pushing string %p: `", string_place);		\
      DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
				 size2);				\
      DEBUG_PRINT1 ("'\n");						\
      PUSH_FAILURE_POINTER (string_place);				\
									\
      DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id);		\
      DEBUG_PUSH (failure_id);						\
    }									\
  while (0)
166910d565efSmrg
# ifndef DEFINED_ONCE
/* Slots pushed (and popped) per register: start, end and info word.  */
#  define NUM_REG_ITEMS 3

/* Slots pushed per failure point besides the registers.  */
#  ifdef DEBUG
#   define NUM_NONREG_ITEMS 5	/* Includes failure point id.  */
#  else
#   define NUM_NONREG_ITEMS 4
#  endif

/* Per-failure-point figure used when limiting stack growth.
   We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* Slots one failure point actually takes, given the current
   `lowest_active_reg' and `highest_active_reg'.  */
#  define NUM_FAILURE_ITEMS						\
  (((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS)	\
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.  */
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
169810d565efSmrg
169910d565efSmrg
/* Pops what PUSH_FAILURE_POINT pushes, in the reverse order.

   We restore into the parameters, all of which should be lvalues:
     STR -- the saved data position (left alone if the saved value is NULL).
     PAT -- the saved pattern position.
     LOW_REG, HIGH_REG -- the lowest and highest active registers.
     REGSTART, REGEND -- arrays of string positions.
     REG_INFO -- array of information about each subexpression.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.  Clears
   `set_regs_matched_done' on the way out.

   Note that this expands to a bare brace block, not a do-while.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{									\
  DEBUG_STATEMENT (unsigned failure_id;)				\
  active_reg_t this_reg;						\
  const UCHAR_T *string_temp;						\
									\
  assert (!FAIL_STACK_EMPTY ());					\
									\
  /* Remove failure points and point to how many regs pushed.  */	\
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail);	\
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size);			\
									\
  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
									\
  DEBUG_POP (&failure_id);						\
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id);		\
									\
  /* A NULL string position was stored by an				\
     on_failure_keep_string_jump opcode; discard it so that the	\
     current position in the string is retained.  */			\
  string_temp = POP_FAILURE_POINTER ();					\
  if (string_temp != NULL)						\
    str = (const CHAR_T *) string_temp;					\
									\
  DEBUG_PRINT2 (" Popping string %p: `", str);				\
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
  DEBUG_PRINT1 ("'\n");							\
									\
  pat = (UCHAR_T *) POP_FAILURE_POINTER ();				\
  DEBUG_PRINT2 (" Popping pattern %p:\n", pat);				\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
									\
  /* Active register range, high bound first.  */			\
  high_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg);		\
									\
  low_reg = (active_reg_t) POP_FAILURE_INT ();				\
  DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg);		\
									\
  /* Per-register data, from HIGH_REG down to LOW_REG.  The else arm	\
     below is never taken because the condition is the constant 1.  */	\
  if (1)								\
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
      {									\
	DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg);			\
									\
	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
	DEBUG_PRINT2 (" info: %p\n",					\
		      reg_info[this_reg].word.pointer);			\
									\
	regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 (" end: %p\n", regend[this_reg]);			\
									\
	regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER ();	\
	DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]);		\
      }									\
  else									\
    {									\
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
	{								\
	  reg_info[this_reg].word.integer = 0;				\
	  regend[this_reg] = 0;						\
	  regstart[this_reg] = 0;					\
	}								\
      highest_active_reg = high_reg;					\
    }									\
									\
  set_regs_matched_done = 0;						\
  DEBUG_STATEMENT (nfailure_points_popped++);				\
} /* POP_FAILURE_POINT */
178010d565efSmrg
178110d565efSmrg /* Structure for per-register (a.k.a. per-group) information.
178210d565efSmrg Other register information, such as the
178310d565efSmrg starting and ending positions (which are addresses), and the list of
178410d565efSmrg inner groups (which is a bits list) are maintained in separate
178510d565efSmrg variables.
178610d565efSmrg
178710d565efSmrg We are making a (strictly speaking) nonportable assumption here: that
178810d565efSmrg the compiler will pack our bit fields into something that fits into
178910d565efSmrg the type of `word', i.e., is something that fits into one item on the
179010d565efSmrg failure stack. */
179110d565efSmrg
179210d565efSmrg
179310d565efSmrg /* Declarations and macros for re_match_2. */
179410d565efSmrg
179510d565efSmrg typedef union
179610d565efSmrg {
179710d565efSmrg PREFIX(fail_stack_elt_t) word;
179810d565efSmrg struct
179910d565efSmrg {
180010d565efSmrg /* This field is one if this group can match the empty string,
180110d565efSmrg zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
180210d565efSmrg # define MATCH_NULL_UNSET_VALUE 3
180310d565efSmrg unsigned match_null_string_p : 2;
180410d565efSmrg unsigned is_active : 1;
180510d565efSmrg unsigned matched_something : 1;
180610d565efSmrg unsigned ever_matched_something : 1;
180710d565efSmrg } bits;
180810d565efSmrg } PREFIX(register_info_type);
180910d565efSmrg
# ifndef DEFINED_ONCE
/* Accessors for the bit fields of a register_info_type R; each one is an
   lvalue.  */
#  define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
#  define IS_ACTIVE(R)  ((R).bits.is_active)
#  define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
#  define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)


/* Call this when have matched a real character.  For every register from
   lowest_active_reg through highest_active_reg it sets both the
   `matched_something' and `ever_matched_something' flags.  The work is
   skipped while `set_regs_matched_done' is nonzero, and that variable is
   set to 1 when the work is done.  */
#  define SET_REGS_MATCHED()						\
  do									\
    {									\
      if (!set_regs_matched_done)					\
	{								\
	  active_reg_t this_reg;					\
	  set_regs_matched_done = 1;					\
	  for (this_reg = lowest_active_reg;				\
	       this_reg <= highest_active_reg;				\
	       this_reg++)						\
	    {								\
	      EVER_MATCHED_SOMETHING (reg_info[this_reg]) = 1;		\
	      MATCHED_SOMETHING (reg_info[this_reg]) = 1;		\
	    }								\
	}								\
    }									\
  while (0)
# endif /* not DEFINED_ONCE */
183710d565efSmrg
183810d565efSmrg /* Registers are set to a sentinel when they haven't yet matched. */
183910d565efSmrg static CHAR_T PREFIX(reg_unset_dummy);
184010d565efSmrg # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
184110d565efSmrg # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
184210d565efSmrg
184310d565efSmrg /* Subroutine declarations and macros for regex_compile. */
184410d565efSmrg static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
184510d565efSmrg static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
184610d565efSmrg int arg1, int arg2);
184710d565efSmrg static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
184810d565efSmrg int arg, UCHAR_T *end);
184910d565efSmrg static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
185010d565efSmrg int arg1, int arg2, UCHAR_T *end);
185110d565efSmrg static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
185210d565efSmrg const CHAR_T *p,
185310d565efSmrg reg_syntax_t syntax);
185410d565efSmrg static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
185510d565efSmrg const CHAR_T *pend,
185610d565efSmrg reg_syntax_t syntax);
185710d565efSmrg # ifdef WCHAR
185810d565efSmrg static reg_errcode_t wcs_compile_range (CHAR_T range_start,
185910d565efSmrg const CHAR_T **p_ptr,
186010d565efSmrg const CHAR_T *pend,
186110d565efSmrg char *translate,
186210d565efSmrg reg_syntax_t syntax,
186310d565efSmrg UCHAR_T *b,
186410d565efSmrg CHAR_T *char_set);
186510d565efSmrg static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
186610d565efSmrg # else /* BYTE */
186710d565efSmrg static reg_errcode_t byte_compile_range (unsigned int range_start,
186810d565efSmrg const char **p_ptr,
186910d565efSmrg const char *pend,
187010d565efSmrg char *translate,
187110d565efSmrg reg_syntax_t syntax,
187210d565efSmrg unsigned char *b);
187310d565efSmrg # endif /* WCHAR */
187410d565efSmrg
/* Read the next character of the uncompiled pattern into C, advancing
   `p', and map it through `translate' when that is non-null.  Executes
   `return REG_EEND' in the enclosing function when `p' has reached `pend'.

   The character is converted to an unsigned type because the pattern may
   be a string of signed chars and the value is used as an array index
   (in, e.g., `translate').  */
/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   because it is impossible to allocate 4GB array for some encodings
   which have 4 byte character_set like UCS4.  */
# ifndef PATFETCH
#  ifdef WCHAR
#   define PATFETCH(c)							\
  do									\
    {									\
      if (p == pend)							\
	return REG_EEND;						\
      c = (UCHAR_T) *p++;						\
      if (translate && (c <= 0xff))					\
	c = (UCHAR_T) translate[c];					\
    }									\
  while (0)
#  else /* BYTE */
#   define PATFETCH(c)							\
  do									\
    {									\
      if (p == pend)							\
	return REG_EEND;						\
      c = (unsigned char) *p++;						\
      if (translate)							\
	c = (unsigned char) translate[c];				\
    }									\
  while (0)
#  endif /* WCHAR */
# endif
189710d565efSmrg
/* Like PATFETCH but without translation: read the next pattern character
   into C and advance `p', or `return REG_EEND' when `p' equals `pend'.  */
# define PATFETCH_RAW(c)						\
  do									\
    {									\
      if (p == pend)							\
	return REG_EEND;						\
      c = (UCHAR_T) *p++;						\
    }									\
  while (0)

/* Step `p' back by one character in the pattern.  */
# define PATUNFETCH p--
190710d565efSmrg
190810d565efSmrg
/* Yield translate[D] when `translate' is non-null, and D otherwise.  The
   subscript is cast to unsigned char: some data is declared as `char *'
   (to avoid warnings when a string constant is passed), but a character
   used as a subscript must be unsigned.  */
/* ifdef MBS_SUPPORT, we translate only if character <= 0xff,
   because it is impossible to allocate 4GB array for some encodings
   which have 4 byte character_set like UCS4.  */

# ifndef TRANSLATE
#  ifdef WCHAR
#   define TRANSLATE(d)							\
  ((translate && ((UCHAR_T) (d)) <= 0xff)				\
   ? (char) translate[(unsigned char) (d)]				\
   : (d))
#  else /* BYTE */
#   define TRANSLATE(d)							\
  (translate								\
   ? (char) translate[(unsigned char) (d)]				\
   : (char) (d))
#  endif /* WCHAR */
# endif
192710d565efSmrg
192810d565efSmrg
/* Macros for outputting the compiled pattern into `buffer'.  */

/* Size used for the buffer when it isn't allocated when it comes in.  */
# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))

/* Call EXTEND_BUFFER until the buffer has room for N more elements past
   `b'.  The wide-character variant measures in bytes from
   COMPILED_BUFFER_VAR and scales N by sizeof (CHAR_T); the byte variant
   measures from bufp->buffer.  Expands to a bare `while' statement.  */
# ifdef WCHAR
#  define GET_BUFFER_SPACE(n)						\
    while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR	\
	    + (n)*sizeof(CHAR_T)) > bufp->allocated)			\
      EXTEND_BUFFER ()
# else /* BYTE */
#  define GET_BUFFER_SPACE(n)						\
    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
      EXTEND_BUFFER ()
# endif /* WCHAR */
194510d565efSmrg
/* Append one element: reserve room for it with GET_BUFFER_SPACE, then
   store C (converted to UCHAR_T) at `b' and advance `b'.  */
# define BUF_PUSH(c)							\
  do									\
    {									\
      GET_BUFFER_SPACE (1);						\
      *b++ = (UCHAR_T) (c);						\
    }									\
  while (0)


/* Same for two elements, C1 then C2, reserved in a single step.  */
# define BUF_PUSH_2(c1, c2)						\
  do									\
    {									\
      GET_BUFFER_SPACE (2);						\
      *b++ = (UCHAR_T) (c1);						\
      *b++ = (UCHAR_T) (c2);						\
    }									\
  while (0)


/* Same for three elements, C1, C2, then C3.  */
# define BUF_PUSH_3(c1, c2, c3)						\
  do									\
    {									\
      GET_BUFFER_SPACE (3);						\
      *b++ = (UCHAR_T) (c1);						\
      *b++ = (UCHAR_T) (c2);						\
      *b++ = (UCHAR_T) (c3);						\
    }									\
  while (0)
197110d565efSmrg
/* Store a jump with opcode OP at LOC to location TO.  The stored value is
   a relative offset: TO - LOC, less the 1 + OFFSET_ADDRESS_SIZE elements
   that the jump itself occupies.  */
# define STORE_JUMP(op, loc, to)					\
  PREFIX(store_op1) (op, loc,						\
		     (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))

/* Likewise, for a jump that carries a second argument ARG.  */
# define STORE_JUMP2(op, loc, to, arg)					\
  PREFIX(store_op2) (op, loc,						\
		     (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),	\
		     arg)

/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP(op, loc, to)					\
  PREFIX(insert_op1) (op, loc,						\
		      (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),	\
		      b)

/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
# define INSERT_JUMP2(op, loc, to, arg)					\
  PREFIX(insert_op2) (op, loc,						\
		      (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),	\
		      arg, b)
198910d565efSmrg
/* This is not an arbitrary limit: the arguments which represent offsets
   into the pattern are two bytes long.  So if 2^16 bytes turns out to
   be too small, many things would have to change.  */
/* Any other compiler which, like MSC, has an allocation limit below 2^16
   bytes will have to use an approach similar to what is done below for
   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
   reallocating to 0 bytes, which is not going to work too well.
   You have been warned!!  */
# ifndef DEFINED_ONCE
#  if defined _MSC_VER && !defined WIN32
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
   The REALLOC define eliminates a flurry of conversion warnings,
   but is not required.  */
#   define MAX_BUF_SIZE 65500L
#   define REALLOC(p,s) realloc ((p), (size_t) (s))
#  else
#   define MAX_BUF_SIZE (1L << 16)
#   define REALLOC(p,s) realloc ((p), (s))
#  endif

/* Helpers for EXTEND_BUFFER, which grows the buffer via realloc and then
   resets the pointers that pointed into the old block to point to the
   correct places in the new one.  MOVE_BUFFER_POINTER shifts pointer P by
   the variable `incr'.  With __BOUNDED_POINTERS__ it also shifts the low
   bound and recomputes the high bound from bufp->allocated, and
   ELSE_EXTEND_BUFFER_HIGH_BOUND supplies an `else' arm that refreshes
   only the high bounds; otherwise that macro is empty.  */
#  if __BOUNDED_POINTERS__
#   define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
#   define MOVE_BUFFER_POINTER(P)					\
  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND				\
  else									\
    {									\
      SET_HIGH_BOUND (b);						\
      SET_HIGH_BOUND (begalt);						\
      if (fixup_alt_jump)						\
	SET_HIGH_BOUND (fixup_alt_jump);				\
      if (laststart)							\
	SET_HIGH_BOUND (laststart);					\
      if (pending_exact)						\
	SET_HIGH_BOUND (pending_exact);					\
    }
#  else
#   define MOVE_BUFFER_POINTER(P) (P) += incr
#   define ELSE_EXTEND_BUFFER_HIGH_BOUND
#  endif
# endif /* not DEFINED_ONCE */
203510d565efSmrg
/* Double the compiled-pattern buffer, capping it at MAX_BUF_SIZE, and
   shift `b', `begalt' and the non-null ones among `fixup_alt_jump',
   `laststart' and `pending_exact' if the block moved.

   Executes `return REG_ESIZE' when the buffer cannot grow any further
   and `return REG_ESPACE' when the reallocation yields NULL.  */
# ifdef WCHAR
#  define EXTEND_BUFFER()						\
  do									\
    {									\
      UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;			\
      int wchar_count;							\
      if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE)		\
	return REG_ESIZE;						\
      bufp->allocated <<= 1;						\
      if (bufp->allocated > MAX_BUF_SIZE)				\
	bufp->allocated = MAX_BUF_SIZE;					\
      /* Number of whole characters the new size can hold, at		\
	 least one.  */							\
      wchar_count = bufp->allocated / sizeof(UCHAR_T);			\
      if (wchar_count == 0) wchar_count = 1;				\
      /* Round the byte size down to a multiple of the character	\
	 size.  */							\
      bufp->allocated = wchar_count * sizeof(UCHAR_T);			\
      RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T);		\
      bufp->buffer = (char*)COMPILED_BUFFER_VAR;			\
      if (COMPILED_BUFFER_VAR == NULL)					\
	return REG_ESPACE;						\
      /* Relocate every pointer into the buffer if it moved.  */	\
      if (old_buffer != COMPILED_BUFFER_VAR)				\
	{								\
	  PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer;		\
	  MOVE_BUFFER_POINTER (b);					\
	  MOVE_BUFFER_POINTER (begalt);					\
	  if (fixup_alt_jump)						\
	    MOVE_BUFFER_POINTER (fixup_alt_jump);			\
	  if (laststart)						\
	    MOVE_BUFFER_POINTER (laststart);				\
	  if (pending_exact)						\
	    MOVE_BUFFER_POINTER (pending_exact);			\
	}								\
      ELSE_EXTEND_BUFFER_HIGH_BOUND					\
    }									\
  while (0)
# else /* BYTE */
#  define EXTEND_BUFFER()						\
  do									\
    {									\
      UCHAR_T *old_buffer = COMPILED_BUFFER_VAR;			\
      if (bufp->allocated == MAX_BUF_SIZE)				\
	return REG_ESIZE;						\
      bufp->allocated <<= 1;						\
      if (bufp->allocated > MAX_BUF_SIZE)				\
	bufp->allocated = MAX_BUF_SIZE;					\
      bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR,		\
					  bufp->allocated);		\
      if (COMPILED_BUFFER_VAR == NULL)					\
	return REG_ESPACE;						\
      /* Relocate every pointer into the buffer if it moved.  */	\
      if (old_buffer != COMPILED_BUFFER_VAR)				\
	{								\
	  PTR_INT_TYPE incr = COMPILED_BUFFER_VAR - old_buffer;		\
	  MOVE_BUFFER_POINTER (b);					\
	  MOVE_BUFFER_POINTER (begalt);					\
	  if (fixup_alt_jump)						\
	    MOVE_BUFFER_POINTER (fixup_alt_jump);			\
	  if (laststart)						\
	    MOVE_BUFFER_POINTER (laststart);				\
	  if (pending_exact)						\
	    MOVE_BUFFER_POINTER (pending_exact);			\
	}								\
      ELSE_EXTEND_BUFFER_HIGH_BOUND					\
    }									\
  while (0)
# endif /* WCHAR */
209910d565efSmrg
# ifndef DEFINED_ONCE
/* Since we have one byte reserved for the register number argument to
   {start,stop}_memory, the maximum number of groups we can report
   things about is what fits in that byte.  */
#  define MAX_REGNUM 255

/* But patterns can have more than `MAX_REGNUM' registers.  We just
   ignore the excess.  */
typedef unsigned regnum_t;


/* Macros for the compile stack.  */

/* Offsets can go either forwards or backwards, so this type has to hold
   values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  It is `long'
   because int may be not enough when sizeof(int) == 2.  */
typedef long pattern_offset_t;

/* One entry of the compile stack.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t inner_group_offset;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* The compile stack: an array of entries plus its bookkeeping.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned size;
  unsigned avail;			/* Offset of next open position.  */
} compile_stack_type;


#  define INIT_COMPILE_STACK_SIZE 32

/* Predicates on the variable `compile_stack'.  */
#  define COMPILE_STACK_EMPTY  (0 == compile_stack.avail)
#  define COMPILE_STACK_FULL  (compile_stack.size == compile_stack.avail)

/* The next available element.  */
#  define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])

# endif /* not DEFINED_ONCE */
214510d565efSmrg
/* Set the bit for character C in the bit list that `b' points to.  C is
   converted to unsigned char; the byte index is that value divided by
   BYTEWIDTH and the bit within the byte is the remainder.

   C is parenthesized at both uses so that the cast applies to the whole
   argument even when it is an expression; C is evaluated twice, so it
   must not have side effects.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c)						\
  (b[((unsigned char) (c)) / BYTEWIDTH]					\
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* not DEFINED_ONCE */
215210d565efSmrg
/* Read the next unsigned decimal number from the uncompiled pattern
   into NUM.

   Works on the variables `p', `pend' and `c' of the expansion site.
   Characters are fetched with PATFETCH until the pattern is exhausted or
   a non-digit has been fetched; that non-digit is consumed and left in
   `c'.  A negative NUM (the caller's "nothing read yet" value) is treated
   as zero when the first digit arrives, and stays untouched if no digit
   is seen.  Once NUM has grown beyond RE_DUP_MAX the remaining digits are
   still consumed but no longer accumulated, which keeps NUM from
   overflowing.  */
# define GET_UNSIGNED_NUMBER(num)                                       \
  {                                                                     \
    for (; p != pend; )                                                 \
      {                                                                 \
        PATFETCH (c);                                                   \
        if (!('0' <= c && c <= '9'))                                    \
          break;                                                        \
        if (num > RE_DUP_MAX)                                           \
          continue;                                                     \
        num = (num < 0 ? 0 : num) * 10 + c - '0';                       \
      }                                                                 \
  }
216910d565efSmrg
# ifndef DEFINED_ONCE
#  if !(defined _LIBC || WIDE_CHAR_SUPPORT)
/* No wide-character support: only the twelve class names listed below
   are recognized.  */
#   define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'.  */

#   define IS_CHAR_CLASS(string)                                        \
  (STREQ (string, "alpha")                                              \
   || STREQ (string, "upper")                                           \
   || STREQ (string, "lower")                                           \
   || STREQ (string, "digit")                                           \
   || STREQ (string, "alnum")                                           \
   || STREQ (string, "xdigit")                                          \
   || STREQ (string, "space")                                           \
   || STREQ (string, "print")                                           \
   || STREQ (string, "punct")                                           \
   || STREQ (string, "graph")                                           \
   || STREQ (string, "cntrl")                                           \
   || STREQ (string, "blank"))
#  else
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1.  */
#   ifndef CHARCLASS_NAME_MAX
/* This shouldn't happen, but some implementations might still lack the
   constant.  Fall back to a reasonable default value.  */
#    define CHAR_CLASS_MAX_LENGTH 256
#   else
#    define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#   endif

/* A class name is valid when the wctype lookup accepts it.  */
#   ifndef _LIBC
#    define IS_CHAR_CLASS(string) wctype (string)
#   else
#    define IS_CHAR_CLASS(string) __wctype (string)
#   endif
#  endif
# endif /* DEFINED_ONCE */
219910d565efSmrg
220010d565efSmrg # ifndef MATCH_MAY_ALLOCATE
220110d565efSmrg
220210d565efSmrg /* If we cannot allocate large objects within re_match_2_internal,
220310d565efSmrg we make the fail stack and register vectors global.
220410d565efSmrg The fail stack, we grow to the maximum size when a regexp
220510d565efSmrg is compiled.
220610d565efSmrg The register vectors, we adjust in size each time we
220710d565efSmrg compile a regexp, according to the number of registers it needs. */
220810d565efSmrg
220910d565efSmrg static PREFIX(fail_stack_type) fail_stack;
221010d565efSmrg
221110d565efSmrg /* Size with which the following vectors are currently allocated.
221210d565efSmrg That is so we can make them bigger as needed,
221310d565efSmrg but never make them smaller. */
221410d565efSmrg # ifdef DEFINED_ONCE
221510d565efSmrg static int regs_allocated_size;
221610d565efSmrg
221710d565efSmrg static const char ** regstart, ** regend;
221810d565efSmrg static const char ** old_regstart, ** old_regend;
221910d565efSmrg static const char **best_regstart, **best_regend;
222010d565efSmrg static const char **reg_dummy;
222110d565efSmrg # endif /* DEFINED_ONCE */
222210d565efSmrg
222310d565efSmrg static PREFIX(register_info_type) *PREFIX(reg_info);
222410d565efSmrg static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
222510d565efSmrg
222610d565efSmrg /* Make the register vectors big enough for NUM_REGS registers,
222710d565efSmrg but don't make them smaller. */
222810d565efSmrg
222910d565efSmrg static void
PREFIX(regex_grow_registers)223010d565efSmrg PREFIX(regex_grow_registers) (int num_regs)
223110d565efSmrg {
223210d565efSmrg if (num_regs > regs_allocated_size)
223310d565efSmrg {
223410d565efSmrg RETALLOC_IF (regstart, num_regs, const char *);
223510d565efSmrg RETALLOC_IF (regend, num_regs, const char *);
223610d565efSmrg RETALLOC_IF (old_regstart, num_regs, const char *);
223710d565efSmrg RETALLOC_IF (old_regend, num_regs, const char *);
223810d565efSmrg RETALLOC_IF (best_regstart, num_regs, const char *);
223910d565efSmrg RETALLOC_IF (best_regend, num_regs, const char *);
224010d565efSmrg RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
224110d565efSmrg RETALLOC_IF (reg_dummy, num_regs, const char *);
224210d565efSmrg RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
224310d565efSmrg
224410d565efSmrg regs_allocated_size = num_regs;
224510d565efSmrg }
224610d565efSmrg }
224710d565efSmrg
224810d565efSmrg # endif /* not MATCH_MAY_ALLOCATE */
224910d565efSmrg
225010d565efSmrg # ifndef DEFINED_ONCE
225110d565efSmrg static boolean group_in_compile_stack (compile_stack_type compile_stack,
225210d565efSmrg regnum_t regnum);
225310d565efSmrg # endif /* not DEFINED_ONCE */
225410d565efSmrg
225510d565efSmrg /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
225610d565efSmrg Returns one of error codes defined in `regex.h', or zero for success.
225710d565efSmrg
225810d565efSmrg Assumes the `allocated' (and perhaps `buffer') and `translate'
225910d565efSmrg fields are set in BUFP on entry.
226010d565efSmrg
226110d565efSmrg If it succeeds, results are put in BUFP (if it returns an error, the
226210d565efSmrg contents of BUFP are undefined):
226310d565efSmrg `buffer' is the compiled pattern;
226410d565efSmrg `syntax' is set to SYNTAX;
226510d565efSmrg `used' is set to the length of the compiled pattern;
226610d565efSmrg `fastmap_accurate' is zero;
226710d565efSmrg `re_nsub' is the number of subexpressions in PATTERN;
226810d565efSmrg `not_bol' and `not_eol' are zero;
226910d565efSmrg
227010d565efSmrg The `fastmap' and `newline_anchor' fields are neither
227110d565efSmrg examined nor set. */
227210d565efSmrg
/* Leave the enclosing function with VALUE after releasing the storage we
   allocated: the compile stack and, in the WCHAR build, also the
   converted pattern together with its `mbs_offset' and `is_binary' side
   tables.  VALUE is evaluated after the storage has been freed.  Use it
   like a statement, followed by a semicolon.  */
# ifndef WCHAR
#  define FREE_STACK_RETURN(value)                      \
  do                                                    \
    {                                                   \
      free (compile_stack.stack);                       \
      return (value);                                   \
    }                                                   \
  while (0)
# else /* WCHAR */
#  define FREE_STACK_RETURN(value)                      \
  do                                                    \
    {                                                   \
      free (pattern);                                   \
      free (mbs_offset);                                \
      free (is_binary);                                 \
      free (compile_stack.stack);                       \
      return (value);                                   \
    }                                                   \
  while (0)
# endif /* WCHAR */
228110d565efSmrg
228210d565efSmrg static reg_errcode_t
PREFIX(regex_compile)228310d565efSmrg PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
228410d565efSmrg size_t ARG_PREFIX(size), reg_syntax_t syntax,
228510d565efSmrg struct re_pattern_buffer *bufp)
228610d565efSmrg {
228710d565efSmrg /* We fetch characters from PATTERN here. Even though PATTERN is
228810d565efSmrg `char *' (i.e., signed), we declare these variables as unsigned, so
228910d565efSmrg they can be reliably used as array indices. */
229010d565efSmrg register UCHAR_T c, c1;
229110d565efSmrg
229210d565efSmrg #ifdef WCHAR
229310d565efSmrg /* A temporary space to keep wchar_t pattern and compiled pattern. */
229410d565efSmrg CHAR_T *pattern, *COMPILED_BUFFER_VAR;
229510d565efSmrg size_t size;
229610d565efSmrg /* offset buffer for optimization. See convert_mbs_to_wc. */
229710d565efSmrg int *mbs_offset = NULL;
229810d565efSmrg /* It hold whether each wchar_t is binary data or not. */
229910d565efSmrg char *is_binary = NULL;
230010d565efSmrg /* A flag whether exactn is handling binary data or not. */
230110d565efSmrg char is_exactn_bin = FALSE;
230210d565efSmrg #endif /* WCHAR */
230310d565efSmrg
230410d565efSmrg /* A random temporary spot in PATTERN. */
230510d565efSmrg const CHAR_T *p1;
230610d565efSmrg
230710d565efSmrg /* Points to the end of the buffer, where we should append. */
230810d565efSmrg register UCHAR_T *b;
230910d565efSmrg
231010d565efSmrg /* Keeps track of unclosed groups. */
231110d565efSmrg compile_stack_type compile_stack;
231210d565efSmrg
231310d565efSmrg /* Points to the current (ending) position in the pattern. */
231410d565efSmrg #ifdef WCHAR
231510d565efSmrg const CHAR_T *p;
231610d565efSmrg const CHAR_T *pend;
231710d565efSmrg #else /* BYTE */
231810d565efSmrg const CHAR_T *p = pattern;
231910d565efSmrg const CHAR_T *pend = pattern + size;
232010d565efSmrg #endif /* WCHAR */
232110d565efSmrg
232210d565efSmrg /* How to translate the characters in the pattern. */
232310d565efSmrg RE_TRANSLATE_TYPE translate = bufp->translate;
232410d565efSmrg
232510d565efSmrg /* Address of the count-byte of the most recently inserted `exactn'
232610d565efSmrg command. This makes it possible to tell if a new exact-match
232710d565efSmrg character can be added to that command or if the character requires
232810d565efSmrg a new `exactn' command. */
232910d565efSmrg UCHAR_T *pending_exact = 0;
233010d565efSmrg
233110d565efSmrg /* Address of start of the most recently finished expression.
233210d565efSmrg This tells, e.g., postfix * where to find the start of its
233310d565efSmrg operand. Reset at the beginning of groups and alternatives. */
233410d565efSmrg UCHAR_T *laststart = 0;
233510d565efSmrg
233610d565efSmrg /* Address of beginning of regexp, or inside of last group. */
233710d565efSmrg UCHAR_T *begalt;
233810d565efSmrg
233910d565efSmrg /* Address of the place where a forward jump should go to the end of
234010d565efSmrg the containing expression. Each alternative of an `or' -- except the
234110d565efSmrg last -- ends with a forward jump of this sort. */
234210d565efSmrg UCHAR_T *fixup_alt_jump = 0;
234310d565efSmrg
234410d565efSmrg /* Counts open-groups as they are encountered. Remembered for the
234510d565efSmrg matching close-group on the compile stack, so the same register
234610d565efSmrg number is put in the stop_memory as the start_memory. */
234710d565efSmrg regnum_t regnum = 0;
234810d565efSmrg
234910d565efSmrg #ifdef WCHAR
235010d565efSmrg /* Initialize the wchar_t PATTERN and offset_buffer. */
235110d565efSmrg p = pend = pattern = TALLOC(csize + 1, CHAR_T);
235210d565efSmrg mbs_offset = TALLOC(csize + 1, int);
235310d565efSmrg is_binary = TALLOC(csize + 1, char);
235410d565efSmrg if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
235510d565efSmrg {
235610d565efSmrg free(pattern);
235710d565efSmrg free(mbs_offset);
235810d565efSmrg free(is_binary);
235910d565efSmrg return REG_ESPACE;
236010d565efSmrg }
236110d565efSmrg pattern[csize] = L'\0'; /* sentinel */
236210d565efSmrg size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
236310d565efSmrg pend = p + size;
236410d565efSmrg if (size < 0)
236510d565efSmrg {
236610d565efSmrg free(pattern);
236710d565efSmrg free(mbs_offset);
236810d565efSmrg free(is_binary);
236910d565efSmrg return REG_BADPAT;
237010d565efSmrg }
237110d565efSmrg #endif
237210d565efSmrg
237310d565efSmrg #ifdef DEBUG
237410d565efSmrg DEBUG_PRINT1 ("\nCompiling pattern: ");
237510d565efSmrg if (debug)
237610d565efSmrg {
237710d565efSmrg unsigned debug_count;
237810d565efSmrg
237910d565efSmrg for (debug_count = 0; debug_count < size; debug_count++)
238010d565efSmrg PUT_CHAR (pattern[debug_count]);
238110d565efSmrg putchar ('\n');
238210d565efSmrg }
238310d565efSmrg #endif /* DEBUG */
238410d565efSmrg
238510d565efSmrg /* Initialize the compile stack. */
238610d565efSmrg compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
238710d565efSmrg if (compile_stack.stack == NULL)
238810d565efSmrg {
238910d565efSmrg #ifdef WCHAR
239010d565efSmrg free(pattern);
239110d565efSmrg free(mbs_offset);
239210d565efSmrg free(is_binary);
239310d565efSmrg #endif
239410d565efSmrg return REG_ESPACE;
239510d565efSmrg }
239610d565efSmrg
239710d565efSmrg compile_stack.size = INIT_COMPILE_STACK_SIZE;
239810d565efSmrg compile_stack.avail = 0;
239910d565efSmrg
240010d565efSmrg /* Initialize the pattern buffer. */
240110d565efSmrg bufp->syntax = syntax;
240210d565efSmrg bufp->fastmap_accurate = 0;
240310d565efSmrg bufp->not_bol = bufp->not_eol = 0;
240410d565efSmrg
240510d565efSmrg /* Set `used' to zero, so that if we return an error, the pattern
240610d565efSmrg printer (for debugging) will think there's no pattern. We reset it
240710d565efSmrg at the end. */
240810d565efSmrg bufp->used = 0;
240910d565efSmrg
241010d565efSmrg /* Always count groups, whether or not bufp->no_sub is set. */
241110d565efSmrg bufp->re_nsub = 0;
241210d565efSmrg
241310d565efSmrg #if !defined emacs && !defined SYNTAX_TABLE
241410d565efSmrg /* Initialize the syntax table. */
241510d565efSmrg init_syntax_once ();
241610d565efSmrg #endif
241710d565efSmrg
241810d565efSmrg if (bufp->allocated == 0)
241910d565efSmrg {
242010d565efSmrg if (bufp->buffer)
242110d565efSmrg { /* If zero allocated, but buffer is non-null, try to realloc
242210d565efSmrg enough space. This loses if buffer's address is bogus, but
242310d565efSmrg that is the user's responsibility. */
242410d565efSmrg #ifdef WCHAR
242510d565efSmrg /* Free bufp->buffer and allocate an array for wchar_t pattern
242610d565efSmrg buffer. */
242710d565efSmrg free(bufp->buffer);
242810d565efSmrg COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
242910d565efSmrg UCHAR_T);
243010d565efSmrg #else
243110d565efSmrg RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
243210d565efSmrg #endif /* WCHAR */
243310d565efSmrg }
243410d565efSmrg else
243510d565efSmrg { /* Caller did not allocate a buffer. Do it for them. */
243610d565efSmrg COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
243710d565efSmrg UCHAR_T);
243810d565efSmrg }
243910d565efSmrg
244010d565efSmrg if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
244110d565efSmrg #ifdef WCHAR
244210d565efSmrg bufp->buffer = (char*)COMPILED_BUFFER_VAR;
244310d565efSmrg #endif /* WCHAR */
244410d565efSmrg bufp->allocated = INIT_BUF_SIZE;
244510d565efSmrg }
244610d565efSmrg #ifdef WCHAR
244710d565efSmrg else
244810d565efSmrg COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
244910d565efSmrg #endif
245010d565efSmrg
245110d565efSmrg begalt = b = COMPILED_BUFFER_VAR;
245210d565efSmrg
245310d565efSmrg /* Loop through the uncompiled pattern until we're at the end. */
245410d565efSmrg while (p != pend)
245510d565efSmrg {
245610d565efSmrg PATFETCH (c);
245710d565efSmrg
245810d565efSmrg switch (c)
245910d565efSmrg {
246010d565efSmrg case '^':
246110d565efSmrg {
246210d565efSmrg if ( /* If at start of pattern, it's an operator. */
246310d565efSmrg p == pattern + 1
246410d565efSmrg /* If context independent, it's an operator. */
246510d565efSmrg || syntax & RE_CONTEXT_INDEP_ANCHORS
246610d565efSmrg /* Otherwise, depends on what's come before. */
246710d565efSmrg || PREFIX(at_begline_loc_p) (pattern, p, syntax))
246810d565efSmrg BUF_PUSH (begline);
246910d565efSmrg else
247010d565efSmrg goto normal_char;
247110d565efSmrg }
247210d565efSmrg break;
247310d565efSmrg
247410d565efSmrg
247510d565efSmrg case '$':
247610d565efSmrg {
247710d565efSmrg if ( /* If at end of pattern, it's an operator. */
247810d565efSmrg p == pend
247910d565efSmrg /* If context independent, it's an operator. */
248010d565efSmrg || syntax & RE_CONTEXT_INDEP_ANCHORS
248110d565efSmrg /* Otherwise, depends on what's next. */
248210d565efSmrg || PREFIX(at_endline_loc_p) (p, pend, syntax))
248310d565efSmrg BUF_PUSH (endline);
248410d565efSmrg else
248510d565efSmrg goto normal_char;
248610d565efSmrg }
248710d565efSmrg break;
248810d565efSmrg
248910d565efSmrg
249010d565efSmrg case '+':
249110d565efSmrg case '?':
249210d565efSmrg if ((syntax & RE_BK_PLUS_QM)
249310d565efSmrg || (syntax & RE_LIMITED_OPS))
249410d565efSmrg goto normal_char;
249510d565efSmrg /* Fall through. */
249610d565efSmrg handle_plus:
249710d565efSmrg case '*':
249810d565efSmrg /* If there is no previous pattern... */
249910d565efSmrg if (!laststart)
250010d565efSmrg {
250110d565efSmrg if (syntax & RE_CONTEXT_INVALID_OPS)
250210d565efSmrg FREE_STACK_RETURN (REG_BADRPT);
250310d565efSmrg else if (!(syntax & RE_CONTEXT_INDEP_OPS))
250410d565efSmrg goto normal_char;
250510d565efSmrg }
250610d565efSmrg
250710d565efSmrg {
250810d565efSmrg /* Are we optimizing this jump? */
250910d565efSmrg boolean keep_string_p = false;
251010d565efSmrg
251110d565efSmrg /* 1 means zero (many) matches is allowed. */
251210d565efSmrg char zero_times_ok = 0, many_times_ok = 0;
251310d565efSmrg
251410d565efSmrg /* If there is a sequence of repetition chars, collapse it
251510d565efSmrg down to just one (the right one). We can't combine
251610d565efSmrg interval operators with these because of, e.g., `a{2}*',
251710d565efSmrg which should only match an even number of `a's. */
251810d565efSmrg
251910d565efSmrg for (;;)
252010d565efSmrg {
252110d565efSmrg zero_times_ok |= c != '+';
252210d565efSmrg many_times_ok |= c != '?';
252310d565efSmrg
252410d565efSmrg if (p == pend)
252510d565efSmrg break;
252610d565efSmrg
252710d565efSmrg PATFETCH (c);
252810d565efSmrg
252910d565efSmrg if (c == '*'
253010d565efSmrg || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
253110d565efSmrg ;
253210d565efSmrg
253310d565efSmrg else if (syntax & RE_BK_PLUS_QM && c == '\\')
253410d565efSmrg {
253510d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
253610d565efSmrg
253710d565efSmrg PATFETCH (c1);
253810d565efSmrg if (!(c1 == '+' || c1 == '?'))
253910d565efSmrg {
254010d565efSmrg PATUNFETCH;
254110d565efSmrg PATUNFETCH;
254210d565efSmrg break;
254310d565efSmrg }
254410d565efSmrg
254510d565efSmrg c = c1;
254610d565efSmrg }
254710d565efSmrg else
254810d565efSmrg {
254910d565efSmrg PATUNFETCH;
255010d565efSmrg break;
255110d565efSmrg }
255210d565efSmrg
255310d565efSmrg /* If we get here, we found another repeat character. */
255410d565efSmrg }
255510d565efSmrg
255610d565efSmrg /* Star, etc. applied to an empty pattern is equivalent
255710d565efSmrg to an empty pattern. */
255810d565efSmrg if (!laststart)
255910d565efSmrg break;
256010d565efSmrg
256110d565efSmrg /* Now we know whether or not zero matches is allowed
256210d565efSmrg and also whether or not two or more matches is allowed. */
256310d565efSmrg if (many_times_ok)
256410d565efSmrg { /* More than one repetition is allowed, so put in at the
256510d565efSmrg end a backward relative jump from `b' to before the next
256610d565efSmrg jump we're going to put in below (which jumps from
256710d565efSmrg laststart to after this jump).
256810d565efSmrg
256910d565efSmrg But if we are at the `*' in the exact sequence `.*\n',
257010d565efSmrg insert an unconditional jump backwards to the .,
257110d565efSmrg instead of the beginning of the loop. This way we only
257210d565efSmrg push a failure point once, instead of every time
257310d565efSmrg through the loop. */
257410d565efSmrg assert (p - 1 > pattern);
257510d565efSmrg
257610d565efSmrg /* Allocate the space for the jump. */
257710d565efSmrg GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
257810d565efSmrg
257910d565efSmrg /* We know we are not at the first character of the pattern,
258010d565efSmrg because laststart was nonzero. And we've already
258110d565efSmrg incremented `p', by the way, to be the character after
258210d565efSmrg the `*'. Do we have to do something analogous here
258310d565efSmrg for null bytes, because of RE_DOT_NOT_NULL? */
258410d565efSmrg if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
258510d565efSmrg && zero_times_ok
258610d565efSmrg && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
258710d565efSmrg && !(syntax & RE_DOT_NEWLINE))
258810d565efSmrg { /* We have .*\n. */
258910d565efSmrg STORE_JUMP (jump, b, laststart);
259010d565efSmrg keep_string_p = true;
259110d565efSmrg }
259210d565efSmrg else
259310d565efSmrg /* Anything else. */
259410d565efSmrg STORE_JUMP (maybe_pop_jump, b, laststart -
259510d565efSmrg (1 + OFFSET_ADDRESS_SIZE));
259610d565efSmrg
259710d565efSmrg /* We've added more stuff to the buffer. */
259810d565efSmrg b += 1 + OFFSET_ADDRESS_SIZE;
259910d565efSmrg }
260010d565efSmrg
260110d565efSmrg /* On failure, jump from laststart to b + 3, which will be the
260210d565efSmrg end of the buffer after this jump is inserted. */
260310d565efSmrg /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
260410d565efSmrg 'b + 3'. */
260510d565efSmrg GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
260610d565efSmrg INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
260710d565efSmrg : on_failure_jump,
260810d565efSmrg laststart, b + 1 + OFFSET_ADDRESS_SIZE);
260910d565efSmrg pending_exact = 0;
261010d565efSmrg b += 1 + OFFSET_ADDRESS_SIZE;
261110d565efSmrg
261210d565efSmrg if (!zero_times_ok)
261310d565efSmrg {
261410d565efSmrg /* At least one repetition is required, so insert a
261510d565efSmrg `dummy_failure_jump' before the initial
261610d565efSmrg `on_failure_jump' instruction of the loop. This
261710d565efSmrg effects a skip over that instruction the first time
261810d565efSmrg we hit that loop. */
261910d565efSmrg GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
262010d565efSmrg INSERT_JUMP (dummy_failure_jump, laststart, laststart +
262110d565efSmrg 2 + 2 * OFFSET_ADDRESS_SIZE);
262210d565efSmrg b += 1 + OFFSET_ADDRESS_SIZE;
262310d565efSmrg }
262410d565efSmrg }
262510d565efSmrg break;
262610d565efSmrg
262710d565efSmrg
262810d565efSmrg case '.':
262910d565efSmrg laststart = b;
263010d565efSmrg BUF_PUSH (anychar);
263110d565efSmrg break;
263210d565efSmrg
263310d565efSmrg
263410d565efSmrg case '[':
263510d565efSmrg {
263610d565efSmrg boolean had_char_class = false;
263710d565efSmrg #ifdef WCHAR
263810d565efSmrg CHAR_T range_start = 0xffffffff;
263910d565efSmrg #else
264010d565efSmrg unsigned int range_start = 0xffffffff;
264110d565efSmrg #endif
264210d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
264310d565efSmrg
264410d565efSmrg #ifdef WCHAR
264510d565efSmrg /* We assume a charset(_not) structure as a wchar_t array.
264610d565efSmrg charset[0] = (re_opcode_t) charset(_not)
264710d565efSmrg charset[1] = l (= length of char_classes)
264810d565efSmrg charset[2] = m (= length of collating_symbols)
264910d565efSmrg charset[3] = n (= length of equivalence_classes)
265010d565efSmrg charset[4] = o (= length of char_ranges)
265110d565efSmrg charset[5] = p (= length of chars)
265210d565efSmrg
265310d565efSmrg charset[6] = char_class (wctype_t)
265410d565efSmrg charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
265510d565efSmrg ...
265610d565efSmrg charset[l+5] = char_class (wctype_t)
265710d565efSmrg
265810d565efSmrg charset[l+6] = collating_symbol (wchar_t)
265910d565efSmrg ...
266010d565efSmrg charset[l+m+5] = collating_symbol (wchar_t)
266110d565efSmrg ifdef _LIBC we use the index if
266210d565efSmrg _NL_COLLATE_SYMB_EXTRAMB instead of
266310d565efSmrg wchar_t string.
266410d565efSmrg
266510d565efSmrg charset[l+m+6] = equivalence_classes (wchar_t)
266610d565efSmrg ...
266710d565efSmrg charset[l+m+n+5] = equivalence_classes (wchar_t)
266810d565efSmrg ifdef _LIBC we use the index in
266910d565efSmrg _NL_COLLATE_WEIGHT instead of
267010d565efSmrg wchar_t string.
267110d565efSmrg
267210d565efSmrg charset[l+m+n+6] = range_start
267310d565efSmrg charset[l+m+n+7] = range_end
267410d565efSmrg ...
267510d565efSmrg charset[l+m+n+2o+4] = range_start
267610d565efSmrg charset[l+m+n+2o+5] = range_end
267710d565efSmrg ifdef _LIBC we use the value looked up
267810d565efSmrg in _NL_COLLATE_COLLSEQ instead of
267910d565efSmrg wchar_t character.
268010d565efSmrg
268110d565efSmrg charset[l+m+n+2o+6] = char
268210d565efSmrg ...
268310d565efSmrg charset[l+m+n+2o+p+5] = char
268410d565efSmrg
268510d565efSmrg */
268610d565efSmrg
268710d565efSmrg /* We need at least 6 spaces: the opcode, the length of
268810d565efSmrg char_classes, the length of collating_symbols, the length of
268910d565efSmrg equivalence_classes, the length of char_ranges, the length of
269010d565efSmrg chars. */
269110d565efSmrg GET_BUFFER_SPACE (6);
269210d565efSmrg
269310d565efSmrg /* Save b as laststart. And We use laststart as the pointer
269410d565efSmrg to the first element of the charset here.
269510d565efSmrg In other words, laststart[i] indicates charset[i]. */
269610d565efSmrg laststart = b;
269710d565efSmrg
269810d565efSmrg /* We test `*p == '^' twice, instead of using an if
269910d565efSmrg statement, so we only need one BUF_PUSH. */
270010d565efSmrg BUF_PUSH (*p == '^' ? charset_not : charset);
270110d565efSmrg if (*p == '^')
270210d565efSmrg p++;
270310d565efSmrg
270410d565efSmrg /* Push the length of char_classes, the length of
270510d565efSmrg collating_symbols, the length of equivalence_classes, the
270610d565efSmrg length of char_ranges and the length of chars. */
270710d565efSmrg BUF_PUSH_3 (0, 0, 0);
270810d565efSmrg BUF_PUSH_2 (0, 0);
270910d565efSmrg
271010d565efSmrg /* Remember the first position in the bracket expression. */
271110d565efSmrg p1 = p;
271210d565efSmrg
271310d565efSmrg /* charset_not matches newline according to a syntax bit. */
271410d565efSmrg if ((re_opcode_t) b[-6] == charset_not
271510d565efSmrg && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
271610d565efSmrg {
271710d565efSmrg BUF_PUSH('\n');
271810d565efSmrg laststart[5]++; /* Update the length of characters */
271910d565efSmrg }
272010d565efSmrg
272110d565efSmrg /* Read in characters and ranges, setting map bits. */
272210d565efSmrg for (;;)
272310d565efSmrg {
272410d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
272510d565efSmrg
272610d565efSmrg PATFETCH (c);
272710d565efSmrg
272810d565efSmrg /* \ might escape characters inside [...] and [^...]. */
272910d565efSmrg if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
273010d565efSmrg {
273110d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
273210d565efSmrg
273310d565efSmrg PATFETCH (c1);
273410d565efSmrg BUF_PUSH(c1);
273510d565efSmrg laststart[5]++; /* Update the length of chars */
273610d565efSmrg range_start = c1;
273710d565efSmrg continue;
273810d565efSmrg }
273910d565efSmrg
274010d565efSmrg /* Could be the end of the bracket expression. If it's
274110d565efSmrg not (i.e., when the bracket expression is `[]' so
274210d565efSmrg far), the ']' character bit gets set way below. */
274310d565efSmrg if (c == ']' && p != p1 + 1)
274410d565efSmrg break;
274510d565efSmrg
274610d565efSmrg /* Look ahead to see if it's a range when the last thing
274710d565efSmrg was a character class. */
274810d565efSmrg if (had_char_class && c == '-' && *p != ']')
274910d565efSmrg FREE_STACK_RETURN (REG_ERANGE);
275010d565efSmrg
275110d565efSmrg /* Look ahead to see if it's a range when the last thing
275210d565efSmrg was a character: if this is a hyphen not at the
275310d565efSmrg beginning or the end of a list, then it's the range
275410d565efSmrg operator. */
275510d565efSmrg if (c == '-'
275610d565efSmrg && !(p - 2 >= pattern && p[-2] == '[')
275710d565efSmrg && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
275810d565efSmrg && *p != ']')
275910d565efSmrg {
276010d565efSmrg reg_errcode_t ret;
276110d565efSmrg /* Allocate the space for range_start and range_end. */
276210d565efSmrg GET_BUFFER_SPACE (2);
276310d565efSmrg /* Update the pointer to indicate end of buffer. */
276410d565efSmrg b += 2;
276510d565efSmrg ret = wcs_compile_range (range_start, &p, pend, translate,
276610d565efSmrg syntax, b, laststart);
276710d565efSmrg if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
276810d565efSmrg range_start = 0xffffffff;
276910d565efSmrg }
277010d565efSmrg else if (p[0] == '-' && p[1] != ']')
277110d565efSmrg { /* This handles ranges made up of characters only. */
277210d565efSmrg reg_errcode_t ret;
277310d565efSmrg
277410d565efSmrg /* Move past the `-'. */
277510d565efSmrg PATFETCH (c1);
277610d565efSmrg /* Allocate the space for range_start and range_end. */
277710d565efSmrg GET_BUFFER_SPACE (2);
277810d565efSmrg /* Update the pointer to indicate end of buffer. */
277910d565efSmrg b += 2;
278010d565efSmrg ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
278110d565efSmrg laststart);
278210d565efSmrg if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
278310d565efSmrg range_start = 0xffffffff;
278410d565efSmrg }
278510d565efSmrg
278610d565efSmrg /* See if we're at the beginning of a possible character
278710d565efSmrg class. */
278810d565efSmrg else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
278910d565efSmrg { /* Leave room for the null. */
279010d565efSmrg char str[CHAR_CLASS_MAX_LENGTH + 1];
279110d565efSmrg
279210d565efSmrg PATFETCH (c);
279310d565efSmrg c1 = 0;
279410d565efSmrg
279510d565efSmrg /* If pattern is `[[:'. */
279610d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
279710d565efSmrg
279810d565efSmrg for (;;)
279910d565efSmrg {
280010d565efSmrg PATFETCH (c);
280110d565efSmrg if ((c == ':' && *p == ']') || p == pend)
280210d565efSmrg break;
280310d565efSmrg if (c1 < CHAR_CLASS_MAX_LENGTH)
280410d565efSmrg str[c1++] = c;
280510d565efSmrg else
280610d565efSmrg /* This is in any case an invalid class name. */
280710d565efSmrg str[0] = '\0';
280810d565efSmrg }
280910d565efSmrg str[c1] = '\0';
281010d565efSmrg
281110d565efSmrg /* If isn't a word bracketed by `[:' and `:]':
281210d565efSmrg undo the ending character, the letters, and leave
281310d565efSmrg the leading `:' and `[' (but store them as character). */
281410d565efSmrg if (c == ':' && *p == ']')
281510d565efSmrg {
281610d565efSmrg wctype_t wt;
281710d565efSmrg uintptr_t alignedp;
281810d565efSmrg
281910d565efSmrg /* Query the character class as wctype_t. */
282010d565efSmrg wt = IS_CHAR_CLASS (str);
282110d565efSmrg if (wt == 0)
282210d565efSmrg FREE_STACK_RETURN (REG_ECTYPE);
282310d565efSmrg
282410d565efSmrg /* Throw away the ] at the end of the character
282510d565efSmrg class. */
282610d565efSmrg PATFETCH (c);
282710d565efSmrg
282810d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
282910d565efSmrg
283010d565efSmrg /* Allocate the space for character class. */
283110d565efSmrg GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
283210d565efSmrg /* Update the pointer to indicate end of buffer. */
283310d565efSmrg b += CHAR_CLASS_SIZE;
283410d565efSmrg /* Move data which follow character classes
283510d565efSmrg not to violate the data. */
283610d565efSmrg insert_space(CHAR_CLASS_SIZE,
283710d565efSmrg laststart + 6 + laststart[1],
283810d565efSmrg b - 1);
283910d565efSmrg alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
284010d565efSmrg + __alignof__(wctype_t) - 1)
284110d565efSmrg & ~(uintptr_t)(__alignof__(wctype_t) - 1);
284210d565efSmrg /* Store the character class. */
284310d565efSmrg *((wctype_t*)alignedp) = wt;
284410d565efSmrg /* Update length of char_classes */
284510d565efSmrg laststart[1] += CHAR_CLASS_SIZE;
284610d565efSmrg
284710d565efSmrg had_char_class = true;
284810d565efSmrg }
284910d565efSmrg else
285010d565efSmrg {
285110d565efSmrg c1++;
285210d565efSmrg while (c1--)
285310d565efSmrg PATUNFETCH;
285410d565efSmrg BUF_PUSH ('[');
285510d565efSmrg BUF_PUSH (':');
285610d565efSmrg laststart[5] += 2; /* Update the length of characters */
285710d565efSmrg range_start = ':';
285810d565efSmrg had_char_class = false;
285910d565efSmrg }
286010d565efSmrg }
286110d565efSmrg else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
286210d565efSmrg || *p == '.'))
286310d565efSmrg {
286410d565efSmrg CHAR_T str[128]; /* Should be large enough. */
286510d565efSmrg CHAR_T delim = *p; /* '=' or '.' */
286610d565efSmrg # ifdef _LIBC
286710d565efSmrg uint32_t nrules =
286810d565efSmrg _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
286910d565efSmrg # endif
287010d565efSmrg PATFETCH (c);
287110d565efSmrg c1 = 0;
287210d565efSmrg
287310d565efSmrg /* If pattern is `[[=' or '[[.'. */
287410d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
287510d565efSmrg
287610d565efSmrg for (;;)
287710d565efSmrg {
287810d565efSmrg PATFETCH (c);
287910d565efSmrg if ((c == delim && *p == ']') || p == pend)
288010d565efSmrg break;
288110d565efSmrg if (c1 < sizeof (str) - 1)
288210d565efSmrg str[c1++] = c;
288310d565efSmrg else
288410d565efSmrg /* This is in any case an invalid class name. */
288510d565efSmrg str[0] = '\0';
288610d565efSmrg }
288710d565efSmrg str[c1] = '\0';
288810d565efSmrg
288910d565efSmrg if (c == delim && *p == ']' && str[0] != '\0')
289010d565efSmrg {
289110d565efSmrg unsigned int i, offset;
289210d565efSmrg /* If we have no collation data we use the default
289310d565efSmrg collation in which each character is in a class
289410d565efSmrg by itself. It also means that ASCII is the
289510d565efSmrg character set and therefore we cannot have character
289610d565efSmrg with more than one byte in the multibyte
289710d565efSmrg representation. */
289810d565efSmrg
289910d565efSmrg /* If not defined _LIBC, we push the name and
290010d565efSmrg `\0' for the sake of matching performance. */
290110d565efSmrg int datasize = c1 + 1;
290210d565efSmrg
290310d565efSmrg # ifdef _LIBC
290410d565efSmrg int32_t idx = 0;
290510d565efSmrg if (nrules == 0)
290610d565efSmrg # endif
290710d565efSmrg {
290810d565efSmrg if (c1 != 1)
290910d565efSmrg FREE_STACK_RETURN (REG_ECOLLATE);
291010d565efSmrg }
291110d565efSmrg # ifdef _LIBC
291210d565efSmrg else
291310d565efSmrg {
291410d565efSmrg const int32_t *table;
291510d565efSmrg const int32_t *weights;
291610d565efSmrg const int32_t *extra;
291710d565efSmrg const int32_t *indirect;
291810d565efSmrg wint_t *cp;
291910d565efSmrg
292010d565efSmrg /* This #include defines a local function! */
292110d565efSmrg # include <locale/weightwc.h>
292210d565efSmrg
292310d565efSmrg if(delim == '=')
292410d565efSmrg {
292510d565efSmrg /* We push the index for equivalence class. */
292610d565efSmrg cp = (wint_t*)str;
292710d565efSmrg
292810d565efSmrg table = (const int32_t *)
292910d565efSmrg _NL_CURRENT (LC_COLLATE,
293010d565efSmrg _NL_COLLATE_TABLEWC);
293110d565efSmrg weights = (const int32_t *)
293210d565efSmrg _NL_CURRENT (LC_COLLATE,
293310d565efSmrg _NL_COLLATE_WEIGHTWC);
293410d565efSmrg extra = (const int32_t *)
293510d565efSmrg _NL_CURRENT (LC_COLLATE,
293610d565efSmrg _NL_COLLATE_EXTRAWC);
293710d565efSmrg indirect = (const int32_t *)
293810d565efSmrg _NL_CURRENT (LC_COLLATE,
293910d565efSmrg _NL_COLLATE_INDIRECTWC);
294010d565efSmrg
294110d565efSmrg idx = findidx ((const wint_t**)&cp);
294210d565efSmrg if (idx == 0 || cp < (wint_t*) str + c1)
294310d565efSmrg /* This is no valid character. */
294410d565efSmrg FREE_STACK_RETURN (REG_ECOLLATE);
294510d565efSmrg
294610d565efSmrg str[0] = (wchar_t)idx;
294710d565efSmrg }
294810d565efSmrg else /* delim == '.' */
294910d565efSmrg {
295010d565efSmrg /* We push collation sequence value
295110d565efSmrg for collating symbol. */
295210d565efSmrg int32_t table_size;
295310d565efSmrg const int32_t *symb_table;
295410d565efSmrg const unsigned char *extra;
295510d565efSmrg int32_t idx;
295610d565efSmrg int32_t elem;
295710d565efSmrg int32_t second;
295810d565efSmrg int32_t hash;
295910d565efSmrg char char_str[c1];
296010d565efSmrg
296110d565efSmrg /* We have to convert the name to a single-byte
296210d565efSmrg string. This is possible since the names
296310d565efSmrg consist of ASCII characters and the internal
296410d565efSmrg representation is UCS4. */
296510d565efSmrg for (i = 0; i < c1; ++i)
296610d565efSmrg char_str[i] = str[i];
296710d565efSmrg
296810d565efSmrg table_size =
296910d565efSmrg _NL_CURRENT_WORD (LC_COLLATE,
297010d565efSmrg _NL_COLLATE_SYMB_HASH_SIZEMB);
297110d565efSmrg symb_table = (const int32_t *)
297210d565efSmrg _NL_CURRENT (LC_COLLATE,
297310d565efSmrg _NL_COLLATE_SYMB_TABLEMB);
297410d565efSmrg extra = (const unsigned char *)
297510d565efSmrg _NL_CURRENT (LC_COLLATE,
297610d565efSmrg _NL_COLLATE_SYMB_EXTRAMB);
297710d565efSmrg
297810d565efSmrg /* Locate the character in the hashing table. */
297910d565efSmrg hash = elem_hash (char_str, c1);
298010d565efSmrg
298110d565efSmrg idx = 0;
298210d565efSmrg elem = hash % table_size;
298310d565efSmrg second = hash % (table_size - 2);
298410d565efSmrg while (symb_table[2 * elem] != 0)
298510d565efSmrg {
298610d565efSmrg /* First compare the hashing value. */
298710d565efSmrg if (symb_table[2 * elem] == hash
298810d565efSmrg && c1 == extra[symb_table[2 * elem + 1]]
298910d565efSmrg && memcmp (char_str,
299010d565efSmrg &extra[symb_table[2 * elem + 1]
299110d565efSmrg + 1], c1) == 0)
299210d565efSmrg {
299310d565efSmrg /* Yep, this is the entry. */
299410d565efSmrg idx = symb_table[2 * elem + 1];
299510d565efSmrg idx += 1 + extra[idx];
299610d565efSmrg break;
299710d565efSmrg }
299810d565efSmrg
299910d565efSmrg /* Next entry. */
300010d565efSmrg elem += second;
300110d565efSmrg }
300210d565efSmrg
300310d565efSmrg if (symb_table[2 * elem] != 0)
300410d565efSmrg {
300510d565efSmrg /* Compute the index of the byte sequence
300610d565efSmrg in the table. */
300710d565efSmrg idx += 1 + extra[idx];
300810d565efSmrg /* Adjust for the alignment. */
300910d565efSmrg idx = (idx + 3) & ~3;
301010d565efSmrg
301110d565efSmrg str[0] = (wchar_t) idx + 4;
301210d565efSmrg }
301310d565efSmrg else if (symb_table[2 * elem] == 0 && c1 == 1)
301410d565efSmrg {
301510d565efSmrg /* No valid character. Match it as a
301610d565efSmrg single byte character. */
301710d565efSmrg had_char_class = false;
301810d565efSmrg BUF_PUSH(str[0]);
301910d565efSmrg /* Update the length of characters */
302010d565efSmrg laststart[5]++;
302110d565efSmrg range_start = str[0];
302210d565efSmrg
302310d565efSmrg /* Throw away the ] at the end of the
302410d565efSmrg collating symbol. */
302510d565efSmrg PATFETCH (c);
302610d565efSmrg /* exit from the switch block. */
302710d565efSmrg continue;
302810d565efSmrg }
302910d565efSmrg else
303010d565efSmrg FREE_STACK_RETURN (REG_ECOLLATE);
303110d565efSmrg }
303210d565efSmrg datasize = 1;
303310d565efSmrg }
303410d565efSmrg # endif
303510d565efSmrg /* Throw away the ] at the end of the equivalence
303610d565efSmrg class (or collating symbol). */
303710d565efSmrg PATFETCH (c);
303810d565efSmrg
303910d565efSmrg /* Allocate the space for the equivalence class
304010d565efSmrg (or collating symbol) (and '\0' if needed). */
304110d565efSmrg GET_BUFFER_SPACE(datasize);
304210d565efSmrg /* Update the pointer to indicate end of buffer. */
304310d565efSmrg b += datasize;
304410d565efSmrg
304510d565efSmrg if (delim == '=')
304610d565efSmrg { /* equivalence class */
304710d565efSmrg /* Calculate the offset of char_ranges,
304810d565efSmrg which is next to equivalence_classes. */
304910d565efSmrg offset = laststart[1] + laststart[2]
305010d565efSmrg + laststart[3] +6;
305110d565efSmrg /* Insert space. */
305210d565efSmrg insert_space(datasize, laststart + offset, b - 1);
305310d565efSmrg
305410d565efSmrg /* Write the equivalence_class and \0. */
305510d565efSmrg for (i = 0 ; i < datasize ; i++)
305610d565efSmrg laststart[offset + i] = str[i];
305710d565efSmrg
305810d565efSmrg /* Update the length of equivalence_classes. */
305910d565efSmrg laststart[3] += datasize;
306010d565efSmrg had_char_class = true;
306110d565efSmrg }
306210d565efSmrg else /* delim == '.' */
306310d565efSmrg { /* collating symbol */
306410d565efSmrg /* Calculate the offset of the equivalence_classes,
306510d565efSmrg which is next to collating_symbols. */
306610d565efSmrg offset = laststart[1] + laststart[2] + 6;
306710d565efSmrg /* Insert space and write the collating_symbol
306810d565efSmrg and \0. */
306910d565efSmrg insert_space(datasize, laststart + offset, b-1);
307010d565efSmrg for (i = 0 ; i < datasize ; i++)
307110d565efSmrg laststart[offset + i] = str[i];
307210d565efSmrg
307310d565efSmrg /* In re_match_2_internal if range_start < -1, we
307410d565efSmrg assume -range_start is the offset of the
307510d565efSmrg collating symbol which is specified as
307610d565efSmrg the character of the range start. So we assign
307710d565efSmrg -(laststart[1] + laststart[2] + 6) to
307810d565efSmrg range_start. */
307910d565efSmrg range_start = -(laststart[1] + laststart[2] + 6);
308010d565efSmrg /* Update the length of collating_symbol. */
308110d565efSmrg laststart[2] += datasize;
308210d565efSmrg had_char_class = false;
308310d565efSmrg }
308410d565efSmrg }
308510d565efSmrg else
308610d565efSmrg {
308710d565efSmrg c1++;
308810d565efSmrg while (c1--)
308910d565efSmrg PATUNFETCH;
309010d565efSmrg BUF_PUSH ('[');
309110d565efSmrg BUF_PUSH (delim);
309210d565efSmrg laststart[5] += 2; /* Update the length of characters */
309310d565efSmrg range_start = delim;
309410d565efSmrg had_char_class = false;
309510d565efSmrg }
309610d565efSmrg }
309710d565efSmrg else
309810d565efSmrg {
309910d565efSmrg had_char_class = false;
310010d565efSmrg BUF_PUSH(c);
310110d565efSmrg laststart[5]++; /* Update the length of characters */
310210d565efSmrg range_start = c;
310310d565efSmrg }
310410d565efSmrg }
310510d565efSmrg
310610d565efSmrg #else /* BYTE */
310710d565efSmrg /* Ensure that we have enough space to push a charset: the
310810d565efSmrg opcode, the length count, and the bitset; 34 bytes in all. */
310910d565efSmrg GET_BUFFER_SPACE (34);
311010d565efSmrg
311110d565efSmrg laststart = b;
311210d565efSmrg
311310d565efSmrg /* We test `*p == '^' twice, instead of using an if
311410d565efSmrg statement, so we only need one BUF_PUSH. */
311510d565efSmrg BUF_PUSH (*p == '^' ? charset_not : charset);
311610d565efSmrg if (*p == '^')
311710d565efSmrg p++;
311810d565efSmrg
311910d565efSmrg /* Remember the first position in the bracket expression. */
312010d565efSmrg p1 = p;
312110d565efSmrg
312210d565efSmrg /* Push the number of bytes in the bitmap. */
312310d565efSmrg BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
312410d565efSmrg
312510d565efSmrg /* Clear the whole map. */
312610d565efSmrg bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
312710d565efSmrg
312810d565efSmrg /* charset_not matches newline according to a syntax bit. */
312910d565efSmrg if ((re_opcode_t) b[-2] == charset_not
313010d565efSmrg && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
313110d565efSmrg SET_LIST_BIT ('\n');
313210d565efSmrg
313310d565efSmrg /* Read in characters and ranges, setting map bits. */
313410d565efSmrg for (;;)
313510d565efSmrg {
313610d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
313710d565efSmrg
313810d565efSmrg PATFETCH (c);
313910d565efSmrg
314010d565efSmrg /* \ might escape characters inside [...] and [^...]. */
314110d565efSmrg if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
314210d565efSmrg {
314310d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
314410d565efSmrg
314510d565efSmrg PATFETCH (c1);
314610d565efSmrg SET_LIST_BIT (c1);
314710d565efSmrg range_start = c1;
314810d565efSmrg continue;
314910d565efSmrg }
315010d565efSmrg
315110d565efSmrg /* Could be the end of the bracket expression. If it's
315210d565efSmrg not (i.e., when the bracket expression is `[]' so
315310d565efSmrg far), the ']' character bit gets set way below. */
315410d565efSmrg if (c == ']' && p != p1 + 1)
315510d565efSmrg break;
315610d565efSmrg
315710d565efSmrg /* Look ahead to see if it's a range when the last thing
315810d565efSmrg was a character class. */
315910d565efSmrg if (had_char_class && c == '-' && *p != ']')
316010d565efSmrg FREE_STACK_RETURN (REG_ERANGE);
316110d565efSmrg
316210d565efSmrg /* Look ahead to see if it's a range when the last thing
316310d565efSmrg was a character: if this is a hyphen not at the
316410d565efSmrg beginning or the end of a list, then it's the range
316510d565efSmrg operator. */
316610d565efSmrg if (c == '-'
316710d565efSmrg && !(p - 2 >= pattern && p[-2] == '[')
316810d565efSmrg && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
316910d565efSmrg && *p != ']')
317010d565efSmrg {
317110d565efSmrg reg_errcode_t ret
317210d565efSmrg = byte_compile_range (range_start, &p, pend, translate,
317310d565efSmrg syntax, b);
317410d565efSmrg if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
317510d565efSmrg range_start = 0xffffffff;
317610d565efSmrg }
317710d565efSmrg
317810d565efSmrg else if (p[0] == '-' && p[1] != ']')
317910d565efSmrg { /* This handles ranges made up of characters only. */
318010d565efSmrg reg_errcode_t ret;
318110d565efSmrg
318210d565efSmrg /* Move past the `-'. */
318310d565efSmrg PATFETCH (c1);
318410d565efSmrg
318510d565efSmrg ret = byte_compile_range (c, &p, pend, translate, syntax, b);
318610d565efSmrg if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
318710d565efSmrg range_start = 0xffffffff;
318810d565efSmrg }
318910d565efSmrg
319010d565efSmrg /* See if we're at the beginning of a possible character
319110d565efSmrg class. */
319210d565efSmrg
319310d565efSmrg else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
319410d565efSmrg { /* Leave room for the null. */
319510d565efSmrg char str[CHAR_CLASS_MAX_LENGTH + 1];
319610d565efSmrg
319710d565efSmrg PATFETCH (c);
319810d565efSmrg c1 = 0;
319910d565efSmrg
320010d565efSmrg /* If pattern is `[[:'. */
320110d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
320210d565efSmrg
320310d565efSmrg for (;;)
320410d565efSmrg {
320510d565efSmrg PATFETCH (c);
320610d565efSmrg if ((c == ':' && *p == ']') || p == pend)
320710d565efSmrg break;
320810d565efSmrg if (c1 < CHAR_CLASS_MAX_LENGTH)
320910d565efSmrg str[c1++] = c;
321010d565efSmrg else
321110d565efSmrg /* This is in any case an invalid class name. */
321210d565efSmrg str[0] = '\0';
321310d565efSmrg }
321410d565efSmrg str[c1] = '\0';
321510d565efSmrg
321610d565efSmrg /* If it isn't a word bracketed by `[:' and `:]':
321710d565efSmrg undo the ending character, the letters, and leave
321810d565efSmrg the leading `:' and `[' (but set bits for them). */
321910d565efSmrg if (c == ':' && *p == ']')
322010d565efSmrg {
322110d565efSmrg # if defined _LIBC || WIDE_CHAR_SUPPORT
322210d565efSmrg boolean is_lower = STREQ (str, "lower");
322310d565efSmrg boolean is_upper = STREQ (str, "upper");
322410d565efSmrg wctype_t wt;
322510d565efSmrg int ch;
322610d565efSmrg
322710d565efSmrg wt = IS_CHAR_CLASS (str);
322810d565efSmrg if (wt == 0)
322910d565efSmrg FREE_STACK_RETURN (REG_ECTYPE);
323010d565efSmrg
323110d565efSmrg /* Throw away the ] at the end of the character
323210d565efSmrg class. */
323310d565efSmrg PATFETCH (c);
323410d565efSmrg
323510d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
323610d565efSmrg
323710d565efSmrg for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
323810d565efSmrg {
323910d565efSmrg # ifdef _LIBC
324010d565efSmrg if (__iswctype (__btowc (ch), wt))
324110d565efSmrg SET_LIST_BIT (ch);
324210d565efSmrg # else
324310d565efSmrg if (iswctype (btowc (ch), wt))
324410d565efSmrg SET_LIST_BIT (ch);
324510d565efSmrg # endif
324610d565efSmrg
324710d565efSmrg if (translate && (is_upper || is_lower)
324810d565efSmrg && (ISUPPER (ch) || ISLOWER (ch)))
324910d565efSmrg SET_LIST_BIT (ch);
325010d565efSmrg }
325110d565efSmrg
325210d565efSmrg had_char_class = true;
325310d565efSmrg # else
325410d565efSmrg int ch;
325510d565efSmrg boolean is_alnum = STREQ (str, "alnum");
325610d565efSmrg boolean is_alpha = STREQ (str, "alpha");
325710d565efSmrg boolean is_blank = STREQ (str, "blank");
325810d565efSmrg boolean is_cntrl = STREQ (str, "cntrl");
325910d565efSmrg boolean is_digit = STREQ (str, "digit");
326010d565efSmrg boolean is_graph = STREQ (str, "graph");
326110d565efSmrg boolean is_lower = STREQ (str, "lower");
326210d565efSmrg boolean is_print = STREQ (str, "print");
326310d565efSmrg boolean is_punct = STREQ (str, "punct");
326410d565efSmrg boolean is_space = STREQ (str, "space");
326510d565efSmrg boolean is_upper = STREQ (str, "upper");
326610d565efSmrg boolean is_xdigit = STREQ (str, "xdigit");
326710d565efSmrg
326810d565efSmrg if (!IS_CHAR_CLASS (str))
326910d565efSmrg FREE_STACK_RETURN (REG_ECTYPE);
327010d565efSmrg
327110d565efSmrg /* Throw away the ] at the end of the character
327210d565efSmrg class. */
327310d565efSmrg PATFETCH (c);
327410d565efSmrg
327510d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
327610d565efSmrg
327710d565efSmrg for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
327810d565efSmrg {
327910d565efSmrg /* This was split into 3 if's to
328010d565efSmrg avoid an arbitrary limit in some compiler. */
328110d565efSmrg if ( (is_alnum && ISALNUM (ch))
328210d565efSmrg || (is_alpha && ISALPHA (ch))
328310d565efSmrg || (is_blank && ISBLANK (ch))
328410d565efSmrg || (is_cntrl && ISCNTRL (ch)))
328510d565efSmrg SET_LIST_BIT (ch);
328610d565efSmrg if ( (is_digit && ISDIGIT (ch))
328710d565efSmrg || (is_graph && ISGRAPH (ch))
328810d565efSmrg || (is_lower && ISLOWER (ch))
328910d565efSmrg || (is_print && ISPRINT (ch)))
329010d565efSmrg SET_LIST_BIT (ch);
329110d565efSmrg if ( (is_punct && ISPUNCT (ch))
329210d565efSmrg || (is_space && ISSPACE (ch))
329310d565efSmrg || (is_upper && ISUPPER (ch))
329410d565efSmrg || (is_xdigit && ISXDIGIT (ch)))
329510d565efSmrg SET_LIST_BIT (ch);
329610d565efSmrg if ( translate && (is_upper || is_lower)
329710d565efSmrg && (ISUPPER (ch) || ISLOWER (ch)))
329810d565efSmrg SET_LIST_BIT (ch);
329910d565efSmrg }
330010d565efSmrg had_char_class = true;
330110d565efSmrg # endif /* libc || wctype.h */
330210d565efSmrg }
330310d565efSmrg else
330410d565efSmrg {
330510d565efSmrg c1++;
330610d565efSmrg while (c1--)
330710d565efSmrg PATUNFETCH;
330810d565efSmrg SET_LIST_BIT ('[');
330910d565efSmrg SET_LIST_BIT (':');
331010d565efSmrg range_start = ':';
331110d565efSmrg had_char_class = false;
331210d565efSmrg }
331310d565efSmrg }
331410d565efSmrg else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
331510d565efSmrg {
331610d565efSmrg unsigned char str[MB_LEN_MAX + 1];
331710d565efSmrg # ifdef _LIBC
331810d565efSmrg uint32_t nrules =
331910d565efSmrg _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
332010d565efSmrg # endif
332110d565efSmrg
332210d565efSmrg PATFETCH (c);
332310d565efSmrg c1 = 0;
332410d565efSmrg
332510d565efSmrg /* If pattern is `[[='. */
332610d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
332710d565efSmrg
332810d565efSmrg for (;;)
332910d565efSmrg {
333010d565efSmrg PATFETCH (c);
333110d565efSmrg if ((c == '=' && *p == ']') || p == pend)
333210d565efSmrg break;
333310d565efSmrg if (c1 < MB_LEN_MAX)
333410d565efSmrg str[c1++] = c;
333510d565efSmrg else
333610d565efSmrg /* This is in any case an invalid class name. */
333710d565efSmrg str[0] = '\0';
333810d565efSmrg }
333910d565efSmrg str[c1] = '\0';
334010d565efSmrg
334110d565efSmrg if (c == '=' && *p == ']' && str[0] != '\0')
334210d565efSmrg {
334310d565efSmrg /* If we have no collation data we use the default
334410d565efSmrg collation in which each character is in a class
334510d565efSmrg by itself. It also means that ASCII is the
334610d565efSmrg character set and therefore we cannot have character
334710d565efSmrg with more than one byte in the multibyte
334810d565efSmrg representation. */
334910d565efSmrg # ifdef _LIBC
335010d565efSmrg if (nrules == 0)
335110d565efSmrg # endif
335210d565efSmrg {
335310d565efSmrg if (c1 != 1)
335410d565efSmrg FREE_STACK_RETURN (REG_ECOLLATE);
335510d565efSmrg
335610d565efSmrg /* Throw away the ] at the end of the equivalence
335710d565efSmrg class. */
335810d565efSmrg PATFETCH (c);
335910d565efSmrg
336010d565efSmrg /* Set the bit for the character. */
336110d565efSmrg SET_LIST_BIT (str[0]);
336210d565efSmrg }
336310d565efSmrg # ifdef _LIBC
336410d565efSmrg else
336510d565efSmrg {
336610d565efSmrg /* Try to match the byte sequence in `str' against
336710d565efSmrg those known to the collate implementation.
336810d565efSmrg First find out whether the bytes in `str' are
336910d565efSmrg actually from exactly one character. */
337010d565efSmrg const int32_t *table;
337110d565efSmrg const unsigned char *weights;
337210d565efSmrg const unsigned char *extra;
337310d565efSmrg const int32_t *indirect;
337410d565efSmrg int32_t idx;
337510d565efSmrg const unsigned char *cp = str;
337610d565efSmrg int ch;
337710d565efSmrg
337810d565efSmrg /* This #include defines a local function! */
337910d565efSmrg # include <locale/weight.h>
338010d565efSmrg
338110d565efSmrg table = (const int32_t *)
338210d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
338310d565efSmrg weights = (const unsigned char *)
338410d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
338510d565efSmrg extra = (const unsigned char *)
338610d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
338710d565efSmrg indirect = (const int32_t *)
338810d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
338910d565efSmrg
339010d565efSmrg idx = findidx (&cp);
339110d565efSmrg if (idx == 0 || cp < str + c1)
339210d565efSmrg /* This is no valid character. */
339310d565efSmrg FREE_STACK_RETURN (REG_ECOLLATE);
339410d565efSmrg
339510d565efSmrg /* Throw away the ] at the end of the equivalence
339610d565efSmrg class. */
339710d565efSmrg PATFETCH (c);
339810d565efSmrg
339910d565efSmrg /* Now we have to go through the whole table
340010d565efSmrg and find all characters which have the same
340110d565efSmrg first level weight.
340210d565efSmrg
340310d565efSmrg XXX Note that this is not entirely correct.
340410d565efSmrg we would have to match multibyte sequences
340510d565efSmrg but this is not possible with the current
340610d565efSmrg implementation. */
340710d565efSmrg for (ch = 1; ch < 256; ++ch)
340810d565efSmrg /* XXX This test would have to be changed if we
340910d565efSmrg would allow matching multibyte sequences. */
341010d565efSmrg if (table[ch] > 0)
341110d565efSmrg {
341210d565efSmrg int32_t idx2 = table[ch];
341310d565efSmrg size_t len = weights[idx2];
341410d565efSmrg
341510d565efSmrg /* Test whether the lengths match. */
341610d565efSmrg if (weights[idx] == len)
341710d565efSmrg {
341810d565efSmrg /* They do. Now compare the bytes of
341910d565efSmrg the weight. */
342010d565efSmrg size_t cnt = 0;
342110d565efSmrg
342210d565efSmrg while (cnt < len
342310d565efSmrg && (weights[idx + 1 + cnt]
342410d565efSmrg == weights[idx2 + 1 + cnt]))
342510d565efSmrg ++cnt;
342610d565efSmrg
342710d565efSmrg if (cnt == len)
342810d565efSmrg /* They match. Mark the character as
342910d565efSmrg acceptable. */
343010d565efSmrg SET_LIST_BIT (ch);
343110d565efSmrg }
343210d565efSmrg }
343310d565efSmrg }
343410d565efSmrg # endif
343510d565efSmrg had_char_class = true;
343610d565efSmrg }
343710d565efSmrg else
343810d565efSmrg {
343910d565efSmrg c1++;
344010d565efSmrg while (c1--)
344110d565efSmrg PATUNFETCH;
344210d565efSmrg SET_LIST_BIT ('[');
344310d565efSmrg SET_LIST_BIT ('=');
344410d565efSmrg range_start = '=';
344510d565efSmrg had_char_class = false;
344610d565efSmrg }
344710d565efSmrg }
344810d565efSmrg else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
344910d565efSmrg {
345010d565efSmrg unsigned char str[128]; /* Should be large enough. */
345110d565efSmrg # ifdef _LIBC
345210d565efSmrg uint32_t nrules =
345310d565efSmrg _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
345410d565efSmrg # endif
345510d565efSmrg
345610d565efSmrg PATFETCH (c);
345710d565efSmrg c1 = 0;
345810d565efSmrg
345910d565efSmrg /* If pattern is `[[.'. */
346010d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
346110d565efSmrg
346210d565efSmrg for (;;)
346310d565efSmrg {
346410d565efSmrg PATFETCH (c);
346510d565efSmrg if ((c == '.' && *p == ']') || p == pend)
346610d565efSmrg break;
346710d565efSmrg if (c1 < sizeof (str))
346810d565efSmrg str[c1++] = c;
346910d565efSmrg else
347010d565efSmrg /* This is in any case an invalid class name. */
347110d565efSmrg str[0] = '\0';
347210d565efSmrg }
347310d565efSmrg str[c1] = '\0';
347410d565efSmrg
347510d565efSmrg if (c == '.' && *p == ']' && str[0] != '\0')
347610d565efSmrg {
347710d565efSmrg /* If we have no collation data we use the default
347810d565efSmrg collation in which each character is the name
347910d565efSmrg for its own class which contains only the one
348010d565efSmrg character. It also means that ASCII is the
348110d565efSmrg character set and therefore we cannot have character
348210d565efSmrg with more than one byte in the multibyte
348310d565efSmrg representation. */
348410d565efSmrg # ifdef _LIBC
348510d565efSmrg if (nrules == 0)
348610d565efSmrg # endif
348710d565efSmrg {
348810d565efSmrg if (c1 != 1)
348910d565efSmrg FREE_STACK_RETURN (REG_ECOLLATE);
349010d565efSmrg
349110d565efSmrg /* Throw away the ] at the end of the collating
349210d565efSmrg symbol. */
349310d565efSmrg PATFETCH (c);
349410d565efSmrg
349510d565efSmrg /* Set the bit for the character. */
349610d565efSmrg SET_LIST_BIT (str[0]);
349710d565efSmrg range_start = ((const unsigned char *) str)[0];
349810d565efSmrg }
349910d565efSmrg # ifdef _LIBC
350010d565efSmrg else
350110d565efSmrg {
350210d565efSmrg /* Try to match the byte sequence in `str' against
350310d565efSmrg those known to the collate implementation.
350410d565efSmrg First find out whether the bytes in `str' are
350510d565efSmrg actually from exactly one character. */
350610d565efSmrg int32_t table_size;
350710d565efSmrg const int32_t *symb_table;
350810d565efSmrg const unsigned char *extra;
350910d565efSmrg int32_t idx;
351010d565efSmrg int32_t elem;
351110d565efSmrg int32_t second;
351210d565efSmrg int32_t hash;
351310d565efSmrg
351410d565efSmrg table_size =
351510d565efSmrg _NL_CURRENT_WORD (LC_COLLATE,
351610d565efSmrg _NL_COLLATE_SYMB_HASH_SIZEMB);
351710d565efSmrg symb_table = (const int32_t *)
351810d565efSmrg _NL_CURRENT (LC_COLLATE,
351910d565efSmrg _NL_COLLATE_SYMB_TABLEMB);
352010d565efSmrg extra = (const unsigned char *)
352110d565efSmrg _NL_CURRENT (LC_COLLATE,
352210d565efSmrg _NL_COLLATE_SYMB_EXTRAMB);
352310d565efSmrg
352410d565efSmrg /* Locate the character in the hashing table. */
352510d565efSmrg hash = elem_hash (str, c1);
352610d565efSmrg
352710d565efSmrg idx = 0;
352810d565efSmrg elem = hash % table_size;
352910d565efSmrg second = hash % (table_size - 2);
353010d565efSmrg while (symb_table[2 * elem] != 0)
353110d565efSmrg {
353210d565efSmrg /* First compare the hashing value. */
353310d565efSmrg if (symb_table[2 * elem] == hash
353410d565efSmrg && c1 == extra[symb_table[2 * elem + 1]]
353510d565efSmrg && memcmp (str,
353610d565efSmrg &extra[symb_table[2 * elem + 1]
353710d565efSmrg + 1],
353810d565efSmrg c1) == 0)
353910d565efSmrg {
354010d565efSmrg /* Yep, this is the entry. */
354110d565efSmrg idx = symb_table[2 * elem + 1];
354210d565efSmrg idx += 1 + extra[idx];
354310d565efSmrg break;
354410d565efSmrg }
354510d565efSmrg
354610d565efSmrg /* Next entry. */
354710d565efSmrg elem += second;
354810d565efSmrg }
354910d565efSmrg
355010d565efSmrg if (symb_table[2 * elem] == 0)
355110d565efSmrg /* This is no valid character. */
355210d565efSmrg FREE_STACK_RETURN (REG_ECOLLATE);
355310d565efSmrg
355410d565efSmrg /* Throw away the ] at the end of the collating
355510d565efSmrg symbol. */
355610d565efSmrg PATFETCH (c);
355710d565efSmrg
355810d565efSmrg /* Now add the multibyte character(s) we found
355910d565efSmrg to the accept list.
356010d565efSmrg
356110d565efSmrg XXX Note that this is not entirely correct.
356210d565efSmrg we would have to match multibyte sequences
356310d565efSmrg but this is not possible with the current
356410d565efSmrg implementation. Also, we have to match
356510d565efSmrg collating symbols, which expand to more than
356610d565efSmrg one byte, as a whole and not allow the
356710d565efSmrg individual bytes. */
356810d565efSmrg c1 = extra[idx++];
356910d565efSmrg if (c1 == 1)
357010d565efSmrg range_start = extra[idx];
357110d565efSmrg while (c1-- > 0)
357210d565efSmrg {
357310d565efSmrg SET_LIST_BIT (extra[idx]);
357410d565efSmrg ++idx;
357510d565efSmrg }
357610d565efSmrg }
357710d565efSmrg # endif
357810d565efSmrg had_char_class = false;
357910d565efSmrg }
358010d565efSmrg else
358110d565efSmrg {
358210d565efSmrg c1++;
358310d565efSmrg while (c1--)
358410d565efSmrg PATUNFETCH;
358510d565efSmrg SET_LIST_BIT ('[');
358610d565efSmrg SET_LIST_BIT ('.');
358710d565efSmrg range_start = '.';
358810d565efSmrg had_char_class = false;
358910d565efSmrg }
359010d565efSmrg }
359110d565efSmrg else
359210d565efSmrg {
359310d565efSmrg had_char_class = false;
359410d565efSmrg SET_LIST_BIT (c);
359510d565efSmrg range_start = c;
359610d565efSmrg }
359710d565efSmrg }
359810d565efSmrg
359910d565efSmrg /* Discard any (non)matching list bytes that are all 0 at the
360010d565efSmrg end of the map. Decrease the map-length byte too. */
360110d565efSmrg while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
360210d565efSmrg b[-1]--;
360310d565efSmrg b += b[-1];
360410d565efSmrg #endif /* WCHAR */
360510d565efSmrg }
360610d565efSmrg break;
360710d565efSmrg
360810d565efSmrg
360910d565efSmrg case '(':
361010d565efSmrg if (syntax & RE_NO_BK_PARENS)
361110d565efSmrg goto handle_open;
361210d565efSmrg else
361310d565efSmrg goto normal_char;
361410d565efSmrg
361510d565efSmrg
361610d565efSmrg case ')':
361710d565efSmrg if (syntax & RE_NO_BK_PARENS)
361810d565efSmrg goto handle_close;
361910d565efSmrg else
362010d565efSmrg goto normal_char;
362110d565efSmrg
362210d565efSmrg
362310d565efSmrg case '\n':
362410d565efSmrg if (syntax & RE_NEWLINE_ALT)
362510d565efSmrg goto handle_alt;
362610d565efSmrg else
362710d565efSmrg goto normal_char;
362810d565efSmrg
362910d565efSmrg
363010d565efSmrg case '|':
363110d565efSmrg if (syntax & RE_NO_BK_VBAR)
363210d565efSmrg goto handle_alt;
363310d565efSmrg else
363410d565efSmrg goto normal_char;
363510d565efSmrg
363610d565efSmrg
363710d565efSmrg case '{':
363810d565efSmrg if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
363910d565efSmrg goto handle_interval;
364010d565efSmrg else
364110d565efSmrg goto normal_char;
364210d565efSmrg
364310d565efSmrg
364410d565efSmrg case '\\':
364510d565efSmrg if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
364610d565efSmrg
364710d565efSmrg /* Do not translate the character after the \, so that we can
364810d565efSmrg distinguish, e.g., \B from \b, even if we normally would
364910d565efSmrg translate, e.g., B to b. */
365010d565efSmrg PATFETCH_RAW (c);
365110d565efSmrg
365210d565efSmrg switch (c)
365310d565efSmrg {
365410d565efSmrg case '(':
365510d565efSmrg if (syntax & RE_NO_BK_PARENS)
365610d565efSmrg goto normal_backslash;
365710d565efSmrg
365810d565efSmrg handle_open:
365910d565efSmrg bufp->re_nsub++;
366010d565efSmrg regnum++;
366110d565efSmrg
366210d565efSmrg if (COMPILE_STACK_FULL)
366310d565efSmrg {
366410d565efSmrg RETALLOC (compile_stack.stack, compile_stack.size << 1,
366510d565efSmrg compile_stack_elt_t);
366610d565efSmrg if (compile_stack.stack == NULL) return REG_ESPACE;
366710d565efSmrg
366810d565efSmrg compile_stack.size <<= 1;
366910d565efSmrg }
367010d565efSmrg
367110d565efSmrg /* These are the values to restore when we hit end of this
367210d565efSmrg group. They are all relative offsets, so that if the
367310d565efSmrg whole pattern moves because of realloc, they will still
367410d565efSmrg be valid. */
367510d565efSmrg COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
367610d565efSmrg COMPILE_STACK_TOP.fixup_alt_jump
367710d565efSmrg = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
367810d565efSmrg COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
367910d565efSmrg COMPILE_STACK_TOP.regnum = regnum;
368010d565efSmrg
368110d565efSmrg /* We will eventually replace the 0 with the number of
368210d565efSmrg groups inner to this one. But do not push a
368310d565efSmrg start_memory for groups beyond the last one we can
368410d565efSmrg represent in the compiled pattern. */
368510d565efSmrg if (regnum <= MAX_REGNUM)
368610d565efSmrg {
368710d565efSmrg COMPILE_STACK_TOP.inner_group_offset = b
368810d565efSmrg - COMPILED_BUFFER_VAR + 2;
368910d565efSmrg BUF_PUSH_3 (start_memory, regnum, 0);
369010d565efSmrg }
369110d565efSmrg
369210d565efSmrg compile_stack.avail++;
369310d565efSmrg
369410d565efSmrg fixup_alt_jump = 0;
369510d565efSmrg laststart = 0;
369610d565efSmrg begalt = b;
369710d565efSmrg /* If we've reached MAX_REGNUM groups, then this open
369810d565efSmrg won't actually generate any code, so we'll have to
369910d565efSmrg clear pending_exact explicitly. */
370010d565efSmrg pending_exact = 0;
370110d565efSmrg break;
370210d565efSmrg
370310d565efSmrg
370410d565efSmrg case ')':
370510d565efSmrg if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
370610d565efSmrg
370710d565efSmrg if (COMPILE_STACK_EMPTY)
370810d565efSmrg {
370910d565efSmrg if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
371010d565efSmrg goto normal_backslash;
371110d565efSmrg else
371210d565efSmrg FREE_STACK_RETURN (REG_ERPAREN);
371310d565efSmrg }
371410d565efSmrg
371510d565efSmrg handle_close:
371610d565efSmrg if (fixup_alt_jump)
371710d565efSmrg { /* Push a dummy failure point at the end of the
371810d565efSmrg alternative for a possible future
371910d565efSmrg `pop_failure_jump' to pop. See comments at
372010d565efSmrg `push_dummy_failure' in `re_match_2'. */
372110d565efSmrg BUF_PUSH (push_dummy_failure);
372210d565efSmrg
372310d565efSmrg /* We allocated space for this jump when we assigned
372410d565efSmrg to `fixup_alt_jump', in the `handle_alt' case below. */
372510d565efSmrg STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
372610d565efSmrg }
372710d565efSmrg
372810d565efSmrg /* See similar code for backslashed left paren above. */
372910d565efSmrg if (COMPILE_STACK_EMPTY)
373010d565efSmrg {
373110d565efSmrg if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
373210d565efSmrg goto normal_char;
373310d565efSmrg else
373410d565efSmrg FREE_STACK_RETURN (REG_ERPAREN);
373510d565efSmrg }
373610d565efSmrg
373710d565efSmrg /* Since we just checked for an empty stack above, this
373810d565efSmrg ``can't happen''. */
373910d565efSmrg assert (compile_stack.avail != 0);
374010d565efSmrg {
374110d565efSmrg /* We don't just want to restore into `regnum', because
374210d565efSmrg later groups should continue to be numbered higher,
374310d565efSmrg as in `(ab)c(de)' -- the second group is #2. */
374410d565efSmrg regnum_t this_group_regnum;
374510d565efSmrg
374610d565efSmrg compile_stack.avail--;
374710d565efSmrg begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
374810d565efSmrg fixup_alt_jump
374910d565efSmrg = COMPILE_STACK_TOP.fixup_alt_jump
375010d565efSmrg ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
375110d565efSmrg : 0;
375210d565efSmrg laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
375310d565efSmrg this_group_regnum = COMPILE_STACK_TOP.regnum;
375410d565efSmrg /* If we've reached MAX_REGNUM groups, then this close
375510d565efSmrg won't actually generate any code, so we'll have to
375610d565efSmrg clear pending_exact explicitly. */
375710d565efSmrg pending_exact = 0;
375810d565efSmrg
375910d565efSmrg /* We're at the end of the group, so now we know how many
376010d565efSmrg groups were inside this one. */
376110d565efSmrg if (this_group_regnum <= MAX_REGNUM)
376210d565efSmrg {
376310d565efSmrg UCHAR_T *inner_group_loc
376410d565efSmrg = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
376510d565efSmrg
376610d565efSmrg *inner_group_loc = regnum - this_group_regnum;
376710d565efSmrg BUF_PUSH_3 (stop_memory, this_group_regnum,
376810d565efSmrg regnum - this_group_regnum);
376910d565efSmrg }
377010d565efSmrg }
377110d565efSmrg break;
377210d565efSmrg
377310d565efSmrg
377410d565efSmrg case '|': /* `\|'. */
377510d565efSmrg if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
377610d565efSmrg goto normal_backslash;
377710d565efSmrg handle_alt:
377810d565efSmrg if (syntax & RE_LIMITED_OPS)
377910d565efSmrg goto normal_char;
378010d565efSmrg
378110d565efSmrg /* Insert before the previous alternative a jump which
378210d565efSmrg jumps to this alternative if the former fails. */
378310d565efSmrg GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
378410d565efSmrg INSERT_JUMP (on_failure_jump, begalt,
378510d565efSmrg b + 2 + 2 * OFFSET_ADDRESS_SIZE);
378610d565efSmrg pending_exact = 0;
378710d565efSmrg b += 1 + OFFSET_ADDRESS_SIZE;
378810d565efSmrg
378910d565efSmrg /* The alternative before this one has a jump after it
379010d565efSmrg which gets executed if it gets matched. Adjust that
379110d565efSmrg jump so it will jump to this alternative's analogous
379210d565efSmrg jump (put in below, which in turn will jump to the next
379310d565efSmrg (if any) alternative's such jump, etc.). The last such
379410d565efSmrg jump jumps to the correct final destination. A picture:
379510d565efSmrg _____ _____
379610d565efSmrg | | | |
379710d565efSmrg | v | v
379810d565efSmrg a | b | c
379910d565efSmrg
380010d565efSmrg If we are at `b', then fixup_alt_jump right now points to a
380110d565efSmrg three-byte space after `a'. We'll put in the jump, set
380210d565efSmrg fixup_alt_jump to right after `b', and leave behind three
380310d565efSmrg bytes which we'll fill in when we get to after `c'. */
380410d565efSmrg
380510d565efSmrg if (fixup_alt_jump)
380610d565efSmrg STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
380710d565efSmrg
380810d565efSmrg /* Mark and leave space for a jump after this alternative,
380910d565efSmrg to be filled in later either by next alternative or
381010d565efSmrg when we know we're at the end of a series of alternatives. */
381110d565efSmrg fixup_alt_jump = b;
381210d565efSmrg GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
381310d565efSmrg b += 1 + OFFSET_ADDRESS_SIZE;
381410d565efSmrg
381510d565efSmrg laststart = 0;
381610d565efSmrg begalt = b;
381710d565efSmrg break;
381810d565efSmrg
381910d565efSmrg
382010d565efSmrg case '{':
382110d565efSmrg /* If \{ is a literal. */
382210d565efSmrg if (!(syntax & RE_INTERVALS)
382310d565efSmrg /* If we're at `\{' and it's not the open-interval
382410d565efSmrg operator. */
382510d565efSmrg || (syntax & RE_NO_BK_BRACES))
382610d565efSmrg goto normal_backslash;
382710d565efSmrg
382810d565efSmrg handle_interval:
382910d565efSmrg {
383010d565efSmrg /* If got here, then the syntax allows intervals. */
383110d565efSmrg
383210d565efSmrg /* At least (most) this many matches must be made. */
383310d565efSmrg int lower_bound = -1, upper_bound = -1;
383410d565efSmrg
383510d565efSmrg /* Place in the uncompiled pattern (i.e., just after
383610d565efSmrg the '{') to go back to if the interval is invalid. */
383710d565efSmrg const CHAR_T *beg_interval = p;
383810d565efSmrg
383910d565efSmrg if (p == pend)
384010d565efSmrg goto invalid_interval;
384110d565efSmrg
384210d565efSmrg GET_UNSIGNED_NUMBER (lower_bound);
384310d565efSmrg
384410d565efSmrg if (c == ',')
384510d565efSmrg {
384610d565efSmrg GET_UNSIGNED_NUMBER (upper_bound);
384710d565efSmrg if (upper_bound < 0)
384810d565efSmrg upper_bound = RE_DUP_MAX;
384910d565efSmrg }
385010d565efSmrg else
385110d565efSmrg /* Interval such as `{1}' => match exactly once. */
385210d565efSmrg upper_bound = lower_bound;
385310d565efSmrg
385410d565efSmrg if (! (0 <= lower_bound && lower_bound <= upper_bound))
385510d565efSmrg goto invalid_interval;
385610d565efSmrg
385710d565efSmrg if (!(syntax & RE_NO_BK_BRACES))
385810d565efSmrg {
385910d565efSmrg if (c != '\\' || p == pend)
386010d565efSmrg goto invalid_interval;
386110d565efSmrg PATFETCH (c);
386210d565efSmrg }
386310d565efSmrg
386410d565efSmrg if (c != '}')
386510d565efSmrg goto invalid_interval;
386610d565efSmrg
386710d565efSmrg /* If it's invalid to have no preceding re. */
386810d565efSmrg if (!laststart)
386910d565efSmrg {
387010d565efSmrg if (syntax & RE_CONTEXT_INVALID_OPS
387110d565efSmrg && !(syntax & RE_INVALID_INTERVAL_ORD))
387210d565efSmrg FREE_STACK_RETURN (REG_BADRPT);
387310d565efSmrg else if (syntax & RE_CONTEXT_INDEP_OPS)
387410d565efSmrg laststart = b;
387510d565efSmrg else
387610d565efSmrg goto unfetch_interval;
387710d565efSmrg }
387810d565efSmrg
387910d565efSmrg /* We just parsed a valid interval. */
388010d565efSmrg
388110d565efSmrg if (RE_DUP_MAX < upper_bound)
388210d565efSmrg FREE_STACK_RETURN (REG_BADBR);
388310d565efSmrg
388410d565efSmrg /* If the upper bound is zero, don't want to succeed at
388510d565efSmrg all; jump from `laststart' to `b + 3', which will be
388610d565efSmrg the end of the buffer after we insert the jump. */
388710d565efSmrg /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
388810d565efSmrg instead of 'b + 3'. */
388910d565efSmrg if (upper_bound == 0)
389010d565efSmrg {
389110d565efSmrg GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
389210d565efSmrg INSERT_JUMP (jump, laststart, b + 1
389310d565efSmrg + OFFSET_ADDRESS_SIZE);
389410d565efSmrg b += 1 + OFFSET_ADDRESS_SIZE;
389510d565efSmrg }
389610d565efSmrg
389710d565efSmrg /* Otherwise, we have a nontrivial interval. When
389810d565efSmrg we're all done, the pattern will look like:
389910d565efSmrg set_number_at <jump count> <upper bound>
390010d565efSmrg set_number_at <succeed_n count> <lower bound>
390110d565efSmrg succeed_n <after jump addr> <succeed_n count>
390210d565efSmrg <body of loop>
390310d565efSmrg jump_n <succeed_n addr> <jump count>
390410d565efSmrg (The upper bound and `jump_n' are omitted if
390510d565efSmrg `upper_bound' is 1, though.) */
390610d565efSmrg else
390710d565efSmrg { /* If the upper bound is > 1, we need to insert
390810d565efSmrg more at the end of the loop. */
390910d565efSmrg unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
391010d565efSmrg (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
391110d565efSmrg
391210d565efSmrg GET_BUFFER_SPACE (nbytes);
391310d565efSmrg
391410d565efSmrg /* Initialize lower bound of the `succeed_n', even
391510d565efSmrg though it will be set during matching by its
391610d565efSmrg attendant `set_number_at' (inserted next),
391710d565efSmrg because `re_compile_fastmap' needs to know.
391810d565efSmrg Jump to the `jump_n' we might insert below. */
391910d565efSmrg INSERT_JUMP2 (succeed_n, laststart,
392010d565efSmrg b + 1 + 2 * OFFSET_ADDRESS_SIZE
392110d565efSmrg + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
392210d565efSmrg , lower_bound);
392310d565efSmrg b += 1 + 2 * OFFSET_ADDRESS_SIZE;
392410d565efSmrg
392510d565efSmrg /* Code to initialize the lower bound. Insert
392610d565efSmrg before the `succeed_n'. The `5' is the last two
392710d565efSmrg bytes of this `set_number_at', plus 3 bytes of
392810d565efSmrg the following `succeed_n'. */
392910d565efSmrg /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
393010d565efSmrg is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
393110d565efSmrg of the following `succeed_n'. */
393210d565efSmrg PREFIX(insert_op2) (set_number_at, laststart, 1
393310d565efSmrg + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
393410d565efSmrg b += 1 + 2 * OFFSET_ADDRESS_SIZE;
393510d565efSmrg
393610d565efSmrg if (upper_bound > 1)
393710d565efSmrg { /* More than one repetition is allowed, so
393810d565efSmrg append a backward jump to the `succeed_n'
393910d565efSmrg that starts this interval.
394010d565efSmrg
394110d565efSmrg When we've reached this during matching,
394210d565efSmrg we'll have matched the interval once, so
394310d565efSmrg jump back only `upper_bound - 1' times. */
394410d565efSmrg STORE_JUMP2 (jump_n, b, laststart
394510d565efSmrg + 2 * OFFSET_ADDRESS_SIZE + 1,
394610d565efSmrg upper_bound - 1);
394710d565efSmrg b += 1 + 2 * OFFSET_ADDRESS_SIZE;
394810d565efSmrg
394910d565efSmrg /* The location we want to set is the second
395010d565efSmrg parameter of the `jump_n'; that is `b-2' as
395110d565efSmrg an absolute address. `laststart' will be
395210d565efSmrg the `set_number_at' we're about to insert;
395310d565efSmrg `laststart+3' the number to set, the source
395410d565efSmrg for the relative address. But we are
395510d565efSmrg inserting into the middle of the pattern --
395610d565efSmrg so everything is getting moved up by 5.
395710d565efSmrg Conclusion: (b - 2) - (laststart + 3) + 5,
395810d565efSmrg i.e., b - laststart.
395910d565efSmrg
396010d565efSmrg We insert this at the beginning of the loop
396110d565efSmrg so that if we fail during matching, we'll
396210d565efSmrg reinitialize the bounds. */
396310d565efSmrg PREFIX(insert_op2) (set_number_at, laststart,
396410d565efSmrg b - laststart,
396510d565efSmrg upper_bound - 1, b);
396610d565efSmrg b += 1 + 2 * OFFSET_ADDRESS_SIZE;
396710d565efSmrg }
396810d565efSmrg }
396910d565efSmrg pending_exact = 0;
397010d565efSmrg break;
397110d565efSmrg
397210d565efSmrg invalid_interval:
397310d565efSmrg if (!(syntax & RE_INVALID_INTERVAL_ORD))
397410d565efSmrg FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
397510d565efSmrg unfetch_interval:
397610d565efSmrg /* Match the characters as literals. */
397710d565efSmrg p = beg_interval;
397810d565efSmrg c = '{';
397910d565efSmrg if (syntax & RE_NO_BK_BRACES)
398010d565efSmrg goto normal_char;
398110d565efSmrg else
398210d565efSmrg goto normal_backslash;
398310d565efSmrg }
398410d565efSmrg
398510d565efSmrg #ifdef emacs
398610d565efSmrg /* There is no way to specify the before_dot and after_dot
398710d565efSmrg operators. rms says this is ok. --karl */
398810d565efSmrg case '=':
398910d565efSmrg BUF_PUSH (at_dot);
399010d565efSmrg break;
399110d565efSmrg
399210d565efSmrg case 's':
399310d565efSmrg laststart = b;
399410d565efSmrg PATFETCH (c);
399510d565efSmrg BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
399610d565efSmrg break;
399710d565efSmrg
399810d565efSmrg case 'S':
399910d565efSmrg laststart = b;
400010d565efSmrg PATFETCH (c);
400110d565efSmrg BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
400210d565efSmrg break;
400310d565efSmrg #endif /* emacs */
400410d565efSmrg
400510d565efSmrg
400610d565efSmrg case 'w':
400710d565efSmrg if (syntax & RE_NO_GNU_OPS)
400810d565efSmrg goto normal_char;
400910d565efSmrg laststart = b;
401010d565efSmrg BUF_PUSH (wordchar);
401110d565efSmrg break;
401210d565efSmrg
401310d565efSmrg
401410d565efSmrg case 'W':
401510d565efSmrg if (syntax & RE_NO_GNU_OPS)
401610d565efSmrg goto normal_char;
401710d565efSmrg laststart = b;
401810d565efSmrg BUF_PUSH (notwordchar);
401910d565efSmrg break;
402010d565efSmrg
402110d565efSmrg
402210d565efSmrg case '<':
402310d565efSmrg if (syntax & RE_NO_GNU_OPS)
402410d565efSmrg goto normal_char;
402510d565efSmrg BUF_PUSH (wordbeg);
402610d565efSmrg break;
402710d565efSmrg
402810d565efSmrg case '>':
402910d565efSmrg if (syntax & RE_NO_GNU_OPS)
403010d565efSmrg goto normal_char;
403110d565efSmrg BUF_PUSH (wordend);
403210d565efSmrg break;
403310d565efSmrg
403410d565efSmrg case 'b':
403510d565efSmrg if (syntax & RE_NO_GNU_OPS)
403610d565efSmrg goto normal_char;
403710d565efSmrg BUF_PUSH (wordbound);
403810d565efSmrg break;
403910d565efSmrg
404010d565efSmrg case 'B':
404110d565efSmrg if (syntax & RE_NO_GNU_OPS)
404210d565efSmrg goto normal_char;
404310d565efSmrg BUF_PUSH (notwordbound);
404410d565efSmrg break;
404510d565efSmrg
404610d565efSmrg case '`':
404710d565efSmrg if (syntax & RE_NO_GNU_OPS)
404810d565efSmrg goto normal_char;
404910d565efSmrg BUF_PUSH (begbuf);
405010d565efSmrg break;
405110d565efSmrg
405210d565efSmrg case '\'':
405310d565efSmrg if (syntax & RE_NO_GNU_OPS)
405410d565efSmrg goto normal_char;
405510d565efSmrg BUF_PUSH (endbuf);
405610d565efSmrg break;
405710d565efSmrg
405810d565efSmrg case '1': case '2': case '3': case '4': case '5':
405910d565efSmrg case '6': case '7': case '8': case '9':
406010d565efSmrg if (syntax & RE_NO_BK_REFS)
406110d565efSmrg goto normal_char;
406210d565efSmrg
406310d565efSmrg c1 = c - '0';
406410d565efSmrg
406510d565efSmrg if (c1 > regnum)
406610d565efSmrg FREE_STACK_RETURN (REG_ESUBREG);
406710d565efSmrg
406810d565efSmrg /* Can't back reference to a subexpression if inside of it. */
406910d565efSmrg if (group_in_compile_stack (compile_stack, (regnum_t) c1))
407010d565efSmrg goto normal_char;
407110d565efSmrg
407210d565efSmrg laststart = b;
407310d565efSmrg BUF_PUSH_2 (duplicate, c1);
407410d565efSmrg break;
407510d565efSmrg
407610d565efSmrg
407710d565efSmrg case '+':
407810d565efSmrg case '?':
407910d565efSmrg if (syntax & RE_BK_PLUS_QM)
408010d565efSmrg goto handle_plus;
408110d565efSmrg else
408210d565efSmrg goto normal_backslash;
408310d565efSmrg
408410d565efSmrg default:
408510d565efSmrg normal_backslash:
408610d565efSmrg /* You might think it would be useful for \ to mean
408710d565efSmrg not to translate; but if we don't translate it
408810d565efSmrg it will never match anything. */
408910d565efSmrg c = TRANSLATE (c);
409010d565efSmrg goto normal_char;
409110d565efSmrg }
409210d565efSmrg break;
409310d565efSmrg
409410d565efSmrg
409510d565efSmrg default:
409610d565efSmrg /* Expects the character in `c'. */
409710d565efSmrg normal_char:
409810d565efSmrg /* If no exactn currently being built. */
409910d565efSmrg if (!pending_exact
410010d565efSmrg #ifdef WCHAR
410110d565efSmrg /* If last exactn handle binary(or character) and
410210d565efSmrg new exactn handle character(or binary). */
410310d565efSmrg || is_exactn_bin != is_binary[p - 1 - pattern]
410410d565efSmrg #endif /* WCHAR */
410510d565efSmrg
410610d565efSmrg /* If last exactn not at current position. */
410710d565efSmrg || pending_exact + *pending_exact + 1 != b
410810d565efSmrg
410910d565efSmrg /* We have only one byte following the exactn for the count. */
411010d565efSmrg || *pending_exact == (1 << BYTEWIDTH) - 1
411110d565efSmrg
411210d565efSmrg /* If followed by a repetition operator. */
411310d565efSmrg || *p == '*' || *p == '^'
411410d565efSmrg || ((syntax & RE_BK_PLUS_QM)
411510d565efSmrg ? *p == '\\' && (p[1] == '+' || p[1] == '?')
411610d565efSmrg : (*p == '+' || *p == '?'))
411710d565efSmrg || ((syntax & RE_INTERVALS)
411810d565efSmrg && ((syntax & RE_NO_BK_BRACES)
411910d565efSmrg ? *p == '{'
412010d565efSmrg : (p[0] == '\\' && p[1] == '{'))))
412110d565efSmrg {
412210d565efSmrg /* Start building a new exactn. */
412310d565efSmrg
412410d565efSmrg laststart = b;
412510d565efSmrg
412610d565efSmrg #ifdef WCHAR
412710d565efSmrg /* Is this exactn binary data or character? */
412810d565efSmrg is_exactn_bin = is_binary[p - 1 - pattern];
412910d565efSmrg if (is_exactn_bin)
413010d565efSmrg BUF_PUSH_2 (exactn_bin, 0);
413110d565efSmrg else
413210d565efSmrg BUF_PUSH_2 (exactn, 0);
413310d565efSmrg #else
413410d565efSmrg BUF_PUSH_2 (exactn, 0);
413510d565efSmrg #endif /* WCHAR */
413610d565efSmrg pending_exact = b - 1;
413710d565efSmrg }
413810d565efSmrg
413910d565efSmrg BUF_PUSH (c);
414010d565efSmrg (*pending_exact)++;
414110d565efSmrg break;
414210d565efSmrg } /* switch (c) */
414310d565efSmrg } /* while p != pend */
414410d565efSmrg
414510d565efSmrg
414610d565efSmrg /* Through the pattern now. */
414710d565efSmrg
414810d565efSmrg if (fixup_alt_jump)
414910d565efSmrg STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
415010d565efSmrg
415110d565efSmrg if (!COMPILE_STACK_EMPTY)
415210d565efSmrg FREE_STACK_RETURN (REG_EPAREN);
415310d565efSmrg
415410d565efSmrg /* If we don't want backtracking, force success
415510d565efSmrg the first time we reach the end of the compiled pattern. */
415610d565efSmrg if (syntax & RE_NO_POSIX_BACKTRACKING)
415710d565efSmrg BUF_PUSH (succeed);
415810d565efSmrg
415910d565efSmrg #ifdef WCHAR
416010d565efSmrg free (pattern);
416110d565efSmrg free (mbs_offset);
416210d565efSmrg free (is_binary);
416310d565efSmrg #endif
416410d565efSmrg free (compile_stack.stack);
416510d565efSmrg
416610d565efSmrg /* We have succeeded; set the length of the buffer. */
416710d565efSmrg #ifdef WCHAR
416810d565efSmrg bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
416910d565efSmrg #else
417010d565efSmrg bufp->used = b - bufp->buffer;
417110d565efSmrg #endif
417210d565efSmrg
417310d565efSmrg #ifdef DEBUG
417410d565efSmrg if (debug)
417510d565efSmrg {
417610d565efSmrg DEBUG_PRINT1 ("\nCompiled pattern: \n");
417710d565efSmrg PREFIX(print_compiled_pattern) (bufp);
417810d565efSmrg }
417910d565efSmrg #endif /* DEBUG */
418010d565efSmrg
418110d565efSmrg #ifndef MATCH_MAY_ALLOCATE
418210d565efSmrg /* Initialize the failure stack to the largest possible stack. This
418310d565efSmrg isn't necessary unless we're trying to avoid calling alloca in
418410d565efSmrg the search and match routines. */
418510d565efSmrg {
418610d565efSmrg int num_regs = bufp->re_nsub + 1;
418710d565efSmrg
418810d565efSmrg /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
418910d565efSmrg is strictly greater than re_max_failures, the largest possible stack
419010d565efSmrg is 2 * re_max_failures failure points. */
419110d565efSmrg if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
419210d565efSmrg {
419310d565efSmrg fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
419410d565efSmrg
419510d565efSmrg # ifdef emacs
419610d565efSmrg if (! fail_stack.stack)
419710d565efSmrg fail_stack.stack
419810d565efSmrg = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
419910d565efSmrg * sizeof (PREFIX(fail_stack_elt_t)));
420010d565efSmrg else
420110d565efSmrg fail_stack.stack
420210d565efSmrg = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
420310d565efSmrg (fail_stack.size
420410d565efSmrg * sizeof (PREFIX(fail_stack_elt_t))));
420510d565efSmrg # else /* not emacs */
420610d565efSmrg if (! fail_stack.stack)
420710d565efSmrg fail_stack.stack
420810d565efSmrg = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
420910d565efSmrg * sizeof (PREFIX(fail_stack_elt_t)));
421010d565efSmrg else
421110d565efSmrg fail_stack.stack
421210d565efSmrg = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
421310d565efSmrg (fail_stack.size
421410d565efSmrg * sizeof (PREFIX(fail_stack_elt_t))));
421510d565efSmrg # endif /* not emacs */
421610d565efSmrg }
421710d565efSmrg
421810d565efSmrg PREFIX(regex_grow_registers) (num_regs);
421910d565efSmrg }
422010d565efSmrg #endif /* not MATCH_MAY_ALLOCATE */
422110d565efSmrg
422210d565efSmrg return REG_NOERROR;
422310d565efSmrg } /* regex_compile */
422410d565efSmrg
422510d565efSmrg /* Subroutines for `regex_compile'. */
422610d565efSmrg
422710d565efSmrg /* Store OP at LOC followed by two-byte integer parameter ARG. */
422810d565efSmrg /* ifdef WCHAR, integer parameter is 1 wchar_t. */
422910d565efSmrg
423010d565efSmrg static void
PREFIX(store_op1)423110d565efSmrg PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
423210d565efSmrg {
423310d565efSmrg *loc = (UCHAR_T) op;
423410d565efSmrg STORE_NUMBER (loc + 1, arg);
423510d565efSmrg }
423610d565efSmrg
423710d565efSmrg
423810d565efSmrg /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
423910d565efSmrg /* ifdef WCHAR, integer parameter is 1 wchar_t. */
424010d565efSmrg
424110d565efSmrg static void
PREFIX(store_op2)424210d565efSmrg PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
424310d565efSmrg {
424410d565efSmrg *loc = (UCHAR_T) op;
424510d565efSmrg STORE_NUMBER (loc + 1, arg1);
424610d565efSmrg STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
424710d565efSmrg }
424810d565efSmrg
424910d565efSmrg
425010d565efSmrg /* Copy the bytes from LOC to END to open up three bytes of space at LOC
425110d565efSmrg for OP followed by two-byte integer parameter ARG. */
425210d565efSmrg /* ifdef WCHAR, integer parameter is 1 wchar_t. */
425310d565efSmrg
425410d565efSmrg static void
PREFIX(insert_op1)425510d565efSmrg PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
425610d565efSmrg {
425710d565efSmrg register UCHAR_T *pfrom = end;
425810d565efSmrg register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
425910d565efSmrg
426010d565efSmrg while (pfrom != loc)
426110d565efSmrg *--pto = *--pfrom;
426210d565efSmrg
426310d565efSmrg PREFIX(store_op1) (op, loc, arg);
426410d565efSmrg }
426510d565efSmrg
426610d565efSmrg
426710d565efSmrg /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
426810d565efSmrg /* ifdef WCHAR, integer parameter is 1 wchar_t. */
426910d565efSmrg
427010d565efSmrg static void
PREFIX(insert_op2)427110d565efSmrg PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
427210d565efSmrg int arg2, UCHAR_T *end)
427310d565efSmrg {
427410d565efSmrg register UCHAR_T *pfrom = end;
427510d565efSmrg register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
427610d565efSmrg
427710d565efSmrg while (pfrom != loc)
427810d565efSmrg *--pto = *--pfrom;
427910d565efSmrg
428010d565efSmrg PREFIX(store_op2) (op, loc, arg1, arg2);
428110d565efSmrg }
428210d565efSmrg
428310d565efSmrg
428410d565efSmrg /* P points to just after a ^ in PATTERN. Return true if that ^ comes
428510d565efSmrg after an alternative or a begin-subexpression. We assume there is at
428610d565efSmrg least one character before the ^. */
428710d565efSmrg
428810d565efSmrg static boolean
PREFIX(at_begline_loc_p)428910d565efSmrg PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
429010d565efSmrg reg_syntax_t syntax)
429110d565efSmrg {
429210d565efSmrg const CHAR_T *prev = p - 2;
429310d565efSmrg boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
429410d565efSmrg
429510d565efSmrg return
429610d565efSmrg /* After a subexpression? */
429710d565efSmrg (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
429810d565efSmrg /* After an alternative? */
429910d565efSmrg || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
430010d565efSmrg }
430110d565efSmrg
430210d565efSmrg
430310d565efSmrg /* The dual of at_begline_loc_p. This one is for $. We assume there is
430410d565efSmrg at least one character after the $, i.e., `P < PEND'. */
430510d565efSmrg
430610d565efSmrg static boolean
PREFIX(at_endline_loc_p)430710d565efSmrg PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
430810d565efSmrg reg_syntax_t syntax)
430910d565efSmrg {
431010d565efSmrg const CHAR_T *next = p;
431110d565efSmrg boolean next_backslash = *next == '\\';
431210d565efSmrg const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
431310d565efSmrg
431410d565efSmrg return
431510d565efSmrg /* Before a subexpression? */
431610d565efSmrg (syntax & RE_NO_BK_PARENS ? *next == ')'
431710d565efSmrg : next_backslash && next_next && *next_next == ')')
431810d565efSmrg /* Before an alternative? */
431910d565efSmrg || (syntax & RE_NO_BK_VBAR ? *next == '|'
432010d565efSmrg : next_backslash && next_next && *next_next == '|');
432110d565efSmrg }
432210d565efSmrg
432310d565efSmrg #else /* not INSIDE_RECURSION */
432410d565efSmrg
432510d565efSmrg /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
432610d565efSmrg false if it's not. */
432710d565efSmrg
432810d565efSmrg static boolean
group_in_compile_stack(compile_stack_type compile_stack,regnum_t regnum)432910d565efSmrg group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
433010d565efSmrg {
433110d565efSmrg int this_element;
433210d565efSmrg
433310d565efSmrg for (this_element = compile_stack.avail - 1;
433410d565efSmrg this_element >= 0;
433510d565efSmrg this_element--)
433610d565efSmrg if (compile_stack.stack[this_element].regnum == regnum)
433710d565efSmrg return true;
433810d565efSmrg
433910d565efSmrg return false;
434010d565efSmrg }
434110d565efSmrg #endif /* not INSIDE_RECURSION */
434210d565efSmrg
434310d565efSmrg #ifdef INSIDE_RECURSION
434410d565efSmrg
434510d565efSmrg #ifdef WCHAR
434610d565efSmrg /* This insert space, which size is "num", into the pattern at "loc".
434710d565efSmrg "end" must point the end of the allocated buffer. */
434810d565efSmrg static void
insert_space(int num,CHAR_T * loc,CHAR_T * end)434910d565efSmrg insert_space (int num, CHAR_T *loc, CHAR_T *end)
435010d565efSmrg {
435110d565efSmrg register CHAR_T *pto = end;
435210d565efSmrg register CHAR_T *pfrom = end - num;
435310d565efSmrg
435410d565efSmrg while (pfrom >= loc)
435510d565efSmrg *pto-- = *pfrom--;
435610d565efSmrg }
435710d565efSmrg #endif /* WCHAR */
435810d565efSmrg
435910d565efSmrg #ifdef WCHAR
436010d565efSmrg static reg_errcode_t
wcs_compile_range(CHAR_T range_start_char,const CHAR_T ** p_ptr,const CHAR_T * pend,RE_TRANSLATE_TYPE translate,reg_syntax_t syntax,CHAR_T * b,CHAR_T * char_set)436110d565efSmrg wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
436210d565efSmrg const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
436310d565efSmrg reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
436410d565efSmrg {
436510d565efSmrg const CHAR_T *p = *p_ptr;
436610d565efSmrg CHAR_T range_start, range_end;
436710d565efSmrg reg_errcode_t ret;
436810d565efSmrg # ifdef _LIBC
436910d565efSmrg uint32_t nrules;
437010d565efSmrg uint32_t start_val, end_val;
437110d565efSmrg # endif
437210d565efSmrg if (p == pend)
437310d565efSmrg return REG_ERANGE;
437410d565efSmrg
437510d565efSmrg # ifdef _LIBC
437610d565efSmrg nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
437710d565efSmrg if (nrules != 0)
437810d565efSmrg {
437910d565efSmrg const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
438010d565efSmrg _NL_COLLATE_COLLSEQWC);
438110d565efSmrg const unsigned char *extra = (const unsigned char *)
438210d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
438310d565efSmrg
438410d565efSmrg if (range_start_char < -1)
438510d565efSmrg {
438610d565efSmrg /* range_start is a collating symbol. */
438710d565efSmrg int32_t *wextra;
438810d565efSmrg /* Retreive the index and get collation sequence value. */
438910d565efSmrg wextra = (int32_t*)(extra + char_set[-range_start_char]);
439010d565efSmrg start_val = wextra[1 + *wextra];
439110d565efSmrg }
439210d565efSmrg else
439310d565efSmrg start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
439410d565efSmrg
439510d565efSmrg end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
439610d565efSmrg
439710d565efSmrg /* Report an error if the range is empty and the syntax prohibits
439810d565efSmrg this. */
439910d565efSmrg ret = ((syntax & RE_NO_EMPTY_RANGES)
440010d565efSmrg && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
440110d565efSmrg
440210d565efSmrg /* Insert space to the end of the char_ranges. */
440310d565efSmrg insert_space(2, b - char_set[5] - 2, b - 1);
440410d565efSmrg *(b - char_set[5] - 2) = (wchar_t)start_val;
440510d565efSmrg *(b - char_set[5] - 1) = (wchar_t)end_val;
440610d565efSmrg char_set[4]++; /* ranges_index */
440710d565efSmrg }
440810d565efSmrg else
440910d565efSmrg # endif
441010d565efSmrg {
441110d565efSmrg range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
441210d565efSmrg range_start_char;
441310d565efSmrg range_end = TRANSLATE (p[0]);
441410d565efSmrg /* Report an error if the range is empty and the syntax prohibits
441510d565efSmrg this. */
441610d565efSmrg ret = ((syntax & RE_NO_EMPTY_RANGES)
441710d565efSmrg && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
441810d565efSmrg
441910d565efSmrg /* Insert space to the end of the char_ranges. */
442010d565efSmrg insert_space(2, b - char_set[5] - 2, b - 1);
442110d565efSmrg *(b - char_set[5] - 2) = range_start;
442210d565efSmrg *(b - char_set[5] - 1) = range_end;
442310d565efSmrg char_set[4]++; /* ranges_index */
442410d565efSmrg }
442510d565efSmrg /* Have to increment the pointer into the pattern string, so the
442610d565efSmrg caller isn't still at the ending character. */
442710d565efSmrg (*p_ptr)++;
442810d565efSmrg
442910d565efSmrg return ret;
443010d565efSmrg }
443110d565efSmrg #else /* BYTE */
443210d565efSmrg /* Read the ending character of a range (in a bracket expression) from the
443310d565efSmrg uncompiled pattern *P_PTR (which ends at PEND). We assume the
443410d565efSmrg starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
443510d565efSmrg Then we set the translation of all bits between the starting and
443610d565efSmrg ending characters (inclusive) in the compiled pattern B.
443710d565efSmrg
443810d565efSmrg Return an error code.
443910d565efSmrg
444010d565efSmrg We use these short variable names so we can use the same macros as
444110d565efSmrg `regex_compile' itself. */
444210d565efSmrg
444310d565efSmrg static reg_errcode_t
byte_compile_range(unsigned int range_start_char,const char ** p_ptr,const char * pend,RE_TRANSLATE_TYPE translate,reg_syntax_t syntax,unsigned char * b)444410d565efSmrg byte_compile_range (unsigned int range_start_char, const char **p_ptr,
444510d565efSmrg const char *pend, RE_TRANSLATE_TYPE translate,
444610d565efSmrg reg_syntax_t syntax, unsigned char *b)
444710d565efSmrg {
444810d565efSmrg unsigned this_char;
444910d565efSmrg const char *p = *p_ptr;
445010d565efSmrg reg_errcode_t ret;
445110d565efSmrg # if _LIBC
445210d565efSmrg const unsigned char *collseq;
445310d565efSmrg unsigned int start_colseq;
445410d565efSmrg unsigned int end_colseq;
445510d565efSmrg # else
445610d565efSmrg unsigned end_char;
445710d565efSmrg # endif
445810d565efSmrg
445910d565efSmrg if (p == pend)
446010d565efSmrg return REG_ERANGE;
446110d565efSmrg
446210d565efSmrg /* Have to increment the pointer into the pattern string, so the
446310d565efSmrg caller isn't still at the ending character. */
446410d565efSmrg (*p_ptr)++;
446510d565efSmrg
446610d565efSmrg /* Report an error if the range is empty and the syntax prohibits this. */
446710d565efSmrg ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
446810d565efSmrg
446910d565efSmrg # if _LIBC
447010d565efSmrg collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
447110d565efSmrg _NL_COLLATE_COLLSEQMB);
447210d565efSmrg
447310d565efSmrg start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
447410d565efSmrg end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
447510d565efSmrg for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
447610d565efSmrg {
447710d565efSmrg unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
447810d565efSmrg
447910d565efSmrg if (start_colseq <= this_colseq && this_colseq <= end_colseq)
448010d565efSmrg {
448110d565efSmrg SET_LIST_BIT (TRANSLATE (this_char));
448210d565efSmrg ret = REG_NOERROR;
448310d565efSmrg }
448410d565efSmrg }
448510d565efSmrg # else
448610d565efSmrg /* Here we see why `this_char' has to be larger than an `unsigned
448710d565efSmrg char' -- we would otherwise go into an infinite loop, since all
448810d565efSmrg characters <= 0xff. */
448910d565efSmrg range_start_char = TRANSLATE (range_start_char);
449010d565efSmrg /* TRANSLATE(p[0]) is casted to char (not unsigned char) in TRANSLATE,
449110d565efSmrg and some compilers cast it to int implicitly, so following for_loop
449210d565efSmrg may fall to (almost) infinite loop.
449310d565efSmrg e.g. If translate[p[0]] = 0xff, end_char may equals to 0xffffffff.
449410d565efSmrg To avoid this, we cast p[0] to unsigned int and truncate it. */
449510d565efSmrg end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
449610d565efSmrg
449710d565efSmrg for (this_char = range_start_char; this_char <= end_char; ++this_char)
449810d565efSmrg {
449910d565efSmrg SET_LIST_BIT (TRANSLATE (this_char));
450010d565efSmrg ret = REG_NOERROR;
450110d565efSmrg }
450210d565efSmrg # endif
450310d565efSmrg
450410d565efSmrg return ret;
450510d565efSmrg }
450610d565efSmrg #endif /* WCHAR */
450710d565efSmrg
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   characters can start a string that matches the pattern.  This fastmap
   is used by re_search to skip quickly over impossible starting points.

   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   area as BUFP->fastmap.

   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   the pattern buffer.

   Returns 0 if we succeed, -2 if an internal error.  */
452010d565efSmrg
#ifdef WCHAR
/* Local function for re_compile_fastmap.
   Truncate the wide character C to a single byte: convert C to its
   multibyte form starting from a zeroed conversion state and return the
   first byte of that form.  If the conversion fails or produces no
   bytes, fall back to casting C to unsigned char.  */
static unsigned char truncate_wchar (CHAR_T c);

static unsigned char
truncate_wchar (CHAR_T c)
{
  unsigned char buf[MB_CUR_MAX];
  mbstate_t state;
  int retval;
  memset (&state, '\0', sizeof (state));
  /* The conversion functions take a `char *' destination; BUF is kept as
     unsigned char so that buf[0] is returned without sign extension, hence
     the explicit cast.  */
# ifdef _LIBC
  retval = __wcrtomb ((char *) buf, c, &state);
# else
  retval = wcrtomb ((char *) buf, c, &state);
# endif
  return retval > 0 ? buf[0] : (unsigned char) c;
}
#endif /* WCHAR */
454110d565efSmrg
454210d565efSmrg static int
PREFIX(re_compile_fastmap)454310d565efSmrg PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
454410d565efSmrg {
454510d565efSmrg int j, k;
454610d565efSmrg #ifdef MATCH_MAY_ALLOCATE
454710d565efSmrg PREFIX(fail_stack_type) fail_stack;
454810d565efSmrg #endif
454910d565efSmrg #ifndef REGEX_MALLOC
455010d565efSmrg char *destination;
455110d565efSmrg #endif
455210d565efSmrg
455310d565efSmrg register char *fastmap = bufp->fastmap;
455410d565efSmrg
455510d565efSmrg #ifdef WCHAR
455610d565efSmrg /* We need to cast pattern to (wchar_t*), because we casted this compiled
455710d565efSmrg pattern to (char*) in regex_compile. */
455810d565efSmrg UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
455910d565efSmrg register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
456010d565efSmrg #else /* BYTE */
456110d565efSmrg UCHAR_T *pattern = bufp->buffer;
456210d565efSmrg register UCHAR_T *pend = pattern + bufp->used;
456310d565efSmrg #endif /* WCHAR */
456410d565efSmrg UCHAR_T *p = pattern;
456510d565efSmrg
456610d565efSmrg #ifdef REL_ALLOC
456710d565efSmrg /* This holds the pointer to the failure stack, when
456810d565efSmrg it is allocated relocatably. */
456910d565efSmrg fail_stack_elt_t *failure_stack_ptr;
457010d565efSmrg #endif
457110d565efSmrg
457210d565efSmrg /* Assume that each path through the pattern can be null until
457310d565efSmrg proven otherwise. We set this false at the bottom of switch
457410d565efSmrg statement, to which we get only if a particular path doesn't
457510d565efSmrg match the empty string. */
457610d565efSmrg boolean path_can_be_null = true;
457710d565efSmrg
457810d565efSmrg /* We aren't doing a `succeed_n' to begin with. */
457910d565efSmrg boolean succeed_n_p = false;
458010d565efSmrg
458110d565efSmrg assert (fastmap != NULL && p != NULL);
458210d565efSmrg
458310d565efSmrg INIT_FAIL_STACK ();
458410d565efSmrg bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
458510d565efSmrg bufp->fastmap_accurate = 1; /* It will be when we're done. */
458610d565efSmrg bufp->can_be_null = 0;
458710d565efSmrg
458810d565efSmrg while (1)
458910d565efSmrg {
459010d565efSmrg if (p == pend || *p == (UCHAR_T) succeed)
459110d565efSmrg {
459210d565efSmrg /* We have reached the (effective) end of pattern. */
459310d565efSmrg if (!FAIL_STACK_EMPTY ())
459410d565efSmrg {
459510d565efSmrg bufp->can_be_null |= path_can_be_null;
459610d565efSmrg
459710d565efSmrg /* Reset for next path. */
459810d565efSmrg path_can_be_null = true;
459910d565efSmrg
460010d565efSmrg p = fail_stack.stack[--fail_stack.avail].pointer;
460110d565efSmrg
460210d565efSmrg continue;
460310d565efSmrg }
460410d565efSmrg else
460510d565efSmrg break;
460610d565efSmrg }
460710d565efSmrg
460810d565efSmrg /* We should never be about to go beyond the end of the pattern. */
460910d565efSmrg assert (p < pend);
461010d565efSmrg
461110d565efSmrg switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
461210d565efSmrg {
461310d565efSmrg
461410d565efSmrg /* I guess the idea here is to simply not bother with a fastmap
461510d565efSmrg if a backreference is used, since it's too hard to figure out
461610d565efSmrg the fastmap for the corresponding group. Setting
461710d565efSmrg `can_be_null' stops `re_search_2' from using the fastmap, so
461810d565efSmrg that is all we do. */
461910d565efSmrg case duplicate:
462010d565efSmrg bufp->can_be_null = 1;
462110d565efSmrg goto done;
462210d565efSmrg
462310d565efSmrg
462410d565efSmrg /* Following are the cases which match a character. These end
462510d565efSmrg with `break'. */
462610d565efSmrg
462710d565efSmrg #ifdef WCHAR
462810d565efSmrg case exactn:
462910d565efSmrg fastmap[truncate_wchar(p[1])] = 1;
463010d565efSmrg break;
463110d565efSmrg #else /* BYTE */
463210d565efSmrg case exactn:
463310d565efSmrg fastmap[p[1]] = 1;
463410d565efSmrg break;
463510d565efSmrg #endif /* WCHAR */
463610d565efSmrg #ifdef MBS_SUPPORT
463710d565efSmrg case exactn_bin:
463810d565efSmrg fastmap[p[1]] = 1;
463910d565efSmrg break;
464010d565efSmrg #endif
464110d565efSmrg
464210d565efSmrg #ifdef WCHAR
464310d565efSmrg /* It is hard to distinguish fastmap from (multi byte) characters
464410d565efSmrg which depends on current locale. */
464510d565efSmrg case charset:
464610d565efSmrg case charset_not:
464710d565efSmrg case wordchar:
464810d565efSmrg case notwordchar:
464910d565efSmrg bufp->can_be_null = 1;
465010d565efSmrg goto done;
465110d565efSmrg #else /* BYTE */
465210d565efSmrg case charset:
465310d565efSmrg for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
465410d565efSmrg if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
465510d565efSmrg fastmap[j] = 1;
465610d565efSmrg break;
465710d565efSmrg
465810d565efSmrg
465910d565efSmrg case charset_not:
466010d565efSmrg /* Chars beyond end of map must be allowed. */
466110d565efSmrg for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
466210d565efSmrg fastmap[j] = 1;
466310d565efSmrg
466410d565efSmrg for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
466510d565efSmrg if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
466610d565efSmrg fastmap[j] = 1;
466710d565efSmrg break;
466810d565efSmrg
466910d565efSmrg
467010d565efSmrg case wordchar:
467110d565efSmrg for (j = 0; j < (1 << BYTEWIDTH); j++)
467210d565efSmrg if (SYNTAX (j) == Sword)
467310d565efSmrg fastmap[j] = 1;
467410d565efSmrg break;
467510d565efSmrg
467610d565efSmrg
467710d565efSmrg case notwordchar:
467810d565efSmrg for (j = 0; j < (1 << BYTEWIDTH); j++)
467910d565efSmrg if (SYNTAX (j) != Sword)
468010d565efSmrg fastmap[j] = 1;
468110d565efSmrg break;
468210d565efSmrg #endif /* WCHAR */
468310d565efSmrg
468410d565efSmrg case anychar:
468510d565efSmrg {
468610d565efSmrg int fastmap_newline = fastmap['\n'];
468710d565efSmrg
468810d565efSmrg /* `.' matches anything ... */
468910d565efSmrg for (j = 0; j < (1 << BYTEWIDTH); j++)
469010d565efSmrg fastmap[j] = 1;
469110d565efSmrg
469210d565efSmrg /* ... except perhaps newline. */
469310d565efSmrg if (!(bufp->syntax & RE_DOT_NEWLINE))
469410d565efSmrg fastmap['\n'] = fastmap_newline;
469510d565efSmrg
469610d565efSmrg /* Return if we have already set `can_be_null'; if we have,
469710d565efSmrg then the fastmap is irrelevant. Something's wrong here. */
469810d565efSmrg else if (bufp->can_be_null)
469910d565efSmrg goto done;
470010d565efSmrg
470110d565efSmrg /* Otherwise, have to check alternative paths. */
470210d565efSmrg break;
470310d565efSmrg }
470410d565efSmrg
470510d565efSmrg #ifdef emacs
470610d565efSmrg case syntaxspec:
470710d565efSmrg k = *p++;
470810d565efSmrg for (j = 0; j < (1 << BYTEWIDTH); j++)
470910d565efSmrg if (SYNTAX (j) == (enum syntaxcode) k)
471010d565efSmrg fastmap[j] = 1;
471110d565efSmrg break;
471210d565efSmrg
471310d565efSmrg
471410d565efSmrg case notsyntaxspec:
471510d565efSmrg k = *p++;
471610d565efSmrg for (j = 0; j < (1 << BYTEWIDTH); j++)
471710d565efSmrg if (SYNTAX (j) != (enum syntaxcode) k)
471810d565efSmrg fastmap[j] = 1;
471910d565efSmrg break;
472010d565efSmrg
472110d565efSmrg
472210d565efSmrg /* All cases after this match the empty string. These end with
472310d565efSmrg `continue'. */
472410d565efSmrg
472510d565efSmrg
472610d565efSmrg case before_dot:
472710d565efSmrg case at_dot:
472810d565efSmrg case after_dot:
472910d565efSmrg continue;
473010d565efSmrg #endif /* emacs */
473110d565efSmrg
473210d565efSmrg
473310d565efSmrg case no_op:
473410d565efSmrg case begline:
473510d565efSmrg case endline:
473610d565efSmrg case begbuf:
473710d565efSmrg case endbuf:
473810d565efSmrg case wordbound:
473910d565efSmrg case notwordbound:
474010d565efSmrg case wordbeg:
474110d565efSmrg case wordend:
474210d565efSmrg case push_dummy_failure:
474310d565efSmrg continue;
474410d565efSmrg
474510d565efSmrg
474610d565efSmrg case jump_n:
474710d565efSmrg case pop_failure_jump:
474810d565efSmrg case maybe_pop_jump:
474910d565efSmrg case jump:
475010d565efSmrg case jump_past_alt:
475110d565efSmrg case dummy_failure_jump:
475210d565efSmrg EXTRACT_NUMBER_AND_INCR (j, p);
475310d565efSmrg p += j;
475410d565efSmrg if (j > 0)
475510d565efSmrg continue;
475610d565efSmrg
475710d565efSmrg /* Jump backward implies we just went through the body of a
475810d565efSmrg loop and matched nothing. Opcode jumped to should be
475910d565efSmrg `on_failure_jump' or `succeed_n'. Just treat it like an
476010d565efSmrg ordinary jump. For a * loop, it has pushed its failure
476110d565efSmrg point already; if so, discard that as redundant. */
476210d565efSmrg if ((re_opcode_t) *p != on_failure_jump
476310d565efSmrg && (re_opcode_t) *p != succeed_n)
476410d565efSmrg continue;
476510d565efSmrg
476610d565efSmrg p++;
476710d565efSmrg EXTRACT_NUMBER_AND_INCR (j, p);
476810d565efSmrg p += j;
476910d565efSmrg
477010d565efSmrg /* If what's on the stack is where we are now, pop it. */
477110d565efSmrg if (!FAIL_STACK_EMPTY ()
477210d565efSmrg && fail_stack.stack[fail_stack.avail - 1].pointer == p)
477310d565efSmrg fail_stack.avail--;
477410d565efSmrg
477510d565efSmrg continue;
477610d565efSmrg
477710d565efSmrg
477810d565efSmrg case on_failure_jump:
477910d565efSmrg case on_failure_keep_string_jump:
478010d565efSmrg handle_on_failure_jump:
478110d565efSmrg EXTRACT_NUMBER_AND_INCR (j, p);
478210d565efSmrg
478310d565efSmrg /* For some patterns, e.g., `(a?)?', `p+j' here points to the
478410d565efSmrg end of the pattern. We don't want to push such a point,
478510d565efSmrg since when we restore it above, entering the switch will
478610d565efSmrg increment `p' past the end of the pattern. We don't need
478710d565efSmrg to push such a point since we obviously won't find any more
478810d565efSmrg fastmap entries beyond `pend'. Such a pattern can match
478910d565efSmrg the null string, though. */
479010d565efSmrg if (p + j < pend)
479110d565efSmrg {
479210d565efSmrg if (!PUSH_PATTERN_OP (p + j, fail_stack))
479310d565efSmrg {
479410d565efSmrg RESET_FAIL_STACK ();
479510d565efSmrg return -2;
479610d565efSmrg }
479710d565efSmrg }
479810d565efSmrg else
479910d565efSmrg bufp->can_be_null = 1;
480010d565efSmrg
480110d565efSmrg if (succeed_n_p)
480210d565efSmrg {
480310d565efSmrg EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
480410d565efSmrg succeed_n_p = false;
480510d565efSmrg }
480610d565efSmrg
480710d565efSmrg continue;
480810d565efSmrg
480910d565efSmrg
481010d565efSmrg case succeed_n:
481110d565efSmrg /* Get to the number of times to succeed. */
481210d565efSmrg p += OFFSET_ADDRESS_SIZE;
481310d565efSmrg
481410d565efSmrg /* Increment p past the n for when k != 0. */
481510d565efSmrg EXTRACT_NUMBER_AND_INCR (k, p);
481610d565efSmrg if (k == 0)
481710d565efSmrg {
481810d565efSmrg p -= 2 * OFFSET_ADDRESS_SIZE;
481910d565efSmrg succeed_n_p = true; /* Spaghetti code alert. */
482010d565efSmrg goto handle_on_failure_jump;
482110d565efSmrg }
482210d565efSmrg continue;
482310d565efSmrg
482410d565efSmrg
482510d565efSmrg case set_number_at:
482610d565efSmrg p += 2 * OFFSET_ADDRESS_SIZE;
482710d565efSmrg continue;
482810d565efSmrg
482910d565efSmrg
483010d565efSmrg case start_memory:
483110d565efSmrg case stop_memory:
483210d565efSmrg p += 2;
483310d565efSmrg continue;
483410d565efSmrg
483510d565efSmrg
483610d565efSmrg default:
483710d565efSmrg abort (); /* We have listed all the cases. */
483810d565efSmrg } /* switch *p++ */
483910d565efSmrg
484010d565efSmrg /* Getting here means we have found the possible starting
484110d565efSmrg characters for one path of the pattern -- and that the empty
484210d565efSmrg string does not match. We need not follow this path further.
484310d565efSmrg Instead, look at the next alternative (remembered on the
484410d565efSmrg stack), or quit if no more. The test at the top of the loop
484510d565efSmrg does these things. */
484610d565efSmrg path_can_be_null = false;
484710d565efSmrg p = pend;
484810d565efSmrg } /* while p */
484910d565efSmrg
485010d565efSmrg /* Set `can_be_null' for the last path (also the first path, if the
485110d565efSmrg pattern is empty). */
485210d565efSmrg bufp->can_be_null |= path_can_be_null;
485310d565efSmrg
485410d565efSmrg done:
485510d565efSmrg RESET_FAIL_STACK ();
485610d565efSmrg return 0;
485710d565efSmrg }
485810d565efSmrg
485910d565efSmrg #else /* not INSIDE_RECURSION */
486010d565efSmrg
/* Public entry point for fastmap compilation.  When MBS_SUPPORT is
   defined and the current locale is multibyte (MB_CUR_MAX != 1) the
   wide-character worker is used; otherwise the byte worker is used.
   The worker's return value is passed through unchanged.  */

int
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap(bufp);
  else
# endif
    return byte_re_compile_fastmap(bufp);
} /* re_compile_fastmap */
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
487410d565efSmrg
487510d565efSmrg
487610d565efSmrg /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
487710d565efSmrg ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
487810d565efSmrg this memory for recording register information. STARTS and ENDS
487910d565efSmrg must be allocated using the malloc library routine, and must each
488010d565efSmrg be at least NUM_REGS * sizeof (regoff_t) bytes long.
488110d565efSmrg
488210d565efSmrg If NUM_REGS == 0, then subsequent matches should allocate their own
488310d565efSmrg register data.
488410d565efSmrg
488510d565efSmrg Unless this function is called, the first search or match using
488610d565efSmrg PATTERN_BUFFER will allocate its own register data, without
488710d565efSmrg freeing the old data. */
488810d565efSmrg
488910d565efSmrg void
489010d565efSmrg re_set_registers (struct re_pattern_buffer *bufp,
489110d565efSmrg struct re_registers *regs, unsigned num_regs,
489210d565efSmrg regoff_t *starts, regoff_t *ends)
489310d565efSmrg {
489410d565efSmrg if (num_regs)
489510d565efSmrg {
489610d565efSmrg bufp->regs_allocated = REGS_REALLOCATE;
489710d565efSmrg regs->num_regs = num_regs;
489810d565efSmrg regs->start = starts;
489910d565efSmrg regs->end = ends;
490010d565efSmrg }
490110d565efSmrg else
490210d565efSmrg {
490310d565efSmrg bufp->regs_allocated = REGS_UNALLOCATED;
490410d565efSmrg regs->num_regs = 0;
490510d565efSmrg regs->start = regs->end = (regoff_t *) 0;
490610d565efSmrg }
490710d565efSmrg }
490810d565efSmrg #ifdef _LIBC
weak_alias(__re_set_registers,re_set_registers)490910d565efSmrg weak_alias (__re_set_registers, re_set_registers)
491010d565efSmrg #endif
491110d565efSmrg
/* Searching routines.  */
491310d565efSmrg
/* Like re_search_2, below, but only one string is specified, and
   doesn't let you say where to stop matching.  STRING is passed as the
   second string of re_search_2 (the first one is empty) and SIZE is used
   as the stop index.  The result of re_search_2 is returned as is.  */

int
re_search (struct re_pattern_buffer *bufp, const char *string, int size,
           int startpos, int range, struct re_registers *regs)
{
  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                      regs, size);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
492710d565efSmrg
492810d565efSmrg
/* Using the compiled pattern in BUFP->buffer, first tries to match the
   virtual concatenation of STRING1 and STRING2, starting first at index
   STARTPOS, then at STARTPOS + 1, and so on.

   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.

   RANGE is how far to scan while trying to match.  RANGE = 0 means try
   only at STARTPOS; in general, the last start tried is STARTPOS +
   RANGE.

   In REGS, return the indices of the virtual concatenation of STRING1
   and STRING2 that matched the entire BUFP->buffer and its contained
   subexpressions.

   Do not consider matching one past the index STOP in the virtual
   concatenation of STRING1 and STRING2.

   We return either the position in the strings at which the match was
   found, -1 if no match, or -2 if error (such as failure
   stack overflow).

   This wrapper only selects the worker: the wide-character one when
   MBS_SUPPORT is defined and MB_CUR_MAX != 1, the byte one otherwise.  */

int
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
             const char *string2, int size2, int startpos, int range,
             struct re_registers *regs, int stop)
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
                            range, regs, stop);
  else
# endif
    return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
                             range, regs, stop);
} /* re_search_2 */
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
496710d565efSmrg
496810d565efSmrg #endif /* not INSIDE_RECURSION */
496910d565efSmrg
497010d565efSmrg #ifdef INSIDE_RECURSION
497110d565efSmrg
/* Release VAR and reset it to NULL.  The expansion is wrapped in
   do { } while (0) so that it is a single statement: it can be the body
   of an unbraced `if' or `else' without the assignment leaking out of
   the branch.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)                                                  \
  do {                                                                  \
    if (var) REGEX_FREE (var);                                          \
    var = NULL;                                                         \
  } while (0)
#else
# define FREE_VAR(var)                                                  \
  do {                                                                  \
    free (var);                                                         \
    var = NULL;                                                         \
  } while (0)
#endif

#ifdef WCHAR
/* Buffers for strings longer than this are obtained with TALLOC and
   released with free; shorter ones use REGEX_TALLOC / FREE_VAR.  */
# define MAX_ALLOCA_SIZE        2000

/* Release the wide-character copies of both search strings and their
   offset tables, choosing the release routine by the size of the
   corresponding source string.  */
# define FREE_WCS_BUFFERS()                                             \
  do {                                                                  \
    if (size1 > MAX_ALLOCA_SIZE)                                        \
      {                                                                 \
        free (wcs_string1);                                             \
        free (mbs_offset1);                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        FREE_VAR (wcs_string1);                                         \
        FREE_VAR (mbs_offset1);                                         \
      }                                                                 \
    if (size2 > MAX_ALLOCA_SIZE)                                        \
      {                                                                 \
        free (wcs_string2);                                             \
        free (mbs_offset2);                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        FREE_VAR (wcs_string2);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)

#endif
500610d565efSmrg
500710d565efSmrg
500810d565efSmrg static int
500910d565efSmrg PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
501010d565efSmrg int size1, const char *string2, int size2,
501110d565efSmrg int startpos, int range,
501210d565efSmrg struct re_registers *regs, int stop)
501310d565efSmrg {
501410d565efSmrg int val;
501510d565efSmrg register char *fastmap = bufp->fastmap;
501610d565efSmrg register RE_TRANSLATE_TYPE translate = bufp->translate;
501710d565efSmrg int total_size = size1 + size2;
501810d565efSmrg int endpos = startpos + range;
501910d565efSmrg #ifdef WCHAR
502010d565efSmrg /* We need wchar_t* buffers correspond to cstring1, cstring2. */
502110d565efSmrg wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
502210d565efSmrg /* We need the size of wchar_t buffers correspond to csize1, csize2. */
502310d565efSmrg int wcs_size1 = 0, wcs_size2 = 0;
502410d565efSmrg /* offset buffer for optimizatoin. See convert_mbs_to_wc. */
502510d565efSmrg int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
502610d565efSmrg /* They hold whether each wchar_t is binary data or not. */
502710d565efSmrg char *is_binary = NULL;
502810d565efSmrg #endif /* WCHAR */
502910d565efSmrg
503010d565efSmrg /* Check for out-of-range STARTPOS. */
503110d565efSmrg if (startpos < 0 || startpos > total_size)
503210d565efSmrg return -1;
503310d565efSmrg
503410d565efSmrg /* Fix up RANGE if it might eventually take us outside
503510d565efSmrg the virtual concatenation of STRING1 and STRING2.
503610d565efSmrg Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
503710d565efSmrg if (endpos < 0)
503810d565efSmrg range = 0 - startpos;
503910d565efSmrg else if (endpos > total_size)
504010d565efSmrg range = total_size - startpos;
504110d565efSmrg
504210d565efSmrg /* If the search isn't to be a backwards one, don't waste time in a
504310d565efSmrg search for a pattern that must be anchored. */
504410d565efSmrg if (bufp->used > 0 && range > 0
504510d565efSmrg && ((re_opcode_t) bufp->buffer[0] == begbuf
504610d565efSmrg /* `begline' is like `begbuf' if it cannot match at newlines. */
504710d565efSmrg || ((re_opcode_t) bufp->buffer[0] == begline
504810d565efSmrg && !bufp->newline_anchor)))
504910d565efSmrg {
505010d565efSmrg if (startpos > 0)
505110d565efSmrg return -1;
505210d565efSmrg else
505310d565efSmrg range = 1;
505410d565efSmrg }
505510d565efSmrg
505610d565efSmrg #ifdef emacs
505710d565efSmrg /* In a forward search for something that starts with \=.
505810d565efSmrg don't keep searching past point. */
505910d565efSmrg if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
506010d565efSmrg {
506110d565efSmrg range = PT - startpos;
506210d565efSmrg if (range <= 0)
506310d565efSmrg return -1;
506410d565efSmrg }
506510d565efSmrg #endif /* emacs */
506610d565efSmrg
506710d565efSmrg /* Update the fastmap now if not correct already. */
506810d565efSmrg if (fastmap && !bufp->fastmap_accurate)
506910d565efSmrg if (re_compile_fastmap (bufp) == -2)
507010d565efSmrg return -2;
507110d565efSmrg
507210d565efSmrg #ifdef WCHAR
507310d565efSmrg /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
507410d565efSmrg fill them with converted string. */
507510d565efSmrg if (size1 != 0)
507610d565efSmrg {
507710d565efSmrg if (size1 > MAX_ALLOCA_SIZE)
507810d565efSmrg {
507910d565efSmrg wcs_string1 = TALLOC (size1 + 1, CHAR_T);
508010d565efSmrg mbs_offset1 = TALLOC (size1 + 1, int);
508110d565efSmrg is_binary = TALLOC (size1 + 1, char);
508210d565efSmrg }
508310d565efSmrg else
508410d565efSmrg {
508510d565efSmrg wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
508610d565efSmrg mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
508710d565efSmrg is_binary = REGEX_TALLOC (size1 + 1, char);
508810d565efSmrg }
508910d565efSmrg if (!wcs_string1 || !mbs_offset1 || !is_binary)
509010d565efSmrg {
509110d565efSmrg if (size1 > MAX_ALLOCA_SIZE)
509210d565efSmrg {
509310d565efSmrg free (wcs_string1);
509410d565efSmrg free (mbs_offset1);
509510d565efSmrg free (is_binary);
509610d565efSmrg }
509710d565efSmrg else
509810d565efSmrg {
509910d565efSmrg FREE_VAR (wcs_string1);
510010d565efSmrg FREE_VAR (mbs_offset1);
510110d565efSmrg FREE_VAR (is_binary);
510210d565efSmrg }
510310d565efSmrg return -2;
510410d565efSmrg }
510510d565efSmrg wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
510610d565efSmrg mbs_offset1, is_binary);
510710d565efSmrg wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */
510810d565efSmrg if (size1 > MAX_ALLOCA_SIZE)
510910d565efSmrg free (is_binary);
511010d565efSmrg else
511110d565efSmrg FREE_VAR (is_binary);
511210d565efSmrg }
511310d565efSmrg if (size2 != 0)
511410d565efSmrg {
511510d565efSmrg if (size2 > MAX_ALLOCA_SIZE)
511610d565efSmrg {
511710d565efSmrg wcs_string2 = TALLOC (size2 + 1, CHAR_T);
511810d565efSmrg mbs_offset2 = TALLOC (size2 + 1, int);
511910d565efSmrg is_binary = TALLOC (size2 + 1, char);
512010d565efSmrg }
512110d565efSmrg else
512210d565efSmrg {
512310d565efSmrg wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
512410d565efSmrg mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
512510d565efSmrg is_binary = REGEX_TALLOC (size2 + 1, char);
512610d565efSmrg }
512710d565efSmrg if (!wcs_string2 || !mbs_offset2 || !is_binary)
512810d565efSmrg {
512910d565efSmrg FREE_WCS_BUFFERS ();
513010d565efSmrg if (size2 > MAX_ALLOCA_SIZE)
513110d565efSmrg free (is_binary);
513210d565efSmrg else
513310d565efSmrg FREE_VAR (is_binary);
513410d565efSmrg return -2;
513510d565efSmrg }
513610d565efSmrg wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
513710d565efSmrg mbs_offset2, is_binary);
513810d565efSmrg wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */
513910d565efSmrg if (size2 > MAX_ALLOCA_SIZE)
514010d565efSmrg free (is_binary);
514110d565efSmrg else
514210d565efSmrg FREE_VAR (is_binary);
514310d565efSmrg }
514410d565efSmrg #endif /* WCHAR */
514510d565efSmrg
514610d565efSmrg
514710d565efSmrg /* Loop through the string, looking for a place to start matching. */
514810d565efSmrg for (;;)
514910d565efSmrg {
515010d565efSmrg /* If a fastmap is supplied, skip quickly over characters that
515110d565efSmrg cannot be the start of a match. If the pattern can match the
515210d565efSmrg null string, however, we don't need to skip characters; we want
515310d565efSmrg the first null string. */
515410d565efSmrg if (fastmap && startpos < total_size && !bufp->can_be_null)
515510d565efSmrg {
515610d565efSmrg if (range > 0) /* Searching forwards. */
515710d565efSmrg {
515810d565efSmrg register const char *d;
515910d565efSmrg register int lim = 0;
516010d565efSmrg int irange = range;
516110d565efSmrg
516210d565efSmrg if (startpos < size1 && startpos + range >= size1)
516310d565efSmrg lim = range - (size1 - startpos);
516410d565efSmrg
516510d565efSmrg d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
516610d565efSmrg
516710d565efSmrg /* Written out as an if-else to avoid testing `translate'
516810d565efSmrg inside the loop. */
516910d565efSmrg if (translate)
517010d565efSmrg while (range > lim
517110d565efSmrg && !fastmap[(unsigned char)
517210d565efSmrg translate[(unsigned char) *d++]])
517310d565efSmrg range--;
517410d565efSmrg else
517510d565efSmrg while (range > lim && !fastmap[(unsigned char) *d++])
517610d565efSmrg range--;
517710d565efSmrg
517810d565efSmrg startpos += irange - range;
517910d565efSmrg }
518010d565efSmrg else /* Searching backwards. */
518110d565efSmrg {
518210d565efSmrg register CHAR_T c = (size1 == 0 || startpos >= size1
518310d565efSmrg ? string2[startpos - size1]
518410d565efSmrg : string1[startpos]);
518510d565efSmrg
518610d565efSmrg if (!fastmap[(unsigned char) TRANSLATE (c)])
518710d565efSmrg goto advance;
518810d565efSmrg }
518910d565efSmrg }
519010d565efSmrg
519110d565efSmrg /* If can't match the null string, and that's all we have left, fail. */
519210d565efSmrg if (range >= 0 && startpos == total_size && fastmap
519310d565efSmrg && !bufp->can_be_null)
519410d565efSmrg {
519510d565efSmrg #ifdef WCHAR
519610d565efSmrg FREE_WCS_BUFFERS ();
519710d565efSmrg #endif
519810d565efSmrg return -1;
519910d565efSmrg }
520010d565efSmrg
520110d565efSmrg #ifdef WCHAR
520210d565efSmrg val = wcs_re_match_2_internal (bufp, string1, size1, string2,
520310d565efSmrg size2, startpos, regs, stop,
520410d565efSmrg wcs_string1, wcs_size1,
520510d565efSmrg wcs_string2, wcs_size2,
520610d565efSmrg mbs_offset1, mbs_offset2);
520710d565efSmrg #else /* BYTE */
520810d565efSmrg val = byte_re_match_2_internal (bufp, string1, size1, string2,
520910d565efSmrg size2, startpos, regs, stop);
521010d565efSmrg #endif /* BYTE */
521110d565efSmrg
521210d565efSmrg #ifndef REGEX_MALLOC
521310d565efSmrg # ifdef C_ALLOCA
521410d565efSmrg alloca (0);
521510d565efSmrg # endif
521610d565efSmrg #endif
521710d565efSmrg
521810d565efSmrg if (val >= 0)
521910d565efSmrg {
522010d565efSmrg #ifdef WCHAR
522110d565efSmrg FREE_WCS_BUFFERS ();
522210d565efSmrg #endif
522310d565efSmrg return startpos;
522410d565efSmrg }
522510d565efSmrg
522610d565efSmrg if (val == -2)
522710d565efSmrg {
522810d565efSmrg #ifdef WCHAR
522910d565efSmrg FREE_WCS_BUFFERS ();
523010d565efSmrg #endif
523110d565efSmrg return -2;
523210d565efSmrg }
523310d565efSmrg
523410d565efSmrg advance:
523510d565efSmrg if (!range)
523610d565efSmrg break;
523710d565efSmrg else if (range > 0)
523810d565efSmrg {
523910d565efSmrg range--;
524010d565efSmrg startpos++;
524110d565efSmrg }
524210d565efSmrg else
524310d565efSmrg {
524410d565efSmrg range++;
524510d565efSmrg startpos--;
524610d565efSmrg }
524710d565efSmrg }
524810d565efSmrg #ifdef WCHAR
524910d565efSmrg FREE_WCS_BUFFERS ();
525010d565efSmrg #endif
525110d565efSmrg return -1;
525210d565efSmrg }
525310d565efSmrg
#ifdef WCHAR
/* Convert PTR, a pointer into one of the wchar_t search strings
   `string1' or `string2', into a multibyte (byte) offset measured from
   the start of the virtual concatenation of the two original strings.
   The mbs_offset1/mbs_offset2 tables map a wchar_t index to the byte
   offset within the corresponding multibyte string; a NULL table yields
   offset 0 within that string.  Offsets in the second string are shifted
   by csize1, the byte length of the first.  See convert_mbs_to_wcs.  */
# define POINTER_TO_OFFSET(ptr)                                          \
  (FIRST_STRING_P (ptr)                                                  \
   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))   \
   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)   \
                 + csize1)))
#else /* BYTE */
/* Convert PTR, a pointer into one of the search strings `string1' or
   `string2', into an offset from the start of the virtual concatenation
   of the two strings (offsets in `string2' are shifted by size1).  */
# define POINTER_TO_OFFSET(ptr)                 \
  (FIRST_STRING_P (ptr)                         \
   ? ((regoff_t) ((ptr) - string1))             \
   : ((regoff_t) ((ptr) - string2 + size1)))
#endif /* WCHAR */
527210d565efSmrg
/* Macros for dealing with the split strings in re_match_2.  They expand
   in the matcher's scope and use its locals (d, dend, string1, string2,
   size1, size2, end2, end_match_1, end_match_2) directly.  */

/* Nonzero while the current scan limit `dend' is the match limit of
   string1, i.e. while we are still scanning the first string.  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Call before fetching a character with *d.  This switches over to
   string2 if necessary.  Jumps to the label `fail' (which must exist in
   the expanding function) when the data is exhausted.  Written as a loop
   so that an empty string2 is detected on the second iteration.  */
#define PREFETCH()                                                      \
  while (d == dend)                                                     \
    {                                                                   \
      /* End of string2 => fail.  */                                    \
      if (dend == end_match_2)                                          \
        goto fail;                                                      \
      /* End of string1 => advance to string2.  */                      \
      d = string2;                                                      \
      dend = end_match_2;                                               \
    }

/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.
   Note that AT_STRINGS_BEG is also true for any D when size2 is zero.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
529410d565efSmrg
529510d565efSmrg
/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.

   D is evaluated more than once, so it must not have side effects.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX: a wide character is
   word-constituent if iswalnum accepts it or if it is an underscore.  */
# define WORDCHAR_P(d)                                                  \
  (iswalnum ((wint_t)((d) == end1 ? *string2                            \
             : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0           \
   || ((d) == end1 ? *string2                                           \
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
# define WORDCHAR_P(d)                                                  \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)
#endif /* WCHAR */

/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
532210d565efSmrg
/* Free everything we malloc.

   FREE_VARIABLES expands in the matcher's scope.  There are four
   variants, selected by MATCH_MAY_ALLOCATE and WCHAR:
     - with MATCH_MAY_ALLOCATE it releases the failure stack and the
       per-call register vectors;
     - with WCHAR it additionally releases the wide-character copies of
       the input (string1/string2) and their offset tables, but only
       when cant_free_wcs_buf is zero;
     - with neither, it expands to a no-op expression.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
# endif /* WCHAR */
#else
# ifdef WCHAR
#  define FREE_VARIABLES()                                              \
  do {                                                                  \
    if (!cant_free_wcs_buf)                                             \
      {                                                                 \
        FREE_VAR (string1);                                             \
        FREE_VAR (string2);                                             \
        FREE_VAR (mbs_offset1);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* WCHAR */
#endif /* not MATCH_MAY_ALLOCATE */
537710d565efSmrg
/* Sentinel values meaning "no register is currently active".

   These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
538710d565efSmrg
538810d565efSmrg #else /* not INSIDE_RECURSION */
538910d565efSmrg /* Matching routines. */
539010d565efSmrg
539110d565efSmrg #ifndef emacs /* Emacs never uses this. */
/* re_match is like re_match_2 except it takes only a single string.

   STRING (of length SIZE) is handed to the internal matcher as the
   second half of the split input, with an empty first half, and the
   stop position is SIZE, i.e. the end of STRING.  The internal
   matcher's return value is passed back unchanged.  */

int
re_match (struct re_pattern_buffer *bufp, const char *string,
          int size, int pos, struct re_registers *regs)
{
  int result;
# ifdef MBS_SUPPORT
  /* In a multibyte locale use the wide-character matcher.  The NULL
     wide buffers ask it to set up the wchar_t copies itself.  */
  if (MB_CUR_MAX != 1)
    result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                      pos, regs, size,
                                      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                       pos, regs, size);
# ifndef REGEX_MALLOC
#  ifdef C_ALLOCA
  /* With the C emulation of alloca, force a cleanup of whatever the
     internal matcher allocated.  */
  alloca (0);
#  endif
# endif
  return result;
}
541510d565efSmrg # ifdef _LIBC
541610d565efSmrg weak_alias (__re_match, re_match)
541710d565efSmrg # endif
541810d565efSmrg #endif /* not emacs */
541910d565efSmrg
542010d565efSmrg #endif /* not INSIDE_RECURSION */
542110d565efSmrg
542210d565efSmrg #ifdef INSIDE_RECURSION
542310d565efSmrg static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
542410d565efSmrg UCHAR_T *end,
542510d565efSmrg PREFIX(register_info_type) *reg_info);
542610d565efSmrg static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
542710d565efSmrg UCHAR_T *end,
542810d565efSmrg PREFIX(register_info_type) *reg_info);
542910d565efSmrg static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
543010d565efSmrg UCHAR_T *end,
543110d565efSmrg PREFIX(register_info_type) *reg_info);
543210d565efSmrg static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
543310d565efSmrg int len, char *translate);
543410d565efSmrg #else /* not INSIDE_RECURSION */
543510d565efSmrg
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */

int
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
            const char *string2, int size2, int pos,
            struct re_registers *regs, int stop)
{
  int result;
# ifdef MBS_SUPPORT
  /* In a multibyte locale use the wide-character matcher.  The NULL
     wide buffers ask it to set up the wchar_t copies itself.  */
  if (MB_CUR_MAX != 1)
    result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
                                      pos, regs, stop,
                                      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
                                       pos, regs, stop);

#ifndef REGEX_MALLOC
# ifdef C_ALLOCA
  /* With the C emulation of alloca, force a cleanup of whatever the
     internal matcher allocated.  */
  alloca (0);
# endif
#endif
  return result;
}
547210d565efSmrg #ifdef _LIBC
547310d565efSmrg weak_alias (__re_match_2, re_match_2)
547410d565efSmrg #endif
547510d565efSmrg
547610d565efSmrg #endif /* not INSIDE_RECURSION */
547710d565efSmrg
547810d565efSmrg #ifdef INSIDE_RECURSION
547910d565efSmrg
548010d565efSmrg #ifdef WCHAR
static int count_mbs_length (int *, int);

/* Translate LENGTH, a byte offset into a multibyte string, into the
   number of wchar_t characters that the prefix [0, LENGTH) of that
   string occupies.  OFFSET_BUFFER is the offset table built for the
   string (see convert_mbs_to_wcs): entry i is the byte offset at which
   wide character i starts, so the answer is the index whose entry
   equals LENGTH.

   Returns -1 if LENGTH is negative or if no entry equals LENGTH,
   0 if OFFSET_BUFFER is NULL, and the matching index otherwise.  */

static int
count_mbs_length (int *offset_buffer, int length)
{
  int lower, upper;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Binary search over the open interval (lower, upper).  LENGTH is a
     valid upper bound because for all i, offset_buffer[i] >= i; the
     end points themselves never need probing (index LENGTH was tested
     above, and index 0 can only match LENGTH == 0, which the same test
     covers).  */
  lower = 0;
  upper = length;

  while (upper - lower > 1)
    {
      /* Written this way so the midpoint cannot overflow int.  */
      int middle = lower + (upper - lower) / 2;

      if (offset_buffer[middle] > length)
        upper = middle;
      else if (offset_buffer[middle] < length)
        lower = middle;
      else
        return middle;
    }

  /* LENGTH does not fall on a character boundary.  */
  return -1;
}
552410d565efSmrg #endif /* WCHAR */
552510d565efSmrg
552610d565efSmrg /* This is a separate function so that we can force an alloca cleanup
552710d565efSmrg afterwards. */
552810d565efSmrg #ifdef WCHAR
552910d565efSmrg static int
wcs_re_match_2_internal(struct re_pattern_buffer * bufp,const char * cstring1,int csize1,const char * cstring2,int csize2,int pos,struct re_registers * regs,int stop,wchar_t * string1,int size1,wchar_t * string2,int size2,int * mbs_offset1,int * mbs_offset2)553010d565efSmrg wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
553110d565efSmrg const char *cstring1, int csize1,
553210d565efSmrg const char *cstring2, int csize2,
553310d565efSmrg int pos,
553410d565efSmrg struct re_registers *regs,
553510d565efSmrg int stop,
553610d565efSmrg /* string1 == string2 == NULL means string1/2, size1/2 and
553710d565efSmrg mbs_offset1/2 need seting up in this function. */
553810d565efSmrg /* We need wchar_t* buffers correspond to cstring1, cstring2. */
553910d565efSmrg wchar_t *string1, int size1,
554010d565efSmrg wchar_t *string2, int size2,
554110d565efSmrg /* offset buffer for optimizatoin. See convert_mbs_to_wc. */
554210d565efSmrg int *mbs_offset1, int *mbs_offset2)
554310d565efSmrg #else /* BYTE */
554410d565efSmrg static int
554510d565efSmrg byte_re_match_2_internal (struct re_pattern_buffer *bufp,
554610d565efSmrg const char *string1, int size1,
554710d565efSmrg const char *string2, int size2,
554810d565efSmrg int pos,
554910d565efSmrg struct re_registers *regs, int stop)
555010d565efSmrg #endif /* BYTE */
555110d565efSmrg {
555210d565efSmrg /* General temporaries. */
555310d565efSmrg int mcnt;
555410d565efSmrg UCHAR_T *p1;
555510d565efSmrg #ifdef WCHAR
555610d565efSmrg /* They hold whether each wchar_t is binary data or not. */
555710d565efSmrg char *is_binary = NULL;
555810d565efSmrg /* If true, we can't free string1/2, mbs_offset1/2. */
555910d565efSmrg int cant_free_wcs_buf = 1;
556010d565efSmrg #endif /* WCHAR */
556110d565efSmrg
556210d565efSmrg /* Just past the end of the corresponding string. */
556310d565efSmrg const CHAR_T *end1, *end2;
556410d565efSmrg
556510d565efSmrg /* Pointers into string1 and string2, just past the last characters in
556610d565efSmrg each to consider matching. */
556710d565efSmrg const CHAR_T *end_match_1, *end_match_2;
556810d565efSmrg
556910d565efSmrg /* Where we are in the data, and the end of the current string. */
557010d565efSmrg const CHAR_T *d, *dend;
557110d565efSmrg
557210d565efSmrg /* Where we are in the pattern, and the end of the pattern. */
557310d565efSmrg #ifdef WCHAR
557410d565efSmrg UCHAR_T *pattern, *p;
557510d565efSmrg register UCHAR_T *pend;
557610d565efSmrg #else /* BYTE */
557710d565efSmrg UCHAR_T *p = bufp->buffer;
557810d565efSmrg register UCHAR_T *pend = p + bufp->used;
557910d565efSmrg #endif /* WCHAR */
558010d565efSmrg
558110d565efSmrg /* Mark the opcode just after a start_memory, so we can test for an
558210d565efSmrg empty subpattern when we get to the stop_memory. */
558310d565efSmrg UCHAR_T *just_past_start_mem = 0;
558410d565efSmrg
558510d565efSmrg /* We use this to map every character in the string. */
558610d565efSmrg RE_TRANSLATE_TYPE translate = bufp->translate;
558710d565efSmrg
558810d565efSmrg /* Failure point stack. Each place that can handle a failure further
558910d565efSmrg down the line pushes a failure point on this stack. It consists of
559010d565efSmrg restart, regend, and reg_info for all registers corresponding to
559110d565efSmrg the subexpressions we're currently inside, plus the number of such
559210d565efSmrg registers, and, finally, two char *'s. The first char * is where
559310d565efSmrg to resume scanning the pattern; the second one is where to resume
559410d565efSmrg scanning the strings. If the latter is zero, the failure point is
559510d565efSmrg a ``dummy''; if a failure happens and the failure point is a dummy,
559610d565efSmrg it gets discarded and the next next one is tried. */
559710d565efSmrg #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
559810d565efSmrg PREFIX(fail_stack_type) fail_stack;
559910d565efSmrg #endif
560010d565efSmrg #ifdef DEBUG
560110d565efSmrg static unsigned failure_id;
560210d565efSmrg unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
560310d565efSmrg #endif
560410d565efSmrg
560510d565efSmrg #ifdef REL_ALLOC
560610d565efSmrg /* This holds the pointer to the failure stack, when
560710d565efSmrg it is allocated relocatably. */
560810d565efSmrg fail_stack_elt_t *failure_stack_ptr;
560910d565efSmrg #endif
561010d565efSmrg
561110d565efSmrg /* We fill all the registers internally, independent of what we
561210d565efSmrg return, for use in backreferences. The number here includes
561310d565efSmrg an element for register zero. */
561410d565efSmrg size_t num_regs = bufp->re_nsub + 1;
561510d565efSmrg
561610d565efSmrg /* The currently active registers. */
561710d565efSmrg active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
561810d565efSmrg active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
561910d565efSmrg
562010d565efSmrg /* Information on the contents of registers. These are pointers into
562110d565efSmrg the input strings; they record just what was matched (on this
562210d565efSmrg attempt) by a subexpression part of the pattern, that is, the
562310d565efSmrg regnum-th regstart pointer points to where in the pattern we began
562410d565efSmrg matching and the regnum-th regend points to right after where we
562510d565efSmrg stopped matching the regnum-th subexpression. (The zeroth register
562610d565efSmrg keeps track of what the whole pattern matches.) */
562710d565efSmrg #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
562810d565efSmrg const CHAR_T **regstart, **regend;
562910d565efSmrg #endif
563010d565efSmrg
563110d565efSmrg /* If a group that's operated upon by a repetition operator fails to
563210d565efSmrg match anything, then the register for its start will need to be
563310d565efSmrg restored because it will have been set to wherever in the string we
563410d565efSmrg are when we last see its open-group operator. Similarly for a
563510d565efSmrg register's end. */
563610d565efSmrg #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
563710d565efSmrg const CHAR_T **old_regstart, **old_regend;
563810d565efSmrg #endif
563910d565efSmrg
564010d565efSmrg /* The is_active field of reg_info helps us keep track of which (possibly
564110d565efSmrg nested) subexpressions we are currently in. The matched_something
564210d565efSmrg field of reg_info[reg_num] helps us tell whether or not we have
564310d565efSmrg matched any of the pattern so far this time through the reg_num-th
564410d565efSmrg subexpression. These two fields get reset each time through any
564510d565efSmrg loop their register is in. */
564610d565efSmrg #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
564710d565efSmrg PREFIX(register_info_type) *reg_info;
564810d565efSmrg #endif
564910d565efSmrg
565010d565efSmrg /* The following record the register info as found in the above
565110d565efSmrg variables when we find a match better than any we've seen before.
565210d565efSmrg This happens as we backtrack through the failure points, which in
565310d565efSmrg turn happens only if we have not yet matched the entire string. */
565410d565efSmrg unsigned best_regs_set = false;
565510d565efSmrg #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
565610d565efSmrg const CHAR_T **best_regstart, **best_regend;
565710d565efSmrg #endif
565810d565efSmrg
565910d565efSmrg /* Logically, this is `best_regend[0]'. But we don't want to have to
566010d565efSmrg allocate space for that if we're not allocating space for anything
566110d565efSmrg else (see below). Also, we never need info about register 0 for
566210d565efSmrg any of the other register vectors, and it seems rather a kludge to
566310d565efSmrg treat `best_regend' differently than the rest. So we keep track of
566410d565efSmrg the end of the best match so far in a separate variable. We
566510d565efSmrg initialize this to NULL so that when we backtrack the first time
566610d565efSmrg and need to test it, it's not garbage. */
566710d565efSmrg const CHAR_T *match_end = NULL;
566810d565efSmrg
566910d565efSmrg /* This helps SET_REGS_MATCHED avoid doing redundant work. */
567010d565efSmrg int set_regs_matched_done = 0;
567110d565efSmrg
567210d565efSmrg /* Used when we pop values we don't care about. */
567310d565efSmrg #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
567410d565efSmrg const CHAR_T **reg_dummy;
567510d565efSmrg PREFIX(register_info_type) *reg_info_dummy;
567610d565efSmrg #endif
567710d565efSmrg
567810d565efSmrg #ifdef DEBUG
567910d565efSmrg /* Counts the total number of registers pushed. */
568010d565efSmrg unsigned num_regs_pushed = 0;
568110d565efSmrg #endif
568210d565efSmrg
568310d565efSmrg DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
568410d565efSmrg
568510d565efSmrg INIT_FAIL_STACK ();
568610d565efSmrg
568710d565efSmrg #ifdef MATCH_MAY_ALLOCATE
568810d565efSmrg /* Do not bother to initialize all the register variables if there are
568910d565efSmrg no groups in the pattern, as it takes a fair amount of time. If
569010d565efSmrg there are groups, we include space for register 0 (the whole
569110d565efSmrg pattern), even though we never use it, since it simplifies the
569210d565efSmrg array indexing. We should fix this. */
569310d565efSmrg if (bufp->re_nsub)
569410d565efSmrg {
569510d565efSmrg regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
569610d565efSmrg regend = REGEX_TALLOC (num_regs, const CHAR_T *);
569710d565efSmrg old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
569810d565efSmrg old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
569910d565efSmrg best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
570010d565efSmrg best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
570110d565efSmrg reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
570210d565efSmrg reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
570310d565efSmrg reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
570410d565efSmrg
570510d565efSmrg if (!(regstart && regend && old_regstart && old_regend && reg_info
570610d565efSmrg && best_regstart && best_regend && reg_dummy && reg_info_dummy))
570710d565efSmrg {
570810d565efSmrg FREE_VARIABLES ();
570910d565efSmrg return -2;
571010d565efSmrg }
571110d565efSmrg }
571210d565efSmrg else
571310d565efSmrg {
571410d565efSmrg /* We must initialize all our variables to NULL, so that
571510d565efSmrg `FREE_VARIABLES' doesn't try to free them. */
571610d565efSmrg regstart = regend = old_regstart = old_regend = best_regstart
571710d565efSmrg = best_regend = reg_dummy = NULL;
571810d565efSmrg reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
571910d565efSmrg }
572010d565efSmrg #endif /* MATCH_MAY_ALLOCATE */
572110d565efSmrg
572210d565efSmrg /* The starting position is bogus. */
572310d565efSmrg #ifdef WCHAR
572410d565efSmrg if (pos < 0 || pos > csize1 + csize2)
572510d565efSmrg #else /* BYTE */
572610d565efSmrg if (pos < 0 || pos > size1 + size2)
572710d565efSmrg #endif
572810d565efSmrg {
572910d565efSmrg FREE_VARIABLES ();
573010d565efSmrg return -1;
573110d565efSmrg }
573210d565efSmrg
573310d565efSmrg #ifdef WCHAR
573410d565efSmrg /* Allocate wchar_t array for string1 and string2 and
573510d565efSmrg fill them with converted string. */
573610d565efSmrg if (string1 == NULL && string2 == NULL)
573710d565efSmrg {
573810d565efSmrg /* We need seting up buffers here. */
573910d565efSmrg
574010d565efSmrg /* We must free wcs buffers in this function. */
574110d565efSmrg cant_free_wcs_buf = 0;
574210d565efSmrg
574310d565efSmrg if (csize1 != 0)
574410d565efSmrg {
574510d565efSmrg string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
574610d565efSmrg mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
574710d565efSmrg is_binary = REGEX_TALLOC (csize1 + 1, char);
574810d565efSmrg if (!string1 || !mbs_offset1 || !is_binary)
574910d565efSmrg {
575010d565efSmrg FREE_VAR (string1);
575110d565efSmrg FREE_VAR (mbs_offset1);
575210d565efSmrg FREE_VAR (is_binary);
575310d565efSmrg return -2;
575410d565efSmrg }
575510d565efSmrg }
575610d565efSmrg if (csize2 != 0)
575710d565efSmrg {
575810d565efSmrg string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
575910d565efSmrg mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
576010d565efSmrg is_binary = REGEX_TALLOC (csize2 + 1, char);
576110d565efSmrg if (!string2 || !mbs_offset2 || !is_binary)
576210d565efSmrg {
576310d565efSmrg FREE_VAR (string1);
576410d565efSmrg FREE_VAR (mbs_offset1);
576510d565efSmrg FREE_VAR (string2);
576610d565efSmrg FREE_VAR (mbs_offset2);
576710d565efSmrg FREE_VAR (is_binary);
576810d565efSmrg return -2;
576910d565efSmrg }
577010d565efSmrg size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
577110d565efSmrg mbs_offset2, is_binary);
577210d565efSmrg string2[size2] = L'\0'; /* for a sentinel */
577310d565efSmrg FREE_VAR (is_binary);
577410d565efSmrg }
577510d565efSmrg }
577610d565efSmrg
577710d565efSmrg /* We need to cast pattern to (wchar_t*), because we casted this compiled
577810d565efSmrg pattern to (char*) in regex_compile. */
577910d565efSmrg p = pattern = (CHAR_T*)bufp->buffer;
578010d565efSmrg pend = (CHAR_T*)(bufp->buffer + bufp->used);
578110d565efSmrg
578210d565efSmrg #endif /* WCHAR */
578310d565efSmrg
578410d565efSmrg /* Initialize subexpression text positions to -1 to mark ones that no
578510d565efSmrg start_memory/stop_memory has been seen for. Also initialize the
578610d565efSmrg register information struct. */
578710d565efSmrg for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
578810d565efSmrg {
578910d565efSmrg regstart[mcnt] = regend[mcnt]
579010d565efSmrg = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
579110d565efSmrg
579210d565efSmrg REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
579310d565efSmrg IS_ACTIVE (reg_info[mcnt]) = 0;
579410d565efSmrg MATCHED_SOMETHING (reg_info[mcnt]) = 0;
579510d565efSmrg EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
579610d565efSmrg }
579710d565efSmrg
579810d565efSmrg /* We move `string1' into `string2' if the latter's empty -- but not if
579910d565efSmrg `string1' is null. */
580010d565efSmrg if (size2 == 0 && string1 != NULL)
580110d565efSmrg {
580210d565efSmrg string2 = string1;
580310d565efSmrg size2 = size1;
580410d565efSmrg string1 = 0;
580510d565efSmrg size1 = 0;
580610d565efSmrg #ifdef WCHAR
580710d565efSmrg mbs_offset2 = mbs_offset1;
580810d565efSmrg csize2 = csize1;
580910d565efSmrg mbs_offset1 = NULL;
581010d565efSmrg csize1 = 0;
581110d565efSmrg #endif
581210d565efSmrg }
581310d565efSmrg end1 = string1 + size1;
581410d565efSmrg end2 = string2 + size2;
581510d565efSmrg
581610d565efSmrg /* Compute where to stop matching, within the two strings. */
581710d565efSmrg #ifdef WCHAR
581810d565efSmrg if (stop <= csize1)
581910d565efSmrg {
582010d565efSmrg mcnt = count_mbs_length(mbs_offset1, stop);
582110d565efSmrg end_match_1 = string1 + mcnt;
582210d565efSmrg end_match_2 = string2;
582310d565efSmrg }
582410d565efSmrg else
582510d565efSmrg {
582610d565efSmrg if (stop > csize1 + csize2)
582710d565efSmrg stop = csize1 + csize2;
582810d565efSmrg end_match_1 = end1;
582910d565efSmrg mcnt = count_mbs_length(mbs_offset2, stop-csize1);
583010d565efSmrg end_match_2 = string2 + mcnt;
583110d565efSmrg }
583210d565efSmrg if (mcnt < 0)
583310d565efSmrg { /* count_mbs_length return error. */
583410d565efSmrg FREE_VARIABLES ();
583510d565efSmrg return -1;
583610d565efSmrg }
583710d565efSmrg #else
583810d565efSmrg if (stop <= size1)
583910d565efSmrg {
584010d565efSmrg end_match_1 = string1 + stop;
584110d565efSmrg end_match_2 = string2;
584210d565efSmrg }
584310d565efSmrg else
584410d565efSmrg {
584510d565efSmrg end_match_1 = end1;
584610d565efSmrg end_match_2 = string2 + stop - size1;
584710d565efSmrg }
584810d565efSmrg #endif /* WCHAR */
584910d565efSmrg
585010d565efSmrg /* `p' scans through the pattern as `d' scans through the data.
585110d565efSmrg `dend' is the end of the input string that `d' points within. `d'
585210d565efSmrg is advanced into the following input string whenever necessary, but
585310d565efSmrg this happens before fetching; therefore, at the beginning of the
585410d565efSmrg loop, `d' can be pointing at the end of a string, but it cannot
585510d565efSmrg equal `string2'. */
585610d565efSmrg #ifdef WCHAR
585710d565efSmrg if (size1 > 0 && pos <= csize1)
585810d565efSmrg {
585910d565efSmrg mcnt = count_mbs_length(mbs_offset1, pos);
586010d565efSmrg d = string1 + mcnt;
586110d565efSmrg dend = end_match_1;
586210d565efSmrg }
586310d565efSmrg else
586410d565efSmrg {
586510d565efSmrg mcnt = count_mbs_length(mbs_offset2, pos-csize1);
586610d565efSmrg d = string2 + mcnt;
586710d565efSmrg dend = end_match_2;
586810d565efSmrg }
586910d565efSmrg
587010d565efSmrg if (mcnt < 0)
587110d565efSmrg { /* count_mbs_length return error. */
587210d565efSmrg FREE_VARIABLES ();
587310d565efSmrg return -1;
587410d565efSmrg }
587510d565efSmrg #else
587610d565efSmrg if (size1 > 0 && pos <= size1)
587710d565efSmrg {
587810d565efSmrg d = string1 + pos;
587910d565efSmrg dend = end_match_1;
588010d565efSmrg }
588110d565efSmrg else
588210d565efSmrg {
588310d565efSmrg d = string2 + pos - size1;
588410d565efSmrg dend = end_match_2;
588510d565efSmrg }
588610d565efSmrg #endif /* WCHAR */
588710d565efSmrg
588810d565efSmrg DEBUG_PRINT1 ("The compiled pattern is:\n");
588910d565efSmrg DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
589010d565efSmrg DEBUG_PRINT1 ("The string to match is: `");
589110d565efSmrg DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
589210d565efSmrg DEBUG_PRINT1 ("'\n");
589310d565efSmrg
589410d565efSmrg /* This loops over pattern commands. It exits by returning from the
589510d565efSmrg function if the match is complete, or it drops through if the match
589610d565efSmrg fails at this starting point in the input data. */
589710d565efSmrg for (;;)
589810d565efSmrg {
589910d565efSmrg #ifdef _LIBC
590010d565efSmrg DEBUG_PRINT2 ("\n%p: ", p);
590110d565efSmrg #else
590210d565efSmrg DEBUG_PRINT2 ("\n0x%x: ", p);
590310d565efSmrg #endif
590410d565efSmrg
590510d565efSmrg if (p == pend)
590610d565efSmrg { /* End of pattern means we might have succeeded. */
590710d565efSmrg DEBUG_PRINT1 ("end of pattern ... ");
590810d565efSmrg
590910d565efSmrg /* If we haven't matched the entire string, and we want the
591010d565efSmrg longest match, try backtracking. */
591110d565efSmrg if (d != end_match_2)
591210d565efSmrg {
591310d565efSmrg /* 1 if this match ends in the same string (string1 or string2)
591410d565efSmrg as the best previous match. */
591510d565efSmrg boolean same_str_p;
591610d565efSmrg
591710d565efSmrg /* 1 if this match is the best seen so far. */
591810d565efSmrg boolean best_match_p;
591910d565efSmrg
592010d565efSmrg same_str_p = (FIRST_STRING_P (match_end)
592110d565efSmrg == MATCHING_IN_FIRST_STRING);
592210d565efSmrg
592310d565efSmrg /* AIX compiler got confused when this was combined
592410d565efSmrg with the previous declaration. */
592510d565efSmrg if (same_str_p)
592610d565efSmrg best_match_p = d > match_end;
592710d565efSmrg else
592810d565efSmrg best_match_p = !MATCHING_IN_FIRST_STRING;
592910d565efSmrg
593010d565efSmrg DEBUG_PRINT1 ("backtracking.\n");
593110d565efSmrg
593210d565efSmrg if (!FAIL_STACK_EMPTY ())
593310d565efSmrg { /* More failure points to try. */
593410d565efSmrg
593510d565efSmrg /* If exceeds best match so far, save it. */
593610d565efSmrg if (!best_regs_set || best_match_p)
593710d565efSmrg {
593810d565efSmrg best_regs_set = true;
593910d565efSmrg match_end = d;
594010d565efSmrg
594110d565efSmrg DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
594210d565efSmrg
594310d565efSmrg for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
594410d565efSmrg {
594510d565efSmrg best_regstart[mcnt] = regstart[mcnt];
594610d565efSmrg best_regend[mcnt] = regend[mcnt];
594710d565efSmrg }
594810d565efSmrg }
594910d565efSmrg goto fail;
595010d565efSmrg }
595110d565efSmrg
595210d565efSmrg /* If no failure points, don't restore garbage. And if
595310d565efSmrg last match is real best match, don't restore second
595410d565efSmrg best one. */
595510d565efSmrg else if (best_regs_set && !best_match_p)
595610d565efSmrg {
595710d565efSmrg restore_best_regs:
595810d565efSmrg /* Restore best match. It may happen that `dend ==
595910d565efSmrg end_match_1' while the restored d is in string2.
596010d565efSmrg For example, the pattern `x.*y.*z' against the
596110d565efSmrg strings `x-' and `y-z-', if the two strings are
596210d565efSmrg not consecutive in memory. */
596310d565efSmrg DEBUG_PRINT1 ("Restoring best registers.\n");
596410d565efSmrg
596510d565efSmrg d = match_end;
596610d565efSmrg dend = ((d >= string1 && d <= end1)
596710d565efSmrg ? end_match_1 : end_match_2);
596810d565efSmrg
596910d565efSmrg for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
597010d565efSmrg {
597110d565efSmrg regstart[mcnt] = best_regstart[mcnt];
597210d565efSmrg regend[mcnt] = best_regend[mcnt];
597310d565efSmrg }
597410d565efSmrg }
597510d565efSmrg } /* d != end_match_2 */
597610d565efSmrg
597710d565efSmrg succeed_label:
597810d565efSmrg DEBUG_PRINT1 ("Accepting match.\n");
597910d565efSmrg /* If caller wants register contents data back, do it. */
598010d565efSmrg if (regs && !bufp->no_sub)
598110d565efSmrg {
598210d565efSmrg /* Have the register data arrays been allocated? */
598310d565efSmrg if (bufp->regs_allocated == REGS_UNALLOCATED)
598410d565efSmrg { /* No. So allocate them with malloc. We need one
598510d565efSmrg extra element beyond `num_regs' for the `-1' marker
598610d565efSmrg GNU code uses. */
598710d565efSmrg regs->num_regs = MAX (RE_NREGS, num_regs + 1);
598810d565efSmrg regs->start = TALLOC (regs->num_regs, regoff_t);
598910d565efSmrg regs->end = TALLOC (regs->num_regs, regoff_t);
599010d565efSmrg if (regs->start == NULL || regs->end == NULL)
599110d565efSmrg {
599210d565efSmrg FREE_VARIABLES ();
599310d565efSmrg return -2;
599410d565efSmrg }
599510d565efSmrg bufp->regs_allocated = REGS_REALLOCATE;
599610d565efSmrg }
599710d565efSmrg else if (bufp->regs_allocated == REGS_REALLOCATE)
599810d565efSmrg { /* Yes. If we need more elements than were already
599910d565efSmrg allocated, reallocate them. If we need fewer, just
600010d565efSmrg leave it alone. */
600110d565efSmrg if (regs->num_regs < num_regs + 1)
600210d565efSmrg {
600310d565efSmrg regs->num_regs = num_regs + 1;
600410d565efSmrg RETALLOC (regs->start, regs->num_regs, regoff_t);
600510d565efSmrg RETALLOC (regs->end, regs->num_regs, regoff_t);
600610d565efSmrg if (regs->start == NULL || regs->end == NULL)
600710d565efSmrg {
600810d565efSmrg FREE_VARIABLES ();
600910d565efSmrg return -2;
601010d565efSmrg }
601110d565efSmrg }
601210d565efSmrg }
601310d565efSmrg else
601410d565efSmrg {
601510d565efSmrg /* These braces fend off an "empty body in an else-statement"
601610d565efSmrg warning under GCC when assert expands to nothing. */
601710d565efSmrg assert (bufp->regs_allocated == REGS_FIXED);
601810d565efSmrg }
601910d565efSmrg
602010d565efSmrg /* Convert the pointer data in `regstart' and `regend' to
602110d565efSmrg indices. Register zero has to be set differently,
602210d565efSmrg since we haven't kept track of any info for it. */
602310d565efSmrg if (regs->num_regs > 0)
602410d565efSmrg {
602510d565efSmrg regs->start[0] = pos;
602610d565efSmrg #ifdef WCHAR
602710d565efSmrg if (MATCHING_IN_FIRST_STRING)
602810d565efSmrg regs->end[0] = mbs_offset1 != NULL ?
602910d565efSmrg mbs_offset1[d-string1] : 0;
603010d565efSmrg else
603110d565efSmrg regs->end[0] = csize1 + (mbs_offset2 != NULL ?
603210d565efSmrg mbs_offset2[d-string2] : 0);
603310d565efSmrg #else
603410d565efSmrg regs->end[0] = (MATCHING_IN_FIRST_STRING
603510d565efSmrg ? ((regoff_t) (d - string1))
603610d565efSmrg : ((regoff_t) (d - string2 + size1)));
603710d565efSmrg #endif /* WCHAR */
603810d565efSmrg }
603910d565efSmrg
604010d565efSmrg /* Go through the first `min (num_regs, regs->num_regs)'
604110d565efSmrg registers, since that is all we initialized. */
604210d565efSmrg for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
604310d565efSmrg mcnt++)
604410d565efSmrg {
604510d565efSmrg if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
604610d565efSmrg regs->start[mcnt] = regs->end[mcnt] = -1;
604710d565efSmrg else
604810d565efSmrg {
604910d565efSmrg regs->start[mcnt]
605010d565efSmrg = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
605110d565efSmrg regs->end[mcnt]
605210d565efSmrg = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
605310d565efSmrg }
605410d565efSmrg }
605510d565efSmrg
605610d565efSmrg /* If the regs structure we return has more elements than
605710d565efSmrg were in the pattern, set the extra elements to -1. If
605810d565efSmrg we (re)allocated the registers, this is the case,
605910d565efSmrg because we always allocate enough to have at least one
606010d565efSmrg -1 at the end. */
606110d565efSmrg for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
606210d565efSmrg regs->start[mcnt] = regs->end[mcnt] = -1;
606310d565efSmrg } /* regs && !bufp->no_sub */
606410d565efSmrg
606510d565efSmrg DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
606610d565efSmrg nfailure_points_pushed, nfailure_points_popped,
606710d565efSmrg nfailure_points_pushed - nfailure_points_popped);
606810d565efSmrg DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
606910d565efSmrg
607010d565efSmrg #ifdef WCHAR
607110d565efSmrg if (MATCHING_IN_FIRST_STRING)
607210d565efSmrg mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
607310d565efSmrg else
607410d565efSmrg mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
607510d565efSmrg csize1;
607610d565efSmrg mcnt -= pos;
607710d565efSmrg #else
607810d565efSmrg mcnt = d - pos - (MATCHING_IN_FIRST_STRING
607910d565efSmrg ? string1
608010d565efSmrg : string2 - size1);
608110d565efSmrg #endif /* WCHAR */
608210d565efSmrg
608310d565efSmrg DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
608410d565efSmrg
608510d565efSmrg FREE_VARIABLES ();
608610d565efSmrg return mcnt;
608710d565efSmrg }
608810d565efSmrg
608910d565efSmrg /* Otherwise match next pattern command. */
609010d565efSmrg switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
609110d565efSmrg {
609210d565efSmrg /* Ignore these. Used to ignore the n of succeed_n's which
609310d565efSmrg currently have n == 0. */
609410d565efSmrg case no_op:
609510d565efSmrg DEBUG_PRINT1 ("EXECUTING no_op.\n");
609610d565efSmrg break;
609710d565efSmrg
609810d565efSmrg case succeed:
609910d565efSmrg DEBUG_PRINT1 ("EXECUTING succeed.\n");
610010d565efSmrg goto succeed_label;
610110d565efSmrg
610210d565efSmrg /* Match the next n pattern characters exactly. The following
610310d565efSmrg byte in the pattern defines n, and the n bytes after that
610410d565efSmrg are the characters to match. */
610510d565efSmrg case exactn:
610610d565efSmrg #ifdef MBS_SUPPORT
610710d565efSmrg case exactn_bin:
610810d565efSmrg #endif
610910d565efSmrg mcnt = *p++;
611010d565efSmrg DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
611110d565efSmrg
611210d565efSmrg /* This is written out as an if-else so we don't waste time
611310d565efSmrg testing `translate' inside the loop. */
611410d565efSmrg if (translate)
611510d565efSmrg {
611610d565efSmrg do
611710d565efSmrg {
611810d565efSmrg PREFETCH ();
611910d565efSmrg #ifdef WCHAR
612010d565efSmrg if (*d <= 0xff)
612110d565efSmrg {
612210d565efSmrg if ((UCHAR_T) translate[(unsigned char) *d++]
612310d565efSmrg != (UCHAR_T) *p++)
612410d565efSmrg goto fail;
612510d565efSmrg }
612610d565efSmrg else
612710d565efSmrg {
612810d565efSmrg if (*d++ != (CHAR_T) *p++)
612910d565efSmrg goto fail;
613010d565efSmrg }
613110d565efSmrg #else
613210d565efSmrg if ((UCHAR_T) translate[(unsigned char) *d++]
613310d565efSmrg != (UCHAR_T) *p++)
613410d565efSmrg goto fail;
613510d565efSmrg #endif /* WCHAR */
613610d565efSmrg }
613710d565efSmrg while (--mcnt);
613810d565efSmrg }
613910d565efSmrg else
614010d565efSmrg {
614110d565efSmrg do
614210d565efSmrg {
614310d565efSmrg PREFETCH ();
614410d565efSmrg if (*d++ != (CHAR_T) *p++) goto fail;
614510d565efSmrg }
614610d565efSmrg while (--mcnt);
614710d565efSmrg }
614810d565efSmrg SET_REGS_MATCHED ();
614910d565efSmrg break;
615010d565efSmrg
615110d565efSmrg
615210d565efSmrg /* Match any character except possibly a newline or a null. */
615310d565efSmrg case anychar:
615410d565efSmrg DEBUG_PRINT1 ("EXECUTING anychar.\n");
615510d565efSmrg
615610d565efSmrg PREFETCH ();
615710d565efSmrg
615810d565efSmrg if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
615910d565efSmrg || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
616010d565efSmrg goto fail;
616110d565efSmrg
616210d565efSmrg SET_REGS_MATCHED ();
616310d565efSmrg DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
616410d565efSmrg d++;
616510d565efSmrg break;
616610d565efSmrg
616710d565efSmrg
616810d565efSmrg case charset:
616910d565efSmrg case charset_not:
617010d565efSmrg {
617110d565efSmrg register UCHAR_T c;
617210d565efSmrg #ifdef WCHAR
617310d565efSmrg unsigned int i, char_class_length, coll_symbol_length,
617410d565efSmrg equiv_class_length, ranges_length, chars_length, length;
617510d565efSmrg CHAR_T *workp, *workp2, *charset_top;
617610d565efSmrg #define WORK_BUFFER_SIZE 128
617710d565efSmrg CHAR_T str_buf[WORK_BUFFER_SIZE];
617810d565efSmrg # ifdef _LIBC
617910d565efSmrg uint32_t nrules;
618010d565efSmrg # endif /* _LIBC */
618110d565efSmrg #endif /* WCHAR */
618210d565efSmrg boolean negate = (re_opcode_t) *(p - 1) == charset_not;
618310d565efSmrg
618410d565efSmrg DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
618510d565efSmrg PREFETCH ();
618610d565efSmrg c = TRANSLATE (*d); /* The character to match. */
618710d565efSmrg #ifdef WCHAR
618810d565efSmrg # ifdef _LIBC
618910d565efSmrg nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
619010d565efSmrg # endif /* _LIBC */
619110d565efSmrg charset_top = p - 1;
619210d565efSmrg char_class_length = *p++;
619310d565efSmrg coll_symbol_length = *p++;
619410d565efSmrg equiv_class_length = *p++;
619510d565efSmrg ranges_length = *p++;
619610d565efSmrg chars_length = *p++;
619710d565efSmrg /* p points to charset[6], so the address of the next instruction
619810d565efSmrg (charset[l+m+n+2o+k+p']) equals p[l+m+n+2*o+p'],
619910d565efSmrg where l=length of char_classes, m=length of collating_symbol,
620010d565efSmrg n=equivalence_class, o=length of char_range,
620110d565efSmrg p'=length of character. */
620210d565efSmrg workp = p;
620310d565efSmrg /* Update p to indicate the next instruction. */
620410d565efSmrg p += char_class_length + coll_symbol_length+ equiv_class_length +
620510d565efSmrg 2*ranges_length + chars_length;
620610d565efSmrg
620710d565efSmrg /* match with char_class? */
620810d565efSmrg for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
620910d565efSmrg {
621010d565efSmrg wctype_t wctype;
621110d565efSmrg uintptr_t alignedp = ((uintptr_t)workp
621210d565efSmrg + __alignof__(wctype_t) - 1)
621310d565efSmrg & ~(uintptr_t)(__alignof__(wctype_t) - 1);
621410d565efSmrg wctype = *((wctype_t*)alignedp);
621510d565efSmrg workp += CHAR_CLASS_SIZE;
621610d565efSmrg # ifdef _LIBC
621710d565efSmrg if (__iswctype((wint_t)c, wctype))
621810d565efSmrg goto char_set_matched;
621910d565efSmrg # else
622010d565efSmrg if (iswctype((wint_t)c, wctype))
622110d565efSmrg goto char_set_matched;
622210d565efSmrg # endif
622310d565efSmrg }
622410d565efSmrg
622510d565efSmrg /* match with collating_symbol? */
622610d565efSmrg # ifdef _LIBC
622710d565efSmrg if (nrules != 0)
622810d565efSmrg {
622910d565efSmrg const unsigned char *extra = (const unsigned char *)
623010d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
623110d565efSmrg
623210d565efSmrg for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
623310d565efSmrg workp++)
623410d565efSmrg {
623510d565efSmrg int32_t *wextra;
623610d565efSmrg wextra = (int32_t*)(extra + *workp++);
623710d565efSmrg for (i = 0; i < *wextra; ++i)
623810d565efSmrg if (TRANSLATE(d[i]) != wextra[1 + i])
623910d565efSmrg break;
624010d565efSmrg
624110d565efSmrg if (i == *wextra)
624210d565efSmrg {
624310d565efSmrg /* Advance d past the match; because d is incremented again at
624410d565efSmrg char_set_matched:, advance by one less here. */
624510d565efSmrg d += i - 1;
624610d565efSmrg goto char_set_matched;
624710d565efSmrg }
624810d565efSmrg }
624910d565efSmrg }
625010d565efSmrg else /* (nrules == 0) */
625110d565efSmrg # endif
625210d565efSmrg /* If we can't look up collation data, we use wcscoll
625310d565efSmrg instead. */
625410d565efSmrg {
625510d565efSmrg for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
625610d565efSmrg {
625710d565efSmrg const CHAR_T *backup_d = d, *backup_dend = dend;
625810d565efSmrg # ifdef _LIBC
625910d565efSmrg length = __wcslen (workp);
626010d565efSmrg # else
626110d565efSmrg length = wcslen (workp);
626210d565efSmrg # endif
626310d565efSmrg
626410d565efSmrg /* If wcscoll(the collating symbol, whole string) > 0,
626510d565efSmrg no substring of the string can ever match the
626610d565efSmrg collating symbol. */
626710d565efSmrg # ifdef _LIBC
626810d565efSmrg if (__wcscoll (workp, d) > 0)
626910d565efSmrg # else
627010d565efSmrg if (wcscoll (workp, d) > 0)
627110d565efSmrg # endif
627210d565efSmrg {
627310d565efSmrg workp += length + 1;
627410d565efSmrg continue;
627510d565efSmrg }
627610d565efSmrg
627710d565efSmrg /* First, we compare the collating symbol with
627810d565efSmrg the first character of the string.
627910d565efSmrg If it doesn't match, we add the next character to
628010d565efSmrg the compare buffer in turn. */
628110d565efSmrg for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
628210d565efSmrg {
628310d565efSmrg int match;
628410d565efSmrg if (d == dend)
628510d565efSmrg {
628610d565efSmrg if (dend == end_match_2)
628710d565efSmrg break;
628810d565efSmrg d = string2;
628910d565efSmrg dend = end_match_2;
629010d565efSmrg }
629110d565efSmrg
629210d565efSmrg /* add next character to the compare buffer. */
629310d565efSmrg str_buf[i] = TRANSLATE(*d);
629410d565efSmrg str_buf[i+1] = '\0';
629510d565efSmrg
629610d565efSmrg # ifdef _LIBC
629710d565efSmrg match = __wcscoll (workp, str_buf);
629810d565efSmrg # else
629910d565efSmrg match = wcscoll (workp, str_buf);
630010d565efSmrg # endif
630110d565efSmrg if (match == 0)
630210d565efSmrg goto char_set_matched;
630310d565efSmrg
630410d565efSmrg if (match < 0)
630510d565efSmrg /* (str_buf > workp) implies (str_buf + X > workp),
630610d565efSmrg because for all X (str_buf + X > str_buf).
630710d565efSmrg So we don't need to continue this loop. */
630810d565efSmrg break;
630910d565efSmrg
631010d565efSmrg /* Otherwise (str_buf < workp),
631110d565efSmrg (str_buf+next_character) may equal (workp).
631210d565efSmrg So we continue this loop. */
631310d565efSmrg }
631410d565efSmrg /* not matched */
631510d565efSmrg d = backup_d;
631610d565efSmrg dend = backup_dend;
631710d565efSmrg workp += length + 1;
631810d565efSmrg }
631910d565efSmrg }
632010d565efSmrg /* match with equivalence_class? */
632110d565efSmrg # ifdef _LIBC
632210d565efSmrg if (nrules != 0)
632310d565efSmrg {
632410d565efSmrg const CHAR_T *backup_d = d, *backup_dend = dend;
632510d565efSmrg /* Try to match the equivalence class against
632610d565efSmrg those known to the collate implementation. */
632710d565efSmrg const int32_t *table;
632810d565efSmrg const int32_t *weights;
632910d565efSmrg const int32_t *extra;
633010d565efSmrg const int32_t *indirect;
633110d565efSmrg int32_t idx, idx2;
633210d565efSmrg wint_t *cp;
633310d565efSmrg size_t len;
633410d565efSmrg
633510d565efSmrg /* This #include defines a local function! */
633610d565efSmrg # include <locale/weightwc.h>
633710d565efSmrg
633810d565efSmrg table = (const int32_t *)
633910d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
634010d565efSmrg weights = (const wint_t *)
634110d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
634210d565efSmrg extra = (const wint_t *)
634310d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
634410d565efSmrg indirect = (const int32_t *)
634510d565efSmrg _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
634610d565efSmrg
634710d565efSmrg /* Write 1 collating element to str_buf, and
634810d565efSmrg get its index. */
634910d565efSmrg idx2 = 0;
635010d565efSmrg
635110d565efSmrg for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
635210d565efSmrg {
635310d565efSmrg cp = (wint_t*)str_buf;
635410d565efSmrg if (d == dend)
635510d565efSmrg {
635610d565efSmrg if (dend == end_match_2)
635710d565efSmrg break;
635810d565efSmrg d = string2;
635910d565efSmrg dend = end_match_2;
636010d565efSmrg }
636110d565efSmrg str_buf[i] = TRANSLATE(*(d+i));
636210d565efSmrg str_buf[i+1] = '\0'; /* sentinel */
636310d565efSmrg idx2 = findidx ((const wint_t**)&cp);
636410d565efSmrg }
636510d565efSmrg
636610d565efSmrg /* Advance d past the match; because d is incremented again at
636710d565efSmrg char_set_matched:, advance by one less here. */
636810d565efSmrg d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
636910d565efSmrg if (d >= dend)
637010d565efSmrg {
637110d565efSmrg if (dend == end_match_2)
637210d565efSmrg d = dend;
637310d565efSmrg else
637410d565efSmrg {
637510d565efSmrg d = string2;
637610d565efSmrg dend = end_match_2;
637710d565efSmrg }
637810d565efSmrg }
637910d565efSmrg
638010d565efSmrg len = weights[idx2];
638110d565efSmrg
638210d565efSmrg for (workp2 = workp + equiv_class_length ; workp < workp2 ;
638310d565efSmrg workp++)
638410d565efSmrg {
638510d565efSmrg idx = (int32_t)*workp;
638610d565efSmrg /* We already checked idx != 0 in regex_compile. */
638710d565efSmrg
638810d565efSmrg if (idx2 != 0 && len == weights[idx])
638910d565efSmrg {
639010d565efSmrg int cnt = 0;
639110d565efSmrg while (cnt < len && (weights[idx + 1 + cnt]
639210d565efSmrg == weights[idx2 + 1 + cnt]))
639310d565efSmrg ++cnt;
639410d565efSmrg
639510d565efSmrg if (cnt == len)
639610d565efSmrg goto char_set_matched;
639710d565efSmrg }
639810d565efSmrg }
639910d565efSmrg /* not matched */
640010d565efSmrg d = backup_d;
640110d565efSmrg dend = backup_dend;
640210d565efSmrg }
640310d565efSmrg else /* (nrules == 0) */
640410d565efSmrg # endif
640510d565efSmrg /* If we can't look up collation data, we use wcscoll
640610d565efSmrg instead. */
640710d565efSmrg {
640810d565efSmrg for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
640910d565efSmrg {
641010d565efSmrg const CHAR_T *backup_d = d, *backup_dend = dend;
641110d565efSmrg # ifdef _LIBC
641210d565efSmrg length = __wcslen (workp);
641310d565efSmrg # else
641410d565efSmrg length = wcslen (workp);
641510d565efSmrg # endif
641610d565efSmrg
641710d565efSmrg /* If wcscoll(the equivalence class, whole string) > 0,
641810d565efSmrg no substring of the string can ever match the
641910d565efSmrg equivalence class. */
642010d565efSmrg # ifdef _LIBC
642110d565efSmrg if (__wcscoll (workp, d) > 0)
642210d565efSmrg # else
642310d565efSmrg if (wcscoll (workp, d) > 0)
642410d565efSmrg # endif
642510d565efSmrg {
642610d565efSmrg workp += length + 1;
642710d565efSmrg break;
642810d565efSmrg }
642910d565efSmrg
643010d565efSmrg /* First, we compare the equivalence class with
643110d565efSmrg the first character of the string.
643210d565efSmrg If it doesn't match, we add the next character to
643310d565efSmrg the compare buffer in turn. */
643410d565efSmrg for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
643510d565efSmrg {
643610d565efSmrg int match;
643710d565efSmrg if (d == dend)
643810d565efSmrg {
643910d565efSmrg if (dend == end_match_2)
644010d565efSmrg break;
644110d565efSmrg d = string2;
644210d565efSmrg dend = end_match_2;
644310d565efSmrg }
644410d565efSmrg
644510d565efSmrg /* add next character to the compare buffer. */
644610d565efSmrg str_buf[i] = TRANSLATE(*d);
644710d565efSmrg str_buf[i+1] = '\0';
644810d565efSmrg
644910d565efSmrg # ifdef _LIBC
645010d565efSmrg match = __wcscoll (workp, str_buf);
645110d565efSmrg # else
645210d565efSmrg match = wcscoll (workp, str_buf);
645310d565efSmrg # endif
645410d565efSmrg
645510d565efSmrg if (match == 0)
645610d565efSmrg goto char_set_matched;
645710d565efSmrg
645810d565efSmrg if (match < 0)
645910d565efSmrg /* (str_buf > workp) implies (str_buf + X > workp),
646010d565efSmrg because for all X (str_buf + X > str_buf).
646110d565efSmrg So we don't need to continue this loop. */
646210d565efSmrg break;
646310d565efSmrg
646410d565efSmrg /* Otherwise (str_buf < workp),
646510d565efSmrg (str_buf+next_character) may equal (workp).
646610d565efSmrg So we continue this loop. */
646710d565efSmrg }
646810d565efSmrg /* not matched */
646910d565efSmrg d = backup_d;
647010d565efSmrg dend = backup_dend;
647110d565efSmrg workp += length + 1;
647210d565efSmrg }
647310d565efSmrg }
647410d565efSmrg
647510d565efSmrg /* match with char_range? */
647610d565efSmrg # ifdef _LIBC
647710d565efSmrg if (nrules != 0)
647810d565efSmrg {
647910d565efSmrg uint32_t collseqval;
648010d565efSmrg const char *collseq = (const char *)
648110d565efSmrg _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
648210d565efSmrg
648310d565efSmrg collseqval = collseq_table_lookup (collseq, c);
648410d565efSmrg
648510d565efSmrg for (; workp < p - chars_length ;)
648610d565efSmrg {
648710d565efSmrg uint32_t start_val, end_val;
648810d565efSmrg
648910d565efSmrg /* We have already computed the collation sequence values
649010d565efSmrg of the characters (or collating symbols). */
649110d565efSmrg start_val = (uint32_t) *workp++; /* range_start */
649210d565efSmrg end_val = (uint32_t) *workp++; /* range_end */
649310d565efSmrg
649410d565efSmrg if (start_val <= collseqval && collseqval <= end_val)
649510d565efSmrg goto char_set_matched;
649610d565efSmrg }
649710d565efSmrg }
649810d565efSmrg else
649910d565efSmrg # endif
650010d565efSmrg {
650110d565efSmrg /* We set range_start_char at str_buf[0], range_end_char
650210d565efSmrg at str_buf[4], and compared char at str_buf[2]. */
650310d565efSmrg str_buf[1] = 0;
650410d565efSmrg str_buf[2] = c;
650510d565efSmrg str_buf[3] = 0;
650610d565efSmrg str_buf[5] = 0;
650710d565efSmrg for (; workp < p - chars_length ;)
650810d565efSmrg {
650910d565efSmrg wchar_t *range_start_char, *range_end_char;
651010d565efSmrg
651110d565efSmrg /* match if (range_start_char <= c <= range_end_char). */
651210d565efSmrg
651310d565efSmrg /* If range_start(or end) < 0, we assume -range_start(end)
651410d565efSmrg is the offset of the collating symbol which is specified
651510d565efSmrg as the character of the range start(end). */
651610d565efSmrg
651710d565efSmrg /* range_start */
651810d565efSmrg if (*workp < 0)
651910d565efSmrg range_start_char = charset_top - (*workp++);
652010d565efSmrg else
652110d565efSmrg {
652210d565efSmrg str_buf[0] = *workp++;
652310d565efSmrg range_start_char = str_buf;
652410d565efSmrg }
652510d565efSmrg
652610d565efSmrg /* range_end */
652710d565efSmrg if (*workp < 0)
652810d565efSmrg range_end_char = charset_top - (*workp++);
652910d565efSmrg else
653010d565efSmrg {
653110d565efSmrg str_buf[4] = *workp++;
653210d565efSmrg range_end_char = str_buf + 4;
653310d565efSmrg }
653410d565efSmrg
653510d565efSmrg # ifdef _LIBC
653610d565efSmrg if (__wcscoll (range_start_char, str_buf+2) <= 0
653710d565efSmrg && __wcscoll (str_buf+2, range_end_char) <= 0)
653810d565efSmrg # else
653910d565efSmrg if (wcscoll (range_start_char, str_buf+2) <= 0
654010d565efSmrg && wcscoll (str_buf+2, range_end_char) <= 0)
654110d565efSmrg # endif
654210d565efSmrg goto char_set_matched;
654310d565efSmrg }
654410d565efSmrg }
654510d565efSmrg
654610d565efSmrg /* match with char? */
654710d565efSmrg for (; workp < p ; workp++)
654810d565efSmrg if (c == *workp)
654910d565efSmrg goto char_set_matched;
655010d565efSmrg
655110d565efSmrg negate = !negate;
655210d565efSmrg
655310d565efSmrg char_set_matched:
655410d565efSmrg if (negate) goto fail;
655510d565efSmrg #else
655610d565efSmrg /* Cast to `unsigned' instead of `unsigned char' in case the
655710d565efSmrg bit list is a full 32 bytes long. */
655810d565efSmrg if (c < (unsigned) (*p * BYTEWIDTH)
655910d565efSmrg && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
656010d565efSmrg negate = !negate;
656110d565efSmrg
656210d565efSmrg p += 1 + *p;
656310d565efSmrg
656410d565efSmrg if (!negate) goto fail;
656510d565efSmrg #undef WORK_BUFFER_SIZE
656610d565efSmrg #endif /* WCHAR */
656710d565efSmrg SET_REGS_MATCHED ();
656810d565efSmrg d++;
656910d565efSmrg break;
657010d565efSmrg }
657110d565efSmrg
657210d565efSmrg
657310d565efSmrg /* The beginning of a group is represented by start_memory.
657410d565efSmrg The arguments are the register number in the next byte, and the
657510d565efSmrg number of groups inner to this one in the next. The text
657610d565efSmrg matched within the group is recorded (in the internal
657710d565efSmrg registers data structure) under the register number. */
657810d565efSmrg case start_memory:
657910d565efSmrg DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
658010d565efSmrg (long int) *p, (long int) p[1]);
658110d565efSmrg
658210d565efSmrg /* Find out if this group can match the empty string. */
658310d565efSmrg p1 = p; /* To send to group_match_null_string_p. */
658410d565efSmrg
658510d565efSmrg if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
658610d565efSmrg REG_MATCH_NULL_STRING_P (reg_info[*p])
658710d565efSmrg = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
658810d565efSmrg
658910d565efSmrg /* Save the position in the string where we were the last time
659010d565efSmrg we were at this open-group operator in case the group is
659110d565efSmrg operated upon by a repetition operator, e.g., with `(a*)*b'
659210d565efSmrg against `ab'; then we want to ignore where we are now in
659310d565efSmrg the string in case this attempt to match fails. */
659410d565efSmrg old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
659510d565efSmrg ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
659610d565efSmrg : regstart[*p];
659710d565efSmrg DEBUG_PRINT2 (" old_regstart: %d\n",
659810d565efSmrg POINTER_TO_OFFSET (old_regstart[*p]));
659910d565efSmrg
660010d565efSmrg regstart[*p] = d;
660110d565efSmrg DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
660210d565efSmrg
660310d565efSmrg IS_ACTIVE (reg_info[*p]) = 1;
660410d565efSmrg MATCHED_SOMETHING (reg_info[*p]) = 0;
660510d565efSmrg
660610d565efSmrg /* Clear this whenever we change the register activity status. */
660710d565efSmrg set_regs_matched_done = 0;
660810d565efSmrg
660910d565efSmrg /* This is the new highest active register. */
661010d565efSmrg highest_active_reg = *p;
661110d565efSmrg
661210d565efSmrg /* If nothing was active before, this is the new lowest active
661310d565efSmrg register. */
661410d565efSmrg if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
661510d565efSmrg lowest_active_reg = *p;
661610d565efSmrg
661710d565efSmrg /* Move past the register number and inner group count. */
661810d565efSmrg p += 2;
661910d565efSmrg just_past_start_mem = p;
662010d565efSmrg
662110d565efSmrg break;
662210d565efSmrg
662310d565efSmrg
662410d565efSmrg /* The stop_memory opcode represents the end of a group. Its
662510d565efSmrg arguments are the same as start_memory's: the register
662610d565efSmrg number, and the number of inner groups. */
662710d565efSmrg case stop_memory:
662810d565efSmrg DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
662910d565efSmrg (long int) *p, (long int) p[1]);
663010d565efSmrg
663110d565efSmrg /* We need to save the string position the last time we were at
663210d565efSmrg this close-group operator in case the group is operated
663310d565efSmrg upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
663410d565efSmrg against `aba'; then we want to ignore where we are now in
663510d565efSmrg the string in case this attempt to match fails. */
663610d565efSmrg old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
663710d565efSmrg ? REG_UNSET (regend[*p]) ? d : regend[*p]
663810d565efSmrg : regend[*p];
663910d565efSmrg DEBUG_PRINT2 (" old_regend: %d\n",
664010d565efSmrg POINTER_TO_OFFSET (old_regend[*p]));
664110d565efSmrg
664210d565efSmrg regend[*p] = d;
664310d565efSmrg DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
664410d565efSmrg
664510d565efSmrg /* This register isn't active anymore. */
664610d565efSmrg IS_ACTIVE (reg_info[*p]) = 0;
664710d565efSmrg
664810d565efSmrg /* Clear this whenever we change the register activity status. */
664910d565efSmrg set_regs_matched_done = 0;
665010d565efSmrg
665110d565efSmrg /* If this was the only register active, nothing is active
665210d565efSmrg anymore. */
665310d565efSmrg if (lowest_active_reg == highest_active_reg)
665410d565efSmrg {
665510d565efSmrg lowest_active_reg = NO_LOWEST_ACTIVE_REG;
665610d565efSmrg highest_active_reg = NO_HIGHEST_ACTIVE_REG;
665710d565efSmrg }
665810d565efSmrg else
665910d565efSmrg { /* We must scan for the new highest active register, since
666010d565efSmrg it isn't necessarily one less than now: consider
666110d565efSmrg (a(b)c(d(e)f)g). When group 3 ends, after the f), the
666210d565efSmrg new highest active register is 1. */
666310d565efSmrg UCHAR_T r = *p - 1;
666410d565efSmrg while (r > 0 && !IS_ACTIVE (reg_info[r]))
666510d565efSmrg r--;
666610d565efSmrg
666710d565efSmrg /* If we end up at register zero, that means that we saved
666810d565efSmrg the registers as the result of an `on_failure_jump', not
666910d565efSmrg a `start_memory', and we jumped to past the innermost
667010d565efSmrg `stop_memory'. For example, in ((.)*) we save
667110d565efSmrg registers 1 and 2 as a result of the *, but when we pop
667210d565efSmrg back to the second ), we are at the stop_memory 1.
667310d565efSmrg Thus, nothing is active. */
667410d565efSmrg if (r == 0)
667510d565efSmrg {
667610d565efSmrg lowest_active_reg = NO_LOWEST_ACTIVE_REG;
667710d565efSmrg highest_active_reg = NO_HIGHEST_ACTIVE_REG;
667810d565efSmrg }
667910d565efSmrg else
668010d565efSmrg highest_active_reg = r;
668110d565efSmrg }
668210d565efSmrg
668310d565efSmrg /* If just failed to match something this time around with a
668410d565efSmrg group that's operated on by a repetition operator, try to
668510d565efSmrg force exit from the ``loop'', and restore the register
668610d565efSmrg information for this group that we had before trying this
668710d565efSmrg last match. */
668810d565efSmrg if ((!MATCHED_SOMETHING (reg_info[*p])
668910d565efSmrg || just_past_start_mem == p - 1)
669010d565efSmrg && (p + 2) < pend)
669110d565efSmrg {
669210d565efSmrg boolean is_a_jump_n = false;
669310d565efSmrg
669410d565efSmrg p1 = p + 2;
669510d565efSmrg mcnt = 0;
669610d565efSmrg switch ((re_opcode_t) *p1++)
669710d565efSmrg {
669810d565efSmrg case jump_n:
669910d565efSmrg is_a_jump_n = true;
670010d565efSmrg /* Fall through. */
670110d565efSmrg case pop_failure_jump:
670210d565efSmrg case maybe_pop_jump:
670310d565efSmrg case jump:
670410d565efSmrg case dummy_failure_jump:
670510d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
670610d565efSmrg if (is_a_jump_n)
670710d565efSmrg p1 += OFFSET_ADDRESS_SIZE;
670810d565efSmrg break;
670910d565efSmrg
671010d565efSmrg default:
671110d565efSmrg /* do nothing */ ;
671210d565efSmrg }
671310d565efSmrg p1 += mcnt;
671410d565efSmrg
671510d565efSmrg /* If the next operation is a jump backwards in the pattern
671610d565efSmrg to an on_failure_jump right before the start_memory
671710d565efSmrg corresponding to this stop_memory, exit from the loop
671810d565efSmrg by forcing a failure after pushing on the stack the
671910d565efSmrg on_failure_jump's jump in the pattern, and d. */
672010d565efSmrg if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
672110d565efSmrg && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
672210d565efSmrg && p1[2+OFFSET_ADDRESS_SIZE] == *p)
672310d565efSmrg {
672410d565efSmrg /* If this group ever matched anything, then restore
672510d565efSmrg what its registers were before trying this last
672610d565efSmrg failed match, e.g., with `(a*)*b' against `ab' for
672710d565efSmrg regstart[1], and, e.g., with `((a*)*(b*)*)*'
672810d565efSmrg against `aba' for regend[3].
672910d565efSmrg
673010d565efSmrg Also restore the registers for inner groups for,
673110d565efSmrg e.g., `((a*)(b*))*' against `aba' (register 3 would
673210d565efSmrg otherwise get trashed). */
673310d565efSmrg
673410d565efSmrg if (EVER_MATCHED_SOMETHING (reg_info[*p]))
673510d565efSmrg {
673610d565efSmrg unsigned r;
673710d565efSmrg
673810d565efSmrg EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
673910d565efSmrg
674010d565efSmrg /* Restore this and inner groups' (if any) registers. */
674110d565efSmrg for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
674210d565efSmrg r++)
674310d565efSmrg {
674410d565efSmrg regstart[r] = old_regstart[r];
674510d565efSmrg
674610d565efSmrg /* xx why this test? */
674710d565efSmrg if (old_regend[r] >= regstart[r])
674810d565efSmrg regend[r] = old_regend[r];
674910d565efSmrg }
675010d565efSmrg }
675110d565efSmrg p1++;
675210d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
675310d565efSmrg PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
675410d565efSmrg
675510d565efSmrg goto fail;
675610d565efSmrg }
675710d565efSmrg }
675810d565efSmrg
675910d565efSmrg /* Move past the register number and the inner group count. */
676010d565efSmrg p += 2;
676110d565efSmrg break;
676210d565efSmrg
676310d565efSmrg
676410d565efSmrg /* \<digit> has been turned into a `duplicate' command which is
676510d565efSmrg followed by the numeric value of <digit> as the register number. */
676610d565efSmrg case duplicate:
676710d565efSmrg {
676810d565efSmrg register const CHAR_T *d2, *dend2;
676910d565efSmrg int regno = *p++; /* Get which register to match against. */
677010d565efSmrg DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
677110d565efSmrg
677210d565efSmrg /* Can't back reference a group which we've never matched. */
677310d565efSmrg if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
677410d565efSmrg goto fail;
677510d565efSmrg
677610d565efSmrg /* Where in input to try to start matching. */
677710d565efSmrg d2 = regstart[regno];
677810d565efSmrg
677910d565efSmrg /* Where to stop matching; if both the place to start and
678010d565efSmrg the place to stop matching are in the same string, then
678110d565efSmrg set to the place to stop, otherwise, for now have to use
678210d565efSmrg the end of the first string. */
678310d565efSmrg
678410d565efSmrg dend2 = ((FIRST_STRING_P (regstart[regno])
678510d565efSmrg == FIRST_STRING_P (regend[regno]))
678610d565efSmrg ? regend[regno] : end_match_1);
678710d565efSmrg for (;;)
678810d565efSmrg {
678910d565efSmrg /* If necessary, advance to next segment in register
679010d565efSmrg contents. */
679110d565efSmrg while (d2 == dend2)
679210d565efSmrg {
679310d565efSmrg if (dend2 == end_match_2) break;
679410d565efSmrg if (dend2 == regend[regno]) break;
679510d565efSmrg
679610d565efSmrg /* End of string1 => advance to string2. */
679710d565efSmrg d2 = string2;
679810d565efSmrg dend2 = regend[regno];
679910d565efSmrg }
680010d565efSmrg /* At end of register contents => success */
680110d565efSmrg if (d2 == dend2) break;
680210d565efSmrg
680310d565efSmrg /* If necessary, advance to next segment in data. */
680410d565efSmrg PREFETCH ();
680510d565efSmrg
680610d565efSmrg /* How many characters left in this segment to match. */
680710d565efSmrg mcnt = dend - d;
680810d565efSmrg
680910d565efSmrg /* Want how many consecutive characters we can match in
681010d565efSmrg one shot, so, if necessary, adjust the count. */
681110d565efSmrg if (mcnt > dend2 - d2)
681210d565efSmrg mcnt = dend2 - d2;
681310d565efSmrg
681410d565efSmrg /* Compare that many; failure if mismatch, else move
681510d565efSmrg past them. */
681610d565efSmrg if (translate
681710d565efSmrg ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
681810d565efSmrg : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
681910d565efSmrg goto fail;
682010d565efSmrg d += mcnt, d2 += mcnt;
682110d565efSmrg
682210d565efSmrg /* Do this because we've matched some characters. */
682310d565efSmrg SET_REGS_MATCHED ();
682410d565efSmrg }
682510d565efSmrg }
682610d565efSmrg break;
682710d565efSmrg
682810d565efSmrg
682910d565efSmrg /* begline matches the empty string at the beginning of the string
683010d565efSmrg (unless `not_bol' is set in `bufp'), and, if
683110d565efSmrg `newline_anchor' is set, after newlines. */
683210d565efSmrg case begline:
683310d565efSmrg DEBUG_PRINT1 ("EXECUTING begline.\n");
683410d565efSmrg
683510d565efSmrg if (AT_STRINGS_BEG (d))
683610d565efSmrg {
683710d565efSmrg if (!bufp->not_bol) break;
683810d565efSmrg }
683910d565efSmrg else if (d[-1] == '\n' && bufp->newline_anchor)
684010d565efSmrg {
684110d565efSmrg break;
684210d565efSmrg }
684310d565efSmrg /* In all other cases, we fail. */
684410d565efSmrg goto fail;
684510d565efSmrg
684610d565efSmrg
684710d565efSmrg /* endline is the dual of begline. */
684810d565efSmrg case endline:
684910d565efSmrg DEBUG_PRINT1 ("EXECUTING endline.\n");
685010d565efSmrg
685110d565efSmrg if (AT_STRINGS_END (d))
685210d565efSmrg {
685310d565efSmrg if (!bufp->not_eol) break;
685410d565efSmrg }
685510d565efSmrg
685610d565efSmrg /* We have to ``prefetch'' the next character. */
685710d565efSmrg else if ((d == end1 ? *string2 : *d) == '\n'
685810d565efSmrg && bufp->newline_anchor)
685910d565efSmrg {
686010d565efSmrg break;
686110d565efSmrg }
686210d565efSmrg goto fail;
686310d565efSmrg
686410d565efSmrg
686510d565efSmrg /* Match at the very beginning of the data. */
686610d565efSmrg case begbuf:
686710d565efSmrg DEBUG_PRINT1 ("EXECUTING begbuf.\n");
686810d565efSmrg if (AT_STRINGS_BEG (d))
686910d565efSmrg break;
687010d565efSmrg goto fail;
687110d565efSmrg
687210d565efSmrg
687310d565efSmrg /* Match at the very end of the data. */
687410d565efSmrg case endbuf:
687510d565efSmrg DEBUG_PRINT1 ("EXECUTING endbuf.\n");
687610d565efSmrg if (AT_STRINGS_END (d))
687710d565efSmrg break;
687810d565efSmrg goto fail;
687910d565efSmrg
688010d565efSmrg
688110d565efSmrg /* on_failure_keep_string_jump is used to optimize `.*\n'. It
688210d565efSmrg pushes NULL as the value for the string on the stack. Then
688310d565efSmrg `pop_failure_point' will keep the current value for the
688410d565efSmrg string, instead of restoring it. To see why, consider
688510d565efSmrg matching `foo\nbar' against `.*\n'. The .* matches the foo;
688610d565efSmrg then the . fails against the \n. But the next thing we want
688710d565efSmrg to do is match the \n against the \n; if we restored the
688810d565efSmrg string value, we would be back at the foo.
688910d565efSmrg
689010d565efSmrg Because this is used only in specific cases, we don't need to
689110d565efSmrg check all the things that `on_failure_jump' does, to make
689210d565efSmrg sure the right things get saved on the stack. Hence we don't
689310d565efSmrg share its code. The only reason to push anything on the
689410d565efSmrg stack at all is that otherwise we would have to change
689510d565efSmrg `anychar's code to do something besides goto fail in this
689610d565efSmrg case; that seems worse than this. */
689710d565efSmrg case on_failure_keep_string_jump:
689810d565efSmrg DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
689910d565efSmrg
690010d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p);
690110d565efSmrg #ifdef _LIBC
690210d565efSmrg DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
690310d565efSmrg #else
690410d565efSmrg DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
690510d565efSmrg #endif
690610d565efSmrg
690710d565efSmrg PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
690810d565efSmrg break;
690910d565efSmrg
691010d565efSmrg
691110d565efSmrg /* Uses of on_failure_jump:
691210d565efSmrg
691310d565efSmrg Each alternative starts with an on_failure_jump that points
691410d565efSmrg to the beginning of the next alternative. Each alternative
691510d565efSmrg except the last ends with a jump that in effect jumps past
691610d565efSmrg the rest of the alternatives. (They really jump to the
691710d565efSmrg ending jump of the following alternative, because tensioning
691810d565efSmrg these jumps is a hassle.)
691910d565efSmrg
692010d565efSmrg Repeats start with an on_failure_jump that points past both
692110d565efSmrg the repetition text and either the following jump or
692210d565efSmrg pop_failure_jump back to this on_failure_jump. */
692310d565efSmrg case on_failure_jump:
692410d565efSmrg on_failure:
692510d565efSmrg DEBUG_PRINT1 ("EXECUTING on_failure_jump");
692610d565efSmrg
692710d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p);
692810d565efSmrg #ifdef _LIBC
692910d565efSmrg DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
693010d565efSmrg #else
693110d565efSmrg DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
693210d565efSmrg #endif
693310d565efSmrg
693410d565efSmrg /* If this on_failure_jump comes right before a group (i.e.,
693510d565efSmrg the original * applied to a group), save the information
693610d565efSmrg for that group and all inner ones, so that if we fail back
693710d565efSmrg to this point, the group's information will be correct.
693810d565efSmrg For example, in \(a*\)*\1, we need the preceding group,
693910d565efSmrg and in \(zz\(a*\)b*\)\2, we need the inner group. */
694010d565efSmrg
694110d565efSmrg /* We can't use `p' to check ahead because we push
694210d565efSmrg a failure point to `p + mcnt' after we do this. */
694310d565efSmrg p1 = p;
694410d565efSmrg
694510d565efSmrg /* We need to skip no_op's before we look for the
694610d565efSmrg start_memory in case this on_failure_jump is happening as
694710d565efSmrg the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
694810d565efSmrg against aba. */
694910d565efSmrg while (p1 < pend && (re_opcode_t) *p1 == no_op)
695010d565efSmrg p1++;
695110d565efSmrg
695210d565efSmrg if (p1 < pend && (re_opcode_t) *p1 == start_memory)
695310d565efSmrg {
695410d565efSmrg /* We have a new highest active register now. This will
695510d565efSmrg get reset at the start_memory we are about to get to,
695610d565efSmrg but we will have saved all the registers relevant to
695710d565efSmrg this repetition op, as described above. */
695810d565efSmrg highest_active_reg = *(p1 + 1) + *(p1 + 2);
695910d565efSmrg if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
696010d565efSmrg lowest_active_reg = *(p1 + 1);
696110d565efSmrg }
696210d565efSmrg
696310d565efSmrg DEBUG_PRINT1 (":\n");
696410d565efSmrg PUSH_FAILURE_POINT (p + mcnt, d, -2);
696510d565efSmrg break;
696610d565efSmrg
696710d565efSmrg
696810d565efSmrg /* A smart repeat ends with `maybe_pop_jump'.
696910d565efSmrg We change it to either `pop_failure_jump' or `jump'. */
697010d565efSmrg case maybe_pop_jump:
697110d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p);
697210d565efSmrg DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
697310d565efSmrg {
697410d565efSmrg register UCHAR_T *p2 = p;
697510d565efSmrg
697610d565efSmrg /* Compare the beginning of the repeat with what in the
697710d565efSmrg pattern follows its end. If we can establish that there
697810d565efSmrg is nothing that they would both match, i.e., that we
697910d565efSmrg would have to backtrack because of (as in, e.g., `a*a')
698010d565efSmrg then we can change to pop_failure_jump, because we'll
698110d565efSmrg never have to backtrack.
698210d565efSmrg
698310d565efSmrg This is not true in the case of alternatives: in
698410d565efSmrg `(a|ab)*' we do need to backtrack to the `ab' alternative
698510d565efSmrg (e.g., if the string was `ab'). But instead of trying to
698610d565efSmrg detect that here, the alternative has put on a dummy
698710d565efSmrg failure point which is what we will end up popping. */
698810d565efSmrg
698910d565efSmrg /* Skip over open/close-group commands.
699010d565efSmrg If what follows this loop is a ...+ construct,
699110d565efSmrg look at what begins its body, since we will have to
699210d565efSmrg match at least one of that. */
699310d565efSmrg while (1)
699410d565efSmrg {
699510d565efSmrg if (p2 + 2 < pend
699610d565efSmrg && ((re_opcode_t) *p2 == stop_memory
699710d565efSmrg || (re_opcode_t) *p2 == start_memory))
699810d565efSmrg p2 += 3;
699910d565efSmrg else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
700010d565efSmrg && (re_opcode_t) *p2 == dummy_failure_jump)
700110d565efSmrg p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
700210d565efSmrg else
700310d565efSmrg break;
700410d565efSmrg }
700510d565efSmrg
700610d565efSmrg p1 = p + mcnt;
700710d565efSmrg /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
700810d565efSmrg to the `maybe_pop_jump' of this case. Examine what
700910d565efSmrg follows. */
701010d565efSmrg
701110d565efSmrg /* If we're at the end of the pattern, we can change. */
701210d565efSmrg if (p2 == pend)
701310d565efSmrg {
701410d565efSmrg /* Consider what happens when matching ":\(.*\)"
701510d565efSmrg against ":/". I don't really understand this code
701610d565efSmrg yet. */
701710d565efSmrg p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
701810d565efSmrg pop_failure_jump;
701910d565efSmrg DEBUG_PRINT1
702010d565efSmrg (" End of pattern: change to `pop_failure_jump'.\n");
702110d565efSmrg }
702210d565efSmrg
702310d565efSmrg else if ((re_opcode_t) *p2 == exactn
702410d565efSmrg #ifdef MBS_SUPPORT
702510d565efSmrg || (re_opcode_t) *p2 == exactn_bin
702610d565efSmrg #endif
702710d565efSmrg || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
702810d565efSmrg {
702910d565efSmrg register UCHAR_T c
703010d565efSmrg = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
703110d565efSmrg
703210d565efSmrg if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
703310d565efSmrg #ifdef MBS_SUPPORT
703410d565efSmrg || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
703510d565efSmrg #endif
703610d565efSmrg ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
703710d565efSmrg {
703810d565efSmrg p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
703910d565efSmrg pop_failure_jump;
704010d565efSmrg #ifdef WCHAR
704110d565efSmrg DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
704210d565efSmrg (wint_t) c,
704310d565efSmrg (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
704410d565efSmrg #else
704510d565efSmrg DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
704610d565efSmrg (char) c,
704710d565efSmrg (char) p1[3+OFFSET_ADDRESS_SIZE]);
704810d565efSmrg #endif
704910d565efSmrg }
705010d565efSmrg
705110d565efSmrg #ifndef WCHAR
705210d565efSmrg else if ((re_opcode_t) p1[3] == charset
705310d565efSmrg || (re_opcode_t) p1[3] == charset_not)
705410d565efSmrg {
705510d565efSmrg int negate = (re_opcode_t) p1[3] == charset_not;
705610d565efSmrg
705710d565efSmrg if (c < (unsigned) (p1[4] * BYTEWIDTH)
705810d565efSmrg && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
705910d565efSmrg negate = !negate;
706010d565efSmrg
706110d565efSmrg /* `negate' is equal to 1 if c would match, which means
706210d565efSmrg that we can't change to pop_failure_jump. */
706310d565efSmrg if (!negate)
706410d565efSmrg {
706510d565efSmrg p[-3] = (unsigned char) pop_failure_jump;
706610d565efSmrg DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
706710d565efSmrg }
706810d565efSmrg }
706910d565efSmrg #endif /* not WCHAR */
707010d565efSmrg }
707110d565efSmrg #ifndef WCHAR
707210d565efSmrg else if ((re_opcode_t) *p2 == charset)
707310d565efSmrg {
707410d565efSmrg /* We win if the first character of the loop is not part
707510d565efSmrg of the charset. */
707610d565efSmrg if ((re_opcode_t) p1[3] == exactn
707710d565efSmrg && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
707810d565efSmrg && (p2[2 + p1[5] / BYTEWIDTH]
707910d565efSmrg & (1 << (p1[5] % BYTEWIDTH)))))
708010d565efSmrg {
708110d565efSmrg p[-3] = (unsigned char) pop_failure_jump;
708210d565efSmrg DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
708310d565efSmrg }
708410d565efSmrg
708510d565efSmrg else if ((re_opcode_t) p1[3] == charset_not)
708610d565efSmrg {
708710d565efSmrg int idx;
708810d565efSmrg /* We win if the charset_not inside the loop
708910d565efSmrg lists every character listed in the charset after. */
709010d565efSmrg for (idx = 0; idx < (int) p2[1]; idx++)
709110d565efSmrg if (! (p2[2 + idx] == 0
709210d565efSmrg || (idx < (int) p1[4]
709310d565efSmrg && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
709410d565efSmrg break;
709510d565efSmrg
709610d565efSmrg if (idx == p2[1])
709710d565efSmrg {
709810d565efSmrg p[-3] = (unsigned char) pop_failure_jump;
709910d565efSmrg DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
710010d565efSmrg }
710110d565efSmrg }
710210d565efSmrg else if ((re_opcode_t) p1[3] == charset)
710310d565efSmrg {
710410d565efSmrg int idx;
710510d565efSmrg /* We win if the charset inside the loop
710610d565efSmrg has no overlap with the one after the loop. */
710710d565efSmrg for (idx = 0;
710810d565efSmrg idx < (int) p2[1] && idx < (int) p1[4];
710910d565efSmrg idx++)
711010d565efSmrg if ((p2[2 + idx] & p1[5 + idx]) != 0)
711110d565efSmrg break;
711210d565efSmrg
711310d565efSmrg if (idx == p2[1] || idx == p1[4])
711410d565efSmrg {
711510d565efSmrg p[-3] = (unsigned char) pop_failure_jump;
711610d565efSmrg DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
711710d565efSmrg }
711810d565efSmrg }
711910d565efSmrg }
712010d565efSmrg #endif /* not WCHAR */
712110d565efSmrg }
712210d565efSmrg p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. */
712310d565efSmrg if ((re_opcode_t) p[-1] != pop_failure_jump)
712410d565efSmrg {
712510d565efSmrg p[-1] = (UCHAR_T) jump;
712610d565efSmrg DEBUG_PRINT1 (" Match => jump.\n");
712710d565efSmrg goto unconditional_jump;
712810d565efSmrg }
712910d565efSmrg /* Fall through. */
713010d565efSmrg
713110d565efSmrg
713210d565efSmrg /* The end of a simple repeat has a pop_failure_jump back to
713310d565efSmrg its matching on_failure_jump, where the latter will push a
713410d565efSmrg failure point. The pop_failure_jump takes off failure
713510d565efSmrg points put on by this pop_failure_jump's matching
713610d565efSmrg on_failure_jump; we got through the pattern to here from the
713710d565efSmrg matching on_failure_jump, so didn't fail. */
713810d565efSmrg case pop_failure_jump:
713910d565efSmrg {
714010d565efSmrg /* We need to pass separate storage for the lowest and
714110d565efSmrg highest registers, even though we don't care about the
714210d565efSmrg actual values. Otherwise, we will restore only one
714310d565efSmrg register from the stack, since lowest will == highest in
714410d565efSmrg `pop_failure_point'. */
714510d565efSmrg active_reg_t dummy_low_reg, dummy_high_reg;
714610d565efSmrg UCHAR_T *pdummy ATTRIBUTE_UNUSED = NULL;
714710d565efSmrg const CHAR_T *sdummy ATTRIBUTE_UNUSED = NULL;
714810d565efSmrg
714910d565efSmrg DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
715010d565efSmrg POP_FAILURE_POINT (sdummy, pdummy,
715110d565efSmrg dummy_low_reg, dummy_high_reg,
715210d565efSmrg reg_dummy, reg_dummy, reg_info_dummy);
715310d565efSmrg }
715410d565efSmrg /* Fall through. */
715510d565efSmrg
715610d565efSmrg unconditional_jump:
715710d565efSmrg #ifdef _LIBC
715810d565efSmrg DEBUG_PRINT2 ("\n%p: ", p);
715910d565efSmrg #else
716010d565efSmrg DEBUG_PRINT2 ("\n0x%x: ", p);
716110d565efSmrg #endif
716210d565efSmrg /* Note fall through. */
716310d565efSmrg
716410d565efSmrg /* Unconditionally jump (without popping any failure points). */
716510d565efSmrg case jump:
716610d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
716710d565efSmrg DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
716810d565efSmrg p += mcnt; /* Do the jump. */
716910d565efSmrg #ifdef _LIBC
717010d565efSmrg DEBUG_PRINT2 ("(to %p).\n", p);
717110d565efSmrg #else
717210d565efSmrg DEBUG_PRINT2 ("(to 0x%x).\n", p);
717310d565efSmrg #endif
717410d565efSmrg break;
717510d565efSmrg
717610d565efSmrg
717710d565efSmrg /* We need this opcode so we can detect where alternatives end
717810d565efSmrg in `group_match_null_string_p' et al. */
717910d565efSmrg case jump_past_alt:
718010d565efSmrg DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
718110d565efSmrg goto unconditional_jump;
718210d565efSmrg
718310d565efSmrg
718410d565efSmrg /* Normally, the on_failure_jump pushes a failure point, which
718510d565efSmrg then gets popped at pop_failure_jump. We will end up at
718610d565efSmrg pop_failure_jump, also, and with a pattern of, say, `a+', we
718710d565efSmrg are skipping over the on_failure_jump, so we have to push
718810d565efSmrg something meaningless for pop_failure_jump to pop. */
718910d565efSmrg case dummy_failure_jump:
719010d565efSmrg DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
719110d565efSmrg /* It doesn't matter what we push for the string here. What
719210d565efSmrg the code at `fail' tests is the value for the pattern. */
719310d565efSmrg PUSH_FAILURE_POINT (NULL, NULL, -2);
719410d565efSmrg goto unconditional_jump;
719510d565efSmrg
719610d565efSmrg
719710d565efSmrg /* At the end of an alternative, we need to push a dummy failure
719810d565efSmrg point in case we are followed by a `pop_failure_jump', because
719910d565efSmrg we don't want the failure point for the alternative to be
720010d565efSmrg popped. For example, matching `(a|ab)*' against `aab'
720110d565efSmrg requires that we match the `ab' alternative. */
720210d565efSmrg case push_dummy_failure:
720310d565efSmrg DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
720410d565efSmrg /* See comments just above at `dummy_failure_jump' about the
720510d565efSmrg two NULL arguments. */
720610d565efSmrg PUSH_FAILURE_POINT (NULL, NULL, -2);
720710d565efSmrg break;
720810d565efSmrg
720910d565efSmrg /* Have to succeed matching what follows at least n times.
721010d565efSmrg After that, handle like `on_failure_jump'. */
721110d565efSmrg case succeed_n:
721210d565efSmrg EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
721310d565efSmrg DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
721410d565efSmrg
721510d565efSmrg assert (mcnt >= 0);
721610d565efSmrg /* Originally, this is how many times we HAVE to succeed. */
721710d565efSmrg if (mcnt > 0)
721810d565efSmrg {
721910d565efSmrg mcnt--;
722010d565efSmrg p += OFFSET_ADDRESS_SIZE;
722110d565efSmrg STORE_NUMBER_AND_INCR (p, mcnt);
722210d565efSmrg #ifdef _LIBC
722310d565efSmrg DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
722410d565efSmrg , mcnt);
722510d565efSmrg #else
722610d565efSmrg DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
722710d565efSmrg , mcnt);
722810d565efSmrg #endif
722910d565efSmrg }
723010d565efSmrg else if (mcnt == 0)
723110d565efSmrg {
723210d565efSmrg #ifdef _LIBC
723310d565efSmrg DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
723410d565efSmrg p + OFFSET_ADDRESS_SIZE);
723510d565efSmrg #else
723610d565efSmrg DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
723710d565efSmrg p + OFFSET_ADDRESS_SIZE);
723810d565efSmrg #endif /* _LIBC */
723910d565efSmrg
724010d565efSmrg #ifdef WCHAR
724110d565efSmrg p[1] = (UCHAR_T) no_op;
724210d565efSmrg #else
724310d565efSmrg p[2] = (UCHAR_T) no_op;
724410d565efSmrg p[3] = (UCHAR_T) no_op;
724510d565efSmrg #endif /* WCHAR */
724610d565efSmrg goto on_failure;
724710d565efSmrg }
724810d565efSmrg break;
724910d565efSmrg
725010d565efSmrg case jump_n:
725110d565efSmrg EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
725210d565efSmrg DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
725310d565efSmrg
725410d565efSmrg /* Originally, this is how many times we CAN jump. */
725510d565efSmrg if (mcnt)
725610d565efSmrg {
725710d565efSmrg mcnt--;
725810d565efSmrg STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
725910d565efSmrg
726010d565efSmrg #ifdef _LIBC
726110d565efSmrg DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
726210d565efSmrg mcnt);
726310d565efSmrg #else
726410d565efSmrg DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
726510d565efSmrg mcnt);
726610d565efSmrg #endif /* _LIBC */
726710d565efSmrg goto unconditional_jump;
726810d565efSmrg }
726910d565efSmrg /* If we don't have to jump any more, skip over the rest of the command. */
727010d565efSmrg else
727110d565efSmrg p += 2 * OFFSET_ADDRESS_SIZE;
727210d565efSmrg break;
727310d565efSmrg
727410d565efSmrg case set_number_at:
727510d565efSmrg {
727610d565efSmrg DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
727710d565efSmrg
727810d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p);
727910d565efSmrg p1 = p + mcnt;
728010d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p);
728110d565efSmrg #ifdef _LIBC
728210d565efSmrg DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
728310d565efSmrg #else
728410d565efSmrg DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
728510d565efSmrg #endif
728610d565efSmrg STORE_NUMBER (p1, mcnt);
728710d565efSmrg break;
728810d565efSmrg }
728910d565efSmrg
729010d565efSmrg #if 0
729110d565efSmrg /* The DEC Alpha C compiler 3.x generates incorrect code for the
729210d565efSmrg test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
729310d565efSmrg AT_WORD_BOUNDARY, so this code is disabled. Expanding the
729410d565efSmrg macro and introducing temporary variables works around the bug. */
729510d565efSmrg
729610d565efSmrg case wordbound:
729710d565efSmrg DEBUG_PRINT1 ("EXECUTING wordbound.\n");
729810d565efSmrg if (AT_WORD_BOUNDARY (d))
729910d565efSmrg break;
730010d565efSmrg goto fail;
730110d565efSmrg
730210d565efSmrg case notwordbound:
730310d565efSmrg DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
730410d565efSmrg if (AT_WORD_BOUNDARY (d))
730510d565efSmrg goto fail;
730610d565efSmrg break;
730710d565efSmrg #else
730810d565efSmrg case wordbound:
730910d565efSmrg {
731010d565efSmrg boolean prevchar, thischar;
731110d565efSmrg
731210d565efSmrg DEBUG_PRINT1 ("EXECUTING wordbound.\n");
731310d565efSmrg if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
731410d565efSmrg break;
731510d565efSmrg
731610d565efSmrg prevchar = WORDCHAR_P (d - 1);
731710d565efSmrg thischar = WORDCHAR_P (d);
731810d565efSmrg if (prevchar != thischar)
731910d565efSmrg break;
732010d565efSmrg goto fail;
732110d565efSmrg }
732210d565efSmrg
732310d565efSmrg case notwordbound:
732410d565efSmrg {
732510d565efSmrg boolean prevchar, thischar;
732610d565efSmrg
732710d565efSmrg DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
732810d565efSmrg if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
732910d565efSmrg goto fail;
733010d565efSmrg
733110d565efSmrg prevchar = WORDCHAR_P (d - 1);
733210d565efSmrg thischar = WORDCHAR_P (d);
733310d565efSmrg if (prevchar != thischar)
733410d565efSmrg goto fail;
733510d565efSmrg break;
733610d565efSmrg }
733710d565efSmrg #endif
733810d565efSmrg
733910d565efSmrg case wordbeg:
734010d565efSmrg DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
734110d565efSmrg if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
734210d565efSmrg && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
734310d565efSmrg break;
734410d565efSmrg goto fail;
734510d565efSmrg
734610d565efSmrg case wordend:
734710d565efSmrg DEBUG_PRINT1 ("EXECUTING wordend.\n");
734810d565efSmrg if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
734910d565efSmrg && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
735010d565efSmrg break;
735110d565efSmrg goto fail;
735210d565efSmrg
735310d565efSmrg #ifdef emacs
735410d565efSmrg case before_dot:
735510d565efSmrg DEBUG_PRINT1 ("EXECUTING before_dot.\n");
735610d565efSmrg if (PTR_CHAR_POS ((unsigned char *) d) >= point)
735710d565efSmrg goto fail;
735810d565efSmrg break;
735910d565efSmrg
736010d565efSmrg case at_dot:
736110d565efSmrg DEBUG_PRINT1 ("EXECUTING at_dot.\n");
736210d565efSmrg if (PTR_CHAR_POS ((unsigned char *) d) != point)
736310d565efSmrg goto fail;
736410d565efSmrg break;
736510d565efSmrg
736610d565efSmrg case after_dot:
736710d565efSmrg DEBUG_PRINT1 ("EXECUTING after_dot.\n");
736810d565efSmrg if (PTR_CHAR_POS ((unsigned char *) d) <= point)
736910d565efSmrg goto fail;
737010d565efSmrg break;
737110d565efSmrg
737210d565efSmrg case syntaxspec:
737310d565efSmrg DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
737410d565efSmrg mcnt = *p++;
737510d565efSmrg goto matchsyntax;
737610d565efSmrg
737710d565efSmrg case wordchar:
737810d565efSmrg DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
737910d565efSmrg mcnt = (int) Sword;
738010d565efSmrg matchsyntax:
738110d565efSmrg PREFETCH ();
738210d565efSmrg /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
738310d565efSmrg d++;
738410d565efSmrg if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
738510d565efSmrg goto fail;
738610d565efSmrg SET_REGS_MATCHED ();
738710d565efSmrg break;
738810d565efSmrg
738910d565efSmrg case notsyntaxspec:
739010d565efSmrg DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
739110d565efSmrg mcnt = *p++;
739210d565efSmrg goto matchnotsyntax;
739310d565efSmrg
739410d565efSmrg case notwordchar:
739510d565efSmrg DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
739610d565efSmrg mcnt = (int) Sword;
739710d565efSmrg matchnotsyntax:
739810d565efSmrg PREFETCH ();
739910d565efSmrg /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
740010d565efSmrg d++;
740110d565efSmrg if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
740210d565efSmrg goto fail;
740310d565efSmrg SET_REGS_MATCHED ();
740410d565efSmrg break;
740510d565efSmrg
740610d565efSmrg #else /* not emacs */
740710d565efSmrg case wordchar:
740810d565efSmrg DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
740910d565efSmrg PREFETCH ();
741010d565efSmrg if (!WORDCHAR_P (d))
741110d565efSmrg goto fail;
741210d565efSmrg SET_REGS_MATCHED ();
741310d565efSmrg d++;
741410d565efSmrg break;
741510d565efSmrg
741610d565efSmrg case notwordchar:
741710d565efSmrg DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
741810d565efSmrg PREFETCH ();
741910d565efSmrg if (WORDCHAR_P (d))
742010d565efSmrg goto fail;
742110d565efSmrg SET_REGS_MATCHED ();
742210d565efSmrg d++;
742310d565efSmrg break;
742410d565efSmrg #endif /* not emacs */
742510d565efSmrg
742610d565efSmrg default:
742710d565efSmrg abort ();
742810d565efSmrg }
742910d565efSmrg continue; /* Successfully executed one pattern command; keep going. */
743010d565efSmrg
743110d565efSmrg
743210d565efSmrg /* We goto here if a matching operation fails. */
743310d565efSmrg fail:
743410d565efSmrg if (!FAIL_STACK_EMPTY ())
743510d565efSmrg { /* A restart point is known. Restore to that state. */
743610d565efSmrg DEBUG_PRINT1 ("\nFAIL:\n");
743710d565efSmrg POP_FAILURE_POINT (d, p,
743810d565efSmrg lowest_active_reg, highest_active_reg,
743910d565efSmrg regstart, regend, reg_info);
744010d565efSmrg
744110d565efSmrg /* If this failure point is a dummy, try the next one. */
744210d565efSmrg if (!p)
744310d565efSmrg goto fail;
744410d565efSmrg
744510d565efSmrg /* If we failed to the end of the pattern, don't examine *p. */
744610d565efSmrg assert (p <= pend);
744710d565efSmrg if (p < pend)
744810d565efSmrg {
744910d565efSmrg boolean is_a_jump_n = false;
745010d565efSmrg
745110d565efSmrg /* If failed to a backwards jump that's part of a repetition
745210d565efSmrg loop, need to pop this failure point and use the next one. */
745310d565efSmrg switch ((re_opcode_t) *p)
745410d565efSmrg {
745510d565efSmrg case jump_n:
745610d565efSmrg is_a_jump_n = true;
745710d565efSmrg /* Fall through. */
745810d565efSmrg case maybe_pop_jump:
745910d565efSmrg case pop_failure_jump:
746010d565efSmrg case jump:
746110d565efSmrg p1 = p + 1;
746210d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
746310d565efSmrg p1 += mcnt;
746410d565efSmrg
746510d565efSmrg if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
746610d565efSmrg || (!is_a_jump_n
746710d565efSmrg && (re_opcode_t) *p1 == on_failure_jump))
746810d565efSmrg goto fail;
746910d565efSmrg break;
747010d565efSmrg default:
747110d565efSmrg /* do nothing */ ;
747210d565efSmrg }
747310d565efSmrg }
747410d565efSmrg
747510d565efSmrg if (d >= string1 && d <= end1)
747610d565efSmrg dend = end_match_1;
747710d565efSmrg }
747810d565efSmrg else
747910d565efSmrg break; /* Matching at this starting point really fails. */
748010d565efSmrg } /* for (;;) */
748110d565efSmrg
748210d565efSmrg if (best_regs_set)
748310d565efSmrg goto restore_best_regs;
748410d565efSmrg
748510d565efSmrg FREE_VARIABLES ();
748610d565efSmrg
748710d565efSmrg return -1; /* Failure to match. */
748810d565efSmrg } /* re_match_2 */
748910d565efSmrg
749010d565efSmrg /* Subroutine definitions for re_match_2. */
749110d565efSmrg
749210d565efSmrg
749310d565efSmrg /* We are passed P pointing to a register number after a start_memory.
749410d565efSmrg
749510d565efSmrg Return true if the pattern up to the corresponding stop_memory can
749610d565efSmrg match the empty string, and false otherwise.
749710d565efSmrg
749810d565efSmrg If we find the matching stop_memory, sets P to point to one past its number.
749910d565efSmrg Otherwise, sets P to an undefined byte less than or equal to END.
750010d565efSmrg
750110d565efSmrg We don't handle duplicates properly (yet). */
750210d565efSmrg
750310d565efSmrg static boolean
PREFIX(group_match_null_string_p)750410d565efSmrg PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
750510d565efSmrg PREFIX(register_info_type) *reg_info)
750610d565efSmrg {
750710d565efSmrg int mcnt;
750810d565efSmrg /* Point to after the args to the start_memory. */
750910d565efSmrg UCHAR_T *p1 = *p + 2;
751010d565efSmrg
751110d565efSmrg while (p1 < end)
751210d565efSmrg {
751310d565efSmrg /* Skip over opcodes that can match nothing, and return true or
751410d565efSmrg false, as appropriate, when we get to one that can't, or to the
751510d565efSmrg matching stop_memory. */
751610d565efSmrg
751710d565efSmrg switch ((re_opcode_t) *p1)
751810d565efSmrg {
751910d565efSmrg /* Could be either a loop or a series of alternatives. */
752010d565efSmrg case on_failure_jump:
752110d565efSmrg p1++;
752210d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
752310d565efSmrg
752410d565efSmrg /* If the next operation is not a jump backwards in the
752510d565efSmrg pattern. */
752610d565efSmrg
752710d565efSmrg if (mcnt >= 0)
752810d565efSmrg {
752910d565efSmrg /* Go through the on_failure_jumps of the alternatives,
753010d565efSmrg seeing if any of the alternatives cannot match nothing.
753110d565efSmrg The last alternative starts with only a jump,
753210d565efSmrg whereas the rest start with on_failure_jump and end
753310d565efSmrg with a jump, e.g., here is the pattern for `a|b|c':
753410d565efSmrg
753510d565efSmrg /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
753610d565efSmrg /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
753710d565efSmrg /exactn/1/c
753810d565efSmrg
753910d565efSmrg So, we have to first go through the first (n-1)
754010d565efSmrg alternatives and then deal with the last one separately. */
754110d565efSmrg
754210d565efSmrg
754310d565efSmrg /* Deal with the first (n-1) alternatives, which start
754410d565efSmrg with an on_failure_jump (see above) that jumps to right
754510d565efSmrg past a jump_past_alt. */
754610d565efSmrg
754710d565efSmrg while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
754810d565efSmrg jump_past_alt)
754910d565efSmrg {
755010d565efSmrg /* `mcnt' holds how many bytes long the alternative
755110d565efSmrg is, including the ending `jump_past_alt' and
755210d565efSmrg its number. */
755310d565efSmrg
755410d565efSmrg if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
755510d565efSmrg (1 + OFFSET_ADDRESS_SIZE),
755610d565efSmrg reg_info))
755710d565efSmrg return false;
755810d565efSmrg
755910d565efSmrg /* Move to right after this alternative, including the
756010d565efSmrg jump_past_alt. */
756110d565efSmrg p1 += mcnt;
756210d565efSmrg
756310d565efSmrg /* Break if it's the beginning of an n-th alternative
756410d565efSmrg that doesn't begin with an on_failure_jump. */
756510d565efSmrg if ((re_opcode_t) *p1 != on_failure_jump)
756610d565efSmrg break;
756710d565efSmrg
756810d565efSmrg /* Still have to check that it's not an n-th
756910d565efSmrg alternative that starts with an on_failure_jump. */
757010d565efSmrg p1++;
757110d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
757210d565efSmrg if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
757310d565efSmrg jump_past_alt)
757410d565efSmrg {
757510d565efSmrg /* Get to the beginning of the n-th alternative. */
757610d565efSmrg p1 -= 1 + OFFSET_ADDRESS_SIZE;
757710d565efSmrg break;
757810d565efSmrg }
757910d565efSmrg }
758010d565efSmrg
758110d565efSmrg /* Deal with the last alternative: go back and get number
758210d565efSmrg of the `jump_past_alt' just before it. `mcnt' contains
758310d565efSmrg the length of the alternative. */
758410d565efSmrg EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
758510d565efSmrg
758610d565efSmrg if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
758710d565efSmrg return false;
758810d565efSmrg
758910d565efSmrg p1 += mcnt; /* Get past the n-th alternative. */
759010d565efSmrg } /* if mcnt > 0 */
759110d565efSmrg break;
759210d565efSmrg
759310d565efSmrg
759410d565efSmrg case stop_memory:
759510d565efSmrg assert (p1[1] == **p);
759610d565efSmrg *p = p1 + 2;
759710d565efSmrg return true;
759810d565efSmrg
759910d565efSmrg
760010d565efSmrg default:
760110d565efSmrg if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
760210d565efSmrg return false;
760310d565efSmrg }
760410d565efSmrg } /* while p1 < end */
760510d565efSmrg
760610d565efSmrg return false;
760710d565efSmrg } /* group_match_null_string_p */
760810d565efSmrg
760910d565efSmrg
761010d565efSmrg /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
761110d565efSmrg It expects P to be the first byte of a single alternative and END one
761210d565efSmrg byte past the last. The alternative can contain groups. */
761310d565efSmrg
761410d565efSmrg static boolean
PREFIX(alt_match_null_string_p)761510d565efSmrg PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
761610d565efSmrg PREFIX(register_info_type) *reg_info)
761710d565efSmrg {
761810d565efSmrg int mcnt;
761910d565efSmrg UCHAR_T *p1 = p;
762010d565efSmrg
762110d565efSmrg while (p1 < end)
762210d565efSmrg {
762310d565efSmrg /* Skip over opcodes that can match nothing, and break when we get
762410d565efSmrg to one that can't. */
762510d565efSmrg
762610d565efSmrg switch ((re_opcode_t) *p1)
762710d565efSmrg {
762810d565efSmrg /* It's a loop. */
762910d565efSmrg case on_failure_jump:
763010d565efSmrg p1++;
763110d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
763210d565efSmrg p1 += mcnt;
763310d565efSmrg break;
763410d565efSmrg
763510d565efSmrg default:
763610d565efSmrg if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
763710d565efSmrg return false;
763810d565efSmrg }
763910d565efSmrg } /* while p1 < end */
764010d565efSmrg
764110d565efSmrg return true;
764210d565efSmrg } /* alt_match_null_string_p */
764310d565efSmrg
764410d565efSmrg
764510d565efSmrg /* Deals with the ops common to group_match_null_string_p and
764610d565efSmrg alt_match_null_string_p.
764710d565efSmrg
764810d565efSmrg Sets P to one after the op and its arguments, if any. */
764910d565efSmrg
765010d565efSmrg static boolean
PREFIX(common_op_match_null_string_p)765110d565efSmrg PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
765210d565efSmrg PREFIX(register_info_type) *reg_info)
765310d565efSmrg {
765410d565efSmrg int mcnt;
765510d565efSmrg boolean ret;
765610d565efSmrg int reg_no;
765710d565efSmrg UCHAR_T *p1 = *p;
765810d565efSmrg
765910d565efSmrg switch ((re_opcode_t) *p1++)
766010d565efSmrg {
766110d565efSmrg case no_op:
766210d565efSmrg case begline:
766310d565efSmrg case endline:
766410d565efSmrg case begbuf:
766510d565efSmrg case endbuf:
766610d565efSmrg case wordbeg:
766710d565efSmrg case wordend:
766810d565efSmrg case wordbound:
766910d565efSmrg case notwordbound:
767010d565efSmrg #ifdef emacs
767110d565efSmrg case before_dot:
767210d565efSmrg case at_dot:
767310d565efSmrg case after_dot:
767410d565efSmrg #endif
767510d565efSmrg break;
767610d565efSmrg
767710d565efSmrg case start_memory:
767810d565efSmrg reg_no = *p1;
767910d565efSmrg assert (reg_no > 0 && reg_no <= MAX_REGNUM);
768010d565efSmrg ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
768110d565efSmrg
768210d565efSmrg /* Have to set this here in case we're checking a group which
768310d565efSmrg contains a group and a back reference to it. */
768410d565efSmrg
768510d565efSmrg if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
768610d565efSmrg REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
768710d565efSmrg
768810d565efSmrg if (!ret)
768910d565efSmrg return false;
769010d565efSmrg break;
769110d565efSmrg
769210d565efSmrg /* If this is an optimized succeed_n for zero times, make the jump. */
769310d565efSmrg case jump:
769410d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
769510d565efSmrg if (mcnt >= 0)
769610d565efSmrg p1 += mcnt;
769710d565efSmrg else
769810d565efSmrg return false;
769910d565efSmrg break;
770010d565efSmrg
770110d565efSmrg case succeed_n:
770210d565efSmrg /* Get to the number of times to succeed. */
770310d565efSmrg p1 += OFFSET_ADDRESS_SIZE;
770410d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
770510d565efSmrg
770610d565efSmrg if (mcnt == 0)
770710d565efSmrg {
770810d565efSmrg p1 -= 2 * OFFSET_ADDRESS_SIZE;
770910d565efSmrg EXTRACT_NUMBER_AND_INCR (mcnt, p1);
771010d565efSmrg p1 += mcnt;
771110d565efSmrg }
771210d565efSmrg else
771310d565efSmrg return false;
771410d565efSmrg break;
771510d565efSmrg
771610d565efSmrg case duplicate:
771710d565efSmrg if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
771810d565efSmrg return false;
771910d565efSmrg break;
772010d565efSmrg
772110d565efSmrg case set_number_at:
772210d565efSmrg p1 += 2 * OFFSET_ADDRESS_SIZE;
772310d565efSmrg return false;
772410d565efSmrg
772510d565efSmrg default:
772610d565efSmrg /* All other opcodes mean we cannot match the empty string. */
772710d565efSmrg return false;
772810d565efSmrg }
772910d565efSmrg
773010d565efSmrg *p = p1;
773110d565efSmrg return true;
773210d565efSmrg } /* common_op_match_null_string_p */
773310d565efSmrg
773410d565efSmrg
773510d565efSmrg /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
773610d565efSmrg bytes; nonzero otherwise. */
773710d565efSmrg
773810d565efSmrg static int
PREFIX(bcmp_translate)773910d565efSmrg PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
774010d565efSmrg RE_TRANSLATE_TYPE translate)
774110d565efSmrg {
774210d565efSmrg register const UCHAR_T *p1 = (const UCHAR_T *) s1;
774310d565efSmrg register const UCHAR_T *p2 = (const UCHAR_T *) s2;
774410d565efSmrg while (len)
774510d565efSmrg {
774610d565efSmrg #ifdef WCHAR
774710d565efSmrg if (((*p1<=0xff)?translate[*p1++]:*p1++)
774810d565efSmrg != ((*p2<=0xff)?translate[*p2++]:*p2++))
774910d565efSmrg return 1;
775010d565efSmrg #else /* BYTE */
775110d565efSmrg if (translate[*p1++] != translate[*p2++]) return 1;
775210d565efSmrg #endif /* WCHAR */
775310d565efSmrg len--;
775410d565efSmrg }
775510d565efSmrg return 0;
775610d565efSmrg }
775710d565efSmrg
775810d565efSmrg
775910d565efSmrg #else /* not INSIDE_RECURSION */
776010d565efSmrg
776110d565efSmrg /* Entry points for GNU code. */
776210d565efSmrg
776310d565efSmrg /* re_compile_pattern is the GNU regular expression compiler: it
776410d565efSmrg compiles PATTERN (of length SIZE) and puts the result in BUFP.
776510d565efSmrg Returns 0 if the pattern was valid, otherwise an error string.
776610d565efSmrg
776710d565efSmrg Assumes the `allocated' (and perhaps `buffer') and `translate' fields
776810d565efSmrg are set in BUFP on entry.
776910d565efSmrg
777010d565efSmrg We call regex_compile to do the actual compilation. */
777110d565efSmrg
777210d565efSmrg const char *
777310d565efSmrg re_compile_pattern (const char *pattern, size_t length,
777410d565efSmrg struct re_pattern_buffer *bufp)
777510d565efSmrg {
777610d565efSmrg reg_errcode_t ret;
777710d565efSmrg
777810d565efSmrg /* GNU code is written to assume at least RE_NREGS registers will be set
777910d565efSmrg (and at least one extra will be -1). */
778010d565efSmrg bufp->regs_allocated = REGS_UNALLOCATED;
778110d565efSmrg
778210d565efSmrg /* And GNU code determines whether or not to get register information
778310d565efSmrg by passing null for the REGS argument to re_match, etc., not by
778410d565efSmrg setting no_sub. */
778510d565efSmrg bufp->no_sub = 0;
778610d565efSmrg
778710d565efSmrg /* Match anchors at newline. */
778810d565efSmrg bufp->newline_anchor = 1;
778910d565efSmrg
779010d565efSmrg # ifdef MBS_SUPPORT
779110d565efSmrg if (MB_CUR_MAX != 1)
779210d565efSmrg ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
779310d565efSmrg else
779410d565efSmrg # endif
779510d565efSmrg ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
779610d565efSmrg
779710d565efSmrg if (!ret)
779810d565efSmrg return NULL;
779910d565efSmrg return gettext (re_error_msgid[(int) ret]);
780010d565efSmrg }
780110d565efSmrg #ifdef _LIBC
780210d565efSmrg weak_alias (__re_compile_pattern, re_compile_pattern)
780310d565efSmrg #endif
780410d565efSmrg
780510d565efSmrg /* Entry points compatible with 4.2 BSD regex library. We don't define
780610d565efSmrg them unless specifically requested. */
780710d565efSmrg
780810d565efSmrg #if defined _REGEX_RE_COMP || defined _LIBC
780910d565efSmrg
781010d565efSmrg /* BSD has one and only one pattern buffer. */
781110d565efSmrg static struct re_pattern_buffer re_comp_buf;
781210d565efSmrg
781310d565efSmrg char *
781410d565efSmrg #ifdef _LIBC
781510d565efSmrg /* Make these definitions weak in libc, so POSIX programs can redefine
781610d565efSmrg these names if they don't use our functions, and still use
781710d565efSmrg regcomp/regexec below without link errors. */
781810d565efSmrg weak_function
781910d565efSmrg #endif
782010d565efSmrg re_comp (const char *s)
782110d565efSmrg {
782210d565efSmrg reg_errcode_t ret;
782310d565efSmrg
782410d565efSmrg if (!s)
782510d565efSmrg {
782610d565efSmrg if (!re_comp_buf.buffer)
782710d565efSmrg return (char *) gettext ("No previous regular expression");
782810d565efSmrg return 0;
782910d565efSmrg }
783010d565efSmrg
783110d565efSmrg if (!re_comp_buf.buffer)
783210d565efSmrg {
783310d565efSmrg re_comp_buf.buffer = (unsigned char *) malloc (200);
783410d565efSmrg if (re_comp_buf.buffer == NULL)
783510d565efSmrg return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
783610d565efSmrg re_comp_buf.allocated = 200;
783710d565efSmrg
783810d565efSmrg re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
783910d565efSmrg if (re_comp_buf.fastmap == NULL)
784010d565efSmrg return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
784110d565efSmrg }
784210d565efSmrg
784310d565efSmrg /* Since `re_exec' always passes NULL for the `regs' argument, we
784410d565efSmrg don't need to initialize the pattern buffer fields which affect it. */
784510d565efSmrg
784610d565efSmrg /* Match anchors at newlines. */
784710d565efSmrg re_comp_buf.newline_anchor = 1;
784810d565efSmrg
784910d565efSmrg # ifdef MBS_SUPPORT
785010d565efSmrg if (MB_CUR_MAX != 1)
785110d565efSmrg ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
785210d565efSmrg else
785310d565efSmrg # endif
785410d565efSmrg ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
785510d565efSmrg
785610d565efSmrg if (!ret)
785710d565efSmrg return NULL;
785810d565efSmrg
785910d565efSmrg /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
786010d565efSmrg return (char *) gettext (re_error_msgid[(int) ret]);
786110d565efSmrg }
786210d565efSmrg
786310d565efSmrg
786410d565efSmrg int
786510d565efSmrg #ifdef _LIBC
786610d565efSmrg weak_function
786710d565efSmrg #endif
786810d565efSmrg re_exec (const char *s)
786910d565efSmrg {
787010d565efSmrg const int len = strlen (s);
787110d565efSmrg return
787210d565efSmrg 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
787310d565efSmrg }
787410d565efSmrg
787510d565efSmrg #endif /* _REGEX_RE_COMP */
787610d565efSmrg
787710d565efSmrg /* POSIX.2 functions. Don't define these for Emacs. */
787810d565efSmrg
787910d565efSmrg #ifndef emacs
788010d565efSmrg
788110d565efSmrg /* regcomp takes a regular expression as a string and compiles it.
788210d565efSmrg
788310d565efSmrg PREG is a regex_t *. We do not expect any fields to be initialized,
788410d565efSmrg since POSIX says we shouldn't. Thus, we set
788510d565efSmrg
788610d565efSmrg `buffer' to the compiled pattern;
788710d565efSmrg `used' to the length of the compiled pattern;
788810d565efSmrg `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
788910d565efSmrg REG_EXTENDED bit in CFLAGS is set; otherwise, to
789010d565efSmrg RE_SYNTAX_POSIX_BASIC;
789110d565efSmrg `newline_anchor' to REG_NEWLINE being set in CFLAGS;
789210d565efSmrg `fastmap' to an allocated space for the fastmap;
789310d565efSmrg `fastmap_accurate' to zero;
789410d565efSmrg `re_nsub' to the number of subexpressions in PATTERN.
789510d565efSmrg
789610d565efSmrg PATTERN is the address of the pattern string.
789710d565efSmrg
789810d565efSmrg CFLAGS is a series of bits which affect compilation.
789910d565efSmrg
790010d565efSmrg If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
790110d565efSmrg use POSIX basic syntax.
790210d565efSmrg
790310d565efSmrg If REG_NEWLINE is set, then . and [^...] don't match newline.
790410d565efSmrg Also, regexec will try a match beginning after every newline.
790510d565efSmrg
790610d565efSmrg If REG_ICASE is set, then we considers upper- and lowercase
790710d565efSmrg versions of letters to be equivalent when matching.
790810d565efSmrg
790910d565efSmrg If REG_NOSUB is set, then when PREG is passed to regexec, that
791010d565efSmrg routine will report only success or failure, and nothing about the
791110d565efSmrg registers.
791210d565efSmrg
791310d565efSmrg It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
791410d565efSmrg the return codes and their meanings.) */
791510d565efSmrg
791610d565efSmrg int
791710d565efSmrg regcomp (regex_t *preg, const char *pattern, int cflags)
791810d565efSmrg {
791910d565efSmrg reg_errcode_t ret;
792010d565efSmrg reg_syntax_t syntax
792110d565efSmrg = (cflags & REG_EXTENDED) ?
792210d565efSmrg RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
792310d565efSmrg
792410d565efSmrg /* regex_compile will allocate the space for the compiled pattern. */
792510d565efSmrg preg->buffer = 0;
792610d565efSmrg preg->allocated = 0;
792710d565efSmrg preg->used = 0;
792810d565efSmrg
792910d565efSmrg /* Try to allocate space for the fastmap. */
793010d565efSmrg preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
793110d565efSmrg
793210d565efSmrg if (cflags & REG_ICASE)
793310d565efSmrg {
793410d565efSmrg int i;
793510d565efSmrg
793610d565efSmrg preg->translate
793710d565efSmrg = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
793810d565efSmrg * sizeof (*(RE_TRANSLATE_TYPE)0));
793910d565efSmrg if (preg->translate == NULL)
794010d565efSmrg return (int) REG_ESPACE;
794110d565efSmrg
794210d565efSmrg /* Map uppercase characters to corresponding lowercase ones. */
794310d565efSmrg for (i = 0; i < CHAR_SET_SIZE; i++)
794410d565efSmrg preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
794510d565efSmrg }
794610d565efSmrg else
794710d565efSmrg preg->translate = NULL;
794810d565efSmrg
794910d565efSmrg /* If REG_NEWLINE is set, newlines are treated differently. */
795010d565efSmrg if (cflags & REG_NEWLINE)
795110d565efSmrg { /* REG_NEWLINE implies neither . nor [^...] match newline. */
795210d565efSmrg syntax &= ~RE_DOT_NEWLINE;
795310d565efSmrg syntax |= RE_HAT_LISTS_NOT_NEWLINE;
795410d565efSmrg /* It also changes the matching behavior. */
795510d565efSmrg preg->newline_anchor = 1;
795610d565efSmrg }
795710d565efSmrg else
795810d565efSmrg preg->newline_anchor = 0;
795910d565efSmrg
796010d565efSmrg preg->no_sub = !!(cflags & REG_NOSUB);
796110d565efSmrg
796210d565efSmrg /* POSIX says a null character in the pattern terminates it, so we
796310d565efSmrg can use strlen here in compiling the pattern. */
796410d565efSmrg # ifdef MBS_SUPPORT
796510d565efSmrg if (MB_CUR_MAX != 1)
796610d565efSmrg ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
796710d565efSmrg else
796810d565efSmrg # endif
796910d565efSmrg ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
797010d565efSmrg
797110d565efSmrg /* POSIX doesn't distinguish between an unmatched open-group and an
797210d565efSmrg unmatched close-group: both are REG_EPAREN. */
797310d565efSmrg if (ret == REG_ERPAREN) ret = REG_EPAREN;
797410d565efSmrg
797510d565efSmrg if (ret == REG_NOERROR && preg->fastmap)
797610d565efSmrg {
797710d565efSmrg /* Compute the fastmap now, since regexec cannot modify the pattern
797810d565efSmrg buffer. */
797910d565efSmrg if (re_compile_fastmap (preg) == -2)
798010d565efSmrg {
798110d565efSmrg /* Some error occurred while computing the fastmap, just forget
798210d565efSmrg about it. */
798310d565efSmrg free (preg->fastmap);
798410d565efSmrg preg->fastmap = NULL;
798510d565efSmrg }
798610d565efSmrg }
798710d565efSmrg
798810d565efSmrg return (int) ret;
798910d565efSmrg }
799010d565efSmrg #ifdef _LIBC
799110d565efSmrg weak_alias (__regcomp, regcomp)
799210d565efSmrg #endif
799310d565efSmrg
799410d565efSmrg
799510d565efSmrg /* regexec searches for a given pattern, specified by PREG, in the
799610d565efSmrg string STRING.
799710d565efSmrg
799810d565efSmrg If NMATCH is zero or REG_NOSUB was set in the cflags argument to
799910d565efSmrg `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
800010d565efSmrg least NMATCH elements, and we set them to the offsets of the
800110d565efSmrg corresponding matched substrings.
800210d565efSmrg
800310d565efSmrg EFLAGS specifies `execution flags' which affect matching: if
800410d565efSmrg REG_NOTBOL is set, then ^ does not match at the beginning of the
800510d565efSmrg string; if REG_NOTEOL is set, then $ does not match at the end.
800610d565efSmrg
800710d565efSmrg We return 0 if we find a match and REG_NOMATCH if not. */
800810d565efSmrg
800910d565efSmrg int
801010d565efSmrg regexec (const regex_t *preg, const char *string, size_t nmatch,
801110d565efSmrg regmatch_t pmatch[], int eflags)
801210d565efSmrg {
801310d565efSmrg int ret;
801410d565efSmrg struct re_registers regs;
801510d565efSmrg regex_t private_preg;
801610d565efSmrg int len = strlen (string);
801710d565efSmrg boolean want_reg_info = !preg->no_sub && nmatch > 0;
801810d565efSmrg
801910d565efSmrg private_preg = *preg;
802010d565efSmrg
802110d565efSmrg private_preg.not_bol = !!(eflags & REG_NOTBOL);
802210d565efSmrg private_preg.not_eol = !!(eflags & REG_NOTEOL);
802310d565efSmrg
802410d565efSmrg /* The user has told us exactly how many registers to return
802510d565efSmrg information about, via `nmatch'. We have to pass that on to the
802610d565efSmrg matching routines. */
802710d565efSmrg private_preg.regs_allocated = REGS_FIXED;
802810d565efSmrg
802910d565efSmrg if (want_reg_info)
803010d565efSmrg {
803110d565efSmrg regs.num_regs = nmatch;
803210d565efSmrg regs.start = TALLOC (nmatch * 2, regoff_t);
803310d565efSmrg if (regs.start == NULL)
803410d565efSmrg return (int) REG_NOMATCH;
803510d565efSmrg regs.end = regs.start + nmatch;
803610d565efSmrg }
803710d565efSmrg
803810d565efSmrg /* Perform the searching operation. */
803910d565efSmrg ret = re_search (&private_preg, string, len,
804010d565efSmrg /* start: */ 0, /* range: */ len,
804110d565efSmrg want_reg_info ? ®s : (struct re_registers *) 0);
804210d565efSmrg
804310d565efSmrg /* Copy the register information to the POSIX structure. */
804410d565efSmrg if (want_reg_info)
804510d565efSmrg {
804610d565efSmrg if (ret >= 0)
804710d565efSmrg {
804810d565efSmrg unsigned r;
804910d565efSmrg
805010d565efSmrg for (r = 0; r < nmatch; r++)
805110d565efSmrg {
805210d565efSmrg pmatch[r].rm_so = regs.start[r];
805310d565efSmrg pmatch[r].rm_eo = regs.end[r];
805410d565efSmrg }
805510d565efSmrg }
805610d565efSmrg
805710d565efSmrg /* If we needed the temporary register info, free the space now. */
805810d565efSmrg free (regs.start);
805910d565efSmrg }
806010d565efSmrg
806110d565efSmrg /* We want zero return to mean success, unlike `re_search'. */
806210d565efSmrg return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
806310d565efSmrg }
806410d565efSmrg #ifdef _LIBC
806510d565efSmrg weak_alias (__regexec, regexec)
806610d565efSmrg #endif
806710d565efSmrg
806810d565efSmrg
806910d565efSmrg /* Returns a message corresponding to an error code, ERRCODE, returned
807010d565efSmrg from either regcomp or regexec. We don't use PREG here. */
807110d565efSmrg
807210d565efSmrg size_t
807310d565efSmrg regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
807410d565efSmrg char *errbuf, size_t errbuf_size)
807510d565efSmrg {
807610d565efSmrg const char *msg;
807710d565efSmrg size_t msg_size;
807810d565efSmrg
807910d565efSmrg if (errcode < 0
808010d565efSmrg || errcode >= (int) (sizeof (re_error_msgid)
808110d565efSmrg / sizeof (re_error_msgid[0])))
808210d565efSmrg /* Only error codes returned by the rest of the code should be passed
808310d565efSmrg to this routine. If we are given anything else, or if other regex
808410d565efSmrg code generates an invalid error code, then the program has a bug.
808510d565efSmrg Dump core so we can fix it. */
808610d565efSmrg abort ();
808710d565efSmrg
808810d565efSmrg msg = gettext (re_error_msgid[errcode]);
808910d565efSmrg
809010d565efSmrg msg_size = strlen (msg) + 1; /* Includes the null. */
809110d565efSmrg
809210d565efSmrg if (errbuf_size != 0)
809310d565efSmrg {
809410d565efSmrg if (msg_size > errbuf_size)
809510d565efSmrg {
809610d565efSmrg #if defined HAVE_MEMPCPY || defined _LIBC
809710d565efSmrg *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
809810d565efSmrg #else
809910d565efSmrg (void) memcpy (errbuf, msg, errbuf_size - 1);
810010d565efSmrg errbuf[errbuf_size - 1] = 0;
810110d565efSmrg #endif
810210d565efSmrg }
810310d565efSmrg else
810410d565efSmrg (void) memcpy (errbuf, msg, msg_size);
810510d565efSmrg }
810610d565efSmrg
810710d565efSmrg return msg_size;
810810d565efSmrg }
810910d565efSmrg #ifdef _LIBC
811010d565efSmrg weak_alias (__regerror, regerror)
811110d565efSmrg #endif
811210d565efSmrg
811310d565efSmrg
811410d565efSmrg /* Free dynamically allocated space used by PREG. */
811510d565efSmrg
811610d565efSmrg void
811710d565efSmrg regfree (regex_t *preg)
811810d565efSmrg {
811910d565efSmrg free (preg->buffer);
812010d565efSmrg preg->buffer = NULL;
812110d565efSmrg
812210d565efSmrg preg->allocated = 0;
812310d565efSmrg preg->used = 0;
812410d565efSmrg
812510d565efSmrg free (preg->fastmap);
812610d565efSmrg preg->fastmap = NULL;
812710d565efSmrg preg->fastmap_accurate = 0;
812810d565efSmrg
812910d565efSmrg free (preg->translate);
813010d565efSmrg preg->translate = NULL;
813110d565efSmrg }
813210d565efSmrg #ifdef _LIBC
813310d565efSmrg weak_alias (__regfree, regfree)
813410d565efSmrg #endif
813510d565efSmrg
813610d565efSmrg #endif /* not emacs */
813710d565efSmrg
813810d565efSmrg #endif /* not INSIDE_RECURSION */
813910d565efSmrg
814010d565efSmrg
814110d565efSmrg #undef STORE_NUMBER
814210d565efSmrg #undef STORE_NUMBER_AND_INCR
814310d565efSmrg #undef EXTRACT_NUMBER
814410d565efSmrg #undef EXTRACT_NUMBER_AND_INCR
814510d565efSmrg
814610d565efSmrg #undef DEBUG_PRINT_COMPILED_PATTERN
814710d565efSmrg #undef DEBUG_PRINT_DOUBLE_STRING
814810d565efSmrg
814910d565efSmrg #undef INIT_FAIL_STACK
815010d565efSmrg #undef RESET_FAIL_STACK
815110d565efSmrg #undef DOUBLE_FAIL_STACK
815210d565efSmrg #undef PUSH_PATTERN_OP
815310d565efSmrg #undef PUSH_FAILURE_POINTER
815410d565efSmrg #undef PUSH_FAILURE_INT
815510d565efSmrg #undef PUSH_FAILURE_ELT
815610d565efSmrg #undef POP_FAILURE_POINTER
815710d565efSmrg #undef POP_FAILURE_INT
815810d565efSmrg #undef POP_FAILURE_ELT
815910d565efSmrg #undef DEBUG_PUSH
816010d565efSmrg #undef DEBUG_POP
816110d565efSmrg #undef PUSH_FAILURE_POINT
816210d565efSmrg #undef POP_FAILURE_POINT
816310d565efSmrg
816410d565efSmrg #undef REG_UNSET_VALUE
816510d565efSmrg #undef REG_UNSET
816610d565efSmrg
816710d565efSmrg #undef PATFETCH
816810d565efSmrg #undef PATFETCH_RAW
816910d565efSmrg #undef PATUNFETCH
817010d565efSmrg #undef TRANSLATE
817110d565efSmrg
817210d565efSmrg #undef INIT_BUF_SIZE
817310d565efSmrg #undef GET_BUFFER_SPACE
817410d565efSmrg #undef BUF_PUSH
817510d565efSmrg #undef BUF_PUSH_2
817610d565efSmrg #undef BUF_PUSH_3
817710d565efSmrg #undef STORE_JUMP
817810d565efSmrg #undef STORE_JUMP2
817910d565efSmrg #undef INSERT_JUMP
818010d565efSmrg #undef INSERT_JUMP2
818110d565efSmrg #undef EXTEND_BUFFER
818210d565efSmrg #undef GET_UNSIGNED_NUMBER
818310d565efSmrg #undef FREE_STACK_RETURN
818410d565efSmrg
818510d565efSmrg # undef POINTER_TO_OFFSET
818610d565efSmrg # undef MATCHING_IN_FRST_STRING
818710d565efSmrg # undef PREFETCH
818810d565efSmrg # undef AT_STRINGS_BEG
818910d565efSmrg # undef AT_STRINGS_END
819010d565efSmrg # undef WORDCHAR_P
819110d565efSmrg # undef FREE_VAR
819210d565efSmrg # undef FREE_VARIABLES
819310d565efSmrg # undef NO_HIGHEST_ACTIVE_REG
819410d565efSmrg # undef NO_LOWEST_ACTIVE_REG
819510d565efSmrg
819610d565efSmrg # undef CHAR_T
819710d565efSmrg # undef UCHAR_T
819810d565efSmrg # undef COMPILED_BUFFER_VAR
819910d565efSmrg # undef OFFSET_ADDRESS_SIZE
820010d565efSmrg # undef CHAR_CLASS_SIZE
820110d565efSmrg # undef PREFIX
820210d565efSmrg # undef ARG_PREFIX
820310d565efSmrg # undef PUT_CHAR
820410d565efSmrg # undef BYTE
820510d565efSmrg # undef WCHAR
820610d565efSmrg
820710d565efSmrg # define DEFINED_ONCE
8208